diff --git a/.config/dotnet-tools.json b/.config/dotnet-tools.json index d372056dad04..ac59f4000046 100644 --- a/.config/dotnet-tools.json +++ b/.config/dotnet-tools.json @@ -15,13 +15,13 @@ ] }, "microsoft.dotnet.xharness.cli": { - "version": "10.0.0-prerelease.25077.1", + "version": "10.0.0-prerelease.25255.1", "commands": [ "xharness" ] }, "microsoft.visualstudio.slngen.tool": { - "version": "11.1.0", + "version": "12.0.15", "commands": [ "slngen" ] diff --git a/.editorconfig b/.editorconfig index 6115362ec2ce..7931e7592eff 100644 --- a/.editorconfig +++ b/.editorconfig @@ -155,6 +155,10 @@ csharp_space_between_square_brackets = false # License header file_header_template = Licensed to the .NET Foundation under one or more agreements.\nThe .NET Foundation licenses this file to you under the MIT license. +[src/libraries/System.Net.Http/src/System/Net/Http/{SocketsHttpHandler/Http3RequestStream.cs,BrowserHttpHandler/BrowserHttpHandler.cs}] +# disable CA2025, the analyzer throws a NullReferenceException when processing this file: https://github.com/dotnet/roslyn-analyzers/issues/7652 +dotnet_diagnostic.CA2025.severity = none + # C++ Files [*.{cpp,h,in}] curly_bracket_next_line = true diff --git a/.github/CODEOWNERS-stop-notifications b/.github/CODEOWNERS-stop-notifications index 34e18599e5be..f634bbfd0023 100644 --- a/.github/CODEOWNERS-stop-notifications +++ b/.github/CODEOWNERS-stop-notifications @@ -10,6 +10,8 @@ /src/coreclr/inc/corinfo.h @dotnet/jit-contrib /src/coreclr/inc/corjit.h @dotnet/jit-contrib /src/coreclr/jit/ @dotnet/jit-contrib +/src/coreclr/interpreter/ @brzvlad @janvorli @kg +/src/coreclr/vm/interpexec* @brzvlad @janvorli @kg /src/coreclr/nativeaot @MichalStrehovsky /src/coreclr/tools/Common @dotnet/crossgen-contrib @MichalStrehovsky /src/coreclr/tools/aot @dotnet/crossgen-contrib diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 000000000000..63704fcb2618 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,290 @@ +**Any code you commit SHOULD compile, and new and existing tests related to the change SHOULD pass.** + +You MUST make your best effort to ensure your changes satisfy those criteria before committing. If for any reason you were unable to build or test the changes, you MUST report that. You MUST NOT claim success unless all builds and tests pass as described above. + +You MUST refer to the [Building & Testing in dotnet/runtime](#building--testing-in-dotnetruntime) instructions and use the commands and approaches specified there before attempting your own suggestions. + +You MUST follow all code-formatting and naming conventions defined in [`.editorconfig`](/.editorconfig). + +In addition to the rules enforced by `.editorconfig`, you SHOULD: + +- Prefer file-scoped namespace declarations and single-line using directives. +- Ensure that the final return statement of a method is on its own line. +- Use pattern matching and switch expressions wherever possible. +- Use `nameof` instead of string literals when referring to member names. +- Always use `is null` or `is not null` instead of `== null` or `!= null`. +- Trust the C# null annotations and don't add null checks when the type system says a value cannot be null. +- Prefer `?.` if applicable (e.g. `scope?.Dispose()`). +- Use `ObjectDisposedException.ThrowIf` where applicable. +- When writing tests, do not emit "Act", "Arrange" or "Assert" comments. + +--- + +# Building & Testing in dotnet/runtime + +- [1. 
Prerequisites](#1-prerequisites) + - [1.1. Determine Affected Components](#11-determine-affected-components) + - [1.2. Baseline Setup](#12-baseline-setup) +- [2. Iterative Build and Test Strategy](#2-iterative-build-and-test-strategy) + - [2.1. Success Criteria](#21-success-criteria) +- [3. CoreCLR (CLR) Workflow](#3-coreclr-clr-workflow) +- [4. Mono Runtime Workflow](#4-mono-runtime-workflow) +- [5. Libraries Workflow](#5-libraries-workflow) + - [5.1. How To: Identify Affected Libraries](#51-how-to-identify-affected-libraries) + - [5.2. How To: Build and Test Specific Library](#52-how-to-build-and-test-specific-library) +- [6. WebAssembly (WASM) Libraries Workflow](#6-webassembly-wasm-libraries-workflow) +- [7. Additional Notes](#7-additional-notes) + - [7.1. Troubleshooting](#71-troubleshooting) + - [7.2. Windows Command Equivalents](#72-windows-command-equivalents) + - [7.3. References](#73-references) + +## 1. Prerequisites + +These steps need to be done **before** applying any changes. + +### 1.1. Determine Affected Components + +Identify which components will be impacted by the changes. If in doubt, analyze the paths of the files to be updated: + +- **CoreCLR (CLR):** Changes in `src/coreclr/` or `src/tests/` +- **Mono Runtime:** Changes in `src/mono/` +- **Libraries:** Changes in `src/libraries/` +- **WASM/WASI Libraries:** Changes in `src/libraries/` *and* the affected library targets WASM or WASI *and* the changes are included for the target (see below for details). +- If none above apply, it is most possibly an infra-only or a docs-only change. Skip build and test steps. + +**WASM/WASI Library Change Detection** + +A change is considered WASM/WASI-relevant if: + +- The relevant `.csproj` contains explicit Browser/WASM or WASI targets (look for ``, `$(TargetPlatformIdentifier)`, or `Condition` attributes referencing `browser` or `wasi`, as well as `TARGET_BROWSER` or `TARGET_WASI` constants), **and** +- The changed file is not excluded from the build for that platform in any way with a `Condition` attribute on `` or ``. + +--- + +### 1.2. Baseline Setup + +Before applying any changes, ensure you have a full successful build of the needed runtime+libraries as a baseline. + +1. Checkout `main` branch + +2. From the repository root, run the build depending on the affected component. If multiple components are affected, subsequently run and verify the builds for all of them. + - **CoreCLR (CLR):** `./build.sh clr+libs+host` + - **Mono Runtime:** `./build.sh mono+libs` + - **Libraries:** `./build.sh clr+libs -rc release` + - **WASM/WASI Libraries:** `./build.sh mono+libs -os browser` + +3. Verify the build completed without error. + - _If the baseline build failed, report the failure and don't proceed with the changes._ + +4. From the repository root: + - Configure PATH: `export PATH="$(pwd)/.dotnet:$PATH"` + - Verify SDK Version: `dotnet --version` should match `sdk.version` in `global.json`. + +5. Switch back to the working branch. + +--- + +## 2. Iterative Build and Test Strategy + +1. Apply the intended changes + +2. **Attempt Build.** If the build fails, attempt to fix and retry the step (up to 5 attempts). + +3. **Attempt Test.** + - If a test _build_ fails, attempt to fix and retry the step (up to 5 attempts). + - If a test _run_ fails, + - Determine if the problem is in the test or in the source + - If the problem is in the test, attempt to fix and retry the step (up to 5 attempts). 
+ - If the problem is in the source, reconsider the full changeset, attempt to fix and repeat the workflow. + +4. **Workflow Iteration:** + - Repeat build and test up to 5 cycles. + - If issues persist after 5 workflow cycles, report failure. + - If the same error persists after each fix attempt, do not repeat the same fix. Instead, escalate or report with full logs. + +When retrying, attempt different fixes and adjust based on the build/test results. + +### 2.1. Success Criteria + +- **Build:** + - Completes without errors. + - Any non-zero exit code from build commands is considered a failure. + +- **Tests:** + - All tests must pass (zero failures). + - Any non-zero exit code from test commands is considered a failure. + +- **Workflow:** + - On success: Report completion + - Otherwise: Report error(s) with logs for diagnostics. + - Collect logs from `artifacts/log/` and the console output for both build and test steps. + - Attach relevant log files or error snippets when reporting failures. + +--- + +## 3. CoreCLR (CLR) Workflow + +From the repository root: + +- Build: + `./build.sh clr` + +- Run tests: + `cd src/tests && ./build.sh && ./run.sh` + +- More info can be found in the dedicated workflow docs: + - [Building CoreCLR Guide](/docs/workflow/building/coreclr/README.md) + - [Building and Running CoreCLR Tests](/docs/workflow/testing/coreclr/testing.md) + +--- + +## 4. Mono Runtime Workflow + +From the repository root: + +- Build: + `./build.sh mono+libs` + +- Run tests: + + ```bash + ./build.sh clr.host + cd src/tests + ./build.sh mono debug /p:LibrariesConfiguration=debug + ./run.sh + ``` + +- More info can be found in the dedicated workflow docs: + - [Building Mono](/docs/workflow/building/mono/README.md) + - [Running test suites using Mono](/docs/workflow/testing/mono/testing.md) + +--- + +## 5. Libraries Workflow + +From the repository root: + +- Build all libraries: + `./build.sh libs -rc release` + +- Run all tests for libraries: + `./build.sh libs.tests -test -rc release` + +- Build a specific library: + - Refer to the section [5.2. How To: Build and Test Specific Library](#52-how-to-build-and-test-specific-library) below. + +- Test a specific library: + - Refer to the sections [5.1. How To: Identify Affected Libraries](#51-how-to-identify-affected-libraries) and [5.2. How To: Build and Test Specific Library](#52-how-to-build-and-test-specific-library) below. + +- More info can be found in the dedicated workflow docs: + - [Build Libraries](/docs/workflow/building/libraries/README.md) + - [Testing Libraries](/docs/workflow/testing/libraries/testing.md) + +### 5.1. How To: Identify Affected Libraries + +For each changed file under `src/libraries/`, find the matching library and its test project(s). +Most libraries use: + +- Source: `src/libraries//src/.csproj` + +- Tests (single): + - `src/libraries//tests/.Tests.csproj` + - OR `src/libraries//tests/.Tests/.Tests.csproj` + +- Tests (multiple types): + - `src/libraries//tests/FunctionalTests/.Functional.Tests.csproj` + - `src/libraries//tests/UnitTests/.Unit.Tests.csproj` + - Or similar. + +--- + +### 5.2. How To: Build and Test Specific Library + +If only one library is affected: + +1. **Navigate to the library directory:** + `cd src/libraries/` + +2. **Build the library:** + `dotnet build` + +3. **Build and run all test projects:** + + - For each discovered `*.Tests.csproj` in the `tests` subdirectory: + `dotnet build /t:test ./tests/.csproj` + + - *Adjust path as needed. 
If in doubt, search with `find tests -name '*.csproj'`.* + + - `dotnet build /t:test` is generally preferred over `dotnet test` + +--- + +## 6. WebAssembly (WASM) Libraries Workflow + +From the repository root: + +- Build: + `./build.sh libs -os browser` + +- Run tests: + `./build.sh libs.tests -test -os browser` + +- More info can be found in the dedicated workflow docs: + - [Build libraries for WebAssembly](/docs/workflow/building/libraries/webassembly-instructions.md) + - [Testing Libraries on WebAssembly](/docs/workflow/testing/libraries/testing-wasm.md) + +--- + +## 7. Additional Notes + +### 7.1. Troubleshooting + +- **Shared Framework Missing** + + - If the build fails with an error "The shared framework must be built before the local targeting pack can be consumed.", build both the runtime (clr or mono) and the libs. + E.g., from the repo root, run `./build.sh clr+libs -rc release` if working on Libraries on CoreCLR. To find the applicable command, refer to the section [1.2. Baseline Setup](#12-baseline-setup). + +- **Testhost Is Missing** + + - If a test run fails with errors indicating a missing testhost, such as: + - "Failed to launch testhost with error: System.IO.FileNotFoundException", or + - "artifacts/bin/testhost/... No such file or directory", + that means some of the prerequisites were not built. + + - To resolve, build both the appropriate runtime (clr or mono) and the libs as a single command before running tests. + E.g., from the repo root, run `./build.sh clr+libs -rc release` before testing Libraries on CoreCLR. To find the applicable command, refer to the section [1.2. Baseline Setup](#12-baseline-setup). + +- **Build Timeout** + + - Do not fail or cancel initial `./build.sh` builds due to timeout unless at least 40 minutes have elapsed. + A full `clr+libs` build from scratch can take up to 32 minutes or more on some systems. + + - Only wait for long-running `./build.sh` commands if they continue to produce output. + If there is no output for 5 minutes, assume the build is stuck and fail early. + +- **Target Does Not Exist** + + - Avoid specifying a target framework when building unless explicitly asked. + Build should identify and select the appropriate `$(NetCoreAppCurrent)` automatically. + +--- + +### 7.2. Windows Command Equivalents + +- Use `build.cmd` instead of `build.sh` on Windows. +- Set PATH: `set PATH=%CD%\.dotnet;%PATH%` +- All other commands are similar unless otherwise noted. + +--- + +### 7.3. 
References + +- [`.editorconfig`](/.editorconfig) +- [Building CoreCLR Guide](/docs/workflow/building/coreclr/README.md) +- [Building and Running CoreCLR Tests](/docs/workflow/testing/coreclr/testing.md) +- [Building Mono](/docs/workflow/building/mono/README.md) +- [Running test suites using Mono](/docs/workflow/testing/mono/testing.md) +- [Build Libraries](/docs/workflow/building/libraries/README.md) +- [Testing Libraries](/docs/workflow/testing/libraries/testing.md) +- [Build libraries for WebAssembly](/docs/workflow/building/libraries/webassembly-instructions.md) +- [Testing Libraries on WebAssembly](/docs/workflow/testing/libraries/testing-wasm.md) diff --git a/.github/policies/resourceManagement.yml b/.github/policies/resourceManagement.yml index 3ba127d8835b..9ee3d102b170 100644 --- a/.github/policies/resourceManagement.yml +++ b/.github/policies/resourceManagement.yml @@ -19,6 +19,12 @@ configuration: - isOpen - isNotLabeledWith: label: backlog-cleanup-candidate + - isNotLabeledWith: + label: area-codegen-coreclr + - isNotLabeledWith: + label: area-iltools-coreclr + - isNotLabeledWith: + label: area-tools-ilverification actions: - addReply: reply: >- @@ -124,6 +130,8 @@ configuration: label: area-CodeGen-coreclr - labelAdded: label: area-Codegen-Interpreter-mono + - labelAdded: + label: area-CodeGen-Interpreter-coreclr - labelAdded: label: area-Codegen-JIT-Mono - labelAdded: @@ -377,6 +385,20 @@ configuration: See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed. assignMentionees: False - if: + - hasLabel: + label: area-CodeGen-Interpreter-coreclr + then: + - mentionUsers: + mentionees: + - brzvlad + - janvorli + - kg + replyTemplate: >- + Tagging subscribers to this area: ${mentionees} + + See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed. 
+ assignMentionees: False + - if: - hasLabel: label: area-Codegen-JIT-Mono then: @@ -703,6 +725,7 @@ configuration: mentionees: - akoeplinger - matouskozak + - simonrozsival replyTemplate: >- Tagging subscribers to this area: ${mentionees} @@ -911,7 +934,7 @@ configuration: - mentionUsers: mentionees: - roji - - ajcvickers + - SamMonoRT replyTemplate: >- Tagging subscribers to this area: ${mentionees} @@ -924,7 +947,7 @@ configuration: - mentionUsers: mentionees: - roji - - ajcvickers + - SamMonoRT replyTemplate: >- Tagging subscribers to this area: ${mentionees} @@ -937,7 +960,7 @@ configuration: - mentionUsers: mentionees: - roji - - ajcvickers + - SamMonoRT replyTemplate: >- Tagging subscribers to this area: ${mentionees} @@ -951,6 +974,7 @@ configuration: mentionees: - cheenamalhotra - david-engel + - roji replyTemplate: >- Tagging subscribers to this area: ${mentionees} @@ -1565,7 +1589,7 @@ configuration: - mentionUsers: mentionees: - roji - - ajcvickers + - SamMonoRT replyTemplate: >- Tagging subscribers to this area: ${mentionees} @@ -1650,11 +1674,9 @@ configuration: description: Add breaking change doc label to PR - if: - payloadType: Pull_Request - - or: - - filesMatchPattern: - pattern: .*ILLink.* - - filesMatchPattern: - pattern: .*illink.* + - filesMatchPattern: + pattern: (?i).*ILLink.* + matchAny: true - not: hasLabel: label: linkable-framework @@ -1666,11 +1688,9 @@ configuration: description: '[Linkable-framework workgroup] Add linkable-framework label to new Prs that touch files with *ILLink* that not have it already' - if: - payloadType: Pull_Request - - or: - - filesMatchPattern: - pattern: .*ILLink.* - - filesMatchPattern: - pattern: .*illink.* + - filesMatchPattern: + pattern: (?i).*ILLink.* + matchAny: true - not: hasLabel: label: linkable-framework diff --git a/.github/prompts/add-new-jit-ee-api.prompt.md b/.github/prompts/add-new-jit-ee-api.prompt.md new file mode 100644 index 000000000000..ce3f56b937f6 --- /dev/null +++ b/.github/prompts/add-new-jit-ee-api.prompt.md @@ -0,0 +1,229 @@ +--- +mode: 'agent' +tools: ['githubRepo', 'codebase', 'terminalLastCommand'] +description: 'Add a new API to the JIT-VM (aka JIT-EE) interface in the codebase.' +--- + +#### 1 — Goal + +Implement **one** new JIT-VM (also known as JIT-EE) API and all supporting glue. +The JIT-VM interface defines the APIs through which the JIT compiler communicates with the runtime (VM). + +#### 2 — Prerequisites for the model + +* You have full repo access +* You may run scripts (e.g., `.sh` or `.bat`) +* Ask **clarifying questions** before the first code change if anything (signature, types, platform constraints) is unclear. + +#### 3 — Required user inputs + +Ask the user for a C-like signature of the new API if it's not provided. +Suggest `/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt` file as a reference. Example: + +``` +CORINFO_METHOD_HANDLE getUnboxedEntry(CORINFO_METHOD_HANDLE ftn, bool* requiresInstMethodTableArg); +``` + +#### 4 — Implementation steps (must be completed in order) + +1. Update the `ThunkInput.txt` file with the new API definition. Example: + +```diff ++CORINFO_METHOD_HANDLE getUnboxedEntry(CORINFO_METHOD_HANDLE ftn, bool* requiresInstMethodTableArg); +``` + +Insert the new API definition without removing any existing entries, placing it near similar signatures. + +2. 
Invoke the `/src/coreclr/tools/Common/JitInterface/ThunkGenerator/gen.sh` script
+(or `/src/coreclr/tools/Common/JitInterface/ThunkGenerator/gen.bat` on Windows) to update the auto-generated files.
+Run the script from the correct directory.
+
+3. Open `/src/coreclr/inc/corinfo.h` and add the new API inside the `ICorStaticInfo` class as the last member. Example:
+
+```diff
++ virtual CORINFO_METHOD_HANDLE getUnboxedEntry(
++ CORINFO_METHOD_HANDLE ftn,
++ bool* requiresInstMethodTableArg
++ ) = 0;
+```
+
+4. Open `/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs` and add the new API at the end of the `CorInfoImpl` class declaration. Use `/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs` to inspect what the parameter types look like in C# for the newly added API, since it is expected to be auto-generated there by the gen.sh (gen.bat) script. Example:
+
+```diff
++ private CORINFO_METHOD_STRUCT_* getUnboxedEntry(CORINFO_METHOD_STRUCT_* ftn, ref bool requiresInstMethodTableArg)
++ {
++ // Hint for the developer: Use CorInfoImpl.RyuJit.cs and CorInfoImpl.ReadyToRun.cs if the implementation
++ // is not shared for NativeAOT and R2R.
++ throw new NotImplementedException();
++ }
+```
+
+5. Open `/src/coreclr/vm/jitinterface.cpp` and add a dummy implementation at the file's end. Example:
+
+```diff
++CORINFO_METHOD_HANDLE CEEInfo::getUnboxedEntry(
++ CORINFO_METHOD_HANDLE ftn,
++ bool* requiresInstMethodTableArg)
++{
++ CONTRACTL {
++ THROWS;
++ GC_TRIGGERS;
++ MODE_PREEMPTIVE;
++ } CONTRACTL_END;
++
++ CORINFO_METHOD_HANDLE result = NULL;
++
++ JIT_TO_EE_TRANSITION();
++
++ UNREACHABLE(); // To be implemented
++
++ EE_TO_JIT_TRANSITION();
++
++ return result;
++}
+```
+
+6. Now implement the most complex part: SuperPMI. SuperPMI acts as a (de)serializer for JIT-VM queries in order
+to then replay them without the actual VM to speed up jit-diffs and other scenarios. All parameters and return
+values are recorded/restored using special primitive types and helpers. We need to update the following files:
+
+* `/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h`:
+* `/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h`:
+* `/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h`:
+* `/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp`:
+
+Go through each of them one by one.
+
+* `/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h`:
+Define two `Agnostic_*` types: one for the input arguments and another for the output parameters (return value, output arguments).
+  Do not create them if one of the generic ones can be re-used such as `DLD`, `DD`, `DLDL`, etc. Use `DWORD*`
+  like types for integers. Inspect the whole file to see how other APIs are defined.
+
+* `/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h`:
+Add a new entry to the `LWM` list. Example:
+
+```diff
++LWM(GetUnboxedEntry, DWORDLONG, DLD);
+```
+
+NOTE: Use upper-case for the first letter of the API name here.
+Add the new record after the very last LWM one.
+
+* `/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h`:
+Define 3 methods in this header file inside the `MethodContext` class (at the end of its definition).
+
+The methods are prefixed with `rec*` (record), `dmp*` (dump to console) and `rep*` (replay).
Example + +```diff ++ void recGetUnboxedEntry(CORINFO_METHOD_HANDLE ftn, bool* requiresInstMethodTableArg, CORINFO_METHOD_HANDLE result); ++ void dmpGetUnboxedEntry(DWORDLONG key, DLD value); ++ CORINFO_METHOD_HANDLE repGetUnboxedEntry(CORINFO_METHOD_HANDLE ftn, bool* requiresInstMethodTableArg); +``` +Now add a new element to `enum mcPackets` enum in the same file. Example: + +```diff ++ Packet_GetUnboxedEntry = , +``` + +* `/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp`: +Add the implementation of the 3 methods to `methodcontext.cpp` at the end of it. +Consider other similar methods in the file for reference. Do not change implementations of other methods in the file. Example: + +```diff ++void MethodContext::recGetUnboxedEntry(CORINFO_METHOD_HANDLE ftn, ++ bool* requiresInstMethodTableArg, ++ CORINFO_METHOD_HANDLE result) ++{ ++ // Initialize the "input - output" map if it is not already initialized ++ if (GetUnboxedEntry == nullptr) ++ { ++ GetUnboxedEntry = new LightWeightMap(); ++ } ++ ++ // Create a key out of the input arguments ++ DWORDLONG key = CastHandle(ftn); ++ DLD value; ++ value.A = CastHandle(result); ++ ++ // Create a value out of the return value and out parameters ++ if (requiresInstMethodTableArg != nullptr) ++ { ++ value.B = (DWORD)*requiresInstMethodTableArg ? 1 : 0; ++ } ++ else ++ { ++ value.B = 0; ++ } ++ ++ // Save it to the map ++ GetUnboxedEntry->Add(key, value); ++ DEBUG_REC(dmpGetUnboxedEntry(key, value)); ++} ++void MethodContext::dmpGetUnboxedEntry(DWORDLONG key, DLD value) ++{ ++ // Dump key and value to the console for debug purposes. ++ printf("GetUnboxedEntry ftn-%016" PRIX64 ", result-%016" PRIX64 ", requires-inst-%u", key, value.A, value.B); ++} ++CORINFO_METHOD_HANDLE MethodContext::repGetUnboxedEntry(CORINFO_METHOD_HANDLE ftn, bool* requiresInstMethodTableArg) ++{ ++ // Create a key out of the input arguments ++ DWORDLONG key = CastHandle(ftn); ++ ++ // Perform the lookup to obtain the value (output arguments and return value) ++ DLD value = LookupByKeyOrMiss(GetUnboxedEntry, key, ": key %016" PRIX64 "", key); ++ DEBUG_REP(dmpGetUnboxedEntry(key, value)); ++ ++ // propagate result to output arguments and return value (if exists) ++ if (requiresInstMethodTableArg != nullptr) ++ { ++ *requiresInstMethodTableArg = (value.B == 1); ++ } ++ return (CORINFO_METHOD_HANDLE)(value.A); ++} +``` + +7. Add a new function to `/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp` that calls the `rep*` method. Example: + +```diff ++CORINFO_METHOD_HANDLE MyICJI::getUnboxedEntry(CORINFO_METHOD_HANDLE ftn, bool* requiresInstMethodTableArg) ++{ ++ jitInstance->mc->cr->AddCall("getUnboxedEntry"); ++ CORINFO_METHOD_HANDLE result = jitInstance->mc->repGetUnboxedEntry(ftn, requiresInstMethodTableArg); ++ return result; ++} +``` + +8. Add a new function to `/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp` that calls the `rec*` method. 
Example: + +```diff ++CORINFO_METHOD_HANDLE interceptor_ICJI::getUnboxedEntry(CORINFO_METHOD_HANDLE ftn, bool* requiresInstMethodTableArg) ++{ ++ mc->cr->AddCall("getUnboxedEntry"); ++ bool localRequiresInstMethodTableArg = false; ++ CORINFO_METHOD_HANDLE result = original_ICorJitInfo->getUnboxedEntry(ftn, &localRequiresInstMethodTableArg); ++ mc->recGetUnboxedEntry(ftn, &localRequiresInstMethodTableArg, result); ++ if (requiresInstMethodTableArg != nullptr) ++ { ++ *requiresInstMethodTableArg = localRequiresInstMethodTableArg; ++ } ++ return result; ++} +``` + +#### 5 — Definition of Done (self-check list) + +* [ ] New API present in **all** layers. +* [ ] Each source file changed exactly once; no unrelated edits. The following files must be changed: + * `/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt` + * `/src/coreclr/inc/corinfo.h` + * `/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs` + * `/src/coreclr/vm/jitinterface.cpp` + * `/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h`: + * `/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h`: + * `/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h`: + * `/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp`: + * `/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h` [optional] + * `/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h` + * `/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp` + * `/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp` +* [ ] All TODO/UNREACHABLE markers remain for future functional implementation. diff --git a/.github/workflows/check-no-merge-label.yml b/.github/workflows/check-no-merge-label.yml index 55154410c2c0..d42d72a623db 100644 --- a/.github/workflows/check-no-merge-label.yml +++ b/.github/workflows/check-no-merge-label.yml @@ -5,7 +5,7 @@ permissions: on: pull_request_target: - types: [labeled, unlabeled] + types: [opened, edited, reopened, labeled, unlabeled, synchronize] branches: - 'main' - 'release/**' diff --git a/.github/workflows/check-service-labels.yml b/.github/workflows/check-service-labels.yml index 6cc9bfddd5c6..f2e800feea5b 100644 --- a/.github/workflows/check-service-labels.yml +++ b/.github/workflows/check-service-labels.yml @@ -5,7 +5,7 @@ permissions: on: pull_request_target: - types: [labeled, unlabeled] + types: [opened, edited, reopened, labeled, unlabeled, synchronize] branches: - 'release/**' diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml new file mode 100644 index 000000000000..91edd82f2a3f --- /dev/null +++ b/.github/workflows/copilot-setup-steps.yml @@ -0,0 +1,47 @@ +name: "Copilot Setup Steps" + +# Allow testing of the setup steps from your repository's "Actions" tab. +on: workflow_dispatch + +jobs: + # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot. + # See https://docs.github.com/en/copilot/customizing-copilot/customizing-the-development-environment-for-copilot-coding-agent + copilot-setup-steps: + runs-on: 8-core-ubuntu-latest + + permissions: + contents: read + + # You can define any steps you want, and they will run before the agent starts. + # If you do not check out your code, Copilot will do this for you. 
+ steps: + - uses: actions/checkout@v4.2.2 + + - name: Install Dependencies + env: + LIBMSQUIC_VERSION: '2.4.8' + run: | + sudo ./eng/common/native/install-dependencies.sh && \ + sudo apt-get install -qq -y \ + curl \ + libbpf1 \ + libelf1t64 \ + libnl-3-200 \ + libnl-route-3-200 \ + libnuma1 \ + libxdp1 && \ + curl -LO "https://packages.microsoft.com/ubuntu/24.04/prod/pool/main/libm/libmsquic/libmsquic_${LIBMSQUIC_VERSION}_amd64.deb" && \ + sudo dpkg -i libmsquic* && \ + rm libmsquic* + + - name: Restore solution + run: ./build.sh --restore --excludecibinarylog --warnaserror false /p:BuildAllConfigurations=true /p:DotNetBuildAllRuntimePacks=true /p:DotNetBuildTests=true + + - name: Put dotnet on the path + run: echo "PATH=$PWD/.dotnet:$PATH" >> $GITHUB_ENV + + - name: Run dotnet info + run: dotnet --info + + - name: Build clr+libs + run: ./build.sh clr+libs -rc release diff --git a/.github/workflows/labeler-cache-retention.yml b/.github/workflows/labeler-cache-retention.yml new file mode 100644 index 000000000000..2c4013895af0 --- /dev/null +++ b/.github/workflows/labeler-cache-retention.yml @@ -0,0 +1,40 @@ +# Workflow template imported and updated from: +# https://github.com/dotnet/issue-labeler/wiki/Onboarding +# +# See labeler.md for more information +# +# Regularly restore the prediction models from cache to prevent cache eviction +name: "Labeler: Cache Retention" + +# For more information about GitHub's action cache limits and eviction policy, see: +# https://docs.github.com/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy + +on: + schedule: + - cron: "58 15 * * *" # 15:58 every day (arbitrary time daily) + + workflow_dispatch: + inputs: + cache_key: + description: "The cache key suffix to use for restoring the model from cache. Defaults to 'ACTIVE'." + required: true + default: "ACTIVE" + +env: + CACHE_KEY: ${{ inputs.cache_key || 'ACTIVE' }} + +jobs: + restore-cache: + # Do not automatically run the workflow on forks outside the 'dotnet' org + if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + type: ["issues", "pulls"] + steps: + - uses: dotnet/issue-labeler/restore@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: ${{ matrix.type }} + cache_key: ${{ env.CACHE_KEY }} + fail-on-cache-miss: true diff --git a/.github/workflows/labeler-predict-issues.yml b/.github/workflows/labeler-predict-issues.yml new file mode 100644 index 000000000000..a12efc1512af --- /dev/null +++ b/.github/workflows/labeler-predict-issues.yml @@ -0,0 +1,60 @@ +# Workflow template imported and updated from: +# https://github.com/dotnet/issue-labeler/wiki/Onboarding +# +# See labeler.md for more information +# +# Predict labels for Issues using a trained model +name: "Labeler: Predict (Issues)" + +on: + # Only automatically predict area labels when issues are first opened + issues: + types: opened + + # Allow dispatching the workflow via the Actions UI, specifying ranges of numbers + workflow_dispatch: + inputs: + issues: + description: "Issue Numbers (comma-separated list of ranges)." + required: true + cache_key: + description: "The cache key suffix to use for restoring the model. Defaults to 'ACTIVE'." 
+ required: true + default: "ACTIVE" + +env: + # Do not allow failure for jobs triggered automatically (as this causes red noise on the workflows list) + ALLOW_FAILURE: ${{ github.event_name == 'workflow_dispatch' }} + + LABEL_PREFIX: "area-" + THRESHOLD: 0.40 + DEFAULT_LABEL: "needs-area-label" + +jobs: + predict-issue-label: + # Do not automatically run the workflow on forks outside the 'dotnet' org + if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }} + runs-on: ubuntu-latest + permissions: + issues: write + steps: + - name: "Restore issues model from cache" + id: restore-model + uses: dotnet/issue-labeler/restore@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: issues + fail-on-cache-miss: ${{ env.ALLOW_FAILURE }} + quiet: true + + - name: "Predict issue labels" + id: prediction + if: ${{ steps.restore-model.outputs.cache-hit == 'true' }} + uses: dotnet/issue-labeler/predict@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + issues: ${{ inputs.issues || github.event.issue.number }} + label_prefix: ${{ env.LABEL_PREFIX }} + threshold: ${{ env.THRESHOLD }} + default_label: ${{ env.DEFAULT_LABEL }} + env: + GITHUB_TOKEN: ${{ github.token }} + continue-on-error: ${{ !env.ALLOW_FAILURE }} diff --git a/.github/workflows/labeler-predict-pulls.yml b/.github/workflows/labeler-predict-pulls.yml new file mode 100644 index 000000000000..490c9b5a717e --- /dev/null +++ b/.github/workflows/labeler-predict-pulls.yml @@ -0,0 +1,74 @@ +# Workflow template imported and updated from: +# https://github.com/dotnet/issue-labeler/wiki/Onboarding +# +# See labeler.md for more information +# +# Predict labels for Pull Requests using a trained model +name: "Labeler: Predict (Pulls)" + +on: + # Per to the following documentation: + # https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#pull_request_target + # + # The `pull_request_target` event runs in the context of the base of the pull request, rather + # than in the context of the merge commit, as the `pull_request` event does. This prevents + # execution of unsafe code from the head of the pull request that could alter the repository + # or steal any secrets you use in your workflow. This event allows your workflow to do things + # like label or comment on pull requests from forks. + # + # Only automatically predict area labels when pull requests are first opened + pull_request_target: + types: opened + + # Configure the branches that need to have PRs labeled + branches: + - 'main' + - 'release/*' + + # Allow dispatching the workflow via the Actions UI, specifying ranges of numbers + workflow_dispatch: + inputs: + pulls: + description: "Pull Request Numbers (comma-separated list of ranges)." + required: true + cache_key: + description: "The cache key suffix to use for restoring the model. Defaults to 'ACTIVE'." 
+ required: true + default: "ACTIVE" + +env: + # Do not allow failure for jobs triggered automatically (this can block PR merge) + ALLOW_FAILURE: ${{ github.event_name == 'workflow_dispatch' }} + + LABEL_PREFIX: "area-" + THRESHOLD: 0.40 + DEFAULT_LABEL: "needs-area-label" + +jobs: + predict-pull-label: + # Do not automatically run the workflow on forks outside the 'dotnet' org + if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }} + runs-on: ubuntu-latest + permissions: + pull-requests: write + steps: + - name: "Restore pulls model from cache" + id: restore-model + uses: dotnet/issue-labeler/restore@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: pulls + fail-on-cache-miss: ${{ env.ALLOW_FAILURE }} + quiet: true + + - name: "Predict pull labels" + id: prediction + if: ${{ steps.restore-model.outputs.cache-hit == 'true' }} + uses: dotnet/issue-labeler/predict@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + pulls: ${{ inputs.pulls || github.event.number }} + label_prefix: ${{ env.LABEL_PREFIX }} + threshold: ${{ env.THRESHOLD }} + default_label: ${{ env.DEFAULT_LABEL }} + env: + GITHUB_TOKEN: ${{ github.token }} + continue-on-error: ${{ !env.ALLOW_FAILURE }} diff --git a/.github/workflows/labeler-promote.yml b/.github/workflows/labeler-promote.yml new file mode 100644 index 000000000000..c01086c51779 --- /dev/null +++ b/.github/workflows/labeler-promote.yml @@ -0,0 +1,54 @@ +# Workflow template imported and updated from: +# https://github.com/dotnet/issue-labeler/wiki/Onboarding +# +# See labeler.md for more information +# +# Promote a model from staging to 'ACTIVE', backing up the currently 'ACTIVE' model +name: "Labeler: Promotion" + +on: + # Dispatched via the Actions UI, promotes the staged models from + # a staged slot into the prediction environment + workflow_dispatch: + inputs: + issues: + description: "Issues: Promote Model" + type: boolean + required: true + pulls: + description: "Pulls: Promote Model" + type: boolean + required: true + staged_key: + description: "The cache key suffix to use for promoting a staged model to 'ACTIVE'. Defaults to 'staged'." + required: true + default: "staged" + backup_key: + description: "The cache key suffix to use for backing up the currently active model. Defaults to 'backup'." 
+ default: "backup" + +permissions: + actions: write + +jobs: + promote-issues: + if: ${{ inputs.issues }} + runs-on: ubuntu-latest + steps: + - name: "Promote Model for Issues" + uses: dotnet/issue-labeler/promote@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: "issues" + staged_key: ${{ inputs.staged_key }} + backup_key: ${{ inputs.backup_key }} + + promote-pulls: + if: ${{ inputs.pulls }} + runs-on: ubuntu-latest + steps: + - name: "Promote Model for Pull Requests" + uses: dotnet/issue-labeler/promote@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: "pulls" + staged_key: ${{ inputs.staged_key }} + backup_key: ${{ inputs.backup_key }} diff --git a/.github/workflows/labeler-train.yml b/.github/workflows/labeler-train.yml new file mode 100644 index 000000000000..79ad2db0cced --- /dev/null +++ b/.github/workflows/labeler-train.yml @@ -0,0 +1,161 @@ +# Workflow template imported and updated from: +# https://github.com/dotnet/issue-labeler/wiki/Onboarding +# +# See labeler.md for more information +# +# Train the Issues and Pull Requests models for label prediction +name: "Labeler: Training" + +on: + workflow_dispatch: + inputs: + type: + description: "Issues or Pull Requests" + type: choice + required: true + default: "Both" + options: + - "Both" + - "Issues" + - "Pull Requests" + + steps: + description: "Training Steps" + type: choice + required: true + default: "All" + options: + - "All" + - "Download Data" + - "Train Model" + - "Test Model" + + limit: + description: "Max number of items to download for training/testing the model (newest items are used). Defaults to the max number of pages times the page size." + type: number + page_size: + description: "Number of items per page in GitHub API requests. Defaults to 100 for issues, 25 for pull requests." + type: number + page_limit: + description: "Maximum number of pages to download for training/testing the model. Defaults to 1000 for issues, 4000 for pull requests." + type: number + cache_key_suffix: + description: "The cache key suffix to use for staged data/models (use 'ACTIVE' to bypass staging). Defaults to 'staged'." 
+ required: true + default: "staged" + +env: + CACHE_KEY: ${{ inputs.cache_key_suffix }} + REPOSITORY: ${{ github.repository }} + LABEL_PREFIX: "area-" + THRESHOLD: "0.40" + LIMIT: ${{ inputs.limit }} + PAGE_SIZE: ${{ inputs.page_size }} + PAGE_LIMIT: ${{ inputs.page_limit }} + +jobs: + download-issues: + if: ${{ contains(fromJSON('["Both", "Issues"]'), inputs.type) && contains(fromJSON('["All", "Download Data"]'), inputs.steps) }} + runs-on: ubuntu-latest + permissions: + issues: read + steps: + - name: "Download Issues" + uses: dotnet/issue-labeler/download@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: "issues" + cache_key: ${{ env.CACHE_KEY }} + repository: ${{ env.REPOSITORY }} + label_prefix: ${{ env.LABEL_PREFIX }} + limit: ${{ env.LIMIT }} + page_size: ${{ env.PAGE_SIZE }} + page_limit: ${{ env.PAGE_LIMIT }} + env: + GITHUB_TOKEN: ${{ github.token }} + + download-pulls: + if: ${{ contains(fromJSON('["Both", "Pull Requests"]'), inputs.type) && contains(fromJSON('["All", "Download Data"]'), inputs.steps) }} + runs-on: ubuntu-latest + permissions: + pull-requests: read + steps: + - name: "Download Pull Requests" + uses: dotnet/issue-labeler/download@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: "pulls" + cache_key: ${{ env.CACHE_KEY }} + repository: ${{ env.REPOSITORY }} + label_prefix: ${{ env.LABEL_PREFIX }} + limit: ${{ env.LIMIT }} + page_size: ${{ env.PAGE_SIZE }} + page_limit: ${{ env.PAGE_LIMIT }} + env: + GITHUB_TOKEN: ${{ github.token }} + + train-issues: + if: ${{ always() && contains(fromJSON('["Both", "Issues"]'), inputs.type) && contains(fromJSON('["All", "Train Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.download-issues.result) }} + runs-on: ubuntu-latest + permissions: {} + needs: download-issues + steps: + - name: "Train Model for Issues" + uses: dotnet/issue-labeler/train@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: "issues" + data_cache_key: ${{ env.CACHE_KEY }} + model_cache_key: ${{ env.CACHE_KEY }} + + train-pulls: + if: ${{ always() && contains(fromJSON('["Both", "Pull Requests"]'), inputs.type) && contains(fromJSON('["All", "Train Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.download-pulls.result) }} + runs-on: ubuntu-latest + permissions: {} + needs: download-pulls + steps: + - name: "Train Model for Pull Requests" + uses: dotnet/issue-labeler/train@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: "pulls" + data_cache_key: ${{ env.CACHE_KEY }} + model_cache_key: ${{ env.CACHE_KEY }} + + test-issues: + if: ${{ always() && contains(fromJSON('["Both", "Issues"]'), inputs.type) && contains(fromJSON('["All", "Test Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.train-issues.result) }} + runs-on: ubuntu-latest + permissions: + issues: read + needs: train-issues + steps: + - name: "Test Model for Issues" + uses: dotnet/issue-labeler/test@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: "issues" + cache_key: ${{ env.CACHE_KEY }} + repository: ${{ env.REPOSITORY }} + label_prefix: ${{ env.LABEL_PREFIX }} + threshold: ${{ env.THRESHOLD }} + limit: ${{ env.LIMIT }} + page_size: ${{ env.PAGE_SIZE }} + page_limit: ${{ env.PAGE_LIMIT }} + env: + GITHUB_TOKEN: ${{ github.token }} + + test-pulls: + if: ${{ always() && contains(fromJSON('["Both", "Pull Requests"]'), inputs.type) && contains(fromJSON('["All", "Test Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), 
needs.train-pulls.result) }} + runs-on: ubuntu-latest + permissions: + pull-requests: read + needs: train-pulls + steps: + - name: "Test Model for Pull Requests" + uses: dotnet/issue-labeler/test@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0 + with: + type: "pulls" + cache_key: ${{ env.CACHE_KEY }} + repository: ${{ env.REPOSITORY }} + label_prefix: ${{ env.LABEL_PREFIX }} + threshold: ${{ env.THRESHOLD }} + limit: ${{ env.LIMIT }} + page_size: ${{ env.PAGE_SIZE }} + page_limit: ${{ env.PAGE_LIMIT }} + env: + GITHUB_TOKEN: ${{ github.token }} diff --git a/.github/workflows/labeler.md b/.github/workflows/labeler.md new file mode 100644 index 000000000000..502ebb98a8d0 --- /dev/null +++ b/.github/workflows/labeler.md @@ -0,0 +1,32 @@ +# Issue-Labeler Workflows + +This repository uses actions from [dotnet/issue-labeler](https://github.com/dotnet/issue-labeler) to predict area labels for issues and pull requests. + +The following workflow templates were imported and updated from [dotnet/issue-labeler/wiki/Onboarding](https://github.com/dotnet/issue-labeler/wiki/Onboarding): + +1. `labeler-cache-retention.yml` +2. `labeler-predict-issues.yml` +3. `labeler-predict-pulls.yml` +4. `labeler-promote.yml` +5. `labeler-train.yml` + +## Repository Configuration + +Across these workflows, the following changes were made to configure the issue labeler for this repository: + +1. Set `LABEL_PREFIX` to `"area-"`: + - `labeler-predict-issues.yml` + - `labeler-predict-pulls.yml` + - `labeler-train.yml` +2. Set `DEFAULT_LABEL` to `"needs-area-label"`: + - `labeler-predict-issues.yml` + - `labeler-predict-pulls.yml` +3. Remove the `EXCLUDED_AUTHORS` value as we do not bypass labeling for any authors' issues/pulls in this repository: + - `labeler-predict-issues.yml` + - `labeler-predict-pulls.yml` +4. Update the pull request labeling branches to include `main` and `release/*`: + - `labeler-predict-pulls.yml` +5. Remove the `repository` input for training the models against another repository: + - `labeler-train.yml` +6. 
Update the cache retention cron schedule to an arbitrary time of day: + - `labeler-cache-retention.yml` diff --git a/.gitignore b/.gitignore index 35f166e2c297..b587718b119a 100644 --- a/.gitignore +++ b/.gitignore @@ -364,5 +364,12 @@ src/coreclr/System.Private.CoreLib/common run-stress-* test:.cs +<<<<<<< HEAD # NativeAOT-LLVM: wasmjit-diff analysis results wasmjit-diff/ +======= +# XUnit +*.tempLog.xml +*.testResults.xml +*.testStats.csv +>>>>>>> upstream-jun diff --git a/Build.proj b/Build.proj index 2f687610b4a9..f0633d786301 100644 --- a/Build.proj +++ b/Build.proj @@ -1,6 +1,6 @@ - + @@ -15,4 +15,40 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/Directory.Build.props b/Directory.Build.props index 8f022a1fad44..61e78257e3a6 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -17,6 +17,7 @@ false +<<<<<<< HEAD browser @@ -56,12 +57,17 @@ x64 $(TargetArchitecture) +======= + + +>>>>>>> upstream-jun @@ -202,6 +205,7 @@ $(MonoCrossAOTTargetOS)+tvos+ios+maccatalyst +<<<<<<< HEAD false true @@ -306,20 +310,24 @@ true true +======= + +>>>>>>> upstream-jun $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'microsoft.netcore.app.ref')) $([MSBuild]::NormalizeDirectory('$(MicrosoftNetCoreAppRefPackDir)', 'ref', '$(NetCoreAppCurrent)')) $([MSBuild]::NormalizeDirectory('$(MicrosoftNetCoreAppRefPackDir)', 'data')) - $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'microsoft.netcore.app.runtime.$(OutputRID)', '$(LibrariesConfiguration)')) - $([MSBuild]::NormalizeDirectory('$(MicrosoftNetCoreAppRuntimePackDir)', 'runtimes', '$(OutputRID)')) + $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'microsoft.netcore.app.runtime.$(TargetRid)', '$(LibrariesConfiguration)')) + $([MSBuild]::NormalizeDirectory('$(MicrosoftNetCoreAppRuntimePackDir)', 'runtimes', '$(TargetRid)')) $([MSBuild]::NormalizeDirectory('$(MicrosoftNetCoreAppRuntimePackRidDir)', 'lib', '$(NetCoreAppCurrent)')) $([MSBuild]::NormalizeDirectory('$(MicrosoftNetCoreAppRuntimePackRidDir)', 'native')) - $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', '$(OutputRID).$(HostConfiguration)', 'corehost')) + $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', '$(TargetRid).$(HostConfiguration)', 'corehost')) + $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'mscordaccore_universal', '$(Configuration)', '$(NetCoreAppCurrent)', '$(TargetRid)', 'publish')) @@ -333,6 +341,8 @@ false false + + false true + true @@ -444,6 +454,7 @@ '$(MSBuildProjectExtension)' != '.pkgproj' and '$(UsingMicrosoftNoTargetsSdk)' != 'true' and '$(UsingMicrosoftTraversalSdk)' != 'true'">true + true @@ -505,12 +516,7 @@ $(RepositoryEngineeringDir)NoTargetsSdk.BeforeTargets.targets $(RepositoryEngineeringDir)TraversalSdk.AfterTargets.targets - - - - - $([MSBuild]::NormalizePath('$(ArtifactsBinDir)', 'Microsoft.NETCore.Platforms', 'runtime.json')) - $([MSBuild]::NormalizePath('$(LibrariesProjectRoot)', 'Microsoft.NETCore.Platforms', 'src', 'runtime.json')) + $(BeforeMicrosoftNETSdkTargets);$(RepositoryEngineeringDir)ILSdk.BeforeTargets.targets diff --git a/Directory.Build.targets b/Directory.Build.targets index fb19adafecf9..d1c90653897e 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -7,12 +7,25 @@ false + + + + + $([MSBuild]::NormalizePath('$(BootstrapRidGraphDir)', 'runtime.json')) + + $([MSBuild]::NormalizePath('$(LibrariesProjectRoot)', 'Microsoft.NETCore.Platforms', 'src', 'runtime.json')) + + + + + $([MSBuild]::NormalizePath('$(ArtifactsBinDir)', 'Microsoft.NETCore.Platforms', 'runtime.json')) + 
$([MSBuild]::NormalizePath('$(LibrariesProjectRoot)', 'Microsoft.NETCore.Platforms', 'src', 'runtime.json')) + + - - @@ -25,11 +38,11 @@ %(RuntimePackRuntimeIdentifiers);$(PackageRID) + Condition="'%(TargetFramework)' == '$(NetCoreAppCurrent)'">%(RuntimePackRuntimeIdentifiers);$(NETCoreSdkRuntimeIdentifier) %(Crossgen2RuntimeIdentifiers);$(PackageRID) + Condition="'%(TargetFramework)' == '$(NetCoreAppCurrent)'" >%(Crossgen2RuntimeIdentifiers);$(NETCoreSdkRuntimeIdentifier) @@ -56,7 +69,7 @@ - true + true true @@ -70,11 +83,6 @@ - - diff --git a/NuGet.config b/NuGet.config index b85394430658..240b4807a1aa 100644 --- a/NuGet.config +++ b/NuGet.config @@ -19,6 +19,8 @@ + + diff --git a/README.md b/README.md index 678e8eef11fe..b95366e17522 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,6 @@ Join the chat at the [CSharp discord](https://aka.ms/csharp-discord): `#allow-un [![Build Status](https://dev.azure.com/dnceng-public/public/_apis/build/status/dotnet/runtime/runtime?branchName=main)](https://dev.azure.com/dnceng-public/public/_build/latest?definitionId=129&branchName=main) [![Help Wanted](https://img.shields.io/github/issues/dotnet/runtime/help%20wanted?style=flat-square&color=%232EA043&label=help%20wanted)](https://github.com/dotnet/runtime/labels/help%20wanted) [![Good First Issue](https://img.shields.io/github/issues/dotnet/runtime/good%20first%20issue?style=flat-square&color=%232EA043&label=good%20first%20issue)](https://github.com/dotnet/runtime/labels/good%20first%20issue) -[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/dotnet/runtime) [![Discord](https://img.shields.io/discord/732297728826277939?style=flat-square&label=Discord&logo=discord&logoColor=white&color=7289DA)](https://aka.ms/dotnet-discord) * [What is .NET?](#what-is-net) diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT index 238235a96367..1e194f3b353a 100644 --- a/THIRD-PARTY-NOTICES.TXT +++ b/THIRD-PARTY-NOTICES.TXT @@ -90,6 +90,18 @@ freely, subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. +License notice for opentelemetry-dotnet +--------------------------------------- + +https://github.com/open-telemetry/opentelemetry-dotnet/blob/805dd6b4abfa18ef2706d04c30d0ed28dbc2955e/LICENSE.TXT#L1 + +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +Copyright The OpenTelemetry Authors + + License notice for LinuxTracepoints ----------------------------------- @@ -1393,3 +1405,14 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +License for National Institute of Standards and Technology ACVP Data +-------------------------------------------------------------------- +Available at https://github.com/usnistgov/ACVP-Server/blob/85f8742965b2691862079172982683757d8d91db/README.md#License + +NIST-developed software is provided by NIST as a public service. You may use, copy, and distribute copies of the software in any medium, provided that you keep intact this entire notice. You may improve, modify, and create derivative works of the software or any portion of the software, and you may copy and distribute such modifications or works. Modified works should carry a notice stating that you changed the software and should note the date and nature of any such change. 
Please explicitly acknowledge the National Institute of Standards and Technology as the source of the software. + +NIST-developed software is expressly provided "AS IS." NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT, OR ARISING BY OPERATION OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND DATA ACCURACY. NIST NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY, RELIABILITY, OR USEFULNESS OF THE SOFTWARE. + +You are solely responsible for determining the appropriateness of using and distributing the software and you assume all risks associated with its use, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and the unavailability or interruption of operation. This software is not intended to be used in any situation where a failure could cause risk of injury or damage to property. The software developed by NIST employees is not subject to copyright protection within the United States. + diff --git a/build.sh b/build.sh index 205b3326c0cb..b55abfcb4205 100755 --- a/build.sh +++ b/build.sh @@ -25,10 +25,10 @@ scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" if is_cygwin_or_mingw; then # if bash shell running on Windows (not WSL), - # pass control to powershell build script. - scriptroot=$(cygpath -d "$scriptroot") - powershell -c "$scriptroot\\build.cmd" $@ + # pass control to batch build script. + "$scriptroot/build.cmd" "$@" else +<<<<<<< HEAD if [[ "$*" == *"wasm"* && "$*" == *"-ci"* ]]; then # This is a bit of a workaround for the fact that the pipelines do not have a great # way of preserving the environment between scripts. Set by install-emscripten.ps1. @@ -37,4 +37,7 @@ else fi fi "$scriptroot/eng/build.sh" $@ +======= + "$scriptroot/eng/build.sh" "$@" +>>>>>>> upstream-jun fi diff --git a/docs/README.md b/docs/README.md index 1c396c4992a7..3d7f5a529ff0 100644 --- a/docs/README.md +++ b/docs/README.md @@ -61,7 +61,11 @@ Coding Guidelines Project Docs ================= -To be added. Visit the [project docs folder](project/) directly meanwhile. 
+- [Breaking change process](./project/breaking-change-process.md) +- [Copyright](./project/copyright.md) +- [Linux build methodology](./project/linux-build-methodology.md) +- [Onboarding Guide for New Operating System Versions](./project/os-onboarding.md) +- [Project docs folder](./project/) Other Information ================= diff --git a/docs/area-owners.md b/docs/area-owners.md index 7960fcaa9ef9..ce2dd4d59101 100644 --- a/docs/area-owners.md +++ b/docs/area-owners.md @@ -18,12 +18,12 @@ Note: Editing this file doesn't update the mapping used by `@dotnet-policy-servi | area-Codeflow | @dotnet/dnr-codeflow | @dotnet/dnr-codeflow | Used for automated PRs that ingest code from other repos | | area-Codegen-AOT-mono | @steveisok | @kotlarmilos | | | area-CodeGen-coreclr | @JulieLeeMSFT | @BruceForstall @dotnet/jit-contrib | | +| area-Codegen-Interpreter-coreclr | @vitek-karas | @BrzVlad @janvorli | | | area-Codegen-Interpreter-mono | @vitek-karas | @BrzVlad @kotlarmilos | | | area-Codegen-Intrinsics-mono | @steveisok | @fanyang-mono | | | area-Codegen-JIT-mono | @steveisok | | | | area-Codegen-LLVM-mono | @steveisok | | | | area-Codegen-meta-mono | @steveisok | | | -| area-CrossGen/NGEN-coreclr | @steveisok | @dotnet/crossgen-contrib | | | area-crossgen2-coreclr | @steveisok | @dotnet/crossgen-contrib | | | area-Debugger-mono | @tommcdon | @thaystg | | | area-DependencyModel | @ericstj | @dotnet/area-dependencymodel | Included:
  • Microsoft.Extensions.DependencyModel
| @@ -42,8 +42,8 @@ Note: Editing this file doesn't update the mapping used by `@dotnet-policy-servi | area-Extensions-Primitives | @ericstj | @dotnet/area-extensions-primitives | | | area-GC-coreclr | @mangod9 | @Maoni0 | | | area-GC-mono | @mangod9 | @mangod9 | @BrzVlad to consult | -| area-Host | @agocke | @jeffschwMSFT @vitek-karas @vsadov | Issues with dotnet.exe including bootstrapping, framework detection, hostfxr.dll and hostpolicy.dll | -| area-HostModel | @agocke | @vitek-karas | | +| area-Host | @agocke | @jeffschwMSFT @elinor-fung | Issues with dotnet.exe including bootstrapping, framework detection, hostfxr.dll and hostpolicy.dll | +| area-HostModel | @agocke | @elinor-fung | | | area-ILTools-coreclr | @JulieLeeMSFT | @BruceForstall @dotnet/jit-contrib | | | area-Infrastructure | @agocke | @jeffschwMSFT @MichaelSimons | | | area-Infrastructure-coreclr | @agocke | @jeffschwMSFT | | @@ -62,7 +62,7 @@ Note: Editing this file doesn't update the mapping used by `@dotnet-policy-servi | area-ReadyToRun-coreclr | @steveisok | @dotnet/area-type-system-and-startup | | | area-Serialization | @HongGit | @StephenMolloy @HongGit | Packages:
  • System.Runtime.Serialization.Xml
  • System.Runtime.Serialization.Json
  • System.Private.DataContractSerialization
  • System.Xml.XmlSerializer
Excluded:
  • System.Runtime.Serialization.Formatters
| | area-Setup | @MichaelSimons | @NikolaMilosavljevic | Distro-specific (Linux, Mac and Windows) setup packages and msi files | -| area-Single-File | @agocke | @vitek-karas @vsadov | | +| area-Single-File | @agocke | @elinor-fung @vsadov | | | area-Snap | @MichaelSimons | @NikolaMilosavljevic @leecow @MichaelSimons | | | area-System.Buffers | @jeffhandley | @dotnet/area-system-buffers | | | area-System.ClientModel | @terrajobst | @dotnet/fxdc | Bugs and feature requests should go to https://github.com/Azure/azure-sdk-for-net/issues. We don't own the code, but FXDC reviews changes to determine overlap with other `System` concepts. The Azure SDK team will post API updates in this repo for us to review. | @@ -133,7 +133,7 @@ Note: Editing this file doesn't update the mapping used by `@dotnet-policy-servi | area-System.Text.Encodings.Web | @ericstj | @dotnet/area-system-text-encodings-web | | | area-System.Text.Json | @jeffhandley | @dotnet/area-system-text-json | | | area-System.Text.RegularExpressions | @ericstj | @dotnet/area-system-text-regularexpressions | Consultants: @stephentoub | -| area-System.Threading | @mangod9 | @kouvel | | +| area-System.Threading | @mangod9 | @kouvel @vsadov | | | area-System.Threading.Channels | @jeffhandley | @dotnet/area-system-threading-channels | Consultants: @stephentoub | | area-System.Threading.RateLimiting | @rafikiassumani-msft | @BrennanConroy @halter73 | | | area-System.Threading.Tasks | @jeffhandley | @dotnet/area-system-threading-tasks | Consultants: @stephentoub | @@ -146,7 +146,7 @@ Note: Editing this file doesn't update the mapping used by `@dotnet-policy-servi | area-Tracing-mono | @tommcdon | @tommcdon @thaystg | | | area-TypeSystem-coreclr | @steveisok | @davidwrighton @MichalStrehovsky @janvorli @mangod9 @dotnet/area-type-system-and-startup | | | area-UWP | @tommcdon | @dotnet/area-uwp | UWP-specific issues including Microsoft.NETCore.UniversalWindowsPlatform and Microsoft.Net.UWPCoreRuntimeSdk | -| area-VM-coreclr | @mangod9 | @mangod9 | | +| area-VM-coreclr | @mangod9 | @mangod9 @vsadov | | | area-VM-meta-mono | @steveisok | @vitek-karas | | | area-VM-reflection-mono | @steveisok | @vitek-karas | MonoVM-specific reflection and reflection-emit issues | | area-VM-threading-mono | @mangod9 | @steveisok | | diff --git a/docs/coding-guidelines/breaking-change-rules.md b/docs/coding-guidelines/breaking-change-rules.md index 21fc3103106f..95e38c507c9b 100644 --- a/docs/coding-guidelines/breaking-change-rules.md +++ b/docs/coding-guidelines/breaking-change-rules.md @@ -238,6 +238,10 @@ successfully bind to that overload, if simply passing an `int` value. However, i * Adding a reference type field, a `ref` field, or a field involving a generic type parameter without the `unmanaged` constraint, to a value type that formerly had none of those field kinds. If the value type already contains at least one such field, adding another is non-breaking. This rule applies recursively to new fields that contain value types that may also introduce a new field kind. +* Adding `partial` modifier to an interface method + + Roslyn has [a spec violation](https://github.com/dotnet/roslyn/blob/6f6d64494dc75614f14ef1ac66dde3cc8d2d0092/docs/compilers/CSharp/Deviations%20from%20Standard.md#interface-partial-methods) that makes partial interface methods implicitly non-virtual. When you add the `partial` modifier to an interface method that was previously implicitly virtual, be sure to also include the `virtual` modifier to avoid a breaking change. 
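For illustration, here is a minimal sketch of the rule above. The `IWidget` name is hypothetical (not from this change), and the snippet assumes a C# compiler version that supports partial members on interfaces:

```csharp
// Hypothetical illustration; IWidget is not a real runtime interface.
public partial interface IWidget
{
    // Without 'virtual', Roslyn treats a partial interface method as non-virtual,
    // which is a behavioral breaking change if the method used to be an ordinary
    // (implicitly virtual) interface member. Keeping 'virtual' explicit preserves
    // the previous behavior for implementers.
    public virtual partial void Draw();
}

public partial interface IWidget
{
    // Implementing declaration; the body acts as a default interface implementation.
    public virtual partial void Draw()
    {
        // default implementation
    }
}
```

Without the explicit `virtual`, the member silently becomes non-virtual once `partial` is added, which is exactly the behavioral change this rule warns about.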
+
### Signatures
✓ **Allowed**
* Adding `params` to a parameter
diff --git a/docs/coding-guidelines/coding-style.md b/docs/coding-guidelines/coding-style.md
index 32dd1ec8bb65..58f8664206bf 100644
--- a/docs/coding-guidelines/coding-style.md
+++ b/docs/coding-guidelines/coding-style.md
@@ -19,7 +19,7 @@ The general rule we follow is "use Visual Studio defaults".
Consider enabling "View White Space (Ctrl+R, Ctrl+W)" or "Edit -> Advanced -> View White Space" if using Visual Studio to aid detection.
9. If a file happens to differ in style from these guidelines (e.g. private members are named `m_member` rather than `_member`), the existing style in that file takes precedence.
-10. We only use `var` when the type is explicitly named on the right-hand side, typically due to either `new` or an explicit cast, e.g. `var stream = new FileStream(...)` not `var stream = OpenStandardInput()`.
+10. We only permit (but do not enforce) `var` when the type is explicitly named on the right-hand side, typically due to either `new` or an explicit cast, e.g. `var stream = new FileStream(...)` not `var stream = OpenStandardInput()`.
    - Similarly, target-typed `new()` can only be used when the type is explicitly named on the left-hand side, in a variable definition statement or a field definition statement. e.g. `FileStream stream = new(...);`, but not `stream = new(...);` (where the type was specified on a previous line).
11. We use language keywords instead of BCL types (e.g. `int, string, float` instead of `Int32, String, Single`, etc) for both type references as well as method calls (e.g. `int.Parse` instead of `Int32.Parse`). See issue [#13976](https://github.com/dotnet/runtime/issues/13976) for examples.
12. We use PascalCasing to name all our constant local variables and fields. The only exception is for interop code where the constant value should exactly match the name and value of the code you are calling via interop.
diff --git a/docs/coding-guidelines/project-guidelines.md b/docs/coding-guidelines/project-guidelines.md
index 35059779fa37..c6c49bf3e131 100644
--- a/docs/coding-guidelines/project-guidelines.md
+++ b/docs/coding-guidelines/project-guidelines.md
@@ -69,7 +69,7 @@ When building an individual project the `BuildTargetFramework` and `TargetOS` wi
 ## Supported full build settings
 - .NET Core latest on current OS (default) -> `$(NetCoreAppCurrent)-[RunningOS]`
-- .NET Framework latest -> `net48`
+- .NET Framework latest -> `net481`
 # Library project guidelines
@@ -173,6 +173,24 @@ All src outputs are under `artifacts\bin\$(MSBuildProjectName)\$(TargetFramework)`
+### XML Documentation Files
+The `UseCompilerGeneratedDocXmlFile` property controls how XML documentation files are generated for a library project. XML documentation files are used for IntelliSense, API reference documentation, and code analysis.
+
+- When set to `true` (default), the compiler generates the XML documentation file based on XML comments in the source code.
+- When set to `false`, the build system attempts to use a pre-built XML documentation file from the Microsoft.Private.Intellisense package.
+
+```xml
+<PropertyGroup>
+  <UseCompilerGeneratedDocXmlFile>false</UseCompilerGeneratedDocXmlFile>
+</PropertyGroup>
+```
+
+Setting `UseCompilerGeneratedDocXmlFile` to `false` is typically done for stable APIs where manually curated documentation exists that should be preferred over compiler-generated documentation.
+
+If a project sets this to `false` but the Microsoft.Private.Intellisense package doesn't have documentation for the assembly, a warning is shown suggesting to remove the property to let the compiler generate the file.
+
+The implementation of this property can be found in `eng/intellisense.targets`.
+
 ## tests
Similar to the src projects tests projects will define a `TargetFrameworks` property so they can list out the set of target frameworks they support.
diff --git a/docs/coding-guidelines/source-generator-guidelines.md b/docs/coding-guidelines/source-generator-guidelines.md
new file mode 100644
index 000000000000..84ae12c5b4de
--- /dev/null
+++ b/docs/coding-guidelines/source-generator-guidelines.md
@@ -0,0 +1,31 @@
+Recommended reading to better understand source generators:
+[Roslyn Source Generators Cookbook](https://github.com/dotnet/roslyn/blob/main/docs/features/source-generators.cookbook.md).
+
+[Project guidance](./project-guidelines.md#directory-layout)
+
+[Packaging guidance](./libraries-packaging.md#analyzers--source-generators)
+
+## Source Generator Best Practices
+
+### DOs
+
+- **DO** generate code that looks as if a developer would write it manually.
+- **DO** emit strings rather than using the Roslyn Syntax API for better performance.
+- **DO** use consistent indentation and formatting in generated code.
+- **DO** implement generators using the [`IIncrementalGenerator`](https://learn.microsoft.com/dotnet/api/microsoft.codeanalysis.iincrementalgenerator) interface.
+- **DO** set `<EnforceExtendedAnalyzerRules>true</EnforceExtendedAnalyzerRules>` in the source generator project and then don't use any of the banned APIs it lists.
+- **DO** set `<IsRoslynComponent>true</IsRoslynComponent>` in the source generator project to enable debugging support in Visual Studio.
+- **DO** disable the following Roslyn warning, `RS2008`, in the source generator project (that is, `<NoWarn>$(NoWarn);RS2008</NoWarn>`). The reported issue is handled differently in the runtime repo.
+- **DO** emit diagnostics from a separate [analyzer](https://learn.microsoft.com/visualstudio/extensibility/getting-started-with-roslyn-analyzers). The analyzer and source generator can be in the same assembly.
+- **DO** cache intermediate results to avoid redundant computation.
+- **DO** consider the impact on build time and optimize accordingly.
+- **DO** have separate projects for testing the generator and testing the code generated by the generator.
+- **DO** use the Roslyn Testing SDK to test the generator (and any corresponding analyzers).
+
+### DON'Ts
+
+- **DON'T** use the Roslyn Syntax API for emitting source code.
+- **DON'T** perform expensive operations during the generation process unless absolutely necessary.
+- **DON'T** emit code that introduces runtime dependencies not explicitly referenced by the project.
+- **DON'T** emit code that would trigger compiler warnings in normal usage scenarios.
+- **DON'T** emit diagnostics from the generator itself; emit them from an [analyzer](https://learn.microsoft.com/visualstudio/extensibility/getting-started-with-roslyn-analyzers).
diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md
index 0eed53cd415e..8d872db6bc18 100644
--- a/docs/design/coreclr/botr/clr-abi.md
+++ b/docs/design/coreclr/botr/clr-abi.md
@@ -96,6 +96,34 @@ There is no defined/enforced/declared ordering between the generic parameter and
 call(["this" pointer] [return buffer pointer] [generics context|varargs cookie] [userargs]*)
 ```
+## Async
+
+Async calling convention is additive to other calling conventions when supported.
The set of scenarios is constrained to regular static/virtual calls and does not, for example, support PInvokes or varargs. At a minimum, ordinary static calls, calls with a `this` parameter, and calls with hidden generic parameters are supported.
+
+Async calling convention adds an extra `Continuation` parameter and an extra return, which semantically takes precedence when not `null`. A non-null `Continuation` upon return signals that the computation is not complete and the formal result is not ready. A non-null argument means that the function is resuming and should extract the state from the `Continuation` and continue execution (while ignoring all other arguments).
+
+The `Continuation` is a managed object and needs to be tracked accordingly. The GC info includes the continuation result as live at Async call sites.
+
+### Returning `Continuation`
+To return `Continuation` we use a volatile/callee-trash register that cannot be used to return the actual result.
+
+| arch | `REG_ASYNC_CONTINUATION_RET` |
+| ------------- | ------------- |
+| x86 | ecx |
+| x64 | rcx |
+| arm | r2 |
+| arm64 | x2 |
+| risc-v | a2 |
+
+### Passing `Continuation` argument
+The `Continuation` parameter is passed at the same position as the generic instantiation parameter, or immediately after it if both are present.
+
+```
+call(["this" pointer] [return buffer pointer] [generics context] [continuation] [userargs]) // not x86
+
+call(["this" pointer] [return buffer pointer] [userargs] [generics context] [continuation]) // x86
+```
+
 ## AMD64-only: by-value value types
Just like native, AMD64 has implicit-byrefs. Any structure (value type in IL parlance) that is not 1, 2, 4, or 8 bytes in size (i.e., 3, 5, 6, 7, or >= 9 bytes in size) that is declared to be passed by value, is instead passed by reference. For JIT generated code, it follows the native ABI where the passed-in reference is a pointer to a compiler generated temp local on the stack. However, there are some cases within remoting or reflection where apparently stackalloc is too hard, and so they pass in pointers within the GC heap, thus the JITed code must report these implicit byref parameters as interior pointers (BYREFs in JIT parlance), in case the callee is one of these reflection paths. Similarly, all writes must use checked write barriers.
@@ -114,7 +142,7 @@ ARM64-only: When a method returns a structure that is larger than 16 bytes the c
 ## Hidden parameters
-*Stub dispatch* - when a virtual call uses a VSD stub, rather than back-patching the calling code (or disassembling it), the JIT must place the address of the stub used to load the call target, the "stub indirection cell", in (x86) `EAX` / (AMD64) `R11` / (AMD64 NativeAOT ABI) `R10` / (ARM) `R4` / (ARM NativeAOT ABI) `R12` / (ARM64) `R11`. In the JIT, this is encapsulated in the `VirtualStubParamInfo` class.
+*Stub dispatch* - when a virtual call uses a VSD stub, rather than back-patching the calling code (or disassembling it), the JIT must place the address of the stub used to load the call target, the "stub indirection cell", in (x86) `EAX` / (AMD64) `R11` / (ARM) `R4` / (ARM NativeAOT ABI) `R12` / (ARM64) `R11`. In the JIT, this is encapsulated in the `VirtualStubParamInfo` class.
 *Calli Pinvoke* - The VM wants the address of the PInvoke in (AMD64) `R10` / (ARM) `R12` / (ARM64) `R14` (In the JIT: `REG_PINVOKE_TARGET_PARAM`), and the signature (the pinvoke cookie) in (AMD64) `R11` / (ARM) `R4` / (ARM64) `R15` (in the JIT: `REG_PINVOKE_COOKIE_PARAM`).
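For orientation, the managed construct that produces such calli P/Invoke calls is a C# unmanaged function pointer invocation; the following is a minimal sketch (the signature is made up) of the source-level shape only, with the target address and signature cookie plumbing handled by the helpers this section describes:

```csharp
internal static class CalliSketch
{
    // Invoking an unmanaged function pointer compiles to an IL 'calli'
    // instruction; the JIT and VM then arrange for the call target (and,
    // where needed, the P/Invoke signature cookie) to reach the calli
    // helpers via the hidden registers listed above.
    internal static unsafe int InvokeNative(delegate* unmanaged[Cdecl]<int, int> callback, int value)
    {
        return callback(value);
    }
}
```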
@@ -190,27 +218,19 @@ This section describes the conventions the JIT needs to follow when generating c ## Funclets -For all platforms except Windows/x86 on CoreCLR, all managed EH handlers (finally, fault, filter, filter-handler, and catch) are extracted into their own 'funclets'. To the OS they are treated just like first class functions (separate PDATA and XDATA (`RUNTIME_FUNCTION` entry), etc.). The CLR currently treats them just like part of the parent function in many ways. The main function and all funclets must be allocated in a single code allocation (see hot cold splitting). They 'share' GC info. Only the main function prolog can be hot patched. +For all platforms, managed EH handlers (finally, fault, filter, filter-handler, and catch) are extracted into their own 'funclets'. To the OS they are treated just like first class functions (separate PDATA and XDATA (`RUNTIME_FUNCTION` entry), etc.). The CLR currently treats them just like part of the parent function in many ways. The main function and all funclets must be allocated in a single code allocation (see hot cold splitting). They 'share' GC info. Only the main function prolog can be hot patched. The only way to enter a handler funclet is via a call. In the case of an exception, the call is from the VM's EH subsystem as part of exception dispatch/unwind. In the non-exceptional case, this is called local unwind or a non-local exit. In C# this is accomplished by simply falling-through/out of a try body or an explicit goto. In IL this is always accomplished via a LEAVE opcode, within a try body, targeting an IL offset outside the try body. In such cases the call is from the JITed code of the parent function. -For Windows/x86 on CoreCLR, all handlers are generated within the method body, typically in lexical order. A nested try/catch is generated completely within the EH region in which it is nested. These handlers are essentially "in-line funclets", but they do not look like normal functions: they do not have a normal prolog or epilog, although they do have special entry/exit and register conventions. Also, nested handlers are not un-nested as for funclets: the code for a nested handler is generated within the handler in which it is nested. - -For Windows/x86 on NativeAOT and Linux/x86, funclets are used just like on other platforms. - ## Cloned finallys -JIT64 attempts to speed the normal control flow by 'inlining' a called finally along the 'normal' control flow (i.e., leaving a try body in a non-exceptional manner via C# fall-through). Because the VM semantics for non-rude Thread.Abort dictate that handlers will not be aborted, the JIT must mark these 'inlined' finally bodies. These show up as special entries at the end of the EH tables and are marked with `COR_ILEXCEPTION_CLAUSE_FINALLY | COR_ILEXCEPTION_CLAUSE_DUPLICATED`, and the try_start, try_end, and handler_start are all the same: the start of the cloned finally. - -RyuJit also implements finally cloning, for all supported architectures. However, the implementation does not yet handle the thread abort case; cloned finally bodies are not guaranteed to remain intact and are not reported to the runtime. Because of this, finally cloning is disabled for VMs that support thread abort (desktop clr). - -JIT32 does not implement finally cloning. +RyuJIT attempts to speed the normal control flow by 'inlining' a called finally along the 'normal' control flow (i.e., leaving a try body in a non-exceptional manner via C# fall-through). 
This optimization is supported on all architectures. ## Invoking Finallys/Non-local exits In order to have proper forward progress and `Thread.Abort` semantics, there are restrictions on where a call-to-finally can be, and what the call site must look like. The return address can **NOT** be in the corresponding try body (otherwise the VM would think the finally protects itself). The return address **MUST** be within any outer protected region (so exceptions from the finally body are properly handled). -JIT64, and RyuJIT for non-x86, creates something similar to a jump island: a block of code outside the try body that calls the finally and then branches to the final target of the leave/non-local-exit. This jump island is then marked in the EH tables as if it were a cloned finally. The cloned finally clause prevents a Thread.Abort from firing before entering the handler. By having the return address outside of the try body we satisfy the other constraint. +RyuJIT creates something similar to a jump island: a block of code outside the try body that calls the finally and then branches to the final target of the leave/non-local-exit. This jump island is then marked in the EH tables as if it were a cloned finally. The cloned finally clause prevents a Thread.Abort from firing before entering the handler. By having the return address outside of the try body we satisfy the other constraint. ## ThreadAbortException considerations @@ -322,33 +342,9 @@ Finally1: Note that JIT64 does not implement this properly. The C# compiler used to always insert all necessary "step" blocks. The Roslyn C# compiler at one point did not, but then was changed to once again insert them. -## The PSPSym and funclet parameters - -The *PSPSym* (which stands for Previous Stack Pointer Symbol) is a pointer-sized local variable used to access locals from the main function body. - -NativeAOT does not use PSPSym. For filter funclets the VM sets the frame register to be the same as the parent function. For second pass funclets the VM restores all non-volatile registers. The same convention is used across all platforms. - -CoreCLR uses PSPSym for all platforms except x86: the frame pointer on x86 is always preserved when the handlers are invoked. - -First, two definitions. - -*Caller-SP* is the value of the stack pointer in a function's caller before the call instruction is executed. That is, when function A calls function B, Caller-SP for B is the value of the stack pointer immediately before the call instruction in A (calling B) was executed. Note that this definition holds for both AMD64, which pushes the return value when a call instruction is executed, and for ARM, which doesn't. For AMD64, Caller-SP is the address above the call return address. - -*Initial-SP* is the initial value of the stack pointer after the fixed-size portion of the frame has been allocated. That is, before any "alloca"-type allocations. - -The value stored in PSPSym is the value of Initial-SP for AMD64 or Caller-SP for other platforms, for the main function. The stack offset of the PSPSym is reported to the VM in the GC information header. The value reported in the GC information is the offset of the PSPSym from Initial-SP for AMD64 or Caller-SP for other platforms. (Note that both the value stored, and the way the value is reported to the VM, differs between architectures. In particular, note that most things in the GC information header are reported as offsets relative to Caller-SP, but PSPSym on AMD64 is one exception, and maybe the only exception.) 
- -The VM uses the PSPSym to find other locals it cares about (such as the generics context in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that the frame pointer is the same value in a funclet as it is in the main function body. +## Funclet parameters -When a funclet is called, it is passed the *Establisher Frame Pointer*. For AMD64 this is true for all funclets and it is passed as the first argument in RCX, but for ARM and ARM64 this is only true for first pass funclets (currently just filters) and it is passed as the second argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent" frame in the exception processing system. For the CLR, it points either to the main function frame or a dynamically enclosing funclet frame from the same function, for the funclet being invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on x86, ARM, and ARM64. - -Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we don't know if the Establisher Frame is from the main function or a funclet, we design the main function and funclet frame layouts to place the PSPSym at an identical, small, constant offset from the Establisher Frame in each case. (This is also required because we only report a single offset to the PSPSym in the GC information, and that offset must be valid for the main function and all of its funclets). Then, the funclet uses this known offset to compute the PSPSym address and read its value. From this, it can compute the value of the frame pointer (which is a constant offset from the PSPSym value) and set the frame register to be the same as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular, for every nested funclet invocation. - -On ARM and ARM64, for all second pass funclets (finally, fault, catch, and filter-handler) the VM restores all non-volatile registers to their values within the parent frame. This includes the frame register (`R11`). Thus, the PSPSym is not used to recompute the frame pointer register in this case, though the PSPSym is copied to the funclet's frame, as for all funclets. - -Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument (`REG_EXCEPTION_OBJECT`). On AMD64 it is the second argument and thus passed in RDX. On ARM and ARM64 this is the first argument and passed in R0. - -(Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always pass the correct establisher frame to the funclet. Funclet may receive establisher frame of funclet when expecting that of original routine." It indicates this is the reason that a PSPSym is required in all funclets as well as the main function, whereas if the establisher frame was correctly reported, the PSPSym could be omitted in some cases.) +Catch, Filter, and Filter-handlers get an Exception object (GC ref) as an argument (`REG_EXCEPTION_OBJECT`). On AMD64 it is passed in RCX (Windows ABI) or RSI (Unix ABI). On ARM and ARM64 this is the first argument and passed in R0. ## Funclet Return Values @@ -374,111 +370,19 @@ Some definitions: When an exception occurs, the VM is invoked to do some processing. If the exception is within a "try" region, it eventually calls a corresponding handler (which also includes calling filters). 
The exception location within a function might be where a "throw" instruction executes, the point of a processor exception like null pointer dereference or divide by zero, or the point of a call where the callee threw an exception but did not catch it. -On AMD64, all register values that existed at the exception point in the corresponding "try" region are trashed on entry to the funclet. That is, the only registers that have known values are those of the funclet parameters. +The VM sets the frame register to be the same as the parent function. This allows the funclets to access local variables using frame-relative addresses. -On ARM and ARM64, all registers are restored to their values at the exception point. +For filter funclets, all other register values that existed at the exception point in the corresponding "try" region are trashed on entry to the funclet. That is, the only registers that have known values are those of the funclet parameters and the frame register. -On x86: TBD. +For other funclets, all non-volatile registers are restored to their values at the exception point. The JIT codegen [does not take advantage of it currently](https://github.com/dotnet/runtime/pull/114630#issuecomment-2810210759). ### Registers on return from a funclet When a funclet finishes execution, and the VM returns execution to the function (or an enclosing funclet, if there is EH clause nesting), the non-volatile registers are restored to the values they held at the exception point. Note that the volatile registers have been trashed. -Any register value changes made in the funclet are lost. If a funclet wants to make a variable change known to the main function (or the funclet that contains the "try" region), that variable change needs to be made to the shared main function stack frame. - -## Windows/x86 EH considerations - -The Windows/x86 model is somewhat different than non-Windows/x86 model. Windows/X86-specific concerns are mentioned here. - -### catch / filter-handler regions - -When leaving a `catch` or `filter-handler` region, the JIT calls the helper `CORINFO_JIT_ENDCATCH` (implemented in the VM by the `JIT_EndCatch` function) before transferring control to the target location. The code to call to `CORINFO_JIT_ENDCATCH` is within the catch region itself. - -### finally / fault regions - -"finally" clauses are invoked in the non-exceptional code by the generated JIT code, and in the exceptional case by the VM. "fault" clauses are only executed in exceptional cases by the VM. - -On entry to the finally or fault, the top of the stack is the address that should be jumped to on exit from the finally, using a "pop eax; jmp eax" sequence. A simple 'ret' could be used, but we avoid it to avoid potentially creating an unbalanced processor call/ret buffer stack, and messing up call/ret prediction. - -There are no register or other stack arguments to a 'finally' or 'fault'. - -### ShadowSP slots - -X86 exception handlers (e.g., catch, finally) do not establish their own frames. They don't (really) have prologs and epilogs. However, they do use the stack, and need to restore the stack pointer of the enclosing exception handling region when the handler completes executing. - -To implement this requirement, for any function with EH, we create a frame-local variable to store a stack of "Shadow SP" values, or ShadowSP slots. In the JIT, the local var is called lvaShadowSPslotsVar, and in dumps it is called "EHSlots". The variable is created in lvaMarkLocalVars() and is sized as follows: -1. 
1 slot is reserved for the VM (for ICodeManager::FixContext(ppEndRegion)). -2. 1 slot for each handler nesting level (total: ehMaxHndNestingCount). -3. 1 slot for a filter (we do this even if there aren't any filters; size optimization opportunity to not do this if there are no filters?) -4. 1 slot for zero termination - -Note that the since a slot on x86 is 4 bytes, the minimum size is 16 bytes. The idea is to have 1 slot for each handler that could be possibly be invoked at the same time. For example, for: - -```cs - try { - ... - } catch { - try { - ... - } catch { - ... - } - } -``` - -When the inner 'catch' is running, the outer 'catch' is also conceptually "on the stack", or in the middle of execution. So the maximum handler nesting count would be 2. - -The ShadowSP slots are filled in from the highest address downwards to the lowest address. The highest slot is reserved. The first address with a zero is a zero terminator. So, we always zero terminate by setting the second-to-highest slot to zero in the function prolog (if we didn't zero initialize all locals anyway). - -When calling a finally, we set the appropriate level to 0xFC (aka "finally call") and zero terminate the next-lower address. +Any register value changes made in the funclet are lost. If a funclet wants to make a variable change known to the main function (or the funclet that contains the "try" region), that variable change needs to be made to the shared main function stack frame. This not a fundamental limitation. If necessary, the runtime can be updated to preserve non-volatile register changes made in funclets. -Thus, calling a finally from JIT generated code looks like: - -```asm - mov dword ptr [L_02+0x4 ebp-10H], 0 // This must happen before the 0xFC is written - mov dword ptr [L_02+0x8 ebp-0CH], 252 // 0xFC - push G_M52300_IG07 - jmp SHORT G_M52300_IG04 -``` - -In this case, `G_M52300_IG07` is not the address after the 'jmp', so a simple 'call' wouldn't work. - -The code this finally returns to looks like this: - -```asm - mov dword ptr [L_02+0x8 ebp-0CH], 0 - jmp SHORT G_M52300_IG05 -``` - -In this case, it zeros out the ShadowSP slot that it previously set to 0xFC, then jumps to the address that is the actual target of the leave from the finally. - -The JIT does this "end finally restore" by creating a GT_END_LFIN tree node, with the appropriate stack level as an operand, that generates this code. - -In the case of an exceptional 'finally' invocation, the VM sets up the 'return address' to whatever address it wants the JIT to return to. - -For catch handlers, the VM is completely in control of filling and reading the ShadowSP slots; the JIT just makes sure there is enough space. - -### ShadowSP slots frame location - -The ShadowSP slots are required to live in a very particular location, reported via the GC info header. Note that the GC info header does not contain an actual pointer or offset to the ShadowSP slots variable. Instead, the VM calculates the location from other data that does exist in the GC info header, as a negative offset from the EBP frame pointer (which must be established in functions with EH) using the function `GetFirstBaseSPslotPtr()` / `GetStartShadowSPSlotsOffset()`. The VM thus assumes the following frame layout: - -1. callee-saved registers <= EBP points to the top of this range -2. GS cookie -3. 1 slot if localloc is used (Saved localloc SP?) -4. 
1 slot for CORINFO_GENERICS_CTXT_FROM_PARAMTYPEARG -- assumed for any function with EH, to avoid adding a flag to the GC info about whether it exists or not. -5. ShadowSP slots - -(note, these don't have to be in this order for this calculation, but they possibly do need to be in this order for other calculations.) See also `GetEndShadowSPSlotsOffset()`. - -The VM walks the ShadowSP slots in the function `GetHandlerFrameInfo()`, and sets it in various functions such as `EECodeManager::FixContext()`. - -### JIT implementation: finally - -An aside on the JIT implementation for x86. - -The JIT creates BBJ_CALLFINALLY/BBJ_ALWAYS pairs for calling the 'finally' clause. The BBJ_CALLFINALLY block will have a series of CORINFO_JIT_ENDCATCH calls appended at the end, if we need to "leave" a series of nested catches before calling the finally handler (due to a single 'leave' opcode attempting to leave multiple levels of different types of handlers). Then, a GT_END_LFIN statement with the finally clause handler nesting level as an argument is added to the step block where the finally returns to. This is used to generate code to zero out the appropriate level of the ShadowSP slot array after the finally has been executed. The BBJ_CALLFINALLY block itself generates the code to insert the 0xFC value into the ShadowSP slot array. If the 'finally' is invoked by the VM, in exceptional cases, then the VM itself updates the ShadowSP slot array before invoking the 'finally'. - -At the end of a finally or filter, a GT_RETFILT is inserted. For a finally, this is a TYP_VOID which is just a placeholder. For a filter, it takes an argument which evaluates to the return value from the filter. On legacy JIT, this tree triggers the generation of both the return value load (for filters) and the "funclet" exit sequence, which is either a "pop eax; jmp eax" for a finally, or a "ret" for a filter. When processing the BBJ_EHFINALLYRET or BBJ_EHFILTERRET block itself (at the end of code generation for the block), nothing is generated. In RyuJIT, the GT_RETFILT only loads up the return value (for filters) and does nothing for finally, and the block type processing after all the tree processing triggers the exit sequence to be generated. There is no real difference between these, except to centralize all "exit sequence" generation in the same place. +Funclets are not required to preserve non-volatile registers. # EH Info, GC Info, and Hot & Cold Splitting @@ -523,85 +427,6 @@ When the inner "throw new UserException4" is executed, the exception handling fi Filters are invoked in the 1st pass of EH processing and as such execution might resume back at the faulting address, or in the filter-handler, or someplace else. Because the VM must allow GC's to occur during and after a filter invocation, but before the EH subsystem knows where it will resume, we need to keep everything alive at both the faulting address **and** within the filter. This is accomplished by 3 means: (1) the VM's stackwalker and GCInfoDecoder report as live both the filter frame and its corresponding parent frame, (2) the JIT encodes all stack slots that are live within the filter as being pinned, and (3) the JIT reports as live (and possible zero-initializes) anything live-out of the filter. Because of (1) it is likely that a stack variable that is live within the filter and the try body will be double reported. During the mark phase of the GC double reporting is not a problem. 
The problem only arises if the object is relocated: if the same location is reported twice, the GC will try to relocate the address stored at that location twice. Thus we prevent the object from being relocated by pinning it, which leads us to why we must do (2). (3) is done so that after the filter returns, we can still safely incur a GC before executing the filter-handler or any outer handler within the same frame. For the same reason, control must exit a filter region via its final block (in other words, a filter region must terminate with the instruction that leaves the filter region, and the program may not exit the filter region via other paths). -## Duplicated Clauses - -Duplicated clauses are a special set of entries in the EH tables to assist the VM. Specifically, if handler 'A' is also protected by an outer EH clause 'B', then the JIT must emit a duplicated clause, a duplicate of 'B', that marks the whole handler 'A' (which is now lexically disjoint for the range of code for the corresponding try body 'A') as being protected by the handler for 'B'. - -Duplicated clauses are not needed for x86 and for NativeAOT ABI. - -During exception dispatch the VM uses these duplicated clauses to know when to skip any frames between the handler and its parent function. After skipping to the parent function, due to a duplicated clause, the VM searches for a regular/non-duplicate clause in the parent function. The order of duplicated clauses is important. They should appear after all of the main function clauses. They should still follow the normal sorting rules (inner-to-outer, top-to-bottom), but because the try-start/try-end will all be the same for a given handler, they should maintain the ordering, regarding inner-to-outer, as the corresponding original clause. - -Example: - -``` -A: try { -B: ... -C: try { -D: ... -E: try { -F: ... -G: } -H: catch { -I: ... -J: } -K: ... -L: } -M: finally { -N: ... -O: } -P: ... -Q: } -R: catch { -S: ... -T: } -``` - -In MSIL this would generate 3 EH clauses: - -``` -.try E-G catch H-J -.try C-L finally M-O -.try A-Q catch R-T -``` - -The native code would be laid out as follows (the order of the handlers is irrelevant except they are after the main method body) with their corresponding (fake) native offsets: - -``` -A: -> 1 -B: -> 2 -C: -> 3 -D: -> 4 -E: -> 5 -F: -> 6 -G: -> 7 -K: -> 8 -L: -> 9 -P: -> 10 -Q: -> 11 -H: -> 12 -I: -> 13 -J: -> 14 -M: -> 15 -N: -> 16 -O: -> 17 -R: -> 18 -S: -> 19 -T: -> 20 -``` - -The native EH clauses would be listed as follows: - -``` -1. .try 5-7 catch 12-14 (top-most & inner-most first) -2. .try 3-9 finally 15-17 (top-most & next inner-most) -3. .try 1-11 catch 18-20 (top-most & outer-most) -4. .try 12-14 finally 15-17 duplicated (inner-most because clause 2 is inside clause 3, top-most because handler H-J is first) -5. .try 12-14 catch 18-20 duplicated -6. .try 15-17 catch 18-20 -``` - -If the handlers were in a different order, then clause 6 might appear before clauses 4 and 5, but never in between. - ## Clauses covering the same try region Several consecutive clauses may cover the same `try` block. A clause covering the same region as the previous one is marked by the `COR_ILEXCEPTION_CLAUSE_SAMETRY` flag. When exception ex1 is thrown while running handler for another exception ex2 and the exception ex2 escapes the ex1's handler frame, this enables the runtime to skip clauses that cover the same `try` block as the clause that handled the ex1. 
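For intuition, several consecutive clauses covering the same `try` region arise from ordinary C# such as the following sketch (illustrative names; the SAMETRY flagging itself happens in the generated EH tables and is not visible in source):

```csharp
using System;

internal static class SameTryExample
{
    internal static void Process(object input)
    {
        try
        {
            Handle(input);
        }
        catch (ArgumentException)
        {
            // First clause for this try region.
        }
        catch (InvalidOperationException)
        {
            // Second clause covering the same try region as the first.
        }
        catch
        {
            // Third clause, again covering the same try region.
        }
    }

    private static void Handle(object input)
    {
    }
}
```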
@@ -696,12 +521,6 @@ x64 currently saves RBP, RSI and RDI while ARM64 saves just FP and LR. However, EnC remap is not supported inside funclets. The stack layout of funclets does not matter for EnC. -## Considerations with regards to PSPSym - -As explained previously in this document, on x64 we have Initial RSP == PSPSym. For EnC methods, as we disallow remappings after localloc (see below), we furthermore have RBP == PSPSym. -For ARM64 we have Caller SP == PSPSym and the FP points to the previously saved FP/LR pair. For EnC the JIT always sets up the stack frame so that the FP/LR pair is at Caller SP - 16 and does not save any additional callee saves. -These invariants allow the VM to compute new value of the frame pointer and PSPSym after the edit without any additional information. Note that the frame pointer and PSPSym do not change values or location on ARM64. However, EH may be added to a function in which case a new PSPSym needs to be materialized, even on ARM64. Location of PSPSym is found via GC info. - ## Localloc Localloc is allowed in EnC code, but remap is disallowed after the method has executed a localloc instruction. VM uses the invariants above (`RSP == RBP` on x64, `FP + 16 == SP + stack size` on ARM64) to detect whether localloc was executed by the method. @@ -812,7 +631,7 @@ Therefore it will expand all indirect calls via the validation helper and a manu ## CFG details for x64 On x64, `CORINFO_HELP_VALIDATE_INDIRECT_CALL` takes the call address in `rcx`. -In addition to the usual registers it also preserves all float registers and `rcx` and `r10`; furthermore, shadow stack space is not required to be allocated. +In addition to the usual registers it also preserves all float registers, `rcx`, and `r10`; furthermore, shadow stack space is not required to be allocated. `CORINFO_HELP_DISPATCH_INDIRECT_CALL` takes the call address in `rax` and it reserves the right to use and trash `r10` and `r11`. The JIT uses the dispatch helper on x64 whenever possible as it is expected that the code size benefits outweighs the less accurate branch prediction. @@ -842,4 +661,4 @@ MyStruct Test2() // We can use memset here return default; } -``` \ No newline at end of file +``` diff --git a/docs/design/coreclr/botr/corelib.md b/docs/design/coreclr/botr/corelib.md index 9662dc3f4e20..1dd2fc60d4bb 100644 --- a/docs/design/coreclr/botr/corelib.md +++ b/docs/design/coreclr/botr/corelib.md @@ -42,8 +42,6 @@ The CLR provides a [`mscorlib` binder](https://github.com/dotnet/runtime/blob/ma Two techniques exist for calling into the CLR from managed code. FCall allows you to call directly into the CLR code, and provides a lot of flexibility in terms of manipulating objects, though it is easy to cause GC holes by not tracking object references correctly. QCall also allows you to call into the CLR via the P/Invoke, but is much harder to accidentally mis-use. FCalls are identified in managed code as extern methods with the [`MethodImplOptions.InternalCall`](https://learn.microsoft.com/dotnet/api/system.runtime.compilerservices.methodimploptions) bit set. QCalls are marked `static extern` methods similar to regular P/Invokes, but are directed toward a library called `"QCall"`. -There is a small variant of FCall called HCall (for Helper call) for implementing JIT helpers. The HCall is intended for doing things like accessing multi-dimensional array elements, range checks, etc. The only difference between HCall and FCall is that HCall methods won't show up in an exception stack trace. 
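To make the managed-side shape concrete, a QCall declaration in C# looks roughly like the following sketch (the entry point and parameter names are hypothetical; real declarations live in System.Private.CoreLib):

```csharp
using System.Runtime.InteropServices;

internal static class QCallSketch
{
    // A QCall is declared like a regular P/Invoke whose target "library" is
    // the pseudo-library named "QCall"; the runtime resolves the entry point
    // to a function implemented inside the CLR itself.
    [DllImport("QCall", CharSet = CharSet.Unicode, EntryPoint = "Widget_GetNameLength")]
    private static extern int GetNameLength(string name);
}
```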
- ### Choosing between FCall, QCall, P/Invoke, and writing in managed code First, remember that you should be writing as much as possible in managed code. You avoid a raft of potential GC hole issues, you get a better debugging experience, and the code is often simpler. @@ -54,7 +52,7 @@ If the only reason you're defining a FCall method is to call a native method, yo If you still need to implement a feature inside the runtime, consider if there is a way to reduce the frequency of transitioning to native code. Can you write the common case in managed and only call into native for some rare corner cases? You're usually best off keeping as much as possible in managed code. -QCalls are the preferred mechanism going forward. You should only use FCalls when you are "forced" to. This happens when there is common "short path" through the code that is important to optimize. This short path should not be more than a few hundred instructions, cannot allocate GC memory, take locks or throw exceptions (`GC_NOTRIGGER`, `NOTHROWS`). In all other circumstances (and especially when you enter a FCall and then simply erect HelperMethodFrame), you should be using QCall. +QCalls are the preferred mechanism going forward. You should only use FCalls when you are "forced" to. This happens when there is common "short path" through the code that is important to optimize. This short path should not be more than a few hundred instructions, cannot allocate GC memory, take locks or throw exceptions (`GC_NOTRIGGER`, `NOTHROWS`). In all other circumstances, you should be using QCall. FCalls were specifically designed for short paths of code that must be optimized. They allowed explicit control over when erecting a frame was done. However, it is error prone and not worth the complexity for many APIs. QCalls are essentially P/Invokes into the CLR. In the event the performance of an FCall is required consider creating a QCall and marking it with [`SuppressGCTransitionAttribute`](https://learn.microsoft.com/dotnet/api/system.runtime.interopservices.suppressgctransitionattribute). @@ -64,8 +62,6 @@ As a result, QCalls give you some advantageous marshaling for `SafeHandle`s auto QCalls are very much like a normal P/Invoke from CoreLib to CLR. Unlike FCalls, QCalls will marshal all arguments as unmanaged types like a normal P/Invoke. QCall also switch to preemptive GC mode like a normal P/Invoke. These two features should make QCalls easier to write reliably compared to FCalls. QCalls are not prone to GC holes and GC starvation bugs that are common with FCalls. -QCalls perform better than FCalls that erect a `HelperMethodFrame`. The overhead is about 1.4x less compared to FCall w/ `HelperMethodFrame` overhead on x86 and x64. - The preferred types for QCall arguments are primitive types that are efficiently handled by the P/Invoke marshaler (`INT32`, `LPCWSTR`, `BOOL`). Notice that `BOOL` is the correct boolean flavor for QCall arguments. On the other hand, `CLR_BOOL` is the correct boolean flavor for FCall arguments. The pointers to common unmanaged EE structures should be wrapped into handle types. This is to make the managed implementation type safe and avoid falling into unsafe C# everywhere. See AssemblyHandle in [vm\qcall.h][qcall] for an example. @@ -164,7 +160,7 @@ extern "C" BOOL QCALLTYPE Foo_BarInternal(int flags, LPCWSTR wszString, QCall::S ## FCall functional behavior -FCalls allow more flexibility in terms of passing object references around, but with higher code complexity and more opportunities to make mistakes. 
Additionally, FCall methods must either erect a helper method frame along their common code paths, or for any FCall of non-trivial length, explicitly poll for whether a garbage collection must occur. Failing to do so will lead to starvation issues if managed code repeatedly calls the FCall method in a tight loop, because FCalls execute while the thread only allows the GC to run in a cooperative manner. +FCalls allow more flexibility in terms of passing object references around, but with higher code complexity and more opportunities to make mistakes. Additionally, for any FCall of non-trivial length, explicitly poll for whether a garbage collection must occur. Failing to do so will lead to starvation issues if managed code repeatedly calls the FCall method in a tight loop, because FCalls execute while the thread only allows the GC to run in a cooperative manner. FCalls require a lot of boilerplate code, too much to describe here. Refer to [fcall.h][fcall] for details. @@ -176,8 +172,6 @@ A more complete discussion on GC holes can be found in the [CLR Code Guide](../. Object references passed as parameters to FCall methods are not GC-protected, meaning that if a GC occurs, those references will point to the old location in memory of an object, not the new location. For this reason, FCalls usually follow the discipline of accepting something like `StringObject*` as their parameter type, then explicitly converting that to a `STRINGREF` before doing operations that may trigger a GC. If you expect to use an object reference later, you must GC protect object references before triggering a GC. -All GC heap allocations within an FCall method must happen within a helper method frame. If you allocate memory on the GC heap, the GC may collect dead objects and move objects around in unpredictable ways, with some low probability. For this reason, you must manually report any object references in your method to the GC, so that if a garbage collection occurs, your object reference will be updated to refer to the new location in memory. Any pointers into managed objects (like arrays or Strings) within your code will not be updated automatically, and must be re-fetched after any operation that may allocate memory and before your first usage. Reporting a reference can be done via the `GCPROTECT_*` macros or as parameters when erecting a helper method frame. - Failing to properly report an `OBJECTREF` or to update an interior pointer is commonly referred to as a "GC hole", because the `OBJECTREF` class will do some validation that it points to a valid object every time you dereference it in Debug and Checked builds. When an `OBJECTREF` pointing to an invalid object is dereferenced, an assert will trigger saying something like "Detected an invalid object reference. Possible GC hole?". This assert is unfortunately easy to hit when writing "manually managed" code. Note that QCall's programming model is restrictive to sidestep GC holes by forcing you to pass in the address of an object reference on the stack. This guarantees that the object reference is GC protected by the JIT's reporting logic, and that the actual object reference will not move because it is not allocated in the GC heap. QCall is our recommended approach, precisely because it makes GC holes harder to write. @@ -188,8 +182,6 @@ The managed stack walker needs to be able to find its way from FCalls. 
It is rel Complex constructs like stack allocated objects with destructors or exception handling in the FCall implementation may confuse the epilog walker. This can lead to GC holes or crashes during stack walking. There is no comprehensive list of what constructs should be avoided to prevent this class of bugs. An FCall implementation that is fine one day may break with the next C++ compiler update. We depend on stress runs and code coverage to find bugs in this area. -Setting a breakpoint inside an FCall implementation may confuse the epilog walker. It leads to an "Invalid breakpoint in a helpermethod frame epilog" assert inside [vm\i386\gmsx86.cpp](https://github.com/dotnet/runtime/blob/main/src/coreclr/vm/i386/gmsx86.cpp). - ### FCall example – managed Here's a real-world example from the `String` class: @@ -218,29 +210,18 @@ The FCall entrypoint has to be registered in tables in [vm\ecalllist.h][ecalllis [ecalllist]: https://github.com/dotnet/runtime/blob/main/src/coreclr/vm/ecalllist.h -This method is an instance method in managed code, with the "this" parameter passed as the first argument. We use `StringObject*` as the argument type, then copy it into a `STRINGREF` so we get some error checking when we use it. +This example shows an FCall method that takes a managed object (`Object*`) as a raw pointer. These raw inputs are considered "unsafe" and must be validated or converted if they’re used in a GC-sensitive context. ```C++ -FCIMPL1(Object*, AppDomainNative::IsStringInterned, StringObject* pStringUNSAFE) +FCIMPL1(FC_BOOL_RET, ExceptionNative::IsImmutableAgileException, Object* pExceptionUNSAFE) { FCALL_CONTRACT; - STRINGREF refString = ObjectToSTRINGREF(pStringUNSAFE); - STRINGREF* prefRetVal = NULL; - - HELPER_METHOD_FRAME_BEGIN_RET_1(refString); - - if (refString == NULL) - COMPlusThrow(kArgumentNullException, W("ArgumentNull_String")); - - prefRetVal = GetAppDomain()->IsStringInterned(&refString); - - HELPER_METHOD_FRAME_END(); + ASSERT(pExceptionUNSAFE != NULL); - if (prefRetVal == NULL) - return NULL; + OBJECTREF pException = (OBJECTREF) pExceptionUNSAFE; - return OBJECTREFToObject(*prefRetVal); + FC_RETURN_BOOL(CLRException::IsPreallocatedExceptionObject(pException)); } FCIMPLEND ``` diff --git a/docs/design/coreclr/botr/exceptions.md b/docs/design/coreclr/botr/exceptions.md index 1fc3029df64c..835ca9f0e036 100644 --- a/docs/design/coreclr/botr/exceptions.md +++ b/docs/design/coreclr/botr/exceptions.md @@ -255,9 +255,9 @@ This is the "fcall", "jit helper", and so forth. The typical way that the runtim On the other hand, if an fcall function can do anything that might throw a CLR internal exception (one of the C++ exceptions), that exception must not be allowed to leak back out to managed code. To handle this case, the CLR has the UnwindAndContinueHandler (UACH), which is a set of code to catch the C++ EH exceptions, and re-raise them as managed exceptions. -Any runtime function that is called from managed code, and might throw a C++ EH exception, must wrap the throwing code in INSTALL_UNWIND_AND_CONTINUE_HANDLER / UNINSTALL_UNWIND_AND_CONTINUE_HANDLER. Installing a HELPER_METHOD_FRAME will automatically install the UACH. There is a non-trivial amount of overhead to installing a UACH, so they shouldn't be used everywhere. One technique that is used in performance critical code is to run without a UACH, and install one just before throwing an exception. 
+Any runtime function that is called from managed code, and might throw a C++ EH exception, must wrap the throwing code in INSTALL_UNWIND_AND_CONTINUE_HANDLER / UNINSTALL_UNWIND_AND_CONTINUE_HANDLER. There is a non-trivial amount of overhead to installing a UACH, so they shouldn't be used everywhere. One technique that is used in performance critical code is to run without a UACH, and install one just before throwing an exception. -When a C++ exception is thrown, and there is a missing UACH, the typical failure will be a Contract Violation of "GC_TRIGGERS called in a GC_NOTRIGGER region" in CPFH_RealFirstPassHandler. To fix these, look for managed to runtime transitions, and check for INSTALL_UNWIND_AND_CONTINUE_HANDLER or HELPER_METHOD_FRAME_BEGIN_XXX. +When a C++ exception is thrown, and there is a missing UACH, the typical failure will be a Contract Violation of "GC_TRIGGERS called in a GC_NOTRIGGER region" in CPFH_RealFirstPassHandler. To fix these, look for managed to runtime transitions, and check for INSTALL_UNWIND_AND_CONTINUE_HANDLER. Runtime code into managed code ------------------------------ diff --git a/docs/design/coreclr/botr/guide-for-porting.md b/docs/design/coreclr/botr/guide-for-porting.md index 106dc157061f..500d95394266 100644 --- a/docs/design/coreclr/botr/guide-for-porting.md +++ b/docs/design/coreclr/botr/guide-for-porting.md @@ -340,27 +340,22 @@ Here is an annotated list of the stubs implemented for Unix on Arm64. calls. Necessary for all applications as this is how the main method is called. - 2. `LazyMachStateCaptureState`/`HelperMethodFrameRestoreState` – Needed to - support a GC occurring with an FCALL or HCALL on the stack. (Incorrect - implementations will cause unpredictable crashes during or after garbage - collection) - - 3. `NDirectImportThunk` – Needed to support saving off a set of arguments to + 2. `NDirectImportThunk` – Needed to support saving off a set of arguments to a p/invoke so that the runtime can find the actual target. Also uses one of the secret arguments (Used by all p/invoke methods) - 4. `PrecodeFixupThunk` – Needed to convert the secret argument from a + 3. `PrecodeFixupThunk` – Needed to convert the secret argument from a FixupPrecode\* to a MethodDesc\*. This function exists to reduce the code size of FixupPrecodes as there are (Used by many managed methods) - 5. `ThePreStub` - Needed to support saving off a set of arguments to the + 4. `ThePreStub` - Needed to support saving off a set of arguments to the stack so that the runtime can find or jit the right target method. (Needed for any jitted method to execute Used by all managed methods) - 6. `ThePreStubPatch` – Exists to provide a reliable spot for the managed + 5. `ThePreStubPatch` – Exists to provide a reliable spot for the managed debugger to put a breakpoint. - 7. GC Write Barriers – These are used to provide the GC with information + 6. GC Write Barriers – These are used to provide the GC with information about what memory is being updated. The existing implementations of these are all complex, and there are a number of controls where the runtime can adjust to tweak the behavior of the barrier in various ways. @@ -373,42 +368,40 @@ Here is an annotated list of the stubs implemented for Unix on Arm64. FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP can be implemented as performance needs require. - 8. `ComCallPreStub`/ `COMToCLRDispatchHelper` /`GenericComCallStub` - not + 7. 
`ComCallPreStub`/ `COMToCLRDispatchHelper` /`GenericComCallStub` - not necessary for non-Windows platforms at this time - 9. `TheUMEntryPrestub`/ `UMThunkStub` - used to enter the runtime from + 8. `TheUMEntryPrestub`/ `UMThunkStub` - used to enter the runtime from non-managed code through entrypoints generated from the Marshal.GetFunctionPointerForDelegate api. - 10. `OnHijackTripThread` - needed for thread suspension to support GC + other + 9. `OnHijackTripThread` - needed for thread suspension to support GC + other suspension requiring events. This is typically not needed for very early stage bringup of the product, but will be needed for any decent size application - 11. `CallEHFunclet` – Used to call catch, finally and fault funclets. Behavior - is specific to exactly how funclets are implemented. Only used if - USE_FUNCLET_CALL_HELPER is set + 10. `CallEHFunclet` – Used to call catch, finally and fault funclets. Behavior + is specific to exactly how funclets are implemented. - 12. `CallEHFilterFunclet` – Used to call filter funclets. Behavior is specific - to exactly how funclets are implemented. Only used if - USE_FUNCLET_CALL_HELPER is set + 11. `CallEHFilterFunclet` – Used to call filter funclets. Behavior is specific + to exactly how funclets are implemented. - 13. `ResolveWorkerChainLookupAsmStub`/ `ResolveWorkerAsmStub` Used for virtual + 12. `ResolveWorkerChainLookupAsmStub`/ `ResolveWorkerAsmStub` Used for virtual stub dispatch (virtual call support for interface, and some virtual methods). These work in tandem with the logic in virtualcallstubcpu.h to implement the logic described in [Virtual Stub Dispatch](virtual-stub-dispatch.md) - 14. `ProfileEnter`/ `ProfileLeave`/ `ProfileTailcall` – Used to call function + 13. `ProfileEnter`/ `ProfileLeave`/ `ProfileTailcall` – Used to call function entry/exit profile functions acquired through the ICorProfiler interface. Used in VERY rare circumstances. It is reasonable to wait to implement these until the final stages of productization. Most profilers do not use this functionality. - 15. `JIT_PInvokeBegin`/`JIT_PInvokeEnd` – Leave/enter the managed runtime state. Necessary + 14. `JIT_PInvokeBegin`/`JIT_PInvokeEnd` – Leave/enter the managed runtime state. Necessary for ReadyToRun pre-compiled pinvoke calls, so that they do not cause GC starvation - 16. `VarargPInvokeStub`/ `GenericPInvokeCalliHelper` Used to support calli + 15. `VarargPInvokeStub`/ `GenericPInvokeCalliHelper` Used to support calli pinvokes. It is expected that C\# 8.0 will increase use of this feature. Today use of this feature on Unix requires hand-written IL. 
On Windows this feature is commonly used by C++/CLI diff --git a/docs/design/coreclr/botr/readytorun-format.md b/docs/design/coreclr/botr/readytorun-format.md index a9a5c8b91630..82cade6222ce 100644 --- a/docs/design/coreclr/botr/readytorun-format.md +++ b/docs/design/coreclr/botr/readytorun-format.md @@ -814,7 +814,7 @@ enum ReadyToRunHelper READYTORUN_HELPER_GetString = 0x50, // Used by /Tuning for Profile optimizations - READYTORUN_HELPER_LogMethodEnter = 0x51, + READYTORUN_HELPER_LogMethodEnter = 0x51, // Unused since READYTORUN_MAJOR_VERSION 10.0 // Reflection helpers READYTORUN_HELPER_GetRuntimeTypeHandle = 0x54, @@ -870,12 +870,14 @@ enum ReadyToRunHelper READYTORUN_HELPER_Dbl2UIntOvf = 0xD5, READYTORUN_HELPER_Dbl2ULng = 0xD6, READYTORUN_HELPER_Dbl2ULngOvf = 0xD7, + READYTORUN_HELPER_Lng2Flt = 0xD8, + READYTORUN_HELPER_ULng2Flt = 0xD9, // Floating point ops READYTORUN_HELPER_DblRem = 0xE0, READYTORUN_HELPER_FltRem = 0xE1, - READYTORUN_HELPER_DblRound = 0xE2, - READYTORUN_HELPER_FltRound = 0xE3, + READYTORUN_HELPER_DblRound = 0xE2, // Unused since READYTORUN_MAJOR_VERSION 10.0 + READYTORUN_HELPER_FltRound = 0xE3, // Unused since READYTORUN_MAJOR_VERSION 10.0 #ifndef _TARGET_X86_ // Personality routines diff --git a/docs/design/coreclr/jit/DeabstractionAndConditionalEscapeAnalysis.md b/docs/design/coreclr/jit/DeabstractionAndConditionalEscapeAnalysis.md index 9b75f5a8c982..7e953478e543 100644 --- a/docs/design/coreclr/jit/DeabstractionAndConditionalEscapeAnalysis.md +++ b/docs/design/coreclr/jit/DeabstractionAndConditionalEscapeAnalysis.md @@ -5,7 +5,7 @@ There are interesting, important, and optimizable patterns where objects of seve For example, consider the abstract enumeration supported by `IEnumerable`. Here an enumerable `o` of some type (say `O`) can produce an enumerator `e` of some type (say `E`) that then operates on `o`. The typical pattern is: ```C# O o = ... -foreach(T t in o) { ... t } +foreach (T t in o) { ... t } ``` Under the covers, this requires creation of (or access to) a ref class or boxed value class `e` (Footnote 1). @@ -118,14 +118,14 @@ Based on this PGO data, the JIT first translates the above into something like t IEnumerator tt = null; if (o.GetType().Equals(typeof(int[]))) { - auto a = (int[]) o; + var a = (int[]) o; tt = a.GetEnumerator(); } else { - tt = o.GetEnumerator; + tt = o.GetEnumerator(); } - e = tt; + var e = tt; // -------------- End GDV "diamond" try @@ -134,7 +134,7 @@ Based on this PGO data, the JIT first translates the above into something like t bool b0 = false; if (e.GetType().Equals(typeof(SZGenericArrayEnumerator))) { - auto ea = (SZGenericArrayEnumerator) e; + var ea = (SZGenericArrayEnumerator) e; b0 = ea.MoveNext(); } else @@ -334,21 +334,21 @@ The eventual set of surviving allocations are then transformed to be new struct- IEnumerable t = null; if (o.Type == O) { - ac = new E(); + var ac = new E(); t = ac; } else { t = o.GetEnumerator(); } - e = t; + var e = t; if (e.Type == E) { // inlined e.MoveNext - ea = (E) e; + var ea = (E) e; // enumerator ref may be copied to other locals - tt = ea; + var tt = ea; tt.field--; } else diff --git a/docs/design/coreclr/jit/GC-write-barriers.md b/docs/design/coreclr/jit/GC-write-barriers.md new file mode 100644 index 000000000000..c0b58e572d5d --- /dev/null +++ b/docs/design/coreclr/jit/GC-write-barriers.md @@ -0,0 +1,101 @@ +# GC write barriers + +The GC write barrier function (JIT_WriteBarrier) is generally the hottest function in CoreCLR and is written in assembly. 
The full pseudo code for the function is as follows:
+
+
+````
+JIT_WriteBarrier(Object **dst, Object *ref)
+    Set *dst = ref
+
+    // Shadow Heap update
+    ifdef WRITE_BARRIER_CHECK: // Only set in DEBUG mode
+        if g_GCShadow != 0:
+            long *shadow_dst = g_GCShadow + (dst - g_lowest_address)
+            // Check shadow heap location is within shadow heap
+            if shadow_dst < g_GCShadowEnd:
+                *shadow_dst = ref
+                atomic: wait for stores to complete
+                if *dst != ref:
+                    *shadow_dst = INVALIDGCVALUE
+
+    // Update the write watch table, if it's in use
+    ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP:
+        if g_sw_ww_table != 0:
+            char *ww_table_dst = g_sw_ww_table + (dst >> 11)
+            if *ww_table_dst != 0:
+                *ww_table_dst = 0xff
+
+    // Return if the reference is not in ephemeral generations
+    if ref < g_ephemeral_low || ref >= g_ephemeral_high:
+        return
+
+    // Region Checks
+    if g_region_to_generation_table != 0:
+
+        // Calculate region generations
+        char reg_loc_dst = *((dst >> g_region_shr) + g_region_to_generation_table)
+        char reg_loc_ref = *((ref >> g_region_shr) + g_region_to_generation_table)
+
+        // Return if the region we're storing into is Gen 0
+        if reg_loc_dst == 0:
+            return
+
+        // Return if the new reference is not from old to young
+        if reg_loc_ref >= reg_loc_dst:
+            return
+
+        // Bitwise write barriers only
+        if g_region_use_bitwise_write_barrier:
+
+            char *card_table_dst = (dst >> 11) + g_card_table
+            char dst_bit = 1 << ((dst >> 8) & 7)
+
+            // Check if we need to update the card table
+            if (*card_table_dst & dst_bit) == 0:
+                return
+
+            // Atomically update the card table
+            lock: *card_table_dst |= dst_bit
+
+            goto CardBundle
+
+    // Check if we need to update the card table
+    char *card_table_dst = (dst >> 11) + g_card_table
+    if *card_table_dst == 0xff:
+        return
+
+    // Update the card table
+    *card_table_dst = 0xff
+
+CardBundle:
+
+    // Mark the card bundle table as dirty
+    ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES:
+        char *card_bundle_dst = (dst >> 21) + g_card_bundle_table
+        if *card_bundle_dst != 0xff:
+            *card_bundle_dst = 0xff
+
+````
+
+The Checked Write Barrier has additional checks:
+
+````
+JIT_CheckedWriteBarrier(Object **dst, Object *ref)
+
+    // Return if the destination is not on the heap
+    if dst < g_lowest_address || dst >= g_highest_address:
+        return
+
+    return JIT_WriteBarrier(dst, ref)
+````
+
+## WriteBarrierManager
+
+On AMD64 and Arm64, there are several different implementations of the write barrier function. Each version is a subset of the `JIT_WriteBarrier` above, assuming different state, meaning most `if` checks can be skipped. The actual write barrier that is called is a copy of one of these implementations (see the sketch below).
+
+The WriteBarrierManager keeps track of which implementation is currently being used. As internal state changes, the WriteBarrierManager updates the copy to the correct implementation. In practice, most of the internal state is fixed on startup, with only changes to/from use of write watch barriers changing during runtime.
+
+`WRITE_BARRIER_CHECK` is only set in `DEBUG` mode. On Arm64, `WRITE_BARRIER_CHECK` checks exist at the top of each version of the function when `DEBUG` mode is enabled. On AMD64 these checks do not exist. Instead, a special `JIT_WriteBarrier_Debug` version of the function exists, which contains most of the functionality of the `JIT_WriteBarrier` pseudo code and is used exclusively when `DEBUG` mode is enabled.
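+
+The following C# sketch only illustrates the bookkeeping described above. The `BarrierState` flags and the `WriteBarrierManagerSketch` type are invented for this document; the real WriteBarrierManager is C++ code in the VM that patches the executable copy of the barrier.
+
+```csharp
+// Illustrative sketch only. Names are invented to show the shape of the state tracking.
+[System.Flags]
+internal enum BarrierState
+{
+    None               = 0,
+    SoftwareWriteWatch = 1 << 0, // g_sw_ww_table is in use
+    Regions            = 1 << 1, // g_region_to_generation_table is in use
+    BitwiseCardUpdates = 1 << 2, // g_region_use_bitwise_write_barrier
+}
+
+internal sealed class WriteBarrierManagerSketch
+{
+    private BarrierState _current = BarrierState.None;
+
+    public void UpdateState(BarrierState newState)
+    {
+        if (newState == _current)
+            return; // the installed barrier copy already matches the runtime state
+
+        _current = newState;
+        InstallBarrierCopyFor(newState);
+    }
+
+    private static void InstallBarrierCopyFor(BarrierState state)
+    {
+        // In the runtime this step copies the assembly implementation that assumes
+        // exactly this state over the single barrier entry point the JIT calls.
+    }
+}
+```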
+ +On Arm64, `g_region_use_bitwise_write_barrier` is only set if LSE atomics are present on the hardware, as only LSE provides a single instruction to atomically update a byte via a bitwise OR. + diff --git a/docs/design/coreclr/jit/Stack Buffer Overflow Protection.md b/docs/design/coreclr/jit/Stack Buffer Overflow Protection.md new file mode 100644 index 000000000000..99571b701009 --- /dev/null +++ b/docs/design/coreclr/jit/Stack Buffer Overflow Protection.md @@ -0,0 +1,67 @@ +# Stack Buffer Overflow Protection + +This document describes mechanisms in the .NET code generator to guard against stack buffer overflows at runtime. + +## Background + +While .NET is primarily a type and memory safe "managed" programming platform, it also offers lower-level +facilities to allow for interop with native code, as well as some constructs that cannot be proven safe. + +Use of these potentially "unsafe" constructs can threaten the integrity of the .NET runtime stack, enabling modification +of key information on a stack frame, like the values of code and data addresses. + +The .NET code generator includes stack buffer overflow protection (aka Guard Stack or "GS") so that the integrity of the stack +can be checked at key points during program execution—walking the stack for EH or GC, or returning from methods. + +Stack buffer overflow protection is one part of a more comprehensive set of +[.NET runtime security mitigations](https://github.com/dotnet/designs/blob/main/accepted/2021/runtime-security-mitigations.md). + +## How GS Works + +GS is intended to detect buffer overruns from unsafe on-stack buffers that might corrupt vulnerable data on the stack. + +Unsafe buffers include: +* memory regions allocated dynamically on the stack, via `stackalloc` in C# (aka `localloc`, in IL) +* value classes marked as unsafe by language compilers, via `System.Runtime.CompilerServices.UnsafeValueTypeAttribute`. +For instance, C# [fixed-sized buffers](https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/unsafe-code#fixed-size-buffers). + +Vulnerable data on the stack frame generally includes addresses of code and data. + +GS protects this data in two ways: +* When possible, vulnerable data is moved lower on the stack frame, below unsafe buffers. +* For data that cannot be relocated (like the return address), a "stack cookie" (aka "stack canary") is allocated between +the unsafe buffers and the un-relocatable vulnerable data. This cookie value varies from run to run and its value +is verified before method exit and on stack walks done by the runtime for EH and GC. + +The net effect is that the stack layout for methods with unsafe buffers is as follows (note stacks grow down, +so any caller frames would be above and any callee frames below) + +| Stack Frame | +| --------- | +| memory arguments | +| return address | +| saved frame pointer | +| callee save area | +| stack cookie | +| fixed-sized unsafe buffers (without pointers) | +| fixed-sized unsafe buffers (with pointers) | +| shadow copies of vulnerable memory arguments | +| local variables | +| dynamically allocated buffers (localloc) | +| outgoing arguments | +| (stack pointer points here) | + +Vulnerable memory arguments are relocated to a shadow copy region below the unsafe fixed buffers. Within the fixed-sized +buffer region, buffers are ordered so that buffers containing pointers are at lower addresses than buffers without pointers. + +A buffer overrun that can corrupt vulnerable data will likely also corrupt the stack cookie. 
The cookie value is verified +before the method returns (and also when the runtime triggers stack walks). + +In addition, the return address may also be protected by hardware mechanisms like +[Control-flow Enforcement Technology (CET)](https://github.com/dotnet/runtime/blob/main/docs/design/features/cet-feature.md), +when these facilities are available on the host machine. + +## GS Check Failures + +A cookie verification failure leads to an immediate, uncatchable process exit (`FailFast`) since the integrity +of the process is in question. \ No newline at end of file diff --git a/docs/design/coreclr/jit/porting-ryujit.md b/docs/design/coreclr/jit/porting-ryujit.md index b1ef7b4e114f..a2de2c17df92 100644 --- a/docs/design/coreclr/jit/porting-ryujit.md +++ b/docs/design/coreclr/jit/porting-ryujit.md @@ -23,13 +23,18 @@ The following components need to be updated, or target-specific versions created * The basics * target.h * Instruction set architecture: - * registerXXX.h - * emitXXX.h, emitfmtXXX.h - * instrsXXX.h, emitXXX.cpp and targetXXX.cpp - * lowerXXX.cpp - * lsraXXX.cpp - * codegenXXX.cpp and simdcodegenXXX.cpp - * unwindXXX.cpp + * registerXXX.h - defines all registers used by the architecture and any aliases for them + * emitXXX.h - defines signatures for public instruction emission methods (e.g. "emit an instruction which takes a single integer argument") and private architecture-specific helpers + * emitXXX.cpp - implementation for emitXXX.h + * emitfmtXXX.h - optionally defines validity rules for how instructions should be formatted (e.g. RISC-V has no rules defined) + * instrsXXX.h - defines per-architecture instructions in assembly + * targetXXX.h - defines architectural constraints used elsewhere, such as "bitmask for all integer registers where callee is saved" or "size in bytes of a floating point register" + * targetXXX.cpp - implements ABI classifier for this architecture + * lowerXXX.cpp - implements [Lowering](https://github.com/dotnet/runtime/blob/main/docs/design/coreclr/jit/ryujit-overview.md#lowering) for this architecture + * lsraXXX.cpp - implements register requirement setting based on [GenTree Nodes](https://github.com/dotnet/runtime/blob/main/docs/design/coreclr/jit/ryujit-overview.md#gentree-nodes) + * codegenXXX.cpp - implements main codegen for this architecture (i.e. generating per-architecture instructions based on [GenTree Nodes](https://github.com/dotnet/runtime/blob/main/docs/design/coreclr/jit/ryujit-overview.md#gentree-nodes)) + * hwintrinsic\*XXX.\* and simdashwintrinsic\*XXX.h - defines and implements hardware intrinsic features, e.g. vector instructions + * unwindXXX.cpp - implements public unwinding API and unwind info dumping for debug use * Calling Convention and ABI: all over the place * 32 vs. 64 bits * Also all over the place. Some pointer size-specific data is centralized in target.h, but probably not 100%. 
diff --git a/docs/design/datacontracts/ExecutionManager.md b/docs/design/datacontracts/ExecutionManager.md
index 8504456e1d74..9075ee2548b6 100644
--- a/docs/design/datacontracts/ExecutionManager.md
+++ b/docs/design/datacontracts/ExecutionManager.md
@@ -23,6 +23,10 @@ struct CodeBlockHandle
     TargetPointer GetMethodDesc(CodeBlockHandle codeInfoHandle);
     // Get the instruction pointer address of the start of the code block
     TargetCodePointer GetStartAddress(CodeBlockHandle codeInfoHandle);
+    // Gets the unwind info of the code block at the specified code pointer
+    TargetPointer GetUnwindInfo(CodeBlockHandle codeInfoHandle, TargetCodePointer ip);
+    // Gets the base address the UnwindInfo of codeInfoHandle is relative to.
+    TargetPointer GetUnwindInfoBaseAddress(CodeBlockHandle codeInfoHandle);
 ```

 ## Version 1
@@ -53,6 +57,8 @@ Data descriptors used:
 | `CodeHeapListNode` | `MapBase` | Start of the map - start address rounded down based on OS page size |
 | `CodeHeapListNode` | `HeaderMap` | Bit array used to find the start of methods - relative to `MapBase` |
 | `RealCodeHeader` | `MethodDesc` | Pointer to the corresponding `MethodDesc` |
+| `RealCodeHeader` | `NumUnwindInfos` | Number of Unwind Infos |
+| `RealCodeHeader` | `UnwindInfos` | Start address of Unwind Infos |
 | `Module` | `ReadyToRunInfo` | Pointer to the `ReadyToRunInfo` for the module |
 | `ReadyToRunInfo` | `CompositeInfo` | Pointer to composite R2R info - or itself for non-composite |
 | `ReadyToRunInfo` | `NumRuntimeFunctions` | Number of `RuntimeFunctions` |
@@ -214,7 +220,7 @@ class CodeBlock
 }
 ```

-The remaining contract APIs extract fields of the `CodeBlock`:
+The `GetMethodDesc` and `GetStartAddress` APIs extract fields of the `CodeBlock`:

 ```csharp
     TargetPointer IExecutionManager.GetMethodDesc(CodeBlockHandle codeInfoHandle)
@@ -230,6 +236,15 @@ The remaining contract APIs extract fields of the `CodeBlock`:
     }
 ```

+`GetUnwindInfo` gets the Windows style unwind data in the form of `RUNTIME_FUNCTION`, which has a platform-dependent implementation. The ExecutionManager delegates to the JitManager implementations, as the unwind infos (`RUNTIME_FUNCTION`) are stored differently for jitted and R2R code.
+
+* For jitted code (`EEJitManager`), a sorted list of `RUNTIME_FUNCTION` entries is stored on the `RealCodeHeader`, which is accessed in the same way as in `GetMethodInfo` described above. The correct `RUNTIME_FUNCTION` is found by binary searching the list based on IP.
+
+* For R2R code (`ReadyToRunJitManager`), a sorted list of `RUNTIME_FUNCTION` entries is stored on the module's `ReadyToRunInfo`. This is accessed as described above for `GetMethodInfo`. Again, the relevant `RUNTIME_FUNCTION` is found by binary searching the list based on IP.
+
+Unwind info (`RUNTIME_FUNCTION`) uses relative addressing. For managed code, these values are relative to the start of the code's containing range in the RangeSectionMap (described below). This could be the beginning of a `CodeHeap` for jitted code or the base address of the loaded image for ReadyToRun code.
+`GetUnwindInfoBaseAddress` finds this base address for a given `CodeBlockHandle`. A simplified sketch of the IP-based lookup appears after the RangeSectionMap overview below.
+
 ### RangeSectionMap

 The range section map logically partitions the entire 32-bit or 64-bit addressable space into chunks.
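+
+Returning to `GetUnwindInfo`: the following is a minimal sketch of the IP-based binary search described above, under assumed types. The `RuntimeFunction` record uses the amd64-style begin/end layout, and the helper names are invented for illustration; they are not the actual cDAC data types.
+
+```csharp
+using System.Collections.Generic;
+
+// Minimal sketch of the IP-based lookup over a sorted RUNTIME_FUNCTION list.
+// The record layout and helper are assumptions for illustration only.
+internal readonly record struct RuntimeFunction(uint BeginAddress, uint EndAddress, uint UnwindData);
+
+internal static class UnwindLookupSketch
+{
+    // 'baseAddress' is the value GetUnwindInfoBaseAddress would return for this code block.
+    public static int FindRuntimeFunction(IReadOnlyList<RuntimeFunction> sortedByBegin, ulong ip, ulong baseAddress)
+    {
+        uint rva = (uint)(ip - baseAddress); // RUNTIME_FUNCTION entries are base-relative
+        int lo = 0;
+        int hi = sortedByBegin.Count - 1;
+        while (lo <= hi)
+        {
+            int mid = lo + (hi - lo) / 2;
+            RuntimeFunction rf = sortedByBegin[mid];
+            if (rva < rf.BeginAddress)
+                hi = mid - 1;
+            else if (rva >= rf.EndAddress)
+                lo = mid + 1;
+            else
+                return mid; // ip falls within this function's [Begin, End) range
+        }
+        return -1; // no containing RUNTIME_FUNCTION
+    }
+}
+```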
diff --git a/docs/design/datacontracts/Loader.md b/docs/design/datacontracts/Loader.md index e4649ccc0ffc..916b4ff03bf7 100644 --- a/docs/design/datacontracts/Loader.md +++ b/docs/design/datacontracts/Loader.md @@ -15,10 +15,32 @@ readonly struct ModuleHandle [Flags] enum ModuleFlags { + Tenured = 0x00000001, // Set once we know for sure the Module will not be freed until the appdomain itself exits EditAndContinue = 0x00000008, // Edit and Continue is enabled for this module ReflectionEmit = 0x00000040, // Reflection.Emit was used to create this module } +[Flags] +public enum AssemblyIterationFlags +{ + // load status flags + IncludeLoaded = 0x00000001, // include assemblies that are already loaded + // (m_level >= code:FILE_LOAD_DELIVER_EVENTS) + IncludeLoading = 0x00000002, // include assemblies that are still in the process of loading + // (all m_level values) + IncludeAvailableToProfilers = 0x00000020, // include assemblies available to profilers + // See comment at code:DomainAssembly::IsAvailableToProfilers + + // Execution / introspection flags + IncludeExecution = 0x00000004, // include assemblies that are loaded for execution only + + IncludeFailedToLoad = 0x00000010, // include assemblies that failed to load + + // Collectible assemblies flags + ExcludeCollectible = 0x00000040, // Exclude all collectible assemblies + IncludeCollected = 0x00000080, // Include all collectible assemblies that have been collected +} + record struct ModuleLookupTables( TargetPointer FieldDefToDesc, TargetPointer ManifestModuleReferences, @@ -31,7 +53,13 @@ record struct ModuleLookupTables( ``` csharp ModuleHandle GetModuleHandle(TargetPointer module); +IEnumerable GetModules(TargetPointer appDomain, AssemblyIterationFlags iterationFlags); +TargetPointer GetRootAssembly(); TargetPointer GetAssembly(ModuleHandle handle); +TargetPointer GetPEAssembly(ModuleHandle handle); +bool TryGetLoadedImageContents(ModuleHandle handle, out TargetPointer baseAddress, out uint size, out uint imageFlags); +bool TryGetSymbolStream(ModuleHandle handle, out TargetPointer buffer, out uint size); +bool IsProbeExtensionResultValid(ModuleHandle handle); ModuleFlags GetFlags(ModuleHandle handle); string GetPath(ModuleHandle handle); string GetFileName(ModuleHandle handle); @@ -41,20 +69,23 @@ TargetPointer GetILBase(ModuleHandle handle); ModuleLookupTables GetLookupTables(ModuleHandle handle); TargetPointer GetModuleLookupMapElement(TargetPointer table, uint token, out TargetNUInt flags); bool IsCollectible(ModuleHandle handle); +bool IsAssemblyLoaded(ModuleHandle handle); ``` ## Version 1 -Data descriptors used: +### Data descriptors used: | Data Descriptor Name | Field | Meaning | | --- | --- | --- | | `Module` | `Assembly` | Assembly of the Module | +| `Module` | `PEAssembly` | PEAssembly of the Module | | `Module` | `Base` | Pointer to start of PE file in memory | | `Module` | `Flags` | Assembly of the Module | | `Module` | `LoaderAllocator` | LoaderAllocator of the Module | | `Module` | `ThunkHeap` | Pointer to the thunk heap | | `Module` | `Path` | Path of the Module (UTF-16, null-terminated) | | `Module` | `FileName` | File name of the Module (UTF-16, null-terminated) | +| `Module` | `GrowableSymbolStream` | Pointer to the in memory symbol stream | | `Module` | `FieldDefToDescMap` | Mapping table | | `Module` | `ManifestModuleReferencesMap` | Mapping table | | `Module` | `MemberRefToDescMap` | Mapping table | @@ -64,23 +95,238 @@ Data descriptors used: | `ModuleLookupMap` | `TableData` | Start of the mapping table's 
data | | `ModuleLookupMap` | `SupportedFlagsMask` | Mask for flag bits on lookup map entries | | `ModuleLookupMap` | `Count` | Number of TargetPointer sized entries in this section of the map | -| `ModuleLookupMap` | `Next` | Pointer to next ModuleLookupMap segment for this map -| `Assembly` | `IsCollectible` | Flag indicating if this is module may be collected +| `ModuleLookupMap` | `Next` | Pointer to next ModuleLookupMap segment for this map | +| `Assembly` | `Module` | Pointer to the Assemblies module | +| `Assembly` | `IsCollectible` | Flag indicating if this is module may be collected | +| `Assembly` | `Error` | Pointer to exception. No error if nullptr | +| `Assembly` | `NotifyFlags` | Flags relating to the debugger/profiler notification state of the assembly | +| `Assembly` | `Level` | File load level of the assembly | +| `PEAssembly` | `PEImage` | Pointer to the PEAssembly's PEImage | +| `PEImage` | `LoadedImageLayout` | Pointer to the PEImage's loaded PEImageLayout | +| `PEImage` | `ProbeExtensionResult` | PEImage's ProbeExtensionResult | +| `ProbeExtensionResult` | `Type` | Type of ProbeExtensionResult | +| `PEImageLayout` | `Base` | Base address of the image layout | +| `PEImageLayout` | `Size` | Size of the image layout | +| `PEImageLayout` | `Flags` | Flags associated with the PEImageLayout | +| `CGrowableSymbolStream` | `Buffer` | Pointer to the raw symbol stream buffer start | +| `CGrowableSymbolStream` | `Size` | Size of the raw symbol stream buffer | +| `AppDomain` | `RootAssembly` | Pointer to the root assembly | +| `AppDomain` | `DomainAssemblyList` | ArrayListBase of assemblies in the AppDomain | +| `LoaderAllocator` | `ReferenceCount` | Reference count of LoaderAllocator | +| `ArrayListBase` | `Count` | Total number of elements in the ArrayListBase | +| `ArrayListBase` | `FirstBlock` | First ArrayListBlock | +| `ArrayListBlock` | `Next` | Next ArrayListBlock in chain | +| `ArrayListBlock` | `Size` | Size of data section in block | +| `ArrayListBlock` | `ArrayStart` | Start of data section in block | + + +### Global variables used: +| Global Name | Type | Purpose | +| --- | --- | --- | +| `AppDomain` | TargetPointer | Pointer to the global AppDomain | + +### Contract Constants: +| Name | Type | Purpose | Value | +| --- | --- | --- | --- | +| `ASSEMBLY_LEVEL_LOADED` | uint | The value of Assembly Level required for an Assembly to be considered loaded. In the runtime, this is `FILE_LOAD_DELIVER_EVENTS` | `0x4` | +| `ASSEMBLY_NOTIFYFLAGS_PROFILER_NOTIFIED` | uint | Flag in Assembly NotifyFlags indicating the Assembly will notify profilers. | `0x1` | + +### Data Structures +```csharp +// The runtime representation of Module's flag field. +// For contract version 1, these are identical to ModuleFlags on the contract interface, but could diverge in the future. 
+private enum ModuleFlags_1 : uint +{ + Tenured = 0x00000001, // Set once we know for sure the Module will not be freed until the appdomain itself exits + EditAndContinue = 0x00000008, // Edit and Continue is enabled for this module + ReflectionEmit = 0x00000040, // Reflection.Emit was used to create this module +} +``` + +### Method Implementations ``` csharp ModuleHandle GetModuleHandle(TargetPointer modulePointer) { return new ModuleHandle(modulePointer); } +IEnumerable GetModules(TargetPointer appDomain, AssemblyIterationFlags iterationFlags) +{ + if (appDomain == TargetPointer.Null) throw new ArgumentException("appDomain must not be null"); + + // ArrayListBase encapsulates the data structure defined in arraylist.h + // It handles reading each contained pointer and exposing them as a C# List + ArrayListBase arrayList = // read ArrayListBase starting at appDomain + AppDomain::DomainAssemblyList offset + + foreach (TargetPointer domainAssembly in arrayList.Elements) + { + // We have a list of DomainAssemblies, this class contains a single pointer to an Assembly. + // Therefore we can read a pointer at the DomainAssembly to access the actual Assembly. + TargetPointer pAssembly = target.ReadPointer(domainAssembly); + Assembly assembly = // read Assembly object at pAssembly + + // The Assemblies map 1:1 to Modules, however we must filter them based on the iterationFlags before returning. + // The following filtering logic is based on AppDomain::AssemblyIterator::Next_Unlocked in appdomain.cpp + + if (assembly.IsError) + { + // assembly is in an error state, return if we are supposed to include it + // in either case, we continue to the next assembly + if (iterationFlags.HasFlag(AssemblyIterationFlags.IncludeFailedToLoad)) + { + yield return new ModuleHandle(assembly.Module); + } + continue; + } + + if ((assembly.NotifyFlags & ASSEMBLY_NOTIFYFLAGS_PROFILER_NOTIFIED) != 0 && + !iterationFlags.HasFlag(AssemblyIterationFlags.IncludeAvailableToProfilers)) + { + // The assembly has reached the state at which we would notify profilers, + // and we're supposed to include such assemblies in the enumeration. So + // don't reject it (i.e., noop here, and don't bother with the rest of + // the load status checks). Check for this first, since + // IncludeAvailableToProfilers contains some loaded AND loading + // assemblies. + } + else if (assembly.Level >= ASSEMBLY_LEVEL_LOADED) + { + if (!iterationFlags.HasFlag(AssemblyIterationFlags.IncludeLoaded)) + { + // the assembly is loaded, but we aren't including loaded assemblies, skip + continue; + } + } + else + { + // assembly must be in the process of loading as it is not currently loaded + + if (!iterationFlags.HasFlag(AssemblyIterationFlags.IncludeLoading)) + { + // the assembly is loading, but we aren't including loading assemblies, skip + continue; + } + } + + // Next, reject assemblies whose execution status is + // not to be included in the enumeration + + if (!iterationFlags.HasFlag(AssemblyIterationFlags.IncludeExecution)) + { + // the assembly is executing, but we aren't including executing assemblies, skip + continue; + } + + if (assembly.IsCollectible != 0) + { + if (iterationFlags.HasFlag(AssemblyIterationFlags.ExcludeCollectible)) + { + // the assembly is collectible, but we are excluding collectible assemblies, skip + continue; + } + + Module module = // read Module at assembly.Module + if (((ModuleFlags)module.Flags).HasFlag(ModuleFlags.Tenured)) + { + // Un-tenured collectible assemblies should not be returned. 
(This can only happen in a brief + // window during collectible assembly creation. No thread should need to have a pointer + // to the just allocated DomainAssembly at this stage.) + // the assemblies Module is not Tenured, skip + continue; + } + + LoaderAllocator loaderAllocator = // read LoaderAllocator at module.LoaderAllocator + if (!loaderAllocator.IsAlive && !iterationFlags.HasFlag(AssemblyIterationFlags.IncludeCollected)) + { + // if the assembly is not alive anymore and we aren't including Collected assemblies, skip + continue; + } + } + + yield return new ModuleHandle(assembly.Module); + } +} + +TargetPointer GetRootAssembly() +{ + TargetPointer appDomainPointer = target.ReadGlobalPointer(Constants.Globals.AppDomain); + AppDomain appDomain = // read AppDomain object starting at appDomainPointer + return appDomain.RootAssembly; +} + TargetPointer GetAssembly(ModuleHandle handle) { - return target.ReadPointer(handle.Address + /* Module::Assrembly offset */); + return target.ReadPointer(handle.Address + /* Module::Assembly offset */); +} + +TargetPointer GetPEAssembly(ModuleHandle handle) +{ + return target.ReadPointer(handle.Address + /* Module::PEAssembly offset */); +} + +bool TryGetLoadedImageContents(ModuleHandle handle, out TargetPointer baseAddress, out uint size, out uint imageFlags) +{ + baseAddress = TargetPointer.Null; + size = 0; + imageFlags = 0; + + TargetPointer peAssembly = target.ReadPointer(handle.Address + /* Module::PEAssembly offset */); + if (peAssembly == 0) return false; // no loaded PEAssembly + + TargetPointer peImage = target.ReadPointer(peAssembly + /* PEAssembly::PEImage offset */); + if(peImage == 0) return false; // no loaded PEImage + + TargetPointer peImageLayout = target.ReadPointer(peImage + /* PEImage::LoadedImageLayout offset */); + + baseAddress = target.ReadPointer(peImageLayout + /* PEImageLayout::Base offset */); + size = target.Read(peImageLayout + /* PEImageLayout::Size offset */); + imageFlags = target.Read(peImageLayout + /* PEImageLayout::Flags offset */); + return true; +} + +bool TryGetSymbolStream(ModuleHandle handle, out TargetPointer buffer, out uint size) +{ + buffer = TargetPointer.Null; + size = 0; + + TargetPointer growableSymbolStream = target.ReadPointer(handle.Address + /* Module::GrowableSymbolStream offset */); + if (growableSymbolStream == 0) return false; // no GrowableSymbolStream found + + buffer = target.ReadPointer(growableSymbolStream + /* CGrowableSymbolStream::Buffer offset */); + size = target.Read(growableSymbolStream + /* CGrowableSymbolStream::Size offset */); + return true; +} + +bool IsProbeExtensionResultValid(ModuleHandle handle) +{ + TargetPointer peAssembly = target.ReadPointer(handle.Address + /* Module::PEAssembly offset */); + if (peAssembly == 0) return false; // no loaded PEAssembly + + TargetPointer peImage = target.ReadPointer(peAssembly + /* PEAssembly::PEImage offset */); + if(peImage == 0) return false; // no loaded PEImage + + TargetPointer probeExtensionResult = target.ReadPointer(peImage + /* PEImage::ProbeExtensionResult offset */); + int type = target.Read(probeExtensionResult + /* ProbeExtensionResult::Type offset */); + return type != 0; // 0 is the invalid type. 
See assemblyprobeextension.h for details +} + +private static ModuleFlags GetFlags(uint flags) +{ + ModuleFlags_1 runtimeFlags = (ModuleFlags_1)flags; + ModuleFlags flags = default; + if (runtimeFlags.HasFlag(ModuleFlags_1.Tenured)) + flags |= ModuleFlags.Tenured; + if (runtimeFlags.HasFlag(ModuleFlags_1.EditAndContinue)) + flags |= ModuleFlags.EditAndContinue; + if (runtimeFlags.HasFlag(ModuleFlags_1.ReflectionEmit)) + flags |= ModuleFlags.ReflectionEmit; + return flags; } ModuleFlags GetFlags(ModuleHandle handle) { - return target.Read(handle.Address + /* Module::Flags offset */); + return GetFlags(target.Read(handle.Address + /* Module::Flags offset */)); } string GetPath(ModuleHandle handle) @@ -161,4 +407,11 @@ bool ILoader.IsCollectible(ModuleHandle handle) byte isCollectible = _target.Read(assembly + /* Assembly::IsCollectible*/); return isCollectible != 0; } + +bool ILoader.IsAssemblyLoaded(ModuleHandle handle) +{ + TargetPointer assembly = _target.ReadPointer(handle.Address + /*Module::Assembly*/); + uint loadLevel = _target.Read(assembly + /* Assembly::Level*/); + return assembly.Level >= ASSEMBLY_LEVEL_LOADED; +} ``` diff --git a/docs/design/datacontracts/PrecodeStubs.md b/docs/design/datacontracts/PrecodeStubs.md index b9448ed0507f..83f593fcf2c0 100644 --- a/docs/design/datacontracts/PrecodeStubs.md +++ b/docs/design/datacontracts/PrecodeStubs.md @@ -9,7 +9,7 @@ This contract provides support for examining [precode](../coreclr/botr/method-de TargetPointer GetMethodDescFromStubAddress(TargetCodePointer entryPoint); ``` -## Version 1 +## Version 1 and 2 Data descriptors used: | Data Descriptor Name | Field | Meaning | @@ -24,9 +24,12 @@ Data descriptors used: | PrecodeMachineDescriptor | PInvokeImportPrecodeType| precode sort byte for PInvoke precode stubs, if supported | | PrecodeMachineDescriptor | HasFixupPrecode | 1 if platform supports fixup precode stubs | | PrecodeMachineDescriptor | FixupPrecodeType| precode sort byte for fixup precode stubs, if supported | -| StubPrecodeData | MethodDesc | pointer to the MethodDesc associated with this stub precode | +| PrecodeMachineDescriptor | ThisPointerRetBufPrecodeType | precode sort byte for this pointer ret buf precodes | +| StubPrecodeData | MethodDesc | pointer to the MethodDesc associated with this stub precode (Version 1 only) | +| StubPrecodeData | SecretParam | pointer to the MethodDesc associated with this stub precode or a second stub data pointer for other types (Version 2 only) | | StubPrecodeData | Type | precise sort of stub precode | | FixupPrecodeData | MethodDesc | pointer to the MethodDesc associated with this fixup precode | +| ThisPtrRetBufPrecodeData | MethodDesc | pointer to the MethodDesc associated with the ThisPtrRetBufPrecode (Version 2 only) | arm32 note: the `CodePointerToInstrPointerMask` is used to convert IP values that may include an arm Thumb bit (for example extracted from disassembling a call instruction or from a snapshot of the registers) into an address. On other architectures applying the mask is a no-op. 
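+
+To make the arm32 note above concrete, applying the mask is a single bitwise AND. The helper below is hypothetical and only illustrates the intent; the mask value comes from the `PrecodeMachineDescriptor`.
+
+```csharp
+// Hypothetical illustration of the arm32 note above.
+// On arm32 the descriptor supplies a mask that clears the Thumb bit (bit 0);
+// on other architectures the mask is all ones, so this is a no-op.
+static ulong CodePointerToInstrPointer(ulong codePointer, ulong codePointerToInstrPointerMask)
+{
+    return codePointer & codePointerToInstrPointerMask;
+}
+
+// e.g. arm32: CodePointerToInstrPointer(0x00401235, ~1ul) == 0x00401234
+```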
@@ -145,7 +148,10 @@ After the initial precode type is determined, for stub precodes a refined precod internal override TargetPointer GetMethodDesc(Target target, Data.PrecodeMachineDescriptor precodeMachineDescriptor) { TargetPointer stubPrecodeDataAddress = InstrPointer + precodeMachineDescriptor.StubCodePageSize; - return target.ReadPointer (stubPrecodeDataAddress + /* offset of StubPrecodeData.MethodDesc */ ); + if (ContractVersion(PrecodeStubs) == 1) + return target.ReadPointer (stubPrecodeDataAddress + /* offset of StubPrecodeData.MethodDesc */ ); + else + return target.ReadPointer (stubPrecodeDataAddress + /* offset of StubPrecodeData.SecretParam */ ); } } @@ -170,7 +176,10 @@ After the initial precode type is determined, for stub precodes a refined precod internal override TargetPointer GetMethodDesc(Target target, Data.PrecodeMachineDescriptor precodeMachineDescriptor) { - throw new NotImplementedException(); // TODO(cdac) + if (ContractVersion(PrecodeStubs) == 1) + throw new NotImplementedException(); // TODO(cdac) + else + return target.ReadPointer(target.ReadPointer (stubPrecodeDataAddress + /* offset of StubPrecodeData.SecretParam */ ) + /*offset of ThisPtrRetBufPrecodeData.MethodDesc*/); } } diff --git a/docs/design/datacontracts/RuntimeInfo.md b/docs/design/datacontracts/RuntimeInfo.md new file mode 100644 index 000000000000..e3c5e0c14cbe --- /dev/null +++ b/docs/design/datacontracts/RuntimeInfo.md @@ -0,0 +1,43 @@ +# Contract RuntimeInfo + +This contract encapsulates support for fetching information about the target runtime. + +## APIs of contract + +```csharp +public enum RuntimeInfoArchitecture : uint +{ + Unknown = 0, + X86, + Arm32, + X64, + Arm64, + LoongArch64, + RISCV, +} + +public enum RuntimeInfoOperatingSystem : uint +{ + Unknown = 0, + Win, + Unix, +} +``` + +```csharp +// Gets the targets architecture. If this information is not available returns Unknown. +RuntimeInfoArchitecture GetTargetArchitecture(); + +// Gets the targets operating system. If this information is not available returns Unknown. +RuntimeInfoOperatingSystem GetTargetOperatingSystem(); +``` + +## Version 1 + +Global variables used: +| Global Name | Type | Purpose | +| --- | --- | --- | +| Architecture | string | Target architecture | +| OperatingSystem | string | Target operating system | + +The contract implementation simply returns the contract descriptor global values parsed as the respective enum case-insensitively. If these globals are not available, the contract returns Unknown. diff --git a/docs/design/datacontracts/StackWalk.md b/docs/design/datacontracts/StackWalk.md new file mode 100644 index 000000000000..143a2f89435f --- /dev/null +++ b/docs/design/datacontracts/StackWalk.md @@ -0,0 +1,329 @@ +# Contract StackWalk + +This contract encapsulates support for walking the stack of managed threads. + +## APIs of contract + +```csharp +public interface IStackDataFrameHandle { }; +``` + +```csharp +// Creates a stack walk and returns a handle +IEnumerable CreateStackWalk(ThreadData threadData); + +// Gets the thread context at the given stack dataframe. +byte[] GetRawContext(IStackDataFrameHandle stackDataFrameHandle); +// Gets the Frame address at the given stack dataframe. Returns TargetPointer.Null if the current dataframe does not have a valid Frame. +TargetPointer GetFrameAddress(IStackDataFrameHandle stackDataFrameHandle); + +// Gets the Frame name associated with the given Frame identifier. If no matching Frame name found returns an empty string. 
+string GetFrameName(TargetPointer frameIdentifier); +``` + +## Version 1 +To create a full walk of the managed stack, two types of 'stacks' must be read. + +1. True call frames on the thread's stack +2. Capital "F" Frames (referred to as Frames as opposed to frames) which are used by the runtime for book keeping purposes. + +Capital "F" Frames are pushed and popped to a singly-linked list on the runtime's Thread object and are accessible using the [IThread](./Thread.md) contract. These capital "F" Frames are allocated within a functions call frame, meaning they also live on the stack. A subset of Frame types store extra data allowing us to recover a portion of the context from when they were created For our purposes, these are relevant because they mark every transition where managed code calls native code. For more information about Frames see: [BOTR Stack Walking](https://github.com/dotnet/runtime/blob/44b7251f94772c69c2efb9daa7b69979d7ddd001/docs/design/coreclr/botr/stackwalking.md). + +Unwinding call frames on the stack usually requires an OS specific implementation. However, in our particular circumstance of unwinding only **managed function** call frames, the runtime uses Windows style unwind logic/codes for all platforms (this isn't true for NativeAOT). Therefore we can delegate to the existing native unwinding code located in `src/coreclr/unwinder/`. For more information on the Windows unwinding algorithm and unwind codes see the following docs: + +* [Windows x64](https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64) +* [Windows ARM64](https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling) + +This contract depends on the following descriptors: + +| Data Descriptor Name | Field | Meaning | +| --- | --- | --- | +| `Frame` | `Next` | Pointer to next from on linked list | +| `InlinedCallFrame` | `CallSiteSP` | SP saved in Frame | +| `InlinedCallFrame` | `CallerReturnAddress` | Return address saved in Frame | +| `InlinedCallFrame` | `CalleeSavedFP` | FP saved in Frame | +| `SoftwareExceptionFrame` | `TargetContext` | Context object saved in Frame | +| `SoftwareExceptionFrame` | `ReturnAddress` | Return address saved in Frame | +| `FramedMethodFrame` | `TransitionBlockPtr` | Pointer to Frame's TransitionBlock | +| `TransitionBlock` | `ReturnAddress` | Return address associated with the TransitionBlock | +| `TransitionBlock` | `CalleeSavedRegisters` | Platform specific CalleeSavedRegisters struct associated with the TransitionBlock | +| `FuncEvalFrame` | `DebuggerEvalPtr` | Pointer to the Frame's DebuggerEval object | +| `DebuggerEval` | `TargetContext` | Context saved inside DebuggerEval | +| `DebuggerEval` | `EvalDuringException` | Flag used in processing FuncEvalFrame | +| `ResumableFrame` | `TargetContextPtr` | Pointer to the Frame's Target Context | +| `FaultingExceptionFrame` | `TargetContext` | Frame's Target Context | +| `HijackFrame` | `ReturnAddress` | Frame's stored instruction pointer | +| `HijackFrame` | `HijackArgsPtr` | Pointer to the Frame's stored HijackArgs | +| `HijackArgs` (amd64) | `CalleeSavedRegisters` | CalleeSavedRegisters data structure | +| `HijackArgs` (amd64 Windows) | `Rsp` | Saved stack pointer | +| `HijackArgs` (arm64) | For each register `r` saved in HijackArgs, `r` | Register names associated with stored register values | +| `CalleeSavedRegisters` | For each callee saved register `r`, `r` | Register names associated with stored register values | + +Global variables used: +| Global Name | Type | Purpose | +| --- | --- | --- | 
+| For each FrameType ``, `##Identifier` | `FrameIdentifier` enum value | Identifier used to determine concrete type of Frames | + +Contracts used: +| Contract Name | +| --- | +| `ExecutionManager` | +| `Thread` | + + +### Stackwalk Algorithm +The intuition for walking a managed stack is relatively simply: unwind managed portions of the stack until we hit native code then use capital "F" Frames as checkpoints to get into new sections of managed code. Because Frames are added at each point before managed code (higher SP value) calls native code (lower SP values), we are guaranteed that a Frame exists at the top (lower SP value) of each managed call frame run. + +In reality, the actual algorithm is a little more complex fow two reasons. It requires pausing to return the current context and Frame at certain points and it checks for "skipped Frames" which can occur if an capital "F" Frame is allocated in a managed stack frame (e.g. an inlined P/Invoke call). + +1. Setup + 1. Set the current context `currContext` to be the thread's context. Fetched as part of the [ICorDebugDataTarget](https://learn.microsoft.com/en-us/dotnet/framework/unmanaged-api/debugging/icordebugdatatarget-getthreadcontext-method) COM interface. + 2. Create a stack of the thread's capital "F" Frames `frameStack`. +2. **Return the current context**. +3. While the `currContext` is in managed code or `frameStack` is not empty: + 1. If `currContext` is native code, pop the top Frame from `frameStack` update the context using the popped Frame. **Return the updated context** and **go to step 3**. + 2. If `frameStack` is not empty, check for skipped Frames. Peek `frameStack` to find a Frame `frame`. Compare the address of `frame` (allocated on the stack) with the caller of the current context's stack pointer (found by unwinding current context one iteration). + If the address of the `frame` is less than the caller's stack pointer, **return the current context**, pop the top Frame from `frameStack`, and **go to step 3**. + 3. Unwind `currContext` using the Windows style unwinder. **Return the current context**. + + +#### Simple Example + +In this example we walk through the algorithm without instances of skipped Frames. + +Given the following call stack and capital "F" Frames linked list, we can apply the above algorithm. + + + + + + + + + +
Call stack (growing down), followed by the capital "F" Frames linked list:
+ +``` +Managed Call: ----------- + + | Native | <- 's SP + - | | + |-----------| <- 's SP + | | + | Managed | + | | + |-----------| <- 's SP + | | + | Native | + + | | + | StackBase | +``` + + +``` +SoftwareExceptionFrame + (Context = ) + + || + \/ + + NULL TERMINATOR +``` + +
+ +1. (1) Set `currContext` to the thread context ``. Create a stack of Frames `frameStack`. +2. (2) Return the `currContext` which has the threads context. +3. (3) `currContext` is in unmanaged code (native) however, because `frameStack` is not empty, we begin processing the context. +4. (3.1) Since `currContext` is unmanaged. We pop the SoftwareExceptionFrame from `frameStack` and use it to update `currContext`. The SoftwareExceptionFrame is holding context `` which we set `currContext` to. Return the current context and go back to step 3. +5. (3) Now `currContext` is in managed code as shown by ``'s SP. Therefore, we begin to process the context. +6. (3.1) Since `currContext` is managed, skip step 3.1. +7. (3.2) Since `frameStack` is empty, we do not check for skipped Frames. +8. (3.3) Unwind `currContext` a single iteration to `` and return the current context. +9. (3) `currContext` is now at unmanaged (native) code and `frameStack` is empty. Therefore we are done. + +The following C# code could yield a stack similar to the example above: +```csharp +void foo() +{ + // Call native code or function that calls down to native. + Console.ReadLine(); + // Capture stack trace while inside native code. +} +``` + +#### Skipped Frame Example +The skipped Frame check is important when managed code calls managed code through an unmanaged boundary. +This occurs when calling a function marked with `[UnmanagedCallersOnly]` as an unmanaged delegate from a managed caller. +In this case, if we ignored the skipped Frame check we would miss the unmanaged boundary. + +Given the following call stack and capital "F" Frames linked list, we can apply the above algorithm. + + + + + + + + + +
Call stack (growing down), followed by the capital "F" Frames linked list:
+ +``` +Unmanaged Call: -X-X-X-X-X- +Managed Call: ----------- +InlinedCallFrame location: [ICF] + + | Managed | <- 's SP + - | | + | | + |-X-X-X-X-X-| <- 's SP + | [ICF] | + | Managed | + | | + |-----------| <- 's SP + | | + | Native | + + | | + | StackBase | +``` + + +``` +InlinedCallFrame + (Context = ) + + || + \/ + + NULL TERMINATOR +``` + +
+ +1. (1) Set `currContext` to the thread context `
`. Create a stack of Frames `frameStack`. +2. (2) Return the `currContext` which has the threads context. +3. (3) Since `currContext` is in managed code, we begin to process the context. +4. (3.1) Since `currContext` is managed, skip step 3.1. +5. (3.2) Check for skipped Frames. Copy `currContext` into `parentContext` and unwind `parentContext` once using the Windows style unwinder. As seen from the call stack, unwinding `currContext=` will yield ``. We peek the top of `frameStack` and find an InlinedCallFrame (shown in call stack above as `[ICF]`). Since `parentContext`'s SP is greater than the address of `[ICF]` there are no skipped Frames. +6. (3.3) Unwind `currContext` a single iteration to `` and return the current context. +7. (3) Since `currContext` is still in managed code, we continue processing the context. +8. (3.1) Since `currContext` is managed, skip step 3.1. +9. (3.2) Check for skipped Frames. Copy `currContext` into `parentContext` and unwind `parentContext` once using the Windows style unwinder. As seen from the call stack, unwinding `currContext=` will yield ``. We peek the top of `frameStack` and find an InlinedCallFrame (shown in call stack above as `[ICF]`). This time the the address of `[ICF]` is less than `parentContext`'s SP. Therefore we return the current context then pop the InlinedCallFrame from `frameStack` which is now empty and return to step 3. +10. (3) Since `currContext` is still in managed code, we continue processing the context. +11. (3.1) Since `currContext` is managed, skip step 3.1. +12. (3.2) Since `frameStack` is empty, we do not check for skipped Frames. +13. (3.3) Unwind `currContext` a single iteration to `` and return the current context. +14. (3) `currContext` is now at unmanaged (native) code and `frameStack` is empty. Therefore we are done. + +The following C# code could yield a stack similar to the example above: +```csharp +void foo() +{ + var fptr = (delegate* unmanaged)&bar; + fptr(); +} + +[UnmanagedCallersOnly] +private static void bar() +{ + // Do something + // Capture stack trace while in here +} +``` + +### Capital 'F' Frame Handling + +Capital 'F' Frame's store context data in a number of different ways. Of the couple dozen Frame types defined in `src/coreclr/vm/frames.h` several do not store any context data or update the context, signified by `NeedsUpdateRegDisplay_Impl() == false`. Of that Frames that do update the context, several share implementations of `UpdateRegDisplay_Impl` through inheritance. This leaves us with 9 distinct mechanisms to update the context that will be detailed below. Each mechanism is referred to using the Frame class that implements the mechanism and may be used by subclasses. + +Most of the handlers are implemented in `BaseFrameHandler`. Platform specific components are implemented/overridden in `FrameHandler`. + +#### InlinedCallFrame + +InlinedCallFrames store and update only the IP, SP, and FP of a given context. If the stored IP (CallerReturnAddress) is 0 then the InlinedCallFrame does not have an active call and should not update the context. + +#### SoftwareExceptionFrame + +SoftwareExceptionFrames store a copy of the context struct. The IP, SP, and all ABI specified (platform specific) callee-saved registers are copied from the stored context to the working context. + +#### TransitionFrame + +TransitionFrames hold a pointer to a `TransitionBlock`. The TransitionBlock holds a return address along with a `CalleeSavedRegisters` struct which has values for all ABI specified callee-saved registers. 
The SP can be found using the address of the TransitionBlock. Since the TransitionBlock will be the lowest element on the stack, the SP is the address of the TransitionBlock + sizeof(TransitionBlock). + +When updating the context from a TransitionFrame, the IP, SP, and all ABI specified callee-saved registers are copied over. + +The following Frame types also use this mechanism: +* FramedMethodFrame +* CLRToCOMMethodFrame +* PInvokeCallIFrame +* PrestubMethodFrame +* StubDispatchFrame +* CallCountingHelperFrame +* ExternalMethodFrame +* DynamicHelperFrame + +#### FuncEvalFrame + +FuncEvalFrames hold a pointer to a `DebuggerEval`. The DebuggerEval holds a full context which is completely copied over to the working context when updating. + +#### ResumableFrame + +ResumableFrames hold a pointer to a context object (Note this is different from SoftwareExceptionFrames which hold the context directly). The entire context object is copied over to the working context when updating. + +RedirectedThreadFrames also use this mechanism. + +#### FaultingExceptionFrame + +FaultingExceptionFrames have two different implementations. One for Windows x86 and another for all other builds (with funclets). + +Given the cDAC does not yet support Windows x86, this version is not supported. + +The other version stores a context struct. To update the working context, the entire stored context is copied over. In addition the `ContextFlags` are updated to ensure the `CONTEXT_XSTATE` bit is not set given the debug version of the contexts can not store extended state. This bit is architecture specific. + +#### HijackFrame + +HijackFrames carry a IP (ReturnAddress) and a pointer to `HijackArgs`. All platforms update the IP and use the platform specific HijackArgs to update further registers. The following details currently implemented platforms. + +* x64 - On x64, HijackArgs contains a CalleeSavedRegister struct. The saved registers values contained in the struct are copied over to the working context. + * Windows - On Windows, HijackArgs also contains the SP value directly which is copied over to the working context. + * Non-Windows - On OS's other than Windows, HijackArgs does not contain an SP value. Instead since the HijackArgs struct lives on the stack, the SP is `&hijackArgs + sizeof(HijackArgs)`. This value is also copied over. +* arm64 - Unlike on x64, on arm64 HijackArgs contains a list of register values instead of the CalleeSavedRegister struct. These values are copied over to the working context. The SP is fetched using the same technique as on x64 non-Windows where `SP = &hijackArgs + sizeof(HijackArgs)` and is copied over to the working context. + +#### TailCallFrame + +TailCallFrames are only used on Windows x86 which is not yet supported in the cDAC and therefore not implemented. + +### APIs + +The majority of the contract's complexity is the stack walking algorithm (detailed above) implemented as part of `CreateStackWalk`. +The `IEnumerable` return value is computed lazily. + +```csharp +IEnumerable CreateStackWalk(ThreadData threadData); +``` + +The rest of the APIs convey state about the stack walk at a given point which fall out of the stack walking algorithm relatively simply. + +`GetRawContext` Retrieves the raw Windows style thread context of the current frame as a byte array. The size and shape of the context is platform dependent. + +* On Windows the context is defined directly in Windows header `winnt.h`. 
See [CONTEXT structure](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-context) for more info. +* On non-Windows platform the context's are defined in `src/coreclr/pal/inc/pal.h` and should mimic the Windows structure. + +This context is not guaranteed to be complete. Not all capital "F" Frames store the entire context, some only store the IP/SP/FP. Therefore, at points where the context is based on these Frames it will be incomplete. +```csharp +byte[] GetRawContext(IStackDataFrameHandle stackDataFrameHandle); +``` + + +`GetFrameAddress` gets the address of the current capital "F" Frame. This is only valid if the `IStackDataFrameHandle` is at a point where the context is based on a capital "F" Frame. For example, it is not valid when when the current context was created by using the stack frame unwinder. +If the Frame is not valid, returns `TargetPointer.Null`. + +```csharp +TargetPointer GetFrameAddress(IStackDataFrameHandle stackDataFrameHandle); +``` + + +`GetFrameName` gets the name associated with a FrameIdentifier (pointer sized value) from the Globals stored in the contract descriptor. If no associated Frame name is found, it returns an empty string. +```csharp +string GetFrameName(TargetPointer frameIdentifier); +``` diff --git a/docs/design/datacontracts/StressLog.md b/docs/design/datacontracts/StressLog.md new file mode 100644 index 000000000000..abb7824a837f --- /dev/null +++ b/docs/design/datacontracts/StressLog.md @@ -0,0 +1,335 @@ +# Contract StressLog + +This contract is for reading the stress log of the process. + +## APIs of the contract + +```csharp +internal record struct StressLogData( + uint LoggedFacilities, + uint Level, + uint MaxSizePerThread, + uint MaxSizeTotal, + int TotalChunks, + ulong TickFrequency, + ulong StartTimestamp, + TargetPointer Logs); + +internal record struct ThreadStressLogData( + TargetPointer NextPointer, + ulong ThreadId, + bool WriteHasWrapped, + TargetPointer CurrentPointer, + TargetPointer ChunkListHead, + TargetPointer ChunkListTail, + TargetPointer CurrentWriteChunk); + +internal record struct StressMsgData( + uint Facility, + TargetPointer FormatString, + ulong Timestamp, + IReadOnlyList Args); +``` + +```csharp +bool HasStressLog(); +StressLogData GetStressLogData(); +StressLogData GetStressLogData(TargetPointer stressLogPointer); +IEnumerable GetThreadStressLogs(TargetPointer logs); +IEnumerable GetStressMessages(ThreadStressLogData threadLog); +bool IsPointerInStressLog(StressLogData stressLog, TargetPointer pointer); +``` + +## Versions 1 and 2 + +Data descriptors used: +| Data Descriptor Name | Field | Meaning | +| --- | --- | --- | +| StressLog | LoggedFacilities | Bitmask of facilities that are logged | +| StressLog | Level | Level of logging | +| StressLog | MaxSizePerThread | Maximum size of the log per thread | +| StressLog | MaxSizeTotal | Maximum size of the log | +| StressLog | TotalChunks | Total number of chunks across all thread-specific logs | +| StressLog | TickFrequency | Number of ticks per second for stresslog timestamps | +| StressLog | StartTimestamp | Timestamp when the stress log was started | +| StressLog | ModuleOffset | Offset of the module in the stress log | +| StressLog | Logs | Pointer to the thread-specific logs | +| StressLogModuleDesc | BaseAddress | Base address of the module | +| StressLogModuleDesc | Size | Size of the module | +| ThreadStressLog | Next | Pointer to the next thread-specific log | +| ThreadStressLog | ThreadId | ID of the thread | +| ThreadStressLog | 
WriteHasWrapped | Whether the write pointer is writing to previously used chunks | +| ThreadStressLog | CurrentPtr | Pointer to the most recently written message | +| ThreadStressLog | ChunkListHead | Pointer to the head of the chunk list | +| ThreadStressLog | ChunkListTail | Pointer to the tail of the chunk list | +| ThreadStressLog | CurrentWriteChunk | Pointer to the chunk currently being written to | +| StressLogChunk | Prev | Pointer to the previous chunk | +| StressLogChunk | Next | Pointer to the next chunk | +| StressLogChunk | Buf | The data stored in the chunk | +| StressLogChunk | Sig1 | First byte of the chunk signature (to ensure validity) | +| StressLogChunk | Sig2 | Second byte of the chunk signature (to ensure validity) | +| StressMsgHeader | Opaque structure | Header of a stress message. Meaning of bits is version-dependent. | +| StressMsg | Header | The message header | +| StressMsg | Args | The arguments of the message (number of arguments specified in the header) | + +Global variables used: +| Global Name | Type | Purpose | +| --- | --- | --- | +| StressLogEnabled | byte | Whether the stress log is enabled | +| StressLog | pointer | Pointer to the stress log | +| StressLogChunkSize | uint | Size of a stress log chunk | +| StressLogMaxMessageSize | ulong | Maximum size of a stress log message | +| StressLogHasModuleTable | byte | Whether the stress log module table is present | +| StressLogModuleTable | pointer | Pointer to the stress log's module table (if StressLogHasModuleTable is `1`) | + +```csharp +bool HasStressLog() +{ + return Target.ReadGlobal("StressLogEnabled") != 0; +} + +StressLogData GetStressLogData() +{ + if (!HasStressLog()) + { + return default; + } + + StressLog stressLog = new StressLog(Target, Target.ReadGlobalPointer(Constants.Globals.StressLog)); + return new StressLogData( + stressLog.LoggedFacilities, + stressLog.Level, + stressLog.MaxSizePerThread, + stressLog.MaxSizeTotal, + stressLog.TotalChunks, + stressLog.TickFrequency, + stressLog.StartTimestamp, + stressLog.Logs); +} + +StressLogData GetStressLogData(TargetPointer stressLogPointer) +{ + StressLog stressLog = new StressLog(Target, stressLogPointer); + return new StressLogData( + stressLog.LoggedFacilities, + stressLog.Level, + stressLog.MaxSizePerThread, + stressLog.MaxSizeTotal, + stressLog.TotalChunks, + stressLog.TickFrequency, + stressLog.StartTimestamp, + stressLog.Logs); +} + +IEnumerable GetThreadStressLogs(TargetPointer logs) +{ + TargetPointer currentPointer = logs; + while (currentPointer != TargetPointer.Null) + { + ThreadStressLog threadStressLog = new(Target, currentPointer); + + if (threadStressLog.ChunkListHead == TargetPointer.Null) + { + // If the chunk list head is null, this thread log isn't valid. + currentPointer = threadStressLog.Next; + continue; + } + + if (threadStressLog.CurrentWriteChunk == TargetPointer.Null) + { + // If the current write chunk is null, this thread log isn't valid. + currentPointer = threadStressLog.Next; + continue; + } + + StressLogChunk currentChunkData = new(Target, threadStressLog.CurrentWriteChunk); + if (currentChunkData.Sig1 != 0xCFCFCFCF || currentChunkData.Sig2 != 0xCFCFCFCF) + { + // If the current write chunk isn't valid, this thread log isn't valid. 
+ currentPointer = threadStressLog.Next; + continue; + } + + yield return new ThreadStressLogData( + threadStressLog.Next, + threadStressLog.ThreadId, + threadStressLog.WriteHasWrapped, + threadStressLog.CurrentPtr, + threadStressLog.ChunkListHead, + threadStressLog.ChunkListTail, + threadStressLog.CurrentWriteChunk); + + currentPointer = threadStressLog.Next; + } +} + +// Return messages going in reverse chronological order, newest first. +IEnumerable GetStressMessages(ThreadStressLogData threadLog) +{ + // 1. Get the current message pointer from the log and the info about the current chunk the runtime is writing into. + // Record our current read pointer as the current message pointer. + + // 2. The last written log, if it wrapped around may have partially overwritten a previous record. + // Update our current message pointer to reflect the last safe beginning of a record (StressLogMaxMessageSize before our current message pointer) + // without going before the start of the current chunk's buffer. Do not update the current read pointer in this way. + + // 3. If the current read pointer is at the end of the chunk (this will never happen on the first iteration), check if current read pointer is at the end of the chunk list. + // Otherwise, skip to step 8. + + // 4. If current chunk is at the end of the chunk list and this thread never wrapped around while writing, + // DONE. + + // 5. Otherwise, get the next chunk in the list. + // The tail will wrap around to the head if the current chunk at the end of the list. Record if we have wrapped around. + + // 6. StressLog writes variable-sized payloads starting from the end of a chunk. + // Chunks are zero-initialized, so look in the first StressLogMaxMessageSize bytes, for any non-0 bytes. + // If we find any, that's the start of the first message of the chunk. + // Set the current read pointer to that location. + + // 7. If we didn't find a message before we read a whole message size, there's no message in this chunk (it was freshly allocated), + // DONE. + + // 8. If we have wrapped around while reading, we are reading in the thread's current write chunk, and our current read pointer is ahead of the current message pointer, + // DONE. + + // 9. Read the messsage at the current read pointer. + + // 10. Advance the current read pointer to the next message (advance by "stress message header size + pointer size * number of arguments"). + + // 11. Go to step 3. +} + +bool IsPointerInStressLog(StressLogData stressLog, TargetPointer pointer) +{ + // For all chunks in all thread stress logs, check if + // any pointer-aligned offset in the chunk's data has the value of 'pointer'. + // If found, return true. +} + +// This method is a helper for the various specific versions. 
+protected TargetPointer GetFormatPointer(ulong formatOffset) +{ + if (Target.ReadGlobal(Constants.Globals.StressLogHasModuleTable) == 0) + { + StressLog stressLog = new(Target, Target.ReadGlobalPointer(Constants.Globals.StressLog)); + return new TargetPointer(stressLog.ModuleOffset + formatOffset); + } + + TargetPointer moduleTable = Target.ReadGlobalPointer(Constants.Globals.StressLogModuleTable); + uint moduleEntrySize = Target.GetTypeInfo(DataType.StressLogModuleDesc).Size!.Value; + uint maxModules = Target.ReadGlobal(Constants.Globals.StressLogMaxModules); + ulong cumulativeOffset = 0; + for (uint i = 0; i < maxModules; ++i) + { + StressLogModuleDesc module = new(Target, moduleTable + i * moduleEntrySize); + ulong relativeOffset = formatOffset - cumulativeOffset; + if (relativeOffset < module.Size.Value) + { + return new TargetPointer((ulong)module.BaseAddress + relativeOffset); + } + cumulativeOffset += module.Size.Value; + } + + return TargetPointer.Null; +} +``` + +A StressLog message, represented by a `StressMsgData` struct, can be formatted as though the null-terminated UTF-8 string located at `FormatString` is a `printf`-style format string, with all arguments located at `Args`. Additionally, the following special format specifiers are supported: + +| Format Specifier | Argument Type | Description | +| --- | --- | --- | +| `%pT` | pointer | A `TypeHandle`, accessible through the `GetTypeHandle` API in the [RuntimeTypeSystem contract](./RuntimeTypeSystem.md), possibly with bits of the `ObjectToMethodTableUnmask` data contract global variable set. | +| `%pM` | pointer | A `MethodDescHandle`, accessible through the `GetMethodDescHandle` API in the [RuntimeTypeSystem contract](./RuntimeTypeSystem.md) | +| `%pV` | pointer | A pointer to an unmanaged symbol in the image. | +| `%pK` | pointer | A pointer to an offset from a symbol in the image, generally representing an IP in a stack trace. | + +## Version 1 + +Version 1 stress logs are included in any .NET runtime version corresponding to an SOS breaking change version of 0, 1, 2, or 3, or a memory-mapped version of `0x00010001`. +SOS breaking change versions of 0, 1, or 2 do not have a module table. SOS breaking change version 3 logs and memory mapped logs have a module table. + +These functions implement additional logic required for the shared contract implementation above. + +The message header data is stored in the following format: + +```c++ +struct +{ + uint32_t numberOfArgsLow : 3; + uint32_t formatOffset : 26; + uint32_t numberOfArgsHigh : 3; + uint32_t facility; + uint64_t timeStamp; +}; +``` + +The format offset refers to the offset from the module offset on the stress log. + +```csharp +StressMsgData GetStressMsgData(StressMsg msg) +{ + uint pointerSize = Target.GetTypeInfo(DataType.pointer).Size!.Value; + uint payload = Target.Read(msg.Header); + int numArgs = (int)((payload & 0x7) | (((payload >> 29) & 0x7) << 3)); + var args = new TargetPointer[numArgs]; + for (int i = 0; i < numArgs; i++) + { + args[i] = Target.ReadPointer((ulong)msg.Args + (ulong)(i * pointerSize)); + } + + return new StressMsgData( + Facility: Target.Read((ulong)msg.Header + 4), + FormatString: GetFormatPointer(((payload >> 3) & ((1 << 26) - 1))), + Timestamp: Target.Read((ulong)msg.Header + 8), + Args: args); +} +``` + +## Version 2 + +Version 2 stress logs are included in any .NET runtime version corresponding to an SOS breaking change version of 4 or a memory-mapped version of `0x00010002`.
+SOS breaking change version 4 stress logs and memory mapped stress logs will have a module table. + +These functions implement additional logic required for the shared contract implementation above. + +The message header data is stored in the following format: + +```c++ +struct +{ + static const size_t formatOffsetLowBits = 26; + static const size_t formatOffsetHighBits = 13; + + uint64_t facility: 32; + uint64_t numberOfArgs : 6; + uint64_t formatOffsetLow: formatOffsetLowBits; + uint64_t formatOffsetHigh: formatOffsetHighBits; + uint64_t timeStamp: 51; +}; +``` + +The format offset refers to the cumulative offset into a module referred to in the module table. + +```csharp +StressMsgData GetStressMsgData(StressMsg msg) +{ + StressLog stressLog = new(Target, Target.ReadGlobalPointer(Constants.Globals.StressLog)); + uint pointerSize = Target.GetTypeInfo(DataType.pointer).Size!.Value; + + ulong payload1 = Target.Read(msg.Header); + ulong payload2 = Target.Read((ulong)msg.Header + 8); + int numArgs = (int)((payload1 >> 32) & ((1 << 6) - 1)); + var args = new TargetPointer[numArgs]; + for (int i = 0; i < numArgs; i++) + { + args[i] = Target.ReadPointer((ulong)msg.Args + (ulong)(i * pointerSize)); + } + ulong formatOffset = ((payload1 >> 38) & ((1 << 26) - 1)) | ((payload2 & ((1ul << 13) - 1)) << 26); + + return new StressMsgData( + Facility: (uint)payload1, + FormatString: GetFormatPointer(formatOffset), + Timestamp: payload2 >> 13, + Args: args); +} +``` diff --git a/docs/design/datacontracts/contract-descriptor.md b/docs/design/datacontracts/contract-descriptor.md index fbd58eb33eb9..b2388b1ec7d0 100644 --- a/docs/design/datacontracts/contract-descriptor.md +++ b/docs/design/datacontracts/contract-descriptor.md @@ -83,7 +83,8 @@ a JSON integer constant. "globals": { "FEATURE_COMINTEROP": 0, - "s_pThreadStore": [ 0 ] // indirect from pointer data offset 0 + "s_pThreadStore": [ 0 ], // indirect from pointer data offset 0 + "RuntimeID": "win-x64" // string value }, "contracts": {"Thread": 1, "GCHandle": 1, "ThreadStore": 1} } diff --git a/docs/design/datacontracts/data_descriptor.md b/docs/design/datacontracts/data_descriptor.md index 1338e1ae87aa..9880f225c741 100644 --- a/docs/design/datacontracts/data_descriptor.md +++ b/docs/design/datacontracts/data_descriptor.md @@ -212,26 +212,60 @@ The global values will be in an array, with each value described by a dictionary * `"name": "global value name"` the name of the global value * `"type": "type name"` the type of the global value -* optional `"value": VALUE | [ int ] | "unknown"` the value of the global value, or an offset in an auxiliary array containing the value or "unknown". +* optional `"value": ` where `` is defined below + + +Numeric constants must be within the range of the type of the global value. If a constant is out of range, behavior is undefined. -The `VALUE` may be a JSON numeric constant integer or a string containing a signed or unsigned -decimal or hex (with prefix `0x` or `0X`) integer constant. The constant must be within the range -of the type of the global value. **Compact format**: The global values will be in a dictionary, with each key being the name of a global and the values being one of: -* `[VALUE | [int], "type name"]` the type and value of a global -* `VALUE | [int]` just the value of a global +* `[, "type name"]` the type and value of a global +* `` just the value of a global -As in the regular format, `VALUE` is a numeric constant or a string containing an integer constant. +Where `` is defined as below.
+ +Numeric constants must be within the range of the type of the global value. If a constant is out of range, behavior is undefined. Note that a two element array is unambiguously "type and value", whereas a one-element array is unambiguously "indirect value". + **Both formats** +#### Specification Appendix + +``` + ::= | + ::= [ ] + ::= | + ::= | | + + is any JSON string element + is any JSON number element + is a which can be parsed as a hexadecimal number prefixed with "0x" or "0X" + is a which can be parsed as a decimal number. +``` + +#### Parsing Rules +`` is parsed as a numeric value. +`` and `` can be parsed as either a string or numeric value. +`` (that does not form a valid hex or decimal number) is parsed as a string. + +Example using compact format: +```json +{ + "int" : 1234, // Can only be parsed as numeric constant 1234 + "stringyInt" : "1234", // Can be parsed as 1234 or "1234" + "stringyHex" : "0x1234", // Can be parsed as 4660 (0x1234 in decimal) or "0x1234" + "stringValue" : "Hello World" // Can only be parsed as "Hello World" +} +``` + +#### Typing + For pointer and nuint globals, the value may be assumed to fit in a 64-bit unsigned integer. For nint globals, the value may be assumed to fit in a 64-bit signed integer. @@ -239,6 +273,8 @@ Note that the logical descriptor does not contain "unknown" values: it is expect in-memory data descriptor will augment the baseline with a known offset for all fields in the baseline. +#### Indirect Types + If the value is given as a single-element array `[ int ]` then the value is stored in an auxiliary array that is part of the data contract descriptor. Only in-memory data descriptors may have indirect values; baseline data descriptors may not have indirect values. @@ -251,7 +287,6 @@ The indirection array is not part of the data descriptor spec. It is part of th descriptor](./contract_descriptor.md#Contract_descriptor). - ## Example This is an example of a baseline descriptor for a 64-bit architecture. Suppose it has the name `"example-64"` @@ -288,7 +323,7 @@ The baseline is given in the "regular" format. ], "globals": [ { "name": "FEATURE_EH_FUNCLETS", "type": "uint8", "value": "0" }, // baseline defaults value to 0 - { "name": "FEATURE_COMINTEROP", "type", "uint8", "value": "1"}, + { "name": "FEATURE_COMINTEROP", "type": "uint8", "value": "1"}, { "name": "s_pThreadStore", "type": "pointer" } // no baseline value ] } @@ -308,7 +343,8 @@ The following is an example of an in-memory descriptor that references the above "globals": { "FEATURE_COMINTEROP": 0, - "s_pThreadStore": [ 0 ] // indirect from aux data offset 0 + "s_pThreadStore": [ 0 ], // indirect from aux data offset 0 + "RuntimeID": "windows-x64" } } ``` @@ -332,6 +368,7 @@ And the globals will be: | FEATURE_COMINTEROP | uint8 | 0 | | FEATURE_EH_FUNCLETS | uint8 | 0 | | s_pThreadStore | pointer | 0x0100ffe0 | +| RuntimeID | string |"windows-x64"| The `FEATURE_EH_FUNCLETS` global's value comes from the baseline - not the in-memory data descriptor. By contrast, `FEATURE_COMINTEROP` comes from the in-memory data descriptor - with the diff --git a/docs/design/features/OsrDetailsAndDebugging.md b/docs/design/features/OsrDetailsAndDebugging.md index 3ca35c2b9198..e1080fbc8bd7 100644 --- a/docs/design/features/OsrDetailsAndDebugging.md +++ b/docs/design/features/OsrDetailsAndDebugging.md @@ -307,8 +307,6 @@ On Arm64 we have epilog unwind codes and the second SP adjust does not appear to OSR funclets are more or less normal funclets. 
-On Arm64, to satisfy PSPSym reporting constraints, the funclet frame must be padded to include the Tier0 frame size. This is conceptually similar to the way the funclet frames also pad for homed varargs arguments -- in both cases the padded space is never used, it is just there to ensure the PSPSym ends up at the same caller-SP relative offset for the main function and any funclet. - #### OSR Unwind Info On x64 the prolog unwind includes a phantom SP adjustment at offset 0 for the Tier0 frame. diff --git a/docs/design/features/globalization-invariant-mode.md b/docs/design/features/globalization-invariant-mode.md index 0b71ffc992d3..d60c4be2a9ef 100644 --- a/docs/design/features/globalization-invariant-mode.md +++ b/docs/design/features/globalization-invariant-mode.md @@ -35,7 +35,7 @@ Note: On Linux, .NET Core relies on globalization data from ICU. For example, [. ## Cultures and culture data -When enabling the invariant mode, all cultures behave like the invariant culture. The invariant culture has the following characteristics: +When enabling the invariant mode, the behavior depends on the [PredefinedCulturesOnly](https://learn.microsoft.com/en-us/dotnet/core/runtime-config/globalization#predefined-cultures) setting. When `true` (the default), creation of any culture except the invariant culture is disallowed. When `false`, all cultures behave like the invariant culture. The invariant culture has the following characteristics: * Culture names (English, native display, ISO, language names) will return invariant names. For instance, when requesting culture native name, you will get "Invariant Language (Invariant Country)". * All cultures LCID will have value 0x1000 (which means Custom Locale ID). The exception is the invariant cultures which will still have 0x7F. diff --git a/docs/design/features/host-runtime-information.md b/docs/design/features/host-runtime-information.md index d2d96ff6ada8..b4210293081c 100644 --- a/docs/design/features/host-runtime-information.md +++ b/docs/design/features/host-runtime-information.md @@ -80,6 +80,10 @@ List of directory paths corresponding to shared store paths and additional probi ### Single-file +`BUNDLE_EXTRACTION_PATH` + +**Added in .NET 10** Path to extraction directory, if the single-file bundle extracted any files. This is used by the runtime to search for native libraries associated with bundled managed assemblies. + `BUNDLE_PROBE` Hex string representation of a function pointer. It is set when running a single-file application. The function is called by the runtime to look for assemblies bundled into the application. The expected signature is defined as `BundleProbeFn` in [`coreclrhost.h`](/src/coreclr/hosts/inc/coreclrhost.h) diff --git a/docs/design/features/host-tracing.md b/docs/design/features/host-tracing.md index ff05afa23a5a..61f637f123ad 100644 --- a/docs/design/features/host-tracing.md +++ b/docs/design/features/host-tracing.md @@ -31,6 +31,8 @@ Starting with .NET Core 3, tracing can be redirected and its verbosity controlle * `COREHOST_TRACE_VERBOSITY=3` shows errors, warnings, and info * `COREHOST_TRACE_VERBOSITY=4` shows errors, warnings, info, and verbose. (currently the default and maximum level of detail) +In .NET 10 and above, if `COREHOST_TRACEFILE` is set to a directory that exists, the host will trace to a file named `..log` in that directory. If the specified path does not exist or is not a directory, tracing behaves the same as before .NET 10. 
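For illustration, a minimal C# launcher sketch that enables these tracing variables for a child process (the `./myapp` and `/tmp/host-traces` paths are placeholders, not taken from this document):

```csharp
using System.Diagnostics;

// Launch an app with host tracing enabled.
var psi = new ProcessStartInfo("./myapp") { UseShellExecute = false };
psi.Environment["COREHOST_TRACE"] = "1";
psi.Environment["COREHOST_TRACE_VERBOSITY"] = "3";          // errors, warnings, and info
psi.Environment["COREHOST_TRACEFILE"] = "/tmp/host-traces"; // existing directory: per-process log file (.NET 10+); a plain file path redirects the trace to that file

using Process? app = Process.Start(psi);
app?.WaitForExit();
```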
+ ## Error routing The host components implement two routes for outputting errors: diff --git a/docs/design/features/hw-intrinsics.md b/docs/design/features/hw-intrinsics.md index 5234b366528e..853b51f712c3 100644 --- a/docs/design/features/hw-intrinsics.md +++ b/docs/design/features/hw-intrinsics.md @@ -18,7 +18,7 @@ The hardware intrinsics operate on and produce both primitive types (`int`, `flo ### Platform-agnostic vector types -The vector types supported by one or more target ISAs are supported across platforms, though they extent to which operations on them are available and accelerated is dependent on the target ISA. These are: +The vector types supported by one or more target ISAs are supported across platforms, though the extent to which operations on them are available and accelerated is dependent on the target ISA. These are: * `Vector64` - A 64-bit vector of type `T`. For example, a `Vector64` would hold two 32-bit integers. * Note that `Vector64` intrinsics are currently supported only on Arm64, and these are not supported for `double`. Support could be added for this, but would require additional handling. @@ -51,7 +51,7 @@ The JIT depends on the VM and configuration settings to determine what target pl Hardware intrinsics are built on RyuJIT's `NamedIntrinsic` mechanism to identify method calls that should be recognized as intrinsics (see https://github.com/dotnet/runtime/blob/main/src/coreclr/jit/namedintrinsiclist.h). In the incoming IL, intrinsic invocations are just method calls, so the JIT must distinguish intrinsic calls from ordinary call-sites and map them to its IR representation: the `GenTreeHWIntrinsic` node. -The [Intrinsic] attribute was added to eliminate the need to check each call-site. It [Intrinsic] attribute has a different meaning on each attribute target: +The [Intrinsic] attribute was added to eliminate the need to check each call-site. It has a different meaning on each attribute target: * Method: call targets marked with [Intrinsic] will be checked by the JIT when importing call-sites. If the method's (namespace, class name, method name) triple matches a record in the Hardware Intrinsics Table, it will be recognized as an intrinsic call. @@ -98,7 +98,7 @@ The register allocator has three main passes. The `LinearScan::buildNode` method is responsible for identifying all register references in the IR, and constructing the `RefPosition`s that represent those references, for each node. For hardware intrinsics it delegates this function to `LinearScan::buildHWIntrinsic()` and the `LinearScan::getKillSetForHWIntrinsic()` method is responsible for generating kill `RefPositions` for these nodes. -The other thing to be aware of is that the calling convention for large vectors (256-bit vectors on x86, and 128-bit vectors on Arm64) does not preserve the upper half of the callee-save vector registers. As a result, this require some special modeling in the register allocator. See the places where `FEATURE_PARTIAL_SIMD_CALLEE_SAVE` appears in the code. This code, fortunately, requires little differentiation between the two platforms. +The other thing to be aware of is that the calling convention for large vectors (256-bit vectors on x86, and 128-bit vectors on Arm64) does not preserve the upper half of the callee-save vector registers. As a result, this requires some special modeling in the register allocator. See the places where `FEATURE_PARTIAL_SIMD_CALLEE_SAVE` appears in the code. 
This code, fortunately, requires little differentiation between the two platforms. ## Code Generation diff --git a/docs/design/specs/Ecma-335-Augments.md b/docs/design/specs/Ecma-335-Augments.md index fd8a0fd93ca8..2f9ae5cb995c 100644 --- a/docs/design/specs/Ecma-335-Augments.md +++ b/docs/design/specs/Ecma-335-Augments.md @@ -1063,15 +1063,60 @@ In order to maintain alignment, if the field needs alignment to be preserved, th ## Checked user-defined operators -Section "I.10.3.1 Unary operators" of ECMA-335 adds *op_CheckedIncrement*, *op_CheckedDecrement*, *op_CheckedUnaryNegation* as the names for methods implementing checked `++`, `--` and `-` unary operators. +Section "I.10.3.1 Unary operators" is reworded to include the support for instance form operators: +```diff +- Unary operators take one operand, perform some operation on it, and return the result. They are +- represented as static methods on the class that defines the type of their one operand. Table I.4: +- Unary Operator Names shows the names that are defined. ++ Unary operators take one operand and perform some operation on it. They are exposed on the class ++ that defines the type of their one operand and are represented as either static methods which ++ return the result or as void returning instance methods which take the first operand as the this ++ pointer and which mutate that operand directly. "Table I.4: Unary Operator Names" shows the names ++ that are defined. +``` -Section "I.10.3.2 Binary operators" of ECMA-335 adds *op_CheckedAddition*, *op_CheckedSubtraction*, -*op_CheckedMultiply*, *op_CheckedDivision* as the names for methods implementing checked `+`, `-`, `*`, and `/` binary operators. +"Table I.4: Unary Operator Names" is expanded to include a third column, "Method Kind", which indicates whether the operator is represented as an instance or a static method. All existing table entries should have this column set to "static". The following additional entries are added to the table: +> | Name | ISO/IEC 14882:2003 C++ Operator Symbol | Method Kind | +> | ------------------------------- | --------------------------------------- | ----------- | +> | *op_CheckedIncrement* | Similar to `++`1, 3 | static | +> | *op_CheckedDecrement* | Similar to `--`1, 3 | static | +> | *op_CheckedUnaryNegation* | `-` (unary)3 | static | +> | *op_DecrementAssignment* | Similar to `--`4 | instance | +> | *op_IncrementAssignment* | Similar to `++`4 | instance | +> | *op_CheckedDecrementAssignment* | Similar to `--`3, 4 | instance | +> | *op_CheckedIncrementAssignment* | Similar to `++`3, 4 | instance | +> +> 3 A checked operator is expected to throw an exception when the result of an operation is too large to represent in the destination type. What does it mean to be too large actually depends on the nature of the destination type. Typically the exception thrown is `System.OverflowException`. +> +> 4 Unlike 1, these methods increment or decrement their operand directly and so better match the pure C++ point of view. + +Section "I.10.3.2 Binary operators" is reworded to include the support for instance form operators: +```diff +- Binary operators take two operands, perform some operation on them, and return a value. They +- are represented as static methods on the class that defines the type of one of their two operands. +- Table I.5: Binary Operator Names shows the names that are defined. ++ Binary operators take two operands and perform some operation on them.
They are exposed on the class ++ that defines the type of one of their two operands and are represented as either static methods which ++ return the result or as void returning instance methods which take the first operand as the `this` ++ pointer and which mutate that operand directly. "Table I.5: Binary Operator Names" shows the names ++ that are defined. +``` -Section "I.10.3.3 Conversion operators" of ECMA-335 adds *op_CheckedExplicit* as the name for a method -implementing checked explicit conversion operator. +"Table I.5: Binary Operator Names" is expanded to include a third column, "Method Kind", which indicates whether the operator is represented as an instance or a static method. All existing table entries should have this column set to "static". Table entries where the name column ends with "Assignment" should have this column set to "static or instance". The following additional entries are added to the table: +> | Name | ISO/IEC 14882:2003 C++ Operator Symbol | Method Kind | +> | ------------------------------------ | --------------------------------------- | ------------------ | +> | *op_CheckedAddition* | + (binary)1 | static | +> | *op_CheckedSubtraction* | - (binary)1 | static | +> | *op_CheckedMultiply* | \* (binary)1 | static | +> | *op_CheckedDivision* | /1 | static | +> | *op_CheckedAdditionAssignment* | +=1 | static or instance | +> | *op_CheckedSubtractionAssignment* | -=1 | static or instance | +> | *op_CheckedMultiplicationAssignment* | \*=1 | static or instance | +> | *op_CheckedDivisionAssignment* | /=1 | static or instance | +> +> 1 A checked operator is expected to throw an exception when the result of an operation is too large to represent in the destination type. What does it mean to be too large actually depends on the nature of the destination type. Typically the exception thrown is `System.OverflowException`. -A checked user-defined operator is expected to throw an exception when the result of an operation is too large to represent in the destination type. What does it mean to be too large actually depends on the nature of the destination type. Typically the exception thrown is a System.OverflowException. +Section "I.10.3.3 Conversion operators" of ECMA-335 adds *op_CheckedExplicit* as the name for a method implementing checked explicit conversion operator. ## Atomic reads and writes diff --git a/docs/design/specs/Memory-model.md b/docs/design/specs/Memory-model.md index 32369c40f54c..28aeb475f1ec 100644 --- a/docs/design/specs/Memory-model.md +++ b/docs/design/specs/Memory-model.md @@ -55,7 +55,7 @@ As a consequence: The practical motivations for these rules are: - We can't allow speculative writes as we consider changing the value to be observable, thus effects of a speculative write may not be possible to undo. - A read cannot be re-done, since it could fetch a different value and thus introduce a data race that the program did not have. -Reading from a variable and not observing sideeffects of the read is the same as not performing a read, thus unused reads can be removed. +Reading from a variable and not observing side-effects of the read is the same as not performing a read, thus unused reads can be removed. - Coalescing of adjacent ordinary memory accesses to the same location is ok because most programs do not rely on presence of data races thus, unlike introducing, removing data races is ok. Programs that do rely on observing data races shall use `volatile` accesses.
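As a minimal illustration of that last point (the `Worker` type and its members below are invented for the sketch, not taken from the specification): a loop that waits for a flag written by another thread must read the flag with volatile semantics, since the repeated ordinary reads may otherwise legally be coalesced into a single read.

```csharp
using System.Threading;

class Worker
{
    private bool _stopRequested;

    public void Run()
    {
        // Ordinary reads of _stopRequested in a loop could be coalesced into one read,
        // so the loop might never observe the other thread's write.
        // Volatile.Read keeps each iteration's read observable.
        while (!Volatile.Read(ref _stopRequested))
        {
            Thread.Yield();
        }
    }

    public void Stop() => Volatile.Write(ref _stopRequested, true);
}
```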
## Thread-local memory accesses @@ -85,10 +85,10 @@ The effects of ordinary reads and writes can be reordered as long as that preser - `System.Threading.Volatile.WriteBarrier` (applies to all following writes) - Releasing a lock (`System.Threading.Monitor.Exit` or leaving a synchronized method, applies to all following writes) -* **volatile. initblk** has "release semantics" - the effects of `.volatile initblk` will not be observable earlier than the effects of preceeding reads and writes. +* **volatile. initblk** has "release semantics" - the effects of `.volatile initblk` will not be observable earlier than the effects of preceding reads and writes. * **volatile. cpblk** combines ordering semantics of a volatile read and write with respect to the read and written memory locations. - - The writes performed by `volatile. cpblk` will not be observable earlier than the effects of preceeding reads and writes. + - The writes performed by `volatile. cpblk` will not be observable earlier than the effects of preceding reads and writes. - No read or write that is later in the program order may be speculatively executed before the reads performed by `volatile. cpblk` - `cpblk` may be implemented as a sequence of reads and writes. The granularity and mutual order of such reads and writes is unspecified. @@ -138,7 +138,7 @@ The motivation is to ensure that storing an object reference to shared memory ac The reading thread does not need to perform an acquiring read before accessing the content of an instance since runtime guarantees ordering of data-dependent reads. The ordering side-effects of reference assignment should not be used for general ordering purposes because: -- independent nonvolatile reference assignments could be reordered by the compiler. +- independent non-volatile reference assignments could be reordered by the compiler. - an optimizing compiler can omit the release semantics if it can prove that the instance is not shared with other threads. There was a lot of ambiguity around the guarantees provided by object assignments. Going forward the runtimes will only provide the guarantees described in this document. @@ -169,7 +169,7 @@ That applies even for locations targeted by overlapping aligned reads and writes Either the platform defaults to release consistency or stronger (that is, x64 is TSO, which is stronger), or provides means to implement release consistency via fencing operations. * It is possible to guarantee ordering of data-dependent reads. -Either the platform honors data dependedncy by default (all currently supported platforms), or provides means to order data-dependent reads via fencing operations. +Either the platform honors data dependency by default (all currently supported platforms), or provides means to order data-dependent reads via fencing operations. ## Examples and common patterns The following examples work correctly on all supported implementations of .NET runtime regardless of the target OS or architecture. 
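For instance, a minimal safe-publication sketch that relies only on the reference-assignment and data-dependent-read guarantees described above (the `Config`/`Publisher` names are illustrative, not part of the specification):

```csharp
class Config
{
    public int Value;
}

static class Publisher
{
    private static Config? s_config;   // shared location read by other threads

    public static void Publish()
    {
        var config = new Config { Value = 42 };
        // The reference assignment below has release semantics with respect to the preceding
        // write of Value, so another thread cannot observe s_config pointing to a Config
        // whose Value write is not yet visible.
        s_config = config;
    }

    public static int? TryRead()
    {
        // Data-dependent read: once the reference is observed, the field read through it is
        // ordered after it, so no explicit acquiring read is needed here.
        Config? config = s_config;
        return config?.Value;
    }
}
```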
diff --git a/docs/design/specs/PortablePdb-Metadata.md b/docs/design/specs/PortablePdb-Metadata.md index 5ccfda76b79a..82d19dc7ba38 100644 --- a/docs/design/specs/PortablePdb-Metadata.md +++ b/docs/design/specs/PortablePdb-Metadata.md @@ -26,6 +26,7 @@ The ECMA-335-II standard is amended by an addition of the following tables to th * [SourceLink](#source-link-c-and-vb-compilers) * [CompilationMetadataReferences](#compilation-metadata-references-c-and-vb-compilers) * [CompilationOptions](#compilation-options-c-and-vb-compilers) + * [TypeDefinitionDocument](#type-definition-document-c-and-vb-compilers) Debugging metadata tables may be embedded into type system metadata (and part of a PE file), or they may be stored separately in a metadata blob contained in a .pdb file. In the latter case additional information is included that connects the debugging metadata to the type system metadata. @@ -598,3 +599,22 @@ The order of the options in the list is insignificant. > The purpose of this data is to allow a tool to reconstruct the compilation the module was built from. > The source files for the compilation are expected to be recovered from the source server using [SourceLink](#source-link-c-and-vb-compilers) and/or from [sources embedded](#embedded-source-c-and-vb-compilers) in the PDB. > The metadata references for the compilation are expected to be recovered from a file indexing service (e.g. symbol server) using information in [Compilation Metadata References](#compilation-metadata-references-c-and-vb-compilers) record. + +##### Type Definition Document (C# and VB compilers) +Parent: TypeDef + +Kind: {932E74BC-DBA9-4478-8D46-0F32A7BAB3D3} + +Stores document information for type definitions that would otherwise not have document information stored through other means, for example in the [MethodDebugInformation](#methoddebuginformation-table-0x31) table. + +The blob has the following structure: + + Blob ::= (document-ordinal)* + +| terminal | encoding | description | +|:-----------------|:----------------------------|:---------------------------------------| +| document-ordinal | compressed unsigned integer | row id of the Document table. | + +> The purpose of this data is to allow a tool to navigate to the source of a type definition where there would +> otherwise not be any data available. For example, an empty class definition, or an enum, where there are +> no methods to provide the data in the `MethodDebugInformation` table. diff --git a/docs/design/specs/runtime-async.md b/docs/design/specs/runtime-async.md index 0929c6d931ad..25e8c77478ae 100644 --- a/docs/design/specs/runtime-async.md +++ b/docs/design/specs/runtime-async.md @@ -18,14 +18,14 @@ Applicability of `MethodImplOptions.Async`: * The `[MethodImpl(MethodImplOptions.Async)]` only has effect when applied to method definitions with CIL implementation. * Async method definitions are only valid inside async-capable assemblies. An async-capable assembly is one which references a corlib containing an `abstract sealed class RuntimeFeature` with a `public const string` field member named `Async`. * Combining `MethodImplOptions.Async` with `MethodImplOptions.Synchronized` is invalid. -* Applying `MethodImplOptions.Async` to methods with `byref` or `ref-like` parameters is invalid. +* Applying `MethodImplOptions.Async` to methods with a `byref` or `ref-like` return value is invalid. * Applying `MethodImplOptions.Async` to vararg methods is invalid.
_[Note: these rules operate before generic substitution, meaning that a method which only meets requirements after substitution would not be considered as valid.]_ Sync methods are all other methods. -Unlike sync methods, async methods support suspension. Suspension allows async methods to yield control flow back to their caller at certain well-defined suspension points, and resume execution of the remaining method at a later time or location, potentially on another thread. +Unlike sync methods, async methods support suspension. Suspension allows async methods to yield control flow back to their caller at certain well-defined suspension points, and resume execution of the remaining method at a later time or location, potentially on another thread. Suspension points are where suspension may occur, but suspension is not required if all Task-like objects are completed. Async methods also do not have matching return type conventions as sync methods. For sync methods, the stack should contain a value convertible to the stated return type before the `ret` instruction. For async methods, the stack should be empty in the case of `Task` or `ValueTask`, or the type argument in the case of `Task` or `ValueTask`. @@ -34,12 +34,12 @@ Async methods support suspension using one of the following methods: ```C# namespace System.Runtime.CompilerServices { - public static class RuntimeHelpers + public static class AsyncHelpers { [MethodImpl(MethodImplOptions.Async)] - public static void AwaitAwaiterFromRuntimeAsync(TAwaiter awaiter) where TAwaiter : INotifyCompletion { ... } + public static void AwaitAwaiter(TAwaiter awaiter) where TAwaiter : INotifyCompletion; [MethodImpl(MethodImplOptions.Async)] - public static void UnsafeAwaitAwaiterFromRuntimeAsync(TAwaiter awaiter) where TAwaiter : ICriticalNotifyCompletion + public static void UnsafeAwaitAwaiter(TAwaiter awaiter) where TAwaiter : ICriticalNotifyCompletion; [MethodImpl(MethodImplOptions.Async)] public static void Await(Task task); @@ -49,19 +49,27 @@ Async methods support suspension using one of the following methods: public static T Await(Task task); [MethodImpl(MethodImplOptions.Async)] public static T Await(ValueTask task); + + [MethodImpl(MethodImplOptions.Async)] + public static void Await(ConfiguredTaskAwaitable configuredAwaitable); + [MethodImpl(MethodImplOptions.Async)] + public static void Await(ConfiguredValueTaskAwaitable configuredAwaitable); + [MethodImpl(MethodImplOptions.Async)] + public static T Await(ConfiguredTaskAwaitable configuredAwaitable); + [MethodImpl(MethodImplOptions.Async)] + public static T Await(ConfiguredValueTaskAwaitable configuredAwaitable); } } ``` -These methods are only legal to call inside async methods. The `...AwaitAwaiter...` methods will have semantics analogous to the current `AsyncTaskMethodBuilder.AwaitOnCompleted/AwaitUnsafeOnCompleted` methods. After calling either method, it can be presumed that the task or awaiter has completed. The `Await` methods perform suspension like the `AwaitAwaiter...` methods, but are optimized for calling on the return value of a call to an async method. To achieve maximum performance, the IL sequence of two `call` instructions -- one to the async method and immediately one to the `Await` method -- should be preferred. +These methods are only legal to call inside async methods. The `...AwaitAwaiter` methods will have semantics analogous to the current `AsyncTaskMethodBuilder.AwaitOnCompleted/AwaitUnsafeOnCompleted` methods. 
After calling either method, it can be presumed that the task or awaiter has completed. The `Await` methods perform suspension like the `...AwaitAwaiter` methods, but are optimized for calling on the return value of a call to an async method. To achieve maximum performance, the IL sequence of two `call` instructions -- one to the async method and immediately one to the `Await` method -- should be preferred. -Only local variables which are "hoisted" may be used across suspension points. That is, only "hoisted" local variables will have their state preserved after returning from a suspension. On methods with the `localsinit` flag set, non-"hoisted" local variables will be initialized to their default value when resuming from suspension. Otherwise, these variables will have an undefined value. To identify "hoisted" local variables, they must have an optional custom modifier to the `System.Runtime.CompilerServices.HoistedLocal` class, which will be a new .NET runtime API. This custom modifier must be the last custom modifier on the variable. It is invalid for by-ref variables, or variables with a by-ref-like type, to be marked hoisted. Hoisted local variables are stored in managed memory and cannot be converted to unmanaged pointers without explicit pinning. -The code generator is free to ignore the `HoistedLocal` modifier if it can prove that this makes no observable difference in the execution of the generated program. This can be observable in diagnostics since it may mean the value of a local with the `HoistedLocal` modifier will not be available after certain suspension points. +Local variables used across suspension points are considered "hoisted." That is, only "hoisted" local variables will have their state preserved after returning from a suspension. By-ref variables may not be hoisted across suspension points, and any read of a by-ref variable after a suspension point will produce null. Structs containing by-ref variables will also not be hoisted across suspension points and will have their default value after a suspension point. +In the same way, pinning locals may not be "hoisted" across suspension points and will have `null` value after a suspension point. Async methods have some temporary restrictions with may be lifted later: * The `tail` prefix is forbidden * Usage of the `localloc` instruction is forbidden -* Pinning locals may not be marked `HoistedLocal` Other restrictions are likely to be permanent, including * By-ref locals cannot be hoisted across suspension points @@ -74,4 +82,4 @@ Other restrictions are likely to be permanent, including | Flag | Value | Description | | ------------- | ------------- | ------------- | | . . . | . . . | . . . | -|Async |0x0400 |Method is an Async Method.| +|Async |0x2000 |Method is an Async Method.| diff --git a/docs/project/glossary.md b/docs/project/glossary.md index d9cd40d4a606..5af037a3de19 100644 --- a/docs/project/glossary.md +++ b/docs/project/glossary.md @@ -15,6 +15,7 @@ terminology. | BBT | Microsoft internal early version of C/C++ PGO. See https://www.microsoft.com/windows/cse/bit_projects.mspx. | | BOTR | Book Of The Runtime. | | BCL | Base Class Library. A set of `System.*` (and to a limited extent `Microsoft.*`) libraries that make up the lower layer of the .NET library stack. See CoreFX. | +| CDAC | Codename for [Data Contracts](https://github.com/dotnet/runtime/blob/main/docs/design/datacontracts/datacontracts_design.md) | | CIL | Common Intermediate Language. 
Equivalent to IL, also equivalent to [MSIL](https://learn.microsoft.com/dotnet/standard/managed-execution-process#compiling-to-msil). | | CLI | Command Line Interface, or Common Language Infastructure. | | CLR | [Common Language Runtime](https://learn.microsoft.com/dotnet/standard/clr). | diff --git a/docs/project/list-of-diagnostics.md b/docs/project/list-of-diagnostics.md index 56aac0743803..e203738bcac9 100644 --- a/docs/project/list-of-diagnostics.md +++ b/docs/project/list-of-diagnostics.md @@ -112,9 +112,10 @@ The PR that reveals the implementation of the ` are obsolete. Use the new ones that take an IComparer\. | ## Analyzer Warnings @@ -185,7 +186,7 @@ The diagnostic id values reserved for .NET Libraries analyzer warnings are `SYSL | __`SYSLIB1059`__ | Marshaller type does not support allocating constructor | | __`SYSLIB1060`__ | Specified marshaller type is invalid | | __`SYSLIB1061`__ | Marshaller type has incompatible method signatures | -| __`SYSLIB1062`__ | Project must be updated with 'true' | +| __`SYSLIB1062`__ | Project must be updated with '\true\' | | __`SYSLIB1063`__ | _`SYSLIB1063`-`SYSLIB1069` reserved for Microsoft.Interop.LibraryImportGenerator._ | | __`SYSLIB1064`__ | _`SYSLIB1063`-`SYSLIB1069` reserved for Microsoft.Interop.LibraryImportGenerator._ | | __`SYSLIB1065`__ | _`SYSLIB1063`-`SYSLIB1069` reserved for Microsoft.Interop.LibraryImportGenerator._ | @@ -314,3 +315,4 @@ Diagnostic id values for experimental APIs must not be recycled, as that could s | __`SYSLIB5003`__ | .NET 9 | TBD | `System.Runtime.Intrinsics.Arm.Sve` is experimental | | __`SYSLIB5004`__ | .NET 9 | TBD | `X86Base.DivRem` is experimental since performance is not as optimized as `T.DivRem` | | __`SYSLIB5005`__ | .NET 9 | TBD | `System.Formats.Nrbf` is experimental | +| __`SYSLIB5006`__ | .NET 10 | TBD | Types for Post-Quantum Cryptography (PQC) are experimental. | diff --git a/docs/project/os-onboarding.md b/docs/project/os-onboarding.md index c233c5ebdfab..5e09ff38500d 100644 --- a/docs/project/os-onboarding.md +++ b/docs/project/os-onboarding.md @@ -8,20 +8,14 @@ This witticism is the underlying philosophy of our approach. By actively maintai > Users are best served when we act _quickly_ not _exhaustively_. - This double meaning is instructing us to be boldly pragmatic. Each new OS release brings a certain risk of breakage. The risk is far from uniform across the various repos and components that we maintain. Users are best served when we've developed 80% confidence and to leave the remaining (potential) 20% to bug reports. Exhaustive testing serves no one. We've also found that our users do a great job finding corner cases and enthusiastically participate in the process by opening issues in the appropriate repo. - Continuing with the idea of pragmatism, if you only read this far, you've got the basic idea. The rest of the doc describes more context and mechanics. 
References: +- [New Operating System Version Onboarding Guide](https://github.com/dotnet/dnceng/blob/main/Documentation/ProjectDocs/OS%20Onboarding/Guidance.md) - [.NET OS Support Tracking](https://github.com/dotnet/core/issues/9638) -- [.NET Support](https://github.com/dotnet/core/blob/main/support.md) -- [Prereq container image lifecycle](https://github.com/dotnet/dotnet-buildtools-prereqs-docker/blob/main/lifecycle.md) -- [Support for Linux Distros](https://dev.azure.com/dnceng/internal/_wiki/wikis/DNCEng%20Services%20Wiki/940/Support-for-Linux-Distros) (MS internal) -- [Support for Apple Operating Systems](https://dev.azure.com/dnceng/internal/_wiki/wikis/DNCEng%20Services%20Wiki/933/Support-for-Apple-Operating-Systems-(macOS-iOS-and-tvOS)) (MS internal) -- [Support for Windows Operating Systems](https://dev.azure.com/dnceng/internal/_wiki/wikis/DNCEng%20Services%20Wiki/939/Support-for-Windows-Operating-Systems) (MS internal) ## Context @@ -33,7 +27,7 @@ Nearly all the APIs that touch native code (networking, cryptography) and deal w Our rule is that we declare support (for all [supported .NET releases](https://github.com/dotnet/core/blob/main/releases.md)) for a new OS version after it is validated in dotnet/runtime `main`. We will only hold support on additional testing in special cases (which are uncommon). -We aim to have "day of" support for about half the OSes we support, including Azure Linux, Ubuntu LTS, and Windows. This means we need to perform ahead-of-time signoff on non-final builds. +We aim to have "day of" support for about half the OSes we support, including Azure Linux, Ubuntu LTS, and Windows. This means we need to perform ahead-of-time signoff on [non-final builds](https://github.com/dotnet/runtime/pull/111768#issuecomment-2617229139). Our testing philosophy is based on perceived risk and past experience. The effective test matrix is huge, the product of OSes \* supported versions \* architectures. We try to make smart choices to **skip testing most of the matrix** while retaining much of the **practical coverage**. We also know where we tend to get bitten most when we don't pay sufficient attention. For example, our bug risk across Linux, macOS, and Windows is not uniform. @@ -98,7 +92,8 @@ Installing/building the Helix client can be quite involved, particularly for Arm Container images are referenced in our pipeline files: - [eng/pipelines/coreclr/templates/helix-queues-setup.yml](https://github.com/dotnet/runtime/blob/main/eng/pipelines/coreclr/templates/helix-queues-setup.yml) -- [eng/pipelines/libraries/helix.yml](https://github.com/dotnet/runtime/blob/main/eng/pipelines/libraries/helix.yml) +- [eng/pipelines/installer/helix-queues-setup.yml](https://github.com/dotnet/runtime/blob/main/eng/pipelines/installer/helix-queues-setup.yml) +- [eng/pipelines/libraries/helix-queues-setup.yml](https://github.com/dotnet/runtime/blob/main/eng/pipelines/libraries/helix-queues-setup.yml) - [eng/pipelines/common/templates/pipeline-with-resources.yml](https://github.com/dotnet/runtime/blob/main/eng/pipelines/common/templates/pipeline-with-resources.yml) Notes: @@ -125,7 +120,9 @@ VMs and raw metal environments are used for Android, Apple, Linux, and Windows O ### Other -Other environments are typically use a custom process. +Other environments have their own process. 
+ +- [Android](../workflow/testing/libraries/testing-android.md#upgrading-the-android-ndk-version-in-ci-pipelines) - [Browser Wasm](https://github.com/dotnet/runtime/pull/112066) diff --git a/docs/project/updating-jitinterface.md b/docs/project/updating-jitinterface.md index 56ced8bc0942..571edab9a0ab 100644 --- a/docs/project/updating-jitinterface.md +++ b/docs/project/updating-jitinterface.md @@ -2,8 +2,6 @@ JitInterface is the binary interface that is used to communicate with the JIT. The bulk of the interface consists of the ICorStaticInfo and ICorDynamicInfo interfaces and enums/structs used by those interfaces. -Following header files define parts of the JIT interface: cordebuginfo.h, corinfo.h, corjit.h, corjitflags.h, corjithost.h. - The JitInterface serves two purposes: * Standardizes the interface between the runtime and the JIT (potentially allowing mixing and matching JITs and runtimes) * Allows the JIT to be used elsewhere (outside of the runtime) @@ -12,12 +10,22 @@ There are several components that consume the JIT outside of the runtime. Since The JitInterface is versioned by a GUID. Any change to JitInterface is required to update the JitInterface GUID located in jiteeversionguid.h (look for `JITEEVersionIdentifier`). Not doing so has consequences that are sometimes hard to debug. -If a method was added or modified in ICorStaticInfo or ICorDynamicInfo, port the change to src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt. Functions must be in the same order in ThunkInput.txt as they exist in corinfo.h and corjit.h. Run gen.bat or gen.sh to regenerate all \*_generated.\* files from ThunkInput.txt. Provide a managed implementation of the method in CorInfoImpl.cs. +## Adding a new JIT-VM API manually -## Porting JitInterface changes to crossgen2 +It's a good idea to choose an existing API that is similar to the one you want to add and use it as a template. The following steps are required to add a new JIT-VM API: -Crossgen2 is the AOT compiler for CoreCLR. It generates native code for .NET apps ahead of time and uses the JIT to do that. Since crossgen2 is written in managed code, it doesn't consume the C++ headers and maintains a managed copy of them. Changes to JitInterface need to be ported managed code. +1) Start from adding a new entry in the `ThunkInput.txt` file. This file is used to generate the JIT-VM interface and is located in `src/coreclr/tools/Common/JitInterface/ThunkGenerator/`. For complex types, you may need to also configure type mapping in the beginning of the file. +2) Invoke the `gen.sh` script (or `gen.bat` on Windows) to update the auto-generated files `*_generated.*` and update the JIT-EE guid. +3) Open `src/coreclr/inc/corinfo.h` and add the new API in `ICorStaticInfo` +4) Open `src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs` and add the new API in `CorInfoImpl` class. If the implementation is not shared for NativeAOT and R2R, use `CorInfoImpl.RyuJit.cs` and `CorInfoImpl.ReadyToRun.cs` to implement the API. +5) Open `src/coreclr/vm/jitinterface.cpp` and add the CoreCLR-specific implementation +6) Open `lwmlist.h` and add a definition of "input-args" - "output-args" map. 
Either use the generic `DLD`-like structs or create new ones in `agnostic.h` +7) Open `src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h` and add the necessary recording, dumping, and replaying methods for the new API and then implement them in `methodcontext.cpp` +8) Update `enum mcPackets` in `src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h` to include an entry for the new API and bump the max value of the enum +9) Use the `rec*` and `rep*` methods in `src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp` and `src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp` accordingly -1. If an enum/struct was modified or added, port the change to CorInfoTypes.cs. -2. If a method was added or modified in ICorStaticInfo or ICorDynamicInfo, if the managed implementation is specific to CoreCLR ReadyToRun (and doesn't apply to full AOT compilation), provide the implementation in CorInfoImpl.ReadyToRun.cs instead or CorInfoImpl.cs. +## Adding a new JIT-VM API through an agent +[add-new-jit-ee-api.prompt.md](../../.github/prompts/add-new-jit-ee-api.prompt.md) contains a prompt that can be used to add a new JIT-VM API through an agent. Example usage in VSCode: +* Open the Copilot Chat Window +* Type "/add-new-jit-ee-api.prompt" and either hit enter and follow the instructions or provide the API signature directly. Gpt-4.1 and Claude Sonnet 4 or 3.7 are recommended for this task. diff --git a/docs/tools/illink/data-formats.md b/docs/tools/illink/data-formats.md index 5c0dc0ac4782..06490e3e3308 100644 --- a/docs/tools/illink/data-formats.md +++ b/docs/tools/illink/data-formats.md @@ -253,20 +253,6 @@ Entire method body is replaces with `throw` instruction when method is reference ``` -### Override static field value with a constant - -The `initialize` attribute is optional and when not specified the code to set the static field to the value will not be generated. - -```xml - - - - - - - -``` - ### Remove embedded resources ```xml diff --git a/docs/workflow/building/coreclr/android-studio-coreclr-debug-symbols-added.png b/docs/workflow/building/coreclr/android-studio-coreclr-debug-symbols-added.png new file mode 100644 index 000000000000..9056a16a4941 Binary files /dev/null and b/docs/workflow/building/coreclr/android-studio-coreclr-debug-symbols-added.png differ diff --git a/docs/workflow/building/coreclr/android-studio-coreclr-debug-symbols-adding.png b/docs/workflow/building/coreclr/android-studio-coreclr-debug-symbols-adding.png new file mode 100644 index 000000000000..0fd7d9f0f8ba Binary files /dev/null and b/docs/workflow/building/coreclr/android-studio-coreclr-debug-symbols-adding.png differ diff --git a/docs/workflow/building/coreclr/android-studio-coreclr-debugging.png b/docs/workflow/building/coreclr/android-studio-coreclr-debugging.png new file mode 100644 index 000000000000..9efddbe08242 Binary files /dev/null and b/docs/workflow/building/coreclr/android-studio-coreclr-debugging.png differ diff --git a/docs/workflow/building/coreclr/android.md b/docs/workflow/building/coreclr/android.md index 0195d00677e2..3a6769d062ac 100644 --- a/docs/workflow/building/coreclr/android.md +++ b/docs/workflow/building/coreclr/android.md @@ -1,100 +1,212 @@ -Cross Compilation for Android on Linux -====================================== +# Experimental support of CoreCLR on Android -Through cross compilation, on Linux it is possible to build CoreCLR for arm64 Android. 
+This is the internal documentation which outlines experimental support of CoreCLR on Android and includes instructions on how to: +- [Build CoreCLR for Android](./android.md#building-coreclr-for-android) +- [Build and run a sample application with CoreCLR](./android.md#building-and-running-a-sample-app) +- [Debug the sample app and the runtime](./android.md#debugging-the-runtime-and-the-sample-app) -Requirements ------------- +## Prerequisite -You'll need to generate a toolchain and a sysroot for Android. There's a script which takes care of the required steps. +- Download and install [OpenJDK 23](https://openjdk.org/projects/jdk/23/) +- Download and install [Android Studio](https://developer.android.com/studio/install) and the following: + - Android SDK (minimum supported API level is 21) + - Android NDK r27 -Generating the rootfs ---------------------- +> [!NOTE] +> Prerequisites can also be downloaded and installed manually: +> - by running the automated script as described in [Testing Libraries on Android](../../testing/libraries/testing-android.md#using-a-terminal) +> - by downloading the archives: +> - Android SDK - Download [command-line tools](https://developer.android.com/studio#command-line-tools-only) and use `sdkmanager` to download the SDK. +> - Android NDK - Download [NDK](https://developer.android.com/ndk/downloads) -To generate the rootfs, run the following command in the `coreclr` folder: +## Building CoreCLR for Android + +Supported host systems for building CoreCLR for Android: +- [MacOS](./android.md#macos-and-linux) ✔ +- [Linux](./android.md#macos-and-linux) ✔ +- [Windows](./android.md#windows) ❌ (only through WSL) + +Supported target architectures: +- x86 ❌ +- x64 ✔ +- arm ❌ +- arm64 ✔ + +### MacOS and Linux + +#### Requirements + +Set the following environment variables: + - ANDROID_SDK_ROOT=`` + - ANDROID_NDK_ROOT=`` + +#### Building the runtime, libraries and tools + +To build CoreCLR runtime, libraries and tools for local development, run the following command from ``: ``` -cross/init-android-rootfs.sh +./build.sh clr.runtime+clr.alljits+clr.corelib+clr.nativecorelib+clr.tools+clr.packages+libs -os android -arch -c ``` -This will download the NDK and any packages required to compile Android on your system. It's over 1 GB of data, so it may take a while. +To build CoreCLR runtime NuGet packages, run the following command from ``: + +``` +./build.sh clr.runtime+clr.alljits+clr.corelib+clr.nativecorelib+clr.tools+clr.packages+libs+host+packs -os android -arch -c +``` +> [!NOTE] +> The runtime packages will be located at: `/artifacts/packages//Shipping/` -Cross compiling CoreCLR ------------------------ -Once the rootfs has been generated, it will be possible to cross compile CoreCLR. +### Windows -When cross compiling, you need to set both the `CONFIG_DIR` and `ROOTFS_DIR` variables. +Building on Windows is not directly supported yet. However it is possible to use WSL2 for this purpose. -To compile for arm64, run: +#### WSL2 + +##### Requirements + +1. Install the Android SDK and NDK in WSL per the [prerequisites](#prerequisite). This can be done by downloading the archives or using Android Studio. +- In case of Android Studio: + - Make sure WSL is updated: from Windows host, `wsl --update` + - [Enabled systemd](https://devblogs.microsoft.com/commandline/systemd-support-is-now-available-in-wsl/#set-the-systemd-flag-set-in-your-wsl-distro-settings) + - `sudo snap install android-studio --classic` +2. 
Set the following environment variables: + - ANDROID_SDK_ROOT=`` + - ANDROID_NDK_ROOT=`` + +#### Building the runtime, libraries and tools + +To build CoreCLR runtime, libraries and tools, run the following command from ``: ``` -CONFIG_DIR=`realpath cross/android/arm64` ROOTFS_DIR=`realpath cross/android-rootfs/toolchain/arm64/sysroot` ./build.sh cross arm64 cmakeargs -DENABLE_LLDBPLUGIN=0 +./build.sh clr.runtime+clr.alljits+clr.corelib+clr.nativecorelib+clr.tools+clr.packages+libs -os android -arch -c ``` -The resulting binaries will be found in `artifacts/bin/coreclr/Linux.BuildArch.BuildType/` +## Building and running a sample app + +To demonstrate building and running an Android sample application with CoreCLR, we will use: +- the [HelloAndroid sample app](../../../../src/mono/sample/Android/AndroidSampleApp.csproj). +- a functional tests [Android.Device_Emulator.JIT.Test](../../../../src/tests/FunctionalTests/Android/Device_Emulator/JIT/Android.Device_Emulator.JIT.Test.csproj) -Running the PAL tests on Android --------------------------------- +A prerequisite for building and running samples locally is to have CoreCLR successfully built for desired Android platform. -You can run the PAL tests on an Android device. To run the tests, you first copy the PAL tests to your Android phone using -`adb`, and then run them in an interactive Android shell using `adb shell`: +### Building HelloAndroid sample + +To build `HelloAndroid`, run the following command from ``: -To copy the PAL tests over to an Android phone: ``` -adb push artifacts/obj/coreclr/Linux.arm64.Debug/src/pal/tests/palsuite/ /data/local/tmp/coreclr/pal/tests/palsuite -adb push cross/android/toolchain/arm64/sysroot/usr/lib/libandroid-support.so /data/local/tmp/coreclr/lib/ -adb push cross/android/toolchain/arm64/sysroot/usr/lib/libandroid-glob.so /data/local/tmp/coreclr/lib/ -adb push src/pal/tests/palsuite/paltestlist.txt /data/local/tmp/coreclr -adb push src/pal/tests/palsuite/runpaltests.sh /data/local/tmp/coreclr/ +make BUILD_CONFIG= TARGET_ARCH= RUNTIME_FLAVOR=CoreCLR DEPLOY_AND_RUN=false run -C src/mono/sample/Android ``` -Then, use `adb shell` to launch a shell on Android. Inside that shell, you can launch the PAL tests: +On successful execution, the command will output the `HelloAndroid.apk` at: ``` -LD_LIBRARY_PATH=/data/local/tmp/coreclr/lib ./runpaltests.sh /data/local/tmp/coreclr/ +artifacts/bin/AndroidSampleApp///android-/Bundle/bin/HelloAndroid.apk ``` -Debugging coreclr on Android ----------------------------- +### Running HelloAndroid sample on an emulator -You can debug coreclr on Android using a remote lldb server which you run on your Android device. +To run the sample on an emulator, the emulator first needs to be up and running. -First, push the lldb server to Android: +Creating an emulator (ADV - Android Virtual Device) can be achieved through [Android Studio - Device Manager](https://developer.android.com/studio/run/managing-avds). +After its creation, the emulator needs to be booted up and running, so that we can run the `HelloAndroid` sample on it via: ``` -adb push cross/android/lldb/2.2/android/arm64-v8a/lldb-server /data/local/tmp/ +make BUILD_CONFIG= TARGET_ARCH= RUNTIME_FLAVOR=CoreCLR DEPLOY_AND_RUN=true run -C src/mono/sample/Android ``` -Then, launch the lldb server on the Android device. 
+
+> [!NOTE]
+> Emulators can also be started from the terminal via:
+> ```
+> $ANDROID_SDK_ROOT/emulator/emulator -avd 
+> ```
+
+#### WSL2
+
+The app can be run on an emulator running on the Windows host.
+1. Install Android Studio on the Windows host (same versions as in [prerequisites](#prerequisites))
+2. In Windows, create and start an emulator
+3. In WSL, swap the `adb` from the Android SDK in WSL2 with that from Windows
+   - `mv $ANDROID_SDK_ROOT/platform-tools/adb $ANDROID_SDK_ROOT/platform-tools/adb-orig`
+   - `ln -s /mnt/ $ANDROID_SDK_ROOT/platform-tools/adb`
+4. In WSL, make xharness use the `adb` corresponding to the Windows host:
+   - `export ADB_EXE_PATH=$ANDROID_SDK_ROOT/platform-tools/adb`
+5. In WSL, run the `make` command as [above](#running-helloandroid-sample-on-an-emulator)
+
+### Building and running functional tests on an emulator
+
+Similarly to the `HelloAndroid` sample, it is possible to build and run a functional test on Android with CoreCLR on an emulator.
+
+To build and run a functional test on Android with CoreCLR, run the following command from ``:
 ```
-adb shell
-cd /data/local/tmp
-./lldb-server platform --listen *:1234
+./dotnet.sh build -c Release src/tests/FunctionalTests/Android/Device_Emulator/JIT/Android.Device_Emulator.JIT.Test.csproj /p:TargetOS=android /p:TargetArchitecture=arm64 /t:Test /p:RuntimeFlavor=coreclr
 ```
-After that, you'll need to forward port 1234 from your Android device to your PC:
+> [!NOTE]
+> Similarly to the `HelloAndroid` sample, the emulator needs to be up and running.
+
+### Useful make commands
+
+For convenience, it is possible to run a single make command which builds all required dependencies and the app, and then runs it:
 ```
-adb forward tcp:1234 tcp:1234
+make BUILD_CONFIG= TARGET_ARCH= RUNTIME_FLAVOR=CoreCLR DEPLOY_AND_RUN=true all -C src/mono/sample/Android
 ```
-Finally, install lldb on your PC and connect to the debug server running on your Android device:
+## Debugging the runtime and the sample app
-```
-lldb-3.9
-(lldb) platform select remote-android
- Platform: remote-android
- Connected: no
-(lldb) platform connect connect://localhost:1234
- Platform: remote-android
- Triple: aarch64-*-linux-android
-OS Version: 23.0.0 (3.10.84-perf-gf38969a)
- Kernel: #1 SMP PREEMPT Fri Sep 16 11:29:29 2016
- Hostname: localhost
- Connected: yes
-WorkingDir: /data/local/tmp
+Managed debugging is currently not supported, but we can debug:
+- the Java portion of the sample app
+- native code for the CoreCLR host and the runtime itself
+
+This can be achieved in `Android Studio` via `Profile or Debug APK`.
+
+### Steps
+
+1. Build the runtime and `HelloAndroid` sample app in the `Debug` configuration, targeting the `arm64` architecture.
+2. Rename the runtime library's debug symbols file from `libcoreclr.so.dbg` to `libcoreclr.so.so`. The file is located at: `/artifacts/bin/AndroidSampleApp/arm64/Debug/android-arm64/publish/libcoreclr.so.dbg`
+3. Open Android Studio and select `Profile or Debug APK`.
+4. Find and select the desired `.apk` file (example: `/artifacts/bin/AndroidSampleApp/arm64/Debug/android-arm64/Bundle/bin/HelloAndroid.apk`)
+5. In the project pane, expand `HelloAndroid->cpp->libcoreclr` and double-click `libcoreclr.so`
+![Adding debug symbols](./android-studio-coreclr-debug-symbols-adding.png)
+6. From the `Debug Symbols` pane on the right, select `Add`
+7. Navigate to the renamed file from step 2 and select it: `/artifacts/bin/AndroidSampleApp/arm64/Debug/android-arm64/publish/libcoreclr.so.so`
+8. Once loaded, it will show all the source files under `HelloAndroid->cpp->libcoreclr`
+![Debug symbols loaded](./android-studio-coreclr-debug-symbols-adding.png)
+9. Find `exports.cpp`, set a breakpoint in the `coreclr_initialize` function, and launch the debug session
+![Debugging CoreCLR](./android-studio-coreclr-debugging.png)
-(lldb) target create coreclr/pal/tests/palsuite/file_io/CopyFileA/test4/paltest_copyfilea_test4
-(lldb) env LD_LIBRARY_PATH=/data/local/tmp/coreclr/lib
-(lldb) run
+> [!NOTE]
+> Steps 5) through 8) can be omitted if the runtime is built without stripping debug symbols to a separate file (e.g., `libcoreclr.so.dbg`).
+> This can be achieved by including the `-keepnativesymbols true` option when building the runtime, e.g.:
+> ```
+> ./build.sh clr.runtime+clr.alljits+clr.corelib+clr.nativecorelib+clr.tools+clr.packages+libs -os android -arch -c Debug -keepnativesymbols true
+> ```
+
+## See also
+
+Similar instructions for debugging Android apps with the Mono runtime can be found [here](../../debugging/mono/android-debugging.md).
+
+## Troubleshooting
+
+### Android samples or functional tests fail to build
+
+If multiple JDKs are installed on your system, you may encounter the following error:
+
+```
+`src/mono/msbuild/android/build/AndroidBuild.targets(237,5): error MSB4018: java.lang.NullPointerException: Cannot invoke String.length() because is null
+```
+
+when building the Android samples or functional tests.
+
+To resolve this:
+1. Remove older JDK versions
+2. Install [OpenJDK 23](https://openjdk.org/projects/jdk/23/)
+3. Make sure the OpenJDK 23 binaries are added to the path.
+   - On Unix systems, this can be verified via:
+   ```
+   $> java -version
+   openjdk version "23.0.1" 2024-10-15
+   OpenJDK Runtime Environment Homebrew (build 23.0.1)
+   OpenJDK 64-Bit Server VM Homebrew (build 23.0.1, mixed mode, sharing)
+   ```
\ No newline at end of file
diff --git a/docs/workflow/building/coreclr/cross-building.md b/docs/workflow/building/coreclr/cross-building.md
index 6e93e7538ce5..28ea2f0ef118 100644
--- a/docs/workflow/building/coreclr/cross-building.md
+++ b/docs/workflow/building/coreclr/cross-building.md
@@ -171,3 +171,15 @@ docker run --rm \
   mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-18.04-cross-freebsd-12 \
   ./build.sh --subset clr --cross --os freebsd
 ```
+
+### Building CoreCLR with Bootstrapping
+
+CoreCLR builds a few tools, including the NativeAOT compiler itself, using NativeAOT (or single file where NativeAOT is not supported). The build defaults to using a "Last Known Good" version of NativeAOT to build the tools. This "Last Known Good" version comes from the .NET SDK referenced in the global.json file. This default was chosen to give most repo contributors a good local build experience. Building with the live NativeAOT version would make the local build longer and would complicate debugging local changes that impact the NativeAOT compiler.
+
+The runtime's build scripts provide an additional set of options to build with the live NativeAOT version instead of the "Last Known Good" version. This is useful for testing changes to NativeAOT or the tools that are built with it, and is required for building those tools for target platforms that are not known to the "Last Known Good" version of NativeAOT, such as FreeBSD, community architectures, or non-portable builds of .NET. This is not yet implemented for Windows.
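As a rough sketch of the bootstrap workflow described in the following paragraphs (the `bootstrap` subset and the `--use-bootstrap`/`--bootstrap` options are explained below; the exact subsets you combine them with may vary):

```
# Build the bootstrap components first, then build the runtime repo against them
./build.sh --subset bootstrap
./build.sh --subset clr --use-bootstrap

# Or do both steps (including cleaning the artifacts directory) in one command
./build.sh --subset clr --bootstrap
```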
+ +To build the bootstrap subset of the runtime repo, you can build the `bootstrap` subset. To use the bootstrap components in the runtime repo build, you can pass the `--use-bootstrap` argument to the build script. This will use the bootstrap components instead of the "Last Known Good" version of NativeAOT. + +For simplicity, a `--bootstrap` option is also provided. This option will build the `bootstrap` subset, clean up the artifacts directory, and then build the runtime repo with the `--use-bootstrap` option. This is useful for building the runtime repo with the live NativeAOT version without having to run two separate commands. + +The `--bootstrap` option is automatically specified when building the runtime repo for .NET Source Build, as the vast majority of Source Build scenarios use non-portable RIDs. diff --git a/docs/workflow/building/coreclr/freebsd-instructions.md b/docs/workflow/building/coreclr/freebsd-instructions.md index 9ff21b3c3225..cbc4dd864231 100644 --- a/docs/workflow/building/coreclr/freebsd-instructions.md +++ b/docs/workflow/building/coreclr/freebsd-instructions.md @@ -38,6 +38,8 @@ Ensure you have all of the prerequisites installed from the [Linux Requirements] Once that is done, refer to the [Linux section of the cross-building doc](/docs/workflow/building/coreclr/cross-building.md#linux-cross-building). There are detailed instructions on how to cross-compile using your Linux environment, including a section dedicated to FreeBSD building. +You'll also need to use the `--bootstrap` option as documented in the [cross-building doc](/docs/workflow/building/coreclr/cross-building.md#building-coreclr-with-bootstrapping) to build the cross-compilation toolchain. + ## Build directly on FreeBSD Ensure you have all of the prerequisites installed from the [FreeBSD Requirements](/docs/workflow/requirements/freebsd-requirements.md). diff --git a/docs/workflow/building/coreclr/ios.md b/docs/workflow/building/coreclr/ios.md index 9574010e2f8b..d854fca86b18 100644 --- a/docs/workflow/building/coreclr/ios.md +++ b/docs/workflow/building/coreclr/ios.md @@ -1,4 +1,4 @@ -# Cross Compilation for iOS Simulator on macOS +# Cross Compilation for iOS/tvOS Simulator on macOS ## Requirements @@ -9,7 +9,7 @@ Build requirements are the same as for building native CoreCLR on macOS. iPhone Build the runtime pack and tools with ``` -./build.sh clr+clr.runtime+libs+packs -os [iossimulator/maccatalyst] -arch [x64/arm64] -cross -c Release +./build.sh clr+clr.runtime+libs+packs -os [iossimulator/tvossimulator/maccatalyst] -arch [x64/arm64] -cross -c Release ``` ## Running the sample iOS app diff --git a/docs/workflow/building/coreclr/nativeaot.md b/docs/workflow/building/coreclr/nativeaot.md index e2e6e0df31ba..e04c5add57c0 100644 --- a/docs/workflow/building/coreclr/nativeaot.md +++ b/docs/workflow/building/coreclr/nativeaot.md @@ -125,22 +125,22 @@ The compiler also has a mode where each managed assembly can be compiled into a ## Visual Studio Solutions -The repository has a number of Visual Studio Solutions files (`*.sln`) that are useful for editing parts of the repository. Build the repo from command line first before building using the solution files. Remember to select the appropriate configuration that you built. By default, `build.cmd` builds Debug x64 and so `Debug` and `x64` must be selected in the solution build configuration drop downs. +The repository has a number of Visual Studio Solutions files (`*.slnx`) that are useful for editing parts of the repository. 
Build the repo from command line first before building using the solution files. Remember to select the appropriate configuration that you built. By default, `build.cmd` builds Debug x64 and so `Debug` and `x64` must be selected in the solution build configuration drop downs. Solutions related to this: -* `src\coreclr\nativeaot\nativeaot.sln`. This solution is for the runtime libraries. -* `src\coreclr\tools\aot\ilc.sln`. This solution is for the compiler. +* `src\coreclr\nativeaot\nativeaot.slnx`. This solution is for the runtime libraries. +* `src\coreclr\tools\aot\ilc.slnx`. This solution is for the compiler. Typical workflow for working on the compiler: -* Open `ilc.sln` in Visual Studio +* Open `ilc.slnx` in Visual Studio * Set "ILCompiler" project in solution explorer as your startup project * Set Working directory in the project Debug options to your test project directory, e.g. `C:\test` * Set Application arguments in the project Debug options to the response file that was generated by regular native aot publishing of your test project, e.g. `@obj\Release\net8.0\win-x64\native\HelloWorld.ilc.rsp` * Build & run using **F5** -NOTE: this requires that you globally install the same .NET SDK version as the one that is used to build the repo. You can avoid this requirement by launching Visual Studio through the build.cmd script at the root of the repo. `build.cmd -vs src\coreclr\tools\aot\ilc.sln` opens the ILC solution, `build.cmd -vs nativeaot` opens the native AOT solution. +NOTE: this requires that you globally install the same .NET SDK version as the one that is used to build the repo. You can avoid this requirement by launching Visual Studio through the build.cmd script at the root of the repo. `build.cmd -vs src\coreclr\tools\aot\ilc.slnx` opens the ILC solution, `build.cmd -vs nativeaot` opens the native AOT solution. ## Convenience Visual Studio "repro" project @@ -149,7 +149,7 @@ Typical native AOT runtime developer scenario workflow is to native AOT compile The workflow looks like this: * Build the repo using the Building instructions above -* Open the ilc.sln solution described above. This solution contains the compiler, but also an unrelated project named "repro". This repro project is a small Hello World. You can place any piece of C# you would like to compile in it. Building the project will compile the source code into IL, but also generate a response file that is suitable to pass to the AOT compiler. +* Open the ilc.slnx solution described above. This solution contains the compiler, but also an unrelated project named "repro". This repro project is a small Hello World. You can place any piece of C# you would like to compile in it. Building the project will compile the source code into IL, but also generate a response file that is suitable to pass to the AOT compiler. * Make sure you set the solution configuration in VS to the configuration you just built (e.g. x64 Debug). * In the ILCompiler project properties, on the Debug tab, set the "Application arguments" to `@$(ArtifactsBinDir)repro\$(TargetArchitecture)\$(Configuration)\compile-with-Release-libs.rsp`. The `@` at the front of the argument indicates that this is the path to the response file generated when "repro" was built. Adjust the "compile-with-Release-libs" part to "compile-with-Debug-libs" depending on how you built the libraries (the `-lc` argument to `build.cmd`). Visual Studio will expand the path to something like `@C:\runtime\artifacts\bin\repro\x64\Debug\compile-with-Release-libs.rsp`. 
* Build & run ILCompiler using **F5**. This will compile the repro project into an `.obj` file. You can debug the compiler and set breakpoints in it at this point. diff --git a/docs/workflow/building/libraries/README.md b/docs/workflow/building/libraries/README.md index e75b8a8f8db4..6916fcd6e9d2 100644 --- a/docs/workflow/building/libraries/README.md +++ b/docs/workflow/building/libraries/README.md @@ -12,7 +12,7 @@ git pull upstream main & git push origin main build.cmd clr+libs -rc Release :: Performing the above is usually only needed once in a day, or when you pull down significant new changes. -:: If you use Visual Studio, you might open System.Collections.Concurrent.sln here. +:: If you use Visual Studio, you might open System.Collections.Concurrent.slnx here. build.cmd -vs System.Collections.Concurrent :: Switch to working on a given library (System.Collections.Concurrent in this case) @@ -76,7 +76,7 @@ The libraries build has two logical components, the native build which produces The build settings (BuildTargetFramework, TargetOS, Configuration, Architecture) are generally defaulted based on where you are building (i.e. which OS or which architecture) but we have a few shortcuts for the individual properties that can be passed to the build scripts: -- `-framework|-f` identifies the target framework for the build. Possible values include `net10.0` (currently the latest .NET version) or `net48` (the latest .NET Framework version). (msbuild property `BuildTargetFramework`) +- `-framework|-f` identifies the target framework for the build. Possible values include `net10.0` (currently the latest .NET version) or `net481` (the latest .NET Framework version). (msbuild property `BuildTargetFramework`) - `-os` identifies the OS for the build. It defaults to the OS you are running on but possible values include `windows`, `unix`, `linux`, or `osx`. (msbuild property `TargetOS`) - `-configuration|-c Debug|Release` controls the optimization level the compilers use for the build. It defaults to `Debug`. (msbuild property `Configuration`) - `-arch` identifies the architecture for the build. It defaults to `x64` but possible values include `x64`, `x86`, `arm`, or `arm64`. (msbuild property `TargetArchitecture`) @@ -128,7 +128,7 @@ The libraries build contains some native code. This includes shims over libc, op - Building and updating the binplace (for e.g. 
the testhost), which is needed when iterating on native components ```bash -dotnet.sh build src/native/libraries/build-native.proj +dotnet.sh build src/native/libs/build-native.proj ``` - The following example shows how you would do an arm cross-compile build @@ -147,7 +147,7 @@ Similar to building the entire repo with `build.cmd` or `build.sh` in the root y - Build all projects for a given library (e.g.: System.Collections) including running the tests ```bash - ./build.sh -projects src/libraries/*/System.Collections.sln + ./build.sh -projects src/libraries/*/System.Collections.slnx ``` - Build just the tests for a library project @@ -157,7 +157,7 @@ Similar to building the entire repo with `build.cmd` or `build.sh` in the root y - All the options listed above like framework and configuration are also supported (note they must be after the directory) ```bash - ./build.sh -projects src/libraries/*/System.Collections.sln -f net472 -c Release + ./build.sh -projects src/libraries/*/System.Collections.slnx -f net472 -c Release ``` As `dotnet build` works on both Unix and Windows and calls the restore target implicitly, we will use it throughout this guide. @@ -256,3 +256,11 @@ Same as for `dotnet build` or `dotnet publish`, you can specify the desired conf ```cmd dotnet.cmd pack src\libraries\System.Text.Json\src\ -c Release ``` + +## APICompat + +If changes to the library include any API incompatibilities, calling `dotnet build` or `dotnet pack` may result in API compatibility errors. + +In rare cases where these are expected (e.g. updating APIs previously shipped only in preview or as experimental), the errors may be suppressed. This can be done by following the directions in the error to invoke `dotnet build` (if the project isn't packable) or `dotnet pack` (if the project is packable) with an additional `/p:ApiCompatGenerateSuppressionFile=true` argument. + +See https://learn.microsoft.com/dotnet/fundamentals/apicompat/overview for more details. diff --git a/docs/workflow/building/libraries/webassembly-instructions.md b/docs/workflow/building/libraries/webassembly-instructions.md index 1f230eedbdef..162ddc57c0fc 100644 --- a/docs/workflow/building/libraries/webassembly-instructions.md +++ b/docs/workflow/building/libraries/webassembly-instructions.md @@ -80,7 +80,7 @@ Individual projects and libraries can be build by specifying the build configura - Build all projects for a given library (e.g.: System.Net.Http) including the tests ```bash -./build.sh -os browser -c Release --projects /src/libraries/System.Net.Http/System.Net.Http.sln +./build.sh -os browser -c Release --projects /src/libraries/System.Net.Http/System.Net.Http.slnx ``` - Build only the source project of a given library (e.g.: System.Net.Http) diff --git a/docs/workflow/ci/failure-analysis.md b/docs/workflow/ci/failure-analysis.md index 4b3e96334277..680ff7fb1de9 100644 --- a/docs/workflow/ci/failure-analysis.md +++ b/docs/workflow/ci/failure-analysis.md @@ -57,7 +57,7 @@ Validation may fail for several reasons, and for each one we have a different re ### Additional information: * If the license/cla check fails to register a response, the check can be rerun by submitting a `@dotnet-policy-service rerun` comment to the PR. 
- * Reach out to the infrastructure team for assistance on [Teams channel](https://teams.microsoft.com/l/channel/19%3ab27b36ecd10a46398da76b02f0411de7%40thread.skype/Infrastructure?groupId=014ca51d-be57-47fa-9628-a15efcc3c376&tenantId=72f988bf-86f1-41af-91ab-2d7cd011db47) (for corpnet users) or on [Gitter](https://gitter.im/dotnet/community) in other cases. + * Reach out to the infrastructure team for assistance on [Teams channel](https://teams.microsoft.com/l/channel/19%3ab27b36ecd10a46398da76b02f0411de7%40thread.skype/Infrastructure?groupId=014ca51d-be57-47fa-9628-a15efcc3c376&tenantId=72f988bf-86f1-41af-91ab-2d7cd011db47) (for corpnet users) or on [Discord](https://aka.ms/dotnet-discord) #runtime channel in other cases. ## What to do if you determine the failure is unrelated diff --git a/docs/workflow/debugging/coreclr/debugging-runtime.md b/docs/workflow/debugging/coreclr/debugging-runtime.md index b41c1a577e56..3519e649aacc 100644 --- a/docs/workflow/debugging/coreclr/debugging-runtime.md +++ b/docs/workflow/debugging/coreclr/debugging-runtime.md @@ -43,12 +43,12 @@ If for some reason `System.Private.CoreLib.dll` is missing, you can rebuild it w Visual Studio's capabilities as a full IDE provide a lot of help making the runtime debugging more amiable. 0. Run `.\build.cmd clr.nativeprereqs -a -c `. This will build some of the tools requiremented for the native build. This step only needs to be run once as long you don't clean the `artifacts` directory. -1. Open the CoreCLR solution _(coreclr.sln)_ in Visual Studio. +1. Open the CoreCLR solution _(coreclr.slnx)_ in Visual Studio. * _Method 1_: Use the build scripts to open the solution: - 1. Run `.\build.cmd -vs coreclr.sln -a -c `. This will create and launch the CoreCLR solution in VS for the specified architecture and configuration. By default, this will be `x64 Debug`. + 1. Run `.\build.cmd -vs coreclr.slnx -a -c `. This will create and launch the CoreCLR solution in VS for the specified architecture and configuration. By default, this will be `x64 Debug`. * _Method 2_: Manually build and open the solution: 1. Perform a build of the repo with the `-msbuild` flag. - 2. Open solution `path\to\runtime\artifacts\obj\coreclr\windows..\ide\CoreCLR.sln` in Visual Studio. As in the previous method, the architecture and configuration by default are `x64` and `Debug`, unless explicitly stated otherwise. + 2. Open solution `path\to\runtime\artifacts\obj\coreclr\windows..\ide\CoreCLR.slnx` in Visual Studio. As in the previous method, the architecture and configuration by default are `x64` and `Debug`, unless explicitly stated otherwise. 2. Right-click the **INSTALL** project and choose `Set as StartUp Project`. 3. Bring up the properties page for the **INSTALL** project. 4. Select _Configuration Properties -> Debugging_ from the left side tree control. @@ -114,7 +114,7 @@ Visual Studio can also be used to debug builds built externally from CLI scripts 5. To set breakpoints, runtime source files can to be added by right clicking the solution in the Solution Explorer and selecting Add -> Existing Item. 6. Set breakpoints and run the application with `F5` to start debugging. -Note, the `.sln` file can be saved and stores paths to `corerun.exe`, included files, and debug settings. It can be reused as long as the paths do not change. +Note, the `.slnx` file can be saved and stores paths to `corerun.exe`, included files, and debug settings. It can be reused as long as the paths do not change. 
### Using Visual Studio Code diff --git a/docs/workflow/editing-and-debugging.md b/docs/workflow/editing-and-debugging.md index c219ed43f28e..0a919656f544 100644 --- a/docs/workflow/editing-and-debugging.md +++ b/docs/workflow/editing-and-debugging.md @@ -15,26 +15,26 @@ more on Markdown in general. # Visual Studio Solutions -The repository has a number of Visual Studio Solutions files (`*.sln`) that are useful for editing parts of the repository. In particular +The repository has a number of Visual Studio Solutions files (`*.slnx`) that are useful for editing parts of the repository. In particular - * `src\coreclr\System.Private.CoreLib\System.Private.CoreLib.sln` - This solution is for all managed (C#) code that is defined + * `src\coreclr\System.Private.CoreLib\System.Private.CoreLib.slnx` - This solution is for all managed (C#) code that is defined in the runtime itself. This is all class library support of one form or another. - * `artifacts\obj\coreclr\windows..\ide\CoreCLR.sln` - this solution contains most native (C++) projects + * `artifacts\obj\coreclr\windows..\ide\CoreCLR.slnx` - this solution contains most native (C++) projects associated with the repository, including * `coreclr` - This is the main runtime DLL (the GC, class loader, interop are all here) * `corjit` - This is the Just In Time (JIT) compiler that compiles .NET Intermediate language to native code. * `corerun` - This is the simple host program that can run a .NET application * `crossgen` - This is the host program that runs the JIT compiler and produces .NET Native images (`*.ni.dll`) for C# code. - * This project can be automatically generated and opened in Visual Studio by running `./build.cmd -vs CoreCLR.sln -a -c ` from the root of the repository. - * `artifacts\obj\win-.\corehost\ide\corehost.sln` - this solution contains the native (C++) projects for the [host components](../design/features/host-components.md) - * This project can be automatically generated and opened in Visual Studio by running `./build.cmd -vs corehost.sln -a -c ` from the root of the repository. + * This project can be automatically generated and opened in Visual Studio by running `./build.cmd -vs CoreCLR.slnx -a -c ` from the root of the repository. + * `artifacts\obj\win-.\corehost\ide\corehost.slnx` - this solution contains the native (C++) projects for the [host components](../design/features/host-components.md) + * This project can be automatically generated and opened in Visual Studio by running `./build.cmd -vs corehost.slnx -a -c ` from the root of the repository. Thus opening one of these solution files (double clicking on them in Explorer) is typically all you need to do most editing. Notice that the CoreCLR and corehost solutions are under the `artifacts` directory. This is because they are created as part of the build. -Thus you can only launch these solutions after you have built at least once with the `-msbuild` flag or run the `./build.cmd -vs CoreCLR.sln` or `./build.cmd -vs corehost.sln` command line with the specified architecture and configuration. +Thus you can only launch these solutions after you have built at least once with the `-msbuild` flag or run the `./build.cmd -vs CoreCLR.slnx` or `./build.cmd -vs corehost.slnx` command line with the specified architecture and configuration. 
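For instance, a concrete invocation might look like the following (x64 and Debug are illustrative values; use the architecture and configuration you actually built):

```
:: Example values only; match them to your local build
./build.cmd -vs CoreCLR.slnx -a x64 -c Debug
```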
* See [Debugging CoreCLR](debugging/coreclr/debugging-runtime.md) diff --git a/docs/workflow/requirements/macos-requirements.md b/docs/workflow/requirements/macos-requirements.md index e9606b12569b..67e7840a64d1 100644 --- a/docs/workflow/requirements/macos-requirements.md +++ b/docs/workflow/requirements/macos-requirements.md @@ -23,8 +23,8 @@ To build the runtime repo, you will also need to install the following dependenc - `python3` - `ninja` (This one is optional. It is an alternative tool to `make` for building native code) -You can install them separately, or you can alternatively opt to install *[Homebrew](https://brew.sh/)* and use the `Brewfile` provided by the repo, which takes care of everything for you. If you go by this route, once you have *Homebrew* up and running on your machine, run the following command from the root of the repo to download and install all the necessary dependencies at once: +You can install them separately, or you can alternatively opt to install *[Homebrew](https://brew.sh/)* and use the `install-dependencies.sh` script provided by the repo, which takes care of everything for you. If you go by this route, once you have *Homebrew* up and running on your machine, run the following command from the root of the repo to download and install all the necessary dependencies at once: ```bash -brew bundle --no-lock --file eng/Brewfile +./eng/common/native/install-dependencies.sh ``` diff --git a/docs/workflow/requirements/windows-requirements.md b/docs/workflow/requirements/windows-requirements.md index 1a528572575d..aa2c54c3976a 100644 --- a/docs/workflow/requirements/windows-requirements.md +++ b/docs/workflow/requirements/windows-requirements.md @@ -84,7 +84,7 @@ Alternatively, if you would rather avoid modifying your machine state, you can u .\build.cmd -vs System.Text.RegularExpressions ``` -This will set the `DOTNET_ROOT` and `PATH` environment variables to point to the locally acquired SDK under the `.dotnet` directory found at the root of the repo for the duration of this terminal session. Then, it will launch the Visual Studio instance that is registered for the `.sln` extension, and open the solution you passed as argument to the command-line. +This will set the `DOTNET_ROOT` and `PATH` environment variables to point to the locally acquired SDK under the `.dotnet` directory found at the root of the repo for the duration of this terminal session. Then, it will launch the Visual Studio instance that is registered for the `.slnx` extension, and open the solution you passed as argument to the command-line. ## Installing dependencies with winget diff --git a/docs/workflow/testing/host/testing.md b/docs/workflow/testing/host/testing.md index 1bdeadbaca60..82496f754b16 100644 --- a/docs/workflow/testing/host/testing.md +++ b/docs/workflow/testing/host/testing.md @@ -78,7 +78,7 @@ The `category!=failing` is to respect the [filtering traits](../libraries/filter ### Visual Studio -The [Microsoft.DotNet.CoreSetup.sln](/src/installer/Microsoft.DotNet.CoreSetup.sln) can be used to run and debug host tests through Visual Studio. When using the solution, the product should have already been [built](#building-tests) and the [test context](#test-context) set up. +The [Microsoft.DotNet.CoreSetup.slnx](/src/installer/Microsoft.DotNet.CoreSetup.slnx) can be used to run and debug host tests through Visual Studio. When using the solution, the product should have already been [built](#building-tests) and the [test context](#test-context) set up. 
If you built the runtime or libraries with a different configuration from the host, you have to specify this when starting visual studio:
diff --git a/docs/workflow/testing/libraries/testing-android.md b/docs/workflow/testing/libraries/testing-android.md
index 6322950a1fec..9b138a0b61b5 100644
--- a/docs/workflow/testing/libraries/testing-android.md
+++ b/docs/workflow/testing/libraries/testing-android.md
@@ -22,7 +22,7 @@ Android SDK and NDK can be automatically installed via the following script:
 #!/usr/bin/env bash
 set -e
-NDK_VER=r23c
+NDK_VER=r27c
 SDK_VER=9123335_latest
 SDK_API_LEVEL=33
 SDK_BUILD_TOOLS=33.0.1
@@ -154,3 +154,37 @@ The emulator can be launched with a variety of options. Run `emulator -help` to
 ### Debugging the native runtime code using Android Studio
 See [Debugging Android](../../debugging/mono/android-debugging.md)
+
+## Upgrading the Android NDK Version in CI Pipelines
+
+The Android NDK has two release channels: a rolling release, which occurs approximately every quarter, and a Long Term Support (LTS) release, which happens once a year (typically in Q3). While release dates are not guaranteed, LTS versions receive support for at least one year or until the next LTS reaches the release candidate stage. After that, the NDK version stops receiving bug fixes and security updates.
+
+The LTS NDK release schedule roughly aligns with the .NET Release Candidate (RC) timeline. Given this, we should plan to upgrade the NDK version used in `main` around that time. If we successfully upgrade before the .NET release, we can ensure that our CI builds and tests run against a supported NDK version for approximately 9 months after the release.
+
+.NET MAUI is supported for 18 months after each .NET release. This means the NDK version used in CI will be supported for about half the lifecycle of a given .NET MAUI release. If we want to ensure that the NDK version used in CI is supported for the entire lifecycle of a given .NET MAUI release, we should consider upgrading the NDK version in the `release` branches.
+
+CI pipelines retrieve the NDK version from Docker images hosted in the [dotnet-buildtools-prereqs-docker](https://github.com/dotnet/dotnet-buildtools-prereqs-docker) repository.
+
+For reference, see an example Dockerfile NDK definition:
+[Azure Linux 3.0 .NET 10.0 Android Dockerfile](https://github.com/dotnet/dotnet-buildtools-prereqs-docker/blob/c480b239b3731983e36b0879f5b60d8f4ab7b945/src/azurelinux/3.0/net10.0/android/amd64/Dockerfile#L2).
+
+Bumping the version of the NDK in the prereqs repo will automatically propagate it to all CI runs. Thus, bumping the NDK requires a three-step process to ensure that CI continues to operate correctly.
+To upgrade the NDK version used in CI for building and testing Android, follow these steps:
+
+### 1. Verify the New NDK Version Locally
+- Download the new NDK version.
+- Test the local build using the new NDK by building a sample Android app.
+- Ensure **AOT** and **AOT_WITH_LIBRARY_FILES** are enabled in the build.
+
+### 2. Test the New NDK in CI and Fix Issues
+- Create a new Docker image containing the updated NDK version (based on the original Docker image from the [dotnet-buildtools-prereqs-docker](https://github.com/dotnet/dotnet-buildtools-prereqs-docker) repository).
+- Open a **draft PR** in the **runtime** repository that updates the Dockerfile reference to use the new image.
+- Monitor CI results and fix any failures.
+- Once CI is green, **commit only the necessary changes** (e.g., fixes, build adjustments) to the respective branch. +- **Do not** change the Docker image reference in the final commit. + +### 3. Update the NDK Version in the Prerequisites Repository +- Update the NDK version in the [dotnet-buildtools-prereqs-docker](https://github.com/dotnet/dotnet-buildtools-prereqs-docker) repository by modifying the Dockerfile. +- The updated NDK will automatically flow to all builds of a given branch once merged. + +By following these steps, you ensure a smooth upgrade of the Android NDK in CI while maintaining stability and compatibility. diff --git a/docs/workflow/using-docker.md b/docs/workflow/using-docker.md index b7a116046704..99601f1219ed 100644 --- a/docs/workflow/using-docker.md +++ b/docs/workflow/using-docker.md @@ -47,9 +47,8 @@ The main Docker images are the most commonly used ones, and the ones you will pr | Azure Linux (x64) | Debian sid | LoongArch | `mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-loongarch64` | `/crossrootfs/loongarch64` | | Azure Linux (x64) | Ubuntu 18.04 | S390x | `mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-s390x` | `/crossrootfs/s390x` | | Azure Linux (x64) | Ubuntu 18.04 (Wasm) | x64 | `mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-webassembly-amd64` | `/crossrootfs/x64` | -| Debian (x64) | Debian 12 | x64 | `mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-gcc14-amd64` | *N/A* | +| Debian (x64) | Debian 12 | x64 | `mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-gcc15-amd64` | *N/A* | | Ubuntu (x64) | Tizen 9.0 | Arm32 (armel) | `mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-cross-armel-tizen` | `/crossrootfs/armel` | -| Ubuntu (x64) | Ubuntu 20.04 | Arm32 (v6) | `mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-20.04-cross-armv6-raspbian-10` | `/crossrootfs/armv6` | ## Build the Repo diff --git a/eng/Brewfile b/eng/Brewfile deleted file mode 100644 index 7a145df5dc63..000000000000 --- a/eng/Brewfile +++ /dev/null @@ -1,5 +0,0 @@ -brew "cmake" -brew "icu4c" -brew "openssl@3" -brew "pkg-config" -brew "python3" diff --git a/eng/CodeAnalysis.src.globalconfig b/eng/CodeAnalysis.src.globalconfig index 3569479b9ae0..a4def7dae8b1 100644 --- a/eng/CodeAnalysis.src.globalconfig +++ b/eng/CodeAnalysis.src.globalconfig @@ -564,6 +564,9 @@ dotnet_diagnostic.CA2021.severity = warning # CA2022: Avoid inexact read with 'Stream.Read' dotnet_diagnostic.CA2022.severity = warning +# CA2025: Ensure tasks using 'IDisposable' instances complete before the instances are disposed +dotnet_diagnostic.CA2025.severity = warning + # CA2100: Review SQL queries for security vulnerabilities dotnet_diagnostic.CA2100.severity = none @@ -1178,7 +1181,7 @@ dotnet_diagnostic.SA1013.severity = none dotnet_diagnostic.SA1014.severity = warning # SA1015: Closing generic bracket should not be followed by a space -dotnet_diagnostic.SA1015.severity = none +dotnet_diagnostic.SA1015.severity = warning # SA1018: Nullable type symbol should not be preceded by a space dotnet_diagnostic.SA1018.severity = warning diff --git a/eng/CodeAnalysis.test.globalconfig b/eng/CodeAnalysis.test.globalconfig index 0d944fbd890f..3093e262b83b 100644 --- a/eng/CodeAnalysis.test.globalconfig +++ b/eng/CodeAnalysis.test.globalconfig @@ -561,6 +561,9 @@ dotnet_diagnostic.CA2021.severity = none # CA2022: Avoid inexact read with 'Stream.Read' dotnet_diagnostic.CA2022.severity = none +# CA2025: Ensure tasks using 'IDisposable' 
instances complete before the instances are disposed +dotnet_diagnostic.CA2025.severity = none + # CA2100: Review SQL queries for security vulnerabilities dotnet_diagnostic.CA2100.severity = none diff --git a/eng/DotNetBuild.props b/eng/DotNetBuild.props deleted file mode 100644 index 75cfd1acb039..000000000000 --- a/eng/DotNetBuild.props +++ /dev/null @@ -1,145 +0,0 @@ - - - - - - runtime - - .\build.cmd - ./build.sh - - <_hostRid>$([System.Runtime.InteropServices.RuntimeInformation]::RuntimeIdentifier) - - $(_hostRid) - - - <_targetRidPlatformIndex>$(TargetRid.LastIndexOf('-')) - $(TargetRid.Substring($(_targetRidPlatformIndex)).TrimStart('-')) - $(TargetRid.Substring(0, $(_targetRidPlatformIndex))) - - <_hostRidPlatformIndex>$(_hostRid.LastIndexOf('-')) - <_hostArch>$(_hostRid.Substring($(_hostRidPlatformIndex)).TrimStart('-')) - - minimal - - - - - true - true - true - true - true - true - true - true - true - - true - - - - - - - - $(InnerBuildArgs) $(FlagParameterPrefix)restore $(FlagParameterPrefix)build $(FlagParameterPrefix)publish - $(InnerBuildArgs) $(FlagParameterPrefix)sign - $(InnerBuildArgs) $(FlagParameterPrefix)pack - - $(InnerBuildArgs) $(FlagParameterPrefix)arch $(TargetArch) - $(InnerBuildArgs) $(FlagParameterPrefix)os $(TargetOS) - $(InnerBuildArgs) $(FlagParameterPrefix)cross - $(InnerBuildArgs) $(FlagParameterPrefix)configuration $(Configuration) - $(InnerBuildArgs) $(FlagParameterPrefix)verbosity $(LogVerbosity) - $(InnerBuildArgs) $(FlagParameterPrefix)nodereuse $(ArcadeFalseBoolBuildArg) - $(InnerBuildArgs) $(FlagParameterPrefix)warnAsError $(ArcadeFalseBoolBuildArg) - $(InnerBuildArgs) $(FlagParameterPrefix)usemonoruntime - - $(InnerBuildArgs) --outputrid $(TargetRid) - - $(InnerBuildArgs) /p:PackageOS=$(RuntimeOS) /p:ToolsOS=$(RuntimeOS) - - $(InnerBuildArgs) /p:AdditionalRuntimeIdentifierParent=$(BaseOS) /p:BaseOS=$(BaseOS) - - $(InnerBuildArgs) /p:WasmEnableThreads=true - $(InnerBuildArgs) /p:MonoEnableLLVM=$(DotNetBuildMonoEnableLLVM) - $(InnerBuildArgs) /p:MonoAOTEnableLLVM=$(DotNetBuildMonoAOTEnableLLVM) - $(InnerBuildArgs) /p:MonoBundleLLVMOptimizer=$(DotNetBuildMonoBundleLLVMOptimizer) - $(InnerBuildArgs) /p:DotNetBuildAllRuntimePacks=$(DotNetBuildAllRuntimePacks) - $(InnerBuildArgs) /p:DotNetBuildPass=$(DotNetBuildPass) - $(InnerBuildArgs) $(FlagParameterPrefix)pgoinstrument - - - $(InnerBuildArgs) /p:DotNetBuildRepo=true - $(InnerBuildArgs) /p:DotNetBuildOrchestrator=true - $(InnerBuildArgs) /p:OfficialBuildId=$(OfficialBuildId) - $(InnerBuildArgs) /p:ContinuousIntegrationBuild=$(ContinuousIntegrationBuild) - $(InnerBuildArgs) /p:PortableBuild=$(PortableBuild) - $(InnerBuildArgs) /p:RestoreConfigFile=$(RestoreConfigFile) - $(InnerBuildArgs) /p:ForceDryRunSigning=$(ForceDryRunSigning) - $(InnerBuildArgs) /p:DefaultArtifactVisibility=$(DefaultArtifactVisibility) - $(InnerBuildArgs) /p:DotNetEsrpToolPath=$(DotNetEsrpToolPath) - - - $(InnerBuildArgs) /p:SourceBuiltAssetsDir=$(SourceBuiltAssetsDir) - $(InnerBuildArgs) /p:SourceBuiltShippingPackagesDir=$(SourceBuiltShippingPackagesDir) - $(InnerBuildArgs) /p:SourceBuiltNonShippingPackagesDir=$(SourceBuiltNonShippingPackagesDir) - $(InnerBuildArgs) /p:SourceBuiltAssetManifestsDir=$(SourceBuiltAssetManifestsDir) - $(InnerBuildArgs) /p:SourceBuiltSymbolsDir=$(SourceBuiltSymbolsDir) - $(InnerBuildArgs) /p:GitHubRepositoryName=$(GitHubRepositoryName) - - - +$(UseSystemLibs)+ - $(InnerBuildArgs) --cmakeargs -DCLR_CMAKE_USE_SYSTEM_BROTLI=true - $(InnerBuildArgs) --cmakeargs -DCLR_CMAKE_USE_SYSTEM_LIBUNWIND=true - - 
- $(InnerBuildArgs) --cmakeargs -DCLR_CMAKE_USE_SYSTEM_RAPIDJSON=true - $(InnerBuildArgs) --cmakeargs -DCLR_CMAKE_USE_SYSTEM_ZLIB=true - - - $(InnerBuildArgs) /p:NetCoreAppToolCurrentVersion=$(NetCoreAppToolCurrentVersion) - - - - - - - runtime - - - - - - - - - - - - - - - - - - - - - - diff --git a/eng/ILSdk.BeforeTargets.targets b/eng/ILSdk.BeforeTargets.targets new file mode 100644 index 000000000000..d6915f01f68d --- /dev/null +++ b/eng/ILSdk.BeforeTargets.targets @@ -0,0 +1,6 @@ + + + + $(NETCoreSdkRuntimeIdentifier) + + diff --git a/eng/OSArch.props b/eng/OSArch.props new file mode 100644 index 000000000000..22fb8b577fca --- /dev/null +++ b/eng/OSArch.props @@ -0,0 +1,39 @@ + + + <_hostOS>linux + <_hostOS Condition="$([MSBuild]::IsOSPlatform('OSX'))">osx + <_hostOS Condition="$([MSBuild]::IsOSPlatform('FREEBSD'))">freebsd + <_hostOS Condition="$([MSBuild]::IsOSPlatform('NETBSD'))">netbsd + <_hostOS Condition="$([MSBuild]::IsOSPlatform('ILLUMOS'))">illumos + <_hostOS Condition="$([MSBuild]::IsOSPlatform('SOLARIS'))">solaris + <_hostOS Condition="$([MSBuild]::IsOSPlatform('HAIKU'))">haiku + <_hostOS Condition="$([MSBuild]::IsOSPlatform('WINDOWS'))">windows + $(_hostOS) + browser + $(_hostOS) + true + true + + + + + <_hostArch>$([System.Runtime.InteropServices.RuntimeInformation]::ProcessArchitecture.ToString().ToLowerInvariant) + $(_hostArch) + wasm + wasm + arm + armv6 + armel + arm64 + loongarch64 + s390x + ppc64le + x64 + x64 + $(TargetArchitecture) + + + + <_ImportedOSArchProps>true + + diff --git a/eng/Publishing.props b/eng/Publishing.props index da3d606ed683..172a6c0b220e 100644 --- a/eng/Publishing.props +++ b/eng/Publishing.props @@ -1,153 +1,36 @@ - - + true false - - $(TargetArchitecture) - - - - - - + + + - - - - - - - - - - - - - - - - - - - + + <_HostArtifact Include="$(ArtifactsPackagesDir)**\runtime.*.Microsoft.NETCore.ILAsm.*.nupkg" + Exclude="$(ArtifactsPackagesDir)**\runtime.$(TargetRid).Microsoft.NETCore.ILAsm.*.nupkg" /> - - + <_HostArtifact Include="$(ArtifactsPackagesDir)**\runtime.*.Microsoft.NETCore.ILDAsm.*.nupkg" + Exclude="$(ArtifactsPackagesDir)**\runtime.$(TargetRid).Microsoft.NETCore.ILDAsm.*.nupkg" /> - - + <_HostArtifact Include="$(ArtifactsPackagesDir)**\runtime.*.Microsoft.DotNet.ILCompiler.*.nupkg" + Exclude="$(ArtifactsPackagesDir)**\runtime.$(TargetRid).Microsoft.DotNet.ILCompiler.*.nupkg" /> - - + <_HostArtifact Include="$(ArtifactsPackagesDir)**\Microsoft.NETCore.App.Crossgen2.*.nupkg" + Exclude="$(ArtifactsPackagesDir)**\Microsoft.NETCore.App.Crossgen2.$(TargetRid).*.nupkg" /> - - - - - - - - - - - - - - - - - - - + @@ -168,15 +51,15 @@ This ensures that we don't produce these files in the "Repo source build" builds, but we do produce them in both the VMR and the runtime official build. 
--> - - <_ShouldGenerateProductVersionFiles Condition="'$(DotNetBuildRepo)' == 'true' and '$(DotNetBuildOrchestrator)' == 'true'">true - <_ShouldGenerateProductVersionFiles Condition="'$(DotNetBuildRepo)' != 'true' and '$(DotNetBuildOrchestrator)' != 'true'">true + + true + true + Condition="'$(ShouldGenerateProductVersionFiles)' == 'true'"> - - + + + + + + + + + + - - - - - - - - - diff --git a/eng/RuntimeIdentifier.props b/eng/RuntimeIdentifier.props new file mode 100644 index 000000000000..b22c70853985 --- /dev/null +++ b/eng/RuntimeIdentifier.props @@ -0,0 +1,71 @@ + + + false + true + + + + + + + $(TargetOS.ToLowerInvariant()) + win + + + $(__PortableTargetOS) + + + linux-musl + linux-bionic + + + win + + + + + <_hostRid Condition="'$(MSBuildRuntimeType)' == 'core'">$([System.Runtime.InteropServices.RuntimeInformation]::RuntimeIdentifier) + <_hostRid Condition="'$(MSBuildRuntimeType)' != 'core'">win-$([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture.ToString().ToLowerInvariant) + + <_parseDistroRid>$(__DistroRid) + <_parseDistroRid Condition="'$(_parseDistroRid)' == ''">$(_hostRid) + <_distroRidIndex>$(_parseDistroRid.LastIndexOf('-')) + + <_targetRidOS>$(_parseDistroRid.SubString(0, $(_distroRidIndex))) + <_targetRidOS Condition="'$(PortableBuild)' == 'true'">$(PortableOS) + + $(_targetRidOS)-$(TargetArchitecture) + + + + true + true + true + true + true + true + true + true + true + true + true + true + true + true + true + true + true + true + true + true + + + <_ImportedRuntimeIdentifierProps>true + + diff --git a/eng/Signing.props b/eng/Signing.props index c1ff2d8ba476..9591f13edbde 100644 --- a/eng/Signing.props +++ b/eng/Signing.props @@ -1,6 +1,31 @@ + + + - false + + false + true + false + + + true + + true @@ -14,6 +39,13 @@ + + + + + + @@ -24,51 +56,93 @@ - - - - - - - - - - - - - - - - + %(FullPath).sha512 - + - - - + + + + + - \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/eng/SourceBuildPrebuiltBaseline.xml b/eng/SourceBuildPrebuiltBaseline.xml deleted file mode 100644 index 69be84119be4..000000000000 --- a/eng/SourceBuildPrebuiltBaseline.xml +++ /dev/null @@ -1,40 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/eng/Subsets.props b/eng/Subsets.props index a55f55b8e848..48bbbd150702 100644 --- a/eng/Subsets.props +++ b/eng/Subsets.props @@ -31,8 +31,9 @@ <_CoreCLRSupportedOS Condition="'$(TargetsAndroid)' == 'true' and '$(TargetArchitecture)' != 'arm' and '$(TargetArchitecture)' != 'x86'">true + <_CoreCLRSupportedOS Condition="'$(TargetsBrowser)' == 'true'">true - <_CoreCLRSupportedArch Condition="'$(TargetArchitecture)' != 'armv6' and '$(TargetArchitecture)' != 'ppc64le' and '$(TargetArchitecture)' != 's390x'">true + <_CoreCLRSupportedArch Condition="'$(TargetArchitecture)' != 'ppc64le' and '$(TargetArchitecture)' != 's390x'">true true @@ -49,18 +50,27 @@ <_NativeAotSupportedOS Condition="'$(TargetOS)' == 'windows' or '$(TargetOS)' == 'linux' or '$(TargetOS)' == 'osx' or '$(TargetOS)' == 'maccatalyst' or '$(TargetOS)' == 'iossimulator' or '$(TargetOS)' == 'ios' or '$(TargetOS)' == 'tvossimulator' or '$(TargetOS)' == 'tvos' or '$(TargetOS)' == 'freebsd'">true <_NativeAotSupportedArch Condition="'$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'arm64' or '$(TargetArchitecture)' == 'arm' or '$(TargetArchitecture)' == 'loongarch64' or '$(TargetArchitecture)' == 'riscv64' or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' == 'x86')">true 
true + + + <_SdkToolsSupportedOS Condition="'$(TargetsMobile)' != 'true' and '$(TargetsLinuxBionic)' != 'true'">true + <_SdkToolsSupportedArch Condition="'$(TargetArchitecture)' != 'armel'">true + true + + <_UseNativeAotForComponentsCrossOS Condition="'$(CrossBuild)' == 'true' and '$(_hostArchitecture)' == '$(_targetArchitecture)' and '$(_hostOS)' != 'windows'">true + true CoreCLR Mono + Mono $(DefaultPrimaryRuntimeFlavor) clr+mono+libs+tools+host+packs mono+libs+packs - clr.runtime+clr.alljits+clr.corelib+clr.nativecorelib+clr.tools+clr.packages+mono+libs+host+packs + clr+mono+libs+host+packs clr.nativeaotruntime+clr.nativeaotlibs+mono+libs+packs clr.nativeaotruntime+clr.nativeaotlibs+mono+libs+host+packs clr.nativeaotruntime+clr.nativeaotlibs+libs+packs @@ -73,7 +83,7 @@ - + true @@ -98,7 +108,8 @@ clr.native+clr.corelib+clr.tools+clr.nativecorelib+clr.packages+clr.nativeaotlibs+clr.crossarchtools+host.native - clr.native+clr.corelib+clr.tools+clr.nativecorelib+clr.packages+clr.nativeaotlibs+clr.crossarchtools + clr.native+clr.corelib+clr.tools+clr.nativecorelib+clr.packages+clr.nativeaotlibs+clr.crossarchtools + clr.native+clr.corelib+clr.tools+clr.nativecorelib+clr.packages+clr.nativeaotlibs+clr.crossarchtools clr.iltools+clr.packages @@ -129,7 +140,7 @@ $(DefaultLibrariesSubsets)+libs.tests - tools.illink+tools.cdacreader + tools.illink+tools.cdac host.native+host.tools+host.pkg $(DefaultHostSubsets)+host.pretest+host.tests @@ -142,6 +153,12 @@ $(DefaultPacksSubsets)+packs.installers $(DefaultPacksSubsets)+packs.tests $(DefaultPacksSubsets)+mono.manifests + + host.native+libs.sfx+libs.pretest + $(BootstrapSubsets)+clr.runtime+clr.corelib + $(BootstrapSubsets)+clr.nativeaotlibs+clr.nativeaotruntime+libs.native + + true @@ -153,6 +170,8 @@ <_subset>$(_subset.Replace('+tools+', '+$(DefaultToolsSubsets)+')) <_subset>$(_subset.Replace('+host+', '+$(DefaultHostSubsets)+')) <_subset>$(_subset.Replace('+packs+', '+$(DefaultPacksSubsets)+')) + <_subset>$(_subset.Replace('+bootstrap+', '+bootstrap+$(BootstrapSubsets)+')) + <_subset Condition="'$(TargetOS)' == 'browser'">$(_subset.Replace('+clr.runtime+', '+mono.emsdk+clr.runtime+')) <_subset>+$(_subset.Trim('+'))+ @@ -162,9 +181,6 @@ - <_IsCommunityCrossArchitecture Condition="'$(CrossBuild)' == 'true' and ('$(TargetArchitecture)' == 'loongarch64' or '$(TargetArchitecture)' == 'riscv64')">true - true - true true @@ -214,6 +230,7 @@ + @@ -228,10 +245,9 @@ - + - - + @@ -259,6 +275,7 @@ +
@@ -271,6 +288,11 @@ + + + + + @@ -312,7 +334,15 @@ $(ClrRuntimeBuildSubsets);ClrILToolsSubset=true +<<<<<<< HEAD +======= + + $(ClrRuntimeBuildSubsets);ClrCdacSubset=true + + + +>>>>>>> upstream-jun $(ClrRuntimeBuildSubsets);ClrNativeAotSubset=true @@ -336,7 +366,7 @@ The cross tools are used as part of the build process with the downloaded build tools, so we need to build them for the host architecture and build them as unsanitized binaries. --> - <_BuildAnyCrossArch Condition="'$(CrossBuild)' == 'true' or '$(BuildArchitecture)' != '$(TargetArchitecture)' or '$(HostOS)' != '$(TargetOS)' or '$(EnableNativeSanitizers)' != ''">true + <_BuildAnyCrossArch Condition="('$(CrossBuild)' == 'true' or '$(BuildArchitecture)' != '$(TargetArchitecture)' or '$(HostOS)' != '$(TargetOS)' or '$(EnableNativeSanitizers)' != '') and '$(TargetArchitecture)' != 'wasm'">true <_BuildCrossComponents Condition="$(_subset.Contains('+clr.crossarchtools+'))">true <_BuildCrossComponents Condition="'$(ClrRuntimeBuildSubsets)' != '' and ('$(PrimaryRuntimeFlavor)' == 'CoreCLR' or '$(TargetsMobile)' == 'true')">true <_CrossBitwidthBuild Condition="'$(BuildArchitecture)' == 'x64' and ('$(TargetArchitecture)' == 'x86' or '$(TargetArchitecture)' == 'arm')">true @@ -344,8 +374,8 @@ - - <_CrossToolSubset Condition="'$(_BuildCrossComponents)' == 'true'" Include="ClrAllJitsSubset=true" /> + + <_CrossToolSubset Condition="'$(_BuildCrossComponents)' == 'true' and ($(_subset.Contains('+clr.tools+')) or $(_subset.Contains('+clr.nativecorelib+')) or $(_subset.Contains('+clr.crossarchtools+')))" Include="ClrAllJitsSubset=true" /> <_CrossToolSubset Condition="'$(_BuildCrossComponents)' == 'true' and '$(TargetsWindows)' == 'true'" Include="ClrDebugSubset=true" /> - - - - - - - - - + $(CoreClrProjectRoot)tools\AssemblyChecker\AssemblyChecker.csproj; + $(ToolsProjectRoot)StressLogAnalyzer\src\StressLogAnalyzer.csproj" Category="clr" Condition="'$(DotNetBuildSourceOnly)' != 'true'"/> + + + + + + + + + + + + @@ -462,12 +497,16 @@ Test="true" Category="clr" Condition="'$(DotNetBuildSourceOnly)' != 'true' and '$(NativeAotSupported)' == 'true'"/> - + + <_CDacToolsBuilt Condition="'$(_CDacToolsBuilt)' != 'true'">true + + + - - + + @@ -636,11 +675,13 @@ <_BuildCoreCLRRuntimePack Condition="'$(RuntimeFlavor)' == 'CoreCLR' and '$(CoreCLRSupported)' == 'true'">true <_BuildMonoRuntimePack Condition="'$(RuntimeFlavor)' == 'Mono' and '$(MonoSupported)' == 'true'">true <_BuildHostPack Condition="'$(RuntimeFlavor)' == '$(PrimaryRuntimeFlavor)' and '$(TargetsMobile)' != 'true'">true + <_BuildCdacPack Condition="'$(_CDacToolsBuilt)' == 'true' and '$(RuntimeFlavor)' == 'CoreCLR' and '$(TargetsMobile)' != 'true' and '$(TargetsLinuxMusl)' != 'true' and ('$(TargetOS)' == 'windows' or '$(TargetOS)' == 'osx' or '$(TargetOS)' == 'linux')">true + <_BuildCdacPack Condition="'$(DotNetBuildSourceOnly)' == 'true' or '$(TargetArchitecture)' == 'arm' or '$(TargetArchitecture)' == 'armel' or '$(TargetArchitecture)' == 'x86' or '$(TargetArchitecture)' == 'riscv64'">false <_BuildBundle Condition="'$(RuntimeFlavor)' == '$(PrimaryRuntimeFlavor)' and '$(TargetsMobile)' != 'true'">true - <_BuildCoreCLRRuntimePack Condition="'$(CoreCLRSupported)' == 'true'">true + <_BuildCoreCLRRuntimePack Condition="'$(CoreCLRSupported)' == 'true' and '$(TargetsBrowser)' != 'true'">true <_BuildMonoRuntimePack Condition="'$(MonoSupported)' == 'true'">true @@ -668,6 +709,7 @@ + + + + + diff --git a/eng/Tools.props b/eng/Tools.props index 3baa40f4f32e..0747c78a32db 100644 --- a/eng/Tools.props 
+++ b/eng/Tools.props @@ -1,18 +1,8 @@ - - - <_RepoToolManifest Condition="'$(ContinuousIntegrationBuild)' == 'true'" /> - - - - - - diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index c9ceae82ec87..f3d4fa6cb4d6 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -1,418 +1,367 @@ + - + https://github.com/dotnet/icu - 4cfe3beb254e824bf989bc1107d5ac6057640820 + 4db6d6cffd9ae63ed72c27caf3f0d2d8816c29c0 https://github.com/dotnet/wcf 7f504aabb1988e9a093c1e74d8040bd52feb2f01 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 - - - https://github.com/dotnet/command-line-api - feb61c7f328a2401d74f4317b39d02126cfdfe24 - - - - https://github.com/dotnet/command-line-api - feb61c7f328a2401d74f4317b39d02126cfdfe24 - - - - https://github.com/dotnet/cecil - dfa03011d6474bd0e6c9d0363e4f3b18b99f2ad8 - - - - https://github.com/dotnet/cecil - dfa03011d6474bd0e6c9d0363e4f3b18b99f2ad8 - - - - https://github.com/dotnet/emsdk - c8c1ff004deb78cbd4b1599c96f9a19e9f09ca62 - - - - https://github.com/dotnet/emsdk - c8c1ff004deb78cbd4b1599c96f9a19e9f09ca62 - - - - - https://github.com/dotnet/source-build-reference-packages - 81b495268ffb3f5cffbe63724ad085f831bcc1b1 - - - - - https://github.com/dotnet/source-build-externals - 962bb86f95ebece01c2b1de97ed7d4261e00b058 - + da5dd054a531e6fea65643b7e754285b73eab433 - - - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c - + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + 
d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 + + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 
3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 + da5dd054a531e6fea65643b7e754285b73eab433 - + https://github.com/dotnet/llvm-project - 0417b8265304b7e52c63c9309271b15feb39ddd3 - - - https://github.com/dotnet/runtime - 29013d8ae50f5bc35427a9155234ccebfa5e227c + da5dd054a531e6fea65643b7e754285b73eab433 - - https://github.com/dotnet/runtime - 29013d8ae50f5bc35427a9155234ccebfa5e227c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/runtime - 29013d8ae50f5bc35427a9155234ccebfa5e227c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/runtime - 29013d8ae50f5bc35427a9155234ccebfa5e227c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/runtime - 
29013d8ae50f5bc35427a9155234ccebfa5e227c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - - https://github.com/dotnet/runtime - 29013d8ae50f5bc35427a9155234ccebfa5e227c - + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/runtime - 29013d8ae50f5bc35427a9155234ccebfa5e227c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/runtime - 29013d8ae50f5bc35427a9155234ccebfa5e227c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - + https://github.com/dotnet/xharness - 0d72885f0fd3329e58254831f04f4517a73e1b56 + e85bb14e85357ab678c2bcb0b6f2bac634fdd49b - + https://github.com/dotnet/xharness - 0d72885f0fd3329e58254831f04f4517a73e1b56 + e85bb14e85357ab678c2bcb0b6f2bac634fdd49b - + https://github.com/dotnet/xharness - 0d72885f0fd3329e58254831f04f4517a73e1b56 + e85bb14e85357ab678c2bcb0b6f2bac634fdd49b - - https://github.com/dotnet/arcade - 91630b31ce859c28f637b62b566ea8829b982f2c + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 9fc4fa70da875eed41d3a69a5d65c83d2e3f4b06 + a68196f69e40740fce716778138acaa26488b333 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 9fc4fa70da875eed41d3a69a5d65c83d2e3f4b06 + a68196f69e40740fce716778138acaa26488b333 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 9fc4fa70da875eed41d3a69a5d65c83d2e3f4b06 + a68196f69e40740fce716778138acaa26488b333 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 9fc4fa70da875eed41d3a69a5d65c83d2e3f4b06 + a68196f69e40740fce716778138acaa26488b333 - + https://github.com/dotnet/hotreload-utils - f30fb00238a27eff06b5a6bf11ff9c4a5e35c9ca + b8ca0c681f43f6affb21032e2135f489306e8625 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 - - - https://github.com/dotnet/roslyn - 651ee91d035cdf8e659c00a384d5d6671aadbe46 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 - - https://github.com/dotnet/roslyn - 651ee91d035cdf8e659c00a384d5d6671aadbe46 + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/roslyn - 651ee91d035cdf8e659c00a384d5d6671aadbe46 + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/roslyn-analyzers - 8fe7aeb135c64e095f43292c427453858d937184 + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/roslyn-analyzers - 8fe7aeb135c64e095f43292c427453858d937184 + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - - https://github.com/dotnet/roslyn - 651ee91d035cdf8e659c00a384d5d6671aadbe46 - + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - - https://github.com/dotnet/sdk - 23e2ba847d79562b972dbf54eca3f87c3044d925 + + https://github.com/dotnet/dotnet + d2434b1b5ed778f9869b1e51ba1df4bc3a64eda7 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 9fc4fa70da875eed41d3a69a5d65c83d2e3f4b06 + a68196f69e40740fce716778138acaa26488b333 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 9fc4fa70da875eed41d3a69a5d65c83d2e3f4b06 + a68196f69e40740fce716778138acaa26488b333 @@ -432,41 +381,41 @@ https://github.com/NuGet/NuGet.Client 8fef55f5a55a3b4f2c96cd1a9b5ddc51d4b927f8 - + https://github.com/dotnet/node - 
f157b219c30f5296cb3ffaa6937ef19d702c5aab + 7f33d14aae0d91f2d5befda939160177e13b3f47 - + https://github.com/dotnet/node - f157b219c30f5296cb3ffaa6937ef19d702c5aab + 7f33d14aae0d91f2d5befda939160177e13b3f47 - + https://github.com/dotnet/node - f157b219c30f5296cb3ffaa6937ef19d702c5aab + 7f33d14aae0d91f2d5befda939160177e13b3f47 - + https://github.com/dotnet/node - f157b219c30f5296cb3ffaa6937ef19d702c5aab + 7f33d14aae0d91f2d5befda939160177e13b3f47 - + https://github.com/dotnet/node - f157b219c30f5296cb3ffaa6937ef19d702c5aab + 7f33d14aae0d91f2d5befda939160177e13b3f47 - + https://github.com/dotnet/node - f157b219c30f5296cb3ffaa6937ef19d702c5aab + 7f33d14aae0d91f2d5befda939160177e13b3f47 - + https://github.com/dotnet/node - f157b219c30f5296cb3ffaa6937ef19d702c5aab + 7f33d14aae0d91f2d5befda939160177e13b3f47 - + https://github.com/dotnet/node - f157b219c30f5296cb3ffaa6937ef19d702c5aab + 7f33d14aae0d91f2d5befda939160177e13b3f47 - + https://github.com/dotnet/runtime-assets - 3faec1bca5963b4ca54741e12939c77b951df604 + 1cfc6ba21d0377b51f17eac4fdc2557f7b1e8693 diff --git a/eng/Versions.props b/eng/Versions.props index 158cf46912a3..ba4835ce6b58 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -7,12 +7,12 @@ 0 0 $(MajorVersion).0.100 - 9.0.0 - 8.0.8 + 9.0.3 + 8.0.$([MSBuild]::Add($([System.Version]::Parse('$(PackageVersionNet9)').Build),11)) 7.0.20 - 6.0.$([MSBuild]::Add($([System.Version]::Parse('$(PackageVersionNet8)').Build),25)) + 6.0.36 preview - 2 + 6 false release @@ -36,17 +36,17 @@ - 3.12.0-beta1.25081.1 - 10.0.0-preview.25081.1 + 10.0.0-preview.25302.103 + 5.0.0-1.25302.103 - 4.14.0-2.25107.8 - 4.14.0-2.25107.8 - 4.14.0-2.25107.8 + 5.0.0-1.25302.103 + 5.0.0-1.25302.103 + 5.0.0-1.25302.103 - 10.0.100-alpha.1.25077.2 + 10.0.100-preview.6.25302.103 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 2.9.2-beta.25106.4 - 10.0.0-beta.25106.4 - 2.9.2-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 - 10.0.0-beta.25106.4 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 2.9.2-beta.25302.103 + 10.0.0-beta.25302.103 + 2.9.2-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 + 10.0.0-beta.25302.103 1.4.0 6.0.0-preview.1.102 - 10.0.0-alpha.1.25068.1 + 10.0.0-preview.6.25302.103 6.0.0 - 10.0.0-alpha.1.25068.1 + 10.0.0-preview.6.25302.103 + 10.0.0-preview.6.25302.103 + + 6.0.0 + 4.6.1 + 4.9.0 + 4.6.3 + 4.6.1 + 6.1.2 + 4.6.3 + 4.6.1 6.0.0 - 6.0.0 5.0.0 1.2.0-beta.556 - 4.6.0 5.0.0 - 4.9.0 8.0.0 8.0.1 5.0.0 - 4.6.0 - 4.6.0 - 10.0.0-alpha.1.25068.1 - 10.0.0-alpha.1.25068.1 + 10.0.0-preview.6.25302.103 + 10.0.0-preview.6.25302.103 6.0.0 5.0.0 5.0.0 5.0.0 7.0.0 - 10.0.0-alpha.1.25068.1 - 6.1.0 + 10.0.0-preview.6.25302.103 7.0.0 - 4.6.0 - 4.5.0 - 10.0.0-alpha.1.25068.1 + 10.0.0-preview.6.25302.103 + 8.0.0 + 4.5.1 8.0.0 +<<<<<<< HEAD 8.0.5 8.0.0 +======= + 4.5.5 + 8.0.5 +>>>>>>> upstream-jun 8.0.0 + 8.0.0 + 8.0.0 + 4.5.4 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 10.0.0-beta.25103.1 - 
10.0.0-beta.25103.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 + 10.0.0-beta.25262.1 - 10.0.0-prerelease.25077.1 - 10.0.0-prerelease.25077.1 - 10.0.0-prerelease.25077.1 + 10.0.0-prerelease.25255.1 + 10.0.0-prerelease.25255.1 + 10.0.0-prerelease.25255.1 - 10.0.0-alpha.0.25077.1 + 10.0.0-alpha.0.25255.1 - 1.0.0-prerelease.25079.10 - 1.0.0-prerelease.25079.10 - 1.0.0-prerelease.25079.10 - 1.0.0-prerelease.25079.10 - 1.0.0-prerelease.25079.10 - 1.0.0-prerelease.25079.10 + 1.0.0-prerelease.25256.1 + 1.0.0-prerelease.25256.1 + 1.0.0-prerelease.25256.1 + 1.0.0-prerelease.25256.1 + 1.0.0-prerelease.25256.1 + 1.0.0-prerelease.25256.1 2.0.0 17.10.0-beta1.24272.1 - 2.0.0-beta4.24528.1 + 2.0.0-beta5.25302.103 3.1.16 2.1.0 2.0.3 1.0.4-preview6.19326.1 2.0.5 17.8.3 - $(MicrosoftBuildVersion) - $(MicrosoftBuildVersion) - $(MicrosoftBuildVersion) + 17.8.3 + 17.8.3 + 17.8.3 6.2.4 6.2.4 6.2.4 @@ -190,7 +202,7 @@ 7.0.412701 6.0 - 1.4.0 + 1.5.0 17.4.0-preview-20220707-01 3.12.0 4.5.0 @@ -201,7 +213,7 @@ 1.0.2 2.0.4 4.18.4 - 6.7.0 + 8.0.2 2.14.3 2.9.1 @@ -219,59 +231,59 @@ 9.0.0-preview-20241010.1 - 0.11.5-alpha.25078.1 + 0.11.5-alpha.25302.103 - 10.0.0-preview.2.25102.3 + 10.0.0-preview.5.25261.1 - 2.4.3 + 2.4.8 9.0.0-alpha.1.24167.3 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 - 10.0.0-preview.2.25105.3 + 10.0.100-preview.6.25302.103 $(MicrosoftNETWorkloadEmscriptenCurrentManifest100100TransportVersion) 1.1.87-gba258badda 1.0.0-v3.14.0.5722 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 - 19.1.0-alpha.1.25077.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 + 19.1.0-alpha.1.25167.1 3.1.7 1.0.406601 $(MicrosoftDotNetApiCompatTaskVersion) - 10.0.0-alpha.1.25103.1 - $(MicrosoftNETRuntimeEmscriptenVersion) + 10.0.0-alpha.1.25169.1 + 10.0.0-preview.4.25217.3 $(runtimewinx64MicrosoftNETCoreRuntimeWasmNodeTransportPackageVersion) 3.1.56 diff --git a/eng/build-analysis-configuration.json b/eng/build-analysis-configuration.json index d647594ab402..c8109038a67d 100644 --- a/eng/build-analysis-configuration.json +++ 
b/eng/build-analysis-configuration.json @@ -7,6 +7,22 @@ { "PipelineId": 133, "PipelineName": "runtime-dev-innerloop" + }, + { + "PipelineId": 154, + "PipelineName": "runtime-extra-platforms" + }, + { + "PipelineId": 157, + "PipelineName": "runtime-llvm" + }, + { + "PipelineId": 265, + "PipelineName": "runtime-nativeaot-outerloop" + }, + { + "PipelineId": 108, + "PipelineName": "runtime-coreclr outerloop" } ] } diff --git a/eng/build.ps1 b/eng/build.ps1 index e38be81d660e..e6810524b635 100644 --- a/eng/build.ps1 +++ b/eng/build.ps1 @@ -78,7 +78,7 @@ function Get-Help() { Write-Host "Libraries settings:" Write-Host " -coverage Collect code coverage when testing." - Write-Host " -framework (-f) Build framework: net10.0 or net48." + Write-Host " -framework (-f) Build framework: net10.0 or net481." Write-Host " [Default: net10.0]" Write-Host " -testnobuild Skip building tests when invoking -test." Write-Host " -testscope Scope tests, allowed values: innerloop, outerloop, all." @@ -175,12 +175,13 @@ if ($vs) { $configToOpen = $runtimeConfiguration } + # Auto-generated solution file that still uses the sln format if ($vs -ieq "coreclr.sln") { # If someone passes in coreclr.sln (case-insensitive), # launch the generated CMake solution. $vs = Split-Path $PSScriptRoot -Parent | Join-Path -ChildPath "artifacts\obj\coreclr" | Join-Path -ChildPath "windows.$archToOpen.$((Get-Culture).TextInfo.ToTitleCase($configToOpen))" | Join-Path -ChildPath "ide" | Join-Path -ChildPath "CoreCLR.sln" if (-Not (Test-Path $vs)) { - Invoke-Expression "& `"$repoRoot/src/coreclr/build-runtime.cmd`" -configureonly -$archToOpen -$configToOpen -msbuild" + Invoke-Expression "& `"$repoRoot/eng/common/msbuild.ps1`" $repoRoot/src/coreclr/runtime.proj /clp:nosummary /restore /p:Ninja=false /p:Configuration=$configToOpen /p:TargetArchitecture=$archToOpen /p:ConfigureOnly=true /p:ClrFullNativeBuild=true" if ($lastExitCode -ne 0) { Write-Error "Failed to generate the CoreCLR solution file." 
exit 1 @@ -190,6 +191,7 @@ if ($vs) { } } } + # Auto-generated solution file that still uses the sln format elseif ($vs -ieq "corehost.sln") { $vs = Split-Path $PSScriptRoot -Parent | Join-Path -ChildPath "artifacts\obj\" | Join-Path -ChildPath "win-$archToOpen.$((Get-Culture).TextInfo.ToTitleCase($configToOpen))" | Join-Path -ChildPath "corehost" | Join-Path -ChildPath "ide" | Join-Path -ChildPath "corehost.sln" if (-Not (Test-Path $vs)) { @@ -208,24 +210,24 @@ if ($vs) { if ($runtimeFlavor -eq "Mono") { # Search for the solution in mono - $vs = Split-Path $PSScriptRoot -Parent | Join-Path -ChildPath "src\mono" | Join-Path -ChildPath $vs | Join-Path -ChildPath "$vs.sln" + $vs = Split-Path $PSScriptRoot -Parent | Join-Path -ChildPath "src\mono" | Join-Path -ChildPath $vs | Join-Path -ChildPath "$vs.slnx" } else { # Search for the solution in coreclr - $vs = Split-Path $PSScriptRoot -Parent | Join-Path -ChildPath "src\coreclr" | Join-Path -ChildPath $vs | Join-Path -ChildPath "$vs.sln" + $vs = Split-Path $PSScriptRoot -Parent | Join-Path -ChildPath "src\coreclr" | Join-Path -ChildPath $vs | Join-Path -ChildPath "$vs.slnx" } if (-Not (Test-Path $vs)) { $vs = $solution # Search for the solution in libraries - $vs = Split-Path $PSScriptRoot -Parent | Join-Path -ChildPath "src\libraries" | Join-Path -ChildPath $vs | Join-Path -ChildPath "$vs.sln" + $vs = Split-Path $PSScriptRoot -Parent | Join-Path -ChildPath "src\libraries" | Join-Path -ChildPath $vs | Join-Path -ChildPath "$vs.slnx" if (-Not (Test-Path $vs)) { $vs = $solution # Search for the solution in installer - if (-Not ($vs.endswith(".sln"))) { - $vs = "$vs.sln" + if (-Not ($vs.endswith(".slnx"))) { + $vs = "$vs.slnx" } $vs = Split-Path $PSScriptRoot -Parent | Join-Path -ChildPath "src\installer" | Join-Path -ChildPath $vs diff --git a/eng/build.sh b/eng/build.sh index 1cab7739726f..3919b9624d78 100755 --- a/eng/build.sh +++ b/eng/build.sh @@ -34,7 +34,7 @@ usage() echo " tvossimulator, ios, iossimulator, android, browser, wasi, netbsd, illumos, solaris" echo " linux-musl, linux-bionic, tizen, or haiku." echo " [Default: Your machine's OS.]" - echo " --outputrid Optional argument that overrides the target rid name." + echo " --targetrid Optional argument that overrides the target rid name." echo " --projects Project or solution file(s) to build." echo " --runtimeConfiguration (-rc) Runtime build configuration: Debug, Release or Checked." echo " Checked is exclusive to the CLR runtime. It is the same as Debug, except code is" @@ -48,6 +48,8 @@ usage() echo " --usemonoruntime Product a .NET runtime with Mono as the underlying runtime." echo " --verbosity (-v) MSBuild verbosity: q[uiet], m[inimal], n[ormal], d[etailed], and diag[nostic]." echo " [Default: Minimal]" + echo " --use-bootstrap Use the results of building the bootstrap subset to build published tools on the target machine." + echo " --bootstrap Build the bootstrap subset and then build the repo with --use-bootstrap." echo "" echo "Actions (defaults to --restore --build):" @@ -66,7 +68,7 @@ usage() echo "Libraries settings:" echo " --coverage Collect code coverage when testing." - echo " --framework (-f) Build framework: net10.0 or net48." + echo " --framework (-f) Build framework: net10.0 or net481." echo " [Default: net10.0]" echo " --testnobuild Skip building tests when invoking -test." echo " --testscope Test scope, allowed values: innerloop, outerloop, all." 
@@ -151,11 +153,12 @@ showSubsetHelp() "$scriptroot/common/build.sh" "-restore" "-build" "/p:Subset=help" "/clp:nosummary /tl:false" } -arguments='' +arguments=() cmakeargs='' -extraargs='' +extraargs=() crossBuild=0 portableBuild=1 +bootstrap=0 source $scriptroot/common/native/init-os-and-arch.sh @@ -175,7 +178,7 @@ while [[ $# > 0 ]]; do exit 0 fi - arguments="$arguments /p:Subset=$1" + arguments+=("/p:Subset=$1") shift 1 continue fi @@ -198,7 +201,7 @@ while [[ $# > 0 ]]; do showSubsetHelp exit 0 fi - arguments="$arguments /p:Subset=$2" + arguments+=("/p:Subset=$2") shift 2 fi ;; @@ -238,7 +241,7 @@ while [[ $# > 0 ]]; do exit 1 ;; esac - arguments="$arguments -configuration $val" + arguments+=("-configuration" "$val") shift 2 ;; @@ -248,7 +251,7 @@ while [[ $# > 0 ]]; do exit 1 fi val="$(echo "$2" | tr "[:upper:]" "[:lower:]")" - arguments="$arguments /p:BuildTargetFramework=$val" + arguments+=("/p:BuildTargetFramework=$val") shift 2 ;; @@ -303,12 +306,12 @@ while [[ $# > 0 ]]; do exit 1 ;; esac - arguments="$arguments /p:TargetOS=$os" + arguments+=("/p:TargetOS=$os") shift 2 ;; -pack) - arguments="$arguments --pack /p:BuildAllConfigurations=true" + arguments+=("--pack" "/p:BuildAllConfigurations=true") shift 1 ;; @@ -317,17 +320,17 @@ while [[ $# > 0 ]]; do echo "No test scope supplied. See help (--help) for supported test scope values." 1>&2 exit 1 fi - arguments="$arguments /p:TestScope=$2" + arguments+=("/p:TestScope=$2") shift 2 ;; -testnobuild) - arguments="$arguments /p:TestNoBuild=true" + arguments+=("/p:TestNoBuild=true") shift 1 ;; -coverage) - arguments="$arguments /p:Coverage=true" + arguments+=("/p:Coverage=true") shift 1 ;; @@ -347,7 +350,7 @@ while [[ $# > 0 ]]; do exit 1 ;; esac - arguments="$arguments /p:RuntimeConfiguration=$val" + arguments+=("/p:RuntimeConfiguration=$val") shift 2 ;; @@ -367,12 +370,12 @@ while [[ $# > 0 ]]; do exit 1 ;; esac - arguments="$arguments /p:RuntimeFlavor=$val" + arguments+=("/p:RuntimeFlavor=$val") shift 2 ;; -usemonoruntime) - arguments="$arguments /p:PrimaryRuntimeFlavor=Mono" + arguments+=("/p:PrimaryRuntimeFlavor=Mono") shift 1 ;; @@ -392,7 +395,7 @@ while [[ $# > 0 ]]; do exit 1 ;; esac - arguments="$arguments /p:LibrariesConfiguration=$val" + arguments+=("/p:LibrariesConfiguration=$val") shift 2 ;; @@ -412,25 +415,25 @@ while [[ $# > 0 ]]; do exit 1 ;; esac - arguments="$arguments /p:HostConfiguration=$val" + arguments+=("/p:HostConfiguration=$val") shift 2 ;; -cross) crossBuild=1 - arguments="$arguments /p:CrossBuild=True" + arguments+=("/p:CrossBuild=True") shift 1 ;; *crossbuild=true*) crossBuild=1 - extraargs="$extraargs $1" + extraargs+=("$1") shift 1 ;; -clang*) compiler="${opt/#-/}" # -clang-9 => clang-9 or clang-9 => (unchanged) - arguments="$arguments /p:Compiler=$compiler /p:CppCompilerAndLinker=$compiler" + arguments+=("/p:Compiler=$compiler" "/p:CppCompilerAndLinker=$compiler") shift 1 ;; @@ -445,16 +448,16 @@ while [[ $# > 0 ]]; do -gcc*) compiler="${opt/#-/}" # -gcc-9 => gcc-9 or gcc-9 => (unchanged) - arguments="$arguments /p:Compiler=$compiler /p:CppCompilerAndLinker=$compiler" + arguments+=("/p:Compiler=$compiler" "/p:CppCompilerAndLinker=$compiler") shift 1 ;; - -outputrid) + -targetrid|-outputrid) if [ -z ${2+x} ]; then - echo "No value for outputrid is supplied. See help (--help) for supported values." 1>&2 + echo "No value for targetrid is supplied. See help (--help) for supported values." 
1>&2 exit 1 fi - arguments="$arguments /p:OutputRID=$(echo "$2" | tr "[:upper:]" "[:lower:]")" + arguments+=("/p:TargetRid=$(echo "$2" | tr "[:upper:]" "[:lower:]")") shift 2 ;; @@ -466,7 +469,7 @@ while [[ $# > 0 ]]; do passedPortable="$(echo "$2" | tr "[:upper:]" "[:lower:]")" if [ "$passedPortable" = false ]; then portableBuild=0 - arguments="$arguments /p:PortableBuild=false" + arguments+=("/p:PortableBuild=false") fi shift 2 ;; @@ -478,7 +481,7 @@ while [[ $# > 0 ]]; do fi passedKeepNativeSymbols="$(echo "$2" | tr "[:upper:]" "[:lower:]")" if [ "$passedKeepNativeSymbols" = true ]; then - arguments="$arguments /p:KeepNativeSymbols=true" + arguments+=("/p:KeepNativeSymbols=true") fi shift 2 ;; @@ -486,25 +489,35 @@ while [[ $# > 0 ]]; do -ninja) if [ -z ${2+x} ]; then - arguments="$arguments /p:Ninja=true" + arguments+=("/p:Ninja=true") shift 1 else ninja="$(echo "$2" | tr "[:upper:]" "[:lower:]")" if [ "$ninja" = true ]; then - arguments="$arguments /p:Ninja=true" + arguments+=("/p:Ninja=true") shift 2 elif [ "$ninja" = false ]; then - arguments="$arguments /p:Ninja=false" + arguments+=("/p:Ninja=false") shift 2 else - arguments="$arguments /p:Ninja=true" + arguments+=("/p:Ninja=true") shift 1 fi fi ;; -pgoinstrument) - arguments="$arguments /p:PgoInstrument=true" + arguments+=("/p:PgoInstrument=true") + shift 1 + ;; + + -use-bootstrap) + arguments+=("/p:UseBootstrap=true") + shift 1 + ;; + + -bootstrap) + bootstrap=1 shift 1 ;; @@ -513,35 +526,39 @@ while [[ $# > 0 ]]; do echo "No value for -fsanitize is supplied. See help (--help) for supported values." 1>&2 exit 1 fi - arguments="$arguments /p:EnableNativeSanitizers=$2" + arguments+=("/p:EnableNativeSanitizers=$2") shift 2 ;; -fsanitize=*) sanitizers="${opt/#-fsanitize=/}" # -fsanitize=address => address - arguments="$arguments /p:EnableNativeSanitizers=$sanitizers" + arguments+=("/p:EnableNativeSanitizers=$sanitizers") shift 2 ;; -verbose) - arguments="$arguments /p:CoreclrVerbose=true" + arguments+=("/p:CoreclrVerbose=true") shift 1 ;; *) - extraargs="$extraargs $1" + extraargs+=("$1") shift 1 ;; esac done if [ ${#actInt[@]} -eq 0 ]; then - arguments="-restore -build $arguments" + arguments=("-restore" "-build" ${arguments[@]+"${arguments[@]}"}) fi if [[ "$os" == "browser" ]]; then # override default arch for Browser, we only support wasm arch=wasm + # because on docker instance without swap file, MSBuild nodes need to make some room for LLVM + # https://github.com/dotnet/runtime/issues/113724 + # this is hexa percentage: 46-> 70% + export DOTNET_GCHeapHardLimitPercent="46" fi if [[ "$os" == "wasi" ]]; then # override default arch for wasi, we only support wasm @@ -549,11 +566,11 @@ if [[ "$os" == "wasi" ]]; then fi if [[ "${TreatWarningsAsErrors:-}" == "false" ]]; then - arguments="$arguments -warnAsError false" + arguments+=("-warnAsError" "false") fi # disable terminal logger for now: https://github.com/dotnet/runtime/issues/97211 -arguments="$arguments -tl:false" +arguments+=("-tl:false") initDistroRid "$os" "$arch" "$crossBuild" @@ -564,6 +581,31 @@ export DOTNETSDK_ALLOW_TARGETING_PACK_CACHING=0 # URL-encode space (%20) to avoid quoting issues until the msbuild call in /eng/common/tools.sh. # In *proj files (XML docs), URL-encoded string are rendered in their decoded form. 
cmakeargs="${cmakeargs// /%20}" -arguments="$arguments /p:TargetArchitecture=$arch /p:BuildArchitecture=$hostArch" -arguments="$arguments /p:CMakeArgs=\"$cmakeargs\" $extraargs" -"$scriptroot/common/build.sh" $arguments +arguments+=("/p:TargetArchitecture=$arch" "/p:BuildArchitecture=$hostArch") +arguments+=("/p:CMakeArgs=\"$cmakeargs\"" ${extraargs[@]+"${extraargs[@]}"}) + +if [[ "$bootstrap" == "1" ]]; then + # Strip build actions other than -restore and -build from the arguments for the bootstrap build. + bootstrapArguments=() + for argument in "${arguments[@]}"; do + add=1 + for flag in --sign --publish --pack --test -sign -publish -pack -test; do + if [[ "$argument" == "$flag" ]]; then + add=0 + fi + done + if [[ $add == 1 ]]; then + bootstrapArguments+=("$argument") + fi + done + "$scriptroot/common/build.sh" ${bootstrapArguments[@]+"${bootstrapArguments[@]}"} /p:Subset=bootstrap -bl:$scriptroot/../artifacts/log/bootstrap.binlog + + # Remove artifacts from the bootstrap build so the product build is a "clean" build. + echo "Cleaning up artifacts from bootstrap build..." + rm -r "$scriptroot/../artifacts/bin" + # Remove all directories in obj except for the source-built-upstream-cache directory to avoid breaking SourceBuild. + find "$scriptroot/../artifacts/obj" -mindepth 1 -maxdepth 1 ! -name 'source-built-upstream-cache' -exec rm -rf {} + + arguments+=("/p:UseBootstrap=true") +fi + +"$scriptroot/common/build.sh" ${arguments[@]+"${arguments[@]}"} diff --git a/eng/common/CIBuild.cmd b/eng/common/CIBuild.cmd index 56c2f25ac22f..ac1f72bf94e0 100644 --- a/eng/common/CIBuild.cmd +++ b/eng/common/CIBuild.cmd @@ -1,2 +1,2 @@ @echo off -powershell -ExecutionPolicy ByPass -NoProfile -command "& """%~dp0Build.ps1""" -restore -build -test -sign -pack -publish -ci %*" \ No newline at end of file +powershell -ExecutionPolicy ByPass -NoProfile -command "& """%~dp0Build.ps1""" -restore -build -test -sign -pack -publish -ci %*" diff --git a/eng/common/SetupNugetSources.sh b/eng/common/SetupNugetSources.sh old mode 100644 new mode 100755 diff --git a/eng/common/build.ps1 b/eng/common/build.ps1 index 7148db862a14..58cae488bd68 100644 --- a/eng/common/build.ps1 +++ b/eng/common/build.ps1 @@ -7,6 +7,7 @@ Param( [string] $msbuildEngine = $null, [bool] $warnAsError = $true, [bool] $nodeReuse = $true, + [switch] $buildCheck = $false, [switch][Alias('r')]$restore, [switch] $deployDeps, [switch][Alias('b')]$build, @@ -20,6 +21,7 @@ Param( [switch] $publish, [switch] $clean, [switch][Alias('pb')]$productBuild, + [switch]$fromVMR, [switch][Alias('bl')]$binaryLog, [switch][Alias('nobl')]$excludeCIBinarylog, [switch] $ci, @@ -71,6 +73,9 @@ function Print-Usage() { Write-Host " -msbuildEngine Msbuild engine to use to run build ('dotnet', 'vs', or unspecified)." Write-Host " -excludePrereleaseVS Set to exclude build engines in prerelease versions of Visual Studio" Write-Host " -nativeToolsOnMachine Sets the native tools on machine environment variable (indicating that the script should use native tools on machine)" + Write-Host " -nodeReuse Sets nodereuse msbuild parameter ('true' or 'false')" + Write-Host " -buildCheck Sets /check msbuild parameter" + Write-Host " -fromVMR Set when building from within the VMR" Write-Host "" Write-Host "Command line arguments not listed above are passed thru to msbuild." 
@@ -97,6 +102,7 @@ function Build { $bl = if ($binaryLog) { '/bl:' + (Join-Path $LogDir 'Build.binlog') } else { '' } $platformArg = if ($platform) { "/p:Platform=$platform" } else { '' } + $check = if ($buildCheck) { '/check' } else { '' } if ($projects) { # Re-assign properties to a new variable because PowerShell doesn't let us append properties directly for unclear reasons. @@ -113,6 +119,7 @@ function Build { MSBuild $toolsetBuildProj ` $bl ` $platformArg ` + $check ` /p:Configuration=$configuration ` /p:RepoRoot=$RepoRoot ` /p:Restore=$restore ` @@ -122,11 +129,13 @@ function Build { /p:Deploy=$deploy ` /p:Test=$test ` /p:Pack=$pack ` - /p:DotNetBuildRepo=$productBuild ` + /p:DotNetBuild=$productBuild ` + /p:DotNetBuildFromVMR=$fromVMR ` /p:IntegrationTest=$integrationTest ` /p:PerformanceTest=$performanceTest ` /p:Sign=$sign ` /p:Publish=$publish ` + /p:RestoreStaticGraphEnableBinaryLogger=$binaryLog ` @properties } diff --git a/eng/common/build.sh b/eng/common/build.sh index 483647daf182..9767bb411a4f 100755 --- a/eng/common/build.sh +++ b/eng/common/build.sh @@ -42,6 +42,8 @@ usage() echo " --prepareMachine Prepare machine for CI run, clean up processes after build" echo " --nodeReuse Sets nodereuse msbuild parameter ('true' or 'false')" echo " --warnAsError Sets warnaserror msbuild parameter ('true' or 'false')" + echo " --buildCheck Sets /check msbuild parameter" + echo " --fromVMR Set when building from within the VMR" echo "" echo "Command line arguments not listed above are passed thru to msbuild." echo "Arguments can also be passed in with a single hyphen." @@ -63,6 +65,7 @@ restore=false build=false source_build=false product_build=false +from_vmr=false rebuild=false test=false integration_test=false @@ -76,6 +79,7 @@ clean=false warn_as_error=true node_reuse=true +build_check=false binary_log=false exclude_ci_binary_log=false pipelines_log=false @@ -87,7 +91,7 @@ verbosity='minimal' runtime_source_feed='' runtime_source_feed_key='' -properties='' +properties=() while [[ $# > 0 ]]; do opt="$(echo "${1/#--/-}" | tr "[:upper:]" "[:lower:]")" case "$opt" in @@ -127,19 +131,22 @@ while [[ $# > 0 ]]; do -pack) pack=true ;; - -sourcebuild|-sb) + -sourcebuild|-source-build|-sb) build=true source_build=true product_build=true restore=true pack=true ;; - -productBuild|-pb) + -productbuild|-product-build|-pb) build=true product_build=true restore=true pack=true ;; + -fromvmr|-from-vmr) + from_vmr=true + ;; -test|-t) test=true ;; @@ -173,6 +180,9 @@ while [[ $# > 0 ]]; do node_reuse=$2 shift ;; + -buildcheck) + build_check=true + ;; -runtimesourcefeed) runtime_source_feed=$2 shift @@ -182,7 +192,7 @@ while [[ $# > 0 ]]; do shift ;; *) - properties="$properties $1" + properties+=("$1") ;; esac @@ -216,7 +226,7 @@ function Build { InitializeCustomToolset if [[ ! 
-z "$projects" ]]; then - properties="$properties /p:Projects=$projects" + properties+=("/p:Projects=$projects") fi local bl="" @@ -224,14 +234,21 @@ function Build { bl="/bl:\"$log_dir/Build.binlog\"" fi + local check="" + if [[ "$build_check" == true ]]; then + check="/check" + fi + MSBuild $_InitializeToolset \ $bl \ + $check \ /p:Configuration=$configuration \ /p:RepoRoot="$repo_root" \ /p:Restore=$restore \ /p:Build=$build \ - /p:DotNetBuildRepo=$product_build \ + /p:DotNetBuild=$product_build \ /p:DotNetBuildSourceOnly=$source_build \ + /p:DotNetBuildFromVMR=$from_vmr \ /p:Rebuild=$rebuild \ /p:Test=$test \ /p:Pack=$pack \ @@ -239,7 +256,8 @@ function Build { /p:PerformanceTest=$performance_test \ /p:Sign=$sign \ /p:Publish=$publish \ - $properties + /p:RestoreStaticGraphEnableBinaryLogger=$binary_log \ + ${properties[@]+"${properties[@]}"} ExitWithExitCode 0 } diff --git a/eng/common/cibuild.sh b/eng/common/cibuild.sh index 1a02c0dec8fd..66e3b0ac61c3 100755 --- a/eng/common/cibuild.sh +++ b/eng/common/cibuild.sh @@ -13,4 +13,4 @@ while [[ -h $source ]]; do done scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" -. "$scriptroot/build.sh" --restore --build --test --pack --publish --ci $@ \ No newline at end of file +. "$scriptroot/build.sh" --restore --build --test --pack --publish --ci $@ diff --git a/eng/common/core-templates/job/job.yml b/eng/common/core-templates/job/job.yml index 295c9a2317c4..6badecba7bcc 100644 --- a/eng/common/core-templates/job/job.yml +++ b/eng/common/core-templates/job/job.yml @@ -23,7 +23,6 @@ parameters: enablePublishBuildArtifacts: false enablePublishBuildAssets: false enablePublishTestResults: false - enablePublishUsingPipelines: false enableBuildRetry: false mergeTestResults: false testRunTitle: '' @@ -74,9 +73,6 @@ jobs: - ${{ if ne(parameters.enableTelemetry, 'false') }}: - name: DOTNET_CLI_TELEMETRY_PROFILE value: '$(Build.Repository.Uri)' - - ${{ if eq(parameters.enableRichCodeNavigation, 'true') }}: - - name: EnableRichCodeNavigation - value: 'true' # Retry signature validation up to three times, waiting 2 seconds between attempts. # See https://learn.microsoft.com/en-us/nuget/reference/errors-and-warnings/nu3028#retry-untrusted-root-failures - name: NUGET_EXPERIMENTAL_CHAIN_BUILD_RETRY_POLICY @@ -148,16 +144,6 @@ jobs: - ${{ each step in parameters.steps }}: - ${{ step }} - - ${{ if eq(parameters.enableRichCodeNavigation, true) }}: - - task: RichCodeNavIndexer@0 - displayName: RichCodeNav Upload - inputs: - languages: ${{ coalesce(parameters.richCodeNavigationLanguage, 'csharp') }} - environment: ${{ coalesce(parameters.richCodeNavigationEnvironment, 'internal') }} - richNavLogOutputDirectory: $(Build.SourcesDirectory)/artifacts/bin - uploadRichNavArtifacts: ${{ coalesce(parameters.richCodeNavigationUploadArtifacts, false) }} - continueOnError: true - - ${{ each step in parameters.componentGovernanceSteps }}: - ${{ step }} diff --git a/eng/common/core-templates/job/publish-build-assets.yml b/eng/common/core-templates/job/publish-build-assets.yml index 3d3356e31967..4f1dc42e02c5 100644 --- a/eng/common/core-templates/job/publish-build-assets.yml +++ b/eng/common/core-templates/job/publish-build-assets.yml @@ -20,9 +20,6 @@ parameters: # if 'true', the build won't run any of the internal only steps, even if it is running in non-public projects. 
runAsPublic: false - # Optional: whether the build's artifacts will be published using release pipelines or direct feed publishing - publishUsingPipelines: false - # Optional: whether the build's artifacts will be published using release pipelines or direct feed publishing publishAssetsImmediately: false @@ -32,6 +29,9 @@ parameters: is1ESPipeline: '' + # Optional: whether or not the build has assets it wants to publish to BAR + isAssetlessBuild: false + jobs: - job: Asset_Registry_Publish @@ -75,15 +75,15 @@ jobs: - checkout: self fetchDepth: 3 clean: true - - - task: DownloadBuildArtifacts@0 - displayName: Download artifact - inputs: - artifactName: AssetManifests - downloadPath: '$(Build.StagingDirectory)/Download' - checkDownloadedFiles: true - condition: ${{ parameters.condition }} - continueOnError: ${{ parameters.continueOnError }} + + - ${{ if eq(parameters.isAssetlessBuild, 'false') }}: + - task: DownloadPipelineArtifact@2 + displayName: Download Asset Manifests + inputs: + artifactName: AssetManifests + targetPath: '$(Build.StagingDirectory)/AssetManifests' + condition: ${{ parameters.condition }} + continueOnError: ${{ parameters.continueOnError }} - task: NuGetAuthenticate@1 @@ -95,9 +95,9 @@ jobs: scriptLocation: scriptPath scriptPath: $(Build.SourcesDirectory)/eng/common/sdk-task.ps1 arguments: -task PublishBuildAssets -restore -msbuildEngine dotnet - /p:ManifestsPath='$(Build.StagingDirectory)/Download/AssetManifests' + /p:ManifestsPath='$(Build.StagingDirectory)/AssetManifests' + /p:IsAssetlessBuild=${{ parameters.isAssetlessBuild }} /p:MaestroApiEndpoint=https://maestro.dot.net - /p:PublishUsingPipelines=${{ parameters.publishUsingPipelines }} /p:OfficialBuildId=$(Build.BuildNumber) condition: ${{ parameters.condition }} continueOnError: ${{ parameters.continueOnError }} @@ -129,7 +129,7 @@ jobs: publishLocation: Container artifactName: ReleaseConfigs - - ${{ if eq(parameters.publishAssetsImmediately, 'true') }}: + - ${{ if or(eq(parameters.publishAssetsImmediately, 'true'), eq(parameters.isAssetlessBuild, 'true')) }}: - template: /eng/common/core-templates/post-build/setup-maestro-vars.yml parameters: BARBuildId: ${{ parameters.BARBuildId }} @@ -150,6 +150,7 @@ jobs: -WaitPublishingFinish true -ArtifactsPublishingAdditionalParameters '${{ parameters.artifactsPublishingAdditionalParameters }}' -SymbolPublishingAdditionalParameters '${{ parameters.symbolPublishingAdditionalParameters }}' + -SkipAssetsPublishing '${{ parameters.isAssetlessBuild }}' - ${{ if eq(parameters.enablePublishBuildArtifacts, 'true') }}: - template: /eng/common/core-templates/steps/publish-logs.yml diff --git a/eng/common/core-templates/job/source-build.yml b/eng/common/core-templates/job/source-build.yml index 05f7ad6ef0d5..d805d5faeb94 100644 --- a/eng/common/core-templates/job/source-build.yml +++ b/eng/common/core-templates/job/source-build.yml @@ -27,6 +27,8 @@ parameters: # Specifies the build script to invoke to perform the build in the repo. The default # './build.sh' should work for typical Arcade repositories, but this is customizable for # difficult situations. + # buildArguments: '' + # Specifies additional build arguments to pass to the build script. # jobProperties: {} # A list of job properties to inject at the top level, for potential extensibility beyond # container and pool.
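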
diff --git a/eng/common/core-templates/jobs/codeql-build.yml b/eng/common/core-templates/jobs/codeql-build.yml index f2144252cc65..693b00b37044 100644 --- a/eng/common/core-templates/jobs/codeql-build.yml +++ b/eng/common/core-templates/jobs/codeql-build.yml @@ -15,7 +15,6 @@ jobs: enablePublishBuildArtifacts: false enablePublishTestResults: false enablePublishBuildAssets: false - enablePublishUsingPipelines: false enableTelemetry: true variables: diff --git a/eng/common/core-templates/jobs/jobs.yml b/eng/common/core-templates/jobs/jobs.yml index ea69be4341c6..bf35b78faa60 100644 --- a/eng/common/core-templates/jobs/jobs.yml +++ b/eng/common/core-templates/jobs/jobs.yml @@ -5,9 +5,6 @@ parameters: # Optional: Include PublishBuildArtifacts task enablePublishBuildArtifacts: false - # Optional: Enable publishing using release pipelines - enablePublishUsingPipelines: false - # Optional: Enable running the source-build jobs to build repo from source enableSourceBuild: false @@ -30,6 +27,9 @@ parameters: # Optional: Publish the assets as soon as the publish to BAR stage is complete, rather doing so in a separate stage. publishAssetsImmediately: false + # Optional: whether or not the build has assets it wants to publish to BAR + isAssetlessBuild: false + # Optional: If using publishAssetsImmediately and additional parameters are needed, can be used to send along additional parameters (normally sent to post-build.yml) artifactsPublishingAdditionalParameters: '' signingValidationAdditionalParameters: '' @@ -96,7 +96,7 @@ jobs: ${{ parameter.key }}: ${{ parameter.value }} - ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: - - ${{ if or(eq(parameters.enablePublishBuildAssets, true), eq(parameters.artifacts.publish.manifests, 'true'), ne(parameters.artifacts.publish.manifests, '')) }}: + - ${{ if or(eq(parameters.enablePublishBuildAssets, true), eq(parameters.artifacts.publish.manifests, 'true'), ne(parameters.artifacts.publish.manifests, ''), eq(parameters.isAssetlessBuild, true)) }}: - template: ../job/publish-build-assets.yml parameters: is1ESPipeline: ${{ parameters.is1ESPipeline }} @@ -112,8 +112,8 @@ jobs: - Source_Build_Complete runAsPublic: ${{ parameters.runAsPublic }} - publishUsingPipelines: ${{ parameters.enablePublishUsingPipelines }} - publishAssetsImmediately: ${{ parameters.publishAssetsImmediately }} + publishAssetsImmediately: ${{ or(parameters.publishAssetsImmediately, parameters.isAssetlessBuild) }} + isAssetlessBuild: ${{ parameters.isAssetlessBuild }} enablePublishBuildArtifacts: ${{ parameters.enablePublishBuildArtifacts }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} signingValidationAdditionalParameters: ${{ parameters.signingValidationAdditionalParameters }} diff --git a/eng/common/core-templates/jobs/source-build.yml b/eng/common/core-templates/jobs/source-build.yml index a10ccfbee6de..df24c948ba12 100644 --- a/eng/common/core-templates/jobs/source-build.yml +++ b/eng/common/core-templates/jobs/source-build.yml @@ -14,7 +14,7 @@ parameters: # This is the default platform provided by Arcade, intended for use by a managed-only repo. defaultManagedPlatform: name: 'Managed' - container: 'mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream9' + container: 'mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream-10-amd64' # Defines the platforms on which to run build jobs.
One job is created for each platform, and the # object in this array is sent to the job template as 'platform'. If no platforms are specified, diff --git a/eng/common/core-templates/post-build/post-build.yml b/eng/common/core-templates/post-build/post-build.yml index a8c0bd3b9214..a151fd811e3e 100644 --- a/eng/common/core-templates/post-build/post-build.yml +++ b/eng/common/core-templates/post-build/post-build.yml @@ -60,6 +60,11 @@ parameters: artifactNames: '' downloadArtifacts: true + - name: isAssetlessBuild + type: boolean + displayName: Is Assetless Build + default: false + # These parameters let the user customize the call to sdk-task.ps1 for publishing # symbols & general artifacts as well as for signing validation - name: symbolPublishingAdditionalParameters @@ -188,9 +193,6 @@ stages: buildId: $(AzDOBuildId) artifactName: PackageArtifacts checkDownloadedFiles: true - itemPattern: | - ** - !**/Microsoft.SourceBuild.Intermediate.*.nupkg # This is necessary whenever we want to publish/restore to an AzDO private feed # Since sdk-task.ps1 tries to restore packages we need to do this authentication here @@ -320,3 +322,4 @@ stages: -RequireDefaultChannels ${{ parameters.requireDefaultChannels }} -ArtifactsPublishingAdditionalParameters '${{ parameters.artifactsPublishingAdditionalParameters }}' -SymbolPublishingAdditionalParameters '${{ parameters.symbolPublishingAdditionalParameters }}' + -SkipAssetsPublishing '${{ parameters.isAssetlessBuild }}' diff --git a/eng/common/core-templates/steps/generate-sbom.yml b/eng/common/core-templates/steps/generate-sbom.yml index d938b60e1bb5..44a9636cdff9 100644 --- a/eng/common/core-templates/steps/generate-sbom.yml +++ b/eng/common/core-templates/steps/generate-sbom.yml @@ -5,7 +5,7 @@ # IgnoreDirectories - Directories to ignore for SBOM generation. This will be passed through to the CG component detector. parameters: - PackageVersion: 9.0.0 + PackageVersion: 10.0.0 BuildDropPath: '$(Build.SourcesDirectory)/artifacts' PackageName: '.NET' ManifestDirPath: $(Build.ArtifactStagingDirectory)/sbom @@ -38,7 +38,7 @@ steps: PackageName: ${{ parameters.packageName }} BuildDropPath: ${{ parameters.buildDropPath }} PackageVersion: ${{ parameters.packageVersion }} - ManifestDirPath: ${{ parameters.manifestDirPath }} + ManifestDirPath: ${{ parameters.manifestDirPath }}/$(ARTIFACT_NAME) ${{ if ne(parameters.IgnoreDirectories, '') }}: AdditionalComponentDetectorArgs: '--IgnoreDirectories ${{ parameters.IgnoreDirectories }}' diff --git a/eng/common/core-templates/steps/install-microbuild.yml b/eng/common/core-templates/steps/install-microbuild.yml index 2a6a529482b5..a3540ba00c7e 100644 --- a/eng/common/core-templates/steps/install-microbuild.yml +++ b/eng/common/core-templates/steps/install-microbuild.yml @@ -5,43 +5,19 @@ parameters: # Will be ignored if 'enableMicrobuild' is false or 'Agent.Os' is 'Windows_NT' enableMicrobuildForMacAndLinux: false # Location of the MicroBuild output folder - microBuildOutputFolder: '$(Agent.TempDirectory)' + microBuildOutputFolder: '$(Build.SourcesDirectory)' continueOnError: false steps: - ${{ if eq(parameters.enableMicrobuild, 'true') }}: - ${{ if eq(parameters.enableMicrobuildForMacAndLinux, 'true') }}: - # Install Python 3.12.x on when Python > 3.12.x is installed - https://github.com/dotnet/source-build/issues/4802 - - script: | - version=$(python3 --version | awk '{print $2}') - major=$(echo $version | cut -d. -f1) - minor=$(echo $version | cut -d. 
-f2) - - installPython=false - if [ "$major" -gt 3 ] || { [ "$major" -eq 3 ] && [ "$minor" -gt 12 ]; }; then - installPython=true - fi - - echo "Python version: $version." - echo "Install Python 3.12.x: $installPython." - echo "##vso[task.setvariable variable=installPython;isOutput=true]$installPython" - name: InstallPython - displayName: 'Determine Python installation' - condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) - - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.12.x' - displayName: 'Use Python 3.12.x' - condition: and(succeeded(), eq(variables['InstallPython.installPython'], 'true'), ne(variables['Agent.Os'], 'Windows_NT')) - # Needed to download the MicroBuild plugin nupkgs on Mac and Linux when nuget.exe is unavailable - task: UseDotNet@2 displayName: Install .NET 8.0 SDK for MicroBuild Plugin inputs: packageType: sdk version: 8.0.x - installationPath: ${{ parameters.microBuildOutputFolder }}/dotnet + installationPath: ${{ parameters.microBuildOutputFolder }}/.dotnet workingDirectory: ${{ parameters.microBuildOutputFolder }} condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) @@ -53,6 +29,7 @@ steps: feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json ${{ if and(eq(parameters.enableMicrobuildForMacAndLinux, 'true'), ne(variables['Agent.Os'], 'Windows_NT')) }}: azureSubscription: 'MicroBuild Signing Task (DevDiv)' + useEsrpCli: true env: TeamName: $(_TeamName) MicroBuildOutputFolderOverride: ${{ parameters.microBuildOutputFolder }} diff --git a/eng/common/core-templates/steps/source-build.yml b/eng/common/core-templates/steps/source-build.yml index f9ba1625c20f..0dde553c3ebf 100644 --- a/eng/common/core-templates/steps/source-build.yml +++ b/eng/common/core-templates/steps/source-build.yml @@ -19,25 +19,12 @@ steps: set -x df -h - # If file changes are detected, set CopyWipIntoInnerSourceBuildRepo to copy the WIP changes into the inner source build repo. - internalRestoreArgs= - if ! git diff --quiet; then - internalRestoreArgs='/p:CopyWipIntoInnerSourceBuildRepo=true' - # The 'Copy WIP' feature of source build uses git stash to apply changes from the original repo. - # This only works if there is a username/email configured, which won't be the case in most CI runs. - git config --get user.email - if [ $? -ne 0 ]; then - git config user.email dn-bot@microsoft.com - git config user.name dn-bot - fi - fi - # If building on the internal project, the internal storage variable may be available (usually only if needed) # In that case, add variables to allow the download of internal runtimes if the specified versions are not found # in the default public locations. 
internalRuntimeDownloadArgs= if [ '$(dotnetbuilds-internal-container-read-token-base64)' != '$''(dotnetbuilds-internal-container-read-token-base64)' ]; then - internalRuntimeDownloadArgs='/p:DotNetRuntimeSourceFeed=https://dotnetbuilds.blob.core.windows.net/internal /p:DotNetRuntimeSourceFeedKey=$(dotnetbuilds-internal-container-read-token-base64) --runtimesourcefeed https://dotnetbuilds.blob.core.windows.net/internal --runtimesourcefeedkey $(dotnetbuilds-internal-container-read-token-base64)' + internalRuntimeDownloadArgs='/p:DotNetRuntimeSourceFeed=https://ci.dot.net/internal /p:DotNetRuntimeSourceFeedKey=$(dotnetbuilds-internal-container-read-token-base64) --runtimesourcefeed https://ci.dot.net/internal --runtimesourcefeedkey $(dotnetbuilds-internal-container-read-token-base64)' fi buildConfig=Release @@ -46,34 +33,14 @@ steps: buildConfig='$(_BuildConfig)' fi - officialBuildArgs= - if [ '${{ and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}' = 'True' ]; then - officialBuildArgs='/p:DotNetPublishUsingPipelines=true /p:OfficialBuildId=$(BUILD.BUILDNUMBER)' - fi - targetRidArgs= if [ '${{ parameters.platform.targetRID }}' != '' ]; then targetRidArgs='/p:TargetRid=${{ parameters.platform.targetRID }}' fi - runtimeOsArgs= - if [ '${{ parameters.platform.runtimeOS }}' != '' ]; then - runtimeOsArgs='/p:RuntimeOS=${{ parameters.platform.runtimeOS }}' - fi - - baseOsArgs= - if [ '${{ parameters.platform.baseOS }}' != '' ]; then - baseOsArgs='/p:BaseOS=${{ parameters.platform.baseOS }}' - fi - - publishArgs= - if [ '${{ parameters.platform.skipPublishValidation }}' != 'true' ]; then - publishArgs='--publish' - fi - - assetManifestFileName=SourceBuild_RidSpecific.xml - if [ '${{ parameters.platform.name }}' != '' ]; then - assetManifestFileName=SourceBuild_${{ parameters.platform.name }}.xml + baseRidArgs= + if [ '${{ parameters.platform.baseRID }}' != '' ]; then + baseRidArgs='/p:BaseRid=${{ parameters.platform.baseRID }}' fi portableBuildArgs= @@ -83,51 +50,22 @@ steps: ${{ coalesce(parameters.platform.buildScript, './build.sh') }} --ci \ --configuration $buildConfig \ - --restore --build --pack $publishArgs -bl \ - $officialBuildArgs \ + --restore --build --pack -bl \ + --source-build \ + ${{ parameters.platform.buildArguments }} \ $internalRuntimeDownloadArgs \ - $internalRestoreArgs \ $targetRidArgs \ - $runtimeOsArgs \ - $baseOsArgs \ + $baseRidArgs \ $portableBuildArgs \ - /p:DotNetBuildSourceOnly=true \ - /p:DotNetBuildRepo=true \ - /p:AssetManifestFileName=$assetManifestFileName displayName: Build -# Upload build logs for diagnosis. 
-- task: CopyFiles@2 - displayName: Prepare BuildLogs staging directory - inputs: - SourceFolder: '$(Build.SourcesDirectory)' - Contents: | - **/*.log - **/*.binlog - artifacts/sb/prebuilt-report/** - TargetFolder: '$(Build.StagingDirectory)/BuildLogs' - CleanTargetFolder: true - continueOnError: true - condition: succeededOrFailed() - - template: /eng/common/core-templates/steps/publish-pipeline-artifacts.yml parameters: is1ESPipeline: ${{ parameters.is1ESPipeline }} args: displayName: Publish BuildLogs - targetPath: '$(Build.StagingDirectory)/BuildLogs' + targetPath: artifacts/log/${{ coalesce(variables._BuildConfig, 'Release') }} artifactName: BuildLogs_SourceBuild_${{ parameters.platform.name }}_Attempt$(System.JobAttempt) continueOnError: true condition: succeededOrFailed() sbomEnabled: false # we don't need SBOM for logs - -# Manually inject component detection so that we can ignore the source build upstream cache, which contains -# a nupkg cache of input packages (a local feed). -# This path must match the upstream cache path in property 'CurrentRepoSourceBuiltNupkgCacheDir' -# in src\Microsoft.DotNet.Arcade.Sdk\tools\SourceBuild\SourceBuildArcade.targets -- template: /eng/common/core-templates/steps/component-governance.yml - parameters: - displayName: Component Detection (Exclude upstream cache) - is1ESPipeline: ${{ parameters.is1ESPipeline }} - componentGovernanceIgnoreDirectories: '$(Build.SourcesDirectory)/artifacts/sb/src/artifacts/obj/source-built-upstream-cache' - disableComponentGovernance: ${{ eq(variables['System.TeamProject'], 'public') }} diff --git a/eng/common/core-templates/steps/source-index-stage1-publish.yml b/eng/common/core-templates/steps/source-index-stage1-publish.yml index 473a22c4719d..99c2326fc190 100644 --- a/eng/common/core-templates/steps/source-index-stage1-publish.yml +++ b/eng/common/core-templates/steps/source-index-stage1-publish.yml @@ -1,6 +1,6 @@ parameters: - sourceIndexUploadPackageVersion: 2.0.0-20240522.1 - sourceIndexProcessBinlogPackageVersion: 1.0.1-20240522.1 + sourceIndexUploadPackageVersion: 2.0.0-20250425.2 + sourceIndexProcessBinlogPackageVersion: 1.0.1-20250425.2 sourceIndexPackageSource: https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json binlogPath: artifacts/log/Debug/Build.binlog diff --git a/eng/common/cross/arm64/tizen/tizen.patch b/eng/common/cross/arm64/tizen/tizen.patch index af7c8be05906..2cebc547382e 100644 --- a/eng/common/cross/arm64/tizen/tizen.patch +++ b/eng/common/cross/arm64/tizen/tizen.patch @@ -5,5 +5,5 @@ diff -u -r a/usr/lib/libc.so b/usr/lib/libc.so Use the shared library, but some functions are only in the static library, so try that secondarily. 
*/ OUTPUT_FORMAT(elf64-littleaarch64) --GROUP ( /lib64/libc.so.6 /usr/lib64/libc_nonshared.a AS_NEEDED ( /lib/ld-linux-aarch64.so.1 ) ) +-GROUP ( /lib64/libc.so.6 /usr/lib64/libc_nonshared.a AS_NEEDED ( /lib64/ld-linux-aarch64.so.1 ) ) +GROUP ( libc.so.6 libc_nonshared.a AS_NEEDED ( ld-linux-aarch64.so.1 ) ) diff --git a/eng/common/cross/armel/armel.jessie.patch b/eng/common/cross/armel/armel.jessie.patch deleted file mode 100644 index 2d2615619351..000000000000 --- a/eng/common/cross/armel/armel.jessie.patch +++ /dev/null @@ -1,43 +0,0 @@ -diff -u -r a/usr/include/urcu/uatomic/generic.h b/usr/include/urcu/uatomic/generic.h ---- a/usr/include/urcu/uatomic/generic.h 2014-10-22 15:00:58.000000000 -0700 -+++ b/usr/include/urcu/uatomic/generic.h 2020-10-30 21:38:28.550000000 -0700 -@@ -69,10 +69,10 @@ - #endif - #ifdef UATOMIC_HAS_ATOMIC_SHORT - case 2: -- return __sync_val_compare_and_swap_2(addr, old, _new); -+ return __sync_val_compare_and_swap_2((uint16_t*) addr, old, _new); - #endif - case 4: -- return __sync_val_compare_and_swap_4(addr, old, _new); -+ return __sync_val_compare_and_swap_4((uint32_t*) addr, old, _new); - #if (CAA_BITS_PER_LONG == 64) - case 8: - return __sync_val_compare_and_swap_8(addr, old, _new); -@@ -109,7 +109,7 @@ - return; - #endif - case 4: -- __sync_and_and_fetch_4(addr, val); -+ __sync_and_and_fetch_4((uint32_t*) addr, val); - return; - #if (CAA_BITS_PER_LONG == 64) - case 8: -@@ -148,7 +148,7 @@ - return; - #endif - case 4: -- __sync_or_and_fetch_4(addr, val); -+ __sync_or_and_fetch_4((uint32_t*) addr, val); - return; - #if (CAA_BITS_PER_LONG == 64) - case 8: -@@ -187,7 +187,7 @@ - return __sync_add_and_fetch_2(addr, val); - #endif - case 4: -- return __sync_add_and_fetch_4(addr, val); -+ return __sync_add_and_fetch_4((uint32_t*) addr, val); - #if (CAA_BITS_PER_LONG == 64) - case 8: - return __sync_add_and_fetch_8(addr, val); diff --git a/eng/common/cross/build-rootfs.sh b/eng/common/cross/build-rootfs.sh index 74f399716ba8..d6f005b5dabe 100755 --- a/eng/common/cross/build-rootfs.sh +++ b/eng/common/cross/build-rootfs.sh @@ -164,9 +164,13 @@ while :; do armel) __BuildArch=armel __UbuntuArch=armel - __UbuntuRepo="http://ftp.debian.org/debian/" - __CodeName=jessie + __UbuntuRepo="http://archive.debian.org/debian/" + __CodeName=buster __KeyringFile="/usr/share/keyrings/debian-archive-keyring.gpg" + __LLDB_Package="liblldb-6.0-dev" + __UbuntuPackages="${__UbuntuPackages// libomp-dev/}" + __UbuntuPackages="${__UbuntuPackages// libomp5/}" + __UbuntuSuites= ;; armv6) __BuildArch=armv6 @@ -278,46 +282,23 @@ while :; do ;; xenial) # Ubuntu 16.04 - if [[ "$__CodeName" != "jessie" ]]; then - __CodeName=xenial - fi - ;; - zesty) # Ubuntu 17.04 - if [[ "$__CodeName" != "jessie" ]]; then - __CodeName=zesty - fi + __CodeName=xenial ;; bionic) # Ubuntu 18.04 - if [[ "$__CodeName" != "jessie" ]]; then - __CodeName=bionic - fi + __CodeName=bionic ;; focal) # Ubuntu 20.04 - if [[ "$__CodeName" != "jessie" ]]; then - __CodeName=focal - fi + __CodeName=focal ;; jammy) # Ubuntu 22.04 - if [[ "$__CodeName" != "jessie" ]]; then - __CodeName=jammy - fi + __CodeName=jammy ;; noble) # Ubuntu 24.04 - if [[ "$__CodeName" != "jessie" ]]; then - __CodeName=noble - fi + __CodeName=noble if [[ -n "$__LLDB_Package" ]]; then __LLDB_Package="liblldb-18-dev" fi ;; - jessie) # Debian 8 - __CodeName=jessie - __KeyringFile="/usr/share/keyrings/debian-archive-keyring.gpg" - - if [[ -z "$__UbuntuRepo" ]]; then - __UbuntuRepo="http://ftp.debian.org/debian/" - fi - ;; stretch) # Debian 9 
__CodeName=stretch __LLDB_Package="liblldb-6.0-dev" @@ -333,7 +314,7 @@ while :; do __KeyringFile="/usr/share/keyrings/debian-archive-keyring.gpg" if [[ -z "$__UbuntuRepo" ]]; then - __UbuntuRepo="http://ftp.debian.org/debian/" + __UbuntuRepo="http://archive.debian.org/debian/" fi ;; bullseye) # Debian 11 @@ -473,10 +454,6 @@ if [[ "$__AlpineVersion" =~ 3\.1[345] ]]; then __AlpinePackages="${__AlpinePackages/compiler-rt/compiler-rt-static}" fi -if [[ "$__BuildArch" == "armel" ]]; then - __LLDB_Package="lldb-3.5-dev" -fi - __UbuntuPackages+=" ${__LLDB_Package:-}" if [[ -z "$__UbuntuRepo" ]]; then @@ -850,12 +827,6 @@ EOF if [[ "$__SkipUnmount" == "0" ]]; then umount "$__RootfsDir"/* || true fi - - if [[ "$__BuildArch" == "armel" && "$__CodeName" == "jessie" ]]; then - pushd "$__RootfsDir" - patch -p1 < "$__CrossDir/$__BuildArch/armel.jessie.patch" - popd - fi elif [[ "$__Tizen" == "tizen" ]]; then ROOTFS_DIR="$__RootfsDir" "$__CrossDir/tizen-build-rootfs.sh" "$__BuildArch" else diff --git a/eng/common/cross/install-debs.py b/eng/common/cross/install-debs.py old mode 100644 new mode 100755 diff --git a/eng/common/cross/tizen-fetch.sh b/eng/common/cross/tizen-fetch.sh index 28936ceef3a7..37c3a61f1de8 100755 --- a/eng/common/cross/tizen-fetch.sh +++ b/eng/common/cross/tizen-fetch.sh @@ -156,13 +156,8 @@ fetch_tizen_pkgs() done } -if [ "$TIZEN_ARCH" == "riscv64" ]; then - BASE="Tizen-Base-RISCV" - UNIFIED="Tizen-Unified-RISCV" -else - BASE="Tizen-Base" - UNIFIED="Tizen-Unified" -fi +BASE="Tizen-Base" +UNIFIED="Tizen-Unified" Inform "Initialize ${TIZEN_ARCH} base" fetch_tizen_pkgs_init standard $BASE diff --git a/eng/common/darc-init.sh b/eng/common/darc-init.sh index 36dbd45e1ce8..e889f439b8dc 100755 --- a/eng/common/darc-init.sh +++ b/eng/common/darc-init.sh @@ -68,7 +68,7 @@ function InstallDarcCli { fi fi - local arcadeServicesSource="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" + local arcadeServicesSource="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-eng/nuget/v3/index.json" echo "Installing Darc CLI version $darcVersion..." echo "You may need to restart your command shell if this is the first dotnet tool you have installed." diff --git a/eng/common/generate-sbom-prep.ps1 b/eng/common/generate-sbom-prep.ps1 index 3e5c1c74a1c5..a0c7d792a76f 100644 --- a/eng/common/generate-sbom-prep.ps1 +++ b/eng/common/generate-sbom-prep.ps1 @@ -4,18 +4,26 @@ Param( . $PSScriptRoot\pipeline-logging-functions.ps1 +# Normally - we'd listen to the manifest path given, but 1ES templates will overwrite if this level gets uploaded directly +# with their own overwriting ours. So we create it as a sub directory of the requested manifest path. +$ArtifactName = "${env:SYSTEM_STAGENAME}_${env:AGENT_JOBNAME}_SBOM" +$SafeArtifactName = $ArtifactName -replace '["/:<>\\|?@*"() ]', '_' +$SbomGenerationDir = Join-Path $ManifestDirPath $SafeArtifactName + +Write-Host "Artifact name before : $ArtifactName" +Write-Host "Artifact name after : $SafeArtifactName" + Write-Host "Creating dir $ManifestDirPath" + # create directory for sbom manifest to be placed -if (!(Test-Path -path $ManifestDirPath)) +if (!(Test-Path -path $SbomGenerationDir)) { - New-Item -ItemType Directory -path $ManifestDirPath - Write-Host "Successfully created directory $ManifestDirPath" + New-Item -ItemType Directory -path $SbomGenerationDir + Write-Host "Successfully created directory $SbomGenerationDir" } else{ Write-PipelineTelemetryError -category 'Build' "Unable to create sbom folder." 
} Write-Host "Updating artifact name" -$artifact_name = "${env:SYSTEM_STAGENAME}_${env:AGENT_JOBNAME}_SBOM" -replace '["/:<>\\|?@*"() ]', '_' -Write-Host "Artifact name $artifact_name" -Write-Host "##vso[task.setvariable variable=ARTIFACT_NAME]$artifact_name" +Write-Host "##vso[task.setvariable variable=ARTIFACT_NAME]$SafeArtifactName" diff --git a/eng/common/generate-sbom-prep.sh b/eng/common/generate-sbom-prep.sh old mode 100644 new mode 100755 index d5c76dc827b4..b8ecca72bbf5 --- a/eng/common/generate-sbom-prep.sh +++ b/eng/common/generate-sbom-prep.sh @@ -14,19 +14,24 @@ done scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" . $scriptroot/pipeline-logging-functions.sh + +# replace all special characters with _, some builds use special characters like : in Agent.Jobname, that is not a permissible name while uploading artifacts. +artifact_name=$SYSTEM_STAGENAME"_"$AGENT_JOBNAME"_SBOM" +safe_artifact_name="${artifact_name//["/:<>\\|?@*$" ]/_}" manifest_dir=$1 -if [ ! -d "$manifest_dir" ] ; then - mkdir -p "$manifest_dir" - echo "Sbom directory created." $manifest_dir +# Normally - we'd listen to the manifest path given, but 1ES templates will overwrite if this level gets uploaded directly +# with their own overwriting ours. So we create it as a sub directory of the requested manifest path. +sbom_generation_dir="$manifest_dir/$safe_artifact_name" + +if [ ! -d "$sbom_generation_dir" ] ; then + mkdir -p "$sbom_generation_dir" + echo "Sbom directory created." $sbom_generation_dir else Write-PipelineTelemetryError -category 'Build' "Unable to create sbom folder." fi -artifact_name=$SYSTEM_STAGENAME"_"$AGENT_JOBNAME"_SBOM" echo "Artifact name before : "$artifact_name -# replace all special characters with _, some builds use special characters like : in Agent.Jobname, that is not a permissible name while uploading artifacts. 
-safe_artifact_name="${artifact_name//["/:<>\\|?@*$" ]/_}" echo "Artifact name after : "$safe_artifact_name export ARTIFACT_NAME=$safe_artifact_name echo "##vso[task.setvariable variable=ARTIFACT_NAME]$safe_artifact_name" diff --git a/eng/common/native/install-dependencies.sh b/eng/common/native/install-dependencies.sh index bc19ea2a0d35..477a44f335be 100755 --- a/eng/common/native/install-dependencies.sh +++ b/eng/common/native/install-dependencies.sh @@ -27,8 +27,9 @@ case "$os" in libssl-dev libkrb5-dev pigz cpio localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 - elif [ "$ID" = "fedora" ] || [ "$ID" = "rhel" ]; then - dnf install -y cmake llvm lld lldb clang python curl libicu-devel openssl-devel krb5-devel lttng-ust-devel pigz cpio + elif [ "$ID" = "fedora" ] || [ "$ID" = "rhel" ] || [ "$ID" = "azurelinux" ]; then + pkg_mgr="$(command -v tdnf 2>/dev/null || command -v dnf)" + $pkg_mgr install -y cmake llvm lld lldb clang python curl libicu-devel openssl-devel krb5-devel lttng-ust-devel pigz cpio elif [ "$ID" = "alpine" ]; then apk add build-base cmake bash curl clang llvm-dev lld lldb krb5-dev lttng-ust-dev icu-dev openssl-dev pigz cpio else diff --git a/eng/common/post-build/publish-using-darc.ps1 b/eng/common/post-build/publish-using-darc.ps1 index a261517ef906..1eda208a3bbf 100644 --- a/eng/common/post-build/publish-using-darc.ps1 +++ b/eng/common/post-build/publish-using-darc.ps1 @@ -6,7 +6,8 @@ param( [Parameter(Mandatory=$true)][string] $WaitPublishingFinish, [Parameter(Mandatory=$false)][string] $ArtifactsPublishingAdditionalParameters, [Parameter(Mandatory=$false)][string] $SymbolPublishingAdditionalParameters, - [Parameter(Mandatory=$false)][string] $RequireDefaultChannels + [Parameter(Mandatory=$false)][string] $RequireDefaultChannels, + [Parameter(Mandatory=$false)][string] $SkipAssetsPublishing ) try { @@ -39,6 +40,10 @@ try { $optionalParams.Add("--default-channels-required") | Out-Null } + if ("true" -eq $SkipAssetsPublishing) { + $optionalParams.Add("--skip-assets-publishing") | Out-Null + } + & $darc add-build-to-channel ` --id $buildId ` --publishing-infra-version $PublishingInfraVersion ` diff --git a/eng/common/sdk-task.ps1 b/eng/common/sdk-task.ps1 index 4f0546dce120..a9d2a2d26996 100644 --- a/eng/common/sdk-task.ps1 +++ b/eng/common/sdk-task.ps1 @@ -6,12 +6,13 @@ Param( [string] $msbuildEngine = $null, [switch] $restore, [switch] $prepareMachine, + [switch][Alias('nobl')]$excludeCIBinaryLog, [switch] $help, [Parameter(ValueFromRemainingArguments=$true)][String[]]$properties ) $ci = $true -$binaryLog = $true +$binaryLog = if ($excludeCIBinaryLog) { $false } else { $true } $warnAsError = $true . $PSScriptRoot\tools.ps1 @@ -27,6 +28,7 @@ function Print-Usage() { Write-Host "Advanced settings:" Write-Host " -prepareMachine Prepare machine for CI run" Write-Host " -msbuildEngine Msbuild engine to use to run build ('dotnet', 'vs', or unspecified)." + Write-Host " -excludeCIBinaryLog When running on CI, allow no binary log (short: -nobl)" Write-Host "" Write-Host "Command line arguments not listed above are passed thru to msbuild." 
} @@ -34,10 +36,11 @@ function Print-Usage() { function Build([string]$target) { $logSuffix = if ($target -eq 'Execute') { '' } else { ".$target" } $log = Join-Path $LogDir "$task$logSuffix.binlog" + $binaryLogArg = if ($binaryLog) { "/bl:$log" } else { "" } $outputPath = Join-Path $ToolsetDir "$task\" MSBuild $taskProject ` - /bl:$log ` + $binaryLogArg ` /t:$target ` /p:Configuration=$configuration ` /p:RepoRoot=$RepoRoot ` @@ -64,7 +67,7 @@ try { $GlobalJson.tools | Add-Member -Name "vs" -Value (ConvertFrom-Json "{ `"version`": `"16.5`" }") -MemberType NoteProperty } if( -not ($GlobalJson.tools.PSObject.Properties.Name -match "xcopy-msbuild" )) { - $GlobalJson.tools | Add-Member -Name "xcopy-msbuild" -Value "17.12.0" -MemberType NoteProperty + $GlobalJson.tools | Add-Member -Name "xcopy-msbuild" -Value "17.13.0" -MemberType NoteProperty } if ($GlobalJson.tools."xcopy-msbuild".Trim() -ine "none") { $xcopyMSBuildToolsFolder = InitializeXCopyMSBuild $GlobalJson.tools."xcopy-msbuild" -install $true diff --git a/eng/common/sdk-task.sh b/eng/common/sdk-task.sh old mode 100644 new mode 100755 index b9b9e58db9ad..2f83adc0269f --- a/eng/common/sdk-task.sh +++ b/eng/common/sdk-task.sh @@ -7,6 +7,10 @@ show_usage() { echo " --verbosity Msbuild verbosity: q[uiet], m[inimal], n[ormal], d[etailed], and diag[nostic]" echo " --help Print help and exit" echo "" + + echo "Advanced settings:" + echo " --excludeCIBinarylog Don't output binary log (short: -nobl)" + echo "" echo "Command line arguments not listed above are passed thru to msbuild." } @@ -27,10 +31,12 @@ Build() { local log_suffix="" [[ "$target" != "Execute" ]] && log_suffix=".$target" local log="$log_dir/$task$log_suffix.binlog" + local binaryLogArg="" + [[ $binary_log == true ]] && binaryLogArg="/bl:$log" local output_path="$toolset_dir/$task/" MSBuild "$taskProject" \ - /bl:"$log" \ + $binaryLogArg \ /t:"$target" \ /p:Configuration="$configuration" \ /p:RepoRoot="$repo_root" \ @@ -39,8 +45,10 @@ Build() { $properties } +binary_log=true configuration="Debug" verbosity="minimal" +exclude_ci_binary_log=false restore=false help=false properties='' @@ -60,6 +68,11 @@ while (($# > 0)); do verbosity=$2 shift 2 ;; + --excludecibinarylog|--nobl) + binary_log=false + exclude_ci_binary_log=true + shift 1 + ;; --help) help=true shift 1 @@ -72,7 +85,6 @@ while (($# > 0)); do done ci=true -binaryLog=true warnAsError=true if $help; then diff --git a/eng/common/sdl/packages.config b/eng/common/sdl/packages.config index 4585cfd6bba1..e5f543ea68c2 100644 --- a/eng/common/sdl/packages.config +++ b/eng/common/sdl/packages.config @@ -1,4 +1,4 @@ - + diff --git a/eng/common/templates-official/job/job.yml b/eng/common/templates-official/job/job.yml index 605692d2fb77..a8a943287458 100644 --- a/eng/common/templates-official/job/job.yml +++ b/eng/common/templates-official/job/job.yml @@ -16,6 +16,7 @@ jobs: parameters: PackageVersion: ${{ parameters.packageVersion }} BuildDropPath: ${{ parameters.buildDropPath }} + ManifestDirPath: $(Build.ArtifactStagingDirectory)/sbom publishArtifacts: false # publish artifacts @@ -30,6 +31,7 @@ jobs: PathtoPublish: '$(Build.ArtifactStagingDirectory)/artifacts' ArtifactName: ${{ coalesce(parameters.artifacts.publish.artifacts.name , 'Artifacts_$(Agent.Os)_$(_BuildConfig)') }} condition: always() + retryCountOnTaskFailure: 10 # for any logs being locked continueOnError: true - ${{ if and(ne(parameters.artifacts.publish.logs, 'false'), ne(parameters.artifacts.publish.logs, '')) }}: - output: pipelineArtifact @@ -38,6 +40,7 @@ jobs: 
displayName: 'Publish logs' continueOnError: true condition: always() + retryCountOnTaskFailure: 10 # for any logs being locked sbomEnabled: false # we don't need SBOM for logs - ${{ if eq(parameters.enablePublishBuildArtifacts, true) }}: @@ -45,7 +48,7 @@ jobs: displayName: Publish Logs PathtoPublish: '$(Build.ArtifactStagingDirectory)/artifacts/log/$(_BuildConfig)' publishLocation: Container - ArtifactName: ${{ coalesce(parameters.enablePublishBuildArtifacts.artifactName, '$(Agent.Os)_$(Agent.JobName)' ) }} + ArtifactName: ${{ coalesce(parameters.enablePublishBuildArtifacts.artifactName, '$(Agent.Os)_$(Agent.JobName)_Attempt$(System.JobAttempt)' ) }} continueOnError: true condition: always() sbomEnabled: false # we don't need SBOM for logs diff --git a/eng/common/templates-official/steps/publish-build-artifacts.yml b/eng/common/templates-official/steps/publish-build-artifacts.yml index 100a3fc98493..fcf6637b2ebc 100644 --- a/eng/common/templates-official/steps/publish-build-artifacts.yml +++ b/eng/common/templates-official/steps/publish-build-artifacts.yml @@ -24,6 +24,10 @@ parameters: - name: is1ESPipeline type: boolean default: true + +- name: retryCountOnTaskFailure + type: string + default: 10 steps: - ${{ if ne(parameters.is1ESPipeline, true) }}: @@ -38,4 +42,5 @@ steps: PathtoPublish: ${{ parameters.pathToPublish }} ${{ if parameters.artifactName }}: ArtifactName: ${{ parameters.artifactName }} - + ${{ if parameters.retryCountOnTaskFailure }}: + retryCountOnTaskFailure: ${{ parameters.retryCountOnTaskFailure }} diff --git a/eng/common/templates/job/job.yml b/eng/common/templates/job/job.yml index d1aeb92fcea5..7cbf668c22bc 100644 --- a/eng/common/templates/job/job.yml +++ b/eng/common/templates/job/job.yml @@ -46,6 +46,7 @@ jobs: artifactName: ${{ coalesce(parameters.artifacts.publish.artifacts.name , 'Artifacts_$(Agent.Os)_$(_BuildConfig)') }} continueOnError: true condition: always() + retryCountOnTaskFailure: 10 # for any logs being locked - ${{ if and(ne(parameters.artifacts.publish.logs, 'false'), ne(parameters.artifacts.publish.logs, '')) }}: - template: /eng/common/core-templates/steps/publish-pipeline-artifacts.yml parameters: @@ -56,6 +57,7 @@ jobs: displayName: 'Publish logs' continueOnError: true condition: always() + retryCountOnTaskFailure: 10 # for any logs being locked sbomEnabled: false # we don't need SBOM for logs - ${{ if ne(parameters.enablePublishBuildArtifacts, 'false') }}: @@ -66,7 +68,7 @@ jobs: displayName: Publish Logs pathToPublish: '$(Build.ArtifactStagingDirectory)/artifacts/log/$(_BuildConfig)' publishLocation: Container - artifactName: ${{ coalesce(parameters.enablePublishBuildArtifacts.artifactName, '$(Agent.Os)_$(Agent.JobName)' ) }} + artifactName: ${{ coalesce(parameters.enablePublishBuildArtifacts.artifactName, '$(Agent.Os)_$(Agent.JobName)_Attempt$(System.JobAttempt)' ) }} continueOnError: true condition: always() diff --git a/eng/common/templates/steps/publish-build-artifacts.yml b/eng/common/templates/steps/publish-build-artifacts.yml index 6428a98dfef6..605e602e94d1 100644 --- a/eng/common/templates/steps/publish-build-artifacts.yml +++ b/eng/common/templates/steps/publish-build-artifacts.yml @@ -25,6 +25,10 @@ parameters: type: string default: 'Container' +- name: retryCountOnTaskFailure + type: string + default: 10 + steps: - ${{ if eq(parameters.is1ESPipeline, true) }}: - 'eng/common/templates cannot be referenced from a 1ES managed template': error @@ -37,4 +41,6 @@ steps: PublishLocation: ${{ parameters.publishLocation }} PathtoPublish: 
${{ parameters.pathToPublish }} ${{ if parameters.artifactName }}: - ArtifactName: ${{ parameters.artifactName }} \ No newline at end of file + ArtifactName: ${{ parameters.artifactName }} + ${{ if parameters.retryCountOnTaskFailure }}: + retryCountOnTaskFailure: ${{ parameters.retryCountOnTaskFailure }} diff --git a/eng/common/templates/steps/vmr-sync.yml b/eng/common/templates/steps/vmr-sync.yml new file mode 100644 index 000000000000..599afb6186b8 --- /dev/null +++ b/eng/common/templates/steps/vmr-sync.yml @@ -0,0 +1,207 @@ +### These steps synchronize new code from product repositories into the VMR (https://github.com/dotnet/dotnet). +### They initialize the darc CLI and pull the new updates. +### Changes are applied locally onto the already cloned VMR (located in $vmrPath). + +parameters: +- name: targetRef + displayName: Target revision in dotnet/ to synchronize + type: string + default: $(Build.SourceVersion) + +- name: vmrPath + displayName: Path where the dotnet/dotnet is checked out to + type: string + default: $(Agent.BuildDirectory)/vmr + +- name: additionalSyncs + displayName: Optional list of package names whose repo's source will also be synchronized in the local VMR, e.g. NuGet.Protocol + type: object + default: [] + +steps: +- checkout: vmr + displayName: Clone dotnet/dotnet + path: vmr + clean: true + +- checkout: self + displayName: Clone $(Build.Repository.Name) + path: repo + fetchDepth: 0 + +# This step is needed so that when we get a detached HEAD / shallow clone, +# we still pull the commit into the temporary repo clone to use it during the sync. +# Also unshallow the clone so that forwardflow command would work. +- script: | + git branch repo-head + git rev-parse HEAD + displayName: Label PR commit + workingDirectory: $(Agent.BuildDirectory)/repo + +- script: | + vmr_sha=$(grep -oP '(?<=Sha=")[^"]*' $(Agent.BuildDirectory)/repo/eng/Version.Details.xml) + echo "##vso[task.setvariable variable=vmr_sha]$vmr_sha" + displayName: Obtain the vmr sha from Version.Details.xml (Unix) + condition: ne(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + +- powershell: | + [xml]$xml = Get-Content -Path $(Agent.BuildDirectory)/repo/eng/Version.Details.xml + $vmr_sha = $xml.SelectSingleNode("//Source").Sha + Write-Output "##vso[task.setvariable variable=vmr_sha]$vmr_sha" + displayName: Obtain the vmr sha from Version.Details.xml (Windows) + condition: eq(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + +- script: | + git fetch --all + git checkout $(vmr_sha) + displayName: Checkout VMR at correct sha for repo flow + workingDirectory: ${{ parameters.vmrPath }} + +- script: | + git config --global user.name "dotnet-maestro[bot]" + git config --global user.email "dotnet-maestro[bot]@users.noreply.github.com" + displayName: Set git author to dotnet-maestro[bot] + workingDirectory: ${{ parameters.vmrPath }} + +- script: | + ./eng/common/vmr-sync.sh \ + --vmr ${{ parameters.vmrPath }} \ + --tmp $(Agent.TempDirectory) \ + --azdev-pat '$(dn-bot-all-orgs-code-r)' \ + --ci \ + --debug + + if [ "$?" 
-ne 0 ]; then + echo "##vso[task.logissue type=error]Failed to synchronize the VMR" + exit 1 + fi + displayName: Sync repo into VMR (Unix) + condition: ne(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + +- script: | + git config --global diff.astextplain.textconv echo + git config --system core.longpaths true + displayName: Configure Windows git (longpaths, astextplain) + condition: eq(variables['Agent.OS'], 'Windows_NT') + +- powershell: | + ./eng/common/vmr-sync.ps1 ` + -vmr ${{ parameters.vmrPath }} ` + -tmp $(Agent.TempDirectory) ` + -azdevPat '$(dn-bot-all-orgs-code-r)' ` + -ci ` + -debugOutput + + if ($LASTEXITCODE -ne 0) { + echo "##vso[task.logissue type=error]Failed to synchronize the VMR" + exit 1 + } + displayName: Sync repo into VMR (Windows) + condition: eq(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + +- ${{ if eq(variables['Build.Reason'], 'PullRequest') }}: + - task: CopyFiles@2 + displayName: Collect failed patches + condition: failed() + inputs: + SourceFolder: '$(Agent.TempDirectory)' + Contents: '*.patch' + TargetFolder: '$(Build.ArtifactStagingDirectory)/FailedPatches' + + - publish: '$(Build.ArtifactStagingDirectory)/FailedPatches' + artifact: $(System.JobDisplayName)_FailedPatches + displayName: Upload failed patches + condition: failed() + +- ${{ each assetName in parameters.additionalSyncs }}: + # The vmr-sync script ends up staging files in the local VMR so we have to commit those + - script: + git commit --allow-empty -am "Forward-flow $(Build.Repository.Name)" + displayName: Commit local VMR changes + workingDirectory: ${{ parameters.vmrPath }} + + - script: | + set -ex + + echo "Searching for details of asset ${{ assetName }}..." + + # Use darc to get dependencies information + dependencies=$(./.dotnet/dotnet darc get-dependencies --name '${{ assetName }}' --ci) + + # Extract repository URL and commit hash + repository=$(echo "$dependencies" | grep 'Repo:' | sed 's/Repo:[[:space:]]*//' | head -1) + + if [ -z "$repository" ]; then + echo "##vso[task.logissue type=error]Asset ${{ assetName }} not found in the dependency list" + exit 1 + fi + + commit=$(echo "$dependencies" | grep 'Commit:' | sed 's/Commit:[[:space:]]*//' | head -1) + + echo "Updating the VMR from $repository / $commit..." + cd .. + git clone $repository ${{ assetName }} + cd ${{ assetName }} + git checkout $commit + git branch "sync/$commit" + + ./eng/common/vmr-sync.sh \ + --vmr ${{ parameters.vmrPath }} \ + --tmp $(Agent.TempDirectory) \ + --azdev-pat '$(dn-bot-all-orgs-code-r)' \ + --ci \ + --debug + + if [ "$?" -ne 0 ]; then + echo "##vso[task.logissue type=error]Failed to synchronize the VMR" + exit 1 + fi + displayName: Sync ${{ assetName }} into (Unix) + condition: ne(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + + - powershell: | + $ErrorActionPreference = 'Stop' + + Write-Host "Searching for details of asset ${{ assetName }}..." 
+ + $dependencies = .\.dotnet\dotnet darc get-dependencies --name '${{ assetName }}' --ci + + $repository = $dependencies | Select-String -Pattern 'Repo:\s+([^\s]+)' | Select-Object -First 1 + $repository -match 'Repo:\s+([^\s]+)' | Out-Null + $repository = $matches[1] + + if ($repository -eq $null) { + Write-Error "Asset ${{ assetName }} not found in the dependency list" + exit 1 + } + + $commit = $dependencies | Select-String -Pattern 'Commit:\s+([^\s]+)' | Select-Object -First 1 + $commit -match 'Commit:\s+([^\s]+)' | Out-Null + $commit = $matches[1] + + Write-Host "Updating the VMR from $repository / $commit..." + cd .. + git clone $repository ${{ assetName }} + cd ${{ assetName }} + git checkout $commit + git branch "sync/$commit" + + .\eng\common\vmr-sync.ps1 ` + -vmr ${{ parameters.vmrPath }} ` + -tmp $(Agent.TempDirectory) ` + -azdevPat '$(dn-bot-all-orgs-code-r)' ` + -ci ` + -debugOutput + + if ($LASTEXITCODE -ne 0) { + echo "##vso[task.logissue type=error]Failed to synchronize the VMR" + exit 1 + } + displayName: Sync ${{ assetName }} into (Windows) + condition: eq(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo diff --git a/eng/common/templates/vmr-build-pr.yml b/eng/common/templates/vmr-build-pr.yml new file mode 100644 index 000000000000..670cf32c3bd1 --- /dev/null +++ b/eng/common/templates/vmr-build-pr.yml @@ -0,0 +1,33 @@ +trigger: none +pr: + branches: + include: + - main + - release/* + paths: + exclude: + - documentation/* + - README.md + - CODEOWNERS + +variables: +- template: /eng/common/templates/variables/pool-providers.yml@self + +- name: skipComponentGovernanceDetection # we run CG on internal builds only + value: true + +- name: Codeql.Enabled # we run CodeQL on internal builds only + value: false + +resources: + repositories: + - repository: vmr + type: github + name: dotnet/dotnet + endpoint: dotnet + +stages: +- template: /eng/pipelines/templates/stages/vmr-build.yml@vmr + parameters: + isBuiltFromVmr: false + scope: lite diff --git a/eng/common/tools.ps1 b/eng/common/tools.ps1 index 80f9130b1508..c9e39595b586 100644 --- a/eng/common/tools.ps1 +++ b/eng/common/tools.ps1 @@ -65,10 +65,8 @@ $ErrorActionPreference = 'Stop' # Base-64 encoded SAS token that has permission to storage container described by $runtimeSourceFeed [string]$runtimeSourceFeedKey = if (Test-Path variable:runtimeSourceFeedKey) { $runtimeSourceFeedKey } else { $null } -# True if the build is a product build -[bool]$productBuild = if (Test-Path variable:productBuild) { $productBuild } else { $false } - -[String[]]$properties = if (Test-Path variable:properties) { $properties } else { @() } +# True when the build is running within the VMR. +[bool]$fromVMR = if (Test-Path variable:fromVMR) { $fromVMR } else { $false } function Create-Directory ([string[]] $path) { New-Item -Path $path -Force -ItemType 'Directory' | Out-Null @@ -262,7 +260,7 @@ function GetDotNetInstallScript([string] $dotnetRoot) { if (!(Test-Path $installScript)) { Create-Directory $dotnetRoot $ProgressPreference = 'SilentlyContinue' # Don't display the console progress UI - it's a huge perf hit - $uri = "https://builds.dotnet.microsoft.com/dotnet/scripts/v1/dotnet-install.ps1" + $uri = "https://builds.dotnet.microsoft.com/dotnet/scripts/$dotnetInstallScriptVersion/dotnet-install.ps1" Retry({ Write-Host "GET $uri" @@ -383,8 +381,8 @@ function InitializeVisualStudioMSBuild([bool]$install, [object]$vsRequirements = # If the version of msbuild is going to be xcopied, # use this version.
Version matches a package here: - # https://dev.azure.com/dnceng/public/_artifacts/feed/dotnet-eng/NuGet/Microsoft.DotNet.Arcade.MSBuild.Xcopy/versions/17.12.0 - $defaultXCopyMSBuildVersion = '17.12.0' + # https://dev.azure.com/dnceng/public/_artifacts/feed/dotnet-eng/NuGet/Microsoft.DotNet.Arcade.MSBuild.Xcopy/versions/17.13.0 + $defaultXCopyMSBuildVersion = '17.13.0' if (!$vsRequirements) { if (Get-Member -InputObject $GlobalJson.tools -Name 'vs') { @@ -646,7 +644,6 @@ function GetNuGetPackageCachePath() { $env:NUGET_PACKAGES = Join-Path $env:UserProfile '.nuget\packages\' } else { $env:NUGET_PACKAGES = Join-Path $RepoRoot '.packages\' - $env:RESTORENOHTTPCACHE = $true } } @@ -852,8 +849,8 @@ function MSBuild-Core() { } # When running on Azure Pipelines, override the returned exit code to avoid double logging. - # Skip this when the build is a child of the VMR orchestrator build. - if ($ci -and $env:SYSTEM_TEAMPROJECT -ne $null -and !$productBuild -and -not($properties -like "*DotNetBuildRepo=true*")) { + # Skip this when the build is a child of the VMR build. + if ($ci -and $env:SYSTEM_TEAMPROJECT -ne $null -and !$fromVMR) { Write-PipelineSetResult -Result "Failed" -Message "msbuild execution failed." # Exiting with an exit code causes the azure pipelines task to log yet another "noise" error # The above Write-PipelineSetResult will cause the task to be marked as failure without adding yet another error diff --git a/eng/common/tools.sh b/eng/common/tools.sh index 4a5fa99478d1..28944dfcb3f4 100755 --- a/eng/common/tools.sh +++ b/eng/common/tools.sh @@ -5,6 +5,9 @@ # CI mode - set to true on CI server for PR validation build or official build. ci=${ci:-false} +# Build mode +source_build=${source_build:-false} + # Set to true to use the pipelines logger which will enable Azure logging output. # https://github.com/Microsoft/azure-pipelines-tasks/blob/master/docs/authoring/commands.md # This flag is meant as a temporary opt-opt for the feature while validate it across @@ -58,7 +61,8 @@ use_installed_dotnet_cli=${use_installed_dotnet_cli:-true} dotnetInstallScriptVersion=${dotnetInstallScriptVersion:-'v1'} # True to use global NuGet cache instead of restoring packages to repository-local directory. -if [[ "$ci" == true ]]; then +# Keep in sync with NuGetPackageroot in Arcade SDK's RepositoryLayout.props. +if [[ "$ci" == true || "$source_build" == true ]]; then use_global_nuget_cache=${use_global_nuget_cache:-false} else use_global_nuget_cache=${use_global_nuget_cache:-true} @@ -68,8 +72,8 @@ fi runtime_source_feed=${runtime_source_feed:-''} runtime_source_feed_key=${runtime_source_feed_key:-''} -# True if the build is a product build -product_build=${product_build:-false} +# True when the build is running within the VMR. +from_vmr=${from_vmr:-false} # Resolve any symlinks in the given path. function ResolvePath { @@ -295,7 +299,7 @@ function with_retries { function GetDotNetInstallScript { local root=$1 local install_script="$root/dotnet-install.sh" - local install_script_url="https://builds.dotnet.microsoft.com/dotnet/scripts/v1/dotnet-install.sh" + local install_script_url="https://builds.dotnet.microsoft.com/dotnet/scripts/$dotnetInstallScriptVersion/dotnet-install.sh" if [[ ! 
-a "$install_script" ]]; then mkdir -p "$root" @@ -341,14 +345,12 @@ function InitializeBuildTool { _InitializeBuildToolCommand="msbuild" } -# Set RestoreNoHttpCache as a workaround for https://github.com/NuGet/Home/issues/3116 function GetNuGetPackageCachePath { if [[ -z ${NUGET_PACKAGES:-} ]]; then if [[ "$use_global_nuget_cache" == true ]]; then export NUGET_PACKAGES="$HOME/.nuget/packages/" else export NUGET_PACKAGES="$repo_root/.packages/" - export RESTORENOHTTPCACHE=true fi fi @@ -502,8 +504,8 @@ function MSBuild-Core { echo "Build failed with exit code $exit_code. Check errors above." # When running on Azure Pipelines, override the returned exit code to avoid double logging. - # Skip this when the build is a child of the VMR orchestrator build. - if [[ "$ci" == true && -n ${SYSTEM_TEAMPROJECT:-} && "$product_build" != true && "$properties" != *"DotNetBuildRepo=true"* ]]; then + # Skip this when the build is a child of the VMR build. + if [[ "$ci" == true && -n ${SYSTEM_TEAMPROJECT:-} && "$from_vmr" != true ]]; then Write-PipelineSetResult -result "Failed" -message "msbuild execution failed." # Exiting with an exit code causes the azure pipelines task to log yet another "noise" error # The above Write-PipelineSetResult will cause the task to be marked as failure without adding yet another error diff --git a/eng/common/vmr-sync.ps1 b/eng/common/vmr-sync.ps1 new file mode 100755 index 000000000000..8c3c91ce8ded --- /dev/null +++ b/eng/common/vmr-sync.ps1 @@ -0,0 +1,138 @@ +<# +.SYNOPSIS + +This script is used for synchronizing the current repository into a local VMR. +It pulls the current repository's code into the specified VMR directory for local testing or +Source-Build validation. + +.DESCRIPTION + +The tooling used for synchronization will clone the VMR repository into a temporary folder if +it does not already exist. These clones can be reused in future synchronizations, so it is +recommended to dedicate a folder for this to speed up re-runs. + +.EXAMPLE + Synchronize current repository into a local VMR: + ./vmr-sync.ps1 -vmrDir "$HOME/repos/dotnet" -tmpDir "$HOME/repos/tmp" + +.PARAMETER tmpDir +Required. Path to the temporary folder where repositories will be cloned + +.PARAMETER vmrBranch +Optional. Branch of the 'dotnet/dotnet' repo to synchronize. The VMR will be checked out to this branch + +.PARAMETER azdevPat +Optional. Azure DevOps PAT to use for cloning private repositories. + +.PARAMETER vmrDir +Optional. Path to the dotnet/dotnet repository. When null, gets cloned to the temporary folder + +.PARAMETER debugOutput +Optional. Enables debug logging in the darc vmr command. + +.PARAMETER ci +Optional. Denotes that the script is running in a CI environment. +#> +param ( + [Parameter(Mandatory=$true, HelpMessage="Path to the temporary folder where repositories will be cloned")] + [string][Alias('t', 'tmp')]$tmpDir, + [string][Alias('b', 'branch')]$vmrBranch, + [string]$remote, + [string]$azdevPat, + [string][Alias('v', 'vmr')]$vmrDir, + [switch]$ci, + [switch]$debugOutput +) + +function Fail { + Write-Host "> $($args[0])" -ForegroundColor 'Red' +} + +function Highlight { + Write-Host "> $($args[0])" -ForegroundColor 'Cyan' +} + +$verbosity = 'verbose' +if ($debugOutput) { + $verbosity = 'debug' +} +# Validation + +if (-not $tmpDir) { + Fail "Missing -tmpDir argument. 
Please specify the path to the temporary folder where the repositories will be cloned" + exit 1 +} + +# Sanitize the input + +if (-not $vmrDir) { + $vmrDir = Join-Path $tmpDir 'dotnet' +} + +if (-not (Test-Path -Path $tmpDir -PathType Container)) { + New-Item -ItemType Directory -Path $tmpDir | Out-Null +} + +# Prepare the VMR + +if (-not (Test-Path -Path $vmrDir -PathType Container)) { + Highlight "Cloning 'dotnet/dotnet' into $vmrDir.." + git clone https://github.com/dotnet/dotnet $vmrDir + + if ($vmrBranch) { + git -C $vmrDir switch -c $vmrBranch + } +} +else { + if ((git -C $vmrDir diff --quiet) -eq $false) { + Fail "There are changes in the working tree of $vmrDir. Please commit or stash your changes" + exit 1 + } + + if ($vmrBranch) { + Highlight "Preparing $vmrDir" + git -C $vmrDir checkout $vmrBranch + git -C $vmrDir pull + } +} + +Set-StrictMode -Version Latest + +# Prepare darc + +Highlight 'Installing .NET, preparing the tooling..' +. .\eng\common\tools.ps1 +$dotnetRoot = InitializeDotNetCli -install:$true +$dotnet = "$dotnetRoot\dotnet.exe" +& "$dotnet" tool restore + +Highlight "Starting the synchronization of VMR.." + +# Synchronize the VMR +$darcArgs = ( + "darc", "vmr", "forwardflow", + "--tmp", $tmpDir, + "--$verbosity", + $vmrDir +) + +if ($ci) { + $darcArgs += ("--ci") +} + +if ($azdevPat) { + $darcArgs += ("--azdev-pat", $azdevPat) +} + +& "$dotnet" $darcArgs + +if ($LASTEXITCODE -eq 0) { + Highlight "Synchronization succeeded" +} +else { + Fail "Synchronization of repo to VMR failed!" + Fail "'$vmrDir' is left in its last state (re-run of this script will reset it)." + Fail "Please inspect the logs which contain path to the failing patch file (use -debugOutput to get all the details)." + Fail "Once you make changes to the conflicting VMR patch, commit it locally and re-run this script." + exit 1 +} diff --git a/eng/common/vmr-sync.sh b/eng/common/vmr-sync.sh new file mode 100755 index 000000000000..86d77ccf5b48 --- /dev/null +++ b/eng/common/vmr-sync.sh @@ -0,0 +1,205 @@ +#!/bin/bash + +### This script is used for synchronizing the current repository into a local VMR. +### It pulls the current repository's code into the specified VMR directory for local testing or +### Source-Build validation. +### +### The tooling used for synchronization will clone the VMR repository into a temporary folder if +### it does not already exist. These clones can be reused in future synchronizations, so it is +### recommended to dedicate a folder for this to speed up re-runs. +### +### USAGE: +### Synchronize current repository into a local VMR: +### ./vmr-sync.sh --tmp "$HOME/repos/tmp" "$HOME/repos/dotnet" +### +### Options: +### -t, --tmp, --tmp-dir PATH +### Required. Path to the temporary folder where repositories will be cloned +### +### -b, --branch, --vmr-branch BRANCH_NAME +### Optional. Branch of the 'dotnet/dotnet' repo to synchronize. The VMR will be checked out to this branch +### +### --debug +### Optional. Turns on the most verbose logging for the VMR tooling +### +### --remote name:URI +### Optional. Additional remote to use during the synchronization +### This can be used to synchronize to a commit from a fork of the repository +### Example: 'runtime:https://github.com/yourfork/runtime' +### +### --azdev-pat +### Optional. Azure DevOps PAT to use for cloning private repositories. +### +### -v, --vmr, --vmr-dir PATH +### Optional. Path to the dotnet/dotnet repository. 
When null, gets cloned to the temporary folder + +source="${BASH_SOURCE[0]}" + +# resolve $source until the file is no longer a symlink +while [[ -h "$source" ]]; do + scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" + source="$(readlink "$source")" + # if $source was a relative symlink, we need to resolve it relative to the path where the + # symlink file was located + [[ $source != /* ]] && source="$scriptroot/$source" +done +scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" + +function print_help () { + sed -n '/^### /,/^$/p' "$source" | cut -b 5- +} + +COLOR_RED=$(tput setaf 1 2>/dev/null || true) +COLOR_CYAN=$(tput setaf 6 2>/dev/null || true) +COLOR_CLEAR=$(tput sgr0 2>/dev/null || true) +COLOR_RESET=uniquesearchablestring +FAILURE_PREFIX='> ' + +function fail () { + echo "${COLOR_RED}$FAILURE_PREFIX${1//${COLOR_RESET}/${COLOR_RED}}${COLOR_CLEAR}" >&2 +} + +function highlight () { + echo "${COLOR_CYAN}$FAILURE_PREFIX${1//${COLOR_RESET}/${COLOR_CYAN}}${COLOR_CLEAR}" +} + +tmp_dir='' +vmr_dir='' +vmr_branch='' +additional_remotes='' +verbosity=verbose +azdev_pat='' +ci=false + +while [[ $# -gt 0 ]]; do + opt="$(echo "$1" | tr "[:upper:]" "[:lower:]")" + case "$opt" in + -t|--tmp|--tmp-dir) + tmp_dir=$2 + shift + ;; + -v|--vmr|--vmr-dir) + vmr_dir=$2 + shift + ;; + -b|--branch|--vmr-branch) + vmr_branch=$2 + shift + ;; + --remote) + additional_remotes="$additional_remotes $2" + shift + ;; + --azdev-pat) + azdev_pat=$2 + shift + ;; + --ci) + ci=true + ;; + -d|--debug) + verbosity=debug + ;; + -h|--help) + print_help + exit 0 + ;; + *) + fail "Invalid argument: $1" + print_help + exit 1 + ;; + esac + + shift +done + +# Validation + +if [[ -z "$tmp_dir" ]]; then + fail "Missing --tmp-dir argument. Please specify the path to the temporary folder where the repositories will be cloned" + exit 1 +fi + +# Sanitize the input + +if [[ -z "$vmr_dir" ]]; then + vmr_dir="$tmp_dir/dotnet" +fi + +if [[ ! -d "$tmp_dir" ]]; then + mkdir -p "$tmp_dir" +fi + +if [[ "$verbosity" == "debug" ]]; then + set -x +fi + +# Prepare the VMR + +if [[ ! -d "$vmr_dir" ]]; then + highlight "Cloning 'dotnet/dotnet' into $vmr_dir.." + git clone https://github.com/dotnet/dotnet "$vmr_dir" + + if [[ -n "$vmr_branch" ]]; then + git -C "$vmr_dir" switch -c "$vmr_branch" + fi +else + if ! git -C "$vmr_dir" diff --quiet; then + fail "There are changes in the working tree of $vmr_dir. Please commit or stash your changes" + exit 1 + fi + + if [[ -n "$vmr_branch" ]]; then + highlight "Preparing $vmr_dir" + git -C "$vmr_dir" checkout "$vmr_branch" + git -C "$vmr_dir" pull + fi +fi + +set -e + +# Prepare darc + +highlight 'Installing .NET, preparing the tooling..' +source "./eng/common/tools.sh" +InitializeDotNetCli true +dotnetDir=$( cd ./.dotnet/; pwd -P ) +dotnet=$dotnetDir/dotnet +"$dotnet" tool restore + +highlight "Starting the synchronization of VMR.." +set +e + +if [[ -n "$additional_remotes" ]]; then + additional_remotes="--additional-remotes $additional_remotes" +fi + +if [[ -n "$azdev_pat" ]]; then + azdev_pat="--azdev-pat $azdev_pat" +fi + +ci_arg='' +if [[ "$ci" == "true" ]]; then + ci_arg="--ci" +fi + +# Synchronize the VMR + +"$dotnet" darc vmr forwardflow \ + --tmp "$tmp_dir" \ + $azdev_pat \ + --$verbosity \ + $ci_arg \ + $additional_remotes \ + "$vmr_dir" + +if [[ $? == 0 ]]; then + highlight "Synchronization succeeded" +else + fail "Synchronization of repo to VMR failed!" + fail "'$vmr_dir' is left in its last state (re-run of this script will reset it)." 
+ fail "Please inspect the logs which contain path to the failing patch file (use --debug to get all the details)." + fail "Once you make changes to the conflicting VMR patch, commit it locally and re-run this script." + exit 1 +fi diff --git a/eng/docker/libraries-sdk.linux.Dockerfile b/eng/docker/libraries-sdk.linux.Dockerfile index 7e06eeb5e8dc..74752318dc66 100644 --- a/eng/docker/libraries-sdk.linux.Dockerfile +++ b/eng/docker/libraries-sdk.linux.Dockerfile @@ -20,7 +20,7 @@ ENV _DOTNET_INSTALL_CHANNEL=$VERSION RUN rm -rf /usr/share/dotnet # Install latest daily SDK: -RUN wget https://dot.net/v1/dotnet-install.sh +RUN wget https://builds.dotnet.microsoft.com/dotnet/scripts/v1/dotnet-install.sh RUN bash ./dotnet-install.sh --channel $_DOTNET_INSTALL_CHANNEL --quality daily --install-dir /usr/share/dotnet # Collect the following artifacts under /live-runtime-artifacts, diff --git a/eng/docker/libraries-sdk.windows.Dockerfile b/eng/docker/libraries-sdk.windows.Dockerfile index 5345413303b7..437a70543ef6 100644 --- a/eng/docker/libraries-sdk.windows.Dockerfile +++ b/eng/docker/libraries-sdk.windows.Dockerfile @@ -14,7 +14,7 @@ USER ContainerAdministrator # remove the existing ASP.NET SDK, we want to keep only the latest one we download later RUN Remove-Item -Force -Recurse 'C:/Program Files/dotnet/shared/Microsoft.AspNetCore.App/*' -RUN Invoke-WebRequest -Uri https://dot.net/v1/dotnet-install.ps1 -OutFile .\dotnet-install.ps1 +RUN Invoke-WebRequest -Uri https://builds.dotnet.microsoft.com/dotnet/scripts/v1/dotnet-install.ps1 -OutFile .\dotnet-install.ps1 RUN & .\dotnet-install.ps1 -Channel $env:_DOTNET_INSTALL_CHANNEL -Quality daily -InstallDir 'C:/Program Files/dotnet' USER ContainerUser diff --git a/eng/extract-for-crossdac.ps1 b/eng/extract-for-crossdac.ps1 index 24ce91d1043f..2ae2e61e5d32 100644 --- a/eng/extract-for-crossdac.ps1 +++ b/eng/extract-for-crossdac.ps1 @@ -20,6 +20,6 @@ foreach ($file in Get-ChildItem $DownloadDirectory -Recurse -Filter '*.nupkg') { Write-Host "Extracting Package: $id $ver to $ExtractDirectory/$($id.ToLowerInvariant())/$ver" [System.IO.Compression.ZipFile]::ExtractToDirectory($file.FullName, "$ExtractDirectory/$($id.ToLowerInvariant())/$ver") } else { - throw "Unexpected file name: $($file.Name)" + Write-Host "Skipping non-runtime pack: $($file.Name)" } } diff --git a/eng/liveBuilds.targets b/eng/liveBuilds.targets index 17281b8214a2..23960c401c55 100644 --- a/eng/liveBuilds.targets +++ b/eng/liveBuilds.targets @@ -63,9 +63,22 @@ x64 - - $([MSBuild]::NormalizePath('$(DotNetHostBinDir)', 'apphost$(ExeSuffix)')) - $([MSBuild]::NormalizePath('$(CoreCLRArtifactsPath)', 'corehost', 'singlefilehost$(ExeSuffix)')) + + $(ArtifactsDir)bootstrap/$(TargetRid)/microsoft.netcore.app/ref + $(ArtifactsDir)bootstrap/$(TargetRid)/microsoft.netcore.app/lib + $(ArtifactsDir)bootstrap/$(TargetRid)/aotsdk + $(ArtifactsDir)bootstrap/$(TargetRid)/host + $(ArtifactsDir)bootstrap/$(TargetRid)/ridgraph + + + + $([MSBuild]::NormalizePath('$(DotNetHostBinDir)', 'apphost$(ExeSuffix)')) + $([MSBuild]::NormalizePath('$(CoreCLRArtifactsPath)', 'corehost', 'singlefilehost$(ExeSuffix)')) + + + + $(BootstrapHostDir)/apphost$(ExeSuffix) + $(BootstrapHostDir)/singlefilehost$(ExeSuffix) @@ -131,7 +144,7 @@ - + true @@ -226,6 +239,8 @@ $(LibrariesNativeArtifactsPath)dotnet.native.js; $(LibrariesNativeArtifactsPath)dotnet.runtime.js; $(LibrariesNativeArtifactsPath)dotnet.runtime.js.map; + $(LibrariesNativeArtifactsPath)dotnet.diagnostics.js; + 
$(LibrariesNativeArtifactsPath)dotnet.diagnostics.js.map; $(LibrariesNativeArtifactsPath)dotnet.d.ts; $(LibrariesNativeArtifactsPath)package.json; $(LibrariesNativeArtifactsPath)dotnet.native.wasm; diff --git a/eng/native/build-commons.sh b/eng/native/build-commons.sh index bf17618d972a..2bc1faf27e7f 100755 --- a/eng/native/build-commons.sh +++ b/eng/native/build-commons.sh @@ -63,6 +63,20 @@ build_native() # All set to commence the build echo "Commencing build of \"$target\" target in \"$message\" for $__TargetOS.$__TargetArch.$__BuildType in $intermediatesDir" + SAVED_CFLAGS="${CFLAGS}" + SAVED_CXXFLAGS="${CXXFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + + # Let users provide additional compiler/linker flags via EXTRA_CFLAGS/EXTRA_CXXFLAGS/EXTRA_LDFLAGS. + # If users directly override CFLAG/CXXFLAGS/LDFLAGS, that may lead to some configure tests working incorrectly. + # See https://github.com/dotnet/runtime/issues/35727 for more information. + # + # These flags MUST be exported before gen-buildsys.sh runs or cmake will ignore them + # + export CFLAGS="${CFLAGS} ${EXTRA_CFLAGS}" + export CXXFLAGS="${CXXFLAGS} ${EXTRA_CXXFLAGS}" + export LDFLAGS="${LDFLAGS} ${EXTRA_LDFLAGS}" + if [[ "$targetOS" == osx || "$targetOS" == maccatalyst ]]; then if [[ "$hostArch" == x64 ]]; then cmakeArgs="-DCMAKE_OSX_ARCHITECTURES=\"x86_64\" $cmakeArgs" @@ -86,10 +100,7 @@ build_native() exit 1 fi - # cmake cache scripts can't see command line args - export ANDROID_BUILD=1 - - cmakeArgs="-C $__RepoRootDir/eng/native/tryrun.cmake $cmakeArgs" + cmakeArgs="-DANDROID_BUILD=1 -C $__RepoRootDir/eng/native/tryrun.cmake $cmakeArgs" cmakeArgs="-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-${ANDROID_API_LEVEL} -DANDROID_NATIVE_API_LEVEL=${ANDROID_API_LEVEL} $cmakeArgs" # Don't try to set CC/CXX in init-compiler.sh - it's handled in android.toolchain.cmake already @@ -197,17 +208,6 @@ build_native() return fi - SAVED_CFLAGS="${CFLAGS}" - SAVED_CXXFLAGS="${CXXFLAGS}" - SAVED_LDFLAGS="${LDFLAGS}" - - # Let users provide additional compiler/linker flags via EXTRA_CFLAGS/EXTRA_CXXFLAGS/EXTRA_LDFLAGS. - # If users directly override CFLAG/CXXFLAGS/LDFLAGS, that may lead to some configure tests working incorrectly. - # See https://github.com/dotnet/runtime/issues/35727 for more information. - export CFLAGS="${CFLAGS} ${EXTRA_CFLAGS}" - export CXXFLAGS="${CXXFLAGS} ${EXTRA_CXXFLAGS}" - export LDFLAGS="${LDFLAGS} ${EXTRA_LDFLAGS}" - local exit_code if [[ "$__StaticAnalyzer" == 1 ]]; then pushd "$intermediatesDir" @@ -269,7 +269,7 @@ usage() echo "-gccx.y: optional argument to build using gcc version x.y." echo "-ninja: target ninja instead of GNU make" echo "-numproc: set the number of build processes." - echo "-outputrid: optional argument that overrides the target rid name." + echo "-targetrid: optional argument that overrides the target rid name." echo "-portablebuild: pass -portablebuild=false to force a non-portable build." echo "-skipconfigure: skip build configuration." echo "-keepnativesymbols: keep native/unmanaged debug symbols." 
@@ -289,7 +289,7 @@ source "$__RepoRootDir/eng/common/native/init-os-and-arch.sh" __TargetArch=$arch __TargetOS=$os -__OutputRid='' +__TargetRid='' # Get the number of processors available to the scheduler platform="$(uname -s | tr '[:upper:]' '[:lower:]')" @@ -461,12 +461,12 @@ while :; do __TargetArch=wasm ;; - outputrid|-outputrid) + targetrid|-targetrid|outputrid|-outputrid) if [[ -n "$2" ]]; then - __OutputRid="$2" + __TargetRid="$2" shift else - echo "ERROR: 'outputrid' requires a non-empty option argument" + echo "ERROR: 'targetrid' requires a non-empty option argument" exit 1 fi ;; @@ -567,15 +567,15 @@ fi # init the target distro name (__DistroRid) and target portable os (__PortableTargetOS). initTargetDistroRid -if [ -z "$__OutputRid" ]; then +if [ -z "$__TargetRid" ]; then if [[ "$__PortableBuild" == 0 ]]; then - __OutputRid="$__DistroRid" + __TargetRid="$__DistroRid" else - __OutputRid="$__PortableTargetOS-$__TargetArch" + __TargetRid="$__PortableTargetOS-$__TargetArch" fi fi -export __OutputRid -echo "__OutputRid: ${__OutputRid}" +export __TargetRid +echo "__TargetRid: ${__TargetRid}" # When the host runs on an unknown rid, it falls back to the output rid -__HostFallbackOS="${__OutputRid%-*}" # Strip architecture +__HostFallbackOS="${__TargetRid%-*}" # Strip architecture diff --git a/eng/native/configurecompiler.cmake b/eng/native/configurecompiler.cmake index 985af44f99ec..f1453deb0d86 100644 --- a/eng/native/configurecompiler.cmake +++ b/eng/native/configurecompiler.cmake @@ -70,6 +70,12 @@ if (MSVC) add_compile_options($<$:$>) add_link_options($<$>:/guard:cf>) + if (NOT CLR_CMAKE_PGO_INSTRUMENT) + # Load all imported DLLs from the System32 directory. + # Don't do this when instrumenting for PGO as a local DLL dependency is introduced by the instrumentation + add_linker_flag(/DEPENDENTLOADFLAG:0x800) + endif() + # Linker flags # set (WINDOWS_SUBSYSTEM_VERSION 6.01) @@ -433,6 +439,8 @@ if (CLR_CMAKE_HOST_UNIX) message("Detected Haiku x86_64") elseif(CLR_CMAKE_HOST_BROWSER) add_definitions(-DHOST_BROWSER) + elseif(CLR_CMAKE_HOST_ANDROID) + add_definitions(-DHOST_ANDROID) endif() elseif(CLR_CMAKE_HOST_WASI) add_definitions(-DHOST_WASI) diff --git a/eng/native/configureplatform.cmake b/eng/native/configureplatform.cmake index c0961f1fed0b..285060b2b666 100644 --- a/eng/native/configureplatform.cmake +++ b/eng/native/configureplatform.cmake @@ -29,6 +29,8 @@ if(CLR_CMAKE_HOST_OS STREQUAL linux) set(CLR_CMAKE_HOST_UNIX_X86 1) elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL aarch64) set(CLR_CMAKE_HOST_UNIX_ARM64 1) + elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL riscv64) + set(CLR_CMAKE_HOST_UNIX_RISCV64 1) else() clr_unknown_arch() endif() @@ -368,6 +370,11 @@ if(CLR_CMAKE_HOST_LINUX_MUSL OR CLR_CMAKE_TARGET_OS STREQUAL alpine) set(CLR_CMAKE_TARGET_LINUX_MUSL 1) endif(CLR_CMAKE_HOST_LINUX_MUSL OR CLR_CMAKE_TARGET_OS STREQUAL alpine) +macro(set_cache_value) + set(${ARGV0} ${ARGV1} CACHE STRING "Result from TRY_RUN" FORCE) + set(${ARGV0}__TRYRUN_OUTPUT "dummy output" CACHE STRING "Output from TRY_RUN" FORCE) +endmacro() + if(CLR_CMAKE_TARGET_OS STREQUAL android) set(CLR_CMAKE_TARGET_UNIX 1) set(CLR_CMAKE_TARGET_LINUX 1) @@ -427,10 +434,10 @@ if(CLR_CMAKE_TARGET_OS STREQUAL haiku) set(CLR_CMAKE_TARGET_HAIKU 1) endif(CLR_CMAKE_TARGET_OS STREQUAL haiku) -if(CLR_CMAKE_TARGET_OS STREQUAL emscripten) +if(CLR_CMAKE_TARGET_OS STREQUAL emscripten OR CLR_CMAKE_TARGET_OS STREQUAL browser) set(CLR_CMAKE_TARGET_UNIX 1) set(CLR_CMAKE_TARGET_BROWSER 1) -endif(CLR_CMAKE_TARGET_OS STREQUAL emscripten) 
+endif(CLR_CMAKE_TARGET_OS STREQUAL emscripten OR CLR_CMAKE_TARGET_OS STREQUAL browser) if(CLR_CMAKE_TARGET_OS STREQUAL wasi) set(CLR_CMAKE_TARGET_WASI 1) @@ -471,7 +478,11 @@ if(CLR_CMAKE_TARGET_OS STREQUAL windows) endif() # check if host & target os/arch combination are valid +<<<<<<< HEAD if (NOT (CLR_CMAKE_TARGET_OS STREQUAL CLR_CMAKE_HOST_OS) AND NOT CLR_CMAKE_TARGET_WASI AND NOT CLR_CMAKE_TARGET_BROWSER) +======= +if (NOT (CLR_CMAKE_TARGET_OS STREQUAL CLR_CMAKE_HOST_OS) AND NOT CLR_CMAKE_TARGET_WASI AND NOT CLR_CMAKE_TARGET_ANDROID AND NOT CLR_CMAKE_TARGET_BROWSER) +>>>>>>> upstream-jun if(NOT (CLR_CMAKE_HOST_OS STREQUAL windows)) message(FATAL_ERROR "Invalid host and target os/arch combination. Host OS: ${CLR_CMAKE_HOST_OS}") endif() @@ -494,7 +505,7 @@ if(NOT CLR_CMAKE_TARGET_BROWSER AND NOT CLR_CMAKE_TARGET_WASI) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() -if (CLR_CMAKE_TARGET_ANDROID) +if (CLR_CMAKE_HOST_ANDROID) # Google requires all the native libraries to be aligned to 16 bytes (for 16k memory page size) # This applies only to 64-bit binaries if(CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) @@ -515,7 +526,7 @@ if (CLR_CMAKE_TARGET_ANDROID OR CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET set(CLR_CMAKE_USE_SYSTEM_ZLIB 1) endif() -if (NOT CLR_CMAKE_TARGET_ANDROID) +if (NOT CLR_CMAKE_TARGET_ANDROID AND NOT CLR_CMAKE_TARGET_BROWSER) # opt into building tools like ildasm/ilasm set(CLR_CMAKE_BUILD_TOOLS 1) endif() diff --git a/eng/native/configuretools.cmake b/eng/native/configuretools.cmake index b5869de31e18..ccf7238025cc 100644 --- a/eng/native/configuretools.cmake +++ b/eng/native/configuretools.cmake @@ -77,10 +77,17 @@ endif() if (NOT CLR_CMAKE_HOST_WIN32 AND NOT CLR_CMAKE_TARGET_ARCH_WASM) # detect linker - execute_process(COMMAND sh -c "${CMAKE_C_COMPILER} ${CMAKE_SHARED_LINKER_FLAGS} -Wl,--version | head -1" - ERROR_QUIET - OUTPUT_VARIABLE ldVersionOutput - OUTPUT_STRIP_TRAILING_WHITESPACE) + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + execute_process(COMMAND ${CMAKE_C_COMPILER} -Wl,--version + ERROR_QUIET + OUTPUT_VARIABLE ldVersionOutput + OUTPUT_STRIP_TRAILING_WHITESPACE) + else() + execute_process(COMMAND sh -c "${CMAKE_C_COMPILER} ${CMAKE_SHARED_LINKER_FLAGS} -Wl,--version | head -1" + ERROR_QUIET + OUTPUT_VARIABLE ldVersionOutput + OUTPUT_STRIP_TRAILING_WHITESPACE) + endif() if("${ldVersionOutput}" MATCHES "LLD") set(LD_LLVM 1) diff --git a/eng/native/functions.cmake b/eng/native/functions.cmake index 2dee8bf0c509..05c09a2a6fca 100644 --- a/eng/native/functions.cmake +++ b/eng/native/functions.cmake @@ -1,11 +1,5 @@ function(clr_unknown_arch) - if (WIN32) - message(FATAL_ERROR "Only AMD64, ARM64, ARM, I386, LOONGARCH64 and RISCV64 hosts are supported. Found: ${CMAKE_SYSTEM_PROCESSOR}") - elseif(CLR_CROSS_COMPONENTS_BUILD) - message(FATAL_ERROR "Only AMD64, ARM64, I386, LOONGARCH64 and RISCV64 hosts are supported for linux cross-architecture component. 
Found: ${CMAKE_SYSTEM_PROCESSOR}") - else() - message(FATAL_ERROR "'${CMAKE_SYSTEM_PROCESSOR}' is an unsupported architecture.") - endif() + message(FATAL_ERROR "'${CMAKE_SYSTEM_PROCESSOR}' is an unsupported architecture.") endfunction() # C to MASM include file translator @@ -338,39 +332,66 @@ function(generate_exports_file) list(GET INPUT_LIST -1 outputFilename) list(REMOVE_AT INPUT_LIST -1) - if(CLR_CMAKE_TARGET_APPLE) - set(SCRIPT_NAME generateexportedsymbols.sh) + # Win32 may be false when cross compiling + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + set(SCRIPT_NAME ${CLR_ENG_NATIVE_DIR}/generateversionscript.ps1) + + add_custom_command( + OUTPUT ${outputFilename} + COMMAND powershell -NoProfile -ExecutionPolicy ByPass -File "${SCRIPT_NAME}" ${INPUT_LIST} >${outputFilename} + DEPENDS ${INPUT_LIST} ${SCRIPT_NAME} + COMMENT "Generating exports file ${outputFilename}" + ) else() - set(SCRIPT_NAME generateversionscript.sh) + if(CLR_CMAKE_TARGET_APPLE) + set(SCRIPT_NAME ${CLR_ENG_NATIVE_DIR}/generateexportedsymbols.sh) + else() + set(SCRIPT_NAME ${CLR_ENG_NATIVE_DIR}/generateversionscript.sh) + endif() + + add_custom_command( + OUTPUT ${outputFilename} + COMMAND ${SCRIPT_NAME} ${INPUT_LIST} >${outputFilename} + DEPENDS ${INPUT_LIST} ${SCRIPT_NAME} + COMMENT "Generating exports file ${outputFilename}" + ) endif() - add_custom_command( - OUTPUT ${outputFilename} - COMMAND ${CLR_ENG_NATIVE_DIR}/${SCRIPT_NAME} ${INPUT_LIST} >${outputFilename} - DEPENDS ${INPUT_LIST} ${CLR_ENG_NATIVE_DIR}/${SCRIPT_NAME} - COMMENT "Generating exports file ${outputFilename}" - ) set_source_files_properties(${outputFilename} PROPERTIES GENERATED TRUE) endfunction() function(generate_exports_file_prefix inputFilename outputFilename prefix) - if(CMAKE_SYSTEM_NAME STREQUAL Darwin) set(SCRIPT_NAME generateexportedsymbols.sh) else() - set(SCRIPT_NAME generateversionscript.sh) + # Win32 may be false when cross compiling + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + set(SCRIPT_NAME ${CLR_ENG_NATIVE_DIR}/generateversionscript.ps1) + else() + set(SCRIPT_NAME ${CLR_ENG_NATIVE_DIR}/generateversionscript.sh) + endif() + if (NOT ${prefix} STREQUAL "") set(EXTRA_ARGS ${prefix}) endif() endif(CMAKE_SYSTEM_NAME STREQUAL Darwin) - add_custom_command( - OUTPUT ${outputFilename} - COMMAND ${CLR_ENG_NATIVE_DIR}/${SCRIPT_NAME} ${inputFilename} ${EXTRA_ARGS} >${outputFilename} - DEPENDS ${inputFilename} ${CLR_ENG_NATIVE_DIR}/${SCRIPT_NAME} - COMMENT "Generating exports file ${outputFilename}" - ) + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + add_custom_command( + OUTPUT ${outputFilename} + COMMAND powershell -NoProfile -ExecutionPolicy ByPass -File \"${SCRIPT_NAME}\" ${inputFilename} ${EXTRA_ARGS} >${outputFilename} + DEPENDS ${inputFilename} ${SCRIPT_NAME} + COMMENT "Generating exports file ${outputFilename}" + ) + else() + add_custom_command( + OUTPUT ${outputFilename} + COMMAND ${SCRIPT_NAME} ${inputFilename} ${EXTRA_ARGS} >${outputFilename} + DEPENDS ${inputFilename} ${SCRIPT_NAME} + COMMENT "Generating exports file ${outputFilename}" + ) + endif() set_source_files_properties(${outputFilename} PROPERTIES GENERATED TRUE) endfunction() @@ -445,16 +466,28 @@ function(strip_symbols targetName outputFilename) COMMAND ${strip_command} ) else (CLR_CMAKE_TARGET_APPLE) - - add_custom_command( - TARGET ${targetName} - POST_BUILD - VERBATIM - COMMAND sh -c "echo Stripping symbols from $(basename '${strip_source_file}') into $(basename '${strip_destination_file}')" - COMMAND ${CMAKE_OBJCOPY} --only-keep-debug 
${strip_source_file} ${strip_destination_file} - COMMAND ${CMAKE_OBJCOPY} --strip-debug --strip-unneeded ${strip_source_file} - COMMAND ${CMAKE_OBJCOPY} --add-gnu-debuglink=${strip_destination_file} ${strip_source_file} + # Win32 may be false when cross compiling + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + add_custom_command( + TARGET ${targetName} + POST_BUILD + VERBATIM + COMMAND powershell -C "echo Stripping symbols from $(Split-Path -Path '${strip_source_file}' -Leaf) into $(Split-Path -Path '${strip_destination_file}' -Leaf)" + COMMAND ${CMAKE_OBJCOPY} --only-keep-debug ${strip_source_file} ${strip_destination_file} + COMMAND ${CMAKE_OBJCOPY} --strip-debug --strip-unneeded ${strip_source_file} + COMMAND ${CMAKE_OBJCOPY} --add-gnu-debuglink=${strip_destination_file} ${strip_source_file} ) + else() + add_custom_command( + TARGET ${targetName} + POST_BUILD + VERBATIM + COMMAND sh -c "echo Stripping symbols from $(basename '${strip_source_file}') into $(basename '${strip_destination_file}')" + COMMAND ${CMAKE_OBJCOPY} --only-keep-debug ${strip_source_file} ${strip_destination_file} + COMMAND ${CMAKE_OBJCOPY} --strip-debug --strip-unneeded ${strip_source_file} + COMMAND ${CMAKE_OBJCOPY} --add-gnu-debuglink=${strip_destination_file} ${strip_source_file} + ) + endif() endif (CLR_CMAKE_TARGET_APPLE) endif(CLR_CMAKE_HOST_UNIX) endfunction() diff --git a/eng/native/gen-buildsys.cmd b/eng/native/gen-buildsys.cmd index b78a073c8c11..1a8356f3e1df 100644 --- a/eng/native/gen-buildsys.cmd +++ b/eng/native/gen-buildsys.cmd @@ -28,7 +28,8 @@ set __UseEmcmake=0 if /i "%__Ninja%" == "1" ( set __CmakeGenerator=Ninja ) else ( - if /i "%__VSVersion%" == "vs2022" (set __CmakeGenerator=%__CmakeGenerator% 17 2022) + if /i NOT "%__Arch%" == "wasm" ( + if /i "%__VSVersion%" == "17.0" (set __CmakeGenerator=%__CmakeGenerator% 17 2022) if /i "%__Arch%" == "x64" (set __ExtraCmakeParams=%__ExtraCmakeParams% -A x64) if /i "%__Arch%" == "arm" (set __ExtraCmakeParams=%__ExtraCmakeParams% -A ARM) @@ -71,6 +76,33 @@ if /i "%__Arch%" == "wasm" ( set __ExtraCmakeParams=%__ExtraCmakeParams% "-DCMAKE_SYSTEM_VERSION=10.0" ) +if /i "%__Os%" == "android" ( + :: Keep in sync with $(AndroidApiLevelMin) in Directory.Build.props in the repository root + set __ANDROID_API_LEVEL=21 + if "%ANDROID_NDK_ROOT%" == "" ( + echo Error: You need to set the ANDROID_NDK_ROOT environment variable pointing to the Android NDK root. + exit /B 1 + ) + + set __ExtraCmakeParams=!__ExtraCmakeParams! "-DANDROID_BUILD=1" "-DANDROID_CPP_FEATURES='no-rtti exceptions'" + set __ExtraCmakeParams=!__ExtraCmakeParams! "-DANDROID_PLATFORM=android-!__ANDROID_API_LEVEL!" "-DANDROID_NATIVE_API_LEVEL=!__ANDROID_API_LEVEL!" + + if "%__Arch%" == "x64" ( + set __ExtraCmakeParams=!__ExtraCmakeParams! "-DANDROID_ABI=x86_64" + ) + if "%__Arch%" == "x86" ( + set __ExtraCmakeParams=!__ExtraCmakeParams! "-DANDROID_ABI=x86" + ) + if "%__Arch%" == "arm64" ( + set __ExtraCmakeParams=!__ExtraCmakeParams! "-DANDROID_ABI=arm64-v8a" + ) + if "%__Arch%" == "arm" ( + set __ExtraCmakeParams=!__ExtraCmakeParams! "-DANDROID_ABI=armeabi-v7a" + ) + + set __ExtraCmakeParams=!__ExtraCmakeParams! 
"-DCMAKE_TOOLCHAIN_FILE='%ANDROID_NDK_ROOT:\=/%/build/cmake/android.toolchain.cmake'" "-C %__repoRoot%/eng/native/tryrun.cmake" +) + :loop if [%6] == [] goto end_loop set __ExtraCmakeParams=%__ExtraCmakeParams% %6 @@ -102,6 +134,7 @@ if not "%__ConfigureOnly%" == "1" ( if /i "%__UseEmcmake%" == "1" ( call "!EMSDK!/emsdk_env" > nul 2>&1 && emcmake "%CMakePath%" %__ExtraCmakeParams% --no-warn-unused-cli -G "%__CmakeGenerator%" -B %__IntermediatesDir% -S %__SourceDir% ) else ( + echo "%CMakePath% %__ExtraCmakeParams% --no-warn-unused-cli -G %__CmakeGenerator% -B %__IntermediatesDir% -S %__SourceDir%" "%CMakePath%" %__ExtraCmakeParams% --no-warn-unused-cli -G "%__CmakeGenerator%" -B %__IntermediatesDir% -S %__SourceDir% ) diff --git a/eng/native/generateversionscript.ps1 b/eng/native/generateversionscript.ps1 new file mode 100644 index 000000000000..945ab6f706a4 --- /dev/null +++ b/eng/native/generateversionscript.ps1 @@ -0,0 +1,34 @@ +param ( + [string]$inputFile, + [string]$prefix +) + +# Print the header +Write-Output "V1.0 {" +Write-Output " global:" + +# Read the input file line by line +Get-Content $inputFile | ForEach-Object { + $line = $_.Trim() + + # Skip empty lines and comment lines starting with semicolon + if ($line -match '^\;.*$' -or $line -match '^[\s]*$') { + return + } + + # Remove the CR character in case the sources are mapped from + # a Windows share and contain CRLF line endings + $line = $line -replace "`r", "" + + # Only prefix the entries that start with "#" + if ($line -match '^#.*$') { + $line = $line -replace '^#', '' + Write-Output " $prefix$line;" + } else { + Write-Output " $line;" + } +} + +# Print the footer +Write-Output " local: *;" +Write-Output "};" \ No newline at end of file diff --git a/eng/native/ijw/IJW.cmake b/eng/native/ijw/IJW.cmake index 4d145a68470a..2529ec596f3b 100644 --- a/eng/native/ijw/IJW.cmake +++ b/eng/native/ijw/IJW.cmake @@ -64,6 +64,10 @@ if (CLR_CMAKE_HOST_WIN32) remove_ijw_incompatible_options("${dirCompileOptions}" dirCompileOptions) set_directory_properties(PROPERTIES COMPILE_OPTIONS "${dirCompileOptions}") + # IJW tests needs to load DLLs from somewhere other than System32 + string(REPLACE "/DEPENDENTLOADFLAG:0x800" "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + string(REPLACE "/DEPENDENTLOADFLAG:0x800" "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}") + set(CLR_SDK_REF_PACK_OUTPUT "") set(CLR_SDK_REF_PACK_DISCOVERY_ERROR "") set(CLR_SDK_REF_PACK_DISCOVERY_RESULT 0) diff --git a/eng/native/init-vs-env.cmd b/eng/native/init-vs-env.cmd index 0d28cac9893b..6f417fc23f6d 100644 --- a/eng/native/init-vs-env.cmd +++ b/eng/native/init-vs-env.cmd @@ -53,11 +53,7 @@ set "__VSCOMNTOOLS=" set "VSCMD_START_DIR=" :VSDetected -if "%VisualStudioVersion%"=="17.0" ( - set __VSVersion=vs2022 - set __PlatformToolset=v143 - goto :SetVCEnvironment -) +goto :SetVCEnvironment :VSMissing echo %__MsgPrefix%Error: Visual Studio 2022 with C++ tools required. 
^ diff --git a/eng/native/naming.props b/eng/native/naming.props index aa59f00c7903..76480557920d 100644 --- a/eng/native/naming.props +++ b/eng/native/naming.props @@ -30,16 +30,16 @@ - + - + .dll .lib .pdb - + lib .dylib diff --git a/eng/native/signing/auth.json b/eng/native/signing/auth.json index 10f5d5791a7e..a467bd95b555 100644 --- a/eng/native/signing/auth.json +++ b/eng/native/signing/auth.json @@ -1,18 +1,20 @@ { "Version" : "1.0.0", - "AuthenticationType" : "AAD_CERT", - "TenantId" : "72f988bf-86f1-41af-91ab-2d7cd011db47", - "ClientId" : "2234cdec-a13f-4bb2-aa63-04c57fd7a1f9", - "AuthCert" : + "AuthenticationType" : "AAD_MSI_WIF", + "TenantId" : "975f013f-7f24-47e8-a7d3-abc4752bf346", + "ClientId" : "22346933-af99-4e94-97d5-7fa1dcf4bba6", + "EsrpClientId": "22346933-af99-4e94-97d5-7fa1dcf4bba6", + "RequestSigningCert" : { - "SubjectName" : "CN=2234cdec-a13f-4bb2-aa63-04c57fd7a1f9.microsoft.com", - "StoreLocation" : "CurrentUser", - "StoreName": "My", - "SendX5c" : "true" + "GetCertFromKeyVault" : true, + "KeyVaultName": "clrdiag-esrp-pme", + "KeyVaultCertName": "dac-dnceng-esrpclient-cert", + "SendX5c": false, + "WithAzureRegion": false, + "StoreLocation": null, + "StoreName": null, + "SubjectName": null }, - "RequestSigningCert" : { - "SubjectName" : "CN=2234cdec-a13f-4bb2-aa63-04c57fd7a1f9", - "StoreLocation" : "CurrentUser", - "StoreName" : "My" - } + "OAuthToken": null, + "FederatedTokenData": {} } diff --git a/eng/native/signing/config.json b/eng/native/signing/config.json index 95fa7faba4b3..ab1530645a65 100644 --- a/eng/native/signing/config.json +++ b/eng/native/signing/config.json @@ -1,6 +1,6 @@ { "Version" : "1.0.0", - "MaxDegreeOfParallelism" : "50", - "ExponentialRetryCount" : "5", - "EsrpSessionTimeoutInSec" : "1800" + "MaxDegreeOfParallelism" : 50, + "ExponentialRetryCount" : 5, + "EsrpSessionTimeoutInSec" : 1800 } diff --git a/eng/native/tryrun.cmake b/eng/native/tryrun.cmake index e71c9bbd8e96..0887d8428342 100644 --- a/eng/native/tryrun.cmake +++ b/eng/native/tryrun.cmake @@ -1,6 +1,5 @@ set(CROSS_ROOTFS $ENV{ROOTFS_DIR}) set(TARGET_ARCH_NAME $ENV{TARGET_BUILD_ARCH}) -set(ANDROID_BUILD $ENV{ANDROID_BUILD}) # Also allow building as Android without specifying `-cross`. if(NOT DEFINED TARGET_ARCH_NAME AND DEFINED ANDROID_BUILD) @@ -50,8 +49,8 @@ if(NOT DEFINED ANDROID_BUILD) endif() endif() -if(DARWIN) - if(DEFINED ANDROID_BUILD OR TARGET_ARCH_NAME MATCHES "^(arm64|x64)$") +if(DARWIN AND NOT DEFINED ANDROID_BUILD) + if(TARGET_ARCH_NAME MATCHES "^(arm64|x64)$") set_cache_value(HAS_POSIX_SEMAPHORES_EXITCODE 1) set_cache_value(HAVE_BROKEN_FIFO_KEVENT_EXITCODE 1) set_cache_value(HAVE_BROKEN_FIFO_SELECT_EXITCODE 1) @@ -78,7 +77,11 @@ if(DARWIN) else() message(FATAL_ERROR "Arch is ${TARGET_ARCH_NAME}. 
Only arm64 or x64 is supported for OSX cross build!") endif() +<<<<<<< HEAD elseif(TARGET_ARCH_NAME MATCHES "^(armel|arm|armv6|arm64|loongarch64|riscv64|s390x|ppc64le|x86|x64|wasm)$" OR FREEBSD OR ILLUMOS OR TIZEN OR HAIKU) +======= +elseif(DEFINED ANDROID_BUILD OR TARGET_ARCH_NAME MATCHES "^(armel|arm|armv6|arm64|loongarch64|riscv64|s390x|ppc64le|x86|x64)$" OR FREEBSD OR ILLUMOS OR TIZEN OR HAIKU) +>>>>>>> upstream-jun set_cache_value(HAS_POSIX_SEMAPHORES_EXITCODE 0) set_cache_value(HAVE_CLOCK_MONOTONIC_COARSE_EXITCODE 0) set_cache_value(HAVE_CLOCK_MONOTONIC_EXITCODE 0) @@ -123,8 +126,10 @@ elseif(TARGET_ARCH_NAME MATCHES "^(armel|arm|armv6|arm64|loongarch64|riscv64|s39 set_cache_value(HAVE_CLOCK_MONOTONIC_COARSE_EXITCODE 1) set_cache_value(HAVE_PROCFS_STAT_EXITCODE 1) set_cache_value(HAVE_PROCFS_STATM_EXITCODE 1) + elseif(ANDROID_BUILD) + set_cache_value(ONE_SHARED_MAPPING_PER_FILEREGION_PER_PROCESS 0) endif() -else() +elseif(NOT WIN32) message(FATAL_ERROR "Unsupported platform. OS: ${CMAKE_SYSTEM_NAME}, arch: ${TARGET_ARCH_NAME}") endif() diff --git a/eng/native/version/copy_version_files.ps1 b/eng/native/version/copy_version_files.ps1 new file mode 100644 index 000000000000..7573abcbb11a --- /dev/null +++ b/eng/native/version/copy_version_files.ps1 @@ -0,0 +1,27 @@ +$VersionFolder = $PSScriptRoot +$RepoRoot = (Resolve-Path "$VersionFolder/../../../").Path.TrimEnd("\") + +Get-ChildItem -Path "$VersionFolder" -Filter "_version.*" | ForEach-Object { + $path = $_.FullName + if ($_.Name -eq "_version.c") { + # For _version.c, update the commit ID if it has changed from the last build. + $commit = (git rev-parse HEAD 2>$null) + if (-not $commit) { $commit = "N/A" } + $substitute = "static char sccsid[] __attribute__((used)) = `"@(#)Version N/A @Commit: $commit`";" + $version_file_contents = Get-Content -Path $path | ForEach-Object { $_ -replace "^static.*", $substitute } + $version_file_destination = "$RepoRoot\\artifacts\\obj\\_version.c" + $current_contents = "" + $is_placeholder_file = $false + if (Test-Path -Path $version_file_destination) { + $current_contents = Get-Content -Path $version_file_destination -Raw + $is_placeholder_file = $current_contents -match "@\(#\)Version N/A @Commit:" + } else { + $is_placeholder_file = $true + } + if ($is_placeholder_file -and $version_file_contents -ne $current_contents) { + $version_file_contents | Set-Content -Path $version_file_destination + } + } elseif (-not (Test-Path -Path "$RepoRoot\\artifacts\\obj\\$($_.Name)")) { + Copy-Item -Path $path -Destination "$RepoRoot\\artifacts\\obj\\" + } +} diff --git a/eng/nativepgo.targets b/eng/nativepgo.targets index 99344e20b8e2..9f338d70b96d 100644 --- a/eng/nativepgo.targets +++ b/eng/nativepgo.targets @@ -33,7 +33,7 @@ - + @@ -60,8 +60,4 @@ - - - - diff --git a/eng/packaging.targets b/eng/packaging.targets index c64ccfb2fcc3..eafff4fd012b 100644 --- a/eng/packaging.targets +++ b/eng/packaging.targets @@ -1,309 +1,303 @@ - - - - - true - - true - $(ApiCompatNetCoreAppBaselineVersion) - - $(BeforePack);IncludeAnalyzersInPackage;AddNETStandardCompatErrorFileForPackaging - $(TargetsForTfmSpecificContentInPackage);AddRuntimeSpecificFilesToPackage;IncludeProjectReferencesWithPackAttributeInPackage - - false - true - - true - $(MSBuildThisFileDirectory)useSharedDesignerContext.txt - - - true - PACKAGE.md - PACKAGE.md - $(BeforePack);ValidatePackageReadmeExists - - - true - true - - - false - - - - - true - - $(NoWarn);CP0003 - - - - - - - - - <_FrameworkAssemblyReferences Include="$(MSBuildProjectName)" - 
TargetFramework="$(NetFrameworkMinimum)" /> - - - - - - - - - - - - $(NoWarn);NU5128 - - - - - - - - - - - - - - - - - - $(TargetDir)$(TargetName).pdb - <_packageTargetRuntime>$(TargetPlatformIdentifier.ToLowerInvariant().Replace('windows', 'win')) - <_targetFrameworkWithoutSuffix>$(TargetFramework) - <_targetFrameworkWithoutSuffix Condition="$(TargetFramework.Contains('-'))">$(TargetFramework.SubString(0, $(TargetFramework.IndexOf('-')))) - - - - - - - <_RuntimeSymbolPath Include="@(TfmRuntimeSpecificPackageFile->'%(RootDir)%(Directory)%(FileName).pdb')" Condition="'%(TfmRuntimeSpecificPackageFile.Extension)' == '.dll'" KeepMetadata="None" /> - - - - - - - - - - - - - - <_TargetPathsToSymbols Include="@(_AnalyzerFile)" TargetPath="/%(_AnalyzerFile.PackagePath)" Condition="%(_AnalyzerFile.IsSymbol)" /> - - - - - <_MultiTargetRoslynComponentTargetsTemplate>$(MSBuildThisFileDirectory)MultiTargetRoslynComponent.targets.template - $(IntermediateOutputPath)MultiTargetRoslynComponent.targets - true - - - - - - - - - - - - - - - - - - <_MultiTargetRoslynComponentTargetPrefix>$(PackageId.Replace('.', '_')) - Disable$(PackageId.Replace('.', ''))SourceGenerator - - - - - - - - - <_NETStandardCompatErrorFilePath>$(BaseIntermediateOutputPath)netstandardcompaterror_%(NETStandardCompatError.Identity).targets - <_NETStandardCompatErrorFileTarget>NETStandardCompatError_$(PackageId.Replace('.', '_'))_$([System.String]::new('%(NETStandardCompatError.Supported)').Replace('.', '_')) - <_NETStandardCompatErrorFileContent> - - - - -]]> - - <_NETStandardCompatErrorPlaceholderFilePackagePath>buildTransitive$([System.IO.Path]::DirectorySeparatorChar)%(NETStandardCompatError.Supported) - - - - - - <_PackageBuildFile Include="@(None->Metadata('PackagePath')); - @(Content->Metadata('PackagePath'))" /> - <_PackageBuildFile PackagePathWithoutFilename="$([System.IO.Path]::GetDirectoryName('%(Identity)'))" /> - - - - - - - - - - <_referringTargetFramework>$(TargetFramework) - <_referringTargetFramework Condition="'$(PackageUsePlatformTargeting)' != 'true' and $(TargetFramework.Contains('-'))">$(TargetFramework.SubString(0, $(TargetFramework.IndexOf('-')))) - - - - - <_projectReferenceCopyLocalPaths Include="@(ReferenceCopyLocalPaths->WithMetadataValue('ReferenceSourceTarget', 'ProjectReference')->WithMetadataValue('Pack', 'true'))" /> - - - - - - - - - - - <_referenceAssemblyPaths Include="@(_projectReferenceCopyLocalPaths->WithMetadataValue('Extension', '.dll')->WithMetadataValue('IncludeReferenceAssemblyInPackage', 'true')->Metadata('ReferenceAssembly'))" /> - <_referenceAssemblyPaths Include="@(_projectReferenceCopyLocalPaths->WithMetadataValue('Extension', '.xml')->WithMetadataValue('IncludeReferenceAssemblyInPackage', 'true'))" /> - - - - - - - - net - netframework - - - - - - - <_BuildOutputPackageFile Include="$(OutputPath)**" - Exclude="$(OutputPath)publish\**; - $(OutputPath)" /> - - - - - - - - - - - - + + + + + true + + true + $(ApiCompatNetCoreAppBaselineVersion) + + $(BeforePack);IncludeAnalyzersInPackage;AddNETStandardCompatErrorFileForPackaging + $(TargetsForTfmSpecificContentInPackage);AddRuntimeSpecificFilesToPackage;IncludeProjectReferencesWithPackAttributeInPackage + + false + true + + true + $(MSBuildThisFileDirectory)useSharedDesignerContext.txt + + + true + PACKAGE.md + PACKAGE.md + $(BeforePack);ValidatePackageReadmeExists + + + true + true + + + false + + + + + + true + $(PackageId) + $(PackageVersion) + + + + + + true + + $(NoWarn);CP0003 + + + + + + + + + <_FrameworkAssemblyReferences 
Include="$(MSBuildProjectName)" + TargetFramework="$(NetFrameworkMinimum)" /> + + + + + + + + + + + + $(NoWarn);NU5128 + + + + + + + + + + + + + + + + + + $(TargetDir)$(TargetName).pdb + <_packageTargetRuntime>$(TargetPlatformIdentifier.ToLowerInvariant().Replace('windows', 'win')) + <_targetFrameworkWithoutSuffix>$(TargetFramework) + <_targetFrameworkWithoutSuffix Condition="$(TargetFramework.Contains('-'))">$(TargetFramework.SubString(0, $(TargetFramework.IndexOf('-')))) + + + + + + + <_RuntimeSymbolPath Include="@(TfmRuntimeSpecificPackageFile->'%(RootDir)%(Directory)%(FileName).pdb')" Condition="'%(TfmRuntimeSpecificPackageFile.Extension)' == '.dll'" KeepMetadata="None" /> + + + + + + + + + + + + + + <_TargetPathsToSymbols Include="@(_AnalyzerFile)" TargetPath="/%(_AnalyzerFile.PackagePath)" Condition="%(_AnalyzerFile.IsSymbol)" /> + + + + + <_MultiTargetRoslynComponentTargetsTemplate>$(MSBuildThisFileDirectory)MultiTargetRoslynComponent.targets.template + $(IntermediateOutputPath)MultiTargetRoslynComponent.targets + true + + + + + + + + + + + + + <_MultiTargetRoslynComponentTargetPrefix>$(PackageId.Replace('.', '_')) + Disable$(PackageId.Replace('.', ''))SourceGenerator + + + + + + + + + <_NETStandardCompatErrorFilePath>$(BaseIntermediateOutputPath)netstandardcompaterror_%(NETStandardCompatError.Identity).targets + <_NETStandardCompatErrorFileTarget>NETStandardCompatError_$(PackageId.Replace('.', '_'))_$([System.String]::new('%(NETStandardCompatError.Supported)').Replace('.', '_')) + <_NETStandardCompatErrorFileContent> + + + + +]]> + + <_NETStandardCompatErrorPlaceholderFilePackagePath>buildTransitive$([System.IO.Path]::DirectorySeparatorChar)%(NETStandardCompatError.Supported) + + + + + + <_PackageBuildFile Include="@(None->Metadata('PackagePath')); + @(Content->Metadata('PackagePath'))" /> + <_PackageBuildFile PackagePathWithoutFilename="$([System.IO.Path]::GetDirectoryName('%(Identity)'))" /> + + + + + + + + + + <_referringTargetFramework>$(TargetFramework) + <_referringTargetFramework Condition="'$(PackageUsePlatformTargeting)' != 'true' and $(TargetFramework.Contains('-'))">$(TargetFramework.SubString(0, $(TargetFramework.IndexOf('-')))) + + + + + <_projectReferenceCopyLocalPaths Include="@(ReferenceCopyLocalPaths->WithMetadataValue('ReferenceSourceTarget', 'ProjectReference')->WithMetadataValue('Pack', 'true'))" /> + + + + + + + + + + + <_referenceAssemblyPaths Include="@(_projectReferenceCopyLocalPaths->WithMetadataValue('Extension', '.dll')->WithMetadataValue('IncludeReferenceAssemblyInPackage', 'true')->Metadata('ReferenceAssembly'))" /> + <_referenceAssemblyPaths Include="@(_projectReferenceCopyLocalPaths->WithMetadataValue('Extension', '.xml')->WithMetadataValue('IncludeReferenceAssemblyInPackage', 'true'))" /> + + + + + + + + net + netframework + + + + + + + <_BuildOutputPackageFile Include="$(OutputPath)**" + Exclude="$(OutputPath)publish\**; + $(OutputPath)" /> + + + + + + + + + + + + diff --git a/eng/pipelines/common/evaluate-default-paths.yml b/eng/pipelines/common/evaluate-default-paths.yml index b2adcfd16ef0..076c0bceeb92 100644 --- a/eng/pipelines/common/evaluate-default-paths.yml +++ b/eng/pipelines/common/evaluate-default-paths.yml @@ -161,9 +161,9 @@ jobs: - src/coreclr/tools/ILVerification/* - global.json - - subset: tools_cdacreader + - subset: tools_cdac include: - - src/native/managed/cdacreader/* + - src/native/managed/cdac/* - subset: installer include: diff --git a/eng/pipelines/common/global-build-job.yml b/eng/pipelines/common/global-build-job.yml index 
aa630ae9d46a..da3740844524 100644 --- a/eng/pipelines/common/global-build-job.yml +++ b/eng/pipelines/common/global-build-job.yml @@ -52,9 +52,9 @@ jobs: timeoutInMinutes: ${{ parameters.timeoutInMinutes }} enablePublishTestResults: ${{ parameters.enablePublishTestResults }} testResultsFormat: ${{ parameters.testResultsFormat }} - - ${{ if ne(parameters.templateContext, '') }}: - templateContext: ${{ parameters.templateContext }} + enableMicrobuild: ${{ parameters.isOfficialBuild }} + enableMicrobuildForMacAndLinux: ${{ parameters.isOfficialBuild }} + templateContext: ${{ parameters.templateContext }} artifacts: publish: @@ -90,6 +90,12 @@ jobs: - name: _archParameter value: -arch ${{ parameters.archType }} + - name: _AssetManifestName + value: ${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ parameters.nameSuffix }} + + - name: _SignType + value: $[ coalesce(variables.OfficialSignType, 'real') ] + - ${{ if and(eq(parameters.osGroup, 'linux'), eq(parameters.osSubGroup, '_bionic')) }}: - name: _osParameter value: -os linux-bionic @@ -117,22 +123,16 @@ jobs: - ${{ if ne(parameters.cxxAbiLibrary, '') }}: - name: CxxAbiLibraryArg value: /p:TargetCxxAbiLibrary=${{ parameters.cxxAbiLibrary }} - + - name: TargetCxxLibraryConfigurationArgs value: $(CxxStandardLibraryArg) $(CxxStandardLibraryStaticArg) $(CxxAbiLibraryArg) - name: _officialBuildParameter ${{ if eq(parameters.isOfficialBuild, true) }}: - value: /p:OfficialBuildId=$(Build.BuildNumber) + value: /p:OfficialBuildId=$(Build.BuildNumber) /p:DotNetPublishUsingPipelines=true /p:SignType=$(_SignType) /p:DotNetSignType=$(_SignType) ${{ if ne(parameters.isOfficialBuild, true) }}: value: '' - - name: _buildDarwinFrameworksParameter - ${{ if in(parameters.osGroup, 'ios', 'tvos', 'maccatalyst')}}: - value: /p:BuildDarwinFrameworks=true - ${{ if notin(parameters.osGroup, 'ios', 'tvos', 'maccatalyst')}}: - value: '' - # Set no native sanitizers by default - name: _nativeSanitizersArg value: '' diff --git a/eng/pipelines/common/macos-sign-with-entitlements.yml b/eng/pipelines/common/macos-sign-with-entitlements.yml index 72a03b90f340..6a20a31481eb 100644 --- a/eng/pipelines/common/macos-sign-with-entitlements.yml +++ b/eng/pipelines/common/macos-sign-with-entitlements.yml @@ -30,12 +30,13 @@ steps: - task: EsrpCodeSigning@5 displayName: 'ESRP CodeSigning' inputs: - ConnectedServiceName: 'DotNet-Engineering-Services_KeyVault' - AppRegistrationClientId: '28ec6507-2167-4eaa-a294-34408cf5dd0e' - AppRegistrationTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' - AuthAKVName: 'EngKeyVault' - AuthCertName: 'DotNetCore-ESRP-AuthCert' - AuthSignCertName: 'DotNetCore-ESRP-AuthSignCert' + ConnectedServiceName: 'DotNetBuildESRP' + UseMSIAuthentication: true + EsrpClientId: '28ec6507-2167-4eaa-a294-34408cf5dd0e' + AppRegistrationClientId: '0ecbcdb7-8451-4cbe-940a-4ed97b08b955' + AppRegistrationTenantId: '975f013f-7f24-47e8-a7d3-abc4752bf346' + AuthAKVName: 'DotNetEngKeyVault' + AuthSignCertName: 'DotNet-ESRP-AuthSignCert' FolderPath: '$(Build.ArtifactStagingDirectory)/' Pattern: 'mac_entitled_to_sign.zip' UseMinimatch: true diff --git a/eng/pipelines/common/platform-matrix.yml b/eng/pipelines/common/platform-matrix.yml index c0d705a4cf6f..95eca89ff22b 100644 --- a/eng/pipelines/common/platform-matrix.yml +++ b/eng/pipelines/common/platform-matrix.yml @@ -45,27 +45,6 @@ jobs: crossBuild: true ${{ insert }}: ${{ parameters.jobParameters }} -# Linux armv6 - -- ${{ if containsValue(parameters.platforms, 'linux_armv6') }}: - - 
template: xplat-setup.yml - parameters: - jobTemplate: ${{ parameters.jobTemplate }} - helixQueuesTemplate: ${{ parameters.helixQueuesTemplate }} - variables: ${{ parameters.variables }} - osGroup: linux - archType: armv6 - targetRid: linux-armv6 - platform: linux_armv6 - shouldContinueOnError: ${{ parameters.shouldContinueOnError }} - container: linux_armv6 - jobParameters: - runtimeFlavor: ${{ parameters.runtimeFlavor }} - buildConfig: ${{ parameters.buildConfig }} - helixQueueGroup: ${{ parameters.helixQueueGroup }} - crossBuild: true - ${{ insert }}: ${{ parameters.jobParameters }} - # Linux arm64 - ${{ if or(containsValue(parameters.platforms, 'linux_arm64'), in(parameters.platformGroup, 'all', 'gcstress')) }}: @@ -348,7 +327,7 @@ jobs: targetRid: linux-x64 platform: linux_x64 shouldContinueOnError: ${{ parameters.shouldContinueOnError }} - container: debian-12-gcc14-amd64 + container: debian-12-gcc15-amd64 jobParameters: runtimeFlavor: ${{ parameters.runtimeFlavor }} buildConfig: ${{ parameters.buildConfig }} diff --git a/eng/pipelines/common/templates/browser-wasm-build-tests.yml b/eng/pipelines/common/templates/browser-wasm-build-tests.yml index 3bcbec8607ca..0963ebed5a53 100644 --- a/eng/pipelines/common/templates/browser-wasm-build-tests.yml +++ b/eng/pipelines/common/templates/browser-wasm-build-tests.yml @@ -25,11 +25,12 @@ jobs: - name: alwaysRunVar value: ${{ parameters.alwaysRun }} - name: workloadsTestPreviousVersionsVar - value: $[ - or( - eq(variables['Build.SourceBranchName'], 'main'), - eq(variables['System.PullRequest.TargetBranch'], 'main')) - ] + value: false +# value: $[ +# or( +# eq(variables['Build.SourceBranchName'], 'main'), +# eq(variables['System.PullRequest.TargetBranch'], 'main')) +# ] - name: wbtProjectArg ${{ if eq(platform, 'browser_wasm_win') }}: value: '-projects' diff --git a/eng/pipelines/common/templates/global-build-step.yml b/eng/pipelines/common/templates/global-build-step.yml index 2a2262d9a4ea..983e27c7422c 100644 --- a/eng/pipelines/common/templates/global-build-step.yml +++ b/eng/pipelines/common/templates/global-build-step.yml @@ -10,7 +10,7 @@ parameters: condition: succeeded() steps: - - script: $(Build.SourcesDirectory)$(dir)build$(scriptExt) -ci ${{ parameters.archParameter }} $(_osParameter) ${{ parameters.crossArg }} ${{ parameters.buildArgs }} ${{ parameters.targetCxxLibraryConfigurationArgs }} $(_officialBuildParameter) $(_buildDarwinFrameworksParameter) $(_overrideTestScriptWindowsCmdParameter) + - script: $(Build.SourcesDirectory)$(dir)build$(scriptExt) -ci ${{ parameters.archParameter }} $(_osParameter) ${{ parameters.crossArg }} ${{ parameters.buildArgs }} ${{ parameters.targetCxxLibraryConfigurationArgs }} $(_officialBuildParameter) $(_overrideTestScriptWindowsCmdParameter) displayName: ${{ parameters.displayName }} ${{ if eq(parameters.useContinueOnErrorDuringBuild, true) }}: continueOnError: ${{ parameters.shouldContinueOnError }} diff --git a/eng/pipelines/common/templates/pipeline-with-resources.yml b/eng/pipelines/common/templates/pipeline-with-resources.yml index 62559ef461e8..97a929160314 100644 --- a/eng/pipelines/common/templates/pipeline-with-resources.yml +++ b/eng/pipelines/common/templates/pipeline-with-resources.yml @@ -21,11 +21,6 @@ extends: env: ROOTFS_DIR: /crossrootfs/arm - linux_armv6: - image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-20.04-cross-armv6-raspbian-10 - env: - ROOTFS_DIR: /crossrootfs/armv6 - linux_arm64: image: 
mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-arm64 env: @@ -69,16 +64,17 @@ extends: ROOTFS_DIR: /crossrootfs/x86 linux_x64_dev_innerloop: - image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04 + image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-24.04 linux_musl_x64_dev_innerloop: - image: mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.19-WithNode + image: mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.21-amd64 linux_x64_sanitizer: image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-amd64-sanitizer env: ROOTFS_DIR: /crossrootfs/x64 +<<<<<<< HEAD # We need to be able to use 'tdnf install'. We can't because of https://github.com/dotnet/dotnet-docker/issues/788. # This is the hackaround: https://github.com/microsoft/azure-pipelines-agent/issues/2043#issuecomment-489692810. # TODO-LLVM-Upstream: replace with an image that has our prerequisites (LLDB) already installed. @@ -96,12 +92,15 @@ extends: ROOTFS_DIR: /crossrootfs/arm64 # We use a CentOS Stream 8 image here to test building from source on CentOS Stream 9. +======= + # Used to test RHEL compatibility: CentOS Stream is upstream of RHEL +>>>>>>> upstream-jun SourceBuild_centos_x64: - image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream9 + image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream-10-amd64 - # AlmaLinux 8 is a RHEL 8 rebuild, so we use it to test building from source on RHEL 8. + # Used to test RHEL compatibility: Alma Linux is downstream of RHEL SourceBuild_linux_x64: - image: mcr.microsoft.com/dotnet-buildtools/prereqs:almalinux-8-source-build + image: mcr.microsoft.com/dotnet-buildtools/prereqs:almalinux-9-source-build-amd64 linux_s390x: image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-s390x @@ -123,11 +122,11 @@ extends: env: ROOTFS_DIR: /crossrootfs/loongarch64 - debian-12-gcc14-amd64: - image: mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-gcc14-amd64 + debian-12-gcc15-amd64: + image: mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-gcc15-amd64 linux_x64_llvmaot: - image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream8 + image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream-10-amd64 browser_wasm: image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-webassembly-amd64 diff --git a/eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml b/eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml index 26b86483cd8e..f88692190cc2 100644 --- a/eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml +++ b/eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml @@ -27,7 +27,6 @@ parameters: displayName: '' timeoutInMinutes: '' enableMicrobuild: '' - gatherAssetManifests: false shouldContinueOnError: false steps: diff --git a/eng/pipelines/common/templates/runtimes/build-runtime-tests.yml b/eng/pipelines/common/templates/runtimes/build-runtime-tests.yml index a822ccf28fd2..1b1660693d6e 100644 --- a/eng/pipelines/common/templates/runtimes/build-runtime-tests.yml +++ b/eng/pipelines/common/templates/runtimes/build-runtime-tests.yml @@ -11,7 +11,6 @@ parameters: displayName: '' timeoutInMinutes: '' enableMicrobuild: '' - gatherAssetManifests: false shouldContinueOnError: false diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index 551a502ae9ed..88ad3e36cda0 
100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -190,7 +190,7 @@ jobs: timeoutInMinutes: 390 ${{ if in(parameters.testGroup, 'gcstress-extra', 'r2r-extra', 'pgo', 'pgostress', 'jit-experimental') }}: timeoutInMinutes: 510 - ${{ if in(parameters.testGroup, 'jitstress-isas-x86', 'jitstress-isas-avx512', 'jitstress-isas-sve') }}: + ${{ if in(parameters.testGroup, 'jitstress-isas-x86', 'jitstress-isas-avx512') }}: timeoutInMinutes: 960 steps: @@ -386,6 +386,9 @@ jobs: - jitstress_isas_x86_nosse41 - jitstress_isas_x86_nosse42 - jitstress_isas_x86_nossse3 + - jitstress_isas_x86_vectort128 + - jitstress_isas_x86_vectort512 + - jitstress_isas_x86_noavx512_vectort128 - jitstress_isas_1_x86_noaes - jitstress_isas_1_x86_noavx - jitstress_isas_1_x86_noavx2 @@ -427,9 +430,6 @@ jobs: - jitstress_isas_x86_evex - jitstress_isas_x86_noavx512 - jitstressregs0x2000 - ${{ if in(parameters.testGroup, 'jitstress-isas-sve') }}: - scenarios: - - jitstress_isas_arm64_sve ${{ if in(parameters.testGroup, 'jitstressregs-x86') }}: scenarios: - jitstressregs1_x86_noavx @@ -540,7 +540,6 @@ jobs: - jitpartialcompilation_pgo - jitpartialcompilation_pgo_stress_random - jitoptrepeat - - jitoldlayout ${{ else }}: scenarios: - jitosr_stress @@ -554,7 +553,6 @@ jobs: - jitphysicalpromotion_full - jitrlcse - jitoptrepeat - - jitoldlayout ${{ if in(parameters.testGroup, 'jit-cfg') }}: scenarios: - jitcfg diff --git a/eng/pipelines/common/templates/runtimes/send-to-helix-step.yml b/eng/pipelines/common/templates/runtimes/send-to-helix-step.yml index 989ccdc9319a..918ba5b814aa 100644 --- a/eng/pipelines/common/templates/runtimes/send-to-helix-step.yml +++ b/eng/pipelines/common/templates/runtimes/send-to-helix-step.yml @@ -30,6 +30,7 @@ parameters: runtimeVariant: '' shouldContinueOnError: false SuperPmiCollect: '' + SuperPmiReplayType: '' SuperPmiDiffType: '' SuperPmiBaseJitOptions: '' SuperPmiDiffJitOptions: '' @@ -65,6 +66,7 @@ steps: RuntimeFlavor: ${{ parameters.runtimeFlavor }} _RuntimeVariant: ${{ parameters.runtimeVariant }} _SuperPmiCollect: ${{ parameters.SuperPmiCollect }} + _SuperPmiReplayType: ${{ parameters.SuperPmiReplayType }} _SuperPmiDiffType: ${{ parameters.SuperPmiDiffType }} _SuperPmiBaseJitOptions: ${{ parameters.SuperPmiBaseJitOptions }} _SuperPmiDiffJitOptions: ${{ parameters.SuperPmiDiffJitOptions }} diff --git a/eng/pipelines/common/templates/runtimes/test-variables.yml b/eng/pipelines/common/templates/runtimes/test-variables.yml index b9a5e92dcf2c..7903626a4905 100644 --- a/eng/pipelines/common/templates/runtimes/test-variables.yml +++ b/eng/pipelines/common/templates/runtimes/test-variables.yml @@ -50,7 +50,7 @@ variables: # gc reliability may take up to 2 hours to shutdown. Some scenarios have very long iteration times. 
- name: timeoutPerTestInMinutes value: 240 - - ${{ if in(parameters.testGroup, 'jitstress', 'jitstress-random', 'jitstress-isas-arm', 'jitstress-isas-x86', 'jitstress-isas-avx512', 'jitstress-isas-sve', 'jitstressregs-x86', 'jitstressregs', 'jitstress2-jitstressregs', 'jitelthookenabled' ) }}: + - ${{ if in(parameters.testGroup, 'jitstress', 'jitstress-random', 'jitstress-isas-arm', 'jitstress-isas-x86', 'jitstress-isas-avx512', 'jitstressregs-x86', 'jitstressregs', 'jitstress2-jitstressregs', 'jitelthookenabled' ) }}: - name: timeoutPerTestCollectionInMinutes value: 120 - name: timeoutPerTestInMinutes diff --git a/eng/pipelines/common/templates/runtimes/xplat-job.yml b/eng/pipelines/common/templates/runtimes/xplat-job.yml index e22f8f968c47..3b7dfa334ed2 100644 --- a/eng/pipelines/common/templates/runtimes/xplat-job.yml +++ b/eng/pipelines/common/templates/runtimes/xplat-job.yml @@ -18,7 +18,6 @@ parameters: displayName: '' timeoutInMinutes: '' enableMicrobuild: '' - gatherAssetManifests: false disableComponentGovernance: '' templatePath: 'templates' @@ -69,11 +68,6 @@ jobs: ${{ else }}: disableComponentGovernance: ${{ parameters.disableComponentGovernance }} - # Setting this results in the arcade job template including a step - # that gathers asset manifests and publishes them to pipeline - # storage. Only relevant for build jobs. - enablePublishBuildAssets: ${{ parameters.gatherAssetManifests }} - artifacts: publish: ${{ if ne(parameters.logsName, '') }}: diff --git a/eng/pipelines/common/templates/simple-wasm-build-tests.yml b/eng/pipelines/common/templates/simple-wasm-build-tests.yml index 273a80fc65ee..ec0678f6463e 100644 --- a/eng/pipelines/common/templates/simple-wasm-build-tests.yml +++ b/eng/pipelines/common/templates/simple-wasm-build-tests.yml @@ -25,11 +25,12 @@ jobs: - name: alwaysRunVar value: ${{ parameters.alwaysRun }} - name: workloadsTestPreviousVersionsVar - value: $[ - or( - eq(variables['Build.SourceBranchName'], 'main'), - eq(variables['System.PullRequest.TargetBranch'], 'main')) - ] + value: false +# value: $[ +# or( +# eq(variables['Build.SourceBranchName'], 'main'), +# eq(variables['System.PullRequest.TargetBranch'], 'main')) +# ] jobParameters: isExtraPlatforms: ${{ parameters.isExtraPlatformsBuild }} testGroup: innerloop diff --git a/eng/pipelines/common/templates/template1es.yml b/eng/pipelines/common/templates/template1es.yml index 31295b1e6bbc..527c81677411 100644 --- a/eng/pipelines/common/templates/template1es.yml +++ b/eng/pipelines/common/templates/template1es.yml @@ -22,9 +22,9 @@ extends: parameters: sdl: codeql: - compiled: + compiled: enabled: false - justificationForDisabling: 'CodeQL is run on the runtime-codeql pipeline' + justificationForDisabling: 'CodeQL is run manually on the runtime-codeql pipeline' credscan: suppressionsFile: $(Build.SourcesDirectory)/.config/CredScanSuppressions.json policheck: diff --git a/eng/pipelines/common/templates/wasm-library-tests.yml b/eng/pipelines/common/templates/wasm-library-tests.yml index 51cf40074ff5..149215ea5019 100644 --- a/eng/pipelines/common/templates/wasm-library-tests.yml +++ b/eng/pipelines/common/templates/wasm-library-tests.yml @@ -80,7 +80,7 @@ jobs: isExtraPlatforms: ${{ parameters.isExtraPlatformsBuild }} testGroup: innerloop nameSuffix: LibraryTests${{ parameters.nameSuffix }} - buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:MonoEnableAssertMessages=true /p:BrowserHost=$(_hostedOs) $(_wasmRunSmokeTestsOnlyArg) $(chromeInstallArg) $(firefoxInstallArg) 
$(v8InstallArg) ${{ parameters.extraBuildArgs }} + buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:MonoEnableAssertMessages=true /p:BrowserHost=$(_hostedOs) $(_wasmRunSmokeTestsOnlyArg) $(chromeInstallArg) $(firefoxInstallArg) $(v8InstallArg) /maxcpucount:1 ${{ parameters.extraBuildArgs }} timeoutInMinutes: 240 # if !alwaysRun, then: # if this is runtime-wasm (isWasmOnlyBuild): diff --git a/eng/pipelines/coreclr/jitstress-isas-avx512.yml b/eng/pipelines/coreclr/jitstress-isas-avx512.yml index 87b33c847952..2af4e466d569 100644 --- a/eng/pipelines/coreclr/jitstress-isas-avx512.yml +++ b/eng/pipelines/coreclr/jitstress-isas-avx512.yml @@ -1,20 +1,6 @@ # This pipeline only runs on GitHub PRs, not on merges. trigger: none -# Only run on specific changes to the JIT directory that are likely to affect AVX-512. -pr: - branches: - include: - - main - paths: - include: - - src/coreclr/jit/hwintrinsiccodegenxarch.cpp - - src/coreclr/jit/hwintrinsiclistxarch.h - - src/coreclr/jit/hwintrinsicxarch.cpp - - src/coreclr/jit/instrsxarch.h - - src/coreclr/jit/emitxarch.cpp - - src/coreclr/jit/emitxarch.h - schedules: - cron: "30 19 * * 6" displayName: Sat at 11:30 AM (UTC-8:00) diff --git a/eng/pipelines/coreclr/jitstress-isas-sve.yml b/eng/pipelines/coreclr/jitstress-isas-sve.yml deleted file mode 100644 index c5aca49e6232..000000000000 --- a/eng/pipelines/coreclr/jitstress-isas-sve.yml +++ /dev/null @@ -1,39 +0,0 @@ -# This pipeline only runs on GitHub PRs, not on merges. -trigger: none - -# Only run on specific changes to the JIT directory that are likely to affect Sve. -pr: - branches: - include: - - main - paths: - include: - - src/coreclr/jit/codegenarmarch.cpp - - src/coreclr/jit/emitarm64sve.cpp - - src/coreclr/jit/emitfmtsarm64sve.h - - src/coreclr/jit/hwintrinsicarm64.cpp - - src/coreclr/jit/hwintrinsiccodegenarm64.cpp - - src/coreclr/jit/hwintrinsiclistarm64sve.h - - src/coreclr/jit/instrsarm64sve.h - - src/coreclr/jit/lowerarmarch.cpp - - src/coreclr/jit/lsraarmarch.cpp - - src/coreclr/jit/lsraarm64.cpp - -schedules: -- cron: "30 19 * * 6" - displayName: Sat at 11:30 AM (UTC-8:00) - branches: - include: - - main - always: true - -variables: - - template: /eng/pipelines/common/variables.yml - -extends: - template: /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml - parameters: - platforms: - # just run on windows for now, because abi is universal for other platforms - - windows_x64 - testGroup: jitstress-isas-sve diff --git a/eng/pipelines/coreclr/libraries-pgo.yml b/eng/pipelines/coreclr/libraries-pgo.yml index 00a050da0e62..a8f0e16b01f3 100644 --- a/eng/pipelines/coreclr/libraries-pgo.yml +++ b/eng/pipelines/coreclr/libraries-pgo.yml @@ -71,4 +71,3 @@ extends: - syntheticpgo - syntheticpgo_blend - jitrlcse - - jitoldlayout diff --git a/eng/pipelines/coreclr/runtime-nativeaot-outerloop.yml b/eng/pipelines/coreclr/runtime-nativeaot-outerloop.yml index e83192e6f9b1..67e2c54891ab 100644 --- a/eng/pipelines/coreclr/runtime-nativeaot-outerloop.yml +++ b/eng/pipelines/coreclr/runtime-nativeaot-outerloop.yml @@ -69,7 +69,7 @@ extends: testGroup: innerloop isSingleFile: true nameSuffix: NativeAOT_Libs - buildArgs: -s clr.aot+libs+libs.tests -c $(_BuildConfig) /p:TestNativeAot=true /p:ArchiveTests=true /p:IlcUseServerGc=false /p:RunAnalyzers=false + buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) /p:TestNativeAot=true /p:ArchiveTests=true /p:IlcUseServerGc=false /p:RunAnalyzers=false timeoutInMinutes: 300 # doesn't normally take this 
long, but I've seen Helix queues backed up for 160 minutes includeAllPlatforms: true # extra steps, run tests @@ -95,7 +95,7 @@ extends: testGroup: innerloop isSingleFile: true nameSuffix: NativeAOT_Checked_Libs - buildArgs: -s clr.aot+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:IlcUseServerGc=false /p:RunAnalyzers=false + buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:IlcUseServerGc=false /p:RunAnalyzers=false timeoutInMinutes: 360 # extra steps, run tests postBuildSteps: @@ -120,7 +120,7 @@ extends: testGroup: innerloop isSingleFile: true nameSuffix: NativeAOT_Checked_Libs_SizeOpt - buildArgs: -s clr.aot+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:OptimizationPreference=Size /p:IlcUseServerGc=false /p:RunAnalyzers=false + buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:OptimizationPreference=Size /p:IlcUseServerGc=false /p:RunAnalyzers=false timeoutInMinutes: 240 # extra steps, run tests postBuildSteps: @@ -145,7 +145,7 @@ extends: testGroup: innerloop isSingleFile: true nameSuffix: NativeAOT_Checked_Libs_SpeedOpt - buildArgs: -s clr.aot+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:OptimizationPreference=Speed /p:IlcUseServerGc=false /p:RunAnalyzers=false + buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:OptimizationPreference=Speed /p:IlcUseServerGc=false /p:RunAnalyzers=false timeoutInMinutes: 240 # extra steps, run tests postBuildSteps: @@ -167,6 +167,7 @@ extends: - windows_x86 - linux_x64 - linux_arm + - linux_arm64 variables: - name: timeoutPerTestInMinutes value: 60 @@ -221,4 +222,4 @@ extends: - template: /eng/pipelines/common/templates/runtimes/test-variables.yml parameters: testGroup: innerloop - liveLibrariesBuildConfig: Release \ No newline at end of file + liveLibrariesBuildConfig: Release diff --git a/eng/pipelines/coreclr/superpmi-collect-test.yml b/eng/pipelines/coreclr/superpmi-collect-test.yml index 863a79c1b319..abc3fa9cdf08 100644 --- a/eng/pipelines/coreclr/superpmi-collect-test.yml +++ b/eng/pipelines/coreclr/superpmi-collect-test.yml @@ -1,4 +1,4 @@ -# This job definition automates the SuperPMI collection process. +# This job definition automates the SuperPMI collection process for the "test" pipeline. 
trigger: none @@ -6,368 +6,4 @@ variables: - template: /eng/pipelines/common/variables.yml extends: - template: /eng/pipelines/common/templates/pipeline-with-resources.yml - parameters: - stages: - - stage: Build - jobs: - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: checked - platforms: - - windows_x64 - - linux_x64 - jobParameters: - testGroup: outerloop - buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true - timeoutInMinutes: 120 - postBuildSteps: - - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/bin - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/helix - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - extraVariablesTemplates: - - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml - parameters: - testGroup: outerloop - disableComponentGovernance: true # No shipping artifacts produced by this pipeline - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: checked - platforms: - - windows_x86 - - windows_arm64 - - osx_arm64 - jobParameters: - testGroup: outerloop - buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true - timeoutInMinutes: 120 - postBuildSteps: - # Build CLR assets for x64 as well as the target as we need an x64 mcs - - template: /eng/pipelines/common/templates/global-build-step.yml - parameters: - buildArgs: -s clr.spmi -c $(_BuildConfig) - archParameter: -arch x64 - displayName: Build SuperPMI - - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/bin - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/helix - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - extraVariablesTemplates: - - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml - parameters: - testGroup: outerloop - disableComponentGovernance: true # No shipping artifacts produced by this pipeline - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: checked - platforms: - - linux_arm - - linux_arm64 - jobParameters: - testGroup: outerloop - buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) 
-c Release /p:ArchiveTests=true - timeoutInMinutes: 120 - postBuildSteps: - # Build CLR assets for x64 as well as the target as we need an x64 mcs - - template: /eng/pipelines/common/templates/global-build-step.yml - parameters: - buildArgs: -s clr.spmi -c $(_BuildConfig) - archParameter: -arch x64 - container: linux_x64 - displayName: Build SuperPMI - - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/bin - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/helix - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - extraVariablesTemplates: - - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml - parameters: - testGroup: outerloop - disableComponentGovernance: true # No shipping artifacts produced by this pipeline - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml - buildConfig: checked - platforms: - - CoreClrTestBuildHost # Either osx_x64 or linux_x64 - jobParameters: - testGroup: outerloop - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: pmi - collectionName: libraries - collectionUpload: false - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: crossgen2 - collectionName: libraries - collectionUpload: false - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: run - collectionName: realworld - collectionUpload: false - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - 
- windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: run - collectionName: benchmarks - collectionUpload: false - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: run_tiered - collectionName: benchmarks - collectionUpload: false - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: run_pgo - collectionName: benchmarks - collectionUpload: false - - # - # Collection of coreclr test run - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: superpmi - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - SuperPmiCollect: true - SuperPmiCollectionUpload: false - unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: nativeaot - collectionName: smoke_tests - collectionUpload: false - - # - # Collection of libraries test run: normal - # Libraries Test Run using Release libraries, and Checked CoreCLR - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/libraries/run-test-job.yml - buildConfig: Release - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: superpmi - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testScope: innerloop - liveRuntimeBuildConfig: Checked - dependsOnTestBuildConfiguration: Release - dependsOnTestArchitecture: x64 - scenarios: - - normal - SuperPmiCollect: true - SuperPmiCollectionName: libraries_tests - SuperPmiCollectionUpload: false - unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked - helixArtifactsName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked - unifiedBuildConfigOverride: checked - - # - # Collection of 
libraries test run: no_tiered_compilation - # Libraries Test Run using Release libraries, and Checked CoreCLR - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/libraries/run-test-job.yml - buildConfig: Release - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: superpmi - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testScope: innerloop - liveRuntimeBuildConfig: Checked - dependsOnTestBuildConfiguration: Release - dependsOnTestArchitecture: x64 - scenarios: - - no_tiered_compilation - SuperPmiCollect: true - SuperPmiCollectionName: libraries_tests_no_tiered_compilation - SuperPmiCollectionUpload: false - unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked - helixArtifactsName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked - unifiedBuildConfigOverride: checked + template: /eng/pipelines/coreclr/templates/superpmi-collect-pipeline.yml \ No newline at end of file diff --git a/eng/pipelines/coreclr/superpmi-collect.yml b/eng/pipelines/coreclr/superpmi-collect.yml index 3530ff5a958a..5fa3c7692fac 100644 --- a/eng/pipelines/coreclr/superpmi-collect.yml +++ b/eng/pipelines/coreclr/superpmi-collect.yml @@ -1,4 +1,4 @@ -# This job definition automates the SuperPMI collection process. +# This job definition automates the SuperPMI collection process for the "production" pipeline. # Trigger this job if the JIT-EE GUID changes, which invalidates previous SuperPMI # collections. @@ -15,9 +15,6 @@ trigger: # and should not be triggerable from a PR. pr: none -variables: - - template: /eng/pipelines/common/variables.yml - schedules: - cron: "0 17 * * 0" displayName: Sun at 9:00 AM (UTC-8:00) @@ -26,359 +23,8 @@ schedules: - main always: true -extends: - template: /eng/pipelines/common/templates/pipeline-with-resources.yml - parameters: - stages: - - stage: Build - jobs: - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: checked - platforms: - - windows_x64 - - linux_x64 - jobParameters: - testGroup: outerloop - buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true - timeoutInMinutes: 120 - postBuildSteps: - - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/bin - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/helix - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - extraVariablesTemplates: - - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml - parameters: - testGroup: outerloop - disableComponentGovernance: true # No shipping artifacts produced by this pipeline - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - 
buildConfig: checked - platforms: - - windows_x86 - - windows_arm64 - - osx_arm64 - jobParameters: - testGroup: outerloop - buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true - timeoutInMinutes: 120 - postBuildSteps: - # Build CLR assets for x64 as well as the target as we need an x64 mcs - - template: /eng/pipelines/common/templates/global-build-step.yml - parameters: - buildArgs: -s clr.spmi -c $(_BuildConfig) - archParameter: -arch x64 - displayName: Build SuperPMI - - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/bin - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/helix - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - extraVariablesTemplates: - - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml - parameters: - testGroup: outerloop - disableComponentGovernance: true # No shipping artifacts produced by this pipeline - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: checked - platforms: - - linux_arm - - linux_arm64 - jobParameters: - testGroup: outerloop - buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true - timeoutInMinutes: 120 - postBuildSteps: - # Build CLR assets for x64 as well as the target as we need an x64 mcs - - template: /eng/pipelines/common/templates/global-build-step.yml - parameters: - buildArgs: -s clr.spmi -c $(_BuildConfig) - archParameter: -arch x64 - container: linux_x64 - displayName: Build SuperPMI - - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/bin - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/helix - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - extraVariablesTemplates: - - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml - parameters: - testGroup: outerloop - disableComponentGovernance: true # No shipping artifacts produced by this pipeline - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml - buildConfig: checked - platforms: - - CoreClrTestBuildHost # Either osx_x64 or linux_x64 - jobParameters: - testGroup: outerloop - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: 
/eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: pmi - collectionName: libraries - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: crossgen2 - collectionName: libraries - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: run - collectionName: realworld - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: run - collectionName: benchmarks - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: run_tiered - collectionName: benchmarks - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: run_pgo - collectionName: benchmarks - - # - # Collection of coreclr test run - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml - buildConfig: checked - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: superpmi - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - 
SuperPmiCollect: true - unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml - buildConfig: checked - platforms: - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_arm64 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testGroup: outerloop - liveLibrariesBuildConfig: Release - collectionType: nativeaot - collectionName: smoke_tests - - # - # Collection of libraries test run: normal - # Libraries Test Run using Release libraries, and Checked CoreCLR - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/libraries/run-test-job.yml - buildConfig: Release - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: superpmi - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testScope: innerloop - liveRuntimeBuildConfig: Checked - dependsOnTestBuildConfiguration: Release - dependsOnTestArchitecture: x64 - scenarios: - - normal - SuperPmiCollect: true - SuperPmiCollectionName: libraries_tests - unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked - helixArtifactsName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked - unifiedBuildConfigOverride: checked +variables: + - template: /eng/pipelines/common/variables.yml - # - # Collection of libraries test run: no_tiered_compilation - # Libraries Test Run using Release libraries, and Checked CoreCLR - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/libraries/run-test-job.yml - buildConfig: Release - platforms: - - osx_arm64 - - linux_arm - - linux_arm64 - - linux_x64 - - windows_x64 - - windows_x86 - - windows_arm64 - helixQueueGroup: superpmi - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - testScope: innerloop - liveRuntimeBuildConfig: Checked - dependsOnTestBuildConfiguration: Release - dependsOnTestArchitecture: x64 - scenarios: - - no_tiered_compilation - SuperPmiCollect: true - SuperPmiCollectionName: libraries_tests_no_tiered_compilation - unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked - helixArtifactsName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked - unifiedBuildConfigOverride: checked +extends: + template: /eng/pipelines/coreclr/templates/superpmi-collect-pipeline.yml diff --git a/eng/pipelines/coreclr/superpmi-diffs.yml b/eng/pipelines/coreclr/superpmi-diffs.yml index c9cae0c63ac3..b6dd016bd3f3 100644 --- a/eng/pipelines/coreclr/superpmi-diffs.yml +++ b/eng/pipelines/coreclr/superpmi-diffs.yml @@ -20,7 +20,9 @@ pr: # If you are changing these and start including eng/common, adjust the Maestro subscriptions # so that this build can block dependency auto-updates (this build is currently ignored) include: + - eng/pipelines/coreclr/superpmi-diffs.yml - src/coreclr/jit/* + - src/coreclr/gcinfo/* variables: - template: /eng/pipelines/common/variables.yml @@ -54,7 +56,7 @@ extends: - windows_x64 - windows_x86 jobParameters: - buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig) + buildArgs: -s clr.alljits+clr.alljitscommunity+clr.spmi -c $(_BuildConfig) postBuildSteps: - template: 
/eng/pipelines/common/upload-artifact-step.yml parameters: diff --git a/eng/pipelines/coreclr/superpmi-replay-apx.yml b/eng/pipelines/coreclr/superpmi-replay-apx.yml new file mode 100644 index 000000000000..f5e3f0d1adba --- /dev/null +++ b/eng/pipelines/coreclr/superpmi-replay-apx.yml @@ -0,0 +1,19 @@ +trigger: none + +schedules: +- cron: "0 7 * * *" + displayName: Daily at 11:00 PM (UTC-8:00) + branches: + include: + - main + always: true + +variables: + - template: /eng/pipelines/common/variables.yml + +extends: + template: /eng/pipelines/coreclr/templates/jit-replay-pipeline.yml + parameters: + platforms: + - windows_x64 + replayType: apx \ No newline at end of file diff --git a/eng/pipelines/coreclr/superpmi-replay.yml b/eng/pipelines/coreclr/superpmi-replay.yml index d562dbee4555..6c9c15e7f197 100644 --- a/eng/pipelines/coreclr/superpmi-replay.yml +++ b/eng/pipelines/coreclr/superpmi-replay.yml @@ -1,73 +1,28 @@ -# This pipeline only runs on GitHub PRs, not on merges. trigger: none -# Only run on changes to the JIT directory. Don't run if the JIT-EE GUID has changed, -# since there won't be any SuperPMI collections with the new GUID until the collection -# pipeline completes after this PR is merged. pr: branches: include: - main paths: include: - - src/coreclr/jit/* - - src/coreclr/tools/superpmi/* - exclude: - - src/coreclr/inc/jiteeversionguid.h + - src/coreclr/jit/lsra*.* + +schedules: +- cron: "0 7 * * *" + displayName: Daily at 11:00 PM (UTC-8:00) + branches: + include: + - main + always: true variables: - template: /eng/pipelines/common/variables.yml extends: - template: /eng/pipelines/common/templates/pipeline-with-resources.yml + template: /eng/pipelines/coreclr/templates/jit-replay-pipeline.yml parameters: - stages: - # Don't run if the JIT-EE GUID has changed, - # since there won't be any SuperPMI collections with the new GUID until the collection - # pipeline completes after this PR is merged. 
- - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}: - - stage: EvaluatePaths - displayName: Evaluate Paths - jobs: - - template: /eng/pipelines/common/evaluate-paths-job.yml - parameters: - paths: - - subset: jiteeversionguid - include: - - src/coreclr/inc/jiteeversionguid.h - - - stage: Build - jobs: - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: checked - platforms: - - windows_x64 - - windows_x86 - jobParameters: - buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig) - postBuildSteps: - - template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr - includeRootFolder: false - archiveType: $(archiveType) - tarCompression: $(tarCompression) - archiveExtension: $(archiveExtension) - artifactName: CheckedJIT_$(osGroup)$(osSubgroup)_$(archType) - displayName: JIT and SuperPMI Assets - condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true)) - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/superpmi-replay-job.yml - buildConfig: checked - platforms: - - windows_x64 - - windows_x86 - helixQueueGroup: ci - helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml - jobParameters: - condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true)) + platforms: + - windows_x64 + - windows_x86 + replayType: standard diff --git a/eng/pipelines/coreclr/templates/helix-queues-setup.yml b/eng/pipelines/coreclr/templates/helix-queues-setup.yml index 201bead89062..094b279199d6 100644 --- a/eng/pipelines/coreclr/templates/helix-queues-setup.yml +++ b/eng/pipelines/coreclr/templates/helix-queues-setup.yml @@ -34,11 +34,11 @@ jobs: # iOS Simulator/Mac Catalyst arm64 - ${{ if in(parameters.platform, 'maccatalyst_arm64', 'iossimulator_arm64') }}: - - OSX.1200.Arm64.Open + - OSX.14.Arm64.Open # iOS/tvOS Simulator x64 & MacCatalyst x64 - ${{ if in(parameters.platform, 'iossimulator_x64', 'tvossimulator_x64', 'maccatalyst_x64') }}: - - OSX.1200.Amd64.Open + - OSX.15.Amd64.Open # Android arm64 - ${{ if in(parameters.platform, 'android_arm64') }}: @@ -50,7 +50,7 @@ jobs: # Browser wasm - ${{ if eq(parameters.platform, 'browser_wasm') }}: - - (Ubuntu.2204.Amd64)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-helix-webassembly + - (Ubuntu.2404.Amd64)AzureLinux.3.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-24.04-helix-webassembly-amd64 # iOS devices - ${{ if in(parameters.platform, 'ios_arm64') }}: @@ -63,16 +63,16 @@ jobs: # Linux arm - ${{ if eq(parameters.platform, 'linux_arm') }}: - ${{ if eq(variables['System.TeamProject'], 'public') }}: - - (Debian.12.Arm32.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-helix-arm32v7 + - (Debian.13.Arm32.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-arm32v7 - ${{ if eq(variables['System.TeamProject'], 'internal') }}: - - (Debian.12.Arm32)Ubuntu.2204.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-helix-arm32v7 + - (Debian.13.Arm32)Ubuntu.2204.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-arm32v7 # Linux arm64 - ${{ if eq(parameters.platform, 'linux_arm64') }}: - ${{ if eq(variables['System.TeamProject'], 'public') }}: 
- - (Ubuntu.2004.Arm64.Open)Ubuntu.2204.Armarch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-20.04-helix-arm64v8 + - (AzureLinux.3.0.ArmArch.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-helix-arm64v8 - ${{ if eq(variables['System.TeamProject'], 'internal') }}: - - (Ubuntu.2004.Arm64)Ubuntu.2204.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-20.04-helix-arm64v8 + - (AzureLinux.3.0.ArmArch)Ubuntu.2204.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-helix-arm64v8 # Linux musl x64 - ${{ if eq(parameters.platform, 'linux_musl_x64') }}: @@ -105,9 +105,9 @@ jobs: # OSX arm64 - ${{ if eq(parameters.platform, 'osx_arm64') }}: - ${{ if eq(variables['System.TeamProject'], 'public') }}: - - OSX.1200.ARM64.Open + - OSX.13.Arm64.Open - ${{ if eq(variables['System.TeamProject'], 'internal') }}: - - OSX.1200.ARM64 + - OSX.1200.Arm64 # OSX x64 - ${{ if eq(parameters.platform, 'osx_x64') }}: diff --git a/eng/pipelines/coreclr/templates/install-diagnostic-certs.yml b/eng/pipelines/coreclr/templates/install-diagnostic-certs.yml deleted file mode 100644 index 5eed6b2b5fb7..000000000000 --- a/eng/pipelines/coreclr/templates/install-diagnostic-certs.yml +++ /dev/null @@ -1,28 +0,0 @@ -parameters: - isOfficialBuild: false - certNames: [] - vaultName: '' - azureSubscription: '' - scriptRoot: '$(Build.SourcesDirectory)' - -steps: -- ${{ if and(eq(parameters.isOfficialBuild, true), ne(variables['Build.Reason'], 'PullRequest'), or(startswith(variables['Build.SourceBranch'], 'refs/heads/release/'), startswith(variables['Build.SourceBranch'], 'refs/heads/internal/release/'), startswith(variables['Build.SourceBranch'], 'refs/heads/reltest/')), not(endsWith(variables['Build.SourceBranch'], '-staging'))) }}: - - task: AzureKeyVault@2 - inputs: - azureSubscription: ${{ parameters.azureSubscription }} - KeyVaultName: ${{ parameters.vaultName }} - SecretsFilter: ${{ join(',', parameters.certNames) }} - displayName: 'Download secrets: Diagnostic Certificates' - - - task: EsrpClientTool@2 - displayName: Download ESRPClient - - - powershell: | - eng/pipelines/install-diagnostic-certs.ps1 "${{ join(',', parameters.certNames) }}" - $signArgs = '/p:DotNetEsrpToolPath=$(esrpclient.toolpath)\$(esrpclient.toolname)' - echo "##vso[task.setvariable variable=_SignDiagnosticFilesArgs;]$signArgs" - displayName: 'Install diagnostic certificates' - workingDirectory: ${{ parameters.scriptRoot }} - env: - ${{ each cert in parameters.certNames }}: - ${{ cert }}: $(${{ cert }}) diff --git a/eng/pipelines/coreclr/templates/jit-replay-pipeline.yml b/eng/pipelines/coreclr/templates/jit-replay-pipeline.yml new file mode 100644 index 000000000000..74f2caf2ff38 --- /dev/null +++ b/eng/pipelines/coreclr/templates/jit-replay-pipeline.yml @@ -0,0 +1,58 @@ +parameters: + - name: platforms + type: object + - name: replayType + type: string + default: standard + +extends: + template: /eng/pipelines/common/templates/pipeline-with-resources.yml + parameters: + stages: + # Don't run if the JIT-EE GUID has changed, + # since there won't be any SuperPMI collections with the new GUID until the collection + # pipeline completes after this PR is merged. 
+ - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}: + - stage: EvaluatePaths + displayName: Evaluate Paths + jobs: + - template: /eng/pipelines/common/evaluate-paths-job.yml + parameters: + paths: + - subset: jiteeversionguid + include: + - src/coreclr/inc/jiteeversionguid.h + + - stage: Build + jobs: + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + buildConfig: checked + platforms: ${{ parameters.platforms }} + jobParameters: + buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig) + postBuildSteps: + - template: /eng/pipelines/common/upload-artifact-step.yml + parameters: + rootFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr + includeRootFolder: false + archiveType: $(archiveType) + tarCompression: $(tarCompression) + archiveExtension: $(archiveExtension) + artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) + displayName: Build Assets + condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true)) + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/superpmi-replay-job.yml + buildConfig: checked + platforms: ${{ parameters.platforms }} + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true)) + replayType: ${{ parameters.replayType }} + unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) diff --git a/eng/pipelines/coreclr/templates/remove-diagnostic-certs.yml b/eng/pipelines/coreclr/templates/remove-diagnostic-certs.yml deleted file mode 100644 index c510ea8f177c..000000000000 --- a/eng/pipelines/coreclr/templates/remove-diagnostic-certs.yml +++ /dev/null @@ -1,11 +0,0 @@ -parameters: - isOfficialBuild: false - scriptRoot: '$(Build.SourcesDirectory)' - -steps: -- ${{ if and(eq(parameters.isOfficialBuild, true), ne(variables['Build.Reason'], 'PullRequest'), or(startswith(variables['Build.SourceBranch'], 'refs/heads/release/'), startswith(variables['Build.SourceBranch'], 'refs/heads/internal/release/'), startswith(variables['Build.SourceBranch'], 'refs/heads/reltest/')), not(endsWith(variables['Build.SourceBranch'], '-staging'))) }}: - - powershell: | - eng/pipelines/remove-diagnostic-certs.ps1 "$(DacCertificateThumbprints)" - workingDirectory: ${{ parameters.scriptRoot }} - displayName: 'Remove Diagnostic Certificates' - condition: always() diff --git a/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml index 4047a8cb2f7c..27cc61c653de 100644 --- a/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml +++ b/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml @@ -16,6 +16,7 @@ parameters: enableTelemetry: false # optional -- enable for telemetry liveLibrariesBuildConfig: '' # optional -- live-live libraries configuration to use for the run helixQueues: '' # required -- Helix queues + replayType: 'standard' # required -- 'standard', 'apx' jobs: - template: /eng/pipelines/common/templates/runtimes/xplat-job.yml @@ -47,6 +48,9 @@ jobs: - ${{ each variable in parameters.variables }}: - ${{insert}}: ${{ variable }} + - name: replayType + value: ${{ parameters.replayType }} + - template: 
/eng/pipelines/coreclr/templates/jit-python-variables.yml parameters: osGroup: ${{ parameters.osGroup }} @@ -74,8 +78,8 @@ jobs: mkdir $(SpmiLogsLocation) displayName: Create directories - - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_replay_setup.py -source_directory $(Build.SourcesDirectory) -product_directory $(buildProductRootFolderPath) -arch $(archType) - displayName: ${{ format('SuperPMI replay setup ({0})', parameters.archType) }} + - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_replay_setup.py -source_directory $(Build.SourcesDirectory) -product_directory $(buildProductRootFolderPath) -type $(replayType) -arch $(archType) + displayName: ${{ format('SuperPMI replay setup ({0} {1})', parameters.replayType, parameters.archType) }} # Run superpmi replay in helix - template: /eng/pipelines/common/templates/runtimes/send-to-helix-step.yml @@ -93,6 +97,7 @@ jobs: BuildConfig: ${{ parameters.buildConfig }} osGroup: ${{ parameters.osGroup }} archType: ${{ parameters.archType }} + SuperPmiReplayType: ${{ parameters.replayType }} # Always upload the available logs for diagnostics - task: CopyFiles@2 diff --git a/eng/pipelines/coreclr/templates/superpmi-collect-pipeline.yml b/eng/pipelines/coreclr/templates/superpmi-collect-pipeline.yml new file mode 100644 index 000000000000..2ff40b6c22ef --- /dev/null +++ b/eng/pipelines/coreclr/templates/superpmi-collect-pipeline.yml @@ -0,0 +1,363 @@ +# This template definition automates the SuperPMI collection process, +# and is used by the SuperPMI pipeline definitions. + +extends: + template: /eng/pipelines/common/templates/pipeline-with-resources.yml + parameters: + stages: + - stage: Build + jobs: + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + buildConfig: checked + platforms: + - windows_x64 + - linux_x64 + jobParameters: + testGroup: outerloop + buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true + timeoutInMinutes: 120 + postBuildSteps: + - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml + - template: /eng/pipelines/common/upload-artifact-step.yml + parameters: + rootFolder: $(Build.SourcesDirectory)/artifacts/bin + includeRootFolder: false + archiveType: $(archiveType) + archiveExtension: $(archiveExtension) + tarCompression: $(tarCompression) + artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) + - template: /eng/pipelines/common/upload-artifact-step.yml + parameters: + rootFolder: $(Build.SourcesDirectory)/artifacts/helix + includeRootFolder: false + archiveType: $(archiveType) + archiveExtension: $(archiveExtension) + tarCompression: $(tarCompression) + artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) + extraVariablesTemplates: + - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml + parameters: + testGroup: outerloop + disableComponentGovernance: true # No shipping artifacts produced by this pipeline + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + buildConfig: checked + platforms: + - windows_x86 + - windows_arm64 + - osx_arm64 + jobParameters: + testGroup: outerloop + buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true + timeoutInMinutes: 120 + postBuildSteps: + # Build CLR assets for x64 as well as the target 
as we need an x64 mcs + - template: /eng/pipelines/common/templates/global-build-step.yml + parameters: + buildArgs: -s clr.spmi -c $(_BuildConfig) + archParameter: -arch x64 + displayName: Build SuperPMI + - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml + - template: /eng/pipelines/common/upload-artifact-step.yml + parameters: + rootFolder: $(Build.SourcesDirectory)/artifacts/bin + includeRootFolder: false + archiveType: $(archiveType) + archiveExtension: $(archiveExtension) + tarCompression: $(tarCompression) + artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) + - template: /eng/pipelines/common/upload-artifact-step.yml + parameters: + rootFolder: $(Build.SourcesDirectory)/artifacts/helix + includeRootFolder: false + archiveType: $(archiveType) + archiveExtension: $(archiveExtension) + tarCompression: $(tarCompression) + artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) + extraVariablesTemplates: + - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml + parameters: + testGroup: outerloop + disableComponentGovernance: true # No shipping artifacts produced by this pipeline + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + buildConfig: checked + platforms: + - linux_arm + - linux_arm64 + jobParameters: + testGroup: outerloop + buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true + timeoutInMinutes: 120 + postBuildSteps: + # Build CLR assets for x64 as well as the target as we need an x64 mcs + - template: /eng/pipelines/common/templates/global-build-step.yml + parameters: + buildArgs: -s clr.spmi -c $(_BuildConfig) + archParameter: -arch x64 + container: linux_x64 + displayName: Build SuperPMI + - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml + - template: /eng/pipelines/common/upload-artifact-step.yml + parameters: + rootFolder: $(Build.SourcesDirectory)/artifacts/bin + includeRootFolder: false + archiveType: $(archiveType) + archiveExtension: $(archiveExtension) + tarCompression: $(tarCompression) + artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) + - template: /eng/pipelines/common/upload-artifact-step.yml + parameters: + rootFolder: $(Build.SourcesDirectory)/artifacts/helix + includeRootFolder: false + archiveType: $(archiveType) + archiveExtension: $(archiveExtension) + tarCompression: $(tarCompression) + artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) + extraVariablesTemplates: + - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml + parameters: + testGroup: outerloop + disableComponentGovernance: true # No shipping artifacts produced by this pipeline + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml + buildConfig: checked + platforms: + - CoreClrTestBuildHost # Either osx_x64 or linux_x64 + jobParameters: + testGroup: outerloop + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml + buildConfig: checked + platforms: + - osx_arm64 + - linux_arm + - linux_arm64 + - linux_x64 + - windows_x64 + - windows_x86 + - windows_arm64 + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml 
+ jobParameters: + testGroup: outerloop + liveLibrariesBuildConfig: Release + collectionType: pmi + collectionName: libraries + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml + buildConfig: checked + platforms: + - osx_arm64 + - linux_arm + - linux_arm64 + - linux_x64 + - windows_x64 + - windows_x86 + - windows_arm64 + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testGroup: outerloop + liveLibrariesBuildConfig: Release + collectionType: crossgen2 + collectionName: libraries + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml + buildConfig: checked + platforms: + - osx_arm64 + - linux_arm + - linux_arm64 + - linux_x64 + - windows_x64 + - windows_x86 + - windows_arm64 + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testGroup: outerloop + liveLibrariesBuildConfig: Release + collectionType: run + collectionName: realworld + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml + buildConfig: checked + platforms: + - osx_arm64 + - linux_arm + - linux_arm64 + - linux_x64 + - windows_x64 + - windows_x86 + - windows_arm64 + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testGroup: outerloop + liveLibrariesBuildConfig: Release + collectionType: run + collectionName: benchmarks + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml + buildConfig: checked + platforms: + - osx_arm64 + - linux_arm + - linux_arm64 + - linux_x64 + - windows_x64 + - windows_x86 + - windows_arm64 + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testGroup: outerloop + liveLibrariesBuildConfig: Release + collectionType: run_pgo + collectionName: benchmarks + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml + buildConfig: checked + platforms: + - osx_arm64 + - linux_arm + - linux_arm64 + - linux_x64 + - windows_x64 + - windows_x86 + - windows_arm64 + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testGroup: outerloop + liveLibrariesBuildConfig: Release + collectionType: run_pgo_optrepeat + collectionName: benchmarks + + # + # Collection of coreclr test run + # + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml + buildConfig: checked + platforms: + - osx_arm64 + - linux_arm + - linux_arm64 + - linux_x64 + - windows_x64 + - windows_x86 + - windows_arm64 + helixQueueGroup: superpmi + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testGroup: outerloop + liveLibrariesBuildConfig: Release + SuperPmiCollect: true + unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig) + + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/superpmi-collect-job.yml + buildConfig: checked + platforms: + - 
linux_arm64 + - linux_x64 + - windows_x64 + - windows_arm64 + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testGroup: outerloop + liveLibrariesBuildConfig: Release + collectionType: nativeaot + collectionName: smoke_tests + + # + # Collection of libraries test run: normal + # Libraries Test Run using Release libraries, and Checked CoreCLR + # + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/libraries/run-test-job.yml + buildConfig: Release + platforms: + - osx_arm64 + - linux_arm + - linux_arm64 + - linux_x64 + - windows_x64 + - windows_x86 + - windows_arm64 + helixQueueGroup: superpmi + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testScope: innerloop + liveRuntimeBuildConfig: Checked + dependsOnTestBuildConfiguration: Release + dependsOnTestArchitecture: x64 + scenarios: + - normal + SuperPmiCollect: true + SuperPmiCollectionName: libraries_tests + unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked + helixArtifactsName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked + unifiedBuildConfigOverride: checked + # Default timeout is 150 minutes, which is too low for osx-arm64 queue. + timeoutInMinutes: 300 + + # + # Collection of libraries test run: no_tiered_compilation + # Libraries Test Run using Release libraries, and Checked CoreCLR + # + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/libraries/run-test-job.yml + buildConfig: Release + platforms: + - osx_arm64 + - linux_arm + - linux_arm64 + - linux_x64 + - windows_x64 + - windows_x86 + - windows_arm64 + helixQueueGroup: superpmi + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + jobParameters: + testScope: innerloop + liveRuntimeBuildConfig: Checked + dependsOnTestBuildConfiguration: Release + dependsOnTestArchitecture: x64 + scenarios: + - no_tiered_compilation + SuperPmiCollect: true + SuperPmiCollectionName: libraries_tests_no_tiered_compilation + unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked + helixArtifactsName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked + unifiedBuildConfigOverride: checked + # Default timeout is 150 minutes, which is too low for osx-arm64 queue. 
+ timeoutInMinutes: 300 diff --git a/eng/pipelines/coreclr/templates/superpmi-replay-job.yml b/eng/pipelines/coreclr/templates/superpmi-replay-job.yml index ea7854339a21..ec364c8d5063 100644 --- a/eng/pipelines/coreclr/templates/superpmi-replay-job.yml +++ b/eng/pipelines/coreclr/templates/superpmi-replay-job.yml @@ -9,12 +9,14 @@ parameters: variables: {} helixQueues: '' runJobTemplate: '/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml' + replayType: 'standard' + unifiedArtifactsName: '' jobs: - template: ${{ parameters.runJobTemplate }} parameters: - jobName: ${{ format('superpmi_replay_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} - displayName: ${{ format('SuperPMI replay {0}{1} {2} {3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} + jobName: ${{ format('superpmi_replay_{0}_{1}{2}_{3}_{4}', parameters.replayType, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} + displayName: ${{ format('SuperPMI replay {0} {1}{2} {3} {4}', parameters.replayType, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} pool: ${{ parameters.pool }} buildConfig: ${{ parameters.buildConfig }} archType: ${{ parameters.archType }} @@ -23,6 +25,7 @@ jobs: condition: ${{ parameters.condition }} timeoutInMinutes: ${{ parameters.timeoutInMinutes }} helixQueues: ${{ parameters.helixQueues }} + replayType: ${{ parameters.replayType }} dependsOn: - 'build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}_' @@ -30,11 +33,11 @@ jobs: steps: - # Download jit builds + # Download builds - template: /eng/pipelines/common/download-artifact-step.yml parameters: unpackFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr - artifactFileName: 'CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)$(archiveExtension)' - artifactName: 'CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)' - displayName: 'JIT checked build' + artifactFileName: '${{ parameters.unifiedArtifactsName }}$(archiveExtension)' + artifactName: '${{ parameters.unifiedArtifactsName }}' + displayName: 'unified artifacts' cleanUnpackFolder: false diff --git a/eng/pipelines/diagnostics/runtime-diag-job.yml b/eng/pipelines/diagnostics/runtime-diag-job.yml new file mode 100644 index 000000000000..0c8009aaf8a8 --- /dev/null +++ b/eng/pipelines/diagnostics/runtime-diag-job.yml @@ -0,0 +1,261 @@ +parameters: + buildConfig: '' + name: '' + buildArgs: '' + archType: '' + hostedOs: '' + osGroup: '' + osSubgroup: '' + container: '' + crossBuild: false + variables: [] + targetRid: '' + timeoutInMinutes: '' + dependsOn: [] + # The following parameter is used to specify dependencies on other global build for the same platform. + # We provide this mechanism to allow for global builds to depend on other global builds and use the multiplexing + # that platform-matrix.yml enables. + # Each item can have the following properties: + # - name: The suffix of the job name to depend on. + # - buildConfig: The configuration of the job to depend on. 
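A rough sketch of what a caller of this job template might pass for the parameter documented just above; the `CoreCLR_Product` suffix and `Release` value are hypothetical placeholders, not jobs defined in this diff, and each entry expands to a `dependsOn` name through the `build_{osGroup}{osSubgroup}_{archType}_{buildConfig}_{name}` format used by the dependsOn expansion further down:

    dependsOnGlobalBuilds:
      - name: CoreCLR_Product      # hypothetical suffix of the same-platform global build job to depend on
        buildConfig: Release       # optional; coalesces to this job's own buildConfig when omitted
    # e.g. with osGroup=windows, archType=x64 this resolves to: build_windows_x64_Release_CoreCLR_Product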
+ dependsOnGlobalBuilds: [] + pool: '' + platform: '' + condition: true + useContinueOnErrorDuringBuild: false + shouldContinueOnError: false + isOfficialBuild: false + runtimeFlavor: 'coreclr' + runtimeVariant: '' + helixQueues: '' + enablePublishTestResults: false + testResultsFormat: '' + postBuildSteps: [] + extraVariablesTemplates: [] + preBuildSteps: [] + templatePath: 'templates' + templateContext: '' + disableComponentGovernance: '' + liveRuntimeDir: '' + +jobs: +- template: /eng/common/${{ parameters.templatePath }}/job/job.yml + parameters: + name: ${{ coalesce(parameters.name, parameters.osGroup) }}_${{ parameters.archType }}_${{ parameters.buildConfig }} + pool: ${{ parameters.pool }} + container: ${{ parameters.container }} + condition: and(succeeded(), ${{ parameters.condition }}) + timeoutInMinutes: ${{ parameters.timeoutInMinutes }} + enablePublishTestResults: ${{ parameters.enablePublishTestResults }} + testResultsFormat: ${{ parameters.testResultsFormat }} + + ${{ if ne(parameters.templateContext, '') }}: + templateContext: ${{ parameters.templateContext }} + + artifacts: + publish: + logs: + ${{ if notin(parameters.osGroup, 'browser', 'wasi') }}: + name: Logs_Build_Attempt$(System.JobAttempt)_${{ parameters.osGroup }}_${{ parameters.osSubGroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}_${{ parameters.name }} + ${{ if in(parameters.osGroup, 'browser', 'wasi') }}: + name: Logs_Build_Attempt$(System.JobAttempt)_${{ parameters.osGroup }}_${{ parameters.archType }}_${{ parameters.hostedOs }}_${{ parameters.buildConfig }}_${{ parameters.name }} + + # Component governance does not work on musl machines + ${{ if eq(parameters.osSubGroup, '_musl') }}: + disableComponentGovernance: true + ${{ else }}: + disableComponentGovernance: ${{ parameters.disableComponentGovernance }} + + workspace: + clean: all + + ${{ if or(ne(parameters.dependsOn,''), ne(parameters.dependsOnGlobalBuilds,'')) }}: + dependsOn: + - ${{ each build in parameters.dependsOn }}: + - ${{ build }} + - ${{ each globalBuild in parameters.dependsOnGlobalBuilds }}: + - ${{ format('build_{0}{1}_{2}_{3}_{4}', parameters.osGroup, parameters.osSubgroup, parameters.archType, coalesce(globalBuild.buildConfig, parameters.buildConfig), globalBuild.name) }} + + variables: + - ${{ if eq(variables['System.TeamProject'], 'internal') }}: + - group: DotNet-HelixApi-Access + - group: AzureDevOps-Artifact-Feeds-Pats + + - _PhaseName: ${{ coalesce(parameters.name, parameters.osGroup) }}_${{ parameters.archType }}_${{ parameters.buildConfig }} + - _Pipeline_StreamDumpDir: $(Build.SourcesDirectory)/artifacts/tmp/${{ parameters.buildConfig }}/streams + + - _TestArgs: '-test' + - _Cross: '' + + - _buildScript: $(Build.SourcesDirectory)$(dir)build$(scriptExt) + + - ${{ if and(eq(parameters.testOnly, 'true'), eq(parameters.buildOnly, 'true')) }}: + 'error, testOnly and buildOnly cannot be true at the same time': error + + - ${{ if eq(parameters.testOnly, 'true') }}: + - _TestArgs: '-test -skipnative' + + - ${{ if or(eq(parameters.buildOnly, 'true'), eq(parameters.isCodeQLRun, 'true')) }}: + - _TestArgs: '' + + # For testing msrc's and service releases. 
The RuntimeSourceVersion is either "default" or the service release version to test + - _InternalInstallArgs: '' + - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest'), eq(parameters.isCodeQLRun, 'false')) }}: + - _InternalInstallArgs: + -dotnetruntimeversion '$(DotnetRuntimeVersion)' + -dotnetruntimedownloadversion '$(DotnetRuntimeDownloadVersion)' + -runtimesourcefeed '$(RuntimeFeedUrl)' + -runtimesourcefeedkey '$(RuntimeFeedBase64SasToken)' + + - ${{ each variableTemplate in parameters.extraVariablesTemplates }}: + - template: ${{ variableTemplate.template }} + parameters: + osGroup: ${{ parameters.osGroup }} + osSubgroup: ${{ parameters.osSubgroup }} + archType: ${{ parameters.archType }} + buildConfig: ${{ parameters.buildConfig }} + runtimeFlavor: ${{ parameters.runtimeFlavor }} + runtimeVariant: ${{ parameters.runtimeVariant }} + helixQueues: ${{ parameters.helixQueues }} + targetRid: ${{ parameters.targetRid }} + name: ${{ parameters.name }} + platform: ${{ parameters.platform }} + shouldContinueOnError: ${{ parameters.shouldContinueOnError }} + ${{ if ne(variableTemplate.forwardedParameters, '') }}: + ${{ each parameter in variableTemplate.forwardedParameters }}: + ${{ parameter }}: ${{ parameters[parameter] }} + ${{ if ne(variableTemplate.parameters, '') }}: + ${{ insert }}: ${{ variableTemplate.parameters }} + + - ${{ each variable in parameters.variables }}: + - ${{ variable }} + + steps: + - ${{ if eq(parameters.osGroup, 'windows') }}: + - template: /eng/pipelines/common/templates/disable-vsupdate-or-failfast.yml + + - checkout: diagnostics + clean: true + fetchDepth: $(checkoutFetchDepth) + + - ${{ if and(eq(parameters.isOfficialBuild, true), notin(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator')) }}: + - template: /eng/pipelines/common/restore-internal-tools.yml + + - ${{ if ne(variables['System.TeamProject'], 'public') }}: + - ${{ if and(ne(parameters.osGroup, 'windows'), ne(parameters.hostedOs, 'windows')) }}: + - task: Bash@3 + displayName: Setup Private Feeds Credentials + inputs: + filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.sh + arguments: $(Build.SourcesDirectory)/NuGet.config $Token + env: + Token: $(dn-bot-dnceng-artifact-feeds-rw) + - ${{ else }}: + - task: PowerShell@2 + displayName: Setup Private Feeds Credentials + inputs: + filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.ps1 + arguments: -ConfigFile $(Build.SourcesDirectory)/NuGet.config -Password $Env:Token + env: + Token: $(dn-bot-dnceng-artifact-feeds-rw) + # Run the NuGetAuthenticate task after the internal feeds are added to the nuget.config + # This ensures that creds are set appropriately for all feeds in the config, and that the + # credential provider is installed. 
+ - task: NuGetAuthenticate@1 + + - ${{ if in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator') }}: + - script: $(Build.SourcesDirectory)/eng/common/native/install-dependencies.sh ${{ parameters.osGroup }} + displayName: Install Build Dependencies + + - script: | + du -sh $(Build.SourcesDirectory)/* + df -h + displayName: Disk Usage before Build + + - ${{ if in(parameters.platform, 'browser_wasm_win', 'wasi_wasm_win') }}: + # Update machine certs + - task: PowerShell@2 + displayName: Update machine certs + inputs: + filePath: $(Build.SourcesDirectory)/eng/pipelines/mono/update-machine-certs.ps1 + + - ${{ if ne(parameters.preBuildSteps,'') }}: + - ${{ each preBuildStep in parameters.preBuildSteps }}: + - ${{ if ne(preBuildStep.template, '') }}: + - template: ${{ preBuildStep.template }} + parameters: + osGroup: ${{ parameters.osGroup }} + osSubgroup: ${{ parameters.osSubgroup }} + archType: ${{ parameters.archType }} + buildConfig: ${{ parameters.buildConfig }} + runtimeFlavor: ${{ parameters.runtimeFlavor }} + runtimeVariant: ${{ parameters.runtimeVariant }} + helixQueues: ${{ parameters.helixQueues }} + targetRid: ${{ parameters.targetRid }} + name: ${{ parameters.name }} + platform: ${{ parameters.platform }} + shouldContinueOnError: ${{ parameters.shouldContinueOnError }} + ${{ if ne(preBuildStep.forwardedParameters, '') }}: + ${{ each parameter in preBuildStep.forwardedParameters }}: + ${{ parameter }}: ${{ parameters[parameter] }} + ${{ if ne(preBuildStep.parameters, '') }}: + ${{ insert }}: ${{ preBuildStep.parameters }} + - ${{ else }}: + - ${{ preBuildStep }} + + # Build + - script: $(_buildScript) + -ci + -configuration ${{ parameters.buildConfig }} + -architecture ${{ parameters.archType }} + -privatebuild + -useCdac + -liveRuntimeDir ${{ parameters.liveRuntimeDir }} + $(_TestArgs) + $(_Cross) + $(_InternalInstallArgs) + /p:OfficialBuildId=$(BUILD.BUILDNUMBER) + ${{ if eq(parameters.testOnly, 'true') }}: + displayName: Test + ${{ elseif eq(parameters.buildOnly, 'true') }}: + displayName: Build + ${{ else }}: + displayName: Build / Test + condition: succeeded() + + - ${{ if in(parameters.osGroup, 'osx', 'ios', 'tvos', 'android') }}: + - script: | + du -sh $(Build.SourcesDirectory)/* + df -h + displayName: Disk Usage after Build + condition: always() + + # If intended to send extra steps after regular build add them here. 
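For illustration only, a consuming pipeline could forward such extra steps through `postBuildSteps` in the same template-plus-parameters shape used elsewhere in this diff (each entry's `parameters` are spliced in by the loop below); the artifact name here is a made-up placeholder, not one produced by this pipeline:

    postBuildSteps:
      - template: /eng/pipelines/common/upload-artifact-step.yml
        parameters:
          rootFolder: $(Build.SourcesDirectory)/artifacts/bin
          includeRootFolder: false
          archiveType: $(archiveType)
          archiveExtension: $(archiveExtension)
          tarCompression: $(tarCompression)
          artifactName: DiagnosticsArtifacts_$(osGroup)_$(archType)_$(_BuildConfig)   # placeholder name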
+ - ${{ if ne(parameters.postBuildSteps,'') }}: + - ${{ each postBuildStep in parameters.postBuildSteps }}: + - ${{ if ne(postBuildStep.template, '') }}: + - template: ${{ postBuildStep.template }} + parameters: + osGroup: ${{ parameters.osGroup }} + osSubgroup: ${{ parameters.osSubgroup }} + archType: ${{ parameters.archType }} + buildConfig: ${{ parameters.buildConfig }} + runtimeFlavor: ${{ parameters.runtimeFlavor }} + runtimeVariant: ${{ parameters.runtimeVariant }} + helixQueues: ${{ parameters.helixQueues }} + targetRid: ${{ parameters.targetRid }} + name: ${{ parameters.name }} + platform: ${{ parameters.platform }} + shouldContinueOnError: ${{ parameters.shouldContinueOnError }} + ${{ if ne(postBuildStep.forwardedParameters, '') }}: + ${{ each parameter in postBuildStep.forwardedParameters }}: + ${{ parameter }}: ${{ parameters[parameter] }} + ${{ if ne(postBuildStep.parameters, '') }}: + ${{ insert }}: ${{ postBuildStep.parameters }} + - ${{ else }}: + - ${{ postBuildStep }} + + - ${{ if and(eq(parameters.isOfficialBuild, true), eq(parameters.osGroup, 'windows')) }}: + - powershell: ./eng/collect_vsinfo.ps1 -ArchiveRunName postbuild_log + displayName: Collect vslogs on exit + condition: always() diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-android.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-android.yml index 7b9d41475f30..417241706c4c 100644 --- a/eng/pipelines/extra-platforms/runtime-extra-platforms-android.yml +++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-android.yml @@ -83,3 +83,34 @@ jobs: parameters: creator: dotnet-bot testRunNamePrefixSuffix: Mono_$(_BuildConfig) + +# +# Android devices +# Build the whole product using CoreCLR and run libraries tests +# +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml + buildConfig: Release + runtimeFlavor: coreclr + platforms: + - android_arm64 + variables: + # map dependencies variables to local variables + - name: librariesContainsChange + value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ] + - name: coreclrContainsChange + value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'] ] + jobParameters: + testGroup: innerloop + nameSuffix: AllSubsets_CoreCLR + isExtraPlatforms: ${{ parameters.isExtraPlatformsBuild }} + buildArgs: -s clr.runtime+clr.alljits+clr.corelib+clr.nativecorelib+clr.tools+clr.packages+libs+libs.tests+host+packs -c $(_BuildConfig) /p:ArchiveTests=true $(_runSmokeTestsOnlyArg) + timeoutInMinutes: 480 + # extra steps, run tests + postBuildSteps: + - template: /eng/pipelines/libraries/helix.yml + parameters: + creator: dotnet-bot + testRunNamePrefixSuffix: CoreCLR_$(_BuildConfig) diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-androidemulator.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-androidemulator.yml index 4badaf93186c..b7b3774f0b5a 100644 --- a/eng/pipelines/extra-platforms/runtime-extra-platforms-androidemulator.yml +++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-androidemulator.yml @@ -118,3 +118,34 @@ jobs: parameters: creator: dotnet-bot testRunNamePrefixSuffix: Mono_$(_BuildConfig) + +# +# Android emulators +# Build the whole product using CoreCLR and run libraries tests +# +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: 
/eng/pipelines/common/global-build-job.yml + helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml + buildConfig: Release + runtimeFlavor: coreclr + platforms: + - android_x64 + variables: + # map dependencies variables to local variables + - name: librariesContainsChange + value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ] + - name: coreclrContainsChange + value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'] ] + jobParameters: + testGroup: innerloop + nameSuffix: AllSubsets_CoreCLR + isExtraPlatforms: ${{ parameters.isExtraPlatformsBuild }} + buildArgs: -s clr.runtime+clr.alljits+clr.corelib+clr.nativecorelib+clr.tools+clr.packages+libs+libs.tests+host+packs -c $(_BuildConfig) /p:ArchiveTests=true $(_runSmokeTestsOnlyArg) + timeoutInMinutes: 240 + # extra steps, run tests + postBuildSteps: + - template: /eng/pipelines/libraries/helix.yml + parameters: + creator: dotnet-bot + testRunNamePrefixSuffix: CoreCLR_$(_BuildConfig) diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslike.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslike.yml index 47a38df343f8..7f10d317464c 100644 --- a/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslike.yml +++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslike.yml @@ -39,9 +39,9 @@ jobs: isExtraPlatforms: ${{ parameters.isExtraPlatformsBuild }} # Don't trim tests on rolling builds ${{ if eq(variables['isRollingBuild'], true) }}: - buildArgs: -s mono+libs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:DevTeamProvisioning=- /p:RunAOTCompilation=true $(_runSmokeTestsOnlyArg) /p:BuildTestsOnHelix=true /p:EnableAdditionalTimezoneChecks=true /p:UsePortableRuntimePack=false /p:BuildDarwinFrameworks=true /p:IsManualOrRollingBuild=true /p:EnableAggressiveTrimming=false + buildArgs: -s mono+libs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:DevTeamProvisioning=- /p:RunAOTCompilation=true $(_runSmokeTestsOnlyArg) /p:BuildTestsOnHelix=true /p:EnableAdditionalTimezoneChecks=true /p:UsePortableRuntimePack=false /p:IsManualOrRollingBuild=true /p:EnableAggressiveTrimming=false ${{ else }}: - buildArgs: -s mono+libs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:DevTeamProvisioning=- /p:RunAOTCompilation=true $(_runSmokeTestsOnlyArg) /p:BuildTestsOnHelix=true /p:EnableAdditionalTimezoneChecks=true /p:UsePortableRuntimePack=false /p:BuildDarwinFrameworks=true /p:IsManualOrRollingBuild=true /p:EnableAggressiveTrimming=true + buildArgs: -s mono+libs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:DevTeamProvisioning=- /p:RunAOTCompilation=true $(_runSmokeTestsOnlyArg) /p:BuildTestsOnHelix=true /p:EnableAdditionalTimezoneChecks=true /p:UsePortableRuntimePack=false /p:IsManualOrRollingBuild=true /p:EnableAggressiveTrimming=true timeoutInMinutes: 480 # extra steps, run tests postBuildSteps: diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslikesimulator.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslikesimulator.yml index 2cfb553d5dbd..9f483dbbe75e 100644 --- a/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslikesimulator.yml +++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslikesimulator.yml @@ -25,9 +25,7 @@ jobs: platforms: - iossimulator_x64 - tvossimulator_x64 - # don't run tests on arm64 PRs until we can get significantly more devices - - ${{ if eq(variables['isRollingBuild'], true) }}: - - iossimulator_arm64 + - 
iossimulator_arm64 variables: # map dependencies variables to local variables - name: librariesContainsChange @@ -37,7 +35,7 @@ jobs: jobParameters: testGroup: innerloop nameSuffix: AllSubsets_Mono - buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true $(_runSmokeTestsOnlyArg) /p:RunAOTCompilation=true /p:MonoForceInterpreter=true /p:BuildDarwinFrameworks=true + buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true $(_runSmokeTestsOnlyArg) /p:RunAOTCompilation=true /p:MonoForceInterpreter=true timeoutInMinutes: 180 # extra steps, run tests postBuildSteps: @@ -61,9 +59,7 @@ jobs: platforms: - iossimulator_x64 - tvossimulator_x64 - # don't run tests on arm64 PRs until we can get significantly more devices - - ${{ if eq(variables['isRollingBuild'], true) }}: - - iossimulator_arm64 + - iossimulator_arm64 variables: - ${{ if and(eq(variables['System.TeamProject'], 'public'), eq(variables['Build.Reason'], 'PullRequest')) }}: - name: _HelixSource @@ -108,9 +104,7 @@ jobs: platforms: - iossimulator_x64 - tvossimulator_x64 - # don't run tests on arm64 PRs until we can get significantly more devices - - ${{ if eq(variables['isRollingBuild'], true) }}: - - iossimulator_arm64 + - iossimulator_arm64 variables: - ${{ if and(eq(variables['System.TeamProject'], 'public'), eq(variables['Build.Reason'], 'PullRequest')) }}: - name: _HelixSource diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-maccatalyst.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-maccatalyst.yml index 3eac90bcf956..4ffece996d9c 100644 --- a/eng/pipelines/extra-platforms/runtime-extra-platforms-maccatalyst.yml +++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-maccatalyst.yml @@ -34,7 +34,7 @@ jobs: jobParameters: testGroup: innerloop nameSuffix: AllSubsets_Mono - buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:DevTeamProvisioning=adhoc /p:RunAOTCompilation=true /p:MonoForceInterpreter=true /p:BuildDarwinFrameworks=true + buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:DevTeamProvisioning=adhoc /p:RunAOTCompilation=true /p:MonoForceInterpreter=true timeoutInMinutes: 180 # extra steps, run tests postBuildSteps: @@ -68,7 +68,7 @@ jobs: jobParameters: testGroup: innerloop nameSuffix: AllSubsets_Mono_AppSandbox - buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true $(_runSmokeTestsOnlyArg) /p:DevTeamProvisioning=adhoc /p:RunAOTCompilation=true /p:MonoForceInterpreter=true /p:BuildDarwinFrameworks=true /p:EnableAppSandbox=true + buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true $(_runSmokeTestsOnlyArg) /p:DevTeamProvisioning=adhoc /p:RunAOTCompilation=true /p:MonoForceInterpreter=true /p:EnableAppSandbox=true timeoutInMinutes: 180 # extra steps, run tests postBuildSteps: diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml index e47cb4996cc7..d4c755945c03 100644 --- a/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml +++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml @@ -40,7 +40,7 @@ jobs: eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true), eq(variables['isRollingBuild'], true)) -# Run net48 tests on win-x64 +# Run net481 tests on win-x64 - template: /eng/pipelines/common/platform-matrix.yml parameters: jobTemplate: 
/eng/pipelines/common/global-build-job.yml @@ -49,16 +49,16 @@ jobs: - windows_x64 helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml jobParameters: - framework: net48 - buildArgs: -s tools+libs+libs.tests -framework net48 -c $(_BuildConfig) -testscope innerloop /p:ArchiveTests=true - nameSuffix: Libraries_NET48 + framework: net481 + buildArgs: -s tools+libs+libs.tests -framework net481 -c $(_BuildConfig) -testscope innerloop /p:ArchiveTests=true + nameSuffix: Libraries_NET481 timeoutInMinutes: 150 postBuildSteps: - template: /eng/pipelines/libraries/helix.yml parameters: creator: dotnet-bot - testRunNamePrefixSuffix: NET48_$(_BuildConfig) - extraHelixArguments: /p:BuildTargetFramework=net48 + testRunNamePrefixSuffix: NET481_$(_BuildConfig) + extraHelixArguments: /p:BuildTargetFramework=net481 isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }} condition: >- or( diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml index f0b197b68db9..dd7f5d2a6382 100644 --- a/eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml +++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml @@ -74,6 +74,22 @@ jobs: scenarios: - WasmTestOnChrome + # Library tests with full threading + - template: /eng/pipelines/common/templates/wasm-library-tests.yml + parameters: + platforms: + - browser_wasm + #- browser_wasm_win + nameSuffix: _Threading + extraBuildArgs: /p:WasmEnableThreads=true /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS) + extraHelixArguments: /p:WasmEnableThreads=true + isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }} + isWasmOnlyBuild: ${{ parameters.isWasmOnlyBuild }} + alwaysRun: ${{ parameters.isWasmOnlyBuild }} + shouldRunSmokeOnly: onLibrariesAndIllinkChanges + scenarios: + - WasmTestOnChrome + # EAT Library tests - only run on linux - template: /eng/pipelines/common/templates/wasm-library-aot-tests.yml parameters: @@ -162,7 +178,6 @@ jobs: - browser_wasm_win nameSuffix: MultiThreaded extraBuildArgs: /p:WasmEnableThreads=true /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS) - condition: ne(variables['wasmMultiThreadedBuildOnlyNeededOnDefaultPipeline'], true) publishArtifactsForWorkload: true publishWBT: false diff --git a/eng/pipelines/install-diagnostic-certs.ps1 b/eng/pipelines/install-diagnostic-certs.ps1 deleted file mode 100644 index 74d3c43f7573..000000000000 --- a/eng/pipelines/install-diagnostic-certs.ps1 +++ /dev/null @@ -1,32 +0,0 @@ -[CmdletBinding()] -param( - [string] - [Parameter(Mandatory)] - $certList -) -# Required for the pipeline logging functions -$ci = $true -. 
$PSScriptRoot/../common/pipeline-logging-functions.ps1 - -$certs = $certList -split ',' -$thumbprints = @() -$certCollection = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2Collection -foreach ($cert in $certs) -{ - $certBytes = [System.Convert]::FromBase64String($(Get-Item "Env:$cert").Value) - $certCollection.Import($certBytes,$null, [System.Security.Cryptography.X509Certificates.X509KeyStorageFlags]::PersistKeySet) -} - -foreach ($cert in $certCollection) -{ - Write-Host "Installed certificate '$($cert.Thumbprint)' with subject: '$($cert.Subject)'" - $thumbprints += $cert.Thumbprint -} - -$store = Get-Item -Path Cert:\CurrentUser\My -$store.Open([System.Security.Cryptography.X509Certificates.OpenFlags]::ReadWrite) -$store.AddRange($certCollection) -$store.Close() - -Write-PipelineSetVariable -name "DacCertificateThumbprints" -Value "$($thumbprints -join ',')" -IsMultiJobVariable $false -Write-Host "Successfully installed diagnostic certificates" diff --git a/eng/pipelines/installer/helix-queues-setup.yml b/eng/pipelines/installer/helix-queues-setup.yml new file mode 100644 index 000000000000..65340b9ae8bc --- /dev/null +++ b/eng/pipelines/installer/helix-queues-setup.yml @@ -0,0 +1,66 @@ +parameters: + jobTemplate: '' + variables: [] + osGroup: '' + osSubgroup: '' + archType: '' + container: '' + pool: '' + platform: '' + shouldContinueOnError: false + jobParameters: {} + +jobs: +- template: ${{ parameters.jobTemplate }} + parameters: + variables: ${{ parameters.variables }} + osGroup: ${{ parameters.osGroup }} + osSubgroup: ${{ parameters.osSubgroup }} + archType: ${{ parameters.archType }} + container: ${{ parameters.container }} + pool: ${{ parameters.pool }} + platform: ${{ parameters.platform }} + shouldContinueOnError: ${{ parameters.shouldContinueOnError }} + helixQueues: + + # Linux arm + - ${{ if eq(parameters.platform, 'linux_arm') }}: + - (Debian.13.Arm32.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-arm32v7 + + # Linux arm64 + - ${{ if eq(parameters.platform, 'linux_arm64') }}: + - (Ubuntu.2504.Arm64.Open)Ubuntu.2204.Armarch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-25.04-helix-arm64v8 + + # Linux musl x64 + - ${{ if eq(parameters.platform, 'linux_musl_x64') }}: + - (Alpine.321.Amd64.Open)AzureLinux.3.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.21-helix-amd64 + + # Linux musl arm64 + - ${{ if and(eq(parameters.platform, 'linux_musl_arm64'), or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true))) }}: + - (Alpine.321.Arm64.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.21-helix-arm64v8 + + # Linux x64 + - ${{ if eq(parameters.platform, 'linux_x64') }}: + - AzureLinux.3.Amd64.Open + + # OSX arm64 + - ${{ if eq(parameters.platform, 'osx_arm64') }}: + - osx.13.Arm64.Open + + # OSX x64 + - ${{ if eq(parameters.platform, 'osx_x64') }}: + - OSX.1200.Amd64.Open + + # windows x64 + - ${{ if eq(parameters.platform, 'windows_x64') }}: + - Windows.11.Amd64.Client.Open + + # windows x86 + - ${{ if eq(parameters.platform, 'windows_x86') }}: + - Windows.11.Amd64.Client.Open + + # windows arm64 + - ${{ if eq(parameters.platform, 'windows_arm64') }}: + - Windows.11.Arm64.Open + + ${{ insert }}: ${{ parameters.jobParameters }} diff --git a/eng/pipelines/installer/helix.yml b/eng/pipelines/installer/helix.yml new file mode 100644 index 000000000000..6812c87c318d --- /dev/null +++ 
b/eng/pipelines/installer/helix.yml @@ -0,0 +1,29 @@ +parameters: + archType: '' + osGroup: '' + creator: '' + helixQueues: '' + +steps: + - template: /eng/common/templates/steps/send-to-helix.yml + parameters: + DisplayNamePrefix: Send to Helix + HelixProjectPath: src/installer/tests/helixpublish.proj + ${{ if ne(parameters.osGroup, 'windows') }}: + HelixProjectArguments: >- + --ci + /p:Configuration=$(_BuildConfig) + /p:TargetArchitecture=${{ parameters.archType }} + /p:TargetOS=${{ parameters.osGroup }} + ${{ if eq(parameters.osGroup, 'windows') }}: + HelixProjectArguments: >- + -ci + /p:Configuration=$(_BuildConfig) + /p:TargetArchitecture=${{ parameters.archType }} + /p:TargetOS=${{ parameters.osGroup }} + Creator: ${{ parameters.creator }} + ${{ if eq(variables['System.TeamProject'], 'internal') }}: + HelixAccessToken: $(HelixApiAccessToken) + HelixTargetQueues: ${{ replace(lower(join('+', parameters.helixQueues)), '.open', '') }} + ${{ if eq(variables['System.TeamProject'], 'public') }}: + HelixTargetQueues: ${{ join('+', parameters.helixQueues) }} \ No newline at end of file diff --git a/eng/pipelines/installer/steps/upload-job-artifacts.yml b/eng/pipelines/installer/steps/upload-job-artifacts.yml deleted file mode 100644 index 076f989780bb..000000000000 --- a/eng/pipelines/installer/steps/upload-job-artifacts.yml +++ /dev/null @@ -1,26 +0,0 @@ -parameters: - name: '' - -steps: -# Upload binaries and symbols on failure to allow debugging issues -- task: CopyFiles@2 - displayName: Prepare binaries to publish - inputs: - SourceFolder: '$(Build.SourcesDirectory)/artifacts/bin' - Contents: | - */corehost/** - */corehost_test/** - TargetFolder: '$(Build.StagingDirectory)/Binaries' - continueOnError: true - condition: failed() - -- template: /eng/pipelines/common/upload-artifact-step.yml - parameters: - rootFolder: '$(Build.StagingDirectory)/Binaries' - includeRootFolder: false - archiveType: $(archiveType) - archiveExtension: $(archiveExtension) - tarCompression: $(tarCompression) - artifactName: 'Installer-Binaries-${{ parameters.name }}-$(_BuildConfig)' - displayName: 'Binaries' - condition: failed() diff --git a/eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml b/eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml index 1f7a53fe6494..e179e73e6ac7 100644 --- a/eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml +++ b/eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml @@ -22,7 +22,7 @@ extends: jobs: - job: windows displayName: Build & Deploy to OneFuzz - timeoutInMinutes: 120 + timeoutInMinutes: 240 pool: name: $(DncEngInternalBuildPool) demands: ImageOverride -equals windows.vs2022.amd64 @@ -169,4 +169,12 @@ extends: onefuzzDropDirectory: $(fuzzerProject)/deployment/Utf8JsonWriterFuzzer SYSTEM_ACCESSTOKEN: $(System.AccessToken) displayName: Send Utf8JsonWriterFuzzer to OneFuzz + + - task: onefuzz-task@0 + inputs: + onefuzzOSes: 'Windows' + env: + onefuzzDropDirectory: $(fuzzerProject)/deployment/ZipArchiveFuzzer + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + displayName: Send ZipArchiveFuzzer to OneFuzz # ONEFUZZ_TASK_WORKAROUND_END diff --git a/eng/pipelines/libraries/helix-queues-setup.yml b/eng/pipelines/libraries/helix-queues-setup.yml index 46f897e80b25..3968c90380f2 100644 --- a/eng/pipelines/libraries/helix-queues-setup.yml +++ b/eng/pipelines/libraries/helix-queues-setup.yml @@ -28,63 +28,48 @@ jobs: - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}: - 
(Debian.13.Arm32.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-arm32v7 - # Linux armv6 - - ${{ if eq(parameters.platform, 'linux_armv6') }}: - - (Raspbian.10.Armv6.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:raspbian-10-helix-arm32v6 - # Linux arm64 - ${{ if eq(parameters.platform, 'linux_arm64') }}: - - (Ubuntu.2410.Arm64.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-24.10-helix-arm64v8 + - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}: + - (Ubuntu.2504.ArmArch.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-25.04-helix-arm64v8 - ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}: - - (Debian.13.Arm64.Open)Ubuntu.2204.Armarch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-arm64v8 + - (AzureLinux.3.0.ArmArch.Open)Ubuntu.2204.ArmArch.open@mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-helix-arm64v8 # Linux musl x64 - ${{ if eq(parameters.platform, 'linux_musl_x64') }}: - - ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}: - - (Alpine.321.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.21-helix-amd64 - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}: - - (Alpine.321.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.21-helix-amd64 + - (Alpine.edge.Amd64.Open)AzureLinux.3.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-edge-helix-amd64 + - ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}: + - (Alpine.322.Amd64.Open)AzureLinux.3.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.22-helix-amd64 # Linux musl arm64 - - ${{ if and(eq(parameters.platform, 'linux_musl_arm64'), or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true))) }}: - - (Alpine.321.Arm64.Open)ubuntu.2204.armarch.open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.21-helix-arm64v8 + - ${{ if eq(parameters.platform, 'linux_musl_arm64') }}: + - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}: + - (Alpine.321.Arm64.Open)Ubuntu.2204.ArmArch.open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.21-helix-arm64v8 # Linux x64 - ${{ if eq(parameters.platform, 'linux_x64') }}: - ${{ if and(eq(parameters.jobParameters.interpreter, ''), ne(parameters.jobParameters.isSingleFile, true)) }}: - ${{ if and(eq(parameters.jobParameters.testScope, 'outerloop'), eq(parameters.jobParameters.runtimeFlavor, 'mono')) }}: - - SLES.15.Amd64.Open - - (Centos.9.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream9-helix - - (Fedora.41.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:fedora-41-helix - - (Ubuntu.2204.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-helix-amd64 - - (Debian.13.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-amd64 + - 
(AzureLinux.3.0.Amd64.Open)AzureLinux.3.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-helix-amd64 + - (Debian.13.Amd64.Open)AzureLinux.3.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-amd64 + - (Ubuntu.2510.Amd64.Open)AzureLinux.3.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-25.10-helix-amd64 - ${{ if or(ne(parameters.jobParameters.testScope, 'outerloop'), ne(parameters.jobParameters.runtimeFlavor, 'mono')) }}: - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}: - - SLES.15.Amd64.Open - - (Fedora.41.Amd64.Open)ubuntu.2204.amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:fedora-41-helix - - Ubuntu.2204.Amd64.Open - - (Debian.13.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-amd64 - - (AzureLinux.3.0.Amd64.Open)Ubuntu.2204.Amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-helix-amd64 - - (openSUSE.15.6.Amd64.Open)Ubuntu.2204.Amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:opensuse-15.6-helix-amd64 + - (Fedora.42.Amd64.Open)AzureLinux.3.Amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:fedora-42-helix-amd64 + - (openSUSE.15.6.Amd64.Open)AzureLinux.3.Amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:opensuse-15.6-helix-amd64 - ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}: - - (Centos.9.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream9-helix - - (Debian.13.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-amd64 + # inner loop default - Ubuntu.2204.Amd64.Open + - (AzureLinux.3.0.Amd64.Open)AzureLinux.3.Amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-helix-amd64 + - (Centos.10.Amd64.Open)AzureLinux.3.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream-10-helix-amd64 - ${{ if or(eq(parameters.jobParameters.interpreter, 'true'), eq(parameters.jobParameters.isSingleFile, true)) }}: # Limiting interp runs as we don't need as much coverage. 
- - (Debian.13.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-amd64 - - # Linux s390x - - ${{ if eq(parameters.platform, 'linux_s390x') }}: - - Ubuntu.2004.S390X.Experimental.Open - - # Linux PPC64le - - ${{ if eq(parameters.platform, 'linux_ppc64le') }}: - - Ubuntu.2204.PPC64le.Experimental.Open + - (Debian.13.Amd64.Open)AzureLinux.3.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-amd64 # OSX arm64 - ${{ if eq(parameters.platform, 'osx_arm64') }}: - - OSX.1200.ARM64.Open + - osx.13.arm64.open # OSX x64 - ${{ if eq(parameters.platform, 'osx_x64') }}: @@ -98,11 +83,11 @@ jobs: # iOS Simulator/Mac Catalyst arm64 - ${{ if in(parameters.platform, 'maccatalyst_arm64', 'iossimulator_arm64') }}: - - OSX.1200.Arm64.Open + - OSX.14.Arm64.Open # iOS/tvOS Simulator x64 & MacCatalyst x64 - ${{ if in(parameters.platform, 'iossimulator_x64', 'tvossimulator_x64', 'maccatalyst_x64') }}: - - OSX.1200.Amd64.Open + - OSX.15.Amd64.Open # iOS devices - ${{ if in(parameters.platform, 'ios_arm64') }}: @@ -115,7 +100,7 @@ jobs: # windows x64 - ${{ if eq(parameters.platform, 'windows_x64') }}: # netcoreapp - - ${{ if notIn(parameters.jobParameters.framework, 'net48') }}: + - ${{ if notIn(parameters.jobParameters.framework, 'net481') }}: # libraries on mono outerloop - ${{ if and(eq(parameters.jobParameters.testScope, 'outerloop'), eq(parameters.jobParameters.runtimeFlavor, 'mono')) }}: - Windows.Amd64.Server2022.Open @@ -137,14 +122,14 @@ jobs: - (Windows.Nano.1809.Amd64.Open)windows.10.amd64.serverrs5.open@mcr.microsoft.com/dotnet-buildtools/prereqs:nanoserver-1809-helix-amd64 # .NETFramework - - ${{ if eq(parameters.jobParameters.framework, 'net48') }}: + - ${{ if eq(parameters.jobParameters.framework, 'net481') }}: - Windows.11.Amd64.Client.Open # windows x86 - ${{ if eq(parameters.platform, 'windows_x86') }}: # netcoreapp - - ${{ if notIn(parameters.jobParameters.framework, 'net48') }}: + - ${{ if notIn(parameters.jobParameters.framework, 'net481') }}: # mono outerloop - ${{ if and(eq(parameters.jobParameters.testScope, 'outerloop'), eq(parameters.jobParameters.runtimeFlavor, 'mono')) }}: - Windows.11.Amd64.Client.Open @@ -155,7 +140,7 @@ jobs: - Windows.11.Amd64.Client.Open # .NETFramework - - ${{ if eq(parameters.jobParameters.framework, 'net48') }}: + - ${{ if eq(parameters.jobParameters.framework, 'net481') }}: - Windows.10.Amd64.Client.Open # windows arm64 diff --git a/eng/pipelines/libraries/outerloop-mono.yml b/eng/pipelines/libraries/outerloop-mono.yml index 329de2ea9267..929a02ed0804 100644 --- a/eng/pipelines/libraries/outerloop-mono.yml +++ b/eng/pipelines/libraries/outerloop-mono.yml @@ -6,6 +6,7 @@ schedules: branches: include: - main + always: false # run only if there were changes since the last successful scheduled run. variables: - template: variables.yml diff --git a/eng/pipelines/libraries/outerloop.yml b/eng/pipelines/libraries/outerloop.yml index 597f298c37a3..afc38926ea35 100644 --- a/eng/pipelines/libraries/outerloop.yml +++ b/eng/pipelines/libraries/outerloop.yml @@ -6,7 +6,8 @@ schedules: branches: include: - main - - release/*.* + - release/*.0 + always: false # run only if there were changes since the last successful scheduled run. 
variables: - template: variables.yml @@ -92,10 +93,10 @@ extends: - ${{ if eq(variables['isRollingBuild'], true) }}: - windows_x64 jobParameters: - framework: net48 + framework: net481 testScope: outerloop - nameSuffix: NET48 - buildArgs: -s libs+libs.tests -c $(_BuildConfig) -testscope outerloop /p:ArchiveTests=true -f net48 + nameSuffix: NET481 + buildArgs: -s libs+libs.tests -c $(_BuildConfig) -testscope outerloop /p:ArchiveTests=true -f net481 timeoutInMinutes: 180 includeAllPlatforms: ${{ variables['isRollingBuild'] }} # extra steps, run tests @@ -104,4 +105,4 @@ extends: parameters: testScope: outerloop creator: dotnet-bot - extraHelixArguments: /p:BuildTargetFramework=net48 + extraHelixArguments: /p:BuildTargetFramework=net481 diff --git a/eng/pipelines/libraries/stress/http.yml b/eng/pipelines/libraries/stress/http.yml index 257334fce3e9..e083bbac2a6e 100644 --- a/eng/pipelines/libraries/stress/http.yml +++ b/eng/pipelines/libraries/stress/http.yml @@ -8,11 +8,11 @@ pr: schedules: - cron: "0 13 * * *" # 1PM UTC => 5 AM PST displayName: HttpStress nightly run + always: true branches: include: - main - - release/8.0 - - release/9.0 + - release/*-staging variables: - template: ../variables.yml diff --git a/eng/pipelines/libraries/stress/ssl.yml b/eng/pipelines/libraries/stress/ssl.yml index 360e67a86c98..eb2088242dcd 100644 --- a/eng/pipelines/libraries/stress/ssl.yml +++ b/eng/pipelines/libraries/stress/ssl.yml @@ -8,11 +8,11 @@ pr: schedules: - cron: "0 13 * * *" # 1PM UTC => 5 AM PST displayName: SslStress nightly run + always: true branches: include: - main - - release/8.0 - - release/9.0 + - release/*-staging variables: - template: ../variables.yml diff --git a/eng/pipelines/performance/perf-build.yml b/eng/pipelines/performance/perf-build.yml new file mode 100644 index 000000000000..ef418994581d --- /dev/null +++ b/eng/pipelines/performance/perf-build.yml @@ -0,0 +1,230 @@ +parameters: +- name: runPrivateJobs + displayName: Upload artifacts to blob storage + type: boolean + default: false +- name: mauiFramework + type: string + default: 'net9.0' +- name: coreclr_arm64_linux + displayName: Build Coreclr Arm64 Linux + type: boolean + default: true +- name: coreclr_arm64_windows + displayName: Build Coreclr Arm64 Windows + type: boolean + default: true +- name: coreclr_muslx64_linux + displayName: Build Coreclr Musl x64 Linux + type: boolean + default: true +- name: coreclr_x64_linux + displayName: Build Coreclr x64 Linux + type: boolean + default: true +- name: coreclr_x64_windows + displayName: Build Coreclr x64 Windows + type: boolean + default: true +- name: coreclr_x86_windows + displayName: Build Coreclr x86 Windows + type: boolean + default: true +- name: coreclr_arm64_android + displayName: Build Coreclr Arm64 Android + type: boolean + default: true +- name: wasm + displayName: Build WebAssembly (wasm) + type: boolean + default: true +- name: monoAot_arm64_linux + displayName: Build Mono AOT Arm64 Linux + type: boolean + default: true +- name: monoAot_x64_linux + displayName: Build Mono AOT x64 Linux + type: boolean + default: true +- name: mono_x64_linux + displayName: Build Mono x64 Linux + type: boolean + default: true +- name: mono_arm64_linux + displayName: Build Mono Arm64 Linux + type: boolean + default: true +- name: mono_arm64_android + displayName: Build Mono Arm64 Android + type: boolean + default: true +- name: mono_arm64_ios + displayName: Build Mono Arm64 iOS + type: boolean + default: true +- name: monoBDN_arm64_android + displayName: Build Mono Arm64 Android 
BDN (Not working) + type: boolean + default: false # currently not working +- name: nativeAot_arm64_ios + displayName: Build native AOT Arm64 iOS + type: boolean + default: true + +trigger: + batch: false # we want to build every single commit + branches: + include: + - main + - release/9.0 + - release/8.0 + paths: + include: + - '*' + # do not exclude any paths, as we want to build every single commit + +resources: + repositories: + - repository: performance + type: git + name: internal/dotnet-performance + # if you need to specify explicit branch use 'ref:' - for example ref: ekharion/store-latest-build + +variables: + - template: /eng/pipelines/common/variables.yml + +extends: + template: /eng/pipelines/common/templates/pipeline-with-resources.yml + parameters: + stages: + - ${{ if and(ne(variables['System.TeamProject'], 'public'), or(eq(variables['Build.Reason'], 'IndividualCI'), parameters.runPrivateJobs)) }}: + - stage: RegisterBuild + displayName: 'Register Build' + jobs: + - template: /eng/pipelines/register-build-jobs.yml@performance + parameters: + runtimeRepoAlias: self + performanceRepoAlias: performance + buildType: + - ${{ if eq(parameters.coreclr_arm64_linux, true) }}: + - coreclr_arm64_linux + - ${{ if eq(parameters.coreclr_arm64_windows, true) }}: + - coreclr_arm64_windows + - ${{ if eq(parameters.coreclr_muslx64_linux, true) }}: + - coreclr_muslx64_linux + - ${{ if eq(parameters.coreclr_x64_linux, true) }}: + - coreclr_x64_linux + - ${{ if eq(parameters.coreclr_x64_windows, true) }}: + - coreclr_x64_windows + - ${{ if eq(parameters.coreclr_x86_windows, true) }}: + - coreclr_x86_windows + - ${{ if eq(parameters.coreclr_arm64_android, true) }}: + - coreclr_arm64_android + - ${{ if eq(parameters.wasm, true) }}: + - wasm + - ${{ if eq(parameters.monoAot_arm64_linux, true) }}: + - monoAot_arm64_linux + - ${{ if eq(parameters.monoAot_x64_linux, true) }}: + - monoAot_x64_linux + - ${{ if eq(parameters.mono_x64_linux, true) }}: + - mono_x64_linux + - ${{ if eq(parameters.mono_arm64_linux, true) }}: + - mono_arm64_linux + - ${{ if eq(parameters.mono_arm64_android, true) }}: + - mono_arm64_android + - ${{ if eq(parameters.mono_arm64_ios, true) }}: + - mono_arm64_ios + - ${{ if eq(parameters.monoBDN_arm64_android, true) }}: + - monoBDN_arm64_android + - ${{ if eq(parameters.nativeAot_arm64_ios, true) }}: + - nativeAot_arm64_ios + + - stage: Build + displayName: 'Build' + dependsOn: [] # so it runs in parallel with the RegisterBuild stage + jobs: + - template: /eng/pipelines/runtime-perf-build-jobs.yml@performance + parameters: + runtimeRepoAlias: self + performanceRepoAlias: performance + buildType: + - ${{ if eq(parameters.coreclr_arm64_linux, true) }}: + - coreclr_arm64_linux + - ${{ if eq(parameters.coreclr_arm64_windows, true) }}: + - coreclr_arm64_windows + - ${{ if eq(parameters.coreclr_muslx64_linux, true) }}: + - coreclr_muslx64_linux + - ${{ if eq(parameters.coreclr_x64_linux, true) }}: + - coreclr_x64_linux + - ${{ if eq(parameters.coreclr_x64_windows, true) }}: + - coreclr_x64_windows + - ${{ if eq(parameters.coreclr_x86_windows, true) }}: + - coreclr_x86_windows + - ${{ if eq(parameters.coreclr_arm64_android, true) }}: + - coreclr_arm64_android + - ${{ if eq(parameters.wasm, true) }}: + - wasm + - ${{ if eq(parameters.monoAot_arm64_linux, true) }}: + - monoAot_arm64_linux + - ${{ if eq(parameters.monoAot_x64_linux, true) }}: + - monoAot_x64_linux + - ${{ if eq(parameters.mono_x64_linux, true) }}: + - mono_x64_linux + - ${{ if eq(parameters.mono_arm64_linux, true) }}: + - 
mono_arm64_linux + - ${{ if eq(parameters.mono_arm64_android, true) }}: + - mono_arm64_android + - ${{ if eq(parameters.mono_arm64_ios, true) }}: + - mono_arm64_ios + - ${{ if eq(parameters.monoBDN_arm64_android, true) }}: + - monoBDN_arm64_android + - ${{ if eq(parameters.nativeAot_arm64_ios, true) }}: + - nativeAot_arm64_ios + ${{ if parameters.mauiFramework }}: + mauiFramework: ${{ parameters.mauiFramework }} + + - ${{ if and(ne(variables['System.TeamProject'], 'public'), or(eq(variables['Build.Reason'], 'IndividualCI'), parameters.runPrivateJobs)) }}: + - stage: UploadArtifacts + displayName: 'Upload Artifacts' + condition: always() + dependsOn: + - Build + - RegisterBuild + jobs: + - template: /eng/pipelines/upload-build-artifacts-jobs.yml@performance + parameters: + runtimeRepoAlias: self + performanceRepoAlias: performance + buildType: + - ${{ if eq(parameters.coreclr_arm64_linux, true) }}: + - coreclr_arm64_linux + - ${{ if eq(parameters.coreclr_arm64_windows, true) }}: + - coreclr_arm64_windows + - ${{ if eq(parameters.coreclr_muslx64_linux, true) }}: + - coreclr_muslx64_linux + - ${{ if eq(parameters.coreclr_x64_linux, true) }}: + - coreclr_x64_linux + - ${{ if eq(parameters.coreclr_x64_windows, true) }}: + - coreclr_x64_windows + - ${{ if eq(parameters.coreclr_x86_windows, true) }}: + - coreclr_x86_windows + - ${{ if eq(parameters.coreclr_arm64_android, true) }}: + - coreclr_arm64_android + - ${{ if eq(parameters.wasm, true) }}: + - wasm + - ${{ if eq(parameters.monoAot_arm64_linux, true) }}: + - monoAot_arm64_linux + - ${{ if eq(parameters.monoAot_x64_linux, true) }}: + - monoAot_x64_linux + - ${{ if eq(parameters.mono_x64_linux, true) }}: + - mono_x64_linux + - ${{ if eq(parameters.mono_arm64_linux, true) }}: + - mono_arm64_linux + - ${{ if eq(parameters.mono_arm64_android, true) }}: + - mono_arm64_android + - ${{ if eq(parameters.mono_arm64_ios, true) }}: + - mono_arm64_ios + - ${{ if eq(parameters.monoBDN_arm64_android, true) }}: + - monoBDN_arm64_android + - ${{ if eq(parameters.nativeAot_arm64_ios, true) }}: + - nativeAot_arm64_ios diff --git a/eng/pipelines/performance/perf-slow.yml b/eng/pipelines/performance/perf-slow.yml index d30ecea46d79..84ca1836ff23 100644 --- a/eng/pipelines/performance/perf-slow.yml +++ b/eng/pipelines/performance/perf-slow.yml @@ -15,10 +15,10 @@ trigger: include: - main - release/9.0 + - release/8.0 paths: include: - '*' - - src/libraries/System.Private.CoreLib/* exclude: - '**.md' - .devcontainer/* @@ -62,4 +62,4 @@ extends: performanceRepoAlias: performance jobParameters: ${{ if parameters.onlySanityCheck }}: - onlySanityCheck: true \ No newline at end of file + onlySanityCheck: true diff --git a/eng/pipelines/performance/perf.yml b/eng/pipelines/performance/perf.yml index e717fbe49159..01b80b580db2 100644 --- a/eng/pipelines/performance/perf.yml +++ b/eng/pipelines/performance/perf.yml @@ -13,7 +13,6 @@ trigger: paths: include: - '*' - - src/libraries/System.Private.CoreLib/* exclude: - '**.md' - .devcontainer/* diff --git a/eng/pipelines/performance/templates/build-perf-bdn-app.yml b/eng/pipelines/performance/templates/build-perf-bdn-app.yml index d008ca63bb98..65f80896066e 100644 --- a/eng/pipelines/performance/templates/build-perf-bdn-app.yml +++ b/eng/pipelines/performance/templates/build-perf-bdn-app.yml @@ -85,9 +85,8 @@ steps: - script: | echo '{ }' > ./global.json curl -o NuGet.config 'https://raw.githubusercontent.com/dotnet/maui/${{parameters.framework}}/NuGet.config' - curl -o dotnet-install.sh 
'https://dotnet.microsoft.com/download/dotnet/scripts/v1/dotnet-install.sh' - curl -Lo performance-version-details.xml 'https://raw.githubusercontent.com/dotnet/performance/${{parameters.perfBranch}}/eng/Version.Details.xml' - version=$(sed -nr 's/[[:space:]]*- - or( - eq(variables['librariesContainsChange'], true), - eq(variables['monoContainsChange'], true), - eq(variables['isRollingBuild'], true)) # # Build the whole product using Mono @@ -110,39 +99,3 @@ extends: eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true), eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true), eq(variables['isRollingBuild'], true)) - - # disable armv6 until https://github.com/dotnet/runtime/issues/104333 is fixed - # - # Build the whole product using Mono and run libraries tests - # - #- template: /eng/pipelines/common/platform-matrix.yml - # parameters: - # jobTemplate: /eng/pipelines/common/global-build-job.yml - # helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml - # buildConfig: Release - # runtimeFlavor: mono - # platforms: - # - linux_armv6 - # variables: - # # map dependencies variables to local variables - # - name: librariesContainsChange - # value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ] - # - name: monoContainsChange - # value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ] - # jobParameters: - # testScope: innerloop - # nameSuffix: AllSubsets_Mono - # buildArgs: -s mono+clr.iltools+clr.packages+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true - # timeoutInMinutes: 120 - # condition: >- - # or( - # eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true), - # eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true), - # eq(variables['isRollingBuild'], true)) - # ${{ if eq(variables['isRollingBuild'], true) }}: - # # extra steps, run tests - # postBuildSteps: - # - template: /eng/pipelines/libraries/helix.yml - # parameters: - # creator: dotnet-bot - # testRunNamePrefixSuffix: Mono_$(_BuildConfig) diff --git a/eng/pipelines/runtime-diagnostics.yml b/eng/pipelines/runtime-diagnostics.yml new file mode 100644 index 000000000000..f704a135249b --- /dev/null +++ b/eng/pipelines/runtime-diagnostics.yml @@ -0,0 +1,94 @@ +trigger: none + +resources: + repositories: + - repository: diagnostics + type: github + name: dotnet/diagnostics + endpoint: public + +variables: + - template: /eng/pipelines/common/variables.yml + +schedules: +- cron: "30 2 * * *" + displayName: Every night at 2:30AM + branches: + include: + - main + always: true + +pr: + branches: + include: + - main + paths: + include: + - '/eng/pipelines/**' + - '/src/coreclr/**' + - '/src/native/**' + +extends: + template: /eng/pipelines/common/templates/pipeline-with-resources.yml + parameters: + stages: + - stage: Build + jobs: + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + buildConfig: release + platforms: + - windows_x64 + jobParameters: + buildArgs: -s clr+libs+tools.cdac+host+packs -c Debug -rc $(_BuildConfig) -lc $(_BuildConfig) + nameSuffix: AllSubsets_CoreCLR + isOfficialBuild: ${{ variables.isOfficialBuild }} + timeoutInMinutes: 360 + postBuildSteps: + - powershell: | + $versionDir = 
Get-ChildItem -Directory -Path "$(Build.SourcesDirectory)\artifacts\bin\testhost\net*\shared\Microsoft.NETCore.App" | Select-Object -ExpandProperty FullName + Write-Host "##vso[task.setvariable variable=versionDir]$versionDir" + displayName: 'Set Path to Shared Framework Artifacts' + - template: /eng/pipelines/common/upload-artifact-step.yml + parameters: + rootFolder: $(versionDir) + includeRootFolder: false + archiveType: $(archiveType) + archiveExtension: $(archiveExtension) + tarCompression: $(tarCompression) + artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_coreclr + displayName: Build Assets + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/diagnostics/runtime-diag-job.yml + buildConfig: release + platforms: + - windows_x64 + jobParameters: + name: Windows + isOfficialBuild: ${{ variables.isOfficialBuild }} + liveRuntimeDir: $(Build.SourcesDirectory)/artifacts/runtime + timeoutInMinutes: 360 + dependsOn: + - build_windows_x64_release_AllSubsets_CoreCLR + preBuildSteps: + - template: /eng/pipelines/common/download-artifact-step.yml + parameters: + artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_coreclr + artifactFileName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_coreclr$(archiveExtension) + unpackFolder: $(Build.SourcesDirectory)/artifacts/runtime + displayName: 'Runtime Build Artifacts' + postBuildSteps: + - task: PublishTestResults@2 + inputs: + testResultsFormat: xUnit + testResultsFiles: '**/*.xml' + searchFolder: '$(Build.SourcesDirectory)/artifacts/TestResults' + testRunTitle: 'Tests $(_PhaseName)' + failTaskOnFailedTests: true + publishRunAttachments: true + mergeTestResults: true + buildConfiguration: $(_BuildConfig) + continueOnError: true + condition: always() diff --git a/eng/pipelines/runtime-linker-tests.yml b/eng/pipelines/runtime-linker-tests.yml index 712890ffe1cf..9df8042e7af0 100644 --- a/eng/pipelines/runtime-linker-tests.yml +++ b/eng/pipelines/runtime-linker-tests.yml @@ -103,7 +103,7 @@ extends: or( eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true), eq(variables['isRollingBuild'], true)) - buildArgs: -s clr+libs+tools.illink -c $(_BuildConfig) + buildArgs: -s clr+libs+tools.illink+host.native -c $(_BuildConfig) postBuildSteps: - template: /eng/pipelines/libraries/execute-trimming-tests-steps.yml diff --git a/eng/pipelines/runtime-official.yml b/eng/pipelines/runtime-official.yml index 03c4308e13ae..09ad3218f924 100644 --- a/eng/pipelines/runtime-official.yml +++ b/eng/pipelines/runtime-official.yml @@ -34,497 +34,38 @@ extends: parameters: isOfficialBuild: true stages: - - stage: Build - jobs: - - # - # Localization build - # - - ${{ if eq(variables['Build.SourceBranch'], 'refs/heads/main') }}: + - ${{ if eq(variables['Build.SourceBranch'], 'refs/heads/main') }}: + - stage: Localization + dependsOn: [] + jobs: + # + # Localization build + # - template: /eng/common/templates-official/job/onelocbuild.yml parameters: MirrorRepo: runtime MirrorBranch: main LclSource: lclFilesfromPackage LclPackageId: 'LCL-JUNO-PROD-RUNTIME' - - # - # Source Index Build - # - - ${{ if eq(variables['Build.SourceBranch'], 'refs/heads/main') }}: - - template: /eng/common/templates-official/job/source-index-stage1.yml - parameters: - sourceIndexBuildCommand: build.cmd -subset libs.sfx+libs.oob -binarylog -os linux -ci /p:SkipLibrariesNativeRuntimePackages=true - - # - # Build CoreCLR 
runtime packs - # Windows x64/arm64 - # Sign diagnostic files after native build - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: release - platforms: - - windows_x64 - - windows_x86 - - windows_arm64 - variables: - - name: _SignDiagnosticFilesArgs - value: '' - jobParameters: - templatePath: 'templates-official' - preBuildSteps: - - template: /eng/pipelines/coreclr/templates/install-diagnostic-certs.yml - parameters: - isOfficialBuild: ${{ variables.isOfficialBuild }} - certNames: - - 'dotnetesrp-diagnostics-aad-ssl-cert' - - 'dotnet-diagnostics-esrp-pki-onecert' - vaultName: 'clrdiag-esrp-id' - azureSubscription: 'diagnostics-esrp-kvcertuser' - - buildArgs: -c $(_BuildConfig) /p:DotNetBuildAllRuntimePacks=true $(_SignDiagnosticFilesArgs) - nameSuffix: AllRuntimes - isOfficialBuild: ${{ variables.isOfficialBuild }} - timeoutInMinutes: 120 - postBuildSteps: - - template: /eng/pipelines/coreclr/templates/remove-diagnostic-certs.yml - parameters: - isOfficialBuild: ${{ variables.isOfficialBuild }} - - # Upload the results. - - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: $(osGroup)$(osSubgroup)_$(archType) - - # - # Build all runtime packs - # Mac x64/arm64 - # Sign and entitle createdump and corerun after native build. - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: release - platforms: - - osx_arm64 - - osx_x64 - jobParameters: - templatePath: 'templates-official' - buildArgs: -s clr.runtime+clr.alljits+clr.nativeaotruntime+host.native -c $(_BuildConfig) /bl:$(Build.SourcesDirectory)/artifacts/logs/$(_BuildConfig)/CoreClrNativeBuild.binlog - nameSuffix: AllRuntimes - isOfficialBuild: ${{ variables.isOfficialBuild }} - timeoutInMinutes: 120 - postBuildSteps: - - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: - - template: /eng/pipelines/common/macos-sign-with-entitlements.yml - parameters: - filesToSign: - - name: createdump - path: $(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(_BuildConfig) - - name: corerun - path: $(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(_BuildConfig) - - name: dotnet - path: $(Build.SourcesDirectory)/artifacts/bin/$(osGroup)-$(archType).$(_BuildConfig)/corehost - - name: apphost - path: $(Build.SourcesDirectory)/artifacts/bin/$(osGroup)-$(archType).$(_BuildConfig)/corehost - - - task: CopyFiles@2 - displayName: 'Copy signed createdump to sharedFramework' - inputs: - contents: createdump - sourceFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(_BuildConfig) - targetFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(_BuildConfig)/sharedFramework - overWrite: true - - # Now that we've entitled and signed createdump, we can build the rest. - - template: /eng/pipelines/common/templates/global-build-step.yml - parameters: - buildArgs: -s clr.corelib+clr.nativecorelib+clr.nativeaotlibs+clr.tools+clr.packages+mono+libs+host.tools+host.pkg+packs -c $(_BuildConfig) /p:DotNetBuildAllRuntimePacks=true - displayName: Build managed CoreCLR and host components, Mono, all libraries, and packs - - # Upload the results. 
- - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: $(osGroup)$(osSubgroup)_$(archType) - - # - # Build all runtime packs for Linux and Linux musl - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: release - platforms: - - linux_x64 - - linux_arm - - linux_arm64 - - linux_musl_x64 - - linux_musl_arm - - linux_musl_arm64 - jobParameters: - templatePath: 'templates-official' - buildArgs: -c $(_BuildConfig) /p:DotNetBuildAllRuntimePacks=true - nameSuffix: AllRuntimes - isOfficialBuild: ${{ variables.isOfficialBuild }} - timeoutInMinutes: 120 - postBuildSteps: - # Upload the results. - - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: $(osGroup)$(osSubgroup)_$(archType) - - # - # Build and Pack CrossDac - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: release - platforms: - - windows_x64 - jobParameters: - templatePath: 'templates-official' - buildArgs: -s crossdacpack -c $(_BuildConfig) /p:CrossRuntimeExtractionRoot=$(CrossRuntimeExtractionRoot) $(_SignDiagnosticFilesArgs) - nameSuffix: CrossDac - isOfficialBuild: ${{ variables.isOfficialBuild }} - timeoutInMinutes: 120 - preBuildSteps: - - task: DownloadPipelineArtifact@2 - displayName: Download runtime packs for CrossDac - inputs: - artifact: 'IntermediateArtifacts' - path: $(Build.SourcesDirectory)/artifacts/RuntimeDownload - patterns: | - IntermediateArtifacts/linux_*/Shipping/Microsoft.NETCore.App.Runtime.linux-*.nupkg - !IntermediateArtifacts/linux_*/Shipping/Microsoft.NETCore.App.Runtime.linux-*.symbols.nupkg - - powershell: $(Build.SourcesDirectory)/eng/extract-for-crossdac.ps1 -DownloadDirectory $(Build.SourcesDirectory)/artifacts/RuntimeDownload -ExtractDirectory $(CrossRuntimeExtractionRoot) - displayName: Extract runtime packs - - template: /eng/pipelines/coreclr/templates/install-diagnostic-certs.yml - parameters: - isOfficialBuild: ${{ variables.isOfficialBuild }} - certNames: - - 'dotnetesrp-diagnostics-aad-ssl-cert' - - 'dotnet-diagnostics-esrp-pki-onecert' - vaultName: 'clrdiag-esrp-id' - azureSubscription: 'diagnostics-esrp-kvcertuser' - postBuildSteps: - - template: /eng/pipelines/coreclr/templates/remove-diagnostic-certs.yml - parameters: - isOfficialBuild: ${{ variables.isOfficialBuild }} - # Save packages using the prepare-signed-artifacts format. - # CrossDac packages are expected to be in the windows_x64 folder. 
- - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: windows_x64 - dependsOn: - - build_linux_x64_release_AllRuntimes - - build_linux_arm_release_AllRuntimes - - build_linux_arm64_release_AllRuntimes - - build_linux_musl_x64_release_AllRuntimes - - build_linux_musl_arm_release_AllRuntimes - - build_linux_musl_arm64_release_AllRuntimes - variables: - - name: CrossRuntimeExtractionRoot - value: $(Build.SourcesDirectory)/artifacts/CrossDac - - name: _SignDiagnosticFilesArgs - value: '' - - # - # Build All runtime packs for mobile platforms - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: release - platforms: - - android_x64 - - android_x86 - - android_arm - - android_arm64 - - maccatalyst_x64 - - maccatalyst_arm64 - - tvossimulator_x64 - - tvossimulator_arm64 - - tvos_arm64 - - iossimulator_x64 - - iossimulator_arm64 - - ios_arm64 - - linux_bionic_x64 - - linux_bionic_arm - - linux_bionic_arm64 - jobParameters: - templatePath: 'templates-official' - buildArgs: -c $(_BuildConfig) /p:BuildMonoAOTCrossCompiler=false /p:DotNetBuildAllRuntimePacks=true - nameSuffix: AllRuntimes - isOfficialBuild: ${{ variables.isOfficialBuild }} - postBuildSteps: - # delete duplicate RIDless packages to prevent upload conflict - - task: DeleteFiles@1 - displayName: 'Delete Microsoft.NETCore.App.Ref and Microsoft.NETCore.App.HostModel package' - inputs: - SourceFolder: $(Build.SourcesDirectory)/artifacts/packages/$(_BuildConfig)/Shipping - Contents: | - 'Microsoft.NETCore.App.Ref.*.nupkg' - 'Microsoft.NET.HostModel.*.nupkg' - - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: MobileRuntimePacks - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: release - runtimeFlavor: mono - platforms: - - browser_wasm - - wasi_wasm - jobParameters: - templatePath: 'templates-official' - buildArgs: -s mono+libs+host+packs -c $(_BuildConfig) /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS) - nameSuffix: Mono - isOfficialBuild: ${{ variables.isOfficialBuild }} - postBuildSteps: - # delete duplicate RIDless packages to prevent upload conflict - - task: DeleteFiles@1 - displayName: 'Delete Microsoft.NETCore.App.Ref and Microsoft.NETCore.App.HostModel package' - inputs: - SourceFolder: $(Build.SourcesDirectory)/artifacts/packages/$(_BuildConfig)/Shipping - Contents: | - 'Microsoft.NETCore.App.Ref.*.nupkg' - 'Microsoft.NET.HostModel.*.nupkg' - - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: MobileRuntimePacks - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: release - runtimeFlavor: mono - platforms: - - browser_wasm - jobParameters: - templatePath: 'templates-official' - buildArgs: -s mono+libs+host+packs -c $(_BuildConfig) /p:WasmEnableThreads=true /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS) - nameSuffix: Mono_multithread - isOfficialBuild: ${{ variables.isOfficialBuild }} - runtimeVariant: multithread - postBuildSteps: - # delete duplicate RIDless packages to prevent upload conflict - - task: DeleteFiles@1 - displayName: 'Delete Microsoft.NETCore.App.Ref and Microsoft.NETCore.App.HostModel package' - inputs: - SourceFolder: $(Build.SourcesDirectory)/artifacts/packages/$(_BuildConfig)/Shipping - Contents: | 
- 'Microsoft.NETCore.App.Ref.*.nupkg' - 'Microsoft.NET.HostModel.*.nupkg' - - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: MobileRuntimePacks - - # - # Build Mono LLVM runtime packs - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - platforms: - - osx_x64 - - linux_x64 - - linux_arm64 - buildConfig: release - runtimeFlavor: mono - jobParameters: - templatePath: 'templates-official' - buildArgs: -s mono+libs+host+packs -c $(_BuildConfig) - /p:MonoEnableLLVM=true /p:MonoAOTEnableLLVM=true /p:MonoBundleLLVMOptimizer=true - nameSuffix: Mono_LLVMAOT - runtimeVariant: LLVMAOT - isOfficialBuild: ${{ variables.isOfficialBuild }} - postBuildSteps: - # delete duplicate RIDless packages to prevent upload conflict - - task: DeleteFiles@1 - displayName: 'Delete Microsoft.NETCore.App.Ref and Microsoft.NETCore.App.HostModel package' - inputs: - SourceFolder: $(Build.SourcesDirectory)/artifacts/packages/$(_BuildConfig)/Shipping - Contents: | - 'Microsoft.NETCore.App.Ref.*.nupkg' - 'Microsoft.NET.HostModel.*.nupkg' - - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: $(osGroup)$(osSubgroup)_$(archType) - - # - # Build libraries (all TFMs) and packages - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: Release - platforms: - - windows_x64 - jobParameters: - templatePath: 'templates-official' - buildArgs: -s tools+libs -pack -c $(_BuildConfig) /p:TestAssemblies=false /p:TestPackages=true - nameSuffix: Libraries_WithPackages - isOfficialBuild: ${{ variables.isOfficialBuild }} - postBuildSteps: - - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: Libraries_WithPackages - timeoutInMinutes: 95 - # - # Build SourceBuild packages - # - - template: /eng/common/templates-official/jobs/source-build.yml - parameters: - platforms: - - name: Linux_x64 - targetRID: linux-x64 - container: SourceBuild_linux_x64 - - # - # Build PGO Instrumented CoreCLR Release - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: Release - helixQueueGroup: ci - platforms: - - windows_x64 - - windows_x86 - - linux_x64 - - windows_arm64 - - linux_arm64 - jobParameters: - templatePath: 'templates-official' - buildArgs: -s clr.native+clr.corelib+clr.tools+clr.nativecorelib+libs+host+packs -c $(_BuildConfig) -pgoinstrument /p:SkipLibrariesNativeRuntimePackages=true - isOfficialBuild: ${{ variables.isOfficialBuild }} - nameSuffix: PGO - postBuildSteps: - - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml - parameters: - name: PGO - timeoutInMinutes: 95 - - # - # Build Workloads - # - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: release - platforms: - - windows_x64 - jobParameters: - templatePath: 'templates-official' - nameSuffix: Workloads - preBuildSteps: - - task: DownloadPipelineArtifact@2 - inputs: - artifact: 'IntermediateArtifacts' - path: $(Build.SourcesDirectory)/artifacts/workloadPackages - patterns: | - IntermediateArtifacts/windows_x64/Shipping/Microsoft.NETCore.App.Runtime.AOT.win-x64.Cross.android-*.nupkg - IntermediateArtifacts/windows_arm64/Shipping/Microsoft.NETCore.App.Runtime.AOT.win-arm64.Cross.android-*.nupkg 
- IntermediateArtifacts/windows_x64/Shipping/Microsoft.NETCore.App.Runtime.AOT.win-x64.Cross.browser-wasm*.nupkg - IntermediateArtifacts/windows_arm64/Shipping/Microsoft.NETCore.App.Runtime.AOT.win-arm64.Cross.browser-wasm*.nupkg - IntermediateArtifacts/windows_x64/Shipping/Microsoft.NETCore.App.Runtime.AOT.win-x64.Cross.wasi-wasm*.nupkg - IntermediateArtifacts/windows_arm64/Shipping/Microsoft.NETCore.App.Runtime.AOT.win-arm64.Cross.wasi-wasm*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.android-*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.browser-wasm*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.multithread.browser-wasm*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.ios-*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.iossimulator-*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.maccatalyst-*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.multithread.browser-wasm*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.tvos-*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.tvossimulator-*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NETCore.App.Runtime.Mono.wasi-wasm*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Workload.Mono.ToolChain.Current.Manifest*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Workload.Mono.ToolChain.net6.Manifest*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Workload.Mono.ToolChain.net7.Manifest*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Workload.Mono.ToolChain.net8.Manifest*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Workload.Mono.ToolChain.net9.Manifest*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Runtime.MonoTargets.Sdk*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Runtime.MonoAOTCompiler.Task*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Runtime.WebAssembly.Sdk*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Runtime.WebAssembly.Wasi*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Runtime.WebAssembly.Templates*.nupkg - IntermediateArtifacts/windows_arm64/Shipping/Microsoft.NETCore.App.Runtime.win-arm64*.nupkg - IntermediateArtifacts/windows_x64/Shipping/Microsoft.NETCore.App.Runtime.win-x64*.nupkg - IntermediateArtifacts/windows_x86/Shipping/Microsoft.NETCore.App.Runtime.win-x86*.nupkg - IntermediateArtifacts/MobileRuntimePacks/Shipping/Microsoft.NET.Sdk.WebAssembly.Pack*.nupkg - - - task: CopyFiles@2 - displayName: Flatten packages - inputs: - sourceFolder: $(Build.SourcesDirectory)/artifacts/workloadPackages - contents: '*/Shipping/*.nupkg' - cleanTargetFolder: false - targetFolder: $(Build.SourcesDirectory)/artifacts/workloadPackages - flattenFolders: true - - buildArgs: -s mono.workloads -c $(_BuildConfig) /p:PackageSource=$(Build.SourcesDirectory)/artifacts/workloadPackages /p:WorkloadOutputPath=$(Build.SourcesDirectory)/artifacts/workloads - - postBuildSteps: - # Prepare packages wrapping msis - - task: CopyFiles@2 - displayName: Prepare package artifacts - 
inputs: - SourceFolder: '$(Build.SourcesDirectory)/artifacts/packages/$(_BuildConfig)' - Contents: | - Shipping/**/* - NonShipping/**/* - TargetFolder: '$(Build.ArtifactStagingDirectory)/IntermediateArtifacts1/workloads' - CleanTargetFolder: true - - # Prepare artifacts to be used for generating VS components - - task: CopyFiles@2 - displayName: Prepare VS Insertion artifacts - inputs: - SourceFolder: '$(Build.SourcesDirectory)/artifacts/VSSetup/$(_BuildConfig)' - Contents: | - Insertion/**/* - TargetFolder: '$(Build.ArtifactStagingDirectory)/IntermediateArtifacts2/workloads-vs' - CleanTargetFolder: true - - templateContext: - outputs: - - output: buildArtifacts - PathtoPublish: '$(Build.ArtifactStagingDirectory)/IntermediateArtifacts1' - ArtifactName: IntermediateArtifacts - displayName: 'Publish workload packages' - - output: buildArtifacts - PathtoPublish: '$(Build.ArtifactStagingDirectory)/IntermediateArtifacts2' - ArtifactName: IntermediateArtifacts - displayName: 'Publish workload VS Insertion artifacts' - - isOfficialBuild: ${{ variables.isOfficialBuild }} - timeoutInMinutes: 120 - dependsOn: - - Build_android_arm_release_AllRuntimes - - Build_android_arm64_release_AllRuntimes - - Build_android_x86_release_AllRuntimes - - Build_android_x64_release_AllRuntimes - - Build_browser_wasm_Linux_release_Mono - - Build_wasi_wasm_linux_release_Mono - - Build_ios_arm64_release_AllRuntimes - - Build_iossimulator_x64_release_AllRuntimes - - Build_iossimulator_arm64_release_AllRuntimes - - Build_maccatalyst_arm64_release_AllRuntimes - - Build_maccatalyst_x64_release_AllRuntimes - - Build_tvos_arm64_release_AllRuntimes - - Build_tvossimulator_arm64_release_AllRuntimes - - Build_tvossimulator_x64_release_AllRuntimes - - Build_windows_x64_release_AllRuntimes - - Build_windows_x86_release_AllRuntimes - - Build_windows_arm64_release_AllRuntimes - - - ${{ if eq(variables.isOfficialBuild, true) }}: - - template: /eng/pipelines/official/stages/publish.yml + - stage: Source_Index + dependsOn: [] + displayName: Source Index + jobs: + # + # Source Index Build + # + - template: /eng/common/templates-official/job/source-index-stage1.yml + parameters: + sourceIndexBuildCommand: build.cmd -subset libs.sfx+libs.oob -binarylog -os linux -ci /p:SkipLibrariesNativeRuntimePackages=true + + - stage: Publish + dependsOn: [] + jobs: + - template: /eng/common/templates-official/job/publish-build-assets.yml parameters: - isOfficialBuild: ${{ variables.isOfficialBuild }} + publishUsingPipelines: true + publishAssetsImmediately: true + isAssetlessBuild: true + pool: + name: $(DncEngInternalBuildPool) + demands: ImageOverride -equals 1es-windows-2022 diff --git a/eng/pipelines/runtime-richnav.yml b/eng/pipelines/runtime-richnav.yml deleted file mode 100644 index 965a6f6cfd48..000000000000 --- a/eng/pipelines/runtime-richnav.yml +++ /dev/null @@ -1,61 +0,0 @@ -trigger: - batch: true - branches: - include: - - main - paths: - include: - - '*' - exclude: - - '**.md' - - eng/Version.Details.xml - - .devcontainer/* - - .github/* - - docs/* - - LICENSE.TXT - - PATENTS.TXT - - THIRD-PARTY-NOTICES.TXT - -pr: none - -variables: - - template: /eng/pipelines/common/variables.yml - -extends: - template: /eng/pipelines/common/templates/pipeline-with-resources.yml - parameters: - stages: - - stage: Build - jobs: - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: debug - platforms: - - windows_x64 - jobParameters: - enableRichCodeNavigation: true 
- richCodeNavigationLanguage: "csharp" - nameSuffix: Libs - timeoutInMinutes: 240 - buildArgs: -s libs.sfx+libs.oob -pack - preBuildSteps: - - script: dotnet.cmd nuget add source -n richnav "https://pkgs.dev.azure.com/azure-public/vside/_packaging/vs-buildservices/nuget/v3/index.json" - displayName: Add richnav NuGet feed - - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/common/global-build-job.yml - buildConfig: debug - platforms: - - windows_x64 - jobParameters: - enableRichCodeNavigation: true - richCodeNavigationLanguage: "csharp,cpp" - nameSuffix: Runtimes - timeoutInMinutes: 240 - buildArgs: -s clr+mono - preBuildSteps: - - script: dotnet.cmd nuget add source -n richnav "https://pkgs.dev.azure.com/azure-public/vside/_packaging/vs-buildservices/nuget/v3/index.json" - displayName: Add richnav NuGet feed \ No newline at end of file diff --git a/eng/pipelines/runtime.yml b/eng/pipelines/runtime.yml index 4dfaa73326a0..9fa0492f75c8 100644 --- a/eng/pipelines/runtime.yml +++ b/eng/pipelines/runtime.yml @@ -117,6 +117,22 @@ extends: eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true), eq(variables['isRollingBuild'], true)) + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + buildConfig: ${{ variables.debugOnPrReleaseOnRolling }} + platforms: + - browser_wasm + - browser_wasm_win + jobParameters: + nameSuffix: AllSubsets_CoreCLR + buildArgs: -s clr.runtime+libs -rc Release -c Release -lc $(_BuildConfig) + timeoutInMinutes: 120 + condition: >- + or( + eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true), + eq(variables['isRollingBuild'], true)) + # # Build CoreCLR and Libraries with Libraries tests # For running libraries tests and installer tests @@ -467,7 +483,7 @@ extends: jobParameters: testScope: innerloop nameSuffix: CoreCLR_NonPortable - buildArgs: -s clr.native+clr.tools+clr.corelib+clr.nativecorelib+clr.aot+clr.packages --outputrid tizen.9.0.0-armel -c $(_BuildConfig) /p:PortableBuild=false + buildArgs: -s clr.native+clr.tools+clr.corelib+clr.nativecorelib+clr.aot+clr.packages --targetrid tizen.9.0.0-armel -c $(_BuildConfig) /p:PortableBuild=false timeoutInMinutes: 120 condition: >- or( @@ -489,27 +505,9 @@ extends: - linux_loongarch64 jobParameters: testScope: innerloop - nameSuffix: CoreCLR_TwoStage - buildArgs: -s clr+libs+host -c $(_BuildConfig) -rc Checked -p:StageOneBuild=true + nameSuffix: CoreCLR_Bootstrapped + buildArgs: -s clr+libs+host+packs -c $(_BuildConfig) -rc Checked --bootstrap timeoutInMinutes: 120 - postBuildSteps: - - script: | - echo Running $(Build.SourcesDirectory)/build$(scriptExt) \ - ${{ variables.debugOnPrReleaseOnRolling }} \ - -s clr.tools+packs -rc Checked -cross \ - -os $(osGroup) \ - -a $(archType) \ - -c $(_BuildConfig) \ - -p:StageTwoBuild=true - - $(Build.SourcesDirectory)/build$(scriptExt) \ - ${{ variables.debugOnPrReleaseOnRolling }} \ - -s clr.tools+packs -rc Checked -cross \ - -os $(osGroup) \ - -a $(archType) \ - -c $(_BuildConfig) \ - -p:StageTwoBuild=true - displayName: Build clr.tools and packs condition: >- or( eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true), @@ -533,7 +531,7 @@ extends: - name: timeoutPerTestCollectionInMinutes value: 180 jobParameters: - timeoutInMinutes: 120 + timeoutInMinutes: 180 nameSuffix: NativeAOT buildArgs: -s 
clr.aot+libs -rc $(_BuildConfig) -lc Release /p:RunAnalyzers=false postBuildSteps: @@ -616,7 +614,7 @@ extends: value: 180 jobParameters: testGroup: innerloop - timeoutInMinutes: 120 + timeoutInMinutes: 180 nameSuffix: NativeAOT buildArgs: -s clr.aot+libs+tools.illink -c $(_BuildConfig) -rc $(_BuildConfig) -lc Release /p:RunAnalyzers=false postBuildSteps: @@ -680,7 +678,7 @@ extends: jobParameters: timeoutInMinutes: 120 nameSuffix: CLR_Tools_Tests - buildArgs: -s clr.aot+clr.iltools+libs.sfx+clr.toolstests+tools.cdacreader+tools.cdacreadertests -c $(_BuildConfig) -test + buildArgs: -s clr.aot+clr.iltools+libs.sfx+clr.toolstests+tools.cdac+tools.cdactests -c $(_BuildConfig) -test enablePublishTestResults: true testResultsFormat: 'xunit' # We want to run AOT tests when illink changes because there's share code and tests from illink which are used by AOT @@ -688,7 +686,7 @@ extends: or( eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true), eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true), - eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_cdacreader.containsChange'], true), + eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_cdac.containsChange'], true), eq(variables['isRollingBuild'], true)) # # Build CrossDacs @@ -822,20 +820,6 @@ extends: scenarios: - WasmTestOnChrome - # Library tests with full threading - - template: /eng/pipelines/common/templates/wasm-library-tests.yml - parameters: - platforms: - - browser_wasm - #- browser_wasm_win - nameSuffix: _Threading - extraBuildArgs: /p:WasmEnableThreads=true /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS) - extraHelixArguments: /p:WasmEnableThreads=true - alwaysRun: ${{ variables.isRollingBuild }} - shouldRunSmokeOnly: onLibrariesAndIllinkChanges - scenarios: - - WasmTestOnChrome - # EAT Library tests - only run on linux - template: /eng/pipelines/common/templates/wasm-library-aot-tests.yml parameters: @@ -901,18 +885,6 @@ extends: # WASI/WASM - - template: /eng/pipelines/common/templates/wasm-library-tests.yml - parameters: - platforms: - - wasi_wasm - - wasi_wasm_win - nameSuffix: '_Smoke' - extraBuildArgs: /p:EnableAggressiveTrimming=true /p:RunWasmSamples=true /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS) - shouldRunSmokeOnly: true - alwaysRun: ${{ variables.isRollingBuild }} - scenarios: - - WasmTestOnWasmtime - - template: /eng/pipelines/common/templates/simple-wasm-build-tests.yml parameters: platforms: @@ -963,6 +935,48 @@ extends: eq(variables['monoContainsChange'], true), eq(variables['isRollingBuild'], true)) + # + # Android arm64 devices and x64 emulators + # Build the whole product using CoreCLR and run functional tests + # + - template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml + buildConfig: Release + runtimeFlavor: coreclr + platforms: + - android_x64 + - android_arm64 + variables: + # map dependencies variables to local variables + - name: librariesContainsChange + value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ] + - name: coreclrContainsChange + value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'] ] + jobParameters: + testGroup: innerloop + nameSuffix: AllSubsets_CoreCLR + buildArgs: -s 
clr.runtime+clr.alljits+clr.corelib+clr.nativecorelib+clr.tools+clr.packages+libs+libs.tests+host+packs -c $(_BuildConfig) /p:ArchiveTests=true /p:RunSmokeTestsOnly=true + timeoutInMinutes: 480 + condition: >- + or( + eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true), + eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true), + eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true), + eq(variables['isRollingBuild'], true)) + # extra steps, run tests + postBuildSteps: + - template: /eng/pipelines/libraries/helix.yml + parameters: + creator: dotnet-bot + testRunNamePrefixSuffix: CoreCLR_$(_BuildConfig) + condition: >- + or( + eq(variables['librariesContainsChange'], true), + eq(variables['coreclrContainsChange'], true), + eq(variables['isRollingBuild'], true)) + # # iOS/tvOS devices - Full AOT + AggressiveTrimming to reduce size # Build the whole product using Mono and run libraries tests @@ -987,7 +1001,7 @@ extends: jobParameters: testGroup: innerloop nameSuffix: AllSubsets_Mono - buildArgs: -s mono+libs+libs.tests+host+packs -c $(_BuildConfig) /p:ArchiveTests=true /p:DevTeamProvisioning=- /p:RunAOTCompilation=true /p:RunSmokeTestsOnly=true /p:BuildTestsOnHelix=true /p:EnableAdditionalTimezoneChecks=true /p:UsePortableRuntimePack=false /p:BuildDarwinFrameworks=true /p:EnableAggressiveTrimming=true + buildArgs: -s mono+libs+libs.tests+host+packs -c $(_BuildConfig) /p:ArchiveTests=true /p:DevTeamProvisioning=- /p:RunAOTCompilation=true /p:RunSmokeTestsOnly=true /p:BuildTestsOnHelix=true /p:EnableAdditionalTimezoneChecks=true /p:UsePortableRuntimePack=false /p:EnableAggressiveTrimming=true timeoutInMinutes: 480 condition: >- or( @@ -1065,8 +1079,7 @@ extends: runtimeFlavor: mono platforms: - maccatalyst_x64 - - ${{ if eq(variables['isRollingBuild'], true) }}: - - maccatalyst_arm64 + - maccatalyst_arm64 variables: # map dependencies variables to local variables - name: librariesContainsChange @@ -1076,7 +1089,7 @@ extends: jobParameters: testGroup: innerloop nameSuffix: AllSubsets_Mono - buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:RunSmokeTestsOnly=true /p:DevTeamProvisioning=adhoc /p:RunAOTCompilation=true /p:MonoForceInterpreter=true /p:BuildDarwinFrameworks=true + buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:RunSmokeTestsOnly=true /p:DevTeamProvisioning=adhoc /p:RunAOTCompilation=true /p:MonoForceInterpreter=true timeoutInMinutes: 180 condition: >- or( @@ -1203,16 +1216,16 @@ extends: - windows_x86 helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml jobParameters: - framework: net48 - buildArgs: -s tools+libs+libs.tests -framework net48 -c $(_BuildConfig) -testscope innerloop /p:ArchiveTests=true - nameSuffix: Libraries_NET48 + framework: net481 + buildArgs: -s tools+libs+libs.tests -framework net481 -c $(_BuildConfig) -testscope innerloop /p:ArchiveTests=true + nameSuffix: Libraries_NET481 timeoutInMinutes: 150 postBuildSteps: - template: /eng/pipelines/libraries/helix.yml parameters: creator: dotnet-bot - testRunNamePrefixSuffix: NET48_$(_BuildConfig) - extraHelixArguments: /p:BuildTargetFramework=net48 + testRunNamePrefixSuffix: NET481_$(_BuildConfig) + extraHelixArguments: /p:BuildTargetFramework=net481 condition: >- or( eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true), @@ 
-1228,7 +1241,7 @@ extends: platforms: - windows_x64 jobParameters: - buildArgs: -test -s tools+libs+libs.tests -pack -c $(_BuildConfig) /p:TestAssemblies=false /p:TestPackages=true + buildArgs: -test -s tools.illink+libs+libs.tests -pack -c $(_BuildConfig) /p:TestAssemblies=false /p:TestPackages=true nameSuffix: Libraries_WithPackages timeoutInMinutes: 150 condition: >- @@ -1248,9 +1261,10 @@ extends: buildConfig: ${{ variables.debugOnPrReleaseOnRolling }} platforms: - windows_x86 + helixQueuesTemplate: /eng/pipelines/installer/helix-queues-setup.yml jobParameters: nameSuffix: Installer_Build_And_Test - buildArgs: -s host+packs -c $(_BuildConfig) -lc Release -rc Release -test + buildArgs: -s host+packs -c $(_BuildConfig) -lc Release -rc Release dependsOnGlobalBuilds: - nameSuffix: CoreCLR_Libraries buildConfig: release @@ -1261,12 +1275,11 @@ extends: artifactFileName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Release$(archiveExtension) unpackFolder: $(Build.SourcesDirectory)/artifacts/bin displayName: 'unified artifacts' - enablePublishTestResults: true - testRunTitle: Installer-$(osGroup)$(osSubgroup)_$(archType) + timeoutInMinutes: 150 postBuildSteps: - - template: /eng/pipelines/installer/steps/upload-job-artifacts.yml + - template: /eng/pipelines/installer/helix.yml parameters: - name: $(osGroup)$(osSubgroup)_$(archType) + creator: dotnet-bot condition: or( eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true), @@ -1280,9 +1293,10 @@ extends: - windows_x64 - osx_x64 - linux_x64 + helixQueuesTemplate: /eng/pipelines/installer/helix-queues-setup.yml jobParameters: nameSuffix: Installer_Build_And_Test - buildArgs: -s host+packs -c $(_BuildConfig) -lc ${{ variables.debugOnPrReleaseOnRolling }} -rc Release -test + buildArgs: -s host+packs -c $(_BuildConfig) -lc ${{ variables.debugOnPrReleaseOnRolling }} -rc Release dependsOnGlobalBuilds: - nameSuffix: CoreCLR_Libraries buildConfig: ${{ variables.debugOnPrReleaseOnRolling }} @@ -1293,12 +1307,11 @@ extends: artifactFileName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(debugOnPrReleaseOnRolling)$(archiveExtension) unpackFolder: $(Build.SourcesDirectory)/artifacts/bin displayName: 'unified artifacts' - enablePublishTestResults: true - testRunTitle: Installer-$(osGroup)$(osSubgroup)_$(archType) + timeoutInMinutes: 150 postBuildSteps: - - template: /eng/pipelines/installer/steps/upload-job-artifacts.yml + - template: /eng/pipelines/installer/helix.yml parameters: - name: $(osGroup)$(osSubgroup)_$(archType) + creator: dotnet-bot condition: or( eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true), @@ -1756,11 +1769,12 @@ extends: parameters: platforms: - name: CentOS9 + baseRID: linux-x64 targetRID: centos.9-x64 portableBuild: false container: SourceBuild_centos_x64 - name: NonexistentRID - baseOS: linux + baseRID: linux-x64 targetRID: banana.24-x64 portableBuild: false container: SourceBuild_centos_x64 diff --git a/eng/python.targets b/eng/python.targets deleted file mode 100644 index 3f933fdfea0e..000000000000 --- a/eng/python.targets +++ /dev/null @@ -1,31 +0,0 @@ - - - - <_PythonLocationScript>-c "import sys; sys.stdout.write(sys.executable)" - - - - - - - - - - - - - - diff --git a/eng/targetingpacks.targets b/eng/targetingpacks.targets index 4e0fce1e532a..8bc3936eda5c 100644 --- a/eng/targetingpacks.targets +++ b/eng/targetingpacks.targets @@ -34,6 +34,7 @@ 
$(UseLocalTargetingRuntimePack) $(UseLocalTargetingRuntimePack) + $(UseLocalTargetingRuntimePack) @@ -113,21 +114,30 @@ + + $(MicrosoftNetCoreAppRefPackDir) + $(MicrosoftNetCoreAppRuntimePackDir) + + + $(BootstrapRefPackDir) + $(BootstrapRuntimePackDir) + + + Condition="!Exists('$(LocalRefPackDir)\data\FrameworkList.xml')" /> - - - - $(MicrosoftNetCoreAppRuntimePackDir) + $(LocalRuntimePackDir) @@ -146,6 +156,10 @@ + + $(LocalAppHostPath) + $(LocalSingleFileHostPath) + - $(ProductVersion) - $(BundledNETCoreAppPackageVersion) + $(ProductVersion) + + + $(CoreCLRCrossILCompilerDir) + $(ROOTFS_DIR) + $(CoreCLRILCompilerDir)netstandard/ILCompiler.Build.Tasks.dll + $(CoreCLRAotSdkDir) + $(MicrosoftNetCoreAppRuntimePackRidLibTfmDir) + $(MicrosoftNetCoreAppRuntimePackNativeDir) + + + + $(BootstrapRuntimePackDir)/runtimes/$(TargetRid)/lib/$(NetCoreAppCurrent)/ + $(BootstrapRuntimePackDir)/runtimes/$(TargetRid)/native/ + $(BootstrapAotSdkDir)/ + diff --git a/eng/testing/AndroidRunnerTemplate.cmd b/eng/testing/AndroidRunnerTemplate.cmd new file mode 100644 index 000000000000..a1535a9e2b6e --- /dev/null +++ b/eng/testing/AndroidRunnerTemplate.cmd @@ -0,0 +1,47 @@ +@ECHO OFF +setlocal enabledelayedexpansion + +SET EXECUTION_DIR=%~dp0 +SET ASSEMBLY_NAME=%1 +SET TARGET_ARCH=%2 +SET TARGET_OS=%3 +SET TEST_NAME=%4 +SET REPO_ROOT=%5 + +:Arg_Loop +if "%6" == "" goto ArgsDone +set "__AdditionalArgs=!__AdditionalArgs! %6"&shift&goto Arg_Loop +:ArgsDone + +SET "XHARNESS_OUT=%EXECUTION_DIR%xharness-output" + +cd %EXECUTION_DIR% + +:lock +MKDIR androidtests.lock 2>NUL +IF "%errorlevel%" NEQ "0" ( + ping -n 6 127.0.0.1 >NUL + GOTO :lock +) + +IF [%XHARNESS_CLI_PATH%] NEQ [] ( + :: When running in CI, we only have the .NET runtime available + :: We need to call the XHarness CLI DLL directly via dotnet exec + SET HARNESS_RUNNER=%REPO_ROOT%dotnet.cmd exec "%XHARNESS_CLI_PATH%" +) ELSE ( + SET HARNESS_RUNNER=%REPO_ROOT%dotnet.cmd xharness +) + +%HARNESS_RUNNER% android test --instrumentation="net.dot.MonoRunner" --package-name="net.dot.%ASSEMBLY_NAME%" --app="%EXECUTION_DIR%bin\%TEST_NAME%.apk" --output-directory="%XHARNESS_OUT%" --timeout=1800 %__AdditionalArgs% + +SET EXIT_CODE=%ERRORLEVEL% + +ECHO XHarness artifacts: %XHARNESS_OUT% + +RMDIR /Q androidtests.lock 2>NUL +EXIT /B %EXIT_CODE% + +:: ========== FUNCTIONS ========== +:NORMALIZEPATH + SET RETVAL=%~f1 + EXIT /B diff --git a/eng/testing/BrowserVersions.props b/eng/testing/BrowserVersions.props index 73c23aa9e16b..67c9c92ea6a2 100644 --- a/eng/testing/BrowserVersions.props +++ b/eng/testing/BrowserVersions.props @@ -1,13 +1,13 @@ - 133.0.6943.53 - 1402768 - https://storage.googleapis.com/chromium-browser-snapshots/Linux_x64/1402768 - 13.3.415 - 133.0.6943.35 - 1402768 - https://storage.googleapis.com/chromium-browser-snapshots/Win_x64/1402771 - 13.3.415 + 137.0.7151.55 + 1453031 + https://storage.googleapis.com/chromium-browser-snapshots/Linux_x64/1453031 + 13.7.152 + 137.0.7151.41 + 1453031 + https://storage.googleapis.com/chromium-browser-snapshots/Win_x64/1453032 + 13.7.152 125.0.1 0.34.0 125.0.1 diff --git a/eng/testing/linker/project.csproj.template b/eng/testing/linker/project.csproj.template index 003999b221b7..7498d355dfce 100644 --- a/eng/testing/linker/project.csproj.template +++ b/eng/testing/linker/project.csproj.template @@ -25,8 +25,6 @@ {NetCoreAppMaximumVersion} {UseMonoRuntime} {RuntimeIdentifier} - {AppHostSourcePath} - {SingleFileHostSourcePath} true @@ -48,20 +46,22 @@ {ProductVersion} {NetCoreAppCurrent} {NetCoreAppCurrentVersion} + 
{LocalAppHostPath} + {LocalSingleFileHostPath} {MicrosoftNetCoreAppFrameworkName} {MicrosoftNetCoreAppRefPackDir} {MicrosoftNetCoreAppRuntimePackDir} + {MicrosoftNetCoreAppRuntimePackRidLibTfmDir} + {MicrosoftNetCoreAppRuntimePackNativeDir} + {CoreCLRILCompilerDir} + {CoreCLRCrossILCompilerDir} + {CoreCLRAotSdkDir} {RepositoryEngineeringDir} - <_ExtraTrimmerArgs>{ExtraTrimmerArgs} $(_ExtraTrimmerArgs) + <_ExtraTrimmerArgs>{ExtraTrimmerArgs} $(_ExtraTrimmerArgs) --dump-dependencies {AdditionalProperties} - {IlcToolsPath} - {IlcBuildTasksPath} - {IlcSdkPath} - {IlcFrameworkPath} - {IlcFrameworkNativePath} {CoreCLRBuildIntegrationDir} diff --git a/eng/testing/linker/trimmingTests.targets b/eng/testing/linker/trimmingTests.targets index e0f3cdc0b205..8e6ca49ece3c 100644 --- a/eng/testing/linker/trimmingTests.targets +++ b/eng/testing/linker/trimmingTests.targets @@ -21,8 +21,8 @@ - $([MSBuild]::NormalizeDirectory('$(TrimmingTestProjectsDir)', '$(MSBuildProjectName)', '%(Filename)', '$(PackageRID)')) - $(PackageRID) + $([MSBuild]::NormalizeDirectory('$(TrimmingTestProjectsDir)', '$(MSBuildProjectName)', '%(Filename)', '$(TargetRid)')) + $(TargetRid) $(NetCoreAppCurrent) $(NetCoreAppCurrent)-%(TestConsoleAppSourceFiles.TargetOS) @@ -36,8 +36,8 @@ - - <_SkippedAppSourceFiles Include="@(TestConsoleAppSourceFiles)" Condition="$([System.String]::Copy('%(TestConsoleAppSourceFiles.SkipOnTestRuntimes)').Contains('$(PackageRID)'))" /> + + <_SkippedAppSourceFiles Include="@(TestConsoleAppSourceFiles)" Condition="$([System.String]::Copy('%(TestConsoleAppSourceFiles.SkipOnTestRuntimes)').Contains('$(TargetRid)'))" /> <_SkippedAppSourceFiles Include="@(TestConsoleAppSourceFiles)" Condition="'$(RunNativeAotTestApps)' == 'true' and '%(TestConsoleAppSourceFiles.NativeAotIncompatible)' == 'true'" /> @@ -100,11 +100,6 @@ .Replace('{ExtraTrimmerArgs}', '%(TestConsoleApps.ExtraTrimmerArgs)') .Replace('{AdditionalProperties}', '$(_additionalPropertiesString)') .Replace('{ToolsILLinkDir}', '$(ToolsILLinkDir)') - .Replace('{IlcToolsPath}', '$(IlcToolsPath)') - .Replace('{IlcBuildTasksPath}', '$(CoreCLRILCompilerDir)netstandard/ILCompiler.Build.Tasks.dll') - .Replace('{IlcSdkPath}', '$(CoreCLRAotSdkDir)') - .Replace('{IlcFrameworkPath}', '$(MicrosoftNetCoreAppRuntimePackRidLibTfmDir)') - .Replace('{IlcFrameworkNativePath}', '$(MicrosoftNetCoreAppRuntimePackNativeDir)') .Replace('{CoreCLRBuildIntegrationDir}', '$(CoreCLRBuildIntegrationDir)') .Replace('{RuntimeHostConfigurationOptions}', '$(_runtimeHostConfigurationOptionsString)') .Replace('{AdditionalProjectReferences}', '$(_additionalProjectReferencesString)') @@ -125,9 +120,14 @@ .Replace('{MicrosoftNetCoreAppFrameworkName}', '$(MicrosoftNetCoreAppFrameworkName)') .Replace('{MicrosoftNetCoreAppRefPackDir}', '$(MicrosoftNetCoreAppRefPackDir)') .Replace('{MicrosoftNetCoreAppRuntimePackDir}', '$(MicrosoftNetCoreAppRuntimePackDir)') + .Replace('{MicrosoftNetCoreAppRuntimePackRidLibTfmDir}', '$(MicrosoftNetCoreAppRuntimePackRidLibTfmDir)') + .Replace('{MicrosoftNetCoreAppRuntimePackNativeDir}', '$(MicrosoftNetCoreAppRuntimePackNativeDir)') + .Replace('{CoreCLRILCompilerDir}', '$(CoreCLRILCompilerDir)') + .Replace('{CoreCLRCrossILCompilerDir}', '$(CoreCLRCrossILCompilerDir)') + .Replace('{CoreCLRAotSdkDir}', '$(CoreCLRAotSdkDir)') .Replace('{NativeSanitizersTargets}', '$(RepositoryEngineeringDir)nativeSanitizers.targets') - .Replace('{AppHostSourcePath}', '$(AppHostSourcePath)') - .Replace('{SingleFileHostSourcePath}', '$(SingleFileHostSourcePath)') + 
.Replace('{LocalAppHostPath}', '$(LocalAppHostPath)') + .Replace('{LocalSingleFileHostPath}', '$(LocalSingleFileHostPath)') .Replace('{SanitizerRuntimeFolder}', '$(DotNetHostBinDir)'))" Overwrite="true" /> + + + + rmdir "%(TestConsoleApps.ProjectDir.TrimEnd('\'))" /s /q + rm -rf "%(TestConsoleApps.ProjectDir)" + + diff --git a/eng/testing/performance/android_scenarios.proj b/eng/testing/performance/android_scenarios.proj index 375307a46fa4..4d562a70e757 100644 --- a/eng/testing/performance/android_scenarios.proj +++ b/eng/testing/performance/android_scenarios.proj @@ -1,8 +1,6 @@ true - 1.0.0-prerelease.21566.2 - %HELIX_CORRELATION_PAYLOAD%\microsoft.dotnet.xharness.cli\$(MicrosoftDotNetXHarnessCLIVersion)\tools\net6.0\any\Microsoft.DotNet.XHarness.CLI.dll python3 @@ -35,24 +33,36 @@ $(Python) test.py sod --scenario-name "%(Identity)" $(ScenarioArgs) $(Python) post.py - + + $(WorkItemDirectory) + cd $(ScenarioDirectory)helloandroid;copy %HELIX_WORKITEM_ROOT%\HelloAndroid.apk .;$(Python) pre.py --apk-name HelloAndroid.apk + $(Python) test.py devicestartup --device-type android --package-path pub\HelloAndroid.apk --package-name net.dot.HelloAndroid --scenario-name "%(Identity)" $(ScenarioArgs) + $(Python) post.py + + + $(WorkItemDirectory) + cd $(ScenarioDirectory)helloandroid;mkdir traces;copy %HELIX_WORKITEM_ROOT%\androidHelloWorldBinlog\msbuild.binlog traces + $(Python) test.py buildtime --scenario-name "%(Identity)" $(ScenarioArgs) --binlog-path msbuild.binlog + $(Python) post.py + + diff --git a/eng/testing/performance/ios_scenarios.proj b/eng/testing/performance/ios_scenarios.proj index 737e09d10067..d72eda864fc1 100644 --- a/eng/testing/performance/ios_scenarios.proj +++ b/eng/testing/performance/ios_scenarios.proj @@ -71,5 +71,11 @@ ]]> + + $(WorkItemDirectory) + cd $(ScenarioDirectory)helloios;mkdir traces;cp -v $HELIX_CORRELATION_PAYLOAD/iosHelloWorldBinlog/msbuild.binlog traces + $(Python) test.py buildtime --scenario-name "%(Identity)" $(ScenarioArgs) --binlog-path ./msbuild.binlog + $(Python) post.py +
diff --git a/eng/testing/scenarios/BuildWasmAppsJobsList.txt b/eng/testing/scenarios/BuildWasmAppsJobsList.txt index 8df77ea901da..82c8b0c82796 100644 --- a/eng/testing/scenarios/BuildWasmAppsJobsList.txt +++ b/eng/testing/scenarios/BuildWasmAppsJobsList.txt @@ -15,6 +15,7 @@ Wasm.Build.Tests.Blazor.NativeTests Wasm.Build.Tests.Blazor.NoopNativeRebuildTest Wasm.Build.Tests.Blazor.WorkloadRequiredTests Wasm.Build.Tests.Blazor.SignalRClientTests +Wasm.Build.Tests.Blazor.EventPipeDiagnosticsTests Wasm.Build.Tests.BuildPublishTests Wasm.Build.Tests.ConfigSrcTests Wasm.Build.Tests.DllImportTests @@ -46,3 +47,7 @@ Wasm.Build.Tests.WasmTemplateTests Wasm.Build.Tests.WorkloadTests Wasm.Build.Tests.MT.Blazor.SimpleMultiThreadedTests Wasm.Build.Tests.DebugLevelTests +Wasm.Build.Tests.PreloadingTests +Wasm.Build.Tests.EnvVariablesTests +Wasm.Build.Tests.HttpTests +Wasm.Build.Tests.DiagnosticsTests diff --git a/eng/testing/tests.android.targets b/eng/testing/tests.android.targets index 3c3eb46fa28e..4144799c78f9 100644 --- a/eng/testing/tests.android.targets +++ b/eng/testing/tests.android.targets @@ -3,7 +3,7 @@ $(BundleTestAppTargets);BundleTestAndroidApp - + @@ -12,6 +12,10 @@ AndroidBuild + + $(DefineConstants);SINGLE_FILE_TEST_RUNNER + + @@ -22,7 +26,7 @@ AndroidTestRunner.dll - + $(PublishDir) $(BundleDir) @@ -44,7 +48,7 @@ <_InternalForceInterpret>true <_IsNative>true - + <_PublishAssemblies Include="$(PublishDir)\**\*.dll" Exclude="$(PublishDir)\**\*.resources.dll" /> <_SatelliteAssemblies Include="$(PublishDir)\**\*.resources.dll" /> @@ -73,4 +77,4 @@ - \ No newline at end of file + diff --git a/eng/testing/tests.browser.targets b/eng/testing/tests.browser.targets index d591b8a608be..4264bde6d019 100644 --- a/eng/testing/tests.browser.targets +++ b/eng/testing/tests.browser.targets @@ -13,9 +13,10 @@ - false + true + false true <_WasmMainJSFileName Condition="'$(WasmMainJSPath)' != ''">$([System.IO.Path]::GetFileName('$(WasmMainJSPath)')) diff --git a/eng/testing/tests.props b/eng/testing/tests.props index 9cc17e209ae0..ad0f25da5fa0 100644 --- a/eng/testing/tests.props +++ b/eng/testing/tests.props @@ -21,7 +21,7 @@ <_WasmTestRunnerTFM Condition="'$(TargetsBrowser)' == 'true'">$(NetCoreAppCurrent)-browser $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'WasmTestRunner', '$(Configuration)', '$(_WasmTestRunnerTFM)')) - $(OutputRID) + $(TargetRid) true diff --git a/eng/testing/tests.singlefile.targets b/eng/testing/tests.singlefile.targets index e6e578aa2494..f42009ad653c 100644 --- a/eng/testing/tests.singlefile.targets +++ b/eng/testing/tests.singlefile.targets @@ -4,7 +4,7 @@ $([MSBuild]::NormalizeDirectory('$(OutDir)', 'publish')) $([MSBuild]::NormalizePath('$(BundleDir)', '$(RunScriptOutputName)')) - $(OutputRID) + $(TargetRid) <_OutputExeExt Condition="'$(TargetsBrowser)' == 'true'">.js <_OutputExeExt Condition="'$(TargetsWasi)' == 'true'">.wasm @@ -27,12 +27,6 @@ - $(CoreCLRCrossILCompilerDir) - $(ROOTFS_DIR) - $(CoreCLRILCompilerDir)netstandard/ILCompiler.Build.Tasks.dll - $(CoreCLRAotSdkDir) - $(NetCoreAppCurrentTestHostSharedFrameworkPath) - $(NetCoreAppCurrentTestHostSharedFrameworkPath) module compiled with /GL found $(NoWarn);IL1005;IL2122;IL3000;IL3001;IL3002;IL3003;IL3050;IL3051;IL3052;IL3053 partial @@ -55,10 +49,6 @@ - - - - @@ -106,7 +96,7 @@ BeforeTargets="_PrepareForReadyToRunCompilation"> $(ArtifactsObjDir)Microsoft.NETCore.App.Bundle/ - $(ArtifactsNetCoreAppBundlePath)$(Configuration)/$(NetCoreAppCurrent)/$(OutputRID)/output/ + 
$(ArtifactsNetCoreAppBundlePath)$(Configuration)/$(NetCoreAppCurrent)/$(TargetRid)/output/ $(ArtifactsNetCoreAppBundlePath)shared/$(MicrosoftNetCoreAppFrameworkName)/$(PackageVersion)/ diff --git a/eng/testing/tests.targets b/eng/testing/tests.targets index 5eb33b12baa1..bff61813b076 100644 --- a/eng/testing/tests.targets +++ b/eng/testing/tests.targets @@ -1,14 +1,15 @@ - true - false + true + false RunnerTemplate.cmd RunnerTemplate.sh AppleHelixRunnerTemplate.sh AppleRunnerTemplate.sh - AndroidRunnerTemplate.sh + AndroidRunnerTemplate.sh + AndroidRunnerTemplate.cmd WasiRunnerTemplate.sh WasiRunnerTemplate.cmd WasmRunnerTemplate.sh @@ -172,7 +173,8 @@ TEST_ARCH=$(_AndroidArchitecture) "$(RunScriptOutputPath)" $(AssemblyName) $(TargetArchitecture) $(TargetOS) $(TestProjectName) $(RunTestsCommand) $(Configuration) $(AdditionalXHarnessArguments) - $(RunTestsCommand) $(AdditionalXHarnessArguments) + $(RunTestsCommand) $(AdditionalXHarnessArguments) + $(RunTestsCommand) $(RepoRoot) $(AdditionalXHarnessArguments) "$(RunScriptOutputPath)" $(JSEngine) $(AssemblyName).dll $(Scenario) diff --git a/eng/testing/tests.wasm.targets b/eng/testing/tests.wasm.targets index 65ab8ed35de7..0ff52cc4bff4 100644 --- a/eng/testing/tests.wasm.targets +++ b/eng/testing/tests.wasm.targets @@ -59,8 +59,8 @@ So, set those parameters explicitly here. --> - <_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' == 'true' and '$(RunAOTCompilation)' == 'true'">$(_ExtraTrimmerArgs) --substitutions "$(BrowserProjectRoot)build\ILLink.Substitutions.WasmIntrinsics.xml" - <_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' != 'true'">$(_ExtraTrimmerArgs) --substitutions "$(BrowserProjectRoot)build\ILLink.Substitutions.NoWasmIntrinsics.xml" + <_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' != 'false'">$(_ExtraTrimmerArgs) --substitutions "$(BrowserProjectRoot)build\ILLink.Substitutions.WasmIntrinsics.xml" + <_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' == 'false'">$(_ExtraTrimmerArgs) --substitutions "$(BrowserProjectRoot)build\ILLink.Substitutions.NoWasmIntrinsics.xml" @@ -84,10 +84,12 @@ - + - + + + <_WasmPropertyNames Include="WasmLinkIcalls" /> <_WasmPropertyNames Include="WasmNativeStrip" /> + <_WasmPropertyNames Include="WasmNativeDebugSymbols" /> <_WasmPropertyNames Include="_WasmDevel" /> <_WasmPropertyNames Include="_WasmStrictVersionMatch" /> <_WasmPropertyNames Include="WasmEmitSymbolMap" /> diff --git a/eng/toolAot.targets b/eng/toolAot.targets index 494c4cbf1176..919898404078 100644 --- a/eng/toolAot.targets +++ b/eng/toolAot.targets @@ -12,19 +12,19 @@ true - - - true + + <_TrueTargetRid>$(RuntimeIdentifier) + + $(BaseRid) + + + + $(_TrueTargetRid) - - - - - - - - AnyCPU - - - false - - - - + amd64 $(TargetArchitecture) diff --git a/src/coreclr/.nuget/Directory.Build.targets b/src/coreclr/.nuget/Directory.Build.targets index 379fbd65030b..4c103828fc45 100644 --- a/src/coreclr/.nuget/Directory.Build.targets +++ b/src/coreclr/.nuget/Directory.Build.targets @@ -4,7 +4,7 @@ $(ProductVersion) - $(PackageVersion) + $(PackageVersion) diff --git a/src/coreclr/.nuget/Microsoft.CrossOsDiag.Private.CoreCLR/Microsoft.CrossOsDiag.Private.CoreCLR.proj b/src/coreclr/.nuget/Microsoft.CrossOsDiag.Private.CoreCLR/Microsoft.CrossOsDiag.Private.CoreCLR.proj index 4fbe217f4206..a9e84b88cfdb 100644 --- a/src/coreclr/.nuget/Microsoft.CrossOsDiag.Private.CoreCLR/Microsoft.CrossOsDiag.Private.CoreCLR.proj +++ b/src/coreclr/.nuget/Microsoft.CrossOsDiag.Private.CoreCLR/Microsoft.CrossOsDiag.Private.CoreCLR.proj @@ -8,7 +8,7 @@ In 
dotnet/runtime's official build, we'll extract the runtime packs to a specific directory and pass that here. In the VMR, we'll restore the runtime packs as NuGet packages. --> - + $(NuGetPackageRoot) @@ -24,7 +24,7 @@ - +
@@ -32,7 +32,8 @@ <_CrossOSDacProject Include="@(SupportedRid->'$(RepoRoot)Build.proj')" AdditionalProperties="TargetOS=%(TargetOS); TargetArchitecture=%(TargetArchitecture); - Subset=linuxdac+alpinedac" /> + Subset=linuxdac+alpinedac; + BuildHostTools=false" /> <_RuntimePrereqsProject Include="$(CoreClrProjectRoot)runtime-prereqs.proj" /> diff --git a/src/coreclr/.nuget/builds.targets b/src/coreclr/.nuget/builds.targets index 2e903bf16bdd..d12090e92b95 100644 --- a/src/coreclr/.nuget/builds.targets +++ b/src/coreclr/.nuget/builds.targets @@ -7,11 +7,11 @@ - + - <_projectsToBuild Include="@(Project)" Condition="'%(Project.PackageTargetRuntime)' == '$(OutputRID)'" /> + <_projectsToBuild Include="@(Project)" Condition="'%(Project.PackageTargetRuntime)' == '$(TargetRid)'" /> diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index c5ae458c0dec..1d88c4942868 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -29,12 +29,20 @@ if(CORECLR_SET_RPATH) set(MACOSX_RPATH ON) endif(CORECLR_SET_RPATH) +<<<<<<< HEAD if(CLR_CMAKE_HOST_MACCATALYST OR CLR_CMAKE_HOST_IOS OR CLR_CMAKE_HOST_TVOS OR CLR_CMAKE_TARGET_ARCH_WASM) +======= +if(CLR_CMAKE_HOST_MACCATALYST OR CLR_CMAKE_HOST_IOS OR CLR_CMAKE_HOST_TVOS OR CLR_CMAKE_HOST_BROWSER OR CLR_CMAKE_HOST_ANDROID) +>>>>>>> upstream-jun set(FEATURE_STANDALONE_GC 0) endif() OPTION(CLR_CMAKE_ENABLE_CODE_COVERAGE "Enable code coverage" OFF) +if (DEFINED CLR_CMAKE_ICU_DIR) + include_directories(${CLR_CMAKE_ICU_DIR}/include) +endif(DEFINED CLR_CMAKE_ICU_DIR) + #---------------------------------------------------- # Cross target Component build specific configuration #---------------------------------------------------- @@ -103,9 +111,8 @@ if(CLR_CMAKE_HOST_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_WASM) add_linker_flag(-Wl,-z,notext) endif() - if(NOT CLR_CMAKE_HOST_TVOS) - add_subdirectory(pal) - endif() + add_subdirectory(pal) + if(NOT CLR_CMAKE_HOST_MACCATALYST AND NOT CLR_CMAKE_HOST_IOS AND NOT CLR_CMAKE_HOST_TVOS) add_subdirectory(hosts) endif() @@ -134,10 +141,14 @@ if(NOT CLR_CMAKE_TARGET_ARCH_WASM) add_subdirectory(${CLR_SRC_NATIVE_DIR}/eventpipe eventpipe) add_subdirectory(${CLR_SRC_NATIVE_DIR}/minipal shared_minipal) +<<<<<<< HEAD if(NOT CLR_CMAKE_HOST_TVOS) add_subdirectory(debug/debug-pal) endif() endif(NOT CLR_CMAKE_TARGET_ARCH_WASM) +======= +add_subdirectory(debug/debug-pal) +>>>>>>> upstream-jun add_subdirectory(minipal) @@ -152,10 +163,16 @@ endif() include_directories("pal/prebuilt/inc") include_directories(${CLR_ARTIFACTS_OBJ_DIR}) +<<<<<<< HEAD if(NOT CLR_CMAKE_TARGET_ARCH_WASM) add_subdirectory(tools/aot/jitinterface) endif(NOT CLR_CMAKE_TARGET_ARCH_WASM) +======= +if (NOT CLR_CMAKE_TARGET_BROWSER) + add_subdirectory(tools/aot/jitinterface) +endif (NOT CLR_CMAKE_TARGET_BROWSER) +>>>>>>> upstream-jun if(NOT CLR_CROSS_COMPONENTS_BUILD) # NativeAOT only buildable for a subset of CoreCLR-supported configurations @@ -178,9 +195,17 @@ endif(CLR_CMAKE_HOST_WIN32) #---------------------------------- include(clrdefinitions.cmake) +<<<<<<< HEAD if(NOT CLR_CMAKE_TARGET_ARCH_WASM) add_subdirectory(gc) endif(NOT CLR_CMAKE_TARGET_ARCH_WASM) +======= +if(FEATURE_STANDALONE_GC) + add_definitions(-DFEATURE_STANDALONE_GC) +endif(FEATURE_STANDALONE_GC) + +add_subdirectory(gc) +>>>>>>> upstream-jun if (CLR_CMAKE_HOST_UNIX) include_directories("pal/inc") @@ -202,6 +227,10 @@ include_directories("${GENERATED_INCLUDE_DIR}") include_directories("hosts/inc") include_directories("minipal") +if(FEATURE_INTERPRETER) 
+add_subdirectory(interpreter) +endif() + if(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE) include_directories("${GENERATED_INCLUDE_DIR}/etw") endif(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE) @@ -225,7 +254,13 @@ if(CLR_CMAKE_HOST_UNIX) set (NATIVE_RESOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/nativeresources) include_directories(${NATIVE_RESOURCE_DIR}) - set (PROCESS_RC_SCRIPT ${NATIVE_RESOURCE_DIR}/processrc.sh) + + # Win32 may be false when cross compiling + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + set (PROCESS_RC_SCRIPT ${NATIVE_RESOURCE_DIR}/processrc.ps1) + else() + set (PROCESS_RC_SCRIPT ${NATIVE_RESOURCE_DIR}/processrc.sh) + endif() set (RESOURCE_STRING_HEADER_DIR ${NATIVE_RESOURCE_DIR}) # Create a command to create a C++ source file containing an array of @@ -240,37 +275,61 @@ if(CLR_CMAKE_HOST_UNIX) set(RESOURCE_ENTRY_ARRAY_CPP ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.cpp) - add_custom_command( - OUTPUT ${RESOURCE_ENTRY_ARRAY_CPP} - # Convert the preprocessed .rc file to a C++ file which will be used to make a static lib. - COMMAND ${PROCESS_RC_SCRIPT} ${PREPROCESSED_SOURCE} ${TARGET_NAME} >${RESOURCE_ENTRY_ARRAY_CPP} - DEPENDS ${PREPROCESSED_SOURCE} ${PROCESS_RC_SCRIPT} - ) + # Win32 may be false when cross compiling + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + add_custom_command( + OUTPUT ${RESOURCE_ENTRY_ARRAY_CPP} + # Convert the preprocessed .rc file to a C++ file which will be used to make a static lib. + COMMAND powershell -NoProfile -ExecutionPolicy ByPass -File \"${PROCESS_RC_SCRIPT}\" ${PREPROCESSED_SOURCE} ${TARGET_NAME} >${RESOURCE_ENTRY_ARRAY_CPP} + DEPENDS ${PREPROCESSED_SOURCE} ${PROCESS_RC_SCRIPT} + ) + else() + add_custom_command( + OUTPUT ${RESOURCE_ENTRY_ARRAY_CPP} + # Convert the preprocessed .rc file to a C++ file which will be used to make a static lib. 
+ COMMAND ${PROCESS_RC_SCRIPT} ${PREPROCESSED_SOURCE} ${TARGET_NAME} >${RESOURCE_ENTRY_ARRAY_CPP} + DEPENDS ${PREPROCESSED_SOURCE} ${PROCESS_RC_SCRIPT} + ) + endif() include_directories(${RESOURCE_STRING_HEADER_DIR}) set(${TARGET_FILE} ${RESOURCE_ENTRY_ARRAY_CPP} PARENT_SCOPE) - endfunction() add_subdirectory(nativeresources) endif(CLR_CMAKE_HOST_UNIX) +<<<<<<< HEAD if(NOT CLR_CMAKE_HOST_TVOS AND NOT CLR_CMAKE_TARGET_ARCH_WASM) add_subdirectory(utilcode) add_subdirectory(inc) +======= +add_subdirectory(utilcode) +add_subdirectory(inc) +>>>>>>> upstream-jun - if (CLR_CMAKE_BUILD_TOOLS) - add_subdirectory(ilasm) - add_subdirectory(ildasm) - endif(CLR_CMAKE_BUILD_TOOLS) - add_subdirectory(gcinfo) +if (CLR_CMAKE_BUILD_TOOLS) + add_subdirectory(ilasm) + add_subdirectory(ildasm) +endif(CLR_CMAKE_BUILD_TOOLS) +add_subdirectory(gcinfo) + +if (NOT CLR_CMAKE_TARGET_ARCH_WASM) add_subdirectory(jit) - add_subdirectory(vm) - add_subdirectory(md) +endif() + +add_subdirectory(vm) +add_subdirectory(md) + +if (NOT CLR_CMAKE_TARGET_ARCH_WASM) add_subdirectory(debug) - add_subdirectory(binder) - add_subdirectory(classlibnative) - add_subdirectory(dlls) +endif() + +add_subdirectory(binder) +add_subdirectory(classlibnative) +add_subdirectory(dlls) + +if (NOT CLR_CMAKE_TARGET_ARCH_WASM) add_subdirectory(unwinder) add_subdirectory(interop) endif() diff --git a/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.csproj index b77d7f3f44a1..07861b33acb6 100644 --- a/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.csproj +++ b/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.csproj @@ -11,7 +11,7 @@ $(RuntimeBinDir)IL\ Debug;Release;Checked - x64;x86;arm;arm64 + x64;x86;arm;arm64;wasm true @@ -19,8 +19,9 @@ $(IntermediateOutputPath)ILLink.Descriptors.xml $(MSBuildThisFileDirectory)src\ILLink\ - true - true + true + true + true @@ -73,6 +74,9 @@ $(DefineConstants);TARGET_RISCV64 + + $(DefineConstants);TARGET_WASM + @@ -107,6 +111,10 @@ + + + Common\Internal\VersionResilientHashCode.cs + @@ -166,6 +174,7 @@ + @@ -196,6 +205,7 @@ + @@ -244,7 +254,8 @@ - + + @@ -276,7 +287,7 @@ - + @@ -309,30 +320,7 @@ - - - - - src\System\Diagnostics\Eventing\NativeRuntimeEventSource.Generated.cs - - + + - - - - - - <_PythonWarningParameter>-Wall - <_PythonWarningParameter Condition="'$(MSBuildTreatWarningsAsErrors)' == 'true'">$(_PythonWarningParameter) -Werror - <_EventingSourceFileDirectory>%(EventingSourceFile.RootDir)%(EventingSourceFile.Directory) - <_EventingSourceFileDirectory Condition="HasTrailingSlash('$(_EventingSourceFileDirectory)')">$(_EventingSourceFileDirectory.TrimEnd('\')) - - - - - - - - - diff --git a/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.sln b/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.sln deleted file mode 100644 index ccd486794b79..000000000000 --- a/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.sln +++ /dev/null @@ -1,492 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.11.35017.193 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib", "System.Private.CoreLib.csproj", "{3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}" -EndProject -Project("{D954291E-2A0B-460D-934E-DC6B0785DB48}") = "System.Private.CoreLib.Shared", "..\..\libraries\System.Private.CoreLib\src\System.Private.CoreLib.Shared.shproj", "{845C8B26-350B-4E63-BD11-2C8150444E28}" 
-EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib.Generators", "..\..\libraries\System.Private.CoreLib\gen\System.Private.CoreLib.Generators.csproj", "{7196828B-5E00-4BC6-9A1E-492C948E41A3}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILLink.Tasks", "..\..\tools\illink\src\ILLink.Tasks\ILLink.Tasks.csproj", "{EE093971-5189-4438-84CF-EBC8C8AC7713}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{D47C8483-58B6-4669-A2CF-9EF1C26C3F48}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILLink.RoslynAnalyzer", "..\..\tools\illink\src\ILLink.RoslynAnalyzer\ILLink.RoslynAnalyzer.csproj", "{F4311FB3-2A1D-4309-AEAE-324373B56163}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILLink.CodeFixProvider", "..\..\tools\illink\src\ILLink.CodeFix\ILLink.CodeFixProvider.csproj", "{13D30D8B-8997-4D6C-B09B-BD2A7F238420}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mono.Linker", "..\..\tools\illink\src\linker\Mono.Linker.csproj", "{94712627-28B7-4757-8988-4259DC7E9F5F}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.DependencyAnalysisFramework", "..\tools\aot\ILCompiler.DependencyAnalysisFramework\ILCompiler.DependencyAnalysisFramework.csproj", "{FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{AC9B32A7-1C99-4915-83D9-7A47DD22B9B6}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{0D24A796-E606-41C0-BEB5-8621A7568E01}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{265BA810-289A-4BD6-8DEB-F896F8011365}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mono.Linker", "..\..\tools\illink\src\linker\ref\Mono.Linker.csproj", "{6574A353-18D4-4198-A2CE-12FF3CC5AA0A}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{520534D2-D466-499B-81BA-F005DEEB3BA9}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{4847C9F6-F653-4A8C-9150-65C250112D66}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LibraryImportGenerator", "..\..\libraries\System.Runtime.InteropServices\gen\LibraryImportGenerator\LibraryImportGenerator.csproj", "{C99433C1-ADDE-4077-8590-207ADC67ED41}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Interop.SourceGeneration", "..\..\libraries\System.Runtime.InteropServices\gen\Microsoft.Interop.SourceGeneration\Microsoft.Interop.SourceGeneration.csproj", "{04388B88-25F9-4CDA-AA6F-FB839A9C2A58}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{C0B7FE54-16F9-46A8-B9A7-7577A97F01C9}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib", "..\..\libraries\System.Private.CoreLib\ref\System.Private.CoreLib.csproj", "{58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}" -EndProject -Project("{D954291E-2A0B-460D-934E-DC6B0785DB48}") = "ILLink.Shared", "..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.shproj", "{FF598E93-8E9E-4091-9F50-61A7572663AE}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Checked|amd64 = Checked|amd64 - Checked|Any CPU = Checked|Any CPU - Checked|arm = Checked|arm - Checked|arm64 = Checked|arm64 - Checked|x64 = Checked|x64 - Checked|x86 = Checked|x86 - Debug|amd64 = Debug|amd64 - Debug|Any CPU = Debug|Any CPU - Debug|arm = Debug|arm - Debug|arm64 = Debug|arm64 - 
Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|amd64 = Release|amd64 - Release|Any CPU = Release|Any CPU - Release|arm = Release|arm - Release|arm64 = Release|arm64 - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|amd64.ActiveCfg = Checked|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|amd64.Build.0 = Checked|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|Any CPU.ActiveCfg = Checked|x86 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|arm.ActiveCfg = Checked|arm - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|arm.Build.0 = Checked|arm - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|arm64.ActiveCfg = Checked|arm64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|arm64.Build.0 = Checked|arm64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|x64.ActiveCfg = Checked|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|x64.Build.0 = Checked|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|x86.ActiveCfg = Checked|x86 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Checked|x86.Build.0 = Checked|x86 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|amd64.ActiveCfg = Debug|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|amd64.Build.0 = Debug|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|Any CPU.ActiveCfg = Debug|x86 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|arm.ActiveCfg = Debug|arm - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|arm.Build.0 = Debug|arm - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|arm64.ActiveCfg = Debug|arm64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|arm64.Build.0 = Debug|arm64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|x64.ActiveCfg = Debug|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|x64.Build.0 = Debug|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|x86.ActiveCfg = Debug|x86 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Debug|x86.Build.0 = Debug|x86 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|amd64.ActiveCfg = Release|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|amd64.Build.0 = Release|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|Any CPU.ActiveCfg = Release|x86 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|arm.ActiveCfg = Release|arm - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|arm.Build.0 = Release|arm - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|arm64.ActiveCfg = Release|arm64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|arm64.Build.0 = Release|arm64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|x64.ActiveCfg = Release|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|x64.Build.0 = Release|x64 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|x86.ActiveCfg = Release|x86 - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9}.Release|x86.Build.0 = Release|x86 - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|amd64.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|amd64.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|Any CPU.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|arm.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|arm.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|arm64.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|arm64.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|x64.ActiveCfg = Release|Any CPU - 
{7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|x64.Build.0 = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|x86.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Checked|x86.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|amd64.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|amd64.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|Any CPU.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|arm.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|arm.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|arm64.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|arm64.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|x64.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|x64.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|x86.ActiveCfg = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Debug|x86.Build.0 = Debug|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|amd64.ActiveCfg = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|amd64.Build.0 = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|Any CPU.ActiveCfg = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|Any CPU.Build.0 = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|arm.ActiveCfg = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|arm.Build.0 = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|arm64.ActiveCfg = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|arm64.Build.0 = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|x64.ActiveCfg = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|x64.Build.0 = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|x86.ActiveCfg = Release|Any CPU - {7196828B-5E00-4BC6-9A1E-492C948E41A3}.Release|x86.Build.0 = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|amd64.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|amd64.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|Any CPU.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|arm.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|arm.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|arm64.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|arm64.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|x64.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|x64.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|x86.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Checked|x86.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|amd64.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|amd64.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|Any CPU.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|arm.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|arm.Build.0 = Debug|Any 
CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|arm64.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|arm64.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|x64.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|x64.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|x86.ActiveCfg = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Debug|x86.Build.0 = Debug|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|amd64.ActiveCfg = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|amd64.Build.0 = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|Any CPU.ActiveCfg = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|Any CPU.Build.0 = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|arm.ActiveCfg = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|arm.Build.0 = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|arm64.ActiveCfg = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|arm64.Build.0 = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|x64.ActiveCfg = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|x64.Build.0 = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|x86.ActiveCfg = Release|Any CPU - {EE093971-5189-4438-84CF-EBC8C8AC7713}.Release|x86.Build.0 = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|amd64.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|amd64.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|Any CPU.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|arm.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|arm.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|arm64.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|arm64.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|x64.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|x64.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|x86.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Checked|x86.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|amd64.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|amd64.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|Any CPU.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|arm.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|arm.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|arm64.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|arm64.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|x64.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|x64.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|x86.ActiveCfg = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Debug|x86.Build.0 = Debug|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|amd64.ActiveCfg = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|amd64.Build.0 = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|Any CPU.ActiveCfg = 
Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|Any CPU.Build.0 = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|arm.ActiveCfg = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|arm.Build.0 = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|arm64.ActiveCfg = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|arm64.Build.0 = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|x64.ActiveCfg = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|x64.Build.0 = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|x86.ActiveCfg = Release|Any CPU - {F4311FB3-2A1D-4309-AEAE-324373B56163}.Release|x86.Build.0 = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|amd64.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|amd64.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|Any CPU.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|arm.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|arm.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|arm64.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|arm64.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|x64.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|x64.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|x86.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Checked|x86.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|amd64.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|amd64.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|Any CPU.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|arm.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|arm.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|arm64.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|arm64.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|x64.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|x64.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|x86.ActiveCfg = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Debug|x86.Build.0 = Debug|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|amd64.ActiveCfg = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|amd64.Build.0 = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|Any CPU.ActiveCfg = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|Any CPU.Build.0 = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|arm.ActiveCfg = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|arm.Build.0 = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|arm64.ActiveCfg = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|arm64.Build.0 = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|x64.ActiveCfg = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|x64.Build.0 = Release|Any CPU - {13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|x86.ActiveCfg = Release|Any CPU - 
{13D30D8B-8997-4D6C-B09B-BD2A7F238420}.Release|x86.Build.0 = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|amd64.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|amd64.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|Any CPU.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|arm.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|arm.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|arm64.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|arm64.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|x64.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|x64.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|x86.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Checked|x86.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|amd64.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|amd64.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|Any CPU.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|arm.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|arm.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|arm64.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|arm64.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|x64.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|x64.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|x86.ActiveCfg = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Debug|x86.Build.0 = Debug|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|amd64.ActiveCfg = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|amd64.Build.0 = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|Any CPU.ActiveCfg = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|Any CPU.Build.0 = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|arm.ActiveCfg = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|arm.Build.0 = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|arm64.ActiveCfg = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|arm64.Build.0 = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|x64.ActiveCfg = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|x64.Build.0 = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|x86.ActiveCfg = Release|Any CPU - {94712627-28B7-4757-8988-4259DC7E9F5F}.Release|x86.Build.0 = Release|Any CPU - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|amd64.ActiveCfg = Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|amd64.Build.0 = Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|Any CPU.ActiveCfg = Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|Any CPU.Build.0 = Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|arm.ActiveCfg = Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|arm.Build.0 = Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|arm64.ActiveCfg = Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|arm64.Build.0 = 
Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|x64.ActiveCfg = Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|x64.Build.0 = Checked|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|x86.ActiveCfg = Checked|x86 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Checked|x86.Build.0 = Checked|x86 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|amd64.ActiveCfg = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|amd64.Build.0 = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|Any CPU.ActiveCfg = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|Any CPU.Build.0 = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|arm.ActiveCfg = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|arm.Build.0 = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|arm64.ActiveCfg = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|arm64.Build.0 = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|x64.ActiveCfg = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|x64.Build.0 = Debug|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|x86.ActiveCfg = Debug|x86 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Debug|x86.Build.0 = Debug|x86 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|amd64.ActiveCfg = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|amd64.Build.0 = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|Any CPU.ActiveCfg = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|Any CPU.Build.0 = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|arm.ActiveCfg = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|arm.Build.0 = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|arm64.ActiveCfg = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|arm64.Build.0 = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|x64.ActiveCfg = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|x64.Build.0 = Release|x64 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|x86.ActiveCfg = Release|x86 - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E}.Release|x86.Build.0 = Release|x86 - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|amd64.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|amd64.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|Any CPU.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|arm.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|arm.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|arm64.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|arm64.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|x64.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|x64.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|x86.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Checked|x86.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|amd64.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|amd64.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|Any CPU.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|arm.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|arm.Build.0 = Debug|Any CPU - 
{6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|arm64.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|arm64.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|x64.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|x64.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|x86.ActiveCfg = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Debug|x86.Build.0 = Debug|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|amd64.ActiveCfg = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|amd64.Build.0 = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|Any CPU.ActiveCfg = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|Any CPU.Build.0 = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|arm.ActiveCfg = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|arm.Build.0 = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|arm64.ActiveCfg = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|arm64.Build.0 = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|x64.ActiveCfg = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|x64.Build.0 = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|x86.ActiveCfg = Release|Any CPU - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A}.Release|x86.Build.0 = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|amd64.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|amd64.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|Any CPU.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|arm.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|arm.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|arm64.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|arm64.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|x64.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|x64.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|x86.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Checked|x86.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|amd64.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|amd64.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|Any CPU.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|arm.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|arm.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|arm64.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|arm64.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|x64.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|x64.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|x86.ActiveCfg = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Debug|x86.Build.0 = Debug|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|amd64.ActiveCfg = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|amd64.Build.0 = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|Any CPU.ActiveCfg = 
Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|Any CPU.Build.0 = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|arm.ActiveCfg = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|arm.Build.0 = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|arm64.ActiveCfg = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|arm64.Build.0 = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|x64.ActiveCfg = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|x64.Build.0 = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|x86.ActiveCfg = Release|Any CPU - {C99433C1-ADDE-4077-8590-207ADC67ED41}.Release|x86.Build.0 = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|amd64.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|amd64.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|Any CPU.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|arm.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|arm.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|arm64.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|arm64.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|x64.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|x64.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|x86.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Checked|x86.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|amd64.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|amd64.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|Any CPU.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|arm.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|arm.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|arm64.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|arm64.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|x64.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|x64.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|x86.ActiveCfg = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Debug|x86.Build.0 = Debug|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|amd64.ActiveCfg = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|amd64.Build.0 = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|Any CPU.ActiveCfg = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|Any CPU.Build.0 = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|arm.ActiveCfg = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|arm.Build.0 = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|arm64.ActiveCfg = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|arm64.Build.0 = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|x64.ActiveCfg = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|x64.Build.0 = Release|Any CPU - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|x86.ActiveCfg = Release|Any CPU - 
{04388B88-25F9-4CDA-AA6F-FB839A9C2A58}.Release|x86.Build.0 = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|amd64.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|amd64.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|Any CPU.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|arm.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|arm.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|arm64.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|arm64.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|x64.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|x64.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|x86.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Checked|x86.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|amd64.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|amd64.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|Any CPU.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|arm.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|arm.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|arm64.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|arm64.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|x64.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|x64.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|x86.ActiveCfg = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Debug|x86.Build.0 = Debug|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|amd64.ActiveCfg = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|amd64.Build.0 = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|Any CPU.ActiveCfg = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|Any CPU.Build.0 = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|arm.ActiveCfg = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|arm.Build.0 = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|arm64.ActiveCfg = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|arm64.Build.0 = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|x64.ActiveCfg = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|x64.Build.0 = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|x86.ActiveCfg = Release|Any CPU - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF}.Release|x86.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(NestedProjects) = preSolution - {3DA06C3A-2E7B-4CB7-80ED-9B12916013F9} = {520534D2-D466-499B-81BA-F005DEEB3BA9} - {845C8B26-350B-4E63-BD11-2C8150444E28} = {520534D2-D466-499B-81BA-F005DEEB3BA9} - {7196828B-5E00-4BC6-9A1E-492C948E41A3} = {4847C9F6-F653-4A8C-9150-65C250112D66} - {EE093971-5189-4438-84CF-EBC8C8AC7713} = {0D24A796-E606-41C0-BEB5-8621A7568E01} - {F4311FB3-2A1D-4309-AEAE-324373B56163} = {265BA810-289A-4BD6-8DEB-F896F8011365} - {13D30D8B-8997-4D6C-B09B-BD2A7F238420} = 
{265BA810-289A-4BD6-8DEB-F896F8011365} - {94712627-28B7-4757-8988-4259DC7E9F5F} = {0D24A796-E606-41C0-BEB5-8621A7568E01} - {FCF6BB26-EBD8-40FA-B33A-02E86D543C2E} = {0D24A796-E606-41C0-BEB5-8621A7568E01} - {AC9B32A7-1C99-4915-83D9-7A47DD22B9B6} = {D47C8483-58B6-4669-A2CF-9EF1C26C3F48} - {0D24A796-E606-41C0-BEB5-8621A7568E01} = {D47C8483-58B6-4669-A2CF-9EF1C26C3F48} - {265BA810-289A-4BD6-8DEB-F896F8011365} = {D47C8483-58B6-4669-A2CF-9EF1C26C3F48} - {6574A353-18D4-4198-A2CE-12FF3CC5AA0A} = {AC9B32A7-1C99-4915-83D9-7A47DD22B9B6} - {C99433C1-ADDE-4077-8590-207ADC67ED41} = {4847C9F6-F653-4A8C-9150-65C250112D66} - {04388B88-25F9-4CDA-AA6F-FB839A9C2A58} = {4847C9F6-F653-4A8C-9150-65C250112D66} - {58A9924A-2C19-4BF1-8F22-B3E7F74F9BAF} = {C0B7FE54-16F9-46A8-B9A7-7577A97F01C9} - {FF598E93-8E9E-4091-9F50-61A7572663AE} = {0D24A796-E606-41C0-BEB5-8621A7568E01} - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {DA05075A-7CDA-4F65-AF6A-CB5DB6CF936F} - EndGlobalSection - GlobalSection(SharedMSBuildProjectFiles) = preSolution - ..\..\libraries\System.Private.CoreLib\src\System.Private.CoreLib.Shared.projitems*{3da06c3a-2e7b-4cb7-80ed-9b12916013f9}*SharedItemsImports = 5 - ..\..\libraries\System.Private.CoreLib\src\System.Private.CoreLib.Shared.projitems*{845c8b26-350b-4e63-bd11-2c8150444e28}*SharedItemsImports = 13 - ..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{94712627-28b7-4757-8988-4259dc7e9f5f}*SharedItemsImports = 5 - ..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{f4311fb3-2a1d-4309-aeae-324373b56163}*SharedItemsImports = 5 - ..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{ff598e93-8e9e-4091-9f50-61a7572663ae}*SharedItemsImports = 13 - EndGlobalSection -EndGlobal diff --git a/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.slnx b/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.slnx new file mode 100644 index 000000000000..1eab2c5f0e16 --- /dev/null +++ b/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.slnx @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/coreclr/System.Private.CoreLib/src/Internal/VersionResilientHashCode.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/Internal/VersionResilientHashCode.CoreCLR.cs new file mode 100644 index 000000000000..5ecf25c5bc7a --- /dev/null +++ b/src/coreclr/System.Private.CoreLib/src/Internal/VersionResilientHashCode.CoreCLR.cs @@ -0,0 +1,78 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Reflection.Metadata; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Internal +{ + /// + /// Managed implementation of the version-resilient hash code algorithm. + /// + internal static partial class VersionResilientHashCode + { + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "VersionResilientHashCode_TypeHashCode")] + private static partial int TypeHashCode(QCallTypeHandle typeHandle); + + public static int TypeHashCode(RuntimeType type) + => TypeHashCode(new QCallTypeHandle(ref type)); + + /// + /// TypeName to hash + public static int TypeHashCode(TypeName type) + { + if (type.IsSimple || type.IsConstructedGenericType) + { + int hashcode = NameHashCode(type.IsNested ? 
string.Empty : type.Namespace, type.Name); + if (type.IsNested) + { + hashcode = NestedTypeHashCode(TypeHashCode(type.DeclaringType), hashcode); + } + if (type.IsConstructedGenericType) + { + return GenericInstanceHashCode(hashcode, type.GetGenericArguments()); + } + else + { + return hashcode; + } + } + + if (type.IsArray) + { + return ArrayTypeHashCode(TypeHashCode(type.GetElementType()), type.GetArrayRank()); + } + + if (type.IsPointer) + { + return PointerTypeHashCode(TypeHashCode(type.GetElementType())); + } + + if (type.IsByRef) + { + return ByrefTypeHashCode(TypeHashCode(type.GetElementType())); + } + + throw new NotImplementedException(); + } + + /// + /// CoreCLR ComputeGenericInstanceHashCode + /// + /// Base hash code + /// Instantiation to include in the hash + private static int GenericInstanceHashCode(int hashcode, ReadOnlySpan instantiation) + { + for (int i = 0; i < instantiation.Length; i++) + { + int argumentHashCode = TypeHashCode(instantiation[i]); + hashcode = unchecked(hashcode + RotateLeft(hashcode, 13)) ^ argumentHashCode; + } + return unchecked(hashcode + RotateLeft(hashcode, 15)); + } + } +} diff --git a/src/coreclr/System.Private.CoreLib/src/System/Buffer.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Buffer.CoreCLR.cs index 2a375e74035f..69c7ee7b4abb 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Buffer.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Buffer.CoreCLR.cs @@ -10,13 +10,13 @@ namespace System public partial class Buffer { [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "Buffer_Clear")] - private static unsafe partial void __ZeroMemory(void* b, nuint byteLength); + private static unsafe partial void ZeroMemoryInternal(void* b, nuint byteLength); [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "Buffer_MemMove")] - private static unsafe partial void __Memmove(byte* dest, byte* src, nuint len); + private static unsafe partial void MemmoveInternal(byte* dest, byte* src, nuint len); [MethodImpl(MethodImplOptions.InternalCall)] - private static extern void __BulkMoveWithWriteBarrier(ref byte destination, ref byte source, nuint byteCount); + private static extern void BulkMoveWithWriteBarrierInternal(ref byte destination, ref byte source, nuint byteCount); // Used by ilmarshalers.cpp internal static unsafe void Memcpy(byte* dest, byte* src, int len) diff --git a/src/coreclr/System.Private.CoreLib/src/System/ComAwareWeakReference.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/ComAwareWeakReference.CoreCLR.cs index de9aeddf4030..a4694b9fab25 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/ComAwareWeakReference.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/ComAwareWeakReference.CoreCLR.cs @@ -10,46 +10,56 @@ namespace System internal sealed partial class ComAwareWeakReference { [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWeakRefToObject")] - private static partial void ComWeakRefToObject(IntPtr pComWeakRef, long wrapperId, ObjectHandleOnStack retRcw); + private static partial void ComWeakRefToObject(IntPtr pComWeakRef, ObjectHandleOnStack retRcw); - internal static object? ComWeakRefToObject(IntPtr pComWeakRef, long wrapperId) + internal static object? ComWeakRefToObject(IntPtr pComWeakRef, object? context) { - object? 
retRcw = null; - ComWeakRefToObject(pComWeakRef, wrapperId, ObjectHandleOnStack.Create(ref retRcw)); - return retRcw; +#if FEATURE_COMINTEROP + if (context is null) + { + // This wrapper was not created by ComWrappers, so we try to rehydrate using built-in COM. + object? retRcw = null; + ComWeakRefToObject(pComWeakRef, ObjectHandleOnStack.Create(ref retRcw)); + return retRcw; + } +#endif // FEATURE_COMINTEROP + + return ComWeakRefToComWrappersObject(pComWeakRef, context); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe bool PossiblyComObject(object target) + internal static bool PossiblyComObject(object target) { - // see: syncblk.h - const int IS_HASHCODE_BIT_NUMBER = 26; - const int BIT_SBLK_IS_HASHCODE = 1 << IS_HASHCODE_BIT_NUMBER; - const int BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX = 0x08000000; - - fixed (byte* pRawData = &target.GetRawData()) - { - // The header is 4 bytes before MT field on all architectures - int header = *(int*)(pRawData - sizeof(IntPtr) - sizeof(int)); - // common case: target does not have a syncblock, so there is no interop info - return (header & (BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX | BIT_SBLK_IS_HASHCODE)) == BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX; - } +#if FEATURE_COMINTEROP + return target is __ComObject || PossiblyComWrappersObject(target); +#else // !FEATURE_COMINTEROP + // If we are not using built-in COM, then we can only be a ComWrappers object. + return PossiblyComWrappersObject(target); +#endif // FEATURE_COMINTEROP } - [MethodImpl(MethodImplOptions.InternalCall)] - internal static extern bool HasInteropInfo(object target); - [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ObjectToComWeakRef")] - private static partial IntPtr ObjectToComWeakRef(ObjectHandleOnStack retRcw, out long wrapperId); + private static partial IntPtr ObjectToComWeakRef(ObjectHandleOnStack retRcw); - internal static nint ObjectToComWeakRef(object target, out long wrapperId) + internal static nint ObjectToComWeakRef(object target, out object? context) { - if (HasInteropInfo(target)) +#if FEATURE_COMINTEROP + if (target is __ComObject) + { + // This object is using built-in COM, so use built-in COM to create the weak reference. + context = null; + return ObjectToComWeakRef(ObjectHandleOnStack.Create(ref target)); + } +#endif // FEATURE_COMINTEROP + + if (PossiblyComWrappersObject(target)) { - return ObjectToComWeakRef(ObjectHandleOnStack.Create(ref target), out wrapperId); + return ComWrappersObjectToComWeakRef(target, out context); } - wrapperId = 0; + // This object is not produced using built-in COM or ComWrappers + // or is an aggregated object, so we cannot create a weak reference. + context = null; return IntPtr.Zero; } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs b/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs index 9bd47568501a..cd39afd38c99 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs @@ -65,16 +65,6 @@ static void NotifyOfCrossThreadDependencySlow() [SuppressGCTransition] private static partial int IsManagedDebuggerAttached(); - // Constants representing the importance level of messages to be logged. - // - // An attached debugger can enable or disable which messages will - // actually be reported to the user through the debugger - // services API. This info is communicated to the runtime so only - // desired events are actually reported to the debugger. 
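The ComAwareWeakReference.CoreCLR.cs hunk above replaces the wrapper-id plumbing with an opaque context object: when built-in COM interop is available, a null context means the weak reference was produced by built-in COM, and any non-null context is handed back to ComWrappers. A minimal sketch of that dispatch shape, with hypothetical stand-in methods rather than the real QCall and ComWrappers paths:

```csharp
using System;

// Illustrative sketch only: ResolveViaBuiltInCom and ResolveViaComWrappers are
// hypothetical stand-ins for the QCall and ComWrappers code paths in the diff above.
internal static class ComWeakRefDispatchSketch
{
    public static object? Resolve(IntPtr nativeWeakReference, object? context)
    {
        if (context is null)
        {
            // No ComWrappers context recorded: the wrapper came from built-in COM.
            return ResolveViaBuiltInCom(nativeWeakReference);
        }

        // A non-null context identifies the ComWrappers instance that produced the wrapper.
        return ResolveViaComWrappers(nativeWeakReference, context);
    }

    private static object? ResolveViaBuiltInCom(IntPtr weakRef) => null;                  // stub
    private static object? ResolveViaComWrappers(IntPtr weakRef, object context) => null; // stub
}
```

The same convention runs in the other direction in ObjectToComWeakRef: a built-in `__ComObject` target records a null context, while a ComWrappers target carries its context out for later rehydration.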
- // - // Constant representing the default category - public static readonly string? DefaultCategory; - // Posts a message for the attached debugger. If there is no // debugger attached, has no effect. The debugger may or may not // report the message depending on its settings. diff --git a/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/StackFrameHelper.cs b/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/StackFrameHelper.cs index bed051f8c190..fca904ac8482 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/StackFrameHelper.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/StackFrameHelper.cs @@ -177,7 +177,7 @@ internal void InitializeSourceInfo(bool fNeedFileInfo, Exception? exception) public bool IsLastFrameFromForeignExceptionStackTrace(int i) { - return (rgiLastFrameFromForeignExceptionStackTrace == null) ? false : rgiLastFrameFromForeignExceptionStackTrace[i]; + return rgiLastFrameFromForeignExceptionStackTrace != null && rgiLastFrameFromForeignExceptionStackTrace[i]; } public int GetNumberOfFrames() { return iFrameCount; } diff --git a/src/coreclr/System.Private.CoreLib/src/System/Environment.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Environment.CoreCLR.cs index e0a24a42ef32..8a10a83972b8 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Environment.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Environment.CoreCLR.cs @@ -108,21 +108,5 @@ private static unsafe string[] InitializeCommandLineArgs(char* exePath, int argc // Used by VM internal static string? GetResourceStringLocal(string key) => SR.GetResourceString(key); - - /// Gets the number of milliseconds elapsed since the system started. - /// A 32-bit signed integer containing the amount of time in milliseconds that has passed since the last time the computer was started. - public static extern int TickCount - { - [MethodImpl(MethodImplOptions.InternalCall)] - get; - } - - /// Gets the number of milliseconds elapsed since the system started. - /// A 64-bit signed integer containing the amount of time in milliseconds that has passed since the last time the computer was started. - public static extern long TickCount64 - { - [MethodImpl(MethodImplOptions.InternalCall)] - get; - } } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/GC.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/GC.CoreCLR.cs index 15ec570949bb..101fbfa7c6bc 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/GC.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/GC.CoreCLR.cs @@ -2,14 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. /*============================================================ -** -** -** ** Purpose: Exposes features of the Garbage Collector through ** the class libraries. This is a class which cannot be ** instantiated. 
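Stepping back to the Environment.CoreCLR.cs hunk above: only the CoreCLR-specific extern declarations of TickCount and TickCount64 are removed there; the public Environment properties remain, presumably backed by a shared or reworked implementation elsewhere in this change. Typical callers are unaffected, for example coarse elapsed-time measurement:

```csharp
using System;
using System.Threading;

// Environment.TickCount64 is the usual choice for coarse elapsed-time measurement,
// since the 32-bit TickCount wraps to negative values after roughly 25 days of uptime.
long start = Environment.TickCount64;
Thread.Sleep(100);
long elapsedMs = Environment.TickCount64 - start;
Console.WriteLine($"elapsed ~ {elapsedMs} ms");
```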
-** -** ===========================================================*/ using System.Collections.Generic; @@ -111,7 +106,7 @@ internal enum GC_ALLOC_FLAGS private static partial long GetTotalMemory(); [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "GCInterface_Collect")] - private static partial void _Collect(int generation, int mode); + private static partial void _Collect(int generation, int mode, [MarshalAs(UnmanagedType.U1)] bool lowMemoryPressure); [MethodImpl(MethodImplOptions.InternalCall)] private static extern int GetMaxGeneration(); @@ -179,7 +174,7 @@ public static void Collect(int generation) public static void Collect() { // -1 says to GC all generations. - _Collect(-1, (int)InternalGCCollectionMode.Blocking); + _Collect(-1, (int)InternalGCCollectionMode.Blocking, lowMemoryPressure: false); } public static void Collect(int generation, GCCollectionMode mode) @@ -194,6 +189,11 @@ public static void Collect(int generation, GCCollectionMode mode, bool blocking) } public static void Collect(int generation, GCCollectionMode mode, bool blocking, bool compacting) + { + Collect(generation, mode, blocking, compacting, lowMemoryPressure: false); + } + + internal static void Collect(int generation, GCCollectionMode mode, bool blocking, bool compacting, bool lowMemoryPressure) { ArgumentOutOfRangeException.ThrowIfNegative(generation); @@ -202,7 +202,6 @@ public static void Collect(int generation, GCCollectionMode mode, bool blocking, throw new ArgumentOutOfRangeException(nameof(mode), SR.ArgumentOutOfRange_Enum); } - int iInternalModes = 0; if (mode == GCCollectionMode.Optimized) @@ -227,7 +226,9 @@ public static void Collect(int generation, GCCollectionMode mode, bool blocking, } if (compacting) + { iInternalModes |= (int)InternalGCCollectionMode.Compacting; + } if (blocking) { @@ -238,7 +239,7 @@ public static void Collect(int generation, GCCollectionMode mode, bool blocking, iInternalModes |= (int)InternalGCCollectionMode.NonBlocking; } - _Collect(generation, iInternalModes); + _Collect(generation, (int)iInternalModes, lowMemoryPressure); } public static int CollectionCount(int generation) @@ -347,13 +348,17 @@ public static void WaitForPendingFinalizers() // Indicates that the system should not call the Finalize() method on // an object that would normally require this call. [MethodImpl(MethodImplOptions.InternalCall)] - private static extern void _SuppressFinalize(object o); + private static extern void SuppressFinalizeInternal(object o); - public static void SuppressFinalize(object obj) + public static unsafe void SuppressFinalize(object obj) { ArgumentNullException.ThrowIfNull(obj); - _SuppressFinalize(obj); + MethodTable* pMT = RuntimeHelpers.GetMethodTable(obj); + if (pMT->HasFinalizer) + { + SuppressFinalizeInternal(obj); + } } // Indicates that the system should call the Finalize() method on an object @@ -600,12 +605,14 @@ private static bool InvokeMemoryLoadChangeNotifications() return true; } - // We need to take a snapshot of s_notifications.Count, so that in the case that s_notifications[i].Notification() registers new notifications, - // we neither get rid of them nor iterate over them + // We need to take a snapshot of s_notifications.Count, so that in the case that + // s_notifications[i].Notification() registers new notifications, we neither get rid + // of them nor iterate over them. int count = s_notifications.Count; - // If there is no existing notifications, we won't be iterating over any and we won't be adding any new one. 
Also, there wasn't any added since - // we last invoked this method so it's safe to assume we can reset s_previousMemoryLoad. + // If there is no existing notifications, we won't be iterating over any and we won't + // be adding any new one. Also, there wasn't any added since we last invoked this + // method so it's safe to assume we can reset s_previousMemoryLoad. if (count == 0) { s_previousMemoryLoad = float.MaxValue; @@ -673,7 +680,7 @@ private unsafe struct NoGCRegionCallbackFinalizerWorkItem public bool scheduled; public bool abandoned; - public GCHandle action; + public GCHandle action; } internal enum EnableNoGCRegionCallbackStatus @@ -709,7 +716,7 @@ public static unsafe void RegisterNoGCRegionCallback(long totalSize, Action call try { pWorkItem = (NoGCRegionCallbackFinalizerWorkItem*)NativeMemory.AllocZeroed((nuint)sizeof(NoGCRegionCallbackFinalizerWorkItem)); - pWorkItem->action = GCHandle.Alloc(callback); + pWorkItem->action = new GCHandle(callback); pWorkItem->callback = &Callback; EnableNoGCRegionCallbackStatus status = (EnableNoGCRegionCallbackStatus)_EnableNoGCRegionCallback(pWorkItem, totalSize); @@ -739,14 +746,13 @@ static void Callback(NoGCRegionCallbackFinalizerWorkItem* pWorkItem) { Debug.Assert(pWorkItem->scheduled); if (!pWorkItem->abandoned) - ((Action)(pWorkItem->action.Target!))(); + pWorkItem->action.Target(); Free(pWorkItem); } static void Free(NoGCRegionCallbackFinalizerWorkItem* pWorkItem) { - if (pWorkItem->action.IsAllocated) - pWorkItem->action.Free(); + pWorkItem->action.Dispose(); NativeMemory.Free(pWorkItem); } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/MulticastDelegate.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/MulticastDelegate.CoreCLR.cs index 0846f7060ce3..9740d23843f4 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/MulticastDelegate.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/MulticastDelegate.CoreCLR.cs @@ -164,10 +164,10 @@ private static bool TrySetSlot(object?[] a, int index, object o) return false; } - private MulticastDelegate NewMulticastDelegate(object[] invocationList, int invocationCount, bool thisIsMultiCastAlready) + private unsafe MulticastDelegate NewMulticastDelegate(object[] invocationList, int invocationCount, bool thisIsMultiCastAlready) { // First, allocate a new multicast delegate just like this one, i.e. same type as the this object - MulticastDelegate result = Unsafe.As(RuntimeTypeHandle.InternalAllocNoChecks((RuntimeType)GetType())); + MulticastDelegate result = Unsafe.As(RuntimeTypeHandle.InternalAllocNoChecks(RuntimeHelpers.GetMethodTable(this))); // Performance optimization - if this already points to a true multicast delegate, // copy _methodPtr and _methodPtrAux fields rather than calling into the EE to get them diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs index 4c02d9abbb80..ee3f6a3e46fb 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs @@ -193,17 +193,23 @@ public override void EmitCalli(OpCode opcode, // If there is a non-void return type, push one. if (returnType != typeof(void)) stackchange++; + // Pop off arguments if any. if (parameterTypes != null) stackchange -= parameterTypes.Length; + // Pop off vararg arguments. 
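The EmitCalli change in this hunk (and the matching one in RuntimeILGenerator.cs below) tightens the stack-depth bookkeeping: the implicit this argument is popped only when HasThis is set without ExplicitThis, and the native function pointer consumed by calli is accounted for explicitly. A standalone sketch of that arithmetic, not the BCL API itself:

```csharp
using System;
using System.Reflection;

// Illustrative-only mirror of the stack-delta accounting above: push the return value,
// pop the arguments, pop the implicit 'this' (HasThis without ExplicitThis), and pop
// the native function pointer that calli itself consumes.
static int CalliStackDelta(CallingConventions convention, int parameterCount, bool returnsValue)
{
    int delta = returnsValue ? 1 : 0;
    delta -= parameterCount;

    bool hasImplicitThis =
        (convention & CallingConventions.HasThis) == CallingConventions.HasThis &&
        (convention & CallingConventions.ExplicitThis) == 0;
    if (hasImplicitThis)
    {
        delta--;
    }

    delta--; // the function pointer popped by calli
    return delta;
}

// Instance call with an implicit 'this', two arguments, and a return value: net -3.
Console.WriteLine(CalliStackDelta(CallingConventions.HasThis, parameterCount: 2, returnsValue: true));
```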
if (optionalParameterTypes != null) stackchange -= optionalParameterTypes.Length; - // Pop the this parameter if the method has a this parameter. - if ((callingConvention & CallingConventions.HasThis) == CallingConventions.HasThis) + + // Pop the this parameter if the method has an implicit this parameter. + if ((callingConvention & CallingConventions.HasThis) == CallingConventions.HasThis && + (callingConvention & CallingConventions.ExplicitThis) == 0) stackchange--; + // Pop the native function pointer. stackchange--; + UpdateStackSize(OpCodes.Calli, stackchange); int token = GetTokenForSig(sig.GetSignature(true)); diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/RuntimeILGenerator.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/RuntimeILGenerator.cs index 8976805080d1..b2f0814ca64d 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/RuntimeILGenerator.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/RuntimeILGenerator.cs @@ -536,17 +536,23 @@ public override void EmitCalli(OpCode opcode, CallingConventions callingConventi // If there is a non-void return type, push one. if (returnType != typeof(void)) stackchange++; + // Pop off arguments if any. if (parameterTypes != null) stackchange -= parameterTypes.Length; + // Pop off vararg arguments. if (optionalParameterTypes != null) stackchange -= optionalParameterTypes.Length; - // Pop the this parameter if the method has a this parameter. - if ((callingConvention & CallingConventions.HasThis) == CallingConventions.HasThis) + + // Pop the this parameter if the method has an implicit this parameter. + if ((callingConvention & CallingConventions.HasThis) == CallingConventions.HasThis && + (callingConvention & CallingConventions.ExplicitThis) == 0) stackchange--; + // Pop the native function pointer. stackchange--; + UpdateStackSize(OpCodes.Calli, stackchange); RecordTokenFixup(); diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/SignatureHelper.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/SignatureHelper.cs index c5bb88cf5250..a117d29a3857 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/SignatureHelper.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/SignatureHelper.cs @@ -64,11 +64,12 @@ internal static SignatureHelper GetMethodSigHelper( intCall |= MdSigCallingConvention.Generic; } - if ((callingConvention & CallingConventions.HasThis) == CallingConventions.HasThis) - intCall |= MdSigCallingConvention.HasThis; + const byte Mask = (byte)(CallingConventions.HasThis | CallingConventions.ExplicitThis); + intCall = (MdSigCallingConvention)((byte)intCall | (unchecked((byte)callingConvention) & Mask)); sigHelp = new SignatureHelper(scope, intCall, cGenericParam, returnType, - requiredReturnTypeCustomModifiers, optionalReturnTypeCustomModifiers); + requiredReturnTypeCustomModifiers, optionalReturnTypeCustomModifiers); + sigHelp.AddArguments(parameterTypes, requiredParameterTypeCustomModifiers, optionalParameterTypeCustomModifiers); return sigHelp; @@ -151,11 +152,12 @@ public static SignatureHelper GetPropertySigHelper(Module? 
mod, CallingConventio MdSigCallingConvention intCall = MdSigCallingConvention.Property; - if ((callingConvention & CallingConventions.HasThis) == CallingConventions.HasThis) - intCall |= MdSigCallingConvention.HasThis; + const byte Mask = (byte)(CallingConventions.HasThis | CallingConventions.ExplicitThis); + intCall = (MdSigCallingConvention)((byte)intCall | (unchecked((byte)callingConvention) & Mask)); sigHelp = new SignatureHelper(mod, intCall, returnType, requiredReturnTypeCustomModifiers, optionalReturnTypeCustomModifiers); + sigHelp.AddArguments(parameterTypes, requiredParameterTypeCustomModifiers, optionalParameterTypeCustomModifiers); return sigHelp; diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/InstanceCalliHelper.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/InstanceCalliHelper.cs new file mode 100644 index 000000000000..f79445292e7c --- /dev/null +++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/InstanceCalliHelper.cs @@ -0,0 +1,166 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Runtime.CompilerServices; + +namespace System.Reflection +{ + /// + /// Provides a set of helper methods for calling instance methods using calli. + /// This is necessary since C# function pointers currently do not support instance methods. + /// + internal static unsafe class InstanceCalliHelper + { + // Zero parameter methods such as property getters: + + [Intrinsic] + internal static bool Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static byte Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static char Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static DateTime Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static DateTimeOffset Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static decimal Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static double Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static float Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static Guid Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static short Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static int Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static long Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static nint Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static nuint Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static object? 
Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static sbyte Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static ushort Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static uint Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static ulong Call(delegate* fn, object o) => fn(o); + + [Intrinsic] + internal static void Call(delegate* fn, object o) => fn(o); + + // One parameter methods with no return such as property setters: + + [Intrinsic] + internal static void Call(delegate* fn, object o, bool arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, byte arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, char arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, DateTime arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, DateTimeOffset arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, decimal arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, double arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, float arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, Guid arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, short arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, int arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, long arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, nint arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, nuint arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, object? arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, sbyte arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, ushort arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, uint arg1) => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate* fn, object o, ulong arg1) => fn(o, arg1); + + // Other methods: + + [Intrinsic] + internal static void Call(delegate* fn, object o, object? arg1, object? arg2) + => fn(o, arg1, arg2); + + [Intrinsic] + internal static void Call(delegate* fn, object o, object? arg1, object? arg2, object? arg3) + => fn(o, arg1, arg2, arg3); + + [Intrinsic] + internal static void Call(delegate* fn, object o, object? arg1, object? arg2, object? arg3, object? arg4) + => fn(o, arg1, arg2, arg3, arg4); + + [Intrinsic] + internal static void Call(delegate* fn, object o, object? arg1, object? arg2, object? arg3, object? arg4, object? arg5) + => fn(o, arg1, arg2, arg3, arg4, arg5); + + [Intrinsic] + internal static void Call(delegate* fn, object o, object? arg1, object? arg2, object? arg3, object? arg4, object? arg5, object? arg6) + => fn(o, arg1, arg2, arg3, arg4, arg5, arg6); + + [Intrinsic] + internal static void Call(delegate*?, void> fn, object o, IEnumerable? arg1) + => fn(o, arg1); + + [Intrinsic] + internal static void Call(delegate*?, IEnumerable?, void> fn, object o, IEnumerable? arg1, IEnumerable? 
arg2) + => fn(o, arg1, arg2); + } +} diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Metadata/MetadataUpdater.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Metadata/MetadataUpdater.cs index 413a970afcf6..f0b6774ac455 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Metadata/MetadataUpdater.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Metadata/MetadataUpdater.cs @@ -55,7 +55,7 @@ public static void ApplyUpdate(Assembly assembly, ReadOnlySpan metadataDel /// /// Returns the metadata update capabilities. /// - internal static string GetCapabilities() => "Baseline AddMethodToExistingType AddStaticFieldToExistingType AddInstanceFieldToExistingType NewTypeDefinition ChangeCustomAttributes UpdateParameters GenericUpdateMethod GenericAddMethodToExistingType GenericAddFieldToExistingType"; + internal static string GetCapabilities() => "Baseline AddMethodToExistingType AddStaticFieldToExistingType AddInstanceFieldToExistingType NewTypeDefinition ChangeCustomAttributes UpdateParameters GenericUpdateMethod GenericAddMethodToExistingType GenericAddFieldToExistingType AddFieldRva"; /// /// Returns true if the apply assembly update is enabled and available. diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeConstructorInfo.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeConstructorInfo.CoreCLR.cs index c16d10e97b38..dd855b65fdee 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeConstructorInfo.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeConstructorInfo.CoreCLR.cs @@ -173,6 +173,7 @@ public override IList GetCustomAttributesData() internal RuntimeType GetRuntimeType() { return m_declaringType; } internal RuntimeModule GetRuntimeModule() { return RuntimeTypeHandle.GetModule(m_declaringType); } internal RuntimeAssembly GetRuntimeAssembly() { return GetRuntimeModule().GetRuntimeAssembly(); } + public override bool IsCollectible => m_declaringType.IsCollectible; #endregion #region MethodBase Overrides diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeCustomAttributeData.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeCustomAttributeData.cs index 8a34a6996d14..eab9a97e8ba8 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeCustomAttributeData.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeCustomAttributeData.cs @@ -1642,13 +1642,6 @@ private static bool FilterCustomAttributeRecord( if (!AttributeUsageCheck(attributeType, mustBeInheritable, ref derivedAttributes)) return false; - // Windows Runtime attributes aren't real types - they exist to be read as metadata only, and as such - // should be filtered out of the GetCustomAttributes path. 
- if ((attributeType.Attributes & TypeAttributes.WindowsRuntime) == TypeAttributes.WindowsRuntime) - { - return false; - } - // Resolve the attribute ctor ConstArray ctorSig = scope.GetMethodSignature(caCtorToken); isVarArg = (ctorSig[0] & 0x05) != 0; diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeEventInfo.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeEventInfo.cs index 34b5f378cbdb..e09a405a4144 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeEventInfo.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeEventInfo.cs @@ -129,6 +129,7 @@ public override IList GetCustomAttributesData() public override int MetadataToken => m_token; public override Module Module => GetRuntimeModule(); internal RuntimeModule GetRuntimeModule() { return m_declaringType.GetRuntimeModule(); } + public override bool IsCollectible => m_declaringType.IsCollectible; #endregion #region EventInfo Overrides diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/TypeNameResolver.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/TypeNameResolver.CoreCLR.cs index c438aeb1d9a0..7fba80eb5532 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/TypeNameResolver.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/TypeNameResolver.CoreCLR.cs @@ -3,6 +3,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Globalization; using System.IO; using System.Reflection.Metadata; using System.Runtime.CompilerServices; @@ -21,9 +22,12 @@ internal partial struct TypeNameResolver private bool _extensibleParser; private bool _requireAssemblyQualifiedName; private bool _suppressContextualReflectionContext; + private IntPtr _unsafeAccessorMethod; private Assembly? _requestingAssembly; private Assembly? _topLevelAssembly; + private bool SupportsUnboundGenerics { get => _unsafeAccessorMethod != IntPtr.Zero; } + [RequiresUnreferencedCode("The type might be removed")] internal static Type? GetType( string typeName, @@ -80,13 +84,13 @@ internal partial struct TypeNameResolver bool ignoreCase, Assembly topLevelAssembly) { - TypeName? parsed = TypeNameParser.Parse(typeName, throwOnError); + TypeName? parsed = TypeNameParser.Parse(typeName, throwOnError, new() { IsAssemblyGetType = true }); if (parsed is null) { return null; } - else if (topLevelAssembly is not null && parsed.AssemblyName is not null) + else if (parsed.AssemblyName is not null) { return throwOnError ? throw new ArgumentException(SR.Argument_AssemblyGetTypeCannotSpecifyAssembly) : null; } @@ -128,10 +132,15 @@ internal static RuntimeType GetTypeReferencedByCustomAttribute(string typeName, // Used by VM internal static unsafe RuntimeType? GetTypeHelper(char* pTypeName, RuntimeAssembly? requestingAssembly, - bool throwOnError, bool requireAssemblyQualifiedName) + bool throwOnError, bool requireAssemblyQualifiedName, IntPtr unsafeAccessorMethod) { ReadOnlySpan typeName = MemoryMarshal.CreateReadOnlySpanFromNullTerminated(pTypeName); + return GetTypeHelper(typeName, requestingAssembly, throwOnError, requireAssemblyQualifiedName, unsafeAccessorMethod); + } + internal static RuntimeType? GetTypeHelper(ReadOnlySpan typeName, RuntimeAssembly? 
requestingAssembly, + bool throwOnError, bool requireAssemblyQualifiedName, IntPtr unsafeAccessorMethod = 0) + { // Compat: Empty name throws TypeLoadException instead of // the natural ArgumentException if (typeName.Length == 0) @@ -153,6 +162,7 @@ internal static RuntimeType GetTypeReferencedByCustomAttribute(string typeName, _throwOnError = throwOnError, _suppressContextualReflectionContext = true, _requireAssemblyQualifiedName = requireAssemblyQualifiedName, + _unsafeAccessorMethod = unsafeAccessorMethod, }.Resolve(parsed); if (type != null) @@ -181,6 +191,9 @@ internal static RuntimeType GetTypeReferencedByCustomAttribute(string typeName, return assembly; } + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "UnsafeAccessors_ResolveGenericParamToTypeHandle")] + private static partial IntPtr ResolveGenericParamToTypeHandle(IntPtr unsafeAccessorMethod, [MarshalAs(UnmanagedType.Bool)] bool isMethodParam, uint paramIndex); + [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2026:RequiresUnreferencedCode", Justification = "TypeNameResolver.GetType is marked as RequiresUnreferencedCode.")] [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2075:UnrecognizedReflectionPattern", @@ -223,6 +236,42 @@ internal static RuntimeType GetTypeReferencedByCustomAttribute(string typeName, { if (assembly is null) { + if (SupportsUnboundGenerics + && !string.IsNullOrEmpty(escapedTypeName) + && escapedTypeName[0] == '!') + { + Debug.Assert(_throwOnError); // Unbound generic support currently always throws. + + // Parse the type as an unbound generic parameter. Following the common VAR/MVAR IL syntax: + // ! - Represents a zero-based index into the type's generic parameters. + // !! - Represents a zero-based index into the method's generic parameters. + + // Confirm we have at least one more character + if (escapedTypeName.Length == 1) + { + throw new TypeLoadException(SR.Format(SR.TypeLoad_ResolveType, escapedTypeName), typeName: escapedTypeName); + } + + // At this point we expect either another '!' and then a number or a number. + bool isMethodParam = escapedTypeName[1] == '!'; + ReadOnlySpan toParse = isMethodParam + ? escapedTypeName.AsSpan(2) // Skip over "!!" + : escapedTypeName.AsSpan(1); // Skip over "!" + if (!uint.TryParse(toParse, NumberStyles.None, null, out uint paramIndex)) + { + throw new TypeLoadException(SR.Format(SR.TypeLoad_ResolveType, escapedTypeName), typeName: escapedTypeName); + } + + Debug.Assert(_unsafeAccessorMethod != IntPtr.Zero); + IntPtr typeHandle = ResolveGenericParamToTypeHandle(_unsafeAccessorMethod, isMethodParam, paramIndex); + if (typeHandle == IntPtr.Zero) + { + throw new TypeLoadException(SR.Format(SR.TypeLoad_ResolveType, escapedTypeName), typeName: escapedTypeName); + } + + return RuntimeTypeHandle.GetRuntimeTypeFromHandle(typeHandle); + } + if (_requireAssemblyQualifiedName) { if (_throwOnError) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs new file mode 100644 index 000000000000..98e2c4493f1e --- /dev/null +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs @@ -0,0 +1,410 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
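The TypeNameResolver.CoreCLR.cs addition above teaches UnsafeAccessor-driven type resolution the IL-style generic-parameter references: `!n` is the zero-based index of a generic parameter on the declaring type, `!!n` the zero-based index of a generic parameter on the method. A small standalone parser mirroring that convention (illustrative only, outside the runtime):

```csharp
using System;
using System.Globalization;

// "!1"  -> generic parameter 1 of the type
// "!!0" -> generic parameter 0 of the method
static (bool IsMethodParam, uint Index)? TryParseGenericParamRef(string name)
{
    if (name.Length < 2 || name[0] != '!')
    {
        return null;
    }

    bool isMethodParam = name[1] == '!';
    ReadOnlySpan<char> digits = isMethodParam ? name.AsSpan(2) : name.AsSpan(1);

    if (!uint.TryParse(digits, NumberStyles.None, CultureInfo.InvariantCulture, out uint index))
    {
        return null; // no digits, e.g. "!" or "!!"
    }

    return (isMethodParam, index);
}

Console.WriteLine(TryParseGenericParamRef("!1"));  // (False, 1)
Console.WriteLine(TryParseGenericParamRef("!!0")); // (True, 0)
```

In the runtime code the failure cases throw TypeLoadException instead of returning null, and the resolved index is then mapped to a type handle through the new UnsafeAccessors_ResolveGenericParamToTypeHandle QCall.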
+ +using System.Buffers.Binary; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Reflection; +using System.Runtime.InteropServices; +using System.Runtime.Serialization; +using System.Runtime.Versioning; +using System.Threading; +using System.Threading.Tasks; + +namespace System.Runtime.CompilerServices +{ + internal struct ExecutionAndSyncBlockStore + { + // Store current ExecutionContext and SynchronizationContext as "previousXxx". + // This allows us to restore them and undo any Context changes made in stateMachine.MoveNext + // so that they won't "leak" out of the first await. + public ExecutionContext? _previousExecutionCtx; + public SynchronizationContext? _previousSyncCtx; + public Thread _thread; + + public void Push() + { + _thread = Thread.CurrentThread; + _previousExecutionCtx = _thread._executionContext; + _previousSyncCtx = _thread._synchronizationContext; + } + + public void Pop() + { + // The common case is that these have not changed, so avoid the cost of a write barrier if not needed. + if (_previousSyncCtx != _thread._synchronizationContext) + { + // Restore changed SynchronizationContext back to previous + _thread._synchronizationContext = _previousSyncCtx; + } + + ExecutionContext? currentExecutionCtx = _thread._executionContext; + if (_previousExecutionCtx != currentExecutionCtx) + { + ExecutionContext.RestoreChangedContextToThread(_thread, _previousExecutionCtx, currentExecutionCtx); + } + } + } + + [Flags] + internal enum CorInfoContinuationFlags + { + // Whether or not the continuation expects the result to be boxed and + // placed in the GCData array at index 0. Not set if the callee is void. + CORINFO_CONTINUATION_RESULT_IN_GCDATA = 1, + // If this bit is set the continuation resumes inside a try block and thus + // if an exception is being propagated, needs to be resumed. The exception + // should be placed at index 0 or 1 depending on whether the continuation + // also expects a result. + CORINFO_CONTINUATION_NEEDS_EXCEPTION = 2, + // If this bit is set the continuation has the IL offset that inspired the + // OSR method saved in the beginning of 'Data', or -1 if the continuation + // belongs to a tier 0 method. + CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA = 4, + } + + internal sealed unsafe class Continuation + { + public Continuation? Next; + public delegate* Resume; + public uint State; + public CorInfoContinuationFlags Flags; + + // Data and GCData contain the state of the continuation. + // Note: The JIT is ultimately responsible for laying out these arrays. + // However, other parts of the system depend on the layout to + // know where to locate or place various pieces of data: + // + // 1. Resumption stubs need to know where to place the return value + // inside the next continuation. If the return value has GC references + // then it is boxed and placed at GCData[0]; otherwise, it is placed + // inside Data at offset 0 if + // CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA is NOT set and otherwise + // at offset 4. + // + // 2. Likewise, Finalize[Value]TaskReturningThunk needs to know from + // where to extract the return value. + // + // 3. The dispatcher needs to know where to place the exception inside + // the next continuation with a handler. Continuations with handlers + // have CORINFO_CONTINUATION_NEEDS_EXCEPTION set. The exception is + // placed at GCData[0] if CORINFO_CONTINUATION_RESULT_IN_GCDATA is NOT + // set, and otherwise at GCData[1]. + // + public byte[]? Data; + public object?[]? 
GCData; + } + + public static partial class AsyncHelpers + { + // This is the "magic" method on wich other "Await" methods are built. + // Calling this from an Async method returns the continuation to the caller thus + // explicitly initiates suspension. + [Intrinsic] + private static void AsyncSuspend(Continuation continuation) => throw new UnreachableException(); + + // Used during suspensions to hold the continuation chain and on what we are waiting. + // Methods like FinalizeTaskReturningThunk will unlink the state and wrap into a Task. + private struct RuntimeAsyncAwaitState + { + public Continuation? SentinelContinuation; + public INotifyCompletion? Notifier; + } + + [ThreadStatic] + private static RuntimeAsyncAwaitState t_runtimeAsyncAwaitState; + + private static Continuation AllocContinuation(Continuation prevContinuation, nuint numGCRefs, nuint dataSize) + { + Continuation newContinuation = new Continuation { Data = new byte[dataSize], GCData = new object[numGCRefs] }; + prevContinuation.Next = newContinuation; + return newContinuation; + } + + private static unsafe Continuation AllocContinuationMethod(Continuation prevContinuation, nuint numGCRefs, nuint dataSize, MethodDesc* method) + { + LoaderAllocator loaderAllocator = RuntimeMethodHandle.GetLoaderAllocator(new RuntimeMethodHandleInternal((IntPtr)method)); + object?[] gcData; + if (loaderAllocator != null) + { + gcData = new object[numGCRefs + 1]; + gcData[numGCRefs] = loaderAllocator; + } + else + { + gcData = new object[numGCRefs]; + } + + Continuation newContinuation = new Continuation { Data = new byte[dataSize], GCData = gcData }; + prevContinuation.Next = newContinuation; + return newContinuation; + } + + private static unsafe Continuation AllocContinuationClass(Continuation prevContinuation, nuint numGCRefs, nuint dataSize, MethodTable* methodTable) + { + IntPtr loaderAllocatorHandle = methodTable->GetLoaderAllocatorHandle(); + object?[] gcData; + if (loaderAllocatorHandle != IntPtr.Zero) + { + gcData = new object[numGCRefs + 1]; + gcData[numGCRefs] = GCHandle.FromIntPtr(loaderAllocatorHandle).Target; + } + else + { + gcData = new object[numGCRefs]; + } + + Continuation newContinuation = new Continuation { Data = new byte[dataSize], GCData = gcData }; + prevContinuation.Next = newContinuation; + return newContinuation; + } + + // Used to box the return value before storing into caller's continuation + // if the value is an object-containing struct. + // We are allocating a box directly instead of relying on regular boxing because we want + // to store structs without changing layout, including nullables. + private static unsafe object AllocContinuationResultBox(void* ptr) + { + MethodTable* pMT = (MethodTable*)ptr; + Debug.Assert(pMT->IsValueType); + // We need no type/cctor checks since we will be storing an instance that already exists. 
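The comments on Continuation.Data and GCData above pin down a small layout contract: a result that contains GC references is boxed into GCData[0], and a propagating exception goes into GCData[0] or GCData[1] depending on whether that result slot is occupied. A tiny sketch of reading that contract from the flags (the enum here mirrors the relevant CorInfoContinuationFlags values above; illustrative only):

```csharp
using System;

// Mirrors the two CorInfoContinuationFlags values that matter for slot placement.
[Flags]
enum ContinuationFlagsSketch
{
    ResultInGCData = 1,
    NeedsException = 2,
}

static class ContinuationLayoutSketch
{
    // The exception slot is GCData[0] unless a boxed result already occupies slot 0.
    public static int ExceptionSlot(ContinuationFlagsSketch flags)
        => (flags & ContinuationFlagsSketch.ResultInGCData) != 0 ? 1 : 0;

    public static void Main()
    {
        Console.WriteLine(ExceptionSlot(ContinuationFlagsSketch.NeedsException));                                          // 0
        Console.WriteLine(ExceptionSlot(ContinuationFlagsSketch.NeedsException | ContinuationFlagsSketch.ResultInGCData)); // 1
    }
}
```

The dispatcher code later in this file uses exactly this flag check when it stores a caught exception into the next continuation that has a handler.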
+ return RuntimeTypeHandle.InternalAllocNoChecks((MethodTable*)pMT); + } + + // wrapper to await a notifier + private struct AwaitableProxy : ICriticalNotifyCompletion + { + private readonly INotifyCompletion _notifier; + + public AwaitableProxy(INotifyCompletion notifier) + { + _notifier = notifier; + } + + public bool IsCompleted => false; + + public void OnCompleted(Action action) + { + _notifier!.OnCompleted(action); + } + + public AwaitableProxy GetAwaiter() { return this; } + + public void UnsafeOnCompleted(Action action) + { + if (_notifier is ICriticalNotifyCompletion criticalNotification) + { + criticalNotification.UnsafeOnCompleted(action); + } + else + { + _notifier!.OnCompleted(action); + } + } + + public void GetResult() { } + } + + private static Continuation UnlinkHeadContinuation(out AwaitableProxy awaitableProxy) + { + ref RuntimeAsyncAwaitState state = ref t_runtimeAsyncAwaitState; + awaitableProxy = new AwaitableProxy(state.Notifier!); + state.Notifier = null; + + Continuation sentinelContinuation = state.SentinelContinuation!; + Continuation head = sentinelContinuation.Next!; + sentinelContinuation.Next = null; + return head; + } + + // When a Task-returning thunk gets a continuation result + // it calls here to make a Task that awaits on the current async state. + // NOTE: This cannot be Runtime Async. Must use C# state machine or make one by hand. + private static async Task FinalizeTaskReturningThunk(Continuation continuation) + { + Continuation finalContinuation = new Continuation(); + + // Note that the exact location the return value is placed is tied + // into getAsyncResumptionStub in the VM, so do not change this + // without also changing that code (and the JIT). + if (RuntimeHelpers.IsReferenceOrContainsReferences()) + { + finalContinuation.Flags = CorInfoContinuationFlags.CORINFO_CONTINUATION_RESULT_IN_GCDATA | CorInfoContinuationFlags.CORINFO_CONTINUATION_NEEDS_EXCEPTION; + finalContinuation.GCData = new object[1]; + } + else + { + finalContinuation.Flags = CorInfoContinuationFlags.CORINFO_CONTINUATION_NEEDS_EXCEPTION; + finalContinuation.Data = new byte[Unsafe.SizeOf()]; + } + + continuation.Next = finalContinuation; + + while (true) + { + Continuation headContinuation = UnlinkHeadContinuation(out var awaitableProxy); + await awaitableProxy; + Continuation? finalResult = DispatchContinuations(headContinuation); + if (finalResult != null) + { + Debug.Assert(finalResult == finalContinuation); + if (RuntimeHelpers.IsReferenceOrContainsReferences()) + { + if (typeof(T).IsValueType) + { + return Unsafe.As(ref finalResult.GCData![0]!.GetRawData()); + } + + return Unsafe.As(ref finalResult.GCData![0]!); + } + else + { + return Unsafe.As(ref finalResult.Data![0]); + } + } + } + } + + private static async Task FinalizeTaskReturningThunk(Continuation continuation) + { + Continuation finalContinuation = new Continuation + { + Flags = CorInfoContinuationFlags.CORINFO_CONTINUATION_NEEDS_EXCEPTION, + }; + continuation.Next = finalContinuation; + + while (true) + { + Continuation headContinuation = UnlinkHeadContinuation(out var awaitableProxy); + await awaitableProxy; + Continuation? 
finalResult = DispatchContinuations(headContinuation); + if (finalResult != null) + { + Debug.Assert(finalResult == finalContinuation); + return; + } + } + } + + private static async ValueTask FinalizeValueTaskReturningThunk(Continuation continuation) + { + Continuation finalContinuation = new Continuation(); + + // Note that the exact location the return value is placed is tied + // into getAsyncResumptionStub in the VM, so do not change this + // without also changing that code (and the JIT). + if (RuntimeHelpers.IsReferenceOrContainsReferences()) + { + finalContinuation.Flags = CorInfoContinuationFlags.CORINFO_CONTINUATION_RESULT_IN_GCDATA | CorInfoContinuationFlags.CORINFO_CONTINUATION_NEEDS_EXCEPTION; + finalContinuation.GCData = new object[1]; + } + else + { + finalContinuation.Flags = CorInfoContinuationFlags.CORINFO_CONTINUATION_NEEDS_EXCEPTION; + finalContinuation.Data = new byte[Unsafe.SizeOf()]; + } + + continuation.Next = finalContinuation; + + while (true) + { + Continuation headContinuation = UnlinkHeadContinuation(out var awaitableProxy); + await awaitableProxy; + Continuation? finalResult = DispatchContinuations(headContinuation); + if (finalResult != null) + { + Debug.Assert(finalResult == finalContinuation); + if (RuntimeHelpers.IsReferenceOrContainsReferences()) + { + if (typeof(T).IsValueType) + { + return Unsafe.As(ref finalResult.GCData![0]!.GetRawData()); + } + + return Unsafe.As(ref finalResult.GCData![0]!); + } + else + { + return Unsafe.As(ref finalResult.Data![0]); + } + } + } + } + + private static async ValueTask FinalizeValueTaskReturningThunk(Continuation continuation) + { + Continuation finalContinuation = new Continuation + { + Flags = CorInfoContinuationFlags.CORINFO_CONTINUATION_NEEDS_EXCEPTION, + }; + continuation.Next = finalContinuation; + + while (true) + { + Continuation headContinuation = UnlinkHeadContinuation(out var awaitableProxy); + await awaitableProxy; + Continuation? finalResult = DispatchContinuations(headContinuation); + if (finalResult != null) + { + Debug.Assert(finalResult == finalContinuation); + return; + } + } + } + + // Return a continuation object if that is the one which has the final + // result of the Task, if the real output of the series of continuations was + // an exception, it is allowed to propagate out. + // OR + // return NULL to indicate that this isn't yet done. + private static unsafe Continuation? DispatchContinuations(Continuation? continuation) + { + Debug.Assert(continuation != null); + + while (true) + { + Continuation? newContinuation; + try + { + newContinuation = continuation.Resume(continuation); + } + catch (Exception ex) + { + continuation = UnwindToPossibleHandler(continuation); + if (continuation.Resume == null) + { + throw; + } + + continuation.GCData![(continuation.Flags & CorInfoContinuationFlags.CORINFO_CONTINUATION_RESULT_IN_GCDATA) != 0 ? 
1 : 0] = ex; + continue; + } + + if (newContinuation != null) + { + newContinuation.Next = continuation.Next; + return null; + } + + continuation = continuation.Next; + Debug.Assert(continuation != null); + + if (continuation.Resume == null) + { + return continuation; // Return the result containing Continuation + } + } + } + + private static Continuation UnwindToPossibleHandler(Continuation continuation) + { + while (true) + { + Debug.Assert(continuation.Next != null); + continuation = continuation.Next; + if ((continuation.Flags & CorInfoContinuationFlags.CORINFO_CONTINUATION_NEEDS_EXCEPTION) != 0) + return continuation; + } + } + } +} diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs index a441e4890f9d..e30e94eaf415 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs @@ -517,6 +517,45 @@ private static void ArrayTypeCheck_Helper(object obj, void* elementType) } } + // Helpers for boxing + [DebuggerHidden] + internal static object? Box_Nullable(MethodTable* srcMT, ref byte nullableData) + { + Debug.Assert(srcMT->IsNullable); + + if (nullableData == 0) + return null; + + // Allocate a new instance of the T in Nullable. + MethodTable* dstMT = srcMT->InstantiationArg0(); + ref byte srcValue = ref Unsafe.Add(ref nullableData, srcMT->NullableValueAddrOffset); + + // Delegate to non-nullable boxing implementation + return Box(dstMT, ref srcValue); + } + + [DebuggerHidden] + internal static object Box(MethodTable* typeMT, ref byte unboxedData) + { + Debug.Assert(typeMT != null); + Debug.Assert(typeMT->IsValueType); + + // A null can be passed for boxing of a null ref. + _ = Unsafe.ReadUnaligned(ref unboxedData); + + object boxed = RuntimeTypeHandle.InternalAllocNoChecks(typeMT); + if (typeMT->ContainsGCPointers) + { + Buffer.BulkMoveWithWriteBarrier(ref boxed.GetRawData(), ref unboxedData, typeMT->GetNumInstanceFieldBytesIfContainsGCPointers()); + } + else + { + SpanHelpers.Memmove(ref boxed.GetRawData(), ref unboxedData, typeMT->GetNumInstanceFieldBytes()); + } + + return boxed; + } + // Helpers for Unboxing #if FEATURE_TYPEEQUIVALENCE [DebuggerHidden] @@ -615,27 +654,8 @@ internal static void Unbox_Nullable(ref byte destPtr, MethodTable* typeMT, objec [DebuggerHidden] internal static object? ReboxFromNullable(MethodTable* srcMT, object src) { - Debug.Assert(srcMT->IsNullable); - ref byte nullableData = ref src.GetRawData(); - - // If 'hasValue' is false, return null. - if (!Unsafe.As(ref nullableData)) - return null; - - // Allocate a new instance of the T in Nullable. - MethodTable* dstMT = srcMT->InstantiationArg0(); - object dst = RuntimeTypeHandle.InternalAlloc(dstMT); - - // Copy data from the Nullable. 
- ref byte srcData = ref Unsafe.Add(ref nullableData, srcMT->NullableValueAddrOffset); - ref byte dstData = ref RuntimeHelpers.GetRawData(dst); - if (dstMT->ContainsGCPointers) - Buffer.BulkMoveWithWriteBarrier(ref dstData, ref srcData, dstMT->GetNumInstanceFieldBytesIfContainsGCPointers()); - else - SpanHelpers.Memmove(ref dstData, ref srcData, dstMT->GetNumInstanceFieldBytes()); - - return dst; + return Box_Nullable(srcMT, ref nullableData); } [DebuggerHidden] diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs index d60b48ec0e55..2f4a401b18a6 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs @@ -445,8 +445,8 @@ internal static unsafe bool ObjectHasComponentSize(object obj) /// A reference to the data to box. /// A boxed instance of the value at . /// This method includes proper handling for nullable value types as well. - [MethodImpl(MethodImplOptions.InternalCall)] - internal static extern unsafe object? Box(MethodTable* methodTable, ref byte data); + internal static unsafe object? Box(MethodTable* methodTable, ref byte data) => + methodTable->IsNullable ? CastHelpers.Box_Nullable(methodTable, ref data) : CastHelpers.Box(methodTable, ref data); // Given an object reference, returns its MethodTable*. // @@ -930,6 +930,9 @@ public uint GetNumInstanceFieldBytesIfContainsGCPointers() Debug.Assert((BaseSize - (nuint)(2 * sizeof(IntPtr)) == GetNumInstanceFieldBytes())); return BaseSize - (uint)(2 * sizeof(IntPtr)); } + + [MethodImpl(MethodImplOptions.InternalCall)] + public extern IntPtr GetLoaderAllocatorHandle(); } // Subset of src\vm\typedesc.h @@ -963,7 +966,7 @@ internal static ref byte MaskStaticsPointer(ref byte staticsPtr) { fixed (byte* p = &staticsPtr) { - return ref Unsafe.AsRef((byte*)((nuint)p & ~(nuint)DynamicStaticsInfo.ISCLASSNOTINITED)); + return ref Unsafe.AsRef((byte*)((nuint)p & ~(nuint)DynamicStaticsInfo.ISCLASSNOTINITED)); } } @@ -1150,8 +1153,8 @@ public static bool CanCastToForReflection(TypeHandle srcTH, TypeHandle destTH) CastResult.CanCast => true, CastResult.CannotCast => false, - // Reflection allows T to be cast to Nullable. - // See ObjIsInstanceOfCore() + // Reflection allows T to be cast to Nullable. 
+ // See ObjIsInstanceOfCore() _ => CanCastToWorker(srcTH, destTH, nullableCast: true) }; } diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index b7095c3579a0..df3da7a876b7 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -50,18 +50,32 @@ class AsmOffsets public const int SIZEOF__REGDISPLAY = 0xc60; public const int OFFSETOF__REGDISPLAY__SP = 0xba8; public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbb0; +#elif TARGET_WASM + public const int SIZEOF__REGDISPLAY = 0x3c; + public const int OFFSETOF__REGDISPLAY__SP = 0x34; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x38; #endif #if TARGET_64BIT public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x8; - public const int SIZEOF__StackFrameIterator = 0x358; - public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x33A; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x350; +#if FEATURE_INTERPRETER + public const int SIZEOF__StackFrameIterator = 0x170; + public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x168; +#else + public const int SIZEOF__StackFrameIterator = 0x150; + public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x148; +#endif + public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x132; +#elif TARGET_X86 + public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4; + public const int SIZEOF__StackFrameIterator = 0x3d4; + public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x3c2; + public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x3d0; #else // TARGET_64BIT public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4; - public const int SIZEOF__StackFrameIterator = 0x2c8; - public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x2b6; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x2c4; + public const int SIZEOF__StackFrameIterator = 0xcc; + public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0xba; + public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0xc8; #endif // TARGET_64BIT #else // DEBUG @@ -102,18 +116,32 @@ class AsmOffsets public const int SIZEOF__REGDISPLAY = 0xc50; public const int OFFSETOF__REGDISPLAY__SP = 0xba0; public const int OFFSETOF__REGDISPLAY__ControlPC = 0xba8; +#elif TARGET_WASM + public const int SIZEOF__REGDISPLAY = 0x3c; + public const int OFFSETOF__REGDISPLAY__SP = 0x34; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x38; #endif #if TARGET_64BIT public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x8; - public const int SIZEOF__StackFrameIterator = 0x350; - public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x332; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x348; +#if FEATURE_INTERPRETER + public const int SIZEOF__StackFrameIterator = 0x168; + public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x160; +#else + public const int SIZEOF__StackFrameIterator = 0x148; + public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x140; +#endif + public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x12a; +#elif TARGET_X86 + public const int 
OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4; + public const int SIZEOF__StackFrameIterator = 0x3cc; + public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x3ba; + public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x3c8; #else // TARGET_64BIT public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4; - public const int SIZEOF__StackFrameIterator = 0x2c0; - public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x2ae; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x2bc; + public const int SIZEOF__StackFrameIterator = 0xc4; + public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0xb2; + public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0xc0; #endif // TARGET_64BIT #endif // DEBUG @@ -138,6 +166,8 @@ class AsmOffsets public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x220; #elif TARGET_LOONGARCH64 public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x520; +#elif TARGET_WASM + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x08; #endif #if TARGET_AMD64 @@ -158,13 +188,17 @@ class AsmOffsets #elif TARGET_LOONGARCH64 public const int OFFSETOF__PAL_LIMITED_CONTEXT__IP = 0x108; public const int OFFSETOF__PAL_LIMITED_CONTEXT__FP = 0xb8; +#elif TARGET_WASM + // offset to dummy field + public const int OFFSETOF__PAL_LIMITED_CONTEXT__IP = 0x04; + public const int OFFSETOF__PAL_LIMITED_CONTEXT__FP = 0x04; #endif // Offsets / sizes that are different in 64 / 32 bit mode #if TARGET_64BIT public const int SIZEOF__EHEnum = 0x20; - public const int OFFSETOF__StackFrameIterator__m_pRegDisplay = 0x228; + public const int OFFSETOF__StackFrameIterator__m_pRegDisplay = 0x20; public const int OFFSETOF__ExInfo__m_pPrevExInfo = 0; public const int OFFSETOF__ExInfo__m_pExContext = 0xa8; public const int OFFSETOF__ExInfo__m_exception = 0xb0; @@ -175,7 +209,7 @@ class AsmOffsets public const int OFFSETOF__ExInfo__m_notifyDebuggerSP = OFFSETOF__ExInfo__m_frameIter + SIZEOF__StackFrameIterator; #else // TARGET_64BIT public const int SIZEOF__EHEnum = 0x10; - public const int OFFSETOF__StackFrameIterator__m_pRegDisplay = 0x218; + public const int OFFSETOF__StackFrameIterator__m_pRegDisplay = 0x14; public const int OFFSETOF__ExInfo__m_pPrevExInfo = 0; public const int OFFSETOF__ExInfo__m_pExContext = 0x5c; public const int OFFSETOF__ExInfo__m_exception = 0x60; @@ -224,7 +258,7 @@ class AsmOffsets static_assert_no_msg(offsetof(ExInfo, m_idxCurClause) == OFFSETOF__ExInfo__m_idxCurClause); static_assert_no_msg(offsetof(ExInfo, m_frameIter) == OFFSETOF__ExInfo__m_frameIter); static_assert_no_msg(offsetof(ExInfo, m_notifyDebuggerSP) == OFFSETOF__ExInfo__m_notifyDebuggerSP); -#endif +#endif } #if __cplusplus diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/InternalCalls.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/InternalCalls.cs index 1dc4d91c396b..608d52d76891 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/InternalCalls.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/InternalCalls.cs @@ -13,11 +13,11 @@ namespace System.Runtime.ExceptionServices internal static partial class InternalCalls { [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "SfiInit")] - [return: MarshalAs(UnmanagedType.Bool)] - internal static unsafe partial bool RhpSfiInit(ref StackFrameIterator pThis, void* pStackwalkCtx, [MarshalAs(UnmanagedType.Bool)] bool instructionFault, bool* 
fIsExceptionIntercepted); + [return: MarshalAs(UnmanagedType.U1)] + internal static unsafe partial bool RhpSfiInit(ref StackFrameIterator pThis, void* pStackwalkCtx, [MarshalAs(UnmanagedType.U1)] bool instructionFault, bool* fIsExceptionIntercepted); [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "SfiNext")] - [return: MarshalAs(UnmanagedType.Bool)] + [return: MarshalAs(UnmanagedType.U1)] internal static unsafe partial bool RhpSfiNext(ref StackFrameIterator pThis, uint* uExCollideClauseIdx, bool* fUnwoundReversePInvoke, bool* fIsExceptionIntercepted); [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ResumeAtInterceptionLocation")] @@ -31,7 +31,7 @@ internal static unsafe partial IntPtr RhpCallCatchFunclet( internal static unsafe partial void RhpCallFinallyFunclet(byte* pHandlerIP, void* pvRegDisplay, EH.ExInfo* exInfo); [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "CallFilterFunclet")] - [return: MarshalAs(UnmanagedType.Bool)] + [return: MarshalAs(UnmanagedType.U1)] internal static unsafe partial bool RhpCallFilterFunclet( ObjectHandleOnStack exceptionObj, byte* pFilterIP, void* pvRegDisplay); @@ -39,11 +39,12 @@ internal static unsafe partial bool RhpCallFilterFunclet( internal static unsafe partial void RhpAppendExceptionStackFrame(ObjectHandleOnStack exceptionObj, IntPtr ip, UIntPtr sp, int flags, EH.ExInfo* exInfo); [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "EHEnumInitFromStackFrameIterator")] - [return: MarshalAs(UnmanagedType.Bool)] + [SuppressGCTransition] + [return: MarshalAs(UnmanagedType.U1)] internal static unsafe partial bool RhpEHEnumInitFromStackFrameIterator(ref StackFrameIterator pFrameIter, out EH.MethodRegionInfo pMethodRegionInfo, void* pEHEnum); [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "EHEnumNext")] - [return: MarshalAs(UnmanagedType.Bool)] + [return: MarshalAs(UnmanagedType.U1)] internal static unsafe partial bool RhpEHEnumNext(void* pEHEnum, void* pEHClause); } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.CoreCLR.cs new file mode 100644 index 000000000000..7de8614a6bcf --- /dev/null +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.CoreCLR.cs @@ -0,0 +1,128 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Threading; +using System.Runtime.CompilerServices; +using System.Collections.Generic; +using System.Collections.Concurrent; + +namespace System.Runtime.InteropServices +{ + /// + /// Class for managing wrappers of COM IUnknown types. + /// + public abstract partial class ComWrappers + { + /// + /// Get the runtime provided IUnknown implementation. + /// + /// Function pointer to QueryInterface. + /// Function pointer to AddRef. + /// Function pointer to Release. 
+ public static void GetIUnknownImpl(out IntPtr fpQueryInterface, out IntPtr fpAddRef, out IntPtr fpRelease) + => GetIUnknownImplInternal(out fpQueryInterface, out fpAddRef, out fpRelease); + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_GetIUnknownImpl")] + [SuppressGCTransition] + private static partial void GetIUnknownImplInternal(out IntPtr fpQueryInterface, out IntPtr fpAddRef, out IntPtr fpRelease); + + internal static unsafe void GetUntrackedIUnknownImpl(out delegate* unmanaged[MemberFunction] fpAddRef, out delegate* unmanaged[MemberFunction] fpRelease) + { + fpAddRef = fpRelease = GetUntrackedAddRefRelease(); + } + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_GetUntrackedAddRefRelease")] + [SuppressGCTransition] + private static unsafe partial delegate* unmanaged[MemberFunction] GetUntrackedAddRefRelease(); + + internal static IntPtr DefaultIUnknownVftblPtr { get; } = CreateDefaultIUnknownVftbl(); + internal static IntPtr TaggedImplVftblPtr { get; } = CreateTaggedImplVftbl(); + internal static IntPtr DefaultIReferenceTrackerTargetVftblPtr { get; } = CreateDefaultIReferenceTrackerTargetVftbl(); + + private static unsafe IntPtr CreateDefaultIUnknownVftbl() + { + IntPtr* vftbl = (IntPtr*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(ComWrappers), 3 * sizeof(IntPtr)); + GetIUnknownImpl(out vftbl[0], out vftbl[1], out vftbl[2]); + return (IntPtr)vftbl; + } + + private static unsafe IntPtr CreateTaggedImplVftbl() + { + IntPtr* vftbl = (IntPtr*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(ComWrappers), 4 * sizeof(IntPtr)); + GetIUnknownImpl(out vftbl[0], out vftbl[1], out vftbl[2]); + vftbl[3] = GetTaggedImplCurrentVersion(); + return (IntPtr)vftbl; + } + + internal static int CallICustomQueryInterface(ManagedObjectWrapperHolder holder, ref Guid iid, out IntPtr ppObject) + { + if (holder.WrappedObject is ICustomQueryInterface customQueryInterface) + { + return (int)customQueryInterface.GetInterface(ref iid, out ppObject); + } + + ppObject = IntPtr.Zero; + return -1; // See TryInvokeICustomQueryInterfaceResult + } + + internal static IntPtr GetOrCreateComInterfaceForObjectWithGlobalMarshallingInstance(object obj) + { + try + { + return s_globalInstanceForMarshalling is null + ? IntPtr.Zero + : s_globalInstanceForMarshalling.GetOrCreateComInterfaceForObject(obj, CreateComInterfaceFlags.TrackerSupport); + } + catch (ArgumentException) + { + // We've failed to create a COM interface for the object. + // Fallback to built-in COM. + return IntPtr.Zero; + } + } + + internal static object? GetOrCreateObjectForComInstanceWithGlobalMarshallingInstance(IntPtr comObject, CreateObjectFlags flags) + { + try + { + return s_globalInstanceForMarshalling?.GetOrCreateObjectForComInstance(comObject, flags); + } + catch (ArgumentNullException) + { + // We've failed to create a managed object for the COM instance. + // Fallback to built-in COM. 
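+ // A null return lets the caller fall back to the built-in COM interop path for this instance.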
+ return null; + } + } + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_GetIReferenceTrackerTargetVftbl")] + [SuppressGCTransition] + private static partial IntPtr GetDefaultIReferenceTrackerTargetVftbl(); + + private static IntPtr CreateDefaultIReferenceTrackerTargetVftbl() + => GetDefaultIReferenceTrackerTargetVftbl(); + + private static IntPtr GetTaggedImplCurrentVersion() + { + return GetTaggedImpl(); + } + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_GetTaggedImpl")] + [SuppressGCTransition] + private static partial IntPtr GetTaggedImpl(); + + internal sealed partial class ManagedObjectWrapperHolder + { + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_RegisterIsRootedCallback")] + private static partial void RegisterIsRootedCallback(); + + private static IntPtr AllocateRefCountedHandle(ManagedObjectWrapperHolder holder) + { + return AllocateRefCountedHandle(ObjectHandleOnStack.Create(ref holder)); + } + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_AllocateRefCountedHandle")] + private static partial IntPtr AllocateRefCountedHandle(ObjectHandleOnStack obj); + } + } +} diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.cs deleted file mode 100644 index c7c78a7325f9..000000000000 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.cs +++ /dev/null @@ -1,396 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System; -using System.Collections; -using System.Diagnostics.CodeAnalysis; -using System.Runtime.CompilerServices; -using System.Runtime.Versioning; -using System.Threading; - -namespace System.Runtime.InteropServices -{ - /// - /// Internal enumeration used by the runtime to indicate the scenario for which ComWrappers is being used. - /// - internal enum ComWrappersScenario - { - Instance = 0, - TrackerSupportGlobalInstance = 1, - MarshallingGlobalInstance = 2, - } - - /// - /// Class for managing wrappers of COM IUnknown types. - /// - public abstract partial class ComWrappers - { - /// - /// Given a managed object, determine if it is a -created - /// managed wrapper and if so, return the wrapped unmanaged pointer. - /// - /// A managed wrapper - /// An unmanaged COM object - /// True if the wrapper was resolved to an external COM object, otherwise false. - /// - /// If a COM object is returned, the caller is expected to call Release() on the object. - /// This can be done through an API like . - /// Since this API is required to interact directly with the external COM object, QueryInterface(), - /// it is important for the caller to understand the COM object may have apartment affinity and therefore - /// if the current thread is not in the correct apartment or the COM object is not a proxy this call may fail. 
- /// - public static bool TryGetComInstance(object obj, out IntPtr unknown) - { - if (obj == null) - { - unknown = IntPtr.Zero; - return false; - } - - return TryGetComInstanceInternal(ObjectHandleOnStack.Create(ref obj), out unknown); - } - - [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_TryGetComInstance")] - [return: MarshalAs(UnmanagedType.Bool)] - private static partial bool TryGetComInstanceInternal(ObjectHandleOnStack wrapperMaybe, out IntPtr externalComObject); - - /// - /// Given a COM object, determine if it is a -created - /// unmanaged wrapper and if so, return the wrapped managed object. - /// - /// An unmanaged wrapper - /// A managed object - /// True if the wrapper was resolved to a managed object, otherwise false. - public static bool TryGetObject(IntPtr unknown, [NotNullWhen(true)] out object? obj) - { - obj = null; - if (unknown == IntPtr.Zero) - { - return false; - } - - return TryGetObjectInternal(unknown, ObjectHandleOnStack.Create(ref obj)); - } - - [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_TryGetObject")] - [return: MarshalAs(UnmanagedType.Bool)] - private static partial bool TryGetObjectInternal(IntPtr wrapperMaybe, ObjectHandleOnStack instance); - - /// - /// ABI for function dispatch of a COM interface. - /// - public partial struct ComInterfaceDispatch - { - /// - /// Given a from a generated Vtable, convert to the target type. - /// - /// Desired type. - /// Pointer supplied to Vtable function entry. - /// Instance of type associated with dispatched function call. - public static unsafe T GetInstance(ComInterfaceDispatch* dispatchPtr) where T : class - { - // See the dispatch section in the runtime for details on the masking below. - long DispatchAlignmentThisPtr = sizeof(void*) == 8 ? 64 : 16; // Should be a power of 2. - long DispatchThisPtrMask = ~(DispatchAlignmentThisPtr - 1); - var comInstance = *(ComInterfaceInstance**)(((long)dispatchPtr) & DispatchThisPtrMask); - - return Unsafe.As(GCHandle.InternalGet(comInstance->GcHandle)); - } - - private struct ComInterfaceInstance - { - public IntPtr GcHandle; - } - } - - /// - /// Globally registered instance of the ComWrappers class for reference tracker support. - /// - private static ComWrappers? s_globalInstanceForTrackerSupport; - - /// - /// Globally registered instance of the ComWrappers class for marshalling. - /// - private static ComWrappers? s_globalInstanceForMarshalling; - - private static long s_instanceCounter; - private readonly long id = Interlocked.Increment(ref s_instanceCounter); - - /// - /// Create a COM representation of the supplied object that can be passed to a non-managed environment. - /// - /// The managed object to expose outside the .NET runtime. - /// Flags used to configure the generated interface. - /// The generated COM interface that can be passed outside the .NET runtime. - /// - /// If a COM representation was previously created for the specified using - /// this instance, the previously created COM interface will be returned. - /// If not, a new one will be created. - /// - public IntPtr GetOrCreateComInterfaceForObject(object instance, CreateComInterfaceFlags flags) - { - IntPtr ptr; - if (!TryGetOrCreateComInterfaceForObjectInternal(this, instance, flags, out ptr)) - throw new ArgumentException(null, nameof(instance)); - - return ptr; - } - - /// - /// Create a COM representation of the supplied object that can be passed to a non-managed environment. - /// - /// The implementation to use when creating the COM representation. 
- /// The managed object to expose outside the .NET runtime. - /// Flags used to configure the generated interface. - /// The generated COM interface that can be passed outside the .NET runtime or IntPtr.Zero if it could not be created. - /// Returns true if a COM representation could be created, false otherwise - /// - /// If is null, the global instance (if registered) will be used. - /// - private static bool TryGetOrCreateComInterfaceForObjectInternal(ComWrappers impl, object instance, CreateComInterfaceFlags flags, out IntPtr retValue) - { - ArgumentNullException.ThrowIfNull(instance); - - return TryGetOrCreateComInterfaceForObjectInternal(ObjectHandleOnStack.Create(ref impl), impl.id, ObjectHandleOnStack.Create(ref instance), flags, out retValue); - } - - [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_TryGetOrCreateComInterfaceForObject")] - [return: MarshalAs(UnmanagedType.Bool)] - private static partial bool TryGetOrCreateComInterfaceForObjectInternal(ObjectHandleOnStack comWrappersImpl, long wrapperId, ObjectHandleOnStack instance, CreateComInterfaceFlags flags, out IntPtr retValue); - - // Called by the runtime to execute the abstract instance function - internal static unsafe void* CallComputeVtables(ComWrappersScenario scenario, ComWrappers? comWrappersImpl, object obj, CreateComInterfaceFlags flags, out int count) - { - ComWrappers? impl = null; - switch (scenario) - { - case ComWrappersScenario.Instance: - impl = comWrappersImpl; - break; - case ComWrappersScenario.TrackerSupportGlobalInstance: - impl = s_globalInstanceForTrackerSupport; - break; - case ComWrappersScenario.MarshallingGlobalInstance: - impl = s_globalInstanceForMarshalling; - break; - } - - if (impl is null) - { - count = -1; - return null; - } - - return impl.ComputeVtables(obj, flags, out count); - } - - /// - /// Get the currently registered managed object or creates a new managed object and registers it. - /// - /// Object to import for usage into the .NET runtime. - /// Flags used to describe the external object. - /// Returns a managed object associated with the supplied external COM object. - /// - /// If a managed object was previously created for the specified - /// using this instance, the previously created object will be returned. - /// If not, a new one will be created. - /// - public object GetOrCreateObjectForComInstance(IntPtr externalComObject, CreateObjectFlags flags) - { - object? obj; - if (!TryGetOrCreateObjectForComInstanceInternal(this, externalComObject, IntPtr.Zero, flags, null, out obj)) - throw new ArgumentNullException(nameof(externalComObject)); - - return obj!; - } - - // Called by the runtime to execute the abstract instance function. - internal static object? CallCreateObject(ComWrappersScenario scenario, ComWrappers? comWrappersImpl, IntPtr externalComObject, CreateObjectFlags flags) - { - ComWrappers? impl = null; - switch (scenario) - { - case ComWrappersScenario.Instance: - impl = comWrappersImpl; - break; - case ComWrappersScenario.TrackerSupportGlobalInstance: - impl = s_globalInstanceForTrackerSupport; - break; - case ComWrappersScenario.MarshallingGlobalInstance: - impl = s_globalInstanceForMarshalling; - break; - } - - if (impl == null) - return null; - - return impl.CreateObject(externalComObject, flags); - } - - /// - /// Get the currently registered managed object or uses the supplied managed object and registers it. - /// - /// Object to import for usage into the .NET runtime. - /// Flags used to describe the external object. 
- /// The to be used as the wrapper for the external object - /// Returns a managed object associated with the supplied external COM object. - /// - /// If the instance already has an associated external object a will be thrown. - /// - public object GetOrRegisterObjectForComInstance(IntPtr externalComObject, CreateObjectFlags flags, object wrapper) - { - return GetOrRegisterObjectForComInstance(externalComObject, flags, wrapper, IntPtr.Zero); - } - - /// - /// Get the currently registered managed object or uses the supplied managed object and registers it. - /// - /// Object to import for usage into the .NET runtime. - /// Flags used to describe the external object. - /// The to be used as the wrapper for the external object - /// Inner for COM aggregation scenarios - /// Returns a managed object associated with the supplied external COM object. - /// - /// This method override is for registering an aggregated COM instance with its associated inner. The inner - /// will be released when the associated wrapper is eventually freed. Note that it will be released on a thread - /// in an unknown apartment state. If the supplied inner is not known to be a free-threaded instance then - /// it is advised to not supply the inner. - /// - /// If the instance already has an associated external object a will be thrown. - /// - public object GetOrRegisterObjectForComInstance(IntPtr externalComObject, CreateObjectFlags flags, object wrapper, IntPtr inner) - { - ArgumentNullException.ThrowIfNull(wrapper); - - object? obj; - if (!TryGetOrCreateObjectForComInstanceInternal(this, externalComObject, inner, flags, wrapper, out obj)) - throw new ArgumentNullException(nameof(externalComObject)); - - return obj!; - } - - /// - /// Get the currently registered managed object or creates a new managed object and registers it. - /// - /// The implementation to use when creating the managed object. - /// Object to import for usage into the .NET runtime. - /// The inner instance if aggregation is involved - /// Flags used to describe the external object. - /// The to be used as the wrapper for the external object. - /// The managed object associated with the supplied external COM object or null if it could not be created. - /// Returns true if a managed object could be retrieved/created, false otherwise - /// - /// If is null, the global instance (if registered) will be used. - /// - private static bool TryGetOrCreateObjectForComInstanceInternal( - ComWrappers impl, - IntPtr externalComObject, - IntPtr innerMaybe, - CreateObjectFlags flags, - object? wrapperMaybe, - out object? retValue) - { - ArgumentNullException.ThrowIfNull(externalComObject); - - // If the inner is supplied the Aggregation flag should be set. - if (innerMaybe != IntPtr.Zero && !flags.HasFlag(CreateObjectFlags.Aggregation)) - throw new InvalidOperationException(SR.InvalidOperation_SuppliedInnerMustBeMarkedAggregation); - - object? 
wrapperMaybeLocal = wrapperMaybe; - retValue = null; - return TryGetOrCreateObjectForComInstanceInternal(ObjectHandleOnStack.Create(ref impl), impl.id, externalComObject, innerMaybe, flags, ObjectHandleOnStack.Create(ref wrapperMaybeLocal), ObjectHandleOnStack.Create(ref retValue)); - } - - [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_TryGetOrCreateObjectForComInstance")] - [return: MarshalAs(UnmanagedType.Bool)] - private static partial bool TryGetOrCreateObjectForComInstanceInternal(ObjectHandleOnStack comWrappersImpl, long wrapperId, IntPtr externalComObject, IntPtr innerMaybe, CreateObjectFlags flags, ObjectHandleOnStack wrapper, ObjectHandleOnStack retValue); - - // Call to execute the virtual instance function - internal static void CallReleaseObjects(ComWrappers? comWrappersImpl, IEnumerable objects) - => (comWrappersImpl ?? s_globalInstanceForTrackerSupport!).ReleaseObjects(objects); - - /// - /// Register a instance to be used as the global instance for reference tracker support. - /// - /// Instance to register - /// - /// This function can only be called a single time. Subsequent calls to this function will result - /// in a being thrown. - /// - /// Scenarios where this global instance may be used are: - /// * Object tracking via the and flags. - /// - public static void RegisterForTrackerSupport(ComWrappers instance) - { - ArgumentNullException.ThrowIfNull(instance); - - if (null != Interlocked.CompareExchange(ref s_globalInstanceForTrackerSupport, instance, null)) - { - throw new InvalidOperationException(SR.InvalidOperation_ResetGlobalComWrappersInstance); - } - - SetGlobalInstanceRegisteredForTrackerSupport(instance.id); - } - - - [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_SetGlobalInstanceRegisteredForTrackerSupport")] - [SuppressGCTransition] - private static partial void SetGlobalInstanceRegisteredForTrackerSupport(long id); - - /// - /// Register a instance to be used as the global instance for marshalling in the runtime. - /// - /// Instance to register - /// - /// This function can only be called a single time. Subsequent calls to this function will result - /// in a being thrown. - /// - /// Scenarios where this global instance may be used are: - /// * Usage of COM-related Marshal APIs - /// * P/Invokes with COM-related types - /// * COM activation - /// - [SupportedOSPlatform("windows")] - public static void RegisterForMarshalling(ComWrappers instance) - { - ArgumentNullException.ThrowIfNull(instance); - - if (null != Interlocked.CompareExchange(ref s_globalInstanceForMarshalling, instance, null)) - { - throw new InvalidOperationException(SR.InvalidOperation_ResetGlobalComWrappersInstance); - } - - // Indicate to the runtime that a global instance has been registered for marshalling. - // This allows the native runtime know to call into the managed ComWrappers only if a - // global instance is registered for marshalling. - SetGlobalInstanceRegisteredForMarshalling(instance.id); - } - - [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_SetGlobalInstanceRegisteredForMarshalling")] - [SuppressGCTransition] - private static partial void SetGlobalInstanceRegisteredForMarshalling(long id); - - /// - /// Get the runtime provided IUnknown implementation. - /// - /// Function pointer to QueryInterface. - /// Function pointer to AddRef. - /// Function pointer to Release. 
- public static void GetIUnknownImpl(out IntPtr fpQueryInterface, out IntPtr fpAddRef, out IntPtr fpRelease) - => GetIUnknownImplInternal(out fpQueryInterface, out fpAddRef, out fpRelease); - - [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ComWrappers_GetIUnknownImpl")] - private static partial void GetIUnknownImplInternal(out IntPtr fpQueryInterface, out IntPtr fpAddRef, out IntPtr fpRelease); - - internal static int CallICustomQueryInterface(object customQueryInterfaceMaybe, ref Guid iid, out IntPtr ppObject) - { - if (customQueryInterfaceMaybe is ICustomQueryInterface customQueryInterface) - { - return (int)customQueryInterface.GetInterface(ref iid, out ppObject); - } - - ppObject = IntPtr.Zero; - return -1; // See TryInvokeICustomQueryInterfaceResult - } - } -} diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/TrackerObjectManager.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/TrackerObjectManager.CoreCLR.cs new file mode 100644 index 000000000000..ac60cd56719b --- /dev/null +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/TrackerObjectManager.CoreCLR.cs @@ -0,0 +1,45 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Text; + +namespace System.Runtime.InteropServices +{ + internal static partial class TrackerObjectManager + { + private static bool HasReferenceTrackerManager + => HasReferenceTrackerManagerInternal(); + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "TrackerObjectManager_HasReferenceTrackerManager")] + [SuppressGCTransition] + [return: MarshalAs(UnmanagedType.U1)] + private static partial bool HasReferenceTrackerManagerInternal(); + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "TrackerObjectManager_TryRegisterReferenceTrackerManager")] + [SuppressGCTransition] + [return: MarshalAs(UnmanagedType.U1)] + private static partial bool TryRegisterReferenceTrackerManager(IntPtr referenceTrackerManager); + + internal static bool IsGlobalPeggingEnabled + => IsGlobalPeggingEnabledInternal(); + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "TrackerObjectManager_IsGlobalPeggingEnabled")] + [SuppressGCTransition] + [return: MarshalAs(UnmanagedType.U1)] + private static partial bool IsGlobalPeggingEnabledInternal(); + + private static void RegisterGCCallbacks() + { + // CoreCLR doesn't have GC callbacks, but we do need to register the GC handle set with the runtime for enumeration + // during GC. 
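+ // s_referenceTrackerNativeObjectWrapperCache is declared in another part of this partial class;
+ // registering it below makes those handles enumerable by the GC.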
+ GCHandleSet handleSet = s_referenceTrackerNativeObjectWrapperCache; + RegisterNativeObjectWrapperCache(ObjectHandleOnStack.Create(ref handleSet)); + } + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "TrackerObjectManager_RegisterNativeObjectWrapperCache")] + private static partial void RegisterNativeObjectWrapperCache(ObjectHandleOnStack nativeObjectWrapperCache); + } +} diff --git a/src/coreclr/System.Private.CoreLib/src/System/RuntimeHandles.cs b/src/coreclr/System.Private.CoreLib/src/System/RuntimeHandles.cs index 1d70bed0c214..b8ae179ff25c 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/RuntimeHandles.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/RuntimeHandles.cs @@ -299,18 +299,27 @@ internal static object InternalAlloc(RuntimeType type) [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "RuntimeTypeHandle_InternalAlloc")] private static unsafe partial void InternalAlloc(MethodTable* pMT, ObjectHandleOnStack result); - internal static object InternalAllocNoChecks(RuntimeType type) + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static object InternalAllocNoChecks(MethodTable* pMT) { - Debug.Assert(!type.GetNativeTypeHandle().IsTypeDesc); - object? result = null; - InternalAllocNoChecks(type.GetNativeTypeHandle().AsMethodTable(), ObjectHandleOnStack.Create(ref result)); - GC.KeepAlive(type); - return result!; + return InternalAllocNoChecks_FastPath(pMT) ?? InternalAllocNoChecksWorker(pMT); + + [MethodImpl(MethodImplOptions.NoInlining)] + static object InternalAllocNoChecksWorker(MethodTable* pMT) + { + object? result = null; + InternalAllocNoChecks(pMT, ObjectHandleOnStack.Create(ref result)); + return result!; + } } [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "RuntimeTypeHandle_InternalAllocNoChecks")] private static unsafe partial void InternalAllocNoChecks(MethodTable* pMT, ObjectHandleOnStack result); + [MethodImpl(MethodImplOptions.InternalCall)] + private static extern object? InternalAllocNoChecks_FastPath(MethodTable* pMT); + /// /// Given a RuntimeType, returns information about how to activate it via calli /// semantics. 
This method will ensure the type object is fully initialized within diff --git a/src/coreclr/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs index 88113f771b7e..5e872581a02f 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs @@ -3568,18 +3568,18 @@ public override Type[] GetGenericArguments() } [RequiresUnreferencedCode("If some of the generic arguments are annotated (either with DynamicallyAccessedMembersAttribute, or generic constraints), trimming can't validate that the requirements of those annotations are met.")] - public override Type MakeGenericType(Type[] instantiation) + public override Type MakeGenericType(Type[] typeArguments) { - ArgumentNullException.ThrowIfNull(instantiation); + ArgumentNullException.ThrowIfNull(typeArguments); if (!IsGenericTypeDefinition) throw new InvalidOperationException(SR.Format(SR.Arg_NotGenericTypeDefinition, this)); RuntimeType[] genericParameters = GetGenericArgumentsInternal(); - if (genericParameters.Length != instantiation.Length) - throw new ArgumentException(SR.Argument_GenericArgsCount, nameof(instantiation)); + if (genericParameters.Length != typeArguments.Length) + throw new ArgumentException(SR.Argument_GenericArgsCount, nameof(typeArguments)); - if (instantiation.Length == 1 && instantiation[0] is RuntimeType rt) + if (typeArguments.Length == 1 && typeArguments[0] is RuntimeType rt) { ThrowIfTypeNeverValidGenericArgument(rt); try @@ -3593,13 +3593,13 @@ public override Type MakeGenericType(Type[] instantiation) } } - RuntimeType[] instantiationRuntimeType = new RuntimeType[instantiation.Length]; + RuntimeType[] instantiationRuntimeType = new RuntimeType[typeArguments.Length]; bool foundSigType = false; bool foundNonRuntimeType = false; - for (int i = 0; i < instantiation.Length; i++) + for (int i = 0; i < typeArguments.Length; i++) { - Type instantiationElem = instantiation[i] ?? throw new ArgumentNullException(); + Type instantiationElem = typeArguments[i] ?? throw new ArgumentNullException(); RuntimeType? rtInstantiationElem = instantiationElem as RuntimeType; if (rtInstantiationElem == null) @@ -3617,9 +3617,9 @@ public override Type MakeGenericType(Type[] instantiation) if (foundNonRuntimeType) { if (foundSigType) - return new SignatureConstructedGenericType(this, instantiation); + return new SignatureConstructedGenericType(this, typeArguments); - return Reflection.Emit.TypeBuilderInstantiation.MakeGenericType(this, (Type[])(instantiation.Clone())); + return Reflection.Emit.TypeBuilderInstantiation.MakeGenericType(this, (Type[])(typeArguments.Clone())); } SanityCheckGenericArguments(instantiationRuntimeType, genericParameters); diff --git a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs index d19cb01034a7..e66db58ba95f 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Diagnostics; using System.Runtime.InteropServices; using System.Runtime.CompilerServices; using System.Text; @@ -9,8 +10,28 @@ namespace System { public partial class String { + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "String_StrCns")] + private static unsafe partial string* StrCnsInternal(uint rid, IntPtr scopeHandle); + + // implementation of CORINFO_HELP_STRCNS + [StackTraceHidden] + [DebuggerStepThrough] + [DebuggerHidden] + internal static unsafe string StrCns(uint rid, IntPtr scopeHandle) + { + string* ptr = StrCnsInternal(rid, scopeHandle); + Debug.Assert(ptr != null); + return *ptr; + } + [MethodImpl(MethodImplOptions.InternalCall)] - internal static extern string FastAllocateString(int length); + internal static extern unsafe string FastAllocateString(MethodTable *pMT, int length); + + [DebuggerHidden] + internal static unsafe string FastAllocateString(int length) + { + return FastAllocateString(TypeHandle.TypeHandleOf().AsMethodTable(), length); + } [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "String_Intern")] private static partial void Intern(StringHandleOnStack src); @@ -38,7 +59,7 @@ internal static unsafe void InternalCopy(string src, IntPtr dest, int len) { if (len != 0) { - SpanHelpers.Memmove(ref *(byte*)dest, ref Unsafe.As(ref src.GetRawStringData()), (nuint)len); + SpanHelpers.Memmove(ref *(byte*)dest, ref src.GetRawStringDataAsUInt8(), (nuint)len); } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs b/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs index 5b33c5036782..a6494bf27883 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs @@ -1598,6 +1598,9 @@ internal static void MulticastDebuggerTraceHelper(object o, int count) [Intrinsic] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern IntPtr NextCallReturnAddress(); + + [Intrinsic] + internal static Continuation? AsyncCallContinuation() => null; } // class StubHelpers #if FEATURE_COMINTEROP diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Monitor.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Monitor.CoreCLR.cs index fdfc1c333e97..c271c2a3d552 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Monitor.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Monitor.CoreCLR.cs @@ -30,9 +30,79 @@ public static partial class Monitor ** ** Exceptions: ArgumentNullException if object is null. 
=========================================================================*/ + public static void Enter(object obj) + { + ArgumentNullException.ThrowIfNull(obj, null); + + if (!TryEnter_FastPath(obj)) + { + Enter_Slowpath(obj); + } + } + + [MethodImpl(MethodImplOptions.InternalCall)] + private static extern bool TryEnter_FastPath(object obj); + + // These must match the values in syncblk.h + private enum EnterHelperResult + { + Contention = 0, + Entered = 1, + UseSlowPath = 2 + } + + // These must match the values in syncblk.h + private enum LeaveHelperAction + { + None = 0, + Signal = 1, + Yield = 2, + Contention = 3, + Error = 4, + } + [MethodImpl(MethodImplOptions.InternalCall)] - public static extern void Enter(object obj); + private static extern EnterHelperResult TryEnter_FastPath_WithTimeout(object obj, int timeout); + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "Monitor_Enter_Slowpath")] + private static partial void Enter_Slowpath(ObjectHandleOnStack obj); + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Enter_Slowpath(object obj) + { + Enter_Slowpath(ObjectHandleOnStack.Create(ref obj)); + } + + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "Monitor_TryEnter_Slowpath")] + private static partial int TryEnter_Slowpath(ObjectHandleOnStack obj, int timeout); + + [MethodImpl(MethodImplOptions.NoInlining)] + private static bool TryEnter_Slowpath(object obj) + { + if (TryEnter_Slowpath(ObjectHandleOnStack.Create(ref obj), 0) != 0) + { + return true; + } + else + { + return false; + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static bool TryEnter_Slowpath(object obj, int timeout) + { + ArgumentOutOfRangeException.ThrowIfLessThan(timeout, -1, null); + + if (TryEnter_Slowpath(ObjectHandleOnStack.Create(ref obj), timeout) != 0) + { + return true; + } + else + { + return false; + } + } // Use a ref bool instead of out to ensure that unverifiable code must // initialize this value to something. If we used out, the value @@ -44,7 +114,13 @@ public static void Enter(object obj, ref bool lockTaken) if (lockTaken) ThrowLockTakenException(); - ReliableEnter(obj, ref lockTaken); + ArgumentNullException.ThrowIfNull(obj, null); + + if (!TryEnter_FastPath(obj)) + { + Enter_Slowpath(obj); + } + lockTaken = true; Debug.Assert(lockTaken); } @@ -55,9 +131,16 @@ private static void ThrowLockTakenException() } [MethodImpl(MethodImplOptions.InternalCall)] - private static extern void ReliableEnter(object obj, ref bool lockTaken); + private static extern LeaveHelperAction Exit_FastPath(object obj); + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "Monitor_Exit_Slowpath")] + private static partial void Exit_Slowpath(ObjectHandleOnStack obj, LeaveHelperAction exitBehavior); + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Exit_Slowpath(LeaveHelperAction exitBehavior, object obj) + { + Exit_Slowpath(ObjectHandleOnStack.Create(ref obj), exitBehavior); + } /*========================================================================= ** Release the monitor lock. If one or more threads are waiting to acquire the @@ -68,8 +151,36 @@ private static void ThrowLockTakenException() ** SynchronizationLockException if the current thread does not ** own the lock. 
=========================================================================*/ - [MethodImpl(MethodImplOptions.InternalCall)] - public static extern void Exit(object obj); + public static void Exit(object obj) + { + ArgumentNullException.ThrowIfNull(obj, null); + + LeaveHelperAction exitBehavior = Exit_FastPath(obj); + + if (exitBehavior == LeaveHelperAction.None) + return; + + Exit_Slowpath(exitBehavior, obj); + } + + // Used to implement synchronized methods on non Windows-X86 architectures + internal static void ExitIfLockTaken(object obj, ref bool lockTaken) + { + ArgumentNullException.ThrowIfNull(obj, null); + + if (lockTaken) + { + LeaveHelperAction exitBehavior = Exit_FastPath(obj); + + if (exitBehavior != LeaveHelperAction.None) + { + Exit_Slowpath(exitBehavior, obj); + return; + } + + lockTaken = false; + } + } /*========================================================================= ** Similar to Enter, but will never block. That is, if the current thread can @@ -80,9 +191,41 @@ private static void ThrowLockTakenException() =========================================================================*/ public static bool TryEnter(object obj) { - bool lockTaken = false; - TryEnter(obj, 0, ref lockTaken); - return lockTaken; + ArgumentNullException.ThrowIfNull(obj, null); + + EnterHelperResult tryEnterResult = TryEnter_FastPath_WithTimeout(obj, 0); + if (tryEnterResult == EnterHelperResult.Entered) + { + return true; + } + else if (tryEnterResult == EnterHelperResult.Contention) + { + return false; + } + + return TryEnter_Slowpath(obj); + } + + private static void TryEnter_Timeout_WithLockTaken(object obj, int millisecondsTimeout, ref bool lockTaken) + { + if (millisecondsTimeout >= -1) + { + EnterHelperResult tryEnterResult = TryEnter_FastPath_WithTimeout(obj, millisecondsTimeout); + if (tryEnterResult == EnterHelperResult.Entered) + { + lockTaken = true; + return; + } + else if (millisecondsTimeout == 0 && (tryEnterResult == EnterHelperResult.Contention)) + { + return; + } + } + + if (TryEnter_Slowpath(obj, millisecondsTimeout)) + { + lockTaken = true; + } } // The JIT should inline this method to allow check of lockTaken argument to be optimized out @@ -92,7 +235,9 @@ public static void TryEnter(object obj, ref bool lockTaken) if (lockTaken) ThrowLockTakenException(); - ReliableEnterTimeout(obj, 0, ref lockTaken); + ArgumentNullException.ThrowIfNull(obj, null); + + TryEnter_Timeout_WithLockTaken(obj, 0, ref lockTaken); } /*========================================================================= @@ -103,13 +248,24 @@ public static void TryEnter(object obj, ref bool lockTaken) ** Exceptions: ArgumentNullException if object is null. ** ArgumentException if timeout < -1 (Timeout.Infinite). =========================================================================*/ - // The JIT should inline this method to allow check of lockTaken argument to be optimized out - // in the typical case. Note that the method has to be transparent for inlining to be allowed by the VM. 
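+ // Tries the inline fast path first and falls back to the TryEnter_Slowpath QCall whenever the
+ // fast path cannot settle the acquisition on its own.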
public static bool TryEnter(object obj, int millisecondsTimeout) { - bool lockTaken = false; - TryEnter(obj, millisecondsTimeout, ref lockTaken); - return lockTaken; + ArgumentNullException.ThrowIfNull(obj, null); + + if (millisecondsTimeout >= -1) + { + EnterHelperResult tryEnterResult = TryEnter_FastPath_WithTimeout(obj, millisecondsTimeout); + if (tryEnterResult == EnterHelperResult.Entered) + { + return true; + } + else if (millisecondsTimeout == 0 && (tryEnterResult == EnterHelperResult.Contention)) + { + return false; + } + } + + return TryEnter_Slowpath(obj, millisecondsTimeout); } // The JIT should inline this method to allow check of lockTaken argument to be optimized out @@ -119,11 +275,10 @@ public static void TryEnter(object obj, int millisecondsTimeout, ref bool lockTa if (lockTaken) ThrowLockTakenException(); - ReliableEnterTimeout(obj, millisecondsTimeout, ref lockTaken); - } + ArgumentNullException.ThrowIfNull(obj, null); - [MethodImpl(MethodImplOptions.InternalCall)] - private static extern void ReliableEnterTimeout(object obj, int timeout, ref bool lockTaken); + TryEnter_Timeout_WithLockTaken(obj, millisecondsTimeout, ref lockTaken); + } public static bool IsEntered(object obj) { diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Mutex.CoreCLR.Unix.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Mutex.CoreCLR.Unix.cs index 52233ffc583b..08b0748b412c 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Mutex.CoreCLR.Unix.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Mutex.CoreCLR.Unix.cs @@ -14,9 +14,40 @@ namespace System.Threading /// public sealed partial class Mutex : WaitHandle { - private void CreateMutexCore(bool initiallyOwned, string? name, out bool createdNew) + private unsafe void CreateMutexCore(bool initiallyOwned) { - SafeWaitHandle mutexHandle = CreateMutexCore(initiallyOwned, name, out int errorCode, out string? errorDetails); + SafeWaitHandle handle = + CreateMutex( + initiallyOwned, + name: null, + currentUserOnly: false, + systemCallErrors: null, + systemCallErrorsBufferSize: 0); + if (handle.IsInvalid) + { + int errorCode = Marshal.GetLastPInvokeError(); + handle.SetHandleAsInvalid(); + throw Win32Marshal.GetExceptionForWin32Error(errorCode); + } + + SafeWaitHandle = handle; + } + + private void CreateMutexCore( + bool initiallyOwned, + string? name, + NamedWaitHandleOptionsInternal options, + out bool createdNew) + { + bool currentUserOnly = false; + if (!string.IsNullOrEmpty(name) && options.WasSpecified) + { + name = options.GetNameWithSessionPrefix(name); + currentUserOnly = options.CurrentUserOnly; + } + + SafeWaitHandle mutexHandle = + CreateMutexCore(initiallyOwned, name, currentUserOnly, out int errorCode, out string? errorDetails); if (mutexHandle.IsInvalid) { mutexHandle.SetHandleAsInvalid(); @@ -33,16 +64,26 @@ private void CreateMutexCore(bool initiallyOwned, string? name, out bool created SafeWaitHandle = mutexHandle; } - private static OpenExistingResult OpenExistingWorker(string name, out Mutex? result) + private static OpenExistingResult OpenExistingWorker( + string name, + NamedWaitHandleOptionsInternal options, + out Mutex? result) { ArgumentException.ThrowIfNullOrEmpty(name); + bool currentUserOnly = false; + if (options.WasSpecified) + { + name = options.GetNameWithSessionPrefix(name); + currentUserOnly = options.CurrentUserOnly; + } + result = null; // To allow users to view & edit the ACL's, call OpenMutex // with parameters to allow us to view & edit the ACL. 
This will // fail if we don't have permission to view or edit the ACL's. // If that happens, ask for less permissions. - SafeWaitHandle myHandle = OpenMutexCore(name, out int errorCode, out string? errorDetails); + SafeWaitHandle myHandle = OpenMutexCore(name, currentUserOnly, out int errorCode, out string? errorDetails); if (myHandle.IsInvalid) { @@ -86,11 +127,13 @@ public void ReleaseMutex() private static unsafe SafeWaitHandle CreateMutexCore( bool initialOwner, string? name, + bool currentUserOnly, out int errorCode, out string? errorDetails) { byte* systemCallErrors = stackalloc byte[SystemCallErrorsBufferSize]; - SafeWaitHandle mutexHandle = CreateMutex(initialOwner, name, systemCallErrors, SystemCallErrorsBufferSize); + SafeWaitHandle mutexHandle = + CreateMutex(initialOwner, name, currentUserOnly, systemCallErrors, SystemCallErrorsBufferSize); // Get the error code even if the handle is valid, as it could be ERROR_ALREADY_EXISTS, indicating that the mutex // already exists and was opened @@ -100,10 +143,10 @@ private static unsafe SafeWaitHandle CreateMutexCore( return mutexHandle; } - private static unsafe SafeWaitHandle OpenMutexCore(string name, out int errorCode, out string? errorDetails) + private static unsafe SafeWaitHandle OpenMutexCore(string name, bool currentUserOnly, out int errorCode, out string? errorDetails) { byte* systemCallErrors = stackalloc byte[SystemCallErrorsBufferSize]; - SafeWaitHandle mutexHandle = OpenMutex(name, systemCallErrors, SystemCallErrorsBufferSize); + SafeWaitHandle mutexHandle = OpenMutex(name, currentUserOnly, systemCallErrors, SystemCallErrorsBufferSize); errorCode = mutexHandle.IsInvalid ? Marshal.GetLastPInvokeError() : Interop.Errors.ERROR_SUCCESS; errorDetails = mutexHandle.IsInvalid ? GetErrorDetails(systemCallErrors) : null; return mutexHandle; @@ -127,9 +170,9 @@ private static unsafe SafeWaitHandle OpenMutexCore(string name, out int errorCod } [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "PAL_CreateMutexW", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] - private static unsafe partial SafeWaitHandle CreateMutex([MarshalAs(UnmanagedType.Bool)] bool initialOwner, string? name, byte* systemCallErrors, uint systemCallErrorsBufferSize); + private static unsafe partial SafeWaitHandle CreateMutex([MarshalAs(UnmanagedType.Bool)] bool initialOwner, string? 
name, [MarshalAs(UnmanagedType.Bool)] bool currentUserOnly, byte* systemCallErrors, uint systemCallErrorsBufferSize); [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "PAL_OpenMutexW", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] - private static unsafe partial SafeWaitHandle OpenMutex(string name, byte* systemCallErrors, uint systemCallErrorsBufferSize); + private static unsafe partial SafeWaitHandle OpenMutex(string name, [MarshalAs(UnmanagedType.Bool)] bool currentUserOnly, byte* systemCallErrors, uint systemCallErrorsBufferSize); } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Thread.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Thread.CoreCLR.cs index c646bada45e0..2a0b425621de 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Thread.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Thread.CoreCLR.cs @@ -491,19 +491,24 @@ private void ResetFinalizerThreadSlow() } } + [MethodImpl(MethodImplOptions.InternalCall)] + private static extern bool CatchAtSafePoint(); + [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ThreadNative_PollGC")] - private static partial void ThreadNative_PollGC(); + private static partial void PollGCInternal(); // GC Suspension is done by simply dropping into native code via p/invoke, and we reuse the p/invoke // mechanism for suspension. On all architectures we should have the actual stub used for the check be implemented // as a small assembly stub which checks the global g_TrapReturningThreads flag and tail-call to this helper - private static unsafe void PollGC() + private static void PollGC() { - NativeThreadState catchAtSafePoint = ((NativeThreadClass*)Thread.DirectOnThreadLocalData.pNativeThread)->m_State & NativeThreadState.TS_CatchAtSafePoint; - if (catchAtSafePoint != NativeThreadState.None) + if (CatchAtSafePoint()) { - ThreadNative_PollGC(); + PollGCWorker(); } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void PollGCWorker() => PollGCInternal(); } [StructLayout(LayoutKind.Sequential)] diff --git a/src/coreclr/binder/assemblybindercommon.cpp b/src/coreclr/binder/assemblybindercommon.cpp index 6bd3d1653a8b..b7989e9c47a4 100644 --- a/src/coreclr/binder/assemblybindercommon.cpp +++ b/src/coreclr/binder/assemblybindercommon.cpp @@ -37,7 +37,7 @@ extern HRESULT RuntimeInvokeHostAssemblyResolver(INT_PTR pManagedAssemblyLoadCon STDAPI BinderAcquirePEImage(LPCTSTR szAssemblyPath, PEImage** ppPEImage, - BundleFileLocation bundleFileLocation); + ProbeExtensionResult probeExtensionResult); namespace BINDER_SPACE { @@ -271,8 +271,8 @@ namespace BINDER_SPACE StackSString sCoreLibName(CoreLibName_IL_W); StackSString sCoreLib; BinderTracing::PathSource pathSource = BinderTracing::PathSource::Bundle; - BundleFileLocation bundleFileLocation = Bundle::ProbeAppBundle(sCoreLibName, /*pathIsBundleRelative */ true); - if (!bundleFileLocation.IsValid()) + ProbeExtensionResult probeExtensionResult = AssemblyProbeExtension::Probe(sCoreLibName, /*pathIsBundleRelative */ true); + if (!probeExtensionResult.IsValid()) { pathSource = BinderTracing::PathSource::ApplicationAssemblies; } @@ -282,7 +282,7 @@ namespace BINDER_SPACE hr = AssemblyBinderCommon::GetAssembly(sCoreLib, TRUE /* fIsInTPA */, &pSystemAssembly, - bundleFileLocation); + probeExtensionResult); BinderTracing::PathProbed(sCoreLib, pathSource, hr); @@ -322,7 +322,7 @@ namespace BINDER_SPACE hr = AssemblyBinderCommon::GetAssembly(sCoreLib, TRUE /* fIsInTPA */, &pSystemAssembly, - 
bundleFileLocation); + probeExtensionResult); BinderTracing::PathProbed(sCoreLib, BinderTracing::PathSource::ApplicationAssemblies, hr); } @@ -367,8 +367,8 @@ namespace BINDER_SPACE StackSString sCoreLibSatellite; BinderTracing::PathSource pathSource = BinderTracing::PathSource::Bundle; - BundleFileLocation bundleFileLocation = Bundle::ProbeAppBundle(relativePath, /*pathIsBundleRelative */ true); - if (!bundleFileLocation.IsValid()) + ProbeExtensionResult probeExtensionResult = AssemblyProbeExtension::Probe(relativePath, /*pathIsBundleRelative */ true); + if (!probeExtensionResult.IsValid()) { sCoreLibSatellite.Set(systemDirectory); pathSource = BinderTracing::PathSource::ApplicationAssemblies; @@ -379,7 +379,7 @@ namespace BINDER_SPACE IF_FAIL_GO(AssemblyBinderCommon::GetAssembly(sCoreLibSatellite, TRUE /* fIsInTPA */, &pSystemAssembly, - bundleFileLocation)); + probeExtensionResult)); BinderTracing::PathProbed(sCoreLibSatellite, pathSource, hr); *ppSystemAssembly = pSystemAssembly.Extract(); @@ -590,15 +590,15 @@ namespace BINDER_SPACE namespace { - HRESULT BindSatelliteResourceFromBundle( + HRESULT BindSatelliteResourceByProbeExtension( AssemblyName* pRequestedAssemblyName, SString &relativePath, BindResult* pBindResult) { HRESULT hr = S_OK; - BundleFileLocation bundleFileLocation = Bundle::ProbeAppBundle(relativePath, /* pathIsBundleRelative */ true); - if (!bundleFileLocation.IsValid()) + ProbeExtensionResult probeExtensionResult = AssemblyProbeExtension::Probe(relativePath, /* pathIsBundleRelative */ true); + if (!probeExtensionResult.IsValid()) { return hr; } @@ -607,7 +607,7 @@ namespace BINDER_SPACE hr = AssemblyBinderCommon::GetAssembly(relativePath, FALSE /* fIsInTPA */, &pAssembly, - bundleFileLocation); + probeExtensionResult); BinderTracing::PathProbed(relativePath, BinderTracing::PathSource::Bundle, hr); @@ -692,7 +692,7 @@ namespace BINDER_SPACE BindResult* pBindResult) { // Satellite resource probing strategy is to look: - // * First within the single-file bundle + // * First via probe extensions (single-file bundle, external data) // * Then under each of the Platform Resource Roots // * Then under each of the App Paths. // @@ -712,7 +712,7 @@ namespace BINDER_SPACE CombinePath(fileName, simpleNameRef, fileName); fileName.Append(W(".dll")); - hr = BindSatelliteResourceFromBundle(pRequestedAssemblyName, fileName, pBindResult); + hr = BindSatelliteResourceByProbeExtension(pRequestedAssemblyName, fileName, pBindResult); if (pBindResult->HaveResult() || FAILED(hr)) { @@ -841,12 +841,9 @@ namespace BINDER_SPACE ReleaseHolder pTPAAssembly; const SString& simpleName = pRequestedAssemblyName->GetSimpleName(); - // Is assembly in the bundle? - // Single-file bundle contents take precedence over TPA. - // The list of bundled assemblies is contained in the bundle manifest, and NOT in the TPA. - // Therefore the bundle is first probed using the assembly's simple name. - // If found, the assembly is loaded from the bundle. - if (Bundle::AppIsBundle()) + // Probing extensions (single-file, external probe) take precedence over TPA. + // For single-file, bundled assemblies should only be in the bundle manifest, not in the TPA. + if (AssemblyProbeExtension::IsEnabled()) { // Search Assembly.ni.dll, then Assembly.dll // The Assembly.ni.dll paths are rare, and intended for supporting managed C++ R2R assemblies. 
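
The binder hunks above replace bundle-specific probing (`Bundle::ProbeAppBundle`/`BundleFileLocation`) with a more general probe-extension step (`AssemblyProbeExtension::Probe`/`ProbeExtensionResult`) that is consulted before the TPA list and app paths. The stand-alone C++ sketch below only illustrates that "probe extension first, then path probing" ordering; every name in it (`ProbeResult`, `ProbeExtension`, `ProbePaths`, the sample assembly names and directory) is hypothetical and not the binder's real API.

```cpp
// Minimal illustration of "probe extension first, then path probing".
// All identifiers here are invented for this sketch; the actual binder code
// uses AssemblyProbeExtension::Probe and AssemblyBinderCommon::GetAssembly.
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct ProbeResult
{
    std::string source; // where the assembly was found ("bundle" or "path")
    std::string path;   // resolved location
};

// Pretend probe extension: only knows about assemblies embedded in a bundle.
static std::optional<ProbeResult> ProbeExtension(const std::string& simpleName)
{
    if (simpleName == "System.Private.CoreLib")
        return ProbeResult{ "bundle", simpleName + ".dll" };
    return std::nullopt;
}

// Fallback: ordinary directory probing (pretend every directory has the file).
static std::optional<ProbeResult> ProbePaths(const std::string& simpleName,
                                             const std::vector<std::string>& dirs)
{
    for (const std::string& dir : dirs)
        return ProbeResult{ "path", dir + "/" + simpleName + ".dll" };
    return std::nullopt;
}

int main()
{
    const std::vector<std::string> appPaths = { "/app" };

    for (const char* name : { "System.Private.CoreLib", "MyLibrary" })
    {
        // Probe extensions take precedence, mirroring the precedence change
        // made in assemblybindercommon.cpp; paths are only tried afterwards.
        std::optional<ProbeResult> result = ProbeExtension(name);
        if (!result)
            result = ProbePaths(name, appPaths);

        if (result)
            std::cout << name << " -> " << result->source << ": " << result->path << "\n";
    }
    return 0;
}
```
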
@@ -858,16 +855,19 @@ namespace BINDER_SPACE SString assemblyFileName(simpleName); assemblyFileName.Append(candidates[i]); - SString assemblyFilePath(Bundle::AppBundle->BasePath()); - assemblyFilePath.Append(assemblyFileName); - - BundleFileLocation bundleFileLocation = Bundle::ProbeAppBundle(assemblyFileName, /* pathIsBundleRelative */ true); - if (bundleFileLocation.IsValid()) + ProbeExtensionResult probeExtensionResult = AssemblyProbeExtension::Probe(assemblyFileName, /* pathIsBundleRelative */ true); + if (probeExtensionResult.IsValid()) { + SString assemblyFilePath; + if (Bundle::AppIsBundle()) + assemblyFilePath.SetUTF8(Bundle::AppBundle->BasePath()); + + assemblyFilePath.Append(assemblyFileName); + hr = GetAssembly(assemblyFilePath, TRUE, // fIsInTPA &pTPAAssembly, - bundleFileLocation); + probeExtensionResult); BinderTracing::PathProbed(assemblyFilePath, BinderTracing::PathSource::Bundle, hr); @@ -996,7 +996,7 @@ namespace BINDER_SPACE HRESULT AssemblyBinderCommon::GetAssembly(SString &assemblyPath, BOOL fIsInTPA, Assembly **ppAssembly, - BundleFileLocation bundleFileLocation) + ProbeExtensionResult probeExtensionResult) { HRESULT hr = S_OK; @@ -1012,7 +1012,7 @@ namespace BINDER_SPACE { LPCTSTR szAssemblyPath = const_cast(assemblyPath.GetUnicode()); - hr = BinderAcquirePEImage(szAssemblyPath, &pPEImage, bundleFileLocation); + hr = BinderAcquirePEImage(szAssemblyPath, &pPEImage, probeExtensionResult); IF_FAIL_GO(hr); } diff --git a/src/coreclr/binder/inc/assembly.hpp b/src/coreclr/binder/inc/assembly.hpp index 650877cafd88..ebaf045207fe 100644 --- a/src/coreclr/binder/inc/assembly.hpp +++ b/src/coreclr/binder/inc/assembly.hpp @@ -25,7 +25,6 @@ #include "customassemblybinder.h" #endif // !defined(DACCESS_COMPILE) -#include "bundle.h" #include namespace BINDER_SPACE diff --git a/src/coreclr/binder/inc/assemblybindercommon.hpp b/src/coreclr/binder/inc/assemblybindercommon.hpp index cdc091b98024..184d2cd3b8fb 100644 --- a/src/coreclr/binder/inc/assemblybindercommon.hpp +++ b/src/coreclr/binder/inc/assemblybindercommon.hpp @@ -16,7 +16,7 @@ #include "bindertypes.hpp" #include "bindresult.hpp" -#include "bundle.h" +#include class AssemblyBinder; class DefaultAssemblyBinder; @@ -28,7 +28,7 @@ namespace BINDER_SPACE class AssemblyBinderCommon { public: - static HRESULT BindAssembly(/* in */ AssemblyBinder *pBinder, + static HRESULT BindAssembly(/* in */ AssemblyBinder *pBinder, /* in */ AssemblyName *pAssemblyName, /* in */ bool excludeAppPaths, /* out */ Assembly **ppAssembly); @@ -44,7 +44,7 @@ namespace BINDER_SPACE static HRESULT GetAssembly(/* in */ SString &assemblyPath, /* in */ BOOL fIsInTPA, /* out */ Assembly **ppAssembly, - /* in */ BundleFileLocation bundleFileLocation = BundleFileLocation::Invalid()); + /* in */ ProbeExtensionResult probeExtensionResult = ProbeExtensionResult::Invalid()); #if !defined(DACCESS_COMPILE) static HRESULT BindUsingHostAssemblyResolver (/* in */ INT_PTR pManagedAssemblyLoadContextToBindWithin, diff --git a/src/coreclr/binder/utils.cpp b/src/coreclr/binder/utils.cpp index f1b916ec9dc1..f071aea3c487 100644 --- a/src/coreclr/binder/utils.cpp +++ b/src/coreclr/binder/utils.cpp @@ -18,6 +18,9 @@ #include "clr/fs/path.h" using namespace clr::fs; +// Forward declaration - see src/vm/util.cpp +BOOL RuntimeFileNotFound(HRESULT hr); + namespace BINDER_SPACE { namespace diff --git a/src/coreclr/build-runtime.cmd b/src/coreclr/build-runtime.cmd index 17ec84791abf..acce555f2401 100644 --- a/src/coreclr/build-runtime.cmd +++ b/src/coreclr/build-runtime.cmd @@ 
-47,6 +47,7 @@ set __TargetArchArm=0 set __TargetArchArm64=0 set __TargetArchLoongArch64=0 set __TargetArchRiscV64=0 +set __TargetArchWasm=0 set __BuildTypeDebug=0 set __BuildTypeChecked=0 @@ -67,12 +68,14 @@ set __UnprocessedBuildArgs= set __BuildNative=1 set __RestoreOptData=1 +set __CrossTarget=0 +set __HostOS= set __HostArch= set __PgoOptDataPath= set __CMakeArgs= set __Ninja=1 set __RequestedBuildComponents= -set __OutputRid= +set __TargetRid= set __SubDir= :Arg_Loop @@ -135,9 +138,11 @@ if [!__PassThroughArgs!]==[] ( set "__PassThroughArgs=%__PassThroughArgs% %1" ) +if /i "%1" == "-hostos" (set __HostOS=%2&shift&shift&goto Arg_Loop) if /i "%1" == "-hostarch" (set __HostArch=%2&shift&shift&goto Arg_Loop) if /i "%1" == "-os" (set __TargetOS=%2&shift&shift&goto Arg_Loop) -if /i "%1" == "-outputrid" (set __OutputRid=%2&shift&shift&goto Arg_Loop) +if /i "%1" == "-targetrid" (set __TargetRid=%2&shift&shift&goto Arg_Loop) +if /i "%1" == "-outputrid" (set __TargetRid=%2&shift&shift&goto Arg_Loop) if /i "%1" == "-subdir" (set __SubDir=%2&shift&shift&goto Arg_Loop) if /i "%1" == "-cmakeargs" (set __CMakeArgs=%2 %__CMakeArgs%&set __remainingArgs="!__remainingArgs:*%2=!"&shift&shift&goto Arg_Loop) @@ -152,6 +157,7 @@ if /i "%1" == "-enforcepgo" (set __EnforcePgo=1&shift&goto Arg_Loop) if /i "%1" == "-pgodatapath" (set __PgoOptDataPath=%~2&set __PgoOptimize=1&shift&shift&goto Arg_Loop) if /i "%1" == "-component" (set __RequestedBuildComponents=%__RequestedBuildComponents%-%2&set "__remainingArgs=!__remainingArgs:*%2=!"&shift&shift&goto Arg_Loop) if /i "%1" == "-fsanitize" (set __CMakeArgs=%__CMakeArgs% "-DCLR_CMAKE_ENABLE_SANITIZERS=%2"&shift&shift&goto Arg_Loop) +if /i "%1" == "-keepnativesymbols" (set __CMakeArgs=%__CMakeArgs% "-DCLR_CMAKE_KEEP_NATIVE_SYMBOLS=true"&shift&goto Arg_Loop) REM TODO these are deprecated remove them eventually REM don't add more, use the - syntax instead @@ -285,7 +291,18 @@ REM ============================================================================ @if defined _echo @echo on -call "%__RepoRootDir%\eng\native\version\copy_version_files.cmd" +if "%__TargetOS%"=="android" ( + set __CrossTarget=1 +) +if "%__TargetOS%"=="browser" ( + set __CrossTarget=1 +) + +if %__CrossTarget% EQU 0 ( + call "%__RepoRootDir%\eng\native\version\copy_version_files.cmd" +) else ( + call powershell -NoProfile -ExecutionPolicy ByPass -File "%__RepoRootDir%\eng\native\version\copy_version_files.ps1" +) REM ========================================================================================= REM === @@ -374,7 +391,7 @@ REM ============================================================================ :: When the host runs on an unknown rid, it falls back to the output rid :: Strip the architecture -for /f "delims=-" %%i in ("%__OutputRid%") do set __HostFallbackOS=%%i +for /f "delims=-" %%i in ("%__TargetRid%") do set __HostFallbackOS=%%i :: The "win" host build is Windows 10 compatible if "%__HostFallbackOS%" == "win" (set __HostFallbackOS=win10) :: Default to "win10" fallback @@ -423,9 +440,23 @@ if %__BuildNative% EQU 1 ( set __CMakeTargetOS="%__TargetOS%" ) +<<<<<<< HEAD set __ExtraCmakeArgs=!__ExtraCmakeArgs! "-DCLR_CMAKE_TARGET_ARCH=%__TargetArch%" "-DCLR_CMAKE_TARGET_OS=!__CMakeTargetOS!" 
"-DCLI_CMAKE_FALLBACK_OS=%__HostFallbackOS%" "-DCLR_CMAKE_PGO_INSTRUMENT=%__PgoInstrument%" "-DCLR_CMAKE_OPTDATA_PATH=%__PgoOptDataPath%" "-DCLR_CMAKE_PGO_OPTIMIZE=%__PgoOptimize%" %__CMakeArgs% echo Calling "%__RepoRootDir%\eng\native\gen-buildsys.cmd" "%__ProjectDir%" "%__IntermediatesDir%" %__VSVersion% %__HostArch% %__TargetOS% !__ExtraCmakeArgs! call "%__RepoRootDir%\eng\native\gen-buildsys.cmd" "%__ProjectDir%" "%__IntermediatesDir%" %__VSVersion% %__HostArch% %__TargetOS% !__ExtraCmakeArgs! +======= + set __ExtraCmakeArgs=!__ExtraCmakeArgs! "-DCLR_CMAKE_TARGET_ARCH=%__TargetArch%" "-DCLR_CMAKE_TARGET_OS=%__TargetOS%" + set __ExtraCmakeArgs=!__ExtraCmakeArgs! "-DCLI_CMAKE_FALLBACK_OS=%__HostFallbackOS%" "-DCLR_CMAKE_PGO_INSTRUMENT=%__PgoInstrument%" "-DCLR_CMAKE_OPTDATA_PATH=%__PgoOptDataPath%" "-DCLR_CMAKE_PGO_OPTIMIZE=%__PgoOptimize%" + + if "%__HostOS%" == "" ( + set "__HostOS=!__TargetOS!" + ) + + set __ExtraCmakeArgs=!__ExtraCmakeArgs! %__CMakeArgs% + + echo Calling "%__RepoRootDir%\eng\native\gen-buildsys.cmd" "%__ProjectDir%" "%__IntermediatesDir%" %VisualStudioVersion% %__HostArch% !__HostOS! !__ExtraCmakeArgs! + call "%__RepoRootDir%\eng\native\gen-buildsys.cmd" "%__ProjectDir%" "%__IntermediatesDir%" %VisualStudioVersion% %__HostArch% !__HostOS! !__ExtraCmakeArgs! +>>>>>>> upstream-jun if not !errorlevel! == 0 ( echo %__ErrMsgPrefix%%__MsgPrefix%Error: failed to generate native component build project! goto ExitWithError @@ -509,11 +540,11 @@ REM ============================================================================ set __TargetArchList= -set /A __TotalSpecifiedTargetArch=__TargetArchX64 + __TargetArchX86 + __TargetArchArm + __TargetArchArm64 + __TargetArchLoongArch64 + __TargetArchRiscV64 +set /A __TotalSpecifiedTargetArch=__TargetArchX64 + __TargetArchX86 + __TargetArchArm + __TargetArchArm64 + __TargetArchLoongArch64 + __TargetArchRiscV64 + __TargetArchWasm if %__TotalSpecifiedTargetArch% EQU 0 ( REM Nothing specified means we want to build all architectures. set __TargetArchList=x64 x86 arm arm64 - + if %__BuildAllJitsCommunity%==1 ( set __TargetArchList=%__TargetArchList% loongarch64 riscv64 ) @@ -527,6 +558,7 @@ if %__TargetArchArm%==1 set __TargetArchList=%__TargetArchList% arm if %__TargetArchArm64%==1 set __TargetArchList=%__TargetArchList% arm64 if %__TargetArchLoongArch64%==1 set __TargetArchList=%__TargetArchList% loongarch64 if %__TargetArchRiscV64%==1 set __TargetArchList=%__TargetArchList% riscv64 +if %__TargetArchWasm%==1 set __TargetArchList=%__TargetArchList% wasm set __BuildTypeList= diff --git a/src/coreclr/classlibnative/bcltype/objectnative.cpp b/src/coreclr/classlibnative/bcltype/objectnative.cpp index 56ed9d3ee845..c9087be0db97 100644 --- a/src/coreclr/classlibnative/bcltype/objectnative.cpp +++ b/src/coreclr/classlibnative/bcltype/objectnative.cpp @@ -45,24 +45,7 @@ FCIMPL1(INT32, ObjectNative::TryGetHashCode, Object* obj) return 0; OBJECTREF objRef = ObjectToOBJECTREF(obj); - DWORD bits = objRef->GetHeader()->GetBits(); - if (bits & BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX) - { - if (bits & BIT_SBLK_IS_HASHCODE) - { - // Common case: the object already has a hash code - return bits & MASK_HASHCODE; - } - else - { - // We have a sync block index. This means if we already have a hash code, - // it is in the sync block, otherwise we will return 0, which means "not set". 
- SyncBlock *psb = objRef->PassiveGetSyncBlock(); - if (psb != NULL) - return psb->GetHashCode(); - } - } - return 0; + return objRef->TryGetHashCode(); } FCIMPLEND @@ -83,8 +66,6 @@ FCIMPL2(FC_BOOL_RET, ObjectNative::ContentEquals, Object *pThisRef, Object *pCom pCompareRef->GetData(), pThisMT->GetNumInstanceFieldBytes()) == 0; - FC_GC_POLL_RET(); - FC_RETURN_BOOL(ret); } FCIMPLEND @@ -199,3 +180,117 @@ extern "C" INT64 QCALLTYPE Monitor_GetLockContentionCount() END_QCALL; return result; } + +//======================================================================== +// +// MONITOR HELPERS +// +//======================================================================== + +/*********************************************************************/ +extern "C" void QCALLTYPE Monitor_Enter_Slowpath(QCall::ObjectHandleOnStack objHandle) +{ + QCALL_CONTRACT; + + BEGIN_QCALL; + + GCX_COOP(); + + objHandle.Get()->EnterObjMonitor(); + END_QCALL; +} + +/*********************************************************************/ +#include + +FCIMPL1(FC_BOOL_RET, ObjectNative::Monitor_TryEnter_FastPath, Object* obj) +{ + FCALL_CONTRACT; + + FC_RETURN_BOOL(obj->TryEnterObjMonitorSpinHelper()); +} +FCIMPLEND + +FCIMPL2(AwareLock::EnterHelperResult, ObjectNative::Monitor_TryEnter_FastPath_WithTimeout, Object* obj, INT32 timeOut) +{ + FCALL_CONTRACT; + + Thread* pCurThread = GetThread(); + + if (pCurThread->CatchAtSafePoint()) + { + return AwareLock::EnterHelperResult::UseSlowPath; + } + + AwareLock::EnterHelperResult result = obj->EnterObjMonitorHelper(pCurThread); + if (result == AwareLock::EnterHelperResult::Contention) + { + if (timeOut == 0) + { + return AwareLock::EnterHelperResult::Contention; + } + + result = obj->EnterObjMonitorHelperSpin(pCurThread); + } + + return result; +} +FCIMPLEND + +#include + +/*********************************************************************/ +extern "C" INT32 QCALLTYPE Monitor_TryEnter_Slowpath(QCall::ObjectHandleOnStack objHandle, INT32 timeOut) +{ + QCALL_CONTRACT; + + BOOL result = FALSE; + + BEGIN_QCALL; + + GCX_COOP(); + + _ASSERTE(timeOut >= -1); // This should be checked in managed code. 
+ + result = objHandle.Get()->TryEnterObjMonitor(timeOut); + + END_QCALL; + + return result; +} + +/*********************************************************************/ +extern "C" void QCALLTYPE Monitor_Exit_Slowpath(QCall::ObjectHandleOnStack objHandle, AwareLock::LeaveHelperAction exitBehavior) +{ + QCALL_CONTRACT; + + BEGIN_QCALL; + + GCX_COOP(); + + if (exitBehavior != AwareLock::LeaveHelperAction::Signal) + { + if (!objHandle.Get()->LeaveObjMonitor()) + COMPlusThrow(kSynchronizationLockException); + } + else + { + // Signal the event + SyncBlock *psb = objHandle.Get()->PassiveGetSyncBlock(); + if (psb != NULL) + psb->QuickGetMonitor()->Signal(); + } + END_QCALL; +} + +#include +FCIMPL1(AwareLock::LeaveHelperAction, ObjectNative::Monitor_Exit_FastPath, Object* obj) +{ + FCALL_CONTRACT; + + // Handle the simple case without erecting helper frame + return obj->LeaveObjMonitorHelper(GetThread()); +} +FCIMPLEND +#include + diff --git a/src/coreclr/classlibnative/bcltype/objectnative.h b/src/coreclr/classlibnative/bcltype/objectnative.h index 8178ce79a552..9dfb597145ae 100644 --- a/src/coreclr/classlibnative/bcltype/objectnative.h +++ b/src/coreclr/classlibnative/bcltype/objectnative.h @@ -27,6 +27,10 @@ class ObjectNative static FCDECL1(INT32, TryGetHashCode, Object* vThisRef); static FCDECL2(FC_BOOL_RET, ContentEquals, Object *pThisRef, Object *pCompareRef); static FCDECL1(FC_BOOL_RET, IsLockHeld, Object* pThisUNSAFE); + + static FCDECL1(FC_BOOL_RET, Monitor_TryEnter_FastPath, Object* obj); + static FCDECL2(AwareLock::EnterHelperResult, Monitor_TryEnter_FastPath_WithTimeout, Object* obj, INT32 timeout); + static FCDECL1(AwareLock::LeaveHelperAction, Monitor_Exit_FastPath, Object* obj); }; extern "C" INT32 QCALLTYPE ObjectNative_GetHashCodeSlow(QCall::ObjectHandleOnStack objHandle); @@ -35,5 +39,9 @@ extern "C" BOOL QCALLTYPE Monitor_Wait(QCall::ObjectHandleOnStack pThis, INT32 T extern "C" void QCALLTYPE Monitor_Pulse(QCall::ObjectHandleOnStack pThis); extern "C" void QCALLTYPE Monitor_PulseAll(QCall::ObjectHandleOnStack pThis); extern "C" INT64 QCALLTYPE Monitor_GetLockContentionCount(); +extern "C" void QCALLTYPE Monitor_Enter_Slowpath(QCall::ObjectHandleOnStack objHandle); +extern "C" void QCALLTYPE Monitor_Exit_Slowpath(QCall::ObjectHandleOnStack objHandle, AwareLock::LeaveHelperAction exitBehavior); +extern "C" INT32 QCALLTYPE Monitor_TryEnter_Slowpath(QCall::ObjectHandleOnStack objHandle, INT32 timeOut); + #endif // _OBJECTNATIVE_H_ diff --git a/src/coreclr/classlibnative/bcltype/system.cpp b/src/coreclr/classlibnative/bcltype/system.cpp index 30845baa04b0..30b215f6c9d3 100644 --- a/src/coreclr/classlibnative/bcltype/system.cpp +++ b/src/coreclr/classlibnative/bcltype/system.cpp @@ -32,24 +32,6 @@ #include - -FCIMPL0(UINT32, SystemNative::GetTickCount) -{ - FCALL_CONTRACT; - - return ::GetTickCount(); -} -FCIMPLEND; - -FCIMPL0(UINT64, SystemNative::GetTickCount64) -{ - FCALL_CONTRACT; - - return ::GetTickCount64(); -} -FCIMPLEND; - - extern "C" VOID QCALLTYPE Environment_Exit(INT32 exitcode) { QCALL_CONTRACT; diff --git a/src/coreclr/classlibnative/bcltype/system.h b/src/coreclr/classlibnative/bcltype/system.h index 89d25005364d..11b4939cc715 100644 --- a/src/coreclr/classlibnative/bcltype/system.h +++ b/src/coreclr/classlibnative/bcltype/system.h @@ -37,9 +37,6 @@ class SystemNative public: // Functions on the System.Environment class - static FCDECL0(UINT32, GetTickCount); - static FCDECL0(UINT64, GetTickCount64); - static FCDECL1(VOID,SetExitCode,INT32 exitcode); static 
FCDECL0(INT32, GetExitCode); diff --git a/src/coreclr/classlibnative/float/CMakeLists.txt b/src/coreclr/classlibnative/float/CMakeLists.txt index 1dbe160248f2..c6153d5d34c0 100644 --- a/src/coreclr/classlibnative/float/CMakeLists.txt +++ b/src/coreclr/classlibnative/float/CMakeLists.txt @@ -3,6 +3,7 @@ include_directories("../inc") set(FLOAT_SOURCES floatdouble.cpp floatsingle.cpp + divmodint.cpp ) add_library_clr(comfloat_wks OBJECT ${FLOAT_SOURCES}) diff --git a/src/coreclr/classlibnative/float/divmodint.cpp b/src/coreclr/classlibnative/float/divmodint.cpp new file mode 100644 index 000000000000..d7b194c5587d --- /dev/null +++ b/src/coreclr/classlibnative/float/divmodint.cpp @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifdef TARGET_32BIT + +#include + +#include "divmodint.h" + +#include + +FCIMPL2(int32_t, DivModInt::DivInt32, int32_t dividend, int32_t divisor) + FCALL_CONTRACT; + + return dividend / divisor; +FCIMPLEND + +FCIMPL2(uint32_t, DivModInt::DivUInt32, uint32_t dividend, uint32_t divisor) + FCALL_CONTRACT; + + return dividend / divisor; +FCIMPLEND + +FCIMPL2_VV(int64_t, DivModInt::DivInt64, int64_t dividend, int64_t divisor) + FCALL_CONTRACT; + + return dividend / divisor; +FCIMPLEND + +FCIMPL2_VV(uint64_t, DivModInt::DivUInt64, uint64_t dividend, uint64_t divisor) + FCALL_CONTRACT; + + return dividend / divisor; +FCIMPLEND + +FCIMPL2(int32_t, DivModInt::ModInt32, int32_t dividend, int32_t divisor) + FCALL_CONTRACT; + + return dividend % divisor; +FCIMPLEND + +FCIMPL2(uint32_t, DivModInt::ModUInt32, uint32_t dividend, uint32_t divisor) + FCALL_CONTRACT; + + return dividend % divisor; +FCIMPLEND + +FCIMPL2_VV(int64_t, DivModInt::ModInt64, int64_t dividend, int64_t divisor) + FCALL_CONTRACT; + + return dividend % divisor; +FCIMPLEND + +FCIMPL2_VV(uint64_t, DivModInt::ModUInt64, uint64_t dividend, uint64_t divisor) + FCALL_CONTRACT; + + return dividend % divisor; +FCIMPLEND + +#endif // TARGET_32BIT diff --git a/src/coreclr/classlibnative/inc/divmodint.h b/src/coreclr/classlibnative/inc/divmodint.h new file mode 100644 index 000000000000..a6aefdbdb78a --- /dev/null +++ b/src/coreclr/classlibnative/inc/divmodint.h @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef HAVE_DIVMODINT_H +#define HAVE_DIVMODINT_H + +#include +#include + +class DivModInt { +public: + FCDECL2(static int32_t, DivInt32, int32_t dividend, int32_t divisor); + FCDECL2(static uint32_t, DivUInt32, uint32_t dividend, uint32_t divisor); + FCDECL2_VV(static int64_t, DivInt64, int64_t dividend, int64_t divisor); + FCDECL2_VV(static uint64_t, DivUInt64, uint64_t dividend, uint64_t divisor); + FCDECL2(static int32_t, ModInt32, int32_t dividend, int32_t divisor); + FCDECL2(static uint32_t, ModUInt32, uint32_t dividend, uint32_t divisor); + FCDECL2_VV(static int64_t, ModInt64, int64_t dividend, int64_t divisor); + FCDECL2_VV(static uint64_t, ModUInt64, uint64_t dividend, uint64_t divisor); +}; + +#endif // HAVE_DIVMODINT_H diff --git a/src/coreclr/clr.featuredefines.props b/src/coreclr/clr.featuredefines.props index 39529e099b08..1e4050c8d56e 100644 --- a/src/coreclr/clr.featuredefines.props +++ b/src/coreclr/clr.featuredefines.props @@ -1,13 +1,13 @@ true - true true + true true - true + true true @@ -23,8 +23,8 @@ true - - true + + true @@ -37,6 +37,7 @@ $(DefineConstants);FEATURE_EVENTSOURCE_XPLAT $(DefineConstants);FEATURE_TYPEEQUIVALENCE $(DefineConstants);FEATURE_EH_FUNCLETS + $(DefineConstants);FEATURE_INTERPRETER $(DefineConstants);PROFILING_SUPPORTED diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index c8ab1d6a538f..86d0cb4de646 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -1,35 +1,26 @@ include(${CMAKE_CURRENT_LIST_DIR}/clrfeatures.cmake) -add_compile_definitions($<$>:DACCESS_COMPILE>) +if(FEATURE_JIT) + add_compile_definitions(FEATURE_JIT) +endif(FEATURE_JIT) -if (CLR_CMAKE_TARGET_ARCH_ARM64) - if (CLR_CMAKE_TARGET_UNIX) - add_definitions(-DFEATURE_EMULATE_SINGLESTEP) - endif() - add_compile_definitions($<$>>:FEATURE_MULTIREG_RETURN>) -elseif (CLR_CMAKE_TARGET_ARCH_ARM) - if (CLR_CMAKE_HOST_WIN32 AND NOT DEFINED CLR_CROSS_COMPONENTS_BUILD) - # Set this to ensure we can use Arm SDK for Desktop binary linkage when doing native (Arm32) build - add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) - add_definitions(-D_ARM_WORKAROUND_) - endif (CLR_CMAKE_HOST_WIN32 AND NOT DEFINED CLR_CROSS_COMPONENTS_BUILD) - add_definitions(-DFEATURE_EMULATE_SINGLESTEP) -elseif (CLR_CMAKE_TARGET_ARCH_RISCV64) - add_definitions(-DFEATURE_EMULATE_SINGLESTEP) - add_compile_definitions($<$>>:FEATURE_MULTIREG_RETURN>) -endif (CLR_CMAKE_TARGET_ARCH_ARM64) +add_compile_definitions($<$>:DACCESS_COMPILE>) if (CLR_CMAKE_TARGET_UNIX) if (CLR_CMAKE_TARGET_ARCH_AMD64) add_compile_definitions($<$>>:UNIX_AMD64_ABI>) - add_compile_definitions($<$>>:FEATURE_MULTIREG_RETURN>) + elseif (CLR_CMAKE_TARGET_ARCH_ARM64) + add_compile_definitions(FEATURE_EMULATE_SINGLESTEP) elseif (CLR_CMAKE_TARGET_ARCH_ARM) add_compile_definitions($<$>>:UNIX_ARM_ABI>) + add_compile_definitions(FEATURE_EMULATE_SINGLESTEP) elseif (CLR_CMAKE_TARGET_ARCH_I386) add_compile_definitions($<$>>:UNIX_X86_ABI>) elseif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) - add_definitions(-DFEATURE_EMULATE_SINGLESTEP) + add_compile_definitions(FEATURE_EMULATE_SINGLESTEP) + elseif (CLR_CMAKE_TARGET_ARCH_RISCV64) + add_compile_definitions(FEATURE_EMULATE_SINGLESTEP) endif() endif(CLR_CMAKE_TARGET_UNIX) @@ -58,7 +49,7 @@ if(CLR_CMAKE_HOST_WIN32) add_compile_definitions(NOMINMAX) endif(CLR_CMAKE_HOST_WIN32) -if (NOT (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX)) +if (NOT ((CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX) OR CLR_CMAKE_TARGET_ARCH_WASM)) 
add_compile_definitions(FEATURE_METADATA_UPDATER) endif() if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_WIN32)) @@ -115,13 +106,13 @@ endif(FEATURE_GDBJIT_LANGID_CS) if(FEATURE_GDBJIT_SYMTAB) add_definitions(-DFEATURE_GDBJIT_SYMTAB) endif(FEATURE_GDBJIT_SYMTAB) -if(CLR_CMAKE_TARGET_LINUX) +if(FEATURE_EVENTSOURCE_XPLAT) add_definitions(-DFEATURE_EVENTSOURCE_XPLAT) -endif(CLR_CMAKE_TARGET_LINUX) +endif(FEATURE_EVENTSOURCE_XPLAT) # NetBSD doesn't implement this feature -if(NOT CLR_CMAKE_TARGET_NETBSD) +if(NOT CLR_CMAKE_TARGET_NETBSD AND NOT CLR_CMAKE_TARGET_ARCH_WASM) add_definitions(-DFEATURE_HIJACK) -endif(NOT CLR_CMAKE_TARGET_NETBSD) +endif(NOT CLR_CMAKE_TARGET_NETBSD AND NOT CLR_CMAKE_TARGET_ARCH_WASM) if (CLR_CMAKE_TARGET_WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM64)) add_definitions(-DFEATURE_INTEROP_DEBUGGING) endif (CLR_CMAKE_TARGET_WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM64)) @@ -132,9 +123,9 @@ if (CLR_CMAKE_TARGET_WIN32) add_definitions(-DFEATURE_ISYM_READER) endif(CLR_CMAKE_TARGET_WIN32) -if(FEATURE_MERGE_JIT_AND_ENGINE) - add_compile_definitions($<$>>:FEATURE_MERGE_JIT_AND_ENGINE>) -endif(FEATURE_MERGE_JIT_AND_ENGINE) +if(FEATURE_STATICALLY_LINKED) + add_compile_definitions($<$>>:FEATURE_STATICALLY_LINKED>) +endif(FEATURE_STATICALLY_LINKED) add_compile_definitions(FEATURE_MULTICOREJIT) if(CLR_CMAKE_TARGET_UNIX) add_definitions(-DFEATURE_PAL_ANSI) @@ -161,6 +152,8 @@ if(FEATURE_OBJCMARSHAL) add_compile_definitions(FEATURE_OBJCMARSHAL) endif() +# add_compile_definitions(FEATURE_RUNTIME_ASYNC) + add_compile_definitions($<$>>:FEATURE_PROFAPI_ATTACH_DETACH>) add_definitions(-DFEATURE_READYTORUN) @@ -176,21 +169,20 @@ endif (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_TARGET_UNIX) if (FEATURE_ENABLE_NO_ADDRESS_SPACE_RANDOMIZATION) add_definitions(-DFEATURE_ENABLE_NO_ADDRESS_SPACE_RANDOMIZATION) endif(FEATURE_ENABLE_NO_ADDRESS_SPACE_RANDOMIZATION) -add_definitions(-DFEATURE_SVR_GC) +if (NOT CLR_CMAKE_HOST_ANDROID) + add_definitions(-DFEATURE_SVR_GC) +endif(NOT CLR_CMAKE_HOST_ANDROID) add_definitions(-DFEATURE_SYMDIFF) add_compile_definitions(FEATURE_TIERED_COMPILATION) -if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) - add_compile_definitions(FEATURE_ON_STACK_REPLACEMENT) -endif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) add_compile_definitions(FEATURE_PGO) if (CLR_CMAKE_TARGET_ARCH_AMD64) # Enable the AMD64 Unix struct passing JIT-EE interface for all AMD64 platforms, to enable altjit. 
add_definitions(-DUNIX_AMD64_ABI_ITF) endif (CLR_CMAKE_TARGET_ARCH_AMD64) add_definitions(-DFEATURE_USE_ASM_GC_WRITE_BARRIERS) -if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) +if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64 OR CLR_CMAKE_TARGET_ARCH_ARM) add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) -endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) +endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64 OR CLR_CMAKE_TARGET_ARCH_ARM) if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES) endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) @@ -205,14 +197,20 @@ if(CLR_CMAKE_TARGET_WIN32) endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) endif(CLR_CMAKE_TARGET_WIN32) -if (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) - add_compile_definitions($<$>>:FEATURE_EH_FUNCLETS>) -endif (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) +add_compile_definitions($<$>>:FEATURE_EH_FUNCLETS>) if (CLR_CMAKE_TARGET_WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)) add_definitions(-DFEATURE_SPECIAL_USER_MODE_APC) endif() +if (FEATURE_STUBPRECODE_DYNAMIC_HELPERS) + add_definitions(-DFEATURE_STUBPRECODE_DYNAMIC_HELPERS) +endif() + +if (CLR_CMAKE_TARGET_APPLE) +# Re-enable when dbgshim containing https://github.com/dotnet/diagnostics/pull/5487 is generally available +# add_definitions(-DFEATURE_MAP_THUNKS_FROM_IMAGE) +endif() # Use this function to enable building with a specific target OS and architecture set of defines # This is known to work for the set of defines used by the JIT and gcinfo, it is not likely correct for @@ -228,7 +226,6 @@ function(set_target_definitions_to_custom_os_and_arch) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_UNIX) if (TARGETDETAILS_ARCH STREQUAL "x64") target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE UNIX_AMD64_ABI) - target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MULTIREG_RETURN) elseif ((TARGETDETAILS_ARCH STREQUAL "arm") OR (TARGETDETAILS_ARCH STREQUAL "armel")) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE UNIX_ARM_ABI) elseif (TARGETDETAILS_ARCH STREQUAL "x86") @@ -259,11 +256,9 @@ function(set_target_definitions_to_custom_os_and_arch) elseif(TARGETDETAILS_ARCH STREQUAL "arm64") target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_64BIT) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_ARM64) - target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MULTIREG_RETURN) elseif(TARGETDETAILS_ARCH STREQUAL "loongarch64") target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_64BIT) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_LOONGARCH64) - target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MULTIREG_RETURN) elseif((TARGETDETAILS_ARCH STREQUAL "arm") OR (TARGETDETAILS_ARCH STREQUAL "armel")) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_ARM) elseif(TARGETDETAILS_ARCH STREQUAL "wasm64") 
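
The clrdefinitions.cmake changes above mostly consolidate where per-architecture definitions such as `FEATURE_EMULATE_SINGLESTEP` or `FEATURE_EH_FUNCLETS` are added. A minimal C++ sketch of how such a CMake-provided definition selects a code path at compile time follows; the function below is made up purely for illustration and does not appear in the runtime.

```cpp
// Hypothetical sketch: add_compile_definitions(FEATURE_EMULATE_SINGLESTEP) in CMake
// simply defines a preprocessor macro, which picks one implementation at compile time.
#include <cstdio>

static void SingleStepThread()
{
#ifdef FEATURE_EMULATE_SINGLESTEP
    // Architectures without a usable hardware trap flag emulate single-stepping.
    std::printf("emulated single-step\n");
#else
    // Otherwise the debugger relies on the hardware single-step facility.
    std::printf("hardware single-step\n");
#endif
}

int main()
{
    SingleStepThread();
    return 0;
}
```
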
diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake index ce3d3ef539de..bc6f53e599ff 100644 --- a/src/coreclr/clrfeatures.cmake +++ b/src/coreclr/clrfeatures.cmake @@ -1,3 +1,7 @@ +if (NOT CLR_CMAKE_TARGET_ARCH_WASM) + set(FEATURE_JIT 1) +endif() + if(CLR_CMAKE_TARGET_TIZEN_LINUX) set(FEATURE_GDBJIT_LANGID_CS 1) endif() @@ -7,9 +11,19 @@ if(CLR_CMAKE_TARGET_UNIX_WASM) endif() if(NOT DEFINED FEATURE_EVENT_TRACE) - set(FEATURE_EVENT_TRACE 1) + if (NOT CLR_CMAKE_TARGET_BROWSER) + # To actually disable FEATURE_EVENT_TRACE, also change clr.featuredefines.props + set(FEATURE_EVENT_TRACE 1) + endif() endif(NOT DEFINED FEATURE_EVENT_TRACE) +if(NOT DEFINED FEATURE_EVENTSOURCE_XPLAT) + if (CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_ANDROID) + # To actually disable FEATURE_EVENTSOURCE_XPLAT, also change clr.featuredefines.props + set(FEATURE_EVENTSOURCE_XPLAT 1) + endif() +endif(NOT DEFINED FEATURE_EVENTSOURCE_XPLAT) + if(NOT DEFINED FEATURE_PERFTRACING AND FEATURE_EVENT_TRACE) set(FEATURE_PERFTRACING 1) endif(NOT DEFINED FEATURE_PERFTRACING AND FEATURE_EVENT_TRACE) @@ -50,4 +64,24 @@ endif() if (CLR_CMAKE_TARGET_WIN32) set(FEATURE_TYPEEQUIVALENCE 1) -endif(CLR_CMAKE_TARGET_WIN32) \ No newline at end of file +endif(CLR_CMAKE_TARGET_WIN32) + + +if (CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS OR CLR_CMAKE_TARGET_ARCH_WASM) + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1) + set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 0) +else() + # Enable cached interface dispatch so that we can test/debug it more easily on non-embedded scenarios (set DOTNET_UseCachedInterfaceDispatch=1) + # Only enable in chk/debug builds as this support isn't intended for retail use elsewhere + if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH $,1,0>) + else() + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 0) + endif() + set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1) +endif() + +if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_HOST_ARCH_AMD64) + # Allow 16 byte compare-exchange (cmpxchg16b) + add_compile_options($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:-mcx16>) +endif() diff --git a/src/coreclr/components.cmake b/src/coreclr/components.cmake index a3da945d0277..9271a713ebe2 100644 --- a/src/coreclr/components.cmake +++ b/src/coreclr/components.cmake @@ -9,6 +9,7 @@ add_component(iltools) add_component(nativeaot) add_component(spmi) add_component(debug) +add_component(cdac) # Define coreclr_all as the fallback component and make every component depend on this component. 
# iltools and paltests should be minimal subsets, so don't add a dependency on coreclr_misc diff --git a/src/coreclr/crossgen-corelib.proj b/src/coreclr/crossgen-corelib.proj index 7e93e2fcf9b2..b95536f174a5 100644 --- a/src/coreclr/crossgen-corelib.proj +++ b/src/coreclr/crossgen-corelib.proj @@ -118,7 +118,10 @@ $(CrossGenDllCmd) --targetos:linux $(CrossGenDllCmd) -m:$(MergedMibcPath) --embed-pgo-data $(CrossGenDllCmd) -O + $(CrossGenDllCmd) --verify-type-and-field-layout + + $(CrossGenDllCmd) --enable-cached-interface-dispatch-support $(CrossGenDllCmd) @(CoreLib) diff --git a/src/coreclr/debug/CMakeLists.txt b/src/coreclr/debug/CMakeLists.txt index d0a999f65c14..0d52fa77527e 100644 --- a/src/coreclr/debug/CMakeLists.txt +++ b/src/coreclr/debug/CMakeLists.txt @@ -1,3 +1,7 @@ + +add_compile_definitions($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:FEATURE_CACHED_INTERFACE_DISPATCH>) +add_compile_definitions($<${FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH}:FEATURE_VIRTUAL_STUB_DISPATCH>) + add_subdirectory(daccess) add_subdirectory(ee) add_subdirectory(di) diff --git a/src/coreclr/debug/createdump/config.h.in b/src/coreclr/debug/createdump/config.h.in index 792d8a5988be..ee8701be0cf9 100644 --- a/src/coreclr/debug/createdump/config.h.in +++ b/src/coreclr/debug/createdump/config.h.in @@ -4,5 +4,3 @@ #pragma once #cmakedefine HAVE_PROCESS_VM_READV -#cmakedefine01 HAVE_CLOCK_GETTIME_NSEC_NP -#cmakedefine01 HAVE_CLOCK_MONOTONIC diff --git a/src/coreclr/debug/createdump/configure.cmake b/src/coreclr/debug/createdump/configure.cmake index 4ba6320f4c93..9587b3f75f39 100644 --- a/src/coreclr/debug/createdump/configure.cmake +++ b/src/coreclr/debug/createdump/configure.cmake @@ -1,22 +1,3 @@ check_function_exists(process_vm_readv HAVE_PROCESS_VM_READV) -check_symbol_exists( - clock_gettime_nsec_np - time.h - HAVE_CLOCK_GETTIME_NSEC_NP) - -check_cxx_source_runs(" -#include -#include -#include - -int main() -{ - int ret; - struct timespec ts; - ret = clock_gettime(CLOCK_MONOTONIC, &ts); - - exit(ret); -}" HAVE_CLOCK_MONOTONIC) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) diff --git a/src/coreclr/debug/createdump/createdumpmain.cpp b/src/coreclr/debug/createdump/createdumpmain.cpp index 5f54b07d03df..b3426a31d2aa 100644 --- a/src/coreclr/debug/createdump/createdumpmain.cpp +++ b/src/coreclr/debug/createdump/createdumpmain.cpp @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
#include "createdump.h" +#include "minipal/time.h" #ifdef HOST_WINDOWS #define DEFAULT_DUMP_PATH "%TEMP%\\" @@ -45,8 +46,6 @@ bool g_diagnostics = false; bool g_diagnosticsVerbose = false; uint64_t g_ticksPerMS = 0; uint64_t g_startTime = 0; -uint64_t GetTickFrequency(); -uint64_t GetTimeStamp(); // // Common entry point @@ -198,8 +197,8 @@ int createdump_main(const int argc, const char* argv[]) return -1; } - g_ticksPerMS = GetTickFrequency() / 1000UL; - g_startTime = GetTimeStamp(); + g_ticksPerMS = minipal_hires_tick_frequency() / 1000UL; + g_startTime = minipal_hires_ticks(); TRACE("TickFrequency: %d ticks per ms\n", g_ticksPerMS); ArrayHolder tmpPath = new char[MAX_LONGPATH]; @@ -221,11 +220,11 @@ int createdump_main(const int argc, const char* argv[]) if (CreateDump(options)) { - printf_status("Dump successfully written in %llums\n", GetTimeStamp() - g_startTime); + printf_status("Dump successfully written in %llums\n", (minipal_hires_ticks() - g_startTime) / g_ticksPerMS); } else { - printf_error("Failure took %llums\n", GetTimeStamp() - g_startTime); + printf_error("Failure took %llums\n", (minipal_hires_ticks() - g_startTime) / g_ticksPerMS); exitCode = -1; } @@ -332,24 +331,6 @@ printf_error(const char* format, ...) va_end(args); } -uint64_t -GetTickFrequency() -{ - LARGE_INTEGER ret; - ZeroMemory(&ret, sizeof(LARGE_INTEGER)); - QueryPerformanceFrequency(&ret); - return ret.QuadPart; -} - -uint64_t -GetTimeStamp() -{ - LARGE_INTEGER ret; - ZeroMemory(&ret, sizeof(LARGE_INTEGER)); - QueryPerformanceCounter(&ret); - return ret.QuadPart / g_ticksPerMS; -} - #ifdef HOST_UNIX static void @@ -360,7 +341,7 @@ trace_prefix(const char* format, va_list args) { fprintf(g_stdout, "[createdump] "); } - fprintf(g_stdout, "%08" PRIx64 " ", GetTimeStamp()); + fprintf(g_stdout, "%08" PRIx64 " ", minipal_hires_ticks() / g_ticksPerMS); vfprintf(g_stdout, format, args); fflush(g_stdout); } diff --git a/src/coreclr/debug/createdump/createdumppal.cpp b/src/coreclr/debug/createdump/createdumppal.cpp index 88e57ef7e854..94e4606ea0fb 100644 --- a/src/coreclr/debug/createdump/createdumppal.cpp +++ b/src/coreclr/debug/createdump/createdumppal.cpp @@ -21,7 +21,6 @@ typedef void (*PFN_PAL_TerminateEx)(int); typedef BOOL (*PFN_PAL_VirtualUnwindOutOfProc)( CONTEXT *context, - KNONVOLATILE_CONTEXT_POINTERS *contextPointers, PULONG64 functionStart, SIZE_T baseAddress, UnwindReadMemoryCallback readMemoryCallback); @@ -96,52 +95,6 @@ UninitializePAL( } } -#define tccSecondsToNanoSeconds 1000000000 // 10^9 - -BOOL -PALAPI -QueryPerformanceCounter( - OUT LARGE_INTEGER* lpPerformanceCount) -{ -#if HAVE_CLOCK_GETTIME_NSEC_NP - lpPerformanceCount->QuadPart = (LONGLONG)clock_gettime_nsec_np(CLOCK_UPTIME_RAW); -#elif HAVE_CLOCK_MONOTONIC - struct timespec ts; - int result = clock_gettime(CLOCK_MONOTONIC, &ts); - if (result != 0) - { - return TRUE; - } - else - { - lpPerformanceCount->QuadPart = ((LONGLONG)(ts.tv_sec) * (LONGLONG)(tccSecondsToNanoSeconds)) + (LONGLONG)(ts.tv_nsec); - } -#else - #error "The createdump requires either mach_absolute_time() or clock_gettime(CLOCK_MONOTONIC) to be supported." -#endif - return TRUE; -} - -BOOL -PALAPI -QueryPerformanceFrequency( - OUT LARGE_INTEGER* lpFrequency) -{ -#if HAVE_CLOCK_GETTIME_NSEC_NP - lpFrequency->QuadPart = (LONGLONG)(tccSecondsToNanoSeconds); -#elif HAVE_CLOCK_MONOTONIC - // clock_gettime() returns a result in terms of nanoseconds rather than a count. 
This - // means that we need to either always scale the result by the actual resolution (to - // get a count) or we need to say the resolution is in terms of nanoseconds. We prefer - // the latter since it allows the highest throughput and should minimize error propagated - // to the user. - lpFrequency->QuadPart = (LONGLONG)(tccSecondsToNanoSeconds); -#else - #error "The createdump requires either mach_absolute_time() or clock_gettime(CLOCK_MONOTONIC) to be supported." -#endif - return TRUE; -} - #define TEMP_DIRECTORY_PATH "/tmp/" DWORD @@ -174,7 +127,6 @@ BOOL PALAPI PAL_VirtualUnwindOutOfProc( CONTEXT *context, - KNONVOLATILE_CONTEXT_POINTERS *contextPointers, PULONG64 functionStart, SIZE_T baseAddress, UnwindReadMemoryCallback readMemoryCallback) @@ -183,7 +135,7 @@ PAL_VirtualUnwindOutOfProc( { return FALSE; } - return g_PAL_VirtualUnwindOutOfProc(context, contextPointers, functionStart, baseAddress, readMemoryCallback); + return g_PAL_VirtualUnwindOutOfProc(context, functionStart, baseAddress, readMemoryCallback); } BOOL diff --git a/src/coreclr/debug/createdump/memoryregion.h b/src/coreclr/debug/createdump/memoryregion.h index 124c9fbcb142..e828984d1058 100644 --- a/src/coreclr/debug/createdump/memoryregion.h +++ b/src/coreclr/debug/createdump/memoryregion.h @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "specialdiaginfo.h" + #if !defined(PAGE_SIZE) && (defined(__arm__) || defined(__aarch64__) || defined(__loongarch64)) || defined(__riscv) extern long g_pageSize; #define PAGE_SIZE g_pageSize @@ -44,8 +46,15 @@ struct MemoryRegion m_endAddress(end), m_offset(0) { - assert((start & ~PAGE_MASK) == 0); - assert((end & ~PAGE_MASK) == 0); + if (start == SpecialDiagInfoAddress) + { + assert(end == (SpecialDiagInfoAddress + SpecialDiagInfoSize)); + } + else + { + assert((start & ~PAGE_MASK) == 0); + assert((end & ~PAGE_MASK) == 0); + } } // copy with new flags constructor. The file name is not copied. diff --git a/src/coreclr/debug/createdump/specialdiaginfo.h b/src/coreclr/debug/createdump/specialdiaginfo.h index 84f79f00a160..b1fe4796bc8e 100644 --- a/src/coreclr/debug/createdump/specialdiaginfo.h +++ b/src/coreclr/debug/createdump/specialdiaginfo.h @@ -11,6 +11,8 @@ // information like the exception record address for a NativeAOT app crash or the runtime module // base address. The exception record contains the pointer to the JSON formatted crash info. +#pragma once + #define SPECIAL_DIAGINFO_SIGNATURE "DIAGINFOHEADER" #define SPECIAL_DIAGINFO_VERSION 2 diff --git a/src/coreclr/debug/createdump/threadinfo.cpp b/src/coreclr/debug/createdump/threadinfo.cpp index 8eca16d2af1f..75313e422d96 100644 --- a/src/coreclr/debug/createdump/threadinfo.cpp +++ b/src/coreclr/debug/createdump/threadinfo.cpp @@ -104,7 +104,7 @@ ThreadInfo::UnwindNativeFrames(CONTEXT* pContext) // Unwind the native frame adding all the memory accessed to the core dump via the read memory adapter. 
ULONG64 functionStart; - if (!PAL_VirtualUnwindOutOfProc(pContext, nullptr, &functionStart, baseAddress, ReadMemoryAdapter)) { + if (!PAL_VirtualUnwindOutOfProc(pContext, &functionStart, baseAddress, ReadMemoryAdapter)) { TRACE("Unwind: PAL_VirtualUnwindOutOfProc returned false\n"); break; } diff --git a/src/coreclr/debug/daccess/CMakeLists.txt b/src/coreclr/debug/daccess/CMakeLists.txt index 6c02154de2e4..747a7aae6f23 100644 --- a/src/coreclr/debug/daccess/CMakeLists.txt +++ b/src/coreclr/debug/daccess/CMakeLists.txt @@ -1,6 +1,6 @@ add_definitions(-DFEATURE_NO_HOST) -add_subdirectory(${CLR_SRC_NATIVE_DIR}/managed/cdacreader/cmake ${CLR_ARTIFACTS_OBJ_DIR}/cdacreader) +add_subdirectory(${CLR_SRC_NATIVE_DIR}/managed/cdac/cmake ${CLR_ARTIFACTS_OBJ_DIR}/mscordaccore_universal) include_directories(BEFORE ${VM_DIR}) include_directories(BEFORE ${VM_DIR}/${ARCH_SOURCES_DIR}) @@ -43,7 +43,7 @@ convert_to_absolute_path(DACCESS_SOURCES ${DACCESS_SOURCES}) add_library_clr(daccess ${DACCESS_SOURCES}) set_target_properties(daccess PROPERTIES DAC_COMPONENT TRUE) target_precompile_headers(daccess PRIVATE [["stdafx.h"]]) -target_link_libraries(daccess PRIVATE cdacreader_api) +target_link_libraries(daccess PRIVATE cdac_api) add_dependencies(daccess eventing_headers) diff --git a/src/coreclr/debug/daccess/cdac.cpp b/src/coreclr/debug/daccess/cdac.cpp index cf129318ae96..780b0d2abe29 100644 --- a/src/coreclr/debug/daccess/cdac.cpp +++ b/src/coreclr/debug/daccess/cdac.cpp @@ -7,13 +7,13 @@ #include "dbgutil.h" #include -#define CDAC_LIB_NAME MAKEDLLNAME_W(W("cdacreader")) +#define CDAC_LIB_NAME MAKEDLLNAME_W(W("mscordaccore_universal")) namespace { bool TryLoadCDACLibrary(HMODULE *phCDAC) { - // Load cdacreader from next to current module (DAC binary) + // Load cdac from next to current module (DAC binary) PathString path; if (WszGetModuleFileName((HMODULE)GetCurrentModuleBase(), path) == 0) return false; @@ -25,6 +25,7 @@ namespace iter++; path.Truncate(iter); path.Append(CDAC_LIB_NAME); + *phCDAC = CLRLoadLibrary(path.GetUnicode()); if (*phCDAC == NULL) return false; @@ -41,6 +42,16 @@ namespace return S_OK; } + + int ReadThreadContext(uint32_t threadId, uint32_t contextFlags, uint32_t contextBufferSize, uint8_t* contextBuffer, void* context) + { + ICorDebugDataTarget* target = reinterpret_cast(context); + HRESULT hr = target->GetThreadContext(threadId, contextFlags, contextBufferSize, contextBuffer); + if (FAILED(hr)) + return hr; + + return S_OK; + } } CDAC CDAC::Create(uint64_t descriptorAddr, ICorDebugDataTarget* target, IUnknown* legacyImpl) @@ -53,7 +64,7 @@ CDAC CDAC::Create(uint64_t descriptorAddr, ICorDebugDataTarget* target, IUnknown _ASSERTE(init != nullptr); intptr_t handle; - if (init(descriptorAddr, &ReadFromTargetCallback, target, &handle) != 0) + if (init(descriptorAddr, &ReadFromTargetCallback, &ReadThreadContext, target, &handle) != 0) { ::FreeLibrary(cdacLib); return {}; diff --git a/src/coreclr/debug/daccess/daccess.cpp b/src/coreclr/debug/daccess/daccess.cpp index 8af160fd73d0..14e7cc8371f5 100644 --- a/src/coreclr/debug/daccess/daccess.cpp +++ b/src/coreclr/debug/daccess/daccess.cpp @@ -17,7 +17,6 @@ #include "peimagelayout.inl" #include "datatargetadapter.h" #include "readonlydatatargetfacade.h" -#include "metadataexports.h" #include "excep.h" #include "debugger.h" #include "dwreport.h" @@ -42,14 +41,10 @@ extern TADDR g_ClrModuleBase; // To include definition of IsThrowableThreadAbortException // #include -CRITICAL_SECTION g_dacCritSec; +minipal_mutex g_dacMutex; ClrDataAccess* 
g_dacImpl; -EXTERN_C -#ifdef TARGET_UNIX -DLLEXPORT // For Win32 PAL LoadLibrary emulation -#endif -BOOL WINAPI DllMain(HANDLE instance, DWORD reason, LPVOID reserved) +EXTERN_C BOOL WINAPI DllMain2(HANDLE instance, DWORD reason, LPVOID reserved) { static bool g_procInitialized = false; @@ -76,7 +71,7 @@ BOOL WINAPI DllMain(HANDLE instance, DWORD reason, LPVOID reserved) return FALSE; } #endif - InitializeCriticalSection(&g_dacCritSec); + minipal_mutex_init(&g_dacMutex); g_procInitialized = true; break; @@ -86,7 +81,7 @@ BOOL WINAPI DllMain(HANDLE instance, DWORD reason, LPVOID reserved) // It's possible for this to be called without ATTACH completing (eg. if it failed) if (g_procInitialized) { - DeleteCriticalSection(&g_dacCritSec); + minipal_mutex_destroy(&g_dacMutex); } g_procInitialized = false; break; @@ -5825,7 +5820,7 @@ ClrDataAccess::RawGetMethodName( { // Try to find matching precode entrypoint Precode* pPrecode = Precode::GetPrecodeFromEntryPoint(alignedAddress, TRUE); - if (pPrecode != NULL) + if (pPrecode != NULL && pPrecode->GetType() != PRECODE_UMENTRY_THUNK) { methodDesc = pPrecode->GetMethodDesc(); if (methodDesc != NULL) diff --git a/src/coreclr/debug/daccess/dacdbiimpl.cpp b/src/coreclr/debug/daccess/dacdbiimpl.cpp index eef3dc127b92..05f2f7f7ca3c 100644 --- a/src/coreclr/debug/daccess/dacdbiimpl.cpp +++ b/src/coreclr/debug/daccess/dacdbiimpl.cpp @@ -1,15 +1,12 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. + //***************************************************************************** // File: DacDbiImpl.cpp -// - // // Implement DAC/DBI interface -// //***************************************************************************** - #include "stdafx.h" #include "dacdbiinterface.h" @@ -59,7 +56,7 @@ -// Global allocator for DD. Access is protected under the g_dacCritSec lock. +// Global allocator for DD. Access is protected under the g_dacMutex lock. IDacDbiInterface::IAllocator * g_pAllocator = NULL; //--------------------------------------------------------------------------------------- @@ -362,7 +359,7 @@ interface IMDInternalImport* DacDbiInterfaceImpl::GetMDImport( const ReflectionModule * pReflectionModule, bool fThrowEx) { - // Since this is called from an existing DAC-primitive, we already hold the g_dacCritSec lock. + // Since this is called from an existing DAC-primitive, we already hold the g_dacMutex lock. // The lock conveniently protects our cache. 
SUPPORTS_DAC; @@ -667,8 +664,7 @@ void DacDbiInterfaceImpl::GetCompilerFlags ( // Get the underlying module - none of this is AppDomain specific Module * pModule = pDomainAssembly->GetAssembly()->GetModule(); - DWORD dwBits = pModule->GetDebuggerInfoBits(); - *pfAllowJITOpts = !CORDisableJITOptimizations(dwBits); + *pfAllowJITOpts = !pModule->AreJITOptimizationsDisabled(); *pfEnableEnC = pModule->IsEditAndContinueEnabled(); @@ -1580,8 +1576,7 @@ void DacDbiInterfaceImpl::ComputeFieldData(PTR_FieldDesc pFD, if (pFD->IsRVA()) { // RVA statics are relative to a base module address - DWORD offset = pFD->GetOffset(); - PTR_VOID addr = pFD->GetModule()->GetRvaField(offset); + PTR_VOID addr = pFD->GetStaticAddressHandle(NULL); if (pCurrentFieldData->OkToGetOrSetStaticAddress()) { pCurrentFieldData->SetStaticAddress(PTR_TO_TADDR(addr)); @@ -3544,7 +3539,9 @@ void DacDbiInterfaceImpl::EnumerateMemRangesForLoaderAllocator(PTR_LoaderAllocat if (pVcsMgr) { if (pVcsMgr->indcell_heap != NULL) heapsToEnumerate.Push(pVcsMgr->indcell_heap); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH if (pVcsMgr->cache_entry_heap != NULL) heapsToEnumerate.Push(pVcsMgr->cache_entry_heap); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } TADDR rangeAccumAsTaddr = TO_TADDR(rangeAcummulator); @@ -5140,9 +5137,9 @@ void DacDbiInterfaceImpl::Hijack( CORDB_ADDRESS esp = GetSP(&ctx); // - // Find out where the OS exception dispatcher has pushed the EXCEPTION_RECORD and CONTEXT. The ExInfo and - // ExceptionTracker have pointers to these data structures, but when we get the unhandled exception - // notification, the OS exception dispatcher is no longer on the stack, so these pointers are no longer + // Find out where the OS exception dispatcher has pushed the EXCEPTION_RECORD and CONTEXT. The ExInfo + // has pointers to these data structures, but when we get the unhandled exception notification, + // the OS exception dispatcher is no longer on the stack, so these pointers are no longer // valid. We need to either update these pointers in the ExInfo/ExcepionTracker, or reuse the stack // space used by the OS exception dispatcher. We are using the latter approach here. // @@ -5157,8 +5154,8 @@ void DacDbiInterfaceImpl::Hijack( // The managed exception may not be related to the unhandled exception for which we are trying to // hijack. An example would be when a thread hits a managed exception, VS tries to do func eval on // the thread, but the func eval causes an unhandled exception (e.g. AV in mscorwks.dll). In this - // case, the pointers stored on the ExInfo/ExceptionTracker are closer to the root than the current - // SP of the thread. The check below makes sure we don't reuse the pointers in this case. + // case, the pointers stored on the ExInfo are closer to the root than the current SP of the thread. + // The check below makes sure we don't reuse the pointers in this case. 
if (espOSContext < esp) { SafeWriteStructOrThrow(espOSContext, &ctx); @@ -5640,7 +5637,7 @@ void DacDbiInterfaceImpl::GetContext(VMPTR_Thread vmThread, DT_CONTEXT * pContex // Going through thread Frames and looking for first (deepest one) one that // that has context available for stackwalking (SP and PC) - // For example: RedirectedThreadFrame, InlinedCallFrame, HelperMethodFrame, CLRToCOMMethodFrame + // For example: RedirectedThreadFrame, InlinedCallFrame, DynamicHelperFrame, CLRToCOMMethodFrame Frame *frame = pThread->GetFrame(); while (frame != NULL && frame != FRAME_TOP) { diff --git a/src/coreclr/debug/daccess/dacdbiimpl.h b/src/coreclr/debug/daccess/dacdbiimpl.h index d878b0c0255b..f04504d2ec4d 100644 --- a/src/coreclr/debug/daccess/dacdbiimpl.h +++ b/src/coreclr/debug/daccess/dacdbiimpl.h @@ -1107,7 +1107,7 @@ class DacDbiInterfaceImpl : }; -// Global allocator for DD. Access is protected under the g_dacCritSec lock. +// Global allocator for DD. Access is protected under the g_dacMutex lock. extern "C" IDacDbiInterface::IAllocator * g_pAllocator; @@ -1116,7 +1116,7 @@ class DDHolder public: DDHolder(DacDbiInterfaceImpl* pContainer, bool fAllowReentrant) { - EnterCriticalSection(&g_dacCritSec); + minipal_mutex_enter(&g_dacMutex); // If we're not re-entrant, then assert. if (!fAllowReentrant) @@ -1139,7 +1139,7 @@ class DDHolder g_dacImpl = m_pOldContainer; g_pAllocator = m_pOldAllocator; - LeaveCriticalSection(&g_dacCritSec); + minipal_mutex_leave(&g_dacMutex); } protected: diff --git a/src/coreclr/debug/daccess/dacdbiimplstackwalk.cpp b/src/coreclr/debug/daccess/dacdbiimplstackwalk.cpp index 90158d237ba5..eb5464275d0d 100644 --- a/src/coreclr/debug/daccess/dacdbiimplstackwalk.cpp +++ b/src/coreclr/debug/daccess/dacdbiimplstackwalk.cpp @@ -19,6 +19,14 @@ #include "comcallablewrapper.h" #endif // FEATURE_COMINTEROP +#ifdef FEATURE_INTERPRETER +#include "interpexec.h" +#endif // FEATURE_INTERPRETER + +#ifdef FEATURE_EH_FUNCLETS +#include "exinfo.h" +#endif // FEATURE_EH_FUNCLETS + typedef IDacDbiInterface::StackWalkHandle StackWalkHandle; @@ -262,7 +270,7 @@ BOOL DacDbiInterfaceImpl::UnwindStackWalkFrame(StackWalkHandle pSFIHandle) continue; } #ifdef FEATURE_EH_FUNCLETS - else if (g_isNewExceptionHandlingEnabled && pIter->GetFrameState() == StackFrameIterator::SFITER_FRAMELESS_METHOD) + else if (pIter->GetFrameState() == StackFrameIterator::SFITER_FRAMELESS_METHOD) { // Skip the new exception handling managed code, the debugger clients are not supposed to see them MethodDesc *pMD = pIter->m_crawl.GetFunction(); @@ -462,7 +470,7 @@ ULONG32 DacDbiInterfaceImpl::GetCountOfInternalFrames(VMPTR_Thread vmThread) while (pFrame != FRAME_TOP) { #ifdef FEATURE_EH_FUNCLETS - if (g_isNewExceptionHandlingEnabled && InlinedCallFrame::FrameHasActiveCall(pFrame)) + if (InlinedCallFrame::FrameHasActiveCall(pFrame)) { // Skip new exception handling helpers InlinedCallFrame *pInlinedCallFrame = dac_cast(pFrame); @@ -515,7 +523,7 @@ void DacDbiInterfaceImpl::EnumerateInternalFrames(VMPTR_Thread while (pFrame != FRAME_TOP) { #ifdef FEATURE_EH_FUNCLETS - if (g_isNewExceptionHandlingEnabled && InlinedCallFrame::FrameHasActiveCall(pFrame)) + if (InlinedCallFrame::FrameHasActiveCall(pFrame)) { // Skip new exception handling helpers InlinedCallFrame *pInlinedCallFrame = dac_cast(pFrame); @@ -614,9 +622,9 @@ BOOL DacDbiInterfaceImpl::IsMatchingParentFrame(FramePointer fpToCheck, FramePoi StackFrame sfParent = StackFrame((UINT_PTR)fpParent.GetSPValue()); - // Ask the ExceptionTracker to figure out the answer. 
+ // Ask the ExInfo to figure out the answer. // Don't try to compare the StackFrames/FramePointers ourselves. - return ExceptionTracker::IsUnwoundToTargetParentFrame(sfToCheck, sfParent); + return ExInfo::IsUnwoundToTargetParentFrame(sfToCheck, sfParent); #else // !FEATURE_EH_FUNCLETS return FALSE; @@ -969,12 +977,12 @@ void DacDbiInterfaceImpl::InitParentFrameInfo(CrawlFrame * pCF, if (pCF->IsFunclet()) { DWORD dwParentOffset; - StackFrame sfParent = ExceptionTracker::FindParentStackFrameEx(pCF, &dwParentOffset); + StackFrame sfParent = ExInfo::FindParentStackFrameEx(pCF, &dwParentOffset); // // For funclets, fpParentOrSelf is the FramePointer of the parent. // Don't mess around with this FramePointer. The only thing we can do with it is to pass it back - // to the ExceptionTracker when we are checking if a particular frame is the parent frame. + // to the ExInfo when we are checking if a particular frame is the parent frame. // pJITFuncData->fpParentOrSelf = FramePointer::MakeFramePointer(sfParent.SP); @@ -982,12 +990,12 @@ void DacDbiInterfaceImpl::InitParentFrameInfo(CrawlFrame * pCF, } else { - StackFrame sfSelf = ExceptionTracker::GetStackFrameForParentCheck(pCF); + StackFrame sfSelf = ExInfo::GetStackFrameForParentCheck(pCF); // // For non-funclets, fpParentOrSelf is the FramePointer of the current frame itself. // Don't mess around with this FramePointer. The only thing we can do with it is to pass it back - // to the ExceptionTracker when we are checking if a particular frame is the parent frame. + // to the ExInfo when we are checking if a particular frame is the parent frame. // pJITFuncData->fpParentOrSelf = FramePointer::MakeFramePointer(sfSelf.SP); diff --git a/src/coreclr/debug/daccess/dacfn.cpp b/src/coreclr/debug/daccess/dacfn.cpp index 8077463c8e23..0bc7b8dc00e8 100644 --- a/src/coreclr/debug/daccess/dacfn.cpp +++ b/src/coreclr/debug/daccess/dacfn.cpp @@ -24,14 +24,6 @@ struct DacHostVtPtrs #undef VPTR_CLASS }; - -const WCHAR *g_dacVtStrings[] = -{ -#define VPTR_CLASS(name) W(#name), -#include -#undef VPTR_CLASS -}; - DacHostVtPtrs g_dacHostVtPtrs; HRESULT @@ -212,87 +204,6 @@ DacWriteAll(TADDR addr, PVOID buffer, ULONG32 size, bool throwEx) return S_OK; } -#ifdef TARGET_UNIX - -static BOOL DacReadAllAdapter(PVOID address, PVOID buffer, SIZE_T size) -{ - DAC_INSTANCE* inst = g_dacImpl->m_instances.Find((TADDR)address); - if (inst == nullptr || inst->size < size) - { - inst = g_dacImpl->m_instances.Alloc((TADDR)address, (ULONG32)size, DAC_PAL); - if (inst == nullptr) - { - return FALSE; - } - inst->noReport = 0; - HRESULT hr = DacReadAll((TADDR)address, inst + 1, (ULONG32)size, false); - if (FAILED(hr)) - { - g_dacImpl->m_instances.ReturnAlloc(inst); - return FALSE; - } - if (!g_dacImpl->m_instances.Add(inst)) - { - g_dacImpl->m_instances.ReturnAlloc(inst); - return FALSE; - } - } - memcpy(buffer, inst + 1, size); - return TRUE; -} - -#ifdef HOST_WINDOWS -// For the cross OS dac, we don't have the full pal layer -// Use these minimal prototypes instead of the full pal header -typedef BOOL(*UnwindReadMemoryCallback)(PVOID address, PVOID buffer, SIZE_T size); - -extern -BOOL -PAL_VirtualUnwindOutOfProc(PT_CONTEXT context, PT_KNONVOLATILE_CONTEXT_POINTERS contextPointers, PULONG64 functionStart, SIZE_T baseAddress, UnwindReadMemoryCallback readMemoryCallback); -#endif - -HRESULT -DacVirtualUnwind(ULONG32 threadId, PT_CONTEXT context, PT_KNONVOLATILE_CONTEXT_POINTERS contextPointers) -{ - if (!g_dacImpl) - { - DacError(E_UNEXPECTED); - UNREACHABLE(); - } - - // The DAC code 
doesn't use these context pointers but zero them out to be safe. - if (contextPointers != NULL) - { - memset(contextPointers, 0, sizeof(T_KNONVOLATILE_CONTEXT_POINTERS)); - } - - HRESULT hr = E_NOINTERFACE; - -#ifdef FEATURE_DATATARGET4 - ReleaseHolder dt; - hr = g_dacImpl->m_pTarget->QueryInterface(IID_ICorDebugDataTarget4, (void **)&dt); - if (SUCCEEDED(hr)) - { - hr = dt->VirtualUnwind(threadId, sizeof(CONTEXT), (BYTE*)context); - } -#endif - - if (hr == E_NOINTERFACE || hr == E_NOTIMPL) - { - hr = S_OK; - - SIZE_T baseAddress = DacGlobalBase(); - if (baseAddress == 0 || !PAL_VirtualUnwindOutOfProc(context, contextPointers, nullptr, baseAddress, DacReadAllAdapter)) - { - hr = E_FAIL; - } - } - - return hr; -} - -#endif // TARGET_UNIX - // DacAllocVirtual - Allocate memory from the target process // Note: this is only available to clients supporting the legacy // ICLRDataTarget2 interface. It's currently used by SOS for notification tables. @@ -367,14 +278,6 @@ DacFreeVirtual(TADDR mem, ULONG32 size, ULONG32 typeFlags, PVOID DacInstantiateTypeByAddressHelper(TADDR addr, ULONG32 size, bool throwEx, bool fReport) { -#ifdef _PREFIX_ - - // Dac accesses are not interesting for PREfix and cause a lot of PREfix noise - // so we just return the unmodified pointer for our PREFIX builds - return (PVOID)addr; - -#else // !_PREFIX_ - if (!g_dacImpl) { DacError(E_UNEXPECTED); @@ -490,8 +393,6 @@ DacInstantiateTypeByAddressHelper(TADDR addr, ULONG32 size, bool throwEx, bool f } return inst + 1; - -#endif // !_PREFIX_ } PVOID DacInstantiateTypeByAddress(TADDR addr, ULONG32 size, bool throwEx) @@ -508,14 +409,6 @@ PVOID DacInstantiateTypeByAddressNoReport(TADDR addr, ULONG32 size, bool throw PVOID DacInstantiateClassByVTable(TADDR addr, ULONG32 minSize, bool throwEx) { -#ifdef _PREFIX_ - - // Dac accesses are not interesting for PREfix and cause a lot of PREfix noise - // so we just return the unmodified pointer for our PREFIX builds - return (PVOID)addr; - -#else // !_PREFIX_ - if (!g_dacImpl) { DacError(E_UNEXPECTED); @@ -660,8 +553,6 @@ DacInstantiateClassByVTable(TADDR addr, ULONG32 minSize, bool throwEx) g_dacImpl->m_instances.Supersede(oldInst); } return inst + 1; - -#endif // !_PREFIX_ } #define LOCAL_STR_BUF 256 @@ -669,14 +560,6 @@ DacInstantiateClassByVTable(TADDR addr, ULONG32 minSize, bool throwEx) PSTR DacInstantiateStringA(TADDR addr, ULONG32 maxChars, bool throwEx) { -#ifdef _PREFIX_ - - // Dac accesses are not interesting for PREfix and cause a lot of PREfix noise - // so we just return the unmodified pointer for our PREFIX builds - return (PSTR)addr; - -#else // !_PREFIX_ - HRESULT status; if (!g_dacImpl) @@ -794,21 +677,11 @@ DacInstantiateStringA(TADDR addr, ULONG32 maxChars, bool throwEx) inst->usage = DAC_STRA; } return retVal; - -#endif // !_PREFIX_ } PWSTR DacInstantiateStringW(TADDR addr, ULONG32 maxChars, bool throwEx) { -#ifdef _PREFIX_ - - // Dac accesses are not interesting for PREfix and cause a lot of PREfix noise - // so we just return the unmodified pointer for our PREFIX builds - return (PWSTR)addr; - -#else // !_PREFIX_ - HRESULT status; if (!g_dacImpl) @@ -926,21 +799,11 @@ DacInstantiateStringW(TADDR addr, ULONG32 maxChars, bool throwEx) inst->usage = DAC_STRW; } return retVal; - -#endif // !_PREFIX_ } TADDR DacGetTargetAddrForHostAddr(LPCVOID ptr, bool throwEx) { -#ifdef _PREFIX_ - - // Dac accesses are not interesting for PREfix and cause a lot of PREfix noise - // so we just return the unmodified pointer for our PREFIX builds - return (TADDR) ptr; - -#else // 
!_PREFIX_ - // Preserve special pointer values. if (ptr == NULL || ((TADDR) ptr == (TADDR)-1)) { @@ -989,8 +852,6 @@ DacGetTargetAddrForHostAddr(LPCVOID ptr, bool throwEx) return addr; } - -#endif // !_PREFIX_ } // Similar to DacGetTargetAddrForHostAddr above except that ptr can represent any pointer within a host data @@ -1007,14 +868,6 @@ DacGetTargetAddrForHostInteriorAddr(LPCVOID ptr, bool throwEx) // DAC-ized. const DWORD kMaxSearchIterations = 100; -#ifdef _PREFIX_ - - // Dac accesses are not interesting for PREfix and cause a lot of PREfix noise - // so we just return the unmodified pointer for our PREFIX builds - return (TADDR) ptr; - -#else // !_PREFIX_ - // Preserve special pointer values. if (ptr == NULL || ((TADDR) ptr == (TADDR)-1)) { @@ -1131,26 +984,6 @@ DacGetTargetAddrForHostInteriorAddr(LPCVOID ptr, bool throwEx) return addr; } -#endif // !_PREFIX_ -} - -PWSTR DacGetVtNameW(TADDR targetVtable) -{ - PWSTR pszRet = NULL; - - TADDR *targ = &DacGlobalValues()->EEJitManager__vtAddr; - TADDR *targStart = targ; - for (ULONG i = 0; i < sizeof(g_dacHostVtPtrs) / sizeof(PVOID); i++) - { - if (targetVtable == (*targ)) - { - pszRet = (PWSTR) *(g_dacVtStrings + (targ - targStart)); - break; - } - - targ++; - } - return pszRet; } TADDR diff --git a/src/coreclr/debug/daccess/dacimpl.h b/src/coreclr/debug/daccess/dacimpl.h index c8947048ec92..6d1ad0c16958 100644 --- a/src/coreclr/debug/daccess/dacimpl.h +++ b/src/coreclr/debug/daccess/dacimpl.h @@ -13,6 +13,7 @@ #ifndef __DACIMPL_H__ #define __DACIMPL_H__ +#include #include "gcinterface.dac.h" //--------------------------------------------------------------------------------------- // Setting DAC_HASHTABLE tells the DAC to use the hand rolled hashtable for @@ -26,7 +27,7 @@ #include #pragma pop_macro("return") #endif //DAC_HASHTABLE -extern CRITICAL_SECTION g_dacCritSec; +extern minipal_mutex g_dacMutex; // Convert between CLRDATA_ADDRESS and TADDR. // Note that CLRDATA_ADDRESS is sign-extended (for compat with Windbg and OS conventions). 
Converting @@ -1417,7 +1418,7 @@ class ClrDataAccess ULONG32 m_instanceAge; bool m_debugMode; - // This currently exists on the DAC as a way of managing lifetime of loading/freeing the cdacreader + // This currently exists on the DAC as a way of managing lifetime of loading/freeing the cdac // TODO: [cdac] Remove when cDAC deploys with SOS - https://github.com/dotnet/runtime/issues/108720 CDAC m_cdac; @@ -1504,6 +1505,7 @@ class ClrDataAccess BOOL DACIsComWrappersCCW(CLRDATA_ADDRESS ccwPtr); TADDR DACGetManagedObjectWrapperFromCCW(CLRDATA_ADDRESS ccwPtr); HRESULT DACTryGetComWrappersObjectFromCCW(CLRDATA_ADDRESS ccwPtr, OBJECTREF* objRef); + TADDR GetIdentityForManagedObjectWrapper(TADDR mow); #endif protected: @@ -3420,12 +3422,7 @@ class ClrDataFrame : public IXCLRDataFrame, // //---------------------------------------------------------------------------- -#ifdef FEATURE_EH_FUNCLETS -typedef ExceptionTrackerBase ClrDataExStateType; -#else // FEATURE_EH_FUNCLETS typedef ExInfo ClrDataExStateType; -#endif // FEATURE_EH_FUNCLETS - class ClrDataExceptionState : public IXCLRDataExceptionState { @@ -3813,7 +3810,7 @@ class EnumMethodInstances //---------------------------------------------------------------------------- #define DAC_ENTER() \ - EnterCriticalSection(&g_dacCritSec); \ + minipal_mutex_enter(&g_dacMutex); \ ClrDataAccess* __prevDacImpl = g_dacImpl; \ g_dacImpl = this; @@ -3821,10 +3818,10 @@ class EnumMethodInstances // the process's host instance cache hasn't been flushed // since the child was created. #define DAC_ENTER_SUB(dac) \ - EnterCriticalSection(&g_dacCritSec); \ + minipal_mutex_enter(&g_dacMutex); \ if (dac->m_instanceAge != m_instanceAge) \ { \ - LeaveCriticalSection(&g_dacCritSec); \ + minipal_mutex_leave(&g_dacMutex); \ return E_INVALIDARG; \ } \ ClrDataAccess* __prevDacImpl = g_dacImpl; \ @@ -3832,7 +3829,7 @@ class EnumMethodInstances #define DAC_LEAVE() \ g_dacImpl = __prevDacImpl; \ - LeaveCriticalSection(&g_dacCritSec) + minipal_mutex_leave(&g_dacMutex) #define SOSHelperEnter() \ diff --git a/src/coreclr/debug/daccess/enummem.cpp b/src/coreclr/debug/daccess/enummem.cpp index 7840bb29070c..0f5d418d648b 100644 --- a/src/coreclr/debug/daccess/enummem.cpp +++ b/src/coreclr/debug/daccess/enummem.cpp @@ -318,8 +318,12 @@ HRESULT ClrDataAccess::EnumMemDumpJitManagerInfo(IN CLRDataEnumMemoryFlags flags if (flags == CLRDATA_ENUM_MEM_HEAP2) { - EEJitManager* managerPtr = ExecutionManager::GetEEJitManager(); + EECodeGenManager* managerPtr = ExecutionManager::GetEEJitManager(); managerPtr->EnumMemoryRegions(flags); +#ifdef FEATURE_INTERPRETER + managerPtr = ExecutionManager::GetInterpreterJitManager(); + managerPtr->EnumMemoryRegions(flags); +#endif // FEATURE_INTERPRETER } return status; @@ -557,7 +561,7 @@ HRESULT ClrDataAccess::DumpManagedExcepObject(CLRDataEnumMemoryFlags flags, OBJE // included in the dump. When we touch the header and each element looking for the // MD this happens. StackTraceArray stackTrace; - exceptRef->GetStackTrace(stackTrace); + exceptRef->GetStackTrace(stackTrace, /*outKeepAliveArray*/ NULL, /* pCurrentThread */ NULL); // The stackTraceArrayObj can be either a byte[] with the actual stack trace array or an object[] where the first element is the actual stack trace array. // In case it was the latter, we need to dump the actual stack trace array object here too. 
@@ -587,8 +591,6 @@ HRESULT ClrDataAccess::DumpManagedExcepObject(CLRDataEnumMemoryFlags flags, OBJE // Pulls in data to translate from token to MethodDesc FindLoadedMethodRefOrDef(pMD->GetMethodTable()->GetModule(), pMD->GetMemberDef()); - // Pulls in sequence points. - DebugInfoManager::EnumMemoryRegionsForMethodDebugInfo(flags, pMD); PCODE addr = pMD->GetNativeCode(); if (addr != (PCODE)NULL) { @@ -966,9 +968,6 @@ HRESULT ClrDataAccess::EnumMemWalkStackHelper(CLRDataEnumMemoryFlags flags, // back to source lines for functions on stacks is very useful and we don't // want to allow the function to fail for all targets. - // Pulls in sequence points and local variable info - DebugInfoManager::EnumMemoryRegionsForMethodDebugInfo(flags, pMethodDesc); - #if defined(FEATURE_EH_FUNCLETS) && defined(USE_GC_INFO_DECODER) if (addr != (PCODE)NULL) diff --git a/src/coreclr/debug/daccess/fntableaccess.h b/src/coreclr/debug/daccess/fntableaccess.h index 722f0581e218..f76056cfba03 100644 --- a/src/coreclr/debug/daccess/fntableaccess.h +++ b/src/coreclr/debug/daccess/fntableaccess.h @@ -30,13 +30,13 @@ struct FakeHeapList size_t maxCodeHeapSize; size_t reserveForJumpStubs; DWORD_PTR pLoaderAllocator; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_64BIT) DWORD_PTR CLRPersonalityRoutine; #endif DWORD_PTR GetModuleBase() { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_64BIT) return CLRPersonalityRoutine; #else return mapBase; @@ -94,8 +94,6 @@ class CheckDuplicatedStructLayouts #else // CHECK_DUPLICATED_STRUCT_LAYOUTS -BOOL WINAPI DllMain(HINSTANCE hDLL, DWORD dwReason, LPVOID pReserved); -//NTSTATUS OutOfProcessFindHeader(HANDLE hProcess, DWORD_PTR pMapIn, DWORD_PTR addr, DWORD_PTR &codeHead); extern "C" NTSTATUS OutOfProcessFunctionTableCallback(IN HANDLE hProcess, IN PVOID TableAddress, OUT PULONG pnEntries, OUT PT_RUNTIME_FUNCTION* ppFunctions); diff --git a/src/coreclr/debug/daccess/inspect.cpp b/src/coreclr/debug/daccess/inspect.cpp index 99667d959189..0c0caec19bbc 100644 --- a/src/coreclr/debug/daccess/inspect.cpp +++ b/src/coreclr/debug/daccess/inspect.cpp @@ -2694,7 +2694,7 @@ ClrDataTypeDefinition::NewFromModule(ClrDataAccess* dac, return E_OUTOFMEMORY; } - PREFIX_ASSUME(typeDef || pubTypeDef); + _ASSERTE(typeDef || pubTypeDef); if (typeDef) { @@ -3808,7 +3808,7 @@ ClrDataTypeInstance::NewFromModule(ClrDataAccess* dac, return E_OUTOFMEMORY; } - PREFIX_ASSUME(typeInst || pubTypeInst); + _ASSERTE(typeInst || pubTypeInst); if (typeInst) { diff --git a/src/coreclr/debug/daccess/request.cpp b/src/coreclr/debug/daccess/request.cpp index 9c6669b0810f..4f95b4bd324a 100644 --- a/src/coreclr/debug/daccess/request.cpp +++ b/src/coreclr/debug/daccess/request.cpp @@ -14,6 +14,7 @@ #include "typestring.h" #include #include +#include #ifdef FEATURE_COMINTEROP #include @@ -22,9 +23,6 @@ #ifdef FEATURE_COMWRAPPERS #include #include - -typedef DPTR(InteropLibInterface::ExternalObjectContextBase) PTR_ExternalObjectContext; -typedef DPTR(InteropLib::ABI::ManagedObjectWrapperLayout) PTR_ManagedObjectWrapper; #endif // FEATURE_COMWRAPPERS #ifndef TARGET_UNIX @@ -1783,7 +1781,6 @@ ClrDataAccess::GetModuleData(CLRDATA_ADDRESS addr, struct DacpModuleData *Module ModuleData->dwModuleIndex = 0; // CoreCLR no longer has this concept ModuleData->dwTransientFlags = pModule->m_dwTransientFlags; ModuleData->LoaderAllocator = HOST_CDADDR(pModule->m_loaderAllocator); - ModuleData->ThunkHeap = HOST_CDADDR(pModule->m_pThunkHeap); EX_TRY { @@ -2143,7 +2140,7 @@ 
ClrDataAccess::GetFrameName(CLRDATA_ADDRESS vtable, unsigned int count, _Inout_u SOSDacEnter(); - PWSTR pszName = DacGetVtNameW(CLRDATA_ADDRESS_TO_TADDR(vtable)); + LPCSTR pszName = Frame::GetFrameTypeName((FrameIdentifier)CLRDATA_ADDRESS_TO_TADDR(vtable)); if (pszName == NULL) { hr = E_INVALIDARG; @@ -2151,11 +2148,19 @@ ClrDataAccess::GetFrameName(CLRDATA_ADDRESS vtable, unsigned int count, _Inout_u else { // Turn from bytes to wide characters - unsigned int len = (unsigned int)u16_strlen(pszName); + unsigned int len = (unsigned int)strlen(pszName); if (frameName) { - wcsncpy_s(frameName, count, pszName, _TRUNCATE); + if (count != 0) + { + unsigned truncatedLength = min(len, count - 1); + for (unsigned i = 0; i < truncatedLength; i++) + { + frameName[i] = pszName[i]; + } + frameName[truncatedLength] = '\0'; + } if (pNeeded) { @@ -2921,7 +2926,7 @@ ClrDataAccess::GetGCDynamicAdaptationMode(int* pDynamicAdaptationMode) { *pDynamicAdaptationMode = -1; hr = S_FALSE; - } + } SOSDacLeave(); return hr; } @@ -3280,11 +3285,7 @@ ClrDataAccess::GetNestedExceptionData(CLRDATA_ADDRESS exception, CLRDATA_ADDRESS SOSDacEnter(); -#ifdef FEATURE_EH_FUNCLETS - ExceptionTrackerBase *pExData = PTR_ExceptionTrackerBase(TO_TADDR(exception)); -#else ExInfo *pExData = PTR_ExInfo(TO_TADDR(exception)); -#endif // FEATURE_EH_FUNCLETS if (!pExData) { @@ -3455,7 +3456,6 @@ ClrDataAccess::TraverseEHInfo(CLRDATA_ADDRESS ip, DUMPEHINFO pFunc, LPVOID token deh.tryEndOffset = EHClause.TryEndPC; deh.handlerStartOffset = EHClause.HandlerStartPC; deh.handlerEndOffset = EHClause.HandlerEndPC; - deh.isDuplicateClause = IsDuplicateClause(&EHClause); if (!(pFunc)(i, EHCount, &deh, token)) { @@ -3547,6 +3547,7 @@ static HRESULT TraverseLoaderHeapBlock(PTR_LoaderHeapBlock firstBlock, VISITHEAP return i < iterationMax ? S_OK : S_FALSE; } + HRESULT ClrDataAccess::TraverseLoaderHeap(CLRDATA_ADDRESS loaderHeapAddr, VISITHEAP pFunc) { @@ -3555,7 +3556,7 @@ ClrDataAccess::TraverseLoaderHeap(CLRDATA_ADDRESS loaderHeapAddr, VISITHEAP pFun SOSDacEnter(); - hr = TraverseLoaderHeapBlock(PTR_LoaderHeap(TO_TADDR(loaderHeapAddr))->m_pFirstBlock, pFunc); + hr = TraverseLoaderHeapBlock(PTR_UnlockedLoaderHeapBase(TO_TADDR(loaderHeapAddr))->m_pFirstBlock, pFunc); SOSDacLeave(); return hr; @@ -3574,7 +3575,7 @@ ClrDataAccess::TraverseLoaderHeap(CLRDATA_ADDRESS loaderHeapAddr, LoaderHeapKind switch (kind) { case LoaderHeapKindNormal: - hr = TraverseLoaderHeapBlock(PTR_LoaderHeap(TO_TADDR(loaderHeapAddr))->m_pFirstBlock, pCallback); + hr = TraverseLoaderHeapBlock(PTR_UnlockedLoaderHeapBase(TO_TADDR(loaderHeapAddr))->m_pFirstBlock, pCallback); break; case LoaderHeapKindExplicitControl: @@ -3613,14 +3614,19 @@ ClrDataAccess::TraverseVirtCallStubHeap(CLRDATA_ADDRESS pAppDomain, VCSHeapType break; case CacheEntryHeap: +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + // The existence of the CacheEntryHeap is part of the SOS API surface, but currently + // when FEATURE_VIRTUAL_STUB_DISPATCH is not defined, the CacheEntryHeap is not created + // so it is commented out in that situation, but is not considered to be an E_INVALIDARG.
pLoaderHeap = pVcsMgr->cache_entry_heap; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH break; default: hr = E_INVALIDARG; } - if (SUCCEEDED(hr)) + if (SUCCEEDED(hr) && (pLoaderHeap != NULL)) { hr = TraverseLoaderHeapBlock(pLoaderHeap->m_pFirstBlock, pFunc); } @@ -3662,8 +3668,13 @@ static const char *LoaderAllocatorLoaderHeapNames[] = "ExecutableHeap", "FixupPrecodeHeap", "NewStubPrecodeHeap", +#if defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS) + "DynamicHelpersStubHeap", +#endif // defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS) "IndcellHeap", +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH "CacheEntryHeap", +#endif // FEATURE_VIRTUAL_STUB_DISPATCH }; @@ -3697,7 +3708,9 @@ HRESULT ClrDataAccess::GetLoaderAllocatorHeaps(CLRDATA_ADDRESS loaderAllocatorAd pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetExecutableHeap()); pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetFixupPrecodeHeap()); pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetNewStubPrecodeHeap()); - +#if defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS) + pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetDynamicHelpersStubHeap()); +#endif // defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS) VirtualCallStubManager *pVcsMgr = pLoaderAllocator->GetVirtualCallStubManager(); if (pVcsMgr == nullptr) { @@ -3707,7 +3720,9 @@ HRESULT ClrDataAccess::GetLoaderAllocatorHeaps(CLRDATA_ADDRESS loaderAllocatorAd else { pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->indcell_heap); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->cache_entry_heap); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } // All of the above are "LoaderHeap" and not the ExplicitControl version. @@ -3902,11 +3917,6 @@ ClrDataAccess::GetJumpThunkTarget(T_CONTEXT *ctx, CLRDATA_ADDRESS *targetIP, CLR #endif // TARGET_AMD64 } - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif STDMETHODIMP ClrDataAccess::Request(IN ULONG32 reqCode, IN ULONG32 inBufferSize, @@ -3953,9 +3963,6 @@ ClrDataAccess::Request(IN ULONG32 reqCode, DAC_LEAVE(); return status; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif void ClrDataAccess::EnumWksGlobalMemoryRegions(CLRDataEnumMemoryFlags flags) @@ -4416,6 +4423,7 @@ ErrExit: return hr; HRESULT ClrDataAccess::DACTryGetComWrappersObjectFromCCW(CLRDATA_ADDRESS ccwPtr, OBJECTREF* objRef) { HRESULT hr = E_FAIL; + MOWHOLDERREF holder = NULL; if (ccwPtr == 0 || objRef == NULL) { @@ -4430,7 +4438,9 @@ HRESULT ClrDataAccess::DACTryGetComWrappersObjectFromCCW(CLRDATA_ADDRESS ccwPtr, goto ErrExit; } - *objRef = ObjectFromHandle(handle); + holder = (MOWHOLDERREF)ObjectFromHandle(handle); + + *objRef = holder->_wrappedObject; return S_OK; @@ -5031,6 +5041,66 @@ HRESULT ClrDataAccess::GetBreakingChangeVersion(int* pVersion) return S_OK; } +#ifdef FEATURE_COMWRAPPERS +namespace +{ + typedef DPTR(InteropLib::ABI::ComInterfaceEntry) PTR_ComInterfaceEntry; + + struct TargetManagedObjectWrapper : public InteropLib::ABI::ManagedObjectWrapperLayout + { + public: + InteropLib::Com::CreateComInterfaceFlagsEx GetFlags() + { + return _flags; + } + + PTR_ComInterfaceEntry GetUserDefined(int32_t* pNumEntries) + { + return dac_cast((TADDR)_userDefined); + } + + TADDR IndexIntoDispatchSection(int32_t index) + { + return (TADDR)InteropLib::ABI::IndexIntoDispatchSection(index, _dispatches); + } + + TADDR GetRuntimeDefinedIUnknown() + { + return 
(TADDR)InteropLib::ABI::IndexIntoDispatchSection(_userDefinedCount, _dispatches); + } + }; + + typedef DPTR(TargetManagedObjectWrapper) PTR_ManagedObjectWrapper; +} + +TADDR ClrDataAccess::GetIdentityForManagedObjectWrapper(TADDR mow) +{ + PTR_ManagedObjectWrapper pMOW = dac_cast(mow); + // Replicate the logic for _wrapper.As(IID_IUnknown) + if ((pMOW->GetFlags() & InteropLib::Com::CreateComInterfaceFlagsEx::CallerDefinedIUnknown) == InteropLib::Com::CreateComInterfaceFlagsEx::None) + { + // We have the standard IUnknown implementation, so grab it from its known location. + // The index returned from IndexIntoDispatchSection is in the target address space. + return pMOW->GetRuntimeDefinedIUnknown(); + } + + // We need to find the IUnknown interface pointer in the MOW. + int32_t userDefinedCount; + PTR_ComInterfaceEntry pUserDefined = pMOW->GetUserDefined(&userDefinedCount); + for (int32_t i = 0; i < userDefinedCount; i++) + { + if (pUserDefined[i].IID == IID_IUnknown) + { + // We found the IUnknown interface pointer. + // The index returned from IndexIntoDispatchSection is in the target address space. + return pMOW->IndexIntoDispatchSection(i); + } + } + + return (TADDR)NULL; +} +#endif // FEATURE_COMWRAPPERS + HRESULT ClrDataAccess::GetObjectComWrappersData(CLRDATA_ADDRESS objAddr, CLRDATA_ADDRESS *rcw, unsigned int count, CLRDATA_ADDRESS *mowList, unsigned int *pNeeded) { #ifdef FEATURE_COMWRAPPERS @@ -5045,6 +5115,10 @@ HRESULT ClrDataAccess::GetObjectComWrappersData(CLRDATA_ADDRESS objAddr, CLRDATA } SOSDacEnter(); + + // Default to having found no information. + HRESULT hr = S_FALSE; + if (pNeeded != NULL) { *pNeeded = 0; @@ -5055,59 +5129,63 @@ HRESULT ClrDataAccess::GetObjectComWrappersData(CLRDATA_ADDRESS objAddr, CLRDATA *rcw = 0; } - PTR_SyncBlock pSyncBlk = PTR_Object(TO_TADDR(objAddr))->PassiveGetSyncBlock(); - if (pSyncBlk != NULL) + FieldDesc* pRcwTableField = (&g_CoreLib)->GetField(FIELD__COMWRAPPERS__NAITVE_OBJECT_WRAPPER_TABLE); + CONDITIONAL_WEAK_TABLE_REF rcwTable = *(DPTR(CONDITIONAL_WEAK_TABLE_REF))PTR_TO_TADDR(pRcwTableField->GetStaticAddressHandle(pRcwTableField->GetBase())); + if (rcwTable != nullptr) { - PTR_InteropSyncBlockInfo pInfo = pSyncBlk->GetInteropInfoNoCreate(); - if (pInfo != NULL) + NATIVEOBJECTWRAPPERREF pNativeObjectWrapperRef = nullptr; + if (rcwTable->TryGetValue(OBJECTREF(TO_TADDR(objAddr)), &pNativeObjectWrapperRef)) { - if (rcw != NULL) - { - *rcw = TO_TADDR(pInfo->m_externalComObjectContext); - } + // Tag this RCW as a ComWrappers RCW. + *rcw = TO_CDADDR(dac_cast(pNativeObjectWrapperRef)) | 0x1; + hr = S_OK; + } + } - DPTR(NewHolder) mapHolder(PTR_TO_MEMBER_TADDR(InteropSyncBlockInfo, pInfo, m_managedObjectComWrapperMap)); - DPTR(ManagedObjectComWrapperByIdMap *)ppMap(PTR_TO_MEMBER_TADDR(NewHolder, mapHolder, m_value)); - DPTR(ManagedObjectComWrapperByIdMap) pMap(TO_TADDR(*ppMap)); + FieldDesc* pMowTableField = (&g_CoreLib)->GetField(FIELD__COMWRAPPERS__ALL_MANAGED_OBJECT_WRAPPER_TABLE); + CONDITIONAL_WEAK_TABLE_REF mowTable = *(DPTR(CONDITIONAL_WEAK_TABLE_REF))PTR_TO_TADDR(pMowTableField->GetStaticAddressHandle(pRcwTableField->GetBase())); + if (mowTable != nullptr) + { + OBJECTREF pAllManagedObjectWrapperRef = nullptr; + if (mowTable->TryGetValue(OBJECTREF(TO_TADDR(objAddr)), &pAllManagedObjectWrapperRef)) + { + hr = S_OK; - CQuickArrayList comWrappers; - if (pMap != NULL) + // Read the list of MOWs into the provided buffer. 
+ FieldDesc* pListItemsField = (&g_CoreLib)->GetField(FIELD__LISTGENERIC__ITEMS); + PTRARRAYREF pListItems = (PTRARRAYREF)pListItemsField->GetRefValue(pAllManagedObjectWrapperRef); + FieldDesc* pListSizeField = (&g_CoreLib)->GetField(FIELD__LISTGENERIC__SIZE); + int32_t listCount = pListSizeField->GetValue32(pAllManagedObjectWrapperRef); + if (listCount > 0 && pListItems != nullptr) { - ManagedObjectComWrapperByIdMap::Iterator iter = pMap->Begin(); - while (iter != pMap->End()) + // The list is not empty, so we can return the MOWs. + if (pNeeded != NULL) { - comWrappers.Push(TO_CDADDR(iter->Value())); - ++iter; - + *pNeeded = (unsigned int)listCount; } - } - if (pNeeded != NULL) - { - *pNeeded = (unsigned int)comWrappers.Size(); - } - - for (SIZE_T pos = 0; pos < comWrappers.Size(); ++pos) - { - if (pos >= count) + if (count < (unsigned int)listCount) { + // Return S_FALSE if the buffer is too small. hr = S_FALSE; - break; } - mowList[pos] = comWrappers[pos]; + for (unsigned int i = 0; i < count; i++) + { + MOWHOLDERREF pMOWRef = (MOWHOLDERREF)pListItems->GetAt(i); + PTR_ManagedObjectWrapper pMOW = PTR_ManagedObjectWrapper(dac_cast(pMOWRef->_wrapper)); + + // Now that we have the managed object wrapper, we need to figure out the COM identity of it. + TADDR pComIdentity = GetIdentityForManagedObjectWrapper(dac_cast(pMOW)); + + mowList[i] = TO_CDADDR(pComIdentity); + } } } - else - { - hr = S_FALSE; - } - } - else - { - hr = S_FALSE; } + hr = S_FALSE; + SOSDacLeave(); return hr; #else // FEATURE_COMWRAPPERS @@ -5169,7 +5247,7 @@ HRESULT ClrDataAccess::GetComWrappersCCWData(CLRDATA_ADDRESS ccw, CLRDATA_ADDRES if (refCount != NULL) { - *refCount = (int)pMOW->RefCount; + *refCount = (int)pMOW->GetRawRefCount(); } } else @@ -5185,52 +5263,58 @@ HRESULT ClrDataAccess::GetComWrappersCCWData(CLRDATA_ADDRESS ccw, CLRDATA_ADDRES #endif // FEATURE_COMWRAPPERS } -HRESULT ClrDataAccess::IsComWrappersRCW(CLRDATA_ADDRESS rcw, BOOL *isComWrappersRCW) -{ #ifdef FEATURE_COMWRAPPERS - if (rcw == 0) - { - return E_INVALIDARG; - } - - SOSDacEnter(); - - if (isComWrappersRCW != NULL) +namespace +{ + BOOL IsComWrappersRCW(CLRDATA_ADDRESS rcw) { - PTR_ExternalObjectContext pRCW(TO_TADDR(rcw)); - BOOL stillValid = TRUE; - if(pRCW->SyncBlockIndex >= SyncBlockCache::s_pSyncBlockCache->m_SyncTableSize) + if ((rcw & 1) == 0) { - stillValid = FALSE; + // We use the low bit of the RCW address to indicate that it is a ComWrappers RCW. + return FALSE; } - PTR_SyncBlock pSyncBlk = NULL; - if (stillValid) + OBJECTREF nativeObjectWrapper = OBJECTREF(TO_TADDR(rcw & ~1)); + if (nativeObjectWrapper == NULL) { - PTR_SyncTableEntry ste = PTR_SyncTableEntry(dac_cast(g_pSyncTable) + (sizeof(SyncTableEntry) * pRCW->SyncBlockIndex)); - pSyncBlk = ste->m_SyncBlock; - if(pSyncBlk == NULL) - { - stillValid = FALSE; - } + return FALSE; } - PTR_InteropSyncBlockInfo pInfo = NULL; - if (stillValid) + if (nativeObjectWrapper->GetMethodTable() != (&g_CoreLib)->GetClass(CLASS__NATIVE_OBJECT_WRAPPER)) { - pInfo = pSyncBlk->GetInteropInfoNoCreate(); - if(pInfo == NULL) - { - stillValid = FALSE; - } + return FALSE; } - if (stillValid) + return TRUE; + } + + TADDR GetComWrappersRCWIdentity(CLRDATA_ADDRESS rcw) + { + if ((rcw & 1) == 0) { - stillValid = TO_TADDR(pInfo->m_externalComObjectContext) == PTR_HOST_TO_TADDR(pRCW); + // We use the low bit of the RCW address to indicate that it is a ComWrappers RCW. 
+ return (TADDR)NULL; } - *isComWrappersRCW = stillValid; + NATIVEOBJECTWRAPPERREF pNativeObjectWrapper = NATIVEOBJECTWRAPPERREF(TO_TADDR(rcw & ~1)); + return pNativeObjectWrapper->GetExternalComObject(); + } +} +#endif + +HRESULT ClrDataAccess::IsComWrappersRCW(CLRDATA_ADDRESS rcw, BOOL *isComWrappersRCW) +{ +#ifdef FEATURE_COMWRAPPERS + if (rcw == 0) + { + return E_INVALIDARG; + } + + SOSDacEnter(); + + if (isComWrappersRCW != NULL) + { + *isComWrappersRCW = ::IsComWrappersRCW(rcw); hr = *isComWrappersRCW ? S_OK : S_FALSE; } @@ -5251,10 +5335,9 @@ HRESULT ClrDataAccess::GetComWrappersRCWData(CLRDATA_ADDRESS rcw, CLRDATA_ADDRES SOSDacEnter(); - PTR_ExternalObjectContext pEOC(TO_TADDR(rcw)); if (identity != NULL) { - *identity = PTR_CDADDR(pEOC->Identity); + *identity = TO_CDADDR(GetComWrappersRCWIdentity(rcw)); } SOSDacLeave(); diff --git a/src/coreclr/debug/daccess/stack.cpp b/src/coreclr/debug/daccess/stack.cpp index a0dc074ae1f6..a8dcfdb6b8b7 100644 --- a/src/coreclr/debug/daccess/stack.cpp +++ b/src/coreclr/debug/daccess/stack.cpp @@ -10,7 +10,9 @@ //***************************************************************************** #include "stdafx.h" - +#ifdef FEATURE_INTERPRETER +#include "interpexec.h" +#endif // FEATURE_INTERPRETER //---------------------------------------------------------------------------- // // ClrDataStackWalk. diff --git a/src/coreclr/debug/daccess/task.cpp b/src/coreclr/debug/daccess/task.cpp index 65f14dedd189..def99a1e10da 100644 --- a/src/coreclr/debug/daccess/task.cpp +++ b/src/coreclr/debug/daccess/task.cpp @@ -11,20 +11,6 @@ #include "stdafx.h" -// XXX Microsoft - Why aren't these extra MD APIs in a header? -STDAPI GetMDPublicInterfaceFromInternal( - void *pIUnkPublic, // [IN] Given scope. - REFIID riid, // [in] The interface desired. - void **ppIUnkInternal); // [out] Return interface on success. - -STDAPI GetMetaDataPublicInterfaceFromInternal( - void *pv, // [IN] Given interface. - REFIID riid, // [IN] desired interface. - void **ppv) // [OUT] returned interface -{ - return GetMDPublicInterfaceFromInternal(pv, riid, ppv); -} - //---------------------------------------------------------------------------- // // ClrDataTask. @@ -3636,7 +3622,7 @@ ClrDataMethodDefinition::NewFromModule(ClrDataAccess* dac, return E_OUTOFMEMORY; } - PREFIX_ASSUME(methDef || pubMethDef); + _ASSERTE(methDef || pubMethDef); if (methDef) { @@ -4448,7 +4434,7 @@ ClrDataMethodInstance::NewFromModule(ClrDataAccess* dac, return E_OUTOFMEMORY; } - PREFIX_ASSUME(methInst || pubMethInst); + _ASSERTE(methInst || pubMethInst); if (methInst) { @@ -4956,7 +4942,7 @@ ClrDataExceptionState::NewFromThread(ClrDataAccess* dac, return E_OUTOFMEMORY; } - PREFIX_ASSUME(exception || pubException); + _ASSERTE(exception || pubException); if (exception) { diff --git a/src/coreclr/debug/di/cordb.cpp b/src/coreclr/debug/di/cordb.cpp index 9f8a6f49e748..d7d3957ec7db 100644 --- a/src/coreclr/debug/di/cordb.cpp +++ b/src/coreclr/debug/di/cordb.cpp @@ -13,7 +13,6 @@ #include "classfactory.h" #include "corsym.h" #include "contract.h" -#include "metadataexports.h" #if defined(FEATURE_DBGIPC_TRANSPORT_DI) #include "dbgtransportsession.h" #include "dbgtransportmanager.h" @@ -321,19 +320,6 @@ STDAPI DLLEXPORT DllGetClassObjectInternal( // Return code. 
{ pfnCreateObject = Cordb::CreateObjectTelesto; } -#else // !FEATURE_DBGIPC_TRANSPORT_DI - if(rclsid == CLSID_CorDebug_V1) - { - if (0) // if (IsSingleCLR()) - { - // Don't allow creating backwards objects until we ensure that the v2.0 Right-side - // is backwards compat. This may involve using CordbProcess::SupportsVersion to conditionally - // emulate old behavior. - // If emulating V1.0, QIs for V2.0 interfaces should fail. - _ASSERTE(!"Ensure that V2.0 RS is backwards compat"); - pfnCreateObject = Cordb::CreateObjectV1; - } - } #endif // FEATURE_DBGIPC_TRANSPORT_DI if (pfnCreateObject == NULL) diff --git a/src/coreclr/debug/di/hash.cpp b/src/coreclr/debug/di/hash.cpp index 6a70025c095e..911e0742814b 100644 --- a/src/coreclr/debug/di/hash.cpp +++ b/src/coreclr/debug/di/hash.cpp @@ -201,7 +201,7 @@ void CordbHashTableEnum::BuildOrThrow( CordbBase * pOwnerObj, NeuterList * pOwnerList, CordbHashTable *pTable, - const _GUID &id, + const GUID &id, RSInitHolder * pHolder) { CordbHashTableEnum * pEnum = new CordbHashTableEnum(pOwnerObj, pOwnerList, pTable, id); diff --git a/src/coreclr/debug/di/module.cpp b/src/coreclr/debug/di/module.cpp index 02986839e454..61ddfd18fc53 100644 --- a/src/coreclr/debug/di/module.cpp +++ b/src/coreclr/debug/di/module.cpp @@ -10,8 +10,6 @@ #include "stdafx.h" #include "winbase.h" -#include "metadataexports.h" - #include "winbase.h" #include "corpriv.h" #include "corsym.h" @@ -19,30 +17,6 @@ #include "pedecoder.h" #include "stgpool.h" -//--------------------------------------------------------------------------------------- -// Update an existing metadata importer with a buffer -// -// Arguments: -// pUnk - IUnknoown of importer to update. -// pData - local buffer containing new metadata -// cbData - size of buffer in bytes. -// dwReOpenFlags - metadata flags to pass for reopening. -// -// Returns: -// S_OK on success. Else failure. -// -// Notes: -// This will call code:MDReOpenMetaDataWithMemoryEx from the metadata engine. -STDAPI ReOpenMetaDataWithMemoryEx( - void *pUnk, - LPCVOID pData, - ULONG cbData, - DWORD dwReOpenFlags) -{ - HRESULT hr = MDReOpenMetaDataWithMemoryEx(pUnk,pData, cbData, dwReOpenFlags); - return hr; -} - //--------------------------------------------------------------------------------------- // Initialize a new CordbModule around a Module in the target. // @@ -1063,7 +1037,7 @@ void CordbModule::UpdatePublicMetaDataFromRemote(TargetBuffer bufferRemoteMetaDa // Now tell our current IMetaDataImport object to re-initialize by swapping in the new memory block. // This allows us to keep manipulating metadata objects on other threads without crashing. // This will also invalidate an existing associated Internal MetaData. - hr = ReOpenMetaDataWithMemoryEx(m_pIMImport, pLocalMetaDataPtr, dwMetaDataSize, ofTakeOwnership ); + hr = MDReOpenMetaDataWithMemory(m_pIMImport, pLocalMetaDataPtr, dwMetaDataSize, ofTakeOwnership ); IfFailThrow(hr); // Success. 
MetaData now owns the metadata memory @@ -2443,7 +2417,7 @@ HRESULT CordbModule::CreateReaderForInMemorySymbols(REFIID riid, void** ppObj) { #ifndef TARGET_UNIX // PDB format - use diasymreader.dll with COM activation - InlineSString<_MAX_PATH> ssBuf; + InlineSString ssBuf; IfFailThrow(GetClrModuleDirectory(ssBuf)); IfFailThrow(FakeCoCreateInstanceEx(CLSID_CorSymBinder_SxS, ssBuf.GetUnicode(), diff --git a/src/coreclr/debug/di/process.cpp b/src/coreclr/debug/di/process.cpp index cfd38bfe2d4c..da0c9b841c7e 100644 --- a/src/coreclr/debug/di/process.cpp +++ b/src/coreclr/debug/di/process.cpp @@ -26,7 +26,6 @@ // @dbgtodo shim: process has some private hooks into the shim. #include "shimpriv.h" -#include "metadataexports.h" #include "readonlydatatargetfacade.h" #include "metahost.h" @@ -441,6 +440,12 @@ IMDInternalImport * CordbProcess::LookupMetaDataFromDebuggerForSingleFile( { INTERNAL_DAC_CALLBACK(this); + // If the debugger didn't supply a metadata locator interface, fail + if (m_pMetaDataLocator == nullptr) + { + return nullptr; + } + ULONG32 cchLocalImagePath = MAX_LONGPATH; ULONG32 cchLocalImagePathRequired; NewArrayHolder pwszLocalFilePath = NULL; @@ -1734,7 +1739,7 @@ HRESULT CordbProcess::Init() hr = m_pDACDataTarget->QueryInterface(IID_ICorDebugMetaDataLocator, reinterpret_cast(&m_pMetaDataLocator)); // Get the metadata dispenser. - hr = InternalCreateMetaDataDispenser(IID_IMetaDataDispenserEx, (void **)&m_pMetaDispenser); + hr = CreateMetaDataDispenser(IID_IMetaDataDispenserEx, (void **)&m_pMetaDispenser); // We statically link in the dispenser. We expect it to succeed, except for OOM, which // debugger doesn't yet handle. @@ -4790,10 +4795,6 @@ void CordbProcess::DbgAssertAppDomainDeleted(VMPTR_AppDomain vmAppDomainDeleted) // A V2 shim can provide a proxy calllack that takes these events and queues them and // does the real dispatch to the user to emulate V2 semantics. 
// -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif void CordbProcess::RawDispatchEvent( DebuggerIPCEvent * pEvent, RSLockHolder * pLockHolder, @@ -4957,7 +4958,7 @@ void CordbProcess::RawDispatchEvent( STRESS_LOG1(LF_CORDB, LL_INFO1000, "[%x] RCET::DRCE: step complete.\n", GetCurrentThreadId()); - PREFIX_ASSUME(pThread != NULL); + _ASSERTE(pThread != NULL); CordbStepper * pStepper = m_steppers.GetBase(LsPtrToCookie(pEvent->StepData.stepperToken)); @@ -5132,7 +5133,7 @@ void CordbProcess::RawDispatchEvent( VmPtrToCookie(pEvent->UnloadModuleData.vmDomainAssembly), VmPtrToCookie(pEvent->vmAppDomain)); - PREFIX_ASSUME (pAppDomain != NULL); + _ASSERTE (pAppDomain != NULL); CordbModule *module = pAppDomain->LookupOrCreateModule(pEvent->UnloadModuleData.vmDomainAssembly); @@ -5659,7 +5660,7 @@ void CordbProcess::RawDispatchEvent( _ASSERTE(NULL != pAppDomain); CordbModule * pModule = pAppDomain->LookupOrCreateModule(pEvent->EnCRemap.vmDomainAssembly); - PREFIX_ASSUME(pModule != NULL); + _ASSERTE(pModule != NULL); CordbFunction * pCurFunction = NULL; CordbFunction * pResumeFunction = NULL; @@ -5715,12 +5716,12 @@ void CordbProcess::RawDispatchEvent( _ASSERTE(NULL != pAppDomain); CordbModule* pModule = pAppDomain->LookupOrCreateModule(pEvent->EnCRemap.vmDomainAssembly); - PREFIX_ASSUME(pModule != NULL); + _ASSERTE(pModule != NULL); // Find the function we're remapping to, which must be the latest version CordbFunction *pRemapFunction= pModule->LookupFunctionLatestVersion(pEvent->EnCRemapComplete.funcMetadataToken); - PREFIX_ASSUME(pRemapFunction != NULL); + _ASSERTE(pRemapFunction != NULL); // Dispatch the FunctionRemapComplete callback RSSmartPtr pRef(pRemapFunction); @@ -5960,9 +5961,6 @@ void CordbProcess::RawDispatchEvent( FinishEventDispatch(); } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //--------------------------------------------------------------------------------------- // Callback for prepopulating threads. @@ -11162,8 +11160,8 @@ void CordbProcess::HandleSetThreadContextNeeded(DWORD dwThreadId) // For the first step of obtaining the thread handle, // we have previously attempted to use ::OpenThread to get a handle to the thread. // However, there are situations where OpenThread can fail with an Access Denied error. - // From https://github.com/dotnet/runtime/issues/107263, the control-c handler in - // Windows causes the process to have higher privileges. + // From https://github.com/dotnet/runtime/issues/107263, the control-c handler in + // Windows causes the process to have higher privileges. // We are now caching the thread handle in the unmanaged thread hash table when the thread is created. UnmanagedThreadTracker * curThread = m_unmanagedThreadHashTable.Lookup(dwThreadId); @@ -11382,7 +11380,7 @@ bool CordbProcess::HandleInPlaceSingleStep(DWORD dwThreadId, PVOID pExceptionAdd { UnmanagedThreadTracker * curThread = m_unmanagedThreadHashTable.Lookup(dwThreadId); _ASSERTE(curThread != NULL); - if (curThread != NULL && + if (curThread != NULL && curThread->GetThreadId() == dwThreadId && curThread->IsInPlaceStepping()) { @@ -11912,7 +11910,7 @@ void CordbWin32EventThread::Win32EventLoop() // Once we detach, we don't need to continue any outstanding event. // So act like we never got the event. 
fEventAvailable = false; - PREFIX_ASSUME(m_pProcess == NULL); // W32 cleared process pointer + _ASSERTE(m_pProcess == NULL); // W32 cleared process pointer } #ifdef FEATURE_INTEROP_DEBUGGING @@ -11937,7 +11935,7 @@ void CordbWin32EventThread::Win32EventLoop() // But since the CordbProcess is our parent object, we know it won't go away until // it neuters us, so we can safely proceed. // Find the process this event is for. - PREFIX_ASSUME(m_pProcess != NULL); + _ASSERTE(m_pProcess != NULL); _ASSERTE(m_pProcess->m_id == GetProcessId(&event)); // should only get events for our proc g_pRSDebuggingInfo->m_MRUprocess = m_pProcess; @@ -13311,7 +13309,11 @@ void CordbProcess::HandleDebugEventForInteropDebugging(const DEBUG_EVENT * pEven { LOG((LF_CORDB, LL_INFO100000, "W32ET::W32EL: hijack complete will restore context...\n")); DT_CONTEXT tempContext = { 0 }; +#if defined(DT_CONTEXT_EXTENDED_REGISTERS) + tempContext.ContextFlags = DT_CONTEXT_FULL | DT_CONTEXT_EXTENDED_REGISTERS; +#else tempContext.ContextFlags = DT_CONTEXT_FULL; +#endif HRESULT hr = pUnmanagedThread->GetThreadContext(&tempContext); _ASSERTE(SUCCEEDED(hr)); @@ -15591,4 +15593,4 @@ void CordbProcess::HandleDebugEventForInPlaceStepping(const DEBUG_EVENT * pEvent break; } } -#endif // OUT_OF_PROCESS_SETTHREADCONTEXT \ No newline at end of file +#endif // OUT_OF_PROCESS_SETTHREADCONTEXT diff --git a/src/coreclr/debug/di/rsfunction.cpp b/src/coreclr/debug/di/rsfunction.cpp index d9d30eddb072..0dc6cd2b8531 100644 --- a/src/coreclr/debug/di/rsfunction.cpp +++ b/src/coreclr/debug/di/rsfunction.cpp @@ -497,7 +497,7 @@ HRESULT CordbFunction::GetCurrentVersionNumber(ULONG32 *pnCurrentVersion) CordbFunction* curFunc = m_pModule->LookupFunctionLatestVersion(m_MDToken); // will always find at least ourself - PREFIX_ASSUME(curFunc != NULL); + _ASSERTE(curFunc != NULL); *pnCurrentVersion = (ULONG32)(curFunc->m_dwEnCVersionNumber); @@ -868,10 +868,10 @@ HRESULT CordbFunction::InitParentClassOfFunction() _ASSERTE(pProcess != NULL); CordbAssembly *pAssembly = m_pModule->GetCordbAssembly(); - PREFIX_ASSUME(pAssembly != NULL); + _ASSERTE(pAssembly != NULL); CordbModule* pClassModule = pAssembly->GetAppDomain()->LookupOrCreateModule(vmDomainAssembly); - PREFIX_ASSUME(pClassModule != NULL); + _ASSERTE(pClassModule != NULL); CordbClass *pClass; hr = pClassModule->LookupOrCreateClass(classMetadataToken, &pClass); diff --git a/src/coreclr/debug/di/rsmain.cpp b/src/coreclr/debug/di/rsmain.cpp index 379330ebeb41..33c9e82878c3 100644 --- a/src/coreclr/debug/di/rsmain.cpp +++ b/src/coreclr/debug/di/rsmain.cpp @@ -71,11 +71,7 @@ const char * GetDebugCodeName(DWORD dwCode) // Per-thread state for Debug builds... //----------------------------------------------------------------------------- #ifdef RSCONTRACTS -#ifndef __GNUC__ -__declspec(thread) DbgRSThread* DbgRSThread::t_pCurrent; -#else // !__GNUC__ -__thread DbgRSThread* DbgRSThread::t_pCurrent; -#endif // !__GNUC__ +thread_local DbgRSThread* DbgRSThread::t_pCurrent; LONG DbgRSThread::s_Total = 0; @@ -1706,7 +1702,7 @@ HRESULT Cordb::CreateProcessCommon(ICorDebugRemoteTarget * pRemoteTarget, UnlockProcessList(); - PREFIX_ASSUME(pProcess != NULL); + _ASSERTE(pProcess != NULL); pProcess->ExternalAddRef(); *ppProcess = (ICorDebugProcess *)pProcess; @@ -2032,11 +2028,6 @@ void Cordb::EnsureCanLaunchOrAttach(BOOL fWin32DebuggingEnabled) // Made it this far, we succeeded. 
} -HRESULT Cordb::CreateObjectV1(REFIID id, void **object) -{ - return CreateObject(CorDebugVersion_1_0, ProcessDescriptor::UNINITIALIZED_PID, NULL, NULL, id, object); -} - #if defined(FEATURE_DBGIPC_TRANSPORT_DI) // CoreCLR activates debugger objects via direct COM rather than the shim (just like V1). For now we share the // same debug engine version as V2, though this may change in the future. @@ -2496,7 +2487,7 @@ HRESULT CordbEnumFilter::Init (ICorDebugModuleEnum * pModEnum, CordbAssembly *pA } else { - PREFIX_ASSUME(pPrevious != NULL); + _ASSERTE(pPrevious != NULL); pPrevious->SetNext (pElement); } pPrevious = pElement; @@ -2600,7 +2591,7 @@ HRESULT CordbEnumFilter::Init (ICorDebugThreadEnum *pThreadEnum, CordbAppDomain } else { - PREFIX_ASSUME(pPrevious != NULL); + _ASSERTE(pPrevious != NULL); pPrevious->SetNext (pElement); } diff --git a/src/coreclr/debug/di/rspriv.h b/src/coreclr/debug/di/rspriv.h index 7b8098cf84a5..3185cc0b5543 100644 --- a/src/coreclr/debug/di/rspriv.h +++ b/src/coreclr/debug/di/rspriv.h @@ -15,7 +15,7 @@ #include #include - +#include #ifdef _DEBUG #define LOGGING @@ -798,7 +798,7 @@ class RSLock } - CRITICAL_SECTION m_lock; + minipal_mutex m_lock; #ifdef _DEBUG public: @@ -839,9 +839,8 @@ class RSLock typedef RSLock::RSLockHolder RSLockHolder; typedef RSLock::RSInverseLockHolder RSInverseLockHolder; -// In the RS, we should be using RSLocks instead of raw critical sections. -#define CRITICAL_SECTION USE_RSLOCK_INSTEAD_OF_CRITICAL_SECTION - +// In the RS, we should be using RSLocks instead of raw minipal_mutex. +#define minipal_mutex USE_RSLOCK_INSTEAD_OF_MINIPAL_MUTEX /* ------------------------------------------------------------------------- * * Helper macros. Use the ATT_* macros below instead of these. @@ -1837,14 +1836,14 @@ public ICorDebugAssemblyEnum CordbBase * pOwnerObj, NeuterList * pOwnerList, CordbHashTable *table, - const _GUID &id); + const GUID &id); public: static void BuildOrThrow( CordbBase * pOwnerObj, NeuterList * pOwnerList, CordbHashTable *table, - const _GUID &id, + const GUID &id, RSInitHolder * pHolder); CordbHashTableEnum(CordbHashTableEnum *cloneSrc); @@ -2222,7 +2221,6 @@ class Cordb : public CordbBase, public ICorDebug, public ICorDebugRemote // CorDebug //----------------------------------------------------------- - static COM_METHOD CreateObjectV1(REFIID id, void **object); #if defined(FEATURE_DBGIPC_TRANSPORT_DI) static COM_METHOD CreateObjectTelesto(REFIID id, void ** pObject); #endif // FEATURE_DBGIPC_TRANSPORT_DI @@ -11201,7 +11199,7 @@ inline CordbEval * UnwrapCookieCordbEval(CordbProcess *pProc, UINT cookie) // We defined this at the top of the file - undef it now so that we don't pollute other files. -#undef CRITICAL_SECTION +#undef minipal_mutex #ifdef RSCONTRACTS @@ -11228,11 +11226,7 @@ class DbgRSThread DbgRSThread(); // The TLS slot that we'll put this thread object in. 
-#ifndef __GNUC__ - static __declspec(thread) DbgRSThread* t_pCurrent; -#else // !__GNUC__ - static __thread DbgRSThread* t_pCurrent; -#endif // !__GNUC__ + static thread_local DbgRSThread* t_pCurrent; static LONG s_Total; // Total count of thread objects diff --git a/src/coreclr/debug/di/rspriv.inl b/src/coreclr/debug/di/rspriv.inl index d918cae3ee54..c88416b505c2 100644 --- a/src/coreclr/debug/di/rspriv.inl +++ b/src/coreclr/debug/di/rspriv.inl @@ -528,14 +528,15 @@ inline void RSLock::Init(const char * szTag, int eAttr, ERSLockLevel level) _ASSERTE(IsInit()); - InitializeCriticalSection(&m_lock); + bool init = minipal_mutex_init(&m_lock); + _ASSERTE(init); } // Cleanup a lock. inline void RSLock::Destroy() { CONSISTENCY_CHECK_MSGF(IsInit(), ("RSLock '%s' not inited", m_szTag)); - DeleteCriticalSection(&m_lock); + minipal_mutex_destroy(&m_lock); #ifdef _DEBUG m_eAttr = cLockUninit; // No longer initialized. @@ -549,10 +550,11 @@ inline void RSLock::Lock() #ifdef RSCONTRACTS DbgRSThread * pThread = DbgRSThread::GetThread(); + pThread->NotifyTakeLock(this); #endif - EnterCriticalSection(&m_lock); + minipal_mutex_enter(&m_lock); #ifdef _DEBUG m_tidOwner = ::GetCurrentThreadId(); m_count++; @@ -583,7 +585,7 @@ inline void RSLock::Unlock() pThread->NotifyReleaseLock(this); #endif - LeaveCriticalSection(&m_lock); + minipal_mutex_leave(&m_lock); } template diff --git a/src/coreclr/debug/di/rsstackwalk.cpp b/src/coreclr/debug/di/rsstackwalk.cpp index f2bf3777bb6b..8fdcf2efc5d2 100644 --- a/src/coreclr/debug/di/rsstackwalk.cpp +++ b/src/coreclr/debug/di/rsstackwalk.cpp @@ -640,7 +640,7 @@ HRESULT CordbStackWalk::GetFrameWorker(ICorDebugFrame ** ppFrame) // Lookup the module CordbModule* pModule = pCurrentAppDomain->LookupOrCreateModule(pFuncData->vmDomainAssembly); - PREFIX_ASSUME(pModule != NULL); + _ASSERTE(pModule != NULL); // Create or look up a CordbNativeCode. There is one for each jitted instance of a method, // and we may have multiple instances because of generics. diff --git a/src/coreclr/debug/di/rsthread.cpp b/src/coreclr/debug/di/rsthread.cpp index cc0656b5bd19..09a41b608f27 100644 --- a/src/coreclr/debug/di/rsthread.cpp +++ b/src/coreclr/debug/di/rsthread.cpp @@ -633,7 +633,7 @@ void CordbThread::RefreshHandle(HANDLE * phThread) HANDLE hThread = pDAC->GetThreadHandle(m_vmThreadToken); _ASSERTE(hThread != INVALID_HANDLE_VALUE); - PREFAST_ASSUME(hThread != NULL); + _ASSERTE(hThread != NULL); // need to dup handle here if (hThread == m_hCachedOutOfProcThread) @@ -3125,7 +3125,7 @@ HRESULT CordbUnmanagedThread::SetTlsSlot(DWORD slot, REMOTE_PTR value) return S_OK; } -// gets the value of gCurrentThreadInfo.m_pThread +// gets the value of t_CurrentThreadInfo.m_pThread DWORD_PTR CordbUnmanagedThread::GetEEThreadValue() { DWORD_PTR ret = NULL; @@ -3152,7 +3152,7 @@ DWORD_PTR CordbUnmanagedThread::GetEEThreadValue() return ret; } -// returns the remote address of gCurrentThreadInfo +// returns the remote address of t_CurrentThreadInfo HRESULT CordbUnmanagedThread::GetClrModuleTlsDataAddress(REMOTE_PTR* pAddress) { *pAddress = NULL; @@ -3721,9 +3721,14 @@ HRESULT CordbUnmanagedThread::SetupFirstChanceHijackForSync() LOG((LF_CORDB, LL_INFO10000, "CUT::SFCHFS: hijackCtx started as:\n")); LogContext(GetHijackCtx()); - // Save the thread's full context. 
+ // Save the thread's full context + DT_CONTEXT_EXTENDED_REGISTERS + // to avoid getting incomplete information and corrupt the thread context DT_CONTEXT context; +#if defined(DT_CONTEXT_EXTENDED_REGISTERS) + context.ContextFlags = DT_CONTEXT_FULL | DT_CONTEXT_EXTENDED_REGISTERS; +#else context.ContextFlags = DT_CONTEXT_FULL; +#endif BOOL succ = DbiGetThreadContext(m_handle, &context); _ASSERTE(succ); // for debugging when GetThreadContext fails @@ -3732,8 +3737,11 @@ HRESULT CordbUnmanagedThread::SetupFirstChanceHijackForSync() DWORD error = GetLastError(); LOG((LF_CORDB, LL_ERROR, "CUT::SFCHFS: DbiGetThreadContext error=0x%x\n", error)); } - +#if defined(DT_CONTEXT_EXTENDED_REGISTERS) + GetHijackCtx()->ContextFlags = DT_CONTEXT_FULL | DT_CONTEXT_EXTENDED_REGISTERS; +#else GetHijackCtx()->ContextFlags = DT_CONTEXT_FULL; +#endif CORDbgCopyThreadContext(GetHijackCtx(), &context); LOG((LF_CORDB, LL_INFO10000, "CUT::SFCHFS: thread=0x%x Hijacking for sync. Original context is:\n", this)); LogContext(GetHijackCtx()); @@ -5893,7 +5901,7 @@ CORDB_ADDRESS CordbNativeFrame::GetLSStackAddress( // This should never be null as long as regNum is a member of the RegNum enum. // If it is, an AV dereferencing a null-pointer in retail builds, or an assert in debug // builds is exactly the behavior we want. - PREFIX_ASSUME(pRegAddr != NULL); + _ASSERTE(pRegAddr != NULL); pRemoteValue = PTR_TO_CORDB_ADDRESS(*pRegAddr + offset); } @@ -10560,7 +10568,7 @@ HRESULT CordbEval::GetResult(ICorDebugValue **ppResult) { pAppDomain = m_thread->GetAppDomain(); } - PREFIX_ASSUME(pAppDomain != NULL); + _ASSERTE(pAppDomain != NULL); CordbType * pType = NULL; hr = CordbType::TypeDataToType(pAppDomain, &m_resultType, &pType); diff --git a/src/coreclr/debug/di/shimremotedatatarget.cpp b/src/coreclr/debug/di/shimremotedatatarget.cpp index 674d325560ff..72c46f594c73 100644 --- a/src/coreclr/debug/di/shimremotedatatarget.cpp +++ b/src/coreclr/debug/di/shimremotedatatarget.cpp @@ -371,5 +371,5 @@ ShimRemoteDataTarget::ContinueStatusChanged( HRESULT STDMETHODCALLTYPE ShimRemoteDataTarget::VirtualUnwind(DWORD threadId, ULONG32 contextSize, PBYTE context) { - return m_pTransport->VirtualUnwind(threadId, contextSize, context); + return E_NOTIMPL; } diff --git a/src/coreclr/debug/di/valuehome.cpp b/src/coreclr/debug/di/valuehome.cpp index 84222d8a699a..49c96035b090 100644 --- a/src/coreclr/debug/di/valuehome.cpp +++ b/src/coreclr/debug/di/valuehome.cpp @@ -238,7 +238,7 @@ void RegValueHome::SetEnregisteredValue(MemoryRange newValue, DT_CONTEXT * pCont void RegValueHome::GetEnregisteredValue(MemoryRange valueOutBuffer) { UINT_PTR* reg = m_pFrame->GetAddressOfRegister(m_reg1Info.m_kRegNumber); - PREFIX_ASSUME(reg != NULL); + _ASSERTE(reg != NULL); _ASSERTE(sizeof(*reg) == valueOutBuffer.Size()); memcpy(valueOutBuffer.StartAddress(), reg, sizeof(*reg)); @@ -293,10 +293,10 @@ void RegRegValueHome::SetEnregisteredValue(MemoryRange newValue, DT_CONTEXT * pC void RegRegValueHome::GetEnregisteredValue(MemoryRange valueOutBuffer) { UINT_PTR* highWordAddr = m_pFrame->GetAddressOfRegister(m_reg1Info.m_kRegNumber); - PREFIX_ASSUME(highWordAddr != NULL); + _ASSERTE(highWordAddr != NULL); UINT_PTR* lowWordAddr = m_pFrame->GetAddressOfRegister(m_reg2Info.m_kRegNumber); - PREFIX_ASSUME(lowWordAddr != NULL); + _ASSERTE(lowWordAddr != NULL); _ASSERTE(sizeof(*highWordAddr) + sizeof(*lowWordAddr) == valueOutBuffer.Size()); @@ -353,7 +353,7 @@ void RegMemValueHome::GetEnregisteredValue(MemoryRange valueOutBuffer) { // Read the high bits from the 
register... UINT_PTR* highBitsAddr = m_pFrame->GetAddressOfRegister(m_reg1Info.m_kRegNumber); - PREFIX_ASSUME(highBitsAddr != NULL); + _ASSERTE(highBitsAddr != NULL); // ... and the low bits from the remote process DWORD lowBits; @@ -420,7 +420,7 @@ void MemRegValueHome::GetEnregisteredValue(MemoryRange valueOutBuffer) // and the low bits from a register UINT_PTR* lowBitsAddr = m_pFrame->GetAddressOfRegister(m_reg1Info.m_kRegNumber); - PREFIX_ASSUME(lowBitsAddr != NULL); + _ASSERTE(lowBitsAddr != NULL); _ASSERTE(sizeof(*lowBitsAddr)+sizeof(highBits) == valueOutBuffer.Size()); diff --git a/src/coreclr/debug/ee/amd64/amd64InstrDecode.h b/src/coreclr/debug/ee/amd64/amd64InstrDecode.h index 3c2da5c40ea3..8aa398e018f7 100644 --- a/src/coreclr/debug/ee/amd64/amd64InstrDecode.h +++ b/src/coreclr/debug/ee/amd64/amd64InstrDecode.h @@ -30,12 +30,21 @@ namespace Amd64InstrDecode // I4B // Instruction includes 4 bytes of immediates // I8B // Instruction includes 8 bytes of immediates // Unknown // Instruction samples did not include a modrm configured to produce RIP addressing - // L // Flags depend on L bit in encoding. L__or_ - // LL // Flags depend on L'L bits in EVEX encoding. LL___ - // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector - // W // Flags depend on W bit in encoding. W__or_ - // P // Flags depend on OpSize prefix for encoding. P__or_ - // WP // Flags depend on W bit in encoding and OpSize prefix. WP__or__or_ + // L // Flags depend on L bit in encoding. + // // L__or_ + // // L__or_ + // LL // Flags depend on L'L bits in EVEX encoding. + // // LL___ + // // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector + // W // Flags depend on W bit in encoding. + // // W__or_ + // // W__or_ + // P // Flags depend on OpSize prefix for encoding. + // // P__or_ + // // P__or_ + // WP // Flags depend on W bit in encoding and OpSize prefix. + // // WP__or__or_ + // // WP__or__or_ // WLL // Flags depend on W and L'L bits. // // WLL____or___ // bLL // Flags depend on EVEX.b and L'L bits. @@ -53,12 +62,12 @@ namespace Amd64InstrDecode I3B, I4B, I8B, - M1st_bLL_M4B_M16B_M32B_M64B, - M1st_bLL_M8B_M16B_M32B_M64B, M1st_I1B_L_M16B_or_M8B, M1st_I1B_LL_M8B_M16B_M32B, + M1st_I1B_W_M8B_or_M2B, M1st_I1B_W_M8B_or_M4B, M1st_I1B_WP_M8B_or_M4B_or_M2B, + M1st_I4B_W_M8B_or_M4B, M1st_L_M32B_or_M16B, M1st_LL_M16B_M32B_M64B, M1st_LL_M2B_M4B_M8B, @@ -76,6 +85,7 @@ namespace Amd64InstrDecode M1st_M8B, M1st_MUnknown, M1st_W_M4B_or_M1B, + M1st_W_M8B_I4B_or_M2B_I2B, M1st_W_M8B_or_M2B, M1st_W_M8B_or_M4B, M1st_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B, @@ -88,6 +98,7 @@ namespace Amd64InstrDecode MOnly_MUnknown, MOnly_P_M6B_or_M4B, MOnly_W_M16B_or_M8B, + MOnly_W_M8B_or_M2B, MOnly_W_M8B_or_M4B, MOnly_WP_M8B_or_M4B_or_M2B, MOnly_WP_M8B_or_M8B_or_M2B, @@ -138,14 +149,14 @@ namespace Amd64InstrDecode MOp_WP_M8B_or_M4B_or_M2B, WP_I4B_or_I4B_or_I2B, WP_I8B_or_I4B_or_I2B, - Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location in encoded in lower bits + Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location is encoded in lower bits. }; - // The following instrForm maps correspond to the amd64 instr maps - // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics - // The opcode is packed to be human readable. 
PackedOpcode = opcode << 4 + pp - // - For Vex* the pp is directly included in the encoding - // - For the Secondary, F38, and F3A pages the pp is not defined in the encoding, but affects instr form. + // The following instrForm maps correspond to the amd64 instruction maps. + // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics. + // The opcode is packed to be human readable. PackedOpcode = opcode << 4 + pp. For example, 0x123 is opcode 0x12, pp=0x3. + // - For Vex* and EVEX the pp is directly included in the encoding + // - For the Secondary (0F), 0F 38, and 0F 3A pages the pp is not defined in the encoding, but affects instruction form. // - pp = 0 implies no prefix. // - pp = 1 implies 0x66 OpSize prefix only. // - pp = 2 implies 0xF3 prefix. @@ -153,9 +164,9 @@ namespace Amd64InstrDecode // - For the primary map, pp is not used and is always 0 in the comments. - // Instruction which change forms based on modrm.reg are encoded in this extension table. - // Since there are 8 modrm.reg values, they occur is groups of 8. - // Each group is referenced from the other tables below using Extension|(index >> 3). + // Instructions which change forms based on modrm.reg are encoded in this extension table. + // Since there are 8 modrm.reg values, they occur in groups of 8. + // Each group is referenced from the other tables below using (Extension|(index >> 3)). static const InstrForm instrFormExtension[217] { MOnly_M4B, // Primary:0xd90/0 fld @@ -252,32 +263,32 @@ namespace Amd64InstrDecode MOnly_M1B, // Secondary:0x180/3 prefetcht2 MOnly_W_M8B_or_M4B, // Secondary:0x180/4 nop MOnly_W_M8B_or_M4B, // Secondary:0x180/5 nop - MOnly_W_M8B_or_M4B, // Secondary:0x180/6 nop - MOnly_W_M8B_or_M4B, // Secondary:0x180/7 nop + MOnly_M1B, // Secondary:0x180/6 prefetchit1 + MOnly_M1B, // Secondary:0x180/7 prefetchit0 MOnly_M1B, // Secondary:0x181/0 prefetchnta MOnly_M1B, // Secondary:0x181/1 prefetcht0 MOnly_M1B, // Secondary:0x181/2 prefetcht1 MOnly_M1B, // Secondary:0x181/3 prefetcht2 - MOnly_M2B, // Secondary:0x181/4 nop - MOnly_M2B, // Secondary:0x181/5 nop - MOnly_M2B, // Secondary:0x181/6 nop - MOnly_M2B, // Secondary:0x181/7 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/4 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/5 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/6 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/7 nop MOnly_M1B, // Secondary:0x182/0 prefetchnta MOnly_M1B, // Secondary:0x182/1 prefetcht0 MOnly_M1B, // Secondary:0x182/2 prefetcht1 MOnly_M1B, // Secondary:0x182/3 prefetcht2 - MOnly_M4B, // Secondary:0x182/4 nop - MOnly_M4B, // Secondary:0x182/5 nop - MOnly_M4B, // Secondary:0x182/6 nop - MOnly_M4B, // Secondary:0x182/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/7 nop MOnly_M1B, // Secondary:0x183/0 prefetchnta MOnly_M1B, // Secondary:0x183/1 prefetcht0 MOnly_M1B, // Secondary:0x183/2 prefetcht1 MOnly_M1B, // Secondary:0x183/3 prefetcht2 - MOnly_M4B, // Secondary:0x183/4 nop - MOnly_M4B, // Secondary:0x183/5 nop - MOnly_M4B, // Secondary:0x183/6 nop - MOnly_M4B, // Secondary:0x183/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/7 nop MOnly_M1B, // Secondary:0x1c0/0 cldemote MOnly_W_M8B_or_M4B, // Secondary:0x1c0/1 nop MOnly_W_M8B_or_M4B, // 
Secondary:0x1c0/2 nop @@ -286,30 +297,30 @@ namespace Amd64InstrDecode MOnly_W_M8B_or_M4B, // Secondary:0x1c0/5 nop MOnly_W_M8B_or_M4B, // Secondary:0x1c0/6 nop MOnly_W_M8B_or_M4B, // Secondary:0x1c0/7 nop - MOnly_M2B, // Secondary:0x1c1/0 nop - MOnly_M2B, // Secondary:0x1c1/1 nop - MOnly_M2B, // Secondary:0x1c1/2 nop - MOnly_M2B, // Secondary:0x1c1/3 nop - MOnly_M2B, // Secondary:0x1c1/4 nop - MOnly_M2B, // Secondary:0x1c1/5 nop - MOnly_M2B, // Secondary:0x1c1/6 nop - MOnly_M2B, // Secondary:0x1c1/7 nop - MOnly_M4B, // Secondary:0x1c2/0 nop - MOnly_M4B, // Secondary:0x1c2/1 nop - MOnly_M4B, // Secondary:0x1c2/2 nop - MOnly_M4B, // Secondary:0x1c2/3 nop - MOnly_M4B, // Secondary:0x1c2/4 nop - MOnly_M4B, // Secondary:0x1c2/5 nop - MOnly_M4B, // Secondary:0x1c2/6 nop - MOnly_M4B, // Secondary:0x1c2/7 nop - MOnly_M4B, // Secondary:0x1c3/0 nop - MOnly_M4B, // Secondary:0x1c3/1 nop - MOnly_M4B, // Secondary:0x1c3/2 nop - MOnly_M4B, // Secondary:0x1c3/3 nop - MOnly_M4B, // Secondary:0x1c3/4 nop - MOnly_M4B, // Secondary:0x1c3/5 nop - MOnly_M4B, // Secondary:0x1c3/6 nop - MOnly_M4B, // Secondary:0x1c3/7 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/0 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/1 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/2 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/3 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/4 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/5 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/6 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/0 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/1 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/2 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/3 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/0 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/1 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/2 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/3 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/7 nop MOnly_MUnknown, // Secondary:0xae0/0 fxsave,fxsave64 MOnly_MUnknown, // Secondary:0xae0/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae0/2 ldmxcsr @@ -318,24 +329,24 @@ namespace Amd64InstrDecode MOnly_MUnknown, // Secondary:0xae0/5 xrstor,xrstor64 MOnly_MUnknown, // Secondary:0xae0/6 xsaveopt,xsaveopt64 MOnly_M1B, // Secondary:0xae0/7 clflush - MOnly_MUnknown, // Secondary:0xae1/0 fxsave - MOnly_MUnknown, // Secondary:0xae1/1 fxrstor + MOnly_MUnknown, // Secondary:0xae1/0 fxsave,fxsave64 + MOnly_MUnknown, // Secondary:0xae1/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae1/2 ldmxcsr MOnly_M4B, // Secondary:0xae1/3 stmxcsr None, None, MOnly_M1B, // Secondary:0xae1/6 clwb MOnly_M1B, // Secondary:0xae1/7 clflushopt - MOnly_MUnknown, // Secondary:0xae2/0 fxsave - MOnly_MUnknown, // Secondary:0xae2/1 fxrstor + MOnly_MUnknown, // Secondary:0xae2/0 fxsave,fxsave64 + MOnly_MUnknown, // Secondary:0xae2/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae2/2 ldmxcsr MOnly_M4B, // Secondary:0xae2/3 stmxcsr - MOnly_M4B, // Secondary:0xae2/4 ptwrite + MOnly_W_M8B_or_M4B, // Secondary:0xae2/4 ptwrite None, MOnly_M8B, // Secondary:0xae2/6 clrssbsy None, - MOnly_MUnknown, // Secondary:0xae3/0 fxsave - MOnly_MUnknown, // Secondary:0xae3/1 fxrstor + MOnly_MUnknown, // Secondary:0xae3/0 fxsave,fxsave64 + 
MOnly_MUnknown, // Secondary:0xae3/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae3/2 ldmxcsr MOnly_M4B, // Secondary:0xae3/3 stmxcsr None, @@ -351,27 +362,27 @@ namespace Amd64InstrDecode MOnly_M8B, // Secondary:0xc70/6 vmptrld MOnly_M8B, // Secondary:0xc70/7 vmptrst None, - MOnly_M8B, // Secondary:0xc71/1 cmpxchg8b + MOnly_W_M16B_or_M8B, // Secondary:0xc71/1 cmpxchg16b,cmpxchg8b None, - MOnly_MUnknown, // Secondary:0xc71/3 xrstors - MOnly_MUnknown, // Secondary:0xc71/4 xsavec - MOnly_MUnknown, // Secondary:0xc71/5 xsaves + MOnly_MUnknown, // Secondary:0xc71/3 xrstors,xrstors64 + MOnly_MUnknown, // Secondary:0xc71/4 xsavec,xsavec64 + MOnly_MUnknown, // Secondary:0xc71/5 xsaves,xsaves64 MOnly_M8B, // Secondary:0xc71/6 vmclear MOnly_M8B, // Secondary:0xc71/7 vmptrst None, - MOnly_M8B, // Secondary:0xc72/1 cmpxchg8b + MOnly_W_M16B_or_M8B, // Secondary:0xc72/1 cmpxchg16b,cmpxchg8b None, - MOnly_MUnknown, // Secondary:0xc72/3 xrstors - MOnly_MUnknown, // Secondary:0xc72/4 xsavec - MOnly_MUnknown, // Secondary:0xc72/5 xsaves + MOnly_MUnknown, // Secondary:0xc72/3 xrstors,xrstors64 + MOnly_MUnknown, // Secondary:0xc72/4 xsavec,xsavec64 + MOnly_MUnknown, // Secondary:0xc72/5 xsaves,xsaves64 MOnly_M8B, // Secondary:0xc72/6 vmxon MOnly_M8B, // Secondary:0xc72/7 vmptrst None, - MOnly_M8B, // Secondary:0xc73/1 cmpxchg8b + MOnly_W_M16B_or_M8B, // Secondary:0xc73/1 cmpxchg16b,cmpxchg8b None, - MOnly_MUnknown, // Secondary:0xc73/3 xrstors - MOnly_MUnknown, // Secondary:0xc73/4 xsavec - MOnly_MUnknown, // Secondary:0xc73/5 xsaves + MOnly_MUnknown, // Secondary:0xc73/3 xrstors,xrstors64 + MOnly_MUnknown, // Secondary:0xc73/4 xsavec,xsavec64 + MOnly_MUnknown, // Secondary:0xc73/5 xsaves,xsaves64 None, MOnly_M8B, // Secondary:0xc73/7 vmptrst }; @@ -458,22 +469,22 @@ namespace Amd64InstrDecode None, // 0x4d0 None, // 0x4e0 None, // 0x4f0 - None, // 0x500 push - None, // 0x510 push - None, // 0x520 push - None, // 0x530 push - None, // 0x540 push - None, // 0x550 push - None, // 0x560 push - None, // 0x570 push - None, // 0x580 pop - None, // 0x590 pop - None, // 0x5a0 pop - None, // 0x5b0 pop - None, // 0x5c0 pop - None, // 0x5d0 pop - None, // 0x5e0 pop - None, // 0x5f0 pop + None, // 0x500 push,pushp + None, // 0x510 push,pushp + None, // 0x520 push,pushp + None, // 0x530 push,pushp + None, // 0x540 push,pushp + None, // 0x550 push,pushp + None, // 0x560 push,pushp + None, // 0x570 push,pushp + None, // 0x580 pop,popp + None, // 0x590 pop,popp + None, // 0x5a0 pop,popp + None, // 0x5b0 pop,popp + None, // 0x5c0 pop,popp + None, // 0x5d0 pop,popp + None, // 0x5e0 pop,popp + None, // 0x5f0 pop,popp None, // 0x600 None, // 0x610 None, // 0x620 @@ -539,7 +550,7 @@ namespace Amd64InstrDecode None, // 0x9e0 sahf None, // 0x9f0 lahf I8B, // 0xa00 movabs - I8B, // 0xa10 movabs + I8B, // 0xa10 jmpabs,movabs I8B, // 0xa20 movabs I8B, // 0xa30 movabs None, // 0xa40 movs @@ -641,7 +652,7 @@ namespace Amd64InstrDecode MOnly_M2B, // 0x000 lldt,ltr,sldt,str,verr,verw MOnly_M2B, // 0x001 lldt,ltr,sldt,str,verr,verw MOnly_M2B, // 0x002 lldt,ltr,sldt,str,verr,verw - MOnly_M2B, // 0x003 lldt,ltr,sldt,str,verr,verw + MOnly_M2B, // 0x003 lkgs,lldt,ltr,sldt,str,verr,verw InstrForm(int(Extension)|0x07), // 0x010 InstrForm(int(Extension)|0x08), // 0x011 InstrForm(int(Extension)|0x09), // 0x012 @@ -667,9 +678,9 @@ namespace Amd64InstrDecode None, // 0x062 clts None, // 0x063 clts None, // 0x070 sysretd,sysretq - None, // 0x071 sysretd - None, // 0x072 sysretd - None, // 0x073 sysretd + None, // 0x071 sysretd,sysretq + None, // 
0x072 sysretd,sysretq + None, // 0x073 sysretd,sysretq None, // 0x080 invd None, // 0x081 invd None, // 0x082 invd @@ -739,9 +750,9 @@ namespace Amd64InstrDecode InstrForm(int(Extension)|0x0d), // 0x182 InstrForm(int(Extension)|0x0e), // 0x183 MOnly_W_M8B_or_M4B, // 0x190 nop - MOnly_M2B, // 0x191 nop - MOnly_M4B, // 0x192 nop - MOnly_M4B, // 0x193 nop + MOnly_W_M8B_or_M2B, // 0x191 nop + MOnly_W_M8B_or_M4B, // 0x192 nop + MOnly_W_M8B_or_M4B, // 0x193 nop None, // 0x1a0 MOp_MUnknown, // 0x1a1 bndmov MOp_MUnknown, // 0x1a2 bndcl @@ -755,17 +766,17 @@ namespace Amd64InstrDecode InstrForm(int(Extension)|0x11), // 0x1c2 InstrForm(int(Extension)|0x12), // 0x1c3 MOnly_W_M8B_or_M4B, // 0x1d0 nop - MOnly_M2B, // 0x1d1 nop - MOnly_M4B, // 0x1d2 nop - MOnly_M4B, // 0x1d3 nop + MOnly_W_M8B_or_M2B, // 0x1d1 nop + MOnly_W_M8B_or_M4B, // 0x1d2 nop + MOnly_W_M8B_or_M4B, // 0x1d3 nop MOnly_W_M8B_or_M4B, // 0x1e0 nop - MOnly_M2B, // 0x1e1 nop - MOnly_M4B, // 0x1e2 nop - MOnly_M4B, // 0x1e3 nop + MOnly_W_M8B_or_M2B, // 0x1e1 nop + MOnly_W_M8B_or_M4B, // 0x1e2 nop + MOnly_W_M8B_or_M4B, // 0x1e3 nop MOnly_W_M8B_or_M4B, // 0x1f0 nop - MOnly_M2B, // 0x1f1 nop - MOnly_M4B, // 0x1f2 nop - MOnly_M4B, // 0x1f3 nop + MOnly_W_M8B_or_M2B, // 0x1f1 nop + MOnly_W_M8B_or_M4B, // 0x1f2 nop + MOnly_W_M8B_or_M4B, // 0x1f3 nop I1B, // 0x200 mov I1B, // 0x201 mov I1B, // 0x202 mov @@ -808,8 +819,8 @@ namespace Amd64InstrDecode None, // 0x293 MOp_M8B, // 0x2a0 cvtpi2ps MOp_M8B, // 0x2a1 cvtpi2pd - MOp_M4B, // 0x2a2 cvtsi2ss - MOp_M4B, // 0x2a3 cvtsi2sd + MOp_W_M8B_or_M4B, // 0x2a2 cvtsi2ss + MOp_W_M8B_or_M4B, // 0x2a3 cvtsi2sd M1st_M16B, // 0x2b0 movntps M1st_M16B, // 0x2b1 movntpd M1st_M4B, // 0x2b2 movntss @@ -851,9 +862,9 @@ namespace Amd64InstrDecode None, // 0x342 sysenter None, // 0x343 sysenter None, // 0x350 sysexitd,sysexitq - None, // 0x351 sysexitd - None, // 0x352 sysexitd - None, // 0x353 sysexitd + None, // 0x351 sysexitd,sysexitq + None, // 0x352 sysexitd,sysexitq + None, // 0x353 sysexitd,sysexitq None, // 0x360 None, // 0x361 None, // 0x362 @@ -895,69 +906,69 @@ namespace Amd64InstrDecode None, // 0x3f2 None, // 0x3f3 MOp_W_M8B_or_M4B, // 0x400 cmovo - MOp_M2B, // 0x401 cmovo - MOp_M4B, // 0x402 cmovo - MOp_M4B, // 0x403 cmovo + MOp_W_M8B_or_M2B, // 0x401 cmovo + MOp_W_M8B_or_M4B, // 0x402 cmovo + MOp_W_M8B_or_M4B, // 0x403 cmovo MOp_W_M8B_or_M4B, // 0x410 cmovno - MOp_M2B, // 0x411 cmovno - MOp_M4B, // 0x412 cmovno - MOp_M4B, // 0x413 cmovno + MOp_W_M8B_or_M2B, // 0x411 cmovno + MOp_W_M8B_or_M4B, // 0x412 cmovno + MOp_W_M8B_or_M4B, // 0x413 cmovno MOp_W_M8B_or_M4B, // 0x420 cmovb - MOp_M2B, // 0x421 cmovb - MOp_M4B, // 0x422 cmovb - MOp_M4B, // 0x423 cmovb + MOp_W_M8B_or_M2B, // 0x421 cmovb + MOp_W_M8B_or_M4B, // 0x422 cmovb + MOp_W_M8B_or_M4B, // 0x423 cmovb MOp_W_M8B_or_M4B, // 0x430 cmovae - MOp_M2B, // 0x431 cmovae - MOp_M4B, // 0x432 cmovae - MOp_M4B, // 0x433 cmovae + MOp_W_M8B_or_M2B, // 0x431 cmovae + MOp_W_M8B_or_M4B, // 0x432 cmovae + MOp_W_M8B_or_M4B, // 0x433 cmovae MOp_W_M8B_or_M4B, // 0x440 cmove - MOp_M2B, // 0x441 cmove - MOp_M4B, // 0x442 cmove - MOp_M4B, // 0x443 cmove + MOp_W_M8B_or_M2B, // 0x441 cmove + MOp_W_M8B_or_M4B, // 0x442 cmove + MOp_W_M8B_or_M4B, // 0x443 cmove MOp_W_M8B_or_M4B, // 0x450 cmovne - MOp_M2B, // 0x451 cmovne - MOp_M4B, // 0x452 cmovne - MOp_M4B, // 0x453 cmovne + MOp_W_M8B_or_M2B, // 0x451 cmovne + MOp_W_M8B_or_M4B, // 0x452 cmovne + MOp_W_M8B_or_M4B, // 0x453 cmovne MOp_W_M8B_or_M4B, // 0x460 cmovbe - MOp_M2B, // 0x461 cmovbe - MOp_M4B, // 0x462 cmovbe - MOp_M4B, // 
0x463 cmovbe + MOp_W_M8B_or_M2B, // 0x461 cmovbe + MOp_W_M8B_or_M4B, // 0x462 cmovbe + MOp_W_M8B_or_M4B, // 0x463 cmovbe MOp_W_M8B_or_M4B, // 0x470 cmova - MOp_M2B, // 0x471 cmova - MOp_M4B, // 0x472 cmova - MOp_M4B, // 0x473 cmova + MOp_W_M8B_or_M2B, // 0x471 cmova + MOp_W_M8B_or_M4B, // 0x472 cmova + MOp_W_M8B_or_M4B, // 0x473 cmova MOp_W_M8B_or_M4B, // 0x480 cmovs - MOp_M2B, // 0x481 cmovs - MOp_M4B, // 0x482 cmovs - MOp_M4B, // 0x483 cmovs + MOp_W_M8B_or_M2B, // 0x481 cmovs + MOp_W_M8B_or_M4B, // 0x482 cmovs + MOp_W_M8B_or_M4B, // 0x483 cmovs MOp_W_M8B_or_M4B, // 0x490 cmovns - MOp_M2B, // 0x491 cmovns - MOp_M4B, // 0x492 cmovns - MOp_M4B, // 0x493 cmovns + MOp_W_M8B_or_M2B, // 0x491 cmovns + MOp_W_M8B_or_M4B, // 0x492 cmovns + MOp_W_M8B_or_M4B, // 0x493 cmovns MOp_W_M8B_or_M4B, // 0x4a0 cmovp - MOp_M2B, // 0x4a1 cmovp - MOp_M4B, // 0x4a2 cmovp - MOp_M4B, // 0x4a3 cmovp + MOp_W_M8B_or_M2B, // 0x4a1 cmovp + MOp_W_M8B_or_M4B, // 0x4a2 cmovp + MOp_W_M8B_or_M4B, // 0x4a3 cmovp MOp_W_M8B_or_M4B, // 0x4b0 cmovnp - MOp_M2B, // 0x4b1 cmovnp - MOp_M4B, // 0x4b2 cmovnp - MOp_M4B, // 0x4b3 cmovnp + MOp_W_M8B_or_M2B, // 0x4b1 cmovnp + MOp_W_M8B_or_M4B, // 0x4b2 cmovnp + MOp_W_M8B_or_M4B, // 0x4b3 cmovnp MOp_W_M8B_or_M4B, // 0x4c0 cmovl - MOp_M2B, // 0x4c1 cmovl - MOp_M4B, // 0x4c2 cmovl - MOp_M4B, // 0x4c3 cmovl + MOp_W_M8B_or_M2B, // 0x4c1 cmovl + MOp_W_M8B_or_M4B, // 0x4c2 cmovl + MOp_W_M8B_or_M4B, // 0x4c3 cmovl MOp_W_M8B_or_M4B, // 0x4d0 cmovge - MOp_M2B, // 0x4d1 cmovge - MOp_M4B, // 0x4d2 cmovge - MOp_M4B, // 0x4d3 cmovge + MOp_W_M8B_or_M2B, // 0x4d1 cmovge + MOp_W_M8B_or_M4B, // 0x4d2 cmovge + MOp_W_M8B_or_M4B, // 0x4d3 cmovge MOp_W_M8B_or_M4B, // 0x4e0 cmovle - MOp_M2B, // 0x4e1 cmovle - MOp_M4B, // 0x4e2 cmovle - MOp_M4B, // 0x4e3 cmovle + MOp_W_M8B_or_M2B, // 0x4e1 cmovle + MOp_W_M8B_or_M4B, // 0x4e2 cmovle + MOp_W_M8B_or_M4B, // 0x4e3 cmovle MOp_W_M8B_or_M4B, // 0x4f0 cmovg - MOp_M2B, // 0x4f1 cmovg - MOp_M4B, // 0x4f2 cmovg - MOp_M4B, // 0x4f3 cmovg + MOp_W_M8B_or_M2B, // 0x4f1 cmovg + MOp_W_M8B_or_M4B, // 0x4f2 cmovg + MOp_W_M8B_or_M4B, // 0x4f3 cmovg None, // 0x500 None, // 0x501 None, // 0x502 @@ -1079,7 +1090,7 @@ namespace Amd64InstrDecode None, // 0x6d2 None, // 0x6d3 MOp_W_M8B_or_M4B, // 0x6e0 movd,movq - MOp_M4B, // 0x6e1 movd + MOp_W_M8B_or_M4B, // 0x6e1 movd,movq None, // 0x6e2 None, // 0x6e3 MOp_M8B, // 0x6f0 movq @@ -1143,7 +1154,7 @@ namespace Amd64InstrDecode None, // 0x7d2 MOp_M16B, // 0x7d3 hsubps M1st_W_M8B_or_M4B, // 0x7e0 movd,movq - M1st_M4B, // 0x7e1 movd + M1st_W_M8B_or_M4B, // 0x7e1 movd,movq MOp_M8B, // 0x7e2 movq None, // 0x7e3 M1st_M8B, // 0x7f0 movq @@ -1151,67 +1162,67 @@ namespace Amd64InstrDecode M1st_M16B, // 0x7f2 movdqu None, // 0x7f3 I4B, // 0x800 jo - I2B, // 0x801 jo + WP_I4B_or_I4B_or_I2B, // 0x801 jo I4B, // 0x802 jo I4B, // 0x803 jo I4B, // 0x810 jno - I2B, // 0x811 jno + WP_I4B_or_I4B_or_I2B, // 0x811 jno I4B, // 0x812 jno I4B, // 0x813 jno I4B, // 0x820 jb - I2B, // 0x821 jb + WP_I4B_or_I4B_or_I2B, // 0x821 jb I4B, // 0x822 jb I4B, // 0x823 jb I4B, // 0x830 jae - I2B, // 0x831 jae + WP_I4B_or_I4B_or_I2B, // 0x831 jae I4B, // 0x832 jae I4B, // 0x833 jae I4B, // 0x840 je - I2B, // 0x841 je + WP_I4B_or_I4B_or_I2B, // 0x841 je I4B, // 0x842 je I4B, // 0x843 je I4B, // 0x850 jne - I2B, // 0x851 jne + WP_I4B_or_I4B_or_I2B, // 0x851 jne I4B, // 0x852 jne I4B, // 0x853 jne I4B, // 0x860 jbe - I2B, // 0x861 jbe + WP_I4B_or_I4B_or_I2B, // 0x861 jbe I4B, // 0x862 jbe I4B, // 0x863 jbe I4B, // 0x870 ja - I2B, // 0x871 ja + WP_I4B_or_I4B_or_I2B, // 0x871 ja 
I4B, // 0x872 ja I4B, // 0x873 ja I4B, // 0x880 js - I2B, // 0x881 js + WP_I4B_or_I4B_or_I2B, // 0x881 js I4B, // 0x882 js I4B, // 0x883 js I4B, // 0x890 jns - I2B, // 0x891 jns + WP_I4B_or_I4B_or_I2B, // 0x891 jns I4B, // 0x892 jns I4B, // 0x893 jns I4B, // 0x8a0 jp - I2B, // 0x8a1 jp + WP_I4B_or_I4B_or_I2B, // 0x8a1 jp I4B, // 0x8a2 jp I4B, // 0x8a3 jp I4B, // 0x8b0 jnp - I2B, // 0x8b1 jnp + WP_I4B_or_I4B_or_I2B, // 0x8b1 jnp I4B, // 0x8b2 jnp I4B, // 0x8b3 jnp I4B, // 0x8c0 jl - I2B, // 0x8c1 jl + WP_I4B_or_I4B_or_I2B, // 0x8c1 jl I4B, // 0x8c2 jl I4B, // 0x8c3 jl I4B, // 0x8d0 jge - I2B, // 0x8d1 jge + WP_I4B_or_I4B_or_I2B, // 0x8d1 jge I4B, // 0x8d2 jge I4B, // 0x8d3 jge I4B, // 0x8e0 jle - I2B, // 0x8e1 jle + WP_I4B_or_I4B_or_I2B, // 0x8e1 jle I4B, // 0x8e2 jle I4B, // 0x8e3 jle I4B, // 0x8f0 jg - I2B, // 0x8f1 jg + WP_I4B_or_I4B_or_I2B, // 0x8f1 jg I4B, // 0x8f2 jg I4B, // 0x8f3 jg MOnly_M1B, // 0x900 seto @@ -1279,11 +1290,11 @@ namespace Amd64InstrDecode MOnly_M1B, // 0x9f2 setg MOnly_M1B, // 0x9f3 setg None, // 0xa00 push - None, // 0xa01 pushw + None, // 0xa01 push,pushw None, // 0xa02 push None, // 0xa03 push None, // 0xa10 pop - None, // 0xa11 popw + None, // 0xa11 pop,popw None, // 0xa12 pop None, // 0xa13 pop None, // 0xa20 cpuid @@ -1291,17 +1302,17 @@ namespace Amd64InstrDecode None, // 0xa22 cpuid None, // 0xa23 cpuid M1st_W_M8B_or_M4B, // 0xa30 bt - M1st_M2B, // 0xa31 bt - M1st_M4B, // 0xa32 bt - M1st_M4B, // 0xa33 bt + M1st_W_M8B_or_M2B, // 0xa31 bt + M1st_W_M8B_or_M4B, // 0xa32 bt + M1st_W_M8B_or_M4B, // 0xa33 bt M1st_I1B_W_M8B_or_M4B, // 0xa40 shld - M1st_M2B_I1B, // 0xa41 shld - M1st_M4B_I1B, // 0xa42 shld - M1st_M4B_I1B, // 0xa43 shld + M1st_I1B_W_M8B_or_M2B, // 0xa41 shld + M1st_I1B_W_M8B_or_M4B, // 0xa42 shld + M1st_I1B_W_M8B_or_M4B, // 0xa43 shld M1st_W_M8B_or_M4B, // 0xa50 shld - M1st_M2B, // 0xa51 shld - M1st_M4B, // 0xa52 shld - M1st_M4B, // 0xa53 shld + M1st_W_M8B_or_M2B, // 0xa51 shld + M1st_W_M8B_or_M4B, // 0xa52 shld + M1st_W_M8B_or_M4B, // 0xa53 shld None, // 0xa60 None, // 0xa61 None, // 0xa62 @@ -1311,11 +1322,11 @@ namespace Amd64InstrDecode None, // 0xa72 None, // 0xa73 None, // 0xa80 push - None, // 0xa81 pushw + None, // 0xa81 push,pushw None, // 0xa82 push None, // 0xa83 push None, // 0xa90 pop - None, // 0xa91 popw + None, // 0xa91 pop,popw None, // 0xa92 pop None, // 0xa93 pop None, // 0xaa0 rsm @@ -1323,41 +1334,41 @@ namespace Amd64InstrDecode None, // 0xaa2 rsm None, // 0xaa3 rsm M1st_W_M8B_or_M4B, // 0xab0 bts - M1st_M2B, // 0xab1 bts - M1st_M4B, // 0xab2 bts - M1st_M4B, // 0xab3 bts + M1st_W_M8B_or_M2B, // 0xab1 bts + M1st_W_M8B_or_M4B, // 0xab2 bts + M1st_W_M8B_or_M4B, // 0xab3 bts M1st_I1B_W_M8B_or_M4B, // 0xac0 shrd - M1st_M2B_I1B, // 0xac1 shrd - M1st_M4B_I1B, // 0xac2 shrd - M1st_M4B_I1B, // 0xac3 shrd + M1st_I1B_W_M8B_or_M2B, // 0xac1 shrd + M1st_I1B_W_M8B_or_M4B, // 0xac2 shrd + M1st_I1B_W_M8B_or_M4B, // 0xac3 shrd M1st_W_M8B_or_M4B, // 0xad0 shrd - M1st_M2B, // 0xad1 shrd - M1st_M4B, // 0xad2 shrd - M1st_M4B, // 0xad3 shrd + M1st_W_M8B_or_M2B, // 0xad1 shrd + M1st_W_M8B_or_M4B, // 0xad2 shrd + M1st_W_M8B_or_M4B, // 0xad3 shrd InstrForm(int(Extension)|0x13), // 0xae0 InstrForm(int(Extension)|0x14), // 0xae1 InstrForm(int(Extension)|0x15), // 0xae2 InstrForm(int(Extension)|0x16), // 0xae3 MOp_W_M8B_or_M4B, // 0xaf0 imul - MOp_M2B, // 0xaf1 imul - MOp_M4B, // 0xaf2 imul - MOp_M4B, // 0xaf3 imul + MOp_W_M8B_or_M2B, // 0xaf1 imul + MOp_W_M8B_or_M4B, // 0xaf2 imul + MOp_W_M8B_or_M4B, // 0xaf3 imul M1st_M1B, // 0xb00 cmpxchg M1st_M1B, // 
0xb01 cmpxchg M1st_M1B, // 0xb02 cmpxchg M1st_M1B, // 0xb03 cmpxchg M1st_W_M8B_or_M4B, // 0xb10 cmpxchg - M1st_M2B, // 0xb11 cmpxchg - M1st_M4B, // 0xb12 cmpxchg - M1st_M4B, // 0xb13 cmpxchg + M1st_W_M8B_or_M2B, // 0xb11 cmpxchg + M1st_W_M8B_or_M4B, // 0xb12 cmpxchg + M1st_W_M8B_or_M4B, // 0xb13 cmpxchg MOp_M6B, // 0xb20 lss MOp_M4B, // 0xb21 lss MOp_M6B, // 0xb22 lss MOp_M6B, // 0xb23 lss M1st_W_M8B_or_M4B, // 0xb30 btr - M1st_M2B, // 0xb31 btr - M1st_M4B, // 0xb32 btr - M1st_M4B, // 0xb33 btr + M1st_W_M8B_or_M2B, // 0xb31 btr + M1st_W_M8B_or_M4B, // 0xb32 btr + M1st_W_M8B_or_M4B, // 0xb33 btr MOp_M6B, // 0xb40 lfs MOp_M4B, // 0xb41 lfs MOp_M6B, // 0xb42 lfs @@ -1376,27 +1387,27 @@ namespace Amd64InstrDecode MOp_M2B, // 0xb73 movzx None, // 0xb80 None, // 0xb81 - MOp_M4B, // 0xb82 popcnt + MOp_W_M8B_or_M4B, // 0xb82 popcnt None, // 0xb83 MOp_W_M8B_or_M4B, // 0xb90 ud1 - MOp_M2B, // 0xb91 ud1 - MOp_M4B, // 0xb92 ud1 - MOp_M4B, // 0xb93 ud1 + MOp_W_M8B_or_M2B, // 0xb91 ud1 + MOp_W_M8B_or_M4B, // 0xb92 ud1 + MOp_W_M8B_or_M4B, // 0xb93 ud1 M1st_I1B_W_M8B_or_M4B, // 0xba0 bt,btc,btr,bts - M1st_M2B_I1B, // 0xba1 bt,btc,btr,bts - M1st_M4B_I1B, // 0xba2 bt,btc,btr,bts - M1st_M4B_I1B, // 0xba3 bt,btc,btr,bts + M1st_I1B_W_M8B_or_M2B, // 0xba1 bt,btc,btr,bts + M1st_I1B_W_M8B_or_M4B, // 0xba2 bt,btc,btr,bts + M1st_I1B_W_M8B_or_M4B, // 0xba3 bt,btc,btr,bts M1st_W_M8B_or_M4B, // 0xbb0 btc - M1st_M2B, // 0xbb1 btc - M1st_M4B, // 0xbb2 btc - M1st_M4B, // 0xbb3 btc + M1st_W_M8B_or_M2B, // 0xbb1 btc + M1st_W_M8B_or_M4B, // 0xbb2 btc + M1st_W_M8B_or_M4B, // 0xbb3 btc MOp_W_M8B_or_M4B, // 0xbc0 bsf - MOp_M2B, // 0xbc1 bsf - MOp_M4B, // 0xbc2 tzcnt + MOp_W_M8B_or_M2B, // 0xbc1 bsf + MOp_W_M8B_or_M4B, // 0xbc2 tzcnt None, // 0xbc3 MOp_W_M8B_or_M4B, // 0xbd0 bsr - MOp_M2B, // 0xbd1 bsr - MOp_M4B, // 0xbd2 lzcnt + MOp_W_M8B_or_M2B, // 0xbd1 bsr + MOp_W_M8B_or_M4B, // 0xbd2 lzcnt None, // 0xbd3 MOp_M1B, // 0xbe0 movsx MOp_M1B, // 0xbe1 movsx @@ -1411,9 +1422,9 @@ namespace Amd64InstrDecode M1st_M1B, // 0xc02 xadd M1st_M1B, // 0xc03 xadd M1st_W_M8B_or_M4B, // 0xc10 xadd - M1st_M2B, // 0xc11 xadd - M1st_M4B, // 0xc12 xadd - M1st_M4B, // 0xc13 xadd + M1st_W_M8B_or_M2B, // 0xc11 xadd + M1st_W_M8B_or_M4B, // 0xc12 xadd + M1st_W_M8B_or_M4B, // 0xc13 xadd MOp_M16B_I1B, // 0xc20 cmpps MOp_M16B_I1B, // 0xc21 cmppd MOp_M4B_I1B, // 0xc22 cmpss @@ -1659,9 +1670,9 @@ namespace Amd64InstrDecode None, // 0xfe2 None, // 0xfe3 MOp_W_M8B_or_M4B, // 0xff0 ud0 - MOp_M2B, // 0xff1 ud0 - MOp_M4B, // 0xff2 ud0 - MOp_M4B, // 0xff3 ud0 + MOp_W_M8B_or_M2B, // 0xff1 ud0 + MOp_W_M8B_or_M4B, // 0xff2 ud0 + MOp_W_M8B_or_M4B, // 0xff3 ud0 }; static const InstrForm instrFormF38[1024] @@ -2674,10 +2685,10 @@ namespace Amd64InstrDecode None, // 0xfb1 None, // 0xfb2 None, // 0xfb3 - None, // 0xfc0 - None, // 0xfc1 - None, // 0xfc2 - None, // 0xfc3 + M1st_W_M8B_or_M4B, // 0xfc0 aadd + M1st_W_M8B_or_M4B, // 0xfc1 aand + M1st_W_M8B_or_M4B, // 0xfc2 axor + M1st_W_M8B_or_M4B, // 0xfc3 aor None, // 0xfd0 None, // 0xfd1 None, // 0xfd2 @@ -5070,14 +5081,14 @@ namespace Amd64InstrDecode None, // 0x4f1 None, // 0x4f2 None, // 0x4f3 - None, // 0x500 + MOp_L_M32B_or_M16B, // 0x500 vpdpbuud None, // 0x501 - None, // 0x502 - None, // 0x503 - None, // 0x510 + MOp_L_M32B_or_M16B, // 0x502 vpdpbsud + MOp_L_M32B_or_M16B, // 0x503 vpdpbssd + MOp_L_M32B_or_M16B, // 0x510 vpdpbuuds None, // 0x511 - None, // 0x512 - None, // 0x513 + MOp_L_M32B_or_M16B, // 0x512 vpdpbsuds + MOp_L_M32B_or_M16B, // 0x513 vpdpbssds None, // 0x520 None, // 0x521 None, // 0x522 @@ -5454,13 
+5465,13 @@ namespace Amd64InstrDecode MOp_W_M8B_or_M4B, // 0xaf1 vfnmsub213sd,vfnmsub213ss None, // 0xaf2 None, // 0xaf3 - None, // 0xb00 - None, // 0xb01 - None, // 0xb02 - None, // 0xb03 + MOp_L_M32B_or_M16B, // 0xb00 vcvtneoph2ps + MOp_L_M32B_or_M16B, // 0xb01 vcvtneeph2ps + MOp_L_M32B_or_M16B, // 0xb02 vcvtneebf162ps + MOp_L_M32B_or_M16B, // 0xb03 vcvtneobf162ps None, // 0xb10 - None, // 0xb11 - None, // 0xb12 + MOp_M2B, // 0xb11 vbcstnesh2ps + MOp_M2B, // 0xb12 vbcstnebf162ps None, // 0xb13 None, // 0xb20 None, // 0xb21 @@ -5590,13 +5601,13 @@ namespace Amd64InstrDecode None, // 0xd11 None, // 0xd12 None, // 0xd13 - None, // 0xd20 - None, // 0xd21 - None, // 0xd22 + MOp_L_M32B_or_M16B, // 0xd20 vpdpwuud + MOp_L_M32B_or_M16B, // 0xd21 vpdpwusd + MOp_L_M32B_or_M16B, // 0xd22 vpdpwsud None, // 0xd23 - None, // 0xd30 - None, // 0xd31 - None, // 0xd32 + MOp_L_M32B_or_M16B, // 0xd30 vpdpwuuds + MOp_L_M32B_or_M16B, // 0xd31 vpdpwusds + MOp_L_M32B_or_M16B, // 0xd32 vpdpwsuds None, // 0xd33 None, // 0xd40 None, // 0xd41 @@ -5622,10 +5633,10 @@ namespace Amd64InstrDecode None, // 0xd91 None, // 0xd92 None, // 0xd93 - None, // 0xda0 - None, // 0xda1 - None, // 0xda2 - None, // 0xda3 + MOp_M16B, // 0xda0 vsm3msg1 + MOp_M16B, // 0xda1 vsm3msg2 + MOp_L_M32B_or_M16B, // 0xda2 vsm4key4 + MOp_L_M32B_or_M16B, // 0xda3 vsm4rnds4 None, // 0xdb0 MOp_M16B, // 0xdb1 vaesimc None, // 0xdb2 @@ -5647,67 +5658,67 @@ namespace Amd64InstrDecode None, // 0xdf2 None, // 0xdf3 None, // 0xe00 - None, // 0xe01 + M1st_W_M8B_or_M4B, // 0xe01 cmpoxadd None, // 0xe02 None, // 0xe03 None, // 0xe10 - None, // 0xe11 + M1st_W_M8B_or_M4B, // 0xe11 cmpnoxadd None, // 0xe12 None, // 0xe13 None, // 0xe20 - None, // 0xe21 + M1st_W_M8B_or_M4B, // 0xe21 cmpbxadd None, // 0xe22 None, // 0xe23 None, // 0xe30 - None, // 0xe31 + M1st_W_M8B_or_M4B, // 0xe31 cmpnbxadd None, // 0xe32 None, // 0xe33 None, // 0xe40 - None, // 0xe41 + M1st_W_M8B_or_M4B, // 0xe41 cmpzxadd None, // 0xe42 None, // 0xe43 None, // 0xe50 - None, // 0xe51 + M1st_W_M8B_or_M4B, // 0xe51 cmpnzxadd None, // 0xe52 None, // 0xe53 None, // 0xe60 - None, // 0xe61 + M1st_W_M8B_or_M4B, // 0xe61 cmpbexadd None, // 0xe62 None, // 0xe63 None, // 0xe70 - None, // 0xe71 + M1st_W_M8B_or_M4B, // 0xe71 cmpnbexadd None, // 0xe72 None, // 0xe73 None, // 0xe80 - None, // 0xe81 + M1st_W_M8B_or_M4B, // 0xe81 cmpsxadd None, // 0xe82 None, // 0xe83 None, // 0xe90 - None, // 0xe91 + M1st_W_M8B_or_M4B, // 0xe91 cmpnsxadd None, // 0xe92 None, // 0xe93 None, // 0xea0 - None, // 0xea1 + M1st_W_M8B_or_M4B, // 0xea1 cmppxadd None, // 0xea2 None, // 0xea3 None, // 0xeb0 - None, // 0xeb1 + M1st_W_M8B_or_M4B, // 0xeb1 cmpnpxadd None, // 0xeb2 None, // 0xeb3 None, // 0xec0 - None, // 0xec1 + M1st_W_M8B_or_M4B, // 0xec1 cmplxadd None, // 0xec2 None, // 0xec3 None, // 0xed0 - None, // 0xed1 + M1st_W_M8B_or_M4B, // 0xed1 cmpnlxadd None, // 0xed2 None, // 0xed3 None, // 0xee0 - None, // 0xee1 + M1st_W_M8B_or_M4B, // 0xee1 cmplexadd None, // 0xee2 None, // 0xee3 None, // 0xef0 - None, // 0xef1 + M1st_W_M8B_or_M4B, // 0xef1 cmpnlexadd None, // 0xef2 None, // 0xef3 None, // 0xf00 @@ -6667,7 +6678,7 @@ namespace Amd64InstrDecode None, // 0xdd2 None, // 0xdd3 None, // 0xde0 - None, // 0xde1 + MOp_M16B_I1B, // 0xde1 vsm3rnds2 None, // 0xde2 None, // 0xde3 None, // 0xdf0 @@ -6978,8 +6989,8 @@ namespace Amd64InstrDecode None, // 0x2a1 MOp_W_M8B_or_M4B, // 0x2a2 vcvtsi2ss MOp_W_M8B_or_M4B, // 0x2a3 vcvtsi2sd - M1st_bLL_M4B_M16B_M32B_M64B, // 0x2b0 vmovntps - M1st_bLL_M8B_M16B_M32B_M64B, // 0x2b1 vmovntpd + 
M1st_LL_M16B_M32B_M64B, // 0x2b0 vmovntps + M1st_LL_M16B_M32B_M64B, // 0x2b1 vmovntpd None, // 0x2b2 None, // 0x2b3 None, // 0x2c0 @@ -7130,8 +7141,8 @@ namespace Amd64InstrDecode None, // 0x501 None, // 0x502 None, // 0x503 - MOp_bLL_M4B_M16B_M32B_M64B, // 0x510 vsqrtps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x511 vsqrtpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x510 vsqrtps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x511 vsqrtpd MOp_M4B, // 0x512 vsqrtss MOp_M8B, // 0x513 vsqrtsd None, // 0x520 @@ -7158,12 +7169,12 @@ namespace Amd64InstrDecode MOp_bLL_M8B_M16B_M32B_M64B, // 0x571 vxorpd None, // 0x572 None, // 0x573 - MOp_bLL_M4B_M16B_M32B_M64B, // 0x580 vaddps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x581 vaddpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x580 vaddps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x581 vaddpd MOp_M4B, // 0x582 vaddss MOp_M8B, // 0x583 vaddsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x590 vmulps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x591 vmulpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x590 vmulps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x591 vmulpd MOp_M4B, // 0x592 vmulss MOp_M8B, // 0x593 vmulsd MOp_bLL_M4B_M8B_M16B_M32B, // 0x5a0 vcvtps2pd @@ -7174,20 +7185,20 @@ namespace Amd64InstrDecode MOp_bLL_M4B_M16B_M32B_M64B, // 0x5b1 vcvtps2dq MOp_bLL_M4B_M16B_M32B_M64B, // 0x5b2 vcvttps2dq None, // 0x5b3 - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5c0 vsubps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5c1 vsubpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5c0 vsubps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5c1 vsubpd MOp_M4B, // 0x5c2 vsubss MOp_M8B, // 0x5c3 vsubsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5d0 vminps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5d1 vminpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5d0 vminps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5d1 vminpd MOp_M4B, // 0x5d2 vminss MOp_M8B, // 0x5d3 vminsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5e0 vdivps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5e1 vdivpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5e0 vdivps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5e1 vdivpd MOp_M4B, // 0x5e2 vdivss MOp_M8B, // 0x5e3 vdivsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5f0 vmaxps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5f1 vmaxpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5f0 vmaxps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5f1 vmaxpd MOp_M4B, // 0x5f2 vmaxss MOp_M8B, // 0x5f3 vmaxsd None, // 0x600 @@ -7382,12 +7393,12 @@ namespace Amd64InstrDecode None, // 0x8f1 None, // 0x8f2 None, // 0x8f3 - None, // 0x900 - None, // 0x901 + MOp_W_M8B_or_M2B, // 0x900 kmovq,kmovw + MOp_W_M4B_or_M1B, // 0x901 kmovb,kmovd None, // 0x902 None, // 0x903 - None, // 0x910 - None, // 0x911 + M1st_W_M8B_or_M2B, // 0x910 kmovq,kmovw + M1st_W_M4B_or_M1B, // 0x911 kmovb,kmovd None, // 0x912 None, // 0x913 None, // 0x920 @@ -8000,7 +8011,7 @@ namespace Amd64InstrDecode None, // 0x283 None, // 0x290 MOp_bLL_M8B_M16B_M32B_M64B, // 0x291 vpcmpeqq - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x292 vpmovb2m,vpmovw2m + None, // 0x292 None, // 0x293 None, // 0x2a0 MOp_LL_M16B_M32B_M64B, // 0x2a1 vmovntdqa @@ -8064,7 +8075,7 @@ namespace Amd64InstrDecode None, // 0x383 None, // 0x390 MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x391 vpminsd,vpminsq - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x392 vpmovd2m,vpmovq2m + None, // 0x392 None, // 0x393 None, // 0x3a0 MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x3a1 vpminuw @@ -8126,8 +8137,8 @@ namespace Amd64InstrDecode None, // 0x481 None, // 0x482 None, // 0x483 - None, // 0x490 - None, // 0x491 + MOnly_MUnknown, // 0x490 ldtilecfg + MOnly_MUnknown, // 0x491 sttilecfg None, // 0x492 None, // 0x493 None, // 0x4a0 @@ -8154,20 +8165,20 @@ 
namespace Amd64InstrDecode MOp_W_M8B_or_M4B, // 0x4f1 vrsqrt14sd,vrsqrt14ss None, // 0x4f2 None, // 0x4f3 - None, // 0x500 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x501 vpdpbusd - None, // 0x502 - None, // 0x503 - None, // 0x510 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x511 vpdpbusds - None, // 0x512 - None, // 0x513 + MOp_bLL_M4B_M16B_M32B_M64B, // 0x500 vpdpbuud + MOp_bLL_M4B_M16B_M32B_M64B, // 0x501 vpdpbusd + MOp_bLL_M4B_M16B_M32B_M64B, // 0x502 vpdpbsud + MOp_bLL_M4B_M16B_M32B_M64B, // 0x503 vpdpbssd + MOp_bLL_M4B_M16B_M32B_M64B, // 0x510 vpdpbuuds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x511 vpdpbusds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x512 vpdpbsuds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x513 vpdpbssds None, // 0x520 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x521 vpdpwssd + MOp_bLL_M4B_M16B_M32B_M64B, // 0x521 vpdpwssd MOp_bLL_M4B_M16B_M32B_M64B, // 0x522 vdpbf16ps MOp_M16B, // 0x523 vp4dpwssd None, // 0x530 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x531 vpdpwssds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x531 vpdpwssds None, // 0x532 MOp_M16B, // 0x533 vp4dpwssds None, // 0x540 @@ -8555,11 +8566,11 @@ namespace Amd64InstrDecode None, // 0xb32 None, // 0xb33 None, // 0xb40 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0xb41 vpmadd52luq + MOp_bLL_M8B_M16B_M32B_M64B, // 0xb41 vpmadd52luq None, // 0xb42 None, // 0xb43 None, // 0xb50 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0xb51 vpmadd52huq + MOp_bLL_M8B_M16B_M32B_M64B, // 0xb51 vpmadd52huq None, // 0xb52 None, // 0xb53 None, // 0xb60 @@ -8731,67 +8742,67 @@ namespace Amd64InstrDecode None, // 0xdf2 None, // 0xdf3 None, // 0xe00 - None, // 0xe01 + M1st_W_M8B_or_M4B, // 0xe01 cmpoxadd None, // 0xe02 None, // 0xe03 None, // 0xe10 - None, // 0xe11 + M1st_W_M8B_or_M4B, // 0xe11 cmpnoxadd None, // 0xe12 None, // 0xe13 None, // 0xe20 - None, // 0xe21 + M1st_W_M8B_or_M4B, // 0xe21 cmpbxadd None, // 0xe22 None, // 0xe23 None, // 0xe30 - None, // 0xe31 + M1st_W_M8B_or_M4B, // 0xe31 cmpnbxadd None, // 0xe32 None, // 0xe33 None, // 0xe40 - None, // 0xe41 + M1st_W_M8B_or_M4B, // 0xe41 cmpzxadd None, // 0xe42 None, // 0xe43 None, // 0xe50 - None, // 0xe51 + M1st_W_M8B_or_M4B, // 0xe51 cmpnzxadd None, // 0xe52 None, // 0xe53 None, // 0xe60 - None, // 0xe61 + M1st_W_M8B_or_M4B, // 0xe61 cmpbexadd None, // 0xe62 None, // 0xe63 None, // 0xe70 - None, // 0xe71 + M1st_W_M8B_or_M4B, // 0xe71 cmpnbexadd None, // 0xe72 None, // 0xe73 None, // 0xe80 - None, // 0xe81 + M1st_W_M8B_or_M4B, // 0xe81 cmpsxadd None, // 0xe82 None, // 0xe83 None, // 0xe90 - None, // 0xe91 + M1st_W_M8B_or_M4B, // 0xe91 cmpnsxadd None, // 0xe92 None, // 0xe93 None, // 0xea0 - None, // 0xea1 + M1st_W_M8B_or_M4B, // 0xea1 cmppxadd None, // 0xea2 None, // 0xea3 None, // 0xeb0 - None, // 0xeb1 + M1st_W_M8B_or_M4B, // 0xeb1 cmpnpxadd None, // 0xeb2 None, // 0xeb3 None, // 0xec0 - None, // 0xec1 + M1st_W_M8B_or_M4B, // 0xec1 cmplxadd None, // 0xec2 None, // 0xec3 None, // 0xed0 - None, // 0xed1 + M1st_W_M8B_or_M4B, // 0xed1 cmpnlxadd None, // 0xed2 None, // 0xed3 None, // 0xee0 - None, // 0xee1 + M1st_W_M8B_or_M4B, // 0xee1 cmplexadd None, // 0xee2 None, // 0xee3 None, // 0xef0 - None, // 0xef1 + M1st_W_M8B_or_M4B, // 0xef1 cmpnlexadd None, // 0xef2 None, // 0xef3 None, // 0xf00 @@ -8802,11 +8813,11 @@ namespace Amd64InstrDecode None, // 0xf11 None, // 0xf12 None, // 0xf13 - None, // 0xf20 + MOp_W_M8B_or_M4B, // 0xf20 andn None, // 0xf21 None, // 0xf22 None, // 0xf23 - None, // 0xf30 + MOp_W_M8B_or_M4B, // 0xf30 blsi,blsmsk,blsr None, // 0xf31 None, // 0xf32 None, // 0xf33 @@ -8814,18 +8825,18 @@ namespace Amd64InstrDecode None, 
// 0xf41 None, // 0xf42 None, // 0xf43 - None, // 0xf50 + MOp_W_M8B_or_M4B, // 0xf50 bzhi None, // 0xf51 - None, // 0xf52 - None, // 0xf53 + MOp_W_M8B_or_M4B, // 0xf52 pext + MOp_W_M8B_or_M4B, // 0xf53 pdep None, // 0xf60 None, // 0xf61 None, // 0xf62 - None, // 0xf63 - None, // 0xf70 - None, // 0xf71 - None, // 0xf72 - None, // 0xf73 + MOp_W_M8B_or_M4B, // 0xf63 mulx + MOp_W_M8B_or_M4B, // 0xf70 bextr + MOp_W_M8B_or_M4B, // 0xf71 shlx + MOp_W_M8B_or_M4B, // 0xf72 sarx + MOp_W_M8B_or_M4B, // 0xf73 shrx None, // 0xf80 None, // 0xf81 None, // 0xf82 @@ -9691,7 +9702,7 @@ namespace Amd64InstrDecode None, // 0xce2 None, // 0xce3 None, // 0xcf0 - None, // 0xcf1 + MOp_I1B_bLL_M8B_M16B_M32B_M64B, // 0xcf1 vgf2p8affineinvqb None, // 0xcf2 None, // 0xcf3 None, // 0xd00 @@ -9825,7 +9836,7 @@ namespace Amd64InstrDecode None, // 0xf00 None, // 0xf01 None, // 0xf02 - None, // 0xf03 + MOp_I1B_W_M8B_or_M4B, // 0xf03 rorx None, // 0xf10 None, // 0xf11 None, // 0xf12 @@ -9887,4 +9898,1032 @@ namespace Amd64InstrDecode None, // 0xff2 None, // 0xff3 }; + + static const InstrForm instrFormEvex_4[1024] + { + M1st_M1B, // 0x000 add + None, // 0x001 + None, // 0x002 + None, // 0x003 + M1st_W_M8B_or_M4B, // 0x010 add + M1st_W_M8B_or_M2B, // 0x011 add + None, // 0x012 + None, // 0x013 + MOp_M1B, // 0x020 add + None, // 0x021 + None, // 0x022 + None, // 0x023 + MOp_W_M8B_or_M4B, // 0x030 add + MOp_W_M8B_or_M2B, // 0x031 add + None, // 0x032 + None, // 0x033 + None, // 0x040 + None, // 0x041 + None, // 0x042 + None, // 0x043 + None, // 0x050 + None, // 0x051 + None, // 0x052 + None, // 0x053 + None, // 0x060 + None, // 0x061 + None, // 0x062 + None, // 0x063 + None, // 0x070 + None, // 0x071 + None, // 0x072 + None, // 0x073 + M1st_M1B, // 0x080 or + None, // 0x081 + None, // 0x082 + None, // 0x083 + M1st_W_M8B_or_M4B, // 0x090 or + M1st_W_M8B_or_M2B, // 0x091 or + None, // 0x092 + None, // 0x093 + MOp_M1B, // 0x0a0 or + None, // 0x0a1 + None, // 0x0a2 + None, // 0x0a3 + MOp_W_M8B_or_M4B, // 0x0b0 or + MOp_W_M8B_or_M2B, // 0x0b1 or + None, // 0x0b2 + None, // 0x0b3 + None, // 0x0c0 + None, // 0x0c1 + None, // 0x0c2 + None, // 0x0c3 + None, // 0x0d0 + None, // 0x0d1 + None, // 0x0d2 + None, // 0x0d3 + None, // 0x0e0 + None, // 0x0e1 + None, // 0x0e2 + None, // 0x0e3 + None, // 0x0f0 + None, // 0x0f1 + None, // 0x0f2 + None, // 0x0f3 + M1st_M1B, // 0x100 adc + None, // 0x101 + None, // 0x102 + None, // 0x103 + M1st_W_M8B_or_M4B, // 0x110 adc + M1st_W_M8B_or_M2B, // 0x111 adc + None, // 0x112 + None, // 0x113 + MOp_M1B, // 0x120 adc + None, // 0x121 + None, // 0x122 + None, // 0x123 + MOp_W_M8B_or_M4B, // 0x130 adc + MOp_W_M8B_or_M2B, // 0x131 adc + None, // 0x132 + None, // 0x133 + None, // 0x140 + None, // 0x141 + None, // 0x142 + None, // 0x143 + None, // 0x150 + None, // 0x151 + None, // 0x152 + None, // 0x153 + None, // 0x160 + None, // 0x161 + None, // 0x162 + None, // 0x163 + None, // 0x170 + None, // 0x171 + None, // 0x172 + None, // 0x173 + M1st_M1B, // 0x180 sbb + None, // 0x181 + None, // 0x182 + None, // 0x183 + M1st_W_M8B_or_M4B, // 0x190 sbb + M1st_W_M8B_or_M2B, // 0x191 sbb + None, // 0x192 + None, // 0x193 + MOp_M1B, // 0x1a0 sbb + None, // 0x1a1 + None, // 0x1a2 + None, // 0x1a3 + MOp_W_M8B_or_M4B, // 0x1b0 sbb + MOp_W_M8B_or_M2B, // 0x1b1 sbb + None, // 0x1b2 + None, // 0x1b3 + None, // 0x1c0 + None, // 0x1c1 + None, // 0x1c2 + None, // 0x1c3 + None, // 0x1d0 + None, // 0x1d1 + None, // 0x1d2 + None, // 0x1d3 + None, // 0x1e0 + None, // 0x1e1 + None, // 0x1e2 + None, // 0x1e3 + None, // 0x1f0 + None, 
// 0x1f1 + None, // 0x1f2 + None, // 0x1f3 + M1st_M1B, // 0x200 and + None, // 0x201 + None, // 0x202 + None, // 0x203 + M1st_W_M8B_or_M4B, // 0x210 and + M1st_W_M8B_or_M2B, // 0x211 and + None, // 0x212 + None, // 0x213 + MOp_M1B, // 0x220 and + None, // 0x221 + None, // 0x222 + None, // 0x223 + MOp_W_M8B_or_M4B, // 0x230 and + MOp_W_M8B_or_M2B, // 0x231 and + None, // 0x232 + None, // 0x233 + M1st_I1B_W_M8B_or_M4B, // 0x240 shld + M1st_I1B_W_M8B_or_M2B, // 0x241 shld + None, // 0x242 + None, // 0x243 + None, // 0x250 + None, // 0x251 + None, // 0x252 + None, // 0x253 + None, // 0x260 + None, // 0x261 + None, // 0x262 + None, // 0x263 + None, // 0x270 + None, // 0x271 + None, // 0x272 + None, // 0x273 + M1st_M1B, // 0x280 sub + None, // 0x281 + None, // 0x282 + None, // 0x283 + M1st_W_M8B_or_M4B, // 0x290 sub + M1st_W_M8B_or_M2B, // 0x291 sub + None, // 0x292 + None, // 0x293 + MOp_M1B, // 0x2a0 sub + None, // 0x2a1 + None, // 0x2a2 + None, // 0x2a3 + MOp_W_M8B_or_M4B, // 0x2b0 sub + MOp_W_M8B_or_M2B, // 0x2b1 sub + None, // 0x2b2 + None, // 0x2b3 + M1st_I1B_W_M8B_or_M4B, // 0x2c0 shrd + M1st_I1B_W_M8B_or_M2B, // 0x2c1 shrd + None, // 0x2c2 + None, // 0x2c3 + None, // 0x2d0 + None, // 0x2d1 + None, // 0x2d2 + None, // 0x2d3 + None, // 0x2e0 + None, // 0x2e1 + None, // 0x2e2 + None, // 0x2e3 + None, // 0x2f0 + None, // 0x2f1 + None, // 0x2f2 + None, // 0x2f3 + M1st_M1B, // 0x300 xor + None, // 0x301 + None, // 0x302 + None, // 0x303 + M1st_W_M8B_or_M4B, // 0x310 xor + M1st_W_M8B_or_M2B, // 0x311 xor + None, // 0x312 + None, // 0x313 + MOp_M1B, // 0x320 xor + None, // 0x321 + None, // 0x322 + None, // 0x323 + MOp_W_M8B_or_M4B, // 0x330 xor + MOp_W_M8B_or_M2B, // 0x331 xor + None, // 0x332 + None, // 0x333 + None, // 0x340 + None, // 0x341 + None, // 0x342 + None, // 0x343 + None, // 0x350 + None, // 0x351 + None, // 0x352 + None, // 0x353 + None, // 0x360 + None, // 0x361 + None, // 0x362 + None, // 0x363 + None, // 0x370 + None, // 0x371 + None, // 0x372 + None, // 0x373 + None, // 0x380 + None, // 0x381 + None, // 0x382 + None, // 0x383 + None, // 0x390 + None, // 0x391 + None, // 0x392 + None, // 0x393 + None, // 0x3a0 + None, // 0x3a1 + None, // 0x3a2 + None, // 0x3a3 + None, // 0x3b0 + None, // 0x3b1 + None, // 0x3b2 + None, // 0x3b3 + None, // 0x3c0 + None, // 0x3c1 + None, // 0x3c2 + None, // 0x3c3 + None, // 0x3d0 + None, // 0x3d1 + None, // 0x3d2 + None, // 0x3d3 + None, // 0x3e0 + None, // 0x3e1 + None, // 0x3e2 + None, // 0x3e3 + None, // 0x3f0 + None, // 0x3f1 + None, // 0x3f2 + None, // 0x3f3 + MOp_W_M8B_or_M4B, // 0x400 cmovo + MOp_W_M8B_or_M2B, // 0x401 cmovo + None, // 0x402 + None, // 0x403 + MOp_W_M8B_or_M4B, // 0x410 cmovno + MOp_W_M8B_or_M2B, // 0x411 cmovno + None, // 0x412 + None, // 0x413 + MOp_W_M8B_or_M4B, // 0x420 cmovb + MOp_W_M8B_or_M2B, // 0x421 cmovb + None, // 0x422 + None, // 0x423 + MOp_W_M8B_or_M4B, // 0x430 cmovae + MOp_W_M8B_or_M2B, // 0x431 cmovae + None, // 0x432 + None, // 0x433 + MOp_W_M8B_or_M4B, // 0x440 cmove + MOp_W_M8B_or_M2B, // 0x441 cmove + None, // 0x442 + None, // 0x443 + MOp_W_M8B_or_M4B, // 0x450 cmovne + MOp_W_M8B_or_M2B, // 0x451 cmovne + None, // 0x452 + None, // 0x453 + MOp_W_M8B_or_M4B, // 0x460 cmovbe + MOp_W_M8B_or_M2B, // 0x461 cmovbe + None, // 0x462 + None, // 0x463 + MOp_W_M8B_or_M4B, // 0x470 cmova + MOp_W_M8B_or_M2B, // 0x471 cmova + None, // 0x472 + None, // 0x473 + MOp_W_M8B_or_M4B, // 0x480 cmovs + MOp_W_M8B_or_M2B, // 0x481 cmovs + None, // 0x482 + None, // 0x483 + MOp_W_M8B_or_M4B, // 0x490 cmovns + MOp_W_M8B_or_M2B, // 
0x491 cmovns + None, // 0x492 + None, // 0x493 + MOp_W_M8B_or_M4B, // 0x4a0 cmovp + MOp_W_M8B_or_M2B, // 0x4a1 cmovp + None, // 0x4a2 + None, // 0x4a3 + MOp_W_M8B_or_M4B, // 0x4b0 cmovnp + MOp_W_M8B_or_M2B, // 0x4b1 cmovnp + None, // 0x4b2 + None, // 0x4b3 + MOp_W_M8B_or_M4B, // 0x4c0 cmovl + MOp_W_M8B_or_M2B, // 0x4c1 cmovl + None, // 0x4c2 + None, // 0x4c3 + MOp_W_M8B_or_M4B, // 0x4d0 cmovge + MOp_W_M8B_or_M2B, // 0x4d1 cmovge + None, // 0x4d2 + None, // 0x4d3 + MOp_W_M8B_or_M4B, // 0x4e0 cmovle + MOp_W_M8B_or_M2B, // 0x4e1 cmovle + None, // 0x4e2 + None, // 0x4e3 + MOp_W_M8B_or_M4B, // 0x4f0 cmovg + MOp_W_M8B_or_M2B, // 0x4f1 cmovg + None, // 0x4f2 + None, // 0x4f3 + None, // 0x500 + None, // 0x501 + None, // 0x502 + None, // 0x503 + None, // 0x510 + None, // 0x511 + None, // 0x512 + None, // 0x513 + None, // 0x520 + None, // 0x521 + None, // 0x522 + None, // 0x523 + None, // 0x530 + None, // 0x531 + None, // 0x532 + None, // 0x533 + None, // 0x540 + None, // 0x541 + None, // 0x542 + None, // 0x543 + None, // 0x550 + None, // 0x551 + None, // 0x552 + None, // 0x553 + None, // 0x560 + None, // 0x561 + None, // 0x562 + None, // 0x563 + None, // 0x570 + None, // 0x571 + None, // 0x572 + None, // 0x573 + None, // 0x580 + None, // 0x581 + None, // 0x582 + None, // 0x583 + None, // 0x590 + None, // 0x591 + None, // 0x592 + None, // 0x593 + None, // 0x5a0 + None, // 0x5a1 + None, // 0x5a2 + None, // 0x5a3 + None, // 0x5b0 + None, // 0x5b1 + None, // 0x5b2 + None, // 0x5b3 + None, // 0x5c0 + None, // 0x5c1 + None, // 0x5c2 + None, // 0x5c3 + None, // 0x5d0 + None, // 0x5d1 + None, // 0x5d2 + None, // 0x5d3 + None, // 0x5e0 + None, // 0x5e1 + None, // 0x5e2 + None, // 0x5e3 + None, // 0x5f0 + None, // 0x5f1 + None, // 0x5f2 + None, // 0x5f3 + MOp_W_M8B_or_M4B, // 0x600 movbe + MOp_W_M8B_or_M2B, // 0x601 movbe + None, // 0x602 + None, // 0x603 + M1st_W_M8B_or_M4B, // 0x610 movbe + M1st_W_M8B_or_M2B, // 0x611 movbe + None, // 0x612 + None, // 0x613 + None, // 0x620 + None, // 0x621 + None, // 0x622 + None, // 0x623 + None, // 0x630 + None, // 0x631 + None, // 0x632 + None, // 0x633 + None, // 0x640 + None, // 0x641 + None, // 0x642 + None, // 0x643 + None, // 0x650 + M1st_MUnknown, // 0x651 wrussd,wrussq + None, // 0x652 + None, // 0x653 + M1st_MUnknown, // 0x660 wrssd,wrssq + MOp_W_M8B_or_M4B, // 0x661 adcx + MOp_W_M8B_or_M4B, // 0x662 adox + None, // 0x663 + None, // 0x670 + None, // 0x671 + None, // 0x672 + None, // 0x673 + None, // 0x680 + None, // 0x681 + None, // 0x682 + None, // 0x683 + None, // 0x690 + None, // 0x691 + None, // 0x692 + None, // 0x693 + None, // 0x6a0 + None, // 0x6a1 + None, // 0x6a2 + None, // 0x6a3 + None, // 0x6b0 + None, // 0x6b1 + None, // 0x6b2 + None, // 0x6b3 + None, // 0x6c0 + None, // 0x6c1 + None, // 0x6c2 + None, // 0x6c3 + None, // 0x6d0 + None, // 0x6d1 + None, // 0x6d2 + None, // 0x6d3 + None, // 0x6e0 + None, // 0x6e1 + None, // 0x6e2 + None, // 0x6e3 + None, // 0x6f0 + None, // 0x6f1 + None, // 0x6f2 + None, // 0x6f3 + None, // 0x700 + None, // 0x701 + None, // 0x702 + None, // 0x703 + None, // 0x710 + None, // 0x711 + None, // 0x712 + None, // 0x713 + None, // 0x720 + None, // 0x721 + None, // 0x722 + None, // 0x723 + None, // 0x730 + None, // 0x731 + None, // 0x732 + None, // 0x733 + None, // 0x740 + None, // 0x741 + None, // 0x742 + None, // 0x743 + None, // 0x750 + None, // 0x751 + None, // 0x752 + None, // 0x753 + None, // 0x760 + None, // 0x761 + None, // 0x762 + None, // 0x763 + None, // 0x770 + None, // 0x771 + None, // 0x772 + None, // 0x773 + 
None, // 0x780 + None, // 0x781 + None, // 0x782 + None, // 0x783 + None, // 0x790 + None, // 0x791 + None, // 0x792 + None, // 0x793 + None, // 0x7a0 + None, // 0x7a1 + None, // 0x7a2 + None, // 0x7a3 + None, // 0x7b0 + None, // 0x7b1 + None, // 0x7b2 + None, // 0x7b3 + None, // 0x7c0 + None, // 0x7c1 + None, // 0x7c2 + None, // 0x7c3 + None, // 0x7d0 + None, // 0x7d1 + None, // 0x7d2 + None, // 0x7d3 + None, // 0x7e0 + None, // 0x7e1 + None, // 0x7e2 + None, // 0x7e3 + None, // 0x7f0 + None, // 0x7f1 + None, // 0x7f2 + None, // 0x7f3 + M1st_M1B_I1B, // 0x800 adc,add,and,or,sbb,sub,xor + None, // 0x801 + None, // 0x802 + None, // 0x803 + M1st_I4B_W_M8B_or_M4B, // 0x810 adc,add,and,or,sbb,sub,xor + M1st_W_M8B_I4B_or_M2B_I2B, // 0x811 adc,add,and,or,sbb,sub,xor + None, // 0x812 + None, // 0x813 + None, // 0x820 + None, // 0x821 + None, // 0x822 + None, // 0x823 + M1st_I1B_W_M8B_or_M4B, // 0x830 adc,add,and,or,sbb,sub,xor + M1st_I1B_W_M8B_or_M2B, // 0x831 adc,add,and,or,sbb,sub,xor + None, // 0x832 + None, // 0x833 + None, // 0x840 + None, // 0x841 + None, // 0x842 + None, // 0x843 + None, // 0x850 + None, // 0x851 + None, // 0x852 + None, // 0x853 + None, // 0x860 + None, // 0x861 + None, // 0x862 + None, // 0x863 + None, // 0x870 + None, // 0x871 + None, // 0x872 + None, // 0x873 + None, // 0x880 + None, // 0x881 + None, // 0x882 + None, // 0x883 + None, // 0x890 + None, // 0x891 + None, // 0x892 + None, // 0x893 + None, // 0x8a0 + None, // 0x8a1 + None, // 0x8a2 + None, // 0x8a3 + None, // 0x8b0 + None, // 0x8b1 + None, // 0x8b2 + None, // 0x8b3 + None, // 0x8c0 + None, // 0x8c1 + None, // 0x8c2 + None, // 0x8c3 + None, // 0x8d0 + None, // 0x8d1 + None, // 0x8d2 + None, // 0x8d3 + None, // 0x8e0 + None, // 0x8e1 + None, // 0x8e2 + None, // 0x8e3 + None, // 0x8f0 + None, // 0x8f1 + None, // 0x8f2 + None, // 0x8f3 + None, // 0x900 + None, // 0x901 + None, // 0x902 + None, // 0x903 + None, // 0x910 + None, // 0x911 + None, // 0x912 + None, // 0x913 + None, // 0x920 + None, // 0x921 + None, // 0x922 + None, // 0x923 + None, // 0x930 + None, // 0x931 + None, // 0x932 + None, // 0x933 + None, // 0x940 + None, // 0x941 + None, // 0x942 + None, // 0x943 + None, // 0x950 + None, // 0x951 + None, // 0x952 + None, // 0x953 + None, // 0x960 + None, // 0x961 + None, // 0x962 + None, // 0x963 + None, // 0x970 + None, // 0x971 + None, // 0x972 + None, // 0x973 + None, // 0x980 + None, // 0x981 + None, // 0x982 + None, // 0x983 + None, // 0x990 + None, // 0x991 + None, // 0x992 + None, // 0x993 + None, // 0x9a0 + None, // 0x9a1 + None, // 0x9a2 + None, // 0x9a3 + None, // 0x9b0 + None, // 0x9b1 + None, // 0x9b2 + None, // 0x9b3 + None, // 0x9c0 + None, // 0x9c1 + None, // 0x9c2 + None, // 0x9c3 + None, // 0x9d0 + None, // 0x9d1 + None, // 0x9d2 + None, // 0x9d3 + None, // 0x9e0 + None, // 0x9e1 + None, // 0x9e2 + None, // 0x9e3 + None, // 0x9f0 + None, // 0x9f1 + None, // 0x9f2 + None, // 0x9f3 + None, // 0xa00 + None, // 0xa01 + None, // 0xa02 + None, // 0xa03 + None, // 0xa10 + None, // 0xa11 + None, // 0xa12 + None, // 0xa13 + None, // 0xa20 + None, // 0xa21 + None, // 0xa22 + None, // 0xa23 + None, // 0xa30 + None, // 0xa31 + None, // 0xa32 + None, // 0xa33 + None, // 0xa40 + None, // 0xa41 + None, // 0xa42 + None, // 0xa43 + M1st_W_M8B_or_M4B, // 0xa50 shld + M1st_W_M8B_or_M2B, // 0xa51 shld + None, // 0xa52 + None, // 0xa53 + None, // 0xa60 + None, // 0xa61 + None, // 0xa62 + None, // 0xa63 + None, // 0xa70 + None, // 0xa71 + None, // 0xa72 + None, // 0xa73 + None, // 0xa80 + None, // 0xa81 + None, 
// 0xa82 + None, // 0xa83 + None, // 0xa90 + None, // 0xa91 + None, // 0xa92 + None, // 0xa93 + None, // 0xaa0 + None, // 0xaa1 + None, // 0xaa2 + None, // 0xaa3 + None, // 0xab0 + None, // 0xab1 + None, // 0xab2 + None, // 0xab3 + None, // 0xac0 + None, // 0xac1 + None, // 0xac2 + None, // 0xac3 + M1st_W_M8B_or_M4B, // 0xad0 shrd + M1st_W_M8B_or_M2B, // 0xad1 shrd + None, // 0xad2 + None, // 0xad3 + None, // 0xae0 + None, // 0xae1 + None, // 0xae2 + None, // 0xae3 + MOp_W_M8B_or_M4B, // 0xaf0 imul + MOp_W_M8B_or_M2B, // 0xaf1 imul + None, // 0xaf2 + None, // 0xaf3 + None, // 0xb00 + None, // 0xb01 + None, // 0xb02 + None, // 0xb03 + None, // 0xb10 + None, // 0xb11 + None, // 0xb12 + None, // 0xb13 + None, // 0xb20 + None, // 0xb21 + None, // 0xb22 + None, // 0xb23 + None, // 0xb30 + None, // 0xb31 + None, // 0xb32 + None, // 0xb33 + None, // 0xb40 + None, // 0xb41 + None, // 0xb42 + None, // 0xb43 + None, // 0xb50 + None, // 0xb51 + None, // 0xb52 + None, // 0xb53 + None, // 0xb60 + None, // 0xb61 + None, // 0xb62 + None, // 0xb63 + None, // 0xb70 + None, // 0xb71 + None, // 0xb72 + None, // 0xb73 + None, // 0xb80 + None, // 0xb81 + None, // 0xb82 + None, // 0xb83 + None, // 0xb90 + None, // 0xb91 + None, // 0xb92 + None, // 0xb93 + None, // 0xba0 + None, // 0xba1 + None, // 0xba2 + None, // 0xba3 + None, // 0xbb0 + None, // 0xbb1 + None, // 0xbb2 + None, // 0xbb3 + None, // 0xbc0 + None, // 0xbc1 + None, // 0xbc2 + None, // 0xbc3 + None, // 0xbd0 + None, // 0xbd1 + None, // 0xbd2 + None, // 0xbd3 + None, // 0xbe0 + None, // 0xbe1 + None, // 0xbe2 + None, // 0xbe3 + None, // 0xbf0 + None, // 0xbf1 + None, // 0xbf2 + None, // 0xbf3 + M1st_M1B_I1B, // 0xc00 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xc01 + None, // 0xc02 + None, // 0xc03 + M1st_I1B_W_M8B_or_M4B, // 0xc10 rcl,rcr,rol,ror,sar,shl,shr + M1st_I1B_W_M8B_or_M2B, // 0xc11 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xc12 + None, // 0xc13 + None, // 0xc20 + None, // 0xc21 + None, // 0xc22 + None, // 0xc23 + None, // 0xc30 + None, // 0xc31 + None, // 0xc32 + None, // 0xc33 + None, // 0xc40 + None, // 0xc41 + None, // 0xc42 + None, // 0xc43 + None, // 0xc50 + None, // 0xc51 + None, // 0xc52 + None, // 0xc53 + None, // 0xc60 + None, // 0xc61 + None, // 0xc62 + None, // 0xc63 + None, // 0xc70 + None, // 0xc71 + None, // 0xc72 + None, // 0xc73 + None, // 0xc80 + None, // 0xc81 + None, // 0xc82 + None, // 0xc83 + None, // 0xc90 + None, // 0xc91 + None, // 0xc92 + None, // 0xc93 + None, // 0xca0 + None, // 0xca1 + None, // 0xca2 + None, // 0xca3 + None, // 0xcb0 + None, // 0xcb1 + None, // 0xcb2 + None, // 0xcb3 + None, // 0xcc0 + None, // 0xcc1 + None, // 0xcc2 + None, // 0xcc3 + None, // 0xcd0 + None, // 0xcd1 + None, // 0xcd2 + None, // 0xcd3 + None, // 0xce0 + None, // 0xce1 + None, // 0xce2 + None, // 0xce3 + None, // 0xcf0 + None, // 0xcf1 + None, // 0xcf2 + None, // 0xcf3 + M1st_M1B, // 0xd00 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd01 + None, // 0xd02 + None, // 0xd03 + M1st_W_M8B_or_M4B, // 0xd10 rcl,rcr,rol,ror,sar,shl,shr + M1st_W_M8B_or_M2B, // 0xd11 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd12 + None, // 0xd13 + M1st_M1B, // 0xd20 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd21 + None, // 0xd22 + None, // 0xd23 + M1st_W_M8B_or_M4B, // 0xd30 rcl,rcr,rol,ror,sar,shl,shr + M1st_W_M8B_or_M2B, // 0xd31 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd32 + None, // 0xd33 + MOp_M16B_I1B, // 0xd40 sha1rnds4 + None, // 0xd41 + None, // 0xd42 + None, // 0xd43 + None, // 0xd50 + None, // 0xd51 + None, // 0xd52 + None, // 0xd53 + None, // 0xd60 + None, 
// 0xd61 + None, // 0xd62 + None, // 0xd63 + None, // 0xd70 + None, // 0xd71 + None, // 0xd72 + None, // 0xd73 + MOp_M16B, // 0xd80 sha1nexte + None, // 0xd81 + MOnly_MUnknown, // 0xd82 aesdecwide128kl,aesdecwide256kl,aesencwide128kl,aesencwide256kl + None, // 0xd83 + MOp_M16B, // 0xd90 sha1msg1 + None, // 0xd91 + None, // 0xd92 + None, // 0xd93 + MOp_M16B, // 0xda0 sha1msg2 + None, // 0xda1 + None, // 0xda2 + None, // 0xda3 + MOp_M16B, // 0xdb0 sha256rnds2 + None, // 0xdb1 + None, // 0xdb2 + None, // 0xdb3 + MOp_M16B, // 0xdc0 sha256msg1 + None, // 0xdc1 + MOp_MUnknown, // 0xdc2 aesenc128kl + None, // 0xdc3 + MOp_M16B, // 0xdd0 sha256msg2 + None, // 0xdd1 + MOp_MUnknown, // 0xdd2 aesdec128kl + None, // 0xdd3 + None, // 0xde0 + None, // 0xde1 + MOp_MUnknown, // 0xde2 aesenc256kl + None, // 0xde3 + None, // 0xdf0 + None, // 0xdf1 + MOp_MUnknown, // 0xdf2 aesdec256kl + None, // 0xdf3 + None, // 0xe00 + None, // 0xe01 + None, // 0xe02 + None, // 0xe03 + None, // 0xe10 + None, // 0xe11 + None, // 0xe12 + None, // 0xe13 + None, // 0xe20 + None, // 0xe21 + None, // 0xe22 + None, // 0xe23 + None, // 0xe30 + None, // 0xe31 + None, // 0xe32 + None, // 0xe33 + None, // 0xe40 + None, // 0xe41 + None, // 0xe42 + None, // 0xe43 + None, // 0xe50 + None, // 0xe51 + None, // 0xe52 + None, // 0xe53 + None, // 0xe60 + None, // 0xe61 + None, // 0xe62 + None, // 0xe63 + None, // 0xe70 + None, // 0xe71 + None, // 0xe72 + None, // 0xe73 + None, // 0xe80 + None, // 0xe81 + None, // 0xe82 + None, // 0xe83 + None, // 0xe90 + None, // 0xe91 + None, // 0xe92 + None, // 0xe93 + None, // 0xea0 + None, // 0xea1 + None, // 0xea2 + None, // 0xea3 + None, // 0xeb0 + None, // 0xeb1 + None, // 0xeb2 + None, // 0xeb3 + None, // 0xec0 + None, // 0xec1 + None, // 0xec2 + None, // 0xec3 + None, // 0xed0 + None, // 0xed1 + None, // 0xed2 + None, // 0xed3 + None, // 0xee0 + None, // 0xee1 + None, // 0xee2 + None, // 0xee3 + None, // 0xef0 + None, // 0xef1 + None, // 0xef2 + None, // 0xef3 + MOp_M1B, // 0xf00 crc32 + None, // 0xf01 + MOp_M16B, // 0xf02 invept + None, // 0xf03 + MOp_W_M8B_or_M4B, // 0xf10 crc32 + MOp_W_M8B_or_M2B, // 0xf11 crc32 + MOp_M16B, // 0xf12 invvpid + None, // 0xf13 + None, // 0xf20 + None, // 0xf21 + MOp_MUnknown, // 0xf22 invpcid + None, // 0xf23 + None, // 0xf30 + None, // 0xf31 + None, // 0xf32 + None, // 0xf33 + None, // 0xf40 + None, // 0xf41 + None, // 0xf42 + None, // 0xf43 + None, // 0xf50 + None, // 0xf51 + None, // 0xf52 + None, // 0xf53 + MOnly_M1B, // 0xf60 neg,not + None, // 0xf61 + None, // 0xf62 + None, // 0xf63 + MOnly_W_M8B_or_M4B, // 0xf70 neg,not + MOnly_W_M8B_or_M2B, // 0xf71 neg,not + None, // 0xf72 + None, // 0xf73 + None, // 0xf80 + MOp_MUnknown, // 0xf81 movdir64b + MOp_MUnknown, // 0xf82 enqcmds + MOp_MUnknown, // 0xf83 enqcmd + M1st_W_M8B_or_M4B, // 0xf90 movdiri + None, // 0xf91 + None, // 0xf92 + None, // 0xf93 + None, // 0xfa0 + None, // 0xfa1 + None, // 0xfa2 + None, // 0xfa3 + None, // 0xfb0 + None, // 0xfb1 + None, // 0xfb2 + None, // 0xfb3 + M1st_W_M8B_or_M4B, // 0xfc0 aadd + M1st_W_M8B_or_M4B, // 0xfc1 aand + M1st_W_M8B_or_M4B, // 0xfc2 axor + M1st_W_M8B_or_M4B, // 0xfc3 aor + None, // 0xfd0 + None, // 0xfd1 + None, // 0xfd2 + None, // 0xfd3 + MOnly_M1B, // 0xfe0 dec,inc + None, // 0xfe1 + None, // 0xfe2 + None, // 0xfe3 + MOnly_W_M8B_or_M4B, // 0xff0 dec,inc + None, // 0xff1 + None, // 0xff2 + None, // 0xff3 + }; } diff --git a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs 
b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs index dbcec26dd675..0e55c4c2d4fa 100644 --- a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs +++ b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs @@ -23,9 +23,8 @@ public enum EncodingFlags : int P = 0x1, // OpSize (P)refix F2 = 0x2, F3 = 0x4, - Rex = 0x8, - W = 0x10, // VEX.W / EVEX.W + W = 0x10, // REX.W / REX2.W / VEX.W / EVEX.W L = 0x20, // VEX.L (for EVEX, see LL bits below) b = 0x40, // EVEX.b (broadcast/RC/SAE Context) @@ -102,21 +101,29 @@ internal enum Map { // Map None, - Primary, - Secondary, - F38, - F3A, + Primary, // legacy map 0 + Secondary, // 0F - legacy map 1 + F38, // 0F 38 - legacy map 2 + F3A, // 0F 3A - legacy map 3 Vex1, // mmmmm = 00001 (0F) Vex2, // mmmmm = 00010 (0F 38) Vex3, // mmmmm = 00011 (0F 3A) Evex_0F, // mmm = 001 Evex_0F38, // mmm = 010 Evex_0F3A, // mmm = 011 + Evex_4, // mmm = 100 // Extended EVEX legacy promoted map 0/1 } internal sealed partial class Amd64InstructionSample { - [GeneratedRegex(@"^\s*(?
0x[a-f0-9]+)\s[^:]*:\s*(?[0-9a-f ]*)\t(?(((rex[.WRXB]*)|(rep[nez]*)|(data16)|(addr32)|(lock)|(bnd)|(\{vex\})|([cdefgs]s)) +)*)(?\S+) *(?(\S[^#]*?)?)\s*(?#.*)?$", + [GeneratedRegex( + @"^\s*" + + @"(?
0x[0-9a-fA-F]+)\s[^:]*:\s*" + + @"(?[0-9a-fA-F][0-9a-fA-F]( [0-9a-fA-F][0-9a-fA-F])*)\s*" + + @"(?(((rex[.WRXB]*)|(\{rex2 0x[0-9a-fA-F][0-9a-fA-F]?\})|(rep[nez]*)|(data16)|(addr32)|(lock)|(bnd)|(\{vex\})|(\{evex\})|([cdefgs]s)) +)*)" + + @"(?\S+) *" + + @"(?(\S[^#]*?)?)\s*" + + @"(?#.*)?$", RegexOptions.ExplicitCapture)] private static partial Regex EncDisassemblySplit(); @@ -139,15 +146,28 @@ internal sealed partial class Amd64InstructionSample ["WORD PTR [rip+0x53525150]{1to8}"] = SuffixFlags.M2B, ["WORD PTR [rip+0x53525150]{1to16}"] = SuffixFlags.M2B, ["WORD PTR [rip+0x53525150]{1to32}"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]{1to8}"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]{1to16}"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]{1to32}"] = SuffixFlags.M2B, ["DWORD PTR [rip+0x53525150]"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to2}"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to4}"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to8}"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to16}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to2}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to4}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to8}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to16}"] = SuffixFlags.M4B, ["QWORD PTR [rip+0x53525150]"] = SuffixFlags.M8B, ["QWORD PTR [rip+0x53525150]{1to2}"] = SuffixFlags.M8B, ["QWORD PTR [rip+0x53525150]{1to4}"] = SuffixFlags.M8B, ["QWORD PTR [rip+0x53525150]{1to8}"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]{1to2}"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]{1to4}"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]{1to8}"] = SuffixFlags.M8B, ["OWORD PTR [rip+0x53525150]"] = SuffixFlags.M16B, ["XMMWORD PTR [rip+0x53525150]"] = SuffixFlags.M16B, ["YMMWORD PTR [rip+0x53525150]"] = SuffixFlags.M32B, @@ -181,7 +201,8 @@ public int opCodeExt { get { - const byte BytePP = 0x3; + const byte VEX_pp_mask = 0x3; + const byte EVEX_pp_mask = 0x3; byte opcode = encoding[opIndex]; byte pp = 0; @@ -203,14 +224,15 @@ public int opCodeExt case Map.Vex2: case Map.Vex3: // `pp` is the low 2 bits of the last byte of the VEX prefix (either 3-byte or 2-byte form). 
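Aside: the `pp` bits extracted here are the same two bits that end up in the low nibble of the packed opcode used to index the instruction-form tables (the generated header describes it as `PackedOpcode = opcode << 4 + pp`). A minimal illustrative sketch of that combination; the helper name is made up and is not part of the generator:

```cpp
// Illustrative sketch only: combine the opcode byte with VEX.pp to form the packed opcode.
// For a VEX-encoded instruction, pp is the low 2 bits of the byte immediately before the opcode.
#include <cstdint>

static int PackVexOpcode(const uint8_t* encoding, int opIndex)
{
    const uint8_t ppMask = 0x3;
    uint8_t pp = encoding[opIndex - 1] & ppMask;   // last byte of the 2- or 3-byte VEX prefix
    return (encoding[opIndex] << 4) | pp;          // e.g. opcode 0x12 with pp=3 packs to 0x123
}
```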
- pp = (byte)(encoding[opIndex - 1] & BytePP); + pp = (byte)(encoding[opIndex - 1] & VEX_pp_mask); break; case Map.Evex_0F: case Map.Evex_0F38: case Map.Evex_0F3A: + case Map.Evex_4: { var evex_p1 = encoding[opIndex - 2]; - pp = (byte)(evex_p1 & BytePP); + pp = (byte)(evex_p1 & EVEX_pp_mask); break; } default: @@ -307,6 +329,7 @@ internal enum Prefixes : byte AddSize = 0x67, Vex = 0xc4, VexShort = 0xc5, + Rex2 = 0xD5, Lock = 0xf0, Rep = 0xf2, Repne = 0xf3 @@ -321,6 +344,8 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) const byte RexMask = 0xf0; const byte RexW = 0x8; + const byte Rex2W = 0x8; + const byte Rex2_M0 = 0x80; const byte Vex_ByteW = 0x80; const byte Vex_ByteL = 0x04; @@ -365,7 +390,6 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) { byte rex = encoding[operandIndex++]; - flags |= EncodingFlags.Rex; if (Debug.debug) Console.WriteLine($" P:REX"); if ((rex & RexW) != 0) @@ -477,6 +501,11 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) map = Map.Evex_0F3A; if (Debug.debug) Console.WriteLine($" map: Evex_0F3A"); break; + case 0x4: + map = Map.Evex_4; + // Extended EVEX legacy promoted map 0/1. + if (Debug.debug) Console.WriteLine($" map: Evex_4"); + break; default: throw new Exception($"Unexpected EVEX map {encoding}"); } @@ -487,23 +516,55 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) if (Debug.debug) Console.WriteLine($" EVEX.W"); } - byte evex_LprimeL = (byte)((evex_p2 & Evex_ByteLprimeLmask) >> Evex_ByteLprimeLshift); - flags |= Util.ConvertEvexLLToEncodingFlags(evex_LprimeL); - if (Debug.debug) + if (evex_mmm != 4) // EVEX.L'L is not used in map 4 { - Console.WriteLine($" EVEX.L'L={evex_LprimeL:x1}"); + byte evex_LprimeL = (byte)((evex_p2 & Evex_ByteLprimeLmask) >> Evex_ByteLprimeLshift); + flags |= Util.ConvertEvexLLToEncodingFlags(evex_LprimeL); + if (Debug.debug) + { + Console.WriteLine($" EVEX.L'L={evex_LprimeL:x1}"); + } } - var evex_b = evex_p2 & 0x10; - if (evex_b != 0) + if (evex_mmm != 4) // EVEX.b is not used in map 4 { - flags |= EncodingFlags.b; - if (Debug.debug) Console.WriteLine($" EVEX.b"); + var evex_b = evex_p2 & 0x10; + if (evex_b != 0) + { + flags |= EncodingFlags.b; + if (Debug.debug) Console.WriteLine($" EVEX.b"); + } } operandIndex += 4; break; } + case Prefixes.Rex2: + { + if (Debug.debug) Console.WriteLine($" P:REX2"); + var byte1 = encoding[operandIndex + 1]; + + var rex2_m0 = byte1 & Rex2_M0; + if (rex2_m0 == 0) + { + map = Map.Primary; + if (Debug.debug) Console.WriteLine($" map: Primary"); + } + else + { + map = Map.Secondary; + if (Debug.debug) Console.WriteLine($" map: Secondary"); + } + + if ((byte1 & Rex2W) != 0) + { + flags |= EncodingFlags.W; + if (Debug.debug) Console.WriteLine($" P:REX2.W"); + } + + operandIndex += 2; + break; + } default: map = Map.Primary; if (Debug.debug) Console.WriteLine($" map: primary"); @@ -599,11 +660,19 @@ internal sealed partial class Amd64InstructionTableGenerator { private List samples = new List(); - private const string assemblyPrefix = " 0x000000000"; - private const string preTerminator = "58\t"; - private const string groupTerminator = "59\tpop"; + [GeneratedRegex(@"^\s+0x00000000")] + private static partial Regex AssemblyPrefix(); + + // The '0x' prefix is not included in the regex match. + [GeneratedRegex(@"^\s*0x(?
[0-9a-fA-F]+)", RegexOptions.ExplicitCapture)] + private static partial Regex AssemblyAddress(); - [GeneratedRegex(@"((\{vex\})|(\{bad\})|(\(bad\))|(\srex(\.[WRXB]*)?\s*(#.*)?$))")] + // NOTE: APX instructions push2/push2p/pop2/pop2p are not causing gdb to report an illegal instruction, + // which is causing problems. So manually disallow them. + // NOTE: we don't disqualify disassembly with `{evex}` in the text: there are some cases where an instruction + // can be encoded with either an EVEX or VEX encoding, and the disassembler will annotate the instruction with + // `{evex}` to indicate it is not the canonical encoding. + [GeneratedRegex(@"((push2)|(pop2)|(\{vex\})|(\{bad\})|(\(bad\))|(\srex(\.[WRXB]*)?\s*(#.*)?$))")] private static partial Regex BadDisassembly(); private List<(Map, int)> regExpandOpcodes; @@ -648,6 +717,7 @@ private Amd64InstructionTableGenerator() { Map.Evex_0F, new Dictionary() }, { Map.Evex_0F38, new Dictionary() }, { Map.Evex_0F3A, new Dictionary() }, + { Map.Evex_4, new Dictionary() }, }; ParseSamples(); @@ -658,32 +728,34 @@ private void ParseSamples() { string line; string sample = null; - bool saw58 = false; + int sampleAddress = 0; + + // Each sample is written out as 16 bytes of disassembly. If we hit bad disassembly, we need to skip to the next sample + // based on the disassembly address. + while ((line = Console.In.ReadLine()) != null) { //if (Debug.debug) Console.WriteLine($"line: {line}"); - if (sample == null) + var match = AssemblyAddress().Match(line); + if (!match.Success) { - // Ignore non-assembly lines - if (line.StartsWith(assemblyPrefix)) - sample = line.Trim(); continue; } + int lineAddress = int.Parse(match.Groups["address"].Value, NumberStyles.AllowHexSpecifier); - //if (Debug.debug) Console.WriteLine($"sample: {sample}"); - - // Each sample may contain multiple instructions - // We are only interested in the first of each group - // Each group is terminated by 0x58 then 0x59 which is a pop instruction - if (!saw58) + if (sample == null) { - saw58 = line.Contains(preTerminator); + sample = line.Trim(); + sampleAddress = lineAddress; + //if (Debug.debug) Console.WriteLine($"sample: ({sampleAddress:x}) {sample}"); continue; } - else if (!line.Contains(groupTerminator)) + + // Keep skipping instructions until we get to the next sample address. 
+ if (lineAddress < sampleAddress + 15) { - saw58 = false; + //if (Debug.debug) Console.WriteLine($"Skipping {lineAddress:x}"); continue; } @@ -712,8 +784,8 @@ private void ParseSamples() } } - saw58 = false; sample = null; + sampleAddress = 0; } } @@ -947,13 +1019,22 @@ private void SummarizeSamples(bool reg) else goto default; break; + case SuffixFlags.M8B | SuffixFlags.M2B | SuffixFlags.I4B | SuffixFlags.I2B: + if (TestHypothesis((e) => Amd64W(SuffixFlags.M8B | SuffixFlags.I4B, SuffixFlags.M2B | SuffixFlags.I2B, e), sometimesSuffix, map)) + rules += "_W_M8B_I4B_or_M2B_I2B"; + else + goto default; + break; default: - if (Debug.debug) { - Console.WriteLine($"Unhandled rule...{sometimesSuffix}"); + string mnemonics_string = string.Join(",", mnemonics.OrderBy(s => s)); + if (Debug.debug) + { + Console.WriteLine($"Unhandled rule...{sometimesSuffix} : {mnemonics_string}"); + } + Console.Error.WriteLine($"Unhandled rule...{sometimesSuffix} : {mnemonics_string}"); + return; } - Console.Error.WriteLine($"Unhandled rule...{sometimesSuffix}"); - return; } rules = rules.Replace("^_", "").Replace("^", "None"); @@ -1003,12 +1084,12 @@ public static SuffixFlags TestLL(SuffixFlags LL00, SuffixFlags LL01, SuffixFlags public static SuffixFlags Amd64L(SuffixFlags t, SuffixFlags f, EncodingFlags g) => Test(EncodingFlags.L, t, f, g); public static SuffixFlags Amd64W(SuffixFlags W1, SuffixFlags W0, EncodingFlags g) => Test(EncodingFlags.W, W1, W0, g); - public static SuffixFlags Amd64P(SuffixFlags t, SuffixFlags f, EncodingFlags g) => Test(EncodingFlags.P, f, t, g); + public static SuffixFlags Amd64P(SuffixFlags P0, SuffixFlags P1, EncodingFlags g) => Test(EncodingFlags.P, P1, P0, g); // Note: P0/P1 reversed. This puts smaller (OSIZE override) second. public static SuffixFlags Amd64b(SuffixFlags b1, SuffixFlags b0, EncodingFlags g) => Test(EncodingFlags.b, b1, b0, g); // Tests for multiple flags - public static SuffixFlags Amd64WP(SuffixFlags tx, SuffixFlags ft, SuffixFlags ff, EncodingFlags g) => Amd64W(tx, Amd64P(ft, ff, g), g); + public static SuffixFlags Amd64WP(SuffixFlags W1, SuffixFlags W0P0, SuffixFlags W0P1, EncodingFlags g) => Amd64W(W1, Amd64P(W0P0, W0P1, g), g); public static SuffixFlags Amd64WLL(SuffixFlags W1LL00, SuffixFlags W1LL01, SuffixFlags W1LL10, SuffixFlags W0LL00, SuffixFlags W0LL01, SuffixFlags W0LL10, EncodingFlags g) => Amd64W(TestLL(W1LL00, W1LL01, W1LL10, g), TestLL(W0LL00, W0LL01, W0LL10, g), g); public static SuffixFlags Amd64bLL(SuffixFlags b1, SuffixFlags b0LL00, SuffixFlags b0LL01, SuffixFlags b0LL10, EncodingFlags g) => @@ -1035,7 +1116,7 @@ private void AddOpCode(Map map, int opCodeExt, bool reg, int modrmReg, string ru else { string oldstring = null; - if (Debug.debug) + if (true) // Debug.debug { if (opcodes[map].TryGetValue(opCodeExt, out oldstring)) { @@ -1046,6 +1127,9 @@ private void AddOpCode(Map map, int opCodeExt, bool reg, int modrmReg, string ru if (Debug.debug) { Console.WriteLine($"add opcodes[{map}][{opCodeExt:x3}] = {opcodes[map][opCodeExt]}"); + } + if (true) // Debug.debug + { if ((oldstring != null) && (oldstring != opcodes[map][opCodeExt])) { Console.WriteLine($"WARNING: REPLACEMENT WAS DIFFERENT"); @@ -1090,12 +1174,21 @@ private void WriteCode() Console.WriteLine(" // I4B // Instruction includes 4 bytes of immediates"); Console.WriteLine(" // I8B // Instruction includes 8 bytes of immediates"); Console.WriteLine(" // Unknown // Instruction samples did not include a modrm configured to produce RIP addressing"); - Console.WriteLine(" // L // Flags depend 
on L bit in encoding. L__or_"); - Console.WriteLine(" // LL // Flags depend on L'L bits in EVEX encoding. LL___"); - Console.WriteLine(" // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector"); - Console.WriteLine(" // W // Flags depend on W bit in encoding. W__or_"); - Console.WriteLine(" // P // Flags depend on OpSize prefix for encoding. P__or_"); - Console.WriteLine(" // WP // Flags depend on W bit in encoding and OpSize prefix. WP__or__or_"); + Console.WriteLine(" // L // Flags depend on L bit in encoding."); + Console.WriteLine(" // // L__or_"); + Console.WriteLine(" // // L__or_"); + Console.WriteLine(" // LL // Flags depend on L'L bits in EVEX encoding."); + Console.WriteLine(" // // LL___"); + Console.WriteLine(" // // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector"); + Console.WriteLine(" // W // Flags depend on W bit in encoding."); + Console.WriteLine(" // // W__or_"); + Console.WriteLine(" // // W__or_"); + Console.WriteLine(" // P // Flags depend on OpSize prefix for encoding."); + Console.WriteLine(" // // P__or_"); + Console.WriteLine(" // // P__or_"); + Console.WriteLine(" // WP // Flags depend on W bit in encoding and OpSize prefix."); + Console.WriteLine(" // // WP__or__or_"); + Console.WriteLine(" // // WP__or__or_"); Console.WriteLine(" // WLL // Flags depend on W and L'L bits."); Console.WriteLine(" // // WLL____or___"); Console.WriteLine(" // bLL // Flags depend on EVEX.b and L'L bits."); @@ -1114,15 +1207,15 @@ private void WriteCode() continue; Console.WriteLine($" {rule},"); } - Console.WriteLine($" Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location in encoded in lower bits"); + Console.WriteLine($" Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location is encoded in lower bits."); Console.WriteLine(" };"); Console.WriteLine(); - Console.WriteLine(" // The following instrForm maps correspond to the amd64 instr maps"); - Console.WriteLine(" // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics"); - Console.WriteLine(" // The opcode is packed to be human readable. PackedOpcode = opcode << 4 + pp"); - Console.WriteLine(" // - For Vex* the pp is directly included in the encoding"); - Console.WriteLine(" // - For the Secondary, F38, and F3A pages the pp is not defined in the encoding, but affects instr form."); + Console.WriteLine(" // The following instrForm maps correspond to the amd64 instruction maps."); + Console.WriteLine(" // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics."); + Console.WriteLine(" // The opcode is packed to be human readable. PackedOpcode = opcode << 4 + pp. 
For example, 0x123 is opcode 0x12, pp=0x3."); + Console.WriteLine(" // - For Vex* and EVEX the pp is directly included in the encoding"); + Console.WriteLine(" // - For the Secondary (0F), 0F 38, and 0F 3A pages the pp is not defined in the encoding, but affects instruction form."); Console.WriteLine(" // - pp = 0 implies no prefix."); Console.WriteLine(" // - pp = 1 implies 0x66 OpSize prefix only."); Console.WriteLine(" // - pp = 2 implies 0xF3 prefix."); @@ -1130,9 +1223,9 @@ private void WriteCode() Console.WriteLine(" // - For the primary map, pp is not used and is always 0 in the comments."); Console.WriteLine(); Console.WriteLine(); - Console.WriteLine(" // Instruction which change forms based on modrm.reg are encoded in this extension table."); - Console.WriteLine(" // Since there are 8 modrm.reg values, they occur is groups of 8."); - Console.WriteLine(" // Each group is referenced from the other tables below using Extension|(index >> 3)."); + Console.WriteLine(" // Instructions which change forms based on modrm.reg are encoded in this extension table."); + Console.WriteLine(" // Since there are 8 modrm.reg values, they occur in groups of 8."); + Console.WriteLine(" // Each group is referenced from the other tables below using (Extension|(index >> 3))."); currentExtension += 8; Console.WriteLine($" static const InstrForm instrFormExtension[{currentExtension + 1}]"); Console.WriteLine(" {"); @@ -1167,7 +1260,8 @@ private void WriteCode() ("Vex3", Map.Vex3), ("Evex_0F", Map.Evex_0F), ("Evex_0F38", Map.Evex_0F38), - ("Evex_0F3A", Map.Evex_0F3A) + ("Evex_0F3A", Map.Evex_0F3A), + ("Evex_4", Map.Evex_4) }; foreach ((string name, Map map) in mapTuples) diff --git a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp index 963401812ca8..abaaa165af25 100644 --- a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp +++ b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp @@ -2,19 +2,89 @@ // The .NET Foundation licenses this file to you under the MIT license. #include - + #define ARRAYSIZE(a) (sizeof(a)/sizeof((a)[0])) +void generatePostamble(int bytesEmitted) +{ + // We need a postamble of single-byte instructions so the disassembler can get back on track + // after a bad instruction. We always pad up to 16 bytes total codes: the maximum x86 instruction + // size is 15, so the disassembler will find at worst a 15 byte instruction followed by a single byte + // padding instruction. The minimum byte sequence we generate below is a single opcode plus a modrm, + // so we need 14 possible postamble/padding bytes. + const char* postamble[] = { + "0x50, ", + "0x51, ", + "0x52, ", + "0x53, ", + "0x54, ", + "0x55, ", + "0x56, ", + "0x57, ", + "0x58, ", + "0x59, ", + "0x59, ", + "0x59, ", + "0x59, ", + "0x59, " + }; + + int bytesToEmit = 16 - bytesEmitted; + for (int i = 0; i < bytesToEmit; i++) + { + printf("%s", postamble[i]); + } + printf("\n"); +} + int main(int argc, char* argv[]) { printf("#include \n"); printf("#include \n"); - const char* postamble = "0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,\n"; + // The sequence of generated codes is important: the tool which reads the disassembled instructions + // processes one "opcode" at a time. When the opcode changes, it summarizes the opcode and moves on + // to process the next one. Here, "opcode" means a single instruction. 
In the x64 encoding, this + // can be determined by the primary opcode byte, the prefix (0x66, 0xF2, 0xF3) or equivalent "pp" field + // in the VEX/EVEX prefix, and the "reg/opcode" field of the ModRM byte, which sometimes provides additional + // "opcode" bits. + // + // When generating codes, for every primary opcode, we output the following ModRM bytes: + // 0x05, 0x0d, 0x15, 0x1d, 0x25, 0x2d, 0x35, 0x3d + // this corresponds to modrm.rm=0x5 and modrm.reg=0,1,2,3,4,5,6,7. That is, all possible modrm.reg values. + // modrm.mod=0/modrm.rm=0x5 corresponds to RIP-relative addressing. The purpose of varying modrm.reg + // is to find all cases where an instruction encoding depends on modrm.reg. + // + // Thus, the 'modrm' loop needs to be less nested than the opcode/prefix loop, since varying modrm + // can change the "instruction". + // + // Note: it might be more robust to not have this ordering restriction but that would require the + // processing tool to save all in-progress calculations, for all instructions -- perhaps using a + // lot of memory? printf("uint8_t opcodes[] = {\n"); - printf("// Primary Opcode\n"); + struct byteSequence { + const char* string; + int numBytes; + }; + + // Opcodes in legacy map 0 don't change the instruction based on the 0x66 prefix (unlike in + // other maps), so the 0x66 prefix can vary inside (in a more nested loop) the modrm loop. + const byteSequence legacyMap0PrefixStrings[] = { + { "", 0 }, + { "0x66, ", 1 }, // Operand size prefix 0x66 + { "0x40, ", 1 }, // REX + { "0x66, 0x40, ", 2 }, // Operand size prefix 0x66 + REX + { "0x4F, ", 1 }, // REX.WRXB + { "0x66, 0x4F, ", 2 }, // Operand size prefix 0x66 + REX.WRXB + { "0xD5, 0x00, ", 2 }, // REX2.M0=0.R4=0.X4=0.B4=0.W=0.R3=0.X3=0.B3=0 + { "0x66, 0xD5, 0x00, ", 3 }, // Operand size prefix 0x66 + REX2.M0=0.R4=0.X4=0.B4=0.W=0.R3=0.X3=0.B3=0 + { "0xD5, 0x7F, ", 2 }, // REX2.M0=0.R4=1.X4=1.B4=1.W=1.R3=1.X3=1.B3=1 + { "0x66, 0xD5, 0x7F, ", 3 } // Operand size prefix 0x66 + REX2.M0=0.R4=1.X4=1.B4=1.W=1.R3=1.X3=1.B3=1 + }; + + printf("// Primary Opcode (legacy map 0)\n"); for (int i = 0; i < 256; ++i) { switch(i) @@ -47,6 +117,7 @@ int main(int argc, char* argv[]) case 0x67: // AddrSize case 0xc4: // Vex 3 Byte case 0xc5: // Vex 2 Byte + case 0xd5: // REX2 case 0xf0: // Lock case 0xf2: // Repne case 0xf3: // Rep @@ -54,24 +125,36 @@ int main(int argc, char* argv[]) default: break; } + for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "0x%02x, 0x%02x, %s", i, modrm, postamble); - printf( "0x66, 0x%02x, 0x%02x, %s", i, modrm, postamble); - // REX - printf( "0x40, 0x%02x, 0x%02x, %s", i, modrm, postamble); - printf( "0x66, 0x40, 0x%02x, 0x%02x, %s", i, modrm, postamble); - // REX.WRXB - printf( "0x4f, 0x%02x, 0x%02x, %s", i, modrm, postamble); - printf( "0x66, 0x4f, 0x%02x, 0x%02x, %s", i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap0PrefixStrings); ++prefixNum) + { + printf("%s0x%02x, 0x%02x, ", legacyMap0PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(legacyMap0PrefixStrings[prefixNum].numBytes + 2); + } } printf("\n"); } // `66 F2` is only used for `0F 38 F*` ("row F") - const char* const ppString[] = {"", "0x66, ", "0xf3, ", "0xf2, ", "0x66, 0xf2, "}; + const byteSequence ppString[] = { + { "", 0 }, + { "0x66, ", 1 }, + { "0xf3, ", 1 }, + { "0xf2, ", 1 }, + { "0x66, 0xf2, ", 2 } + }; - printf("// Secondary Opcode\n"); + const byteSequence legacyMap1PrefixStrings[] = { + { "0x0F, ", 1 }, // Escape prefix + { "0x40, 0x0F, ", 2 }, // REX + { "0x4F, 0x0F, 
", 2 }, // REX.WRXB + { "0xD5, 0x80, ", 2 }, // REX2.M0=1.R4=0.X4=0.B4=0.W=0.R3=0.X3=0.B3=0 + { "0xD5, 0xFF, ", 2 } // REX2.M0=1.R4=1.X4=1.B4=1.W=1.R3=1.X3=1.B3=1 + }; + + printf("// Secondary Opcode: 0F (legacy map 1)\n"); for (int i = 0; i < 256; ++i) { if (i == 0x38) // extension: 0F 38 @@ -83,17 +166,23 @@ int main(int argc, char* argv[]) { for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "%s0x0f, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX - printf( "0x40, %s0x0f, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX.WRXB - printf( "0x4f, %s0x0f, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap1PrefixStrings); ++prefixNum) + { + printf("%s%s0x%02x, 0x%02x, ", ppString[pp].string, legacyMap1PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(ppString[pp].numBytes + legacyMap1PrefixStrings[prefixNum].numBytes + 2); + } } } printf("\n"); } - printf("// 0F 38\n"); + const byteSequence legacyMap2PrefixStrings[] = { + { "0x0F, 0x38, ", 2 }, + { "0x40, 0x0F, 0x38, ", 3 }, // REX + { "0x4F, 0x0F, 0x38, ", 3 } // REX.WRXB + }; + + printf("// 0F 38 (legacy map 2)\n"); for (int i = 0; i < 256; ++i) { for (int pp = 0; pp < 5; ++pp) @@ -104,28 +193,34 @@ int main(int argc, char* argv[]) for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "%s0x0f, 0x38, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX - printf( "%s0x40, 0x0f, 0x38, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX.WRXB - printf( "%s0x4f, 0x0f, 0x38, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap2PrefixStrings); ++prefixNum) + { + printf("%s%s0x%02x, 0x%02x, ", ppString[pp].string, legacyMap2PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(ppString[pp].numBytes + legacyMap2PrefixStrings[prefixNum].numBytes + 2); + } } } printf("\n"); } - printf("// 0F 3A\n"); + const byteSequence legacyMap3PrefixStrings[] = { + { "0x0F, 0x3A, ", 2 }, + { "0x40, 0x0F, 0x3A, ", 3 }, // REX + { "0x4F, 0x0F, 0x3A, ", 3 } // REX.WRXB + }; + + printf("// 0F 3A (legacy map 3)\n"); for (int i = 0; i < 256; ++i) { for (int pp = 0; pp < 2; ++pp) // only 66 prefix is used (no F3, F2) (F2 is used in VEX 0F 3A) { for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "%s0x0f, 0x3A, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX - printf( "%s0x40, 0x0f, 0x3A, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX.WRXB - printf( "%s0x4f, 0x0f, 0x3A, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap3PrefixStrings); ++prefixNum) + { + printf("%s%s0x%02x, 0x%02x, ", ppString[pp].string, legacyMap3PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(ppString[pp].numBytes + legacyMap3PrefixStrings[prefixNum].numBytes + 2); + } } } printf("\n"); @@ -157,7 +252,8 @@ int main(int argc, char* argv[]) { for (int c = 0; c < ARRAYSIZE(VexByte2Cases); ++c) { - printf( "0xc4, 0xe1, 0x%02x, 0x%02x, 0x%02x, %s", pp + VexByte2Cases[c], i, modrm, postamble); + printf("0xc4, 0xe1, 0x%02x, 0x%02x, 0x%02x, ", pp + VexByte2Cases[c], i, modrm); + generatePostamble(5); } } } @@ -173,7 +269,8 @@ int main(int argc, char* argv[]) { for (int c = 0; c < ARRAYSIZE(VexByte2Cases); ++c) { - printf( "0xc4, 0xe2, 0x%02x, 0x%02x, 0x%02x, %s", pp + VexByte2Cases[c], i, modrm, postamble); + printf("0xc4, 0xe2, 0x%02x, 0x%02x, 0x%02x, ", pp + 
VexByte2Cases[c], i, modrm); + generatePostamble(5); } } } @@ -189,7 +286,8 @@ int main(int argc, char* argv[]) { for (int c = 0; c < ARRAYSIZE(VexByte2Cases); ++c) { - printf( "0xc4, 0xe3, 0x%02x, 0x%02x, 0x%02x, %s", pp + VexByte2Cases[c], i, modrm, postamble); + printf("0xc4, 0xe3, 0x%02x, 0x%02x, 0x%02x, ", pp + VexByte2Cases[c], i, modrm); + generatePostamble(5); } } } @@ -199,21 +297,30 @@ int main(int argc, char* argv[]) // Interesting cases for the EVEX prefix. Several cases are added below, in the loops, to ensure desired // ordering: // 1. cases of `mmm` (which defines the opcode decoding map) are the outer loops. - // 2. cases of `pp`, next inner loops. - // 3. cases of ModR/M byte, innermost loops. + // 2. one-byte instruction opcode, next inner loops. + // 3. cases of `pp`, next inner loops. + // 4. cases of ModR/M byte, next inner loops. + // 5. various EVEX cases, innermost loops. + // NOTE: 4 & 5 can probably (and possibly should, for consistency with above loops) be swapped. // // In all cases, we have: // P0: // P[3] = P0[3] = 0 // required by specification + // -- For APX, mmm=0b100, P[3] = B4, 0 is ok // EVEX.R'=1 (inverted) + // -- For APX, mmm=0b100, EVEX.R' = EVEX.R4 (inverted) = P[4]. 1 (inverted value) is ok // EVEX.RXB=111 (inverted) + // -- For APX, mmm=0b100, EVEX.RXB (inverted) = EVEX.R3.X3.B3 (inverted), so 111 is ok. // P1: // P[10] = P1[2] = 1 // required by specification + // -- For APX, mmm=0b100, EVEX.X4/1 (inverted) so 1 is ok // P2: - // EVEX.aaa = 0 // opmask register k0 (no masking) - // EVEX.V'=1 (inverted) - // EVEX.b=0 // no broadcast (REVIEW: need to handle broadcast as it changes the size of the memory operand) - // EVEX.z=0 // always merge + // P[18:16] = P2[2:0] = EVEX.aaa = 0 // opmask register k0 (no masking) + // -- For APX, mmm=0b100, P2[0] = P2[1] = 0, P2[2] = NF = 0 (same as non-APX) + // P[19] = P2[3] = EVEX.V'=1 (inverted) + // -- For APX, mmm=0b100, EVEX.V' = EVEX.V4 (inverted), so 1 is ok. + // P[23] = P2[7] = EVEX.z=0 // always merge + // -- For APX, mmm=0b100, P[23] = 0. // // Note that we don't need to consider disp8*N compressed displacement support since that is not used for // RIP-relative addressing, which is all we care about. @@ -222,6 +329,10 @@ int main(int argc, char* argv[]) const int evex_p1_base = 0x04; const int evex_p2_base = 0x08; + const int evex_4_p0_base = 0xf0; + const int evex_4_p1_base = 0x7c; + const int evex_4_p2_base = 0x08; + const int evex_w_cases[] = // EVEX.W in P1 { 0, @@ -229,6 +340,8 @@ int main(int argc, char* argv[]) }; const size_t evex_w_cases_size = ARRAYSIZE(evex_w_cases); + // For APX, mmm=0b100, EVEX.vvvv is used to store NDD register if EVEX.ND=1. We never set EVEX.ND=1 + // since it doesn't affect instruction size or RIP-relative memory information. const int evex_vvvv_cases[] = // EVEX.vvvv in P1 { 0, // 0000b (xmm15) @@ -236,6 +349,7 @@ int main(int argc, char* argv[]) }; const size_t evex_vvvv_cases_size = ARRAYSIZE(evex_vvvv_cases); + // For APX, mmm=0b100, P[22:21] = P2[6:5] = EVEX.L'L and must be zero. 
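For orientation, the complete 6-byte EVEX test encodings emitted in the loops below are assembled from the P0/P1/P2 base values above plus the per-case bits. A condensed illustrative sketch, fixed to the no-masking, no-broadcast, 128-bit defaults; this is not the generator's actual code, and the helper name is made up:

```cpp
// Illustrative sketch only: one RIP-relative EVEX test encoding built from the base P0/P1/P2
// values. mmm selects the opcode map (001=0F, 010=0F 38, 011=0F 3A, 100=APX promoted legacy),
// pp mirrors the legacy 66/F3/F2 prefixes, and modrm=0x05 with a varying reg field produces
// RIP-relative addressing for every modrm.reg value.
#include <cstdint>
#include <vector>

static std::vector<uint8_t> MakeEvexSample(uint8_t mmm, uint8_t pp, bool w, uint8_t opcode, uint8_t modrmReg)
{
    uint8_t p0 = 0xf0 | mmm;                            // RXB=111, R'=1 (inverted), P[3]=0, map in low bits
    uint8_t p1 = 0x04 | pp | (w ? 0x80 : 0x00);         // P[10]=1 (required), vvvv=0000, pp, EVEX.W
    uint8_t p2 = 0x08;                                   // V'=1 (inverted), aaa=0, b=0, L'L=00, z=0
    uint8_t modrm = (uint8_t)(0x05 | (modrmReg << 3));   // mod=00, rm=101 => RIP-relative
    return { 0x62, p0, p1, p2, opcode, modrm };
}
```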
const int evex_LprimeL_cases[] = // EVEX.L'L in P2 { 0, // 00b = 128-bit vectors @@ -244,6 +358,7 @@ int main(int argc, char* argv[]) }; const size_t evex_LprimeL_cases_size = ARRAYSIZE(evex_LprimeL_cases); + // -- For APX, mmm=0b100, P[20] = P2[4] = EVEX.b = EVEX.ND, so we keep it zero const int evex_b_cases[] = // EVEX.b in P2 { 0, // 0b = no broadcast @@ -251,14 +366,17 @@ int main(int argc, char* argv[]) }; const size_t evex_b_cases_size = ARRAYSIZE(evex_b_cases); - const size_t total_evex_cases = evex_w_cases_size * evex_vvvv_cases_size * evex_LprimeL_cases_size * evex_b_cases_size; + const size_t total_evex_cases = evex_w_cases_size * evex_vvvv_cases_size * evex_LprimeL_cases_size * evex_b_cases_size; + const size_t total_evex_4_cases = evex_w_cases_size; struct EvexBytes { int p0, p1, p2; - } - EvexCases[total_evex_cases]; - + }; + + EvexBytes EvexCases[total_evex_cases]; // cases for mmm=0b001, 0b010, 0b011 + EvexBytes Evex4Cases[total_evex_4_cases]; // cases for mmm=0b100 + size_t evex_case = 0; for (size_t i = 0; i < evex_w_cases_size; i++) { @@ -277,6 +395,15 @@ int main(int argc, char* argv[]) } } + evex_case = 0; + for (size_t i = 0; i < evex_w_cases_size; i++) + { + Evex4Cases[evex_case].p0 = evex_4_p0_base; + Evex4Cases[evex_case].p1 = evex_4_p1_base | evex_w_cases[i]; + Evex4Cases[evex_case].p2 = evex_4_p2_base; + ++evex_case; + } + printf("// EVEX: mmm=001 (0F)\n"); for (int i = 0; i < 256; ++i) { @@ -289,7 +416,8 @@ int main(int argc, char* argv[]) int evex_p0 = EvexCases[c].p0 | 0x1; // mmm=001 (0F) int evex_p1 = EvexCases[c].p1 | pp; int evex_p2 = EvexCases[c].p2; - printf( "0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, %s", evex_p0, evex_p1, evex_p2, i, modrm, postamble); + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); } } } @@ -308,7 +436,8 @@ int main(int argc, char* argv[]) int evex_p0 = EvexCases[c].p0 | 0x2; // mmm=010 (0F 38) int evex_p1 = EvexCases[c].p1 | pp; int evex_p2 = EvexCases[c].p2; - printf( "0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, %s", evex_p0, evex_p1, evex_p2, i, modrm, postamble); + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); } } } @@ -327,7 +456,28 @@ int main(int argc, char* argv[]) int evex_p0 = EvexCases[c].p0 | 0x3; // mmm=011 (0F 3A) int evex_p1 = EvexCases[c].p1 | pp; int evex_p2 = EvexCases[c].p2; - printf( "0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, %s", evex_p0, evex_p1, evex_p2, i, modrm, postamble); + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); + } + } + } + printf("\n"); + } + + printf("// EVEX: mmm=100 (extended EVEX; APX promoted legacy map 0 instructions)\n"); + for (int i = 0; i < 256; ++i) + { + for (int pp = 0; pp < 4; ++pp) + { + for (int modrm = 0x5; modrm < 64; modrm += 8) + { + for (int c = 0; c < ARRAYSIZE(Evex4Cases); ++c) + { + int evex_p0 = Evex4Cases[c].p0 | 0x4; // mmm=100 + int evex_p1 = Evex4Cases[c].p1 | pp; + int evex_p2 = Evex4Cases[c].p2; + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); } } } diff --git a/src/coreclr/debug/ee/amd64/walker.cpp b/src/coreclr/debug/ee/amd64/walker.cpp index 4eef90d526a2..5ed3b9c8e319 100644 --- a/src/coreclr/debug/ee/amd64/walker.cpp +++ b/src/coreclr/debug/ee/amd64/walker.cpp @@ -23,6 +23,8 @@ // void NativeWalker::Decode() { + LOG((LF_CORDB, LL_INFO100000, "NW:Decode: m_ip 0x%p\n", 
m_ip)); + const BYTE *ip = m_ip; m_type = WALK_UNKNOWN; @@ -30,13 +32,13 @@ void NativeWalker::Decode() m_nextIP = NULL; BYTE rex = 0; - - LOG((LF_CORDB, LL_INFO100000, "NW:Decode: m_ip 0x%p\n", m_ip)); + BYTE rex2_payload = 0; + bool has_rex2 = false; BYTE prefix = *ip; if (prefix == 0xcc) { - prefix = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); + prefix = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); // REVIEW: change `m_ip` to `ip`? LOG((LF_CORDB, LL_INFO100000, "NW:Decode 1st byte was patched, might have been prefix\n")); } @@ -65,8 +67,13 @@ void NativeWalker::Decode() // String REP prefixes case 0xf2: // REPNE/REPNZ case 0xf3: - LOG((LF_CORDB, LL_INFO10000, "NW:Decode: prefix:%0.2x ", prefix)); + LOG((LF_CORDB, LL_INFO10000, "NW:Decode: prefix:%02x ", prefix)); ip++; + // REVIEW: it looks like a bug that we don't loop here looking for additional + // prefixes (the 'continue' branches to the 'while (0)' which exits the loop). + // Thus, we will only process a single prefix. For example, we won't process + // "66 40", which is an operand size prefix followed by a REX prefix, and is legal. + // REX and REX2 need to be the final prefixes, but even then, looping would be safe. continue; // REX register extension prefixes @@ -86,13 +93,27 @@ void NativeWalker::Decode() case 0x4d: case 0x4e: case 0x4f: - LOG((LF_CORDB, LL_INFO10000, "NW:Decode: REX prefix:%0.2x ", prefix)); + LOG((LF_CORDB, LL_INFO10000, "NW:Decode: REX prefix:%02x ", prefix)); // make sure to set rex to prefix, not *ip because *ip still represents the // codestream which has a 0xcc in it. rex = prefix; ip++; continue; + // REX2 register extension prefix + case 0xd5: + LOG((LF_CORDB, LL_INFO10000, "NW:Decode: REX2 prefix:%02x ", prefix)); + has_rex2 = true; + ip++; + rex2_payload = *ip; // Get the REX2 payload byte + if (rex2_payload == 0xcc) + { + rex2_payload = (BYTE)DebuggerController::GetPatchedOpcode(ip); + LOG((LF_CORDB, LL_INFO100000, "NW:Decode 2nd byte was patched, REX2 prefix payload byte\n")); + } + ip++; + continue; + default: break; } @@ -101,18 +122,18 @@ void NativeWalker::Decode() // Read the opcode m_opcode = *ip++; - LOG((LF_CORDB, LL_INFO100000, "NW:Decode: ip 0x%p, m_opcode:%0.2x\n", ip, m_opcode)); + LOG((LF_CORDB, LL_INFO100000, "NW:Decode: ip 0x%p, m_opcode:%02x\n", ip, m_opcode)); // Don't remove this, when we did the check above for the prefix we didn't modify the codestream // and since m_opcode was just taken directly from the code stream it will be patched if we // didn't have a prefix if (m_opcode == 0xcc) { - m_opcode = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); - LOG((LF_CORDB, LL_INFO100000, "NW:Decode after patch look up: m_opcode:%0.2x\n", m_opcode)); + m_opcode = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); // REVIEW: it looks like a bug that we use 'm_ip' instead of 'ip' here. + LOG((LF_CORDB, LL_INFO100000, "NW:Decode after patch look up: m_opcode:%02x\n", m_opcode)); } - // Setup rex bits if needed + // Setup REX bits if needed BYTE rex_b = 0; BYTE rex_x = 0; BYTE rex_r = 0; @@ -124,29 +145,52 @@ void NativeWalker::Decode() rex_r = (rex & 0x4) >> 2; // high bit to modrm reg field } + // Setup REX2 bits if needed + BYTE rex2_b3 = 0; + BYTE rex2_b4 = 0; + BYTE rex2_x3 = 0; + BYTE rex2_x4 = 0; + BYTE rex2_r3 = 0; + BYTE rex2_r4 = 0; + + // We could have a REX2 prefix with a zero payload byte, but that would leave these fields all zero, which is correct. 
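For context, the REX2 payload byte (the byte that follows the 0xD5 prefix) carries the map-select bit M0, W, and two sets of R/X/B bits; the extra R4/X4/B4 bits are what extend modrm/SIB register numbers to the 32 APX GPRs. A small illustrative sketch of that layout, matching the extraction the walker performs below (the struct and function names here are made up):

```cpp
// Illustrative sketch only: decode a REX2 payload byte and show how it extends a 3-bit
// modrm.rm field to the full 5-bit APX register number.
// Payload layout: bit7=M0 (0=legacy map 0, 1=map 1/0F), bit6=R4, bit5=X4, bit4=B4,
//                 bit3=W, bit2=R3, bit1=X3, bit0=B3.
#include <cstdint>

struct Rex2Bits
{
    bool m0, w;
    uint8_t r3, x3, b3, r4, x4, b4;
};

static Rex2Bits DecodeRex2(uint8_t payload)
{
    Rex2Bits bits;
    bits.b3 = payload & 0x1;
    bits.x3 = (payload >> 1) & 0x1;
    bits.r3 = (payload >> 2) & 0x1;
    bits.w  = (payload & 0x08) != 0;
    bits.b4 = (payload >> 4) & 0x1;
    bits.x4 = (payload >> 5) & 0x1;
    bits.r4 = (payload >> 6) & 0x1;
    bits.m0 = (payload & 0x80) != 0;
    return bits;
}

// e.g. the base register referenced by modrm.rm becomes:
//   reg = rm | (bits.b3 << 3) | (bits.b4 << 4);   // 0..31
```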
+ if (rex2_payload != 0) + { + rex2_b3 = rex2_payload & 0x1; + rex2_x3 = (rex2_payload >> 1) & 0x1; + rex2_r3 = (rex2_payload >> 2) & 0x1; + rex2_b4 = (rex2_payload >> 4) & 0x1; + rex2_x4 = (rex2_payload >> 5) & 0x1; + rex2_r4 = (rex2_payload >> 6) & 0x1; + } + // Analyze what we can of the opcode switch (m_opcode) { + // Look for CALL, JMP with opcode 0xFF, modrm.reg=2,3,4,5 case 0xff: { BYTE modrm = *ip++; - // Ignore "inc dword ptr [reg]" instructions - if (modrm == 0) - break; - BYTE mod = (modrm & 0xC0) >> 6; BYTE reg = (modrm & 0x38) >> 3; BYTE rm = (modrm & 0x07); - reg |= (rex_r << 3); - rm |= (rex_b << 3); - - if ((reg < 2) || (reg > 5 && reg < 8) || (reg > 15)) { - // not a valid register for a CALL or BRANCH + if ((reg < 2) || (reg > 5)) { + // Not a CALL/JMP instruction (modrm.reg field is an opcode extension for opcode FF) return; } + BYTE rm_reg = rm; + if (rex != 0) + { + rm_reg |= (rex_b << 3); + } + else if (rex2_payload != 0) + { + rm_reg |= (rex2_b3 << 3) | (rex2_b4 << 4); + } + BYTE *result; WORD displace; @@ -158,12 +202,12 @@ void NativeWalker::Decode() case 0: case 1: case 2: - if ((rm & 0x07) == 4) // we have an SIB byte following + if (rm == 4) // we have an SIB byte following { // // Get values from the SIB byte // - BYTE sib = *ip; + BYTE sib = *ip; _ASSERT(sib != 0); @@ -171,21 +215,31 @@ void NativeWalker::Decode() BYTE index = (sib & 0x38) >> 3; BYTE base = (sib & 0x07); - index |= (rex_x << 3); - base |= (rex_b << 3); + BYTE index_reg = index; + BYTE base_reg = base; + if (rex != 0) + { + index_reg |= (rex_x << 3); + base_reg |= (rex_b << 3); + } + else if (rex2_payload != 0) + { + index_reg |= (rex2_x3 << 3) | (rex2_x4 << 4); + base_reg |= (rex2_b3 << 3) | (rex2_b4 << 4); + } ip++; // // Get starting value // - if ((mod == 0) && ((base & 0x07) == 5)) + if ((mod == 0) && (base == 5)) { result = 0; } else { - result = (BYTE *)(size_t)GetRegisterValue(base); + result = (BYTE *)(size_t)GetRegisterValue(base_reg); } // @@ -193,7 +247,7 @@ void NativeWalker::Decode() // if (index != 0x4) { - result = result + (GetRegisterValue(index) << ss); + result = result + (GetRegisterValue(index_reg) << ss); } // @@ -201,7 +255,7 @@ void NativeWalker::Decode() // if (mod == 0) { - if ((base & 0x07) == 5) + if (base == 5) { result = result + *((INT32*)ip); displace = 7; @@ -221,7 +275,6 @@ void NativeWalker::Decode() result = result + *((INT32*)ip); displace = 7; } - } else { @@ -230,28 +283,32 @@ void NativeWalker::Decode() // // Check for RIP-relative addressing mode. - if ((mod == 0) && ((rm & 0x07) == 5)) + if ((mod == 0) && (rm == 5)) { + // [RIP + disp32] displace = 6; // 1 byte opcode + 1 byte modrm + 4 byte displacement (signed) result = const_cast(m_ip) + displace + *(reinterpret_cast(ip)); } else { - result = (BYTE *)GetRegisterValue(rm); + result = (BYTE *)GetRegisterValue(rm_reg); if (mod == 0) { - displace = 2; + // [modrm.rm] + displace = 2; // 1 byte opcode + 1 byte modrm } else if (mod == 1) { + // [modrm.rm + disp8] result = result + *((INT8*)ip); - displace = 3; + displace = 3; // 1 byte opcode + 1 byte modrm + 1 byte displacement } else // mod == 2 { + // [modrm.rm + disp32] result = result + *((INT32*)ip); - displace = 6; + displace = 6; // 1 byte opcode + 1 byte modrm + 4 byte displacement (signed) } } } @@ -266,9 +323,9 @@ void NativeWalker::Decode() case 3: default: // The operand is stored in a register. 
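One detail worth keeping in mind for the addressing-mode cases above: `displace` counts the instruction bytes after any prefixes (opcode, modrm, optional SIB, optional displacement), and the prefix bytes (one for 0x66, two for the 0xD5 REX2 pair) are added back just after this switch. A condensed illustrative tally of those byte counts; this is a sketch derived from the comments, not the walker's code:

```cpp
// Illustrative sketch only: length in bytes of an FF /2../5 (call/jmp) instruction,
// excluding prefixes until the final two adjustments.
static int IndirectCallJmpLength(int mod, int rm, int sibBase, bool hasOpSizePrefix, bool hasRex2)
{
    int len = 2;                                   // opcode (0xFF) + modrm
    if (mod != 3 && rm == 4)                       // SIB byte present
    {
        len += 1;
        if (mod == 0 && sibBase == 5) len += 4;    // no base register: disp32 follows the SIB
    }
    else if (mod == 0 && rm == 5)
    {
        len += 4;                                  // RIP-relative: disp32
    }
    if (mod == 1) len += 1;                        // disp8
    if (mod == 2) len += 4;                        // disp32
    if (hasOpSizePrefix) len += 1;                 // 0x66 prefix
    if (hasRex2)         len += 2;                 // 0xD5 prefix + payload byte
    return len;
}
```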
- result = (BYTE *)GetRegisterValue(rm); - displace = 2; - + // [modrm.rm] + result = (BYTE *)GetRegisterValue(rm_reg); + displace = 2; // 1 byte opcode + 1 byte modrm break; } @@ -280,6 +337,11 @@ void NativeWalker::Decode() displace++; } + if (has_rex2) // Can't just check `rex2_payload` since that payload byte might be zero. + { + displace += 2; // adjust for the size of the REX2 prefix + } + // because we already checked register validity for CALL/BRANCH // instructions above we can assume that there is no other option if ((reg == 4) || (reg == 5)) @@ -344,52 +406,71 @@ UINT64 NativeWalker::GetRegisterValue(int registerNumber) { case 0: return m_registers->pCurrentContext->Rax; - break; case 1: return m_registers->pCurrentContext->Rcx; - break; case 2: return m_registers->pCurrentContext->Rdx; - break; case 3: return m_registers->pCurrentContext->Rbx; - break; case 4: return m_registers->pCurrentContext->Rsp; - break; case 5: return m_registers->pCurrentContext->Rbp; - break; case 6: return m_registers->pCurrentContext->Rsi; - break; case 7: return m_registers->pCurrentContext->Rdi; - break; case 8: return m_registers->pCurrentContext->R8; - break; case 9: return m_registers->pCurrentContext->R9; - break; case 10: return m_registers->pCurrentContext->R10; - break; case 11: return m_registers->pCurrentContext->R11; - break; case 12: return m_registers->pCurrentContext->R12; - break; case 13: return m_registers->pCurrentContext->R13; - break; case 14: return m_registers->pCurrentContext->R14; - break; case 15: return m_registers->pCurrentContext->R15; - break; +#if 0 + // TODO-XArch-APX: The Windows SDK doesn't define the APX eGPR registers yet. + case 16: + return m_registers->pCurrentContext->R16; + case 17: + return m_registers->pCurrentContext->R17; + case 18: + return m_registers->pCurrentContext->R18; + case 19: + return m_registers->pCurrentContext->R19; + case 20: + return m_registers->pCurrentContext->R21; + case 21: + return m_registers->pCurrentContext->R21; + case 22: + return m_registers->pCurrentContext->R22; + case 23: + return m_registers->pCurrentContext->R23; + case 24: + return m_registers->pCurrentContext->R24; + case 25: + return m_registers->pCurrentContext->R25; + case 26: + return m_registers->pCurrentContext->R26; + case 27: + return m_registers->pCurrentContext->R27; + case 28: + return m_registers->pCurrentContext->R28; + case 29: + return m_registers->pCurrentContext->R29; + case 30: + return m_registers->pCurrentContext->R30; + case 31: + return m_registers->pCurrentContext->R31; +#endif default: _ASSERTE(!"Invalid register number!"); } @@ -431,8 +512,6 @@ static bool InstructionHasModRMByte(Amd64InstrDecode::InstrForm form, bool W) modrm = false; break; default: - if (form & Amd64InstrDecode::InstrForm::Extension) - modrm = true; break; } return modrm; @@ -446,15 +525,15 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) // M1st cases (memory operand comes first) case Amd64InstrDecode::InstrForm::M1st_I1B_L_M16B_or_M8B: case Amd64InstrDecode::InstrForm::M1st_I1B_LL_M8B_M16B_M32B: + case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::M1st_I4B_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::M1st_I1B_WP_M8B_or_M4B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_L_M32B_or_M16B: case Amd64InstrDecode::InstrForm::M1st_LL_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::M1st_LL_M2B_M4B_M8B: case Amd64InstrDecode::InstrForm::M1st_LL_M4B_M8B_M16B: case 
Amd64InstrDecode::InstrForm::M1st_LL_M8B_M16B_M32B: - case Amd64InstrDecode::InstrForm::M1st_bLL_M4B_M16B_M32B_M64B: - case Amd64InstrDecode::InstrForm::M1st_bLL_M8B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::M1st_M16B: case Amd64InstrDecode::InstrForm::M1st_M16B_I1B: case Amd64InstrDecode::InstrForm::M1st_M1B: @@ -469,6 +548,7 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) case Amd64InstrDecode::InstrForm::M1st_W_M4B_or_M1B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::M1st_W_M8B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::M1st_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::M1st_WP_M8B_or_M4B_or_M2B: @@ -482,6 +562,7 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) case Amd64InstrDecode::InstrForm::MOnly_P_M6B_or_M4B: case Amd64InstrDecode::InstrForm::MOnly_W_M16B_or_M8B: case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::MOnly_WP_M8B_or_M4B_or_M2B: case Amd64InstrDecode::InstrForm::MOnly_WP_M8B_or_M8B_or_M2B: isWrite = true; @@ -495,7 +576,7 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, bool W, bool L, bool evex_b, int LL, bool fPrefix66) { uint8_t opSize = 0; - bool P = !((pp == 1) || fPrefix66); + const bool P = ((pp == 1) || fPrefix66); switch (form) { // M32B @@ -545,6 +626,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // W_M8B_or_M4B case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::M1st_I4B_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::MOp_I1B_W_M8B_or_M4B: @@ -553,7 +635,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // WP_M8B_or_M8B_or_M2B case Amd64InstrDecode::InstrForm::MOnly_WP_M8B_or_M8B_or_M2B: - opSize = W ? 8 : P ? 8 : 2; + opSize = W ? 8 : P ? 2 : 8; break; // WP_M8B_or_M4B_or_M2B case Amd64InstrDecode::InstrForm::M1st_I1B_WP_M8B_or_M4B_or_M2B: @@ -563,11 +645,14 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, case Amd64InstrDecode::InstrForm::MOp_I1B_WP_M8B_or_M4B_or_M2B: case Amd64InstrDecode::InstrForm::MOp_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::MOp_WP_M8B_or_M4B_or_M2B: - opSize = W ? 8 : P ? 4 : 2; + opSize = W ? 8 : P ? 2 : 4; break; // W_M8B_or_M2B + case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M2B: + case Amd64InstrDecode::InstrForm::M1st_W_M8B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::MOp_W_M8B_or_M2B: + case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M2B: opSize = W ? 8 : 2; break; // M8B @@ -581,7 +666,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // P_M6B_or_M4B case Amd64InstrDecode::InstrForm::MOnly_P_M6B_or_M4B: - opSize = P ? 6 : 4; + opSize = P ? 
4 : 6; break; // M4B case Amd64InstrDecode::InstrForm::M1st_M4B: @@ -660,7 +745,6 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // bLL_M4B_M16B_M32B_M64B - case Amd64InstrDecode::InstrForm::M1st_bLL_M4B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_I1B_bLL_M4B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_bLL_M4B_M16B_M32B_M64B: if (evex_b) @@ -674,7 +758,6 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // bLL_M8B_M16B_M32B_M64B - case Amd64InstrDecode::InstrForm::M1st_bLL_M8B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_I1B_bLL_M8B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_bLL_M8B_M16B_M32B_M64B: if (evex_b) @@ -779,7 +862,6 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, } break; - // MUnknown case Amd64InstrDecode::InstrForm::M1st_MUnknown: case Amd64InstrDecode::InstrForm::MOnly_MUnknown: @@ -796,7 +878,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, static int InstructionImmSize(Amd64InstrDecode::InstrForm form, int pp, bool W, bool fPrefix66) { int immSize = 0; - bool P = !((pp == 1) || fPrefix66); + const bool P = ((pp == 1) || fPrefix66); switch (form) { case Amd64InstrDecode::InstrForm::I1B: @@ -833,6 +915,7 @@ static int InstructionImmSize(Amd64InstrDecode::InstrForm form, int pp, bool W, immSize = 3; break; case Amd64InstrDecode::InstrForm::I4B: + case Amd64InstrDecode::InstrForm::M1st_I4B_W_M8B_or_M4B: immSize = 4; break; case Amd64InstrDecode::InstrForm::I8B: @@ -841,10 +924,13 @@ static int InstructionImmSize(Amd64InstrDecode::InstrForm form, int pp, bool W, case Amd64InstrDecode::InstrForm::M1st_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::MOp_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::WP_I4B_or_I4B_or_I2B: - immSize = W ? 4 : P ? 4 : 2; + immSize = W ? 4 : P ? 2 : 4; break; case Amd64InstrDecode::InstrForm::WP_I8B_or_I4B_or_I2B: - immSize = W ? 8 : P ? 4 : 2; + immSize = W ? 8 : P ? 2 : 4; + break; + case Amd64InstrDecode::InstrForm::M1st_W_M8B_I4B_or_M2B_I2B: + immSize = W ? 
4 : 2; break; default: @@ -966,12 +1052,14 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio VexMapC40F3A = 0xc403, EvexMap0F = 0x6201, EvexMap0F38 = 0x6202, - EvexMap0F3A = 0x6203 + EvexMap0F3A = 0x6203, + EvexMap4 = 0x6204 } opCodeMap; switch (*address) { case 0xf: + { switch (address[1]) { case 0x38: @@ -999,6 +1087,7 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio else if (fPrefixF3) pp = 0x2; break; + } case 0xc4: // Vex 3-byte { @@ -1052,7 +1141,10 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio case 0x62: // Evex { - BYTE evex_mmm = address[1] & 0x7; + BYTE evex_p0 = address[1]; + BYTE evex_p1 = address[2]; + BYTE evex_p2 = address[3]; + BYTE evex_mmm = evex_p0 & 0x7; switch (evex_mmm) { case 0x1: @@ -1067,29 +1159,64 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio LOG((LF_CORDB, LL_INFO10000, "map:Evex0F3A ")); opCodeMap = EvexMap0F3A; break; + case 0x4: + LOG((LF_CORDB, LL_INFO10000, "map:Evex4 ")); + opCodeMap = EvexMap4; + break; default: _ASSERT(!"Unknown Evex 'mmm' bytes"); return; } - BYTE evex_w = address[2] & 0x80; + BYTE evex_w = evex_p1 & 0x80; if (evex_w != 0) { W = true; } - if ((address[2] & 0x10) != 0) + if (evex_mmm != 4) { - evex_b = true; - } + if ((evex_p2 & 0x10) != 0) + { + evex_b = true; + } - evex_LL = (address[2] >> 5) & 0x3; + evex_LL = (evex_p2 >> 5) & 0x3; + } - pp = address[1] & 0x3; + pp = evex_p1 & 0x3; address += 4; break; } + case 0xD5: // REX2 + { + BYTE rex2_byte1 = address[1]; + address += 2; + + BYTE rex2_w = rex2_byte1 & 0x08; + if (rex2_w != 0) + { + W = true; + } + + if (fPrefix66) + { + pp = 0x1; + } + + BYTE rex2_m0 = rex2_byte1 & 0x80; + if (rex2_m0 == 0) + { + opCodeMap = Primary; + } + else + { + opCodeMap = Secondary; + } + break; + } + default: opCodeMap = Primary; break; @@ -1105,7 +1232,7 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio switch (opCodeMap) { case Primary: - form = Amd64InstrDecode::instrFormPrimary[opCode]; + form = Amd64InstrDecode::instrFormPrimary[opCode]; // NOTE: instrFormPrimary is the only map that uses 'opCode', not 'opCodeExt'. 
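To summarize the lookup convention implied by the generated header's comments: the primary (legacy map 0) table is indexed by the raw opcode byte, every other table by the packed opcode, and an entry with the Extension bit set points into `instrFormExtension`, where each group of 8 entries is selected by modrm.reg. The sketch below is derived from those comments, not copied from the walker, and the function name is made up:

```cpp
// Illustrative sketch only: resolve an instruction form, following the convention
// Extension|(index >> 3) documented in the generated table header.
static Amd64InstrDecode::InstrForm LookupForm(const Amd64InstrDecode::InstrForm* table,
                                              int index, int modrmReg)
{
    Amd64InstrDecode::InstrForm form = table[index];
    if (((int)form & (int)Amd64InstrDecode::InstrForm::Extension) != 0)
    {
        int group = (int)form & ~(int)Amd64InstrDecode::InstrForm::Extension;  // group number
        form = Amd64InstrDecode::instrFormExtension[(group << 3) + modrmReg];
    }
    return form;
}
```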
break; case Secondary: form = Amd64InstrDecode::instrFormSecondary[opCodeExt]; @@ -1134,6 +1261,9 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio case EvexMap0F3A: form = Amd64InstrDecode::instrFormEvex_0F3A[opCodeExt]; break; + case EvexMap4: + form = Amd64InstrDecode::instrFormEvex_4[opCodeExt]; + break; default: _ASSERTE(false); } @@ -1227,4 +1357,3 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio } #endif // TARGET_AMD64 - diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp index ca63a24170d1..4927e96b7b39 100644 --- a/src/coreclr/debug/ee/controller.cpp +++ b/src/coreclr/debug/ee/controller.cpp @@ -3237,7 +3237,7 @@ void DebuggerController::ApplyTraceFlag(Thread *thread) context = GetManagedStoppedCtx(thread); } CONSISTENCY_CHECK_MSGF(context != NULL, ("Can't apply ss flag to thread 0x%p b/c it's not in a safe place.\n", thread)); - PREFIX_ASSUME(context != NULL); + _ASSERTE(context != NULL); g_pEEInterface->MarkThreadForDebugStepping(thread, true); @@ -4268,7 +4268,7 @@ bool DebuggerController::DispatchNativeException(EXCEPTION_RECORD *pException, CONTRACTL_END; LOG((LF_CORDB, LL_EVERYTHING, "DispatchNativeException was called\n")); - LOG((LF_CORDB, LL_INFO10000, "Native exception at 0x%p, code=0x%8x, context=0x%p, er=0x%p\n", + LOG((LF_CORDB, LL_INFO10000, "Native exception at %p, code=0x%8x, context=%p, er=%p\n", pException->ExceptionAddress, dwCode, pContext, pException)); @@ -4469,7 +4469,19 @@ bool DebuggerController::DispatchNativeException(EXCEPTION_RECORD *pException, ThisFunctionMayHaveTriggerAGC(); } #endif - +#ifdef FEATURE_SPECIAL_USER_MODE_APC + if (pCurThread->m_State & Thread::TS_SSToExitApcCall) + { + if (!CheckActivationSafePoint(GetIP(pContext))) + { + return FALSE; + } + pCurThread->SetThreadState(Thread::TS_SSToExitApcCallDone); + pCurThread->ResetThreadState(Thread::TS_SSToExitApcCall); + DebuggerController::UnapplyTraceFlag(pCurThread); + pCurThread->MarkForSuspensionAndWait(Thread::TS_DebugSuspendPending); + } +#endif // Must restore the filter context. 
After the filter context is gone, we're @@ -5926,7 +5938,7 @@ bool DebuggerStepper::TrapStep(ControllerStackInfo *info, bool in) #ifdef TARGET_X86 LOG((LF_CORDB,LL_INFO1000, "GetJitInfo for pc = 0x%x (addr of " "that value:0x%x)\n", (const BYTE*)(GetControlPC(&info->m_activeFrame.registers)), - info->m_activeFrame.registers.PCTAddr)); + GetRegdisplayPCTAddr(&info->m_activeFrame.registers))); #endif // Note: we used to pass in the IP from the active frame to GetJitInfo, but there seems to be no value in that, and @@ -6049,7 +6061,7 @@ bool DebuggerStepper::TrapStep(ControllerStackInfo *info, bool in) fCallingIntoFunclet = IsAddrWithinMethodIncludingFunclet(ji, info->m_activeFrame.md, walker.GetNextIP()) && ((CORDB_ADDRESS)(SIZE_T)walker.GetNextIP() != ji->m_addrOfCode); #endif - // If we are stepping into a tail call that uses the StoreTailCallArgs + // If we are stepping into a tail call that uses the StoreTailCallArgs // we need to enable the method enter, otherwise it will behave like a resume if (in && IsTailCall(walker.GetNextIP(), info, TailCallFunctionType::StoreTailCallArgs)) { @@ -6613,8 +6625,7 @@ void DebuggerStepper::TrapStepOut(ControllerStackInfo *info, bool fForceTraditio m_reason = STEP_EXIT; break; } - else if (info->m_activeFrame.frame->GetFrameType() == Frame::TYPE_SECURITY && - info->m_activeFrame.frame->GetInterception() == Frame::INTERCEPTION_NONE) + else if (info->m_activeFrame.frame->GetInterception() == Frame::INTERCEPTION_NONE) { // If we're stepping out of something that was protected by (declarative) security, // the security subsystem may leave a frame on the stack to cache it's computation. @@ -7588,9 +7599,9 @@ bool DebuggerStepper::TriggerSingleStep(Thread *thread, const BYTE *ip) // Sometimes we can get here with a callstack that is coming from an APC // this will disable the single stepping and incorrectly resume an app that the user // is stepping through. -#ifdef FEATURE_THREAD_ACTIVATION +#ifdef FEATURE_THREAD_ACTIVATION if ((thread->m_State & Thread::TS_DebugWillSync) == 0) -#endif +#endif // FEATURE_THREAD_ACTIVATION { DisableSingleStep(); } @@ -8694,24 +8705,20 @@ void DebuggerUserBreakpoint::HandleDebugBreak(Thread * pThread) } } - DebuggerUserBreakpoint::DebuggerUserBreakpoint(Thread *thread) : DebuggerStepper(thread, (CorDebugUnmappedStop) (STOP_ALL & ~STOP_UNMANAGED), INTERCEPT_ALL, NULL) { // Setup a step out from the current frame (which we know is // unmanaged, actually...) - - // This happens to be safe, but it's a very special case (so we have a special case ticket) - // This is called while we're live (so no filter context) and from the fcall, - // and we pushed a HelperMethodFrame to protect us. We also happen to know that we have - // done anything illegal or dangerous since then. + // Initiate a step-out from Debug.Break() if the current frame allows it. + // This is now safe because the entry point uses QCall or dynamic transition, + // so no special frame setup is required. StackTraceTicket ticket(this); StepOut(LEAF_MOST_FRAME, ticket); } - // Is this frame interesting? // Use this to skip all code in the namespace "Debugger.Diagnostics" bool DebuggerUserBreakpoint::IsInterestingFrame(FrameInfo * pFrame) @@ -9204,7 +9211,7 @@ bool DebuggerContinuableExceptionBreakpoint::SendEvent(Thread *thread, bool fIpC } // On WIN64, by the time we get here the DebuggerExState is gone already. - // ExceptionTrackers are cleaned up before we resume execution for a handled exception. 
+ // ExInfos are cleaned up before we resume execution for a handled exception. #if !defined(FEATURE_EH_FUNCLETS) thread->GetExceptionState()->GetDebuggerState()->SetDebuggerInterceptContext(NULL); #endif // !FEATURE_EH_FUNCLETS diff --git a/src/coreclr/debug/ee/dactable.cpp b/src/coreclr/debug/ee/dactable.cpp index 6dac100ffb39..80d1995acf89 100644 --- a/src/coreclr/debug/ee/dactable.cpp +++ b/src/coreclr/debug/ee/dactable.cpp @@ -31,16 +31,19 @@ extern "C" void STDCALL ThePreStubPatchLabel(void); #ifdef FEATURE_COMWRAPPERS // Keep these forward declarations in sync with the method definitions in interop/comwrappers.cpp -namespace ABI +namespace InteropLib { - struct ComInterfaceDispatch; + namespace ABI + { + struct ComInterfaceDispatch; + } } HRESULT STDMETHODCALLTYPE ManagedObjectWrapper_QueryInterface( - _In_ ABI::ComInterfaceDispatch* disp, + _In_ InteropLib::ABI::ComInterfaceDispatch* disp, /* [in] */ REFIID riid, /* [iid_is][out] */ _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject); HRESULT STDMETHODCALLTYPE TrackerTarget_QueryInterface( - _In_ ABI::ComInterfaceDispatch* disp, + _In_ InteropLib::ABI::ComInterfaceDispatch* disp, /* [in] */ REFIID riid, /* [iid_is][out] */ _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject); diff --git a/src/coreclr/debug/ee/debugger.cpp b/src/coreclr/debug/ee/debugger.cpp index 0345218e861b..6ae02c817f00 100644 --- a/src/coreclr/debug/ee/debugger.cpp +++ b/src/coreclr/debug/ee/debugger.cpp @@ -71,7 +71,6 @@ InteropSafe interopsafe; DebuggerRCThread *g_pRCThread = NULL; -#ifndef _PREFAST_ // Do some compile time checking on the events in DbgIpcEventTypes.h // No one ever calls this. But the compiler should still compile it, // and that should be sufficient. @@ -184,7 +183,6 @@ void DoCompileTimeCheckOnDbgIpcEventTypes() static_assert_no_msg(f2); } // end checks -#endif // _PREFAST_ //----------------------------------------------------------------------------- // Ctor for AtSafePlaceHolder @@ -2774,6 +2772,13 @@ DebuggerMethodInfo *Debugger::GetOrCreateMethodInfo(Module *pModule, mdMethodDef } CONTRACTL_END; +#ifdef DACCESS_COMPILE + if (!HasLazyData()) + { + return NULL; + } +#endif // #ifdef DACCESS_COMPILE + DebuggerMethodInfo *info = NULL; // When dump debugging, we don't expect to have a lock, @@ -3301,7 +3306,7 @@ void Debugger::getBoundaries(MethodDesc * md, // lives in, then don't grab specific boundaries from the symbol // store since any boundaries we give the JIT will be pretty much // ignored anyway. - if (!CORDisableJITOptimizations(md->GetModule()->GetDebuggerInfoBits())) + if (!md->GetModule()->AreJITOptimizationsDisabled()) { *implicitBoundaries = ICorDebugInfo::BoundaryTypes(ICorDebugInfo::STACK_EMPTY_BOUNDARIES | ICorDebugInfo::CALL_SITE_BOUNDARIES); @@ -3379,13 +3384,10 @@ void Debugger::getVars(MethodDesc * md, ULONG32 *cVars, ICorDebugInfo::ILVarInfo // free to ignore *extendOthers *extendOthers = true; - DWORD bits = md->GetModule()->GetDebuggerInfoBits(); - if (CORDBUnrecoverableError(this)) goto Exit; - if (CORDisableJITOptimizations(bits)) -// if (!CORDebuggerAllowJITOpts(bits)) + if (md->GetModule()->AreJITOptimizationsDisabled()) { // // @TODO: Do we really need this code since *extendOthers==true? @@ -4961,7 +4963,7 @@ HRESULT Debugger::MapPatchToDJI(DebuggerControllerPatch *dcp, DebuggerJitInfo *d // If the patch has no DJI then we're doing a UnbindFunctionPatches/RebindFunctionPatches. Either // way, we simply want the most recent version. In the absence of EnC we should have djiCur == djiTo. 
DebuggerJitInfo *djiCur = dcp->HasDJI() ? dcp->GetDJI() : djiTo; - PREFIX_ASSUME(djiCur != NULL); + _ASSERTE(djiCur != NULL); // If the source and destination are the same version, then this method // decays into BindFunctionPatch's BindPatch function @@ -5087,7 +5089,7 @@ void Debugger::SendSyncCompleteIPCEvent(bool isEESuspendedForGC) pDCB = m_pRCThread->GetDCB(); (void)pDCB; //prevent "unused variable" error from GCC - PREFIX_ASSUME(pDCB != NULL); // must have DCB by the time we're sending IPC events. + _ASSERTE(pDCB != NULL); // must have DCB by the time we're sending IPC events. #ifdef FEATURE_INTEROP_DEBUGGING // The synccomplete can't be the first IPC event over. That's b/c the LS needs to know // if we're interop-debugging and the RS needs to know special addresses for interop-debugging @@ -5901,8 +5903,7 @@ void Debugger::SendDataBreakpoint(Thread *thread, CONTEXT *context, LOG((LF_CORDB, LL_INFO10000, "D::SDB: breakpoint BP:0x%x\n", breakpoint)); _ASSERTE((g_pEEInterface->GetThread() && - !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled) || - g_fInControlC); + !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled)); _ASSERTE(ThreadHoldsLock()); @@ -5949,8 +5950,7 @@ void Debugger::SendBreakpoint(Thread *thread, CONTEXT *context, LOG((LF_CORDB, LL_INFO10000, "D::SB: breakpoint BP:0x%x\n", breakpoint)); _ASSERTE((g_pEEInterface->GetThread() && - !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled) || - g_fInControlC); + !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled)); _ASSERTE(ThreadHoldsLock()); @@ -6093,8 +6093,7 @@ void Debugger::SendStep(Thread *thread, CONTEXT *context, stepper, reason)); _ASSERTE((g_pEEInterface->GetThread() && - !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled) || - g_fInControlC); + !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled)); _ASSERTE(ThreadHoldsLock()); @@ -6421,8 +6420,7 @@ void Debugger::SyncAllThreads(DebuggerLockHolder *dbgLockHolder) Thread *pThread = g_pEEInterface->GetThread(); (void)pThread; //prevent "unused variable" error from GCC _ASSERTE((pThread && - !pThread->m_fPreemptiveGCDisabled) || - g_fInControlC); + !pThread->m_fPreemptiveGCDisabled)); _ASSERTE(ThreadHoldsLock()); @@ -7571,7 +7569,8 @@ HRESULT Debugger::SendException(Thread *pThread, } CONTRACTL_END; - LOG((LF_CORDB, LL_INFO10000, "D::SendException\n")); + LOG((LF_CORDB, LL_INFO10000, "D::SendException pThread=0x%p fFirstChance=%s currentIP=0x%p currentSP= 0x%p fContinuable=%s fAttaching=%s fForceNonInterceptable=%s\n", + pThread, fFirstChance ? "true" : "false", (void*)currentIP, (void*)currentSP, fContinuable ? "true" : "false", fAttaching ? "true" : "false", fForceNonInterceptable ? "true" : "false")); if (CORDBUnrecoverableError(this)) { @@ -7901,8 +7900,6 @@ LONG Debugger::NotifyOfCHFFilter(EXCEPTION_POINTERS* pExceptionPointers, PVOID p } CONTRACTL_END; - SCAN_IGNORE_TRIGGER; // Scan can't handle conditional contracts. - // @@@ // Implements DebugInterface // Can only be called from EE @@ -8258,7 +8255,7 @@ void Debugger::ManagedExceptionUnwindBegin(Thread *pThread) * * This function is called by the VM to release any debugger specific information for an * exception object. It is called when the VM releases its internal exception stuff, i.e. - * ExInfo on X86 and ExceptionTracker on WIN64. + * ExInfo. 
* * * Parameters: @@ -9005,8 +9002,7 @@ void Debugger::ThreadStarted(Thread* pRuntimeThread) // _ASSERTE((g_pEEInterface->GetThread() && - !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled) || - g_fInControlC); + !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled)); _ASSERTE(ThreadHoldsLock()); DebuggerIPCEvent* ipce = m_pRCThread->GetIPCEventSendBuffer(); @@ -9143,7 +9139,7 @@ BOOL Debugger::SuspendComplete(bool isEESuspendedForGC) // We can't throw here (we're in the middle of the runtime suspension logic). // But things below us throw. So we catch the exception, but then what state are we in? - if (!isEESuspendedForGC) {_ASSERTE((!g_pEEInterface->GetThread() || !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled) || g_fInControlC); } + if (!isEESuspendedForGC) {_ASSERTE((!g_pEEInterface->GetThread() || !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled)); } if (!isEESuspendedForGC) { _ASSERTE(ThisIsHelperThreadWorker()); } STRESS_LOG0(LF_CORDB, LL_INFO10000, "D::SC: suspension complete\n"); @@ -9477,7 +9473,7 @@ void Debugger::SendRawUpdateModuleSymsEvent(Module *pRuntimeModule) return; // Non-PDB symbols DebuggerModule* module = LookupOrCreateModule(pRuntimeModule); - PREFIX_ASSUME(module != NULL); + _ASSERTE(module != NULL); DebuggerIPCEvent* ipce = NULL; ipce = m_pRCThread->GetIPCEventSendBuffer(); @@ -9980,7 +9976,7 @@ void Debugger::FuncEvalComplete(Thread* pThread, DebuggerEval *pDE) _ASSERTE(pDE->m_completed); - _ASSERTE((g_pEEInterface->GetThread() && !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled) || g_fInControlC); + _ASSERTE((g_pEEInterface->GetThread() && !g_pEEInterface->GetThread()->m_fPreemptiveGCDisabled)); _ASSERTE(ThreadHoldsLock()); // @@ -10219,11 +10215,6 @@ BYTE* Debugger::SerializeModuleMetaData(Module * pModule, DWORD * countBytes) // // //--------------------------------------------------------------------------------------- - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif bool Debugger::HandleIPCEvent(DebuggerIPCEvent * pEvent) { CONTRACTL @@ -10714,7 +10705,7 @@ bool Debugger::HandleIPCEvent(DebuggerIPCEvent * pEvent) // Just send back an HR. DebuggerIPCEvent * pIPCResult = m_pRCThread->GetIPCEventReceiveBuffer(); - PREFIX_ASSUME(pIPCResult != NULL); + _ASSERTE(pIPCResult != NULL); InitIPCEvent(pIPCResult, DB_IPCE_SET_DEBUG_STATE_RESULT, NULL, NULL); @@ -10732,7 +10723,7 @@ bool Debugger::HandleIPCEvent(DebuggerIPCEvent * pEvent) DebuggerIPCEvent * pIPCResult = m_pRCThread->GetIPCEventReceiveBuffer(); - PREFIX_ASSUME(pIPCResult != NULL); + _ASSERTE(pIPCResult != NULL); InitIPCEvent(pIPCResult, DB_IPCE_GET_GCHANDLE_INFO_RESULT, NULL, NULL); @@ -11360,9 +11351,6 @@ bool Debugger::HandleIPCEvent(DebuggerIPCEvent * pEvent) // dbgLockHolder goes out of scope - implicit Release return fContinue; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif /* * GetAndSendInterceptCommand @@ -11582,19 +11570,8 @@ HRESULT Debugger::GetAndSendInterceptCommand(DebuggerIPCEvent *event) // Set up the VM side of intercepting. 
// StackFrame sfInterceptFramePointer; - if (g_isNewExceptionHandlingEnabled) - { - sfInterceptFramePointer = StackFrame::FromRegDisplay(&(csi.m_activeFrame.registers)); - } - else - { -#if defined (TARGET_ARM )|| defined (TARGET_ARM64 ) - // ARM requires the caller stack pointer, not the current stack pointer - sfInterceptFramePointer = CallerStackFrame::FromRegDisplay(&(csi.m_activeFrame.registers)); -#else - sfInterceptFramePointer = StackFrame::FromRegDisplay(&(csi.m_activeFrame.registers)); -#endif - } + sfInterceptFramePointer = StackFrame::FromRegDisplay(&(csi.m_activeFrame.registers)); + if (pExState->GetDebuggerState()->SetDebuggerInterceptInfo(csi.m_activeFrame.pIJM, pThread, csi.m_activeFrame.MethodToken, @@ -11687,7 +11664,7 @@ void Debugger::PollWaitingForHelper() DebuggerIPCControlBlock * pDCB = g_pRCThread->GetDCB(); - PREFIX_ASSUME(pDCB != NULL); + _ASSERTE(pDCB != NULL); int nTotalMSToWait = 8 * 1000; @@ -12088,7 +12065,7 @@ HRESULT Debugger::GetAndSendBuffer(DebuggerRCThread* rcThread, ULONG bufSize) // This is a synchronous event (reply required) DebuggerIPCEvent* event = rcThread->GetIPCEventReceiveBuffer(); - PREFIX_ASSUME(event != NULL); + _ASSERTE(event != NULL); InitIPCEvent(event, DB_IPCE_GET_BUFFER_RESULT, NULL, NULL); // Allocate the buffer @@ -12161,7 +12138,7 @@ HRESULT Debugger::SendReleaseBuffer(DebuggerRCThread* rcThread, void *pBuffer) // This is a synchronous event (reply required) DebuggerIPCEvent* event = rcThread->GetIPCEventReceiveBuffer(); - PREFIX_ASSUME(event != NULL); + _ASSERTE(event != NULL); InitIPCEvent(event, DB_IPCE_RELEASE_BUFFER_RESULT, NULL, NULL); _ASSERTE(pBuffer != NULL); @@ -12465,7 +12442,7 @@ void Debugger::UnrecoverableError(HRESULT errorHR, // DebuggerIPCControlBlock *pDCB = m_pRCThread->GetDCB(); - PREFIX_ASSUME(pDCB != NULL); + _ASSERTE(pDCB != NULL); pDCB->m_errorHR = errorHR; pDCB->m_errorCode = errorCode; @@ -12682,7 +12659,7 @@ void Debugger::GetVarInfo(MethodDesc * fd, // [IN] method of interest } _ASSERTE(fd == ji->m_nativeCodeVersion.GetMethodDesc()); - PREFIX_ASSUME(ji != NULL); + _ASSERTE(ji != NULL); *vars = ji->GetVarNativeInfo(); *cVars = ji->GetVarNativeInfoCount(); @@ -14270,9 +14247,9 @@ void Debugger::SendMDANotification( } CONTRACTL_END; - PREFIX_ASSUME(szName != NULL); - PREFIX_ASSUME(szDescription != NULL); - PREFIX_ASSUME(szXML != NULL); + _ASSERTE(szName != NULL); + _ASSERTE(szDescription != NULL); + _ASSERTE(szXML != NULL); // Note: we normally don't send events like this when there is an unrecoverable error. However, // if a host attempts to setup fiber mode on a thread, then we'll set an unrecoverable error @@ -15023,6 +15000,14 @@ HRESULT Debugger::FuncEvalSetup(DebuggerIPCE_FuncEvalInfo *pEvalInfo, return CORDBG_E_ILLEGAL_IN_STACK_OVERFLOW; } +#ifdef FEATURE_SPECIAL_USER_MODE_APC + if (pThread->m_hasPendingActivation) + { + _ASSERTE(!"Should never get here with a pending activation. (Debugger::FuncEvalSetup)"); + return CORDBG_E_ILLEGAL_IN_NATIVE_CODE; + } +#endif + bool fInException = pEvalInfo->evalDuringException; // The thread has to be at a GC safe place for now, just in case the func eval causes a collection. 
Processing an @@ -16794,6 +16779,15 @@ void Debugger::ExternalMethodFixupNextStep(PCODE address) { DebuggerController::DispatchExternalMethodFixup(address); } +#ifdef FEATURE_SPECIAL_USER_MODE_APC +void Debugger::SingleStepToExitApcCall(Thread* pThread, CONTEXT *interruptedContext) +{ + pThread->SetThreadState(Thread::TS_SSToExitApcCall); + g_pEEInterface->SetThreadFilterContext(pThread, interruptedContext); + DebuggerController::EnableSingleStep(pThread); + g_pEEInterface->SetThreadFilterContext(pThread, NULL); +} +#endif //FEATURE_SPECIAL_USER_MODE_APC #endif //DACCESS_COMPILE unsigned FuncEvalFrame::GetFrameAttribs_Impl(void) @@ -16855,21 +16849,19 @@ void FuncEvalFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloa pRD->SetEcxLocation(&(pDE->m_context.Ecx)); pRD->SetEaxLocation(&(pDE->m_context.Eax)); pRD->SetEbpLocation(&(pDE->m_context.Ebp)); - pRD->PCTAddr = GetReturnAddressPtr(); + SetRegdisplayPCTAddr(pRD, GetReturnAddressPtr()); #ifdef FEATURE_EH_FUNCLETS pRD->IsCallerContextValid = FALSE; pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. - pRD->pCurrentContext->Eip = *PTR_PCODE(pRD->PCTAddr); pRD->pCurrentContext->Esp = (DWORD)GetSP(&pDE->m_context); SyncRegDisplayToCurrentContext(pRD); #else // FEATURE_EH_FUNCLETS - pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr); pRD->SP = (DWORD)GetSP(&pDE->m_context); #endif // FEATURE_EH_FUNCLETS diff --git a/src/coreclr/debug/ee/debugger.h b/src/coreclr/debug/ee/debugger.h index 9da256056f71..89999fac6595 100644 --- a/src/coreclr/debug/ee/debugger.h +++ b/src/coreclr/debug/ee/debugger.h @@ -157,7 +157,6 @@ class GCHolderEEInterface public: DEBUG_NOINLINE GCHolderEEInterface() { - SCAN_SCOPE_BEGIN; STATIC_CONTRACT_MODE_COOPERATIVE; if (IFTHREAD && g_pEEInterface->GetThread() == NULL) @@ -182,8 +181,6 @@ class GCHolderEEInterface DEBUG_NOINLINE ~GCHolderEEInterface() { - SCAN_SCOPE_END; - if (IFTHREAD && g_pEEInterface->GetThread() == NULL) { return; @@ -279,7 +276,6 @@ class GCHolderEEInterface public: DEBUG_NOINLINE GCHolderEEInterface() { - SCAN_SCOPE_BEGIN; STATIC_CONTRACT_MODE_PREEMPTIVE; this->EnterInternal(false, true); @@ -287,7 +283,6 @@ class GCHolderEEInterface DEBUG_NOINLINE GCHolderEEInterface(bool bConditional) { - SCAN_SCOPE_BEGIN; if (bConditional) { STATIC_CONTRACT_MODE_PREEMPTIVE; @@ -298,8 +293,6 @@ class GCHolderEEInterface DEBUG_NOINLINE ~GCHolderEEInterface() { - SCAN_SCOPE_END; - this->LeaveInternal(); }; }; @@ -481,8 +474,6 @@ class DebuggerModule PTR_Module m_pRuntimeModule; PTR_DomainAssembly m_pRuntimeDomainAssembly; - bool m_fHasOptimizedCode; - // Can we change jit flags on the module? 
// This is true during the Module creation bool m_fCanChangeJitFlags; @@ -3063,6 +3054,9 @@ class Debugger : public DebugInterface // Used by Debugger::FirstChanceNativeException to update the context from out of process void SendSetThreadContextNeeded(CONTEXT *context, DebuggerSteppingInfo *pDebuggerSteppingInfo = NULL); BOOL IsOutOfProcessSetContextEnabled(); +#ifdef FEATURE_SPECIAL_USER_MODE_APC + void SingleStepToExitApcCall(Thread* pThread, CONTEXT *interruptedContext); +#endif // FEATURE_SPECIAL_USER_MODE_APC }; @@ -3158,7 +3152,7 @@ class CNewZeroData DebuggerHeap* pHeap = g_pDebugger->GetInteropSafeHeap_NoThrow(); _ASSERTE(pHeap != NULL); // should already exist - PREFIX_ASSUME( iCurSize >= 0 ); + _ASSERTE( iCurSize >= 0 ); S_UINT32 iNewSize = S_UINT32( iCurSize ) + S_UINT32( GrowSize(iCurSize) ); if( iNewSize.IsOverflow() ) { @@ -3994,7 +3988,7 @@ HANDLE OpenWin32EventOrThrow( // Returns true if the specified IL offset has a special meaning (eg. prolog, etc.) bool DbgIsSpecialILOffset(DWORD offset); -#if defined(TARGET_WINDOWS) +#if defined(TARGET_WINDOWS) && !defined(TARGET_X86) void FixupDispatcherContext(T_DISPATCHER_CONTEXT* pDispatcherContext, T_CONTEXT* pContext, PEXCEPTION_ROUTINE pUnwindPersonalityRoutine = NULL); #endif diff --git a/src/coreclr/debug/ee/debugger.inl b/src/coreclr/debug/ee/debugger.inl index 4f69164a292f..e9fc233e2637 100644 --- a/src/coreclr/debug/ee/debugger.inl +++ b/src/coreclr/debug/ee/debugger.inl @@ -60,10 +60,6 @@ inline DebuggerModule::DebuggerModule(Module * pRuntimeModule, LOG((LF_CORDB,LL_INFO10000, "DM::DM this:0x%x Module:0x%x DF:0x%x\n", this, pRuntimeModule, pDomainAssembly)); - // Do we have any optimized code? - DWORD dwDebugBits = pRuntimeModule->GetDebuggerInfoBits(); - m_fHasOptimizedCode = CORDebuggerAllowJITOpts(dwDebugBits); - // Dynamic modules must receive ClassLoad callbacks in order to receive metadata updates as the module // evolves. So we force this on here and refuse to change it for all dynamic modules. if (pRuntimeModule->IsReflectionEmit()) @@ -83,8 +79,7 @@ inline bool DebuggerModule::HasAnyOptimizedCode() { LIMITED_METHOD_CONTRACT; Module * pModule = GetRuntimeModule(); - DWORD dwDebugBits = pModule->GetDebuggerInfoBits(); - return CORDebuggerAllowJITOpts(dwDebugBits); + return !pModule->AreJITOptimizationsDisabled(); } //----------------------------------------------------------------------------- diff --git a/src/coreclr/debug/ee/frameinfo.cpp b/src/coreclr/debug/ee/frameinfo.cpp index d2a73fbd9b8e..f4d88f7a462f 100644 --- a/src/coreclr/debug/ee/frameinfo.cpp +++ b/src/coreclr/debug/ee/frameinfo.cpp @@ -16,6 +16,10 @@ #include "COMToClrCall.h" #endif +#ifdef FEATURE_EH_FUNCLETS +#include "exinfo.h" +#endif // FEATURE_EH_FUNCLETS + // Get a frame pointer from a RegDisplay. // This is mostly used for chains and stub frames (i.e. internal frames), where we don't need an exact // frame pointer. This is why it is okay to use the current SP instead of the caller SP on IA64. 
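The FEATURE_SPECIAL_USER_MODE_APC additions above are split across two hunks: Debugger::SingleStepToExitApcCall (debugger.cpp/debugger.h) arms a single step on the context interrupted by the APC, and the new TS_SSToExitApcCall block in DebuggerController::DispatchNativeException consumes the resulting single-step exception. The condensed sketch below lays the two halves side by side; the two wrapper names are illustrative only, while the thread-state flags and helpers are the ones used in this patch.

// Arming side -- mirrors Debugger::SingleStepToExitApcCall.
void ArmSingleStepToExitApc(Thread* pThread, CONTEXT* interruptedContext)
{
    pThread->SetThreadState(Thread::TS_SSToExitApcCall);                  // record why the step was requested
    g_pEEInterface->SetThreadFilterContext(pThread, interruptedContext);  // give EnableSingleStep the APC context to patch
    DebuggerController::EnableSingleStep(pThread);
    g_pEEInterface->SetThreadFilterContext(pThread, NULL);
}

// Consuming side -- mirrors the TS_SSToExitApcCall block added to DispatchNativeException.
bool HandleSingleStepToExitApc(Thread* pCurThread, CONTEXT* pContext)
{
    if (!CheckActivationSafePoint(GetIP(pContext)))
    {
        return false;                                                     // not at a safe point yet; keep executing
    }
    pCurThread->SetThreadState(Thread::TS_SSToExitApcCallDone);
    pCurThread->ResetThreadState(Thread::TS_SSToExitApcCall);
    DebuggerController::UnapplyTraceFlag(pCurThread);
    pCurThread->MarkForSuspensionAndWait(Thread::TS_DebugSuspendPending);
    return true;
}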
@@ -803,12 +807,12 @@ void FrameInfo::InitForM2UInternalFrame(CrawlFrame * pCF) //----------------------------------------------------------------------------- void FrameInfo::InitForU2MInternalFrame(CrawlFrame * pCF) { - PREFIX_ASSUME(pCF != NULL); + _ASSERTE(pCF != NULL); MethodDesc * pMDHint = NULL; #ifdef FEATURE_COMINTEROP Frame * pFrame = pCF->GetFrame(); - PREFIX_ASSUME(pFrame != NULL); + _ASSERTE(pFrame != NULL); // For regular U2M PInvoke cases, we don't care about MD b/c it's just going to @@ -1261,7 +1265,7 @@ FramePointer GetFramePointerForDebugger(DebuggerFrameData* pData, CrawlFrame* pC FramePointer fpResult; -#if defined(FEATURE_EH_FUNCLETS) +#if !defined(TARGET_X86) if (pData->info.frame == NULL) { // This is a managed method frame. @@ -1273,7 +1277,7 @@ FramePointer GetFramePointerForDebugger(DebuggerFrameData* pData, CrawlFrame* pC fpResult = FramePointer::MakeFramePointer((LPVOID)(pData->info.frame)); } -#else // !FEATURE_EH_FUNCLETS +#else // !TARGET_X86 if ((pCF == NULL || !pCF->IsFrameless()) && pData->info.frame != NULL) { // @@ -1295,7 +1299,7 @@ FramePointer GetFramePointerForDebugger(DebuggerFrameData* pData, CrawlFrame* pC fpResult = FramePointer::MakeFramePointer((LPVOID)GetRegdisplayStackMark(&(pData->regDisplay))); } -#endif // !FEATURE_EH_FUNCLETS +#endif // !TARGET_X86 LOG((LF_CORDB, LL_INFO100000, "GFPFD: Frame pointer is 0x%p\n", fpResult.GetSPValue())); @@ -1310,7 +1314,7 @@ FramePointer GetFramePointerForDebugger(DebuggerFrameData* pData, CrawlFrame* pC // frame pointer for the parent method frame. Otherwise we return LEAF_MOST_FRAME. If we are already skipping // frames, then we return the current frame pointer for the parent method frame. // -// The return value of this function corresponds to the return value of ExceptionTracker::FindParentStackFrame(). +// The return value of this function corresponds to the return value of ExInfo::FindParentStackFrame(). // Refer to that function for more information. // // Arguments: @@ -1524,7 +1528,7 @@ StackWalkAction DebuggerWalkStackProc(CrawlFrame *pCF, void *data) // skipping if the current frame pointer matches fpParent. In either case, clear fpParent, and // then check again. if ((d->fpParent == ROOT_MOST_FRAME) || - ExceptionTracker::IsUnwoundToTargetParentFrame(pCF, ConvertFPToStackFrame(d->fpParent))) + ExInfo::IsUnwoundToTargetParentFrame(pCF, ConvertFPToStackFrame(d->fpParent))) { LOG((LF_CORDB, LL_INFO100000, "DWSP: Stopping to skip funclet at 0x%p.\n", d->fpParent.GetSPValue())); @@ -1667,7 +1671,6 @@ StackWalkAction DebuggerWalkStackProc(CrawlFrame *pCF, void *data) { case Frame::TYPE_ENTRY: // We now ignore entry + exit frames. case Frame::TYPE_EXIT: - case Frame::TYPE_HELPER_METHOD_FRAME: case Frame::TYPE_INTERNAL: /* If we have a specific interception type, use it. However, if this @@ -1690,8 +1693,7 @@ StackWalkAction DebuggerWalkStackProc(CrawlFrame *pCF, void *data) break; case Frame::TYPE_INTERCEPTION: - case Frame::TYPE_SECURITY: // Security is a sub-type of interception - LOG((LF_CORDB, LL_INFO100000, "DWSP: Frame type is TYPE_INTERCEPTION/TYPE_SECURITY.\n")); + LOG((LF_CORDB, LL_INFO100000, "DWSP: Frame type is TYPE_INTERCEPTION.\n")); d->info.managed = true; d->info.internal = true; use = true; @@ -1699,22 +1701,14 @@ StackWalkAction DebuggerWalkStackProc(CrawlFrame *pCF, void *data) case Frame::TYPE_CALL: LOG((LF_CORDB, LL_INFO100000, "DWSP: Frame type is TYPE_CALL.\n")); - // In V4, StubDispatchFrame is only used on 64-bit (and PPC?) but not on x86. 
x86 uses a - // different code path which sets up a HelperMethodFrame instead. In V4.5, x86 and ARM - // both use the 64-bit code path and they set up a StubDispatchFrame as well. This causes - // a problem in the debugger stackwalker (see Dev11 Issue 13229) since the two frame types - // are treated differently. More specifically, a StubDispatchFrame causes the debugger - // stackwalk to make an invalid callback, i.e. a callback which is not for a managed method, - // an explicit frame, or a chain. + + // StubDispatchFrame is used during virtual stub dispatch and appears temporarily on the stack + // across architectures like x64, x86, and ARM. It exists for a short duration while dispatching + // a virtual call through a stub, making its presence rare during a typical debugger stack walk. // - // Ideally we would just change the StubDispatchFrame to behave like a HMF, but it's - // too big of a change for an in-place release. For now I'm just making surgical fixes in - // the debugger stackwalker. This may introduce behavioural changes in on X64, but the - // chance of that is really small. StubDispatchFrame is only used in the virtual stub - // disptch code path. It stays on the stack in a small time window and it's not likely to - // be on the stack while some managed methods closer to the leaf are on the stack. There is - // only one scenario I know of, and that's the repro for Dev11 13229, but that's for x86 only. - // The jitted code on X64 behaves differently. + // In the debugger, we avoid treating StubDispatchFrame as a managed or inspectable frame. + // It doesn't represent a managed method, explicit frame, or chain, and attempting to interpret + // it as such may lead to invalid callbacks or incorrect debugger behavior. // // Note that there is a corresponding change in DacDbiInterfaceImpl::GetInternalFrameType(). if (frame->GetFrameIdentifier() == FrameIdentifier::StubDispatchFrame) diff --git a/src/coreclr/debug/ee/funceval.cpp b/src/coreclr/debug/ee/funceval.cpp index 66a85ab0d239..3b62a6addf2e 100644 --- a/src/coreclr/debug/ee/funceval.cpp +++ b/src/coreclr/debug/ee/funceval.cpp @@ -3990,7 +3990,7 @@ void * STDCALL FuncEvalHijackWorker(DebuggerEval *pDE) } -#if defined(FEATURE_EH_FUNCLETS) && !defined(TARGET_UNIX) +#if defined(FEATURE_EH_FUNCLETS) && !defined(TARGET_UNIX) && !defined(TARGET_X86) EXTERN_C EXCEPTION_DISPOSITION FuncEvalHijackPersonalityRoutine(IN PEXCEPTION_RECORD pExceptionRecord, @@ -4028,7 +4028,6 @@ FuncEvalHijackPersonalityRoutine(IN PEXCEPTION_RECORD pExceptionRecord, return ExceptionCollidedUnwind; } - -#endif // FEATURE_EH_FUNCLETS && !TARGET_UNIX +#endif // FEATURE_EH_FUNCLETS && !TARGET_UNIX && !TARGET_X86 #endif // ifndef DACCESS_COMPILE diff --git a/src/coreclr/debug/ee/functioninfo.cpp b/src/coreclr/debug/ee/functioninfo.cpp index cbe39f0d4ac9..6a0018d8c556 100644 --- a/src/coreclr/debug/ee/functioninfo.cpp +++ b/src/coreclr/debug/ee/functioninfo.cpp @@ -382,7 +382,7 @@ DebuggerJitInfo::NativeOffset DebuggerJitInfo::MapILOffsetToNative(DebuggerJitIn if (ilOffset.m_funcletIndex <= PARENT_METHOD_INDEX) { #endif // FEATURE_EH_FUNCLETS - PREFIX_ASSUME( map != NULL ); + _ASSERTE( map != NULL ); LOG((LF_CORDB, LL_INFO10000, "DJI::MILOTN: ilOffset 0x%zx to nat 0x%x exact:%s (Entry IL Off:0x%x)\n", ilOffset.m_ilOffset, map->nativeStartOffset, (resultOffset.m_fExact ? 
"true" : "false"), map->ilOffset)); @@ -1845,7 +1845,7 @@ void DebuggerMethodInfo::DJIIterator::Next(BOOL fFirst /*=FALSE*/) if (!fFirst) { - PREFIX_ASSUME(m_pCurrent != NULL); // IsAtEnd() should have caught this. + _ASSERTE(m_pCurrent != NULL); // IsAtEnd() should have caught this. m_pCurrent = m_pCurrent->m_prevJitInfo; } diff --git a/src/coreclr/debug/ee/i386/walker.cpp b/src/coreclr/debug/ee/i386/walker.cpp index ed0107917938..800f8b64357a 100644 --- a/src/coreclr/debug/ee/i386/walker.cpp +++ b/src/coreclr/debug/ee/i386/walker.cpp @@ -284,7 +284,7 @@ void NativeWalker::Decode() DWORD NativeWalker::GetRegisterValue(int registerNumber) { // If we're going to decode a register, then we'd better have a valid register set. - PREFIX_ASSUME(m_registers != NULL); + _ASSERTE(m_registers != NULL); switch (registerNumber) { diff --git a/src/coreclr/debug/ee/rcthread.cpp b/src/coreclr/debug/ee/rcthread.cpp index 455822bb67f1..ea6a6d56f478 100644 --- a/src/coreclr/debug/ee/rcthread.cpp +++ b/src/coreclr/debug/ee/rcthread.cpp @@ -1379,7 +1379,7 @@ HRESULT DebuggerRCThread::Start(void) // This gets published immediately. DebuggerIPCControlBlock* dcb = GetDCB(); - PREFIX_ASSUME(dcb != NULL); + _ASSERTE(dcb != NULL); dcb->m_realHelperThreadId = helperThreadId; #ifdef _DEBUG diff --git a/src/coreclr/debug/ee/walker.h b/src/coreclr/debug/ee/walker.h index 0c901cab4665..63bd4cfa2fd2 100644 --- a/src/coreclr/debug/ee/walker.h +++ b/src/coreclr/debug/ee/walker.h @@ -77,7 +77,7 @@ class Walker virtual void Init(const BYTE *ip, REGDISPLAY *pregisters) { - PREFIX_ASSUME(pregisters != NULL); + _ASSERTE(pregisters != NULL); _ASSERTE(GetControlPC(pregisters) == (PCODE)ip); m_registers = pregisters; diff --git a/src/coreclr/debug/inc/dacdbiinterface.h b/src/coreclr/debug/inc/dacdbiinterface.h index e20c318dbf70..1b6ec7caa5b3 100644 --- a/src/coreclr/debug/inc/dacdbiinterface.h +++ b/src/coreclr/debug/inc/dacdbiinterface.h @@ -1477,7 +1477,7 @@ class IDacDbiInterface // // Note: // Because of the complexity involved in checking for the parent frame, we should always - // ask the ExceptionTracker to do it. + // ask the ExInfo to do it. 
// virtual diff --git a/src/coreclr/debug/inc/dacdbistructures.inl b/src/coreclr/debug/inc/dacdbistructures.inl index a408a2c1ffb8..0db7feea837b 100644 --- a/src/coreclr/debug/inc/dacdbistructures.inl +++ b/src/coreclr/debug/inc/dacdbistructures.inl @@ -427,8 +427,12 @@ void SequencePoints::CopyAndSortSequencePoints(const ICorDebugInfo::OffsetMappin } // sort the map - MapSortILMap mapSorter(&m_map[0], m_map.Count()); - mapSorter.Sort(); + // Interpreter-TODO: This check can be removed once the interpreter generates proper maps + if (m_map.Count() != 0) + { + MapSortILMap mapSorter(&m_map[0], m_map.Count()); + mapSorter.Sort(); + } m_mapCount = m_map.Count(); diff --git a/src/coreclr/debug/inc/dbgipcevents.h b/src/coreclr/debug/inc/dbgipcevents.h index 6c39939f0030..25971d7638f6 100644 --- a/src/coreclr/debug/inc/dbgipcevents.h +++ b/src/coreclr/debug/inc/dbgipcevents.h @@ -435,18 +435,9 @@ class MSLAYOUT LsPointer : public GeneralLsPointer static LsPointer MakePtr(T* p) { -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6001) // PREfast warning: Using uninitialize memory 't' -#endif // _PREFAST_ - LsPointer t; t.Set(p); return t; - -#ifdef _PREFAST_ -#pragma warning(pop) -#endif // _PREFAST_ } bool operator!= (void * p) { return m_ptr != p; } @@ -536,18 +527,9 @@ class MSLAYOUT LsPointer : public GeneralLsPointer static LsPointer MakePtr(T * p) { -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6001) // PREfast warning: Using uninitialize memory 't' -#endif // _PREFAST_ - LsPointer t; t.Set(p); return t; - -#ifdef _PREFAST_ -#pragma warning(pop) -#endif // _PREFAST_ } bool operator!= (void * p) { return m_ptr != p; } @@ -734,18 +716,9 @@ class MSLAYOUT VMPTR_Base // Convenience for converting TTargetPtr --> VMPTR static VMPTR_This MakePtr(TTargetPtr * ptr) { -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6001) // PREfast warning: Using uninitialize memory 't' -#endif // _PREFAST_ - VMPTR_This t; t.SetRawPtr(ptr); return t; - -#ifdef _PREFAST_ -#pragma warning(pop) -#endif // _PREFAST_ } @@ -776,18 +749,9 @@ class MSLAYOUT VMPTR_Base { SUPPORTS_DAC; -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6001) // PREfast warning: Using uninitialize memory 't' -#endif // _PREFAST_ - VMPTR_This dummy; dummy.m_addr = (TADDR)NULL; return dummy; - -#ifdef _PREFAST_ -#pragma warning(pop) -#endif // _PREFAST_ } bool operator!= (VMPTR_This vmOther) const { SUPPORTS_DAC; return this->m_addr != vmOther.m_addr; } @@ -1938,6 +1902,9 @@ C_ASSERT(DBG_TARGET_REGNUM_AMBIENT_SP == ICorDebugInfo::REGNUM_AMBIENT_SP); C_ASSERT(DBG_TARGET_REGNUM_SP == ICorDebugInfo::REGNUM_SP); C_ASSERT(DBG_TARGET_REGNUM_AMBIENT_SP == ICorDebugInfo::REGNUM_AMBIENT_SP); #endif +#elif defined(TARGET_WASM) +#define DBG_TARGET_REGNUM_SP 0 +#define DBG_TARGET_REGNUM_AMBIENT_SP 0 #else #error Target registers are not defined for this platform #endif diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h b/src/coreclr/debug/inc/dbgtargetcontext.h index dab7ca29c7db..ea374cf8b6de 100644 --- a/src/coreclr/debug/inc/dbgtargetcontext.h +++ b/src/coreclr/debug/inc/dbgtargetcontext.h @@ -58,6 +58,8 @@ #define DTCONTEXT_IS_LOONGARCH64 #elif defined (TARGET_RISCV64) #define DTCONTEXT_IS_RISCV64 +#elif defined (TARGET_WASM) +#define DTCONTEXT_IS_WASM #endif #define CONTEXT_AREA_MASK 0xffff @@ -614,6 +616,10 @@ typedef struct DECLSPEC_ALIGN(16) { static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equal the T_CONTEXT size"); +#elif defined(DTCONTEXT_IS_WASM) 
+// no context for wasm +typedef struct { +} DT_CONTEXT; #else #error Unsupported platform #endif diff --git a/src/coreclr/debug/inc/dbgtransportsession.h b/src/coreclr/debug/inc/dbgtransportsession.h index 7a5df64d41ee..3bc9521e9c52 100644 --- a/src/coreclr/debug/inc/dbgtransportsession.h +++ b/src/coreclr/debug/inc/dbgtransportsession.h @@ -8,9 +8,9 @@ #ifndef RIGHT_SIDE_COMPILE #include #include - #endif // !RIGHT_SIDE_COMPILE +#include #include #if defined(FEATURE_DBGIPC_TRANSPORT_VM) || defined(FEATURE_DBGIPC_TRANSPORT_DI) @@ -271,7 +271,7 @@ inline UINT32 DBGIPC_HTONL(UINT32 x) // Lock abstraction (we can't use the same lock implementation on LS and RS since we really want a Crst on the // LS and this isn't available in the RS environment). -class DbgTransportLock +class DbgTransportLock final { public: void Init(); @@ -281,12 +281,32 @@ class DbgTransportLock private: #ifdef RIGHT_SIDE_COMPILE - CRITICAL_SECTION m_sLock; + minipal_mutex m_sLock; #else // RIGHT_SIDE_COMPILE CrstExplicitInit m_sLock; #endif // RIGHT_SIDE_COMPILE }; +class TransportLockHolder final +{ + DbgTransportLock& _lock; +public: + TransportLockHolder(DbgTransportLock& lock) + : _lock(lock) + { + _lock.Enter(); + } + ~TransportLockHolder() + { + _lock.Leave(); + } + + TransportLockHolder(TransportLockHolder const&) = delete; + TransportLockHolder& operator=(TransportLockHolder const&) = delete; + TransportLockHolder(TransportLockHolder&& other) = delete; + TransportLockHolder&& operator=(TransportLockHolder&&) = delete; +}; + // The transport has only one queue for IPC events, but each IPC event can be marked as one of two types. // The transport will signal the handle corresponding to the type of each IPC event. (See // code:DbgTransportSession::GetIPCEventReadyEvent and code:DbgTransportSession::GetDebugEventReadyEvent.) @@ -401,7 +421,6 @@ class DbgTransportSession // Read and write memory on the LS from the RS. HRESULT ReadMemory(PBYTE pbRemoteAddress, PBYTE pbBuffer, SIZE_T cbBuffer); HRESULT WriteMemory(PBYTE pbRemoteAddress, PBYTE pbBuffer, SIZE_T cbBuffer); - HRESULT VirtualUnwind(DWORD threadId, ULONG32 contextSize, PBYTE context); // Read and write the debugger control block on the LS from the RS. HRESULT GetDCB(DebuggerIPCControlBlock *pDCB); @@ -448,7 +467,6 @@ class DbgTransportSession // Misc management operations. MT_ReadMemory, // RS <-> LS : RS wants to read LS memory block (or LS is replying to such a request) MT_WriteMemory, // RS <-> LS : RS wants to write LS memory block (or LS is replying to such a request) - MT_VirtualUnwind, // RS <-> LS : RS wants to LS unwind a stack frame (or LS is replying to such a request) MT_GetDCB, // RS <-> LS : RS wants to read LS DCB (or LS is replying to such a request) MT_SetDCB, // RS <-> LS : RS wants to write LS DCB (or LS is replying to such a request) MT_GetAppDomainCB, // RS <-> LS : RS wants to read LS AppDomainCB (or LS is replying to such a request) @@ -521,7 +539,7 @@ class DbgTransportSession // Struct defining the format of the data block sent with a SessionRequest. struct SessionRequestData { - minipal_guid_t m_sSessionID; // Unique session ID. Treated as byte blob so no endian-ness + GUID m_sSessionID; // Unique session ID. Treated as byte blob so no endian-ness }; // Struct used to track a message that is being (or will soon be) sent but has not yet been acknowledged. @@ -555,26 +573,6 @@ class DbgTransportSession } }; - // Holder class used to take a transport lock in a given scope and automatically release it once that - // scope is exited. 
- class TransportLockHolder - { - public: - TransportLockHolder(DbgTransportLock *pLock) - { - m_pLock = pLock; - m_pLock->Enter(); - } - - ~TransportLockHolder() - { - m_pLock->Leave(); - } - - private: - DbgTransportLock *m_pLock; - }; - #ifdef _DEBUG // Store statistics for various session activities that will be useful for performance analysis and tracking // down bugs. @@ -589,7 +587,6 @@ class DbgTransportSession LONG m_cSentEvent; LONG m_cSentReadMemory; LONG m_cSentWriteMemory; - LONG m_cSentVirtualUnwind; LONG m_cSentGetDCB; LONG m_cSentSetDCB; LONG m_cSentGetAppDomainCB; @@ -604,7 +601,6 @@ class DbgTransportSession LONG m_cReceivedEvent; LONG m_cReceivedReadMemory; LONG m_cReceivedWriteMemory; - LONG m_cReceivedVirtualUnwind; LONG m_cReceivedGetDCB; LONG m_cReceivedSetDCB; LONG m_cReceivedGetAppDomainCB; @@ -676,7 +672,7 @@ class DbgTransportSession // Session ID randomly allocated by the right side and sent over in the SessionRequest message. This // serves to disambiguate a re-send of the SessionRequest due to a network error versus a SessionRequest // from a different debugger. - minipal_guid_t m_sSessionID; + GUID m_sSessionID; // Lock used to synchronize sending messages and updating the session state. This ensures message bytes // don't become interleaved on the transport connection, the send queue is updated consistently across diff --git a/src/coreclr/debug/inc/wasm/primitives.h b/src/coreclr/debug/inc/wasm/primitives.h new file mode 100644 index 000000000000..5c428fe76ceb --- /dev/null +++ b/src/coreclr/debug/inc/wasm/primitives.h @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +//***************************************************************************** +// File: primitives.h +// + +// +// Platform-specific debugger primitives +// +//***************************************************************************** + +#ifndef PRIMITIVES_H_ +#define PRIMITIVES_H_ + +inline CORDB_ADDRESS GetPatchEndAddr(CORDB_ADDRESS patchAddr) +{ + _ASSERTE(!"The function is not implemented on wasm"); + return patchAddr; +} + +typedef const BYTE CORDB_ADDRESS_TYPE; +typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; + +//This is an abstraction to keep x86/ia64 patch data separate +#define PRD_TYPE USHORT + +#define MAX_INSTRUCTION_LENGTH 2 // update once we have codegen + +#define CORDbg_BREAK_INSTRUCTION_SIZE 1 +#define CORDbg_BREAK_INSTRUCTION 0 // unreachable instruction + +inline bool PRDIsEmpty(PRD_TYPE p1) +{ + LIMITED_METHOD_CONTRACT; + + return p1 == 0; +} + +#endif diff --git a/src/coreclr/debug/runtimeinfo/CMakeLists.txt b/src/coreclr/debug/runtimeinfo/CMakeLists.txt index f7ff177b130a..1d0abd332c60 100644 --- a/src/coreclr/debug/runtimeinfo/CMakeLists.txt +++ b/src/coreclr/debug/runtimeinfo/CMakeLists.txt @@ -7,7 +7,8 @@ set(RUNTIMEINFO_SOURCES add_library_clr(runtimeinfo STATIC ${RUNTIMEINFO_SOURCES}) function(generate_module_index Target ModuleIndexFile) - if(CLR_CMAKE_HOST_WIN32) + # Win32 may be false when cross compiling + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") set(scriptExt ".cmd") else() set(scriptExt ".sh") @@ -41,6 +42,11 @@ install_clr(TARGETS runtimeinfo DESTINATIONS lib COMPONENT runtime) # cDAC contract descriptor +if(CDAC_BUILD_TOOL_BINARY_PATH AND "${CLR_DOTNET_RID}" STREQUAL "") + message(FATAL_ERROR "CLR_DOTNET_RID is not set.
Please ensure it is being set to the portable RID of the target platform by runtime.proj.") +endif() +configure_file(configure.h.in ${CMAKE_CURRENT_BINARY_DIR}/configure.h) + if (NOT CDAC_BUILD_TOOL_BINARY_PATH) # if CDAC_BUILD_TOOL_BINARY_PATH is unspecified (for example for a build without a .NET SDK or msbuild), # link a stub contract descriptor into the runtime diff --git a/src/coreclr/debug/runtimeinfo/configure.h.in b/src/coreclr/debug/runtimeinfo/configure.h.in new file mode 100644 index 000000000000..efe0ede365c6 --- /dev/null +++ b/src/coreclr/debug/runtimeinfo/configure.h.in @@ -0,0 +1,6 @@ +#ifndef RUNTIME_INFO_CONFIGURE_H_INCLUDED +#define RUNTIME_INFO_CONFIGURE_H_INCLUDED + +#define RID_STRING @CLR_DOTNET_RID@ + +#endif // RUNTIME_INFO_CONFIGURE_H_INCLUDED diff --git a/src/coreclr/debug/runtimeinfo/contracts.jsonc b/src/coreclr/debug/runtimeinfo/contracts.jsonc index ae8cb684ab2c..bcc2daae3830 100644 --- a/src/coreclr/debug/runtimeinfo/contracts.jsonc +++ b/src/coreclr/debug/runtimeinfo/contracts.jsonc @@ -17,8 +17,11 @@ "Loader": 1, "Object": 1, "PlatformMetadata": 1, - "PrecodeStubs": 1, + "PrecodeStubs": 2, "ReJIT": 1, + "RuntimeInfo": 1, "RuntimeTypeSystem": 1, + "StackWalk": 1, + "StressLog": 2, "Thread": 1 } diff --git a/src/coreclr/debug/runtimeinfo/datadescriptor.cpp b/src/coreclr/debug/runtimeinfo/datadescriptor.cpp index 72a1ee183045..a854a9a5b92e 100644 --- a/src/coreclr/debug/runtimeinfo/datadescriptor.cpp +++ b/src/coreclr/debug/runtimeinfo/datadescriptor.cpp @@ -12,6 +12,11 @@ #include "cdacplatformmetadata.hpp" #include "methodtable.h" #include "threads.h" +#include "exinfo.h" + +#include "configure.h" + +#include "../debug/ee/debugger.h" #ifdef HAVE_GCCOVER #include "gccover.h" @@ -49,6 +54,12 @@ struct GlobalPointerSpec uint32_t PointerDataIndex; }; +struct GlobalStringSpec +{ + uint32_t Name; + uint32_t StringValue; +}; + #define CONCAT(token1,token2) token1 ## token2 #define CONCAT4(token1, token2, token3, token4) token1 ## token2 ## token3 ## token4 @@ -57,6 +68,10 @@ struct GlobalPointerSpec #define MAKE_FIELDTYPELEN_NAME(tyname,membername) CONCAT4(cdac_string_pool_membertypename__, tyname, __, membername) #define MAKE_GLOBALLEN_NAME(globalname) CONCAT(cdac_string_pool_globalname__, globalname) #define MAKE_GLOBALTYPELEN_NAME(globalname) CONCAT(cdac_string_pool_globaltypename__, globalname) +#define MAKE_GLOBALVALUELEN_NAME(globalname) CONCAT(cdac_string_pool_globalvalue__, globalname) + +// used to stringify the result of a macro expansion +#define STRINGIFY(x) #x // define a struct where the size of each field is the length of some string.
we will use offsetof to get // the offset of each struct element, which will be equal to the offset of the beginning of that string in the @@ -69,6 +84,8 @@ struct CDacStringPoolSizes #define CDAC_TYPE_BEGIN(name) DECL_LEN(MAKE_TYPELEN_NAME(name), sizeof(#name)) #define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) DECL_LEN(MAKE_FIELDLEN_NAME(tyname,membername), sizeof(#membername)) \ DECL_LEN(MAKE_FIELDTYPELEN_NAME(tyname,membername), sizeof(#membertyname)) +#define CDAC_GLOBAL_STRING(name, stringval) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) \ + DECL_LEN(MAKE_GLOBALVALUELEN_NAME(name), sizeof(STRINGIFY(stringval))) #define CDAC_GLOBAL_POINTER(name,value) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) #define CDAC_GLOBAL(name,tyname,value) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) \ DECL_LEN(MAKE_GLOBALTYPELEN_NAME(name), sizeof(#tyname)) @@ -82,6 +99,7 @@ struct CDacStringPoolSizes #define GET_FIELDTYPE_NAME(tyname,membername) offsetof(struct CDacStringPoolSizes, MAKE_FIELDTYPELEN_NAME(tyname,membername)) #define GET_GLOBAL_NAME(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALLEN_NAME(globalname)) #define GET_GLOBALTYPE_NAME(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALTYPELEN_NAME(globalname)) +#define GET_GLOBALSTRING_VALUE(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALVALUELEN_NAME(globalname)) // count the types enum @@ -121,6 +139,15 @@ enum #include "datadescriptor.h" }; +// count the global strings +enum +{ + CDacBlobGlobalStringsCount = +#define CDAC_GLOBALS_BEGIN() 0 +#define CDAC_GLOBAL_STRING(name,value) + 1 +#include "datadescriptor.h" +}; + #define MAKE_TYPEFIELDS_TYNAME(tyname) CONCAT(CDacFieldsPoolTypeStart__, tyname) @@ -195,6 +222,7 @@ struct BinaryBlobDataDescriptor uint32_t GlobalLiteralValuesStart; uint32_t GlobalPointersStart; + uint32_t GlobalStringValuesStart; uint32_t NamesPoolStart; uint32_t TypeCount; @@ -202,6 +230,7 @@ struct BinaryBlobDataDescriptor uint32_t GlobalLiteralValuesCount; uint32_t GlobalPointerValuesCount; + uint32_t GlobalStringValuesCount; uint32_t NamesPoolCount; @@ -209,6 +238,7 @@ struct BinaryBlobDataDescriptor uint8_t FieldSpecSize; uint8_t GlobalLiteralSpecSize; uint8_t GlobalPointerSpecSize; + uint8_t GlobalStringSpecSize; } Directory; uint32_t PlatformFlags; uint32_t BaselineName; @@ -216,6 +246,7 @@ struct BinaryBlobDataDescriptor struct FieldSpec FieldsPool[CDacBlobFieldsPoolCount]; struct GlobalLiteralSpec GlobalLiteralValues[CDacBlobGlobalLiteralsCount]; struct GlobalPointerSpec GlobalPointerValues[CDacBlobGlobalPointersCount]; + struct GlobalStringSpec GlobalStringValues[CDacBlobGlobalStringsCount]; uint8_t NamesPool[sizeof(struct CDacStringPoolSizes)]; uint8_t EndMagic[4]; }; @@ -240,16 +271,19 @@ struct MagicAndBlob BlobDataDescriptor = { /* .FieldsPoolStart = */ offsetof(struct BinaryBlobDataDescriptor, FieldsPool), /* .GlobalLiteralValuesStart = */ offsetof(struct BinaryBlobDataDescriptor, GlobalLiteralValues), /* .GlobalPointersStart = */ offsetof(struct BinaryBlobDataDescriptor, GlobalPointerValues), + /* .GlobalStringValuesStart = */ offsetof(struct BinaryBlobDataDescriptor, GlobalStringValues), /* .NamesPoolStart = */ offsetof(struct BinaryBlobDataDescriptor, NamesPool), /* .TypeCount = */ CDacBlobTypesCount, /* .FieldsPoolCount = */ CDacBlobFieldsPoolCount, /* .GlobalLiteralValuesCount = */ CDacBlobGlobalLiteralsCount, /* .GlobalPointerValuesCount = */ CDacBlobGlobalPointersCount, + /* .GlobalStringValuesCount = */ CDacBlobGlobalStringsCount, /* 
.NamesPoolCount = */ sizeof(struct CDacStringPoolSizes), /* .TypeSpecSize = */ sizeof(struct TypeSpec), /* .FieldSpecSize = */ sizeof(struct FieldSpec), /* .GlobalLiteralSpecSize = */ sizeof(struct GlobalLiteralSpec), /* .GlobalPointerSpecSize = */ sizeof(struct GlobalPointerSpec), + /* .GlobalStringSpecSize = */ sizeof(struct GlobalStringSpec) }, /* .PlatformFlags = */ (sizeof(void*) == 4 ? 0x02 : 0) | 0x01, /* .BaselineName = */ offsetof(struct CDacStringPoolSizes, cdac_string_pool_baseline_), @@ -285,10 +319,16 @@ struct MagicAndBlob BlobDataDescriptor = { #include "datadescriptor.h" }, + /* .GlobalStringValues = */ { +#define CDAC_GLOBAL_STRING(name,value) { /* .Name = */ GET_GLOBAL_NAME(name), /* .Value = */ GET_GLOBALSTRING_VALUE(name) }, +#include "datadescriptor.h" + }, + /* .NamesPool = */ ("\0" // starts with a nul #define CDAC_BASELINE(name) name "\0" #define CDAC_TYPE_BEGIN(name) #name "\0" #define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) #membername "\0" #membertyname "\0" +#define CDAC_GLOBAL_STRING(name,value) #name "\0" STRINGIFY(value) "\0" #define CDAC_GLOBAL_POINTER(name,value) #name "\0" #define CDAC_GLOBAL(name,tyname,value) #name "\0" #tyname "\0" #include "datadescriptor.h" diff --git a/src/coreclr/debug/runtimeinfo/datadescriptor.h b/src/coreclr/debug/runtimeinfo/datadescriptor.h index e58d91bd6cab..aab39b5678ad 100644 --- a/src/coreclr/debug/runtimeinfo/datadescriptor.h +++ b/src/coreclr/debug/runtimeinfo/datadescriptor.h @@ -98,6 +98,9 @@ #ifndef CDAC_GLOBAL_POINTER #define CDAC_GLOBAL_POINTER(globalname,addr) #endif +#ifndef CDAC_GLOBAL_STRING +#define CDAC_GLOBAL_STRING(globalname,stringval) +#endif #ifndef CDAC_GLOBALS_END #define CDAC_GLOBALS_END() #endif @@ -165,13 +168,8 @@ CDAC_TYPE_END(Exception) CDAC_TYPE_BEGIN(ExceptionInfo) CDAC_TYPE_INDETERMINATE(ExceptionInfo) -#if FEATURE_EH_FUNCLETS -CDAC_TYPE_FIELD(ExceptionInfo, /*pointer*/, ThrownObject, offsetof(ExceptionTrackerBase, m_hThrowable)) -CDAC_TYPE_FIELD(PreviousNestedInfo, /*pointer*/, PreviousNestedInfo, offsetof(ExceptionTrackerBase, m_pPrevNestedInfo)) -#else CDAC_TYPE_FIELD(ExceptionInfo, /*pointer*/, ThrownObject, offsetof(ExInfo, m_hThrowable)) CDAC_TYPE_FIELD(PreviousNestedInfo, /*pointer*/, PreviousNestedInfo, offsetof(ExInfo, m_pPrevNestedInfo)) -#endif CDAC_TYPE_END(ExceptionInfo) @@ -220,14 +218,15 @@ CDAC_TYPE_END(SyncTableEntry) CDAC_TYPE_BEGIN(Module) CDAC_TYPE_INDETERMINATE(Module) CDAC_TYPE_FIELD(Module, /*pointer*/, Assembly, cdac_data::Assembly) +CDAC_TYPE_FIELD(Module, /*pointer*/, PEAssembly, cdac_data::PEAssembly) CDAC_TYPE_FIELD(Module, /*pointer*/, Base, cdac_data::Base) CDAC_TYPE_FIELD(Module, /*uint32*/, Flags, cdac_data::Flags) CDAC_TYPE_FIELD(Module, /*pointer*/, LoaderAllocator, cdac_data::LoaderAllocator) -CDAC_TYPE_FIELD(Module, /*pointer*/, ThunkHeap, cdac_data::ThunkHeap) CDAC_TYPE_FIELD(Module, /*pointer*/, DynamicMetadata, cdac_data::DynamicMetadata) CDAC_TYPE_FIELD(Module, /*pointer*/, Path, cdac_data::Path) CDAC_TYPE_FIELD(Module, /*pointer*/, FileName, cdac_data::FileName) CDAC_TYPE_FIELD(Module, /*pointer*/, ReadyToRunInfo, cdac_data::ReadyToRunInfo) +CDAC_TYPE_FIELD(Module, /*pointer*/, GrowableSymbolStream, cdac_data::GrowableSymbolStream) CDAC_TYPE_FIELD(Module, /*pointer*/, FieldDefToDescMap, cdac_data::FieldDefToDescMap) CDAC_TYPE_FIELD(Module, /*pointer*/, ManifestModuleReferencesMap, cdac_data::ManifestModuleReferencesMap) @@ -250,8 +249,64 @@ CDAC_TYPE_INDETERMINATE(Assembly) #ifdef FEATURE_COLLECTIBLE_TYPES CDAC_TYPE_FIELD(Assembly, 
/*uint8*/, IsCollectible, cdac_data::IsCollectible) #endif +CDAC_TYPE_FIELD(Assembly, /*pointer*/, Module, cdac_data::Module) +CDAC_TYPE_FIELD(Assembly, /*pointer*/, Error, cdac_data::Error) +CDAC_TYPE_FIELD(Assembly, /*uint32*/, NotifyFlags, cdac_data::NotifyFlags) +CDAC_TYPE_FIELD(Assembly, /*uint32*/, Level, cdac_data::Level) CDAC_TYPE_END(Assembly) +CDAC_TYPE_BEGIN(LoaderAllocator) +CDAC_TYPE_INDETERMINATE(LoaderAllocator) +CDAC_TYPE_FIELD(LoaderAllocator, /*uint32*/, ReferenceCount, cdac_data::ReferenceCount) +CDAC_TYPE_END(LoaderAllocator) + +CDAC_TYPE_BEGIN(PEAssembly) +CDAC_TYPE_INDETERMINATE(PEAssembly) +CDAC_TYPE_FIELD(PEAssembly, /*pointer*/, PEImage, cdac_data::PEImage) +CDAC_TYPE_END(PEAssembly) + +CDAC_TYPE_BEGIN(PEImage) +CDAC_TYPE_INDETERMINATE(PEImage) +CDAC_TYPE_FIELD(PEImage, /*pointer*/, LoadedImageLayout, cdac_data::LoadedImageLayout) +CDAC_TYPE_FIELD(PEImage, /*ProbeExtensionResult*/, ProbeExtensionResult, cdac_data::ProbeExtensionResult) +CDAC_TYPE_END(PEImage) + +CDAC_TYPE_BEGIN(PEImageLayout) +CDAC_TYPE_FIELD(PEImageLayout, /*pointer*/, Base, cdac_data::Base) +CDAC_TYPE_FIELD(PEImageLayout, /*uint32*/, Size, cdac_data::Size) +CDAC_TYPE_FIELD(PEImageLayout, /*uint32*/, Flags, cdac_data::Flags) +CDAC_TYPE_END(PEImageLayout) + +CDAC_TYPE_BEGIN(CGrowableSymbolStream) +CDAC_TYPE_INDETERMINATE(CGrowableSymbolStream) +CDAC_TYPE_FIELD(CGrowableSymbolStream, /*pointer*/, Buffer, cdac_data::Buffer) +CDAC_TYPE_FIELD(CGrowableSymbolStream, /*uint32*/, Size, cdac_data::Size) +CDAC_TYPE_END(CGrowableSymbolStream) + +CDAC_TYPE_BEGIN(ProbeExtensionResult) +CDAC_TYPE_INDETERMINATE(ProbeExtensionResult) +CDAC_TYPE_FIELD(ProbeExtensionResult, /*int32*/, Type, offsetof(ProbeExtensionResult, Type)) +CDAC_TYPE_END(ProbeExtensionResult) + +CDAC_TYPE_BEGIN(AppDomain) +CDAC_TYPE_INDETERMINATE(AppDomain) +CDAC_TYPE_FIELD(AppDomain, /*pointer*/, RootAssembly, cdac_data::RootAssembly) +CDAC_TYPE_FIELD(AppDomain, /*DomainAssemblyList*/, DomainAssemblyList, cdac_data::DomainAssemblyList) +CDAC_TYPE_END(AppDomain) + +CDAC_TYPE_BEGIN(ArrayListBase) +CDAC_TYPE_INDETERMINATE(ArrayListBase) +CDAC_TYPE_FIELD(ArrayListBase, /*uint32*/, Count, cdac_data::Count) +CDAC_TYPE_FIELD(ArrayListBase, /*pointer*/, FirstBlock, cdac_data::FirstBlock) +CDAC_TYPE_END(ArrayListBase) + +CDAC_TYPE_BEGIN(ArrayListBlock) +CDAC_TYPE_INDETERMINATE(ArrayListBlock) +CDAC_TYPE_FIELD(ArrayListBlock, /*pointer*/, Next, cdac_data::Next) +CDAC_TYPE_FIELD(ArrayListBlock, /*uint32*/, Size, cdac_data::Size) +CDAC_TYPE_FIELD(ArrayListBlock, /*pointer*/, ArrayStart, cdac_data::ArrayStart) +CDAC_TYPE_END(ArrayListBlock) + // RuntimeTypeSystem CDAC_TYPE_BEGIN(MethodTable) @@ -323,6 +378,58 @@ CDAC_TYPE_FIELD(DynamicMetadata, /*uint32*/, Size, cdac_data::S CDAC_TYPE_FIELD(DynamicMetadata, /*inline byte array*/, Data, cdac_data::Data) CDAC_TYPE_END(DynamicMetadata) +#ifdef STRESS_LOG +CDAC_TYPE_BEGIN(StressLog) +CDAC_TYPE_SIZE(sizeof(StressLog)) +CDAC_TYPE_FIELD(StressLog, /* uint32 */, LoggedFacilities, cdac_offsets::facilitiesToLog) +CDAC_TYPE_FIELD(StressLog, /* uint32 */, Level, cdac_offsets::levelToLog) +CDAC_TYPE_FIELD(StressLog, /* uint32 */, MaxSizePerThread, cdac_offsets::MaxSizePerThread) +CDAC_TYPE_FIELD(StressLog, /* uint32 */, MaxSizeTotal, cdac_offsets::MaxSizeTotal) +CDAC_TYPE_FIELD(StressLog, /* uint32 */, TotalChunks, cdac_offsets::totalChunk) +CDAC_TYPE_FIELD(StressLog, /* pointer */, Logs, cdac_offsets::logs) +CDAC_TYPE_FIELD(StressLog, /* uint64 */, TickFrequency, cdac_offsets::tickFrequency) 
+CDAC_TYPE_FIELD(StressLog, /* uint64 */, StartTimestamp, cdac_offsets::startTimeStamp) +CDAC_TYPE_FIELD(StressLog, /* nuint */, ModuleOffset, cdac_offsets::moduleOffset) +CDAC_TYPE_END(StressLog) + +CDAC_TYPE_BEGIN(StressLogModuleDesc) +CDAC_TYPE_SIZE(cdac_offsets::ModuleDesc::type_size) +CDAC_TYPE_FIELD(StressLogModuleDesc, pointer, BaseAddress, cdac_offsets::ModuleDesc::baseAddress) +CDAC_TYPE_FIELD(StressLogModuleDesc, nuint, Size, cdac_offsets::ModuleDesc::size) +CDAC_TYPE_END(StressLogModuleDesc) + +CDAC_TYPE_BEGIN(ThreadStressLog) +CDAC_TYPE_INDETERMINATE(ThreadStressLog) +CDAC_TYPE_FIELD(ThreadStressLog, /* pointer */, Next, cdac_offsets::next) +CDAC_TYPE_FIELD(ThreadStressLog, uint64, ThreadId, cdac_offsets::threadId) +CDAC_TYPE_FIELD(ThreadStressLog, uint8, WriteHasWrapped, cdac_offsets::writeHasWrapped) +CDAC_TYPE_FIELD(ThreadStressLog, pointer, CurrentPtr, cdac_offsets::curPtr) +CDAC_TYPE_FIELD(ThreadStressLog, /* pointer */, ChunkListHead, cdac_offsets::chunkListHead) +CDAC_TYPE_FIELD(ThreadStressLog, /* pointer */, ChunkListTail, cdac_offsets::chunkListTail) +CDAC_TYPE_FIELD(ThreadStressLog, /* pointer */, CurrentWriteChunk, cdac_offsets::curWriteChunk) +CDAC_TYPE_END(ThreadStressLog) + +CDAC_TYPE_BEGIN(StressLogChunk) +CDAC_TYPE_SIZE(sizeof(StressLogChunk)) +CDAC_TYPE_FIELD(StressLogChunk, /* pointer */, Prev, offsetof(StressLogChunk, prev)) +CDAC_TYPE_FIELD(StressLogChunk, /* pointer */, Next, offsetof(StressLogChunk, next)) +CDAC_TYPE_FIELD(StressLogChunk, /* uint8[STRESSLOG_CHUNK_SIZE] */, Buf, offsetof(StressLogChunk, buf)) +CDAC_TYPE_FIELD(StressLogChunk, /* uint32 */, Sig1, offsetof(StressLogChunk, dwSig1)) +CDAC_TYPE_FIELD(StressLogChunk, /* uint32 */, Sig2, offsetof(StressLogChunk, dwSig2)) +CDAC_TYPE_END(StressLogChunk) + +// The StressMsg Header is the fixed size portion of the StressMsg +CDAC_TYPE_BEGIN(StressMsgHeader) +CDAC_TYPE_SIZE(sizeof(StressMsg)) +CDAC_TYPE_END(StressMsgHeader) + +CDAC_TYPE_BEGIN(StressMsg) +CDAC_TYPE_INDETERMINATE(StressMsg) +CDAC_TYPE_FIELD(StressMsg, StressMsgHeader, Header, 0) +CDAC_TYPE_FIELD(StressMsg, /* pointer */, Args, offsetof(StressMsg, args)) +CDAC_TYPE_END(StressMsg) +#endif + CDAC_TYPE_BEGIN(MethodDesc) CDAC_TYPE_SIZE(sizeof(MethodDesc)) CDAC_TYPE_FIELD(MethodDesc, /*uint8*/, ChunkIndex, cdac_data::ChunkIndex) @@ -441,10 +548,17 @@ CDAC_TYPE_END(PlatformMetadata) CDAC_TYPE_BEGIN(StubPrecodeData) CDAC_TYPE_INDETERMINATE(StubPrecodeData) -CDAC_TYPE_FIELD(StubPrecodeData, /*pointer*/, MethodDesc, offsetof(StubPrecodeData, MethodDesc)) +CDAC_TYPE_FIELD(StubPrecodeData, /*pointer*/, SecretParam, offsetof(StubPrecodeData, SecretParam)) CDAC_TYPE_FIELD(StubPrecodeData, /*uint8*/, Type, offsetof(StubPrecodeData, Type)) CDAC_TYPE_END(StubPrecodeData) +#ifdef HAS_THISPTR_RETBUF_PRECODE +CDAC_TYPE_BEGIN(ThisPtrRetBufPrecodeData) +CDAC_TYPE_INDETERMINATE(ThisPtrRetBufPrecodeData) +CDAC_TYPE_FIELD(ThisPtrRetBufPrecodeData, /*pointer*/, MethodDesc, offsetof(ThisPtrRetBufPrecodeData, MethodDesc)) +CDAC_TYPE_END(ThisPtrRetBufPrecodeData) +#endif + CDAC_TYPE_BEGIN(FixupPrecodeData) CDAC_TYPE_INDETERMINATE(FixupPrecodeData) CDAC_TYPE_FIELD(FixupPrecodeData, /*pointer*/, MethodDesc, offsetof(FixupPrecodeData, MethodDesc)) @@ -521,6 +635,10 @@ CDAC_TYPE_END(RangeSection) CDAC_TYPE_BEGIN(RealCodeHeader) CDAC_TYPE_INDETERMINATE(RealCodeHeader) CDAC_TYPE_FIELD(RealCodeHeader, /*pointer*/, MethodDesc, offsetof(RealCodeHeader, phdrMDesc)) +#ifdef FEATURE_EH_FUNCLETS +CDAC_TYPE_FIELD(RealCodeHeader, /*uint32*/, NumUnwindInfos, 
offsetof(RealCodeHeader, nUnwindInfos)) +CDAC_TYPE_FIELD(RealCodeHeader, /* T_RUNTIME_FUNCTION */, UnwindInfos, offsetof(RealCodeHeader, unwindInfos)) +#endif // FEATURE_EH_FUNCLETS CDAC_TYPE_END(RealCodeHeader) CDAC_TYPE_BEGIN(CodeHeapListNode) @@ -570,13 +688,169 @@ CDAC_TYPE_FIELD(GCCoverageInfo, /*pointer*/, SavedCode, offsetof(GCCoverageInfo, CDAC_TYPE_END(GCCoverageInfo) #endif // HAVE_GCCOVER +CDAC_TYPE_BEGIN(Frame) +CDAC_TYPE_INDETERMINATE(Frame) +CDAC_TYPE_FIELD(Frame, /*pointer*/, Next, cdac_data::Next) +CDAC_TYPE_END(Frame) + +CDAC_TYPE_BEGIN(InlinedCallFrame) +CDAC_TYPE_SIZE(sizeof(InlinedCallFrame)) +CDAC_TYPE_FIELD(InlinedCallFrame, /*pointer*/, CallSiteSP, offsetof(InlinedCallFrame, m_pCallSiteSP)) +CDAC_TYPE_FIELD(InlinedCallFrame, /*pointer*/, CallerReturnAddress, offsetof(InlinedCallFrame, m_pCallerReturnAddress)) +CDAC_TYPE_FIELD(InlinedCallFrame, /*pointer*/, CalleeSavedFP, offsetof(InlinedCallFrame, m_pCalleeSavedFP)) +CDAC_TYPE_END(InlinedCallFrame) + +CDAC_TYPE_BEGIN(SoftwareExceptionFrame) +CDAC_TYPE_SIZE(sizeof(SoftwareExceptionFrame)) +CDAC_TYPE_FIELD(SoftwareExceptionFrame, /*T_CONTEXT*/, TargetContext, cdac_data::TargetContext) +CDAC_TYPE_FIELD(SoftwareExceptionFrame, /*pointer*/, ReturnAddress, cdac_data::ReturnAddress) +CDAC_TYPE_END(SoftwareExceptionFrame) + +CDAC_TYPE_BEGIN(FramedMethodFrame) +CDAC_TYPE_SIZE(sizeof(FramedMethodFrame)) +CDAC_TYPE_FIELD(FramedMethodFrame, /*pointer*/, TransitionBlockPtr, cdac_data::TransitionBlockPtr) +CDAC_TYPE_END(FramedMethodFrame) + +CDAC_TYPE_BEGIN(TransitionBlock) +CDAC_TYPE_SIZE(sizeof(TransitionBlock)) +CDAC_TYPE_FIELD(TransitionBlock, /*pointer*/, ReturnAddress, offsetof(TransitionBlock, m_ReturnAddress)) +CDAC_TYPE_FIELD(TransitionBlock, /*CalleeSavedRegisters*/, CalleeSavedRegisters, offsetof(TransitionBlock, m_calleeSavedRegisters)) +CDAC_TYPE_END(TransitionBlock) + +#ifdef DEBUGGING_SUPPORTED +CDAC_TYPE_BEGIN(FuncEvalFrame) +CDAC_TYPE_SIZE(sizeof(FuncEvalFrame)) +CDAC_TYPE_FIELD(FuncEvalFrame, /*pointer*/, DebuggerEvalPtr, cdac_data::DebuggerEvalPtr) +CDAC_TYPE_END(FuncEvalFrame) + +CDAC_TYPE_BEGIN(DebuggerEval) +CDAC_TYPE_SIZE(sizeof(DebuggerEval)) +CDAC_TYPE_FIELD(DebuggerEval, /*T_CONTEXT*/, TargetContext, offsetof(DebuggerEval, m_context)) +CDAC_TYPE_FIELD(DebuggerEval, /*bool*/, EvalDuringException, offsetof(DebuggerEval, m_evalDuringException)) +CDAC_TYPE_END(DebuggerEval) +#endif // DEBUGGING_SUPPORTED + +#ifdef FEATURE_HIJACK +CDAC_TYPE_BEGIN(ResumableFrame) +CDAC_TYPE_SIZE(sizeof(ResumableFrame)) +CDAC_TYPE_FIELD(ResumableFrame, /*pointer*/, TargetContextPtr, cdac_data::TargetContextPtr) +CDAC_TYPE_END(ResumableFrame) + +CDAC_TYPE_BEGIN(HijackFrame) +CDAC_TYPE_SIZE(sizeof(HijackFrame)) +CDAC_TYPE_FIELD(HijackFrame, /*pointer*/, ReturnAddress, cdac_data::ReturnAddress) +CDAC_TYPE_FIELD(HijackFrame, /*pointer*/, HijackArgsPtr, cdac_data::HijackArgsPtr) +CDAC_TYPE_END(HijackFrame) + +// HijackArgs struct is different on each platform +CDAC_TYPE_BEGIN(HijackArgs) +CDAC_TYPE_SIZE(sizeof(HijackArgs)) +#if defined(TARGET_AMD64) + +CDAC_TYPE_FIELD(HijackArgs, /*CalleeSavedRegisters*/, CalleeSavedRegisters, offsetof(HijackArgs, Regs)) +#ifdef TARGET_WINDOWS +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, Rsp, offsetof(HijackArgs, Rsp)) +#endif // TARGET_WINDOWS + +#elif defined(TARGET_ARM64) + +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X0, offsetof(HijackArgs, X0)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X1, offsetof(HijackArgs, X1)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X19, offsetof(HijackArgs, X19)) 
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X20, offsetof(HijackArgs, X20)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X21, offsetof(HijackArgs, X21)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X22, offsetof(HijackArgs, X22)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X23, offsetof(HijackArgs, X23)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X24, offsetof(HijackArgs, X24)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X25, offsetof(HijackArgs, X25)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X26, offsetof(HijackArgs, X26)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X27, offsetof(HijackArgs, X27)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X28, offsetof(HijackArgs, X28)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, Fp, offsetof(HijackArgs, X29)) +CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, Lr, offsetof(HijackArgs, Lr)) + +#endif // Platform switch +CDAC_TYPE_END(HijackArgs) +#endif // FEATURE_HIJACK + +CDAC_TYPE_BEGIN(FaultingExceptionFrame) +CDAC_TYPE_SIZE(sizeof(FaultingExceptionFrame)) +#ifdef FEATURE_EH_FUNCLETS +CDAC_TYPE_FIELD(FaultingExceptionFrame, /*T_CONTEXT*/, TargetContext, cdac_data::TargetContext) +#endif // FEATURE_EH_FUNCLETS +CDAC_TYPE_END(FaultingExceptionFrame) + +// CalleeSavedRegisters struct is different on each platform +CDAC_TYPE_BEGIN(CalleeSavedRegisters) +CDAC_TYPE_SIZE(sizeof(CalleeSavedRegisters)) +#if defined(TARGET_AMD64) + +#define CALLEE_SAVED_REGISTER(regname) \ + CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, regname, offsetof(CalleeSavedRegisters, regname)) +ENUM_CALLEE_SAVED_REGISTERS() +#undef CALLEE_SAVED_REGISTER + +#elif defined(TARGET_ARM64) + +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X19, offsetof(CalleeSavedRegisters, x19)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X20, offsetof(CalleeSavedRegisters, x20)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X21, offsetof(CalleeSavedRegisters, x21)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X22, offsetof(CalleeSavedRegisters, x22)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X23, offsetof(CalleeSavedRegisters, x23)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X24, offsetof(CalleeSavedRegisters, x24)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X25, offsetof(CalleeSavedRegisters, x25)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X26, offsetof(CalleeSavedRegisters, x26)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X27, offsetof(CalleeSavedRegisters, x27)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X28, offsetof(CalleeSavedRegisters, x28)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, Fp, offsetof(CalleeSavedRegisters, x29)) +CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, Lr, offsetof(CalleeSavedRegisters, x30)) + +#endif // Platform switch +CDAC_TYPE_END(CalleeSavedRegisters) + CDAC_TYPES_END() CDAC_GLOBALS_BEGIN() + +#if defined(TARGET_UNIX) +CDAC_GLOBAL_STRING(OperatingSystem, unix) +#elif defined(TARGET_WINDOWS) +CDAC_GLOBAL_STRING(OperatingSystem, windows) +#else +#error TARGET_{OS} define is not recognized by the cDAC. 
Update this switch and the enum values in IRuntimeInfo.cs +#endif + +#if defined(TARGET_X86) +CDAC_GLOBAL_STRING(Architecture, x86) +#elif defined(TARGET_AMD64) +CDAC_GLOBAL_STRING(Architecture, x64) +#elif defined(TARGET_ARM) +CDAC_GLOBAL_STRING(Architecture, arm) +#elif defined(TARGET_ARM64) +CDAC_GLOBAL_STRING(Architecture, arm64) +#elif defined(TARGET_LOONGARCH64) +CDAC_GLOBAL_STRING(Architecture, loongarch64) +#elif defined(TARGET_RISCV64) +CDAC_GLOBAL_STRING(Architecture, riscv64) +#else +#error TARGET_{ARCH} define is not recognized by the cDAC. Update this switch and the enum values in IRuntimeInfo.cs +#endif + +CDAC_GLOBAL_STRING(RID, RID_STRING) + CDAC_GLOBAL_POINTER(AppDomain, &AppDomain::m_pTheAppDomain) +CDAC_GLOBAL_POINTER(SystemDomain, cdac_data::SystemDomain) CDAC_GLOBAL_POINTER(ThreadStore, &ThreadStore::s_pThreadStore) CDAC_GLOBAL_POINTER(FinalizerThread, &::g_pFinalizerThread) CDAC_GLOBAL_POINTER(GCThread, &::g_pSuspensionThread) + +// Add FrameIdentifier for all defined Frame types. Used to differentiate Frame objects. +#define FRAME_TYPE_NAME(frameType) \ + CDAC_GLOBAL(frameType##Identifier, nuint, (uint64_t)FrameIdentifier::frameType) + + #include "frames.h" +#undef FRAME_TYPE_NAME + CDAC_GLOBAL(MethodDescTokenRemainderBitCount, uint8, METHOD_TOKEN_REMAINDER_BIT_COUNT) #if FEATURE_EH_FUNCLETS CDAC_GLOBAL(FeatureEHFunclets, uint8, 1) @@ -611,6 +885,18 @@ CDAC_GLOBAL_POINTER(StringMethodTable, &::g_pStringClass) CDAC_GLOBAL_POINTER(SyncTableEntries, &::g_pSyncTable) CDAC_GLOBAL_POINTER(MiniMetaDataBuffAddress, &::g_MiniMetaDataBuffAddress) CDAC_GLOBAL_POINTER(MiniMetaDataBuffMaxSize, &::g_MiniMetaDataBuffMaxSize) +#ifdef STRESS_LOG +CDAC_GLOBAL(StressLogEnabled, uint8, 1) +CDAC_GLOBAL_POINTER(StressLog, &g_pStressLog) +CDAC_GLOBAL(StressLogHasModuleTable, uint8, 1) +CDAC_GLOBAL_POINTER(StressLogModuleTable, &g_pStressLog->modules) +CDAC_GLOBAL(StressLogMaxModules, uint64, cdac_offsets::MAX_MODULES) +CDAC_GLOBAL(StressLogChunkSize, uint32, STRESSLOG_CHUNK_SIZE) +CDAC_GLOBAL(StressLogValidChunkSig, uint32, StressLogChunk::ValidChunkSig) +CDAC_GLOBAL(StressLogMaxMessageSize, uint64, (uint64_t)StressMsg::maxMsgSize) +#else +CDAC_GLOBAL(StressLogEnabled, uint8, 0) +#endif CDAC_GLOBAL_POINTER(ExecutionManagerCodeRangeMapAddress, cdac_data::CodeRangeMapAddress) CDAC_GLOBAL_POINTER(PlatformMetadata, &::g_cdacPlatformMetadata) CDAC_GLOBAL_POINTER(ProfilerControlBlock, &::g_profControlBlock) @@ -627,4 +913,5 @@ CDAC_GLOBALS_END() #undef CDAC_GLOBALS_BEGIN #undef CDAC_GLOBAL #undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL_STRING #undef CDAC_GLOBALS_END diff --git a/src/coreclr/debug/shared/dbgtransportsession.cpp b/src/coreclr/debug/shared/dbgtransportsession.cpp index 596794cb9b1a..f2547ffc8aef 100644 --- a/src/coreclr/debug/shared/dbgtransportsession.cpp +++ b/src/coreclr/debug/shared/dbgtransportsession.cpp @@ -80,7 +80,7 @@ HRESULT DbgTransportSession::Init(DebuggerIPCControlBlock *pDCB, AppDomainEnumer // the two way pipe; it expects the in/out handles to be -1 instead of 0. m_ref = 1; m_pipe = TwoWayPipe(); - m_sStateLock = DbgTransportLock(); + m_sStateLock = {}; // Initialize all per-session state variables. InitSessionState(); @@ -187,7 +187,7 @@ void DbgTransportSession::Shutdown() // Must take the state lock to make a state transition. { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); // Remember previous state and transition to SS_Closed. 
SessionState ePreviousState = m_eState; @@ -271,7 +271,7 @@ bool DbgTransportSession::WaitForSessionToOpen(DWORD dwTimeout) bool DbgTransportSession::UseAsDebugger(DebugTicket * pTicket) { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); if (m_fDebuggerAttached) { if (pTicket->IsValid()) @@ -309,7 +309,7 @@ bool DbgTransportSession::UseAsDebugger(DebugTicket * pTicket) bool DbgTransportSession::StopUsingAsDebugger(DebugTicket * pTicket) { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); if (m_fDebuggerAttached && pTicket->IsValid()) { // The caller is indeed the owner of the debug ticket. @@ -365,7 +365,7 @@ void DbgTransportSession::GetNextEvent(DebuggerIPCEvent *pEvent, DWORD cbEvent) // Must acquire the state lock to synchronize us wrt to the transport thread (clients already guarantee // they serialize calls to this and waiting on m_rghEventReadyEvent). - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); // There must be at least one valid event waiting (this call does not block). _ASSERTE(m_cValidEventBuffers); @@ -526,16 +526,6 @@ HRESULT DbgTransportSession::WriteMemory(PBYTE pbRemoteAddress, PBYTE pbBuffer, return sMessage.m_sHeader.TypeSpecificData.MemoryAccess.m_hrResult; } -HRESULT DbgTransportSession::VirtualUnwind(DWORD threadId, ULONG32 contextSize, PBYTE context) -{ - DbgTransportLog(LC_Requests, "Sending 'VirtualUnwind'"); - DBG_TRANSPORT_INC_STAT(SentVirtualUnwind); - - Message sMessage; - sMessage.Init(MT_VirtualUnwind, context, contextSize, context, contextSize); - return SendRequestMessageAndWait(&sMessage); -} - // Read and write the debugger control block on the LS from the RS. HRESULT DbgTransportSession::GetDCB(DebuggerIPCControlBlock *pDCB) { @@ -607,7 +597,7 @@ HRESULT DbgTransportSession::SendMessage(Message *pMessage, bool fWaitsForReply) // and while determining whether to send immediately or not depending on the session state (to avoid // posting a send on a closed and possibly recycled socket). { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); // Perform any last updates to the header or data block here since we might be about to encrypt them. @@ -939,7 +929,7 @@ void DbgTransportSession::HandleNetworkError(bool fCallerHoldsStateLock) void DbgTransportSession::FlushSendQueue(DWORD dwLastProcessedId) { // Must access the send queue under the state lock. - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); // Note that message headers (and data blocks) may be encrypted. Use the cached fields in the Message // structure to compare message IDs and types. @@ -957,7 +947,6 @@ void DbgTransportSession::FlushSendQueue(DWORD dwLastProcessedId) MessageType eType = pMsg->m_sHeader.m_eType; if (eType != MT_ReadMemory && eType != MT_WriteMemory && - eType != MT_VirtualUnwind && eType != MT_GetDCB && eType != MT_SetDCB && eType != MT_GetAppDomainCB) @@ -1030,7 +1019,7 @@ bool DbgTransportSession::ProcessReply(MessageHeader *pHeader) // we don't need to put it on the queue in order (it will never be resent). Easiest just to put it // on the head. 
{ - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); pMsg->m_pNext = m_pSendQueueFirst; m_pSendQueueFirst = pMsg; if (m_pSendQueueLast == NULL) @@ -1101,7 +1090,7 @@ DbgTransportSession::Message * DbgTransportSession::RemoveMessageFromSendQueue(D // Locate original message on the send queue. Message *pMsg = NULL; { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); pMsg = m_pSendQueueFirst; Message *pLastMsg = NULL; @@ -1251,10 +1240,6 @@ DWORD WINAPI DbgTransportSession::TransportWorkerStatic(LPVOID pvContext) goto Shutdown; \ } while (false) -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif void DbgTransportSession::TransportWorker() { _ASSERTE(m_eState == SS_Opening_NC); @@ -1344,7 +1329,7 @@ void DbgTransportSession::TransportWorker() // blocked on a Receive() on the newly formed connection (important if they want to transition the state // to SS_Closed). { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); if (m_eState == SS_Closed) break; @@ -1485,7 +1470,7 @@ void DbgTransportSession::TransportWorker() // Must access the send queue under the state lock. { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); Message *pMsg = m_pSendQueueFirst; while (pMsg) { @@ -1504,7 +1489,7 @@ void DbgTransportSession::TransportWorker() // Finally we can transition to SS_Open. { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); if (m_eState == SS_Closed) break; else if (m_eState == SS_Opening) @@ -1621,7 +1606,7 @@ void DbgTransportSession::TransportWorker() // Must access the send queue under the state lock. { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); Message *pMsg = m_pSendQueueFirst; while (pMsg) @@ -1770,7 +1755,7 @@ void DbgTransportSession::TransportWorker() // We need to do some state cleanup here, since when we reform a connection (if ever, it will // be with a new session). { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); // Check we're still in a good state before a clean restart. if (m_eState != SS_Open) @@ -1819,7 +1804,7 @@ void DbgTransportSession::TransportWorker() // that can expand the array, a client thread may be in GetNextEvent() reading from the // old version. { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); // When we copy old array contents over we place the head of the list at the start of // the new array for simplicity. If the head happened to be at the start of the old @@ -1872,7 +1857,7 @@ void DbgTransportSession::TransportWorker() // We must take the lock to update the count of valid entries though, since clients can // touch this field as well. 
- TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); m_cValidEventBuffers++; DWORD idxCurrentEvent = m_idxEventBufferTail; @@ -1957,33 +1942,6 @@ void DbgTransportSession::TransportWorker() #endif // RIGHT_SIDE_COMPILE break; - case MT_VirtualUnwind: -#ifdef RIGHT_SIDE_COMPILE - if (!ProcessReply(&sReceiveHeader)) - HANDLE_TRANSIENT_ERROR(); -#else // RIGHT_SIDE_COMPILE - if (sReceiveHeader.m_cbDataBlock != (DWORD)sizeof(frameContext)) - { - _ASSERTE(!"Inconsistent VirtualUnwind request"); - HANDLE_CRITICAL_ERROR(); - } - - if (!ReceiveBlock((PBYTE)&frameContext, sizeof(frameContext))) - { - HANDLE_TRANSIENT_ERROR(); - } - - if (!PAL_VirtualUnwind(&frameContext, NULL)) - { - HANDLE_TRANSIENT_ERROR(); - } - - fReplyRequired = true; - pbOptReplyData = (PBYTE)&frameContext; - cbOptReplyData = sizeof(frameContext); -#endif // RIGHT_SIDE_COMPILE - break; - case MT_GetDCB: #ifdef RIGHT_SIDE_COMPILE if (!ProcessReply(&sReceiveHeader)) @@ -2095,7 +2053,7 @@ void DbgTransportSession::TransportWorker() // Drain any remaining entries in the send queue (aborting them when they need completions). { - TransportLockHolder sLockHolder(&m_sStateLock); + TransportLockHolder sLockHolder(m_sStateLock); Message *pMsg; while ((pMsg = m_pSendQueueFirst) != NULL) @@ -2129,7 +2087,6 @@ void DbgTransportSession::TransportWorker() #ifdef RIGHT_SIDE_COMPILE case MT_ReadMemory: case MT_WriteMemory: - case MT_VirtualUnwind: case MT_GetDCB: case MT_SetDCB: case MT_GetAppDomainCB: @@ -2139,7 +2096,6 @@ void DbgTransportSession::TransportWorker() #else // RIGHT_SIDE_COMPILE case MT_ReadMemory: case MT_WriteMemory: - case MT_VirtualUnwind: case MT_GetDCB: case MT_SetDCB: case MT_GetAppDomainCB: @@ -2518,9 +2474,6 @@ DWORD DbgTransportSession::GetEventSize(DebuggerIPCEvent *pEvent) return cbBaseSize + cbAdditionalSize; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif #ifdef _DEBUG // Debug helper which returns the name associated with a MessageType. @@ -2544,8 +2497,6 @@ const char *DbgTransportSession::MessageName(MessageType eType) return "ReadMemory"; case MT_WriteMemory: return "WriteMemory"; - case MT_VirtualUnwind: - return "VirtualUnwind"; case MT_GetDCB: return "GetDCB"; case MT_SetDCB: @@ -2602,10 +2553,6 @@ void DbgTransportSession::DbgTransportLogMessageReceived(MessageHeader *pHeader) (DWORD)pHeader->TypeSpecificData.MemoryAccess.m_cbLeftSideBuffer); DBG_TRANSPORT_INC_STAT(ReceivedWriteMemory); return; - case MT_VirtualUnwind: - DbgTransportLog(LC_Requests, "Received 'VirtualUnwind' reply"); - DBG_TRANSPORT_INC_STAT(ReceivedVirtualUnwind); - return; case MT_GetDCB: DbgTransportLog(LC_Requests, "Received 'GetDCB' reply"); DBG_TRANSPORT_INC_STAT(ReceivedGetDCB); @@ -2631,10 +2578,6 @@ void DbgTransportSession::DbgTransportLogMessageReceived(MessageHeader *pHeader) (DWORD)pHeader->TypeSpecificData.MemoryAccess.m_cbLeftSideBuffer); DBG_TRANSPORT_INC_STAT(ReceivedWriteMemory); return; - case MT_VirtualUnwind: - DbgTransportLog(LC_Requests, "Received 'VirtualUnwind'"); - DBG_TRANSPORT_INC_STAT(ReceivedVirtualUnwind); - return; case MT_GetDCB: DbgTransportLog(LC_Requests, "Received 'GetDCB'"); DBG_TRANSPORT_INC_STAT(ReceivedGetDCB); @@ -2717,27 +2660,28 @@ bool DbgTransportSession::DbgTransportShouldInjectFault(DbgTransportFaultOp eOp, // Lock abstraction code (hides difference in lock implementation between left and right side). #ifdef RIGHT_SIDE_COMPILE -// On the right side we use a CRITICAL_SECTION. +// On the right side we use a minipal_mutex. 
void DbgTransportLock::Init() { - InitializeCriticalSection(&m_sLock); + minipal_mutex_init(&m_sLock); } void DbgTransportLock::Destroy() { - DeleteCriticalSection(&m_sLock); + minipal_mutex_destroy(&m_sLock); } void DbgTransportLock::Enter() { - EnterCriticalSection(&m_sLock); + minipal_mutex_enter(&m_sLock); } void DbgTransportLock::Leave() { - LeaveCriticalSection(&m_sLock); + minipal_mutex_leave(&m_sLock); } + #else // RIGHT_SIDE_COMPILE // On the left side we use a Crst. diff --git a/src/coreclr/dlls/CMakeLists.txt b/src/coreclr/dlls/CMakeLists.txt index 18f01e35331f..0a2ab14d8db8 100644 --- a/src/coreclr/dlls/CMakeLists.txt +++ b/src/coreclr/dlls/CMakeLists.txt @@ -1,10 +1,14 @@ -if(CLR_CMAKE_TARGET_WIN32) +if(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE) add_subdirectory(clretwrc) -endif(CLR_CMAKE_TARGET_WIN32) +endif(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE) if (NOT (CLR_CMAKE_TARGET_WIN32 AND FEATURE_CROSSBITNESS)) - add_subdirectory(mscordbi) - add_subdirectory(mscordac) + if (NOT CLR_CMAKE_TARGET_ARCH_WASM) + add_subdirectory(mscordbi) + add_subdirectory(mscordac) + endif() add_subdirectory(mscoree) endif() -add_subdirectory(mscorpe) +if (NOT CLR_CMAKE_TARGET_ARCH_WASM) + add_subdirectory(mscorpe) +endif() add_subdirectory(mscorrc) diff --git a/src/coreclr/dlls/mscordac/CMakeLists.txt b/src/coreclr/dlls/mscordac/CMakeLists.txt index ed7d7f03f952..71b69336e2ee 100644 --- a/src/coreclr/dlls/mscordac/CMakeLists.txt +++ b/src/coreclr/dlls/mscordac/CMakeLists.txt @@ -45,7 +45,11 @@ else(CLR_CMAKE_HOST_WIN32) # Generate DAC export file with the DAC_ prefix generate_exports_file_prefix(${DEF_SOURCES} ${EXPORTS_FILE} DAC_) - set(REDEFINES_FILE_SCRIPT ${CMAKE_SOURCE_DIR}/generateredefinesfile.sh) + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + set(REDEFINES_FILE_SCRIPT ${CMAKE_SOURCE_DIR}/generateredefinesfile.ps1) + else() + set(REDEFINES_FILE_SCRIPT ${CMAKE_SOURCE_DIR}/generateredefinesfile.sh) + endif() if (CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_LOONGARCH64) set(JUMP_INSTRUCTION b) @@ -57,23 +61,46 @@ else(CLR_CMAKE_HOST_WIN32) # Generate the palredefines.inc file to map from the imported prefixed APIs (foo to DAC_foo) set(PAL_REDEFINES_INC ${GENERATED_INCLUDE_DIR}/palredefines.inc) - add_custom_command( - OUTPUT ${PAL_REDEFINES_INC} - COMMAND ${REDEFINES_FILE_SCRIPT} ${DEF_SOURCES} ${JUMP_INSTRUCTION} "" DAC_ > ${PAL_REDEFINES_INC} - DEPENDS ${DEF_SOURCES} ${REDEFINES_FILE_SCRIPT} - COMMENT "Generating PAL redefines file -> ${PAL_REDEFINES_INC}" - VERBATIM - ) + + # Win32 may be false when cross compiling + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + add_custom_command( + OUTPUT ${PAL_REDEFINES_INC} + COMMAND powershell -NoProfile -ExecutionPolicy ByPass -File \"${REDEFINES_FILE_SCRIPT}\" -filename \"${DEF_SOURCES}\" -jump ${JUMP_INSTRUCTION} -prefix1 \"\" -prefix2 \"DAC_\" > ${PAL_REDEFINES_INC} + DEPENDS ${DEF_SOURCES} ${REDEFINES_FILE_SCRIPT} + COMMENT "Generating PAL redefines file -> ${PAL_REDEFINES_INC}" + ) + else() + add_custom_command( + OUTPUT ${PAL_REDEFINES_INC} + COMMAND ${REDEFINES_FILE_SCRIPT} ${DEF_SOURCES} ${JUMP_INSTRUCTION} "" DAC_ > ${PAL_REDEFINES_INC} + DEPENDS ${DEF_SOURCES} ${REDEFINES_FILE_SCRIPT} + COMMENT "Generating PAL redefines file -> ${PAL_REDEFINES_INC}" + VERBATIM + ) + endif() add_custom_target(pal_redefines_file DEPENDS ${PAL_REDEFINES_INC}) # Generate the libredefines.inc file for the DAC to export the prefixed APIs (DAC_foo to foo) set(LIB_REDEFINES_INC 
${GENERATED_INCLUDE_DIR}/libredefines.inc) - add_custom_command( - OUTPUT ${LIB_REDEFINES_INC} - COMMAND ${REDEFINES_FILE_SCRIPT} ${DEF_SOURCES} ${JUMP_INSTRUCTION} DAC_ > ${LIB_REDEFINES_INC} - DEPENDS ${DEF_SOURCES} ${REDEFINES_FILE_SCRIPT} - COMMENT "Generating DAC export redefines file -> ${LIB_REDEFINES_INC}" - ) + + # Win32 may be false when cross compiling + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + add_custom_command( + OUTPUT ${LIB_REDEFINES_INC} + COMMAND powershell -NoProfile -ExecutionPolicy ByPass -File \"${REDEFINES_FILE_SCRIPT}\" -filename \"${DEF_SOURCES}\" -jump ${JUMP_INSTRUCTION} -prefix1 \"DAC_\" > ${LIB_REDEFINES_INC} + DEPENDS ${DEF_SOURCES} ${REDEFINES_FILE_SCRIPT} + COMMENT "Generating DAC export redefines file -> ${LIB_REDEFINES_INC}" + ) + else() + add_custom_command( + OUTPUT ${LIB_REDEFINES_INC} + COMMAND ${REDEFINES_FILE_SCRIPT} ${DEF_SOURCES} ${JUMP_INSTRUCTION} DAC_ > ${LIB_REDEFINES_INC} + DEPENDS ${DEF_SOURCES} ${REDEFINES_FILE_SCRIPT} + COMMENT "Generating DAC export redefines file -> ${LIB_REDEFINES_INC}" + VERBATIM + ) + endif() add_custom_target(lib_redefines_inc DEPENDS ${LIB_REDEFINES_INC}) # Add lib redefines file to DAC @@ -89,10 +116,6 @@ else(CLR_CMAKE_HOST_WIN32) # ensure proper resolving of circular references between a subset of the libraries. set(START_LIBRARY_GROUP -Wl,--start-group) set(END_LIBRARY_GROUP -Wl,--end-group) - - # These options are used to force every object to be included even if it's unused. - set(START_WHOLE_ARCHIVE -Wl,--whole-archive) - set(END_WHOLE_ARCHIVE -Wl,--no-whole-archive) endif(CLR_CMAKE_HOST_LINUX OR CLR_CMAKE_HOST_FREEBSD OR CLR_CMAKE_HOST_NETBSD OR CLR_CMAKE_HOST_SUNOS OR CLR_CMAKE_HOST_HAIKU) set_exports_linker_option(${EXPORTS_FILE}) @@ -123,10 +146,8 @@ set(COREDAC_LIBRARIES ${START_LIBRARY_GROUP} # Start group of libraries that have circular references cee_dac cordbee_dac - ${START_WHOLE_ARCHIVE} # force all exports to be available corguids daccess - ${END_WHOLE_ARCHIVE} dbgutil mdcompiler_dac mdruntime_dac @@ -173,10 +194,8 @@ if(CLR_CMAKE_HOST_WIN32) else(CLR_CMAKE_HOST_WIN32) list(APPEND COREDAC_LIBRARIES mscorrc - ${START_WHOLE_ARCHIVE} # force all PAL objects to be included so all exports are available coreclrpal coreclrminipal - ${END_WHOLE_ARCHIVE} ) endif(CLR_CMAKE_HOST_WIN32) diff --git a/src/coreclr/dlls/mscordac/mscordac.cpp b/src/coreclr/dlls/mscordac/mscordac.cpp index 53616c226961..eb27e1d50c45 100644 --- a/src/coreclr/dlls/mscordac/mscordac.cpp +++ b/src/coreclr/dlls/mscordac/mscordac.cpp @@ -1,2 +1,17 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. + +#include + +EXTERN_C BOOL WINAPI DllMain2(HANDLE instance, DWORD reason, LPVOID reserved); + +// This is a workaround for missing exports on Linux. Defining DllMain forwarder here makes Linux linker export DllMain and other +// methods built under debug/daccess in the final binary. 
+EXTERN_C +#ifdef HOST_UNIX +DLLEXPORT // For Win32 PAL LoadLibrary emulation +#endif +BOOL WINAPI DllMain(HANDLE instance, DWORD reason, LPVOID reserved) +{ + return DllMain2(instance, reason, reserved); +} diff --git a/src/coreclr/dlls/mscordac/mscordac_unixexports.src b/src/coreclr/dlls/mscordac/mscordac_unixexports.src index 0857ba2884f7..0b53a46fbb72 100644 --- a/src/coreclr/dlls/mscordac/mscordac_unixexports.src +++ b/src/coreclr/dlls/mscordac/mscordac_unixexports.src @@ -61,7 +61,6 @@ nativeStringResourceTable_mscorrc #memcpy_s #sscanf_s -#CopyFileW #CreateFileMappingW #CreateFileA #CreateFileW @@ -74,24 +73,19 @@ nativeStringResourceTable_mscorrc #CreateThread #CloseHandle #DebugBreak -#DeleteCriticalSection #DuplicateHandle -#EnterCriticalSection #FlushFileBuffers #FlushInstructionCache #FormatMessageW #FreeEnvironmentStringsW #FreeLibrary #FileTimeToSystemTime -#GetACP #GetCurrentProcess #GetCurrentProcessId #GetCurrentThreadId #GetEnvironmentStringsW #GetEnvironmentVariableA #GetEnvironmentVariableW -#GetFileAttributesExW -#GetFileAttributesW #GetFileSize #GetFullPathNameW #GetLastError @@ -101,11 +95,8 @@ nativeStringResourceTable_mscorrc #GetSystemInfo #GetSystemTime #GetSystemTimeAsFileTime -#GetTempFileNameW #GetTempPathA #GetTempPathW -#InitializeCriticalSection -#LeaveCriticalSection #LoadLibraryExA #LoadLibraryExW #MapViewOfFile @@ -115,8 +106,6 @@ nativeStringResourceTable_mscorrc #OutputDebugStringW #OpenEventW #OutputDebugStringA -#QueryPerformanceCounter -#QueryPerformanceFrequency #RaiseException #RaiseFailFastException #ReadFile diff --git a/src/coreclr/dlls/mscordbi/CMakeLists.txt b/src/coreclr/dlls/mscordbi/CMakeLists.txt index 87e566175a25..01f94c737b49 100644 --- a/src/coreclr/dlls/mscordbi/CMakeLists.txt +++ b/src/coreclr/dlls/mscordbi/CMakeLists.txt @@ -75,6 +75,7 @@ set(COREDBI_LIBRARIES mdruntimerw-dbi mddatasource_dbi corguids + minipal ) if(CLR_CMAKE_HOST_WIN32) diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index 2ecb150aa973..60f6a310451d 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -50,12 +50,6 @@ else(CLR_CMAKE_HOST_WIN32) set(END_WHOLE_ARCHIVE -Wl,--no-whole-archive) endif(CLR_CMAKE_TARGET_LINUX OR CLR_CMAKE_TARGET_FREEBSD OR CLR_CMAKE_TARGET_NETBSD OR CLR_CMAKE_TARGET_SUNOS OR CLR_CMAKE_HOST_HAIKU) - if(CLR_CMAKE_TARGET_OSX) - # These options are used to force every object to be included even if it's unused. 
- set(START_WHOLE_ARCHIVE -force_load) - set(END_WHOLE_ARCHIVE ) - endif(CLR_CMAKE_TARGET_OSX) - set_exports_linker_option(${EXPORTS_FILE}) endif (CLR_CMAKE_HOST_WIN32) @@ -98,7 +92,6 @@ set(CORECLR_LIBRARIES ceefgen comfloat_wks corguids - gcinfo utilcode v3binder System.Globalization.Native-Static @@ -132,9 +125,7 @@ if(CLR_CMAKE_TARGET_WIN32) ) else() list(APPEND CORECLR_LIBRARIES - ${START_WHOLE_ARCHIVE} # force all PAL objects to be included so all exports are available coreclrpal - ${END_WHOLE_ARCHIVE} mscorrc ) endif(CLR_CMAKE_TARGET_WIN32) @@ -177,20 +168,33 @@ if(FEATURE_PERFTRACING) endif(CLR_CMAKE_TARGET_LINUX) endif(FEATURE_PERFTRACING) -if(FEATURE_MERGE_JIT_AND_ENGINE) +if(FEATURE_STATICALLY_LINKED) set(CLRJIT_STATIC clrjit_static) -endif(FEATURE_MERGE_JIT_AND_ENGINE) +endif(FEATURE_STATICALLY_LINKED) if (CLR_CMAKE_TARGET_OSX) find_library(FOUNDATION Foundation REQUIRED) endif() -target_sources(coreclr PUBLIC $) -target_link_libraries(coreclr PUBLIC ${CORECLR_LIBRARIES} ${CLRJIT_STATIC} cee_wks ${FOUNDATION}) -target_sources(coreclr_static PUBLIC $) -target_link_libraries(coreclr_static PUBLIC ${CORECLR_LIBRARIES} clrjit_static cee_wks_mergeable ${FOUNDATION}) +target_link_libraries(coreclr PUBLIC ${CORECLR_LIBRARIES} ${CLRJIT_STATIC} cee_wks_core cee_wks ${FOUNDATION}) +target_link_libraries(coreclr_static PUBLIC ${CORECLR_LIBRARIES} cee_wks_core clrjit_static cee_wks_mergeable ${FOUNDATION}) target_compile_definitions(coreclr_static PUBLIC CORECLR_EMBEDDED) +if (CLR_CMAKE_HOST_ANDROID) + target_link_libraries(coreclr PUBLIC log) + target_link_libraries(coreclr_static + PUBLIC + coreclrminipal_objects + coreclrpal_objects + eventprovider_objects + gcinfo + libunwind + log + minipal_objects + nativeresourcestring_objects + ) +endif() + if(CLR_CMAKE_TARGET_WIN32) set(CLRDEBUGINFO_RESOURCE_PATH ${CMAKE_CURRENT_BINARY_DIR}/clr_debug_resource.bin) @@ -224,9 +228,9 @@ endif(CLR_CMAKE_TARGET_WIN32) # add the install targets install_clr(TARGETS coreclr DESTINATIONS . sharedFramework COMPONENT runtime) -if(CLR_CMAKE_HOST_MACCATALYST OR CLR_CMAKE_HOST_IOS) +if(CLR_CMAKE_HOST_MACCATALYST OR CLR_CMAKE_HOST_IOS OR CLR_CMAKE_HOST_TVOS OR CLR_CMAKE_HOST_ANDROID) install_clr(TARGETS coreclr_static DESTINATIONS . 
sharedFramework COMPONENT runtime) -endif() +endif(CLR_CMAKE_HOST_MACCATALYST OR CLR_CMAKE_HOST_IOS OR CLR_CMAKE_HOST_TVOS OR CLR_CMAKE_HOST_ANDROID) # Enable profile guided optimization add_pgo(coreclr) diff --git a/src/coreclr/dlls/mscoree/exports.cpp b/src/coreclr/dlls/mscoree/exports.cpp index 8c28aa0545a8..1515423445bb 100644 --- a/src/coreclr/dlls/mscoree/exports.cpp +++ b/src/coreclr/dlls/mscoree/exports.cpp @@ -110,9 +110,7 @@ static LPCWSTR* StringArrayToUnicode(int argc, LPCSTR* argv) static void InitializeStartupFlags(STARTUP_FLAGS* startupFlagsRef) { - STARTUP_FLAGS startupFlags = static_cast( - STARTUP_FLAGS::STARTUP_LOADER_OPTIMIZATION_SINGLE_DOMAIN | - STARTUP_FLAGS::STARTUP_SINGLE_APPDOMAIN); + STARTUP_FLAGS startupFlags = static_cast(0); if (Configuration::GetKnobBooleanValue(W("System.GC.Concurrent"), CLRConfig::UNSUPPORTED_gcConcurrent)) { @@ -290,8 +288,6 @@ int coreclr_initialize( hr = CorHost2::CreateObject(IID_ICLRRuntimeHost4, (void**)&host); IfFailRet(hr); - ConstWStringHolder appDomainFriendlyNameW = StringToUnicode(appDomainFriendlyName); - if (bundleProbe != nullptr) { static Bundle bundle(exePath, bundleProbe); @@ -310,9 +306,10 @@ int coreclr_initialize( hr = host->Start(); IfFailRet(hr); + ConstWStringHolder appDomainFriendlyNameW = StringToUnicode(appDomainFriendlyName); hr = host->CreateAppDomainWithManager( appDomainFriendlyNameW, - APPDOMAIN_SECURITY_DEFAULT, + 0, NULL, // Name of the assembly that contains the AppDomainManager implementation NULL, // The AppDomainManager implementation type name propertyCount, diff --git a/src/coreclr/dlls/mscoree/mscoree.cpp b/src/coreclr/dlls/mscoree/mscoree.cpp index fe8ec220cd62..19be0938f527 100644 --- a/src/coreclr/dlls/mscoree/mscoree.cpp +++ b/src/coreclr/dlls/mscoree/mscoree.cpp @@ -6,16 +6,7 @@ #include "stdafx.h" // Standard header. #include // Utility helpers. -#include // Error handlers -#define INIT_GUIDS #include -#include -#include -#include "shimload.h" -#include "metadataexports.h" -#include "ex.h" - -#include #if !defined(CORECLR_EMBEDDED) @@ -45,18 +36,16 @@ BOOL WINAPI DllMain(HANDLE hInstance, DWORD dwReason, LPVOID lpReserved) #endif // !defined(CORECLR_EMBEDDED) -extern void* GetClrModuleBase(); - // --------------------------------------------------------------------------- // %%Function: MetaDataGetDispenser // This function gets the Dispenser interface given the CLSID and REFIID. +// Exported from coreclr and used by external profilers. // --------------------------------------------------------------------------- -STDAPI DLLEXPORT MetaDataGetDispenser( // Return HRESULT +STDAPI DLLEXPORT MetaDataGetDispenser( // Return HRESULT REFCLSID rclsid, // The class to desired. REFIID riid, // Interface wanted on class factory. LPVOID FAR *ppv) // Return interface pointer here. { - CONTRACTL { NOTHROW; GC_NOTRIGGER; @@ -64,217 +53,8 @@ STDAPI DLLEXPORT MetaDataGetDispenser( // Return HRESULT PRECONDITION(CheckPointer(ppv)); } CONTRACTL_END; - NonVMComHolder pcf(NULL); - HRESULT hr; - - IfFailGo(MetaDataDllGetClassObject(rclsid, IID_IClassFactory, (void **) &pcf)); - hr = pcf->CreateInstance(NULL, riid, ppv); - -ErrExit: - return (hr); -} - -// --------------------------------------------------------------------------- -// %%Function: GetMetaDataInternalInterface -// This function gets the IMDInternalImport given the metadata on memory. 
-// --------------------------------------------------------------------------- -STDAPI DLLEXPORT GetMetaDataInternalInterface( - LPVOID pData, // [IN] in memory metadata section - ULONG cbData, // [IN] size of the metadata section - DWORD flags, // [IN] MDInternal_OpenForRead or MDInternal_OpenForENC - REFIID riid, // [IN] desired interface - void **ppv) // [OUT] returned interface -{ - CONTRACTL{ - NOTHROW; - GC_NOTRIGGER; - ENTRY_POINT; - PRECONDITION(CheckPointer(pData)); - PRECONDITION(CheckPointer(ppv)); - } CONTRACTL_END; + if (rclsid != CLSID_CorMetaDataDispenser) + return CLASS_E_CLASSNOTAVAILABLE; - return GetMDInternalInterface(pData, cbData, flags, riid, ppv); + return CreateMetaDataDispenser(riid, ppv); } - -// --------------------------------------------------------------------------- -// %%Function: GetMetaDataInternalInterfaceFromPublic -// This function gets the internal scopeless interface given the public -// scopeless interface. -// --------------------------------------------------------------------------- -STDAPI DLLEXPORT GetMetaDataInternalInterfaceFromPublic( - IUnknown *pv, // [IN] Given interface. - REFIID riid, // [IN] desired interface - void **ppv) // [OUT] returned interface -{ - CONTRACTL{ - NOTHROW; - GC_NOTRIGGER; - ENTRY_POINT; - PRECONDITION(CheckPointer(pv)); - PRECONDITION(CheckPointer(ppv)); - } CONTRACTL_END; - - return GetMDInternalInterfaceFromPublic(pv, riid, ppv); -} - -// --------------------------------------------------------------------------- -// %%Function: GetMetaDataPublicInterfaceFromInternal -// This function gets the public scopeless interface given the internal -// scopeless interface. -// --------------------------------------------------------------------------- -STDAPI DLLEXPORT GetMetaDataPublicInterfaceFromInternal( - void *pv, // [IN] Given interface. - REFIID riid, // [IN] desired interface. - void **ppv) // [OUT] returned interface -{ - CONTRACTL{ - NOTHROW; - GC_NOTRIGGER; - PRECONDITION(CheckPointer(pv)); - PRECONDITION(CheckPointer(ppv)); - ENTRY_POINT; - } CONTRACTL_END; - - return GetMDPublicInterfaceFromInternal(pv, riid, ppv); -} - - -// --------------------------------------------------------------------------- -// %%Function: ReopenMetaDataWithMemory -// This function gets the public scopeless interface given the internal -// scopeless interface. -// --------------------------------------------------------------------------- -STDAPI ReOpenMetaDataWithMemory( - void *pUnk, // [IN] Given scope. public interfaces - LPCVOID pData, // [in] Location of scope data. - ULONG cbData) // [in] Size of the data pointed to by pData. -{ - CONTRACTL{ - NOTHROW; - GC_NOTRIGGER; - ENTRY_POINT; - PRECONDITION(CheckPointer(pUnk)); - PRECONDITION(CheckPointer(pData)); - } CONTRACTL_END; - - return MDReOpenMetaDataWithMemory(pUnk, pData, cbData); -} - -// --------------------------------------------------------------------------- -// %%Function: ReopenMetaDataWithMemoryEx -// This function gets the public scopeless interface given the internal -// scopeless interface. -// --------------------------------------------------------------------------- -STDAPI ReOpenMetaDataWithMemoryEx( - void *pUnk, // [IN] Given scope. public interfaces - LPCVOID pData, // [in] Location of scope data. - ULONG cbData, // [in] Size of the data pointed to by pData. 
- DWORD dwReOpenFlags) // [in] ReOpen flags -{ - CONTRACTL{ - NOTHROW; - GC_NOTRIGGER; - ENTRY_POINT; - PRECONDITION(CheckPointer(pUnk)); - PRECONDITION(CheckPointer(pData)); - } CONTRACTL_END; - - return MDReOpenMetaDataWithMemoryEx(pUnk, pData, cbData, dwReOpenFlags); -} - -static DWORD g_dwSystemDirectory = 0; -static WCHAR * g_pSystemDirectory = NULL; - -HRESULT GetInternalSystemDirectory(_Out_writes_to_opt_(*pdwLength,*pdwLength) LPWSTR buffer, __inout DWORD* pdwLength) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - PRECONDITION(CheckPointer(buffer, NULL_OK)); - PRECONDITION(CheckPointer(pdwLength)); - } CONTRACTL_END; - - if (g_dwSystemDirectory == 0) - SetInternalSystemDirectory(); - - // - // g_dwSystemDirectory includes the NULL in its count! - // - if(*pdwLength < g_dwSystemDirectory) - { - *pdwLength = g_dwSystemDirectory; - return HRESULT_FROM_WIN32(ERROR_INSUFFICIENT_BUFFER); - } - - if (buffer != NULL) - { - // - // wcsncpy_s will automatically append a null and g_dwSystemDirectory - // includes the null in its count, so we have to subtract 1. - // - wcsncpy_s(buffer, *pdwLength, g_pSystemDirectory, g_dwSystemDirectory-1); - } - *pdwLength = g_dwSystemDirectory; - return S_OK; -} - - -LPCWSTR GetInternalSystemDirectory(_Out_ DWORD* pdwLength) -{ - LIMITED_METHOD_CONTRACT; - - if (g_dwSystemDirectory == 0) - { - SetInternalSystemDirectory(); - } - - if (pdwLength != NULL) - { - * pdwLength = g_dwSystemDirectory; - } - - return g_pSystemDirectory; -} - - -HRESULT SetInternalSystemDirectory() - { - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - } CONTRACTL_END; - - HRESULT hr = S_OK; - if(g_dwSystemDirectory == 0) { - - DWORD len = 0; - NewArrayHolder pSystemDirectory; - EX_TRY{ - - // use local buffer for thread safety - PathString wzSystemDirectory; - hr = GetClrModuleDirectory(wzSystemDirectory); - - if (FAILED(hr)) { - wzSystemDirectory.Set(W('\0')); - } - - pSystemDirectory = wzSystemDirectory.GetCopyOfUnicodeString(); - if (pSystemDirectory == NULL) - { - hr = HRESULT_FROM_WIN32(ERROR_NOT_ENOUGH_MEMORY); - } - len = wzSystemDirectory.GetCount() + 1; - - } - EX_CATCH_HRESULT(hr); - - // publish results idempotently with correct memory ordering - g_pSystemDirectory = pSystemDirectory.Extract(); - - (void)InterlockedExchange((LONG *)&g_dwSystemDirectory, len); - } - - return hr; -} - diff --git a/src/coreclr/dlls/mscorpe/pewriter.cpp b/src/coreclr/dlls/mscorpe/pewriter.cpp index f22c01473109..4afcdee90e08 100644 --- a/src/coreclr/dlls/mscorpe/pewriter.cpp +++ b/src/coreclr/dlls/mscorpe/pewriter.cpp @@ -290,7 +290,7 @@ HRESULT PEWriterSection::applyRelocs(IMAGE_NT_HEADERS * pNtHeaders, IfFailRet(AddOvf_RVA(curRVA, curOffset)); DWORD UNALIGNED * pos = (DWORD *) m_blobFetcher.ComputePointer(curOffset); - PREFIX_ASSUME(pos != NULL); + _ASSERTE(pos != NULL); #ifdef LOGGING LOG((LF_ZAP, LL_INFO1000000, diff --git a/src/coreclr/dlls/mscorrc/mscorrc.rc b/src/coreclr/dlls/mscorrc/mscorrc.rc index a858eeff5943..e85b88fcdd07 100644 --- a/src/coreclr/dlls/mscorrc/mscorrc.rc +++ b/src/coreclr/dlls/mscorrc/mscorrc.rc @@ -151,6 +151,7 @@ STRINGTABLE DISCARDABLE BEGIN IDS_EE_THREAD_APARTMENT_NOT_SUPPORTED "The system does not support the %1 thread apartment." IDS_EE_NDIRECT_UNSUPPORTED_SIG "Method's type signature is not PInvoke compatible." + IDS_EE_NDIRECT_UNSUPPORTED_UNMANAGEDCALLERSONLY "Method '%1.%2' cannot be marked with both DllImportAttribute and UnmanagedCallersOnlyAttribute." IDS_EE_COM_UNSUPPORTED_SIG "Method's type signature is not Interop compatible." 
IDS_EE_COM_UNSUPPORTED_TYPE "The method returned a COM Variant type that is not Interop compatible." IDS_EE_MULTIPLE_CALLCONV_UNSUPPORTED "Multiple unmanaged calling conventions are specified. Only a single calling convention is supported." @@ -292,10 +293,6 @@ BEGIN IDS_EE_INTEROP_STUB_CA_NO_ACCESS_TO_STUB_METHOD "The interop method '%1' cannot access the stub method '%2' specified in ManagedToNativeComInteropStubAttribute. Please make sure they have compatible access modifiers and security accessibility." #endif // FEATURE_COMINTEROP -#if defined(FEATURE_COMINTEROP) || defined(FEATURE_COMWRAPPERS) - IDS_EE_NATIVE_COM_WEAKREF_BAD_TYPE "The object resolved by a native IWeakReference has an incompatible type for its managed WeakReference instance.\r\nExpected WeakReference target type: '%1'\r\nNative IWeakReference returned type: '%2'" -#endif // FEATURE_COMINTEROP || FEATURE_COMWRAPPERS - IDS_CLASSLOAD_TYPEWRONGNUMGENERICARGS "The generic type '%1' was used with the wrong number of generic arguments in assembly '%2'." IDS_CLASSLOAD_INVALIDINSTANTIATION "The generic type '%1' was used with an invalid instantiation in assembly '%2'." IDS_CLASSLOAD_VARIANCE_IN_METHOD_ARG "Could not load type '%1' from assembly '%2' because a covariant or contravariant type parameter was used illegally in the signature for an argument in method '%3'." @@ -367,7 +364,6 @@ BEGIN IDS_CLASSLOAD_UNSUPPORTED_DISPATCH "Could not make constrained call to method '%1' on interface '%2' with type '%3' from assembly '%4'. Dispatch to default interface methods is not supported in this situation." IDS_CLASSLOAD_METHOD_NOT_IMPLEMENTED "Could not call method '%1' on type '%2' with an instance of '%3' from assembly '%4' because there is no implementation for the method." - IDS_CLASSLOAD_MISSINGMETHODRVA "Could not load type '%1' from assembly '%2' because the method '%3' has no implementation (no RVA)." IDS_CLASSLOAD_BAD_FIELD "Type '%1' from assembly '%2' has a field of an illegal type." IDS_CLASSLOAD_MI_ILLEGAL_BODY "Body referenced in a method implementation must be defined in the same type. Type: '%1'. Assembly: '%2'." IDS_CLASSLOAD_MI_ILLEGAL_TOKEN_BODY "Body token used in a method implementation is out of range. Type: '%1'. Assembly: '%2'." @@ -418,8 +414,6 @@ BEGIN IDS_EE_SIZECONTROLBADTYPE "Array size control parameter type not supported." IDS_EE_SAFEARRAYSZARRAYMISMATCH "SafeArray cannot be marshaled to this array type because it has either nonzero lower bounds or more than one dimension." - IDS_EE_NEEDS_ASSEMBLY_SPEC "Typename needs an assembly qualifier." - IDS_EE_FILELOAD_ERROR_GENERIC "Could not load file or assembly '%1'. %2" IDS_EE_LOCAL_COGETCLASSOBJECT_FAILED "Retrieving the COM class factory for component with CLSID %2 failed due to the following error: %1 %3." @@ -556,12 +550,6 @@ BEGIN END -// General prompt strings. -STRINGTABLE DISCARDABLE -BEGIN - IDS_RTL "RTL_False" // change this to RTL_True on Arabic/Hebrew system -END - // BAD_FORMAT_ASSERT error messages STRINGTABLE DISCARDABLE BEGIN @@ -595,6 +583,8 @@ BEGIN BFA_BAD_FIELD_TOKEN "Field token out of range." BFA_INVALID_FIELD_ACC_FLAGS "Invalid Field Access Flags." BFA_INVALID_UNSAFEACCESSOR "Invalid usage of UnsafeAccessorAttribute." + BFA_INVALID_UNSAFEACCESSORTYPE "Invalid usage of UnsafeAccessorTypeAttribute." + BFA_INVALID_UNSAFEACCESSORTYPE_VALUETYPE "ValueTypes are not supported with UnsafeAccessorTypeAttribute." BFA_FIELD_LITERAL_AND_INIT "Field is Literal and InitOnly." BFA_NONSTATIC_GLOBAL_FIELD "Non-Static Global Field." 
BFA_INSTANCE_FIELD_IN_INT "Instance Field in an Interface." diff --git a/src/coreclr/dlls/mscorrc/resource.h b/src/coreclr/dlls/mscorrc/resource.h index 964cb7128325..397efa1f2ae7 100644 --- a/src/coreclr/dlls/mscorrc/resource.h +++ b/src/coreclr/dlls/mscorrc/resource.h @@ -20,21 +20,8 @@ #endif // HRESULT_CODE -//----------------------------------------------------------------------------- -// Resource strings for MDA descriptions. -//----------------------------------------------------------------------------- - -#define IDS_RTL 0x01F5 - -#define IDS_DS_ACTIVESESSIONS 0x1701 -#define IDS_DS_DATASOURCENAME 0x1702 -#define IDS_DS_DATASOURCEREADONLY 0x1703 -#define IDS_DS_DBMSNAME 0x1704 -#define IDS_DS_DBMSVER 0x1705 -#define IDS_DS_IDENTIFIERCASE 0x1706 -#define IDS_DS_DSOTHREADMODEL 0x1707 - #define IDS_EE_NDIRECT_UNSUPPORTED_SIG 0x1708 +#define IDS_EE_NDIRECT_UNSUPPORTED_UNMANAGEDCALLERSONLY 0x1709 #define IDS_EE_NDIRECT_BADNATL 0x170a #define IDS_EE_NDIRECT_LOADLIB_WIN 0x170b #define IDS_EE_NDIRECT_GETPROCADDRESS_WIN 0x170c @@ -64,7 +51,6 @@ #define IDS_EE_ILLEGAL_TOKEN_FOR_MAIN 0x1723 #define IDS_EE_MAIN_METHOD_MUST_BE_STATIC 0x1724 #define IDS_EE_MAIN_METHOD_HAS_INVALID_RTN 0x1725 -#define IDS_EE_VTABLECALLSNOTSUPPORTED 0x1726 #define IDS_EE_BADMARSHALFIELD_STRING 0x1727 #define IDS_EE_BADMARSHALFIELD_NOCUSTOMMARSH 0x1728 @@ -112,8 +98,6 @@ #define IDS_EE_BADMARSHAL_ARGITERATORRESTRICTION 0x1765 #define IDS_EE_BADMARSHAL_HANDLEREFRESTRICTION 0x1766 -#define IDS_EE_ADUNLOAD_NOT_ALLOWED 0x1767 - #define IDS_CANNOT_MARSHAL 0x1770 #define IDS_CANNOT_MARSHAL_RECURSIVE_DEF 0x1771 #define IDS_EE_HASH_VAL_FAILED 0x1772 @@ -149,7 +133,6 @@ #define IDS_CLASSLOAD_MI_BADSIGNATURE 0x1793 #define IDS_CLASSLOAD_MI_NOTIMPLEMENTED 0x1794 #define IDS_CLASSLOAD_MI_MUSTBEVIRTUAL 0x1796 -#define IDS_CLASSLOAD_MISSINGMETHODRVA 0x1797 #define IDS_CLASSLOAD_FIELDTOOLARGE 0x1798 #define IDS_CLASSLOAD_CANTEXTEND 0x179a #define IDS_CLASSLOAD_ZEROSIZE 0x179b @@ -177,17 +160,9 @@ #define IDS_CLASSLOAD_BYREF_OF_BYREF 0x17af #define IDS_CLASSLOAD_POINTER_OF_BYREF 0x17b0 -#define IDS_DEBUG_USERBREAKPOINT 0x17b6 - -#define IDS_PERFORMANCEMON_FUNCNOTFOUND 0x17bb -#define IDS_PERFORMANCEMON_FUNCNOTFOUND_TITLE 0x17bc -#define IDS_PERFORMANCEMON_PSAPINOTFOUND 0x17bd -#define IDS_PERFORMANCEMON_PSAPINOTFOUND_TITLE 0x17be - #define IDS_INVALID_REDIM 0x17c3 #define IDS_INVALID_PINVOKE_CALLCONV 0x17c4 #define IDS_CLASSLOAD_NSTRUCT_EXPLICIT_OFFSET 0x17c7 -#define IDS_EE_BADPINVOKEFIELD_NOTMARSHALABLE 0x17c9 #define IDS_EE_INVALIDLCIDPARAM 0x17cd #define IDS_EE_BADMARSHAL_NESTEDARRAY 0x17ce @@ -268,12 +243,10 @@ #define IDS_CLASSLOAD_INVALIDINSTANTIATION 0x1a59 -#define IDS_EE_CLASSLOAD_INVALIDINSTANTIATION 0x1a59 #define IDS_EE_BADMARSHALFIELD_ZEROLENGTHFIXEDSTRING 0x1a5a #define IDS_EE_BADMARSHAL_CRITICALHANDLENATIVETOCOM 0x1a62 #define IDS_EE_BADMARSHAL_ABSTRACTOUTCRITICALHANDLE 0x1a63 -#define IDS_EE_BADMARSHAL_RETURNCHCOMTONATIVE 0x1a64 #define IDS_EE_BADMARSHAL_CRITICALHANDLE 0x1a65 #define IDS_EE_BADMARSHAL_INT128_RESTRICTION 0x1a66 @@ -289,7 +262,6 @@ #define IDS_CLASSLOAD_VARIANCE_IN_METHOD_ARG 0x1a79 #define IDS_CLASSLOAD_VARIANCE_IN_METHOD_RESULT 0x1a7a -#define IDS_CLASSLOAD_VARIANCE_IN_BASE 0x1a7b #define IDS_CLASSLOAD_VARIANCE_IN_INTERFACE 0x1a7c #define IDS_CLASSLOAD_VARIANCE_IN_CONSTRAINT 0x1a7d #define IDS_CLASSLOAD_VARIANCE_CLASS 0x1a7e @@ -298,8 +270,6 @@ #define IDS_CLASSLOAD_OVERLAPPING_INTERFACES 0x1a80 #define IDS_CLASSLOAD_32BITCLRLOADING64BITASSEMBLY 0x1a81 -#define IDS_EE_NEEDS_ASSEMBLY_SPEC 0x1a87 - 
#define IDS_EE_FILELOAD_ERROR_GENERIC 0x1a88 #define IDS_EE_BADMARSHAL_UNSUPPORTED_SIG 0x1a89 @@ -414,10 +384,11 @@ #define BFA_BAD_TYPEREF_TOKEN 0x2046 #define BFA_BAD_CLASS_INT_CA_FORMAT 0x2048 #define BFA_BAD_COMPLUS_SIG 0x2049 -#define BFA_BAD_ELEM_IN_SIZEOF 0x204b -#define BFA_IJW_IN_COLLECTIBLE_ALC 0x204c -#define BFA_INVALID_UNSAFEACCESSOR 0x204d - +#define BFA_BAD_ELEM_IN_SIZEOF 0x204a +#define BFA_IJW_IN_COLLECTIBLE_ALC 0x204b +#define BFA_INVALID_UNSAFEACCESSOR 0x204c +#define BFA_INVALID_UNSAFEACCESSORTYPE 0x204d +#define BFA_INVALID_UNSAFEACCESSORTYPE_VALUETYPE 0x204e #define IDS_CLASSLOAD_INTERFACE_NO_ACCESS 0x204f #define BFA_BAD_CA_HEADER 0x2050 @@ -526,9 +497,6 @@ #define IDS_E_PROF_TIMEOUT_WAITING_FOR_CONCURRENT_GC 0x251D #define IDS_EE_CANNOTCAST_NOMARSHAL 0x2629 -#if defined(FEATURE_COMINTEROP) || defined(FEATURE_COMWRAPPERS) -#define IDS_EE_NATIVE_COM_WEAKREF_BAD_TYPE 0x262e -#endif // FEATURE_COMINTEROP || FEATURE_COMWRAPPERS #define IDS_HOST_ASSEMBLY_RESOLVER_ASSEMBLY_ALREADY_LOADED_IN_CONTEXT 0x2636 #define IDS_HOST_ASSEMBLY_RESOLVER_DYNAMICALLY_EMITTED_ASSEMBLIES_UNSUPPORTED 0x2637 diff --git a/src/coreclr/gc/CMakeLists.txt b/src/coreclr/gc/CMakeLists.txt index b7b79013fc45..a7bdcfc63336 100644 --- a/src/coreclr/gc/CMakeLists.txt +++ b/src/coreclr/gc/CMakeLists.txt @@ -85,7 +85,10 @@ if(CLR_CMAKE_HOST_WIN32) advapi32.lib) endif(CLR_CMAKE_HOST_WIN32) -set (GC_LINK_LIBRARIES ${GC_LINK_LIBRARIES} gc_pal) +set (GC_LINK_LIBRARIES + ${GC_LINK_LIBRARIES} + gc_pal + minipal) if(CLR_CMAKE_TARGET_ARCH_AMD64) list(APPEND GC_LINK_LIBRARIES diff --git a/src/coreclr/gc/env/common.h b/src/coreclr/gc/env/common.h index 5d8cff7f7790..d356baa598d1 100644 --- a/src/coreclr/gc/env/common.h +++ b/src/coreclr/gc/env/common.h @@ -29,7 +29,9 @@ #include #include -#ifdef TARGET_UNIX +#ifdef TARGET_WINDOWS +#include +#else #include #endif diff --git a/src/coreclr/gc/env/gcenv.base.h b/src/coreclr/gc/env/gcenv.base.h index 3e0122f0ea50..9362427f826d 100644 --- a/src/coreclr/gc/env/gcenv.base.h +++ b/src/coreclr/gc/env/gcenv.base.h @@ -45,7 +45,7 @@ #define SSIZE_T_MAX ((ptrdiff_t)(SIZE_T_MAX / 2)) #endif -#ifndef _INC_WINDOWS +#ifdef TARGET_UNIX // ----------------------------------------------------------------------------------------------------------- // // Aliases for Win32 types @@ -80,12 +80,6 @@ inline HRESULT HRESULT_FROM_WIN32(unsigned long x) #define S_OK 0x0 #define E_FAIL 0x80004005 #define E_OUTOFMEMORY 0x8007000E -#define COR_E_EXECUTIONENGINE 0x80131506 -#define CLR_E_GC_BAD_AFFINITY_CONFIG 0x8013200A -#define CLR_E_GC_BAD_AFFINITY_CONFIG_FORMAT 0x8013200B -#define CLR_E_GC_BAD_HARD_LIMIT 0x8013200D -#define CLR_E_GC_LARGE_PAGE_MISSING_HARD_LIMIT 0x8013200E -#define CLR_E_GC_BAD_REGION_SIZE 0x8013200F #define NOERROR 0x0 #define ERROR_TIMEOUT 1460 @@ -143,7 +137,10 @@ typedef DWORD (WINAPI *PTHREAD_START_ROUTINE)(void* lpThreadParameter); #pragma intrinsic(__dmb) #define MemoryBarrier() { __dmb(_ARM64_BARRIER_SY); } - #elif defined(HOST_AMD64) + #elif defined(HOST_BROWSER) + #define YieldProcessor() + #define MemoryBarrier __sync_synchronize +#elif defined(HOST_AMD64) extern "C" void _mm_pause ( @@ -339,6 +336,14 @@ inline uint8_t BitScanReverse64(uint32_t *bitIndex, uint64_t mask) return mask != 0 ? 
TRUE : FALSE; #endif // _MSC_VER } +#endif // TARGET_UNIX + +#define COR_E_EXECUTIONENGINE 0x80131506 +#define CLR_E_GC_BAD_AFFINITY_CONFIG 0x8013200A +#define CLR_E_GC_BAD_AFFINITY_CONFIG_FORMAT 0x8013200B +#define CLR_E_GC_BAD_HARD_LIMIT 0x8013200D +#define CLR_E_GC_LARGE_PAGE_MISSING_HARD_LIMIT 0x8013200E +#define CLR_E_GC_BAD_REGION_SIZE 0x8013200F // Aligns a size_t to the specified alignment. Alignment must be a power // of two. @@ -385,13 +390,6 @@ inline void* ALIGN_DOWN(void* ptr, size_t alignment) return reinterpret_cast(ALIGN_DOWN(as_size_t, alignment)); } -typedef struct _PROCESSOR_NUMBER { - uint16_t Group; - uint8_t Number; - uint8_t Reserved; -} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; -#endif // _INC_WINDOWS - // ----------------------------------------------------------------------------------------------------------- // // The subset of the contract code required by the GC/HandleTable sources. If NativeAOT moves to support @@ -467,7 +465,7 @@ typedef DPTR(uint8_t) PTR_uint8_t; #define _ASSERTE(_expr) ASSERT(_expr) #endif #define CONSISTENCY_CHECK(_expr) ASSERT(_expr) -#define PREFIX_ASSUME(cond) ASSERT(cond) +#define COMPILER_ASSUME(cond) ASSERT(cond) #define EEPOLICY_HANDLE_FATAL_ERROR(error) ASSERT(!"EEPOLICY_HANDLE_FATAL_ERROR") #define UI64(_literal) _literal##ULL diff --git a/src/coreclr/gc/env/gcenv.object.h b/src/coreclr/gc/env/gcenv.object.h index f515536f6a66..1a7077c75722 100644 --- a/src/coreclr/gc/env/gcenv.object.h +++ b/src/coreclr/gc/env/gcenv.object.h @@ -8,10 +8,8 @@ extern bool g_oldMethodTableFlags; #endif -// ARM requires that 64-bit primitive types are aligned at 64-bit boundaries for interlocked-like operations. -// Additionally the platform ABI requires these types and composite type containing them to be similarly -// aligned when passed as arguments. -#ifdef TARGET_ARM +// Some 32-bit platform ABIs require that 64-bit primitive types and composite types containing them are aligned at 64-bit boundaries. +#if defined(TARGET_ARM) || defined(TARGET_WASM) #define FEATURE_64BIT_ALIGNMENT #endif diff --git a/src/coreclr/gc/env/gcenv.os.h b/src/coreclr/gc/env/gcenv.os.h index aa7223850eaa..08e9b39e36eb 100644 --- a/src/coreclr/gc/env/gcenv.os.h +++ b/src/coreclr/gc/env/gcenv.os.h @@ -6,27 +6,41 @@ #ifndef __GCENV_OS_H__ #define __GCENV_OS_H__ +#include + #define NUMA_NODE_UNDEFINED UINT16_MAX bool ParseIndexOrRange(const char** config_string, size_t* start_index, size_t* end_index); // Critical section used by the GC -class CLRCriticalSection +class CLRCriticalSection final { - CRITICAL_SECTION m_cs; + minipal_mutex m_cs; public: // Initialize the critical section - bool Initialize(); + bool Initialize() + { + return minipal_mutex_init(&m_cs); + } // Destroy the critical section - void Destroy(); + void Destroy() + { + minipal_mutex_destroy(&m_cs); + } // Enter the critical section. Blocks until the section can be entered. 
- void Enter(); + void Enter() + { + minipal_mutex_enter(&m_cs); + } // Leave the critical section - void Leave(); + void Leave() + { + minipal_mutex_leave(&m_cs); + } }; // Flags for the GCToOSInterface::VirtualReserve method diff --git a/src/coreclr/gc/env/gcenv.structs.h b/src/coreclr/gc/env/gcenv.structs.h index 9f287ec7bf8c..f3e30a849308 100644 --- a/src/coreclr/gc/env/gcenv.structs.h +++ b/src/coreclr/gc/env/gcenv.structs.h @@ -44,10 +44,6 @@ class EEThreadId #else // TARGET_UNIX -#ifndef _INC_WINDOWS -extern "C" uint32_t __stdcall GetCurrentThreadId(); -#endif - class EEThreadId { uint64_t m_uiId; @@ -71,37 +67,4 @@ class EEThreadId #endif // TARGET_UNIX -#ifndef _INC_WINDOWS - -#ifdef TARGET_UNIX - -typedef struct _RTL_CRITICAL_SECTION { - pthread_mutex_t mutex; -} CRITICAL_SECTION, RTL_CRITICAL_SECTION, *PRTL_CRITICAL_SECTION; - -#else - -#pragma pack(push, 8) - -typedef struct _RTL_CRITICAL_SECTION { - void* DebugInfo; - - // - // The following three fields control entering and exiting the critical - // section for the resource - // - - int32_t LockCount; - int32_t RecursionCount; - HANDLE OwningThread; // from the thread's ClientId->UniqueThread - HANDLE LockSemaphore; - uintptr_t SpinCount; // force size on 64-bit systems when packed -} CRITICAL_SECTION, RTL_CRITICAL_SECTION, *PRTL_CRITICAL_SECTION; - -#pragma pack(pop) - -#endif - -#endif // _INC_WINDOWS - #endif // __GCENV_STRUCTS_INCLUDED__ diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 1632b11dbc44..9ffb1de45c44 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -51,7 +51,7 @@ namespace SVR { #else // SERVER_GC namespace WKS { #endif // SERVER_GC - + #include "gcimpl.h" #include "gcpriv.h" @@ -106,16 +106,14 @@ BOOL bgc_heap_walk_for_etw_p = FALSE; #define num_partial_refs 32 #endif //SERVER_GC +#define demotion_plug_len_th (6*1024*1024) + #ifdef USE_REGIONS -// If the pinned survived is 1+% of the region size, we don't demote. -#define demotion_pinned_ratio_th (1) // If the survived / region_size is 90+%, we don't compact this region. #define sip_surv_ratio_th (90) // If the survived due to cards from old generations / region_size is 90+%, // we don't compact this region, also we immediately promote it to gen2. 
#define sip_old_card_surv_ratio_th (90) -#else -#define demotion_plug_len_th (6*1024*1024) #endif //USE_REGIONS #ifdef HOST_64BIT @@ -429,6 +427,12 @@ float mb (size_t num) return (float)((float)num / 1000.0 / 1000.0); } +inline +size_t gib (size_t num) +{ + return (num / 1024 / 1024 / 1024); +} + #ifdef BACKGROUND_GC uint32_t bgc_alloc_spin_count = 140; uint32_t bgc_alloc_spin_count_uoh = 16; @@ -2566,6 +2570,8 @@ BOOL gc_heap::last_gc_before_oom = FALSE; BOOL gc_heap::sufficient_gen0_space_p = FALSE; +BOOL gc_heap::decide_promote_gen1_pins_p = TRUE; + #ifdef BACKGROUND_GC uint8_t* gc_heap::background_saved_lowest_address = 0; uint8_t* gc_heap::background_saved_highest_address = 0; @@ -2617,8 +2623,6 @@ uint8_t* gc_heap::demotion_low; uint8_t* gc_heap::demotion_high; -BOOL gc_heap::demote_gen1_p = TRUE; - uint8_t* gc_heap::last_gen1_pin_end; #endif //!USE_REGIONS @@ -3910,6 +3914,10 @@ bool region_allocator::init (uint8_t* start, uint8_t* end, size_t alignment, uin *lowest = global_region_start; *highest = global_region_end; } + else + { + log_init_error_to_host ("global region allocator failed to allocate %zd bytes during init", (total_num_units * sizeof (uint32_t))); + } return (unit_map != 0); } @@ -5915,7 +5923,7 @@ gc_heap::soh_get_segment_to_expand() // tend to be more compact than the later ones. heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); - PREFIX_ASSUME(fseg != NULL); + _ASSERTE(fseg != NULL); #ifdef SEG_REUSE_STATS int try_reuse = 0; @@ -6456,9 +6464,9 @@ class heap_select if (GCToOSInterface::CanGetCurrentProcessorNumber()) { uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); - // For a 32-bit process running on a machine with > 64 procs, - // even though the process can only use up to 32 procs, the processor - // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0, + // For a 32-bit process running on a machine with > 64 procs, + // even though the process can only use up to 32 procs, the processor + // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0, // the GetCurrentProcessorNumber will return a number that's >= 64. proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS] = (uint16_t)heap_number; } @@ -6482,9 +6490,9 @@ class heap_select if (GCToOSInterface::CanGetCurrentProcessorNumber()) { uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); - // For a 32-bit process running on a machine with > 64 procs, - // even though the process can only use up to 32 procs, the processor - // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0, + // For a 32-bit process running on a machine with > 64 procs, + // even though the process can only use up to 32 procs, the processor + // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0, // the GetCurrentProcessorNumber will return a number that's >= 64. int adjusted_heap = proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS]; // with dynamic heap count, need to make sure the value is in range. 
@@ -8130,12 +8138,12 @@ void gc_heap::set_allocation_heap_segment (generation* gen) { seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); while (!in_range_for_segment (p, seg)) { seg = heap_segment_next_rw (seg); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); } } #endif //USE_REGIONS @@ -9469,6 +9477,7 @@ bool gc_heap::inplace_commit_card_table (uint8_t* from, uint8_t* to) succeed = virtual_commit (commit_begins[i], commit_sizes[i], recorded_committed_bookkeeping_bucket); if (!succeed) { + log_init_error_to_host ("Committing %zd bytes (%.3f mb) for GC bookkeeping element#%d failed", commit_sizes[i], mb (commit_sizes[i]), i); failed_commit = i; break; } @@ -9522,7 +9531,10 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end) bookkeeping_start = mem; if (!mem) + { + log_init_error_to_host ("Reserving %zd bytes (%.3f mb) for GC bookkeeping failed", alloc_size, mb (alloc_size)); return 0; + } dprintf (2, ("Init - Card table alloc for %zd bytes: [%zx, %zx[", alloc_size, (size_t)mem, (size_t)(mem+alloc_size))); @@ -10780,6 +10792,14 @@ size_t gc_heap::sort_mark_list() size_t region_index = get_basic_region_index_for_address (heap_segment_mem (region)); uint8_t* region_limit = heap_segment_allocated (region); + // Due to GC holes, x can point to something in a region that already got freed. And that region's + // allocated would be 0 and cause an infinite loop which is much harder to handle on production than + // simply throwing an exception. + if (region_limit == 0) + { + FATAL_GC_ERROR(); + } + uint8_t*** mark_list_piece_start_ptr = &mark_list_piece_start[region_index]; uint8_t*** mark_list_piece_end_ptr = &mark_list_piece_end[region_index]; #else // USE_REGIONS @@ -12400,6 +12420,7 @@ heap_segment* gc_heap::make_heap_segment (uint8_t* new_pages, size_t size, gc_he if (!virtual_commit (new_pages, initial_commit, oh, h_number)) { + log_init_error_to_host ("Committing %zd bytes for a region failed", initial_commit); return 0; } @@ -13605,7 +13626,7 @@ void gc_heap::distribute_free_regions() dprintf(REGIONS_LOG, ("distributing the %zd %s regions, removing %zd regions", total_budget_in_region_units[kind], free_region_kind_name[kind], - balance)); + balance_to_decommit)); if (balance_to_decommit > 0) { @@ -14347,6 +14368,7 @@ bool allocate_initial_regions(int number_of_heaps) initial_regions = new (nothrow) uint8_t*[number_of_heaps][total_generation_count][2]; if (initial_regions == nullptr) { + log_init_error_to_host ("allocate_initial_regions failed to allocate %zd bytes", (number_of_heaps * total_generation_count * 2 * sizeof (uint8_t*))); return false; } for (int i = 0; i < number_of_heaps; i++) @@ -14409,7 +14431,6 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, if (gc_config_log == NULL) { - GCToEEInterface::LogErrorToHost("Cannot create log file"); return E_FAIL; } @@ -14534,7 +14555,11 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, size_t reserve_size = regions_range; uint8_t* reserve_range = (uint8_t*)virtual_alloc (reserve_size, use_large_pages_p); if (!reserve_range) + { + log_init_error_to_host ("Reserving %zd bytes (%zd GiB) for the regions range failed, do you have a virtual memory limit set on this process?", + reserve_size, gib (reserve_size)); return E_OUTOFMEMORY; + } if (!global_region_allocator.init (reserve_range, (reserve_range + reserve_size), ((size_t)1 << min_segment_size_shr), @@ -14547,7 +14572,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, else { 
assert (!"cannot use regions without specifying the range!!!"); - GCToEEInterface::LogErrorToHost("Cannot use regions without specifying the range (using DOTNET_GCRegionRange)"); + log_init_error_to_host ("Regions range is 0! unexpected"); return E_FAIL; } #else //USE_REGIONS @@ -14617,11 +14642,6 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, if (!g_heaps) return E_OUTOFMEMORY; -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:22011) // Suppress PREFast warning about integer underflow/overflow -#endif // _PREFAST_ - #if !defined(USE_REGIONS) || defined(_DEBUG) g_promoted = new (nothrow) size_t [number_of_heaps*16]; if (!g_promoted) @@ -14636,9 +14656,6 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, #ifdef MH_SC_MARK g_mark_stack_busy = new (nothrow) int[(number_of_heaps+2)*HS_CACHE_LINE_SIZE/sizeof(int)]; #endif //MH_SC_MARK -#ifdef _PREFAST_ -#pragma warning(pop) -#endif // _PREFAST_ #ifdef MH_SC_MARK if (!g_mark_stack_busy) @@ -14687,7 +14704,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, if (!init_semi_shared()) { - GCToEEInterface::LogErrorToHost("PER_HEAP_ISOLATED data members initialization failed"); + log_init_error_to_host ("PER_HEAP_ISOLATED data members initialization failed"); hres = E_FAIL; } @@ -14965,14 +14982,7 @@ gc_heap* gc_heap::make_gc_heap ( if (!res->mark_list_piece_start) return 0; -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:22011) // Suppress PREFast warning about integer underflow/overflow -#endif // _PREFAST_ res->mark_list_piece_end = new (nothrow) uint8_t**[n_heaps + 32]; // +32 is padding to reduce false sharing -#ifdef _PREFAST_ -#pragma warning(pop) -#endif // _PREFAST_ if (!res->mark_list_piece_end) return 0; @@ -15013,10 +15023,6 @@ gc_heap::wait_for_gc_done(int32_t timeOut) dprintf(2, ("waiting for the gc_done_event on heap %d", wait_heap->heap_number)); #endif // MULTIPLE_HEAPS -#ifdef _PREFAST_ - PREFIX_ASSUME(wait_heap != NULL); -#endif // _PREFAST_ - dwWaitResult = wait_heap->gc_done_event.Wait(timeOut, FALSE); } disable_preemptive (cooperative_mode); @@ -15636,7 +15642,7 @@ gc_heap::self_destroy() for (int i = get_start_generation_index(); i < total_generation_count; i++) { heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i))); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); while (seg) { @@ -20754,6 +20760,55 @@ heap_segment* gc_heap::get_next_alloc_seg (generation* gen) #endif //USE_REGIONS } +bool gc_heap::decide_on_gen1_pin_promotion (float pin_frag_ratio, float pin_surv_ratio) +{ + return ((pin_frag_ratio > 0.15) && (pin_surv_ratio > 0.30)); +} + +// Add the size of the pinned plug to the higher generation's pinned allocations. +void gc_heap::attribute_pin_higher_gen_alloc ( +#ifdef USE_REGIONS + heap_segment* seg, int to_gen_number, +#endif + uint8_t* plug, size_t len) +{ + //find out which gen this pinned plug came from + int frgn = object_gennum (plug); + if ((frgn != (int)max_generation) && settings.promotion) + { + generation_pinned_allocation_sweep_size (generation_of (frgn + 1)) += len; + +#ifdef USE_REGIONS + // With regions it's a bit more complicated since we only set the plan_gen_num + // of a region after we've planned it. This means if the pinning plug is in the + // the same seg we are planning, we haven't set its plan_gen_num yet. So we + // need to check for that first. + int togn = (in_range_for_segment (plug, seg) ? 
to_gen_number : object_gennum_plan (plug)); +#else + int togn = object_gennum_plan (plug); +#endif //USE_REGIONS + if (frgn < togn) + { + generation_pinned_allocation_compact_size (generation_of (togn)) += len; + } + } +} + +#ifdef USE_REGIONS +void gc_heap::attribute_pin_higher_gen_alloc (int frgn, int togn, size_t len) +{ + if ((frgn != (int)max_generation) && settings.promotion) + { + generation_pinned_allocation_sweep_size (generation_of (frgn + 1)) += len; + + if (frgn < togn) + { + generation_pinned_allocation_compact_size (generation_of (togn)) += len; + } + } +} +#endif //USE_REGIONS + uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen, size_t size, int from_gen_number, @@ -20841,28 +20896,12 @@ uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen, generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); set_allocator_next_pin (gen); - - //Add the size of the pinned plug to the right pinned allocations - //find out which gen this pinned plug came from - int frgn = object_gennum (plug); - if ((frgn != (int)max_generation) && settings.promotion) - { - generation_pinned_allocation_sweep_size (generation_of (frgn + 1)) += len; - + attribute_pin_higher_gen_alloc ( #ifdef USE_REGIONS - // With regions it's a bit more complicated since we only set the plan_gen_num - // of a region after we've planned it. This means if the pinning plug is in the - // the same seg we are planning, we haven't set its plan_gen_num yet. So we - // need to check for that first. - int togn = (in_range_for_segment (plug, seg) ? to_gen_number : object_gennum_plan (plug)); -#else - int togn = object_gennum_plan (plug); -#endif //USE_REGIONS - if (frgn < togn) - { - generation_pinned_allocation_compact_size (generation_of (togn)) += len; - } - } + seg, to_gen_number, +#endif + plug, len); + goto retry; } @@ -21762,11 +21801,6 @@ size_t gc_heap::generation_unusable_fragmentation (generation* inst, int hn) } } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6326) // "Potential comparison of a constant with another constant" is intentional in this function. -#endif //_PREFAST_ - /* This is called by when we are actually doing a GC, or when we are just checking whether we would do a full blocking GC, in which case check_only_p is TRUE. 
@@ -22363,10 +22397,6 @@ int gc_heap::generation_to_condemn (int n_initial, return n; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //_PREFAST_ - inline size_t gc_heap::min_reclaim_fragmentation_threshold (uint32_t num_heaps) { @@ -22404,7 +22434,7 @@ void gc_heap::init_background_gc () generation_allocation_limit (gen) = 0; generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(generation_allocation_segment(gen) != NULL); + _ASSERTE(generation_allocation_segment(gen) != NULL); #ifdef DOUBLY_LINKED_FL generation_set_bgc_mark_bit_p (gen) = FALSE; @@ -26100,6 +26130,7 @@ void gc_heap::calculate_new_heap_count () mb (total_soh_stable_size), mb (total_bcd), diff_pct, change_int, (change_int * 100.0 / n_heaps))); } +#ifdef FEATURE_EVENT_TRACE GCEventFireSizeAdaptationTuning_V1 ( (uint16_t)new_n_heaps, (uint16_t)max_heap_count_datas, @@ -26118,6 +26149,7 @@ void gc_heap::calculate_new_heap_count () (uint16_t)hc_change_freq_factor, (uint16_t)hc_freq_reason, (uint8_t)adj_metric); +#endif //FEATURE_EVENT_TRACE } size_t num_gen2s_since_last_change = 0; @@ -26173,6 +26205,7 @@ void gc_heap::calculate_new_heap_count () if (process_gen2_samples_p) { dynamic_heap_count_data_t::gen2_sample* gen2_samples = dynamic_heap_count_data.gen2_samples; +#ifdef FEATURE_EVENT_TRACE GCEventFireSizeAdaptationFullGCTuning_V1 ( (uint16_t)dynamic_heap_count_data.new_n_heaps, (uint64_t)current_gc_index, @@ -26184,6 +26217,7 @@ void gc_heap::calculate_new_heap_count () (float)gen2_samples[1].gc_percent, (uint32_t)(current_gc_index - gen2_samples[2].gc_index), (float)gen2_samples[2].gc_percent); +#endif //FEATURE_EVENT_TRACE dprintf (6666, ("processed gen2 samples, updating processed %Id -> %Id", dynamic_heap_count_data.processed_gen2_samples_count, dynamic_heap_count_data.current_gen2_samples_count)); dynamic_heap_count_data.processed_gen2_samples_count = dynamic_heap_count_data.current_gen2_samples_count; @@ -26879,12 +26913,14 @@ void gc_heap::process_datas_sample() (sample.gc_pause_time ? (sample.gc_survived_size / 1000.0 / sample.gc_pause_time) : 0), (sample.gc_pause_time ?
((float)sample.gc_survived_size / sample.gc_pause_time / n_heaps) : 0))); +#ifdef FEATURE_EVENT_TRACE GCEventFireSizeAdaptationSample_V1 ( (uint64_t)gc_index, (uint32_t)sample.elapsed_between_gcs, (uint32_t)sample.gc_pause_time, (uint32_t)soh_msl_wait_time, (uint32_t)uoh_msl_wait_time, (uint64_t)total_soh_stable_size, (uint32_t)sample.gen0_budget_per_heap); +#endif //FEATURE_EVENT_TRACE dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size; (dynamic_heap_count_data.current_samples_count)++; @@ -28310,7 +28346,7 @@ gc_heap::ha_mark_object_simple (uint8_t** po THREAD_NUMBER_DCL) if (heap_analyze_success) { - PREFIX_ASSUME(internal_root_array_index < internal_root_array_length); + _ASSERTE(internal_root_array_index < internal_root_array_length); uint8_t* ref = (uint8_t*)po; if (!current_obj || @@ -29002,7 +29038,7 @@ void gc_heap::background_process_mark_overflow_internal (uint8_t* min_add, uint8 int align_const = get_alignment_constant (small_object_segments); generation* gen = hp->generation_of (i); heap_segment* seg = heap_segment_in_range (generation_start_segment (gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); uint8_t* current_min_add = min_add; uint8_t* current_max_add = max_add; @@ -29575,7 +29611,7 @@ void gc_heap::process_mark_overflow_internal (int condemned_gen_number, heap_segment* seg = heap_segment_in_range (generation_start_segment (gen)); int align_const = get_alignment_constant (i < uoh_start_generation); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); while (seg) { @@ -31119,7 +31155,8 @@ void gc_heap::advance_pins_for_demotion (generation* gen) size_t total_space_to_skip = last_gen1_pin_end - generation_allocation_pointer (gen); float pin_frag_ratio = (float)gen1_pins_left / (float)total_space_to_skip; float pin_surv_ratio = (float)gen1_pins_left / (float)(dd_survived_size (dynamic_data_of (max_generation - 1))); - if ((pin_frag_ratio > 0.15) && (pin_surv_ratio > 0.30)) + bool actual_promote_gen1_pins_p = decide_on_gen1_pin_promotion (pin_frag_ratio, pin_surv_ratio); + if (actual_promote_gen1_pins_p) { while (!pinned_plug_que_empty_p() && (pinned_plug (oldest_pin()) < original_youngest_start)) @@ -31133,19 +31170,7 @@ void gc_heap::advance_pins_for_demotion (generation* gen) generation_allocation_pointer (gen) = plug + len; generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); set_allocator_next_pin (gen); - - //Add the size of the pinned plug to the right pinned allocations - //find out which gen this pinned plug came from - int frgn = object_gennum (plug); - if ((frgn != (int)max_generation) && settings.promotion) - { - int togn = object_gennum_plan (plug); - generation_pinned_allocation_sweep_size ((generation_of (frgn +1))) += len; - if (frgn < togn) - { - generation_pinned_allocation_compact_size (generation_of (togn)) += len; - } - } + attribute_pin_higher_gen_alloc (plug, len); dprintf (2, ("skipping gap %zu, pin %p (%zd)", pinned_len (pinned_plug_of (entry)), plug, len)); @@ -31239,7 +31264,7 @@ void gc_heap::process_ephemeral_boundaries (uint8_t* x, // be the ephemeral segment. 
heap_segment* nseg = heap_segment_in_range (generation_allocation_segment (consing_gen)); - PREFIX_ASSUME(nseg != NULL); + _ASSERTE(nseg != NULL); while (!((plug >= generation_allocation_pointer (consing_gen))&& (plug < heap_segment_allocated (nseg)))) @@ -31274,7 +31299,7 @@ void gc_heap::process_ephemeral_boundaries (uint8_t* x, if (active_new_gen_number == (max_generation - 1)) { maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); - if (!demote_gen1_p) + if (decide_promote_gen1_pins_p) advance_pins_for_demotion (consing_gen); } @@ -31664,7 +31689,7 @@ BOOL gc_heap::plan_loh() generation* gen = large_object_generation; heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(start_seg != NULL); + _ASSERTE(start_seg != NULL); heap_segment* seg = start_seg; uint8_t* o = get_uoh_start_object (seg, gen); @@ -31812,7 +31837,7 @@ void gc_heap::compact_loh() generation* gen = large_object_generation; heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(start_seg != NULL); + _ASSERTE(start_seg != NULL); heap_segment* seg = start_seg; heap_segment* prev_seg = 0; uint8_t* o = get_uoh_start_object (seg, gen); @@ -32208,6 +32233,7 @@ void gc_heap::record_interesting_data_point (interesting_data_point idp) void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num) { heap_segment* alloc_region = generation_allocation_segment (consing_gen); + size_t skipped_pins_len = 0; while (!pinned_plug_que_empty_p()) { uint8_t* oldest_plug = pinned_plug (oldest_pin()); @@ -32219,6 +32245,7 @@ void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_n uint8_t* plug = pinned_plug (m); size_t len = pinned_len (m); + skipped_pins_len += len; set_new_pin_info (m, generation_allocation_pointer (consing_gen)); dprintf (REGIONS_LOG, ("pin %p b: %zx->%zx", plug, brick_of (plug), (size_t)(brick_table[brick_of (plug)]))); @@ -32237,37 +32264,43 @@ void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_n (heap_segment_swept_in_plan (alloc_region) ? "SIP" : "non SIP"), (heap_segment_swept_in_plan (alloc_region) ? heap_segment_plan_gen_num (alloc_region) : plan_gen_num))); + + attribute_pin_higher_gen_alloc (heap_segment_gen_num (alloc_region), plan_gen_num, skipped_pins_len); + set_region_plan_gen_num_sip (alloc_region, plan_gen_num); heap_segment_plan_allocated (alloc_region) = generation_allocation_pointer (consing_gen); } -void gc_heap::decide_on_demotion_pin_surv (heap_segment* region, int* no_pinned_surv_region_count) +void gc_heap::decide_on_demotion_pin_surv (heap_segment* region, int* no_pinned_surv_region_count, bool promote_gen1_pins_p, bool large_pins_p) { + int gen_num = heap_segment_gen_num (region); int new_gen_num = 0; int pinned_surv = heap_segment_pinned_survived (region); + int promote_pins_p = large_pins_p; if (pinned_surv == 0) { (*no_pinned_surv_region_count)++; - dprintf (REGIONS_LOG, ("region %Ix will be empty", heap_segment_mem (region))); + dprintf (REGIONS_LOG, ("h%d gen%d region %Ix will be empty", heap_number, heap_segment_gen_num (region), heap_segment_mem (region))); } - - // If this region doesn't have much pinned surv left, we demote it; otherwise the region - // will be promoted like normal. 
- size_t basic_region_size = (size_t)1 << min_segment_size_shr; - int pinned_ratio = (int)(((double)pinned_surv * 100.0) / (double)basic_region_size); - dprintf (REGIONS_LOG, ("h%d g%d region %Ix(%Ix) ps: %d (%d) (%s)", heap_number, - heap_segment_gen_num (region), (size_t)region, heap_segment_mem (region), pinned_surv, pinned_ratio, - ((pinned_ratio >= demotion_pinned_ratio_th) ? "ND" : "D"))); - - if (pinned_ratio >= demotion_pinned_ratio_th) + else { - if (settings.promotion) + if (!promote_pins_p && (gen_num == (max_generation - 1)) && promote_gen1_pins_p) + { + promote_pins_p = true; + } + + if (promote_pins_p) { new_gen_num = get_plan_gen_num (heap_segment_gen_num (region)); } + + attribute_pin_higher_gen_alloc (gen_num, new_gen_num, pinned_surv); } + dprintf (REGIONS_LOG, ("h%d gen%d region pinned surv %d %s -> g%d", + heap_number, gen_num, pinned_surv, (promote_pins_p ? "PROMOTE" : "DEMOTE"), new_gen_num)); + set_region_plan_gen_num (region, new_gen_num); } @@ -32419,6 +32452,9 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c // Instead of checking for this condition we just set the alloc region to 0 so it's easier to check // later. + // + // set generation_allocation_segment to 0, we know we don't have pins so we will not be going through the while loop below + // generation_allocation_segment (consing_gen) = 0; generation_allocation_pointer (consing_gen) = 0; generation_allocation_limit (consing_gen) = 0; @@ -32429,13 +32465,12 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c // What has been planned doesn't change at this point. So at this point we know exactly which generation still doesn't // have any regions planned and this method is responsible to attempt to plan at least one region in each of those gens. // So we look at each of the remaining regions (that are non SIP, since SIP regions have already been planned) and decide - // which generation it should be planned in. We used the following rules to decide - + // which generation it should be planned in. // - // + if the pinned surv of a region is >= demotion_pinned_ratio_th (this will be dynamically tuned based on memory load), - // it will be promoted to its normal planned generation unconditionally. + // + if we are in a gen1 GC due to cards, we will decide if we need to promote based on the same criteria as segments. And + // we never demote large pins to gen0. // - // + if the pinned surv is < demotion_pinned_ratio_th, we will always demote it to gen0. We will record how many regions - // have no survival at all - those will be empty and can be used to plan any non gen0 generation if needed. + // + we will record how many regions have no survival at all - those will be empty and can be used to plan any non gen0 generation if needed. // // Note! We could actually promote a region with non zero pinned survivors to whichever generation we'd like (eg, we could // promote a gen0 region to gen2). However it means we'd need to set cards on those objects because we will not have a chance @@ -32452,7 +32487,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c // + if we don't have enough in regions that will be empty, we'll need to ask for new regions and if we can't, we fall back // to the special sweep mode. 
// - dprintf (REGIONS_LOG, ("h%d regions in g2: %d, g1: %d, g0: %d, before processing remaining regions", + dprintf (REGIONS_LOG, ("h%d planned regions in g2: %d, g1: %d, g0: %d, before processing remaining regions", heap_number, planned_regions_per_gen[2], planned_regions_per_gen[1], planned_regions_per_gen[0])); dprintf (REGIONS_LOG, ("h%d g2: surv %Id(p: %Id, %.2f%%), g1: surv %Id(p: %Id, %.2f%%), g0: surv %Id(p: %Id, %.2f%%)", @@ -32466,11 +32501,69 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c int to_be_empty_regions = 0; + // If decide_promote_gen1_pins_p is true, we need to see if we should promote what's left in gen1 pins. We either promote + // or demote all that's left. As a future performance improvement, we could sort these regions by the amount of + // pinned survival and only promote the ones with excessive amounts of survival. + // + // First go through the remaining gen1 regions to see if we should demote the remaining pins + heap_segment* current_region = generation_allocation_segment (consing_gen); + bool actual_promote_gen1_pins_p = false; + + if (decide_promote_gen1_pins_p) + { + size_t gen1_pins_left = 0; + size_t total_space_to_skip = 0; + + while (current_region) + { + int gen_num = heap_segment_gen_num (current_region); + if (gen_num != 0) + { + assert (gen_num == (max_generation - 1)); + + if (!heap_segment_swept_in_plan (current_region)) + { + gen1_pins_left += heap_segment_pinned_survived (current_region); + total_space_to_skip += get_region_size (current_region); + } + } + else + { + break; + } + + current_region = heap_segment_next (current_region); + } + + float pin_frag_ratio = 0.0; + float pin_surv_ratio = 0.0; + + if (total_space_to_skip) + { + size_t gen1_surv = dd_survived_size (dynamic_data_of (max_generation - 1)); + if (gen1_surv) + { + pin_frag_ratio = (float)gen1_pins_left / (float)total_space_to_skip; + pin_surv_ratio = (float)gen1_pins_left / (float)gen1_surv; + actual_promote_gen1_pins_p = decide_on_gen1_pin_promotion (pin_frag_ratio, pin_surv_ratio); + } + } + +#ifdef SIMPLE_DPRINTF + dprintf (REGIONS_LOG, ("h%d ad_p_d: PL: %zd, SL: %zd, pfr: %.3f, psr: %.3f, promote gen1 %d.
gen1_pins_left %Id, total surv %Id (p:%Id), total_space %Id", + heap_number, gen1_pins_left, total_space_to_skip, pin_frag_ratio, pin_surv_ratio, actual_promote_gen1_pins_p, gen1_pins_left, + dd_survived_size (dynamic_data_of (max_generation - 1)), dd_pinned_survived_size (dynamic_data_of (max_generation - 1)), total_space_to_skip)); +#endif + } + + maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); + + bool large_pins_p = false; + while (!pinned_plug_que_empty_p()) { uint8_t* oldest_plug = pinned_plug (oldest_pin()); - // detect pinned block in segments without pins heap_segment* nseg = heap_segment_rw (generation_allocation_segment (consing_gen)); dprintf (3, ("h%d oldest pin: %p, consing alloc %p, ptr %p, limit %p", heap_number, oldest_plug, heap_segment_mem (nseg), @@ -32480,12 +32573,10 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c while ((oldest_plug < generation_allocation_pointer (consing_gen)) || (oldest_plug >= heap_segment_allocated (nseg))) { - assert ((oldest_plug < heap_segment_mem (nseg)) || - (oldest_plug > heap_segment_reserved (nseg))); - assert (generation_allocation_pointer (consing_gen)>= - heap_segment_mem (nseg)); - assert (generation_allocation_pointer (consing_gen)<= - heap_segment_committed (nseg)); + assert ((oldest_plug < heap_segment_mem (nseg)) || (oldest_plug > heap_segment_reserved (nseg))); + assert (generation_allocation_pointer (consing_gen)>= heap_segment_mem (nseg)); + assert (generation_allocation_pointer (consing_gen)<= heap_segment_committed (nseg)); + assert (!heap_segment_swept_in_plan (nseg)); dprintf (3, ("h%d PRR: in loop, seg %p pa %p -> alloc ptr %p, plan gen %d->%d", heap_number, heap_segment_mem (nseg), @@ -32494,10 +32585,8 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c heap_segment_plan_gen_num (nseg), current_plan_gen_num)); - assert (!heap_segment_swept_in_plan (nseg)); - heap_segment_plan_allocated (nseg) = generation_allocation_pointer (consing_gen); - decide_on_demotion_pin_surv (nseg, &to_be_empty_regions); + decide_on_demotion_pin_surv (nseg, &to_be_empty_regions, actual_promote_gen1_pins_p, large_pins_p); heap_segment* next_seg = heap_segment_next_non_sip (nseg); @@ -32510,6 +32599,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c assert (next_seg != 0); nseg = next_seg; + large_pins_p = false; generation_allocation_segment (consing_gen) = nseg; generation_allocation_pointer (consing_gen) = heap_segment_mem (nseg); @@ -32519,6 +32609,11 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c uint8_t* plug = pinned_plug (m); size_t len = pinned_len (m); + if (!large_pins_p) + { + large_pins_p = (len >= demotion_plug_len_th); + } + set_new_pin_info (m, generation_allocation_pointer (consing_gen)); size_t free_size = pinned_len (m); update_planned_gen0_free_space (free_size, plug); @@ -32531,7 +32626,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c generation_allocation_pointer (consing_gen); } - heap_segment* current_region = generation_allocation_segment (consing_gen); + current_region = generation_allocation_segment (consing_gen); if (special_sweep_p) { @@ -32550,7 +32645,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c if (current_region) { - decide_on_demotion_pin_surv (current_region, &to_be_empty_regions); + decide_on_demotion_pin_surv (current_region, 
&to_be_empty_regions, actual_promote_gen1_pins_p, large_pins_p); if (!heap_segment_swept_in_plan (current_region)) { @@ -32897,10 +32992,6 @@ inline void save_allocated(heap_segment* seg) } } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif //_PREFAST_ void gc_heap::plan_phase (int condemned_gen_number) { size_t old_gen2_allocated = 0; @@ -32969,7 +33060,7 @@ void gc_heap::plan_phase (int condemned_gen_number) if (shigh != (uint8_t*)0) { heap_segment* seg = heap_segment_rw (generation_start_segment (current_gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); heap_segment* fseg = seg; do @@ -33052,7 +33143,7 @@ void gc_heap::plan_phase (int condemned_gen_number) { heap_segment* seg = heap_segment_rw (generation_start_segment (current_gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); heap_segment* sseg = seg; do @@ -33087,7 +33178,7 @@ void gc_heap::plan_phase (int condemned_gen_number) heap_segment* seg1 = heap_segment_rw (generation_start_segment (condemned_gen1)); - PREFIX_ASSUME(seg1 != NULL); + _ASSERTE(seg1 != NULL); uint8_t* end = heap_segment_allocated (seg1); uint8_t* first_condemned_address = get_soh_start_object (seg1, condemned_gen1); @@ -33172,7 +33263,7 @@ void gc_heap::plan_phase (int condemned_gen_number) heap_segment* start_seg = heap_segment_rw (generation_start_segment (older_gen)); - PREFIX_ASSUME(start_seg != NULL); + _ASSERTE(start_seg != NULL); #ifdef USE_REGIONS heap_segment* skip_seg = 0; @@ -33208,7 +33299,7 @@ void gc_heap::plan_phase (int condemned_gen_number) { generation* current_gen = generation_of (condemned_gen_index1); heap_segment* seg2 = heap_segment_rw (generation_start_segment (current_gen)); - PREFIX_ASSUME(seg2 != NULL); + _ASSERTE(seg2 != NULL); while (seg2) { @@ -33258,7 +33349,7 @@ void gc_heap::plan_phase (int condemned_gen_number) generation_allocation_segment (condemned_gen2) = heap_segment_rw (generation_start_segment (condemned_gen2)); - PREFIX_ASSUME(generation_allocation_segment(condemned_gen2) != NULL); + _ASSERTE(generation_allocation_segment(condemned_gen2) != NULL); #ifdef USE_REGIONS generation_allocation_pointer (condemned_gen2) = @@ -33289,6 +33380,12 @@ void gc_heap::plan_phase (int condemned_gen_number) dprintf(3,( " From %zx to %zx", (size_t)x, (size_t)end)); + // Normally we always demote pins left after plan allocation, but if we are doing a gen1 only because of cards, it means + // we need to decide if we will promote these pins from gen1. + decide_promote_gen1_pins_p = (settings.promotion && + (settings.condemned_generation == (max_generation - 1)) && + gen_to_condemn_reasons.is_only_condition(gen_low_card_p)); + #ifdef USE_REGIONS if (should_sweep_in_plan (seg1)) { @@ -33298,11 +33395,6 @@ void gc_heap::plan_phase (int condemned_gen_number) #else demotion_low = MAX_PTR; demotion_high = heap_segment_allocated (ephemeral_heap_segment); - // If we are doing a gen1 only because of cards, it means we should not demote any pinned plugs - // from gen1. They should get promoted to gen2. 
- demote_gen1_p = !(settings.promotion && - (settings.condemned_generation == (max_generation - 1)) && - gen_to_condemn_reasons.is_only_condition(gen_low_card_p)); total_ephemeral_size = 0; #endif //!USE_REGIONS @@ -33783,7 +33875,7 @@ void gc_heap::plan_phase (int condemned_gen_number) dd_artificial_pinned_survived_size (dd_active_old) += artificial_pinned_size; #ifndef USE_REGIONS - if (!demote_gen1_p && (active_old_gen_number == (max_generation - 1))) + if (decide_promote_gen1_pins_p && (active_old_gen_number == (max_generation - 1))) { last_gen1_pin_end = plug_end; } @@ -33880,7 +33972,7 @@ void gc_heap::plan_phase (int condemned_gen_number) if (active_new_gen_number == (max_generation - 1)) { maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); - if (!demote_gen1_p) + if (decide_promote_gen1_pins_p) advance_pins_for_demotion (consing_gen); } @@ -34869,9 +34961,6 @@ void gc_heap::plan_phase (int condemned_gen_number) //verify_partial(); } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //_PREFAST_ /***************************** Called after compact phase to fix all generation gaps @@ -35844,7 +35933,7 @@ void gc_heap::make_free_lists (int condemned_gen_number) uint8_t* start_address = get_soh_start_object (current_heap_segment, condemned_gen); size_t current_brick = brick_of (start_address); - PREFIX_ASSUME(current_heap_segment != NULL); + _ASSERTE(current_heap_segment != NULL); uint8_t* end_address = heap_segment_allocated (current_heap_segment); size_t end_brick = brick_of (end_address - 1); @@ -35886,7 +35975,7 @@ void gc_heap::make_free_lists (int condemned_gen_number) heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(start_seg != NULL); + _ASSERTE(start_seg != NULL); uint8_t* gap = heap_segment_mem (start_seg); @@ -35964,7 +36053,7 @@ void gc_heap::make_free_lists (int condemned_gen_number) reset_allocation_pointers (gen2, gap); dprintf(3,("Fixing generation start of %d to: %zx", args.free_list_gen_number, (size_t)gap)); - PREFIX_ASSUME(gap != NULL); + _ASSERTE(gap != NULL); make_unused_array (gap, Align (min_obj_size)); args.free_list_gen_number--; @@ -36879,7 +36968,7 @@ void gc_heap::relocate_survivors (int condemned_gen_number, uint8_t* start_address = get_soh_start_object (current_heap_segment, condemned_gen); size_t current_brick = brick_of (start_address); - PREFIX_ASSUME(current_heap_segment != NULL); + _ASSERTE(current_heap_segment != NULL); uint8_t* end_address = heap_segment_allocated (current_heap_segment); @@ -37053,7 +37142,7 @@ void gc_heap::walk_relocation (void* profiling_context, record_surv_fn fn) uint8_t* start_address = get_soh_start_object (current_heap_segment, condemned_gen); size_t current_brick = brick_of (start_address); - PREFIX_ASSUME(current_heap_segment != NULL); + _ASSERTE(current_heap_segment != NULL); size_t end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1); walk_relocate_args args; args.is_shortened = FALSE; @@ -39217,7 +39306,7 @@ void gc_heap::background_mark_phase () for (int i = get_start_generation_index(); i < uoh_start_generation; i++) { heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i))); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); while (seg) { @@ -39503,7 +39592,7 @@ void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p) for (int i = start_gen_idx; i < total_generation_count; i++) { heap_segment* seg = heap_segment_rw (generation_start_segment 
(generation_of (i))); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); while (seg) { @@ -42141,7 +42230,7 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating CARD_ for (int i = get_start_generation_index(); i < max_generation; i++) { heap_segment* soh_seg = heap_segment_rw (generation_start_segment (generation_of (i))); - PREFIX_ASSUME(soh_seg != NULL); + _ASSERTE(soh_seg != NULL); while (soh_seg) { @@ -42178,7 +42267,7 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating CARD_ heap_segment_plan_allocated (ephemeral_heap_segment) : high); #endif //USE_REGIONS heap_segment* seg = heap_segment_rw (generation_start_segment (oldest_gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); uint8_t* beg = get_soh_start_object (seg, oldest_gen); uint8_t* end = compute_next_end (seg, low); @@ -42227,7 +42316,7 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating CARD_ size_t card_last_obj = card_of (last_object_processed); clear_cards(card, card_last_obj); - // We need to be careful of the accounting here because we could be in the situation where there are more set cards between end of + // We need to be careful of the accounting here because we could be in the situation where there are more set cards between end of // last set card batch and last_object_processed. We will be clearing all of them. But we can't count the set cards we haven't // discovered yet or we can get a negative number for n_card_set. However, if last_object_processed lands before what end_card // corresponds to, we can't count the whole batch because it will be handled by a later clear_cards. @@ -44391,7 +44480,7 @@ size_t gc_heap::generation_plan_size (int gen_number) size_t gensize = 0; heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); while (seg && (seg != ephemeral_heap_segment)) { @@ -44442,7 +44531,7 @@ size_t gc_heap::generation_size (int gen_number) size_t gensize = 0; heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); while (seg && (seg != ephemeral_heap_segment)) { @@ -44585,8 +44674,8 @@ gc_history_per_heap* gc_heap::get_gc_data_per_heap() void gc_heap::compute_new_dynamic_data (int gen_number) { - PREFIX_ASSUME(gen_number >= 0); - PREFIX_ASSUME(gen_number <= max_generation); + _ASSERTE(gen_number >= 0); + _ASSERTE(gen_number <= max_generation); dynamic_data* dd = dynamic_data_of (gen_number); generation* gen = generation_of (gen_number); @@ -44646,7 +44735,7 @@ void gc_heap::compute_new_dynamic_data (int gen_number) size_t final_promoted = 0; final_promoted = min (finalization_promoted_bytes, out); // Prefast: this is clear from above but prefast needs to be told explicitly - PREFIX_ASSUME(final_promoted <= out); + _ASSERTE(final_promoted <= out); dprintf (2, ("gen: %d final promoted: %zd", gen_number, final_promoted)); dd_freach_previous_promotion (dd) = final_promoted; @@ -45162,7 +45251,7 @@ size_t gc_heap::generation_fragmentation (generation* gen, heap_segment_mem (ephemeral_heap_segment)); heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); while (seg != ephemeral_heap_segment) { @@ -45223,7 +45312,7 @@ size_t gc_heap::generation_sizes (generation* gen, bool use_saved_p) { heap_segment* seg = heap_segment_in_range (generation_start_segment (gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); 
while (seg) { @@ -45939,7 +46028,7 @@ void gc_heap::walk_survivors_for_uoh (void* profiling_context, record_surv_fn fn generation* gen = generation_of (gen_number); heap_segment* seg = heap_segment_rw (generation_start_segment (gen));; - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); uint8_t* o = get_uoh_start_object (seg, gen); uint8_t* plug_end = o; @@ -46600,7 +46689,7 @@ void gc_heap::background_sweep() for (int i = uoh_start_generation; i < total_generation_count; i++) { heap_segment* uoh_seg = heap_segment_rw (generation_start_segment (generation_of (i))); - PREFIX_ASSUME(uoh_seg != NULL); + _ASSERTE(uoh_seg != NULL); while (uoh_seg) { uoh_seg->flags &= ~heap_segment_flags_swept; @@ -46710,7 +46799,7 @@ void gc_heap::background_sweep() } } - PREFIX_ASSUME(start_seg != NULL); + _ASSERTE(start_seg != NULL); heap_segment* seg = start_seg; dprintf (2, ("bgs: sweeping gen %d seg %p->%p(%p)", gen->gen_num, heap_segment_mem (seg), @@ -46941,7 +47030,7 @@ void gc_heap::background_sweep() } generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(generation_allocation_segment(gen) != NULL); + _ASSERTE(generation_allocation_segment(gen) != NULL); if (i == max_generation) { @@ -46990,7 +47079,7 @@ void gc_heap::background_sweep() concurrent_print_time_delta ("background sweep"); heap_segment* reset_seg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); - PREFIX_ASSUME(reset_seg != NULL); + _ASSERTE(reset_seg != NULL); while (reset_seg) { @@ -47072,7 +47161,7 @@ void gc_heap::sweep_uoh_objects (int gen_num) generation* gen = generation_of (gen_num); heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(start_seg != NULL); + _ASSERTE(start_seg != NULL); heap_segment* seg = start_seg; heap_segment* prev_seg = 0; @@ -47169,7 +47258,7 @@ void gc_heap::sweep_uoh_objects (int gen_num) generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(generation_allocation_segment(gen) != NULL); + _ASSERTE(generation_allocation_segment(gen) != NULL); } void gc_heap::relocate_in_uoh_objects (int gen_num) @@ -47178,7 +47267,7 @@ void gc_heap::relocate_in_uoh_objects (int gen_num) heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); uint8_t* o = get_uoh_start_object (seg, gen); @@ -47224,7 +47313,7 @@ void gc_heap::mark_through_cards_for_uoh_objects (card_fn fn, generation* oldest_gen = generation_of (gen_num); heap_segment* seg = heap_segment_rw (generation_start_segment (oldest_gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); uint8_t* beg = get_uoh_start_object (seg, oldest_gen); uint8_t* end = heap_segment_allocated (seg); @@ -47520,11 +47609,6 @@ void gc_heap::descr_generations_to_profiler (gen_walk_fn fn, void *context) #else //MULTIPLE_HEAPS { gc_heap* hp = NULL; -#ifdef _PREFAST_ - // prefix complains about us dereferencing hp in wks build even though we only access static members - // this way. 
not sure how to shut it up except for this ugly workaround: - PREFIX_ASSUME(hp != NULL); -#endif // _PREFAST_ #endif //MULTIPLE_HEAPS for (int curr_gen_number = total_generation_count-1; curr_gen_number >= 0; curr_gen_number--) @@ -48270,8 +48354,10 @@ gc_heap::verify_free_lists () #if defined(USE_REGIONS) && defined(MULTIPLE_HEAPS) heap_segment* region = region_of (free_list); - if (region->heap != this) + if ((region->heap != this) && ((gen_num != max_generation) || (!trigger_bgc_for_rethreading_p))) { + // The logic in change_heap_count depends on the upcoming BGC (or blocking gen 2) to rebuild the gen 2 free list. + // In that case, before the rebuild happens, the gen2 free list is expected to contain free list items that do not belong to the right heap. dprintf (1, ("curr free item %p should be on heap %d, but actually is on heap %d: %d", free_list, this->heap_number, region->heap->heap_number)); FATAL_GC_ERROR(); } @@ -49127,6 +49213,7 @@ HRESULT GCHeap::Initialize() memset (gc_heap::committed_by_oh, 0, sizeof (gc_heap::committed_by_oh)); if (!gc_heap::compute_hard_limit()) { + log_init_error_to_host ("compute_hard_limit failed, check your heap hard limit related configs"); return CLR_E_GC_BAD_HARD_LIMIT; } @@ -49144,6 +49231,7 @@ HRESULT GCHeap::Initialize() uintptr_t config_affinity_mask = static_cast(GCConfig::GetGCHeapAffinitizeMask()); if (!ParseGCHeapAffinitizeRanges(cpu_index_ranges_holder.Get(), &config_affinity_set, config_affinity_mask)) { + log_init_error_to_host ("ParseGCHeapAffinitizeRanges failed, check your HeapAffinitizeRanges config"); return CLR_E_GC_BAD_AFFINITY_CONFIG_FORMAT; } @@ -49152,6 +49240,7 @@ HRESULT GCHeap::Initialize() if (process_affinity_set->IsEmpty()) { + log_init_error_to_host ("This process is affinitized to 0 CPUs, check your GC heap affinity related configs"); return CLR_E_GC_BAD_AFFINITY_CONFIG; } @@ -49294,6 +49383,8 @@ HRESULT GCHeap::Initialize() if (gc_region_size >= MAX_REGION_SIZE) { + log_init_error_to_host ("The GC RegionSize config is set to %zd bytes (%zd GiB), it needs to be < %zd GiB", + gc_region_size, gib (gc_region_size), gib (MAX_REGION_SIZE)); return CLR_E_GC_BAD_REGION_SIZE; } @@ -49322,6 +49413,8 @@ HRESULT GCHeap::Initialize() if (!power_of_two_p(gc_region_size) || ((gc_region_size * nhp * min_regions_per_heap) > gc_heap::regions_range)) { + log_init_error_to_host ("Region size is %zd bytes, range is %zd bytes, (%d heaps * %d regions/heap = %d) regions needed initially", + gc_region_size, gc_heap::regions_range, nhp, min_regions_per_heap, (nhp * min_regions_per_heap)); return E_OUTOFMEMORY; } @@ -49396,7 +49489,7 @@ HRESULT GCHeap::Initialize() if (!WaitForGCEvent->CreateManualEventNoThrow(TRUE)) { - GCToEEInterface::LogErrorToHost("Creation of WaitForGCEvent failed"); + log_init_error_to_host ("Creation of WaitForGCEvent failed"); return E_FAIL; } @@ -49485,12 +49578,10 @@ HRESULT GCHeap::Initialize() uint8_t* numa_mem = (uint8_t*)GCToOSInterface::VirtualReserve (hb_info_size_per_node, 0, 0, (uint16_t)numa_node_index); if (!numa_mem) { - GCToEEInterface::LogErrorToHost("Reservation of numa_mem failed"); return E_FAIL; } if (!GCToOSInterface::VirtualCommit (numa_mem, hb_info_size_per_node, (uint16_t)numa_node_index)) { - GCToEEInterface::LogErrorToHost("Commit of numa_mem failed"); return E_FAIL; } @@ -49592,7 +49683,6 @@ HRESULT GCHeap::Initialize() if (seg_mem == nullptr) { - GCToEEInterface::LogErrorToHost("STRESS_REGIONS couldn't allocate ro segment"); hr = E_FAIL; break; } @@ -49606,7 +49696,6 @@ HRESULT
GCHeap::Initialize() if (!RegisterFrozenSegment(&seg_info)) { - GCToEEInterface::LogErrorToHost("STRESS_REGIONS failed to RegisterFrozenSegment"); hr = E_FAIL; break; } @@ -50358,11 +50447,6 @@ GCHeap::Alloc(gc_alloc_context* context, size_t size, uint32_t flags REQD_ALIGN_ gc_heap* hp = acontext->get_alloc_heap()->pGenGCHeap; #else gc_heap* hp = pGenGCHeap; -#ifdef _PREFAST_ - // prefix complains about us dereferencing hp in wks build even though we only access static members - // this way. not sure how to shut it up except for this ugly workaround: - PREFIX_ASSUME(hp != NULL); -#endif //_PREFAST_ #endif //MULTIPLE_HEAPS assert(size < loh_size_threshold || (flags & GC_ALLOC_LARGE_OBJECT_HEAP)); @@ -53300,7 +53384,7 @@ void checkGCWriteBarrier() generation* gen = gc_heap::generation_of (i); heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); - PREFIX_ASSUME(seg != NULL); + _ASSERTE(seg != NULL); while(seg) { diff --git a/src/coreclr/gc/gc.h b/src/coreclr/gc/gc.h index a1586ce8f687..4c2e3a04a7fc 100644 --- a/src/coreclr/gc/gc.h +++ b/src/coreclr/gc/gc.h @@ -392,4 +392,6 @@ void GCLog (const char *fmt, ... ); FILE* CreateLogFile(const GCConfigStringHolder& temp_logfile_name, bool is_config); #endif //TRACE_GC || GC_CONFIG_DRIVEN +void log_init_error_to_host (const char* format, ...); + #endif // __GC_H diff --git a/src/coreclr/gc/gccommon.cpp b/src/coreclr/gc/gccommon.cpp index f68b06d818e7..822d9c4349a8 100644 --- a/src/coreclr/gc/gccommon.cpp +++ b/src/coreclr/gc/gccommon.cpp @@ -132,6 +132,12 @@ FILE* CreateLogFile(const GCConfigStringHolder& temp_logfile_name, bool is_confi //_snprintf_s(logfile_name, MAX_LONGPATH+1, _TRUNCATE, "%s.%d%s", temp_logfile_name.Get(), pid, suffix); _snprintf_s(logfile_name, MAX_LONGPATH+1, _TRUNCATE, "%s%s", temp_logfile_name.Get(), suffix); logFile = fopen(logfile_name, "wb"); + + if (logFile == NULL) + { + log_init_error_to_host ("Cannot create log file %s", logfile_name); + } + return logFile; } #endif //TRACE_GC || GC_CONFIG_DRIVEN @@ -159,7 +165,6 @@ HRESULT initialize_log_file() if (gc_log == NULL) { - GCToEEInterface::LogErrorToHost("Cannot create log file"); return E_FAIL; } @@ -168,7 +173,7 @@ HRESULT initialize_log_file() if (gc_log_file_size <= 0 || gc_log_file_size > 500) { - GCToEEInterface::LogErrorToHost("Invalid log file size (valid size needs to be larger than 0 and smaller than 500)"); + log_init_error_to_host ("Invalid log file size %zd MiB (valid size needs to be > 0 and <= 500 MiB)", gc_log_file_size); fclose (gc_log); return E_FAIL; } @@ -265,4 +270,15 @@ void GCLog (const char *fmt, ... ) } #endif //TRACE_GC && SIMPLE_DPRINTF +// We log initialization errors to the host to help with diagnostics. By default these will show up in stdout. +// You can also redirect them to a file. See docs/design/features/host-tracing.md. +void log_init_error_to_host (const char* format, ...) 
+{ + char error_buf[256]; + va_list args; + va_start (args, format); + _vsnprintf_s (error_buf, ARRAY_SIZE (error_buf), _TRUNCATE, format, args); + GCToEEInterface::LogErrorToHost (error_buf); + va_end (args); +} #endif // !DACCESS_COMPILE diff --git a/src/coreclr/gc/gcconfig.h b/src/coreclr/gc/gcconfig.h index 5b97c021bbde..0378323b6e96 100644 --- a/src/coreclr/gc/gcconfig.h +++ b/src/coreclr/gc/gcconfig.h @@ -104,8 +104,8 @@ class GCConfigStringHolder INT_CONFIG (GCHeapHardLimit, "GCHeapHardLimit", "System.GC.HeapHardLimit", 0, "Specifies a hard limit for the GC heap") \ INT_CONFIG (GCHeapHardLimitPercent, "GCHeapHardLimitPercent", "System.GC.HeapHardLimitPercent", 0, "Specifies the GC heap usage as a percentage of the total memory") \ INT_CONFIG (GCTotalPhysicalMemory, "GCTotalPhysicalMemory", NULL, 0, "Specifies what the GC should consider to be total physical memory") \ - INT_CONFIG (GCRegionRange, "GCRegionRange", NULL, 0, "Specifies the range for the GC heap") \ - INT_CONFIG (GCRegionSize, "GCRegionSize", NULL, 0, "Specifies the size for a basic GC region") \ + INT_CONFIG (GCRegionRange, "GCRegionRange", "System.GC.RegionRange", 0, "Specifies the range for the GC heap") \ + INT_CONFIG (GCRegionSize, "GCRegionSize", "System.GC.RegionSize", 0, "Specifies the size for a basic GC region") \ INT_CONFIG (GCEnableSpecialRegions, "GCEnableSpecialRegions", NULL, 0, "Specifies to enable special handling some regions like SIP") \ STRING_CONFIG(LogFile, "GCLogFile", NULL, "Specifies the name of the GC log file") \ STRING_CONFIG(ConfigLogFile, "GCConfigLogFile", NULL, "Specifies the name of the GC config log file") \ @@ -142,7 +142,7 @@ class GCConfigStringHolder INT_CONFIG (GCSpinCountUnit, "GCSpinCountUnit", NULL, 0, "Specifies the spin count unit used by the GC.") \ INT_CONFIG (GCDynamicAdaptationMode, "GCDynamicAdaptationMode", "System.GC.DynamicAdaptationMode", 1, "Enable the GC to dynamically adapt to application sizes.") \ INT_CONFIG (GCDTargetTCP, "GCDTargetTCP", "System.GC.DTargetTCP", 0, "Specifies the target tcp for DATAS") \ - INT_CONFIG (GCDBGCRatio, " GCDBGCRatio", NULL, 0, "Specifies the ratio of BGC to NGC2 for HC change") \ + INT_CONFIG (GCDBGCRatio, "GCDBGCRatio", NULL, 0, "Specifies the ratio of BGC to NGC2 for HC change") \ BOOL_CONFIG (GCCacheSizeFromSysConf, "GCCacheSizeFromSysConf", NULL, false, "Specifies using sysconf to retrieve the last level cache size for Unix.") // This class is responsible for retreiving configuration information diff --git a/src/coreclr/gc/gcimpl.h b/src/coreclr/gc/gcimpl.h index f7ff4c2f8d7d..8dcf062889c8 100644 --- a/src/coreclr/gc/gcimpl.h +++ b/src/coreclr/gc/gcimpl.h @@ -283,7 +283,7 @@ class GCHeap : public IGCHeapInternal //return TRUE if GC actually happens, otherwise FALSE bool StressHeap(gc_alloc_context * acontext); -#ifndef FEATURE_NATIVEAOT // Redhawk forces relocation a different way +#ifndef FEATURE_NATIVEAOT // NativeAOT forces relocation a different way #ifdef STRESS_HEAP protected: diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index 37109c9d0531..e2ffaf62432a 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -1681,7 +1681,10 @@ class gc_heap PER_HEAP_METHOD void set_region_sweep_in_plan (heap_segment* region); PER_HEAP_METHOD void clear_region_sweep_in_plan (heap_segment* region); PER_HEAP_METHOD void clear_region_demoted (heap_segment* region); - PER_HEAP_METHOD void decide_on_demotion_pin_surv (heap_segment* region, int* no_pinned_surv_region_count); + PER_HEAP_METHOD void 
decide_on_demotion_pin_surv (heap_segment* region, + int* no_pinned_surv_region_count, + bool promote_gen1_pins_p, + bool large_pins_p); PER_HEAP_METHOD void skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num); PER_HEAP_METHOD void process_last_np_surv_region (generation* consing_gen, int current_plan_gen_num, @@ -2614,6 +2617,15 @@ class gc_heap #ifndef USE_REGIONS PER_HEAP_METHOD generation* ensure_ephemeral_heap_segment (generation* consing_gen); #endif //!USE_REGIONS + + PER_HEAP_ISOLATED_METHOD bool decide_on_gen1_pin_promotion (float pin_frag_ratio, float pin_surv_ratio); + + PER_HEAP_METHOD void attribute_pin_higher_gen_alloc ( +#ifdef USE_REGIONS + heap_segment* seg, int to_gen_number, +#endif + uint8_t* plug, size_t len); + PER_HEAP_METHOD uint8_t* allocate_in_condemned_generations (generation* gen, size_t size, int from_gen_number, @@ -2635,6 +2647,8 @@ class gc_heap PER_HEAP_METHOD size_t get_promoted_bytes(); #ifdef USE_REGIONS + PER_HEAP_METHOD void attribute_pin_higher_gen_alloc (int frgn, int togn, size_t len); + PER_HEAP_ISOLATED_METHOD void sync_promoted_bytes(); PER_HEAP_ISOLATED_METHOD void set_heap_for_contained_basic_regions (heap_segment* region, gc_heap* hp); @@ -3516,6 +3530,8 @@ class gc_heap // Set during a GC and checked by allocator after that GC PER_HEAP_FIELD_SINGLE_GC BOOL sufficient_gen0_space_p; + PER_HEAP_FIELD_SINGLE_GC BOOL decide_promote_gen1_pins_p; + PER_HEAP_FIELD_SINGLE_GC bool no_gc_oom_p; PER_HEAP_FIELD_SINGLE_GC heap_segment* saved_loh_segment_no_gc; @@ -3657,7 +3673,6 @@ class gc_heap PER_HEAP_FIELD_SINGLE_GC uint8_t* demotion_low; PER_HEAP_FIELD_SINGLE_GC uint8_t* demotion_high; - PER_HEAP_FIELD_SINGLE_GC BOOL demote_gen1_p; PER_HEAP_FIELD_SINGLE_GC uint8_t* last_gen1_pin_end; PER_HEAP_FIELD_SINGLE_GC BOOL ephemeral_promotion; diff --git a/src/coreclr/gc/handletablecore.cpp b/src/coreclr/gc/handletablecore.cpp index 798e77666f24..76046964b4ef 100644 --- a/src/coreclr/gc/handletablecore.cpp +++ b/src/coreclr/gc/handletablecore.cpp @@ -1842,11 +1842,6 @@ uint32_t BlockFreeHandlesInMask(TableSegment *pSegment, uint32_t uBlock, uint32_ // keep track of how many handles we have left to free uint32_t uRemain = uCount; -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6305) // "This code deals with a bit vector mapped piece of code, so there is no mismatch between sizeof and countof" -#endif - // if this block has user data, convert the pointer to be mask-relative if (pUserData) pUserData += (uMask * HANDLE_HANDLES_PER_MASK); @@ -1858,10 +1853,6 @@ uint32_t BlockFreeHandlesInMask(TableSegment *pSegment, uint32_t uBlock, uint32_ OBJECTHANDLE firstHandle = (OBJECTHANDLE)(pSegment->rgValue + (uMask * HANDLE_HANDLES_PER_MASK)); OBJECTHANDLE lastHandle = (OBJECTHANDLE)((_UNCHECKED_OBJECTREF *)firstHandle + HANDLE_HANDLES_PER_MASK); -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - // keep a local copy of the free mask to update as we free handles uint32_t dwFreeMask = pSegment->rgFreeMask[uMask]; diff --git a/src/coreclr/gc/sample/CMakeLists.txt b/src/coreclr/gc/sample/CMakeLists.txt index 1f297fd23133..2f0181f4a55d 100644 --- a/src/coreclr/gc/sample/CMakeLists.txt +++ b/src/coreclr/gc/sample/CMakeLists.txt @@ -44,7 +44,9 @@ if(CLR_CMAKE_TARGET_WIN32) ${STATIC_MT_CRT_LIB} ${STATIC_MT_VCRT_LIB} kernel32.lib - advapi32.lib) + advapi32.lib + minipal + ) endif(CLR_CMAKE_TARGET_WIN32) add_definitions(-DVERIFY_HEAP) diff --git a/src/coreclr/gc/unix/CMakeLists.txt b/src/coreclr/gc/unix/CMakeLists.txt index 
f88b03960988..5ae2de786634 100644 --- a/src/coreclr/gc/unix/CMakeLists.txt +++ b/src/coreclr/gc/unix/CMakeLists.txt @@ -10,4 +10,4 @@ set(GC_PAL_SOURCES events.cpp cgroup.cpp) -add_library(gc_pal STATIC ${GC_PAL_SOURCES} ${VERSION_FILE_PATH}) +add_library(gc_pal OBJECT ${GC_PAL_SOURCES} ${VERSION_FILE_PATH}) diff --git a/src/coreclr/gc/unix/config.gc.h.in b/src/coreclr/gc/unix/config.gc.h.in index 322aea3fe61a..06f1e8255e25 100644 --- a/src/coreclr/gc/unix/config.gc.h.in +++ b/src/coreclr/gc/unix/config.gc.h.in @@ -17,7 +17,6 @@ #cmakedefine01 HAVE_SYSCTLBYNAME #cmakedefine01 HAVE_PTHREAD_CONDATTR_SETCLOCK #cmakedefine01 HAVE_CLOCK_GETTIME_NSEC_NP -#cmakedefine01 HAVE_CLOCK_MONOTONIC #cmakedefine01 HAVE_SCHED_GETAFFINITY #cmakedefine01 HAVE_SCHED_SETAFFINITY #cmakedefine01 HAVE_PTHREAD_SETAFFINITY_NP diff --git a/src/coreclr/gc/unix/configure.cmake b/src/coreclr/gc/unix/configure.cmake index c88c6635a8ca..a27ccf3206bf 100644 --- a/src/coreclr/gc/unix/configure.cmake +++ b/src/coreclr/gc/unix/configure.cmake @@ -89,20 +89,6 @@ check_symbol_exists( time.h HAVE_CLOCK_GETTIME_NSEC_NP) -check_cxx_source_runs(" -#include -#include -#include - -int main() -{ - int ret; - struct timespec ts; - ret = clock_gettime(CLOCK_MONOTONIC, &ts); - - exit(ret); -}" HAVE_CLOCK_MONOTONIC) - check_symbol_exists( posix_madvise sys/mman.h diff --git a/src/coreclr/gc/unix/gcenv.unix.cpp b/src/coreclr/gc/unix/gcenv.unix.cpp index f8332ed70bbf..b33a856895a6 100644 --- a/src/coreclr/gc/unix/gcenv.unix.cpp +++ b/src/coreclr/gc/unix/gcenv.unix.cpp @@ -24,6 +24,7 @@ #include "gcconfig.h" #include "numasupport.h" #include +#include #if HAVE_SWAPCTL #include @@ -35,10 +36,9 @@ #define membarrier(...) syscall(__NR_membarrier, __VA_ARGS__) #elif HAVE_SYS_MEMBARRIER_H #include -// Emscriptenn's membarrier.h does not have a membarrier function. -#ifdef TARGET_WASM -# define membarrier(...) -ENOSYS -#endif // TARGET_WASM +#ifdef TARGET_BROWSER +#define membarrier(cmd, flags, cpu_id) 0 // browser/wasm is currently single threaded +#endif #endif #include @@ -920,7 +927,7 @@ static void GetLogicalProcessorCacheSizeFromSysFs(size_t* cacheLevel, size_t* ca } } } -#endif +#endif } static void GetLogicalProcessorCacheSizeFromHeuristic(size_t* cacheLevel, size_t* cacheSize) @@ -968,7 +975,7 @@ static size_t GetLogicalProcessorCacheSizeFromOS() GetLogicalProcessorCacheSizeFromSysConf(&cacheLevel, &cacheSize); } - if (cacheSize == 0) + if (cacheSize == 0) { GetLogicalProcessorCacheSizeFromSysFs(&cacheLevel, &cacheSize); if (cacheSize == 0) @@ -1472,22 +1479,7 @@ void GCToOSInterface::GetMemoryStatus(uint64_t restricted_limit, uint32_t* memor // The counter value int64_t GCToOSInterface::QueryPerformanceCounter() { -#if HAVE_CLOCK_GETTIME_NSEC_NP - return (int64_t)clock_gettime_nsec_np(CLOCK_UPTIME_RAW); -#elif HAVE_CLOCK_MONOTONIC - struct timespec ts; - int result = clock_gettime(CLOCK_MONOTONIC, &ts); - - if (result != 0) - { - assert(!"clock_gettime(CLOCK_MONOTONIC) failed"); - __UNREACHABLE(); - } - - return ((int64_t)(ts.tv_sec) * (int64_t)(tccSecondsToNanoSeconds)) + (int64_t)(ts.tv_nsec); -#else -#error " clock_gettime(CLOCK_MONOTONIC) or clock_gettime_nsec_np() must be supported." -#endif + return minipal_hires_ticks(); } // Get a frequency of the high precision performance counter @@ -1496,7 +1488,7 @@ int64_t GCToOSInterface::QueryPerformanceCounter() int64_t GCToOSInterface::QueryPerformanceFrequency() { // The counter frequency of gettimeofday is in microseconds.
- return tccSecondsToNanoSeconds; + return minipal_hires_tick_frequency(); } // Get a time stamp with a low precision @@ -1504,42 +1496,7 @@ int64_t GCToOSInterface::QueryPerformanceFrequency() // Time stamp in milliseconds uint64_t GCToOSInterface::GetLowPrecisionTimeStamp() { - uint64_t retval = 0; - -#if HAVE_CLOCK_GETTIME_NSEC_NP - retval = clock_gettime_nsec_np(CLOCK_UPTIME_RAW) / tccMilliSecondsToNanoSeconds; -#elif HAVE_CLOCK_MONOTONIC - struct timespec ts; - -#if HAVE_CLOCK_MONOTONIC_COARSE - clockid_t clockType = CLOCK_MONOTONIC_COARSE; // good enough resolution, fastest speed -#else - clockid_t clockType = CLOCK_MONOTONIC; -#endif - - if (clock_gettime(clockType, &ts) != 0) - { -#if HAVE_CLOCK_MONOTONIC_COARSE - assert(!"clock_gettime(HAVE_CLOCK_MONOTONIC_COARSE) failed\n"); -#else - assert(!"clock_gettime(CLOCK_MONOTONIC) failed\n"); -#endif - } - - retval = (ts.tv_sec * tccSecondsToMilliSeconds) + (ts.tv_nsec / tccMilliSecondsToNanoSeconds); -#else - struct timeval tv; - if (gettimeofday(&tv, NULL) == 0) - { - retval = (tv.tv_sec * tccSecondsToMilliSeconds) + (tv.tv_usec / tccMilliSecondsToMicroSeconds); - } - else - { - assert(!"gettimeofday() failed\n"); - } -#endif - - return retval; + return (uint64_t)minipal_lowres_ticks(); } // Gets the total number of processors on the machine, not taking @@ -1615,43 +1572,3 @@ bool GCToOSInterface::ParseGCHeapAffinitizeRangesEntry(const char** config_strin { return ParseIndexOrRange(config_string, start_index, end_index); } - -// Initialize the critical section -bool CLRCriticalSection::Initialize() -{ - pthread_mutexattr_t mutexAttributes; - int st = pthread_mutexattr_init(&mutexAttributes); - if (st != 0) - { - return false; - } - - st = pthread_mutexattr_settype(&mutexAttributes, PTHREAD_MUTEX_RECURSIVE); - if (st == 0) - { - st = pthread_mutex_init(&m_cs.mutex, &mutexAttributes); - } - - pthread_mutexattr_destroy(&mutexAttributes); - - return (st == 0); -} - -// Destroy the critical section -void CLRCriticalSection::Destroy() -{ - int st = pthread_mutex_destroy(&m_cs.mutex); - assert(st == 0); -} - -// Enter the critical section. Blocks until the section can be entered. -void CLRCriticalSection::Enter() -{ - pthread_mutex_lock(&m_cs.mutex); -} - -// Leave the critical section -void CLRCriticalSection::Leave() -{ - pthread_mutex_unlock(&m_cs.mutex); -} diff --git a/src/coreclr/gc/vxsort/CMakeLists.txt b/src/coreclr/gc/vxsort/CMakeLists.txt index fc55956832e3..0937cba5942a 100644 --- a/src/coreclr/gc/vxsort/CMakeLists.txt +++ b/src/coreclr/gc/vxsort/CMakeLists.txt @@ -26,4 +26,4 @@ set (VXSORT_SOURCES do_vxsort.h ) -add_library(gc_vxsort STATIC ${VXSORT_SOURCES}) +add_library(gc_vxsort OBJECT ${VXSORT_SOURCES}) diff --git a/src/coreclr/gc/vxsort/isa_detection.cpp b/src/coreclr/gc/vxsort/isa_detection.cpp index 93c7288663c4..b069c8be9bee 100644 --- a/src/coreclr/gc/vxsort/isa_detection.cpp +++ b/src/coreclr/gc/vxsort/isa_detection.cpp @@ -2,14 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. 
#include "common.h" - -#ifdef TARGET_WINDOWS -#include -#include -#endif - #include "do_vxsort.h" +#include + enum class SupportedISA { None = 0, @@ -17,77 +13,12 @@ enum class SupportedISA AVX512F = 1 << (int)InstructionSet::AVX512F }; -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) - -SupportedISA DetermineSupportedISA() -{ - // register definitions to make the following code more readable - enum reg - { - EAX = 0, - EBX = 1, - ECX = 2, - EDX = 3, - COUNT = 4 - }; - - // bit definitions to make code more readable - enum bits - { - OCXSAVE = 1<<27, - AVX = 1<<28, - AVX2 = 1<< 5, - AVX512F = 1<<16, - AVX512DQ = 1<<17, - }; - int reg[COUNT]; - - __cpuid(reg, 0); - if (reg[EAX] < 7) - return SupportedISA::None; - - __cpuid(reg, 1); - - // both AVX and OCXSAVE feature flags must be enabled - if ((reg[ECX] & (OCXSAVE|AVX)) != (OCXSAVE | AVX)) - return SupportedISA::None; - - // get xcr0 register - DWORD64 xcr0 = _xgetbv(0); - - // get OS XState info - DWORD64 FeatureMask = GetEnabledXStateFeatures(); - - // get processor extended feature flag info - __cpuidex(reg, 7, 0); - - // check if all of AVX2, AVX512F and AVX512DQ are supported by both processor and OS - if ((reg[EBX] & (AVX2 | AVX512F | AVX512DQ)) == (AVX2 | AVX512F | AVX512DQ) && - (xcr0 & 0xe6) == 0xe6 && - (FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) == (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) - { - return (SupportedISA)((int)SupportedISA::AVX2 | (int)SupportedISA::AVX512F); - } - - // check if AVX2 is supported by both processor and OS - if ((reg[EBX] & AVX2) && - (xcr0 & 0x06) == 0x06 && - (FeatureMask & XSTATE_MASK_AVX) == XSTATE_MASK_AVX) - { - return SupportedISA::AVX2; - } - - return SupportedISA::None; -} - -#elif defined(TARGET_UNIX) - SupportedISA DetermineSupportedISA() { - __builtin_cpu_init(); - if (__builtin_cpu_supports("avx2")) + int cpuFeatures = minipal_getcpufeatures(); + if ((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) { - if (__builtin_cpu_supports("avx512f")) + if ((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) return (SupportedISA)((int)SupportedISA::AVX2 | (int)SupportedISA::AVX512F); else return SupportedISA::AVX2; @@ -98,8 +29,6 @@ SupportedISA DetermineSupportedISA() } } -#endif // defined(TARGET_UNIX) - static bool s_initialized; static SupportedISA s_supportedISA; diff --git a/src/coreclr/gc/windows/gcenv.windows.cpp b/src/coreclr/gc/windows/gcenv.windows.cpp index 608751dd169a..3e8040be0bcb 100644 --- a/src/coreclr/gc/windows/gcenv.windows.cpp +++ b/src/coreclr/gc/windows/gcenv.windows.cpp @@ -1319,31 +1319,6 @@ static DWORD GCThreadStub(void* param) return 0; } -// Initialize the critical section -bool CLRCriticalSection::Initialize() -{ - ::InitializeCriticalSection(&m_cs); - return true; -} - -// Destroy the critical section -void CLRCriticalSection::Destroy() -{ - ::DeleteCriticalSection(&m_cs); -} - -// Enter the critical section. Blocks until the section can be entered. -void CLRCriticalSection::Enter() -{ - ::EnterCriticalSection(&m_cs); -} - -// Leave the critical section -void CLRCriticalSection::Leave() -{ - ::LeaveCriticalSection(&m_cs); -} - // WindowsEvent is an implementation of GCEvent that forwards // directly to Win32 APIs. 
class GCEvent::Impl diff --git a/src/coreclr/gcdump/gcdumpnonx86.cpp b/src/coreclr/gcdump/gcdumpnonx86.cpp index d4366069f14a..4568477e3a60 100644 --- a/src/coreclr/gcdump/gcdumpnonx86.cpp +++ b/src/coreclr/gcdump/gcdumpnonx86.cpp @@ -361,6 +361,7 @@ size_t GCDump::DumpGCTable(PTR_CBYTE gcInfoBlock, | DECODE_GENERICS_INST_CONTEXT | DECODE_GC_LIFETIMES | DECODE_PROLOG_LENGTH + | DECODE_RETURN_KIND #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) | DECODE_HAS_TAILCALLS #endif @@ -501,6 +502,12 @@ size_t GCDump::DumpGCTable(PTR_CBYTE gcInfoBlock, gcPrintf("Size of parameter area: %x\n", hdrdecoder.GetSizeOfStackParameterArea()); #endif + if (hdrdecoder.Version() < 4) + { + ReturnKind returnKind = hdrdecoder.GetReturnKind(); + gcPrintf("Return Kind: %s\n", ReturnKindToString(returnKind)); + } + UINT32 cbEncodedMethodSize = hdrdecoder.GetCodeLength(); gcPrintf("Code size: %x\n", cbEncodedMethodSize); diff --git a/src/coreclr/gcdump/i386/gcdumpx86.cpp b/src/coreclr/gcdump/i386/gcdumpx86.cpp index 680504db6f05..bb85705b45c8 100644 --- a/src/coreclr/gcdump/i386/gcdumpx86.cpp +++ b/src/coreclr/gcdump/i386/gcdumpx86.cpp @@ -13,7 +13,6 @@ #endif #include "gcdump.h" - /*****************************************************************************/ #define castto(var,typ) (*(typ *)&var) @@ -115,6 +114,17 @@ size_t GCDump::DumpInfoHdr (PTR_CBYTE gcInfoBlock, header->revPInvokeOffset = count; } + if (header->noGCRegionCnt == HAS_NOGCREGIONS) + { + hasArgTabOffset = TRUE; + table += decodeUnsigned(table, &count); + header->noGCRegionCnt = count; + } + else if (header->noGCRegionCnt > 0) + { + hasArgTabOffset = TRUE; + } + // // First print out all the basic information // @@ -157,6 +167,8 @@ size_t GCDump::DumpInfoHdr (PTR_CBYTE gcInfoBlock, gcPrintf(" Sync region = [%u,%u] ([0x%x,0x%x])\n", header->syncStartOffset, header->syncEndOffset, header->syncStartOffset, header->syncEndOffset); + if (header->noGCRegionCnt > 0) + gcPrintf(" no GC region count = %2u \n", header->noGCRegionCnt); if (header->epilogCount > 1 || (header->epilogCount != 0 && header->epilogAtEnd == 0)) @@ -205,11 +217,6 @@ size_t GCDump::DumpInfoHdr (PTR_CBYTE gcInfoBlock, } /*****************************************************************************/ - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif size_t GCDump::DumpGCTable(PTR_CBYTE table, const InfoHdr& header, unsigned methodSize, @@ -238,6 +245,23 @@ size_t GCDump::DumpGCTable(PTR_CBYTE table, if (header.ebxSaved) calleeSavedRegs++; } + /* Dump the no GC region table */ + + if (header.noGCRegionCnt > 0) + { + count = header.noGCRegionCnt; + while (count-- > 0) + { + unsigned regionOffset; + unsigned regionSize; + + table += decodeUnsigned(table, ®ionOffset); + table += decodeUnsigned(table, ®ionSize); + + gcPrintf("[%04X-%04X) no GC region\n", regionOffset, regionOffset + regionSize); + } + } + /* Dump the untracked frame variable table */ count = header.untrackedCnt; @@ -323,11 +347,7 @@ size_t GCDump::DumpGCTable(PTR_CBYTE table, gcPrintf("%s%s pointer\n", (lowBits & byref_OFFSET_FLAG) ? "byref " : "", -#ifndef FEATURE_EH_FUNCLETS - (lowBits & this_OFFSET_FLAG) ? "this" : "" -#else (lowBits & pinned_OFFSET_FLAG) ? 
"pinned" : "" -#endif ); _ASSERTE(endOffs <= methodSize); @@ -456,10 +476,6 @@ size_t GCDump::DumpGCTable(PTR_CBYTE table, /* non-ptr arg push */ curOffs += (val & 0x07); -#ifndef FEATURE_EH_FUNCLETS - // For funclets, non-ptr arg pushes can be reported even for EBP frames - _ASSERTE(!header.ebpFrame); -#endif // FEATURE_EH_FUNCLETS argCnt++; DumpEncoding(bp, table-bp); bp = table; @@ -681,9 +697,6 @@ size_t GCDump::DumpGCTable(PTR_CBYTE table, { argTab += decodeUnsigned(argTab, &val); -#ifndef FEATURE_EH_FUNCLETS - assert((val & this_OFFSET_FLAG) == 0); -#endif unsigned stkOffs = val & ~byref_OFFSET_FLAG; unsigned lowBit = val & byref_OFFSET_FLAG; @@ -939,10 +952,6 @@ size_t GCDump::DumpGCTable(PTR_CBYTE table, return (table - tableStart); } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - /*****************************************************************************/ @@ -1016,6 +1025,12 @@ void GCDump::DumpPtrsInFrame(PTR_CBYTE gcInfoBlock, header.revPInvokeOffset = offset; _ASSERTE(offset != INVALID_REV_PINVOKE_OFFSET); } + if (header.noGCRegionCnt == HAS_NOGCREGIONS) + { + unsigned count; + table += decodeUnsigned(table, &count); + header.noGCRegionCnt = count; + } prologSize = header.prologSize; epilogSize = header.epilogSize; diff --git a/src/coreclr/gcinfo/arraylist.cpp b/src/coreclr/gcinfo/arraylist.cpp index 5071c483ba7f..84267b00e225 100644 --- a/src/coreclr/gcinfo/arraylist.cpp +++ b/src/coreclr/gcinfo/arraylist.cpp @@ -1,9 +1,23 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// Interpreter-FIXME: we get an existing implementation of ASSERTE via PCH that isn't usable +// from inside the interpreter, so we need to replace it with our own. +#undef _ASSERTE + +#if defined(_DEBUG) + +extern "C" void assertAbort(const char* why, const char* file, unsigned line); + +#define _ASSERTE(expr) if (!(expr)) { \ + assertAbort(#expr, __FILE__, __LINE__); \ +} +#else // _DEBUG +#define _ASSERTE(expr) (void)0 +#endif // _DEBUG + +#include "gcinfohelpers.h" #include -#include -#include "debugmacros.h" #include "iallocator.h" #include "gcinfoarraylist.h" #include "safemath.h" @@ -11,8 +25,8 @@ inline size_t roundUp(size_t size, size_t alignment) { // `alignment` must be a power of two - assert(alignment != 0); - assert((alignment & (alignment - 1)) == 0); + _ASSERTE(alignment != 0); + _ASSERTE((alignment & (alignment - 1)) == 0); return (size + (alignment - 1)) & ~(alignment - 1); } @@ -25,7 +39,7 @@ GcInfoArrayListBase::GcInfoArrayListBase(IAllocator* allocator) m_lastChunkCapacity(0), m_itemCount(0) { - assert(m_allocator != nullptr); + _ASSERTE(m_allocator != nullptr); } GcInfoArrayListBase::~GcInfoArrayListBase() @@ -42,19 +56,19 @@ void GcInfoArrayListBase::AppendNewChunk(size_t firstChunkCapacity, size_t eleme size_t chunkCapacity = (m_firstChunk == nullptr) ? 
firstChunkCapacity : (m_lastChunkCapacity * GrowthFactor); S_SIZE_T chunkSize = S_SIZE_T(roundUp(sizeof(ChunkBase), chunkAlignment)) + (S_SIZE_T(elementSize) * S_SIZE_T(chunkCapacity)); - assert(!chunkSize.IsOverflow()); + _ASSERTE(!chunkSize.IsOverflow()); ChunkBase* chunk = reinterpret_cast(m_allocator->Alloc(chunkSize.Value())); chunk->m_next = nullptr; if (m_lastChunk != nullptr) { - assert(m_firstChunk != nullptr); + _ASSERTE(m_firstChunk != nullptr); m_lastChunk->m_next = chunk; } else { - assert(m_lastChunk == nullptr); + _ASSERTE(m_lastChunk == nullptr); m_firstChunk = chunk; } @@ -66,7 +80,7 @@ void GcInfoArrayListBase::AppendNewChunk(size_t firstChunkCapacity, size_t eleme GcInfoArrayListBase::IteratorBase::IteratorBase(GcInfoArrayListBase* list, size_t firstChunkCapacity) : m_list(list) { - assert(m_list != nullptr); + _ASSERTE(m_list != nullptr); // Note: if the list is empty, m_list->firstChunk == nullptr == m_list->lastChunk and m_lastChunkCount == 0. // In that case, the next two lines will set m_currentChunk to nullptr and m_currentChunkCount to 0. diff --git a/src/coreclr/gcinfo/gcinfodumper.cpp b/src/coreclr/gcinfo/gcinfodumper.cpp index 1c8c79c10c88..c22850c2b0f1 100644 --- a/src/coreclr/gcinfo/gcinfodumper.cpp +++ b/src/coreclr/gcinfo/gcinfodumper.cpp @@ -4,6 +4,8 @@ #ifndef SOS_INCLUDE #include "common.h" #endif + +#include "gcinfohelpers.h" #include "gcinfodumper.h" #include "gcinfodecoder.h" diff --git a/src/coreclr/gcinfo/gcinfoencoder.cpp b/src/coreclr/gcinfo/gcinfoencoder.cpp index d1988ba34a53..4654e2e28b6d 100644 --- a/src/coreclr/gcinfo/gcinfoencoder.cpp +++ b/src/coreclr/gcinfo/gcinfoencoder.cpp @@ -7,9 +7,29 @@ * */ +// Interpreter-FIXME: we get an existing implementation of ASSERTE via PCH that isn't usable +// from inside the interpreter, so we need to replace it with our own. 
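The replacement _ASSERTE added below routes failures through an assertAbort helper that is only declared in this file, not defined; a minimal illustrative definition of what the interpreter build might link in (an assumption for illustration, not part of this diff) could look like:

    #include <cstdio>
    #include <cstdlib>

    // Editorial sketch only: one possible assertAbort implementation supplied by the interpreter build.
    extern "C" void assertAbort(const char* why, const char* file, unsigned line)
    {
        fprintf(stderr, "Assert failed: %s\n    at %s:%u\n", why, file, line);
        abort();
    }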
+#undef _ASSERTE + +#if defined(_DEBUG) + +extern "C" void assertAbort(const char* why, const char* file, unsigned line); + +#define _ASSERTE(expr) if (!(expr)) { \ + assertAbort(#expr, __FILE__, __LINE__); \ +} +#else // _DEBUG +#define _ASSERTE(expr) (void)0 +#endif // _DEBUG + + #include +#include "gcinfohelpers.h" #include "gcinfoencoder.h" + +using namespace GcInfoEncoderExt; + #include "targetosarch.h" #ifdef _DEBUG @@ -19,7 +39,6 @@ #endif #ifndef STANDALONE_BUILD -#include "log.h" #include "simplerhash.h" #include "bitposition.h" #endif @@ -354,7 +373,6 @@ GcInfoSize& GcInfoSize::operator+=(const GcInfoSize& other) SecObjSize += other.SecObjSize; GsCookieSize += other.GsCookieSize; GenericsCtxSize += other.GenericsCtxSize; - PspSymSize += other.PspSymSize; StackBaseSize += other.StackBaseSize; ReversePInvokeFrameSize += other.ReversePInvokeFrameSize; FixedAreaSize += other.FixedAreaSize; @@ -379,56 +397,54 @@ GcInfoSize& GcInfoSize::operator+=(const GcInfoSize& other) void GcInfoSize::Log(DWORD level, const char * header) { - if(LoggingOn(LF_GCINFO, level)) - { - LogSpew(LF_GCINFO, level, header); - - LogSpew(LF_GCINFO, level, "---COUNTS---\n"); - LogSpew(LF_GCINFO, level, "NumMethods: %zu\n", NumMethods); - LogSpew(LF_GCINFO, level, "NumCallSites: %zu\n", NumCallSites); - LogSpew(LF_GCINFO, level, "NumRanges: %zu\n", NumRanges); - LogSpew(LF_GCINFO, level, "NumRegs: %zu\n", NumRegs); - LogSpew(LF_GCINFO, level, "NumStack: %zu\n", NumStack); - LogSpew(LF_GCINFO, level, "NumUntracked: %zu\n", NumUntracked); - LogSpew(LF_GCINFO, level, "NumTransitions: %zu\n", NumTransitions); - LogSpew(LF_GCINFO, level, "SizeOfCode: %zu\n", SizeOfCode); - LogSpew(LF_GCINFO, level, "EncInfoSize: %zu\n", EncInfoSize); - - LogSpew(LF_GCINFO, level, "---SIZES(bits)---\n"); - LogSpew(LF_GCINFO, level, "Total: %zu\n", TotalSize); - LogSpew(LF_GCINFO, level, "UntrackedSlot: %zu\n", UntrackedSlotSize); - LogSpew(LF_GCINFO, level, "NumUntracked: %zu\n", NumUntrackedSize); - LogSpew(LF_GCINFO, level, "Flags: %zu\n", FlagsSize); - LogSpew(LF_GCINFO, level, "CodeLength: %zu\n", CodeLengthSize); - LogSpew(LF_GCINFO, level, "Prolog/Epilog: %zu\n", ProEpilogSize); - LogSpew(LF_GCINFO, level, "SecObj: %zu\n", SecObjSize); - LogSpew(LF_GCINFO, level, "GsCookie: %zu\n", GsCookieSize); - LogSpew(LF_GCINFO, level, "PspSym: %zu\n", PspSymSize); - LogSpew(LF_GCINFO, level, "GenericsCtx: %zu\n", GenericsCtxSize); - LogSpew(LF_GCINFO, level, "StackBase: %zu\n", StackBaseSize); - LogSpew(LF_GCINFO, level, "FixedArea: %zu\n", FixedAreaSize); - LogSpew(LF_GCINFO, level, "ReversePInvokeFrame: %zu\n", ReversePInvokeFrameSize); - LogSpew(LF_GCINFO, level, "NumCallSites: %zu\n", NumCallSitesSize); - LogSpew(LF_GCINFO, level, "NumRanges: %zu\n", NumRangesSize); - LogSpew(LF_GCINFO, level, "CallSiteOffsets: %zu\n", CallSitePosSize); - LogSpew(LF_GCINFO, level, "Ranges: %zu\n", RangeSize); - LogSpew(LF_GCINFO, level, "NumRegs: %zu\n", NumRegsSize); - LogSpew(LF_GCINFO, level, "NumStack: %zu\n", NumStackSize); - LogSpew(LF_GCINFO, level, "RegSlots: %zu\n", RegSlotSize); - LogSpew(LF_GCINFO, level, "StackSlots: %zu\n", StackSlotSize); - LogSpew(LF_GCINFO, level, "CallSiteStates: %zu\n", CallSiteStateSize); - LogSpew(LF_GCINFO, level, "EhOffsets: %zu\n", EhPosSize); - LogSpew(LF_GCINFO, level, "EhStates: %zu\n", EhStateSize); - LogSpew(LF_GCINFO, level, "ChunkPointers: %zu\n", ChunkPtrSize); - LogSpew(LF_GCINFO, level, "ChunkMasks: %zu\n", ChunkMaskSize); - LogSpew(LF_GCINFO, level, "ChunkFinalStates: %zu\n", ChunkFinalStateSize); - 
LogSpew(LF_GCINFO, level, "Transitions: %zu\n", ChunkTransitionSize); + if (GCINFO_LOGSPEW(level, header)) + { + GCINFO_LOGSPEW( level, "---COUNTS---\n"); + GCINFO_LOGSPEW( level, "NumMethods: %zu\n", NumMethods); + GCINFO_LOGSPEW( level, "NumCallSites: %zu\n", NumCallSites); + GCINFO_LOGSPEW( level, "NumRanges: %zu\n", NumRanges); + GCINFO_LOGSPEW( level, "NumRegs: %zu\n", NumRegs); + GCINFO_LOGSPEW( level, "NumStack: %zu\n", NumStack); + GCINFO_LOGSPEW( level, "NumUntracked: %zu\n", NumUntracked); + GCINFO_LOGSPEW( level, "NumTransitions: %zu\n", NumTransitions); + GCINFO_LOGSPEW( level, "SizeOfCode: %zu\n", SizeOfCode); + GCINFO_LOGSPEW( level, "EncInfoSize: %zu\n", EncInfoSize); + + GCINFO_LOGSPEW( level, "---SIZES(bits)---\n"); + GCINFO_LOGSPEW( level, "Total: %zu\n", TotalSize); + GCINFO_LOGSPEW( level, "UntrackedSlot: %zu\n", UntrackedSlotSize); + GCINFO_LOGSPEW( level, "NumUntracked: %zu\n", NumUntrackedSize); + GCINFO_LOGSPEW( level, "Flags: %zu\n", FlagsSize); + GCINFO_LOGSPEW( level, "CodeLength: %zu\n", CodeLengthSize); + GCINFO_LOGSPEW( level, "Prolog/Epilog: %zu\n", ProEpilogSize); + GCINFO_LOGSPEW( level, "SecObj: %zu\n", SecObjSize); + GCINFO_LOGSPEW( level, "GsCookie: %zu\n", GsCookieSize); + GCINFO_LOGSPEW( level, "PspSym: %zu\n", PspSymSize); + GCINFO_LOGSPEW( level, "GenericsCtx: %zu\n", GenericsCtxSize); + GCINFO_LOGSPEW( level, "StackBase: %zu\n", StackBaseSize); + GCINFO_LOGSPEW( level, "FixedArea: %zu\n", FixedAreaSize); + GCINFO_LOGSPEW( level, "ReversePInvokeFrame: %zu\n", ReversePInvokeFrameSize); + GCINFO_LOGSPEW( level, "NumCallSites: %zu\n", NumCallSitesSize); + GCINFO_LOGSPEW( level, "NumRanges: %zu\n", NumRangesSize); + GCINFO_LOGSPEW( level, "CallSiteOffsets: %zu\n", CallSitePosSize); + GCINFO_LOGSPEW( level, "Ranges: %zu\n", RangeSize); + GCINFO_LOGSPEW( level, "NumRegs: %zu\n", NumRegsSize); + GCINFO_LOGSPEW( level, "NumStack: %zu\n", NumStackSize); + GCINFO_LOGSPEW( level, "RegSlots: %zu\n", RegSlotSize); + GCINFO_LOGSPEW( level, "StackSlots: %zu\n", StackSlotSize); + GCINFO_LOGSPEW( level, "CallSiteStates: %zu\n", CallSiteStateSize); + GCINFO_LOGSPEW( level, "EhOffsets: %zu\n", EhPosSize); + GCINFO_LOGSPEW( level, "EhStates: %zu\n", EhStateSize); + GCINFO_LOGSPEW( level, "ChunkPointers: %zu\n", ChunkPtrSize); + GCINFO_LOGSPEW( level, "ChunkMasks: %zu\n", ChunkMaskSize); + GCINFO_LOGSPEW( level, "ChunkFinalStates: %zu\n", ChunkFinalStateSize); + GCINFO_LOGSPEW( level, "Transitions: %zu\n", ChunkTransitionSize); } } #endif -GcInfoEncoder::GcInfoEncoder( +template TGcInfoEncoder::TGcInfoEncoder( ICorJitInfo* pCorJitInfo, CORINFO_METHOD_INFO* pMethodInfo, IAllocator* pJitAllocator, @@ -439,11 +455,6 @@ GcInfoEncoder::GcInfoEncoder( m_InterruptibleRanges( pJitAllocator ), m_LifetimeTransitions( pJitAllocator ) { -#ifdef MEASURE_GCINFO - // This causes multiple complus.log files in JIT64. TODO: consider using ICorJitInfo::logMsg instead. 
- InitializeLogging(); -#endif - _ASSERTE( pCorJitInfo != NULL ); _ASSERTE( pMethodInfo != NULL ); _ASSERTE( pJitAllocator != NULL ); @@ -468,7 +479,6 @@ GcInfoEncoder::GcInfoEncoder( m_GSCookieValidRangeStart = 0; _ASSERTE(sizeof(m_GSCookieValidRangeEnd) == sizeof(UINT32)); m_GSCookieValidRangeEnd = (UINT32) (-1); // == UINT32.MaxValue - m_PSPSymStackSlot = NO_PSP_SYM; m_GenericsInstContextStackSlot = NO_GENERICS_INST_CONTEXT; m_contextParamType = GENERIC_CONTEXTPARAM_NONE; @@ -498,7 +508,7 @@ GcInfoEncoder::GcInfoEncoder( } #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED -void GcInfoEncoder::DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UINT32 numCallSites) +template void TGcInfoEncoder::DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UINT32 numCallSites) { m_pCallSites = pCallSites; m_pCallSiteSizes = pCallSiteSizes; @@ -507,7 +517,7 @@ void GcInfoEncoder::DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UI for(UINT32 i=0; i 0); - _ASSERTE(DENORMALIZE_CODE_OFFSET(NORMALIZE_CODE_OFFSET(pCallSites[i])) == pCallSites[i]); + _ASSERTE(GcInfoEncoding::DENORMALIZE_CODE_OFFSET(GcInfoEncoding::NORMALIZE_CODE_OFFSET(pCallSites[i])) == pCallSites[i]); if(i > 0) { UINT32 prevEnd = pCallSites[i-1] + pCallSiteSizes[i-1]; @@ -519,7 +529,7 @@ void GcInfoEncoder::DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UI } #endif -GcSlotId GcInfoEncoder::GetRegisterSlotId( UINT32 regNum, GcSlotFlags flags ) +template GcSlotId TGcInfoEncoder::GetRegisterSlotId( UINT32 regNum, GcSlotFlags flags ) { // We could lookup an existing identical slot in the slot table (via some hashtable mechanism). // We just create duplicates for now. @@ -544,7 +554,7 @@ GcSlotId GcInfoEncoder::GetRegisterSlotId( UINT32 regNum, GcSlotFlags flags ) return newSlotId; } -GcSlotId GcInfoEncoder::GetStackSlotId( INT32 spOffset, GcSlotFlags flags, GcStackSlotBase spBase ) +template GcSlotId TGcInfoEncoder::GetStackSlotId( INT32 spOffset, GcSlotFlags flags, GcStackSlotBase spBase ) { // We could lookup an existing identical slot in the slot table (via some hashtable mechanism). // We just create duplicates for now. 
@@ -580,7 +590,7 @@ GcSlotId GcInfoEncoder::GetStackSlotId( INT32 spOffset, GcSlotFlags flags, GcSta return newSlotId; } -void GcInfoEncoder::GrowSlotTable() +template void TGcInfoEncoder::GrowSlotTable() { m_SlotTableSize *= 2; GcSlotDesc* newSlotTable = (GcSlotDesc*) m_pAllocator->Alloc( m_SlotTableSize * sizeof(GcSlotDesc) ); @@ -593,7 +603,7 @@ void GcInfoEncoder::GrowSlotTable() m_SlotTable = newSlotTable; } -void GcInfoEncoder::WriteSlotStateVector(BitStreamWriter &writer, const BitArray& vector) +template void TGcInfoEncoder::WriteSlotStateVector(BitStreamWriter &writer, const BitArray& vector) { for(UINT32 i = 0; i < m_NumSlots && !m_SlotTable[i].IsUntracked(); i++) { @@ -604,12 +614,12 @@ void GcInfoEncoder::WriteSlotStateVector(BitStreamWriter &writer, const BitArray } } -void GcInfoEncoder::DefineInterruptibleRange( UINT32 startInstructionOffset, UINT32 length ) +template void TGcInfoEncoder::DefineInterruptibleRange( UINT32 startInstructionOffset, UINT32 length ) { UINT32 stopInstructionOffset = startInstructionOffset + length; - UINT32 normStartOffset = NORMALIZE_CODE_OFFSET(startInstructionOffset); - UINT32 normStopOffset = NORMALIZE_CODE_OFFSET(stopInstructionOffset); + UINT32 normStartOffset = GcInfoEncoding::NORMALIZE_CODE_OFFSET(startInstructionOffset); + UINT32 normStopOffset = GcInfoEncoding::NORMALIZE_CODE_OFFSET(stopInstructionOffset); // Ranges must not overlap and must be passed sorted by increasing offset _ASSERTE( @@ -636,7 +646,7 @@ void GcInfoEncoder::DefineInterruptibleRange( UINT32 startInstructionOffset, UIN } } - LOG((LF_GCINFO, LL_INFO1000000, "interruptible at %x length %x\n", startInstructionOffset, length)); + GCINFO_LOG( LL_INFO1000000, "interruptible at %x length %x\n", startInstructionOffset, length); } @@ -644,7 +654,7 @@ void GcInfoEncoder::DefineInterruptibleRange( UINT32 startInstructionOffset, UIN // // For inputs, pass zero as offset // -void GcInfoEncoder::SetSlotState( +template void TGcInfoEncoder::SetSlotState( UINT32 instructionOffset, GcSlotId slotId, GcSlotState slotState @@ -661,23 +671,23 @@ void GcInfoEncoder::SetSlotState( *( m_LifetimeTransitions.Append() ) = transition; - LOG((LF_GCINFO, LL_INFO1000000, LOG_GCSLOTDESC_FMT " %s at %x\n", LOG_GCSLOTDESC_ARGS(&m_SlotTable[slotId]), slotState == GC_SLOT_LIVE ? "live" : "dead", instructionOffset)); + GCINFO_LOG( LL_INFO1000000, LOG_GCSLOTDESC_FMT " %s at %x\n", LOG_GCSLOTDESC_ARGS(&m_SlotTable[slotId]), slotState == GC_SLOT_LIVE ? 
"live" : "dead", instructionOffset); } -void GcInfoEncoder::SetIsVarArg() +template void TGcInfoEncoder::SetIsVarArg() { m_IsVarArg = true; } -void GcInfoEncoder::SetCodeLength( UINT32 length ) +template void TGcInfoEncoder::SetCodeLength( UINT32 length ) { _ASSERTE( length > 0 ); _ASSERTE( m_CodeLength == 0 || m_CodeLength == length ); m_CodeLength = length; } -void GcInfoEncoder::SetPrologSize( UINT32 prologSize ) +template void TGcInfoEncoder::SetPrologSize( UINT32 prologSize ) { _ASSERTE(prologSize != 0); _ASSERTE(m_GSCookieValidRangeStart == 0 || m_GSCookieValidRangeStart == prologSize); @@ -688,7 +698,7 @@ void GcInfoEncoder::SetPrologSize( UINT32 prologSize ) m_GSCookieValidRangeEnd = prologSize+1; } -void GcInfoEncoder::SetGSCookieStackSlot( INT32 spOffsetGSCookie, UINT32 validRangeStart, UINT32 validRangeEnd ) +template void TGcInfoEncoder::SetGSCookieStackSlot( INT32 spOffsetGSCookie, UINT32 validRangeStart, UINT32 validRangeEnd ) { _ASSERTE( spOffsetGSCookie != NO_GS_COOKIE ); _ASSERTE( m_GSCookieStackSlot == NO_GS_COOKIE || m_GSCookieStackSlot == spOffsetGSCookie ); @@ -699,15 +709,7 @@ void GcInfoEncoder::SetGSCookieStackSlot( INT32 spOffsetGSCookie, UINT32 validRa m_GSCookieValidRangeEnd = validRangeEnd; } -void GcInfoEncoder::SetPSPSymStackSlot( INT32 spOffsetPSPSym ) -{ - _ASSERTE( spOffsetPSPSym != NO_PSP_SYM ); - _ASSERTE( m_PSPSymStackSlot == NO_PSP_SYM || m_PSPSymStackSlot == spOffsetPSPSym ); - - m_PSPSymStackSlot = spOffsetPSPSym; -} - -void GcInfoEncoder::SetGenericsInstContextStackSlot( INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type) +template void TGcInfoEncoder::SetGenericsInstContextStackSlot( INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type) { _ASSERTE( spOffsetGenericsContext != NO_GENERICS_INST_CONTEXT); _ASSERTE( m_GenericsInstContextStackSlot == NO_GENERICS_INST_CONTEXT || m_GenericsInstContextStackSlot == spOffsetGenericsContext ); @@ -716,10 +718,10 @@ void GcInfoEncoder::SetGenericsInstContextStackSlot( INT32 spOffsetGenericsConte m_contextParamType = type; } -void GcInfoEncoder::SetStackBaseRegister( UINT32 regNum ) +template void TGcInfoEncoder::SetStackBaseRegister( UINT32 regNum ) { _ASSERTE( regNum != NO_STACK_BASE_REGISTER ); - _ASSERTE(DENORMALIZE_STACK_BASE_REGISTER(NORMALIZE_STACK_BASE_REGISTER(regNum)) == regNum); + _ASSERTE(GcInfoEncoding::DENORMALIZE_STACK_BASE_REGISTER(GcInfoEncoding::NORMALIZE_STACK_BASE_REGISTER(regNum)) == regNum); _ASSERTE( m_StackBaseRegister == NO_STACK_BASE_REGISTER || m_StackBaseRegister == regNum ); #if defined(TARGET_LOONGARCH64) assert(regNum == 3 || 22 == regNum); @@ -729,7 +731,7 @@ void GcInfoEncoder::SetStackBaseRegister( UINT32 regNum ) m_StackBaseRegister = regNum; } -void GcInfoEncoder::SetSizeOfEditAndContinuePreservedArea( UINT32 slots ) +template void TGcInfoEncoder::SetSizeOfEditAndContinuePreservedArea( UINT32 slots ) { _ASSERTE( slots != NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA ); _ASSERTE( m_SizeOfEditAndContinuePreservedArea == NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA ); @@ -737,26 +739,26 @@ void GcInfoEncoder::SetSizeOfEditAndContinuePreservedArea( UINT32 slots ) } #ifdef TARGET_ARM64 -void GcInfoEncoder::SetSizeOfEditAndContinueFixedStackFrame( UINT32 size ) +template void TGcInfoEncoder::SetSizeOfEditAndContinueFixedStackFrame( UINT32 size ) { m_SizeOfEditAndContinueFixedStackFrame = size; } #endif #ifdef TARGET_AMD64 -void GcInfoEncoder::SetWantsReportOnlyLeaf() +template void TGcInfoEncoder::SetWantsReportOnlyLeaf() { m_WantsReportOnlyLeaf = true; } #elif 
defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) -void GcInfoEncoder::SetHasTailCalls() +template void TGcInfoEncoder::SetHasTailCalls() { m_HasTailCalls = true; } #endif // TARGET_AMD64 #ifdef FIXED_STACK_PARAMETER_SCRATCH_AREA -void GcInfoEncoder::SetSizeOfStackOutgoingAndScratchArea( UINT32 size ) +template void TGcInfoEncoder::SetSizeOfStackOutgoingAndScratchArea( UINT32 size ) { _ASSERTE( size != (UINT32)-1 ); _ASSERTE( m_SizeOfStackOutgoingAndScratchArea == (UINT32)-1 || m_SizeOfStackOutgoingAndScratchArea == size ); @@ -764,11 +766,12 @@ void GcInfoEncoder::SetSizeOfStackOutgoingAndScratchArea( UINT32 size ) } #endif // FIXED_STACK_PARAMETER_SCRATCH_AREA -void GcInfoEncoder::SetReversePInvokeFrameSlot(INT32 spOffset) +template void TGcInfoEncoder::SetReversePInvokeFrameSlot(INT32 spOffset) { m_ReversePInvokeFrameSlot = spOffset; } + struct GcSlotDescAndId { GcSlotDesc m_SlotDesc; @@ -819,7 +822,7 @@ struct CompareSlotDescAndIdBySlotDesc struct CompareLifetimeTransitionsByOffsetThenSlot { - bool operator()(const GcInfoEncoder::LifetimeTransition& first, const GcInfoEncoder::LifetimeTransition& second) + bool operator()(const GcInfoEncoderExt::LifetimeTransition& first, const GcInfoEncoderExt::LifetimeTransition& second) { UINT32 firstOffset = first.CodeOffset; UINT32 secondOffset = second.CodeOffset; @@ -837,12 +840,13 @@ struct CompareLifetimeTransitionsByOffsetThenSlot struct CompareLifetimeTransitionsBySlot { - bool operator()(const GcInfoEncoder::LifetimeTransition& first, const GcInfoEncoder::LifetimeTransition& second) + bool operator()(const GcInfoEncoderExt::LifetimeTransition& first, const GcInfoEncoderExt::LifetimeTransition& second) { UINT32 firstOffset = first.CodeOffset; UINT32 secondOffset = second.CodeOffset; - _ASSERTE(GetNormCodeOffsetChunk(firstOffset) == GetNormCodeOffsetChunk(secondOffset)); + // Interpreter-FIXME: GcInfoEncoding:: + // _ASSERTE(GetNormCodeOffsetChunk(firstOffset) == GetNormCodeOffsetChunk(secondOffset)); // Sort them by slot if( first.SlotId != second.SlotId) @@ -902,110 +906,20 @@ void BitStreamWriter::MemoryBlockList::Dispose(IAllocator* allocator) #endif } -void GcInfoEncoder::FinalizeSlotIds() + +template void TGcInfoEncoder::FinalizeSlotIds() { #ifdef _DEBUG m_IsSlotTableFrozen = TRUE; #endif } -#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - -// tells whether a slot cannot contain an object reference -// at call instruction or right after returning -bool GcInfoEncoder::DoNotTrackInPartiallyInterruptible(GcSlotDesc &slotDesc) -{ -#if defined(TARGET_ARM) - - _ASSERTE( m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1 ); - if(slotDesc.IsRegister()) - { - int regNum = (int) slotDesc.Slot.RegisterNumber; - _ASSERTE(regNum >= 0 && regNum <= 14); - _ASSERTE(regNum != 13); // sp - - return ((regNum <= 3) || (regNum >= 12)) // R12 is volatile and SP/LR can't contain objects around calls - && regNum != 0 // R0 can contain return value - ; - } - else if (!slotDesc.IsUntracked() && (slotDesc.Slot.Stack.Base == GC_SP_REL) && - ((UINT32)slotDesc.Slot.Stack.SpOffset < m_SizeOfStackOutgoingAndScratchArea)) - { - return TRUE; - } - else - return FALSE; - -#elif defined(TARGET_ARM64) - - _ASSERTE(m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1); - if (slotDesc.IsRegister()) - { - int regNum = (int)slotDesc.Slot.RegisterNumber; - _ASSERTE(regNum >= 0 && regNum <= 30); - _ASSERTE(regNum != 18); - - return (regNum <= 17 || regNum >= 29) // X0 through X17 are scratch, FP/LR can't be used for objects around 
calls - && regNum != 0 // X0 can contain return value - && regNum != 1 // X1 can contain return value - ; - } - else if (!slotDesc.IsUntracked() && (slotDesc.Slot.Stack.Base == GC_SP_REL) && - ((UINT32)slotDesc.Slot.Stack.SpOffset < m_SizeOfStackOutgoingAndScratchArea)) - { - return TRUE; - } - else - return FALSE; - -#elif defined(TARGET_AMD64) - - _ASSERTE( m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1 ); - if(slotDesc.IsRegister()) - { - int regNum = (int) slotDesc.Slot.RegisterNumber; - _ASSERTE(regNum >= 0 && regNum <= 16); - _ASSERTE(regNum != 4); // rsp - - UINT16 PreservedRegMask = - (1 << 3) // rbx - | (1 << 5) // rbp -#ifndef UNIX_AMD64_ABI - | (1 << 6) // rsi - | (1 << 7) // rdi -#endif // UNIX_AMD64_ABI - | (1 << 12) // r12 - | (1 << 13) // r13 - | (1 << 14) // r14 - | (1 << 15) // r15 - | (1 << 0) // rax - may contain return value -#ifdef UNIX_AMD64_ABI - | (1 << 2) // rdx - may contain return value -#endif - ; - - return !(PreservedRegMask & (1 << regNum)); - } - else if (!slotDesc.IsUntracked() && (slotDesc.Slot.Stack.Base == GC_SP_REL) && - ((UINT32)slotDesc.Slot.Stack.SpOffset < m_SizeOfStackOutgoingAndScratchArea)) - { - return TRUE; - } - else - return FALSE; - -#else - return FALSE; -#endif -} -#endif // PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - -void GcInfoEncoder::Build() +template void TGcInfoEncoder::Build() { #ifdef _DEBUG _ASSERTE(m_IsSlotTableFrozen || m_NumSlots == 0); - _ASSERTE((1 << NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2) == NUM_NORM_CODE_OFFSETS_PER_CHUNK); + _ASSERTE((1 << GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2) == GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK); char methodName[256]; m_pCorJitInfo->printMethodName(m_pMethodInfo->ftn, methodName, sizeof(methodName)); @@ -1013,9 +927,9 @@ void GcInfoEncoder::Build() char className[256]; m_pCorJitInfo->printClassName(m_pCorJitInfo->getMethodClass(m_pMethodInfo->ftn), className, sizeof(className)); - LOG((LF_GCINFO, LL_INFO100, + GCINFO_LOG( LL_INFO100, "Entering GcInfoEncoder::Build() for method %s:%s\n", - className, methodName)); + className, methodName); #endif @@ -1028,9 +942,9 @@ void GcInfoEncoder::Build() UINT32 hasContextParamType = (m_GenericsInstContextStackSlot != NO_GENERICS_INST_CONTEXT); UINT32 hasReversePInvokeFrame = (m_ReversePInvokeFrameSlot != NO_REVERSE_PINVOKE_FRAME); - BOOL slimHeader = (!m_IsVarArg && !hasGSCookie && (m_PSPSymStackSlot == NO_PSP_SYM) && + BOOL slimHeader = (!m_IsVarArg && !hasGSCookie && !hasContextParamType && (m_InterruptibleRanges.Count() == 0) && !hasReversePInvokeFrame && - ((m_StackBaseRegister == NO_STACK_BASE_REGISTER) || (NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister) == 0))) && + ((m_StackBaseRegister == NO_STACK_BASE_REGISTER) || (GcInfoEncoding::NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister) == 0))) && #ifdef TARGET_AMD64 !m_WantsReportOnlyLeaf && #elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) @@ -1057,7 +971,7 @@ void GcInfoEncoder::Build() GCINFO_WRITE(m_Info1, (m_IsVarArg ? 1 : 0), 1, FlagsSize); GCINFO_WRITE(m_Info1, 0 /* unused - was hasSecurityObject */, 1, FlagsSize); GCINFO_WRITE(m_Info1, (hasGSCookie ? 1 : 0), 1, FlagsSize); - GCINFO_WRITE(m_Info1, ((m_PSPSymStackSlot != NO_PSP_SYM) ? 
1 : 0), 1, FlagsSize); + GCINFO_WRITE(m_Info1, 0 /* unused - was hasPSPSymStackSlot */, 1, FlagsSize); GCINFO_WRITE(m_Info1, m_contextParamType, 2, FlagsSize); #if defined(TARGET_LOONGARCH64) assert(m_StackBaseRegister == 22 || 3 == m_StackBaseRegister); @@ -1075,8 +989,8 @@ void GcInfoEncoder::Build() } _ASSERTE( m_CodeLength > 0 ); - _ASSERTE(DENORMALIZE_CODE_LENGTH(NORMALIZE_CODE_LENGTH(m_CodeLength)) == m_CodeLength); - GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_CODE_LENGTH(m_CodeLength), CODE_LENGTH_ENCBASE, CodeLengthSize); + _ASSERTE(GcInfoEncoding::DENORMALIZE_CODE_LENGTH(GcInfoEncoding::NORMALIZE_CODE_LENGTH(m_CodeLength)) == m_CodeLength); + GCINFO_WRITE_VARL_U(m_Info1, GcInfoEncoding::NORMALIZE_CODE_LENGTH(m_CodeLength), GcInfoEncoding::CODE_LENGTH_ENCBASE, CodeLengthSize); if(hasGSCookie) { @@ -1090,13 +1004,13 @@ void GcInfoEncoder::Build() _ASSERTE(intersectionStart > 0 && intersectionStart < m_CodeLength); _ASSERTE(intersectionEnd > 0 && intersectionEnd <= m_CodeLength); _ASSERTE(intersectionStart <= intersectionEnd); - UINT32 normPrologSize = NORMALIZE_CODE_OFFSET(intersectionStart); - UINT32 normEpilogSize = NORMALIZE_CODE_OFFSET(m_CodeLength) - NORMALIZE_CODE_OFFSET(intersectionEnd); + UINT32 normPrologSize = GcInfoEncoding::NORMALIZE_CODE_OFFSET(intersectionStart); + UINT32 normEpilogSize = GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_CodeLength) - GcInfoEncoding::NORMALIZE_CODE_OFFSET(intersectionEnd); _ASSERTE(normPrologSize > 0 && normPrologSize < m_CodeLength); _ASSERTE(normEpilogSize < m_CodeLength); - GCINFO_WRITE_VARL_U(m_Info1, normPrologSize-1, NORM_PROLOG_SIZE_ENCBASE, ProEpilogSize); - GCINFO_WRITE_VARL_U(m_Info1, normEpilogSize, NORM_EPILOG_SIZE_ENCBASE, ProEpilogSize); + GCINFO_WRITE_VARL_U(m_Info1, normPrologSize-1, GcInfoEncoding::NORM_PROLOG_SIZE_ENCBASE, ProEpilogSize); + GCINFO_WRITE_VARL_U(m_Info1, normEpilogSize, GcInfoEncoding::NORM_EPILOG_SIZE_ENCBASE, ProEpilogSize); } else if (hasContextParamType) { @@ -1104,10 +1018,10 @@ void GcInfoEncoder::Build() // Save the prolog size, to be used for determining when it is not safe // to report generics param context and the security object _ASSERTE(m_GSCookieValidRangeStart > 0 && m_GSCookieValidRangeStart < m_CodeLength); - UINT32 normPrologSize = NORMALIZE_CODE_OFFSET(m_GSCookieValidRangeStart); + UINT32 normPrologSize = GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_GSCookieValidRangeStart); _ASSERTE(normPrologSize > 0 && normPrologSize < m_CodeLength); - GCINFO_WRITE_VARL_U(m_Info1, normPrologSize-1, NORM_PROLOG_SIZE_ENCBASE, ProEpilogSize); + GCINFO_WRITE_VARL_U(m_Info1, normPrologSize-1, GcInfoEncoding::NORM_PROLOG_SIZE_ENCBASE, ProEpilogSize); } // Encode the offset to the GS cookie. @@ -1115,24 +1029,13 @@ void GcInfoEncoder::Build() { _ASSERTE(!slimHeader); #ifdef _DEBUG - LOG((LF_GCINFO, LL_INFO1000, "GS cookie at " FMT_STK "\n", + GCINFO_LOG( LL_INFO1000, "GS cookie at " FMT_STK "\n", DBG_STK(m_GSCookieStackSlot) - )); + ); #endif - GCINFO_WRITE_VARL_S(m_Info1, NORMALIZE_STACK_SLOT(m_GSCookieStackSlot), GS_COOKIE_STACK_SLOT_ENCBASE, GsCookieSize); - - } + GCINFO_WRITE_VARL_S(m_Info1, GcInfoEncoding::NORMALIZE_STACK_SLOT(m_GSCookieStackSlot), GcInfoEncoding::GS_COOKIE_STACK_SLOT_ENCBASE, GsCookieSize); - // Encode the offset to the PSPSym. - // The PSPSym is relative to the caller SP on IA64 and the initial stack pointer before stack allocations on X64. 
- if(m_PSPSymStackSlot != NO_PSP_SYM) - { - _ASSERTE(!slimHeader); -#ifdef _DEBUG - LOG((LF_GCINFO, LL_INFO1000, "Parent PSP at " FMT_STK "\n", DBG_STK(m_PSPSymStackSlot))); -#endif - GCINFO_WRITE_VARL_S(m_Info1, NORMALIZE_STACK_SLOT(m_PSPSymStackSlot), PSP_SYM_STACK_SLOT_ENCBASE, PspSymSize); } // Encode the offset to the generics type context. @@ -1140,11 +1043,11 @@ void GcInfoEncoder::Build() { _ASSERTE(!slimHeader); #ifdef _DEBUG - LOG((LF_GCINFO, LL_INFO1000, "Generics instantiation context at " FMT_STK "\n", + GCINFO_LOG( LL_INFO1000, "Generics instantiation context at " FMT_STK "\n", DBG_STK(m_GenericsInstContextStackSlot) - )); + ); #endif - GCINFO_WRITE_VARL_S(m_Info1, NORMALIZE_STACK_SLOT(m_GenericsInstContextStackSlot), GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE, GenericsCtxSize); + GCINFO_WRITE_VARL_S(m_Info1, GcInfoEncoding::NORMALIZE_STACK_SLOT(m_GenericsInstContextStackSlot), GcInfoEncoding::GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE, GenericsCtxSize); } if(!slimHeader && (m_StackBaseRegister != NO_STACK_BASE_REGISTER)) @@ -1154,28 +1057,28 @@ void GcInfoEncoder::Build() #elif defined(TARGET_RISCV64) assert(m_StackBaseRegister == 8 || 2 == m_StackBaseRegister); #endif - GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister), STACK_BASE_REGISTER_ENCBASE, StackBaseSize); + GCINFO_WRITE_VARL_U(m_Info1, GcInfoEncoding::NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister), GcInfoEncoding::STACK_BASE_REGISTER_ENCBASE, StackBaseSize); } if (m_SizeOfEditAndContinuePreservedArea != NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA) { - GCINFO_WRITE_VARL_U(m_Info1, m_SizeOfEditAndContinuePreservedArea, SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE, EncInfoSize); + GCINFO_WRITE_VARL_U(m_Info1, m_SizeOfEditAndContinuePreservedArea, GcInfoEncoding::SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE, EncInfoSize); #ifdef TARGET_ARM64 - GCINFO_WRITE_VARL_U(m_Info1, m_SizeOfEditAndContinueFixedStackFrame, SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE, EncInfoSize); + GCINFO_WRITE_VARL_U(m_Info1, m_SizeOfEditAndContinueFixedStackFrame, GcInfoEncoding::SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE, EncInfoSize); #endif } if (hasReversePInvokeFrame) { _ASSERTE(!slimHeader); - GCINFO_WRITE_VARL_S(m_Info1, NORMALIZE_STACK_SLOT(m_ReversePInvokeFrameSlot), REVERSE_PINVOKE_FRAME_ENCBASE, ReversePInvokeFrameSize); + GCINFO_WRITE_VARL_S(m_Info1, GcInfoEncoding::NORMALIZE_STACK_SLOT(m_ReversePInvokeFrameSlot), GcInfoEncoding::REVERSE_PINVOKE_FRAME_ENCBASE, ReversePInvokeFrameSize); } #ifdef FIXED_STACK_PARAMETER_SCRATCH_AREA if (!slimHeader) { _ASSERTE( m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1 ); - GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_SIZE_OF_STACK_AREA(m_SizeOfStackOutgoingAndScratchArea), SIZE_OF_STACK_AREA_ENCBASE, FixedAreaSize); + GCINFO_WRITE_VARL_U(m_Info1, GcInfoEncoding::NORMALIZE_SIZE_OF_STACK_AREA(m_SizeOfStackOutgoingAndScratchArea), GcInfoEncoding::SIZE_OF_STACK_AREA_ENCBASE, FixedAreaSize); } #endif // FIXED_STACK_PARAMETER_SCRATCH_AREA @@ -1208,12 +1111,12 @@ void GcInfoEncoder::Build() UINT32 callSite = m_pCallSites[callSiteIndex]; callSite += m_pCallSiteSizes[callSiteIndex]; - _ASSERTE(DENORMALIZE_CODE_OFFSET(NORMALIZE_CODE_OFFSET(callSite)) == callSite); - UINT32 normOffset = NORMALIZE_CODE_OFFSET(callSite); + _ASSERTE(GcInfoEncoding::DENORMALIZE_CODE_OFFSET(GcInfoEncoding::NORMALIZE_CODE_OFFSET(callSite)) == callSite); + UINT32 normOffset = GcInfoEncoding::NORMALIZE_CODE_OFFSET(callSite); m_pCallSites[numCallSites++] = normOffset; } - 
GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_NUM_SAFE_POINTS(numCallSites), NUM_SAFE_POINTS_ENCBASE, NumCallSitesSize); + GCINFO_WRITE_VARL_U(m_Info1, numCallSites, GcInfoEncoding::NUM_SAFE_POINTS_ENCBASE, NumCallSitesSize); m_NumCallSites = numCallSites; #endif // PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED @@ -1223,7 +1126,7 @@ void GcInfoEncoder::Build() } else { - GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_NUM_INTERRUPTIBLE_RANGES(numInterruptibleRanges), NUM_INTERRUPTIBLE_RANGES_ENCBASE, NumRangesSize); + GCINFO_WRITE_VARL_U(m_Info1, numInterruptibleRanges, GcInfoEncoding::NUM_INTERRUPTIBLE_RANGES_ENCBASE, NumRangesSize); } @@ -1233,7 +1136,7 @@ void GcInfoEncoder::Build() // Encode call site offsets /////////////////////////////////////////////////////////////////////// - UINT32 numBitsPerOffset = CeilOfLog2(NORMALIZE_CODE_OFFSET(m_CodeLength)); + UINT32 numBitsPerOffset = CeilOfLog2(GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_CodeLength)); for(UINT32 callSiteIndex = 0; callSiteIndex < m_NumCallSites; callSiteIndex++) { @@ -1264,9 +1167,9 @@ void GcInfoEncoder::Build() lastStopOffset = normStopOffset; - GCINFO_WRITE_VARL_U(m_Info1, normStartDelta, INTERRUPTIBLE_RANGE_DELTA1_ENCBASE, RangeSize); + GCINFO_WRITE_VARL_U(m_Info1, normStartDelta, GcInfoEncoding::INTERRUPTIBLE_RANGE_DELTA1_ENCBASE, RangeSize); - GCINFO_WRITE_VARL_U(m_Info1, normStopDelta-1, INTERRUPTIBLE_RANGE_DELTA2_ENCBASE, RangeSize); + GCINFO_WRITE_VARL_U(m_Info1, normStopDelta-1, GcInfoEncoding::INTERRUPTIBLE_RANGE_DELTA2_ENCBASE, RangeSize); } } @@ -1351,14 +1254,14 @@ void GcInfoEncoder::Build() #endif } -#if CODE_OFFSETS_NEED_NORMALIZATION - // Do a pass to normalize transition offsets - for(pCurrent = pTransitions; pCurrent < pEndTransitions; pCurrent++) - { - _ASSERTE(pCurrent->CodeOffset <= m_CodeLength); - pCurrent->CodeOffset = NORMALIZE_CODE_OFFSET(pCurrent->CodeOffset); + if (GcInfoEncoding::CODE_OFFSETS_NEED_NORMALIZATION) { + // Do a pass to normalize transition offsets + for(pCurrent = pTransitions; pCurrent < pEndTransitions; pCurrent++) + { + _ASSERTE(pCurrent->CodeOffset <= m_CodeLength); + pCurrent->CodeOffset = GcInfoEncoding::NORMALIZE_CODE_OFFSET(pCurrent->CodeOffset); + } } -#endif /////////////////////////////////////////////////////////////////// // Find out which slots are really used @@ -1389,14 +1292,11 @@ void GcInfoEncoder::Build() else { UINT32 slotIndex = pCurrent->SlotId; - if(!DoNotTrackInPartiallyInterruptible(m_SlotTable[slotIndex])) - { - BYTE becomesLive = pCurrent->BecomesLive; - _ASSERTE((liveState.ReadBit(slotIndex) && !becomesLive) - || (!liveState.ReadBit(slotIndex) && becomesLive)); + BYTE becomesLive = pCurrent->BecomesLive; + _ASSERTE((liveState.ReadBit(slotIndex) && !becomesLive) + || (!liveState.ReadBit(slotIndex) && becomesLive)); - liveState.WriteBit(slotIndex, becomesLive); - } + liveState.WriteBit(slotIndex, becomesLive); pCurrent++; } } @@ -1538,7 +1438,7 @@ void GcInfoEncoder::Build() if (numRegisters) { GCINFO_WRITE(m_Info1, 1, 1, FlagsSize); - GCINFO_WRITE_VARL_U(m_Info1, numRegisters, NUM_REGISTERS_ENCBASE, NumRegsSize); + GCINFO_WRITE_VARL_U(m_Info1, numRegisters, GcInfoEncoding::NUM_REGISTERS_ENCBASE, NumRegsSize); } else { @@ -1547,8 +1447,8 @@ void GcInfoEncoder::Build() if (numStackSlots || numUntrackedSlots) { GCINFO_WRITE(m_Info1, 1, 1, FlagsSize); - GCINFO_WRITE_VARL_U(m_Info1, numStackSlots, NUM_STACK_SLOTS_ENCBASE, NumStackSize); - GCINFO_WRITE_VARL_U(m_Info1, numUntrackedSlots, NUM_UNTRACKED_SLOTS_ENCBASE, NumUntrackedSize); + GCINFO_WRITE_VARL_U(m_Info1, numStackSlots, 
GcInfoEncoding::NUM_STACK_SLOTS_ENCBASE, NumStackSize); + GCINFO_WRITE_VARL_U(m_Info1, numUntrackedSlots, GcInfoEncoding::NUM_UNTRACKED_SLOTS_ENCBASE, NumUntrackedSize); } else { @@ -1572,8 +1472,8 @@ void GcInfoEncoder::Build() _ASSERTE(pSlotDesc->IsRegister()); // Encode slot identification - UINT32 currentNormRegNum = NORMALIZE_REGISTER(pSlotDesc->Slot.RegisterNumber); - GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum, REGISTER_ENCBASE, RegSlotSize); + UINT32 currentNormRegNum = pSlotDesc->Slot.RegisterNumber; + GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum, GcInfoEncoding::REGISTER_ENCBASE, RegSlotSize); GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, RegSlotSize); for(UINT32 j = 1; j < numRegisters; j++) @@ -1589,17 +1489,17 @@ void GcInfoEncoder::Build() while(pSlotDesc->IsDeleted()); _ASSERTE(pSlotDesc->IsRegister()); - currentNormRegNum = NORMALIZE_REGISTER(pSlotDesc->Slot.RegisterNumber); + currentNormRegNum = pSlotDesc->Slot.RegisterNumber; if(lastFlags != GC_SLOT_IS_REGISTER) { - GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum, REGISTER_ENCBASE, RegSlotSize); + GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum, GcInfoEncoding::REGISTER_ENCBASE, RegSlotSize); GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, RegSlotSize); } else { _ASSERTE(pSlotDesc->Flags == GC_SLOT_IS_REGISTER); - GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum - lastNormRegNum - 1, REGISTER_DELTA_ENCBASE, RegSlotSize); + GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum - lastNormRegNum - 1, GcInfoEncoding::REGISTER_DELTA_ENCBASE, RegSlotSize); } } } @@ -1619,8 +1519,8 @@ void GcInfoEncoder::Build() // Encode slot identification _ASSERTE((pSlotDesc->Slot.Stack.Base & ~3) == 0); GCINFO_WRITE(m_Info1, pSlotDesc->Slot.Stack.Base, 2, StackSlotSize); - INT32 currentNormStackSlot = NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset); - GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, STACK_SLOT_ENCBASE, StackSlotSize); + INT32 currentNormStackSlot = GcInfoEncoding::NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset); + GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, GcInfoEncoding::STACK_SLOT_ENCBASE, StackSlotSize); GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, StackSlotSize); @@ -1638,20 +1538,20 @@ void GcInfoEncoder::Build() _ASSERTE(!pSlotDesc->IsRegister()); _ASSERTE(!pSlotDesc->IsUntracked()); - currentNormStackSlot = NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset); + currentNormStackSlot = GcInfoEncoding::NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset); _ASSERTE((pSlotDesc->Slot.Stack.Base & ~3) == 0); GCINFO_WRITE(m_Info1, pSlotDesc->Slot.Stack.Base, 2, StackSlotSize); if(lastFlags != GC_SLOT_BASE) { - GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, STACK_SLOT_ENCBASE, StackSlotSize); + GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, GcInfoEncoding::STACK_SLOT_ENCBASE, StackSlotSize); GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, StackSlotSize); } else { _ASSERTE(pSlotDesc->Flags == GC_SLOT_BASE); - GCINFO_WRITE_VARL_U(m_Info1, currentNormStackSlot - lastNormStackSlot, STACK_SLOT_DELTA_ENCBASE, StackSlotSize); + GCINFO_WRITE_VARL_U(m_Info1, currentNormStackSlot - lastNormStackSlot, GcInfoEncoding::STACK_SLOT_DELTA_ENCBASE, StackSlotSize); } } } @@ -1671,8 +1571,8 @@ void GcInfoEncoder::Build() // Encode slot identification _ASSERTE((pSlotDesc->Slot.Stack.Base & ~3) == 0); GCINFO_WRITE(m_Info1, pSlotDesc->Slot.Stack.Base, 2, UntrackedSlotSize); - INT32 currentNormStackSlot = NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset); - GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, STACK_SLOT_ENCBASE, 
UntrackedSlotSize); + INT32 currentNormStackSlot = GcInfoEncoding::NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset); + GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, GcInfoEncoding::STACK_SLOT_ENCBASE, UntrackedSlotSize); GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, UntrackedSlotSize); @@ -1690,20 +1590,20 @@ void GcInfoEncoder::Build() _ASSERTE(!pSlotDesc->IsRegister()); _ASSERTE(pSlotDesc->IsUntracked()); - currentNormStackSlot = NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset); + currentNormStackSlot = GcInfoEncoding::NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset); _ASSERTE((pSlotDesc->Slot.Stack.Base & ~3) == 0); GCINFO_WRITE(m_Info1, pSlotDesc->Slot.Stack.Base, 2, UntrackedSlotSize); if(lastFlags != GC_SLOT_UNTRACKED) { - GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, STACK_SLOT_ENCBASE, UntrackedSlotSize); + GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, GcInfoEncoding::STACK_SLOT_ENCBASE, UntrackedSlotSize); GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, UntrackedSlotSize); } else { _ASSERTE(pSlotDesc->Flags == GC_SLOT_UNTRACKED); - GCINFO_WRITE_VARL_U(m_Info1, currentNormStackSlot - lastNormStackSlot, STACK_SLOT_DELTA_ENCBASE, UntrackedSlotSize); + GCINFO_WRITE_VARL_U(m_Info1, currentNormStackSlot - lastNormStackSlot, GcInfoEncoding::STACK_SLOT_DELTA_ENCBASE, UntrackedSlotSize); } } } @@ -1795,13 +1695,13 @@ void GcInfoEncoder::Build() for (LiveStateHashTable::KeyIterator iter = hashMap.Begin(), end = hashMap.End(); !iter.Equal(end); iter.Next()) { largestSetOffset = sizeofSets; - sizeofSets += SizeofSlotStateVarLengthVector(*iter.Get(), LIVESTATE_RLE_SKIP_ENCBASE, LIVESTATE_RLE_RUN_ENCBASE); + sizeofSets += SizeofSlotStateVarLengthVector(*iter.Get(), GcInfoEncoding::LIVESTATE_RLE_SKIP_ENCBASE, GcInfoEncoding::LIVESTATE_RLE_RUN_ENCBASE); } // Now that we know the largest offset, we can figure out how much the indirection // will cost us and commit UINT32 numBitsPerPointer = ((largestSetOffset < 2) ? 
1 : CeilOfLog2(largestSetOffset + 1)); - const size_t sizeofEncodedNumBitsPerPointer = BitStreamWriter::SizeofVarLengthUnsigned(numBitsPerPointer, POINTER_SIZE_ENCBASE); + const size_t sizeofEncodedNumBitsPerPointer = BitStreamWriter::SizeofVarLengthUnsigned(numBitsPerPointer, GcInfoEncoding::POINTER_SIZE_ENCBASE); const size_t sizeofNoIndirection = m_NumCallSites * (numRegisters + numStackSlots); const size_t sizeofIndirection = sizeofEncodedNumBitsPerPointer // Encode the pointer sizes + (m_NumCallSites * numBitsPerPointer) // Encode the pointers @@ -1817,14 +1717,14 @@ void GcInfoEncoder::Build() { // we are using an indirection GCINFO_WRITE(m_Info1, 1, 1, FlagsSize); - GCINFO_WRITE_VARL_U(m_Info1, numBitsPerPointer - 1, POINTER_SIZE_ENCBASE, CallSiteStateSize); + GCINFO_WRITE_VARL_U(m_Info1, numBitsPerPointer - 1, GcInfoEncoding::POINTER_SIZE_ENCBASE, CallSiteStateSize); // Now encode the live sets and record the real offset for (LiveStateHashTable::KeyIterator iter = hashMap.Begin(), end = hashMap.End(); !iter.Equal(end); iter.Next()) { _ASSERTE(FitsIn(m_Info2.GetBitCount())); iter.SetValue((UINT32)m_Info2.GetBitCount()); - GCINFO_WRITE_VAR_VECTOR(m_Info2, *iter.Get(), LIVESTATE_RLE_SKIP_ENCBASE, LIVESTATE_RLE_RUN_ENCBASE, CallSiteStateSize); + GCINFO_WRITE_VAR_VECTOR(m_Info2, *iter.Get(), GcInfoEncoding::LIVESTATE_RLE_SKIP_ENCBASE, GcInfoEncoding::LIVESTATE_RLE_RUN_ENCBASE, CallSiteStateSize); } _ASSERTE(sizeofSets == m_Info2.GetBitCount()); @@ -1937,7 +1837,7 @@ void GcInfoEncoder::Build() InterruptibleRange *pRange = &pRanges[i]; totalInterruptibleLength += pRange->NormStopOffset - pRange->NormStartOffset; } - _ASSERTE(totalInterruptibleLength <= NORMALIZE_CODE_OFFSET(m_CodeLength)); + _ASSERTE(totalInterruptibleLength <= GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_CodeLength)); liveState.ClearAll(); // Re-use couldBeLive @@ -2038,14 +1938,14 @@ void GcInfoEncoder::Build() pEndTransitions = pNextFree; #else - UINT32 totalInterruptibleLength = NORMALIZE_CODE_OFFSET(m_CodeLength); + UINT32 totalInterruptibleLength = GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_CodeLength); #endif //PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED // // Initialize chunk pointers // - UINT32 numChunks = (totalInterruptibleLength + NUM_NORM_CODE_OFFSETS_PER_CHUNK - 1) / NUM_NORM_CODE_OFFSETS_PER_CHUNK; + UINT32 numChunks = (totalInterruptibleLength + GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK - 1) / GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK; _ASSERTE(numChunks > 0); size_t* pChunkPointers = (size_t*) m_pAllocator->Alloc(numChunks*sizeof(size_t)); @@ -2055,7 +1955,7 @@ void GcInfoEncoder::Build() // Encode transitions //------------------------------------------------------------------ - LOG((LF_GCINFO, LL_INFO1000, "Encoding %i lifetime transitions.\n", pEndTransitions - pTransitions)); + GCINFO_LOG( LL_INFO1000, "Encoding %i lifetime transitions.\n", pEndTransitions - pTransitions); liveState.ClearAll(); @@ -2102,13 +2002,13 @@ void GcInfoEncoder::Build() pChunkPointers[currentChunk] = m_Info2.GetBitCount() + 1; // Write couldBeLive slot map - GCINFO_WRITE_VAR_VECTOR(m_Info2, couldBeLive, LIVESTATE_RLE_SKIP_ENCBASE, LIVESTATE_RLE_RUN_ENCBASE, ChunkMaskSize); + GCINFO_WRITE_VAR_VECTOR(m_Info2, couldBeLive, GcInfoEncoding::LIVESTATE_RLE_SKIP_ENCBASE, GcInfoEncoding::LIVESTATE_RLE_RUN_ENCBASE, ChunkMaskSize); - LOG((LF_GCINFO, LL_INFO100000, + GCINFO_LOG( LL_INFO100000, "Chunk %d couldBeLive (%04x-%04x):\n", currentChunk, - currentChunk * NUM_NORM_CODE_OFFSETS_PER_CHUNK, - ((currentChunk + 1) * 
NUM_NORM_CODE_OFFSETS_PER_CHUNK) - 1 - )); + currentChunk * GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK, + ((currentChunk + 1) * GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK) - 1 + ); // Write final state // For all the bits set in couldBeLive. @@ -2125,15 +2025,15 @@ void GcInfoEncoder::Build() ChunkFinalStateSize ); - LOG((LF_GCINFO, LL_INFO100000, + GCINFO_LOG( LL_INFO100000, "\t" LOG_GCSLOTDESC_FMT " %s at end of chunk.\n", LOG_GCSLOTDESC_ARGS(&m_SlotTable[i]), - liveState.ReadBit(i) ? "live" : "dead")); + liveState.ReadBit(i) ? "live" : "dead"); } } // Write transitions offsets - UINT32 normChunkBaseCodeOffset = currentChunk * NUM_NORM_CODE_OFFSETS_PER_CHUNK; + UINT32 normChunkBaseCodeOffset = currentChunk * GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK; LifetimeTransition* pT = pCurrent - numTransitionsInCurrentChunk; @@ -2149,11 +2049,11 @@ void GcInfoEncoder::Build() _ASSERTE(couldBeLive.ReadBit(slotId)); - LOG((LF_GCINFO, LL_INFO100000, + GCINFO_LOG( LL_INFO100000, "\tTransition " LOG_GCSLOTDESC_FMT " going %s at offset %04x.\n", LOG_GCSLOTDESC_ARGS(&m_SlotTable[pT->SlotId]), pT->BecomesLive ? "live" : "dead", - (int) pT->CodeOffset )); + (int) pT->CodeOffset ); // Write code offset delta UINT32 normCodeOffset = pT->CodeOffset; @@ -2162,10 +2062,10 @@ void GcInfoEncoder::Build() // Don't encode transitions at offset 0 as they are useless if(normCodeOffsetDelta) { - _ASSERTE(normCodeOffsetDelta < NUM_NORM_CODE_OFFSETS_PER_CHUNK); + _ASSERTE(normCodeOffsetDelta < GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK); GCINFO_WRITE(m_Info2, 1, 1, ChunkTransitionSize); - GCINFO_WRITE(m_Info2, normCodeOffsetDelta, NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2, ChunkTransitionSize); + GCINFO_WRITE(m_Info2, normCodeOffsetDelta, GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2, ChunkTransitionSize); #ifdef MEASURE_GCINFO m_CurrentMethodSize.NumTransitions++; @@ -2199,7 +2099,7 @@ void GcInfoEncoder::Build() } UINT32 numBitsPerPointer = CeilOfLog2(largestPointer + 1); - GCINFO_WRITE_VARL_U(m_Info1, numBitsPerPointer, POINTER_SIZE_ENCBASE, ChunkPtrSize); + GCINFO_WRITE_VARL_U(m_Info1, numBitsPerPointer, GcInfoEncoding::POINTER_SIZE_ENCBASE, ChunkPtrSize); if(numBitsPerPointer) { @@ -2262,12 +2162,12 @@ lExitSuccess:; m_CurrentMethodSize.Log(LL_INFO100, "=== PartiallyInterruptible method breakdown ===\r\n"); g_PiGcInfoSize.Log(LL_INFO10, "=== PartiallyInterruptible global breakdown ===\r\n"); } - LogSpew(LF_GCINFO, LL_INFO10, "Total SlimHeaders: %zu\n", g_NumSlimHeaders); - LogSpew(LF_GCINFO, LL_INFO10, "NumMethods: %zu\n", g_NumFatHeaders); + GCINFO_LOGSPEW( LL_INFO10, "Total SlimHeaders: %zu\n", g_NumSlimHeaders); + GCINFO_LOGSPEW( LL_INFO10, "NumMethods: %zu\n", g_NumFatHeaders); #endif } -void GcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector, +template void TGcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector, UINT32 baseSkip, UINT32 baseRun, UINT32 *pSizeofSimple, @@ -2342,7 +2242,7 @@ void GcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector, *pSizeofRLENeg = sizeofRLENeg; } -UINT32 GcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector, +template UINT32 TGcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector, UINT32 baseSkip, UINT32 baseRun) { @@ -2359,7 +2259,7 @@ UINT32 GcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector, return sizeofRLENeg; } -UINT32 GcInfoEncoder::WriteSlotStateVarLengthVector(BitStreamWriter &writer, +template UINT32 
TGcInfoEncoder::WriteSlotStateVarLengthVector(BitStreamWriter &writer, const BitArray &vector, UINT32 baseSkip, UINT32 baseRun) @@ -2449,7 +2349,7 @@ UINT32 GcInfoEncoder::WriteSlotStateVarLengthVector(BitStreamWriter &writer, } -void GcInfoEncoder::EliminateRedundantLiveDeadPairs(LifetimeTransition** ppTransitions, +template void TGcInfoEncoder::EliminateRedundantLiveDeadPairs(LifetimeTransition** ppTransitions, size_t* pNumTransitions, LifetimeTransition** ppEndTransitions) { @@ -2504,12 +2404,12 @@ void GcInfoEncoder::EliminateRedundantLiveDeadPairs(LifetimeTransition** ppTrans // Write encoded information to its final destination and frees temporary buffers. // The encoder shouldn't be used anymore after calling this method. // -BYTE* GcInfoEncoder::Emit() +template BYTE* TGcInfoEncoder::Emit() { size_t cbGcInfoSize = m_Info1.GetByteCount() + m_Info2.GetByteCount(); - LOG((LF_GCINFO, LL_INFO100, "GcInfoEncoder::Emit(): Size of GC info is %u bytes, code size %u bytes.\n", (unsigned)cbGcInfoSize, m_CodeLength )); + GCINFO_LOG( LL_INFO100, "GcInfoEncoder::Emit(): Size of GC info is %u bytes, code size %u bytes.\n", (unsigned)cbGcInfoSize, m_CodeLength ); BYTE* destBuffer = (BYTE *)eeAllocGCInfo(cbGcInfoSize); // Allocator will throw an exception on failure. @@ -2533,17 +2433,18 @@ BYTE* GcInfoEncoder::Emit() return destBuffer; } -void * GcInfoEncoder::eeAllocGCInfo (size_t blockSize) +template void * TGcInfoEncoder::eeAllocGCInfo (size_t blockSize) { m_BlockSize = blockSize; return m_pCorJitInfo->allocGCInfo(blockSize); } -size_t GcInfoEncoder::GetEncodedGCInfoSize() const +template size_t TGcInfoEncoder::GetEncodedGCInfoSize() const { return m_BlockSize; } + BitStreamWriter::BitStreamWriter( IAllocator* pAllocator ) { m_pAllocator = pAllocator; @@ -2563,7 +2464,7 @@ BitStreamWriter::BitStreamWriter( IAllocator* pAllocator ) // void BitStreamWriter::Write( size_t data, UINT32 count ) { - _ASSERT(count <= BITS_PER_SIZE_T); + _ASSERTE(count <= BITS_PER_SIZE_T); if(count) { @@ -2638,6 +2539,21 @@ void BitStreamWriter::CopyTo( BYTE* buffer ) } +inline void BitStreamWriter::AllocMemoryBlock() +{ + // Interpreter-FIXME: Causes linker error in interpreter because IS_ALIGNED calls _ASSERTE + // _ASSERTE( IS_ALIGNED( m_MemoryBlockSize, sizeof( size_t ) ) ); + MemoryBlock* pMemBlock = m_MemoryBlocks.AppendNew(m_pAllocator, m_MemoryBlockSize); + + m_pCurrentSlot = pMemBlock->Contents; + m_OutOfBlockSlot = m_pCurrentSlot + m_MemoryBlockSize / sizeof( size_t ); + +#ifdef _DEBUG + m_MemoryBlocksCount++; +#endif +} + + void BitStreamWriter::Dispose() { m_MemoryBlocks.Dispose(m_pAllocator); @@ -2710,3 +2626,9 @@ int BitStreamWriter::EncodeVarLengthSigned( SSIZE_T n, UINT32 base ) } } +// Instantiate the encoder so other files can use it +template class TGcInfoEncoder; + +#ifdef FEATURE_INTERPRETER +template class TGcInfoEncoder; +#endif // FEATURE_INTERPRETER diff --git a/src/coreclr/gcinfo/gcinfohelpers.h b/src/coreclr/gcinfo/gcinfohelpers.h new file mode 100644 index 000000000000..da296817d279 --- /dev/null +++ b/src/coreclr/gcinfo/gcinfohelpers.h @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _GCINFOHELPERS_H_ +#define _GCINFOHELPERS_H_ + +// If you want GcInfoEncoder logging to work, replace this macro with an appropriate definition. +// This previously relied on our common logging infrastructure, but that caused linker failures in the interpreter. 
+// Example implementation: +// #define GCINFO_LOG(level, format, ...) (printf(format, __VA_ARGS__), true) +#define GCINFO_LOG(level, format, ...) false + +// If you want to enable GcInfoSize::Log to work, replace this macro with an appropriate definition. +#define GCINFO_LOGSPEW(level, format, ...) false + +// Duplicated from log.h +// NOTE: ICorJitInfo::logMsg appears to accept these same levels and is accessible from GcInfoEncoder. +#define LL_EVERYTHING 10 +#define LL_INFO1000000 9 // can be expected to generate 1,000,000 logs per small but not trivial run +#define LL_INFO100000 8 // can be expected to generate 100,000 logs per small but not trivial run +#define LL_INFO10000 7 // can be expected to generate 10,000 logs per small but not trivial run +#define LL_INFO1000 6 // can be expected to generate 1,000 logs per small but not trivial run +#define LL_INFO100 5 // can be expected to generate 100 logs per small but not trivial run +#define LL_INFO10 4 // can be expected to generate 10 logs per small but not trivial run +#define LL_WARNING 3 +#define LL_ERROR 2 +#define LL_FATALERROR 1 +#define LL_ALWAYS 0 // impossible to turn off (log level never negative) + +#endif // _GCINFOHELPERS_H_ diff --git a/src/coreclr/generateredefinesfile.ps1 b/src/coreclr/generateredefinesfile.ps1 new file mode 100644 index 000000000000..1a37c661d0c5 --- /dev/null +++ b/src/coreclr/generateredefinesfile.ps1 @@ -0,0 +1,40 @@ +param ( + [string]$filename = "", + [string]$jump = "", + [string]$prefix1 = "", + [string]$prefix2 = "" +) + +# Function to display usage information +function Show-Usage { + Write-Host "Usage:" + Write-Host "generateredefinesfile.ps1 " + exit 1 +} + +if ($filename.Length -eq 0) { + Show-Usage +} + +# Read the file line by line +Get-Content $filename | ForEach-Object { + $line = $_.Trim() + + # Skip empty lines and comment lines starting with semicolon + if ($line -match '^\;.*$' -or $line -match '^[\s]*$') { + return + } + + # Remove the CR character in case the sources are mapped from + # a Windows share and contain CRLF line endings + $line = $line -replace "`r", "" + + # Only process the entries that begin with "#" + if ($line -match '^#.*$') { + $line = $line -replace '^#', '' + Write-Output "LEAF_ENTRY ${prefix1}${line}, _TEXT" + Write-Output " ${jump} EXTERNAL_C_FUNC(${prefix2}${line})" + Write-Output "LEAF_END ${prefix1}${line}, _TEXT" + Write-Output "" + } +} diff --git a/src/coreclr/hosts/corerun/corerun.cpp b/src/coreclr/hosts/corerun/corerun.cpp index f17f19f6f294..7286a4dec895 100644 --- a/src/coreclr/hosts/corerun/corerun.cpp +++ b/src/coreclr/hosts/corerun/corerun.cpp @@ -70,6 +70,12 @@ namespace envvar // Variable used to preload a mock hostpolicy for testing. 
const char_t* mockHostPolicy = W("MOCK_HOSTPOLICY"); + + // Variable used to indicate how app assemblies should be provided to the runtime + // - PROPERTY: corerun will pass the paths via the TRUSTED_PLATFORM_ASSEMBLIES property + // - EXTERNAL: corerun will pass an external assembly probe to the runtime for app assemblies + // - Not set: same as PROPERTY + const char_t* appAssemblies = W("APP_ASSEMBLIES"); } static void wait_for_debugger() @@ -242,6 +248,37 @@ size_t HOST_CONTRACT_CALLTYPE get_runtime_property( return -1; } +// Paths for external assembly probe +static char* s_core_libs_path = nullptr; +static char* s_core_root_path = nullptr; + +static bool HOST_CONTRACT_CALLTYPE external_assembly_probe( + const char* path, + void** data_start, + int64_t* size) +{ + // Get just the file name + const char* name = path; + const char* pos = strrchr(name, '/'); + if (pos != NULL) + name = pos + 1; + + // Try to map the file from our known app assembly paths + for (const char* dir : { s_core_libs_path, s_core_root_path }) + { + if (dir == nullptr) + continue; + + std::string full_path = dir; + assert(full_path.back() == pal::dir_delim); + full_path.append(name); + if (pal::try_map_file_readonly(full_path.c_str(), data_start, size)) + return true; + } + + return false; +} + static int run(const configuration& config) { platform_specific_actions actions; @@ -295,7 +332,37 @@ static int run(const configuration& config) native_search_dirs << core_root << pal::env_path_delim; } - string_t tpa_list = build_tpa(core_root, core_libs); + string_t tpa_list; + string_t app_assemblies_env = pal::getenv(envvar::appAssemblies); + bool use_external_assembly_probe = false; + if (app_assemblies_env.empty() || app_assemblies_env == W("PROPERTY")) + { + // Use the TRUSTED_PLATFORM_ASSEMBLIES property to pass the app assemblies to the runtime. + tpa_list = build_tpa(core_root, core_libs); + } + else if (app_assemblies_env == W("EXTERNAL")) + { + // Use the external assembly probe to load assemblies from the app assembly paths. + use_external_assembly_probe = true; + if (!core_libs.empty()) + { + pal::string_utf8_t core_libs_utf8 = pal::convert_to_utf8(core_libs.c_str()); + s_core_libs_path = (char*)::malloc(core_libs_utf8.length() + 1); + ::strcpy(s_core_libs_path, core_libs_utf8.c_str()); + } + + if (!core_root.empty()) + { + pal::string_utf8_t core_root_utf8 = pal::convert_to_utf8(core_root.c_str()); + s_core_root_path = (char*)::malloc(core_root_utf8.length() + 1); + ::strcpy(s_core_root_path, core_root_utf8.c_str()); + } + } + else + { + pal::fprintf(stderr, W("Unknown value for APP_ASSEMBLIES environment variable: %s\n"), app_assemblies_env.c_str()); + return -1; + } { // Load hostpolicy if requested. @@ -376,7 +443,8 @@ static int run(const configuration& config) (void*)&config, &get_runtime_property, nullptr, - nullptr }; + nullptr, + use_external_assembly_probe ?
&external_assembly_probe : nullptr }; propertyKeys.push_back(HOST_PROPERTY_RUNTIME_CONTRACT); std::stringstream ss; ss << "0x" << std::hex << (size_t)(&host_contract); @@ -457,6 +525,8 @@ static int run(const configuration& config) if (exit_code != -1) exit_code = latched_exit_code; + ::free((void*)s_core_libs_path); + ::free((void*)s_core_root_path); return exit_code; } diff --git a/src/coreclr/hosts/corerun/corerun.hpp b/src/coreclr/hosts/corerun/corerun.hpp index f904cd444e13..48e6ed4c1295 100644 --- a/src/coreclr/hosts/corerun/corerun.hpp +++ b/src/coreclr/hosts/corerun/corerun.hpp @@ -134,6 +134,34 @@ namespace pal return INVALID_FILE_ATTRIBUTES != ::GetFileAttributesW(file_path.c_str()); } + inline bool try_map_file_readonly(const char* path, void** mapped, int64_t* size) + { + HANDLE file = ::CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + if (file == INVALID_HANDLE_VALUE) + return false; + + HANDLE file_mapping = ::CreateFileMappingA(file, nullptr, PAGE_READONLY, 0, 0, nullptr); + if (file_mapping == nullptr) + { + ::CloseHandle(file); + return false; + } + + void* mapped_local = ::MapViewOfFile(file_mapping, FILE_MAP_READ, 0, 0, 0); + if (mapped_local == nullptr) + { + ::CloseHandle(file); + ::CloseHandle(file_mapping); + return false; + } + + *size = ::GetFileSize(file, nullptr); + *mapped = mapped_local; + ::CloseHandle(file_mapping); + ::CloseHandle(file); + return true; + } + // Forward declaration void ensure_trailing_delimiter(pal::string_t& dir); @@ -300,7 +328,9 @@ class platform_specific_actions final #else // !TARGET_WINDOWS #include #include +#include #include +#include #include #include @@ -309,7 +339,6 @@ class platform_specific_actions final #include #include #else // !__APPLE__ -#include #include #endif // !__APPLE__ @@ -434,6 +463,33 @@ namespace pal return true; } + inline bool try_map_file_readonly(const char* path, void** mapped, int64_t* size) + { + int fd = open(path, O_RDONLY); + if (fd == -1) + return false; + + struct stat buf; + if (fstat(fd, &buf) == -1) + { + close(fd); + return false; + } + + int64_t size_local = buf.st_size; + void* mapped_local = mmap(NULL, size_local, PROT_READ, MAP_PRIVATE, fd, 0); + if (mapped_local == MAP_FAILED) + { + close(fd); + return false; + } + + *mapped = mapped_local; + *size = size_local; + close(fd); + return true; + } + // Forward declaration template bool string_ends_with(const string_t& str, const char_t(&suffix)[LEN]); diff --git a/src/coreclr/hosts/inc/coreclrhost.h b/src/coreclr/hosts/inc/coreclrhost.h index 0a72016cebf8..12099870c971 100644 --- a/src/coreclr/hosts/inc/coreclrhost.h +++ b/src/coreclr/hosts/inc/coreclrhost.h @@ -150,6 +150,7 @@ CORECLR_HOSTING_API(coreclr_execute_assembly, // // Callback types used by the hosts // +typedef bool(CORECLR_CALLING_CONVENTION ExternalAssemblyProbeFn)(const char* path, void** data_start, int64_t* size); typedef bool(CORECLR_CALLING_CONVENTION BundleProbeFn)(const char* path, int64_t* offset, int64_t* size, int64_t* compressedSize); typedef const void* (CORECLR_CALLING_CONVENTION PInvokeOverrideFn)(const char* libraryName, const char* entrypointName); diff --git a/src/coreclr/ilasm/CMakeLists.txt b/src/coreclr/ilasm/CMakeLists.txt index d7f5ce9a15aa..9cc652ccbf78 100644 --- a/src/coreclr/ilasm/CMakeLists.txt +++ b/src/coreclr/ilasm/CMakeLists.txt @@ -87,7 +87,6 @@ set(ILASM_LINK_LIBRARIES mdcompiler_ppdb mdruntime_ppdb mdruntimerw_ppdb - mdstaticapi_ppdb ${END_LIBRARY_GROUP} # End group of libraries that have circular
references ceefgen corguids diff --git a/src/coreclr/ilasm/asmparse.y b/src/coreclr/ilasm/asmparse.y index 2257ecb31af7..d8ac94474868 100644 --- a/src/coreclr/ilasm/asmparse.y +++ b/src/coreclr/ilasm/asmparse.y @@ -77,7 +77,7 @@ %token VALUE_ VALUETYPE_ NATIVE_ INSTANCE_ SPECIALNAME_ FORWARDER_ %token STATIC_ PUBLIC_ PRIVATE_ FAMILY_ FINAL_ SYNCHRONIZED_ INTERFACE_ SEALED_ NESTED_ %token ABSTRACT_ AUTO_ SEQUENTIAL_ EXPLICIT_ ANSI_ UNICODE_ AUTOCHAR_ IMPORT_ ENUM_ -%token VIRTUAL_ NOINLINING_ AGGRESSIVEINLINING_ NOOPTIMIZATION_ AGGRESSIVEOPTIMIZATION_ UNMANAGEDEXP_ BEFOREFIELDINIT_ +%token VIRTUAL_ NOINLINING_ AGGRESSIVEINLINING_ NOOPTIMIZATION_ AGGRESSIVEOPTIMIZATION_ UNMANAGEDEXP_ BEFOREFIELDINIT_ ASYNC_ %token STRICT_ RETARGETABLE_ WINDOWSRUNTIME_ NOPLATFORM_ %token METHOD_ FIELD_ PINNED_ MODREQ_ MODOPT_ SERIALIZABLE_ PROPERTY_ TYPE_ %token ASSEMBLY_ FAMANDASSEM_ FAMORASSEM_ PRIVATESCOPE_ HIDEBYSIG_ NEWSLOT_ RTSPECIALNAME_ PINVOKEIMPL_ @@ -210,14 +210,7 @@ decl : classHead '{' classDecls '}' | secDecl | customAttrDecl | _SUBSYSTEM int32 { -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:22011) // Suppress PREFast warning about integer overflow/underflow -#endif PASM->m_dwSubsystem = $2; -#ifdef _PREFAST_ -#pragma warning(pop) -#endif } | _CORFLAGS int32 { PASM->m_dwComImageFlags = $2; } | _FILE ALIGNMENT_ int32 { PASM->m_dwFileAlignment = $3; @@ -251,6 +244,22 @@ languageDecl : _LANGUAGE SQSTRING ; /* Basic tokens */ id : ID { $$ = $1; } + /* Allow methodImpl attributes to be used as identifiers */ + | NATIVE_ { $$ = newString("native"); } + | CIL_ { $$ = newString("cil"); } + | OPTIL_ { $$ = newString("optil"); } + | MANAGED_ { $$ = newString("managed"); } + | UNMANAGED_ { $$ = newString("unmanaged"); } + | FORWARDREF_ { $$ = newString("forwardref"); } + | PRESERVESIG_ { $$ = newString("preservesig"); } + | RUNTIME_ { $$ = newString("runtime"); } + | INTERNALCALL_ { $$ = newString("internalcall"); } + | SYNCHRONIZED_ { $$ = newString("synchronized"); } + | NOINLINING_ { $$ = newString("noinlining"); } + | AGGRESSIVEINLINING_ { $$ = newString("aggressiveinlining"); } + | NOOPTIMIZATION_ { $$ = newString("nooptimization"); } + | AGGRESSIVEOPTIMIZATION_ { $$ = newString("aggressiveoptimization"); } + | ASYNC_ { $$ = newString("async"); } | SQSTRING { $$ = $1; } ; @@ -864,6 +873,7 @@ implAttr : /* EMPTY */ { $$ = (CorMethodImp | implAttr AGGRESSIVEINLINING_ { $$ = (CorMethodImpl) ($1 | miAggressiveInlining); } | implAttr NOOPTIMIZATION_ { $$ = (CorMethodImpl) ($1 | miNoOptimization); } | implAttr AGGRESSIVEOPTIMIZATION_ { $$ = (CorMethodImpl) ($1 | miAggressiveOptimization); } + | implAttr ASYNC_ { $$ = (CorMethodImpl) ($1 | miAsync); } | implAttr FLAGS_ '(' int32 ')' { $$ = (CorMethodImpl) ($4); } ; diff --git a/src/coreclr/ilasm/asmtemplates.h b/src/coreclr/ilasm/asmtemplates.h index c4f4154aecd4..8a0c1e12e047 100644 --- a/src/coreclr/ilasm/asmtemplates.h +++ b/src/coreclr/ilasm/asmtemplates.h @@ -4,11 +4,6 @@ #ifndef ASMTEMPLATES_H #define ASMTEMPLATES_H -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:22008) // "Suppress PREfast warnings about integer overflow" -#endif - inline ULONG GrowBuffer(ULONG startingSize) { int toAdd = startingSize >> 1; @@ -861,9 +856,5 @@ template class RBTREE }; }; -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - #endif //ASMTEMPLATES_H diff --git a/src/coreclr/ilasm/assembler.cpp b/src/coreclr/ilasm/assembler.cpp index 633daec45108..c5e2a8ad8908 100644 --- a/src/coreclr/ilasm/assembler.cpp +++ 
b/src/coreclr/ilasm/assembler.cpp @@ -57,11 +57,8 @@ void Assembler::ClearImplList(void) { while(m_nImplList) m_crImplList[--m_nImplList] = mdTypeRefNil; } + /**************************************************************************/ -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:22008) // "Suppress PREfast warnings about integer overflow" -#endif void Assembler::AddToImplList(mdToken tk) { if(m_nImplList+1 >= m_nImplListSize) @@ -82,9 +79,6 @@ void Assembler::AddToImplList(mdToken tk) m_crImplList[m_nImplList++] = tk; m_crImplList[m_nImplList] = mdTypeRefNil; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif void Assembler::ClearBoundList(void) { @@ -1129,7 +1123,7 @@ void Assembler::AddException(DWORD pcStart, DWORD pcEnd, DWORD pcHandler, DWORD clause->SetHandlerLength(pcHandlerTo - pcHandler); clause->SetClassToken(crException); - int flags = COR_ILEXCEPTION_CLAUSE_OFFSETLEN; + int flags = 0; if (isFilter) { flags |= COR_ILEXCEPTION_CLAUSE_FILTER; } @@ -1428,7 +1422,7 @@ void Assembler::EmitOpcode(Instr* instr) { m_pCurMethod->m_HasMultipleDocuments = TRUE; } - + if (0xfeefee == instr->linenum && 0xfeefee == instr->linenum_end && 0 == instr->column && diff --git a/src/coreclr/ilasm/assembler.h b/src/coreclr/ilasm/assembler.h index b2b2d30a9c08..3d5326a41f87 100644 --- a/src/coreclr/ilasm/assembler.h +++ b/src/coreclr/ilasm/assembler.h @@ -34,7 +34,7 @@ #define MAX_SIGNATURE_LENGTH 256 // unused #define MAX_LABEL_SIZE 256 //64 #define MAX_CALL_SIG_SIZE 32 // unused -#define MAX_SCOPE_LENGTH _MAX_PATH // follow the RegMeta::SetModuleProps limitation +#define MAX_SCOPE_LENGTH MAX_PATH // follow the RegMeta::SetModuleProps limitation #define MAX_NAMESPACE_LENGTH 1024 //256 //64 #define MAX_MEMBER_NAME_LENGTH 1024 //256 //64 @@ -644,22 +644,22 @@ typedef FIFO MethodBodyList; struct Clockwork { - DWORD cBegin; - DWORD cEnd; - DWORD cParsBegin; - DWORD cParsEnd; - DWORD cMDInitBegin; - DWORD cMDInitEnd; - DWORD cMDEmitBegin; - DWORD cMDEmitEnd; - DWORD cMDEmit1; - DWORD cMDEmit2; - DWORD cMDEmit3; - DWORD cMDEmit4; - DWORD cRef2DefBegin; - DWORD cRef2DefEnd; - DWORD cFilegenBegin; - DWORD cFilegenEnd; + int64_t cBegin; + int64_t cEnd; + int64_t cParsBegin; + int64_t cParsEnd; + int64_t cMDInitBegin; + int64_t cMDInitEnd; + int64_t cMDEmitBegin; + int64_t cMDEmitEnd; + int64_t cMDEmit1; + int64_t cMDEmit2; + int64_t cMDEmit3; + int64_t cMDEmit4; + int64_t cRef2DefBegin; + int64_t cRef2DefEnd; + int64_t cFilegenBegin; + int64_t cFilegenEnd; }; struct TypeDefDescr diff --git a/src/coreclr/ilasm/binstr.h b/src/coreclr/ilasm/binstr.h index 013e35cb2889..a01e7f5eb1d8 100644 --- a/src/coreclr/ilasm/binstr.h +++ b/src/coreclr/ilasm/binstr.h @@ -8,11 +8,6 @@ #include // for memmove, memcpy ... 
-#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:22008) // "Suppress PREfast warnings about integer overflow" -#endif - class BinStr { public: BinStr() { len = 0L; max = 8L; ptr_ = buff; } @@ -68,9 +63,6 @@ class BinStr { uint8_t buff[8]; }; BinStr* BinStrToUnicode(BinStr* pSource, bool Swap = false); -#ifdef _PREFAST_ -#pragma warning(pop) -#endif #endif diff --git a/src/coreclr/ilasm/grammar_after.cpp b/src/coreclr/ilasm/grammar_after.cpp index a6e70ad380a0..ac43bd8faace 100644 --- a/src/coreclr/ilasm/grammar_after.cpp +++ b/src/coreclr/ilasm/grammar_after.cpp @@ -839,10 +839,6 @@ int parse_literal(unsigned curSym, __inout __nullterminated char* &curPos, BOOL } } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif int yylex() { char* curPos = PENV->curPos; @@ -1286,9 +1282,6 @@ int yylex() PENV->curTok = curTok; return(tok); } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif /**************************************************************************/ static char* newString(_In_ __nullterminated const char* str1) diff --git a/src/coreclr/ilasm/ilasmpch.h b/src/coreclr/ilasm/ilasmpch.h index a6a62fe9ea81..cbc5fd0b3d1d 100644 --- a/src/coreclr/ilasm/ilasmpch.h +++ b/src/coreclr/ilasm/ilasmpch.h @@ -24,6 +24,7 @@ #include "mdfileformat.h" #include "stgpooli.h" +#include "minipal/time.h" #ifdef _EXPORT #undef _EXPORT diff --git a/src/coreclr/ilasm/main.cpp b/src/coreclr/ilasm/main.cpp index 3956b40807b6..58e0ee7573aa 100644 --- a/src/coreclr/ilasm/main.cpp +++ b/src/coreclr/ilasm/main.cpp @@ -9,7 +9,6 @@ #include "asmparse.h" #include "clrversion.h" -#include "shimload.h" #include "strsafe.h" #define ASSERTE_ALL_BUILDS(expr) _ASSERTE_ALL_BUILDS((expr)) @@ -118,12 +117,6 @@ WCHAR wzInputFilename[MAX_FILENAME_LENGTH]; WCHAR wzOutputFilename[MAX_FILENAME_LENGTH]; WCHAR wzPdbFilename[MAX_FILENAME_LENGTH]; - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif - extern "C" int _cdecl wmain(int argc, _In_ WCHAR **argv) { int i, NumFiles = 0, NumDeltaFiles = 0; @@ -145,21 +138,15 @@ extern "C" int _cdecl wmain(int argc, _In_ WCHAR **argv) memset(pwzInputFiles,0,1024*sizeof(WCHAR*)); memset(pwzDeltaFiles,0,1024*sizeof(WCHAR*)); memset(&cw,0,sizeof(Clockwork)); - cw.cBegin = GetTickCount(); + cw.cBegin = minipal_lowres_ticks(); g_uConsoleCP = GetConsoleOutputCP(); memset(wzOutputFilename,0,sizeof(wzOutputFilename)); memset(wzPdbFilename, 0, sizeof(wzPdbFilename)); if(argc < 2) goto ErrorExit; -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:26000) // "Suppress prefast warning about index overflow" -#endif + if (! u16_strcmp(argv[1], W("/?")) || ! 
u16_strcmp(argv[1],W("-?"))) -#ifdef _PREFAST_ -#pragma warning(pop) -#endif { printf("\n.NET IL Assembler version " CLR_PRODUCT_VERSION); printf("\n%s\n\n", VER_LEGALCOPYRIGHT_LOGO_STR); @@ -679,7 +666,7 @@ extern "C" int _cdecl wmain(int argc, _In_ WCHAR **argv) { int iFile; BOOL fAllFilesPresent = TRUE; - if(bClock) cw.cParsBegin = GetTickCount(); + if(bClock) cw.cParsBegin = minipal_lowres_ticks(); for(iFile = 0; iFile < NumFiles; iFile++) { uCodePage = CP_UTF8; @@ -735,7 +722,7 @@ extern "C" int _cdecl wmain(int argc, _In_ WCHAR **argv) delete pIn; } } // end for(iFile) - if(bClock) cw.cParsEnd = GetTickCount(); + if(bClock) cw.cParsEnd = minipal_lowres_ticks(); if ((pParser->Success() && fAllFilesPresent) || pAsm->OnErrGo) { HRESULT hr; @@ -758,7 +745,7 @@ extern "C" int _cdecl wmain(int argc, _In_ WCHAR **argv) } if(exitval == 0) // Write the output file { - if(bClock) cw.cFilegenEnd = GetTickCount(); + if(bClock) cw.cFilegenEnd = minipal_lowres_ticks(); if(pAsm->m_fReportProgress) pParser->msg("Writing PE file\n"); // Generate the file if (FAILED(hr = pAsm->m_pCeeFileGen->GenerateCeeFile(pAsm->m_pCeeFile))) @@ -776,7 +763,7 @@ extern "C" int _cdecl wmain(int argc, _In_ WCHAR **argv) pParser->msg("Failed to write PDB file, error code=0x%08X\n", hr); } } - if(bClock) cw.cEnd = GetTickCount(); + if(bClock) cw.cEnd = minipal_lowres_ticks(); if(exitval==0) { WCHAR wzNewOutputFilename[MAX_FILENAME_LENGTH+16]; @@ -866,21 +853,21 @@ extern "C" int _cdecl wmain(int argc, _In_ WCHAR **argv) if(bReportProgress) printf("Operation completed successfully\n"); if(bClock) { - printf("Timing (msec): Total run %d\n",(cw.cEnd-cw.cBegin)); - printf(" Startup %d\n",(cw.cParsBegin-cw.cBegin)); - printf(" - MD initialization %d\n",(cw.cMDInitEnd - cw.cMDInitBegin)); - printf(" Parsing %d\n",(cw.cParsEnd - cw.cParsBegin)); - printf(" Emitting MD %d\n",(cw.cMDEmitEnd - cw.cRef2DefEnd)+(cw.cRef2DefBegin - cw.cMDEmitBegin)); - //printf(" - global fixups %d\n",(cw.cMDEmit1 - cw.cMDEmitBegin)); - printf(" - SN sig alloc %d\n",(cw.cMDEmit2 - cw.cMDEmitBegin)); - printf(" - Classes,Methods,Fields %d\n",(cw.cRef2DefBegin - cw.cMDEmit2)); - printf(" - Events,Properties %d\n",(cw.cMDEmit3 - cw.cRef2DefEnd)); - printf(" - MethodImpls %d\n",(cw.cMDEmit4 - cw.cMDEmit3)); - printf(" - Manifest,CAs %d\n",(cw.cMDEmitEnd - cw.cMDEmit4)); - printf(" Ref to Def resolution %d\n",(cw.cRef2DefEnd - cw.cRef2DefBegin)); - printf(" Fixup and linking %d\n",(cw.cFilegenBegin - cw.cMDEmitEnd)); - printf(" CEE file generation %d\n",(cw.cFilegenEnd - cw.cFilegenBegin)); - printf(" PE file writing %d\n",(cw.cEnd - cw.cFilegenEnd)); + printf("Timing (msec): Total run %d\n",(int)(cw.cEnd-cw.cBegin)); + printf(" Startup %d\n",(int)(cw.cParsBegin-cw.cBegin)); + printf(" - MD initialization %d\n",(int)(cw.cMDInitEnd - cw.cMDInitBegin)); + printf(" Parsing %d\n",(int)(cw.cParsEnd - cw.cParsBegin)); + printf(" Emitting MD %d\n",(int)(cw.cMDEmitEnd - cw.cRef2DefEnd)+(int)(cw.cRef2DefBegin - cw.cMDEmitBegin)); + //printf(" - global fixups %d\n",(int)(cw.cMDEmit1 - cw.cMDEmitBegin)); + printf(" - SN sig alloc %d\n",(int)(cw.cMDEmit2 - cw.cMDEmitBegin)); + printf(" - Classes,Methods,Fields %d\n",(int)(cw.cRef2DefBegin - cw.cMDEmit2)); + printf(" - Events,Properties %d\n",(int)(cw.cMDEmit3 - cw.cRef2DefEnd)); + printf(" - MethodImpls %d\n",(int)(cw.cMDEmit4 - cw.cMDEmit3)); + printf(" - Manifest,CAs %d\n",(int)(cw.cMDEmitEnd - cw.cMDEmit4)); + printf(" Ref to Def resolution %d\n",(int)(cw.cRef2DefEnd - cw.cRef2DefBegin)); + printf(" Fixup and 
linking %d\n",(int)(cw.cFilegenBegin - cw.cMDEmitEnd)); + printf(" CEE file generation %d\n",(int)(cw.cFilegenEnd - cw.cFilegenBegin)); + printf(" PE file writing %d\n",(int)(cw.cEnd - cw.cFilegenEnd)); } } else @@ -890,10 +877,6 @@ extern "C" int _cdecl wmain(int argc, _In_ WCHAR **argv) exit(exitval); return exitval; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - #ifdef TARGET_UNIX int main(int argc, char* str[]) diff --git a/src/coreclr/ilasm/portable_pdb.cpp b/src/coreclr/ilasm/portable_pdb.cpp index a60214c99e9f..3aa26fe1597a 100644 --- a/src/coreclr/ilasm/portable_pdb.cpp +++ b/src/coreclr/ilasm/portable_pdb.cpp @@ -93,7 +93,7 @@ HRESULT PortablePdbWriter::Init(IMetaDataDispenserEx2* mdDispenser) time_t now; time(&now); m_pdbStream.id.pdbTimeStamp = (ULONG)now; - if (!minipal_guid_v4_create(reinterpret_cast(&m_pdbStream.id.pdbGuid))) + if (!minipal_guid_v4_create(&m_pdbStream.id.pdbGuid)) { hr = E_FAIL; goto exit; diff --git a/src/coreclr/ilasm/prebuilt/asmparse.cpp b/src/coreclr/ilasm/prebuilt/asmparse.cpp index 4a5d9b3486b8..38d9c6666bf1 100644 --- a/src/coreclr/ilasm/prebuilt/asmparse.cpp +++ b/src/coreclr/ilasm/prebuilt/asmparse.cpp @@ -81,7 +81,7 @@ #include "grammar_before.cpp" -#line 85 "asmparse.cpp" +#line 85 "prebuilt\\asmparse.cpp" # ifndef YY_CAST # ifdef __cplusplus @@ -218,196 +218,197 @@ extern int yydebug; AGGRESSIVEOPTIMIZATION_ = 351, /* AGGRESSIVEOPTIMIZATION_ */ UNMANAGEDEXP_ = 352, /* UNMANAGEDEXP_ */ BEFOREFIELDINIT_ = 353, /* BEFOREFIELDINIT_ */ - STRICT_ = 354, /* STRICT_ */ - RETARGETABLE_ = 355, /* RETARGETABLE_ */ - WINDOWSRUNTIME_ = 356, /* WINDOWSRUNTIME_ */ - NOPLATFORM_ = 357, /* NOPLATFORM_ */ - METHOD_ = 358, /* METHOD_ */ - FIELD_ = 359, /* FIELD_ */ - PINNED_ = 360, /* PINNED_ */ - MODREQ_ = 361, /* MODREQ_ */ - MODOPT_ = 362, /* MODOPT_ */ - SERIALIZABLE_ = 363, /* SERIALIZABLE_ */ - PROPERTY_ = 364, /* PROPERTY_ */ - TYPE_ = 365, /* TYPE_ */ - ASSEMBLY_ = 366, /* ASSEMBLY_ */ - FAMANDASSEM_ = 367, /* FAMANDASSEM_ */ - FAMORASSEM_ = 368, /* FAMORASSEM_ */ - PRIVATESCOPE_ = 369, /* PRIVATESCOPE_ */ - HIDEBYSIG_ = 370, /* HIDEBYSIG_ */ - NEWSLOT_ = 371, /* NEWSLOT_ */ - RTSPECIALNAME_ = 372, /* RTSPECIALNAME_ */ - PINVOKEIMPL_ = 373, /* PINVOKEIMPL_ */ - _CTOR = 374, /* _CTOR */ - _CCTOR = 375, /* _CCTOR */ - LITERAL_ = 376, /* LITERAL_ */ - NOTSERIALIZED_ = 377, /* NOTSERIALIZED_ */ - INITONLY_ = 378, /* INITONLY_ */ - REQSECOBJ_ = 379, /* REQSECOBJ_ */ - CIL_ = 380, /* CIL_ */ - OPTIL_ = 381, /* OPTIL_ */ - MANAGED_ = 382, /* MANAGED_ */ - FORWARDREF_ = 383, /* FORWARDREF_ */ - PRESERVESIG_ = 384, /* PRESERVESIG_ */ - RUNTIME_ = 385, /* RUNTIME_ */ - INTERNALCALL_ = 386, /* INTERNALCALL_ */ - _IMPORT = 387, /* _IMPORT */ - NOMANGLE_ = 388, /* NOMANGLE_ */ - LASTERR_ = 389, /* LASTERR_ */ - WINAPI_ = 390, /* WINAPI_ */ - AS_ = 391, /* AS_ */ - BESTFIT_ = 392, /* BESTFIT_ */ - ON_ = 393, /* ON_ */ - OFF_ = 394, /* OFF_ */ - CHARMAPERROR_ = 395, /* CHARMAPERROR_ */ - INSTR_NONE = 396, /* INSTR_NONE */ - INSTR_VAR = 397, /* INSTR_VAR */ - INSTR_I = 398, /* INSTR_I */ - INSTR_I8 = 399, /* INSTR_I8 */ - INSTR_R = 400, /* INSTR_R */ - INSTR_BRTARGET = 401, /* INSTR_BRTARGET */ - INSTR_METHOD = 402, /* INSTR_METHOD */ - INSTR_FIELD = 403, /* INSTR_FIELD */ - INSTR_TYPE = 404, /* INSTR_TYPE */ - INSTR_STRING = 405, /* INSTR_STRING */ - INSTR_SIG = 406, /* INSTR_SIG */ - INSTR_TOK = 407, /* INSTR_TOK */ - INSTR_SWITCH = 408, /* INSTR_SWITCH */ - _CLASS = 409, /* _CLASS */ - _NAMESPACE = 410, /* _NAMESPACE */ - _METHOD = 411, /* _METHOD */ - _FIELD = 
412, /* _FIELD */ - _DATA = 413, /* _DATA */ - _THIS = 414, /* _THIS */ - _BASE = 415, /* _BASE */ - _NESTER = 416, /* _NESTER */ - _EMITBYTE = 417, /* _EMITBYTE */ - _TRY = 418, /* _TRY */ - _MAXSTACK = 419, /* _MAXSTACK */ - _LOCALS = 420, /* _LOCALS */ - _ENTRYPOINT = 421, /* _ENTRYPOINT */ - _ZEROINIT = 422, /* _ZEROINIT */ - _EVENT = 423, /* _EVENT */ - _ADDON = 424, /* _ADDON */ - _REMOVEON = 425, /* _REMOVEON */ - _FIRE = 426, /* _FIRE */ - _OTHER = 427, /* _OTHER */ - _PROPERTY = 428, /* _PROPERTY */ - _SET = 429, /* _SET */ - _GET = 430, /* _GET */ - _PERMISSION = 431, /* _PERMISSION */ - _PERMISSIONSET = 432, /* _PERMISSIONSET */ - REQUEST_ = 433, /* REQUEST_ */ - DEMAND_ = 434, /* DEMAND_ */ - ASSERT_ = 435, /* ASSERT_ */ - DENY_ = 436, /* DENY_ */ - PERMITONLY_ = 437, /* PERMITONLY_ */ - LINKCHECK_ = 438, /* LINKCHECK_ */ - INHERITCHECK_ = 439, /* INHERITCHECK_ */ - REQMIN_ = 440, /* REQMIN_ */ - REQOPT_ = 441, /* REQOPT_ */ - REQREFUSE_ = 442, /* REQREFUSE_ */ - PREJITGRANT_ = 443, /* PREJITGRANT_ */ - PREJITDENY_ = 444, /* PREJITDENY_ */ - NONCASDEMAND_ = 445, /* NONCASDEMAND_ */ - NONCASLINKDEMAND_ = 446, /* NONCASLINKDEMAND_ */ - NONCASINHERITANCE_ = 447, /* NONCASINHERITANCE_ */ - _LINE = 448, /* _LINE */ - P_LINE = 449, /* P_LINE */ - _LANGUAGE = 450, /* _LANGUAGE */ - _CUSTOM = 451, /* _CUSTOM */ - INIT_ = 452, /* INIT_ */ - _SIZE = 453, /* _SIZE */ - _PACK = 454, /* _PACK */ - _VTABLE = 455, /* _VTABLE */ - _VTFIXUP = 456, /* _VTFIXUP */ - FROMUNMANAGED_ = 457, /* FROMUNMANAGED_ */ - CALLMOSTDERIVED_ = 458, /* CALLMOSTDERIVED_ */ - _VTENTRY = 459, /* _VTENTRY */ - RETAINAPPDOMAIN_ = 460, /* RETAINAPPDOMAIN_ */ - _FILE = 461, /* _FILE */ - NOMETADATA_ = 462, /* NOMETADATA_ */ - _HASH = 463, /* _HASH */ - _ASSEMBLY = 464, /* _ASSEMBLY */ - _PUBLICKEY = 465, /* _PUBLICKEY */ - _PUBLICKEYTOKEN = 466, /* _PUBLICKEYTOKEN */ - ALGORITHM_ = 467, /* ALGORITHM_ */ - _VER = 468, /* _VER */ - _LOCALE = 469, /* _LOCALE */ - EXTERN_ = 470, /* EXTERN_ */ - _MRESOURCE = 471, /* _MRESOURCE */ - _MODULE = 472, /* _MODULE */ - _EXPORT = 473, /* _EXPORT */ - LEGACY_ = 474, /* LEGACY_ */ - LIBRARY_ = 475, /* LIBRARY_ */ - X86_ = 476, /* X86_ */ - AMD64_ = 477, /* AMD64_ */ - ARM_ = 478, /* ARM_ */ - ARM64_ = 479, /* ARM64_ */ - MARSHAL_ = 480, /* MARSHAL_ */ - CUSTOM_ = 481, /* CUSTOM_ */ - SYSSTRING_ = 482, /* SYSSTRING_ */ - FIXED_ = 483, /* FIXED_ */ - VARIANT_ = 484, /* VARIANT_ */ - CURRENCY_ = 485, /* CURRENCY_ */ - SYSCHAR_ = 486, /* SYSCHAR_ */ - DECIMAL_ = 487, /* DECIMAL_ */ - DATE_ = 488, /* DATE_ */ - BSTR_ = 489, /* BSTR_ */ - TBSTR_ = 490, /* TBSTR_ */ - LPSTR_ = 491, /* LPSTR_ */ - LPWSTR_ = 492, /* LPWSTR_ */ - LPTSTR_ = 493, /* LPTSTR_ */ - OBJECTREF_ = 494, /* OBJECTREF_ */ - IUNKNOWN_ = 495, /* IUNKNOWN_ */ - IDISPATCH_ = 496, /* IDISPATCH_ */ - STRUCT_ = 497, /* STRUCT_ */ - SAFEARRAY_ = 498, /* SAFEARRAY_ */ - BYVALSTR_ = 499, /* BYVALSTR_ */ - LPVOID_ = 500, /* LPVOID_ */ - ANY_ = 501, /* ANY_ */ - ARRAY_ = 502, /* ARRAY_ */ - LPSTRUCT_ = 503, /* LPSTRUCT_ */ - IIDPARAM_ = 504, /* IIDPARAM_ */ - IN_ = 505, /* IN_ */ - OUT_ = 506, /* OUT_ */ - OPT_ = 507, /* OPT_ */ - _PARAM = 508, /* _PARAM */ - _OVERRIDE = 509, /* _OVERRIDE */ - WITH_ = 510, /* WITH_ */ - NULL_ = 511, /* NULL_ */ - ERROR_ = 512, /* ERROR_ */ - HRESULT_ = 513, /* HRESULT_ */ - CARRAY_ = 514, /* CARRAY_ */ - USERDEFINED_ = 515, /* USERDEFINED_ */ - RECORD_ = 516, /* RECORD_ */ - FILETIME_ = 517, /* FILETIME_ */ - BLOB_ = 518, /* BLOB_ */ - STREAM_ = 519, /* STREAM_ */ - STORAGE_ = 520, /* STORAGE_ */ - 
STREAMED_OBJECT_ = 521, /* STREAMED_OBJECT_ */ - STORED_OBJECT_ = 522, /* STORED_OBJECT_ */ - BLOB_OBJECT_ = 523, /* BLOB_OBJECT_ */ - CF_ = 524, /* CF_ */ - CLSID_ = 525, /* CLSID_ */ - VECTOR_ = 526, /* VECTOR_ */ - _SUBSYSTEM = 527, /* _SUBSYSTEM */ - _CORFLAGS = 528, /* _CORFLAGS */ - ALIGNMENT_ = 529, /* ALIGNMENT_ */ - _IMAGEBASE = 530, /* _IMAGEBASE */ - _STACKRESERVE = 531, /* _STACKRESERVE */ - _TYPEDEF = 532, /* _TYPEDEF */ - _TEMPLATE = 533, /* _TEMPLATE */ - _TYPELIST = 534, /* _TYPELIST */ - _MSCORLIB = 535, /* _MSCORLIB */ - P_DEFINE = 536, /* P_DEFINE */ - P_UNDEF = 537, /* P_UNDEF */ - P_IFDEF = 538, /* P_IFDEF */ - P_IFNDEF = 539, /* P_IFNDEF */ - P_ELSE = 540, /* P_ELSE */ - P_ENDIF = 541, /* P_ENDIF */ - P_INCLUDE = 542, /* P_INCLUDE */ - CONSTRAINT_ = 543 /* CONSTRAINT_ */ + ASYNC_ = 354, /* ASYNC_ */ + STRICT_ = 355, /* STRICT_ */ + RETARGETABLE_ = 356, /* RETARGETABLE_ */ + WINDOWSRUNTIME_ = 357, /* WINDOWSRUNTIME_ */ + NOPLATFORM_ = 358, /* NOPLATFORM_ */ + METHOD_ = 359, /* METHOD_ */ + FIELD_ = 360, /* FIELD_ */ + PINNED_ = 361, /* PINNED_ */ + MODREQ_ = 362, /* MODREQ_ */ + MODOPT_ = 363, /* MODOPT_ */ + SERIALIZABLE_ = 364, /* SERIALIZABLE_ */ + PROPERTY_ = 365, /* PROPERTY_ */ + TYPE_ = 366, /* TYPE_ */ + ASSEMBLY_ = 367, /* ASSEMBLY_ */ + FAMANDASSEM_ = 368, /* FAMANDASSEM_ */ + FAMORASSEM_ = 369, /* FAMORASSEM_ */ + PRIVATESCOPE_ = 370, /* PRIVATESCOPE_ */ + HIDEBYSIG_ = 371, /* HIDEBYSIG_ */ + NEWSLOT_ = 372, /* NEWSLOT_ */ + RTSPECIALNAME_ = 373, /* RTSPECIALNAME_ */ + PINVOKEIMPL_ = 374, /* PINVOKEIMPL_ */ + _CTOR = 375, /* _CTOR */ + _CCTOR = 376, /* _CCTOR */ + LITERAL_ = 377, /* LITERAL_ */ + NOTSERIALIZED_ = 378, /* NOTSERIALIZED_ */ + INITONLY_ = 379, /* INITONLY_ */ + REQSECOBJ_ = 380, /* REQSECOBJ_ */ + CIL_ = 381, /* CIL_ */ + OPTIL_ = 382, /* OPTIL_ */ + MANAGED_ = 383, /* MANAGED_ */ + FORWARDREF_ = 384, /* FORWARDREF_ */ + PRESERVESIG_ = 385, /* PRESERVESIG_ */ + RUNTIME_ = 386, /* RUNTIME_ */ + INTERNALCALL_ = 387, /* INTERNALCALL_ */ + _IMPORT = 388, /* _IMPORT */ + NOMANGLE_ = 389, /* NOMANGLE_ */ + LASTERR_ = 390, /* LASTERR_ */ + WINAPI_ = 391, /* WINAPI_ */ + AS_ = 392, /* AS_ */ + BESTFIT_ = 393, /* BESTFIT_ */ + ON_ = 394, /* ON_ */ + OFF_ = 395, /* OFF_ */ + CHARMAPERROR_ = 396, /* CHARMAPERROR_ */ + INSTR_NONE = 397, /* INSTR_NONE */ + INSTR_VAR = 398, /* INSTR_VAR */ + INSTR_I = 399, /* INSTR_I */ + INSTR_I8 = 400, /* INSTR_I8 */ + INSTR_R = 401, /* INSTR_R */ + INSTR_BRTARGET = 402, /* INSTR_BRTARGET */ + INSTR_METHOD = 403, /* INSTR_METHOD */ + INSTR_FIELD = 404, /* INSTR_FIELD */ + INSTR_TYPE = 405, /* INSTR_TYPE */ + INSTR_STRING = 406, /* INSTR_STRING */ + INSTR_SIG = 407, /* INSTR_SIG */ + INSTR_TOK = 408, /* INSTR_TOK */ + INSTR_SWITCH = 409, /* INSTR_SWITCH */ + _CLASS = 410, /* _CLASS */ + _NAMESPACE = 411, /* _NAMESPACE */ + _METHOD = 412, /* _METHOD */ + _FIELD = 413, /* _FIELD */ + _DATA = 414, /* _DATA */ + _THIS = 415, /* _THIS */ + _BASE = 416, /* _BASE */ + _NESTER = 417, /* _NESTER */ + _EMITBYTE = 418, /* _EMITBYTE */ + _TRY = 419, /* _TRY */ + _MAXSTACK = 420, /* _MAXSTACK */ + _LOCALS = 421, /* _LOCALS */ + _ENTRYPOINT = 422, /* _ENTRYPOINT */ + _ZEROINIT = 423, /* _ZEROINIT */ + _EVENT = 424, /* _EVENT */ + _ADDON = 425, /* _ADDON */ + _REMOVEON = 426, /* _REMOVEON */ + _FIRE = 427, /* _FIRE */ + _OTHER = 428, /* _OTHER */ + _PROPERTY = 429, /* _PROPERTY */ + _SET = 430, /* _SET */ + _GET = 431, /* _GET */ + _PERMISSION = 432, /* _PERMISSION */ + _PERMISSIONSET = 433, /* _PERMISSIONSET */ + REQUEST_ = 434, /* 
REQUEST_ */ + DEMAND_ = 435, /* DEMAND_ */ + ASSERT_ = 436, /* ASSERT_ */ + DENY_ = 437, /* DENY_ */ + PERMITONLY_ = 438, /* PERMITONLY_ */ + LINKCHECK_ = 439, /* LINKCHECK_ */ + INHERITCHECK_ = 440, /* INHERITCHECK_ */ + REQMIN_ = 441, /* REQMIN_ */ + REQOPT_ = 442, /* REQOPT_ */ + REQREFUSE_ = 443, /* REQREFUSE_ */ + PREJITGRANT_ = 444, /* PREJITGRANT_ */ + PREJITDENY_ = 445, /* PREJITDENY_ */ + NONCASDEMAND_ = 446, /* NONCASDEMAND_ */ + NONCASLINKDEMAND_ = 447, /* NONCASLINKDEMAND_ */ + NONCASINHERITANCE_ = 448, /* NONCASINHERITANCE_ */ + _LINE = 449, /* _LINE */ + P_LINE = 450, /* P_LINE */ + _LANGUAGE = 451, /* _LANGUAGE */ + _CUSTOM = 452, /* _CUSTOM */ + INIT_ = 453, /* INIT_ */ + _SIZE = 454, /* _SIZE */ + _PACK = 455, /* _PACK */ + _VTABLE = 456, /* _VTABLE */ + _VTFIXUP = 457, /* _VTFIXUP */ + FROMUNMANAGED_ = 458, /* FROMUNMANAGED_ */ + CALLMOSTDERIVED_ = 459, /* CALLMOSTDERIVED_ */ + _VTENTRY = 460, /* _VTENTRY */ + RETAINAPPDOMAIN_ = 461, /* RETAINAPPDOMAIN_ */ + _FILE = 462, /* _FILE */ + NOMETADATA_ = 463, /* NOMETADATA_ */ + _HASH = 464, /* _HASH */ + _ASSEMBLY = 465, /* _ASSEMBLY */ + _PUBLICKEY = 466, /* _PUBLICKEY */ + _PUBLICKEYTOKEN = 467, /* _PUBLICKEYTOKEN */ + ALGORITHM_ = 468, /* ALGORITHM_ */ + _VER = 469, /* _VER */ + _LOCALE = 470, /* _LOCALE */ + EXTERN_ = 471, /* EXTERN_ */ + _MRESOURCE = 472, /* _MRESOURCE */ + _MODULE = 473, /* _MODULE */ + _EXPORT = 474, /* _EXPORT */ + LEGACY_ = 475, /* LEGACY_ */ + LIBRARY_ = 476, /* LIBRARY_ */ + X86_ = 477, /* X86_ */ + AMD64_ = 478, /* AMD64_ */ + ARM_ = 479, /* ARM_ */ + ARM64_ = 480, /* ARM64_ */ + MARSHAL_ = 481, /* MARSHAL_ */ + CUSTOM_ = 482, /* CUSTOM_ */ + SYSSTRING_ = 483, /* SYSSTRING_ */ + FIXED_ = 484, /* FIXED_ */ + VARIANT_ = 485, /* VARIANT_ */ + CURRENCY_ = 486, /* CURRENCY_ */ + SYSCHAR_ = 487, /* SYSCHAR_ */ + DECIMAL_ = 488, /* DECIMAL_ */ + DATE_ = 489, /* DATE_ */ + BSTR_ = 490, /* BSTR_ */ + TBSTR_ = 491, /* TBSTR_ */ + LPSTR_ = 492, /* LPSTR_ */ + LPWSTR_ = 493, /* LPWSTR_ */ + LPTSTR_ = 494, /* LPTSTR_ */ + OBJECTREF_ = 495, /* OBJECTREF_ */ + IUNKNOWN_ = 496, /* IUNKNOWN_ */ + IDISPATCH_ = 497, /* IDISPATCH_ */ + STRUCT_ = 498, /* STRUCT_ */ + SAFEARRAY_ = 499, /* SAFEARRAY_ */ + BYVALSTR_ = 500, /* BYVALSTR_ */ + LPVOID_ = 501, /* LPVOID_ */ + ANY_ = 502, /* ANY_ */ + ARRAY_ = 503, /* ARRAY_ */ + LPSTRUCT_ = 504, /* LPSTRUCT_ */ + IIDPARAM_ = 505, /* IIDPARAM_ */ + IN_ = 506, /* IN_ */ + OUT_ = 507, /* OUT_ */ + OPT_ = 508, /* OPT_ */ + _PARAM = 509, /* _PARAM */ + _OVERRIDE = 510, /* _OVERRIDE */ + WITH_ = 511, /* WITH_ */ + NULL_ = 512, /* NULL_ */ + ERROR_ = 513, /* ERROR_ */ + HRESULT_ = 514, /* HRESULT_ */ + CARRAY_ = 515, /* CARRAY_ */ + USERDEFINED_ = 516, /* USERDEFINED_ */ + RECORD_ = 517, /* RECORD_ */ + FILETIME_ = 518, /* FILETIME_ */ + BLOB_ = 519, /* BLOB_ */ + STREAM_ = 520, /* STREAM_ */ + STORAGE_ = 521, /* STORAGE_ */ + STREAMED_OBJECT_ = 522, /* STREAMED_OBJECT_ */ + STORED_OBJECT_ = 523, /* STORED_OBJECT_ */ + BLOB_OBJECT_ = 524, /* BLOB_OBJECT_ */ + CF_ = 525, /* CF_ */ + CLSID_ = 526, /* CLSID_ */ + VECTOR_ = 527, /* VECTOR_ */ + _SUBSYSTEM = 528, /* _SUBSYSTEM */ + _CORFLAGS = 529, /* _CORFLAGS */ + ALIGNMENT_ = 530, /* ALIGNMENT_ */ + _IMAGEBASE = 531, /* _IMAGEBASE */ + _STACKRESERVE = 532, /* _STACKRESERVE */ + _TYPEDEF = 533, /* _TYPEDEF */ + _TEMPLATE = 534, /* _TEMPLATE */ + _TYPELIST = 535, /* _TYPELIST */ + _MSCORLIB = 536, /* _MSCORLIB */ + P_DEFINE = 537, /* P_DEFINE */ + P_UNDEF = 538, /* P_UNDEF */ + P_IFDEF = 539, /* P_IFDEF */ + P_IFNDEF = 540, /* P_IFNDEF 
*/ + P_ELSE = 541, /* P_ELSE */ + P_ENDIF = 542, /* P_ENDIF */ + P_INCLUDE = 543, /* P_INCLUDE */ + CONSTRAINT_ = 544 /* CONSTRAINT_ */ }; typedef enum yytokentype yytoken_kind_t; #endif @@ -445,7 +446,7 @@ union YYSTYPE CustomDescr* cad; unsigned short opcode; -#line 449 "asmparse.cpp" +#line 450 "prebuilt\\asmparse.cpp" }; typedef union YYSTYPE YYSTYPE; @@ -564,401 +565,402 @@ enum yysymbol_kind_t YYSYMBOL_AGGRESSIVEOPTIMIZATION_ = 96, /* AGGRESSIVEOPTIMIZATION_ */ YYSYMBOL_UNMANAGEDEXP_ = 97, /* UNMANAGEDEXP_ */ YYSYMBOL_BEFOREFIELDINIT_ = 98, /* BEFOREFIELDINIT_ */ - YYSYMBOL_STRICT_ = 99, /* STRICT_ */ - YYSYMBOL_RETARGETABLE_ = 100, /* RETARGETABLE_ */ - YYSYMBOL_WINDOWSRUNTIME_ = 101, /* WINDOWSRUNTIME_ */ - YYSYMBOL_NOPLATFORM_ = 102, /* NOPLATFORM_ */ - YYSYMBOL_METHOD_ = 103, /* METHOD_ */ - YYSYMBOL_FIELD_ = 104, /* FIELD_ */ - YYSYMBOL_PINNED_ = 105, /* PINNED_ */ - YYSYMBOL_MODREQ_ = 106, /* MODREQ_ */ - YYSYMBOL_MODOPT_ = 107, /* MODOPT_ */ - YYSYMBOL_SERIALIZABLE_ = 108, /* SERIALIZABLE_ */ - YYSYMBOL_PROPERTY_ = 109, /* PROPERTY_ */ - YYSYMBOL_TYPE_ = 110, /* TYPE_ */ - YYSYMBOL_ASSEMBLY_ = 111, /* ASSEMBLY_ */ - YYSYMBOL_FAMANDASSEM_ = 112, /* FAMANDASSEM_ */ - YYSYMBOL_FAMORASSEM_ = 113, /* FAMORASSEM_ */ - YYSYMBOL_PRIVATESCOPE_ = 114, /* PRIVATESCOPE_ */ - YYSYMBOL_HIDEBYSIG_ = 115, /* HIDEBYSIG_ */ - YYSYMBOL_NEWSLOT_ = 116, /* NEWSLOT_ */ - YYSYMBOL_RTSPECIALNAME_ = 117, /* RTSPECIALNAME_ */ - YYSYMBOL_PINVOKEIMPL_ = 118, /* PINVOKEIMPL_ */ - YYSYMBOL__CTOR = 119, /* _CTOR */ - YYSYMBOL__CCTOR = 120, /* _CCTOR */ - YYSYMBOL_LITERAL_ = 121, /* LITERAL_ */ - YYSYMBOL_NOTSERIALIZED_ = 122, /* NOTSERIALIZED_ */ - YYSYMBOL_INITONLY_ = 123, /* INITONLY_ */ - YYSYMBOL_REQSECOBJ_ = 124, /* REQSECOBJ_ */ - YYSYMBOL_CIL_ = 125, /* CIL_ */ - YYSYMBOL_OPTIL_ = 126, /* OPTIL_ */ - YYSYMBOL_MANAGED_ = 127, /* MANAGED_ */ - YYSYMBOL_FORWARDREF_ = 128, /* FORWARDREF_ */ - YYSYMBOL_PRESERVESIG_ = 129, /* PRESERVESIG_ */ - YYSYMBOL_RUNTIME_ = 130, /* RUNTIME_ */ - YYSYMBOL_INTERNALCALL_ = 131, /* INTERNALCALL_ */ - YYSYMBOL__IMPORT = 132, /* _IMPORT */ - YYSYMBOL_NOMANGLE_ = 133, /* NOMANGLE_ */ - YYSYMBOL_LASTERR_ = 134, /* LASTERR_ */ - YYSYMBOL_WINAPI_ = 135, /* WINAPI_ */ - YYSYMBOL_AS_ = 136, /* AS_ */ - YYSYMBOL_BESTFIT_ = 137, /* BESTFIT_ */ - YYSYMBOL_ON_ = 138, /* ON_ */ - YYSYMBOL_OFF_ = 139, /* OFF_ */ - YYSYMBOL_CHARMAPERROR_ = 140, /* CHARMAPERROR_ */ - YYSYMBOL_INSTR_NONE = 141, /* INSTR_NONE */ - YYSYMBOL_INSTR_VAR = 142, /* INSTR_VAR */ - YYSYMBOL_INSTR_I = 143, /* INSTR_I */ - YYSYMBOL_INSTR_I8 = 144, /* INSTR_I8 */ - YYSYMBOL_INSTR_R = 145, /* INSTR_R */ - YYSYMBOL_INSTR_BRTARGET = 146, /* INSTR_BRTARGET */ - YYSYMBOL_INSTR_METHOD = 147, /* INSTR_METHOD */ - YYSYMBOL_INSTR_FIELD = 148, /* INSTR_FIELD */ - YYSYMBOL_INSTR_TYPE = 149, /* INSTR_TYPE */ - YYSYMBOL_INSTR_STRING = 150, /* INSTR_STRING */ - YYSYMBOL_INSTR_SIG = 151, /* INSTR_SIG */ - YYSYMBOL_INSTR_TOK = 152, /* INSTR_TOK */ - YYSYMBOL_INSTR_SWITCH = 153, /* INSTR_SWITCH */ - YYSYMBOL__CLASS = 154, /* _CLASS */ - YYSYMBOL__NAMESPACE = 155, /* _NAMESPACE */ - YYSYMBOL__METHOD = 156, /* _METHOD */ - YYSYMBOL__FIELD = 157, /* _FIELD */ - YYSYMBOL__DATA = 158, /* _DATA */ - YYSYMBOL__THIS = 159, /* _THIS */ - YYSYMBOL__BASE = 160, /* _BASE */ - YYSYMBOL__NESTER = 161, /* _NESTER */ - YYSYMBOL__EMITBYTE = 162, /* _EMITBYTE */ - YYSYMBOL__TRY = 163, /* _TRY */ - YYSYMBOL__MAXSTACK = 164, /* _MAXSTACK */ - YYSYMBOL__LOCALS = 165, /* _LOCALS */ - YYSYMBOL__ENTRYPOINT = 166, /* _ENTRYPOINT */ - YYSYMBOL__ZEROINIT = 
167, /* _ZEROINIT */ - YYSYMBOL__EVENT = 168, /* _EVENT */ - YYSYMBOL__ADDON = 169, /* _ADDON */ - YYSYMBOL__REMOVEON = 170, /* _REMOVEON */ - YYSYMBOL__FIRE = 171, /* _FIRE */ - YYSYMBOL__OTHER = 172, /* _OTHER */ - YYSYMBOL__PROPERTY = 173, /* _PROPERTY */ - YYSYMBOL__SET = 174, /* _SET */ - YYSYMBOL__GET = 175, /* _GET */ - YYSYMBOL__PERMISSION = 176, /* _PERMISSION */ - YYSYMBOL__PERMISSIONSET = 177, /* _PERMISSIONSET */ - YYSYMBOL_REQUEST_ = 178, /* REQUEST_ */ - YYSYMBOL_DEMAND_ = 179, /* DEMAND_ */ - YYSYMBOL_ASSERT_ = 180, /* ASSERT_ */ - YYSYMBOL_DENY_ = 181, /* DENY_ */ - YYSYMBOL_PERMITONLY_ = 182, /* PERMITONLY_ */ - YYSYMBOL_LINKCHECK_ = 183, /* LINKCHECK_ */ - YYSYMBOL_INHERITCHECK_ = 184, /* INHERITCHECK_ */ - YYSYMBOL_REQMIN_ = 185, /* REQMIN_ */ - YYSYMBOL_REQOPT_ = 186, /* REQOPT_ */ - YYSYMBOL_REQREFUSE_ = 187, /* REQREFUSE_ */ - YYSYMBOL_PREJITGRANT_ = 188, /* PREJITGRANT_ */ - YYSYMBOL_PREJITDENY_ = 189, /* PREJITDENY_ */ - YYSYMBOL_NONCASDEMAND_ = 190, /* NONCASDEMAND_ */ - YYSYMBOL_NONCASLINKDEMAND_ = 191, /* NONCASLINKDEMAND_ */ - YYSYMBOL_NONCASINHERITANCE_ = 192, /* NONCASINHERITANCE_ */ - YYSYMBOL__LINE = 193, /* _LINE */ - YYSYMBOL_P_LINE = 194, /* P_LINE */ - YYSYMBOL__LANGUAGE = 195, /* _LANGUAGE */ - YYSYMBOL__CUSTOM = 196, /* _CUSTOM */ - YYSYMBOL_INIT_ = 197, /* INIT_ */ - YYSYMBOL__SIZE = 198, /* _SIZE */ - YYSYMBOL__PACK = 199, /* _PACK */ - YYSYMBOL__VTABLE = 200, /* _VTABLE */ - YYSYMBOL__VTFIXUP = 201, /* _VTFIXUP */ - YYSYMBOL_FROMUNMANAGED_ = 202, /* FROMUNMANAGED_ */ - YYSYMBOL_CALLMOSTDERIVED_ = 203, /* CALLMOSTDERIVED_ */ - YYSYMBOL__VTENTRY = 204, /* _VTENTRY */ - YYSYMBOL_RETAINAPPDOMAIN_ = 205, /* RETAINAPPDOMAIN_ */ - YYSYMBOL__FILE = 206, /* _FILE */ - YYSYMBOL_NOMETADATA_ = 207, /* NOMETADATA_ */ - YYSYMBOL__HASH = 208, /* _HASH */ - YYSYMBOL__ASSEMBLY = 209, /* _ASSEMBLY */ - YYSYMBOL__PUBLICKEY = 210, /* _PUBLICKEY */ - YYSYMBOL__PUBLICKEYTOKEN = 211, /* _PUBLICKEYTOKEN */ - YYSYMBOL_ALGORITHM_ = 212, /* ALGORITHM_ */ - YYSYMBOL__VER = 213, /* _VER */ - YYSYMBOL__LOCALE = 214, /* _LOCALE */ - YYSYMBOL_EXTERN_ = 215, /* EXTERN_ */ - YYSYMBOL__MRESOURCE = 216, /* _MRESOURCE */ - YYSYMBOL__MODULE = 217, /* _MODULE */ - YYSYMBOL__EXPORT = 218, /* _EXPORT */ - YYSYMBOL_LEGACY_ = 219, /* LEGACY_ */ - YYSYMBOL_LIBRARY_ = 220, /* LIBRARY_ */ - YYSYMBOL_X86_ = 221, /* X86_ */ - YYSYMBOL_AMD64_ = 222, /* AMD64_ */ - YYSYMBOL_ARM_ = 223, /* ARM_ */ - YYSYMBOL_ARM64_ = 224, /* ARM64_ */ - YYSYMBOL_MARSHAL_ = 225, /* MARSHAL_ */ - YYSYMBOL_CUSTOM_ = 226, /* CUSTOM_ */ - YYSYMBOL_SYSSTRING_ = 227, /* SYSSTRING_ */ - YYSYMBOL_FIXED_ = 228, /* FIXED_ */ - YYSYMBOL_VARIANT_ = 229, /* VARIANT_ */ - YYSYMBOL_CURRENCY_ = 230, /* CURRENCY_ */ - YYSYMBOL_SYSCHAR_ = 231, /* SYSCHAR_ */ - YYSYMBOL_DECIMAL_ = 232, /* DECIMAL_ */ - YYSYMBOL_DATE_ = 233, /* DATE_ */ - YYSYMBOL_BSTR_ = 234, /* BSTR_ */ - YYSYMBOL_TBSTR_ = 235, /* TBSTR_ */ - YYSYMBOL_LPSTR_ = 236, /* LPSTR_ */ - YYSYMBOL_LPWSTR_ = 237, /* LPWSTR_ */ - YYSYMBOL_LPTSTR_ = 238, /* LPTSTR_ */ - YYSYMBOL_OBJECTREF_ = 239, /* OBJECTREF_ */ - YYSYMBOL_IUNKNOWN_ = 240, /* IUNKNOWN_ */ - YYSYMBOL_IDISPATCH_ = 241, /* IDISPATCH_ */ - YYSYMBOL_STRUCT_ = 242, /* STRUCT_ */ - YYSYMBOL_SAFEARRAY_ = 243, /* SAFEARRAY_ */ - YYSYMBOL_BYVALSTR_ = 244, /* BYVALSTR_ */ - YYSYMBOL_LPVOID_ = 245, /* LPVOID_ */ - YYSYMBOL_ANY_ = 246, /* ANY_ */ - YYSYMBOL_ARRAY_ = 247, /* ARRAY_ */ - YYSYMBOL_LPSTRUCT_ = 248, /* LPSTRUCT_ */ - YYSYMBOL_IIDPARAM_ = 249, /* IIDPARAM_ */ - YYSYMBOL_IN_ = 250, /* IN_ */ - YYSYMBOL_OUT_ = 251, 
/* OUT_ */ - YYSYMBOL_OPT_ = 252, /* OPT_ */ - YYSYMBOL__PARAM = 253, /* _PARAM */ - YYSYMBOL__OVERRIDE = 254, /* _OVERRIDE */ - YYSYMBOL_WITH_ = 255, /* WITH_ */ - YYSYMBOL_NULL_ = 256, /* NULL_ */ - YYSYMBOL_ERROR_ = 257, /* ERROR_ */ - YYSYMBOL_HRESULT_ = 258, /* HRESULT_ */ - YYSYMBOL_CARRAY_ = 259, /* CARRAY_ */ - YYSYMBOL_USERDEFINED_ = 260, /* USERDEFINED_ */ - YYSYMBOL_RECORD_ = 261, /* RECORD_ */ - YYSYMBOL_FILETIME_ = 262, /* FILETIME_ */ - YYSYMBOL_BLOB_ = 263, /* BLOB_ */ - YYSYMBOL_STREAM_ = 264, /* STREAM_ */ - YYSYMBOL_STORAGE_ = 265, /* STORAGE_ */ - YYSYMBOL_STREAMED_OBJECT_ = 266, /* STREAMED_OBJECT_ */ - YYSYMBOL_STORED_OBJECT_ = 267, /* STORED_OBJECT_ */ - YYSYMBOL_BLOB_OBJECT_ = 268, /* BLOB_OBJECT_ */ - YYSYMBOL_CF_ = 269, /* CF_ */ - YYSYMBOL_CLSID_ = 270, /* CLSID_ */ - YYSYMBOL_VECTOR_ = 271, /* VECTOR_ */ - YYSYMBOL__SUBSYSTEM = 272, /* _SUBSYSTEM */ - YYSYMBOL__CORFLAGS = 273, /* _CORFLAGS */ - YYSYMBOL_ALIGNMENT_ = 274, /* ALIGNMENT_ */ - YYSYMBOL__IMAGEBASE = 275, /* _IMAGEBASE */ - YYSYMBOL__STACKRESERVE = 276, /* _STACKRESERVE */ - YYSYMBOL__TYPEDEF = 277, /* _TYPEDEF */ - YYSYMBOL__TEMPLATE = 278, /* _TEMPLATE */ - YYSYMBOL__TYPELIST = 279, /* _TYPELIST */ - YYSYMBOL__MSCORLIB = 280, /* _MSCORLIB */ - YYSYMBOL_P_DEFINE = 281, /* P_DEFINE */ - YYSYMBOL_P_UNDEF = 282, /* P_UNDEF */ - YYSYMBOL_P_IFDEF = 283, /* P_IFDEF */ - YYSYMBOL_P_IFNDEF = 284, /* P_IFNDEF */ - YYSYMBOL_P_ELSE = 285, /* P_ELSE */ - YYSYMBOL_P_ENDIF = 286, /* P_ENDIF */ - YYSYMBOL_P_INCLUDE = 287, /* P_INCLUDE */ - YYSYMBOL_CONSTRAINT_ = 288, /* CONSTRAINT_ */ - YYSYMBOL_289_ = 289, /* '{' */ - YYSYMBOL_290_ = 290, /* '}' */ - YYSYMBOL_291_ = 291, /* '+' */ - YYSYMBOL_292_ = 292, /* ',' */ - YYSYMBOL_293_ = 293, /* '.' */ - YYSYMBOL_294_ = 294, /* '(' */ - YYSYMBOL_295_ = 295, /* ')' */ - YYSYMBOL_296_ = 296, /* ';' */ - YYSYMBOL_297_ = 297, /* '=' */ - YYSYMBOL_298_ = 298, /* '[' */ - YYSYMBOL_299_ = 299, /* ']' */ - YYSYMBOL_300_ = 300, /* '<' */ - YYSYMBOL_301_ = 301, /* '>' */ - YYSYMBOL_302_ = 302, /* '-' */ - YYSYMBOL_303_ = 303, /* ':' */ - YYSYMBOL_304_ = 304, /* '*' */ - YYSYMBOL_305_ = 305, /* '&' */ - YYSYMBOL_306_ = 306, /* '/' */ - YYSYMBOL_307_ = 307, /* '!' 
*/ - YYSYMBOL_YYACCEPT = 308, /* $accept */ - YYSYMBOL_decls = 309, /* decls */ - YYSYMBOL_decl = 310, /* decl */ - YYSYMBOL_classNameSeq = 311, /* classNameSeq */ - YYSYMBOL_compQstring = 312, /* compQstring */ - YYSYMBOL_languageDecl = 313, /* languageDecl */ - YYSYMBOL_id = 314, /* id */ - YYSYMBOL_dottedName = 315, /* dottedName */ - YYSYMBOL_int32 = 316, /* int32 */ - YYSYMBOL_int64 = 317, /* int64 */ - YYSYMBOL_float64 = 318, /* float64 */ - YYSYMBOL_typedefDecl = 319, /* typedefDecl */ - YYSYMBOL_compControl = 320, /* compControl */ - YYSYMBOL_customDescr = 321, /* customDescr */ - YYSYMBOL_customDescrWithOwner = 322, /* customDescrWithOwner */ - YYSYMBOL_customHead = 323, /* customHead */ - YYSYMBOL_customHeadWithOwner = 324, /* customHeadWithOwner */ - YYSYMBOL_customType = 325, /* customType */ - YYSYMBOL_ownerType = 326, /* ownerType */ - YYSYMBOL_customBlobDescr = 327, /* customBlobDescr */ - YYSYMBOL_customBlobArgs = 328, /* customBlobArgs */ - YYSYMBOL_customBlobNVPairs = 329, /* customBlobNVPairs */ - YYSYMBOL_fieldOrProp = 330, /* fieldOrProp */ - YYSYMBOL_customAttrDecl = 331, /* customAttrDecl */ - YYSYMBOL_serializType = 332, /* serializType */ - YYSYMBOL_moduleHead = 333, /* moduleHead */ - YYSYMBOL_vtfixupDecl = 334, /* vtfixupDecl */ - YYSYMBOL_vtfixupAttr = 335, /* vtfixupAttr */ - YYSYMBOL_vtableDecl = 336, /* vtableDecl */ - YYSYMBOL_vtableHead = 337, /* vtableHead */ - YYSYMBOL_nameSpaceHead = 338, /* nameSpaceHead */ - YYSYMBOL__class = 339, /* _class */ - YYSYMBOL_classHeadBegin = 340, /* classHeadBegin */ - YYSYMBOL_classHead = 341, /* classHead */ - YYSYMBOL_classAttr = 342, /* classAttr */ - YYSYMBOL_extendsClause = 343, /* extendsClause */ - YYSYMBOL_implClause = 344, /* implClause */ - YYSYMBOL_classDecls = 345, /* classDecls */ - YYSYMBOL_implList = 346, /* implList */ - YYSYMBOL_typeList = 347, /* typeList */ - YYSYMBOL_typeListNotEmpty = 348, /* typeListNotEmpty */ - YYSYMBOL_typarsClause = 349, /* typarsClause */ - YYSYMBOL_typarAttrib = 350, /* typarAttrib */ - YYSYMBOL_typarAttribs = 351, /* typarAttribs */ - YYSYMBOL_typars = 352, /* typars */ - YYSYMBOL_typarsRest = 353, /* typarsRest */ - YYSYMBOL_tyBound = 354, /* tyBound */ - YYSYMBOL_genArity = 355, /* genArity */ - YYSYMBOL_genArityNotEmpty = 356, /* genArityNotEmpty */ - YYSYMBOL_classDecl = 357, /* classDecl */ - YYSYMBOL_fieldDecl = 358, /* fieldDecl */ - YYSYMBOL_fieldAttr = 359, /* fieldAttr */ - YYSYMBOL_atOpt = 360, /* atOpt */ - YYSYMBOL_initOpt = 361, /* initOpt */ - YYSYMBOL_repeatOpt = 362, /* repeatOpt */ - YYSYMBOL_methodRef = 363, /* methodRef */ - YYSYMBOL_callConv = 364, /* callConv */ - YYSYMBOL_callKind = 365, /* callKind */ - YYSYMBOL_mdtoken = 366, /* mdtoken */ - YYSYMBOL_memberRef = 367, /* memberRef */ - YYSYMBOL_eventHead = 368, /* eventHead */ - YYSYMBOL_eventAttr = 369, /* eventAttr */ - YYSYMBOL_eventDecls = 370, /* eventDecls */ - YYSYMBOL_eventDecl = 371, /* eventDecl */ - YYSYMBOL_propHead = 372, /* propHead */ - YYSYMBOL_propAttr = 373, /* propAttr */ - YYSYMBOL_propDecls = 374, /* propDecls */ - YYSYMBOL_propDecl = 375, /* propDecl */ - YYSYMBOL_methodHeadPart1 = 376, /* methodHeadPart1 */ - YYSYMBOL_marshalClause = 377, /* marshalClause */ - YYSYMBOL_marshalBlob = 378, /* marshalBlob */ - YYSYMBOL_marshalBlobHead = 379, /* marshalBlobHead */ - YYSYMBOL_methodHead = 380, /* methodHead */ - YYSYMBOL_methAttr = 381, /* methAttr */ - YYSYMBOL_pinvAttr = 382, /* pinvAttr */ - YYSYMBOL_methodName = 383, /* methodName */ - YYSYMBOL_paramAttr = 384, /* paramAttr */ - 
YYSYMBOL_implAttr = 385, /* implAttr */ - YYSYMBOL_localsHead = 386, /* localsHead */ - YYSYMBOL_methodDecls = 387, /* methodDecls */ - YYSYMBOL_methodDecl = 388, /* methodDecl */ - YYSYMBOL_scopeBlock = 389, /* scopeBlock */ - YYSYMBOL_scopeOpen = 390, /* scopeOpen */ - YYSYMBOL_sehBlock = 391, /* sehBlock */ - YYSYMBOL_sehClauses = 392, /* sehClauses */ - YYSYMBOL_tryBlock = 393, /* tryBlock */ - YYSYMBOL_tryHead = 394, /* tryHead */ - YYSYMBOL_sehClause = 395, /* sehClause */ - YYSYMBOL_filterClause = 396, /* filterClause */ - YYSYMBOL_filterHead = 397, /* filterHead */ - YYSYMBOL_catchClause = 398, /* catchClause */ - YYSYMBOL_finallyClause = 399, /* finallyClause */ - YYSYMBOL_faultClause = 400, /* faultClause */ - YYSYMBOL_handlerBlock = 401, /* handlerBlock */ - YYSYMBOL_dataDecl = 402, /* dataDecl */ - YYSYMBOL_ddHead = 403, /* ddHead */ - YYSYMBOL_tls = 404, /* tls */ - YYSYMBOL_ddBody = 405, /* ddBody */ - YYSYMBOL_ddItemList = 406, /* ddItemList */ - YYSYMBOL_ddItemCount = 407, /* ddItemCount */ - YYSYMBOL_ddItem = 408, /* ddItem */ - YYSYMBOL_fieldSerInit = 409, /* fieldSerInit */ - YYSYMBOL_bytearrayhead = 410, /* bytearrayhead */ - YYSYMBOL_bytes = 411, /* bytes */ - YYSYMBOL_hexbytes = 412, /* hexbytes */ - YYSYMBOL_fieldInit = 413, /* fieldInit */ - YYSYMBOL_serInit = 414, /* serInit */ - YYSYMBOL_f32seq = 415, /* f32seq */ - YYSYMBOL_f64seq = 416, /* f64seq */ - YYSYMBOL_i64seq = 417, /* i64seq */ - YYSYMBOL_i32seq = 418, /* i32seq */ - YYSYMBOL_i16seq = 419, /* i16seq */ - YYSYMBOL_i8seq = 420, /* i8seq */ - YYSYMBOL_boolSeq = 421, /* boolSeq */ - YYSYMBOL_sqstringSeq = 422, /* sqstringSeq */ - YYSYMBOL_classSeq = 423, /* classSeq */ - YYSYMBOL_objSeq = 424, /* objSeq */ - YYSYMBOL_methodSpec = 425, /* methodSpec */ - YYSYMBOL_instr_none = 426, /* instr_none */ - YYSYMBOL_instr_var = 427, /* instr_var */ - YYSYMBOL_instr_i = 428, /* instr_i */ - YYSYMBOL_instr_i8 = 429, /* instr_i8 */ - YYSYMBOL_instr_r = 430, /* instr_r */ - YYSYMBOL_instr_brtarget = 431, /* instr_brtarget */ - YYSYMBOL_instr_method = 432, /* instr_method */ - YYSYMBOL_instr_field = 433, /* instr_field */ - YYSYMBOL_instr_type = 434, /* instr_type */ - YYSYMBOL_instr_string = 435, /* instr_string */ - YYSYMBOL_instr_sig = 436, /* instr_sig */ - YYSYMBOL_instr_tok = 437, /* instr_tok */ - YYSYMBOL_instr_switch = 438, /* instr_switch */ - YYSYMBOL_instr_r_head = 439, /* instr_r_head */ - YYSYMBOL_instr = 440, /* instr */ - YYSYMBOL_labels = 441, /* labels */ - YYSYMBOL_tyArgs0 = 442, /* tyArgs0 */ - YYSYMBOL_tyArgs1 = 443, /* tyArgs1 */ - YYSYMBOL_tyArgs2 = 444, /* tyArgs2 */ - YYSYMBOL_sigArgs0 = 445, /* sigArgs0 */ - YYSYMBOL_sigArgs1 = 446, /* sigArgs1 */ - YYSYMBOL_sigArg = 447, /* sigArg */ - YYSYMBOL_className = 448, /* className */ - YYSYMBOL_slashedName = 449, /* slashedName */ - YYSYMBOL_typeSpec = 450, /* typeSpec */ - YYSYMBOL_nativeType = 451, /* nativeType */ - YYSYMBOL_iidParamIndex = 452, /* iidParamIndex */ - YYSYMBOL_variantType = 453, /* variantType */ - YYSYMBOL_type = 454, /* type */ - YYSYMBOL_simpleType = 455, /* simpleType */ - YYSYMBOL_bounds1 = 456, /* bounds1 */ - YYSYMBOL_bound = 457, /* bound */ - YYSYMBOL_secDecl = 458, /* secDecl */ - YYSYMBOL_secAttrSetBlob = 459, /* secAttrSetBlob */ - YYSYMBOL_secAttrBlob = 460, /* secAttrBlob */ - YYSYMBOL_psetHead = 461, /* psetHead */ - YYSYMBOL_nameValPairs = 462, /* nameValPairs */ - YYSYMBOL_nameValPair = 463, /* nameValPair */ - YYSYMBOL_truefalse = 464, /* truefalse */ - YYSYMBOL_caValue = 465, /* caValue */ - YYSYMBOL_secAction = 
466, /* secAction */ - YYSYMBOL_esHead = 467, /* esHead */ - YYSYMBOL_extSourceSpec = 468, /* extSourceSpec */ - YYSYMBOL_fileDecl = 469, /* fileDecl */ - YYSYMBOL_fileAttr = 470, /* fileAttr */ - YYSYMBOL_fileEntry = 471, /* fileEntry */ - YYSYMBOL_hashHead = 472, /* hashHead */ - YYSYMBOL_assemblyHead = 473, /* assemblyHead */ - YYSYMBOL_asmAttr = 474, /* asmAttr */ - YYSYMBOL_assemblyDecls = 475, /* assemblyDecls */ - YYSYMBOL_assemblyDecl = 476, /* assemblyDecl */ - YYSYMBOL_intOrWildcard = 477, /* intOrWildcard */ - YYSYMBOL_asmOrRefDecl = 478, /* asmOrRefDecl */ - YYSYMBOL_publicKeyHead = 479, /* publicKeyHead */ - YYSYMBOL_publicKeyTokenHead = 480, /* publicKeyTokenHead */ - YYSYMBOL_localeHead = 481, /* localeHead */ - YYSYMBOL_assemblyRefHead = 482, /* assemblyRefHead */ - YYSYMBOL_assemblyRefDecls = 483, /* assemblyRefDecls */ - YYSYMBOL_assemblyRefDecl = 484, /* assemblyRefDecl */ - YYSYMBOL_exptypeHead = 485, /* exptypeHead */ - YYSYMBOL_exportHead = 486, /* exportHead */ - YYSYMBOL_exptAttr = 487, /* exptAttr */ - YYSYMBOL_exptypeDecls = 488, /* exptypeDecls */ - YYSYMBOL_exptypeDecl = 489, /* exptypeDecl */ - YYSYMBOL_manifestResHead = 490, /* manifestResHead */ - YYSYMBOL_manresAttr = 491, /* manresAttr */ - YYSYMBOL_manifestResDecls = 492, /* manifestResDecls */ - YYSYMBOL_manifestResDecl = 493 /* manifestResDecl */ + YYSYMBOL_ASYNC_ = 99, /* ASYNC_ */ + YYSYMBOL_STRICT_ = 100, /* STRICT_ */ + YYSYMBOL_RETARGETABLE_ = 101, /* RETARGETABLE_ */ + YYSYMBOL_WINDOWSRUNTIME_ = 102, /* WINDOWSRUNTIME_ */ + YYSYMBOL_NOPLATFORM_ = 103, /* NOPLATFORM_ */ + YYSYMBOL_METHOD_ = 104, /* METHOD_ */ + YYSYMBOL_FIELD_ = 105, /* FIELD_ */ + YYSYMBOL_PINNED_ = 106, /* PINNED_ */ + YYSYMBOL_MODREQ_ = 107, /* MODREQ_ */ + YYSYMBOL_MODOPT_ = 108, /* MODOPT_ */ + YYSYMBOL_SERIALIZABLE_ = 109, /* SERIALIZABLE_ */ + YYSYMBOL_PROPERTY_ = 110, /* PROPERTY_ */ + YYSYMBOL_TYPE_ = 111, /* TYPE_ */ + YYSYMBOL_ASSEMBLY_ = 112, /* ASSEMBLY_ */ + YYSYMBOL_FAMANDASSEM_ = 113, /* FAMANDASSEM_ */ + YYSYMBOL_FAMORASSEM_ = 114, /* FAMORASSEM_ */ + YYSYMBOL_PRIVATESCOPE_ = 115, /* PRIVATESCOPE_ */ + YYSYMBOL_HIDEBYSIG_ = 116, /* HIDEBYSIG_ */ + YYSYMBOL_NEWSLOT_ = 117, /* NEWSLOT_ */ + YYSYMBOL_RTSPECIALNAME_ = 118, /* RTSPECIALNAME_ */ + YYSYMBOL_PINVOKEIMPL_ = 119, /* PINVOKEIMPL_ */ + YYSYMBOL__CTOR = 120, /* _CTOR */ + YYSYMBOL__CCTOR = 121, /* _CCTOR */ + YYSYMBOL_LITERAL_ = 122, /* LITERAL_ */ + YYSYMBOL_NOTSERIALIZED_ = 123, /* NOTSERIALIZED_ */ + YYSYMBOL_INITONLY_ = 124, /* INITONLY_ */ + YYSYMBOL_REQSECOBJ_ = 125, /* REQSECOBJ_ */ + YYSYMBOL_CIL_ = 126, /* CIL_ */ + YYSYMBOL_OPTIL_ = 127, /* OPTIL_ */ + YYSYMBOL_MANAGED_ = 128, /* MANAGED_ */ + YYSYMBOL_FORWARDREF_ = 129, /* FORWARDREF_ */ + YYSYMBOL_PRESERVESIG_ = 130, /* PRESERVESIG_ */ + YYSYMBOL_RUNTIME_ = 131, /* RUNTIME_ */ + YYSYMBOL_INTERNALCALL_ = 132, /* INTERNALCALL_ */ + YYSYMBOL__IMPORT = 133, /* _IMPORT */ + YYSYMBOL_NOMANGLE_ = 134, /* NOMANGLE_ */ + YYSYMBOL_LASTERR_ = 135, /* LASTERR_ */ + YYSYMBOL_WINAPI_ = 136, /* WINAPI_ */ + YYSYMBOL_AS_ = 137, /* AS_ */ + YYSYMBOL_BESTFIT_ = 138, /* BESTFIT_ */ + YYSYMBOL_ON_ = 139, /* ON_ */ + YYSYMBOL_OFF_ = 140, /* OFF_ */ + YYSYMBOL_CHARMAPERROR_ = 141, /* CHARMAPERROR_ */ + YYSYMBOL_INSTR_NONE = 142, /* INSTR_NONE */ + YYSYMBOL_INSTR_VAR = 143, /* INSTR_VAR */ + YYSYMBOL_INSTR_I = 144, /* INSTR_I */ + YYSYMBOL_INSTR_I8 = 145, /* INSTR_I8 */ + YYSYMBOL_INSTR_R = 146, /* INSTR_R */ + YYSYMBOL_INSTR_BRTARGET = 147, /* INSTR_BRTARGET */ + YYSYMBOL_INSTR_METHOD = 148, /* INSTR_METHOD */ + 
YYSYMBOL_INSTR_FIELD = 149, /* INSTR_FIELD */ + YYSYMBOL_INSTR_TYPE = 150, /* INSTR_TYPE */ + YYSYMBOL_INSTR_STRING = 151, /* INSTR_STRING */ + YYSYMBOL_INSTR_SIG = 152, /* INSTR_SIG */ + YYSYMBOL_INSTR_TOK = 153, /* INSTR_TOK */ + YYSYMBOL_INSTR_SWITCH = 154, /* INSTR_SWITCH */ + YYSYMBOL__CLASS = 155, /* _CLASS */ + YYSYMBOL__NAMESPACE = 156, /* _NAMESPACE */ + YYSYMBOL__METHOD = 157, /* _METHOD */ + YYSYMBOL__FIELD = 158, /* _FIELD */ + YYSYMBOL__DATA = 159, /* _DATA */ + YYSYMBOL__THIS = 160, /* _THIS */ + YYSYMBOL__BASE = 161, /* _BASE */ + YYSYMBOL__NESTER = 162, /* _NESTER */ + YYSYMBOL__EMITBYTE = 163, /* _EMITBYTE */ + YYSYMBOL__TRY = 164, /* _TRY */ + YYSYMBOL__MAXSTACK = 165, /* _MAXSTACK */ + YYSYMBOL__LOCALS = 166, /* _LOCALS */ + YYSYMBOL__ENTRYPOINT = 167, /* _ENTRYPOINT */ + YYSYMBOL__ZEROINIT = 168, /* _ZEROINIT */ + YYSYMBOL__EVENT = 169, /* _EVENT */ + YYSYMBOL__ADDON = 170, /* _ADDON */ + YYSYMBOL__REMOVEON = 171, /* _REMOVEON */ + YYSYMBOL__FIRE = 172, /* _FIRE */ + YYSYMBOL__OTHER = 173, /* _OTHER */ + YYSYMBOL__PROPERTY = 174, /* _PROPERTY */ + YYSYMBOL__SET = 175, /* _SET */ + YYSYMBOL__GET = 176, /* _GET */ + YYSYMBOL__PERMISSION = 177, /* _PERMISSION */ + YYSYMBOL__PERMISSIONSET = 178, /* _PERMISSIONSET */ + YYSYMBOL_REQUEST_ = 179, /* REQUEST_ */ + YYSYMBOL_DEMAND_ = 180, /* DEMAND_ */ + YYSYMBOL_ASSERT_ = 181, /* ASSERT_ */ + YYSYMBOL_DENY_ = 182, /* DENY_ */ + YYSYMBOL_PERMITONLY_ = 183, /* PERMITONLY_ */ + YYSYMBOL_LINKCHECK_ = 184, /* LINKCHECK_ */ + YYSYMBOL_INHERITCHECK_ = 185, /* INHERITCHECK_ */ + YYSYMBOL_REQMIN_ = 186, /* REQMIN_ */ + YYSYMBOL_REQOPT_ = 187, /* REQOPT_ */ + YYSYMBOL_REQREFUSE_ = 188, /* REQREFUSE_ */ + YYSYMBOL_PREJITGRANT_ = 189, /* PREJITGRANT_ */ + YYSYMBOL_PREJITDENY_ = 190, /* PREJITDENY_ */ + YYSYMBOL_NONCASDEMAND_ = 191, /* NONCASDEMAND_ */ + YYSYMBOL_NONCASLINKDEMAND_ = 192, /* NONCASLINKDEMAND_ */ + YYSYMBOL_NONCASINHERITANCE_ = 193, /* NONCASINHERITANCE_ */ + YYSYMBOL__LINE = 194, /* _LINE */ + YYSYMBOL_P_LINE = 195, /* P_LINE */ + YYSYMBOL__LANGUAGE = 196, /* _LANGUAGE */ + YYSYMBOL__CUSTOM = 197, /* _CUSTOM */ + YYSYMBOL_INIT_ = 198, /* INIT_ */ + YYSYMBOL__SIZE = 199, /* _SIZE */ + YYSYMBOL__PACK = 200, /* _PACK */ + YYSYMBOL__VTABLE = 201, /* _VTABLE */ + YYSYMBOL__VTFIXUP = 202, /* _VTFIXUP */ + YYSYMBOL_FROMUNMANAGED_ = 203, /* FROMUNMANAGED_ */ + YYSYMBOL_CALLMOSTDERIVED_ = 204, /* CALLMOSTDERIVED_ */ + YYSYMBOL__VTENTRY = 205, /* _VTENTRY */ + YYSYMBOL_RETAINAPPDOMAIN_ = 206, /* RETAINAPPDOMAIN_ */ + YYSYMBOL__FILE = 207, /* _FILE */ + YYSYMBOL_NOMETADATA_ = 208, /* NOMETADATA_ */ + YYSYMBOL__HASH = 209, /* _HASH */ + YYSYMBOL__ASSEMBLY = 210, /* _ASSEMBLY */ + YYSYMBOL__PUBLICKEY = 211, /* _PUBLICKEY */ + YYSYMBOL__PUBLICKEYTOKEN = 212, /* _PUBLICKEYTOKEN */ + YYSYMBOL_ALGORITHM_ = 213, /* ALGORITHM_ */ + YYSYMBOL__VER = 214, /* _VER */ + YYSYMBOL__LOCALE = 215, /* _LOCALE */ + YYSYMBOL_EXTERN_ = 216, /* EXTERN_ */ + YYSYMBOL__MRESOURCE = 217, /* _MRESOURCE */ + YYSYMBOL__MODULE = 218, /* _MODULE */ + YYSYMBOL__EXPORT = 219, /* _EXPORT */ + YYSYMBOL_LEGACY_ = 220, /* LEGACY_ */ + YYSYMBOL_LIBRARY_ = 221, /* LIBRARY_ */ + YYSYMBOL_X86_ = 222, /* X86_ */ + YYSYMBOL_AMD64_ = 223, /* AMD64_ */ + YYSYMBOL_ARM_ = 224, /* ARM_ */ + YYSYMBOL_ARM64_ = 225, /* ARM64_ */ + YYSYMBOL_MARSHAL_ = 226, /* MARSHAL_ */ + YYSYMBOL_CUSTOM_ = 227, /* CUSTOM_ */ + YYSYMBOL_SYSSTRING_ = 228, /* SYSSTRING_ */ + YYSYMBOL_FIXED_ = 229, /* FIXED_ */ + YYSYMBOL_VARIANT_ = 230, /* VARIANT_ */ + YYSYMBOL_CURRENCY_ = 231, /* CURRENCY_ */ + 
YYSYMBOL_SYSCHAR_ = 232, /* SYSCHAR_ */ + YYSYMBOL_DECIMAL_ = 233, /* DECIMAL_ */ + YYSYMBOL_DATE_ = 234, /* DATE_ */ + YYSYMBOL_BSTR_ = 235, /* BSTR_ */ + YYSYMBOL_TBSTR_ = 236, /* TBSTR_ */ + YYSYMBOL_LPSTR_ = 237, /* LPSTR_ */ + YYSYMBOL_LPWSTR_ = 238, /* LPWSTR_ */ + YYSYMBOL_LPTSTR_ = 239, /* LPTSTR_ */ + YYSYMBOL_OBJECTREF_ = 240, /* OBJECTREF_ */ + YYSYMBOL_IUNKNOWN_ = 241, /* IUNKNOWN_ */ + YYSYMBOL_IDISPATCH_ = 242, /* IDISPATCH_ */ + YYSYMBOL_STRUCT_ = 243, /* STRUCT_ */ + YYSYMBOL_SAFEARRAY_ = 244, /* SAFEARRAY_ */ + YYSYMBOL_BYVALSTR_ = 245, /* BYVALSTR_ */ + YYSYMBOL_LPVOID_ = 246, /* LPVOID_ */ + YYSYMBOL_ANY_ = 247, /* ANY_ */ + YYSYMBOL_ARRAY_ = 248, /* ARRAY_ */ + YYSYMBOL_LPSTRUCT_ = 249, /* LPSTRUCT_ */ + YYSYMBOL_IIDPARAM_ = 250, /* IIDPARAM_ */ + YYSYMBOL_IN_ = 251, /* IN_ */ + YYSYMBOL_OUT_ = 252, /* OUT_ */ + YYSYMBOL_OPT_ = 253, /* OPT_ */ + YYSYMBOL__PARAM = 254, /* _PARAM */ + YYSYMBOL__OVERRIDE = 255, /* _OVERRIDE */ + YYSYMBOL_WITH_ = 256, /* WITH_ */ + YYSYMBOL_NULL_ = 257, /* NULL_ */ + YYSYMBOL_ERROR_ = 258, /* ERROR_ */ + YYSYMBOL_HRESULT_ = 259, /* HRESULT_ */ + YYSYMBOL_CARRAY_ = 260, /* CARRAY_ */ + YYSYMBOL_USERDEFINED_ = 261, /* USERDEFINED_ */ + YYSYMBOL_RECORD_ = 262, /* RECORD_ */ + YYSYMBOL_FILETIME_ = 263, /* FILETIME_ */ + YYSYMBOL_BLOB_ = 264, /* BLOB_ */ + YYSYMBOL_STREAM_ = 265, /* STREAM_ */ + YYSYMBOL_STORAGE_ = 266, /* STORAGE_ */ + YYSYMBOL_STREAMED_OBJECT_ = 267, /* STREAMED_OBJECT_ */ + YYSYMBOL_STORED_OBJECT_ = 268, /* STORED_OBJECT_ */ + YYSYMBOL_BLOB_OBJECT_ = 269, /* BLOB_OBJECT_ */ + YYSYMBOL_CF_ = 270, /* CF_ */ + YYSYMBOL_CLSID_ = 271, /* CLSID_ */ + YYSYMBOL_VECTOR_ = 272, /* VECTOR_ */ + YYSYMBOL__SUBSYSTEM = 273, /* _SUBSYSTEM */ + YYSYMBOL__CORFLAGS = 274, /* _CORFLAGS */ + YYSYMBOL_ALIGNMENT_ = 275, /* ALIGNMENT_ */ + YYSYMBOL__IMAGEBASE = 276, /* _IMAGEBASE */ + YYSYMBOL__STACKRESERVE = 277, /* _STACKRESERVE */ + YYSYMBOL__TYPEDEF = 278, /* _TYPEDEF */ + YYSYMBOL__TEMPLATE = 279, /* _TEMPLATE */ + YYSYMBOL__TYPELIST = 280, /* _TYPELIST */ + YYSYMBOL__MSCORLIB = 281, /* _MSCORLIB */ + YYSYMBOL_P_DEFINE = 282, /* P_DEFINE */ + YYSYMBOL_P_UNDEF = 283, /* P_UNDEF */ + YYSYMBOL_P_IFDEF = 284, /* P_IFDEF */ + YYSYMBOL_P_IFNDEF = 285, /* P_IFNDEF */ + YYSYMBOL_P_ELSE = 286, /* P_ELSE */ + YYSYMBOL_P_ENDIF = 287, /* P_ENDIF */ + YYSYMBOL_P_INCLUDE = 288, /* P_INCLUDE */ + YYSYMBOL_CONSTRAINT_ = 289, /* CONSTRAINT_ */ + YYSYMBOL_290_ = 290, /* '{' */ + YYSYMBOL_291_ = 291, /* '}' */ + YYSYMBOL_292_ = 292, /* '+' */ + YYSYMBOL_293_ = 293, /* ',' */ + YYSYMBOL_294_ = 294, /* '.' */ + YYSYMBOL_295_ = 295, /* '(' */ + YYSYMBOL_296_ = 296, /* ')' */ + YYSYMBOL_297_ = 297, /* ';' */ + YYSYMBOL_298_ = 298, /* '=' */ + YYSYMBOL_299_ = 299, /* '[' */ + YYSYMBOL_300_ = 300, /* ']' */ + YYSYMBOL_301_ = 301, /* '<' */ + YYSYMBOL_302_ = 302, /* '>' */ + YYSYMBOL_303_ = 303, /* '-' */ + YYSYMBOL_304_ = 304, /* ':' */ + YYSYMBOL_305_ = 305, /* '*' */ + YYSYMBOL_306_ = 306, /* '&' */ + YYSYMBOL_307_ = 307, /* '/' */ + YYSYMBOL_308_ = 308, /* '!' 
*/ + YYSYMBOL_YYACCEPT = 309, /* $accept */ + YYSYMBOL_decls = 310, /* decls */ + YYSYMBOL_decl = 311, /* decl */ + YYSYMBOL_classNameSeq = 312, /* classNameSeq */ + YYSYMBOL_compQstring = 313, /* compQstring */ + YYSYMBOL_languageDecl = 314, /* languageDecl */ + YYSYMBOL_id = 315, /* id */ + YYSYMBOL_dottedName = 316, /* dottedName */ + YYSYMBOL_int32 = 317, /* int32 */ + YYSYMBOL_int64 = 318, /* int64 */ + YYSYMBOL_float64 = 319, /* float64 */ + YYSYMBOL_typedefDecl = 320, /* typedefDecl */ + YYSYMBOL_compControl = 321, /* compControl */ + YYSYMBOL_customDescr = 322, /* customDescr */ + YYSYMBOL_customDescrWithOwner = 323, /* customDescrWithOwner */ + YYSYMBOL_customHead = 324, /* customHead */ + YYSYMBOL_customHeadWithOwner = 325, /* customHeadWithOwner */ + YYSYMBOL_customType = 326, /* customType */ + YYSYMBOL_ownerType = 327, /* ownerType */ + YYSYMBOL_customBlobDescr = 328, /* customBlobDescr */ + YYSYMBOL_customBlobArgs = 329, /* customBlobArgs */ + YYSYMBOL_customBlobNVPairs = 330, /* customBlobNVPairs */ + YYSYMBOL_fieldOrProp = 331, /* fieldOrProp */ + YYSYMBOL_customAttrDecl = 332, /* customAttrDecl */ + YYSYMBOL_serializType = 333, /* serializType */ + YYSYMBOL_moduleHead = 334, /* moduleHead */ + YYSYMBOL_vtfixupDecl = 335, /* vtfixupDecl */ + YYSYMBOL_vtfixupAttr = 336, /* vtfixupAttr */ + YYSYMBOL_vtableDecl = 337, /* vtableDecl */ + YYSYMBOL_vtableHead = 338, /* vtableHead */ + YYSYMBOL_nameSpaceHead = 339, /* nameSpaceHead */ + YYSYMBOL__class = 340, /* _class */ + YYSYMBOL_classHeadBegin = 341, /* classHeadBegin */ + YYSYMBOL_classHead = 342, /* classHead */ + YYSYMBOL_classAttr = 343, /* classAttr */ + YYSYMBOL_extendsClause = 344, /* extendsClause */ + YYSYMBOL_implClause = 345, /* implClause */ + YYSYMBOL_classDecls = 346, /* classDecls */ + YYSYMBOL_implList = 347, /* implList */ + YYSYMBOL_typeList = 348, /* typeList */ + YYSYMBOL_typeListNotEmpty = 349, /* typeListNotEmpty */ + YYSYMBOL_typarsClause = 350, /* typarsClause */ + YYSYMBOL_typarAttrib = 351, /* typarAttrib */ + YYSYMBOL_typarAttribs = 352, /* typarAttribs */ + YYSYMBOL_typars = 353, /* typars */ + YYSYMBOL_typarsRest = 354, /* typarsRest */ + YYSYMBOL_tyBound = 355, /* tyBound */ + YYSYMBOL_genArity = 356, /* genArity */ + YYSYMBOL_genArityNotEmpty = 357, /* genArityNotEmpty */ + YYSYMBOL_classDecl = 358, /* classDecl */ + YYSYMBOL_fieldDecl = 359, /* fieldDecl */ + YYSYMBOL_fieldAttr = 360, /* fieldAttr */ + YYSYMBOL_atOpt = 361, /* atOpt */ + YYSYMBOL_initOpt = 362, /* initOpt */ + YYSYMBOL_repeatOpt = 363, /* repeatOpt */ + YYSYMBOL_methodRef = 364, /* methodRef */ + YYSYMBOL_callConv = 365, /* callConv */ + YYSYMBOL_callKind = 366, /* callKind */ + YYSYMBOL_mdtoken = 367, /* mdtoken */ + YYSYMBOL_memberRef = 368, /* memberRef */ + YYSYMBOL_eventHead = 369, /* eventHead */ + YYSYMBOL_eventAttr = 370, /* eventAttr */ + YYSYMBOL_eventDecls = 371, /* eventDecls */ + YYSYMBOL_eventDecl = 372, /* eventDecl */ + YYSYMBOL_propHead = 373, /* propHead */ + YYSYMBOL_propAttr = 374, /* propAttr */ + YYSYMBOL_propDecls = 375, /* propDecls */ + YYSYMBOL_propDecl = 376, /* propDecl */ + YYSYMBOL_methodHeadPart1 = 377, /* methodHeadPart1 */ + YYSYMBOL_marshalClause = 378, /* marshalClause */ + YYSYMBOL_marshalBlob = 379, /* marshalBlob */ + YYSYMBOL_marshalBlobHead = 380, /* marshalBlobHead */ + YYSYMBOL_methodHead = 381, /* methodHead */ + YYSYMBOL_methAttr = 382, /* methAttr */ + YYSYMBOL_pinvAttr = 383, /* pinvAttr */ + YYSYMBOL_methodName = 384, /* methodName */ + YYSYMBOL_paramAttr = 385, /* paramAttr */ + 
YYSYMBOL_implAttr = 386, /* implAttr */ + YYSYMBOL_localsHead = 387, /* localsHead */ + YYSYMBOL_methodDecls = 388, /* methodDecls */ + YYSYMBOL_methodDecl = 389, /* methodDecl */ + YYSYMBOL_scopeBlock = 390, /* scopeBlock */ + YYSYMBOL_scopeOpen = 391, /* scopeOpen */ + YYSYMBOL_sehBlock = 392, /* sehBlock */ + YYSYMBOL_sehClauses = 393, /* sehClauses */ + YYSYMBOL_tryBlock = 394, /* tryBlock */ + YYSYMBOL_tryHead = 395, /* tryHead */ + YYSYMBOL_sehClause = 396, /* sehClause */ + YYSYMBOL_filterClause = 397, /* filterClause */ + YYSYMBOL_filterHead = 398, /* filterHead */ + YYSYMBOL_catchClause = 399, /* catchClause */ + YYSYMBOL_finallyClause = 400, /* finallyClause */ + YYSYMBOL_faultClause = 401, /* faultClause */ + YYSYMBOL_handlerBlock = 402, /* handlerBlock */ + YYSYMBOL_dataDecl = 403, /* dataDecl */ + YYSYMBOL_ddHead = 404, /* ddHead */ + YYSYMBOL_tls = 405, /* tls */ + YYSYMBOL_ddBody = 406, /* ddBody */ + YYSYMBOL_ddItemList = 407, /* ddItemList */ + YYSYMBOL_ddItemCount = 408, /* ddItemCount */ + YYSYMBOL_ddItem = 409, /* ddItem */ + YYSYMBOL_fieldSerInit = 410, /* fieldSerInit */ + YYSYMBOL_bytearrayhead = 411, /* bytearrayhead */ + YYSYMBOL_bytes = 412, /* bytes */ + YYSYMBOL_hexbytes = 413, /* hexbytes */ + YYSYMBOL_fieldInit = 414, /* fieldInit */ + YYSYMBOL_serInit = 415, /* serInit */ + YYSYMBOL_f32seq = 416, /* f32seq */ + YYSYMBOL_f64seq = 417, /* f64seq */ + YYSYMBOL_i64seq = 418, /* i64seq */ + YYSYMBOL_i32seq = 419, /* i32seq */ + YYSYMBOL_i16seq = 420, /* i16seq */ + YYSYMBOL_i8seq = 421, /* i8seq */ + YYSYMBOL_boolSeq = 422, /* boolSeq */ + YYSYMBOL_sqstringSeq = 423, /* sqstringSeq */ + YYSYMBOL_classSeq = 424, /* classSeq */ + YYSYMBOL_objSeq = 425, /* objSeq */ + YYSYMBOL_methodSpec = 426, /* methodSpec */ + YYSYMBOL_instr_none = 427, /* instr_none */ + YYSYMBOL_instr_var = 428, /* instr_var */ + YYSYMBOL_instr_i = 429, /* instr_i */ + YYSYMBOL_instr_i8 = 430, /* instr_i8 */ + YYSYMBOL_instr_r = 431, /* instr_r */ + YYSYMBOL_instr_brtarget = 432, /* instr_brtarget */ + YYSYMBOL_instr_method = 433, /* instr_method */ + YYSYMBOL_instr_field = 434, /* instr_field */ + YYSYMBOL_instr_type = 435, /* instr_type */ + YYSYMBOL_instr_string = 436, /* instr_string */ + YYSYMBOL_instr_sig = 437, /* instr_sig */ + YYSYMBOL_instr_tok = 438, /* instr_tok */ + YYSYMBOL_instr_switch = 439, /* instr_switch */ + YYSYMBOL_instr_r_head = 440, /* instr_r_head */ + YYSYMBOL_instr = 441, /* instr */ + YYSYMBOL_labels = 442, /* labels */ + YYSYMBOL_tyArgs0 = 443, /* tyArgs0 */ + YYSYMBOL_tyArgs1 = 444, /* tyArgs1 */ + YYSYMBOL_tyArgs2 = 445, /* tyArgs2 */ + YYSYMBOL_sigArgs0 = 446, /* sigArgs0 */ + YYSYMBOL_sigArgs1 = 447, /* sigArgs1 */ + YYSYMBOL_sigArg = 448, /* sigArg */ + YYSYMBOL_className = 449, /* className */ + YYSYMBOL_slashedName = 450, /* slashedName */ + YYSYMBOL_typeSpec = 451, /* typeSpec */ + YYSYMBOL_nativeType = 452, /* nativeType */ + YYSYMBOL_iidParamIndex = 453, /* iidParamIndex */ + YYSYMBOL_variantType = 454, /* variantType */ + YYSYMBOL_type = 455, /* type */ + YYSYMBOL_simpleType = 456, /* simpleType */ + YYSYMBOL_bounds1 = 457, /* bounds1 */ + YYSYMBOL_bound = 458, /* bound */ + YYSYMBOL_secDecl = 459, /* secDecl */ + YYSYMBOL_secAttrSetBlob = 460, /* secAttrSetBlob */ + YYSYMBOL_secAttrBlob = 461, /* secAttrBlob */ + YYSYMBOL_psetHead = 462, /* psetHead */ + YYSYMBOL_nameValPairs = 463, /* nameValPairs */ + YYSYMBOL_nameValPair = 464, /* nameValPair */ + YYSYMBOL_truefalse = 465, /* truefalse */ + YYSYMBOL_caValue = 466, /* caValue */ + YYSYMBOL_secAction = 
467, /* secAction */ + YYSYMBOL_esHead = 468, /* esHead */ + YYSYMBOL_extSourceSpec = 469, /* extSourceSpec */ + YYSYMBOL_fileDecl = 470, /* fileDecl */ + YYSYMBOL_fileAttr = 471, /* fileAttr */ + YYSYMBOL_fileEntry = 472, /* fileEntry */ + YYSYMBOL_hashHead = 473, /* hashHead */ + YYSYMBOL_assemblyHead = 474, /* assemblyHead */ + YYSYMBOL_asmAttr = 475, /* asmAttr */ + YYSYMBOL_assemblyDecls = 476, /* assemblyDecls */ + YYSYMBOL_assemblyDecl = 477, /* assemblyDecl */ + YYSYMBOL_intOrWildcard = 478, /* intOrWildcard */ + YYSYMBOL_asmOrRefDecl = 479, /* asmOrRefDecl */ + YYSYMBOL_publicKeyHead = 480, /* publicKeyHead */ + YYSYMBOL_publicKeyTokenHead = 481, /* publicKeyTokenHead */ + YYSYMBOL_localeHead = 482, /* localeHead */ + YYSYMBOL_assemblyRefHead = 483, /* assemblyRefHead */ + YYSYMBOL_assemblyRefDecls = 484, /* assemblyRefDecls */ + YYSYMBOL_assemblyRefDecl = 485, /* assemblyRefDecl */ + YYSYMBOL_exptypeHead = 486, /* exptypeHead */ + YYSYMBOL_exportHead = 487, /* exportHead */ + YYSYMBOL_exptAttr = 488, /* exptAttr */ + YYSYMBOL_exptypeDecls = 489, /* exptypeDecls */ + YYSYMBOL_exptypeDecl = 490, /* exptypeDecl */ + YYSYMBOL_manifestResHead = 491, /* manifestResHead */ + YYSYMBOL_manresAttr = 492, /* manresAttr */ + YYSYMBOL_manifestResDecls = 493, /* manifestResDecls */ + YYSYMBOL_manifestResDecl = 494 /* manifestResDecl */ }; typedef enum yysymbol_kind_t yysymbol_kind_t; @@ -1286,19 +1288,19 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 2 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 3777 +#define YYLAST 5505 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 308 +#define YYNTOKENS 309 /* YYNNTS -- Number of nonterminals. */ #define YYNNTS 186 /* YYNRULES -- Number of rules. */ -#define YYNRULES 846 +#define YYNRULES 862 /* YYNSTATES -- Number of states. */ -#define YYNSTATES 1590 +#define YYNSTATES 1607 /* YYMAXUTOK -- Last valid token kind. 
*/ -#define YYMAXUTOK 543 +#define YYMAXUTOK 544 /* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM @@ -1315,16 +1317,16 @@ static const yytype_int16 yytranslate[] = 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 307, 2, 2, 2, 2, 305, 2, - 294, 295, 304, 291, 292, 302, 293, 306, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 303, 296, - 300, 297, 301, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 308, 2, 2, 2, 2, 306, 2, + 295, 296, 305, 292, 293, 303, 294, 307, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 304, 297, + 301, 298, 302, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 298, 2, 299, 2, 2, 2, 2, 2, 2, + 2, 299, 2, 300, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 289, 2, 290, 2, 2, 2, 2, + 2, 2, 2, 290, 2, 291, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -1366,7 +1368,7 @@ static const yytype_int16 yytranslate[] = 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288 + 285, 286, 287, 288, 289 }; #if YYDEBUG @@ -1375,89 +1377,91 @@ static const yytype_int16 yyrline[] = { 0, 189, 189, 190, 193, 194, 195, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, - 212, 222, 223, 226, 229, 230, 231, 232, 233, 234, - 237, 238, 241, 242, 245, 246, 248, 253, 254, 257, - 258, 259, 262, 265, 266, 269, 270, 271, 275, 276, - 277, 278, 279, 284, 285, 286, 287, 290, 293, 294, - 298, 299, 303, 304, 305, 306, 309, 310, 311, 313, - 316, 319, 325, 328, 329, 333, 339, 340, 342, 345, - 346, 352, 355, 356, 359, 363, 364, 372, 373, 374, - 375, 377, 379, 384, 385, 386, 393, 397, 398, 399, - 400, 401, 402, 405, 408, 412, 415, 418, 424, 427, - 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, - 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, - 448, 449, 450, 451, 452, 453, 456, 457, 460, 461, - 464, 465, 468, 469, 473, 474, 477, 478, 481, 482, - 485, 486, 487, 488, 489, 490, 491, 494, 495, 498, - 499, 502, 503, 506, 509, 510, 513, 517, 521, 522, - 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, - 538, 547, 548, 549, 554, 560, 561, 562, 569, 574, - 575, 576, 577, 578, 579, 580, 581, 593, 595, 596, - 597, 598, 599, 600, 601, 604, 605, 608, 609, 612, - 613, 617, 634, 640, 656, 661, 662, 663, 666, 667, - 668, 669, 672, 673, 674, 675, 676, 677, 678, 679, - 682, 685, 690, 694, 698, 700, 702, 707, 708, 712, - 713, 714, 717, 718, 721, 722, 723, 724, 725, 726, - 727, 728, 732, 738, 739, 740, 743, 744, 748, 749, - 750, 751, 752, 753, 754, 758, 764, 765, 768, 769, - 772, 775, 791, 792, 793, 794, 795, 796, 797, 798, - 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, - 809, 810, 811, 814, 817, 822, 823, 824, 825, 826, - 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, - 837, 840, 841, 842, 845, 846, 847, 848, 849, 852, - 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, - 863, 864, 865, 866, 867, 870, 874, 875, 878, 879, - 880, 881, 883, 886, 887, 888, 889, 890, 891, 892, - 893, 894, 895, 896, 906, 916, 918, 921, 928, 929, - 934, 940, 941, 943, 964, 967, 971, 974, 975, 978, - 979, 980, 984, 989, 990, 991, 992, 996, 997, 999, - 1003, 1007, 1012, 1016, 1020, 1021, 1022, 1027, 1030, 1031, - 1034, 1035, 1036, 1039, 1040, 1043, 1044, 1047, 1048, 1053, - 1054, 1055, 1056, 1063, 1070, 1077, 1084, 1092, 1100, 1101, - 1102, 1103, 1104, 1105, 1109, 1112, 1114, 1116, 1118, 1120, - 1122, 1124, 1126, 1128, 1130, 1132, 1134, 1136, 
1138, 1140, - 1142, 1144, 1146, 1150, 1153, 1154, 1157, 1158, 1162, 1163, - 1164, 1169, 1170, 1171, 1173, 1175, 1177, 1178, 1179, 1183, - 1187, 1191, 1195, 1199, 1203, 1207, 1211, 1215, 1219, 1223, - 1227, 1231, 1235, 1239, 1243, 1247, 1251, 1258, 1259, 1261, - 1265, 1266, 1268, 1272, 1273, 1277, 1278, 1281, 1282, 1285, - 1286, 1289, 1290, 1294, 1295, 1296, 1300, 1301, 1302, 1304, - 1308, 1309, 1313, 1319, 1322, 1325, 1328, 1331, 1334, 1337, - 1345, 1348, 1351, 1354, 1357, 1360, 1363, 1367, 1368, 1369, - 1370, 1371, 1372, 1373, 1374, 1383, 1384, 1385, 1392, 1400, - 1408, 1414, 1420, 1426, 1430, 1431, 1433, 1435, 1439, 1445, - 1448, 1449, 1450, 1451, 1452, 1456, 1457, 1460, 1461, 1464, - 1465, 1469, 1470, 1473, 1474, 1477, 1478, 1479, 1483, 1484, - 1485, 1486, 1487, 1488, 1489, 1490, 1493, 1499, 1506, 1507, - 1510, 1511, 1512, 1513, 1517, 1518, 1525, 1531, 1533, 1536, - 1538, 1539, 1541, 1543, 1544, 1545, 1546, 1547, 1548, 1549, - 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, 1558, 1559, - 1561, 1563, 1568, 1573, 1576, 1578, 1580, 1581, 1582, 1583, - 1584, 1586, 1588, 1590, 1591, 1593, 1596, 1600, 1601, 1602, - 1603, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1615, - 1616, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, - 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, - 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, - 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, - 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, - 1671, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, - 1687, 1689, 1696, 1703, 1709, 1715, 1730, 1745, 1746, 1747, - 1748, 1749, 1750, 1751, 1754, 1755, 1756, 1757, 1758, 1759, - 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1769, - 1770, 1771, 1774, 1775, 1778, 1779, 1780, 1781, 1784, 1788, - 1790, 1792, 1793, 1794, 1796, 1805, 1806, 1807, 1810, 1813, - 1818, 1819, 1823, 1824, 1827, 1830, 1831, 1834, 1837, 1840, - 1843, 1847, 1853, 1859, 1865, 1873, 1874, 1875, 1876, 1877, - 1878, 1879, 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, - 1891, 1892, 1895, 1898, 1900, 1903, 1905, 1909, 1912, 1916, - 1919, 1923, 1926, 1932, 1934, 1937, 1938, 1941, 1942, 1945, - 1948, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, - 1960, 1963, 1964, 1967, 1968, 1969, 1972, 1973, 1976, 1977, - 1979, 1980, 1981, 1982, 1985, 1988, 1991, 1994, 1996, 2000, - 2001, 2004, 2005, 2006, 2007, 2010, 2013, 2016, 2017, 2018, - 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2028, 2029, 2032, - 2033, 2034, 2035, 2037, 2039, 2040, 2043, 2044, 2048, 2049, - 2050, 2053, 2054, 2057, 2058, 2059, 2060 + 212, 215, 216, 219, 222, 223, 224, 225, 226, 227, + 230, 231, 234, 235, 238, 239, 241, 246, 248, 249, + 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, 266, 267, 268, 271, 274, 275, + 278, 279, 280, 284, 285, 286, 287, 288, 293, 294, + 295, 296, 299, 302, 303, 307, 308, 312, 313, 314, + 315, 318, 319, 320, 322, 325, 328, 334, 337, 338, + 342, 348, 349, 351, 354, 355, 361, 364, 365, 368, + 372, 373, 381, 382, 383, 384, 386, 388, 393, 394, + 395, 402, 406, 407, 408, 409, 410, 411, 414, 417, + 421, 424, 427, 433, 436, 437, 438, 439, 440, 441, + 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, + 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, + 462, 465, 466, 469, 470, 473, 474, 477, 478, 482, + 483, 486, 487, 490, 491, 494, 495, 496, 497, 498, + 499, 500, 503, 504, 507, 508, 511, 512, 515, 518, + 519, 522, 526, 530, 531, 532, 533, 534, 535, 536, + 537, 538, 539, 540, 541, 547, 556, 557, 558, 563, + 569, 570, 571, 578, 
583, 584, 585, 586, 587, 588, + 589, 590, 602, 604, 605, 606, 607, 608, 609, 610, + 613, 614, 617, 618, 621, 622, 626, 643, 649, 665, + 670, 671, 672, 675, 676, 677, 678, 681, 682, 683, + 684, 685, 686, 687, 688, 691, 694, 699, 703, 707, + 709, 711, 716, 717, 721, 722, 723, 726, 727, 730, + 731, 732, 733, 734, 735, 736, 737, 741, 747, 748, + 749, 752, 753, 757, 758, 759, 760, 761, 762, 763, + 767, 773, 774, 777, 778, 781, 784, 800, 801, 802, + 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, + 813, 814, 815, 816, 817, 818, 819, 820, 823, 826, + 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, + 841, 842, 843, 844, 845, 846, 849, 850, 851, 854, + 855, 856, 857, 858, 861, 862, 863, 864, 865, 866, + 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, + 877, 880, 884, 885, 888, 889, 890, 891, 893, 896, + 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, + 916, 926, 928, 931, 938, 939, 944, 950, 951, 953, + 974, 977, 981, 984, 985, 988, 989, 990, 994, 999, + 1000, 1001, 1002, 1006, 1007, 1009, 1013, 1017, 1022, 1026, + 1030, 1031, 1032, 1037, 1040, 1041, 1044, 1045, 1046, 1049, + 1050, 1053, 1054, 1057, 1058, 1063, 1064, 1065, 1066, 1073, + 1080, 1087, 1094, 1102, 1110, 1111, 1112, 1113, 1114, 1115, + 1119, 1122, 1124, 1126, 1128, 1130, 1132, 1134, 1136, 1138, + 1140, 1142, 1144, 1146, 1148, 1150, 1152, 1154, 1156, 1160, + 1163, 1164, 1167, 1168, 1172, 1173, 1174, 1179, 1180, 1181, + 1183, 1185, 1187, 1188, 1189, 1193, 1197, 1201, 1205, 1209, + 1213, 1217, 1221, 1225, 1229, 1233, 1237, 1241, 1245, 1249, + 1253, 1257, 1261, 1268, 1269, 1271, 1275, 1276, 1278, 1282, + 1283, 1287, 1288, 1291, 1292, 1295, 1296, 1299, 1300, 1304, + 1305, 1306, 1310, 1311, 1312, 1314, 1318, 1319, 1323, 1329, + 1332, 1335, 1338, 1341, 1344, 1347, 1355, 1358, 1361, 1364, + 1367, 1370, 1373, 1377, 1378, 1379, 1380, 1381, 1382, 1383, + 1384, 1393, 1394, 1395, 1402, 1410, 1418, 1424, 1430, 1436, + 1440, 1441, 1443, 1445, 1449, 1455, 1458, 1459, 1460, 1461, + 1462, 1466, 1467, 1470, 1471, 1474, 1475, 1479, 1480, 1483, + 1484, 1487, 1488, 1489, 1493, 1494, 1495, 1496, 1497, 1498, + 1499, 1500, 1503, 1509, 1516, 1517, 1520, 1521, 1522, 1523, + 1527, 1528, 1535, 1541, 1543, 1546, 1548, 1549, 1551, 1553, + 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, 1563, + 1564, 1565, 1566, 1567, 1568, 1569, 1571, 1573, 1578, 1583, + 1586, 1588, 1590, 1591, 1592, 1593, 1594, 1596, 1598, 1600, + 1601, 1603, 1606, 1610, 1611, 1612, 1613, 1615, 1616, 1617, + 1618, 1619, 1620, 1621, 1622, 1625, 1626, 1629, 1630, 1631, + 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, + 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, + 1652, 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, + 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, + 1672, 1673, 1674, 1675, 1676, 1677, 1681, 1687, 1688, 1689, + 1690, 1691, 1692, 1693, 1694, 1695, 1697, 1699, 1706, 1713, + 1719, 1725, 1740, 1755, 1756, 1757, 1758, 1759, 1760, 1761, + 1764, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772, 1773, + 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1784, 1785, + 1788, 1789, 1790, 1791, 1794, 1798, 1800, 1802, 1803, 1804, + 1806, 1815, 1816, 1817, 1820, 1823, 1828, 1829, 1833, 1834, + 1837, 1840, 1841, 1844, 1847, 1850, 1853, 1857, 1863, 1869, + 1875, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, + 1892, 1893, 1894, 1895, 1896, 1897, 1901, 1902, 1905, 1908, + 1910, 1913, 1915, 1919, 1922, 1926, 1929, 1933, 1936, 1942, + 1944, 1947, 1948, 1951, 1952, 1955, 1958, 1961, 1962, 1963, + 1964, 1965, 1966, 1967, 1968, 1969, 1970, 
1973, 1974, 1977, + 1978, 1979, 1982, 1983, 1986, 1987, 1989, 1990, 1991, 1992, + 1995, 1998, 2001, 2004, 2006, 2010, 2011, 2014, 2015, 2016, + 2017, 2020, 2023, 2026, 2027, 2028, 2029, 2030, 2031, 2032, + 2033, 2034, 2035, 2038, 2039, 2042, 2043, 2044, 2045, 2047, + 2049, 2050, 2053, 2054, 2058, 2059, 2060, 2063, 2064, 2067, + 2068, 2069, 2070 }; #endif @@ -1490,7 +1494,7 @@ static const char *const yytname[] = "SEALED_", "NESTED_", "ABSTRACT_", "AUTO_", "SEQUENTIAL_", "EXPLICIT_", "ANSI_", "UNICODE_", "AUTOCHAR_", "IMPORT_", "ENUM_", "VIRTUAL_", "NOINLINING_", "AGGRESSIVEINLINING_", "NOOPTIMIZATION_", - "AGGRESSIVEOPTIMIZATION_", "UNMANAGEDEXP_", "BEFOREFIELDINIT_", + "AGGRESSIVEOPTIMIZATION_", "UNMANAGEDEXP_", "BEFOREFIELDINIT_", "ASYNC_", "STRICT_", "RETARGETABLE_", "WINDOWSRUNTIME_", "NOPLATFORM_", "METHOD_", "FIELD_", "PINNED_", "MODREQ_", "MODOPT_", "SERIALIZABLE_", "PROPERTY_", "TYPE_", "ASSEMBLY_", "FAMANDASSEM_", "FAMORASSEM_", "PRIVATESCOPE_", @@ -1574,12 +1578,12 @@ yysymbol_name (yysymbol_kind_t yysymbol) } #endif -#define YYPACT_NINF (-1367) +#define YYPACT_NINF (-1345) #define yypact_value_is_default(Yyn) \ ((Yyn) == YYPACT_NINF) -#define YYTABLE_NINF (-559) +#define YYTABLE_NINF (-575) #define yytable_value_is_error(Yyn) \ 0 @@ -1588,165 +1592,167 @@ yysymbol_name (yysymbol_kind_t yysymbol) STATE-NUM. */ static const yytype_int16 yypact[] = { - -1367, 2062, -1367, -1367, -51, 987, -1367, -86, 123, 2317, - 2317, -1367, -1367, 246, 182, -31, -19, 16, 105, -1367, - 133, 272, 272, 215, 215, 1641, 9, -1367, 987, 987, - 987, 987, -1367, -1367, 315, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, 320, 320, -1367, -1367, -1367, -1367, 320, 74, - -1367, 285, 103, -1367, -1367, -1367, -1367, 729, -1367, 320, - 272, -1367, -1367, 116, 144, 167, 169, -1367, -1367, -1367, - -1367, -1367, 191, 272, -1367, -1367, -1367, 368, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, 1929, 43, 58, -1367, -1367, 181, 195, - -1367, -1367, 824, 502, 502, 1825, 166, -1367, 2925, -1367, - -1367, 202, 272, 272, 238, -1367, 620, 849, 987, 191, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, 2925, - -1367, -1367, -1367, 894, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, 589, -1367, 418, 589, - 378, -1367, 2339, -1367, -1367, -1367, 67, 50, 191, 373, - 387, -1367, 404, 1377, 414, 254, 745, -1367, 589, 45, - 191, 191, 191, -1367, -1367, 282, 579, 301, 314, -1367, - 3481, 1929, 557, -1367, 3653, 2269, 335, 17, 93, 276, - 281, 291, 317, 347, 782, 358, -1367, -1367, 320, 359, - 61, -1367, -1367, -1367, -1367, 1130, 987, 385, 2715, 380, - 95, -1367, 502, -1367, 330, 926, -1367, 402, -11, 413, - 664, 272, 272, -1367, -1367, -1367, -1367, -1367, -1367, 432, - -1367, -1367, 91, 1273, -1367, 447, -1367, -1367, 69, 620, - -1367, -1367, -1367, -1367, 533, -1367, -1367, -1367, -1367, 191, - -1367, -1367, -34, 191, 926, -1367, -1367, -1367, -1367, -1367, - 589, -1367, 741, -1367, -1367, -1367, -1367, 1582, 987, 483, - 4, 523, 472, 191, -1367, 987, 987, 987, -1367, 2925, - 987, 987, -1367, 507, 536, 987, 68, 2925, -1367, -1367, - 490, 589, 413, -1367, -1367, -1367, -1367, 2862, 539, -1367, - -1367, -1367, -1367, -1367, -1367, 803, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -135, - -1367, 1929, -1367, 3003, 543, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, 562, -1367, -1367, 
-1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, 272, -1367, - 272, -1367, -1367, -1367, 272, 529, 11, 1999, -1367, -1367, - -1367, 537, -1367, -1367, -44, -1367, -1367, -1367, -1367, 546, - 208, -1367, -1367, 503, 272, 215, 296, 503, 1377, 985, - 1929, 171, 502, 1825, 582, 320, -1367, -1367, -1367, 588, - 272, 272, -1367, 272, -1367, 272, -1367, 215, -1367, 303, - -1367, 303, -1367, -1367, 559, 594, 368, 596, -1367, -1367, - -1367, 272, 272, 954, 3164, 1071, 581, -1367, -1367, -1367, - 868, 191, 191, -1367, 599, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, 607, 56, - -1367, 987, 44, 2925, 905, 629, -1367, 2093, -1367, 923, - 641, 651, 655, 1377, -1367, -1367, 413, -1367, -1367, 85, - 38, 654, 937, -1367, -1367, 763, -4, -1367, 987, -1367, - -1367, 38, 955, 107, 987, 987, 987, 191, -1367, 191, - 191, 191, 1433, 191, 191, 1929, 1929, 191, -1367, -1367, - 940, -62, -1367, 674, 690, 926, -1367, -1367, -1367, 272, - -1367, -1367, -1367, -1367, -1367, -1367, 222, -1367, 691, -1367, - 874, -1367, -1367, -1367, 272, 272, -1367, 25, 2162, -1367, - -1367, -1367, -1367, 702, -1367, -1367, 707, 714, -1367, -1367, - -1367, -1367, 715, 272, 905, 2819, -1367, -1367, 712, 272, - 111, 137, 272, 502, 1000, -1367, 735, 100, 2432, -1367, - 1929, -1367, -1367, -1367, 546, 28, 208, 28, 28, 28, - 968, 973, -1367, -1367, -1367, -1367, -1367, -1367, 743, 750, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, 1582, - -1367, 753, 413, 320, 2925, -1367, 503, 751, 905, 756, - 749, 757, 761, 762, 765, 766, -1367, 782, 767, -1367, - 755, 55, 862, 785, 21, 82, -1367, -1367, -1367, -1367, - -1367, -1367, 320, 320, -1367, 786, 788, -1367, 320, -1367, - 320, -1367, 792, 73, 987, 876, -1367, -1367, -1367, -1367, - 987, 877, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, 272, 3377, 8, 121, 987, 1085, 27, 798, - 806, -1367, 675, 802, 810, 809, -1367, 1100, -1367, -1367, - 825, 836, 3058, 2874, 839, 840, 575, 996, 320, 987, - 191, 987, 987, 254, 254, 254, 846, 848, 850, 272, - 146, -1367, -1367, 2925, 854, 847, -1367, -1367, -1367, -1367, - -1367, -1367, 222, 125, 843, 1929, 1929, 1741, 752, -1367, - -1367, 1130, 142, 164, 502, 1139, -1367, -1367, -1367, 2516, - -1367, 864, 1, 2005, 209, 426, 272, 873, 272, 191, - 272, 237, 878, 2925, 575, 100, -1367, 2819, 866, 881, - -1367, -1367, -1367, -1367, 503, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, 368, 272, 272, 215, 38, 1144, 905, - 883, 871, 887, 888, 893, -1367, 225, 884, -1367, 884, - 884, 884, 884, 884, -1367, -1367, 272, -1367, 272, 272, - 889, -1367, -1367, 886, 899, 413, 902, 907, 910, 913, - 915, 918, 272, 987, -1367, 191, 987, 15, 987, 919, - -1367, -1367, -1367, 791, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, 914, 976, 981, -1367, - 974, 925, -7, 1199, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, 914, 914, -1367, 2977, -1367, -1367, - -1367, -1367, 927, 320, 149, 368, 932, 987, 486, -1367, - 905, 933, 935, 944, -1367, 2093, -1367, 92, -1367, 355, - 365, 941, 375, 381, 388, 395, 403, 411, 417, 419, - 425, 434, 439, 441, 449, -1367, 1230, -1367, 320, -1367, - 272, 942, 100, 100, 191, 654, -1367, -1367, 368, -1367, - -1367, -1367, 939, 191, 191, 254, 100, -1367, -1367, -1367, - -1367, 926, -1367, 272, -1367, 1929, 374, 987, -1367, -1367, - 1046, -1367, -1367, 470, 987, -1367, -1367, 2925, 191, 272, - 191, 272, 481, 2925, 575, 3138, 870, 1533, -1367, 1129, - 
-1367, 905, 2196, 951, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, 943, 948, -1367, 956, 957, 967, - 969, 953, 575, -1367, 1117, 970, 971, 1929, 932, 1582, - -1367, 977, 426, -1367, 1251, 1211, 1212, -1367, -1367, 990, - 992, 987, 476, -1367, 100, 503, 503, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, 66, 1268, -1367, -1367, 21, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, 993, 254, 191, - 272, 191, -1367, -1367, -1367, -1367, -1367, -1367, 1033, -1367, - -1367, -1367, -1367, 905, 1005, 1008, -1367, -1367, -1367, -1367, - -1367, 879, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - 363, -1367, 31, 78, -1367, -1367, 2281, -1367, 997, -1367, - -1367, 413, -1367, 1004, -1367, -1367, -1367, -1367, 1001, -1367, - -1367, -1367, -1367, 413, 780, 272, 272, 272, 485, 500, - 509, 527, 272, 272, 272, 272, 272, 272, 215, 272, - 633, 272, 555, 272, 272, 272, 272, 272, 272, 272, - 215, 272, 3468, 272, 189, 272, 497, 272, -1367, -1367, - -1367, 3236, 1012, 1013, -1367, 1018, 1022, 1024, 1025, -1367, - 1154, 1026, 1028, 1032, 1036, -1367, 222, -1367, 374, 1377, - -1367, 191, 56, 1030, 1031, 1929, 1582, 1076, -1367, 1377, - 1377, 1377, 1377, -1367, -1367, -1367, -1367, -1367, -1367, 1377, - 1377, 1377, -1367, -1367, -1367, -1367, -1367, -1367, -1367, 413, - -1367, 272, 430, 722, -1367, -1367, -1367, -1367, 3377, 1037, - 368, -1367, 1040, -1367, -1367, 1317, -1367, 368, -1367, 368, - 272, -1367, -1367, 191, -1367, 1045, -1367, -1367, -1367, 272, - -1367, 1042, -1367, -1367, 1049, 619, 272, 272, -1367, -1367, - -1367, -1367, -1367, -1367, 905, 1048, -1367, -1367, 272, -1367, - -39, 1054, 1055, 1041, 1056, 1065, 1066, 1068, 1069, 1072, - 1074, 1077, 1078, 1079, -1367, 413, -1367, -1367, 272, 742, - -1367, 794, 1080, 1082, 1075, 1086, 1083, 272, 272, 272, - 272, 272, 272, 215, 272, 1089, 1088, 1101, 1096, 1103, - 1102, 1104, 1105, 1107, 1110, 1108, 1111, 1113, 1121, 1114, - 1122, 1128, 1127, 1132, 1131, 1133, 1141, 1134, 1143, 1148, - 1149, 1146, 1152, 1367, 1155, 1153, -1367, 531, -1367, 168, - -1367, -1367, 1099, -1367, -1367, 100, 100, -1367, -1367, -1367, - -1367, 1929, -1367, -1367, 643, -1367, 1159, -1367, 1439, 502, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, 2405, 1171, -1367, - -1367, -1367, -1367, 1172, 1183, -1367, 1929, 575, -1367, -1367, - -1367, -1367, 1470, 21, 272, 905, 1180, 1182, 413, -1367, - 1184, 272, -1367, 1188, 1191, 1194, 1195, 1196, 1187, 1192, - 1201, 1202, 1260, -1367, -1367, -1367, 1213, -1367, 1216, 1210, - 1207, 1223, 1220, 1228, 1225, 1232, 1226, -1367, 1234, -1367, - 1235, -1367, 1236, -1367, 1237, -1367, -1367, 1238, -1367, -1367, - 1239, -1367, 1240, -1367, 1241, -1367, 1254, -1367, 1255, -1367, - 1261, -1367, -1367, 1263, -1367, 1259, -1367, 1265, 1552, -1367, - 1262, 535, -1367, 1267, 1269, -1367, 100, 1929, 575, 2925, - -1367, -1367, -1367, 100, -1367, 1266, -1367, 1264, 1270, 266, - -1367, 3447, -1367, 1271, -1367, 272, 272, 272, -1367, -1367, - -1367, -1367, -1367, 1274, -1367, 1275, -1367, 1278, -1367, 1280, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, 3468, -1367, -1367, 1281, - -1367, 1266, 1582, 1286, 1277, 1288, -1367, 21, -1367, 905, - -1367, 149, -1367, 1289, 1290, 1291, 176, 57, -1367, -1367, - -1367, -1367, 83, 87, 
101, 170, 175, 179, 106, 109, - 162, 173, 1881, 148, 477, -1367, 932, 1295, 1544, -1367, - 100, -1367, 635, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - 205, 206, 212, 193, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, 1583, -1367, -1367, - -1367, 100, 575, 2387, 1301, 905, -1367, -1367, -1367, -1367, - -1367, 1302, 1305, 1306, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - 505, 1346, 100, 272, -1367, 1504, 1320, 1321, 502, -1367, - -1367, 2925, 1582, 1593, 575, 1266, 1327, 100, 1331, -1367 + -1345, 1188, -1345, -1345, -80, 5281, -1345, -75, 61, 2078, + 2078, -1345, -1345, 240, 223, -67, -30, -20, 50, -1345, + 4471, 275, 275, 141, 141, 1282, -16, -1345, 5281, 5281, + 5281, 5281, -1345, -1345, 293, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, 290, 290, -1345, -1345, -1345, -1345, 290, 15, + -1345, 294, 89, -1345, -1345, -1345, -1345, 129, -1345, 290, + 275, -1345, -1345, 97, 107, 130, 135, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, 128, 275, -1345, + -1345, -1345, 4538, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, 2294, 33, + 99, -1345, -1345, 168, 175, -1345, -1345, 451, 132, 132, + 2151, 180, -1345, 4333, -1345, -1345, 207, 275, 275, 4880, + -1345, 4826, 5140, 5281, 128, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, 4333, -1345, -1345, -1345, 614, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, 3190, -1345, 453, 3190, 164, -1345, 3628, -1345, -1345, + -1345, 885, 733, 128, 377, 394, -1345, 397, 1446, 415, + 214, 359, -1345, 3190, 64, 128, 128, 128, -1345, -1345, + 261, 553, 274, 281, -1345, 5035, 2294, 537, -1345, 5380, + 3613, 299, 62, 86, 94, 162, 169, 186, 317, 178, + 322, -1345, -1345, 290, 324, 42, -1345, -1345, -1345, -1345, + 5130, 5281, 323, 4163, 334, 2832, -1345, 132, -1345, -13, + 221, -1345, 360, -33, 365, 660, 275, 275, -1345, -1345, + -1345, -1345, -1345, -1345, 374, -1345, -1345, 57, 164, 1535, + -1345, 382, -1345, -1345, -45, 4826, -1345, -1345, -1345, 31, + 472, -1345, -1345, -1345, -1345, 128, -1345, -1345, -53, 128, + 221, -1345, -1345, -1345, -1345, -1345, 3190, -1345, 663, -1345, + -1345, -1345, -1345, 1843, 5281, 401, -146, 413, 5238, 128, + -1345, 5281, 5281, 5281, -1345, 4333, 5281, 5281, -1345, 422, + 424, 5281, 38, 4333, -1345, -1345, 430, 3190, 365, -1345, + -1345, -1345, -1345, 4271, 434, -1345, -1345, -1345, -1345, -1345, + -1345, 416, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -102, -1345, 2294, -1345, 4502, + 441, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, 445, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, 275, -1345, 275, -1345, -1345, -1345, + 275, 426, -54, 2452, -1345, -1345, -1345, 439, -1345, -1345, + -78, -1345, -1345, -1345, -1345, 531, 2286, -1345, -1345, 2587, + 275, 141, 108, 2587, 1446, 3572, 2294, 140, 132, 2151, + 449, 290, -1345, -1345, -1345, 454, 275, 275, -1345, 275, + -1345, 275, -1345, 141, -1345, 138, -1345, 138, -1345, -1345, + 466, 458, 4538, 464, -1345, -1345, -1345, 275, 275, 1460, + 1144, 1632, 2147, -1345, -1345, -1345, 721, 128, 
128, -1345, + 473, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, 474, 2431, -1345, 5281, -86, 4333, + 755, 481, -1345, 2595, -1345, 765, 482, 478, 489, 1446, + -1345, -1345, 365, -1345, -1345, 2850, 46, 476, 769, -1345, + -1345, 581, -51, -1345, 5281, -1345, -1345, 46, 772, -29, + 5281, 5281, 5281, 128, -1345, 128, 128, 128, 1685, 128, + 128, 2294, 2294, 128, -1345, -1345, 775, -81, -1345, 490, + 506, 221, -1345, -1345, -1345, 275, -1345, -1345, -1345, -1345, + -1345, -1345, 187, -1345, 507, -1345, 690, -1345, -1345, -1345, + 275, 275, -1345, -35, 2753, -1345, -1345, -1345, -1345, 516, + -1345, -1345, 518, 523, -1345, -1345, -1345, -1345, 525, 275, + 755, 3989, -1345, -1345, 513, 275, 892, 3285, 275, 132, + 804, -1345, 529, 79, 3795, -1345, 2294, -1345, -1345, -1345, + 531, 13, 2286, 13, 13, 13, 768, 776, -1345, -1345, + -1345, -1345, -1345, -1345, 536, 551, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, 1843, -1345, 552, 365, 290, + 4333, -1345, 2587, 555, 755, 556, 548, 557, 558, 560, + 571, 573, -1345, 178, 574, -1345, 545, 43, 645, 575, + 29, 34, -1345, -1345, -1345, -1345, -1345, -1345, 290, 290, + -1345, 577, 578, -1345, 290, -1345, 290, -1345, 582, 59, + 5281, 656, -1345, -1345, -1345, -1345, 5281, 662, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, 275, 4789, + -1, -27, 5281, 477, 21, 583, 587, -1345, 3274, 584, + 593, 592, -1345, 878, -1345, -1345, 589, 597, 4479, 4278, + 594, 600, 5184, 633, 290, 5281, 128, 5281, 5281, 214, + 214, 214, 601, 596, 603, 275, 176, -1345, -1345, 4333, + 605, 607, -1345, -1345, -1345, -1345, -1345, -1345, 187, 3976, + 606, 2294, 2294, 1993, 739, -1345, -1345, 5130, 3292, 3387, + 132, 886, -1345, -1345, -1345, 3969, -1345, 611, -3, 467, + 188, 404, 275, 609, 275, 128, 275, -100, 612, 4333, + 5184, 79, -1345, 3989, 615, 624, -1345, -1345, -1345, -1345, + 2587, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, 4538, + 275, 275, 141, 46, 900, 755, 625, 460, 627, 629, + 631, -1345, 24, 632, -1345, 632, 632, 632, 632, 632, + -1345, -1345, 275, -1345, 275, 275, 635, -1345, -1345, 628, + 638, 365, 640, 641, 639, 643, 644, 646, 275, 5281, + -1345, 128, 5281, 16, 5281, 647, -1345, -1345, -1345, 519, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, 649, 698, 713, -1345, 702, 670, -64, 929, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + 649, 649, -1345, 4385, -1345, -1345, -1345, -1345, 661, 290, + 18, 4538, 668, 5281, 2960, -1345, 755, 679, 674, 693, + -1345, 2595, -1345, 65, -1345, 210, 224, 793, 238, 241, + 254, 264, 265, 300, 311, 312, 338, 353, 354, 355, + 368, -1345, 954, -1345, 290, -1345, 275, 687, 79, 79, + 128, 476, -1345, -1345, 4538, -1345, -1345, -1345, 694, 128, + 128, 214, 79, -1345, -1345, -1345, -1345, 221, -1345, 275, + -1345, 2294, -60, 5281, -1345, -1345, 795, -1345, -1345, 105, + 5281, -1345, -1345, 4333, 128, 275, 128, 275, 48, 4333, + 5184, 4554, 1324, 1940, -1345, 2711, -1345, 755, 493, 703, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + 696, 730, -1345, 697, 707, 725, 735, 742, 5184, -1345, + 901, 740, 743, 2294, 668, 1843, -1345, 748, 404, -1345, + 1019, 983, 984, -1345, -1345, 754, 756, 5281, 84, -1345, + 79, 2587, 2587, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, 44, 1043, -1345, -1345, 29, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, 757, 214, 128, 275, 128, -1345, -1345, + -1345, -1345, -1345, -1345, 818, -1345, -1345, -1345, 
-1345, 755, + 774, 779, -1345, -1345, -1345, -1345, -1345, 706, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, 143, -1345, 27, 51, + -1345, -1345, 3441, -1345, 780, -1345, -1345, 365, -1345, 784, + -1345, -1345, -1345, -1345, 791, -1345, -1345, -1345, -1345, 365, + 378, 275, 275, 275, 384, 385, 390, 396, 275, 275, + 275, 275, 275, 275, 141, 275, 565, 275, 562, 275, + 275, 275, 275, 275, 275, 275, 141, 275, 3528, 275, + 127, 275, 3090, 275, -1345, -1345, -1345, 5362, 778, 782, + -1345, 787, 788, 796, 797, -1345, 921, 794, 801, 803, + 800, -1345, 187, -1345, -60, 1446, -1345, 128, 2431, 802, + 805, 2294, 1843, 844, -1345, 1446, 1446, 1446, 1446, -1345, + -1345, -1345, -1345, -1345, -1345, 1446, 1446, 1446, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, 365, -1345, 275, 336, 486, + -1345, -1345, -1345, -1345, 4789, 806, 4538, -1345, 811, -1345, + -1345, 1087, -1345, 4538, -1345, 4538, 275, -1345, -1345, 128, + -1345, 813, -1345, -1345, -1345, 275, -1345, 810, -1345, -1345, + 812, 406, 275, 275, -1345, -1345, -1345, -1345, -1345, -1345, + 755, 828, -1345, -1345, 275, -1345, -71, 821, 843, 816, + 847, 849, 851, 852, 854, 855, 857, 859, 860, 861, + -1345, 365, -1345, -1345, 275, 233, -1345, 745, 866, 863, + 864, 865, 867, 275, 275, 275, 275, 275, 275, 141, + 275, 869, 872, 870, 873, 879, 874, 880, 877, 887, + 888, 881, 889, 890, 882, 891, 893, 896, 894, 899, + 898, 903, 902, 904, 905, 907, 908, 909, 911, 1152, + 912, 913, -1345, 3111, -1345, 3450, -1345, -1345, 910, -1345, + -1345, 79, 79, -1345, -1345, -1345, -1345, 2294, -1345, -1345, + 417, -1345, 918, -1345, 1170, 132, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, 960, 919, -1345, -1345, -1345, -1345, 922, + 868, -1345, 2294, 5184, -1345, -1345, -1345, -1345, 1193, 29, + 275, 755, 917, 920, 365, -1345, 923, 275, -1345, 924, + 926, 927, 931, 928, 925, 930, 939, 934, 975, -1345, + -1345, -1345, 932, -1345, 936, 950, 947, 952, 949, 956, + 953, 958, 955, -1345, 972, -1345, 973, -1345, 974, -1345, + 976, -1345, -1345, 986, -1345, -1345, 987, -1345, 988, -1345, + 989, -1345, 990, -1345, 991, -1345, 997, -1345, -1345, 998, + -1345, 1000, -1345, 999, 1242, -1345, 970, 189, -1345, 1001, + 1003, -1345, 79, 2294, 5184, 4333, -1345, -1345, -1345, 79, + -1345, 895, -1345, 1008, 1005, 76, -1345, 4828, -1345, 979, + -1345, 275, 275, 275, -1345, -1345, -1345, -1345, -1345, 1012, + -1345, 1020, -1345, 1026, -1345, 1031, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, 3528, -1345, -1345, 1032, -1345, 895, 1843, 1033, + 1028, 1036, -1345, 29, -1345, 755, -1345, 18, -1345, 1038, + 1041, 1044, 8, 68, -1345, -1345, -1345, -1345, 78, 81, + 87, 98, 74, 71, 92, 95, 102, 100, 1034, 53, + 2978, -1345, 668, 1037, 1320, -1345, 79, -1345, 414, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, 106, 121, 122, 104, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, 1340, -1345, -1345, -1345, 79, 5184, 2000, + 1053, 755, -1345, -1345, -1345, -1345, -1345, 1058, 1062, 1067, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, 412, 1107, 79, + 275, -1345, 1260, 1072, 1073, 132, -1345, -1345, 
4333, 1843, + 1351, 5184, 895, 1077, 79, 1078, -1345 }; /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. @@ -1754,213 +1760,215 @@ static const yytype_int16 yypact[] = means the default is an error. */ static const yytype_int16 yydefact[] = { - 2, 0, 1, 86, 106, 0, 265, 209, 390, 0, - 0, 760, 761, 0, 222, 0, 0, 775, 781, 838, - 93, 0, 0, 0, 0, 0, 0, 29, 0, 0, - 0, 0, 58, 59, 0, 61, 3, 25, 26, 27, - 84, 85, 434, 434, 19, 17, 10, 9, 434, 0, - 109, 136, 0, 7, 272, 336, 8, 0, 18, 434, - 0, 11, 12, 0, 0, 0, 0, 817, 37, 40, - 38, 39, 105, 0, 189, 391, 392, 389, 745, 746, - 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, - 757, 758, 759, 0, 0, 34, 216, 217, 0, 0, - 223, 224, 229, 222, 222, 0, 62, 72, 0, 220, - 215, 0, 0, 0, 0, 781, 0, 0, 0, 94, - 42, 20, 21, 44, 43, 23, 24, 554, 711, 0, - 688, 696, 694, 0, 697, 698, 699, 700, 701, 702, - 707, 708, 709, 710, 671, 695, 0, 687, 0, 0, - 0, 492, 0, 555, 556, 557, 0, 0, 558, 0, - 0, 236, 0, 222, 0, 552, 0, 692, 30, 53, - 55, 56, 57, 60, 436, 0, 435, 0, 0, 2, - 0, 0, 138, 140, 222, 0, 0, 397, 397, 397, - 397, 397, 397, 0, 0, 0, 387, 394, 434, 0, - 763, 791, 809, 827, 841, 0, 0, 0, 0, 0, - 0, 553, 222, 560, 721, 563, 32, 0, 0, 723, - 0, 0, 0, 225, 226, 227, 228, 218, 219, 0, - 74, 73, 0, 0, 104, 0, 22, 776, 777, 0, - 782, 783, 784, 786, 0, 787, 788, 789, 790, 780, - 839, 840, 836, 95, 693, 703, 704, 705, 706, 670, - 0, 673, 0, 689, 691, 234, 235, 0, 0, 0, - 0, 0, 0, 686, 684, 0, 0, 0, 231, 0, - 0, 0, 678, 0, 0, 0, 714, 537, 677, 676, - 0, 30, 54, 65, 437, 69, 103, 0, 0, 112, - 133, 110, 111, 114, 115, 0, 116, 117, 118, 119, - 120, 121, 122, 123, 113, 132, 125, 124, 134, 148, - 137, 0, 108, 0, 0, 278, 273, 274, 275, 276, - 277, 281, 279, 289, 280, 282, 283, 284, 285, 286, - 287, 288, 0, 290, 314, 493, 494, 495, 496, 497, - 498, 499, 500, 501, 502, 503, 504, 505, 0, 372, - 0, 335, 343, 344, 0, 0, 0, 0, 365, 6, - 350, 0, 352, 351, 0, 337, 358, 336, 339, 0, - 0, 345, 507, 0, 0, 0, 0, 0, 222, 0, - 0, 0, 222, 0, 0, 434, 346, 348, 349, 0, - 0, 0, 413, 0, 412, 0, 411, 0, 410, 0, - 408, 0, 409, 433, 0, 396, 0, 0, 722, 772, - 762, 0, 0, 0, 0, 0, 0, 820, 819, 818, - 0, 815, 41, 210, 0, 196, 190, 191, 192, 193, - 198, 199, 200, 201, 195, 202, 203, 194, 0, 0, - 388, 0, 0, 0, 0, 0, 731, 725, 730, 0, - 35, 0, 0, 222, 76, 70, 63, 311, 312, 714, - 313, 535, 0, 97, 778, 774, 807, 785, 0, 672, - 690, 233, 0, 0, 0, 0, 0, 685, 683, 51, - 52, 50, 0, 49, 559, 0, 0, 48, 715, 674, - 716, 0, 712, 0, 538, 539, 28, 31, 5, 0, - 126, 127, 128, 129, 130, 131, 157, 107, 139, 143, - 0, 106, 239, 253, 0, 0, 817, 0, 0, 4, - 181, 182, 175, 0, 141, 171, 0, 0, 336, 172, - 173, 174, 0, 0, 295, 0, 338, 340, 0, 0, - 0, 0, 0, 222, 0, 347, 0, 314, 0, 382, - 0, 380, 383, 366, 368, 0, 0, 0, 0, 0, - 0, 0, 369, 509, 508, 510, 511, 45, 0, 0, - 506, 513, 512, 516, 515, 517, 521, 522, 520, 0, - 523, 0, 524, 434, 0, 528, 530, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 393, 0, 0, 401, - 0, 765, 0, 0, 0, 0, 13, 803, 802, 794, - 792, 795, 434, 434, 814, 0, 0, 14, 434, 812, - 434, 810, 0, 0, 0, 0, 15, 835, 834, 828, - 0, 0, 16, 846, 845, 842, 821, 822, 823, 824, - 825, 826, 0, 564, 205, 0, 561, 0, 0, 0, - 732, 76, 0, 0, 0, 726, 33, 0, 221, 230, - 66, 0, 79, 537, 0, 0, 0, 0, 434, 0, - 837, 0, 0, 550, 548, 549, 677, 0, 0, 718, - 714, 675, 682, 0, 0, 0, 152, 154, 153, 155, - 150, 151, 157, 0, 0, 0, 0, 0, 222, 176, - 177, 0, 0, 0, 222, 0, 140, 242, 256, 0, - 827, 0, 295, 0, 0, 266, 0, 0, 0, 360, - 0, 0, 0, 0, 0, 314, 545, 0, 0, 542, - 543, 364, 381, 367, 0, 384, 374, 378, 379, 377, - 373, 375, 376, 0, 
0, 0, 0, 519, 0, 0, - 0, 0, 533, 534, 0, 514, 0, 397, 398, 397, - 397, 397, 397, 397, 395, 400, 0, 764, 0, 0, - 0, 797, 796, 0, 0, 800, 0, 0, 0, 0, - 0, 0, 0, 0, 833, 829, 0, 0, 0, 0, - 618, 572, 573, 0, 607, 574, 575, 576, 577, 578, - 579, 609, 585, 586, 587, 588, 619, 0, 0, 615, - 0, 0, 0, 569, 570, 571, 594, 595, 596, 613, - 597, 598, 599, 600, 619, 619, 603, 621, 611, 617, - 580, 270, 0, 0, 268, 0, 207, 562, 0, 719, - 0, 0, 38, 0, 724, 725, 36, 0, 64, 0, + 2, 0, 1, 101, 121, 0, 280, 224, 406, 0, + 0, 776, 777, 0, 237, 0, 0, 791, 797, 854, + 108, 0, 0, 0, 0, 0, 0, 29, 0, 0, + 0, 0, 73, 74, 0, 76, 3, 25, 26, 27, + 99, 100, 450, 450, 19, 17, 10, 9, 450, 0, + 124, 151, 0, 7, 287, 352, 8, 0, 18, 450, + 0, 11, 12, 0, 0, 0, 0, 833, 37, 55, + 53, 42, 38, 47, 48, 49, 50, 51, 52, 39, + 40, 41, 43, 44, 45, 46, 54, 120, 0, 204, + 407, 408, 405, 761, 762, 763, 764, 765, 766, 767, + 768, 769, 770, 771, 772, 773, 774, 775, 0, 0, + 34, 231, 232, 0, 0, 238, 239, 244, 237, 237, + 0, 77, 87, 0, 235, 230, 0, 0, 0, 0, + 797, 0, 0, 0, 109, 57, 20, 21, 59, 58, + 23, 24, 570, 727, 0, 704, 712, 710, 0, 713, + 714, 715, 716, 717, 718, 723, 724, 725, 726, 687, + 711, 0, 703, 0, 0, 38, 508, 0, 571, 572, + 573, 0, 0, 574, 0, 0, 251, 0, 237, 0, + 568, 0, 708, 30, 68, 70, 71, 72, 75, 452, + 0, 451, 0, 0, 2, 0, 0, 153, 155, 237, + 0, 0, 413, 413, 413, 413, 413, 413, 0, 0, + 0, 403, 410, 450, 0, 779, 807, 825, 843, 857, + 0, 0, 0, 0, 0, 0, 569, 237, 576, 737, + 579, 32, 0, 0, 739, 0, 0, 0, 240, 241, + 242, 243, 233, 234, 0, 89, 88, 0, 0, 0, + 119, 0, 22, 792, 793, 0, 798, 799, 800, 802, + 0, 803, 804, 805, 806, 796, 855, 856, 852, 110, + 709, 719, 720, 721, 722, 686, 0, 689, 0, 705, + 707, 249, 250, 0, 0, 0, 0, 0, 0, 702, + 700, 0, 0, 0, 246, 0, 0, 0, 694, 0, + 0, 0, 730, 553, 693, 692, 0, 30, 69, 80, + 453, 84, 118, 0, 0, 127, 148, 125, 126, 129, + 130, 0, 131, 132, 133, 134, 135, 136, 137, 138, + 128, 147, 140, 139, 149, 163, 152, 0, 123, 0, + 0, 293, 288, 289, 290, 291, 292, 296, 294, 304, + 295, 297, 298, 299, 300, 301, 302, 303, 0, 305, + 329, 509, 510, 511, 512, 513, 514, 515, 516, 517, + 518, 519, 520, 521, 0, 388, 0, 351, 359, 360, + 0, 0, 0, 0, 381, 6, 366, 0, 368, 367, + 0, 353, 374, 352, 355, 0, 0, 361, 523, 0, + 0, 0, 0, 0, 237, 0, 0, 0, 237, 0, + 0, 450, 362, 364, 365, 0, 0, 0, 429, 0, + 428, 0, 427, 0, 426, 0, 424, 0, 425, 449, + 0, 412, 0, 0, 738, 788, 778, 0, 0, 0, + 0, 0, 0, 836, 835, 834, 0, 831, 56, 225, + 0, 211, 205, 206, 207, 208, 213, 214, 215, 216, + 210, 217, 218, 209, 0, 0, 404, 0, 0, 0, + 0, 0, 747, 741, 746, 0, 35, 0, 0, 237, + 91, 85, 78, 326, 327, 730, 328, 551, 0, 112, + 794, 790, 823, 801, 0, 688, 706, 248, 0, 0, + 0, 0, 0, 701, 699, 66, 67, 65, 0, 64, + 575, 0, 0, 63, 731, 690, 732, 0, 728, 0, + 554, 555, 28, 31, 5, 0, 141, 142, 143, 144, + 145, 146, 172, 122, 154, 158, 0, 121, 254, 268, + 0, 0, 833, 0, 0, 4, 196, 197, 190, 0, + 156, 186, 0, 0, 352, 187, 188, 189, 0, 0, + 310, 0, 354, 356, 0, 0, 0, 0, 0, 237, + 0, 363, 0, 329, 0, 398, 0, 396, 399, 382, + 384, 0, 0, 0, 0, 0, 0, 0, 385, 525, + 524, 526, 527, 60, 0, 0, 522, 529, 528, 532, + 531, 533, 537, 538, 536, 0, 539, 0, 540, 450, + 0, 544, 546, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 409, 0, 0, 417, 0, 781, 0, 0, + 0, 0, 13, 819, 818, 810, 808, 811, 450, 450, + 830, 0, 0, 14, 450, 828, 450, 826, 0, 0, + 0, 0, 15, 851, 850, 844, 0, 0, 16, 862, + 861, 858, 837, 838, 839, 840, 841, 842, 0, 580, + 220, 0, 577, 0, 0, 0, 748, 91, 0, 0, + 0, 742, 33, 0, 236, 245, 81, 0, 94, 553, + 0, 0, 0, 0, 450, 0, 853, 0, 0, 566, + 564, 565, 693, 0, 0, 734, 730, 
691, 698, 0, + 0, 0, 167, 169, 168, 170, 165, 166, 172, 0, + 0, 0, 0, 0, 237, 191, 192, 0, 0, 0, + 237, 0, 155, 257, 271, 0, 843, 0, 310, 0, + 0, 281, 0, 0, 0, 376, 0, 0, 0, 0, + 0, 329, 561, 0, 0, 558, 559, 380, 397, 383, + 0, 400, 390, 394, 395, 393, 389, 391, 392, 0, + 0, 0, 0, 535, 0, 0, 0, 0, 549, 550, + 0, 530, 0, 413, 414, 413, 413, 413, 413, 413, + 411, 416, 0, 780, 0, 0, 0, 813, 812, 0, + 0, 816, 0, 0, 0, 0, 0, 0, 0, 0, + 849, 845, 0, 0, 0, 0, 634, 588, 589, 0, + 623, 590, 591, 592, 593, 594, 595, 625, 601, 602, + 603, 604, 635, 0, 0, 631, 0, 0, 0, 585, + 586, 587, 610, 611, 612, 629, 613, 614, 615, 616, + 635, 635, 619, 637, 627, 633, 596, 285, 0, 0, + 283, 0, 222, 578, 0, 735, 0, 0, 53, 0, + 740, 741, 36, 0, 79, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 93, 90, 457, 450, 92, 0, 0, 329, 329, + 328, 551, 113, 114, 0, 115, 116, 117, 0, 824, + 247, 567, 329, 695, 696, 733, 729, 556, 150, 0, + 173, 159, 176, 0, 164, 157, 0, 256, 255, 574, + 0, 270, 269, 0, 832, 0, 199, 0, 0, 0, + 0, 0, 0, 0, 182, 0, 306, 0, 0, 0, + 317, 318, 319, 320, 312, 313, 314, 311, 315, 316, + 0, 0, 309, 0, 0, 0, 0, 0, 0, 371, + 369, 0, 0, 0, 222, 0, 372, 0, 281, 357, + 329, 0, 0, 386, 387, 0, 0, 0, 0, 542, + 329, 546, 546, 545, 415, 423, 422, 421, 420, 418, + 419, 785, 783, 809, 820, 0, 822, 814, 817, 795, + 821, 827, 829, 0, 846, 847, 0, 860, 219, 624, + 597, 598, 599, 600, 0, 620, 626, 628, 632, 0, + 0, 0, 630, 617, 618, 641, 642, 0, 669, 643, + 644, 645, 646, 647, 648, 671, 653, 654, 655, 656, + 639, 640, 661, 662, 663, 664, 665, 666, 667, 668, + 638, 672, 673, 674, 675, 676, 677, 678, 679, 680, + 681, 682, 683, 684, 685, 657, 621, 212, 0, 0, + 605, 221, 0, 203, 0, 751, 752, 756, 754, 0, + 753, 750, 749, 736, 0, 94, 743, 91, 86, 82, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 78, 75, 441, 434, 77, - 0, 0, 314, 314, 313, 535, 98, 99, 0, 100, - 101, 102, 0, 808, 232, 551, 314, 679, 680, 717, - 713, 540, 135, 0, 158, 144, 161, 0, 149, 142, - 0, 241, 240, 558, 0, 255, 254, 0, 816, 0, - 184, 0, 0, 0, 0, 0, 0, 0, 167, 0, - 291, 0, 0, 0, 302, 303, 304, 305, 297, 298, - 299, 296, 300, 301, 0, 0, 294, 0, 0, 0, - 0, 0, 0, 355, 353, 0, 0, 0, 207, 0, - 356, 0, 266, 341, 314, 0, 0, 370, 371, 0, - 0, 0, 0, 526, 314, 530, 530, 529, 399, 407, - 406, 405, 404, 402, 403, 769, 767, 793, 804, 0, - 806, 798, 801, 779, 805, 811, 813, 0, 830, 831, - 0, 844, 204, 608, 581, 582, 583, 584, 0, 604, - 610, 612, 616, 0, 0, 0, 614, 601, 602, 625, - 626, 0, 653, 627, 628, 629, 630, 631, 632, 655, - 637, 638, 639, 640, 623, 624, 645, 646, 647, 648, - 649, 650, 651, 652, 622, 656, 657, 658, 659, 660, - 661, 662, 663, 664, 665, 666, 667, 668, 669, 641, - 605, 197, 0, 0, 589, 206, 0, 188, 0, 735, - 736, 740, 738, 0, 737, 734, 733, 720, 0, 79, - 727, 76, 71, 67, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 97, 98, 96, 0, 0, 0, + 552, 0, 0, 0, 0, 111, 793, 0, 0, 0, + 160, 161, 172, 175, 176, 237, 202, 252, 0, 0, + 0, 0, 0, 0, 183, 237, 237, 237, 237, 184, + 265, 266, 264, 258, 263, 237, 237, 237, 185, 278, + 279, 276, 272, 277, 193, 310, 308, 0, 0, 0, + 330, 331, 332, 333, 580, 163, 0, 375, 0, 378, + 379, 0, 358, 562, 560, 0, 0, 61, 62, 534, + 541, 0, 547, 548, 784, 0, 782, 0, 848, 859, + 0, 0, 0, 0, 670, 649, 650, 651, 652, 659, + 0, 0, 660, 284, 0, 606, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 456, 455, 454, 223, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 82, 83, - 81, 0, 0, 0, 536, 0, 0, 0, 0, 96, - 777, 0, 0, 0, 145, 146, 157, 160, 161, 222, 
- 187, 237, 0, 0, 0, 0, 0, 0, 168, 222, - 222, 222, 222, 169, 250, 251, 249, 243, 248, 222, - 222, 222, 170, 263, 264, 261, 257, 262, 178, 295, - 293, 0, 0, 0, 315, 316, 317, 318, 564, 148, - 0, 359, 0, 362, 363, 0, 342, 546, 544, 0, - 0, 46, 47, 518, 525, 0, 531, 532, 768, 0, - 766, 0, 832, 843, 0, 0, 0, 0, 654, 633, - 634, 635, 636, 643, 0, 0, 644, 269, 0, 590, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 440, 439, 438, 208, 0, 0, - 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 104, 0, 103, 0, 102, 448, 0, 229, + 228, 329, 329, 789, 697, 171, 178, 0, 177, 174, + 0, 198, 0, 201, 0, 237, 259, 260, 261, 262, + 275, 273, 274, 0, 0, 321, 322, 323, 324, 0, + 0, 370, 0, 0, 563, 401, 402, 543, 787, 0, + 0, 0, 0, 0, 622, 658, 0, 0, 607, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 744, + 83, 447, 0, 446, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 437, 0, 436, 0, 435, 0, 434, + 0, 432, 430, 0, 433, 431, 0, 445, 0, 444, + 0, 443, 0, 442, 0, 463, 0, 459, 458, 0, + 462, 0, 461, 0, 0, 106, 0, 0, 181, 0, + 0, 162, 329, 0, 0, 0, 307, 325, 282, 329, + 377, 179, 786, 0, 0, 0, 583, 580, 609, 0, + 755, 0, 0, 0, 760, 745, 497, 493, 441, 0, + 440, 0, 439, 0, 438, 0, 495, 493, 491, 489, + 483, 486, 495, 493, 491, 489, 506, 499, 460, 502, + 105, 107, 0, 227, 226, 0, 200, 179, 0, 0, + 0, 0, 180, 0, 636, 0, 582, 584, 608, 0, + 0, 0, 0, 0, 495, 493, 491, 489, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 89, 0, 88, 0, - 87, 432, 0, 214, 213, 314, 314, 773, 681, 156, - 163, 0, 162, 159, 0, 183, 0, 186, 0, 222, - 244, 245, 246, 247, 260, 258, 259, 0, 0, 306, - 307, 308, 309, 0, 0, 354, 0, 0, 547, 385, - 386, 527, 771, 0, 0, 0, 0, 0, 606, 642, - 0, 0, 591, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 728, 68, 431, 0, 430, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 421, 0, 420, - 0, 419, 0, 418, 0, 416, 414, 0, 417, 415, - 0, 429, 0, 428, 0, 427, 0, 426, 0, 447, - 0, 443, 442, 0, 446, 0, 445, 0, 0, 91, - 0, 0, 166, 0, 0, 147, 314, 0, 0, 0, - 292, 310, 267, 314, 361, 164, 770, 0, 0, 0, - 567, 564, 593, 0, 739, 0, 0, 0, 744, 729, - 481, 477, 425, 0, 424, 0, 423, 0, 422, 0, - 479, 477, 475, 473, 467, 470, 479, 477, 475, 473, - 490, 483, 444, 486, 90, 92, 0, 212, 211, 0, - 185, 164, 0, 0, 0, 0, 165, 0, 620, 0, - 566, 568, 592, 0, 0, 0, 0, 0, 479, 477, - 475, 473, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 80, 207, 0, 0, 319, - 314, 799, 0, 741, 742, 743, 463, 482, 462, 478, - 0, 0, 0, 0, 453, 480, 452, 451, 476, 450, - 474, 448, 469, 468, 449, 472, 471, 457, 456, 455, - 454, 466, 491, 485, 484, 464, 487, 0, 465, 489, - 252, 314, 0, 0, 0, 0, 461, 460, 459, 458, - 488, 0, 0, 0, 324, 320, 329, 330, 331, 332, - 333, 321, 322, 323, 325, 326, 327, 328, 271, 357, - 0, 0, 314, 0, 565, 0, 0, 0, 222, 179, - 334, 0, 0, 0, 0, 164, 0, 314, 0, 180 + 0, 95, 222, 0, 0, 334, 329, 815, 0, 757, + 758, 759, 479, 498, 478, 494, 0, 0, 0, 0, + 469, 496, 468, 467, 492, 466, 490, 464, 485, 484, + 465, 488, 487, 473, 472, 471, 470, 482, 507, 501, + 500, 480, 503, 0, 481, 505, 267, 329, 0, 0, + 0, 0, 477, 476, 475, 474, 504, 0, 0, 0, + 339, 335, 344, 345, 346, 347, 348, 349, 336, 337, + 338, 340, 341, 342, 343, 286, 373, 0, 0, 329, + 0, 581, 0, 0, 0, 237, 194, 350, 0, 0, + 0, 0, 179, 0, 329, 0, 195 }; /* YYPGOTO[NTERM-NUM]. 
*/ static const yytype_int16 yypgoto[] = { - -1367, 1443, -1367, 1336, -72, 32, -41, -5, 10, 22, - -358, -1367, 13, -18, 1603, -1367, -1367, 1166, 1243, -640, - -1367, -975, -1367, 26, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -313, -1367, -1367, -1367, 916, -1367, -1367, - -1367, 451, -1367, 929, 498, 499, -1367, -1366, -437, -1367, - -312, -1367, -1367, -942, -1367, -162, -98, -1367, 35, 1613, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, 677, - 462, -1367, -311, -1367, -702, -667, 1297, -1367, -1367, -243, - -1367, -141, -1367, -1367, 1081, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, 328, 7, -1367, -1367, -1367, 1035, -150, - 1586, 578, -40, -30, 805, -1367, -1058, -1367, -1367, -1324, - -1299, -1192, -1269, -1367, -1367, -1367, -1367, 23, -1367, -1367, - -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, - -1367, -1367, -1367, -27, 768, 982, -1367, -688, -1367, 692, - -22, -405, -74, 239, 130, -1367, -23, 538, -1367, 984, - 3, 811, -1367, -1367, 808, -1367, -1049, -1367, 1661, -1367, - 36, -1367, -1367, 545, 1205, -1367, 1566, -1367, -1367, -961, - 1272, -1367, -1367, -1367, -1367, -1367, -1367, -1367, -1367, 1160, - 975, -1367, -1367, -1367, -1367, -1367 + -1345, 1179, -1345, 1081, -106, 17, -88, -5, 10, 22, + -416, -1345, 32, -11, 1355, -1345, -1345, 914, 982, -642, + -1345, -966, -1345, 11, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -323, -1345, -1345, -1345, 664, -1345, -1345, + -1345, 197, -1345, 676, 245, 247, -1345, -1344, -445, -1345, + -320, -1345, -1345, -954, -1345, -172, -111, -1345, 3, 1374, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, 425, + 213, -1345, -317, -1345, -708, -687, 1055, -1345, -1345, -248, + -1345, -144, -1345, -1345, 824, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -151, 9, -1345, -1345, -1345, 798, -112, + 1359, 335, -44, 0, 559, -1345, -1094, -1345, -1345, -1322, + -1318, -1304, -1299, -1345, -1345, -1345, -1345, 12, -1345, -1345, + -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, + -1345, -1345, -1345, -264, 521, 731, -1345, -676, -1345, 443, + 19, -447, -107, -12, -103, -1345, -23, 287, -1345, 727, + 20, 563, -1345, -1345, 570, -1345, -1064, -1345, 1427, -1345, + 26, -1345, -1345, 292, 948, -1345, 1310, -1345, -1345, -976, + 1006, -1345, -1345, -1345, -1345, -1345, -1345, -1345, -1345, 906, + 709, -1345, -1345, -1345, -1345, -1345 }; /* YYDEFGOTO[NTERM-NUM]. 
*/ static const yytype_int16 yydefgoto[] = { - 0, 1, 36, 290, 658, 370, 71, 158, 782, 1520, - 582, 38, 372, 40, 41, 42, 43, 106, 229, 671, - 672, 876, 1121, 373, 1289, 45, 46, 677, 47, 48, - 49, 50, 51, 52, 180, 182, 322, 323, 518, 1133, - 1134, 517, 702, 703, 704, 1137, 907, 1465, 1466, 534, - 53, 208, 846, 1067, 74, 107, 108, 109, 211, 230, - 536, 707, 926, 1157, 537, 708, 927, 1166, 54, 952, - 842, 843, 55, 184, 723, 471, 737, 1543, 374, 185, - 375, 745, 377, 378, 563, 379, 380, 564, 565, 566, - 567, 568, 569, 746, 381, 57, 77, 196, 414, 402, - 415, 877, 878, 175, 176, 1237, 879, 1486, 1487, 1485, - 1484, 1477, 1482, 1476, 1493, 1494, 1492, 212, 382, 383, - 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, - 394, 395, 396, 764, 675, 503, 504, 738, 739, 740, - 213, 165, 231, 844, 1009, 1060, 215, 167, 501, 502, - 397, 664, 665, 59, 659, 660, 1074, 1075, 93, 60, - 398, 62, 114, 475, 628, 63, 116, 423, 620, 783, - 621, 622, 630, 623, 64, 424, 631, 65, 542, 205, - 425, 639, 66, 117, 426, 645 + 0, 1, 36, 306, 674, 386, 86, 173, 798, 1536, + 598, 38, 388, 40, 41, 42, 43, 121, 244, 687, + 688, 892, 1137, 389, 1305, 45, 46, 693, 47, 48, + 49, 50, 51, 52, 195, 197, 338, 339, 534, 1149, + 1150, 533, 718, 719, 720, 1153, 923, 1481, 1482, 550, + 53, 223, 862, 1083, 89, 122, 123, 124, 226, 245, + 552, 723, 942, 1173, 553, 724, 943, 1182, 54, 968, + 858, 859, 55, 199, 739, 487, 753, 1559, 390, 200, + 391, 761, 393, 394, 579, 395, 396, 580, 581, 582, + 583, 584, 585, 762, 397, 57, 92, 211, 430, 418, + 431, 893, 894, 190, 191, 1253, 895, 1502, 1503, 1501, + 1500, 1493, 1498, 1492, 1509, 1510, 1508, 227, 398, 399, + 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, + 410, 411, 412, 780, 691, 519, 520, 754, 755, 756, + 228, 180, 246, 860, 1025, 1076, 230, 182, 517, 518, + 413, 680, 681, 59, 675, 676, 1090, 1091, 108, 60, + 414, 62, 129, 491, 644, 63, 131, 439, 636, 799, + 637, 638, 646, 639, 64, 440, 647, 65, 558, 220, + 441, 655, 66, 132, 442, 661 }; /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If @@ -1968,1021 +1976,1371 @@ static const yytype_int16 yydefgoto[] = number is the opposite. If YYTABLE_NINF, syntax error. 
*/ static const yytype_int16 yytable[] = { - 72, 278, 166, 164, 58, 227, 228, 159, 56, 885, - 533, 535, 538, 177, 39, 119, 1184, 198, 178, 214, - 932, 851, 219, 169, 170, 171, 172, 44, 1201, 199, - 120, 121, 122, 37, 674, 1243, 209, 61, 404, 406, - 408, 410, 412, 294, 376, 125, 126, 961, 163, 110, - 216, 604, 216, 605, 1277, 68, 69, -558, 70, 120, - 161, 68, 69, 777, 70, 279, 120, 960, 419, 420, - 200, 845, 68, 69, 1198, 70, 217, 120, 1000, 683, - 684, 685, 120, 207, 744, 233, 344, 120, 498, 216, - 68, 69, 120, 70, 120, 1497, 120, 292, 216, 216, - 68, 69, 478, 70, 1241, 498, 254, 320, 99, 238, - 120, 249, 252, 253, 453, 120, 68, 69, 120, 70, - 736, 550, 235, 236, 259, 1491, 99, 261, 163, 267, - 68, 69, 679, 70, 558, 712, 99, 931, 68, 69, - 161, 70, 68, 69, 371, 70, 291, 68, 69, 1490, - 70, 270, 273, 556, 198, 120, 1533, 1513, 206, 472, - 466, 282, 283, 284, 67, 516, 498, 274, 417, 68, - 69, 120, 70, 68, 69, 319, 70, 1488, 216, 123, - 124, 1512, 123, 124, 120, 449, 577, 75, 123, 124, - 577, 271, 1534, 482, 1125, 1126, 96, 1279, 110, 97, - 431, 432, 123, 124, 193, 452, 578, 579, 1131, 1510, - 578, 579, 73, 68, 120, 120, 70, 120, 120, 1586, - 1014, 120, 98, 99, 123, 124, 585, 100, 470, 101, - 690, 461, 462, 1280, 476, 474, 102, 691, 479, 572, - 1015, 1069, 1070, 68, 69, 271, 70, 519, 76, 1483, - 557, 684, 1341, 103, 95, 1489, 492, 1147, 591, 206, - 1342, 695, 481, 483, 505, 1352, 111, 487, 104, 291, - 489, 490, 491, 696, 697, 493, 494, 895, 457, 112, - 497, 120, 488, 458, 268, 1179, 1195, 1511, 793, 206, - 113, 698, 459, 554, 594, 719, 500, 206, 168, 551, - 58, 206, 451, 485, 56, 123, 124, 577, 206, 552, - 39, 400, 451, 713, 577, 401, 590, 368, 459, 592, - 115, 1217, 173, 44, 848, 781, 540, 578, 579, 37, - 539, 206, 174, 61, 578, 579, 531, 206, 206, 570, - 218, 699, 573, 656, -558, 181, 583, 778, 118, 532, - 220, 593, 1508, 421, 286, 530, 287, 272, 1199, 541, - 288, 289, 206, 179, 422, 597, 589, 499, 546, 1218, - 547, 269, 1417, 68, 548, 608, 70, 1219, 1514, 784, - 464, 1081, 1516, 663, 499, 465, 1082, 403, 998, 269, - 571, 401, 183, 574, 575, -541, 1517, 584, 1495, 269, - 206, 1527, 262, 263, 1528, 201, 682, 576, 581, 728, - 599, 600, 264, 601, 206, 602, 163, 376, 472, 905, - 847, 687, 688, 110, 588, 749, 619, 1507, 161, 603, - 657, 610, 611, 202, 1532, 730, 617, 617, 637, 643, - 919, 1242, 895, 1535, 654, 237, 655, 1063, 1127, 618, - 618, 638, 644, 1064, 715, 733, 203, 1529, 204, 947, - 948, 949, 921, 232, 452, 1519, 1400, 1317, 1530, 260, - 1521, 1506, 722, 680, 1524, 221, 105, 68, 69, 500, - 70, 120, 68, 69, 206, 70, 742, 470, 1549, 222, - 127, 68, 69, 216, 70, 120, 234, 368, 110, 127, - 1546, 1547, 68, 69, 271, 70, 1501, 1548, 68, 275, - 127, 70, 120, 700, 1068, 758, 459, 371, 99, 694, - 978, 1536, 725, 276, 701, 747, 766, 99, 1537, 957, - 206, 282, 283, 284, 709, 710, 68, 69, 99, 70, - 277, 1282, 98, 785, 127, 729, 731, 100, 1283, 101, - 280, 1069, 1070, 721, 1540, 762, 102, 459, 1469, 727, - 281, 1470, 732, 760, 123, 124, 577, 198, 1319, 1320, - 405, 761, 99, 103, 401, 407, 748, 293, 376, 401, - 68, 69, 1398, 70, 757, 409, 578, 579, 104, 401, - 580, 294, 786, 787, 68, 69, 295, 70, 790, 3, - 791, 559, 127, 560, 561, 562, 763, 1403, 1404, 296, - 917, 411, 533, 535, 538, 401, 923, 979, 321, 980, - 981, 982, 983, 984, 454, 68, 69, 455, 70, 795, - 99, 909, 910, 914, 1213, 797, 153, 154, 155, 399, - 259, 413, 120, 794, 577, 153, 154, 155, 892, 1084, - 505, 951, 416, 1085, 418, 1214, 153, 154, 155, 1086, - 1415, 1215, 799, 1087, 578, 579, 1136, 206, 1216, 1092, - 901, 884, 460, 1093, 893, 
1094, 894, 450, 371, 1095, - 68, 69, 1096, 852, 433, 875, 1097, 972, 127, 1098, - 153, 154, 155, 1099, 467, 468, 456, 1100, 906, 899, - 500, 1101, 913, 965, 459, 1102, 918, 920, 922, 1103, - 959, 1104, 967, 1106, 962, 1105, 99, 1107, 1459, 1108, - 240, 241, 242, 1109, 286, 1463, 287, 463, 1110, 884, - 288, 289, 1111, 1112, 950, 1114, 953, 1113, 955, 1115, - 956, 1461, 1264, 1116, 1267, 243, 473, 1117, 153, 154, - 155, 120, 186, 477, 966, 187, 188, 189, 190, -238, - 191, 192, 193, 206, 968, 969, 480, 459, 1348, 1349, - 1350, 1194, 1538, 1145, 206, 156, 1071, 14, 970, 1247, - 506, 663, 484, 1248, 156, 1083, 985, 640, 986, 987, - 641, 999, 98, 1001, 1249, 156, 459, 100, 1250, 101, - 1574, 495, 997, 1251, 1065, 186, 102, 1252, 187, 188, - 189, 190, 1544, 191, 192, 193, 1003, 1004, 1005, 1006, - 1007, 1253, 486, 103, 915, 1254, 1073, 549, 206, 156, - 496, 1135, 1456, 509, 153, 154, 155, 543, 104, 244, - 555, 245, 246, 247, 248, 1069, 1070, 1129, 1122, 606, - 282, 283, 284, 1551, 68, 69, 544, 70, 1072, 1169, - 1321, 1322, 28, 29, 30, 31, 32, 33, 34, 916, - 223, 642, 224, 225, 226, 1552, 596, 35, 510, 511, - 512, 285, 598, 1183, 1576, 1185, 607, 156, 3, 1120, - 1123, 609, 1140, 652, 1142, 750, 751, 752, 1118, 1588, - 1146, 653, 1138, 1119, 1208, 1209, 1210, 1211, 1212, 1141, - 459, 1335, 216, 1132, 513, 514, 515, 1585, 661, 884, - 255, 256, 257, 258, 250, 251, 459, 1545, 540, 1143, - 666, 1144, 539, 667, 762, 762, 206, 1406, 531, 1155, - 1164, 1205, 637, 646, 647, 648, 668, 884, 1196, 1197, - 669, 532, 1156, 1165, 673, 638, 676, 530, 1154, 1163, - 689, 541, 1158, 1167, 1017, 1018, 1193, 1088, 1089, 1090, - 1091, 625, 3, 156, 681, 692, 282, 283, 284, 649, - 650, 651, 693, 705, 706, 763, 763, 1310, 1311, 1312, - 1313, 716, 68, 69, 1235, 70, 717, 1314, 1315, 1316, - 586, 128, 587, 718, 720, 129, 130, 131, 132, 133, - 1203, 134, 135, 136, 137, 726, 138, 139, 194, 734, - 140, 141, 142, 143, 886, 887, 99, 144, 145, 735, - 753, 282, 283, 284, 195, 754, 146, 755, 147, 1149, - 1150, 1151, 1152, 286, 756, 287, 765, 759, 768, 288, - 289, 767, 769, 148, 149, 150, 770, 771, 776, 888, - 772, 773, 775, 11, 12, 13, 14, 1343, 1344, 1345, - 1346, 1307, 1308, 1220, 779, 28, 29, 30, 31, 32, - 33, 34, 780, 788, 1353, 789, 792, 195, 151, 3, - 35, 796, 798, 849, 1284, 1244, 1245, 1246, 850, 853, - 854, 855, 1255, 1256, 1257, 1258, 1259, 1260, 856, 1262, - 1263, 1265, 632, 1268, 1269, 1270, 1271, 1272, 1273, 1274, - 1261, 1276, 857, 1278, 1266, 1281, 858, 1285, 1523, 1526, - 9, 10, 1275, 882, 883, 68, 69, 1304, 70, 1325, - 896, 903, 1338, 897, 908, 898, 1328, 3, 1329, 902, - 14, 28, 29, 30, 31, 32, 33, 34, 924, 930, - 1153, 963, 612, 971, 613, 974, 35, 614, 615, 286, - 632, 287, 954, 964, 110, 288, 289, 958, 973, 975, - 976, 1318, 401, 988, 110, 110, 110, 110, 977, 989, - 282, 283, 284, 990, 110, 110, 110, 991, 889, 890, - 1330, 891, 992, 427, 993, 428, 429, 994, 1008, 1332, - 995, 1409, 430, 996, 1002, 1011, 1336, 1337, 1010, 1013, - 1012, 1016, 1061, 1077, 286, 633, 287, 1405, 1340, 1066, - 288, 289, 1078, 1079, 1130, 28, 29, 30, 31, 32, - 33, 34, 1139, 1124, 616, 1171, 1172, 1178, 1347, 1351, - 35, 1173, 1414, 1180, 1120, 1174, 1175, 1359, 1360, 1361, - 1362, 1363, 1364, 1419, 1366, 1399, 1176, 14, 1177, 1181, - 1182, 736, 1186, 1189, 1190, 1365, 1200, 634, 68, 69, - 635, 70, 1204, 633, 1401, 1191, 127, 1192, 1202, 128, - 1240, 1238, 157, 129, 130, 131, 132, 133, 1239, 134, - 135, 136, 137, 1206, 138, 139, 1207, 1291, 140, 141, - 142, 143, 1292, 1293, 99, 144, 145, 1294, 1295, 1296, - 474, 1298, 884, 1299, 146, 14, 
147, 1300, 1301, 1305, - 1306, 1309, 1326, 1460, 1118, 634, 1327, 516, 635, 1119, - 1331, 148, 149, 150, 1418, 1333, 1334, 1339, 1084, 1086, - 1092, 1423, 28, 29, 30, 31, 32, 33, 34, 1094, - 1096, 636, 1098, 1100, 1118, 1120, 1102, 35, 1104, 1119, - 1354, 1106, 1108, 1110, 1356, 1395, 151, 1355, 282, 283, - 284, 1357, 1358, 286, 1367, 287, 1462, 1368, 1498, 686, - 289, 96, 467, 468, 97, 1370, 1369, 1502, 1371, 1373, - 1402, 1372, 1375, 884, 1374, 1376, 1378, 1377, 1379, 1381, - 28, 29, 30, 31, 32, 33, 34, 98, 99, 1168, - 1380, 1382, 100, 1383, 101, 35, 1384, 1385, 1387, 1389, - 1386, 102, 153, 154, 155, 1473, 1474, 1475, 68, 69, - 1388, 70, 1390, 1391, 1392, 1393, 127, 1394, 103, 128, - 1396, 1407, 1397, 129, 130, 131, 132, 133, 1408, 134, - 135, 136, 137, 104, 138, 139, 1411, 1412, 140, 141, - 142, 143, 1539, 1570, 99, 144, 145, 1413, 1416, 1420, - 1581, 1421, 1247, 1422, 146, 1249, 147, 1509, 1251, 1253, - 1425, 1424, 1515, 1509, 1518, 1426, 1522, 1428, 1515, 1509, - 1518, 148, 149, 150, 1427, 1432, 1433, 1430, 1583, 1525, - 1431, 28, 29, 30, 31, 32, 33, 34, 1434, 1435, - 1515, 1509, 1518, 1436, 1437, 1439, 35, 1438, 1440, 1441, - 1442, 1443, 1444, 1445, 1446, 1447, 151, 884, 282, 283, - 284, 28, 29, 30, 31, 32, 33, 34, 1448, 1449, - 1429, 3, 467, 468, 1452, 1450, 35, 1451, 1582, 1453, - 1454, 1455, 1457, 1542, 1458, 1468, 1464, 1467, 1478, 1479, - 1472, 469, 1480, 287, 1481, 880, 1496, 288, 289, 884, - 157, 1499, 1500, 1577, 1503, 1504, 1505, 68, 69, 1541, - 70, 1550, 153, 154, 155, 127, 1569, 1571, 128, 1572, - 1573, 1575, 129, 130, 131, 132, 133, 1578, 134, 135, - 136, 137, 1584, 138, 139, 1579, 1580, 140, 141, 142, - 143, 1587, 297, 99, 144, 145, 1589, 507, 160, 670, - 1324, 904, 925, 146, 1302, 147, 595, 1303, 162, 1187, - 1323, 545, 774, 197, 1236, 743, 68, 69, 1062, 70, - 148, 149, 150, 1128, 127, 881, 1188, 128, 1076, 1290, - 1471, 129, 130, 131, 132, 133, 1080, 134, 135, 136, - 137, 94, 138, 139, 900, 1297, 140, 141, 142, 143, - 678, 239, 99, 144, 145, 151, 711, 282, 283, 284, - 0, 0, 146, 0, 147, 929, 629, 0, 0, 0, - 0, 0, 0, 0, 0, 1159, 0, 1160, 1161, 148, - 149, 150, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 11, 12, 13, 14, - 0, 469, 0, 287, 0, 0, 0, 686, 289, 0, - 157, 153, 154, 155, 151, 152, 68, 69, 0, 70, - 0, 0, 0, 0, 127, 0, 0, 128, 0, 0, - 0, 129, 130, 131, 132, 133, 0, 134, 135, 136, - 137, 0, 138, 139, 0, 0, 140, 141, 142, 143, - 0, 0, 99, 144, 145, 0, 0, 0, 0, 0, - 0, 0, 146, 0, 147, 0, 0, 0, 0, 0, - 153, 154, 155, 0, 0, 0, 0, 0, 0, 148, - 149, 150, 0, 911, 28, 29, 30, 31, 32, 33, - 34, 0, 0, 1162, 0, 0, 0, 0, 0, 35, - 68, 69, 0, 70, 0, 0, 0, 14, 127, 0, - 0, 128, 0, 0, 151, 129, 130, 131, 132, 133, - 0, 134, 135, 136, 137, 0, 138, 139, 912, 0, - 140, 141, 142, 143, 0, 0, 99, 144, 145, 0, - 0, 0, 0, 0, 0, 0, 146, 0, 147, 0, - 469, 0, 287, 0, 0, 0, 288, 289, 0, 157, - 0, 0, 0, 148, 149, 150, 0, 0, 0, 0, - 153, 154, 155, 859, 860, 861, 0, 862, 863, 864, - 865, 0, 866, 867, 193, 0, 868, 869, 870, 871, - 0, 0, 0, 872, 873, 0, 0, 0, 151, 152, - 0, 0, 0, 0, 68, 69, 0, 70, 0, 156, - 0, 0, 127, 0, 0, 128, 0, 0, 157, 129, - 130, 131, 132, 133, 0, 134, 135, 136, 137, 0, - 138, 139, 0, 0, 140, 141, 142, 143, 0, 0, - 99, 144, 145, 0, 0, 0, 0, 0, 0, 0, - 146, 0, 147, 0, 153, 154, 155, 0, 0, 0, - 0, 874, 0, 0, 0, 0, 0, 148, 149, 150, - 0, 0, 0, 0, 68, 69, 0, 70, 0, 0, - 0, 0, 127, 0, 0, 128, 0, 0, 0, 129, - 130, 131, 132, 133, 0, 134, 135, 136, 137, 0, - 138, 139, 151, 0, 140, 141, 142, 143, 0, 210, - 99, 144, 145, 0, 933, 0, 0, 0, 157, 0, - 146, 934, 147, 935, 936, 937, 0, 0, 
0, 0, - 0, 0, 2, 0, 0, 0, 0, 148, 149, 150, + 87, 229, 181, 234, 224, 901, 294, 242, 243, 620, + 56, 621, 44, 213, 174, 134, 549, 125, 37, 551, + 1200, 58, 554, 184, 185, 186, 187, 61, 176, 1217, + 948, 136, 137, 39, 1293, 867, 1259, 178, 135, 310, + 231, 231, 690, 192, 179, 140, 141, 135, 193, 435, + 436, 793, 1214, 699, 700, 701, 392, 566, 514, 214, + 135, 1549, 861, 976, 231, -574, 232, 295, 135, 760, + 215, 231, 231, 1085, 1086, 977, 728, 135, 308, 1016, + 138, 139, 593, 135, 494, 593, 695, 135, 360, 336, + 135, 420, 422, 424, 426, 428, 135, 1550, 222, 752, + 249, 135, 594, 595, 135, 594, 595, 138, 139, 138, + 139, 135, 387, 138, 139, 135, 469, 138, 139, 593, + 572, 270, 490, 176, 254, 90, 265, 268, 269, 1257, + 135, 135, 178, 1513, 947, 1295, 67, 251, 252, 594, + 595, 482, 488, 1507, 283, 574, 1506, 231, 221, 593, + 138, 139, 201, 1499, 501, 202, 203, 204, 205, 1505, + 206, 207, 208, 1504, 1030, 213, 286, 289, -39, 594, + 595, 1296, 113, 208, 287, 1529, 498, 115, 1528, 116, + 275, 125, 290, 277, 1031, 135, 117, 91, 278, 279, + 335, 1527, 221, 973, 221, 1526, 514, 135, 280, 532, + 465, 201, 307, 118, 202, 203, 204, 205, 221, 206, + 207, 208, 706, 433, 672, 447, 448, 573, 119, 707, + 468, 1357, 1141, 1142, 88, 700, 711, 607, 287, 1358, + 535, 126, 601, 1152, 221, 567, 1147, 111, 712, 713, + 112, 221, 135, 221, 486, 568, 477, 478, 110, 221, + 492, 911, 588, 1163, 729, 128, 714, 473, 1603, 1364, + 1365, 1366, 474, 113, 114, 221, 130, 221, 115, 127, + 116, 698, 508, 863, 183, 809, 570, 117, 497, 499, + 521, 1195, 470, 503, 135, 471, 505, 506, 507, 475, + 1368, 509, 510, 221, 118, 495, 513, 610, 504, 606, + 188, 608, 189, 384, 1522, 194, 735, 715, 586, 119, + 221, 589, 516, 475, 1211, 599, 475, 1079, 1233, 864, + 994, -39, 56, 1080, 44, -39, 307, 298, 299, 300, + 37, 233, 800, 58, 797, 437, 794, 1215, 515, 61, + 221, 1161, 221, 1234, 624, 39, 438, 480, 555, 1551, + 548, 1235, 481, -574, 196, 1097, 546, 416, 221, 556, + 1098, 417, 1014, 609, 1524, 557, 679, 1540, 475, 1485, + 1537, 547, 1486, 1433, 1530, -557, 475, 1532, 1511, 198, + 1210, 419, 605, 1533, 562, 417, 563, 216, 1543, 421, + 564, 1544, 235, 417, 1535, -253, 1546, 217, 1545, 221, + 1565, 488, 1562, 596, 703, 704, 587, 125, 604, 590, + 591, 613, 176, 600, 1548, 1229, 911, 1563, 1564, 209, + 218, 178, 221, 592, 597, 219, 615, 616, 1523, 617, + 392, 618, 766, 767, 768, 210, 1230, 731, 765, 963, + 964, 965, 1231, 1085, 1086, 619, 673, 626, 627, 1232, + 634, 634, 654, 660, 738, 1258, 1143, 423, 749, 635, + 670, 417, 671, 236, 425, 298, 299, 300, 417, 758, + 237, 633, 633, 653, 659, 1335, 1336, 1333, 247, 716, + 468, 427, 125, 221, 210, 417, 387, 1472, 287, 696, + 717, 526, 527, 528, 763, 516, 301, 238, 774, 239, + 240, 241, 250, 486, 276, 1100, 949, 1517, 782, 1101, + 298, 299, 300, 950, 291, 951, 952, 953, 120, 1102, + 302, 297, 303, 1103, 778, 801, 304, 305, 529, 530, + 531, 292, 949, 1108, 293, 710, 1110, 1109, 741, 950, + 1111, 951, 952, 953, 1019, 1020, 1021, 1022, 1023, 1112, + 725, 726, 296, 1113, 954, 955, 956, 309, 1556, 1114, + 1116, 745, 747, 1115, 1117, 310, 298, 299, 300, 737, + 311, 138, 139, 593, 135, 743, 593, 312, 748, 213, + 954, 955, 956, 298, 299, 300, 575, 777, 576, 577, + 578, 392, 764, 594, 595, 1118, 594, 595, 337, 1119, + 773, 957, 958, 959, 415, 960, 1120, 1122, 961, 776, + 1121, 1123, 429, 933, 925, 926, 930, 432, 549, 939, + 434, 551, 779, 449, 554, 1337, 1338, 957, 958, 959, + 967, 960, 466, 1124, 961, 1419, 1420, 1125, 802, 803, + 271, 272, 273, 274, 806, 811, 807, 387, 1126, 1128, + 1130, 813, 1127, 
1129, 1131, 472, 1431, 475, 302, 810, + 303, 902, 903, 1132, 304, 305, 521, 1133, 476, 988, + 479, 995, 981, 996, 997, 998, 999, 1000, 815, 1263, + 1265, 983, 489, 1264, 1266, 1267, 917, 900, 496, 1268, + 909, 1269, 910, 493, 908, 1270, 904, 275, 475, 1351, + 1280, 500, 1283, 302, 475, 303, 475, 1561, 1591, 304, + 305, 221, 1422, 502, 922, 915, 516, 511, 929, 512, + 891, 522, 934, 936, 938, 565, 975, 1212, 1213, 525, + 978, 1224, 1225, 1226, 1227, 1228, 559, 1477, 68, 69, + 560, 70, 135, 571, 612, 900, 1475, 1033, 1034, 614, + 966, 623, 969, 1479, 971, 990, 972, 622, 1087, 302, + 625, 303, 231, 962, 679, 304, 305, 1099, 668, 669, + 982, 677, 682, 1081, 684, 683, 302, 689, 303, 113, + 984, 985, 702, 305, 115, 685, 116, 71, 692, 1186, + 641, 697, 708, 117, 986, 705, 662, 663, 664, 709, + 721, 722, 1001, 72, 1002, 1003, 732, 1015, 733, 1017, + 118, 931, 73, 734, 1151, 736, 1145, 742, 1013, 1104, + 1105, 1106, 1107, 750, 751, 119, 74, 75, 76, 77, + 769, 771, 78, 665, 666, 667, 905, 906, 770, 907, + 1560, 1185, 1359, 1360, 1361, 1362, 772, 775, 784, 792, + 1134, 781, 783, 785, 786, 1135, 787, 932, 795, 79, + 80, 81, 82, 83, 84, 85, 1199, 788, 1201, 789, + 791, 1568, 812, 796, 1088, 804, 805, 808, 814, 865, + 866, 1567, 869, 1089, 870, 871, 872, 873, 874, 898, + 68, 69, 913, 70, 1138, 899, 912, 68, 69, 914, + 70, 918, 919, 778, 778, 940, 1139, 946, 924, 970, + 1158, 979, 974, 1593, 1602, 1156, 1162, 980, 1154, 987, + 991, 989, 992, 1221, 1136, 1157, 114, 993, 1605, 1148, + 1004, 417, 1005, 1006, 1009, 900, 1007, 1008, 1010, 71, + 1011, 1026, 1012, 1018, 1024, 1159, 71, 1160, 1027, 1028, + 555, 1032, 548, 1172, 1181, 72, 654, 1077, 546, 1170, + 1179, 556, 72, 900, 73, 1029, 1082, 557, 1174, 1183, + 1093, 73, 1094, 547, 1171, 1180, 1251, 653, 74, 75, + 76, 77, 1209, 1095, 78, 74, 75, 76, 77, 1140, + 1146, 78, 1155, 1326, 1327, 1328, 1329, 1190, 1187, 949, + 1188, 779, 779, 1330, 1331, 1332, 950, 1191, 951, 952, + 953, 79, 80, 81, 82, 83, 84, 85, 79, 80, + 81, 82, 83, 84, 85, 1192, 1219, 28, 29, 30, + 31, 32, 33, 34, 1189, 1193, 1369, 1194, 1196, 752, + 1197, 288, 35, 1198, 1202, 1205, 1206, 954, 955, 956, + 1207, 1216, 1208, 1218, 1323, 1324, 875, 876, 877, 1134, + 878, 879, 880, 881, 1135, 882, 883, 208, 1220, 884, + 885, 886, 887, 1222, 1307, 1254, 888, 889, 1223, 1255, + 1134, 1256, 1308, 1309, 1310, 1135, 1539, 1542, 490, 1236, + 1314, 1311, 1312, 1317, 957, 958, 959, 1315, 960, 1316, + 1325, 961, 1321, 284, 1342, 1322, 1343, 532, 1341, 1347, + 1350, 1260, 1261, 1262, 1349, 1344, 1100, 1345, 1271, 1272, + 1273, 1274, 1275, 1276, 1354, 1278, 1279, 1281, 1355, 1284, + 1285, 1286, 1287, 1288, 1289, 1290, 1277, 1292, 1102, 1294, + 1282, 1297, 1108, 1301, 1110, 890, 1112, 1114, 1291, 1116, + 1118, 1300, 1120, 1320, 1122, 1124, 1126, 1370, 125, 1371, + 1411, 1373, 3, 1429, 1372, 1383, 1385, 1374, 125, 125, + 125, 125, 1384, 1386, 1388, 1387, 1389, 1390, 125, 125, + 125, 1393, 1396, 1391, 1392, 1394, 1395, 1397, 2, 1424, + 285, 744, 1399, 1398, 1400, 1401, 1480, 1334, 1402, 1403, + 1405, 1432, 1404, 1407, 1408, 1406, 3, 1410, 1412, 1409, + 1421, 1423, 1418, 1413, 1425, 1427, 1346, 1436, 1428, 1263, + 1437, 1265, 1267, 1438, 1440, 1348, 1269, 1446, 640, 1441, + 1444, 1447, 1352, 1353, 1442, 1430, 28, 29, 30, 31, + 32, 33, 34, 1443, 1356, 1435, 1448, 1449, 1450, 1451, + 1470, 35, 1452, 1453, 1454, 1455, 1426, 28, 29, 30, + 31, 32, 33, 34, 1363, 1367, 1445, 1456, 1457, 1458, + 1471, 1459, 35, 1375, 1376, 1377, 1378, 1379, 1380, 1488, + 1382, 1460, 1461, 1462, 1463, 1464, 1465, 68, 69, 1136, + 70, 1381, 
1466, 1467, 1469, 142, 1468, 1473, 143, 1474, + 1417, 1484, 144, 145, 146, 147, 148, 1494, 149, 150, + 151, 152, 1483, 153, 154, 1495, 1476, 155, 156, 157, + 158, 1496, 1415, 114, 159, 160, 1497, 896, 1512, 1515, + 1547, 1516, 1557, 161, 1519, 162, 71, 1520, 900, 1558, + 1521, 14, 3, 4, 5, 6, 7, 8, 1566, 1586, + 163, 164, 165, 641, 1588, 629, 642, 1589, 630, 631, + 1434, 73, 1590, 1592, 1595, 9, 10, 1439, 1596, 1597, + 1601, 1514, 1604, 313, 1606, 74, 75, 76, 77, 1518, + 175, 78, 11, 12, 13, 14, 166, 167, 523, 15, + 16, 611, 1340, 686, 920, 17, 941, 1318, 18, 177, + 1136, 1319, 1478, 1203, 759, 19, 20, 1339, 79, 80, + 81, 82, 83, 84, 85, 561, 212, 1252, 1078, 900, + 897, 790, 1144, 1204, 1306, 1487, 28, 29, 30, 31, + 32, 33, 34, 916, 1096, 643, 1092, 109, 1313, 694, + 255, 35, 168, 169, 170, 945, 645, 0, 727, 0, + 0, 1489, 1490, 1491, 0, 1587, 0, 0, 0, 0, + 111, 21, 22, 112, 23, 24, 25, 0, 26, 27, + 28, 29, 30, 31, 32, 33, 34, 0, 3, 14, + 0, 0, 0, 0, 1598, 35, 113, 114, 0, 0, + 0, 115, 1600, 116, 1165, 1166, 1167, 1168, 0, 0, + 117, 0, 0, 1525, 0, 0, 0, 0, 1531, 1525, + 1534, 0, 1538, 0, 1531, 1525, 1534, 118, 11, 12, + 13, 14, 0, 0, 0, 1541, 0, 0, 0, 1555, + 0, 0, 119, 0, 0, 0, 1531, 1525, 1534, 0, + 68, 69, 0, 70, 0, 0, 0, 0, 142, 0, + 0, 143, 0, 900, 0, 144, 145, 146, 147, 148, + 0, 149, 150, 151, 152, 0, 153, 154, 0, 0, + 155, 156, 157, 158, 0, 1599, 114, 159, 160, 0, + 0, 171, 0, 0, 0, 0, 161, 0, 162, 71, + 172, 0, 0, 0, 0, 0, 900, 0, 0, 0, + 1594, 0, 0, 163, 164, 165, 28, 29, 30, 31, + 32, 33, 34, 0, 73, 1169, 0, 0, 0, 0, + 0, 35, 0, 0, 0, 0, 0, 0, 74, 75, + 76, 77, 0, 0, 78, 0, 0, 9, 10, 166, + 0, 298, 299, 300, 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 483, 484, 14, 0, 0, + 0, 79, 80, 81, 82, 83, 84, 85, 0, 628, + 0, 629, 0, 648, 630, 631, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, 0, 0, 153, 154, - 155, 0, 938, 939, 940, 0, 0, 0, 68, 69, - 0, 70, 553, 0, 0, 0, 127, 0, 0, 128, - 0, 0, 0, 129, 130, 131, 132, 133, 0, 134, - 135, 136, 137, 210, 138, 139, 0, 0, 140, 141, - 142, 143, 157, 0, 99, 144, 145, 0, 941, 942, - 943, 0, 944, 0, 662, 945, 147, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 153, 154, - 155, 148, 149, 150, 0, 0, 0, 68, 69, 0, - 70, 0, 0, 0, 0, 127, 1531, 0, 128, 0, - 0, 0, 129, 130, 131, 132, 133, 0, 134, 135, - 136, 137, 0, 138, 139, 0, 151, 140, 141, 142, - 143, 0, 0, 99, 144, 145, 0, 0, 0, 0, - 0, 0, 0, 146, 0, 147, 4, 5, 6, 7, - 8, 0, 0, 0, 0, 0, 0, 210, 0, 0, - 148, 149, 150, 0, 0, 933, 157, 0, 9, 10, - 0, 0, 934, 0, 935, 936, 937, 0, 0, 0, - 0, 0, 153, 154, 155, 11, 12, 13, 14, 0, - 0, 0, 15, 16, 0, 714, 0, 0, 17, 0, - 0, 18, 0, 0, 68, 0, 0, 70, 19, 20, - 0, 0, 0, 938, 939, 940, 0, 3, 216, 0, - 0, 0, 0, 0, 0, 0, 0, 210, 0, 0, - 946, 0, 0, 1221, 1222, 1223, 157, 1224, 1225, 1226, - 1227, 0, 1228, 1229, 193, 0, 1230, 1231, 1232, 1233, - 0, 153, 154, 155, 0, 1234, 0, 0, 0, 941, - 942, 943, 0, 944, 21, 22, 945, 23, 24, 25, - 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 0, 0, 0, 0, 265, 128, 266, 0, 35, 129, - 130, 131, 132, 133, 0, 134, 135, 136, 137, 0, - 138, 139, 0, 0, 140, 141, 142, 143, 0, 0, - 0, 144, 145, 0, 0, 0, 0, 0, 0, 0, - 146, 210, 147, 0, 0, 0, 0, 0, 0, 0, - 157, 0, 0, 0, 0, 0, 0, 148, 149, 150, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, - 355, 356, 357, 0, 0, 0, 1553, 8, 0, 0, - 0, 358, 359, 360, 361, 362, 363, 68, 0, 0, - 70, 1554, 151, 0, 933, 9, 10, 0, 0, 0, - 3, 934, 0, 935, 936, 937, 0, 1555, 0, 0, - 210, 0, 11, 12, 13, 14, 1556, 0, 0, 157, - 0, 0, 0, 364, 0, 0, 0, 0, 0, 0, - 1557, 1558, 1559, 1560, 0, 0, 0, 365, 0, 0, - 0, 1170, 938, 
939, 940, 78, 79, 80, 81, 82, - 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, - 0, 0, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 0, - 0, 68, 366, 367, 70, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 3, 0, 0, 0, 941, 942, - 943, 0, 944, 0, 0, 945, 0, 0, 0, 0, - 28, 29, 30, 31, 32, 33, 34, 0, 368, 369, - 0, 0, 0, 0, 0, 35, 0, 0, 0, 0, - 0, 0, 0, 345, 346, 347, 348, 349, 350, 351, - 352, 353, 354, 355, 356, 357, 0, 0, 0, 0, - 8, 0, 0, 0, 358, 359, 360, 361, 362, 363, - 0, 0, 0, 0, 0, 0, 0, 0, 9, 10, + 68, 69, 0, 70, 0, 168, 169, 170, 142, 0, + 0, 143, 0, 0, 0, 144, 145, 146, 147, 148, + 0, 149, 150, 151, 152, 0, 153, 154, 0, 0, + 155, 156, 157, 158, 0, 0, 114, 159, 160, 0, + 0, 0, 0, 0, 0, 0, 161, 0, 162, 71, + 0, 0, 28, 29, 30, 31, 32, 33, 34, 0, + 0, 632, 0, 163, 164, 165, 0, 35, 0, 0, + 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 74, 75, + 76, 77, 0, 0, 78, 0, 0, 649, 0, 166, + 0, 298, 299, 300, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 483, 484, 0, 0, 0, + 0, 79, 80, 81, 82, 83, 84, 85, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, + 0, 0, 0, 0, 485, 0, 303, 0, 0, 650, + 304, 305, 651, 172, 0, 168, 169, 170, 68, 69, + 0, 70, 0, 0, 0, 0, 142, 0, 0, 143, + 0, 0, 0, 144, 145, 146, 147, 148, 0, 149, + 150, 151, 152, 0, 153, 154, 0, 0, 155, 156, + 157, 158, 0, 0, 114, 159, 160, 0, 0, 0, + 0, 0, 0, 0, 161, 0, 162, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 11, 12, 13, 14, 0, - 0, 0, 0, 0, 0, 0, 364, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 157, 0, 0, 0, - 365, 0, 0, 0, 0, 0, 0, 345, 346, 347, - 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, - 0, 0, 0, 0, 8, 0, 1568, 0, 358, 359, - 360, 361, 362, 363, 0, 366, 367, 0, 0, 0, - 0, 0, 9, 10, 0, 0, 0, 0, 0, 0, - 1410, 0, 0, 0, 0, 0, 0, 0, 0, 11, - 12, 13, 14, 28, 29, 30, 31, 32, 33, 34, - 364, 368, 741, 0, 0, 0, 0, 0, 35, 0, - 0, 128, 0, 0, 365, 129, 130, 131, 132, 133, - 0, 134, 135, 136, 137, 0, 138, 139, 0, 0, - 140, 141, 142, 143, 434, 0, 0, 144, 145, 0, - 0, 0, 0, 0, 0, 0, 146, 0, 147, 366, - 367, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 148, 149, 150, 0, 435, 0, 436, - 437, 438, 439, 0, 0, 0, 0, 28, 29, 30, - 31, 32, 33, 34, 0, 368, 928, 0, 0, 0, - 0, 0, 35, 0, 0, 0, 0, 0, 151, 0, - 0, 0, 0, 0, 0, 0, 440, 441, 442, 443, - 0, 0, 444, 0, 0, 128, 445, 446, 447, 129, - 130, 131, 132, 133, 0, 134, 135, 136, 137, 0, - 138, 139, 0, 0, 140, 141, 142, 143, 0, 0, - 0, 144, 145, 0, 0, 0, 0, 0, 0, 0, - 146, 0, 147, 0, 0, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, 0, 148, 149, 150, - 128, 0, 0, 0, 129, 130, 131, 132, 133, 0, - 134, 135, 136, 137, 0, 138, 139, 0, 0, 140, - 141, 142, 143, 0, 0, 0, 144, 145, 0, 0, - 0, 0, 151, 0, 0, 146, 0, 147, 0, 0, + 0, 163, 164, 165, 28, 29, 30, 31, 32, 33, + 34, 0, 73, 652, 0, 0, 0, 0, 0, 35, + 0, 0, 0, 0, 0, 0, 74, 75, 76, 77, + 0, 0, 78, 0, 0, 0, 0, 166, 0, 298, + 299, 300, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 79, + 80, 81, 82, 83, 84, 85, 0, 0, 0, 0, + 0, 0, 0, 0, 485, 0, 303, 0, 0, 0, + 702, 305, 0, 172, 0, 0, 0, 0, 68, 69, + 0, 70, 0, 168, 169, 170, 142, 0, 0, 143, + 0, 0, 0, 144, 145, 146, 147, 148, 0, 149, + 150, 151, 152, 0, 153, 154, 0, 0, 155, 156, + 157, 158, 0, 0, 114, 159, 160, 0, 0, 1569, + 0, 0, 0, 0, 161, 0, 162, 71, 0, 0, + 0, 0, 0, 0, 1570, 0, 0, 0, 0, 0, + 0, 163, 164, 165, 0, 927, 0, 0, 0, 0, + 1571, 0, 73, 0, 0, 0, 0, 0, 0, 1572, + 0, 0, 0, 0, 0, 0, 74, 75, 76, 77, + 0, 0, 78, 1573, 1574, 1575, 1576, 166, 0, 1577, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 928, 0, 1175, 0, 1176, 1177, 0, 0, 79, + 80, 81, 82, 83, 84, 85, 1578, 1579, 1580, 1581, + 1582, 1583, 1584, 0, 11, 12, 13, 14, 0, 0, + 0, 0, 485, 0, 303, 0, 0, 0, 
304, 305, + 0, 172, 0, 168, 169, 170, 68, 69, 0, 70, + 0, 0, 0, 0, 142, 3, 0, 143, 0, 0, + 0, 144, 145, 146, 147, 148, 0, 149, 150, 151, + 152, 0, 153, 154, 0, 0, 155, 156, 157, 158, + 0, 0, 114, 159, 160, 0, 0, 0, 0, 0, + 0, 0, 161, 0, 162, 71, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 163, + 164, 165, 28, 29, 30, 31, 32, 33, 34, 0, + 73, 1178, 0, 0, 0, 0, 0, 35, 0, 0, + 0, 0, 0, 0, 74, 75, 76, 77, 0, 0, + 78, 0, 0, 0, 0, 166, 167, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, + 106, 107, 0, 0, 0, 0, 0, 79, 80, 81, + 82, 83, 84, 85, 0, 0, 0, 0, 0, 0, + 1585, 68, 225, 0, 70, 135, 0, 0, 0, 68, + 69, 172, 70, 0, 0, 0, 0, 142, 0, 0, + 143, 168, 169, 170, 144, 145, 146, 147, 148, 0, + 149, 150, 151, 152, 0, 153, 154, 0, 0, 155, + 156, 157, 158, 0, 0, 114, 159, 160, 0, 0, + 71, 0, 0, 0, 14, 161, 0, 162, 71, 0, + 0, 0, 0, 0, 656, 0, 72, 657, 0, 0, + 0, 0, 163, 164, 165, 73, 0, 0, 0, 0, + 0, 0, 0, 73, 0, 0, 0, 0, 0, 74, + 75, 76, 77, 0, 0, 78, 0, 74, 75, 76, + 77, 0, 0, 78, 0, 0, 0, 0, 166, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 79, 80, 81, 82, 83, 84, 85, 0, + 79, 80, 81, 82, 83, 84, 85, 0, 0, 28, + 29, 30, 31, 32, 33, 34, 68, 69, 658, 70, + 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, + 225, 0, 0, 0, 168, 169, 170, 68, 69, 172, + 70, 0, 0, 0, 0, 142, 0, 0, 143, 0, + 0, 0, 144, 145, 146, 147, 148, 0, 149, 150, + 151, 152, 0, 153, 154, 71, 0, 155, 156, 157, + 158, 0, 0, 114, 159, 160, 0, 0, 0, 0, + 0, 72, 0, 161, 0, 162, 71, 0, 0, 0, + 73, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 163, 164, 165, 0, 74, 75, 76, 77, 0, 0, + 78, 73, 0, 0, 0, 0, 0, 298, 299, 300, + 0, 0, 0, 0, 0, 74, 75, 76, 77, 0, + 0, 78, 0, 0, 0, 0, 569, 79, 80, 81, + 82, 83, 84, 85, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 384, 0, 79, 80, + 81, 82, 83, 84, 85, 0, 0, 0, 0, 0, + 0, 0, 68, 225, 0, 70, 135, 0, 0, 0, + 68, 69, 172, 70, 0, 0, 0, 0, 142, 0, + 0, 143, 168, 169, 170, 144, 145, 146, 147, 148, + 0, 149, 150, 151, 152, 0, 153, 154, 0, 0, + 155, 156, 157, 158, 0, 0, 114, 159, 160, 0, + 0, 71, 0, 0, 0, 0, 678, 0, 162, 71, + 0, 0, 0, 0, 0, 0, 0, 72, 0, 0, + 0, 0, 0, 163, 164, 165, 73, 0, 0, 0, + 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, + 74, 75, 76, 77, 0, 0, 78, 0, 74, 75, + 76, 77, 0, 0, 78, 0, 0, 0, 0, 166, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 79, 80, 81, 82, 83, 84, 85, + 0, 79, 80, 81, 82, 83, 84, 85, 0, 3, + 302, 0, 303, 0, 0, 0, 304, 305, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 225, 648, 0, 0, 168, 169, 170, 68, 69, + 172, 70, 0, 0, 0, 0, 142, 0, 0, 143, + 0, 0, 0, 144, 145, 146, 147, 148, 0, 149, + 150, 151, 152, 0, 153, 154, 0, 0, 155, 156, + 157, 158, 0, 0, 114, 159, 160, 0, 0, 0, + 0, 0, 0, 0, 161, 0, 162, 71, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 163, 164, 165, 0, 0, 0, 0, 0, 0, + 0, 0, 73, 0, 0, 0, 0, 68, 69, 0, + 70, 0, 0, 0, 0, 0, 74, 75, 76, 77, + 0, 0, 78, 0, 0, 68, 69, 730, 70, 135, + 0, 0, 0, 0, 0, 0, 649, 0, 0, 0, + 514, 0, 0, 114, 0, 0, 0, 0, 0, 79, + 80, 81, 82, 83, 84, 85, 71, 0, 0, 0, + 0, 114, 0, 0, 225, 0, 0, 0, 0, 0, + 0, 0, 72, 172, 71, 0, 0, 0, 14, 0, + 0, 73, 0, 168, 169, 170, 0, 0, 650, 0, + 72, 651, 0, 0, 0, 74, 75, 76, 77, 73, + 0, 78, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 74, 75, 76, 77, 0, 0, 78, + 0, 0, 0, 0, 0, 0, 0, 0, 79, 80, + 81, 82, 83, 84, 85, 68, 69, 231, 70, 135, + 0, 0, 0, 142, 0, 0, 79, 80, 81, 82, + 83, 84, 85, 68, 69, 0, 70, 0, 1084, 0, + 0, 142, 0, 28, 29, 30, 31, 32, 33, 34, + 0, 114, 1184, 0, 0, 0, 0, 0, 35, 0, + 0, 0, 0, 0, 71, 0, 0, 0, 0, 114, + 0, 0, 1552, 0, 0, 1085, 1086, 0, 0, 1553, + 72, 0, 71, 0, 0, 0, 0, 0, 0, 73, + 0, 0, 0, 0, 0, 0, 0, 0, 72, 0, + 467, 0, 225, 74, 75, 76, 77, 
73, 0, 78, + 0, 172, 0, 0, 0, 0, 0, 0, 467, 0, + 0, 74, 75, 76, 77, 0, 0, 78, 0, 0, + 0, 0, 0, 0, 0, 0, 79, 80, 81, 82, + 83, 84, 85, 0, 0, 68, 69, 0, 70, 0, + 0, 0, 0, 142, 79, 80, 81, 82, 83, 84, + 85, 0, 0, 0, 0, 0, 68, 69, 0, 70, + 168, 169, 170, 0, 142, 0, 0, 0, 0, 0, + 0, 114, 0, 0, 1298, 0, 0, 285, 168, 169, + 170, 1299, 0, 0, 71, 0, 0, 0, 0, 0, + 515, 0, 114, 0, 0, 285, 0, 0, 0, 0, + 72, 0, 1414, 0, 0, 71, 0, 0, 0, 73, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 72, 0, 74, 75, 76, 77, 0, 0, 78, + 73, 0, 0, 0, 0, 68, 69, 0, 70, 0, + 0, 0, 0, 142, 74, 75, 76, 77, 0, 0, + 78, 0, 0, 0, 0, 0, 79, 80, 81, 82, + 83, 84, 85, 0, 0, 0, 0, 0, 0, 0, + 0, 114, 0, 0, 0, 0, 0, 79, 80, 81, + 82, 83, 84, 85, 71, 0, 0, 0, 0, 0, + 168, 169, 170, 0, 0, 0, 0, 0, 0, 171, + 72, 0, 0, 0, 0, 0, 0, 0, 0, 73, + 0, 168, 169, 170, 1554, 0, 0, 171, 0, 68, + 69, 0, 868, 74, 75, 76, 77, 142, 0, 78, + 68, 69, 0, 70, 0, 0, 0, 68, 69, 0, + 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 114, 79, 80, 81, 82, + 83, 84, 85, 0, 0, 0, 0, 0, 71, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 71, + 0, 0, 0, 0, 72, 0, 71, 0, 0, 0, + 168, 169, 170, 73, 0, 72, 0, 0, 0, 0, + 0, 0, 72, 0, 73, 0, 0, 74, 75, 76, + 77, 73, 0, 78, 0, 0, 0, 0, 74, 75, + 76, 77, 0, 0, 78, 74, 75, 76, 77, 171, + 0, 78, 68, 69, 0, 70, 0, 0, 0, 0, + 79, 80, 81, 82, 83, 84, 85, 0, 0, 0, + 171, 79, 80, 81, 82, 83, 84, 85, 79, 80, + 81, 82, 83, 84, 85, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 168, 169, 170, 0, 0, 0, + 0, 71, 0, 0, 0, 0, 0, 0, 231, 0, + 0, 0, 0, 0, 0, 68, 69, 72, 70, 0, + 0, 0, 0, 1237, 1238, 1239, 73, 1240, 1241, 1242, + 1243, 0, 1244, 1245, 208, 0, 1246, 1247, 1248, 1249, + 74, 75, 76, 77, 0, 1250, 78, 0, 0, 171, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, + 0, 0, 0, 79, 80, 81, 82, 83, 84, 85, + 72, 0, 0, 0, 0, 0, 0, 0, 0, 73, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 74, 75, 76, 77, 0, 0, 78, + 875, 876, 877, 0, 878, 879, 880, 881, 0, 882, + 883, 208, 0, 884, 885, 886, 887, 0, 0, 0, + 888, 889, 0, 171, 0, 0, 79, 80, 81, 82, + 83, 84, 85, 0, 746, 0, 0, 602, 143, 603, + 0, 935, 144, 145, 146, 147, 148, 0, 149, 150, + 151, 152, 0, 153, 154, 0, 0, 155, 156, 157, + 158, 0, 0, 114, 159, 160, 0, 0, 68, 0, + 0, 70, 0, 161, 0, 162, 0, 0, 0, 0, + 0, 3, 0, 0, 0, 0, 0, 0, 0, 890, + 163, 164, 248, 281, 143, 282, 0, 0, 144, 145, + 146, 147, 148, 0, 149, 150, 151, 152, 0, 153, + 154, 0, 0, 155, 156, 157, 158, 71, 0, 0, + 159, 160, 0, 0, 0, 0, 166, 0, 0, 161, + 0, 162, 0, 72, 0, 0, 937, 0, 0, 0, + 0, 0, 73, 0, 0, 0, 163, 164, 248, 0, + 0, 0, 0, 0, 0, 0, 74, 75, 76, 77, + 0, 0, 78, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 166, 0, 0, 0, 0, 0, 0, 79, + 80, 81, 82, 83, 84, 85, 0, 0, 0, 1416, + 0, 0, 0, 0, 0, 361, 362, 363, 364, 365, + 366, 367, 368, 369, 370, 371, 372, 373, 0, 0, + 0, 0, 8, 0, 0, 0, 374, 375, 376, 377, + 378, 379, 0, 0, 0, 0, 0, 0, 0, 0, + 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, + 68, 0, 0, 70, 0, 0, 0, 11, 12, 13, + 14, 0, 0, 3, 0, 0, 0, 0, 380, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 448, 128, 148, 149, 150, 129, 130, 131, 132, 133, - 0, 134, 135, 136, 137, 0, 138, 139, 0, 0, - 140, 141, 142, 143, 0, 0, 0, 144, 145, 0, - 0, 0, 0, 0, 0, 0, 146, 151, 147, 0, + 0, 0, 381, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 148, 149, 150, 0, 0, 1019, 1020, - 0, 1021, 1022, 1023, 1024, 1025, 1026, 0, 1027, 1028, - 0, 1029, 1030, 1031, 1032, 1033, 4, 5, 6, 7, - 8, 3, 157, 0, 0, 0, 0, 0, 151, 0, + 0, 0, 0, 0, 0, 72, 0, 382, 383, 0, + 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 74, 75, + 76, 
77, 0, 0, 78, 28, 29, 30, 31, 32, + 33, 34, 0, 384, 385, 0, 0, 0, 0, 0, + 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 79, 80, 81, 82, 83, 84, 85, 0, 0, + 0, 0, 0, 0, 0, 0, 172, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, + 0, 0, 0, 0, 8, 0, 0, 0, 374, 375, + 376, 377, 378, 379, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 10, 68, 0, 0, 70, 0, 0, + 0, 68, 69, 0, 70, 0, 0, 3, 0, 11, + 12, 13, 14, 0, 0, 0, 0, 0, 0, 0, + 380, 0, 0, 0, 0, 143, 0, 0, 0, 144, + 145, 146, 147, 148, 381, 149, 150, 151, 152, 0, + 153, 154, 0, 71, 155, 156, 157, 158, 0, 0, + 71, 159, 160, 0, 0, 0, 0, 0, 0, 72, + 161, 0, 162, 0, 0, 0, 72, 0, 73, 382, + 383, 0, 0, 0, 0, 73, 0, 163, 164, 248, + 0, 0, 74, 75, 76, 77, 0, 0, 78, 74, + 75, 76, 77, 0, 0, 78, 0, 28, 29, 30, + 31, 32, 33, 34, 0, 384, 757, 0, 0, 0, + 0, 0, 35, 166, 0, 79, 80, 81, 82, 83, + 84, 85, 79, 80, 81, 82, 83, 84, 85, 0, + 0, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 371, 372, 373, 0, 0, 0, 0, 8, 0, + 0, 0, 374, 375, 376, 377, 378, 379, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 10, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 11, 12, 13, 14, 0, 0, 0, + 0, 0, 0, 0, 380, 0, 0, 0, 0, 143, + 0, 0, 0, 144, 145, 146, 147, 148, 381, 149, + 150, 151, 152, 0, 153, 154, 0, 0, 155, 156, + 157, 158, 450, 0, 0, 159, 160, 0, 0, 0, + 0, 0, 0, 0, 161, 0, 162, 0, 0, 0, + 0, 0, 0, 382, 383, 0, 0, 0, 0, 0, + 0, 163, 164, 248, 0, 451, 0, 452, 453, 454, + 455, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 28, 29, 30, 31, 32, 33, 34, 0, 384, + 944, 0, 0, 0, 0, 0, 35, 166, 0, 0, + 0, 921, 0, 0, 0, 456, 457, 458, 459, 0, + 0, 460, 0, 0, 0, 461, 462, 463, 740, 3, + 0, 0, 0, 0, 143, 0, 0, 172, 144, 145, + 146, 147, 148, 0, 149, 150, 151, 152, 0, 153, + 154, 0, 0, 155, 156, 157, 158, 0, 0, 0, + 159, 160, 0, 0, 0, 0, 0, 0, 0, 161, + 0, 162, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 163, 164, 248, 143, + 0, 0, 0, 144, 145, 146, 147, 148, 0, 149, + 150, 151, 152, 0, 153, 154, 0, 0, 155, 156, + 157, 158, 0, 0, 0, 159, 160, 0, 0, 0, + 0, 0, 166, 0, 161, 0, 162, 0, 0, 464, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 163, 164, 248, 0, 0, 1035, 1036, 0, 1037, + 1038, 1039, 1040, 1041, 1042, 0, 1043, 1044, 0, 1045, + 1046, 1047, 1048, 1049, 0, 0, 4, 5, 6, 7, + 8, 0, 0, 0, 0, 0, 0, 166, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 12, 13, 14, 0, - 0, 0, 15, 16, 0, 0, 0, 0, 17, 0, - 520, 18, 0, 0, 0, 0, 0, 0, 19, 20, - 859, 860, 861, 0, 862, 863, 864, 865, 0, 866, - 867, 193, 0, 868, 869, 870, 871, 0, 0, 0, - 872, 873, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 724, 0, 0, - 0, 0, 0, 0, 0, 0, 157, 0, 0, 0, - 0, 0, 0, 0, 21, 22, 0, 23, 24, 25, - 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 0, 0, 508, 0, 0, 0, 3, 521, 35, 6, - 7, 8, 0, 0, 0, 0, 0, 0, 874, 0, - 0, 522, 880, 0, 0, 0, 523, 0, 0, 9, - 10, 157, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 11, 12, 13, 14, - 0, 524, 525, 0, 0, 520, 1034, 1035, 0, 1036, - 1037, 1038, 0, 1039, 1040, 0, 0, 1041, 1042, 0, - 1043, 526, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 157, 1044, 1045, 1046, 1047, 1048, 1049, 1050, - 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 624, 0, - 0, 0, 128, 0, 0, 0, 527, 528, 131, 132, - 133, 0, 134, 135, 136, 137, 0, 138, 139, 0, - 0, 140, 141, 142, 143, 0, 0, 0, 1286, 145, - 0, 1059, 0, 0, 28, 29, 30, 31, 32, 33, - 34, 0, 521, 529, 6, 7, 8, 0, 0, 35, - 0, 0, 0, 0, 0, 0, 522, 0, 0, 0, - 0, 523, 0, 0, 9, 10, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1287, 0, 0, - 0, 11, 12, 13, 14, 0, 524, 525, 0, 28, - 29, 30, 31, 32, 33, 34, 1288, 0, 0, 0, - 0, 0, 0, 0, 35, 0, 526, 0, 0, 0, - 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 
625, 0, 613, 626, 0, 614, 615, 0, + 0, 172, 15, 16, 0, 0, 68, 69, 17, 70, + 0, 18, 0, 0, 0, 0, 0, 0, 19, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 527, 528, 800, 0, 0, 0, 0, 801, 802, - 0, 803, 804, 805, 806, 807, 808, 0, 809, 810, - 0, 811, 812, 813, 814, 815, 0, 0, 0, 28, - 29, 30, 31, 32, 33, 34, 0, 0, 1148, 0, - 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 28, 29, 30, 31, 32, - 33, 34, 0, 0, 627, 0, 0, 816, 0, 817, - 35, 0, 0, 800, 818, 0, 0, 0, 801, 802, - 0, 803, 804, 805, 806, 807, 808, 0, 809, 810, - 819, 811, 812, 813, 814, 815, 68, 69, 0, 70, - 859, 860, 861, 0, 862, 863, 864, 865, 0, 866, - 867, 193, 0, 868, 869, 870, 871, 0, 0, 0, - 872, 873, 0, 820, 0, 0, 0, 0, 0, 0, - 298, 0, 0, 0, 0, 0, 0, 816, 0, 817, - 0, 0, 0, 0, 818, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 299, - 819, 0, 0, 300, 0, 0, 301, 302, 0, 0, - 0, 303, 304, 305, 306, 307, 308, 309, 310, 311, - 312, 313, 314, 0, 0, 0, 0, 0, 874, 315, - 0, 0, 316, 820, 0, 0, 0, 0, 0, 317, - 0, 0, 0, 0, 0, 0, 0, 0, 318, 0, - 0, 0, 0, 821, 0, 822, 823, 824, 825, 826, - 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, - 837, 838, 0, 0, 0, 839, 0, 0, 0, 0, - 0, 0, 0, 0, 840, 0, 0, 0, 0, 0, + 0, 875, 876, 877, 0, 878, 879, 880, 881, 0, + 882, 883, 208, 0, 884, 885, 886, 887, 0, 0, + 3, 888, 889, 0, 0, 71, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 72, 0, 68, 21, 22, 70, 23, 24, 25, + 73, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 0, 0, 524, 0, 74, 75, 76, 77, 35, 536, + 78, 0, 3, 0, 0, 0, 0, 896, 0, 0, + 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, + 890, 0, 71, 0, 0, 0, 0, 79, 80, 81, + 82, 83, 84, 85, 0, 0, 0, 0, 72, 0, + 0, 0, 0, 0, 0, 1050, 1051, 73, 1052, 1053, + 1054, 536, 1055, 1056, 0, 0, 1057, 1058, 0, 1059, + 0, 74, 75, 76, 77, 0, 0, 78, 0, 0, + 0, 172, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, + 1068, 1069, 1070, 1071, 1072, 1073, 1074, 537, 0, 6, + 7, 8, 0, 0, 79, 80, 81, 82, 83, 84, + 85, 538, 0, 0, 0, 0, 539, 0, 0, 9, + 10, 0, 0, 0, 0, 0, 0, 133, 0, 0, + 1075, 0, 0, 0, 0, 0, 11, 12, 13, 14, + 0, 540, 541, 0, 0, 0, 0, 0, 0, 537, + 0, 6, 7, 8, 0, 0, 0, 0, 0, 0, + 0, 542, 0, 538, 0, 0, 0, 0, 539, 0, + 0, 9, 10, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 11, 12, + 13, 14, 0, 540, 541, 0, 543, 544, 0, 0, + 0, 28, 29, 30, 31, 32, 33, 34, 0, 0, + 0, 0, 0, 542, 0, 0, 35, 0, 0, 0, + 0, 0, 0, 0, 28, 29, 30, 31, 32, 33, + 34, 0, 0, 545, 0, 0, 0, 0, 0, 35, + 0, 0, 0, 0, 0, 816, 0, 0, 543, 544, + 817, 818, 0, 819, 820, 821, 822, 823, 824, 0, + 825, 826, 0, 827, 828, 829, 830, 831, 0, 0, + 0, 68, 69, 0, 70, 0, 28, 29, 30, 31, + 32, 33, 34, 0, 816, 1164, 0, 0, 0, 817, + 818, 35, 819, 820, 821, 822, 823, 824, 0, 825, + 826, 0, 827, 828, 829, 830, 831, 0, 0, 832, + 0, 833, 0, 0, 0, 0, 834, 0, 0, 0, + 71, 0, 0, 0, 0, 68, 69, 0, 70, 0, + 0, 0, 0, 835, 0, 0, 72, 0, 0, 0, + 0, 0, 0, 0, 0, 73, 0, 0, 832, 0, + 833, 0, 0, 0, 0, 834, 0, 0, 0, 74, + 75, 76, 77, 0, 0, 78, 836, 256, 257, 258, + 0, 0, 835, 0, 71, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 72, 0, 259, 80, 81, 82, 83, 84, 85, 73, + 0, 0, 0, 0, 0, 836, 0, 0, 0, 0, + 0, 0, 0, 74, 75, 76, 77, 0, 0, 78, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 79, 80, 81, 82, + 83, 84, 85, 0, 0, 0, 837, 0, 838, 839, + 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, + 850, 851, 852, 853, 854, 0, 0, 0, 855, 0, + 68, 69, 0, 70, 0, 0, 260, 856, 261, 262, + 263, 264, 0, 0, 0, 837, 0, 838, 839, 840, + 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, + 851, 852, 853, 854, 314, 0, 0, 855, 0, 857, + 0, 0, 0, 0, 0, 0, 856, 0, 253, 71, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 315, 0, 72, 0, 316, 0, 0, + 317, 318, 0, 0, 73, 319, 320, 321, 322, 323, + 324, 325, 326, 327, 328, 329, 330, 0, 74, 75, + 76, 77, 0, 331, 78, 68, 69, 332, 70, 0, + 0, 0, 0, 0, 333, 68, 69, 0, 70, 0, + 0, 0, 0, 334, 0, 0, 0, 0, 0, 0, + 0, 79, 80, 81, 82, 83, 84, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 71, 0, 0, 0, 0, 68, + 69, 0, 70, 0, 71, 0, 0, 0, 0, 0, + 72, 0, 0, 443, 0, 444, 445, 0, 0, 73, + 72, 0, 446, 0, 0, 266, 267, 0, 0, 73, + 0, 0, 0, 74, 75, 76, 77, 0, 0, 78, + 0, 0, 0, 74, 75, 76, 77, 0, 71, 78, + 0, 0, 0, 68, 69, 0, 70, 135, 0, 0, + 0, 0, 0, 0, 72, 0, 79, 80, 81, 82, + 83, 84, 85, 73, 0, 0, 79, 80, 81, 82, + 83, 84, 85, 0, 0, 0, 0, 74, 75, 76, + 77, 0, 0, 78, 0, 0, 68, 69, 0, 70, + 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 483, 484, 0, 0, 72, 0, + 79, 80, 81, 82, 83, 84, 85, 73, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 841, 0, 0, 0, - 0, 0, 0, 821, 0, 822, 823, 824, 825, 826, - 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, - 837, 838, 324, 98, 0, 839, 0, 0, 100, 0, - 101, 0, 0, 0, 840, 0, 0, 102, 0, 0, + 0, 74, 75, 76, 77, 71, 0, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 103, 325, 0, 326, 327, 328, - 329, 330, 0, 0, 0, 0, 331, 0, 0, 104, - 0, 0, 0, 0, 0, 332, 0, 0, 0, 0, - 333, 0, 334, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 335, 336, 337, 338, 339, 340, - 341, 342, 0, 0, 0, 0, 0, 343 + 0, 72, 0, 0, 0, 0, 0, 0, 0, 0, + 73, 0, 0, 0, 79, 80, 81, 82, 83, 84, + 85, 0, 0, 0, 74, 75, 76, 77, 143, 0, + 78, 0, 0, 0, 146, 147, 148, 0, 149, 150, + 151, 152, 0, 153, 154, 0, 0, 155, 156, 157, + 158, 0, 0, 0, 1302, 160, 0, 79, 80, 81, + 82, 83, 84, 85, 0, 0, 0, 0, 0, 340, + 113, 0, 0, 0, 0, 115, 0, 116, 0, 0, + 0, 0, 0, 0, 117, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 118, 341, 1303, 342, 343, 344, 345, 346, 0, + 0, 0, 0, 347, 0, 0, 119, 0, 0, 0, + 0, 0, 348, 1304, 0, 0, 0, 349, 0, 0, + 350, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 351, 352, 353, 354, 355, 356, 357, 358, + 0, 0, 0, 0, 0, 359 }; static const yytype_int16 yycheck[] = { - 5, 163, 25, 25, 1, 103, 104, 25, 1, 676, - 323, 323, 323, 43, 1, 20, 958, 57, 48, 93, - 722, 661, 94, 28, 29, 30, 31, 1, 989, 59, - 9, 21, 22, 1, 471, 1084, 77, 1, 188, 189, - 190, 191, 192, 12, 185, 23, 24, 735, 25, 14, - 7, 409, 7, 411, 1112, 5, 6, 19, 8, 9, - 25, 5, 6, 8, 8, 163, 9, 734, 7, 8, - 60, 63, 5, 6, 8, 8, 33, 9, 63, 484, - 485, 486, 9, 73, 56, 108, 184, 9, 20, 7, - 5, 6, 9, 8, 9, 1461, 9, 169, 7, 7, - 5, 6, 136, 8, 1079, 20, 129, 181, 41, 114, - 9, 116, 117, 118, 212, 9, 5, 6, 9, 8, - 20, 110, 112, 113, 146, 1449, 41, 149, 105, 152, - 5, 6, 136, 8, 377, 110, 41, 136, 5, 6, - 105, 8, 5, 6, 185, 8, 168, 5, 6, 1448, - 8, 156, 157, 197, 194, 9, 8, 1481, 293, 233, - 232, 105, 106, 107, 215, 300, 20, 157, 198, 5, - 6, 9, 8, 5, 6, 180, 8, 1446, 7, 9, - 10, 1480, 9, 10, 9, 208, 11, 64, 9, 10, - 11, 156, 44, 267, 882, 883, 14, 8, 163, 17, - 205, 206, 9, 10, 33, 210, 31, 32, 896, 1478, - 31, 32, 298, 5, 9, 9, 8, 9, 9, 1585, - 227, 9, 40, 41, 9, 10, 388, 45, 233, 47, - 292, 221, 222, 44, 239, 166, 54, 299, 260, 380, - 247, 65, 66, 5, 6, 210, 8, 321, 125, 1441, - 294, 656, 291, 71, 8, 1447, 279, 924, 87, 293, - 299, 39, 267, 268, 287, 1240, 297, 272, 86, 291, - 275, 276, 277, 51, 52, 280, 281, 682, 289, 298, - 285, 9, 272, 294, 217, 952, 974, 1479, 215, 293, - 274, 69, 291, 367, 392, 538, 286, 293, 289, 288, - 297, 293, 217, 299, 297, 9, 10, 11, 293, 298, - 297, 294, 217, 288, 11, 298, 390, 289, 291, 391, - 215, 290, 7, 297, 297, 304, 323, 31, 32, 297, - 323, 293, 12, 297, 31, 32, 323, 293, 293, 
380, - 297, 119, 383, 299, 306, 60, 387, 292, 215, 323, - 292, 391, 295, 292, 298, 323, 300, 307, 292, 323, - 304, 305, 293, 289, 303, 395, 389, 299, 358, 291, - 360, 304, 1333, 5, 364, 416, 8, 299, 295, 297, - 289, 289, 295, 457, 299, 294, 294, 294, 793, 304, - 380, 298, 289, 383, 384, 295, 295, 387, 1456, 304, - 293, 295, 24, 25, 295, 289, 299, 385, 386, 298, - 400, 401, 34, 403, 293, 405, 393, 558, 492, 294, - 299, 495, 496, 388, 389, 566, 423, 1476, 393, 407, - 453, 421, 422, 289, 1492, 298, 423, 424, 425, 426, - 298, 1081, 847, 295, 449, 207, 451, 298, 885, 423, - 424, 425, 426, 304, 528, 553, 289, 295, 289, 250, - 251, 252, 298, 297, 469, 295, 298, 1169, 295, 51, - 295, 295, 544, 478, 295, 294, 294, 5, 6, 469, - 8, 9, 5, 6, 293, 8, 560, 492, 295, 294, - 13, 5, 6, 7, 8, 9, 294, 289, 463, 13, - 295, 295, 5, 6, 469, 8, 1467, 295, 5, 136, - 13, 8, 9, 291, 28, 589, 291, 558, 41, 509, - 295, 44, 545, 136, 302, 566, 598, 41, 51, 292, - 293, 105, 106, 107, 524, 525, 5, 6, 41, 8, - 136, 44, 40, 615, 13, 550, 551, 45, 51, 47, - 136, 65, 66, 543, 1496, 596, 54, 291, 292, 549, - 306, 295, 552, 593, 9, 10, 11, 607, 138, 139, - 294, 594, 41, 71, 298, 294, 566, 295, 719, 298, - 5, 6, 51, 8, 589, 294, 31, 32, 86, 298, - 294, 12, 622, 623, 5, 6, 295, 8, 628, 18, - 630, 55, 13, 57, 58, 59, 596, 1295, 1296, 295, - 708, 294, 925, 925, 925, 298, 714, 767, 61, 769, - 770, 771, 772, 773, 294, 5, 6, 297, 8, 634, - 41, 705, 706, 707, 271, 640, 159, 160, 161, 304, - 662, 294, 9, 633, 11, 159, 160, 161, 678, 294, - 673, 225, 294, 298, 295, 292, 159, 160, 161, 294, - 1327, 298, 652, 298, 31, 32, 292, 293, 305, 294, - 693, 676, 8, 298, 679, 294, 681, 297, 719, 298, - 5, 6, 294, 8, 299, 672, 298, 759, 13, 294, - 159, 160, 161, 298, 119, 120, 294, 294, 703, 689, - 690, 298, 707, 744, 291, 294, 711, 712, 713, 298, - 733, 294, 753, 294, 737, 298, 41, 298, 1406, 294, - 100, 101, 102, 298, 298, 1413, 300, 295, 294, 734, - 304, 305, 298, 294, 724, 294, 726, 298, 728, 298, - 730, 1408, 1100, 294, 1102, 125, 299, 298, 159, 160, - 161, 9, 23, 220, 744, 26, 27, 28, 29, 289, - 31, 32, 33, 293, 754, 755, 25, 291, 26, 27, - 28, 295, 295, 292, 293, 298, 848, 196, 756, 294, - 290, 855, 299, 298, 298, 857, 776, 206, 778, 779, - 209, 796, 40, 798, 294, 298, 291, 45, 298, 47, - 295, 294, 792, 294, 845, 23, 54, 298, 26, 27, - 28, 29, 1500, 31, 32, 33, 25, 26, 27, 28, - 29, 294, 299, 71, 72, 298, 848, 298, 293, 298, - 294, 905, 297, 294, 159, 160, 161, 294, 86, 219, - 303, 221, 222, 223, 224, 65, 66, 888, 878, 290, - 105, 106, 107, 1541, 5, 6, 294, 8, 848, 931, - 138, 139, 281, 282, 283, 284, 285, 286, 287, 117, - 46, 290, 48, 49, 50, 1542, 294, 296, 75, 76, - 77, 136, 294, 957, 1572, 959, 292, 298, 18, 876, - 880, 295, 910, 294, 917, 567, 568, 569, 104, 1587, - 923, 294, 907, 109, 25, 26, 27, 28, 29, 914, - 291, 292, 7, 903, 111, 112, 113, 1584, 289, 924, - 26, 27, 28, 29, 75, 76, 291, 292, 925, 919, - 7, 921, 925, 292, 975, 976, 293, 294, 925, 926, - 927, 1013, 929, 75, 76, 77, 295, 952, 975, 976, - 295, 925, 926, 927, 300, 929, 19, 925, 926, 927, - 20, 925, 926, 927, 834, 835, 971, 26, 27, 28, - 29, 208, 18, 298, 19, 301, 105, 106, 107, 111, - 112, 113, 292, 292, 110, 975, 976, 1149, 1150, 1151, - 1152, 289, 5, 6, 1066, 8, 289, 1159, 1160, 1161, - 15, 16, 17, 289, 289, 20, 21, 22, 23, 24, - 1000, 26, 27, 28, 29, 303, 31, 32, 289, 19, - 35, 36, 37, 38, 28, 29, 41, 42, 43, 294, - 62, 105, 106, 107, 305, 62, 51, 294, 53, 169, - 170, 171, 172, 298, 294, 300, 295, 294, 299, 304, - 305, 295, 295, 68, 69, 70, 295, 295, 303, 63, - 295, 
295, 295, 193, 194, 195, 196, 26, 27, 28, - 29, 1145, 1146, 1063, 212, 281, 282, 283, 284, 285, - 286, 287, 297, 297, 290, 297, 294, 305, 103, 18, - 296, 215, 215, 295, 1116, 1085, 1086, 1087, 292, 297, - 290, 292, 1092, 1093, 1094, 1095, 1096, 1097, 8, 1099, - 1100, 1101, 41, 1103, 1104, 1105, 1106, 1107, 1108, 1109, - 1098, 1111, 297, 1113, 1102, 1115, 290, 1117, 1486, 1487, - 176, 177, 1110, 294, 294, 5, 6, 1142, 8, 1180, - 294, 294, 1214, 295, 301, 295, 1187, 18, 1189, 295, - 196, 281, 282, 283, 284, 285, 286, 287, 19, 295, - 290, 295, 208, 19, 210, 294, 296, 213, 214, 298, - 41, 300, 299, 292, 1139, 304, 305, 299, 295, 292, - 292, 1171, 298, 294, 1149, 1150, 1151, 1152, 295, 303, - 105, 106, 107, 294, 1159, 1160, 1161, 295, 202, 203, - 1190, 205, 295, 73, 294, 75, 76, 294, 294, 1199, - 295, 1309, 82, 295, 295, 234, 1206, 1207, 242, 294, - 246, 22, 295, 290, 298, 154, 300, 1301, 1218, 297, - 304, 305, 297, 289, 295, 281, 282, 283, 284, 285, - 286, 287, 196, 301, 290, 294, 303, 294, 1238, 1239, - 296, 303, 1326, 136, 1241, 299, 299, 1247, 1248, 1249, - 1250, 1251, 1252, 1335, 1254, 1287, 299, 196, 299, 299, - 299, 20, 295, 62, 62, 1253, 8, 206, 5, 6, - 209, 8, 249, 154, 1289, 295, 13, 295, 295, 16, - 289, 294, 307, 20, 21, 22, 23, 24, 294, 26, - 27, 28, 29, 298, 31, 32, 298, 295, 35, 36, - 37, 38, 299, 295, 41, 42, 43, 295, 294, 294, - 166, 295, 1327, 295, 51, 196, 53, 295, 292, 299, - 299, 255, 292, 1407, 104, 206, 19, 300, 209, 109, - 295, 68, 69, 70, 1334, 303, 297, 299, 294, 294, - 294, 1341, 281, 282, 283, 284, 285, 286, 287, 294, - 294, 290, 294, 294, 104, 1352, 294, 296, 294, 109, - 290, 294, 294, 294, 299, 8, 103, 295, 105, 106, - 107, 295, 299, 298, 295, 300, 1409, 299, 1462, 304, - 305, 14, 119, 120, 17, 299, 295, 1469, 295, 295, - 301, 299, 295, 1408, 299, 295, 295, 299, 295, 295, - 281, 282, 283, 284, 285, 286, 287, 40, 41, 290, - 299, 299, 45, 295, 47, 296, 299, 295, 295, 295, - 299, 54, 159, 160, 161, 1425, 1426, 1427, 5, 6, - 299, 8, 299, 295, 295, 299, 13, 295, 71, 16, - 295, 292, 299, 20, 21, 22, 23, 24, 19, 26, - 27, 28, 29, 86, 31, 32, 295, 295, 35, 36, - 37, 38, 1494, 1545, 41, 42, 43, 294, 8, 299, - 1578, 299, 294, 299, 51, 294, 53, 1477, 294, 294, - 303, 295, 1482, 1483, 1484, 303, 1486, 295, 1488, 1489, - 1490, 68, 69, 70, 303, 295, 299, 294, 1582, 1487, - 294, 281, 282, 283, 284, 285, 286, 287, 295, 299, - 1510, 1511, 1512, 295, 299, 299, 296, 295, 294, 294, - 294, 294, 294, 294, 294, 294, 103, 1542, 105, 106, - 107, 281, 282, 283, 284, 285, 286, 287, 294, 294, - 290, 18, 119, 120, 295, 294, 296, 294, 1581, 294, - 8, 299, 295, 19, 295, 295, 300, 303, 294, 294, - 299, 298, 294, 300, 294, 298, 295, 304, 305, 1584, - 307, 295, 294, 1573, 295, 295, 295, 5, 6, 294, - 8, 8, 159, 160, 161, 13, 295, 295, 16, 294, - 294, 255, 20, 21, 22, 23, 24, 103, 26, 27, - 28, 29, 19, 31, 32, 295, 295, 35, 36, 37, - 38, 294, 179, 41, 42, 43, 295, 291, 25, 463, - 1179, 702, 716, 51, 1136, 53, 393, 1138, 25, 962, - 1178, 344, 607, 57, 1066, 564, 5, 6, 843, 8, - 68, 69, 70, 885, 13, 673, 964, 16, 850, 1121, - 1421, 20, 21, 22, 23, 24, 855, 26, 27, 28, - 29, 10, 31, 32, 690, 1130, 35, 36, 37, 38, - 475, 115, 41, 42, 43, 103, 526, 105, 106, 107, - -1, -1, 51, -1, 53, 720, 424, -1, -1, -1, - -1, -1, -1, -1, -1, 172, -1, 174, 175, 68, - 69, 70, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 193, 194, 195, 196, - -1, 298, -1, 300, -1, -1, -1, 304, 305, -1, - 307, 159, 160, 161, 103, 104, 5, 6, -1, 8, - -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, + 5, 108, 25, 109, 92, 692, 178, 118, 
119, 425, + 1, 427, 1, 57, 25, 20, 339, 14, 1, 339, + 974, 1, 339, 28, 29, 30, 31, 1, 25, 1005, + 738, 21, 22, 1, 1128, 677, 1100, 25, 9, 12, + 7, 7, 487, 43, 25, 23, 24, 9, 48, 7, + 8, 8, 8, 500, 501, 502, 200, 111, 20, 59, + 9, 8, 63, 750, 7, 19, 33, 178, 9, 56, + 60, 7, 7, 65, 66, 751, 111, 9, 184, 63, + 9, 10, 11, 9, 137, 11, 137, 9, 199, 196, + 9, 203, 204, 205, 206, 207, 9, 44, 88, 20, + 123, 9, 31, 32, 9, 31, 32, 9, 10, 9, + 10, 9, 200, 9, 10, 9, 227, 9, 10, 11, + 198, 144, 167, 120, 129, 64, 131, 132, 133, 1095, + 9, 9, 120, 1477, 137, 8, 216, 127, 128, 31, + 32, 247, 249, 1465, 167, 393, 1464, 7, 294, 11, + 9, 10, 23, 1457, 300, 26, 27, 28, 29, 1463, + 31, 32, 33, 1462, 228, 209, 171, 172, 137, 31, + 32, 44, 40, 33, 171, 1497, 283, 45, 1496, 47, + 161, 178, 172, 164, 248, 9, 54, 126, 24, 25, + 195, 1495, 294, 293, 294, 1494, 20, 9, 34, 301, + 223, 23, 183, 71, 26, 27, 28, 29, 294, 31, + 32, 33, 293, 213, 300, 220, 221, 295, 86, 300, + 225, 292, 898, 899, 299, 672, 39, 87, 225, 300, + 337, 298, 404, 293, 294, 289, 912, 14, 51, 52, + 17, 294, 9, 294, 249, 299, 236, 237, 8, 294, + 255, 698, 396, 940, 289, 275, 69, 290, 1602, 26, + 27, 28, 295, 40, 41, 294, 216, 294, 45, 299, + 47, 300, 295, 300, 290, 216, 383, 54, 283, 284, + 303, 968, 295, 288, 9, 298, 291, 292, 293, 292, + 1256, 296, 297, 294, 71, 276, 301, 408, 288, 406, + 7, 407, 12, 290, 296, 290, 554, 120, 396, 86, + 294, 399, 302, 292, 990, 403, 292, 299, 291, 298, + 296, 290, 313, 305, 313, 294, 307, 106, 107, 108, + 313, 298, 298, 313, 305, 293, 293, 293, 300, 313, + 294, 293, 294, 292, 432, 313, 304, 290, 339, 296, + 339, 300, 295, 307, 60, 290, 339, 295, 294, 339, + 295, 299, 809, 407, 296, 339, 473, 296, 292, 293, + 296, 339, 296, 1349, 296, 296, 292, 296, 1472, 290, + 296, 295, 405, 296, 374, 299, 376, 290, 296, 295, + 380, 296, 293, 299, 296, 290, 296, 290, 296, 294, + 296, 508, 296, 295, 511, 512, 396, 404, 405, 399, + 400, 411, 409, 403, 1508, 272, 863, 296, 296, 290, + 290, 409, 294, 401, 402, 290, 416, 417, 1492, 419, + 574, 421, 583, 584, 585, 306, 293, 544, 582, 251, + 252, 253, 299, 65, 66, 423, 469, 437, 438, 306, + 439, 440, 441, 442, 560, 1097, 901, 295, 569, 439, + 465, 299, 467, 295, 295, 106, 107, 108, 299, 576, + 295, 439, 440, 441, 442, 139, 140, 1185, 298, 292, + 485, 295, 479, 294, 306, 299, 574, 298, 485, 494, + 303, 75, 76, 77, 582, 485, 137, 46, 605, 48, + 49, 50, 295, 508, 51, 295, 39, 1483, 614, 299, + 106, 107, 108, 46, 137, 48, 49, 50, 295, 295, + 299, 307, 301, 299, 612, 631, 305, 306, 112, 113, + 114, 137, 39, 295, 137, 525, 295, 299, 561, 46, + 299, 48, 49, 50, 25, 26, 27, 28, 29, 295, + 540, 541, 137, 299, 87, 88, 89, 296, 1512, 295, + 295, 566, 567, 299, 299, 12, 106, 107, 108, 559, + 296, 9, 10, 11, 9, 565, 11, 296, 568, 623, + 87, 88, 89, 106, 107, 108, 55, 610, 57, 58, + 59, 735, 582, 31, 32, 295, 31, 32, 61, 299, + 605, 134, 135, 136, 305, 138, 295, 295, 141, 609, + 299, 299, 295, 724, 721, 722, 723, 295, 941, 730, + 296, 941, 612, 300, 941, 139, 140, 134, 135, 136, + 226, 138, 298, 295, 141, 1311, 1312, 299, 638, 639, + 26, 27, 28, 29, 644, 650, 646, 735, 295, 295, + 295, 656, 299, 299, 299, 295, 1343, 292, 299, 649, + 301, 28, 29, 295, 305, 306, 689, 299, 8, 775, + 296, 783, 760, 785, 786, 787, 788, 789, 668, 295, + 295, 769, 300, 299, 299, 295, 709, 692, 25, 299, + 695, 295, 697, 221, 694, 299, 63, 678, 292, 293, + 1116, 300, 1118, 299, 292, 301, 292, 293, 296, 305, + 306, 294, 295, 300, 719, 705, 706, 295, 723, 295, + 688, 291, 727, 728, 729, 299, 749, 991, 992, 295, + 753, 25, 26, 
27, 28, 29, 295, 1424, 5, 6, + 295, 8, 9, 304, 295, 750, 1422, 850, 851, 295, + 740, 293, 742, 1429, 744, 295, 746, 291, 864, 299, + 296, 301, 7, 296, 871, 305, 306, 873, 295, 295, + 760, 290, 7, 861, 296, 293, 299, 301, 301, 40, + 770, 771, 305, 306, 45, 296, 47, 54, 19, 296, + 209, 19, 302, 54, 772, 20, 75, 76, 77, 293, + 293, 111, 792, 70, 794, 795, 290, 812, 290, 814, + 71, 72, 79, 290, 921, 290, 904, 304, 808, 26, + 27, 28, 29, 19, 295, 86, 93, 94, 95, 96, + 62, 295, 99, 112, 113, 114, 203, 204, 62, 206, + 1516, 947, 26, 27, 28, 29, 295, 295, 300, 304, + 105, 296, 296, 296, 296, 110, 296, 118, 213, 126, + 127, 128, 129, 130, 131, 132, 973, 296, 975, 296, + 296, 1558, 216, 298, 864, 298, 298, 295, 216, 296, + 293, 1557, 298, 864, 291, 293, 8, 298, 291, 295, + 5, 6, 296, 8, 894, 295, 295, 5, 6, 296, + 8, 296, 295, 991, 992, 19, 896, 296, 302, 300, + 933, 296, 300, 1589, 1601, 926, 939, 293, 923, 19, + 293, 296, 293, 1029, 892, 930, 41, 296, 1604, 919, + 295, 299, 304, 295, 295, 940, 296, 296, 295, 54, + 296, 243, 296, 296, 295, 935, 54, 937, 235, 247, + 941, 22, 941, 942, 943, 70, 945, 296, 941, 942, + 943, 941, 70, 968, 79, 295, 298, 941, 942, 943, + 291, 79, 298, 941, 942, 943, 1082, 945, 93, 94, + 95, 96, 987, 290, 99, 93, 94, 95, 96, 302, + 296, 99, 197, 1165, 1166, 1167, 1168, 300, 295, 39, + 304, 991, 992, 1175, 1176, 1177, 46, 300, 48, 49, + 50, 126, 127, 128, 129, 130, 131, 132, 126, 127, + 128, 129, 130, 131, 132, 300, 1016, 282, 283, 284, + 285, 286, 287, 288, 304, 300, 291, 295, 137, 20, + 300, 308, 297, 300, 296, 62, 62, 87, 88, 89, + 296, 8, 296, 296, 1161, 1162, 22, 23, 24, 105, + 26, 27, 28, 29, 110, 31, 32, 33, 250, 35, + 36, 37, 38, 299, 296, 295, 42, 43, 299, 295, + 105, 290, 300, 296, 296, 110, 1502, 1503, 167, 1079, + 296, 295, 295, 293, 134, 135, 136, 296, 138, 296, + 256, 141, 300, 218, 293, 300, 19, 301, 1196, 296, + 298, 1101, 1102, 1103, 304, 1203, 295, 1205, 1108, 1109, + 1110, 1111, 1112, 1113, 1230, 1115, 1116, 1117, 300, 1119, + 1120, 1121, 1122, 1123, 1124, 1125, 1114, 1127, 295, 1129, + 1118, 1131, 295, 1133, 295, 111, 295, 295, 1126, 295, + 295, 1132, 295, 1158, 295, 295, 295, 291, 1155, 296, + 8, 296, 18, 295, 300, 296, 296, 300, 1165, 1166, + 1167, 1168, 300, 300, 300, 296, 296, 300, 1175, 1176, + 1177, 300, 300, 296, 296, 296, 296, 296, 0, 19, + 305, 299, 296, 300, 300, 296, 301, 1187, 300, 296, + 296, 8, 300, 296, 296, 300, 18, 296, 296, 300, + 1317, 293, 302, 300, 1325, 296, 1206, 300, 296, 295, + 300, 295, 295, 300, 296, 1215, 295, 295, 84, 304, + 296, 295, 1222, 1223, 304, 1342, 282, 283, 284, 285, + 286, 287, 288, 304, 1234, 1351, 296, 300, 296, 300, + 8, 297, 296, 300, 296, 300, 296, 282, 283, 284, + 285, 286, 287, 288, 1254, 1255, 291, 295, 295, 295, + 300, 295, 297, 1263, 1264, 1265, 1266, 1267, 1268, 300, + 1270, 295, 295, 295, 295, 295, 295, 5, 6, 1257, + 8, 1269, 295, 295, 295, 13, 296, 296, 16, 296, + 1305, 296, 20, 21, 22, 23, 24, 295, 26, 27, + 28, 29, 304, 31, 32, 295, 1423, 35, 36, 37, + 38, 295, 1303, 41, 42, 43, 295, 299, 296, 296, + 296, 295, 295, 51, 296, 53, 54, 296, 1343, 19, + 296, 197, 18, 155, 156, 157, 158, 159, 8, 296, + 68, 69, 70, 209, 296, 211, 212, 295, 214, 215, + 1350, 79, 295, 256, 104, 177, 178, 1357, 296, 296, + 19, 1478, 295, 194, 296, 93, 94, 95, 96, 1485, + 25, 99, 194, 195, 196, 197, 104, 105, 307, 201, + 202, 409, 1195, 479, 718, 207, 732, 1152, 210, 25, + 1368, 1154, 1425, 978, 580, 217, 218, 1194, 126, 127, + 128, 129, 130, 131, 132, 360, 57, 1082, 859, 1424, + 689, 623, 901, 980, 1137, 1437, 282, 283, 284, 
285, + 286, 287, 288, 706, 871, 291, 866, 10, 1146, 491, + 130, 297, 160, 161, 162, 736, 440, -1, 542, -1, + -1, 1441, 1442, 1443, -1, 1561, -1, -1, -1, -1, + 14, 273, 274, 17, 276, 277, 278, -1, 280, 281, + 282, 283, 284, 285, 286, 287, 288, -1, 18, 197, + -1, -1, -1, -1, 1595, 297, 40, 41, -1, -1, + -1, 45, 1599, 47, 170, 171, 172, 173, -1, -1, + 54, -1, -1, 1493, -1, -1, -1, -1, 1498, 1499, + 1500, -1, 1502, -1, 1504, 1505, 1506, 71, 194, 195, + 196, 197, -1, -1, -1, 1503, -1, -1, -1, 1510, + -1, -1, 86, -1, -1, -1, 1526, 1527, 1528, -1, + 5, 6, -1, 8, -1, -1, -1, -1, 13, -1, + -1, 16, -1, 1558, -1, 20, 21, 22, 23, 24, + -1, 26, 27, 28, 29, -1, 31, 32, -1, -1, + 35, 36, 37, 38, -1, 1598, 41, 42, 43, -1, + -1, 299, -1, -1, -1, -1, 51, -1, 53, 54, + 308, -1, -1, -1, -1, -1, 1601, -1, -1, -1, + 1590, -1, -1, 68, 69, 70, 282, 283, 284, 285, + 286, 287, 288, -1, 79, 291, -1, -1, -1, -1, + -1, 297, -1, -1, -1, -1, -1, -1, 93, 94, + 95, 96, -1, -1, 99, -1, -1, 177, 178, 104, + -1, 106, 107, 108, -1, -1, -1, -1, -1, -1, + 18, -1, -1, -1, -1, 120, 121, 197, -1, -1, + -1, 126, 127, 128, 129, 130, 131, 132, -1, 209, + -1, 211, -1, 41, 214, 215, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 5, 6, -1, 8, -1, 160, 161, 162, 13, -1, + -1, 16, -1, -1, -1, 20, 21, 22, 23, 24, + -1, 26, 27, 28, 29, -1, 31, 32, -1, -1, + 35, 36, 37, 38, -1, -1, 41, 42, 43, -1, + -1, -1, -1, -1, -1, -1, 51, -1, 53, 54, + -1, -1, 282, 283, 284, 285, 286, 287, 288, -1, + -1, 291, -1, 68, 69, 70, -1, 297, -1, -1, + -1, -1, -1, -1, 79, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 93, 94, + 95, 96, -1, -1, 99, -1, -1, 155, -1, 104, + -1, 106, 107, 108, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 120, 121, -1, -1, -1, + -1, 126, 127, 128, 129, 130, 131, 132, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 197, + -1, -1, -1, -1, 299, -1, 301, -1, -1, 207, + 305, 306, 210, 308, -1, 160, 161, 162, 5, 6, + -1, 8, -1, -1, -1, -1, 13, -1, -1, 16, + -1, -1, -1, 20, 21, 22, 23, 24, -1, 26, + 27, 28, 29, -1, 31, 32, -1, -1, 35, 36, + 37, 38, -1, -1, 41, 42, 43, -1, -1, -1, + -1, -1, -1, -1, 51, -1, 53, 54, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 68, 69, 70, 282, 283, 284, 285, 286, 287, + 288, -1, 79, 291, -1, -1, -1, -1, -1, 297, + -1, -1, -1, -1, -1, -1, 93, 94, 95, 96, + -1, -1, 99, -1, -1, -1, -1, 104, -1, 106, + 107, 108, -1, -1, -1, -1, -1, -1, 18, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 126, + 127, 128, 129, 130, 131, 132, -1, -1, -1, -1, + -1, -1, -1, -1, 299, -1, 301, -1, -1, -1, + 305, 306, -1, 308, -1, -1, -1, -1, 5, 6, + -1, 8, -1, 160, 161, 162, 13, -1, -1, 16, + -1, -1, -1, 20, 21, 22, 23, 24, -1, 26, + 27, 28, 29, -1, 31, 32, -1, -1, 35, 36, + 37, 38, -1, -1, 41, 42, 43, -1, -1, 39, + -1, -1, -1, -1, 51, -1, 53, 54, -1, -1, + -1, -1, -1, -1, 54, -1, -1, -1, -1, -1, + -1, 68, 69, 70, -1, 72, -1, -1, -1, -1, + 70, -1, 79, -1, -1, -1, -1, -1, -1, 79, + -1, -1, -1, -1, -1, -1, 93, 94, 95, 96, + -1, -1, 99, 93, 94, 95, 96, 104, -1, 99, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 118, -1, 173, -1, 175, 176, -1, -1, 126, + 127, 128, 129, 130, 131, 132, 126, 127, 128, 129, + 130, 131, 132, -1, 194, 195, 196, 197, -1, -1, + -1, -1, 299, -1, 301, -1, -1, -1, 305, 306, + -1, 308, -1, 160, 161, 162, 5, 6, -1, 8, + -1, -1, -1, -1, 13, 18, -1, 16, -1, -1, -1, 20, 21, 22, 23, 24, -1, 26, 27, 28, 29, -1, 31, 32, -1, -1, 35, 36, 37, 38, -1, -1, 41, 42, 43, -1, -1, -1, -1, -1, - -1, -1, 51, -1, 53, -1, -1, -1, -1, -1, - 159, 160, 161, -1, -1, -1, -1, -1, -1, 68, - 69, 70, -1, 72, 281, 282, 283, 
284, 285, 286, - 287, -1, -1, 290, -1, -1, -1, -1, -1, 296, - 5, 6, -1, 8, -1, -1, -1, 196, 13, -1, - -1, 16, -1, -1, 103, 20, 21, 22, 23, 24, - -1, 26, 27, 28, 29, -1, 31, 32, 117, -1, - 35, 36, 37, 38, -1, -1, 41, 42, 43, -1, - -1, -1, -1, -1, -1, -1, 51, -1, 53, -1, - 298, -1, 300, -1, -1, -1, 304, 305, -1, 307, - -1, -1, -1, 68, 69, 70, -1, -1, -1, -1, - 159, 160, 161, 22, 23, 24, -1, 26, 27, 28, - 29, -1, 31, 32, 33, -1, 35, 36, 37, 38, - -1, -1, -1, 42, 43, -1, -1, -1, 103, 104, - -1, -1, -1, -1, 5, 6, -1, 8, -1, 298, - -1, -1, 13, -1, -1, 16, -1, -1, 307, 20, - 21, 22, 23, 24, -1, 26, 27, 28, 29, -1, - 31, 32, -1, -1, 35, 36, 37, 38, -1, -1, - 41, 42, 43, -1, -1, -1, -1, -1, -1, -1, - 51, -1, 53, -1, 159, 160, 161, -1, -1, -1, - -1, 110, -1, -1, -1, -1, -1, 68, 69, 70, - -1, -1, -1, -1, 5, 6, -1, 8, -1, -1, - -1, -1, 13, -1, -1, 16, -1, -1, -1, 20, - 21, 22, 23, 24, -1, 26, 27, 28, 29, -1, - 31, 32, 103, -1, 35, 36, 37, 38, -1, 298, - 41, 42, 43, -1, 39, -1, -1, -1, 307, -1, - 51, 46, 53, 48, 49, 50, -1, -1, -1, -1, - -1, -1, 0, -1, -1, -1, -1, 68, 69, 70, + -1, -1, 51, -1, 53, 54, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 68, + 69, 70, 282, 283, 284, 285, 286, 287, 288, -1, + 79, 291, -1, -1, -1, -1, -1, 297, -1, -1, + -1, -1, -1, -1, 93, 94, 95, 96, -1, -1, + 99, -1, -1, -1, -1, 104, 105, 179, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, -1, -1, -1, -1, -1, 126, 127, 128, + 129, 130, 131, 132, -1, -1, -1, -1, -1, -1, + 290, 5, 299, -1, 8, 9, -1, -1, -1, 5, + 6, 308, 8, -1, -1, -1, -1, 13, -1, -1, + 16, 160, 161, 162, 20, 21, 22, 23, 24, -1, + 26, 27, 28, 29, -1, 31, 32, -1, -1, 35, + 36, 37, 38, -1, -1, 41, 42, 43, -1, -1, + 54, -1, -1, -1, 197, 51, -1, 53, 54, -1, + -1, -1, -1, -1, 207, -1, 70, 210, -1, -1, + -1, -1, 68, 69, 70, 79, -1, -1, -1, -1, + -1, -1, -1, 79, -1, -1, -1, -1, -1, 93, + 94, 95, 96, -1, -1, 99, -1, 93, 94, 95, + 96, -1, -1, 99, -1, -1, -1, -1, 104, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 18, -1, -1, -1, -1, -1, -1, -1, 159, 160, - 161, -1, 87, 88, 89, -1, -1, -1, 5, 6, - -1, 8, 103, -1, -1, -1, 13, -1, -1, 16, - -1, -1, -1, 20, 21, 22, 23, 24, -1, 26, - 27, 28, 29, 298, 31, 32, -1, -1, 35, 36, - 37, 38, 307, -1, 41, 42, 43, -1, 133, 134, - 135, -1, 137, -1, 51, 140, 53, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 159, 160, - 161, 68, 69, 70, -1, -1, -1, 5, 6, -1, - 8, -1, -1, -1, -1, 13, 295, -1, 16, -1, + -1, -1, 126, 127, 128, 129, 130, 131, 132, -1, + 126, 127, 128, 129, 130, 131, 132, -1, -1, 282, + 283, 284, 285, 286, 287, 288, 5, 6, 291, 8, + -1, -1, -1, -1, 297, -1, -1, -1, -1, -1, + 299, -1, -1, -1, 160, 161, 162, 5, 6, 308, + 8, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1, 20, 21, 22, 23, 24, -1, 26, 27, - 28, 29, -1, 31, 32, -1, 103, 35, 36, 37, + 28, 29, -1, 31, 32, 54, -1, 35, 36, 37, 38, -1, -1, 41, 42, 43, -1, -1, -1, -1, - -1, -1, -1, 51, -1, 53, 154, 155, 156, 157, - 158, -1, -1, -1, -1, -1, -1, 298, -1, -1, - 68, 69, 70, -1, -1, 39, 307, -1, 176, 177, - -1, -1, 46, -1, 48, 49, 50, -1, -1, -1, - -1, -1, 159, 160, 161, 193, 194, 195, 196, -1, - -1, -1, 200, 201, -1, 103, -1, -1, 206, -1, - -1, 209, -1, -1, 5, -1, -1, 8, 216, 217, - -1, -1, -1, 87, 88, 89, -1, 18, 7, -1, - -1, -1, -1, -1, -1, -1, -1, 298, -1, -1, - 295, -1, -1, 22, 23, 24, 307, 26, 27, 28, - 29, -1, 31, 32, 33, -1, 35, 36, 37, 38, - -1, 159, 160, 161, -1, 44, -1, -1, -1, 133, - 134, 135, -1, 137, 272, 273, 140, 275, 276, 277, - -1, 279, 280, 281, 282, 283, 284, 285, 286, 287, - -1, -1, -1, -1, 15, 16, 17, -1, 296, 20, - 21, 22, 23, 
24, -1, 26, 27, 28, 29, -1, - 31, 32, -1, -1, 35, 36, 37, 38, -1, -1, - -1, 42, 43, -1, -1, -1, -1, -1, -1, -1, - 51, 298, 53, -1, -1, -1, -1, -1, -1, -1, - 307, -1, -1, -1, -1, -1, -1, 68, 69, 70, - 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, - 151, 152, 153, -1, -1, -1, 39, 158, -1, -1, - -1, 162, 163, 164, 165, 166, 167, 5, -1, -1, - 8, 54, 103, -1, 39, 176, 177, -1, -1, -1, - 18, 46, -1, 48, 49, 50, -1, 70, -1, -1, - 298, -1, 193, 194, 195, 196, 79, -1, -1, 307, - -1, -1, -1, 204, -1, -1, -1, -1, -1, -1, - 93, 94, 95, 96, -1, -1, -1, 218, -1, -1, - -1, 295, 87, 88, 89, 178, 179, 180, 181, 182, - 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, - -1, -1, 125, 126, 127, 128, 129, 130, 131, -1, - -1, 5, 253, 254, 8, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 18, -1, -1, -1, 133, 134, - 135, -1, 137, -1, -1, 140, -1, -1, -1, -1, - 281, 282, 283, 284, 285, 286, 287, -1, 289, 290, - -1, -1, -1, -1, -1, 296, -1, -1, -1, -1, - -1, -1, -1, 141, 142, 143, 144, 145, 146, 147, - 148, 149, 150, 151, 152, 153, -1, -1, -1, -1, - 158, -1, -1, -1, 162, 163, 164, 165, 166, 167, - -1, -1, -1, -1, -1, -1, -1, -1, 176, 177, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 193, 194, 195, 196, -1, - -1, -1, -1, -1, -1, -1, 204, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 307, -1, -1, -1, - 218, -1, -1, -1, -1, -1, -1, 141, 142, 143, - 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, - -1, -1, -1, -1, 158, -1, 289, -1, 162, 163, - 164, 165, 166, 167, -1, 253, 254, -1, -1, -1, - -1, -1, 176, 177, -1, -1, -1, -1, -1, -1, - 295, -1, -1, -1, -1, -1, -1, -1, -1, 193, - 194, 195, 196, 281, 282, 283, 284, 285, 286, 287, - 204, 289, 290, -1, -1, -1, -1, -1, 296, -1, - -1, 16, -1, -1, 218, 20, 21, 22, 23, 24, + -1, 70, -1, 51, -1, 53, 54, -1, -1, -1, + 79, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 68, 69, 70, -1, 93, 94, 95, 96, -1, -1, + 99, 79, -1, -1, -1, -1, -1, 106, 107, 108, + -1, -1, -1, -1, -1, 93, 94, 95, 96, -1, + -1, 99, -1, -1, -1, -1, 104, 126, 127, 128, + 129, 130, 131, 132, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 290, -1, 126, 127, + 128, 129, 130, 131, 132, -1, -1, -1, -1, -1, + -1, -1, 5, 299, -1, 8, 9, -1, -1, -1, + 5, 6, 308, 8, -1, -1, -1, -1, 13, -1, + -1, 16, 160, 161, 162, 20, 21, 22, 23, 24, -1, 26, 27, 28, 29, -1, 31, 32, -1, -1, - 35, 36, 37, 38, 39, -1, -1, 42, 43, -1, - -1, -1, -1, -1, -1, -1, 51, -1, 53, 253, - 254, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 68, 69, 70, -1, 72, -1, 74, - 75, 76, 77, -1, -1, -1, -1, 281, 282, 283, - 284, 285, 286, 287, -1, 289, 290, -1, -1, -1, - -1, -1, 296, -1, -1, -1, -1, -1, 103, -1, - -1, -1, -1, -1, -1, -1, 111, 112, 113, 114, - -1, -1, 117, -1, -1, 16, 121, 122, 123, 20, - 21, 22, 23, 24, -1, 26, 27, 28, 29, -1, - 31, 32, -1, -1, 35, 36, 37, 38, -1, -1, - -1, 42, 43, -1, -1, -1, -1, -1, -1, -1, - 51, -1, 53, -1, -1, -1, -1, -1, -1, -1, - 18, -1, -1, -1, -1, -1, -1, 68, 69, 70, - 16, -1, -1, -1, 20, 21, 22, 23, 24, -1, - 26, 27, 28, 29, -1, 31, 32, -1, -1, 35, - 36, 37, 38, -1, -1, -1, 42, 43, -1, -1, - -1, -1, 103, -1, -1, 51, -1, 53, -1, -1, + 35, 36, 37, 38, -1, -1, 41, 42, 43, -1, + -1, 54, -1, -1, -1, -1, 51, -1, 53, 54, + -1, -1, -1, -1, -1, -1, -1, 70, -1, -1, + -1, -1, -1, 68, 69, 70, 79, -1, -1, -1, + -1, -1, -1, -1, 79, -1, -1, -1, -1, -1, + 93, 94, 95, 96, -1, -1, 99, -1, 93, 94, + 95, 96, -1, -1, 99, -1, -1, -1, -1, 104, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 225, 16, 68, 69, 70, 20, 21, 22, 23, 24, - -1, 26, 27, 28, 29, -1, 31, 32, -1, -1, - 35, 36, 37, 38, -1, -1, -1, 42, 43, -1, - -1, -1, -1, -1, -1, -1, 51, 
103, 53, -1, + -1, -1, -1, 126, 127, 128, 129, 130, 131, 132, + -1, 126, 127, 128, 129, 130, 131, 132, -1, 18, + 299, -1, 301, -1, -1, -1, 305, 306, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 299, 41, -1, -1, 160, 161, 162, 5, 6, + 308, 8, -1, -1, -1, -1, 13, -1, -1, 16, + -1, -1, -1, 20, 21, 22, 23, 24, -1, 26, + 27, 28, 29, -1, 31, 32, -1, -1, 35, 36, + 37, 38, -1, -1, 41, 42, 43, -1, -1, -1, + -1, -1, -1, -1, 51, -1, 53, 54, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 68, 69, 70, -1, -1, -1, -1, -1, -1, + -1, -1, 79, -1, -1, -1, -1, 5, 6, -1, + 8, -1, -1, -1, -1, -1, 93, 94, 95, 96, + -1, -1, 99, -1, -1, 5, 6, 104, 8, 9, + -1, -1, -1, -1, -1, -1, 155, -1, -1, -1, + 20, -1, -1, 41, -1, -1, -1, -1, -1, 126, + 127, 128, 129, 130, 131, 132, 54, -1, -1, -1, + -1, 41, -1, -1, 299, -1, -1, -1, -1, -1, + -1, -1, 70, 308, 54, -1, -1, -1, 197, -1, + -1, 79, -1, 160, 161, 162, -1, -1, 207, -1, + 70, 210, -1, -1, -1, 93, 94, 95, 96, 79, + -1, 99, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 93, 94, 95, 96, -1, -1, 99, + -1, -1, -1, -1, -1, -1, -1, -1, 126, 127, + 128, 129, 130, 131, 132, 5, 6, 7, 8, 9, + -1, -1, -1, 13, -1, -1, 126, 127, 128, 129, + 130, 131, 132, 5, 6, -1, 8, -1, 28, -1, + -1, 13, -1, 282, 283, 284, 285, 286, 287, 288, + -1, 41, 291, -1, -1, -1, -1, -1, 297, -1, + -1, -1, -1, -1, 54, -1, -1, -1, -1, 41, + -1, -1, 44, -1, -1, 65, 66, -1, -1, 51, + 70, -1, 54, -1, -1, -1, -1, -1, -1, 79, + -1, -1, -1, -1, -1, -1, -1, -1, 70, -1, + 218, -1, 299, 93, 94, 95, 96, 79, -1, 99, + -1, 308, -1, -1, -1, -1, -1, -1, 218, -1, + -1, 93, 94, 95, 96, -1, -1, 99, -1, -1, + -1, -1, -1, -1, -1, -1, 126, 127, 128, 129, + 130, 131, 132, -1, -1, 5, 6, -1, 8, -1, + -1, -1, -1, 13, 126, 127, 128, 129, 130, 131, + 132, -1, -1, -1, -1, -1, 5, 6, -1, 8, + 160, 161, 162, -1, 13, -1, -1, -1, -1, -1, + -1, 41, -1, -1, 44, -1, -1, 305, 160, 161, + 162, 51, -1, -1, 54, -1, -1, -1, -1, -1, + 300, -1, 41, -1, -1, 305, -1, -1, -1, -1, + 70, -1, 51, -1, -1, 54, -1, -1, -1, 79, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 70, -1, 93, 94, 95, 96, -1, -1, 99, + 79, -1, -1, -1, -1, 5, 6, -1, 8, -1, + -1, -1, -1, 13, 93, 94, 95, 96, -1, -1, + 99, -1, -1, -1, -1, -1, 126, 127, 128, 129, + 130, 131, 132, -1, -1, -1, -1, -1, -1, -1, + -1, 41, -1, -1, -1, -1, -1, 126, 127, 128, + 129, 130, 131, 132, 54, -1, -1, -1, -1, -1, + 160, 161, 162, -1, -1, -1, -1, -1, -1, 299, + 70, -1, -1, -1, -1, -1, -1, -1, -1, 79, + -1, 160, 161, 162, 296, -1, -1, 299, -1, 5, + 6, -1, 8, 93, 94, 95, 96, 13, -1, 99, + 5, 6, -1, 8, -1, -1, -1, 5, 6, -1, + 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 41, 126, 127, 128, 129, + 130, 131, 132, -1, -1, -1, -1, -1, 54, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 54, + -1, -1, -1, -1, 70, -1, 54, -1, -1, -1, + 160, 161, 162, 79, -1, 70, -1, -1, -1, -1, + -1, -1, 70, -1, 79, -1, -1, 93, 94, 95, + 96, 79, -1, 99, -1, -1, -1, -1, 93, 94, + 95, 96, -1, -1, 99, 93, 94, 95, 96, 299, + -1, 99, 5, 6, -1, 8, -1, -1, -1, -1, + 126, 127, 128, 129, 130, 131, 132, -1, -1, -1, + 299, 126, 127, 128, 129, 130, 131, 132, 126, 127, + 128, 129, 130, 131, 132, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 160, 161, 162, -1, -1, -1, + -1, 54, -1, -1, -1, -1, -1, -1, 7, -1, + -1, -1, -1, -1, -1, 5, 6, 70, 8, -1, + -1, -1, -1, 22, 23, 24, 79, 26, 27, 28, + 29, -1, 31, 32, 33, -1, 35, 36, 37, 38, + 93, 94, 95, 96, -1, 44, 99, -1, -1, 299, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 68, 69, 70, -1, -1, 21, 22, - -1, 24, 25, 26, 27, 28, 29, -1, 31, 32, - -1, 34, 35, 36, 37, 38, 154, 
155, 156, 157, - 158, 18, 307, -1, -1, -1, -1, -1, 103, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 176, 177, + -1, -1, -1, -1, 54, -1, -1, -1, -1, -1, + -1, -1, -1, 126, 127, 128, 129, 130, 131, 132, + 70, -1, -1, -1, -1, -1, -1, -1, -1, 79, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 193, 194, 195, 196, -1, - -1, -1, 200, 201, -1, -1, -1, -1, 206, -1, - 67, 209, -1, -1, -1, -1, -1, -1, 216, 217, + -1, -1, -1, 93, 94, 95, 96, -1, -1, 99, 22, 23, 24, -1, 26, 27, 28, 29, -1, 31, 32, 33, -1, 35, 36, 37, 38, -1, -1, -1, - 42, 43, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 298, -1, -1, - -1, -1, -1, -1, -1, -1, 307, -1, -1, -1, - -1, -1, -1, -1, 272, 273, -1, 275, 276, 277, - -1, 279, 280, 281, 282, 283, 284, 285, 286, 287, - -1, -1, 290, -1, -1, -1, 18, 154, 296, 156, - 157, 158, -1, -1, -1, -1, -1, -1, 110, -1, - -1, 168, 298, -1, -1, -1, 173, -1, -1, 176, - 177, 307, 18, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 193, 194, 195, 196, - -1, 198, 199, -1, -1, 67, 229, 230, -1, 232, - 233, 234, -1, 236, 237, -1, -1, 240, 241, -1, - 243, 218, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 307, 256, 257, 258, 259, 260, 261, 262, - 263, 264, 265, 266, 267, 268, 269, 270, 84, -1, - -1, -1, 16, -1, -1, -1, 253, 254, 22, 23, - 24, -1, 26, 27, 28, 29, -1, 31, 32, -1, - -1, 35, 36, 37, 38, -1, -1, -1, 42, 43, - -1, 304, -1, -1, 281, 282, 283, 284, 285, 286, - 287, -1, 154, 290, 156, 157, 158, -1, -1, 296, - -1, -1, -1, -1, -1, -1, 168, -1, -1, -1, - -1, 173, -1, -1, 176, 177, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 91, -1, -1, - -1, 193, 194, 195, 196, -1, 198, 199, -1, 281, - 282, 283, 284, 285, 286, 287, 110, -1, -1, -1, - -1, -1, -1, -1, 296, -1, 218, -1, -1, -1, - 196, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 208, -1, 210, 211, -1, 213, 214, -1, + 42, 43, -1, 299, -1, -1, 126, 127, 128, 129, + 130, 131, 132, -1, 299, -1, -1, 15, 16, 17, + -1, 299, 20, 21, 22, 23, 24, -1, 26, 27, + 28, 29, -1, 31, 32, -1, -1, 35, 36, 37, + 38, -1, -1, 41, 42, 43, -1, -1, 5, -1, + -1, 8, -1, 51, -1, 53, -1, -1, -1, -1, + -1, 18, -1, -1, -1, -1, -1, -1, -1, 111, + 68, 69, 70, 15, 16, 17, -1, -1, 20, 21, + 22, 23, 24, -1, 26, 27, 28, 29, -1, 31, + 32, -1, -1, 35, 36, 37, 38, 54, -1, -1, + 42, 43, -1, -1, -1, -1, 104, -1, -1, 51, + -1, 53, -1, 70, -1, -1, 299, -1, -1, -1, + -1, -1, 79, -1, -1, -1, 68, 69, 70, -1, + -1, -1, -1, -1, -1, -1, 93, 94, 95, 96, + -1, -1, 99, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 104, -1, -1, -1, -1, -1, -1, 126, + 127, 128, 129, 130, 131, 132, -1, -1, -1, 299, + -1, -1, -1, -1, -1, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, -1, -1, + -1, -1, 159, -1, -1, -1, 163, 164, 165, 166, + 167, 168, -1, -1, -1, -1, -1, -1, -1, -1, + 177, 178, -1, -1, -1, -1, -1, -1, -1, -1, + 5, -1, -1, 8, -1, -1, -1, 194, 195, 196, + 197, -1, -1, 18, -1, -1, -1, -1, 205, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 219, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 54, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 70, -1, 254, 255, -1, + -1, -1, -1, -1, 79, -1, -1, -1, -1, -1, + 308, -1, -1, -1, -1, -1, -1, -1, 93, 94, + 95, 96, -1, -1, 99, 282, 283, 284, 285, 286, + 287, 288, -1, 290, 291, -1, -1, -1, -1, -1, + 297, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 126, 127, 128, 129, 130, 131, 132, -1, -1, + -1, -1, -1, -1, -1, -1, 308, 142, 143, 144, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, + -1, -1, -1, -1, 159, -1, -1, -1, 163, 164, + 165, 166, 167, 
168, -1, -1, -1, -1, -1, -1, + -1, -1, 177, 178, 5, -1, -1, 8, -1, -1, + -1, 5, 6, -1, 8, -1, -1, 18, -1, 194, + 195, 196, 197, -1, -1, -1, -1, -1, -1, -1, + 205, -1, -1, -1, -1, 16, -1, -1, -1, 20, + 21, 22, 23, 24, 219, 26, 27, 28, 29, -1, + 31, 32, -1, 54, 35, 36, 37, 38, -1, -1, + 54, 42, 43, -1, -1, -1, -1, -1, -1, 70, + 51, -1, 53, -1, -1, -1, 70, -1, 79, 254, + 255, -1, -1, -1, -1, 79, -1, 68, 69, 70, + -1, -1, 93, 94, 95, 96, -1, -1, 99, 93, + 94, 95, 96, -1, -1, 99, -1, 282, 283, 284, + 285, 286, 287, 288, -1, 290, 291, -1, -1, -1, + -1, -1, 297, 104, -1, 126, 127, 128, 129, 130, + 131, 132, 126, 127, 128, 129, 130, 131, 132, -1, + -1, 142, 143, 144, 145, 146, 147, 148, 149, 150, + 151, 152, 153, 154, -1, -1, -1, -1, 159, -1, + -1, -1, 163, 164, 165, 166, 167, 168, -1, -1, + -1, -1, -1, -1, -1, -1, 177, 178, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 253, 254, 16, -1, -1, -1, -1, 21, 22, - -1, 24, 25, 26, 27, 28, 29, -1, 31, 32, - -1, 34, 35, 36, 37, 38, -1, -1, -1, 281, - 282, 283, 284, 285, 286, 287, -1, -1, 290, -1, - -1, -1, -1, -1, 296, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 281, 282, 283, 284, 285, - 286, 287, -1, -1, 290, -1, -1, 80, -1, 82, - 296, -1, -1, 16, 87, -1, -1, -1, 21, 22, - -1, 24, 25, 26, 27, 28, 29, -1, 31, 32, - 103, 34, 35, 36, 37, 38, 5, 6, -1, 8, + -1, -1, -1, 194, 195, 196, 197, -1, -1, -1, + -1, -1, -1, -1, 205, -1, -1, -1, -1, 16, + -1, -1, -1, 20, 21, 22, 23, 24, 219, 26, + 27, 28, 29, -1, 31, 32, -1, -1, 35, 36, + 37, 38, 39, -1, -1, 42, 43, -1, -1, -1, + -1, -1, -1, -1, 51, -1, 53, -1, -1, -1, + -1, -1, -1, 254, 255, -1, -1, -1, -1, -1, + -1, 68, 69, 70, -1, 72, -1, 74, 75, 76, + 77, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 282, 283, 284, 285, 286, 287, 288, -1, 290, + 291, -1, -1, -1, -1, -1, 297, 104, -1, -1, + -1, 295, -1, -1, -1, 112, 113, 114, 115, -1, + -1, 118, -1, -1, -1, 122, 123, 124, 299, 18, + -1, -1, -1, -1, 16, -1, -1, 308, 20, 21, 22, 23, 24, -1, 26, 27, 28, 29, -1, 31, - 32, 33, -1, 35, 36, 37, 38, -1, -1, -1, - 42, 43, -1, 136, -1, -1, -1, -1, -1, -1, - 39, -1, -1, -1, -1, -1, -1, 80, -1, 82, - -1, -1, -1, -1, 87, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 68, - 103, -1, -1, 72, -1, -1, 75, 76, -1, -1, - -1, 80, 81, 82, 83, 84, 85, 86, 87, 88, - 89, 90, 91, -1, -1, -1, -1, -1, 110, 98, - -1, -1, 101, 136, -1, -1, -1, -1, -1, 108, - -1, -1, -1, -1, -1, -1, -1, -1, 117, -1, - -1, -1, -1, 226, -1, 228, 229, 230, 231, 232, - 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, - 243, 244, -1, -1, -1, 248, -1, -1, -1, -1, - -1, -1, -1, -1, 257, -1, -1, -1, -1, -1, + 32, -1, -1, 35, 36, 37, 38, -1, -1, -1, + 42, 43, -1, -1, -1, -1, -1, -1, -1, 51, + -1, 53, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 68, 69, 70, 16, + -1, -1, -1, 20, 21, 22, 23, 24, -1, 26, + 27, 28, 29, -1, 31, 32, -1, -1, 35, 36, + 37, 38, -1, -1, -1, 42, 43, -1, -1, -1, + -1, -1, 104, -1, 51, -1, 53, -1, -1, 226, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 68, 69, 70, -1, -1, 21, 22, -1, 24, + 25, 26, 27, 28, 29, -1, 31, 32, -1, 34, + 35, 36, 37, 38, -1, -1, 155, 156, 157, 158, + 159, -1, -1, -1, -1, -1, -1, 104, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 177, 178, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 194, 195, 196, 197, -1, + -1, 308, 201, 202, -1, -1, 5, 6, 207, 8, + -1, 210, -1, -1, -1, -1, -1, -1, 217, 218, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 22, 23, 24, -1, 26, 27, 28, 29, -1, + 31, 32, 33, -1, 35, 36, 37, 38, -1, -1, + 18, 42, 43, -1, -1, 54, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, + -1, 70, -1, 5, 273, 274, 8, 276, 277, 278, + 79, 280, 281, 282, 283, 284, 285, 286, 287, 288, + -1, -1, 291, -1, 93, 94, 95, 96, 297, 67, + 99, -1, 18, -1, -1, -1, -1, 299, -1, -1, + -1, -1, -1, -1, -1, -1, 308, -1, -1, -1, + 111, -1, 54, -1, -1, -1, -1, 126, 127, 128, + 129, 130, 131, 132, -1, -1, -1, -1, 70, -1, + -1, -1, -1, -1, -1, 230, 231, 79, 233, 234, + 235, 67, 237, 238, -1, -1, 241, 242, -1, 244, + -1, 93, 94, 95, 96, -1, -1, 99, -1, -1, + -1, 308, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 155, -1, 157, + 158, 159, -1, -1, 126, 127, 128, 129, 130, 131, + 132, 169, -1, -1, -1, -1, 174, -1, -1, 177, + 178, -1, -1, -1, -1, -1, -1, 216, -1, -1, + 305, -1, -1, -1, -1, -1, 194, 195, 196, 197, + -1, 199, 200, -1, -1, -1, -1, -1, -1, 155, + -1, 157, 158, 159, -1, -1, -1, -1, -1, -1, + -1, 219, -1, 169, -1, -1, -1, -1, 174, -1, + -1, 177, 178, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 194, 195, + 196, 197, -1, 199, 200, -1, 254, 255, -1, -1, + -1, 282, 283, 284, 285, 286, 287, 288, -1, -1, + -1, -1, -1, 219, -1, -1, 297, -1, -1, -1, + -1, -1, -1, -1, 282, 283, 284, 285, 286, 287, + 288, -1, -1, 291, -1, -1, -1, -1, -1, 297, + -1, -1, -1, -1, -1, 16, -1, -1, 254, 255, + 21, 22, -1, 24, 25, 26, 27, 28, 29, -1, + 31, 32, -1, 34, 35, 36, 37, 38, -1, -1, + -1, 5, 6, -1, 8, -1, 282, 283, 284, 285, + 286, 287, 288, -1, 16, 291, -1, -1, -1, 21, + 22, 297, 24, 25, 26, 27, 28, 29, -1, 31, + 32, -1, 34, 35, 36, 37, 38, -1, -1, 80, + -1, 82, -1, -1, -1, -1, 87, -1, -1, -1, + 54, -1, -1, -1, -1, 5, 6, -1, 8, -1, + -1, -1, -1, 104, -1, -1, 70, -1, -1, -1, + -1, -1, -1, -1, -1, 79, -1, -1, 80, -1, + 82, -1, -1, -1, -1, 87, -1, -1, -1, 93, + 94, 95, 96, -1, -1, 99, 137, 101, 102, 103, + -1, -1, 104, -1, 54, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 70, -1, 126, 127, 128, 129, 130, 131, 132, 79, + -1, -1, -1, -1, -1, 137, -1, -1, -1, -1, + -1, -1, -1, 93, 94, 95, 96, -1, -1, 99, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 126, 127, 128, 129, + 130, 131, 132, -1, -1, -1, 227, -1, 229, 230, + 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, + 241, 242, 243, 244, 245, -1, -1, -1, 249, -1, + 5, 6, -1, 8, -1, -1, 220, 258, 222, 223, + 224, 225, -1, -1, -1, 227, -1, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 242, 243, 244, 245, 39, -1, -1, 249, -1, 290, + -1, -1, -1, -1, -1, -1, 258, -1, 208, 54, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 68, -1, 70, -1, 72, -1, -1, + 75, 76, -1, -1, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, -1, 93, 94, + 95, 96, -1, 98, 99, 5, 6, 102, 8, -1, + -1, -1, -1, -1, 109, 5, 6, -1, 8, -1, + -1, -1, -1, 118, -1, -1, -1, -1, -1, -1, + -1, 126, 127, 128, 129, 130, 131, 132, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 54, -1, -1, -1, -1, 5, + 6, -1, 8, -1, 54, -1, -1, -1, -1, -1, + 70, -1, -1, 73, -1, 75, 76, -1, -1, 79, + 70, -1, 82, -1, -1, 75, 76, -1, -1, 79, + -1, -1, -1, 93, 94, 95, 96, -1, -1, 99, + -1, -1, -1, 93, 94, 95, 96, -1, 54, 99, + -1, -1, -1, 5, 6, -1, 8, 9, -1, -1, + -1, -1, -1, -1, 70, -1, 126, 127, 128, 129, + 130, 131, 132, 79, -1, -1, 126, 127, 128, 129, + 130, 131, 132, -1, -1, -1, -1, 93, 94, 95, + 96, -1, -1, 99, -1, -1, 5, 6, -1, 8, + -1, -1, 54, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 120, 121, -1, -1, 70, -1, + 126, 127, 128, 129, 130, 131, 132, 79, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 
289, -1, -1, -1, - -1, -1, -1, 226, -1, 228, 229, 230, 231, 232, - 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, - 243, 244, 39, 40, -1, 248, -1, -1, 45, -1, - 47, -1, -1, -1, 257, -1, -1, 54, -1, -1, + -1, 93, 94, 95, 96, 54, -1, 99, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 71, 72, -1, 74, 75, 76, - 77, 78, -1, -1, -1, -1, 83, -1, -1, 86, - -1, -1, -1, -1, -1, 92, -1, -1, -1, -1, - 97, -1, 99, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 111, 112, 113, 114, 115, 116, - 117, 118, -1, -1, -1, -1, -1, 124 + -1, 70, -1, -1, -1, -1, -1, -1, -1, -1, + 79, -1, -1, -1, 126, 127, 128, 129, 130, 131, + 132, -1, -1, -1, 93, 94, 95, 96, 16, -1, + 99, -1, -1, -1, 22, 23, 24, -1, 26, 27, + 28, 29, -1, 31, 32, -1, -1, 35, 36, 37, + 38, -1, -1, -1, 42, 43, -1, 126, 127, 128, + 129, 130, 131, 132, -1, -1, -1, -1, -1, 39, + 40, -1, -1, -1, -1, 45, -1, 47, -1, -1, + -1, -1, -1, -1, 54, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 71, 72, 91, 74, 75, 76, 77, 78, -1, + -1, -1, -1, 83, -1, -1, 86, -1, -1, -1, + -1, -1, 92, 111, -1, -1, -1, 97, -1, -1, + 100, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 112, 113, 114, 115, 116, 117, 118, 119, + -1, -1, -1, -1, -1, 125 }; /* YYSTOS[STATE-NUM] -- The symbol kind of the accessing symbol of state STATE-NUM. */ static const yytype_int16 yystos[] = { - 0, 309, 0, 18, 154, 155, 156, 157, 158, 176, - 177, 193, 194, 195, 196, 200, 201, 206, 209, 216, - 217, 272, 273, 275, 276, 277, 279, 280, 281, 282, - 283, 284, 285, 286, 287, 296, 310, 313, 319, 320, - 321, 322, 323, 324, 331, 333, 334, 336, 337, 338, - 339, 340, 341, 358, 376, 380, 402, 403, 458, 461, - 467, 468, 469, 473, 482, 485, 490, 215, 5, 6, - 8, 314, 315, 298, 362, 64, 125, 404, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, - 190, 191, 192, 466, 466, 8, 14, 17, 40, 41, - 45, 47, 54, 71, 86, 294, 325, 363, 364, 365, - 366, 297, 298, 274, 470, 215, 474, 491, 215, 315, - 9, 316, 316, 9, 10, 317, 317, 13, 16, 20, - 21, 22, 23, 24, 26, 27, 28, 29, 31, 32, - 35, 36, 37, 38, 42, 43, 51, 53, 68, 69, - 70, 103, 104, 159, 160, 161, 298, 307, 315, 321, - 322, 366, 367, 425, 448, 449, 454, 455, 289, 315, - 315, 315, 315, 7, 12, 411, 412, 411, 411, 289, - 342, 60, 343, 289, 381, 387, 23, 26, 27, 28, - 29, 31, 32, 33, 289, 305, 405, 408, 410, 411, - 316, 289, 289, 289, 289, 487, 293, 316, 359, 314, - 298, 366, 425, 448, 450, 454, 7, 33, 297, 312, - 292, 294, 294, 46, 48, 49, 50, 364, 364, 326, - 367, 450, 297, 454, 294, 316, 316, 207, 315, 474, - 100, 101, 102, 125, 219, 221, 222, 223, 224, 315, - 75, 76, 315, 315, 454, 26, 27, 28, 29, 448, - 51, 448, 24, 25, 34, 15, 17, 454, 217, 304, - 315, 366, 307, 315, 316, 136, 136, 136, 363, 364, - 136, 306, 105, 106, 107, 136, 298, 300, 304, 305, - 311, 448, 312, 295, 12, 295, 295, 309, 39, 68, - 72, 75, 76, 80, 81, 82, 83, 84, 85, 86, - 87, 88, 89, 90, 91, 98, 101, 108, 117, 315, - 450, 61, 344, 345, 39, 72, 74, 75, 76, 77, - 78, 83, 92, 97, 99, 111, 112, 113, 114, 115, - 116, 117, 118, 124, 364, 141, 142, 143, 144, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 162, 163, - 164, 165, 166, 167, 204, 218, 253, 254, 289, 290, - 313, 314, 320, 331, 386, 388, 389, 390, 391, 393, - 394, 402, 426, 427, 428, 429, 430, 431, 432, 433, - 434, 435, 436, 437, 438, 439, 440, 458, 468, 304, - 294, 298, 407, 294, 407, 294, 407, 294, 407, 294, - 407, 294, 407, 294, 406, 408, 294, 411, 295, 7, - 8, 292, 303, 475, 483, 488, 492, 73, 75, 76, - 82, 315, 315, 299, 39, 72, 74, 75, 76, 77, - 111, 112, 113, 114, 117, 121, 122, 123, 225, 
454, - 297, 217, 315, 364, 294, 297, 294, 289, 294, 291, - 8, 316, 316, 295, 289, 294, 312, 119, 120, 298, - 315, 383, 450, 299, 166, 471, 315, 220, 136, 448, - 25, 315, 450, 315, 299, 299, 299, 315, 316, 315, - 315, 315, 454, 315, 315, 294, 294, 315, 20, 299, - 316, 456, 457, 443, 444, 454, 290, 311, 290, 294, - 75, 76, 77, 111, 112, 113, 300, 349, 346, 450, - 67, 154, 168, 173, 198, 199, 218, 253, 254, 290, - 313, 320, 331, 341, 357, 358, 368, 372, 380, 402, - 458, 468, 486, 294, 294, 384, 316, 316, 316, 298, - 110, 288, 298, 103, 450, 303, 197, 294, 387, 55, - 57, 58, 59, 392, 395, 396, 397, 398, 399, 400, - 314, 316, 389, 314, 316, 316, 317, 11, 31, 32, - 294, 317, 318, 314, 316, 363, 15, 17, 366, 454, - 450, 87, 312, 410, 364, 326, 294, 411, 294, 316, - 316, 316, 316, 317, 318, 318, 290, 292, 314, 295, - 316, 316, 208, 210, 213, 214, 290, 320, 331, 458, - 476, 478, 479, 481, 84, 208, 211, 290, 472, 478, - 480, 484, 41, 154, 206, 209, 290, 320, 331, 489, - 206, 209, 290, 320, 331, 493, 75, 76, 77, 111, - 112, 113, 294, 294, 315, 315, 299, 454, 312, 462, - 463, 289, 51, 450, 459, 460, 7, 292, 295, 295, - 325, 327, 328, 300, 356, 442, 19, 335, 472, 136, - 315, 19, 299, 449, 449, 449, 304, 450, 450, 20, - 292, 299, 301, 292, 316, 39, 51, 52, 69, 119, - 291, 302, 350, 351, 352, 292, 110, 369, 373, 316, - 316, 487, 110, 288, 103, 450, 289, 289, 289, 387, - 289, 316, 312, 382, 298, 454, 303, 316, 298, 315, - 298, 315, 316, 364, 19, 294, 20, 384, 445, 446, - 447, 290, 450, 392, 56, 389, 401, 314, 316, 389, - 401, 401, 401, 62, 62, 294, 294, 315, 450, 294, - 411, 454, 314, 316, 441, 295, 312, 295, 299, 295, - 295, 295, 295, 295, 406, 295, 303, 8, 292, 212, - 297, 304, 316, 477, 297, 312, 411, 411, 297, 297, - 411, 411, 294, 215, 316, 315, 215, 315, 215, 316, - 16, 21, 22, 24, 25, 26, 27, 28, 29, 31, - 32, 34, 35, 36, 37, 38, 80, 82, 87, 103, - 136, 226, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 248, - 257, 289, 378, 379, 451, 63, 360, 299, 297, 295, - 292, 327, 8, 297, 290, 292, 8, 297, 290, 22, - 23, 24, 26, 27, 28, 29, 31, 32, 35, 36, - 37, 38, 42, 43, 110, 320, 329, 409, 410, 414, - 298, 443, 294, 294, 315, 383, 28, 29, 63, 202, - 203, 205, 411, 315, 315, 449, 294, 295, 295, 316, - 457, 454, 295, 294, 351, 294, 315, 354, 301, 450, - 450, 72, 117, 315, 450, 72, 117, 364, 315, 298, - 315, 298, 315, 364, 19, 345, 370, 374, 290, 488, - 295, 136, 382, 39, 46, 48, 49, 50, 87, 88, - 89, 133, 134, 135, 137, 140, 295, 250, 251, 252, - 316, 225, 377, 316, 299, 316, 316, 292, 299, 454, - 383, 445, 454, 295, 292, 314, 316, 314, 316, 316, - 317, 19, 312, 295, 294, 292, 292, 295, 295, 407, - 407, 407, 407, 407, 407, 316, 316, 316, 294, 303, - 294, 295, 295, 294, 294, 295, 295, 316, 449, 315, - 63, 315, 295, 25, 26, 27, 28, 29, 294, 452, - 242, 234, 246, 294, 227, 247, 22, 452, 452, 21, - 22, 24, 25, 26, 27, 28, 29, 31, 32, 34, - 35, 36, 37, 38, 229, 230, 232, 233, 234, 236, - 237, 240, 241, 243, 256, 257, 258, 259, 260, 261, - 262, 263, 264, 265, 266, 267, 268, 269, 270, 304, - 453, 295, 412, 298, 304, 314, 297, 361, 28, 65, - 66, 312, 316, 448, 464, 465, 462, 290, 297, 289, - 459, 289, 294, 312, 294, 298, 294, 298, 26, 27, - 28, 29, 294, 298, 294, 298, 294, 298, 294, 298, - 294, 298, 294, 298, 294, 298, 294, 298, 294, 298, - 294, 298, 294, 298, 294, 298, 294, 298, 104, 109, - 320, 330, 411, 316, 301, 445, 445, 356, 442, 314, - 295, 445, 316, 347, 348, 450, 292, 353, 315, 196, - 321, 315, 454, 316, 316, 292, 454, 383, 290, 169, - 170, 171, 172, 290, 313, 320, 
331, 371, 468, 172, - 174, 175, 290, 313, 320, 331, 375, 468, 290, 312, - 295, 294, 303, 303, 299, 299, 299, 299, 294, 383, - 136, 299, 299, 450, 361, 450, 295, 377, 447, 62, - 62, 295, 295, 315, 295, 445, 441, 441, 8, 292, - 8, 477, 295, 316, 249, 312, 298, 298, 25, 26, - 27, 28, 29, 271, 292, 298, 305, 290, 291, 299, - 316, 22, 23, 24, 26, 27, 28, 29, 31, 32, - 35, 36, 37, 38, 44, 312, 409, 413, 294, 294, - 289, 329, 327, 464, 316, 316, 316, 294, 298, 294, - 298, 294, 298, 294, 298, 316, 316, 316, 316, 316, - 316, 317, 316, 316, 318, 316, 317, 318, 316, 316, - 316, 316, 316, 316, 316, 317, 316, 414, 316, 8, - 44, 316, 44, 51, 448, 316, 42, 91, 110, 332, - 455, 295, 299, 295, 295, 294, 294, 471, 295, 295, - 295, 292, 352, 353, 315, 299, 299, 450, 450, 255, - 363, 363, 363, 363, 363, 363, 363, 382, 316, 138, - 139, 138, 139, 378, 349, 314, 292, 19, 314, 314, - 316, 295, 316, 303, 297, 292, 316, 316, 312, 299, - 316, 291, 299, 26, 27, 28, 29, 316, 26, 27, - 28, 316, 329, 290, 290, 295, 299, 295, 299, 316, - 316, 316, 316, 316, 316, 317, 316, 295, 299, 295, - 299, 295, 299, 295, 299, 295, 295, 299, 295, 295, - 299, 295, 299, 295, 299, 295, 299, 295, 299, 295, - 299, 295, 295, 299, 295, 8, 295, 299, 51, 448, - 298, 315, 301, 445, 445, 450, 294, 292, 19, 364, - 295, 295, 295, 294, 450, 383, 8, 477, 316, 312, - 299, 299, 299, 316, 295, 303, 303, 303, 295, 290, - 294, 294, 295, 299, 295, 299, 295, 299, 295, 299, - 294, 294, 294, 294, 294, 294, 294, 294, 294, 294, - 294, 294, 295, 294, 8, 299, 297, 295, 295, 445, - 450, 383, 454, 445, 300, 355, 356, 303, 295, 292, - 295, 451, 299, 316, 316, 316, 421, 419, 294, 294, - 294, 294, 420, 419, 418, 417, 415, 416, 420, 419, - 418, 417, 424, 422, 423, 414, 295, 355, 450, 295, - 294, 477, 312, 295, 295, 295, 295, 464, 295, 316, - 420, 419, 418, 417, 295, 316, 295, 295, 316, 295, - 317, 295, 316, 318, 295, 317, 318, 295, 295, 295, - 295, 295, 414, 8, 44, 295, 44, 51, 295, 448, - 361, 294, 19, 385, 445, 292, 295, 295, 295, 295, - 8, 445, 383, 39, 54, 70, 79, 93, 94, 95, - 96, 125, 126, 127, 128, 129, 130, 131, 289, 295, - 312, 295, 294, 294, 295, 255, 445, 316, 103, 295, - 295, 364, 454, 450, 19, 383, 355, 294, 445, 295 + 0, 310, 0, 18, 155, 156, 157, 158, 159, 177, + 178, 194, 195, 196, 197, 201, 202, 207, 210, 217, + 218, 273, 274, 276, 277, 278, 280, 281, 282, 283, + 284, 285, 286, 287, 288, 297, 311, 314, 320, 321, + 322, 323, 324, 325, 332, 334, 335, 337, 338, 339, + 340, 341, 342, 359, 377, 381, 403, 404, 459, 462, + 468, 469, 470, 474, 483, 486, 491, 216, 5, 6, + 8, 54, 70, 79, 93, 94, 95, 96, 99, 126, + 127, 128, 129, 130, 131, 132, 315, 316, 299, 363, + 64, 126, 405, 179, 180, 181, 182, 183, 184, 185, + 186, 187, 188, 189, 190, 191, 192, 193, 467, 467, + 8, 14, 17, 40, 41, 45, 47, 54, 71, 86, + 295, 326, 364, 365, 366, 367, 298, 299, 275, 471, + 216, 475, 492, 216, 316, 9, 317, 317, 9, 10, + 318, 318, 13, 16, 20, 21, 22, 23, 24, 26, + 27, 28, 29, 31, 32, 35, 36, 37, 38, 42, + 43, 51, 53, 68, 69, 70, 104, 105, 160, 161, + 162, 299, 308, 316, 322, 323, 367, 368, 426, 449, + 450, 455, 456, 290, 316, 316, 316, 316, 7, 12, + 412, 413, 412, 412, 290, 343, 60, 344, 290, 382, + 388, 23, 26, 27, 28, 29, 31, 32, 33, 290, + 306, 406, 409, 411, 412, 317, 290, 290, 290, 290, + 488, 294, 317, 360, 315, 299, 367, 426, 449, 451, + 455, 7, 33, 298, 313, 293, 295, 295, 46, 48, + 49, 50, 365, 365, 327, 368, 451, 298, 70, 455, + 295, 317, 317, 208, 316, 475, 101, 102, 103, 126, + 220, 222, 223, 224, 225, 316, 75, 76, 316, 316, + 455, 26, 27, 28, 29, 449, 51, 449, 24, 
25, + 34, 15, 17, 455, 218, 305, 316, 367, 308, 316, + 317, 137, 137, 137, 364, 365, 137, 307, 106, 107, + 108, 137, 299, 301, 305, 306, 312, 449, 313, 296, + 12, 296, 296, 310, 39, 68, 72, 75, 76, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 98, 102, 109, 118, 316, 451, 61, 345, 346, + 39, 72, 74, 75, 76, 77, 78, 83, 92, 97, + 100, 112, 113, 114, 115, 116, 117, 118, 119, 125, + 365, 142, 143, 144, 145, 146, 147, 148, 149, 150, + 151, 152, 153, 154, 163, 164, 165, 166, 167, 168, + 205, 219, 254, 255, 290, 291, 314, 315, 321, 332, + 387, 389, 390, 391, 392, 394, 395, 403, 427, 428, + 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, + 439, 440, 441, 459, 469, 305, 295, 299, 408, 295, + 408, 295, 408, 295, 408, 295, 408, 295, 408, 295, + 407, 409, 295, 412, 296, 7, 8, 293, 304, 476, + 484, 489, 493, 73, 75, 76, 82, 316, 316, 300, + 39, 72, 74, 75, 76, 77, 112, 113, 114, 115, + 118, 122, 123, 124, 226, 455, 298, 218, 316, 365, + 295, 298, 295, 290, 295, 292, 8, 317, 317, 296, + 290, 295, 313, 120, 121, 299, 316, 384, 451, 300, + 167, 472, 316, 221, 137, 449, 25, 316, 451, 316, + 300, 300, 300, 316, 317, 316, 316, 316, 455, 316, + 316, 295, 295, 316, 20, 300, 317, 457, 458, 444, + 445, 455, 291, 312, 291, 295, 75, 76, 77, 112, + 113, 114, 301, 350, 347, 451, 67, 155, 169, 174, + 199, 200, 219, 254, 255, 291, 314, 321, 332, 342, + 358, 359, 369, 373, 381, 403, 459, 469, 487, 295, + 295, 385, 317, 317, 317, 299, 111, 289, 299, 104, + 451, 304, 198, 295, 388, 55, 57, 58, 59, 393, + 396, 397, 398, 399, 400, 401, 315, 317, 390, 315, + 317, 317, 318, 11, 31, 32, 295, 318, 319, 315, + 317, 364, 15, 17, 367, 455, 451, 87, 313, 411, + 365, 327, 295, 412, 295, 317, 317, 317, 317, 318, + 319, 319, 291, 293, 315, 296, 317, 317, 209, 211, + 214, 215, 291, 321, 332, 459, 477, 479, 480, 482, + 84, 209, 212, 291, 473, 479, 481, 485, 41, 155, + 207, 210, 291, 321, 332, 490, 207, 210, 291, 321, + 332, 494, 75, 76, 77, 112, 113, 114, 295, 295, + 316, 316, 300, 455, 313, 463, 464, 290, 51, 451, + 460, 461, 7, 293, 296, 296, 326, 328, 329, 301, + 357, 443, 19, 336, 473, 137, 316, 19, 300, 450, + 450, 450, 305, 451, 451, 20, 293, 300, 302, 293, + 317, 39, 51, 52, 69, 120, 292, 303, 351, 352, + 353, 293, 111, 370, 374, 317, 317, 488, 111, 289, + 104, 451, 290, 290, 290, 388, 290, 317, 313, 383, + 299, 455, 304, 317, 299, 316, 299, 316, 317, 365, + 19, 295, 20, 385, 446, 447, 448, 291, 451, 393, + 56, 390, 402, 315, 317, 390, 402, 402, 402, 62, + 62, 295, 295, 316, 451, 295, 412, 455, 315, 317, + 442, 296, 313, 296, 300, 296, 296, 296, 296, 296, + 407, 296, 304, 8, 293, 213, 298, 305, 317, 478, + 298, 313, 412, 412, 298, 298, 412, 412, 295, 216, + 317, 316, 216, 316, 216, 317, 16, 21, 22, 24, + 25, 26, 27, 28, 29, 31, 32, 34, 35, 36, + 37, 38, 80, 82, 87, 104, 137, 227, 229, 230, + 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, + 241, 242, 243, 244, 245, 249, 258, 290, 379, 380, + 452, 63, 361, 300, 298, 296, 293, 328, 8, 298, + 291, 293, 8, 298, 291, 22, 23, 24, 26, 27, + 28, 29, 31, 32, 35, 36, 37, 38, 42, 43, + 111, 321, 330, 410, 411, 415, 299, 444, 295, 295, + 316, 384, 28, 29, 63, 203, 204, 206, 412, 316, + 316, 450, 295, 296, 296, 317, 458, 455, 296, 295, + 352, 295, 316, 355, 302, 451, 451, 72, 118, 316, + 451, 72, 118, 365, 316, 299, 316, 299, 316, 365, + 19, 346, 371, 375, 291, 489, 296, 137, 383, 39, + 46, 48, 49, 50, 87, 88, 89, 134, 135, 136, + 138, 141, 296, 251, 252, 253, 317, 226, 378, 317, + 300, 317, 317, 293, 300, 455, 384, 446, 455, 296, + 293, 315, 317, 315, 317, 317, 318, 19, 313, 296, 
+ 295, 293, 293, 296, 296, 408, 408, 408, 408, 408, + 408, 317, 317, 317, 295, 304, 295, 296, 296, 295, + 295, 296, 296, 317, 450, 316, 63, 316, 296, 25, + 26, 27, 28, 29, 295, 453, 243, 235, 247, 295, + 228, 248, 22, 453, 453, 21, 22, 24, 25, 26, + 27, 28, 29, 31, 32, 34, 35, 36, 37, 38, + 230, 231, 233, 234, 235, 237, 238, 241, 242, 244, + 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, + 267, 268, 269, 270, 271, 305, 454, 296, 413, 299, + 305, 315, 298, 362, 28, 65, 66, 313, 317, 449, + 465, 466, 463, 291, 298, 290, 460, 290, 295, 313, + 295, 299, 295, 299, 26, 27, 28, 29, 295, 299, + 295, 299, 295, 299, 295, 299, 295, 299, 295, 299, + 295, 299, 295, 299, 295, 299, 295, 299, 295, 299, + 295, 299, 295, 299, 105, 110, 321, 331, 412, 317, + 302, 446, 446, 357, 443, 315, 296, 446, 317, 348, + 349, 451, 293, 354, 316, 197, 322, 316, 455, 317, + 317, 293, 455, 384, 291, 170, 171, 172, 173, 291, + 314, 321, 332, 372, 469, 173, 175, 176, 291, 314, + 321, 332, 376, 469, 291, 313, 296, 295, 304, 304, + 300, 300, 300, 300, 295, 384, 137, 300, 300, 451, + 362, 451, 296, 378, 448, 62, 62, 296, 296, 316, + 296, 446, 442, 442, 8, 293, 8, 478, 296, 317, + 250, 313, 299, 299, 25, 26, 27, 28, 29, 272, + 293, 299, 306, 291, 292, 300, 317, 22, 23, 24, + 26, 27, 28, 29, 31, 32, 35, 36, 37, 38, + 44, 313, 410, 414, 295, 295, 290, 330, 328, 465, + 317, 317, 317, 295, 299, 295, 299, 295, 299, 295, + 299, 317, 317, 317, 317, 317, 317, 318, 317, 317, + 319, 317, 318, 319, 317, 317, 317, 317, 317, 317, + 317, 318, 317, 415, 317, 8, 44, 317, 44, 51, + 449, 317, 42, 91, 111, 333, 456, 296, 300, 296, + 296, 295, 295, 472, 296, 296, 296, 293, 353, 354, + 316, 300, 300, 451, 451, 256, 364, 364, 364, 364, + 364, 364, 364, 383, 317, 139, 140, 139, 140, 379, + 350, 315, 293, 19, 315, 315, 317, 296, 317, 304, + 298, 293, 317, 317, 313, 300, 317, 292, 300, 26, + 27, 28, 29, 317, 26, 27, 28, 317, 330, 291, + 291, 296, 300, 296, 300, 317, 317, 317, 317, 317, + 317, 318, 317, 296, 300, 296, 300, 296, 300, 296, + 300, 296, 296, 300, 296, 296, 300, 296, 300, 296, + 300, 296, 300, 296, 300, 296, 300, 296, 296, 300, + 296, 8, 296, 300, 51, 449, 299, 316, 302, 446, + 446, 451, 295, 293, 19, 365, 296, 296, 296, 295, + 451, 384, 8, 478, 317, 313, 300, 300, 300, 317, + 296, 304, 304, 304, 296, 291, 295, 295, 296, 300, + 296, 300, 296, 300, 296, 300, 295, 295, 295, 295, + 295, 295, 295, 295, 295, 295, 295, 295, 296, 295, + 8, 300, 298, 296, 296, 446, 451, 384, 455, 446, + 301, 356, 357, 304, 296, 293, 296, 452, 300, 317, + 317, 317, 422, 420, 295, 295, 295, 295, 421, 420, + 419, 418, 416, 417, 421, 420, 419, 418, 425, 423, + 424, 415, 296, 356, 451, 296, 295, 478, 313, 296, + 296, 296, 296, 465, 296, 317, 421, 420, 419, 418, + 296, 317, 296, 296, 317, 296, 318, 296, 317, 319, + 296, 318, 319, 296, 296, 296, 296, 296, 415, 8, + 44, 296, 44, 51, 296, 449, 362, 295, 19, 386, + 446, 293, 296, 296, 296, 296, 8, 446, 384, 39, + 54, 70, 79, 93, 94, 95, 96, 99, 126, 127, + 128, 129, 130, 131, 132, 290, 296, 313, 296, 295, + 295, 296, 256, 446, 317, 104, 296, 296, 365, 455, + 451, 19, 384, 356, 295, 446, 296 }; /* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM. 
*/ static const yytype_int16 yyr1[] = { - 0, 308, 309, 309, 310, 310, 310, 310, 310, 310, - 310, 310, 310, 310, 310, 310, 310, 310, 310, 310, - 310, 310, 310, 310, 310, 310, 310, 310, 310, 310, - 311, 311, 312, 312, 313, 313, 313, 314, 314, 315, - 315, 315, 316, 317, 317, 318, 318, 318, 319, 319, - 319, 319, 319, 320, 320, 320, 320, 320, 320, 320, - 320, 320, 321, 321, 321, 321, 322, 322, 322, 322, - 323, 324, 325, 326, 326, 327, 328, 328, 328, 329, - 329, 329, 330, 330, 331, 331, 331, 332, 332, 332, - 332, 332, 332, 333, 333, 333, 334, 335, 335, 335, - 335, 335, 335, 336, 337, 338, 339, 340, 341, 342, - 342, 342, 342, 342, 342, 342, 342, 342, 342, 342, - 342, 342, 342, 342, 342, 342, 342, 342, 342, 342, - 342, 342, 342, 342, 342, 342, 343, 343, 344, 344, - 345, 345, 346, 346, 347, 347, 348, 348, 349, 349, - 350, 350, 350, 350, 350, 350, 350, 351, 351, 352, - 352, 353, 353, 354, 355, 355, 356, 357, 357, 357, - 357, 357, 357, 357, 357, 357, 357, 357, 357, 357, - 357, 357, 357, 357, 357, 357, 357, 357, 358, 359, - 359, 359, 359, 359, 359, 359, 359, 359, 359, 359, - 359, 359, 359, 359, 359, 360, 360, 361, 361, 362, - 362, 363, 363, 363, 363, 363, 363, 363, 364, 364, - 364, 364, 365, 365, 365, 365, 365, 365, 365, 365, - 366, 367, 367, 367, 367, 367, 367, 368, 368, 369, - 369, 369, 370, 370, 371, 371, 371, 371, 371, 371, - 371, 371, 372, 373, 373, 373, 374, 374, 375, 375, - 375, 375, 375, 375, 375, 376, 377, 377, 378, 378, - 379, 380, 381, 381, 381, 381, 381, 381, 381, 381, - 381, 381, 381, 381, 381, 381, 381, 381, 381, 381, - 381, 381, 381, 381, 381, 382, 382, 382, 382, 382, + 0, 309, 310, 310, 311, 311, 311, 311, 311, 311, + 311, 311, 311, 311, 311, 311, 311, 311, 311, 311, + 311, 311, 311, 311, 311, 311, 311, 311, 311, 311, + 312, 312, 313, 313, 314, 314, 314, 315, 315, 315, + 315, 315, 315, 315, 315, 315, 315, 315, 315, 315, + 315, 315, 315, 315, 316, 316, 316, 317, 318, 318, + 319, 319, 319, 320, 320, 320, 320, 320, 321, 321, + 321, 321, 321, 321, 321, 321, 321, 322, 322, 322, + 322, 323, 323, 323, 323, 324, 325, 326, 327, 327, + 328, 329, 329, 329, 330, 330, 330, 331, 331, 332, + 332, 332, 333, 333, 333, 333, 333, 333, 334, 334, + 334, 335, 336, 336, 336, 336, 336, 336, 337, 338, + 339, 340, 341, 342, 343, 343, 343, 343, 343, 343, + 343, 343, 343, 343, 343, 343, 343, 343, 343, 343, + 343, 343, 343, 343, 343, 343, 343, 343, 343, 343, + 343, 344, 344, 345, 345, 346, 346, 347, 347, 348, + 348, 349, 349, 350, 350, 351, 351, 351, 351, 351, + 351, 351, 352, 352, 353, 353, 354, 354, 355, 356, + 356, 357, 358, 358, 358, 358, 358, 358, 358, 358, + 358, 358, 358, 358, 358, 358, 358, 358, 358, 358, + 358, 358, 358, 359, 360, 360, 360, 360, 360, 360, + 360, 360, 360, 360, 360, 360, 360, 360, 360, 360, + 361, 361, 362, 362, 363, 363, 364, 364, 364, 364, + 364, 364, 364, 365, 365, 365, 365, 366, 366, 366, + 366, 366, 366, 366, 366, 367, 368, 368, 368, 368, + 368, 368, 369, 369, 370, 370, 370, 371, 371, 372, + 372, 372, 372, 372, 372, 372, 372, 373, 374, 374, + 374, 375, 375, 376, 376, 376, 376, 376, 376, 376, + 377, 378, 378, 379, 379, 380, 381, 382, 382, 382, + 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, 382, - 382, 383, 383, 383, 384, 384, 384, 384, 384, 385, - 385, 385, 385, 385, 385, 385, 385, 385, 385, 385, - 385, 385, 385, 385, 385, 386, 387, 387, 388, 388, - 388, 388, 388, 388, 388, 388, 388, 388, 388, 388, - 388, 388, 388, 388, 388, 388, 388, 388, 388, 388, - 388, 388, 388, 388, 389, 390, 391, 392, 392, 393, - 393, 393, 394, 395, 395, 395, 395, 
396, 396, 396, - 397, 398, 399, 400, 401, 401, 401, 402, 403, 403, - 404, 404, 404, 405, 405, 406, 406, 407, 407, 408, - 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, - 408, 408, 408, 408, 409, 409, 409, 409, 409, 409, + 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, + 383, 383, 383, 383, 383, 383, 384, 384, 384, 385, + 385, 385, 385, 385, 386, 386, 386, 386, 386, 386, + 386, 386, 386, 386, 386, 386, 386, 386, 386, 386, + 386, 387, 388, 388, 389, 389, 389, 389, 389, 389, + 389, 389, 389, 389, 389, 389, 389, 389, 389, 389, + 389, 389, 389, 389, 389, 389, 389, 389, 389, 389, + 390, 391, 392, 393, 393, 394, 394, 394, 395, 396, + 396, 396, 396, 397, 397, 397, 398, 399, 400, 401, + 402, 402, 402, 403, 404, 404, 405, 405, 405, 406, + 406, 407, 407, 408, 408, 409, 409, 409, 409, 409, 409, 409, 409, 409, 409, 409, 409, 409, 409, 409, - 409, 409, 409, 410, 411, 411, 412, 412, 413, 413, - 413, 414, 414, 414, 414, 414, 414, 414, 414, 414, - 414, 414, 414, 414, 414, 414, 414, 414, 414, 414, - 414, 414, 414, 414, 414, 414, 414, 415, 415, 415, - 416, 416, 416, 417, 417, 418, 418, 419, 419, 420, - 420, 421, 421, 422, 422, 422, 423, 423, 423, 423, - 424, 424, 425, 426, 427, 428, 429, 430, 431, 432, - 433, 434, 435, 436, 437, 438, 439, 440, 440, 440, - 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, - 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, - 441, 441, 441, 441, 441, 442, 442, 443, 443, 444, - 444, 445, 445, 446, 446, 447, 447, 447, 448, 448, - 448, 448, 448, 448, 448, 448, 448, 448, 449, 449, - 450, 450, 450, 450, 451, 451, 451, 451, 451, 451, - 451, 451, 451, 451, 451, 451, 451, 451, 451, 451, - 451, 451, 451, 451, 451, 451, 451, 451, 451, 451, - 451, 451, 451, 451, 451, 451, 451, 451, 451, 451, - 451, 451, 451, 451, 451, 451, 451, 451, 451, 451, - 451, 451, 451, 451, 451, 451, 451, 451, 451, 452, - 452, 453, 453, 453, 453, 453, 453, 453, 453, 453, - 453, 453, 453, 453, 453, 453, 453, 453, 453, 453, - 453, 453, 453, 453, 453, 453, 453, 453, 453, 453, - 453, 453, 453, 453, 453, 453, 453, 453, 453, 453, - 453, 453, 453, 453, 453, 453, 453, 453, 453, 453, + 410, 410, 410, 410, 410, 410, 410, 410, 410, 410, + 410, 410, 410, 410, 410, 410, 410, 410, 410, 411, + 412, 412, 413, 413, 414, 414, 414, 415, 415, 415, + 415, 415, 415, 415, 415, 415, 415, 415, 415, 415, + 415, 415, 415, 415, 415, 415, 415, 415, 415, 415, + 415, 415, 415, 416, 416, 416, 417, 417, 417, 418, + 418, 419, 419, 420, 420, 421, 421, 422, 422, 423, + 423, 423, 424, 424, 424, 424, 425, 425, 426, 427, + 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, + 438, 439, 440, 441, 441, 441, 441, 441, 441, 441, + 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, + 441, 441, 441, 441, 441, 441, 442, 442, 442, 442, + 442, 443, 443, 444, 444, 445, 445, 446, 446, 447, + 447, 448, 448, 448, 449, 449, 449, 449, 449, 449, + 449, 449, 449, 449, 450, 450, 451, 451, 451, 451, + 452, 452, 452, 452, 452, 452, 452, 452, 452, 452, + 452, 452, 452, 452, 452, 452, 452, 452, 452, 452, + 452, 452, 452, 452, 452, 452, 452, 452, 452, 452, + 452, 452, 452, 452, 452, 452, 452, 452, 452, 452, + 452, 452, 452, 452, 452, 452, 452, 452, 452, 452, + 452, 452, 452, 452, 452, 453, 453, 454, 454, 454, + 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, - 454, 454, 454, 454, 455, 455, 455, 455, 455, 455, + 454, 454, 454, 454, 454, 454, 454, 454, 454, 454, + 454, 454, 454, 454, 454, 454, 455, 455, 455, 455, + 455, 455, 455, 455, 455, 455, 455, 455, 455, 455, 455, 455, 
455, 455, 455, 455, 455, 455, 455, 455, - 455, 455, 456, 456, 457, 457, 457, 457, 457, 458, - 458, 458, 458, 458, 458, 459, 459, 459, 460, 460, - 461, 461, 462, 462, 463, 464, 464, 465, 465, 465, - 465, 465, 465, 465, 465, 466, 466, 466, 466, 466, - 466, 466, 466, 466, 466, 466, 466, 466, 466, 466, - 467, 467, 468, 468, 468, 468, 468, 468, 468, 468, - 468, 468, 468, 469, 469, 470, 470, 471, 471, 472, - 473, 474, 474, 474, 474, 474, 474, 474, 474, 474, - 474, 475, 475, 476, 476, 476, 477, 477, 478, 478, - 478, 478, 478, 478, 479, 480, 481, 482, 482, 483, - 483, 484, 484, 484, 484, 485, 486, 487, 487, 487, - 487, 487, 487, 487, 487, 487, 487, 488, 488, 489, - 489, 489, 489, 489, 489, 489, 490, 490, 491, 491, - 491, 492, 492, 493, 493, 493, 493 + 456, 456, 456, 456, 456, 456, 456, 456, 456, 456, + 456, 456, 456, 456, 456, 456, 456, 456, 457, 457, + 458, 458, 458, 458, 458, 459, 459, 459, 459, 459, + 459, 460, 460, 460, 461, 461, 462, 462, 463, 463, + 464, 465, 465, 466, 466, 466, 466, 466, 466, 466, + 466, 467, 467, 467, 467, 467, 467, 467, 467, 467, + 467, 467, 467, 467, 467, 467, 468, 468, 469, 469, + 469, 469, 469, 469, 469, 469, 469, 469, 469, 470, + 470, 471, 471, 472, 472, 473, 474, 475, 475, 475, + 475, 475, 475, 475, 475, 475, 475, 476, 476, 477, + 477, 477, 478, 478, 479, 479, 479, 479, 479, 479, + 480, 481, 482, 483, 483, 484, 484, 485, 485, 485, + 485, 486, 487, 488, 488, 488, 488, 488, 488, 488, + 488, 488, 488, 489, 489, 490, 490, 490, 490, 490, + 490, 490, 491, 491, 492, 492, 492, 493, 493, 494, + 494, 494, 494 }; /* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM. */ @@ -2992,87 +3350,89 @@ static const yytype_int8 yyr2[] = 1, 1, 1, 4, 4, 4, 4, 1, 1, 1, 2, 2, 3, 2, 2, 1, 1, 1, 4, 1, 0, 2, 1, 3, 2, 4, 6, 1, 1, 1, - 1, 3, 1, 1, 1, 1, 4, 4, 4, 4, - 4, 4, 4, 2, 3, 2, 2, 2, 1, 1, - 2, 1, 2, 4, 6, 3, 5, 7, 9, 3, - 4, 7, 1, 1, 1, 2, 0, 2, 2, 0, - 6, 2, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 2, 3, 1, 2, 3, 7, 0, 2, 2, - 2, 2, 2, 3, 3, 2, 1, 4, 3, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, + 1, 4, 4, 4, 4, 4, 4, 4, 2, 3, + 2, 2, 2, 1, 1, 2, 1, 2, 4, 6, + 3, 5, 7, 9, 3, 4, 7, 1, 1, 1, + 2, 0, 2, 2, 0, 6, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 3, 2, 3, 1, 2, + 3, 7, 0, 2, 2, 2, 2, 2, 3, 3, + 2, 1, 4, 3, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, - 3, 3, 2, 2, 2, 5, 0, 2, 0, 2, - 0, 2, 3, 1, 0, 1, 1, 3, 0, 3, - 1, 1, 1, 1, 1, 1, 4, 0, 2, 4, - 3, 0, 2, 3, 0, 1, 5, 3, 4, 4, - 4, 1, 1, 1, 1, 1, 2, 2, 4, 13, - 22, 1, 1, 5, 3, 7, 5, 4, 7, 0, - 2, 2, 2, 2, 2, 2, 2, 5, 2, 2, - 2, 2, 2, 2, 5, 0, 2, 0, 2, 0, - 3, 9, 9, 7, 7, 1, 1, 1, 2, 2, - 1, 4, 0, 1, 1, 2, 2, 2, 2, 1, - 4, 2, 5, 3, 2, 2, 1, 4, 3, 0, - 2, 2, 0, 2, 2, 2, 2, 2, 1, 1, - 1, 1, 9, 0, 2, 2, 0, 2, 2, 2, - 2, 1, 1, 1, 1, 1, 0, 4, 1, 3, - 1, 13, 0, 2, 2, 2, 2, 2, 2, 2, + 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, + 5, 0, 2, 0, 2, 0, 2, 3, 1, 0, + 1, 1, 3, 0, 3, 1, 1, 1, 1, 1, + 1, 4, 0, 2, 4, 3, 0, 2, 3, 0, + 1, 5, 3, 4, 4, 4, 1, 1, 1, 1, + 1, 2, 2, 4, 13, 22, 1, 1, 5, 3, + 7, 5, 4, 7, 0, 2, 2, 2, 2, 2, + 2, 2, 5, 2, 2, 2, 2, 2, 2, 5, + 0, 2, 0, 2, 0, 3, 9, 9, 7, 7, + 1, 1, 1, 2, 2, 1, 4, 0, 1, 1, + 2, 2, 2, 2, 1, 4, 2, 5, 3, 2, + 2, 1, 4, 3, 0, 2, 2, 0, 2, 2, + 2, 2, 2, 1, 1, 1, 1, 9, 0, 2, + 2, 0, 2, 2, 2, 2, 1, 1, 1, 1, + 1, 0, 4, 1, 3, 1, 13, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 5, 8, 6, 5, 0, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, - 5, 1, 1, 1, 0, 4, 4, 4, 4, 0, + 2, 2, 2, 2, 2, 2, 5, 8, 6, 5, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 4, 4, 4, 4, 5, 1, 1, 1, 0, + 4, 4, 4, 
4, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 5, 1, 0, 2, 2, 1, - 2, 4, 5, 1, 1, 1, 1, 2, 1, 1, - 1, 1, 1, 4, 6, 4, 4, 11, 1, 5, - 3, 7, 5, 5, 3, 1, 2, 2, 1, 2, - 4, 4, 1, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 1, 1, 1, 4, 4, 2, 4, 2, - 0, 1, 1, 3, 1, 3, 1, 0, 3, 5, - 4, 3, 5, 5, 5, 5, 5, 5, 2, 2, - 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, - 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, - 4, 4, 3, 2, 0, 1, 1, 2, 1, 1, - 1, 1, 4, 4, 5, 4, 4, 4, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, - 8, 8, 7, 7, 7, 7, 7, 0, 2, 2, - 0, 2, 2, 0, 2, 0, 2, 0, 2, 0, - 2, 0, 2, 0, 2, 2, 0, 2, 3, 2, - 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, - 2, 2, 2, 2, 3, 2, 2, 2, 5, 3, - 2, 2, 2, 2, 2, 5, 4, 6, 2, 4, - 0, 3, 3, 1, 1, 0, 3, 0, 1, 1, - 3, 0, 1, 1, 3, 1, 3, 4, 4, 4, - 4, 5, 1, 1, 1, 1, 1, 1, 1, 3, - 1, 3, 4, 1, 0, 10, 6, 5, 6, 1, + 5, 1, 0, 2, 2, 1, 2, 4, 5, 1, + 1, 1, 1, 2, 1, 1, 1, 1, 1, 4, + 6, 4, 4, 11, 1, 5, 3, 7, 5, 5, + 3, 1, 2, 2, 1, 2, 4, 4, 1, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, + 1, 4, 4, 2, 4, 2, 0, 1, 1, 3, + 1, 3, 1, 0, 3, 5, 4, 3, 5, 5, + 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, + 5, 5, 4, 4, 4, 4, 4, 4, 3, 2, + 0, 1, 1, 2, 1, 1, 1, 1, 4, 4, + 5, 4, 4, 4, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 8, 8, 8, 8, 7, 7, + 7, 7, 7, 0, 2, 2, 0, 2, 2, 0, + 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, + 2, 2, 0, 2, 3, 2, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, - 3, 4, 6, 5, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 1, 2, 2, 4, 1, 2, 1, - 2, 1, 2, 1, 2, 1, 2, 1, 1, 0, - 5, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, - 1, 1, 3, 2, 2, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 3, 2, 3, 4, 2, 2, 2, 5, - 5, 7, 4, 3, 2, 3, 2, 1, 1, 2, - 3, 2, 1, 2, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, - 1, 1, 1, 3, 0, 1, 1, 3, 2, 6, - 7, 3, 3, 3, 6, 0, 1, 3, 5, 6, - 4, 4, 1, 3, 3, 1, 1, 1, 1, 4, - 1, 6, 6, 6, 4, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 3, 2, 5, 4, 7, 6, 7, 6, - 9, 8, 3, 8, 4, 0, 2, 0, 1, 3, - 3, 0, 2, 2, 2, 3, 2, 2, 2, 2, - 2, 0, 2, 3, 1, 1, 1, 1, 3, 8, - 2, 3, 1, 1, 3, 3, 3, 4, 6, 0, - 2, 3, 1, 3, 1, 4, 3, 0, 2, 2, - 2, 3, 3, 3, 3, 3, 3, 0, 2, 2, - 3, 3, 4, 2, 1, 1, 3, 5, 0, 2, - 2, 0, 2, 4, 3, 1, 1 + 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, + 3, 2, 2, 2, 5, 3, 2, 2, 2, 2, + 2, 5, 4, 6, 2, 4, 0, 3, 3, 1, + 1, 0, 3, 0, 1, 1, 3, 0, 1, 1, + 3, 1, 3, 4, 4, 4, 4, 5, 1, 1, + 1, 1, 1, 1, 1, 3, 1, 3, 4, 1, + 0, 10, 6, 5, 6, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, + 2, 1, 1, 1, 1, 2, 3, 4, 6, 5, + 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, + 2, 2, 4, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 1, 0, 5, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, + 2, 2, 2, 1, 1, 1, 1, 1, 3, 2, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, 3, 2, + 3, 4, 2, 2, 2, 5, 5, 7, 4, 3, + 2, 3, 2, 1, 1, 2, 3, 2, 1, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, + 2, 2, 2, 1, 1, 1, 1, 1, 1, 3, + 0, 1, 1, 3, 2, 6, 7, 3, 3, 3, + 6, 0, 1, 3, 5, 6, 4, 4, 1, 3, + 3, 1, 1, 1, 1, 4, 1, 6, 6, 6, + 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, + 5, 4, 7, 6, 7, 6, 9, 8, 3, 8, + 4, 0, 2, 0, 1, 3, 3, 0, 2, 2, + 2, 3, 2, 2, 2, 2, 2, 0, 2, 3, + 1, 1, 1, 1, 3, 8, 2, 3, 1, 1, + 3, 3, 3, 4, 6, 0, 2, 3, 1, 3, + 1, 4, 3, 0, 2, 2, 2, 3, 3, 3, + 3, 3, 3, 0, 2, 2, 3, 3, 4, 2, + 1, 1, 3, 5, 0, 2, 2, 0, 2, 4, + 3, 1, 1 }; @@ -3538,13 +3898,13 @@ yyparse (void) case 4: /* decl: classHead '{' classDecls '}' */ #line 193 "asmparse.y" { PASM->EndClass(); } -#line 3542 "asmparse.cpp" +#line 3902 "prebuilt\\asmparse.cpp" break; case 5: /* decl: nameSpaceHead 
'{' decls '}' */ #line 194 "asmparse.y" { PASM->EndNameSpace(); } -#line 3548 "asmparse.cpp" +#line 3908 "prebuilt\\asmparse.cpp" break; case 6: /* decl: methodHead methodDecls '}' */ @@ -3553,957 +3913,1040 @@ yyparse (void) { PASM->m_pCurMethod->m_ulLines[1] = PASM->m_ulCurLine; PASM->m_pCurMethod->m_ulColumns[1]=PASM->m_ulCurColumn;} PASM->EndMethod(); } -#line 3557 "asmparse.cpp" +#line 3917 "prebuilt\\asmparse.cpp" break; case 13: /* decl: assemblyHead '{' assemblyDecls '}' */ #line 205 "asmparse.y" { PASMM->EndAssembly(); } -#line 3563 "asmparse.cpp" +#line 3923 "prebuilt\\asmparse.cpp" break; case 14: /* decl: assemblyRefHead '{' assemblyRefDecls '}' */ #line 206 "asmparse.y" { PASMM->EndAssembly(); } -#line 3569 "asmparse.cpp" +#line 3929 "prebuilt\\asmparse.cpp" break; case 15: /* decl: exptypeHead '{' exptypeDecls '}' */ #line 207 "asmparse.y" { PASMM->EndComType(); } -#line 3575 "asmparse.cpp" +#line 3935 "prebuilt\\asmparse.cpp" break; case 16: /* decl: manifestResHead '{' manifestResDecls '}' */ #line 208 "asmparse.y" { PASMM->EndManifestRes(); } -#line 3581 "asmparse.cpp" +#line 3941 "prebuilt\\asmparse.cpp" break; case 20: /* decl: _SUBSYSTEM int32 */ #line 212 "asmparse.y" { -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:22011) // Suppress PREFast warning about integer overflow/underflow -#endif PASM->m_dwSubsystem = (yyvsp[0].int32); -#ifdef _PREFAST_ -#pragma warning(pop) -#endif } -#line 3596 "asmparse.cpp" +#line 3949 "prebuilt\\asmparse.cpp" break; case 21: /* decl: _CORFLAGS int32 */ -#line 222 "asmparse.y" +#line 215 "asmparse.y" { PASM->m_dwComImageFlags = (yyvsp[0].int32); } -#line 3602 "asmparse.cpp" +#line 3955 "prebuilt\\asmparse.cpp" break; case 22: /* decl: _FILE ALIGNMENT_ int32 */ -#line 223 "asmparse.y" +#line 216 "asmparse.y" { PASM->m_dwFileAlignment = (yyvsp[0].int32); if(((yyvsp[0].int32) & ((yyvsp[0].int32) - 1))||((yyvsp[0].int32) < 0x200)||((yyvsp[0].int32) > 0x10000)) PASM->report->error("Invalid file alignment, must be power of 2 from 0x200 to 0x10000\n");} -#line 3610 "asmparse.cpp" +#line 3963 "prebuilt\\asmparse.cpp" break; case 23: /* decl: _IMAGEBASE int64 */ -#line 226 "asmparse.y" +#line 219 "asmparse.y" { PASM->m_stBaseAddress = (ULONGLONG)(*((yyvsp[0].int64))); delete (yyvsp[0].int64); if(PASM->m_stBaseAddress & 0xFFFF) PASM->report->error("Invalid image base, must be 0x10000-aligned\n");} -#line 3618 "asmparse.cpp" +#line 3971 "prebuilt\\asmparse.cpp" break; case 24: /* decl: _STACKRESERVE int64 */ -#line 229 "asmparse.y" +#line 222 "asmparse.y" { PASM->m_stSizeOfStackReserve = (size_t)(*((yyvsp[0].int64))); delete (yyvsp[0].int64); } -#line 3624 "asmparse.cpp" +#line 3977 "prebuilt\\asmparse.cpp" break; case 29: /* decl: _MSCORLIB */ -#line 234 "asmparse.y" +#line 227 "asmparse.y" { PASM->m_fIsMscorlib = TRUE; } -#line 3630 "asmparse.cpp" +#line 3983 "prebuilt\\asmparse.cpp" break; case 32: /* compQstring: QSTRING */ -#line 241 "asmparse.y" +#line 234 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 3636 "asmparse.cpp" +#line 3989 "prebuilt\\asmparse.cpp" break; case 33: /* compQstring: compQstring '+' QSTRING */ -#line 242 "asmparse.y" +#line 235 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); (yyval.binstr)->append((yyvsp[0].binstr)); delete (yyvsp[0].binstr); } -#line 3642 "asmparse.cpp" +#line 3995 "prebuilt\\asmparse.cpp" break; case 34: /* languageDecl: _LANGUAGE SQSTRING */ -#line 245 "asmparse.y" +#line 238 "asmparse.y" { LPCSTRToGuid((yyvsp[0].string),&(PASM->m_guidLang)); } -#line 3648 
"asmparse.cpp" +#line 4001 "prebuilt\\asmparse.cpp" break; case 35: /* languageDecl: _LANGUAGE SQSTRING ',' SQSTRING */ -#line 246 "asmparse.y" +#line 239 "asmparse.y" { LPCSTRToGuid((yyvsp[-2].string),&(PASM->m_guidLang)); LPCSTRToGuid((yyvsp[0].string),&(PASM->m_guidLangVendor));} -#line 3655 "asmparse.cpp" +#line 4008 "prebuilt\\asmparse.cpp" break; case 36: /* languageDecl: _LANGUAGE SQSTRING ',' SQSTRING ',' SQSTRING */ -#line 248 "asmparse.y" +#line 241 "asmparse.y" { LPCSTRToGuid((yyvsp[-4].string),&(PASM->m_guidLang)); LPCSTRToGuid((yyvsp[-2].string),&(PASM->m_guidLangVendor)); LPCSTRToGuid((yyvsp[-2].string),&(PASM->m_guidDoc));} -#line 3663 "asmparse.cpp" +#line 4016 "prebuilt\\asmparse.cpp" break; case 37: /* id: ID */ -#line 253 "asmparse.y" +#line 246 "asmparse.y" { (yyval.string) = (yyvsp[0].string); } -#line 3669 "asmparse.cpp" +#line 4022 "prebuilt\\asmparse.cpp" + break; + + case 38: /* id: NATIVE_ */ +#line 248 "asmparse.y" + { (yyval.string) = newString("native"); } +#line 4028 "prebuilt\\asmparse.cpp" + break; + + case 39: /* id: CIL_ */ +#line 249 "asmparse.y" + { (yyval.string) = newString("cil"); } +#line 4034 "prebuilt\\asmparse.cpp" + break; + + case 40: /* id: OPTIL_ */ +#line 250 "asmparse.y" + { (yyval.string) = newString("optil"); } +#line 4040 "prebuilt\\asmparse.cpp" + break; + + case 41: /* id: MANAGED_ */ +#line 251 "asmparse.y" + { (yyval.string) = newString("managed"); } +#line 4046 "prebuilt\\asmparse.cpp" + break; + + case 42: /* id: UNMANAGED_ */ +#line 252 "asmparse.y" + { (yyval.string) = newString("unmanaged"); } +#line 4052 "prebuilt\\asmparse.cpp" + break; + + case 43: /* id: FORWARDREF_ */ +#line 253 "asmparse.y" + { (yyval.string) = newString("forwardref"); } +#line 4058 "prebuilt\\asmparse.cpp" break; - case 38: /* id: SQSTRING */ + case 44: /* id: PRESERVESIG_ */ #line 254 "asmparse.y" - { (yyval.string) = (yyvsp[0].string); } -#line 3675 "asmparse.cpp" + { (yyval.string) = newString("preservesig"); } +#line 4064 "prebuilt\\asmparse.cpp" + break; + + case 45: /* id: RUNTIME_ */ +#line 255 "asmparse.y" + { (yyval.string) = newString("runtime"); } +#line 4070 "prebuilt\\asmparse.cpp" + break; + + case 46: /* id: INTERNALCALL_ */ +#line 256 "asmparse.y" + { (yyval.string) = newString("internalcall"); } +#line 4076 "prebuilt\\asmparse.cpp" break; - case 39: /* dottedName: id */ + case 47: /* id: SYNCHRONIZED_ */ #line 257 "asmparse.y" - { (yyval.string) = (yyvsp[0].string); } -#line 3681 "asmparse.cpp" + { (yyval.string) = newString("synchronized"); } +#line 4082 "prebuilt\\asmparse.cpp" break; - case 40: /* dottedName: DOTTEDNAME */ + case 48: /* id: NOINLINING_ */ #line 258 "asmparse.y" - { (yyval.string) = (yyvsp[0].string); } -#line 3687 "asmparse.cpp" + { (yyval.string) = newString("noinlining"); } +#line 4088 "prebuilt\\asmparse.cpp" break; - case 41: /* dottedName: dottedName '.' 
dottedName */ + case 49: /* id: AGGRESSIVEINLINING_ */ #line 259 "asmparse.y" - { (yyval.string) = newStringWDel((yyvsp[-2].string), '.', (yyvsp[0].string)); } -#line 3693 "asmparse.cpp" + { (yyval.string) = newString("aggressiveinlining"); } +#line 4094 "prebuilt\\asmparse.cpp" + break; + + case 50: /* id: NOOPTIMIZATION_ */ +#line 260 "asmparse.y" + { (yyval.string) = newString("nooptimization"); } +#line 4100 "prebuilt\\asmparse.cpp" + break; + + case 51: /* id: AGGRESSIVEOPTIMIZATION_ */ +#line 261 "asmparse.y" + { (yyval.string) = newString("aggressiveoptimization"); } +#line 4106 "prebuilt\\asmparse.cpp" break; - case 42: /* int32: INT32_V */ + case 52: /* id: ASYNC_ */ #line 262 "asmparse.y" + { (yyval.string) = newString("async"); } +#line 4112 "prebuilt\\asmparse.cpp" + break; + + case 53: /* id: SQSTRING */ +#line 263 "asmparse.y" + { (yyval.string) = (yyvsp[0].string); } +#line 4118 "prebuilt\\asmparse.cpp" + break; + + case 54: /* dottedName: id */ +#line 266 "asmparse.y" + { (yyval.string) = (yyvsp[0].string); } +#line 4124 "prebuilt\\asmparse.cpp" + break; + + case 55: /* dottedName: DOTTEDNAME */ +#line 267 "asmparse.y" + { (yyval.string) = (yyvsp[0].string); } +#line 4130 "prebuilt\\asmparse.cpp" + break; + + case 56: /* dottedName: dottedName '.' dottedName */ +#line 268 "asmparse.y" + { (yyval.string) = newStringWDel((yyvsp[-2].string), '.', (yyvsp[0].string)); } +#line 4136 "prebuilt\\asmparse.cpp" + break; + + case 57: /* int32: INT32_V */ +#line 271 "asmparse.y" { (yyval.int32) = (yyvsp[0].int32); } -#line 3699 "asmparse.cpp" +#line 4142 "prebuilt\\asmparse.cpp" break; - case 43: /* int64: INT64_V */ -#line 265 "asmparse.y" + case 58: /* int64: INT64_V */ +#line 274 "asmparse.y" { (yyval.int64) = (yyvsp[0].int64); } -#line 3705 "asmparse.cpp" +#line 4148 "prebuilt\\asmparse.cpp" break; - case 44: /* int64: INT32_V */ -#line 266 "asmparse.y" + case 59: /* int64: INT32_V */ +#line 275 "asmparse.y" { (yyval.int64) = neg ? 
new int64_t((yyvsp[0].int32)) : new int64_t((unsigned)(yyvsp[0].int32)); } -#line 3711 "asmparse.cpp" +#line 4154 "prebuilt\\asmparse.cpp" break; - case 45: /* float64: FLOAT64 */ -#line 269 "asmparse.y" + case 60: /* float64: FLOAT64 */ +#line 278 "asmparse.y" { (yyval.float64) = (yyvsp[0].float64); } -#line 3717 "asmparse.cpp" +#line 4160 "prebuilt\\asmparse.cpp" break; - case 46: /* float64: FLOAT32_ '(' int32 ')' */ -#line 270 "asmparse.y" + case 61: /* float64: FLOAT32_ '(' int32 ')' */ +#line 279 "asmparse.y" { float f; *((int32_t*) (&f)) = (yyvsp[-1].int32); (yyval.float64) = new double(f); } -#line 3723 "asmparse.cpp" +#line 4166 "prebuilt\\asmparse.cpp" break; - case 47: /* float64: FLOAT64_ '(' int64 ')' */ -#line 271 "asmparse.y" + case 62: /* float64: FLOAT64_ '(' int64 ')' */ +#line 280 "asmparse.y" { (yyval.float64) = (double*) (yyvsp[-1].int64); } -#line 3729 "asmparse.cpp" +#line 4172 "prebuilt\\asmparse.cpp" break; - case 48: /* typedefDecl: _TYPEDEF type AS_ dottedName */ -#line 275 "asmparse.y" + case 63: /* typedefDecl: _TYPEDEF type AS_ dottedName */ +#line 284 "asmparse.y" { PASM->AddTypeDef((yyvsp[-2].binstr),(yyvsp[0].string)); } -#line 3735 "asmparse.cpp" +#line 4178 "prebuilt\\asmparse.cpp" break; - case 49: /* typedefDecl: _TYPEDEF className AS_ dottedName */ -#line 276 "asmparse.y" + case 64: /* typedefDecl: _TYPEDEF className AS_ dottedName */ +#line 285 "asmparse.y" { PASM->AddTypeDef((yyvsp[-2].token),(yyvsp[0].string)); } -#line 3741 "asmparse.cpp" +#line 4184 "prebuilt\\asmparse.cpp" break; - case 50: /* typedefDecl: _TYPEDEF memberRef AS_ dottedName */ -#line 277 "asmparse.y" + case 65: /* typedefDecl: _TYPEDEF memberRef AS_ dottedName */ +#line 286 "asmparse.y" { PASM->AddTypeDef((yyvsp[-2].token),(yyvsp[0].string)); } -#line 3747 "asmparse.cpp" +#line 4190 "prebuilt\\asmparse.cpp" break; - case 51: /* typedefDecl: _TYPEDEF customDescr AS_ dottedName */ -#line 278 "asmparse.y" + case 66: /* typedefDecl: _TYPEDEF customDescr AS_ dottedName */ +#line 287 "asmparse.y" { (yyvsp[-2].cad)->tkOwner = 0; PASM->AddTypeDef((yyvsp[-2].cad),(yyvsp[0].string)); } -#line 3753 "asmparse.cpp" +#line 4196 "prebuilt\\asmparse.cpp" break; - case 52: /* typedefDecl: _TYPEDEF customDescrWithOwner AS_ dottedName */ -#line 279 "asmparse.y" + case 67: /* typedefDecl: _TYPEDEF customDescrWithOwner AS_ dottedName */ +#line 288 "asmparse.y" { PASM->AddTypeDef((yyvsp[-2].cad),(yyvsp[0].string)); } -#line 3759 "asmparse.cpp" +#line 4202 "prebuilt\\asmparse.cpp" break; - case 53: /* compControl: P_DEFINE dottedName */ -#line 284 "asmparse.y" + case 68: /* compControl: P_DEFINE dottedName */ +#line 293 "asmparse.y" { DefineVar((yyvsp[0].string), NULL); } -#line 3765 "asmparse.cpp" +#line 4208 "prebuilt\\asmparse.cpp" break; - case 54: /* compControl: P_DEFINE dottedName compQstring */ -#line 285 "asmparse.y" + case 69: /* compControl: P_DEFINE dottedName compQstring */ +#line 294 "asmparse.y" { DefineVar((yyvsp[-1].string), (yyvsp[0].binstr)); } -#line 3771 "asmparse.cpp" +#line 4214 "prebuilt\\asmparse.cpp" break; - case 55: /* compControl: P_UNDEF dottedName */ -#line 286 "asmparse.y" + case 70: /* compControl: P_UNDEF dottedName */ +#line 295 "asmparse.y" { UndefVar((yyvsp[0].string)); } -#line 3777 "asmparse.cpp" +#line 4220 "prebuilt\\asmparse.cpp" break; - case 56: /* compControl: P_IFDEF dottedName */ -#line 287 "asmparse.y" + case 71: /* compControl: P_IFDEF dottedName */ +#line 296 "asmparse.y" { SkipToken = !IsVarDefined((yyvsp[0].string)); IfEndif++; } -#line 3785 
"asmparse.cpp" +#line 4228 "prebuilt\\asmparse.cpp" break; - case 57: /* compControl: P_IFNDEF dottedName */ -#line 290 "asmparse.y" + case 72: /* compControl: P_IFNDEF dottedName */ +#line 299 "asmparse.y" { SkipToken = IsVarDefined((yyvsp[0].string)); IfEndif++; } -#line 3793 "asmparse.cpp" +#line 4236 "prebuilt\\asmparse.cpp" break; - case 58: /* compControl: P_ELSE */ -#line 293 "asmparse.y" + case 73: /* compControl: P_ELSE */ +#line 302 "asmparse.y" { if(IfEndif == 1) SkipToken = !SkipToken;} -#line 3799 "asmparse.cpp" +#line 4242 "prebuilt\\asmparse.cpp" break; - case 59: /* compControl: P_ENDIF */ -#line 294 "asmparse.y" + case 74: /* compControl: P_ENDIF */ +#line 303 "asmparse.y" { if(IfEndif == 0) PASM->report->error("Unmatched #endif\n"); else IfEndif--; } -#line 3808 "asmparse.cpp" +#line 4251 "prebuilt\\asmparse.cpp" break; - case 60: /* compControl: P_INCLUDE QSTRING */ -#line 298 "asmparse.y" + case 75: /* compControl: P_INCLUDE QSTRING */ +#line 307 "asmparse.y" { _ASSERTE(!"yylex should have dealt with this"); } -#line 3814 "asmparse.cpp" +#line 4257 "prebuilt\\asmparse.cpp" break; - case 61: /* compControl: ';' */ -#line 299 "asmparse.y" + case 76: /* compControl: ';' */ +#line 308 "asmparse.y" { } -#line 3820 "asmparse.cpp" +#line 4263 "prebuilt\\asmparse.cpp" break; - case 62: /* customDescr: _CUSTOM customType */ -#line 303 "asmparse.y" + case 77: /* customDescr: _CUSTOM customType */ +#line 312 "asmparse.y" { (yyval.cad) = new CustomDescr(PASM->m_tkCurrentCVOwner, (yyvsp[0].token), NULL); } -#line 3826 "asmparse.cpp" +#line 4269 "prebuilt\\asmparse.cpp" break; - case 63: /* customDescr: _CUSTOM customType '=' compQstring */ -#line 304 "asmparse.y" + case 78: /* customDescr: _CUSTOM customType '=' compQstring */ +#line 313 "asmparse.y" { (yyval.cad) = new CustomDescr(PASM->m_tkCurrentCVOwner, (yyvsp[-2].token), (yyvsp[0].binstr)); } -#line 3832 "asmparse.cpp" +#line 4275 "prebuilt\\asmparse.cpp" break; - case 64: /* customDescr: _CUSTOM customType '=' '{' customBlobDescr '}' */ -#line 305 "asmparse.y" + case 79: /* customDescr: _CUSTOM customType '=' '{' customBlobDescr '}' */ +#line 314 "asmparse.y" { (yyval.cad) = new CustomDescr(PASM->m_tkCurrentCVOwner, (yyvsp[-4].token), (yyvsp[-1].binstr)); } -#line 3838 "asmparse.cpp" +#line 4281 "prebuilt\\asmparse.cpp" break; - case 65: /* customDescr: customHead bytes ')' */ -#line 306 "asmparse.y" + case 80: /* customDescr: customHead bytes ')' */ +#line 315 "asmparse.y" { (yyval.cad) = new CustomDescr(PASM->m_tkCurrentCVOwner, (yyvsp[-2].int32), (yyvsp[-1].binstr)); } -#line 3844 "asmparse.cpp" +#line 4287 "prebuilt\\asmparse.cpp" break; - case 66: /* customDescrWithOwner: _CUSTOM '(' ownerType ')' customType */ -#line 309 "asmparse.y" + case 81: /* customDescrWithOwner: _CUSTOM '(' ownerType ')' customType */ +#line 318 "asmparse.y" { (yyval.cad) = new CustomDescr((yyvsp[-2].token), (yyvsp[0].token), NULL); } -#line 3850 "asmparse.cpp" +#line 4293 "prebuilt\\asmparse.cpp" break; - case 67: /* customDescrWithOwner: _CUSTOM '(' ownerType ')' customType '=' compQstring */ -#line 310 "asmparse.y" + case 82: /* customDescrWithOwner: _CUSTOM '(' ownerType ')' customType '=' compQstring */ +#line 319 "asmparse.y" { (yyval.cad) = new CustomDescr((yyvsp[-4].token), (yyvsp[-2].token), (yyvsp[0].binstr)); } -#line 3856 "asmparse.cpp" +#line 4299 "prebuilt\\asmparse.cpp" break; - case 68: /* customDescrWithOwner: _CUSTOM '(' ownerType ')' customType '=' '{' customBlobDescr '}' */ -#line 312 "asmparse.y" + case 83: /* 
customDescrWithOwner: _CUSTOM '(' ownerType ')' customType '=' '{' customBlobDescr '}' */ +#line 321 "asmparse.y" { (yyval.cad) = new CustomDescr((yyvsp[-6].token), (yyvsp[-4].token), (yyvsp[-1].binstr)); } -#line 3862 "asmparse.cpp" +#line 4305 "prebuilt\\asmparse.cpp" break; - case 69: /* customDescrWithOwner: customHeadWithOwner bytes ')' */ -#line 313 "asmparse.y" + case 84: /* customDescrWithOwner: customHeadWithOwner bytes ')' */ +#line 322 "asmparse.y" { (yyval.cad) = new CustomDescr(PASM->m_tkCurrentCVOwner, (yyvsp[-2].int32), (yyvsp[-1].binstr)); } -#line 3868 "asmparse.cpp" +#line 4311 "prebuilt\\asmparse.cpp" break; - case 70: /* customHead: _CUSTOM customType '=' '(' */ -#line 316 "asmparse.y" + case 85: /* customHead: _CUSTOM customType '=' '(' */ +#line 325 "asmparse.y" { (yyval.int32) = (yyvsp[-2].token); bParsingByteArray = TRUE; } -#line 3874 "asmparse.cpp" +#line 4317 "prebuilt\\asmparse.cpp" break; - case 71: /* customHeadWithOwner: _CUSTOM '(' ownerType ')' customType '=' '(' */ -#line 320 "asmparse.y" + case 86: /* customHeadWithOwner: _CUSTOM '(' ownerType ')' customType '=' '(' */ +#line 329 "asmparse.y" { PASM->m_pCustomDescrList = NULL; PASM->m_tkCurrentCVOwner = (yyvsp[-4].token); (yyval.int32) = (yyvsp[-2].token); bParsingByteArray = TRUE; } -#line 3882 "asmparse.cpp" +#line 4325 "prebuilt\\asmparse.cpp" break; - case 72: /* customType: methodRef */ -#line 325 "asmparse.y" + case 87: /* customType: methodRef */ +#line 334 "asmparse.y" { (yyval.token) = (yyvsp[0].token); } -#line 3888 "asmparse.cpp" +#line 4331 "prebuilt\\asmparse.cpp" break; - case 73: /* ownerType: typeSpec */ -#line 328 "asmparse.y" + case 88: /* ownerType: typeSpec */ +#line 337 "asmparse.y" { (yyval.token) = (yyvsp[0].token); } -#line 3894 "asmparse.cpp" +#line 4337 "prebuilt\\asmparse.cpp" break; - case 74: /* ownerType: memberRef */ -#line 329 "asmparse.y" + case 89: /* ownerType: memberRef */ +#line 338 "asmparse.y" { (yyval.token) = (yyvsp[0].token); } -#line 3900 "asmparse.cpp" +#line 4343 "prebuilt\\asmparse.cpp" break; - case 75: /* customBlobDescr: customBlobArgs customBlobNVPairs */ -#line 333 "asmparse.y" + case 90: /* customBlobDescr: customBlobArgs customBlobNVPairs */ +#line 342 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt16(VAL16(nCustomBlobNVPairs)); (yyval.binstr)->append((yyvsp[0].binstr)); nCustomBlobNVPairs = 0; } -#line 3909 "asmparse.cpp" +#line 4352 "prebuilt\\asmparse.cpp" break; - case 76: /* customBlobArgs: %empty */ -#line 339 "asmparse.y" + case 91: /* customBlobArgs: %empty */ +#line 348 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt16(VAL16(0x0001)); } -#line 3915 "asmparse.cpp" +#line 4358 "prebuilt\\asmparse.cpp" break; - case 77: /* customBlobArgs: customBlobArgs serInit */ -#line 340 "asmparse.y" + case 92: /* customBlobArgs: customBlobArgs serInit */ +#line 349 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); AppendFieldToCustomBlob((yyval.binstr),(yyvsp[0].binstr)); } -#line 3922 "asmparse.cpp" +#line 4365 "prebuilt\\asmparse.cpp" break; - case 78: /* customBlobArgs: customBlobArgs compControl */ -#line 342 "asmparse.y" + case 93: /* customBlobArgs: customBlobArgs compControl */ +#line 351 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); } -#line 3928 "asmparse.cpp" +#line 4371 "prebuilt\\asmparse.cpp" break; - case 79: /* customBlobNVPairs: %empty */ -#line 345 "asmparse.y" + case 94: /* customBlobNVPairs: %empty */ +#line 354 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 3934 
"asmparse.cpp" +#line 4377 "prebuilt\\asmparse.cpp" break; - case 80: /* customBlobNVPairs: customBlobNVPairs fieldOrProp serializType dottedName '=' serInit */ -#line 347 "asmparse.y" + case 95: /* customBlobNVPairs: customBlobNVPairs fieldOrProp serializType dottedName '=' serInit */ +#line 356 "asmparse.y" { (yyval.binstr) = (yyvsp[-5].binstr); (yyval.binstr)->appendInt8((yyvsp[-4].int32)); (yyval.binstr)->append((yyvsp[-3].binstr)); AppendStringWithLength((yyval.binstr),(yyvsp[-2].string)); AppendFieldToCustomBlob((yyval.binstr),(yyvsp[0].binstr)); nCustomBlobNVPairs++; } -#line 3944 "asmparse.cpp" +#line 4387 "prebuilt\\asmparse.cpp" break; - case 81: /* customBlobNVPairs: customBlobNVPairs compControl */ -#line 352 "asmparse.y" + case 96: /* customBlobNVPairs: customBlobNVPairs compControl */ +#line 361 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); } -#line 3950 "asmparse.cpp" +#line 4393 "prebuilt\\asmparse.cpp" break; - case 82: /* fieldOrProp: FIELD_ */ -#line 355 "asmparse.y" + case 97: /* fieldOrProp: FIELD_ */ +#line 364 "asmparse.y" { (yyval.int32) = SERIALIZATION_TYPE_FIELD; } -#line 3956 "asmparse.cpp" +#line 4399 "prebuilt\\asmparse.cpp" break; - case 83: /* fieldOrProp: PROPERTY_ */ -#line 356 "asmparse.y" + case 98: /* fieldOrProp: PROPERTY_ */ +#line 365 "asmparse.y" { (yyval.int32) = SERIALIZATION_TYPE_PROPERTY; } -#line 3962 "asmparse.cpp" +#line 4405 "prebuilt\\asmparse.cpp" break; - case 84: /* customAttrDecl: customDescr */ -#line 359 "asmparse.y" + case 99: /* customAttrDecl: customDescr */ +#line 368 "asmparse.y" { if((yyvsp[0].cad)->tkOwner && !(yyvsp[0].cad)->tkInterfacePair) PASM->DefineCV((yyvsp[0].cad)); else if(PASM->m_pCustomDescrList) PASM->m_pCustomDescrList->PUSH((yyvsp[0].cad)); } -#line 3971 "asmparse.cpp" +#line 4414 "prebuilt\\asmparse.cpp" break; - case 85: /* customAttrDecl: customDescrWithOwner */ -#line 363 "asmparse.y" + case 100: /* customAttrDecl: customDescrWithOwner */ +#line 372 "asmparse.y" { PASM->DefineCV((yyvsp[0].cad)); } -#line 3977 "asmparse.cpp" +#line 4420 "prebuilt\\asmparse.cpp" break; - case 86: /* customAttrDecl: TYPEDEF_CA */ -#line 364 "asmparse.y" + case 101: /* customAttrDecl: TYPEDEF_CA */ +#line 373 "asmparse.y" { CustomDescr* pNew = new CustomDescr((yyvsp[0].tdd)->m_pCA); if(pNew->tkOwner == 0) pNew->tkOwner = PASM->m_tkCurrentCVOwner; if(pNew->tkOwner) PASM->DefineCV(pNew); else if(PASM->m_pCustomDescrList) PASM->m_pCustomDescrList->PUSH(pNew); } -#line 3988 "asmparse.cpp" +#line 4431 "prebuilt\\asmparse.cpp" break; - case 87: /* serializType: simpleType */ -#line 372 "asmparse.y" + case 102: /* serializType: simpleType */ +#line 381 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 3994 "asmparse.cpp" +#line 4437 "prebuilt\\asmparse.cpp" break; - case 88: /* serializType: TYPE_ */ -#line 373 "asmparse.y" + case 103: /* serializType: TYPE_ */ +#line 382 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_TYPE); } -#line 4000 "asmparse.cpp" +#line 4443 "prebuilt\\asmparse.cpp" break; - case 89: /* serializType: OBJECT_ */ -#line 374 "asmparse.y" + case 104: /* serializType: OBJECT_ */ +#line 383 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_TAGGED_OBJECT); } -#line 4006 "asmparse.cpp" +#line 4449 "prebuilt\\asmparse.cpp" break; - case 90: /* serializType: ENUM_ CLASS_ SQSTRING */ -#line 375 "asmparse.y" + case 105: /* serializType: ENUM_ CLASS_ SQSTRING */ +#line 384 "asmparse.y" { (yyval.binstr) = new BinStr(); 
(yyval.binstr)->appendInt8(SERIALIZATION_TYPE_ENUM); AppendStringWithLength((yyval.binstr),(yyvsp[0].string)); } -#line 4013 "asmparse.cpp" +#line 4456 "prebuilt\\asmparse.cpp" break; - case 91: /* serializType: ENUM_ className */ -#line 377 "asmparse.y" + case 106: /* serializType: ENUM_ className */ +#line 386 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_ENUM); AppendStringWithLength((yyval.binstr),PASM->ReflectionNotation((yyvsp[0].token))); } -#line 4020 "asmparse.cpp" +#line 4463 "prebuilt\\asmparse.cpp" break; - case 92: /* serializType: serializType '[' ']' */ -#line 379 "asmparse.y" + case 107: /* serializType: serializType '[' ']' */ +#line 388 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 4026 "asmparse.cpp" +#line 4469 "prebuilt\\asmparse.cpp" break; - case 93: /* moduleHead: _MODULE */ -#line 384 "asmparse.y" + case 108: /* moduleHead: _MODULE */ +#line 393 "asmparse.y" { PASMM->SetModuleName(NULL); PASM->m_tkCurrentCVOwner=1; } -#line 4032 "asmparse.cpp" +#line 4475 "prebuilt\\asmparse.cpp" break; - case 94: /* moduleHead: _MODULE dottedName */ -#line 385 "asmparse.y" + case 109: /* moduleHead: _MODULE dottedName */ +#line 394 "asmparse.y" { PASMM->SetModuleName((yyvsp[0].string)); PASM->m_tkCurrentCVOwner=1; } -#line 4038 "asmparse.cpp" +#line 4481 "prebuilt\\asmparse.cpp" break; - case 95: /* moduleHead: _MODULE EXTERN_ dottedName */ -#line 386 "asmparse.y" + case 110: /* moduleHead: _MODULE EXTERN_ dottedName */ +#line 395 "asmparse.y" { BinStr* pbs = new BinStr(); unsigned L = (unsigned)strlen((yyvsp[0].string)); memcpy((char*)(pbs->getBuff(L)),(yyvsp[0].string),L); PASM->EmitImport(pbs); delete pbs;} -#line 4047 "asmparse.cpp" +#line 4490 "prebuilt\\asmparse.cpp" break; - case 96: /* vtfixupDecl: _VTFIXUP '[' int32 ']' vtfixupAttr AT_ id */ -#line 393 "asmparse.y" + case 111: /* vtfixupDecl: _VTFIXUP '[' int32 ']' vtfixupAttr AT_ id */ +#line 402 "asmparse.y" { /*PASM->SetDataSection(); PASM->EmitDataLabel($7);*/ PASM->m_VTFList.PUSH(new VTFEntry((USHORT)(yyvsp[-4].int32), (USHORT)(yyvsp[-2].int32), (yyvsp[0].string))); } -#line 4054 "asmparse.cpp" +#line 4497 "prebuilt\\asmparse.cpp" break; - case 97: /* vtfixupAttr: %empty */ -#line 397 "asmparse.y" + case 112: /* vtfixupAttr: %empty */ +#line 406 "asmparse.y" { (yyval.int32) = 0; } -#line 4060 "asmparse.cpp" +#line 4503 "prebuilt\\asmparse.cpp" break; - case 98: /* vtfixupAttr: vtfixupAttr INT32_ */ -#line 398 "asmparse.y" + case 113: /* vtfixupAttr: vtfixupAttr INT32_ */ +#line 407 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32) | COR_VTABLE_32BIT; } -#line 4066 "asmparse.cpp" +#line 4509 "prebuilt\\asmparse.cpp" break; - case 99: /* vtfixupAttr: vtfixupAttr INT64_ */ -#line 399 "asmparse.y" + case 114: /* vtfixupAttr: vtfixupAttr INT64_ */ +#line 408 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32) | COR_VTABLE_64BIT; } -#line 4072 "asmparse.cpp" +#line 4515 "prebuilt\\asmparse.cpp" break; - case 100: /* vtfixupAttr: vtfixupAttr FROMUNMANAGED_ */ -#line 400 "asmparse.y" + case 115: /* vtfixupAttr: vtfixupAttr FROMUNMANAGED_ */ +#line 409 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32) | COR_VTABLE_FROM_UNMANAGED; } -#line 4078 "asmparse.cpp" +#line 4521 "prebuilt\\asmparse.cpp" break; - case 101: /* vtfixupAttr: vtfixupAttr CALLMOSTDERIVED_ */ -#line 401 "asmparse.y" + case 116: /* vtfixupAttr: vtfixupAttr CALLMOSTDERIVED_ */ +#line 410 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32) | 
COR_VTABLE_CALL_MOST_DERIVED; } -#line 4084 "asmparse.cpp" +#line 4527 "prebuilt\\asmparse.cpp" break; - case 102: /* vtfixupAttr: vtfixupAttr RETAINAPPDOMAIN_ */ -#line 402 "asmparse.y" + case 117: /* vtfixupAttr: vtfixupAttr RETAINAPPDOMAIN_ */ +#line 411 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32) | COR_VTABLE_FROM_UNMANAGED_RETAIN_APPDOMAIN; } -#line 4090 "asmparse.cpp" +#line 4533 "prebuilt\\asmparse.cpp" break; - case 103: /* vtableDecl: vtableHead bytes ')' */ -#line 405 "asmparse.y" + case 118: /* vtableDecl: vtableHead bytes ')' */ +#line 414 "asmparse.y" { PASM->m_pVTable = (yyvsp[-1].binstr); } -#line 4096 "asmparse.cpp" +#line 4539 "prebuilt\\asmparse.cpp" break; - case 104: /* vtableHead: _VTABLE '=' '(' */ -#line 408 "asmparse.y" + case 119: /* vtableHead: _VTABLE '=' '(' */ +#line 417 "asmparse.y" { bParsingByteArray = TRUE; } -#line 4102 "asmparse.cpp" +#line 4545 "prebuilt\\asmparse.cpp" break; - case 105: /* nameSpaceHead: _NAMESPACE dottedName */ -#line 412 "asmparse.y" + case 120: /* nameSpaceHead: _NAMESPACE dottedName */ +#line 421 "asmparse.y" { PASM->StartNameSpace((yyvsp[0].string)); } -#line 4108 "asmparse.cpp" +#line 4551 "prebuilt\\asmparse.cpp" break; - case 106: /* _class: _CLASS */ -#line 415 "asmparse.y" + case 121: /* _class: _CLASS */ +#line 424 "asmparse.y" { newclass = TRUE; } -#line 4114 "asmparse.cpp" +#line 4557 "prebuilt\\asmparse.cpp" break; - case 107: /* classHeadBegin: _class classAttr dottedName typarsClause */ -#line 418 "asmparse.y" + case 122: /* classHeadBegin: _class classAttr dottedName typarsClause */ +#line 427 "asmparse.y" { if((yyvsp[0].typarlist)) FixupConstraints(); PASM->StartClass((yyvsp[-1].string), (yyvsp[-2].classAttr), (yyvsp[0].typarlist)); TyParFixupList.RESET(false); newclass = FALSE; } -#line 4124 "asmparse.cpp" +#line 4567 "prebuilt\\asmparse.cpp" break; - case 108: /* classHead: classHeadBegin extendsClause implClause */ -#line 424 "asmparse.y" + case 123: /* classHead: classHeadBegin extendsClause implClause */ +#line 433 "asmparse.y" { PASM->AddClass(); } -#line 4130 "asmparse.cpp" +#line 4573 "prebuilt\\asmparse.cpp" break; - case 109: /* classAttr: %empty */ -#line 427 "asmparse.y" + case 124: /* classAttr: %empty */ +#line 436 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) 0; } -#line 4136 "asmparse.cpp" +#line 4579 "prebuilt\\asmparse.cpp" break; - case 110: /* classAttr: classAttr PUBLIC_ */ -#line 428 "asmparse.y" + case 125: /* classAttr: classAttr PUBLIC_ */ +#line 437 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-1].classAttr) & ~tdVisibilityMask) | tdPublic); } -#line 4142 "asmparse.cpp" +#line 4585 "prebuilt\\asmparse.cpp" break; - case 111: /* classAttr: classAttr PRIVATE_ */ -#line 429 "asmparse.y" + case 126: /* classAttr: classAttr PRIVATE_ */ +#line 438 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-1].classAttr) & ~tdVisibilityMask) | tdNotPublic); } -#line 4148 "asmparse.cpp" +#line 4591 "prebuilt\\asmparse.cpp" break; - case 112: /* classAttr: classAttr VALUE_ */ -#line 430 "asmparse.y" + case 127: /* classAttr: classAttr VALUE_ */ +#line 439 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | 0x80000000 | tdSealed); } -#line 4154 "asmparse.cpp" +#line 4597 "prebuilt\\asmparse.cpp" break; - case 113: /* classAttr: classAttr ENUM_ */ -#line 431 "asmparse.y" + case 128: /* classAttr: classAttr ENUM_ */ +#line 440 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | 0x40000000); } -#line 4160 "asmparse.cpp" +#line 
4603 "prebuilt\\asmparse.cpp" break; - case 114: /* classAttr: classAttr INTERFACE_ */ -#line 432 "asmparse.y" + case 129: /* classAttr: classAttr INTERFACE_ */ +#line 441 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | tdInterface | tdAbstract); } -#line 4166 "asmparse.cpp" +#line 4609 "prebuilt\\asmparse.cpp" break; - case 115: /* classAttr: classAttr SEALED_ */ -#line 433 "asmparse.y" + case 130: /* classAttr: classAttr SEALED_ */ +#line 442 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | tdSealed); } -#line 4172 "asmparse.cpp" +#line 4615 "prebuilt\\asmparse.cpp" break; - case 116: /* classAttr: classAttr ABSTRACT_ */ -#line 434 "asmparse.y" + case 131: /* classAttr: classAttr ABSTRACT_ */ +#line 443 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | tdAbstract); } -#line 4178 "asmparse.cpp" +#line 4621 "prebuilt\\asmparse.cpp" break; - case 117: /* classAttr: classAttr AUTO_ */ -#line 435 "asmparse.y" + case 132: /* classAttr: classAttr AUTO_ */ +#line 444 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-1].classAttr) & ~tdLayoutMask) | tdAutoLayout); } -#line 4184 "asmparse.cpp" +#line 4627 "prebuilt\\asmparse.cpp" break; - case 118: /* classAttr: classAttr SEQUENTIAL_ */ -#line 436 "asmparse.y" + case 133: /* classAttr: classAttr SEQUENTIAL_ */ +#line 445 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-1].classAttr) & ~tdLayoutMask) | tdSequentialLayout); } -#line 4190 "asmparse.cpp" +#line 4633 "prebuilt\\asmparse.cpp" break; - case 119: /* classAttr: classAttr EXPLICIT_ */ -#line 437 "asmparse.y" + case 134: /* classAttr: classAttr EXPLICIT_ */ +#line 446 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-1].classAttr) & ~tdLayoutMask) | tdExplicitLayout); } -#line 4196 "asmparse.cpp" +#line 4639 "prebuilt\\asmparse.cpp" break; - case 120: /* classAttr: classAttr ANSI_ */ -#line 438 "asmparse.y" + case 135: /* classAttr: classAttr ANSI_ */ +#line 447 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-1].classAttr) & ~tdStringFormatMask) | tdAnsiClass); } -#line 4202 "asmparse.cpp" +#line 4645 "prebuilt\\asmparse.cpp" break; - case 121: /* classAttr: classAttr UNICODE_ */ -#line 439 "asmparse.y" + case 136: /* classAttr: classAttr UNICODE_ */ +#line 448 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-1].classAttr) & ~tdStringFormatMask) | tdUnicodeClass); } -#line 4208 "asmparse.cpp" +#line 4651 "prebuilt\\asmparse.cpp" break; - case 122: /* classAttr: classAttr AUTOCHAR_ */ -#line 440 "asmparse.y" + case 137: /* classAttr: classAttr AUTOCHAR_ */ +#line 449 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-1].classAttr) & ~tdStringFormatMask) | tdAutoClass); } -#line 4214 "asmparse.cpp" +#line 4657 "prebuilt\\asmparse.cpp" break; - case 123: /* classAttr: classAttr IMPORT_ */ -#line 441 "asmparse.y" + case 138: /* classAttr: classAttr IMPORT_ */ +#line 450 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | tdImport); } -#line 4220 "asmparse.cpp" +#line 4663 "prebuilt\\asmparse.cpp" break; - case 124: /* classAttr: classAttr SERIALIZABLE_ */ -#line 442 "asmparse.y" + case 139: /* classAttr: classAttr SERIALIZABLE_ */ +#line 451 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | tdSerializable); } -#line 4226 "asmparse.cpp" +#line 4669 "prebuilt\\asmparse.cpp" break; - case 125: /* classAttr: classAttr WINDOWSRUNTIME_ */ -#line 443 "asmparse.y" + case 140: /* classAttr: 
classAttr WINDOWSRUNTIME_ */ +#line 452 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | tdWindowsRuntime); } -#line 4232 "asmparse.cpp" +#line 4675 "prebuilt\\asmparse.cpp" break; - case 126: /* classAttr: classAttr NESTED_ PUBLIC_ */ -#line 444 "asmparse.y" + case 141: /* classAttr: classAttr NESTED_ PUBLIC_ */ +#line 453 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-2].classAttr) & ~tdVisibilityMask) | tdNestedPublic); } -#line 4238 "asmparse.cpp" +#line 4681 "prebuilt\\asmparse.cpp" break; - case 127: /* classAttr: classAttr NESTED_ PRIVATE_ */ -#line 445 "asmparse.y" + case 142: /* classAttr: classAttr NESTED_ PRIVATE_ */ +#line 454 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-2].classAttr) & ~tdVisibilityMask) | tdNestedPrivate); } -#line 4244 "asmparse.cpp" +#line 4687 "prebuilt\\asmparse.cpp" break; - case 128: /* classAttr: classAttr NESTED_ FAMILY_ */ -#line 446 "asmparse.y" + case 143: /* classAttr: classAttr NESTED_ FAMILY_ */ +#line 455 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-2].classAttr) & ~tdVisibilityMask) | tdNestedFamily); } -#line 4250 "asmparse.cpp" +#line 4693 "prebuilt\\asmparse.cpp" break; - case 129: /* classAttr: classAttr NESTED_ ASSEMBLY_ */ -#line 447 "asmparse.y" + case 144: /* classAttr: classAttr NESTED_ ASSEMBLY_ */ +#line 456 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-2].classAttr) & ~tdVisibilityMask) | tdNestedAssembly); } -#line 4256 "asmparse.cpp" +#line 4699 "prebuilt\\asmparse.cpp" break; - case 130: /* classAttr: classAttr NESTED_ FAMANDASSEM_ */ -#line 448 "asmparse.y" + case 145: /* classAttr: classAttr NESTED_ FAMANDASSEM_ */ +#line 457 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-2].classAttr) & ~tdVisibilityMask) | tdNestedFamANDAssem); } -#line 4262 "asmparse.cpp" +#line 4705 "prebuilt\\asmparse.cpp" break; - case 131: /* classAttr: classAttr NESTED_ FAMORASSEM_ */ -#line 449 "asmparse.y" + case 146: /* classAttr: classAttr NESTED_ FAMORASSEM_ */ +#line 458 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) (((yyvsp[-2].classAttr) & ~tdVisibilityMask) | tdNestedFamORAssem); } -#line 4268 "asmparse.cpp" +#line 4711 "prebuilt\\asmparse.cpp" break; - case 132: /* classAttr: classAttr BEFOREFIELDINIT_ */ -#line 450 "asmparse.y" + case 147: /* classAttr: classAttr BEFOREFIELDINIT_ */ +#line 459 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | tdBeforeFieldInit); } -#line 4274 "asmparse.cpp" +#line 4717 "prebuilt\\asmparse.cpp" break; - case 133: /* classAttr: classAttr SPECIALNAME_ */ -#line 451 "asmparse.y" + case 148: /* classAttr: classAttr SPECIALNAME_ */ +#line 460 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr) | tdSpecialName); } -#line 4280 "asmparse.cpp" +#line 4723 "prebuilt\\asmparse.cpp" break; - case 134: /* classAttr: classAttr RTSPECIALNAME_ */ -#line 452 "asmparse.y" + case 149: /* classAttr: classAttr RTSPECIALNAME_ */ +#line 461 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].classAttr)); } -#line 4286 "asmparse.cpp" +#line 4729 "prebuilt\\asmparse.cpp" break; - case 135: /* classAttr: classAttr FLAGS_ '(' int32 ')' */ -#line 453 "asmparse.y" + case 150: /* classAttr: classAttr FLAGS_ '(' int32 ')' */ +#line 462 "asmparse.y" { (yyval.classAttr) = (CorRegTypeAttr) ((yyvsp[-1].int32)); } -#line 4292 "asmparse.cpp" +#line 4735 "prebuilt\\asmparse.cpp" break; - case 137: /* extendsClause: EXTENDS_ typeSpec */ -#line 457 "asmparse.y" + case 152: /* 
extendsClause: EXTENDS_ typeSpec */ +#line 466 "asmparse.y" { PASM->m_crExtends = (yyvsp[0].token); } -#line 4298 "asmparse.cpp" +#line 4741 "prebuilt\\asmparse.cpp" break; - case 142: /* implList: implList ',' typeSpec */ -#line 468 "asmparse.y" + case 157: /* implList: implList ',' typeSpec */ +#line 477 "asmparse.y" { PASM->AddToImplList((yyvsp[0].token)); } -#line 4304 "asmparse.cpp" +#line 4747 "prebuilt\\asmparse.cpp" break; - case 143: /* implList: typeSpec */ -#line 469 "asmparse.y" + case 158: /* implList: typeSpec */ +#line 478 "asmparse.y" { PASM->AddToImplList((yyvsp[0].token)); } -#line 4310 "asmparse.cpp" +#line 4753 "prebuilt\\asmparse.cpp" break; - case 144: /* typeList: %empty */ -#line 473 "asmparse.y" + case 159: /* typeList: %empty */ +#line 482 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 4316 "asmparse.cpp" +#line 4759 "prebuilt\\asmparse.cpp" break; - case 145: /* typeList: typeListNotEmpty */ -#line 474 "asmparse.y" + case 160: /* typeList: typeListNotEmpty */ +#line 483 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 4322 "asmparse.cpp" +#line 4765 "prebuilt\\asmparse.cpp" break; - case 146: /* typeListNotEmpty: typeSpec */ -#line 477 "asmparse.y" + case 161: /* typeListNotEmpty: typeSpec */ +#line 486 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt32((yyvsp[0].token)); } -#line 4328 "asmparse.cpp" +#line 4771 "prebuilt\\asmparse.cpp" break; - case 147: /* typeListNotEmpty: typeListNotEmpty ',' typeSpec */ -#line 478 "asmparse.y" + case 162: /* typeListNotEmpty: typeListNotEmpty ',' typeSpec */ +#line 487 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); (yyval.binstr)->appendInt32((yyvsp[0].token)); } -#line 4334 "asmparse.cpp" +#line 4777 "prebuilt\\asmparse.cpp" break; - case 148: /* typarsClause: %empty */ -#line 481 "asmparse.y" + case 163: /* typarsClause: %empty */ +#line 490 "asmparse.y" { (yyval.typarlist) = NULL; PASM->m_TyParList = NULL;} -#line 4340 "asmparse.cpp" +#line 4783 "prebuilt\\asmparse.cpp" break; - case 149: /* typarsClause: '<' typars '>' */ -#line 482 "asmparse.y" + case 164: /* typarsClause: '<' typars '>' */ +#line 491 "asmparse.y" { (yyval.typarlist) = (yyvsp[-1].typarlist); PASM->m_TyParList = (yyvsp[-1].typarlist);} -#line 4346 "asmparse.cpp" +#line 4789 "prebuilt\\asmparse.cpp" break; - case 150: /* typarAttrib: '+' */ -#line 485 "asmparse.y" + case 165: /* typarAttrib: '+' */ +#line 494 "asmparse.y" { (yyval.int32) = gpCovariant; } -#line 4352 "asmparse.cpp" +#line 4795 "prebuilt\\asmparse.cpp" break; - case 151: /* typarAttrib: '-' */ -#line 486 "asmparse.y" + case 166: /* typarAttrib: '-' */ +#line 495 "asmparse.y" { (yyval.int32) = gpContravariant; } -#line 4358 "asmparse.cpp" +#line 4801 "prebuilt\\asmparse.cpp" break; - case 152: /* typarAttrib: CLASS_ */ -#line 487 "asmparse.y" + case 167: /* typarAttrib: CLASS_ */ +#line 496 "asmparse.y" { (yyval.int32) = gpReferenceTypeConstraint; } -#line 4364 "asmparse.cpp" +#line 4807 "prebuilt\\asmparse.cpp" break; - case 153: /* typarAttrib: VALUETYPE_ */ -#line 488 "asmparse.y" + case 168: /* typarAttrib: VALUETYPE_ */ +#line 497 "asmparse.y" { (yyval.int32) = gpNotNullableValueTypeConstraint; } -#line 4370 "asmparse.cpp" +#line 4813 "prebuilt\\asmparse.cpp" break; - case 154: /* typarAttrib: BYREFLIKE_ */ -#line 489 "asmparse.y" + case 169: /* typarAttrib: BYREFLIKE_ */ +#line 498 "asmparse.y" { (yyval.int32) = gpAllowByRefLike; } -#line 4376 "asmparse.cpp" +#line 4819 "prebuilt\\asmparse.cpp" break; - case 155: /* typarAttrib: _CTOR */ 
-#line 490 "asmparse.y" + case 170: /* typarAttrib: _CTOR */ +#line 499 "asmparse.y" { (yyval.int32) = gpDefaultConstructorConstraint; } -#line 4382 "asmparse.cpp" +#line 4825 "prebuilt\\asmparse.cpp" break; - case 156: /* typarAttrib: FLAGS_ '(' int32 ')' */ -#line 491 "asmparse.y" + case 171: /* typarAttrib: FLAGS_ '(' int32 ')' */ +#line 500 "asmparse.y" { (yyval.int32) = (CorGenericParamAttr)(yyvsp[-1].int32); } -#line 4388 "asmparse.cpp" +#line 4831 "prebuilt\\asmparse.cpp" break; - case 157: /* typarAttribs: %empty */ -#line 494 "asmparse.y" + case 172: /* typarAttribs: %empty */ +#line 503 "asmparse.y" { (yyval.int32) = 0; } -#line 4394 "asmparse.cpp" +#line 4837 "prebuilt\\asmparse.cpp" break; - case 158: /* typarAttribs: typarAttrib typarAttribs */ -#line 495 "asmparse.y" + case 173: /* typarAttribs: typarAttrib typarAttribs */ +#line 504 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32) | (yyvsp[0].int32); } -#line 4400 "asmparse.cpp" +#line 4843 "prebuilt\\asmparse.cpp" break; - case 159: /* typars: typarAttribs tyBound dottedName typarsRest */ -#line 498 "asmparse.y" + case 174: /* typars: typarAttribs tyBound dottedName typarsRest */ +#line 507 "asmparse.y" {(yyval.typarlist) = new TyParList((yyvsp[-3].int32), (yyvsp[-2].binstr), (yyvsp[-1].string), (yyvsp[0].typarlist));} -#line 4406 "asmparse.cpp" +#line 4849 "prebuilt\\asmparse.cpp" break; - case 160: /* typars: typarAttribs dottedName typarsRest */ -#line 499 "asmparse.y" + case 175: /* typars: typarAttribs dottedName typarsRest */ +#line 508 "asmparse.y" {(yyval.typarlist) = new TyParList((yyvsp[-2].int32), NULL, (yyvsp[-1].string), (yyvsp[0].typarlist));} -#line 4412 "asmparse.cpp" +#line 4855 "prebuilt\\asmparse.cpp" break; - case 161: /* typarsRest: %empty */ -#line 502 "asmparse.y" + case 176: /* typarsRest: %empty */ +#line 511 "asmparse.y" { (yyval.typarlist) = NULL; } -#line 4418 "asmparse.cpp" +#line 4861 "prebuilt\\asmparse.cpp" break; - case 162: /* typarsRest: ',' typars */ -#line 503 "asmparse.y" + case 177: /* typarsRest: ',' typars */ +#line 512 "asmparse.y" { (yyval.typarlist) = (yyvsp[0].typarlist); } -#line 4424 "asmparse.cpp" +#line 4867 "prebuilt\\asmparse.cpp" break; - case 163: /* tyBound: '(' typeList ')' */ -#line 506 "asmparse.y" + case 178: /* tyBound: '(' typeList ')' */ +#line 515 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); } -#line 4430 "asmparse.cpp" +#line 4873 "prebuilt\\asmparse.cpp" break; - case 164: /* genArity: %empty */ -#line 509 "asmparse.y" + case 179: /* genArity: %empty */ +#line 518 "asmparse.y" { (yyval.int32)= 0; } -#line 4436 "asmparse.cpp" +#line 4879 "prebuilt\\asmparse.cpp" break; - case 165: /* genArity: genArityNotEmpty */ -#line 510 "asmparse.y" + case 180: /* genArity: genArityNotEmpty */ +#line 519 "asmparse.y" { (yyval.int32) = (yyvsp[0].int32); } -#line 4442 "asmparse.cpp" +#line 4885 "prebuilt\\asmparse.cpp" break; - case 166: /* genArityNotEmpty: '<' '[' int32 ']' '>' */ -#line 513 "asmparse.y" + case 181: /* genArityNotEmpty: '<' '[' int32 ']' '>' */ +#line 522 "asmparse.y" { (yyval.int32) = (yyvsp[-2].int32); } -#line 4448 "asmparse.cpp" +#line 4891 "prebuilt\\asmparse.cpp" break; - case 167: /* classDecl: methodHead methodDecls '}' */ -#line 517 "asmparse.y" + case 182: /* classDecl: methodHead methodDecls '}' */ +#line 526 "asmparse.y" { if(PASM->m_pCurMethod->m_ulLines[1] ==0) { PASM->m_pCurMethod->m_ulLines[1] = PASM->m_ulCurLine; PASM->m_pCurMethod->m_ulColumns[1]=PASM->m_ulCurColumn;} PASM->EndMethod(); } -#line 4457 "asmparse.cpp" +#line 4900 
"prebuilt\\asmparse.cpp" break; - case 168: /* classDecl: classHead '{' classDecls '}' */ -#line 521 "asmparse.y" + case 183: /* classDecl: classHead '{' classDecls '}' */ +#line 530 "asmparse.y" { PASM->EndClass(); } -#line 4463 "asmparse.cpp" +#line 4906 "prebuilt\\asmparse.cpp" break; - case 169: /* classDecl: eventHead '{' eventDecls '}' */ -#line 522 "asmparse.y" + case 184: /* classDecl: eventHead '{' eventDecls '}' */ +#line 531 "asmparse.y" { PASM->EndEvent(); } -#line 4469 "asmparse.cpp" +#line 4912 "prebuilt\\asmparse.cpp" break; - case 170: /* classDecl: propHead '{' propDecls '}' */ -#line 523 "asmparse.y" + case 185: /* classDecl: propHead '{' propDecls '}' */ +#line 532 "asmparse.y" { PASM->EndProp(); } -#line 4475 "asmparse.cpp" +#line 4918 "prebuilt\\asmparse.cpp" break; - case 176: /* classDecl: _SIZE int32 */ -#line 529 "asmparse.y" + case 191: /* classDecl: _SIZE int32 */ +#line 538 "asmparse.y" { PASM->m_pCurClass->m_ulSize = (yyvsp[0].int32); } -#line 4481 "asmparse.cpp" +#line 4924 "prebuilt\\asmparse.cpp" break; - case 177: /* classDecl: _PACK int32 */ -#line 530 "asmparse.y" + case 192: /* classDecl: _PACK int32 */ +#line 539 "asmparse.y" { PASM->m_pCurClass->m_ulPack = (yyvsp[0].int32); } -#line 4487 "asmparse.cpp" +#line 4930 "prebuilt\\asmparse.cpp" break; - case 178: /* classDecl: exportHead '{' exptypeDecls '}' */ -#line 531 "asmparse.y" + case 193: /* classDecl: exportHead '{' exptypeDecls '}' */ +#line 540 "asmparse.y" { PASMM->EndComType(); } -#line 4493 "asmparse.cpp" +#line 4936 "prebuilt\\asmparse.cpp" break; - case 179: /* classDecl: _OVERRIDE typeSpec DCOLON methodName WITH_ callConv type typeSpec DCOLON methodName '(' sigArgs0 ')' */ -#line 533 "asmparse.y" + case 194: /* classDecl: _OVERRIDE typeSpec DCOLON methodName WITH_ callConv type typeSpec DCOLON methodName '(' sigArgs0 ')' */ +#line 542 "asmparse.y" { BinStr *sig1 = parser->MakeSig((yyvsp[-7].int32), (yyvsp[-6].binstr), (yyvsp[-1].binstr)); BinStr *sig2 = new BinStr(); sig2->append(sig1); PASM->AddMethodImpl((yyvsp[-11].token),(yyvsp[-9].string),sig1,(yyvsp[-5].token),(yyvsp[-3].string),sig2); PASM->ResetArgNameList(); } -#line 4503 "asmparse.cpp" +#line 4946 "prebuilt\\asmparse.cpp" break; - case 180: /* classDecl: _OVERRIDE METHOD_ callConv type typeSpec DCOLON methodName genArity '(' sigArgs0 ')' WITH_ METHOD_ callConv type typeSpec DCOLON methodName genArity '(' sigArgs0 ')' */ -#line 539 "asmparse.y" + case 195: /* classDecl: _OVERRIDE METHOD_ callConv type typeSpec DCOLON methodName genArity '(' sigArgs0 ')' WITH_ METHOD_ callConv type typeSpec DCOLON methodName genArity '(' sigArgs0 ')' */ +#line 548 "asmparse.y" { PASM->AddMethodImpl((yyvsp[-17].token),(yyvsp[-15].string), ((yyvsp[-14].int32)==0 ? 
parser->MakeSig((yyvsp[-19].int32),(yyvsp[-18].binstr),(yyvsp[-12].binstr)) : parser->MakeSig((yyvsp[-19].int32)| IMAGE_CEE_CS_CALLCONV_GENERIC,(yyvsp[-18].binstr),(yyvsp[-12].binstr),(yyvsp[-14].int32))), @@ -4512,192 +4955,192 @@ yyparse (void) parser->MakeSig((yyvsp[-8].int32)| IMAGE_CEE_CS_CALLCONV_GENERIC,(yyvsp[-7].binstr),(yyvsp[-1].binstr),(yyvsp[-3].int32)))); PASM->ResetArgNameList(); } -#line 4516 "asmparse.cpp" +#line 4959 "prebuilt\\asmparse.cpp" break; - case 183: /* classDecl: _PARAM TYPE_ '[' int32 ']' */ -#line 549 "asmparse.y" + case 198: /* classDecl: _PARAM TYPE_ '[' int32 ']' */ +#line 558 "asmparse.y" { if(((yyvsp[-1].int32) > 0) && ((yyvsp[-1].int32) <= (int)PASM->m_pCurClass->m_NumTyPars)) PASM->m_pCustomDescrList = PASM->m_pCurClass->m_TyPars[(yyvsp[-1].int32)-1].CAList(); else PASM->report->error("Type parameter index out of range\n"); } -#line 4526 "asmparse.cpp" +#line 4969 "prebuilt\\asmparse.cpp" break; - case 184: /* classDecl: _PARAM TYPE_ dottedName */ -#line 554 "asmparse.y" + case 199: /* classDecl: _PARAM TYPE_ dottedName */ +#line 563 "asmparse.y" { int n = PASM->m_pCurClass->FindTyPar((yyvsp[0].string)); if(n >= 0) PASM->m_pCustomDescrList = PASM->m_pCurClass->m_TyPars[n].CAList(); else PASM->report->error("Type parameter '%s' undefined\n",(yyvsp[0].string)); } -#line 4537 "asmparse.cpp" +#line 4980 "prebuilt\\asmparse.cpp" break; - case 185: /* classDecl: _PARAM CONSTRAINT_ '[' int32 ']' ',' typeSpec */ -#line 560 "asmparse.y" + case 200: /* classDecl: _PARAM CONSTRAINT_ '[' int32 ']' ',' typeSpec */ +#line 569 "asmparse.y" { PASM->AddGenericParamConstraint((yyvsp[-3].int32), 0, (yyvsp[0].token)); } -#line 4543 "asmparse.cpp" +#line 4986 "prebuilt\\asmparse.cpp" break; - case 186: /* classDecl: _PARAM CONSTRAINT_ dottedName ',' typeSpec */ -#line 561 "asmparse.y" + case 201: /* classDecl: _PARAM CONSTRAINT_ dottedName ',' typeSpec */ +#line 570 "asmparse.y" { PASM->AddGenericParamConstraint(0, (yyvsp[-2].string), (yyvsp[0].token)); } -#line 4549 "asmparse.cpp" +#line 4992 "prebuilt\\asmparse.cpp" break; - case 187: /* classDecl: _INTERFACEIMPL TYPE_ typeSpec customDescr */ -#line 562 "asmparse.y" + case 202: /* classDecl: _INTERFACEIMPL TYPE_ typeSpec customDescr */ +#line 571 "asmparse.y" { (yyvsp[0].cad)->tkInterfacePair = (yyvsp[-1].token); if(PASM->m_pCustomDescrList) PASM->m_pCustomDescrList->PUSH((yyvsp[0].cad)); } -#line 4558 "asmparse.cpp" +#line 5001 "prebuilt\\asmparse.cpp" break; - case 188: /* fieldDecl: _FIELD repeatOpt fieldAttr type dottedName atOpt initOpt */ -#line 570 "asmparse.y" + case 203: /* fieldDecl: _FIELD repeatOpt fieldAttr type dottedName atOpt initOpt */ +#line 579 "asmparse.y" { (yyvsp[-3].binstr)->insertInt8(IMAGE_CEE_CS_CALLCONV_FIELD); PASM->AddField((yyvsp[-2].string), (yyvsp[-3].binstr), (yyvsp[-4].fieldAttr), (yyvsp[-1].string), (yyvsp[0].binstr), (yyvsp[-5].int32)); } -#line 4565 "asmparse.cpp" +#line 5008 "prebuilt\\asmparse.cpp" break; - case 189: /* fieldAttr: %empty */ -#line 574 "asmparse.y" + case 204: /* fieldAttr: %empty */ +#line 583 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) 0; } -#line 4571 "asmparse.cpp" +#line 5014 "prebuilt\\asmparse.cpp" break; - case 190: /* fieldAttr: fieldAttr STATIC_ */ -#line 575 "asmparse.y" + case 205: /* fieldAttr: fieldAttr STATIC_ */ +#line 584 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) ((yyvsp[-1].fieldAttr) | fdStatic); } -#line 4577 "asmparse.cpp" +#line 5020 "prebuilt\\asmparse.cpp" break; - case 191: /* fieldAttr: fieldAttr PUBLIC_ */ -#line 576 
"asmparse.y" + case 206: /* fieldAttr: fieldAttr PUBLIC_ */ +#line 585 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) (((yyvsp[-1].fieldAttr) & ~mdMemberAccessMask) | fdPublic); } -#line 4583 "asmparse.cpp" +#line 5026 "prebuilt\\asmparse.cpp" break; - case 192: /* fieldAttr: fieldAttr PRIVATE_ */ -#line 577 "asmparse.y" + case 207: /* fieldAttr: fieldAttr PRIVATE_ */ +#line 586 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) (((yyvsp[-1].fieldAttr) & ~mdMemberAccessMask) | fdPrivate); } -#line 4589 "asmparse.cpp" +#line 5032 "prebuilt\\asmparse.cpp" break; - case 193: /* fieldAttr: fieldAttr FAMILY_ */ -#line 578 "asmparse.y" + case 208: /* fieldAttr: fieldAttr FAMILY_ */ +#line 587 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) (((yyvsp[-1].fieldAttr) & ~mdMemberAccessMask) | fdFamily); } -#line 4595 "asmparse.cpp" +#line 5038 "prebuilt\\asmparse.cpp" break; - case 194: /* fieldAttr: fieldAttr INITONLY_ */ -#line 579 "asmparse.y" + case 209: /* fieldAttr: fieldAttr INITONLY_ */ +#line 588 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) ((yyvsp[-1].fieldAttr) | fdInitOnly); } -#line 4601 "asmparse.cpp" +#line 5044 "prebuilt\\asmparse.cpp" break; - case 195: /* fieldAttr: fieldAttr RTSPECIALNAME_ */ -#line 580 "asmparse.y" + case 210: /* fieldAttr: fieldAttr RTSPECIALNAME_ */ +#line 589 "asmparse.y" { (yyval.fieldAttr) = (yyvsp[-1].fieldAttr); } -#line 4607 "asmparse.cpp" +#line 5050 "prebuilt\\asmparse.cpp" break; - case 196: /* fieldAttr: fieldAttr SPECIALNAME_ */ -#line 581 "asmparse.y" + case 211: /* fieldAttr: fieldAttr SPECIALNAME_ */ +#line 590 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) ((yyvsp[-1].fieldAttr) | fdSpecialName); } -#line 4613 "asmparse.cpp" +#line 5056 "prebuilt\\asmparse.cpp" break; - case 197: /* fieldAttr: fieldAttr MARSHAL_ '(' marshalBlob ')' */ -#line 594 "asmparse.y" + case 212: /* fieldAttr: fieldAttr MARSHAL_ '(' marshalBlob ')' */ +#line 603 "asmparse.y" { PASM->m_pMarshal = (yyvsp[-1].binstr); } -#line 4619 "asmparse.cpp" +#line 5062 "prebuilt\\asmparse.cpp" break; - case 198: /* fieldAttr: fieldAttr ASSEMBLY_ */ -#line 595 "asmparse.y" + case 213: /* fieldAttr: fieldAttr ASSEMBLY_ */ +#line 604 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) (((yyvsp[-1].fieldAttr) & ~mdMemberAccessMask) | fdAssembly); } -#line 4625 "asmparse.cpp" +#line 5068 "prebuilt\\asmparse.cpp" break; - case 199: /* fieldAttr: fieldAttr FAMANDASSEM_ */ -#line 596 "asmparse.y" + case 214: /* fieldAttr: fieldAttr FAMANDASSEM_ */ +#line 605 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) (((yyvsp[-1].fieldAttr) & ~mdMemberAccessMask) | fdFamANDAssem); } -#line 4631 "asmparse.cpp" +#line 5074 "prebuilt\\asmparse.cpp" break; - case 200: /* fieldAttr: fieldAttr FAMORASSEM_ */ -#line 597 "asmparse.y" + case 215: /* fieldAttr: fieldAttr FAMORASSEM_ */ +#line 606 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) (((yyvsp[-1].fieldAttr) & ~mdMemberAccessMask) | fdFamORAssem); } -#line 4637 "asmparse.cpp" +#line 5080 "prebuilt\\asmparse.cpp" break; - case 201: /* fieldAttr: fieldAttr PRIVATESCOPE_ */ -#line 598 "asmparse.y" + case 216: /* fieldAttr: fieldAttr PRIVATESCOPE_ */ +#line 607 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) (((yyvsp[-1].fieldAttr) & ~mdMemberAccessMask) | fdPrivateScope); } -#line 4643 "asmparse.cpp" +#line 5086 "prebuilt\\asmparse.cpp" break; - case 202: /* fieldAttr: fieldAttr LITERAL_ */ -#line 599 "asmparse.y" + case 217: /* fieldAttr: fieldAttr LITERAL_ */ +#line 608 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) ((yyvsp[-1].fieldAttr) | 
fdLiteral); } -#line 4649 "asmparse.cpp" +#line 5092 "prebuilt\\asmparse.cpp" break; - case 203: /* fieldAttr: fieldAttr NOTSERIALIZED_ */ -#line 600 "asmparse.y" + case 218: /* fieldAttr: fieldAttr NOTSERIALIZED_ */ +#line 609 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) ((yyvsp[-1].fieldAttr) | fdNotSerialized); } -#line 4655 "asmparse.cpp" +#line 5098 "prebuilt\\asmparse.cpp" break; - case 204: /* fieldAttr: fieldAttr FLAGS_ '(' int32 ')' */ -#line 601 "asmparse.y" + case 219: /* fieldAttr: fieldAttr FLAGS_ '(' int32 ')' */ +#line 610 "asmparse.y" { (yyval.fieldAttr) = (CorFieldAttr) ((yyvsp[-1].int32)); } -#line 4661 "asmparse.cpp" +#line 5104 "prebuilt\\asmparse.cpp" break; - case 205: /* atOpt: %empty */ -#line 604 "asmparse.y" + case 220: /* atOpt: %empty */ +#line 613 "asmparse.y" { (yyval.string) = 0; } -#line 4667 "asmparse.cpp" +#line 5110 "prebuilt\\asmparse.cpp" break; - case 206: /* atOpt: AT_ id */ -#line 605 "asmparse.y" + case 221: /* atOpt: AT_ id */ +#line 614 "asmparse.y" { (yyval.string) = (yyvsp[0].string); } -#line 4673 "asmparse.cpp" +#line 5116 "prebuilt\\asmparse.cpp" break; - case 207: /* initOpt: %empty */ -#line 608 "asmparse.y" + case 222: /* initOpt: %empty */ +#line 617 "asmparse.y" { (yyval.binstr) = NULL; } -#line 4679 "asmparse.cpp" +#line 5122 "prebuilt\\asmparse.cpp" break; - case 208: /* initOpt: '=' fieldInit */ -#line 609 "asmparse.y" + case 223: /* initOpt: '=' fieldInit */ +#line 618 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 4685 "asmparse.cpp" +#line 5128 "prebuilt\\asmparse.cpp" break; - case 209: /* repeatOpt: %empty */ -#line 612 "asmparse.y" + case 224: /* repeatOpt: %empty */ +#line 621 "asmparse.y" { (yyval.int32) = 0xFFFFFFFF; } -#line 4691 "asmparse.cpp" +#line 5134 "prebuilt\\asmparse.cpp" break; - case 210: /* repeatOpt: '[' int32 ']' */ -#line 613 "asmparse.y" + case 225: /* repeatOpt: '[' int32 ']' */ +#line 622 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32); } -#line 4697 "asmparse.cpp" +#line 5140 "prebuilt\\asmparse.cpp" break; - case 211: /* methodRef: callConv type typeSpec DCOLON methodName tyArgs0 '(' sigArgs0 ')' */ -#line 618 "asmparse.y" + case 226: /* methodRef: callConv type typeSpec DCOLON methodName tyArgs0 '(' sigArgs0 ')' */ +#line 627 "asmparse.y" { PASM->ResetArgNameList(); if ((yyvsp[-3].binstr) == NULL) { @@ -4714,21 +5157,21 @@ yyparse (void) parser->MakeSig(IMAGE_CEE_CS_CALLCONV_INSTANTIATION, 0, (yyvsp[-3].binstr))); } } -#line 4718 "asmparse.cpp" +#line 5161 "prebuilt\\asmparse.cpp" break; - case 212: /* methodRef: callConv type typeSpec DCOLON methodName genArityNotEmpty '(' sigArgs0 ')' */ -#line 635 "asmparse.y" + case 227: /* methodRef: callConv type typeSpec DCOLON methodName genArityNotEmpty '(' sigArgs0 ')' */ +#line 644 "asmparse.y" { PASM->ResetArgNameList(); if((iCallConv)&&(((yyvsp[-8].int32) & iCallConv) != iCallConv)) parser->warn("'instance' added to method's calling convention\n"); (yyval.token) = PASM->MakeMemberRef((yyvsp[-6].token), (yyvsp[-4].string), parser->MakeSig((yyvsp[-8].int32) | IMAGE_CEE_CS_CALLCONV_GENERIC|iCallConv, (yyvsp[-7].binstr), (yyvsp[-1].binstr), (yyvsp[-3].int32))); } -#line 4728 "asmparse.cpp" +#line 5171 "prebuilt\\asmparse.cpp" break; - case 213: /* methodRef: callConv type methodName tyArgs0 '(' sigArgs0 ')' */ -#line 641 "asmparse.y" + case 228: /* methodRef: callConv type methodName tyArgs0 '(' sigArgs0 ')' */ +#line 650 "asmparse.y" { PASM->ResetArgNameList(); if ((yyvsp[-3].binstr) == NULL) { @@ -4744,300 +5187,300 @@ yyparse (void) 
parser->MakeSig(IMAGE_CEE_CS_CALLCONV_INSTANTIATION, 0, (yyvsp[-3].binstr))); } } -#line 4748 "asmparse.cpp" +#line 5191 "prebuilt\\asmparse.cpp" break; - case 214: /* methodRef: callConv type methodName genArityNotEmpty '(' sigArgs0 ')' */ -#line 657 "asmparse.y" + case 229: /* methodRef: callConv type methodName genArityNotEmpty '(' sigArgs0 ')' */ +#line 666 "asmparse.y" { PASM->ResetArgNameList(); if((iCallConv)&&(((yyvsp[-6].int32) & iCallConv) != iCallConv)) parser->warn("'instance' added to method's calling convention\n"); (yyval.token) = PASM->MakeMemberRef(mdTokenNil, (yyvsp[-4].string), parser->MakeSig((yyvsp[-6].int32) | IMAGE_CEE_CS_CALLCONV_GENERIC|iCallConv, (yyvsp[-5].binstr), (yyvsp[-1].binstr), (yyvsp[-3].int32))); } -#line 4757 "asmparse.cpp" +#line 5200 "prebuilt\\asmparse.cpp" break; - case 215: /* methodRef: mdtoken */ -#line 661 "asmparse.y" + case 230: /* methodRef: mdtoken */ +#line 670 "asmparse.y" { (yyval.token) = (yyvsp[0].token); } -#line 4763 "asmparse.cpp" +#line 5206 "prebuilt\\asmparse.cpp" break; - case 216: /* methodRef: TYPEDEF_M */ -#line 662 "asmparse.y" + case 231: /* methodRef: TYPEDEF_M */ +#line 671 "asmparse.y" { (yyval.token) = (yyvsp[0].tdd)->m_tkTypeSpec; } -#line 4769 "asmparse.cpp" +#line 5212 "prebuilt\\asmparse.cpp" break; - case 217: /* methodRef: TYPEDEF_MR */ -#line 663 "asmparse.y" + case 232: /* methodRef: TYPEDEF_MR */ +#line 672 "asmparse.y" { (yyval.token) = (yyvsp[0].tdd)->m_tkTypeSpec; } -#line 4775 "asmparse.cpp" +#line 5218 "prebuilt\\asmparse.cpp" break; - case 218: /* callConv: INSTANCE_ callConv */ -#line 666 "asmparse.y" + case 233: /* callConv: INSTANCE_ callConv */ +#line 675 "asmparse.y" { (yyval.int32) = ((yyvsp[0].int32) | IMAGE_CEE_CS_CALLCONV_HASTHIS); } -#line 4781 "asmparse.cpp" +#line 5224 "prebuilt\\asmparse.cpp" break; - case 219: /* callConv: EXPLICIT_ callConv */ -#line 667 "asmparse.y" + case 234: /* callConv: EXPLICIT_ callConv */ +#line 676 "asmparse.y" { (yyval.int32) = ((yyvsp[0].int32) | IMAGE_CEE_CS_CALLCONV_EXPLICITTHIS); } -#line 4787 "asmparse.cpp" +#line 5230 "prebuilt\\asmparse.cpp" break; - case 220: /* callConv: callKind */ -#line 668 "asmparse.y" + case 235: /* callConv: callKind */ +#line 677 "asmparse.y" { (yyval.int32) = (yyvsp[0].int32); } -#line 4793 "asmparse.cpp" +#line 5236 "prebuilt\\asmparse.cpp" break; - case 221: /* callConv: CALLCONV_ '(' int32 ')' */ -#line 669 "asmparse.y" + case 236: /* callConv: CALLCONV_ '(' int32 ')' */ +#line 678 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32); } -#line 4799 "asmparse.cpp" +#line 5242 "prebuilt\\asmparse.cpp" break; - case 222: /* callKind: %empty */ -#line 672 "asmparse.y" + case 237: /* callKind: %empty */ +#line 681 "asmparse.y" { (yyval.int32) = IMAGE_CEE_CS_CALLCONV_DEFAULT; } -#line 4805 "asmparse.cpp" +#line 5248 "prebuilt\\asmparse.cpp" break; - case 223: /* callKind: DEFAULT_ */ -#line 673 "asmparse.y" + case 238: /* callKind: DEFAULT_ */ +#line 682 "asmparse.y" { (yyval.int32) = IMAGE_CEE_CS_CALLCONV_DEFAULT; } -#line 4811 "asmparse.cpp" +#line 5254 "prebuilt\\asmparse.cpp" break; - case 224: /* callKind: VARARG_ */ -#line 674 "asmparse.y" + case 239: /* callKind: VARARG_ */ +#line 683 "asmparse.y" { (yyval.int32) = IMAGE_CEE_CS_CALLCONV_VARARG; } -#line 4817 "asmparse.cpp" +#line 5260 "prebuilt\\asmparse.cpp" break; - case 225: /* callKind: UNMANAGED_ CDECL_ */ -#line 675 "asmparse.y" + case 240: /* callKind: UNMANAGED_ CDECL_ */ +#line 684 "asmparse.y" { (yyval.int32) = IMAGE_CEE_CS_CALLCONV_C; } -#line 4823 "asmparse.cpp" +#line 
5266 "prebuilt\\asmparse.cpp" break; - case 226: /* callKind: UNMANAGED_ STDCALL_ */ -#line 676 "asmparse.y" + case 241: /* callKind: UNMANAGED_ STDCALL_ */ +#line 685 "asmparse.y" { (yyval.int32) = IMAGE_CEE_CS_CALLCONV_STDCALL; } -#line 4829 "asmparse.cpp" +#line 5272 "prebuilt\\asmparse.cpp" break; - case 227: /* callKind: UNMANAGED_ THISCALL_ */ -#line 677 "asmparse.y" + case 242: /* callKind: UNMANAGED_ THISCALL_ */ +#line 686 "asmparse.y" { (yyval.int32) = IMAGE_CEE_CS_CALLCONV_THISCALL; } -#line 4835 "asmparse.cpp" +#line 5278 "prebuilt\\asmparse.cpp" break; - case 228: /* callKind: UNMANAGED_ FASTCALL_ */ -#line 678 "asmparse.y" + case 243: /* callKind: UNMANAGED_ FASTCALL_ */ +#line 687 "asmparse.y" { (yyval.int32) = IMAGE_CEE_CS_CALLCONV_FASTCALL; } -#line 4841 "asmparse.cpp" +#line 5284 "prebuilt\\asmparse.cpp" break; - case 229: /* callKind: UNMANAGED_ */ -#line 679 "asmparse.y" + case 244: /* callKind: UNMANAGED_ */ +#line 688 "asmparse.y" { (yyval.int32) = IMAGE_CEE_CS_CALLCONV_UNMANAGED; } -#line 4847 "asmparse.cpp" +#line 5290 "prebuilt\\asmparse.cpp" break; - case 230: /* mdtoken: MDTOKEN_ '(' int32 ')' */ -#line 682 "asmparse.y" + case 245: /* mdtoken: MDTOKEN_ '(' int32 ')' */ +#line 691 "asmparse.y" { (yyval.token) = (yyvsp[-1].int32); } -#line 4853 "asmparse.cpp" +#line 5296 "prebuilt\\asmparse.cpp" break; - case 231: /* memberRef: methodSpec methodRef */ -#line 685 "asmparse.y" + case 246: /* memberRef: methodSpec methodRef */ +#line 694 "asmparse.y" { (yyval.token) = (yyvsp[0].token); PASM->delArgNameList(PASM->m_firstArgName); PASM->m_firstArgName = parser->m_ANSFirst.POP(); PASM->m_lastArgName = parser->m_ANSLast.POP(); PASM->SetMemberRefFixup((yyvsp[0].token),iOpcodeLen); } -#line 4863 "asmparse.cpp" +#line 5306 "prebuilt\\asmparse.cpp" break; - case 232: /* memberRef: FIELD_ type typeSpec DCOLON dottedName */ -#line 691 "asmparse.y" + case 247: /* memberRef: FIELD_ type typeSpec DCOLON dottedName */ +#line 700 "asmparse.y" { (yyvsp[-3].binstr)->insertInt8(IMAGE_CEE_CS_CALLCONV_FIELD); (yyval.token) = PASM->MakeMemberRef((yyvsp[-2].token), (yyvsp[0].string), (yyvsp[-3].binstr)); PASM->SetMemberRefFixup((yyval.token),iOpcodeLen); } -#line 4871 "asmparse.cpp" +#line 5314 "prebuilt\\asmparse.cpp" break; - case 233: /* memberRef: FIELD_ type dottedName */ -#line 695 "asmparse.y" + case 248: /* memberRef: FIELD_ type dottedName */ +#line 704 "asmparse.y" { (yyvsp[-1].binstr)->insertInt8(IMAGE_CEE_CS_CALLCONV_FIELD); (yyval.token) = PASM->MakeMemberRef(mdTokenNil, (yyvsp[0].string), (yyvsp[-1].binstr)); PASM->SetMemberRefFixup((yyval.token),iOpcodeLen); } -#line 4879 "asmparse.cpp" +#line 5322 "prebuilt\\asmparse.cpp" break; - case 234: /* memberRef: FIELD_ TYPEDEF_F */ -#line 698 "asmparse.y" + case 249: /* memberRef: FIELD_ TYPEDEF_F */ +#line 707 "asmparse.y" { (yyval.token) = (yyvsp[0].tdd)->m_tkTypeSpec; PASM->SetMemberRefFixup((yyval.token),iOpcodeLen); } -#line 4886 "asmparse.cpp" +#line 5329 "prebuilt\\asmparse.cpp" break; - case 235: /* memberRef: FIELD_ TYPEDEF_MR */ -#line 700 "asmparse.y" + case 250: /* memberRef: FIELD_ TYPEDEF_MR */ +#line 709 "asmparse.y" { (yyval.token) = (yyvsp[0].tdd)->m_tkTypeSpec; PASM->SetMemberRefFixup((yyval.token),iOpcodeLen); } -#line 4893 "asmparse.cpp" +#line 5336 "prebuilt\\asmparse.cpp" break; - case 236: /* memberRef: mdtoken */ -#line 702 "asmparse.y" + case 251: /* memberRef: mdtoken */ +#line 711 "asmparse.y" { (yyval.token) = (yyvsp[0].token); PASM->SetMemberRefFixup((yyval.token),iOpcodeLen); } -#line 4900 
"asmparse.cpp" +#line 5343 "prebuilt\\asmparse.cpp" break; - case 237: /* eventHead: _EVENT eventAttr typeSpec dottedName */ -#line 707 "asmparse.y" + case 252: /* eventHead: _EVENT eventAttr typeSpec dottedName */ +#line 716 "asmparse.y" { PASM->ResetEvent((yyvsp[0].string), (yyvsp[-1].token), (yyvsp[-2].eventAttr)); } -#line 4906 "asmparse.cpp" +#line 5349 "prebuilt\\asmparse.cpp" break; - case 238: /* eventHead: _EVENT eventAttr dottedName */ -#line 708 "asmparse.y" + case 253: /* eventHead: _EVENT eventAttr dottedName */ +#line 717 "asmparse.y" { PASM->ResetEvent((yyvsp[0].string), mdTypeRefNil, (yyvsp[-1].eventAttr)); } -#line 4912 "asmparse.cpp" +#line 5355 "prebuilt\\asmparse.cpp" break; - case 239: /* eventAttr: %empty */ -#line 712 "asmparse.y" + case 254: /* eventAttr: %empty */ +#line 721 "asmparse.y" { (yyval.eventAttr) = (CorEventAttr) 0; } -#line 4918 "asmparse.cpp" +#line 5361 "prebuilt\\asmparse.cpp" break; - case 240: /* eventAttr: eventAttr RTSPECIALNAME_ */ -#line 713 "asmparse.y" + case 255: /* eventAttr: eventAttr RTSPECIALNAME_ */ +#line 722 "asmparse.y" { (yyval.eventAttr) = (yyvsp[-1].eventAttr); } -#line 4924 "asmparse.cpp" +#line 5367 "prebuilt\\asmparse.cpp" break; - case 241: /* eventAttr: eventAttr SPECIALNAME_ */ -#line 714 "asmparse.y" + case 256: /* eventAttr: eventAttr SPECIALNAME_ */ +#line 723 "asmparse.y" { (yyval.eventAttr) = (CorEventAttr) ((yyvsp[-1].eventAttr) | evSpecialName); } -#line 4930 "asmparse.cpp" +#line 5373 "prebuilt\\asmparse.cpp" break; - case 244: /* eventDecl: _ADDON methodRef */ -#line 721 "asmparse.y" + case 259: /* eventDecl: _ADDON methodRef */ +#line 730 "asmparse.y" { PASM->SetEventMethod(0, (yyvsp[0].token)); } -#line 4936 "asmparse.cpp" +#line 5379 "prebuilt\\asmparse.cpp" break; - case 245: /* eventDecl: _REMOVEON methodRef */ -#line 722 "asmparse.y" + case 260: /* eventDecl: _REMOVEON methodRef */ +#line 731 "asmparse.y" { PASM->SetEventMethod(1, (yyvsp[0].token)); } -#line 4942 "asmparse.cpp" +#line 5385 "prebuilt\\asmparse.cpp" break; - case 246: /* eventDecl: _FIRE methodRef */ -#line 723 "asmparse.y" + case 261: /* eventDecl: _FIRE methodRef */ +#line 732 "asmparse.y" { PASM->SetEventMethod(2, (yyvsp[0].token)); } -#line 4948 "asmparse.cpp" +#line 5391 "prebuilt\\asmparse.cpp" break; - case 247: /* eventDecl: _OTHER methodRef */ -#line 724 "asmparse.y" + case 262: /* eventDecl: _OTHER methodRef */ +#line 733 "asmparse.y" { PASM->SetEventMethod(3, (yyvsp[0].token)); } -#line 4954 "asmparse.cpp" +#line 5397 "prebuilt\\asmparse.cpp" break; - case 252: /* propHead: _PROPERTY propAttr callConv type dottedName '(' sigArgs0 ')' initOpt */ -#line 733 "asmparse.y" + case 267: /* propHead: _PROPERTY propAttr callConv type dottedName '(' sigArgs0 ')' initOpt */ +#line 742 "asmparse.y" { PASM->ResetProp((yyvsp[-4].string), parser->MakeSig((IMAGE_CEE_CS_CALLCONV_PROPERTY | ((yyvsp[-6].int32) & IMAGE_CEE_CS_CALLCONV_HASTHIS)),(yyvsp[-5].binstr),(yyvsp[-2].binstr)), (yyvsp[-7].propAttr), (yyvsp[0].binstr));} -#line 4962 "asmparse.cpp" +#line 5405 "prebuilt\\asmparse.cpp" break; - case 253: /* propAttr: %empty */ -#line 738 "asmparse.y" + case 268: /* propAttr: %empty */ +#line 747 "asmparse.y" { (yyval.propAttr) = (CorPropertyAttr) 0; } -#line 4968 "asmparse.cpp" +#line 5411 "prebuilt\\asmparse.cpp" break; - case 254: /* propAttr: propAttr RTSPECIALNAME_ */ -#line 739 "asmparse.y" + case 269: /* propAttr: propAttr RTSPECIALNAME_ */ +#line 748 "asmparse.y" { (yyval.propAttr) = (yyvsp[-1].propAttr); } -#line 4974 "asmparse.cpp" +#line 
5417 "prebuilt\\asmparse.cpp" break; - case 255: /* propAttr: propAttr SPECIALNAME_ */ -#line 740 "asmparse.y" + case 270: /* propAttr: propAttr SPECIALNAME_ */ +#line 749 "asmparse.y" { (yyval.propAttr) = (CorPropertyAttr) ((yyvsp[-1].propAttr) | prSpecialName); } -#line 4980 "asmparse.cpp" +#line 5423 "prebuilt\\asmparse.cpp" break; - case 258: /* propDecl: _SET methodRef */ -#line 748 "asmparse.y" + case 273: /* propDecl: _SET methodRef */ +#line 757 "asmparse.y" { PASM->SetPropMethod(0, (yyvsp[0].token)); } -#line 4986 "asmparse.cpp" +#line 5429 "prebuilt\\asmparse.cpp" break; - case 259: /* propDecl: _GET methodRef */ -#line 749 "asmparse.y" + case 274: /* propDecl: _GET methodRef */ +#line 758 "asmparse.y" { PASM->SetPropMethod(1, (yyvsp[0].token)); } -#line 4992 "asmparse.cpp" +#line 5435 "prebuilt\\asmparse.cpp" break; - case 260: /* propDecl: _OTHER methodRef */ -#line 750 "asmparse.y" + case 275: /* propDecl: _OTHER methodRef */ +#line 759 "asmparse.y" { PASM->SetPropMethod(2, (yyvsp[0].token)); } -#line 4998 "asmparse.cpp" +#line 5441 "prebuilt\\asmparse.cpp" break; - case 265: /* methodHeadPart1: _METHOD */ -#line 758 "asmparse.y" + case 280: /* methodHeadPart1: _METHOD */ +#line 767 "asmparse.y" { PASM->ResetForNextMethod(); uMethodBeginLine = PASM->m_ulCurLine; uMethodBeginColumn=PASM->m_ulCurColumn; } -#line 5007 "asmparse.cpp" +#line 5450 "prebuilt\\asmparse.cpp" break; - case 266: /* marshalClause: %empty */ -#line 764 "asmparse.y" + case 281: /* marshalClause: %empty */ +#line 773 "asmparse.y" { (yyval.binstr) = NULL; } -#line 5013 "asmparse.cpp" +#line 5456 "prebuilt\\asmparse.cpp" break; - case 267: /* marshalClause: MARSHAL_ '(' marshalBlob ')' */ -#line 765 "asmparse.y" + case 282: /* marshalClause: MARSHAL_ '(' marshalBlob ')' */ +#line 774 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); } -#line 5019 "asmparse.cpp" +#line 5462 "prebuilt\\asmparse.cpp" break; - case 268: /* marshalBlob: nativeType */ -#line 768 "asmparse.y" + case 283: /* marshalBlob: nativeType */ +#line 777 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 5025 "asmparse.cpp" +#line 5468 "prebuilt\\asmparse.cpp" break; - case 269: /* marshalBlob: marshalBlobHead hexbytes '}' */ -#line 769 "asmparse.y" + case 284: /* marshalBlob: marshalBlobHead hexbytes '}' */ +#line 778 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); } -#line 5031 "asmparse.cpp" +#line 5474 "prebuilt\\asmparse.cpp" break; - case 270: /* marshalBlobHead: '{' */ -#line 772 "asmparse.y" + case 285: /* marshalBlobHead: '{' */ +#line 781 "asmparse.y" { bParsingByteArray = TRUE; } -#line 5037 "asmparse.cpp" +#line 5480 "prebuilt\\asmparse.cpp" break; - case 271: /* methodHead: methodHeadPart1 methAttr callConv paramAttr type marshalClause methodName typarsClause '(' sigArgs0 ')' implAttr '{' */ -#line 776 "asmparse.y" + case 286: /* methodHead: methodHeadPart1 methAttr callConv paramAttr type marshalClause methodName typarsClause '(' sigArgs0 ')' implAttr '{' */ +#line 785 "asmparse.y" { BinStr* sig; if ((yyvsp[-5].typarlist) == NULL) sig = parser->MakeSig((yyvsp[-10].int32), (yyvsp[-8].binstr), (yyvsp[-3].binstr)); else { @@ -5051,450 +5494,456 @@ yyparse (void) PASM->m_pCurMethod->m_ulLines[0] = uMethodBeginLine; PASM->m_pCurMethod->m_ulColumns[0]=uMethodBeginColumn; } -#line 5055 "asmparse.cpp" +#line 5498 "prebuilt\\asmparse.cpp" break; - case 272: /* methAttr: %empty */ -#line 791 "asmparse.y" + case 287: /* methAttr: %empty */ +#line 800 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) 0; } -#line 5061 
"asmparse.cpp" +#line 5504 "prebuilt\\asmparse.cpp" break; - case 273: /* methAttr: methAttr STATIC_ */ -#line 792 "asmparse.y" + case 288: /* methAttr: methAttr STATIC_ */ +#line 801 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdStatic); } -#line 5067 "asmparse.cpp" +#line 5510 "prebuilt\\asmparse.cpp" break; - case 274: /* methAttr: methAttr PUBLIC_ */ -#line 793 "asmparse.y" + case 289: /* methAttr: methAttr PUBLIC_ */ +#line 802 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) (((yyvsp[-1].methAttr) & ~mdMemberAccessMask) | mdPublic); } -#line 5073 "asmparse.cpp" +#line 5516 "prebuilt\\asmparse.cpp" break; - case 275: /* methAttr: methAttr PRIVATE_ */ -#line 794 "asmparse.y" + case 290: /* methAttr: methAttr PRIVATE_ */ +#line 803 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) (((yyvsp[-1].methAttr) & ~mdMemberAccessMask) | mdPrivate); } -#line 5079 "asmparse.cpp" +#line 5522 "prebuilt\\asmparse.cpp" break; - case 276: /* methAttr: methAttr FAMILY_ */ -#line 795 "asmparse.y" + case 291: /* methAttr: methAttr FAMILY_ */ +#line 804 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) (((yyvsp[-1].methAttr) & ~mdMemberAccessMask) | mdFamily); } -#line 5085 "asmparse.cpp" +#line 5528 "prebuilt\\asmparse.cpp" break; - case 277: /* methAttr: methAttr FINAL_ */ -#line 796 "asmparse.y" + case 292: /* methAttr: methAttr FINAL_ */ +#line 805 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdFinal); } -#line 5091 "asmparse.cpp" +#line 5534 "prebuilt\\asmparse.cpp" break; - case 278: /* methAttr: methAttr SPECIALNAME_ */ -#line 797 "asmparse.y" + case 293: /* methAttr: methAttr SPECIALNAME_ */ +#line 806 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdSpecialName); } -#line 5097 "asmparse.cpp" +#line 5540 "prebuilt\\asmparse.cpp" break; - case 279: /* methAttr: methAttr VIRTUAL_ */ -#line 798 "asmparse.y" + case 294: /* methAttr: methAttr VIRTUAL_ */ +#line 807 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdVirtual); } -#line 5103 "asmparse.cpp" +#line 5546 "prebuilt\\asmparse.cpp" break; - case 280: /* methAttr: methAttr STRICT_ */ -#line 799 "asmparse.y" + case 295: /* methAttr: methAttr STRICT_ */ +#line 808 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdCheckAccessOnOverride); } -#line 5109 "asmparse.cpp" +#line 5552 "prebuilt\\asmparse.cpp" break; - case 281: /* methAttr: methAttr ABSTRACT_ */ -#line 800 "asmparse.y" + case 296: /* methAttr: methAttr ABSTRACT_ */ +#line 809 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdAbstract); } -#line 5115 "asmparse.cpp" +#line 5558 "prebuilt\\asmparse.cpp" break; - case 282: /* methAttr: methAttr ASSEMBLY_ */ -#line 801 "asmparse.y" + case 297: /* methAttr: methAttr ASSEMBLY_ */ +#line 810 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) (((yyvsp[-1].methAttr) & ~mdMemberAccessMask) | mdAssem); } -#line 5121 "asmparse.cpp" +#line 5564 "prebuilt\\asmparse.cpp" break; - case 283: /* methAttr: methAttr FAMANDASSEM_ */ -#line 802 "asmparse.y" + case 298: /* methAttr: methAttr FAMANDASSEM_ */ +#line 811 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) (((yyvsp[-1].methAttr) & ~mdMemberAccessMask) | mdFamANDAssem); } -#line 5127 "asmparse.cpp" +#line 5570 "prebuilt\\asmparse.cpp" break; - case 284: /* methAttr: methAttr FAMORASSEM_ */ -#line 803 "asmparse.y" + case 299: /* methAttr: methAttr FAMORASSEM_ */ +#line 812 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) (((yyvsp[-1].methAttr) 
& ~mdMemberAccessMask) | mdFamORAssem); } -#line 5133 "asmparse.cpp" +#line 5576 "prebuilt\\asmparse.cpp" break; - case 285: /* methAttr: methAttr PRIVATESCOPE_ */ -#line 804 "asmparse.y" + case 300: /* methAttr: methAttr PRIVATESCOPE_ */ +#line 813 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) (((yyvsp[-1].methAttr) & ~mdMemberAccessMask) | mdPrivateScope); } -#line 5139 "asmparse.cpp" +#line 5582 "prebuilt\\asmparse.cpp" break; - case 286: /* methAttr: methAttr HIDEBYSIG_ */ -#line 805 "asmparse.y" + case 301: /* methAttr: methAttr HIDEBYSIG_ */ +#line 814 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdHideBySig); } -#line 5145 "asmparse.cpp" +#line 5588 "prebuilt\\asmparse.cpp" break; - case 287: /* methAttr: methAttr NEWSLOT_ */ -#line 806 "asmparse.y" + case 302: /* methAttr: methAttr NEWSLOT_ */ +#line 815 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdNewSlot); } -#line 5151 "asmparse.cpp" +#line 5594 "prebuilt\\asmparse.cpp" break; - case 288: /* methAttr: methAttr RTSPECIALNAME_ */ -#line 807 "asmparse.y" + case 303: /* methAttr: methAttr RTSPECIALNAME_ */ +#line 816 "asmparse.y" { (yyval.methAttr) = (yyvsp[-1].methAttr); } -#line 5157 "asmparse.cpp" +#line 5600 "prebuilt\\asmparse.cpp" break; - case 289: /* methAttr: methAttr UNMANAGEDEXP_ */ -#line 808 "asmparse.y" + case 304: /* methAttr: methAttr UNMANAGEDEXP_ */ +#line 817 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdUnmanagedExport); } -#line 5163 "asmparse.cpp" +#line 5606 "prebuilt\\asmparse.cpp" break; - case 290: /* methAttr: methAttr REQSECOBJ_ */ -#line 809 "asmparse.y" + case 305: /* methAttr: methAttr REQSECOBJ_ */ +#line 818 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].methAttr) | mdRequireSecObject); } -#line 5169 "asmparse.cpp" +#line 5612 "prebuilt\\asmparse.cpp" break; - case 291: /* methAttr: methAttr FLAGS_ '(' int32 ')' */ -#line 810 "asmparse.y" + case 306: /* methAttr: methAttr FLAGS_ '(' int32 ')' */ +#line 819 "asmparse.y" { (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-1].int32)); } -#line 5175 "asmparse.cpp" +#line 5618 "prebuilt\\asmparse.cpp" break; - case 292: /* methAttr: methAttr PINVOKEIMPL_ '(' compQstring AS_ compQstring pinvAttr ')' */ -#line 812 "asmparse.y" + case 307: /* methAttr: methAttr PINVOKEIMPL_ '(' compQstring AS_ compQstring pinvAttr ')' */ +#line 821 "asmparse.y" { PASM->SetPinvoke((yyvsp[-4].binstr),0,(yyvsp[-2].binstr),(yyvsp[-1].pinvAttr)); (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-7].methAttr) | mdPinvokeImpl); } -#line 5182 "asmparse.cpp" +#line 5625 "prebuilt\\asmparse.cpp" break; - case 293: /* methAttr: methAttr PINVOKEIMPL_ '(' compQstring pinvAttr ')' */ -#line 815 "asmparse.y" + case 308: /* methAttr: methAttr PINVOKEIMPL_ '(' compQstring pinvAttr ')' */ +#line 824 "asmparse.y" { PASM->SetPinvoke((yyvsp[-2].binstr),0,NULL,(yyvsp[-1].pinvAttr)); (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-5].methAttr) | mdPinvokeImpl); } -#line 5189 "asmparse.cpp" +#line 5632 "prebuilt\\asmparse.cpp" break; - case 294: /* methAttr: methAttr PINVOKEIMPL_ '(' pinvAttr ')' */ -#line 818 "asmparse.y" + case 309: /* methAttr: methAttr PINVOKEIMPL_ '(' pinvAttr ')' */ +#line 827 "asmparse.y" { PASM->SetPinvoke(new BinStr(),0,NULL,(yyvsp[-1].pinvAttr)); (yyval.methAttr) = (CorMethodAttr) ((yyvsp[-4].methAttr) | mdPinvokeImpl); } -#line 5196 "asmparse.cpp" +#line 5639 "prebuilt\\asmparse.cpp" break; - case 295: /* pinvAttr: %empty */ -#line 822 "asmparse.y" + case 310: /* pinvAttr: %empty 
*/ +#line 831 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) 0; } -#line 5202 "asmparse.cpp" +#line 5645 "prebuilt\\asmparse.cpp" break; - case 296: /* pinvAttr: pinvAttr NOMANGLE_ */ -#line 823 "asmparse.y" + case 311: /* pinvAttr: pinvAttr NOMANGLE_ */ +#line 832 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmNoMangle); } -#line 5208 "asmparse.cpp" +#line 5651 "prebuilt\\asmparse.cpp" break; - case 297: /* pinvAttr: pinvAttr ANSI_ */ -#line 824 "asmparse.y" + case 312: /* pinvAttr: pinvAttr ANSI_ */ +#line 833 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmCharSetAnsi); } -#line 5214 "asmparse.cpp" +#line 5657 "prebuilt\\asmparse.cpp" break; - case 298: /* pinvAttr: pinvAttr UNICODE_ */ -#line 825 "asmparse.y" + case 313: /* pinvAttr: pinvAttr UNICODE_ */ +#line 834 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmCharSetUnicode); } -#line 5220 "asmparse.cpp" +#line 5663 "prebuilt\\asmparse.cpp" break; - case 299: /* pinvAttr: pinvAttr AUTOCHAR_ */ -#line 826 "asmparse.y" + case 314: /* pinvAttr: pinvAttr AUTOCHAR_ */ +#line 835 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmCharSetAuto); } -#line 5226 "asmparse.cpp" +#line 5669 "prebuilt\\asmparse.cpp" break; - case 300: /* pinvAttr: pinvAttr LASTERR_ */ -#line 827 "asmparse.y" + case 315: /* pinvAttr: pinvAttr LASTERR_ */ +#line 836 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmSupportsLastError); } -#line 5232 "asmparse.cpp" +#line 5675 "prebuilt\\asmparse.cpp" break; - case 301: /* pinvAttr: pinvAttr WINAPI_ */ -#line 828 "asmparse.y" + case 316: /* pinvAttr: pinvAttr WINAPI_ */ +#line 837 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmCallConvWinapi); } -#line 5238 "asmparse.cpp" +#line 5681 "prebuilt\\asmparse.cpp" break; - case 302: /* pinvAttr: pinvAttr CDECL_ */ -#line 829 "asmparse.y" + case 317: /* pinvAttr: pinvAttr CDECL_ */ +#line 838 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmCallConvCdecl); } -#line 5244 "asmparse.cpp" +#line 5687 "prebuilt\\asmparse.cpp" break; - case 303: /* pinvAttr: pinvAttr STDCALL_ */ -#line 830 "asmparse.y" + case 318: /* pinvAttr: pinvAttr STDCALL_ */ +#line 839 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmCallConvStdcall); } -#line 5250 "asmparse.cpp" +#line 5693 "prebuilt\\asmparse.cpp" break; - case 304: /* pinvAttr: pinvAttr THISCALL_ */ -#line 831 "asmparse.y" + case 319: /* pinvAttr: pinvAttr THISCALL_ */ +#line 840 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmCallConvThiscall); } -#line 5256 "asmparse.cpp" +#line 5699 "prebuilt\\asmparse.cpp" break; - case 305: /* pinvAttr: pinvAttr FASTCALL_ */ -#line 832 "asmparse.y" + case 320: /* pinvAttr: pinvAttr FASTCALL_ */ +#line 841 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].pinvAttr) | pmCallConvFastcall); } -#line 5262 "asmparse.cpp" +#line 5705 "prebuilt\\asmparse.cpp" break; - case 306: /* pinvAttr: pinvAttr BESTFIT_ ':' ON_ */ -#line 833 "asmparse.y" + case 321: /* pinvAttr: pinvAttr BESTFIT_ ':' ON_ */ +#line 842 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-3].pinvAttr) | pmBestFitEnabled); } -#line 5268 "asmparse.cpp" +#line 5711 "prebuilt\\asmparse.cpp" break; - case 307: /* pinvAttr: pinvAttr BESTFIT_ ':' OFF_ */ -#line 834 "asmparse.y" + case 322: /* pinvAttr: pinvAttr BESTFIT_ ':' OFF_ */ +#line 843 "asmparse.y" { (yyval.pinvAttr) = 
(CorPinvokeMap) ((yyvsp[-3].pinvAttr) | pmBestFitDisabled); } -#line 5274 "asmparse.cpp" +#line 5717 "prebuilt\\asmparse.cpp" break; - case 308: /* pinvAttr: pinvAttr CHARMAPERROR_ ':' ON_ */ -#line 835 "asmparse.y" + case 323: /* pinvAttr: pinvAttr CHARMAPERROR_ ':' ON_ */ +#line 844 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-3].pinvAttr) | pmThrowOnUnmappableCharEnabled); } -#line 5280 "asmparse.cpp" +#line 5723 "prebuilt\\asmparse.cpp" break; - case 309: /* pinvAttr: pinvAttr CHARMAPERROR_ ':' OFF_ */ -#line 836 "asmparse.y" + case 324: /* pinvAttr: pinvAttr CHARMAPERROR_ ':' OFF_ */ +#line 845 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-3].pinvAttr) | pmThrowOnUnmappableCharDisabled); } -#line 5286 "asmparse.cpp" +#line 5729 "prebuilt\\asmparse.cpp" break; - case 310: /* pinvAttr: pinvAttr FLAGS_ '(' int32 ')' */ -#line 837 "asmparse.y" + case 325: /* pinvAttr: pinvAttr FLAGS_ '(' int32 ')' */ +#line 846 "asmparse.y" { (yyval.pinvAttr) = (CorPinvokeMap) ((yyvsp[-1].int32)); } -#line 5292 "asmparse.cpp" +#line 5735 "prebuilt\\asmparse.cpp" break; - case 311: /* methodName: _CTOR */ -#line 840 "asmparse.y" + case 326: /* methodName: _CTOR */ +#line 849 "asmparse.y" { (yyval.string) = newString(COR_CTOR_METHOD_NAME); } -#line 5298 "asmparse.cpp" +#line 5741 "prebuilt\\asmparse.cpp" break; - case 312: /* methodName: _CCTOR */ -#line 841 "asmparse.y" + case 327: /* methodName: _CCTOR */ +#line 850 "asmparse.y" { (yyval.string) = newString(COR_CCTOR_METHOD_NAME); } -#line 5304 "asmparse.cpp" +#line 5747 "prebuilt\\asmparse.cpp" break; - case 313: /* methodName: dottedName */ -#line 842 "asmparse.y" + case 328: /* methodName: dottedName */ +#line 851 "asmparse.y" { (yyval.string) = (yyvsp[0].string); } -#line 5310 "asmparse.cpp" +#line 5753 "prebuilt\\asmparse.cpp" break; - case 314: /* paramAttr: %empty */ -#line 845 "asmparse.y" + case 329: /* paramAttr: %empty */ +#line 854 "asmparse.y" { (yyval.int32) = 0; } -#line 5316 "asmparse.cpp" +#line 5759 "prebuilt\\asmparse.cpp" break; - case 315: /* paramAttr: paramAttr '[' IN_ ']' */ -#line 846 "asmparse.y" + case 330: /* paramAttr: paramAttr '[' IN_ ']' */ +#line 855 "asmparse.y" { (yyval.int32) = (yyvsp[-3].int32) | pdIn; } -#line 5322 "asmparse.cpp" +#line 5765 "prebuilt\\asmparse.cpp" break; - case 316: /* paramAttr: paramAttr '[' OUT_ ']' */ -#line 847 "asmparse.y" + case 331: /* paramAttr: paramAttr '[' OUT_ ']' */ +#line 856 "asmparse.y" { (yyval.int32) = (yyvsp[-3].int32) | pdOut; } -#line 5328 "asmparse.cpp" +#line 5771 "prebuilt\\asmparse.cpp" break; - case 317: /* paramAttr: paramAttr '[' OPT_ ']' */ -#line 848 "asmparse.y" + case 332: /* paramAttr: paramAttr '[' OPT_ ']' */ +#line 857 "asmparse.y" { (yyval.int32) = (yyvsp[-3].int32) | pdOptional; } -#line 5334 "asmparse.cpp" +#line 5777 "prebuilt\\asmparse.cpp" break; - case 318: /* paramAttr: paramAttr '[' int32 ']' */ -#line 849 "asmparse.y" + case 333: /* paramAttr: paramAttr '[' int32 ']' */ +#line 858 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32) + 1; } -#line 5340 "asmparse.cpp" +#line 5783 "prebuilt\\asmparse.cpp" break; - case 319: /* implAttr: %empty */ -#line 852 "asmparse.y" + case 334: /* implAttr: %empty */ +#line 861 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) (miIL | miManaged); } -#line 5346 "asmparse.cpp" +#line 5789 "prebuilt\\asmparse.cpp" break; - case 320: /* implAttr: implAttr NATIVE_ */ -#line 853 "asmparse.y" + case 335: /* implAttr: implAttr NATIVE_ */ +#line 862 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) 
(((yyvsp[-1].implAttr) & 0xFFF4) | miNative); } -#line 5352 "asmparse.cpp" +#line 5795 "prebuilt\\asmparse.cpp" break; - case 321: /* implAttr: implAttr CIL_ */ -#line 854 "asmparse.y" + case 336: /* implAttr: implAttr CIL_ */ +#line 863 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) (((yyvsp[-1].implAttr) & 0xFFF4) | miIL); } -#line 5358 "asmparse.cpp" +#line 5801 "prebuilt\\asmparse.cpp" break; - case 322: /* implAttr: implAttr OPTIL_ */ -#line 855 "asmparse.y" + case 337: /* implAttr: implAttr OPTIL_ */ +#line 864 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) (((yyvsp[-1].implAttr) & 0xFFF4) | miOPTIL); } -#line 5364 "asmparse.cpp" +#line 5807 "prebuilt\\asmparse.cpp" break; - case 323: /* implAttr: implAttr MANAGED_ */ -#line 856 "asmparse.y" + case 338: /* implAttr: implAttr MANAGED_ */ +#line 865 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) (((yyvsp[-1].implAttr) & 0xFFFB) | miManaged); } -#line 5370 "asmparse.cpp" +#line 5813 "prebuilt\\asmparse.cpp" break; - case 324: /* implAttr: implAttr UNMANAGED_ */ -#line 857 "asmparse.y" + case 339: /* implAttr: implAttr UNMANAGED_ */ +#line 866 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) (((yyvsp[-1].implAttr) & 0xFFFB) | miUnmanaged); } -#line 5376 "asmparse.cpp" +#line 5819 "prebuilt\\asmparse.cpp" break; - case 325: /* implAttr: implAttr FORWARDREF_ */ -#line 858 "asmparse.y" + case 340: /* implAttr: implAttr FORWARDREF_ */ +#line 867 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miForwardRef); } -#line 5382 "asmparse.cpp" +#line 5825 "prebuilt\\asmparse.cpp" break; - case 326: /* implAttr: implAttr PRESERVESIG_ */ -#line 859 "asmparse.y" + case 341: /* implAttr: implAttr PRESERVESIG_ */ +#line 868 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miPreserveSig); } -#line 5388 "asmparse.cpp" +#line 5831 "prebuilt\\asmparse.cpp" break; - case 327: /* implAttr: implAttr RUNTIME_ */ -#line 860 "asmparse.y" + case 342: /* implAttr: implAttr RUNTIME_ */ +#line 869 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miRuntime); } -#line 5394 "asmparse.cpp" +#line 5837 "prebuilt\\asmparse.cpp" break; - case 328: /* implAttr: implAttr INTERNALCALL_ */ -#line 861 "asmparse.y" + case 343: /* implAttr: implAttr INTERNALCALL_ */ +#line 870 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miInternalCall); } -#line 5400 "asmparse.cpp" +#line 5843 "prebuilt\\asmparse.cpp" break; - case 329: /* implAttr: implAttr SYNCHRONIZED_ */ -#line 862 "asmparse.y" + case 344: /* implAttr: implAttr SYNCHRONIZED_ */ +#line 871 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miSynchronized); } -#line 5406 "asmparse.cpp" +#line 5849 "prebuilt\\asmparse.cpp" break; - case 330: /* implAttr: implAttr NOINLINING_ */ -#line 863 "asmparse.y" + case 345: /* implAttr: implAttr NOINLINING_ */ +#line 872 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miNoInlining); } -#line 5412 "asmparse.cpp" +#line 5855 "prebuilt\\asmparse.cpp" break; - case 331: /* implAttr: implAttr AGGRESSIVEINLINING_ */ -#line 864 "asmparse.y" + case 346: /* implAttr: implAttr AGGRESSIVEINLINING_ */ +#line 873 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miAggressiveInlining); } -#line 5418 "asmparse.cpp" +#line 5861 "prebuilt\\asmparse.cpp" break; - case 332: /* implAttr: implAttr NOOPTIMIZATION_ */ -#line 865 "asmparse.y" + case 347: /* implAttr: implAttr NOOPTIMIZATION_ */ +#line 874 "asmparse.y" { 
(yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miNoOptimization); } -#line 5424 "asmparse.cpp" +#line 5867 "prebuilt\\asmparse.cpp" break; - case 333: /* implAttr: implAttr AGGRESSIVEOPTIMIZATION_ */ -#line 866 "asmparse.y" + case 348: /* implAttr: implAttr AGGRESSIVEOPTIMIZATION_ */ +#line 875 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miAggressiveOptimization); } -#line 5430 "asmparse.cpp" +#line 5873 "prebuilt\\asmparse.cpp" break; - case 334: /* implAttr: implAttr FLAGS_ '(' int32 ')' */ -#line 867 "asmparse.y" + case 349: /* implAttr: implAttr ASYNC_ */ +#line 876 "asmparse.y" + { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].implAttr) | miAsync); } +#line 5879 "prebuilt\\asmparse.cpp" + break; + + case 350: /* implAttr: implAttr FLAGS_ '(' int32 ')' */ +#line 877 "asmparse.y" { (yyval.implAttr) = (CorMethodImpl) ((yyvsp[-1].int32)); } -#line 5436 "asmparse.cpp" +#line 5885 "prebuilt\\asmparse.cpp" break; - case 335: /* localsHead: _LOCALS */ -#line 870 "asmparse.y" + case 351: /* localsHead: _LOCALS */ +#line 880 "asmparse.y" { PASM->delArgNameList(PASM->m_firstArgName); PASM->m_firstArgName = NULL;PASM->m_lastArgName = NULL; } -#line 5443 "asmparse.cpp" +#line 5892 "prebuilt\\asmparse.cpp" break; - case 338: /* methodDecl: _EMITBYTE int32 */ -#line 878 "asmparse.y" + case 354: /* methodDecl: _EMITBYTE int32 */ +#line 888 "asmparse.y" { PASM->EmitByte((yyvsp[0].int32)); } -#line 5449 "asmparse.cpp" +#line 5898 "prebuilt\\asmparse.cpp" break; - case 339: /* methodDecl: sehBlock */ -#line 879 "asmparse.y" + case 355: /* methodDecl: sehBlock */ +#line 889 "asmparse.y" { delete PASM->m_SEHD; PASM->m_SEHD = PASM->m_SEHDstack.POP(); } -#line 5455 "asmparse.cpp" +#line 5904 "prebuilt\\asmparse.cpp" break; - case 340: /* methodDecl: _MAXSTACK int32 */ -#line 880 "asmparse.y" + case 356: /* methodDecl: _MAXSTACK int32 */ +#line 890 "asmparse.y" { PASM->EmitMaxStack((yyvsp[0].int32)); } -#line 5461 "asmparse.cpp" +#line 5910 "prebuilt\\asmparse.cpp" break; - case 341: /* methodDecl: localsHead '(' sigArgs0 ')' */ -#line 881 "asmparse.y" + case 357: /* methodDecl: localsHead '(' sigArgs0 ')' */ +#line 891 "asmparse.y" { PASM->EmitLocals(parser->MakeSig(IMAGE_CEE_CS_CALLCONV_LOCAL_SIG, 0, (yyvsp[-1].binstr))); } -#line 5468 "asmparse.cpp" +#line 5917 "prebuilt\\asmparse.cpp" break; - case 342: /* methodDecl: localsHead INIT_ '(' sigArgs0 ')' */ -#line 883 "asmparse.y" + case 358: /* methodDecl: localsHead INIT_ '(' sigArgs0 ')' */ +#line 893 "asmparse.y" { PASM->EmitZeroInit(); PASM->EmitLocals(parser->MakeSig(IMAGE_CEE_CS_CALLCONV_LOCAL_SIG, 0, (yyvsp[-1].binstr))); } -#line 5476 "asmparse.cpp" +#line 5925 "prebuilt\\asmparse.cpp" break; - case 343: /* methodDecl: _ENTRYPOINT */ -#line 886 "asmparse.y" + case 359: /* methodDecl: _ENTRYPOINT */ +#line 896 "asmparse.y" { PASM->EmitEntryPoint(); } -#line 5482 "asmparse.cpp" +#line 5931 "prebuilt\\asmparse.cpp" break; - case 344: /* methodDecl: _ZEROINIT */ -#line 887 "asmparse.y" + case 360: /* methodDecl: _ZEROINIT */ +#line 897 "asmparse.y" { PASM->EmitZeroInit(); } -#line 5488 "asmparse.cpp" +#line 5937 "prebuilt\\asmparse.cpp" break; - case 347: /* methodDecl: id ':' */ -#line 890 "asmparse.y" + case 363: /* methodDecl: id ':' */ +#line 900 "asmparse.y" { PASM->AddLabel(PASM->m_CurPC,(yyvsp[-1].string)); /*PASM->EmitLabel($1);*/ } -#line 5494 "asmparse.cpp" +#line 5943 "prebuilt\\asmparse.cpp" break; - case 353: /* methodDecl: _EXPORT '[' int32 ']' */ -#line 896 "asmparse.y" + case 369: /* 
methodDecl: _EXPORT '[' int32 ']' */ +#line 906 "asmparse.y" { if(PASM->m_pCurMethod->m_dwExportOrdinal == 0xFFFFFFFF) { PASM->m_pCurMethod->m_dwExportOrdinal = (yyvsp[-1].int32); @@ -5505,11 +5954,11 @@ yyparse (void) else PASM->report->warn("Duplicate .export directive, ignored\n"); } -#line 5509 "asmparse.cpp" +#line 5958 "prebuilt\\asmparse.cpp" break; - case 354: /* methodDecl: _EXPORT '[' int32 ']' AS_ id */ -#line 906 "asmparse.y" + case 370: /* methodDecl: _EXPORT '[' int32 ']' AS_ id */ +#line 916 "asmparse.y" { if(PASM->m_pCurMethod->m_dwExportOrdinal == 0xFFFFFFFF) { PASM->m_pCurMethod->m_dwExportOrdinal = (yyvsp[-3].int32); @@ -5520,68 +5969,68 @@ yyparse (void) else PASM->report->warn("Duplicate .export directive, ignored\n"); } -#line 5524 "asmparse.cpp" +#line 5973 "prebuilt\\asmparse.cpp" break; - case 355: /* methodDecl: _VTENTRY int32 ':' int32 */ -#line 916 "asmparse.y" + case 371: /* methodDecl: _VTENTRY int32 ':' int32 */ +#line 926 "asmparse.y" { PASM->m_pCurMethod->m_wVTEntry = (WORD)(yyvsp[-2].int32); PASM->m_pCurMethod->m_wVTSlot = (WORD)(yyvsp[0].int32); } -#line 5531 "asmparse.cpp" +#line 5980 "prebuilt\\asmparse.cpp" break; - case 356: /* methodDecl: _OVERRIDE typeSpec DCOLON methodName */ -#line 919 "asmparse.y" + case 372: /* methodDecl: _OVERRIDE typeSpec DCOLON methodName */ +#line 929 "asmparse.y" { PASM->AddMethodImpl((yyvsp[-2].token),(yyvsp[0].string),NULL,mdTokenNil,NULL,NULL); } -#line 5537 "asmparse.cpp" +#line 5986 "prebuilt\\asmparse.cpp" break; - case 357: /* methodDecl: _OVERRIDE METHOD_ callConv type typeSpec DCOLON methodName genArity '(' sigArgs0 ')' */ -#line 922 "asmparse.y" + case 373: /* methodDecl: _OVERRIDE METHOD_ callConv type typeSpec DCOLON methodName genArity '(' sigArgs0 ')' */ +#line 932 "asmparse.y" { PASM->AddMethodImpl((yyvsp[-6].token),(yyvsp[-4].string), ((yyvsp[-3].int32)==0 ? 
parser->MakeSig((yyvsp[-8].int32),(yyvsp[-7].binstr),(yyvsp[-1].binstr)) : parser->MakeSig((yyvsp[-8].int32)| IMAGE_CEE_CS_CALLCONV_GENERIC,(yyvsp[-7].binstr),(yyvsp[-1].binstr),(yyvsp[-3].int32))) ,mdTokenNil,NULL,NULL); PASM->ResetArgNameList(); } -#line 5548 "asmparse.cpp" +#line 5997 "prebuilt\\asmparse.cpp" break; - case 359: /* methodDecl: _PARAM TYPE_ '[' int32 ']' */ -#line 929 "asmparse.y" + case 375: /* methodDecl: _PARAM TYPE_ '[' int32 ']' */ +#line 939 "asmparse.y" { if(((yyvsp[-1].int32) > 0) && ((yyvsp[-1].int32) <= (int)PASM->m_pCurMethod->m_NumTyPars)) PASM->m_pCustomDescrList = PASM->m_pCurMethod->m_TyPars[(yyvsp[-1].int32)-1].CAList(); else PASM->report->error("Type parameter index out of range\n"); } -#line 5558 "asmparse.cpp" +#line 6007 "prebuilt\\asmparse.cpp" break; - case 360: /* methodDecl: _PARAM TYPE_ dottedName */ -#line 934 "asmparse.y" + case 376: /* methodDecl: _PARAM TYPE_ dottedName */ +#line 944 "asmparse.y" { int n = PASM->m_pCurMethod->FindTyPar((yyvsp[0].string)); if(n >= 0) PASM->m_pCustomDescrList = PASM->m_pCurMethod->m_TyPars[n].CAList(); else PASM->report->error("Type parameter '%s' undefined\n",(yyvsp[0].string)); } -#line 5569 "asmparse.cpp" +#line 6018 "prebuilt\\asmparse.cpp" break; - case 361: /* methodDecl: _PARAM CONSTRAINT_ '[' int32 ']' ',' typeSpec */ -#line 940 "asmparse.y" + case 377: /* methodDecl: _PARAM CONSTRAINT_ '[' int32 ']' ',' typeSpec */ +#line 950 "asmparse.y" { PASM->m_pCurMethod->AddGenericParamConstraint((yyvsp[-3].int32), 0, (yyvsp[0].token)); } -#line 5575 "asmparse.cpp" +#line 6024 "prebuilt\\asmparse.cpp" break; - case 362: /* methodDecl: _PARAM CONSTRAINT_ dottedName ',' typeSpec */ -#line 941 "asmparse.y" + case 378: /* methodDecl: _PARAM CONSTRAINT_ dottedName ',' typeSpec */ +#line 951 "asmparse.y" { PASM->m_pCurMethod->AddGenericParamConstraint(0, (yyvsp[-2].string), (yyvsp[0].token)); } -#line 5581 "asmparse.cpp" +#line 6030 "prebuilt\\asmparse.cpp" break; - case 363: /* methodDecl: _PARAM '[' int32 ']' initOpt */ -#line 944 "asmparse.y" + case 379: /* methodDecl: _PARAM '[' int32 ']' initOpt */ +#line 954 "asmparse.y" { if( (yyvsp[-2].int32) ) { ARG_NAME_LIST* pAN=PASM->findArg(PASM->m_pCurMethod->m_firstArgName, (yyvsp[-2].int32) - 1); if(pAN) @@ -5600,241 +6049,241 @@ yyparse (void) } PASM->m_tkCurrentCVOwner = 0; } -#line 5604 "asmparse.cpp" +#line 6053 "prebuilt\\asmparse.cpp" break; - case 364: /* scopeBlock: scopeOpen methodDecls '}' */ -#line 964 "asmparse.y" + case 380: /* scopeBlock: scopeOpen methodDecls '}' */ +#line 974 "asmparse.y" { PASM->m_pCurMethod->CloseScope(); } -#line 5610 "asmparse.cpp" +#line 6059 "prebuilt\\asmparse.cpp" break; - case 365: /* scopeOpen: '{' */ -#line 967 "asmparse.y" + case 381: /* scopeOpen: '{' */ +#line 977 "asmparse.y" { PASM->m_pCurMethod->OpenScope(); } -#line 5616 "asmparse.cpp" +#line 6065 "prebuilt\\asmparse.cpp" break; - case 369: /* tryBlock: tryHead scopeBlock */ -#line 978 "asmparse.y" + case 385: /* tryBlock: tryHead scopeBlock */ +#line 988 "asmparse.y" { PASM->m_SEHD->tryTo = PASM->m_CurPC; } -#line 5622 "asmparse.cpp" +#line 6071 "prebuilt\\asmparse.cpp" break; - case 370: /* tryBlock: tryHead id TO_ id */ -#line 979 "asmparse.y" + case 386: /* tryBlock: tryHead id TO_ id */ +#line 989 "asmparse.y" { PASM->SetTryLabels((yyvsp[-2].string), (yyvsp[0].string)); } -#line 5628 "asmparse.cpp" +#line 6077 "prebuilt\\asmparse.cpp" break; - case 371: /* tryBlock: tryHead int32 TO_ int32 */ -#line 980 "asmparse.y" + case 387: /* tryBlock: tryHead int32 TO_ int32 
*/ +#line 990 "asmparse.y" { if(PASM->m_SEHD) {PASM->m_SEHD->tryFrom = (yyvsp[-2].int32); PASM->m_SEHD->tryTo = (yyvsp[0].int32);} } -#line 5635 "asmparse.cpp" +#line 6084 "prebuilt\\asmparse.cpp" break; - case 372: /* tryHead: _TRY */ -#line 984 "asmparse.y" + case 388: /* tryHead: _TRY */ +#line 994 "asmparse.y" { PASM->NewSEHDescriptor(); PASM->m_SEHD->tryFrom = PASM->m_CurPC; } -#line 5642 "asmparse.cpp" +#line 6091 "prebuilt\\asmparse.cpp" break; - case 373: /* sehClause: catchClause handlerBlock */ -#line 989 "asmparse.y" + case 389: /* sehClause: catchClause handlerBlock */ +#line 999 "asmparse.y" { PASM->EmitTry(); } -#line 5648 "asmparse.cpp" +#line 6097 "prebuilt\\asmparse.cpp" break; - case 374: /* sehClause: filterClause handlerBlock */ -#line 990 "asmparse.y" + case 390: /* sehClause: filterClause handlerBlock */ +#line 1000 "asmparse.y" { PASM->EmitTry(); } -#line 5654 "asmparse.cpp" +#line 6103 "prebuilt\\asmparse.cpp" break; - case 375: /* sehClause: finallyClause handlerBlock */ -#line 991 "asmparse.y" + case 391: /* sehClause: finallyClause handlerBlock */ +#line 1001 "asmparse.y" { PASM->EmitTry(); } -#line 5660 "asmparse.cpp" +#line 6109 "prebuilt\\asmparse.cpp" break; - case 376: /* sehClause: faultClause handlerBlock */ -#line 992 "asmparse.y" + case 392: /* sehClause: faultClause handlerBlock */ +#line 1002 "asmparse.y" { PASM->EmitTry(); } -#line 5666 "asmparse.cpp" +#line 6115 "prebuilt\\asmparse.cpp" break; - case 377: /* filterClause: filterHead scopeBlock */ -#line 996 "asmparse.y" + case 393: /* filterClause: filterHead scopeBlock */ +#line 1006 "asmparse.y" { PASM->m_SEHD->sehHandler = PASM->m_CurPC; } -#line 5672 "asmparse.cpp" +#line 6121 "prebuilt\\asmparse.cpp" break; - case 378: /* filterClause: filterHead id */ -#line 997 "asmparse.y" + case 394: /* filterClause: filterHead id */ +#line 1007 "asmparse.y" { PASM->SetFilterLabel((yyvsp[0].string)); PASM->m_SEHD->sehHandler = PASM->m_CurPC; } -#line 5679 "asmparse.cpp" +#line 6128 "prebuilt\\asmparse.cpp" break; - case 379: /* filterClause: filterHead int32 */ -#line 999 "asmparse.y" + case 395: /* filterClause: filterHead int32 */ +#line 1009 "asmparse.y" { PASM->m_SEHD->sehFilter = (yyvsp[0].int32); PASM->m_SEHD->sehHandler = PASM->m_CurPC; } -#line 5686 "asmparse.cpp" +#line 6135 "prebuilt\\asmparse.cpp" break; - case 380: /* filterHead: FILTER_ */ -#line 1003 "asmparse.y" + case 396: /* filterHead: FILTER_ */ +#line 1013 "asmparse.y" { PASM->m_SEHD->sehClause = COR_ILEXCEPTION_CLAUSE_FILTER; PASM->m_SEHD->sehFilter = PASM->m_CurPC; } -#line 5693 "asmparse.cpp" +#line 6142 "prebuilt\\asmparse.cpp" break; - case 381: /* catchClause: CATCH_ typeSpec */ -#line 1007 "asmparse.y" + case 397: /* catchClause: CATCH_ typeSpec */ +#line 1017 "asmparse.y" { PASM->m_SEHD->sehClause = COR_ILEXCEPTION_CLAUSE_NONE; PASM->SetCatchClass((yyvsp[0].token)); PASM->m_SEHD->sehHandler = PASM->m_CurPC; } -#line 5701 "asmparse.cpp" +#line 6150 "prebuilt\\asmparse.cpp" break; - case 382: /* finallyClause: FINALLY_ */ -#line 1012 "asmparse.y" + case 398: /* finallyClause: FINALLY_ */ +#line 1022 "asmparse.y" { PASM->m_SEHD->sehClause = COR_ILEXCEPTION_CLAUSE_FINALLY; PASM->m_SEHD->sehHandler = PASM->m_CurPC; } -#line 5708 "asmparse.cpp" +#line 6157 "prebuilt\\asmparse.cpp" break; - case 383: /* faultClause: FAULT_ */ -#line 1016 "asmparse.y" + case 399: /* faultClause: FAULT_ */ +#line 1026 "asmparse.y" { PASM->m_SEHD->sehClause = COR_ILEXCEPTION_CLAUSE_FAULT; PASM->m_SEHD->sehHandler = PASM->m_CurPC; } -#line 5715 
"asmparse.cpp" +#line 6164 "prebuilt\\asmparse.cpp" break; - case 384: /* handlerBlock: scopeBlock */ -#line 1020 "asmparse.y" + case 400: /* handlerBlock: scopeBlock */ +#line 1030 "asmparse.y" { PASM->m_SEHD->sehHandlerTo = PASM->m_CurPC; } -#line 5721 "asmparse.cpp" +#line 6170 "prebuilt\\asmparse.cpp" break; - case 385: /* handlerBlock: HANDLER_ id TO_ id */ -#line 1021 "asmparse.y" + case 401: /* handlerBlock: HANDLER_ id TO_ id */ +#line 1031 "asmparse.y" { PASM->SetHandlerLabels((yyvsp[-2].string), (yyvsp[0].string)); } -#line 5727 "asmparse.cpp" +#line 6176 "prebuilt\\asmparse.cpp" break; - case 386: /* handlerBlock: HANDLER_ int32 TO_ int32 */ -#line 1022 "asmparse.y" + case 402: /* handlerBlock: HANDLER_ int32 TO_ int32 */ +#line 1032 "asmparse.y" { PASM->m_SEHD->sehHandler = (yyvsp[-2].int32); PASM->m_SEHD->sehHandlerTo = (yyvsp[0].int32); } -#line 5734 "asmparse.cpp" +#line 6183 "prebuilt\\asmparse.cpp" break; - case 388: /* ddHead: _DATA tls id '=' */ -#line 1030 "asmparse.y" + case 404: /* ddHead: _DATA tls id '=' */ +#line 1040 "asmparse.y" { PASM->EmitDataLabel((yyvsp[-1].string)); } -#line 5740 "asmparse.cpp" +#line 6189 "prebuilt\\asmparse.cpp" break; - case 390: /* tls: %empty */ -#line 1034 "asmparse.y" + case 406: /* tls: %empty */ +#line 1044 "asmparse.y" { PASM->SetDataSection(); } -#line 5746 "asmparse.cpp" +#line 6195 "prebuilt\\asmparse.cpp" break; - case 391: /* tls: TLS_ */ -#line 1035 "asmparse.y" + case 407: /* tls: TLS_ */ +#line 1045 "asmparse.y" { PASM->SetTLSSection(); } -#line 5752 "asmparse.cpp" +#line 6201 "prebuilt\\asmparse.cpp" break; - case 392: /* tls: CIL_ */ -#line 1036 "asmparse.y" + case 408: /* tls: CIL_ */ +#line 1046 "asmparse.y" { PASM->SetILSection(); } -#line 5758 "asmparse.cpp" +#line 6207 "prebuilt\\asmparse.cpp" break; - case 397: /* ddItemCount: %empty */ -#line 1047 "asmparse.y" + case 413: /* ddItemCount: %empty */ +#line 1057 "asmparse.y" { (yyval.int32) = 1; } -#line 5764 "asmparse.cpp" +#line 6213 "prebuilt\\asmparse.cpp" break; - case 398: /* ddItemCount: '[' int32 ']' */ -#line 1048 "asmparse.y" + case 414: /* ddItemCount: '[' int32 ']' */ +#line 1058 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32); if((yyvsp[-1].int32) <= 0) { PASM->report->error("Illegal item count: %d\n",(yyvsp[-1].int32)); if(!PASM->OnErrGo) (yyval.int32) = 1; }} -#line 5772 "asmparse.cpp" +#line 6221 "prebuilt\\asmparse.cpp" break; - case 399: /* ddItem: CHAR_ '*' '(' compQstring ')' */ -#line 1053 "asmparse.y" + case 415: /* ddItem: CHAR_ '*' '(' compQstring ')' */ +#line 1063 "asmparse.y" { PASM->EmitDataString((yyvsp[-1].binstr)); } -#line 5778 "asmparse.cpp" +#line 6227 "prebuilt\\asmparse.cpp" break; - case 400: /* ddItem: '&' '(' id ')' */ -#line 1054 "asmparse.y" + case 416: /* ddItem: '&' '(' id ')' */ +#line 1064 "asmparse.y" { PASM->EmitDD((yyvsp[-1].string)); } -#line 5784 "asmparse.cpp" +#line 6233 "prebuilt\\asmparse.cpp" break; - case 401: /* ddItem: bytearrayhead bytes ')' */ -#line 1055 "asmparse.y" + case 417: /* ddItem: bytearrayhead bytes ')' */ +#line 1065 "asmparse.y" { PASM->EmitData((yyvsp[-1].binstr)->ptr(),(yyvsp[-1].binstr)->length()); } -#line 5790 "asmparse.cpp" +#line 6239 "prebuilt\\asmparse.cpp" break; - case 402: /* ddItem: FLOAT32_ '(' float64 ')' ddItemCount */ -#line 1057 "asmparse.y" + case 418: /* ddItem: FLOAT32_ '(' float64 ')' ddItemCount */ +#line 1067 "asmparse.y" { float f = (float) (*(yyvsp[-2].float64)); float* p = new (nothrow) float[(yyvsp[0].int32)]; if(p != NULL) { for(int i=0; i < (yyvsp[0].int32); i++) 
p[i] = f; PASM->EmitData(p, sizeof(float)*(yyvsp[0].int32)); delete (yyvsp[-2].float64); delete [] p; } else PASM->report->error("Out of memory emitting data block %d bytes\n", sizeof(float)*(yyvsp[0].int32)); } -#line 5801 "asmparse.cpp" +#line 6250 "prebuilt\\asmparse.cpp" break; - case 403: /* ddItem: FLOAT64_ '(' float64 ')' ddItemCount */ -#line 1064 "asmparse.y" + case 419: /* ddItem: FLOAT64_ '(' float64 ')' ddItemCount */ +#line 1074 "asmparse.y" { double* p = new (nothrow) double[(yyvsp[0].int32)]; if(p != NULL) { for(int i=0; i<(yyvsp[0].int32); i++) p[i] = *((yyvsp[-2].float64)); PASM->EmitData(p, sizeof(double)*(yyvsp[0].int32)); delete (yyvsp[-2].float64); delete [] p; } else PASM->report->error("Out of memory emitting data block %d bytes\n", sizeof(double)*(yyvsp[0].int32)); } -#line 5812 "asmparse.cpp" +#line 6261 "prebuilt\\asmparse.cpp" break; - case 404: /* ddItem: INT64_ '(' int64 ')' ddItemCount */ -#line 1071 "asmparse.y" + case 420: /* ddItem: INT64_ '(' int64 ')' ddItemCount */ +#line 1081 "asmparse.y" { int64_t* p = new (nothrow) int64_t[(yyvsp[0].int32)]; if(p != NULL) { for(int i=0; i<(yyvsp[0].int32); i++) p[i] = *((yyvsp[-2].int64)); PASM->EmitData(p, sizeof(int64_t)*(yyvsp[0].int32)); delete (yyvsp[-2].int64); delete [] p; } else PASM->report->error("Out of memory emitting data block %d bytes\n", sizeof(int64_t)*(yyvsp[0].int32)); } -#line 5823 "asmparse.cpp" +#line 6272 "prebuilt\\asmparse.cpp" break; - case 405: /* ddItem: INT32_ '(' int32 ')' ddItemCount */ -#line 1078 "asmparse.y" + case 421: /* ddItem: INT32_ '(' int32 ')' ddItemCount */ +#line 1088 "asmparse.y" { int32_t* p = new (nothrow) int32_t[(yyvsp[0].int32)]; if(p != NULL) { for(int i=0; i<(yyvsp[0].int32); i++) p[i] = (yyvsp[-2].int32); PASM->EmitData(p, sizeof(int32_t)*(yyvsp[0].int32)); delete [] p; } else PASM->report->error("Out of memory emitting data block %d bytes\n", sizeof(int32_t)*(yyvsp[0].int32)); } -#line 5834 "asmparse.cpp" +#line 6283 "prebuilt\\asmparse.cpp" break; - case 406: /* ddItem: INT16_ '(' int32 ')' ddItemCount */ -#line 1085 "asmparse.y" + case 422: /* ddItem: INT16_ '(' int32 ')' ddItemCount */ +#line 1095 "asmparse.y" { int16_t i = (int16_t) (yyvsp[-2].int32); FAIL_UNLESS(i == (yyvsp[-2].int32), ("Value %d too big\n", (yyvsp[-2].int32))); int16_t* p = new (nothrow) int16_t[(yyvsp[0].int32)]; if(p != NULL) { @@ -5842,11 +6291,11 @@ yyparse (void) PASM->EmitData(p, sizeof(int16_t)*(yyvsp[0].int32)); delete [] p; } else PASM->report->error("Out of memory emitting data block %d bytes\n", sizeof(int16_t)*(yyvsp[0].int32)); } -#line 5846 "asmparse.cpp" +#line 6295 "prebuilt\\asmparse.cpp" break; - case 407: /* ddItem: INT8_ '(' int32 ')' ddItemCount */ -#line 1093 "asmparse.y" + case 423: /* ddItem: INT8_ '(' int32 ')' ddItemCount */ +#line 1103 "asmparse.y" { int8_t i = (int8_t) (yyvsp[-2].int32); FAIL_UNLESS(i == (yyvsp[-2].int32), ("Value %d too big\n", (yyvsp[-2].int32))); int8_t* p = new (nothrow) int8_t[(yyvsp[0].int32)]; if(p != NULL) { @@ -5854,726 +6303,726 @@ yyparse (void) PASM->EmitData(p, sizeof(int8_t)*(yyvsp[0].int32)); delete [] p; } else PASM->report->error("Out of memory emitting data block %d bytes\n", sizeof(int8_t)*(yyvsp[0].int32)); } -#line 5858 "asmparse.cpp" +#line 6307 "prebuilt\\asmparse.cpp" break; - case 408: /* ddItem: FLOAT32_ ddItemCount */ -#line 1100 "asmparse.y" + case 424: /* ddItem: FLOAT32_ ddItemCount */ +#line 1110 "asmparse.y" { PASM->EmitData(NULL, sizeof(float)*(yyvsp[0].int32)); } -#line 5864 "asmparse.cpp" +#line 6313 
"prebuilt\\asmparse.cpp" break; - case 409: /* ddItem: FLOAT64_ ddItemCount */ -#line 1101 "asmparse.y" + case 425: /* ddItem: FLOAT64_ ddItemCount */ +#line 1111 "asmparse.y" { PASM->EmitData(NULL, sizeof(double)*(yyvsp[0].int32)); } -#line 5870 "asmparse.cpp" +#line 6319 "prebuilt\\asmparse.cpp" break; - case 410: /* ddItem: INT64_ ddItemCount */ -#line 1102 "asmparse.y" + case 426: /* ddItem: INT64_ ddItemCount */ +#line 1112 "asmparse.y" { PASM->EmitData(NULL, sizeof(int64_t)*(yyvsp[0].int32)); } -#line 5876 "asmparse.cpp" +#line 6325 "prebuilt\\asmparse.cpp" break; - case 411: /* ddItem: INT32_ ddItemCount */ -#line 1103 "asmparse.y" + case 427: /* ddItem: INT32_ ddItemCount */ +#line 1113 "asmparse.y" { PASM->EmitData(NULL, sizeof(int32_t)*(yyvsp[0].int32)); } -#line 5882 "asmparse.cpp" +#line 6331 "prebuilt\\asmparse.cpp" break; - case 412: /* ddItem: INT16_ ddItemCount */ -#line 1104 "asmparse.y" + case 428: /* ddItem: INT16_ ddItemCount */ +#line 1114 "asmparse.y" { PASM->EmitData(NULL, sizeof(int16_t)*(yyvsp[0].int32)); } -#line 5888 "asmparse.cpp" +#line 6337 "prebuilt\\asmparse.cpp" break; - case 413: /* ddItem: INT8_ ddItemCount */ -#line 1105 "asmparse.y" + case 429: /* ddItem: INT8_ ddItemCount */ +#line 1115 "asmparse.y" { PASM->EmitData(NULL, sizeof(int8_t)*(yyvsp[0].int32)); } -#line 5894 "asmparse.cpp" +#line 6343 "prebuilt\\asmparse.cpp" break; - case 414: /* fieldSerInit: FLOAT32_ '(' float64 ')' */ -#line 1109 "asmparse.y" + case 430: /* fieldSerInit: FLOAT32_ '(' float64 ')' */ +#line 1119 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_R4); float f = (float)(*(yyvsp[-1].float64)); (yyval.binstr)->appendInt32(*((int32_t*)&f)); delete (yyvsp[-1].float64); } -#line 5902 "asmparse.cpp" +#line 6351 "prebuilt\\asmparse.cpp" break; - case 415: /* fieldSerInit: FLOAT64_ '(' float64 ')' */ -#line 1112 "asmparse.y" + case 431: /* fieldSerInit: FLOAT64_ '(' float64 ')' */ +#line 1122 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_R8); (yyval.binstr)->appendInt64((int64_t *)(yyvsp[-1].float64)); delete (yyvsp[-1].float64); } -#line 5909 "asmparse.cpp" +#line 6358 "prebuilt\\asmparse.cpp" break; - case 416: /* fieldSerInit: FLOAT32_ '(' int32 ')' */ -#line 1114 "asmparse.y" + case 432: /* fieldSerInit: FLOAT32_ '(' int32 ')' */ +#line 1124 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_R4); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); } -#line 5916 "asmparse.cpp" +#line 6365 "prebuilt\\asmparse.cpp" break; - case 417: /* fieldSerInit: FLOAT64_ '(' int64 ')' */ -#line 1116 "asmparse.y" + case 433: /* fieldSerInit: FLOAT64_ '(' int64 ')' */ +#line 1126 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_R8); (yyval.binstr)->appendInt64((int64_t *)(yyvsp[-1].int64)); delete (yyvsp[-1].int64); } -#line 5923 "asmparse.cpp" +#line 6372 "prebuilt\\asmparse.cpp" break; - case 418: /* fieldSerInit: INT64_ '(' int64 ')' */ -#line 1118 "asmparse.y" + case 434: /* fieldSerInit: INT64_ '(' int64 ')' */ +#line 1128 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_I8); (yyval.binstr)->appendInt64((int64_t *)(yyvsp[-1].int64)); delete (yyvsp[-1].int64); } -#line 5930 "asmparse.cpp" +#line 6379 "prebuilt\\asmparse.cpp" break; - case 419: /* fieldSerInit: INT32_ '(' int32 ')' */ -#line 1120 "asmparse.y" + case 435: /* fieldSerInit: INT32_ '(' int32 ')' */ +#line 1130 "asmparse.y" { (yyval.binstr) = new 
BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_I4); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); } -#line 5937 "asmparse.cpp" +#line 6386 "prebuilt\\asmparse.cpp" break; - case 420: /* fieldSerInit: INT16_ '(' int32 ')' */ -#line 1122 "asmparse.y" + case 436: /* fieldSerInit: INT16_ '(' int32 ')' */ +#line 1132 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_I2); (yyval.binstr)->appendInt16((yyvsp[-1].int32)); } -#line 5944 "asmparse.cpp" +#line 6393 "prebuilt\\asmparse.cpp" break; - case 421: /* fieldSerInit: INT8_ '(' int32 ')' */ -#line 1124 "asmparse.y" + case 437: /* fieldSerInit: INT8_ '(' int32 ')' */ +#line 1134 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_I1); (yyval.binstr)->appendInt8((yyvsp[-1].int32)); } -#line 5951 "asmparse.cpp" +#line 6400 "prebuilt\\asmparse.cpp" break; - case 422: /* fieldSerInit: UNSIGNED_ INT64_ '(' int64 ')' */ -#line 1126 "asmparse.y" + case 438: /* fieldSerInit: UNSIGNED_ INT64_ '(' int64 ')' */ +#line 1136 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U8); (yyval.binstr)->appendInt64((int64_t *)(yyvsp[-1].int64)); delete (yyvsp[-1].int64); } -#line 5958 "asmparse.cpp" +#line 6407 "prebuilt\\asmparse.cpp" break; - case 423: /* fieldSerInit: UNSIGNED_ INT32_ '(' int32 ')' */ -#line 1128 "asmparse.y" + case 439: /* fieldSerInit: UNSIGNED_ INT32_ '(' int32 ')' */ +#line 1138 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U4); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); } -#line 5965 "asmparse.cpp" +#line 6414 "prebuilt\\asmparse.cpp" break; - case 424: /* fieldSerInit: UNSIGNED_ INT16_ '(' int32 ')' */ -#line 1130 "asmparse.y" + case 440: /* fieldSerInit: UNSIGNED_ INT16_ '(' int32 ')' */ +#line 1140 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U2); (yyval.binstr)->appendInt16((yyvsp[-1].int32)); } -#line 5972 "asmparse.cpp" +#line 6421 "prebuilt\\asmparse.cpp" break; - case 425: /* fieldSerInit: UNSIGNED_ INT8_ '(' int32 ')' */ -#line 1132 "asmparse.y" + case 441: /* fieldSerInit: UNSIGNED_ INT8_ '(' int32 ')' */ +#line 1142 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U1); (yyval.binstr)->appendInt8((yyvsp[-1].int32)); } -#line 5979 "asmparse.cpp" +#line 6428 "prebuilt\\asmparse.cpp" break; - case 426: /* fieldSerInit: UINT64_ '(' int64 ')' */ -#line 1134 "asmparse.y" + case 442: /* fieldSerInit: UINT64_ '(' int64 ')' */ +#line 1144 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U8); (yyval.binstr)->appendInt64((int64_t *)(yyvsp[-1].int64)); delete (yyvsp[-1].int64); } -#line 5986 "asmparse.cpp" +#line 6435 "prebuilt\\asmparse.cpp" break; - case 427: /* fieldSerInit: UINT32_ '(' int32 ')' */ -#line 1136 "asmparse.y" + case 443: /* fieldSerInit: UINT32_ '(' int32 ')' */ +#line 1146 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U4); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); } -#line 5993 "asmparse.cpp" +#line 6442 "prebuilt\\asmparse.cpp" break; - case 428: /* fieldSerInit: UINT16_ '(' int32 ')' */ -#line 1138 "asmparse.y" + case 444: /* fieldSerInit: UINT16_ '(' int32 ')' */ +#line 1148 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U2); (yyval.binstr)->appendInt16((yyvsp[-1].int32)); } -#line 6000 "asmparse.cpp" +#line 6449 "prebuilt\\asmparse.cpp" break; - case 429: /* fieldSerInit: 
UINT8_ '(' int32 ')' */ -#line 1140 "asmparse.y" + case 445: /* fieldSerInit: UINT8_ '(' int32 ')' */ +#line 1150 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U1); (yyval.binstr)->appendInt8((yyvsp[-1].int32)); } -#line 6007 "asmparse.cpp" +#line 6456 "prebuilt\\asmparse.cpp" break; - case 430: /* fieldSerInit: CHAR_ '(' int32 ')' */ -#line 1142 "asmparse.y" + case 446: /* fieldSerInit: CHAR_ '(' int32 ')' */ +#line 1152 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_CHAR); (yyval.binstr)->appendInt16((yyvsp[-1].int32)); } -#line 6014 "asmparse.cpp" +#line 6463 "prebuilt\\asmparse.cpp" break; - case 431: /* fieldSerInit: BOOL_ '(' truefalse ')' */ -#line 1144 "asmparse.y" + case 447: /* fieldSerInit: BOOL_ '(' truefalse ')' */ +#line 1154 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_BOOLEAN); (yyval.binstr)->appendInt8((yyvsp[-1].int32));} -#line 6021 "asmparse.cpp" +#line 6470 "prebuilt\\asmparse.cpp" break; - case 432: /* fieldSerInit: bytearrayhead bytes ')' */ -#line 1146 "asmparse.y" + case 448: /* fieldSerInit: bytearrayhead bytes ')' */ +#line 1156 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_STRING); (yyval.binstr)->append((yyvsp[-1].binstr)); delete (yyvsp[-1].binstr);} -#line 6028 "asmparse.cpp" +#line 6477 "prebuilt\\asmparse.cpp" break; - case 433: /* bytearrayhead: BYTEARRAY_ '(' */ -#line 1150 "asmparse.y" + case 449: /* bytearrayhead: BYTEARRAY_ '(' */ +#line 1160 "asmparse.y" { bParsingByteArray = TRUE; } -#line 6034 "asmparse.cpp" +#line 6483 "prebuilt\\asmparse.cpp" break; - case 434: /* bytes: %empty */ -#line 1153 "asmparse.y" + case 450: /* bytes: %empty */ +#line 1163 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6040 "asmparse.cpp" +#line 6489 "prebuilt\\asmparse.cpp" break; - case 435: /* bytes: hexbytes */ -#line 1154 "asmparse.y" + case 451: /* bytes: hexbytes */ +#line 1164 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 6046 "asmparse.cpp" +#line 6495 "prebuilt\\asmparse.cpp" break; - case 436: /* hexbytes: HEXBYTE */ -#line 1157 "asmparse.y" + case 452: /* hexbytes: HEXBYTE */ +#line 1167 "asmparse.y" { int8_t i = (int8_t) (yyvsp[0].int32); (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(i); } -#line 6052 "asmparse.cpp" +#line 6501 "prebuilt\\asmparse.cpp" break; - case 437: /* hexbytes: hexbytes HEXBYTE */ -#line 1158 "asmparse.y" + case 453: /* hexbytes: hexbytes HEXBYTE */ +#line 1168 "asmparse.y" { int8_t i = (int8_t) (yyvsp[0].int32); (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt8(i); } -#line 6058 "asmparse.cpp" +#line 6507 "prebuilt\\asmparse.cpp" break; - case 438: /* fieldInit: fieldSerInit */ -#line 1162 "asmparse.y" + case 454: /* fieldInit: fieldSerInit */ +#line 1172 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 6064 "asmparse.cpp" +#line 6513 "prebuilt\\asmparse.cpp" break; - case 439: /* fieldInit: compQstring */ -#line 1163 "asmparse.y" + case 455: /* fieldInit: compQstring */ +#line 1173 "asmparse.y" { (yyval.binstr) = BinStrToUnicode((yyvsp[0].binstr),true); (yyval.binstr)->insertInt8(ELEMENT_TYPE_STRING);} -#line 6070 "asmparse.cpp" +#line 6519 "prebuilt\\asmparse.cpp" break; - case 440: /* fieldInit: NULLREF_ */ -#line 1164 "asmparse.y" + case 456: /* fieldInit: NULLREF_ */ +#line 1174 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_CLASS); (yyval.binstr)->appendInt32(0); } -#line 
6077 "asmparse.cpp" +#line 6526 "prebuilt\\asmparse.cpp" break; - case 441: /* serInit: fieldSerInit */ -#line 1169 "asmparse.y" + case 457: /* serInit: fieldSerInit */ +#line 1179 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 6083 "asmparse.cpp" +#line 6532 "prebuilt\\asmparse.cpp" break; - case 442: /* serInit: STRING_ '(' NULLREF_ ')' */ -#line 1170 "asmparse.y" + case 458: /* serInit: STRING_ '(' NULLREF_ ')' */ +#line 1180 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_STRING); (yyval.binstr)->appendInt8(0xFF); } -#line 6089 "asmparse.cpp" +#line 6538 "prebuilt\\asmparse.cpp" break; - case 443: /* serInit: STRING_ '(' SQSTRING ')' */ -#line 1171 "asmparse.y" + case 459: /* serInit: STRING_ '(' SQSTRING ')' */ +#line 1181 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_STRING); AppendStringWithLength((yyval.binstr),(yyvsp[-1].string)); delete [] (yyvsp[-1].string);} -#line 6096 "asmparse.cpp" +#line 6545 "prebuilt\\asmparse.cpp" break; - case 444: /* serInit: TYPE_ '(' CLASS_ SQSTRING ')' */ -#line 1173 "asmparse.y" + case 460: /* serInit: TYPE_ '(' CLASS_ SQSTRING ')' */ +#line 1183 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_TYPE); AppendStringWithLength((yyval.binstr),(yyvsp[-1].string)); delete [] (yyvsp[-1].string);} -#line 6103 "asmparse.cpp" +#line 6552 "prebuilt\\asmparse.cpp" break; - case 445: /* serInit: TYPE_ '(' className ')' */ -#line 1175 "asmparse.y" + case 461: /* serInit: TYPE_ '(' className ')' */ +#line 1185 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_TYPE); AppendStringWithLength((yyval.binstr),PASM->ReflectionNotation((yyvsp[-1].token)));} -#line 6110 "asmparse.cpp" +#line 6559 "prebuilt\\asmparse.cpp" break; - case 446: /* serInit: TYPE_ '(' NULLREF_ ')' */ -#line 1177 "asmparse.y" + case 462: /* serInit: TYPE_ '(' NULLREF_ ')' */ +#line 1187 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_TYPE); (yyval.binstr)->appendInt8(0xFF); } -#line 6116 "asmparse.cpp" +#line 6565 "prebuilt\\asmparse.cpp" break; - case 447: /* serInit: OBJECT_ '(' serInit ')' */ -#line 1178 "asmparse.y" + case 463: /* serInit: OBJECT_ '(' serInit ')' */ +#line 1188 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt8(SERIALIZATION_TYPE_TAGGED_OBJECT);} -#line 6122 "asmparse.cpp" +#line 6571 "prebuilt\\asmparse.cpp" break; - case 448: /* serInit: FLOAT32_ '[' int32 ']' '(' f32seq ')' */ -#line 1180 "asmparse.y" + case 464: /* serInit: FLOAT32_ '[' int32 ']' '(' f32seq ')' */ +#line 1190 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_R4); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6130 "asmparse.cpp" +#line 6579 "prebuilt\\asmparse.cpp" break; - case 449: /* serInit: FLOAT64_ '[' int32 ']' '(' f64seq ')' */ -#line 1184 "asmparse.y" + case 465: /* serInit: FLOAT64_ '[' int32 ']' '(' f64seq ')' */ +#line 1194 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_R8); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6138 "asmparse.cpp" +#line 6587 "prebuilt\\asmparse.cpp" break; - case 450: /* serInit: INT64_ '[' int32 ']' '(' i64seq ')' */ -#line 1188 "asmparse.y" + case 466: /* serInit: INT64_ '[' int32 ']' '(' i64seq ')' */ +#line 1198 
"asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_I8); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6146 "asmparse.cpp" +#line 6595 "prebuilt\\asmparse.cpp" break; - case 451: /* serInit: INT32_ '[' int32 ']' '(' i32seq ')' */ -#line 1192 "asmparse.y" + case 467: /* serInit: INT32_ '[' int32 ']' '(' i32seq ')' */ +#line 1202 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_I4); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6154 "asmparse.cpp" +#line 6603 "prebuilt\\asmparse.cpp" break; - case 452: /* serInit: INT16_ '[' int32 ']' '(' i16seq ')' */ -#line 1196 "asmparse.y" + case 468: /* serInit: INT16_ '[' int32 ']' '(' i16seq ')' */ +#line 1206 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_I2); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6162 "asmparse.cpp" +#line 6611 "prebuilt\\asmparse.cpp" break; - case 453: /* serInit: INT8_ '[' int32 ']' '(' i8seq ')' */ -#line 1200 "asmparse.y" + case 469: /* serInit: INT8_ '[' int32 ']' '(' i8seq ')' */ +#line 1210 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_I1); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6170 "asmparse.cpp" +#line 6619 "prebuilt\\asmparse.cpp" break; - case 454: /* serInit: UINT64_ '[' int32 ']' '(' i64seq ')' */ -#line 1204 "asmparse.y" + case 470: /* serInit: UINT64_ '[' int32 ']' '(' i64seq ')' */ +#line 1214 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_U8); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6178 "asmparse.cpp" +#line 6627 "prebuilt\\asmparse.cpp" break; - case 455: /* serInit: UINT32_ '[' int32 ']' '(' i32seq ')' */ -#line 1208 "asmparse.y" + case 471: /* serInit: UINT32_ '[' int32 ']' '(' i32seq ')' */ +#line 1218 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_U4); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6186 "asmparse.cpp" +#line 6635 "prebuilt\\asmparse.cpp" break; - case 456: /* serInit: UINT16_ '[' int32 ']' '(' i16seq ')' */ -#line 1212 "asmparse.y" + case 472: /* serInit: UINT16_ '[' int32 ']' '(' i16seq ')' */ +#line 1222 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_U2); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6194 "asmparse.cpp" +#line 6643 "prebuilt\\asmparse.cpp" break; - case 457: /* serInit: UINT8_ '[' int32 ']' '(' i8seq ')' */ -#line 1216 "asmparse.y" + case 473: /* serInit: UINT8_ '[' int32 ']' '(' i8seq ')' */ +#line 1226 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_U1); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6202 "asmparse.cpp" +#line 6651 "prebuilt\\asmparse.cpp" break; - case 458: /* serInit: UNSIGNED_ INT64_ '[' int32 ']' '(' i64seq ')' */ -#line 1220 "asmparse.y" + case 474: /* serInit: UNSIGNED_ INT64_ '[' int32 ']' '(' i64seq ')' */ +#line 1230 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); 
(yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_U8); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6210 "asmparse.cpp" +#line 6659 "prebuilt\\asmparse.cpp" break; - case 459: /* serInit: UNSIGNED_ INT32_ '[' int32 ']' '(' i32seq ')' */ -#line 1224 "asmparse.y" + case 475: /* serInit: UNSIGNED_ INT32_ '[' int32 ']' '(' i32seq ')' */ +#line 1234 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_U4); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6218 "asmparse.cpp" +#line 6667 "prebuilt\\asmparse.cpp" break; - case 460: /* serInit: UNSIGNED_ INT16_ '[' int32 ']' '(' i16seq ')' */ -#line 1228 "asmparse.y" + case 476: /* serInit: UNSIGNED_ INT16_ '[' int32 ']' '(' i16seq ')' */ +#line 1238 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_U2); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6226 "asmparse.cpp" +#line 6675 "prebuilt\\asmparse.cpp" break; - case 461: /* serInit: UNSIGNED_ INT8_ '[' int32 ']' '(' i8seq ')' */ -#line 1232 "asmparse.y" + case 477: /* serInit: UNSIGNED_ INT8_ '[' int32 ']' '(' i8seq ')' */ +#line 1242 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_U1); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6234 "asmparse.cpp" +#line 6683 "prebuilt\\asmparse.cpp" break; - case 462: /* serInit: CHAR_ '[' int32 ']' '(' i16seq ')' */ -#line 1236 "asmparse.y" + case 478: /* serInit: CHAR_ '[' int32 ']' '(' i16seq ')' */ +#line 1246 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_CHAR); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6242 "asmparse.cpp" +#line 6691 "prebuilt\\asmparse.cpp" break; - case 463: /* serInit: BOOL_ '[' int32 ']' '(' boolSeq ')' */ -#line 1240 "asmparse.y" + case 479: /* serInit: BOOL_ '[' int32 ']' '(' boolSeq ')' */ +#line 1250 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_BOOLEAN); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6250 "asmparse.cpp" +#line 6699 "prebuilt\\asmparse.cpp" break; - case 464: /* serInit: STRING_ '[' int32 ']' '(' sqstringSeq ')' */ -#line 1244 "asmparse.y" + case 480: /* serInit: STRING_ '[' int32 ']' '(' sqstringSeq ')' */ +#line 1254 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_STRING); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6258 "asmparse.cpp" +#line 6707 "prebuilt\\asmparse.cpp" break; - case 465: /* serInit: TYPE_ '[' int32 ']' '(' classSeq ')' */ -#line 1248 "asmparse.y" + case 481: /* serInit: TYPE_ '[' int32 ']' '(' classSeq ')' */ +#line 1258 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(SERIALIZATION_TYPE_TYPE); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6266 "asmparse.cpp" +#line 6715 "prebuilt\\asmparse.cpp" break; - case 466: /* serInit: OBJECT_ '[' int32 ']' '(' objSeq ')' */ -#line 1252 "asmparse.y" + case 482: /* serInit: OBJECT_ '[' int32 ']' '(' objSeq ')' */ +#line 1262 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); 
(yyval.binstr)->insertInt32((yyvsp[-4].int32)); (yyval.binstr)->insertInt8(SERIALIZATION_TYPE_TAGGED_OBJECT); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 6274 "asmparse.cpp" +#line 6723 "prebuilt\\asmparse.cpp" break; - case 467: /* f32seq: %empty */ -#line 1258 "asmparse.y" + case 483: /* f32seq: %empty */ +#line 1268 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6280 "asmparse.cpp" +#line 6729 "prebuilt\\asmparse.cpp" break; - case 468: /* f32seq: f32seq float64 */ -#line 1259 "asmparse.y" + case 484: /* f32seq: f32seq float64 */ +#line 1269 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); float f = (float) (*(yyvsp[0].float64)); (yyval.binstr)->appendInt32(*((int32_t*)&f)); delete (yyvsp[0].float64); } -#line 6287 "asmparse.cpp" +#line 6736 "prebuilt\\asmparse.cpp" break; - case 469: /* f32seq: f32seq int32 */ -#line 1261 "asmparse.y" + case 485: /* f32seq: f32seq int32 */ +#line 1271 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt32((yyvsp[0].int32)); } -#line 6294 "asmparse.cpp" +#line 6743 "prebuilt\\asmparse.cpp" break; - case 470: /* f64seq: %empty */ -#line 1265 "asmparse.y" + case 486: /* f64seq: %empty */ +#line 1275 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6300 "asmparse.cpp" +#line 6749 "prebuilt\\asmparse.cpp" break; - case 471: /* f64seq: f64seq float64 */ -#line 1266 "asmparse.y" + case 487: /* f64seq: f64seq float64 */ +#line 1276 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt64((int64_t *)(yyvsp[0].float64)); delete (yyvsp[0].float64); } -#line 6307 "asmparse.cpp" +#line 6756 "prebuilt\\asmparse.cpp" break; - case 472: /* f64seq: f64seq int64 */ -#line 1268 "asmparse.y" + case 488: /* f64seq: f64seq int64 */ +#line 1278 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt64((int64_t *)(yyvsp[0].int64)); delete (yyvsp[0].int64); } -#line 6314 "asmparse.cpp" +#line 6763 "prebuilt\\asmparse.cpp" break; - case 473: /* i64seq: %empty */ -#line 1272 "asmparse.y" + case 489: /* i64seq: %empty */ +#line 1282 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6320 "asmparse.cpp" +#line 6769 "prebuilt\\asmparse.cpp" break; - case 474: /* i64seq: i64seq int64 */ -#line 1273 "asmparse.y" + case 490: /* i64seq: i64seq int64 */ +#line 1283 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt64((int64_t *)(yyvsp[0].int64)); delete (yyvsp[0].int64); } -#line 6327 "asmparse.cpp" +#line 6776 "prebuilt\\asmparse.cpp" break; - case 475: /* i32seq: %empty */ -#line 1277 "asmparse.y" + case 491: /* i32seq: %empty */ +#line 1287 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6333 "asmparse.cpp" +#line 6782 "prebuilt\\asmparse.cpp" break; - case 476: /* i32seq: i32seq int32 */ -#line 1278 "asmparse.y" + case 492: /* i32seq: i32seq int32 */ +#line 1288 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt32((yyvsp[0].int32));} -#line 6339 "asmparse.cpp" +#line 6788 "prebuilt\\asmparse.cpp" break; - case 477: /* i16seq: %empty */ -#line 1281 "asmparse.y" + case 493: /* i16seq: %empty */ +#line 1291 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6345 "asmparse.cpp" +#line 6794 "prebuilt\\asmparse.cpp" break; - case 478: /* i16seq: i16seq int32 */ -#line 1282 "asmparse.y" + case 494: /* i16seq: i16seq int32 */ +#line 1292 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt16((yyvsp[0].int32));} -#line 6351 "asmparse.cpp" +#line 6800 "prebuilt\\asmparse.cpp" break; - case 479: /* 
i8seq: %empty */ -#line 1285 "asmparse.y" + case 495: /* i8seq: %empty */ +#line 1295 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6357 "asmparse.cpp" +#line 6806 "prebuilt\\asmparse.cpp" break; - case 480: /* i8seq: i8seq int32 */ -#line 1286 "asmparse.y" + case 496: /* i8seq: i8seq int32 */ +#line 1296 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt8((yyvsp[0].int32)); } -#line 6363 "asmparse.cpp" +#line 6812 "prebuilt\\asmparse.cpp" break; - case 481: /* boolSeq: %empty */ -#line 1289 "asmparse.y" + case 497: /* boolSeq: %empty */ +#line 1299 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6369 "asmparse.cpp" +#line 6818 "prebuilt\\asmparse.cpp" break; - case 482: /* boolSeq: boolSeq truefalse */ -#line 1290 "asmparse.y" + case 498: /* boolSeq: boolSeq truefalse */ +#line 1300 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt8((yyvsp[0].int32));} -#line 6376 "asmparse.cpp" +#line 6825 "prebuilt\\asmparse.cpp" break; - case 483: /* sqstringSeq: %empty */ -#line 1294 "asmparse.y" + case 499: /* sqstringSeq: %empty */ +#line 1304 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6382 "asmparse.cpp" +#line 6831 "prebuilt\\asmparse.cpp" break; - case 484: /* sqstringSeq: sqstringSeq NULLREF_ */ -#line 1295 "asmparse.y" + case 500: /* sqstringSeq: sqstringSeq NULLREF_ */ +#line 1305 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt8(0xFF); } -#line 6388 "asmparse.cpp" +#line 6837 "prebuilt\\asmparse.cpp" break; - case 485: /* sqstringSeq: sqstringSeq SQSTRING */ -#line 1296 "asmparse.y" + case 501: /* sqstringSeq: sqstringSeq SQSTRING */ +#line 1306 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); AppendStringWithLength((yyval.binstr),(yyvsp[0].string)); delete [] (yyvsp[0].string);} -#line 6395 "asmparse.cpp" +#line 6844 "prebuilt\\asmparse.cpp" break; - case 486: /* classSeq: %empty */ -#line 1300 "asmparse.y" + case 502: /* classSeq: %empty */ +#line 1310 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6401 "asmparse.cpp" +#line 6850 "prebuilt\\asmparse.cpp" break; - case 487: /* classSeq: classSeq NULLREF_ */ -#line 1301 "asmparse.y" + case 503: /* classSeq: classSeq NULLREF_ */ +#line 1311 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->appendInt8(0xFF); } -#line 6407 "asmparse.cpp" +#line 6856 "prebuilt\\asmparse.cpp" break; - case 488: /* classSeq: classSeq CLASS_ SQSTRING */ -#line 1302 "asmparse.y" + case 504: /* classSeq: classSeq CLASS_ SQSTRING */ +#line 1312 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); AppendStringWithLength((yyval.binstr),(yyvsp[0].string)); delete [] (yyvsp[0].string);} -#line 6414 "asmparse.cpp" +#line 6863 "prebuilt\\asmparse.cpp" break; - case 489: /* classSeq: classSeq className */ -#line 1304 "asmparse.y" + case 505: /* classSeq: classSeq className */ +#line 1314 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); AppendStringWithLength((yyval.binstr),PASM->ReflectionNotation((yyvsp[0].token)));} -#line 6421 "asmparse.cpp" +#line 6870 "prebuilt\\asmparse.cpp" break; - case 490: /* objSeq: %empty */ -#line 1308 "asmparse.y" + case 506: /* objSeq: %empty */ +#line 1318 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6427 "asmparse.cpp" +#line 6876 "prebuilt\\asmparse.cpp" break; - case 491: /* objSeq: objSeq serInit */ -#line 1309 "asmparse.y" + case 507: /* objSeq: objSeq serInit */ +#line 1319 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->append((yyvsp[0].binstr)); delete 
(yyvsp[0].binstr); } -#line 6433 "asmparse.cpp" +#line 6882 "prebuilt\\asmparse.cpp" break; - case 492: /* methodSpec: METHOD_ */ -#line 1313 "asmparse.y" + case 508: /* methodSpec: METHOD_ */ +#line 1323 "asmparse.y" { parser->m_ANSFirst.PUSH(PASM->m_firstArgName); parser->m_ANSLast.PUSH(PASM->m_lastArgName); PASM->m_firstArgName = NULL; PASM->m_lastArgName = NULL; } -#line 6442 "asmparse.cpp" +#line 6891 "prebuilt\\asmparse.cpp" break; - case 493: /* instr_none: INSTR_NONE */ -#line 1319 "asmparse.y" + case 509: /* instr_none: INSTR_NONE */ +#line 1329 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6448 "asmparse.cpp" +#line 6897 "prebuilt\\asmparse.cpp" break; - case 494: /* instr_var: INSTR_VAR */ -#line 1322 "asmparse.y" + case 510: /* instr_var: INSTR_VAR */ +#line 1332 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6454 "asmparse.cpp" +#line 6903 "prebuilt\\asmparse.cpp" break; - case 495: /* instr_i: INSTR_I */ -#line 1325 "asmparse.y" + case 511: /* instr_i: INSTR_I */ +#line 1335 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6460 "asmparse.cpp" +#line 6909 "prebuilt\\asmparse.cpp" break; - case 496: /* instr_i8: INSTR_I8 */ -#line 1328 "asmparse.y" + case 512: /* instr_i8: INSTR_I8 */ +#line 1338 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6466 "asmparse.cpp" +#line 6915 "prebuilt\\asmparse.cpp" break; - case 497: /* instr_r: INSTR_R */ -#line 1331 "asmparse.y" + case 513: /* instr_r: INSTR_R */ +#line 1341 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6472 "asmparse.cpp" +#line 6921 "prebuilt\\asmparse.cpp" break; - case 498: /* instr_brtarget: INSTR_BRTARGET */ -#line 1334 "asmparse.y" + case 514: /* instr_brtarget: INSTR_BRTARGET */ +#line 1344 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6478 "asmparse.cpp" +#line 6927 "prebuilt\\asmparse.cpp" break; - case 499: /* instr_method: INSTR_METHOD */ -#line 1337 "asmparse.y" + case 515: /* instr_method: INSTR_METHOD */ +#line 1347 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); if((!PASM->OnErrGo)&& (((yyvsp[0].opcode) == CEE_NEWOBJ)|| ((yyvsp[0].opcode) == CEE_CALLVIRT))) iCallConv = IMAGE_CEE_CS_CALLCONV_HASTHIS; } -#line 6489 "asmparse.cpp" +#line 6938 "prebuilt\\asmparse.cpp" break; - case 500: /* instr_field: INSTR_FIELD */ -#line 1345 "asmparse.y" + case 516: /* instr_field: INSTR_FIELD */ +#line 1355 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6495 "asmparse.cpp" +#line 6944 "prebuilt\\asmparse.cpp" break; - case 501: /* instr_type: INSTR_TYPE */ -#line 1348 "asmparse.y" + case 517: /* instr_type: INSTR_TYPE */ +#line 1358 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6501 "asmparse.cpp" +#line 6950 "prebuilt\\asmparse.cpp" break; - case 502: /* instr_string: INSTR_STRING */ -#line 1351 "asmparse.y" + case 518: /* instr_string: INSTR_STRING */ +#line 1361 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6507 "asmparse.cpp" +#line 6956 "prebuilt\\asmparse.cpp" break; - case 503: /* instr_sig: INSTR_SIG */ -#line 1354 "asmparse.y" + case 519: /* instr_sig: INSTR_SIG */ +#line 1364 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6513 "asmparse.cpp" +#line 6962 "prebuilt\\asmparse.cpp" break; - case 504: /* instr_tok: INSTR_TOK */ -#line 1357 "asmparse.y" + case 520: /* instr_tok: INSTR_TOK */ +#line 1367 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); 
iOpcodeLen = PASM->OpcodeLen((yyval.instr)); } -#line 6519 "asmparse.cpp" +#line 6968 "prebuilt\\asmparse.cpp" break; - case 505: /* instr_switch: INSTR_SWITCH */ -#line 1360 "asmparse.y" + case 521: /* instr_switch: INSTR_SWITCH */ +#line 1370 "asmparse.y" { (yyval.instr) = SetupInstr((yyvsp[0].opcode)); } -#line 6525 "asmparse.cpp" +#line 6974 "prebuilt\\asmparse.cpp" break; - case 506: /* instr_r_head: instr_r '(' */ -#line 1363 "asmparse.y" + case 522: /* instr_r_head: instr_r '(' */ +#line 1373 "asmparse.y" { (yyval.instr) = (yyvsp[-1].instr); bParsingByteArray = TRUE; } -#line 6531 "asmparse.cpp" +#line 6980 "prebuilt\\asmparse.cpp" break; - case 507: /* instr: instr_none */ -#line 1367 "asmparse.y" + case 523: /* instr: instr_none */ +#line 1377 "asmparse.y" { PASM->EmitOpcode((yyvsp[0].instr)); } -#line 6537 "asmparse.cpp" +#line 6986 "prebuilt\\asmparse.cpp" break; - case 508: /* instr: instr_var int32 */ -#line 1368 "asmparse.y" + case 524: /* instr: instr_var int32 */ +#line 1378 "asmparse.y" { PASM->EmitInstrVar((yyvsp[-1].instr), (yyvsp[0].int32)); } -#line 6543 "asmparse.cpp" +#line 6992 "prebuilt\\asmparse.cpp" break; - case 509: /* instr: instr_var id */ -#line 1369 "asmparse.y" + case 525: /* instr: instr_var id */ +#line 1379 "asmparse.y" { PASM->EmitInstrVarByName((yyvsp[-1].instr), (yyvsp[0].string)); } -#line 6549 "asmparse.cpp" +#line 6998 "prebuilt\\asmparse.cpp" break; - case 510: /* instr: instr_i int32 */ -#line 1370 "asmparse.y" + case 526: /* instr: instr_i int32 */ +#line 1380 "asmparse.y" { PASM->EmitInstrI((yyvsp[-1].instr), (yyvsp[0].int32)); } -#line 6555 "asmparse.cpp" +#line 7004 "prebuilt\\asmparse.cpp" break; - case 511: /* instr: instr_i8 int64 */ -#line 1371 "asmparse.y" + case 527: /* instr: instr_i8 int64 */ +#line 1381 "asmparse.y" { PASM->EmitInstrI8((yyvsp[-1].instr), (yyvsp[0].int64)); } -#line 6561 "asmparse.cpp" +#line 7010 "prebuilt\\asmparse.cpp" break; - case 512: /* instr: instr_r float64 */ -#line 1372 "asmparse.y" + case 528: /* instr: instr_r float64 */ +#line 1382 "asmparse.y" { PASM->EmitInstrR((yyvsp[-1].instr), (yyvsp[0].float64)); delete ((yyvsp[0].float64));} -#line 6567 "asmparse.cpp" +#line 7016 "prebuilt\\asmparse.cpp" break; - case 513: /* instr: instr_r int64 */ -#line 1373 "asmparse.y" + case 529: /* instr: instr_r int64 */ +#line 1383 "asmparse.y" { double f = (double) (*(yyvsp[0].int64)); PASM->EmitInstrR((yyvsp[-1].instr), &f); } -#line 6573 "asmparse.cpp" +#line 7022 "prebuilt\\asmparse.cpp" break; - case 514: /* instr: instr_r_head bytes ')' */ -#line 1374 "asmparse.y" + case 530: /* instr: instr_r_head bytes ')' */ +#line 1384 "asmparse.y" { unsigned L = (yyvsp[-1].binstr)->length(); FAIL_UNLESS(L >= sizeof(float), ("%d hexbytes, must be at least %d\n", L,sizeof(float))); @@ -6583,34 +7032,34 @@ yyparse (void) : (double)(*(float *)((yyvsp[-1].binstr)->ptr())); PASM->EmitInstrR((yyvsp[-2].instr),&f); } delete (yyvsp[-1].binstr); } -#line 6587 "asmparse.cpp" +#line 7036 "prebuilt\\asmparse.cpp" break; - case 515: /* instr: instr_brtarget int32 */ -#line 1383 "asmparse.y" + case 531: /* instr: instr_brtarget int32 */ +#line 1393 "asmparse.y" { PASM->EmitInstrBrOffset((yyvsp[-1].instr), (yyvsp[0].int32)); } -#line 6593 "asmparse.cpp" +#line 7042 "prebuilt\\asmparse.cpp" break; - case 516: /* instr: instr_brtarget id */ -#line 1384 "asmparse.y" + case 532: /* instr: instr_brtarget id */ +#line 1394 "asmparse.y" { PASM->EmitInstrBrTarget((yyvsp[-1].instr), (yyvsp[0].string)); } -#line 6599 "asmparse.cpp" +#line 7048 
"prebuilt\\asmparse.cpp" break; - case 517: /* instr: instr_method methodRef */ -#line 1386 "asmparse.y" + case 533: /* instr: instr_method methodRef */ +#line 1396 "asmparse.y" { PASM->SetMemberRefFixup((yyvsp[0].token),PASM->OpcodeLen((yyvsp[-1].instr))); PASM->EmitInstrI((yyvsp[-1].instr),(yyvsp[0].token)); PASM->m_tkCurrentCVOwner = (yyvsp[0].token); PASM->m_pCustomDescrList = NULL; iCallConv = 0; } -#line 6610 "asmparse.cpp" +#line 7059 "prebuilt\\asmparse.cpp" break; - case 518: /* instr: instr_field type typeSpec DCOLON dottedName */ -#line 1393 "asmparse.y" + case 534: /* instr: instr_field type typeSpec DCOLON dottedName */ +#line 1403 "asmparse.y" { (yyvsp[-3].binstr)->insertInt8(IMAGE_CEE_CS_CALLCONV_FIELD); mdToken mr = PASM->MakeMemberRef((yyvsp[-2].token), (yyvsp[0].string), (yyvsp[-3].binstr)); PASM->SetMemberRefFixup(mr, PASM->OpcodeLen((yyvsp[-4].instr))); @@ -6618,11 +7067,11 @@ yyparse (void) PASM->m_tkCurrentCVOwner = mr; PASM->m_pCustomDescrList = NULL; } -#line 6622 "asmparse.cpp" +#line 7071 "prebuilt\\asmparse.cpp" break; - case 519: /* instr: instr_field type dottedName */ -#line 1401 "asmparse.y" + case 535: /* instr: instr_field type dottedName */ +#line 1411 "asmparse.y" { (yyvsp[-1].binstr)->insertInt8(IMAGE_CEE_CS_CALLCONV_FIELD); mdToken mr = PASM->MakeMemberRef(mdTokenNil, (yyvsp[0].string), (yyvsp[-1].binstr)); PASM->SetMemberRefFixup(mr, PASM->OpcodeLen((yyvsp[-2].instr))); @@ -6630,1067 +7079,1067 @@ yyparse (void) PASM->m_tkCurrentCVOwner = mr; PASM->m_pCustomDescrList = NULL; } -#line 6634 "asmparse.cpp" +#line 7083 "prebuilt\\asmparse.cpp" break; - case 520: /* instr: instr_field mdtoken */ -#line 1408 "asmparse.y" + case 536: /* instr: instr_field mdtoken */ +#line 1418 "asmparse.y" { mdToken mr = (yyvsp[0].token); PASM->SetMemberRefFixup(mr, PASM->OpcodeLen((yyvsp[-1].instr))); PASM->EmitInstrI((yyvsp[-1].instr),mr); PASM->m_tkCurrentCVOwner = mr; PASM->m_pCustomDescrList = NULL; } -#line 6645 "asmparse.cpp" +#line 7094 "prebuilt\\asmparse.cpp" break; - case 521: /* instr: instr_field TYPEDEF_F */ -#line 1414 "asmparse.y" + case 537: /* instr: instr_field TYPEDEF_F */ +#line 1424 "asmparse.y" { mdToken mr = (yyvsp[0].tdd)->m_tkTypeSpec; PASM->SetMemberRefFixup(mr, PASM->OpcodeLen((yyvsp[-1].instr))); PASM->EmitInstrI((yyvsp[-1].instr),mr); PASM->m_tkCurrentCVOwner = mr; PASM->m_pCustomDescrList = NULL; } -#line 6656 "asmparse.cpp" +#line 7105 "prebuilt\\asmparse.cpp" break; - case 522: /* instr: instr_field TYPEDEF_MR */ -#line 1420 "asmparse.y" + case 538: /* instr: instr_field TYPEDEF_MR */ +#line 1430 "asmparse.y" { mdToken mr = (yyvsp[0].tdd)->m_tkTypeSpec; PASM->SetMemberRefFixup(mr, PASM->OpcodeLen((yyvsp[-1].instr))); PASM->EmitInstrI((yyvsp[-1].instr),mr); PASM->m_tkCurrentCVOwner = mr; PASM->m_pCustomDescrList = NULL; } -#line 6667 "asmparse.cpp" +#line 7116 "prebuilt\\asmparse.cpp" break; - case 523: /* instr: instr_type typeSpec */ -#line 1426 "asmparse.y" + case 539: /* instr: instr_type typeSpec */ +#line 1436 "asmparse.y" { PASM->EmitInstrI((yyvsp[-1].instr), (yyvsp[0].token)); PASM->m_tkCurrentCVOwner = (yyvsp[0].token); PASM->m_pCustomDescrList = NULL; } -#line 6676 "asmparse.cpp" +#line 7125 "prebuilt\\asmparse.cpp" break; - case 524: /* instr: instr_string compQstring */ -#line 1430 "asmparse.y" + case 540: /* instr: instr_string compQstring */ +#line 1440 "asmparse.y" { PASM->EmitInstrStringLiteral((yyvsp[-1].instr), (yyvsp[0].binstr),TRUE); } -#line 6682 "asmparse.cpp" +#line 7131 "prebuilt\\asmparse.cpp" break; - case 525: /* 
instr: instr_string ANSI_ '(' compQstring ')' */ -#line 1432 "asmparse.y" + case 541: /* instr: instr_string ANSI_ '(' compQstring ')' */ +#line 1442 "asmparse.y" { PASM->EmitInstrStringLiteral((yyvsp[-4].instr), (yyvsp[-1].binstr),FALSE); } -#line 6688 "asmparse.cpp" +#line 7137 "prebuilt\\asmparse.cpp" break; - case 526: /* instr: instr_string bytearrayhead bytes ')' */ -#line 1434 "asmparse.y" + case 542: /* instr: instr_string bytearrayhead bytes ')' */ +#line 1444 "asmparse.y" { PASM->EmitInstrStringLiteral((yyvsp[-3].instr), (yyvsp[-1].binstr),FALSE,TRUE); } -#line 6694 "asmparse.cpp" +#line 7143 "prebuilt\\asmparse.cpp" break; - case 527: /* instr: instr_sig callConv type '(' sigArgs0 ')' */ -#line 1436 "asmparse.y" + case 543: /* instr: instr_sig callConv type '(' sigArgs0 ')' */ +#line 1446 "asmparse.y" { PASM->EmitInstrSig((yyvsp[-5].instr), parser->MakeSig((yyvsp[-4].int32), (yyvsp[-3].binstr), (yyvsp[-1].binstr))); PASM->ResetArgNameList(); } -#line 6702 "asmparse.cpp" +#line 7151 "prebuilt\\asmparse.cpp" break; - case 528: /* instr: instr_tok ownerType */ -#line 1440 "asmparse.y" + case 544: /* instr: instr_tok ownerType */ +#line 1450 "asmparse.y" { PASM->EmitInstrI((yyvsp[-1].instr),(yyvsp[0].token)); PASM->m_tkCurrentCVOwner = (yyvsp[0].token); PASM->m_pCustomDescrList = NULL; iOpcodeLen = 0; } -#line 6712 "asmparse.cpp" +#line 7161 "prebuilt\\asmparse.cpp" break; - case 529: /* instr: instr_switch '(' labels ')' */ -#line 1445 "asmparse.y" + case 545: /* instr: instr_switch '(' labels ')' */ +#line 1455 "asmparse.y" { PASM->EmitInstrSwitch((yyvsp[-3].instr), (yyvsp[-1].labels)); } -#line 6718 "asmparse.cpp" +#line 7167 "prebuilt\\asmparse.cpp" break; - case 530: /* labels: %empty */ -#line 1448 "asmparse.y" + case 546: /* labels: %empty */ +#line 1458 "asmparse.y" { (yyval.labels) = 0; } -#line 6724 "asmparse.cpp" +#line 7173 "prebuilt\\asmparse.cpp" break; - case 531: /* labels: id ',' labels */ -#line 1449 "asmparse.y" + case 547: /* labels: id ',' labels */ +#line 1459 "asmparse.y" { (yyval.labels) = new Labels((yyvsp[-2].string), (yyvsp[0].labels), TRUE); } -#line 6730 "asmparse.cpp" +#line 7179 "prebuilt\\asmparse.cpp" break; - case 532: /* labels: int32 ',' labels */ -#line 1450 "asmparse.y" + case 548: /* labels: int32 ',' labels */ +#line 1460 "asmparse.y" { (yyval.labels) = new Labels((char *)(UINT_PTR)(yyvsp[-2].int32), (yyvsp[0].labels), FALSE); } -#line 6736 "asmparse.cpp" +#line 7185 "prebuilt\\asmparse.cpp" break; - case 533: /* labels: id */ -#line 1451 "asmparse.y" + case 549: /* labels: id */ +#line 1461 "asmparse.y" { (yyval.labels) = new Labels((yyvsp[0].string), NULL, TRUE); } -#line 6742 "asmparse.cpp" +#line 7191 "prebuilt\\asmparse.cpp" break; - case 534: /* labels: int32 */ -#line 1452 "asmparse.y" + case 550: /* labels: int32 */ +#line 1462 "asmparse.y" { (yyval.labels) = new Labels((char *)(UINT_PTR)(yyvsp[0].int32), NULL, FALSE); } -#line 6748 "asmparse.cpp" +#line 7197 "prebuilt\\asmparse.cpp" break; - case 535: /* tyArgs0: %empty */ -#line 1456 "asmparse.y" + case 551: /* tyArgs0: %empty */ +#line 1466 "asmparse.y" { (yyval.binstr) = NULL; } -#line 6754 "asmparse.cpp" +#line 7203 "prebuilt\\asmparse.cpp" break; - case 536: /* tyArgs0: '<' tyArgs1 '>' */ -#line 1457 "asmparse.y" + case 552: /* tyArgs0: '<' tyArgs1 '>' */ +#line 1467 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); } -#line 6760 "asmparse.cpp" +#line 7209 "prebuilt\\asmparse.cpp" break; - case 537: /* tyArgs1: %empty */ -#line 1460 "asmparse.y" + case 553: /* tyArgs1: %empty 
*/ +#line 1470 "asmparse.y" { (yyval.binstr) = NULL; } -#line 6766 "asmparse.cpp" +#line 7215 "prebuilt\\asmparse.cpp" break; - case 538: /* tyArgs1: tyArgs2 */ -#line 1461 "asmparse.y" + case 554: /* tyArgs1: tyArgs2 */ +#line 1471 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 6772 "asmparse.cpp" +#line 7221 "prebuilt\\asmparse.cpp" break; - case 539: /* tyArgs2: type */ -#line 1464 "asmparse.y" + case 555: /* tyArgs2: type */ +#line 1474 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 6778 "asmparse.cpp" +#line 7227 "prebuilt\\asmparse.cpp" break; - case 540: /* tyArgs2: tyArgs2 ',' type */ -#line 1465 "asmparse.y" + case 556: /* tyArgs2: tyArgs2 ',' type */ +#line 1475 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); (yyval.binstr)->append((yyvsp[0].binstr)); delete (yyvsp[0].binstr); } -#line 6784 "asmparse.cpp" +#line 7233 "prebuilt\\asmparse.cpp" break; - case 541: /* sigArgs0: %empty */ -#line 1469 "asmparse.y" + case 557: /* sigArgs0: %empty */ +#line 1479 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6790 "asmparse.cpp" +#line 7239 "prebuilt\\asmparse.cpp" break; - case 542: /* sigArgs0: sigArgs1 */ -#line 1470 "asmparse.y" + case 558: /* sigArgs0: sigArgs1 */ +#line 1480 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr);} -#line 6796 "asmparse.cpp" +#line 7245 "prebuilt\\asmparse.cpp" break; - case 543: /* sigArgs1: sigArg */ -#line 1473 "asmparse.y" + case 559: /* sigArgs1: sigArg */ +#line 1483 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 6802 "asmparse.cpp" +#line 7251 "prebuilt\\asmparse.cpp" break; - case 544: /* sigArgs1: sigArgs1 ',' sigArg */ -#line 1474 "asmparse.y" + case 560: /* sigArgs1: sigArgs1 ',' sigArg */ +#line 1484 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); (yyval.binstr)->append((yyvsp[0].binstr)); delete (yyvsp[0].binstr); } -#line 6808 "asmparse.cpp" +#line 7257 "prebuilt\\asmparse.cpp" break; - case 545: /* sigArg: ELLIPSIS */ -#line 1477 "asmparse.y" + case 561: /* sigArg: ELLIPSIS */ +#line 1487 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_SENTINEL); } -#line 6814 "asmparse.cpp" +#line 7263 "prebuilt\\asmparse.cpp" break; - case 546: /* sigArg: paramAttr type marshalClause */ -#line 1478 "asmparse.y" + case 562: /* sigArg: paramAttr type marshalClause */ +#line 1488 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->append((yyvsp[-1].binstr)); PASM->addArgName(NULL, (yyvsp[-1].binstr), (yyvsp[0].binstr), (yyvsp[-2].int32)); } -#line 6820 "asmparse.cpp" +#line 7269 "prebuilt\\asmparse.cpp" break; - case 547: /* sigArg: paramAttr type marshalClause id */ -#line 1479 "asmparse.y" + case 563: /* sigArg: paramAttr type marshalClause id */ +#line 1489 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->append((yyvsp[-2].binstr)); PASM->addArgName((yyvsp[0].string), (yyvsp[-2].binstr), (yyvsp[-1].binstr), (yyvsp[-3].int32));} -#line 6826 "asmparse.cpp" +#line 7275 "prebuilt\\asmparse.cpp" break; - case 548: /* className: '[' dottedName ']' slashedName */ -#line 1483 "asmparse.y" + case 564: /* className: '[' dottedName ']' slashedName */ +#line 1493 "asmparse.y" { (yyval.token) = PASM->ResolveClassRef(PASM->GetAsmRef((yyvsp[-2].string)), (yyvsp[0].string), NULL); delete[] (yyvsp[-2].string);} -#line 6832 "asmparse.cpp" +#line 7281 "prebuilt\\asmparse.cpp" break; - case 549: /* className: '[' mdtoken ']' slashedName */ -#line 1484 "asmparse.y" + case 565: /* className: '[' mdtoken ']' slashedName */ +#line 1494 "asmparse.y" { (yyval.token) = 
PASM->ResolveClassRef((yyvsp[-2].token), (yyvsp[0].string), NULL); } -#line 6838 "asmparse.cpp" +#line 7287 "prebuilt\\asmparse.cpp" break; - case 550: /* className: '[' '*' ']' slashedName */ -#line 1485 "asmparse.y" + case 566: /* className: '[' '*' ']' slashedName */ +#line 1495 "asmparse.y" { (yyval.token) = PASM->ResolveClassRef(mdTokenNil, (yyvsp[0].string), NULL); } -#line 6844 "asmparse.cpp" +#line 7293 "prebuilt\\asmparse.cpp" break; - case 551: /* className: '[' _MODULE dottedName ']' slashedName */ -#line 1486 "asmparse.y" + case 567: /* className: '[' _MODULE dottedName ']' slashedName */ +#line 1496 "asmparse.y" { (yyval.token) = PASM->ResolveClassRef(PASM->GetModRef((yyvsp[-2].string)),(yyvsp[0].string), NULL); delete[] (yyvsp[-2].string);} -#line 6850 "asmparse.cpp" +#line 7299 "prebuilt\\asmparse.cpp" break; - case 552: /* className: slashedName */ -#line 1487 "asmparse.y" + case 568: /* className: slashedName */ +#line 1497 "asmparse.y" { (yyval.token) = PASM->ResolveClassRef(1,(yyvsp[0].string),NULL); } -#line 6856 "asmparse.cpp" +#line 7305 "prebuilt\\asmparse.cpp" break; - case 553: /* className: mdtoken */ -#line 1488 "asmparse.y" + case 569: /* className: mdtoken */ +#line 1498 "asmparse.y" { (yyval.token) = (yyvsp[0].token); } -#line 6862 "asmparse.cpp" +#line 7311 "prebuilt\\asmparse.cpp" break; - case 554: /* className: TYPEDEF_T */ -#line 1489 "asmparse.y" + case 570: /* className: TYPEDEF_T */ +#line 1499 "asmparse.y" { (yyval.token) = (yyvsp[0].tdd)->m_tkTypeSpec; } -#line 6868 "asmparse.cpp" +#line 7317 "prebuilt\\asmparse.cpp" break; - case 555: /* className: _THIS */ -#line 1490 "asmparse.y" + case 571: /* className: _THIS */ +#line 1500 "asmparse.y" { if(PASM->m_pCurClass != NULL) (yyval.token) = PASM->m_pCurClass->m_cl; else { (yyval.token) = 0; PASM->report->error(".this outside class scope\n"); } } -#line 6876 "asmparse.cpp" +#line 7325 "prebuilt\\asmparse.cpp" break; - case 556: /* className: _BASE */ -#line 1493 "asmparse.y" + case 572: /* className: _BASE */ +#line 1503 "asmparse.y" { if(PASM->m_pCurClass != NULL) { (yyval.token) = PASM->m_pCurClass->m_crExtends; if(RidFromToken((yyval.token)) == 0) PASM->report->error(".base undefined\n"); } else { (yyval.token) = 0; PASM->report->error(".base outside class scope\n"); } } -#line 6887 "asmparse.cpp" +#line 7336 "prebuilt\\asmparse.cpp" break; - case 557: /* className: _NESTER */ -#line 1499 "asmparse.y" + case 573: /* className: _NESTER */ +#line 1509 "asmparse.y" { if(PASM->m_pCurClass != NULL) { if(PASM->m_pCurClass->m_pEncloser != NULL) (yyval.token) = PASM->m_pCurClass->m_pEncloser->m_cl; else { (yyval.token) = 0; PASM->report->error(".nester undefined\n"); } } else { (yyval.token) = 0; PASM->report->error(".nester outside class scope\n"); } } -#line 6897 "asmparse.cpp" +#line 7346 "prebuilt\\asmparse.cpp" break; - case 558: /* slashedName: dottedName */ -#line 1506 "asmparse.y" + case 574: /* slashedName: dottedName */ +#line 1516 "asmparse.y" { (yyval.string) = (yyvsp[0].string); } -#line 6903 "asmparse.cpp" +#line 7352 "prebuilt\\asmparse.cpp" break; - case 559: /* slashedName: slashedName '/' dottedName */ -#line 1507 "asmparse.y" + case 575: /* slashedName: slashedName '/' dottedName */ +#line 1517 "asmparse.y" { (yyval.string) = newStringWDel((yyvsp[-2].string), NESTING_SEP, (yyvsp[0].string)); } -#line 6909 "asmparse.cpp" +#line 7358 "prebuilt\\asmparse.cpp" break; - case 560: /* typeSpec: className */ -#line 1510 "asmparse.y" + case 576: /* typeSpec: className */ +#line 1520 "asmparse.y" { 
(yyval.token) = (yyvsp[0].token);} -#line 6915 "asmparse.cpp" +#line 7364 "prebuilt\\asmparse.cpp" break; - case 561: /* typeSpec: '[' dottedName ']' */ -#line 1511 "asmparse.y" + case 577: /* typeSpec: '[' dottedName ']' */ +#line 1521 "asmparse.y" { (yyval.token) = PASM->GetAsmRef((yyvsp[-1].string)); delete[] (yyvsp[-1].string);} -#line 6921 "asmparse.cpp" +#line 7370 "prebuilt\\asmparse.cpp" break; - case 562: /* typeSpec: '[' _MODULE dottedName ']' */ -#line 1512 "asmparse.y" + case 578: /* typeSpec: '[' _MODULE dottedName ']' */ +#line 1522 "asmparse.y" { (yyval.token) = PASM->GetModRef((yyvsp[-1].string)); delete[] (yyvsp[-1].string);} -#line 6927 "asmparse.cpp" +#line 7376 "prebuilt\\asmparse.cpp" break; - case 563: /* typeSpec: type */ -#line 1513 "asmparse.y" + case 579: /* typeSpec: type */ +#line 1523 "asmparse.y" { (yyval.token) = PASM->ResolveTypeSpec((yyvsp[0].binstr)); } -#line 6933 "asmparse.cpp" +#line 7382 "prebuilt\\asmparse.cpp" break; - case 564: /* nativeType: %empty */ -#line 1517 "asmparse.y" + case 580: /* nativeType: %empty */ +#line 1527 "asmparse.y" { (yyval.binstr) = new BinStr(); } -#line 6939 "asmparse.cpp" +#line 7388 "prebuilt\\asmparse.cpp" break; - case 565: /* nativeType: CUSTOM_ '(' compQstring ',' compQstring ',' compQstring ',' compQstring ')' */ -#line 1519 "asmparse.y" + case 581: /* nativeType: CUSTOM_ '(' compQstring ',' compQstring ',' compQstring ',' compQstring ')' */ +#line 1529 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_CUSTOMMARSHALER); corEmitInt((yyval.binstr),(yyvsp[-7].binstr)->length()); (yyval.binstr)->append((yyvsp[-7].binstr)); corEmitInt((yyval.binstr),(yyvsp[-5].binstr)->length()); (yyval.binstr)->append((yyvsp[-5].binstr)); corEmitInt((yyval.binstr),(yyvsp[-3].binstr)->length()); (yyval.binstr)->append((yyvsp[-3].binstr)); corEmitInt((yyval.binstr),(yyvsp[-1].binstr)->length()); (yyval.binstr)->append((yyvsp[-1].binstr)); PASM->report->warn("Deprecated 4-string form of custom marshaler, first two strings ignored\n");} -#line 6950 "asmparse.cpp" +#line 7399 "prebuilt\\asmparse.cpp" break; - case 566: /* nativeType: CUSTOM_ '(' compQstring ',' compQstring ')' */ -#line 1526 "asmparse.y" + case 582: /* nativeType: CUSTOM_ '(' compQstring ',' compQstring ')' */ +#line 1536 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_CUSTOMMARSHALER); corEmitInt((yyval.binstr),0); corEmitInt((yyval.binstr),0); corEmitInt((yyval.binstr),(yyvsp[-3].binstr)->length()); (yyval.binstr)->append((yyvsp[-3].binstr)); corEmitInt((yyval.binstr),(yyvsp[-1].binstr)->length()); (yyval.binstr)->append((yyvsp[-1].binstr)); } -#line 6960 "asmparse.cpp" +#line 7409 "prebuilt\\asmparse.cpp" break; - case 567: /* nativeType: FIXED_ SYSSTRING_ '[' int32 ']' */ -#line 1531 "asmparse.y" + case 583: /* nativeType: FIXED_ SYSSTRING_ '[' int32 ']' */ +#line 1541 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_FIXEDSYSSTRING); corEmitInt((yyval.binstr),(yyvsp[-1].int32)); } -#line 6967 "asmparse.cpp" +#line 7416 "prebuilt\\asmparse.cpp" break; - case 568: /* nativeType: FIXED_ ARRAY_ '[' int32 ']' nativeType */ -#line 1534 "asmparse.y" + case 584: /* nativeType: FIXED_ ARRAY_ '[' int32 ']' nativeType */ +#line 1544 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_FIXEDARRAY); corEmitInt((yyval.binstr),(yyvsp[-2].int32)); (yyval.binstr)->append((yyvsp[0].binstr)); } -#line 6974 "asmparse.cpp" +#line 7423 
"prebuilt\\asmparse.cpp" break; - case 569: /* nativeType: VARIANT_ */ -#line 1536 "asmparse.y" + case 585: /* nativeType: VARIANT_ */ +#line 1546 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_VARIANT); PASM->report->warn("Deprecated native type 'variant'\n"); } -#line 6981 "asmparse.cpp" +#line 7430 "prebuilt\\asmparse.cpp" break; - case 570: /* nativeType: CURRENCY_ */ -#line 1538 "asmparse.y" + case 586: /* nativeType: CURRENCY_ */ +#line 1548 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_CURRENCY); } -#line 6987 "asmparse.cpp" +#line 7436 "prebuilt\\asmparse.cpp" break; - case 571: /* nativeType: SYSCHAR_ */ -#line 1539 "asmparse.y" + case 587: /* nativeType: SYSCHAR_ */ +#line 1549 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_SYSCHAR); PASM->report->warn("Deprecated native type 'syschar'\n"); } -#line 6994 "asmparse.cpp" +#line 7443 "prebuilt\\asmparse.cpp" break; - case 572: /* nativeType: VOID_ */ -#line 1541 "asmparse.y" + case 588: /* nativeType: VOID_ */ +#line 1551 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_VOID); PASM->report->warn("Deprecated native type 'void'\n"); } -#line 7001 "asmparse.cpp" +#line 7450 "prebuilt\\asmparse.cpp" break; - case 573: /* nativeType: BOOL_ */ -#line 1543 "asmparse.y" + case 589: /* nativeType: BOOL_ */ +#line 1553 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_BOOLEAN); } -#line 7007 "asmparse.cpp" +#line 7456 "prebuilt\\asmparse.cpp" break; - case 574: /* nativeType: INT8_ */ -#line 1544 "asmparse.y" + case 590: /* nativeType: INT8_ */ +#line 1554 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_I1); } -#line 7013 "asmparse.cpp" +#line 7462 "prebuilt\\asmparse.cpp" break; - case 575: /* nativeType: INT16_ */ -#line 1545 "asmparse.y" + case 591: /* nativeType: INT16_ */ +#line 1555 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_I2); } -#line 7019 "asmparse.cpp" +#line 7468 "prebuilt\\asmparse.cpp" break; - case 576: /* nativeType: INT32_ */ -#line 1546 "asmparse.y" + case 592: /* nativeType: INT32_ */ +#line 1556 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_I4); } -#line 7025 "asmparse.cpp" +#line 7474 "prebuilt\\asmparse.cpp" break; - case 577: /* nativeType: INT64_ */ -#line 1547 "asmparse.y" + case 593: /* nativeType: INT64_ */ +#line 1557 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_I8); } -#line 7031 "asmparse.cpp" +#line 7480 "prebuilt\\asmparse.cpp" break; - case 578: /* nativeType: FLOAT32_ */ -#line 1548 "asmparse.y" + case 594: /* nativeType: FLOAT32_ */ +#line 1558 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_R4); } -#line 7037 "asmparse.cpp" +#line 7486 "prebuilt\\asmparse.cpp" break; - case 579: /* nativeType: FLOAT64_ */ -#line 1549 "asmparse.y" + case 595: /* nativeType: FLOAT64_ */ +#line 1559 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_R8); } -#line 7043 "asmparse.cpp" +#line 7492 "prebuilt\\asmparse.cpp" break; - case 580: /* nativeType: ERROR_ */ -#line 1550 "asmparse.y" + case 596: /* nativeType: ERROR_ */ +#line 1560 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_ERROR); } -#line 7049 "asmparse.cpp" +#line 7498 "prebuilt\\asmparse.cpp" break; - case 581: /* 
nativeType: UNSIGNED_ INT8_ */ -#line 1551 "asmparse.y" + case 597: /* nativeType: UNSIGNED_ INT8_ */ +#line 1561 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_U1); } -#line 7055 "asmparse.cpp" +#line 7504 "prebuilt\\asmparse.cpp" break; - case 582: /* nativeType: UNSIGNED_ INT16_ */ -#line 1552 "asmparse.y" + case 598: /* nativeType: UNSIGNED_ INT16_ */ +#line 1562 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_U2); } -#line 7061 "asmparse.cpp" +#line 7510 "prebuilt\\asmparse.cpp" break; - case 583: /* nativeType: UNSIGNED_ INT32_ */ -#line 1553 "asmparse.y" + case 599: /* nativeType: UNSIGNED_ INT32_ */ +#line 1563 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_U4); } -#line 7067 "asmparse.cpp" +#line 7516 "prebuilt\\asmparse.cpp" break; - case 584: /* nativeType: UNSIGNED_ INT64_ */ -#line 1554 "asmparse.y" + case 600: /* nativeType: UNSIGNED_ INT64_ */ +#line 1564 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_U8); } -#line 7073 "asmparse.cpp" +#line 7522 "prebuilt\\asmparse.cpp" break; - case 585: /* nativeType: UINT8_ */ -#line 1555 "asmparse.y" + case 601: /* nativeType: UINT8_ */ +#line 1565 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_U1); } -#line 7079 "asmparse.cpp" +#line 7528 "prebuilt\\asmparse.cpp" break; - case 586: /* nativeType: UINT16_ */ -#line 1556 "asmparse.y" + case 602: /* nativeType: UINT16_ */ +#line 1566 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_U2); } -#line 7085 "asmparse.cpp" +#line 7534 "prebuilt\\asmparse.cpp" break; - case 587: /* nativeType: UINT32_ */ -#line 1557 "asmparse.y" + case 603: /* nativeType: UINT32_ */ +#line 1567 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_U4); } -#line 7091 "asmparse.cpp" +#line 7540 "prebuilt\\asmparse.cpp" break; - case 588: /* nativeType: UINT64_ */ -#line 1558 "asmparse.y" + case 604: /* nativeType: UINT64_ */ +#line 1568 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_U8); } -#line 7097 "asmparse.cpp" +#line 7546 "prebuilt\\asmparse.cpp" break; - case 589: /* nativeType: nativeType '*' */ -#line 1559 "asmparse.y" + case 605: /* nativeType: nativeType '*' */ +#line 1569 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt8(NATIVE_TYPE_PTR); PASM->report->warn("Deprecated native type '*'\n"); } -#line 7104 "asmparse.cpp" +#line 7553 "prebuilt\\asmparse.cpp" break; - case 590: /* nativeType: nativeType '[' ']' */ -#line 1561 "asmparse.y" + case 606: /* nativeType: nativeType '[' ']' */ +#line 1571 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); if((yyval.binstr)->length()==0) (yyval.binstr)->appendInt8(NATIVE_TYPE_MAX); (yyval.binstr)->insertInt8(NATIVE_TYPE_ARRAY); } -#line 7111 "asmparse.cpp" +#line 7560 "prebuilt\\asmparse.cpp" break; - case 591: /* nativeType: nativeType '[' int32 ']' */ -#line 1563 "asmparse.y" + case 607: /* nativeType: nativeType '[' int32 ']' */ +#line 1573 "asmparse.y" { (yyval.binstr) = (yyvsp[-3].binstr); if((yyval.binstr)->length()==0) (yyval.binstr)->appendInt8(NATIVE_TYPE_MAX); (yyval.binstr)->insertInt8(NATIVE_TYPE_ARRAY); corEmitInt((yyval.binstr),0); corEmitInt((yyval.binstr),(yyvsp[-1].int32)); corEmitInt((yyval.binstr),0); } -#line 7121 "asmparse.cpp" +#line 7570 "prebuilt\\asmparse.cpp" break; - case 592: /* nativeType: nativeType '[' int32 '+' int32 
']' */ -#line 1568 "asmparse.y" + case 608: /* nativeType: nativeType '[' int32 '+' int32 ']' */ +#line 1578 "asmparse.y" { (yyval.binstr) = (yyvsp[-5].binstr); if((yyval.binstr)->length()==0) (yyval.binstr)->appendInt8(NATIVE_TYPE_MAX); (yyval.binstr)->insertInt8(NATIVE_TYPE_ARRAY); corEmitInt((yyval.binstr),(yyvsp[-1].int32)); corEmitInt((yyval.binstr),(yyvsp[-3].int32)); corEmitInt((yyval.binstr),ntaSizeParamIndexSpecified); } -#line 7131 "asmparse.cpp" +#line 7580 "prebuilt\\asmparse.cpp" break; - case 593: /* nativeType: nativeType '[' '+' int32 ']' */ -#line 1573 "asmparse.y" + case 609: /* nativeType: nativeType '[' '+' int32 ']' */ +#line 1583 "asmparse.y" { (yyval.binstr) = (yyvsp[-4].binstr); if((yyval.binstr)->length()==0) (yyval.binstr)->appendInt8(NATIVE_TYPE_MAX); (yyval.binstr)->insertInt8(NATIVE_TYPE_ARRAY); corEmitInt((yyval.binstr),(yyvsp[-1].int32)); } -#line 7139 "asmparse.cpp" +#line 7588 "prebuilt\\asmparse.cpp" break; - case 594: /* nativeType: DECIMAL_ */ -#line 1576 "asmparse.y" + case 610: /* nativeType: DECIMAL_ */ +#line 1586 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_DECIMAL); PASM->report->warn("Deprecated native type 'decimal'\n"); } -#line 7146 "asmparse.cpp" +#line 7595 "prebuilt\\asmparse.cpp" break; - case 595: /* nativeType: DATE_ */ -#line 1578 "asmparse.y" + case 611: /* nativeType: DATE_ */ +#line 1588 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_DATE); PASM->report->warn("Deprecated native type 'date'\n"); } -#line 7153 "asmparse.cpp" +#line 7602 "prebuilt\\asmparse.cpp" break; - case 596: /* nativeType: BSTR_ */ -#line 1580 "asmparse.y" + case 612: /* nativeType: BSTR_ */ +#line 1590 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_BSTR); } -#line 7159 "asmparse.cpp" +#line 7608 "prebuilt\\asmparse.cpp" break; - case 597: /* nativeType: LPSTR_ */ -#line 1581 "asmparse.y" + case 613: /* nativeType: LPSTR_ */ +#line 1591 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_LPSTR); } -#line 7165 "asmparse.cpp" +#line 7614 "prebuilt\\asmparse.cpp" break; - case 598: /* nativeType: LPWSTR_ */ -#line 1582 "asmparse.y" + case 614: /* nativeType: LPWSTR_ */ +#line 1592 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_LPWSTR); } -#line 7171 "asmparse.cpp" +#line 7620 "prebuilt\\asmparse.cpp" break; - case 599: /* nativeType: LPTSTR_ */ -#line 1583 "asmparse.y" + case 615: /* nativeType: LPTSTR_ */ +#line 1593 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_LPTSTR); } -#line 7177 "asmparse.cpp" +#line 7626 "prebuilt\\asmparse.cpp" break; - case 600: /* nativeType: OBJECTREF_ */ -#line 1584 "asmparse.y" + case 616: /* nativeType: OBJECTREF_ */ +#line 1594 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_OBJECTREF); PASM->report->warn("Deprecated native type 'objectref'\n"); } -#line 7184 "asmparse.cpp" +#line 7633 "prebuilt\\asmparse.cpp" break; - case 601: /* nativeType: IUNKNOWN_ iidParamIndex */ -#line 1586 "asmparse.y" + case 617: /* nativeType: IUNKNOWN_ iidParamIndex */ +#line 1596 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_IUNKNOWN); if((yyvsp[0].int32) != -1) corEmitInt((yyval.binstr),(yyvsp[0].int32)); } -#line 7191 "asmparse.cpp" +#line 7640 "prebuilt\\asmparse.cpp" break; - case 602: /* nativeType: IDISPATCH_ iidParamIndex */ -#line 1588 "asmparse.y" + 
case 618: /* nativeType: IDISPATCH_ iidParamIndex */ +#line 1598 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_IDISPATCH); if((yyvsp[0].int32) != -1) corEmitInt((yyval.binstr),(yyvsp[0].int32)); } -#line 7198 "asmparse.cpp" +#line 7647 "prebuilt\\asmparse.cpp" break; - case 603: /* nativeType: STRUCT_ */ -#line 1590 "asmparse.y" + case 619: /* nativeType: STRUCT_ */ +#line 1600 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_STRUCT); } -#line 7204 "asmparse.cpp" +#line 7653 "prebuilt\\asmparse.cpp" break; - case 604: /* nativeType: INTERFACE_ iidParamIndex */ -#line 1591 "asmparse.y" + case 620: /* nativeType: INTERFACE_ iidParamIndex */ +#line 1601 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_INTF); if((yyvsp[0].int32) != -1) corEmitInt((yyval.binstr),(yyvsp[0].int32)); } -#line 7211 "asmparse.cpp" +#line 7660 "prebuilt\\asmparse.cpp" break; - case 605: /* nativeType: SAFEARRAY_ variantType */ -#line 1593 "asmparse.y" + case 621: /* nativeType: SAFEARRAY_ variantType */ +#line 1603 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_SAFEARRAY); corEmitInt((yyval.binstr),(yyvsp[0].int32)); corEmitInt((yyval.binstr),0);} -#line 7219 "asmparse.cpp" +#line 7668 "prebuilt\\asmparse.cpp" break; - case 606: /* nativeType: SAFEARRAY_ variantType ',' compQstring */ -#line 1596 "asmparse.y" + case 622: /* nativeType: SAFEARRAY_ variantType ',' compQstring */ +#line 1606 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_SAFEARRAY); corEmitInt((yyval.binstr),(yyvsp[-2].int32)); corEmitInt((yyval.binstr),(yyvsp[0].binstr)->length()); (yyval.binstr)->append((yyvsp[0].binstr)); } -#line 7227 "asmparse.cpp" +#line 7676 "prebuilt\\asmparse.cpp" break; - case 607: /* nativeType: INT_ */ -#line 1600 "asmparse.y" + case 623: /* nativeType: INT_ */ +#line 1610 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_INT); } -#line 7233 "asmparse.cpp" +#line 7682 "prebuilt\\asmparse.cpp" break; - case 608: /* nativeType: UNSIGNED_ INT_ */ -#line 1601 "asmparse.y" + case 624: /* nativeType: UNSIGNED_ INT_ */ +#line 1611 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_UINT); } -#line 7239 "asmparse.cpp" +#line 7688 "prebuilt\\asmparse.cpp" break; - case 609: /* nativeType: UINT_ */ -#line 1602 "asmparse.y" + case 625: /* nativeType: UINT_ */ +#line 1612 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_UINT); } -#line 7245 "asmparse.cpp" +#line 7694 "prebuilt\\asmparse.cpp" break; - case 610: /* nativeType: NESTED_ STRUCT_ */ -#line 1603 "asmparse.y" + case 626: /* nativeType: NESTED_ STRUCT_ */ +#line 1613 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_NESTEDSTRUCT); PASM->report->warn("Deprecated native type 'nested struct'\n"); } -#line 7252 "asmparse.cpp" +#line 7701 "prebuilt\\asmparse.cpp" break; - case 611: /* nativeType: BYVALSTR_ */ -#line 1605 "asmparse.y" + case 627: /* nativeType: BYVALSTR_ */ +#line 1615 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_BYVALSTR); } -#line 7258 "asmparse.cpp" +#line 7707 "prebuilt\\asmparse.cpp" break; - case 612: /* nativeType: ANSI_ BSTR_ */ -#line 1606 "asmparse.y" + case 628: /* nativeType: ANSI_ BSTR_ */ +#line 1616 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_ANSIBSTR); 
} -#line 7264 "asmparse.cpp" +#line 7713 "prebuilt\\asmparse.cpp" break; - case 613: /* nativeType: TBSTR_ */ -#line 1607 "asmparse.y" + case 629: /* nativeType: TBSTR_ */ +#line 1617 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_TBSTR); } -#line 7270 "asmparse.cpp" +#line 7719 "prebuilt\\asmparse.cpp" break; - case 614: /* nativeType: VARIANT_ BOOL_ */ -#line 1608 "asmparse.y" + case 630: /* nativeType: VARIANT_ BOOL_ */ +#line 1618 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_VARIANTBOOL); } -#line 7276 "asmparse.cpp" +#line 7725 "prebuilt\\asmparse.cpp" break; - case 615: /* nativeType: METHOD_ */ -#line 1609 "asmparse.y" + case 631: /* nativeType: METHOD_ */ +#line 1619 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_FUNC); } -#line 7282 "asmparse.cpp" +#line 7731 "prebuilt\\asmparse.cpp" break; - case 616: /* nativeType: AS_ ANY_ */ -#line 1610 "asmparse.y" + case 632: /* nativeType: AS_ ANY_ */ +#line 1620 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_ASANY); } -#line 7288 "asmparse.cpp" +#line 7737 "prebuilt\\asmparse.cpp" break; - case 617: /* nativeType: LPSTRUCT_ */ -#line 1611 "asmparse.y" + case 633: /* nativeType: LPSTRUCT_ */ +#line 1621 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(NATIVE_TYPE_LPSTRUCT); } -#line 7294 "asmparse.cpp" +#line 7743 "prebuilt\\asmparse.cpp" break; - case 618: /* nativeType: TYPEDEF_TS */ -#line 1612 "asmparse.y" + case 634: /* nativeType: TYPEDEF_TS */ +#line 1622 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->append((yyvsp[0].tdd)->m_pbsTypeSpec); } -#line 7300 "asmparse.cpp" +#line 7749 "prebuilt\\asmparse.cpp" break; - case 619: /* iidParamIndex: %empty */ -#line 1615 "asmparse.y" + case 635: /* iidParamIndex: %empty */ +#line 1625 "asmparse.y" { (yyval.int32) = -1; } -#line 7306 "asmparse.cpp" +#line 7755 "prebuilt\\asmparse.cpp" break; - case 620: /* iidParamIndex: '(' IIDPARAM_ '=' int32 ')' */ -#line 1616 "asmparse.y" + case 636: /* iidParamIndex: '(' IIDPARAM_ '=' int32 ')' */ +#line 1626 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32); } -#line 7312 "asmparse.cpp" +#line 7761 "prebuilt\\asmparse.cpp" break; - case 621: /* variantType: %empty */ -#line 1619 "asmparse.y" + case 637: /* variantType: %empty */ +#line 1629 "asmparse.y" { (yyval.int32) = VT_EMPTY; } -#line 7318 "asmparse.cpp" +#line 7767 "prebuilt\\asmparse.cpp" break; - case 622: /* variantType: NULL_ */ -#line 1620 "asmparse.y" + case 638: /* variantType: NULL_ */ +#line 1630 "asmparse.y" { (yyval.int32) = VT_NULL; } -#line 7324 "asmparse.cpp" +#line 7773 "prebuilt\\asmparse.cpp" break; - case 623: /* variantType: VARIANT_ */ -#line 1621 "asmparse.y" + case 639: /* variantType: VARIANT_ */ +#line 1631 "asmparse.y" { (yyval.int32) = VT_VARIANT; } -#line 7330 "asmparse.cpp" +#line 7779 "prebuilt\\asmparse.cpp" break; - case 624: /* variantType: CURRENCY_ */ -#line 1622 "asmparse.y" + case 640: /* variantType: CURRENCY_ */ +#line 1632 "asmparse.y" { (yyval.int32) = VT_CY; } -#line 7336 "asmparse.cpp" +#line 7785 "prebuilt\\asmparse.cpp" break; - case 625: /* variantType: VOID_ */ -#line 1623 "asmparse.y" + case 641: /* variantType: VOID_ */ +#line 1633 "asmparse.y" { (yyval.int32) = VT_VOID; } -#line 7342 "asmparse.cpp" +#line 7791 "prebuilt\\asmparse.cpp" break; - case 626: /* variantType: BOOL_ */ -#line 1624 "asmparse.y" + case 642: /* variantType: BOOL_ */ +#line 1634 
"asmparse.y" { (yyval.int32) = VT_BOOL; } -#line 7348 "asmparse.cpp" +#line 7797 "prebuilt\\asmparse.cpp" break; - case 627: /* variantType: INT8_ */ -#line 1625 "asmparse.y" + case 643: /* variantType: INT8_ */ +#line 1635 "asmparse.y" { (yyval.int32) = VT_I1; } -#line 7354 "asmparse.cpp" +#line 7803 "prebuilt\\asmparse.cpp" break; - case 628: /* variantType: INT16_ */ -#line 1626 "asmparse.y" + case 644: /* variantType: INT16_ */ +#line 1636 "asmparse.y" { (yyval.int32) = VT_I2; } -#line 7360 "asmparse.cpp" +#line 7809 "prebuilt\\asmparse.cpp" break; - case 629: /* variantType: INT32_ */ -#line 1627 "asmparse.y" + case 645: /* variantType: INT32_ */ +#line 1637 "asmparse.y" { (yyval.int32) = VT_I4; } -#line 7366 "asmparse.cpp" +#line 7815 "prebuilt\\asmparse.cpp" break; - case 630: /* variantType: INT64_ */ -#line 1628 "asmparse.y" + case 646: /* variantType: INT64_ */ +#line 1638 "asmparse.y" { (yyval.int32) = VT_I8; } -#line 7372 "asmparse.cpp" +#line 7821 "prebuilt\\asmparse.cpp" break; - case 631: /* variantType: FLOAT32_ */ -#line 1629 "asmparse.y" + case 647: /* variantType: FLOAT32_ */ +#line 1639 "asmparse.y" { (yyval.int32) = VT_R4; } -#line 7378 "asmparse.cpp" +#line 7827 "prebuilt\\asmparse.cpp" break; - case 632: /* variantType: FLOAT64_ */ -#line 1630 "asmparse.y" + case 648: /* variantType: FLOAT64_ */ +#line 1640 "asmparse.y" { (yyval.int32) = VT_R8; } -#line 7384 "asmparse.cpp" +#line 7833 "prebuilt\\asmparse.cpp" break; - case 633: /* variantType: UNSIGNED_ INT8_ */ -#line 1631 "asmparse.y" + case 649: /* variantType: UNSIGNED_ INT8_ */ +#line 1641 "asmparse.y" { (yyval.int32) = VT_UI1; } -#line 7390 "asmparse.cpp" +#line 7839 "prebuilt\\asmparse.cpp" break; - case 634: /* variantType: UNSIGNED_ INT16_ */ -#line 1632 "asmparse.y" + case 650: /* variantType: UNSIGNED_ INT16_ */ +#line 1642 "asmparse.y" { (yyval.int32) = VT_UI2; } -#line 7396 "asmparse.cpp" +#line 7845 "prebuilt\\asmparse.cpp" break; - case 635: /* variantType: UNSIGNED_ INT32_ */ -#line 1633 "asmparse.y" + case 651: /* variantType: UNSIGNED_ INT32_ */ +#line 1643 "asmparse.y" { (yyval.int32) = VT_UI4; } -#line 7402 "asmparse.cpp" +#line 7851 "prebuilt\\asmparse.cpp" break; - case 636: /* variantType: UNSIGNED_ INT64_ */ -#line 1634 "asmparse.y" + case 652: /* variantType: UNSIGNED_ INT64_ */ +#line 1644 "asmparse.y" { (yyval.int32) = VT_UI8; } -#line 7408 "asmparse.cpp" +#line 7857 "prebuilt\\asmparse.cpp" break; - case 637: /* variantType: UINT8_ */ -#line 1635 "asmparse.y" + case 653: /* variantType: UINT8_ */ +#line 1645 "asmparse.y" { (yyval.int32) = VT_UI1; } -#line 7414 "asmparse.cpp" +#line 7863 "prebuilt\\asmparse.cpp" break; - case 638: /* variantType: UINT16_ */ -#line 1636 "asmparse.y" + case 654: /* variantType: UINT16_ */ +#line 1646 "asmparse.y" { (yyval.int32) = VT_UI2; } -#line 7420 "asmparse.cpp" +#line 7869 "prebuilt\\asmparse.cpp" break; - case 639: /* variantType: UINT32_ */ -#line 1637 "asmparse.y" + case 655: /* variantType: UINT32_ */ +#line 1647 "asmparse.y" { (yyval.int32) = VT_UI4; } -#line 7426 "asmparse.cpp" +#line 7875 "prebuilt\\asmparse.cpp" break; - case 640: /* variantType: UINT64_ */ -#line 1638 "asmparse.y" + case 656: /* variantType: UINT64_ */ +#line 1648 "asmparse.y" { (yyval.int32) = VT_UI8; } -#line 7432 "asmparse.cpp" +#line 7881 "prebuilt\\asmparse.cpp" break; - case 641: /* variantType: '*' */ -#line 1639 "asmparse.y" + case 657: /* variantType: '*' */ +#line 1649 "asmparse.y" { (yyval.int32) = VT_PTR; } -#line 7438 "asmparse.cpp" +#line 7887 
"prebuilt\\asmparse.cpp" break; - case 642: /* variantType: variantType '[' ']' */ -#line 1640 "asmparse.y" + case 658: /* variantType: variantType '[' ']' */ +#line 1650 "asmparse.y" { (yyval.int32) = (yyvsp[-2].int32) | VT_ARRAY; } -#line 7444 "asmparse.cpp" +#line 7893 "prebuilt\\asmparse.cpp" break; - case 643: /* variantType: variantType VECTOR_ */ -#line 1641 "asmparse.y" + case 659: /* variantType: variantType VECTOR_ */ +#line 1651 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32) | VT_VECTOR; } -#line 7450 "asmparse.cpp" +#line 7899 "prebuilt\\asmparse.cpp" break; - case 644: /* variantType: variantType '&' */ -#line 1642 "asmparse.y" + case 660: /* variantType: variantType '&' */ +#line 1652 "asmparse.y" { (yyval.int32) = (yyvsp[-1].int32) | VT_BYREF; } -#line 7456 "asmparse.cpp" +#line 7905 "prebuilt\\asmparse.cpp" break; - case 645: /* variantType: DECIMAL_ */ -#line 1643 "asmparse.y" + case 661: /* variantType: DECIMAL_ */ +#line 1653 "asmparse.y" { (yyval.int32) = VT_DECIMAL; } -#line 7462 "asmparse.cpp" +#line 7911 "prebuilt\\asmparse.cpp" break; - case 646: /* variantType: DATE_ */ -#line 1644 "asmparse.y" + case 662: /* variantType: DATE_ */ +#line 1654 "asmparse.y" { (yyval.int32) = VT_DATE; } -#line 7468 "asmparse.cpp" +#line 7917 "prebuilt\\asmparse.cpp" break; - case 647: /* variantType: BSTR_ */ -#line 1645 "asmparse.y" + case 663: /* variantType: BSTR_ */ +#line 1655 "asmparse.y" { (yyval.int32) = VT_BSTR; } -#line 7474 "asmparse.cpp" +#line 7923 "prebuilt\\asmparse.cpp" break; - case 648: /* variantType: LPSTR_ */ -#line 1646 "asmparse.y" + case 664: /* variantType: LPSTR_ */ +#line 1656 "asmparse.y" { (yyval.int32) = VT_LPSTR; } -#line 7480 "asmparse.cpp" +#line 7929 "prebuilt\\asmparse.cpp" break; - case 649: /* variantType: LPWSTR_ */ -#line 1647 "asmparse.y" + case 665: /* variantType: LPWSTR_ */ +#line 1657 "asmparse.y" { (yyval.int32) = VT_LPWSTR; } -#line 7486 "asmparse.cpp" +#line 7935 "prebuilt\\asmparse.cpp" break; - case 650: /* variantType: IUNKNOWN_ */ -#line 1648 "asmparse.y" + case 666: /* variantType: IUNKNOWN_ */ +#line 1658 "asmparse.y" { (yyval.int32) = VT_UNKNOWN; } -#line 7492 "asmparse.cpp" +#line 7941 "prebuilt\\asmparse.cpp" break; - case 651: /* variantType: IDISPATCH_ */ -#line 1649 "asmparse.y" + case 667: /* variantType: IDISPATCH_ */ +#line 1659 "asmparse.y" { (yyval.int32) = VT_DISPATCH; } -#line 7498 "asmparse.cpp" +#line 7947 "prebuilt\\asmparse.cpp" break; - case 652: /* variantType: SAFEARRAY_ */ -#line 1650 "asmparse.y" + case 668: /* variantType: SAFEARRAY_ */ +#line 1660 "asmparse.y" { (yyval.int32) = VT_SAFEARRAY; } -#line 7504 "asmparse.cpp" +#line 7953 "prebuilt\\asmparse.cpp" break; - case 653: /* variantType: INT_ */ -#line 1651 "asmparse.y" + case 669: /* variantType: INT_ */ +#line 1661 "asmparse.y" { (yyval.int32) = VT_INT; } -#line 7510 "asmparse.cpp" +#line 7959 "prebuilt\\asmparse.cpp" break; - case 654: /* variantType: UNSIGNED_ INT_ */ -#line 1652 "asmparse.y" + case 670: /* variantType: UNSIGNED_ INT_ */ +#line 1662 "asmparse.y" { (yyval.int32) = VT_UINT; } -#line 7516 "asmparse.cpp" +#line 7965 "prebuilt\\asmparse.cpp" break; - case 655: /* variantType: UINT_ */ -#line 1653 "asmparse.y" + case 671: /* variantType: UINT_ */ +#line 1663 "asmparse.y" { (yyval.int32) = VT_UINT; } -#line 7522 "asmparse.cpp" +#line 7971 "prebuilt\\asmparse.cpp" break; - case 656: /* variantType: ERROR_ */ -#line 1654 "asmparse.y" + case 672: /* variantType: ERROR_ */ +#line 1664 "asmparse.y" { (yyval.int32) = VT_ERROR; } -#line 7528 
"asmparse.cpp" +#line 7977 "prebuilt\\asmparse.cpp" break; - case 657: /* variantType: HRESULT_ */ -#line 1655 "asmparse.y" + case 673: /* variantType: HRESULT_ */ +#line 1665 "asmparse.y" { (yyval.int32) = VT_HRESULT; } -#line 7534 "asmparse.cpp" +#line 7983 "prebuilt\\asmparse.cpp" break; - case 658: /* variantType: CARRAY_ */ -#line 1656 "asmparse.y" + case 674: /* variantType: CARRAY_ */ +#line 1666 "asmparse.y" { (yyval.int32) = VT_CARRAY; } -#line 7540 "asmparse.cpp" +#line 7989 "prebuilt\\asmparse.cpp" break; - case 659: /* variantType: USERDEFINED_ */ -#line 1657 "asmparse.y" + case 675: /* variantType: USERDEFINED_ */ +#line 1667 "asmparse.y" { (yyval.int32) = VT_USERDEFINED; } -#line 7546 "asmparse.cpp" +#line 7995 "prebuilt\\asmparse.cpp" break; - case 660: /* variantType: RECORD_ */ -#line 1658 "asmparse.y" + case 676: /* variantType: RECORD_ */ +#line 1668 "asmparse.y" { (yyval.int32) = VT_RECORD; } -#line 7552 "asmparse.cpp" +#line 8001 "prebuilt\\asmparse.cpp" break; - case 661: /* variantType: FILETIME_ */ -#line 1659 "asmparse.y" + case 677: /* variantType: FILETIME_ */ +#line 1669 "asmparse.y" { (yyval.int32) = VT_FILETIME; } -#line 7558 "asmparse.cpp" +#line 8007 "prebuilt\\asmparse.cpp" break; - case 662: /* variantType: BLOB_ */ -#line 1660 "asmparse.y" + case 678: /* variantType: BLOB_ */ +#line 1670 "asmparse.y" { (yyval.int32) = VT_BLOB; } -#line 7564 "asmparse.cpp" +#line 8013 "prebuilt\\asmparse.cpp" break; - case 663: /* variantType: STREAM_ */ -#line 1661 "asmparse.y" + case 679: /* variantType: STREAM_ */ +#line 1671 "asmparse.y" { (yyval.int32) = VT_STREAM; } -#line 7570 "asmparse.cpp" +#line 8019 "prebuilt\\asmparse.cpp" break; - case 664: /* variantType: STORAGE_ */ -#line 1662 "asmparse.y" + case 680: /* variantType: STORAGE_ */ +#line 1672 "asmparse.y" { (yyval.int32) = VT_STORAGE; } -#line 7576 "asmparse.cpp" +#line 8025 "prebuilt\\asmparse.cpp" break; - case 665: /* variantType: STREAMED_OBJECT_ */ -#line 1663 "asmparse.y" + case 681: /* variantType: STREAMED_OBJECT_ */ +#line 1673 "asmparse.y" { (yyval.int32) = VT_STREAMED_OBJECT; } -#line 7582 "asmparse.cpp" +#line 8031 "prebuilt\\asmparse.cpp" break; - case 666: /* variantType: STORED_OBJECT_ */ -#line 1664 "asmparse.y" + case 682: /* variantType: STORED_OBJECT_ */ +#line 1674 "asmparse.y" { (yyval.int32) = VT_STORED_OBJECT; } -#line 7588 "asmparse.cpp" +#line 8037 "prebuilt\\asmparse.cpp" break; - case 667: /* variantType: BLOB_OBJECT_ */ -#line 1665 "asmparse.y" + case 683: /* variantType: BLOB_OBJECT_ */ +#line 1675 "asmparse.y" { (yyval.int32) = VT_BLOB_OBJECT; } -#line 7594 "asmparse.cpp" +#line 8043 "prebuilt\\asmparse.cpp" break; - case 668: /* variantType: CF_ */ -#line 1666 "asmparse.y" + case 684: /* variantType: CF_ */ +#line 1676 "asmparse.y" { (yyval.int32) = VT_CF; } -#line 7600 "asmparse.cpp" +#line 8049 "prebuilt\\asmparse.cpp" break; - case 669: /* variantType: CLSID_ */ -#line 1667 "asmparse.y" + case 685: /* variantType: CLSID_ */ +#line 1677 "asmparse.y" { (yyval.int32) = VT_CLSID; } -#line 7606 "asmparse.cpp" +#line 8055 "prebuilt\\asmparse.cpp" break; - case 670: /* type: CLASS_ className */ -#line 1671 "asmparse.y" + case 686: /* type: CLASS_ className */ +#line 1681 "asmparse.y" { if((yyvsp[0].token) == PASM->m_tkSysString) { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_STRING); } else if((yyvsp[0].token) == PASM->m_tkSysObject) { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_OBJECT); } else (yyval.binstr) = 
parser->MakeTypeClass(ELEMENT_TYPE_CLASS, (yyvsp[0].token)); } -#line 7617 "asmparse.cpp" +#line 8066 "prebuilt\\asmparse.cpp" break; - case 671: /* type: OBJECT_ */ -#line 1677 "asmparse.y" + case 687: /* type: OBJECT_ */ +#line 1687 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_OBJECT); } -#line 7623 "asmparse.cpp" +#line 8072 "prebuilt\\asmparse.cpp" break; - case 672: /* type: VALUE_ CLASS_ className */ -#line 1678 "asmparse.y" + case 688: /* type: VALUE_ CLASS_ className */ +#line 1688 "asmparse.y" { (yyval.binstr) = parser->MakeTypeClass(ELEMENT_TYPE_VALUETYPE, (yyvsp[0].token)); } -#line 7629 "asmparse.cpp" +#line 8078 "prebuilt\\asmparse.cpp" break; - case 673: /* type: VALUETYPE_ className */ -#line 1679 "asmparse.y" + case 689: /* type: VALUETYPE_ className */ +#line 1689 "asmparse.y" { (yyval.binstr) = parser->MakeTypeClass(ELEMENT_TYPE_VALUETYPE, (yyvsp[0].token)); } -#line 7635 "asmparse.cpp" +#line 8084 "prebuilt\\asmparse.cpp" break; - case 674: /* type: type '[' ']' */ -#line 1680 "asmparse.y" + case 690: /* type: type '[' ']' */ +#line 1690 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SZARRAY); } -#line 7641 "asmparse.cpp" +#line 8090 "prebuilt\\asmparse.cpp" break; - case 675: /* type: type '[' bounds1 ']' */ -#line 1681 "asmparse.y" + case 691: /* type: type '[' bounds1 ']' */ +#line 1691 "asmparse.y" { (yyval.binstr) = parser->MakeTypeArray(ELEMENT_TYPE_ARRAY, (yyvsp[-3].binstr), (yyvsp[-1].binstr)); } -#line 7647 "asmparse.cpp" +#line 8096 "prebuilt\\asmparse.cpp" break; - case 676: /* type: type '&' */ -#line 1682 "asmparse.y" + case 692: /* type: type '&' */ +#line 1692 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt8(ELEMENT_TYPE_BYREF); } -#line 7653 "asmparse.cpp" +#line 8102 "prebuilt\\asmparse.cpp" break; - case 677: /* type: type '*' */ -#line 1683 "asmparse.y" + case 693: /* type: type '*' */ +#line 1693 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt8(ELEMENT_TYPE_PTR); } -#line 7659 "asmparse.cpp" +#line 8108 "prebuilt\\asmparse.cpp" break; - case 678: /* type: type PINNED_ */ -#line 1684 "asmparse.y" + case 694: /* type: type PINNED_ */ +#line 1694 "asmparse.y" { (yyval.binstr) = (yyvsp[-1].binstr); (yyval.binstr)->insertInt8(ELEMENT_TYPE_PINNED); } -#line 7665 "asmparse.cpp" +#line 8114 "prebuilt\\asmparse.cpp" break; - case 679: /* type: type MODREQ_ '(' typeSpec ')' */ -#line 1685 "asmparse.y" + case 695: /* type: type MODREQ_ '(' typeSpec ')' */ +#line 1695 "asmparse.y" { (yyval.binstr) = parser->MakeTypeClass(ELEMENT_TYPE_CMOD_REQD, (yyvsp[-1].token)); (yyval.binstr)->append((yyvsp[-4].binstr)); } -#line 7672 "asmparse.cpp" +#line 8121 "prebuilt\\asmparse.cpp" break; - case 680: /* type: type MODOPT_ '(' typeSpec ')' */ -#line 1687 "asmparse.y" + case 696: /* type: type MODOPT_ '(' typeSpec ')' */ +#line 1697 "asmparse.y" { (yyval.binstr) = parser->MakeTypeClass(ELEMENT_TYPE_CMOD_OPT, (yyvsp[-1].token)); (yyval.binstr)->append((yyvsp[-4].binstr)); } -#line 7679 "asmparse.cpp" +#line 8128 "prebuilt\\asmparse.cpp" break; - case 681: /* type: methodSpec callConv type '*' '(' sigArgs0 ')' */ -#line 1690 "asmparse.y" + case 697: /* type: methodSpec callConv type '*' '(' sigArgs0 ')' */ +#line 1700 "asmparse.y" { (yyval.binstr) = parser->MakeSig((yyvsp[-5].int32), (yyvsp[-4].binstr), (yyvsp[-1].binstr)); (yyval.binstr)->insertInt8(ELEMENT_TYPE_FNPTR); PASM->delArgNameList(PASM->m_firstArgName); 
PASM->m_firstArgName = parser->m_ANSFirst.POP(); PASM->m_lastArgName = parser->m_ANSLast.POP(); } -#line 7690 "asmparse.cpp" +#line 8139 "prebuilt\\asmparse.cpp" break; - case 682: /* type: type '<' tyArgs1 '>' */ -#line 1696 "asmparse.y" + case 698: /* type: type '<' tyArgs1 '>' */ +#line 1706 "asmparse.y" { if((yyvsp[-1].binstr) == NULL) (yyval.binstr) = (yyvsp[-3].binstr); else { (yyval.binstr) = new BinStr(); @@ -7698,33 +8147,33 @@ yyparse (void) (yyval.binstr)->append((yyvsp[-3].binstr)); corEmitInt((yyval.binstr), corCountArgs((yyvsp[-1].binstr))); (yyval.binstr)->append((yyvsp[-1].binstr)); delete (yyvsp[-3].binstr); delete (yyvsp[-1].binstr); }} -#line 7702 "asmparse.cpp" +#line 8151 "prebuilt\\asmparse.cpp" break; - case 683: /* type: '!' '!' int32 */ -#line 1703 "asmparse.y" + case 699: /* type: '!' '!' int32 */ +#line 1713 "asmparse.y" { //if(PASM->m_pCurMethod) { // if(($3 < 0)||((DWORD)$3 >= PASM->m_pCurMethod->m_NumTyPars)) // PASM->report->error("Invalid method type parameter '%d'\n",$3); (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_MVAR); corEmitInt((yyval.binstr), (yyvsp[0].int32)); //} else PASM->report->error("Method type parameter '%d' outside method scope\n",$3); } -#line 7713 "asmparse.cpp" +#line 8162 "prebuilt\\asmparse.cpp" break; - case 684: /* type: '!' int32 */ -#line 1709 "asmparse.y" + case 700: /* type: '!' int32 */ +#line 1719 "asmparse.y" { //if(PASM->m_pCurClass) { // if(($2 < 0)||((DWORD)$2 >= PASM->m_pCurClass->m_NumTyPars)) // PASM->report->error("Invalid type parameter '%d'\n",$2); (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_VAR); corEmitInt((yyval.binstr), (yyvsp[0].int32)); //} else PASM->report->error("Type parameter '%d' outside class scope\n",$2); } -#line 7724 "asmparse.cpp" +#line 8173 "prebuilt\\asmparse.cpp" break; - case 685: /* type: '!' '!' dottedName */ -#line 1715 "asmparse.y" + case 701: /* type: '!' '!' dottedName */ +#line 1725 "asmparse.y" { int eltype = ELEMENT_TYPE_MVAR; int n=-1; if(PASM->m_pCurMethod) n = PASM->m_pCurMethod->FindTyPar((yyvsp[0].string)); @@ -7740,11 +8189,11 @@ yyparse (void) n = 0x1FFFFFFF; } (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(eltype); corEmitInt((yyval.binstr),n); } -#line 7744 "asmparse.cpp" +#line 8193 "prebuilt\\asmparse.cpp" break; - case 686: /* type: '!' dottedName */ -#line 1730 "asmparse.y" + case 702: /* type: '!' 
dottedName */ +#line 1740 "asmparse.y" { int eltype = ELEMENT_TYPE_VAR; int n=-1; if(PASM->m_pCurClass && !newclass) n = PASM->m_pCurClass->FindTyPar((yyvsp[0].string)); @@ -7760,926 +8209,926 @@ yyparse (void) n = 0x1FFFFFFF; } (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(eltype); corEmitInt((yyval.binstr),n); } -#line 7764 "asmparse.cpp" +#line 8213 "prebuilt\\asmparse.cpp" break; - case 687: /* type: TYPEDREF_ */ -#line 1745 "asmparse.y" + case 703: /* type: TYPEDREF_ */ +#line 1755 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_TYPEDBYREF); } -#line 7770 "asmparse.cpp" +#line 8219 "prebuilt\\asmparse.cpp" break; - case 688: /* type: VOID_ */ -#line 1746 "asmparse.y" + case 704: /* type: VOID_ */ +#line 1756 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_VOID); } -#line 7776 "asmparse.cpp" +#line 8225 "prebuilt\\asmparse.cpp" break; - case 689: /* type: NATIVE_ INT_ */ -#line 1747 "asmparse.y" + case 705: /* type: NATIVE_ INT_ */ +#line 1757 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_I); } -#line 7782 "asmparse.cpp" +#line 8231 "prebuilt\\asmparse.cpp" break; - case 690: /* type: NATIVE_ UNSIGNED_ INT_ */ -#line 1748 "asmparse.y" + case 706: /* type: NATIVE_ UNSIGNED_ INT_ */ +#line 1758 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U); } -#line 7788 "asmparse.cpp" +#line 8237 "prebuilt\\asmparse.cpp" break; - case 691: /* type: NATIVE_ UINT_ */ -#line 1749 "asmparse.y" + case 707: /* type: NATIVE_ UINT_ */ +#line 1759 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U); } -#line 7794 "asmparse.cpp" +#line 8243 "prebuilt\\asmparse.cpp" break; - case 692: /* type: simpleType */ -#line 1750 "asmparse.y" + case 708: /* type: simpleType */ +#line 1760 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 7800 "asmparse.cpp" +#line 8249 "prebuilt\\asmparse.cpp" break; - case 693: /* type: ELLIPSIS type */ -#line 1751 "asmparse.y" + case 709: /* type: ELLIPSIS type */ +#line 1761 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); (yyval.binstr)->insertInt8(ELEMENT_TYPE_SENTINEL); } -#line 7806 "asmparse.cpp" +#line 8255 "prebuilt\\asmparse.cpp" break; - case 694: /* simpleType: CHAR_ */ -#line 1754 "asmparse.y" + case 710: /* simpleType: CHAR_ */ +#line 1764 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_CHAR); } -#line 7812 "asmparse.cpp" +#line 8261 "prebuilt\\asmparse.cpp" break; - case 695: /* simpleType: STRING_ */ -#line 1755 "asmparse.y" + case 711: /* simpleType: STRING_ */ +#line 1765 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_STRING); } -#line 7818 "asmparse.cpp" +#line 8267 "prebuilt\\asmparse.cpp" break; - case 696: /* simpleType: BOOL_ */ -#line 1756 "asmparse.y" + case 712: /* simpleType: BOOL_ */ +#line 1766 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_BOOLEAN); } -#line 7824 "asmparse.cpp" +#line 8273 "prebuilt\\asmparse.cpp" break; - case 697: /* simpleType: INT8_ */ -#line 1757 "asmparse.y" + case 713: /* simpleType: INT8_ */ +#line 1767 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_I1); } -#line 7830 "asmparse.cpp" +#line 8279 "prebuilt\\asmparse.cpp" break; - case 698: /* simpleType: INT16_ */ -#line 1758 "asmparse.y" + case 714: /* simpleType: INT16_ */ +#line 1768 "asmparse.y" { (yyval.binstr) = 
new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_I2); } -#line 7836 "asmparse.cpp" +#line 8285 "prebuilt\\asmparse.cpp" break; - case 699: /* simpleType: INT32_ */ -#line 1759 "asmparse.y" + case 715: /* simpleType: INT32_ */ +#line 1769 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_I4); } -#line 7842 "asmparse.cpp" +#line 8291 "prebuilt\\asmparse.cpp" break; - case 700: /* simpleType: INT64_ */ -#line 1760 "asmparse.y" + case 716: /* simpleType: INT64_ */ +#line 1770 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_I8); } -#line 7848 "asmparse.cpp" +#line 8297 "prebuilt\\asmparse.cpp" break; - case 701: /* simpleType: FLOAT32_ */ -#line 1761 "asmparse.y" + case 717: /* simpleType: FLOAT32_ */ +#line 1771 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_R4); } -#line 7854 "asmparse.cpp" +#line 8303 "prebuilt\\asmparse.cpp" break; - case 702: /* simpleType: FLOAT64_ */ -#line 1762 "asmparse.y" + case 718: /* simpleType: FLOAT64_ */ +#line 1772 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_R8); } -#line 7860 "asmparse.cpp" +#line 8309 "prebuilt\\asmparse.cpp" break; - case 703: /* simpleType: UNSIGNED_ INT8_ */ -#line 1763 "asmparse.y" + case 719: /* simpleType: UNSIGNED_ INT8_ */ +#line 1773 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U1); } -#line 7866 "asmparse.cpp" +#line 8315 "prebuilt\\asmparse.cpp" break; - case 704: /* simpleType: UNSIGNED_ INT16_ */ -#line 1764 "asmparse.y" + case 720: /* simpleType: UNSIGNED_ INT16_ */ +#line 1774 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U2); } -#line 7872 "asmparse.cpp" +#line 8321 "prebuilt\\asmparse.cpp" break; - case 705: /* simpleType: UNSIGNED_ INT32_ */ -#line 1765 "asmparse.y" + case 721: /* simpleType: UNSIGNED_ INT32_ */ +#line 1775 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U4); } -#line 7878 "asmparse.cpp" +#line 8327 "prebuilt\\asmparse.cpp" break; - case 706: /* simpleType: UNSIGNED_ INT64_ */ -#line 1766 "asmparse.y" + case 722: /* simpleType: UNSIGNED_ INT64_ */ +#line 1776 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U8); } -#line 7884 "asmparse.cpp" +#line 8333 "prebuilt\\asmparse.cpp" break; - case 707: /* simpleType: UINT8_ */ -#line 1767 "asmparse.y" + case 723: /* simpleType: UINT8_ */ +#line 1777 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U1); } -#line 7890 "asmparse.cpp" +#line 8339 "prebuilt\\asmparse.cpp" break; - case 708: /* simpleType: UINT16_ */ -#line 1768 "asmparse.y" + case 724: /* simpleType: UINT16_ */ +#line 1778 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U2); } -#line 7896 "asmparse.cpp" +#line 8345 "prebuilt\\asmparse.cpp" break; - case 709: /* simpleType: UINT32_ */ -#line 1769 "asmparse.y" + case 725: /* simpleType: UINT32_ */ +#line 1779 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U4); } -#line 7902 "asmparse.cpp" +#line 8351 "prebuilt\\asmparse.cpp" break; - case 710: /* simpleType: UINT64_ */ -#line 1770 "asmparse.y" + case 726: /* simpleType: UINT64_ */ +#line 1780 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(ELEMENT_TYPE_U8); } -#line 7908 "asmparse.cpp" +#line 8357 "prebuilt\\asmparse.cpp" break; - case 711: /* simpleType: 
TYPEDEF_TS */ -#line 1771 "asmparse.y" + case 727: /* simpleType: TYPEDEF_TS */ +#line 1781 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->append((yyvsp[0].tdd)->m_pbsTypeSpec); } -#line 7914 "asmparse.cpp" +#line 8363 "prebuilt\\asmparse.cpp" break; - case 712: /* bounds1: bound */ -#line 1774 "asmparse.y" + case 728: /* bounds1: bound */ +#line 1784 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); } -#line 7920 "asmparse.cpp" +#line 8369 "prebuilt\\asmparse.cpp" break; - case 713: /* bounds1: bounds1 ',' bound */ -#line 1775 "asmparse.y" + case 729: /* bounds1: bounds1 ',' bound */ +#line 1785 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); (yyvsp[-2].binstr)->append((yyvsp[0].binstr)); delete (yyvsp[0].binstr); } -#line 7926 "asmparse.cpp" +#line 8375 "prebuilt\\asmparse.cpp" break; - case 714: /* bound: %empty */ -#line 1778 "asmparse.y" + case 730: /* bound: %empty */ +#line 1788 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt32(0x7FFFFFFF); (yyval.binstr)->appendInt32(0x7FFFFFFF); } -#line 7932 "asmparse.cpp" +#line 8381 "prebuilt\\asmparse.cpp" break; - case 715: /* bound: ELLIPSIS */ -#line 1779 "asmparse.y" + case 731: /* bound: ELLIPSIS */ +#line 1789 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt32(0x7FFFFFFF); (yyval.binstr)->appendInt32(0x7FFFFFFF); } -#line 7938 "asmparse.cpp" +#line 8387 "prebuilt\\asmparse.cpp" break; - case 716: /* bound: int32 */ -#line 1780 "asmparse.y" + case 732: /* bound: int32 */ +#line 1790 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt32(0); (yyval.binstr)->appendInt32((yyvsp[0].int32)); } -#line 7944 "asmparse.cpp" +#line 8393 "prebuilt\\asmparse.cpp" break; - case 717: /* bound: int32 ELLIPSIS int32 */ -#line 1781 "asmparse.y" + case 733: /* bound: int32 ELLIPSIS int32 */ +#line 1791 "asmparse.y" { FAIL_UNLESS((yyvsp[-2].int32) <= (yyvsp[0].int32), ("lower bound %d must be <= upper bound %d\n", (yyvsp[-2].int32), (yyvsp[0].int32))); if ((yyvsp[-2].int32) > (yyvsp[0].int32)) { YYERROR; }; (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt32((yyvsp[-2].int32)); (yyval.binstr)->appendInt32((yyvsp[0].int32)-(yyvsp[-2].int32)+1); } -#line 7952 "asmparse.cpp" +#line 8401 "prebuilt\\asmparse.cpp" break; - case 718: /* bound: int32 ELLIPSIS */ -#line 1784 "asmparse.y" + case 734: /* bound: int32 ELLIPSIS */ +#line 1794 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); (yyval.binstr)->appendInt32(0x7FFFFFFF); } -#line 7958 "asmparse.cpp" +#line 8407 "prebuilt\\asmparse.cpp" break; - case 719: /* secDecl: _PERMISSION secAction typeSpec '(' nameValPairs ')' */ -#line 1789 "asmparse.y" + case 735: /* secDecl: _PERMISSION secAction typeSpec '(' nameValPairs ')' */ +#line 1799 "asmparse.y" { PASM->AddPermissionDecl((yyvsp[-4].secAct), (yyvsp[-3].token), (yyvsp[-1].pair)); } -#line 7964 "asmparse.cpp" +#line 8413 "prebuilt\\asmparse.cpp" break; - case 720: /* secDecl: _PERMISSION secAction typeSpec '=' '{' customBlobDescr '}' */ -#line 1791 "asmparse.y" + case 736: /* secDecl: _PERMISSION secAction typeSpec '=' '{' customBlobDescr '}' */ +#line 1801 "asmparse.y" { PASM->AddPermissionDecl((yyvsp[-5].secAct), (yyvsp[-4].token), (yyvsp[-1].binstr)); } -#line 7970 "asmparse.cpp" +#line 8419 "prebuilt\\asmparse.cpp" break; - case 721: /* secDecl: _PERMISSION secAction typeSpec */ -#line 1792 "asmparse.y" + case 737: /* secDecl: _PERMISSION secAction typeSpec */ +#line 1802 "asmparse.y" { 
PASM->AddPermissionDecl((yyvsp[-1].secAct), (yyvsp[0].token), (NVPair *)NULL); } -#line 7976 "asmparse.cpp" +#line 8425 "prebuilt\\asmparse.cpp" break; - case 722: /* secDecl: psetHead bytes ')' */ -#line 1793 "asmparse.y" + case 738: /* secDecl: psetHead bytes ')' */ +#line 1803 "asmparse.y" { PASM->AddPermissionSetDecl((yyvsp[-2].secAct), (yyvsp[-1].binstr)); } -#line 7982 "asmparse.cpp" +#line 8431 "prebuilt\\asmparse.cpp" break; - case 723: /* secDecl: _PERMISSIONSET secAction compQstring */ -#line 1795 "asmparse.y" + case 739: /* secDecl: _PERMISSIONSET secAction compQstring */ +#line 1805 "asmparse.y" { PASM->AddPermissionSetDecl((yyvsp[-1].secAct),BinStrToUnicode((yyvsp[0].binstr),true));} -#line 7988 "asmparse.cpp" +#line 8437 "prebuilt\\asmparse.cpp" break; - case 724: /* secDecl: _PERMISSIONSET secAction '=' '{' secAttrSetBlob '}' */ -#line 1797 "asmparse.y" + case 740: /* secDecl: _PERMISSIONSET secAction '=' '{' secAttrSetBlob '}' */ +#line 1807 "asmparse.y" { BinStr* ret = new BinStr(); ret->insertInt8('.'); corEmitInt(ret, nSecAttrBlobs); ret->append((yyvsp[-1].binstr)); PASM->AddPermissionSetDecl((yyvsp[-4].secAct),ret); nSecAttrBlobs = 0; } -#line 7999 "asmparse.cpp" +#line 8448 "prebuilt\\asmparse.cpp" break; - case 725: /* secAttrSetBlob: %empty */ -#line 1805 "asmparse.y" + case 741: /* secAttrSetBlob: %empty */ +#line 1815 "asmparse.y" { (yyval.binstr) = new BinStr(); nSecAttrBlobs = 0;} -#line 8005 "asmparse.cpp" +#line 8454 "prebuilt\\asmparse.cpp" break; - case 726: /* secAttrSetBlob: secAttrBlob */ -#line 1806 "asmparse.y" + case 742: /* secAttrSetBlob: secAttrBlob */ +#line 1816 "asmparse.y" { (yyval.binstr) = (yyvsp[0].binstr); nSecAttrBlobs = 1; } -#line 8011 "asmparse.cpp" +#line 8460 "prebuilt\\asmparse.cpp" break; - case 727: /* secAttrSetBlob: secAttrBlob ',' secAttrSetBlob */ -#line 1807 "asmparse.y" + case 743: /* secAttrSetBlob: secAttrBlob ',' secAttrSetBlob */ +#line 1817 "asmparse.y" { (yyval.binstr) = (yyvsp[-2].binstr); (yyval.binstr)->append((yyvsp[0].binstr)); nSecAttrBlobs++; } -#line 8017 "asmparse.cpp" +#line 8466 "prebuilt\\asmparse.cpp" break; - case 728: /* secAttrBlob: typeSpec '=' '{' customBlobNVPairs '}' */ -#line 1811 "asmparse.y" + case 744: /* secAttrBlob: typeSpec '=' '{' customBlobNVPairs '}' */ +#line 1821 "asmparse.y" { (yyval.binstr) = PASM->EncodeSecAttr(PASM->ReflectionNotation((yyvsp[-4].token)),(yyvsp[-1].binstr),nCustomBlobNVPairs); nCustomBlobNVPairs = 0; } -#line 8024 "asmparse.cpp" +#line 8473 "prebuilt\\asmparse.cpp" break; - case 729: /* secAttrBlob: CLASS_ SQSTRING '=' '{' customBlobNVPairs '}' */ -#line 1814 "asmparse.y" + case 745: /* secAttrBlob: CLASS_ SQSTRING '=' '{' customBlobNVPairs '}' */ +#line 1824 "asmparse.y" { (yyval.binstr) = PASM->EncodeSecAttr((yyvsp[-4].string),(yyvsp[-1].binstr),nCustomBlobNVPairs); nCustomBlobNVPairs = 0; } -#line 8031 "asmparse.cpp" +#line 8480 "prebuilt\\asmparse.cpp" break; - case 730: /* psetHead: _PERMISSIONSET secAction '=' '(' */ -#line 1818 "asmparse.y" + case 746: /* psetHead: _PERMISSIONSET secAction '=' '(' */ +#line 1828 "asmparse.y" { (yyval.secAct) = (yyvsp[-2].secAct); bParsingByteArray = TRUE; } -#line 8037 "asmparse.cpp" +#line 8486 "prebuilt\\asmparse.cpp" break; - case 731: /* psetHead: _PERMISSIONSET secAction BYTEARRAY_ '(' */ -#line 1820 "asmparse.y" + case 747: /* psetHead: _PERMISSIONSET secAction BYTEARRAY_ '(' */ +#line 1830 "asmparse.y" { (yyval.secAct) = (yyvsp[-2].secAct); bParsingByteArray = TRUE; } -#line 8043 "asmparse.cpp" +#line 8492 
"prebuilt\\asmparse.cpp" break; - case 732: /* nameValPairs: nameValPair */ -#line 1823 "asmparse.y" + case 748: /* nameValPairs: nameValPair */ +#line 1833 "asmparse.y" { (yyval.pair) = (yyvsp[0].pair); } -#line 8049 "asmparse.cpp" +#line 8498 "prebuilt\\asmparse.cpp" break; - case 733: /* nameValPairs: nameValPair ',' nameValPairs */ -#line 1824 "asmparse.y" + case 749: /* nameValPairs: nameValPair ',' nameValPairs */ +#line 1834 "asmparse.y" { (yyval.pair) = (yyvsp[-2].pair)->Concat((yyvsp[0].pair)); } -#line 8055 "asmparse.cpp" +#line 8504 "prebuilt\\asmparse.cpp" break; - case 734: /* nameValPair: compQstring '=' caValue */ -#line 1827 "asmparse.y" + case 750: /* nameValPair: compQstring '=' caValue */ +#line 1837 "asmparse.y" { (yyvsp[-2].binstr)->appendInt8(0); (yyval.pair) = new NVPair((yyvsp[-2].binstr), (yyvsp[0].binstr)); } -#line 8061 "asmparse.cpp" +#line 8510 "prebuilt\\asmparse.cpp" break; - case 735: /* truefalse: TRUE_ */ -#line 1830 "asmparse.y" + case 751: /* truefalse: TRUE_ */ +#line 1840 "asmparse.y" { (yyval.int32) = 1; } -#line 8067 "asmparse.cpp" +#line 8516 "prebuilt\\asmparse.cpp" break; - case 736: /* truefalse: FALSE_ */ -#line 1831 "asmparse.y" + case 752: /* truefalse: FALSE_ */ +#line 1841 "asmparse.y" { (yyval.int32) = 0; } -#line 8073 "asmparse.cpp" +#line 8522 "prebuilt\\asmparse.cpp" break; - case 737: /* caValue: truefalse */ -#line 1834 "asmparse.y" + case 753: /* caValue: truefalse */ +#line 1844 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_BOOLEAN); (yyval.binstr)->appendInt8((yyvsp[0].int32)); } -#line 8081 "asmparse.cpp" +#line 8530 "prebuilt\\asmparse.cpp" break; - case 738: /* caValue: int32 */ -#line 1837 "asmparse.y" + case 754: /* caValue: int32 */ +#line 1847 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_I4); (yyval.binstr)->appendInt32((yyvsp[0].int32)); } -#line 8089 "asmparse.cpp" +#line 8538 "prebuilt\\asmparse.cpp" break; - case 739: /* caValue: INT32_ '(' int32 ')' */ -#line 1840 "asmparse.y" + case 755: /* caValue: INT32_ '(' int32 ')' */ +#line 1850 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_I4); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); } -#line 8097 "asmparse.cpp" +#line 8546 "prebuilt\\asmparse.cpp" break; - case 740: /* caValue: compQstring */ -#line 1843 "asmparse.y" + case 756: /* caValue: compQstring */ +#line 1853 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_STRING); (yyval.binstr)->append((yyvsp[0].binstr)); delete (yyvsp[0].binstr); (yyval.binstr)->appendInt8(0); } -#line 8106 "asmparse.cpp" +#line 8555 "prebuilt\\asmparse.cpp" break; - case 741: /* caValue: className '(' INT8_ ':' int32 ')' */ -#line 1847 "asmparse.y" + case 757: /* caValue: className '(' INT8_ ':' int32 ')' */ +#line 1857 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_ENUM); char* sz = PASM->ReflectionNotation((yyvsp[-5].token)); strcpy_s((char *)(yyval.binstr)->getBuff((unsigned)strlen(sz) + 1), strlen(sz) + 1,sz); (yyval.binstr)->appendInt8(1); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); } -#line 8117 "asmparse.cpp" +#line 8566 "prebuilt\\asmparse.cpp" break; - case 742: /* caValue: className '(' INT16_ ':' int32 ')' */ -#line 1853 "asmparse.y" + case 758: /* caValue: className '(' INT16_ ':' int32 ')' */ +#line 1863 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_ENUM); 
char* sz = PASM->ReflectionNotation((yyvsp[-5].token)); strcpy_s((char *)(yyval.binstr)->getBuff((unsigned)strlen(sz) + 1), strlen(sz) + 1,sz); (yyval.binstr)->appendInt8(2); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); } -#line 8128 "asmparse.cpp" +#line 8577 "prebuilt\\asmparse.cpp" break; - case 743: /* caValue: className '(' INT32_ ':' int32 ')' */ -#line 1859 "asmparse.y" + case 759: /* caValue: className '(' INT32_ ':' int32 ')' */ +#line 1869 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_ENUM); char* sz = PASM->ReflectionNotation((yyvsp[-5].token)); strcpy_s((char *)(yyval.binstr)->getBuff((unsigned)strlen(sz) + 1), strlen(sz) + 1,sz); (yyval.binstr)->appendInt8(4); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); } -#line 8139 "asmparse.cpp" +#line 8588 "prebuilt\\asmparse.cpp" break; - case 744: /* caValue: className '(' int32 ')' */ -#line 1865 "asmparse.y" + case 760: /* caValue: className '(' int32 ')' */ +#line 1875 "asmparse.y" { (yyval.binstr) = new BinStr(); (yyval.binstr)->appendInt8(SERIALIZATION_TYPE_ENUM); char* sz = PASM->ReflectionNotation((yyvsp[-3].token)); strcpy_s((char *)(yyval.binstr)->getBuff((unsigned)strlen(sz) + 1), strlen(sz) + 1,sz); (yyval.binstr)->appendInt8(4); (yyval.binstr)->appendInt32((yyvsp[-1].int32)); } -#line 8150 "asmparse.cpp" +#line 8599 "prebuilt\\asmparse.cpp" break; - case 745: /* secAction: REQUEST_ */ -#line 1873 "asmparse.y" + case 761: /* secAction: REQUEST_ */ +#line 1883 "asmparse.y" { (yyval.secAct) = dclRequest; } -#line 8156 "asmparse.cpp" +#line 8605 "prebuilt\\asmparse.cpp" break; - case 746: /* secAction: DEMAND_ */ -#line 1874 "asmparse.y" + case 762: /* secAction: DEMAND_ */ +#line 1884 "asmparse.y" { (yyval.secAct) = dclDemand; } -#line 8162 "asmparse.cpp" +#line 8611 "prebuilt\\asmparse.cpp" break; - case 747: /* secAction: ASSERT_ */ -#line 1875 "asmparse.y" + case 763: /* secAction: ASSERT_ */ +#line 1885 "asmparse.y" { (yyval.secAct) = dclAssert; } -#line 8168 "asmparse.cpp" +#line 8617 "prebuilt\\asmparse.cpp" break; - case 748: /* secAction: DENY_ */ -#line 1876 "asmparse.y" + case 764: /* secAction: DENY_ */ +#line 1886 "asmparse.y" { (yyval.secAct) = dclDeny; } -#line 8174 "asmparse.cpp" +#line 8623 "prebuilt\\asmparse.cpp" break; - case 749: /* secAction: PERMITONLY_ */ -#line 1877 "asmparse.y" + case 765: /* secAction: PERMITONLY_ */ +#line 1887 "asmparse.y" { (yyval.secAct) = dclPermitOnly; } -#line 8180 "asmparse.cpp" +#line 8629 "prebuilt\\asmparse.cpp" break; - case 750: /* secAction: LINKCHECK_ */ -#line 1878 "asmparse.y" + case 766: /* secAction: LINKCHECK_ */ +#line 1888 "asmparse.y" { (yyval.secAct) = dclLinktimeCheck; } -#line 8186 "asmparse.cpp" +#line 8635 "prebuilt\\asmparse.cpp" break; - case 751: /* secAction: INHERITCHECK_ */ -#line 1879 "asmparse.y" + case 767: /* secAction: INHERITCHECK_ */ +#line 1889 "asmparse.y" { (yyval.secAct) = dclInheritanceCheck; } -#line 8192 "asmparse.cpp" +#line 8641 "prebuilt\\asmparse.cpp" break; - case 752: /* secAction: REQMIN_ */ -#line 1880 "asmparse.y" + case 768: /* secAction: REQMIN_ */ +#line 1890 "asmparse.y" { (yyval.secAct) = dclRequestMinimum; } -#line 8198 "asmparse.cpp" +#line 8647 "prebuilt\\asmparse.cpp" break; - case 753: /* secAction: REQOPT_ */ -#line 1881 "asmparse.y" + case 769: /* secAction: REQOPT_ */ +#line 1891 "asmparse.y" { (yyval.secAct) = dclRequestOptional; } -#line 8204 "asmparse.cpp" +#line 8653 "prebuilt\\asmparse.cpp" break; - case 754: /* secAction: REQREFUSE_ */ -#line 1882 "asmparse.y" 
+ case 770: /* secAction: REQREFUSE_ */ +#line 1892 "asmparse.y" { (yyval.secAct) = dclRequestRefuse; } -#line 8210 "asmparse.cpp" +#line 8659 "prebuilt\\asmparse.cpp" break; - case 755: /* secAction: PREJITGRANT_ */ -#line 1883 "asmparse.y" + case 771: /* secAction: PREJITGRANT_ */ +#line 1893 "asmparse.y" { (yyval.secAct) = dclPrejitGrant; } -#line 8216 "asmparse.cpp" +#line 8665 "prebuilt\\asmparse.cpp" break; - case 756: /* secAction: PREJITDENY_ */ -#line 1884 "asmparse.y" + case 772: /* secAction: PREJITDENY_ */ +#line 1894 "asmparse.y" { (yyval.secAct) = dclPrejitDenied; } -#line 8222 "asmparse.cpp" +#line 8671 "prebuilt\\asmparse.cpp" break; - case 757: /* secAction: NONCASDEMAND_ */ -#line 1885 "asmparse.y" + case 773: /* secAction: NONCASDEMAND_ */ +#line 1895 "asmparse.y" { (yyval.secAct) = dclNonCasDemand; } -#line 8228 "asmparse.cpp" +#line 8677 "prebuilt\\asmparse.cpp" break; - case 758: /* secAction: NONCASLINKDEMAND_ */ -#line 1886 "asmparse.y" + case 774: /* secAction: NONCASLINKDEMAND_ */ +#line 1896 "asmparse.y" { (yyval.secAct) = dclNonCasLinkDemand; } -#line 8234 "asmparse.cpp" +#line 8683 "prebuilt\\asmparse.cpp" break; - case 759: /* secAction: NONCASINHERITANCE_ */ -#line 1887 "asmparse.y" + case 775: /* secAction: NONCASINHERITANCE_ */ +#line 1897 "asmparse.y" { (yyval.secAct) = dclNonCasInheritance; } -#line 8240 "asmparse.cpp" +#line 8689 "prebuilt\\asmparse.cpp" break; - case 760: /* esHead: _LINE */ -#line 1891 "asmparse.y" + case 776: /* esHead: _LINE */ +#line 1901 "asmparse.y" { PASM->ResetLineNumbers(); nCurrPC = PASM->m_CurPC; PENV->bExternSource = TRUE; PENV->bExternSourceAutoincrement = FALSE; } -#line 8246 "asmparse.cpp" +#line 8695 "prebuilt\\asmparse.cpp" break; - case 761: /* esHead: P_LINE */ -#line 1892 "asmparse.y" + case 777: /* esHead: P_LINE */ +#line 1902 "asmparse.y" { PASM->ResetLineNumbers(); nCurrPC = PASM->m_CurPC; PENV->bExternSource = TRUE; PENV->bExternSourceAutoincrement = TRUE; } -#line 8252 "asmparse.cpp" +#line 8701 "prebuilt\\asmparse.cpp" break; - case 762: /* extSourceSpec: esHead int32 SQSTRING */ -#line 1895 "asmparse.y" + case 778: /* extSourceSpec: esHead int32 SQSTRING */ +#line 1905 "asmparse.y" { PENV->nExtLine = PENV->nExtLineEnd = (yyvsp[-1].int32); PENV->nExtCol = 0; PENV->nExtColEnd = static_cast<unsigned>(-1); PASM->SetSourceFileName((yyvsp[0].string));} -#line 8260 "asmparse.cpp" +#line 8709 "prebuilt\\asmparse.cpp" break; - case 763: /* extSourceSpec: esHead int32 */ -#line 1898 "asmparse.y" + case 779: /* extSourceSpec: esHead int32 */ +#line 1908 "asmparse.y" { PENV->nExtLine = PENV->nExtLineEnd = (yyvsp[0].int32); PENV->nExtCol = 0; PENV->nExtColEnd = static_cast<unsigned>(-1); } -#line 8267 "asmparse.cpp" +#line 8716 "prebuilt\\asmparse.cpp" break; - case 764: /* extSourceSpec: esHead int32 ':' int32 SQSTRING */ -#line 1900 "asmparse.y" + case 780: /* extSourceSpec: esHead int32 ':' int32 SQSTRING */ +#line 1910 "asmparse.y" { PENV->nExtLine = PENV->nExtLineEnd = (yyvsp[-3].int32); PENV->nExtCol=(yyvsp[-1].int32); PENV->nExtColEnd = static_cast<unsigned>(-1); PASM->SetSourceFileName((yyvsp[0].string));} -#line 8275 "asmparse.cpp" +#line 8724 "prebuilt\\asmparse.cpp" break; - case 765: /* extSourceSpec: esHead int32 ':' int32 */ -#line 1903 "asmparse.y" + case 781: /* extSourceSpec: esHead int32 ':' int32 */ +#line 1913 "asmparse.y" { PENV->nExtLine = PENV->nExtLineEnd = (yyvsp[-2].int32); PENV->nExtCol=(yyvsp[0].int32); PENV->nExtColEnd = static_cast<unsigned>(-1);} -#line 8282 "asmparse.cpp" +#line 8731 "prebuilt\\asmparse.cpp" break; - case 766: /*
extSourceSpec: esHead int32 ':' int32 ',' int32 SQSTRING */ -#line 1906 "asmparse.y" + case 782: /* extSourceSpec: esHead int32 ':' int32 ',' int32 SQSTRING */ +#line 1916 "asmparse.y" { PENV->nExtLine = PENV->nExtLineEnd = (yyvsp[-5].int32); PENV->nExtCol=(yyvsp[-3].int32); PENV->nExtColEnd = (yyvsp[-1].int32); PASM->SetSourceFileName((yyvsp[0].string));} -#line 8290 "asmparse.cpp" +#line 8739 "prebuilt\\asmparse.cpp" break; - case 767: /* extSourceSpec: esHead int32 ':' int32 ',' int32 */ -#line 1910 "asmparse.y" + case 783: /* extSourceSpec: esHead int32 ':' int32 ',' int32 */ +#line 1920 "asmparse.y" { PENV->nExtLine = PENV->nExtLineEnd = (yyvsp[-4].int32); PENV->nExtCol=(yyvsp[-2].int32); PENV->nExtColEnd = (yyvsp[0].int32); } -#line 8297 "asmparse.cpp" +#line 8746 "prebuilt\\asmparse.cpp" break; - case 768: /* extSourceSpec: esHead int32 ',' int32 ':' int32 SQSTRING */ -#line 1913 "asmparse.y" + case 784: /* extSourceSpec: esHead int32 ',' int32 ':' int32 SQSTRING */ +#line 1923 "asmparse.y" { PENV->nExtLine = (yyvsp[-5].int32); PENV->nExtLineEnd = (yyvsp[-3].int32); PENV->nExtCol=(yyvsp[-1].int32); PENV->nExtColEnd = static_cast<unsigned>(-1); PASM->SetSourceFileName((yyvsp[0].string));} -#line 8305 "asmparse.cpp" +#line 8754 "prebuilt\\asmparse.cpp" break; - case 769: /* extSourceSpec: esHead int32 ',' int32 ':' int32 */ -#line 1917 "asmparse.y" + case 785: /* extSourceSpec: esHead int32 ',' int32 ':' int32 */ +#line 1927 "asmparse.y" { PENV->nExtLine = (yyvsp[-4].int32); PENV->nExtLineEnd = (yyvsp[-2].int32); PENV->nExtCol=(yyvsp[0].int32); PENV->nExtColEnd = static_cast<unsigned>(-1); } -#line 8312 "asmparse.cpp" +#line 8761 "prebuilt\\asmparse.cpp" break; - case 770: /* extSourceSpec: esHead int32 ',' int32 ':' int32 ',' int32 SQSTRING */ -#line 1920 "asmparse.y" + case 786: /* extSourceSpec: esHead int32 ',' int32 ':' int32 ',' int32 SQSTRING */ +#line 1930 "asmparse.y" { PENV->nExtLine = (yyvsp[-7].int32); PENV->nExtLineEnd = (yyvsp[-5].int32); PENV->nExtCol=(yyvsp[-3].int32); PENV->nExtColEnd = (yyvsp[-1].int32); PASM->SetSourceFileName((yyvsp[0].string));} -#line 8320 "asmparse.cpp" +#line 8769 "prebuilt\\asmparse.cpp" break; - case 771: /* extSourceSpec: esHead int32 ',' int32 ':' int32 ',' int32 */ -#line 1924 "asmparse.y" + case 787: /* extSourceSpec: esHead int32 ',' int32 ':' int32 ',' int32 */ +#line 1934 "asmparse.y" { PENV->nExtLine = (yyvsp[-6].int32); PENV->nExtLineEnd = (yyvsp[-4].int32); PENV->nExtCol=(yyvsp[-2].int32); PENV->nExtColEnd = (yyvsp[0].int32); } -#line 8327 "asmparse.cpp" +#line 8776 "prebuilt\\asmparse.cpp" break; - case 772: /* extSourceSpec: esHead int32 QSTRING */ -#line 1926 "asmparse.y" + case 788: /* extSourceSpec: esHead int32 QSTRING */ +#line 1936 "asmparse.y" { PENV->nExtLine = PENV->nExtLineEnd = (yyvsp[-1].int32) - 1; PENV->nExtCol = 0; PENV->nExtColEnd = static_cast<unsigned>(-1); PASM->SetSourceFileName((yyvsp[0].binstr));} -#line 8335 "asmparse.cpp" +#line 8784 "prebuilt\\asmparse.cpp" break; - case 773: /* fileDecl: _FILE fileAttr dottedName fileEntry hashHead bytes ')' fileEntry */ -#line 1933 "asmparse.y" + case 789: /* fileDecl: _FILE fileAttr dottedName fileEntry hashHead bytes ')' fileEntry */ +#line 1943 "asmparse.y" { PASMM->AddFile((yyvsp[-5].string), (yyvsp[-6].fileAttr)|(yyvsp[-4].fileAttr)|(yyvsp[0].fileAttr), (yyvsp[-2].binstr)); } -#line 8341 "asmparse.cpp" +#line 8790 "prebuilt\\asmparse.cpp" break; - case 774: /* fileDecl: _FILE fileAttr dottedName fileEntry */ -#line 1934 "asmparse.y" + case 790: /* fileDecl: _FILE fileAttr dottedName fileEntry */
+#line 1944 "asmparse.y" { PASMM->AddFile((yyvsp[-1].string), (yyvsp[-2].fileAttr)|(yyvsp[0].fileAttr), NULL); } -#line 8347 "asmparse.cpp" +#line 8796 "prebuilt\\asmparse.cpp" break; - case 775: /* fileAttr: %empty */ -#line 1937 "asmparse.y" + case 791: /* fileAttr: %empty */ +#line 1947 "asmparse.y" { (yyval.fileAttr) = (CorFileFlags) 0; } -#line 8353 "asmparse.cpp" +#line 8802 "prebuilt\\asmparse.cpp" break; - case 776: /* fileAttr: fileAttr NOMETADATA_ */ -#line 1938 "asmparse.y" + case 792: /* fileAttr: fileAttr NOMETADATA_ */ +#line 1948 "asmparse.y" { (yyval.fileAttr) = (CorFileFlags) ((yyvsp[-1].fileAttr) | ffContainsNoMetaData); } -#line 8359 "asmparse.cpp" +#line 8808 "prebuilt\\asmparse.cpp" break; - case 777: /* fileEntry: %empty */ -#line 1941 "asmparse.y" + case 793: /* fileEntry: %empty */ +#line 1951 "asmparse.y" { (yyval.fileAttr) = (CorFileFlags) 0; } -#line 8365 "asmparse.cpp" +#line 8814 "prebuilt\\asmparse.cpp" break; - case 778: /* fileEntry: _ENTRYPOINT */ -#line 1942 "asmparse.y" + case 794: /* fileEntry: _ENTRYPOINT */ +#line 1952 "asmparse.y" { (yyval.fileAttr) = (CorFileFlags) 0x80000000; } -#line 8371 "asmparse.cpp" +#line 8820 "prebuilt\\asmparse.cpp" break; - case 779: /* hashHead: _HASH '=' '(' */ -#line 1945 "asmparse.y" + case 795: /* hashHead: _HASH '=' '(' */ +#line 1955 "asmparse.y" { bParsingByteArray = TRUE; } -#line 8377 "asmparse.cpp" +#line 8826 "prebuilt\\asmparse.cpp" break; - case 780: /* assemblyHead: _ASSEMBLY asmAttr dottedName */ -#line 1948 "asmparse.y" + case 796: /* assemblyHead: _ASSEMBLY asmAttr dottedName */ +#line 1958 "asmparse.y" { PASMM->StartAssembly((yyvsp[0].string), NULL, (DWORD)(yyvsp[-1].asmAttr), FALSE); } -#line 8383 "asmparse.cpp" +#line 8832 "prebuilt\\asmparse.cpp" break; - case 781: /* asmAttr: %empty */ -#line 1951 "asmparse.y" + case 797: /* asmAttr: %empty */ +#line 1961 "asmparse.y" { (yyval.asmAttr) = (CorAssemblyFlags) 0; } -#line 8389 "asmparse.cpp" +#line 8838 "prebuilt\\asmparse.cpp" break; - case 782: /* asmAttr: asmAttr RETARGETABLE_ */ -#line 1952 "asmparse.y" + case 798: /* asmAttr: asmAttr RETARGETABLE_ */ +#line 1962 "asmparse.y" { (yyval.asmAttr) = (CorAssemblyFlags) ((yyvsp[-1].asmAttr) | afRetargetable); } -#line 8395 "asmparse.cpp" +#line 8844 "prebuilt\\asmparse.cpp" break; - case 783: /* asmAttr: asmAttr WINDOWSRUNTIME_ */ -#line 1953 "asmparse.y" + case 799: /* asmAttr: asmAttr WINDOWSRUNTIME_ */ +#line 1963 "asmparse.y" { (yyval.asmAttr) = (CorAssemblyFlags) ((yyvsp[-1].asmAttr) | afContentType_WindowsRuntime); } -#line 8401 "asmparse.cpp" +#line 8850 "prebuilt\\asmparse.cpp" break; - case 784: /* asmAttr: asmAttr NOPLATFORM_ */ -#line 1954 "asmparse.y" + case 800: /* asmAttr: asmAttr NOPLATFORM_ */ +#line 1964 "asmparse.y" { (yyval.asmAttr) = (CorAssemblyFlags) ((yyvsp[-1].asmAttr) | afPA_NoPlatform); } -#line 8407 "asmparse.cpp" +#line 8856 "prebuilt\\asmparse.cpp" break; - case 785: /* asmAttr: asmAttr LEGACY_ LIBRARY_ */ -#line 1955 "asmparse.y" + case 801: /* asmAttr: asmAttr LEGACY_ LIBRARY_ */ +#line 1965 "asmparse.y" { (yyval.asmAttr) = (yyvsp[-2].asmAttr); } -#line 8413 "asmparse.cpp" +#line 8862 "prebuilt\\asmparse.cpp" break; - case 786: /* asmAttr: asmAttr CIL_ */ -#line 1956 "asmparse.y" + case 802: /* asmAttr: asmAttr CIL_ */ +#line 1966 "asmparse.y" { SET_PA((yyval.asmAttr),(yyvsp[-1].asmAttr),afPA_MSIL); } -#line 8419 "asmparse.cpp" +#line 8868 "prebuilt\\asmparse.cpp" break; - case 787: /* asmAttr: asmAttr X86_ */ -#line 1957 "asmparse.y" + case 803: /* asmAttr: asmAttr X86_ */ 
+#line 1967 "asmparse.y" { SET_PA((yyval.asmAttr),(yyvsp[-1].asmAttr),afPA_x86); } -#line 8425 "asmparse.cpp" +#line 8874 "prebuilt\\asmparse.cpp" break; - case 788: /* asmAttr: asmAttr AMD64_ */ -#line 1958 "asmparse.y" + case 804: /* asmAttr: asmAttr AMD64_ */ +#line 1968 "asmparse.y" { SET_PA((yyval.asmAttr),(yyvsp[-1].asmAttr),afPA_AMD64); } -#line 8431 "asmparse.cpp" +#line 8880 "prebuilt\\asmparse.cpp" break; - case 789: /* asmAttr: asmAttr ARM_ */ -#line 1959 "asmparse.y" + case 805: /* asmAttr: asmAttr ARM_ */ +#line 1969 "asmparse.y" { SET_PA((yyval.asmAttr),(yyvsp[-1].asmAttr),afPA_ARM); } -#line 8437 "asmparse.cpp" +#line 8886 "prebuilt\\asmparse.cpp" break; - case 790: /* asmAttr: asmAttr ARM64_ */ -#line 1960 "asmparse.y" + case 806: /* asmAttr: asmAttr ARM64_ */ +#line 1970 "asmparse.y" { SET_PA((yyval.asmAttr),(yyvsp[-1].asmAttr),afPA_ARM64); } -#line 8443 "asmparse.cpp" +#line 8892 "prebuilt\\asmparse.cpp" break; - case 793: /* assemblyDecl: _HASH ALGORITHM_ int32 */ -#line 1967 "asmparse.y" + case 809: /* assemblyDecl: _HASH ALGORITHM_ int32 */ +#line 1977 "asmparse.y" { PASMM->SetAssemblyHashAlg((yyvsp[0].int32)); } -#line 8449 "asmparse.cpp" +#line 8898 "prebuilt\\asmparse.cpp" break; - case 796: /* intOrWildcard: int32 */ -#line 1972 "asmparse.y" + case 812: /* intOrWildcard: int32 */ +#line 1982 "asmparse.y" { (yyval.int32) = (yyvsp[0].int32); } -#line 8455 "asmparse.cpp" +#line 8904 "prebuilt\\asmparse.cpp" break; - case 797: /* intOrWildcard: '*' */ -#line 1973 "asmparse.y" + case 813: /* intOrWildcard: '*' */ +#line 1983 "asmparse.y" { (yyval.int32) = 0xFFFF; } -#line 8461 "asmparse.cpp" +#line 8910 "prebuilt\\asmparse.cpp" break; - case 798: /* asmOrRefDecl: publicKeyHead bytes ')' */ -#line 1976 "asmparse.y" + case 814: /* asmOrRefDecl: publicKeyHead bytes ')' */ +#line 1986 "asmparse.y" { PASMM->SetAssemblyPublicKey((yyvsp[-1].binstr)); } -#line 8467 "asmparse.cpp" +#line 8916 "prebuilt\\asmparse.cpp" break; - case 799: /* asmOrRefDecl: _VER intOrWildcard ':' intOrWildcard ':' intOrWildcard ':' intOrWildcard */ -#line 1978 "asmparse.y" + case 815: /* asmOrRefDecl: _VER intOrWildcard ':' intOrWildcard ':' intOrWildcard ':' intOrWildcard */ +#line 1988 "asmparse.y" { PASMM->SetAssemblyVer((USHORT)(yyvsp[-6].int32), (USHORT)(yyvsp[-4].int32), (USHORT)(yyvsp[-2].int32), (USHORT)(yyvsp[0].int32)); } -#line 8473 "asmparse.cpp" +#line 8922 "prebuilt\\asmparse.cpp" break; - case 800: /* asmOrRefDecl: _LOCALE compQstring */ -#line 1979 "asmparse.y" + case 816: /* asmOrRefDecl: _LOCALE compQstring */ +#line 1989 "asmparse.y" { (yyvsp[0].binstr)->appendInt8(0); PASMM->SetAssemblyLocale((yyvsp[0].binstr),TRUE); } -#line 8479 "asmparse.cpp" +#line 8928 "prebuilt\\asmparse.cpp" break; - case 801: /* asmOrRefDecl: localeHead bytes ')' */ -#line 1980 "asmparse.y" + case 817: /* asmOrRefDecl: localeHead bytes ')' */ +#line 1990 "asmparse.y" { PASMM->SetAssemblyLocale((yyvsp[-1].binstr),FALSE); } -#line 8485 "asmparse.cpp" +#line 8934 "prebuilt\\asmparse.cpp" break; - case 804: /* publicKeyHead: _PUBLICKEY '=' '(' */ -#line 1985 "asmparse.y" + case 820: /* publicKeyHead: _PUBLICKEY '=' '(' */ +#line 1995 "asmparse.y" { bParsingByteArray = TRUE; } -#line 8491 "asmparse.cpp" +#line 8940 "prebuilt\\asmparse.cpp" break; - case 805: /* publicKeyTokenHead: _PUBLICKEYTOKEN '=' '(' */ -#line 1988 "asmparse.y" + case 821: /* publicKeyTokenHead: _PUBLICKEYTOKEN '=' '(' */ +#line 1998 "asmparse.y" { bParsingByteArray = TRUE; } -#line 8497 "asmparse.cpp" +#line 8946 "prebuilt\\asmparse.cpp" 
break; - case 806: /* localeHead: _LOCALE '=' '(' */ -#line 1991 "asmparse.y" + case 822: /* localeHead: _LOCALE '=' '(' */ +#line 2001 "asmparse.y" { bParsingByteArray = TRUE; } -#line 8503 "asmparse.cpp" +#line 8952 "prebuilt\\asmparse.cpp" break; - case 807: /* assemblyRefHead: _ASSEMBLY EXTERN_ asmAttr dottedName */ -#line 1995 "asmparse.y" + case 823: /* assemblyRefHead: _ASSEMBLY EXTERN_ asmAttr dottedName */ +#line 2005 "asmparse.y" { PASMM->StartAssembly((yyvsp[0].string), NULL, (yyvsp[-1].asmAttr), TRUE); } -#line 8509 "asmparse.cpp" +#line 8958 "prebuilt\\asmparse.cpp" break; - case 808: /* assemblyRefHead: _ASSEMBLY EXTERN_ asmAttr dottedName AS_ dottedName */ -#line 1997 "asmparse.y" + case 824: /* assemblyRefHead: _ASSEMBLY EXTERN_ asmAttr dottedName AS_ dottedName */ +#line 2007 "asmparse.y" { PASMM->StartAssembly((yyvsp[-2].string), (yyvsp[0].string), (yyvsp[-3].asmAttr), TRUE); } -#line 8515 "asmparse.cpp" +#line 8964 "prebuilt\\asmparse.cpp" break; - case 811: /* assemblyRefDecl: hashHead bytes ')' */ -#line 2004 "asmparse.y" + case 827: /* assemblyRefDecl: hashHead bytes ')' */ +#line 2014 "asmparse.y" { PASMM->SetAssemblyHashBlob((yyvsp[-1].binstr)); } -#line 8521 "asmparse.cpp" +#line 8970 "prebuilt\\asmparse.cpp" break; - case 813: /* assemblyRefDecl: publicKeyTokenHead bytes ')' */ -#line 2006 "asmparse.y" + case 829: /* assemblyRefDecl: publicKeyTokenHead bytes ')' */ +#line 2016 "asmparse.y" { PASMM->SetAssemblyPublicKeyToken((yyvsp[-1].binstr)); } -#line 8527 "asmparse.cpp" +#line 8976 "prebuilt\\asmparse.cpp" break; - case 814: /* assemblyRefDecl: AUTO_ */ -#line 2007 "asmparse.y" + case 830: /* assemblyRefDecl: AUTO_ */ +#line 2017 "asmparse.y" { PASMM->SetAssemblyAutodetect(); } -#line 8533 "asmparse.cpp" +#line 8982 "prebuilt\\asmparse.cpp" break; - case 815: /* exptypeHead: _CLASS EXTERN_ exptAttr dottedName */ -#line 2010 "asmparse.y" + case 831: /* exptypeHead: _CLASS EXTERN_ exptAttr dottedName */ +#line 2020 "asmparse.y" { PASMM->StartComType((yyvsp[0].string), (yyvsp[-1].exptAttr));} -#line 8539 "asmparse.cpp" +#line 8988 "prebuilt\\asmparse.cpp" break; - case 816: /* exportHead: _EXPORT exptAttr dottedName */ -#line 2013 "asmparse.y" + case 832: /* exportHead: _EXPORT exptAttr dottedName */ +#line 2023 "asmparse.y" { PASMM->StartComType((yyvsp[0].string), (yyvsp[-1].exptAttr)); } -#line 8545 "asmparse.cpp" +#line 8994 "prebuilt\\asmparse.cpp" break; - case 817: /* exptAttr: %empty */ -#line 2016 "asmparse.y" + case 833: /* exptAttr: %empty */ +#line 2026 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) 0; } -#line 8551 "asmparse.cpp" +#line 9000 "prebuilt\\asmparse.cpp" break; - case 818: /* exptAttr: exptAttr PRIVATE_ */ -#line 2017 "asmparse.y" + case 834: /* exptAttr: exptAttr PRIVATE_ */ +#line 2027 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) ((yyvsp[-1].exptAttr) | tdNotPublic); } -#line 8557 "asmparse.cpp" +#line 9006 "prebuilt\\asmparse.cpp" break; - case 819: /* exptAttr: exptAttr PUBLIC_ */ -#line 2018 "asmparse.y" + case 835: /* exptAttr: exptAttr PUBLIC_ */ +#line 2028 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) ((yyvsp[-1].exptAttr) | tdPublic); } -#line 8563 "asmparse.cpp" +#line 9012 "prebuilt\\asmparse.cpp" break; - case 820: /* exptAttr: exptAttr FORWARDER_ */ -#line 2019 "asmparse.y" + case 836: /* exptAttr: exptAttr FORWARDER_ */ +#line 2029 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) ((yyvsp[-1].exptAttr) | tdForwarder); } -#line 8569 "asmparse.cpp" +#line 9018 "prebuilt\\asmparse.cpp" break; - case 821: /* exptAttr: 
exptAttr NESTED_ PUBLIC_ */ -#line 2020 "asmparse.y" + case 837: /* exptAttr: exptAttr NESTED_ PUBLIC_ */ +#line 2030 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) ((yyvsp[-2].exptAttr) | tdNestedPublic); } -#line 8575 "asmparse.cpp" +#line 9024 "prebuilt\\asmparse.cpp" break; - case 822: /* exptAttr: exptAttr NESTED_ PRIVATE_ */ -#line 2021 "asmparse.y" + case 838: /* exptAttr: exptAttr NESTED_ PRIVATE_ */ +#line 2031 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) ((yyvsp[-2].exptAttr) | tdNestedPrivate); } -#line 8581 "asmparse.cpp" +#line 9030 "prebuilt\\asmparse.cpp" break; - case 823: /* exptAttr: exptAttr NESTED_ FAMILY_ */ -#line 2022 "asmparse.y" + case 839: /* exptAttr: exptAttr NESTED_ FAMILY_ */ +#line 2032 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) ((yyvsp[-2].exptAttr) | tdNestedFamily); } -#line 8587 "asmparse.cpp" +#line 9036 "prebuilt\\asmparse.cpp" break; - case 824: /* exptAttr: exptAttr NESTED_ ASSEMBLY_ */ -#line 2023 "asmparse.y" + case 840: /* exptAttr: exptAttr NESTED_ ASSEMBLY_ */ +#line 2033 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) ((yyvsp[-2].exptAttr) | tdNestedAssembly); } -#line 8593 "asmparse.cpp" +#line 9042 "prebuilt\\asmparse.cpp" break; - case 825: /* exptAttr: exptAttr NESTED_ FAMANDASSEM_ */ -#line 2024 "asmparse.y" + case 841: /* exptAttr: exptAttr NESTED_ FAMANDASSEM_ */ +#line 2034 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) ((yyvsp[-2].exptAttr) | tdNestedFamANDAssem); } -#line 8599 "asmparse.cpp" +#line 9048 "prebuilt\\asmparse.cpp" break; - case 826: /* exptAttr: exptAttr NESTED_ FAMORASSEM_ */ -#line 2025 "asmparse.y" + case 842: /* exptAttr: exptAttr NESTED_ FAMORASSEM_ */ +#line 2035 "asmparse.y" { (yyval.exptAttr) = (CorTypeAttr) ((yyvsp[-2].exptAttr) | tdNestedFamORAssem); } -#line 8605 "asmparse.cpp" +#line 9054 "prebuilt\\asmparse.cpp" break; - case 829: /* exptypeDecl: _FILE dottedName */ -#line 2032 "asmparse.y" + case 845: /* exptypeDecl: _FILE dottedName */ +#line 2042 "asmparse.y" { PASMM->SetComTypeFile((yyvsp[0].string)); } -#line 8611 "asmparse.cpp" +#line 9060 "prebuilt\\asmparse.cpp" break; - case 830: /* exptypeDecl: _CLASS EXTERN_ slashedName */ -#line 2033 "asmparse.y" + case 846: /* exptypeDecl: _CLASS EXTERN_ slashedName */ +#line 2043 "asmparse.y" { PASMM->SetComTypeComType((yyvsp[0].string)); } -#line 8617 "asmparse.cpp" +#line 9066 "prebuilt\\asmparse.cpp" break; - case 831: /* exptypeDecl: _ASSEMBLY EXTERN_ dottedName */ -#line 2034 "asmparse.y" + case 847: /* exptypeDecl: _ASSEMBLY EXTERN_ dottedName */ +#line 2044 "asmparse.y" { PASMM->SetComTypeAsmRef((yyvsp[0].string)); } -#line 8623 "asmparse.cpp" +#line 9072 "prebuilt\\asmparse.cpp" break; - case 832: /* exptypeDecl: MDTOKEN_ '(' int32 ')' */ -#line 2035 "asmparse.y" + case 848: /* exptypeDecl: MDTOKEN_ '(' int32 ')' */ +#line 2045 "asmparse.y" { if(!PASMM->SetComTypeImplementationTok((yyvsp[-1].int32))) PASM->report->error("Invalid implementation of exported type\n"); } -#line 8630 "asmparse.cpp" +#line 9079 "prebuilt\\asmparse.cpp" break; - case 833: /* exptypeDecl: _CLASS int32 */ -#line 2037 "asmparse.y" + case 849: /* exptypeDecl: _CLASS int32 */ +#line 2047 "asmparse.y" { if(!PASMM->SetComTypeClassTok((yyvsp[0].int32))) PASM->report->error("Invalid TypeDefID of exported type\n"); } -#line 8637 "asmparse.cpp" +#line 9086 "prebuilt\\asmparse.cpp" break; - case 836: /* manifestResHead: _MRESOURCE manresAttr dottedName */ -#line 2043 "asmparse.y" + case 852: /* manifestResHead: _MRESOURCE manresAttr dottedName */ +#line 2053 "asmparse.y" { 
PASMM->StartManifestRes((yyvsp[0].string), (yyvsp[0].string), (yyvsp[-1].manresAttr)); } -#line 8643 "asmparse.cpp" +#line 9092 "prebuilt\\asmparse.cpp" break; - case 837: /* manifestResHead: _MRESOURCE manresAttr dottedName AS_ dottedName */ -#line 2045 "asmparse.y" + case 853: /* manifestResHead: _MRESOURCE manresAttr dottedName AS_ dottedName */ +#line 2055 "asmparse.y" { PASMM->StartManifestRes((yyvsp[-2].string), (yyvsp[0].string), (yyvsp[-3].manresAttr)); } -#line 8649 "asmparse.cpp" +#line 9098 "prebuilt\\asmparse.cpp" break; - case 838: /* manresAttr: %empty */ -#line 2048 "asmparse.y" + case 854: /* manresAttr: %empty */ +#line 2058 "asmparse.y" { (yyval.manresAttr) = (CorManifestResourceFlags) 0; } -#line 8655 "asmparse.cpp" +#line 9104 "prebuilt\\asmparse.cpp" break; - case 839: /* manresAttr: manresAttr PUBLIC_ */ -#line 2049 "asmparse.y" + case 855: /* manresAttr: manresAttr PUBLIC_ */ +#line 2059 "asmparse.y" { (yyval.manresAttr) = (CorManifestResourceFlags) ((yyvsp[-1].manresAttr) | mrPublic); } -#line 8661 "asmparse.cpp" +#line 9110 "prebuilt\\asmparse.cpp" break; - case 840: /* manresAttr: manresAttr PRIVATE_ */ -#line 2050 "asmparse.y" + case 856: /* manresAttr: manresAttr PRIVATE_ */ +#line 2060 "asmparse.y" { (yyval.manresAttr) = (CorManifestResourceFlags) ((yyvsp[-1].manresAttr) | mrPrivate); } -#line 8667 "asmparse.cpp" +#line 9116 "prebuilt\\asmparse.cpp" break; - case 843: /* manifestResDecl: _FILE dottedName AT_ int32 */ -#line 2057 "asmparse.y" + case 859: /* manifestResDecl: _FILE dottedName AT_ int32 */ +#line 2067 "asmparse.y" { PASMM->SetManifestResFile((yyvsp[-2].string), (ULONG)(yyvsp[0].int32)); } -#line 8673 "asmparse.cpp" +#line 9122 "prebuilt\\asmparse.cpp" break; - case 844: /* manifestResDecl: _ASSEMBLY EXTERN_ dottedName */ -#line 2058 "asmparse.y" + case 860: /* manifestResDecl: _ASSEMBLY EXTERN_ dottedName */ +#line 2068 "asmparse.y" { PASMM->SetManifestResAsmRef((yyvsp[0].string)); } -#line 8679 "asmparse.cpp" +#line 9128 "prebuilt\\asmparse.cpp" break; -#line 8683 "asmparse.cpp" +#line 9132 "prebuilt\\asmparse.cpp" default: break; } @@ -8872,7 +9321,7 @@ yyparse (void) return yyresult; } -#line 2063 "asmparse.y" +#line 2073 "asmparse.y" #include "grammar_after.cpp" diff --git a/src/coreclr/ilasm/prebuilt/asmparse.grammar b/src/coreclr/ilasm/prebuilt/asmparse.grammar index 544a442ed560..dd70298c98b1 100644 --- a/src/coreclr/ilasm/prebuilt/asmparse.grammar +++ b/src/coreclr/ilasm/prebuilt/asmparse.grammar @@ -68,6 +68,22 @@ languageDecl : '.language' SQSTRING ; /* Basic tokens */ id : ID + /* Allow methodImpl attributes to be used as identifiers */ + | 'native' + | 'cil' + | 'optil' + | 'managed' + | 'unmanaged' + | 'forwardref' + | 'preservesig' + | 'runtime' + | 'internalcall' + | 'synchronized' + | 'noinlining' + | 'aggressiveinlining' + | 'nooptimization' + | 'aggressiveoptimization' + | 'async' | SQSTRING ; @@ -528,6 +544,7 @@ implAttr : /* EMPTY */ | implAttr 'aggressiveinlining' | implAttr 'nooptimization' | implAttr 'aggressiveoptimization' + | implAttr 'async' | implAttr 'flags' '(' int32 ')' ; diff --git a/src/coreclr/ilasm/typar.hpp b/src/coreclr/ilasm/typar.hpp index e7cf405703c5..5b6f94cc3ada 100644 --- a/src/coreclr/ilasm/typar.hpp +++ b/src/coreclr/ilasm/typar.hpp @@ -105,11 +105,6 @@ class TyParList { return ret; }; -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6211) // "Leaking memory 'b' due to an exception. 
Consider using a local catch block to clean up memory" -#endif /*_PREFAST_ */ - int ToArray(BinStr ***bounds, LPCWSTR** names, DWORD **attrs) { int n = Count(); @@ -140,10 +135,6 @@ class TyParList { return n; }; -#ifdef _PREFAST_ -#pragma warning(pop) -#endif /*_PREFAST_*/ - int ToArray(TyParDescr **ppTPD) { int n = Count(); diff --git a/src/coreclr/ilasm/writer.cpp b/src/coreclr/ilasm/writer.cpp index 4fafc190d42c..e718839a8664 100644 --- a/src/coreclr/ilasm/writer.cpp +++ b/src/coreclr/ilasm/writer.cpp @@ -28,10 +28,9 @@ HRESULT Assembler::InitMetaData() if(m_fInitialisedMetaData) return S_OK; - if(bClock) bClock->cMDInitBegin = GetTickCount(); + if(bClock) bClock->cMDInitBegin = minipal_lowres_ticks(); - hr = MetaDataGetDispenser(CLSID_CorMetaDataDispenser, - IID_IMetaDataDispenserEx2, (void **)&m_pDisp); + hr = CreateMetaDataDispenser(IID_IMetaDataDispenserEx2, (void **)&m_pDisp); if (FAILED(hr)) goto exit; @@ -93,7 +92,7 @@ HRESULT Assembler::InitMetaData() hr = S_OK; exit: - if(bClock) bClock->cMDInitEnd = GetTickCount(); + if(bClock) bClock->cMDInitEnd = minipal_lowres_ticks(); return hr; } /*********************************************************************************/ @@ -341,7 +340,7 @@ HRESULT Assembler::CreateDebugDirectory(BYTE(&pdbChecksum)[32]) memcpy_s(pdbChecksumData + pdbChecksumOffset, pdbChecksumSize - pdbChecksumOffset, &pdbChecksum, sizeof(pdbChecksum)); /* END PDB CHECKSUM */ - auto finish = + auto finish = [&](HRESULT hr) { if (codeViewData) { @@ -558,18 +557,11 @@ HRESULT Assembler::CreateExportDirectory() // normalize ordinals for(i = 0; i < Nentries; i++) pOT[i] -= (WORD)ordBase; // fill the export address table -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:22008) // "Suppress PREfast warnings about integer overflow" -#endif for(i = 0; i < Nentries; i++) { pEATE = m_EATList.PEEK(i); pEAT[pEATE->dwOrdinal - ordBase] = pEATE->dwStubRVA; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif // fill the export names table unsigned l, j; for(i = 0, j = 0; i < Nentries; i++) @@ -1170,10 +1162,6 @@ HRESULT Assembler::AllocateStrongNameSignature() return S_OK; } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) { HRESULT hr; @@ -1183,7 +1171,7 @@ HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) GUID deterministicGuid = GUID(); ULONG deterministicTimestamp = 0; - if(bClock) bClock->cMDEmitBegin = GetTickCount(); + if(bClock) bClock->cMDEmitBegin = minipal_lowres_ticks(); if(m_fReportProgress) printf("Creating PE file\n"); if (!m_pEmitter) { @@ -1196,7 +1184,7 @@ HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) if(!OnErrGo) return E_FAIL; } - if(bClock) bClock->cMDEmit1 = GetTickCount(); + if(bClock) bClock->cMDEmit1 = minipal_lowres_ticks(); // Allocate space for a strong name signature if we're delay or full // signing the assembly. 
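For context on the timing change running through writer.cpp above (GetTickCount() replaced by minipal_lowres_ticks()), here is a minimal sketch of the same bracketing pattern; the minipal/time.h header path and the 64-bit millisecond return type are assumptions, not something this patch states.

```cpp
// Minimal sketch, not part of the patch: bracket a phase with the cross-platform
// tick source the way the bClock counters above now do. Header path and return
// type (64-bit millisecond count) are assumed.
#include <minipal/time.h>
#include <cstdint>
#include <cstdio>

static void TimedPhase()
{
    int64_t begin = minipal_lowres_ticks();   // low-resolution tick count
    // ... emit metadata / write the PE file ...
    int64_t end = minipal_lowres_ticks();
    std::printf("phase took %lld ms\n", (long long)(end - begin));
}
```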
@@ -1211,7 +1199,7 @@ HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) m_dwComImageFlags |= COMIMAGE_FLAGS_STRONGNAMESIGNED; } - if(bClock) bClock->cMDEmit2 = GetTickCount(); + if(bClock) bClock->cMDEmit2 = minipal_lowres_ticks(); if(m_VTFList.COUNT()==0) { @@ -1338,9 +1326,9 @@ HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) } // All ref'ed items def'ed in this file are emitted, resolve member refs to member defs: - if(bClock) bClock->cRef2DefBegin = GetTickCount(); + if(bClock) bClock->cRef2DefBegin = minipal_lowres_ticks(); hr = ResolveLocalMemberRefs(); - if(bClock) bClock->cRef2DefEnd = GetTickCount(); + if(bClock) bClock->cRef2DefEnd = minipal_lowres_ticks(); if(FAILED(hr) &&(!OnErrGo)) goto exit; // Local member refs resolved, emit events, props and method impls @@ -1363,7 +1351,7 @@ HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) pSearch->m_fNewMembers = FALSE; } } - if(bClock) bClock->cMDEmit3 = GetTickCount(); + if(bClock) bClock->cMDEmit3 = minipal_lowres_ticks(); if(m_MethodImplDList.COUNT()) { if(m_fReportProgress) report->msg("Method Implementations (total): %d\n",m_MethodImplDList.COUNT()); @@ -1373,7 +1361,7 @@ HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) } } // Emit the rest of the metadata - if(bClock) bClock->cMDEmit4 = GetTickCount(); + if(bClock) bClock->cMDEmit4 = minipal_lowres_ticks(); hr = S_OK; if(m_pManifest) { @@ -1411,7 +1399,7 @@ HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) delete pTDD; } } - if(bClock) bClock->cMDEmitEnd = GetTickCount(); + if(bClock) bClock->cMDEmitEnd = minipal_lowres_ticks(); hr = DoLocalMemberRefFixups(); if(FAILED(hr) &&(!OnErrGo)) goto exit; @@ -1720,7 +1708,7 @@ HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) if (FAILED(hr)) goto exit; } - if(bClock) bClock->cFilegenBegin = GetTickCount(); + if(bClock) bClock->cFilegenBegin = minipal_lowres_ticks(); // actually output the meta-data if (FAILED(hr=m_pCeeFileGen->EmitMetaDataAt(m_pCeeFile, m_pEmitter, m_pILSection, metaDataOffset, metaData, metaDataSize))) goto exit; @@ -1804,7 +1792,3 @@ HRESULT Assembler::CreatePEFile(_In_ __nullterminated WCHAR *pwzOutputFilename) exit: return hr; } - -#ifdef _PREFAST_ -#pragma warning(pop) -#endif diff --git a/src/coreclr/ildasm/ceeload.cpp b/src/coreclr/ildasm/ceeload.cpp index e214710de819..df7666d6522b 100644 --- a/src/coreclr/ildasm/ceeload.cpp +++ b/src/coreclr/ildasm/ceeload.cpp @@ -188,7 +188,7 @@ BOOL PELoader::getCOMHeader(IMAGE_COR20_HEADER **ppCorHeader) // Get the image header from the image, then get the directory location // of the CLR header which may or may not be filled out. pImageHeader = (PIMAGE_NT_HEADERS32)Cor_RtlImageNtHeader(m_hMod, (ULONG) m_FileSize); - PREFIX_ASSUME(pImageHeader != NULL); + _ASSERTE(pImageHeader != NULL); pSectionHeader = (PIMAGE_SECTION_HEADER) Cor_RtlImageRvaToVa32(pImageHeader, (PBYTE)m_hMod, VAL32(pImageHeader->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_COMHEADER].VirtualAddress), (DWORD)m_FileSizeAligned /* FileLength */); @@ -200,7 +200,7 @@ BOOL PELoader::getCOMHeader(IMAGE_COR20_HEADER **ppCorHeader) // Get the image header from the image, then get the directory location // of the CLR header which may or may not be filled out. 
pImageHeader = (PIMAGE_NT_HEADERS64)Cor_RtlImageNtHeader(m_hMod, (ULONG) m_FileSize); - PREFIX_ASSUME(pImageHeader != NULL); + _ASSERTE(pImageHeader != NULL); pSectionHeader = (PIMAGE_SECTION_HEADER) Cor_RtlImageRvaToVa64(pImageHeader, (PBYTE)m_hMod, VAL32(pImageHeader->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_COMHEADER].VirtualAddress), (DWORD)m_FileSizeAligned /* FileLength */); @@ -228,7 +228,7 @@ BOOL PELoader::getVAforRVA(DWORD rva,void **ppva) // of the CLR header which may or may not be filled out. PIMAGE_NT_HEADERS32 pImageHeader; pImageHeader = (PIMAGE_NT_HEADERS32) Cor_RtlImageNtHeader(m_hMod, (ULONG) m_FileSize); - PREFIX_ASSUME(pImageHeader != NULL); + _ASSERTE(pImageHeader != NULL); pSectionHeader = (PIMAGE_SECTION_HEADER) Cor_RtlImageRvaToVa32(pImageHeader, (PBYTE)m_hMod, rva, (DWORD)m_FileSizeAligned /* FileLength */); } @@ -236,7 +236,7 @@ BOOL PELoader::getVAforRVA(DWORD rva,void **ppva) { PIMAGE_NT_HEADERS64 pImageHeader; pImageHeader = (PIMAGE_NT_HEADERS64) Cor_RtlImageNtHeader(m_hMod, (ULONG) m_FileSize); - PREFIX_ASSUME(pImageHeader != NULL); + _ASSERTE(pImageHeader != NULL); pSectionHeader = (PIMAGE_SECTION_HEADER) Cor_RtlImageRvaToVa64(pImageHeader, (PBYTE)m_hMod, rva, (DWORD)m_FileSizeAligned /* FileLength */); } diff --git a/src/coreclr/ildasm/dasm.cpp b/src/coreclr/ildasm/dasm.cpp index e30cea034d29..f6a9be411ede 100644 --- a/src/coreclr/ildasm/dasm.cpp +++ b/src/coreclr/ildasm/dasm.cpp @@ -477,10 +477,6 @@ HRESULT IsClassRefInScope(mdTypeRef classref) return hr; } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif BOOL EnumClasses() { HRESULT hr; @@ -869,9 +865,6 @@ BOOL EnumClasses() } // end for(i = 0; i <= g_NumClasses; i++) return TRUE; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif void DumpMscorlib(void* GUICookie) { @@ -1576,7 +1569,7 @@ mdToken TypeRefToTypeDef(mdToken tk, IMDInternalImport *pIMDI, IMDInternalImport IUnknown *pUnk; if(FAILED(pIAMDI[0]->QueryInterface(IID_IUnknown, (void**)&pUnk))) goto AssignAndReturn; - if (FAILED(GetMetaDataInternalInterfaceFromPublic( + if (FAILED(GetMDInternalInterfaceFromPublic( pUnk, IID_IMDInternalImport, (LPVOID *)ppIMDInew))) @@ -1754,11 +1747,6 @@ BYTE* skipType(BYTE* ptr) return(ptr); } - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif BYTE* PrettyPrintCABlobValue(PCCOR_SIGNATURE &typePtr, BYTE* dataPtr, BYTE* dataEnd, @@ -2116,9 +2104,6 @@ BYTE* PrettyPrintCABlobValue(PCCOR_SIGNATURE &typePtr, if(CloseParenthesis) appendStr(out,")"); return dataPtr; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif BOOL PrettyPrintCustomAttributeNVPairs(unsigned nPairs, BYTE* dataPtr, BYTE* dataEnd, CQuickBytes* out, void* GUICookie) { @@ -3483,10 +3468,6 @@ void PrettyPrintOverrideDecl(ULONG i, __inout __nullterminated char* szString, v if(g_fDumpTokens) szptr+=sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr),COMMENT(" /*%08X::%08X*/ "),tkDeclParent,(*g_pmi_list)[i].tkDecl); } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif BOOL DumpMethod(mdToken FuncToken, const char *pszClassName, DWORD dwEntryPointToken,void *GUICookie,BOOL DumpBody) { const char *pszMemberName = NULL;//[MAX_MEMBER_LENGTH]; @@ -3770,6 +3751,7 @@ BOOL DumpMethod(mdToken FuncToken, const char *pszClassName, DWORD dwEntryPointT if(IsMiAggressiveInlining(dwImplAttrs)) 
szptr+=sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr)," aggressiveinlining"); if(IsMiNoOptimization(dwImplAttrs)) szptr+=sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr)," nooptimization"); if(IsMiAggressiveOptimization(dwImplAttrs)) szptr+=sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr)," aggressiveoptimization"); + if(IsMiAsync(dwImplAttrs)) szptr+=sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr)," async"); szptr+=sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr),KEYWORD((char*)-1)); printLine(GUICookie, szString); VDELETE(buff); @@ -3950,9 +3932,6 @@ BOOL DumpMethod(mdToken FuncToken, const char *pszClassName, DWORD dwEntryPointT g_tkMVarOwner = tkMVarOwner; return TRUE; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif BOOL DumpField(mdToken FuncToken, const char *pszClassName,void *GUICookie, BOOL DumpBody) { @@ -5157,10 +5136,10 @@ void DumpCodeManager(IMAGE_COR20_HEADER *CORHeader, void* GUICookie) ULONG iCount = VAL32(CORHeader->CodeManagerTable.Size) / sizeof(GUID); for (ULONG i=0; iMetaData),GUICookie); } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - void DumpHeaderDetails(IMAGE_COR20_HEADER *CORHeader, void* GUICookie) { @@ -5913,12 +5884,6 @@ void DumpTable(unsigned long Table, const char *TableName, void* GUICookie) } } - - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif void DumpStatistics(IMAGE_COR20_HEADER *CORHeader, void* GUICookie) { int fileSize, miscPESize, miscCOMPlusSize, methodHeaderSize, methodBodySize; @@ -6492,9 +6457,6 @@ void DumpStatistics(IMAGE_COR20_HEADER *CORHeader, void* GUICookie) if(g_fDumpToPerfWriter) CloseHandle((char*) g_PerfDataFilePtr); } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif void DumpHexbytes(__inout __nullterminated char* szptr,BYTE *pb, DWORD fromPtr, DWORD toPtr, DWORD limPtr) { @@ -6971,8 +6933,7 @@ void DumpMetaInfo(_In_ __nullterminated const WCHAR* pwzFileName, _In_opt_z_ con if(pch && (!_wcsicmp(pch+1,W("lib")) || !_wcsicmp(pch+1,W("obj")))) { // This works only when all the rest does not // Init and run. - if (SUCCEEDED(MetaDataGetDispenser(CLSID_CorMetaDataDispenser, - IID_IMetaDataDispenserEx, (void **)&g_pDisp))) + if (SUCCEEDED(CreateMetaDataDispenser(IID_IMetaDataDispenserEx, (void **)&g_pDisp))) { WCHAR *pwzObjFileName=NULL; if (pszObjFileName) @@ -6993,8 +6954,7 @@ void DumpMetaInfo(_In_ __nullterminated const WCHAR* pwzFileName, _In_opt_z_ con HRESULT hr = S_OK; if(g_pDisp == NULL) { - hr = MetaDataGetDispenser(CLSID_CorMetaDataDispenser, - IID_IMetaDataDispenserEx, (void **)&g_pDisp); + hr = CreateMetaDataDispenser(IID_IMetaDataDispenserEx, (void **)&g_pDisp); } if(SUCCEEDED(hr)) { @@ -7086,7 +7046,7 @@ void DumpSummary() } qbMemberSig.Shrink(0); pcSig = cComSig ? PrettyPrintSig(pComSig, cComSig, "", &qbMemberSig, g_pImport,NULL) : "NO SIGNATURE"; - PREFIX_ASSUME(ProperName((char*)pcMember) != 0); + _ASSERTE(ProperName((char*)pcMember) != 0); sprintf_s(szString,SZSTRING_SIZE,"// %08X [GLM] %s : %s", tkMember,ProperName((char*)pcMember),pcSig); printLine(g_pFile,szString); } @@ -7105,7 +7065,7 @@ void DumpSummary() } qbMemberSig.Shrink(0); pcSig = cComSig ? 
PrettyPrintSig(pComSig, cComSig, "", &qbMemberSig, g_pImport,NULL) : "NO SIGNATURE"; - PREFIX_ASSUME(ProperName((char*)pcMember) != 0); + _ASSERTE(ProperName((char*)pcMember) != 0); sprintf_s(szString,SZSTRING_SIZE,"// %08X [GLF] %s : %s", tkMember,ProperName((char*)pcMember),pcSig); printLine(g_pFile,szString); } @@ -7120,7 +7080,7 @@ void DumpSummary() printLine(g_pFile, szString); continue; } - PREFIX_ASSUME(ProperName((char*)pcClass) != 0); + _ASSERTE(ProperName((char*)pcClass) != 0); if(*pcNS) sprintf_s(szFQN,4096,"%s.%s", ProperName((char*)pcNS),ProperName((char*)pcClass)); else strcpy_s(szFQN,4096,ProperName((char*)pcClass)); sprintf_s(szString,SZSTRING_SIZE,"// %08X [CLS] %s", g_cl_list[i],szFQN); @@ -7138,7 +7098,7 @@ void DumpSummary() } qbMemberSig.Shrink(0); pcSig = cComSig ? PrettyPrintSig(pComSig, cComSig, "", &qbMemberSig, g_pImport,NULL) : "NO SIGNATURE"; - PREFIX_ASSUME(ProperName((char*)pcMember) != 0); + _ASSERTE(ProperName((char*)pcMember) != 0); sprintf_s(szString,SZSTRING_SIZE,"// %08X [MET] %s::%s : %s", tkMember,szFQN,ProperName((char*)pcMember),pcSig); printLine(g_pFile,szString); } @@ -7157,7 +7117,7 @@ void DumpSummary() } qbMemberSig.Shrink(0); pcSig = cComSig ? PrettyPrintSig(pComSig, cComSig, "", &qbMemberSig, g_pImport,NULL) : "NO SIGNATURE"; - PREFIX_ASSUME(ProperName((char*)pcMember) != 0); + _ASSERTE(ProperName((char*)pcMember) != 0); sprintf_s(szString,SZSTRING_SIZE,"// %08X [FLD] %s::%s : %s", tkMember,szFQN,ProperName((char*)pcMember),pcSig); printLine(g_pFile,szString); } @@ -7188,7 +7148,7 @@ void DumpSummary() break; } } - PREFIX_ASSUME(ProperName((char*)pcMember) != 0); + _ASSERTE(ProperName((char*)pcMember) != 0); sprintf_s(szString,SZSTRING_SIZE,"// %08X [EVT] %s::%s : %s", tkMember,szFQN,ProperName((char*)pcMember),pcSig); printLine(g_pFile,szString); } @@ -7206,7 +7166,7 @@ void DumpSummary() } qbMemberSig.Shrink(0); pcSig = cComSig ? 
PrettyPrintSig(pComSig, cComSig, "", &qbMemberSig, g_pImport,NULL) : "NO SIGNATURE"; - PREFIX_ASSUME(ProperName((char*)pcMember) != 0); + _ASSERTE(ProperName((char*)pcMember) != 0); sprintf_s(szString,SZSTRING_SIZE,"// %08X [PRO] %s::%s : %s", tkMember,szFQN,ProperName((char*)pcMember),pcSig); printLine(g_pFile,szString); } @@ -7376,10 +7336,6 @@ FILE* OpenOutput(_In_ __nullterminated const char* szFileName) // // Init PELoader, dump file header info // -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif BOOL DumpFile() { BOOL fSuccess = FALSE; @@ -7506,7 +7462,7 @@ BOOL DumpFile() g_cbMetaData = VAL32(g_CORHeader->MetaData.Size); } - if (FAILED(GetMetaDataInternalInterface( + if (FAILED(GetMDInternalInterface( (BYTE *)g_pMetaData, g_cbMetaData, openFlags, @@ -7520,7 +7476,7 @@ BOOL DumpFile() } TokenSigInit(g_pImport); - if (FAILED(MetaDataGetDispenser(CLSID_CorMetaDataDispenser, IID_IMetaDataDispenser, (LPVOID*)&pMetaDataDispenser))) + if (FAILED(CreateMetaDataDispenser(IID_IMetaDataDispenser, (LPVOID*)&pMetaDataDispenser))) { if (g_fDumpHeader) DumpHeader(g_CORHeader, g_pFile); @@ -7824,9 +7780,6 @@ BOOL DumpFile() pMetaDataDispenser->Release(); return fSuccess; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif #ifdef _MSC_VER #pragma warning(default : 4640) diff --git a/src/coreclr/ildasm/dis.cpp b/src/coreclr/ildasm/dis.cpp index 44ce665814c1..8a70fc714433 100644 --- a/src/coreclr/ildasm/dis.cpp +++ b/src/coreclr/ildasm/dis.cpp @@ -819,7 +819,7 @@ BOOL SourceLinesHelper(void *GUICookie, LineCodeDescr* pLCD, _Out_writes_(nSize) PAL_TRY(Param *, pParam, &param) { GUID guidLang={0},guidLangVendor={0},guidDoc={0}; - CHAR zLang[GUID_STR_BUFFER_LEN],zVendor[GUID_STR_BUFFER_LEN],zDoc[GUID_STR_BUFFER_LEN]; + CHAR zLang[MINIPAL_GUID_BUFFER_LEN],zVendor[MINIPAL_GUID_BUFFER_LEN],zDoc[MINIPAL_GUID_BUFFER_LEN]; ULONG32 k; if(pParam->pLCD->FileToken != ulWasFileToken) { @@ -830,9 +830,9 @@ BOOL SourceLinesHelper(void *GUICookie, LineCodeDescr* pLCD, _Out_writes_(nSize) ||memcmp(&guidLangVendor,&guidWasLangVendor,sizeof(GUID)) ||memcmp(&guidDoc,&guidWasDoc,sizeof(GUID))) { - GuidToLPSTR(guidLang,zLang); - GuidToLPSTR(guidLangVendor,zVendor); - GuidToLPSTR(guidDoc,zDoc); + minipal_guid_as_string(guidLang, zLang, MINIPAL_GUID_BUFFER_LEN); + minipal_guid_as_string(guidLangVendor, zVendor, MINIPAL_GUID_BUFFER_LEN); + minipal_guid_as_string(guidDoc, zDoc, MINIPAL_GUID_BUFFER_LEN); sprintf_s(szString,SZSTRING_SIZE,"%s%s '%s', '%s', '%s'", g_szAsmCodeIndent,KEYWORD(".language"), zLang,zVendor,zDoc); printLine(pParam->GUICookie,szString); @@ -861,10 +861,6 @@ BOOL SourceLinesHelper(void *GUICookie, LineCodeDescr* pLCD, _Out_writes_(nSize) return param.fRet; } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif BOOL Disassemble(IMDInternalImport *pImport, BYTE *ILHeader, void *GUICookie, mdToken FuncToken, ParamDescriptor* pszArgname, ULONG ulArgs) { DWORD PC; @@ -1961,9 +1957,6 @@ BOOL Disassemble(IMDInternalImport *pImport, BYTE *ILHeader, void *GUICookie, md return TRUE; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif void SplitSignatureByCommas(__inout __nullterminated char* szString, __inout __nullterminated char* pszTailSig, diff --git a/src/coreclr/ildasm/dman.cpp b/src/coreclr/ildasm/dman.cpp index 91761723a081..d6409243d161 100644 --- a/src/coreclr/ildasm/dman.cpp +++ b/src/coreclr/ildasm/dman.cpp @@ -16,8 +16,6 @@ #include
"dynamicarray.h" #include "resource.h" -#include "clrinternal.h" - #ifndef MAX_LOCALE_NAME #define MAX_LOCALE_NAME (32) #endif @@ -120,7 +118,7 @@ void DumpScope(void* GUICookie) mdModule mdm; GUID mvid; WCHAR scopeName[1024]; - CHAR guidString[GUID_STR_BUFFER_LEN]; + CHAR guidString[MINIPAL_GUID_BUFFER_LEN]; memset(scopeName,0,1024*sizeof(WCHAR)); if(SUCCEEDED(g_pPubImport->GetScopeProps( scopeName, 1024, NULL, &mvid))&& scopeName[0]) { @@ -133,7 +131,7 @@ void DumpScope(void* GUICookie) VDELETE(sz); } printLine(GUICookie,szString); - GuidToLPSTR(mvid, guidString); + minipal_guid_as_string(mvid, guidString, MINIPAL_GUID_BUFFER_LEN); sprintf_s(szString,SZSTRING_SIZE,COMMENT("%s// MVID: %s"),g_szAsmCodeIndent,guidString); printLine(GUICookie,szString); @@ -987,14 +985,14 @@ IMetaDataAssemblyImport* GetAssemblyImport(void* GUICookie) if(pdwSize && *pdwSize) { pbManifest += sizeof(DWORD); - if (SUCCEEDED(hr = GetMetaDataInternalInterface( + if (SUCCEEDED(hr = GetMDInternalInterface( pbManifest, VAL32(*pdwSize), ofRead, IID_IMDInternalImport, (LPVOID *)&pParam->pImport))) { - if (FAILED(hr = GetMetaDataPublicInterfaceFromInternal( + if (FAILED(hr = GetMDPublicInterfaceFromInternal( pParam->pImport, IID_IMetaDataAssemblyImport, (LPVOID *)&pParam->pAssemblyImport))) diff --git a/src/coreclr/ildasm/exe/CMakeLists.txt b/src/coreclr/ildasm/exe/CMakeLists.txt index c16fbed72c09..648cbc236b47 100644 --- a/src/coreclr/ildasm/exe/CMakeLists.txt +++ b/src/coreclr/ildasm/exe/CMakeLists.txt @@ -69,7 +69,6 @@ set(ILDASM_LINK_LIBRARIES mdcompiler_wks mdruntime_wks mdruntimerw_wks - mdstaticapi ${END_LIBRARY_GROUP} # End group of libraries that have circular references corguids ) diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index f51399812a08..4bdfd7409f84 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -175,9 +175,6 @@ End Crst DbgTransport End -Crst DelegateToFPtrHash -End - Crst GenericDictionaryExpansion AcquiredBefore PinnedHeapHandleTable IbcProfile LoaderHeap SystemDomainDelayedUnloadList UniqueStack End @@ -211,10 +208,6 @@ Crst MethodTableExposedObject Unordered End -Crst RetThunkCache - AcquiredBefore LoaderHeap -End - Crst FuncPtrStubs AcquiredBefore IbcProfile LoaderHeap UniqueStack CodeFragmentHeap JumpStubCache End @@ -532,3 +525,6 @@ End Crst PerfMap AcquiredAfter CodeVersioning AssemblyList End + +Crst InterfaceDispatchGlobalLists +End \ No newline at end of file diff --git a/src/coreclr/inc/arraylist.h b/src/coreclr/inc/arraylist.h index f2ffe29d26f4..9e2a360e210c 100644 --- a/src/coreclr/inc/arraylist.h +++ b/src/coreclr/inc/arraylist.h @@ -9,6 +9,9 @@ #include #include // offsetof +// Forward Declarations +template struct cdac_data; + // // ArrayList is a simple class which is used to contain a growable // list of pointers, stored in chunks. 
Modification is by appending @@ -263,8 +266,21 @@ class ArrayListBase return BlockIterator((ArrayListBlock *) &m_firstBlock, m_count); } + friend struct cdac_data; +}; + +template<> +struct cdac_data +{ + static constexpr size_t Count = offsetof(ArrayListBase, m_count); + static constexpr size_t FirstBlock = offsetof(ArrayListBase, m_firstBlock); + + static constexpr size_t Next = offsetof(ArrayListBase::ArrayListBlock, m_next); + static constexpr size_t Size = offsetof(ArrayListBase::ArrayListBlock, m_blockSize); + static constexpr size_t ArrayStart = offsetof(ArrayListBase::ArrayListBlock, m_array); }; + class ArrayList : public ArrayListBase { public: diff --git a/src/coreclr/inc/assemblyprobeextension.h b/src/coreclr/inc/assemblyprobeextension.h new file mode 100644 index 000000000000..460857655efd --- /dev/null +++ b/src/coreclr/inc/assemblyprobeextension.h @@ -0,0 +1,68 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef HAVE_ASSEMBLY_PROBE_EXTENSIONS_H +#define HAVE_ASSEMBLY_PROBE_EXTENSIONS_H + +#include +#include "bundle.h" + +class ProbeExtensionResult +{ +public: + enum class Type : int32_t + { + Invalid, + Bundle, + External, + }; + + Type Type; + union + { + BundleFileLocation BundleLocation; + struct + { + void* Data; + int64_t Size; + } ExternalData; + }; + + ProbeExtensionResult() + : Type{Type::Invalid} + { } + + static ProbeExtensionResult Bundle(BundleFileLocation location) + { + return ProbeExtensionResult(location); + } + + static ProbeExtensionResult External(void* data, int64_t size) + { + return ProbeExtensionResult(data, size); + } + + static ProbeExtensionResult Invalid() { LIMITED_METHOD_CONTRACT; return ProbeExtensionResult(); } + + bool IsValid() const { return Type != Type::Invalid; } + +private: + ProbeExtensionResult(BundleFileLocation location) + : Type{Type::Bundle} + , BundleLocation{location} + { } + + ProbeExtensionResult(void* data, int64_t size) + : Type{Type::External} + , ExternalData{data, size} + { } +}; + +class AssemblyProbeExtension +{ +public: + static bool IsEnabled(); + static ProbeExtensionResult Probe(const SString& path, bool pathIsBundleRelative = false); +}; + +#endif // HAVE_ASSEMBLY_PROBE_EXTENSIONS_H diff --git a/src/coreclr/inc/bundle.h b/src/coreclr/inc/bundle.h index 3d55a54b3515..3aa76e3651da 100644 --- a/src/coreclr/inc/bundle.h +++ b/src/coreclr/inc/bundle.h @@ -19,15 +19,15 @@ struct BundleFileLocation { INT64 Size; INT64 Offset; - INT64 UncompresedSize; + INT64 UncompressedSize; BundleFileLocation() - { + { LIMITED_METHOD_CONTRACT; Size = 0; - Offset = 0; - UncompresedSize = 0; + Offset = 0; + UncompressedSize = 0; } static BundleFileLocation Invalid() { LIMITED_METHOD_CONTRACT; return BundleFileLocation(); } @@ -43,17 +43,22 @@ class Bundle Bundle(LPCSTR bundlePath, BundleProbeFn *probe); BundleFileLocation Probe(const SString& path, bool pathIsBundleRelative = false) const; - const SString &Path() const { LIMITED_METHOD_CONTRACT; return m_path; } - const SString &BasePath() const { LIMITED_METHOD_CONTRACT; return m_basePath; } + // Paths do not change and should remain valid for the lifetime of the Bundle + const SString& Path() const { LIMITED_METHOD_CONTRACT; return m_path; } + const UTF8* BasePath() const { LIMITED_METHOD_CONTRACT; return m_basePath.GetUTF8(); } + + // Extraction path does not change and should remain valid for the lifetime of the Bundle + bool HasExtractedFiles() const { LIMITED_METHOD_CONTRACT; return 
!m_extractionPath.IsEmpty(); } + const WCHAR* ExtractionPath() const { LIMITED_METHOD_CONTRACT; return m_extractionPath.GetUnicode(); } static Bundle* AppBundle; // The BundleInfo for the current app, initialized by coreclr_initialize. static bool AppIsBundle() { LIMITED_METHOD_CONTRACT; return AppBundle != nullptr; } static BundleFileLocation ProbeAppBundle(const SString& path, bool pathIsBundleRelative = false); private: - SString m_path; // The path to single-file executable BundleProbeFn *m_probe; + SString m_extractionPath; // The path to the extraction location, if bundle extracted any files SString m_basePath; // The prefix to denote a path within the bundle COUNT_T m_basePathLength; diff --git a/src/coreclr/inc/check.h b/src/coreclr/inc/check.h index 21d717c13e6b..d12e6210f258 100644 --- a/src/coreclr/inc/check.h +++ b/src/coreclr/inc/check.h @@ -510,7 +510,7 @@ CHECK CheckValue(TYPENAME &val) // in a free build they are passed through to the compiler to use in optimization. //-------------------------------------------------------------------------------- -#if defined(_PREFAST_) || defined(_PREFIX_) || defined(__clang_analyzer__) +#if defined(__clang_analyzer__) #define COMPILER_ASSUME_MSG(_condition, _message) if (!(_condition)) __UNREACHABLE(); #define COMPILER_ASSUME_MSGF(_condition, args) if (!(_condition)) __UNREACHABLE(); #else @@ -532,37 +532,11 @@ CHECK CheckValue(TYPENAME &val) #endif // DACCESS_COMPILE -#endif // _PREFAST_ || _PREFIX_ +#endif #define COMPILER_ASSUME(_condition) \ COMPILER_ASSUME_MSG(_condition, "") -//-------------------------------------------------------------------------------- -// PREFIX_ASSUME_MSG and PREFAST_ASSUME_MSG are just another name -// for COMPILER_ASSUME_MSG -// In a checked build these turn into asserts; in a free build -// they are passed through to the compiler to use in optimization; -// via an __assume(_condition) optimization hint. -//-------------------------------------------------------------------------------- - -#define PREFIX_ASSUME_MSG(_condition, _message) \ - COMPILER_ASSUME_MSG(_condition, _message) - -#define PREFIX_ASSUME_MSGF(_condition, args) \ - COMPILER_ASSUME_MSGF(_condition, args) - -#define PREFIX_ASSUME(_condition) \ - COMPILER_ASSUME_MSG(_condition, "") - -#define PREFAST_ASSUME_MSG(_condition, _message) \ - COMPILER_ASSUME_MSG(_condition, _message) - -#define PREFAST_ASSUME_MSGF(_condition, args) \ - COMPILER_ASSUME_MSGF(_condition, args) - -#define PREFAST_ASSUME(_condition) \ - COMPILER_ASSUME_MSG(_condition, "") - //-------------------------------------------------------------------------------- // UNREACHABLE points are locations in the code which should not be able to be // reached under any circumstances (e.g. 
a default in a switch which is supposed to @@ -723,7 +697,9 @@ CHECK CheckOverflow(UINT64 value1, UINT64 value2); #ifdef __APPLE__ CHECK CheckOverflow(SIZE_T value1, SIZE_T value2); #endif +#ifndef __wasm__ CHECK CheckOverflow(PTR_CVOID address, UINT offset); +#endif #if defined(_MSC_VER) CHECK CheckOverflow(const void *address, ULONG offset); #endif diff --git a/src/coreclr/inc/check.inl b/src/coreclr/inc/check.inl index 34a2956d1be6..b0f65c5d218b 100644 --- a/src/coreclr/inc/check.inl +++ b/src/coreclr/inc/check.inl @@ -156,7 +156,7 @@ inline CHECK CheckAligned(UINT64 value, UINT alignment) CHECK_OK; } -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__wasm__) inline CHECK CheckAligned(SIZE_T value, UINT alignment) { STATIC_CONTRACT_WRAPPER; @@ -237,7 +237,7 @@ inline CHECK CheckOverflow(const void *address, UINT64 offset) CHECK_OK; } -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__wasm__) inline CHECK CheckOverflow(const void *address, SIZE_T offset) { CHECK((UINT64) address + offset >= (UINT64) address); @@ -316,10 +316,11 @@ inline CHECK CheckUnderflow(const void *address, UINT64 offset) CHECK_OK; } -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__wasm__) inline CHECK CheckUnderflow(const void *address, SIZE_T offset) { -#if POINTER_BITS == 32 + // SIZE_T is 32bit on wasm32 +#if !defined(__wasm__) && POINTER_BITS == 32 CHECK(offset >> 32 == 0); CHECK((UINT) (SIZE_T) address - (UINT) offset <= (UINT) (SIZE_T) address); #else diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 000452757cea..ad65b364003d 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -10,9 +10,9 @@ // // Given any config knob below that looks like this example: // RETAIL_CONFIG_DWORD_INFO(INTERNAL_LogEnable, W("LogEnable"), 0, "Turns on the traditional CLR log.") -// --------- -// | -// -------------------- +// --------- +// | +// ------- // | // V // You can set an environment variable DOTNET_LogEnable=1 to enable it. 
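The comment block above spells out how a knob name becomes a DOTNET_-prefixed environment variable; below is a tiny self-contained illustration of that mapping. The getenv probe is for illustration only — the runtime's real lookup goes through CLRConfig, not getenv.

```cpp
// Illustration only: the W("LogEnable") knob above is surfaced to users as the
// DOTNET_LogEnable environment variable (e.g. DOTNET_LogEnable=1).
#include <cstdio>
#include <cstdlib>

int main()
{
    const char* value = std::getenv("DOTNET_LogEnable");
    std::printf("DOTNET_LogEnable=%s\n", value != nullptr ? value : "(unset)");
    return 0;
}
```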
@@ -109,11 +109,6 @@ // * W("UNSUPPORTED_Security_DisableTransparency") // * W("Security_LegacyHMACMode") <---------------------- (No EXTERNAL prefix) -/// -/// AppDomain -/// -CONFIG_DWORD_INFO(INTERNAL_EnableFullDebug, W("EnableFullDebug"), 0, "Heavy-weight checking for AD boundary violations (AD leaks)") - /// /// Jit Pitching /// @@ -148,7 +143,6 @@ CONFIG_DWORD_INFO(INTERNAL_BreakOnDumpToken, W("BreakOnDumpToken"), 0xffffffff, RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_BreakOnEELoad, W("BreakOnEELoad"), 0, "") CONFIG_DWORD_INFO(INTERNAL_BreakOnEEShutdown, W("BreakOnEEShutdown"), 0, "") CONFIG_DWORD_INFO(INTERNAL_BreakOnExceptionInGetThrowable, W("BreakOnExceptionInGetThrowable"), 0, "") -CONFIG_DWORD_INFO(INTERNAL_BreakOnFindMethod, W("BreakOnFindMethod"), 0, "Breaks in findMethodInternal when it searches for the specified token.") CONFIG_DWORD_INFO(INTERNAL_BreakOnFirstPass, W("BreakOnFirstPass"), 0, "") CONFIG_DWORD_INFO(INTERNAL_BreakOnHR, W("BreakOnHR"), 0, "Debug.cpp, IfFailxxx use this macro to stop if hr matches ") CONFIG_STRING_INFO(INTERNAL_BreakOnInstantiation, W("BreakOnInstantiation"), "Very useful for debugging generic class instantiation.") @@ -177,11 +171,8 @@ CONFIG_DWORD_INFO(INTERNAL_DbgBreakOnRawInt3, W("DbgBreakOnRawInt3"), 0, "Allows CONFIG_DWORD_INFO(INTERNAL_DbgBreakOnSendBreakpoint, W("DbgBreakOnSendBreakpoint"), 0, "Allows an assert when sending a breakpoint to the right side") CONFIG_DWORD_INFO(INTERNAL_DbgBreakOnSetIP, W("DbgBreakOnSetIP"), 0, "Allows an assert when setting the IP") CONFIG_DWORD_INFO(INTERNAL_DbgCheckInt3, W("DbgCheckInt3"), 0, "Asserts if the debugger explicitly writes int3 instead of calling SetUnmanagedBreakpoint") -CONFIG_DWORD_INFO(INTERNAL_DbgDACAssertOnMismatch, W("DbgDACAssertOnMismatch"), 0, "Allows an assert when the mscordacwks and mscorwks dll versions don't match") CONFIG_DWORD_INFO(INTERNAL_DbgDACEnableAssert, W("DbgDACEnableAssert"), 0, "Enables extra validity checking in DAC - assumes target isn't corrupt") -RETAIL_CONFIG_DWORD_INFO(INTERNAL_DbgDACSkipVerifyDlls, W("DbgDACSkipVerifyDlls"), 0, "Allows disabling the check to ensure mscordacwks and mscorwks dll versions match") CONFIG_DWORD_INFO(INTERNAL_DbgDelayHelper, W("DbgDelayHelper"), 0, "Varies the wait in the helper thread startup for testing race between threads") -RETAIL_CONFIG_DWORD_INFO(INTERNAL_DbgDisableDynamicSymsCompat, W("DbgDisableDynamicSymsCompat"), 0, "") CONFIG_DWORD_INFO(INTERNAL_DbgDisableTargetConsistencyAsserts, W("DbgDisableTargetConsistencyAsserts"), 0, "Allows explicitly testing with corrupt targets") CONFIG_DWORD_INFO(INTERNAL_DbgExtraThreads, W("DbgExtraThreads"), 0, "Allows extra unmanaged threads to run and throw debug events for stress testing") CONFIG_DWORD_INFO(INTERNAL_DbgExtraThreadsCantStop, W("DbgExtraThreadsCantStop"), 0, "Allows extra unmanaged threads in can't stop region to run and throw debug events for stress testing") @@ -199,19 +190,14 @@ CONFIG_DWORD_INFO(INTERNAL_DbgPingInterop, W("DbgPingInterop"), 0, "Allows check CONFIG_DWORD_INFO(INTERNAL_DbgRace, W("DbgRace"), 0, "Allows pausing for native debug events to get hijicked") CONFIG_DWORD_INFO(INTERNAL_DbgShortcutCanary, W("DbgShortcutCanary"), 0, "Allows a way to force canary to fail to be able to test failure paths") CONFIG_DWORD_INFO(INTERNAL_DbgSkipMEOnStep, W("DbgSkipMEOnStep"), 0, "Turns off MethodEnter checks") -CONFIG_DWORD_INFO(INTERNAL_DbgSkipVerCheck, W("DbgSkipVerCheck"), 0, "Allows different RS and LS versions (for servicing work)") -CONFIG_DWORD_INFO(INTERNAL_DbgTC, 
W("DbgTC"), 0, "Allows checking boundary compression for offset mappings") CONFIG_DWORD_INFO(INTERNAL_DbgTransportFaultInject, W("DbgTransportFaultInject"), 0, "Allows injecting a fault for testing the debug transport") CONFIG_DWORD_INFO(INTERNAL_DbgTransportLog, W("DbgTransportLog"), 0 /* LE_None */, "Turns on logging for the debug transport") CONFIG_DWORD_INFO(INTERNAL_DbgTransportLogClass, W("DbgTransportLogClass"), (DWORD)-1 /* LC_All */, "Mask to control what is logged in DbgTransportLog") -RETAIL_CONFIG_STRING_INFO(UNSUPPORTED_DbgTransportProxyAddress, W("DbgTransportProxyAddress"), "Allows specifying the transport proxy address") CONFIG_DWORD_INFO(INTERNAL_DbgTrapOnSkip, W("DbgTrapOnSkip"), 0, "Allows breaking when we skip a breakpoint") CONFIG_DWORD_INFO(INTERNAL_DbgWaitTimeout, W("DbgWaitTimeout"), 1, "Specifies the timeout value for waits") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_DbgWFDETimeout, W("DbgWFDETimeout"), 25, "Specifies the timeout value for wait when waiting for a debug event") CONFIG_DWORD_INFO(INTERNAL_RaiseExceptionOnAssert, W("RaiseExceptionOnAssert"), 0, "Raise a first chance (if set to 1) or second chance (if set to 2) exception on asserts.") -CONFIG_DWORD_INFO(INTERNAL_DebugBreakOnVerificationFailure, W("DebugBreakOnVerificationFailure"), 0, "Halts the jit on verification failure") CONFIG_STRING_INFO(INTERNAL_DebuggerBreakPoint, W("DebuggerBreakPoint"), "Allows counting various debug events") -CONFIG_STRING_INFO(INTERNAL_DebugVerify, W("DebugVerify"), "Control for tracing in peverify") CONFIG_DWORD_INFO(INTERNAL_EncApplyChanges, W("EncApplyChanges"), 0, "Allows breaking when ApplyEditAndContinue is called") CONFIG_DWORD_INFO(INTERNAL_EnCBreakOnRemapComplete, W("EnCBreakOnRemapComplete"), 0, "Allows breaking after N RemapCompletes") CONFIG_DWORD_INFO(INTERNAL_EnCBreakOnRemapOpportunity, W("EnCBreakOnRemapOpportunity"), 0, "Allows breaking after N RemapOpportunities") @@ -259,7 +245,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_legacyCorruptedStateExceptionsPolicy, W("le CONFIG_DWORD_INFO(INTERNAL_SuppressLostExceptionTypeAssert, W("SuppressLostExceptionTypeAssert"), 0, "") RETAIL_CONFIG_DWORD_INFO(INTERNAL_UseEntryPointFilter, W("UseEntryPointFilter"), 0, "") RETAIL_CONFIG_DWORD_INFO(INTERNAL_Corhost_Swallow_Uncaught_Exceptions, W("Corhost_Swallow_Uncaught_Exceptions"), 0, "") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_LegacyExceptionHandling, W("LegacyExceptionHandling"), 0, "Enable legacy exception handling."); +CONFIG_DWORD_INFO(INTERNAL_LogStackOverflowExit, W("LogStackOverflowExit"), 0, "Temporary flag to log stack overflow exit process") /// /// Garbage collector @@ -268,6 +254,7 @@ CONFIG_DWORD_INFO(INTERNAL_FastGCCheckStack, W("FastGCCheckStack"), 0, "") CONFIG_DWORD_INFO(INTERNAL_FastGCStress, W("FastGCStress"), 0, "Reduce the number of GCs done by enabling GCStress") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_GCBreakOnOOM, W("GCBreakOnOOM"), 0, "Does a DebugBreak at the soonest time we detect an OOM") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_gcConcurrent, W("gcConcurrent"), (DWORD)-1, "Enables/Disables concurrent GC") +RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_UseGCWriteBarrierCopy, W("UseGCWriteBarrierCopy"), 1, "Use a copy of the write barrier for the GC. This is somewhat faster and for optimizations where the barrier is mutated as the program runs. 
Setting this to 0 removes scenarios where the write barrier is ever mutable.") #ifdef FEATURE_CONSERVATIVE_GC RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_gcConservative, W("gcConservative"), 0, "Enables/Disables conservative GC") @@ -324,6 +311,12 @@ RETAIL_CONFIG_STRING_INFO(EXTERNAL_AltJitOs, W("AltJitOS"), "Sets target OS for RETAIL_CONFIG_STRING_INFO(EXTERNAL_AltJitExcludeAssemblies, W("AltJitExcludeAssemblies"), "Do not use AltJit on this semicolon-delimited list of assemblies.") #endif // defined(ALLOW_SXS_JIT) +#ifdef FEATURE_INTERPRETER +RETAIL_CONFIG_STRING_INFO(EXTERNAL_InterpreterName, W("InterpreterName"), "Primary interpreter to use") +CONFIG_STRING_INFO(INTERNAL_InterpreterPath, W("InterpreterPath"), "Full path to the interpreter to use") +RETAIL_CONFIG_STRING_INFO(EXTERNAL_Interpreter, W("Interpreter"), "Enables Interpreter and selectively limits it to the specified methods.") +#endif // FEATURE_INTERPRETER + RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitHostMaxSlabCache, W("JitHostMaxSlabCache"), 0x1000000, "Sets jit host max slab cache size, 16MB default") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitOptimizeType, W("JitOptimizeType"), 0 /* OPT_DEFAULT */, "") @@ -391,8 +384,6 @@ CONFIG_DWORD_INFO(INTERNAL_MD_ApplyDeltaBreak, W("MD_ApplyDeltaBreak"), 0, "ASSE RETAIL_CONFIG_DWORD_INFO(INTERNAL_AssertOnBadImageFormat, W("AssertOnBadImageFormat"), 0, "ASSERT when invalid MD read") RETAIL_CONFIG_DWORD_INFO(INTERNAL_MD_DeltaCheck, W("MD_DeltaCheck"), 1, "Some checks of GUID when applying EnC (?)") CONFIG_DWORD_INFO(INTERNAL_MD_EncDelta, W("MD_EncDelta"), 0, "Forces EnC Delta format in MD (?)") -RETAIL_CONFIG_DWORD_INFO(INTERNAL_MD_ForceNoColDesSharing, W("MD_ForceNoColDesSharing"), 0, "Don't know - the only usage I could find is #if 0 (?)") -CONFIG_DWORD_INFO(INTERNAL_MD_KeepKnownCA, W("MD_KeepKnownCA"), 0, "Something with known CAs (?)") CONFIG_DWORD_INFO(INTERNAL_MD_MiniMDBreak, W("MD_MiniMDBreak"), 0, "ASSERT when creating CMiniMdRw class") CONFIG_DWORD_INFO(INTERNAL_MD_PreSaveBreak, W("MD_PreSaveBreak"), 0, "ASSERT when calling CMiniMdRw::PreSave") CONFIG_DWORD_INFO(INTERNAL_MD_RegMetaBreak, W("MD_RegMetaBreak"), 0, "ASSERT when creating RegMeta class") @@ -414,12 +405,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_SpinLimitConstant, W("SpinLimitConstant"), 0x0 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_SpinRetryCount, W("SpinRetryCount"), 0xA, "Hex value specifying the number of times the entire spin process is repeated (when applicable)") RETAIL_CONFIG_DWORD_INFO(INTERNAL_Monitor_SpinCount, W("Monitor_SpinCount"), 0x1e, "Hex value specifying the maximum number of spin iterations Monitor may perform upon contention on acquiring the lock before waiting.") -/// -/// Native Binder -/// - -CONFIG_DWORD_INFO(INTERNAL_SymDiffDump, W("SymDiffDump"), 0, "Used to create the map file while binding the assembly. 
Used by SemanticDiffer") - /// /// Profiling API / ETW /// @@ -440,8 +425,6 @@ RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_ProfAPI_ProfilerCompatibilitySetting, W("P RETAIL_CONFIG_DWORD_INFO(EXTERNAL_ProfAPI_DetachMinSleepMs, W("ProfAPI_DetachMinSleepMs"), 0, "The minimum time, in milliseconds, the CLR will wait before checking whether a profiler that is in the process of detaching is ready to be unloaded.") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_ProfAPI_DetachMaxSleepMs, W("ProfAPI_DetachMaxSleepMs"), 0, "The maximum time, in milliseconds, the CLR will wait before checking whether a profiler that is in the process of detaching is ready to be unloaded.") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_ProfAPI_RejitOnAttach, W("ProfApi_RejitOnAttach"), 1, "Enables the ability for profilers to rejit methods on attach.") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_ProfAPI_InliningTracking, W("ProfApi_InliningTracking"), 1, "Enables the runtime's tracking of inlining for profiler ReJIT.") -CONFIG_DWORD_INFO(INTERNAL_ProfAPI_EnableRejitDiagnostics, W("ProfAPI_EnableRejitDiagnostics"), 0, "Enable extra dumping to stdout of rejit structures") CONFIG_DWORD_INFO(INTERNAL_ProfAPIFault, W("ProfAPIFault"), 0, "Test-only bitmask to inject various types of faults in the profapi code") CONFIG_DWORD_INFO(INTERNAL_TestOnlyAllowedEventMask, W("TestOnlyAllowedEventMask"), 0, "Test-only bitmask to allow profiler tests to override CLR enforcement of COR_PRF_ALLOWABLE_AFTER_ATTACH and COR_PRF_MONITOR_IMMUTABLE") CONFIG_DWORD_INFO(INTERNAL_TestOnlyEnableICorProfilerInfo, W("ProfAPI_TestOnlyEnableICorProfilerInfo"), 0, "Test-only flag to allow attaching profiler tests to call ICorProfilerInfo interface, which would otherwise be disallowed for attaching profilers") @@ -449,13 +432,12 @@ CONFIG_DWORD_INFO(INTERNAL_TestOnlyEnableObjectAllocatedHook, W("TestOnlyEnableO CONFIG_DWORD_INFO(INTERNAL_TestOnlyEnableSlowELTHooks, W("TestOnlyEnableSlowELTHooks"), 0, "Test-only flag that forces CLR to initialize on startup as if slow-ELT were requested, to enable post-attach ELT functionality.") RETAIL_CONFIG_STRING_INFO(UNSUPPORTED_ETW_ObjectAllocationEventsPerTypePerSec, W("ETW_ObjectAllocationEventsPerTypePerSec"), "Desired number of GCSampledObjectAllocation ETW events to be logged per type per second. If 0, then the default built in to the implementation for the enabled event (e.g., High, Low), will be used.") -RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_ProfAPI_ValidateNGENInstrumentation, W("ProfAPI_ValidateNGENInstrumentation"), 0, "This flag enables additional validations when using the IMetaDataEmit APIs for NGEN'ed images to ensure only supported edits are made.") #ifdef FEATURE_PERFMAP RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapEnabled, W("PerfMapEnabled"), 0, "This flag is used on Linux and macOS to enable writing /tmp/perf-$pid.map. It is disabled by default") -RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_PerfMapJitDumpPath, W("PerfMapJitDumpPath"), "Specifies a path to write the perf jitdump file. Defaults to /tmp", CLRConfig::LookupOptions::TrimWhiteSpaceFromStringValue) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapIgnoreSignal, W("PerfMapIgnoreSignal"), 0, "When perf map is enabled, this option will configure the specified signal to be accepted and ignored as a marker in the perf logs. It is disabled by default") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapShowOptimizationTiers, W("PerfMapShowOptimizationTiers"), 1, "Shows optimization tiers in the perf map for methods, as part of the symbol name. 
Useful for seeing separate stack frames for different optimization tiers of each method.") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapStubGranularity, W("PerfMapStubGranularity"), 0, "Report stubs with varying amounts of granularity (low bit being zero indicates attempt to group all stubs of a type together) (second lowest bit being non-zero records stubs at individual allocation sites, which is more expensive, but also more accurate).") #endif RETAIL_CONFIG_STRING_INFO(EXTERNAL_StartupDelayMS, W("StartupDelayMS"), "") @@ -506,11 +488,11 @@ RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_ProcessorCount, W("PROCESSOR_COUNT"), 0, "S #endif // _DEBUG RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredCompilation, W("TieredCompilation"), 1, "Enables tiered compilation") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_QuickJit, W("TC_QuickJit"), 1, "For methods that would be jitted, enable using quick JIT when appropriate.") -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#ifdef FEATURE_ON_STACK_REPLACEMENT RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 1, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.") -#else // !(defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) || defined(TARGET_RISCV64) +#else // FEATURE_ON_STACK_REPLACEMENT RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 0, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.") -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#endif // FEATURE_ON_STACK_REPLACEMENT RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_AggressiveTiering, W("TC_AggressiveTiering"), 0, "Transition through tiers aggressively.") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_CallCountThreshold, W("TC_CallCountThreshold"), TC_CallCountThreshold, "Number of times a method must be called in tier 0 after which it is promoted to the next tier.") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_CallCountingDelayMs, W("TC_CallCountingDelayMs"), TC_CallCountingDelayMs, "A perpetual delay in milliseconds that is applied to call counting in tier 0 and jitting at higher tiers, while there is startup-like activity.") @@ -581,6 +563,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_VirtualCallStubLogging, W("VirtualCallStubLogg CONFIG_DWORD_INFO(INTERNAL_VirtualCallStubMissCount, W("VirtualCallStubMissCount"), 100, "Used only when STUB_LOGGING is defined, which by default is not.") CONFIG_DWORD_INFO(INTERNAL_VirtualCallStubResetCacheCounter, W("VirtualCallStubResetCacheCounter"), 0, "Used only when STUB_LOGGING is defined, which by default is not.") CONFIG_DWORD_INFO(INTERNAL_VirtualCallStubResetCacheIncr, W("VirtualCallStubResetCacheIncr"), 0, "Used only when STUB_LOGGING is defined, which by default is not.") +CONFIG_DWORD_INFO(INTERNAL_UseCachedInterfaceDispatch, W("UseCachedInterfaceDispatch"), 0, "If cached interface dispatch is compiled in, use that instead of virtual stub dispatch") /// /// Watson @@ -594,7 +577,6 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_DbgEnableMiniDump, W("DbgEnableMiniDump"), 0, RETAIL_CONFIG_STRING_INFO(INTERNAL_DbgMiniDumpName, W("DbgMiniDumpName"), "Crash dump name") RETAIL_CONFIG_DWORD_INFO(INTERNAL_DbgMiniDumpType, W("DbgMiniDumpType"), 0, "Crash dump type: 1 normal, 2 withheap, 3 triage, 4 full") RETAIL_CONFIG_DWORD_INFO(INTERNAL_CreateDumpDiagnostics, W("CreateDumpDiagnostics"), 0, "Enable crash dump generation diagnostic 
logging") -RETAIL_CONFIG_DWORD_INFO(INTERNAL_EnableDumpOnSigTerm, W("EnableDumpOnSigTerm"), 0, "Enable crash dump generation on SIGTERM") /// /// R2R @@ -606,8 +588,6 @@ RETAIL_CONFIG_STRING_INFO(EXTERNAL_ReadyToRunLogFile, W("ReadyToRunLogFile"), "N #if defined(FEATURE_EVENT_TRACE) || defined(FEATURE_EVENTSOURCE_XPLAT) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableEventLog, W("EnableEventLog"), 0, "Enable/disable use of EnableEventLogging mechanism ") // Off by default -RETAIL_CONFIG_STRING_INFO(INTERNAL_EventSourceFilter, W("EventSourceFilter"), "") -RETAIL_CONFIG_STRING_INFO(INTERNAL_EventNameFilter, W("EventNameFilter"), "") #endif //defined(FEATURE_EVENT_TRACE) || defined(FEATURE_EVENTSOURCE_XPLAT) /// @@ -622,7 +602,6 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_EnableRCWCleanupOnSTAShutdown, W("EnableRCWCle // EventPipe // RETAIL_CONFIG_DWORD_INFO(INTERNAL_EnableEventPipe, W("EnableEventPipe"), 0, "Enable/disable event pipe. Non-zero values enable tracing.") -RETAIL_CONFIG_DWORD_INFO(INTERNAL_EventPipeNetTraceFormat, W("EventPipeNetTraceFormat"), 1, "Enable/disable using the newer nettrace file format.") RETAIL_CONFIG_STRING_INFO(INTERNAL_EventPipeOutputPath, W("EventPipeOutputPath"), "The full path excluding file name for the trace file that will be written when DOTNET_EnableEventPipe=1") RETAIL_CONFIG_STRING_INFO(INTERNAL_EventPipeConfig, W("EventPipeConfig"), "Configuration for EventPipe.") RETAIL_CONFIG_DWORD_INFO(INTERNAL_EventPipeRundown, W("EventPipeRundown"), 1, "Enable/disable eventpipe rundown.") @@ -663,7 +642,6 @@ RETAIL_CONFIG_STRING_INFO(EXTERNAL_DOTNET_DiagnosticPorts, W("DiagnosticPorts"), // LTTng // RETAIL_CONFIG_STRING_INFO(INTERNAL_LTTngConfig, W("LTTngConfig"), "Configuration for LTTng.") -RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LTTng, W("LTTng"), 1, "If DOTNET_LTTng is set to 0, this will prevent the LTTng library from being loaded at runtime") // // Executable code @@ -686,55 +664,42 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame" #endif RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum decimal width, in bits, that Vector is allowed to be. A value less than 128 is treated as the system default.", CLRConfig::LookupOptions::ParseIntegerAsBase10) +#if defined(TARGET_AMD64) || defined(TARGET_X86) +RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PreferredVectorBitWidth, W("PreferredVectorBitWidth"), 0, "The maximum decimal width, in bits, of fixed-width vectors that may be considered hardware accelerated. A value less than 128 is treated as the system default.", CLRConfig::LookupOptions::ParseIntegerAsBase10) +#endif // defined(TARGET_AMD64) || defined(TARGET_X86) // // Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h // -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_LOONGARCH64) //TODO: should implement LoongArch64's features. -//TODO-RISCV64-CQ: should implement RISCV64's features. 
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled") #else RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled") -#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#endif // defined(TARGET_LOONGARCH64) #if defined(TARGET_AMD64) || defined(TARGET_X86) -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), 1, "Allows AES+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX, W("EnableAVX"), 1, "Allows AVX+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX2, W("EnableAVX2"), 1, "Allows AVX2+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW, W("EnableAVX512BW"), 1, "Allows AVX512BW+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW_VL, W("EnableAVX512BW_VL"), 1, "Allows AVX512BW_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD, W("EnableAVX512CD"), 1, "Allows AVX512CD+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD_VL, W("EnableAVX512CD_VL"), 1, "Allows AVX512CD_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ, W("EnableAVX512DQ"), 1, "Allows AVX512DQ+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ_VL, W("EnableAVX512DQ_VL"), 1, "Allows AVX512DQ_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F, W("EnableAVX512F"), 1, "Allows AVX512F+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL, W("EnableAVX512F_VL"), 1, "Allows AVX512F_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI, W("EnableAVX512VBMI"), 1, "Allows AVX512VBMI+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI_VL, W("EnableAVX512VBMI_VL"), 1, "Allows AVX512VBMI_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v1, W("EnableAVX10v1"), 1, "Allows AVX10v1+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v2, W("EnableAVX10v2"), 1, "Allows AVX10v2+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVXVNNI+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableGFNI, W("EnableGFNI"), 1, "Allows GFNI+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1, "Allows VPCLMULQDQ+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableMOVBE, W("EnableMOVBE"), 1, "Allows MOVBE+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePOPCNT, W("EnablePOPCNT"), 1, 
"Allows POPCNT+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE, W("EnableSSE"), 1, "Allows SSE+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE2, W("EnableSSE2"), 1, "Allows SSE2+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE3, W("EnableSSE3"), 1, "Allows SSE3+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE3_4, W("EnableSSE3_4"), 1, "Allows SSE3+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE41, W("EnableSSE41"), 1, "Allows SSE4.1+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE42, W("EnableSSE42"), 1, "Allows SSE4.2+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSSE3, W("EnableSSSE3"), 1, "Allows SSSE3+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableX86Serialize, W("EnableX86Serialize"), 1, "Allows X86Serialize+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAPX, W("EnableAPX"), 0, "Allows APX+ features to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE42, W("EnableSSE42"), 1, "Allows SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX, W("EnableAVX"), 1, "Allows AVX and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX2, W("EnableAVX2"), 1, "Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512, W("EnableAVX512"), 1, "Allows AVX512 F+BW+CD+DQ+VL and depdendent hardware intrinsics to be disabled") + +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v2, W("EnableAVX512v2"), 1, "Allows AVX512 IFMA+VBMI and depdendent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v3, W("EnableAVX512v3"), 1, "Allows AVX512 BITALG+VBMI2+VNNI+VPOPCNTDQ and depdendent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v1, W("EnableAVX10v1"), 1, "Allows AVX10v1 and depdendent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v2, W("EnableAVX10v2"), 0, "Allows AVX10v2 and depdendent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAPX, W("EnableAPX"), 0, "Allows APX and dependent features to be disabled") + +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), 1, "Allows AES, PCLMULQDQ, and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VP2INTERSECT, W("EnableAVX512VP2INTERSECT"), 1, "Allows AVX512VP2INTERSECT and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXIFMA, W("EnableAVXIFMA"), 1, "Allows AVXIFMA and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVXVNNI and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableGFNI, W("EnableGFNI"), 1, "Allows GFNI and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSHA, W("EnableSHA"), 1, "Allows SHA and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVAES, W("EnableVAES"), 1, "Allows VAES, VPCLMULQDQ, and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWAITPKG, W("EnableWAITPKG"), 1, "Allows 
WAITPKG and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableX86Serialize, W("EnableX86Serialize"), 1, "Allows X86Serialize and dependent hardware intrinsics to be disabled") #elif defined(TARGET_ARM64) -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64AdvSimd, W("EnableArm64AdvSimd"), 1, "Allows Arm64 AdvSimd+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Aes, W("EnableArm64Aes"), 1, "Allows Arm64 Aes+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Atomics, W("EnableArm64Atomics"), 1, "Allows Arm64 Atomics+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Crc32, W("EnableArm64Crc32"), 1, "Allows Arm64 Crc32+ hardware intrinsics to be disabled") @@ -746,6 +711,10 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha256, W("EnableArm64Sh RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc, W("EnableArm64Rcpc"), 1, "Allows Arm64 Rcpc+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc2, W("EnableArm64Rcpc2"), 1, "Allows Arm64 Rcpc2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sve, W("EnableArm64Sve"), 1, "Allows Arm64 SVE hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sve2, W("EnableArm64Sve2"), 1, "Allows Arm64 SVE2 hardware intrinsics to be disabled") +#elif defined(TARGET_RISCV64) +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zba, W("EnableRiscV64Zba"), 1, "Allows RiscV64 Zba hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zbb, W("EnableRiscV64Zbb"), 1, "Allows RiscV64 Zbb hardware intrinsics to be disabled") #endif /// @@ -764,28 +733,16 @@ CONFIG_DWORD_INFO(INTERNAL_ActivatePatchSkip, W("ActivatePatchSkip"), 0, "Allows CONFIG_DWORD_INFO(INTERNAL_AlwaysUseMetadataInterfaceMapLayout, W("AlwaysUseMetadataInterfaceMapLayout"), 0, "Used for debugging generic interface map layout.") CONFIG_DWORD_INFO(INTERNAL_AssertOnUnneededThis, W("AssertOnUnneededThis"), 0, "While the ConfigDWORD is unnecessary, the contained ASSERT should be kept. 
This may result in some work tracking down violating MethodDescCallSites.") CONFIG_DWORD_INFO(INTERNAL_AssertStacktrace, W("AssertStacktrace"), 1, "") -CONFIG_DWORD_INFO(INTERNAL_CPUFamily, W("CPUFamily"), 0xFFFFFFFF, "") -CONFIG_DWORD_INFO(INTERNAL_CPUFeatures, W("CPUFeatures"), 0xFFFFFFFF, "") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_DisableConfigCache, W("DisableConfigCache"), 0, "Used to disable the \"probabilistic\" config cache, which walks through the appropriate config registry keys on init and probabilistically keeps track of which exist.") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_DisableStackwalkCache, W("DisableStackwalkCache"), 0, "") -RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_DoubleArrayToLargeObjectHeap, W("DoubleArrayToLargeObjectHeap"), 0, "Controls double[] placement") CONFIG_STRING_INFO(INTERNAL_DumpOnClassLoad, W("DumpOnClassLoad"), "Dumps information about loaded class to log.") CONFIG_DWORD_INFO(INTERNAL_ExpandAllOnLoad, W("ExpandAllOnLoad"), 0, "") CONFIG_DWORD_INFO(INTERNAL_ForceRelocs, W("ForceRelocs"), 0, "") CONFIG_DWORD_INFO(INTERNAL_GenerateLongJumpDispatchStubRatio, W("GenerateLongJumpDispatchStubRatio"), 0, "Useful for testing VSD on AMD64") -CONFIG_DWORD_INFO(INTERNAL_HostManagerConfig, W("HostManagerConfig"), (DWORD)-1, "") CONFIG_DWORD_INFO(INTERNAL_HostTestThreadAbort, W("HostTestThreadAbort"), 0, "") CONFIG_STRING_INFO(INTERNAL_InvokeHalt, W("InvokeHalt"), "Throws an assert when the given method is invoked through reflection.") -CONFIG_DWORD_INFO(INTERNAL_MaxStubUnwindInfoSegmentSize, W("MaxStubUnwindInfoSegmentSize"), 0, "") -CONFIG_DWORD_INFO(INTERNAL_MessageDebugOut, W("MessageDebugOut"), 0, "") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_NativeImageRequire, W("NativeImageRequire"), 0, "") -CONFIG_DWORD_INFO(INTERNAL_NestedEhOom, W("NestedEhOom"), 0, "") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_NoProcedureSplitting, W("NoProcedureSplitting"), 0, "") -CONFIG_DWORD_INFO(INTERNAL_PauseOnLoad, W("PauseOnLoad"), 0, "Stops in SystemDomain::init. I think it can be removed.") CONFIG_DWORD_INFO(INTERNAL_PerfAllocsSizeThreshold, W("PerfAllocsSizeThreshold"), 0x3FFFFFFF, "Log facility LF_GCALLOC logs object allocations. This flag controls which ones also log stacktraces. Predates ClrProfiler.") CONFIG_DWORD_INFO(INTERNAL_PerfNumAllocsThreshold, W("PerfNumAllocsThreshold"), 0x3FFFFFFF, "Log facility LF_GCALLOC logs object allocations. This flag controls which ones also log stacktraces. Predates ClrProfiler.") CONFIG_STRING_INFO(INTERNAL_PerfTypesToLog, W("PerfTypesToLog"), "Log facility LF_GCALLOC logs object allocations. This flag controls which ones also log stacktraces. 
Predates ClrProfiler.") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_Prepopulate1, W("Prepopulate1"), 1, "") CONFIG_STRING_INFO(INTERNAL_PrestubGC, W("PrestubGC"), "") CONFIG_STRING_INFO(INTERNAL_PrestubHalt, W("PrestubHalt"), "") RETAIL_CONFIG_STRING_INFO(EXTERNAL_RestrictedGCStressExe, W("RestrictedGCStressExe"), "") @@ -803,7 +760,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_ThreadGuardPages, W("ThreadGuardPages"), 0, "" RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TraceWrap, W("TraceWrap"), 0, "") #endif -CONFIG_DWORD_INFO(INTERNAL_VerifierOff, W("VerifierOff"), 0, "") // ** // PLEASE MOVE ANY CONFIG SWITCH YOU OWN OUT OF THIS SECTION INTO A CATEGORY ABOVE // diff --git a/src/coreclr/inc/clrhost.h b/src/coreclr/inc/clrhost.h index a1d22b6ee269..8d8309f8187b 100644 --- a/src/coreclr/inc/clrhost.h +++ b/src/coreclr/inc/clrhost.h @@ -29,17 +29,20 @@ using std::nothrow; #define _DEBUG_IMPL 1 #endif -#define BEGIN_PRESERVE_LAST_ERROR \ - { \ - DWORD __dwLastError = ::GetLastError(); \ - DEBUG_ASSURE_NO_RETURN_BEGIN(PRESERVE_LAST_ERROR); \ - { - -#define END_PRESERVE_LAST_ERROR \ - } \ - DEBUG_ASSURE_NO_RETURN_END(PRESERVE_LAST_ERROR); \ - ::SetLastError(__dwLastError); \ +struct PreserveLastErrorHolder +{ + PreserveLastErrorHolder() + { + m_dwLastError = ::GetLastError(); + } + + ~PreserveLastErrorHolder() + { + ::SetLastError(m_dwLastError); } +private: + DWORD m_dwLastError; +}; // // TRASH_LASTERROR macro sets bogus last error in debug builds to help find places that fail to save it diff --git a/src/coreclr/inc/clrnt.h b/src/coreclr/inc/clrnt.h index 2d935a95317e..8040117a28b8 100644 --- a/src/coreclr/inc/clrnt.h +++ b/src/coreclr/inc/clrnt.h @@ -211,7 +211,7 @@ RtlpGetFunctionEndAddress ( _In_ TADDR ImageBase ) { - PUNWIND_INFO pUnwindInfo = (PUNWIND_INFO)(ImageBase + FunctionEntry->UnwindData); + DPTR(UNWIND_INFO) pUnwindInfo = dac_cast(ImageBase + FunctionEntry->UnwindData); return FunctionEntry->BeginAddress + pUnwindInfo->FunctionLength; } @@ -501,4 +501,48 @@ RtlVirtualUnwind( #endif // TARGET_RISCV64 +#ifdef TARGET_WASM +// +// Define unwind information flags. 
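The `PreserveLastErrorHolder` introduced in clrhost.h above replaces the `BEGIN_PRESERVE_LAST_ERROR` / `END_PRESERVE_LAST_ERROR` macro pair with an RAII type, so the saved error code is restored on every exit path rather than only at the macro's closing brace. A minimal usage sketch (the surrounding function and the work it does are hypothetical):

```cpp
// Illustrative only: the holder captures ::GetLastError() in its constructor
// and restores it in its destructor, including on early return.
void DoWorkThatMayCallSetLastError();   // hypothetical callee that trashes last error

void TouchSomethingWithoutClobberingLastError()
{
    PreserveLastErrorHolder preserveLastError;

    DoWorkThatMayCallSetLastError();

    // no END_PRESERVE_LAST_ERROR needed; the destructor runs here and on early exits
}
```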
+// + +#define UNW_FLAG_NHANDLER 0x0 /* any handler */ +#define UNW_FLAG_EHANDLER 0x1 /* filter handler */ +#define UNW_FLAG_UHANDLER 0x2 /* unwind handler */ + +PEXCEPTION_ROUTINE +RtlVirtualUnwind ( + _In_ DWORD HandlerType, + _In_ DWORD ImageBase, + _In_ DWORD ControlPc, + _In_ PRUNTIME_FUNCTION FunctionEntry, + __inout PT_CONTEXT ContextRecord, + _Out_ PVOID *HandlerData, + _Out_ PDWORD EstablisherFrame, + __inout_opt PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers + ); + +FORCEINLINE +ULONG +RtlpGetFunctionEndAddress ( + _In_ PT_RUNTIME_FUNCTION FunctionEntry, + _In_ TADDR ImageBase + ) +{ + _ASSERTE("The function RtlpGetFunctionEndAddress is not implemented on wasm"); + return 0; +} + +#define RUNTIME_FUNCTION__BeginAddress(FunctionEntry) ((FunctionEntry)->BeginAddress) +#define RUNTIME_FUNCTION__SetBeginAddress(FunctionEntry,address) ((FunctionEntry)->BeginAddress = (address)) + +#define RUNTIME_FUNCTION__EndAddress(FunctionEntry, ImageBase) (RtlpGetFunctionEndAddress(FunctionEntry, (ULONG64)(ImageBase))) + +#define RUNTIME_FUNCTION__SetUnwindInfoAddress(prf,address) do { (prf)->UnwindData = (address); } while (0) + +typedef struct _UNWIND_INFO { + // dummy +} UNWIND_INFO, *PUNWIND_INFO; +#endif + #endif // CLRNT_H_ diff --git a/src/coreclr/inc/clrtypes.h b/src/coreclr/inc/clrtypes.h index 9094e4932a25..b1990054c487 100644 --- a/src/coreclr/inc/clrtypes.h +++ b/src/coreclr/inc/clrtypes.h @@ -338,7 +338,7 @@ inline UINT64 AlignUp(UINT64 value, UINT alignment) return (value+alignment-1)&~(UINT64)(alignment-1); } -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__wasm__) inline SIZE_T AlignUp(SIZE_T value, UINT alignment) { STATIC_CONTRACT_LEAF; @@ -399,13 +399,13 @@ inline UINT AlignmentPad(UINT64 value, UINT alignment) return (UINT) (AlignUp(value, alignment) - value); } -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__wasm__) inline UINT AlignmentPad(SIZE_T value, UINT alignment) { STATIC_CONTRACT_WRAPPER; return (UINT) (AlignUp(value, alignment) - value); } -#endif // __APPLE__ +#endif // __APPLE__ || __wasm__ inline UINT AlignmentTrim(UINT value, UINT alignment) { @@ -432,7 +432,7 @@ inline UINT AlignmentTrim(UINT64 value, UINT alignment) return ((UINT)value)&(alignment-1); } -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__wasm__) inline UINT AlignmentTrim(SIZE_T value, UINT alignment) { STATIC_CONTRACT_LEAF; diff --git a/src/coreclr/inc/contract.h b/src/coreclr/inc/contract.h index a8f3f6f47c22..499819590da6 100644 --- a/src/coreclr/inc/contract.h +++ b/src/coreclr/inc/contract.h @@ -710,7 +710,6 @@ class DebugOnlyCodeHolder // we don't recreated one on exit if its been deleted. 
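The clrtypes.h hunk above widens the Apple-only `SIZE_T` overloads of `AlignUp`, `AlignmentPad`, and `AlignmentTrim` to `__wasm__` as well, presumably because `SIZE_T` maps to a distinct integer type on that target, so neither the `UINT` nor the `UINT64` overload is an exact match. A small worked example of what these helpers compute (the arithmetic is the same on every platform; only overload selection changes):

```cpp
// AlignUp rounds up to the next multiple of 'alignment' (a power of two);
// AlignmentPad is the distance to that boundary; AlignmentTrim is the remainder.
SIZE_T aligned = AlignUp((SIZE_T)13, 8);        // (13 + 7) & ~7  == 16
UINT   pad     = AlignmentPad((SIZE_T)13, 8);   // 16 - 13       == 3
UINT   trim    = AlignmentTrim((SIZE_T)13, 8);  // 13 & (8 - 1)  == 5
```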
DEBUG_NOINLINE void Enter() { - SCAN_SCOPE_BEGIN; STATIC_CONTRACT_DEBUG_ONLY; m_pClrDebugState = GetClrDebugState(); @@ -723,7 +722,6 @@ class DebugOnlyCodeHolder DEBUG_NOINLINE void Leave() { - SCAN_SCOPE_END; STATIC_CONTRACT_DEBUG_ONLY; m_pClrDebugState = CheckClrDebugState(); @@ -751,7 +749,6 @@ class AutoCleanupDebugOnlyCodeHolder : public DebugOnlyCodeHolder public: DEBUG_NOINLINE AutoCleanupDebugOnlyCodeHolder() { - SCAN_SCOPE_BEGIN; STATIC_CONTRACT_DEBUG_ONLY; Enter(); @@ -759,8 +756,6 @@ class AutoCleanupDebugOnlyCodeHolder : public DebugOnlyCodeHolder DEBUG_NOINLINE ~AutoCleanupDebugOnlyCodeHolder() { - SCAN_SCOPE_END; - Leave(); }; }; @@ -1160,7 +1155,6 @@ typedef __SafeToUsePostCondition __PostConditionOK; Contract::RanPostconditions ___ran(__FUNCTION__); \ Contract::Operation ___op = Contract::Setup; \ BOOL ___contract_enabled = FALSE; \ - DEBUG_ASSURE_NO_RETURN_BEGIN(CONTRACT) \ ___contract_enabled = Contract::EnforceContract(); \ enum {___disabled = 0}; \ if (!___contract_enabled) \ @@ -1181,10 +1175,8 @@ typedef __SafeToUsePostCondition __PostConditionOK; } \ else \ { \ - DEBUG_OK_TO_RETURN_BEGIN(CONTRACT) \ ___run_return: \ return _returnexp; \ - DEBUG_OK_TO_RETURN_END(CONTRACT) \ } \ } \ if (0) \ @@ -1226,7 +1218,6 @@ typedef __SafeToUsePostCondition __PostConditionOK; Contract::Returner<_returntype> ___returner(RETVAL); \ Contract::RanPostconditions ___ran(__FUNCTION__); \ Contract::Operation ___op = Contract::Setup; \ - DEBUG_ASSURE_NO_RETURN_BEGIN(CONTRACT) \ BOOL ___contract_enabled = Contract::EnforceContract(); \ enum {___disabled = 0}; \ { \ @@ -1244,10 +1235,8 @@ typedef __SafeToUsePostCondition __PostConditionOK; } \ else \ { \ - DEBUG_OK_TO_RETURN_BEGIN(CONTRACT) \ ___run_return: \ return _returnexp; \ - DEBUG_OK_TO_RETURN_END(CONTRACT) \ } \ } \ if (0) \ @@ -1409,7 +1398,6 @@ typedef __SafeToUsePostCondition __PostConditionOK; #define UNCHECKED(thecheck) \ do { \ - ANNOTATION_UNCHECKED(thecheck); \ enum {___disabled = 1 }; \ thecheck; \ } while(0) @@ -1460,8 +1448,7 @@ typedef __SafeToUsePostCondition __PostConditionOK; #endif // __FORCE_NORUNTIME_CONTRACTS__ -#define CONTRACT_END CONTRACTL_END \ - DEBUG_ASSURE_NO_RETURN_END(CONTRACT) \ +#define CONTRACT_END CONTRACTL_END // The final expression in the RETURN macro deserves special explanation (or something.) @@ -1608,7 +1595,7 @@ typedef __SafeToUsePostCondition __PostConditionOK; #define WRAPPER_NO_CONTRACT CUSTOM_WRAPPER_NO_CONTRACT(Contract) // GC_NOTRIGGER allowed but not currently enforced at runtime -#define GC_NOTRIGGER STATIC_CONTRACT_GC_NOTRIGGER +#define GC_NOTRIGGER do { STATIC_CONTRACT_GC_NOTRIGGER; } while(0) #define GC_TRIGGERS static_assert(false, "TriggersGC not supported in utilcode contracts") #ifdef ENABLE_CONTRACTS_IMPL @@ -1626,7 +1613,6 @@ class ContractViolationHolder DEBUG_NOINLINE void Leave() { - SCAN_SCOPE_END; LeaveInternal(); }; @@ -1667,7 +1653,6 @@ class AutoCleanupContractViolationHolder : ContractViolationHolderLeaveInternal(); }; }; @@ -1679,7 +1664,6 @@ class AutoCleanupContractViolationHolder : ContractViolationHolder __violationHolder_onlyOneAllowedPerScope; \ __violationHolder_onlyOneAllowedPerScope.Enter(); \ - DEBUG_ASSURE_NO_RETURN_BEGIN(CONTRACT) \ // Use this to jump out prematurely from a violation. 
Used for EH // when the function might not return @@ -1687,7 +1671,6 @@ class AutoCleanupContractViolationHolder : ContractViolationHolderSetOkToThrow( m_oldOkayToThrowValue ); } diff --git a/src/coreclr/inc/cor.h b/src/coreclr/inc/cor.h index ab598f2ac348..d4fe6b0275a0 100644 --- a/src/coreclr/inc/cor.h +++ b/src/coreclr/inc/cor.h @@ -121,24 +121,16 @@ typedef UNALIGNED void const *UVCP_CONSTANT; //***************************************************************************** -// -#ifndef _WINDOWS_UPDATES_ #include -#endif // updates //***************************************************************************** //***************************************************************************** -// CLSID_Cor: {bee00000-ee77-11d0-a015-00c04fbbb884} -EXTERN_GUID(CLSID_Cor, 0xbee00010, 0xee77, 0x11d0, 0xa0, 0x15, 0x00, 0xc0, 0x4f, 0xbb, 0xb8, 0x84); - // CLSID_CorMetaDataDispenser: {E5CB7A31-7512-11d2-89CE-0080C792E5D8} // This is the "Master Dispenser", always guaranteed to be the most recent // dispenser on the machine. EXTERN_GUID(CLSID_CorMetaDataDispenser, 0xe5cb7a31, 0x7512, 0x11d2, 0x89, 0xce, 0x0, 0x80, 0xc7, 0x92, 0xe5, 0xd8); -interface IMetaDataDispenser; - //------------------------------------- //--- IMetaDataError //------------------------------------- diff --git a/src/coreclr/inc/corcompile.h b/src/coreclr/inc/corcompile.h index 845b72465c34..16b688eaa57e 100644 --- a/src/coreclr/inc/corcompile.h +++ b/src/coreclr/inc/corcompile.h @@ -56,6 +56,11 @@ inline ReadyToRunCrossModuleInlineFlags operator &( const ReadyToRunCrossModuleI return static_cast(static_cast(left) & static_cast(right)); } +#ifdef TARGET_WASM +// why was it defined only for x86 before? +typedef DPTR(RUNTIME_FUNCTION) PTR_RUNTIME_FUNCTION; +#endif + #ifdef TARGET_X86 typedef DPTR(RUNTIME_FUNCTION) PTR_RUNTIME_FUNCTION; @@ -94,82 +99,6 @@ enum CORCOMPILE_GCREFMAP_TOKENS GCREFMAP_VASIG_COOKIE = 5, }; -// Tags for fixup blobs -enum CORCOMPILE_FIXUP_BLOB_KIND -{ - ENCODE_NONE = 0, - - ENCODE_MODULE_OVERRIDE = 0x80, /* When the high bit is set, override of the module immediately follows */ - - ENCODE_DICTIONARY_LOOKUP_THISOBJ = 0x07, - ENCODE_DICTIONARY_LOOKUP_TYPE = 0x08, - ENCODE_DICTIONARY_LOOKUP_METHOD = 0x09, - - ENCODE_TYPE_HANDLE = 0x10, /* Type handle */ - ENCODE_METHOD_HANDLE, /* Method handle */ - ENCODE_FIELD_HANDLE, /* Field handle */ - - ENCODE_METHOD_ENTRY, /* For calling a method entry point */ - ENCODE_METHOD_ENTRY_DEF_TOKEN, /* Smaller version of ENCODE_METHOD_ENTRY - method is def token */ - ENCODE_METHOD_ENTRY_REF_TOKEN, /* Smaller version of ENCODE_METHOD_ENTRY - method is ref token */ - - ENCODE_VIRTUAL_ENTRY, /* For invoking a virtual method */ - ENCODE_VIRTUAL_ENTRY_DEF_TOKEN, /* Smaller version of ENCODE_VIRTUAL_ENTRY - method is def token */ - ENCODE_VIRTUAL_ENTRY_REF_TOKEN, /* Smaller version of ENCODE_VIRTUAL_ENTRY - method is ref token */ - ENCODE_VIRTUAL_ENTRY_SLOT, /* Smaller version of ENCODE_VIRTUAL_ENTRY - type & slot */ - - ENCODE_READYTORUN_HELPER, /* ReadyToRun helper */ - ENCODE_STRING_HANDLE, /* String token */ - - ENCODE_NEW_HELPER, /* Dynamically created new helpers */ - ENCODE_NEW_ARRAY_HELPER, - - ENCODE_ISINSTANCEOF_HELPER, /* Dynamically created casting helper */ - ENCODE_CHKCAST_HELPER, - - ENCODE_FIELD_ADDRESS, /* For accessing a cross-module static fields */ - ENCODE_CCTOR_TRIGGER, /* Static constructor trigger */ - - ENCODE_STATIC_BASE_NONGC_HELPER, /* Dynamically created static base helpers */ - ENCODE_STATIC_BASE_GC_HELPER, - ENCODE_THREAD_STATIC_BASE_NONGC_HELPER, - 
ENCODE_THREAD_STATIC_BASE_GC_HELPER, - - ENCODE_FIELD_BASE_OFFSET, /* Field base */ - ENCODE_FIELD_OFFSET, - - ENCODE_TYPE_DICTIONARY, - ENCODE_METHOD_DICTIONARY, - - ENCODE_CHECK_TYPE_LAYOUT, - ENCODE_CHECK_FIELD_OFFSET, - - ENCODE_DELEGATE_CTOR, - - ENCODE_DECLARINGTYPE_HANDLE, - - ENCODE_INDIRECT_PINVOKE_TARGET, /* For calling a pinvoke method ptr indirectly */ - ENCODE_PINVOKE_TARGET, /* For calling a pinvoke method ptr */ - - ENCODE_CHECK_INSTRUCTION_SET_SUPPORT, /* Define the set of instruction sets that must be supported/unsupported to use the fixup */ - - ENCODE_VERIFY_FIELD_OFFSET, /* Used for the R2R compiler can generate a check against the real field offset used at runtime */ - ENCODE_VERIFY_TYPE_LAYOUT, /* Used for the R2R compiler can generate a check against the real type layout used at runtime */ - - ENCODE_CHECK_VIRTUAL_FUNCTION_OVERRIDE, /* Generate a runtime check to ensure that virtual function resolution has equivalent behavior at runtime as at compile time. If not equivalent, code will not be used */ - ENCODE_VERIFY_VIRTUAL_FUNCTION_OVERRIDE, /* Generate a runtime check to ensure that virtual function resolution has equivalent behavior at runtime as at compile time. If not equivalent, generate runtime failure. */ - - ENCODE_CHECK_IL_BODY, /* Check to see if an IL method is defined the same at runtime as at compile time. A failed match will cause code not to be used. */ - ENCODE_VERIFY_IL_BODY, /* Verify an IL body is defined the same at compile time and runtime. A failed match will cause a hard runtime failure. */ - - ENCODE_MODULE_HANDLE = 0x50, /* Module token */ - ENCODE_SYNC_LOCK, /* For synchronizing access to a type */ - ENCODE_PROFILING_HANDLE, /* For the method's profiling counter */ - ENCODE_VARARGS_METHODDEF, /* For calling a varargs method */ - ENCODE_VARARGS_METHODREF, - ENCODE_VARARGS_SIG, -}; - enum EncodeMethodSigFlags { ENCODE_METHOD_SIG_UnboxingStub = 0x01, @@ -180,6 +109,7 @@ enum EncodeMethodSigFlags ENCODE_METHOD_SIG_Constrained = 0x20, ENCODE_METHOD_SIG_OwnerType = 0x40, ENCODE_METHOD_SIG_UpdateContext = 0x80, + ENCODE_METHOD_SIG_AsyncVariant = 0x100, }; enum EncodeFieldSigFlags @@ -188,11 +118,6 @@ enum EncodeFieldSigFlags ENCODE_FIELD_SIG_OwnerType = 0x40, }; -class SBuffer; -class SigBuilder; -class PEDecoder; -class GCRefMapBuilder; - //REVIEW: include for ee exception info #include "eexcp.h" @@ -227,17 +152,4 @@ struct CORCOMPILE_EXCEPTION_CLAUSE }; }; -/*********************************************************************************/ -// When NGEN install /Profile is run, the ZapProfilingHandleImport fixup table contains -// these 5 values per MethodDesc -enum -{ - kZapProfilingHandleImportValueIndexFixup = 0, - kZapProfilingHandleImportValueIndexEnterAddr = 1, - kZapProfilingHandleImportValueIndexLeaveAddr = 2, - kZapProfilingHandleImportValueIndexTailcallAddr = 3, - kZapProfilingHandleImportValueIndexClientData = 4, - - kZapProfilingHandleImportValueIndexCount -}; #endif /* COR_COMPILE_H_ */ diff --git a/src/coreclr/inc/cordebuginfo.h b/src/coreclr/inc/cordebuginfo.h index 1818c4fc1f81..e4cc1c5eb2a2 100644 --- a/src/coreclr/inc/cordebuginfo.h +++ b/src/coreclr/inc/cordebuginfo.h @@ -27,7 +27,7 @@ class ICorDebugInfo NO_BOUNDARIES = 0x00, // No implicit boundaries STACK_EMPTY_BOUNDARIES = 0x01, // Boundary whenever the IL evaluation stack is empty NOP_BOUNDARIES = 0x02, // Before every CEE_NOP instruction - CALL_SITE_BOUNDARIES = 0x04, // Before every CEE_CALL, CEE_CALLVIRT, etc instruction + CALL_SITE_BOUNDARIES = 0x04, // After every 
CEE_CALL, CEE_CALLVIRT, etc instruction // Set of boundaries that debugger should always reasonably ask the JIT for. DEFAULT_BOUNDARIES = STACK_EMPTY_BOUNDARIES | NOP_BOUNDARIES | CALL_SITE_BOUNDARIES @@ -215,6 +215,8 @@ class ICorDebugInfo REGNUM_T5, REGNUM_T6, REGNUM_PC, +#elif TARGET_WASM + REGNUM_PC, // wasm doesn't have registers #else PORTABILITY_WARNING("Register numbers not defined on this platform") #endif @@ -226,6 +228,7 @@ class ICorDebugInfo REGNUM_FP = REGNUM_EBP, REGNUM_SP = REGNUM_ESP, #elif TARGET_AMD64 + REGNUM_FP = REGNUM_RBP, REGNUM_SP = REGNUM_RSP, #elif TARGET_ARM REGNUM_FP = REGNUM_R11, diff --git a/src/coreclr/inc/coredistools.h b/src/coreclr/inc/coredistools.h index f26693f98d51..b14703e0f6da 100644 --- a/src/coreclr/inc/coredistools.h +++ b/src/coreclr/inc/coredistools.h @@ -1,12 +1,11 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -//===--------- coredistools.h - Disassembly tools for CoreClr ------------===// -// -// Core Disassembly Tools API Version 1.4.0 +//===--------- coredistools.h - Disassembly tools for CoreClr ------------=== +// Core Disassembly Tools API // Disassembly tools required by CoreCLR for utilities like // GCStress, SuperPMI, and R2RDump. -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------=== #if !defined(_COREDISTOOLS_H_) #define _COREDISTOOLS_H_ @@ -42,7 +41,8 @@ enum TargetArch { Target_X64, Target_Thumb, Target_Arm64, - Target_LoongArch64 + Target_LoongArch64, + Target_RiscV64, }; struct CorDisasm; diff --git a/src/coreclr/inc/corhdr.h b/src/coreclr/inc/corhdr.h index 84f7ebcf428b..0bd7755e3b0d 100644 --- a/src/coreclr/inc/corhdr.h +++ b/src/coreclr/inc/corhdr.h @@ -640,13 +640,15 @@ typedef enum CorMethodImpl miNoOptimization = 0x0040, // Method may not be optimized. miAggressiveOptimization = 0x0200, // Method may contain hot code and should be aggressively optimized. + miAsync = 0x2000, // Method requires async state machine rewrite. + // These are the flags that are allowed in MethodImplAttribute's Value // property. This should include everything above except the code impl // flags (which are used for MethodImplAttribute's MethodCodeType field). miUserMask = miManagedMask | miForwardRef | miPreserveSig | miInternalCall | miSynchronized | miNoInlining | miAggressiveInlining | - miNoOptimization | miAggressiveOptimization, + miNoOptimization | miAggressiveOptimization | miAsync, miMaxMethodImplVal = 0xffff, // Range check value } CorMethodImpl; @@ -670,6 +672,7 @@ typedef enum CorMethodImpl #define IsMiAggressiveInlining(x) ((x) & miAggressiveInlining) #define IsMiNoOptimization(x) ((x) & miNoOptimization) #define IsMiAggressiveOptimization(x) (((x) & (miAggressiveOptimization | miNoOptimization)) == miAggressiveOptimization) +#define IsMiAsync(x) ((x) & miAsync) // PinvokeMap attr bits, used by DefinePinvokeMap. 
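The corhdr.h change above adds the `miAsync` MethodImpl bit, folds it into `miUserMask`, and provides an `IsMiAsync` test macro matching the other `IsMi*` helpers. A trivial sketch of how the bit is queried (the flag combination shown is just an example):

```cpp
// Illustrative only: miAsync is tested like any other CorMethodImpl bit.
DWORD implFlags = miNoInlining | miAsync;   // e.g. as read from a MethodDef's ImplFlags

if (IsMiAsync(implFlags))
{
    // per the comment above, this method body requires the async state-machine rewrite
}
```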
typedef enum CorPinvokeMap @@ -1136,12 +1139,11 @@ typedef struct IMAGE_COR_ILMETHOD_SECT_FAT typedef enum CorExceptionFlag // definitions for the Flags field below (for both big and small) { COR_ILEXCEPTION_CLAUSE_NONE, // This is a typed handler - COR_ILEXCEPTION_CLAUSE_OFFSETLEN = 0x0000, // Deprecated - COR_ILEXCEPTION_CLAUSE_DEPRECATED = 0x0000, // Deprecated COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // If this bit is on, then this EH entry is for a filter COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // Fault clause (finally that is called on exception only) - COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This clause was duplicated to a funclet which was pulled out of line + COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // Deprecated: Duplicated clause. This clause was duplicated to a funclet which was pulled out of line + COR_ILEXCEPTION_CLAUSE_SAMETRY = 0x0010, // This clause covers same try block as the previous one } CorExceptionFlag; /***********************************/ @@ -1680,6 +1682,8 @@ typedef enum CorAttributeTargets // Keep in sync with RuntimeCompatibilityAttribute.cs #define RUNTIMECOMPATIBILITY_TYPE_W W("System.Runtime.CompilerServices.RuntimeCompatibilityAttribute") #define RUNTIMECOMPATIBILITY_TYPE "System.Runtime.CompilerServices.RuntimeCompatibilityAttribute" +#define RUNTIMECOMPATIBILITY_TYPE_NAMESPACE "System.Runtime.CompilerServices" +#define RUNTIMECOMPATIBILITY_TYPE_NAME "RuntimeCompatibilityAttribute" // Keep in sync with AssemblySettingAttributes.cs diff --git a/src/coreclr/inc/corhlprpriv.h b/src/coreclr/inc/corhlprpriv.h index 7df77ea0ca62..e960eb416fc9 100644 --- a/src/coreclr/inc/corhlprpriv.h +++ b/src/coreclr/inc/corhlprpriv.h @@ -573,6 +573,16 @@ class CQuickArrayList : protected CQuickArray return m_curSize; } + T* Ptr() + { + return (T*) CQuickBytesBase::Ptr(); + } + + const T* Ptr() const + { + return (T*) CQuickBytesBase::Ptr(); + } + void Shrink() { CQuickArray::Shrink(m_curSize); diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 6854c4cf6b97..c13b1efb2878 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -336,7 +336,9 @@ enum CorInfoHelpFunc CORINFO_HELP_LMOD, CORINFO_HELP_ULDIV, CORINFO_HELP_ULMOD, + CORINFO_HELP_LNG2FLT, // Convert a signed int64 to a float CORINFO_HELP_LNG2DBL, // Convert a signed int64 to a double + CORINFO_HELP_ULNG2FLT, // Convert a unsigned int64 to a float CORINFO_HELP_ULNG2DBL, // Convert a unsigned int64 to a double CORINFO_HELP_DBL2INT, CORINFO_HELP_DBL2INT_OVF, @@ -363,7 +365,7 @@ enum CorInfoHelpFunc CORINFO_HELP_NEW_MDARR_RARE,// rare multi-dim array helper (Rank == 1) CORINFO_HELP_NEWARR_1_DIRECT, // helper for any one dimensional array creation CORINFO_HELP_NEWARR_1_MAYBEFROZEN, // allocator for arrays that *might* allocate them on a frozen segment - CORINFO_HELP_NEWARR_1_OBJ, // optimized 1-D object arrays + CORINFO_HELP_NEWARR_1_PTR, // optimized 1-D arrays with pointer sized elements CORINFO_HELP_NEWARR_1_VC, // optimized 1-D value class arrays CORINFO_HELP_NEWARR_1_ALIGN8, // like VC, but aligns the array start @@ -405,6 +407,7 @@ enum CorInfoHelpFunc CORINFO_HELP_THROW, // Throw an exception object CORINFO_HELP_RETHROW, // Rethrow the currently active exception + CORINFO_HELP_THROWEXACT, // Throw an exception object, preserving stack trace CORINFO_HELP_USER_BREAKPOINT, // For a user program to break to the debugger CORINFO_HELP_RNGCHKFAIL, // array bounds check failed 
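The corhlprpriv.h hunk above gives `CQuickArrayList` typed `Ptr()` accessors (const and non-const), mirroring `CQuickArray`, so callers can hand the contiguous element buffer to an API that takes a pointer and a count without reaching into the protected base. A minimal sketch, assuming the list's existing `Size()` accessor and a hypothetical consumer:

```cpp
// Illustrative only: ProcessTokens and its signature are hypothetical.
void ProcessTokens(const mdToken* tokens, SIZE_T count);

void HandOffTokens(CQuickArrayList<mdToken>& list)
{
    ProcessTokens(list.Ptr(), list.Size());   // typed pointer, no cast through the base class
}
```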
CORINFO_HELP_OVERFLOW, // throw an overflow exception @@ -579,7 +582,7 @@ enum CorInfoHelpFunc CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame CORINFO_HELP_PATCHPOINT, // Notify runtime that code has reached a patchpoint - CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, // Notify runtime that code has reached a part of the method that wasn't originally jitted. + CORINFO_HELP_PATCHPOINT_FORCED, // Notify runtime that code has reached a part of the method that needs to transition CORINFO_HELP_CLASSPROFILE32, // Update 32-bit class profile for a call site CORINFO_HELP_CLASSPROFILE64, // Update 64-bit class profile for a call site @@ -595,6 +598,7 @@ enum CorInfoHelpFunc CORINFO_HELP_VALIDATE_INDIRECT_CALL, // CFG: Validate function pointer CORINFO_HELP_DISPATCH_INDIRECT_CALL, // CFG: Validate and dispatch to pointer CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP, CORINFO_HELP_LLVM_EH_CATCH, CORINFO_HELP_LLVM_EH_POP_UNWOUND_VIRTUAL_FRAMES, @@ -604,6 +608,11 @@ enum CorInfoHelpFunc CORINFO_HELP_LLVM_RESOLVE_INTERFACE_CALL_TARGET, CORINFO_HELP_LLVM_GET_EXTERNAL_CALL_TARGET, CORINFO_HELP_LLVM_STRESS_GC, + CORINFO_HELP_ALLOC_CONTINUATION, + CORINFO_HELP_ALLOC_CONTINUATION_METHOD, + CORINFO_HELP_ALLOC_CONTINUATION_CLASS, CORINFO_HELP_COUNT, }; @@ -679,6 +688,7 @@ enum CorInfoCallConv CORINFO_CALLCONV_HASTHIS = 0x20, CORINFO_CALLCONV_EXPLICITTHIS=0x40, CORINFO_CALLCONV_PARAMTYPE = 0x80, // Passed last. Same as CORINFO_GENERICS_CTXT_FROM_PARAMTYPEARG + CORINFO_CALLCONV_ASYNCCALL = 0x100, // Is this a call to an async function? }; // Represents the calling conventions supported with the extensible calling convention syntax @@ -726,7 +736,6 @@ enum CorInfoOptions CORINFO_GENERICS_CTXT_FROM_METHODDESC | CORINFO_GENERICS_CTXT_FROM_METHODTABLE), CORINFO_GENERICS_CTXT_KEEP_ALIVE = 0x00000100, // Keep the generics context alive throughout the method even if there is no explicit use, and report its location to the CLR - }; // @@ -824,13 +833,14 @@ enum CORINFO_ACCESS_FLAGS }; // These are the flags set on an CORINFO_EH_CLAUSE +// Keep values in sync with COR_ILEXCEPTION_CLAUSE flags enum CORINFO_EH_CLAUSE_FLAGS { CORINFO_EH_CLAUSE_NONE = 0, CORINFO_EH_CLAUSE_FILTER = 0x0001, // If this bit is on, then this EH entry is for a filter CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause - CORINFO_EH_CLAUSE_DUPLICATE = 0x0008, // Duplicated clause. This clause was duplicated to a funclet which was pulled out of line + // UNUSED = 0x0008, CORINFO_EH_CLAUSE_SAMETRY = 0x0010, // This clause covers same try block as the previous one }; @@ -1002,6 +1012,7 @@ struct CORINFO_SIG_INFO unsigned totalILArgs() { return (numArgs + (hasImplicitThis() ? 1 : 0)); } bool isVarArg() { return ((getCallConv() == CORINFO_CALLCONV_VARARG) || (getCallConv() == CORINFO_CALLCONV_NATIVEVARARG)); } bool hasTypeArg() { return ((callConv & CORINFO_CALLCONV_PARAMTYPE) != 0); } + bool isAsyncCall() { return ((callConv & CORINFO_CALLCONV_ASYNCCALL) != 0); } }; struct CORINFO_METHOD_INFO @@ -1361,7 +1372,7 @@ enum CORINFO_CALLINFO_FLAGS CORINFO_CALLINFO_ALLOWINSTPARAM = 0x0001, // Can the compiler generate code to pass an instantiation parameters? Simple compilers should not use this flag CORINFO_CALLINFO_CALLVIRT = 0x0002, // Is it a virtual call?
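Alongside the new continuation-allocation helpers, the hunk above defines `CORINFO_CALLCONV_ASYNCCALL` and the matching `CORINFO_SIG_INFO::isAsyncCall()` query. A sketch of how a JIT-side consumer might branch on it (the importer function is hypothetical; the real RyuJIT call sites are not part of this hunk):

```cpp
// Illustrative only: deciding whether a call site participates in runtime async.
void ImportCallSite(CORINFO_SIG_INFO& sig)
{
    if (sig.isAsyncCall())
    {
        // a suspending call: the compiler would arrange for a continuation to be
        // allocated via the CORINFO_HELP_ALLOC_CONTINUATION* helpers added above
    }
}
```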
// UNUSED = 0x0004, - // UNUSED = 0x0008, + CORINFO_CALLINFO_DISALLOW_STUB = 0x0008, // Do not use a stub for this call, even if it is a virtual call. CORINFO_CALLINFO_SECURITYCHECKS = 0x0010, // Perform security checks. CORINFO_CALLINFO_LDFTN = 0x0020, // Resolving target of LDFTN // UNUSED = 0x0040, @@ -1407,6 +1418,9 @@ enum CorInfoTokenKind // token comes from devirtualizing a method CORINFO_TOKENKIND_DevirtualizedMethod = 0x800 | CORINFO_TOKENKIND_Method, + + // token comes from runtime async awaiting pattern + CORINFO_TOKENKIND_Await = 0x2000 | CORINFO_TOKENKIND_Method, }; struct CORINFO_RESOLVED_TOKEN @@ -1699,6 +1713,43 @@ struct CORINFO_EE_INFO CORINFO_OS osType; }; +enum CorInfoContinuationFlags +{ + // Whether or not the continuation expects the result to be boxed and + // placed in the GCData array at index 0. Not set if the callee is void. + CORINFO_CONTINUATION_RESULT_IN_GCDATA = 1, + // If this bit is set the continuation resumes inside a try block and thus + // if an exception is being propagated, needs to be resumed. The exception + // should be placed at index 0 or 1 depending on whether the continuation + // also expects a result. + CORINFO_CONTINUATION_NEEDS_EXCEPTION = 2, + // If this bit is set the continuation has the IL offset that inspired the + // OSR method saved in the beginning of 'Data', or -1 if the continuation + // belongs to a tier 0 method. + CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA = 4, +}; + +struct CORINFO_ASYNC_INFO +{ + // Class handle for System.Runtime.CompilerServices.Continuation + CORINFO_CLASS_HANDLE continuationClsHnd; + // 'Next' field + CORINFO_FIELD_HANDLE continuationNextFldHnd; + // 'Resume' field + CORINFO_FIELD_HANDLE continuationResumeFldHnd; + // 'State' field + CORINFO_FIELD_HANDLE continuationStateFldHnd; + // 'Flags' field + CORINFO_FIELD_HANDLE continuationFlagsFldHnd; + // 'Data' field + CORINFO_FIELD_HANDLE continuationDataFldHnd; + // 'GCData' field + CORINFO_FIELD_HANDLE continuationGCDataFldHnd; + // Whether or not the continuation needs to be allocated through the + // helper that also takes a method handle + bool continuationsNeedMethodHandle; +}; + // Flags passed from JIT to runtime. enum CORINFO_GET_TAILCALL_HELPERS_FLAGS { @@ -2492,14 +2543,6 @@ class ICorStaticInfo CORINFO_CLASS_HANDLE cls ) = 0; - // Get a representation for a stack-allocated boxed value type. - // - // This differs from getTypeForBox in that it includes an explicit field - // for the method table pointer. - virtual CORINFO_CLASS_HANDLE getTypeForBoxOnStack( - CORINFO_CLASS_HANDLE cls - ) = 0; - // returns the correct box helper for a particular class. Note // that if this returns CORINFO_HELP_BOX, the JIT can assume // 'standard' boxing (allocate object and copy), and optimize @@ -2970,6 +3013,10 @@ class ICorStaticInfo CORINFO_EE_INFO *pEEInfoOut ) = 0; + virtual void getAsyncInfo( + CORINFO_ASYNC_INFO* pAsyncInfoOut + ) = 0; + /*********************************************************************************/ // // Diagnostic methods @@ -3305,6 +3352,8 @@ class ICorDynamicInfo : public ICorStaticInfo CORINFO_TAILCALL_HELPERS* pResult ) = 0; + virtual CORINFO_METHOD_HANDLE getAsyncResumptionStub() = 0; + // Optionally, convert calli to regular method call. This is for PInvoke argument marshalling. 
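The `CorInfoContinuationFlags` comments above pin down where the boxed result and a propagating exception live in a continuation's `GCData` array: the result (if any) at index 0, and the exception at index 0 or 1 depending on whether a result slot precedes it. A small sketch making that index bookkeeping explicit (the struct and function are illustrative, not an API in this change):

```cpp
// Illustrative only: derive GCData slot indices from a continuation's flags.
struct ContinuationGCDataLayout
{
    int resultIndex;      // -1 if the callee is void
    int exceptionIndex;   // -1 if the continuation does not resume inside a try
};

ContinuationGCDataLayout GetLayout(unsigned flags)
{
    ContinuationGCDataLayout layout = { -1, -1 };

    if (flags & CORINFO_CONTINUATION_RESULT_IN_GCDATA)
        layout.resultIndex = 0;                                      // boxed result at index 0

    if (flags & CORINFO_CONTINUATION_NEEDS_EXCEPTION)
        layout.exceptionIndex = (layout.resultIndex == 0) ? 1 : 0;   // after the result, if any

    return layout;
}
```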
virtual bool convertPInvokeCalliToCall( CORINFO_RESOLVED_TOKEN * pResolvedToken, diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 5b021c0b6890..b4742cada3b9 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -32,167 +32,158 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=14, InstructionSet_Rcpc2=15, InstructionSet_Sve=16, - InstructionSet_ArmBase_Arm64=17, - InstructionSet_AdvSimd_Arm64=18, - InstructionSet_Aes_Arm64=19, - InstructionSet_Crc32_Arm64=20, - InstructionSet_Dp_Arm64=21, - InstructionSet_Rdm_Arm64=22, - InstructionSet_Sha1_Arm64=23, - InstructionSet_Sha256_Arm64=24, - InstructionSet_Sve_Arm64=25, + InstructionSet_Sve2=17, + InstructionSet_ArmBase_Arm64=18, + InstructionSet_AdvSimd_Arm64=19, + InstructionSet_Aes_Arm64=20, + InstructionSet_Crc32_Arm64=21, + InstructionSet_Dp_Arm64=22, + InstructionSet_Rdm_Arm64=23, + InstructionSet_Sha1_Arm64=24, + InstructionSet_Sha256_Arm64=25, + InstructionSet_Sve_Arm64=26, + InstructionSet_Sve2_Arm64=27, #endif // TARGET_ARM64 +#ifdef TARGET_RISCV64 + InstructionSet_RiscV64Base=1, + InstructionSet_Zba=2, + InstructionSet_Zbb=3, +#endif // TARGET_RISCV64 #ifdef TARGET_AMD64 InstructionSet_X86Base=1, - InstructionSet_SSE=2, - InstructionSet_SSE2=3, - InstructionSet_SSE3=4, - InstructionSet_SSSE3=5, - InstructionSet_SSE41=6, - InstructionSet_SSE42=7, - InstructionSet_AVX=8, - InstructionSet_AVX2=9, - InstructionSet_AES=10, - InstructionSet_BMI1=11, - InstructionSet_BMI2=12, - InstructionSet_FMA=13, - InstructionSet_LZCNT=14, - InstructionSet_PCLMULQDQ=15, - InstructionSet_PCLMULQDQ_V256=16, - InstructionSet_PCLMULQDQ_V512=17, - InstructionSet_POPCNT=18, - InstructionSet_Vector128=19, - InstructionSet_Vector256=20, - InstructionSet_Vector512=21, - InstructionSet_AVXVNNI=22, - InstructionSet_MOVBE=23, - InstructionSet_X86Serialize=24, - InstructionSet_EVEX=25, - InstructionSet_AVX512F=26, - InstructionSet_AVX512F_VL=27, - InstructionSet_AVX512BW=28, - InstructionSet_AVX512BW_VL=29, - InstructionSet_AVX512CD=30, - InstructionSet_AVX512CD_VL=31, - InstructionSet_AVX512DQ=32, - InstructionSet_AVX512DQ_VL=33, - InstructionSet_AVX512VBMI=34, - InstructionSet_AVX512VBMI_VL=35, - InstructionSet_AVX10v1=36, - InstructionSet_AVX10v1_V512=37, + InstructionSet_SSE3=2, + InstructionSet_SSSE3=3, + InstructionSet_SSE41=4, + InstructionSet_SSE42=5, + InstructionSet_POPCNT=6, + InstructionSet_AVX=7, + InstructionSet_AVX2=8, + InstructionSet_BMI1=9, + InstructionSet_BMI2=10, + InstructionSet_FMA=11, + InstructionSet_LZCNT=12, + InstructionSet_MOVBE=13, + InstructionSet_AVX512=14, + InstructionSet_AVX512VBMI=15, + InstructionSet_AVX512v3=16, + InstructionSet_AVX10v1=17, + InstructionSet_AVX10v2=18, + InstructionSet_APX=19, + InstructionSet_AES=20, + InstructionSet_PCLMULQDQ=21, + InstructionSet_AVX512VP2INTERSECT=22, + InstructionSet_AVXIFMA=23, + InstructionSet_AVXVNNI=24, + InstructionSet_GFNI=25, + InstructionSet_GFNI_V256=26, + InstructionSet_GFNI_V512=27, + InstructionSet_SHA=28, + InstructionSet_AES_V256=29, + InstructionSet_AES_V512=30, + InstructionSet_PCLMULQDQ_V256=31, + InstructionSet_PCLMULQDQ_V512=32, + InstructionSet_WAITPKG=33, + InstructionSet_X86Serialize=34, + InstructionSet_Vector128=35, + InstructionSet_Vector256=36, + InstructionSet_Vector512=37, InstructionSet_VectorT128=38, InstructionSet_VectorT256=39, InstructionSet_VectorT512=40, - InstructionSet_APX=41, - InstructionSet_AVX10v2=42, - InstructionSet_AVX10v2_V512=43, - 
InstructionSet_GFNI=44, - InstructionSet_GFNI_V256=45, - InstructionSet_GFNI_V512=46, - InstructionSet_X86Base_X64=47, - InstructionSet_SSE_X64=48, - InstructionSet_SSE2_X64=49, - InstructionSet_SSE3_X64=50, - InstructionSet_SSSE3_X64=51, - InstructionSet_SSE41_X64=52, - InstructionSet_SSE42_X64=53, - InstructionSet_AVX_X64=54, - InstructionSet_AVX2_X64=55, - InstructionSet_AES_X64=56, - InstructionSet_BMI1_X64=57, - InstructionSet_BMI2_X64=58, - InstructionSet_FMA_X64=59, - InstructionSet_LZCNT_X64=60, - InstructionSet_PCLMULQDQ_X64=61, - InstructionSet_POPCNT_X64=62, - InstructionSet_AVXVNNI_X64=63, - InstructionSet_X86Serialize_X64=64, - InstructionSet_AVX512F_X64=65, - InstructionSet_AVX512BW_X64=66, - InstructionSet_AVX512CD_X64=67, - InstructionSet_AVX512DQ_X64=68, - InstructionSet_AVX512VBMI_X64=69, - InstructionSet_AVX10v1_X64=70, - InstructionSet_AVX10v1_V512_X64=71, - InstructionSet_AVX10v2_X64=72, - InstructionSet_AVX10v2_V512_X64=73, - InstructionSet_GFNI_X64=74, + InstructionSet_X86Base_X64=41, + InstructionSet_SSE3_X64=42, + InstructionSet_SSSE3_X64=43, + InstructionSet_SSE41_X64=44, + InstructionSet_SSE42_X64=45, + InstructionSet_POPCNT_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_BMI1_X64=49, + InstructionSet_BMI2_X64=50, + InstructionSet_FMA_X64=51, + InstructionSet_LZCNT_X64=52, + InstructionSet_AVX512_X64=53, + InstructionSet_AVX512VBMI_X64=54, + InstructionSet_AVX512v3_X64=55, + InstructionSet_AVX10v1_X64=56, + InstructionSet_AVX10v2_X64=57, + InstructionSet_AES_X64=58, + InstructionSet_PCLMULQDQ_X64=59, + InstructionSet_AVX512VP2INTERSECT_X64=60, + InstructionSet_AVXIFMA_X64=61, + InstructionSet_AVXVNNI_X64=62, + InstructionSet_GFNI_X64=63, + InstructionSet_SHA_X64=64, + InstructionSet_WAITPKG_X64=65, + InstructionSet_X86Serialize_X64=66, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, - InstructionSet_SSE=2, - InstructionSet_SSE2=3, - InstructionSet_SSE3=4, - InstructionSet_SSSE3=5, - InstructionSet_SSE41=6, - InstructionSet_SSE42=7, - InstructionSet_AVX=8, - InstructionSet_AVX2=9, - InstructionSet_AES=10, - InstructionSet_BMI1=11, - InstructionSet_BMI2=12, - InstructionSet_FMA=13, - InstructionSet_LZCNT=14, - InstructionSet_PCLMULQDQ=15, - InstructionSet_PCLMULQDQ_V256=16, - InstructionSet_PCLMULQDQ_V512=17, - InstructionSet_POPCNT=18, - InstructionSet_Vector128=19, - InstructionSet_Vector256=20, - InstructionSet_Vector512=21, - InstructionSet_AVXVNNI=22, - InstructionSet_MOVBE=23, - InstructionSet_X86Serialize=24, - InstructionSet_EVEX=25, - InstructionSet_AVX512F=26, - InstructionSet_AVX512F_VL=27, - InstructionSet_AVX512BW=28, - InstructionSet_AVX512BW_VL=29, - InstructionSet_AVX512CD=30, - InstructionSet_AVX512CD_VL=31, - InstructionSet_AVX512DQ=32, - InstructionSet_AVX512DQ_VL=33, - InstructionSet_AVX512VBMI=34, - InstructionSet_AVX512VBMI_VL=35, - InstructionSet_AVX10v1=36, - InstructionSet_AVX10v1_V512=37, + InstructionSet_SSE3=2, + InstructionSet_SSSE3=3, + InstructionSet_SSE41=4, + InstructionSet_SSE42=5, + InstructionSet_POPCNT=6, + InstructionSet_AVX=7, + InstructionSet_AVX2=8, + InstructionSet_BMI1=9, + InstructionSet_BMI2=10, + InstructionSet_FMA=11, + InstructionSet_LZCNT=12, + InstructionSet_MOVBE=13, + InstructionSet_AVX512=14, + InstructionSet_AVX512VBMI=15, + InstructionSet_AVX512v3=16, + InstructionSet_AVX10v1=17, + InstructionSet_AVX10v2=18, + InstructionSet_APX=19, + InstructionSet_AES=20, + InstructionSet_PCLMULQDQ=21, + InstructionSet_AVX512VP2INTERSECT=22, + InstructionSet_AVXIFMA=23, + 
InstructionSet_AVXVNNI=24, + InstructionSet_GFNI=25, + InstructionSet_GFNI_V256=26, + InstructionSet_GFNI_V512=27, + InstructionSet_SHA=28, + InstructionSet_AES_V256=29, + InstructionSet_AES_V512=30, + InstructionSet_PCLMULQDQ_V256=31, + InstructionSet_PCLMULQDQ_V512=32, + InstructionSet_WAITPKG=33, + InstructionSet_X86Serialize=34, + InstructionSet_Vector128=35, + InstructionSet_Vector256=36, + InstructionSet_Vector512=37, InstructionSet_VectorT128=38, InstructionSet_VectorT256=39, InstructionSet_VectorT512=40, - InstructionSet_APX=41, - InstructionSet_AVX10v2=42, - InstructionSet_AVX10v2_V512=43, - InstructionSet_GFNI=44, - InstructionSet_GFNI_V256=45, - InstructionSet_GFNI_V512=46, - InstructionSet_X86Base_X64=47, - InstructionSet_SSE_X64=48, - InstructionSet_SSE2_X64=49, - InstructionSet_SSE3_X64=50, - InstructionSet_SSSE3_X64=51, - InstructionSet_SSE41_X64=52, - InstructionSet_SSE42_X64=53, - InstructionSet_AVX_X64=54, - InstructionSet_AVX2_X64=55, - InstructionSet_AES_X64=56, - InstructionSet_BMI1_X64=57, - InstructionSet_BMI2_X64=58, - InstructionSet_FMA_X64=59, - InstructionSet_LZCNT_X64=60, - InstructionSet_PCLMULQDQ_X64=61, - InstructionSet_POPCNT_X64=62, - InstructionSet_AVXVNNI_X64=63, - InstructionSet_X86Serialize_X64=64, - InstructionSet_AVX512F_X64=65, - InstructionSet_AVX512BW_X64=66, - InstructionSet_AVX512CD_X64=67, - InstructionSet_AVX512DQ_X64=68, - InstructionSet_AVX512VBMI_X64=69, - InstructionSet_AVX10v1_X64=70, - InstructionSet_AVX10v1_V512_X64=71, - InstructionSet_AVX10v2_X64=72, - InstructionSet_AVX10v2_V512_X64=73, - InstructionSet_GFNI_X64=74, + InstructionSet_X86Base_X64=41, + InstructionSet_SSE3_X64=42, + InstructionSet_SSSE3_X64=43, + InstructionSet_SSE41_X64=44, + InstructionSet_SSE42_X64=45, + InstructionSet_POPCNT_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_BMI1_X64=49, + InstructionSet_BMI2_X64=50, + InstructionSet_FMA_X64=51, + InstructionSet_LZCNT_X64=52, + InstructionSet_AVX512_X64=53, + InstructionSet_AVX512VBMI_X64=54, + InstructionSet_AVX512v3_X64=55, + InstructionSet_AVX10v1_X64=56, + InstructionSet_AVX10v2_X64=57, + InstructionSet_AES_X64=58, + InstructionSet_PCLMULQDQ_X64=59, + InstructionSet_AVX512VP2INTERSECT_X64=60, + InstructionSet_AVXIFMA_X64=61, + InstructionSet_AVXVNNI_X64=62, + InstructionSet_GFNI_X64=63, + InstructionSet_SHA_X64=64, + InstructionSet_WAITPKG_X64=65, + InstructionSet_X86Serialize_X64=66, #endif // TARGET_X86 }; @@ -306,14 +297,14 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_Sha256_Arm64); if (HasInstructionSet(InstructionSet_Sve)) AddInstructionSet(InstructionSet_Sve_Arm64); + if (HasInstructionSet(InstructionSet_Sve2)) + AddInstructionSet(InstructionSet_Sve2_Arm64); #endif // TARGET_ARM64 +#ifdef TARGET_RISCV64 +#endif // TARGET_RISCV64 #ifdef TARGET_AMD64 if (HasInstructionSet(InstructionSet_X86Base)) AddInstructionSet(InstructionSet_X86Base_X64); - if (HasInstructionSet(InstructionSet_SSE)) - AddInstructionSet(InstructionSet_SSE_X64); - if (HasInstructionSet(InstructionSet_SSE2)) - AddInstructionSet(InstructionSet_SSE2_X64); if (HasInstructionSet(InstructionSet_SSE3)) AddInstructionSet(InstructionSet_SSE3_X64); if (HasInstructionSet(InstructionSet_SSSE3)) @@ -322,12 +313,12 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_SSE41_X64); if (HasInstructionSet(InstructionSet_SSE42)) AddInstructionSet(InstructionSet_SSE42_X64); + if (HasInstructionSet(InstructionSet_POPCNT)) + AddInstructionSet(InstructionSet_POPCNT_X64); if 
(HasInstructionSet(InstructionSet_AVX)) AddInstructionSet(InstructionSet_AVX_X64); if (HasInstructionSet(InstructionSet_AVX2)) AddInstructionSet(InstructionSet_AVX2_X64); - if (HasInstructionSet(InstructionSet_AES)) - AddInstructionSet(InstructionSet_AES_X64); if (HasInstructionSet(InstructionSet_BMI1)) AddInstructionSet(InstructionSet_BMI1_X64); if (HasInstructionSet(InstructionSet_BMI2)) @@ -336,34 +327,34 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_FMA_X64); if (HasInstructionSet(InstructionSet_LZCNT)) AddInstructionSet(InstructionSet_LZCNT_X64); - if (HasInstructionSet(InstructionSet_PCLMULQDQ)) - AddInstructionSet(InstructionSet_PCLMULQDQ_X64); - if (HasInstructionSet(InstructionSet_POPCNT)) - AddInstructionSet(InstructionSet_POPCNT_X64); - if (HasInstructionSet(InstructionSet_AVXVNNI)) - AddInstructionSet(InstructionSet_AVXVNNI_X64); - if (HasInstructionSet(InstructionSet_X86Serialize)) - AddInstructionSet(InstructionSet_X86Serialize_X64); - if (HasInstructionSet(InstructionSet_AVX512F)) - AddInstructionSet(InstructionSet_AVX512F_X64); - if (HasInstructionSet(InstructionSet_AVX512BW)) - AddInstructionSet(InstructionSet_AVX512BW_X64); - if (HasInstructionSet(InstructionSet_AVX512CD)) - AddInstructionSet(InstructionSet_AVX512CD_X64); - if (HasInstructionSet(InstructionSet_AVX512DQ)) - AddInstructionSet(InstructionSet_AVX512DQ_X64); + if (HasInstructionSet(InstructionSet_AVX512)) + AddInstructionSet(InstructionSet_AVX512_X64); if (HasInstructionSet(InstructionSet_AVX512VBMI)) AddInstructionSet(InstructionSet_AVX512VBMI_X64); + if (HasInstructionSet(InstructionSet_AVX512v3)) + AddInstructionSet(InstructionSet_AVX512v3_X64); if (HasInstructionSet(InstructionSet_AVX10v1)) AddInstructionSet(InstructionSet_AVX10v1_X64); - if (HasInstructionSet(InstructionSet_AVX10v1_V512)) - AddInstructionSet(InstructionSet_AVX10v1_V512_X64); if (HasInstructionSet(InstructionSet_AVX10v2)) AddInstructionSet(InstructionSet_AVX10v2_X64); - if (HasInstructionSet(InstructionSet_AVX10v2_V512)) - AddInstructionSet(InstructionSet_AVX10v2_V512_X64); + if (HasInstructionSet(InstructionSet_AES)) + AddInstructionSet(InstructionSet_AES_X64); + if (HasInstructionSet(InstructionSet_PCLMULQDQ)) + AddInstructionSet(InstructionSet_PCLMULQDQ_X64); + if (HasInstructionSet(InstructionSet_AVX512VP2INTERSECT)) + AddInstructionSet(InstructionSet_AVX512VP2INTERSECT_X64); + if (HasInstructionSet(InstructionSet_AVXIFMA)) + AddInstructionSet(InstructionSet_AVXIFMA_X64); + if (HasInstructionSet(InstructionSet_AVXVNNI)) + AddInstructionSet(InstructionSet_AVXVNNI_X64); if (HasInstructionSet(InstructionSet_GFNI)) AddInstructionSet(InstructionSet_GFNI_X64); + if (HasInstructionSet(InstructionSet_SHA)) + AddInstructionSet(InstructionSet_SHA_X64); + if (HasInstructionSet(InstructionSet_WAITPKG)) + AddInstructionSet(InstructionSet_WAITPKG_X64); + if (HasInstructionSet(InstructionSet_X86Serialize)) + AddInstructionSet(InstructionSet_X86Serialize_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -420,6 +411,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Sve); if (resultflags.HasInstructionSet(InstructionSet_Sve_Arm64) && !resultflags.HasInstructionSet(InstructionSet_Sve)) resultflags.RemoveInstructionSet(InstructionSet_Sve_Arm64); + if (resultflags.HasInstructionSet(InstructionSet_Sve2) && !resultflags.HasInstructionSet(InstructionSet_Sve2_Arm64)) + resultflags.RemoveInstructionSet(InstructionSet_Sve2); + if 
(resultflags.HasInstructionSet(InstructionSet_Sve2_Arm64) && !resultflags.HasInstructionSet(InstructionSet_Sve2)) + resultflags.RemoveInstructionSet(InstructionSet_Sve2_Arm64); if (resultflags.HasInstructionSet(InstructionSet_AdvSimd) && !resultflags.HasInstructionSet(InstructionSet_ArmBase)) resultflags.RemoveInstructionSet(InstructionSet_AdvSimd); if (resultflags.HasInstructionSet(InstructionSet_Aes) && !resultflags.HasInstructionSet(InstructionSet_ArmBase)) @@ -442,20 +437,20 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_VectorT128); if (resultflags.HasInstructionSet(InstructionSet_Sve) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd)) resultflags.RemoveInstructionSet(InstructionSet_Sve); + if (resultflags.HasInstructionSet(InstructionSet_Sve2) && !resultflags.HasInstructionSet(InstructionSet_Sve)) + resultflags.RemoveInstructionSet(InstructionSet_Sve2); #endif // TARGET_ARM64 +#ifdef TARGET_RISCV64 + if (resultflags.HasInstructionSet(InstructionSet_Zbb) && !resultflags.HasInstructionSet(InstructionSet_RiscV64Base)) + resultflags.RemoveInstructionSet(InstructionSet_Zbb); + if (resultflags.HasInstructionSet(InstructionSet_Zba) && !resultflags.HasInstructionSet(InstructionSet_RiscV64Base)) + resultflags.RemoveInstructionSet(InstructionSet_Zba); +#endif // TARGET_RISCV64 #ifdef TARGET_AMD64 if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64)) resultflags.RemoveInstructionSet(InstructionSet_X86Base); if (resultflags.HasInstructionSet(InstructionSet_X86Base_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Base_X64); - if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_SSE_X64)) - resultflags.RemoveInstructionSet(InstructionSet_SSE); - if (resultflags.HasInstructionSet(InstructionSet_SSE_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE)) - resultflags.RemoveInstructionSet(InstructionSet_SSE_X64); - if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE2_X64)) - resultflags.RemoveInstructionSet(InstructionSet_SSE2); - if (resultflags.HasInstructionSet(InstructionSet_SSE2_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) - resultflags.RemoveInstructionSet(InstructionSet_SSE2_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE3_X64)) resultflags.RemoveInstructionSet(InstructionSet_SSE3); if (resultflags.HasInstructionSet(InstructionSet_SSE3_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE3)) @@ -472,6 +467,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_SSE42); if (resultflags.HasInstructionSet(InstructionSet_SSE42_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_SSE42_X64); + if (resultflags.HasInstructionSet(InstructionSet_POPCNT) && !resultflags.HasInstructionSet(InstructionSet_POPCNT_X64)) + resultflags.RemoveInstructionSet(InstructionSet_POPCNT); + if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT)) + resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64); if (resultflags.HasInstructionSet(InstructionSet_AVX) && 
!resultflags.HasInstructionSet(InstructionSet_AVX_X64)) resultflags.RemoveInstructionSet(InstructionSet_AVX); if (resultflags.HasInstructionSet(InstructionSet_AVX_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX)) @@ -480,10 +479,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX2); if (resultflags.HasInstructionSet(InstructionSet_AVX2_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVX2_X64); - if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_AES_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AES); - if (resultflags.HasInstructionSet(InstructionSet_AES_X64) && !resultflags.HasInstructionSet(InstructionSet_AES)) - resultflags.RemoveInstructionSet(InstructionSet_AES_X64); if (resultflags.HasInstructionSet(InstructionSet_BMI1) && !resultflags.HasInstructionSet(InstructionSet_BMI1_X64)) resultflags.RemoveInstructionSet(InstructionSet_BMI1); if (resultflags.HasInstructionSet(InstructionSet_BMI1_X64) && !resultflags.HasInstructionSet(InstructionSet_BMI1)) @@ -500,67 +495,63 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_LZCNT); if (resultflags.HasInstructionSet(InstructionSet_LZCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_LZCNT)) resultflags.RemoveInstructionSet(InstructionSet_LZCNT_X64); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_X64)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_X64) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_X64); - if (resultflags.HasInstructionSet(InstructionSet_POPCNT) && !resultflags.HasInstructionSet(InstructionSet_POPCNT_X64)) - resultflags.RemoveInstructionSet(InstructionSet_POPCNT); - if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT)) - resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64); - if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64)) - resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize)) - resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_X64)) - 
resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_AVX512_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512_X64); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_X64)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512v3) && !resultflags.HasInstructionSet(InstructionSet_AVX512v3_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512v3); + if (resultflags.HasInstructionSet(InstructionSet_AVX512v3_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512v3)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512v3_X64); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_X64)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512_X64); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_X64)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512_X64) && 
!resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512_X64); + if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_AES_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AES); + if (resultflags.HasInstructionSet(InstructionSet_AES_X64) && !resultflags.HasInstructionSet(InstructionSet_AES)) + resultflags.RemoveInstructionSet(InstructionSet_AES_X64); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_X64)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_X64) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VP2INTERSECT) && !resultflags.HasInstructionSet(InstructionSet_AVX512VP2INTERSECT_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512VP2INTERSECT); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VP2INTERSECT_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512VP2INTERSECT)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512VP2INTERSECT_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXIFMA) && !resultflags.HasInstructionSet(InstructionSet_AVXIFMA_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA); + if (resultflags.HasInstructionSet(InstructionSet_AVXIFMA_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXIFMA)) + resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64); if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_GFNI_X64)) resultflags.RemoveInstructionSet(InstructionSet_GFNI); if (resultflags.HasInstructionSet(InstructionSet_GFNI_X64) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_X64); - if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) - resultflags.RemoveInstructionSet(InstructionSet_SSE); - if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) - resultflags.RemoveInstructionSet(InstructionSet_SSE2); - if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_SHA) && !resultflags.HasInstructionSet(InstructionSet_SHA_X64)) + resultflags.RemoveInstructionSet(InstructionSet_SHA); + if (resultflags.HasInstructionSet(InstructionSet_SHA_X64) && !resultflags.HasInstructionSet(InstructionSet_SHA)) + resultflags.RemoveInstructionSet(InstructionSet_SHA_X64); + if (resultflags.HasInstructionSet(InstructionSet_WAITPKG) && !resultflags.HasInstructionSet(InstructionSet_WAITPKG_X64)) + resultflags.RemoveInstructionSet(InstructionSet_WAITPKG); + if (resultflags.HasInstructionSet(InstructionSet_WAITPKG_X64) && !resultflags.HasInstructionSet(InstructionSet_WAITPKG)) + resultflags.RemoveInstructionSet(InstructionSet_WAITPKG_X64); + if 
(resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64)) + resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize)) + resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64); + if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE3); if (resultflags.HasInstructionSet(InstructionSet_SSSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE3)) resultflags.RemoveInstructionSet(InstructionSet_SSSE3); @@ -570,69 +561,59 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_SSE42); if (resultflags.HasInstructionSet(InstructionSet_POPCNT) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_POPCNT); - if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) + if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_POPCNT)) resultflags.RemoveInstructionSet(InstructionSet_AVX); - if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_AVX)) - resultflags.RemoveInstructionSet(InstructionSet_AVX2); if (resultflags.HasInstructionSet(InstructionSet_BMI1) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_BMI1); if (resultflags.HasInstructionSet(InstructionSet_BMI2) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_BMI2); if (resultflags.HasInstructionSet(InstructionSet_FMA) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_FMA); - if (resultflags.HasInstructionSet(InstructionSet_LZCNT) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) + if (resultflags.HasInstructionSet(InstructionSet_LZCNT) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_LZCNT); - if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) + if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_MOVBE); - if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX); - if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_FMA)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_BMI1)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_BMI2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_FMA)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_LZCNT)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_MOVBE)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_BMI1)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_BMI2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_FMA)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_LZCNT)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_MOVBE)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_AVX512v3) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512v3); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX512v3)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_AES); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_AES)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VP2INTERSECT) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512VP2INTERSECT); + if (resultflags.HasInstructionSet(InstructionSet_AVXIFMA) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); - if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) - resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41)) + if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_GFNI); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) 
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); @@ -640,55 +621,45 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); - if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) + if (resultflags.HasInstructionSet(InstructionSet_SHA) && 
!resultflags.HasInstructionSet(InstructionSet_X86Base)) + resultflags.RemoveInstructionSet(InstructionSet_SHA); + if (resultflags.HasInstructionSet(InstructionSet_AES_V256) && !resultflags.HasInstructionSet(InstructionSet_AES)) + resultflags.RemoveInstructionSet(InstructionSet_AES_V256); + if (resultflags.HasInstructionSet(InstructionSet_AES_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_AES_V256); + if (resultflags.HasInstructionSet(InstructionSet_AES_V512) && !resultflags.HasInstructionSet(InstructionSet_AES_V256)) + resultflags.RemoveInstructionSet(InstructionSet_AES_V512); + if (resultflags.HasInstructionSet(InstructionSet_AES_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) + resultflags.RemoveInstructionSet(InstructionSet_AES_V512); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_AES_V256)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AES_V512)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_WAITPKG) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) + resultflags.RemoveInstructionSet(InstructionSet_WAITPKG); + if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) + resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_Vector256); - if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); - if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_VectorT128); if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_VectorT512); #endif // TARGET_AMD64 #ifdef TARGET_X86 - if (resultflags.HasInstructionSet(InstructionSet_SSE) && 
!resultflags.HasInstructionSet(InstructionSet_X86Base)) - resultflags.RemoveInstructionSet(InstructionSet_SSE); - if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) - resultflags.RemoveInstructionSet(InstructionSet_SSE2); - if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE3); if (resultflags.HasInstructionSet(InstructionSet_SSSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE3)) resultflags.RemoveInstructionSet(InstructionSet_SSSE3); @@ -698,69 +669,59 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_SSE42); if (resultflags.HasInstructionSet(InstructionSet_POPCNT) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_POPCNT); - if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) + if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_POPCNT)) resultflags.RemoveInstructionSet(InstructionSet_AVX); - if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_AVX)) - resultflags.RemoveInstructionSet(InstructionSet_AVX2); if (resultflags.HasInstructionSet(InstructionSet_BMI1) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_BMI1); if (resultflags.HasInstructionSet(InstructionSet_BMI2) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_BMI2); if (resultflags.HasInstructionSet(InstructionSet_FMA) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_FMA); - if (resultflags.HasInstructionSet(InstructionSet_LZCNT) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) + if (resultflags.HasInstructionSet(InstructionSet_LZCNT) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_LZCNT); - if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) + if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_MOVBE); - if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX); - if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_FMA)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_BMI1)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_BMI2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_FMA)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_LZCNT)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_MOVBE)) + resultflags.RemoveInstructionSet(InstructionSet_AVX2); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_BMI1)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_BMI2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_FMA)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_LZCNT)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_MOVBE)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_AVX512v3) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512v3); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX512v3)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_AES); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_AES)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VP2INTERSECT) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512VP2INTERSECT); + if (resultflags.HasInstructionSet(InstructionSet_AVXIFMA) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); - if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) - resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41)) + if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_GFNI); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) 
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); @@ -768,47 +729,41 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); - if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) + if (resultflags.HasInstructionSet(InstructionSet_SHA) && 
!resultflags.HasInstructionSet(InstructionSet_X86Base)) + resultflags.RemoveInstructionSet(InstructionSet_SHA); + if (resultflags.HasInstructionSet(InstructionSet_AES_V256) && !resultflags.HasInstructionSet(InstructionSet_AES)) + resultflags.RemoveInstructionSet(InstructionSet_AES_V256); + if (resultflags.HasInstructionSet(InstructionSet_AES_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_AES_V256); + if (resultflags.HasInstructionSet(InstructionSet_AES_V512) && !resultflags.HasInstructionSet(InstructionSet_AES_V256)) + resultflags.RemoveInstructionSet(InstructionSet_AES_V512); + if (resultflags.HasInstructionSet(InstructionSet_AES_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) + resultflags.RemoveInstructionSet(InstructionSet_AES_V512); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_AES_V256)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AES_V512)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_WAITPKG) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) + resultflags.RemoveInstructionSet(InstructionSet_WAITPKG); + if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) + resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_Vector256); - if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); - if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_VectorT128); if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_VectorT512); #endif // TARGET_X86 @@ -876,20 +831,24 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return 
"Sve"; case InstructionSet_Sve_Arm64 : return "Sve_Arm64"; + case InstructionSet_Sve2 : + return "Sve2"; + case InstructionSet_Sve2_Arm64 : + return "Sve2_Arm64"; #endif // TARGET_ARM64 +#ifdef TARGET_RISCV64 + case InstructionSet_RiscV64Base : + return "RiscV64Base"; + case InstructionSet_Zba : + return "Zba"; + case InstructionSet_Zbb : + return "Zbb"; +#endif // TARGET_RISCV64 #ifdef TARGET_AMD64 case InstructionSet_X86Base : return "X86Base"; case InstructionSet_X86Base_X64 : return "X86Base_X64"; - case InstructionSet_SSE : - return "SSE"; - case InstructionSet_SSE_X64 : - return "SSE_X64"; - case InstructionSet_SSE2 : - return "SSE2"; - case InstructionSet_SSE2_X64 : - return "SSE2_X64"; case InstructionSet_SSE3 : return "SSE3"; case InstructionSet_SSE3_X64 : @@ -906,6 +865,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "SSE42"; case InstructionSet_SSE42_X64 : return "SSE42_X64"; + case InstructionSet_POPCNT : + return "POPCNT"; + case InstructionSet_POPCNT_X64 : + return "POPCNT_X64"; case InstructionSet_AVX : return "AVX"; case InstructionSet_AVX_X64 : @@ -914,10 +877,6 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX2"; case InstructionSet_AVX2_X64 : return "AVX2_X64"; - case InstructionSet_AES : - return "AES"; - case InstructionSet_AES_X64 : - return "AES_X64"; case InstructionSet_BMI1 : return "BMI1"; case InstructionSet_BMI1_X64 : @@ -934,90 +893,50 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "LZCNT"; case InstructionSet_LZCNT_X64 : return "LZCNT_X64"; - case InstructionSet_PCLMULQDQ : - return "PCLMULQDQ"; - case InstructionSet_PCLMULQDQ_X64 : - return "PCLMULQDQ_X64"; - case InstructionSet_PCLMULQDQ_V256 : - return "PCLMULQDQ_V256"; - case InstructionSet_PCLMULQDQ_V512 : - return "PCLMULQDQ_V512"; - case InstructionSet_POPCNT : - return "POPCNT"; - case InstructionSet_POPCNT_X64 : - return "POPCNT_X64"; - case InstructionSet_Vector128 : - return "Vector128"; - case InstructionSet_Vector256 : - return "Vector256"; - case InstructionSet_Vector512 : - return "Vector512"; - case InstructionSet_AVXVNNI : - return "AVXVNNI"; - case InstructionSet_AVXVNNI_X64 : - return "AVXVNNI_X64"; case InstructionSet_MOVBE : return "MOVBE"; - case InstructionSet_X86Serialize : - return "X86Serialize"; - case InstructionSet_X86Serialize_X64 : - return "X86Serialize_X64"; - case InstructionSet_EVEX : - return "EVEX"; - case InstructionSet_AVX512F : - return "AVX512F"; - case InstructionSet_AVX512F_X64 : - return "AVX512F_X64"; - case InstructionSet_AVX512F_VL : - return "AVX512F_VL"; - case InstructionSet_AVX512BW : - return "AVX512BW"; - case InstructionSet_AVX512BW_X64 : - return "AVX512BW_X64"; - case InstructionSet_AVX512BW_VL : - return "AVX512BW_VL"; - case InstructionSet_AVX512CD : - return "AVX512CD"; - case InstructionSet_AVX512CD_X64 : - return "AVX512CD_X64"; - case InstructionSet_AVX512CD_VL : - return "AVX512CD_VL"; - case InstructionSet_AVX512DQ : - return "AVX512DQ"; - case InstructionSet_AVX512DQ_X64 : - return "AVX512DQ_X64"; - case InstructionSet_AVX512DQ_VL : - return "AVX512DQ_VL"; + case InstructionSet_AVX512 : + return "AVX512"; + case InstructionSet_AVX512_X64 : + return "AVX512_X64"; case InstructionSet_AVX512VBMI : return "AVX512VBMI"; case InstructionSet_AVX512VBMI_X64 : return "AVX512VBMI_X64"; - case InstructionSet_AVX512VBMI_VL : - return "AVX512VBMI_VL"; + case InstructionSet_AVX512v3 : + return "AVX512v3"; + case 
InstructionSet_AVX512v3_X64 : + return "AVX512v3_X64"; case InstructionSet_AVX10v1 : return "AVX10v1"; case InstructionSet_AVX10v1_X64 : return "AVX10v1_X64"; - case InstructionSet_AVX10v1_V512 : - return "AVX10v1_V512"; - case InstructionSet_AVX10v1_V512_X64 : - return "AVX10v1_V512_X64"; - case InstructionSet_VectorT128 : - return "VectorT128"; - case InstructionSet_VectorT256 : - return "VectorT256"; - case InstructionSet_VectorT512 : - return "VectorT512"; - case InstructionSet_APX : - return "APX"; case InstructionSet_AVX10v2 : return "AVX10v2"; case InstructionSet_AVX10v2_X64 : return "AVX10v2_X64"; - case InstructionSet_AVX10v2_V512 : - return "AVX10v2_V512"; - case InstructionSet_AVX10v2_V512_X64 : - return "AVX10v2_V512_X64"; + case InstructionSet_APX : + return "APX"; + case InstructionSet_AES : + return "AES"; + case InstructionSet_AES_X64 : + return "AES_X64"; + case InstructionSet_PCLMULQDQ : + return "PCLMULQDQ"; + case InstructionSet_PCLMULQDQ_X64 : + return "PCLMULQDQ_X64"; + case InstructionSet_AVX512VP2INTERSECT : + return "AVX512VP2INTERSECT"; + case InstructionSet_AVX512VP2INTERSECT_X64 : + return "AVX512VP2INTERSECT_X64"; + case InstructionSet_AVXIFMA : + return "AVXIFMA"; + case InstructionSet_AVXIFMA_X64 : + return "AVXIFMA_X64"; + case InstructionSet_AVXVNNI : + return "AVXVNNI"; + case InstructionSet_AVXVNNI_X64 : + return "AVXVNNI_X64"; case InstructionSet_GFNI : return "GFNI"; case InstructionSet_GFNI_X64 : @@ -1026,14 +945,42 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "GFNI_V256"; case InstructionSet_GFNI_V512 : return "GFNI_V512"; + case InstructionSet_SHA : + return "SHA"; + case InstructionSet_SHA_X64 : + return "SHA_X64"; + case InstructionSet_AES_V256 : + return "AES_V256"; + case InstructionSet_AES_V512 : + return "AES_V512"; + case InstructionSet_PCLMULQDQ_V256 : + return "PCLMULQDQ_V256"; + case InstructionSet_PCLMULQDQ_V512 : + return "PCLMULQDQ_V512"; + case InstructionSet_WAITPKG : + return "WAITPKG"; + case InstructionSet_WAITPKG_X64 : + return "WAITPKG_X64"; + case InstructionSet_X86Serialize : + return "X86Serialize"; + case InstructionSet_X86Serialize_X64 : + return "X86Serialize_X64"; + case InstructionSet_Vector128 : + return "Vector128"; + case InstructionSet_Vector256 : + return "Vector256"; + case InstructionSet_Vector512 : + return "Vector512"; + case InstructionSet_VectorT128 : + return "VectorT128"; + case InstructionSet_VectorT256 : + return "VectorT256"; + case InstructionSet_VectorT512 : + return "VectorT512"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : return "X86Base"; - case InstructionSet_SSE : - return "SSE"; - case InstructionSet_SSE2 : - return "SSE2"; case InstructionSet_SSE3 : return "SSE3"; case InstructionSet_SSSE3 : @@ -1042,12 +989,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "SSE41"; case InstructionSet_SSE42 : return "SSE42"; + case InstructionSet_POPCNT : + return "POPCNT"; case InstructionSet_AVX : return "AVX"; case InstructionSet_AVX2 : return "AVX2"; - case InstructionSet_AES : - return "AES"; case InstructionSet_BMI1 : return "BMI1"; case InstructionSet_BMI2 : @@ -1056,70 +1003,62 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "FMA"; case InstructionSet_LZCNT : return "LZCNT"; + case InstructionSet_MOVBE : + return "MOVBE"; + case InstructionSet_AVX512 : + return "AVX512"; + case InstructionSet_AVX512VBMI : + return "AVX512VBMI"; + case 
InstructionSet_AVX512v3 : + return "AVX512v3"; + case InstructionSet_AVX10v1 : + return "AVX10v1"; + case InstructionSet_AVX10v2 : + return "AVX10v2"; + case InstructionSet_APX : + return "APX"; + case InstructionSet_AES : + return "AES"; case InstructionSet_PCLMULQDQ : return "PCLMULQDQ"; + case InstructionSet_AVX512VP2INTERSECT : + return "AVX512VP2INTERSECT"; + case InstructionSet_AVXIFMA : + return "AVXIFMA"; + case InstructionSet_AVXVNNI : + return "AVXVNNI"; + case InstructionSet_GFNI : + return "GFNI"; + case InstructionSet_GFNI_V256 : + return "GFNI_V256"; + case InstructionSet_GFNI_V512 : + return "GFNI_V512"; + case InstructionSet_SHA : + return "SHA"; + case InstructionSet_AES_V256 : + return "AES_V256"; + case InstructionSet_AES_V512 : + return "AES_V512"; case InstructionSet_PCLMULQDQ_V256 : return "PCLMULQDQ_V256"; case InstructionSet_PCLMULQDQ_V512 : return "PCLMULQDQ_V512"; - case InstructionSet_POPCNT : - return "POPCNT"; + case InstructionSet_WAITPKG : + return "WAITPKG"; + case InstructionSet_X86Serialize : + return "X86Serialize"; case InstructionSet_Vector128 : return "Vector128"; case InstructionSet_Vector256 : return "Vector256"; case InstructionSet_Vector512 : return "Vector512"; - case InstructionSet_AVXVNNI : - return "AVXVNNI"; - case InstructionSet_MOVBE : - return "MOVBE"; - case InstructionSet_X86Serialize : - return "X86Serialize"; - case InstructionSet_EVEX : - return "EVEX"; - case InstructionSet_AVX512F : - return "AVX512F"; - case InstructionSet_AVX512F_VL : - return "AVX512F_VL"; - case InstructionSet_AVX512BW : - return "AVX512BW"; - case InstructionSet_AVX512BW_VL : - return "AVX512BW_VL"; - case InstructionSet_AVX512CD : - return "AVX512CD"; - case InstructionSet_AVX512CD_VL : - return "AVX512CD_VL"; - case InstructionSet_AVX512DQ : - return "AVX512DQ"; - case InstructionSet_AVX512DQ_VL : - return "AVX512DQ_VL"; - case InstructionSet_AVX512VBMI : - return "AVX512VBMI"; - case InstructionSet_AVX512VBMI_VL : - return "AVX512VBMI_VL"; - case InstructionSet_AVX10v1 : - return "AVX10v1"; - case InstructionSet_AVX10v1_V512 : - return "AVX10v1_V512"; case InstructionSet_VectorT128 : return "VectorT128"; case InstructionSet_VectorT256 : return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; - case InstructionSet_APX : - return "APX"; - case InstructionSet_AVX10v2 : - return "AVX10v2"; - case InstructionSet_AVX10v2_V512 : - return "AVX10v2_V512"; - case InstructionSet_GFNI : - return "GFNI"; - case InstructionSet_GFNI_V256 : - return "GFNI_V256"; - case InstructionSet_GFNI_V512 : - return "GFNI_V512"; #endif // TARGET_X86 default: @@ -1153,96 +1092,142 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_Rcpc2: return InstructionSet_Rcpc2; case READYTORUN_INSTRUCTION_Sve: return InstructionSet_Sve; + case READYTORUN_INSTRUCTION_Sve2: return InstructionSet_Sve2; #endif // TARGET_ARM64 +#ifdef TARGET_RISCV64 + case READYTORUN_INSTRUCTION_RiscV64Base: return InstructionSet_RiscV64Base; + case READYTORUN_INSTRUCTION_Zba: return InstructionSet_Zba; + case READYTORUN_INSTRUCTION_Zbb: return InstructionSet_Zbb; +#endif // TARGET_RISCV64 #ifdef TARGET_AMD64 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; - case READYTORUN_INSTRUCTION_Sse: return InstructionSet_SSE; - case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_SSE2; + case READYTORUN_INSTRUCTION_Sse: return InstructionSet_X86Base; + 
+        case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_X86Base;
         case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3;
         case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_SSSE3;
         case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_SSE41;
         case READYTORUN_INSTRUCTION_Sse42: return InstructionSet_SSE42;
+        case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
         case READYTORUN_INSTRUCTION_Avx: return InstructionSet_AVX;
         case READYTORUN_INSTRUCTION_Avx2: return InstructionSet_AVX2;
-        case READYTORUN_INSTRUCTION_Aes: return InstructionSet_AES;
         case READYTORUN_INSTRUCTION_Bmi1: return InstructionSet_BMI1;
         case READYTORUN_INSTRUCTION_Bmi2: return InstructionSet_BMI2;
+        case READYTORUN_INSTRUCTION_F16C: return InstructionSet_AVX2;
         case READYTORUN_INSTRUCTION_Fma: return InstructionSet_FMA;
         case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
-        case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
-        case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_PCLMULQDQ_V256;
-        case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_PCLMULQDQ_V512;
-        case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
-        case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
         case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE;
-        case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize;
-        case READYTORUN_INSTRUCTION_EVEX: return InstructionSet_EVEX;
-        case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512F;
-        case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512F_VL;
-        case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512BW;
-        case READYTORUN_INSTRUCTION_Avx512BW_VL: return InstructionSet_AVX512BW_VL;
-        case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512CD;
-        case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512CD_VL;
-        case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512DQ;
-        case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
+        case READYTORUN_INSTRUCTION_Evex: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512BW_VL: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512Ifma: return InstructionSet_AVX512VBMI;
         case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
-        case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
+        case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI;
+        case READYTORUN_INSTRUCTION_Avx512Bitalg: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Bitalg_VL: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vbmi2: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vbmi2_VL: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vnni: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vpopcntdq: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vpopcntdq_VL: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Bf16: return InstructionSet_AVX10v1;
+        case READYTORUN_INSTRUCTION_Avx512Bf16_VL: return InstructionSet_AVX10v1;
+        case READYTORUN_INSTRUCTION_Avx512Fp16: return InstructionSet_AVX10v1;
+        case READYTORUN_INSTRUCTION_Avx512Fp16_VL: return InstructionSet_AVX10v1;
         case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1;
-        case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512;
-        case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
-        case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
-        case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
-        case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
+        case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1;
         case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2;
-        case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512;
+        case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2;
+        case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
+        case READYTORUN_INSTRUCTION_Aes: return InstructionSet_AES;
+        case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
+        case READYTORUN_INSTRUCTION_Avx512Vp2intersect: return InstructionSet_AVX512VP2INTERSECT;
+        case READYTORUN_INSTRUCTION_Avx512Vp2intersect_VL: return InstructionSet_AVX512VP2INTERSECT;
+        case READYTORUN_INSTRUCTION_AvxIfma: return InstructionSet_AVXIFMA;
+        case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
         case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI;
         case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256;
         case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512;
+        case READYTORUN_INSTRUCTION_Sha: return InstructionSet_SHA;
+        case READYTORUN_INSTRUCTION_Aes_V256: return InstructionSet_AES_V256;
+        case READYTORUN_INSTRUCTION_Aes_V512: return InstructionSet_AES_V512;
+        case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_PCLMULQDQ_V256;
+        case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_PCLMULQDQ_V512;
+        case READYTORUN_INSTRUCTION_WaitPkg: return InstructionSet_WAITPKG;
+        case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize;
+        case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
+        case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
+        case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
         case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
-        case READYTORUN_INSTRUCTION_Sse: return InstructionSet_SSE;
-        case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_SSE2;
+        case READYTORUN_INSTRUCTION_Sse: return InstructionSet_X86Base;
+        case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_X86Base;
         case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3;
         case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_SSSE3;
         case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_SSE41;
         case READYTORUN_INSTRUCTION_Sse42: return InstructionSet_SSE42;
+        case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
         case READYTORUN_INSTRUCTION_Avx: return InstructionSet_AVX;
         case READYTORUN_INSTRUCTION_Avx2: return InstructionSet_AVX2;
-        case READYTORUN_INSTRUCTION_Aes: return InstructionSet_AES;
         case READYTORUN_INSTRUCTION_Bmi1: return InstructionSet_BMI1;
         case READYTORUN_INSTRUCTION_Bmi2: return InstructionSet_BMI2;
+        case READYTORUN_INSTRUCTION_F16C: return InstructionSet_AVX2;
         case READYTORUN_INSTRUCTION_Fma: return InstructionSet_FMA;
         case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
-        case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
-        case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_PCLMULQDQ_V256;
-        case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_PCLMULQDQ_V512;
-        case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
-        case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
         case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE;
-        case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize;
-        case READYTORUN_INSTRUCTION_EVEX: return InstructionSet_EVEX;
-        case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512F;
-        case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512F_VL;
-        case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512BW;
-        case READYTORUN_INSTRUCTION_Avx512BW_VL: return InstructionSet_AVX512BW_VL;
-        case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512CD;
-        case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512CD_VL;
-        case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512DQ;
-        case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
+        case READYTORUN_INSTRUCTION_Evex: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512BW_VL: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512;
+        case READYTORUN_INSTRUCTION_Avx512Ifma: return InstructionSet_AVX512VBMI;
         case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
-        case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
+        case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI;
+        case READYTORUN_INSTRUCTION_Avx512Bitalg: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Bitalg_VL: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vbmi2: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vbmi2_VL: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vnni: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vpopcntdq: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Vpopcntdq_VL: return InstructionSet_AVX512v3;
+        case READYTORUN_INSTRUCTION_Avx512Bf16: return InstructionSet_AVX10v1;
+        case READYTORUN_INSTRUCTION_Avx512Bf16_VL: return InstructionSet_AVX10v1;
+        case READYTORUN_INSTRUCTION_Avx512Fp16: return InstructionSet_AVX10v1;
+        case READYTORUN_INSTRUCTION_Avx512Fp16_VL: return InstructionSet_AVX10v1;
         case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1;
-        case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512;
-        case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
-        case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
-        case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
-        case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
+        case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1;
         case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2;
-        case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512;
+        case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2;
+        case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
+        case READYTORUN_INSTRUCTION_Aes: return InstructionSet_AES;
+        case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
+        case READYTORUN_INSTRUCTION_Avx512Vp2intersect: return InstructionSet_AVX512VP2INTERSECT;
+        case READYTORUN_INSTRUCTION_Avx512Vp2intersect_VL: return InstructionSet_AVX512VP2INTERSECT;
+        case READYTORUN_INSTRUCTION_AvxIfma: return InstructionSet_AVXIFMA;
+        case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
         case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI;
         case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256;
         case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512;
+        case READYTORUN_INSTRUCTION_Sha: return InstructionSet_SHA;
+        case READYTORUN_INSTRUCTION_Aes_V256: return InstructionSet_AES_V256;
+        case READYTORUN_INSTRUCTION_Aes_V512: return InstructionSet_AES_V512;
+        case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_PCLMULQDQ_V256;
+        case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_PCLMULQDQ_V512;
+        case READYTORUN_INSTRUCTION_WaitPkg: return InstructionSet_WAITPKG;
+        case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize;
+        case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
+        case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
+        case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
 #endif // TARGET_X86
         default:
diff --git a/src/coreclr/inc/corjitflags.h b/src/coreclr/inc/corjitflags.h
index b7de9711f07f..d7a8349a81d6 100644
--- a/src/coreclr/inc/corjitflags.h
+++ b/src/coreclr/inc/corjitflags.h
@@ -40,10 +40,10 @@ class CORJIT_FLAGS
         CORJIT_FLAG_ALT_JIT = 8, // JIT should consider itself an ALT_JIT
         CORJIT_FLAG_FROZEN_ALLOC_ALLOWED = 9, // JIT is allowed to use *_MAYBEFROZEN allocators
         // CORJIT_FLAG_UNUSED = 10,
-        CORJIT_FLAG_READYTORUN = 11, // Use version-resilient code generation
+        CORJIT_FLAG_AOT = 11, // Do ahead-of-time code generation (ReadyToRun or NativeAOT)
         CORJIT_FLAG_PROF_ENTERLEAVE = 12, // Instrument prologues/epilogues
         CORJIT_FLAG_PROF_NO_PINVOKE_INLINE = 13, // Disables PInvoke inlining
-        CORJIT_FLAG_PREJIT = 14, // prejit is the execution engine.
+ // CORJIT_FLAG_UNUSED = 14, CORJIT_FLAG_RELOC = 15, // Generate relocatable code CORJIT_FLAG_IL_STUB = 16, // method is an IL stub CORJIT_FLAG_PROCSPLIT = 17, // JIT should separate code into hot and cold sections @@ -63,11 +63,7 @@ class CORJIT_FLAGS CORJIT_FLAG_RELATIVE_CODE_RELOCS = 29, // JIT should generate PC-relative address computations instead of EE relocation records CORJIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention #endif - -#if defined(TARGET_X86) || defined(TARGET_AMD64) - CORJIT_FLAG_VECTOR512_THROTTLING = 31, // On x86/x64, 512-bit vector usage may incur CPU frequency throttling -#endif - + CORJIT_FLAG_ASYNC = 31, // Generate code for use as an async function }; CORJIT_FLAGS() diff --git a/src/coreclr/inc/corpriv.h b/src/coreclr/inc/corpriv.h index ee4c149d29a7..8086a3f9cb58 100644 --- a/src/coreclr/inc/corpriv.h +++ b/src/coreclr/inc/corpriv.h @@ -7,34 +7,22 @@ #ifndef _CORPRIV_H_ #define _CORPRIV_H_ -#if _MSC_VER >= 1000 -#pragma once -#endif // _MSC_VER >= 1000 -// %%Includes: --------------------------------------------------------------- -// avoid taking DLL import hit on intra-DLL calls -#define NODLLIMPORT #include #include "cor.h" #include "corimage.h" #include "metadata.h" -#include -// -interface IAssemblyName; class UTSemReadWrite; -// Helper function to get a pointer to the Dispenser interface. -STDAPI MetaDataGetDispenser( // Return HRESULT - REFCLSID rclsid, // The class to desired. - REFIID riid, // Interface wanted on class factory. - LPVOID FAR *ppv); // Return interface pointer here. - -BOOL RuntimeFileNotFound(HRESULT hr); +// Creation function to get IMetaDataDispenser(Ex) interface. +STDAPI CreateMetaDataDispenser( + REFIID riid, + void ** pMetaDataDispenserOut); // Helper function to get an Internal interface with an in-memory metadata section -STDAPI GetMetaDataInternalInterface( +STDAPI GetMDInternalInterface( LPVOID pData, // [IN] in memory metadata section ULONG cbData, // [IN] size of the metadata section DWORD flags, // [IN] CorOpenFlags @@ -42,13 +30,13 @@ STDAPI GetMetaDataInternalInterface( void **ppv); // [OUT] returned interface // Helper function to get an internal scopeless interface given a scope. -STDAPI GetMetaDataInternalInterfaceFromPublic( +STDAPI GetMDInternalInterfaceFromPublic( IUnknown *pv, // [IN] Given interface REFIID riid, // [IN] desired interface void **ppv); // [OUT] returned interface // Helper function to get an internal scopeless interface given a scope. -STDAPI GetMetaDataPublicInterfaceFromInternal( +STDAPI GetMDPublicInterfaceFromInternal( void *pv, // [IN] Given interface REFIID riid, // [IN] desired interface void **ppv); // [OUT] returned interface @@ -60,16 +48,12 @@ STDAPI ConvertMDInternalImport( // S_OK or error. IMDInternalImport *pIMD, // [IN] The metadata to be updated. IMDInternalImport **ppIMD); // [OUT] Put RW interface here. -STDAPI GetAssemblyMDInternalImport( // Return code. - LPCWSTR szFileName, // [IN] The scope to open. - REFIID riid, // [IN] The interface desired. - IUnknown **ppIUnk); // [OUT] Return interface on success. - -STDAPI GetAssemblyMDInternalImportByStream( // Return code. - IStream *pIStream, // [IN] The IStream for the file - UINT64 AssemblyId, // [IN] Unique Id for the assembly - REFIID riid, // [IN] The interface desired. - IUnknown **ppIUnk); // [OUT] Return interface on success. +// Update an existing metadata importer with a buffer +STDAPI MDReOpenMetaDataWithMemory( + void *pImport, // [IN] Given scope. 
public interfaces + LPCVOID pData, // [in] Location of scope data. + ULONG cbData, // [in] Size of the data pointed to by pData. + DWORD dwReOpenFlags); // [in] ReOpen flags enum MDInternalImportFlags @@ -82,36 +66,6 @@ enum MDInternalImportFlags }; // enum MDInternalImportFlags - -STDAPI GetAssemblyMDInternalImportEx( // Return code. - LPCWSTR szFileName, // [IN] The scope to open. - REFIID riid, // [IN] The interface desired. - MDInternalImportFlags flags, // [in] Flags to control opening the assembly - IUnknown **ppIUnk, // [OUT] Return interface on success. - HANDLE hFile = INVALID_HANDLE_VALUE); - -STDAPI GetAssemblyMDInternalImportByStreamEx( // Return code. - IStream *pIStream, // [IN] The IStream for the file - UINT64 AssemblyId, // [IN] Unique Id for the assembly - REFIID riid, // [IN] The interface desired. - MDInternalImportFlags flags, // [in] Flags to control opening the assembly - IUnknown **ppIUnk); // [OUT] Return interface on success. - - -// Returns part of the "Zap string" which describes the properties of a native image - -__success(SUCCEEDED(return)) -STDAPI GetNativeImageDescription( - _In_z_ LPCWSTR wzCustomString, // [IN] Custom string of the native image - DWORD dwConfigMask, // [IN] Config mask of the native image - _Out_writes_to_opt_(*pdwLength,*pdwLength) LPWSTR pwzZapInfo,// [OUT] The description string. Can be NULL to find the size of buffer to allocate - LPDWORD pdwLength); // [IN/OUT] Length of the pwzZapInfo buffer on IN. - // Number of WCHARs (including termintating NULL) on OUT - - -class CQuickBytes; - - // predefined constant for parent token for global functions #define COR_GLOBAL_PARENT_TOKEN TokenFromRid(1, mdtTypeDef) @@ -402,7 +356,7 @@ DECLARE_INTERFACE_(ICeeGenInternal, IUnknown) // Private interface exposed by // AssemblyMDInternalImport - gives us access to the internally stored IMDInternalImport*. // -// RegMeta - supports the internal GetMetaDataInternalInterfaceFromPublic() "api". +// RegMeta - supports the internal GetMDInternalInterfaceFromPublic() "api". // // {92B2FEF9-F7F5-420d-AD42-AECEEE10A1EF} EXTERN_GUID(IID_IGetIMDInternalImport, 0x92b2fef9, 0xf7f5, 0x420d, 0xad, 0x42, 0xae, 0xce, 0xee, 0x10, 0xa1, 0xef); diff --git a/src/coreclr/inc/corprof.idl b/src/coreclr/inc/corprof.idl index 33e3619a9622..dadacf934847 100644 --- a/src/coreclr/inc/corprof.idl +++ b/src/coreclr/inc/corprof.idl @@ -592,6 +592,8 @@ typedef enum COR_PRF_MONITOR_CLASS_LOADS | COR_PRF_MONITOR_EXCEPTIONS | COR_PRF_MONITOR_JIT_COMPILATION | + COR_PRF_DISABLE_INLINING | + COR_PRF_DISABLE_OPTIMIZATIONS | COR_PRF_ENABLE_REJIT, COR_PRF_ALLOWABLE_NOTIFICATION_PROFILER @@ -625,8 +627,6 @@ typedef enum COR_PRF_MONITOR_REMOTING_ASYNC | COR_PRF_ENABLE_INPROC_DEBUGGING | COR_PRF_ENABLE_JIT_MAPS | - COR_PRF_DISABLE_OPTIMIZATIONS | - COR_PRF_DISABLE_INLINING | COR_PRF_ENABLE_OBJECT_ALLOCATED | COR_PRF_ENABLE_FUNCTION_ARGS | COR_PRF_ENABLE_FUNCTION_RETVAL | @@ -4292,10 +4292,10 @@ interface ICorProfilerInfo14 : ICorProfilerInfo13 [out, size_is(cObjectRanges), length_is(*pcObjectRanges)] COR_PRF_NONGC_HEAP_RANGE ranges[]); - // EventPipeCreateProvider2 allows you to pass in a callback which will be called whenever a + // EventPipeCreateProvider2 allows you to pass in a callback which will be called whenever a // session enables your provider. The behavior of the callback matches the ETW behavior which - // can be counter intuitive. You will get a callback any time a session changes with the updated - // global keywords enabled for your session. 
The is_enabled parameter will be true if any + // can be counter intuitive. You will get a callback any time a session changes with the updated + // global keywords enabled for your session. The is_enabled parameter will be true if any // session has your provider enabled. The source_id parameter will be a valid id if the callback // was triggered due to a session enabling and it will be NULL if it was triggered due to a session // disabling. diff --git a/src/coreclr/inc/crosscomp.h b/src/coreclr/inc/crosscomp.h index c3c1f97ddeed..4c30eb01ba8a 100644 --- a/src/coreclr/inc/crosscomp.h +++ b/src/coreclr/inc/crosscomp.h @@ -4,13 +4,27 @@ // crosscomp.h - cross-compilation enablement structures. // - #pragma once +#include + #if (!defined(HOST_64BIT) && defined(TARGET_64BIT)) || (defined(HOST_64BIT) && !defined(TARGET_64BIT)) #define CROSSBITNESS_COMPILE + +#ifndef CROSS_COMPILE +#define CROSS_COMPILE +#endif // !CROSS_COMPILE + #endif +#if defined(TARGET_WINDOWS) && !defined(HOST_WINDOWS) && !defined(CROSS_COMPILE) +#define CROSS_COMPILE +#endif // TARGET_WINDOWS && !HOST_WINDOWS && !CROSS_COMPILE + +#if defined(TARGET_UNIX) && !defined(HOST_UNIX) && !defined(CROSS_COMPILE) +#define CROSS_COMPILE +#endif // TARGET_UNIX && !HOST_UNIX && !CROSS_COMPILE + // Target platform-specific library naming // #ifdef TARGET_WINDOWS @@ -667,6 +681,36 @@ typedef struct _T_KNONVOLATILE_CONTEXT_POINTERS { #define T_DISPATCHER_CONTEXT DISPATCHER_CONTEXT #define PT_DISPATCHER_CONTEXT PDISPATCHER_CONTEXT +#if defined(HOST_WINDOWS) && defined(TARGET_X86) +typedef struct _KNONVOLATILE_CONTEXT { + + DWORD Edi; + DWORD Esi; + DWORD Ebx; + DWORD Ebp; + +} KNONVOLATILE_CONTEXT, *PKNONVOLATILE_CONTEXT; + +typedef struct _KNONVOLATILE_CONTEXT_POINTERS_EX +{ + // The ordering of these fields should be aligned with that + // of corresponding fields in CONTEXT + // + // (See FillRegDisplay in inc/regdisp.h for details) + PDWORD Edi; + PDWORD Esi; + PDWORD Ebx; + PDWORD Edx; + PDWORD Ecx; + PDWORD Eax; + + PDWORD Ebp; + +} KNONVOLATILE_CONTEXT_POINTERS_EX, *PKNONVOLATILE_CONTEXT_POINTERS_EX; + +#define KNONVOLATILE_CONTEXT_POINTERS KNONVOLATILE_CONTEXT_POINTERS_EX +#define PKNONVOLATILE_CONTEXT_POINTERS PKNONVOLATILE_CONTEXT_POINTERS_EX +#endif #define T_KNONVOLATILE_CONTEXT_POINTERS KNONVOLATILE_CONTEXT_POINTERS #define PT_KNONVOLATILE_CONTEXT_POINTERS PKNONVOLATILE_CONTEXT_POINTERS @@ -675,75 +719,42 @@ typedef struct _T_KNONVOLATILE_CONTEXT_POINTERS { #endif -#if defined(DACCESS_COMPILE) && defined(TARGET_UNIX) -// This is a TARGET oriented copy of CRITICAL_SECTION and PAL_CS_NATIVE_DATA_SIZE -// It is configured based on TARGET configuration rather than HOST configuration -// There is validation code in src/coreclr/vm/crst.cpp to keep these from -// getting out of sync - -#define T_CRITICAL_SECTION_VALIDATION_MESSAGE "T_CRITICAL_SECTION validation failed. 
It is not in sync with CRITICAL_SECTION" - -#if defined(TARGET_OSX) && defined(TARGET_X86) -#define DAC_CS_NATIVE_DATA_SIZE 76 -#elif defined(TARGET_APPLE) && defined(TARGET_AMD64) -#define DAC_CS_NATIVE_DATA_SIZE 120 -#elif defined(TARGET_APPLE) && defined(TARGET_ARM64) -#define DAC_CS_NATIVE_DATA_SIZE 120 -#elif defined(TARGET_FREEBSD) && defined(TARGET_X86) -#define DAC_CS_NATIVE_DATA_SIZE 12 -#elif defined(TARGET_FREEBSD) && defined(TARGET_AMD64) -#define DAC_CS_NATIVE_DATA_SIZE 24 -#elif defined(TARGET_FREEBSD) && defined(TARGET_ARM64) -#define DAC_CS_NATIVE_DATA_SIZE 24 -#elif (defined(TARGET_LINUX) || defined(TARGET_ANDROID)) && defined(TARGET_ARM) -#define DAC_CS_NATIVE_DATA_SIZE 80 -#elif (defined(TARGET_LINUX) || defined(TARGET_ANDROID)) && defined(TARGET_ARM64) -#define DAC_CS_NATIVE_DATA_SIZE 104 -#elif defined(TARGET_LINUX) && defined(TARGET_LOONGARCH64) -#define DAC_CS_NATIVE_DATA_SIZE 96 -#elif (defined(TARGET_LINUX) || defined(TARGET_ANDROID)) && defined(TARGET_X86) -#define DAC_CS_NATIVE_DATA_SIZE 76 -#elif (defined(TARGET_LINUX) || defined(TARGET_ANDROID)) && defined(TARGET_AMD64) -#define DAC_CS_NATIVE_DATA_SIZE 96 -#elif defined(TARGET_LINUX) && defined(TARGET_S390X) -#define DAC_CS_NATIVE_DATA_SIZE 96 -#elif defined(TARGET_LINUX) && defined(TARGET_RISCV64) -#define DAC_CS_NATIVE_DATA_SIZE 96 -#elif defined(TARGET_LINUX) && defined(TARGET_POWERPC64) -#define DAC_CS_NATIVE_DATA_SIZE 96 -#elif defined(TARGET_NETBSD) && defined(TARGET_AMD64) -#define DAC_CS_NATIVE_DATA_SIZE 96 -#elif defined(TARGET_NETBSD) && defined(TARGET_ARM) -#define DAC_CS_NATIVE_DATA_SIZE 56 -#elif defined(TARGET_NETBSD) && defined(TARGET_X86) -#define DAC_CS_NATIVE_DATA_SIZE 56 -#elif defined(__sun) && defined(TARGET_AMD64) -#define DAC_CS_NATIVE_DATA_SIZE 48 -#elif defined(TARGET_HAIKU) && defined(TARGET_AMD64) -#define DAC_CS_NATIVE_DATA_SIZE 56 +#if defined(TARGET_APPLE) +#define DAC_MUTEX_MAX_SIZE 96 +#elif defined(TARGET_FREEBSD) +#define DAC_MUTEX_MAX_SIZE 16 +#elif defined(TARGET_LINUX) || defined(TARGET_ANDROID) +#define DAC_MUTEX_MAX_SIZE 64 +#elif defined(TARGET_WINDOWS) +#ifdef TARGET_64BIT +#define DAC_MUTEX_MAX_SIZE 40 +#else +#define DAC_MUTEX_MAX_SIZE 24 +#endif // TARGET_64BIT #else -#warning -#error DAC_CS_NATIVE_DATA_SIZE is not defined for this architecture. This should be same value as PAL_CS_NATIVE_DATA_SIZE (aka sizeof(PAL_CS_NATIVE_DATA)). +// Fallback to a conservative default value +#define DAC_MUTEX_MAX_SIZE 128 #endif -struct T_CRITICAL_SECTION { - PVOID DebugInfo; - LONG LockCount; - LONG RecursionCount; - HANDLE OwningThread; - ULONG_PTR SpinCount; - -#ifdef PAL_TRACK_CRITICAL_SECTIONS_DATA - BOOL bInternal; -#endif // PAL_TRACK_CRITICAL_SECTIONS_DATA - volatile DWORD dwInitState; +#ifndef CROSS_COMPILE +static_assert(DAC_MUTEX_MAX_SIZE >= sizeof(minipal_mutex), "DAC_MUTEX_MAX_SIZE must be greater than or equal to the size of minipal_mutex"); +#endif // !CROSS_COMPILE - union CSNativeDataStorage +// This type is used to ensure a consistent size of mutexes +// contained with our Crst types. +// We have this requirement for cross OS compiling the DAC. +struct tgt_minipal_mutex final +{ + union { - BYTE rgNativeDataStorage[DAC_CS_NATIVE_DATA_SIZE]; - PVOID pvAlign; // make sure the storage is machine-pointer-size aligned - } csnds; + // DAC builds want to have the data layout of the target system. 
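As an aside on the sizing pattern used by the tgt_minipal_mutex wrapper introduced just below: the idea is to reserve a fixed, target-chosen byte budget, overlay the real host type in a union only when the build actually uses it, and let a static_assert catch any host where the budget is too small. A minimal standalone sketch, with illustrative names (FixedSizeLock, MAX_LOCK_SIZE, LAYOUT_ONLY_BUILD) and pthread_mutex_t standing in for minipal_mutex:

#include <pthread.h>

#define MAX_LOCK_SIZE 64   // per-target byte budget, playing the role of DAC_MUTEX_MAX_SIZE

struct FixedSizeLock final
{
    union
    {
#ifndef LAYOUT_ONLY_BUILD          // layout-only builds (like DACCESS_COMPILE) skip the host type
        pthread_mutex_t _mtx;      // the real host object, used by normal builds
#endif
        alignas(void*) unsigned char _padding[MAX_LOCK_SIZE];  // pins the struct size for every build
    };
};

#ifndef LAYOUT_ONLY_BUILD
// Same check the patch performs for minipal_mutex: the budget must cover the host type.
static_assert(MAX_LOCK_SIZE >= sizeof(pthread_mutex_t),
              "MAX_LOCK_SIZE must be at least as large as the host lock");
#endif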
+ // Make sure that the host minipal_mutex does not influence + // the target data layout +#ifndef DACCESS_COMPILE + minipal_mutex _mtx; +#endif // !DACCESS_COMPILE + + // This is unused padding to ensure struct size. + alignas(void*) BYTE _dacPadding[DAC_MUTEX_MAX_SIZE]; + }; }; -#else -#define T_CRITICAL_SECTION CRITICAL_SECTION -#endif diff --git a/src/coreclr/inc/crsttypes_generated.h b/src/coreclr/inc/crsttypes_generated.h index 8fde9aa204ab..6a5ef2352d24 100644 --- a/src/coreclr/inc/crsttypes_generated.h +++ b/src/coreclr/inc/crsttypes_generated.h @@ -38,28 +38,28 @@ enum CrstType CrstDebuggerHeapLock = 20, CrstDebuggerJitInfo = 21, CrstDebuggerMutex = 22, - CrstDelegateToFPtrHash = 23, - CrstDynamicIL = 24, - CrstDynamicMT = 25, - CrstEtwTypeLogHash = 26, - CrstEventPipe = 27, - CrstEventStore = 28, - CrstException = 29, - CrstExecutableAllocatorLock = 30, - CrstFCall = 31, - CrstFrozenObjectHeap = 32, - CrstFuncPtrStubs = 33, - CrstFusionAppCtx = 34, - CrstGCCover = 35, - CrstGenericDictionaryExpansion = 36, - CrstGlobalStrLiteralMap = 37, - CrstHandleTable = 38, - CrstIbcProfile = 39, - CrstIJWFixupData = 40, - CrstIJWHash = 41, - CrstILStubGen = 42, - CrstInlineTrackingMap = 43, - CrstInstMethodHashTable = 44, + CrstDynamicIL = 23, + CrstDynamicMT = 24, + CrstEtwTypeLogHash = 25, + CrstEventPipe = 26, + CrstEventStore = 27, + CrstException = 28, + CrstExecutableAllocatorLock = 29, + CrstFCall = 30, + CrstFrozenObjectHeap = 31, + CrstFuncPtrStubs = 32, + CrstFusionAppCtx = 33, + CrstGCCover = 34, + CrstGenericDictionaryExpansion = 35, + CrstGlobalStrLiteralMap = 36, + CrstHandleTable = 37, + CrstIbcProfile = 38, + CrstIJWFixupData = 39, + CrstIJWHash = 40, + CrstILStubGen = 41, + CrstInlineTrackingMap = 42, + CrstInstMethodHashTable = 43, + CrstInterfaceDispatchGlobalLists = 44, CrstInterop = 45, CrstInteropData = 46, CrstIsJMCMethod = 47, @@ -96,30 +96,29 @@ enum CrstType CrstReadyToRunEntryPointToMethodDescMap = 78, CrstReflection = 79, CrstReJITGlobalRequest = 80, - CrstRetThunkCache = 81, - CrstSigConvert = 82, - CrstSingleUseLock = 83, - CrstStressLog = 84, - CrstStubCache = 85, - CrstStubDispatchCache = 86, - CrstSyncBlockCache = 87, - CrstSyncHashLock = 88, - CrstSystemDomain = 89, - CrstSystemDomainDelayedUnloadList = 90, - CrstThreadIdDispenser = 91, - CrstThreadLocalStorageLock = 92, - CrstThreadStore = 93, - CrstTieredCompilation = 94, - CrstTypeEquivalenceMap = 95, - CrstTypeIDMap = 96, - CrstUMEntryThunkCache = 97, - CrstUMEntryThunkFreeListLock = 98, - CrstUniqueStack = 99, - CrstUnresolvedClassLock = 100, - CrstUnwindInfoTableLock = 101, - CrstVSDIndirectionCellLock = 102, - CrstWrapperTemplate = 103, - kNumberOfCrstTypes = 104 + CrstSigConvert = 81, + CrstSingleUseLock = 82, + CrstStressLog = 83, + CrstStubCache = 84, + CrstStubDispatchCache = 85, + CrstSyncBlockCache = 86, + CrstSyncHashLock = 87, + CrstSystemDomain = 88, + CrstSystemDomainDelayedUnloadList = 89, + CrstThreadIdDispenser = 90, + CrstThreadLocalStorageLock = 91, + CrstThreadStore = 92, + CrstTieredCompilation = 93, + CrstTypeEquivalenceMap = 94, + CrstTypeIDMap = 95, + CrstUMEntryThunkCache = 96, + CrstUMEntryThunkFreeListLock = 97, + CrstUniqueStack = 98, + CrstUnresolvedClassLock = 99, + CrstUnwindInfoTableLock = 100, + CrstVSDIndirectionCellLock = 101, + CrstWrapperTemplate = 102, + kNumberOfCrstTypes = 103 }; #endif // __CRST_TYPES_INCLUDED @@ -153,7 +152,6 @@ int g_rgCrstLevelMap[] = 0, // CrstDebuggerHeapLock 3, // CrstDebuggerJitInfo 12, // CrstDebuggerMutex - 0, // CrstDelegateToFPtrHash 0, 
// CrstDynamicIL 9, // CrstDynamicMT 0, // CrstEtwTypeLogHash @@ -175,6 +173,7 @@ int g_rgCrstLevelMap[] = 6, // CrstILStubGen 2, // CrstInlineTrackingMap 18, // CrstInstMethodHashTable + 0, // CrstInterfaceDispatchGlobalLists 21, // CrstInterop 9, // CrstInteropData 0, // CrstIsJMCMethod @@ -211,7 +210,6 @@ int g_rgCrstLevelMap[] = 9, // CrstReadyToRunEntryPointToMethodDescMap 7, // CrstReflection 15, // CrstReJITGlobalRequest - 3, // CrstRetThunkCache 3, // CrstSigConvert 4, // CrstSingleUseLock -1, // CrstStressLog @@ -262,7 +260,6 @@ LPCSTR g_rgCrstNameMap[] = "CrstDebuggerHeapLock", "CrstDebuggerJitInfo", "CrstDebuggerMutex", - "CrstDelegateToFPtrHash", "CrstDynamicIL", "CrstDynamicMT", "CrstEtwTypeLogHash", @@ -284,6 +281,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstILStubGen", "CrstInlineTrackingMap", "CrstInstMethodHashTable", + "CrstInterfaceDispatchGlobalLists", "CrstInterop", "CrstInteropData", "CrstIsJMCMethod", @@ -320,7 +318,6 @@ LPCSTR g_rgCrstNameMap[] = "CrstReadyToRunEntryPointToMethodDescMap", "CrstReflection", "CrstReJITGlobalRequest", - "CrstRetThunkCache", "CrstSigConvert", "CrstSingleUseLock", "CrstStressLog", diff --git a/src/coreclr/inc/daccess.h b/src/coreclr/inc/daccess.h index 41f5e0587701..a398f668ab17 100644 --- a/src/coreclr/inc/daccess.h +++ b/src/coreclr/inc/daccess.h @@ -699,7 +699,6 @@ PWSTR DacInstantiateStringW(TADDR addr, ULONG32 maxChars, bool throwEx); TADDR DacGetTargetAddrForHostAddr(LPCVOID ptr, bool throwEx); TADDR DacGetTargetAddrForHostInteriorAddr(LPCVOID ptr, bool throwEx); TADDR DacGetTargetVtForHostVt(LPCVOID vtHost, bool throwEx); -PWSTR DacGetVtNameW(TADDR targetVtable); // Report a region of memory to the debugger bool DacEnumMemoryRegion(TADDR addr, TSIZE_T size, bool fExpectSuccess = true); @@ -830,11 +829,6 @@ struct _UNWIND_INFO * DacGetUnwindInfo(TADDR taUnwindInfo); BOOL DacUnwindStackFrame(T_CONTEXT * pContext, T_KNONVOLATILE_CONTEXT_POINTERS* pContextPointers); #endif // FEATURE_EH_FUNCLETS -#if defined(TARGET_UNIX) -// call back through data target to unwind out-of-process -HRESULT DacVirtualUnwind(ULONG32 threadId, PT_CONTEXT context, PT_KNONVOLATILE_CONTEXT_POINTERS contextPointers); -#endif // TARGET_UNIX - #ifdef FEATURE_MINIMETADATA_IN_TRIAGEDUMPS class SString; void DacMdCacheAddEEName(TADDR taEE, const SString& ssEEName); diff --git a/src/coreclr/inc/dacprivate.h b/src/coreclr/inc/dacprivate.h index 62821f71395a..9e23b4df7f94 100644 --- a/src/coreclr/inc/dacprivate.h +++ b/src/coreclr/inc/dacprivate.h @@ -967,7 +967,7 @@ struct MSLAYOUT DACEHInfo CLRDATA_ADDRESS tryEndOffset = 0; CLRDATA_ADDRESS handlerStartOffset = 0; CLRDATA_ADDRESS handlerEndOffset = 0; - BOOL isDuplicateClause = FALSE; + BOOL isDuplicateClause = FALSE; // unused CLRDATA_ADDRESS filterOffset = 0; // valid when clauseType is EHFilter BOOL isCatchAllHandler = FALSE; // valid when clauseType is EHTyped CLRDATA_ADDRESS moduleAddr = 0; // when == 0 mtCatch contains a MethodTable, when != 0 tokCatch contains a type token diff --git a/src/coreclr/inc/dacvars.h b/src/coreclr/inc/dacvars.h index ffe2be90ffb8..2c5a21fd80c1 100644 --- a/src/coreclr/inc/dacvars.h +++ b/src/coreclr/inc/dacvars.h @@ -84,6 +84,10 @@ DEFINE_DACVAR(PTR_EEJitManager, ExecutionManager__m_pEEJitManager, ExecutionMana #ifdef FEATURE_READYTORUN DEFINE_DACVAR(PTR_ReadyToRunJitManager, ExecutionManager__m_pReadyToRunJitManager, ExecutionManager::m_pReadyToRunJitManager) #endif +#ifdef FEATURE_INTERPRETER +DEFINE_DACVAR(PTR_InterpreterJitManager, ExecutionManager__m_pInterpreterJitManager, 
ExecutionManager::m_pInterpreterJitManager) +DEFINE_DACVAR(PTR_InterpreterCodeManager, ExecutionManager__m_pInterpreterCodeMan, ExecutionManager::m_pInterpreterCodeMan) +#endif DEFINE_DACVAR_NO_DUMP(VMHELPDEF *, dac__hlpFuncTable, ::hlpFuncTable) DEFINE_DACVAR(VMHELPDEF *, dac__hlpDynamicFuncTable, ::hlpDynamicFuncTable) @@ -91,7 +95,6 @@ DEFINE_DACVAR(VMHELPDEF *, dac__hlpDynamicFuncTable, ::hlpDynamicFuncTable) DEFINE_DACVAR(PTR_StubManager, StubManager__g_pFirstManager, StubManager::g_pFirstManager) DEFINE_DACVAR(PTR_PrecodeStubManager, PrecodeStubManager__g_pManager, PrecodeStubManager::g_pManager) DEFINE_DACVAR(PTR_StubLinkStubManager, StubLinkStubManager__g_pManager, StubLinkStubManager::g_pManager) -DEFINE_DACVAR(PTR_ThunkHeapStubManager, ThunkHeapStubManager__g_pManager, ThunkHeapStubManager::g_pManager) DEFINE_DACVAR(PTR_JumpStubStubManager, JumpStubStubManager__g_pManager, JumpStubStubManager::g_pManager) DEFINE_DACVAR(PTR_RangeSectionStubManager, RangeSectionStubManager__g_pManager, RangeSectionStubManager::g_pManager) DEFINE_DACVAR(PTR_VirtualCallStubManagerManager, VirtualCallStubManagerManager__g_pManager, VirtualCallStubManagerManager::g_pManager) @@ -117,7 +120,6 @@ DEFINE_DACVAR(DWORD, dac__g_TlsIndex, g_TlsIndex) DEFINE_DACVAR(UNKNOWN_POINTER_TYPE, dac__g_pEHClass, ::g_pEHClass) DEFINE_DACVAR(UNKNOWN_POINTER_TYPE, dac__g_pExceptionServicesInternalCallsClass, ::g_pExceptionServicesInternalCallsClass) DEFINE_DACVAR(UNKNOWN_POINTER_TYPE, dac__g_pStackFrameIteratorClass, ::g_pStackFrameIteratorClass) -DEFINE_DACVAR(BOOL, dac__g_isNewExceptionHandlingEnabled, ::g_isNewExceptionHandlingEnabled) #endif DEFINE_DACVAR(PTR_SString, SString__s_Empty, SString::s_Empty) diff --git a/src/coreclr/inc/debugmacros.h b/src/coreclr/inc/debugmacros.h index 35592ea36b12..3d2918f683fd 100644 --- a/src/coreclr/inc/debugmacros.h +++ b/src/coreclr/inc/debugmacros.h @@ -13,8 +13,8 @@ #include "stacktrace.h" #include "debugmacrosext.h" #include "palclr.h" +#include -#undef _ASSERTE #undef VERIFY #ifdef __cplusplus @@ -55,8 +55,12 @@ extern VOID ANALYZER_NORETURN DbgAssertDialog(const char *szFile, int iLine, con #else // !_DEBUG -#define _ASSERTE(expr) ((void)0) -#define _ASSERTE_MSG(expr, msg) ((void)0) +#if !defined(_ASSERTE) + #define _ASSERTE(expr) ((void)0) +#endif +#if !defined(_ASSERTE_MSG) + #define _ASSERTE_MSG(expr, msg) ((void)0) +#endif #define VERIFY(stmt) (void)(stmt) // At this point, EEPOLICY_HANDLE_FATAL_ERROR may or may not be defined. It will be defined diff --git a/src/coreclr/inc/debugreturn.h b/src/coreclr/inc/debugreturn.h index d052364ff890..93bd5b6c8078 100644 --- a/src/coreclr/inc/debugreturn.h +++ b/src/coreclr/inc/debugreturn.h @@ -5,27 +5,6 @@ #ifndef _DEBUGRETURN_H_ #define _DEBUGRETURN_H_ -// Note that with OACR Prefast is run over checked (_DEBUG is defined) sources -// so we have to first check the _PREFAST_ define followed by the _DEBUG define -// -#ifdef _PREFAST_ - -// Use prefast to detect gotos out of no-return blocks. The gotos out of no-return blocks -// should be reported as memory leaks by prefast. 
The (nothrow) is because PREfix sees the -// throw from the new statement, and doesn't like these macros used in a destructor (and -// the NULL returned by failure works just fine in delete[]) - -#define DEBUG_ASSURE_NO_RETURN_BEGIN(arg) { char* __noReturnInThisBlock_##arg = ::new (nothrow) char[1]; -#define DEBUG_ASSURE_NO_RETURN_END(arg) ::delete[] __noReturnInThisBlock_##arg; } - -#define DEBUG_OK_TO_RETURN_BEGIN(arg) { ::delete[] __noReturnInThisBlock_##arg; -#define DEBUG_OK_TO_RETURN_END(arg) __noReturnInThisBlock_##arg = ::new (nothrow) char[1]; } - -#define DEBUG_ASSURE_SAFE_TO_RETURN TRUE -#define return return - -#else // !_PREFAST_ - // This is disabled in build 190024315 (a pre-release build after VS 2015 Update 3) and // earlier because those builds only support C++11 constexpr, which doesn't allow the // use of 'if' statements within the body of a constexpr function. Later builds support @@ -75,7 +54,6 @@ class __YouCannotUseAReturnStatementHere { // If you got here, and you're wondering what you did wrong -- you're using // a return statement where it's not allowed. Likely, it's inside one of: // GCPROTECT_BEGIN ... GCPROTECT_END - // HELPER_METHOD_FRAME_BEGIN ... HELPER_METHOD_FRAME_END // static int safe_to_return() {return 0;}; public: @@ -96,17 +74,16 @@ typedef __SafeToReturn __ReturnOK; // build. (And, in fastchecked, there is no penalty at all.) // #ifdef _MSC_VER -#define return if (0 && __ReturnOK::safe_to_return()) { } else return +#define debug_instrumented_return if (0 && __ReturnOK::safe_to_return()) { } else return #else // _MSC_VER -#define return for (;1;__ReturnOK::safe_to_return()) return +#define debug_instrumented_return for (;1;__ReturnOK::safe_to_return()) return #endif // _MSC_VER +#define return debug_instrumented_return + #define DEBUG_ASSURE_NO_RETURN_BEGIN(arg) { typedef __YouCannotUseAReturnStatementHere __ReturnOK; if (0 && __ReturnOK::used()) { } else { #define DEBUG_ASSURE_NO_RETURN_END(arg) } } -#define DEBUG_OK_TO_RETURN_BEGIN(arg) { typedef __SafeToReturn __ReturnOK; if (0 && __ReturnOK::used()) { } else { -#define DEBUG_OK_TO_RETURN_END(arg) } } - #else // defined(_DEBUG) && !defined(JIT_BUILD) && (!defined(_MSC_FULL_VER) || _MSC_FULL_VER > 190024315) #define DEBUG_ASSURE_SAFE_TO_RETURN TRUE @@ -114,11 +91,6 @@ typedef __SafeToReturn __ReturnOK; #define DEBUG_ASSURE_NO_RETURN_BEGIN(arg) { #define DEBUG_ASSURE_NO_RETURN_END(arg) } -#define DEBUG_OK_TO_RETURN_BEGIN(arg) { -#define DEBUG_OK_TO_RETURN_END(arg) } - #endif // defined(_DEBUG) && !defined(JIT_BUILD) && (!defined(_MSC_FULL_VER) || _MSC_FULL_VER > 190024315) -#endif // !_PREFAST_ - #endif // _DEBUGRETURN_H_ diff --git a/src/coreclr/inc/eetwain.h b/src/coreclr/inc/eetwain.h index e229ae1fe29d..78dd453f55b2 100644 --- a/src/coreclr/inc/eetwain.h +++ b/src/coreclr/inc/eetwain.h @@ -65,18 +65,6 @@ typedef void (*GCEnumCallback)( DAC_ARG(DacSlotLocation loc) // where the reference came from ); -/****************************************************************************** - The stackwalker maintains some state on behalf of ICodeManager. -*/ - -const int CODEMAN_STATE_SIZE = 512; - -struct CodeManState -{ - DWORD dwIsSet; // Is set to 0 by the stackwalk as appropriate - BYTE stateBuf[CODEMAN_STATE_SIZE]; -}; - /****************************************************************************** These flags are used by some functions, although not all combinations might make sense for all functions. 
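For readers unfamiliar with the debugreturn.h guard reworked above (it now routes the keyword through a named debug_instrumented_return macro instead of two compiler-specific #define return lines): every return expands into an expression that names ReturnOK::safe_to_return(), and a no-return region locally shadows ReturnOK with a type whose safe_to_return is inaccessible, so a stray return fails to compile. A simplified standalone sketch with illustrative names, not the real header:

struct SafeToReturn        { static int safe_to_return() { return 0; } };
struct NoReturnAllowedHere { private: static int safe_to_return() { return 0; } };
typedef SafeToReturn ReturnOK;

// Redefining the keyword mirrors what debugreturn.h itself does for debug builds.
#define return if (0 && ReturnOK::safe_to_return()) { } else return
#define NO_RETURN_BEGIN() { typedef NoReturnAllowedHere ReturnOK;
#define NO_RETURN_END()   }

int Demo(bool early)
{
    if (early) { return 1; }   // compiles: ReturnOK is SafeToReturn here
    NO_RETURN_BEGIN()
    // return 2;               // would not compile: safe_to_return() is inaccessible
    NO_RETURN_END()
    return 0;                  // compiles again: the shadowing typedef is out of scope
}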
@@ -169,7 +157,6 @@ virtual void FixContext(ContextType ctxType, DWORD dwRelOffset, DWORD nestingLevel, OBJECTREF thrownObject, - CodeManState *pState, size_t ** ppShadowSP, // OUT size_t ** ppEndRegion) = 0; // OUT #endif // !FEATURE_EH_FUNCLETS @@ -183,8 +170,7 @@ virtual void FixContext(ContextType ctxType, virtual TADDR GetAmbientSP(PREGDISPLAY pContext, EECodeInfo *pCodeInfo, DWORD dwRelOffset, - DWORD nestingLevel, - CodeManState *pState) = 0; + DWORD nestingLevel) = 0; #endif // TARGET_X86 /* @@ -201,10 +187,13 @@ virtual ULONG32 GetStackParameterSize(EECodeInfo* pCodeInfo) = 0; (if UpdateAllRegs), callee-UNsaved registers are trashed) Returns success of operation. */ -virtual bool UnwindStackFrame(PREGDISPLAY pContext, +virtual bool UnwindStackFrame(PREGDISPLAY pRD, EECodeInfo *pCodeInfo, - unsigned flags, - CodeManState *pState) = 0; + unsigned flags) = 0; + +#ifdef FEATURE_EH_FUNCLETS +virtual void EnsureCallerContextIsValid(PREGDISPLAY pRD, EECodeInfo * pCodeInfo = NULL, unsigned flags = 0) = 0; +#endif // FEATURE_EH_FUNCLETS /* Is the function currently at a "GC safe point" ? @@ -217,17 +206,6 @@ virtual bool IsGcSafe(EECodeInfo *pCodeInfo, virtual bool HasTailCalls(EECodeInfo *pCodeInfo) = 0; #endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 -#if defined(TARGET_AMD64) && defined(_DEBUG) -/* - Locates the end of the last interruptible region in the given code range. - Returns 0 if the entire range is uninterruptible. Returns the end point - if the entire range is interruptible. -*/ -virtual unsigned FindEndOfLastInterruptibleRegion(unsigned curOffset, - unsigned endOffset, - GCInfoToken gcInfoToken) = 0; -#endif // TARGET_AMD64 && _DEBUG - /* Enumerate all live object references in that function using the virtual register set. 
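The practical effect of dropping CodeManState is that callers no longer thread a scratch buffer through every ICodeManager call. A hedged sketch of the call-site shape, using the REGDISPLAY, EECodeInfo and UpdateAllRegs names from this header; the helper itself is hypothetical:

bool UnwindOneFrame(REGDISPLAY* pRD, EECodeInfo* pCodeInfo)
{
    ICodeManager* pCodeMan = pCodeInfo->GetCodeManager();

    // Old shape: a 512-byte CodeManState travelled with the stack walker so the
    // x86 code manager could cache its decoded hdrInfo between calls:
    //
    //     CodeManState state;
    //     state.dwIsSet = 0;
    //     return pCodeMan->UnwindStackFrame(pRD, pCodeInfo, UpdateAllRegs, &state);
    //
    // New shape: no per-walk scratch state is passed at all.
    return pCodeMan->UnwindStackFrame(pRD, pCodeInfo, UpdateAllRegs);
}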
Same reference location cannot be enumerated @@ -272,8 +250,7 @@ virtual GenericParamContextType GetParamContextType(PREGDISPLAY pContext, */ virtual void * GetGSCookieAddr(PREGDISPLAY pContext, EECodeInfo * pCodeInfo, - unsigned flags, - CodeManState * pState) = 0; + unsigned flags) = 0; #ifndef USE_GC_INFO_DECODER /* @@ -282,7 +259,7 @@ virtual void * GetGSCookieAddr(PREGDISPLAY pContext, virtual bool IsInPrologOrEpilog(DWORD relPCOffset, GCInfoToken gcInfoToken, size_t* prologSize) = 0; - +#ifndef FEATURE_EH_FUNCLETS /* Returns true if the given IP is in the synchronized region of the method (valid for synchronized methods only) */ @@ -290,6 +267,7 @@ virtual bool IsInSynchronizedRegion( DWORD relOffset, GCInfoToken gcInfoToken, unsigned flags) = 0; +#endif // FEATURE_EH_FUNCLETS #endif // !USE_GC_INFO_DECODER /* @@ -321,11 +299,6 @@ virtual unsigned int GetFrameSize(GCInfoToken gcInfoToken) = 0; #ifndef FEATURE_EH_FUNCLETS virtual const BYTE* GetFinallyReturnAddr(PREGDISPLAY pReg)=0; -virtual BOOL IsInFilter(GCInfoToken gcInfoToken, - unsigned offset, - PCONTEXT pCtx, - DWORD curNestLevel) = 0; - virtual BOOL LeaveFinally(GCInfoToken gcInfoToken, unsigned offset, PCONTEXT pCtx) = 0; @@ -333,6 +306,12 @@ virtual BOOL LeaveFinally(GCInfoToken gcInfoToken, virtual void LeaveCatch(GCInfoToken gcInfoToken, unsigned offset, PCONTEXT pCtx)=0; +#else // FEATURE_EH_FUNCLETS +virtual DWORD_PTR CallFunclet(OBJECTREF throwable, void* pHandler, REGDISPLAY *pRD, ExInfo *pExInfo, bool isFilter) = 0; +virtual void ResumeAfterCatch(CONTEXT *pContext, size_t targetSSP, bool fIntercepted) = 0; +#if defined(HOST_AMD64) && defined(HOST_WINDOWS) +virtual void UpdateSSP(PREGDISPLAY pRD) = 0; +#endif // HOST_AMD64 && HOST_WINDOWS #endif // FEATURE_EH_FUNCLETS #ifdef FEATURE_REMAP_FUNCTION @@ -354,7 +333,6 @@ virtual HRESULT FixContextForEnC(PCONTEXT pCtx, #endif // #ifndef DACCESS_COMPILE - #ifdef DACCESS_COMPILE virtual void EnumMemoryRegions(CLRDataEnumMemoryFlags flags) = 0; #endif @@ -389,7 +367,6 @@ void FixContext(ContextType ctxType, DWORD dwRelOffset, DWORD nestingLevel, OBJECTREF thrownObject, - CodeManState *pState, size_t ** ppShadowSP, // OUT size_t ** ppEndRegion); // OUT #endif // !FEATURE_EH_FUNCLETS @@ -404,8 +381,7 @@ virtual TADDR GetAmbientSP(PREGDISPLAY pContext, EECodeInfo *pCodeInfo, DWORD dwRelOffset, - DWORD nestingLevel, - CodeManState *pState); + DWORD nestingLevel); #endif // TARGET_X86 /* @@ -425,10 +401,9 @@ ULONG32 GetStackParameterSize(EECodeInfo* pCodeInfo); */ virtual bool UnwindStackFrame( - PREGDISPLAY pContext, + PREGDISPLAY pRD, EECodeInfo *pCodeInfo, - unsigned flags, - CodeManState *pState); + unsigned flags); #ifdef HAS_LIGHTUNWIND enum LightUnwindFlag @@ -462,18 +437,6 @@ virtual bool HasTailCalls(EECodeInfo *pCodeInfo); #endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || defined(TARGET_RISCV64) -#if defined(TARGET_AMD64) && defined(_DEBUG) -/* - Locates the end of the last interruptible region in the given code range. - Returns 0 if the entire range is uninterruptible. Returns the end point - if the entire range is interruptible. -*/ -virtual -unsigned FindEndOfLastInterruptibleRegion(unsigned curOffset, - unsigned endOffset, - GCInfoToken gcInfoToken); -#endif // TARGET_AMD64 && _DEBUG - /* Enumerate all live object references in that function using the virtual register set. 
Same reference location cannot be enumerated @@ -525,10 +488,10 @@ PTR_VOID GetExactGenericsToken(PREGDISPLAY pContext, EECodeInfo * pCodeInfo); static -PTR_VOID GetExactGenericsToken(SIZE_T baseStackSlot, +PTR_VOID GetExactGenericsToken(TADDR sp, + TADDR fp, EECodeInfo * pCodeInfo); - #endif // FEATURE_EH_FUNCLETS && USE_GC_INFO_DECODER /* @@ -538,8 +501,7 @@ PTR_VOID GetExactGenericsToken(SIZE_T baseStackSlot, virtual void * GetGSCookieAddr(PREGDISPLAY pContext, EECodeInfo * pCodeInfo, - unsigned flags, - CodeManState * pState); + unsigned flags); #ifndef USE_GC_INFO_DECODER @@ -552,6 +514,7 @@ bool IsInPrologOrEpilog( GCInfoToken gcInfoToken, size_t* prologSize); +#ifndef FEATURE_EH_FUNCLETS /* Returns true if the given IP is in the synchronized region of the method (valid for synchronized functions only) */ @@ -560,6 +523,7 @@ bool IsInSynchronizedRegion( DWORD relOffset, GCInfoToken gcInfoToken, unsigned flags); +#endif // FEATURE_EH_FUNCLETS #endif // !USE_GC_INFO_DECODER /* @@ -588,16 +552,19 @@ unsigned int GetFrameSize(GCInfoToken gcInfoToken); #ifndef FEATURE_EH_FUNCLETS virtual const BYTE* GetFinallyReturnAddr(PREGDISPLAY pReg); -virtual BOOL IsInFilter(GCInfoToken gcInfoToken, - unsigned offset, - PCONTEXT pCtx, - DWORD curNestLevel); virtual BOOL LeaveFinally(GCInfoToken gcInfoToken, unsigned offset, PCONTEXT pCtx); virtual void LeaveCatch(GCInfoToken gcInfoToken, unsigned offset, PCONTEXT pCtx); +#else // FEATURE_EH_FUNCLETS +virtual DWORD_PTR CallFunclet(OBJECTREF throwable, void* pHandler, REGDISPLAY *pRD, ExInfo *pExInfo, bool isFilter); +virtual void ResumeAfterCatch(CONTEXT *pContext, size_t targetSSP, bool fIntercepted); + +#if defined(HOST_AMD64) && defined(HOST_WINDOWS) +virtual void UpdateSSP(PREGDISPLAY pRD); +#endif // HOST_AMD64 && HOST_WINDOWS #endif // FEATURE_EH_FUNCLETS #ifdef FEATURE_REMAP_FUNCTION @@ -618,7 +585,7 @@ HRESULT FixContextForEnC(PCONTEXT pCtx, #endif // #ifndef DACCESS_COMPILE #ifdef FEATURE_EH_FUNCLETS - static void EnsureCallerContextIsValid( PREGDISPLAY pRD, EECodeInfo * pCodeInfo = NULL, unsigned flags = 0); + virtual void EnsureCallerContextIsValid( PREGDISPLAY pRD, EECodeInfo * pCodeInfo = NULL, unsigned flags = 0); static size_t GetCallerSp( PREGDISPLAY pRD ); #ifdef TARGET_X86 static size_t GetResumeSp( PCONTEXT pContext ); @@ -631,21 +598,220 @@ HRESULT FixContextForEnC(PCONTEXT pCtx, }; -#ifdef TARGET_X86 -#include "gc_unwind_x86.h" +#ifdef FEATURE_INTERPRETER + +class InterpreterCodeManager : public ICodeManager { -/***************************************************************************** - How the stackwalkers buffer will be interpreted + VPTR_VTABLE_CLASS_AND_CTOR(InterpreterCodeManager, ICodeManager) + +public: + + +#ifndef DACCESS_COMPILE +#ifndef FEATURE_EH_FUNCLETS +virtual +void FixContext(ContextType ctxType, + EHContext *ctx, + EECodeInfo *pCodeInfo, + DWORD dwRelOffset, + DWORD nestingLevel, + OBJECTREF thrownObject, + size_t ** ppShadowSP, // OUT + size_t ** ppEndRegion) // OUT +{ + // Interpreter-TODO: Implement this if needed + _ASSERTE(FALSE); +} +#endif // !FEATURE_EH_FUNCLETS +#endif // !DACCESS_COMPILE + +#ifdef TARGET_X86 +/* + Gets the ambient stack pointer value at the given nesting level within + the method. 
*/ +virtual +TADDR GetAmbientSP(PREGDISPLAY pContext, + EECodeInfo *pCodeInfo, + DWORD dwRelOffset, + DWORD nestingLevel) +{ + // Interpreter-TODO: Implement this if needed + _ASSERTE(FALSE); + return NULL; +} +#endif // TARGET_X86 -struct CodeManStateBuf +virtual +ULONG32 GetStackParameterSize(EECodeInfo* pCodeInfo) { - DWORD hdrInfoSize; - hdrInfo hdrInfoBody; -}; + return 0; +} + +virtual +bool UnwindStackFrame( + PREGDISPLAY pRD, + EECodeInfo *pCodeInfo, + unsigned flags); + +#ifdef FEATURE_EH_FUNCLETS +virtual +void EnsureCallerContextIsValid(PREGDISPLAY pRD, EECodeInfo * pCodeInfo = NULL, unsigned flags = 0); +#endif // FEATURE_EH_FUNCLETS + +virtual +bool IsGcSafe( EECodeInfo *pCodeInfo, + DWORD dwRelOffset); + +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +virtual +bool HasTailCalls(EECodeInfo *pCodeInfo) +{ + _ASSERTE(FALSE); + return false; +} +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || defined(TARGET_RISCV64) + +virtual +bool EnumGcRefs(PREGDISPLAY pContext, + EECodeInfo *pCodeInfo, + unsigned flags, + GCEnumCallback pCallback, + LPVOID hCallBack, + DWORD relOffsetOverride = NO_OVERRIDE_OFFSET); + +virtual +OBJECTREF GetInstance( + PREGDISPLAY pContext, + EECodeInfo * pCodeInfo); + +virtual +PTR_VOID GetParamTypeArg(PREGDISPLAY pContext, + EECodeInfo * pCodeInfo); + +virtual GenericParamContextType GetParamContextType(PREGDISPLAY pContext, + EECodeInfo * pCodeInfo); + +virtual +void * GetGSCookieAddr(PREGDISPLAY pContext, + EECodeInfo * pCodeInfo, + unsigned flags) +{ + return NULL; +} + +#ifndef USE_GC_INFO_DECODER +virtual +bool IsInPrologOrEpilog( + DWORD relOffset, + GCInfoToken gcInfoToken, + size_t* prologSize) +{ + // Interpreter-TODO: Implement this if needed + _ASSERTE(FALSE); + return false; +} + +#ifndef FEATURE_EH_FUNCLETS +virtual +bool IsInSynchronizedRegion( + DWORD relOffset, + GCInfoToken gcInfoToken, + unsigned flags) +{ + // Interpreter-TODO: Implement this if needed + _ASSERTE(FALSE); + return false; +} +#endif // FEATURE_EH_FUNCLETS +#endif // !USE_GC_INFO_DECODER + +virtual +size_t GetFunctionSize(GCInfoToken gcInfoToken); + +virtual bool GetReturnAddressHijackInfo(GCInfoToken gcInfoToken X86_ARG(ReturnKind * returnKind)) +{ + // Interpreter-TODO: Implement this if needed + _ASSERTE(FALSE); + return false; +} + +#ifndef USE_GC_INFO_DECODER + +virtual +unsigned int GetFrameSize(GCInfoToken gcInfoToken) +{ + // Interpreter-TODO: Implement this if needed + _ASSERTE(FALSE); + return 0; +} +#endif // USE_GC_INFO_DECODER + +#ifndef DACCESS_COMPILE + +#ifndef FEATURE_EH_FUNCLETS +virtual const BYTE* GetFinallyReturnAddr(PREGDISPLAY pReg) +{ + // Interpreter-TODO: Implement this if needed + _ASSERTE(FALSE); + return NULL; +} + +virtual BOOL LeaveFinally(GCInfoToken gcInfoToken, + unsigned offset, + PCONTEXT pCtx) +{ + // Interpreter-TODO: Implement this if needed + _ASSERTE(FALSE); + return FALSE; +} + +virtual void LeaveCatch(GCInfoToken gcInfoToken, + unsigned offset, + PCONTEXT pCtx) +{ + // Interpreter-TODO: Implement this if needed + _ASSERTE(FALSE); +} +#else // FEATURE_EH_FUNCLETS +virtual DWORD_PTR CallFunclet(OBJECTREF throwable, void* pHandler, REGDISPLAY *pRD, ExInfo *pExInfo, bool isFilter); +virtual void ResumeAfterCatch(CONTEXT *pContext, size_t targetSSP, bool fIntercepted); +#if defined(HOST_AMD64) && defined(HOST_WINDOWS) +virtual void UpdateSSP(PREGDISPLAY pRD); +#endif // HOST_AMD64 && HOST_WINDOWS +#endif // FEATURE_EH_FUNCLETS + +#ifdef FEATURE_REMAP_FUNCTION + 
+virtual +HRESULT FixContextForEnC(PCONTEXT pCtx, + EECodeInfo * pOldCodeInfo, + const ICorDebugInfo::NativeVarInfo * oldMethodVars, + SIZE_T oldMethodVarsCount, + EECodeInfo * pNewCodeInfo, + const ICorDebugInfo::NativeVarInfo * newMethodVars, + SIZE_T newMethodVarsCount) +{ + // Interpreter-TODO: Implement this + _ASSERTE(FALSE); + return E_NOTIMPL; +} +#endif // FEATURE_REMAP_FUNCTION + +#endif // !DACCESS_COMPILE + +#ifdef DACCESS_COMPILE + virtual void EnumMemoryRegions(CLRDataEnumMemoryFlags flags) + { + // Nothing to do + } #endif +}; + +#endif // FEATURE_INTERPRETER + //***************************************************************************** #endif // _EETWAIN_H //***************************************************************************** diff --git a/src/coreclr/inc/eexcp.h b/src/coreclr/inc/eexcp.h index fb7bccbe0734..5c470b78dbf5 100644 --- a/src/coreclr/inc/eexcp.h +++ b/src/coreclr/inc/eexcp.h @@ -112,34 +112,5 @@ inline BOOL IsTypedHandler(EE_ILEXCEPTION_CLAUSE *EHClause) return ! (IsFilterHandler(EHClause) || IsFaultOrFinally(EHClause)); } -inline BOOL IsDuplicateClause(EE_ILEXCEPTION_CLAUSE* pEHClause) -{ - return pEHClause->Flags & COR_ILEXCEPTION_CLAUSE_DUPLICATED; -} - -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) -// Finally is the only EH construct that can be part of the execution as being fall-through. -// -// "Cloned" finally is a construct that represents a finally block that is used as -// fall through for normal try-block execution. Such a "cloned" finally will: -// -// 1) Have its try-clause's Start and End PC the same as its handler's start PC (i.e. will have -// zero length try block), AND -// 2) Is marked duplicate -// -// Because of their fall-through nature, JIT guarantees that only finally constructs can be cloned, -// and not catch or fault (since they cannot be fallen through but are invoked as funclets). -// -// The cloned finally construct is also used to mark "call to finally" thunks that are not within -// the EH region protected by the finally, and also not within the enclosing region. This is done -// to prevent ThreadAbortException from creating an infinite loop of calling the same finally. 
-inline BOOL IsClonedFinally(EE_ILEXCEPTION_CLAUSE* pEHClause) -{ - return ((pEHClause->TryStartPC == pEHClause->TryEndPC) && - (pEHClause->TryStartPC == pEHClause->HandlerStartPC) && - IsFinally(pEHClause) && IsDuplicateClause(pEHClause)); -} -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - #endif // __eexcp_h__ diff --git a/src/coreclr/inc/eventtracebase.h b/src/coreclr/inc/eventtracebase.h index 72e6cd09fc5b..4fbbbc9567f8 100644 --- a/src/coreclr/inc/eventtracebase.h +++ b/src/coreclr/inc/eventtracebase.h @@ -960,7 +960,6 @@ namespace ETW static VOID SendMethodDetailsEvent(MethodDesc *pMethodDesc); static VOID SendNonDuplicateMethodDetailsEvent(MethodDesc* pMethodDesc, MethodDescSet* set); static VOID StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName); - static VOID StubsInitialized(PVOID *pHelperStartAddress, PVOID *pHelperNames, LONG ulNoOfHelpers); static VOID MethodRestored(MethodDesc * pMethodDesc); static VOID MethodTableRestored(MethodTable * pMethodTable); static VOID DynamicMethodDestroyed(MethodDesc *pMethodDesc); @@ -972,7 +971,6 @@ namespace ETW static VOID MethodJitting(MethodDesc *pMethodDesc, COR_ILMETHOD_DECODER* methodDecoder, SString *namespaceOrClassName, SString *methodName, SString *methodSignature); static VOID MethodJitted(MethodDesc *pMethodDesc, SString *namespaceOrClassName, SString *methodName, SString *methodSignature, PCODE pNativeCodeStartAddress, PrepareCodeConfig *pConfig); static VOID StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName) {}; - static VOID StubsInitialized(PVOID *pHelperStartAddress, PVOID *pHelperNames, LONG ulNoOfHelpers) {}; static VOID MethodRestored(MethodDesc * pMethodDesc) {}; static VOID MethodTableRestored(MethodTable * pMethodTable) {}; static VOID DynamicMethodDestroyed(MethodDesc *pMethodDesc) {}; diff --git a/src/coreclr/inc/ex.h b/src/coreclr/inc/ex.h index 9a0cad2b2102..db24e5179f1e 100644 --- a/src/coreclr/inc/ex.h +++ b/src/coreclr/inc/ex.h @@ -598,7 +598,6 @@ class CAutoTryCleanup DEBUG_NOINLINE CAutoTryCleanup(STATETYPE& refState) : m_refState(refState) { - SCAN_SCOPE_BEGIN; STATIC_CONTRACT_THROWS; STATIC_CONTRACT_SUPPORTS_DAC; @@ -614,7 +613,6 @@ class CAutoTryCleanup DEBUG_NOINLINE ~CAutoTryCleanup() { - SCAN_SCOPE_END; WRAPPER_NO_CONTRACT; m_refState.CleanupTry(); @@ -729,7 +727,6 @@ class CAutoTryCleanup #define RethrowTerminalExceptions \ if (GET_EXCEPTION()->IsTerminal()) \ { \ - STATIC_CONTRACT_THROWS_TERMINAL; \ EX_RETHROW; \ } \ @@ -811,18 +808,11 @@ Exception *ExThrowWithInnerHelper(Exception *inner); { \ STATETYPE __state STATEARG; \ typedef DEFAULT_EXCEPTION_TYPE __defaultException_t; \ - SCAN_EHMARKER(); \ PAL_CPP_TRY \ { \ - SCAN_EHMARKER_TRY(); \ - SCAN_EHMARKER(); \ PAL_CPP_TRY \ { \ - SCAN_EHMARKER_TRY(); \ CAutoTryCleanup __autoCleanupTry(__state); \ - /* prevent annotations from being dropped by optimizations in debug */ \ - INDEBUG(static bool __alwayszero;) \ - INDEBUG(VolatileLoad(&__alwayszero);) \ { \ /* Disallow returns to make exception handling work. */ \ /* Some work is done after the catch, see EX_ENDTRY. 
*/ \ @@ -833,32 +823,23 @@ Exception *ExThrowWithInnerHelper(Exception *inner); #define EX_CATCH_IMPL_EX(DerivedExceptionClass) \ DEBUG_ASSURE_NO_RETURN_END(EX_TRY) \ } \ - SCAN_EHMARKER_END_TRY(); \ } \ PAL_CPP_CATCH_NON_DERIVED_NOARG (const std::bad_alloc&) \ { \ - SCAN_EHMARKER_CATCH(); \ __state.SetCaughtCxx(); \ __state.m_pExceptionPtr = Exception::GetOOMException(); \ - SCAN_EHMARKER_END_CATCH(); \ - SCAN_IGNORE_THROW_MARKER; \ ThrowOutOfMemory(); \ } \ PAL_CPP_CATCH_DERIVED (DerivedExceptionClass, __pExceptionRaw) \ { \ - SCAN_EHMARKER_CATCH(); \ __state.SetCaughtCxx(); \ __state.m_pExceptionPtr = __pExceptionRaw; \ - SCAN_EHMARKER_END_CATCH(); \ - SCAN_IGNORE_THROW_MARKER; \ PAL_CPP_RETHROW; \ } \ PAL_CPP_ENDTRY \ - SCAN_EHMARKER_END_TRY(); \ } \ PAL_CPP_CATCH_ALL \ { \ - SCAN_EHMARKER_CATCH(); \ __defaultException_t __defaultException; \ CHECK::ResetAssert(); \ ExceptionHolder __pException(__state.m_pExceptionPtr); \ @@ -875,18 +856,11 @@ Exception *ExThrowWithInnerHelper(Exception *inner); { \ STATETYPE __state STATEARG; \ typedef DEFAULT_EXCEPTION_TYPE __defaultException_t; \ - SCAN_EHMARKER(); \ PAL_CPP_TRY \ { \ - SCAN_EHMARKER_TRY(); \ - SCAN_EHMARKER(); \ PAL_CPP_TRY \ { \ - SCAN_EHMARKER_TRY(); \ CAutoTryCleanup __autoCleanupTry(__state); \ - /* prevent annotations from being dropped by optimizations in debug */ \ - INDEBUG(static bool __alwayszero;) \ - INDEBUG(VolatileLoad(&__alwayszero);) \ { \ /* Disallow returns to make exception handling work. */ \ /* Some work is done after the catch, see EX_ENDTRY. */ \ @@ -895,27 +869,19 @@ Exception *ExThrowWithInnerHelper(Exception *inner); #define EX_CATCH_IMPL_CPP_ONLY \ DEBUG_ASSURE_NO_RETURN_END(EX_TRY) \ } \ - SCAN_EHMARKER_END_TRY(); \ } \ PAL_CPP_CATCH_NON_DERIVED_NOARG (const std::bad_alloc&) \ { \ - SCAN_EHMARKER_CATCH(); \ __state.SetCaughtCxx(); \ __state.m_pExceptionPtr = Exception::GetOOMException(); \ - SCAN_EHMARKER_END_CATCH(); \ - SCAN_IGNORE_THROW_MARKER; \ ThrowOutOfMemory(); \ } \ PAL_CPP_ENDTRY \ - SCAN_EHMARKER_END_TRY(); \ } \ PAL_CPP_CATCH_DERIVED (Exception, __pExceptionRaw) \ { \ - SCAN_EHMARKER_CATCH(); \ __state.SetCaughtCxx(); \ __state.m_pExceptionPtr = __pExceptionRaw; \ - SCAN_EHMARKER_END_CATCH(); \ - SCAN_IGNORE_THROW_MARKER; \ __defaultException_t __defaultException; \ CHECK::ResetAssert(); \ ExceptionHolder __pException(__state.m_pExceptionPtr); \ @@ -956,7 +922,6 @@ Exception *ExThrowWithInnerHelper(Exception *inner); #define EX_END_CATCH_UNREACHABLE \ DEBUG_ASSURE_NO_RETURN_END(EX_CATCH) \ } \ - SCAN_EHMARKER_END_CATCH(); \ UNREACHABLE(); \ } \ PAL_CPP_ENDTRY \ @@ -971,7 +936,6 @@ Exception *ExThrowWithInnerHelper(Exception *inner); __state.SucceedCatch(); \ DEBUG_ASSURE_NO_RETURN_END(EX_CATCH) \ } \ - SCAN_EHMARKER_END_CATCH(); \ } \ EX_ENDTRY \ } \ @@ -980,9 +944,7 @@ Exception *ExThrowWithInnerHelper(Exception *inner); #define EX_END_CATCH_FOR_HOOK \ __state.SucceedCatch(); \ DEBUG_ASSURE_NO_RETURN_END(EX_CATCH) \ - ANNOTATION_HANDLER_END; \ } \ - SCAN_EHMARKER_END_CATCH(); \ } \ EX_ENDTRY @@ -1217,7 +1179,6 @@ Exception *ExThrowWithInnerHelper(Exception *inner); #define EX_END_HOOK \ } \ - ANNOTATION_HANDLER_END; \ EX_RETHROW; \ EX_END_CATCH_FOR_HOOK; \ } diff --git a/src/coreclr/inc/executableallocator.h b/src/coreclr/inc/executableallocator.h index 11caf3a6857d..973b950ad369 100644 --- a/src/coreclr/inc/executableallocator.h +++ b/src/coreclr/inc/executableallocator.h @@ -182,6 +182,9 @@ class ExecutableAllocator // Return true if double mapping is enabled. 
static bool IsDoubleMappingEnabled(); + // Release memory allocated via DoubleMapping for either templates or normal double mapped data + void ReleaseWorker(void* pRX, bool releaseTemplate); + // Initialize the allocator instance bool Initialize(); @@ -262,6 +265,18 @@ class ExecutableAllocator // Unmap the RW mapping at the specified address void UnmapRW(void* pRW); + + // Allocate thunks from a template. pTemplate is the return value from CreateTemplate + void* AllocateThunksFromTemplate(void *pTemplate, size_t templateSize); + + // Free a set of thunks allocated from templates. pThunks must have been returned from AllocateThunksFromTemplate + void FreeThunksFromTemplate(void *pThunks, size_t templateSize); + + // Create a template + // If templateInImage is not null, it will attempt to use it as the template, otherwise it will create a temporary in-memory file to serve as the template + // Some OS/Architectures may or may not be able to work with this, so this API is permitted to return NULL, and callers should have an alternate approach using + // the codePageGenerator directly. + void* CreateTemplate(void* templateInImage, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)); }; #define ExecutableWriterHolder ExecutableWriterHolderNoLog diff --git a/src/coreclr/inc/formattype.cpp b/src/coreclr/inc/formattype.cpp index af30955047ac..9d92b0207dbd 100644 --- a/src/coreclr/inc/formattype.cpp +++ b/src/coreclr/inc/formattype.cpp @@ -338,7 +338,7 @@ PCCOR_SIGNATURE PrettyPrintSignature( { if(name) // printing the arguments { - PREFIX_ASSUME(typePtr != NULL); + _ASSERTE(typePtr != NULL); if (*typePtr == ELEMENT_TYPE_SENTINEL) { if (needComma) @@ -555,7 +555,7 @@ PCCOR_SIGNATURE PrettyPrintType( case ELEMENT_TYPE_ARRAY : { typePtr = PrettyPrintTypeOrDef(typePtr, out, pIMDI); - PREFIX_ASSUME(typePtr != NULL); + _ASSERTE(typePtr != NULL); unsigned rank = CorSigUncompressData(typePtr); // what is the syntax for the rank 0 case?
if (rank == 0) { @@ -563,11 +563,6 @@ PCCOR_SIGNATURE PrettyPrintType( } else { _ASSERTE(rank != 0); - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable:22009 "Suppress PREFAST warnings about integer overflow") -#endif int* lowerBounds = (int*) _alloca(sizeof(int)*2*rank); int* sizes = &lowerBounds[rank]; memset(lowerBounds, 0, sizeof(int)*2*rank); @@ -609,9 +604,6 @@ PCCOR_SIGNATURE PrettyPrintType( } } appendChar(out, ']'); -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif } } break; @@ -1106,11 +1098,6 @@ bool TrySigUncompress(PCCOR_SIGNATURE pData, // [IN] compressed dat } } - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif char* DumpMarshaling(IMDInternalImport* pImport, _Inout_updates_(cchszString) char* szString, DWORD cchszString, @@ -1236,18 +1223,11 @@ char* DumpMarshaling(IMDInternalImport* pImport, cbCur += ByteCountLength; if(strLen) { -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable:22009 "Suppress PREFAST warnings about integer overflow") -#endif strTemp = (LPUTF8)_alloca(strLen + 1); memcpy(strTemp, (LPUTF8)&pSigNativeType[cbCur], strLen); strTemp[strLen] = 0; buf.AppendPrintf(", \"%s\"", UnquotedProperName(strTemp)); cbCur += strLen; -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif } } break; @@ -1575,9 +1555,6 @@ char* DumpMarshaling(IMDInternalImport* pImport, } } } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif char* DumpParamAttr(_Inout_updates_(cchszString) char* szString, DWORD cchszString, DWORD dwAttr) { diff --git a/src/coreclr/inc/gc_unwind_x86.h b/src/coreclr/inc/gc_unwind_x86.h index 2430254e40ea..74fe4e753972 100644 --- a/src/coreclr/inc/gc_unwind_x86.h +++ b/src/coreclr/inc/gc_unwind_x86.h @@ -351,7 +351,7 @@ struct hdrInfo bool isSpeculativeStackWalk; // is the stackwalk seeded by an untrusted source (e.g., sampling profiler)? // These always includes EBP for EBP-frames and double-aligned-frames - RegMask savedRegMask:8; // which callee-saved regs are saved on stack + RegMask savedRegMask; // which callee-saved regs are saved on stack // Count of the callee-saved registers, excluding the frame pointer. // This does not include EBP for EBP-frames and double-aligned-frames. @@ -367,6 +367,8 @@ struct hdrInfo unsigned int syncEpilogStart; // The start of the epilog. Synchronized methods are guaranteed to have no more than one epilog. 
unsigned int revPInvokeOffset; // INVALID_REV_PINVOKE_OFFSET if there is no Reverse PInvoke frame + unsigned int noGCRegionCnt; + enum { NOT_IN_PROLOG = -1, NOT_IN_EPILOG = -1 }; int prologOffs; // NOT_IN_PROLOG if not in prolog @@ -397,8 +399,19 @@ bool UnwindStackFrameX86(PREGDISPLAY pContext, IN_EH_FUNCLETS_COMMA(bool isFunclet) bool updateAllRegs); +unsigned int DecodeGCHdrInfoMethodSize(GCInfoToken gcInfoToken); + size_t DecodeGCHdrInfo(GCInfoToken gcInfoToken, unsigned curOffset, hdrInfo * infoPtr); +bool IsInNoGCRegion(hdrInfo * infoPtr, + PTR_CBYTE table, + unsigned curOffset); + +unsigned FindFirstInterruptiblePoint(hdrInfo * infoPtr, + PTR_CBYTE table, + unsigned offs, + unsigned endOffs); + #endif // _UNWIND_X86_H diff --git a/src/coreclr/inc/gcdecoder.cpp b/src/coreclr/inc/gcdecoder.cpp index d4a3c4c3a6f5..541011b5f709 100644 --- a/src/coreclr/inc/gcdecoder.cpp +++ b/src/coreclr/inc/gcdecoder.cpp @@ -197,7 +197,7 @@ PTR_CBYTE FASTCALL decodeHeader(PTR_CBYTE table, UINT32 version, InfoHdr* header header->syncStartOffset ^= HAS_SYNC_OFFSET; break; case FLIP_REV_PINVOKE_FRAME: - header->revPInvokeOffset = INVALID_REV_PINVOKE_OFFSET ? HAS_REV_PINVOKE_FRAME_OFFSET : INVALID_REV_PINVOKE_OFFSET; + header->revPInvokeOffset ^= (INVALID_REV_PINVOKE_OFFSET ^ HAS_REV_PINVOKE_FRAME_OFFSET); break; case NEXT_OPCODE: @@ -205,9 +205,22 @@ PTR_CBYTE FASTCALL decodeHeader(PTR_CBYTE table, UINT32 version, InfoHdr* header nextByte = *table++; encoding = nextByte & ADJ_ENCODING_MAX; // encoding here always corresponds to codes in InfoHdrAdjust2 set - - _ASSERTE(encoding < SET_RET_KIND_MAX); - header->returnKind = (ReturnKind)encoding; + if (encoding <= SET_RET_KIND_MAX) + { + header->returnKind = (ReturnKind)encoding; + } + else if (encoding < FFFF_NOGCREGION_CNT) + { + header->noGCRegionCnt = encoding - SET_NOGCREGIONS_CNT; + } + else if (encoding == FFFF_NOGCREGION_CNT) + { + header->noGCRegionCnt = HAS_NOGCREGIONS; + } + else + { + _ASSERTE(!"Unexpected encoding"); + } break; } } @@ -470,7 +483,8 @@ bool InfoHdrSmall::isHeaderMatch(const InfoHdr& target) const target.varPtrTableSize != HAS_VARPTR && target.gsCookieOffset != HAS_GS_COOKIE_OFFSET && target.syncStartOffset != HAS_SYNC_OFFSET && - target.revPInvokeOffset != HAS_REV_PINVOKE_FRAME_OFFSET); + target.revPInvokeOffset != HAS_REV_PINVOKE_FRAME_OFFSET && + target.noGCRegionCnt != HAS_NOGCREGIONS); #endif // compare two InfoHdr's up to but not including the untrackCnt field @@ -495,7 +509,10 @@ bool InfoHdrSmall::isHeaderMatch(const InfoHdr& target) const if (target.syncStartOffset != INVALID_SYNC_OFFSET) return false; - if (target.revPInvokeOffset!= INVALID_REV_PINVOKE_OFFSET) + if (target.revPInvokeOffset != INVALID_REV_PINVOKE_OFFSET) + return false; + + if (target.noGCRegionCnt > 0) return false; return true; diff --git a/src/coreclr/inc/gcinfo.h b/src/coreclr/inc/gcinfo.h index 80ff267d583e..cdcbb9eb14f0 100644 --- a/src/coreclr/inc/gcinfo.h +++ b/src/coreclr/inc/gcinfo.h @@ -4,6 +4,7 @@ // ****************************************************************************** // WARNING!!!: These values are used by SOS in the diagnostics repo. Values should // added or removed in a backwards and forwards compatible way. +// There are scenarios in diagnostics that support parsing of old GC Info formats. 
// See: https://github.com/dotnet/diagnostics/blob/main/src/shared/inc/gcinfo.h // ****************************************************************************** @@ -38,6 +39,17 @@ const unsigned this_OFFSET_FLAG = 0x2; // the offset is "this" #define GCINFO_VERSION 4 +#ifdef SOS_INCLUDE +extern bool IsRuntimeVersionAtLeast(DWORD major); +inline int GCInfoVersion() +{ + // In SOS we only care about ability to parse/dump the GC Info. + // Since v2 and v3 had the same file format and v1 is no longer supported, + // we can assume that everything before net10.0 uses GCInfo v3. + return IsRuntimeVersionAtLeast(10) ? 4 : 3; +} +#endif + //----------------------------------------------------------------------------- // GCInfoToken: A wrapper that contains the GcInfo data and version number. // @@ -67,7 +79,11 @@ struct GCInfoToken static uint32_t ReadyToRunVersionToGcInfoVersion(uint32_t readyToRunMajorVersion, uint32_t readyToRunMinorVersion) { +#ifdef SOS_INCLUDE + return GCInfoVersion(); +#else return GCINFO_VERSION; +#endif } }; diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index 0b51833ee19d..87693d89d25c 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -16,6 +16,10 @@ #ifndef _GC_INFO_DECODER_ #define _GC_INFO_DECODER_ +#ifdef SOS_INCLUDE +#define DECODE_OLD_FORMATS +#endif + #define _max(a, b) (((a) > (b)) ? (a) : (b)) #define _min(a, b) (((a) < (b)) ? (a) : (b)) @@ -214,7 +218,7 @@ enum GcInfoDecoderFlags DECODE_INTERRUPTIBILITY = 0x08, DECODE_GC_LIFETIMES = 0x10, DECODE_NO_VALIDATION = 0x20, - DECODE_PSP_SYM = 0x40, + DECODE_PSP_SYM = 0x40, // Unused starting with v4 format DECODE_GENERICS_INST_CONTEXT = 0x80, // stack location of instantiation context for generics // (this may be either the 'this' ptr or the instantiation secret param) DECODE_GS_COOKIE = 0x100, // stack location of the GS cookie @@ -222,6 +226,7 @@ enum GcInfoDecoderFlags DECODE_PROLOG_LENGTH = 0x400, // length of the prolog (used to avoid reporting generics context) DECODE_EDIT_AND_CONTINUE = 0x800, DECODE_REVERSE_PINVOKE_VAR = 0x1000, + DECODE_RETURN_KIND = 0x2000, // Unused starting with v4 format #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) DECODE_HAS_TAILCALLS = 0x4000, #endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 @@ -232,7 +237,7 @@ enum GcInfoHeaderFlags GC_INFO_IS_VARARG = 0x1, // unused = 0x2, // was GC_INFO_HAS_SECURITY_OBJECT GC_INFO_HAS_GS_COOKIE = 0x4, - GC_INFO_HAS_PSP_SYM = 0x8, + GC_INFO_HAS_PSP_SYM = 0x8, // Unused starting with v4 format GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK = 0x30, GC_INFO_HAS_GENERICS_INST_CONTEXT_NONE = 0x00, GC_INFO_HAS_GENERICS_INST_CONTEXT_MT = 0x10, @@ -247,7 +252,6 @@ enum GcInfoHeaderFlags GC_INFO_HAS_EDIT_AND_CONTINUE_INFO = 0x100, GC_INFO_REVERSE_PINVOKE_FRAME = 0x200, - GC_INFO_FLAGS_BIT_SIZE_VERSION_1 = 9, GC_INFO_FLAGS_BIT_SIZE = 10, }; @@ -465,6 +469,8 @@ struct GcSlotDesc GcSlotFlags Flags; }; + +template class GcSlotDecoder { public: @@ -507,12 +513,13 @@ class GcSlotDecoder }; #ifdef USE_GC_INFO_DECODER -class GcInfoDecoder +template +class TGcInfoDecoder { public: // If you are not interested in interruptibility or gc lifetime information, pass 0 as instructionOffset - GcInfoDecoder( + TGcInfoDecoder( GCInfoToken gcInfoToken, GcInfoDecoderFlags flags = DECODE_EVERYTHING, UINT32 instructionOffset = 0 @@ -532,7 +539,7 @@ class GcInfoDecoder // This is used for gcinfodumper bool IsSafePoint(UINT32 codeOffset); - typedef void 
EnumerateSafePointsCallback (GcInfoDecoder* decoder, UINT32 offset, void * hCallback); + typedef void EnumerateSafePointsCallback (TGcInfoDecoder * decoder, UINT32 offset, void * hCallback); void EnumerateSafePoints(EnumerateSafePointsCallback * pCallback, void * hCallback); #endif @@ -576,11 +583,13 @@ class GcInfoDecoder INT32 GetReversePInvokeFrameStackSlot(); bool HasMethodDescGenericsInstContext(); bool HasMethodTableGenericsInstContext(); + bool HasStackBaseRegister(); bool GetIsVarArg(); bool WantsReportOnlyLeaf(); #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool HasTailCalls(); #endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || defined(TARGET_RISCV64) + ReturnKind GetReturnKind(); UINT32 GetCodeLength(); UINT32 GetStackBaseRegister(); UINT32 GetSizeOfEditAndContinuePreservedArea(); @@ -593,6 +602,10 @@ class GcInfoDecoder UINT32 GetSizeOfStackParameterArea(); #endif // FIXED_STACK_PARAMETER_SCRATCH_AREA + inline UINT32 Version() + { + return m_Version; + } private: BitStreamReader m_Reader; @@ -613,6 +626,8 @@ class GcInfoDecoder #ifdef TARGET_ARM64 UINT32 m_SizeOfEditAndContinueFixedStackFrame; #endif + // Unused starting with v4 format + ReturnKind m_ReturnKind; #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED UINT32 m_NumSafePoints; UINT32 m_SafePointIndex; @@ -631,6 +646,24 @@ class GcInfoDecoder #endif UINT32 m_Version; + inline UINT32 NormalizeCodeOffset(UINT32 offset) + { +#ifdef DECODE_OLD_FORMATS + if (Version() < 4) + return offset; +#endif + return GcInfoEncoding::NORMALIZE_CODE_OFFSET(offset); + } + + inline UINT32 DenormalizeCodeOffset(UINT32 offset) + { +#ifdef DECODE_OLD_FORMATS + if (Version() < 4) + return offset; +#endif + return GcInfoEncoding::DENORMALIZE_CODE_OFFSET(offset); + } + bool PredecodeFatHeader(int remainingFlags); static bool SetIsInterruptibleCB (UINT32 startOffset, UINT32 stopOffset, void * hCallback); @@ -661,7 +694,7 @@ class GcInfoDecoder bool IsScratchStackSlot(INT32 spOffset, GcStackSlotBase spBase, PREGDISPLAY pRD); void ReportUntrackedSlots( - GcSlotDecoder& slotDecoder, + GcSlotDecoder& slotDecoder, PREGDISPLAY pRD, unsigned flags, GCEnumCallback pCallBack, @@ -689,7 +722,7 @@ class GcInfoDecoder inline void ReportSlotToGC( - GcSlotDecoder& slotDecoder, + GcSlotDecoder& slotDecoder, UINT32 slotIndex, PREGDISPLAY pRD, bool reportScratchSlots, @@ -746,6 +779,12 @@ class GcInfoDecoder } } }; + +typedef TGcInfoDecoder GcInfoDecoder; +#ifdef FEATURE_INTERPRETER +typedef TGcInfoDecoder InterpreterGcInfoDecoder; +#endif // FEATURE_INTERPRETER + #endif // USE_GC_INFO_DECODER diff --git a/src/coreclr/inc/gcinfoencoder.h b/src/coreclr/inc/gcinfoencoder.h index 8c5daf92c23b..273d142a3435 100644 --- a/src/coreclr/inc/gcinfoencoder.h +++ b/src/coreclr/inc/gcinfoencoder.h @@ -23,7 +23,7 @@ - Flag: isVarArg, unused (was hasSecurityObject), hasGSCookie, - hasPSPSymStackSlot, + unused (was hasPSPSymStackSlot), hasGenericsInstContextStackSlot, hasStackBaseregister, wantsReportOnlyLeaf (AMD64 use only), @@ -34,9 +34,9 @@ - CodeLength - Prolog (if hasGenericsInstContextStackSlot || hasGSCookie) - Epilog (if hasGSCookie) - - SecurityObjectStackSlot (if any) + - SecurityObjectStackSlot (if any; no longer used) - GSCookieStackSlot (if any) - - PSPSymStackSlot (if any) + - PSPSymStackSlot (if any; no longer used) - GenericsInstContextStackSlot (if any) - StackBaseRegister (if any) - SizeOfEditAndContinuePreservedArea (if any) @@ -128,7 +128,6 @@ struct GcInfoSize size_t ProEpilogSize; size_t 
SecObjSize; size_t GsCookieSize; - size_t PspSymSize; size_t GenericsCtxSize; size_t StackBaseSize; size_t ReversePInvokeFrameSize; @@ -290,19 +289,7 @@ class BitStreamWriter *m_pCurrentSlot |= data; } - inline void AllocMemoryBlock() - { - _ASSERTE( IS_ALIGNED( m_MemoryBlockSize, sizeof( size_t ) ) ); - MemoryBlock* pMemBlock = m_MemoryBlocks.AppendNew(m_pAllocator, m_MemoryBlockSize); - - m_pCurrentSlot = pMemBlock->Contents; - m_OutOfBlockSlot = m_pCurrentSlot + m_MemoryBlockSize / sizeof( size_t ); - -#ifdef _DEBUG - m_MemoryBlocksCount++; -#endif - - } + inline void AllocMemoryBlock(); inline void InitCurrentSlot() { @@ -315,16 +302,6 @@ class BitStreamWriter typedef UINT32 GcSlotId; -inline UINT32 GetNormCodeOffsetChunk(UINT32 normCodeOffset) -{ - return normCodeOffset / NUM_NORM_CODE_OFFSETS_PER_CHUNK; -} - -inline UINT32 GetCodeOffsetChunk(UINT32 codeOffset) -{ - return (NORMALIZE_CODE_OFFSET(codeOffset)) / NUM_NORM_CODE_OFFSETS_PER_CHUNK; -} - enum GENERIC_CONTEXTPARAM_TYPE { GENERIC_CONTEXTPARAM_NONE = 0, @@ -335,18 +312,8 @@ enum GENERIC_CONTEXTPARAM_TYPE extern void DECLSPEC_NORETURN ThrowOutOfMemory(); -class GcInfoEncoder +namespace GcInfoEncoderExt { -public: - typedef void (*NoMemoryFunction)(void); - - GcInfoEncoder( - ICorJitInfo* pCorJitInfo, - CORINFO_METHOD_INFO* pMethodInfo, - IAllocator* pJitAllocator, - NoMemoryFunction pNoMem = ::ThrowOutOfMemory - ); - struct LifetimeTransition { UINT32 CodeOffset; @@ -354,7 +321,20 @@ class GcInfoEncoder BYTE BecomesLive; BYTE IsDeleted; }; +} + +template +class TGcInfoEncoder +{ +public: + typedef void (*NoMemoryFunction)(void); + TGcInfoEncoder( + ICorJitInfo* pCorJitInfo, + CORINFO_METHOD_INFO* pMethodInfo, + IAllocator* pJitAllocator, + NoMemoryFunction pNoMem = ::ThrowOutOfMemory + ); #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED void DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UINT32 numCallSites); @@ -415,7 +395,6 @@ class GcInfoEncoder void SetPrologSize( UINT32 prologSize ); void SetGSCookieStackSlot( INT32 spOffsetGSCookie, UINT32 validRangeStart, UINT32 validRangeEnd ); - void SetPSPSymStackSlot( INT32 spOffsetPSPSym ); void SetGenericsInstContextStackSlot( INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type); void SetReversePInvokeFrameSlot(INT32 spOffset); void SetIsVarArg(); @@ -488,7 +467,7 @@ class GcInfoEncoder BitStreamWriter m_Info2; // Used for chunk encodings GcInfoArrayList m_InterruptibleRanges; - GcInfoArrayList m_LifetimeTransitions; + GcInfoArrayList m_LifetimeTransitions; bool m_IsVarArg; #if defined(TARGET_AMD64) @@ -499,7 +478,6 @@ class GcInfoEncoder INT32 m_GSCookieStackSlot; UINT32 m_GSCookieValidRangeStart; UINT32 m_GSCookieValidRangeEnd; - INT32 m_PSPSymStackSlot; INT32 m_GenericsInstContextStackSlot; GENERIC_CONTEXTPARAM_TYPE m_contextParamType; UINT32 m_CodeLength; @@ -542,19 +520,20 @@ class GcInfoEncoder void SizeofSlotStateVarLengthVector(const BitArray& vector, UINT32 baseSkip, UINT32 baseRun, UINT32 * pSizeofSimple, UINT32 * pSizeofRLE, UINT32 * pSizeofRLENeg); UINT32 WriteSlotStateVarLengthVector(BitStreamWriter &writer, const BitArray& vector, UINT32 baseSkip, UINT32 baseRun); -#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - bool DoNotTrackInPartiallyInterruptible(GcSlotDesc &slot); -#endif // PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - // Assumes that "*ppTransitions" is has size "numTransitions", is sorted by CodeOffset then by SlotId, // and that "*ppEndTransitions" points one beyond the end of the array. 
If "*ppTransitions" contains // any dead/live transitions pairs for the same CodeOffset and SlotID, removes those, by allocating a // new array, and copying the non-removed elements into it. If it does this, sets "*ppTransitions" to // point to the new array, "*pNumTransitions" to its shorted length, and "*ppEndTransitions" to // point one beyond the used portion of this array. - void EliminateRedundantLiveDeadPairs(LifetimeTransition** ppTransitions, + void EliminateRedundantLiveDeadPairs(GcInfoEncoderExt::LifetimeTransition** ppTransitions, size_t* pNumTransitions, - LifetimeTransition** ppEndTransitions); + GcInfoEncoderExt::LifetimeTransition** ppEndTransitions); + + static inline UINT32 GetNormCodeOffsetChunk(UINT32 normCodeOffset) + { + return normCodeOffset / GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK; + } #ifdef _DEBUG bool m_IsSlotTableFrozen; @@ -565,4 +544,10 @@ class GcInfoEncoder #endif }; +typedef TGcInfoEncoder GcInfoEncoder; + +#ifdef FEATURE_INTERPRETER +typedef TGcInfoEncoder InterpreterGcInfoEncoder; +#endif // FEATURE_INTERPRETER + #endif // !__GCINFOENCODER_H__ diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index e1f7b517897a..e1168757c7dd 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -5,6 +5,11 @@ #ifndef __GCINFOTYPES_H__ #define __GCINFOTYPES_H__ +// HACK: debugreturn.h breaks constexpr +#if defined(debug_instrumented_return) || defined(_DEBUGRETURN_H_) +#undef return +#endif // debug_instrumented_return + #ifndef FEATURE_NATIVEAOT #include "gcinfo.h" #endif @@ -14,7 +19,7 @@ #endif // _MSC_VER // ***************************************************************************** -// WARNING!!!: These values and code are used in the runtime repo and SOS in the +// WARNING!!!: These values and code are used in the runtime repo and SOS in the // diagnostics repo. Should updated in a backwards and forwards compatible way. // See: https://github.com/dotnet/diagnostics/blob/main/src/shared/inc/gcinfotypes.h // https://github.com/dotnet/runtime/blob/main/src/coreclr/inc/gcinfotypes.h @@ -108,6 +113,8 @@ struct GcStackSlot } }; +// ReturnKind is not encoded in GCInfo v4 and later, except on x86. + //-------------------------------------------------------------------------------- // ReturnKind -- encoding return type information in GcInfo // @@ -132,61 +139,6 @@ struct GcStackSlot // //-------------------------------------------------------------------------------- -// RT_Unset: An intermediate step for staged bringup. -// When ReturnKind is RT_Unset, it means that the JIT did not set -// the ReturnKind in the GCInfo, and therefore the VM cannot rely on it, -// and must use other mechanisms (similar to GcInfo ver 1) to determine -// the Return type's GC information. -// -// RT_Unset is only used in the following situations: -// X64: Used by JIT64 until updated to use GcInfo v2 API -// ARM: Used by JIT32 until updated to use GcInfo v2 API -// -// RT_Unset should have a valid encoding, whose bits are actually stored in the image. -// For X86, there are no free bits, and there's no RT_Unused enumeration. 
- -#if defined(TARGET_X86) - -// 00 RT_Scalar -// 01 RT_Object -// 10 RT_ByRef -// 11 RT_Float - -#elif defined(TARGET_ARM) - -// 00 RT_Scalar -// 01 RT_Object -// 10 RT_ByRef -// 11 RT_Unset - -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - -// Slim Header: - -// 00 RT_Scalar -// 01 RT_Object -// 10 RT_ByRef -// 11 RT_Unset - -// Fat Header: - -// 0000 RT_Scalar -// 0001 RT_Object -// 0010 RT_ByRef -// 0011 RT_Unset -// 0100 RT_Scalar_Obj -// 1000 RT_Scalar_ByRef -// 0101 RT_Obj_Obj -// 1001 RT_Obj_ByRef -// 0110 RT_ByRef_Obj -// 1010 RT_ByRef_ByRef - -#else -#ifdef PORTABILITY_WARNING -PORTABILITY_WARNING("Need ReturnKind for new Platform") -#endif // PORTABILITY_WARNING -#endif // Target checks - enum ReturnKind { // Cases for Return in one register @@ -259,15 +211,6 @@ inline bool IsPointerFieldReturnKind(ReturnKind returnKind) return (returnKind == RT_Object || returnKind == RT_ByRef); } -inline bool IsValidReturnRegister(size_t regNo) -{ - return (regNo == 0) -#ifdef FEATURE_MULTIREG_RETURN - || (regNo == 1) -#endif // FEATURE_MULTIREG_RETURN - ; -} - inline bool IsStructReturnKind(ReturnKind returnKind) { // Two bits encode integer/ref/float return-kinds. @@ -308,7 +251,6 @@ inline ReturnKind GetStructReturnKind(ReturnKind reg0, ReturnKind reg1) inline ReturnKind ExtractRegReturnKind(ReturnKind returnKind, size_t returnRegOrdinal, bool& moreRegs) { _ASSERTE(IsValidReturnKind(returnKind)); - _ASSERTE(IsValidReturnRegister(returnRegOrdinal)); // Return kind of each return register is encoded in two bits at returnRegOrdinal*2 position from LSB ReturnKind regReturnKind = (ReturnKind)((returnKind >> (returnRegOrdinal * 2)) & 3); @@ -368,7 +310,8 @@ enum infoHdrAdjustConstants { SET_EPILOGSIZE_MAX = 10, // Change to 6 SET_EPILOGCNT_MAX = 4, SET_UNTRACKED_MAX = 3, - SET_RET_KIND_MAX = 4, // 2 bits for ReturnKind + SET_RET_KIND_MAX = 3, // 2 bits for ReturnKind + SET_NOGCREGIONS_MAX = 4, ADJ_ENCODING_MAX = 0x7f, // Maximum valid encoding in a byte // Also used to mask off next bit from each encoding byte. MORE_BYTES_TO_FOLLOW = 0x80 // If the High-bit of a header or adjustment byte @@ -420,10 +363,13 @@ enum infoHdrAdjust { // Second set of opcodes, when first code is 0x4F enum infoHdrAdjust2 { SET_RETURNKIND = 0, // 0x00-SET_RET_KIND_MAX Set ReturnKind to value + SET_NOGCREGIONS_CNT = SET_RETURNKIND + SET_RET_KIND_MAX + 1, // 0x04 + FFFF_NOGCREGION_CNT = SET_NOGCREGIONS_CNT + SET_NOGCREGIONS_MAX + 1 // 0x09 There is a count (>SET_NOGCREGIONS_MAX) after the header encoding }; #define HAS_UNTRACKED ((unsigned int) -1) #define HAS_VARPTR ((unsigned int) -1) +#define HAS_NOGCREGIONS ((unsigned int) -1) // 0 is a valid offset for the Reverse P/Invoke block // So use -1 as the sentinel for invalid and -2 as the sentinel for present. @@ -472,7 +418,7 @@ struct InfoHdrSmall { unsigned short argCount; // 5,6 in bytes unsigned int frameSize; // 7,8,9,10 in bytes unsigned int untrackedCnt; // 11,12,13,14 - unsigned int varPtrTableSize; // 15.16,17,18 + unsigned int varPtrTableSize; // 15,16,17,18 // Checks whether "this" is compatible with "target". 
// It is not an exact bit match as "this" could have some @@ -490,7 +436,8 @@ struct InfoHdr : public InfoHdrSmall { unsigned int syncStartOffset; // 23,24,25,26 unsigned int syncEndOffset; // 27,28,29,30 unsigned int revPInvokeOffset; // 31,32,33,34 Available GcInfo v2 onwards, previously undefined - // 35 bytes total + unsigned int noGCRegionCnt; // 35,36,37,38 + // 39 bytes total // Checks whether "this" is compatible with "target". // It is not an exact bit match as "this" could have some @@ -505,7 +452,8 @@ struct InfoHdr : public InfoHdrSmall { target.varPtrTableSize != HAS_VARPTR && target.gsCookieOffset != HAS_GS_COOKIE_OFFSET && target.syncStartOffset != HAS_SYNC_OFFSET && - target.revPInvokeOffset != HAS_REV_PINVOKE_FRAME_OFFSET); + target.revPInvokeOffset != HAS_REV_PINVOKE_FRAME_OFFSET && + target.noGCRegionCnt != HAS_NOGCREGIONS); #endif // compare two InfoHdr's up to but not including the untrackCnt field @@ -536,6 +484,13 @@ struct InfoHdr : public InfoHdrSmall { (target.revPInvokeOffset == INVALID_REV_PINVOKE_OFFSET)) return false; + if (noGCRegionCnt != target.noGCRegionCnt) { + if (target.noGCRegionCnt <= SET_NOGCREGIONS_MAX) + return false; + else if (noGCRegionCnt != HAS_UNTRACKED) + return false; + } + return true; } }; @@ -566,6 +521,7 @@ inline void GetInfoHdr(int index, InfoHdr * header) header->syncStartOffset = INVALID_SYNC_OFFSET; header->syncEndOffset = INVALID_SYNC_OFFSET; header->revPInvokeOffset = INVALID_REV_PINVOKE_OFFSET; + header->noGCRegionCnt = 0; } PTR_CBYTE FASTCALL decodeHeader(PTR_CBYTE table, UINT32 version, InfoHdr* header); @@ -612,274 +568,293 @@ void FASTCALL decodeCallPattern(int pattern, #ifndef TARGET_POINTER_SIZE #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target #endif -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64) -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6) -#define NORMALIZE_STACK_SLOT(x) ((x)>>3) -#define DENORMALIZE_STACK_SLOT(x) ((x)<<3) -#define NORMALIZE_CODE_LENGTH(x) (x) -#define DENORMALIZE_CODE_LENGTH(x) (x) -// Encode RBP as 0 -#define NORMALIZE_STACK_BASE_REGISTER(x) ((x) ^ 5) -#define DENORMALIZE_STACK_BASE_REGISTER(x) ((x) ^ 5) -#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>3) -#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<3) -#define CODE_OFFSETS_NEED_NORMALIZATION 0 -#define NORMALIZE_CODE_OFFSET(x) (x) -#define DENORMALIZE_CODE_OFFSET(x) (x) -#define NORMALIZE_REGISTER(x) (x) -#define DENORMALIZE_REGISTER(x) (x) -#define NORMALIZE_NUM_SAFE_POINTS(x) (x) -#define DENORMALIZE_NUM_SAFE_POINTS(x) (x) -#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) -#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) - -#define PSP_SYM_STACK_SLOT_ENCBASE 6 -#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6 -#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6 -#define GS_COOKIE_STACK_SLOT_ENCBASE 6 -#define CODE_LENGTH_ENCBASE 8 -#define STACK_BASE_REGISTER_ENCBASE 3 -#define SIZE_OF_STACK_AREA_ENCBASE 3 -#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 4 -#define REVERSE_PINVOKE_FRAME_ENCBASE 6 -#define NUM_REGISTERS_ENCBASE 2 -#define NUM_STACK_SLOTS_ENCBASE 2 -#define NUM_UNTRACKED_SLOTS_ENCBASE 1 -#define NORM_PROLOG_SIZE_ENCBASE 5 -#define NORM_EPILOG_SIZE_ENCBASE 3 -#define NORM_CODE_OFFSET_DELTA_ENCBASE 3 -#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 6 -#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6 -#define REGISTER_ENCBASE 3 -#define REGISTER_DELTA_ENCBASE 2 -#define STACK_SLOT_ENCBASE 6 -#define STACK_SLOT_DELTA_ENCBASE 4 -#define NUM_SAFE_POINTS_ENCBASE 2 -#define 
NUM_INTERRUPTIBLE_RANGES_ENCBASE 1 -#define NUM_EH_CLAUSES_ENCBASE 2 -#define POINTER_SIZE_ENCBASE 3 -#define LIVESTATE_RLE_RUN_ENCBASE 2 -#define LIVESTATE_RLE_SKIP_ENCBASE 4 + +#define TargetGcInfoEncoding AMD64GcInfoEncoding + +struct AMD64GcInfoEncoding { + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64); + + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6); + static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>3); } + static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<3); } + static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return (x); } + + // Encode RBP as 0 + static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) ^ 5); } + static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) ^ 5); } + static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>3); } + static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<3); } + static const bool CODE_OFFSETS_NEED_NORMALIZATION = false; + static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return (x); } + + static const int PSP_SYM_STACK_SLOT_ENCBASE = 6; + static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6; + static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6; + static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6; + static const int CODE_LENGTH_ENCBASE = 8; + static const int SIZE_OF_RETURN_KIND_IN_SLIM_HEADER = 2; + static const int SIZE_OF_RETURN_KIND_IN_FAT_HEADER = 4; + static const int STACK_BASE_REGISTER_ENCBASE = 3; + static const int SIZE_OF_STACK_AREA_ENCBASE = 3; + static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4; + static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6; + static const int NUM_REGISTERS_ENCBASE = 2; + static const int NUM_STACK_SLOTS_ENCBASE = 2; + static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1; + static const int NORM_PROLOG_SIZE_ENCBASE = 5; + static const int NORM_EPILOG_SIZE_ENCBASE = 3; + static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3; + static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6; + static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6; + static const int REGISTER_ENCBASE = 3; + static const int REGISTER_DELTA_ENCBASE = 2; + static const int STACK_SLOT_ENCBASE = 6; + static const int STACK_SLOT_DELTA_ENCBASE = 4; + static const int NUM_SAFE_POINTS_ENCBASE = 2; + static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1; + static const int NUM_EH_CLAUSES_ENCBASE = 2; + static const int POINTER_SIZE_ENCBASE = 3; + static const int LIVESTATE_RLE_RUN_ENCBASE = 2; + static const int LIVESTATE_RLE_SKIP_ENCBASE = 4; +}; #elif defined(TARGET_ARM) #ifndef TARGET_POINTER_SIZE #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this target #endif -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64) -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6) -#define NORMALIZE_STACK_SLOT(x) ((x)>>2) -#define DENORMALIZE_STACK_SLOT(x) ((x)<<2) -#define NORMALIZE_CODE_LENGTH(x) ((x)>>1) -#define DENORMALIZE_CODE_LENGTH(x) ((x)<<1) -// Encode R11 as zero -#define NORMALIZE_STACK_BASE_REGISTER(x) ((((x) - 4) & 7) ^ 7) -#define DENORMALIZE_STACK_BASE_REGISTER(x) (((x) ^ 7) + 4) -#define NORMALIZE_SIZE_OF_STACK_AREA(x) 
((x)>>2) -#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<2) -#define CODE_OFFSETS_NEED_NORMALIZATION 1 -#define NORMALIZE_CODE_OFFSET(x) ((x)>>1) // Instructions are 2/4 bytes long in Thumb/ARM states, -#define DENORMALIZE_CODE_OFFSET(x) ((x)<<1) -#define NORMALIZE_REGISTER(x) (x) -#define DENORMALIZE_REGISTER(x) (x) -#define NORMALIZE_NUM_SAFE_POINTS(x) (x) -#define DENORMALIZE_NUM_SAFE_POINTS(x) (x) -#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) -#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) - -// The choices of these encoding bases only affects space overhead -// and performance, not semantics/correctness. -#define PSP_SYM_STACK_SLOT_ENCBASE 5 -#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 5 -#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 5 -#define GS_COOKIE_STACK_SLOT_ENCBASE 5 -#define CODE_LENGTH_ENCBASE 7 -#define STACK_BASE_REGISTER_ENCBASE 1 -#define SIZE_OF_STACK_AREA_ENCBASE 3 -#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 3 -#define REVERSE_PINVOKE_FRAME_ENCBASE 5 -#define NUM_REGISTERS_ENCBASE 2 -#define NUM_STACK_SLOTS_ENCBASE 3 -#define NUM_UNTRACKED_SLOTS_ENCBASE 3 -#define NORM_PROLOG_SIZE_ENCBASE 5 -#define NORM_EPILOG_SIZE_ENCBASE 3 -#define NORM_CODE_OFFSET_DELTA_ENCBASE 3 -#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 4 -#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6 -#define REGISTER_ENCBASE 2 -#define REGISTER_DELTA_ENCBASE 1 -#define STACK_SLOT_ENCBASE 6 -#define STACK_SLOT_DELTA_ENCBASE 4 -#define NUM_SAFE_POINTS_ENCBASE 3 -#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 2 -#define NUM_EH_CLAUSES_ENCBASE 3 -#define POINTER_SIZE_ENCBASE 3 -#define LIVESTATE_RLE_RUN_ENCBASE 2 -#define LIVESTATE_RLE_SKIP_ENCBASE 4 + +#define TargetGcInfoEncoding ARM32GcInfoEncoding + +struct ARM32GcInfoEncoding { + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64); + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6); + static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>2); } + static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<2); } + static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)>>1); } + static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)<<1); } + // Encode R11 as zero + static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((((x) - 4) & 7) ^ 7); } + static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return (((x) ^ 7) + 4); } + static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>2); } + static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<2); } + static const bool CODE_OFFSETS_NEED_NORMALIZATION = true; + static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)>>1) /* Instructions are 2/4 bytes long in Thumb/ARM states */; } + static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)<<1); } + + // The choices of these encoding bases only affects space overhead + // and performance, not semantics/correctness. 
+ static const int PSP_SYM_STACK_SLOT_ENCBASE = 5; + static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 5; + static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 5; + static const int GS_COOKIE_STACK_SLOT_ENCBASE = 5; + static const int CODE_LENGTH_ENCBASE = 7; + static const int SIZE_OF_RETURN_KIND_IN_SLIM_HEADER = 2; + static const int SIZE_OF_RETURN_KIND_IN_FAT_HEADER = 2; + static const int STACK_BASE_REGISTER_ENCBASE = 1; + static const int SIZE_OF_STACK_AREA_ENCBASE = 3; + static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 3; + static const int REVERSE_PINVOKE_FRAME_ENCBASE = 5; + static const int NUM_REGISTERS_ENCBASE = 2; + static const int NUM_STACK_SLOTS_ENCBASE = 3; + static const int NUM_UNTRACKED_SLOTS_ENCBASE = 3; + static const int NORM_PROLOG_SIZE_ENCBASE = 5; + static const int NORM_EPILOG_SIZE_ENCBASE = 3; + static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3; + static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 4; + static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6; + static const int REGISTER_ENCBASE = 2; + static const int REGISTER_DELTA_ENCBASE = 1; + static const int STACK_SLOT_ENCBASE = 6; + static const int STACK_SLOT_DELTA_ENCBASE = 4; + static const int NUM_SAFE_POINTS_ENCBASE = 3; + static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 2; + static const int NUM_EH_CLAUSES_ENCBASE = 3; + static const int POINTER_SIZE_ENCBASE = 3; + static const int LIVESTATE_RLE_RUN_ENCBASE = 2; + static const int LIVESTATE_RLE_SKIP_ENCBASE = 4; +}; #elif defined(TARGET_ARM64) #ifndef TARGET_POINTER_SIZE #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target #endif -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64) -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6) -#define NORMALIZE_STACK_SLOT(x) ((x)>>3) // GC Pointers are 8-bytes aligned -#define DENORMALIZE_STACK_SLOT(x) ((x)<<3) -#define NORMALIZE_CODE_LENGTH(x) ((x)>>2) // All Instructions are 4 bytes long -#define DENORMALIZE_CODE_LENGTH(x) ((x)<<2) -#define NORMALIZE_STACK_BASE_REGISTER(x) ((x)^29) // Encode Frame pointer X29 as zero -#define DENORMALIZE_STACK_BASE_REGISTER(x) ((x)^29) -#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>3) -#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<3) -#define CODE_OFFSETS_NEED_NORMALIZATION 1 -#define NORMALIZE_CODE_OFFSET(x) ((x)>>2) // Instructions are 4 bytes long -#define DENORMALIZE_CODE_OFFSET(x) ((x)<<2) -#define NORMALIZE_REGISTER(x) (x) -#define DENORMALIZE_REGISTER(x) (x) -#define NORMALIZE_NUM_SAFE_POINTS(x) (x) -#define DENORMALIZE_NUM_SAFE_POINTS(x) (x) -#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) -#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) - -#define PSP_SYM_STACK_SLOT_ENCBASE 6 -#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6 -#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6 -#define GS_COOKIE_STACK_SLOT_ENCBASE 6 -#define CODE_LENGTH_ENCBASE 8 -#define STACK_BASE_REGISTER_ENCBASE 2 // FP encoded as 0, SP as 2. 
-#define SIZE_OF_STACK_AREA_ENCBASE 3 -#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 4 -#define SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE 4 -#define REVERSE_PINVOKE_FRAME_ENCBASE 6 -#define NUM_REGISTERS_ENCBASE 3 -#define NUM_STACK_SLOTS_ENCBASE 2 -#define NUM_UNTRACKED_SLOTS_ENCBASE 1 -#define NORM_PROLOG_SIZE_ENCBASE 5 -#define NORM_EPILOG_SIZE_ENCBASE 3 -#define NORM_CODE_OFFSET_DELTA_ENCBASE 3 -#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 6 -#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6 -#define REGISTER_ENCBASE 3 -#define REGISTER_DELTA_ENCBASE 2 -#define STACK_SLOT_ENCBASE 6 -#define STACK_SLOT_DELTA_ENCBASE 4 -#define NUM_SAFE_POINTS_ENCBASE 3 -#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1 -#define NUM_EH_CLAUSES_ENCBASE 2 -#define POINTER_SIZE_ENCBASE 3 -#define LIVESTATE_RLE_RUN_ENCBASE 2 -#define LIVESTATE_RLE_SKIP_ENCBASE 4 + +#define TargetGcInfoEncoding ARM64GcInfoEncoding + +struct ARM64GcInfoEncoding { + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64); + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6); + // GC Pointers are 8-bytes aligned + static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>3); } + static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<3); } + // All Instructions are 4 bytes long + static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)>>2); } + static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)<<2); } + // Encode Frame pointer X29 as zero + static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x)^29); } + static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x)^29); } + static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>3); } + static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<3); } + static const bool CODE_OFFSETS_NEED_NORMALIZATION = true; + // Instructions are 4 bytes long + static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)>>2); } + static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)<<2); } + + static const int PSP_SYM_STACK_SLOT_ENCBASE = 6; + static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6; + static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6; + static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6; + static const int CODE_LENGTH_ENCBASE = 8; + static const int SIZE_OF_RETURN_KIND_IN_SLIM_HEADER = 2; + static const int SIZE_OF_RETURN_KIND_IN_FAT_HEADER = 4; + // FP encoded as 0, SP as 2. 
+ static const int STACK_BASE_REGISTER_ENCBASE = 2; + static const int SIZE_OF_STACK_AREA_ENCBASE = 3; + static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4; + static const int SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE = 4; + static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6; + static const int NUM_REGISTERS_ENCBASE = 3; + static const int NUM_STACK_SLOTS_ENCBASE = 2; + static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1; + static const int NORM_PROLOG_SIZE_ENCBASE = 5; + static const int NORM_EPILOG_SIZE_ENCBASE = 3; + static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3; + static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6; + static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6; + static const int REGISTER_ENCBASE = 3; + static const int REGISTER_DELTA_ENCBASE = 2; + static const int STACK_SLOT_ENCBASE = 6; + static const int STACK_SLOT_DELTA_ENCBASE = 4; + static const int NUM_SAFE_POINTS_ENCBASE = 3; + static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1; + static const int NUM_EH_CLAUSES_ENCBASE = 2; + static const int POINTER_SIZE_ENCBASE = 3; + static const int LIVESTATE_RLE_RUN_ENCBASE = 2; + static const int LIVESTATE_RLE_SKIP_ENCBASE = 4; +}; #elif defined(TARGET_LOONGARCH64) #ifndef TARGET_POINTER_SIZE #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target #endif -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64) -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6) -#define NORMALIZE_STACK_SLOT(x) ((x)>>3) // GC Pointers are 8-bytes aligned -#define DENORMALIZE_STACK_SLOT(x) ((x)<<3) -#define NORMALIZE_CODE_LENGTH(x) ((x)>>2) // All Instructions are 4 bytes long -#define DENORMALIZE_CODE_LENGTH(x) ((x)<<2) -#define NORMALIZE_STACK_BASE_REGISTER(x) ((x) == 22 ? 0u : 1u) // Encode Frame pointer fp=$22 as zero -#define DENORMALIZE_STACK_BASE_REGISTER(x) ((x) == 0 ? 22u : 3u) -#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>3) -#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<3) -#define CODE_OFFSETS_NEED_NORMALIZATION 1 -#define NORMALIZE_CODE_OFFSET(x) ((x)>>2) // Instructions are 4 bytes long -#define DENORMALIZE_CODE_OFFSET(x) ((x)<<2) -#define NORMALIZE_REGISTER(x) (x) -#define DENORMALIZE_REGISTER(x) (x) -#define NORMALIZE_NUM_SAFE_POINTS(x) (x) -#define DENORMALIZE_NUM_SAFE_POINTS(x) (x) -#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) -#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) - -#define PSP_SYM_STACK_SLOT_ENCBASE 6 -#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6 -#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6 -#define GS_COOKIE_STACK_SLOT_ENCBASE 6 -#define CODE_LENGTH_ENCBASE 8 -// FP/SP encoded as 0 or 1. 
-#define STACK_BASE_REGISTER_ENCBASE 2 -#define SIZE_OF_STACK_AREA_ENCBASE 3 -#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 4 -#define REVERSE_PINVOKE_FRAME_ENCBASE 6 -#define NUM_REGISTERS_ENCBASE 3 -#define NUM_STACK_SLOTS_ENCBASE 2 -#define NUM_UNTRACKED_SLOTS_ENCBASE 1 -#define NORM_PROLOG_SIZE_ENCBASE 5 -#define NORM_EPILOG_SIZE_ENCBASE 3 -#define NORM_CODE_OFFSET_DELTA_ENCBASE 3 -#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 6 -#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6 -#define REGISTER_ENCBASE 3 -#define REGISTER_DELTA_ENCBASE 2 -#define STACK_SLOT_ENCBASE 6 -#define STACK_SLOT_DELTA_ENCBASE 4 -#define NUM_SAFE_POINTS_ENCBASE 3 -#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1 -#define NUM_EH_CLAUSES_ENCBASE 2 -#define POINTER_SIZE_ENCBASE 3 -#define LIVESTATE_RLE_RUN_ENCBASE 2 -#define LIVESTATE_RLE_SKIP_ENCBASE 4 + +#define TargetGcInfoEncoding LoongArch64GcInfoEncoding + +struct LoongArch64GcInfoEncoding { + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64); + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6); + // GC Pointers are 8-bytes aligned + static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>3); } + static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<3); } + // All Instructions are 4 bytes long + static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)>>2); } + static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)<<2); } + // Encode Frame pointer fp=$22 as zero + static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) == 22 ? 0u : 1u); } + static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) == 0 ? 22u : 3u); } + static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>3); } + static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<3); } + static const bool CODE_OFFSETS_NEED_NORMALIZATION = true; + // Instructions are 4 bytes long + static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)>>2); } + static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)<<2); } + + static const int PSP_SYM_STACK_SLOT_ENCBASE = 6; + static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6; + static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6; + static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6; + static const int CODE_LENGTH_ENCBASE = 8; + static const int SIZE_OF_RETURN_KIND_IN_SLIM_HEADER = 2; + static const int SIZE_OF_RETURN_KIND_IN_FAT_HEADER = 4; + // FP/SP encoded as 0 or 1. 
+ static const int STACK_BASE_REGISTER_ENCBASE = 2; + static const int SIZE_OF_STACK_AREA_ENCBASE = 3; + static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4; + static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6; + static const int NUM_REGISTERS_ENCBASE = 3; + static const int NUM_STACK_SLOTS_ENCBASE = 2; + static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1; + static const int NORM_PROLOG_SIZE_ENCBASE = 5; + static const int NORM_EPILOG_SIZE_ENCBASE = 3; + static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3; + static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6; + static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6; + static const int REGISTER_ENCBASE = 3; + static const int REGISTER_DELTA_ENCBASE = 2; + static const int STACK_SLOT_ENCBASE = 6; + static const int STACK_SLOT_DELTA_ENCBASE = 4; + static const int NUM_SAFE_POINTS_ENCBASE = 3; + static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1; + static const int NUM_EH_CLAUSES_ENCBASE = 2; + static const int POINTER_SIZE_ENCBASE = 3; + static const int LIVESTATE_RLE_RUN_ENCBASE = 2; + static const int LIVESTATE_RLE_SKIP_ENCBASE = 4; +}; #elif defined(TARGET_RISCV64) #ifndef TARGET_POINTER_SIZE #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target #endif -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64) -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6) -#define NORMALIZE_STACK_SLOT(x) ((x)>>3) // GC Pointers are 8-bytes aligned -#define DENORMALIZE_STACK_SLOT(x) ((x)<<3) -#define NORMALIZE_CODE_LENGTH(x) ((x)>>2) // All Instructions are 4 bytes long -#define DENORMALIZE_CODE_LENGTH(x) ((x)<<2) -#define NORMALIZE_STACK_BASE_REGISTER(x) ((x) == 8 ? 0u : 1u) // Encode Frame pointer X8 as zero, sp/x2 as 1 -#define DENORMALIZE_STACK_BASE_REGISTER(x) ((x) == 0 ? 
8u : 2u) -#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>3) -#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<3) -#define CODE_OFFSETS_NEED_NORMALIZATION 1 -#define NORMALIZE_CODE_OFFSET(x) ((x)>>2) // Instructions are 4 bytes long -#define DENORMALIZE_CODE_OFFSET(x) ((x)<<2) -#define NORMALIZE_REGISTER(x) (x) -#define DENORMALIZE_REGISTER(x) (x) -#define NORMALIZE_NUM_SAFE_POINTS(x) (x) -#define DENORMALIZE_NUM_SAFE_POINTS(x) (x) -#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) -#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) - -#define PSP_SYM_STACK_SLOT_ENCBASE 6 -#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6 -#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6 -#define GS_COOKIE_STACK_SLOT_ENCBASE 6 -#define CODE_LENGTH_ENCBASE 8 -#define STACK_BASE_REGISTER_ENCBASE 2 -// FP encoded as 0, SP as 1 -#define SIZE_OF_STACK_AREA_ENCBASE 3 -#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 4 -#define SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE 4 -#define REVERSE_PINVOKE_FRAME_ENCBASE 6 -#define NUM_REGISTERS_ENCBASE 3 -#define NUM_STACK_SLOTS_ENCBASE 2 -#define NUM_UNTRACKED_SLOTS_ENCBASE 1 -#define NORM_PROLOG_SIZE_ENCBASE 5 -#define NORM_EPILOG_SIZE_ENCBASE 3 -#define NORM_CODE_OFFSET_DELTA_ENCBASE 3 -#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 6 -#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6 -#define REGISTER_ENCBASE 3 -#define REGISTER_DELTA_ENCBASE 2 -#define STACK_SLOT_ENCBASE 6 -#define STACK_SLOT_DELTA_ENCBASE 4 -#define NUM_SAFE_POINTS_ENCBASE 3 -#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1 -#define NUM_EH_CLAUSES_ENCBASE 2 -#define POINTER_SIZE_ENCBASE 3 -#define LIVESTATE_RLE_RUN_ENCBASE 2 -#define LIVESTATE_RLE_SKIP_ENCBASE 4 +#define TargetGcInfoEncoding RISCV64GcInfoEncoding + +struct RISCV64GcInfoEncoding { + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64); + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6); + // GC Pointers are 8-bytes aligned + static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>3); } + static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<3); } + // All Instructions are 4 bytes long + static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)>>2); } + static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)<<2); } + // Encode Frame pointer X8 as zero, sp/x2 as 1 + static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) == 8 ? 0u : 1u); } + static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) == 0 ? 
8u : 2u); } + static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>3); } + static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<3); } + static const bool CODE_OFFSETS_NEED_NORMALIZATION = true; + // Instructions are 4 bytes long + static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)>>2); } + static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)<<2); } + + static const int PSP_SYM_STACK_SLOT_ENCBASE = 6; + static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6; + static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6; + static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6; + static const int CODE_LENGTH_ENCBASE = 8; + static const int SIZE_OF_RETURN_KIND_IN_SLIM_HEADER = 2; + static const int SIZE_OF_RETURN_KIND_IN_FAT_HEADER = 4; + static const int STACK_BASE_REGISTER_ENCBASE = 2; + // FP encoded as 0, SP as 1 + static const int SIZE_OF_STACK_AREA_ENCBASE = 3; + static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4; + static const int SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE = 4; + static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6; + static const int NUM_REGISTERS_ENCBASE = 3; + static const int NUM_STACK_SLOTS_ENCBASE = 2; + static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1; + static const int NORM_PROLOG_SIZE_ENCBASE = 5; + static const int NORM_EPILOG_SIZE_ENCBASE = 3; + static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3; + static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6; + static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6; + static const int REGISTER_ENCBASE = 3; + static const int REGISTER_DELTA_ENCBASE = 2; + static const int STACK_SLOT_ENCBASE = 6; + static const int STACK_SLOT_DELTA_ENCBASE = 4; + static const int NUM_SAFE_POINTS_ENCBASE = 3; + static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1; + static const int NUM_EH_CLAUSES_ENCBASE = 2; + static const int POINTER_SIZE_ENCBASE = 3; + static const int LIVESTATE_RLE_RUN_ENCBASE = 2; + static const int LIVESTATE_RLE_SKIP_ENCBASE = 4; +}; -#else +#else // defined(TARGET_xxx) #ifndef TARGET_X86 #ifdef PORTABILITY_WARNING @@ -890,55 +865,115 @@ PORTABILITY_WARNING("Please specialize these definitions for your platform!") #ifndef TARGET_POINTER_SIZE #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this target #endif -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64) -#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6) -#define NORMALIZE_STACK_SLOT(x) (x) -#define DENORMALIZE_STACK_SLOT(x) (x) -#define NORMALIZE_CODE_LENGTH(x) (x) -#define DENORMALIZE_CODE_LENGTH(x) (x) -#define NORMALIZE_STACK_BASE_REGISTER(x) (x) -#define DENORMALIZE_STACK_BASE_REGISTER(x) (x) -#define NORMALIZE_SIZE_OF_STACK_AREA(x) (x) -#define DENORMALIZE_SIZE_OF_STACK_AREA(x) (x) -#define CODE_OFFSETS_NEED_NORMALIZATION 0 -#define NORMALIZE_CODE_OFFSET(x) (x) -#define DENORMALIZE_CODE_OFFSET(x) (x) -#define NORMALIZE_REGISTER(x) (x) -#define DENORMALIZE_REGISTER(x) (x) -#define NORMALIZE_NUM_SAFE_POINTS(x) (x) -#define DENORMALIZE_NUM_SAFE_POINTS(x) (x) -#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) -#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) - -#define PSP_SYM_STACK_SLOT_ENCBASE 6 -#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6 -#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6 -#define GS_COOKIE_STACK_SLOT_ENCBASE 6 -#define CODE_LENGTH_ENCBASE 6 -#define STACK_BASE_REGISTER_ENCBASE 3 -#define SIZE_OF_STACK_AREA_ENCBASE 6 
-#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 3 -#define REVERSE_PINVOKE_FRAME_ENCBASE 6 -#define NUM_REGISTERS_ENCBASE 3 -#define NUM_STACK_SLOTS_ENCBASE 5 -#define NUM_UNTRACKED_SLOTS_ENCBASE 5 -#define NORM_PROLOG_SIZE_ENCBASE 4 -#define NORM_EPILOG_SIZE_ENCBASE 3 -#define NORM_CODE_OFFSET_DELTA_ENCBASE 3 -#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 5 -#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 5 -#define REGISTER_ENCBASE 3 -#define REGISTER_DELTA_ENCBASE REGISTER_ENCBASE -#define STACK_SLOT_ENCBASE 6 -#define STACK_SLOT_DELTA_ENCBASE 4 -#define NUM_SAFE_POINTS_ENCBASE 4 -#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1 -#define NUM_EH_CLAUSES_ENCBASE 2 -#define POINTER_SIZE_ENCBASE 3 -#define LIVESTATE_RLE_RUN_ENCBASE 2 -#define LIVESTATE_RLE_SKIP_ENCBASE 4 -#endif +#define TargetGcInfoEncoding X86GcInfoEncoding + +struct X86GcInfoEncoding { + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64); + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6); + static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return (x); } + static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return (x); } + static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return (x); } + static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return (x); } + static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return (x); } + static const bool CODE_OFFSETS_NEED_NORMALIZATION = false; + static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return (x); } + + static const int PSP_SYM_STACK_SLOT_ENCBASE = 6; + static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6; + static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6; + static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6; + static const int CODE_LENGTH_ENCBASE = 6; + static const int SIZE_OF_RETURN_KIND_IN_SLIM_HEADER = 2; + static const int SIZE_OF_RETURN_KIND_IN_FAT_HEADER = 2; + static const int STACK_BASE_REGISTER_ENCBASE = 3; + static const int SIZE_OF_STACK_AREA_ENCBASE = 6; + static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 3; + static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6; + static const int NUM_REGISTERS_ENCBASE = 3; + static const int NUM_STACK_SLOTS_ENCBASE = 5; + static const int NUM_UNTRACKED_SLOTS_ENCBASE = 5; + static const int NORM_PROLOG_SIZE_ENCBASE = 4; + static const int NORM_EPILOG_SIZE_ENCBASE = 3; + static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3; + static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 5; + static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 5; + static const int REGISTER_ENCBASE = 3; + static const int REGISTER_DELTA_ENCBASE = REGISTER_ENCBASE; + static const int STACK_SLOT_ENCBASE = 6; + static const int STACK_SLOT_DELTA_ENCBASE = 4; + static const int NUM_SAFE_POINTS_ENCBASE = 4; + static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1; + static const int NUM_EH_CLAUSES_ENCBASE = 2; + static const int POINTER_SIZE_ENCBASE = 3; + static const int LIVESTATE_RLE_RUN_ENCBASE = 2; + static const int LIVESTATE_RLE_SKIP_ENCBASE = 4; +}; -#endif // !__GCINFOTYPES_H__ +#endif // defined(TARGET_xxx) + 
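[Editorial aside, not part of the diff] The per-target GcInfoEncoding structs above replace the old NORMALIZE_*/DENORMALIZE_* macros with constexpr members, so their round-trip behavior can be checked at compile time. A minimal sketch, assuming gcinfotypes.h is included in a translation unit built with TARGET_AMD64 defined:

// Illustration only: sanity checks against the AMD64 traits defined above.
// RBP (x64 register number 5) normalizes to 0 so the common frame register
// takes the fewest bits, and 8-byte-aligned stack offsets drop their low 3 bits.
static_assert(AMD64GcInfoEncoding::NORMALIZE_STACK_BASE_REGISTER(5) == 0, "RBP encodes as 0");
static_assert(AMD64GcInfoEncoding::DENORMALIZE_STACK_BASE_REGISTER(0) == 5, "...and decodes back to RBP");
static_assert(AMD64GcInfoEncoding::NORMALIZE_STACK_SLOT(24) == 3, "stack slots are scaled down by 8");
static_assert(AMD64GcInfoEncoding::DENORMALIZE_STACK_SLOT(3) == 24, "...and scaled back up on decode");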
+#ifdef FEATURE_INTERPRETER + +struct InterpreterGcInfoEncoding { + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64); + + static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6); + // Interpreter-FIXME: Interpreter has fixed-size stack slots so we could normalize them based on that. + static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return (x); } + static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return (x); } + // Interpreter-FIXME: Interpreter has fixed-size opcodes so code length is a multiple of that. + static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return (x); } + + static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return (x); } + // Interpreter-FIXME: Interpreter has fixed-size stack slots so we could normalize them based on that. + static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return (x); } + static const bool CODE_OFFSETS_NEED_NORMALIZATION = false; + // Interpreter-FIXME: Interpreter has fixed-size opcodes so code length is a multiple of that. + static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return (x); } + static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return (x); } + + static const int PSP_SYM_STACK_SLOT_ENCBASE = 6; + static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6; + static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6; + static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6; + static const int CODE_LENGTH_ENCBASE = 8; + static const int STACK_BASE_REGISTER_ENCBASE = 3; + static const int SIZE_OF_STACK_AREA_ENCBASE = 3; + static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4; + // Interpreter-FIXME: This constant is only used on certain architectures. 
+ static const int SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE = 4; + static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6; + static const int NUM_REGISTERS_ENCBASE = 2; + static const int NUM_STACK_SLOTS_ENCBASE = 2; + static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1; + static const int NORM_PROLOG_SIZE_ENCBASE = 5; + static const int NORM_EPILOG_SIZE_ENCBASE = 3; + static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3; + static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6; + static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6; + static const int REGISTER_ENCBASE = 3; + static const int REGISTER_DELTA_ENCBASE = 2; + static const int STACK_SLOT_ENCBASE = 6; + static const int STACK_SLOT_DELTA_ENCBASE = 4; + static const int NUM_SAFE_POINTS_ENCBASE = 2; + static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1; + static const int NUM_EH_CLAUSES_ENCBASE = 2; + static const int POINTER_SIZE_ENCBASE = 3; + static const int LIVESTATE_RLE_RUN_ENCBASE = 2; + static const int LIVESTATE_RLE_SKIP_ENCBASE = 4; +}; + +#endif // FEATURE_INTERPRETER +#ifdef debug_instrumented_return +#define return debug_instrumented_return +#endif // debug_instrumented_return + +#endif // !__GCINFOTYPES_H__ diff --git a/src/coreclr/inc/holder.h b/src/coreclr/inc/holder.h index 47b93d4215fe..495441026434 100644 --- a/src/coreclr/inc/holder.h +++ b/src/coreclr/inc/holder.h @@ -144,8 +144,6 @@ class HolderBase }; // class HolderBase<> -#ifndef _PREFAST_ // Work around an ICE error in EspX.dll - template BOOL CompareDefault(TYPE value, TYPE defaultValue) { @@ -153,17 +151,6 @@ BOOL CompareDefault(TYPE value, TYPE defaultValue) return value == defaultValue; } -#else - -template -BOOL CompareDefault(TYPE value, TYPE defaultValue) -{ - return FALSE; -} - -#endif - - template BOOL NoNull(TYPE value, TYPE defaultValue) { diff --git a/src/coreclr/inc/hostinformation.h b/src/coreclr/inc/hostinformation.h index d57b4729d30e..5a9a62fec48a 100644 --- a/src/coreclr/inc/hostinformation.h +++ b/src/coreclr/inc/hostinformation.h @@ -11,6 +11,9 @@ class HostInformation public: static void SetContract(_In_ host_runtime_contract* hostContract); static bool GetProperty(_In_z_ const char* name, SString& value); + + static bool HasExternalProbe(); + static bool ExternalAssemblyProbe(_In_ const SString& path, _Out_ void** data, _Out_ int64_t* size); }; #endif // _HOSTINFORMATION_H_ diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h index 08b1004d4642..73e330058f9d 100644 --- a/src/coreclr/inc/icorjitinfoimpl_generated.h +++ b/src/coreclr/inc/icorjitinfoimpl_generated.h @@ -281,9 +281,6 @@ CorInfoHelpFunc getSharedCCtorHelper( CORINFO_CLASS_HANDLE getTypeForBox( CORINFO_CLASS_HANDLE cls) override; -CORINFO_CLASS_HANDLE getTypeForBoxOnStack( - CORINFO_CLASS_HANDLE cls) override; - CorInfoHelpFunc getBoxHelper( CORINFO_CLASS_HANDLE cls) override; @@ -498,6 +495,9 @@ bool runWithSPMIErrorTrap( void getEEInfo( CORINFO_EE_INFO* pEEInfoOut) override; +void getAsyncInfo( + CORINFO_ASYNC_INFO* pAsyncInfoOut) override; + mdMethodDef getMethodDefFromMethod( CORINFO_METHOD_HANDLE hMethod) override; @@ -660,6 +660,8 @@ bool getTailCallHelpers( CORINFO_GET_TAILCALL_HELPERS_FLAGS flags, CORINFO_TAILCALL_HELPERS* pResult) override; +CORINFO_METHOD_HANDLE getAsyncResumptionStub() override; + bool convertPInvokeCalliToCall( CORINFO_RESOLVED_TOKEN* pResolvedToken, bool mustConvert) override; diff --git a/src/coreclr/inc/il_kywd.h b/src/coreclr/inc/il_kywd.h index 625ee82ba228..8c284460a369 100644 --- 
a/src/coreclr/inc/il_kywd.h +++ b/src/coreclr/inc/il_kywd.h @@ -109,6 +109,7 @@ KYWD( "noinlining", NOINLINING_, NO_VALUE ) KYWD( "nooptimization", NOOPTIMIZATION_, NO_VALUE ) KYWD( "aggressiveoptimization", AGGRESSIVEOPTIMIZATION_, NO_VALUE ) + KYWD( "async", ASYNC_, NO_VALUE ) KYWD( "nomangle", NOMANGLE_, NO_VALUE ) KYWD( "lasterr", LASTERR_, NO_VALUE ) KYWD( "winapi", WINAPI_, NO_VALUE ) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 9383fceebc29..982e1f9fd82e 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -11,7 +11,7 @@ // the EE changes (by adding or removing methods to any interface shared between them), this GUID should // be changed. This is the identifier verified by ICorJitCompiler::getVersionIdentifier(). // -// You can use "uuidgen.exe -s" to generate this value. +// You can use src/coreclr/tools/Common/JitInterface/ThunkGenerator/gen.bat (or .sh on Unix) to update this file. // // Note that this file is parsed by some tools, namely superpmi.py, so make sure the first line is exactly // of the form: @@ -32,26 +32,16 @@ ////////////////////////////////////////////////////////////////////////////////////////////////////////// // -#ifndef GUID_DEFINED -typedef struct _GUID { - uint32_t Data1; // NOTE: diff from Win32, for LP64 - uint16_t Data2; - uint16_t Data3; - uint8_t Data4[ 8 ]; -} GUID; -typedef const GUID *LPCGUID; -#define GUID_DEFINED -#endif // !GUID_DEFINED +#ifndef JIT_EE_VERSIONING_GUID_H +#define JIT_EE_VERSIONING_GUID_H -constexpr GUID JITEEVersionIdentifier = { /* a116647a-3f80-4fd6-9c80-95156c7e9923 */ - 0xa116647a, - 0x3f80, - 0x4fd6, - {0x9c, 0x80, 0x95, 0x15, 0x6c, 0x7e, 0x99, 0x23} -}; +#include -////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// END JITEEVersionIdentifier -// -////////////////////////////////////////////////////////////////////////////////////////////////////////// +constexpr GUID JITEEVersionIdentifier = { /* f22d9c39-8d24-4e4d-86aa-7b883aecf97f */ + 0xf22d9c39, + 0x8d24, + 0x4e4d, + {0x86, 0xaa, 0x7b, 0x88, 0x3a, 0xec, 0xf9, 0x7f} + }; + +#endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index e7ae43ac2b8e..4d789a440fc0 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -29,24 +29,33 @@ #define DYNAMICJITHELPER_NOINDIRECT(code,fn,binderId) DYNAMICJITHELPER(code,fn,binderId) #endif -// pfnHelper is set to NULL if it is a stubbed helper. -// It will be set in InitJITHelpers2 +#if defined(TARGET_32BIT) && defined (TARGET_ARM) +#define FEATURE_USE_HELPERS_FOR_32BIT_INT_DIV +#endif + +// pfnHelper is set to NULL if it is an unused helper. 
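The JITHELPER/DYNAMICJITHELPER entries below form an X-macro table: a consumer defines the two macros, pulls in the list, and #undef's them afterwards (see the #undef lines at the end of the table). A minimal, self-contained sketch of that expansion technique, using invented DEMO_* names rather than the runtime's real helper list or consumers, might look like this:

    #include <cstdio>

    // Stand-in list mirroring the table shape (helper id, default entry point).
    #define DEMO_HELPER_LIST(ENTRY)        \
        ENTRY(DEMO_HELP_UNDEF, nullptr)    \
        ENTRY(DEMO_HELP_DIV,   nullptr)    \
        ENTRY(DEMO_HELP_MOD,   nullptr)

    // Expansion 1: an enum of helper ids.
    #define DEMO_AS_ENUM(code, pfn) code,
    enum DemoHelperId { DEMO_HELPER_LIST(DEMO_AS_ENUM) DEMO_HELP_COUNT };
    #undef DEMO_AS_ENUM

    // Expansion 2: a parallel name/entry-point table built from the same list.
    struct DemoHelperEntry { const char* name; void* pfn; };
    #define DEMO_AS_ENTRY(code, pfn) { #code, (void*)(pfn) },
    static const DemoHelperEntry s_demoHelpers[] = { DEMO_HELPER_LIST(DEMO_AS_ENTRY) };
    #undef DEMO_AS_ENTRY

    int main()
    {
        for (const DemoHelperEntry& e : s_demoHelpers)
            std::printf("%s -> %p\n", e.name, e.pfn);
        return 0;
    }

The same list can be expanded repeatedly to produce parallel data structures, which is why each entry keeps the helper id, entry point, and binder id together in one place.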
JITHELPER(CORINFO_HELP_UNDEF, NULL, METHOD__NIL) // Arithmetic - JITHELPER(CORINFO_HELP_DIV, JIT_Div, METHOD__NIL) - JITHELPER(CORINFO_HELP_MOD, JIT_Mod, METHOD__NIL) - JITHELPER(CORINFO_HELP_UDIV, JIT_UDiv, METHOD__NIL) - JITHELPER(CORINFO_HELP_UMOD, JIT_UMod, METHOD__NIL) +#ifdef FEATURE_USE_HELPERS_FOR_32BIT_INT_DIV + DYNAMICJITHELPER(CORINFO_HELP_DIV, NULL, METHOD__MATH__DIV_INT32) + DYNAMICJITHELPER(CORINFO_HELP_MOD, NULL, METHOD__MATH__MOD_INT32) + DYNAMICJITHELPER(CORINFO_HELP_UDIV, NULL, METHOD__MATH__DIV_UINT32) + DYNAMICJITHELPER(CORINFO_HELP_UMOD, NULL, METHOD__MATH__MOD_UINT32) +#else + JITHELPER(CORINFO_HELP_DIV, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_MOD, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_UDIV, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_UMOD, NULL, METHOD__NIL) +#endif - // CORINFO_HELP_DBL2INT, CORINFO_HELP_DBL2UINT, and CORINFO_HELP_DBL2LONG get - // patched for CPUs that support SSE2 (P4 and above). -#ifndef TARGET_64BIT +#ifdef TARGET_32BIT JITHELPER(CORINFO_HELP_LLSH, JIT_LLsh, METHOD__NIL) JITHELPER(CORINFO_HELP_LRSH, JIT_LRsh, METHOD__NIL) JITHELPER(CORINFO_HELP_LRSZ, JIT_LRsz, METHOD__NIL) -#else // !TARGET_64BIT +#else // TARGET_32BIT + JITHELPER(CORINFO_HELP_LLSH, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LRSH, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LRSZ, NULL, METHOD__NIL) @@ -55,15 +64,28 @@ #ifndef TARGET_64BIT DYNAMICJITHELPER(CORINFO_HELP_LMUL_OVF, NULL, METHOD__MATH__MULTIPLY_CHECKED_INT64) DYNAMICJITHELPER(CORINFO_HELP_ULMUL_OVF, NULL, METHOD__MATH__MULTIPLY_CHECKED_UINT64) -#else - DYNAMICJITHELPER(CORINFO_HELP_LMUL_OVF, NULL, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_ULMUL_OVF, NULL, METHOD__NIL) -#endif // TARGET_64BIT +#if defined(TARGET_X86) && defined(TARGET_WINDOWS) JITHELPER(CORINFO_HELP_LDIV, JIT_LDiv, METHOD__NIL) JITHELPER(CORINFO_HELP_LMOD, JIT_LMod, METHOD__NIL) JITHELPER(CORINFO_HELP_ULDIV, JIT_ULDiv, METHOD__NIL) JITHELPER(CORINFO_HELP_ULMOD, JIT_ULMod, METHOD__NIL) +#else + DYNAMICJITHELPER(CORINFO_HELP_LDIV, NULL, METHOD__MATH__DIV_INT64) + DYNAMICJITHELPER(CORINFO_HELP_LMOD, NULL, METHOD__MATH__MOD_INT64) + DYNAMICJITHELPER(CORINFO_HELP_ULDIV, NULL, METHOD__MATH__DIV_UINT64) + DYNAMICJITHELPER(CORINFO_HELP_ULMOD, NULL, METHOD__MATH__MOD_UINT64) +#endif // TARGET_X86 && TARGET_WINDOWS +#else // TARGET_64BIT + JITHELPER(CORINFO_HELP_LMUL_OVF, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_ULMUL_OVF, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_LDIV, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_LMOD, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_ULDIV, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_ULMOD, NULL, METHOD__NIL) +#endif // TARGET_64BIT + JITHELPER(CORINFO_HELP_LNG2FLT, JIT_Lng2Flt, METHOD__NIL) JITHELPER(CORINFO_HELP_LNG2DBL, JIT_Lng2Dbl, METHOD__NIL) + JITHELPER(CORINFO_HELP_ULNG2FLT, JIT_ULng2Flt, METHOD__NIL) JITHELPER(CORINFO_HELP_ULNG2DBL, JIT_ULng2Dbl, METHOD__NIL) JITHELPER(CORINFO_HELP_DBL2INT, JIT_Dbl2Int, METHOD__NIL) DYNAMICJITHELPER(CORINFO_HELP_DBL2INT_OVF, NULL, METHOD__MATH__CONVERT_TO_INT32_CHECKED) @@ -77,22 +99,22 @@ JITHELPER(CORINFO_HELP_DBLREM, JIT_DblRem, METHOD__NIL) // Allocating a new object - JITHELPER(CORINFO_HELP_NEWFAST, JIT_New, METHOD__NIL) - JITHELPER(CORINFO_HELP_NEWFAST_MAYBEFROZEN, JIT_NewMaybeFrozen,METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST, JIT_New, METHOD__NIL) - JITHELPER(CORINFO_HELP_NEWSFAST_FINALIZE, NULL, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_New, METHOD__NIL) - JITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8_VC, NULL, METHOD__NIL) - 
JITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8_FINALIZE, NULL, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_NEW_MDARR, NULL, METHOD__ARRAY__CREATEINSTANCEMDARRAY) - DYNAMICJITHELPER(CORINFO_HELP_NEW_MDARR_RARE, NULL, METHOD__ARRAY__CREATEINSTANCEMDARRAY) - JITHELPER(CORINFO_HELP_NEWARR_1_DIRECT, JIT_NewArr1,METHOD__NIL) - JITHELPER(CORINFO_HELP_NEWARR_1_MAYBEFROZEN, JIT_NewArr1MaybeFrozen,METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1,METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1,METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_ALIGN8, JIT_NewArr1,METHOD__NIL) - - JITHELPER(CORINFO_HELP_STRCNS, JIT_StrCns, METHOD__NIL) + JITHELPER(CORINFO_HELP_NEWFAST, RhpNew, METHOD__NIL) + JITHELPER(CORINFO_HELP_NEWFAST_MAYBEFROZEN, RhpNewMaybeFrozen, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST, RhpNew, METHOD__NIL) + JITHELPER(CORINFO_HELP_NEWSFAST_FINALIZE, NULL, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8, RhpNew, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8_VC, RhpNew, METHOD__NIL) + JITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8_FINALIZE, NULL, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_NEW_MDARR, NULL, METHOD__ARRAY__CREATEINSTANCEMDARRAY) + DYNAMICJITHELPER(CORINFO_HELP_NEW_MDARR_RARE, NULL, METHOD__ARRAY__CREATEINSTANCEMDARRAY) + JITHELPER(CORINFO_HELP_NEWARR_1_DIRECT, RhpNewVariableSizeObject, METHOD__NIL) + JITHELPER(CORINFO_HELP_NEWARR_1_MAYBEFROZEN, RhpNewArrayMaybeFrozen, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_PTR, RhpNewVariableSizeObject, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_VC, RhpNewVariableSizeObject, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_ALIGN8, RhpNewVariableSizeObject, METHOD__NIL) + + DYNAMICJITHELPER(CORINFO_HELP_STRCNS, NULL, METHOD__STRING__STRCNS) // Object model DYNAMICJITHELPER(CORINFO_HELP_INITCLASS, NULL, METHOD__INITHELPERS__INITCLASS) @@ -111,11 +133,11 @@ JITHELPER(CORINFO_HELP_ISINSTANCEOF_EXCEPTION, JIT_IsInstanceOfException, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_BOX, JIT_Box, METHOD__NIL) - JITHELPER(CORINFO_HELP_BOX_NULLABLE, JIT_Box, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_UNBOX, NULL, METHOD__CASTHELPERS__UNBOX) - DYNAMICJITHELPER(CORINFO_HELP_UNBOX_TYPETEST,NULL, METHOD__CASTHELPERS__UNBOX_TYPETEST) - DYNAMICJITHELPER(CORINFO_HELP_UNBOX_NULLABLE,NULL, METHOD__CASTHELPERS__UNBOX_NULLABLE) + DYNAMICJITHELPER(CORINFO_HELP_BOX, NULL, METHOD__CASTHELPERS__BOX) + DYNAMICJITHELPER(CORINFO_HELP_BOX_NULLABLE, NULL, METHOD__CASTHELPERS__BOX_NULLABLE) + DYNAMICJITHELPER(CORINFO_HELP_UNBOX, NULL, METHOD__CASTHELPERS__UNBOX) + DYNAMICJITHELPER(CORINFO_HELP_UNBOX_TYPETEST, NULL, METHOD__CASTHELPERS__UNBOX_TYPETEST) + DYNAMICJITHELPER(CORINFO_HELP_UNBOX_NULLABLE, NULL, METHOD__CASTHELPERS__UNBOX_NULLABLE) DYNAMICJITHELPER(CORINFO_HELP_GETREFANY, NULL, METHOD__TYPED_REFERENCE__GETREFANY) DYNAMICJITHELPER(CORINFO_HELP_ARRADDR_ST, NULL, METHOD__CASTHELPERS__STELEMREF) @@ -124,6 +146,7 @@ // Exceptions DYNAMICJITHELPER(CORINFO_HELP_THROW, IL_Throw, METHOD__NIL) DYNAMICJITHELPER(CORINFO_HELP_RETHROW, IL_Rethrow, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_THROWEXACT, IL_ThrowExact, METHOD__NIL) DYNAMICJITHELPER(CORINFO_HELP_USER_BREAKPOINT, NULL, METHOD__DEBUGGER__USERBREAKPOINT) DYNAMICJITHELPER_NOINDIRECT(CORINFO_HELP_RNGCHKFAIL, NULL, METHOD__THROWHELPERS__THROWINDEXOUTOFRANGEEXCEPTION) DYNAMICJITHELPER_NOINDIRECT(CORINFO_HELP_OVERFLOW, NULL, METHOD__THROWHELPERS__THROWOVERFLOWEXCEPTION) @@ -142,8 +165,16 @@ JITHELPER(CORINFO_HELP_ENDCATCH, 
JIT_EndCatch, METHOD__NIL) #endif - JITHELPER(CORINFO_HELP_MON_ENTER, JIT_MonEnterWorker, METHOD__NIL) - JITHELPER(CORINFO_HELP_MON_EXIT, JIT_MonExitWorker, METHOD__NIL) +// +// The legacy x86 monitor helpers do not need a state argument +// +#if defined(FEATURE_EH_FUNCLETS) + DYNAMICJITHELPER(CORINFO_HELP_MON_ENTER, NULL, METHOD__MONITOR__RELIABLEENTER) + DYNAMICJITHELPER(CORINFO_HELP_MON_EXIT, NULL, METHOD__MONITOR__EXIT_IF_TAKEN) +#else + DYNAMICJITHELPER(CORINFO_HELP_MON_ENTER, NULL, METHOD__MONITOR__ENTER) + DYNAMICJITHELPER(CORINFO_HELP_MON_EXIT, NULL, METHOD__MONITOR__EXIT) +#endif JITHELPER(CORINFO_HELP_GETCLASSFROMMETHODPARAM, JIT_GetClassFromMethodParam, METHOD__NIL) DYNAMICJITHELPER(CORINFO_HELP_GETSYNCFROMCLASSHANDLE, NULL, METHOD__RT_TYPE_HANDLE__GETRUNTIMETYPEFROMHANDLE) @@ -151,15 +182,15 @@ // GC support DYNAMICJITHELPER(CORINFO_HELP_STOP_FOR_GC, JIT_RareDisableHelper, METHOD__NIL) DYNAMICJITHELPER(CORINFO_HELP_POLL_GC, JIT_PollGC, METHOD__THREAD__POLLGC) - + JITHELPER(CORINFO_HELP_CHECK_OBJ, JIT_CheckObj, METHOD__NIL) // GC Write barrier support - DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF, JIT_WriteBarrier, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF, JIT_CheckedWriteBarrier,METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF, RhpAssignRef, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF, RhpCheckedAssignRef,METHOD__NIL) JITHELPER(CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, JIT_WriteBarrierEnsureNonHeapTarget,METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF, JIT_ByRefWriteBarrier,METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF, RhpByRefAssignRef,METHOD__NIL) DYNAMICJITHELPER(CORINFO_HELP_BULK_WRITEBARRIER, NULL, METHOD__BUFFER__MEMCOPYGC) // Accessing fields @@ -215,11 +246,7 @@ JITHELPER(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, JIT_GetCurrentManagedThreadId, METHOD__NIL) -#ifdef TARGET_64BIT JITHELPER(CORINFO_HELP_INIT_PINVOKE_FRAME, JIT_InitPInvokeFrame, METHOD__NIL) -#else - DYNAMICJITHELPER(CORINFO_HELP_INIT_PINVOKE_FRAME, NULL, METHOD__NIL) -#endif DYNAMICJITHELPER(CORINFO_HELP_MEMSET, NULL, METHOD__SPAN_HELPERS__MEMSET) DYNAMICJITHELPER(CORINFO_HELP_MEMZERO, NULL, METHOD__SPAN_HELPERS__MEMZERO) @@ -261,19 +288,19 @@ #endif // !FEATURE_EH_FUNCLETS #ifdef TARGET_X86 - DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, JIT_WriteBarrierEAX, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, JIT_WriteBarrierEBX, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, JIT_WriteBarrierECX, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, JIT_WriteBarrierESI, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, JIT_WriteBarrierEDI, METHOD__NIL) - DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, JIT_WriteBarrierEBP, METHOD__NIL) - - JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, JIT_CheckedWriteBarrierEAX, METHOD__NIL) - JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, JIT_CheckedWriteBarrierEBX, METHOD__NIL) - JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, JIT_CheckedWriteBarrierECX, METHOD__NIL) - JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, JIT_CheckedWriteBarrierESI, METHOD__NIL) - JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EDI, JIT_CheckedWriteBarrierEDI, METHOD__NIL) - JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EBP, JIT_CheckedWriteBarrierEBP, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, RhpAssignRefEAX, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, RhpAssignRefEBX, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, RhpAssignRefECX, METHOD__NIL) + 
DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, RhpAssignRefESI, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, RhpAssignRefEDI, METHOD__NIL) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, RhpAssignRefEBP, METHOD__NIL) + + JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, RhpCheckedAssignRefEAX, METHOD__NIL) + JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, RhpCheckedAssignRefEBX, METHOD__NIL) + JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, RhpCheckedAssignRefECX, METHOD__NIL) + JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, RhpCheckedAssignRefESI, METHOD__NIL) + JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EDI, RhpCheckedAssignRefEDI, METHOD__NIL) + JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EBP, RhpCheckedAssignRefEBP, METHOD__NIL) #else JITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, NULL, METHOD__NIL) @@ -318,7 +345,7 @@ #endif JITHELPER(CORINFO_HELP_PATCHPOINT, JIT_Patchpoint, METHOD__NIL) - JITHELPER(CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, JIT_PartialCompilationPatchpoint, METHOD__NIL) + JITHELPER(CORINFO_HELP_PATCHPOINT_FORCED, JIT_PatchpointForced, METHOD__NIL) JITHELPER(CORINFO_HELP_CLASSPROFILE32, JIT_ClassProfile32, METHOD__NIL) JITHELPER(CORINFO_HELP_CLASSPROFILE64, JIT_ClassProfile64, METHOD__NIL) @@ -343,6 +370,7 @@ JITHELPER(CORINFO_HELP_DISPATCH_INDIRECT_CALL, NULL, METHOD__NIL) #endif +<<<<<<< HEAD JITHELPER(CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_EH_CATCH, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_EH_POP_UNWOUND_VIRTUAL_FRAMES, NULL, METHOD__NIL) @@ -352,6 +380,12 @@ JITHELPER(CORINFO_HELP_LLVM_RESOLVE_INTERFACE_CALL_TARGET, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_GET_EXTERNAL_CALL_TARGET, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_STRESS_GC, JIT_StressGC, METHOD__NIL) +======= + DYNAMICJITHELPER(CORINFO_HELP_ALLOC_CONTINUATION, NULL, METHOD__ASYNC_HELPERS__ALLOC_CONTINUATION) + DYNAMICJITHELPER(CORINFO_HELP_ALLOC_CONTINUATION_METHOD, NULL, METHOD__ASYNC_HELPERS__ALLOC_CONTINUATION_METHOD) + DYNAMICJITHELPER(CORINFO_HELP_ALLOC_CONTINUATION_CLASS, NULL, METHOD__ASYNC_HELPERS__ALLOC_CONTINUATION_CLASS) + +>>>>>>> upstream-jun #undef JITHELPER #undef DYNAMICJITHELPER #undef JITHELPER diff --git a/src/coreclr/inc/loaderheap.h b/src/coreclr/inc/loaderheap.h index 54ba2595bff1..d3040e0b4aa4 100644 --- a/src/coreclr/inc/loaderheap.h +++ b/src/coreclr/inc/loaderheap.h @@ -143,6 +143,7 @@ struct LoaderHeapBlock #endif }; + struct LoaderHeapFreeBlock; // Collection of methods for helping in debugging heap corruptions @@ -166,87 +167,96 @@ inline UINT32 GetStubCodePageSize() #endif } +enum class LoaderHeapImplementationKind +{ + Data, + Executable, + Interleaved +}; +class UnlockedLoaderHeapBaseTraversable +{ +protected: +#ifdef DACCESS_COMPILE + UnlockedLoaderHeapBaseTraversable() {} +#else + UnlockedLoaderHeapBaseTraversable() : + m_pFirstBlock(NULL) + { + LIMITED_METHOD_CONTRACT; + } +#endif +public: +#ifdef DACCESS_COMPILE +public: + void EnumMemoryRegions(enum CLRDataEnumMemoryFlags flags); + +typedef bool EnumPageRegionsCallback (PTR_VOID pvArgs, PTR_VOID pvAllocationBase, SIZE_T cbReserved); + void EnumPageRegions (EnumPageRegionsCallback *pCallback, PTR_VOID pvArgs); +#endif + +protected: + // Linked list of ClrVirtualAlloc'd pages + PTR_LoaderHeapBlock m_pFirstBlock; +}; //=============================================================================== -// This is the base class for LoaderHeap and ExplicitControlLoaderHeap. 
Unfortunately, -// this class has become schizophrenic. Sometimes, it's used as a simple -// allocator that's semantically (but not perfwise!) equivalent to a blackbox -// alloc/free heap. Othertimes, it's used by callers who are actually aware -// of how it reserves addresses and want direct control over the range over which -// this thing allocates. These two types of allocations are handed out -// from two independent pools inside the heap. -// -// The backout strategy we use for the simple heap probably isn't -// directly applicable to the more advanced uses. -// -// We don't have time to refactor this so as a second-best measure, -// we make most of UnlockedLoaderHeap's methods protected and force everyone -// to use it them through two public derived classes that are mutual siblings. -// -// The LoaderHeap is the black-box heap and has a Backout() method but none -// of the advanced features that let you control address ranges. -// -// The ExplicitControlLoaderHeap exposes all the advanced features but -// has no Backout() feature. (If someone wants a backout feature, they need -// to design an appropriate one into this class.) +// This is the base class for LoaderHeap and InterleavedLoaderHeap. It holds the +// common handling for LoaderHeap events, and the data structures used for bump +// pointer allocation (although not the actual allocation routines). //=============================================================================== -class UnlockedLoaderHeap +typedef DPTR(class UnlockedLoaderHeapBase) PTR_UnlockedLoaderHeapBase; +class UnlockedLoaderHeapBase : public UnlockedLoaderHeapBaseTraversable, public ILoaderHeapBackout { #ifdef _DEBUG friend class LoaderHeapSniffer; - friend struct LoaderHeapFreeBlock; #endif - #ifdef DACCESS_COMPILE friend class ClrDataAccess; #endif -public: +protected: + size_t GetBytesAvailCommittedRegion(); - enum class HeapKind - { - Data, - Executable, - Interleaved - }; +#ifndef DACCESS_COMPILE + const +#endif + LoaderHeapImplementationKind m_kind; -private: - // Linked list of ClrVirtualAlloc'd pages - PTR_LoaderHeapBlock m_pFirstBlock; + size_t m_dwTotalAlloc; // Allocation pointer in current block PTR_BYTE m_pAllocPtr; // Points to the end of the committed region in the current block PTR_BYTE m_pPtrToEndOfCommittedRegion; - PTR_BYTE m_pEndReservedRegion; - - // When we need to ClrVirtualAlloc() MEM_RESERVE a new set of pages, number of bytes to reserve - DWORD m_dwReserveBlockSize; - - // When we need to commit pages from our reserved list, number of bytes to commit at a time - DWORD m_dwCommitBlockSize; - - // For interleaved heap (RX pages interleaved with RW ones), this specifies the allocation granularity, - // which is the individual code block size - DWORD m_dwGranularity; - - // Range list to record memory ranges in - RangeList * m_pRangeList; + +public: +#ifdef DACCESS_COMPILE + UnlockedLoaderHeapBase() {} +#else + UnlockedLoaderHeapBase(LoaderHeapImplementationKind kind); + virtual ~UnlockedLoaderHeapBase(); +#endif // DACCESS_COMPILE - size_t m_dwTotalAlloc; + BOOL IsExecutable() { return m_kind == LoaderHeapImplementationKind::Executable || m_kind == LoaderHeapImplementationKind::Interleaved; } + BOOL IsInterleaved() { return m_kind == LoaderHeapImplementationKind::Interleaved; } - HeapKind m_kind; +#ifdef _DEBUG + size_t DebugGetWastedBytes() + { + WRAPPER_NO_CONTRACT; + return m_dwDebugWastedBytes + GetBytesAvailCommittedRegion(); + } - LoaderHeapFreeBlock *m_pFirstFreeBlock; + void DumpFreeList(); - // This is used to hold on to a 
block of reserved memory provided to the - // constructor. We do this instead of adding it as the first block because - // that requires comitting the first page of the reserved block, and for - // startup working set reasons we want to delay that as long as possible. - LoaderHeapBlock m_reservedBlock; +// Extra CallTracing support + void UnlockedClearEvents(); //Discard saved events + void UnlockedCompactEvents(); //Discard matching alloc/free events + void UnlockedPrintEvents(); //Print event list +#endif public: @@ -265,32 +275,51 @@ class UnlockedLoaderHeap #endif - #ifdef _DEBUG size_t m_dwDebugWastedBytes; static DWORD s_dwNumInstancesOfLoaderHeaps; #endif +}; +//=============================================================================== +// This is the base class for LoaderHeap It's used as a simple +// allocator that's semantically (but not perfwise!) equivalent to a blackbox +// alloc/free heap. The ability to free is via a "backout" mechanism that is +// not considered to have good performance. +// +//=============================================================================== +class UnlockedLoaderHeap : public UnlockedLoaderHeapBase +{ #ifdef _DEBUG - size_t DebugGetWastedBytes() - { - WRAPPER_NO_CONTRACT; - return m_dwDebugWastedBytes + GetBytesAvailCommittedRegion(); - } + friend class LoaderHeapSniffer; #endif - -public: - BOOL m_fExplicitControl; // Am I a LoaderHeap or an ExplicitControlLoaderHeap? - void (*m_codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size); - #ifdef DACCESS_COMPILE -public: - void EnumMemoryRegions(enum CLRDataEnumMemoryFlags flags); + friend class ClrDataAccess; #endif + friend struct LoaderHeapFreeBlock; public: - typedef bool EnumPageRegionsCallback (PTR_VOID pvArgs, PTR_VOID pvAllocationBase, SIZE_T cbReserved); - void EnumPageRegions (EnumPageRegionsCallback *pCallback, PTR_VOID pvArgs); + +private: + // Points to the end of the reserved region for the current block + PTR_BYTE m_pEndReservedRegion; + + // When we need to ClrVirtualAlloc() MEM_RESERVE a new set of pages, number of bytes to reserve + DWORD m_dwReserveBlockSize; + + // When we need to commit pages from our reserved list, number of bytes to commit at a time + DWORD m_dwCommitBlockSize; + + // Range list to record memory ranges in + RangeList * m_pRangeList; + + // This is used to hold on to a block of reserved memory provided to the + // constructor. We do this instead of adding it as the first block because + // that requires comitting the first page of the reserved block, and for + // startup working set reasons we want to delay that as long as possible. + LoaderHeapBlock m_reservedBlock; + + LoaderHeapFreeBlock *m_pFirstFreeBlock; #ifndef DACCESS_COMPILE protected: @@ -302,15 +331,12 @@ class UnlockedLoaderHeap const BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, RangeList *pRangeList = NULL, - HeapKind kind = HeapKind::Data, - void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size) = NULL, - DWORD dwGranularity = 1); + LoaderHeapImplementationKind kind = LoaderHeapImplementationKind::Data); - ~UnlockedLoaderHeap(); + virtual ~UnlockedLoaderHeap(); #endif private: - size_t GetBytesAvailCommittedRegion(); size_t GetBytesAvailReservedRegion(); protected: @@ -358,10 +384,6 @@ class UnlockedLoaderHeap #endif ); - - - - protected: // Allocates memory aligned on power-of-2 boundary. 
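As the comment above notes, this path hands out memory aligned on a power-of-2 boundary from the current committed region. A stand-alone sketch of the usual align-up plus bump-pointer arithmetic (names here are illustrative only, not the runtime's actual allocator):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Rounds p up to the next multiple of 'alignment', which must be a power of two.
    static inline uint8_t* AlignUp(uint8_t* p, size_t alignment)
    {
        assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
        uintptr_t value = reinterpret_cast<uintptr_t>(p);
        value = (value + (alignment - 1)) & ~(uintptr_t)(alignment - 1);
        return reinterpret_cast<uint8_t*>(value);
    }

    // Bump-pointer allocation with alignment, in the spirit of an alloc-pointer /
    // end-of-committed-region pair (both parameters are hypothetical stand-ins).
    static uint8_t* BumpAlloc(uint8_t*& allocPtr, uint8_t* committedEnd,
                              size_t size, size_t alignment)
    {
        uint8_t* result = AlignUp(allocPtr, alignment);
        if (size > (size_t)(committedEnd - result))
            return nullptr;   // a real heap would commit or reserve more pages here
        allocPtr = result + size;
        return result;
    }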
// @@ -423,28 +445,251 @@ class UnlockedLoaderHeap return m_dwTotalAlloc; } - BOOL IsExecutable(); - BOOL IsInterleaved(); - size_t AllocMem_TotalSize(size_t dwRequestedSize); - public: #ifdef _DEBUG void DumpFreeList(); #endif +private: + static void ValidateFreeList(UnlockedLoaderHeap *pHeap); + static void WeGotAFaultNowWhat(UnlockedLoaderHeap *pHeap); +}; + +struct InterleavedLoaderHeapConfig +{ + uint32_t StubSize; + void* Template; + void (*CodePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size); +}; + +void InitializeLoaderHeapConfig(InterleavedLoaderHeapConfig *pConfig, size_t stubSize, void* templateInImage, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)); + +//=============================================================================== +// This is the base class for InterleavedLoaderHeap It's used as a simple +// allocator for stubs in a scheme where each stub is a small fixed size, and is paired +// with memory which is GetStubCodePageSize() bytes away. In addition there is an +// ability to free is via a "backout" mechanism that is not considered to have good performance. +// +//=============================================================================== +class UnlockedInterleavedLoaderHeap : public UnlockedLoaderHeapBase +{ +#ifdef _DEBUG + friend class LoaderHeapSniffer; + friend struct LoaderHeapFreeBlock; +#endif + +#ifdef DACCESS_COMPILE + friend class ClrDataAccess; +#endif public: -// Extra CallTracing support + +private: + PTR_BYTE m_pEndReservedRegion; + + // For interleaved heap (RX pages interleaved with RW ones), this specifies the allocation granularity, + // which is the individual code block size + DWORD m_dwGranularity; + + // Range list to record memory ranges in + RangeList * m_pRangeList; + + struct InterleavedStubFreeListNode + { + InterleavedStubFreeListNode *m_pNext; + }; + + InterleavedStubFreeListNode *m_pFreeListHead; + + const InterleavedLoaderHeapConfig *m_pConfig; + +#ifndef DACCESS_COMPILE +protected: + UnlockedInterleavedLoaderHeap( + RangeList *pRangeList, + const InterleavedLoaderHeapConfig *pConfig); + + virtual ~UnlockedInterleavedLoaderHeap(); +#endif + +private: + size_t GetBytesAvailReservedRegion(); + +protected: + // number of bytes available in region + size_t UnlockedGetReservedBytesFree() + { + LIMITED_METHOD_CONTRACT; + return m_pEndReservedRegion - m_pAllocPtr; + } + + PTR_BYTE UnlockedGetAllocPtr() + { + LIMITED_METHOD_CONTRACT; + return m_pAllocPtr; + } + +private: + // Get some more committed pages - either commit some more in the current reserved region, or, if it + // has run out, reserve another set of pages + BOOL GetMoreCommittedPages(size_t dwMinSize); + + // Commit memory pages starting at the specified adress + BOOL CommitPages(void* pData, size_t dwSizeToCommitPart); + +protected: + // Reserve some pages at any address + BOOL UnlockedReservePages(size_t dwCommitBlockSize); + +protected: + // Allocates memory for a single stub which is a pair of memory addresses + // The first address is the pointer at the stub code, and the second + // address is the data for the stub. These are separated by GetStubCodePageSize() + // bytes. + // + // The return value is a pointer that's guaranteed to be aligned. + // + // Here is how to properly backout the memory: + // + // void *pMem = UnlockedAllocStub(d); + // UnlockedBackoutStub(pMem); + // + // If you use the AllocMemHolder or AllocMemTracker, all this is taken care of + // behind the scenes. 
+ // + // + void *UnlockedAllocStub( #ifdef _DEBUG - void UnlockedClearEvents(); //Discard saved events - void UnlockedCompactEvents(); //Discard matching alloc/free events - void UnlockedPrintEvents(); //Print event list + _In_ _In_z_ const char *szFile + ,int lineNum +#endif + ); + + void *UnlockedAllocStub_NoThrow( +#ifdef _DEBUG + _In_ _In_z_ const char *szFile + ,int lineNum #endif + ); protected: - void *UnlockedAllocMemForCode_NoThrow(size_t dwHeaderSize, size_t dwCodeSize, DWORD dwCodeAlignment, size_t dwReserveForJumpStubs); + // This frees memory allocated by UnlockAllocMem. It's given this horrible name to emphasize + // that it's purpose is for error path leak prevention purposes. You shouldn't + // use LoaderHeap's as general-purpose alloc-free heaps. + void UnlockedBackoutStub(void *pMem +#ifdef _DEBUG + , _In_ _In_z_ const char *szFile + , int lineNum + , _In_ _In_z_ const char *szAllocFile + , int AllocLineNum +#endif + ); +}; - void UnlockedSetReservedRegion(BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, BOOL fReleaseMemory); +//=============================================================================== +// This is the class used for CodeManager allocations. At one point it the logic +// was shared with UnlockedLoaderHeap, but that has been changed. This heap is designed +// to provide an api surface that can be used to control the memory regions where +// allocations occur, and provides an alloc only api surface. +// +// Caller is responsible for synchronization. ExplicitControlLoaderHeap is +// not multithread safe. +//=============================================================================== +typedef DPTR(class ExplicitControlLoaderHeap) PTR_ExplicitControlLoaderHeap; +class ExplicitControlLoaderHeap : public UnlockedLoaderHeapBaseTraversable +{ +#ifdef DACCESS_COMPILE + friend class ClrDataAccess; +#endif + +private: + // Allocation pointer in current block + PTR_BYTE m_pAllocPtr; + + // Points to the end of the committed region in the current block + PTR_BYTE m_pPtrToEndOfCommittedRegion; + PTR_BYTE m_pEndReservedRegion; + + size_t m_dwTotalAlloc; + + // When we need to commit pages from our reserved list, number of bytes to commit at a time + DWORD m_dwCommitBlockSize; + + // Is this an executable heap? + bool m_fExecutableHeap; + + // This is used to hold on to a block of reserved memory provided to the + // constructor. We do this instead of adding it as the first block because + // that requires comitting the first page of the reserved block, and for + // startup working set reasons we want to delay that as long as possible. 
+ LoaderHeapBlock m_reservedBlock; + +public: + +#ifdef _DEBUG + size_t m_dwDebugWastedBytes; + static DWORD s_dwNumInstancesOfLoaderHeaps; +#endif + +#ifdef _DEBUG + size_t DebugGetWastedBytes() + { + WRAPPER_NO_CONTRACT; + return m_dwDebugWastedBytes + GetBytesAvailCommittedRegion(); + } +#endif + +#ifndef DACCESS_COMPILE +public: + ExplicitControlLoaderHeap(bool fMakeExecutable = false); + + ~ExplicitControlLoaderHeap(); +#endif + +private: + size_t GetBytesAvailCommittedRegion(); + size_t GetBytesAvailReservedRegion(); + +public: + // number of bytes available in region + size_t GetReservedBytesFree() + { + LIMITED_METHOD_CONTRACT; + return m_pEndReservedRegion - m_pAllocPtr; + } + + PTR_BYTE GetAllocPtr() + { + LIMITED_METHOD_CONTRACT; + return m_pAllocPtr; + } + +private: + // Get some more committed pages - either commit some more in the current reserved region, or, if it + // has run out, reserve another set of pages + BOOL GetMoreCommittedPages(size_t dwMinSize); + + // Commit memory pages starting at the specified adress + BOOL CommitPages(void* pData, size_t dwSizeToCommitPart); + +public: + // Reserve some pages at any address + BOOL ReservePages(size_t dwCommitBlockSize); + + // Perf Counter reports the size of the heap + size_t GetSize () + { + LIMITED_METHOD_CONTRACT; + return m_dwTotalAlloc; + } + + size_t AllocMem_TotalSize(size_t dwRequestedSize); + +public: + + void *AllocMemForCode_NoThrow(size_t dwHeaderSize, size_t dwCodeSize, DWORD dwCodeAlignment, size_t dwReserveForJumpStubs); + + void SetReservedRegion(BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, BOOL fReleaseMemory); }; //=============================================================================== @@ -460,7 +705,7 @@ inline CRITSEC_COOKIE CreateLoaderHeapLock() // of the advanced features that let you control address ranges. //=============================================================================== typedef DPTR(class LoaderHeap) PTR_LoaderHeap; -class LoaderHeap : public UnlockedLoaderHeap, public ILoaderHeapBackout +class LoaderHeap : public UnlockedLoaderHeap { private: CRITSEC_COOKIE m_CriticalSection; @@ -470,22 +715,17 @@ class LoaderHeap : public UnlockedLoaderHeap, public ILoaderHeapBackout LoaderHeap(DWORD dwReserveBlockSize, DWORD dwCommitBlockSize, RangeList *pRangeList = NULL, - UnlockedLoaderHeap::HeapKind kind = UnlockedLoaderHeap::HeapKind::Data, - BOOL fUnlocked = FALSE, - void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size) = NULL, - DWORD dwGranularity = 1 + LoaderHeapImplementationKind kind = LoaderHeapImplementationKind::Data, + BOOL fUnlocked = FALSE ) : UnlockedLoaderHeap(dwReserveBlockSize, dwCommitBlockSize, NULL, 0, pRangeList, - kind, - codePageGenerator, - dwGranularity), + kind), m_CriticalSection(fUnlocked ? 
NULL : CreateLoaderHeapLock()) { WRAPPER_NO_CONTRACT; - m_fExplicitControl = FALSE; } public: @@ -494,22 +734,18 @@ class LoaderHeap : public UnlockedLoaderHeap, public ILoaderHeapBackout const BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, RangeList *pRangeList = NULL, - UnlockedLoaderHeap::HeapKind kind = UnlockedLoaderHeap::HeapKind::Data, - BOOL fUnlocked = FALSE, - void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size) = NULL, - DWORD dwGranularity = 1 + LoaderHeapImplementationKind kind = LoaderHeapImplementationKind::Data, + BOOL fUnlocked = FALSE ) : UnlockedLoaderHeap(dwReserveBlockSize, dwCommitBlockSize, dwReservedRegionAddress, dwReservedRegionSize, pRangeList, - kind, - codePageGenerator, dwGranularity), + kind), m_CriticalSection(fUnlocked ? NULL : CreateLoaderHeapLock()) { WRAPPER_NO_CONTRACT; - m_fExplicitControl = FALSE; } #endif // DACCESS_COMPILE @@ -789,112 +1025,138 @@ class LoaderHeap : public UnlockedLoaderHeap, public ILoaderHeapBackout }; - - +#ifdef _DEBUG +#define AllocStub() RealAllocStub(__FILE__, __LINE__) +#else +#define AllocStub() RealAllocStub() +#endif //=============================================================================== -// The ExplicitControlLoaderHeap exposes all the advanced features but -// has no Backout() feature. (If someone wants a backout feature, they need -// to design an appropriate one into this class.) -// -// Caller is responsible for synchronization. ExplicitControlLoaderHeap is -// not multithread safe. +// The LoaderHeap is the black-box heap and has a Backout() method but none +// of the advanced features that let you control address ranges. //=============================================================================== -typedef DPTR(class ExplicitControlLoaderHeap) PTR_ExplicitControlLoaderHeap; -class ExplicitControlLoaderHeap : public UnlockedLoaderHeap +typedef DPTR(class InterleavedLoaderHeap) PTR_InterleavedLoaderHeap; +class InterleavedLoaderHeap : public UnlockedInterleavedLoaderHeap { +private: + CRITSEC_COOKIE m_CriticalSection; + #ifndef DACCESS_COMPILE public: - ExplicitControlLoaderHeap(RangeList *pRangeList = NULL, - BOOL fMakeExecutable = FALSE + InterleavedLoaderHeap(RangeList *pRangeList, + BOOL fUnlocked, + const InterleavedLoaderHeapConfig *pConfig ) - : UnlockedLoaderHeap(0, 0, NULL, 0, + : UnlockedInterleavedLoaderHeap( pRangeList, - fMakeExecutable ? UnlockedLoaderHeap::HeapKind::Executable : UnlockedLoaderHeap::HeapKind::Data) + pConfig), + m_CriticalSection(fUnlocked ? 
NULL : CreateLoaderHeapLock()) { WRAPPER_NO_CONTRACT; - m_fExplicitControl = TRUE; } + #endif // DACCESS_COMPILE -public: - void *RealAllocMem(size_t dwSize -#ifdef _DEBUG - ,_In_ _In_z_ const char *szFile - ,int lineNum -#endif - ) + virtual ~InterleavedLoaderHeap() { WRAPPER_NO_CONTRACT; - void *pResult; - - pResult = UnlockedAllocMem(dwSize -#ifdef _DEBUG - , szFile - , lineNum -#endif - ); - return pResult; +#ifndef DACCESS_COMPILE + if (m_CriticalSection != NULL) + { + ClrDeleteCriticalSection(m_CriticalSection); + } +#endif // DACCESS_COMPILE } - void *RealAllocMem_NoThrow(size_t dwSize +public: + TaggedMemAllocPtr RealAllocStub( #ifdef _DEBUG - ,_In_ _In_z_ const char *szFile - ,int lineNum + _In_ _In_z_ const char *szFile + ,int lineNum #endif - ) + ) { WRAPPER_NO_CONTRACT; + CRITSEC_Holder csh(m_CriticalSection); + + + TaggedMemAllocPtr tmap; void *pResult; - pResult = UnlockedAllocMem_NoThrow(dwSize + pResult = UnlockedAllocStub( #ifdef _DEBUG - , szFile - , lineNum + szFile + ,lineNum #endif - ); - return pResult; - } + ); + tmap.m_pMem = pResult; + tmap.m_dwRequestedSize = 1; + tmap.m_pHeap = this; + tmap.m_dwExtra = 0; +#ifdef _DEBUG + tmap.m_szFile = szFile; + tmap.m_lineNum = lineNum; +#endif -public: - void *AllocMemForCode_NoThrow(size_t dwHeaderSize, size_t dwCodeSize, DWORD dwCodeAlignment, size_t dwReserveForJumpStubs) - { - WRAPPER_NO_CONTRACT; - return UnlockedAllocMemForCode_NoThrow(dwHeaderSize, dwCodeSize, dwCodeAlignment, dwReserveForJumpStubs); + return tmap; } - void SetReservedRegion(BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, BOOL fReleaseMemory) + +public: + // This frees memory allocated by AllocMem. It's given this horrible name to emphasize + // that it's purpose is for error path leak prevention purposes. You shouldn't + // use LoaderHeap's as general-purpose alloc-free heaps. + void RealBackoutMem(void *pMem + , size_t dwSize +#ifdef _DEBUG + , _In_ _In_z_ const char *szFile + , int lineNum + , _In_ _In_z_ const char *szAllocFile + , int allocLineNum +#endif + ) { WRAPPER_NO_CONTRACT; - return UnlockedSetReservedRegion(dwReservedRegionAddress, dwReservedRegionSize, fReleaseMemory); + CRITSEC_Holder csh(m_CriticalSection); + UnlockedBackoutStub(pMem +#ifdef _DEBUG + , szFile + , lineNum + , szAllocFile + , allocLineNum +#endif + ); } public: - // number of bytes available in region - size_t GetReservedBytesFree() +// Extra CallTracing support +#ifdef _DEBUG + void ClearEvents() { WRAPPER_NO_CONTRACT; - return UnlockedGetReservedBytesFree(); + CRITSEC_Holder csh(m_CriticalSection); + UnlockedClearEvents(); } - PTR_BYTE GetAllocPtr() + void CompactEvents() { WRAPPER_NO_CONTRACT; - return UnlockedGetAllocPtr(); + CRITSEC_Holder csh(m_CriticalSection); + UnlockedCompactEvents(); } - void ReservePages(size_t size) + void PrintEvents() { WRAPPER_NO_CONTRACT; - UnlockedReservePages(size); + CRITSEC_Holder csh(m_CriticalSection); + UnlockedPrintEvents(); } +#endif }; - - //============================================================================== // AllocMemHolder : Allocated memory from LoaderHeap // @@ -1109,14 +1371,11 @@ class AllocMemTracker AllocMemTrackerNode m_Node[kAllocMemTrackerBlockSize]; }; - AllocMemTrackerBlock *m_pFirstBlock; AllocMemTrackerBlock m_FirstBlock; // Stack-allocate the first block - "new" the rest. 
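RealBackoutMem and the AllocMemHolder/AllocMemTracker types exist for error-path leak prevention: allocations made while building up a larger structure can be undone if a later step fails, and kept once everything has been handed off. A generic, stand-alone sketch of that "release unless suppressed" idiom, with invented names rather than the tracker's real interface:

    #include <functional>
    #include <utility>
    #include <vector>

    // Hypothetical stand-in for the tracker idiom: record undo actions while
    // building a composite object, and drop them all if construction succeeds.
    class BackoutTracker
    {
        std::vector<std::function<void()>> m_undo;
        bool m_suppressed = false;

    public:
        void Track(std::function<void()> undo) { m_undo.push_back(std::move(undo)); }

        // Called once every tracked allocation has been safely handed off.
        void SuppressRelease() { m_suppressed = true; }

        ~BackoutTracker()
        {
            if (m_suppressed)
                return;
            // Error path: undo in reverse order of allocation.
            for (auto it = m_undo.rbegin(); it != m_undo.rend(); ++it)
                (*it)();
        }
    };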
protected: BOOL m_fReleased; - }; #endif // __LoaderHeap_h__ - diff --git a/src/coreclr/inc/longfilepathwrappers.h b/src/coreclr/inc/longfilepathwrappers.h index 6407680900dc..88c22095d764 100644 --- a/src/coreclr/inc/longfilepathwrappers.h +++ b/src/coreclr/inc/longfilepathwrappers.h @@ -25,18 +25,6 @@ CreateFileWrapper( _In_opt_ HANDLE hTemplateFile ); -DWORD -GetFileAttributesWrapper( - _In_ LPCWSTR lpFileName - ); - -BOOL -GetFileAttributesExWrapper( - _In_ LPCWSTR lpFileName, - _In_ GET_FILEEX_INFO_LEVELS fInfoLevelId, - _Out_writes_bytes_(sizeof(WIN32_FILE_ATTRIBUTE_DATA)) LPVOID lpFileInformation - ); - BOOL CopyFileExWrapper( _In_ LPCWSTR lpExistingFileName, diff --git a/src/coreclr/inc/mdfileformat.h b/src/coreclr/inc/mdfileformat.h index b510ff23be9a..594957a886cb 100644 --- a/src/coreclr/inc/mdfileformat.h +++ b/src/coreclr/inc/mdfileformat.h @@ -36,13 +36,6 @@ #define FILE_VER_MAJOR 1 #define FILE_VER_MINOR 1 -// These are the last legitimate 0.x version macros. The file format has -// sinced move up to 1.x (see macros above). After CLR 1.0/NT 5 RTM's, these -// macros should no longer be required or ever seen. -#define FILE_VER_MAJOR_v0 0 - -#define FILE_VER_MINOR_v0 19 - #define MAXSTREAMNAME 32 diff --git a/src/coreclr/inc/metadata.h b/src/coreclr/inc/metadata.h index 3fbdabfd1125..4fe9f5dd231c 100644 --- a/src/coreclr/inc/metadata.h +++ b/src/coreclr/inc/metadata.h @@ -1035,7 +1035,6 @@ DECLARE_INTERFACE_(IMDInternalImport, IUnknown) mdToken *tkEnclosedToken) PURE; // [OUT] The enclosed type token #define MD_STREAM_VER_1X 0x10000 -#define MD_STREAM_VER_2_B1 0x10001 #define MD_STREAM_VER_2 0x20000 STDMETHOD_(DWORD, GetMetadataStreamVersion)() PURE; //returns DWORD with major version of // MD stream in senior word and minor version--in junior word @@ -1046,30 +1045,6 @@ DECLARE_INTERFACE_(IMDInternalImport, IUnknown) LPCUTF8 *pszNamespace, // [OUT] Namespace of Custom Attribute. LPCUTF8 *pszName) PURE; // [OUT] Name of Custom Attribute. - STDMETHOD(SetOptimizeAccessForSpeed)(// S_OK or error - BOOL fOptSpeed) PURE; - - STDMETHOD(SetVerifiedByTrustedSource)(// S_OK or error - BOOL fVerified) PURE; - - STDMETHOD(GetRvaOffsetData)( - DWORD *pFirstMethodRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in MethodDef table. - DWORD *pMethodDefRecordSize, // [OUT] Size of each record in MethodDef table. - DWORD *pMethodDefCount, // [OUT] Number of records in MethodDef table. - DWORD *pFirstFieldRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in FieldRVA table. - DWORD *pFieldRvaRecordSize, // [OUT] Size of each record in FieldRVA table. - DWORD *pFieldRvaCount // [OUT] Number of records in FieldRVA table. - ) PURE; - - //---------------------------------------------------------------------------------------- - // !!! READ THIS !!! - // - // New methods have to be added at the end. The order and signatures of the existing methods - // have to be preserved. We need to maintain a backward compatibility for this interface to - // allow ildasm to work on SingleCLR. - // - //---------------------------------------------------------------------------------------- - }; // IMDInternalImport diff --git a/src/coreclr/inc/metadataexports.h b/src/coreclr/inc/metadataexports.h deleted file mode 100644 index 5c0031851591..000000000000 --- a/src/coreclr/inc/metadataexports.h +++ /dev/null @@ -1,56 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
-//***************************************************************************** -// MDCommon.h -// - -// -// Header for functions exported by metadata. These are consumed 2 ways: -// 1. Mscoree provides public exports that plumb to some of these functions to invoke these dynamically. -// 2. Standalone metadata (statically linked) -// -//***************************************************************************** - - -#ifndef _METADATA_EXPORTS_ -#define _METADATA_EXPORTS_ - - - -// General creation function for ClassFactory semantics. -STDAPI MetaDataDllGetClassObject(REFCLSID rclsid, REFIID riid, void ** ppv); - -// Specific creation function to get IMetaDataDispenser(Ex) interface. -HRESULT InternalCreateMetaDataDispenser(REFIID riid, void ** pMetaDataDispenserOut); - -STDAPI GetMDInternalInterface( - LPVOID pData, - ULONG cbData, - DWORD flags, // [IN] MDInternal_OpenForRead or MDInternal_OpenForENC - REFIID riid, // [in] The interface desired. - void **ppIUnk); // [out] Return interface on success. - -STDAPI GetMDInternalInterfaceFromPublic( - IUnknown *pIUnkPublic, // [IN] Given scope. - REFIID riid, // [in] The interface desired. - void **ppIUnkInternal); // [out] Return interface on success. - -STDAPI GetMDPublicInterfaceFromInternal( - void *pIUnkPublic, // [IN] Given scope. - REFIID riid, // [in] The interface desired. - void **ppIUnkInternal); // [out] Return interface on success. - -STDAPI MDReOpenMetaDataWithMemory( - void *pImport, // [IN] Given scope. public interfaces - LPCVOID pData, // [in] Location of scope data. - ULONG cbData); // [in] Size of the data pointed to by pData. - -STDAPI MDReOpenMetaDataWithMemoryEx( - void *pImport, // [IN] Given scope. public interfaces - LPCVOID pData, // [in] Location of scope data. - ULONG cbData, // [in] Size of the data pointed to by pData. - DWORD dwReOpenFlags); // [in] ReOpen flags - - - -#endif // _METADATA_EXPORTS_ diff --git a/src/coreclr/inc/mscoree.idl b/src/coreclr/inc/mscoree.idl index 1a5124ffbb2c..1b8e414b892b 100644 --- a/src/coreclr/inc/mscoree.idl +++ b/src/coreclr/inc/mscoree.idl @@ -36,58 +36,18 @@ cpp_quote("EXTERN_GUID(IID_ICLRRuntimeHost4, 0x64F6D366, 0xD7C2, 0x4F1F, 0xB4, 0 typedef HRESULT (__stdcall *FExecuteInAppDomainCallback) (void* cookie); -// By default GC is concurrent and only the base system library is loaded into the domain-neutral area. typedef enum { STARTUP_CONCURRENT_GC = 0x1, - STARTUP_LOADER_OPTIMIZATION_MASK = 0x3<<1, // loader optimization mask - STARTUP_LOADER_OPTIMIZATION_SINGLE_DOMAIN = 0x1<<1, // no domain neutral loading - STARTUP_LOADER_OPTIMIZATION_MULTI_DOMAIN = 0x2<<1, // all domain neutral loading - STARTUP_LOADER_OPTIMIZATION_MULTI_DOMAIN_HOST = 0x3<<1, // strong name domain neutral loading - - - STARTUP_LOADER_SAFEMODE = 0x10, // Do not apply runtime version policy to the version passed in - STARTUP_LOADER_SETPREFERENCE = 0x100, // Set preferred runtime. Do not actally start it - STARTUP_SERVER_GC = 0x1000, // Use server GC STARTUP_HOARD_GC_VM = 0x2000, // GC keeps virtual address used - STARTUP_SINGLE_VERSION_HOSTING_INTERFACE = 0x4000, // Disallow mixing hosting interface - STARTUP_LEGACY_IMPERSONATION = 0x10000, // Do not flow impersonation across async points by default - STARTUP_DISABLE_COMMITTHREADSTACK = 0x20000, // Don't eagerly commit thread stack - STARTUP_ALWAYSFLOW_IMPERSONATION = 0x40000, // Force flow impersonation across async points - // (impersonations achieved thru p/invoke and managed will flow. 
- // default is to flow only managed impersonation) - STARTUP_TRIM_GC_COMMIT = 0x80000, // GC uses less committed space when system memory low - STARTUP_ETW = 0x100000, - STARTUP_ARM = 0x400000, // Enable the ARM feature. - STARTUP_SINGLE_APPDOMAIN = 0x800000, // application runs in default domain, no more domains are created - STARTUP_APPX_APP_MODEL = 0x1000000, // jupiter app - STARTUP_DISABLE_RANDOMIZED_STRING_HASHING = 0x2000000 // Disable the randomized string hashing (not supported) } STARTUP_FLAGS; typedef enum { - APPDOMAIN_SECURITY_DEFAULT =0x0, - APPDOMAIN_SECURITY_SANDBOXED = 0x1, // appdomain is sandboxed - APPDOMAIN_SECURITY_FORBID_CROSSAD_REVERSE_PINVOKE = 0x2, // no cross ad reverse pinvokes - APPDOMAIN_IGNORE_UNHANDLED_EXCEPTIONS = 0x4, // APPDOMAIN_FORCE_TRIVIAL_WAIT_OPERATIONS = 0x08, // do not pump messages during wait operations, do not call sync context - // When passed by the host, this flag will allow any assembly to perform PInvoke or COMInterop operations. - // Otherwise, by default, only platform assemblies can perform those operations. - APPDOMAIN_ENABLE_PINVOKE_AND_CLASSIC_COMINTEROP = 0x10, - - APPDOMAIN_ENABLE_PLATFORM_SPECIFIC_APPS = 0x40, - APPDOMAIN_ENABLE_ASSEMBLY_LOADFILE = 0x80, - - APPDOMAIN_DISABLE_TRANSPARENCY_ENFORCEMENT = 0x100, } APPDOMAIN_SECURITY_FLAGS; -typedef enum { - WAIT_MSGPUMP = 0x1, - WAIT_ALERTABLE = 0x2, - WAIT_NOTINDEADLOCK = 0x4 -}WAIT_OPTION; - typedef enum { // Default to minidump DUMP_FLAVOR_Mini = 0, diff --git a/src/coreclr/inc/ostype.h b/src/coreclr/inc/ostype.h index 58a8f726d5a6..53f606af6b08 100644 --- a/src/coreclr/inc/ostype.h +++ b/src/coreclr/inc/ostype.h @@ -5,11 +5,11 @@ #include "staticcontract.h" #ifndef WRAPPER_NO_CONTRACT -#define WRAPPER_NO_CONTRACT ANNOTATION_WRAPPER +#define WRAPPER_NO_CONTRACT #endif #ifndef LIMITED_METHOD_CONTRACT -#define LIMITED_METHOD_CONTRACT ANNOTATION_FN_LEAF +#define LIMITED_METHOD_CONTRACT #endif //***************************************************************************** diff --git a/src/coreclr/inc/palclr.h b/src/coreclr/inc/palclr.h index c5628a1b9eee..1eca4438311e 100644 --- a/src/coreclr/inc/palclr.h +++ b/src/coreclr/inc/palclr.h @@ -8,12 +8,11 @@ // =========================================================================== - -#if defined(HOST_WINDOWS) - #ifndef __PALCLR_H__ #define __PALCLR_H__ +#if defined(HOST_WINDOWS) + // This macro is used to standardize the wide character string literals between UNIX and Windows. // Unix L"" is UTF32, and on windows it's UTF16. Because of built-in assumptions on the size // of string literals, it's important to match behaviour between Unix and Windows. 
Unix will be defined @@ -48,8 +47,6 @@ #endif // !_MSC_VER #endif // !NOINLINE -#define ANALYZER_NORETURN - #ifdef _MSC_VER #define EMPTY_BASES_DECL __declspec(empty_bases) #else @@ -320,17 +317,14 @@ { \ bool __exHandled; __exHandled = false; \ DWORD __exCode; __exCode = 0; \ - SCAN_EHMARKER(); \ __try \ - { \ - SCAN_EHMARKER_TRY(); + { #define PAL_EXCEPT_NAKED(Disposition) \ } \ __except(__exCode = GetExceptionCode(), Disposition) \ { \ __exHandled = true; \ - SCAN_EHMARKER_CATCH(); \ PAL_SEH_RESTORE_GUARD_PAGE #define PAL_EXCEPT_FILTER_NAKED(pfnFilter, param) \ @@ -339,7 +333,6 @@ pfnFilter(GetExceptionInformation(), param)) \ { \ __exHandled = true; \ - SCAN_EHMARKER_CATCH(); \ PAL_SEH_RESTORE_GUARD_PAGE #define PAL_FINALLY_NAKED \ @@ -349,7 +342,6 @@ #define PAL_ENDTRY_NAKED \ } \ - PAL_ENDTRY_NAKED_DBG \ } \ @@ -478,8 +470,8 @@ #define PAL_CPP_TRY try #define PAL_CPP_ENDTRY -#define PAL_CPP_THROW(type, obj) do { SCAN_THROW_MARKER; throw obj; } while (false) -#define PAL_CPP_RETHROW do { SCAN_THROW_MARKER; throw; } while (false) +#define PAL_CPP_THROW(type, obj) do { throw obj; } while (false) +#define PAL_CPP_RETHROW do { throw; } while (false) #define PAL_CPP_CATCH_DERIVED(type, obj) catch (type * obj) #define PAL_CPP_CATCH_NON_DERIVED(type, obj) catch (type obj) #define PAL_CPP_CATCH_NON_DERIVED_NOARG(type) catch (type) @@ -530,13 +522,10 @@ } \ } -#define PAL_ENDTRY_NAKED_DBG - #else -#define PAL_TRY_HANDLER_DBG_BEGIN ANNOTATION_TRY_BEGIN; -#define PAL_TRY_HANDLER_DBG_BEGIN_DLLMAIN(_reason) ANNOTATION_TRY_BEGIN; -#define PAL_TRY_HANDLER_DBG_END ANNOTATION_TRY_END; -#define PAL_ENDTRY_NAKED_DBG +#define PAL_TRY_HANDLER_DBG_BEGIN +#define PAL_TRY_HANDLER_DBG_BEGIN_DLLMAIN(_reason) +#define PAL_TRY_HANDLER_DBG_END #endif // defined(ENABLE_CONTRACTS_IMPL) @@ -604,9 +593,9 @@ #define __clr_reserved __reserved -#endif // __PALCLR_H__ - -#include "palclr_win.h" +// Native system libray handle. +// In Windows, NATIVE_LIBRARY_HANDLE is the same as HMODULE. +typedef HMODULE NATIVE_LIBRARY_HANDLE; #ifndef IMAGE_FILE_MACHINE_LOONGARCH64 #define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 // LOONGARCH64. @@ -617,3 +606,5 @@ #endif #endif // defined(HOST_WINDOWS) + +#endif // __PALCLR_H__ diff --git a/src/coreclr/inc/palclr_win.h b/src/coreclr/inc/palclr_win.h deleted file mode 100644 index be0b725e1a68..000000000000 --- a/src/coreclr/inc/palclr_win.h +++ /dev/null @@ -1,143 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// =========================================================================== -// File: palclr.h -// -// Various macros and constants that are necessary to make the CLR portable. -// - -// =========================================================================== - -#ifndef __PALCLR_WIN_H__ -#define __PALCLR_WIN_H__ - -// PAL SEH -// Macros for portable exception handling. The Win32 SEH is emulated using -// these macros and setjmp/longjmp on Unix -// -// Usage notes: -// -// - The filter has to be a function taking two parameters: -// LONG MyFilter(PEXCEPTION_POINTERS *pExceptionInfo, PVOID pv) -// -// - It is not possible to directly use the local variables in the filter. 
-// All the local information that the filter has to need to know about should -// be passed through pv parameter -// -// - Do not use goto to jump out of the PAL_TRY block -// (jumping out of the try block is not a good idea even on Win32, because of -// it causes stack unwind) -// -// -// Simple examples: -// -// PAL_TRY { -// .... -// } WIN_PAL_FINALLY { -// .... -// } -// WIN_PAL_ENDTRY -// -// -// PAL_TRY { -// .... -// } WIN_PAL_EXCEPT(EXCEPTION_EXECUTE_HANDLER) { -// .... -// } -// WIN_PAL_ENDTRY -// -// -// LONG MyFilter(PEXCEPTION_POINTERS *pExceptionInfo, PVOID pv) -// { -// ... -// } -// PAL_TRY { -// .... -// } WIN_PAL_EXCEPT_FILTER(MyFilter, NULL) { -// .... -// } -// WIN_PAL_ENDTRY -// -// -// Complex example: -// -// struct MyParams -// { -// ... -// } params; -// -// PAL_TRY { -// PAL_TRY { -// ... -// if (error) goto Done; -// ... -// Done: ; -// } WIN_PAL_EXCEPT_FILTER(OtherFilter, ¶ms) { -// ... -// } -// WIN_PAL_ENDTRY -// } -// WIN_PAL_FINALLY { -// } -// WIN_PAL_ENDTRY -// - - - -#if defined(_DEBUG_IMPL) && !defined(JIT_BUILD) && !defined(HOST_ARM) // @ARMTODO -#define WIN_PAL_TRY_HANDLER_DBG_BEGIN \ - BOOL ___oldOkayToThrowValue = FALSE; \ - ClrDebugState *___pState = GetClrDebugState(); \ - __try \ - { \ - ___oldOkayToThrowValue = ___pState->IsOkToThrow(); \ - ___pState->SetOkToThrow(TRUE); \ - ANNOTATION_TRY_BEGIN; - -// Special version that avoids touching the debug state after doing work in a DllMain for process or thread detach. -#define WIN_PAL_TRY_HANDLER_DBG_BEGIN_DLLMAIN(_reason) \ - BOOL ___oldOkayToThrowValue = FALSE; \ - BOOL ___oldSOTolerantState = FALSE; \ - ClrDebugState *___pState = CheckClrDebugState(); \ - __try \ - { \ - if (___pState) \ - { \ - ___oldOkayToThrowValue = ___pState->IsOkToThrow(); \ - ___pState->SetOkToThrow(TRUE); \ - } \ - if ((_reason == DLL_PROCESS_DETACH) || (_reason == DLL_THREAD_DETACH)) \ - { \ - ___pState = NULL; \ - } \ - ANNOTATION_TRY_BEGIN; - -#define WIN_PAL_TRY_HANDLER_DBG_END \ - ANNOTATION_TRY_END; \ - } \ - __finally \ - { \ - if (___pState != NULL) \ - { \ - _ASSERTE(___pState == CheckClrDebugState()); \ - ___pState->SetOkToThrow(___oldOkayToThrowValue); \ - ___pState->SetSOTolerance(___oldSOTolerantState); \ - } \ - } - -#define WIN_PAL_ENDTRY_NAKED_DBG - -#else -#define WIN_PAL_TRY_HANDLER_DBG_BEGIN ANNOTATION_TRY_BEGIN; -#define WIN_PAL_TRY_HANDLER_DBG_BEGIN_DLLMAIN(_reason) ANNOTATION_TRY_BEGIN; -#define WIN_PAL_TRY_HANDLER_DBG_END ANNOTATION_TRY_END; -#define WIN_PAL_ENDTRY_NAKED_DBG -#endif // defined(ENABLE_CONTRACTS_IMPL) - -#if defined(HOST_WINDOWS) -// Native system libray handle. -// In Windows, NATIVE_LIBRARY_HANDLE is the same as HMODULE. 
-typedef HMODULE NATIVE_LIBRARY_HANDLE; -#endif // HOST_WINDOWS - -#endif // __PALCLR_WIN_H__ diff --git a/src/coreclr/inc/patchpointinfo.h b/src/coreclr/inc/patchpointinfo.h index 02b9fd89f338..bdff46def7ef 100644 --- a/src/coreclr/inc/patchpointinfo.h +++ b/src/coreclr/inc/patchpointinfo.h @@ -38,6 +38,7 @@ struct PatchpointInfo void Initialize(unsigned localCount, int totalFrameSize) { m_calleeSaveRegisters = 0; + m_tier0Version = 0; m_totalFrameSize = totalFrameSize; m_numberOfLocals = localCount; m_genericContextArgOffset = -1; @@ -50,6 +51,7 @@ struct PatchpointInfo void Copy(const PatchpointInfo* original) { m_calleeSaveRegisters = original->m_calleeSaveRegisters; + m_tier0Version = original->m_tier0Version; m_genericContextArgOffset = original->m_genericContextArgOffset; m_keptAliveThisOffset = original->m_keptAliveThisOffset; m_securityCookieOffset = original->m_securityCookieOffset; @@ -173,6 +175,16 @@ struct PatchpointInfo m_calleeSaveRegisters = registerMask; } + PCODE GetTier0EntryPoint() const + { + return m_tier0Version; + } + + void SetTier0EntryPoint(PCODE ip) + { + m_tier0Version = ip; + } + private: enum { @@ -181,6 +193,7 @@ struct PatchpointInfo }; uint64_t m_calleeSaveRegisters; + PCODE m_tier0Version; unsigned m_numberOfLocals; int m_totalFrameSize; int m_genericContextArgOffset; diff --git a/src/coreclr/inc/pedecoder.h b/src/coreclr/inc/pedecoder.h index 057dfa9a25de..c1b203e419b5 100644 --- a/src/coreclr/inc/pedecoder.h +++ b/src/coreclr/inc/pedecoder.h @@ -52,6 +52,8 @@ typedef DPTR(IMAGE_COR20_HEADER) PTR_IMAGE_COR20_HEADER; class Module; +template struct cdac_data; + // -------------------------------------------------------------------------------- // RVA definition // -------------------------------------------------------------------------------- @@ -89,6 +91,8 @@ inline CHECK CheckOverflow(RVA value1, COUNT_T value2) #define IMAGE_FILE_MACHINE_NATIVE IMAGE_FILE_MACHINE_UNKNOWN #elif defined(TARGET_RISCV64) #define IMAGE_FILE_MACHINE_NATIVE IMAGE_FILE_MACHINE_RISCV64 +#elif defined(TARGET_WASM) +#define IMAGE_FILE_MACHINE_NATIVE IMAGE_FILE_MACHINE_UNKNOWN #else #error "port me" #endif @@ -403,6 +407,9 @@ class PEDecoder PTR_IMAGE_NT_HEADERS m_pNTHeaders; PTR_IMAGE_COR20_HEADER m_pCorHeader; PTR_READYTORUN_HEADER m_pReadyToRunHeader; + + // to allow inherited classes to access, friend to all specializations of cdac_data + template friend struct ::cdac_data; }; // diff --git a/src/coreclr/inc/pedecoder.inl b/src/coreclr/inc/pedecoder.inl index 7257bd9a78a5..485d7661c68d 100644 --- a/src/coreclr/inc/pedecoder.inl +++ b/src/coreclr/inc/pedecoder.inl @@ -690,7 +690,7 @@ inline RVA PEDecoder::OffsetToRva(COUNT_T fileOffset) const if(fileOffset > 0) { IMAGE_SECTION_HEADER *section = OffsetToSection(fileOffset); - PREFIX_ASSUME (section!=NULL); //TODO: actually it is possible that it si null we need to rethink how we handle this cases and do better there + _ASSERTE (section!=NULL); //TODO: actually it is possible that it si null we need to rethink how we handle this cases and do better there return fileOffset - VAL32(section->PointerToRawData) + VAL32(section->VirtualAddress); } @@ -825,7 +825,7 @@ inline PTR_VOID PEDecoder::GetTlsRange(COUNT_T * pSize) const if (pSize != 0) *pSize = (COUNT_T) (VALPTR(pTlsHeader->EndAddressOfRawData) - VALPTR(pTlsHeader->StartAddressOfRawData)); - PREFIX_ASSUME (pTlsHeader!=NULL); + _ASSERTE (pTlsHeader!=NULL); RETURN PTR_VOID(GetInternalAddressData(pTlsHeader->StartAddressOfRawData)); } diff --git a/src/coreclr/inc/profilepriv.h 
b/src/coreclr/inc/profilepriv.h index 7967a600e5f3..49322fdcf570 100644 --- a/src/coreclr/inc/profilepriv.h +++ b/src/coreclr/inc/profilepriv.h @@ -13,6 +13,9 @@ #ifndef _ProfilePriv_h_ #define _ProfilePriv_h_ +#ifndef FEATURE_PERFTRACING +typedef struct _EventPipeProvider EventPipeProvider; +#endif //FEATURE_PERFTRACING // Forward declarations class EEToProfInterfaceImpl; diff --git a/src/coreclr/inc/random.h b/src/coreclr/inc/random.h index 6a8d7001b204..98d24d2f1783 100644 --- a/src/coreclr/inc/random.h +++ b/src/coreclr/inc/random.h @@ -19,6 +19,7 @@ #define _CLRRANDOM_H_ #include +#include "minipal/time.h" // // Forbid the use of srand()/rand(), as these are globally shared facilities and our use of them would @@ -73,10 +74,7 @@ class CLRRandom void Init() { LIMITED_METHOD_CONTRACT; - LARGE_INTEGER time; - if (!QueryPerformanceCounter(&time)) - time.QuadPart = GetTickCount(); - Init((int)time.u.LowPart ^ GetCurrentThreadId() ^ GetCurrentProcessId()); + Init((int)minipal_hires_ticks() ^ GetCurrentThreadId() ^ GetCurrentProcessId()); } void Init(int Seed) diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index b097179f9bf6..a9a0fba017b0 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -19,10 +19,15 @@ // src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h // If you update this, ensure you run `git grep MINIMUM_READYTORUN_MAJOR_VERSION` // and handle pending work. -#define READYTORUN_MAJOR_VERSION 12 +#define READYTORUN_MAJOR_VERSION 14 #define READYTORUN_MINOR_VERSION 0x0000 -#define MINIMUM_READYTORUN_MAJOR_VERSION 12 +// Remove the x86 special case once the general minimum version is bumped +#ifdef TARGET_X86 +#define MINIMUM_READYTORUN_MAJOR_VERSION 14 +#else +#define MINIMUM_READYTORUN_MAJOR_VERSION 13 +#endif // R2R Version 2.1 adds the InliningInfo section // R2R Version 2.2 adds the ProfileDataInfo section @@ -40,6 +45,11 @@ // R2R Version 10.1 adds Unbox_TypeTest helper // R2R Version 11 uses GCInfo v4, which encodes safe points without -1 offset and does not track return kinds in GCInfo // R2R Version 12 requires all return buffers to be always on the stack +// R2R Version 13 removes usage of PSPSym, changes ABI for funclets to match NativeAOT, changes register for +// exception parameter on AMD64, and redefines generics instance context stack slot in GCInfo v4 +// to be SP/FP relative +// R2R Version 13.1 added long/ulong to float helper calls +// R2R Version 14 changed x86 code generation to use funclets struct READYTORUN_CORE_HEADER { @@ -283,6 +293,9 @@ enum ReadyToRunFixupKind READYTORUN_FIXUP_Check_IL_Body = 0x35, /* Check to see if an IL method is defined the same at runtime as at compile time. A failed match will cause code not to be used. */ READYTORUN_FIXUP_Verify_IL_Body = 0x36, /* Verify an IL body is defined the same at compile time and runtime. A failed match will cause a hard runtime failure. */ + + READYTORUN_FIXUP_ModuleOverride = 0x80, /* followed by sig-encoded UInt with assemblyref index into either the assemblyref table of the MSIL metadata of the master context module for the signature or */ + /* into the extra assemblyref table in the manifest metadata R2R header table (used in cases inlining brings in references to assemblies not seen in the MSIL). 
*/ }; // @@ -405,6 +418,8 @@ enum ReadyToRunHelper READYTORUN_HELPER_Dbl2UIntOvf = 0xD5, READYTORUN_HELPER_Dbl2ULng = 0xD6, READYTORUN_HELPER_Dbl2ULngOvf = 0xD7, + READYTORUN_HELPER_Lng2Flt = 0xD8, + READYTORUN_HELPER_ULng2Flt = 0xD9, // Floating point ops READYTORUN_HELPER_DblRem = 0xE0, diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h index b8f2fd366d36..50cd622b948b 100644 --- a/src/coreclr/inc/readytorunhelpers.h +++ b/src/coreclr/inc/readytorunhelpers.h @@ -86,6 +86,8 @@ HELPER(READYTORUN_HELPER_Dbl2UInt, CORINFO_HELP_DBL2UINT, HELPER(READYTORUN_HELPER_Dbl2UIntOvf, CORINFO_HELP_DBL2UINT_OVF, ) HELPER(READYTORUN_HELPER_Dbl2ULng, CORINFO_HELP_DBL2ULNG, ) HELPER(READYTORUN_HELPER_Dbl2ULngOvf, CORINFO_HELP_DBL2ULNG_OVF, ) +HELPER(READYTORUN_HELPER_Lng2Flt, CORINFO_HELP_LNG2FLT, ) +HELPER(READYTORUN_HELPER_ULng2Flt, CORINFO_HELP_ULNG2FLT, ) HELPER(READYTORUN_HELPER_FltRem, CORINFO_HELP_FLTREM, ) HELPER(READYTORUN_HELPER_DblRem, CORINFO_HELP_DBLREM, ) diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 05d534f2eeac..01f92e168c6b 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -54,7 +54,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Sve=43, READYTORUN_INSTRUCTION_Avx10v1=44, READYTORUN_INSTRUCTION_Avx10v1_V512=46, - READYTORUN_INSTRUCTION_EVEX=47, + READYTORUN_INSTRUCTION_Evex=47, READYTORUN_INSTRUCTION_Apx=48, READYTORUN_INSTRUCTION_Pclmulqdq_V256=49, READYTORUN_INSTRUCTION_Pclmulqdq_V512=50, @@ -63,6 +63,30 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Gfni=53, READYTORUN_INSTRUCTION_Gfni_V256=54, READYTORUN_INSTRUCTION_Gfni_V512=55, + READYTORUN_INSTRUCTION_RiscV64Base=56, + READYTORUN_INSTRUCTION_Zba=57, + READYTORUN_INSTRUCTION_Zbb=58, + READYTORUN_INSTRUCTION_Sve2=59, + READYTORUN_INSTRUCTION_Aes_V256=64, + READYTORUN_INSTRUCTION_Aes_V512=65, + READYTORUN_INSTRUCTION_AvxIfma=66, + READYTORUN_INSTRUCTION_F16C=67, + READYTORUN_INSTRUCTION_Sha=68, + READYTORUN_INSTRUCTION_WaitPkg=69, + READYTORUN_INSTRUCTION_Avx512Bitalg=70, + READYTORUN_INSTRUCTION_Avx512Bitalg_VL=71, + READYTORUN_INSTRUCTION_Avx512Bf16=72, + READYTORUN_INSTRUCTION_Avx512Bf16_VL=73, + READYTORUN_INSTRUCTION_Avx512Fp16=74, + READYTORUN_INSTRUCTION_Avx512Fp16_VL=75, + READYTORUN_INSTRUCTION_Avx512Ifma=76, + READYTORUN_INSTRUCTION_Avx512Vbmi2=77, + READYTORUN_INSTRUCTION_Avx512Vbmi2_VL=78, + READYTORUN_INSTRUCTION_Avx512Vnni=79, + READYTORUN_INSTRUCTION_Avx512Vp2intersect=80, + READYTORUN_INSTRUCTION_Avx512Vp2intersect_VL=81, + READYTORUN_INSTRUCTION_Avx512Vpopcntdq=82, + READYTORUN_INSTRUCTION_Avx512Vpopcntdq_VL=83, }; diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h index 07d3f1f6d5e0..7a5cf9d9d0ce 100644 --- a/src/coreclr/inc/regdisp.h +++ b/src/coreclr/inc/regdisp.h @@ -132,12 +132,24 @@ inline TADDR GetRegdisplayFP(REGDISPLAY *display) { inline LPVOID GetRegdisplayFPAddress(REGDISPLAY *display) { LIMITED_METHOD_CONTRACT; +#ifdef FEATURE_EH_FUNCLETS + return &display->pCurrentContext->Ebp; +#else return (LPVOID)display->GetEbpLocation(); +#endif +} + +inline TADDR GetRegdisplayPCTAddr(REGDISPLAY *display) +{ + return display->PCTAddr; } inline void SetRegdisplayPCTAddr(REGDISPLAY *display, TADDR addr) { display->PCTAddr = addr; +#ifdef FEATURE_EH_FUNCLETS + display->pCurrentContext->Eip = *PTR_PCODE(addr); +#endif display->ControlPC = *PTR_PCODE(addr); } @@ -145,22 +157,12 @@ inline void SetRegdisplayPCTAddr(REGDISPLAY *display, 
TADDR addr) // This function tells us if the given stack pointer is in one of the frames of the functions called by the given frame inline BOOL IsInCalleesFrames(REGDISPLAY *display, LPVOID stackPointer) { LIMITED_METHOD_CONTRACT; - -#ifdef FEATURE_EH_FUNCLETS - return stackPointer < ((LPVOID)(display->SP)); -#else - return (TADDR)stackPointer < display->PCTAddr; -#endif + return (TADDR)stackPointer < GetRegdisplayPCTAddr(display); } inline TADDR GetRegdisplayStackMark(REGDISPLAY *display) { LIMITED_METHOD_DAC_CONTRACT; -#ifdef FEATURE_EH_FUNCLETS - _ASSERTE(GetRegdisplaySP(display) == GetSP(display->pCurrentContext)); - return GetRegdisplaySP(display); -#else - return display->PCTAddr; -#endif + return GetRegdisplayPCTAddr(display); } #elif defined(TARGET_64BIT) @@ -345,6 +347,25 @@ inline TADDR GetRegdisplayStackMark(REGDISPLAY *display) { return GetSP(display->pCallerContext); } +#elif defined(TARGET_WASM) +struct REGDISPLAY : public REGDISPLAY_BASE { + REGDISPLAY() + { + // Initialize + memset(this, 0, sizeof(REGDISPLAY)); + } +}; + +inline void SyncRegDisplayToCurrentContext(REGDISPLAY* pRD) +{ +} + +// This function tells us if the given stack pointer is in one of the frames of the functions called by the given frame +inline BOOL IsInCalleesFrames(REGDISPLAY *display, LPVOID stackPointer) { + _ASSERTE("IsInCalleesFrames is not implemented on wasm"); + return FALSE; +} + #else // none of the above processors #error "RegDisplay functions are not implemented on this platform." #endif @@ -513,6 +534,10 @@ inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pC // This will setup the PC and SP SyncRegDisplayToCurrentContext(pRD); +#ifdef TARGET_X86 + pRD->PCTAddr = (UINT_PTR)&(pctx->Eip); +#endif + #if !defined(DACCESS_COMPILE) #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) pRD->SSP = GetSSP(pctx); diff --git a/src/coreclr/inc/safemath.h b/src/coreclr/inc/safemath.h index e40e267984b1..8c8283b9e13c 100644 --- a/src/coreclr/inc/safemath.h +++ b/src/coreclr/inc/safemath.h @@ -48,12 +48,6 @@ // function are based on static type information and as such will // be optimized away. In particular, the case where the signs are // identical will result in no code branches. - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6326) // PREfast warning: Potential comparison of a constant with another constant -#endif // _PREFAST_ - template inline bool FitsIn(Src val) { @@ -140,10 +134,6 @@ inline bool DoubleFitsInIntType(double val) return DstMinD <= val && val <= DstMaxD; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //_PREFAST_ - #define ovadd_lt(a, b, rhs) (((a) + (b) < (rhs) ) && ((a) + (b) >= (a))) #define ovadd_le(a, b, rhs) (((a) + (b) <= (rhs) ) && ((a) + (b) >= (a))) #define ovadd_gt(a, b, rhs) (((a) + (b) > (rhs) ) || ((a) + (b) < (a))) diff --git a/src/coreclr/inc/sbuffer.inl b/src/coreclr/inc/sbuffer.inl index 402cda72f473..af4890cd8199 100644 --- a/src/coreclr/inc/sbuffer.inl +++ b/src/coreclr/inc/sbuffer.inl @@ -268,7 +268,7 @@ inline void SBuffer::Set(const SBuffer &buffer) // PreFix seems to think it can choose m_allocation==0 and buffer.m_size > 0 here. // From the code for Resize and EnsureMutable, this is clearly impossible. 
- PREFIX_ASSUME( (this->m_buffer != NULL) || (buffer.m_size == 0) ); + _ASSERTE( (this->m_buffer != NULL) || (buffer.m_size == 0) ); MoveMemory(m_buffer, buffer.m_buffer, buffer.m_size); } @@ -294,7 +294,7 @@ inline void SBuffer::Set(const BYTE *buffer, COUNT_T size) // PreFix seems to think it can choose m_allocation==0 and size > 0 here. // From the code for Resize, this is clearly impossible. - PREFIX_ASSUME( (this->m_buffer != NULL) || (size == 0) ); + _ASSERTE( (this->m_buffer != NULL) || (size == 0) ); if (size != 0) MoveMemory(m_buffer, buffer, size); diff --git a/src/coreclr/inc/shimload.h b/src/coreclr/inc/shimload.h deleted file mode 100644 index 9e1e88eebf62..000000000000 --- a/src/coreclr/inc/shimload.h +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================ -** -** Header: ShimLoad.hpp -** -** Purpose: Delay load hook used to images to bind to -** dll's shim shipped with the EE -** -** -===========================================================*/ -#ifndef _SHIMLOAD_H -#define _SHIMLOAD_H - - -//***************************************************************************** -// Sets/Gets the directory based on the location of the module. This routine -// is called at COR setup time. Set is called during EEStartup and by the -// MetaData dispenser. -//***************************************************************************** -HRESULT SetInternalSystemDirectory(); -HRESULT GetInternalSystemDirectory(_Out_writes_opt_(*pdwLength) LPWSTR buffer, __inout DWORD* pdwLength); - -#endif - diff --git a/src/coreclr/inc/sstring.h b/src/coreclr/inc/sstring.h index 1b58f299bef7..9ca98ada6362 100644 --- a/src/coreclr/inc/sstring.h +++ b/src/coreclr/inc/sstring.h @@ -647,8 +647,6 @@ class EMPTY_BASES_DECL SString : private SBuffer static const BYTE s_EmptyBuffer[2]; - static UINT s_ACP; - SPTR_DECL(SString,s_Empty); COUNT_T GetRawCount() const; diff --git a/src/coreclr/inc/stacktrace.h b/src/coreclr/inc/stacktrace.h index b843eee74baf..0df8da9d907d 100644 --- a/src/coreclr/inc/stacktrace.h +++ b/src/coreclr/inc/stacktrace.h @@ -63,16 +63,6 @@ void MagicDeinit(void); ******************************************************************** robch */ void GetStringFromStackLevels(UINT ifrStart, UINT cfrTotal, _Out_writes_(cchMaxAssertStackLevelStringLen * cfrTotal) CHAR *pszString, struct _CONTEXT * pContext = NULL); -/**************************************************************************** -* GetStringFromAddr * -*-------------------* -* Description: -* Builds a string from an address in the format: -* -* 0x
: ! + 0x -******************************************************************** robch */ -void GetStringFromAddr(DWORD_PTR dwAddr, _Out_writes_(cchMaxAssertStackLevelStringLen) LPSTR szString); - #if defined(HOST_X86) && !defined(TARGET_UNIX) /**************************************************************************** * ClrCaptureContext * diff --git a/src/coreclr/inc/staticcontract.h b/src/coreclr/inc/staticcontract.h index df26383593e7..b4558c9f04c2 100644 --- a/src/coreclr/inc/staticcontract.h +++ b/src/coreclr/inc/staticcontract.h @@ -4,346 +4,39 @@ // StaticContract.h // --------------------------------------------------------------------------- - #ifndef __STATIC_CONTRACT_H_ #define __STATIC_CONTRACT_H_ -// Make sure we have the WCHAR defines available. -#include "palclr.h" - -#define SCAN_WIDEN2(x) L ## x -#define SCAN_WIDEN(x) SCAN_WIDEN2(x) - -#ifndef NOINLINE -#if __GNUC__ -#define NOINLINE __attribute__((noinline)) -#else -#define NOINLINE __declspec(noinline) -#endif -#endif - -// -// PDB annotations for the static contract analysis tool. These are separated -// from Contract.h to allow their inclusion in any part of the system. -// - -#if defined(_DEBUG) && defined(TARGET_X86) -#define METHOD_CANNOT_BE_FOLDED_DEBUG \ - static int _noFold = 0; \ - _noFold++; -#else -#define METHOD_CANNOT_BE_FOLDED_DEBUG -#endif - -#ifdef TARGET_X86 - -// -// currently, only x86 has a static contract analysis tool, so let's not -// bloat the PDBs of all the other architectures too.. -// -#define ANNOTATION_TRY_BEGIN __annotation(W("TRY_BEGIN")) -#define ANNOTATION_TRY_END __annotation(W("TRY_END")) -#define ANNOTATION_HANDLER_BEGIN __annotation(W("HANDLER_BEGIN")) -#define ANNOTATION_HANDLER_END __annotation(W("HANDLER_END")) -#define ANNOTATION_NOTHROW __annotation(W("NOTHROW")) -#define ANNOTATION_CANNOT_TAKE_LOCK __annotation(W("CANNOT_TAKE_LOCK")) -#define ANNOTATION_WRAPPER __annotation(W("WRAPPER")) -#define ANNOTATION_FAULT __annotation(W("FAULT")) -#define ANNOTATION_FORBID_FAULT __annotation(W("FORBID_FAULT")) -#define ANNOTATION_COOPERATIVE __annotation(W("MODE_COOPERATIVE")) -#define ANNOTATION_MODE_COOPERATIVE __annotation(W("MODE_PREEMPTIVE")) -#define ANNOTATION_MODE_ANY __annotation(W("MODE_ANY")) -#define ANNOTATION_GC_TRIGGERS __annotation(W("GC_TRIGGERS")) -#define ANNOTATION_IGNORE_THROW __annotation(W("THROWS"), W("NOTHROW"), W("CONDITIONAL_EXEMPT")) -#define ANNOTATION_IGNORE_LOCK __annotation(W("CAN_TAKE_LOCK"), W("CANNOT_TAKE_LOCK"), W("CONDITIONAL_EXEMPT")) -#define ANNOTATION_IGNORE_FAULT __annotation(W("FAULT"), W("FORBID_FAULT"), W("CONDITIONAL_EXEMPT")) -#define ANNOTATION_IGNORE_TRIGGER __annotation(W("GC_TRIGGERS"), W("GC_NOTRIGGER"), W("CONDITIONAL_EXEMPT")) -#define ANNOTATION_VIOLATION(violationmask) __annotation(W("VIOLATION(") L#violationmask W(")")) -#define ANNOTATION_UNCHECKED(thecheck) __annotation(W("UNCHECKED(") L#thecheck W(")")) - -#define ANNOTATION_MARK_BLOCK_ANNOTATION __annotation(W("MARK")) -#define ANNOTATION_USE_BLOCK_ANNOTATION __annotation(W("USE")) -#define ANNOTATION_END_USE_BLOCK_ANNOTATION __annotation(W("END_USE")) - -// here is the plan: -// -// a special holder which implements a violation -// - -#define ANNOTATION_FN_SPECIAL_HOLDER_BEGIN __annotation(W("SPECIAL_HOLDER_BEGIN ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_SPECIAL_HOLDER_END __annotation(W("SPECIAL_HOLDER_END")) -#define ANNOTATION_SPECIAL_HOLDER_CALLER_NEEDS_DYNAMIC_CONTRACT __annotation(W("SPECIAL_HOLDER_DYNAMIC")) - -#define ANNOTATION_SO_PROBE_BEGIN(probeAmount) 
__annotation(W("SO_PROBE_BEGIN(") L#probeAmount W(")")) -#define ANNOTATION_SO_PROBE_END __annotation(W("SO_PROBE_END")) - -// -// these annotations are all function-name qualified -// -#define ANNOTATION_FN_LEAF __annotation(W("LEAF ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_WRAPPER __annotation(W("WRAPPER ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_THROWS __annotation(W("THROWS ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_NOTHROW __annotation(W("NOTHROW ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_CAN_TAKE_LOCK __annotation(W("CAN_TAKE_LOCK ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_CANNOT_TAKE_LOCK __annotation(W("CANNOT_TAKE_LOCK ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_FAULT __annotation(W("FAULT ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_FORBID_FAULT __annotation(W("FORBID_FAULT ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_GC_TRIGGERS __annotation(W("GC_TRIGGERS ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_GC_NOTRIGGER __annotation(W("GC_NOTRIGGER ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_MODE_COOPERATIVE __annotation(W("MODE_COOPERATIVE ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_MODE_PREEMPTIVE __annotation(W("MODE_PREEMPTIVE ") SCAN_WIDEN(__FUNCTION__)) -#define ANNOTATION_FN_MODE_ANY __annotation(W("MODE_ANY ") SCAN_WIDEN(__FUNCTION__)) - -#define ANNOTATION_ENTRY_POINT __annotation(W("SO_EP ") SCAN_WIDEN(__FUNCTION__)) - - -// for DacCop -#define ANNOTATION_SUPPORTS_DAC __annotation(W("SUPPORTS_DAC")) -#define ANNOTATION_SUPPORTS_DAC_HOST_ONLY __annotation(W("SUPPORTS_DAC_HOST_ONLY")) - -#ifdef _DEBUG -// @todo : put correct annotation in and fixup the static analysis tool -// This is used to flag debug-only functions that we want to ignore in our static analysis -#define ANNOTATION_DEBUG_ONLY __annotation(W("DBG_ONLY")) - -#endif - -#else // TARGET_X86 - -#define ANNOTATION_TRY_BEGIN { } -#define ANNOTATION_TRY_END { } -#define ANNOTATION_HANDLER_BEGIN { } -#define ANNOTATION_HANDLER_END { } -#define ANNOTATION_NOTHROW { } -#define ANNOTATION_CANNOT_TAKE_LOCK { } -#define ANNOTATION_WRAPPER { } -#define ANNOTATION_FAULT { } -#define ANNOTATION_FORBID_FAULT { } -#define ANNOTATION_COOPERATIVE { } -#define ANNOTATION_MODE_COOPERATIVE { } -#define ANNOTATION_MODE_ANY { } -#define ANNOTATION_GC_TRIGGERS { } -#define ANNOTATION_IGNORE_THROW { } -#define ANNOTATION_IGNORE_LOCK { } -#define ANNOTATION_IGNORE_FAULT { } -#define ANNOTATION_IGNORE_TRIGGER { } -#define ANNOTATION_VIOLATION(violationmask) { } -#define ANNOTATION_UNCHECKED(thecheck) { } - -#define ANNOTATION_TRY_MARKER { } -#define ANNOTATION_CATCH_MARKER { } - -#define ANNOTATION_FN_SPECIAL_HOLDER_BEGIN { } -#define ANNOTATION_SPECIAL_HOLDER_END { } -#define ANNOTATION_SPECIAL_HOLDER_CALLER_NEEDS_DYNAMIC_CONTRACT { } - -#define ANNOTATION_FN_LEAF { } -#define ANNOTATION_FN_WRAPPER { } -#define ANNOTATION_FN_THROWS { } -#define ANNOTATION_FN_NOTHROW { } -#define ANNOTATION_FN_CAN_TAKE_LOCK { } -#define ANNOTATION_FN_CANNOT_TAKE_LOCK { } -#define ANNOTATION_FN_FAULT { } -#define ANNOTATION_FN_FORBID_FAULT { } -#define ANNOTATION_FN_GC_TRIGGERS { } -#define ANNOTATION_FN_GC_NOTRIGGER { } -#define ANNOTATION_FN_MODE_COOPERATIVE { } -#define ANNOTATION_FN_MODE_PREEMPTIVE { } -#define ANNOTATION_FN_MODE_ANY { } - -#define ANNOTATION_SUPPORTS_DAC { } -#define ANNOTATION_SUPPORTS_DAC_HOST_ONLY { } - -#define ANNOTATION_SO_PROBE_BEGIN(probeAmount) { } -#define ANNOTATION_SO_PROBE_END { } - -#define ANNOTATION_ENTRY_POINT { } -#ifdef _DEBUG 
-#define ANNOTATION_DEBUG_ONLY { } -#endif - -#endif // TARGET_X86 - -#define STATIC_CONTRACT_THROWS ANNOTATION_FN_THROWS -#define STATIC_CONTRACT_NOTHROW ANNOTATION_FN_NOTHROW -#define STATIC_CONTRACT_CAN_TAKE_LOCK ANNOTATION_FN_CAN_TAKE_LOCK -#define STATIC_CONTRACT_CANNOT_TAKE_LOCK ANNOTATION_FN_CANNOT_TAKE_LOCK -#define STATIC_CONTRACT_FAULT ANNOTATION_FN_FAULT -#define STATIC_CONTRACT_FORBID_FAULT ANNOTATION_FN_FORBID_FAULT -#define STATIC_CONTRACT_GC_TRIGGERS ANNOTATION_FN_GC_TRIGGERS -#define STATIC_CONTRACT_GC_NOTRIGGER ANNOTATION_FN_GC_NOTRIGGER - -#define STATIC_CONTRACT_SUPPORTS_DAC ANNOTATION_SUPPORTS_DAC -#define STATIC_CONTRACT_SUPPORTS_DAC_HOST_ONLY ANNOTATION_SUPPORTS_DAC_HOST_ONLY - -#define STATIC_CONTRACT_MODE_COOPERATIVE ANNOTATION_FN_MODE_COOPERATIVE -#define STATIC_CONTRACT_MODE_PREEMPTIVE ANNOTATION_FN_MODE_PREEMPTIVE -#define STATIC_CONTRACT_MODE_ANY ANNOTATION_FN_MODE_ANY -#define STATIC_CONTRACT_LEAF ANNOTATION_FN_LEAF -#define STATIC_CONTRACT_LIMITED_METHOD ANNOTATION_FN_LEAF -#define STATIC_CONTRACT_WRAPPER ANNOTATION_FN_WRAPPER +#define STATIC_CONTRACT_THROWS +#define STATIC_CONTRACT_NOTHROW +#define STATIC_CONTRACT_CAN_TAKE_LOCK +#define STATIC_CONTRACT_CANNOT_TAKE_LOCK +#define STATIC_CONTRACT_FAULT +#define STATIC_CONTRACT_FORBID_FAULT +#define STATIC_CONTRACT_GC_TRIGGERS +#define STATIC_CONTRACT_GC_NOTRIGGER + +#define STATIC_CONTRACT_SUPPORTS_DAC +#define STATIC_CONTRACT_SUPPORTS_DAC_HOST_ONLY + +#define STATIC_CONTRACT_MODE_COOPERATIVE +#define STATIC_CONTRACT_MODE_PREEMPTIVE +#define STATIC_CONTRACT_MODE_ANY +#define STATIC_CONTRACT_LEAF +#define STATIC_CONTRACT_LIMITED_METHOD +#define STATIC_CONTRACT_WRAPPER #define STATIC_CONTRACT_ENTRY_POINT #ifdef _DEBUG #define STATIC_CONTRACT_DEBUG_ONLY \ - ANNOTATION_DEBUG_ONLY; \ - STATIC_CONTRACT_CANNOT_TAKE_LOCK; \ - ANNOTATION_VIOLATION(TakesLockViolation); + STATIC_CONTRACT_CANNOT_TAKE_LOCK; #else #define STATIC_CONTRACT_DEBUG_ONLY #endif -#define STATIC_CONTRACT_VIOLATION(mask) \ - ANNOTATION_VIOLATION(mask) - -#define SCAN_SCOPE_BEGIN \ - METHOD_CANNOT_BE_FOLDED_DEBUG; \ - ANNOTATION_FN_SPECIAL_HOLDER_BEGIN; - -#define SCAN_SCOPE_END \ - METHOD_CANNOT_BE_FOLDED_DEBUG; \ - ANNOTATION_SPECIAL_HOLDER_END; - -namespace StaticContract -{ - struct ScanThrowMarkerStandard - { - NOINLINE ScanThrowMarkerStandard() - { - METHOD_CANNOT_BE_FOLDED_DEBUG; - STATIC_CONTRACT_THROWS; - STATIC_CONTRACT_GC_NOTRIGGER; - } - - static void used() - { - } - }; - - struct ScanThrowMarkerTerminal - { - NOINLINE ScanThrowMarkerTerminal() - { - METHOD_CANNOT_BE_FOLDED_DEBUG; - } - - static void used() - { - } - }; - - struct ScanThrowMarkerIgnore - { - NOINLINE ScanThrowMarkerIgnore() - { - METHOD_CANNOT_BE_FOLDED_DEBUG; - } - - static void used() - { - } - }; -} -typedef StaticContract::ScanThrowMarkerStandard ScanThrowMarker; - -// This is used to annotate code as throwing a terminal exception, and should -// be used immediately before the throw so that infer that it can be inferred -// that the block in which this annotation appears throws unconditionally. -#define SCAN_THROW_MARKER do { ScanThrowMarker __throw_marker; } while (0) - -#define SCAN_IGNORE_THROW_MARKER \ - typedef StaticContract::ScanThrowMarkerIgnore ScanThrowMarker; if (0) ScanThrowMarker::used(); - -// Terminal exceptions are asynchronous and cannot be included in THROWS contract -// analysis. As such, this uses typedef to reassign the ScanThrowMarker to a -// non-annotating struct so that SCAN does not see the block as throwing. 
-#define STATIC_CONTRACT_THROWS_TERMINAL \ - typedef StaticContract::ScanThrowMarkerTerminal ScanThrowMarker; if (0) ScanThrowMarker::used(); - -#ifdef _MSC_VER -#define SCAN_IGNORE_THROW typedef StaticContract::ScanThrowMarkerIgnore ScanThrowMarker; ANNOTATION_IGNORE_THROW -#define SCAN_IGNORE_LOCK ANNOTATION_IGNORE_LOCK -#define SCAN_IGNORE_FAULT ANNOTATION_IGNORE_FAULT -#define SCAN_IGNORE_TRIGGER ANNOTATION_IGNORE_TRIGGER -#else -#define SCAN_IGNORE_THROW -#define SCAN_IGNORE_LOCK -#define SCAN_IGNORE_FAULT -#define SCAN_IGNORE_TRIGGER -#endif - - -// we use BlockMarker's only for SCAN -#if defined(_DEBUG) && defined(TARGET_X86) && !defined(DACCESS_COMPILE) - -template -class BlockMarker -{ -public: - NOINLINE void MarkBlock() - { - ANNOTATION_MARK_BLOCK_ANNOTATION; - METHOD_CANNOT_BE_FOLDED_DEBUG; - return; - } - - NOINLINE void UseMarkedBlockAnnotation() - { - ANNOTATION_USE_BLOCK_ANNOTATION; - METHOD_CANNOT_BE_FOLDED_DEBUG; - return; - } - - NOINLINE void EndUseMarkedBlockAnnotation() - { - ANNOTATION_END_USE_BLOCK_ANNOTATION; - METHOD_CANNOT_BE_FOLDED_DEBUG; - return; - } -}; - -#define SCAN_BLOCKMARKER() BlockMarker<__COUNTER__> __blockMarker_onlyOneAllowedPerScope -#define SCAN_BLOCKMARKER_MARK() __blockMarker_onlyOneAllowedPerScope.MarkBlock() -#define SCAN_BLOCKMARKER_USE() __blockMarker_onlyOneAllowedPerScope.UseMarkedBlockAnnotation() -#define SCAN_BLOCKMARKER_END_USE() __blockMarker_onlyOneAllowedPerScope.EndUseMarkedBlockAnnotation() - -#define SCAN_BLOCKMARKER_N(num) BlockMarker<__COUNTER__> __blockMarker_onlyOneAllowedPerScope##num -#define SCAN_BLOCKMARKER_MARK_N(num) __blockMarker_onlyOneAllowedPerScope##num.MarkBlock() -#define SCAN_BLOCKMARKER_USE_N(num) __blockMarker_onlyOneAllowedPerScope##num.UseMarkedBlockAnnotation() -#define SCAN_BLOCKMARKER_END_USE_N(num) __blockMarker_onlyOneAllowedPerScope##num.EndUseMarkedBlockAnnotation() - -#define SCAN_EHMARKER() BlockMarker<__COUNTER__> __marker_onlyOneAllowedPerScope -#define SCAN_EHMARKER_TRY() __annotation(W("SCOPE(BLOCK);SCAN_TRY_BEGIN")); __marker_onlyOneAllowedPerScope.MarkBlock() -#define SCAN_EHMARKER_END_TRY() __annotation(W("SCOPE(BLOCK);SCAN_TRY_END")) -#define SCAN_EHMARKER_CATCH() __marker_onlyOneAllowedPerScope.UseMarkedBlockAnnotation() -#define SCAN_EHMARKER_END_CATCH() __marker_onlyOneAllowedPerScope.EndUseMarkedBlockAnnotation() - -#else - -#define SCAN_BLOCKMARKER() -#define SCAN_BLOCKMARKER_MARK() -#define SCAN_BLOCKMARKER_USE() -#define SCAN_BLOCKMARKER_END_USE() - -#define SCAN_BLOCKMARKER_N(num) -#define SCAN_BLOCKMARKER_MARK_N(num) -#define SCAN_BLOCKMARKER_USE_N(num) -#define SCAN_BLOCKMARKER_END_USE_N(num) - -#define SCAN_EHMARKER() -#define SCAN_EHMARKER_TRY() -#define SCAN_EHMARKER_END_TRY() -#define SCAN_EHMARKER_CATCH() -#define SCAN_EHMARKER_END_CATCH() - -#endif - +#define STATIC_CONTRACT_VIOLATION(mask) -// -// @todo remove this... if there really are cases where a function just shouldn't have a contract, then perhaps -// we can add a more descriptive name for it... -// -#define CANNOT_HAVE_CONTRACT __annotation(W("NO_CONTRACT")) +#define CANNOT_HAVE_CONTRACT #endif // __STATIC_CONTRACT_H_ diff --git a/src/coreclr/inc/stgpool.h b/src/coreclr/inc/stgpool.h index 39390a201670..3293fb59bf48 100644 --- a/src/coreclr/inc/stgpool.h +++ b/src/coreclr/inc/stgpool.h @@ -49,6 +49,8 @@ class StgStringPool; class StgBlobPool; class StgCodePool; +template struct cdac_data; + // Perform binary search on index table. 
// class RIDBinarySearch : public CBinarySearch @@ -188,12 +190,6 @@ friend class VerifyLayoutsMD; virtual int IsValidCookie(UINT32 nCookie) { WRAPPER_NO_CONTRACT; return (IsValidOffset(nCookie)); } - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6387) // Suppress PREFast warning: '*pszString' might be '0': this does not adhere to the specification for the function - // *pszString may be NULL only if method fails, but warning 6387 doesn't respect __success(SUCCEEDED(return)) which is part of HRESULT definition -#endif //***************************************************************************** // Return a pointer to a null terminated string given an offset previously // handed out by AddString or FindString. @@ -217,7 +213,7 @@ friend class VerifyLayoutsMD; &stringData)); _ASSERTE(hr == S_OK); // Raw data are always at least 1 byte long, otherwise it would be invalid offset and hr != S_OK - PREFAST_ASSUME(stringData.GetDataPointer() != NULL); + _ASSERTE(stringData.GetDataPointer() != NULL); // Fills output string *pszString = reinterpret_cast(stringData.GetDataPointer()); //_ASSERTE(stringData.GetSize() > strlen(*pszString)); @@ -253,7 +249,7 @@ friend class VerifyLayoutsMD; &stringData)); _ASSERTE(hr == S_OK); // Raw data are always at least 1 byte long, otherwise it would be invalid offset and hr != S_OK - PREFAST_ASSUME(stringData.GetDataPointer() != NULL); + _ASSERTE(stringData.GetDataPointer() != NULL); // Fills output string *pszString = reinterpret_cast(stringData.GetDataPointer()); //_ASSERTE(stringData.GetSize() > strlen(*pszString)); @@ -265,9 +261,6 @@ friend class VerifyLayoutsMD; return hr; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //***************************************************************************** // Convert a string to UNICODE into the caller's buffer. @@ -1500,6 +1493,15 @@ class CGrowableStream : public IStream IStream ** ppstm); #endif // DACCESS_COMPILE + + friend struct cdac_data; }; // class CGrowableStream +template<> +struct cdac_data +{ + static constexpr size_t Buffer = offsetof(CGrowableStream, m_swBuffer); + static constexpr size_t Size = offsetof(CGrowableStream, m_dwBufferSize); +}; + #endif // __StgPool_h__ diff --git a/src/coreclr/inc/stresslog.h b/src/coreclr/inc/stresslog.h index 58fbfdba97e2..a1f814e272b1 100644 --- a/src/coreclr/inc/stresslog.h +++ b/src/coreclr/inc/stresslog.h @@ -17,7 +17,7 @@ extension (eg. strike). There is no memory allocation system calls etc to purtub things */ // ****************************************************************************** -// WARNING!!!: These classes are used by the runtime and SOS in the diagnostics +// WARNING!!!: These classes are used by the runtime and SOS in the diagnostics // repo. Values should added or removed in a backwards and forwards compatible way. // See: https://github.com/dotnet/diagnostics/blob/main/src/shared/inc/stresslog.h // https://github.com/dotnet/runtime/blob/main/src/coreclr/inc/stresslog.h @@ -32,7 +32,6 @@ #include "log.h" #if defined(STRESS_LOG) && !defined(FEATURE_NO_STRESSLOG) -#ifndef STRESS_LOG_ANALYZER #include "holder.h" #include "staticcontract.h" #include "mscoree.h" @@ -46,9 +45,6 @@ #ifndef _ASSERTE #define _ASSERTE(expr) #endif -#else -#include // offsetof -#endif // STRESS_LOG_ANALYZER /* The STRESS_LOG* macros work like printf. In fact the use printf in their implementation so all printf format specifications work. 
In addition the Stress log dumper knows @@ -172,6 +168,9 @@ void ReplacePid(LPCWSTR original, LPWSTR replaced, size_t replacedLength); +template +struct cdac_offsets; + class ThreadStressLog; struct StressLogMsg; @@ -180,12 +179,12 @@ struct StressLogMsg; /* a log is a circular queue of messages */ class StressLog { + template friend struct ::cdac_offsets; public: static void Initialize(unsigned facilities, unsigned level, unsigned maxBytesPerThread, unsigned maxBytesTotal, void* moduleBase, LPWSTR logFilename = nullptr); static void Terminate(BOOL fProcessDetach=FALSE); static void ThreadDetach(); // call at DllMain THREAD_DETACH if you want to recycle thread logs -#ifndef STRESS_LOG_ANALYZER static int NewChunk () { return InterlockedIncrement (&theLog.totalChunk); @@ -194,7 +193,6 @@ class StressLog { { return InterlockedDecrement (&theLog.totalChunk); } -#endif //STRESS_LOG_ANALYZER //the result is not 100% accurate. If multiple threads call this function at the same time, //we could allow the total size be bigger than required. But the memory won't grow forever @@ -246,28 +244,6 @@ class StressLog { #endif #endif -#ifdef STRESS_LOG_ANALYZER - static size_t writing_base_address; - static size_t reading_base_address; - - template - static T* TranslateMemoryMappedPointer(T* input) - { - if (input == nullptr) - { - return nullptr; - } - - return ((T*)(((uint8_t*)input) - writing_base_address + reading_base_address)); - } -#else - template - static T* TranslateMemoryMappedPointer(T* input) - { - return input; - } -#endif - #ifdef MEMORY_MAPPED_STRESSLOG // @@ -399,9 +375,7 @@ inline void* StressLog::ConvertArgument(int64_t arg) } #endif -#ifndef STRESS_LOG_ANALYZER typedef Holder> StressLogLockHolder; -#endif //!STRESS_LOG_ANALYZER #if defined(DACCESS_COMPILE) inline BOOL StressLog::LogOn(unsigned facility, unsigned level) @@ -414,6 +388,29 @@ inline BOOL StressLog::LogOn(unsigned facility, unsigned level) } #endif +template<> +struct cdac_offsets +{ + static const size_t facilitiesToLog = offsetof(StressLog, facilitiesToLog); + static const size_t levelToLog = offsetof(StressLog, levelToLog); + static const size_t MaxSizePerThread = offsetof(StressLog, MaxSizePerThread); + static const size_t MaxSizeTotal = offsetof(StressLog, MaxSizeTotal); + static const size_t totalChunk = offsetof(StressLog, totalChunk); + static const size_t logs = offsetof(StressLog, logs); + static const size_t tickFrequency = offsetof(StressLog, tickFrequency); + static const size_t startTimeStamp = offsetof(StressLog, startTimeStamp); + static const size_t startTime = offsetof(StressLog, startTime); + static const size_t moduleOffset = offsetof(StressLog, moduleOffset); + static constexpr uint64_t MAX_MODULES = StressLog::MAX_MODULES; + + struct ModuleDesc + { + static constexpr size_t type_size = sizeof(StressLog::ModuleDesc); + static const size_t baseAddress = offsetof(StressLog::ModuleDesc, baseAddress); + static const size_t size = offsetof(StressLog::ModuleDesc, size); + }; +}; + /*************************************************************************************/ /* private classes */ @@ -481,10 +478,9 @@ struct StressMsg timeStamp = time; } - static const size_t maxArgCnt = 63; + static constexpr size_t maxArgCnt = 63; static const int64_t maxOffset = (int64_t)1 << (formatOffsetLowBits + formatOffsetHighBits); - static size_t maxMsgSize () - { return sizeof(StressMsg) + maxArgCnt*sizeof(void*); } + static constexpr size_t maxMsgSize = sizeof(uint64_t) * 2 + maxArgCnt * sizeof(void*); }; 
static_assert(sizeof(StressMsg) == sizeof(uint64_t) * 2, "StressMsg bitfields aren't aligned correctly"); @@ -550,7 +546,7 @@ struct StressLogChunk #endif //!STRESS_LOG_READONLY StressLogChunk (StressLogChunk * p = NULL, StressLogChunk * n = NULL) - :prev (p), next (n), dwSig1 (0xCFCFCFCF), dwSig2 (0xCFCFCFCF) + :prev (p), next (n), dwSig1 (ValidChunkSig), dwSig2 (ValidChunkSig) {} char * StartPtr () @@ -565,8 +561,10 @@ struct StressLogChunk BOOL IsValid () const { - return dwSig1 == 0xCFCFCFCF && dwSig2 == 0xCFCFCFCF; + return dwSig1 == ValidChunkSig && dwSig2 == ValidChunkSig; } + + static constexpr uint32_t ValidChunkSig = 0xCFCFCFCF; }; // This class implements a circular stack of variable sized elements @@ -582,9 +580,7 @@ struct StressLogChunk // readPtr / curPtr fields. thecaller is responsible for reading/writing // to the corresponding field class ThreadStressLog { -#ifdef STRESS_LOG_ANALYZER -public: -#endif + template friend struct ::cdac_offsets; ThreadStressLog* next; // we keep a linked list of these uint64_t threadId; // the id for the thread using this buffer uint8_t isDead; // Is this thread dead @@ -613,7 +609,7 @@ class ThreadStressLog { #endif //STRESS_LOG_READONLY friend class StressLog; -#if !defined(STRESS_LOG_READONLY) && !defined(STRESS_LOG_ANALYZER) +#if !defined(STRESS_LOG_READONLY) FORCEINLINE BOOL GrowChunkList () { _ASSERTE (chunkListLength >= 1); @@ -634,10 +630,10 @@ class ThreadStressLog { return TRUE; } -#endif //!STRESS_LOG_READONLY && !STRESS_LOG_ANALYZER +#endif //!STRESS_LOG_READONLY public: -#if !defined(STRESS_LOG_READONLY) && !defined(STRESS_LOG_ANALYZER) +#if !defined(STRESS_LOG_READONLY) ThreadStressLog () { chunkListHead = chunkListTail = curWriteChunk = NULL; @@ -665,9 +661,9 @@ class ThreadStressLog { chunkListLength = 1; } -#endif //!STRESS_LOG_READONLY && !STRESS_LOG_ANALYZER +#endif //!STRESS_LOG_READONLY -#if defined(MEMORY_MAPPED_STRESSLOG) && !defined(STRESS_LOG_ANALYZER) +#if defined(MEMORY_MAPPED_STRESSLOG) void* __cdecl operator new(size_t n, const std::nothrow_t&) noexcept; void __cdecl operator delete (void * chunk); #endif @@ -679,9 +675,9 @@ class ThreadStressLog { { return; } -#if !defined(STRESS_LOG_READONLY) && !defined(STRESS_LOG_ANALYZER) +#if !defined(STRESS_LOG_READONLY) _ASSERTE (chunkListLength >= 1 && chunkListLength <= StressLog::theLog.totalChunk); -#endif //!STRESS_LOG_READONLY && !STRESS_LOG_ANALYZER +#endif //!STRESS_LOG_READONLY StressLogChunk * chunk = chunkListHead; do @@ -689,9 +685,9 @@ class ThreadStressLog { StressLogChunk * tmp = chunk; chunk = chunk->next; delete tmp; -#if !defined(STRESS_LOG_READONLY) && !defined(STRESS_LOG_ANALYZER) +#if !defined(STRESS_LOG_READONLY) StressLog::ChunkDeleted (); -#endif //!STRESS_LOG_READONLY && !STRESS_LOG_ANALYZER +#endif //!STRESS_LOG_READONLY } while (chunk != chunkListHead); } @@ -712,7 +708,7 @@ class ThreadStressLog { // a previous record. 
Update curPtr to reflect the last safe beginning of a record, // but curPtr shouldn't wrap around, otherwise it'll break our assumptions about stress // log - curPtr = (StressMsg*)((char*)curPtr - StressMsg::maxMsgSize()); + curPtr = (StressMsg*)((char*)curPtr - StressMsg::maxMsgSize); if (curPtr < (StressMsg*)curWriteChunk->StartPtr()) { curPtr = (StressMsg *)curWriteChunk->StartPtr(); @@ -727,7 +723,7 @@ class ThreadStressLog { BOOL IsValid () const { - return chunkListHead != NULL && (!curWriteChunk || StressLog::TranslateMemoryMappedPointer(curWriteChunk)->IsValid ()); + return chunkListHead != NULL && (!curWriteChunk || curWriteChunk->IsValid ()); } #ifdef STRESS_LOG_READONLY @@ -759,6 +755,18 @@ class ThreadStressLog { #endif //STRESS_LOG_READONLY }; +template<> +struct cdac_offsets +{ + static const size_t next = offsetof(ThreadStressLog, next); + static const size_t threadId = offsetof(ThreadStressLog, threadId); + static const size_t writeHasWrapped = offsetof(ThreadStressLog, writeHasWrapped); + static const size_t curPtr = offsetof(ThreadStressLog, curPtr); + static const size_t chunkListHead = offsetof(ThreadStressLog, chunkListHead); + static const size_t chunkListTail = offsetof(ThreadStressLog, chunkListTail); + static const size_t curWriteChunk = offsetof(ThreadStressLog, curWriteChunk); +}; + #ifdef STRESS_LOG_READONLY /*********************************************************************************/ // Called when dumping the log (by StressLog::Dump()) @@ -794,7 +802,7 @@ inline StressMsg* ThreadStressLog::AdvReadPastBoundary() { } curReadChunk = curReadChunk->next; void** p = (void**)curReadChunk->StartPtr(); - while (*p == NULL && (size_t)(p-(void**)curReadChunk->StartPtr()) < (StressMsg::maxMsgSize() / sizeof(void*))) + while (*p == NULL && (size_t)(p-(void**)curReadChunk->StartPtr()) < (StressMsg::maxMsgSize / sizeof(void*))) { ++p; } @@ -839,7 +847,7 @@ inline StressMsg* ThreadStressLog::AdvanceWrite(int cArgs) { // In addition it writes NULLs b/w the startPtr and curPtr inline StressMsg* ThreadStressLog::AdvWritePastBoundary(int cArgs) { STATIC_CONTRACT_WRAPPER; -#if !defined(STRESS_LOG_READONLY) && !defined(STRESS_LOG_ANALYZER) +#if !defined(STRESS_LOG_READONLY) //zeroed out remaining buffer memset (curWriteChunk->StartPtr (), 0, (BYTE *)curPtr - (BYTE *)curWriteChunk->StartPtr ()); @@ -848,7 +856,7 @@ inline StressMsg* ThreadStressLog::AdvWritePastBoundary(int cArgs) { { GrowChunkList (); } -#endif //!STRESS_LOG_READONLY && !STRESS_LOG_ANALYZER +#endif //!STRESS_LOG_READONLY curWriteChunk = curWriteChunk->prev; #ifndef STRESS_LOG_READONLY diff --git a/src/coreclr/inc/switches.h b/src/coreclr/inc/switches.h index 4c4792c6f465..f166d93e0116 100644 --- a/src/coreclr/inc/switches.h +++ b/src/coreclr/inc/switches.h @@ -43,13 +43,15 @@ #define GC_STATS #endif -#if defined(TARGET_X86) || defined(TARGET_ARM) +#if defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_BROWSER) #define USE_LAZY_PREFERRED_RANGE 0 -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_S390X) || defined(TARGET_LOONGARCH64) || defined(TARGET_POWERPC64) || defined(TARGET_RISCV64) +#elif defined(TARGET_64BIT) + +#define FEATURE_ON_STACK_REPLACEMENT #if defined(HOST_UNIX) - // In PAL we have a smechanism that reserves memory on start up that is + // In PAL we have a mechanism that reserves memory on start up that is // close to libcoreclr and intercepts calls to VirtualAlloc to serve back // from this area. 
#define USE_LAZY_PREFERRED_RANGE 0 @@ -146,17 +148,22 @@ #define FEATURE_HFA #endif -// ARM requires that 64-bit primitive types are aligned at 64-bit boundaries for interlocked-like operations. -// Additionally the platform ABI requires these types and composite type containing them to be similarly -// aligned when passed as arguments. -#ifdef TARGET_ARM +// Some 32-bit platform ABIs require that 64-bit primitive types and composite types containing them are aligned at 64-bit boundaries. +#if defined(TARGET_ARM) || defined(TARGET_WASM) #define FEATURE_64BIT_ALIGNMENT #endif -// Prefer double alignment for structs and arrays with doubles. Put arrays of doubles more agressively -// into large object heap for performance because large object heap is 8 byte aligned +// Prefer double alignment for structs with doubles on the stack. #if !defined(FEATURE_64BIT_ALIGNMENT) && !defined(HOST_64BIT) #define FEATURE_DOUBLE_ALIGNMENT_HINT #endif #define FEATURE_MINIMETADATA_IN_TRIAGEDUMPS + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH +#define CHAIN_LOOKUP +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + +// If this is uncommented, leaves a file "StubLog_.log" with statistics on the behavior +// of stub-based interface dispatch. +//#define STUB_LOGGING diff --git a/src/coreclr/inc/targetosarch.h b/src/coreclr/inc/targetosarch.h index f917677837ed..9bcf40323512 100644 --- a/src/coreclr/inc/targetosarch.h +++ b/src/coreclr/inc/targetosarch.h @@ -128,6 +128,14 @@ class TargetArchitecture static const bool IsArmArch = false; static const bool IsLoongArch64 = false; static const bool IsRiscV64 = true; +#elif defined(TARGET_WASM) + static const bool IsX86 = false; + static const bool IsX64 = false; + static const bool IsArm64 = false; + static const bool IsArm32 = false; + static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; + static const bool IsRiscV64 = false; #else #error Unknown architecture #endif diff --git a/src/coreclr/inc/unreachable.h b/src/coreclr/inc/unreachable.h index a770209abe86..7cb857a176ef 100644 --- a/src/coreclr/inc/unreachable.h +++ b/src/coreclr/inc/unreachable.h @@ -8,7 +8,7 @@ #ifndef __UNREACHABLE_H__ #define __UNREACHABLE_H__ -#if defined(_MSC_VER) || defined(_PREFIX_) +#if defined(_MSC_VER) #define __UNREACHABLE() __assume(0) #else #define __UNREACHABLE() __builtin_unreachable() diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h index 30a0e38aa4c0..a65140ea8033 100644 --- a/src/coreclr/inc/utilcode.h +++ b/src/coreclr/inc/utilcode.h @@ -35,7 +35,9 @@ using std::nothrow; #include "contract.h" -#include +#include +#include +#include #include #include "clrnt.h" @@ -47,12 +49,8 @@ using std::nothrow; #define CoreLibName_W W("System.Private.CoreLib") #define CoreLibName_IL_W W("System.Private.CoreLib.dll") -#define CoreLibName_NI_W W("System.Private.CoreLib.ni.dll") -#define CoreLibName_TLB_W W("System.Private.CoreLib.tlb") #define CoreLibName_A "System.Private.CoreLib" #define CoreLibName_IL_A "System.Private.CoreLib.dll" -#define CoreLibName_NI_A "System.Private.CoreLib.ni.dll" -#define CoreLibName_TLB_A "System.Private.CoreLib.tlb" #define CoreLibNameLen 22 #define CoreLibSatelliteName_A "System.Private.CoreLib.resources" #define CoreLibSatelliteNameLen 32 @@ -149,14 +147,10 @@ typedef LPSTR LPUTF8; #endif #endif -#include // for offsetof -#include - #define IS_DIGIT(ch) (((ch) >= W('0')) && ((ch) <= W('9'))) #define DIGIT_TO_INT(ch) ((ch) - W('0')) #define INT_TO_DIGIT(i) ((WCHAR)(W('0') + (i))) - // Helper will 4 byte align a value, rounding up. 
#define ALIGN4BYTE(val) (((val) + 3) & ~0x3) @@ -731,8 +725,6 @@ void SplitPathInterior( #include "ostype.h" -#define CLRGetTickCount64() GetTickCount64() - // // Allocate free memory within the range [pMinAddr..pMaxAddr] using // ClrVirtualQuery to find free memory and ClrVirtualAlloc to allocate it. @@ -3031,23 +3023,6 @@ class ConfigMethodSet BYTE m_inited; }; -// 38 characters + 1 null terminating. -#define GUID_STR_BUFFER_LEN (ARRAY_SIZE("{12345678-1234-1234-1234-123456789abc}")) - -//***************************************************************************** -// Convert a GUID into a pointer to a string -//***************************************************************************** -int GuidToLPSTR( - REFGUID guid, // [IN] The GUID to convert. - LPSTR szGuid, // [OUT] String into which the GUID is stored - DWORD cchGuid); // [IN] Size in chars of szGuid - -template -int GuidToLPSTR(REFGUID guid, CHAR (&s)[N]) -{ - return GuidToLPSTR(guid, s, N); -} - //***************************************************************************** // Convert a pointer to a string into a GUID. //***************************************************************************** @@ -3220,14 +3195,6 @@ inline HRESULT FakeCoCreateInstance(REFCLSID rclsid, return FakeCoCreateInstanceEx(rclsid, NULL, riid, ppv, NULL); }; -//***************************************************************************** -// Gets the directory based on the location of the module. This routine -// is called at COR setup time. Set is called during EEStartup and by the -// MetaData dispenser. -//***************************************************************************** -HRESULT GetInternalSystemDirectory(_Out_writes_to_opt_(*pdwLength,*pdwLength) LPWSTR buffer, __inout DWORD* pdwLength); -LPCWSTR GetInternalSystemDirectory(_Out_opt_ DWORD * pdwLength = NULL); - //***************************************************************************** // This function validates the given Method/Field/Standalone signature. (util.cpp) //***************************************************************************** @@ -3239,13 +3206,6 @@ HRESULT validateTokenSig( DWORD dwFlags, // [IN] Method flags. IMDInternalImport* pImport); // [IN] Internal MD Import interface ptr -//***************************************************************************** -// Determine the version number of the runtime that was used to build the -// specified image. The pMetadata pointer passed in is the pointer to the -// metadata contained in the image. -//***************************************************************************** -HRESULT GetImageRuntimeVersionString(PVOID pMetaData, LPCSTR* pString); - //***************************************************************************** // The registry keys and values that contain the information regarding // the default registered unmanaged debugger. 
diff --git a/src/coreclr/inc/volatile.h b/src/coreclr/inc/volatile.h index efcb25f8acd8..ce49c38ef7ac 100644 --- a/src/coreclr/inc/volatile.h +++ b/src/coreclr/inc/volatile.h @@ -68,7 +68,7 @@ #error The Volatile type is currently only defined for Visual C++ and GNU C++ #endif -#if defined(__GNUC__) && !defined(HOST_X86) && !defined(HOST_AMD64) && !defined(HOST_ARM) && !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_RISCV64) && !defined(HOST_S390X) && !defined(HOST_POWERPC64) +#if defined(__GNUC__) && !defined(HOST_X86) && !defined(HOST_AMD64) && !defined(HOST_ARM) && !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_RISCV64) && !defined(HOST_S390X) && !defined(HOST_POWERPC64) && !defined(HOST_WASM) #error The Volatile type is currently only defined for GCC when targeting x86, AMD64, ARM, ARM64, LOONGARCH64, RISCV64, PPC64LE, or S390X CPUs #endif diff --git a/src/coreclr/inc/vptr_list.h b/src/coreclr/inc/vptr_list.h index dcc1e197acd7..b5581b019478 100644 --- a/src/coreclr/inc/vptr_list.h +++ b/src/coreclr/inc/vptr_list.h @@ -9,6 +9,10 @@ VPTR_CLASS(EEJitManager) #ifdef FEATURE_READYTORUN VPTR_CLASS(ReadyToRunJitManager) #endif +#ifdef FEATURE_INTERPRETER +VPTR_CLASS(InterpreterJitManager) +VPTR_CLASS(InterpreterCodeManager) +#endif VPTR_CLASS(EECodeManager) VPTR_CLASS(RangeList) @@ -24,7 +28,6 @@ VPTR_CLASS(ReflectionModule) VPTR_CLASS(PrecodeStubManager) VPTR_CLASS(StubLinkStubManager) VPTR_CLASS(ThePreStubManager) -VPTR_CLASS(ThunkHeapStubManager) VPTR_CLASS(VirtualCallStubManager) VPTR_CLASS(VirtualCallStubManagerManager) VPTR_CLASS(JumpStubStubManager) diff --git a/src/coreclr/inc/winwrap.h b/src/coreclr/inc/winwrap.h index a222b6bba1b4..4f5569343cbf 100644 --- a/src/coreclr/inc/winwrap.h +++ b/src/coreclr/inc/winwrap.h @@ -42,11 +42,9 @@ #ifdef HOST_WINDOWS #define WszLoadLibrary LoadLibraryExWrapper #define WszCreateFile CreateFileWrapper -#define WszGetFileAttributesEx GetFileAttributesExWrapper #else // HOST_WINDOWS #define WszLoadLibrary LoadLibraryExW #define WszCreateFile CreateFileW -#define WszGetFileAttributesEx GetFileAttributesExW #endif // HOST_WINDOWS //APIS which have a buffer as an out parameter diff --git a/src/coreclr/interop/comwrappers.cpp b/src/coreclr/interop/comwrappers.cpp index 58b83368f7bf..05fb629cf228 100644 --- a/src/coreclr/interop/comwrappers.cpp +++ b/src/coreclr/interop/comwrappers.cpp @@ -11,7 +11,6 @@ #endif // _WIN32 using OBJECTHANDLE = InteropLib::OBJECTHANDLE; -using AllocScenario = InteropLibImports::AllocScenario; using TryInvokeICustomQueryInterfaceResult = InteropLibImports::TryInvokeICustomQueryInterfaceResult; namespace ABI @@ -44,133 +43,11 @@ namespace ABI // See the dispatch section building API below for an example of how indexing works. //-------------------------------------------------------------------------------- - struct ComInterfaceDispatch - { - const void* vtable; - }; - ABI_ASSERT(sizeof(ComInterfaceDispatch) == sizeof(void*)); - + using InteropLib::ABI::ComInterfaceDispatch; + using InteropLib::ABI::ComInterfaceEntry; using InteropLib::ABI::DispatchAlignmentThisPtr; using InteropLib::ABI::DispatchThisPtrMask; - ABI_ASSERT(sizeof(void*) < DispatchAlignmentThisPtr); - - const intptr_t AlignmentThisPtrMaxPadding = DispatchAlignmentThisPtr - sizeof(void*); - const size_t EntriesPerThisPtr = (DispatchAlignmentThisPtr / sizeof(void*)) - 1; - - // Check if the instance can dispatch according to the ABI. 
- bool IsAbleToDispatch(_In_ ComInterfaceDispatch* disp) - { - return (reinterpret_cast(disp) & DispatchThisPtrMask) != 0; - } - - // Given the number of dispatch entries, compute the needed number of 'this' pointer entries. - constexpr size_t ComputeThisPtrForDispatchSection(_In_ size_t dispatchCount) - { - return (dispatchCount / ABI::EntriesPerThisPtr) + ((dispatchCount % ABI::EntriesPerThisPtr) == 0 ? 0 : 1); - } - - // Given a pointer and a padding allowance, attempt to find an offset into - // the memory that is properly aligned for the dispatch section. - char* AlignDispatchSection(_In_ char* section, _In_ intptr_t extraPadding) - { - _ASSERTE(section != nullptr); - - // If the dispatch section is not properly aligned by default, we - // utilize the padding to make sure the dispatch section is aligned. - while ((reinterpret_cast(section) % ABI::DispatchAlignmentThisPtr) != 0) - { - // Check if there is padding to attempt an alignment. - if (extraPadding <= 0) - return nullptr; - - extraPadding -= sizeof(void*); - -#ifdef _DEBUG - // Poison unused portions of the section. - ::memset(section, 0xff, sizeof(void*)); -#endif - - section += sizeof(void*); - } - - return section; - } - - struct ComInterfaceEntry - { - GUID IID; - const void* Vtable; - }; - - struct EntrySet - { - const ComInterfaceEntry* start; - int32_t count; - }; - - // Populate the dispatch section with the entry sets - ComInterfaceDispatch* PopulateDispatchSection( - _In_ void* thisPtr, - _In_ void* dispatchSection, - _In_ size_t entrySetCount, - _In_ const EntrySet* entrySets) - { - // Define dispatch section iterator. - const void** currDisp = reinterpret_cast(dispatchSection); - - // Keep rolling count of dispatch entries. - int32_t dispCount = 0; - - // Iterate over all interface entry sets. - const EntrySet* curr = entrySets; - const EntrySet* end = entrySets + entrySetCount; - for (; curr != end; ++curr) - { - const ComInterfaceEntry* currEntry = curr->start; - int32_t entryCount = curr->count; - - // Update dispatch section with 'this' pointer and vtables. - for (int32_t i = 0; i < entryCount; ++i, ++dispCount, ++currEntry) - { - // Insert the 'this' pointer at the appropriate locations - // e.g.: - // 32-bit | 64-bit - // (0 * 4) % 16 = 0 | (0 * 8) % 64 = 0 - // (1 * 4) % 16 = 4 | (1 * 8) % 64 = 8 - // (2 * 4) % 16 = 8 | (2 * 8) % 64 = 16 - // (3 * 4) % 16 = 12 | ... - // (4 * 4) % 16 = 0 | (7 * 8) % 64 = 56 - // (5 * 4) % 16 = 4 | (8 * 8) % 64 = 0 - // - if (((dispCount * sizeof(void*)) % ABI::DispatchAlignmentThisPtr) == 0) - { - *currDisp++ = thisPtr; - ++dispCount; - } - - // Fill in the dispatch entry - *currDisp++ = currEntry->Vtable; - } - } - - return reinterpret_cast(dispatchSection); - } - - // Given the entry index, compute the dispatch index. - ComInterfaceDispatch* IndexIntoDispatchSection(_In_ int32_t i, _In_ ComInterfaceDispatch* dispatches) - { - // Convert the supplied zero based index into what it represents as a count. - const size_t count = static_cast(i) + 1; - - // Based on the supplied count, compute how many previous 'this' pointers would be - // required in the dispatch section and add that to the supplied index to get the - // index into the dispatch section. - const size_t idx = ComputeThisPtrForDispatchSection(count) + i; - - ComInterfaceDispatch* disp = dispatches + idx; - _ASSERTE(IsAbleToDispatch(disp)); - return disp; - } + using InteropLib::ABI::IndexIntoDispatchSection; // Given a dispatcher instance, return the associated ManagedObjectWrapper. 
ManagedObjectWrapper* ToManagedObjectWrapper(_In_ ComInterfaceDispatch* disp) @@ -242,7 +119,7 @@ HRESULT STDMETHODCALLTYPE TrackerTarget_QueryInterface( // 1. Marked to Destroy - in this case it is unsafe to touch wrapper. // 2. Object Handle target has been NULLed out by GC. if (wrapper->IsMarkedToDestroy() - || !InteropLibImports::HasValidTarget(wrapper->Target)) + || !InteropLibImports::HasValidTarget(wrapper->GetTarget())) { // It is unsafe to proceed with a QueryInterface call. The MOW has been // marked destroyed or the associated managed object has been collected. @@ -338,6 +215,11 @@ namespace static_assert(sizeof(ManagedObjectWrapper_IReferenceTrackerTargetImpl) == (7 * sizeof(void*)), "Unexpected vtable size"); } +void const* ManagedObjectWrapper::GetIReferenceTrackerTargetImpl() noexcept +{ + return &ManagedObjectWrapper_IReferenceTrackerTargetImpl; +} + namespace { // This IID represents an internal interface we define to tag any ManagedObjectWrappers we create. @@ -355,22 +237,6 @@ namespace { return (version == (void*)&ITaggedImpl_IsCurrentVersion) ? S_OK : E_FAIL; } - - // Hard-coded ManagedObjectWrapper tagged vtable. - const struct - { - decltype(&ManagedObjectWrapper_QueryInterface) QueryInterface; - decltype(&ManagedObjectWrapper_AddRef) AddRef; - decltype(&ManagedObjectWrapper_Release) Release; - decltype(&ITaggedImpl_IsCurrentVersion) IsCurrentVersion; - } ManagedObjectWrapper_TaggedImpl { - &ManagedObjectWrapper_QueryInterface, - &ManagedObjectWrapper_AddRef, - &ManagedObjectWrapper_Release, - &ITaggedImpl_IsCurrentVersion, - }; - - static_assert(sizeof(ManagedObjectWrapper_TaggedImpl) == (4 * sizeof(void*)), "Unexpected vtable size"); } void ManagedObjectWrapper::GetIUnknownImpl( @@ -387,6 +253,11 @@ void ManagedObjectWrapper::GetIUnknownImpl( *fpRelease = (void*)ManagedObjectWrapper_IUnknownImpl.Release; } +void const* ManagedObjectWrapper::GetTaggedCurrentVersionImpl() noexcept +{ + return reinterpret_cast(&ITaggedImpl_IsCurrentVersion); +} + // The logic here should match code:ClrDataAccess::DACTryGetComWrappersObjectFromCCW in daccess/request.cpp ManagedObjectWrapper* ManagedObjectWrapper::MapFromIUnknown(_In_ IUnknown* pUnk) { @@ -428,166 +299,35 @@ ManagedObjectWrapper* ManagedObjectWrapper::MapFromIUnknownWithQueryInterface(_I return ABI::ToManagedObjectWrapper(disp); } -HRESULT ManagedObjectWrapper::Create( - _In_ InteropLib::Com::CreateComInterfaceFlags flagsRaw, - _In_ OBJECTHANDLE objectHandle, - _In_ int32_t userDefinedCount, - _In_ ABI::ComInterfaceEntry* userDefined, - _Outptr_ ManagedObjectWrapper** mow) +void* ManagedObjectWrapper::AsRuntimeDefined(_In_ REFIID riid) { - _ASSERTE(objectHandle != nullptr && mow != nullptr); - - auto flags = static_cast(flagsRaw); - _ASSERTE((flags & CreateComInterfaceFlagsEx::InternalMask) == CreateComInterfaceFlagsEx::None); - - // Maximum number of runtime supplied vtables. - ABI::ComInterfaceEntry runtimeDefinedLocal[3]; - int32_t runtimeDefinedCount = 0; + // The order of interface lookup here is important. + // See ComWrappers.CreateManagedObjectWrapper() for the expected order. + int i = _userDefinedCount; - // Check if the caller will provide the IUnknown table. 
- if ((flags & CreateComInterfaceFlagsEx::CallerDefinedIUnknown) == CreateComInterfaceFlagsEx::None) + if ((_flags & CreateComInterfaceFlagsEx::CallerDefinedIUnknown) == CreateComInterfaceFlagsEx::None) { - ABI::ComInterfaceEntry& curr = runtimeDefinedLocal[runtimeDefinedCount++]; - curr.IID = __uuidof(IUnknown); - curr.Vtable = &ManagedObjectWrapper_IUnknownImpl; - } - - // Check if the caller wants tracker support. - if ((flags & CreateComInterfaceFlagsEx::TrackerSupport) == CreateComInterfaceFlagsEx::TrackerSupport) - { - ABI::ComInterfaceEntry& curr = runtimeDefinedLocal[runtimeDefinedCount++]; - curr.IID = IID_IReferenceTrackerTarget; - curr.Vtable = &ManagedObjectWrapper_IReferenceTrackerTargetImpl; - } - - // Always add the tagged interface. This is used to confirm at run-time with certainty - // the wrapper is created by the ComWrappers API. - { - ABI::ComInterfaceEntry& curr = runtimeDefinedLocal[runtimeDefinedCount++]; - curr.IID = IID_TaggedImpl; - curr.Vtable = &ManagedObjectWrapper_TaggedImpl; - } - - _ASSERTE(runtimeDefinedCount <= static_cast(ARRAY_SIZE(runtimeDefinedLocal))); - - // Compute size for ManagedObjectWrapper instance. - const size_t totalRuntimeDefinedSize = runtimeDefinedCount * sizeof(ABI::ComInterfaceEntry); - const size_t totalDefinedCount = static_cast(runtimeDefinedCount) + userDefinedCount; - - // Compute the total entry size of dispatch section. - const size_t totalDispatchSectionCount = ABI::ComputeThisPtrForDispatchSection(totalDefinedCount) + totalDefinedCount; - const size_t totalDispatchSectionSize = totalDispatchSectionCount * sizeof(void*); - - // Allocate memory for the ManagedObjectWrapper. - char* wrapperMem = (char*)InteropLibImports::MemAlloc(sizeof(ManagedObjectWrapper) + totalRuntimeDefinedSize + totalDispatchSectionSize + ABI::AlignmentThisPtrMaxPadding, AllocScenario::ManagedObjectWrapper); - if (wrapperMem == nullptr) - return E_OUTOFMEMORY; - - // Compute Runtime defined offset. - char* runtimeDefinedOffset = wrapperMem + sizeof(ManagedObjectWrapper); + if (riid == IID_IUnknown) + { + return ABI::IndexIntoDispatchSection(i, _dispatches); + } - // Copy in runtime supplied COM interface entries. - ABI::ComInterfaceEntry* runtimeDefined = nullptr; - if (0 < runtimeDefinedCount) - { - ::memcpy(runtimeDefinedOffset, runtimeDefinedLocal, totalRuntimeDefinedSize); - runtimeDefined = reinterpret_cast(runtimeDefinedOffset); + ++i; } - // Compute the dispatch section offset and ensure it is aligned. 
- char* dispatchSectionOffset = runtimeDefinedOffset + totalRuntimeDefinedSize; - dispatchSectionOffset = ABI::AlignDispatchSection(dispatchSectionOffset, ABI::AlignmentThisPtrMaxPadding); - if (dispatchSectionOffset == nullptr) - return E_UNEXPECTED; - - // Define the sets for the tables to insert - const ABI::EntrySet AllEntries[] = + if ((_flags & CreateComInterfaceFlagsEx::TrackerSupport) == CreateComInterfaceFlagsEx::TrackerSupport) { - { runtimeDefined, runtimeDefinedCount }, - { userDefined, userDefinedCount } - }; - - ABI::ComInterfaceDispatch* dispSection = ABI::PopulateDispatchSection(wrapperMem, dispatchSectionOffset, ARRAY_SIZE(AllEntries), AllEntries); - - ManagedObjectWrapper* wrapper = new (wrapperMem) ManagedObjectWrapper + if (riid == IID_IReferenceTrackerTarget) { - flags, - objectHandle, - runtimeDefinedCount, - runtimeDefined, - userDefinedCount, - userDefined, - dispSection - }; - - *mow = wrapper; - return S_OK; -} - -void ManagedObjectWrapper::Destroy(_In_ ManagedObjectWrapper* wrapper) -{ - _ASSERTE(wrapper != nullptr); - _ASSERTE(GetComCount(wrapper->_refCount) == 0); + return ABI::IndexIntoDispatchSection(i, _dispatches); + } - // Attempt to set the destroyed bit. - LONGLONG refCount; - LONGLONG prev; - do - { - prev = wrapper->_refCount; - refCount = prev | DestroySentinel; - } while (InterlockedCompareExchange64(&wrapper->_refCount, refCount, prev) != prev); - - // The destroy sentinel represents the bit that indicates the wrapper - // should be destroyed. Since the reference count field (64-bit) holds - // two counters we rely on the singular sentinel value - no other bits - // in the 64-bit counter are set. If there are outstanding bits set it - // indicates there are still outstanding references. - if (refCount == DestroySentinel) - { - // Manually trigger the destructor since placement - // new was used to allocate the object. - wrapper->~ManagedObjectWrapper(); - InteropLibImports::MemFree(wrapper, AllocScenario::ManagedObjectWrapper); + ++i; } -} - -ManagedObjectWrapper::ManagedObjectWrapper( - _In_ CreateComInterfaceFlagsEx flags, - _In_ OBJECTHANDLE objectHandle, - _In_ int32_t runtimeDefinedCount, - _In_ const ABI::ComInterfaceEntry* runtimeDefined, - _In_ int32_t userDefinedCount, - _In_ const ABI::ComInterfaceEntry* userDefined, - _In_ ABI::ComInterfaceDispatch* dispatches) - : Target{ nullptr } - , _refCount{ 1 } - , _runtimeDefinedCount{ runtimeDefinedCount } - , _userDefinedCount{ userDefinedCount } - , _runtimeDefined{ runtimeDefined } - , _userDefined{ userDefined } - , _dispatches{ dispatches } - , _flags{ flags } -{ - bool wasSet = TrySetObjectHandle(objectHandle); - _ASSERTE(wasSet); -} - -ManagedObjectWrapper::~ManagedObjectWrapper() -{ - // If the target isn't null, then release it. 
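Both the removed Destroy path above and the reworked release path rely on the same idea: a dedicated sentinel bit is OR'd into the combined 64-bit reference count, and cleanup runs only for the caller that observes the counter equal to the sentinel alone, meaning the embedded counters were already zero. The sketch below restates that pattern with std::atomic; the bit position and surrounding types are illustrative stand-ins, not the runtime's actual DestroySentinel value.

```cpp
// Standalone sketch of the destroy-sentinel pattern; values are illustrative.
#include <atomic>
#include <cassert>
#include <cstdint>

constexpr int64_t DestroySentinel = int64_t(1) << 62; // illustrative bit position

// Returns true when this call is the one that should free the object.
bool MarkDestroyed(std::atomic<int64_t>& refCount)
{
    int64_t prev = refCount.load();
    int64_t desired;
    do
    {
        desired = prev | DestroySentinel;
    } while (!refCount.compare_exchange_weak(prev, desired));

    // Only destroy when no other counter bits remain set.
    return desired == DestroySentinel;
}

int main()
{
    std::atomic<int64_t> alive{1};   // an outstanding reference remains, do not free
    assert(!MarkDestroyed(alive));
    std::atomic<int64_t> done{0};    // counters already at zero, this caller frees
    assert(MarkDestroyed(done));
    return 0;
}
```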
- if (Target != nullptr) - InteropLibImports::DeleteObjectInstanceHandle(Target); -} -void* ManagedObjectWrapper::AsRuntimeDefined(_In_ REFIID riid) -{ - for (int32_t i = 0; i < _runtimeDefinedCount; ++i) + if (riid == IID_TaggedImpl) { - if (IsEqualGUID(_runtimeDefined[i].IID, riid)) - { - return ABI::IndexIntoDispatchSection(i, _dispatches); - } + return ABI::IndexIntoDispatchSection(i, _dispatches); } return nullptr; @@ -599,7 +339,7 @@ void* ManagedObjectWrapper::AsUserDefined(_In_ REFIID riid) { if (IsEqualGUID(_userDefined[i].IID, riid)) { - return ABI::IndexIntoDispatchSection(i + _runtimeDefinedCount, _dispatches); + return ABI::IndexIntoDispatchSection(i, _dispatches); } } @@ -616,11 +356,6 @@ void* ManagedObjectWrapper::As(_In_ REFIID riid) return typeMaybe; } -bool ManagedObjectWrapper::TrySetObjectHandle(_In_ OBJECTHANDLE objectHandle, _In_ OBJECTHANDLE current) -{ - return (InterlockedCompareExchangePointer(&Target, objectHandle, current) == current); -} - bool ManagedObjectWrapper::IsSet(_In_ CreateComInterfaceFlagsEx flag) const { return (_flags & flag) != CreateComInterfaceFlagsEx::None; @@ -689,7 +424,13 @@ ULONG ManagedObjectWrapper::ReleaseFromReferenceTracker() // If we observe the destroy sentinel, then this release // must destroy the wrapper. if (refCount == DestroySentinel) - Destroy(this); + { + InteropLib::OBJECTHANDLE handle = InterlockedExchangePointer(&_target, nullptr); + if (handle != nullptr) + { + InteropLibImports::DestroyHandle(handle); + } + } return GetTrackerCount(refCount); } @@ -720,7 +461,7 @@ HRESULT ManagedObjectWrapper::QueryInterface( // Check if the managed object has implemented ICustomQueryInterface if (!IsSet(CreateComInterfaceFlagsEx::LacksICustomQueryInterface)) { - TryInvokeICustomQueryInterfaceResult result = InteropLibImports::TryInvokeICustomQueryInterface(Target, riid, ppvObject); + TryInvokeICustomQueryInterfaceResult result = InteropLibImports::TryInvokeICustomQueryInterface(GetTarget(), riid, ppvObject); switch (result) { case TryInvokeICustomQueryInterfaceResult::Handled: @@ -782,166 +523,7 @@ ULONG ManagedObjectWrapper::Release(void) return GetComCount(::InterlockedDecrement64(&_refCount)); } -namespace -{ - const size_t LiveContextSentinel = 0x0a110ced; - const size_t DeadContextSentinel = 0xdeaddead; -} - -NativeObjectWrapperContext* NativeObjectWrapperContext::MapFromRuntimeContext(_In_ void* cxtMaybe) -{ - _ASSERTE(cxtMaybe != nullptr); - - // Convert the supplied context - char* cxtRaw = reinterpret_cast(cxtMaybe); - cxtRaw -= sizeof(NativeObjectWrapperContext); - NativeObjectWrapperContext* cxt = reinterpret_cast(cxtRaw); - -#ifdef _DEBUG - _ASSERTE(cxt->_sentinel == LiveContextSentinel); -#endif - - return cxt; -} - -HRESULT NativeObjectWrapperContext::Create( - _In_ IUnknown* external, - _In_opt_ IUnknown* inner, - _In_ InteropLib::Com::CreateObjectFlags flags, - _In_ size_t runtimeContextSize, - _Outptr_ NativeObjectWrapperContext** context) +InteropLib::OBJECTHANDLE ManagedObjectWrapper::GetTarget() const { - _ASSERTE(external != nullptr && context != nullptr); - - HRESULT hr; - - ComHolder trackerObject; - if (flags & InteropLib::Com::CreateObjectFlags_TrackerObject) - { - hr = external->QueryInterface(IID_IReferenceTracker, (void**)&trackerObject); - if (SUCCEEDED(hr)) - RETURN_IF_FAILED(TrackerObjectManager::OnIReferenceTrackerFound(trackerObject)); - } - - // Allocate memory for the RCW - char* cxtMem = (char*)InteropLibImports::MemAlloc(sizeof(NativeObjectWrapperContext) + runtimeContextSize, 
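The rewritten AsRuntimeDefined walks fixed positions instead of a separate runtime-defined entry table: user-defined entries come first, then IUnknown (when the runtime supplies it), IReferenceTrackerTarget (when tracker support is requested), and finally the tagged interface. The sketch below mirrors that slot assignment with local stand-in types so the ordering can be checked in isolation; the flag values match the enum in this change, everything else is illustrative.

```cpp
// Local stand-ins only; the real code returns dispatch pointers, this returns slot indices.
#include <cassert>
#include <cstdint>
#include <optional>

enum Flags : int32_t { None = 0, CallerDefinedIUnknown = 1, TrackerSupport = 2 };
enum class RuntimeIface { IUnknown, IReferenceTrackerTarget, Tagged };

std::optional<int32_t> RuntimeDefinedSlot(int32_t userDefinedCount, int32_t flags, RuntimeIface which)
{
    int32_t i = userDefinedCount;                 // runtime-defined slots follow the user-defined ones
    if ((flags & CallerDefinedIUnknown) == 0)
    {
        if (which == RuntimeIface::IUnknown) return i;
        ++i;
    }
    if ((flags & TrackerSupport) != 0)
    {
        if (which == RuntimeIface::IReferenceTrackerTarget) return i;
        ++i;
    }
    if (which == RuntimeIface::Tagged) return i;
    return std::nullopt;                          // e.g. IUnknown requested but caller-defined
}

int main()
{
    // 2 user-defined interfaces, runtime-supplied IUnknown plus tracker support:
    assert(RuntimeDefinedSlot(2, None | TrackerSupport, RuntimeIface::IUnknown) == 2);
    assert(RuntimeDefinedSlot(2, None | TrackerSupport, RuntimeIface::IReferenceTrackerTarget) == 3);
    assert(RuntimeDefinedSlot(2, None | TrackerSupport, RuntimeIface::Tagged) == 4);
    // With a caller-defined IUnknown the runtime adds no IUnknown slot at all.
    assert(RuntimeDefinedSlot(2, CallerDefinedIUnknown, RuntimeIface::Tagged) == 2);
    return 0;
}
```

With CallerDefinedIUnknown set, the runtime contributes no IUnknown entry, so the tagged interface lands immediately after the user-defined entries, which is what the counter skipping in the real code expresses.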
AllocScenario::NativeObjectWrapper); - if (cxtMem == nullptr) - return E_OUTOFMEMORY; - - void* runtimeContext = cxtMem + sizeof(NativeObjectWrapperContext); - - // Contract specifically requires zeroing out runtime context. - ::memset(runtimeContext, 0, runtimeContextSize); - - NativeObjectWrapperContext* contextLocal = new (cxtMem) NativeObjectWrapperContext{ runtimeContext, trackerObject, inner }; - - if (trackerObject != nullptr) - { - // Inform the tracker object manager - _ASSERTE(flags & InteropLib::Com::CreateObjectFlags_TrackerObject); - hr = TrackerObjectManager::AfterWrapperCreated(trackerObject); - if (FAILED(hr)) - { - Destroy(contextLocal); - return hr; - } - - // Aggregation with a tracker object must be "cleaned up". - if (flags & InteropLib::Com::CreateObjectFlags_Aggregated) - { - _ASSERTE(inner != nullptr); - contextLocal->HandleReferenceTrackerAggregation(); - } - } - - *context = contextLocal; - return S_OK; -} - -void NativeObjectWrapperContext::Destroy(_In_ NativeObjectWrapperContext* wrapper) -{ - _ASSERTE(wrapper != nullptr); - - // Manually trigger the destructor since placement - // new was used to allocate the object. - wrapper->~NativeObjectWrapperContext(); - InteropLibImports::MemFree(wrapper, AllocScenario::NativeObjectWrapper); -} - -NativeObjectWrapperContext::NativeObjectWrapperContext( - _In_ void* runtimeContext, - _In_opt_ IReferenceTracker* trackerObject, - _In_opt_ IUnknown* nativeObjectAsInner) - : _trackerObject{ trackerObject } - , _runtimeContext{ runtimeContext } - , _trackerObjectDisconnected{ FALSE } - , _trackerObjectState{ (trackerObject == nullptr ? TrackerObjectState::NotSet : TrackerObjectState::SetForRelease) } - , _nativeObjectAsInner{ nativeObjectAsInner } -#ifdef _DEBUG - , _sentinel{ LiveContextSentinel } -#endif -{ - if (_trackerObjectState == TrackerObjectState::SetForRelease) - (void)_trackerObject->AddRef(); -} - -NativeObjectWrapperContext::~NativeObjectWrapperContext() -{ - DisconnectTracker(); - - // If the inner was supplied, we need to release our reference. - if (_nativeObjectAsInner != nullptr) - (void)_nativeObjectAsInner->Release(); - -#ifdef _DEBUG - _sentinel = DeadContextSentinel; -#endif -} - -void* NativeObjectWrapperContext::GetRuntimeContext() const noexcept -{ - return _runtimeContext; -} - -IReferenceTracker* NativeObjectWrapperContext::GetReferenceTracker() const noexcept -{ - return ((_trackerObjectState == TrackerObjectState::NotSet || _trackerObjectDisconnected) ? nullptr : _trackerObject); -} - -// See TrackerObjectManager::AfterWrapperCreated() for AddRefFromTrackerSource() usage. -// See NativeObjectWrapperContext::HandleReferenceTrackerAggregation() for additional -// cleanup logistics. -void NativeObjectWrapperContext::DisconnectTracker() noexcept -{ - // Return if already disconnected or the tracker isn't set. - if (FALSE != ::InterlockedCompareExchange((LONG*)&_trackerObjectDisconnected, TRUE, FALSE) - || _trackerObjectState == TrackerObjectState::NotSet) - { - return; - } - - _ASSERTE(_trackerObject != nullptr); - - // Always release the tracker source during a disconnect. - // This to account for the implied IUnknown ownership by the runtime. - (void)_trackerObject->ReleaseFromTrackerSource(); // IUnknown - - // Disconnect from the tracker. 
- if (_trackerObjectState == TrackerObjectState::SetForRelease) - { - (void)_trackerObject->ReleaseFromTrackerSource(); // IReferenceTracker - (void)_trackerObject->Release(); - } -} - -void NativeObjectWrapperContext::HandleReferenceTrackerAggregation() noexcept -{ - _ASSERTE(_trackerObjectState == TrackerObjectState::SetForRelease && _trackerObject != nullptr); - - // Aggregation with an IReferenceTracker instance creates an extra AddRef() - // on the outer (e.g. MOW) so we clean up that issue here. - _trackerObjectState = TrackerObjectState::SetNoRelease; - - (void)_trackerObject->ReleaseFromTrackerSource(); // IReferenceTracker - (void)_trackerObject->Release(); + return _target; } diff --git a/src/coreclr/interop/comwrappers.hpp b/src/coreclr/interop/comwrappers.hpp index 47bf008ac501..00ebfc39194b 100644 --- a/src/coreclr/interop/comwrappers.hpp +++ b/src/coreclr/interop/comwrappers.hpp @@ -9,62 +9,14 @@ #include #include "referencetrackertypes.hpp" -#ifndef DEFINE_ENUM_FLAG_OPERATORS -#define DEFINE_ENUM_FLAG_OPERATORS(ENUMTYPE) \ -extern "C++" { \ - inline ENUMTYPE operator | (ENUMTYPE a, ENUMTYPE b) { return ENUMTYPE(((int)a)|((int)b)); } \ - inline ENUMTYPE operator |= (ENUMTYPE &a, ENUMTYPE b) { return (ENUMTYPE &)(((int &)a) |= ((int)b)); } \ - inline ENUMTYPE operator & (ENUMTYPE a, ENUMTYPE b) { return ENUMTYPE(((int)a)&((int)b)); } \ - inline ENUMTYPE operator &= (ENUMTYPE &a, ENUMTYPE b) { return (ENUMTYPE &)(((int &)a) &= ((int)b)); } \ - inline ENUMTYPE operator ~ (ENUMTYPE a) { return (ENUMTYPE)(~((int)a)); } \ - inline ENUMTYPE operator ^ (ENUMTYPE a, ENUMTYPE b) { return ENUMTYPE(((int)a)^((int)b)); } \ - inline ENUMTYPE operator ^= (ENUMTYPE &a, ENUMTYPE b) { return (ENUMTYPE &)(((int &)a) ^= ((int)b)); } \ -} -#endif - -enum class CreateComInterfaceFlagsEx : int32_t -{ - None = InteropLib::Com::CreateComInterfaceFlags_None, - CallerDefinedIUnknown = InteropLib::Com::CreateComInterfaceFlags_CallerDefinedIUnknown, - TrackerSupport = InteropLib::Com::CreateComInterfaceFlags_TrackerSupport, - - // Highest bits are reserved for internal usage - LacksICustomQueryInterface = 1 << 29, - IsComActivated = 1 << 30, - IsPegged = 1 << 31, - - InternalMask = IsPegged | IsComActivated | LacksICustomQueryInterface, -}; - -DEFINE_ENUM_FLAG_OPERATORS(CreateComInterfaceFlagsEx); - -// Forward declarations -namespace ABI -{ - struct ComInterfaceDispatch; - struct ComInterfaceEntry; -} +using InteropLib::Com::CreateComInterfaceFlagsEx; static constexpr size_t ManagedObjectWrapperRefCountOffset(); +static constexpr size_t ManagedObjectWrapperFlagsOffset(); // Class for wrapping a managed object and projecting it in a non-managed environment -class ManagedObjectWrapper +class ManagedObjectWrapper final : public InteropLib::ABI::ManagedObjectWrapperLayout { - friend constexpr size_t ManagedObjectWrapperRefCountOffset(); -public: - Volatile Target; - -private: - LONGLONG _refCount; - - const int32_t _runtimeDefinedCount; - const int32_t _userDefinedCount; - const ABI::ComInterfaceEntry* _runtimeDefined; - const ABI::ComInterfaceEntry* _userDefined; - ABI::ComInterfaceDispatch* _dispatches; - - Volatile _flags; - public: // static // Get the implementation for IUnknown. 
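CreateComInterfaceFlagsEx and the DEFINE_ENUM_FLAG_OPERATORS helper move out of this header and into interoplib.h in this change. The macro exists because scoped enums have no built-in bitwise operators; the sketch below shows the same pattern on a trimmed local copy of the enum so the IsSet-style tests used by the wrapper read naturally. The operator bodies match the shape of the macro expansion; the enum here is illustrative, not the full definition.

```cpp
#include <cassert>
#include <cstdint>

enum class SampleFlags : int32_t
{
    None                       = 0,
    CallerDefinedIUnknown      = 1,
    TrackerSupport             = 2,
    LacksICustomQueryInterface = 1 << 29,
};

// Same shape as the DEFINE_ENUM_FLAG_OPERATORS expansion, trimmed to two operators.
inline SampleFlags operator|(SampleFlags a, SampleFlags b) { return SampleFlags((int)a | (int)b); }
inline SampleFlags operator&(SampleFlags a, SampleFlags b) { return SampleFlags((int)a & (int)b); }

bool IsSet(SampleFlags value, SampleFlags flag)
{
    return (value & flag) != SampleFlags::None;
}

int main()
{
    SampleFlags f = SampleFlags::CallerDefinedIUnknown | SampleFlags::TrackerSupport;
    assert(IsSet(f, SampleFlags::TrackerSupport));
    assert(!IsSet(f, SampleFlags::LacksICustomQueryInterface));
    return 0;
}
```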
static void GetIUnknownImpl( @@ -72,6 +24,10 @@ class ManagedObjectWrapper _Out_ void** fpAddRef, _Out_ void** fpRelease); + static void const* GetIReferenceTrackerTargetImpl() noexcept; + + static void const* GetTaggedCurrentVersionImpl() noexcept; + // Convert the IUnknown if the instance is a ManagedObjectWrapper // into a ManagedObjectWrapper, otherwise null. static ManagedObjectWrapper* MapFromIUnknown(_In_ IUnknown* pUnk); @@ -82,42 +38,17 @@ class ManagedObjectWrapper // performing a QueryInterface() which may not always be possible. // See implementation for more details. static ManagedObjectWrapper* MapFromIUnknownWithQueryInterface(_In_ IUnknown* pUnk); - - // Create a ManagedObjectWrapper instance - static HRESULT Create( - _In_ InteropLib::Com::CreateComInterfaceFlags flags, - _In_ InteropLib::OBJECTHANDLE objectHandle, - _In_ int32_t userDefinedCount, - _In_ ABI::ComInterfaceEntry* userDefined, - _Outptr_ ManagedObjectWrapper** mow); - - // Destroy the instance - static void Destroy(_In_ ManagedObjectWrapper* wrapper); - private: - ManagedObjectWrapper( - _In_ CreateComInterfaceFlagsEx flags, - _In_ InteropLib::OBJECTHANDLE objectHandle, - _In_ int32_t runtimeDefinedCount, - _In_ const ABI::ComInterfaceEntry* runtimeDefined, - _In_ int32_t userDefinedCount, - _In_ const ABI::ComInterfaceEntry* userDefined, - _In_ ABI::ComInterfaceDispatch* dispatches); - - ~ManagedObjectWrapper(); - // Query the runtime defined tables. void* AsRuntimeDefined(_In_ REFIID riid); // Query the user defined tables. void* AsUserDefined(_In_ REFIID riid); - public: // N.B. Does not impact the reference count of the object. void* As(_In_ REFIID riid); // Attempt to set the target object handle based on an assumed current value. - bool TrySetObjectHandle(_In_ InteropLib::OBJECTHANDLE objectHandle, _In_ InteropLib::OBJECTHANDLE current = nullptr); bool IsSet(_In_ CreateComInterfaceFlagsEx flag) const; void SetFlag(_In_ CreateComInterfaceFlagsEx flag); void ResetFlag(_In_ CreateComInterfaceFlagsEx flag); @@ -128,6 +59,8 @@ class ManagedObjectWrapper // Check if the wrapper has been marked to be destroyed. bool IsMarkedToDestroy() const; + InteropLib::OBJECTHANDLE GetTarget() const; + public: // IReferenceTrackerTarget ULONG AddRefFromReferenceTracker(); ULONG ReleaseFromReferenceTracker(); @@ -142,82 +75,13 @@ class ManagedObjectWrapper ULONG Release(void); }; -// ABI contract. This below offset is assumed in managed code and the DAC. -ABI_ASSERT(offsetof(ManagedObjectWrapper, Target) == 0); - -static constexpr size_t ManagedObjectWrapperRefCountOffset() -{ - // _refCount is a private field and offsetof won't let you look at private fields. - // To overcome, this function is a friend function of ManagedObjectWrapper. - return offsetof(ManagedObjectWrapper, _refCount); -} - -// ABI contract used by the DAC. -ABI_ASSERT(offsetof(ManagedObjectWrapper, Target) == offsetof(InteropLib::ABI::ManagedObjectWrapperLayout, ManagedObject)); -ABI_ASSERT(ManagedObjectWrapperRefCountOffset() == offsetof(InteropLib::ABI::ManagedObjectWrapperLayout, RefCount)); - -// State ownership mechanism. 
-enum class TrackerObjectState -{ - NotSet, - SetNoRelease, - SetForRelease, -}; - -// Class for connecting a native COM object to a managed object instance -class NativeObjectWrapperContext -{ - IReferenceTracker* _trackerObject; - void* _runtimeContext; - Volatile _trackerObjectDisconnected; - TrackerObjectState _trackerObjectState; - IUnknown* _nativeObjectAsInner; - -#ifdef _DEBUG - size_t _sentinel; -#endif -public: // static - // Convert a context pointer into a NativeObjectWrapperContext. - static NativeObjectWrapperContext* MapFromRuntimeContext(_In_ void* cxt); - - // Create a NativeObjectWrapperContext instance - static HRESULT Create( - _In_ IUnknown* external, - _In_opt_ IUnknown* nativeObjectAsInner, - _In_ InteropLib::Com::CreateObjectFlags flags, - _In_ size_t runtimeContextSize, - _Outptr_ NativeObjectWrapperContext** context); - - // Destroy the instance - static void Destroy(_In_ NativeObjectWrapperContext* wrapper); - -private: - NativeObjectWrapperContext(_In_ void* runtimeContext, _In_opt_ IReferenceTracker* trackerObject, _In_opt_ IUnknown* nativeObjectAsInner); - ~NativeObjectWrapperContext(); - -public: - // Get the associated runtime context for this context. - void* GetRuntimeContext() const noexcept; - - // Get the IReferenceTracker instance. - IReferenceTracker* GetReferenceTracker() const noexcept; - - // Disconnect reference tracker instance. - void DisconnectTracker() noexcept; - -private: - void HandleReferenceTrackerAggregation() noexcept; -}; - // Manage native object wrappers that support IReferenceTracker. class TrackerObjectManager { public: - // Called when an IReferenceTracker instance is found. - static HRESULT OnIReferenceTrackerFound(_In_ IReferenceTracker* obj); + static bool HasReferenceTrackerManager(); - // Called after wrapper has been created. - static HRESULT AfterWrapperCreated(_In_ IReferenceTracker* obj); + static bool TryRegisterReferenceTrackerManager(_In_ IReferenceTrackerManager* manager); // Called before wrapper is about to be finalized (the same lifetime as short weak handle). static HRESULT BeforeWrapperFinalized(_In_ IReferenceTracker* obj); @@ -228,6 +92,8 @@ class TrackerObjectManager // End the reference tracking process for external object. static HRESULT EndReferenceTracking(); + + static HRESULT DetachNonPromotedObjects(_In_ InteropLibImports::RuntimeCallContext* cxt); }; // Class used to hold COM objects (i.e. IUnknown base class) diff --git a/src/coreclr/interop/inc/interoplib.h b/src/coreclr/interop/inc/interoplib.h index 684283b7133b..e4a09fb84c64 100644 --- a/src/coreclr/interop/inc/interoplib.h +++ b/src/coreclr/interop/inc/interoplib.h @@ -17,48 +17,16 @@ namespace InteropLib { using OBJECTHANDLE = void*; - namespace Com + namespace ABI { - // See CreateComInterfaceFlags in ComWrappers.cs - enum CreateComInterfaceFlags - { - CreateComInterfaceFlags_None = 0, - CreateComInterfaceFlags_CallerDefinedIUnknown = 1, - CreateComInterfaceFlags_TrackerSupport = 2, - }; - - // Create an IUnknown instance that represents the supplied managed object instance. - HRESULT CreateWrapperForObject( - _In_ OBJECTHANDLE instance, - _In_ INT32 vtableCount, - _In_ void* vtables, - _In_ enum CreateComInterfaceFlags flags, - _Outptr_ IUnknown** wrapper) noexcept; - - // Destroy the supplied wrapper - void DestroyWrapperForObject(_In_ void* wrapper) noexcept; - - // Check if a wrapper is considered a GC root. 
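The removed ABI_ASSERTs tied the wrapper's fields to the shared ManagedObjectWrapperLayout by offset; the new declaration instead derives ManagedObjectWrapper from that layout, so there is a single definition to keep in sync with managed code and the DAC. A simplified illustration of the two approaches, using stand-in types only:

```cpp
// Illustrative stand-ins; not the actual runtime types.
#include <cstddef>
#include <cstdint>

struct LayoutShared            // stands in for InteropLib::ABI::ManagedObjectWrapperLayout
{
    void* target;
    int64_t refCount;
};

// Old style: an independent class plus offset assertions against the shared layout.
struct OldWrapper
{
    void* Target;
    int64_t _refCount;
};
static_assert(offsetof(OldWrapper, Target) == offsetof(LayoutShared, target), "ABI mismatch");
static_assert(offsetof(OldWrapper, _refCount) == offsetof(LayoutShared, refCount), "ABI mismatch");

// New style: derive from the shared layout, so the fields *are* the ABI contract.
struct NewWrapper final : LayoutShared
{
    // behaviour only; no duplicated ABI fields to assert against
};

int main() { return 0; }
```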
- HRESULT IsWrapperRooted(_In_ IUnknown* wrapper) noexcept; + struct ManagedObjectWrapperLayout; + } - // Get the object for the supplied wrapper - HRESULT GetObjectForWrapper(_In_ IUnknown* wrapper, _Outptr_result_maybenull_ OBJECTHANDLE* object) noexcept; + namespace Com + { + bool IsRooted(_In_ ABI::ManagedObjectWrapperLayout* wrapper) noexcept; HRESULT MarkComActivated(_In_ IUnknown* wrapper) noexcept; - HRESULT IsComActivated(_In_ IUnknown* wrapper) noexcept; - - struct ExternalWrapperResult - { - // The returned context memory is guaranteed to be initialized to zero. - void* Context; - - // See https://learn.microsoft.com/windows/win32/api/windows.ui.xaml.hosting.referencetracker/ - // for details. - bool FromTrackerRuntime; - - // The supplied external object is wrapping a managed object. - bool ManagedObjectWrapper; - }; // See CreateObjectFlags in ComWrappers.cs enum CreateObjectFlags @@ -70,32 +38,21 @@ namespace InteropLib CreateObjectFlags_Unwrap = 8, }; - // Get the true identity and inner for the supplied IUnknown. - HRESULT DetermineIdentityAndInnerForExternal( - _In_ IUnknown* external, - _In_ enum CreateObjectFlags flags, - _Outptr_ IUnknown** identity, - _Inout_ IUnknown** innerMaybe) noexcept; - - // Allocate a wrapper context for an external object. - // The runtime supplies the external object, flags, and a memory - // request in order to bring the object into the runtime. - HRESULT CreateWrapperForExternal( - _In_ IUnknown* external, - _In_opt_ IUnknown* inner, - _In_ enum CreateObjectFlags flags, - _In_ size_t contextSize, - _Out_ ExternalWrapperResult* result) noexcept; - - // Inform the wrapper it is being collected. - void NotifyWrapperForExternalIsBeingCollected(_In_ void* context) noexcept; - - // Destroy the supplied wrapper. - // Optionally notify the wrapper of collection at the same time. - void DestroyWrapperForExternal(_In_ void* context, _In_ bool notifyIsBeingCollected = false) noexcept; - - // Separate the supplied wrapper from the tracker runtime. - void SeparateWrapperFromTrackerRuntime(_In_ void* context) noexcept; + enum class CreateComInterfaceFlagsEx : int32_t + { + // Matches the managed definition of System.Runtime.InteropServices.CreateComInterfaceFlags + None = 0, + CallerDefinedIUnknown = 1, + TrackerSupport = 2, + + // Highest bits are reserved for internal usage + LacksICustomQueryInterface = 1 << 29, + IsComActivated = 1 << 30, + IsPegged = 1 << 31, + + InternalMask = IsPegged | IsComActivated | LacksICustomQueryInterface, + }; + // Get internal interop IUnknown dispatch pointers. void GetIUnknownImpl( @@ -103,6 +60,8 @@ namespace InteropLib _Out_ void** fpAddRef, _Out_ void** fpRelease) noexcept; + void const* GetTaggedCurrentVersionImpl() noexcept; + // Begin the reference tracking process on external COM objects. // This should only be called during a runtime's GC phase. HRESULT BeginExternalObjectReferenceTracking(_In_ InteropLibImports::RuntimeCallContext* cxt) noexcept; @@ -110,8 +69,35 @@ namespace InteropLib // End the reference tracking process. // This should only be called during a runtime's GC phase. HRESULT EndExternalObjectReferenceTracking() noexcept; + + // Detach non-promoted objects from the reference tracker. + // This should only be called during a runtime's GC phase. 
+ HRESULT DetachNonPromotedObjects(_In_ InteropLibImports::RuntimeCallContext* cxt) noexcept; + + // Get the vtable for IReferenceTrackerTarget + void const* GetIReferenceTrackerTargetVftbl() noexcept; + + // Check if a ReferenceTrackerManager has been registered. + bool HasReferenceTrackerManager() noexcept; + + // Register a ReferenceTrackerManager if one has not already been registered. + bool TryRegisterReferenceTrackerManager(void* manager) noexcept; } } -#endif // _INTEROP_INC_INTEROPLIB_H_ +#ifndef DEFINE_ENUM_FLAG_OPERATORS +#define DEFINE_ENUM_FLAG_OPERATORS(ENUMTYPE) \ +extern "C++" { \ + inline ENUMTYPE operator | (ENUMTYPE a, ENUMTYPE b) { return ENUMTYPE(((int)a)|((int)b)); } \ + inline ENUMTYPE operator |= (ENUMTYPE &a, ENUMTYPE b) { return (ENUMTYPE &)(((int &)a) |= ((int)b)); } \ + inline ENUMTYPE operator & (ENUMTYPE a, ENUMTYPE b) { return ENUMTYPE(((int)a)&((int)b)); } \ + inline ENUMTYPE operator &= (ENUMTYPE &a, ENUMTYPE b) { return (ENUMTYPE &)(((int &)a) &= ((int)b)); } \ + inline ENUMTYPE operator ~ (ENUMTYPE a) { return (ENUMTYPE)(~((int)a)); } \ + inline ENUMTYPE operator ^ (ENUMTYPE a, ENUMTYPE b) { return ENUMTYPE(((int)a)^((int)b)); } \ + inline ENUMTYPE operator ^= (ENUMTYPE &a, ENUMTYPE b) { return (ENUMTYPE &)(((int &)a) ^= ((int)b)); } \ +} +#endif + +DEFINE_ENUM_FLAG_OPERATORS(InteropLib::Com::CreateComInterfaceFlagsEx); +#endif // _INTEROP_INC_INTEROPLIB_H_ diff --git a/src/coreclr/interop/inc/interoplibabi.h b/src/coreclr/interop/inc/interoplibabi.h index 7789a68217b2..217ecda8b73e 100644 --- a/src/coreclr/interop/inc/interoplibabi.h +++ b/src/coreclr/interop/inc/interoplibabi.h @@ -5,27 +5,79 @@ #define _INTEROP_INC_INTEROPLIBABI_H_ #include +#include namespace InteropLib { namespace ABI { - // Updating this also requires updating ComInterfaceDispatch::GetInstance. + // The definitions in this file are constants and data structures that are shared between interoplib, + // the managed ComWrappers code, and the DAC's ComWrappers support. + // All constants, type layouts, and algorithms that calculate pointer offsets should be in this file + // and should have identical implementations with the managed ComWrappers code. + #ifdef HOST_64BIT - const size_t DispatchAlignmentThisPtr = 64; // Should be a power of 2. + constexpr size_t DispatchAlignmentThisPtr = 64; // Should be a power of 2. #else - const size_t DispatchAlignmentThisPtr = 16; // Should be a power of 2. + constexpr size_t DispatchAlignmentThisPtr = 16; // Should be a power of 2. #endif - const intptr_t DispatchThisPtrMask = ~(DispatchAlignmentThisPtr - 1); + constexpr intptr_t DispatchThisPtrMask = ~(DispatchAlignmentThisPtr - 1); + + static_assert(sizeof(void*) < DispatchAlignmentThisPtr, "DispatchAlignmentThisPtr must be larger than sizeof(void*)."); + + constexpr size_t EntriesPerThisPtr = (DispatchAlignmentThisPtr / sizeof(void*)) - 1; + + struct ComInterfaceDispatch + { + const void* vtable; + }; + + static_assert(sizeof(ComInterfaceDispatch) == sizeof(void*), "ComInterfaceDispatch must be pointer-sized."); + + struct ManagedObjectWrapperLayout; + + struct InternalComInterfaceDispatch + { + private: + ManagedObjectWrapperLayout* _thisPtr; + public: + ComInterfaceDispatch _entries[EntriesPerThisPtr]; + }; + + struct ComInterfaceEntry + { + GUID IID; + const void* Vtable; + }; // Managed object wrapper layout. // This is designed to codify the binary layout. 
struct ManagedObjectWrapperLayout { - PTR_VOID ManagedObject; - long long RefCount; + public: + LONGLONG GetRawRefCount() const + { + return _refCount; + } + + protected: + Volatile _target; + int64_t _refCount; + + Volatile _flags; + int32_t _userDefinedCount; + ComInterfaceEntry* _userDefined; + InternalComInterfaceDispatch* _dispatches; }; + + // Given the entry index, compute the dispatch index. + inline ComInterfaceDispatch* IndexIntoDispatchSection(int32_t i, InternalComInterfaceDispatch* dispatches) + { + InternalComInterfaceDispatch* dispatch = dispatches + i / EntriesPerThisPtr; + ComInterfaceDispatch* entries = dispatch->_entries; + return entries + (i % EntriesPerThisPtr); + } } } diff --git a/src/coreclr/interop/inc/interoplibimports.h b/src/coreclr/interop/inc/interoplibimports.h index 57824c36d78c..a75252bf3019 100644 --- a/src/coreclr/interop/inc/interoplibimports.h +++ b/src/coreclr/interop/inc/interoplibimports.h @@ -8,45 +8,13 @@ namespace InteropLibImports { - enum class AllocScenario - { - ManagedObjectWrapper, - NativeObjectWrapper, - }; - - // Allocate the given amount of memory. - void* MemAlloc(_In_ size_t sizeInBytes, _In_ AllocScenario scenario) noexcept; - - // Free the previously allocated memory. - void MemFree(_In_ void* mem, _In_ AllocScenario scenario) noexcept; - - // Add memory pressure to the runtime's GC calculations. - HRESULT AddMemoryPressureForExternal(_In_ UINT64 memoryInBytes) noexcept; - - // Remove memory pressure from the runtime's GC calculations. - HRESULT RemoveMemoryPressureForExternal(_In_ UINT64 memoryInBytes) noexcept; - - enum class GcRequest - { - Default, - FullBlocking // This is an expensive GC request, akin to a Gen2/"stop the world" GC. - }; - - // Request a GC from the runtime. - HRESULT RequestGarbageCollectionForExternal(_In_ GcRequest req) noexcept; - - // Wait for the runtime's finalizer to clean up objects. - HRESULT WaitForRuntimeFinalizerForExternal() noexcept; - - // Release objects associated with the current thread. - HRESULT ReleaseExternalObjectsFromCurrentThread() noexcept; - - // Delete Object instance handle. - void DeleteObjectInstanceHandle(_In_ InteropLib::OBJECTHANDLE handle) noexcept; - // Check if Object instance handle still points at an Object. bool HasValidTarget(_In_ InteropLib::OBJECTHANDLE handle) noexcept; + void DestroyHandle(_In_ InteropLib::OBJECTHANDLE handle) noexcept; + + bool IsObjectPromoted(_In_ InteropLib::OBJECTHANDLE handle) noexcept; + // Get the current global pegging state. bool GetGlobalPeggingState() noexcept; @@ -54,26 +22,17 @@ namespace InteropLibImports void SetGlobalPeggingState(_In_ bool state) noexcept; // Get next External Object Context from the Runtime calling context. - // S_OK - Context is valid. - // S_FALSE - Iterator has reached end and context out parameter is set to NULL. - HRESULT IteratorNext( + bool IteratorNext( _In_ RuntimeCallContext* runtimeContext, - _Outptr_result_maybenull_ void** extObjContext) noexcept; + _Outptr_result_maybenull_ void** trackerTarget, + _Outptr_result_maybenull_ InteropLib::OBJECTHANDLE* proxyObject) noexcept; // Tell the runtime a reference path between the External Object Context and // OBJECTHANDLE was found. HRESULT FoundReferencePath( _In_ RuntimeCallContext* runtimeContext, - _In_ void* extObjContext, - _In_ InteropLib::OBJECTHANDLE handle) noexcept; - - // Get or create an IReferenceTrackerTarget instance for the supplied - // external object. 
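The new interoplibabi.h expresses the dispatch section as fixed-size, alignment-sized blocks: each InternalComInterfaceDispatch begins with the wrapper's 'this' pointer followed by EntriesPerThisPtr vtable slots, so IndexIntoDispatchSection reduces to a divide and a modulo. Presumably, as in the old IsAbleToDispatch/ToManagedObjectWrapper code, masking an entry's address with DispatchThisPtrMask then recovers the owning block; that recovery step is an assumption here, not shown in this hunk. The sketch below replays both calculations with miniature stand-in types and the 64-bit constant.

```cpp
// Miniature stand-ins for the ABI types; constants use the 64-bit values.
#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr size_t DispatchAlignmentThisPtr = 64;
constexpr intptr_t DispatchThisPtrMask = ~(intptr_t)(DispatchAlignmentThisPtr - 1);
constexpr size_t EntriesPerThisPtr = DispatchAlignmentThisPtr / sizeof(void*) - 1;

struct alignas(DispatchAlignmentThisPtr) MiniDispatchBlock
{
    void* thisPtr;                           // wrapper 'this' pointer, first slot of the block
    const void* entries[EntriesPerThisPtr];  // vtable slots
};

int main()
{
    int wrapper = 0;                         // stands in for the managed object wrapper
    MiniDispatchBlock blocks[2] = {};
    blocks[0].thisPtr = &wrapper;
    blocks[1].thisPtr = &wrapper;

    // IndexIntoDispatchSection: entry i lives in block i / EntriesPerThisPtr,
    // slot i % EntriesPerThisPtr. Pick an index that lands in the second block.
    size_t i = EntriesPerThisPtr + 1;
    const void** entry = &blocks[i / EntriesPerThisPtr].entries[i % EntriesPerThisPtr];

    // Masking the entry's address recovers the containing aligned block and
    // therefore the wrapper 'this' pointer (assumed recovery path, see lead-in).
    auto* block = reinterpret_cast<MiniDispatchBlock*>(
        reinterpret_cast<intptr_t>(entry) & DispatchThisPtrMask);
    assert(block == &blocks[1]);
    assert(block->thisPtr == &wrapper);
    return 0;
}
```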
- HRESULT GetOrCreateTrackerTargetForExternal( - _In_ IUnknown* externalComObject, - _In_ InteropLib::Com::CreateObjectFlags externalObjectFlags, - _In_ InteropLib::Com::CreateComInterfaceFlags trackerTargetFlags, - _Outptr_ void** trackerTarget) noexcept; + _In_ InteropLib::OBJECTHANDLE sourceHandle, + _In_ InteropLib::OBJECTHANDLE targetHandle) noexcept; // The enum describes the value of System.Runtime.InteropServices.CustomQueryInterfaceResult // and the case where the object doesn't support ICustomQueryInterface. diff --git a/src/coreclr/interop/interoplib.cpp b/src/coreclr/interop/interoplib.cpp index 452df3cce39b..b86842bcc3f0 100644 --- a/src/coreclr/interop/interoplib.cpp +++ b/src/coreclr/interop/interoplib.cpp @@ -3,6 +3,7 @@ #include "platform.h" #include +#include #include #ifdef FEATURE_COMWRAPPERS @@ -18,65 +19,6 @@ namespace InteropLib // Exposed COM related API namespace Com { - HRESULT CreateWrapperForObject( - _In_ OBJECTHANDLE instance, - _In_ INT32 vtableCount, - _In_ void* vtablesRaw, - _In_ enum CreateComInterfaceFlags flags, - _Outptr_ IUnknown** wrapper) noexcept - { - _ASSERTE(instance != nullptr && wrapper != nullptr); - - // Validate the supplied vtable data is valid with a - // reasonable count. - if ((vtablesRaw == nullptr && vtableCount != 0) || vtableCount < 0) - return E_INVALIDARG; - - HRESULT hr; - - // Convert input to appropriate types. - auto vtables = static_cast<::ABI::ComInterfaceEntry*>(vtablesRaw); - - ManagedObjectWrapper* mow; - RETURN_IF_FAILED(ManagedObjectWrapper::Create(flags, instance, vtableCount, vtables, &mow)); - - *wrapper = static_cast(mow->As(IID_IUnknown)); - return S_OK; - } - - void DestroyWrapperForObject(_In_ void* wrapperMaybe) noexcept - { - ManagedObjectWrapper* wrapper = ManagedObjectWrapper::MapFromIUnknownWithQueryInterface(static_cast(wrapperMaybe)); - - // A caller should not be destroying a wrapper without knowing if the wrapper is valid. - _ASSERTE(wrapper != nullptr); - - ManagedObjectWrapper::Destroy(wrapper); - } - - HRESULT IsWrapperRooted(_In_ IUnknown* wrapperMaybe) noexcept - { - ManagedObjectWrapper* wrapper = ManagedObjectWrapper::MapFromIUnknown(wrapperMaybe); - if (wrapper == nullptr) - return E_INVALIDARG; - - return wrapper->IsRooted() ? S_OK : S_FALSE; - } - - HRESULT GetObjectForWrapper(_In_ IUnknown* wrapper, _Outptr_result_maybenull_ OBJECTHANDLE* object) noexcept - { - _ASSERTE(wrapper != nullptr && object != nullptr); - *object = nullptr; - - // Attempt to get the managed object wrapper. - ManagedObjectWrapper *mow = ManagedObjectWrapper::MapFromIUnknownWithQueryInterface(wrapper); - if (mow == nullptr) - return E_INVALIDARG; - - *object = mow->Target; - return S_OK; - } - HRESULT MarkComActivated(_In_ IUnknown* wrapperMaybe) noexcept { ManagedObjectWrapper* wrapper = ManagedObjectWrapper::MapFromIUnknownWithQueryInterface(wrapperMaybe); @@ -87,143 +29,52 @@ namespace InteropLib return S_OK; } - HRESULT IsComActivated(_In_ IUnknown* wrapperMaybe) noexcept + void GetIUnknownImpl( + _Out_ void** fpQueryInterface, + _Out_ void** fpAddRef, + _Out_ void** fpRelease) noexcept { - ManagedObjectWrapper* wrapper = ManagedObjectWrapper::MapFromIUnknownWithQueryInterface(wrapperMaybe); - if (wrapper == nullptr) - return E_INVALIDARG; - - return wrapper->IsSet(CreateComInterfaceFlagsEx::IsComActivated) ? 
S_OK : S_FALSE; + ManagedObjectWrapper::GetIUnknownImpl(fpQueryInterface, fpAddRef, fpRelease); } - HRESULT DetermineIdentityAndInnerForExternal( - _In_ IUnknown* external, - _In_ enum CreateObjectFlags flags, - _Outptr_ IUnknown** identity, - _Inout_ IUnknown** innerMaybe) noexcept + void const* GetTaggedCurrentVersionImpl() noexcept { - _ASSERTE(external != nullptr && identity != nullptr && innerMaybe != nullptr); - - IUnknown* checkForIdentity = external; - - // Check if the flags indicate we are creating - // an object for an external IReferenceTracker instance - // that we are aggregating with. - bool refTrackerInnerScenario = (flags & CreateObjectFlags_TrackerObject) - && (flags & CreateObjectFlags_Aggregated); - - ComHolder trackerObject; - if (refTrackerInnerScenario) - { - // We are checking the supplied external value - // for IReferenceTracker since in .NET 5 this could - // actually be the inner and we want the true identity - // not the inner . This is a trick since the only way - // to get identity from an inner is through a non-IUnknown - // interface QI. Once we have the IReferenceTracker - // instance we can be sure the QI for IUnknown will really - // be the true identity. - HRESULT hr = external->QueryInterface(IID_IReferenceTracker, (void**)&trackerObject); - if (SUCCEEDED(hr)) - checkForIdentity = trackerObject.p; - } - - HRESULT hr; - - IUnknown* identityLocal; - RETURN_IF_FAILED(checkForIdentity->QueryInterface(IID_IUnknown, (void **)&identityLocal)); - - // Set the inner if scenario dictates an update. - if (*innerMaybe == nullptr // User didn't supply inner - .NET 5 API scenario sanity check. - && checkForIdentity != external // Target of check was changed - .NET 5 API scenario sanity check. - && external != identityLocal // The supplied object doesn't match the computed identity. - && refTrackerInnerScenario) // The appropriate flags were set. - { - *innerMaybe = external; - } - - *identity = identityLocal; - return S_OK; + return ManagedObjectWrapper::GetTaggedCurrentVersionImpl(); } - HRESULT CreateWrapperForExternal( - _In_ IUnknown* external, - _In_opt_ IUnknown* inner, - _In_ enum CreateObjectFlags flags, - _In_ size_t contextSize, - _Out_ ExternalWrapperResult* result) noexcept + HRESULT BeginExternalObjectReferenceTracking(_In_ RuntimeCallContext* cxt) noexcept { - _ASSERTE(external != nullptr && result != nullptr); - - HRESULT hr; - - NativeObjectWrapperContext* wrapperContext; - RETURN_IF_FAILED(NativeObjectWrapperContext::Create(external, inner, flags, contextSize, &wrapperContext)); - - result->Context = wrapperContext->GetRuntimeContext(); - result->FromTrackerRuntime = (wrapperContext->GetReferenceTracker() != nullptr); - result->ManagedObjectWrapper = (ManagedObjectWrapper::MapFromIUnknownWithQueryInterface(external) != nullptr); - return S_OK; + return TrackerObjectManager::BeginReferenceTracking(cxt); } - void NotifyWrapperForExternalIsBeingCollected(_In_ void* contextMaybe) noexcept - { - NativeObjectWrapperContext* context = NativeObjectWrapperContext::MapFromRuntimeContext(contextMaybe); - - // A caller should not be destroying a context without knowing if the context is valid. - _ASSERTE(context != nullptr); - - // Check if the tracker object manager should be informed of collection. - IReferenceTracker* trackerMaybe = context->GetReferenceTracker(); - if (trackerMaybe != nullptr) - { - // We only call this during a GC so ignore the failure as - // there is no way we can handle it at this point. 
- HRESULT hr = TrackerObjectManager::BeforeWrapperFinalized(trackerMaybe); - _ASSERTE(SUCCEEDED(hr)); - (void)hr; - } + HRESULT EndExternalObjectReferenceTracking() noexcept + { + return TrackerObjectManager::EndReferenceTracking(); } - void DestroyWrapperForExternal(_In_ void* contextMaybe, _In_ bool notifyIsBeingCollected) noexcept + HRESULT DetachNonPromotedObjects(_In_ RuntimeCallContext* cxt) noexcept { - NativeObjectWrapperContext* context = NativeObjectWrapperContext::MapFromRuntimeContext(contextMaybe); - - // A caller should not be destroying a context without knowing if the context is valid. - _ASSERTE(context != nullptr); - - if (notifyIsBeingCollected) - NotifyWrapperForExternalIsBeingCollected(contextMaybe); - - NativeObjectWrapperContext::Destroy(context); - } + return TrackerObjectManager::DetachNonPromotedObjects(cxt); + } - void SeparateWrapperFromTrackerRuntime(_In_ void* contextMaybe) noexcept + void const* GetIReferenceTrackerTargetVftbl() noexcept { - NativeObjectWrapperContext* context = NativeObjectWrapperContext::MapFromRuntimeContext(contextMaybe); - - // A caller should not be separating a context without knowing if the context is valid. - _ASSERTE(context != nullptr); - - context->DisconnectTracker(); + return ManagedObjectWrapper::GetIReferenceTrackerTargetImpl(); } - void GetIUnknownImpl( - _Out_ void** fpQueryInterface, - _Out_ void** fpAddRef, - _Out_ void** fpRelease) noexcept + bool HasReferenceTrackerManager() noexcept { - ManagedObjectWrapper::GetIUnknownImpl(fpQueryInterface, fpAddRef, fpRelease); + return TrackerObjectManager::HasReferenceTrackerManager(); } - HRESULT BeginExternalObjectReferenceTracking(_In_ RuntimeCallContext* cxt) noexcept + bool TryRegisterReferenceTrackerManager(_In_ void* manager) noexcept { - return TrackerObjectManager::BeginReferenceTracking(cxt); + return TrackerObjectManager::TryRegisterReferenceTrackerManager((IReferenceTrackerManager*)manager); } - HRESULT EndExternalObjectReferenceTracking() noexcept + bool IsRooted(InteropLib::ABI::ManagedObjectWrapperLayout* mow) noexcept { - return TrackerObjectManager::EndReferenceTracking(); + return reinterpret_cast(mow)->IsRooted(); } } diff --git a/src/coreclr/interop/trackerobjectmanager.cpp b/src/coreclr/interop/trackerobjectmanager.cpp index 0df78164906d..cc178d418717 100644 --- a/src/coreclr/interop/trackerobjectmanager.cpp +++ b/src/coreclr/interop/trackerobjectmanager.cpp @@ -9,148 +9,11 @@ using RuntimeCallContext = InteropLibImports::RuntimeCallContext; namespace { - // 29a71c6a-3c42-4416-a39d-e2825a07a773 - const GUID IID_IReferenceTrackerHost = { 0x29a71c6a, 0x3c42, 0x4416, { 0xa3, 0x9d, 0xe2, 0x82, 0x5a, 0x7, 0xa7, 0x73} }; - - // 3cf184b4-7ccb-4dda-8455-7e6ce99a3298 - const GUID IID_IReferenceTrackerManager = { 0x3cf184b4, 0x7ccb, 0x4dda, { 0x84, 0x55, 0x7e, 0x6c, 0xe9, 0x9a, 0x32, 0x98} }; - // 04b3486c-4687-4229-8d14-505ab584dd88 const GUID IID_IFindReferenceTargetsCallback = { 0x04b3486c, 0x4687, 0x4229, { 0x8d, 0x14, 0x50, 0x5a, 0xb5, 0x84, 0xdd, 0x88} }; - // In order to minimize the impact of a constructor running on module load, - // the HostServices class should have no instance fields. 
- class HostServices : public IReferenceTrackerHost - { - public: // IReferenceTrackerHost - STDMETHOD(DisconnectUnusedReferenceSources)(_In_ DWORD dwFlags); - STDMETHOD(ReleaseDisconnectedReferenceSources)(); - STDMETHOD(NotifyEndOfReferenceTrackingOnThread)(); - STDMETHOD(GetTrackerTarget)(_In_ IUnknown* obj, _Outptr_ IReferenceTrackerTarget** ppNewReference); - STDMETHOD(AddMemoryPressure)(_In_ UINT64 bytesAllocated); - STDMETHOD(RemoveMemoryPressure)(_In_ UINT64 bytesAllocated); - - public: // IUnknown - // Lifetime maintained by stack - we don't care about ref counts - STDMETHOD_(ULONG, AddRef)() { return 1; } - STDMETHOD_(ULONG, Release)() { return 1; } - - STDMETHOD(QueryInterface)( - /* [in] */ REFIID riid, - /* [iid_is][out] */ _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject) - { - if (ppvObject == nullptr) - return E_POINTER; - - if (IsEqualIID(riid, IID_IReferenceTrackerHost)) - { - *ppvObject = static_cast(this); - } - else if (IsEqualIID(riid, IID_IUnknown)) - { - *ppvObject = static_cast(this); - } - else - { - *ppvObject = nullptr; - return E_NOINTERFACE; - } - - (void)AddRef(); - return S_OK; - } - }; - - // Global instance of host services. - HostServices g_HostServicesInstance; - - // Defined in windows.ui.xaml.hosting.referencetracker.h. - enum XAML_REFERENCETRACKER_DISCONNECT - { - // Indicates the disconnect is during a suspend and a GC can be trigger. - XAML_REFERENCETRACKER_DISCONNECT_SUSPEND = 0x00000001 - }; - - STDMETHODIMP HostServices::DisconnectUnusedReferenceSources(_In_ DWORD flags) - { - InteropLibImports::GcRequest type = InteropLibImports::GcRequest::Default; - - // Request a "stop the world" GC when a suspend is occurring. - if (flags & XAML_REFERENCETRACKER_DISCONNECT_SUSPEND) - type = InteropLibImports::GcRequest::FullBlocking; - - return InteropLibImports::RequestGarbageCollectionForExternal(type); - } - - STDMETHODIMP HostServices::ReleaseDisconnectedReferenceSources() - { - // We'd like to call InteropLibImports::WaitForRuntimeFinalizerForExternal() here, but this could - // lead to deadlock if the finalizer thread is trying to get back to this thread, because we are - // not pumping anymore. Disable this for now. See: https://github.com/dotnet/runtime/issues/109538. - return S_OK; - } - - STDMETHODIMP HostServices::NotifyEndOfReferenceTrackingOnThread() - { - return InteropLibImports::ReleaseExternalObjectsFromCurrentThread(); - } - - // Creates a proxy object (managed object wrapper) that points to the given IUnknown. - // The proxy represents the following: - // 1. Has a managed reference pointing to the external object - // and therefore forms a cycle that can be resolved by GC. - // 2. Forwards data binding requests. - // - // For example: - // - // Grid <---- NoCW Grid <-------- NoCW - // | ^ | ^ - // | | Becomes | | - // v | v | - // Rectangle Rectangle ----->Proxy - // - // Arguments - // obj - An IUnknown* where a NoCW points to (Grid, in this case) - // Notes: - // 1. We can either create a new NoCW or get back an old one from the cache. - // 2. This obj could be a regular tracker runtime object for data binding. - // ppNewReference - The IReferenceTrackerTarget* for the proxy created - // The tracker runtime will call IReferenceTrackerTarget to establish a reference. 
- // - STDMETHODIMP HostServices::GetTrackerTarget(_In_ IUnknown* obj, _Outptr_ IReferenceTrackerTarget** ppNewReference) - { - if (obj == nullptr || ppNewReference == nullptr) - return E_INVALIDARG; - - HRESULT hr; - - // QI for IUnknown to get the identity unknown - ComHolder identity; - RETURN_IF_FAILED(obj->QueryInterface(IID_IUnknown, (void**)&identity)); - - // Get or create an existing implementation for this external. - ComHolder target; - RETURN_IF_FAILED(InteropLibImports::GetOrCreateTrackerTargetForExternal( - identity, - InteropLib::Com::CreateObjectFlags_TrackerObject, - InteropLib::Com::CreateComInterfaceFlags_TrackerSupport, - (void**)&target)); - - return target->QueryInterface(IID_IReferenceTrackerTarget, (void**)ppNewReference); - } - - STDMETHODIMP HostServices::AddMemoryPressure(_In_ UINT64 bytesAllocated) - { - return InteropLibImports::AddMemoryPressureForExternal(bytesAllocated); - } - - STDMETHODIMP HostServices::RemoveMemoryPressure(_In_ UINT64 bytesAllocated) - { - return InteropLibImports::RemoveMemoryPressureForExternal(bytesAllocated); - } - VolatilePtr s_TrackerManager; // The one and only Tracker Manager instance - Volatile s_HasTrackingStarted = FALSE; + Volatile s_HasTrackingStarted = false; // Indicates if walking the external objects is needed. // (i.e. Have any IReferenceTracker instances been found?) @@ -160,17 +23,17 @@ namespace } // Callback implementation of IFindReferenceTargetsCallback - class FindDependentWrappersCallback : public IFindReferenceTargetsCallback + class FindDependentWrappersCallback final : public IFindReferenceTargetsCallback { - NativeObjectWrapperContext* _nowCxt; + OBJECTHANDLE _sourceHandle; RuntimeCallContext* _runtimeCallCxt; public: - FindDependentWrappersCallback(_In_ NativeObjectWrapperContext* nowCxt, _In_ RuntimeCallContext* runtimeCallCxt) - : _nowCxt{ nowCxt } + FindDependentWrappersCallback(_In_ OBJECTHANDLE sourceHandle, _In_ RuntimeCallContext* runtimeCallCxt) + : _sourceHandle{ sourceHandle } , _runtimeCallCxt{ runtimeCallCxt } { - _ASSERTE(_nowCxt != nullptr && runtimeCallCxt != nullptr); + _ASSERTE(_sourceHandle != nullptr && runtimeCallCxt != nullptr); } STDMETHOD(FoundTrackerTarget)(_In_ IReferenceTrackerTarget* target) @@ -189,8 +52,8 @@ namespace // Notify the runtime a reference path was found. RETURN_IF_FAILED(InteropLibImports::FoundReferencePath( _runtimeCallCxt, - _nowCxt->GetRuntimeContext(), - mow->Target)); + _sourceHandle, + mow->GetTarget())); return S_OK; } @@ -229,24 +92,19 @@ namespace { _ASSERTE(cxt != nullptr); - BOOL walkFailed = FALSE; - HRESULT hr; + bool walkFailed = false; + HRESULT hr = S_OK; - void* extObjContext = nullptr; - while (S_OK == (hr = InteropLibImports::IteratorNext(cxt, &extObjContext))) + IReferenceTracker* trackerTarget = nullptr; + OBJECTHANDLE proxyObject = nullptr; + while (InteropLibImports::IteratorNext(cxt, (void**)&trackerTarget, &proxyObject)) { - _ASSERTE(extObjContext != nullptr); - - NativeObjectWrapperContext* nowc = NativeObjectWrapperContext::MapFromRuntimeContext(extObjContext); - - // Check if the object is a tracker object. - IReferenceTracker* trackerMaybe = nowc->GetReferenceTracker(); - if (trackerMaybe == nullptr) + if (trackerTarget == nullptr) continue; // Ask the tracker instance to find all reference targets. 
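InteropLibImports::IteratorNext changes shape in this PR: instead of returning S_OK/S_FALSE with a single external-object context, it returns bool and fills the IReferenceTracker* and the proxy OBJECTHANDLE directly, which is how WalkExternalTrackerObjects and DetachNonPromotedObjects consume it. The toy iterator below only demonstrates that loop shape; the context type and data are local stand-ins, not the runtime's RuntimeCallContext.

```cpp
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

using OBJECTHANDLE = void*;
struct Tracker {};                    // stand-in for IReferenceTracker

struct FakeCallContext                // stand-in for InteropLibImports::RuntimeCallContext
{
    std::vector<std::pair<Tracker*, OBJECTHANDLE>> items;
    size_t pos = 0;
};

bool IteratorNext(FakeCallContext* cxt, Tracker** tracker, OBJECTHANDLE* proxy)
{
    if (cxt->pos >= cxt->items.size())
        return false;                 // end of enumeration
    *tracker = cxt->items[cxt->pos].first;
    *proxy = cxt->items[cxt->pos].second;
    ++cxt->pos;
    return true;
}

int main()
{
    Tracker t;
    int proxyObject = 0;
    FakeCallContext cxt;
    cxt.items = { { &t, &proxyObject }, { nullptr, nullptr } };

    int visited = 0;
    Tracker* tracker = nullptr;
    OBJECTHANDLE proxy = nullptr;
    while (IteratorNext(&cxt, &tracker, &proxy))
    {
        if (tracker == nullptr)
            continue;                 // entries without a tracker are skipped, as in the walk
        ++visited;
    }
    assert(visited == 1);
    return 0;
}
```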
- FindDependentWrappersCallback cb{ nowc, cxt }; - hr = trackerMaybe->FindTrackerTargets(&cb); + FindDependentWrappersCallback cb{ proxyObject, cxt }; + hr = trackerTarget->FindTrackerTargets(&cb); if (FAILED(hr)) break; } @@ -254,58 +112,26 @@ namespace if (FAILED(hr)) { // Remember the fact that we've failed and stop walking - walkFailed = TRUE; + walkFailed = true; InteropLibImports::SetGlobalPeggingState(true); } _ASSERTE(s_TrackerManager != nullptr); - (void)s_TrackerManager->FindTrackerTargetsCompleted(walkFailed); + (void)s_TrackerManager->FindTrackerTargetsCompleted(walkFailed ? TRUE : FALSE); return hr; } } -HRESULT TrackerObjectManager::OnIReferenceTrackerFound(_In_ IReferenceTracker* obj) +bool TrackerObjectManager::HasReferenceTrackerManager() { - _ASSERTE(obj != nullptr); - if (s_TrackerManager != nullptr) - return S_OK; - - // Retrieve IReferenceTrackerManager - HRESULT hr; - ComHolder trackerManager; - RETURN_IF_FAILED(obj->GetReferenceTrackerManager(&trackerManager)); - - ComHolder hostServices; - RETURN_IF_FAILED(g_HostServicesInstance.QueryInterface(IID_IReferenceTrackerHost, (void**)&hostServices)); - - // Attempt to set the tracker instance. - if (InterlockedCompareExchangePointer((void**)&s_TrackerManager, trackerManager.p, nullptr) == nullptr) - { - (void)trackerManager.Detach(); // Ownership has been transferred - RETURN_IF_FAILED(s_TrackerManager->SetReferenceTrackerHost(hostServices)); - } - - return S_OK; + return s_TrackerManager != nullptr; } -HRESULT TrackerObjectManager::AfterWrapperCreated(_In_ IReferenceTracker* obj) +bool TrackerObjectManager::TryRegisterReferenceTrackerManager(_In_ IReferenceTrackerManager* manager) { - _ASSERTE(obj != nullptr); - - HRESULT hr; - - // Notify tracker runtime that we've created a new wrapper for this object. - // To avoid surprises, we should notify them before we fire the first AddRefFromTrackerSource. - RETURN_IF_FAILED(obj->ConnectFromTrackerSource()); - - // Send out AddRefFromTrackerSource callbacks to notify tracker runtime we've done AddRef() - // for certain interfaces. We should do this *after* we made a AddRef() because we should never - // be in a state where report refs > actual refs - RETURN_IF_FAILED(obj->AddRefFromTrackerSource()); // IUnknown - RETURN_IF_FAILED(obj->AddRefFromTrackerSource()); // IReferenceTracker - - return S_OK; + _ASSERTE(manager != nullptr); + return InterlockedCompareExchangePointer((void**)&s_TrackerManager, manager, nullptr) == nullptr; } HRESULT TrackerObjectManager::BeforeWrapperFinalized(_In_ IReferenceTracker* obj) @@ -332,10 +158,10 @@ HRESULT TrackerObjectManager::BeginReferenceTracking(_In_ RuntimeCallContext* cx HRESULT hr; - _ASSERTE(s_HasTrackingStarted == FALSE); + _ASSERTE(s_HasTrackingStarted == false); _ASSERTE(InteropLibImports::GetGlobalPeggingState()); - s_HasTrackingStarted = TRUE; + s_HasTrackingStarted = true; // Let the tracker runtime know we are about to walk external objects so that // they can lock their reference cache. 
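TryRegisterReferenceTrackerManager replaces OnIReferenceTrackerFound with a plain first-registration-wins compare-exchange: the first caller installs the manager, later callers get false and the original registration stays. The sketch below shows the same contract with std::atomic instead of the Win32 interlocked intrinsic, purely to stay portable and self-contained; FakeManager is a stand-in type.

```cpp
#include <atomic>
#include <cassert>

struct FakeManager {};                        // stand-in for IReferenceTrackerManager

std::atomic<FakeManager*> s_manager{nullptr};

bool TryRegister(FakeManager* manager)
{
    FakeManager* expected = nullptr;
    // Succeeds only if no manager has been registered yet.
    return s_manager.compare_exchange_strong(expected, manager);
}

int main()
{
    FakeManager first, second;
    assert(TryRegister(&first));              // first caller wins
    assert(!TryRegister(&second));            // later callers are rejected
    assert(s_manager.load() == &first);       // original registration preserved
    return 0;
}
```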
Note that the tracker runtime doesn't need to @@ -356,7 +182,7 @@ HRESULT TrackerObjectManager::BeginReferenceTracking(_In_ RuntimeCallContext* cx HRESULT TrackerObjectManager::EndReferenceTracking() { - if (s_HasTrackingStarted != TRUE + if (s_HasTrackingStarted != true || !ShouldWalkExternalObjects()) return S_FALSE; @@ -372,7 +198,31 @@ HRESULT TrackerObjectManager::EndReferenceTracking() _ASSERTE(SUCCEEDED(hr)); InteropLibImports::SetGlobalPeggingState(true); - s_HasTrackingStarted = FALSE; + s_HasTrackingStarted = false; return hr; } + +HRESULT TrackerObjectManager::DetachNonPromotedObjects(_In_ RuntimeCallContext* cxt) +{ + _ASSERTE(cxt != nullptr); + + HRESULT hr; + IReferenceTracker* trackerTarget = nullptr; + OBJECTHANDLE proxyObject = NULL; + while (InteropLibImports::IteratorNext(cxt, (void**)&trackerTarget, &proxyObject)) + { + if (trackerTarget == nullptr) + continue; + + if (proxyObject == nullptr) + continue; + + if (!InteropLibImports::IsObjectPromoted(proxyObject)) + { + RETURN_IF_FAILED(BeforeWrapperFinalized(trackerTarget)); + } + } + + return S_OK; +} diff --git a/src/coreclr/interpreter/CMakeLists.txt b/src/coreclr/interpreter/CMakeLists.txt new file mode 100644 index 000000000000..dcde0cf2d43e --- /dev/null +++ b/src/coreclr/interpreter/CMakeLists.txt @@ -0,0 +1,64 @@ +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +# So simdhash will build correctly without a dn-config.h +add_compile_definitions(NO_CONFIG_H) + +set(INTERPRETER_SOURCES + compiler.cpp + compileropt.cpp + intops.cpp + interpconfig.cpp + eeinterp.cpp + stackmap.cpp + naming.cpp + methodset.cpp + ../../native/containers/dn-simdhash.c + ../../native/containers/dn-simdhash-ptr-ptr.c) + +set(INTERPRETER_LINK_LIBRARIES + gcinfo +) + +if(CLR_CMAKE_HOST_WIN32) + list(APPEND INTERPRETER_LINK_LIBRARIES + ${STATIC_MT_CRT_LIB} + ${STATIC_MT_VCRT_LIB} + ) +endif(CLR_CMAKE_HOST_WIN32) + +if(CLR_CMAKE_HOST_WIN32) + set(CLRINTERPRETER_EXPORTS ${CMAKE_CURRENT_LIST_DIR}/clrinterpreter.exports) + set(EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/clrinterpreter.def) + preprocess_file(${CLRINTERPRETER_EXPORTS} ${EXPORTS_FILE}) + list(APPEND INTERPRETER_SOURCES ${EXPORTS_FILE}) + add_custom_target(interpreter_exports DEPENDS ${EXPORTS_FILE}) +else() + set(CLRINTERPRETER_EXPORTS ${CMAKE_CURRENT_LIST_DIR}/clrinterpreter_unixexports.src) + set(EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/clrinterpreter.exports) + generate_exports_file(${CLRINTERPRETER_EXPORTS} ${EXPORTS_FILE}) + add_custom_target(interpreter_exports DEPENDS ${EXPORTS_FILE}) +endif() + +if(CLR_CMAKE_TARGET_BROWSER) + set(LIBRARY_TYPE STATIC) +else() + set(LIBRARY_TYPE SHARED) +endif() + +add_library_clr(clrinterpreter ${LIBRARY_TYPE} ${INTERPRETER_SOURCES}) + +add_dependencies(clrinterpreter interpreter_exports) + +if(NOT CLR_CMAKE_HOST_WIN32) + set_exports_linker_option(${EXPORTS_FILE}) + set_property(TARGET clrinterpreter APPEND_STRING PROPERTY LINK_FLAGS ${EXPORTS_LINKER_OPTION}) +endif() + +target_link_libraries(clrinterpreter + PRIVATE + ${INTERPRETER_LINK_LIBRARIES} + ) + +set_property(TARGET clrinterpreter APPEND_STRING PROPERTY LINK_DEPENDS ${EXPORTS_FILE}) + +install_clr(TARGETS clrinterpreter DESTINATIONS . COMPONENT runtime) diff --git a/src/coreclr/interpreter/clrinterpreter.exports b/src/coreclr/interpreter/clrinterpreter.exports new file mode 100644 index 000000000000..0afb54dca77d --- /dev/null +++ b/src/coreclr/interpreter/clrinterpreter.exports @@ -0,0 +1,5 @@ +; Licensed to the .NET Foundation under one or more agreements. 
+; The .NET Foundation licenses this file to you under the MIT license. +EXPORTS + getJit + jitStartup diff --git a/src/coreclr/interpreter/clrinterpreter_unixexports.src b/src/coreclr/interpreter/clrinterpreter_unixexports.src new file mode 100644 index 000000000000..11e42d3bb11a --- /dev/null +++ b/src/coreclr/interpreter/clrinterpreter_unixexports.src @@ -0,0 +1,2 @@ +getJit +jitStartup diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp new file mode 100644 index 000000000000..b49669a476b6 --- /dev/null +++ b/src/coreclr/interpreter/compiler.cpp @@ -0,0 +1,4878 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "gcinfoencoder.h" + +// HACK: debugreturn.h (included by gcinfoencoder.h) breaks constexpr +#if defined(debug_instrumented_return) || defined(_DEBUGRETURN_H_) +#undef return +#endif // debug_instrumented_return + +#include "interpreter.h" +#include "stackmap.h" + +#include + +#include // for std::bad_alloc + +static const StackType g_stackTypeFromInterpType[] = +{ + StackTypeI4, // I1 + StackTypeI4, // U1 + StackTypeI4, // I2 + StackTypeI4, // U2 + StackTypeI4, // I4 + StackTypeI8, // I8 + StackTypeR4, // R4 + StackTypeR8, // R8 + StackTypeO, // O + StackTypeVT, // VT + StackTypeByRef, // ByRef +}; + +static const InterpType g_interpTypeFromStackType[] = +{ + InterpTypeI4, // I4, + InterpTypeI8, // I8, + InterpTypeR4, // R4, + InterpTypeR8, // R8, + InterpTypeO, // O, + InterpTypeVT, // VT, + InterpTypeByRef, // MP, + InterpTypeI, // F +}; + +// Used by assertAbort +thread_local ICorJitInfo* t_InterpJitInfoTls = nullptr; + +static const char *g_stackTypeString[] = { "I4", "I8", "R4", "R8", "O ", "VT", "MP", "F " }; + +/*****************************************************************************/ +void DECLSPEC_NORETURN Interp_NOMEM() +{ + throw std::bad_alloc(); +} + +// GCInfoEncoder needs an IAllocator implementation. This is a simple one that forwards to the Compiler. +class InterpIAllocator : public IAllocator +{ + InterpCompiler *m_pCompiler; + +public: + InterpIAllocator(InterpCompiler *compiler) + : m_pCompiler(compiler) + { + } + + // Allocates a block of memory at least `sz` in size. + virtual void* Alloc(size_t sz) override + { + return m_pCompiler->AllocMethodData(sz); + } + + // Allocates a block of memory at least `elems * elemSize` in size. + virtual void* ArrayAlloc(size_t elems, size_t elemSize) override + { + // Ensure that elems * elemSize does not overflow. + if (elems > (SIZE_MAX / elemSize)) + { + Interp_NOMEM(); + } + + return m_pCompiler->AllocMethodData(elems * elemSize); + } + + // Frees the block of memory pointed to by p. + virtual void Free(void* p) override + { + // Interpreter-FIXME: m_pCompiler->FreeMethodData + free(p); + } +}; + +// Interpreter-FIXME Use specific allocators for their intended purpose +// Allocator for data that is kept alive throughout application execution, +// being freed only if the associated method gets freed. +void* InterpCompiler::AllocMethodData(size_t numBytes) +{ + return malloc(numBytes); +} + +// Fast allocator for small chunks of memory that can be freed together when the +// method compilation is finished. 
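InterpIAllocator::ArrayAlloc guards the multiplication by rejecting elems > SIZE_MAX / elemSize before computing elems * elemSize. The sketch below isolates just that predicate (with an added elemSize != 0 check so the sketch itself cannot divide by zero) and notes why it is sufficient to prevent wraparound.

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>

// Mirrors the overflow predicate used by ArrayAlloc, plus a zero-size guard.
bool WouldOverflow(size_t elems, size_t elemSize)
{
    return elemSize != 0 && elems > SIZE_MAX / elemSize;
}

int main()
{
    assert(!WouldOverflow(1024, sizeof(void*)));   // ordinary request is fine
    assert(WouldOverflow(SIZE_MAX / 2, 4));        // product would wrap a size_t
    // When the guard passes, elems <= SIZE_MAX / elemSize, so
    // elems * elemSize <= SIZE_MAX and the multiplication cannot overflow.
    return 0;
}
```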
+void* InterpCompiler::AllocMemPool(size_t numBytes) +{ + return malloc(numBytes); +} + +void* InterpCompiler::AllocMemPool0(size_t numBytes) +{ + void *ptr = AllocMemPool(numBytes); + memset(ptr, 0, numBytes); + return ptr; +} + +// Allocator for potentially larger chunks of data, that we might want to free +// eagerly, before method is finished compiling, to prevent excessive memory usage. +void* InterpCompiler::AllocTemporary(size_t numBytes) +{ + return malloc(numBytes); +} + +void* InterpCompiler::AllocTemporary0(size_t numBytes) +{ + void *ptr = AllocTemporary(numBytes); + memset(ptr, 0, numBytes); + return ptr; +} + +void* InterpCompiler::ReallocTemporary(void* ptr, size_t numBytes) +{ + return realloc(ptr, numBytes); +} + +void InterpCompiler::FreeTemporary(void* ptr) +{ + free(ptr); +} + +static int GetDataLen(int opcode) +{ + int length = g_interpOpLen[opcode]; + int numSVars = g_interpOpSVars[opcode]; + int numDVars = g_interpOpDVars[opcode]; + + return length - 1 - numSVars - numDVars; +} + +InterpInst* InterpCompiler::AddIns(int opcode) +{ + return AddInsExplicit(opcode, GetDataLen(opcode)); +} + +InterpInst* InterpCompiler::AddInsExplicit(int opcode, int dataLen) +{ + InterpInst *ins = NewIns(opcode, dataLen); + ins->pPrev = m_pCBB->pLastIns; + if (m_pCBB->pLastIns) + m_pCBB->pLastIns->pNext = ins; + else + m_pCBB->pFirstIns = ins; + m_pCBB->pLastIns = ins; + return ins; +} + +InterpInst* InterpCompiler::NewIns(int opcode, int dataLen) +{ + int insSize = sizeof(InterpInst) + sizeof(uint32_t) * dataLen; + InterpInst *ins = (InterpInst*)AllocMemPool(insSize); + memset(ins, 0, insSize); + ins->opcode = opcode; + ins->ilOffset = m_currentILOffset; + m_pLastNewIns = ins; + return ins; +} + +InterpInst* InterpCompiler::InsertInsBB(InterpBasicBlock *pBB, InterpInst *pPrevIns, int opcode) +{ + InterpInst *ins = NewIns(opcode, GetDataLen(opcode)); + + ins->pPrev = pPrevIns; + + if (pPrevIns) + { + ins->pNext = pPrevIns->pNext; + pPrevIns->pNext = ins; + } + else + { + ins->pNext = pBB->pFirstIns; + pBB->pFirstIns = ins; + } + + if (ins->pNext == NULL) + { + pBB->pLastIns = ins; + } + else + { + ins->pNext->pPrev = ins; + } + + return ins; +} + +// Inserts a new instruction after prevIns. 
prevIns must be in cbb +InterpInst* InterpCompiler::InsertIns(InterpInst *pPrevIns, int opcode) +{ + return InsertInsBB(m_pCBB, pPrevIns, opcode); +} + +InterpInst* InterpCompiler::FirstRealIns(InterpBasicBlock *pBB) +{ + InterpInst *ins = pBB->pFirstIns; + if (!ins || !InsIsNop(ins)) + return ins; + while (ins && InsIsNop(ins)) + ins = ins->pNext; + return ins; +} + +InterpInst* InterpCompiler::NextRealIns(InterpInst *ins) +{ + ins = ins->pNext; + while (ins && InsIsNop(ins)) + ins = ins->pNext; + return ins; +} + +InterpInst* InterpCompiler::PrevRealIns(InterpInst *ins) +{ + ins = ins->pPrev; + while (ins && InsIsNop(ins)) + ins = ins->pPrev; + return ins; +} + +void InterpCompiler::ClearIns(InterpInst *ins) +{ + ins->opcode = INTOP_NOP; +} + +bool InterpCompiler::InsIsNop(InterpInst *ins) +{ + return ins->opcode == INTOP_NOP; +} + +int32_t InterpCompiler::GetInsLength(InterpInst *ins) +{ + int len = g_interpOpLen[ins->opcode]; + if (len == 0) + { + assert(ins->opcode == INTOP_SWITCH); + len = 3 + ins->data[0]; + } + + return len; +} + +void InterpCompiler::ForEachInsSVar(InterpInst *ins, void *pData, void (InterpCompiler::*callback)(int*, void*)) +{ + int numSVars = g_interpOpSVars[ins->opcode]; + if (numSVars) + { + for (int i = 0; i < numSVars; i++) + { + if (ins->sVars [i] == CALL_ARGS_SVAR) + { + if (ins->info.pCallInfo && ins->info.pCallInfo->pCallArgs) { + int *callArgs = ins->info.pCallInfo->pCallArgs; + while (*callArgs != CALL_ARGS_TERMINATOR) + { + (this->*callback) (callArgs, pData); + callArgs++; + } + } + } + else + { + (this->*callback) (&ins->sVars[i], pData); + } + } + } +} + +void InterpCompiler::ForEachInsVar(InterpInst *ins, void *pData, void (InterpCompiler::*callback)(int*, void*)) +{ + ForEachInsSVar(ins, pData, callback); + + if (g_interpOpDVars [ins->opcode]) + (this->*callback) (&ins->dVar, pData); +} + + +InterpBasicBlock* InterpCompiler::AllocBB(int32_t ilOffset) +{ + InterpBasicBlock *bb = (InterpBasicBlock*)AllocMemPool(sizeof(InterpBasicBlock)); + + new (bb) InterpBasicBlock (m_BBCount, ilOffset); + m_BBCount++; + return bb; +} + +InterpBasicBlock* InterpCompiler::GetBB(int32_t ilOffset) +{ + InterpBasicBlock *bb = m_ppOffsetToBB [ilOffset]; + + if (!bb) + { + bb = AllocBB(ilOffset); + + m_ppOffsetToBB[ilOffset] = bb; + } + + return bb; +} + +// Same implementation as JIT +static inline uint32_t LeadingZeroCount(uint32_t value) +{ + if (value == 0) + { + return 32; + } + +#if defined(_MSC_VER) + unsigned long result; + ::_BitScanReverse(&result, value); + return 31 ^ static_cast(result); +#else + int32_t result = __builtin_clz(value); + return static_cast(result); +#endif +} + + +int GetBBLinksCapacity(int links) +{ + if (links <= 2) + return links; + // Return the next power of 2 bigger or equal to links + uint32_t leadingZeroes = LeadingZeroCount(links - 1); + return 1 << (32 - leadingZeroes); +} + + +void InterpCompiler::LinkBBs(InterpBasicBlock *from, InterpBasicBlock *to) +{ + int i; + bool found = false; + + for (i = 0; i < from->outCount; i++) + { + if (to == from->ppOutBBs[i]) + { + found = true; + break; + } + } + if (!found) + { + int prevCapacity = GetBBLinksCapacity(from->outCount); + int newCapacity = GetBBLinksCapacity(from->outCount + 1); + if (newCapacity > prevCapacity) + { + InterpBasicBlock **newa = (InterpBasicBlock**)AllocMemPool(newCapacity * sizeof(InterpBasicBlock*)); + memcpy(newa, from->ppOutBBs, from->outCount * sizeof(InterpBasicBlock*)); + from->ppOutBBs = newa; + } + from->ppOutBBs [from->outCount] = to; + from->outCount++; + } 
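+ // For example, the capacity policy above grows in powers of two: GetBBLinksCapacity
+ // returns 1, 2, 4, 8, 16 for 1, 2, 3, 5, 9 links, so the ppOutBBs/ppInBBs arrays are
+ // reallocated only O(log n) times as edges are added one at a time.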
+ + found = false; + for (i = 0; i < to->inCount; i++) + { + if (from == to->ppInBBs [i]) + { + found = true; + break; + } + } + + if (!found) { + int prevCapacity = GetBBLinksCapacity(to->inCount); + int newCapacity = GetBBLinksCapacity(to->inCount + 1); + if (newCapacity > prevCapacity) { + InterpBasicBlock **newa = (InterpBasicBlock**)AllocMemPool(newCapacity * sizeof(InterpBasicBlock*)); + memcpy(newa, to->ppInBBs, to->inCount * sizeof(InterpBasicBlock*)); + to->ppInBBs = newa; + } + to->ppInBBs [to->inCount] = from; + to->inCount++; + } +} + +// array must contain ref +static void RemoveBBRef(InterpBasicBlock **array, InterpBasicBlock *ref, int len) +{ + int i = 0; + while (array[i] != ref) + { + i++; + } + i++; + while (i < len) + { + array[i - 1] = array[i]; + i++; + } +} + +void InterpCompiler::UnlinkBBs(InterpBasicBlock *from, InterpBasicBlock *to) +{ + RemoveBBRef(from->ppOutBBs, to, from->outCount); + from->outCount--; + RemoveBBRef(to->ppInBBs, from, to->inCount); + to->inCount--; +} + +// These are moves between vars, operating only on the interpreter stack +int32_t InterpCompiler::InterpGetMovForType(InterpType interpType, bool signExtend) +{ + switch (interpType) + { + case InterpTypeI1: + case InterpTypeU1: + case InterpTypeI2: + case InterpTypeU2: + if (signExtend) + return INTOP_MOV_I4_I1 + interpType; + else + return INTOP_MOV_4; + case InterpTypeI4: + case InterpTypeR4: + return INTOP_MOV_4; + case InterpTypeI8: + case InterpTypeR8: + return INTOP_MOV_8; + case InterpTypeO: + case InterpTypeByRef: + return INTOP_MOV_P; + case InterpTypeVT: + return INTOP_MOV_VT; + default: + assert(0); + } + return -1; +} + +// This method needs to be called when the current basic blocks ends and execution can +// continue into pTargetBB. When the stack state of a basic block is initialized, the vars +// associated with the stack state are set. When another bblock will continue execution +// into this bblock, it will first have to emit moves from the vars in its stack state +// to the vars of the target bblock stack state. 
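+// For example, if the current block leaves var 12 in a stack slot where the target
+// block's recorded stack state expects var 7, a type-appropriate mov from var 12 to
+// var 7 is emitted here (with the size appended for value types).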
+void InterpCompiler::EmitBBEndVarMoves(InterpBasicBlock *pTargetBB) +{ + if (pTargetBB->stackHeight <= 0) + return; + + for (int i = 0; i < pTargetBB->stackHeight; i++) + { + int sVar = m_pStackBase[i].var; + int dVar = pTargetBB->pStackState[i].var; + if (sVar != dVar) + { + InterpType interpType = m_pVars[sVar].interpType; + int32_t movOp = InterpGetMovForType(interpType, false); + + AddIns(movOp); + m_pLastNewIns->SetSVar(sVar); + m_pLastNewIns->SetDVar(dVar); + + if (interpType == InterpTypeVT) + { + assert(m_pVars[sVar].size == m_pVars[dVar].size); + m_pLastNewIns->data[0] = m_pVars[sVar].size; + } + } + } +} + +static void MergeStackTypeInfo(StackInfo *pState1, StackInfo *pState2, int len) +{ + // Discard type information if we have type conflicts for stack contents + for (int i = 0; i < len; i++) + { + if (pState1[i].clsHnd != pState2[i].clsHnd) + { + pState1[i].clsHnd = NULL; + pState2[i].clsHnd = NULL; + } + } +} + +// Initializes stack state at entry to bb, based on the current stack state +void InterpCompiler::InitBBStackState(InterpBasicBlock *pBB) +{ + if (pBB->stackHeight >= 0) + { + // Already initialized, update stack information + MergeStackTypeInfo(m_pStackBase, pBB->pStackState, pBB->stackHeight); + } + else + { + pBB->stackHeight = (int32_t)(m_pStackPointer - m_pStackBase); + if (pBB->stackHeight > 0) + { + int size = pBB->stackHeight * sizeof (StackInfo); + pBB->pStackState = (StackInfo*)AllocMemPool(size); + memcpy (pBB->pStackState, m_pStackBase, size); + } + } +} + + +int32_t InterpCompiler::CreateVarExplicit(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size) +{ + if (m_varsSize == m_varsCapacity) { + m_varsCapacity *= 2; + if (m_varsCapacity == 0) + m_varsCapacity = 16; + m_pVars = (InterpVar*) ReallocTemporary(m_pVars, m_varsCapacity * sizeof(InterpVar)); + } + InterpVar *var = &m_pVars[m_varsSize]; + + new (var) InterpVar(interpType, clsHnd, size); + + m_varsSize++; + return m_varsSize - 1; +} + +void InterpCompiler::EnsureStack(int additional) +{ + int32_t currentSize = (int32_t)(m_pStackPointer - m_pStackBase); + + if ((additional + currentSize) > m_stackCapacity) { + m_stackCapacity *= 2; + m_pStackBase = (StackInfo*)ReallocTemporary (m_pStackBase, m_stackCapacity * sizeof(StackInfo)); + m_pStackPointer = m_pStackBase + currentSize; + } +} + +#define CHECK_STACK(n) \ + do \ + { \ + if (!CheckStackHelper (n)) \ + goto exit_bad_code; \ + } while (0) + +#define CHECK_STACK_RET_VOID(n) \ + do { \ + if (!CheckStackHelper(n)) \ + return; \ + } while (0) + +#define CHECK_STACK_RET(n, ret) \ + do { \ + if (!CheckStackHelper(n)) \ + return ret; \ + } while (0) + +#define INVALID_CODE_RET_VOID \ + do { \ + m_hasInvalidCode = true; \ + return; \ + } while (0) + +bool InterpCompiler::CheckStackHelper(int n) +{ + int32_t currentSize = (int32_t)(m_pStackPointer - m_pStackBase); + if (currentSize < n) + { + m_hasInvalidCode = true; + return false; + } + return true; +} + +void InterpCompiler::PushTypeExplicit(StackType stackType, CORINFO_CLASS_HANDLE clsHnd, int size) +{ + EnsureStack(1); + m_pStackPointer->type = stackType; + m_pStackPointer->clsHnd = clsHnd; + m_pStackPointer->size = ALIGN_UP_TO(size, INTERP_STACK_SLOT_SIZE); + int var = CreateVarExplicit(g_interpTypeFromStackType[stackType], clsHnd, size); + m_pStackPointer->var = var; + m_pStackPointer++; +} + +void InterpCompiler::PushStackType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd) +{ + // We don't really care about the exact size for non-valuetypes + PushTypeExplicit(stackType, clsHnd, 
INTERP_STACK_SLOT_SIZE); +} + +void InterpCompiler::PushInterpType(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd) +{ + PushStackType(g_stackTypeFromInterpType[interpType], clsHnd); +} + +void InterpCompiler::PushTypeVT(CORINFO_CLASS_HANDLE clsHnd, int size) +{ + PushTypeExplicit(StackTypeVT, clsHnd, size); +} + + +int32_t InterpCompiler::ComputeCodeSize() +{ + int32_t codeSize = 0; + + for (InterpBasicBlock *bb = m_pEntryBB; bb != NULL; bb = bb->pNextBB) + { + for (InterpInst *ins = bb->pFirstIns; ins != NULL; ins = ins->pNext) + { + codeSize += GetInsLength(ins); + } + } + return codeSize; +} + +int32_t InterpCompiler::GetLiveStartOffset(int var) +{ + if (m_pVars[var].global) + { + return 0; + } + else + { + assert(m_pVars[var].liveStart != NULL); + return m_pVars[var].liveStart->nativeOffset; + } +} + +int32_t InterpCompiler::GetLiveEndOffset(int var) +{ + if (m_pVars[var].global) + { + return m_methodCodeSize; + } + else + { + assert(m_pVars[var].liveEnd != NULL); + return m_pVars[var].liveEnd->nativeOffset + GetInsLength(m_pVars[var].liveEnd); + } +} + +uint32_t InterpCompiler::ConvertOffset(int32_t offset) +{ + // FIXME Once the VM moved the InterpMethod* to code header, we don't need to add a pointer size to the offset + return offset * sizeof(int32_t) + sizeof(void*); +} + +int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, TArray *relocs) +{ + ins->nativeOffset = (int32_t)(ip - m_pMethodCode); + + int32_t opcode = ins->opcode; + int32_t *startIp = ip; + *ip++ = opcode; + + // Set to true if the instruction was completely reverted. + bool isReverted = false; + + if (opcode == INTOP_SWITCH) + { + int32_t numLabels = ins->data [0]; + *ip++ = m_pVars[ins->sVars[0]].offset; + *ip++ = numLabels; + // Add relocation for each label + for (int32_t i = 0; i < numLabels; i++) + { + Reloc *reloc = (Reloc*)AllocMemPool(sizeof(Reloc)); + new (reloc) Reloc(RelocSwitch, (int32_t)(ip - m_pMethodCode), ins->info.ppTargetBBTable[i], 0); + relocs->Add(reloc); + *ip++ = (int32_t)0xdeadbeef; + } + } + else if (InterpOpIsUncondBranch(opcode) || InterpOpIsCondBranch(opcode) || (opcode == INTOP_LEAVE_CATCH) || (opcode == INTOP_CALL_FINALLY)) + { + int32_t brBaseOffset = (int32_t)(startIp - m_pMethodCode); + for (int i = 0; i < g_interpOpSVars[opcode]; i++) + *ip++ = m_pVars[ins->sVars[i]].offset; + + if (ins->info.pTargetBB->nativeOffset >= 0) + { + *ip++ = ins->info.pTargetBB->nativeOffset - brBaseOffset; + } + else if (opcode == INTOP_BR && ins->info.pTargetBB == m_pCBB->pNextBB) + { + // Ignore branch to the next basic block. Revert the added INTOP_BR. + isReverted = true; + ip--; + } + else + { + // We don't know yet the IR offset of the target, add a reloc instead + Reloc *reloc = (Reloc*)AllocMemPool(sizeof(Reloc)); + new (reloc) Reloc(RelocLongBranch, brBaseOffset, ins->info.pTargetBB, g_interpOpSVars[opcode]); + relocs->Add(reloc); + *ip++ = (int32_t)0xdeadbeef; + } + } + else if (opcode == INTOP_MOV_SRC_OFF) + { + // This opcode reuses the MOV opcodes, which are normally used to copy the + // contents of one var to the other, in order to copy a containing field + // of the source var (which is a vt) to another var. 
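+ // For example (offsets are illustrative): if the source value type lives at offset 64
+ // and data[] describes an int32 field at fOffset 8, the instruction below is rewritten
+ // as the type-specific mov with srcOffset 64 + 8 = 72; when fSize is non-zero,
+ // INTOP_MOV_VT is used and the size is appended.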
+ int32_t fOffset = ins->data[0]; + InterpType fType = (InterpType)ins->data[1]; + int32_t fSize = ins->data[2]; + // Revert opcode emit + ip--; + + int destOffset = m_pVars[ins->dVar].offset; + int srcOffset = m_pVars[ins->sVars[0]].offset; + srcOffset += fOffset; + if (fSize) + opcode = INTOP_MOV_VT; + else + opcode = InterpGetMovForType(fType, true); + *ip++ = opcode; + *ip++ = destOffset; + *ip++ = srcOffset; + if (opcode == INTOP_MOV_VT) + *ip++ = fSize; + } + else if (opcode == INTOP_LDLOCA) + { + // This opcode references a var, int sVars[0], but it is not registered as a source for it + // aka g_interpOpSVars[INTOP_LDLOCA] is 0. + *ip++ = m_pVars[ins->dVar].offset; + *ip++ = m_pVars[ins->sVars[0]].offset; + } + else + { + // Default code emit for an instruction. The opcode was already emitted above. + // We emit the offset for the instruction destination, then for every single source + // variable we emit another offset. Finally, we will emit any additional data needed + // by the instruction. + if (g_interpOpDVars[opcode]) + *ip++ = m_pVars[ins->dVar].offset; + + if (g_interpOpSVars[opcode]) + { + for (int i = 0; i < g_interpOpSVars[opcode]; i++) + { + if (ins->sVars[i] == CALL_ARGS_SVAR) + { + *ip++ = m_paramAreaOffset + ins->info.pCallInfo->callOffset; + } + else + { + *ip++ = m_pVars[ins->sVars[i]].offset; + } + } + } + + int left = GetInsLength(ins) - (int32_t)(ip - startIp); + // Emit the rest of the data + for (int i = 0; i < left; i++) + *ip++ = ins->data[i]; + } + + if ((ins->ilOffset != -1) && !isReverted) + { + assert(ins->ilOffset >= 0); + assert(ins->nativeOffset >= 0); + uint32_t ilOffset = ins->ilOffset; + uint32_t nativeOffset = ConvertOffset(ins->nativeOffset); + if ((m_ILToNativeMapSize == 0) || (m_pILToNativeMap[m_ILToNativeMapSize - 1].ilOffset != ilOffset)) + { + // This code assumes that instructions for the same IL offset are emitted in a single run without + // any other IL offsets in between and that they don't repeat again after the run ends. 
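+ // For example, if three consecutive instructions share ilOffset 0x10, only the first
+ // one adds a mapping entry (IL 0x10 -> its native offset); a new entry is started
+ // only when the run moves on to a different IL offset.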
+#ifdef _DEBUG + for (int i = 0; i < m_ILToNativeMapSize; i++) + { + assert(m_pILToNativeMap[i].ilOffset != ilOffset); + } +#endif // _DEBUG + + // Since we can have at most one entry per IL offset, + // this map cannot possibly use more entries than the size of the IL code + assert(m_ILToNativeMapSize < m_ILCodeSize); + + m_pILToNativeMap[m_ILToNativeMapSize].ilOffset = ilOffset; + m_pILToNativeMap[m_ILToNativeMapSize].nativeOffset = nativeOffset; + m_ILToNativeMapSize++; + } + } + + return ip; +} + +void InterpCompiler::PatchRelocations(TArray *relocs) +{ + int32_t size = relocs->GetSize(); + + for (int32_t i = 0; i < size; i++) + { + Reloc *reloc = relocs->Get(i); + int32_t offset = reloc->pTargetBB->nativeOffset - reloc->offset; + int32_t *pSlot = NULL; + + if (reloc->type == RelocLongBranch) + pSlot = m_pMethodCode + reloc->offset + reloc->skip + 1; + else if (reloc->type == RelocSwitch) + pSlot = m_pMethodCode + reloc->offset; + else + assert(0); + + assert(*pSlot == (int32_t)0xdeadbeef); + *pSlot = offset; + } +} + +int32_t *InterpCompiler::EmitBBCode(int32_t *ip, InterpBasicBlock *bb, TArray *relocs) +{ + m_pCBB = bb; + m_pCBB->nativeOffset = (int32_t)(ip - m_pMethodCode); + + for (InterpInst *ins = bb->pFirstIns; ins != NULL; ins = ins->pNext) + { + if (InterpOpIsEmitNop(ins->opcode)) + { + ins->nativeOffset = (int32_t)(ip - m_pMethodCode); + continue; + } + + ip = EmitCodeIns(ip, ins, relocs); + } + + m_pCBB->nativeEndOffset = (int32_t)(ip - m_pMethodCode); + + return ip; +} + +void InterpCompiler::EmitCode() +{ + TArray relocs; + int32_t codeSize = ComputeCodeSize(); + m_pMethodCode = (int32_t*)AllocMethodData(codeSize * sizeof(int32_t)); + + // These will eventually be freed by the VM, and they use the delete [] operator for the deletion. + m_pILToNativeMap = new ICorDebugInfo::OffsetMapping[m_ILCodeSize]; + ICorDebugInfo::NativeVarInfo* eeVars = NULL; + if (m_numILVars > 0) + { + eeVars = new ICorDebugInfo::NativeVarInfo[m_numILVars]; + } + + // For each BB, compute the number of EH clauses that overlap with it. + for (unsigned int i = 0; i < m_methodInfo->EHcount; i++) + { + CORINFO_EH_CLAUSE clause; + m_compHnd->getEHinfo(m_methodInfo->ftn, i, &clause); + for (InterpBasicBlock *bb = m_pEntryBB; bb != NULL; bb = bb->pNextBB) + { + if (clause.HandlerOffset <= (uint32_t)bb->ilOffset && (clause.HandlerOffset + clause.HandlerLength) > (uint32_t)bb->ilOffset) + { + bb->overlappingEHClauseCount++; + } + + if (clause.Flags == CORINFO_EH_CLAUSE_FILTER && clause.FilterOffset <= (uint32_t)bb->ilOffset && clause.HandlerOffset > (uint32_t)bb->ilOffset) + { + bb->overlappingEHClauseCount++; + } + } + } + + // Emit all the code in waves. First emit all blocks that are not inside any EH clauses. + // Then emit blocks that are inside of a single EH clause, then ones that are inside of + // two EH clauses, etc. + // The goal is to move all clauses to the end of the method code recursively so that + // no handler is inside of a try block. 
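+ // For example, blocks outside any EH clause (overlappingEHClauseCount == 0) are
+ // emitted in the first pass, blocks covered by exactly one clause in the second
+ // pass, and so on, so handler code always ends up after the code of the try
+ // region it protects.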
+ int32_t *ip = m_pMethodCode; + bool emittedBlock; + int clauseDepth = 0; + do + { + emittedBlock = false; + for (InterpBasicBlock *bb = m_pEntryBB; bb != NULL; bb = bb->pNextBB) + { + if (bb->overlappingEHClauseCount == clauseDepth) + { + ip = EmitBBCode(ip, bb, &relocs); + emittedBlock = true; + } + } + clauseDepth++; + } + while (emittedBlock); + + m_methodCodeSize = (int32_t)(ip - m_pMethodCode); + + PatchRelocations(&relocs); + + int j = 0; + for (int i = 0; i < m_numILVars; i++) + { + assert(m_pVars[i].ILGlobal); + eeVars[j].startOffset = ConvertOffset(GetLiveStartOffset(i)); // This is where the variable mapping is start to become valid + eeVars[j].endOffset = ConvertOffset(GetLiveEndOffset(i)); // This is where the variable mapping is cease to become valid + eeVars[j].varNumber = j; // This is the index of the variable in [arg] + [local] + eeVars[j].loc.vlType = ICorDebugInfo::VLT_STK; // This is a stack slot + eeVars[j].loc.vlStk.vlsBaseReg = ICorDebugInfo::REGNUM_FP; // This specifies which register this offset is based off + eeVars[j].loc.vlStk.vlsOffset = m_pVars[i].offset; // This specifies starting from the offset, how much offset is this from + j++; + } + + if (m_numILVars > 0) + { + m_compHnd->setVars(m_methodInfo->ftn, m_numILVars, eeVars); + } + m_compHnd->setBoundaries(m_methodInfo->ftn, m_ILToNativeMapSize, m_pILToNativeMap); +} + +#ifdef FEATURE_INTERPRETER +class InterpGcSlotAllocator +{ + InterpCompiler *m_compiler; + InterpreterGcInfoEncoder *m_encoder; + // [pObjects, pByrefs] + GcSlotId *m_slotTables[2]; + unsigned m_slotTableSize; + +#ifdef DEBUG + bool m_verbose; +#endif + + GcSlotId* LocateGcSlotTableEntry(uint32_t offsetBytes, GcSlotFlags flags) + { + GcSlotId *slotTable = m_slotTables[(flags & GC_SLOT_INTERIOR) == GC_SLOT_INTERIOR]; + uint32_t slotIndex = offsetBytes / sizeof(void *); + assert(slotIndex < m_slotTableSize); + return &slotTable[slotIndex]; + } + +public: + InterpGcSlotAllocator(InterpCompiler *compiler, InterpreterGcInfoEncoder *encoder) + : m_compiler(compiler) + , m_encoder(encoder) + , m_slotTableSize(compiler->m_totalVarsStackSize / sizeof(void *)) +#ifdef DEBUG + , m_verbose(compiler->m_verbose) +#endif + { + for (int i = 0; i < 2; i++) + { + m_slotTables[i] = new (compiler) GcSlotId[m_slotTableSize]; + // 0 is a valid slot id so default-initialize all the slots to 0xFFFFFFFF + memset(m_slotTables[i], 0xFF, sizeof(GcSlotId) * m_slotTableSize); + } + } + + void AllocateOrReuseGcSlot(uint32_t offsetBytes, GcSlotFlags flags) + { + GcSlotId *pSlot = LocateGcSlotTableEntry(offsetBytes, flags); + bool allocateNewSlot = *pSlot == ((GcSlotId)-1); + + if (allocateNewSlot) + { + // Important to pass GC_FRAMEREG_REL, the default is broken due to GET_CALLER_SP being unimplemented + *pSlot = m_encoder->GetStackSlotId(offsetBytes, flags, GC_FRAMEREG_REL); + } + else + { + assert((flags & GC_SLOT_UNTRACKED) == 0); + } + + INTERP_DUMP( + "%s %s%sgcslot %u at %u\n", + allocateNewSlot ? "Allocated" : "Reused", + (flags & GC_SLOT_UNTRACKED) ? "global " : "", + (flags & GC_SLOT_INTERIOR) ? 
"interior " : "", + *pSlot, + offsetBytes + ); + } + + void ReportLiveRange(uint32_t offsetBytes, GcSlotFlags flags, int varIndex) + { + GcSlotId *pSlot = LocateGcSlotTableEntry(offsetBytes, flags); + assert(varIndex < m_compiler->m_varsSize); + + InterpVar *pVar = &m_compiler->m_pVars[varIndex]; + if (pVar->global) + return; + + GcSlotId slot = *pSlot; + assert(slot != ((GcSlotId)-1)); + assert(pVar->liveStart); + assert(pVar->liveEnd); + uint32_t startOffset = m_compiler->ConvertOffset(m_compiler->GetLiveStartOffset(varIndex)), + endOffset = m_compiler->ConvertOffset(m_compiler->GetLiveEndOffset(varIndex)); + INTERP_DUMP( + "Slot %u (%s var #%d offset %u) live [IR_%04x - IR_%04x] [%u - %u]\n", + slot, pVar->global ? "global" : "local", + varIndex, pVar->offset, + m_compiler->GetLiveStartOffset(varIndex), m_compiler->GetLiveEndOffset(varIndex), + startOffset, endOffset + ); + m_encoder->SetSlotState(startOffset, slot, GC_SLOT_LIVE); + m_encoder->SetSlotState(endOffset, slot, GC_SLOT_DEAD); + } +}; +#endif + +void InterpCompiler::BuildGCInfo(InterpMethod *pInterpMethod) +{ +#ifdef FEATURE_INTERPRETER + InterpIAllocator* pAllocator = new (this) InterpIAllocator(this); + InterpreterGcInfoEncoder* gcInfoEncoder = new (this) InterpreterGcInfoEncoder(m_compHnd, m_methodInfo, pAllocator, Interp_NOMEM); + InterpGcSlotAllocator slotAllocator (this, gcInfoEncoder); + + gcInfoEncoder->SetCodeLength(ConvertOffset(m_methodCodeSize)); + + INTERP_DUMP("Allocating gcinfo slots for %u vars\n", m_varsSize); + + for (int pass = 0; pass < 2; pass++) + { + for (int i = 0; i < m_varsSize; i++) + { + InterpVar *pVar = &m_pVars[i]; + GcSlotFlags flags = pVar->global + ? (GcSlotFlags)GC_SLOT_UNTRACKED + : (GcSlotFlags)0; + + switch (pVar->interpType) { + case InterpTypeO: + break; + case InterpTypeByRef: + flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); + break; + case InterpTypeVT: + { + InterpreterStackMap *stackMap = GetInterpreterStackMap(m_compHnd, pVar->clsHnd); + for (unsigned j = 0; j < stackMap->m_slotCount; j++) + { + InterpreterStackMapSlot slotInfo = stackMap->m_slots[j]; + unsigned fieldOffset = pVar->offset + slotInfo.m_offsetBytes; + GcSlotFlags fieldFlags = (GcSlotFlags)(flags | slotInfo.m_gcSlotFlags); + if (pass == 0) + slotAllocator.AllocateOrReuseGcSlot(fieldOffset, fieldFlags); + else + slotAllocator.ReportLiveRange(fieldOffset, fieldFlags, i); + } + + // Don't perform the regular allocateGcSlot call + continue; + } + default: + // Neither an object, interior pointer, or vt, so no slot needed + continue; + } + + if (pass == 0) + slotAllocator.AllocateOrReuseGcSlot(pVar->offset, flags); + else + slotAllocator.ReportLiveRange(pVar->offset, flags, i); + } + + if (pass == 0) + gcInfoEncoder->FinalizeSlotIds(); + else + gcInfoEncoder->Build(); + } + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + gcInfoEncoder->Emit(); +#endif +} + +void InterpCompiler::GetNativeRangeForClause(uint32_t startILOffset, uint32_t endILOffset, int32_t *nativeStartOffset, int32_t* nativeEndOffset) +{ + InterpBasicBlock* pStartBB = m_ppOffsetToBB[startILOffset]; + assert(pStartBB != NULL); + + InterpBasicBlock* pEndBB = pStartBB; + for (InterpBasicBlock* pBB = pStartBB->pNextBB; (pBB != NULL) && ((uint32_t)pBB->ilOffset < endILOffset); pBB = pBB->pNextBB) + { + if ((pBB->clauseType == pStartBB->clauseType) && (pBB->overlappingEHClauseCount == pStartBB->overlappingEHClauseCount)) + { + pEndBB = pBB; + } + } + + *nativeStartOffset = pStartBB->nativeOffset; + 
*nativeEndOffset = pEndBB->nativeEndOffset; +} + +void InterpCompiler::BuildEHInfo() +{ + uint32_t lastTryILOffset = 0; + uint32_t lastTryILLength = 0; + + INTERP_DUMP("EH info:\n"); + + if (m_methodInfo->EHcount == 0) + { + INTERP_DUMP(" None\n"); + return; + } + + m_compHnd->setEHcount(m_methodInfo->EHcount); + for (unsigned int i = 0; i < m_methodInfo->EHcount; i++) + { + CORINFO_EH_CLAUSE clause; + CORINFO_EH_CLAUSE nativeClause; + + m_compHnd->getEHinfo(m_methodInfo->ftn, i, &clause); + + int32_t tryStartNativeOffset; + int32_t tryEndNativeOffset; + GetNativeRangeForClause(clause.TryOffset, clause.TryOffset + clause.TryLength, &tryStartNativeOffset, &tryEndNativeOffset); + + int32_t handlerStartNativeOffset; + int32_t handlerEndNativeOffset; + GetNativeRangeForClause(clause.HandlerOffset, clause.HandlerOffset + clause.HandlerLength, &handlerStartNativeOffset, &handlerEndNativeOffset); + + nativeClause.TryOffset = ConvertOffset(tryStartNativeOffset); + nativeClause.TryLength = ConvertOffset(tryEndNativeOffset); + + nativeClause.HandlerOffset = ConvertOffset(handlerStartNativeOffset); + nativeClause.HandlerLength = ConvertOffset(handlerEndNativeOffset); + InterpBasicBlock* pFilterStartBB = NULL; + if (clause.Flags == CORINFO_EH_CLAUSE_FILTER) + { + pFilterStartBB = m_ppOffsetToBB[clause.FilterOffset]; + nativeClause.FilterOffset = ConvertOffset(pFilterStartBB->nativeOffset); + } + else + { + nativeClause.ClassToken = clause.ClassToken; + } + + nativeClause.Flags = clause.Flags; + + // A try region can have multiple catch / filter handlers. All except of the first one need to be marked by + // the COR_ILEXCEPTION_CLAUSE_SAMETRY flag so that runtime can distinguish this case from a case when + // the native try region is the same for multiple clauses, but the IL try region is different. + if ((lastTryILOffset == clause.TryOffset) && (lastTryILLength == clause.TryLength)) + { + nativeClause.Flags = (CORINFO_EH_CLAUSE_FLAGS)((int)nativeClause.Flags | COR_ILEXCEPTION_CLAUSE_SAMETRY); + } + + m_compHnd->setEHinfo(i, &nativeClause); + + INTERP_DUMP(" try [IR_%04x(%x), IR_%04x(%x)) ", tryStartNativeOffset, clause.TryOffset, tryEndNativeOffset, clause.TryOffset + clause.TryLength); + if (clause.Flags == CORINFO_EH_CLAUSE_FILTER) + { + INTERP_DUMP("filter IR_%04x(%x), handler [IR_%04x(%x), IR_%04x(%x))%s\n", pFilterStartBB->nativeOffset, clause.FilterOffset, handlerStartNativeOffset, clause.HandlerOffset, handlerEndNativeOffset, clause.HandlerOffset + clause.HandlerLength, ((int)nativeClause.Flags & COR_ILEXCEPTION_CLAUSE_SAMETRY) ? " (same try)" : ""); + } + else if (nativeClause.Flags == CORINFO_EH_CLAUSE_FINALLY) + { + INTERP_DUMP("finally handler [IR_%04x(%x), IR_%04x(%x))\n", handlerStartNativeOffset, clause.HandlerOffset, handlerEndNativeOffset, clause.HandlerOffset + clause.HandlerLength); + } + else + { + INTERP_DUMP("catch handler [IR_%04x(%x), IR_%04x(%x))%s\n", handlerStartNativeOffset, clause.HandlerOffset, handlerEndNativeOffset, clause.HandlerOffset + clause.HandlerLength, ((int)nativeClause.Flags & COR_ILEXCEPTION_CLAUSE_SAMETRY) ? 
" (same try)" : ""); + } + } +} + +InterpMethod* InterpCompiler::CreateInterpMethod() +{ + int numDataItems = m_dataItems.GetSize(); + void **pDataItems = (void**)AllocMethodData(numDataItems * sizeof(void*)); + + for (int i = 0; i < numDataItems; i++) + pDataItems[i] = m_dataItems.Get(i); + + bool initLocals = (m_methodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0; + + InterpMethod *pMethod = new InterpMethod(m_methodHnd, m_totalVarsStackSize, pDataItems, initLocals); + + return pMethod; +} + +int32_t* InterpCompiler::GetCode(int32_t *pCodeSize) +{ + *pCodeSize = m_methodCodeSize; + return m_pMethodCode; +} + +InterpCompiler::InterpCompiler(COMP_HANDLE compHnd, + CORINFO_METHOD_INFO* methodInfo) + : m_pInitLocalsIns(nullptr) + , m_globalVarsWithRefsStackTop(0) +{ + // Fill in the thread-local used for assertions + t_InterpJitInfoTls = compHnd; + + m_methodHnd = methodInfo->ftn; + m_compScopeHnd = methodInfo->scope; + m_compHnd = compHnd; + m_methodInfo = methodInfo; + +#ifdef DEBUG + + m_classHnd = compHnd->getMethodClass(m_methodHnd); + + m_methodName = ::PrintMethodName(compHnd, m_classHnd, m_methodHnd, &m_methodInfo->args, + /* includeClassInstantiation */ true, + /* includeMethodInstantiation */ true, + /* includeSignature */ true, + /* includeReturnType */ false, + /* includeThis */ false); + + if (InterpConfig.InterpDump().contains(compHnd, m_methodHnd, m_classHnd, &m_methodInfo->args)) + m_verbose = true; +#endif +} + +InterpMethod* InterpCompiler::CompileMethod() +{ +#ifdef DEBUG + if (m_verbose || InterpConfig.InterpList()) + { + printf("Interpreter compile method %s\n", m_methodName.GetUnderlyingArray()); + } +#endif + + CreateILVars(); + + GenerateCode(m_methodInfo); + +#ifdef DEBUG + if (m_verbose) + { + printf("\nUnoptimized IR:\n"); + PrintCode(); + } +#endif + + AllocOffsets(); + PatchInitLocals(m_methodInfo); + + EmitCode(); + +#ifdef DEBUG + if (m_verbose) + { + printf("\nCompiled method: "); + PrintMethodName(m_methodHnd); + printf("\nLocals size %d\n", m_totalVarsStackSize); + PrintCompiledCode(); + printf("\n"); + } +#endif + + return CreateInterpMethod(); +} + +void InterpCompiler::PatchInitLocals(CORINFO_METHOD_INFO* methodInfo) +{ + // We may have global vars containing managed pointers or interior pointers, so we need + // to zero the region of the stack containing global vars, not just IL locals. Now that + // offset allocation has occurred we know where the global vars end, so we can expand + // the initlocals opcode that was originally generated to also zero them. 
+ int32_t startOffset = m_pInitLocalsIns->data[0]; + int32_t totalSize = m_globalVarsWithRefsStackTop - startOffset; + if (totalSize > m_pInitLocalsIns->data[1]) + { + INTERP_DUMP( + "Expanding initlocals from [%d-%d] to [%d-%d]\n", + startOffset, startOffset + m_pInitLocalsIns->data[1], + startOffset, startOffset + totalSize + ); + m_pInitLocalsIns->data[1] = totalSize; + } + else + { + INTERP_DUMP( + "Not expanding initlocals from [%d-%d] for global vars stack top of %d\n", + startOffset, startOffset + m_pInitLocalsIns->data[1], + m_globalVarsWithRefsStackTop + ); + } +} + +// Adds a conversion instruction for the value pointed to by sp, also updating the stack information +void InterpCompiler::EmitConv(StackInfo *sp, StackType type, InterpOpcode convOp) +{ + InterpInst *newInst = AddIns(convOp); + + newInst->SetSVar(sp->var); + new (sp) StackInfo(type); + int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, INTERP_STACK_SLOT_SIZE); + sp->var = var; + newInst->SetDVar(var); +} + +static InterpType GetInterpType(CorInfoType corInfoType) +{ + switch (corInfoType) + { + case CORINFO_TYPE_BYTE: + return InterpTypeI1; + case CORINFO_TYPE_UBYTE: + case CORINFO_TYPE_BOOL: + return InterpTypeU1; + case CORINFO_TYPE_CHAR: + case CORINFO_TYPE_USHORT: + return InterpTypeU2; + case CORINFO_TYPE_SHORT: + return InterpTypeI2; + case CORINFO_TYPE_INT: + case CORINFO_TYPE_UINT: + return InterpTypeI4; + case CORINFO_TYPE_LONG: + case CORINFO_TYPE_ULONG: + return InterpTypeI8; + case CORINFO_TYPE_NATIVEINT: + case CORINFO_TYPE_NATIVEUINT: + return InterpTypeI; + case CORINFO_TYPE_FLOAT: + return InterpTypeR4; + case CORINFO_TYPE_DOUBLE: + return InterpTypeR8; + case CORINFO_TYPE_STRING: + case CORINFO_TYPE_CLASS: + return InterpTypeO; + case CORINFO_TYPE_PTR: + return InterpTypeI; + case CORINFO_TYPE_BYREF: + return InterpTypeByRef; + case CORINFO_TYPE_VALUECLASS: + case CORINFO_TYPE_REFANY: + return InterpTypeVT; + case CORINFO_TYPE_VOID: + return InterpTypeVoid; + default: + assert(!"Unimplemented CorInfoType"); + break; + } + return InterpTypeVoid; +} + +int32_t InterpCompiler::GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign) +{ + int32_t size, align; + if (interpType == InterpTypeVT) + { + size = m_compHnd->getClassSize(clsHnd); + align = m_compHnd->getClassAlignmentRequirement(clsHnd); + + assert(align <= INTERP_STACK_ALIGNMENT); + + // All vars are stored at 8 byte aligned offsets + if (align < INTERP_STACK_SLOT_SIZE) + align = INTERP_STACK_SLOT_SIZE; + } + else + { + size = INTERP_STACK_SLOT_SIZE; // not really + align = INTERP_STACK_SLOT_SIZE; + } + *pAlign = align; + return size; +} + +void InterpCompiler::CreateILVars() +{ + bool hasThis = m_methodInfo->args.hasThis(); + bool hasParamArg = m_methodInfo->args.hasTypeArg(); + int paramArgIndex = hasParamArg ? hasThis ? 1 : 0 : INT_MAX; + int32_t offset; + int numArgs = hasThis + m_methodInfo->args.numArgs; + int numILLocals = m_methodInfo->locals.numArgs; + m_numILVars = numArgs + numILLocals; + + // add some starting extra space for new vars + m_varsCapacity = m_numILVars + m_methodInfo->EHcount + 64; + m_pVars = (InterpVar*)AllocTemporary0(m_varsCapacity * sizeof (InterpVar)); + m_varsSize = m_numILVars + hasParamArg; + + offset = 0; + + INTERP_DUMP("\nCreate IL Vars:\n"); + + // NOTE: There is special handling for the param arg, which is stored after the IL locals in the m_pVars array. 
+ // The param arg is not part of the set of arguments defined by the IL method signature, but instead is needed + // to support shared generics codegen, and to be able to determine which exact intantiation of a method is in use. + // The param arg is stashed into the m_pVars array at an unnatural index relative to its position in the physical stack + // so that when parsing the MSIL byte stream it is simple to determine the index of the normal argumentes + // and IL locals, by just knowing the number of IL defined arguments. This allows all of the special handling for + // the param arg to be localized to this function, and the small set of helper functions that directly use it. + + CORINFO_ARG_LIST_HANDLE sigArg = m_methodInfo->args.args; + + int argIndexOffset = 0; + if (hasThis) + { + CORINFO_CLASS_HANDLE argClass = m_compHnd->getMethodClass(m_methodInfo->ftn); + InterpType interpType = m_compHnd->isValueClass(argClass) ? InterpTypeByRef : InterpTypeO; + CreateNextLocalVar(0, argClass, interpType, &offset); + argIndexOffset++; + } + + if (hasParamArg) + { + m_paramArgIndex = m_varsSize - 1; // The param arg is stored after the IL locals in the m_pVars array + CreateNextLocalVar(m_paramArgIndex, NULL, InterpTypeI, &offset); + } + + for (int i = argIndexOffset; i < numArgs; i++) + { + CORINFO_CLASS_HANDLE argClass; + CorInfoType argCorType = strip(m_compHnd->getArgType(&m_methodInfo->args, sigArg, &argClass)); + InterpType interpType = GetInterpType(argCorType); + sigArg = m_compHnd->getArgNext(sigArg); + CreateNextLocalVar(i, argClass, interpType, &offset); + } + offset = ALIGN_UP_TO(offset, INTERP_STACK_ALIGNMENT); + + sigArg = m_methodInfo->locals.args; + m_ILLocalsOffset = offset; + int index = numArgs; + + for (int i = 0; i < numILLocals; i++) { + CORINFO_CLASS_HANDLE argClass; + CorInfoType argCorType = strip(m_compHnd->getArgType(&m_methodInfo->locals, sigArg, &argClass)); + InterpType interpType = GetInterpType(argCorType); + CreateNextLocalVar(index, argClass, interpType, &offset); + sigArg = m_compHnd->getArgNext(sigArg); + index++; + } + + if (hasParamArg) + { + // The param arg is stored after the IL locals in the m_pVars array + assert(index == m_paramArgIndex); + index++; + } + + offset = ALIGN_UP_TO(offset, INTERP_STACK_ALIGNMENT); + m_ILLocalsSize = offset - m_ILLocalsOffset; + + INTERP_DUMP("\nCreate clause Vars:\n"); + + m_clauseVarsIndex = index; + + for (unsigned int i = 0; i < m_methodInfo->EHcount; i++) + { + CreateNextLocalVar(index, NULL, InterpTypeO, &offset); + index++; + } + + m_totalVarsStackSize = offset; +} + +void InterpCompiler::CreateNextLocalVar(int iArgToSet, CORINFO_CLASS_HANDLE argClass, InterpType interpType, int32_t *pOffset) +{ + int32_t align; + int32_t size = GetInterpTypeStackSize(argClass, interpType, &align); + + new (&m_pVars[iArgToSet]) InterpVar(interpType, argClass, size); + + m_pVars[iArgToSet].global = true; + m_pVars[iArgToSet].ILGlobal = true; + m_pVars[iArgToSet].size = size; + *pOffset = ALIGN_UP_TO(*pOffset, align); + m_pVars[iArgToSet].offset = *pOffset; + INTERP_DUMP("alloc arg var %d to offset %d\n", iArgToSet, *pOffset); + *pOffset += size; +} + +// Create finally call island basic blocks for all try regions with finally clauses that the leave exits. +// That means when the leaveOffset is inside the try region and the target is outside of it. +// These finally call island blocks are used for non-exceptional finally execution. 
+// The linked list of finally call island blocks is stored in the pFinallyCallIslandBB field of the finally basic block. +// The pFinallyCallIslandBB in the actual finally call island block points to the outer try region's finally call island block. +void InterpCompiler::CreateFinallyCallIslandBasicBlocks(CORINFO_METHOD_INFO* methodInfo, int32_t leaveOffset, InterpBasicBlock* pLeaveTargetBB) +{ + bool firstFinallyCallIsland = true; + InterpBasicBlock* pInnerFinallyCallIslandBB = NULL; + for (unsigned int i = 0; i < methodInfo->EHcount; i++) + { + CORINFO_EH_CLAUSE clause; + m_compHnd->getEHinfo(methodInfo->ftn, i, &clause); + if (clause.Flags != CORINFO_EH_CLAUSE_FINALLY) + { + continue; + } + + // Only try regions in which the leave instruction is located are considered. + if ((uint32_t)leaveOffset < clause.TryOffset || (uint32_t)leaveOffset > (clause.TryOffset + clause.TryLength)) + { + continue; + } + + // If the leave target is inside the try region, we don't need to create a finally call island block. + if ((uint32_t)pLeaveTargetBB->ilOffset >= clause.TryOffset && (uint32_t)pLeaveTargetBB->ilOffset <= (clause.TryOffset + clause.TryLength)) + { + continue; + } + + InterpBasicBlock* pHandlerBB = GetBB(clause.HandlerOffset); + InterpBasicBlock* pFinallyCallIslandBB = NULL; + + InterpBasicBlock** ppLastBBNext = &pHandlerBB->pFinallyCallIslandBB; + while (*ppLastBBNext != NULL) + { + if ((*ppLastBBNext)->pLeaveTargetBB == pLeaveTargetBB) + { + // We already have finally call island block for the leave target + pFinallyCallIslandBB = (*ppLastBBNext); + break; + } + ppLastBBNext = &((*ppLastBBNext)->pFinallyCallIslandBB); + } + + if (pFinallyCallIslandBB == NULL) + { + pFinallyCallIslandBB = AllocBB(clause.HandlerOffset + clause.HandlerLength); + pFinallyCallIslandBB->pLeaveTargetBB = pLeaveTargetBB; + *ppLastBBNext = pFinallyCallIslandBB; + } + + if (pInnerFinallyCallIslandBB != NULL) + { + pInnerFinallyCallIslandBB->pFinallyCallIslandBB = pFinallyCallIslandBB; + } + pInnerFinallyCallIslandBB = pFinallyCallIslandBB; + + if (firstFinallyCallIsland) + { + // The leaves table entry points to the first finally call island block + firstFinallyCallIsland = false; + + LeavesTableEntry leavesEntry; + leavesEntry.ilOffset = leaveOffset; + leavesEntry.pFinallyCallIslandBB = pFinallyCallIslandBB; + m_leavesTable.Add(leavesEntry); + } + } +} + +bool InterpCompiler::CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo) +{ + int32_t codeSize = methodInfo->ILCodeSize; + uint8_t *codeStart = methodInfo->ILCode; + uint8_t *codeEnd = codeStart + codeSize; + const uint8_t *ip = codeStart; + + m_ppOffsetToBB = (InterpBasicBlock**)AllocMemPool0(sizeof(InterpBasicBlock*) * (methodInfo->ILCodeSize + 1)); + GetBB(0); + + while (ip < codeEnd) + { + int32_t insOffset = (int32_t)(ip - codeStart); + OPCODE opcode = CEEDecodeOpcode(&ip); + OPCODE_FORMAT opArgs = g_CEEOpArgs[opcode]; + int32_t target; + InterpBasicBlock *pTargetBB; + + switch (opArgs) + { + case InlineNone: + ip++; + break; + case InlineString: + case InlineType: + case InlineField: + case InlineMethod: + case InlineTok: + case InlineSig: + case ShortInlineR: + case InlineI: + ip += 5; + break; + case InlineVar: + ip += 3; + break; + case ShortInlineVar: + case ShortInlineI: + ip += 2; + break; + case ShortInlineBrTarget: + target = insOffset + 2 + (int8_t)ip [1]; + if (target >= codeSize) + return false; + pTargetBB = GetBB(target); + if (opcode == CEE_LEAVE_S) + { + CreateFinallyCallIslandBasicBlocks(methodInfo, insOffset, pTargetBB); + } + ip += 2; + 
GetBB((int32_t)(ip - codeStart)); + break; + case InlineBrTarget: + target = insOffset + 5 + getI4LittleEndian(ip + 1); + if (target >= codeSize) + return false; + pTargetBB = GetBB(target); + if (opcode == CEE_LEAVE) + { + CreateFinallyCallIslandBasicBlocks(methodInfo, insOffset, pTargetBB); + } + ip += 5; + GetBB((int32_t)(ip - codeStart)); + break; + case InlineSwitch: { + uint32_t n = getI4LittleEndian(ip + 1); + ip += 5; + insOffset += 5 + 4 * n; + target = insOffset; + if (target >= codeSize) + return false; + GetBB(target); + for (uint32_t i = 0; i < n; i++) + { + target = insOffset + getI4LittleEndian(ip); + if (target >= codeSize) + return false; + GetBB(target); + ip += 4; + } + GetBB((int32_t)(ip - codeStart)); + break; + } + case InlineR: + case InlineI8: + ip += 9; + break; + default: + assert(0); + } + if (opcode == CEE_THROW || opcode == CEE_ENDFINALLY || opcode == CEE_RETHROW) + GetBB((int32_t)(ip - codeStart)); + } + + return true; +} + +bool InterpCompiler::InitializeClauseBuildingBlocks(CORINFO_METHOD_INFO* methodInfo) +{ + int32_t codeSize = methodInfo->ILCodeSize; + uint8_t *codeStart = methodInfo->ILCode; + uint8_t *codeEnd = codeStart + codeSize; + + for (unsigned int i = 0; i < methodInfo->EHcount; i++) + { + CORINFO_EH_CLAUSE clause; + m_compHnd->getEHinfo(methodInfo->ftn, i, &clause); + + if ((codeStart + clause.TryOffset) > codeEnd || + (codeStart + clause.TryOffset + clause.TryLength) > codeEnd) + { + return false; + } + + InterpBasicBlock* pTryBB = GetBB(clause.TryOffset); + + if ((codeStart + clause.HandlerOffset) > codeEnd || + (codeStart + clause.HandlerOffset + clause.HandlerLength) > codeEnd) + { + return false; + } + + // Find and mark all basic blocks that are part of the try region. + for (uint32_t j = clause.TryOffset; j < (clause.TryOffset + clause.TryLength); j++) + { + InterpBasicBlock* pBB = m_ppOffsetToBB[j]; + if (pBB != NULL && pBB->clauseType == BBClauseNone) + { + pBB->clauseType = BBClauseTry; + } + } + + InterpBasicBlock* pHandlerBB = GetBB(clause.HandlerOffset); + + // Find and mark all basic blocks that are part of the handler region. + for (uint32_t j = clause.HandlerOffset; j < (clause.HandlerOffset + clause.HandlerLength); j++) + { + InterpBasicBlock* pBB = m_ppOffsetToBB[j]; + if (pBB != NULL && pBB->clauseType == BBClauseNone) + { + if ((clause.Flags == CORINFO_EH_CLAUSE_NONE) || (clause.Flags == CORINFO_EH_CLAUSE_FILTER)) + { + pBB->clauseType = BBClauseCatch; + } + else + { + assert((clause.Flags == CORINFO_EH_CLAUSE_FINALLY) || (clause.Flags == CORINFO_EH_CLAUSE_FAULT)); + pBB->clauseType = BBClauseFinally; + } + } + } + + if (clause.Flags == CORINFO_EH_CLAUSE_FILTER) + { + if ((codeStart + clause.FilterOffset) > codeEnd) + return false; + + // The filter funclet is always stored right before its handler funclet. + // So the filter end offset is equal to the start offset of the handler funclet. + InterpBasicBlock* pFilterBB = GetBB(clause.FilterOffset); + pFilterBB->isFilterOrCatchFuncletEntry = true; + pFilterBB->clauseVarIndex = m_clauseVarsIndex + i; + + // Initialize the filter stack state. It initially contains the exception object. + pFilterBB->stackHeight = 1; + pFilterBB->pStackState = (StackInfo*)AllocMemPool(sizeof (StackInfo)); + pFilterBB->pStackState[0].type = StackTypeO; + pFilterBB->pStackState[0].size = INTERP_STACK_SLOT_SIZE; + pFilterBB->pStackState[0].clsHnd = NULL; + pFilterBB->pStackState[0].var = pFilterBB->clauseVarIndex; + + // Find and mark all basic blocks that are part of the filter region. 
+ for (uint32_t j = clause.FilterOffset; j < clause.HandlerOffset; j++) + { + InterpBasicBlock* pBB = m_ppOffsetToBB[j]; + if (pBB != NULL && pBB->clauseType == BBClauseNone) + { + pBB->clauseType = BBClauseFilter; + } + } + } + else if (clause.Flags == CORINFO_EH_CLAUSE_FINALLY|| clause.Flags == CORINFO_EH_CLAUSE_FAULT) + { + InterpBasicBlock* pFinallyBB = GetBB(clause.HandlerOffset); + + // Initialize finally handler stack state to empty. + pFinallyBB->stackHeight = 0; + } + + if (clause.Flags == CORINFO_EH_CLAUSE_NONE || clause.Flags == CORINFO_EH_CLAUSE_FILTER) + { + InterpBasicBlock* pCatchBB = GetBB(clause.HandlerOffset); + pCatchBB->isFilterOrCatchFuncletEntry = true; + pCatchBB->clauseVarIndex = m_clauseVarsIndex + i; + + // Initialize the catch / filtered handler stack state. It initially contains the exception object. + pCatchBB->stackHeight = 1; + pCatchBB->pStackState = (StackInfo*)AllocMemPool(sizeof (StackInfo)); + pCatchBB->pStackState[0].type = StackTypeO; + pCatchBB->pStackState[0].size = INTERP_STACK_SLOT_SIZE; + pCatchBB->pStackState[0].var = pCatchBB->clauseVarIndex; + pCatchBB->pStackState[0].clsHnd = NULL; + } + } + + // Now that we have classified all the basic blocks, we can set the clause type for the finally call island blocks. + // We set it to the same type as the basic block after the finally handler. + for (unsigned int i = 0; i < methodInfo->EHcount; i++) + { + CORINFO_EH_CLAUSE clause; + m_compHnd->getEHinfo(methodInfo->ftn, i, &clause); + + if (clause.Flags != CORINFO_EH_CLAUSE_FINALLY) + { + continue; + } + + InterpBasicBlock* pFinallyBB = GetBB(clause.HandlerOffset); + + InterpBasicBlock* pFinallyCallIslandBB = pFinallyBB->pFinallyCallIslandBB; + while (pFinallyCallIslandBB != NULL) + { + InterpBasicBlock* pAfterFinallyBB = m_ppOffsetToBB[clause.HandlerOffset + clause.HandlerLength]; + assert(pAfterFinallyBB != NULL); + pFinallyCallIslandBB->clauseType = pAfterFinallyBB->clauseType; + pFinallyCallIslandBB = pFinallyCallIslandBB->pNextBB; + } + } + + return true; +} + +void InterpCompiler::EmitBranchToBB(InterpOpcode opcode, InterpBasicBlock *pTargetBB) +{ + EmitBBEndVarMoves(pTargetBB); + InitBBStackState(pTargetBB); + + AddIns(opcode); + m_pLastNewIns->info.pTargetBB = pTargetBB; +} + +// ilOffset represents relative branch offset +void InterpCompiler::EmitBranch(InterpOpcode opcode, int32_t ilOffset) +{ + int32_t target = (int32_t)(m_ip - m_pILCode) + ilOffset; + if (target < 0 || target >= m_ILCodeSize) + assert(0); + + // Backwards branch, emit safepoint + if (ilOffset < 0) + AddIns(INTOP_SAFEPOINT); + + InterpBasicBlock *pTargetBB = m_ppOffsetToBB[target]; + assert(pTargetBB != NULL); + + EmitBranchToBB(opcode, pTargetBB); +} + +void InterpCompiler::EmitOneArgBranch(InterpOpcode opcode, int32_t ilOffset, int insSize) +{ + CHECK_STACK_RET_VOID(1); + StackType argType = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeByRef) ? StackTypeI : m_pStackPointer[-1].type; + // offset the opcode to obtain the type specific I4/I8/R4/R8 variant. 
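+ // For example, an I8 operand adds 1 to the _I4 base opcode, R4 adds 2 and R8 adds 3,
+ // mirroring the StackTypeI4..StackTypeR8 ordering used throughout the compiler.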
+ InterpOpcode opcodeArgType = (InterpOpcode)(opcode + argType - StackTypeI4); + m_pStackPointer--; + if (ilOffset) + { + EmitBranch(opcodeArgType, ilOffset + insSize); + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + } + else + { + AddIns(INTOP_NOP); + } +} + +void InterpCompiler::EmitTwoArgBranch(InterpOpcode opcode, int32_t ilOffset, int insSize) +{ + CHECK_STACK_RET_VOID(2); + StackType argType1 = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeByRef) ? StackTypeI : m_pStackPointer[-1].type; + StackType argType2 = (m_pStackPointer[-2].type == StackTypeO || m_pStackPointer[-2].type == StackTypeByRef) ? StackTypeI : m_pStackPointer[-2].type; + + // Since branch opcodes only compare args of the same type, handle implicit conversions before + // emitting the conditional branch + if (argType1 == StackTypeI4 && argType2 == StackTypeI8) + { + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_I4); + argType1 = StackTypeI8; + } + else if (argType1 == StackTypeI8 && argType2 == StackTypeI4) + { + EmitConv(m_pStackPointer - 2, StackTypeI8, INTOP_CONV_I8_I4); + } + else if (argType1 == StackTypeR4 && argType2 == StackTypeR8) + { + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R8_R4); + argType1 = StackTypeR8; + } + else if (argType1 == StackTypeR8 && argType2 == StackTypeR4) + { + EmitConv(m_pStackPointer - 2, StackTypeR8, INTOP_CONV_R8_R4); + } + else if (argType1 != argType2) + { + m_hasInvalidCode = true; + return; + } + + // offset the opcode to obtain the type specific I4/I8/R4/R8 variant. + InterpOpcode opcodeArgType = (InterpOpcode)(opcode + argType1 - StackTypeI4); + m_pStackPointer -= 2; + + if (ilOffset) + { + EmitBranch(opcodeArgType, ilOffset + insSize); + m_pLastNewIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var); + } + else + { + AddIns(INTOP_NOP); + } +} + +void InterpCompiler::EmitLoadVar(int32_t var) +{ + InterpType interpType = m_pVars[var].interpType; + CORINFO_CLASS_HANDLE clsHnd = m_pVars[var].clsHnd; + + if (m_pCBB->clauseType == BBClauseFilter) + { + assert(m_pVars[var].ILGlobal); + AddIns(INTOP_LOAD_FRAMEVAR); + PushInterpType(InterpTypeI, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + EmitLdind(interpType, clsHnd, m_pVars[var].offset); + return; + } + + int32_t size = m_pVars[var].size; + + if (interpType == InterpTypeVT) + PushTypeVT(clsHnd, size); + else + PushInterpType(interpType, clsHnd); + + AddIns(InterpGetMovForType(interpType, true)); + m_pLastNewIns->SetSVar(var); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + if (interpType == InterpTypeVT) + m_pLastNewIns->data[0] = size; +} + +void InterpCompiler::EmitStoreVar(int32_t var) +{ + InterpType interpType = m_pVars[var].interpType; + CHECK_STACK_RET_VOID(1); + + if (m_pCBB->clauseType == BBClauseFilter) + { + AddIns(INTOP_LOAD_FRAMEVAR); + PushInterpType(InterpTypeI, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + EmitStind(interpType, m_pVars[var].clsHnd, m_pVars[var].offset, true /* reverseSVarOrder */); + return; + } + +#ifdef TARGET_64BIT + // nint and int32 can be used interchangeably. Add implicit conversions. 
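+ // For example, storing an I4 value from the stack into a local whose interp type maps
+ // to I8 (e.g. a native int on 64-bit) first inserts an INTOP_CONV_I8_I4 widening
+ // conversion so the mov below operates on matching sizes.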
+ if (m_pStackPointer[-1].type == StackTypeI4 && g_stackTypeFromInterpType[interpType] == StackTypeI8) + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_I4); +#endif + if (m_pStackPointer[-1].type == StackTypeR4 && g_stackTypeFromInterpType[interpType] == StackTypeR8) + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R8_R4); + else if (m_pStackPointer[-1].type == StackTypeR8 && g_stackTypeFromInterpType[interpType] == StackTypeR4) + EmitConv(m_pStackPointer - 1, StackTypeR4, INTOP_CONV_R4_R8); + + m_pStackPointer--; + AddIns(InterpGetMovForType(interpType, false)); + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + m_pLastNewIns->SetDVar(var); + if (interpType == InterpTypeVT) + m_pLastNewIns->data[0] = m_pVars[var].size; +} + +void InterpCompiler::EmitBinaryArithmeticOp(int32_t opBase) +{ + CHECK_STACK_RET_VOID(2); + StackType type1 = m_pStackPointer[-2].type; + StackType type2 = m_pStackPointer[-1].type; + + StackType typeRes; + + if (opBase == INTOP_ADD_I4 && (type1 == StackTypeByRef || type2 == StackTypeByRef)) + { + if (type1 == type2) + INVALID_CODE_RET_VOID; + if (type1 == StackTypeByRef) + { + if (type2 == StackTypeI4) + { +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_I4); + type2 = StackTypeI8; +#endif + typeRes = StackTypeByRef; + } + else if (type2 == StackTypeI) + { + typeRes = StackTypeByRef; + } + else + { + INVALID_CODE_RET_VOID; + } + } + else + { + // type2 == StackTypeByRef + if (type1 == StackTypeI4) + { +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 2, StackTypeI8, INTOP_CONV_I8_I4); + type1 = StackTypeI8; +#endif + typeRes = StackTypeByRef; + } + else if (type1 == StackTypeI) + { + typeRes = StackTypeByRef; + } + else + { + INVALID_CODE_RET_VOID; + } + } + } + else if (opBase == INTOP_SUB_I4 && type1 == StackTypeByRef) + { + if (type2 == StackTypeI4) + { +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_I4); + type2 = StackTypeI8; +#endif + typeRes = StackTypeByRef; + } + else if (type2 == StackTypeI) + { + typeRes = StackTypeByRef; + } + else if (type2 == StackTypeByRef) + { + typeRes = StackTypeI; + } + else + { + INVALID_CODE_RET_VOID; + } + } + else + { +#if TARGET_64BIT + if (type1 == StackTypeI8 && type2 == StackTypeI4) + { + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_I4); + type2 = StackTypeI8; + } + else if (type1 == StackTypeI4 && type2 == StackTypeI8) + { + EmitConv(m_pStackPointer - 2, StackTypeI8, INTOP_CONV_I8_I4); + type1 = StackTypeI8; + } +#endif + if (type1 == StackTypeR8 && type2 == StackTypeR4) + { + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R8_R4); + type2 = StackTypeR8; + } + else if (type1 == StackTypeR4 && type2 == StackTypeR8) + { + EmitConv(m_pStackPointer - 2, StackTypeR8, INTOP_CONV_R8_R4); + type1 = StackTypeR8; + } + if (type1 != type2) + INVALID_CODE_RET_VOID; + + typeRes = type1; + } + + // The argument opcode is for the base _I4 instruction. Depending on the type of the result + // we compute the specific variant, _I4/_I8/_R4 or R8. + int32_t typeOffset = ((typeRes == StackTypeByRef) ? 
StackTypeI : typeRes) - StackTypeI4; + int32_t finalOpcode = opBase + typeOffset; + + m_pStackPointer -= 2; + AddIns(finalOpcode); + m_pLastNewIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var); + PushStackType(typeRes, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); +} + +void InterpCompiler::EmitUnaryArithmeticOp(int32_t opBase) +{ + CHECK_STACK_RET_VOID(1); + StackType stackType = m_pStackPointer[-1].type; + int32_t finalOpcode = opBase + (stackType - StackTypeI4); + + if (stackType == StackTypeByRef || stackType == StackTypeO) + INVALID_CODE_RET_VOID; + if (opBase == INTOP_NOT_I4 && (stackType != StackTypeI4 && stackType != StackTypeI8)) + INVALID_CODE_RET_VOID; + + m_pStackPointer--; + AddIns(finalOpcode); + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + PushStackType(stackType, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); +} + +void InterpCompiler::EmitShiftOp(int32_t opBase) +{ + CHECK_STACK_RET_VOID(2); + StackType stackType = m_pStackPointer[-2].type; + StackType shiftAmountType = m_pStackPointer[-1].type; + int32_t typeOffset = stackType - StackTypeI4; + int32_t finalOpcode = opBase + typeOffset; + + if ((stackType != StackTypeI4 && stackType != StackTypeI8) || + (shiftAmountType != StackTypeI4 && shiftAmountType != StackTypeI)) + INVALID_CODE_RET_VOID; + + m_pStackPointer -= 2; + AddIns(finalOpcode); + m_pLastNewIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var); + PushStackType(stackType, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); +} + +void InterpCompiler::EmitCompareOp(int32_t opBase) +{ + CHECK_STACK_RET_VOID(2); + if (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeByRef) + { + AddIns(opBase + StackTypeI - StackTypeI4); + } + else + { + if (m_pStackPointer[-1].type == StackTypeR4 && m_pStackPointer[-2].type == StackTypeR8) + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R8_R4); + if (m_pStackPointer[-1].type == StackTypeR8 && m_pStackPointer[-2].type == StackTypeR4) + EmitConv(m_pStackPointer - 2, StackTypeR8, INTOP_CONV_R8_R4); + AddIns(opBase + m_pStackPointer[-1].type - StackTypeI4); + } + m_pStackPointer -= 2; + m_pLastNewIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var); + PushStackType(StackTypeI4, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); +} + +int32_t InterpCompiler::GetDataItemIndex(void *data) +{ + int32_t index = m_dataItems.Find(data); + if (index != -1) + return index; + + return m_dataItems.Add(data); +} + +void* InterpCompiler::GetDataItemAtIndex(int32_t index) +{ + if (index < 0 || index >= m_dataItems.GetSize()) + { + assert(!"Invalid data item index"); + return NULL; + } + return m_dataItems.Get(index); +} + +int32_t InterpCompiler::GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle) +{ + return GetDataItemIndex((void*)mHandle); +} + +int32_t InterpCompiler::GetDataItemIndexForHelperFtn(CorInfoHelpFunc ftn) +{ + void *indirect; + void *direct = m_compHnd->getHelperFtn(ftn, &indirect); + size_t data = !direct + ? 
(size_t)indirect | INTERP_INDIRECT_HELPER_TAG + : (size_t)direct; + assert(data); + return GetDataItemIndex((void*)data); +} + +bool InterpCompiler::EmitCallIntrinsics(CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO sig) +{ + const char *className = NULL; + const char *namespaceName = NULL; + const char *methodName = m_compHnd->getMethodNameFromMetadata(method, &className, &namespaceName, NULL, 0); + + if (namespaceName && !strcmp(namespaceName, "System")) + { + if (className && !strcmp(className, "Environment")) + { + if (methodName && !strcmp(methodName, "FailFast")) + { + AddIns(INTOP_FAILFAST); // to be removed, not really an intrisic + m_pStackPointer--; + return true; + } + } + else if (className && !strcmp(className, "Object")) + { + // This is needed at this moment because we don't have support for interop + // with compiled code, but it might make sense in the future for this to remain + // in order to avoid redundant interp to jit transition. + if (methodName && !strcmp(methodName, ".ctor")) + { + AddIns(INTOP_NOP); + m_pStackPointer--; + return true; + } + } + else if (className && !strcmp(className, "GC")) + { + if (methodName && !strcmp(methodName, "Collect")) + { + AddIns(INTOP_GC_COLLECT); + // Not reducing the stack pointer because we expect the version with no arguments + return true; + } + } + } + + return false; +} + +void InterpCompiler::ResolveToken(uint32_t token, CorInfoTokenKind tokenKind, CORINFO_RESOLVED_TOKEN *pResolvedToken) +{ + pResolvedToken->tokenScope = m_compScopeHnd; + pResolvedToken->tokenContext = METHOD_BEING_COMPILED_CONTEXT(); + pResolvedToken->token = token; + pResolvedToken->tokenType = tokenKind; + m_compHnd->resolveToken(pResolvedToken); +} + +CORINFO_METHOD_HANDLE InterpCompiler::ResolveMethodToken(uint32_t token) +{ + CORINFO_RESOLVED_TOKEN resolvedToken; + + ResolveToken(token, CORINFO_TOKENKIND_Method, &resolvedToken); + + return resolvedToken.hMethod; +} + +CORINFO_CLASS_HANDLE InterpCompiler::ResolveClassToken(uint32_t token) +{ + CORINFO_RESOLVED_TOKEN resolvedToken; + + ResolveToken(token, CORINFO_TOKENKIND_Class, &resolvedToken); + + return resolvedToken.hClass; +} + +CORINFO_CLASS_HANDLE InterpCompiler::getClassFromContext(CORINFO_CONTEXT_HANDLE context) +{ + if (context == METHOD_BEING_COMPILED_CONTEXT()) + { + return m_compHnd->getMethodClass(m_methodHnd); // This really should be just a field access, but we don't have that field in the InterpCompiler now + } + + if (((SIZE_T)context & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_CLASS) + { + return CORINFO_CLASS_HANDLE((SIZE_T)context & ~CORINFO_CONTEXTFLAGS_MASK); + } + else + { + return m_compHnd->getMethodClass(CORINFO_METHOD_HANDLE((SIZE_T)context & ~CORINFO_CONTEXTFLAGS_MASK)); + } +} + +int InterpCompiler::getParamArgIndex() +{ + return m_paramArgIndex; +} + +InterpCompiler::InterpEmbedGenericResult InterpCompiler::EmitGenericHandle(CORINFO_RESOLVED_TOKEN* resolvedToken, GenericHandleEmbedOptions options) +{ + CORINFO_GENERICHANDLE_RESULT embedInfo; + InterpEmbedGenericResult result; + m_compHnd->embedGenericHandle(resolvedToken, HasFlag(options, GenericHandleEmbedOptions::EmbedParent), m_methodInfo->ftn, &embedInfo); + if (HasFlag(options, GenericHandleEmbedOptions::VarOnly) || embedInfo.lookup.lookupKind.needsRuntimeLookup) + { + result.var = EmitGenericHandleAsVar(embedInfo); + } + else + { + assert(embedInfo.lookup.constLookup.accessType == IAT_VALUE); + result.dataItemIndex = GetDataItemIndex(embedInfo.lookup.constLookup.handle); + } + return result; +} + +int 
InterpCompiler::EmitGenericHandleAsVar(const CORINFO_GENERICHANDLE_RESULT &embedInfo)
+{
+    PushStackType(StackTypeI, NULL);
+    int resultVar = m_pStackPointer[-1].var;
+    m_pStackPointer--;
+
+    if (embedInfo.lookup.lookupKind.needsRuntimeLookup)
+    {
+        CORINFO_RUNTIME_LOOKUP_KIND runtimeLookupKind = embedInfo.lookup.lookupKind.runtimeLookupKind;
+        if (runtimeLookupKind == CORINFO_LOOKUP_METHODPARAM)
+        {
+            AddIns(INTOP_GENERICLOOKUP_METHOD);
+        }
+        else if (runtimeLookupKind == CORINFO_LOOKUP_THISOBJ)
+        {
+            AddIns(INTOP_GENERICLOOKUP_THIS);
+        }
+        else
+        {
+            AddIns(INTOP_GENERICLOOKUP_CLASS);
+        }
+        CORINFO_RUNTIME_LOOKUP *pRuntimeLookup = (CORINFO_RUNTIME_LOOKUP*)AllocMethodData(sizeof(CORINFO_RUNTIME_LOOKUP));
+        *pRuntimeLookup = embedInfo.lookup.runtimeLookup;
+        m_pLastNewIns->data[0] = GetDataItemIndex(pRuntimeLookup);
+
+        m_pLastNewIns->SetSVar(getParamArgIndex());
+        m_pLastNewIns->SetDVar(resultVar);
+    }
+    else
+    {
+        AddIns(INTOP_LDPTR);
+        m_pLastNewIns->SetDVar(resultVar);
+
+        assert(embedInfo.lookup.constLookup.accessType == IAT_VALUE);
+        m_pLastNewIns->data[0] = GetDataItemIndex(embedInfo.lookup.constLookup.handle);
+    }
+    return resultVar;
+}
+
+void InterpCompiler::EmitCall(CORINFO_RESOLVED_TOKEN* constrainedClass, bool readonly, bool tailcall, bool newObj)
+{
+    uint32_t token = getU4LittleEndian(m_ip + 1);
+    bool isVirtual = (*m_ip == CEE_CALLVIRT);
+
+    CORINFO_RESOLVED_TOKEN resolvedCallToken;
+    bool doCallInsteadOfNew = false;
+
+    ResolveToken(token, newObj ? CORINFO_TOKENKIND_NewObj : CORINFO_TOKENKIND_Method, &resolvedCallToken);
+
+    CORINFO_CALL_INFO callInfo;
+    CORINFO_CALLINFO_FLAGS flags = (CORINFO_CALLINFO_FLAGS)(CORINFO_CALLINFO_ALLOWINSTPARAM | CORINFO_CALLINFO_SECURITYCHECKS | CORINFO_CALLINFO_DISALLOW_STUB);
+    if (isVirtual)
+        flags = (CORINFO_CALLINFO_FLAGS)(flags | CORINFO_CALLINFO_CALLVIRT);
+
+    m_compHnd->getCallInfo(&resolvedCallToken, constrainedClass, m_methodInfo->ftn, flags, &callInfo);
+
+    if (EmitCallIntrinsics(callInfo.hMethod, callInfo.sig))
+    {
+        m_ip += 5;
+        return;
+    }
+
+    if (callInfo.classFlags & CORINFO_FLG_VAROBJSIZE)
+    {
+        // This is a variable size object which means "System.String".
+        // For these, we just call the resolved method directly, but don't actually pass a this pointer to it.
+        doCallInsteadOfNew = true;
+    }
+
+    // Process sVars
+    int numArgsFromStack = callInfo.sig.numArgs + (newObj ? 0 : callInfo.sig.hasThis());
+    int newObjThisArgLocation = newObj && !doCallInsteadOfNew ? 0 : INT_MAX;
+    int numArgs = numArgsFromStack + (newObjThisArgLocation == 0);
+    m_pStackPointer -= numArgsFromStack;
+
+    int extraParamArgLocation = INT_MAX;
+    if (callInfo.sig.hasTypeArg())
+    {
+        extraParamArgLocation = callInfo.sig.hasThis() ? 1 : 0;
+        numArgs++;
+    }
+
+    int *callArgs = (int*) AllocMemPool((numArgs + 1) * sizeof(int));
+    for (int iActualArg = 0, iLogicalArg = 0; iActualArg < numArgs; iActualArg++)
+    {
+        if (iActualArg == extraParamArgLocation)
+        {
+            // This is the extra type argument, which is not on the logical IL stack
+            // Skip it for now. We will fill it in later.
+        }
+        else if (iActualArg == newObjThisArgLocation)
+        {
+            // This is the newObj this argument, which is not on the logical IL stack
+            // Skip it for now. We will fill it in later.
+ } + else + { + callArgs[iActualArg] = m_pStackPointer [iLogicalArg].var; + iLogicalArg++; + } + } + callArgs[numArgs] = -1; + + InterpEmbedGenericResult newObjType; + int32_t newObjThisVar = -1; + int32_t newObjDVar = -1; + InterpType ctorType = InterpTypeO; + int32_t vtsize = 0; + + if (newObjThisArgLocation != INT_MAX) + { + ctorType = GetInterpType(m_compHnd->asCorInfoType(resolvedCallToken.hClass)); + if (ctorType == InterpTypeVT) + { + vtsize = m_compHnd->getClassSize(resolvedCallToken.hClass); + PushTypeVT(resolvedCallToken.hClass, vtsize); + PushInterpType(InterpTypeByRef, NULL); + } + else + { + PushInterpType(ctorType, resolvedCallToken.hClass); + PushInterpType(ctorType, resolvedCallToken.hClass); + + newObjType = EmitGenericHandle(&resolvedCallToken, GenericHandleEmbedOptions::EmbedParent); + } + newObjDVar = m_pStackPointer[-2].var; + newObjThisVar = m_pStackPointer[-1].var; + m_pStackPointer--; + // Consider this arg as being defined, although newobj defines it + AddIns(INTOP_DEF); + m_pLastNewIns->SetDVar(newObjThisVar); + + callArgs[newObjThisArgLocation] = newObjThisVar; + } + + if (extraParamArgLocation != INT_MAX) + { + int contextParamVar = -1; + + // Instantiated generic method + CORINFO_CONTEXT_HANDLE exactContextHnd = callInfo.contextHandle; + if (((SIZE_T)exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_METHOD) + { + assert(exactContextHnd != METHOD_BEING_COMPILED_CONTEXT()); + + CORINFO_METHOD_HANDLE exactMethodHandle = + (CORINFO_METHOD_HANDLE)((SIZE_T)exactContextHnd & ~CORINFO_CONTEXTFLAGS_MASK); + + if (!callInfo.exactContextNeedsRuntimeLookup) + { + PushStackType(StackTypeI, NULL); + m_pStackPointer--; + contextParamVar = m_pStackPointer[0].var; + AddIns(INTOP_LDPTR); + m_pLastNewIns->SetDVar(contextParamVar); + m_pLastNewIns->data[0] = GetDataItemIndex((void*)exactMethodHandle); + } + else + { + contextParamVar = EmitGenericHandle(&resolvedCallToken, GenericHandleEmbedOptions::VarOnly).var; + } + } + + // otherwise must be an instance method in a generic struct, + // a static method in a generic type, or a runtime-generated array method + else + { + assert(((SIZE_T)exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_CLASS); + CORINFO_CLASS_HANDLE exactClassHandle = getClassFromContext(exactContextHnd); + + if ((callInfo.classFlags & CORINFO_FLG_ARRAY) && readonly) + { + PushStackType(StackTypeI, NULL); + m_pStackPointer--; + contextParamVar = m_pStackPointer[0].var; + // We indicate "readonly" to the Address operation by using a null + // instParam. 
+ AddIns(INTOP_LDPTR); + m_pLastNewIns->SetDVar(contextParamVar); + m_pLastNewIns->data[0] = GetDataItemIndex(NULL); + } + else if (!callInfo.exactContextNeedsRuntimeLookup) + { + PushStackType(StackTypeI, NULL); + m_pStackPointer--; + contextParamVar = m_pStackPointer[0].var; + AddIns(INTOP_LDPTR); + m_pLastNewIns->SetDVar(contextParamVar); + m_pLastNewIns->data[0] = GetDataItemIndex((void*)exactClassHandle); + } + else + { + contextParamVar = EmitGenericHandle(&resolvedCallToken, GenericHandleEmbedOptions::VarOnly | GenericHandleEmbedOptions::EmbedParent).var; + } + } + callArgs[extraParamArgLocation] = contextParamVar; + } + + // Process dVar + int32_t dVar; + if (newObjDVar != -1) + { + dVar = newObjDVar; + } + else if (doCallInsteadOfNew) + { + PushInterpType(InterpTypeO, NULL); + dVar = m_pStackPointer[-1].var; + } + else if (callInfo.sig.retType != CORINFO_TYPE_VOID) + { + InterpType interpType = GetInterpType(callInfo.sig.retType); + + if (interpType == InterpTypeVT) + { + int32_t size = m_compHnd->getClassSize(callInfo.sig.retTypeClass); + PushTypeVT(callInfo.sig.retTypeClass, size); + } + else + { + PushInterpType(interpType, NULL); + } + dVar = m_pStackPointer[-1].var; + } + else + { + // Create a new dummy var to serve as the dVar of the call + // FIXME Consider adding special dVar type (ex -1), that is + // resolved to null offset. The opcode shouldn't really write to it + PushStackType(StackTypeI4, NULL); + m_pStackPointer--; + dVar = m_pStackPointer[0].var; + } + + // Emit call instruction + switch (callInfo.kind) + { + case CORINFO_CALL: + if (newObj && !doCallInsteadOfNew) + { + if (ctorType == InterpTypeVT) + { + // If this is a newobj for a value type, we need to call the constructor + // and then copy the value type to the stack. + AddIns(INTOP_NEWOBJ_VT); + m_pLastNewIns->data[1] = (int32_t)ALIGN_UP_TO(vtsize, INTERP_STACK_SLOT_SIZE); + } + else + { + if (newObjType.var != -1) + { + // newobj of type known only through a generic dictionary lookup. + AddIns(INTOP_NEWOBJ_VAR); + m_pLastNewIns->SetSVars2(CALL_ARGS_SVAR, newObjType.var); + } + else + { + // Normal newobj call + AddIns(INTOP_NEWOBJ); + m_pLastNewIns->data[1] = newObjType.dataItemIndex; + } + } + m_pLastNewIns->data[0] = GetDataItemIndex(callInfo.hMethod); + } + else + { + // Normal call + if (callInfo.nullInstanceCheck) + { + // If the call is a normal call, we need to check for null instance + // before the call. + // TODO: Add null checking behavior somewhere here! + } + AddIns(INTOP_CALL); + m_pLastNewIns->data[0] = GetMethodDataItemIndex(callInfo.hMethod); + } + break; + + case CORINFO_CALL_CODE_POINTER: + if (callInfo.nullInstanceCheck) + { + // If the call is a normal call, we need to check for null instance + // before the call. + // TODO: Add null checking behavior somewhere here! + } + assert(!"Need to support calling a code pointer"); + break; + + case CORINFO_VIRTUALCALL_VTABLE: + // Traditional virtual call. 
In theory we could optimize this to using the vtable + AddIns(INTOP_CALLVIRT); + m_pLastNewIns->data[0] = GetDataItemIndex(callInfo.hMethod); + break; + + case CORINFO_VIRTUALCALL_LDVIRTFTN: + if (callInfo.exactContextNeedsRuntimeLookup) + { + // Resolve a virtual call using the helper function to a function pointer, and then call through that + assert(!"Need to support ldvirtftn path"); + } + else + { + AddIns(INTOP_CALLVIRT); + m_pLastNewIns->data[0] = GetDataItemIndex(callInfo.hMethod); + } + break; + + case CORINFO_VIRTUALCALL_STUB: + // This case should never happen + assert(!"Unexpected call kind"); + break; + } + + m_pLastNewIns->SetDVar(dVar); + m_pLastNewIns->SetSVar(CALL_ARGS_SVAR); + + m_pLastNewIns->flags |= INTERP_INST_FLAG_CALL; + m_pLastNewIns->info.pCallInfo = (InterpCallInfo*)AllocMemPool0(sizeof (InterpCallInfo)); + m_pLastNewIns->info.pCallInfo->pCallArgs = callArgs; + + m_ip += 5; +} + +static int32_t GetLdindForType(InterpType interpType) +{ + switch (interpType) + { + case InterpTypeI1: return INTOP_LDIND_I1; + case InterpTypeU1: return INTOP_LDIND_U1; + case InterpTypeI2: return INTOP_LDIND_I2; + case InterpTypeU2: return INTOP_LDIND_U2; + case InterpTypeI4: return INTOP_LDIND_I4; + case InterpTypeI8: return INTOP_LDIND_I8; + case InterpTypeR4: return INTOP_LDIND_R4; + case InterpTypeR8: return INTOP_LDIND_R8; + case InterpTypeO: return INTOP_LDIND_I; + case InterpTypeVT: return INTOP_LDIND_VT; + case InterpTypeByRef: return INTOP_LDIND_I; + default: + assert(0); + } + return -1; +} + +static int32_t GetStindForType(InterpType interpType) +{ + switch (interpType) + { + case InterpTypeI1: return INTOP_STIND_I1; + case InterpTypeU1: return INTOP_STIND_U1; + case InterpTypeI2: return INTOP_STIND_I2; + case InterpTypeU2: return INTOP_STIND_U2; + case InterpTypeI4: return INTOP_STIND_I4; + case InterpTypeI8: return INTOP_STIND_I8; + case InterpTypeR4: return INTOP_STIND_R4; + case InterpTypeR8: return INTOP_STIND_R8; + case InterpTypeO: return INTOP_STIND_O; + case InterpTypeVT: return INTOP_STIND_VT; + case InterpTypeByRef: return INTOP_STIND_I; + default: + assert(0); + } + return -1; +} + +static int32_t GetStelemForType(InterpType interpType) +{ + switch (interpType) + { + case InterpTypeI1: return INTOP_STELEM_I1; + case InterpTypeU1: return INTOP_STELEM_U1; + case InterpTypeI2: return INTOP_STELEM_I2; + case InterpTypeU2: return INTOP_STELEM_U2; + case InterpTypeI4: return INTOP_STELEM_I4; + case InterpTypeI8: return INTOP_STELEM_I8; + case InterpTypeR4: return INTOP_STELEM_R4; + case InterpTypeR8: return INTOP_STELEM_R8; + default: + assert(0); + } + return -1; +} + +void InterpCompiler::EmitLdind(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int32_t offset) +{ + // Address is at the top of the stack + m_pStackPointer--; + int32_t opcode = GetLdindForType(interpType); + AddIns(opcode); + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + m_pLastNewIns->data[0] = offset; + if (interpType == InterpTypeVT) + { + int size = m_compHnd->getClassSize(clsHnd); + m_pLastNewIns->data[1] = size; + PushTypeVT(clsHnd, size); + } + else + { + PushInterpType(interpType, NULL); + } + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); +} + +void InterpCompiler::EmitStind(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int32_t offset, bool reverseSVarOrder) +{ + // stack contains address and then the value to be stored + // or in the reverse order if the flag is set + if (interpType == InterpTypeVT) + { + if (m_compHnd->getClassAttribs(clsHnd) & CORINFO_FLG_CONTAINS_GC_PTR) + 
{ + AddIns(INTOP_STIND_VT); + m_pLastNewIns->data[1] = GetDataItemIndex(clsHnd); + } + else + { + AddIns(INTOP_STIND_VT_NOREF); + m_pLastNewIns->data[1] = m_compHnd->getClassSize(clsHnd); + } + } + else + { + AddIns(GetStindForType(interpType)); + } + + m_pLastNewIns->data[0] = offset; + + m_pStackPointer -= 2; + if (reverseSVarOrder) + m_pLastNewIns->SetSVars2(m_pStackPointer[1].var, m_pStackPointer[0].var); + else + m_pLastNewIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var); + +} + +void InterpCompiler::EmitLdelem(int32_t opcode, InterpType interpType) +{ + m_pStackPointer -= 2; + AddIns(opcode); + m_pLastNewIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var); + PushInterpType(interpType, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); +} + +void InterpCompiler::EmitStelem(InterpType interpType) +{ + m_pStackPointer -= 3; + int32_t opcode = GetStelemForType(interpType); + AddIns(opcode); + m_pLastNewIns->SetSVars3(m_pStackPointer[0].var, m_pStackPointer[1].var, m_pStackPointer[2].var); +} + +void InterpCompiler::EmitStaticFieldAddress(CORINFO_FIELD_INFO *pFieldInfo, CORINFO_RESOLVED_TOKEN *pResolvedToken) +{ + bool isBoxedStatic = (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP) != 0; + switch (pFieldInfo->fieldAccessor) + { + case CORINFO_FIELD_STATIC_ADDRESS: + case CORINFO_FIELD_STATIC_RVA_ADDRESS: + { + // const field address + assert(pFieldInfo->fieldLookup.accessType == IAT_VALUE); + AddIns(INTOP_LDPTR); + PushInterpType(InterpTypeByRef, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_pLastNewIns->data[0] = GetDataItemIndex(pFieldInfo->fieldLookup.addr); + break; + } + case CORINFO_FIELD_STATIC_TLS_MANAGED: + case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER: + { + void *helperArg = NULL; + switch (pFieldInfo->helper) + { + case CORINFO_HELP_GETDYNAMIC_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED: + case CORINFO_HELP_GETDYNAMIC_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED2: + case CORINFO_HELP_GETDYNAMIC_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED2_NOJITOPT: + helperArg = (void*)(size_t)m_compHnd->getThreadLocalFieldInfo(pResolvedToken->hField, false); + break; + case CORINFO_HELP_GETDYNAMIC_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED: + helperArg = (void*)(size_t)m_compHnd->getThreadLocalFieldInfo(pResolvedToken->hField, true); + break; + case CORINFO_HELP_GETDYNAMIC_GCTHREADSTATIC_BASE_NOCTOR: + case CORINFO_HELP_GETDYNAMIC_NONGCTHREADSTATIC_BASE_NOCTOR: + case CORINFO_HELP_GETDYNAMIC_GCTHREADSTATIC_BASE: + case CORINFO_HELP_GETDYNAMIC_NONGCTHREADSTATIC_BASE: + helperArg = (void*)m_compHnd->getClassThreadStaticDynamicInfo(pResolvedToken->hClass); + break; + default: + // TODO + assert(0); + break; + } + // Call helper to obtain thread static base address + AddIns(INTOP_CALL_HELPER_PP); + m_pLastNewIns->data[0] = GetDataItemIndexForHelperFtn(pFieldInfo->helper); + m_pLastNewIns->data[1] = GetDataItemIndex(helperArg); + PushInterpType(InterpTypeByRef, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + + // Add field offset + m_pStackPointer--; + AddIns(INTOP_ADD_P_IMM); + m_pLastNewIns->data[0] = (int32_t)pFieldInfo->offset; + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + PushInterpType(InterpTypeByRef, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + break; + } + case CORINFO_FIELD_INTRINSIC_EMPTY_STRING: + { + void *emptyString; + InfoAccessType iat = m_compHnd->emptyStringLiteral(&emptyString); + assert(iat == IAT_VALUE); + AddIns(INTOP_LDPTR); + PushInterpType(InterpTypeO, NULL); + 
m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_pLastNewIns->data[0] = GetDataItemIndex(emptyString); + break; + } + default: + // TODO + assert(0); + break; + } + + if (isBoxedStatic) + { + // Obtain boxed instance ref + m_pStackPointer--; + AddIns(INTOP_LDIND_I); + m_pLastNewIns->data[0] = 0; + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + PushInterpType(InterpTypeO, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + + // Skip method table word + m_pStackPointer--; + AddIns(INTOP_ADD_P_IMM); + m_pLastNewIns->data[0] = sizeof(void*); + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + PushInterpType(InterpTypeByRef, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + } +} + +void InterpCompiler::EmitStaticFieldAccess(InterpType interpFieldType, CORINFO_FIELD_INFO *pFieldInfo, CORINFO_RESOLVED_TOKEN *pResolvedToken, bool isLoad) +{ + EmitStaticFieldAddress(pFieldInfo, pResolvedToken); + if (isLoad) + EmitLdind(interpFieldType, pFieldInfo->structType, 0); + else + EmitStind(interpFieldType, pFieldInfo->structType, 0, true); +} + +void InterpCompiler::EmitLdLocA(int32_t var) +{ + if (m_pCBB->clauseType == BBClauseFilter) + { + AddIns(INTOP_LOAD_FRAMEVAR); + PushInterpType(InterpTypeI, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + AddIns(INTOP_ADD_P_IMM); + m_pLastNewIns->data[0] = m_pVars[var].offset; + m_pLastNewIns->SetSVar(m_pStackPointer[-1].var); + m_pStackPointer--; + PushInterpType(InterpTypeByRef, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + return; + } + + AddIns(INTOP_LDLOCA); + m_pLastNewIns->SetSVar(var); + PushInterpType(InterpTypeByRef, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); +} + +int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) +{ + bool readonly = false; + bool tailcall = false; + bool volatile_ = false; + CORINFO_RESOLVED_TOKEN* constrainedClass = NULL; + CORINFO_RESOLVED_TOKEN constrainedToken; + uint8_t *codeEnd; + int numArgs = m_methodInfo->args.hasThis() + m_methodInfo->args.numArgs; + bool emittedBBlocks, linkBBlocks, needsRetryEmit; + m_ip = m_pILCode = methodInfo->ILCode; + m_ILCodeSize = (int32_t)methodInfo->ILCodeSize; + + m_stackCapacity = methodInfo->maxStack + 1; + m_pStackBase = m_pStackPointer = (StackInfo*)AllocTemporary(sizeof(StackInfo) * m_stackCapacity); + + m_pEntryBB = AllocBB(0); + m_pEntryBB->emitState = BBStateEmitting; + m_pEntryBB->stackHeight = 0; + m_pCBB = m_pEntryBB; + + InterpBasicBlock *pFirstFuncletBB = NULL; + InterpBasicBlock *pLastFuncletBB = NULL; + + if (!CreateBasicBlocks(methodInfo)) + { + m_hasInvalidCode = true; + goto exit_bad_code; + } + + if (!InitializeClauseBuildingBlocks(methodInfo)) + { + m_hasInvalidCode = true; + goto exit_bad_code; + } + + m_currentILOffset = -1; + +#if DEBUG + if (InterpConfig.InterpHalt().contains(m_compHnd, m_methodHnd, m_classHnd, &m_methodInfo->args)) + AddIns(INTOP_BREAKPOINT); +#endif + + // We need to always generate this opcode because even if we have no IL locals, we may have + // global vars which contain managed pointers or interior pointers + m_pInitLocalsIns = AddIns(INTOP_INITLOCALS); + // if (methodInfo->options & CORINFO_OPT_INIT_LOCALS) + // FIXME: We can't currently skip zeroing locals because we don't have accurate liveness for global refs and byrefs + m_pInitLocalsIns->data[0] = m_ILLocalsOffset; + m_pInitLocalsIns->data[1] = m_ILLocalsSize; + + codeEnd = m_ip + m_ILCodeSize; + + // Safepoint at each method entry. This could be done as part of a call, rather than + // adding an opcode. 
+ AddIns(INTOP_SAFEPOINT); + + linkBBlocks = true; + needsRetryEmit = false; + +retry_emit: + emittedBBlocks = false; + while (m_ip < codeEnd) + { + // Check here for every opcode to avoid code bloat + if (m_hasInvalidCode) + goto exit_bad_code; + + int32_t insOffset = (int32_t)(m_ip - m_pILCode); + m_currentILOffset = insOffset; + + InterpBasicBlock *pNewBB = m_ppOffsetToBB[insOffset]; + if (pNewBB != NULL && m_pCBB != pNewBB) + { + INTERP_DUMP("BB%d (IL_%04x):\n", pNewBB->index, pNewBB->ilOffset); + // If we were emitting into previous bblock, we are finished now + if (m_pCBB->emitState == BBStateEmitting) + m_pCBB->emitState = BBStateEmitted; + // If the new bblock was already emitted, skip its instructions + if (pNewBB->emitState == BBStateEmitted) + { + if (linkBBlocks) + { + LinkBBs(m_pCBB, pNewBB); + // Further emitting can only start at a point where the bblock is not fallthrough + linkBBlocks = false; + } + // If the bblock was fully emitted it means we already iterated at least once over + // all instructions so we have `pNextBB` initialized, unless it is the last bblock. + // Skip through all emitted bblocks. + m_pCBB = pNewBB; + while (m_pCBB->pNextBB && m_pCBB->pNextBB->emitState == BBStateEmitted) + m_pCBB = m_pCBB->pNextBB; + + if (m_pCBB->pNextBB) + m_ip = m_pILCode + m_pCBB->pNextBB->ilOffset; + else + m_ip = codeEnd; + + continue; + } + else + { + assert (pNewBB->emitState == BBStateNotEmitted); + } + + // We are starting a new basic block. Change cbb and link them together + if (linkBBlocks) + { + // By default we link cbb with the new starting bblock, unless the previous + // instruction is an unconditional branch (BR, LEAVE, ENDFINALLY) + LinkBBs(m_pCBB, pNewBB); + EmitBBEndVarMoves(pNewBB); + pNewBB->emitState = BBStateEmitting; + emittedBBlocks = true; + if (pNewBB->stackHeight >= 0) + { + MergeStackTypeInfo(m_pStackBase, pNewBB->pStackState, pNewBB->stackHeight); + // This is relevant only for copying the vars associated with the values on the stack + memcpy(m_pStackBase, pNewBB->pStackState, pNewBB->stackHeight * sizeof(StackInfo)); + m_pStackPointer = m_pStackBase + pNewBB->stackHeight; + } + else + { + // This bblock has not been branched to yet. Initialize its stack state + InitBBStackState(pNewBB); + } + // linkBBlocks remains true, which is the default + } + else + { + if (pNewBB->stackHeight >= 0) + { + // This is relevant only for copying the vars associated with the values on the stack + memcpy (m_pStackBase, pNewBB->pStackState, pNewBB->stackHeight * sizeof(StackInfo)); + m_pStackPointer = m_pStackBase + pNewBB->stackHeight; + pNewBB->emitState = BBStateEmitting; + emittedBBlocks = true; + linkBBlocks = true; + } + else + { + INTERP_DUMP("BB%d without initialized stack\n", pNewBB->index); + assert(pNewBB->emitState == BBStateNotEmitted); + needsRetryEmit = true; + // linking to its next bblock, if its the case, will only happen + // after we actually emit the bblock + linkBBlocks = false; + // If we had pNewBB->pNextBB initialized, here we could skip to its il offset directly. + // We will just skip all instructions instead, since it doesn't seem that problematic. 
+ } + } + + InterpBasicBlock *pPrevBB = m_pCBB; + + pPrevBB = GenerateCodeForFinallyCallIslands(pNewBB, pPrevBB); + + if (!pPrevBB->pNextBB) + { + INTERP_DUMP("Chaining BB%d -> BB%d\n" , pPrevBB->index, pNewBB->index); + pPrevBB->pNextBB = pNewBB; + } + + m_pCBB = pNewBB; + if (m_pCBB->isFilterOrCatchFuncletEntry && (m_pCBB->emitState == BBStateEmitting)) + { + AddIns(INTOP_LOAD_EXCEPTION); + m_pLastNewIns->SetDVar(m_pCBB->clauseVarIndex); + } + } + + int32_t opcodeSize = CEEOpcodeSize(m_ip, codeEnd); + if (m_pCBB->emitState != BBStateEmitting) + { + // If we are not really emitting, just skip the instructions in the bblock + m_ip += opcodeSize; + continue; + } + + m_ppOffsetToBB[insOffset] = m_pCBB; + +#ifdef DEBUG + if (m_verbose) + { + const uint8_t *ip = m_ip; + printf("IL_%04x %-10s, sp %d, %s", + (int32_t)(m_ip - m_pILCode), + CEEOpName(CEEDecodeOpcode(&ip)), (int32_t)(m_pStackPointer - m_pStackBase), + m_pStackPointer > m_pStackBase ? g_stackTypeString[m_pStackPointer[-1].type] : " "); + if (m_pStackPointer > m_pStackBase && + (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeVT) && + m_pStackPointer[-1].clsHnd != NULL) + PrintClassName(m_pStackPointer[-1].clsHnd); + printf("\n"); + } +#endif + + uint8_t opcode = *m_ip; + switch (opcode) + { + case CEE_NOP: + m_ip++; + break; + case CEE_LDC_I4_M1: + case CEE_LDC_I4_0: + case CEE_LDC_I4_1: + case CEE_LDC_I4_2: + case CEE_LDC_I4_3: + case CEE_LDC_I4_4: + case CEE_LDC_I4_5: + case CEE_LDC_I4_6: + case CEE_LDC_I4_7: + case CEE_LDC_I4_8: + AddIns(INTOP_LDC_I4); + m_pLastNewIns->data[0] = opcode - CEE_LDC_I4_0; + PushStackType(StackTypeI4, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_ip++; + break; + case CEE_LDC_I4_S: + AddIns(INTOP_LDC_I4); + m_pLastNewIns->data[0] = (int8_t)m_ip[1]; + PushStackType(StackTypeI4, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_ip += 2; + break; + case CEE_LDC_I4: + AddIns(INTOP_LDC_I4); + m_pLastNewIns->data[0] = getI4LittleEndian(m_ip + 1); + PushStackType(StackTypeI4, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_ip += 5; + break; + case CEE_LDC_I8: + { + int64_t val = getI8LittleEndian(m_ip + 1); + AddIns(INTOP_LDC_I8); + PushInterpType(InterpTypeI8, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_pLastNewIns->data[0] = (int32_t)val; + m_pLastNewIns->data[1] = (int32_t)(val >> 32); + m_ip += 9; + break; + } + case CEE_LDC_R4: + { + int32_t val = getI4LittleEndian(m_ip + 1); + AddIns(INTOP_LDC_R4); + PushInterpType(InterpTypeR4, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_pLastNewIns->data[0] = val; + m_ip += 5; + break; + } + case CEE_LDC_R8: + { + int64_t val = getI8LittleEndian(m_ip + 1); + AddIns(INTOP_LDC_R8); + PushInterpType(InterpTypeR8, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_pLastNewIns->data[0] = (int32_t)val; + m_pLastNewIns->data[1] = (int32_t)(val >> 32); + m_ip += 9; + break; + } + case CEE_LDNULL: + AddIns(INTOP_LDNULL); + PushStackType(StackTypeO, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_ip++; + break; + case CEE_LDSTR: + { + int32_t token = getI4LittleEndian(m_ip + 1); + void *str; + InfoAccessType accessType = m_compHnd->constructStringLiteral(m_compScopeHnd, token, &str); + assert(accessType == IAT_VALUE); + // str should be forever pinned, so we can include its ref inside interpreter code + AddIns(INTOP_LDPTR); + PushInterpType(InterpTypeO, m_compHnd->getBuiltinClass(CLASSID_STRING)); + 
m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_pLastNewIns->data[0] = GetDataItemIndex(str); + m_ip += 5; + break; + } + case CEE_LDARG_S: + EmitLoadVar(m_ip[1]); + m_ip += 2; + break; + case CEE_LDARG_0: + case CEE_LDARG_1: + case CEE_LDARG_2: + case CEE_LDARG_3: + EmitLoadVar(*m_ip - CEE_LDARG_0); + m_ip++; + break; + case CEE_LDARGA_S: + EmitLdLocA(m_ip[1]); + m_ip += 2; + break; + case CEE_STARG_S: + EmitStoreVar(m_ip[1]); + m_ip += 2; + break; + case CEE_LDLOC_S: + EmitLoadVar(numArgs + m_ip[1]); + m_ip += 2; + break; + case CEE_LDLOC_0: + case CEE_LDLOC_1: + case CEE_LDLOC_2: + case CEE_LDLOC_3: + EmitLoadVar(numArgs + *m_ip - CEE_LDLOC_0); + m_ip++; + break; + case CEE_LDLOCA_S: + EmitLdLocA(numArgs + m_ip[1]); + m_ip += 2; + break; + case CEE_STLOC_S: + EmitStoreVar(numArgs + m_ip[1]); + m_ip += 2; + break; + case CEE_STLOC_0: + case CEE_STLOC_1: + case CEE_STLOC_2: + case CEE_STLOC_3: + EmitStoreVar(numArgs + *m_ip - CEE_STLOC_0); + m_ip++; + break; + + case CEE_LDOBJ: + case CEE_STOBJ: + { + CHECK_STACK(*m_ip == CEE_LDOBJ ? 1 : 2); + CORINFO_RESOLVED_TOKEN resolvedToken; + ResolveToken(getU4LittleEndian(m_ip + 1), CORINFO_TOKENKIND_Class, &resolvedToken); + InterpType interpType = GetInterpType(m_compHnd->asCorInfoType(resolvedToken.hClass)); + if (*m_ip == CEE_LDOBJ) + { + EmitLdind(interpType, resolvedToken.hClass, 0); + } + else + { + EmitStind(interpType, resolvedToken.hClass, 0, false); + } + m_ip += 5; + break; + } + + case CEE_RET: + { + CORINFO_SIG_INFO sig = methodInfo->args; + InterpType retType = GetInterpType(sig.retType); + + if (retType == InterpTypeVoid) + { + AddIns(INTOP_RET_VOID); + } + else if (retType == InterpTypeVT) + { + CHECK_STACK(1); + AddIns(INTOP_RET_VT); + m_pStackPointer--; + int32_t retVar = m_pStackPointer[0].var; + m_pLastNewIns->SetSVar(retVar); + m_pLastNewIns->data[0] = m_pVars[retVar].size; + } + else + { + CHECK_STACK(1); + AddIns(INTOP_RET); + m_pStackPointer--; + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + } + m_ip++; + break; + } + case CEE_CONV_U1: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U1_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U1_R8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U1_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U1_I8); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I1: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I1_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I1_R8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I1_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I1_I8); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_U2: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U2_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U2_R8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U2_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U2_I8); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I2: + 
CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I2_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I2_R8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I2_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I2_I8); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_U: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR8: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_U8_R8); +#else + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_U4_R8); +#endif + break; + case StackTypeR4: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_U8_R4); +#else + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_U4_R4); +#endif + break; + case StackTypeI4: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_I8_U4); +#endif + break; + case StackTypeI8: +#ifndef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_MOV_8); +#endif + break; + case StackTypeByRef: + case StackTypeO: + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_MOV_8); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR8: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_I8_R8); +#else + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_I4_R8); +#endif + break; + case StackTypeR4: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_I8_R4); +#else + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_I4_R4); +#endif + break; + case StackTypeI4: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_CONV_I8_I4); +#endif + break; + case StackTypeO: + case StackTypeByRef: + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_MOV_8); + break; + case StackTypeI8: +#ifndef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI, INTOP_MOV_8); +#endif + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_U4: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U4_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_U4_R8); + break; + case StackTypeI4: + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_MOV_8); + break; + case StackTypeByRef: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_MOV_P); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I4: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I4_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_CONV_I4_R8); + break; + case StackTypeI4: + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_MOV_8); + break; + case StackTypeByRef: + EmitConv(m_pStackPointer - 1, StackTypeI4, INTOP_MOV_P); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I8: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_R8); + break; + case StackTypeI4: { + EmitConv(m_pStackPointer - 1, StackTypeI8, 
INTOP_CONV_I8_I4); + break; + } + case StackTypeI8: + break; + case StackTypeByRef: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_MOV_8); +#else + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_I4); +#endif + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_R4: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR8: + EmitConv(m_pStackPointer - 1, StackTypeR4, INTOP_CONV_R4_R8); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, StackTypeR4, INTOP_CONV_R4_I8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, StackTypeR4, INTOP_CONV_R4_I4); + break; + case StackTypeR4: + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_R8: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeI4: + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R8_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R8_I8); + break; + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R8_R4); + break; + case StackTypeR8: + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_U8: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeI4: + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_U4); + break; + case StackTypeI8: + break; + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_U8_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_U8_R8); + break; + case StackTypeByRef: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_MOV_8); +#else + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_CONV_I8_U4); +#endif + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_R_UN: + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R8_R4); + break; + case StackTypeR8: + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R_UN_I8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, StackTypeR8, INTOP_CONV_R_UN_I4); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_SWITCH: + { + m_ip++; + uint32_t n = getU4LittleEndian(m_ip); + // Format of switch instruction is opcode + srcVal + n + T1 + T2 + ... 
+ Tn + AddInsExplicit(INTOP_SWITCH, n + 3); + m_pLastNewIns->data[0] = n; + m_ip += 4; + const uint8_t *nextIp = m_ip + n * 4; + m_pStackPointer--; + m_pLastNewIns->SetSVar(m_pStackPointer->var); + InterpBasicBlock **targetBBTable = (InterpBasicBlock**)AllocMemPool(sizeof (InterpBasicBlock*) * n); + + for (uint32_t i = 0; i < n; i++) + { + int32_t offset = getU4LittleEndian(m_ip); + uint32_t target = (uint32_t)(nextIp - m_pILCode + offset); + InterpBasicBlock *targetBB = m_ppOffsetToBB[target]; + assert(targetBB); + + InitBBStackState(targetBB); + targetBBTable[i] = targetBB; + LinkBBs(m_pCBB, targetBB); + m_ip += 4; + } + m_pLastNewIns->info.ppTargetBBTable = targetBBTable; + break; + } + case CEE_BR: + { + int32_t offset = getI4LittleEndian(m_ip + 1); + if (offset) + { + EmitBranch(INTOP_BR, 5 + offset); + linkBBlocks = false; + } + m_ip += 5; + break; + } + case CEE_BR_S: + { + int32_t offset = (int8_t)m_ip [1]; + if (offset) + { + EmitBranch(INTOP_BR, 2 + (int8_t)m_ip [1]); + linkBBlocks = false; + } + m_ip += 2; + break; + } + case CEE_BRFALSE: + EmitOneArgBranch(INTOP_BRFALSE_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BRFALSE_S: + EmitOneArgBranch(INTOP_BRFALSE_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BRTRUE: + EmitOneArgBranch(INTOP_BRTRUE_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BRTRUE_S: + EmitOneArgBranch(INTOP_BRTRUE_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BEQ: + EmitTwoArgBranch(INTOP_BEQ_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BEQ_S: + EmitTwoArgBranch(INTOP_BEQ_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BGE: + EmitTwoArgBranch(INTOP_BGE_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BGE_S: + EmitTwoArgBranch(INTOP_BGE_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BGT: + EmitTwoArgBranch(INTOP_BGT_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BGT_S: + EmitTwoArgBranch(INTOP_BGT_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BLT: + EmitTwoArgBranch(INTOP_BLT_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BLT_S: + EmitTwoArgBranch(INTOP_BLT_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BLE: + EmitTwoArgBranch(INTOP_BLE_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BLE_S: + EmitTwoArgBranch(INTOP_BLE_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BNE_UN: + EmitTwoArgBranch(INTOP_BNE_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BNE_UN_S: + EmitTwoArgBranch(INTOP_BNE_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BGE_UN: + EmitTwoArgBranch(INTOP_BGE_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BGE_UN_S: + EmitTwoArgBranch(INTOP_BGE_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BGT_UN: + EmitTwoArgBranch(INTOP_BGT_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BGT_UN_S: + EmitTwoArgBranch(INTOP_BGT_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BLE_UN: + EmitTwoArgBranch(INTOP_BLE_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BLE_UN_S: + EmitTwoArgBranch(INTOP_BLE_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BLT_UN: + EmitTwoArgBranch(INTOP_BLT_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BLT_UN_S: + EmitTwoArgBranch(INTOP_BLT_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + + case CEE_ADD: + 
EmitBinaryArithmeticOp(INTOP_ADD_I4); + m_ip++; + break; + case CEE_SUB: + EmitBinaryArithmeticOp(INTOP_SUB_I4); + m_ip++; + break; + case CEE_MUL: + EmitBinaryArithmeticOp(INTOP_MUL_I4); + m_ip++; + break; + case CEE_MUL_OVF: + EmitBinaryArithmeticOp(INTOP_MUL_OVF_I4); + m_ip++; + break; + case CEE_MUL_OVF_UN: + EmitBinaryArithmeticOp(INTOP_MUL_OVF_UN_I4); + m_ip++; + break; + case CEE_DIV: + EmitBinaryArithmeticOp(INTOP_DIV_I4); + m_ip++; + break; + case CEE_DIV_UN: + EmitBinaryArithmeticOp(INTOP_DIV_UN_I4); + m_ip++; + break; + case CEE_REM: + EmitBinaryArithmeticOp(INTOP_REM_I4); + m_ip++; + break; + case CEE_REM_UN: + EmitBinaryArithmeticOp(INTOP_REM_UN_I4); + m_ip++; + break; + case CEE_AND: + EmitBinaryArithmeticOp(INTOP_AND_I4); + m_ip++; + break; + case CEE_OR: + EmitBinaryArithmeticOp(INTOP_OR_I4); + m_ip++; + break; + case CEE_XOR: + EmitBinaryArithmeticOp(INTOP_XOR_I4); + m_ip++; + break; + case CEE_SHL: + EmitShiftOp(INTOP_SHL_I4); + m_ip++; + break; + case CEE_SHR: + EmitShiftOp(INTOP_SHR_I4); + m_ip++; + break; + case CEE_SHR_UN: + EmitShiftOp(INTOP_SHR_UN_I4); + m_ip++; + break; + case CEE_NEG: + EmitUnaryArithmeticOp(INTOP_NEG_I4); + m_ip++; + break; + case CEE_NOT: + EmitUnaryArithmeticOp(INTOP_NOT_I4); + m_ip++; + break; + case CEE_CALLVIRT: + case CEE_CALL: + EmitCall(constrainedClass, readonly, tailcall, false /*newObj*/); + constrainedClass = NULL; + readonly = false; + tailcall = false; + break; + case CEE_NEWOBJ: + { + EmitCall(NULL /*constrainedClass*/, false /* readonly*/, false /* tailcall*/, true /*newObj*/); + constrainedClass = NULL; + readonly = false; + tailcall = false; + break; + } + case CEE_DUP: + { + int32_t svar = m_pStackPointer[-1].var; + InterpType interpType = m_pVars[svar].interpType; + if (interpType == InterpTypeVT) + { + int32_t size = m_pVars[svar].size; + AddIns(INTOP_MOV_VT); + m_pLastNewIns->SetSVar(svar); + PushTypeVT(m_pVars[svar].clsHnd, size); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_pLastNewIns->data[0] = size; + } + else + { + AddIns(InterpGetMovForType(interpType, false)); + m_pLastNewIns->SetSVar(svar); + PushInterpType(interpType, m_pVars[svar].clsHnd); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + } + m_ip++; + break; + } + case CEE_POP: + CHECK_STACK(1); + AddIns(INTOP_NOP); + m_pStackPointer--; + m_ip++; + break; + case CEE_LDFLDA: + { + CORINFO_RESOLVED_TOKEN resolvedToken; + CORINFO_FIELD_INFO fieldInfo; + uint32_t token = getU4LittleEndian(m_ip + 1); + ResolveToken(token, CORINFO_TOKENKIND_Field, &resolvedToken); + m_compHnd->getFieldInfo(&resolvedToken, m_methodHnd, CORINFO_ACCESS_ADDRESS, &fieldInfo); + + bool isStatic = !!(fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC); + + if (isStatic) + { + // Pop unused object reference + m_pStackPointer--; + EmitStaticFieldAddress(&fieldInfo, &resolvedToken); + } + else + { + assert(fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE); + m_pStackPointer--; + AddIns(INTOP_LDFLDA); + m_pLastNewIns->data[0] = (int32_t)fieldInfo.offset; + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + PushInterpType(InterpTypeByRef, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + } + + m_ip += 5; + break; + } + case CEE_LDFLD: + { + CHECK_STACK(1); + CORINFO_RESOLVED_TOKEN resolvedToken; + CORINFO_FIELD_INFO fieldInfo; + uint32_t token = getU4LittleEndian(m_ip + 1); + ResolveToken(token, CORINFO_TOKENKIND_Field, &resolvedToken); + m_compHnd->getFieldInfo(&resolvedToken, m_methodHnd, CORINFO_ACCESS_GET, &fieldInfo); + + CorInfoType fieldType = fieldInfo.fieldType; + bool 
isStatic = !!(fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC); + InterpType interpFieldType = GetInterpType(fieldType); + + if (isStatic) + { + // Pop unused object reference + m_pStackPointer--; + EmitStaticFieldAccess(interpFieldType, &fieldInfo, &resolvedToken, true); + } + else + { + assert(fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE); + m_pStackPointer--; + int sizeDataIndexOffset = 0; + if (m_pStackPointer[0].type == StackTypeVT) + { + sizeDataIndexOffset = 1; + AddIns(INTOP_MOV_SRC_OFF); + m_pLastNewIns->data[1] = interpFieldType; + } + else + { + int32_t opcode = GetLdindForType(interpFieldType); + AddIns(opcode); + } + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + m_pLastNewIns->data[0] = (int32_t)fieldInfo.offset; + if (interpFieldType == InterpTypeVT) + { + CORINFO_CLASS_HANDLE fieldClass = fieldInfo.structType; + int size = m_compHnd->getClassSize(fieldClass); + m_pLastNewIns->data[1 + sizeDataIndexOffset] = size; + PushTypeVT(fieldClass, size); + } + else + { + PushInterpType(interpFieldType, NULL); + } + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + } + + m_ip += 5; + if (volatile_) + { + // Acquire membar + AddIns(INTOP_MEMBAR); + volatile_ = false; + } + break; + } + case CEE_STFLD: + { + CHECK_STACK(2); + CORINFO_RESOLVED_TOKEN resolvedToken; + CORINFO_FIELD_INFO fieldInfo; + uint32_t token = getU4LittleEndian(m_ip + 1); + ResolveToken(token, CORINFO_TOKENKIND_Field, &resolvedToken); + m_compHnd->getFieldInfo(&resolvedToken, m_methodHnd, CORINFO_ACCESS_GET, &fieldInfo); + + CorInfoType fieldType = fieldInfo.fieldType; + bool isStatic = !!(fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC); + InterpType interpFieldType = GetInterpType(fieldType); + + if (volatile_) + { + // Release memory barrier + AddIns(INTOP_MEMBAR); + volatile_ = false; + } + + if (isStatic) + { + EmitStaticFieldAccess(interpFieldType, &fieldInfo, &resolvedToken, false); + // Pop the unused object reference + m_pStackPointer--; + } + else + { + assert(fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE); + EmitStind(interpFieldType, fieldInfo.structType, fieldInfo.offset, false); + } + m_ip += 5; + + break; + } + case CEE_LDSFLDA: + { + CORINFO_RESOLVED_TOKEN resolvedToken; + CORINFO_FIELD_INFO fieldInfo; + uint32_t token = getU4LittleEndian(m_ip + 1); + ResolveToken(token, CORINFO_TOKENKIND_Field, &resolvedToken); + m_compHnd->getFieldInfo(&resolvedToken, m_methodHnd, CORINFO_ACCESS_GET, &fieldInfo); + + EmitStaticFieldAddress(&fieldInfo, &resolvedToken); + + m_ip += 5; + break; + } + case CEE_LDSFLD: + { + CORINFO_RESOLVED_TOKEN resolvedToken; + CORINFO_FIELD_INFO fieldInfo; + uint32_t token = getU4LittleEndian(m_ip + 1); + ResolveToken(token, CORINFO_TOKENKIND_Field, &resolvedToken); + m_compHnd->getFieldInfo(&resolvedToken, m_methodHnd, CORINFO_ACCESS_GET, &fieldInfo); + + CorInfoType fieldType = fieldInfo.fieldType; + InterpType interpFieldType = GetInterpType(fieldType); + + EmitStaticFieldAccess(interpFieldType, &fieldInfo, &resolvedToken, true); + + if (volatile_) + { + // Acquire memory barrier + AddIns(INTOP_MEMBAR); + volatile_ = false; + } + m_ip += 5; + break; + } + case CEE_STSFLD: + { + CHECK_STACK(1); + CORINFO_RESOLVED_TOKEN resolvedToken; + CORINFO_FIELD_INFO fieldInfo; + uint32_t token = getU4LittleEndian(m_ip + 1); + ResolveToken(token, CORINFO_TOKENKIND_Field, &resolvedToken); + m_compHnd->getFieldInfo(&resolvedToken, m_methodHnd, CORINFO_ACCESS_GET, &fieldInfo); + + CorInfoType fieldType = fieldInfo.fieldType; + InterpType interpFieldType = GetInterpType(fieldType); + + 
if (volatile_) + { + // Release memory barrier + AddIns(INTOP_MEMBAR); + volatile_ = false; + } + + EmitStaticFieldAccess(interpFieldType, &fieldInfo, &resolvedToken, false); + m_ip += 5; + break; + } + case CEE_LDIND_I1: + case CEE_LDIND_U1: + case CEE_LDIND_I2: + case CEE_LDIND_U2: + case CEE_LDIND_I4: + case CEE_LDIND_U4: + case CEE_LDIND_I8: + case CEE_LDIND_I: + case CEE_LDIND_R4: + case CEE_LDIND_R8: + case CEE_LDIND_REF: + { + InterpType interpType = InterpTypeVoid; + switch(opcode) + { + case CEE_LDIND_I1: + interpType = InterpTypeI1; + break; + case CEE_LDIND_U1: + interpType = InterpTypeU1; + break; + case CEE_LDIND_I2: + interpType = InterpTypeI2; + break; + case CEE_LDIND_U2: + interpType = InterpTypeU2; + break; + case CEE_LDIND_I4: + case CEE_LDIND_U4: + interpType = InterpTypeI4; + break; + case CEE_LDIND_I8: + interpType = InterpTypeI8; + break; + case CEE_LDIND_I: + interpType = InterpTypeI; + break; + case CEE_LDIND_R4: + interpType = InterpTypeR4; + break; + case CEE_LDIND_R8: + interpType = InterpTypeR8; + break; + case CEE_LDIND_REF: + interpType = InterpTypeO; + break; + default: + assert(0); + } + EmitLdind(interpType, NULL, 0); + if (volatile_) + { + // Acquire memory barrier + AddIns(INTOP_MEMBAR); + volatile_ = false; + } + m_ip++; + break; + } + case CEE_STIND_I1: + case CEE_STIND_I2: + case CEE_STIND_I4: + case CEE_STIND_I8: + case CEE_STIND_I: + case CEE_STIND_R4: + case CEE_STIND_R8: + case CEE_STIND_REF: + { + InterpType interpType = InterpTypeVoid; + switch(opcode) + { + case CEE_STIND_I1: + interpType = InterpTypeI1; + break; + case CEE_STIND_I2: + interpType = InterpTypeI2; + break; + case CEE_STIND_I4: + interpType = InterpTypeI4; + break; + case CEE_STIND_I8: + interpType = InterpTypeI8; + break; + case CEE_STIND_I: + interpType = InterpTypeI; + break; + case CEE_STIND_R4: + interpType = InterpTypeR4; + break; + case CEE_STIND_R8: + interpType = InterpTypeR8; + break; + case CEE_STIND_REF: + interpType = InterpTypeO; + break; + default: + assert(0); + } + if (volatile_) + { + // Release memory barrier + AddIns(INTOP_MEMBAR); + volatile_ = false; + } + EmitStind(interpType, NULL, 0, false); + m_ip++; + break; + } + case CEE_PREFIX1: + m_ip++; + switch (*m_ip + 256) + { + case CEE_LDARG: + EmitLoadVar(getU2LittleEndian(m_ip + 1)); + m_ip += 3; + break; + case CEE_LDARGA: + EmitLdLocA(getU2LittleEndian(m_ip + 1)); + m_ip += 3; + break; + case CEE_STARG: + EmitStoreVar(getU2LittleEndian(m_ip + 1)); + m_ip += 3; + break; + case CEE_LDLOC: + EmitLoadVar(numArgs + getU2LittleEndian(m_ip + 1)); + m_ip += 3; + break; + case CEE_LDLOCA: + EmitLdLocA(numArgs + getU2LittleEndian(m_ip + 1)); + m_ip += 3; + break; + case CEE_STLOC: + EmitStoreVar(numArgs + getU2LittleEndian(m_ip + 1));\ + m_ip += 3; + break; + case CEE_CEQ: + EmitCompareOp(INTOP_CEQ_I4); + m_ip++; + break; + case CEE_CGT: + EmitCompareOp(INTOP_CGT_I4); + m_ip++; + break; + case CEE_CGT_UN: + EmitCompareOp(INTOP_CGT_UN_I4); + m_ip++; + break; + case CEE_CLT: + EmitCompareOp(INTOP_CLT_I4); + m_ip++; + break; + case CEE_CLT_UN: + EmitCompareOp(INTOP_CLT_UN_I4); + m_ip++; + break; + case CEE_CONSTRAINED: + { + uint32_t token = getU4LittleEndian(m_ip + 1); + + constrainedToken.tokenScope = m_compScopeHnd; + constrainedToken.tokenContext = METHOD_BEING_COMPILED_CONTEXT(); + constrainedToken.token = token; + constrainedToken.tokenType = CORINFO_TOKENKIND_Constrained; + m_compHnd->resolveToken(&constrainedToken); + constrainedClass = &constrainedToken; + m_ip += 5; + break; + } + case CEE_READONLY: + readonly 
= true; + m_ip++; + break; + case CEE_TAILCALL: + tailcall = true; + m_ip++; + break; + case CEE_VOLATILE: + volatile_ = true; + m_ip++; + break; + case CEE_INITOBJ: + { + CHECK_STACK(1); + CORINFO_CLASS_HANDLE clsHnd = ResolveClassToken(getU4LittleEndian(m_ip + 1)); + if (m_compHnd->isValueClass(clsHnd)) + { + m_pStackPointer--; + AddIns(INTOP_ZEROBLK_IMM); + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + m_pLastNewIns->data[0] = m_compHnd->getClassSize(clsHnd); + } + else + { + AddIns(INTOP_LDNULL); + PushInterpType(InterpTypeO, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + + AddIns(INTOP_STIND_O); + m_pStackPointer -= 2; + m_pLastNewIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var); + } + m_ip += 5; + break; + } + case CEE_LOCALLOC: + CHECK_STACK(1); +#if TARGET_64BIT + // Length is natural unsigned int + if (m_pStackPointer[-1].type == StackTypeI4) + { + EmitConv(m_pStackPointer - 1, StackTypeI8, INTOP_MOV_8); + m_pStackPointer[-1].type = StackTypeI8; + } +#endif + AddIns(INTOP_LOCALLOC); + m_pStackPointer--; + if (m_pStackPointer != m_pStackBase) + { + m_hasInvalidCode = true; + goto exit_bad_code; + } + + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + PushStackType(StackTypeByRef, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_ip++; + break; + case CEE_SIZEOF: + { + CORINFO_CLASS_HANDLE clsHnd = ResolveClassToken(getU4LittleEndian(m_ip + 1)); + AddIns(INTOP_LDC_I4); + m_pLastNewIns->data[0] = m_compHnd->getClassSize(clsHnd); + PushStackType(StackTypeI4, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_ip += 5; + break; + } + case CEE_ENDFILTER: + AddIns(INTOP_LEAVE_FILTER); + m_pStackPointer--; + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + m_ip++; + linkBBlocks = false; + break; + case CEE_RETHROW: + AddIns(INTOP_RETHROW); + m_ip++; + linkBBlocks = false; + break; + default: + assert(0); + break; + } + break; + + case CEE_ENDFINALLY: + { + AddIns(INTOP_RET_VOID); + m_ip++; + linkBBlocks = false; + break; + } + case CEE_LEAVE: + case CEE_LEAVE_S: + { + int32_t ilOffset = (int32_t)(m_ip - m_pILCode); + int32_t target = (opcode == CEE_LEAVE) ? ilOffset + 5 + *(int32_t*)(m_ip + 1) : (ilOffset + 2 + (int8_t)m_ip[1]); + InterpBasicBlock *pTargetBB = m_ppOffsetToBB[target]; + + m_pStackPointer = m_pStackBase; + + // The leave will jump: + // * directly to its target if it doesn't jump out of any try regions with finally. + // * to a finally call island of the first try region with finally that it jumps out of. + + for (int i = 0; i < m_leavesTable.GetSize(); i++) + { + if (m_leavesTable.Get(i).ilOffset == ilOffset) + { + // There is a finally call island for this leave, so we will jump to it + // instead of the target. The chain of these islands will end up on + // the target in the end. + // NOTE: we need to use basic block to branch and not an IL offset extracted + // from the building block, because the finally call islands share the same IL + // offset with another block of original code in front of which it is injected. + // The EmitBranch would to that block instead of the finally call island. + pTargetBB = m_leavesTable.Get(i).pFinallyCallIslandBB; + break; + } + } + + // The leave doesn't jump out of any try region with finally, so we can just emit a branch + // to the target. + if (m_pCBB->clauseType == BBClauseCatch) + { + // leave out of catch is different from a leave out of finally. It + // exits the catch handler and returns the address of the finally + // call island as the continuation address to the EH code. 
+ EmitBranchToBB(INTOP_LEAVE_CATCH, pTargetBB); + } + else + { + EmitBranchToBB(INTOP_BR, pTargetBB); + } + + m_ip += (opcode == CEE_LEAVE) ? 5 : 2; + linkBBlocks = false; + break; + } + + case CEE_THROW: + AddIns(INTOP_THROW); + m_pLastNewIns->SetSVar(m_pStackPointer[-1].var); + m_ip += 1; + linkBBlocks = false; + break; + + case CEE_BOX: + { + CHECK_STACK(1); + m_pStackPointer -= 1; + CORINFO_CLASS_HANDLE clsHnd = ResolveClassToken(getU4LittleEndian(m_ip + 1)); + CORINFO_CLASS_HANDLE boxedClsHnd = m_compHnd->getTypeForBox(clsHnd); + CorInfoHelpFunc helpFunc = m_compHnd->getBoxHelper(clsHnd); + AddIns(INTOP_BOX); + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + PushStackType(StackTypeO, boxedClsHnd); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_pLastNewIns->data[0] = GetDataItemIndex(clsHnd); + m_pLastNewIns->data[1] = GetDataItemIndexForHelperFtn(helpFunc); + m_ip += 5; + break; + } + + case CEE_UNBOX: + case CEE_UNBOX_ANY: + { + CHECK_STACK(1); + m_pStackPointer -= 1; + CORINFO_CLASS_HANDLE clsHnd = ResolveClassToken(getU4LittleEndian(m_ip + 1)); + CorInfoHelpFunc helpFunc = m_compHnd->getUnBoxHelper(clsHnd); + AddIns(opcode == CEE_UNBOX ? INTOP_UNBOX : INTOP_UNBOX_ANY); + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + if (opcode == CEE_UNBOX) + PushStackType(StackTypeI, NULL); + else + PushInterpType(GetInterpType(m_compHnd->asCorInfoType(clsHnd)), clsHnd); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_pLastNewIns->data[0] = GetDataItemIndex(clsHnd); + m_pLastNewIns->data[1] = GetDataItemIndexForHelperFtn(helpFunc); + m_ip += 5; + break; + } + case CEE_NEWARR: + { + CHECK_STACK(1); + + uint32_t token = getU4LittleEndian(m_ip + 1); + + CORINFO_RESOLVED_TOKEN resolvedToken; + ResolveToken(token, CORINFO_TOKENKIND_Newarr, &resolvedToken); + + CORINFO_CLASS_HANDLE arrayClsHnd = resolvedToken.hClass; + CorInfoHelpFunc helpFunc = m_compHnd->getNewArrHelper(arrayClsHnd); + + m_pStackPointer--; + + AddIns(INTOP_NEWARR); + m_pLastNewIns->SetSVar(m_pStackPointer[0].var); + + PushInterpType(InterpTypeO, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + + m_pLastNewIns->data[0] = GetDataItemIndex(arrayClsHnd); + m_pLastNewIns->data[1] = GetDataItemIndexForHelperFtn(helpFunc); + + m_ip += 5; + break; + } + case CEE_LDLEN: + { + CHECK_STACK(1); + EmitLdind(InterpTypeI4, NULL, OFFSETOF__CORINFO_Array__length); + m_ip++; + break; + } + case CEE_LDELEM_I1: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_I1, InterpTypeI4); + m_ip++; + break; + } + case CEE_LDELEM_U1: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_U1, InterpTypeI4); + m_ip++; + break; + } + case CEE_LDELEM_I2: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_I2, InterpTypeI4); + m_ip++; + break; + } + case CEE_LDELEM_U2: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_U2, InterpTypeI4); + m_ip++; + break; + } + case CEE_LDELEM_I4: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_I4, InterpTypeI4); + m_ip++; + break; + } + case CEE_LDELEM_U4: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_I4, InterpTypeI4); + m_ip++; + break; + } + case CEE_LDELEM_I8: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_I8, InterpTypeI8); + m_ip++; + break; + } + case CEE_LDELEM_I: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_I, InterpTypeI); + m_ip++; + break; + } + case CEE_LDELEM_R4: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_R4, InterpTypeR4); + m_ip++; + break; + } + case CEE_LDELEM_R8: + { + CHECK_STACK(2); + EmitLdelem(INTOP_LDELEM_R8, InterpTypeR8); + m_ip++; + break; + } + case CEE_STELEM_I: + { + 
CHECK_STACK(3); + EmitStelem(InterpTypeI); + m_ip++; + break; + } + case CEE_STELEM_I1: + { + CHECK_STACK(3); + EmitStelem(InterpTypeI1); + m_ip++; + break; + } + case CEE_STELEM_I2: + { + CHECK_STACK(3); + EmitStelem(InterpTypeI2); + m_ip++; + break; + } + case CEE_STELEM_I4: + { + CHECK_STACK(3); + EmitStelem(InterpTypeI4); + m_ip++; + break; + } + case CEE_STELEM_I8: + { + CHECK_STACK(3); + EmitStelem(InterpTypeI8); + m_ip++; + break; + } + case CEE_STELEM_R4: + { + CHECK_STACK(3); + EmitStelem(InterpTypeR4); + m_ip++; + break; + } + case CEE_STELEM_R8: + { + CHECK_STACK(3); + EmitStelem(InterpTypeR8); + m_ip++; + break; + } + + case CEE_LDTOKEN: + { + + CORINFO_RESOLVED_TOKEN resolvedToken; + ResolveToken(getU4LittleEndian(m_ip + 1), CORINFO_TOKENKIND_Ldtoken, &resolvedToken); + + InterpEmbedGenericResult resolvedEmbedResult = EmitGenericHandle(&resolvedToken, GenericHandleEmbedOptions::None); + + if (resolvedEmbedResult.var != -1) + { + AddIns(INTOP_LDTOKEN_VAR); + m_pLastNewIns->SetSVar(resolvedEmbedResult.var); + } + else + { + AddIns(INTOP_LDTOKEN); + m_pLastNewIns->data[1] = resolvedEmbedResult.dataItemIndex; + } + + CORINFO_CLASS_HANDLE clsHnd = m_compHnd->getTokenTypeAsHandle(&resolvedToken); + PushStackType(StackTypeVT, clsHnd); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + + // see jit/importer.cpp CEE_LDTOKEN + CorInfoHelpFunc helper; + if (resolvedToken.hField) + { + helper = CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD; + } + else if (resolvedToken.hMethod) + { + helper = CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD; + } + else if (resolvedToken.hClass) + { + helper = CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPEHANDLE; + } + else + { + helper = CORINFO_HELP_FAIL_FAST; + assert(!"Token not resolved or resolved to unexpected type"); + } + m_pLastNewIns->data[0] = GetDataItemIndexForHelperFtn(helper); + + m_ip += 5; + break; + } + + case CEE_ISINST: + { + CHECK_STACK(1); + CORINFO_RESOLVED_TOKEN resolvedToken; + ResolveToken(getU4LittleEndian(m_ip + 1), CORINFO_TOKENKIND_Casting, &resolvedToken); + + CorInfoHelpFunc castingHelper = m_compHnd->getCastingHelper(&resolvedToken, false /* throwing */); + AddIns(INTOP_CALL_HELPER_PP_2); + m_pLastNewIns->data[0] = GetDataItemIndexForHelperFtn(castingHelper); + m_pLastNewIns->data[1] = GetDataItemIndex(resolvedToken.hClass); + m_pLastNewIns->SetSVar(m_pStackPointer[-1].var); + m_pStackPointer--; + PushInterpType(InterpTypeI, NULL); + m_pLastNewIns->SetDVar(m_pStackPointer[-1].var); + m_ip += 5; + break; + } + default: + assert(0); + break; + } + } + + if (m_pCBB->emitState == BBStateEmitting) + m_pCBB->emitState = BBStateEmitted; + + // If no bblocks were emitted during the last iteration, there is no point to try again + // Some bblocks are just unreachable in the code. 
+ if (needsRetryEmit && emittedBBlocks) + { + m_ip = m_pILCode; + m_pCBB = m_pEntryBB; + + linkBBlocks = false; + needsRetryEmit = false; + INTERP_DUMP("retry emit\n"); + goto retry_emit; + } + + UnlinkUnreachableBBlocks(); + + return CORJIT_OK; +exit_bad_code: + return CORJIT_BADCODE; +} + +InterpBasicBlock *InterpCompiler::GenerateCodeForFinallyCallIslands(InterpBasicBlock *pNewBB, InterpBasicBlock *pPrevBB) +{ + InterpBasicBlock *pFinallyCallIslandBB = pNewBB->pFinallyCallIslandBB; + + while (pFinallyCallIslandBB != NULL) + { + INTERP_DUMP("Injecting finally call island BB%d\n", pFinallyCallIslandBB->index); + if (pFinallyCallIslandBB->emitState != BBStateEmitted) + { + // Set the finally call island BB as current so that the instructions are emitted into it + m_pCBB = pFinallyCallIslandBB; + InitBBStackState(m_pCBB); + EmitBranchToBB(INTOP_CALL_FINALLY, pNewBB); // The pNewBB is the finally BB + m_pLastNewIns->ilOffset = -1; + // Try to get the next finally call island block (for an outer try's finally) + if (pFinallyCallIslandBB->pFinallyCallIslandBB) + { + // Branch to the next finally call island (at an outer try block) + EmitBranchToBB(INTOP_BR, pFinallyCallIslandBB->pFinallyCallIslandBB); + } + else + { + // This is the last finally call island, so we need to emit a branch to the leave target + EmitBranchToBB(INTOP_BR, pFinallyCallIslandBB->pLeaveTargetBB); + } + m_pLastNewIns->ilOffset = -1; + m_pCBB->emitState = BBStateEmitted; + INTERP_DUMP("Chaining BB%d -> BB%d\n", pPrevBB->index, pFinallyCallIslandBB->index); + } + assert(pPrevBB->pNextBB == NULL || pPrevBB->pNextBB == pFinallyCallIslandBB); + pPrevBB->pNextBB = pFinallyCallIslandBB; + pPrevBB = pFinallyCallIslandBB; + pFinallyCallIslandBB = pFinallyCallIslandBB->pNextBB; + } + + return pPrevBB; +} +void InterpCompiler::UnlinkUnreachableBBlocks() +{ + // Unlink unreachable bblocks, prevBB is always an emitted bblock + InterpBasicBlock *prevBB = m_pEntryBB; + InterpBasicBlock *nextBB = prevBB->pNextBB; + while (nextBB != NULL) + { + if (nextBB->emitState == BBStateNotEmitted) + { + m_ppOffsetToBB[nextBB->ilOffset] = NULL; + prevBB->pNextBB = nextBB->pNextBB; + nextBB = prevBB->pNextBB; + } + else + { + prevBB = nextBB; + nextBB = nextBB->pNextBB; + } + } +} + +void InterpCompiler::PrintClassName(CORINFO_CLASS_HANDLE cls) +{ + char className[100]; + m_compHnd->printClassName(cls, className, 100); + printf("%s", className); +} + +void InterpCompiler::PrintMethodName(CORINFO_METHOD_HANDLE method) +{ + CORINFO_CLASS_HANDLE cls = m_compHnd->getMethodClass(method); + + CORINFO_SIG_INFO sig; + m_compHnd->getMethodSig(method, &sig, cls); + + TArray methodName = ::PrintMethodName(m_compHnd, cls, method, &sig, + /* includeClassInstantiation */ true, + /* includeMethodInstantiation */ true, + /* includeSignature */ true, + /* includeReturnType */ false, + /* includeThis */ false); + + + printf(".%s", methodName.GetUnderlyingArray()); +} + +void InterpCompiler::PrintCode() +{ + for (InterpBasicBlock *pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB) + PrintBBCode(pBB); +} + +void InterpCompiler::PrintBBCode(InterpBasicBlock *pBB) +{ + printf("BB%d:\n", pBB->index); + for (InterpInst *ins = pBB->pFirstIns; ins != NULL; ins = ins->pNext) + { + PrintIns(ins); + printf("\n"); + } +} + +void InterpCompiler::PrintIns(InterpInst *ins) +{ + int32_t opcode = ins->opcode; + if (ins->ilOffset == -1) + printf("IL_----: %-14s", InterpOpName(opcode)); + else + printf("IL_%04x: %-14s", ins->ilOffset, InterpOpName(opcode)); + + if 
(g_interpOpDVars[opcode] > 0) + printf(" [%d <-", ins->dVar); + else + printf(" [nil <-"); + + if (g_interpOpSVars[opcode] > 0) + { + for (int i = 0; i < g_interpOpSVars[opcode]; i++) + { + if (ins->sVars[i] == CALL_ARGS_SVAR) + { + printf(" c:"); + if (ins->info.pCallInfo && ins->info.pCallInfo->pCallArgs) + { + int *callArgs = ins->info.pCallInfo->pCallArgs; + while (*callArgs != CALL_ARGS_TERMINATOR) + { + printf(" %d", *callArgs); + callArgs++; + } + } + } + else + { + printf(" %d", ins->sVars[i]); + } + } + printf("],"); + } + else + { + printf(" nil],"); + } + + // LDLOCA has special semantics, it has data in sVars[0], but it doesn't have any sVars + if (opcode == INTOP_LDLOCA) + printf(" %d", ins->sVars[0]); + else + PrintInsData(ins, ins->ilOffset, &ins->data[0], ins->opcode); +} + +static const char* s_jitHelperNames[CORINFO_HELP_COUNT] = { +#define JITHELPER(code, pfnHelper, binderId) #code, +#define DYNAMICJITHELPER(code, pfnHelper, binderId) #code, +#include "jithelpers.h" +#include "compiler.h" +}; + +const char* CorInfoHelperToName(CorInfoHelpFunc helper) +{ + if (helper < 0 || helper >= CORINFO_HELP_COUNT) + return "UnknownHelper"; + + return s_jitHelperNames[helper]; +} + +void InterpCompiler::PrintInsData(InterpInst *ins, int32_t insOffset, const int32_t *pData, int32_t opcode) +{ + switch (g_interpOpArgType[opcode]) { + case InterpOpNoArgs: + break; + case InterpOpInt: + printf(" %d", *pData); + break; + case InterpOpLongInt: + { + int64_t i64 = (int64_t)pData[0] + ((int64_t)pData[1] << 32); + printf(" %" PRId64, i64); + break; + } + case InterpOpFloat: + { + printf(" %g", *(float*)pData); + break; + } + case InterpOpDouble: + { + int64_t i64 = (int64_t)pData[0] + ((int64_t)pData[1] << 32); + printf(" %g", *(double*)&i64); + break; + } + case InterpOpTwoInts: + printf(" %d,%d", *pData, *(pData + 1)); + break; + case InterpOpThreeInts: + printf(" %d,%d,%d", *pData, *(pData + 1), *(pData + 2)); + break; + case InterpOpBranch: + if (ins) + printf(" BB%d", ins->info.pTargetBB->index); + else + printf(" IR_%04x", insOffset + *pData); + break; + case InterpOpLdPtr: + { + printf("%p", (void*)GetDataItemAtIndex(pData[0])); + break; + } + case InterpOpGenericLookup: + { + CORINFO_RUNTIME_LOOKUP *pGenericLookup = (CORINFO_RUNTIME_LOOKUP*)GetDataItemAtIndex(pData[0]); + printf("%s,%p[", CorInfoHelperToName(pGenericLookup->helper), pGenericLookup->signature); + for (int i = 0; i < pGenericLookup->indirections; i++) + { + if (i > 0) + printf(","); + + if (i == 0 && pGenericLookup->indirectFirstOffset) + printf("*"); + if (i == 1 && pGenericLookup->indirectSecondOffset) + printf("*"); + printf("%d", (int)pGenericLookup->offsets[i]); + } + printf("]"); + if (pGenericLookup->sizeOffset != CORINFO_NO_SIZE_CHECK) + { + printf(" sizeOffset=%d", (int)pGenericLookup->sizeOffset); + } + if (pGenericLookup->testForNull) + { + printf(" testForNull"); + } + } + break; + case InterpOpSwitch: + { + int32_t n = *pData; + printf(" ("); + for (int i = 0; i < n; i++) + { + if (i > 0) + printf(", "); + + if (ins) + printf("BB%d", ins->info.ppTargetBBTable[i]->index); + else + printf("IR_%04x", insOffset + 3 + i + *(pData + 1 + i)); + } + printf(")"); + break; + } + case InterpOpMethodHandle: + { + CORINFO_METHOD_HANDLE mh = (CORINFO_METHOD_HANDLE)((size_t)m_dataItems.Get(*pData)); + printf(" "); + PrintMethodName(mh); + break; + } + case InterpOpClassHandle: + { + CORINFO_CLASS_HANDLE ch = (CORINFO_CLASS_HANDLE)((size_t)m_dataItems.Get(*pData)); + printf(" "); + PrintClassName(ch); + break; + } + 
default: + assert(0); + break; + } +} + +void InterpCompiler::PrintCompiledCode() +{ + const int32_t *ip = m_pMethodCode; + const int32_t *end = m_pMethodCode + m_methodCodeSize; + + while (ip < end) + { + PrintCompiledIns(ip, m_pMethodCode); + ip = InterpNextOp(ip); + } +} + +void InterpCompiler::PrintCompiledIns(const int32_t *ip, const int32_t *start) +{ + int32_t opcode = *ip; + int32_t insOffset = (int32_t)(ip - start); + + printf("IR_%04x: %-14s", insOffset, InterpOpName(opcode)); + ip++; + + if (g_interpOpDVars[opcode] > 0) + printf(" [%d <-", *ip++); + else + printf(" [nil <-"); + + if (g_interpOpSVars[opcode] > 0) + { + for (int i = 0; i < g_interpOpSVars[opcode]; i++) + printf(" %d", *ip++); + printf("],"); + } + else + { + printf(" nil],"); + } + + PrintInsData(NULL, insOffset, ip, opcode); + printf("\n"); +} + +extern "C" void assertAbort(const char* why, const char* file, unsigned line) +{ + if (t_InterpJitInfoTls) { + if (!t_InterpJitInfoTls->doAssert(file, line, why)) + return; + } + +#ifdef _MSC_VER + __debugbreak(); +#else // _MSC_VER + __builtin_trap(); +#endif // _MSC_VER +} diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h new file mode 100644 index 000000000000..47d38f3e1928 --- /dev/null +++ b/src/coreclr/interpreter/compiler.h @@ -0,0 +1,564 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _COMPILER_H_ +#define _COMPILER_H_ + +#include "intops.h" +#include "datastructs.h" +#include "enum_class_flags.h" + +TArray PrintMethodName(COMP_HANDLE comp, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE methHnd, + CORINFO_SIG_INFO* sig, + bool includeClassInstantiation, + bool includeMethodInstantiation, + bool includeSignature, + bool includeReturnType, + bool includeThisSpecifier); + +// Types that can exist on the IL execution stack. They are used only during +// IL import compilation stage. +enum StackType { + StackTypeI4 = 0, + StackTypeI8, + StackTypeR4, + StackTypeR8, + StackTypeO, + StackTypeVT, + StackTypeByRef, + StackTypeF, +#ifdef TARGET_64BIT + StackTypeI = StackTypeI8 +#else + StackTypeI = StackTypeI4 +#endif +}; + +// Types relevant for interpreter vars and opcodes. They are used in the final +// stages of the codegen and can be used during execution. +enum InterpType { + InterpTypeI1 = 0, + InterpTypeU1, + InterpTypeI2, + InterpTypeU2, + InterpTypeI4, + InterpTypeI8, + InterpTypeR4, + InterpTypeR8, + InterpTypeO, + InterpTypeVT, + InterpTypeByRef, + InterpTypeVoid, +#ifdef TARGET_64BIT + InterpTypeI = InterpTypeI8 +#else + InterpTypeI = InterpTypeI4 +#endif +}; + +#ifdef DEBUG +#define INTERP_DUMP(...) \ + { \ + if (m_verbose) \ + printf(__VA_ARGS__); \ + } +#else +#define INTERP_DUMP(...) +#endif + +struct InterpInst; +struct InterpBasicBlock; + +struct InterpCallInfo +{ + // For call instructions, this represents an array of all call arg vars + // in the order they are pushed to the stack. This makes it easy to find + // all source vars for these types of opcodes. This is terminated with -1. 
+ int32_t *pCallArgs; + int32_t callOffset; + union { + // Array of call dependencies that need to be resolved before + TSList *callDeps; + // Stack end offset of call arguments + int32_t callEndOffset; + }; +}; + +enum InterpInstFlags +{ + INTERP_INST_FLAG_CALL = 0x01, + // Flag used internally by the var offset allocator + INTERP_INST_FLAG_ACTIVE_CALL = 0x02 +}; + +struct InterpInst +{ + InterpInst *pNext, *pPrev; + union + { + InterpBasicBlock *pTargetBB; // target basic block for branch instructions + InterpBasicBlock **ppTargetBBTable; // basic block table for switch instruction + InterpCallInfo *pCallInfo; // additional information for call instructions + } info; + + int32_t opcode; + int32_t ilOffset; + int32_t nativeOffset; + uint32_t flags; + int32_t dVar; + int32_t sVars[3]; // Currently all instructions have at most 3 sregs + + int32_t data[]; + + void SetDVar(int32_t dv) + { + dVar = dv; + } + + void SetSVar(int32_t sv1) + { + sVars[0] = sv1; + } + + void SetSVars2(int32_t sv1, int32_t sv2) + { + sVars[0] = sv1; + sVars[1] = sv2; + } + + void SetSVars3(int32_t sv1, int32_t sv2, int32_t sv3) + { + sVars[0] = sv1; + sVars[1] = sv2; + sVars[2] = sv3; + } +}; + +#define CALL_ARGS_SVAR -2 +#define CALL_ARGS_TERMINATOR -1 + +struct StackInfo; + +enum InterpBBState +{ + BBStateNotEmitted, + BBStateEmitting, + BBStateEmitted +}; + +enum InterpBBClauseType +{ + BBClauseNone, + BBClauseTry, + BBClauseCatch, + BBClauseFinally, + BBClauseFilter, +}; + +struct InterpBasicBlock +{ + int32_t index; + int32_t ilOffset, nativeOffset; + int32_t nativeEndOffset; + int32_t stackHeight; + StackInfo *pStackState; + + InterpInst *pFirstIns, *pLastIns; + InterpBasicBlock *pNextBB; + + // * If this basic block is a finally, this points to a finally call island that is located where the finally + // was before all funclets were moved to the end of the method. + // * If this basic block is a call island, this points to the next finally call island basic block. + // * Otherwise, this is NULL. + InterpBasicBlock *pFinallyCallIslandBB; + // Target of a leave instruction that is located in this basic block. NULL if there is none. + InterpBasicBlock *pLeaveTargetBB; + + int inCount, outCount; + InterpBasicBlock **ppInBBs; + InterpBasicBlock **ppOutBBs; + + InterpBBState emitState; + + // Type of the innermost try block, catch, filter, or finally that contains this basic block. + uint8_t clauseType; + + // True indicates that this basic block is the first block of a filter, catch or filtered handler funclet. + bool isFilterOrCatchFuncletEntry; + + // If this basic block is a catch or filter funclet entry, this is the index of the variable + // that holds the exception object. + int clauseVarIndex; + + // Number of catch, filter or finally clauses that overlap with this basic block. 
+ int32_t overlappingEHClauseCount; + + InterpBasicBlock(int32_t index) : InterpBasicBlock(index, 0) { } + + InterpBasicBlock(int32_t index, int32_t ilOffset) + { + this->index = index; + this->ilOffset = ilOffset; + nativeOffset = -1; + nativeEndOffset = -1; + stackHeight = -1; + + pFirstIns = pLastIns = NULL; + pNextBB = NULL; + pFinallyCallIslandBB = NULL; + pLeaveTargetBB = NULL; + + inCount = 0; + outCount = 0; + + emitState = BBStateNotEmitted; + + clauseType = BBClauseNone; + isFilterOrCatchFuncletEntry = false; + clauseVarIndex = -1; + overlappingEHClauseCount = 0; + } +}; + +struct InterpVar +{ + CORINFO_CLASS_HANDLE clsHnd; + InterpType interpType; + int offset; + int size; + // live_start and live_end are used by the offset allocator + InterpInst* liveStart; + InterpInst* liveEnd; + // index of first basic block where this var is used + int bbIndex; + // If var is callArgs, this is the call instruction using it. + // Only used by the var offset allocator + InterpInst *call; + + unsigned int callArgs : 1; // Var used as argument to a call + unsigned int noCallArgs : 1; // Var can't be used as argument to a call, needs to be copied to temp + unsigned int global : 1; // Dedicated stack offset throughout method execution + unsigned int ILGlobal : 1; // Args and IL locals + unsigned int alive : 1; // Used internally by the var offset allocator + + InterpVar(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size) + { + this->interpType = interpType; + this->clsHnd = clsHnd; + this->size = size; + offset = -1; + liveStart = NULL; + bbIndex = -1; + + callArgs = false; + noCallArgs = false; + global = false; + ILGlobal = false; + alive = false; + } +}; + +struct StackInfo +{ + StackType type; + CORINFO_CLASS_HANDLE clsHnd; + // Size that this value will occupy on the interpreter stack. It is a multiple + // of INTERP_STACK_SLOT_SIZE + int size; + + // The var associated with the value of this stack entry. Every time we push on + // the stack a new var is created. + int var; + + StackInfo(StackType type) + { + this->type = type; + clsHnd = NULL; + size = 0; + var = -1; + } +}; + +enum RelocType +{ + RelocLongBranch, + RelocSwitch +}; + +struct Reloc +{ + RelocType type; + // For branch relocation, how many sVar slots to skip + int skip; + // Base offset that the relative offset to be embedded in IR applies to + int32_t offset; + InterpBasicBlock *pTargetBB; + + Reloc(RelocType type, int32_t offset, InterpBasicBlock *pTargetBB, int skip) + { + this->type = type; + this->offset = offset; + this->pTargetBB = pTargetBB; + this->skip = skip; + } +}; + + +class InterpIAllocator; + +// Entry of the table where for each leave instruction we store the first finally call island +// to be executed when the leave instruction is executed. +struct LeavesTableEntry +{ + // offset of the CEE_LEAVE instruction + int32_t ilOffset; + // The BB of the call island BB that will be the first to call when the leave + // instruction is executed. 
+    InterpBasicBlock *pFinallyCallIslandBB;
+};
+
+class InterpCompiler
+{
+    friend class InterpIAllocator;
+    friend class InterpGcSlotAllocator;
+
+private:
+    CORINFO_METHOD_HANDLE m_methodHnd;
+    CORINFO_MODULE_HANDLE m_compScopeHnd;
+    COMP_HANDLE m_compHnd;
+    CORINFO_METHOD_INFO* m_methodInfo;
+#ifdef DEBUG
+    CORINFO_CLASS_HANDLE m_classHnd;
+    TArray<char> m_methodName;
+    bool m_verbose = false;
+#endif
+
+    static int32_t InterpGetMovForType(InterpType interpType, bool signExtend);
+
+    uint8_t* m_ip;
+    uint8_t* m_pILCode;
+    int32_t m_ILCodeSize;
+    int32_t m_currentILOffset;
+    InterpInst* m_pInitLocalsIns;
+
+    // Table of mappings of leave instructions to the first finally call island the leave
+    // needs to execute.
+    TArray<LeavesTableEntry> m_leavesTable;
+
+    // This represents a mapping from indexes to pointer sized data. During compilation, an
+    // instruction can request an index for some data (like a MethodDesc pointer), that it
+    // will then embed in the instruction stream. The data item table will be referenced
+    // from the interpreter code header during execution.
+    // FIXME during compilation this should be a hashtable for fast lookup of duplicates
+    TArray<void*> m_dataItems;
+    int32_t GetDataItemIndex(void* data);
+    void* GetDataItemAtIndex(int32_t index);
+    int32_t GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle);
+    int32_t GetDataItemIndexForHelperFtn(CorInfoHelpFunc ftn);
+
+    int GenerateCode(CORINFO_METHOD_INFO* methodInfo);
+    InterpBasicBlock* GenerateCodeForFinallyCallIslands(InterpBasicBlock *pNewBB, InterpBasicBlock *pPrevBB);
+    void PatchInitLocals(CORINFO_METHOD_INFO* methodInfo);
+
+    void ResolveToken(uint32_t token, CorInfoTokenKind tokenKind, CORINFO_RESOLVED_TOKEN *pResolvedToken);
+    CORINFO_METHOD_HANDLE ResolveMethodToken(uint32_t token);
+    CORINFO_CLASS_HANDLE ResolveClassToken(uint32_t token);
+    CORINFO_CLASS_HANDLE getClassFromContext(CORINFO_CONTEXT_HANDLE context);
+    int getParamArgIndex(); // Get the index into the m_pVars array of the Parameter argument. This is either the this pointer, a methoddesc or a class handle
+
+    struct InterpEmbedGenericResult
+    {
+        // If var is != -1, then the var holds the result of the lookup
+        int var = -1;
+        // If var == -1, then the data item holds the result of the lookup
+        int dataItemIndex = -1;
+    };
+
+    enum class GenericHandleEmbedOptions
+    {
+        support_use_as_flags = -1, // Magic value which in combination with enum_class_flags.h allows the use of bitwise operations and the HasFlag helper method
+
+        None = 0,
+        VarOnly = 1,
+        EmbedParent = 2,
+    };
+    InterpEmbedGenericResult EmitGenericHandle(CORINFO_RESOLVED_TOKEN* resolvedToken, GenericHandleEmbedOptions options);
+    int EmitGenericHandleAsVar(const CORINFO_GENERICHANDLE_RESULT &embedInfo);
+
+    void* AllocMethodData(size_t numBytes);
+public:
+    // FIXME Mempool allocation currently leaks. We need to add an allocator and then
+    // free all memory when method is finished compiling.
+ void* AllocMemPool(size_t numBytes); + void* AllocMemPool0(size_t numBytes); +private: + void* AllocTemporary(size_t numBytes); + void* AllocTemporary0(size_t numBytes); + void* ReallocTemporary(void* ptr, size_t numBytes); + void FreeTemporary(void* ptr); + + // Instructions + InterpBasicBlock *m_pCBB, *m_pEntryBB; + InterpInst* m_pLastNewIns; + + int32_t GetInsLength(InterpInst *pIns); + bool InsIsNop(InterpInst *pIns); + InterpInst* AddIns(int opcode); + InterpInst* NewIns(int opcode, int len); + InterpInst* AddInsExplicit(int opcode, int dataLen); + InterpInst* InsertInsBB(InterpBasicBlock *pBB, InterpInst *pPrevIns, int opcode); + InterpInst* InsertIns(InterpInst *pPrevIns, int opcode); + InterpInst* FirstRealIns(InterpBasicBlock *pBB); + InterpInst* NextRealIns(InterpInst *pIns); + InterpInst* PrevRealIns(InterpInst *pIns); + void ClearIns(InterpInst *pIns); + + void ForEachInsSVar(InterpInst *ins, void *pData, void (InterpCompiler::*callback)(int*, void*)); + void ForEachInsVar(InterpInst *ins, void *pData, void (InterpCompiler::*callback)(int*, void*)); + + // Basic blocks + int m_BBCount = 0; + InterpBasicBlock** m_ppOffsetToBB; + + ICorDebugInfo::OffsetMapping* m_pILToNativeMap = NULL; + int32_t m_ILToNativeMapSize = 0; + + InterpBasicBlock* AllocBB(int32_t ilOffset); + InterpBasicBlock* GetBB(int32_t ilOffset); + void LinkBBs(InterpBasicBlock *from, InterpBasicBlock *to); + void UnlinkBBs(InterpBasicBlock *from, InterpBasicBlock *to); + + void EmitBranch(InterpOpcode opcode, int ilOffset); + void EmitOneArgBranch(InterpOpcode opcode, int ilOffset, int insSize); + void EmitTwoArgBranch(InterpOpcode opcode, int ilOffset, int insSize); + void EmitBranchToBB(InterpOpcode opcode, InterpBasicBlock *pTargetBB); + + void EmitBBEndVarMoves(InterpBasicBlock *pTargetBB); + void InitBBStackState(InterpBasicBlock *pBB); + void UnlinkUnreachableBBlocks(); + + // Vars + InterpVar *m_pVars = NULL; + int32_t m_varsSize = 0; + int32_t m_varsCapacity = 0; + int32_t m_numILVars = 0; + int32_t m_paramArgIndex = 0; // Index of the type parameter argument in the m_pVars array. + // For each catch or filter clause, we create a variable that holds the exception object. + // This is the index of the first such variable. 
+    int32_t m_clauseVarsIndex = 0;
+
+    int32_t CreateVarExplicit(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size);
+
+    int32_t m_totalVarsStackSize, m_globalVarsWithRefsStackTop;
+    int32_t m_paramAreaOffset = 0;
+    int32_t m_ILLocalsOffset, m_ILLocalsSize;
+    void AllocVarOffsetCB(int *pVar, void *pData);
+    int32_t AllocVarOffset(int var, int32_t *pPos);
+    int32_t GetLiveStartOffset(int var);
+    int32_t GetLiveEndOffset(int var);
+
+    int32_t GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign);
+    void CreateILVars();
+
+    void CreateNextLocalVar(int iArgToSet, CORINFO_CLASS_HANDLE argClass, InterpType interpType, int32_t *pOffset);
+
+    // Stack
+    StackInfo *m_pStackPointer, *m_pStackBase;
+    int32_t m_stackCapacity;
+    bool m_hasInvalidCode = false;
+
+    bool CheckStackHelper(int n);
+    void EnsureStack(int additional);
+    void PushTypeExplicit(StackType stackType, CORINFO_CLASS_HANDLE clsHnd, int size);
+    void PushStackType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd);
+    void PushInterpType(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd);
+    void PushTypeVT(CORINFO_CLASS_HANDLE clsHnd, int size);
+
+    // Code emit
+    void EmitConv(StackInfo *sp, StackType type, InterpOpcode convOp);
+    void EmitLoadVar(int var);
+    void EmitStoreVar(int var);
+    void EmitBinaryArithmeticOp(int32_t opBase);
+    void EmitUnaryArithmeticOp(int32_t opBase);
+    void EmitShiftOp(int32_t opBase);
+    void EmitCompareOp(int32_t opBase);
+    void EmitCall(CORINFO_RESOLVED_TOKEN* constrainedClass, bool readonly, bool tailcall, bool newObj);
+    bool EmitCallIntrinsics(CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO sig);
+    void EmitLdind(InterpType type, CORINFO_CLASS_HANDLE clsHnd, int32_t offset);
+    void EmitStind(InterpType type, CORINFO_CLASS_HANDLE clsHnd, int32_t offset, bool reverseSVarOrder);
+    void EmitLdelem(int32_t opcode, InterpType type);
+    void EmitStelem(InterpType type);
+    void EmitStaticFieldAddress(CORINFO_FIELD_INFO *pFieldInfo, CORINFO_RESOLVED_TOKEN *pResolvedToken);
+    void EmitStaticFieldAccess(InterpType interpFieldType, CORINFO_FIELD_INFO *pFieldInfo, CORINFO_RESOLVED_TOKEN *pResolvedToken, bool isLoad);
+    void EmitLdLocA(int32_t var);
+
+    // Var Offset allocator
+    TArray<InterpInst*> *m_pActiveCalls;
+    TArray<int32_t> *m_pActiveVars;
+    TSList<InterpInst*> *m_pDeferredCalls;
+
+    int32_t AllocGlobalVarOffset(int var);
+    void SetVarLiveRange(int32_t var, InterpInst* ins);
+    void SetVarLiveRangeCB(int32_t *pVar, void *pData);
+    void InitializeGlobalVar(int32_t var, int bbIndex);
+    void InitializeGlobalVarCB(int32_t *pVar, void *pData);
+    void InitializeGlobalVars();
+    void EndActiveCall(InterpInst *call);
+    void CompactActiveVars(int32_t *current_offset);
+
+    // Passes
+    int32_t* m_pMethodCode;
+    int32_t m_methodCodeSize; // code size measured in int32_t slots, instead of bytes
+
+    void AllocOffsets();
+    int32_t ComputeCodeSize();
+    uint32_t ConvertOffset(int32_t offset);
+    void EmitCode();
+    int32_t* EmitBBCode(int32_t *ip, InterpBasicBlock *bb, TArray<Reloc> *relocs);
+    int32_t* EmitCodeIns(int32_t *ip, InterpInst *pIns, TArray<Reloc> *relocs);
+    void PatchRelocations(TArray<Reloc> *relocs);
+    InterpMethod* CreateInterpMethod();
+    bool CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo);
+    bool InitializeClauseBuildingBlocks(CORINFO_METHOD_INFO* methodInfo);
+    void CreateFinallyCallIslandBasicBlocks(CORINFO_METHOD_INFO* methodInfo, int32_t leaveOffset, InterpBasicBlock* pLeaveTargetBB);
+    void GetNativeRangeForClause(uint32_t startILOffset, uint32_t endILOffset, int32_t *nativeStartOffset, int32_t*
nativeEndOffset); + + // Debug + void PrintClassName(CORINFO_CLASS_HANDLE cls); + void PrintMethodName(CORINFO_METHOD_HANDLE method); + void PrintCode(); + void PrintBBCode(InterpBasicBlock *pBB); + void PrintIns(InterpInst *ins); + void PrintInsData(InterpInst *ins, int32_t offset, const int32_t *pData, int32_t opcode); + void PrintCompiledCode(); + void PrintCompiledIns(const int32_t *ip, const int32_t *start); +public: + + InterpCompiler(COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo); + + InterpMethod* CompileMethod(); + void BuildGCInfo(InterpMethod *pInterpMethod); + void BuildEHInfo(); + + int32_t* GetCode(int32_t *pCodeSize); +}; + +/***************************************************************************** + * operator new + * + * Uses the compiler's AllocMemPool0, which will eventually free automatically at the end of compilation (doesn't yet). + */ + + inline void* operator new(size_t sz, InterpCompiler* compiler) + { + return compiler->AllocMemPool0(sz); +} + + inline void* operator new[](size_t sz, InterpCompiler* compiler) + { + return compiler->AllocMemPool0(sz); + } + +#endif //_COMPILER_H_ diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp new file mode 100644 index 000000000000..c0be16553902 --- /dev/null +++ b/src/coreclr/interpreter/compileropt.cpp @@ -0,0 +1,437 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "interpreter.h" + +// Allocates the offset for var at the stack position identified by +// *pPos while bumping the pointer to point to the next stack location +int32_t InterpCompiler::AllocVarOffset(int var, int32_t *pPos) +{ + int32_t size, offset; + + offset = *pPos; + size = m_pVars[var].size; + + m_pVars[var].offset = offset; + + *pPos = ALIGN_UP_TO(offset + size, INTERP_STACK_SLOT_SIZE); + + return m_pVars[var].offset; +} + +// Global vars are variables that are referenced from multiple basic blocks. We reserve +// a dedicated slot for each such variable. +int32_t InterpCompiler::AllocGlobalVarOffset(int var) +{ + return AllocVarOffset(var, &m_totalVarsStackSize); +} + +// For a var that is local to the current bblock that we process, as we iterate +// over instructions we mark the first and last intruction using it. 
+void InterpCompiler::SetVarLiveRange(int32_t var, InterpInst* ins) +{ + // We don't track liveness yet for global vars + if (m_pVars[var].global) + return; + if (m_pVars[var].liveStart == NULL) + m_pVars[var].liveStart = ins; + m_pVars[var].liveEnd = ins; +} + +void InterpCompiler::SetVarLiveRangeCB(int32_t *pVar, void *pData) +{ + SetVarLiveRange(*pVar, (InterpInst*)pData); +} + +void InterpCompiler::InitializeGlobalVar(int32_t var, int bbIndex) +{ + // Check if already handled + if (m_pVars[var].global) + return; + + if (m_pVars[var].bbIndex == -1) + { + m_pVars[var].bbIndex = bbIndex; + } + else if (m_pVars[var].bbIndex != bbIndex) + { + AllocGlobalVarOffset(var); + m_pVars[var].global = true; + INTERP_DUMP("alloc global var %d to offset %d\n", var, m_pVars[var].offset); + } +} + +void InterpCompiler::InitializeGlobalVarCB(int32_t *pVar, void *pData) +{ + InitializeGlobalVar(*pVar, (int)(size_t)pData); +} + +void InterpCompiler::InitializeGlobalVars() +{ + InterpBasicBlock *pBB; + for (pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB) + { + InterpInst *pIns; + + for (pIns = pBB->pFirstIns; pIns != NULL; pIns = pIns->pNext) { + + int32_t opcode = pIns->opcode; + if (opcode == INTOP_NOP) + continue; + if (opcode == INTOP_LDLOCA) + { + int var = pIns->sVars[0]; + // If global flag is set, it means its offset was already allocated + if (!m_pVars[var].global) + { + AllocGlobalVarOffset(var); + m_pVars[var].global = true; + INTERP_DUMP("alloc global var %d to offset %d\n", var, m_pVars[var].offset); + } + } + ForEachInsVar(pIns, (void*)(size_t)pBB->index, &InterpCompiler::InitializeGlobalVarCB); + } + } + + m_totalVarsStackSize = ALIGN_UP_TO(m_totalVarsStackSize, INTERP_STACK_ALIGNMENT); +} + +// In the final codegen, each call instruction will receive a single offset as an argument. At this +// offset all the call arguments will be located. This offset will point into the param area. Vars +// allocated here have special constraints compared to normal local/global vars. +// +// For each call instruction, this method computes its args offset. The call offset is computed as +// the max offset of all call offsets on which the call depends. Stack ensures that all call offsets +// on which the call depends are calculated before the call in question, by deferring calls from the +// last to the first one. +// +// This method allocates offsets of resolved calls following a constraint where the base offset +// of a call must be greater than the offset of any argument of other active call args. It first +// removes the call from an array of active calls. If a match is found, the call is removed from +// the array by moving the last entry into its place. Otherwise, it is a call without arguments. +// +// If there are active calls, the call in question is pushed onto the stack as a deferred call. +// The call contains a list of other active calls on which it depends. Those calls need to be +// resolved first in order to determine optimal base offset for the call in question. Otherwise, +// if there are no active calls, we resolve the call in question and deferred calls from the stack. +// +// For better understanding, consider a simple example: +// a <- _ +// b <- _ +// call1 c <- b +// d <- _ +// call2 _ <- a c d +// +// When `a` is defined, call2 becomes an active call, since `a` is part of call2 arguments. +// When `b` is defined, call1 also becomes an active call, +// When reaching call1, we attempt to resolve it. 
The problem with this is that call2 is already
+// active, and all arguments of call1 should be placed after any arguments of call2 (in this example
+// it would be enough for them to be placed after `a`, but for simplicity we place them after all
+// arguments, so after `d` offset). Given call1 offset depends on call2 offset, we initialize its
+// callDeps (to call2) and add call1 to the set of currently deferred calls. Call1 is no longer
+// an active call at this point.
+// When reaching call2, we see we have no remaining active calls, so we will resolve its offset.
+// Once the offset is resolved, we continue to resolve each remaining call from the deferred list.
+// Processing call1, we iterate over each call dependency (in our case just call2) and allocate its
+// offset accordingly so it doesn't overlap with any call2 args offsets.
+void InterpCompiler::EndActiveCall(InterpInst *call)
+{
+    // Remove call from array
+    m_pActiveCalls->Remove(call);
+
+    // Push active call that should be resolved onto the stack
+    if (m_pActiveCalls->GetSize())
+    {
+        TSList<InterpInst*> *callDeps = NULL;
+        for (int i = 0; i < m_pActiveCalls->GetSize(); i++)
+            callDeps = TSList<InterpInst*>::Push(callDeps, m_pActiveCalls->Get(i));
+        call->info.pCallInfo->callDeps = callDeps;
+
+        m_pDeferredCalls = TSList<InterpInst*>::Push(m_pDeferredCalls, call);
+    }
+    else
+    {
+        call->info.pCallInfo->callDeps = NULL;
+        // If no other active calls, current active call and all deferred calls can be resolved from the stack
+        InterpInst *deferredCall = call;
+        while (deferredCall) {
+            // `base_offset` is a relative offset (to the start of the call args stack) where the args for this
+            // call reside. The deps for a call represent the list of active calls at the moment when the call ends.
+            // This means that all deps for a call end after the call in question. Given we iterate over the list
+            // of deferred calls from the last to the first one to end, all deps of a call are guaranteed to have
+            // been processed at this point.
+            int32_t baseOffset = 0;
+            for (TSList<InterpInst*> *list = deferredCall->info.pCallInfo->callDeps; list; list = list->pNext)
+            {
+                int32_t endOffset = list->data->info.pCallInfo->callEndOffset;
+                if (endOffset > baseOffset)
+                    baseOffset = endOffset;
+            }
+            deferredCall->info.pCallInfo->callOffset = baseOffset;
+            // Compute the offset of each call argument
+            int32_t *callArgs = deferredCall->info.pCallInfo->pCallArgs;
+            if (callArgs && (*callArgs != -1))
+            {
+                int32_t var = *callArgs;
+                while (var != CALL_ARGS_TERMINATOR)
+                {
+                    AllocVarOffset(var, &baseOffset);
+                    callArgs++;
+                    var = *callArgs;
+                }
+            }
+            deferredCall->info.pCallInfo->callEndOffset = ALIGN_UP_TO(baseOffset, INTERP_STACK_ALIGNMENT);
+
+            if (m_pDeferredCalls)
+            {
+                deferredCall = m_pDeferredCalls->data;
+                m_pDeferredCalls = TSList<InterpInst*>::Pop(m_pDeferredCalls);
+            }
+            else
+            {
+                deferredCall = NULL;
+            }
+        }
+    }
+}
+
+// Remove dead vars from the end of the active vars array and update the current offset
+// to point immediately after the first found alive var. The space that used to belong
+// to the now dead vars will be reused for future defined local vars in the same bblock.
+void InterpCompiler::CompactActiveVars(int32_t *pCurrentOffset) +{ + int32_t size = m_pActiveVars->GetSize(); + if (!size) + return; + int32_t i = size - 1; + while (i >= 0) + { + int32_t var = m_pActiveVars->Get(i); + // If var is alive we can't compact anymore + if (m_pVars[var].alive) + return; + *pCurrentOffset = m_pVars[var].offset; + m_pActiveVars->RemoveAt(i); + i--; + } +} + +void InterpCompiler::AllocOffsets() +{ + InterpBasicBlock *pBB; + m_pActiveVars = new TArray(); + m_pActiveCalls = new TArray(); + m_pDeferredCalls = NULL; + + InitializeGlobalVars(); + + INTERP_DUMP("\nAllocating var offsets\n"); + + int finalVarsStackSize = m_totalVarsStackSize, + globalVarsWithRefsStackTop = m_totalVarsStackSize; + + // We now have the top of stack offset. All local regs are allocated after this offset, with each basic block + for (pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB) + { + InterpInst *pIns; + int insIndex = 0; + + INTERP_DUMP("BB%d\n", pBB->index); + + // All data structs should be left empty after a bblock iteration + assert(m_pActiveVars->GetSize() == 0); + assert(m_pActiveCalls->GetSize() == 0); + assert(m_pDeferredCalls == NULL); + + for (pIns = pBB->pFirstIns; pIns != NULL; pIns = pIns->pNext) + { + if (pIns->opcode == INTOP_NOP) + continue; + + // TODO NewObj will be marked as noCallArgs + if (pIns->flags & INTERP_INST_FLAG_CALL) + { + if (pIns->info.pCallInfo && pIns->info.pCallInfo->pCallArgs) + { + int32_t *callArgs = pIns->info.pCallInfo->pCallArgs; + int32_t var = *callArgs; + + while (var != -1) + { + if (m_pVars[var].global || m_pVars[var].noCallArgs) + { + // Some vars can't be allocated on the call args stack, since the constraint is that + // call args vars die after the call. This isn't necessarily true for global vars or + // vars that are used by other instructions aside from the call. + // We need to copy the var into a new tmp var + int newVar = CreateVarExplicit(m_pVars[var].interpType, m_pVars[var].clsHnd, m_pVars[var].size); + m_pVars[newVar].call = pIns; + m_pVars[newVar].callArgs = true; + + int32_t opcode = InterpGetMovForType(m_pVars[newVar].interpType, false); + InterpInst *newInst = InsertInsBB(pBB, pIns->pPrev, opcode); + // The InsertInsBB assigns m_currentILOffset to ins->ilOffset, which is incorrect for + // instructions injected here. Copy the ilOffset from the call instruction instead. + newInst->ilOffset = pIns->ilOffset; + + newInst->SetDVar(newVar); + newInst->SetSVar(var); + if (opcode == INTOP_MOV_VT) + newInst->data[0] = m_pVars[var].size; + // The arg of the call is no longer global + *callArgs = newVar; + // Also update liveness for this instruction + ForEachInsVar(newInst, newInst, &InterpCompiler::SetVarLiveRangeCB); + insIndex++; + } + else + { + // Flag this var as it has special storage on the call args stack + m_pVars[var].call = pIns; + m_pVars[var].callArgs = true; + } + callArgs++; + var = *callArgs; + } + } + } + // Set liveStart and liveEnd for every referenced local that is not global + ForEachInsVar(pIns, pIns, &InterpCompiler::SetVarLiveRangeCB); + insIndex++; + } + int32_t currentOffset = m_totalVarsStackSize; + + insIndex = 0; + for (pIns = pBB->pFirstIns; pIns != NULL; pIns = pIns->pNext) { + int32_t opcode = pIns->opcode; + bool isCall = pIns->flags & INTERP_INST_FLAG_CALL; + + if (opcode == INTOP_NOP) + continue; + +#ifdef DEBUG + if (m_verbose) + { + printf("\tins_index %d\t", insIndex); + PrintIns(pIns); + printf("\n"); + } +#endif + + // Expire source vars. 
We first mark them as not alive and then compact the array + for (int i = 0; i < g_interpOpSVars[opcode]; i++) + { + int32_t var = pIns->sVars[i]; + if (var == CALL_ARGS_SVAR) + continue; + if (!m_pVars[var].global && m_pVars[var].liveEnd == pIns) + { + // Mark the var as no longer being alive + assert(!m_pVars[var].callArgs); + m_pVars[var].alive = false; + } + } + + if (isCall) + EndActiveCall(pIns); + + CompactActiveVars(¤tOffset); + + // Alloc dreg local starting at the stack_offset + if (g_interpOpDVars[opcode]) + { + int32_t var = pIns->dVar; + + if (m_pVars[var].callArgs) + { + InterpInst *call = m_pVars[var].call; + // Check if already added + if (!(call->flags & INTERP_INST_FLAG_ACTIVE_CALL)) + { + m_pActiveCalls->Add(call); + // Mark a flag on it so we don't have to lookup the array with every argument store. + call->flags |= INTERP_INST_FLAG_ACTIVE_CALL; + } + } + else if (!m_pVars[var].global && m_pVars[var].offset == -1) + { + AllocVarOffset(var, ¤tOffset); + INTERP_DUMP("alloc var %d to offset %d\n", var, m_pVars[var].offset); + + if (currentOffset > finalVarsStackSize) + finalVarsStackSize = currentOffset; + + if (m_pVars[var].liveEnd != pIns) + { + // If dVar is still used in the basic block, add it to the active list + m_pActiveVars->Add(var); + m_pVars[var].alive = true; + } + else + { + // Otherwise dealloc it + currentOffset = m_pVars[var].offset; + } + } + } + +#ifdef DEBUG + if (m_verbose) + { + printf("active vars:"); + for (int i = 0; i < m_pActiveVars->GetSize(); i++) + { + int32_t var = m_pActiveVars->Get(i); + if (m_pVars[var].alive) + { + printf(" %d (end ", var); + PrintIns(m_pVars[var].liveEnd); + printf("),"); + } + } + printf("\n"); + } +#endif + insIndex++; + } + } + finalVarsStackSize = ALIGN_UP_TO(finalVarsStackSize, INTERP_STACK_ALIGNMENT); + + // Iterate over all call args locals, update their final offset (aka add td->total_locals_size to them) + // then also update td->total_locals_size to account for this space. + m_paramAreaOffset = finalVarsStackSize; + for (int32_t i = 0; i < m_varsSize; i++) + { + InterpVar *pVar = &m_pVars[i]; + // These are allocated separately at the end of the stack + if (pVar->callArgs) + { + pVar->offset += m_paramAreaOffset; + int32_t topOffset = pVar->offset + pVar->size; + if (finalVarsStackSize < topOffset) + finalVarsStackSize = topOffset; + } + + // For any global vars that might contain managed pointers we need to maintain a 'global stack top' + // which specifies what stack region we need to zero at method entry in order to avoid reporting + // garbage pointers to the GC when it does a stackwalk + // Non-global vars have accurate liveness ranges we report to the GC, so we don't care about them + if ( + pVar->global && ( + (pVar->interpType == InterpTypeO) || + (pVar->interpType == InterpTypeByRef) || + ( + (pVar->interpType == InterpTypeVT) && + (m_compHnd->getClassAttribs(pVar->clsHnd) & CORINFO_FLG_CONTAINS_GC_PTR) + ) + ) + ) + { + int32_t endOfVar = pVar->offset + pVar->size; + if (endOfVar > globalVarsWithRefsStackTop) + globalVarsWithRefsStackTop = endOfVar; + } + } + + m_globalVarsWithRefsStackTop = globalVarsWithRefsStackTop; + m_totalVarsStackSize = ALIGN_UP_TO(finalVarsStackSize, INTERP_STACK_ALIGNMENT); +} diff --git a/src/coreclr/interpreter/datastructs.h b/src/coreclr/interpreter/datastructs.h new file mode 100644 index 000000000000..706821f76c25 --- /dev/null +++ b/src/coreclr/interpreter/datastructs.h @@ -0,0 +1,205 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _DATASTRUCTS_H_ +#define _DATASTRUCTS_H_ + +template +class TArray +{ +private: + int32_t m_size, m_capacity; + T *m_array; + + void Grow() + { + if (m_capacity) + m_capacity *= 2; + else + m_capacity = 16; + + m_array = (T*)realloc(m_array, m_capacity * sizeof(T)); + } + + void Grow(int32_t minNewCapacity) + { + if (m_capacity) + m_capacity *= 2; + else + m_capacity = 16; + + m_capacity = (m_capacity > minNewCapacity) ? m_capacity : minNewCapacity; + + m_array = (T*)realloc(m_array, m_capacity * sizeof(T)); + } +public: + TArray() + { + m_size = 0; + m_capacity = 0; + m_array = NULL; + } + + // Implicit copies are not permitted to prevent accidental allocation of large arrays. + TArray(const TArray &other) = delete; + TArray& operator=(const TArray &other) = delete; + + TArray(TArray &&other) + { + m_size = other.m_size; + m_capacity = other.m_capacity; + m_array = other.m_array; + + other.m_size = 0; + other.m_capacity = 0; + other.m_array = NULL; + } + TArray& operator=(TArray &&other) + { + if (this != &other) + { + if (m_capacity > 0) + free(m_array); + + m_size = other.m_size; + m_capacity = other.m_capacity; + m_array = other.m_array; + + other.m_size = 0; + other.m_capacity = 0; + other.m_array = NULL; + } + return *this; + } + + ~TArray() + { + if (m_capacity > 0) + free(m_array); + } + + int32_t GetSize() + { + return m_size; + } + + int32_t Add(T element) + { + if (m_size == m_capacity) + Grow(); + m_array[m_size] = element; + return m_size++; + } + + void Append(const T* pElements, int32_t count) + { + int32_t availableCapacity = m_capacity - m_size; + if (count > availableCapacity) + { + // Grow the array if there is not enough space + Grow(count + m_size); + } + for (int32_t i = 0; i < count; i++) + { + m_array[m_size + i] = pElements[i]; + } + m_size += count; + } + + void GrowBy(int32_t count) + { + int32_t availableCapacity = m_capacity - m_size; + if (count > availableCapacity) + { + // Grow the array if there is not enough space + Grow(count + m_size); + } + memset(&m_array[m_size], 0, count * sizeof(T)); // Initialize new elements to zero + m_size += count; + } + + // Returns a pointer to the element at the specified index. 
+ T* GetUnderlyingArray() + { + return m_array; + } + + T Get(int32_t index) + { + assert(index < m_size); + return m_array[index]; + } + + int32_t Find(T element) + { + for (int i = 0; i < m_size; i++) + { + if (element == m_array[i]) + return i; + } + return -1; + } + + // Assumes elements are unique + void RemoveAt(int32_t index) + { + assert(index < m_size); + m_size--; + // Since this entry is removed, move the last entry into it + if (m_size > 0 && index < m_size) + m_array[index] = m_array[m_size]; + } + + // Assumes elements are unique + void Remove(T element) + { + for (int32_t i = 0; i < m_size; i++) + { + if (element == m_array[i]) + { + RemoveAt(i); + break; + } + } + } + + void Clear() + { + m_size = 0; + } +}; + +// Singly linked list, implemented as a stack +template +struct TSList +{ + T data; + TSList *pNext; + + TSList(T data, TSList *pNext) + { + this->data = data; + this->pNext = pNext; + } + + static TSList* Push(TSList *head, T data) + { + TSList *newHead = new TSList(data, head); + return newHead; + } + + static TSList* Pop(TSList *head) + { + TSList *next = head->pNext; + delete head; + return next; + } + + static void Free(TSList *head) + { + while (head != NULL) + head = Pop(head); + } +}; + +#endif diff --git a/src/coreclr/interpreter/eeinterp.cpp b/src/coreclr/interpreter/eeinterp.cpp new file mode 100644 index 000000000000..05163cd083eb --- /dev/null +++ b/src/coreclr/interpreter/eeinterp.cpp @@ -0,0 +1,126 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include +#include "corjit.h" + +#include "interpreter.h" +#include "eeinterp.h" + +#include +#include + +/*****************************************************************************/ +ICorJitHost* g_interpHost = nullptr; +bool g_interpInitialized = false; +/*****************************************************************************/ +extern "C" INTERP_API void jitStartup(ICorJitHost* jitHost) +{ + if (g_interpInitialized) + { + return; + } + g_interpHost = jitHost; + + assert(!InterpConfig.IsInitialized()); + InterpConfig.Initialize(jitHost); + + g_interpInitialized = true; +} +/*****************************************************************************/ +static CILInterp g_CILInterp; +extern "C" INTERP_API ICorJitCompiler* getJit() +{ + if (!g_interpInitialized) + { + return nullptr; + } + return &g_CILInterp; +} + + +static CORINFO_MODULE_HANDLE g_interpModule = NULL; + +//**************************************************************************** +CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd, + CORINFO_METHOD_INFO* methodInfo, + unsigned flags, + uint8_t** entryAddress, + uint32_t* nativeSizeOfCode) +{ + + bool doInterpret; + + if (g_interpModule != NULL) + { + if (methodInfo->scope == g_interpModule) + doInterpret = true; + else + doInterpret = false; + } + else + { + const char *methodName = compHnd->getMethodNameFromMetadata(methodInfo->ftn, nullptr, nullptr, nullptr, 0); +#ifdef TARGET_WASM + // interpret everything on wasm + doInterpret = true; +#else + doInterpret = (InterpConfig.Interpreter().contains(compHnd, methodInfo->ftn, compHnd->getMethodClass(methodInfo->ftn), &methodInfo->args)); +#endif + + if (doInterpret) + g_interpModule = methodInfo->scope; + } + + if (!doInterpret) + { + return CORJIT_SKIPPED; + } + + InterpCompiler compiler(compHnd, methodInfo); + InterpMethod *pMethod = compiler.CompileMethod(); + + int32_t IRCodeSize; + int32_t *pIRCode = compiler.GetCode(&IRCodeSize); 
+ + // FIXME this shouldn't be here + compHnd->setMethodAttribs(methodInfo->ftn, CORINFO_FLG_INTERPRETER); + + uint32_t sizeOfCode = sizeof(InterpMethod*) + IRCodeSize * sizeof(int32_t); + uint8_t unwindInfo[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + + AllocMemArgs args {}; + args.hotCodeSize = sizeOfCode; + args.coldCodeSize = 0; + args.roDataSize = 0; + args.xcptnsCount = 0; + args.flag = CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN; + compHnd->allocMem(&args); + + // We store first the InterpMethod pointer as the code header, followed by the actual code + *(InterpMethod**)args.hotCodeBlockRW = pMethod; + memcpy ((uint8_t*)args.hotCodeBlockRW + sizeof(InterpMethod*), pIRCode, IRCodeSize * sizeof(int32_t)); + + *entryAddress = (uint8_t*)args.hotCodeBlock; + *nativeSizeOfCode = sizeOfCode; + + // We can't do this until we've called allocMem + compiler.BuildGCInfo(pMethod); + compiler.BuildEHInfo(); + + return CORJIT_OK; +} + +void CILInterp::ProcessShutdownWork(ICorStaticInfo* statInfo) +{ + g_interpInitialized = false; +} + +void CILInterp::getVersionIdentifier(GUID* versionIdentifier) +{ + assert(versionIdentifier != nullptr); + memcpy(versionIdentifier, &JITEEVersionIdentifier, sizeof(GUID)); +} + +void CILInterp::setTargetOS(CORINFO_OS os) +{ +} diff --git a/src/coreclr/interpreter/eeinterp.h b/src/coreclr/interpreter/eeinterp.h new file mode 100644 index 000000000000..dd6140040226 --- /dev/null +++ b/src/coreclr/interpreter/eeinterp.h @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +extern ICorJitHost* g_interpHost; + +class CILInterp : public ICorJitCompiler +{ + CorJitResult compileMethod(ICorJitInfo* comp, /* IN */ + CORINFO_METHOD_INFO* methodInfo, /* IN */ + unsigned flags, /* IN */ + uint8_t** nativeEntry, /* OUT */ + uint32_t* nativeSizeOfCode /* OUT */ + ); + void ProcessShutdownWork(ICorStaticInfo* statInfo); + void getVersionIdentifier(GUID* versionIdentifier /* OUT */ ); + void setTargetOS(CORINFO_OS os); +}; diff --git a/src/coreclr/interpreter/interpconfig.cpp b/src/coreclr/interpreter/interpconfig.cpp new file mode 100644 index 000000000000..7783a54c0c7c --- /dev/null +++ b/src/coreclr/interpreter/interpconfig.cpp @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "interpreter.h" + +InterpConfigValues InterpConfig; + +void InterpConfigValues::Initialize(ICorJitHost* host) +{ + assert(!m_isInitialized); + +#define RELEASE_CONFIG_STRING(name, key) m_##name = host->getStringConfigValue(key); +#define RELEASE_CONFIG_METHODSET(name, key) do { const char *pConfigValue = host->getStringConfigValue(key); m_##name.initialize(pConfigValue); host->freeStringConfigValue(pConfigValue); } while (0); +#define RELEASE_CONFIG_INTEGER(name, key, defaultValue) m_##name = host->getIntConfigValue(key, defaultValue); +#include "interpconfigvalues.h" + + m_isInitialized = true; +} + +void InterpConfigValues::Destroy(ICorJitHost* host) +{ + if (!m_isInitialized) + return; + +#define RELEASE_CONFIG_STRING(name, key) host->freeStringConfigValue(m_##name); +#define RELEASE_CONFIG_METHODSET(name, key) m_##name.destroy(); +#define RELEASE_CONFIG_INTEGER(name, key, defaultValue) +#include "interpconfigvalues.h" + + m_isInitialized = false; +} diff --git a/src/coreclr/interpreter/interpconfig.h b/src/coreclr/interpreter/interpconfig.h new file mode 100644 index 000000000000..a34f027e8e98 --- /dev/null +++ b/src/coreclr/interpreter/interpconfig.h @@ -0,0 +1,57 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _INTERPCONFIG_H_ +#define _INTERPCONFIG_H_ + +class ICorJitHost; + +class InterpConfigValues +{ +private: + bool m_isInitialized; + +#define RELEASE_CONFIG_STRING(name, key) const char* m_##name; +#define RELEASE_CONFIG_INTEGER(name, key, defaultValue) int m_##name; +#define RELEASE_CONFIG_METHODSET(name, key) MethodSet m_##name; +#include "interpconfigvalues.h" + +public: + +#define RELEASE_CONFIG_STRING(name, key) \ + inline const char* name() const \ + { \ + return m_##name; \ + } + +#define RELEASE_CONFIG_INTEGER(name, key, defaultValue) \ + inline int name() const \ + { \ + return m_##name; \ + } + +#define RELEASE_CONFIG_METHODSET(name, key) \ + inline const MethodSet& name() const \ + { \ + return m_##name; \ + } + +#include "interpconfigvalues.h" + +public: + InterpConfigValues() + { + } + + inline bool IsInitialized() const + { + return m_isInitialized != 0; + } + + void Initialize(ICorJitHost* host); + void Destroy(ICorJitHost* host); +}; + +extern InterpConfigValues InterpConfig; + +#endif diff --git a/src/coreclr/interpreter/interpconfigvalues.h b/src/coreclr/interpreter/interpconfigvalues.h new file mode 100644 index 000000000000..53ff3cbf7c49 --- /dev/null +++ b/src/coreclr/interpreter/interpconfigvalues.h @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifdef DEBUG +#define CONFIG_STRING(name, key) RELEASE_CONFIG_STRING(name, key) +#else +#define CONFIG_STRING(name, key) +#endif + +#ifdef DEBUG +#define CONFIG_METHODSET(name, key) RELEASE_CONFIG_METHODSET(name, key) +#else +#define CONFIG_METHODSET(name, key) +#endif + +#ifdef DEBUG +#define CONFIG_INTEGER(name, key, defaultValue) RELEASE_CONFIG_INTEGER(name, key, defaultValue) +#else +#define CONFIG_INTEGER(name, key, defaultValue) +#endif + +RELEASE_CONFIG_METHODSET(Interpreter, "Interpreter") +CONFIG_METHODSET(InterpHalt, "InterpHalt"); +CONFIG_METHODSET(InterpDump, "InterpDump"); +CONFIG_INTEGER(InterpList, "InterpList", 0); // List the methods which are compiled by the interpreter JIT + +#undef CONFIG_STRING +#undef RELEASE_CONFIG_STRING +#undef RELEASE_CONFIG_METHODSET +#undef RELEASE_CONFIG_INTEGER diff --git a/src/coreclr/interpreter/interpreter.h b/src/coreclr/interpreter/interpreter.h new file mode 100644 index 000000000000..a06dfefbab5e --- /dev/null +++ b/src/coreclr/interpreter/interpreter.h @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HOST_WINDOWS +#include +#endif + +#include "corhdr.h" +#include "corjit.h" + +#include "interpretershared.h" +#include "compiler.h" +#include "interpconfig.h" + +#define ALIGN_UP_TO(val,align) ((((size_t)val) + (size_t)((align) - 1)) & (~((size_t)(align - 1)))) + diff --git a/src/coreclr/interpreter/interpretershared.h b/src/coreclr/interpreter/interpretershared.h new file mode 100644 index 000000000000..9d55ae47761d --- /dev/null +++ b/src/coreclr/interpreter/interpretershared.h @@ -0,0 +1,117 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// This header contains definitions needed by this compiler library and also by +// the interpreter executor in the main coreclr library +#ifndef _INTERPRETERSHARED_H_ +#define _INTERPRETERSHARED_H_ + +#include "intopsshared.h" + +#ifdef _MSC_VER +#define INTERP_API +#else +#define INTERP_API __attribute__ ((visibility ("default"))) +#endif // _MSC_VER + +#define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on the interpreter stack +#define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame + +#define INTERP_INDIRECT_HELPER_TAG 1 // When a helper ftn's address is indirect we tag it with this tag bit + +struct InterpMethod +{ +#if DEBUG + InterpMethod *self; +#endif + CORINFO_METHOD_HANDLE methodHnd; + int32_t allocaSize; + void** pDataItems; + bool initLocals; + + InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize, void** pDataItems, bool initLocals) + { +#if DEBUG + this->self = this; +#endif + this->methodHnd = methodHnd; + this->allocaSize = allocaSize; + this->pDataItems = pDataItems; + this->initLocals = initLocals; + } + + bool CheckIntegrity() + { +#if DEBUG + return this->self == this; +#else + return true; +#endif + } +}; + +struct InterpByteCodeStart +{ +#ifndef DPTR + InterpMethod* const Method; // Pointer to the InterpMethod structure +#else + DPTR(InterpMethod) const Method; // Pointer to the InterpMethod structure +#endif + const int32_t* GetByteCodes() const + { + return reinterpret_cast(this + 1); + } +}; + +typedef class ICorJitInfo* COMP_HANDLE; + +class MethodSet +{ +private: + struct MethodName + { + MethodName* m_next; + const char* m_patternStart; + const char* m_patternEnd; + bool m_containsClassName; + bool m_classNameContainsInstantiation; + bool m_methodNameContainsInstantiation; + bool m_containsSignature; + }; + + const char* m_listFromConfig = nullptr; + MethodName* m_names = nullptr; + + MethodSet(const MethodSet& other) = delete; + MethodSet& operator=(const MethodSet& other) = delete; + +public: + MethodSet() + { + } + + ~MethodSet() + { + destroy(); + } + + const char* list() const + { + return m_listFromConfig; + } + + void initialize(const char* listFromConfig); + void destroy(); + + inline bool isEmpty() const + { + return m_names == nullptr; + } + bool contains(COMP_HANDLE comp, CORINFO_METHOD_HANDLE methodHnd, CORINFO_CLASS_HANDLE classHnd, CORINFO_SIG_INFO* sigInfo) const; +}; + +const CORINFO_CLASS_HANDLE NO_CLASS_HANDLE = nullptr; +const CORINFO_FIELD_HANDLE NO_FIELD_HANDLE = nullptr; +const CORINFO_METHOD_HANDLE NO_METHOD_HANDLE = nullptr; + +#endif diff --git a/src/coreclr/interpreter/intops.cpp b/src/coreclr/interpreter/intops.cpp new file mode 100644 index 000000000000..7616b2d82c2d --- /dev/null +++ b/src/coreclr/interpreter/intops.cpp @@ -0,0 +1,171 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "intops.h" + +#include +#include + +// This, instead of an array of pointers, to optimize away a pointer and a relocation per string. 
+struct InterpOpNameCharacters +{ +#define OPDEF(a,b,c,d,e,f) char a[sizeof(b)]; +#include "intops.def" +#undef OPDEF +}; + +const struct InterpOpNameCharacters g_interpOpNameCharacters = { +#define OPDEF(a,b,c,d,e,f) b, +#include "intops.def" +#undef OPDEF +}; + +const uint32_t g_interpOpNameOffsets[] = { +#define OPDEF(a,b,c,d,e,f) offsetof(InterpOpNameCharacters, a), +#include "intops.def" +#undef OPDEF +}; + +const uint8_t g_interpOpLen[] = { +#define OPDEF(a,b,c,d,e,f) c, +#include "intops.def" +#undef OPDEF +}; + +const int g_interpOpSVars[] = { +#define OPDEF(a,b,c,d,e,f) e, +#include "intops.def" +#undef OPDEF +}; + +const int g_interpOpDVars[] = { +#define OPDEF(a,b,c,d,e,f) d, +#include "intops.def" +#undef OPDEF +}; + +const InterpOpArgType g_interpOpArgType[] = { +#define OPDEF(a,b,c,d,e,f) f, +#include "intops.def" +#undef OPDEF +}; + +const int32_t* InterpNextOp(const int32_t *ip) +{ + int len = g_interpOpLen[*ip]; + if (len == 0) + { + assert(*ip == INTOP_SWITCH); + len = 3 + ip[2]; + } + + return ip + len; +} + +const char* InterpOpName(int op) +{ + return ((const char*)&g_interpOpNameCharacters) + g_interpOpNameOffsets[op]; +} + +// Information about IL opcodes + +OPCODE_FORMAT const g_CEEOpArgs[] = { +#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) args, +#include "opcode.def" +#undef OPDEF +}; + +struct CEEOpNameCharacters +{ +#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) char c[sizeof(s)]; +#include "opcode.def" +#undef OPDEF +}; + +const struct CEEOpNameCharacters g_CEEOpNameCharacters = { +#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) s, +#include "opcode.def" +#undef OPDEF +}; + +const uint32_t g_CEEOpNameOffsets[] = { +#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) offsetof(CEEOpNameCharacters, c), +#include "opcode.def" +#undef OPDEF +}; + +const char* CEEOpName(OPCODE op) +{ + return ((const char*)&g_CEEOpNameCharacters) + g_CEEOpNameOffsets[op]; +} + +// Also updates ip to skip over prefix, if any +OPCODE CEEDecodeOpcode(const uint8_t **pIp) +{ + OPCODE res; + const uint8_t *ip = *pIp; + + if (*ip == 0xFE) + { + // Double byte encoding, offset + ip++; + res = (OPCODE)(*ip + CEE_ARGLIST); + } + else + { + res = (OPCODE)*ip; + } + *pIp = ip; + return res; +} + +int32_t CEEOpcodeSize(const uint8_t *ip, const uint8_t *codeEnd) +{ + const uint8_t *p = ip; + OPCODE opcode = CEEDecodeOpcode(&p); + OPCODE_FORMAT opArgs = g_CEEOpArgs[opcode]; + + size_t size = 0; + + switch (opArgs) + { + case InlineNone: + size = 1; + break; + case InlineString: + case InlineType: + case InlineField: + case InlineMethod: + case InlineTok: + case InlineSig: + case ShortInlineR: + case InlineI: + case InlineBrTarget: + size = 5; + break; + case InlineVar: + size = 3; + break; + case ShortInlineVar: + case ShortInlineI: + case ShortInlineBrTarget: + size = 2; + break; + case InlineR: + case InlineI8: + size = 9; + break; + case InlineSwitch: { + size_t entries = getI4LittleEndian(p + 1); + size = 5 + 4 * entries; + break; + } + default: + assert(0); + } + + if ((ip + size) > codeEnd) + return -1; + + return (int32_t)((p - ip) + size); +} diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def new file mode 100644 index 000000000000..160ca3944d22 --- /dev/null +++ b/src/coreclr/interpreter/intops.def @@ -0,0 +1,314 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license + +// OPDEF (opsymbol, opstring, oplength, numDestVars (0 or 1), numSourceVars, optype) +// Currently, opcode, dvar offset, svar offsets as well as any additional instruction data +// are stored in uint32_t slots in the instruction stream for simplicity. In the future +// we should add compact opcodes where all data is in uint16_t slots. + +OPDEF(INTOP_RET, "ret", 2, 0, 1, InterpOpNoArgs) +OPDEF(INTOP_RET_VT, "ret.vt", 3, 0, 1, InterpOpInt) +OPDEF(INTOP_RET_VOID, "ret.void", 1, 0, 0, InterpOpNoArgs) + +OPDEF(INTOP_INITLOCALS, "initlocals", 3, 0, 0, InterpOpTwoInts) +OPDEF(INTOP_MEMBAR, "membar", 1, 0, 0, InterpOpNoArgs) + +OPDEF(INTOP_LDC_I4, "ldc.i4", 3, 1, 0, InterpOpInt) +OPDEF(INTOP_LDC_I4_0, "ldc.i4.0", 2, 1, 0, InterpOpNoArgs) +OPDEF(INTOP_LDC_I8_0, "ldc.i8.0", 2, 1, 0, InterpOpNoArgs) +OPDEF(INTOP_LDC_I8, "ldc.i8", 4, 1, 0, InterpOpLongInt) + +OPDEF(INTOP_LDC_R4, "ldc.r4", 3, 1, 0, InterpOpFloat) +OPDEF(INTOP_LDC_R8, "ldc.r8", 4, 1, 0, InterpOpDouble) + +OPDEF(INTOP_LDPTR, "ldptr", 3, 1, 0, InterpOpLdPtr) +OPDEF(INTOP_NEWARR, "newarr", 5, 1, 1, InterpOpInt) + +OPDEF(INTOP_LDELEM_I1, "ldelem.i1", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_LDELEM_U1, "ldelem.u1", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_LDELEM_I2, "ldelem.i2", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_LDELEM_U2, "ldelem.u2", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_LDELEM_I4, "ldelem.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_LDELEM_I8, "ldelem.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_LDELEM_R4, "ldelem.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_LDELEM_R8, "ldelem.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_STELEM_I1, "stelem.i1", 4, 0, 3, InterpOpNoArgs) +OPDEF(INTOP_STELEM_U1, "stelem.u1", 4, 0, 3, InterpOpNoArgs) +OPDEF(INTOP_STELEM_I2, "stelem.i2", 4, 0, 3, InterpOpNoArgs) +OPDEF(INTOP_STELEM_U2, "stelem.u2", 4, 0, 3, InterpOpNoArgs) +OPDEF(INTOP_STELEM_I4, "stelem.i4", 4, 0, 3, InterpOpNoArgs) +OPDEF(INTOP_STELEM_I8, "stelem.i8", 4, 0, 3, InterpOpNoArgs) +OPDEF(INTOP_STELEM_R4, "stelem.r4", 4, 0, 3, InterpOpNoArgs) +OPDEF(INTOP_STELEM_R8, "stelem.r8", 4, 0, 3, InterpOpNoArgs) + +OPDEF(INTOP_LDTOKEN, "ldtoken", 4, 1, 0, InterpOpTwoInts) // [token data item] [conversion helper func] +OPDEF(INTOP_LDTOKEN_VAR, "ldtoken.var", 4, 1, 1, InterpOpInt) // [var index] [conversion helper func] + +OPDEF(INTOP_MOV_I4_I1, "mov.i4.i1", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_I4_U1, "mov.i4.u1", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_I4_I2, "mov.i4.i2", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_I4_U2, "mov.i4.u2", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_4, "mov.4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_8, "mov.8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_VT, "mov.vt", 4, 1, 1, InterpOpInt) + +OPDEF(INTOP_LDLOCA, "ldloca", 3, 1, 0, InterpOpInt) + +OPDEF(INTOP_SWITCH, "switch", 0, 0, 1, InterpOpSwitch) + +OPDEF(INTOP_SAFEPOINT, "safepoint", 1, 0, 0, InterpOpNoArgs) +OPDEF(INTOP_BR, "br", 2, 0, 0, InterpOpBranch) + +OPDEF(INTOP_BRFALSE_I4, "brfalse.i4", 3, 0, 1, InterpOpBranch) +OPDEF(INTOP_BRFALSE_I8, "brfalse.i8", 3, 0, 1, InterpOpBranch) +OPDEF(INTOP_BRTRUE_I4, "brtrue.i4", 3, 0, 1, InterpOpBranch) +OPDEF(INTOP_BRTRUE_I8, "brtrue.i8", 3, 0, 1, InterpOpBranch) + +OPDEF(INTOP_BEQ_I4, "beq.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BEQ_I8, "beq.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BEQ_R4, "beq.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BEQ_R8, "beq.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_I4, "bge.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_I8, 
"bge.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_R4, "bge.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_R8, "bge.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_I4, "bgt.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_I8, "bgt.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_R4, "bgt.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_R8, "bgt.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_I4, "blt.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_I8, "blt.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_R4, "blt.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_R8, "blt.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_I4, "ble.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_I8, "ble.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_R4, "ble.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_R8, "ble.r8", 4, 0, 2, InterpOpBranch) + +OPDEF(INTOP_BNE_UN_I4, "bne.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BNE_UN_I8, "bne.un.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BNE_UN_R4, "bne.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BNE_UN_R8, "bne.un.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_UN_I4, "bge.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_UN_I8, "bge.un.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_UN_R4, "bge.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_UN_R8, "bge.un.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_UN_I4, "bgt.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_UN_I8, "bgt.un.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_UN_R4, "bgt.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_UN_R8, "bgt.un.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_UN_I4, "ble.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_UN_I8, "ble.un.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_UN_R4, "ble.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_UN_R8, "ble.un.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_UN_I4, "blt.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_UN_I8, "blt.un.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_UN_R4, "blt.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_UN_R8, "blt.un.r8", 4, 0, 2, InterpOpBranch) + +// Unary operations + +OPDEF(INTOP_NEG_I4, "neg.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_NEG_I8, "neg.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_NEG_R4, "neg.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_NEG_R8, "neg.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_NOT_I4, "not.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_NOT_I8, "not.i8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_R_UN_I4, "conv.r.un.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R_UN_I8, "conv.r.un.i8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_I1_I4, "conv.i1.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I1_I8, "conv.i1.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I1_R4, "conv.i1.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I1_R8, "conv.i1.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_U1_I4, "conv.u1.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U1_I8, "conv.u1.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U1_R4, "conv.u1.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U1_R8, "conv.u1.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_I2_I4, "conv.i2.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I2_I8, "conv.i2.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I2_R4, "conv.i2.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I2_R8, "conv.i2.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_U2_I4, "conv.u2.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U2_I8, "conv.u2.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U2_R4, "conv.u2.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U2_R8, 
"conv.u2.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_I4_R4, "conv.i4.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I4_R8, "conv.i4.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_U4_R4, "conv.u4.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U4_R8, "conv.u4.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_I8_I4, "conv.i8.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I8_U4, "conv.i8.u4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I8_R4, "conv.i8.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I8_R8, "conv.i8.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_R4_I4, "conv.r4.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R4_I8, "conv.r4.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R4_R8, "conv.r4.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_R8_I4, "conv.r8.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R8_I8, "conv.r8.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R8_R4, "conv.r8.r4", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_U8_R4, "conv.u8.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U8_R8, "conv.u8.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_BOX, "box", 5, 1, 1, InterpOpClassHandle) // [class handle data item] [helper data item] +OPDEF(INTOP_UNBOX, "unbox", 5, 1, 1, InterpOpClassHandle) // [class handle data item] [helper data item] +OPDEF(INTOP_UNBOX_ANY, "unbox.any", 5, 1, 1, InterpOpClassHandle) // [class handle data item] [helper data item] +// Unary operations end + +OPDEF(INTOP_ADD_I4_IMM, "add.i4.imm", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_ADD_I8_IMM, "add.i8.imm", 4, 1, 1, InterpOpInt) + +// Binary operations + +OPDEF(INTOP_ADD_I4, "add.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_ADD_I8, "add.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_ADD_R4, "add.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_ADD_R8, "add.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_SUB_I4, "sub.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SUB_I8, "sub.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SUB_R4, "sub.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SUB_R8, "sub.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_MUL_I4, "mul.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_MUL_I8, "mul.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_MUL_R4, "mul.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_MUL_R8, "mul.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_MUL_OVF_I4, "mul.ovf.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_MUL_OVF_I8, "mul.ovf.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_MUL_OVF_UN_I4, "mul.ovf.un.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_MUL_OVF_UN_I8, "mul.ovf.un.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_DIV_I4, "div.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_DIV_I8, "div.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_DIV_R4, "div.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_DIV_R8, "div.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_DIV_UN_I4, "div.un.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_DIV_UN_I8, "div.un.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_REM_I4, "rem.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_REM_I8, "rem.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_REM_R4, "rem.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_REM_R8, "rem.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_REM_UN_I4, "rem.un.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_REM_UN_I8, "rem.un.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_AND_I4, "and.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_AND_I8, "and.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_OR_I4, "or.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_OR_I8, "or.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_XOR_I4, "xor.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_XOR_I8, "xor.i8", 4, 1, 2, 
InterpOpNoArgs) + +OPDEF(INTOP_SHR_UN_I4, "shr.un.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHR_UN_I8, "shr.un.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHL_I4, "shl.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHL_I8, "shl.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHR_I4, "shr.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHR_I8, "shr.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_CEQ_I4, "ceq.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CEQ_I8, "ceq.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CEQ_R4, "ceq.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CEQ_R8, "ceq.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_CGT_I4, "cgt.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_I8, "cgt.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_R4, "cgt.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_R8, "cgt.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_CGT_UN_I4, "cgt.un.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_UN_I8, "cgt.un.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_UN_R4, "cgt.un.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_UN_R8, "cgt.un.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_CLT_I4, "clt.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_I8, "clt.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_R4, "clt.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_R8, "clt.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_CLT_UN_I4, "clt.un.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_UN_I8, "clt.un.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_UN_R4, "clt.un.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_UN_R8, "clt.un.r8", 4, 1, 2, InterpOpNoArgs) +// Binary operations end + +// Fields +OPDEF(INTOP_LDIND_I1, "ldind.i1", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDIND_U1, "ldind.u1", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDIND_I2, "ldind.i2", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDIND_U2, "ldind.u2", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDIND_I4, "ldind.i4", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDIND_I8, "ldind.i8", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDIND_R4, "ldind.r4", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDIND_R8, "ldind.r8", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDIND_O, "ldind.o", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDIND_VT, "ldind.vt", 5, 1, 1, InterpOpTwoInts) + +OPDEF(INTOP_STIND_I1, "stind.i1", 4, 0, 2, InterpOpInt) +OPDEF(INTOP_STIND_U1, "stind.u1", 4, 0, 2, InterpOpInt) +OPDEF(INTOP_STIND_I2, "stind.i2", 4, 0, 2, InterpOpInt) +OPDEF(INTOP_STIND_U2, "stind.u2", 4, 0, 2, InterpOpInt) +OPDEF(INTOP_STIND_I4, "stind.i4", 4, 0, 2, InterpOpInt) +OPDEF(INTOP_STIND_I8, "stind.i8", 4, 0, 2, InterpOpInt) +OPDEF(INTOP_STIND_R4, "stind.r4", 4, 0, 2, InterpOpInt) +OPDEF(INTOP_STIND_R8, "stind.r8", 4, 0, 2, InterpOpInt) +OPDEF(INTOP_STIND_O, "stind.o", 4, 0, 2, InterpOpInt) +OPDEF(INTOP_STIND_VT, "stind.vt", 5, 0, 2, InterpOpTwoInts) +OPDEF(INTOP_STIND_VT_NOREF, "stind.vt.noref", 5, 0, 2, InterpOpTwoInts) + +OPDEF(INTOP_LDFLDA, "ldflda", 4, 1, 1, InterpOpInt) + +// Calls +OPDEF(INTOP_CALL, "call", 4, 1, 1, InterpOpMethodHandle) +OPDEF(INTOP_CALLVIRT, "callvirt", 4, 1, 1, InterpOpMethodHandle) +OPDEF(INTOP_NEWOBJ, "newobj", 5, 1, 1, InterpOpMethodHandle) +OPDEF(INTOP_NEWOBJ_VAR, "newobj.var", 5, 1, 2, InterpOpMethodHandle) +OPDEF(INTOP_NEWOBJ_VT, "newobj.vt", 5, 1, 1, InterpOpMethodHandle) + +OPDEF(INTOP_CALL_HELPER_PP, "call.helper.pp", 4, 1, 0, InterpOpTwoInts) +OPDEF(INTOP_CALL_HELPER_PP_2, "call.helper.pp.2", 5, 1, 1, InterpOpTwoInts) + +OPDEF(INTOP_GENERICLOOKUP_METHOD, "generic.method", 4, 1, 1, InterpOpGenericLookup) +OPDEF(INTOP_GENERICLOOKUP_CLASS, "generic.class", 4, 1, 1, InterpOpGenericLookup) +OPDEF(INTOP_GENERICLOOKUP_THIS, 
"generic.this", 4, 1, 1, InterpOpGenericLookup) + +OPDEF(INTOP_CALL_FINALLY, "call.finally", 2, 0, 0, InterpOpBranch) + +OPDEF(INTOP_ZEROBLK_IMM, "zeroblk.imm", 3, 0, 1, InterpOpInt) +OPDEF(INTOP_LOCALLOC, "localloc", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_BREAKPOINT, "breakpoint", 1, 0, 0, InterpOpNoArgs) + +OPDEF(INTOP_THROW, "throw", 4, 0, 1, InterpOpInt) +OPDEF(INTOP_RETHROW, "rethrow", 1, 0, 0, InterpOpInt) +OPDEF(INTOP_LEAVE_FILTER, "leavefilter", 2, 0, 1, InterpOpNoArgs) +OPDEF(INTOP_LEAVE_CATCH, "leavecatch", 2, 0, 0, InterpOpBranch) +OPDEF(INTOP_LOAD_EXCEPTION, "load.exception", 2, 1, 0, InterpOpNoArgs) + +OPDEF(INTOP_FAILFAST, "failfast", 1, 0, 0, InterpOpNoArgs) +OPDEF(INTOP_GC_COLLECT, "gc.collect", 1, 0, 0, InterpOpNoArgs) + +OPDEF(INTOP_LOAD_FRAMEVAR, "load.framevar", 2, 1, 0, InterpOpNoArgs) + +// All instructions after this point are IROPS, instructions that are not emitted/executed +OPDEF(INTOP_NOP, "nop", 1, 0, 0, InterpOpNoArgs) +OPDEF(INTOP_DEF, "def", 2, 1, 0, InterpOpNoArgs) +OPDEF(INTOP_MOV_SRC_OFF, "mov.src.off", 6, 1, 1, InterpOpThreeInts) diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h new file mode 100644 index 000000000000..5ff775b2b0d8 --- /dev/null +++ b/src/coreclr/interpreter/intops.h @@ -0,0 +1,118 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _INTOPS_H +#define _INTOPS_H + +#include "openum.h" +#include + +#include "intopsshared.h" + +typedef enum +{ + InterpOpNoArgs, + InterpOpInt, + InterpOpLongInt, + InterpOpFloat, + InterpOpDouble, + InterpOpTwoInts, + InterpOpThreeInts, + InterpOpBranch, + InterpOpSwitch, + InterpOpMethodHandle, + InterpOpClassHandle, + InterpOpGenericLookup, + InterpOpLdPtr, +} InterpOpArgType; + +extern const uint8_t g_interpOpLen[]; +extern const int g_interpOpDVars[]; +extern const int g_interpOpSVars[]; +extern const InterpOpArgType g_interpOpArgType[]; +extern const int32_t* InterpNextOp(const int32_t* ip); + +// This, instead of an array of pointers, to optimize away a pointer and a relocation per string. 
+extern const uint32_t g_interpOpNameOffsets[]; +struct InterpOpNameCharacters; +extern const InterpOpNameCharacters g_interpOpNameCharacters; + +const char* InterpOpName(int op); + +extern OPCODE_FORMAT const g_CEEOpArgs[]; +const char* CEEOpName(OPCODE op); +OPCODE CEEDecodeOpcode(const uint8_t **ip); +int CEEOpcodeSize(const uint8_t *ip, const uint8_t *codeEnd); + +#ifdef TARGET_64BIT +#define INTOP_MOV_P INTOP_MOV_8 +#define INTOP_LDNULL INTOP_LDC_I8_0 +#define INTOP_LDIND_I INTOP_LDIND_I8 +#define INTOP_STIND_I INTOP_STIND_I8 +#define INTOP_ADD_P_IMM INTOP_ADD_I8_IMM +#define INTOP_LDELEM_I INTOP_LDELEM_I8 +#define INTOP_STELEM_I INTOP_STELEM_I8 +#else +#define INTOP_MOV_P INTOP_MOV_4 +#define INTOP_LDNULL INTOP_LDC_I4_0 +#define INTOP_LDIND_I INTOP_LDIND_I4 +#define INTOP_STIND_I INTOP_STIND_I4 +#define INTOP_ADD_P_IMM INTOP_ADD_I4_IMM +#define INTOP_LDELEM_I INTOP_LDELEM_I4 +#define INTOP_STELEM_I INTOP_STELEM_I4 +#endif + +static inline bool InterpOpIsEmitNop(int32_t opcode) +{ + return opcode >= INTOP_NOP && opcode != INTOP_MOV_SRC_OFF; +} + +static inline bool InterpOpIsUncondBranch(int32_t opcode) +{ + return opcode == INTOP_BR; +} + +static inline bool InterpOpIsCondBranch(int32_t opcode) +{ + return opcode >= INTOP_BRFALSE_I4 && opcode <= INTOP_BLT_UN_R8; +} + +// Helpers for reading data from uint8_t code stream +inline uint16_t getU2LittleEndian(const uint8_t* ptr) +{ + return *ptr | *(ptr + 1) << 8; +} + +inline uint32_t getU4LittleEndian(const uint8_t* ptr) +{ + return *ptr | *(ptr + 1) << 8 | *(ptr + 2) << 16 | *(ptr + 3) << 24; +} + +inline int16_t getI2LittleEndian(const uint8_t* ptr) +{ + return (int16_t)getU2LittleEndian(ptr); +} + +inline int32_t getI4LittleEndian(const uint8_t* ptr) +{ + return (int32_t)getU4LittleEndian(ptr); +} + +inline int64_t getI8LittleEndian(const uint8_t* ptr) +{ + return (int64_t)getU4LittleEndian(ptr) | ((int64_t)getI4LittleEndian(ptr + 4)) << 32; +} + +inline float getR4LittleEndian(const uint8_t* ptr) +{ + int32_t val = getI4LittleEndian(ptr); + return *(float*)&val; +} + +inline double getR8LittleEndian(const uint8_t* ptr) +{ + int64_t val = getI8LittleEndian(ptr); + return *(double*)&val; +} + +#endif diff --git a/src/coreclr/interpreter/intopsshared.h b/src/coreclr/interpreter/intopsshared.h new file mode 100644 index 000000000000..80be6d2f53d9 --- /dev/null +++ b/src/coreclr/interpreter/intopsshared.h @@ -0,0 +1,15 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _INTOPSSHARED_H_ +#define _INTOPSSHARED_H_ + +#define OPDEF(a,b,c,d,e,f) a, +typedef enum +{ +#include "intops.def" + INTOP_LAST +} InterpOpcode; +#undef OPDEF + +#endif diff --git a/src/coreclr/interpreter/methodset.cpp b/src/coreclr/interpreter/methodset.cpp new file mode 100644 index 000000000000..7858b044ad39 --- /dev/null +++ b/src/coreclr/interpreter/methodset.cpp @@ -0,0 +1,181 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "interpreter.h" + +//---------------------------------------------------------------------- +// initialize: Initialize the method set by parsing the string +// +// Arguments: +// listFromConfig - A string containing the list. The string must have come from the host's config, +// and this class takes ownership of the string. 
+//
+void MethodSet::initialize(const char* listFromConfig)
+{
+    assert(m_listFromConfig == nullptr);
+    assert(m_names == nullptr);
+
+    if (listFromConfig == nullptr)
+    {
+        return;
+    }
+
+    size_t configSize = strlen(listFromConfig);
+    if (configSize == 0)
+    {
+        return;
+    }
+
+    m_listFromConfig = static_cast<char*>(malloc(configSize + 1));
+    strcpy(const_cast<char*>(m_listFromConfig), listFromConfig);
+
+    auto commitPattern = [this](const char* start, const char* end) {
+        if (end <= start)
+        {
+            return;
+        }
+
+        MethodName* name = static_cast<MethodName*>(calloc(1, sizeof(MethodName)));
+        name->m_next = m_names;
+        name->m_patternStart = start;
+        name->m_patternEnd = end;
+        const char* colon = static_cast<const char*>(memchr(start, ':', end - start));
+        const char* startOfMethodName = colon != nullptr ? colon + 1 : start;
+
+        const char* parens = static_cast<const char*>(memchr(startOfMethodName, '(', end - startOfMethodName));
+        const char* endOfMethodName = parens != nullptr ? parens : end;
+        name->m_methodNameContainsInstantiation =
+            memchr(startOfMethodName, '[', endOfMethodName - startOfMethodName) != nullptr;
+
+        if (colon != nullptr)
+        {
+            name->m_containsClassName = true;
+            name->m_classNameContainsInstantiation = memchr(start, '[', colon - start) != nullptr;
+        }
+        else
+        {
+            name->m_containsClassName = false;
+            name->m_classNameContainsInstantiation = false;
+        }
+
+        name->m_containsSignature = parens != nullptr;
+        m_names = name;
+    };
+
+    const char* curPatternStart = m_listFromConfig;
+    const char* curChar;
+    for (curChar = curPatternStart; *curChar != '\0'; curChar++)
+    {
+        if (*curChar == ' ')
+        {
+            commitPattern(curPatternStart, curChar);
+            curPatternStart = curChar + 1;
+        }
+    }
+
+    commitPattern(curPatternStart, curChar);
+}
+
+//----------------------------------------------------------------------
+// destroy: Destroy the method set.
+//
+void MethodSet::destroy()
+{
+    // Free method names, free the list string, and reset our state
+    for (MethodName *name = m_names, *next = nullptr; name != nullptr; name = next)
+    {
+        next = name->m_next;
+        free(static_cast<void*>(name));
+    }
+    if (m_listFromConfig != nullptr)
+    {
+        free((void*)m_listFromConfig);
+        m_listFromConfig = nullptr;
+    }
+    m_names = nullptr;
+}
+
+// Quadratic string matching algorithm that supports * and ? wildcards
+static bool matchGlob(const char* pattern, const char* patternEnd, const char* str)
+{
+    // Invariant: [patternStart..backtrackPattern) matches [stringStart..backtrackStr)
+    const char* backtrackPattern = nullptr;
+    const char* backtrackStr = nullptr;
+
+    while (true)
+    {
+        if (pattern == patternEnd)
+        {
+            if (*str == '\0')
+                return true;
+        }
+        else if (*pattern == '*')
+        {
+            backtrackPattern = ++pattern;
+            backtrackStr = str;
+            continue;
+        }
+        else if (*str == '\0')
+        {
+            // No match since pattern needs at least one char in remaining cases.
+        }
+        else if ((*pattern == '?') || (*pattern == *str))
+        {
+            pattern++;
+            str++;
+            continue;
+        }
+
+        // In this case there was no match, see if we can backtrack to a wild
+        // card and consume one more character from the string.
+        if ((backtrackPattern == nullptr) || (*backtrackStr == '\0'))
+            return false;
+
+        // Consume one more character for the wildcard.
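+        // As an illustration (example strings, not from the original comment): matching the
+        // pattern "a*c" against "abcc" reaches this point twice. After '*' records a backtrack
+        // point at "bcc", the trailing 'c' first fails against 'b', so the wildcard absorbs 'b'
+        // and matching resumes at "cc"; it then matches 'c' too early (leaving "c" unconsumed),
+        // so the wildcard absorbs another character and the final 'c' matches, ending the string.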
+        pattern = backtrackPattern;
+        str = ++backtrackStr;
+    }
+}
+
+bool MethodSet::contains(COMP_HANDLE comp,
+                         CORINFO_METHOD_HANDLE methodHnd,
+                         CORINFO_CLASS_HANDLE classHnd,
+                         CORINFO_SIG_INFO* sigInfo) const
+{
+    if (isEmpty())
+    {
+        return false;
+    }
+
+    TArray<char> printer;
+    MethodName* prevPattern = nullptr;
+
+    for (MethodName* name = m_names; name != nullptr; name = name->m_next)
+    {
+        if ((prevPattern == nullptr) || (name->m_containsClassName != prevPattern->m_containsClassName) ||
+            (name->m_classNameContainsInstantiation != prevPattern->m_classNameContainsInstantiation) ||
+            (name->m_methodNameContainsInstantiation != prevPattern->m_methodNameContainsInstantiation) ||
+            (name->m_containsSignature != prevPattern->m_containsSignature))
+        {
+            printer = PrintMethodName(comp, name->m_containsClassName ? classHnd : NO_CLASS_HANDLE, methodHnd, sigInfo,
+                                      /* includeClassInstantiation */ name->m_classNameContainsInstantiation,
+                                      /* includeMethodInstantiation */ name->m_methodNameContainsInstantiation,
+                                      /* includeSignature */ name->m_containsSignature,
+                                      /* includeReturnType */ false,
+                                      /* includeThis */ false);
+
+            prevPattern = name;
+        }
+
+        if (matchGlob(name->m_patternStart, name->m_patternEnd, printer.GetUnderlyingArray()))
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
diff --git a/src/coreclr/interpreter/naming.cpp b/src/coreclr/interpreter/naming.cpp
new file mode 100644
index 000000000000..92d4253e593d
--- /dev/null
+++ b/src/coreclr/interpreter/naming.cpp
@@ -0,0 +1,283 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "interpreter.h"
+
+void AppendType(COMP_HANDLE comp, TArray<char>* printer, CORINFO_CLASS_HANDLE clsHnd, bool includeInstantiation);
+void AppendCorInfoType(TArray<char>* printer, CorInfoType corInfoType);
+void AppendTypeOrJitAlias(COMP_HANDLE comp, TArray<char>* printer, CORINFO_CLASS_HANDLE clsHnd, bool includeInstantiation);
+
+void AppendString(TArray<char>& array, const char* str)
+{
+    if (str != nullptr)
+    {
+        size_t strLen = strlen(str);
+        array.Append(str, static_cast<int32_t>(strLen));
+    }
+}
+
+void AppendCorInfoTypeWithModModifiers(TArray<char>* printer, CorInfoTypeWithMod corInfoTypeWithMod)
+{
+    if ((corInfoTypeWithMod & CORINFO_TYPE_MOD_PINNED) == CORINFO_TYPE_MOD_PINNED)
+    {
+        printer->Append("PINNED__", 8);
+    }
+    if ((corInfoTypeWithMod & CORINFO_TYPE_MOD_COPY_WITH_HELPER) == CORINFO_TYPE_MOD_COPY_WITH_HELPER)
+    {
+        printer->Append("COPY_WITH_HELPER__", 18);
+    }
+}
+
+void AppendCorInfoType(TArray<char>* printer, CorInfoType corInfoType)
+{
+    static const char* preciseVarTypeMap[CORINFO_TYPE_COUNT] = {
+        // see the definition of enum CorInfoType in file inc/corinfo.h
+        "",
+        "void",
+        "bool",
+        "char",
+        "sbyte",
+        "byte",
+        "short",
+        "ushort",
+        "int",
+        "uint",
+        "long",
+        "ulong",
+        "nint",
+        "nuint",
+        "float",
+        "double",
+        "string",
+        "ptr",
+        "byref",
+        "struct",
+        "class",
+        "typedbyref",
+        "var"
+    };
+
+    const char *corInfoTypeName = "CORINFO_TYPE_INVALID";
+    if (corInfoType >= 0 && corInfoType < CORINFO_TYPE_COUNT)
+    {
+        corInfoTypeName = preciseVarTypeMap[corInfoType];
+    }
+
+    printer->Append(corInfoTypeName, static_cast<int32_t>(strlen(corInfoTypeName)));
+}
+
+void AppendTypeOrJitAlias(COMP_HANDLE comp, TArray<char>* printer, CORINFO_CLASS_HANDLE clsHnd, bool includeInstantiation)
+{
+    CorInfoType typ = comp->asCorInfoType(clsHnd);
+    if ((typ == CORINFO_TYPE_CLASS) || (typ == CORINFO_TYPE_VALUECLASS))
+    {
+        AppendType(comp, printer, clsHnd, includeInstantiation);
+    }
+    else
+    {
+        
AppendCorInfoType(printer, typ); + } +} + +void AppendType(COMP_HANDLE comp, TArray* printer, CORINFO_CLASS_HANDLE clsHnd, bool includeInstantiation) +{ + unsigned arrayRank = comp->getArrayRank(clsHnd); + if (arrayRank > 0) + { + CORINFO_CLASS_HANDLE childClsHnd; + CorInfoType childType = comp->getChildType(clsHnd, &childClsHnd); + if ((childType == CORINFO_TYPE_CLASS) || (childType == CORINFO_TYPE_VALUECLASS)) + { + AppendType(comp, printer, childClsHnd, includeInstantiation); + } + else + { + AppendCorInfoType(printer, childType); + } + + printer->Add('['); + for (unsigned i = 1; i < arrayRank; i++) + { + printer->Add(','); + } + printer->Add(']'); + return; + } + + size_t bufferSizeNeeded = 0; + comp->printClassName(clsHnd, NULL, 0, &bufferSizeNeeded); + if (bufferSizeNeeded != 0) + { + int32_t oldBufferSize = printer->GetSize(); + printer->GrowBy(static_cast(bufferSizeNeeded)); + comp->printClassName(clsHnd, (printer->GetUnderlyingArray() + oldBufferSize), bufferSizeNeeded, &bufferSizeNeeded); + printer->RemoveAt(printer->GetSize() - 1); + } + + if (!includeInstantiation) + { + return; + } + + char pref = '['; + for (unsigned typeArgIndex = 0;; typeArgIndex++) + { + CORINFO_CLASS_HANDLE typeArg = comp->getTypeInstantiationArgument(clsHnd, typeArgIndex); + + if (typeArg == NO_CLASS_HANDLE) + { + break; + } + + printer->Add(pref); + pref = ','; + AppendTypeOrJitAlias(comp, printer, typeArg, true); + } + + if (pref != '[') + { + printer->Add(']'); + } +} + +void AppendMethodName(COMP_HANDLE comp, + TArray* printer, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE methHnd, + CORINFO_SIG_INFO* sig, + bool includeClassInstantiation, + bool includeMethodInstantiation, + bool includeSignature, + bool includeReturnType, + bool includeThisSpecifier) +{ + TArray result; + + if (clsHnd != NO_CLASS_HANDLE) + { + AppendType(comp, printer, clsHnd, includeClassInstantiation); + printer->Add(':'); + } + + size_t bufferSizeNeeded = 0; + comp->printMethodName(methHnd, NULL, 0, &bufferSizeNeeded); + if (bufferSizeNeeded != 0) + { + int32_t oldBufferSize = printer->GetSize(); + printer->GrowBy(static_cast(bufferSizeNeeded)); + comp->printMethodName(methHnd, (printer->GetUnderlyingArray() + oldBufferSize), bufferSizeNeeded, &bufferSizeNeeded); + printer->RemoveAt(printer->GetSize() - 1); // Remove null terminator + } + + if (includeMethodInstantiation && (sig->sigInst.methInstCount > 0)) + { + printer->Add('['); + for (unsigned i = 0; i < sig->sigInst.methInstCount; i++) + { + if (i > 0) + { + printer->Add(','); + } + + AppendTypeOrJitAlias(comp, printer, sig->sigInst.methInst[i], true); + } + printer->Add(']'); + } + + if (includeSignature) + { + printer->Add('('); + + CORINFO_ARG_LIST_HANDLE argLst = sig->args; + for (unsigned i = 0; i < sig->numArgs; i++) + { + if (i > 0) + printer->Add(','); + + CORINFO_CLASS_HANDLE vcClsHnd; + CorInfoTypeWithMod withMod = comp->getArgType(sig, argLst, &vcClsHnd); + AppendCorInfoTypeWithModModifiers(printer, withMod); + CorInfoType type = strip(withMod); + switch (type) + { + case CORINFO_TYPE_STRING: + case CORINFO_TYPE_CLASS: + case CORINFO_TYPE_VAR: + case CORINFO_TYPE_VALUECLASS: + case CORINFO_TYPE_REFANY: + { + CORINFO_CLASS_HANDLE clsHnd = comp->getArgClass(sig, argLst); + // For some SIMD struct types we can get a nullptr back from eeGetArgClass on Linux/X64 + if (clsHnd != NO_CLASS_HANDLE) + { + AppendType(comp, printer, clsHnd, true); + break; + } + } + + FALLTHROUGH; + default: + AppendCorInfoType(printer, type); + break; + } + + argLst = 
comp->getArgNext(argLst); + } + + printer->Add(')'); + + if (includeReturnType) + { + CorInfoType retType = sig->retType; + if (retType != CORINFO_TYPE_VOID) + { + printer->Add(':'); + switch (retType) + { + case CORINFO_TYPE_STRING: + case CORINFO_TYPE_CLASS: + case CORINFO_TYPE_VAR: + case CORINFO_TYPE_VALUECLASS: + case CORINFO_TYPE_REFANY: + { + CORINFO_CLASS_HANDLE clsHnd = sig->retTypeClass; + if (clsHnd != NO_CLASS_HANDLE) + { + AppendType(comp, printer, clsHnd, true); + break; + } + } + FALLTHROUGH; + default: + AppendCorInfoType(printer, retType); + break; + } + } + } + + // Does it have a 'this' pointer? Don't count explicit this, which has + // the this pointer type as the first element of the arg type list + if (includeThisSpecifier && sig->hasThis() && !sig->hasExplicitThis()) + { + printer->Append(":this", 5); + } + } +} + +TArray PrintMethodName(COMP_HANDLE comp, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE methHnd, + CORINFO_SIG_INFO* sig, + bool includeClassInstantiation, + bool includeMethodInstantiation, + bool includeSignature, + bool includeReturnType, + bool includeThisSpecifier) +{ + TArray printer; + AppendMethodName(comp, &printer, clsHnd, methHnd, sig, + includeClassInstantiation, includeMethodInstantiation, + includeSignature, includeReturnType, includeThisSpecifier); + printer.Add('\0'); // Ensure null-termination + return printer; +} diff --git a/src/coreclr/interpreter/stackmap.cpp b/src/coreclr/interpreter/stackmap.cpp new file mode 100644 index 000000000000..36242c5526c1 --- /dev/null +++ b/src/coreclr/interpreter/stackmap.cpp @@ -0,0 +1,86 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "gcinfoencoder.h" // for GcSlotFlags + +// HACK: debugreturn.h (included by gcinfoencoder.h) breaks constexpr +#if defined(debug_instrumented_return) || defined(_DEBUGRETURN_H_) +#undef return +#endif // debug_instrumented_return + +#include "interpreter.h" +#include "stackmap.h" + +extern "C" { + #include "../../native/containers/dn-simdhash.h" + #include "../../native/containers/dn-simdhash-specializations.h" + + void assertAbort(const char* why, const char* file, unsigned line); + + void + dn_simdhash_assert_fail (const char* file, int line, const char* condition); + + void + dn_simdhash_assert_fail (const char* file, int line, const char* condition) { + assertAbort(condition, file, line); + } +} + +thread_local dn_simdhash_ptr_ptr_t *t_sharedStackMapLookup = nullptr; + +InterpreterStackMap* GetInterpreterStackMap(ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle) +{ + InterpreterStackMap* result = nullptr; + if (!t_sharedStackMapLookup) + t_sharedStackMapLookup = dn_simdhash_ptr_ptr_new(0, nullptr); + if (!dn_simdhash_ptr_ptr_try_get_value(t_sharedStackMapLookup, classHandle, (void **)&result)) + { + result = new InterpreterStackMap(jitInfo, classHandle); + dn_simdhash_ptr_ptr_try_add(t_sharedStackMapLookup, classHandle, result); + } + return result; +} + +void InterpreterStackMap::PopulateStackMap(ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle) +{ + unsigned size = jitInfo->getClassSize(classHandle); + // getClassGClayout assumes it's given a buffer of exactly this size + unsigned maxGcPtrs = (size + sizeof(void *) - 1) / sizeof(void *); + if (maxGcPtrs < 1) + return; + + uint8_t *gcPtrs = (uint8_t *)alloca(maxGcPtrs); + unsigned numGcPtrs = jitInfo->getClassGClayout(classHandle, gcPtrs), + newCapacity = m_slotCount + numGcPtrs; + + // 
Allocate enough space in case all the offsets in the buffer are GC pointers + m_slots = (InterpreterStackMapSlot *)malloc(sizeof(InterpreterStackMapSlot) * newCapacity); + + for (unsigned i = 0; i < numGcPtrs; i++) { + GcSlotFlags flags; + + switch (gcPtrs[i]) { + case TYPE_GC_NONE: + case TYPE_GC_OTHER: + continue; + case TYPE_GC_BYREF: + flags = GC_SLOT_INTERIOR; + break; + case TYPE_GC_REF: + flags = GC_SLOT_BASE; + break; + default: + assert(false); + continue; + } + + unsigned slotOffset = (sizeof(void *) * i); + m_slots[m_slotCount++] = { slotOffset, (unsigned)flags }; + } + + // Shrink our allocation based on the number of slots we actually recorded + unsigned finalSize = sizeof(InterpreterStackMapSlot) * m_slotCount; + if (finalSize == 0) + finalSize = sizeof(InterpreterStackMapSlot); + m_slots = (InterpreterStackMapSlot *)realloc(m_slots, finalSize); +} diff --git a/src/coreclr/interpreter/stackmap.h b/src/coreclr/interpreter/stackmap.h new file mode 100644 index 000000000000..fb8b6ab43988 --- /dev/null +++ b/src/coreclr/interpreter/stackmap.h @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +struct InterpreterStackMapSlot +{ + unsigned m_offsetBytes; + unsigned m_gcSlotFlags; +}; + +class InterpreterStackMap +{ + void PopulateStackMap (ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle); + +public: + unsigned m_slotCount; + InterpreterStackMapSlot* m_slots; + + InterpreterStackMap (ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle) + : m_slotCount(0) + , m_slots(nullptr) + { + PopulateStackMap(jitInfo, classHandle); + } +}; + +InterpreterStackMap* GetInterpreterStackMap(ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle); diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 28029791f2ad..a1606092e534 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -68,7 +68,7 @@ function(create_standalone_jit) endif() set_target_definitions_to_custom_os_and_arch(${ARGN}) - set_target_properties(${TARGETDETAILS_TARGET} PROPERTIES IGNORE_FEATURE_MERGE_JIT_AND_ENGINE TRUE) + set_target_properties(${TARGETDETAILS_TARGET} PROPERTIES IGNORE_FEATURE_STATICALLY_LINKED TRUE) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_NO_HOST) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE SELF_NO_HOST) @@ -115,6 +115,7 @@ set( JIT_SOURCES abi.cpp alloc.cpp assertionprop.cpp + async.cpp bitset.cpp block.cpp buildstring.cpp @@ -185,10 +186,10 @@ set( JIT_SOURCES promotiondecomposition.cpp promotionliveness.cpp rangecheck.cpp + rangecheckcloning.cpp rationalize.cpp redundantbranchopts.cpp regalloc.cpp - registerargconvention.cpp regMaskTPOps.cpp regset.cpp scev.cpp @@ -331,6 +332,7 @@ set( JIT_HEADERS abi.h alloc.h arraystack.h + async.h bitset.h layout.h bitsetasshortlong.h @@ -403,9 +405,9 @@ set( JIT_HEADERS priorityqueue.h promotion.h rangecheck.h + rangecheckcloning.h rationalize.h regalloc.h - registerargconvention.h register.h regset.h scev.h @@ -544,6 +546,9 @@ elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) elseif(CLR_CMAKE_TARGET_ARCH_RISCV64) set(JIT_ARCH_SOURCES ${JIT_RISCV64_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_RISCV64_HEADERS}) +elseif(CLR_CMAKE_TARGET_ARCH_WASM) + set(JIT_ARCH_SOURCES ${JIT_WASM32_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_WASM32_HEADERS}) else() clr_unknown_arch() endif() @@ -551,7 +556,7 @@ endif() set(JIT_DLL_MAIN_FILE ${CMAKE_CURRENT_LIST_DIR}/dllmain.cpp) 
-if(CLR_CMAKE_TARGET_WIN32) +if(CLR_CMAKE_HOST_WIN32) set(CLRJIT_EXPORTS ${CMAKE_CURRENT_LIST_DIR}/ClrJit.exports) set(JIT_EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/ClrJit.exports.def) preprocess_file (${CLRJIT_EXPORTS} ${JIT_EXPORTS_FILE}) @@ -576,6 +581,7 @@ add_custom_target(jit_exports DEPENDS ${JIT_EXPORTS_FILE}) set(JIT_LINK_LIBRARIES utilcodestaticnohost + minipal ) set(JIT_ARCH_LINK_LIBRARIES @@ -607,6 +613,10 @@ else() ) endif(CLR_CMAKE_HOST_UNIX) +if (CLR_CMAKE_HOST_ANDROID) + list(APPEND JIT_LINK_LIBRARIES log) +endif() + # Shared function for generating JIT function(add_jit jitName) diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h index 94e244c0749b..cf50808bb3ef 100644 --- a/src/coreclr/jit/ICorJitInfo_names_generated.h +++ b/src/coreclr/jit/ICorJitInfo_names_generated.h @@ -69,7 +69,6 @@ DEF_CLR_API(getNewArrHelper) DEF_CLR_API(getCastingHelper) DEF_CLR_API(getSharedCCtorHelper) DEF_CLR_API(getTypeForBox) -DEF_CLR_API(getTypeForBoxOnStack) DEF_CLR_API(getBoxHelper) DEF_CLR_API(getUnBoxHelper) DEF_CLR_API(getRuntimeTypePointer) @@ -124,6 +123,7 @@ DEF_CLR_API(getHFAType) DEF_CLR_API(runWithErrorTrap) DEF_CLR_API(runWithSPMIErrorTrap) DEF_CLR_API(getEEInfo) +DEF_CLR_API(getAsyncInfo) DEF_CLR_API(getMethodDefFromMethod) DEF_CLR_API(printMethodName) DEF_CLR_API(getMethodNameFromMetadata) @@ -161,6 +161,7 @@ DEF_CLR_API(getFieldThreadLocalStoreID) DEF_CLR_API(GetDelegateCtor) DEF_CLR_API(MethodCompileComplete) DEF_CLR_API(getTailCallHelpers) +DEF_CLR_API(getAsyncResumptionStub) DEF_CLR_API(convertPInvokeCalliToCall) DEF_CLR_API(notifyInstructionSetUsage) DEF_CLR_API(updateEntryPointForTailCall) diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp index 9c7e6c109982..bcc31380d5ce 100644 --- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp +++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp @@ -647,15 +647,6 @@ CORINFO_CLASS_HANDLE WrapICorJitInfo::getTypeForBox( return temp; } -CORINFO_CLASS_HANDLE WrapICorJitInfo::getTypeForBoxOnStack( - CORINFO_CLASS_HANDLE cls) -{ - API_ENTER(getTypeForBoxOnStack); - CORINFO_CLASS_HANDLE temp = wrapHnd->getTypeForBoxOnStack(cls); - API_LEAVE(getTypeForBoxOnStack); - return temp; -} - CorInfoHelpFunc WrapICorJitInfo::getBoxHelper( CORINFO_CLASS_HANDLE cls) { @@ -1181,6 +1172,14 @@ void WrapICorJitInfo::getEEInfo( API_LEAVE(getEEInfo); } +void WrapICorJitInfo::getAsyncInfo( + CORINFO_ASYNC_INFO* pAsyncInfoOut) +{ + API_ENTER(getAsyncInfo); + wrapHnd->getAsyncInfo(pAsyncInfoOut); + API_LEAVE(getAsyncInfo); +} + mdMethodDef WrapICorJitInfo::getMethodDefFromMethod( CORINFO_METHOD_HANDLE hMethod) { @@ -1555,6 +1554,14 @@ bool WrapICorJitInfo::getTailCallHelpers( return temp; } +CORINFO_METHOD_HANDLE WrapICorJitInfo::getAsyncResumptionStub() +{ + API_ENTER(getAsyncResumptionStub); + CORINFO_METHOD_HANDLE temp = wrapHnd->getAsyncResumptionStub(); + API_LEAVE(getAsyncResumptionStub); + return temp; +} + bool WrapICorJitInfo::convertPInvokeCalliToCall( CORINFO_RESOLVED_TOKEN* pResolvedToken, bool mustConvert) diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index ba0008c4aea1..5ec2f6c04abc 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -38,7 +38,7 @@ bool ABIPassingSegment::IsPassedOnStack() const regNumber ABIPassingSegment::GetRegister() const { assert(IsPassedInRegister()); - return m_register; + return static_cast(m_register); } 
//----------------------------------------------------------------------------- @@ -50,17 +50,17 @@ regNumber ABIPassingSegment::GetRegister() const // regMaskTP ABIPassingSegment::GetRegisterMask() const { - assert(IsPassedInRegister()); - regMaskTP reg = genRegMask(m_register); + regNumber reg = GetRegister(); + regMaskTP mask = genRegMask(reg); #ifdef TARGET_ARM - if (genIsValidFloatReg(m_register) && (Size == 8)) + if (genIsValidFloatReg(reg) && (Size == 8)) { - reg |= genRegMask(REG_NEXT(m_register)); + mask |= genRegMask(REG_NEXT(reg)); } #endif - return reg; + return mask; } //----------------------------------------------------------------------------- @@ -87,6 +87,21 @@ unsigned ABIPassingSegment::GetStackOffset() const return m_stackOffset; } +//----------------------------------------------------------------------------- +// GetStackSize: +// Get the amount of stack size consumed by this segment. +// +// Return Value: +// Normally the size rounded up to the pointer size. For Apple's arm64 ABI, +// however, some arguments do not get their own stack slots, in which case +// the return value is the same as "Size". +// +unsigned ABIPassingSegment::GetStackSize() const +{ + assert(IsPassedOnStack()); + return m_isFullStackSlot ? roundUp(Size, TARGET_POINTER_SIZE) : Size; +} + //----------------------------------------------------------------------------- // GetRegisterType: // Return the smallest type larger or equal to Size that most naturally @@ -97,8 +112,7 @@ unsigned ABIPassingSegment::GetStackOffset() const // var_types ABIPassingSegment::GetRegisterType() const { - assert(IsPassedInRegister()); - if (genIsValidFloatReg(m_register)) + if (genIsValidFloatReg(GetRegister())) { switch (Size) { @@ -140,6 +154,32 @@ var_types ABIPassingSegment::GetRegisterType() const } } +//----------------------------------------------------------------------------- +// GetRegisterType: +// Return the smallest type larger or equal to Size that most naturally +// represents the register this segment is passed in, taking into account the +// GC info of the specified layout. +// +// Parameters: +// layout - The layout of the class that this segment is part of +// +// Return Value: +// A type that matches ABIPassingSegment::Size and the register. +// +var_types ABIPassingSegment::GetRegisterType(ClassLayout* layout) const +{ + if (genIsValidIntReg(GetRegister())) + { + assert(Offset < layout->GetSize()); + if (((Offset % TARGET_POINTER_SIZE) == 0) && (Size == TARGET_POINTER_SIZE)) + { + return layout->GetGCPtrType(Offset / TARGET_POINTER_SIZE); + } + } + + return GetRegisterType(); +} + //----------------------------------------------------------------------------- // InRegister: // Create an ABIPassingSegment representing that a segment is passed in a @@ -159,7 +199,7 @@ ABIPassingSegment ABIPassingSegment::InRegister(regNumber reg, unsigned offset, assert(reg != REG_NA); #endif //!TARGET_WASM - TODO-LLVM: Delete when old classifer has gone. ABIPassingSegment segment; - segment.m_register = reg; + segment.m_register = static_cast(reg); segment.m_stackOffset = 0; segment.Offset = offset; segment.Size = size; @@ -189,6 +229,35 @@ ABIPassingSegment ABIPassingSegment::OnStack(unsigned stackOffset, unsigned offs return segment; } +//----------------------------------------------------------------------------- +// OnStackWithoutConsumingFullSlot: +// Create an ABIPassingSegment representing that a segment is passed on the +// stack, and which does not gets its own full stack slot. 
+// +// Parameters: +// stackOffset - Offset relative to the first stack parameter/argument +// offset - The offset of the segment that is passed in the register +// size - The size of the segment passed in the register +// +// Return Value: +// New instance of ABIPassingSegment. +// +// Remarks: +// This affects what ABIPassingSegment::GetStackSize() returns. +// +ABIPassingSegment ABIPassingSegment::OnStackWithoutConsumingFullSlot(unsigned stackOffset, + unsigned offset, + unsigned size) +{ + ABIPassingSegment segment; + segment.m_register = REG_NA; + segment.m_stackOffset = stackOffset; + segment.m_isFullStackSlot = false; + segment.Offset = offset; + segment.Size = size; + return segment; +} + //----------------------------------------------------------------------------- // ABIPassingInformation: // Construct an instance with the specified number of segments allocated in @@ -272,6 +341,19 @@ IteratorPair ABIPassingInformation::Segments() const ABIPassingSegmentIterator(begin + NumSegments)); } +//----------------------------------------------------------------------------- +// IsPassedByReference: +// Check if the argument is passed by (implicit) reference. If true, a single +// pointer-sized segment is expected. +// +// Return Value: +// True if so. +// +bool ABIPassingInformation::IsPassedByReference() const +{ + return m_passedByRef; +} + //----------------------------------------------------------------------------- // HasAnyRegisterSegment: // Check if any part of this value is passed in a register. @@ -406,25 +488,77 @@ unsigned ABIPassingInformation::CountRegsAndStackSlots() const return numSlots; } +//----------------------------------------------------------------------------- +// StackBytesConsumes: +// Count the amount of stack bytes consumed by this argument. +// +// Return Value: +// Bytes. +// +unsigned ABIPassingInformation::StackBytesConsumed() const +{ + unsigned numBytes = 0; + + for (const ABIPassingSegment& seg : Segments()) + { + if (seg.IsPassedOnStack()) + { + numBytes += seg.GetStackSize(); + } + } + + return numBytes; +} + //----------------------------------------------------------------------------- // FromSegment: // Create ABIPassingInformation from a single segment. // // Parameters: -// comp - Compiler instance -// segment - The single segment that represents the passing information +// comp - Compiler instance +// passedByRef - If true, the argument is passed by reference and the segment is for its pointer. +// segment - The single segment that represents the passing information // // Return Value: // An instance of ABIPassingInformation. // -ABIPassingInformation ABIPassingInformation::FromSegment(Compiler* comp, const ABIPassingSegment& segment) +ABIPassingInformation ABIPassingInformation::FromSegment(Compiler* comp, + bool passedByRef, + const ABIPassingSegment& segment) { ABIPassingInformation info; + info.m_passedByRef = passedByRef; info.NumSegments = 1; info.m_singleSegment = segment; + +#ifdef DEBUG + if (passedByRef) + { + assert(segment.Size == TARGET_POINTER_SIZE); + assert(!segment.IsPassedInRegister() || (segment.GetRegisterType() == TYP_I_IMPL)); + } +#endif + return info; } +//----------------------------------------------------------------------------- +// FromSegmentByValue: +// Create ABIPassingInformation from a single segment passing an argument by +// value. 
+// +// Parameters: +// comp - Compiler instance +// segment - The single segment that represents the passing information +// +// Return Value: +// An instance of ABIPassingInformation. +// +ABIPassingInformation ABIPassingInformation::FromSegmentByValue(Compiler* comp, const ABIPassingSegment& segment) +{ + return FromSegment(comp, /* passedByRef */ false, segment); +} + //----------------------------------------------------------------------------- // FromSegments: // Create ABIPassingInformation from two segments. @@ -468,7 +602,7 @@ void ABIPassingInformation::Dump() const const ABIPassingSegment& seg = Segment(i); seg.Dump(); - printf("\n"); + printf("%s\n", IsPassedByReference() ? " (implicit by-ref)" : ""); } } @@ -547,13 +681,14 @@ ABIPassingInformation SwiftABIClassifier::Classify(Compiler* comp, if (wellKnownParam == WellKnownArg::RetBuffer) { regNumber reg = theFixedRetBuffReg(CorInfoCallConvExtension::Swift); - return ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(reg, 0, TARGET_POINTER_SIZE)); + return ABIPassingInformation::FromSegmentByValue(comp, + ABIPassingSegment::InRegister(reg, 0, TARGET_POINTER_SIZE)); } if (wellKnownParam == WellKnownArg::SwiftSelf) { - return ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_SWIFT_SELF, 0, - TARGET_POINTER_SIZE)); + return ABIPassingInformation::FromSegmentByValue(comp, ABIPassingSegment::InRegister(REG_SWIFT_SELF, 0, + TARGET_POINTER_SIZE)); } if (wellKnownParam == WellKnownArg::SwiftError) @@ -563,8 +698,8 @@ ABIPassingInformation SwiftABIClassifier::Classify(Compiler* comp, // as that will mess with other args. // Quirk: To work around the JIT for now, "pass" it in REG_SWIFT_ERROR, // and let CodeGen::genFnProlog handle the rest. - return ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_SWIFT_ERROR, 0, - TARGET_POINTER_SIZE)); + return ABIPassingInformation::FromSegmentByValue(comp, ABIPassingSegment::InRegister(REG_SWIFT_ERROR, 0, + TARGET_POINTER_SIZE)); } if (type == TYP_STRUCT) @@ -572,7 +707,9 @@ ABIPassingInformation SwiftABIClassifier::Classify(Compiler* comp, const CORINFO_SWIFT_LOWERING* lowering = comp->GetSwiftLowering(structLayout->GetClassHandle()); if (lowering->byReference) { - return m_classifier.Classify(comp, TYP_I_IMPL, nullptr, WellKnownArg::None); + ABIPassingInformation abiInfo = m_classifier.Classify(comp, TYP_I_IMPL, nullptr, WellKnownArg::None); + assert(abiInfo.NumSegments == 1); + return ABIPassingInformation::FromSegment(comp, /* passedByRef */ true, abiInfo.Segment(0)); } ArrayStack segments(comp->getAllocator(CMK_ABI)); diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index 8ea7ab699ce5..500d67638d7e 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -8,8 +8,9 @@ enum class WellKnownArg : unsigned; class ABIPassingSegment { - regNumber m_register = REG_NA; - unsigned m_stackOffset = 0; + regNumberSmall m_register = REG_NA; + bool m_isFullStackSlot = true; + unsigned m_stackOffset = 0; public: bool IsPassedInRegister() const; @@ -34,10 +35,17 @@ class ABIPassingSegment // offset, relative to the base of stack arguments. unsigned GetStackOffset() const; + // Get the size of stack consumed. Normally this is 'Size' rounded up to + // the pointer size, but for apple arm64 ABI some primitives do not consume + // full stack slots. 
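+    // Illustrative example (sizes assumed for this sketch): on a typical 64-bit target two
+    // consecutive stack-passed 'char' arguments land at offsets 0 and 8 and each reports
+    // GetStackSize() == 8, while under the Apple arm64 rules they can land at offsets 0 and 1
+    // with GetStackSize() == 1, because small arguments are only padded to their own alignment
+    // rather than to a full pointer-sized slot.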
+ unsigned GetStackSize() const; + var_types GetRegisterType() const; + var_types GetRegisterType(ClassLayout* layout) const; static ABIPassingSegment InRegister(regNumber reg, unsigned offset, unsigned size); static ABIPassingSegment OnStack(unsigned stackOffset, unsigned offset, unsigned size); + static ABIPassingSegment OnStackWithoutConsumingFullSlot(unsigned stackOffset, unsigned offset, unsigned size); #ifdef DEBUG void Dump() const; @@ -88,6 +96,8 @@ struct ABIPassingInformation ABIPassingSegment m_singleSegment; }; + bool m_passedByRef = false; + public: // The number of segments used to pass the value. Examples: // - On SysV x64, structs can be passed in two registers, resulting in two @@ -101,10 +111,9 @@ struct ABIPassingInformation // - On loongarch64/riscv64, structs can be passed in two registers or // can be split out over register and stack, giving // multiple register segments and a struct segment. - unsigned NumSegments; + unsigned NumSegments = 0; ABIPassingInformation() - : NumSegments(0) { } @@ -114,6 +123,7 @@ struct ABIPassingInformation ABIPassingSegment& Segment(unsigned index); IteratorPair Segments() const; + bool IsPassedByReference() const; bool HasAnyRegisterSegment() const; bool HasAnyFloatingRegisterSegment() const; bool HasAnyStackSegment() const; @@ -121,8 +131,10 @@ struct ABIPassingInformation bool HasExactlyOneStackSegment() const; bool IsSplitAcrossRegistersAndStack() const; unsigned CountRegsAndStackSlots() const; + unsigned StackBytesConsumed() const; - static ABIPassingInformation FromSegment(Compiler* comp, const ABIPassingSegment& segment); + static ABIPassingInformation FromSegment(Compiler* comp, bool passedByRef, const ABIPassingSegment& segment); + static ABIPassingInformation FromSegmentByValue(Compiler* comp, const ABIPassingSegment& segment); static ABIPassingInformation FromSegments(Compiler* comp, const ABIPassingSegment& firstSegment, const ABIPassingSegment& secondSegment); diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index ae8c38daadb9..7156486f2aba 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -11,6 +11,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #include "jitpch.h" +#include "rangecheck.h" #ifdef _MSC_VER #pragma hdrstop #endif @@ -238,30 +239,18 @@ bool IntegralRange::Contains(int64_t value) const case NI_Vector256_op_Inequality: case NI_Vector512_op_Equality: case NI_Vector512_op_Inequality: - case NI_SSE_CompareScalarOrderedEqual: - case NI_SSE_CompareScalarOrderedNotEqual: - case NI_SSE_CompareScalarOrderedLessThan: - case NI_SSE_CompareScalarOrderedLessThanOrEqual: - case NI_SSE_CompareScalarOrderedGreaterThan: - case NI_SSE_CompareScalarOrderedGreaterThanOrEqual: - case NI_SSE_CompareScalarUnorderedEqual: - case NI_SSE_CompareScalarUnorderedNotEqual: - case NI_SSE_CompareScalarUnorderedLessThanOrEqual: - case NI_SSE_CompareScalarUnorderedLessThan: - case NI_SSE_CompareScalarUnorderedGreaterThanOrEqual: - case NI_SSE_CompareScalarUnorderedGreaterThan: - case NI_SSE2_CompareScalarOrderedEqual: - case NI_SSE2_CompareScalarOrderedNotEqual: - case NI_SSE2_CompareScalarOrderedLessThan: - case NI_SSE2_CompareScalarOrderedLessThanOrEqual: - case NI_SSE2_CompareScalarOrderedGreaterThan: - case NI_SSE2_CompareScalarOrderedGreaterThanOrEqual: - case NI_SSE2_CompareScalarUnorderedEqual: - case NI_SSE2_CompareScalarUnorderedNotEqual: - case NI_SSE2_CompareScalarUnorderedLessThanOrEqual: - case 
NI_SSE2_CompareScalarUnorderedLessThan: - case NI_SSE2_CompareScalarUnorderedGreaterThanOrEqual: - case NI_SSE2_CompareScalarUnorderedGreaterThan: + case NI_X86Base_CompareScalarOrderedEqual: + case NI_X86Base_CompareScalarOrderedNotEqual: + case NI_X86Base_CompareScalarOrderedLessThan: + case NI_X86Base_CompareScalarOrderedLessThanOrEqual: + case NI_X86Base_CompareScalarOrderedGreaterThan: + case NI_X86Base_CompareScalarOrderedGreaterThanOrEqual: + case NI_X86Base_CompareScalarUnorderedEqual: + case NI_X86Base_CompareScalarUnorderedNotEqual: + case NI_X86Base_CompareScalarUnorderedLessThanOrEqual: + case NI_X86Base_CompareScalarUnorderedLessThan: + case NI_X86Base_CompareScalarUnorderedGreaterThanOrEqual: + case NI_X86Base_CompareScalarUnorderedGreaterThan: case NI_SSE41_TestC: case NI_SSE41_TestZ: case NI_SSE41_TestNotZAndNotC: @@ -270,7 +259,7 @@ bool IntegralRange::Contains(int64_t value) const case NI_AVX_TestNotZAndNotC: return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::One}; - case NI_SSE2_Extract: + case NI_X86Base_Extract: case NI_SSE41_Extract: case NI_SSE41_X64_Extract: case NI_Vector128_ToScalar: @@ -674,6 +663,8 @@ void Compiler::optAssertionInit(bool isLocalProp) { optMaxAssertionCount = (AssertionIndex)min(maxTrackedLocals, ((3 * lvaTrackedCount / 128) + 1) * 64); } + + JITDUMP("Cross-block table size %u (for %u tracked locals)\n", optMaxAssertionCount, lvaTrackedCount); } else { @@ -742,6 +733,10 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse { printf("ArrBnds "); } + else if (curAssertion->op1.kind == O1K_VN) + { + printf("Vn "); + } else if (curAssertion->op1.kind == O1K_SUBTYPE) { printf("Subtype "); @@ -750,8 +745,8 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse { printf("Copy "); } - else if ((curAssertion->op2.kind == O2K_CONST_INT) || (curAssertion->op2.kind == O2K_CONST_LONG) || - (curAssertion->op2.kind == O2K_CONST_DOUBLE) || (curAssertion->op2.kind == O2K_ZEROOBJ)) + else if ((curAssertion->op2.kind == O2K_CONST_INT) || (curAssertion->op2.kind == O2K_CONST_DOUBLE) || + (curAssertion->op2.kind == O2K_ZEROOBJ)) { printf("Constant "); } @@ -770,15 +765,28 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse printf("(" FMT_VN "," FMT_VN ") ", curAssertion->op1.vn, curAssertion->op2.vn); } - if ((curAssertion->op1.kind == O1K_LCLVAR) || (curAssertion->op1.kind == O1K_EXACT_TYPE) || - (curAssertion->op1.kind == O1K_SUBTYPE)) + if (curAssertion->op1.kind == O1K_LCLVAR) { - printf("V%02u", curAssertion->op1.lcl.lclNum); - if (curAssertion->op1.lcl.ssaNum != SsaConfig::RESERVED_SSA_NUM) + if (!optLocalAssertionProp) + { + printf("LCLVAR"); + vnStore->vnDump(this, curAssertion->op1.vn); + } + else { - printf(".%02u", curAssertion->op1.lcl.ssaNum); + printf("V%02u", curAssertion->op1.lclNum); } } + else if (curAssertion->op1.kind == O1K_EXACT_TYPE) + { + printf("Exact_Type"); + vnStore->vnDump(this, curAssertion->op1.vn); + } + else if (curAssertion->op1.kind == O1K_SUBTYPE) + { + printf("Sub_Type"); + vnStore->vnDump(this, curAssertion->op1.vn); + } else if (curAssertion->op1.kind == O1K_ARR_BND) { printf("[idx: " FMT_VN, curAssertion->op1.bnd.vnIdx); @@ -787,6 +795,12 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse vnStore->vnDump(this, curAssertion->op1.bnd.vnLen); printf("]"); } + else if (curAssertion->op1.kind == O1K_VN) + { + printf("[vn: " FMT_VN, curAssertion->op1.vn); + vnStore->vnDump(this, curAssertion->op1.vn); + 
printf("]"); + } else if (curAssertion->op1.kind == O1K_BOUND_OPER_BND) { printf("Oper_Bnd"); @@ -852,19 +866,14 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse switch (curAssertion->op2.kind) { case O2K_LCLVAR_COPY: - printf("V%02u", curAssertion->op2.lcl.lclNum); - if (curAssertion->op1.lcl.ssaNum != SsaConfig::RESERVED_SSA_NUM) - { - printf(".%02u", curAssertion->op1.lcl.ssaNum); - } + printf("V%02u", curAssertion->op2.lclNum); break; case O2K_CONST_INT: - case O2K_IND_CNS_INT: if (curAssertion->op1.kind == O1K_EXACT_TYPE) { ssize_t iconVal = curAssertion->op2.u1.iconVal; - if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun()) + if (IsAot()) { printf("Exact Type MT(0x%p)", dspPtr(iconVal)); } @@ -884,7 +893,7 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse else if (curAssertion->op1.kind == O1K_SUBTYPE) { ssize_t iconVal = curAssertion->op2.u1.iconVal; - if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun()) + if (IsAot()) { printf("MT(0x%p)", dspPtr(iconVal)); } @@ -904,7 +913,8 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse } else { - var_types op1Type = lvaGetDesc(curAssertion->op1.lcl.lclNum)->lvType; + var_types op1Type = !optLocalAssertionProp ? vnStore->TypeOfVN(curAssertion->op1.vn) + : lvaGetRealType(curAssertion->op1.lclNum); if (op1Type == TYP_REF) { if (curAssertion->op2.u1.iconVal == 0) @@ -930,10 +940,6 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse } break; - case O2K_CONST_LONG: - printf("0x%016llx", curAssertion->op2.lconVal); - break; - case O2K_CONST_DOUBLE: if (FloatingPointUtils::isNegativeZero(curAssertion->op2.dconVal)) { @@ -1103,47 +1109,35 @@ ssize_t Compiler::optCastConstantSmall(ssize_t iconVal, var_types smallType) // Assertion creation may fail either because the provided assertion // operands aren't supported or because the assertion table is full. // -AssertionIndex Compiler::optCreateAssertion(GenTree* op1, - GenTree* op2, - optAssertionKind assertionKind, - bool helperCallArgs) +AssertionIndex Compiler::optCreateAssertion(GenTree* op1, GenTree* op2, optAssertionKind assertionKind) { assert(op1 != nullptr); - assert(!helperCallArgs || (op2 != nullptr)); AssertionDsc assertion = {OAK_INVALID}; assert(assertion.assertionKind == OAK_INVALID); - if (op1->OperIs(GT_BOUNDS_CHECK)) + if (op1->OperIs(GT_BOUNDS_CHECK) && (assertionKind == OAK_NO_THROW)) { - if (assertionKind == OAK_NO_THROW) - { - GenTreeBoundsChk* arrBndsChk = op1->AsBoundsChk(); - assertion.assertionKind = assertionKind; - assertion.op1.kind = O1K_ARR_BND; - assertion.op1.bnd.vnIdx = optConservativeNormalVN(arrBndsChk->GetIndex()); - assertion.op1.bnd.vnLen = optConservativeNormalVN(arrBndsChk->GetArrayLength()); - goto DONE_ASSERTION; - } + GenTreeBoundsChk* arrBndsChk = op1->AsBoundsChk(); + assertion.assertionKind = assertionKind; + assertion.op1.kind = O1K_ARR_BND; + assertion.op1.bnd.vnIdx = optConservativeNormalVN(arrBndsChk->GetIndex()); + assertion.op1.bnd.vnLen = optConservativeNormalVN(arrBndsChk->GetArrayLength()); } - // // Are we trying to make a non-null assertion? 
+ // (note we now do this for all indirs, regardless of address type) // - if (op2 == nullptr) + else if (op2 == nullptr) { - // // Must be an OAK_NOT_EQUAL assertion - // - noway_assert(assertionKind == OAK_NOT_EQUAL); + assert(assertionKind == OAK_NOT_EQUAL); - // // Set op1 to the instance pointer of the indirection - // op1 = op1->gtEffectiveVal(); ssize_t offset = 0; - while ((op1->gtOper == GT_ADD) && (op1->gtType == TYP_BYREF)) + while (op1->OperIs(GT_ADD) && op1->TypeIs(TYP_BYREF)) { if (op1->gtGetOp2()->IsCnsIntOrI()) { @@ -1161,39 +1155,17 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, } } - if (fgIsBigOffset(offset) || op1->gtOper != GT_LCL_VAR) - { - goto DONE_ASSERTION; // Don't make an assertion - } - - unsigned lclNum = op1->AsLclVarCommon()->GetLclNum(); - LclVarDsc* lclVar = lvaGetDesc(lclNum); - - ValueNum vn; - - // We only perform null-checks on byrefs and GC refs - if (!varTypeIsGC(lclVar->TypeGet())) - { - goto DONE_ASSERTION; // Don't make an assertion - } - - // If the local variable has its address exposed then bail - if (lclVar->IsAddressExposed()) + if (!fgIsBigOffset(offset) && op1->OperIs(GT_LCL_VAR) && !lvaVarAddrExposed(op1->AsLclVar()->GetLclNum())) { - goto DONE_ASSERTION; // Don't make an assertion + assertion.op1.kind = O1K_LCLVAR; + assertion.op1.lclNum = op1->AsLclVarCommon()->GetLclNum(); + assertion.op1.vn = optConservativeNormalVN(op1); + assertion.assertionKind = assertionKind; + assertion.op2.kind = O2K_CONST_INT; + assertion.op2.vn = ValueNumStore::VNForNull(); + assertion.op2.u1.iconVal = 0; + assertion.op2.SetIconFlag(GTF_EMPTY); } - - assertion.op1.kind = O1K_LCLVAR; - assertion.op1.lcl.lclNum = lclNum; - assertion.op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum(); - vn = optConservativeNormalVN(op1); - - assertion.op1.vn = vn; - assertion.assertionKind = assertionKind; - assertion.op2.kind = O2K_CONST_INT; - assertion.op2.vn = ValueNumStore::VNForNull(); - assertion.op2.u1.iconVal = 0; - assertion.op2.SetIconFlag(GTF_EMPTY); } // // Are we making an assertion about a local variable? @@ -1210,62 +1182,16 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, goto DONE_ASSERTION; // Don't make an assertion } - if (helperCallArgs) - { - // - // Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion - // - if ((assertionKind != OAK_EQUAL) && (assertionKind != OAK_NOT_EQUAL)) - { - goto DONE_ASSERTION; // Don't make an assertion - } - - if (op2->gtOper == GT_IND) - { - op2 = op2->AsOp()->gtOp1; - assertion.op2.kind = O2K_IND_CNS_INT; - } - else - { - assertion.op2.kind = O2K_CONST_INT; - } - - if (op2->gtOper != GT_CNS_INT) - { - goto DONE_ASSERTION; // Don't make an assertion - } - - // - // TODO-CQ: Check for Sealed class and change kind to O1K_EXACT_TYPE - // And consider the special cases, like CORINFO_FLG_SHAREDINST or CORINFO_FLG_VARIANCE - // where a class can be sealed, but they don't behave as exact types because casts to - // non-base types sometimes still succeed. 
- // - assertion.op1.kind = O1K_SUBTYPE; - assertion.op1.lcl.lclNum = lclNum; - assertion.op1.vn = optConservativeNormalVN(op1); - assertion.op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum(); - assertion.op2.u1.iconVal = op2->AsIntCon()->gtIconVal; - assertion.op2.vn = optConservativeNormalVN(op2); - assertion.op2.SetIconFlag(op2->GetIconHandleFlag()); - - // - // Ok everything has been set and the assertion looks good - // - assertion.assertionKind = assertionKind; - } - else // !helperCallArgs { /* Skip over a GT_COMMA node(s), if necessary */ - while (op2->gtOper == GT_COMMA) + while (op2->OperIs(GT_COMMA)) { op2 = op2->AsOp()->gtOp2; } - assertion.op1.kind = O1K_LCLVAR; - assertion.op1.lcl.lclNum = lclNum; - assertion.op1.vn = optConservativeNormalVN(op1); - assertion.op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum(); + assertion.op1.kind = O1K_LCLVAR; + assertion.op1.lclNum = lclNum; + assertion.op1.vn = optConservativeNormalVN(op1); switch (op2->gtOper) { @@ -1286,10 +1212,6 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, } goto CNS_COMMON; - case GT_CNS_LNG: - op2Kind = O2K_CONST_LONG; - goto CNS_COMMON; - case GT_CNS_DBL: op2Kind = O2K_CONST_DOUBLE; goto CNS_COMMON; @@ -1304,47 +1226,26 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, goto DONE_ASSERTION; // Don't make an assertion } - // If the LclVar is a TYP_LONG then we only make - // assertions where op2 is also TYP_LONG - // - if ((lclVar->TypeGet() == TYP_LONG) && (op2->TypeGet() != TYP_LONG)) - { - goto DONE_ASSERTION; // Don't make an assertion - } + assertion.op2.kind = op2Kind; + assertion.op2.vn = optConservativeNormalVN(op2); - assertion.op2.kind = op2Kind; - assertion.op2.lconVal = 0; - assertion.op2.vn = optConservativeNormalVN(op2); - - if (op2->gtOper == GT_CNS_INT) + if (op2->OperIs(GT_CNS_INT)) { - ssize_t iconVal = op2->AsIntCon()->gtIconVal; - - if (varTypeIsSmall(lclVar)) + ssize_t iconVal = op2->AsIntCon()->IconValue(); + if (varTypeIsSmall(lclVar) && op1->OperIs(GT_STORE_LCL_VAR)) { iconVal = optCastConstantSmall(iconVal, lclVar->TypeGet()); + if (!optLocalAssertionProp) + { + assertion.op2.vn = vnStore->VNForIntCon(static_cast(iconVal)); + } } - -#ifdef TARGET_ARM - // Do not Constant-Prop large constants for ARM - // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had - // target_ssize_t type. 
- if (!codeGen->validImmForMov((target_ssize_t)iconVal)) - { - goto DONE_ASSERTION; // Don't make an assertion - } -#endif // TARGET_ARM - assertion.op2.u1.iconVal = iconVal; assertion.op2.SetIconFlag(op2->GetIconHandleFlag(), op2->AsIntCon()->gtFieldSeq); } - else if (op2->gtOper == GT_CNS_LNG) - { - assertion.op2.lconVal = op2->AsLngCon()->gtLconVal; - } else { - noway_assert(op2->gtOper == GT_CNS_DBL); + noway_assert(op2->OperIs(GT_CNS_DBL)); /* If we have an NaN value then don't record it */ if (FloatingPointUtils::isNaN(op2->AsDblCon()->DconValue())) { @@ -1363,9 +1264,13 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, case GT_LCL_VAR: { - // + if (!optLocalAssertionProp) + { + // O2K_LCLVAR_COPY is local assertion prop only + goto DONE_ASSERTION; + } + // Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion - // if ((assertionKind != OAK_EQUAL) && (assertionKind != OAK_NOT_EQUAL)) { goto DONE_ASSERTION; // Don't make an assertion @@ -1414,10 +1319,9 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, goto DONE_ASSERTION; // Don't make an assertion } - assertion.op2.kind = O2K_LCLVAR_COPY; - assertion.op2.vn = optConservativeNormalVN(op2); - assertion.op2.lcl.lclNum = lclNum2; - assertion.op2.lcl.ssaNum = op2->AsLclVarCommon()->GetSsaNum(); + assertion.op2.kind = O2K_LCLVAR_COPY; + assertion.op2.vn = optConservativeNormalVN(op2); + assertion.op2.lclNum = lclNum2; // Ok everything has been set and the assertion looks good assertion.assertionKind = assertionKind; @@ -1425,6 +1329,22 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, goto DONE_ASSERTION; } + case GT_CALL: + { + if (optLocalAssertionProp) + { + GenTreeCall* const call = op2->AsCall(); + if (call->IsHelperCall() && s_helperCallProperties.NonNullReturn(call->GetHelperNum())) + { + assertion.assertionKind = OAK_NOT_EQUAL; + assertion.op2.kind = O2K_CONST_INT; + assertion.op2.u1.iconVal = 0; + goto DONE_ASSERTION; + } + } + break; + } + default: break; } @@ -1445,92 +1365,27 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, } } } - - // - // Are we making an IsType assertion? - // - else if (op1->gtOper == GT_IND) + else { - op1 = op1->AsOp()->gtOp1; - // - // Is this an indirection of a local variable? - // - if (op1->gtOper == GT_LCL_VAR) + // Currently, O1K_VN serves as a backup for O1K_LCLVAR (where it's not a local), + // but long term we should keep O1K_LCLVAR for local assertions only. + if (!optLocalAssertionProp) { - unsigned lclNum = op1->AsLclVarCommon()->GetLclNum(); - - // If the local variable is not in SSA then bail - if (!lvaInSsa(lclNum)) - { - goto DONE_ASSERTION; - } - - // If we have an typeHnd indirection then op1 must be a TYP_REF - // and the indirection must produce a TYP_I - // - if (op1->gtType != TYP_REF) - { - goto DONE_ASSERTION; // Don't make an assertion - } - - assertion.op1.kind = O1K_EXACT_TYPE; - assertion.op1.lcl.lclNum = lclNum; - assertion.op1.vn = optConservativeNormalVN(op1); - assertion.op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum(); - -#ifdef DEBUG - - // If we're ssa based, check that the VN is reasonable. 
- // - if (assertion.op1.lcl.ssaNum != SsaConfig::RESERVED_SSA_NUM) - { - LclSsaVarDsc* const ssaDsc = lvaGetDesc(lclNum)->GetPerSsaData(assertion.op1.lcl.ssaNum); - - bool doesVNMatch = (assertion.op1.vn == vnStore->VNConservativeNormalValue(ssaDsc->m_vnPair)); - - if (!doesVNMatch && ssaDsc->m_updated) - { - doesVNMatch = (assertion.op1.vn == vnStore->VNConservativeNormalValue(ssaDsc->m_origVNPair)); - } - - assert(doesVNMatch); - } -#endif - - ssize_t cnsValue = 0; - GenTreeFlags iconFlags = GTF_EMPTY; - // Ngen case - if (op2->gtOper == GT_IND) - { - if (!optIsTreeKnownIntValue(!optLocalAssertionProp, op2->AsOp()->gtOp1, &cnsValue, &iconFlags)) - { - goto DONE_ASSERTION; // Don't make an assertion - } - - assertion.assertionKind = assertionKind; - assertion.op2.kind = O2K_IND_CNS_INT; - assertion.op2.u1.iconVal = cnsValue; - assertion.op2.vn = optConservativeNormalVN(op2->AsOp()->gtOp1); + ValueNum op1VN = optConservativeNormalVN(op1); + ValueNum op2VN = optConservativeNormalVN(op2); - /* iconFlags should only contain bits in GTF_ICON_HDL_MASK */ - assert((iconFlags & ~GTF_ICON_HDL_MASK) == 0); - assertion.op2.SetIconFlag(iconFlags); - } - // JIT case - else if (optIsTreeKnownIntValue(!optLocalAssertionProp, op2, &cnsValue, &iconFlags)) + // For TP reasons, limited to 32-bit constants on the op2 side. + if (vnStore->IsVNInt32Constant(op2VN) && !vnStore->IsVNHandle(op2VN)) { + assert(assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL); assertion.assertionKind = assertionKind; + assertion.op1.vn = op1VN; + assertion.op1.kind = O1K_VN; + assertion.op2.vn = op2VN; assertion.op2.kind = O2K_CONST_INT; - assertion.op2.u1.iconVal = cnsValue; - assertion.op2.vn = optConservativeNormalVN(op2); - - /* iconFlags should only contain bits in GTF_ICON_HDL_MASK */ - assert((iconFlags & ~GTF_ICON_HDL_MASK) == 0); - assertion.op2.SetIconFlag(iconFlags); - } - else - { - goto DONE_ASSERTION; // Don't make an assertion + assertion.op2.u1.iconVal = vnStore->ConstantValue(op2VN); + assertion.op2.SetIconFlag(GTF_EMPTY); + return optAddAssertion(&assertion); } } } @@ -1565,12 +1420,6 @@ AssertionIndex Compiler::optFinalizeCreatingAssertion(AssertionDsc* assertion) { return NO_ASSERTION_INDEX; } - - // TODO: only copy assertions rely on valid SSA number so we could generate more assertions here - if (assertion->op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM) - { - return NO_ASSERTION_INDEX; - } } // Now add the assertion to our assertion table @@ -1592,7 +1441,7 @@ bool Compiler::optIsTreeKnownIntValue(bool vnBased, GenTree* tree, ssize_t* pCon // Is Local assertion prop? 
if (!vnBased) { - if (tree->OperGet() == GT_CNS_INT) + if (tree->OperIs(GT_CNS_INT)) { *pConstant = tree->AsIntCon()->IconValue(); *pFlags = tree->GetIconHandleFlag(); @@ -1715,6 +1564,37 @@ AssertionIndex Compiler::optAddAssertion(AssertionDsc* newAssertion) return NO_ASSERTION_INDEX; } + if (!optLocalAssertionProp) + { + // Ignore VN-based assertions with NoVN + switch (newAssertion->op1.kind) + { + case O1K_LCLVAR: + case O1K_VN: + case O1K_BOUND_OPER_BND: + case O1K_BOUND_LOOP_BND: + case O1K_CONSTANT_LOOP_BND: + case O1K_CONSTANT_LOOP_BND_UN: + case O1K_EXACT_TYPE: + case O1K_SUBTYPE: + if (newAssertion->op1.vn == ValueNumStore::NoVN) + { + return NO_ASSERTION_INDEX; + } + break; + case O1K_ARR_BND: + if ((newAssertion->op1.bnd.vnIdx == ValueNumStore::NoVN) || + (newAssertion->op1.bnd.vnLen == ValueNumStore::NoVN)) + { + return NO_ASSERTION_INDEX; + } + break; + + default: + break; + } + } + // See if we already have this assertion in the table. // // For local assertion prop we can speed things up by checking the dep vector. @@ -1723,10 +1603,9 @@ AssertionIndex Compiler::optAddAssertion(AssertionDsc* newAssertion) // if (optLocalAssertionProp) { - assert((newAssertion->op1.kind == O1K_LCLVAR) || (newAssertion->op1.kind == O1K_SUBTYPE) || - (newAssertion->op1.kind == O1K_EXACT_TYPE)); + assert(newAssertion->op1.kind == O1K_LCLVAR); - unsigned lclNum = newAssertion->op1.lcl.lclNum; + unsigned lclNum = newAssertion->op1.lclNum; BitVecOps::Iter iter(apTraits, GetAssertionDep(lclNum)); unsigned bvIndex = 0; while (iter.NextElem(&bvIndex)) @@ -1785,15 +1664,14 @@ AssertionIndex Compiler::optAddAssertion(AssertionDsc* newAssertion) // Assertion mask bits are [index + 1]. if (optLocalAssertionProp) { - assert((newAssertion->op1.kind == O1K_LCLVAR) || (newAssertion->op1.kind == O1K_SUBTYPE) || - (newAssertion->op1.kind == O1K_EXACT_TYPE)); + assert(newAssertion->op1.kind == O1K_LCLVAR); // Mark the variables this index depends on - unsigned lclNum = newAssertion->op1.lcl.lclNum; + unsigned lclNum = newAssertion->op1.lclNum; BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), optAssertionCount - 1); if (newAssertion->op2.kind == O2K_LCLVAR_COPY) { - lclNum = newAssertion->op2.lcl.lclNum; + lclNum = newAssertion->op2.lclNum; BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), optAssertionCount - 1); } } @@ -1823,15 +1701,14 @@ void Compiler::optDebugCheckAssertion(AssertionDsc* assertion) switch (assertion->op1.kind) { - case O1K_LCLVAR: - case O1K_EXACT_TYPE: - case O1K_SUBTYPE: - assert(optLocalAssertionProp || - lvaGetDesc(assertion->op1.lcl.lclNum)->lvPerSsaData.IsValidSsaNum(assertion->op1.lcl.ssaNum)); - break; case O1K_ARR_BND: // It would be good to check that bnd.vnIdx and bnd.vnLen are valid value numbers. + assert(!optLocalAssertionProp); + assert(assertion->assertionKind == OAK_NO_THROW); break; + case O1K_EXACT_TYPE: + case O1K_SUBTYPE: + case O1K_VN: case O1K_BOUND_OPER_BND: case O1K_BOUND_LOOP_BND: case O1K_CONSTANT_LOOP_BND: @@ -1843,32 +1720,10 @@ void Compiler::optDebugCheckAssertion(AssertionDsc* assertion) } switch (assertion->op2.kind) { - case O2K_IND_CNS_INT: - case O2K_CONST_INT: - { - // The only flags that can be set are those in the GTF_ICON_HDL_MASK. 
- switch (assertion->op1.kind) - { - case O1K_EXACT_TYPE: - case O1K_SUBTYPE: - break; - case O1K_LCLVAR: - assert((lvaGetDesc(assertion->op1.lcl.lclNum)->lvType != TYP_REF) || - (assertion->op2.u1.iconVal == 0) || doesMethodHaveFrozenObjects()); - break; - default: - break; - } - } - break; - - case O2K_CONST_LONG: - { - // All handles should be represented by O2K_CONST_INT, - // so no handle bits should be set here. - assert(!assertion->op2.HasIconFlag()); - } - break; + case O2K_SUBRANGE: + case O2K_LCLVAR_COPY: + assert(optLocalAssertionProp); + break; case O2K_ZEROOBJ: { @@ -1912,18 +1767,12 @@ void Compiler::optDebugCheckAssertions(AssertionIndex index) // assertionIndex - the index of the assertion // op1 - the first assertion operand // op2 - the second assertion operand -// helperCallArgs - when true this indicates that the assertion operands -// are the arguments of a type cast helper call such as -// CORINFO_HELP_ISINSTANCEOFCLASS // // Notes: // The created complementary assertion is associated with the original // assertion such that it can be found by optFindComplementary. // -void Compiler::optCreateComplementaryAssertion(AssertionIndex assertionIndex, - GenTree* op1, - GenTree* op2, - bool helperCallArgs) +void Compiler::optCreateComplementaryAssertion(AssertionIndex assertionIndex, GenTree* op1, GenTree* op2) { if (assertionIndex == NO_ASSERTION_INDEX) { @@ -1943,73 +1792,38 @@ void Compiler::optCreateComplementaryAssertion(AssertionIndex assertionIndex, if (candidateAssertion.assertionKind == OAK_EQUAL) { - AssertionIndex index = optCreateAssertion(op1, op2, OAK_NOT_EQUAL, helperCallArgs); - optMapComplementary(index, assertionIndex); - } - else if (candidateAssertion.assertionKind == OAK_NOT_EQUAL) - { - AssertionIndex index = optCreateAssertion(op1, op2, OAK_EQUAL, helperCallArgs); - optMapComplementary(index, assertionIndex); - } - - // Are we making a subtype or exact type assertion? - if ((candidateAssertion.op1.kind == O1K_SUBTYPE) || (candidateAssertion.op1.kind == O1K_EXACT_TYPE)) - { - optCreateAssertion(op1, nullptr, OAK_NOT_EQUAL); - } -} + // Don't create useless OAK_NOT_EQUAL assertions -// optAssertionGenCast: Create a tentative subrange assertion for a cast. -// -// This function will try to create an assertion that the cast's operand -// is within the "input" range for the cast, so that this assertion can -// later be proven via implication and the cast removed. Such assertions -// are only generated during global propagation, and only for LCL_VARs. -// -// Arguments: -// cast - the cast node for which to create the assertion -// -// Return Value: -// Index of the generated assertion, or NO_ASSERTION_INDEX if it was not -// legal, profitable, or possible to create one. -// -AssertionIndex Compiler::optAssertionGenCast(GenTreeCast* cast) -{ - if (optLocalAssertionProp || !varTypeIsIntegral(cast) || !varTypeIsIntegral(cast->CastOp())) - { - return NO_ASSERTION_INDEX; - } + if ((candidateAssertion.op1.kind == O1K_LCLVAR) || (candidateAssertion.op1.kind == O1K_VN)) + { + // "LCLVAR != CNS" is not a useful assertion (unless CNS is 0/1) + if (((candidateAssertion.op2.kind == O2K_CONST_INT)) && (candidateAssertion.op2.u1.iconVal != 0) && + (candidateAssertion.op2.u1.iconVal != 1)) + { + return; + } - // This condition exists to preserve previous behavior. 
- if (!cast->CastOp()->OperIs(GT_LCL_VAR)) - { - return NO_ASSERTION_INDEX; - } + // "LCLVAR != LCLVAR_COPY" + if (candidateAssertion.op2.kind == O2K_LCLVAR_COPY) + { + return; + } + } - GenTreeLclVar* lclVar = cast->CastOp()->AsLclVar(); - LclVarDsc* varDsc = lvaGetDesc(lclVar); + // "Object is not Class" is also not a useful assertion (at least for now) + if ((candidateAssertion.op1.kind == O1K_EXACT_TYPE) || (candidateAssertion.op1.kind == O1K_SUBTYPE)) + { + return; + } - // It is not useful to make assertions about address-exposed variables, they will never be proven. - if (varDsc->IsAddressExposed()) - { - return NO_ASSERTION_INDEX; + AssertionIndex index = optCreateAssertion(op1, op2, OAK_NOT_EQUAL); + optMapComplementary(index, assertionIndex); } - - // A representation-changing cast cannot be simplified if it is not checked. - if (!cast->gtOverflow() && (genActualType(cast) != genActualType(lclVar))) + else if (candidateAssertion.assertionKind == OAK_NOT_EQUAL) { - return NO_ASSERTION_INDEX; + AssertionIndex index = optCreateAssertion(op1, op2, OAK_EQUAL); + optMapComplementary(index, assertionIndex); } - - AssertionDsc assertion = {OAK_SUBRANGE}; - assertion.op1.kind = O1K_LCLVAR; - assertion.op1.vn = vnStore->VNConservativeNormalValue(lclVar->gtVNPair); - assertion.op1.lcl.lclNum = lclVar->GetLclNum(); - assertion.op1.lcl.ssaNum = lclVar->GetSsaNum(); - assertion.op2.kind = O2K_SUBRANGE; - assertion.op2.u2 = IntegralRange::ForCastInput(cast); - - return optFinalizeCreatingAssertion(&assertion); } //------------------------------------------------------------------------ @@ -2019,9 +1833,7 @@ AssertionIndex Compiler::optAssertionGenCast(GenTreeCast* cast) // op1 - the first assertion operand // op2 - the second assertion operand // assertionKind - the assertion kind -// helperCallArgs - when true this indicates that the assertion operands -// are the arguments of a type cast helper call such as -// CORINFO_HELP_ISINSTANCEOFCLASS +// // Return Value: // The new assertion index or NO_ASSERTION_INDEX if a new assertion // was not created. @@ -2033,17 +1845,14 @@ AssertionIndex Compiler::optAssertionGenCast(GenTreeCast* cast) // create a second, complementary assertion. This may too fail, for the // same reasons as the first one. // -AssertionIndex Compiler::optCreateJtrueAssertions(GenTree* op1, - GenTree* op2, - Compiler::optAssertionKind assertionKind, - bool helperCallArgs) +AssertionIndex Compiler::optCreateJtrueAssertions(GenTree* op1, GenTree* op2, optAssertionKind assertionKind) { - AssertionIndex assertionIndex = optCreateAssertion(op1, op2, assertionKind, helperCallArgs); + AssertionIndex assertionIndex = optCreateAssertion(op1, op2, assertionKind); // Don't bother if we don't have an assertion on the JTrue False path. Current implementation // allows for a complementary only if there is an assertion on the False path (tree->HasAssertion()). 
if (assertionIndex != NO_ASSERTION_INDEX) { - optCreateComplementaryAssertion(assertionIndex, op1, op2, helperCallArgs); + optCreateComplementaryAssertion(assertionIndex, op1, op2); } return assertionIndex; } @@ -2118,6 +1927,12 @@ AssertionInfo Compiler::optCreateJTrueBoundsAssertion(GenTree* tree) dsc.op2.kind = O2K_INVALID; dsc.op2.vn = ValueNumStore::NoVN; + if ((dsc.op1.bnd.vnIdx == ValueNumStore::NoVN) || (dsc.op1.bnd.vnLen == ValueNumStore::NoVN)) + { + // Don't make an assertion if one of the operands has no VN + return NO_ASSERTION_INDEX; + } + AssertionIndex index = optAddAssertion(&dsc); if (unsignedCompareBnd.cmpOper == VNF_GE_UN) { @@ -2214,15 +2029,63 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree) return NO_ASSERTION_INDEX; } + // See if we have IND(obj) ==/!= TypeHandle + // + if (!optLocalAssertionProp && op1->OperIs(GT_IND) && op1->gtGetOp1()->TypeIs(TYP_REF)) + { + ValueNum objVN = optConservativeNormalVN(op1->gtGetOp1()); + ValueNum typeHndVN = optConservativeNormalVN(op2); + + if ((objVN != ValueNumStore::NoVN) && vnStore->IsVNTypeHandle(typeHndVN)) + { + AssertionDsc assertion; + assertion.assertionKind = OAK_EQUAL; + assertion.op1.kind = O1K_EXACT_TYPE; + assertion.op1.vn = objVN; + assertion.op2.kind = O2K_CONST_INT; + assertion.op2.u1.iconVal = vnStore->CoercedConstantValue(typeHndVN); + assertion.op2.vn = typeHndVN; + assertion.op2.SetIconFlag(GTF_ICON_CLASS_HDL); + AssertionIndex index = optAddAssertion(&assertion); + + // We don't need to create a complementary assertion here. We're only interested + // in the assertion that the object is of a certain type. The opposite assertion + // (that the object is not of a certain type) is not useful (at least not yet). + // + // So if we have "if (obj->pMT != CNS) then create the assertion for the "else" edge. + if (relop->OperIs(GT_NE)) + { + return AssertionInfo::ForNextEdge(index); + } + return index; + } + } + // Check for op1 or op2 to be lcl var and if so, keep it in op1. - if ((op1->gtOper != GT_LCL_VAR) && (op2->gtOper == GT_LCL_VAR)) + if (!op1->OperIs(GT_LCL_VAR) && op2->OperIs(GT_LCL_VAR)) { std::swap(op1, op2); } // If op1 is lcl and op2 is const or lcl, create assertion. - if ((op1->gtOper == GT_LCL_VAR) && (op2->OperIsConst() || (op2->gtOper == GT_LCL_VAR))) // Fix for Dev10 851483 + if (op1->OperIs(GT_LCL_VAR) && (op2->OperIsConst() || op2->OperIs(GT_LCL_VAR))) // Fix for Dev10 851483 { + // Watch out for cases where long local(s) are implicitly truncated. 
+ // + LclVarDsc* const lcl1Dsc = lvaGetDesc(op1->AsLclVarCommon()); + if (lcl1Dsc->TypeIs(TYP_LONG) && !op1->TypeIs(TYP_LONG)) + { + return NO_ASSERTION_INDEX; + } + if (op2->OperIs(GT_LCL_VAR)) + { + LclVarDsc* const lcl2Dsc = lvaGetDesc(op2->AsLclVarCommon()); + if (lcl2Dsc->TypeIs(TYP_LONG) && !op2->TypeIs(TYP_LONG)) + { + return NO_ASSERTION_INDEX; + } + } + return optCreateJtrueAssertions(op1, op2, assertionKind); } else if (!optLocalAssertionProp) @@ -2233,62 +2096,24 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree) if (vnStore->IsVNCheckedBound(op1VN) && vnStore->IsVNInt32Constant(op2VN)) { assert(relop->OperIs(GT_EQ, GT_NE)); - - int con = vnStore->ConstantValue(op2VN); - if (con >= 0) - { - AssertionDsc dsc; - - // For arr.Length != 0, we know that 0 is a valid index - // For arr.Length == con, we know that con - 1 is the greatest valid index - if (con == 0) - { - dsc.assertionKind = OAK_NOT_EQUAL; - dsc.op1.bnd.vnIdx = vnStore->VNForIntCon(0); - } - else - { - dsc.assertionKind = OAK_EQUAL; - dsc.op1.bnd.vnIdx = vnStore->VNForIntCon(con - 1); - } - - dsc.op1.vn = op1VN; - dsc.op1.kind = O1K_ARR_BND; - dsc.op1.bnd.vnLen = op1VN; - dsc.op2.vn = vnStore->VNConservativeNormalValue(op2->gtVNPair); - dsc.op2.kind = O2K_CONST_INT; - dsc.op2.u1.iconVal = 0; - dsc.op2.SetIconFlag(GTF_EMPTY); - - // when con is not zero, create an assertion on the arr.Length == con edge - // when con is zero, create an assertion on the arr.Length != 0 edge - AssertionIndex index = optAddAssertion(&dsc); - if (relop->OperIs(GT_NE) != (con == 0)) - { - return AssertionInfo::ForNextEdge(index); - } - else - { - return index; - } - } + return optCreateJtrueAssertions(op1, op2, assertionKind); } } // Check op1 and op2 for an indirection of a GT_LCL_VAR and keep it in op1. - if (((op1->gtOper != GT_IND) || (op1->AsOp()->gtOp1->gtOper != GT_LCL_VAR)) && - ((op2->gtOper == GT_IND) && (op2->AsOp()->gtOp1->gtOper == GT_LCL_VAR))) + if ((!op1->OperIs(GT_IND) || !op1->AsOp()->gtOp1->OperIs(GT_LCL_VAR)) && + (op2->OperIs(GT_IND) && op2->AsOp()->gtOp1->OperIs(GT_LCL_VAR))) { std::swap(op1, op2); } // If op1 is ind, then extract op1's oper. - if ((op1->gtOper == GT_IND) && (op1->AsOp()->gtOp1->gtOper == GT_LCL_VAR)) + if (op1->OperIs(GT_IND) && op1->AsOp()->gtOp1->OperIs(GT_LCL_VAR)) { return optCreateJtrueAssertions(op1, op2, assertionKind); } // Look for a call to an IsInstanceOf helper compared to a nullptr - if ((op2->gtOper != GT_CNS_INT) && (op1->gtOper == GT_CNS_INT)) + if (!op2->OperIs(GT_CNS_INT) && op1->OperIs(GT_CNS_INT)) { std::swap(op1, op2); } @@ -2299,6 +2124,12 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree) return NO_ASSERTION_INDEX; } + if (optLocalAssertionProp) + { + // O1K_SUBTYPE is Global Assertion Prop only + return NO_ASSERTION_INDEX; + } + GenTreeCall* const call = op1->AsCall(); // Note CORINFO_HELP_READYTORUN_ISINSTANCEOF does not have the same argument pattern. @@ -2306,26 +2137,47 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree) // // Also note The CASTCLASS helpers won't appear in predicates as they throw on failure. // So the helper list here is smaller than the one in optAssertionProp_Call. 
+ // if ((call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFINTERFACE)) || (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFARRAY)) || (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFCLASS)) || (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFANY))) { - GenTree* objectNode = call->gtArgs.GetArgByIndex(1)->GetNode(); - GenTree* methodTableNode = call->gtArgs.GetArgByIndex(0)->GetNode(); + GenTree* objectNode = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + GenTree* methodTableNode = call->gtArgs.GetUserArgByIndex(0)->GetNode(); // objectNode can be TYP_I_IMPL in case if it's a constant handle // (e.g. a string literal from frozen segments) + // assert(objectNode->TypeIs(TYP_REF, TYP_I_IMPL)); assert(methodTableNode->TypeIs(TYP_I_IMPL)); - // Reverse the assertion - assert((assertionKind == OAK_EQUAL) || (assertionKind == OAK_NOT_EQUAL)); - assertionKind = (assertionKind == OAK_EQUAL) ? OAK_NOT_EQUAL : OAK_EQUAL; + ValueNum objVN = optConservativeNormalVN(objectNode); + ValueNum typeHndVN = optConservativeNormalVN(methodTableNode); - if (objectNode->OperIs(GT_LCL_VAR)) + if ((objVN != ValueNumStore::NoVN) && vnStore->IsVNTypeHandle(typeHndVN)) { - return optCreateJtrueAssertions(objectNode, methodTableNode, assertionKind, /* helperCallArgs */ true); + AssertionDsc assertion; + assertion.op1.kind = O1K_SUBTYPE; + assertion.op1.vn = objVN; + assertion.op2.kind = O2K_CONST_INT; + assertion.op2.u1.iconVal = vnStore->CoercedConstantValue(typeHndVN); + assertion.op2.vn = typeHndVN; + assertion.op2.SetIconFlag(GTF_ICON_CLASS_HDL); + assertion.assertionKind = OAK_EQUAL; + AssertionIndex index = optAddAssertion(&assertion); + + // We don't need to create a complementary assertion here. We're only interested + // in the assertion that the object is of a certain type. The opposite assertion + // (that the object is not of a certain type) is not useful (at least not yet). + // + // So if we have "if (ISINST(obj, pMT) == null) then create the assertion for the "else" edge. + // + if (relop->OperIs(GT_EQ)) + { + return AssertionInfo::ForNextEdge(index); + } + return index; } } @@ -2353,9 +2205,6 @@ void Compiler::optAssertionGen(GenTree* tree) optAssertionPropCurrentTree = tree; #endif - // For most of the assertions that we create below - // the assertion is true after the tree is processed - bool assertionProven = true; AssertionInfo assertionInfo; switch (tree->OperGet()) { @@ -2380,7 +2229,11 @@ void Compiler::optAssertionGen(GenTree* tree) case GT_ARR_LENGTH: case GT_MDARR_LENGTH: case GT_MDARR_LOWER_BOUND: - assertionInfo = optCreateAssertion(tree->GetIndirOrArrMetaDataAddr(), nullptr, OAK_NOT_EQUAL); + // These indirs (esp. GT_IND and GT_STOREIND) are the most popular sources of assertions. + if (tree->IndirMayFault(this)) + { + assertionInfo = optCreateAssertion(tree->GetIndirOrArrMetaDataAddr(), nullptr, OAK_NOT_EQUAL); + } break; case GT_INTRINSIC: @@ -2418,14 +2271,6 @@ void Compiler::optAssertionGen(GenTree* tree) } break; - case GT_CAST: - // This represets an assertion that we would like to prove to be true. - // If we can prove this assertion true then we can eliminate this cast. - // We only create this assertion for global assertion propagation. 
- assertionInfo = optAssertionGenCast(tree->AsCast()); - assertionProven = false; - break; - case GT_JTRUE: assertionInfo = optAssertionGenJtrue(tree); break; @@ -2435,7 +2280,7 @@ void Compiler::optAssertionGen(GenTree* tree) break; } - if (assertionInfo.HasAssertion() && assertionProven) + if (assertionInfo.HasAssertion()) { tree->SetAssertionInfo(assertionInfo); } @@ -2531,7 +2376,7 @@ AssertionIndex Compiler::optAssertionIsSubrange(GenTree* tree, IntegralRange ran { // For local assertion prop use comparison on locals, and use comparison on vns for global prop. bool isEqual = optLocalAssertionProp - ? (curAssertion->op1.lcl.lclNum == tree->AsLclVarCommon()->GetLclNum()) + ? (curAssertion->op1.lclNum == tree->AsLclVarCommon()->GetLclNum()) : (curAssertion->op1.vn == vnStore->VNConservativeNormalValue(tree->gtVNPair)); if (!isEqual) { @@ -2912,7 +2757,7 @@ GenTree* Compiler::optVNBasedFoldExpr(BasicBlock* block, GenTree* parent, GenTre // GenTree* Compiler::optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, GenTree* tree) { - if (tree->OperGet() == GT_JTRUE) + if (tree->OperIs(GT_JTRUE)) { // Treat JTRUE separately to extract side effects into respective statements rather // than using a COMMA separated op1. @@ -2952,7 +2797,7 @@ GenTree* Compiler::optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, G { float value = vnStore->ConstantValue(vnCns); - if (tree->TypeGet() == TYP_INT) + if (tree->TypeIs(TYP_INT)) { // Same sized reinterpretation of bits to integer conValTree = gtNewIconNode(*(reinterpret_cast(&value))); @@ -2970,7 +2815,7 @@ GenTree* Compiler::optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, G { double value = vnStore->ConstantValue(vnCns); - if (tree->TypeGet() == TYP_LONG) + if (tree->TypeIs(TYP_LONG)) { conValTree = gtNewLconNode(*(reinterpret_cast(&value))); } @@ -3033,7 +2878,7 @@ GenTree* Compiler::optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, G case TYP_REF: { - if (tree->TypeGet() == TYP_REF) + if (tree->TypeIs(TYP_REF)) { const size_t value = vnStore->ConstantValue(vnCns); if (value == 0) @@ -3365,17 +3210,6 @@ GenTree* Compiler::optConstantAssertionProp(AssertionDsc* curAssertion, newTree->BashToConst(curAssertion->op2.dconVal, tree->TypeGet()); break; - case O2K_CONST_LONG: - if (newTree->TypeIs(TYP_LONG)) - { - newTree->BashToConst(curAssertion->op2.lconVal); - } - else - { - newTree->BashToConst(static_cast(curAssertion->op2.lconVal)); - } - break; - case O2K_CONST_INT: // Don't propagate handles if we need to report relocs. @@ -3396,10 +3230,6 @@ GenTree* Compiler::optConstantAssertionProp(AssertionDsc* curAssertion, // and insert casts in morph, which would be problematic to track // here). assert(tree->TypeGet() == lvaGetDesc(lclNum)->TypeGet()); - // Assertions for small-typed locals should have been normalized - // when the assertion was created. 
- assert(!varTypeIsSmall(tree) || (curAssertion->op2.u1.iconVal == - optCastConstantSmall(curAssertion->op2.u1.iconVal, tree->TypeGet()))); if (curAssertion->op2.HasIconFlag()) { @@ -3597,42 +3427,30 @@ GenTree* Compiler::optCopyAssertionProp(AssertionDsc* curAssertion, GenTreeLclVarCommon* tree, Statement* stmt DEBUGARG(AssertionIndex index)) { + assert(optLocalAssertionProp); + const AssertionDsc::AssertionDscOp1& op1 = curAssertion->op1; const AssertionDsc::AssertionDscOp2& op2 = curAssertion->op2; - noway_assert(op1.lcl.lclNum != op2.lcl.lclNum); + noway_assert(op1.lclNum != op2.lclNum); const unsigned lclNum = tree->GetLclNum(); // Make sure one of the lclNum of the assertion matches with that of the tree. - if (op1.lcl.lclNum != lclNum && op2.lcl.lclNum != lclNum) + if (op1.lclNum != lclNum && op2.lclNum != lclNum) { return nullptr; } // Extract the matching lclNum and ssaNum, as well as the field sequence. unsigned copyLclNum; - unsigned copySsaNum; - if (op1.lcl.lclNum == lclNum) + if (op1.lclNum == lclNum) { - copyLclNum = op2.lcl.lclNum; - copySsaNum = op2.lcl.ssaNum; + copyLclNum = op2.lclNum; } else { - copyLclNum = op1.lcl.lclNum; - copySsaNum = op1.lcl.ssaNum; - } - - if (!optLocalAssertionProp) - { - // Extract the ssaNum of the matching lclNum. - unsigned ssaNum = (op1.lcl.lclNum == lclNum) ? op1.lcl.ssaNum : op2.lcl.ssaNum; - - if (ssaNum != tree->GetSsaNum()) - { - return nullptr; - } + copyLclNum = op1.lclNum; } LclVarDsc* const copyVarDsc = lvaGetDesc(copyLclNum); @@ -3645,7 +3463,7 @@ GenTree* Compiler::optCopyAssertionProp(AssertionDsc* curAssertion, } // Make sure we can perform this copy prop. - if (optCopyProp_LclVarScore(lclVarDsc, copyVarDsc, curAssertion->op1.lcl.lclNum == lclNum) <= 0) + if (optCopyProp_LclVarScore(lclVarDsc, copyVarDsc, curAssertion->op1.lclNum == lclNum) <= 0) { return nullptr; } @@ -3665,7 +3483,6 @@ GenTree* Compiler::optCopyAssertionProp(AssertionDsc* curAssertion, } tree->SetLclNum(copyLclNum); - tree->SetSsaNum(copySsaNum); // Copy prop and last-use copy elision happens at the same time in morph. // This node may potentially not be a last use of the new local. @@ -3774,30 +3591,26 @@ GenTree* Compiler::optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, GenTreeL continue; } - // Constant prop. - // - // The case where the tree type could be different than the LclVar type is caused by - // gtFoldExpr, specifically the case of a cast, where the fold operation changes the type of the LclVar - // node. In such a case is not safe to perform the substitution since later on the JIT will assert mismatching - // types between trees. - // - if (curAssertion->op1.lcl.lclNum == lclNum) + // Verify types match + if (tree->TypeGet() != lvaGetRealType(lclNum)) { - LclVarDsc* const lclDsc = lvaGetDesc(lclNum); - // Verify types match - if (tree->TypeGet() == lclDsc->lvType) - { - // If local assertion prop, just perform constant prop. - if (optLocalAssertionProp) - { - return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(assertionIndex)); - } + continue; + } - // If global assertion, perform constant propagation only if the VN's match. 
- if (curAssertion->op1.vn == vnStore->VNConservativeNormalValue(tree->gtVNPair)) - { - return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(assertionIndex)); - } + if (optLocalAssertionProp) + { + // Check lclNum in Local Assertion Prop + if (curAssertion->op1.lclNum == lclNum) + { + return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(assertionIndex)); + } + } + else + { + // Check VN in Global Assertion Prop + if (curAssertion->op1.vn == vnStore->VNConservativeNormalValue(tree->gtVNPair)) + { + return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(assertionIndex)); } } } @@ -3911,7 +3724,7 @@ GenTree* Compiler::optAssertionProp_LocalStore(ASSERT_VALARG_TP assertions, GenT // does not kill the zerobj assertion for s. // unsigned const dstLclNum = store->GetLclNum(); - bool const dstLclIsStruct = lvaGetDesc(dstLclNum)->TypeGet() == TYP_STRUCT; + bool const dstLclIsStruct = lvaGetDesc(dstLclNum)->TypeIs(TYP_STRUCT); AssertionIndex const dstIndex = optLocalAssertionIsEqualOrNotEqual(O1K_LCLVAR, dstLclNum, dstLclIsStruct ? O2K_ZEROOBJ : O2K_CONST_INT, 0, assertions); @@ -3986,11 +3799,15 @@ GenTree* Compiler::optAssertionProp_BlockStore(ASSERT_VALARG_TP assertions, GenT // Arguments: // assertions - set of live assertions // tree - the integral tree to analyze +// stmt - statement containing "tree" +// block - block containing "stmt" // isKnownNonZero - [OUT] set to true if the tree is known to be non-zero // isKnownNonNegative - [OUT] set to true if the tree is known to be non-negative // void Compiler::optAssertionProp_RangeProperties(ASSERT_VALARG_TP assertions, GenTree* tree, + Statement* stmt, + BasicBlock* block, bool* isKnownNonZero, bool* isKnownNonNegative) { @@ -4045,8 +3862,7 @@ void Compiler::optAssertionProp_RangeProperties(ASSERT_VALARG_TP assertions, } // First, analyze possible X ==/!= CNS assertions. - if (curAssertion->IsConstantInt32Assertion() && (curAssertion->op1.kind == O1K_LCLVAR) && - (curAssertion->op1.vn == treeVN)) + if (curAssertion->IsConstantInt32Assertion() && (curAssertion->op1.vn == treeVN)) { if ((curAssertion->assertionKind == OAK_NOT_EQUAL) && (curAssertion->op2.u1.iconVal == 0)) { @@ -4101,11 +3917,79 @@ void Compiler::optAssertionProp_RangeProperties(ASSERT_VALARG_TP assertions, // (uint)X <= CNS means X is [0..CNS] *isKnownNonNegative = true; } - else if (!info.isUnsigned && ((cmpOper == GT_GE) || (cmpOper == GT_GT))) + else if (!info.isUnsigned && ((cmpOper == GT_GE) || (cmpOper == GT_GT))) + { + // X >= CNS means X is [CNS..unknown] + *isKnownNonNegative = true; + *isKnownNonZero = (cmpOper == GT_GT) || (info.constVal > 0); + } + } + } + + if (*isKnownNonZero && *isKnownNonNegative) + { + return; + } + + // Let's see if MergeEdgeAssertions can help us: + if (tree->TypeIs(TYP_INT)) + { + // See if (X + CNS) is known to be non-negative + if (tree->OperIs(GT_ADD) && tree->gtGetOp2()->IsIntCnsFitsInI32()) + { + Range rng = Range(Limit(Limit::keDependent)); + ValueNum vn = vnStore->VNConservativeNormalValue(tree->gtGetOp1()->gtVNPair); + if (!RangeCheck::TryGetRangeFromAssertions(this, vn, assertions, &rng)) + { + return; + } + + int cns = static_cast(tree->gtGetOp2()->AsIntCon()->IconValue()); + rng.LowerLimit().AddConstant(cns); + + if ((rng.LowerLimit().IsConstant() && !rng.LowerLimit().AddConstant(cns)) || + (rng.UpperLimit().IsConstant() && !rng.UpperLimit().AddConstant(cns))) + { + // Add cns to both bounds if they are constants. Make sure the addition doesn't overflow. 
+ return; + } + + if (rng.LowerLimit().IsConstant()) + { + // E.g. "X + -8" when X's range is [8..unknown] + // it's safe to say "X + -8" is non-negative + if ((rng.LowerLimit().GetConstant() == 0)) + { + *isKnownNonNegative = true; + } + + // E.g. "X + 8" when X's range is [0..CNS] + // Here we have to check the upper bound as well to avoid overflow + if ((rng.LowerLimit().GetConstant() > 0) && rng.UpperLimit().IsConstant() && + rng.UpperLimit().GetConstant() > rng.LowerLimit().GetConstant()) + { + *isKnownNonNegative = true; + *isKnownNonZero = true; + } + } + } + else + { + Range rng = Range(Limit(Limit::keUnknown)); + if (RangeCheck::TryGetRangeFromAssertions(this, treeVN, assertions, &rng)) { - // X >= CNS means X is [CNS..unknown] - *isKnownNonNegative = true; - *isKnownNonZero = (cmpOper == GT_GT) || (info.constVal > 0); + Limit lowerBound = rng.LowerLimit(); + if (lowerBound.IsConstant()) + { + if (lowerBound.GetConstant() >= 0) + { + *isKnownNonNegative = true; + } + if (lowerBound.GetConstant() > 0) + { + *isKnownNonZero = true; + } + } } } } @@ -4121,11 +4005,15 @@ void Compiler::optAssertionProp_RangeProperties(ASSERT_VALARG_TP assertions, // assertions - set of live assertions // tree - the DIV/UDIV/MOD/UMOD node to optimize // stmt - statement containing DIV/UDIV/MOD/UMOD +// block - the block containing the statement // // Returns: // Updated DIV/UDIV/MOD/UMOD node, or nullptr // -GenTree* Compiler::optAssertionProp_ModDiv(ASSERT_VALARG_TP assertions, GenTreeOp* tree, Statement* stmt) +GenTree* Compiler::optAssertionProp_ModDiv(ASSERT_VALARG_TP assertions, + GenTreeOp* tree, + Statement* stmt, + BasicBlock* block) { GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); @@ -4134,8 +4022,8 @@ GenTree* Compiler::optAssertionProp_ModDiv(ASSERT_VALARG_TP assertions, GenTreeO bool op2IsNotZero; bool op1IsNotNegative; bool op2IsNotNegative; - optAssertionProp_RangeProperties(assertions, op1, &op1IsNotZero, &op1IsNotNegative); - optAssertionProp_RangeProperties(assertions, op2, &op2IsNotZero, &op2IsNotNegative); + optAssertionProp_RangeProperties(assertions, op1, stmt, block, &op1IsNotZero, &op1IsNotNegative); + optAssertionProp_RangeProperties(assertions, op2, stmt, block, &op2IsNotZero, &op2IsNotNegative); bool changed = false; if (op1IsNotNegative && op2IsNotNegative && tree->OperIs(GT_DIV, GT_MOD)) @@ -4203,8 +4091,8 @@ GenTree* Compiler::optAssertionProp_Return(ASSERT_VALARG_TP assertions, GenTreeO AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual( optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions) { - noway_assert((op1Kind == O1K_LCLVAR) || (op1Kind == O1K_EXACT_TYPE) || (op1Kind == O1K_SUBTYPE)); - noway_assert((op2Kind == O2K_CONST_INT) || (op2Kind == O2K_IND_CNS_INT) || (op2Kind == O2K_ZEROOBJ)); + noway_assert(op1Kind == O1K_LCLVAR); + noway_assert((op2Kind == O2K_CONST_INT) || (op2Kind == O2K_ZEROOBJ)); assert(optLocalAssertionProp); ASSERT_TP apDependent = BitVecOps::Intersection(apTraits, GetAssertionDep(lclNum), assertions); @@ -4221,7 +4109,7 @@ AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual( continue; } - if ((curAssertion->op1.kind == op1Kind) && (curAssertion->op1.lcl.lclNum == lclNum) && + if ((curAssertion->op1.kind == op1Kind) && (curAssertion->op1.lclNum == lclNum) && (curAssertion->op2.kind == op2Kind)) { bool constantIsEqual = (curAssertion->op2.u1.iconVal == cnsVal); @@ -4345,14 +4233,17 @@ AssertionIndex Compiler::optGlobalAssertionIsEqualOrNotEqualZero(ASSERT_VALARG_T * 
Returns the modified tree, or nullptr if no assertion prop took place */ -GenTree* Compiler::optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt) +GenTree* Compiler::optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, + GenTree* tree, + Statement* stmt, + BasicBlock* block) { assert(tree->OperIsCompare()); if (!optLocalAssertionProp) { // If global assertion prop then use value numbering. - return optAssertionPropGlobal_RelOp(assertions, tree, stmt); + return optAssertionPropGlobal_RelOp(assertions, tree, stmt, block); } // @@ -4394,6 +4285,11 @@ Compiler::AssertVisit Compiler::optVisitReachingAssertions(ValueNum vn, TAssertV GenTreeLclVarCommon* node = ssaDef->GetDefNode(); assert(node->IsPhiDefn()); + // Keep track of the set of phi-preds + // + BitVecTraits traits(fgBBNumMax + 1, this); + BitVec visitedBlocks = BitVecOps::MakeEmpty(&traits); + for (GenTreePhi::Use& use : node->Data()->AsPhi()->Uses()) { GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg(); @@ -4404,6 +4300,22 @@ Compiler::AssertVisit Compiler::optVisitReachingAssertions(ValueNum vn, TAssertV // The visitor wants to abort the walk. return AssertVisit::Abort; } + BitVecOps::AddElemD(&traits, visitedBlocks, phiArg->gtPredBB->bbNum); + } + + // Verify the set of phi-preds covers the set of block preds + // + for (BasicBlock* const pred : ssaDef->GetBlock()->PredBlocks()) + { + if (!BitVecOps::IsMember(&traits, visitedBlocks, pred->bbNum)) + { + JITDUMP("... optVisitReachingAssertions in " FMT_BB ": pred " FMT_BB " not a phi-pred\n", + ssaDef->GetBlock()->bbNum, pred->bbNum); + + // We missed examining a block pred. Fail the phi inference. + // + return AssertVisit::Abort; + } } return AssertVisit::Continue; } @@ -4415,11 +4327,15 @@ Compiler::AssertVisit Compiler::optVisitReachingAssertions(ValueNum vn, TAssertV // assertions - set of live assertions // tree - tree to possibly optimize // stmt - statement containing the tree +// block - the block containing the statement // // Returns: // The modified tree, or nullptr if no assertion prop took place. // -GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt) +GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, + GenTree* tree, + Statement* stmt, + BasicBlock* block) { assert(!optLocalAssertionProp); @@ -4431,7 +4347,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen if (op2->IsIntegralConst(0) && tree->OperIsCmpCompare()) { bool isNonZero, isNeverNegative; - optAssertionProp_RangeProperties(assertions, op1, &isNonZero, &isNeverNegative); + optAssertionProp_RangeProperties(assertions, op1, stmt, block, &isNonZero, &isNeverNegative); if (tree->OperIs(GT_GE, GT_LT) && isNeverNegative) { @@ -4489,6 +4405,32 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen return optAssertionProp_Update(newTree, tree, stmt); } + ValueNum op1VN = vnStore->VNConservativeNormalValue(op1->gtVNPair); + ValueNum op2VN = vnStore->VNConservativeNormalValue(op2->gtVNPair); + + // See if we can fold "X relop CNS" using TryGetRangeFromAssertions. + int op2cns; + if (op1->TypeIs(TYP_INT) && op2->TypeIs(TYP_INT) && + vnStore->IsVNIntegralConstant(op2VN, &op2cns) + // "op2cns != 0" is purely a TP quirk (such relops are handled by the code above): + && (op2cns != 0)) + { + // NOTE: we can call TryGetRangeFromAssertions for op2 as well if we want, but it's not cheap. 
+ Range rng1 = Range(Limit(Limit::keUndef)); + Range rng2 = Range(Limit(Limit::keConstant, op2cns)); + + if (RangeCheck::TryGetRangeFromAssertions(this, op1VN, assertions, &rng1)) + { + RangeOps::RelationKind kind = RangeOps::EvalRelop(tree->OperGet(), tree->IsUnsigned(), rng1, rng2); + if ((kind != RangeOps::RelationKind::Unknown)) + { + newTree = kind == RangeOps::RelationKind::AlwaysTrue ? gtNewTrue() : gtNewFalse(); + newTree = gtWrapWithSideEffects(newTree, tree, GTF_ALL_EFFECT); + return optAssertionProp_Update(newTree, tree, stmt); + } + } + } + // Else check if we have an equality check involving a local or an indir if (!tree->OperIs(GT_EQ, GT_NE)) { @@ -4510,6 +4452,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen // and if all of them are known to be non-null, we can bash the comparison to true/false. if (op2->IsIntegralConst(0) && op1->TypeIs(TYP_REF)) { + JITDUMP("Checking PHI [%06u] arguments for non-nullness\n", dspTreeID(op1)) auto visitor = [this](ValueNum reachingVN, ASSERT_TP reachingAssertions) { return optAssertionVNIsNonNull(reachingVN, reachingAssertions) ? AssertVisit::Continue : AssertVisit::Abort; }; @@ -4553,19 +4496,19 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen { printf("%d\n", vnStore->ConstantValue(vnCns)); } - else if (op1->TypeGet() == TYP_LONG) + else if (op1->TypeIs(TYP_LONG)) { printf("%lld\n", vnStore->ConstantValue(vnCns)); } - else if (op1->TypeGet() == TYP_DOUBLE) + else if (op1->TypeIs(TYP_DOUBLE)) { printf("%f\n", vnStore->ConstantValue(vnCns)); } - else if (op1->TypeGet() == TYP_FLOAT) + else if (op1->TypeIs(TYP_FLOAT)) { printf("%f\n", vnStore->ConstantValue(vnCns)); } - else if (op1->TypeGet() == TYP_REF) + else if (op1->TypeIs(TYP_REF)) { // The only constant of TYP_REF that ValueNumbering supports is 'null' if (vnStore->ConstantValue(vnCns) == 0) @@ -4577,7 +4520,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen printf("%d (gcref)\n", static_cast(vnStore->ConstantValue(vnCns))); } } - else if (op1->TypeGet() == TYP_BYREF) + else if (op1->TypeIs(TYP_BYREF)) { printf("%d (byref)\n", static_cast(vnStore->ConstantValue(vnCns))); } @@ -4598,7 +4541,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen op1->gtFlags |= (vnStore->GetHandleFlags(vnCns) & GTF_ICON_HDL_MASK); } } - else if (op1->TypeGet() == TYP_LONG) + else if (op1->TypeIs(TYP_LONG)) { op1->BashToConst(vnStore->ConstantValue(vnCns)); @@ -4607,7 +4550,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen op1->gtFlags |= (vnStore->GetHandleFlags(vnCns) & GTF_ICON_HDL_MASK); } } - else if (op1->TypeGet() == TYP_DOUBLE) + else if (op1->TypeIs(TYP_DOUBLE)) { double constant = vnStore->ConstantValue(vnCns); op1->BashToConst(constant); @@ -4618,7 +4561,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen // assertion we have made. allowReverse = !FloatingPointUtils::isNaN(constant); } - else if (op1->TypeGet() == TYP_FLOAT) + else if (op1->TypeIs(TYP_FLOAT)) { float constant = vnStore->ConstantValue(vnCns); op1->BashToConst(constant); @@ -4626,11 +4569,11 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen // See comments for TYP_DOUBLE. 
allowReverse = !FloatingPointUtils::isNaN(constant); } - else if (op1->TypeGet() == TYP_REF) + else if (op1->TypeIs(TYP_REF)) { op1->BashToConst(static_cast(vnStore->ConstantValue(vnCns)), TYP_REF); } - else if (op1->TypeGet() == TYP_BYREF) + else if (op1->TypeIs(TYP_BYREF)) { op1->BashToConst(static_cast(vnStore->ConstantValue(vnCns)), TYP_BYREF); } @@ -4643,7 +4586,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen // set foldResult to either 0 or 1 bool foldResult = assertionKindIsEqual; - if (tree->gtOper == GT_NE) + if (tree->OperIs(GT_NE)) { foldResult = !foldResult; } @@ -4723,19 +4666,19 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen */ GenTree* Compiler::optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt) { - assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE); + assert(tree->OperIs(GT_EQ) || tree->OperIs(GT_NE)); GenTree* op1 = tree->AsOp()->gtOp1; GenTree* op2 = tree->AsOp()->gtOp2; // For Local AssertionProp we only can fold when op1 is a GT_LCL_VAR - if (op1->gtOper != GT_LCL_VAR) + if (!op1->OperIs(GT_LCL_VAR)) { return nullptr; } // For Local AssertionProp we only can fold when op2 is a GT_CNS_INT - if (op2->gtOper != GT_CNS_INT) + if (!op2->OperIs(GT_CNS_INT)) { return nullptr; } @@ -4795,7 +4738,7 @@ GenTree* Compiler::optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, GenT // Return either CNS_INT 0 or CNS_INT 1. bool foldResult = (constantIsEqual == assertionKindIsEqual); - if (tree->gtOper == GT_NE) + if (tree->OperIs(GT_NE)) { foldResult = !foldResult; } @@ -4818,11 +4761,15 @@ GenTree* Compiler::optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, GenT // assertions - the set of live assertions // cast - the cast for which to propagate the assertions // stmt - statement "cast" is a part of, "nullptr" for local prop +// block - the block containing the statement // // Return Value: // The, possibly modified, cast tree or "nullptr" if no propagation took place. // -GenTree* Compiler::optAssertionProp_Cast(ASSERT_VALARG_TP assertions, GenTreeCast* cast, Statement* stmt) +GenTree* Compiler::optAssertionProp_Cast(ASSERT_VALARG_TP assertions, + GenTreeCast* cast, + Statement* stmt, + BasicBlock* block) { GenTree* op1 = cast->CastOp(); @@ -4835,25 +4782,25 @@ GenTree* Compiler::optAssertionProp_Cast(ASSERT_VALARG_TP assertions, GenTreeCas // Skip over a GT_COMMA node(s), if necessary to get to the lcl. GenTree* lcl = op1->gtEffectiveVal(); - // If we don't have a cast of a LCL_VAR then bail. - if (!lcl->OperIs(GT_LCL_VAR)) - { - return nullptr; - } - // Try and see if we can make this cast into a cheaper zero-extending version // if the input is known to be non-negative. if (!cast->IsUnsigned() && genActualTypeIsInt(lcl) && cast->TypeIs(TYP_LONG) && (TARGET_POINTER_SIZE == 8)) { bool isKnownNonZero; bool isKnownNonNegative; - optAssertionProp_RangeProperties(assertions, lcl, &isKnownNonZero, &isKnownNonNegative); + optAssertionProp_RangeProperties(assertions, lcl, stmt, block, &isKnownNonZero, &isKnownNonNegative); if (isKnownNonNegative) { cast->SetUnsigned(); } } + // If we don't have a cast of a LCL_VAR then bail. 
+ if (!lcl->OperIs(GT_LCL_VAR)) + { + return nullptr; + } + IntegralRange range = IntegralRange::ForCastInput(cast); AssertionIndex index = optAssertionIsSubrange(lcl, range, assertions); if (index != NO_ASSERTION_INDEX) @@ -5040,7 +4987,7 @@ bool Compiler::optAssertionIsNonNull(GenTree* op, ASSERT_VALARG_TP assertions) if ((curAssertion->assertionKind == OAK_NOT_EQUAL) && // kind (curAssertion->op1.kind == O1K_LCLVAR) && // op1 (curAssertion->op2.kind == O2K_CONST_INT) && // op2 - (curAssertion->op1.lcl.lclNum == lclNum) && (curAssertion->op2.u1.iconVal == 0)) + (curAssertion->op1.lclNum == lclNum) && (curAssertion->op2.u1.iconVal == 0)) { return true; } @@ -5080,6 +5027,7 @@ bool Compiler::optAssertionVNIsNonNull(ValueNum vn, ASSERT_VALARG_TP assertions) } } } + return false; } @@ -5177,8 +5125,9 @@ static GCInfo::WriteBarrierForm GetWriteBarrierForm(Compiler* comp, ValueNum vn) { if (funcApp.m_func == VNF_PtrToArrElem) { - // Arrays are always on the heap - return GCInfo::WriteBarrierForm::WBF_BarrierUnchecked; + // Check whether the array is on the heap + ValueNum arrayVN = funcApp.m_args[1]; + return GetWriteBarrierForm(comp, arrayVN); } if (funcApp.m_func == VNF_PtrToLoc) { @@ -5373,6 +5322,50 @@ GenTree* Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, GenTree } #endif // FEATURE_ENABLE_NO_RANGE_CHECKS + GenTreeBoundsChk* arrBndsChk = tree->AsBoundsChk(); + ValueNum vnCurIdx = vnStore->VNConservativeNormalValue(arrBndsChk->GetIndex()->gtVNPair); + ValueNum vnCurLen = vnStore->VNConservativeNormalValue(arrBndsChk->GetArrayLength()->gtVNPair); + + auto dropBoundsCheck = [&](INDEBUG(const char* reason)) -> GenTree* { + JITDUMP("\nVN based redundant (%s) bounds check assertion prop in " FMT_BB ":\n", reason, compCurBB->bbNum); + DISPTREE(tree); + if (arrBndsChk != stmt->GetRootNode()) + { + // Defer the removal. + arrBndsChk->gtFlags |= GTF_CHK_INDEX_INBND; + return nullptr; + } + + GenTree* newTree = optRemoveStandaloneRangeCheck(arrBndsChk, stmt); + return optAssertionProp_Update(newTree, arrBndsChk, stmt); + }; + + // First, check if we have arr[arr.Length - cns] when we know arr.Length is >= cns. 
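// [Illustrative sketch, not part of the patch] Worked example for the pattern
// handled below: for "a[a.Length - 2]" the index VN is ADD(a.Length, -2), so
// delta = -2, and if assertions prove a.Length >= 2 then lenLowerLimit = 2 and
// the bounds check is redundant. The guard used below, written as a standalone
// predicate (hypothetical name, INT_MIN from <climits>):
//
//     static bool LengthMinusConstIsInBounds(int lenLowerLimit, int delta)
//     {
//         return (lenLowerLimit > 0) && (delta < 0) && (delta > INT_MIN) &&
//                (lenLowerLimit >= -delta);
//     }
//
// With lenLowerLimit = 2 and delta = -2 every condition holds, so the check for
// "a[a.Length - 2]" can be dropped.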
+ VNFuncApp funcApp; + if (vnStore->GetVNFunc(vnCurIdx, &funcApp) && (funcApp.m_func == VNF_ADD)) + { + if (!vnStore->IsVNInt32Constant(funcApp.m_args[1])) + { + // Normalize constants to be on the right side + std::swap(funcApp.m_args[0], funcApp.m_args[1]); + } + + Range rng = Range(Limit(Limit::keUnknown)); + if ((funcApp.m_args[0] == vnCurLen) && vnStore->IsVNInt32Constant(funcApp.m_args[1]) && + RangeCheck::TryGetRangeFromAssertions(this, vnCurLen, assertions, &rng) && rng.LowerLimit().IsConstant()) + { + // Lower known limit of ArrLen: + const int lenLowerLimit = rng.LowerLimit().GetConstant(); + + // Negative delta in the array access (ArrLen + -CNS) + const int delta = vnStore->GetConstantInt32(funcApp.m_args[1]); + if ((lenLowerLimit > 0) && (delta < 0) && (delta > INT_MIN) && (lenLowerLimit >= -delta)) + { + return dropBoundsCheck(INDEBUG("a[a.Length-cns] when a.Length is known to be >= cns")); + } + } + } + BitVecOps::Iter iter(apTraits, assertions); unsigned index = 0; while (iter.NextElem(&index)) @@ -5389,38 +5382,21 @@ GenTree* Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, GenTree continue; } - GenTreeBoundsChk* arrBndsChk = tree->AsBoundsChk(); - - // Set 'isRedundant' to true if we can determine that 'arrBndsChk' can be - // classified as a redundant bounds check using 'curAssertion' - bool isRedundant = false; -#ifdef DEBUG - const char* dbgMsg = "Not Set"; -#endif - // Do we have a previous range check involving the same 'vnLen' upper bound? if (curAssertion->op1.bnd.vnLen == vnStore->VNConservativeNormalValue(arrBndsChk->GetArrayLength()->gtVNPair)) { - ValueNum vnCurIdx = vnStore->VNConservativeNormalValue(arrBndsChk->GetIndex()->gtVNPair); - // Do we have the exact same lower bound 'vnIdx'? // a[i] followed by a[i] if (curAssertion->op1.bnd.vnIdx == vnCurIdx) { - isRedundant = true; -#ifdef DEBUG - dbgMsg = "a[i] followed by a[i]"; -#endif + return dropBoundsCheck(INDEBUG("a[i] followed by a[i]")); } // Are we using zero as the index? // It can always be considered as redundant with any previous value // a[*] followed by a[0] else if (vnCurIdx == vnStore->VNZeroForType(arrBndsChk->GetIndex()->TypeGet())) { - isRedundant = true; -#ifdef DEBUG - dbgMsg = "a[*] followed by a[0]"; -#endif + return dropBoundsCheck(INDEBUG("a[*] followed by a[0]")); } // Do we have two constant indexes? else if (vnStore->IsVNConstant(curAssertion->op1.bnd.vnIdx) && vnStore->IsVNConstant(vnCurIdx)) @@ -5441,10 +5417,7 @@ GenTree* Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, GenTree // a[K1] followed by a[K2], with K2 >= 0 and K1 >= K2 if (index2 >= 0 && index1 >= index2) { - isRedundant = true; -#ifdef DEBUG - dbgMsg = "a[K1] followed by a[K2], with K2 >= 0 and K1 >= K2"; -#endif + return dropBoundsCheck(INDEBUG("a[K1] followed by a[K2], with K2 >= 0 and K1 >= K2")); } } } @@ -5453,35 +5426,6 @@ GenTree* Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, GenTree // a[i] followed by a[j] when j is known to be >= i // a[i] followed by a[5] when i is known to be >= 5 } - - if (!isRedundant) - { - continue; - } - -#ifdef DEBUG - if (verbose) - { - printf("\nVN based redundant (%s) bounds check assertion prop for index #%02u in " FMT_BB ":\n", dbgMsg, - assertionIndex, compCurBB->bbNum); - gtDispTree(tree, nullptr, nullptr, true); - } -#endif - if (arrBndsChk == stmt->GetRootNode()) - { - // We have a top-level bounds check node. - // This can happen when trees are broken up due to inlining. 
- // optRemoveStandaloneRangeCheck will return the modified tree (side effects or a no-op). - GenTree* newTree = optRemoveStandaloneRangeCheck(arrBndsChk, stmt); - - return optAssertionProp_Update(newTree, arrBndsChk, stmt); - } - - // Defer actually removing the tree until processing reaches its parent comma, since - // optRemoveCommaBasedRangeCheck needs to rewrite the whole comma tree. - arrBndsChk->gtFlags |= GTF_CHK_INDEX_INBND; - - return nullptr; } return nullptr; @@ -5605,7 +5549,7 @@ GenTree* Compiler::optAssertionProp(ASSERT_VALARG_TP assertions, GenTree* tree, case GT_DIV: case GT_UMOD: case GT_UDIV: - return optAssertionProp_ModDiv(assertions, tree->AsOp(), stmt); + return optAssertionProp_ModDiv(assertions, tree->AsOp(), stmt, block); case GT_BLK: case GT_IND: @@ -5620,7 +5564,7 @@ GenTree* Compiler::optAssertionProp(ASSERT_VALARG_TP assertions, GenTree* tree, return optAssertionProp_Comma(assertions, tree, stmt); case GT_CAST: - return optAssertionProp_Cast(assertions, tree->AsCast(), stmt); + return optAssertionProp_Cast(assertions, tree->AsCast(), stmt, block); case GT_CALL: return optAssertionProp_Call(assertions, tree->AsCall(), stmt); @@ -5631,7 +5575,7 @@ GenTree* Compiler::optAssertionProp(ASSERT_VALARG_TP assertions, GenTree* tree, case GT_LE: case GT_GT: case GT_GE: - return optAssertionProp_RelOp(assertions, tree, stmt); + return optAssertionProp_RelOp(assertions, tree, stmt, block); case GT_JTRUE: if (block != nullptr) @@ -5671,6 +5615,149 @@ void Compiler::optImpliedAssertions(AssertionIndex assertionIndex, ASSERT_TP& ac } } +//------------------------------------------------------------------------ +// optCreateJumpTableImpliedAssertions: Create assertions for the switch statement +// for each of its jump targets. +// +// Arguments: +// switchBb - The switch statement block. +// +// Returns: +// true if any modifications were made, false otherwise. +// +bool Compiler::optCreateJumpTableImpliedAssertions(BasicBlock* switchBb) +{ + assert(!optLocalAssertionProp); + assert(switchBb->KindIs(BBJ_SWITCH)); + assert(switchBb->lastStmt() != nullptr); + bool modified = false; + + GenTree* switchTree = switchBb->lastStmt()->GetRootNode()->gtEffectiveVal(); + assert(switchTree->OperIs(GT_SWITCH)); + + // bbsCount is uint32_t, but it's unlikely to be more than INT32_MAX. + noway_assert(switchBb->GetSwitchTargets()->bbsCount <= INT32_MAX); + + ValueNum opVN = optConservativeNormalVN(switchTree->gtGetOp1()); + if (opVN == ValueNumStore::NoVN) + { + return modified; + } + + if (vnStore->TypeOfVN(opVN) != TYP_INT) + { + // Should probably be an assert instead - GT_SWITCH is expected to be TYP_INT. + return modified; + } + + // Typically, the switch value is ADD(X, -cns), so we actually want to create the assertions for X + int offset = 0; + vnStore->PeelOffsetsI32(&opVN, &offset); + + int jumpCount = static_cast(switchBb->GetSwitchTargets()->bbsCount); + FlowEdge** jumpTable = switchBb->GetSwitchTargets()->bbsDstTab; + bool hasDefault = switchBb->GetSwitchTargets()->bbsHasDefault; + + for (int jmpTargetIdx = 0; jmpTargetIdx < jumpCount; jmpTargetIdx++) + { + // The value for each target is jmpTargetIdx - offset. + if (CheckedOps::SubOverflows(jmpTargetIdx, offset, false)) + { + continue; + } + int value = jmpTargetIdx - offset; + + // We can only make "X == caseValue" assertions for blocks with a single edge from the switch. 
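// [Illustrative worked example, not part of the patch] For
//     switch (x) { case 10: ...; case 11: ...; case 12: ...; }
// the switch operand is typically normalized to ADD(x, -10), so peeling the
// offset yields opVN = x and offset = -10. Jump-table index 2 then maps to
// value = 2 - (-10) = 12, i.e. the assertion "x == 12" can be attached to the
// block handling case 12, provided that block is reached by a single switch edge.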
+ BasicBlock* target = jumpTable[jmpTargetIdx]->getDestinationBlock(); + if (target->GetUniquePred(this) != switchBb) + { + // Target block is potentially reachable from multiple blocks (outside the switch). + continue; + } + + if (fgGetPredForBlock(target, switchBb)->getDupCount() > 1) + { + // We have just one predecessor (BBJ_SWITCH), but there may be multiple edges (cases) per target. + continue; + } + + AssertionInfo newAssertIdx = NO_ASSERTION_INDEX; + + // Is this target a default case? + if (hasDefault && (jmpTargetIdx == jumpCount - 1)) + { + // For default case we can create "X >= maxValue" assertion. Example: + // + // void Test(ReadOnlySpan name) + // { + // switch (name.Length) + // { + // case 0: ... + // case 1: ... + // ... + // case 7: ... + // default: %name.Length is >= 8 here% + // } + // + if ((value > 0) && !vnStore->IsVNConstant(opVN)) + { + AssertionDsc dsc = {}; + dsc.assertionKind = OAK_NOT_EQUAL; + dsc.op2.kind = O2K_CONST_INT; + dsc.op2.vn = vnStore->VNZeroForType(TYP_INT); + dsc.op2.u1.iconVal = 0; + dsc.op2.SetIconFlag(GTF_EMPTY); + if (vnStore->IsVNNeverNegative(opVN)) + { + // Create "X >= value" assertion (both operands are never negative) + dsc.op1.kind = O1K_CONSTANT_LOOP_BND; + dsc.op1.vn = vnStore->VNForFunc(TYP_INT, VNF_GE, opVN, vnStore->VNForIntCon(value)); + assert(vnStore->IsVNConstantBound(dsc.op1.vn)); + } + else + { + // Create "X u>= value" assertion + dsc.op1.kind = O1K_CONSTANT_LOOP_BND_UN; + dsc.op1.vn = vnStore->VNForFunc(TYP_INT, VNF_GE_UN, opVN, vnStore->VNForIntCon(value)); + assert(vnStore->IsVNConstantBoundUnsigned(dsc.op1.vn)); + } + newAssertIdx = optAddAssertion(&dsc); + } + else + { + continue; + } + } + else + { + // Create "VN == value" assertion. + AssertionDsc dsc = {}; + dsc.assertionKind = OAK_EQUAL; + dsc.op1.lclNum = BAD_VAR_NUM; // O1K_LCLVAR relies only on op1.vn in Global Assertion Prop + dsc.op1.vn = opVN; + dsc.op1.kind = O1K_LCLVAR; + dsc.op2.vn = vnStore->VNForIntCon(value); + dsc.op2.u1.iconVal = value; + dsc.op2.kind = O2K_CONST_INT; + dsc.op2.SetIconFlag(GTF_EMPTY); + newAssertIdx = optAddAssertion(&dsc); + } + + if (newAssertIdx.HasAssertion()) + { + // TODO-Cleanup: We shouldn't attach assertions to nodes in Global Assertion Prop. + // It limits the ability to create multiple assertions for the same node. + GenTree* tree = gtNewNothingNode(); + fgInsertStmtAtBeg(target, fgNewStmtFromTree(tree)); + + modified = true; + tree->SetAssertionInfo(newAssertIdx); + } + } + + return modified; +} + /***************************************************************************** * * Given a set of active assertions this method computes the set @@ -5861,176 +5948,6 @@ void Compiler::optImpliedByConstAssertion(AssertionDsc* constAssertion, ASSERT_T } } -/***************************************************************************** - * - * Given a copy assertion and a dependent assertion this method computes the - * set of implied assertions that are also true. - * For copy assertions, exact SSA num and LCL nums should match, because - * we don't have kill sets and we depend on their value num for dataflow. - */ - -void Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion, ASSERT_TP& result) -{ - noway_assert(copyAssertion->IsCopyAssertion()); - - // Get the copyAssert's lcl/ssa nums. - unsigned copyAssertLclNum = BAD_VAR_NUM; - unsigned copyAssertSsaNum = SsaConfig::RESERVED_SSA_NUM; - - // Check if copyAssertion's op1 or op2 matches the depAssertion's op1. 
- if (depAssertion->op1.lcl.lclNum == copyAssertion->op1.lcl.lclNum) - { - copyAssertLclNum = copyAssertion->op2.lcl.lclNum; - copyAssertSsaNum = copyAssertion->op2.lcl.ssaNum; - } - else if (depAssertion->op1.lcl.lclNum == copyAssertion->op2.lcl.lclNum) - { - copyAssertLclNum = copyAssertion->op1.lcl.lclNum; - copyAssertSsaNum = copyAssertion->op1.lcl.ssaNum; - } - // Check if copyAssertion's op1 or op2 matches the depAssertion's op2. - else if (depAssertion->op2.kind == O2K_LCLVAR_COPY) - { - if (depAssertion->op2.lcl.lclNum == copyAssertion->op1.lcl.lclNum) - { - copyAssertLclNum = copyAssertion->op2.lcl.lclNum; - copyAssertSsaNum = copyAssertion->op2.lcl.ssaNum; - } - else if (depAssertion->op2.lcl.lclNum == copyAssertion->op2.lcl.lclNum) - { - copyAssertLclNum = copyAssertion->op1.lcl.lclNum; - copyAssertSsaNum = copyAssertion->op1.lcl.ssaNum; - } - } - - if (copyAssertLclNum == BAD_VAR_NUM || copyAssertSsaNum == SsaConfig::RESERVED_SSA_NUM) - { - return; - } - - // Get the depAssert's lcl/ssa nums. - unsigned depAssertLclNum = BAD_VAR_NUM; - unsigned depAssertSsaNum = SsaConfig::RESERVED_SSA_NUM; - if ((depAssertion->op1.kind == O1K_LCLVAR) && (depAssertion->op2.kind == O2K_LCLVAR_COPY)) - { - if ((depAssertion->op1.lcl.lclNum == copyAssertion->op1.lcl.lclNum) || - (depAssertion->op1.lcl.lclNum == copyAssertion->op2.lcl.lclNum)) - { - depAssertLclNum = depAssertion->op2.lcl.lclNum; - depAssertSsaNum = depAssertion->op2.lcl.ssaNum; - } - else if ((depAssertion->op2.lcl.lclNum == copyAssertion->op1.lcl.lclNum) || - (depAssertion->op2.lcl.lclNum == copyAssertion->op2.lcl.lclNum)) - { - depAssertLclNum = depAssertion->op1.lcl.lclNum; - depAssertSsaNum = depAssertion->op1.lcl.ssaNum; - } - } - - if (depAssertLclNum == BAD_VAR_NUM || depAssertSsaNum == SsaConfig::RESERVED_SSA_NUM) - { - return; - } - - // Is depAssertion a constant store of a 32-bit integer? - // (i.e GT_LVL_VAR X == GT_CNS_INT) - bool depIsConstAssertion = ((depAssertion->assertionKind == OAK_EQUAL) && (depAssertion->op1.kind == O1K_LCLVAR) && - (depAssertion->op2.kind == O2K_CONST_INT)); - - // Search the assertion table for an assertion on op1 that matches depAssertion - // The matching assertion is the implied assertion. - for (AssertionIndex impIndex = 1; impIndex <= optAssertionCount; impIndex++) - { - AssertionDsc* impAssertion = optGetAssertion(impIndex); - - // The impAssertion must be different from the copy and dependent assertions - if (impAssertion == copyAssertion || impAssertion == depAssertion) - { - continue; - } - - if (!AssertionDsc::SameKind(depAssertion, impAssertion)) - { - continue; - } - - bool op1MatchesCopy = - (copyAssertLclNum == impAssertion->op1.lcl.lclNum) && (copyAssertSsaNum == impAssertion->op1.lcl.ssaNum); - - bool usable = false; - switch (impAssertion->op2.kind) - { - case O2K_SUBRANGE: - usable = op1MatchesCopy && impAssertion->op2.u2.Contains(depAssertion->op2.u2); - break; - - case O2K_CONST_LONG: - usable = op1MatchesCopy && (impAssertion->op2.lconVal == depAssertion->op2.lconVal); - break; - - case O2K_CONST_DOUBLE: - // Exact memory match because of positive and negative zero - usable = op1MatchesCopy && - (memcmp(&impAssertion->op2.dconVal, &depAssertion->op2.dconVal, sizeof(double)) == 0); - break; - - case O2K_IND_CNS_INT: - // This is the ngen case where we have an indirection of an address. 
- noway_assert((impAssertion->op1.kind == O1K_EXACT_TYPE) || (impAssertion->op1.kind == O1K_SUBTYPE)); - - FALLTHROUGH; - - case O2K_CONST_INT: - usable = op1MatchesCopy && (impAssertion->op2.u1.iconVal == depAssertion->op2.u1.iconVal); - break; - - case O2K_LCLVAR_COPY: - // Check if op1 of impAssertion matches copyAssertion and also op2 of impAssertion matches depAssertion. - if (op1MatchesCopy && (depAssertLclNum == impAssertion->op2.lcl.lclNum && - depAssertSsaNum == impAssertion->op2.lcl.ssaNum)) - { - usable = true; - } - else - { - // Otherwise, op2 of impAssertion should match copyAssertion and also op1 of impAssertion matches - // depAssertion. - usable = ((copyAssertLclNum == impAssertion->op2.lcl.lclNum && - copyAssertSsaNum == impAssertion->op2.lcl.ssaNum) && - (depAssertLclNum == impAssertion->op1.lcl.lclNum && - depAssertSsaNum == impAssertion->op1.lcl.ssaNum)); - } - break; - - default: - // leave 'usable' = false; - break; - } - - if (usable) - { - BitVecOps::AddElemD(apTraits, result, impIndex - 1); - -#ifdef DEBUG - if (verbose) - { - AssertionDsc* firstAssertion = optGetAssertion(1); - printf("\nCompiler::optImpliedByCopyAssertion: copyAssertion #%02d and depAssertion #%02d, implies " - "assertion #%02d", - (copyAssertion - firstAssertion) + 1, (depAssertion - firstAssertion) + 1, - (impAssertion - firstAssertion) + 1); - } -#endif - // If the depAssertion is a const assertion then any other assertions that it implies could also imply a - // subrange assertion. - if (depIsConstAssertion) - { - optImpliedByConstAssertion(impAssertion, result); - } - } - } -} - #include "dataflow.h" /***************************************************************************** @@ -6207,7 +6124,7 @@ ASSERT_TP* Compiler::optComputeAssertionGen() { for (GenTree* const tree : stmt->TreeList()) { - if (tree->gtOper == GT_JTRUE) + if (tree->OperIs(GT_JTRUE)) { // A GT_TRUE is always the last node in a tree, so we can break here assert((tree->gtNext == nullptr) && (stmt->GetNextStmt() == nullptr)); @@ -6430,7 +6347,7 @@ Compiler::fgWalkResult Compiler::optVNBasedFoldCurStmt(BasicBlock* block, // Don't propagate floating-point constants into a TYP_STRUCT LclVar // This can occur for HFA return values (see hfa_sf3E_r.exe) - if (tree->TypeGet() == TYP_STRUCT) + if (tree->TypeIs(TYP_STRUCT)) { return WALK_CONTINUE; } @@ -6551,7 +6468,7 @@ void Compiler::optVnNonNullPropCurStmt(BasicBlock* block, Statement* stmt, GenTr { ASSERT_TP empty = BitVecOps::UninitVal(); GenTree* newTree = nullptr; - if (tree->OperGet() == GT_CALL) + if (tree->OperIs(GT_CALL)) { newTree = optNonNullAssertionProp_Call(empty, tree->AsCall()); } @@ -6653,10 +6570,10 @@ PhaseStatus Compiler::optAssertionPropMain() INDEBUG(const unsigned baseTreeID = compGenTreeID); // First discover all assertions and record them in the table. 
+ ArrayStack switchBlocks(getAllocator(CMK_AssertionProp)); for (BasicBlock* const block : Blocks()) { - compCurBB = block; - + compCurBB = block; fgRemoveRestOfBlock = false; Statement* stmt = block->firstStmt(); @@ -6701,6 +6618,16 @@ PhaseStatus Compiler::optAssertionPropMain() // Advance the iterator stmt = stmt->GetNextStmt(); } + + if (block->KindIs(BBJ_SWITCH)) + { + switchBlocks.Push(block); + } + } + + for (int i = 0; i < switchBlocks.Height(); i++) + { + madeChanges |= optCreateJumpTableImpliedAssertions(switchBlocks.Bottom(i)); } if (optAssertionCount == 0) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp new file mode 100644 index 000000000000..17cf1b989a2d --- /dev/null +++ b/src/coreclr/jit/async.cpp @@ -0,0 +1,1999 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file implements the transformation of C# async methods into state +// machines. The transformation takes place late in the JIT pipeline, when most +// optimizations have already been performed, right before lowering. +// +// The transformation performs the following key operations: +// +// 1. Each async call becomes a suspension point where execution can pause and +// return to the caller, accompanied by a resumption point where execution can +// continue when the awaited operation completes. +// +// 2. When suspending at a suspension point a continuation object is created that contains: +// - All live local variables +// - State number to identify which await is being resumed +// - Return value from the awaited operation (filled in by the callee later) +// - Exception information if an exception occurred +// - Resumption function pointer +// - Flags containing additional information +// +// 3. The method entry is modified to include dispatch logic that checks for an +// incoming continuation and jumps to the appropriate resumption point. +// +// 4. Special handling is included for: +// - Exception propagation across await boundaries +// - Return value management for different types (primitives, references, structs) +// - Tiered compilation and On-Stack Replacement (OSR) +// - Optimized state capture based on variable liveness analysis +// +// The transformation ensures that the semantics of the original async method are +// preserved while enabling efficient suspension and resumption of execution. +// + +#include "jitpch.h" +#include "jitstd/algorithm.h" +#include "async.h" + +class AsyncLiveness +{ + Compiler* m_comp; + bool m_hasLiveness; + TreeLifeUpdater m_updater; + unsigned m_numVars; + +public: + AsyncLiveness(Compiler* comp, bool hasLiveness) + : m_comp(comp) + , m_hasLiveness(hasLiveness) + , m_updater(comp) + , m_numVars(comp->lvaCount) + { + } + + void StartBlock(BasicBlock* block); + void Update(GenTree* node); + bool IsLive(unsigned lclNum); + void GetLiveLocals(jitstd::vector& liveLocals, unsigned fullyDefinedRetBufLcl); + +private: + bool IsLocalCaptureUnnecessary(unsigned lclNum); +}; + +//------------------------------------------------------------------------ +// AsyncLiveness::StartBlock: +// Indicate that we are now starting a new block, and do relevant liveness +// updates for it. +// +// Parameters: +// block - The block that we are starting. 
+// +void AsyncLiveness::StartBlock(BasicBlock* block) +{ + if (!m_hasLiveness) + return; + + VarSetOps::Assign(m_comp, m_comp->compCurLife, block->bbLiveIn); +} + +//------------------------------------------------------------------------ +// AsyncLiveness::Update: +// Update liveness to be consistent with the specified node having been +// executed. +// +// Parameters: +// node - The node. +// +void AsyncLiveness::Update(GenTree* node) +{ + if (!m_hasLiveness) + return; + + m_updater.UpdateLife(node); +} + +//------------------------------------------------------------------------ +// AsyncLiveness::IsLocalCaptureUnnecessary: +// Check if capturing a specified local can be skipped. +// +// Parameters: +// lclNum - The local +// +// Returns: +// True if the local should not be captured. Even without liveness +// +bool AsyncLiveness::IsLocalCaptureUnnecessary(unsigned lclNum) +{ +#if FEATURE_FIXED_OUT_ARGS + if (lclNum == m_comp->lvaOutgoingArgSpaceVar) + { + return true; + } +#endif + + if (lclNum == m_comp->info.compRetBuffArg) + { + return true; + } + + if (lclNum == m_comp->lvaGSSecurityCookie) + { + // Initialized in prolog + return true; + } + + if (lclNum == m_comp->info.compLvFrameListRoot) + { + return true; + } + + if (lclNum == m_comp->lvaInlinedPInvokeFrameVar) + { + return true; + } + +#ifdef FEATURE_EH_WINDOWS_X86 + if (lclNum == m_comp->lvaShadowSPslotsVar) + { + // Only expected to be live in handlers + return true; + } +#endif + + if (lclNum == m_comp->lvaRetAddrVar) + { + return true; + } + + if (lclNum == m_comp->lvaAsyncContinuationArg) + { + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// AsyncLiveness::IsLive: +// Check if the specified local is live at this point and should be captured. +// +// Parameters: +// lclNum - The local +// +// Returns: +// True if the local is live and capturing it is necessary. +// +bool AsyncLiveness::IsLive(unsigned lclNum) +{ + if (IsLocalCaptureUnnecessary(lclNum)) + { + return false; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(lclNum); + + if ((dsc->TypeIs(TYP_BYREF) && !dsc->IsImplicitByRef()) || + (dsc->TypeIs(TYP_STRUCT) && dsc->GetLayout()->HasGCByRef())) + { + // Even if these are address exposed we expect them to be dead at + // suspension points. TODO: It would be good to somehow verify these + // aren't obviously live, if the JIT creates live ranges that span a + // suspension point then this makes it quite hard to diagnose that. + return false; + } + + if (!m_hasLiveness) + { + return true; + } + + if (dsc->lvRefCnt(RCS_NORMAL) == 0) + { + return false; + } + + Compiler::lvaPromotionType promoType = m_comp->lvaGetPromotionType(dsc); + if (promoType == Compiler::PROMOTION_TYPE_INDEPENDENT) + { + // Independently promoted structs are handled only through their + // fields. + return false; + } + + if (promoType == Compiler::PROMOTION_TYPE_DEPENDENT) + { + // Dependently promoted structs are handled only through the base + // struct local. + // + // A dependently promoted struct is live if any of its fields are live. 
+ + for (unsigned i = 0; i < dsc->lvFieldCnt; i++) + { + LclVarDsc* fieldDsc = m_comp->lvaGetDesc(dsc->lvFieldLclStart + i); + if (!fieldDsc->lvTracked || VarSetOps::IsMember(m_comp, m_comp->compCurLife, fieldDsc->lvVarIndex)) + { + return true; + } + } + + return false; + } + + if (dsc->lvIsStructField && (m_comp->lvaGetParentPromotionType(dsc) == Compiler::PROMOTION_TYPE_DEPENDENT)) + { + return false; + } + + return !dsc->lvTracked || VarSetOps::IsMember(m_comp, m_comp->compCurLife, dsc->lvVarIndex); +} + +//------------------------------------------------------------------------ +// AsyncLiveness::GetLiveLocals: +// Get live locals that should be captured at this point. +// +// Parameters: +// liveLocals - Vector to add live local information into +// fullyDefinedRetBufLcl - Local to skip even if live +// +void AsyncLiveness::GetLiveLocals(jitstd::vector& liveLocals, unsigned fullyDefinedRetBufLcl) +{ + for (unsigned lclNum = 0; lclNum < m_numVars; lclNum++) + { + if ((lclNum != fullyDefinedRetBufLcl) && IsLive(lclNum)) + { + liveLocals.push_back(LiveLocalInfo(lclNum)); + } + } +} + +//------------------------------------------------------------------------ +// TransformAsync: Run async transformation. +// +// Returns: +// Suitable phase status. +// +// Remarks: +// This transformation creates the state machine structure of the async +// function. After each async call a check for whether that async call +// suspended is inserted. If the check passes a continuation is allocated +// into which the live state is stored. The continuation is returned back to +// the caller to indicate that now this function also suspended. +// +// Associated with each suspension point is also resumption IR. The +// resumption IR restores all live state from the continuation object. IR is +// inserted at the beginning of the function to dispatch on the continuation +// (if one is present), which each suspension point having an associated +// state number that can be switched over. +// +PhaseStatus Compiler::TransformAsync() +{ + assert(compIsAsync()); + + AsyncTransformation transformation(this); + return transformation.Run(); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::Run: +// Run the transformation over all the IR. +// +// Returns: +// Suitable phase status. +// +PhaseStatus AsyncTransformation::Run() +{ + ArrayStack worklist(m_comp->getAllocator(CMK_Async)); + + // First find all basic blocks with awaits in them. We'll have to track + // liveness in these basic blocks, so it does not help to record the calls + // ahead of time. + for (BasicBlock* block : m_comp->Blocks()) + { + for (GenTree* tree : LIR::AsRange(block)) + { + if (tree->IsCall() && tree->AsCall()->IsAsync() && !tree->AsCall()->IsTailCall()) + { + JITDUMP(FMT_BB " contains await(s)\n", block->bbNum); + worklist.Push(block); + break; + } + } + } + + JITDUMP("Found %d blocks with awaits\n", worklist.Height()); + + if (worklist.Height() <= 0) + { + return PhaseStatus::MODIFIED_NOTHING; + } + + // Ask the VM to create a resumption stub for this specific version of the + // code. It is stored in the continuation as a function pointer, so we need + // the fixed entry point here. 
+ m_resumeStub = m_comp->info.compCompHnd->getAsyncResumptionStub(); + m_comp->info.compCompHnd->getFunctionFixedEntryPoint(m_resumeStub, false, &m_resumeStubLookup); + + m_returnedContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("returned continuation")); + m_comp->lvaGetDesc(m_returnedContinuationVar)->lvType = TYP_REF; + m_newContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("new continuation")); + m_comp->lvaGetDesc(m_newContinuationVar)->lvType = TYP_REF; + + m_comp->info.compCompHnd->getAsyncInfo(&m_asyncInfo); + +#ifdef JIT32_GCENCODER + // Due to a hard cap on epilogs we need a shared return here. + m_sharedReturnBB = m_comp->fgNewBBafter(BBJ_RETURN, m_comp->fgLastBBInMainFunction(), false); + m_sharedReturnBB->bbSetRunRarely(); + m_sharedReturnBB->clearTryIndex(); + m_sharedReturnBB->clearHndIndex(); + + if (m_comp->fgIsUsingProfileWeights()) + { + // All suspension BBs are cold, so we do not need to propagate any + // weights, but we do need to propagate the flag. + m_sharedReturnBB->SetFlags(BBF_PROF_WEIGHT); + } + + GenTree* continuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + GenTree* ret = m_comp->gtNewOperNode(GT_RETURN_SUSPEND, TYP_VOID, continuation); + LIR::AsRange(m_sharedReturnBB).InsertAtEnd(continuation, ret); + + JITDUMP("Created shared return BB " FMT_BB "\n", m_sharedReturnBB->bbNum); + + DISPRANGE(LIR::AsRange(m_sharedReturnBB)); +#endif + + // Compute liveness to be used for determining what must be captured on + // suspension. In unoptimized codegen we capture everything. + if (m_comp->opts.OptimizationEnabled()) + { + if (m_comp->m_dfsTree == nullptr) + { + m_comp->m_dfsTree = m_comp->fgComputeDfs(); + } + + m_comp->lvaComputeRefCounts(true, false); + m_comp->fgLocalVarLiveness(); + INDEBUG(m_comp->mostRecentlyActivePhase = PHASE_ASYNC); + VarSetOps::AssignNoCopy(m_comp, m_comp->compCurLife, VarSetOps::MakeEmpty(m_comp)); + } + + AsyncLiveness liveness(m_comp, m_comp->opts.OptimizationEnabled()); + + // Now walk the IR for all the blocks that contain async calls. Keep track + // of liveness and outstanding LIR edges as we go; the LIR edges that cross + // async calls are additional live variables that must be spilled. + jitstd::vector defs(m_comp->getAllocator(CMK_Async)); + + for (int i = 0; i < worklist.Height(); i++) + { + assert(defs.size() == 0); + + BasicBlock* block = worklist.Bottom(i); + liveness.StartBlock(block); + + bool any; + do + { + any = false; + for (GenTree* tree : LIR::AsRange(block)) + { + // Remove all consumed defs; those are no longer 'live' LIR + // edges. + tree->VisitOperands([&defs](GenTree* op) { + if (op->IsValue()) + { + for (size_t i = defs.size(); i > 0; i--) + { + if (op == defs[i - 1]) + { + defs[i - 1] = defs[defs.size() - 1]; + defs.erase(defs.begin() + (defs.size() - 1), defs.end()); + break; + } + } + } + + return GenTree::VisitResult::Continue; + }); + + // Update liveness to reflect state after this node. + liveness.Update(tree); + + if (tree->IsCall() && tree->AsCall()->IsAsync() && !tree->AsCall()->IsTailCall()) + { + // Transform call; continue with the remainder block + Transform(block, tree->AsCall(), defs, liveness, &block); + defs.clear(); + any = true; + break; + } + + // Push a new definition if necessary; this defined value is + // now a live LIR edge. + if (tree->IsValue() && !tree->IsUnusedValue()) + { + defs.push_back(tree); + } + } + } while (any); + } + + // After transforming all async calls we have created resumption blocks; + // create the resumption switch. 
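// [Illustrative sketch, not part of the patch] Rough pseudo-source for the entry
// dispatch that the resumption switch establishes (the real artifact is LIR, and
// the continuation field names below are placeholders):
//
//     ReturnType Method(..., Continuation* continuation)
//     {
//         if (continuation != nullptr)
//         {
//             switch (continuation->state)    // state number stored at suspension
//             {
//                 case 0: goto resumeAfterAwait0;
//                 case 1: goto resumeAfterAwait1;
//                 // ...
//             }
//         }
//         // original method body, with a suspension check after each async call
//     }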
+ CreateResumptionSwitch(); + + m_comp->fgInvalidateDfsTree(); + + return PhaseStatus::MODIFIED_EVERYTHING; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::Transform: +// Transform a single async call in the specified block. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// defs - Current live LIR edges +// life - Liveness information about live locals +// remainder - [out] Remainder block after the transformation +// +void AsyncTransformation::Transform( + BasicBlock* block, GenTreeCall* call, jitstd::vector& defs, AsyncLiveness& life, BasicBlock** remainder) +{ +#ifdef DEBUG + if (m_comp->verbose) + { + printf("Processing call [%06u] in " FMT_BB "\n", Compiler::dspTreeID(call), block->bbNum); + printf(" %zu live LIR edges\n", defs.size()); + + if (defs.size() > 0) + { + const char* sep = " "; + for (GenTree* tree : defs) + { + printf("%s[%06u] (%s)", sep, Compiler::dspTreeID(tree), varTypeName(tree->TypeGet())); + sep = ", "; + } + + printf("\n"); + } + } +#endif + + m_liveLocalsScratch.clear(); + jitstd::vector& liveLocals = m_liveLocalsScratch; + + CreateLiveSetForSuspension(block, call, defs, life, liveLocals); + + ContinuationLayout layout = LayOutContinuation(block, call, liveLocals); + + CallDefinitionInfo callDefInfo = CanonicalizeCallDefinition(block, call, life); + + unsigned stateNum = (unsigned)m_resumptionBBs.size(); + JITDUMP(" Assigned state %u\n", stateNum); + + BasicBlock* suspendBB = CreateSuspension(block, stateNum, life, layout); + + CreateCheckAndSuspendAfterCall(block, callDefInfo, life, suspendBB, remainder); + + BasicBlock* resumeBB = CreateResumption(block, *remainder, call, callDefInfo, stateNum, layout); + + m_resumptionBBs.push_back(resumeBB); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateLiveSetForSuspension: +// Create the set of live state to be captured for suspension, for the +// specified call. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// defs - Current live LIR edges +// life - Liveness information about live locals +// liveLocals - Information about each live local. +// +void AsyncTransformation::CreateLiveSetForSuspension(BasicBlock* block, + GenTreeCall* call, + const jitstd::vector& defs, + AsyncLiveness& life, + jitstd::vector& liveLocals) +{ + unsigned fullyDefinedRetBufLcl = BAD_VAR_NUM; + CallArg* retbufArg = call->gtArgs.GetRetBufferArg(); + if (retbufArg != nullptr) + { + GenTree* retbuf = retbufArg->GetNode(); + if (retbuf->IsLclVarAddr()) + { + LclVarDsc* dsc = m_comp->lvaGetDesc(retbuf->AsLclVarCommon()); + ClassLayout* defLayout = m_comp->typGetObjLayout(call->gtRetClsHnd); + if (defLayout->GetSize() == dsc->lvExactSize()) + { + // This call fully defines this retbuf. There is no need to + // consider it live across the call since it is going to be + // overridden anyway. 
+ fullyDefinedRetBufLcl = retbuf->AsLclVarCommon()->GetLclNum(); + JITDUMP(" V%02u is a fully defined retbuf and will not be considered live\n", fullyDefinedRetBufLcl); + } + } + } + + life.GetLiveLocals(liveLocals, fullyDefinedRetBufLcl); + LiftLIREdges(block, defs, liveLocals); + +#ifdef DEBUG + if (m_comp->verbose) + { + printf(" %zu live locals\n", liveLocals.size()); + + if (liveLocals.size() > 0) + { + const char* sep = " "; + for (LiveLocalInfo& inf : liveLocals) + { + printf("%sV%02u (%s)", sep, inf.LclNum, varTypeName(m_comp->lvaGetDesc(inf.LclNum)->TypeGet())); + sep = ", "; + } + + printf("\n"); + } + } +#endif +} + +//------------------------------------------------------------------------ +// AsyncTransformation::LiftLIREdges: +// Create locals capturing outstanding LIR edges and add information +// indicating that these locals are live. +// +// Parameters: +// block - The block containing the definitions of the LIR edges +// defs - Current outstanding LIR edges +// liveLocals - [out] Vector to add new live local information into +// +void AsyncTransformation::LiftLIREdges(BasicBlock* block, + const jitstd::vector& defs, + jitstd::vector& liveLocals) +{ + if (defs.size() <= 0) + { + return; + } + + for (GenTree* tree : defs) + { + // TODO-CQ: Enable this. It currently breaks our recognition of how the + // call is stored. + // if (tree->OperIs(GT_LCL_VAR)) + //{ + // LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); + // if (!dsc->IsAddressExposed()) + // { + // // No interference by IR invariants. + // LIR::AsRange(block).Remove(tree); + // LIR::AsRange(block).InsertAfter(beyond, tree); + // continue; + // } + //} + + LIR::Use use; + bool gotUse = LIR::AsRange(block).TryGetUse(tree, &use); + assert(gotUse); // Defs list should not contain unused values. + + unsigned newLclNum = use.ReplaceWithLclVar(m_comp); + liveLocals.push_back(LiveLocalInfo(newLclNum)); + GenTree* newUse = use.Def(); + LIR::AsRange(block).Remove(newUse); + LIR::AsRange(block).InsertBefore(use.User(), newUse); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::LayOutContinuation: +// Create the layout of the GC pointer and data arrays in the continuation +// object. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// liveLocals - [in, out] Information about each live local. Size/alignment +// information is read and offset/index information is written. +// +// Returns: +// Layout information. 
+// +ContinuationLayout AsyncTransformation::LayOutContinuation(BasicBlock* block, + GenTreeCall* call, + jitstd::vector& liveLocals) +{ + ContinuationLayout layout(liveLocals); + + for (LiveLocalInfo& inf : liveLocals) + { + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + + if (dsc->TypeIs(TYP_STRUCT) || dsc->IsImplicitByRef()) + { + ClassLayout* layout = dsc->GetLayout(); + assert(!layout->HasGCByRef()); + + if (layout->IsCustomLayout()) + { + inf.Alignment = 1; + inf.DataSize = layout->GetSize(); + inf.GCDataCount = layout->GetGCPtrCount(); + } + else + { + inf.Alignment = m_comp->info.compCompHnd->getClassAlignmentRequirement(layout->GetClassHandle()); + if ((layout->GetGCPtrCount() * TARGET_POINTER_SIZE) == layout->GetSize()) + { + inf.DataSize = 0; + } + else + { + inf.DataSize = layout->GetSize(); + } + + inf.GCDataCount = layout->GetGCPtrCount(); + } + } + else if (dsc->TypeIs(TYP_REF)) + { + inf.Alignment = TARGET_POINTER_SIZE; + inf.DataSize = 0; + inf.GCDataCount = 1; + } + else + { + assert(!dsc->TypeIs(TYP_BYREF)); + + inf.Alignment = genTypeAlignments[dsc->TypeGet()]; + inf.DataSize = genTypeSize(dsc); + inf.GCDataCount = 0; + } + } + + jitstd::sort(liveLocals.begin(), liveLocals.end(), [](const LiveLocalInfo& lhs, const LiveLocalInfo& rhs) { + if (lhs.Alignment == rhs.Alignment) + { + // Prefer lowest local num first for same alignment. + return lhs.LclNum < rhs.LclNum; + } + + // Otherwise prefer highest alignment first. + return lhs.Alignment > rhs.Alignment; + }); + + // For OSR, we store the IL offset that inspired the OSR method at the + // beginning of the data (-1 in the tier0 version): + if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) + { + JITDUMP(" Method %s; keeping IL offset that inspired OSR method at the beginning of non-GC data\n", + m_comp->doesMethodHavePatchpoints() ? "has patchpoints" : "is an OSR method"); + layout.DataSize += sizeof(int); + } + + if (call->gtReturnType == TYP_STRUCT) + { + layout.ReturnStructLayout = m_comp->typGetObjLayout(call->gtRetClsHnd); + layout.ReturnSize = layout.ReturnStructLayout->GetSize(); + layout.ReturnInGCData = layout.ReturnStructLayout->HasGCPtr(); + } + else + { + layout.ReturnSize = genTypeSize(call->gtReturnType); + layout.ReturnInGCData = varTypeIsGC(call->gtReturnType); + } + + assert((layout.ReturnSize > 0) == (call->gtReturnType != TYP_VOID)); + + // The return value is always stored: + // 1. At index 0 in GCData if it is a TYP_REF or a struct with GC references + // 2. At index 0 in Data, for non OSR methods without GC ref returns + // 3. At index 4 in Data for OSR methods without GC ref returns. The + // continuation flags indicates this scenario with a flag. + if (layout.ReturnInGCData) + { + layout.GCRefsCount++; + } + else if (layout.ReturnSize > 0) + { + layout.ReturnValDataOffset = layout.DataSize; + layout.DataSize += layout.ReturnSize; + } + +#ifdef DEBUG + if (layout.ReturnSize > 0) + { + JITDUMP(" Will store return of type %s, size %u in", + call->gtReturnType == TYP_STRUCT ? 
layout.ReturnStructLayout->GetClassName() + : varTypeName(call->gtReturnType), + layout.ReturnSize); + + if (layout.ReturnInGCData) + { + JITDUMP(" GC data\n"); + } + else + { + JITDUMP(" non-GC data at offset %u\n", layout.ReturnValDataOffset); + } + } +#endif + + if (block->hasTryIndex()) + { + layout.ExceptionGCDataIndex = layout.GCRefsCount++; + JITDUMP(" " FMT_BB " is in try region %u; exception will be at GC@+%02u in GC data\n", block->bbNum, + block->getTryIndex(), layout.ExceptionGCDataIndex); + } + + for (LiveLocalInfo& inf : liveLocals) + { + layout.DataSize = roundUp(layout.DataSize, inf.Alignment); + + inf.DataOffset = layout.DataSize; + inf.GCDataIndex = layout.GCRefsCount; + + layout.DataSize += inf.DataSize; + layout.GCRefsCount += inf.GCDataCount; + } + +#ifdef DEBUG + if (m_comp->verbose) + { + printf(" Continuation layout (%u bytes, %u GC pointers):\n", layout.DataSize, layout.GCRefsCount); + for (LiveLocalInfo& inf : liveLocals) + { + printf(" +%03u (GC@+%02u) V%02u: %u bytes, %u GC pointers\n", inf.DataOffset, inf.GCDataIndex, + inf.LclNum, inf.DataSize, inf.GCDataCount); + } + } +#endif + + return layout; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CanonicalizeCallDefinition: +// Put the call definition in a canonical form. This ensures that either the +// value is defined by a LCL_ADDR retbuffer or by a +// STORE_LCL_VAR/STORE_LCL_FLD that follows the call node. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// life - Liveness information about live locals +// +// Returns: +// Information about the definition after canonicalization. +// +CallDefinitionInfo AsyncTransformation::CanonicalizeCallDefinition(BasicBlock* block, + GenTreeCall* call, + AsyncLiveness& life) +{ + CallDefinitionInfo callDefInfo; + + callDefInfo.InsertAfter = call; + + CallArg* retbufArg = call->gtArgs.GetRetBufferArg(); + + if (!call->TypeIs(TYP_VOID) && !call->IsUnusedValue()) + { + assert(retbufArg == nullptr); + assert(call->gtNext != nullptr); + if (!call->gtNext->OperIsLocalStore() || (call->gtNext->Data() != call)) + { + LIR::Use use; + bool gotUse = LIR::AsRange(block).TryGetUse(call, &use); + assert(gotUse); + + use.ReplaceWithLclVar(m_comp); + } + else + { + // We will split after the store, but we still have to update liveness for it. + life.Update(call->gtNext); + } + + assert(call->gtNext->OperIsLocalStore() && (call->gtNext->Data() == call)); + callDefInfo.DefinitionNode = call->gtNext->AsLclVarCommon(); + callDefInfo.InsertAfter = call->gtNext; + } + + if (retbufArg != nullptr) + { + assert(call->TypeIs(TYP_VOID)); + + // For async methods we always expect retbufs to point to locals. We + // ensure this in impStoreStruct. + noway_assert(retbufArg->GetNode()->OperIs(GT_LCL_ADDR)); + + callDefInfo.DefinitionNode = retbufArg->GetNode()->AsLclVarCommon(); + } + + return callDefInfo; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateSuspension: +// Create the basic block that when branched to suspends execution after the +// specified async call. +// +// Parameters: +// block - The block containing the async call +// stateNum - State number assigned to this suspension point +// life - Liveness information about live locals +// layout - Layout information for the continuation object +// +// Returns: +// The new basic block that was created. 
+// +BasicBlock* AsyncTransformation::CreateSuspension(BasicBlock* block, + unsigned stateNum, + AsyncLiveness& life, + const ContinuationLayout& layout) +{ + if (m_lastSuspensionBB == nullptr) + { + m_lastSuspensionBB = m_comp->fgLastBBInMainFunction(); + } + + BasicBlock* suspendBB = m_comp->fgNewBBafter(BBJ_RETURN, m_lastSuspensionBB, false); + suspendBB->clearTryIndex(); + suspendBB->clearHndIndex(); + suspendBB->inheritWeightPercentage(block, 0); + m_lastSuspensionBB = suspendBB; + + if (m_sharedReturnBB != nullptr) + { + suspendBB->SetKindAndTargetEdge(BBJ_ALWAYS, m_comp->fgAddRefPred(m_sharedReturnBB, suspendBB)); + } + + JITDUMP(" Creating suspension " FMT_BB " for state %u\n", suspendBB->bbNum, stateNum); + + // Allocate continuation + GenTree* returnedContinuation = m_comp->gtNewLclvNode(m_returnedContinuationVar, TYP_REF); + + GenTreeCall* allocContinuation = + CreateAllocContinuationCall(life, returnedContinuation, layout.GCRefsCount, layout.DataSize); + + m_comp->compCurBB = suspendBB; + m_comp->fgMorphTree(allocContinuation); + + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, allocContinuation)); + + GenTree* storeNewContinuation = m_comp->gtNewStoreLclVarNode(m_newContinuationVar, allocContinuation); + LIR::AsRange(suspendBB).InsertAtEnd(storeNewContinuation); + + // Fill in 'Resume' + GenTree* newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned resumeOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationResumeFldHnd); + GenTree* resumeStubAddr = CreateResumptionStubAddrTree(); + GenTree* storeResume = StoreAtOffset(newContinuation, resumeOffset, resumeStubAddr, TYP_I_IMPL); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResume)); + + // Fill in 'state' + newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned stateOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationStateFldHnd); + GenTree* stateNumNode = m_comp->gtNewIconNode((ssize_t)stateNum, TYP_INT); + GenTree* storeState = StoreAtOffset(newContinuation, stateOffset, stateNumNode, TYP_INT); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeState)); + + // Fill in 'flags' + unsigned continuationFlags = 0; + if (layout.ReturnInGCData) + continuationFlags |= CORINFO_CONTINUATION_RESULT_IN_GCDATA; + if (block->hasTryIndex()) + continuationFlags |= CORINFO_CONTINUATION_NEEDS_EXCEPTION; + if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) + continuationFlags |= CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA; + + newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned flagsOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationFlagsFldHnd); + GenTree* flagsNode = m_comp->gtNewIconNode((ssize_t)continuationFlags, TYP_INT); + GenTree* storeFlags = StoreAtOffset(newContinuation, flagsOffset, flagsNode, TYP_INT); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeFlags)); + + if (layout.GCRefsCount > 0) + { + FillInGCPointersOnSuspension(layout.Locals, suspendBB); + } + + if (layout.DataSize > 0) + { + FillInDataOnSuspension(layout.Locals, suspendBB); + } + + if (suspendBB->KindIs(BBJ_RETURN)) + { + newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + GenTree* ret = m_comp->gtNewOperNode(GT_RETURN_SUSPEND, TYP_VOID, newContinuation); + LIR::AsRange(suspendBB).InsertAtEnd(newContinuation, ret); + } + + return suspendBB; +} + +//------------------------------------------------------------------------ +// 
AsyncTransformation::CreateAllocContinuationCall: +// Create a call to the JIT helper that allocates a continuation. +// +// Parameters: +// life - Liveness information about live locals +// prevContinuation - IR node that has the value of the previous continuation object +// gcRefsCount - Number of GC refs to allocate in the continuation object +// dataSize - Number of bytes to allocate in the continuation object +// +// Returns: +// IR node representing the allocation. +// +GenTreeCall* AsyncTransformation::CreateAllocContinuationCall(AsyncLiveness& life, + GenTree* prevContinuation, + unsigned gcRefsCount, + unsigned dataSize) +{ + GenTree* gcRefsCountNode = m_comp->gtNewIconNode((ssize_t)gcRefsCount, TYP_I_IMPL); + GenTree* dataSizeNode = m_comp->gtNewIconNode((ssize_t)dataSize, TYP_I_IMPL); + // If VM requests that we report the method handle, or if we have a shared generic context method handle + // that is live here, then we need to call a different helper to keep the loader alive. + GenTree* methodHandleArg = nullptr; + GenTree* classHandleArg = nullptr; + if (((m_comp->info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_METHODDESC) != 0) && + life.IsLive(m_comp->info.compTypeCtxtArg)) + { + methodHandleArg = m_comp->gtNewLclvNode(m_comp->info.compTypeCtxtArg, TYP_I_IMPL); + } + else if (((m_comp->info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_METHODTABLE) != 0) && + life.IsLive(m_comp->info.compTypeCtxtArg)) + { + classHandleArg = m_comp->gtNewLclvNode(m_comp->info.compTypeCtxtArg, TYP_I_IMPL); + } + else if (m_asyncInfo.continuationsNeedMethodHandle) + { + methodHandleArg = m_comp->gtNewIconEmbMethHndNode(m_comp->info.compMethodHnd); + } + + if (methodHandleArg != nullptr) + { + return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_METHOD, TYP_REF, prevContinuation, + gcRefsCountNode, dataSizeNode, methodHandleArg); + } + + if (classHandleArg != nullptr) + { + return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_CLASS, TYP_REF, prevContinuation, + gcRefsCountNode, dataSizeNode, classHandleArg); + } + + return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION, TYP_REF, prevContinuation, gcRefsCountNode, + dataSizeNode); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::FillInGCPointersOnSuspension: +// Create IR that fills the GC pointers of the continuation object. +// This also nulls out the GC pointers in the locals if the local has data +// parts that need to be stored. +// +// Parameters: +// liveLocals - Information about each live local. +// suspendBB - Basic block to add IR to. 
+// +void AsyncTransformation::FillInGCPointersOnSuspension(const jitstd::vector& liveLocals, + BasicBlock* suspendBB) +{ + unsigned objectArrLclNum = GetGCDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned gcDataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationGCDataFldHnd); + GenTree* gcDataInd = LoadFromOffset(newContinuation, gcDataOffset, TYP_REF); + GenTree* storeAllocedObjectArr = m_comp->gtNewStoreLclVarNode(objectArrLclNum, gcDataInd); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedObjectArr)); + + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.GCDataCount <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + if (dsc->TypeIs(TYP_REF)) + { + GenTree* value = m_comp->gtNewLclvNode(inf.LclNum, TYP_REF); + GenTree* objectArr = m_comp->gtNewLclvNode(objectArrLclNum, TYP_REF); + GenTree* store = + StoreAtOffset(objectArr, OFFSETOF__CORINFO_Array__data + (inf.GCDataIndex * TARGET_POINTER_SIZE), value, + TYP_REF); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } + else + { + assert(dsc->TypeIs(TYP_STRUCT) || dsc->IsImplicitByRef()); + ClassLayout* layout = dsc->GetLayout(); + unsigned numSlots = layout->GetSlotCount(); + unsigned gcRefIndex = 0; + for (unsigned i = 0; i < numSlots; i++) + { + var_types gcPtrType = layout->GetGCPtrType(i); + assert((gcPtrType == TYP_I_IMPL) || (gcPtrType == TYP_REF)); + if (gcPtrType != TYP_REF) + { + continue; + } + + GenTree* value; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + value = LoadFromOffset(baseAddr, i * TARGET_POINTER_SIZE, TYP_REF); + } + else + { + value = m_comp->gtNewLclFldNode(inf.LclNum, TYP_REF, i * TARGET_POINTER_SIZE); + } + + GenTree* objectArr = m_comp->gtNewLclvNode(objectArrLclNum, TYP_REF); + unsigned offset = + OFFSETOF__CORINFO_Array__data + ((inf.GCDataIndex + gcRefIndex) * TARGET_POINTER_SIZE); + GenTree* store = StoreAtOffset(objectArr, offset, value, TYP_REF); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + + gcRefIndex++; + + if (inf.DataSize > 0) + { + // Null out the GC field in preparation of storing the rest. + GenTree* null = m_comp->gtNewNull(); + + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + store = StoreAtOffset(baseAddr, i * TARGET_POINTER_SIZE, null, TYP_REF); + } + else + { + store = m_comp->gtNewStoreLclFldNode(inf.LclNum, TYP_REF, i * TARGET_POINTER_SIZE, null); + } + + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } + } + + if (!dsc->IsImplicitByRef()) + { + m_comp->lvaSetVarDoNotEnregister(inf.LclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + } + } + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::FillInDataOnSuspension: +// Create IR that fills the data array of the continuation object. +// +// Parameters: +// liveLocals - Information about each live local. +// suspendBB - Basic block to add IR to. 
+// +void AsyncTransformation::FillInDataOnSuspension(const jitstd::vector& liveLocals, BasicBlock* suspendBB) +{ + unsigned byteArrLclNum = GetDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned dataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd); + GenTree* dataInd = LoadFromOffset(newContinuation, dataOffset, TYP_REF); + GenTree* storeAllocedByteArr = m_comp->gtNewStoreLclVarNode(byteArrLclNum, dataInd); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedByteArr)); + + if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) + { + GenTree* ilOffsetToStore; + if (m_comp->doesMethodHavePatchpoints()) + ilOffsetToStore = m_comp->gtNewIconNode(-1); + else + ilOffsetToStore = m_comp->gtNewIconNode((int)m_comp->info.compILEntry); + + GenTree* byteArr = m_comp->gtNewLclvNode(byteArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data; + GenTree* storePatchpointOffset = StoreAtOffset(byteArr, offset, ilOffsetToStore, TYP_INT); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storePatchpointOffset)); + } + + // Fill in data + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.DataSize <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + + GenTree* byteArr = m_comp->gtNewLclvNode(byteArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data + inf.DataOffset; + + GenTree* value; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + value = m_comp->gtNewLoadValueNode(dsc->GetLayout(), baseAddr, GTF_IND_NONFAULTING); + } + else + { + value = m_comp->gtNewLclVarNode(inf.LclNum); + } + + GenTree* store; + if (dsc->TypeIs(TYP_STRUCT) || dsc->IsImplicitByRef()) + { + GenTree* cns = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); + GenTree* addr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, byteArr, cns); + // This is to heap, but all GC refs are nulled out already, so we can skip the write barrier. + // TODO-CQ: Backend does not care about GTF_IND_TGT_NOT_HEAP for STORE_BLK. + store = + m_comp->gtNewStoreValueNode(dsc->GetLayout(), addr, value, GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP); + } + else + { + store = StoreAtOffset(byteArr, offset, value, dsc->TypeGet()); + } + + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateCheckAndSuspendAfterCall: +// Split the block containing the specified async call, and create the IR +// that checks whether suspension should be done after an async call. +// +// Parameters: +// block - The block containing the async call +// callDefInfo - Information about the async call's definition +// life - Liveness information about live locals +// suspendBB - Basic block to add IR to +// remainder - [out] The remainder block containing the IR that was after the async call. 
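// [Illustrative sketch, not part of the patch] Pseudo-source preview of the shape
// built by this function together with CreateSuspension (the real artifact is
// LIR; helper and variable names below are placeholders):
//
//     value = AsyncCall(...);
//     returnedContinuation = ASYNC_CONTINUATION();   // non-null if the callee suspended
//     if (returnedContinuation != nullptr)
//     {
//         newContinuation = AllocContinuation(returnedContinuation, gcRefsCount, dataSize);
//         // store resume stub address, state number, flags and live locals into it
//         return newContinuation;                    // GT_RETURN_SUSPEND
//     }
//     // otherwise fall through to the remainder block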
+// +void AsyncTransformation::CreateCheckAndSuspendAfterCall(BasicBlock* block, + const CallDefinitionInfo& callDefInfo, + AsyncLiveness& life, + BasicBlock* suspendBB, + BasicBlock** remainder) +{ + GenTree* continuationArg = new (m_comp, GT_ASYNC_CONTINUATION) GenTree(GT_ASYNC_CONTINUATION, TYP_REF); + continuationArg->SetHasOrderingSideEffect(); + + GenTree* storeContinuation = m_comp->gtNewStoreLclVarNode(m_returnedContinuationVar, continuationArg); + LIR::AsRange(block).InsertAfter(callDefInfo.InsertAfter, continuationArg, storeContinuation); + + GenTree* null = m_comp->gtNewNull(); + GenTree* returnedContinuation = m_comp->gtNewLclvNode(m_returnedContinuationVar, TYP_REF); + GenTree* neNull = m_comp->gtNewOperNode(GT_NE, TYP_INT, returnedContinuation, null); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull); + + LIR::AsRange(block).InsertAfter(storeContinuation, null, returnedContinuation, neNull, jtrue); + *remainder = m_comp->fgSplitBlockAfterNode(block, jtrue); + JITDUMP(" Remainder is " FMT_BB "\n", (*remainder)->bbNum); + + FlowEdge* retBBEdge = m_comp->fgAddRefPred(suspendBB, block); + block->SetCond(retBBEdge, block->GetTargetEdge()); + + block->GetTrueEdge()->setLikelihood(0); + block->GetFalseEdge()->setLikelihood(1); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateResumption: +// Create the basic block that when branched to resumes execution on entry to +// the function. +// +// Parameters: +// block - The block containing the async call +// remainder - The block that contains the IR after the (split) async call +// call - The async call +// callDefInfo - Information about the async call's definition +// stateNum - State number assigned to this suspension point +// layout - Layout information for the continuation object +// +// Returns: +// The new basic block that was created. +// +BasicBlock* AsyncTransformation::CreateResumption(BasicBlock* block, + BasicBlock* remainder, + GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + unsigned stateNum, + const ContinuationLayout& layout) +{ + if (m_lastResumptionBB == nullptr) + { + m_lastResumptionBB = m_comp->fgLastBBInMainFunction(); + } + + BasicBlock* resumeBB = m_comp->fgNewBBafter(BBJ_ALWAYS, m_lastResumptionBB, true); + FlowEdge* remainderEdge = m_comp->fgAddRefPred(remainder, resumeBB); + + // It does not really make sense to inherit from the target, but given this + // is always 0% this just propagates the profile weight flag + sets + // BBF_RUN_RARELY. + resumeBB->inheritWeightPercentage(remainder, 0); + resumeBB->SetTargetEdge(remainderEdge); + resumeBB->clearTryIndex(); + resumeBB->clearHndIndex(); + resumeBB->SetFlags(BBF_ASYNC_RESUMPTION); + m_lastResumptionBB = resumeBB; + + JITDUMP(" Creating resumption " FMT_BB " for state %u\n", resumeBB->bbNum, stateNum); + + // We need to restore data before we restore GC pointers, since restoring + // the data may also write the GC pointer fields with nulls. 
+ unsigned resumeByteArrLclNum = BAD_VAR_NUM; + if (layout.DataSize > 0) + { + resumeByteArrLclNum = GetDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned dataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd); + GenTree* dataInd = LoadFromOffset(newContinuation, dataOffset, TYP_REF); + GenTree* storeAllocedByteArr = m_comp->gtNewStoreLclVarNode(resumeByteArrLclNum, dataInd); + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedByteArr)); + + RestoreFromDataOnResumption(resumeByteArrLclNum, layout.Locals, resumeBB); + } + + unsigned resumeObjectArrLclNum = BAD_VAR_NUM; + BasicBlock* storeResultBB = resumeBB; + + if (layout.GCRefsCount > 0) + { + resumeObjectArrLclNum = GetGCDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned gcDataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationGCDataFldHnd); + GenTree* gcDataInd = LoadFromOffset(newContinuation, gcDataOffset, TYP_REF); + GenTree* storeAllocedObjectArr = m_comp->gtNewStoreLclVarNode(resumeObjectArrLclNum, gcDataInd); + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedObjectArr)); + + RestoreFromGCPointersOnResumption(resumeObjectArrLclNum, layout.Locals, resumeBB); + + if (layout.ExceptionGCDataIndex != UINT_MAX) + { + storeResultBB = RethrowExceptionOnResumption(block, remainder, resumeObjectArrLclNum, layout, resumeBB); + } + } + + // Copy call return value. + if ((layout.ReturnSize > 0) && (callDefInfo.DefinitionNode != nullptr)) + { + CopyReturnValueOnResumption(call, callDefInfo, resumeByteArrLclNum, resumeObjectArrLclNum, layout, + storeResultBB); + } + + return resumeBB; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::RestoreFromDataOnResumption: +// Create IR that restores locals from the data array of the continuation +// object. +// +// Parameters: +// resumeByteArrLclNum - Local that has the continuation object's data array +// liveLocals - Information about each live local. 
+// resumeBB - Basic block to append IR to +// +void AsyncTransformation::RestoreFromDataOnResumption(unsigned resumeByteArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB) +{ + // Copy data + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.DataSize <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + + GenTree* byteArr = m_comp->gtNewLclvNode(resumeByteArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data + inf.DataOffset; + GenTree* cns = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); + GenTree* addr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, byteArr, cns); + + GenTree* value; + if (dsc->TypeIs(TYP_STRUCT) || dsc->IsImplicitByRef()) + { + value = m_comp->gtNewLoadValueNode(dsc->GetLayout(), addr, GTF_IND_NONFAULTING); + } + else + { + value = m_comp->gtNewIndir(dsc->TypeGet(), addr, GTF_IND_NONFAULTING); + } + + GenTree* store; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + store = m_comp->gtNewStoreValueNode(dsc->GetLayout(), baseAddr, value, + GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP); + } + else + { + store = m_comp->gtNewStoreLclVarNode(inf.LclNum, value); + } + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::RestoreFromGCPointersOnResumption: +// Create IR that restores locals from the GC pointers array of the +// continuation object. +// +// Parameters: +// resumeObjectArrLclNum - Local that has the continuation object's GC pointers array +// liveLocals - Information about each live local. +// resumeBB - Basic block to append IR to +// +void AsyncTransformation::RestoreFromGCPointersOnResumption(unsigned resumeObjectArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB) +{ + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.GCDataCount <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + if (dsc->TypeIs(TYP_REF)) + { + GenTree* objectArr = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data + (inf.GCDataIndex * TARGET_POINTER_SIZE); + GenTree* value = LoadFromOffset(objectArr, offset, TYP_REF); + GenTree* store = m_comp->gtNewStoreLclVarNode(inf.LclNum, value); + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } + else + { + assert(dsc->TypeIs(TYP_STRUCT) || dsc->IsImplicitByRef()); + ClassLayout* layout = dsc->GetLayout(); + unsigned numSlots = layout->GetSlotCount(); + unsigned gcRefIndex = 0; + for (unsigned i = 0; i < numSlots; i++) + { + var_types gcPtrType = layout->GetGCPtrType(i); + assert((gcPtrType == TYP_I_IMPL) || (gcPtrType == TYP_REF)); + if (gcPtrType != TYP_REF) + { + continue; + } + + GenTree* objectArr = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); + unsigned offset = + OFFSETOF__CORINFO_Array__data + ((inf.GCDataIndex + gcRefIndex) * TARGET_POINTER_SIZE); + GenTree* value = LoadFromOffset(objectArr, offset, TYP_REF); + GenTree* store; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + store = StoreAtOffset(baseAddr, i * TARGET_POINTER_SIZE, value, TYP_REF); + // Implicit byref args are never on heap + store->gtFlags |= GTF_IND_TGT_NOT_HEAP; + } + else + { + store = m_comp->gtNewStoreLclFldNode(inf.LclNum, TYP_REF, i * TARGET_POINTER_SIZE, value); + } + + 
LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + + gcRefIndex++; + } + } + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::RethrowExceptionOnResumption: +// Create IR that checks for an exception and rethrows it at the original +// suspension point if necessary. +// +// Parameters: +// block - The block containing the async call +// remainder - The block that contains the IR after the (split) async call +// resumeObjectArrLclNum - Local that has the continuation object's GC pointers array +// layout - Layout information for the continuation object +// resumeBB - Basic block to append IR to +// +// Returns: +// The new non-exception successor basic block for resumption. This is the +// basic block where execution will continue if there was no exception to +// rethrow. +// +BasicBlock* AsyncTransformation::RethrowExceptionOnResumption(BasicBlock* block, + BasicBlock* remainder, + unsigned resumeObjectArrLclNum, + const ContinuationLayout& layout, + BasicBlock* resumeBB) +{ + JITDUMP(" We need to rethrow an exception\n"); + + BasicBlock* rethrowExceptionBB = + m_comp->fgNewBBinRegion(BBJ_THROW, block, /* runRarely */ true, /* insertAtEnd */ true); + JITDUMP(" Created " FMT_BB " to rethrow exception on resumption\n", rethrowExceptionBB->bbNum); + + BasicBlock* storeResultBB = m_comp->fgNewBBafter(BBJ_ALWAYS, resumeBB, true); + JITDUMP(" Created " FMT_BB " to store result when resuming with no exception\n", storeResultBB->bbNum); + + FlowEdge* rethrowEdge = m_comp->fgAddRefPred(rethrowExceptionBB, resumeBB); + FlowEdge* storeResultEdge = m_comp->fgAddRefPred(storeResultBB, resumeBB); + + assert(resumeBB->KindIs(BBJ_ALWAYS)); + m_comp->fgRemoveRefPred(resumeBB->GetTargetEdge()); + + resumeBB->SetCond(rethrowEdge, storeResultEdge); + rethrowEdge->setLikelihood(0); + storeResultEdge->setLikelihood(1); + rethrowExceptionBB->inheritWeightPercentage(resumeBB, 0); + storeResultBB->inheritWeightPercentage(resumeBB, 100); + JITDUMP(" Resumption " FMT_BB " becomes BBJ_COND to check for non-null exception\n", resumeBB->bbNum); + + FlowEdge* remainderEdge = m_comp->fgAddRefPred(remainder, storeResultBB); + storeResultBB->SetTargetEdge(remainderEdge); + + m_lastResumptionBB = storeResultBB; + + // Check if we have an exception. 
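+    // The generated check is roughly: exception = gcData[layout.ExceptionGCDataIndex]; +    // if (exception != null) jump to rethrowExceptionBB (which calls CORINFO_HELP_THROWEXACT), +    // otherwise fall through to storeResultBB.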
+    unsigned exceptionLclNum = GetExceptionVar(); +    GenTree* objectArr = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); +    unsigned exceptionOffset = OFFSETOF__CORINFO_Array__data + layout.ExceptionGCDataIndex * TARGET_POINTER_SIZE; +    GenTree* exceptionInd = LoadFromOffset(objectArr, exceptionOffset, TYP_REF); +    GenTree* storeException = m_comp->gtNewStoreLclVarNode(exceptionLclNum, exceptionInd); +    LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, storeException)); + +    GenTree* exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF); +    GenTree* null = m_comp->gtNewNull(); +    GenTree* neNull = m_comp->gtNewOperNode(GT_NE, TYP_INT, exception, null); +    GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull); +    LIR::AsRange(resumeBB).InsertAtEnd(exception, null, neNull, jtrue); + +    exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF); + +    GenTreeCall* rethrowException = m_comp->gtNewHelperCallNode(CORINFO_HELP_THROWEXACT, TYP_VOID, exception); + +    m_comp->compCurBB = rethrowExceptionBB; +    m_comp->fgMorphTree(rethrowException); + +    LIR::AsRange(rethrowExceptionBB).InsertAtEnd(LIR::SeqTree(m_comp, rethrowException)); + +    storeResultBB->SetFlags(BBF_ASYNC_RESUMPTION); +    JITDUMP("  Added " FMT_BB " to rethrow exception at suspension point\n", rethrowExceptionBB->bbNum); + +    return storeResultBB; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CopyReturnValueOnResumption: +//   Create IR that copies the return value from the continuation object to the +//   local defined by the async call. +// +// Parameters: +//   call                  - The async call +//   callDefInfo           - Information about the async call's definition +//   resumeByteArrLclNum   - Local that has the continuation object's data array +//   resumeObjectArrLclNum - Local that has the continuation object's GC pointers array +//   layout                - Layout information for the continuation object +//   storeResultBB         - Basic block to append IR to +// +void AsyncTransformation::CopyReturnValueOnResumption(GenTreeCall* call, +                                                      const CallDefinitionInfo& callDefInfo, +                                                      unsigned                  resumeByteArrLclNum, +                                                      unsigned                  resumeObjectArrLclNum, +                                                      const ContinuationLayout& layout, +                                                      BasicBlock*               storeResultBB) +{ +    GenTree*     resultBase; +    unsigned     resultOffset; +    GenTreeFlags resultIndirFlags = GTF_IND_NONFAULTING; +    if (layout.ReturnInGCData) +    { +        assert(resumeObjectArrLclNum != BAD_VAR_NUM); +        resultBase = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); + +        if (call->gtReturnType == TYP_STRUCT) +        { +            // Boxed struct. +            resultBase   = LoadFromOffset(resultBase, OFFSETOF__CORINFO_Array__data, TYP_REF); +            resultOffset = TARGET_POINTER_SIZE; // Offset of data inside box +        } +        else +        { +            assert(call->gtReturnType == TYP_REF); +            resultOffset = OFFSETOF__CORINFO_Array__data; +        } +    } +    else +    { +        assert(resumeByteArrLclNum != BAD_VAR_NUM); +        resultBase   = m_comp->gtNewLclvNode(resumeByteArrLclNum, TYP_REF); +        resultOffset = OFFSETOF__CORINFO_Array__data + layout.ReturnValDataOffset; +        if (layout.ReturnValDataOffset != 0) +            resultIndirFlags = GTF_IND_UNALIGNED; +    } + +    assert(callDefInfo.DefinitionNode != nullptr); +    LclVarDsc* resultLcl = m_comp->lvaGetDesc(callDefInfo.DefinitionNode); + +    // TODO-TP: We can use liveness to avoid generating a lot of this IR.
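+    // Three shapes are generated below: a block copy for TYP_STRUCT destinations that are not +    // independently promoted, per-field loads for independently promoted TYP_STRUCT destinations, +    // and a single load/store for scalar and GC ref returns.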
+ if (call->gtReturnType == TYP_STRUCT) + { + if (m_comp->lvaGetPromotionType(resultLcl) != Compiler::PROMOTION_TYPE_INDEPENDENT) + { + GenTree* resultOffsetNode = m_comp->gtNewIconNode((ssize_t)resultOffset, TYP_I_IMPL); + GenTree* resultAddr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, resultBase, resultOffsetNode); + GenTree* resultData = m_comp->gtNewLoadValueNode(layout.ReturnStructLayout, resultAddr, resultIndirFlags); + GenTree* storeResult; + if ((callDefInfo.DefinitionNode->GetLclOffs() == 0) && + ClassLayout::AreCompatible(resultLcl->GetLayout(), layout.ReturnStructLayout)) + { + storeResult = m_comp->gtNewStoreLclVarNode(callDefInfo.DefinitionNode->GetLclNum(), resultData); + } + else + { + storeResult = m_comp->gtNewStoreLclFldNode(callDefInfo.DefinitionNode->GetLclNum(), TYP_STRUCT, + layout.ReturnStructLayout, + callDefInfo.DefinitionNode->GetLclOffs(), resultData); + } + + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResult)); + } + else + { + assert(!call->gtArgs.HasRetBuffer()); // Locals defined through retbufs are never independently promoted. + + if ((resultLcl->lvFieldCnt > 1) && !resultBase->OperIsLocal()) + { + unsigned resultBaseVar = GetResultBaseVar(); + GenTree* storeResultBase = m_comp->gtNewStoreLclVarNode(resultBaseVar, resultBase); + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResultBase)); + + resultBase = m_comp->gtNewLclVarNode(resultBaseVar, TYP_REF); + } + + assert(callDefInfo.DefinitionNode->OperIs(GT_STORE_LCL_VAR)); + for (unsigned i = 0; i < resultLcl->lvFieldCnt; i++) + { + unsigned fieldLclNum = resultLcl->lvFieldLclStart + i; + LclVarDsc* fieldDsc = m_comp->lvaGetDesc(fieldLclNum); + + unsigned fldOffset = resultOffset + fieldDsc->lvFldOffset; + GenTree* value = LoadFromOffset(resultBase, fldOffset, fieldDsc->TypeGet(), resultIndirFlags); + GenTree* store = m_comp->gtNewStoreLclVarNode(fieldLclNum, value); + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + + if (i + 1 != resultLcl->lvFieldCnt) + { + resultBase = m_comp->gtCloneExpr(resultBase); + } + } + } + } + else + { + GenTree* value = LoadFromOffset(resultBase, resultOffset, call->gtReturnType, resultIndirFlags); + + GenTree* storeResult; + if (callDefInfo.DefinitionNode->OperIs(GT_STORE_LCL_VAR)) + { + storeResult = m_comp->gtNewStoreLclVarNode(callDefInfo.DefinitionNode->GetLclNum(), value); + } + else + { + storeResult = m_comp->gtNewStoreLclFldNode(callDefInfo.DefinitionNode->GetLclNum(), + callDefInfo.DefinitionNode->TypeGet(), + callDefInfo.DefinitionNode->GetLclOffs(), value); + } + + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResult)); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::LoadFromOffset: +// Create a load. +// +// Parameters: +// base - Base address of the load +// offset - Offset to add on top of the base address +// type - Type of the load to create +// indirFlags - Flags to add to the load +// +// Returns: +// IR node of the load. +// +GenTreeIndir* AsyncTransformation::LoadFromOffset(GenTree* base, + unsigned offset, + var_types type, + GenTreeFlags indirFlags) +{ + assert(base->TypeIs(TYP_REF, TYP_BYREF, TYP_I_IMPL)); + GenTree* cns = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); + var_types addrType = base->TypeIs(TYP_I_IMPL) ? 
TYP_I_IMPL : TYP_BYREF; +    GenTree*      addr     = m_comp->gtNewOperNode(GT_ADD, addrType, base, cns); +    GenTreeIndir* load     = m_comp->gtNewIndir(type, addr, indirFlags); +    return load; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::StoreAtOffset: +//   Create a store. +// +// Parameters: +//   base      - Base address of the store +//   offset    - Offset to add on top of the base address +//   value     - Value to store +//   storeType - Type of store +// +// Returns: +//   IR node of the store. +// +GenTreeStoreInd* AsyncTransformation::StoreAtOffset(GenTree* base, unsigned offset, GenTree* value, var_types storeType) +{ +    assert(base->TypeIs(TYP_REF, TYP_BYREF, TYP_I_IMPL)); +    GenTree*         cns      = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); +    var_types        addrType = base->TypeIs(TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF; +    GenTree*         addr     = m_comp->gtNewOperNode(GT_ADD, addrType, base, cns); +    GenTreeStoreInd* store    = m_comp->gtNewStoreIndNode(storeType, addr, value, GTF_IND_NONFAULTING); +    return store; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetDataArrayVar: +//   Create a new local to hold the data array of the continuation object. This +//   local can be validly used for the entire suspension point; the returned +//   local may be used by multiple suspension points. +// +// Returns: +//   Local number. +// +unsigned AsyncTransformation::GetDataArrayVar() +{ +    // Create separate locals for live range splitting purposes, unless the +    // method already has many locals. This helps LSRA avoid creating additional +    // callee saves that harm the prolog/epilog. +    if ((m_dataArrayVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) +    { +        m_dataArrayVar = m_comp->lvaGrabTemp(false DEBUGARG("byte[] for continuation")); +        m_comp->lvaGetDesc(m_dataArrayVar)->lvType = TYP_REF; +    } + +    return m_dataArrayVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetGCDataArrayVar: +//   Create a new local to hold the GC pointers array of the continuation +//   object. This local can be validly used for the entire suspension point; +//   the returned local may be used by multiple suspension points. +// +// Returns: +//   Local number. +// +unsigned AsyncTransformation::GetGCDataArrayVar() +{ +    if ((m_gcDataArrayVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) +    { +        m_gcDataArrayVar = m_comp->lvaGrabTemp(false DEBUGARG("object[] for continuation")); +        m_comp->lvaGetDesc(m_gcDataArrayVar)->lvType = TYP_REF; +    } + +    return m_gcDataArrayVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetResultBaseVar: +//   Create a new local to hold the base address of the incoming result from +//   the continuation. This local can be validly used for the entire suspension +//   point; the returned local may be used by multiple suspension points. +// +// Returns: +//   Local number. +// +unsigned AsyncTransformation::GetResultBaseVar() +{ +    if ((m_resultBaseVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) +    { +        m_resultBaseVar = m_comp->lvaGrabTemp(false DEBUGARG("object for resuming result base")); +        m_comp->lvaGetDesc(m_resultBaseVar)->lvType = TYP_REF; +    } + +    return m_resultBaseVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetExceptionVar: +//   Create a new local to hold the exception in the continuation.
This +// local can be validly used for the entire suspension point; the returned +// local may be used by multiple suspension points. +// +// Returns: +// Local number. +// +unsigned AsyncTransformation::GetExceptionVar() +{ + if ((m_exceptionVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) + { + m_exceptionVar = m_comp->lvaGrabTemp(false DEBUGARG("object for resuming exception")); + m_comp->lvaGetDesc(m_exceptionVar)->lvType = TYP_REF; + } + + return m_exceptionVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateResumptionStubAddrTree: +// Create a tree that represents the address of the resumption stub entry +// point. +// +// Returns: +// IR node. +// +GenTree* AsyncTransformation::CreateResumptionStubAddrTree() +{ + switch (m_resumeStubLookup.accessType) + { + case IAT_VALUE: + { + return CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + } + case IAT_PVALUE: + { + GenTree* tree = CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + tree = m_comp->gtNewIndir(TYP_I_IMPL, tree, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + return tree; + } + case IAT_PPVALUE: + { + noway_assert(!"Unexpected IAT_PPVALUE"); + return nullptr; + } + case IAT_RELPVALUE: + { + GenTree* addr = CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + GenTree* tree = CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + tree = m_comp->gtNewIndir(TYP_I_IMPL, tree, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + tree = m_comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, tree, addr); + return tree; + } + default: + { + noway_assert(!"Bad accessType"); + return nullptr; + } + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateFunctionTargetAddr: +// Create a tree that represents the address of the resumption stub entry +// point. +// +// Returns: +// IR node. +// +GenTree* AsyncTransformation::CreateFunctionTargetAddr(CORINFO_METHOD_HANDLE methHnd, + const CORINFO_CONST_LOOKUP& lookup) +{ + GenTree* con = m_comp->gtNewIconHandleNode((size_t)lookup.addr, GTF_ICON_FTN_ADDR); + INDEBUG(con->AsIntCon()->gtTargetHandle = (size_t)methHnd); + return con; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateResumptionSwitch: +// Create the IR for the entry of the function that checks the continuation +// and dispatches on its state number. 
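+//   The generated entry null-checks the incoming continuation argument; when it is non-null, control +//   dispatches to the matching resumption block (directly when there is a single state, via a compare +//   of the continuation's state field when there are two states, or via a BBJ_SWITCH on the state +//   field otherwise).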
+// +void AsyncTransformation::CreateResumptionSwitch() +{ + m_comp->fgCreateNewInitBB(); + BasicBlock* newEntryBB = m_comp->fgFirstBB; + + GenTree* continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + GenTree* null = m_comp->gtNewNull(); + GenTree* neNull = m_comp->gtNewOperNode(GT_NE, TYP_INT, continuationArg, null); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull); + LIR::AsRange(newEntryBB).InsertAtEnd(continuationArg, null, neNull, jtrue); + + FlowEdge* resumingEdge; + + if (m_resumptionBBs.size() == 1) + { + JITDUMP(" Redirecting entry " FMT_BB " directly to " FMT_BB " as it is the only resumption block\n", + newEntryBB->bbNum, m_resumptionBBs[0]->bbNum); + resumingEdge = m_comp->fgAddRefPred(m_resumptionBBs[0], newEntryBB); + } + else if (m_resumptionBBs.size() == 2) + { + BasicBlock* condBB = m_comp->fgNewBBbefore(BBJ_COND, m_resumptionBBs[0], true); + condBB->inheritWeightPercentage(newEntryBB, 0); + + FlowEdge* to0 = m_comp->fgAddRefPred(m_resumptionBBs[0], condBB); + FlowEdge* to1 = m_comp->fgAddRefPred(m_resumptionBBs[1], condBB); + condBB->SetCond(to1, to0); + to1->setLikelihood(0.5); + to0->setLikelihood(0.5); + + resumingEdge = m_comp->fgAddRefPred(condBB, newEntryBB); + + JITDUMP(" Redirecting entry " FMT_BB " to BBJ_COND " FMT_BB " for resumption with 2 states\n", + newEntryBB->bbNum, condBB->bbNum); + + continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned stateOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationStateFldHnd); + GenTree* stateOffsetNode = m_comp->gtNewIconNode((ssize_t)stateOffset, TYP_I_IMPL); + GenTree* stateAddr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, continuationArg, stateOffsetNode); + GenTree* stateInd = m_comp->gtNewIndir(TYP_INT, stateAddr, GTF_IND_NONFAULTING); + GenTree* zero = m_comp->gtNewZeroConNode(TYP_INT); + GenTree* stateNeZero = m_comp->gtNewOperNode(GT_NE, TYP_INT, stateInd, zero); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, stateNeZero); + + LIR::AsRange(condBB).InsertAtEnd(continuationArg, stateOffsetNode, stateAddr, stateInd, zero, stateNeZero, + jtrue); + } + else + { + BasicBlock* switchBB = m_comp->fgNewBBbefore(BBJ_SWITCH, m_resumptionBBs[0], true); + switchBB->inheritWeightPercentage(newEntryBB, 0); + + resumingEdge = m_comp->fgAddRefPred(switchBB, newEntryBB); + + JITDUMP(" Redirecting entry " FMT_BB " to BBJ_SWITCH " FMT_BB " for resumption with %zu states\n", + newEntryBB->bbNum, switchBB->bbNum, m_resumptionBBs.size()); + + continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned stateOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationStateFldHnd); + GenTree* stateOffsetNode = m_comp->gtNewIconNode((ssize_t)stateOffset, TYP_I_IMPL); + GenTree* stateAddr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, continuationArg, stateOffsetNode); + GenTree* stateInd = m_comp->gtNewIndir(TYP_INT, stateAddr, GTF_IND_NONFAULTING); + GenTree* switchNode = m_comp->gtNewOperNode(GT_SWITCH, TYP_VOID, stateInd); + + LIR::AsRange(switchBB).InsertAtEnd(continuationArg, stateOffsetNode, stateAddr, stateInd, switchNode); + + m_comp->fgHasSwitch = true; + + // Default case. TODO-CQ: Support bbsHasDefault = false before lowering. 
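+        // The state-0 resumption block is appended again to serve as the default target, since +        // the last entry of bbsDstTab acts as the default case when bbsHasDefault is true.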
+ m_resumptionBBs.push_back(m_resumptionBBs[0]); + BBswtDesc* swtDesc = new (m_comp, CMK_BasicBlock) BBswtDesc; + swtDesc->bbsCount = (unsigned)m_resumptionBBs.size(); + swtDesc->bbsHasDefault = true; + swtDesc->bbsDstTab = new (m_comp, CMK_Async) FlowEdge*[m_resumptionBBs.size()]; + + weight_t stateLikelihood = 1.0 / m_resumptionBBs.size(); + for (size_t i = 0; i < m_resumptionBBs.size(); i++) + { + swtDesc->bbsDstTab[i] = m_comp->fgAddRefPred(m_resumptionBBs[i], switchBB); + swtDesc->bbsDstTab[i]->setLikelihood(stateLikelihood); + } + + switchBB->SetSwitch(swtDesc); + } + + newEntryBB->SetCond(resumingEdge, newEntryBB->GetTargetEdge()); + resumingEdge->setLikelihood(0); + newEntryBB->GetFalseEdge()->setLikelihood(1); + + if (m_comp->doesMethodHavePatchpoints()) + { + JITDUMP(" Method has patch points...\n"); + // If we have patchpoints then first check if we need to resume in the OSR version. + BasicBlock* callHelperBB = m_comp->fgNewBBafter(BBJ_THROW, m_comp->fgLastBBInMainFunction(), false); + callHelperBB->bbSetRunRarely(); + callHelperBB->clearTryIndex(); + callHelperBB->clearHndIndex(); + + JITDUMP(" Created " FMT_BB " for transitions back into OSR method\n", callHelperBB->bbNum); + + BasicBlock* onContinuationBB = newEntryBB->GetTrueTarget(); + BasicBlock* checkILOffsetBB = m_comp->fgNewBBbefore(BBJ_COND, onContinuationBB, true); + + JITDUMP(" Created " FMT_BB " to check whether we should transition immediately to OSR\n", + checkILOffsetBB->bbNum); + + // Redirect newEntryBB -> onContinuationBB into newEntryBB -> checkILOffsetBB -> onContinuationBB + m_comp->fgRemoveRefPred(newEntryBB->GetTrueEdge()); + + FlowEdge* toCheckILOffsetBB = m_comp->fgAddRefPred(checkILOffsetBB, newEntryBB); + newEntryBB->SetTrueEdge(toCheckILOffsetBB); + toCheckILOffsetBB->setLikelihood(0); + checkILOffsetBB->inheritWeightPercentage(newEntryBB, 0); + + FlowEdge* toOnContinuationBB = m_comp->fgAddRefPred(onContinuationBB, checkILOffsetBB); + FlowEdge* toCallHelperBB = m_comp->fgAddRefPred(callHelperBB, checkILOffsetBB); + checkILOffsetBB->SetCond(toCallHelperBB, toOnContinuationBB); + toCallHelperBB->setLikelihood(0); + toOnContinuationBB->setLikelihood(1); + callHelperBB->inheritWeightPercentage(checkILOffsetBB, 0); + + // We need to dispatch to the OSR version if the IL offset is non-negative. 
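+        // Suspension stored the IL offset in the first slot of the continuation's data array: -1 when +        // suspended in this tier-0 method, or the OSR entry IL offset when suspended in an OSR version. +        // Load it and, when it is non-negative, call CORINFO_HELP_PATCHPOINT_FORCED to transition.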
+ continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned offsetOfData = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd); + GenTree* dataArr = LoadFromOffset(continuationArg, offsetOfData, TYP_REF); + unsigned offsetOfIlOffset = OFFSETOF__CORINFO_Array__data; + GenTree* ilOffset = LoadFromOffset(dataArr, offsetOfIlOffset, TYP_INT); + unsigned ilOffsetLclNum = m_comp->lvaGrabTemp(false DEBUGARG("IL offset for tier0 OSR method")); + m_comp->lvaGetDesc(ilOffsetLclNum)->lvType = TYP_INT; + GenTree* storeIlOffset = m_comp->gtNewStoreLclVarNode(ilOffsetLclNum, ilOffset); + LIR::AsRange(checkILOffsetBB).InsertAtEnd(LIR::SeqTree(m_comp, storeIlOffset)); + + ilOffset = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT); + GenTree* zero = m_comp->gtNewIconNode(0); + GenTree* geZero = m_comp->gtNewOperNode(GT_GE, TYP_INT, ilOffset, zero); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, geZero); + LIR::AsRange(checkILOffsetBB).InsertAtEnd(ilOffset, zero, geZero, jtrue); + + ilOffset = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT); + + GenTreeCall* callHelper = m_comp->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffset); + callHelper->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; + + m_comp->compCurBB = callHelperBB; + m_comp->fgMorphTree(callHelper); + + LIR::AsRange(callHelperBB).InsertAtEnd(LIR::SeqTree(m_comp, callHelper)); + } + else if (m_comp->opts.IsOSR()) + { + JITDUMP(" Method is an OSR function\n"); + // If the tier-0 version resumed and then transitioned to the OSR + // version by normal means then we will see a non-zero continuation + // here that belongs to the tier0 method. In that case we should just + // ignore it, so create a BB that jumps back. + BasicBlock* onContinuationBB = newEntryBB->GetTrueTarget(); + BasicBlock* onNoContinuationBB = newEntryBB->GetFalseTarget(); + BasicBlock* checkILOffsetBB = m_comp->fgNewBBbefore(BBJ_COND, onContinuationBB, true); + + // Switch newEntryBB -> onContinuationBB into newEntryBB -> checkILOffsetBB + m_comp->fgRemoveRefPred(newEntryBB->GetTrueEdge()); + FlowEdge* toCheckILOffset = m_comp->fgAddRefPred(checkILOffsetBB, newEntryBB); + newEntryBB->SetTrueEdge(toCheckILOffset); + toCheckILOffset->setLikelihood(0); + checkILOffsetBB->inheritWeightPercentage(newEntryBB, 0); + + // Make checkILOffsetBB ->(true) onNoContinuationBB + // ->(false) onContinuationBB + + FlowEdge* toOnContinuationBB = m_comp->fgAddRefPred(onContinuationBB, checkILOffsetBB); + FlowEdge* toOnNoContinuationBB = m_comp->fgAddRefPred(onNoContinuationBB, checkILOffsetBB); + checkILOffsetBB->SetCond(toOnNoContinuationBB, toOnContinuationBB); + toOnContinuationBB->setLikelihood(0); + toOnNoContinuationBB->setLikelihood(1); + + JITDUMP(" Created " FMT_BB " to check for Tier-0 continuations\n", checkILOffsetBB->bbNum); + + continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned offsetOfData = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd); + GenTree* dataArr = LoadFromOffset(continuationArg, offsetOfData, TYP_REF); + unsigned offsetOfIlOffset = OFFSETOF__CORINFO_Array__data; + GenTree* ilOffset = LoadFromOffset(dataArr, offsetOfIlOffset, TYP_INT); + GenTree* zero = m_comp->gtNewIconNode(0); + GenTree* ltZero = m_comp->gtNewOperNode(GT_LT, TYP_INT, ilOffset, zero); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, ltZero); + LIR::AsRange(checkILOffsetBB).InsertAtEnd(LIR::SeqTree(m_comp, jtrue)); + } +} diff --git 
a/src/coreclr/jit/async.h b/src/coreclr/jit/async.h new file mode 100644 index 000000000000..63e1db0a636e --- /dev/null +++ b/src/coreclr/jit/async.h @@ -0,0 +1,152 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +struct LiveLocalInfo +{ + unsigned LclNum; + unsigned Alignment; + unsigned DataOffset; + unsigned DataSize; + unsigned GCDataIndex; + unsigned GCDataCount; + + explicit LiveLocalInfo(unsigned lclNum) + : LclNum(lclNum) + { + } +}; + +struct ContinuationLayout +{ + unsigned DataSize = 0; + unsigned GCRefsCount = 0; + ClassLayout* ReturnStructLayout = nullptr; + unsigned ReturnSize = 0; + bool ReturnInGCData = false; + unsigned ReturnValDataOffset = UINT_MAX; + unsigned ExceptionGCDataIndex = UINT_MAX; + const jitstd::vector& Locals; + + explicit ContinuationLayout(const jitstd::vector& locals) + : Locals(locals) + { + } +}; + +struct CallDefinitionInfo +{ + GenTreeLclVarCommon* DefinitionNode = nullptr; + + // Where to insert new IR for suspension checks. + GenTree* InsertAfter = nullptr; +}; + +class AsyncTransformation +{ + friend class AsyncLiveness; + + Compiler* m_comp; + jitstd::vector m_liveLocalsScratch; + CORINFO_ASYNC_INFO m_asyncInfo; + jitstd::vector m_resumptionBBs; + CORINFO_METHOD_HANDLE m_resumeStub = NO_METHOD_HANDLE; + CORINFO_CONST_LOOKUP m_resumeStubLookup; + unsigned m_returnedContinuationVar = BAD_VAR_NUM; + unsigned m_newContinuationVar = BAD_VAR_NUM; + unsigned m_dataArrayVar = BAD_VAR_NUM; + unsigned m_gcDataArrayVar = BAD_VAR_NUM; + unsigned m_resultBaseVar = BAD_VAR_NUM; + unsigned m_exceptionVar = BAD_VAR_NUM; + BasicBlock* m_lastSuspensionBB = nullptr; + BasicBlock* m_lastResumptionBB = nullptr; + BasicBlock* m_sharedReturnBB = nullptr; + + bool IsLive(unsigned lclNum); + void Transform(BasicBlock* block, + GenTreeCall* call, + jitstd::vector& defs, + class AsyncLiveness& life, + BasicBlock** remainder); + + void CreateLiveSetForSuspension(BasicBlock* block, + GenTreeCall* call, + const jitstd::vector& defs, + AsyncLiveness& life, + jitstd::vector& liveLocals); + + void LiftLIREdges(BasicBlock* block, + const jitstd::vector& defs, + jitstd::vector& liveLocals); + + ContinuationLayout LayOutContinuation(BasicBlock* block, + GenTreeCall* call, + jitstd::vector& liveLocals); + + CallDefinitionInfo CanonicalizeCallDefinition(BasicBlock* block, GenTreeCall* call, AsyncLiveness& life); + + BasicBlock* CreateSuspension(BasicBlock* block, + unsigned stateNum, + AsyncLiveness& life, + const ContinuationLayout& layout); + GenTreeCall* CreateAllocContinuationCall(AsyncLiveness& life, + GenTree* prevContinuation, + unsigned gcRefsCount, + unsigned int dataSize); + void FillInGCPointersOnSuspension(const jitstd::vector& liveLocals, BasicBlock* suspendBB); + void FillInDataOnSuspension(const jitstd::vector& liveLocals, BasicBlock* suspendBB); + void CreateCheckAndSuspendAfterCall(BasicBlock* block, + const CallDefinitionInfo& callDefInfo, + AsyncLiveness& life, + BasicBlock* suspendBB, + BasicBlock** remainder); + + BasicBlock* CreateResumption(BasicBlock* block, + BasicBlock* remainder, + GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + unsigned stateNum, + const ContinuationLayout& layout); + void RestoreFromDataOnResumption(unsigned resumeByteArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB); + void RestoreFromGCPointersOnResumption(unsigned resumeObjectArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB); + 
BasicBlock* RethrowExceptionOnResumption(BasicBlock* block, + BasicBlock* remainder, + unsigned resumeObjectArrLclNum, + const ContinuationLayout& layout, + BasicBlock* resumeBB); + void CopyReturnValueOnResumption(GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + unsigned resumeByteArrLclNum, + unsigned resumeObjectArrLclNum, + const ContinuationLayout& layout, + BasicBlock* storeResultBB); + + GenTreeIndir* LoadFromOffset(GenTree* base, + unsigned offset, + var_types type, + GenTreeFlags indirFlags = GTF_IND_NONFAULTING); + GenTreeStoreInd* StoreAtOffset(GenTree* base, unsigned offset, GenTree* value, var_types storeType); + + unsigned GetDataArrayVar(); + unsigned GetGCDataArrayVar(); + unsigned GetResultBaseVar(); + unsigned GetExceptionVar(); + + GenTree* CreateResumptionStubAddrTree(); + GenTree* CreateFunctionTargetAddr(CORINFO_METHOD_HANDLE methHnd, const CORINFO_CONST_LOOKUP& lookup); + + void CreateResumptionSwitch(); + +public: + AsyncTransformation(Compiler* comp) + : m_comp(comp) + , m_liveLocalsScratch(comp->getAllocator(CMK_Async)) + , m_resumptionBBs(comp->getAllocator(CMK_Async)) + { + } + + PhaseStatus Run(); +}; diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index eb4d5666fcf4..b4cf9f508451 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -497,13 +497,11 @@ void BasicBlock::dspFlags() const {BBF_DONT_REMOVE, "keep"}, {BBF_INTERNAL, "internal"}, {BBF_HAS_SUPPRESSGC_CALL, "sup-gc"}, - {BBF_LOOP_HEAD, "loophead"}, {BBF_HAS_LABEL, "label"}, {BBF_HAS_JMP, "jmp"}, {BBF_HAS_CALL, "hascall"}, {BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY, "xentry"}, {BBF_GC_SAFE_POINT, "gcsafe"}, - {BBF_FUNCLET_BEG, "flet"}, {BBF_HAS_IDX_LEN, "idxlen"}, {BBF_HAS_MD_IDX_LEN, "mdidxlen"}, {BBF_HAS_NEWOBJ, "newobj"}, @@ -528,6 +526,7 @@ void BasicBlock::dspFlags() const {BBF_HAS_ALIGN, "has-align"}, {BBF_HAS_MDARRAYREF, "mdarr"}, {BBF_NEEDS_GCPOLL, "gcpoll"}, + {BBF_ASYNC_RESUMPTION, "resume"}, }; bool first = true; @@ -1045,7 +1044,7 @@ bool BasicBlock::isEmpty() const { for (GenTree* node : LIR::AsRange(this)) { - if (node->OperGet() != GT_IL_OFFSET) + if (!node->OperIs(GT_IL_OFFSET)) { return false; } @@ -1416,7 +1415,7 @@ bool BasicBlock::endsWithJmpMethod(Compiler* comp) const { GenTree* lastNode = this->lastNode(); assert(lastNode != nullptr); - return lastNode->OperGet() == GT_JMP; + return lastNode->OperIs(GT_JMP); } return false; @@ -1432,8 +1431,8 @@ bool BasicBlock::endsWithJmpMethod(Compiler* comp) const // bool BasicBlock::endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly /*=false*/) const { - GenTree* tailCall = nullptr; - bool tailCallsConvertibleToLoopOnly = false; + GenTreeCall* tailCall = nullptr; + bool tailCallsConvertibleToLoopOnly = false; return endsWithJmpMethod(comp) || endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, &tailCall); } @@ -1454,10 +1453,10 @@ bool BasicBlock::endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly /* // Notes: // At most one of fastTailCallsOnly and tailCallsConvertibleToLoopOnly flags can be true. 
// -bool BasicBlock::endsWithTailCall(Compiler* comp, - bool fastTailCallsOnly, - bool tailCallsConvertibleToLoopOnly, - GenTree** tailCall) const +bool BasicBlock::endsWithTailCall(Compiler* comp, + bool fastTailCallsOnly, + bool tailCallsConvertibleToLoopOnly, + GenTreeCall** tailCall) const { assert(!fastTailCallsOnly || !tailCallsConvertibleToLoopOnly); *tailCall = nullptr; @@ -1482,7 +1481,7 @@ bool BasicBlock::endsWithTailCall(Compiler* comp, if (result) { GenTree* lastNode = this->lastNode(); - if (lastNode->OperGet() == GT_CALL) + if (lastNode->OperIs(GT_CALL)) { GenTreeCall* call = lastNode->AsCall(); if (tailCallsConvertibleToLoopOnly) @@ -1524,7 +1523,7 @@ bool BasicBlock::endsWithTailCall(Compiler* comp, // Return Value: // true if the block ends with a tail call convertible to loop. // -bool BasicBlock::endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall) const +bool BasicBlock::endsWithTailCallConvertibleToLoop(Compiler* comp, GenTreeCall** tailCall) const { bool fastTailCallsOnly = false; bool tailCallsConvertibleToLoopOnly = true; @@ -1742,12 +1741,7 @@ bool BasicBlock::isBBCallFinallyPairTail() const // bool BasicBlock::hasEHBoundaryIn() const { - bool returnVal = (bbCatchTyp != BBCT_NONE); - if (!returnVal) - { - assert(!HasFlag(BBF_FUNCLET_BEG)); - } - return returnVal; + return (bbCatchTyp != BBCT_NONE); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index bb746593842a..25df699d387d 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -426,55 +426,54 @@ enum BasicBlockFlags : uint64_t BBF_IMPORTED = MAKE_BBFLAG( 4), // BB byte-code has been imported BBF_INTERNAL = MAKE_BBFLAG( 5), // BB has been added by the compiler BBF_NEEDS_GCPOLL = MAKE_BBFLAG( 6), // BB may need a GC poll because it uses the slow tail call helper - BBF_FUNCLET_BEG = MAKE_BBFLAG( 7), // BB is the beginning of a funclet - BBF_CLONED_FINALLY_BEGIN = MAKE_BBFLAG( 8), // First block of a cloned finally region - BBF_CLONED_FINALLY_END = MAKE_BBFLAG( 9), // Last block of a cloned finally region - BBF_HAS_NULLCHECK = MAKE_BBFLAG(10), // BB contains a null check - BBF_HAS_SUPPRESSGC_CALL = MAKE_BBFLAG(11), // BB contains a call to a method with SuppressGCTransitionAttribute - BBF_RUN_RARELY = MAKE_BBFLAG(12), // BB is rarely run (catch clauses, blocks with throws etc) - BBF_LOOP_HEAD = MAKE_BBFLAG(13), // BB is the head of a loop (can reach a predecessor) - BBF_HAS_LABEL = MAKE_BBFLAG(14), // BB needs a label - BBF_LOOP_ALIGN = MAKE_BBFLAG(15), // Block is lexically the first block in a loop we intend to align. - BBF_HAS_ALIGN = MAKE_BBFLAG(16), // BB ends with 'align' instruction - BBF_HAS_JMP = MAKE_BBFLAG(17), // BB executes a JMP instruction (instead of return) - BBF_GC_SAFE_POINT = MAKE_BBFLAG(18), // BB has a GC safe point (e.g. a call) - BBF_HAS_IDX_LEN = MAKE_BBFLAG(19), // BB contains simple index or length expressions on an SD array local var. - BBF_HAS_MD_IDX_LEN = MAKE_BBFLAG(20), // BB contains simple index, length, or lower bound expressions on an MD array local var. - BBF_HAS_MDARRAYREF = MAKE_BBFLAG(21), // Block has a multi-dimensional array reference - BBF_HAS_NEWOBJ = MAKE_BBFLAG(22), // BB contains 'new' of an object type. 
- - BBF_RETLESS_CALL = MAKE_BBFLAG(23), // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired + BBF_CLONED_FINALLY_BEGIN = MAKE_BBFLAG( 7), // First block of a cloned finally region + BBF_CLONED_FINALLY_END = MAKE_BBFLAG( 8), // Last block of a cloned finally region + BBF_HAS_NULLCHECK = MAKE_BBFLAG( 9), // BB contains a null check + BBF_HAS_SUPPRESSGC_CALL = MAKE_BBFLAG(10), // BB contains a call to a method with SuppressGCTransitionAttribute + BBF_RUN_RARELY = MAKE_BBFLAG(11), // BB is rarely run (catch clauses, blocks with throws etc) + BBF_HAS_LABEL = MAKE_BBFLAG(12), // BB needs a label + BBF_LOOP_ALIGN = MAKE_BBFLAG(13), // Block is lexically the first block in a loop we intend to align. + BBF_HAS_ALIGN = MAKE_BBFLAG(14), // BB ends with 'align' instruction + BBF_HAS_JMP = MAKE_BBFLAG(15), // BB executes a JMP instruction (instead of return) + BBF_GC_SAFE_POINT = MAKE_BBFLAG(16), // BB has a GC safe point (e.g. a call) + BBF_HAS_IDX_LEN = MAKE_BBFLAG(17), // BB contains simple index or length expressions on an SD array local var. + BBF_HAS_MD_IDX_LEN = MAKE_BBFLAG(18), // BB contains simple index, length, or lower bound expressions on an MD array local var. + BBF_HAS_MDARRAYREF = MAKE_BBFLAG(19), // Block has a multi-dimensional array reference + BBF_HAS_NEWOBJ = MAKE_BBFLAG(20), // BB contains 'new' of an object type. + + BBF_RETLESS_CALL = MAKE_BBFLAG(21), // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired // BBJ_CALLFINALLYRET); see isBBCallFinallyPair(). - BBF_COLD = MAKE_BBFLAG(24), // BB is cold - BBF_PROF_WEIGHT = MAKE_BBFLAG(25), // BB weight is computed from profile data - BBF_KEEP_BBJ_ALWAYS = MAKE_BBFLAG(26), // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind + BBF_COLD = MAKE_BBFLAG(22), // BB is cold + BBF_PROF_WEIGHT = MAKE_BBFLAG(23), // BB weight is computed from profile data + BBF_KEEP_BBJ_ALWAYS = MAKE_BBFLAG(24), // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind // as BBJ_ALWAYS. Used on x86 for the final step block out of a finally. - BBF_HAS_CALL = MAKE_BBFLAG(27), // BB contains a call - BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY = MAKE_BBFLAG(28), // Block is dominated by exceptional entry. - BBF_BACKWARD_JUMP = MAKE_BBFLAG(29), // BB is surrounded by a backward jump/switch arc - BBF_BACKWARD_JUMP_SOURCE = MAKE_BBFLAG(30), // Block is a source of a backward jump - BBF_BACKWARD_JUMP_TARGET = MAKE_BBFLAG(31), // Block is a target of a backward jump - BBF_PATCHPOINT = MAKE_BBFLAG(32), // Block is a patchpoint - BBF_PARTIAL_COMPILATION_PATCHPOINT = MAKE_BBFLAG(33), // Block is a partial compilation patchpoint - BBF_HAS_HISTOGRAM_PROFILE = MAKE_BBFLAG(34), // BB contains a call needing a histogram profile - BBF_TAILCALL_SUCCESSOR = MAKE_BBFLAG(35), // BB has pred that has potential tail call - BBF_RECURSIVE_TAILCALL = MAKE_BBFLAG(36), // Block has recursive tailcall that may turn into a loop - BBF_NO_CSE_IN = MAKE_BBFLAG(37), // Block should kill off any incoming CSE - BBF_CAN_ADD_PRED = MAKE_BBFLAG(38), // Ok to add pred edge to this block, even when "safe" edge creation disabled - BBF_HAS_VALUE_PROFILE = MAKE_BBFLAG(39), // Block has a node that needs a value probing - - BBF_HAS_NEWARR = MAKE_BBFLAG(40), // BB contains 'new' of an array type. + BBF_HAS_CALL = MAKE_BBFLAG(25), // BB contains a call + BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY = MAKE_BBFLAG(26), // Block is dominated by exceptional entry. 
+ BBF_BACKWARD_JUMP = MAKE_BBFLAG(27), // BB is surrounded by a backward jump/switch arc + BBF_BACKWARD_JUMP_SOURCE = MAKE_BBFLAG(28), // Block is a source of a backward jump + BBF_BACKWARD_JUMP_TARGET = MAKE_BBFLAG(29), // Block is a target of a backward jump + BBF_PATCHPOINT = MAKE_BBFLAG(30), // Block is a patchpoint + BBF_PARTIAL_COMPILATION_PATCHPOINT = MAKE_BBFLAG(31), // Block is a partial compilation patchpoint + BBF_HAS_HISTOGRAM_PROFILE = MAKE_BBFLAG(32), // BB contains a call needing a histogram profile + BBF_TAILCALL_SUCCESSOR = MAKE_BBFLAG(33), // BB has pred that has potential tail call + BBF_RECURSIVE_TAILCALL = MAKE_BBFLAG(34), // Block has recursive tailcall that may turn into a loop + BBF_NO_CSE_IN = MAKE_BBFLAG(35), // Block should kill off any incoming CSE + BBF_CAN_ADD_PRED = MAKE_BBFLAG(36), // Ok to add pred edge to this block, even when "safe" edge creation disabled + BBF_HAS_VALUE_PROFILE = MAKE_BBFLAG(37), // Block has a node that needs a value probing + BBF_HAS_NEWARR = MAKE_BBFLAG(38), // BB contains 'new' of an array type. + BBF_MAY_HAVE_BOUNDS_CHECKS = MAKE_BBFLAG(39), // BB *likely* has a bounds check (after rangecheck phase). + BBF_ASYNC_RESUMPTION = MAKE_BBFLAG(40), // Block is a resumption block in an async method // The following are sets of flags. // Flags to update when two blocks are compacted BBF_COMPACT_UPD = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_BACKWARD_JUMP | \ - BBF_HAS_NEWOBJ | BBF_HAS_NEWARR | BBF_HAS_NULLCHECK | BBF_HAS_MDARRAYREF | BBF_LOOP_HEAD, + BBF_HAS_NEWOBJ | BBF_HAS_NEWARR | BBF_HAS_NULLCHECK | BBF_HAS_MDARRAYREF | BBF_MAY_HAVE_BOUNDS_CHECKS, // Flags a block should not have had before it is split. - BBF_SPLIT_NONEXIST = BBF_LOOP_HEAD | BBF_RETLESS_CALL | BBF_COLD, + BBF_SPLIT_NONEXIST = BBF_RETLESS_CALL | BBF_COLD, // Flags lost by the top block when a block is split. // Note, this is a conservative guess. @@ -489,14 +488,14 @@ enum BasicBlockFlags : uint64_t // TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ? BBF_SPLIT_GAINED = BBF_DONT_REMOVE | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_PROF_WEIGHT | BBF_HAS_NEWARR | \ - BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_VALUE_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL, + BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_VALUE_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL | BBF_MAY_HAVE_BOUNDS_CHECKS | BBF_ASYNC_RESUMPTION, // Flags that must be propagated to a new block if code is copied from a block to a new block. These are flags that // limit processing of a block if the code in question doesn't exist. This is conservative; we might not // have actually copied one of these type of tree nodes, but if we only copy a portion of the block's statements, // we don't know (unless we actually pay close attention during the copy). - BBF_COPY_PROPAGATE = BBF_HAS_NEWOBJ | BBF_HAS_NEWARR | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_HAS_MDARRAYREF, + BBF_COPY_PROPAGATE = BBF_HAS_NEWOBJ | BBF_HAS_NEWARR | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_HAS_MDARRAYREF | BBF_MAY_HAVE_BOUNDS_CHECKS, }; FORCEINLINE @@ -573,10 +572,6 @@ enum class BasicBlockVisit // The bbPreds list is initially created by Compiler::fgLinkBasicBlocks() // and is incrementally kept up to date. 
// -// The edge weight are computed by Compiler::fgComputeEdgeWeights() -// the edge weights are used to straighten conditional branches -// by Compiler::fgReorderBlocks() -// struct FlowEdge { private: @@ -1154,10 +1149,6 @@ struct BasicBlock : private LIR::Range { return HasFlag(BBF_RUN_RARELY); } - bool isLoopHead() const - { - return HasFlag(BBF_LOOP_HEAD); - } bool isLoopAlign() const { @@ -1416,6 +1407,16 @@ struct BasicBlock : private LIR::Range m_firstNode = tree; } + GenTree* GetLastLIRNode() const + { + return m_lastNode; + } + + void SetLastLIRNode(GenTree* tree) + { + m_lastNode = tree; + } + EntryState* bbEntryState; // verifier tracked state of all entries in stack. #define NO_BASE_TMP UINT_MAX // base# to use when we have none @@ -1765,14 +1766,14 @@ struct BasicBlock : private LIR::Range bool endsWithJmpMethod(Compiler* comp) const; - bool endsWithTailCall(Compiler* comp, - bool fastTailCallsOnly, - bool tailCallsConvertibleToLoopOnly, - GenTree** tailCall) const; + bool endsWithTailCall(Compiler* comp, + bool fastTailCallsOnly, + bool tailCallsConvertibleToLoopOnly, + GenTreeCall** tailCall) const; bool endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly = false) const; - bool endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall) const; + bool endsWithTailCallConvertibleToLoop(Compiler* comp, GenTreeCall** tailCall) const; // Returns the first statement in the statement list of "this" that is // not an SSA definition (a lcl = phi(...) store). diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis index 54661833ef85..424bd4e3f2dc 100644 --- a/src/coreclr/jit/clrjit.natvis +++ b/src/coreclr/jit/clrjit.natvis @@ -20,7 +20,8 @@ Documentation for VS debugger format specifiers: https://learn.microsoft.com/vis - BB{bbNum,d}->BB{bbTargetEdge->m_destBlock->bbNum,d}; {bbKind,en} + BB{bbNum,d}->BB{bbTargetEdge->m_destBlock->bbNum,d}; {bbKind,en} + BB{bbNum,d}-> (BB{bbTrueEdge->m_destBlock->bbNum,d}(T),BB{bbFalseEdge->m_destBlock->bbNum,d}(F)) ; {bbKind,en} BB{bbNum,d}; {bbKind,en}; {bbSwtTargets->bbsCount} cases BB{bbNum,d}; {bbKind,en}; {bbEhfTargets->bbeCount} succs BB{bbNum,d}; {bbKind,en} @@ -266,6 +267,7 @@ Documentation for VS debugger format specifiers: https://learn.microsoft.com/vis (LcJaggedArrayOptInfo*)this,nd (LcMdArrayOptInfo*)this,nd + (LcSpanOptInfo*)this,nd diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 5d921f6f8b88..baaff55a5ada 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -58,14 +58,14 @@ class CodeGen final : public CodeGenInterface // We use movaps when non-VEX because it is a smaller instruction; // however the VEX version vmovaps would be used which is the same size as vmovdqa; // also vmovdqa has more available CPU ports on older processors so we switch to that - return compiler->canUseVexEncoding() ? INS_movdqa : INS_movaps; + return compiler->canUseVexEncoding() ? INS_movdqa32 : INS_movaps; } instruction simdUnalignedMovIns() { // We use movups when non-VEX because it is a smaller instruction; // however the VEX version vmovups would be used which is the same size as vmovdqu; // but vmovdqu has more available CPU ports on older processors so we switch to that - return compiler->canUseVexEncoding() ? INS_movdqu : INS_movups; + return compiler->canUseVexEncoding() ? 
INS_movdqu32 : INS_movups; } #endif // defined(TARGET_XARCH) @@ -212,6 +212,8 @@ class CodeGen final : public CodeGenInterface public: void genSpillVar(GenTree* tree); + void genEmitCallWithCurrentGC(EmitCallParams& callParams); + protected: void genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTarget = REG_NA); @@ -425,10 +427,6 @@ class CodeGen final : public CodeGenInterface void genOSRSaveRemainingCalleeSavedRegisters(); #endif // TARGET_AMD64 -#if defined(TARGET_RISCV64) - void genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize); -#endif - void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn); void genPoisonFrame(regMaskTP bbRegLiveIn); @@ -463,11 +461,8 @@ class CodeGen final : public CodeGenInterface // same. struct FuncletFrameInfoDsc { - regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR) - unsigned fiFunctionCallerSPtoFPdelta; // Delta between caller SP and the frame pointer - unsigned fiSpDelta; // Stack pointer delta - unsigned fiPSP_slot_SP_offset; // PSP slot offset from SP - int fiPSP_slot_CallerSP_offset; // PSP slot offset from Caller SP + regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR) + unsigned fiSpDelta; // Stack pointer delta }; FuncletFrameInfoDsc genFuncletInfo; @@ -479,16 +474,12 @@ class CodeGen final : public CodeGenInterface // same. struct FuncletFrameInfoDsc { - regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes LR) - int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function - // (negative) - int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive) - int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) - int fiSP_to_CalleeSave_delta; // First callee-saved register slot offset from SP (positive) - int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) - int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details. - int fiSpDelta1; // Stack pointer delta 1 (negative) - int fiSpDelta2; // Stack pointer delta 2 (negative) + regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes LR) + int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive) + int fiSP_to_CalleeSave_delta; // First callee-saved register slot offset from SP (positive) + int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details. + int fiSpDelta1; // Stack pointer delta 1 (negative) + int fiSpDelta2; // Stack pointer delta 2 (negative) }; FuncletFrameInfoDsc genFuncletInfo; @@ -500,9 +491,7 @@ class CodeGen final : public CodeGenInterface // same. 
struct FuncletFrameInfoDsc { - unsigned fiFunction_InitialSP_to_FP_delta; // Delta between Initial-SP and the frame pointer - unsigned fiSpDelta; // Stack pointer delta - int fiPSP_slot_InitialSP_offset; // PSP slot offset from Initial-SP + unsigned fiSpDelta; // Stack pointer delta }; FuncletFrameInfoDsc genFuncletInfo; @@ -515,12 +504,8 @@ class CodeGen final : public CodeGenInterface struct FuncletFrameInfoDsc { regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) - int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function - // (negative) - int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) - int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) - int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) - int fiSpDelta; // Stack pointer delta (negative) + int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) + int fiSpDelta; // Stack pointer delta (negative) }; FuncletFrameInfoDsc genFuncletInfo; @@ -530,8 +515,12 @@ class CodeGen final : public CodeGenInterface #if defined(TARGET_XARCH) // Save/Restore callee saved float regs to stack - void genPreserveCalleeSavedFltRegs(unsigned lclFrameSize); - void genRestoreCalleeSavedFltRegs(unsigned lclFrameSize); + void genPreserveCalleeSavedFltRegs(); + void genRestoreCalleeSavedFltRegs(); + + // Generate vzeroupper instruction to clear AVX state if necessary + void genClearAvxStateInProlog(); + void genClearAvxStateInEpilog(); #endif // TARGET_XARCH @@ -553,32 +542,6 @@ class CodeGen final : public CodeGenInterface void genProfilingLeaveCallback(unsigned helper); #endif // PROFILING_SUPPORTED - // clang-format off - void genEmitCall(int callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) - void* addr - X86_ARG(int argSize), - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - const DebugInfo& di, - regNumber base, - bool isJump, - bool noSafePoint = false); - // clang-format on - - // clang-format off - void genEmitCallIndir(int callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) - GenTreeIndir* indir - X86_ARG(int argSize), - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - const DebugInfo& di, - bool isJump); - // clang-format on - // // Epilog functions // @@ -616,80 +579,6 @@ class CodeGen final : public CodeGenInterface void genFuncletEpilog(); void genCaptureFuncletPrologEpilogInfo(); - /*----------------------------------------------------------------------------- - * - * Set the main function PSPSym value in the frame. - * Funclets use different code to load the PSP sym and save it in their frame. - * See the document "CLR ABI.md" for a full description of the PSPSym. - * The PSPSym section of that document is copied here. - * - *********************************** - * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet - * accesses locals from the main function body. - * - * First, two definitions. - * - * Caller-SP is the value of the stack pointer in a function's caller before the call - * instruction is executed. That is, when function A calls function B, Caller-SP for B - * is the value of the stack pointer immediately before the call instruction in A - * (calling B) was executed. 
Note that this definition holds for both AMD64, which - * pushes the return value when a call instruction is executed, and for ARM, which - * doesn't. For AMD64, Caller-SP is the address above the call return address. - * - * Initial-SP is the initial value of the stack pointer after the fixed-size portion of - * the frame has been allocated. That is, before any "alloca"-type allocations. - * - * The PSPSym is a pointer-sized local variable in the frame of the main function and - * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP - * for the main function. The stack offset of the PSPSym is reported to the VM in the - * GC information header. The value reported in the GC information is the offset of the - * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the - * value is reported to the VM, differs between architectures. In particular, note that - * most things in the GC information header are reported as offsets relative to Caller-SP, - * but PSPSym on AMD64 is one (maybe the only) exception.) - * - * The VM uses the PSPSym to find other locals it cares about (such as the generics context - * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that - * the frame pointer is the same value in a funclet as it is in the main function body. - * - * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is - * true for all funclets and it is passed as the first argument in RCX, but for ARM this is - * only true for first pass funclets (currently just filters) and it is passed as the second - * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent" - * frame in the exception processing system. For the CLR, it points either to the main function - * frame or a dynamically enclosing funclet frame from the same function, for the funclet being - * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM. - * - * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we - * don't know if the Establisher Frame is from the main function or a funclet, we design the - * main function and funclet frame layouts to place the PSPSym at an identical, small, constant - * offset from the Establisher Frame in each case. (This is also required because we only report - * a single offset to the PSPSym in the GC information, and that offset must be valid for the main - * function and all of its funclets). Then, the funclet uses this known offset to compute the - * PSPSym address and read its value. From this, it can compute the value of the frame pointer - * (which is a constant offset from the PSPSym value) and set the frame register to be the same - * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's - * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular, - * for every nested funclet invocation. - * - * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM - * restores all non-volatile registers to their values within the parent frame. This includes - * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register - * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets. - * - * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument - * (REG_EXCEPTION_OBJECT). 
On AMD64 it is the second argument and thus passed in RDX. On - * ARM this is the first argument and passed in R0. - * - * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always - * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of - * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym - * is required in all funclets as well as the main function, whereas if the establisher frame was - * correctly reported, the PSPSym could be omitted in some cases.) - *********************************** - */ - void genSetPSPSym(regNumber initReg, bool* pInitRegZeroed); - void genUpdateCurrentFunclet(BasicBlock* block); void genGeneratePrologsAndEpilogs(); @@ -708,6 +597,7 @@ class CodeGen final : public CodeGenInterface void genAmd64EmitterUnitTestsSse2(); void genAmd64EmitterUnitTestsApx(); void genAmd64EmitterUnitTestsAvx10v2(); + void genAmd64EmitterUnitTestsCCMP(); #endif #endif // defined(DEBUG) @@ -868,6 +758,13 @@ class CodeGen final : public CodeGenInterface int scale RISCV64_ARG(regNumber scaleTempReg)); #endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 +#if defined(TARGET_RISCV64) + void genCodeForShxadd(GenTreeOp* tree); + void genCodeForAddUw(GenTreeOp* tree); + void genCodeForSlliUw(GenTreeOp* tree); + instruction getShxaddVariant(int scale, bool useUnsignedVariant); +#endif + #if defined(TARGET_ARMARCH) void genCodeForMulLong(GenTreeOp* mul); #endif // TARGET_ARMARCH @@ -968,16 +865,13 @@ class CodeGen final : public CodeGenInterface void genIntToFloatCast(GenTree* treeNode); void genCkfinite(GenTree* treeNode); void genCodeForCompare(GenTreeOp* tree); -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) void genCodeForCCMP(GenTreeCCMP* ccmp); #endif void genCodeForSelect(GenTreeOp* select); void genIntrinsic(GenTreeIntrinsic* treeNode); void genPutArgStk(GenTreePutArgStk* treeNode); void genPutArgReg(GenTreeOp* tree); -#if FEATURE_ARG_SPLIT - void genPutArgSplit(GenTreePutArgSplit* treeNode); -#endif // FEATURE_ARG_SPLIT #if defined(TARGET_XARCH) unsigned getBaseVarForPutArgStk(GenTree* treeNode); @@ -1043,8 +937,6 @@ class CodeGen final : public CodeGenInterface void genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); - void genSSEIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); - void genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genSSE42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); @@ -1058,6 +950,8 @@ class CodeGen final : public CodeGenInterface template void genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsic, + instruction ins, + emitAttr attr, regNumber nonConstImmReg, regNumber baseReg, regNumber offsReg, @@ -1194,16 +1088,10 @@ class CodeGen final : public CodeGenInterface void genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg); void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); -#ifdef FEATURE_PUT_STRUCT_ARG_STK void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); -#endif // FEATURE_PUT_STRUCT_ARG_STK -#if FEATURE_ARG_SPLIT - void genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode); -#endif // FEATURE_ARG_SPLIT - void 
genConsumeRegs(GenTree* tree); void genConsumeOperands(GenTreeOp* tree); #if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) @@ -1241,6 +1129,7 @@ class CodeGen final : public CodeGenInterface #ifdef SWIFT_SUPPORT void genCodeForSwiftErrorReg(GenTree* tree); #endif // SWIFT_SUPPORT + void genCodeForAsyncContinuation(GenTree* tree); void genCodeForNullCheck(GenTreeIndir* tree); void genCodeForCmpXchg(GenTreeCmpXchg* tree); void genCodeForReuseVal(GenTree* treeNode); @@ -1287,7 +1176,6 @@ class CodeGen final : public CodeGenInterface void genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArgVarNum); #endif // !TARGET_X86 -#ifdef FEATURE_PUT_STRUCT_ARG_STK #ifdef TARGET_X86 bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk); void genPushReg(var_types type, regNumber srcReg); @@ -1309,7 +1197,6 @@ class CodeGen final : public CodeGenInterface #else void genStructPutArgPartialRepMovs(GenTreePutArgStk* putArgStkNode); #endif -#endif // FEATURE_PUT_STRUCT_ARG_STK void genCodeForStoreBlk(GenTreeBlk* storeBlkNode); void genCodeForInitBlkLoop(GenTreeBlk* initBlkNode); @@ -1358,7 +1245,7 @@ class CodeGen final : public CodeGenInterface // Codegen for multi-register struct returns. bool isStructReturn(GenTree* treeNode); #ifdef FEATURE_SIMD - void genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc); + void genSIMDSplitReturn(GenTree* src, const ReturnTypeDesc* retTypeDesc); #endif void genStructReturn(GenTree* treeNode); @@ -1375,6 +1262,8 @@ class CodeGen final : public CodeGenInterface #endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 void genReturn(GenTree* treeNode); + void genReturnSuspend(GenTreeUnOp* treeNode); + void genMarkReturnGCInfo(); #ifdef SWIFT_SUPPORT void genSwiftErrorReturn(GenTree* treeNode); @@ -1403,14 +1292,12 @@ class CodeGen final : public CodeGenInterface return compiler->lvaGetDesc(tree->AsLclVarCommon())->lvIsRegCandidate(); } -#ifdef FEATURE_PUT_STRUCT_ARG_STK #ifdef TARGET_X86 bool m_pushStkArg; #else // !TARGET_X86 unsigned m_stkArgVarNum; unsigned m_stkArgOffset; #endif // !TARGET_X86 -#endif // !FEATURE_PUT_STRUCT_ARG_STK #if defined(DEBUG) && defined(TARGET_XARCH) void genStackPointerCheck(bool doStackPointerCheck, @@ -1647,7 +1534,7 @@ class CodeGen final : public CodeGenInterface } }; - OperandDesc genOperandDesc(GenTree* op); + OperandDesc genOperandDesc(instruction ins, GenTree* op); void inst_TT(instruction ins, emitAttr size, GenTree* op1); void inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTree* op2); @@ -1713,53 +1600,13 @@ class CodeGen final : public CodeGenInterface static insOpts ShiftOpToInsOpts(genTreeOps op); #elif defined(TARGET_XARCH) static instruction JumpKindToCmov(emitJumpKind condition); + static instruction JumpKindToCcmp(emitJumpKind condition); + static insOpts OptsFromCFlags(insCflags flags); #endif - -#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - // Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions - // such as X86's SETcc. A sequence of instructions rather than just a single one is required for - // certain floating point conditions. - // For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF, - // to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0 - // and then jump if ZF is 1: - // JP fallThroughBlock - // JE jumpDestBlock - // fallThroughBlock: - // ... 
- // jumpDestBlock: - // - // This is very similar to the way shortcircuit evaluation of bool AND and OR operators works so - // in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like - // pattern is used to encode the above: - // { EJ_jnp, GT_AND, EJ_je } - // { EJ_jp, GT_OR, EJ_jne } - // - // For more details check inst_JCC and inst_SETCC functions. - // - struct GenConditionDesc - { - emitJumpKind jumpKind1; - genTreeOps oper; - emitJumpKind jumpKind2; - char padTo4Bytes; - - static const GenConditionDesc& Get(GenCondition condition) - { - assert(condition.GetCode() < ArrLen(map)); - const GenConditionDesc& desc = map[condition.GetCode()]; - assert(desc.jumpKind1 != EJ_NONE); - assert((desc.oper == GT_NONE) || (desc.oper == GT_AND) || (desc.oper == GT_OR)); - assert((desc.oper == GT_NONE) == (desc.jumpKind2 == EJ_NONE)); - return desc; - } - - private: - static const GenConditionDesc map[32]; - }; - void inst_JCC(GenCondition condition, BasicBlock* target); void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg); +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) void genCodeForJcc(GenTreeCC* tree); void genCodeForSetcc(GenTreeCC* setcc); void genCodeForJTrue(GenTreeOp* jtrue); diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 26975c0130ab..7e9df15ff4fe 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -379,7 +379,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) // void CodeGen::genLclHeap(GenTree* tree) { - assert(tree->OperGet() == GT_LCLHEAP); + assert(tree->OperIs(GT_LCLHEAP)); assert(compiler->compLocallocUsed); GenTree* size = tree->AsOp()->gtOp1; @@ -799,7 +799,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) regNumber srcReg = REG_NA; assert(source->isContained()); - if (source->gtOper == GT_IND) + if (source->OperIs(GT_IND)) { GenTree* srcAddr = source->gtGetOp1(); assert(!srcAddr->isContained()); @@ -908,7 +908,7 @@ void CodeGen::genCodeForShiftLong(GenTree* tree) assert(oper == GT_LSH_HI || oper == GT_RSH_LO); GenTree* operand = tree->AsOp()->gtOp1; - assert(operand->OperGet() == GT_LONG); + assert(operand->OperIs(GT_LONG)); assert(operand->AsOp()->gtOp1->isUsedFromReg()); assert(operand->AsOp()->gtOp2->isUsedFromReg()); @@ -1200,7 +1200,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) // void CodeGen::genCkfinite(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_CKFINITE); + assert(treeNode->OperIs(GT_CKFINITE)); emitter* emit = GetEmitter(); var_types targetType = treeNode->TypeGet(); @@ -1307,7 +1307,7 @@ void CodeGen::genCodeForJTrue(GenTreeOp* jtrue) // void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) { - assert(tree->OperGet() == GT_RETURNTRAP); + assert(tree->OperIs(GT_RETURNTRAP)); // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC // based on the contents of 'data' @@ -1402,10 +1402,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) // void CodeGen::genLongToIntCast(GenTree* cast) { - assert(cast->OperGet() == GT_CAST); + assert(cast->OperIs(GT_CAST)); GenTree* src = cast->gtGetOp1(); - noway_assert(src->OperGet() == GT_LONG); + noway_assert(src->OperIs(GT_LONG)); genConsumeRegs(src); @@ -1485,7 +1485,7 @@ void CodeGen::genLongToIntCast(GenTree* cast) void CodeGen::genIntToFloatCast(GenTree* treeNode) { // int --> float/double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); 
regNumber targetReg = treeNode->GetRegNum(); @@ -1550,7 +1550,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) { // we don't expect to see overflow detecting float/double --> int type conversions here // as they should have been converted into helper calls by front-end. - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -1619,6 +1619,12 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr); } + EmitCallParams params; + + params.methHnd = compiler->eeFindHelper(helper); + params.argSize = argSize; + params.retSize = retSize; + if (!addr || !validImmForBL((ssize_t)addr)) { if (callTargetReg == REG_NA) @@ -1639,23 +1645,14 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regSet.verifyRegUsed(callTargetReg); } - GetEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper), - INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr - argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, DebugInfo(), - callTargetReg, // ireg - REG_NA, 0, 0, // xreg, xmul, disp - false // isJump - ); + params.callType = EC_INDIR_R; + params.ireg = callTargetReg; + genEmitCallWithCurrentGC(params); } else { - GetEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper), - INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), REG_NA, REG_NA, 0, - 0, /* ilOffset, ireg, xreg, xmul, disp */ - false /* isJump */ - ); + params.callType = EC_FUNC_TOKEN; + genEmitCallWithCurrentGC(params); } regSet.verifyRegistersUsed(RBM_CALLEE_TRASH); @@ -2084,7 +2081,16 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC // We can't do this optimization with callee saved floating point registers because // the stack would be allocated in a wrong spot. if (maskCalleeSavedFloat != RBM_NONE) + { + return RBM_NONE; + } + + // We similarly skip it for async due to the extra async continuation + // return that may be overridden by the pop. + if (compiler->compIsAsync()) + { return RBM_NONE; + } // Allocate space for small frames by pushing extra registers. It generates smaller and faster code // that extra sub sp,XXX/add sp,XXX. @@ -2223,7 +2229,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) * Funclets have the following incoming arguments: * * catch: r0 = the exception object that was caught (see GT_CATCH_ARG) - * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function + * filter: r0 = the exception object to filter (see GT_CATCH_ARG) * finally/fault: none * * Funclets set the following registers on exit: @@ -2239,50 +2245,9 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) * ; actually use in the funclet. Currently, we save the same set of callee-saved regs * ; calculated for the entire function. * sub sp, XXX ; Establish the rest of the frame. - * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned - * ; up to preserve stack alignment. If we push an odd number of registers, we also - * ; generate this, to keep the stack aligned. - * - * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested - * ; filters. 
- * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet - * ; epilog. - * - * if (this is a filter funclet) - * { - * // r1 on entry to a filter funclet is CallerSP of the containing function: - * // either the main function, or the funclet for a handler that this filter is dynamically nested within. - * // Note that a filter can be dynamically nested within a funclet even if it is not statically within - * // a funclet. Consider: - * // - * // try { - * // try { - * // throw new Exception(); - * // } catch(Exception) { - * // throw new Exception(); // The exception thrown here ... - * // } - * // } filter { // ... will be processed here, while the "catch" funclet frame is - * // // still on the stack - * // } filter-handler { - * // } - * // - * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the - * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting - * // nested EH. To simplify, we just always create a main function PSP for any function with a filter. - * - * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of - * ; the dynamically containing funclet or function) - * str r1, [sp + PSP_slot_SP_offset] ; store the PSP - * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer - * } - * else - * { - * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. - * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction. + * ; XXX is determined by lvaOutgoingArgSpaceSize, aligned up to preserve stack alignment. + * ; If we push an odd number of registers, we also generate this, to keep the stack aligned. * - * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch. - * str r3, [sp + PSP_slot_SP_offset] ; store the PSP - * } * * The epilog sequence is then: * @@ -2301,11 +2266,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) * +=======================+ <---- Caller's SP * |Callee saved registers | * |-----------------------| - * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset - * | | // in function and funclet - * |-----------------------| - * | PSP slot | // Omitted in NativeAOT ABI - * |-----------------------| * ~ possible 4 byte pad ~ * ~ for alignment ~ * |-----------------------| @@ -2325,7 +2285,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) #endif assert(block != NULL); - assert(block->HasFlag(BBF_FUNCLET_BEG)); + assert(compiler->bbIsFuncletBeg(block)); ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); @@ -2375,31 +2335,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - - // If there is no PSPSym (NativeAOT ABI), we are done. 
- if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - if (isFilter) - { - // This is the first block of a filter - - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiPSP_slot_CallerSP_offset); - regSet.verifyRegUsed(REG_R1); - GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset); - GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1, - genFuncletInfo.fiFunctionCallerSPtoFPdelta); - } - else - { - // This is a non-filter funclet - GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, - genFuncletInfo.fiFunctionCallerSPtoFPdelta); - regSet.verifyRegUsed(REG_R3); - GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset); - } } /***************************************************************************** @@ -2486,8 +2421,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved // (also assumed in genFnProlog()). assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0); - unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; - genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES; + unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); @@ -2504,97 +2438,23 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; unsigned spDelta = funcletFrameSizeAligned - saveRegsSize; - unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad; - int PSP_slot_CallerSP_offset = - -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative! - /* Now save it for future use */ - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSpDelta = spDelta; - genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset; - genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSpDelta = spDelta; #ifdef DEBUG if (verbose) { printf("\n"); printf("Funclet prolog / epilog info\n"); - printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta); printf(" Save regs: "); dspRegMask(rsMaskSaveRegs); printf("\n"); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); - printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset); - printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset); - - if (PSP_slot_CallerSP_offset != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) - { - printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); - } } #endif // DEBUG - - assert(PSP_slot_CallerSP_offset < 0); - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(PSP_slot_CallerSP_offset == - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main - // function and funclet! 
- } - } -} - -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - - // We either generate: - // add r1, r11, 8 - // str r1, [reg + PSPSymOffset] - // or: - // add r1, sp, 76 - // str r1, [reg + PSPSymOffset] - // depending on the smallest encoding - - int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); - - int callerSPOffs; - regNumber regBase; - - if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta)) - { - // use the "add , sp, imm" form - - callerSPOffs = SPtoCallerSPdelta; - regBase = REG_SPBASE; - } - else - { - // use the "add , r11, imm" form - - int FPtoCallerSPdelta = -genCallerSPtoFPdelta(); - noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE)); - - callerSPOffs = FPtoCallerSPdelta; - regBase = REG_FPBASE; - } - - // We will just use the initReg since it is an available register - // and we are probably done using it anyway... - regNumber regTmp = initReg; - *pInitRegZeroed = false; - - GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs); - GetEmitter()->emitIns_S_R(INS_str, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); } //----------------------------------------------------------------------------- diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index ec01f356e194..1ba4369e3423 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1101,7 +1101,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * Funclets have the following incoming arguments: * * catch: x0 = the exception object that was caught (see GT_CATCH_ARG) - * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function + * filter: x0 = the exception object to filter (see GT_CATCH_ARG) * finally/fault: none * * Funclets set the following registers on exit: @@ -1132,8 +1132,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. * |-----------------------| * | Saved FP, LR | // 16 bytes @@ -1163,8 +1161,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. * |-----------------------| * | Saved FP, LR | // 16 bytes @@ -1197,8 +1193,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned * |-----------------------| * | Saved FP, LR | // 16 bytes <-- SP after first adjustment (points at saved FP) @@ -1214,27 +1208,27 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * * Both #1 and #2 only change SP once. 
That means that there will be a maximum of one alignment slot needed. For the general case, #3, * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack - * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 240 bytes: + * pointer needs to be 16 byte aligned at all times. The size of the callee-saved registers space is a maximum of 240 bytes: * * FP,LR registers * 10 int callee-saved register x19-x28 * 8 float callee-saved registers v8-v15 * 8 saved integer argument registers x0-x7, if varargs function - * 1 PSP slot - * 1 alignment slot or monitor acquired slot + * 1 monitor acquired slot +* 1 alignment slot * == 30 slots * 8 bytes = 240 bytes. * * The outgoing argument size, however, can be very large, if we call a function that takes a large number of * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of * outgoing arguments for any call). In that case, we need to 16-byte align the initial change to SP, before - * saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset - * encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument - * space. Both changes to SP might need to add alignment padding. + * saving off the callee-saved registers, so we can use the limited immediate offset encodings we have available, + * before doing another 16-byte aligned SP adjustment to create the outgoing argument space. Both changes to + * SP might need to add alignment padding. * * In addition to the above "standard" frames, we also need to support a frame where the saved FP/LR are at the - * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/LR - * and the PSPSym) that is used in the main function when a GS cookie is required due to the use of localloc. + * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/LR) + * that is used in the main function when a GS cookie is required due to the use of localloc. * (Note that localloc cannot be used in a funclet.) In these variants, not only has the position of FP/LR * changed, but where the alignment padding is placed has also changed. * @@ -1243,13 +1237,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * sub sp,sp,#framesz ; establish the frame * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary * stp fp,lr,[sp,#yyy] ; save FP/LR. - * ; write PSPSym * * The "#framesz <= 512" condition ensures that after we've established the frame, we can use "stp" with its * maximum allowed offset (504) to save the callee-saved register at the highest address. * - * We use "sub" instead of folding it into the next instruction as a predecrement, as we need to write PSPSym - * at the bottom of the stack, and there might also be an alignment padding slot. + * We use "sub" instead of folding it into the next instruction as a predecrement as there might also be an + * alignment padding slot. 
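// A minimal sketch of the funclet frame-size arithmetic described above, assuming the
// 16-byte stack alignment and the 512-byte "stp" reach mentioned in the comment; the
// helper names and the struct are hypothetical, not the JIT's actual implementation.
constexpr unsigned StackAlign = 16;

constexpr unsigned RoundUp(unsigned size, unsigned align)
{
    return (size + align - 1) & ~(align - 1);
}

struct FuncletFrameSketch
{
    unsigned spDelta1; // first (or only) SP subtraction
    unsigned spDelta2; // second SP subtraction (0 when a single adjustment suffices)
};

inline FuncletFrameSketch PlanFuncletFrame(unsigned saveRegsSize, unsigned outgoingArgSpaceSize)
{
    unsigned saveRegsAligned = RoundUp(saveRegsSize, StackAlign);
    unsigned outgoingAligned = RoundUp(outgoingArgSpaceSize, StackAlign);
    unsigned twoAdjustSize   = saveRegsAligned + outgoingAligned;

    // Small frame: one SP subtraction, since "stp" can still reach the highest
    // callee-saved slot when the whole frame is at most 512 bytes.
    if (twoAdjustSize <= 512)
    {
        return {RoundUp(saveRegsSize + outgoingArgSpaceSize, StackAlign), 0};
    }

    // Large frame: two SP subtractions, each aligned separately, so up to 16 bytes
    // of padding can appear twice.
    return {saveRegsAligned, outgoingAligned};
}

// Example: 12 callee-saved registers (96 bytes) plus 520 bytes of outgoing args exceeds
// 512, so two adjustments are used: 96 + 528 = 624 bytes in total.
static_assert(RoundUp(520, StackAlign) == 528, "alignment example");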
* * The funclet frame is thus: * @@ -1269,8 +1262,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes (optional; if #outsz > 0) @@ -1286,7 +1277,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary * stp fp,lr,[sp,#yyy] ; save FP/LR. * sub sp,sp,#outsz ; create space for outgoing argument space - * ; write PSPSym * * For large frames with "#framesz > 512", we must do one SP adjustment first, after which we can save callee-saved * registers with up to the maximum "stp" offset of 504. Then, we can establish the rest of the frame (namely, the @@ -1310,10 +1300,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| - * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned <-- SP after first adjustment (points at alignment padding or PSP slot) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space). * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes @@ -1326,48 +1312,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * Note that in this case we might have 16 bytes of alignment that is adjacent. This is because we are doing 2 SP * subtractions, and each one must be aligned up to 16 bytes. * - * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP - * as in the main function. - * - * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we - * must add buffer space for the saved varargs argument registers here, if the main function did the same. - * - * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. - * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. - * - * if (this is a filter funclet) - * { - * // x1 on entry to a filter funclet is CallerSP of the containing function: - * // either the main function, or the funclet for a handler that this filter is dynamically nested within. - * // Note that a filter can be dynamically nested within a funclet even if it is not statically within - * // a funclet. Consider: - * // - * // try { - * // try { - * // throw new Exception(); - * // } catch(Exception) { - * // throw new Exception(); // The exception thrown here ... - * // } - * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack - * // } filter-handler { - * // } - * // - * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will - * // be a funclet or main function. 
We won't know any time there is a filter protecting nested EH. To simplify, we just always - * // create a main function PSP for any function with a filter. - * - * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) - * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP - * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer - * } - * else - * { - * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. - * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. - * - * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch. - * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP - * } + * Funclets do not have varargs arguments. * * An example epilog sequence is then: * @@ -1390,7 +1335,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) #endif assert(block != NULL); - assert(block->HasFlag(BBF_FUNCLET_BEG)); + assert(compiler->bbIsFuncletBeg(block)); ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); @@ -1537,44 +1482,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - - // If there is no PSPSym (NativeAOT ABI), we are done. Otherwise, we need to set up the PSPSym in the funclet frame. - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - if (isFilter) - { - // This is the first block of a filter - // Note that register x1 = CallerSP of the containing function - // X1 is overwritten by the first Load (new callerSP) - // X2 is scratch when we have a large constant offset - - // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or - // function) - genInstrWithConstant(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, - REG_R2, false); - regSet.verifyRegUsed(REG_R1); - - // Store the PSP value (aka CallerSP) - genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, - false); - - // re-establish the frame pointer - genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, - genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false); - } - else // This is a non-filter funclet - { - // X3 is scratch, X2 can also become scratch - - // compute the CallerSP, given the frame pointer. x3 is scratch. - genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, - -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false); - regSet.verifyRegUsed(REG_R3); - - genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, - false); - } - } } /***************************************************************************** @@ -1747,33 +1654,17 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // The frame size and offsets must be finalized assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); - unsigned const PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0; - - // Because a method and funclets must have the same caller-relative PSPSym offset, - // if there is a PSPSym, we have to pad the funclet frame size for OSR. - // - unsigned osrPad = 0; - if (compiler->opts.IsOSR() && (PSPSize > 0)) - { - osrPad = compiler->info.compPatchpointInfo->TotalFrameSize(); - - // OSR pad must be already aligned to stack size. 
- assert((osrPad % STACK_ALIGN) == 0); - } - - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad; - regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; assert((rsMaskSaveRegs & RBM_LR) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); - unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); - unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize; + unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); + unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; if (compiler->info.compIsVarArgs) { // For varargs we always save all of the integer register arguments // so that they are contiguous with the incoming stack arguments. - saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES; + saveRegsSize += MAX_REG_ARG * REGSIZE_BYTES; } if (compiler->lvaMonAcquired != BAD_VAR_NUM && !compiler->opts.IsOSR()) @@ -1781,23 +1672,21 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // We furthermore allocate the "monitor acquired" bool between PSP and // the saved registers because this is part of the EnC header. // Note that OSR methods reuse the monitor bool created by tier 0. - saveRegsPlusPSPSize += compiler->lvaLclSize(compiler->lvaMonAcquired); + saveRegsSize += compiler->lvaLclStackHomeSize(compiler->lvaMonAcquired); } - unsigned const saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); + unsigned const saveRegsSizeAligned = roundUp(saveRegsSize, STACK_ALIGN); assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); unsigned const outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); // If do two SP adjustments, each one must be aligned. This represents the largest possible stack size, if two // separate alignment slots are required. - unsigned const twoSpAdjustmentFuncletFrameSizeAligned = - osrPad + saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; + unsigned const twoSpAdjustmentFuncletFrameSizeAligned = saveRegsSizeAligned + outgoingArgSpaceAligned; assert((twoSpAdjustmentFuncletFrameSizeAligned % STACK_ALIGN) == 0); int SP_to_FPLR_save_delta; - int SP_to_PSP_slot_delta; - int CallerSP_to_PSP_slot_delta; + int SP_to_CalleeSave_delta; // Are we stressing frame type 5? Don't do it unless we have non-zero outgoing arg space. 
const bool useFrameType5 = @@ -1805,8 +1694,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() if ((twoSpAdjustmentFuncletFrameSizeAligned <= 512) && !useFrameType5) { - unsigned const oneSpAdjustmentFuncletFrameSize = - osrPad + saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; + unsigned const oneSpAdjustmentFuncletFrameSize = saveRegsSize + compiler->lvaOutgoingArgSpaceSize; unsigned const oneSpAdjustmentFuncletFrameSizeAligned = roundUp(oneSpAdjustmentFuncletFrameSize, STACK_ALIGN); assert(oneSpAdjustmentFuncletFrameSizeAligned <= twoSpAdjustmentFuncletFrameSizeAligned); @@ -1823,17 +1711,15 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES; } - SP_to_PSP_slot_delta = compiler->lvaOutgoingArgSpaceSize + oneSpAdjustmentFuncletFrameSizeAlignmentPad; - CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize); + SP_to_CalleeSave_delta = compiler->lvaOutgoingArgSpaceSize + oneSpAdjustmentFuncletFrameSizeAlignmentPad; genFuncletInfo.fiFrameType = 4; } else { SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize; - SP_to_PSP_slot_delta = + SP_to_CalleeSave_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + oneSpAdjustmentFuncletFrameSizeAlignmentPad; - CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES); if (compiler->lvaOutgoingArgSpaceSize == 0) { @@ -1852,8 +1738,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() } else { - unsigned const saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; - assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); + unsigned const saveRegsAlignmentPad = saveRegsSizeAligned - saveRegsSize; + assert((saveRegsAlignmentPad == 0) || (saveRegsAlignmentPad == REGSIZE_BYTES)); if (genSaveFpLrWithAllCalleeSavedRegisters) { @@ -1863,22 +1749,19 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES; } - SP_to_PSP_slot_delta = outgoingArgSpaceAligned + saveRegsPlusPSPAlignmentPad; - CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize); + SP_to_CalleeSave_delta = outgoingArgSpaceAligned + saveRegsAlignmentPad; genFuncletInfo.fiFrameType = 5; } else { - SP_to_FPLR_save_delta = outgoingArgSpaceAligned; - SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad; - CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - - saveRegsPlusPSPAlignmentPad); + SP_to_FPLR_save_delta = outgoingArgSpaceAligned; + SP_to_CalleeSave_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsAlignmentPad; genFuncletInfo.fiFrameType = 3; } - genFuncletInfo.fiSpDelta1 = -(int)(osrPad + saveRegsPlusPSPSizeAligned); + genFuncletInfo.fiSpDelta1 = -(int)saveRegsSizeAligned; genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned; assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)twoSpAdjustmentFuncletFrameSizeAligned); @@ -1886,11 +1769,9 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() /* Now save it for future use */ - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; - genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; - genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + PSPSize; - genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; + genFuncletInfo.fiSaveRegs = 
rsMaskSaveRegs; + genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; + genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_CalleeSave_delta; #ifdef DEBUG if (verbose) @@ -1900,70 +1781,18 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); - if (compiler->opts.IsOSR()) - { - printf(" OSR Pad: %d\n", osrPad); - } printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta); - printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta); printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta); - printf(" Caller SP to PSP slot delta: %d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta); printf(" Frame type: %d\n", genFuncletInfo.fiFrameType); printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2); - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for - // debugging - { - printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); - } - } } assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0); - assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0); assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0); - assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0); - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! - } #endif // DEBUG } -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - - int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); - - if (compiler->opts.IsOSR()) - { - SPtoCallerSPdelta += compiler->info.compPatchpointInfo->TotalFrameSize(); - } - - // We will just use the initReg since it is an available register - // and we are probably done using it anyway... - regNumber regTmp = initReg; - *pInitRegZeroed = false; - - GetEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta); - GetEmitter()->emitIns_S_R(INS_str, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); -} - //----------------------------------------------------------------------------- // genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case // `genUseBlockInit` is set. @@ -2179,20 +2008,10 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) BasicBlock* const nextBlock = block->Next(); // Generate a call to the finally, like this: - // mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used // bl finally-funclet // b finally-return // Only for non-retless finally calls // The 'b' can be a NOP if we're going to the next block. 
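// Tiny model of the call-finally shapes described in the comment above; the enum and
// helper are hypothetical illustrations, not emitter code.
enum class CallFinallyTail
{
    None,          // retless call: only "bl finally-funclet" is emitted
    Branch,        // "bl" is followed by "b finally-return"
    NopFallThrough // "bl" is followed by a nop because the return point is the next block
};

inline CallFinallyTail PlanCallFinallyTail(bool isRetlessCall, bool returnPointIsNextBlock)
{
    if (isRetlessCall)
    {
        return CallFinallyTail::None;
    }
    return returnPointIsNextBlock ? CallFinallyTail::NopFallThrough : CallFinallyTail::Branch;
}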
- if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - GetEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0); - } - else - { - GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE, /* canSkip */ false); - } - if (block->HasFlag(BBF_RETLESS_CALL)) { GetEmitter()->emitIns_J(INS_bl_local, block->GetTarget()); @@ -2253,8 +2072,18 @@ void CodeGen::instGen_Set_Reg_To_Base_Plus_Imm(emitAttr size, insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) { - instGen_Set_Reg_To_Imm(size, dstReg, imm); - GetEmitter()->emitIns_R_R_R(INS_add, size, dstReg, dstReg, baseReg); + // If the imm values < 12 bits, we can use a single "add rsvd, reg2, #imm". + // Otherwise, use "mov rsvd, #imm", followed up "add rsvd, reg2, rsvd". + + if (imm < 4096) + { + GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, dstReg, baseReg, imm); + } + else + { + instGen_Set_Reg_To_Imm(size, dstReg, imm); + GetEmitter()->emitIns_R_R_R(INS_add, size, dstReg, dstReg, baseReg); + } } // move an immediate value into an integer register @@ -2966,7 +2795,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) if (lclNode->IsMultiReg()) { // This is the case of storing to a multi-reg HFA local from a fixed-size SIMD type. - assert(varTypeIsSIMD(data) && varDsc->lvIsHfa() && (varDsc->GetHfaType() == TYP_FLOAT)); + assert(varTypeIsSIMD(data)); regNumber operandReg = genConsumeReg(data); unsigned int regCount = varDsc->lvFieldCnt; for (unsigned i = 0; i < regCount; ++i) @@ -2975,7 +2804,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) assert(varReg != REG_NA); unsigned fieldLclNum = varDsc->lvFieldLclStart + i; LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldLclNum); - assert(fieldVarDsc->TypeGet() == TYP_FLOAT); + assert(fieldVarDsc->TypeIs(TYP_FLOAT)); GetEmitter()->emitIns_R_R_I(INS_dup, emitTypeSize(TYP_FLOAT), varReg, operandReg, i); } genProduceReg(lclNode); @@ -3106,7 +2935,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) if (!movRequired) { - if (op1->OperGet() == GT_LCL_VAR) + if (op1->OperIs(GT_LCL_VAR)) { GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); const LclVarDsc* varDsc = compiler->lvaGetDesc(lcl); @@ -3134,7 +2963,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) */ void CodeGen::genLclHeap(GenTree* tree) { - assert(tree->OperGet() == GT_LCLHEAP); + assert(tree->OperIs(GT_LCLHEAP)); assert(compiler->compLocallocUsed); GenTree* size = tree->AsOp()->gtOp1; @@ -3142,7 +2971,6 @@ void CodeGen::genLclHeap(GenTree* tree) regNumber targetReg = tree->GetRegNum(); regNumber regCnt = REG_NA; - regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; @@ -3693,7 +3521,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) bool sourceIsLocal = false; assert(source->isContained()); - if (source->gtOper == GT_IND) + if (source->OperIs(GT_IND)) { GenTree* srcAddr = source->gtGetOp1(); assert(!srcAddr->isContained()); @@ -3785,7 +3613,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) // On ARM64, SIMD loads/stores provide 8-byte atomicity guarantees when aligned to 8 bytes. 
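// A small sketch of the immediate-size selection used in instGen_Set_Reg_To_Base_Plus_Imm
// above (hypothetical helper, not the emitter API): an ARM64 "add (immediate)" encodes an
// unsigned 12-bit immediate, so 0..4095 needs one instruction and anything else is
// materialized with a "mov" first.
#include <cstddef>

enum class AddImmPlan
{
    SingleAdd, // add dst, base, #imm
    MovThenAdd // mov dst, #imm ; add dst, dst, base
};

inline AddImmPlan PlanAddImmediate(std::ptrdiff_t imm)
{
    return (imm >= 0 && imm < 4096) ? AddImmPlan::SingleAdd : AddImmPlan::MovThenAdd;
}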
regNumber tmpSimdReg1 = REG_NA; regNumber tmpSimdReg2 = REG_NA; - if ((slots >= 4) && compiler->IsBaselineSimdIsaSupported()) + if (slots >= 4) { tmpSimdReg1 = internalRegisters.Extract(cpObjNode, RBM_ALLFLOAT); tmpSimdReg2 = internalRegisters.Extract(cpObjNode, RBM_ALLFLOAT); @@ -3816,8 +3644,8 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) // Copy at least two slots at a time if (nonGcSlots >= 2) { - // Do 4 slots at a time if SIMD is supported - if ((nonGcSlots >= 4) && compiler->IsBaselineSimdIsaSupported()) + // Do 4 slots at a time with SIMD instructions + if (nonGcSlots >= 4) { // We need SIMD temp regs now tmp1 = tmpSimdReg1; @@ -3960,10 +3788,9 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) // These are imported normally if Atomics aren't supported. assert(!treeNode->OperIs(GT_XORR, GT_XAND)); - regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); - regNumber storeDataReg = - (treeNode->OperGet() == GT_XCHG) ? dataReg : internalRegisters.Extract(treeNode, RBM_ALLINT); - regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg; + regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); + regNumber storeDataReg = treeNode->OperIs(GT_XCHG) ? dataReg : internalRegisters.Extract(treeNode, RBM_ALLINT); + regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg; // Check allocator assumptions // @@ -3975,12 +3802,12 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) noway_assert(dataReg != loadReg); noway_assert(addrReg != storeDataReg); - noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); + noway_assert(treeNode->OperIs(GT_XCHG) || (addrReg != dataReg)); assert(addr->isUsedFromReg()); noway_assert(exResultReg != REG_NA); noway_assert(exResultReg != targetReg); - noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); + noway_assert((targetReg != REG_NA) || !treeNode->OperIs(GT_XCHG)); // Store exclusive unpredictable cases must be avoided noway_assert(exResultReg != storeDataReg); @@ -4317,7 +4144,7 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) // void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) { - assert(tree->OperGet() == GT_RETURNTRAP); + assert(tree->OperIs(GT_RETURNTRAP)); // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC // based on the contents of 'data' @@ -4345,7 +4172,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) { #ifdef FEATURE_SIMD // Storing Vector3 of size 12 bytes through indirection - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genStoreIndTypeSimd12(tree); return; @@ -4501,7 +4328,7 @@ void CodeGen::genCodeForSwap(GenTreeOp* tree) void CodeGen::genIntToFloatCast(GenTree* treeNode) { // int type --> float/double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -4579,7 +4406,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) { // we don't expect to see overflow detecting float/double --> int type conversions here // as they should have been converted into helper calls by front-end. 
- assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -4655,7 +4482,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // void CodeGen::genCkfinite(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_CKFINITE); + assert(treeNode->OperIs(GT_CKFINITE)); GenTree* op1 = treeNode->AsOp()->gtOp1; var_types targetType = treeNode->TypeGet(); @@ -4775,8 +4602,44 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) emit->emitIns_R_R_I(ins, cmpSize, op1->GetRegNum(), shiftOp1->GetRegNum(), shiftOp2->AsIntConCommon()->IntegralValue(), ShiftOpToInsOpts(oper)); + break; + } + case GT_CAST: + { + GenTreeCast* cast = op2->gtGetOp1()->AsCast(); + + GenIntCastDesc desc(cast); + + // These casts should not lead to an overflow check. + assert(desc.CheckKind() == GenIntCastDesc::CHECK_NONE); + + insOpts extOpts = INS_OPTS_NONE; + switch (desc.ExtendKind()) + { + case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: + extOpts = (desc.ExtendSrcSize() == 1) ? INS_OPTS_UXTB : INS_OPTS_UXTH; + break; + case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: + extOpts = (desc.ExtendSrcSize() == 1) ? INS_OPTS_SXTB : INS_OPTS_SXTH; + break; + case GenIntCastDesc::ZERO_EXTEND_INT: + extOpts = INS_OPTS_UXTW; + break; + case GenIntCastDesc::SIGN_EXTEND_INT: + extOpts = INS_OPTS_SXTW; + break; + case GenIntCastDesc::COPY: + extOpts = INS_OPTS_NONE; // Perform cast implicitly. + break; + default: + // Other casts should not lead here as they will not pass the + // IsContainableUnaryOrBinaryOp check. + unreached(); + } + + emit->emitIns_R_R(ins, cmpSize, op1->GetRegNum(), cast->CastOp()->GetRegNum(), extOpts); + break; } - break; default: unreached(); @@ -4798,6 +4661,45 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) op2->gtGetOp2()->AsIntConCommon()->IntegralValue(), ShiftOpToInsOpts(oper)); break; + case GT_CAST: + { + assert(ins == INS_cmp); + assert(cmpSize >= genTypeSize(op2->CastToType())); + assert(cmpSize == EA_4BYTE || cmpSize == EA_8BYTE); + assert(op1->gtHasReg(compiler)); + assert(op2->gtGetOp1()->gtHasReg(compiler)); + + GenTreeCast* cast = op2->AsCast(); + + GenIntCastDesc desc(cast); + + // These casts should not lead to an overflow check. + assert(desc.CheckKind() == GenIntCastDesc::CHECK_NONE); + + insOpts extOpts = INS_OPTS_NONE; + switch (desc.ExtendKind()) + { + case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: + extOpts = (desc.ExtendSrcSize() == 1) ? INS_OPTS_UXTB : INS_OPTS_UXTH; + break; + case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: + extOpts = (desc.ExtendSrcSize() == 1) ? INS_OPTS_SXTB : INS_OPTS_SXTH; + break; + case GenIntCastDesc::ZERO_EXTEND_INT: + extOpts = INS_OPTS_UXTW; + break; + case GenIntCastDesc::SIGN_EXTEND_INT: + extOpts = INS_OPTS_SXTW; + break; + default: + // Other casts should not lead here as they will not pass the + // IsContainableUnaryOrBinaryOp check. + unreached(); + } + + emit->emitIns_R_R(INS_cmp, cmpSize, op1->GetRegNum(), cast->gtGetOp1()->GetRegNum(), extOpts); + break; + } default: unreached(); } @@ -5072,6 +4974,23 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) } } +//------------------------------------------------------------------------ +// genCompareImmAndJump: Generates code for a compare-and-branch between a register and +// immediate value. +// +// The implementation tries to use cb(n)z wherever possible. Otherwise it will +// fall back to a default cmp/b.cc sequence. +// +// Arguments: +// cond - The condition code to test (EQ/NE). 
+// reg - The register to compare. +// compareImm - The immediate value to compare against. +// emitAttr - The size of the comparison. +// target - The branch target for when the check passes. +// +// Return Value: +// None +// void CodeGen::genCompareImmAndJump( GenCondition::Code cond, regNumber reg, ssize_t compareImm, emitAttr size, BasicBlock* target) { @@ -5084,13 +5003,6 @@ void CodeGen::genCompareImmAndJump( instruction ins = (cond == GenCondition::EQ) ? INS_cbz : INS_cbnz; GetEmitter()->emitIns_J_R(ins, size, target, reg); } - else if (isPow2(compareImm)) - { - // We can use tbz/tbnz - instruction ins = (cond == GenCondition::EQ) ? INS_tbz : INS_tbnz; - int imm = genLog2((size_t)compareImm); - GetEmitter()->emitIns_J_R_I(ins, size, target, reg, imm); - } else { // Emit compare and branch pair default. @@ -5222,14 +5134,14 @@ bool CodeGen::IsSaveFpLrWithAllCalleeSavedRegisters() const void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) { - void* addr = nullptr; void* pAddr = nullptr; - emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; - addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); - regNumber callTarget = REG_NA; + EmitCallParams params; + params.callType = EC_FUNC_TOKEN; + params.addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regMaskTP killSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); - if (addr == nullptr) + if (params.addr == nullptr) { // This is call to a runtime helper. // adrp x, [reloc:rel page addr] @@ -5245,37 +5157,33 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, } regMaskTP callTargetMask = genRegMask(callTargetReg); - regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); - // assert that all registers in callTargetMask are in the callKillSet - noway_assert((callTargetMask & callKillSet) == callTargetMask); - - callTarget = callTargetReg; + noway_assert((callTargetMask & killSet) == callTargetMask); if (compiler->opts.compReloc) { // adrp + add with relocations will be emitted - GetEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, + GetEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTargetReg, (ssize_t)pAddr DEBUGARG((size_t)compiler->eeFindHelper(helper)) DEBUGARG(GTF_ICON_METHOD_HDL)); } else { - instGen_Set_Reg_To_Imm(EA_PTRSIZE, callTarget, (ssize_t)addr); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, callTargetReg, (ssize_t)pAddr); } - GetEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget); - callType = emitter::EC_INDIR_R; + GetEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTargetReg, callTargetReg); + + params.callType = EC_INDIR_R; + params.ireg = callTargetReg; } - GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, - retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, DebugInfo(), callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ - ); + params.methHnd = compiler->eeFindHelper(helper); + params.argSize = argSize; + params.retSize = retSize; + + genEmitCallWithCurrentGC(params); - regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); - regSet.verifyRegistersUsed(killMask); + regSet.verifyRegistersUsed(killSet); } #ifdef FEATURE_SIMD @@ -5978,7 +5886,7 @@ BasicBlock* CodeGen::genGetThrowHelper(SpecialCodeKind codeKind) // For code with throw helper blocks, find and use the 
helper block for // raising the exception. The block may be shared by other trees too. Compiler::AddCodeDsc* add = compiler->fgFindExcptnTarget(codeKind, compiler->compCurBB); - PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block")); + assert((add != nullptr) && ("ERROR: failed to find exception throw block")); assert(add->acdUsed); excpRaisingBlock = add->acdDstBlk; #if !FEATURE_FIXED_OUT_ARGS diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 3b7aa126e3c4..58275619194d 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -295,6 +295,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; #endif // SWIFT_SUPPORT + case GT_RETURN_SUSPEND: + genReturnSuspend(treeNode->AsUnOp()); + break; + case GT_LEA: // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. genLeaInstruction(treeNode->AsAddrMode()); @@ -424,10 +428,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genPutArgReg(treeNode->AsOp()); break; - case GT_PUTARG_SPLIT: - genPutArgSplit(treeNode->AsPutArgSplit()); - break; - case GT_CALL: genCall(treeNode->AsCall()); break; @@ -510,6 +510,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genConsumeReg(treeNode); break; + case GT_ASYNC_CONTINUATION: + genCodeForAsyncContinuation(treeNode); + break; + case GT_PINVOKE_PROLOG: noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask(compiler->info.compCallConv)) == 0); @@ -630,7 +634,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } else { - // Ngen case - GS cookie constant needs to be accessed through an indirection. + // AOT case - GS cookie constant needs to be accessed through an indirection. instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr, INS_FLAGS_DONT_CARE DEBUGARG((size_t)THT_GSCookieCheck) DEBUGARG(GTF_EMPTY)); GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSConst, regGSConst, 0); @@ -792,7 +796,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) if (treeNode->putInIncomingArgArea()) { varNumOut = getFirstArgWithStackSlot(); - argOffsetMax = compiler->compArgSize; + argOffsetMax = compiler->lvaParameterStackSize; #if FEATURE_FASTTAILCALL // This must be a fast tail call. 
assert(treeNode->gtCall->IsFastTailCall()); @@ -866,7 +870,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) if (source->isContained()) { #ifdef TARGET_ARM64 - assert(source->OperGet() == GT_CNS_INT); + assert(source->OperIs(GT_CNS_INT)); assert(source->AsIntConCommon()->IconValue() == 0); emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut); @@ -899,7 +903,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { assert(source->isContained()); // We expect that this node was marked as contained in Lower - if (source->OperGet() == GT_FIELD_LIST) + if (source->OperIs(GT_FIELD_LIST)) { genPutArgStkFieldList(treeNode, varNumOut); } @@ -1153,237 +1157,6 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) genProduceReg(tree); } -//--------------------------------------------------------------------- -// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node -// -// Arguments -// tree - the GT_PUTARG_SPLIT node -// -// Return value: -// None -// -void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) -{ - assert(treeNode->OperIs(GT_PUTARG_SPLIT)); - - GenTree* source = treeNode->gtOp1; - emitter* emit = GetEmitter(); - unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; - unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; - - if (source->OperGet() == GT_FIELD_LIST) - { - // Evaluate each of the GT_FIELD_LIST items into their register - // and store their register into the outgoing argument area - unsigned regIndex = 0; - unsigned firstOnStackOffs = UINT_MAX; - - for (GenTreeFieldList::Use& use : source->AsFieldList()->Uses()) - { - GenTree* nextArgNode = use.GetNode(); - regNumber fieldReg = nextArgNode->GetRegNum(); - genConsumeReg(nextArgNode); - - if (regIndex >= treeNode->gtNumRegs) - { - if (firstOnStackOffs == UINT_MAX) - { - firstOnStackOffs = use.GetOffset(); - } - - var_types type = use.GetType(); - unsigned offset = treeNode->getArgOffset() + use.GetOffset() - firstOnStackOffs; - // We can't write beyond the outgoing arg area - assert((offset + genTypeSize(type)) <= argOffsetMax); - - // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing - // argument area - emit->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), fieldReg, varNumOut, offset); - } - else - { - var_types type = treeNode->GetRegType(regIndex); - regNumber argReg = treeNode->GetRegNumByIdx(regIndex); - - // If child node is not already in the register we need, move it - inst_Mov(type, argReg, fieldReg, /* canSkip */ true); - - regIndex++; - } - } - } - else - { - var_types targetType = source->TypeGet(); - assert(source->isContained() && varTypeIsStruct(targetType)); - - // We need a register to store intermediate values that we are loading - // from the source into. We can usually use one of the target registers - // that will be overridden anyway. The exception is when the source is - // in a register and that register is the unique target register we are - // placing. LSRA will always allocate an internal register when there - // is just one target register to handle this situation. 
- // - int firstRegToPlace; - regNumber valueReg = REG_NA; - unsigned srcLclNum = BAD_VAR_NUM; - unsigned srcLclOffset = 0; - regNumber addrReg = REG_NA; - var_types addrType = TYP_UNDEF; - ClassLayout* layout = nullptr; - - if (source->OperIsLocalRead()) - { - srcLclNum = source->AsLclVarCommon()->GetLclNum(); - srcLclOffset = source->AsLclVarCommon()->GetLclOffs(); - layout = source->AsLclVarCommon()->GetLayout(compiler); - LclVarDsc* varDsc = compiler->lvaGetDesc(srcLclNum); - - // This struct must live on the stack frame. - assert(varDsc->lvOnFrame && !varDsc->lvRegister); - - // No possible conflicts, just use the first register as the value register. - firstRegToPlace = 0; - valueReg = treeNode->GetRegNumByIdx(0); - } - else // we must have a GT_BLK - { - layout = source->AsBlk()->GetLayout(); - addrReg = genConsumeReg(source->AsBlk()->Addr()); - addrType = source->AsBlk()->Addr()->TypeGet(); - - regNumber allocatedValueReg = REG_NA; - if (treeNode->gtNumRegs == 1) - { - allocatedValueReg = internalRegisters.Extract(treeNode); - } - - // Pick a register to store intermediate values in for the to-stack - // copy. It must not conflict with addrReg. We try to prefer an - // argument register since those can always use thumb encoding. - valueReg = treeNode->GetRegNumByIdx(0); - if (valueReg == addrReg) - { - if (treeNode->gtNumRegs == 1) - { - valueReg = allocatedValueReg; - } - else - { - // Prefer argument register that can always use thumb encoding. - valueReg = treeNode->GetRegNumByIdx(1); - } - } - - // Find first register to place. If we are placing addrReg, then - // make sure we place it last to avoid clobbering its value. - // - // The loop below will start at firstRegToPlace and place - // treeNode->gtNumRegs registers in order, with wraparound. For - // example, if the registers to place are r0, r1, r2=addrReg, r3 - // then we will set firstRegToPlace = 3 (r3) and the loop below - // will place r3, r0, r1, r2. The last placement will clobber - // addrReg. 
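Editor's note: the comment being removed above describes how the split-argument code ordered register placement so that the address register is overwritten last. A minimal standalone sketch of that wraparound rule, using hypothetical types rather than the JIT's `regNumber`/`GenTreePutArgSplit`:

```cpp
#include <cstdio>
#include <vector>

// Hypothetical stand-in for the JIT's register numbering.
using RegNumber = int;

// Compute the order in which target registers should be populated so that
// 'addrReg' (the register holding the source address) is clobbered last.
// Mirrors the wraparound rule described in the removed comment above.
std::vector<unsigned> PlacementOrder(const std::vector<RegNumber>& targets, RegNumber addrReg)
{
    unsigned first = 0;
    for (unsigned i = 0; i < targets.size(); i++)
    {
        if (targets[i] == addrReg)
        {
            first = i + 1; // start just after addrReg so it is placed last
            break;
        }
    }

    std::vector<unsigned> order;
    unsigned cur = first;
    for (unsigned placed = 0; placed < targets.size(); placed++)
    {
        if (cur == targets.size())
        {
            cur = 0; // wrap around
        }
        order.push_back(cur);
        cur++;
    }
    return order;
}

int main()
{
    // Registers r0, r1, r2, r3 where r2 holds the source address.
    std::vector<RegNumber> targets = {0, 1, 2, 3};
    for (unsigned idx : PlacementOrder(targets, /* addrReg */ 2))
    {
        printf("place r%d\n", targets[idx]); // prints r3, r0, r1, r2
    }
    return 0;
}
```

With the example registers from the comment (r0, r1, r2 = addrReg, r3) the function starts at index 3 and wraps, so addrReg is only overwritten by the final move.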
- firstRegToPlace = 0; - for (unsigned i = 0; i < treeNode->gtNumRegs; i++) - { - if (treeNode->GetRegNumByIdx(i) == addrReg) - { - firstRegToPlace = i + 1; - break; - } - } - } - - // Put on stack first - unsigned structOffset = treeNode->gtNumRegs * TARGET_POINTER_SIZE; - unsigned remainingSize = layout->GetSize() - structOffset; - unsigned argOffsetOut = treeNode->getArgOffset(); - - assert((remainingSize > 0) && (roundUp(remainingSize, TARGET_POINTER_SIZE) == treeNode->GetStackByteSize())); - while (remainingSize > 0) - { - var_types type; - if (remainingSize >= TARGET_POINTER_SIZE) - { - type = layout->GetGCPtrType(structOffset / TARGET_POINTER_SIZE); - } - else if (remainingSize >= 4) - { - type = TYP_INT; - } - else if (remainingSize >= 2) - { - type = TYP_USHORT; - } - else - { - assert(remainingSize == 1); - type = TYP_UBYTE; - } - - emitAttr attr = emitActualTypeSize(type); - unsigned moveSize = genTypeSize(type); - - instruction loadIns = ins_Load(type); - if (srcLclNum != BAD_VAR_NUM) - { - // Load from our local source - emit->emitIns_R_S(loadIns, attr, valueReg, srcLclNum, srcLclOffset + structOffset); - } - else - { - assert(valueReg != addrReg); - - // Load from our address expression source - emit->emitIns_R_R_I(loadIns, attr, valueReg, addrReg, structOffset); - } - - // Emit the instruction to store the register into the outgoing argument area - emit->emitIns_S_R(ins_Store(type), attr, valueReg, varNumOut, argOffsetOut); - argOffsetOut += moveSize; - assert(argOffsetOut <= argOffsetMax); - - remainingSize -= moveSize; - structOffset += moveSize; - } - - // Place registers starting from firstRegToPlace. It should ensure we - // place addrReg last (if we place it at all). - structOffset = static_cast(firstRegToPlace) * TARGET_POINTER_SIZE; - unsigned curRegIndex = firstRegToPlace; - - for (unsigned regsPlaced = 0; regsPlaced < treeNode->gtNumRegs; regsPlaced++) - { - if (curRegIndex == treeNode->gtNumRegs) - { - curRegIndex = 0; - structOffset = 0; - } - - regNumber targetReg = treeNode->GetRegNumByIdx(curRegIndex); - var_types type = treeNode->GetRegType(curRegIndex); - - if (srcLclNum != BAD_VAR_NUM) - { - // Load from our local source - emit->emitIns_R_S(INS_ldr, emitTypeSize(type), targetReg, srcLclNum, srcLclOffset + structOffset); - } - else - { - assert((addrReg != targetReg) || (regsPlaced == treeNode->gtNumRegs - 1)); - - // Load from our address expression source - emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), targetReg, addrReg, structOffset); - } - - curRegIndex++; - structOffset += TARGET_POINTER_SIZE; - } - } - genProduceReg(treeNode); -} - #ifdef FEATURE_SIMD //---------------------------------------------------------------------------------- // genMultiRegStoreToSIMDLocal: store multi-reg value to a single-reg SIMD local @@ -1481,13 +1254,10 @@ void CodeGen::genRangeCheck(GenTree* oper) assert(!arrLen->isContained()); // For (index == 0), we can just test if (length == 0) as this is the only case that would throw. // This may lead to an optimization by using cbz/tbnz. - genJumpToThrowHlpBlk( - bndsChk->gtThrowKind, - [&](BasicBlock* target, bool isInline) { + genJumpToThrowHlpBlk(bndsChk->gtThrowKind, [&](BasicBlock* target, bool isInline) { genCompareImmAndJump(isInline ? 
GenCondition::NE : GenCondition::EQ, arrLen->GetRegNum(), 0, emitActualTypeSize(arrLen), target); - }, - bndsChk->gtIndRngFailBB); + }); return; } #endif @@ -1509,7 +1279,7 @@ void CodeGen::genRangeCheck(GenTree* oper) #endif // DEBUG GetEmitter()->emitInsBinary(INS_cmp, emitActualTypeSize(bndsChkType), src1, src2); - genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB); + genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind); } //--------------------------------------------------------------------- @@ -1717,7 +1487,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) { GetEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, base->GetRegNum(), node->gtLenOffset); GetEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(index->TypeGet()), indexReg, tmpReg); - genJumpToThrowHlpBlk(EJ_hs, SCK_RNGCHK_FAIL, node->gtIndRngFailBB); + genJumpToThrowHlpBlk(EJ_hs, SCK_RNGCHK_FAIL); } // Can we use a ScaledAdd instruction? @@ -1919,7 +1689,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) #ifdef FEATURE_SIMD // Handling of Vector3 type values loaded through indirection. - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genLoadIndTypeSimd12(tree); return; @@ -3460,56 +3230,58 @@ void CodeGen::genCall(GenTreeCall* call) void CodeGen::genCallInstruction(GenTreeCall* call) { // Determine return value size(s). - const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - emitAttr retSize = EA_PTRSIZE; - emitAttr secondRetSize = EA_UNKNOWN; + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + EmitCallParams params; // unused values are of no interest to GC. if (!call->IsUnusedValue()) { if (call->HasMultiRegRetVal()) { - retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); - secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); + params.retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); + params.secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); } else { - assert(call->gtType != TYP_STRUCT); + assert(!call->TypeIs(TYP_STRUCT)); - if (call->gtType == TYP_REF) + if (call->TypeIs(TYP_REF)) { - retSize = EA_GCREF; + params.retSize = EA_GCREF; } - else if (call->gtType == TYP_BYREF) + else if (call->TypeIs(TYP_BYREF)) { - retSize = EA_BYREF; + params.retSize = EA_BYREF; } } } #ifdef TARGET_ARM // ARM32 support multireg returns, but only to return 64bit primitives. - assert(secondRetSize != EA_GCREF); - assert(secondRetSize != EA_BYREF); + assert(params.secondRetSize != EA_GCREF); + assert(params.secondRetSize != EA_BYREF); #endif - DebugInfo di; + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); + // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. // We don't want tail call helper calls that were converted from normal calls to get a record, // so we skip this hash table lookup logic in that case. if (compiler->opts.compDbgInfo && compiler->genCallSite2DebugInfoMap != nullptr && !call->IsTailCall()) { + DebugInfo di; (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di); + params.debugInfo = di; } - CORINFO_SIG_INFO* sigInfo = nullptr; #ifdef DEBUG // Pass the call signature information down into the emitter so the emitter can associate // native call sites with the signatures they were generated from. 
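Editor's note: the hunk above derives `params.retSize` from the call's return type so the emitter can track whether the return register holds a GC reference. A small sketch of the same classification, with made-up enum names standing in for the JIT's `var_types`/`emitAttr`:

```cpp
#include <cstdio>

// Hypothetical stand-ins for the JIT's var_types / emitAttr enums.
enum class ValType { Int, Ref, ByRef, Struct };
enum class EmitAttr { PtrSize, GCRef, ByRef };

// Classify the primary return register for GC reporting, following the
// retSize selection shown in the hunk above.
EmitAttr RetSizeFor(ValType retType, bool valueIsUsed)
{
    if (!valueIsUsed)
    {
        return EmitAttr::PtrSize; // unused values are of no interest to GC
    }
    switch (retType)
    {
        case ValType::Ref:   return EmitAttr::GCRef;
        case ValType::ByRef: return EmitAttr::ByRef;
        default:             return EmitAttr::PtrSize;
    }
}

int main()
{
    printf("%d\n", (int)RetSizeFor(ValType::Ref, true));   // GCRef
    printf("%d\n", (int)RetSizeFor(ValType::ByRef, true)); // ByRef
    printf("%d\n", (int)RetSizeFor(ValType::Ref, false));  // PtrSize (unused value)
    return 0;
}
```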
if (!call->IsHelperCall()) { - sigInfo = call->callSig; + params.sigInfo = call->callSig; } if (call->IsFastTailCall()) @@ -3526,7 +3298,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) for (CallArg& arg : call->gtArgs.Args()) { - for (const ABIPassingSegment& seg : arg.NewAbiInfo.Segments()) + for (const ABIPassingSegment& seg : arg.AbiInfo.Segments()) { if (seg.IsPassedInRegister() && ((trashedByEpilog & seg.GetRegisterMask()) != 0)) { @@ -3539,8 +3311,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } #endif // DEBUG - CORINFO_METHOD_HANDLE methHnd; - GenTree* target = getCallTarget(call, &methHnd); + + GenTree* target = getCallTarget(call, ¶ms.methHnd); if (target != nullptr) { @@ -3559,7 +3331,6 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // We just need to emit "call reg" in this case. // assert(genIsValidIntReg(target->GetRegNum())); - bool noSafePoint = false; #ifdef TARGET_ARM64 bool isTlsHandleTarget = @@ -3569,11 +3340,11 @@ void CodeGen::genCallInstruction(GenTreeCall* call) { assert(call->gtFlags & GTF_TLS_GET_ADDR); emitter* emitter = GetEmitter(); - emitAttr attr = (emitAttr)(EA_CNS_TLSGD_RELOC | EA_CNS_RELOC_FLG | retSize); + emitAttr attr = (emitAttr)(EA_CNS_TLSGD_RELOC | EA_CNS_RELOC_FLG | params.retSize); GenTreeIntCon* iconNode = target->AsIntCon(); - methHnd = (CORINFO_METHOD_HANDLE)iconNode->gtIconVal; - retSize = EA_SET_FLG(retSize, EA_CNS_TLSGD_RELOC); - noSafePoint = true; + params.methHnd = (CORINFO_METHOD_HANDLE)iconNode->gtIconVal; + params.retSize = EA_SET_FLG(params.retSize, EA_CNS_TLSGD_RELOC); + params.noSafePoint = true; // For NativeAOT, linux/arm64, linker wants the following pattern, so we will generate // it as part of the call. Generating individual instructions is tricky to get it @@ -3595,22 +3366,15 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // ldr // add emitter->emitIns_Adrp_Ldr_Add(attr, REG_R0, target->GetRegNum(), - (ssize_t)methHnd DEBUGARG(iconNode->gtTargetHandle) + (ssize_t)params.methHnd DEBUGARG(iconNode->gtTargetHandle) DEBUGARG(iconNode->gtFlags)); } #endif - // clang-format off - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, // addr - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - target->GetRegNum(), - call->IsFastTailCall(), - noSafePoint); + params.callType = EC_INDIR_R; + params.ireg = target->GetRegNum(); + + genEmitCallWithCurrentGC(params); #ifdef TARGET_ARM64 if (isTlsHandleTarget) @@ -3619,7 +3383,6 @@ void CodeGen::genCallInstruction(GenTreeCall* call) GetEmitter()->emitIns_R_R_R(INS_add, EA_8BYTE, REG_R0, REG_R1, REG_R0); } #endif - // clang-format on } else { @@ -3667,80 +3430,55 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // assert(genIsValidIntReg(targetAddrReg)); - // clang-format off - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, // addr - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - targetAddrReg, - call->IsFastTailCall()); - // clang-format on + params.callType = EC_INDIR_R; + params.ireg = targetAddrReg; + genEmitCallWithCurrentGC(params); } else { // Generate a direct call to a non-virtual user defined or helper method assert(call->IsHelperCall() || (call->gtCallType == CT_USER_FUNC)); - void* addr = nullptr; #ifdef FEATURE_READYTORUN if (call->gtEntryPoint.addr != NULL) { assert(call->gtEntryPoint.accessType == IAT_VALUE); - addr = call->gtEntryPoint.addr; + params.addr = call->gtEntryPoint.addr; } else #endif // FEATURE_READYTORUN 
if (call->IsHelperCall()) { - CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(params.methHnd); noway_assert(helperNum != CORINFO_HELP_UNDEF); void* pAddr = nullptr; - addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + params.addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); assert(pAddr == nullptr); } else { // Direct call to a non-virtual user function. - addr = call->gtDirectCallAddress; + params.addr = call->gtDirectCallAddress; } - assert(addr != nullptr); + assert(params.addr != nullptr); // Non-virtual direct call to known addresses #ifdef TARGET_ARM - if (!validImmForBL((ssize_t)addr)) + if (!validImmForBL((ssize_t)params.addr)) { regNumber tmpReg = internalRegisters.GetSingle(call); - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr); - // clang-format off - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - NULL, - retSize, - di, - tmpReg, - call->IsFastTailCall()); - // clang-format on + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)params.addr); + params.callType = EC_INDIR_R; + params.ireg = tmpReg; + genEmitCallWithCurrentGC(params); } else #endif // TARGET_ARM { - // clang-format off - genEmitCall(emitter::EC_FUNC_TOKEN, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - addr, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - REG_NA, - call->IsFastTailCall()); - // clang-format on + params.callType = EC_FUNC_TOKEN; + genEmitCallWithCurrentGC(params); } } } @@ -4024,7 +3762,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) void CodeGen::genFloatToFloatCast(GenTree* treeNode) { // float <--> double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -4165,7 +3903,7 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, } // clang-format off -const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32] +const GenConditionDesc GenConditionDesc::map[32] { { }, // NONE { }, // 1 @@ -4528,7 +4266,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) // src - The source of the return // retTypeDesc - The return type descriptor. // -void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) +void CodeGen::genSIMDSplitReturn(GenTree* src, const ReturnTypeDesc* retTypeDesc) { assert(varTypeIsSIMD(src)); assert(src->isUsedFromReg()); @@ -4622,7 +4360,7 @@ void CodeGen::genPushCalleeSavedRegisters() // - Generate fully interruptible code for loops that contains calls // - Generate fully interruptible code for leaf methods // - // Given the limited benefit from this optimization (<10k for CoreLib NGen image), the extra complexity + // Given the limited benefit from this optimization (<10k for CoreLib AOT image), the extra complexity // is not worth it. // rsPushRegs |= RBM_LR; // We must save the return address (in the LR register) @@ -4678,8 +4416,7 @@ void CodeGen::genPushCalleeSavedRegisters() // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). // // For functions with GS and localloc, we change the frame so the frame pointer and LR are saved at the top - // of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same - // rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. 
+ // of the frame, just under the varargs registers (if any). // Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. // // The frames look like the following (simplified to only include components that matter for establishing the @@ -4697,8 +4434,6 @@ void CodeGen::genPushCalleeSavedRegisters() // |-----------------------| // | MonitorAcquired | // 8 bytes; for synchronized methods // |-----------------------| - // | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - // |-----------------------| // | locals, temps, etc. | // |-----------------------| // | possible GS cookie | @@ -4730,8 +4465,6 @@ void CodeGen::genPushCalleeSavedRegisters() // |-----------------------| // | MonitorAcquired | // 8 bytes; for synchronized methods // |-----------------------| - // | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - // |-----------------------| // | locals, temps, etc. | // |-----------------------| // | possible GS cookie | @@ -5204,7 +4937,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) addrInfo.addr = nullptr; addrInfo.accessType = IAT_VALUE; - if (jmpEpilog && lastNode->gtOper == GT_JMP) + if (jmpEpilog && lastNode->OperIs(GT_JMP)) { methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); @@ -5253,7 +4986,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) unwindStarted = true; } - if (jmpEpilog && lastNode->gtOper == GT_JMP && addrInfo.accessType == IAT_RELPVALUE) + if (jmpEpilog && lastNode->OperIs(GT_JMP) && addrInfo.accessType == IAT_RELPVALUE) { // IAT_RELPVALUE jump at the end is done using relative indirection, so, // additional helper register is required. @@ -5311,19 +5044,18 @@ void CodeGen::genFnEpilog(BasicBlock* block) /* figure out what jump we have */ GenTree* jmpNode = lastNode; #if !FEATURE_FASTTAILCALL - noway_assert(jmpNode->gtOper == GT_JMP); + noway_assert(jmpNode->OperIs(GT_JMP)); #else // FEATURE_FASTTAILCALL // armarch // If jmpNode is GT_JMP then gtNext must be null. // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. - noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + noway_assert(!jmpNode->OperIs(GT_JMP) || (jmpNode->gtNext == nullptr)); // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp - noway_assert((jmpNode->gtOper == GT_JMP) || - ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + noway_assert(jmpNode->OperIs(GT_JMP) || (jmpNode->OperIs(GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); // The next block is associated with this "if" stmt - if (jmpNode->gtOper == GT_JMP) + if (jmpNode->OperIs(GT_JMP)) #endif // FEATURE_FASTTAILCALL { // Simply emit a jump to the methodHnd. 
This is similar to a call so we can use @@ -5331,19 +5063,17 @@ void CodeGen::genFnEpilog(BasicBlock* block) assert(methHnd != nullptr); assert(addrInfo.addr != nullptr); -#ifdef TARGET_ARMARCH - emitter::EmitCallType callType; - void* addr; - regNumber indCallReg; + EmitCallParams params; + params.methHnd = methHnd; + switch (addrInfo.accessType) { case IAT_VALUE: if (validImmForBL((ssize_t)addrInfo.addr)) { // Simple direct call - callType = emitter::EC_FUNC_TOKEN; - addr = addrInfo.addr; - indCallReg = REG_NA; + params.callType = EC_FUNC_TOKEN; + params.addr = addrInfo.addr; break; } @@ -5354,14 +5084,13 @@ void CodeGen::genFnEpilog(BasicBlock* block) case IAT_PVALUE: // Load the address into a register, load indirect and call through a register // We have to use R12 since we assume the argument registers are in use - callType = emitter::EC_INDIR_R; - indCallReg = REG_INDIRECT_CALL_TARGET_REG; - addr = NULL; - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + params.callType = EC_INDIR_R; + params.ireg = REG_INDIRECT_CALL_TARGET_REG; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, params.ireg, (ssize_t)addrInfo.addr); if (addrInfo.accessType == IAT_PVALUE) { - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); - regSet.verifyRegUsed(indCallReg); + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, params.ireg, params.ireg, 0); + regSet.verifyRegUsed(params.ireg); } break; @@ -5371,11 +5100,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) // We have to use R12 since we assume the argument registers are in use // LR is used as helper register right before it is restored from stack, thus, // all relative address calculations are performed before LR is restored. - callType = emitter::EC_INDIR_R; - indCallReg = REG_R12; - addr = NULL; + params.callType = EC_INDIR_R; + params.ireg = REG_R12; - regSet.verifyRegUsed(indCallReg); + regSet.verifyRegUsed(params.ireg); break; } @@ -5384,31 +5112,12 @@ void CodeGen::genFnEpilog(BasicBlock* block) NO_WAY("Unsupported JMP indirection"); } - /* Simply emit a jump to the methodHnd. This is similar to a call so we can use - * the same descriptor with some minor adjustments. - */ - - // clang-format off - GetEmitter()->emitIns_Call(callType, - methHnd, - INDEBUG_LDISASM_COMMA(nullptr) - addr, - 0, // argSize - EA_UNKNOWN, // retSize -#if defined(TARGET_ARM64) - EA_UNKNOWN, // secondRetSize -#endif - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - DebugInfo(), - indCallReg, // ireg - REG_NA, // xreg - 0, // xmul - 0, // disp - true); // isJump - // clang-format on -#endif // TARGET_ARMARCH + // Simply emit a jump to the methodHnd. This is similar to a call so we can use + // the same descriptor with some minor adjustments. 
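Editor's note: the epilog hunk above selects a jump strategy based on the entry point's access type: a direct branch when the address fits the branch immediate, otherwise the address is materialized in a scratch register, with one extra dereference for the `IAT_PVALUE` case. A hedged sketch of that decision with invented enum and struct names (the branch-range check is only a rough stand-in for `validImmForBL`):

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the JIT's access-type and call-type enums.
enum class AccessType { Value, PValue, RelPValue };
enum class CallKind { DirectToken, IndirectThroughReg };

struct CallPlan
{
    CallKind kind;
    bool     loadIndirect; // emit an extra load through the scratch register first
};

// Rough stand-in for validImmForBL: assume a signed 26-bit branch range.
bool FitsInDirectBranch(intptr_t addr)
{
    return addr >= -(intptr_t(1) << 25) && addr < (intptr_t(1) << 25);
}

CallPlan PlanTailJump(AccessType access, intptr_t addr)
{
    if (access == AccessType::Value && FitsInDirectBranch(addr))
    {
        return {CallKind::DirectToken, false}; // simple direct branch
    }
    if (access == AccessType::Value || access == AccessType::PValue)
    {
        // Materialize the address in a register; PValue needs one dereference.
        return {CallKind::IndirectThroughReg, access == AccessType::PValue};
    }
    // RelPValue: relative indirection is resolved separately before the jump.
    return {CallKind::IndirectThroughReg, false};
}

int main()
{
    CallPlan p = PlanTailJump(AccessType::PValue, 0x7f000000);
    printf("indirect=%d loadIndirect=%d\n",
           p.kind == CallKind::IndirectThroughReg, p.loadIndirect);
    return 0;
}
```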
+ // + + params.isJump = true; + genEmitCallWithCurrentGC(params); } #if FEATURE_FASTTAILCALL else diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 5b280951178a..8fc6ccce7adf 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -201,7 +201,7 @@ void CodeGenInterface::CopyRegisterInfo() CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) { -#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(TARGET_X86) +#if !defined(TARGET_X86) m_stkArgVarNum = BAD_VAR_NUM; #endif @@ -613,7 +613,7 @@ regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc) // inline regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree) { - assert(tree->gtOper == GT_LCL_VAR); + assert(tree->OperIs(GT_LCL_VAR)); regMaskTP regMask = RBM_NONE; const LclVarDsc* varDsc = compiler->lvaGetDesc(tree->AsLclVarCommon()); @@ -805,8 +805,8 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) { unsigned varNum = lvaTrackedIndexToLclNum(deadVarIndex); LclVarDsc* varDsc = lvaGetDesc(varNum); - bool isGCRef = (varDsc->TypeGet() == TYP_REF); - bool isByRef = (varDsc->TypeGet() == TYP_BYREF); + bool isGCRef = varDsc->TypeIs(TYP_REF); + bool isByRef = varDsc->TypeIs(TYP_BYREF); bool isInReg = varDsc->lvIsInReg(); bool isInMemory = !isInReg || varDsc->IsAlwaysAliveInMemory(); @@ -841,8 +841,8 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) { unsigned varNum = lvaTrackedIndexToLclNum(bornVarIndex); LclVarDsc* varDsc = lvaGetDesc(varNum); - bool isGCRef = (varDsc->TypeGet() == TYP_REF); - bool isByRef = (varDsc->TypeGet() == TYP_BYREF); + bool isGCRef = varDsc->TypeIs(TYP_REF); + bool isByRef = varDsc->TypeIs(TYP_BYREF); if (varDsc->lvIsInReg()) { @@ -1119,6 +1119,11 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, [reg1 + reg2] [reg1 + reg2 * natural-scale] + The following indirections are valid address modes on riscv64: + + [reg] + [reg + icon] + */ /* All indirect address modes require the address to be an addition */ @@ -1199,7 +1204,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, /* Check for an addition of a constant */ if (op2->IsIntCnsFitsInI32() && op2->AsIntConCommon()->ImmedValCanBeFolded(compiler, addr->OperGet()) && - (op2->gtType != TYP_REF) && FitsIn(cns + op2->AsIntConCommon()->IconValue())) + !op2->TypeIs(TYP_REF) && FitsIn(cns + op2->AsIntConCommon()->IconValue())) { /* We're adding a constant */ @@ -1269,7 +1274,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, switch (op1->gtOper) { -#ifdef TARGET_XARCH +#if defined(TARGET_XARCH) || defined(TARGET_RISCV64) // TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_ADD: @@ -1291,7 +1296,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, } } break; -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_RISCV64 case GT_MUL: @@ -1314,7 +1319,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, rv2 = op1->AsOp()->gtOp1; int argScale; - while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0) + while ((rv2->OperIs(GT_MUL) || rv2->OperIs(GT_LSH)) && (argScale = rv2->GetScaledIndex()) != 0) { if (jitIsScaleIndexMul(argScale * mul, naturalMul)) { @@ -1347,7 +1352,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, noway_assert(op2); switch (op2->gtOper) { -#ifdef TARGET_XARCH +#if defined(TARGET_XARCH) || defined(TARGET_RISCV64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because // arm doesn't support both scale and offset at the same. 
Offset is handled // at the emitter as a peephole optimization. @@ -1370,7 +1375,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, } } break; -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_RISCV64 case GT_MUL: @@ -1390,7 +1395,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, // 'op2' is a scaled value...is it's argument also scaled? int argScale; rv2 = op2->AsOp()->gtOp1; - while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0) + while ((rv2->OperIs(GT_MUL) || rv2->OperIs(GT_LSH)) && (argScale = rv2->GetScaledIndex()) != 0) { if (jitIsScaleIndexMul(argScale * mul, naturalMul)) { @@ -1428,6 +1433,9 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, #endif FOUND_AM: +#ifdef TARGET_RISCV64 + assert(mul == 0 || mul == 1); +#endif if (rv2) { @@ -1498,6 +1506,21 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, return true; } +//------------------------------------------------------------------------ +// genEmitCallWithCurrentGC: +// Emit a call with GC information captured from current GC information. +// +// Parameters: +// params - Call emission parameters +// +void CodeGen::genEmitCallWithCurrentGC(EmitCallParams& params) +{ + params.ptrVars = gcInfo.gcVarPtrSetCur; + params.gcrefRegs = gcInfo.gcRegGCrefSetCur; + params.byrefRegs = gcInfo.gcRegByrefSetCur; + GetEmitter()->emitIns_Call(params); +} + /***************************************************************************** * * Generate an exit sequence for a return from a method (note: when compiling @@ -1593,7 +1616,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi { // Find the helper-block which raises the exception. Compiler::AddCodeDsc* add = compiler->fgFindExcptnTarget(codeKind, compiler->compCurBB); - PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block")); + assert((add != nullptr) && ("ERROR: failed to find exception throw block")); assert(add->acdUsed); excpRaisingBlock = add->acdDstBlk; #if !FEATURE_FIXED_OUT_ARGS @@ -1651,7 +1674,7 @@ void CodeGen::genCheckOverflow(GenTree* tree) emitJumpKind jumpKind; #ifdef TARGET_ARM64 - if (tree->OperGet() == GT_MUL) + if (tree->OperIs(GT_MUL)) { jumpKind = EJ_ne; } @@ -1670,7 +1693,7 @@ void CodeGen::genCheckOverflow(GenTree* tree) if (jumpKind == EJ_lo) { - if (tree->OperGet() != GT_SUB) + if (!tree->OperIs(GT_SUB)) { jumpKind = EJ_hs; } @@ -1686,49 +1709,26 @@ void CodeGen::genCheckOverflow(GenTree* tree) /***************************************************************************** * - * Update the current funclet as needed by calling genUpdateCurrentFunclet(). - * For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet - * is up-to-date. + * Update the current funclet by calling genUpdateCurrentFunclet(). + * 'block' must be the beginning of a funclet region. 
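Editor's note: this diff repeatedly replaces long positional `emitIns_Call` argument lists with an `EmitCallParams` struct, and the new `genEmitCallWithCurrentGC` helper shown above stamps the current GC liveness into that struct before emitting. A minimal sketch of the same parameter-object pattern, using invented field names rather than the emitter's real interface:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical, simplified mirror of the parameter object used above.
struct CallParams
{
    const void* addr        = nullptr;
    int         argSize     = 0;
    bool        isJump      = false;
    bool        noSafePoint = false;
    uint64_t    gcrefRegs   = 0; // filled in by the helper below
    uint64_t    byrefRegs   = 0;
};

struct GcState
{
    uint64_t gcrefRegs;
    uint64_t byrefRegs;
};

void EmitCall(const CallParams& p)
{
    printf("call addr=%p jump=%d gcref=%llx byref=%llx\n", (void*)p.addr, p.isJump,
           (unsigned long long)p.gcrefRegs, (unsigned long long)p.byrefRegs);
}

// Counterpart of genEmitCallWithCurrentGC: capture the current GC register
// state into the params before handing them to the emitter.
void EmitCallWithCurrentGC(CallParams& p, const GcState& gc)
{
    p.gcrefRegs = gc.gcrefRegs;
    p.byrefRegs = gc.byrefRegs;
    EmitCall(p);
}

int main()
{
    GcState    gc{0x5, 0x2};
    CallParams p;
    p.addr   = reinterpret_cast<const void*>(0x1000);
    p.isJump = true; // e.g. the epilog tail-jump case shown earlier
    EmitCallWithCurrentGC(p, gc);
    return 0;
}
```

The design point is that callers only set the fields they care about, while cross-cutting state (GC liveness) is captured in one place instead of being threaded through every call site.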
* */ void CodeGen::genUpdateCurrentFunclet(BasicBlock* block) { - if (!compiler->UsesFunclets()) - { - return; - } + assert(compiler->bbIsFuncletBeg(block)); + compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block)); - if (block->HasFlag(BBF_FUNCLET_BEG)) + // Check the current funclet index for correctness + if (compiler->funCurrentFunc()->funKind == FUNC_FILTER) { - compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block)); - if (compiler->funCurrentFunc()->funKind == FUNC_FILTER) - { - assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block); - } - else - { - // We shouldn't see FUNC_ROOT - assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER); - assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block); - } + assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block); } else { - assert(compiler->funCurrentFuncIdx() <= compiler->compFuncInfoCount); - if (compiler->funCurrentFunc()->funKind == FUNC_FILTER) - { - assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block)); - } - else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT) - { - assert(!block->hasHndIndex()); - } - else - { - assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER); - assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block)); - } + // We shouldn't see FUNC_ROOT + assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER); + assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block); } } @@ -1760,9 +1760,9 @@ void CodeGen::genGenerateCode(void** codePtr, uint32_t* nativeSizeOfCode) DoPhase(this, PHASE_EMIT_GCEH, &CodeGen::genEmitUnwindDebugGCandEH); #ifdef DEBUG - // For R2R/NAOT not all these helpers are implemented. So don't ask for them. + // For AOT not all these helpers are implemented. So don't ask for them. 
// - if (genWriteBarrierUsed && JitConfig.EnableExtraSuperPmiQueries() && !compiler->opts.IsReadyToRun()) + if (genWriteBarrierUsed && JitConfig.EnableExtraSuperPmiQueries() && !compiler->IsAot()) { void* ignored; for (int i = CORINFO_HELP_ASSIGN_REF; i <= CORINFO_HELP_BULK_WRITEBARRIER; i++) @@ -1826,88 +1826,40 @@ void CodeGen::genGenerateMachineCode() printf(" for "); -#if defined(TARGET_X86) - if (compiler->canUseEvexEncoding()) - { - if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) - { - if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2_V512)) - { - printf("X86 with AVX10.2/512"); - } - else - { - printf("X86 with AVX10.2/256"); - } - } - else if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512)) - { - printf("X86 with AVX10.1/512"); - } - else - { - printf("X86 with AVX10.1/256"); - } - } - else - { - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - printf("X86 with AVX512"); - } - } - else if (compiler->canUseVexEncoding()) - { - printf("X86 with AVX"); - } - else - { - printf("generic X86"); - } -#elif defined(TARGET_AMD64) - if (compiler->canUseEvexEncoding()) +#if defined(TARGET_XARCH) +#if defined(TARGET_64BIT) + printf("generic X64"); +#else + printf("generic X86"); +#endif + + // Check ISA directly here instead of using + // compOpportunisticallyDependsOn to avoid JIT-EE calls that could make + // us miss in SPMI + + if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX)) { - if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) - { - if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2_V512)) - { - printf("X64 with AVX10.2/512"); - } - else - { - printf("X64 with AVX10.2/256"); - } - } - else if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512)) - { - printf("X64 with AVX10.1/512"); - } - else - { - printf("X64 with AVX10.1/256"); - } - } - else - { - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - printf("X64 with AVX512"); - } + printf(" + VEX"); } - else if (compiler->canUseVexEncoding()) + + if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX512)) { - printf("X64 with AVX"); + printf(" + EVEX"); } - else + + if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_APX)) { - printf("generic X64"); + printf(" + APX"); } #elif defined(TARGET_ARM) printf("generic ARM"); #elif defined(TARGET_ARM64) printf("generic ARM64"); + + if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_Sve)) + { + printf(" + SVE"); + } #elif defined(TARGET_LOONGARCH64) printf("generic LOONGARCH64"); #elif defined(TARGET_RISCV64) @@ -1918,28 +1870,31 @@ void CodeGen::genGenerateMachineCode() if (TargetOS::IsWindows) { - printf(" - Windows"); + printf(" on Windows"); } else if (TargetOS::IsApplePlatform) { - printf(" - Apple"); + printf(" on Apple"); } else if (TargetOS::IsUnix) { - printf(" - Unix"); + printf(" on Unix"); } printf("\n"); printf("; %s code\n", compiler->compGetTieringName(false)); - if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + if (compiler->IsAot()) { - printf("; NativeAOT compilation\n"); - } - else if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_READYTORUN)) - { - printf("; ReadyToRun compilation\n"); + if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + { + printf("; NativeAOT compilation\n"); + } + else 
+ { + printf("; ReadyToRun compilation\n"); + } } if (compiler->opts.IsOSR()) @@ -1947,6 +1902,11 @@ void CodeGen::genGenerateMachineCode() printf("; OSR variant for entry point 0x%x\n", compiler->info.compILEntry); } + if (compiler->compIsAsync()) + { + printf("; async\n"); + } + if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT) { printf("; optimized code\n"); @@ -2020,8 +1980,7 @@ void CodeGen::genGenerateMachineCode() GetEmitter()->emitBegFN(isFramePointerUsed() #if defined(DEBUG) , - (compiler->compCodeOpt() != Compiler::SMALL_CODE) && - !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) + (compiler->compCodeOpt() != Compiler::SMALL_CODE) && !compiler->IsAot() #endif ); @@ -2102,7 +2061,7 @@ void CodeGen::genEmitMachineCode() bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ? -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#ifdef TARGET_64BIT trackedStackPtrsContig = false; #elif defined(TARGET_ARM) // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous @@ -2338,95 +2297,16 @@ void CodeGen::genReportEH() unsigned XTnum; - bool isNativeAOT = compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI); - - unsigned EHCount = compiler->compHndBBtabCount; - - // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the - // VM. - unsigned duplicateClauseCount = 0; - unsigned enclosingTryIndex; - - // Duplicate clauses are not used by NativeAOT ABI - if (compiler->UsesFunclets() && !isNativeAOT) - { - for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++) - { - for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index, - // ignoring 'mutual protect' trys - enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX; - enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex)) - { - ++duplicateClauseCount; - } - } - EHCount += duplicateClauseCount; - } - - unsigned clonedFinallyCount = 0; - - // Duplicate clauses are not used by NativeAOT ABI - if (compiler->UsesFunclets() && compiler->UsesCallFinallyThunks() && !isNativeAOT) - { - // We don't keep track of how many cloned finally there are. So, go through and count. - // We do a quick pass first through the EH table to see if there are any try/finally - // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY. 
- - bool anyFinallys = false; - for (EHblkDsc* const HBtab : EHClauses(compiler)) - { - if (HBtab->HasFinallyHandler()) - { - anyFinallys = true; - break; - } - } - if (anyFinallys) - { - for (BasicBlock* const block : compiler->Blocks()) - { - if (block->KindIs(BBJ_CALLFINALLY)) - { - ++clonedFinallyCount; - } - } - - EHCount += clonedFinallyCount; - } - } - #ifdef DEBUG if (compiler->opts.dspEHTable) { - if (compiler->UsesFunclets()) - { - if (compiler->UsesCallFinallyThunks()) - { - printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to " - "VM\n", - compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount); - assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount); - } - else - { - printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n", - compiler->compHndBBtabCount, duplicateClauseCount, EHCount); - assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount); - } - } -#if defined(FEATURE_EH_WINDOWS_X86) - else - { - printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount); - assert(compiler->compHndBBtabCount == EHCount); - } -#endif // FEATURE_EH_WINDOWS_X86 + printf("%d EH table entries\n", compiler->compHndBBtabCount); } #endif // DEBUG // Tell the VM how many EH clauses to expect. - compiler->eeSetEHcount(EHCount); - compiler->Metrics.EHClauseCount = (int)EHCount; + compiler->eeSetEHcount(compiler->compHndBBtabCount); + compiler->Metrics.EHClauseCount = (int)compiler->compHndBBtabCount; struct EHClauseInfo { @@ -2514,288 +2394,10 @@ void CodeGen::genReportEH() } } - assert(XTnum < EHCount); compiler->eeSetEHinfo(XTnum, &clause); } - // Now output duplicated clauses. - // - // If a funclet has been created by moving a handler out of a try region that it was originally nested - // within, then we need to report a "duplicate" clause representing the fact that an exception in that - // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region - // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is - // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler - // region as the enclosing try region's handler region. This is the sense in which it is duplicated: - // there is now a "duplicate" clause with the same handler region as another, but a different 'try' - // region. - // - // For example, consider this (capital letters represent an unknown code sequence, numbers identify a - // try or handler region): - // - // A - // try (1) { - // B - // try (2) { - // C - // } catch (3) { - // D - // } catch (4) { - // E - // } - // F - // } catch (5) { - // G - // } - // H - // - // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected - // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D". - // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4) - // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again - // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that. - // The code "D" and "E" won't be contiguous with the protected region for try (1) (which - // will, after moving catch (3) AND (4), be BCF). 
Thus, we need to add a new EH descriptor - // representing try (1) protecting the new funclets catch (3) and (4). - // The code will be generated as follows: - // - // ABCFH // "main" code - // D // funclet - // E // funclet - // G // funclet - // - // The EH regions are: - // - // C -> D - // C -> E - // BCF -> G - // D -> G // "duplicate" clause - // E -> G // "duplicate" clause - // - // Note that we actually need to generate one of these additional "duplicate" clauses for every - // region the funclet is nested in. Take this example: - // - // A - // try (1) { - // B - // try (2,3) { - // C - // try (4) { - // D - // try (5,6) { - // E - // } catch { - // F - // } catch { - // G - // } - // H - // } catch { - // I - // } - // J - // } catch { - // K - // } catch { - // L - // } - // M - // } catch { - // N - // } - // O - // - // When we pull out funclets, we get the following generated code: - // - // ABCDEHJMO // "main" function - // F // funclet - // G // funclet - // I // funclet - // K // funclet - // L // funclet - // N // funclet - // - // And the EH regions we report to the VM are (in order; main clauses - // first in most-to-least nested order, funclets ("duplicated clauses") - // last, in most-to-least nested) are: - // - // E -> F - // E -> G - // DEH -> I - // CDEHJ -> K - // CDEHJ -> L - // BCDEHJM -> N - // F -> I // funclet clause #1 for F - // F -> K // funclet clause #2 for F - // F -> L // funclet clause #3 for F - // F -> N // funclet clause #4 for F - // G -> I // funclet clause #1 for G - // G -> K // funclet clause #2 for G - // G -> L // funclet clause #3 for G - // G -> N // funclet clause #4 for G - // I -> K // funclet clause #1 for I - // I -> L // funclet clause #2 for I - // I -> N // funclet clause #3 for I - // K -> N // funclet clause #1 for K - // L -> N // funclet clause #1 for L - // - // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM. - // Note that due to the nature of 'mutually protect' clauses, it would be incorrect - // to add a clause "F -> G" because F is NOT protected by G, but we still have - // both "F -> K" and "F -> L" because F IS protected by both of those handlers. - // - // The overall ordering of the clauses is still the same most-to-least nesting - // after front-to-back start offset. Because we place the funclets at the end - // these new clauses should also go at the end by this ordering. - // - - if (duplicateClauseCount > 0) - { - unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported? - unsigned XTnum2; - EHblkDsc* HBtab; - for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++) - { - unsigned enclosingTryIndex; - - EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2); - - for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index, - // ignoring 'mutual protect' trys - enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX; - enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex)) - { - // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet - // that will have the enclosing try protecting the funclet. - - noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a - // greater EH table index - - EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex); - - // The try region is the handler of the funclet. 
Note that for filters, we don't protect the - // filter region, only the filter handler region. This is because exceptions in filters never - // escape; the VM swallows them. - - BasicBlock* bbTryBeg = fletTab->ebdHndBeg; - BasicBlock* bbTryLast = fletTab->ebdHndLast; - - BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try - BasicBlock* bbHndLast = encTab->ebdHndLast; - - UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp; - - tryBeg = compiler->ehCodeOffset(bbTryBeg); - hndBeg = compiler->ehCodeOffset(bbHndBeg); - - tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize - : compiler->ehCodeOffset(bbTryLast->Next()); - hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize - : compiler->ehCodeOffset(bbHndLast->Next()); - - if (encTab->HasFilter()) - { - hndTyp = compiler->ehCodeOffset(encTab->ebdFilter); - } - else - { - hndTyp = encTab->ebdTyp; - } - - CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType); - - // Tell the VM this is an extra clause caused by moving funclets out of line. - flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE); - - // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of - // the fields aren't really accurate. For example, we set "TryLength" to the offset of the - // instruction immediately after the 'try' body. So, it really could be more accurately named - // "TryEndOffset". - - CORINFO_EH_CLAUSE clause; - clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */ - clause.Flags = flags; - clause.TryOffset = tryBeg; - clause.TryLength = tryEnd; - clause.HandlerOffset = hndBeg; - clause.HandlerLength = hndEnd; - - assert(XTnum < EHCount); - - // Tell the VM about this EH clause (a duplicated clause). - compiler->eeSetEHinfo(XTnum, &clause); - - ++XTnum; - ++reportedDuplicateClauseCount; - -#ifndef DEBUG - if (duplicateClauseCount == reportedDuplicateClauseCount) - { - break; // we've reported all of them; no need to continue looking - } -#endif // !DEBUG - - } // for each 'true' enclosing 'try' - } // for each EH table entry - - assert(duplicateClauseCount == reportedDuplicateClauseCount); - } // if (duplicateClauseCount > 0) - - if (clonedFinallyCount > 0) - { - unsigned reportedClonedFinallyCount = 0; - for (BasicBlock* const block : compiler->Blocks()) - { - if (block->KindIs(BBJ_CALLFINALLY)) - { - UNATIVE_OFFSET hndBeg, hndEnd; - - hndBeg = compiler->ehCodeOffset(block); - - // How big is it? The BBJ_CALLFINALLYRET has a null bbEmitCookie! Look for the block after, which must - // be a label or jump target, since the BBJ_CALLFINALLY doesn't fall through. - BasicBlock* bbLabel = block->Next(); - if (block->isBBCallFinallyPair()) - { - bbLabel = bbLabel->Next(); // skip the BBJ_CALLFINALLYRET - } - if (bbLabel == nullptr) - { - hndEnd = compiler->info.compNativeCodeSize; - } - else - { - hndEnd = compiler->ehCodeOffset(bbLabel); - } - - CORINFO_EH_CLAUSE clause; - clause.ClassToken = 0; // unused - clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE); - clause.TryOffset = hndBeg; - clause.TryLength = hndBeg; - clause.HandlerOffset = hndBeg; - clause.HandlerLength = hndEnd; - - assert(XTnum < EHCount); - - // Tell the VM about this EH clause (a cloned finally clause). 
- compiler->eeSetEHinfo(XTnum, &clause); - - ++XTnum; - ++reportedClonedFinallyCount; - -#ifndef DEBUG - if (clonedFinallyCount == reportedClonedFinallyCount) - { - break; // we're done; no need to keep looking - } -#endif // !DEBUG - } // block is BBJ_CALLFINALLY - } // for each block - - assert(clonedFinallyCount == reportedClonedFinallyCount); - } // if (clonedFinallyCount > 0) - - assert(XTnum == EHCount); + assert(XTnum == compiler->compHndBBtabCount); } //---------------------------------------------------------------------- @@ -2932,7 +2534,7 @@ void CodeGen::genGCWriteBarrier(GenTreeStoreInd* store, GCInfo::WriteBarrierForm { wbKind = CWBKind_RetBuf } - else if (varDsc->TypeGet() == TYP_BYREF) + else if (varDsc->TypeIs(TYP_BYREF)) { wbKind = varDsc->lvIsParam ? CWBKind_ByRefArg : CWBKind_OtherByRefLocal; } @@ -3180,6 +2782,42 @@ class RegGraph printf("\n"); } } + + // ----------------------------------------------------------------------------- + // Validate: Validate that the graph looks reasonable + // + void Validate() + { + for (int i = 0; i < m_nodes.Height(); i++) + { + RegNode* regNode = m_nodes.Bottom(i); + for (RegNodeEdge* incoming = regNode->incoming; incoming != nullptr; incoming = incoming->nextIncoming) + { + unsigned destStart = incoming->destOffset; + unsigned destEnd = destStart + genTypeSize(incoming->type); + + for (RegNodeEdge* otherIncoming = incoming->nextIncoming; otherIncoming != nullptr; + otherIncoming = otherIncoming->nextIncoming) + { + unsigned otherDestStart = otherIncoming->destOffset; + unsigned otherDestEnd = otherDestStart + genTypeSize(otherIncoming->type); + if (otherDestEnd <= destStart) + { + continue; + } + + if (otherDestStart >= destEnd) + { + continue; + } + + // This means we have multiple registers being assigned to + // the same register. That should not happen. + assert(!"Detected conflicting incoming edges when homing parameter registers"); + } + } + } + } #endif }; @@ -3278,7 +2916,7 @@ void CodeGen::genSpillOrAddRegisterParam( LclVarDsc* paramVarDsc = compiler->lvaGetDesc(paramLclNum); var_types storeType = genParamStackType(paramVarDsc, segment); - if ((varDsc->TypeGet() != TYP_STRUCT) && (genTypeSize(genActualType(varDsc)) < genTypeSize(storeType))) + if (!varDsc->TypeIs(TYP_STRUCT) && (genTypeSize(genActualType(varDsc)) < genTypeSize(storeType))) { // Can happen for struct fields due to padding. storeType = genActualType(varDsc); @@ -3344,7 +2982,10 @@ void CodeGen::genSpillOrAddNonStandardRegisterParam(unsigned lclNum, regNumber s { RegNode* sourceRegNode = graph->GetOrAdd(sourceReg); RegNode* destRegNode = graph->GetOrAdd(varDsc->GetRegNum()); - graph->AddEdge(sourceRegNode, destRegNode, TYP_I_IMPL, 0); + if (sourceRegNode != destRegNode) + { + graph->AddEdge(sourceRegNode, destRegNode, TYP_I_IMPL, 0); + } } } @@ -3465,6 +3106,8 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) DBEXEC(VERBOSE, graph.Dump()); + INDEBUG(graph.Validate()); + regMaskTP busyRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; while (true) { @@ -3820,7 +3463,7 @@ void CodeGen::genCheckUseBlockInit() { // Var is on the stack at entry. 
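Editor's note: the `RegGraph::Validate` method added earlier in this hunk rejects two incoming edges that write overlapping byte ranges of the same destination register. The check is the standard half-open interval intersection test; a standalone sketch with an assumed segment struct (not the JIT's `RegNodeEdge`):

```cpp
#include <cassert>

// Hypothetical edge: writes 'size' bytes at 'start' into a destination register.
struct Segment
{
    unsigned start; // destOffset
    unsigned size;  // genTypeSize(type)
};

// Two half-open ranges [a.start, a.start+a.size) and [b.start, b.start+b.size)
// conflict unless one ends at or before the other begins.
bool Conflicts(const Segment& a, const Segment& b)
{
    unsigned aEnd = a.start + a.size;
    unsigned bEnd = b.start + b.size;
    if (bEnd <= a.start) return false;
    if (b.start >= aEnd) return false;
    return true;
}

int main()
{
    assert(!Conflicts({0, 4}, {4, 4})); // adjacent halves of a register: fine
    assert(Conflicts({0, 8}, {4, 4}));  // second write lands inside the first: conflict
    return 0;
}
```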
initStkLclCnt += - roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int); + roundUp(compiler->lvaLclStackHomeSize(varNum), TARGET_POINTER_SIZE) / sizeof(int); counted = true; } } @@ -3869,7 +3512,8 @@ void CodeGen::genCheckUseBlockInit() if (!counted) { - initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int); + initStkLclCnt += + roundUp(compiler->lvaLclStackHomeSize(varNum), TARGET_POINTER_SIZE) / sizeof(int); counted = true; } } @@ -4123,14 +3767,14 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, // or when compInitMem is true // or when in debug code - noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) || - compiler->info.compInitMem || compiler->opts.compDbgCode); + noway_assert(varTypeIsGC(varDsc->TypeGet()) || varDsc->TypeIs(TYP_STRUCT) || compiler->info.compInitMem || + compiler->opts.compDbgCode); - if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem && + if (varDsc->TypeIs(TYP_STRUCT) && !compiler->info.compInitMem && (varDsc->lvExactSize() >= TARGET_POINTER_SIZE)) { // We only initialize the GC variables in the TYP_STRUCT - const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES; + const unsigned slots = (unsigned)compiler->lvaLclStackHomeSize(varNum) / REGSIZE_BYTES; ClassLayout* layout = varDsc->GetLayout(); for (unsigned i = 0; i < slots; i++) @@ -4147,7 +3791,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed); // zero out the whole thing rounded up to a single stack slot size - unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int)); + unsigned lclSize = roundUp(compiler->lvaLclStackHomeSize(varNum), (unsigned)sizeof(int)); unsigned i; for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES) { @@ -4445,7 +4089,7 @@ void CodeGen::genHomeSwiftStructStackParameters() } LclVarDsc* dsc = compiler->lvaGetDesc(lclNum); - if ((dsc->TypeGet() != TYP_STRUCT) || compiler->lvaIsImplicitByRefLocal(lclNum) || !dsc->lvOnFrame) + if (!dsc->TypeIs(TYP_STRUCT) || compiler->lvaIsImplicitByRefLocal(lclNum) || !dsc->lvOnFrame) { continue; } @@ -4492,7 +4136,6 @@ void CodeGen::genHomeStackPartOfSplitParameter(regNumber initReg, bool* initRegS const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); if (abiInfo.IsSplitAcrossRegistersAndStack()) { - assert(var->lvIsSplit); JITDUMP("Homing stack part of split parameter V%02u\n", lclNum); assert(abiInfo.NumSegments == 2); @@ -4582,27 +4225,13 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed #endif // Load from the argument register only if it is not prespilled. - if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling) + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(contextArg); + if (abiInfo.HasExactlyOneRegisterSegment() && !isPrespilledForProfiling) { - reg = varDsc->GetArgReg(); + reg = abiInfo.Segment(0).GetRegister(); } else { - if (isFramePointerUsed()) - { -#if defined(TARGET_ARM) - // GetStackOffset() is always valid for incoming stack-arguments, even if the argument - // will become enregistered. 
- // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES - noway_assert((2 * REGSIZE_BYTES <= varDsc->GetStackOffset()) && - (size_t(varDsc->GetStackOffset()) < compiler->compArgSize + 2 * REGSIZE_BYTES)); -#else - // GetStackOffset() is always valid for incoming stack-arguments, even if the argument - // will become enregistered. - noway_assert((0 < varDsc->GetStackOffset()) && (size_t(varDsc->GetStackOffset()) < compiler->compArgSize)); -#endif - } - // We will just use the initReg since it is an available register // and we are probably done using it anyway... reg = initReg; @@ -5075,11 +4704,6 @@ void CodeGen::genFinalizeFrame() * * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp. */ - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif void CodeGen::genFnProlog() { ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); @@ -5155,18 +4779,6 @@ void CodeGen::genFnProlog() } #endif // DEBUG -#if defined(DEBUG) - - // We cannot force 0-initialization of the PSPSym - // as it will overwrite the real value - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - const LclVarDsc* varDsc = compiler->lvaGetDesc(compiler->lvaPSPSym); - assert(!varDsc->lvMustInit); - } - -#endif // DEBUG - /*------------------------------------------------------------------------- * * Record the stack frame ranges that will cover all of the tracked @@ -5211,7 +4823,7 @@ void CodeGen::genFnProlog() } signed int loOffs = varDsc->GetStackOffset(); - signed int hiOffs = varDsc->GetStackOffset() + compiler->lvaLclSize(varNum); + signed int hiOffs = varDsc->GetStackOffset() + compiler->lvaLclStackHomeSize(varNum); /* We need to know the offset range of tracked stack GC refs */ /* We assume that the GC reference can be anywhere in the TYP_STRUCT */ @@ -5286,7 +4898,7 @@ void CodeGen::genFnProlog() } } } - else if (varDsc->TypeGet() == TYP_DOUBLE) + else if (varDsc->TypeIs(TYP_DOUBLE)) { initDblRegs |= regMask; } @@ -5381,8 +4993,11 @@ void CodeGen::genFnProlog() bool initRegZeroed = false; regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn; #if defined(TARGET_AMD64) - // TODO-Xarch-apx : Revert. Excluding eGPR so that it's not used for non REX2 supported movs. - excludeMask = excludeMask | RBM_HIGHINT; + // we'd require eEVEX present to enable EGPRs in HWIntrinsics. + if (!compiler->canUseEvexEncoding()) + { + excludeMask = excludeMask | RBM_HIGHINT; + } #endif // !defined(TARGET_AMD64) #ifdef TARGET_ARM @@ -5622,8 +5237,10 @@ void CodeGen::genFnProlog() #endif // TARGET_ARMARCH #if defined(TARGET_XARCH) + genClearAvxStateInProlog(); + // Preserve callee saved float regs to stack. 
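Editor's note: the prolog hunk above picks an init register by excluding registers that hold live incoming arguments, and on x64 additionally excludes the extended GPRs when EVEX encoding is unavailable. A sketch of that mask-based selection with invented bitmask constants (not the JIT's real `RBM_*` values):

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical register bitmask constants.
constexpr uint32_t RBM_ALL_INT  = 0xFFFF; // pretend 16 integer registers
constexpr uint32_t RBM_HIGH_INT = 0xFF00; // pretend "extended" upper eight

// Pick the lowest-numbered register not in the exclusion mask, mirroring how
// the prolog derives its init register.
int PickInitReg(uint32_t liveIncomingArgs, bool canUseEvex)
{
    uint32_t exclude = liveIncomingArgs;
    if (!canUseEvex)
    {
        exclude |= RBM_HIGH_INT; // avoid extended GPRs without EVEX support
    }

    uint32_t candidates = RBM_ALL_INT & ~exclude;
    for (int reg = 0; reg < 16; reg++)
    {
        if ((candidates & (1u << reg)) != 0)
        {
            return reg;
        }
    }
    return -1; // no register available
}

int main()
{
    // Args live in r0 and r1; EVEX unavailable, so the upper registers are also off limits.
    printf("initReg = r%d\n", PickInitReg(0x3, /* canUseEvex */ false)); // r2
    return 0;
}
```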
- genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize); + genPreserveCalleeSavedFltRegs(); #endif // defined(TARGET_XARCH) #ifdef TARGET_AMD64 @@ -5656,19 +5273,15 @@ void CodeGen::genFnProlog() genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed); - if (compiler->UsesFunclets()) - { - genSetPSPSym(initReg, &initRegZeroed); - } - else - { #if defined(FEATURE_EH_WINDOWS_X86) + if (!compiler->UsesFunclets()) + { // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem) { // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) unsigned filterEndOffsetSlotOffs = - compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE; + compiler->lvaLclStackHomeSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE; // Zero out the slot for nesting level 0 unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE; @@ -5682,8 +5295,8 @@ void CodeGen::genFnProlog() GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar, firstSlotOffs); } -#endif // FEATURE_EH_WINDOWS_X86 } +#endif // FEATURE_EH_WINDOWS_X86 genReportGenericContextArg(initReg, &initRegZeroed); @@ -5774,7 +5387,7 @@ void CodeGen::genFnProlog() #endif // OSR functions take no parameters in registers. Ensure no mappings // are present. - // assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty()); + assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty()); compiler->lvaUpdateArgsWithInitialReg(); } @@ -5889,11 +5502,11 @@ void CodeGen::genFnProlog() // MOV EAX, assert(compiler->lvaVarargsHandleArg == compiler->info.compArgsCount - 1); - GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->lvaVarargsHandleArg, 0); - regSet.verifyRegUsed(REG_EAX); + GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SCRATCH, compiler->lvaVarargsHandleArg, 0); + regSet.verifyRegUsed(REG_SCRATCH); // MOV EAX, [EAX] - GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0); + GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SCRATCH, REG_SCRATCH, 0); // EDX might actually be holding something here. So make sure to only use EAX for this code // sequence. 
@@ -5905,16 +5518,16 @@ void CodeGen::genFnProlog() noway_assert(lastArg->lvFramePointerBased); // LEA EAX, & + EAX - GetEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset); + GetEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_SCRATCH, genFramePointerReg(), REG_SCRATCH, offset); if (varDsc->lvIsInReg()) { - GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, varDsc->GetRegNum(), REG_EAX, /* canSkip */ true); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, varDsc->GetRegNum(), REG_SCRATCH, /* canSkip */ true); regSet.verifyRegUsed(varDsc->GetRegNum()); } else { - GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0); + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SCRATCH, argsStartVar, 0); } } @@ -5932,9 +5545,6 @@ void CodeGen::genFnProlog() GetEmitter()->emitEndProlog(); } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //---------------------------------------------------------------------------------- // genEmitJumpTable: emit jump table and return its base offset @@ -5953,7 +5563,7 @@ void CodeGen::genFnProlog() unsigned CodeGen::genEmitJumpTable(GenTree* treeNode, bool relativeAddr) { noway_assert(compiler->compCurBB->KindIs(BBJ_SWITCH)); - assert(treeNode->OperGet() == GT_JMPTABLE); + assert(treeNode->OperIs(GT_JMPTABLE)); emitter* emit = GetEmitter(); const unsigned jumpCount = compiler->compCurBB->GetSwitchTargets()->bbsCount; @@ -6045,7 +5655,9 @@ regNumber CodeGen::getCallIndirectionCellReg(GenTreeCall* call) if (call->GetIndirectionCellArgKind() != WellKnownArg::None) { CallArg* indirCellArg = call->gtArgs.FindWellKnownArg(call->GetIndirectionCellArgKind()); - assert((indirCellArg != nullptr) && (indirCellArg->AbiInfo.GetRegNum() == result)); + assert(indirCellArg != nullptr); + assert(indirCellArg->AbiInfo.HasExactlyOneRegisterSegment()); + assert(indirCellArg->AbiInfo.Segment(0).GetRegister() == result); } #endif @@ -6265,13 +5877,15 @@ unsigned CodeGen::getFirstArgWithStackSlot() // that's passed on the stack. for (unsigned i = 0; i < compiler->info.compArgsCount; i++) { - LclVarDsc* varDsc = compiler->lvaGetDesc(i); - // We should have found a stack parameter (and broken out of this loop) before // we find any non-parameters. - assert(varDsc->lvIsParam); + assert(compiler->lvaGetDesc(i)->lvIsParam); + + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(i); + // We do not expect to need this function in ambiguous cases. 
+ assert(!abiInfo.IsSplitAcrossRegistersAndStack()); - if (varDsc->GetArgReg() == REG_STK) + if (abiInfo.HasAnyStackSegment()) { return i; } @@ -7104,7 +6718,7 @@ void CodeGen::genLongReturn(GenTree* treeNode) var_types targetType = treeNode->TypeGet(); assert(op1 != nullptr); - assert(op1->OperGet() == GT_LONG); + assert(op1->OperIs(GT_LONG)); GenTree* loRetVal = op1->gtGetOp1(); GenTree* hiRetVal = op1->gtGetOp2(); assert((loRetVal->GetRegNum() != REG_NA) && (hiRetVal->GetRegNum() != REG_NA)); @@ -7217,23 +6831,14 @@ void CodeGen::genReturn(GenTree* treeNode) } } - if (treeNode->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)) + if (treeNode->OperIs(GT_RETURN) && compiler->compIsAsync()) { - const ReturnTypeDesc& retTypeDesc = compiler->compRetTypeDesc; + instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_ASYNC_CONTINUATION_RET); + } - if (compiler->compMethodReturnsRetBufAddr()) - { - gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); - } - else - { - unsigned retRegCount = retTypeDesc.GetReturnRegCount(); - for (unsigned i = 0; i < retRegCount; ++i) - { - gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv), - retTypeDesc.GetReturnRegType(i)); - } - } + if (treeNode->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)) + { + genMarkReturnGCInfo(); } #ifdef PROFILING_SUPPORTED @@ -7300,6 +6905,83 @@ void CodeGen::genSwiftErrorReturn(GenTree* treeNode) } #endif // SWIFT_SUPPORT +//------------------------------------------------------------------------ +// genReturnSuspend: +// Generate code for a GT_RETURN_SUSPEND node +// +// Arguments: +// treeNode - The node +// +void CodeGen::genReturnSuspend(GenTreeUnOp* treeNode) +{ + GenTree* op = treeNode->gtGetOp1(); + assert(op->TypeIs(TYP_REF)); + + regNumber reg = genConsumeReg(op); + inst_Mov(TYP_REF, REG_ASYNC_CONTINUATION_RET, reg, /* canSkip */ true); + + ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc; + unsigned numRetRegs = retTypeDesc.GetReturnRegCount(); + for (unsigned i = 0; i < numRetRegs; i++) + { + if (varTypeIsGC(retTypeDesc.GetReturnRegType(i))) + { + regNumber returnReg = retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv); + instGen_Set_Reg_To_Zero(EA_PTRSIZE, returnReg); + } + } + + genMarkReturnGCInfo(); +} + +//------------------------------------------------------------------------ +// genMarkReturnGCInfo: +// Mark GC and non-GC pointers of return registers going into the epilog.. +// +void CodeGen::genMarkReturnGCInfo() +{ + const ReturnTypeDesc& retTypeDesc = compiler->compRetTypeDesc; + + if (compiler->compMethodReturnsRetBufAddr()) + { + gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); + } + else + { + unsigned retRegCount = retTypeDesc.GetReturnRegCount(); + for (unsigned i = 0; i < retRegCount; ++i) + { + gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv), + retTypeDesc.GetReturnRegType(i)); + } + } + + if (compiler->compIsAsync()) + { + gcInfo.gcMarkRegPtrVal(REG_ASYNC_CONTINUATION_RET, TYP_REF); + } +} + +//------------------------------------------------------------------------ +// genCodeForAsyncContinuation: +// Generate code for a GT_ASYNC_CONTINUATION node. 
+// +// Arguments: +// tree - The node +// +void CodeGen::genCodeForAsyncContinuation(GenTree* tree) +{ + assert(tree->OperIs(GT_ASYNC_CONTINUATION)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + inst_Mov(targetType, targetReg, REG_ASYNC_CONTINUATION_RET, /* canSkip */ true); + genTransferRegGCState(targetReg, REG_ASYNC_CONTINUATION_RET); + + genProduceReg(tree); +} + //------------------------------------------------------------------------ // isStructReturn: Returns whether the 'treeNode' is returning a struct. // @@ -7320,6 +7002,11 @@ bool CodeGen::isStructReturn(GenTree* treeNode) return false; } + if (!treeNode->TypeIs(TYP_VOID) && treeNode->AsOp()->GetReturnValue()->OperIsFieldList()) + { + return true; + } + #if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) assert(!varTypeIsStruct(treeNode)); return false; @@ -7347,12 +7034,32 @@ void CodeGen::genStructReturn(GenTree* treeNode) GenTree* op1 = treeNode->AsOp()->GetReturnValue(); GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); - genConsumeRegs(op1); + const ReturnTypeDesc& retTypeDesc = compiler->compRetTypeDesc; + const unsigned regCount = retTypeDesc.GetReturnRegCount(); - ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc; - const unsigned regCount = retTypeDesc.GetReturnRegCount(); assert(regCount <= MAX_RET_REG_COUNT); + if (op1->OperIsFieldList()) + { + unsigned regIndex = 0; + for (GenTreeFieldList::Use& use : op1->AsFieldList()->Uses()) + { + GenTree* fieldNode = use.GetNode(); + regNumber sourceReg = genConsumeReg(fieldNode); + regNumber destReg = retTypeDesc.GetABIReturnReg(regIndex, compiler->info.compCallConv); + var_types type = retTypeDesc.GetReturnRegType(regIndex); + + // We have constrained the reg in LSRA, but due to def-use + // conflicts we may still need a move here. + inst_Mov(type, destReg, sourceReg, /* canSkip */ true, emitActualTypeSize(type)); + regIndex++; + } + + return; + } + + genConsumeRegs(op1); + #if FEATURE_MULTIREG_RET // Right now the only enregisterable structs supported are SIMD vector types. if (genIsRegCandidateLocal(actualOp1)) @@ -7474,7 +7181,7 @@ void CodeGen::genCallPlaceRegArgs(GenTreeCall* call) // Consume all the arg regs for (CallArg& arg : call->gtArgs.LateArgs()) { - ABIPassingInformation& abiInfo = arg.NewAbiInfo; + ABIPassingInformation& abiInfo = arg.AbiInfo; GenTree* argNode = arg.GetLateNode(); #if FEATURE_MULTIREG_ARGS @@ -7511,36 +7218,6 @@ void CodeGen::genCallPlaceRegArgs(GenTreeCall* call) } #endif -#if FEATURE_ARG_SPLIT - if (argNode->OperIs(GT_PUTARG_SPLIT)) - { - assert(compFeatureArgSplit()); - genConsumeArgSplitStruct(argNode->AsPutArgSplit()); - unsigned regIndex = 0; - for (const ABIPassingSegment& seg : abiInfo.Segments()) - { - if (!seg.IsPassedInRegister()) - { - continue; - } - - regNumber allocReg = argNode->AsPutArgSplit()->GetRegNumByIdx(regIndex); - var_types type = argNode->AsPutArgSplit()->GetRegType(regIndex); - inst_Mov(genActualType(type), seg.GetRegister(), allocReg, /* canSkip */ true); - - if (call->IsFastTailCall()) - { - // We won't actually consume the register here -- keep it alive into the epilog. 
- gcInfo.gcMarkRegPtrVal(seg.GetRegister(), type); - } - - regIndex++; - } - - continue; - } -#endif - if (abiInfo.HasExactlyOneRegisterSegment()) { regNumber argReg = abiInfo.Segment(0).GetRegister(); @@ -7566,7 +7243,7 @@ void CodeGen::genCallPlaceRegArgs(GenTreeCall* call) { for (CallArg& arg : call->gtArgs.Args()) { - for (const ABIPassingSegment& seg : arg.NewAbiInfo.Segments()) + for (const ABIPassingSegment& seg : arg.AbiInfo.Segments()) { if (seg.IsPassedInRegister() && genIsValidFloatReg(seg.GetRegister())) { @@ -7723,7 +7400,7 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode) if (actualOp1->OperIs(GT_CALL)) { assert(regCount <= MAX_RET_REG_COUNT); - noway_assert(varDsc->lvIsMultiRegRet); + noway_assert(varDsc->lvIsMultiRegDest); } #ifdef FEATURE_SIMD @@ -7850,16 +7527,17 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode) offset += genTypeSize(srcType); #ifdef DEBUG + unsigned stackHomeSize = compiler->lvaLclStackHomeSize(lclNum); #ifdef TARGET_64BIT - assert(offset <= varDsc->lvSize()); + assert(offset <= stackHomeSize); #else // !TARGET_64BIT if (varTypeIsStruct(varDsc)) { - assert(offset <= varDsc->lvSize()); + assert(offset <= stackHomeSize); } else { - assert(varDsc->TypeGet() == TYP_LONG); + assert(varDsc->TypeIs(TYP_LONG)); assert(offset <= genTypeSize(TYP_LONG)); } #endif // !TARGET_64BIT @@ -7903,7 +7581,7 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode) // void CodeGen::genRegCopy(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_COPY); + assert(treeNode->OperIs(GT_COPY)); GenTree* op1 = treeNode->AsOp()->gtOp1; if (op1->IsMultiRegNode()) @@ -8023,7 +7701,7 @@ void CodeGen::genRegCopy(GenTree* treeNode) // regNumber CodeGen::genRegCopy(GenTree* treeNode, unsigned multiRegIndex) { - assert(treeNode->OperGet() == GT_COPY); + assert(treeNode->OperIs(GT_COPY)); GenTree* op1 = treeNode->gtGetOp1(); assert(op1->IsMultiRegNode()); @@ -8170,7 +7848,7 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn) assert(varDsc->lvOnFrame); - unsigned int size = compiler->lvaLclSize(varNum); + unsigned int size = compiler->lvaLclStackHomeSize(varNum); if ((size / TARGET_POINTER_SIZE) > 16) { // This will require more than 16 instructions, switch to rep stosd/memset call. diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index d6c2f5fe6a07..b09090eff407 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -180,8 +180,13 @@ class CodeGenInterface public: static bool instIsFP(instruction ins); #if defined(TARGET_XARCH) - static bool instIsEmbeddedBroadcastCompatible(instruction ins); + bool instIsEmbeddedBroadcastCompatible(instruction ins); + static bool instIsEmbeddedMaskingCompatible(instruction ins); + static unsigned instInputSize(instruction ins); + static unsigned instKMaskBaseSize(instruction ins); + + bool IsEmbeddedBroadcastEnabled(instruction ins, GenTree* op); #endif // TARGET_XARCH //------------------------------------------------------------------------- // Liveness-related fields & methods @@ -823,11 +828,50 @@ class CodeGenInterface virtual const char* siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs) = 0; #endif // LATE_DISASM +}; -#if defined(TARGET_XARCH) - bool IsEmbeddedBroadcastEnabled(instruction ins, GenTree* op); -#endif +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) +// Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions +// such as X86's SETcc. 
A sequence of instructions rather than just a single one is required for +// certain floating point conditions. +// For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF, +// to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0 +// and then jump if ZF is 1: +// JP fallThroughBlock +// JE jumpDestBlock +// fallThroughBlock: +// ... +// jumpDestBlock: +// +// This is very similar to the way shortcircuit evaluation of bool AND and OR operators works so +// in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like +// pattern is used to encode the above: +// { EJ_jnp, GT_AND, EJ_je } +// { EJ_jp, GT_OR, EJ_jne } +// +// For more details check inst_JCC and inst_SETCC functions. +// +struct GenConditionDesc +{ + emitJumpKind jumpKind1; + genTreeOps oper; + emitJumpKind jumpKind2; + char padTo4Bytes; + + static const GenConditionDesc& Get(GenCondition condition) + { + assert(condition.GetCode() < ArrLen(map)); + const GenConditionDesc& desc = map[condition.GetCode()]; + assert(desc.jumpKind1 != EJ_NONE); + assert((desc.oper == GT_NONE) || (desc.oper == GT_AND) || (desc.oper == GT_OR)); + assert((desc.oper == GT_NONE) == (desc.jumpKind2 == EJ_NONE)); + return desc; + } + +private: + static const GenConditionDesc map[32]; }; +#endif // !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) #endif // _CODEGEN_INTERFACE_H_ #endif // TARGET_WASM diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 945f468d86fa..67467b5c71ee 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -279,7 +279,7 @@ void CodeGen::genCodeForBBlist() { for (GenTree* node : LIR::AsRange(block)) { - if (node->OperGet() == GT_CATCH_ARG) + if (node->OperIs(GT_CATCH_ARG)) { gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT); break; @@ -289,8 +289,6 @@ void CodeGen::genCodeForBBlist() /* Start a new code output block */ - genUpdateCurrentFunclet(block); - genLogLabel(block); // Tell everyone which basic block we're working on @@ -372,9 +370,9 @@ void CodeGen::genCodeForBBlist() bool firstMapping = true; - if (block->HasFlag(BBF_FUNCLET_BEG)) + if (compiler->bbIsFuncletBeg(block)) { - assert(compiler->UsesFunclets()); + genUpdateCurrentFunclet(block); genReserveFuncletProlog(block); } @@ -433,7 +431,7 @@ void CodeGen::genCodeForBBlist() for (GenTree* node : LIR::AsRange(block)) { // Do we have a new IL offset? - if (node->OperGet() == GT_IL_OFFSET) + if (node->OperIs(GT_IL_OFFSET)) { GenTreeILOffset* ilOffset = node->AsILOffset(); DebugInfo rootDI = ilOffset->gtStmtDI.GetRoot(); @@ -505,8 +503,13 @@ void CodeGen::genCodeForBBlist() } } + if (compiler->compIsAsync()) + { + nonVarPtrRegs &= ~RBM_ASYNC_CONTINUATION_RET; + } + // For a tailcall arbitrary argument registers may be live into the - // prolog. Skip validating those. + // epilog. Skip validating those. if (block->HasFlag(BBF_HAS_JMP)) { nonVarPtrRegs &= ~fullIntArgRegMask(CorInfoCallConvExtension::Managed); @@ -711,10 +714,9 @@ void CodeGen::genCodeForBBlist() // 2. If this is this is the last block of the hot section. // 3. If the subsequent block is a special throw block. // 4. On AMD64, if the next block is in a different EH region. 
- if (block->IsLast() || block->Next()->HasFlag(BBF_FUNCLET_BEG) || - !BasicBlock::sameEHRegion(block, block->Next()) || + if (block->IsLast() || !BasicBlock::sameEHRegion(block, block->Next()) || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->Next())) || - block->IsLastHotBlock(compiler)) + compiler->bbIsFuncletBeg(block->Next()) || block->IsLastHotBlock(compiler)) { instGen(INS_BREAKPOINT); // This should never get executed } @@ -725,7 +727,7 @@ void CodeGen::genCodeForBBlist() { GenTree* call = block->lastNode(); - if ((call != nullptr) && (call->gtOper == GT_CALL)) + if ((call != nullptr) && call->OperIs(GT_CALL)) { if (call->AsCall()->IsNoReturn()) { @@ -767,7 +769,7 @@ void CodeGen::genCodeForBBlist() { #ifdef DEBUG GenTree* call = block->lastNode(); - if ((call != nullptr) && (call->gtOper == GT_CALL)) + if ((call != nullptr) && call->OperIs(GT_CALL)) { // At this point, BBJ_ALWAYS should never end with a call that doesn't return. assert(!call->AsCall()->IsNoReturn()); @@ -1021,7 +1023,7 @@ void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTree* tree, int reg { // This should only be called for multireg lclVars. assert(compiler->lvaEnregMultiRegVars); - assert(tree->IsMultiRegLclVar() || (tree->gtOper == GT_COPY)); + assert(tree->IsMultiRegLclVar() || tree->OperIs(GT_COPY)); varDsc->SetRegNum(tree->GetRegByIndex(regIndex)); } @@ -1036,7 +1038,7 @@ void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTree* tree, int reg void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTree* tree) { // This should not be called for multireg lclVars. - assert((tree->OperIsScalarLocal() && !tree->IsMultiRegLclVar()) || (tree->gtOper == GT_COPY)); + assert((tree->OperIsScalarLocal() && !tree->IsMultiRegLclVar()) || tree->OperIs(GT_COPY)); varDsc->SetRegNum(tree->GetRegNum()); } @@ -1138,7 +1140,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree, unsigned multiRegIndex) GenTree* unspillTree = tree; assert(unspillTree->IsMultiRegNode()); - if (tree->gtOper == GT_RELOAD) + if (tree->OperIs(GT_RELOAD)) { unspillTree = tree->AsOp()->gtOp1; } @@ -1205,7 +1207,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree, unsigned multiRegIndex) void CodeGen::genUnspillRegIfNeeded(GenTree* tree) { GenTree* unspillTree = tree; - if (tree->gtOper == GT_RELOAD) + if (tree->OperIs(GT_RELOAD)) { unspillTree = tree->AsOp()->gtOp1; } @@ -1384,7 +1386,7 @@ void CodeGen::genCheckConsumeNode(GenTree* const node) } } - assert((node->OperGet() == GT_CATCH_ARG) || ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) == 0)); + assert(node->OperIs(GT_CATCH_ARG) || ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) == 0)); assert((lastConsumedNode == nullptr) || (node->gtUseNum == -1) || (node->gtUseNum > lastConsumedNode->gtUseNum)); node->gtDebugFlags |= GTF_DEBUG_NODE_CG_CONSUMED; @@ -1475,7 +1477,7 @@ regNumber CodeGen::genConsumeReg(GenTree* tree, unsigned multiRegIndex) // regNumber CodeGen::genConsumeReg(GenTree* tree) { - if (tree->OperGet() == GT_COPY) + if (tree->OperIs(GT_COPY)) { genRegCopy(tree); } @@ -1576,7 +1578,7 @@ void CodeGen::genConsumeAddress(GenTree* addr) { genConsumeReg(addr); } - else if (addr->OperGet() == GT_LEA) + else if (addr->OperIs(GT_LEA)) { genConsumeAddrMode(addr->AsAddrMode()); } @@ -1591,7 +1593,7 @@ void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr) void CodeGen::genConsumeRegs(GenTree* tree) { #if !defined(TARGET_64BIT) - if (tree->OperGet() == GT_LONG) + if (tree->OperIs(GT_LONG)) { genConsumeRegs(tree->gtGetOp1()); 
genConsumeRegs(tree->gtGetOp2()); @@ -1741,7 +1743,6 @@ void CodeGen::genConsumeMultiOpOperands(GenTreeMultiOp* tree) } #endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) -#if FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node. // Also loads in the right register the addresses of the @@ -1824,30 +1825,6 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, inst_RV_IV(INS_mov, sizeReg, size, EA_PTRSIZE); } } -#endif // FEATURE_PUT_STRUCT_ARG_STK - -#if FEATURE_ARG_SPLIT -//------------------------------------------------------------------------ -// genConsumeArgRegSplit: Consume register(s) in Call node to set split struct argument. -// -// Arguments: -// putArgNode - the PUTARG_STK tree. -// -// Return Value: -// None. -// -void CodeGen::genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode) -{ - assert(putArgNode->OperGet() == GT_PUTARG_SPLIT); - assert(putArgNode->gtHasReg(compiler)); - - genUnspillRegIfNeeded(putArgNode); - - gcInfo.gcMarkRegSetNpt(putArgNode->gtGetRegMask()); - - genCheckConsumeNode(putArgNode); -} -#endif // FEATURE_ARG_SPLIT //------------------------------------------------------------------------ // genPutArgStkFieldList: Generate code for a putArgStk whose source is a GT_FIELD_LIST @@ -1895,11 +1872,11 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg // We can't write beyond the arg area unless this is a tail call, in which case we use // the first stack arg as the base of the incoming arg area. #ifdef DEBUG - unsigned areaSize = compiler->lvaLclSize(outArgVarNum); + unsigned areaSize = compiler->lvaLclStackHomeSize(outArgVarNum); #if FEATURE_FASTTAILCALL if (putArgStk->gtCall->IsFastTailCall()) { - areaSize = compiler->info.compArgStackSize; + areaSize = compiler->lvaParameterStackSize; } #endif @@ -1940,7 +1917,7 @@ void CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode) { // For a CopyBlk we need the address of the source. assert(src->isContained()); - if (src->OperGet() == GT_IND) + if (src->OperIs(GT_IND)) { src = src->AsOp()->gtOp1; } @@ -1976,7 +1953,7 @@ void CodeGen::genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg) if (blkNode->OperIsCopyBlkOp()) { // For a CopyBlk we need the address of the source. - if (src->OperGet() == GT_IND) + if (src->OperIs(GT_IND)) { src = src->AsOp()->gtOp1; } @@ -2192,7 +2169,7 @@ void CodeGen::genProduceReg(GenTree* tree) { // we should never see reload of multi-reg call here // because GT_RELOAD gets generated in reg consuming path. - noway_assert(tree->OperGet() == GT_COPY); + noway_assert(tree->OperIs(GT_COPY)); // A multi-reg GT_COPY node produces those regs to which // copy has taken place. 
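A recurring, mechanical change across these hunks is that opcode and type checks written as `node->OperGet() == GT_X` or `node->TypeGet() == TYP_X` become `node->OperIs(GT_X)` / `node->TypeIs(TYP_X)`. As a rough, self-contained sketch of why the predicate form reads better and scales to testing several opcodes in one call, consider the following; the `Node` type, the enum values, and the variadic overload here are invented stand-ins for illustration and are not the JIT's actual definitions.

#include <cassert>

// Invented stand-ins for illustration only; not the JIT's real GenTree/genTreeOps.
enum genTreeOps
{
    GT_COPY,
    GT_RELOAD,
    GT_CALL,
    GT_LONG
};

struct Node
{
    genTreeOps gtOper;

    // Single-opcode query: the shape used in place of "OperGet() == GT_X".
    bool OperIs(genTreeOps oper) const
    {
        return gtOper == oper;
    }

    // Variadic form: one call answers "is it any of these?" without chained comparisons.
    template <typename... Rest>
    bool OperIs(genTreeOps oper, Rest... rest) const
    {
        return OperIs(oper) || OperIs(rest...);
    }
};

int main()
{
    Node n{GT_COPY};
    assert(n.OperIs(GT_COPY));
    assert(n.OperIs(GT_RELOAD, GT_COPY)); // true: matches one of the listed opcodes
    assert(!n.OperIs(GT_CALL, GT_LONG));
    return 0;
}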
@@ -2259,93 +2236,6 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) } } -// generates an ip-relative call or indirect call via reg ('call reg') -// pass in 'addr' for a relative call or 'base' for a indirect register call -// methHnd - optional, only used for pretty printing -// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) -// noSafePoint - force not making this call a safe point in partially interruptible code -// -// clang-format off -void CodeGen::genEmitCall(int callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) - void* addr - X86_ARG(int argSize), - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - const DebugInfo& di, - regNumber base, - bool isJump, - bool noSafePoint) -{ -#if !defined(TARGET_X86) - int argSize = 0; -#endif // !defined(TARGET_X86) - - // This should have been put in volatile registers to ensure it does not - // get overridden by epilog sequence during tailcall. - noway_assert(!isJump || (base == REG_NA) || ((RBM_INT_CALLEE_TRASH & genRegMask(base)) != 0)); - - GetEmitter()->emitIns_Call(emitter::EmitCallType(callType), - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - addr, - argSize, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - di, base, REG_NA, 0, 0, isJump, noSafePoint); -} -// clang-format on - -// generates an indirect call via addressing mode (call []) given an indir node -// methHnd - optional, only used for pretty printing -// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) -// -// clang-format off -void CodeGen::genEmitCallIndir(int callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) - GenTreeIndir* indir - X86_ARG(int argSize), - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - const DebugInfo& di, - bool isJump) -{ -#if !defined(TARGET_X86) - int argSize = 0; -#endif // !defined(TARGET_X86) - - regNumber iReg = indir->HasBase() ? indir->Base()->GetRegNum() : REG_NA; - regNumber xReg = indir->HasIndex() ? indir->Index()->GetRegNum() : REG_NA; - - // These should have been put in volatile registers to ensure they do not - // get overridden by epilog sequence during tailcall. - noway_assert(!isJump || (iReg == REG_NA) || ((RBM_CALLEE_TRASH & genRegMask(iReg)) != 0)); - noway_assert(!isJump || (xReg == REG_NA) || ((RBM_CALLEE_TRASH & genRegMask(xReg)) != 0)); - - GetEmitter()->emitIns_Call(emitter::EmitCallType(callType), - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, - argSize, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - di, - iReg, - xReg, - indir->Scale(), - indir->Offset(), - isJump); -} -// clang-format on - //------------------------------------------------------------------------ // genCodeForCast: Generates the code for GT_CAST. // @@ -2486,8 +2376,11 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) } #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // For LoongArch64's ISA which is same with the MIPS64 ISA, even the instructions of 32bits operation need - // the upper 32bits be sign-extended to 64 bits. + // TODO-LOONGARCH64: + // TODO-RISCV64: + // LoongArch64 and RiscV64 ABIs require 32-bit values to be sign-extended to 64-bits. 
+ // We apply the sign-extension unconditionally here to avoid corner case bugs, even + // though it may not be strictly necessary in all cases. m_extendKind = SIGN_EXTEND_INT; #else m_extendKind = COPY; @@ -2577,7 +2470,7 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon(); unsigned lclNum = lclNode->GetLclNum(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum); - assert(varDsc->TypeGet() == TYP_LONG); + assert(varDsc->TypeIs(TYP_LONG)); assert(!varDsc->lvPromoted); GenTree* op1 = treeNode->AsOp()->gtOp1; @@ -2716,6 +2609,10 @@ void CodeGen::genEmitterUnitTests() { genAmd64EmitterUnitTestsAvx10v2(); } + if (unitTestSectionAll || (strstr(unitTestSection, "ccmp") != nullptr)) + { + genAmd64EmitterUnitTestsCCMP(); + } #elif defined(TARGET_ARM64) if (unitTestSectionAll || (strstr(unitTestSection, "general") != nullptr)) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 94390a885800..cc488deb7023 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -327,7 +327,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * Funclets have the following incoming arguments: * * catch: a0 = the exception object that was caught (see GT_CATCH_ARG) - * filter: a0 = the exception object to filter (see GT_CATCH_ARG), a1 = CallerSP of the containing function + * filter: a0 = the exception object to filter (see GT_CATCH_ARG) * finally/fault: none * * Funclets set the following registers on exit: @@ -356,8 +356,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * |Callee saved registers | // multiple of 8 bytes, not including FP/RA * |-----------------------| * | Saved FP, RA | // 16 bytes @@ -377,54 +375,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of * outgoing arguments for any call). * - * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, - * and that location is the same relative to Caller-SP as in the main function where higher than - * the callee-saved registers. - * That is to say, the PSPSym's relative offset to Caller-SP is not depended on the callee-saved registers. * TODO-LoongArch64: the funclet's callee-saved registers should not shared with main function. * - * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we - * must add buffer space for the saved varargs/argument registers here, if the main function did the same. + * Funclets do not have varargs arguments. * * Note that localloc cannot be used in a funclet. * - * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. - * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. - * - * if (this is a filter funclet) - * { - * // a1 on entry to a filter funclet is CallerSP of the containing function: - * // either the main function, or the funclet for a handler that this filter is dynamically nested within. 
- * // Note that a filter can be dynamically nested within a funclet even if it is not statically within - * // a funclet. Consider: - * // - * // try { - * // try { - * // throw new Exception(); - * // } catch(Exception) { - * // throw new Exception(); // The exception thrown here ... - * // } - * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack - * // } filter-handler { - * // } - * // - * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will - * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always - * // create a main function PSP for any function with a filter. - * - * ld.d a1,a1, CallerSP_to_PSP_slot_delta ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) - * st.d a1,sp, SP_to_PSP_slot_delta ; store the PSP - * addi.d fp, a1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer - * } - * else - * { - * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. - * // TODO-LOONGARCH64-CQ: if VM set a1 to CallerSP on entry, like for filters, we could save an instruction. - * - * addi.d a3,fp,Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. a3 is scratch? - * st.d a3,sp,SP_to_PSP_slot_delta ; store the PSP - * } - * * An example epilog sequence is then: * * addi.d sp,sp,#outsz ; if any outgoing argument space @@ -449,7 +405,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) #endif assert(block != NULL); - assert(block->HasFlag(BBF_FUNCLET_BEG)); + assert(compiler->bbIsFuncletBeg(block)); ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); @@ -512,45 +468,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - - // If there is no PSPSym (NativeAOT ABI), we are done. Otherwise, we need to set up the PSPSym in the functlet - // frame. - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - if (isFilter) - { - // This is the first block of a filter - // Note that register a1 = CallerSP of the containing function - // A1 is overwritten by the first Load (new callerSP) - // A2 is scratch when we have a large constant offset - - // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or - // function) - genInstrWithConstant(INS_ld_d, EA_PTRSIZE, REG_A1, REG_A1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, - REG_A2, false); - regSet.verifyRegUsed(REG_A1); - - // Store the PSP value (aka CallerSP) - genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, - REG_A2, false); - - // re-establish the frame pointer - genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_A1, - genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); - } - else // This is a non-filter funclet - { - // A3 is scratch, A2 can also become scratch. - - // compute the CallerSP, given the frame pointer. a3 is scratch? 
- genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_A3, REG_FPBASE, - -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); - regSet.verifyRegUsed(REG_A3); - - genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, - REG_A2, false); - } - } } /***************************************************************************** @@ -628,21 +545,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_RA) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); - // Because a method and funclets must have the same caller-relative PSPSym offset, - // if there is a PSPSym, we have to pad the funclet frame size for OSR. - // - int osrPad = 0; - if (compiler->opts.IsOSR()) - { - osrPad -= compiler->info.compPatchpointInfo->TotalFrameSize(); - - // OSR pad must be already aligned to stack size. - assert((osrPad % STACK_ALIGN) == 0); - } - /* Now save it for future use */ - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() + osrPad; - int funcletFrameSize = compiler->lvaOutgoingArgSpaceSize; genFuncletInfo.fiSP_to_CalleeSaved_delta = funcletFrameSize; @@ -655,13 +558,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() delta_PSP -= TARGET_POINTER_SIZE; } - funcletFrameSize = funcletFrameSize - delta_PSP - osrPad; + funcletFrameSize = funcletFrameSize - delta_PSP; funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); - genFuncletInfo.fiSpDelta = -funcletFrameSize; - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSize + delta_PSP + osrPad; - genFuncletInfo.fiCallerSP_to_PSP_slot_delta = osrPad + delta_PSP; + genFuncletInfo.fiSpDelta = -funcletFrameSize; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; #ifdef DEBUG if (verbose) @@ -671,22 +572,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); - if (compiler->opts.IsOSR()) - { - printf(" OSR Pad: %d\n", osrPad); - } - printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); printf(" SP to CalleeSaved location delta: %d\n", genFuncletInfo.fiSP_to_CalleeSaved_delta); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); } assert(genFuncletInfo.fiSP_to_CalleeSaved_delta >= 0); - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! - } #endif // DEBUG } @@ -735,7 +624,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) addrInfo.addr = nullptr; addrInfo.accessType = IAT_VALUE; - if (jmpEpilog && (lastNode->gtOper == GT_JMP)) + if (jmpEpilog && lastNode->OperIs(GT_JMP)) { methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); @@ -753,19 +642,18 @@ void CodeGen::genFnEpilog(BasicBlock* block) /* figure out what jump we have */ GenTree* jmpNode = lastNode; #if !FEATURE_FASTTAILCALL - noway_assert(jmpNode->gtOper == GT_JMP); + noway_assert(jmpNode->OperIs(GT_JMP)); #else // FEATURE_FASTTAILCALL // armarch // If jmpNode is GT_JMP then gtNext must be null. // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. 
- noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + noway_assert(!jmpNode->OperIs(GT_JMP) || (jmpNode->gtNext == nullptr)); // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp - noway_assert((jmpNode->gtOper == GT_JMP) || - ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + noway_assert(jmpNode->OperIs(GT_JMP) || (jmpNode->OperIs(GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); // The next block is associated with this "if" stmt - if (jmpNode->gtOper == GT_JMP) + if (jmpNode->OperIs(GT_JMP)) #endif // FEATURE_FASTTAILCALL { // Simply emit a jump to the methodHnd. This is similar to a call so we can use @@ -773,9 +661,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) assert(methHnd != nullptr); assert(addrInfo.addr != nullptr); - emitter::EmitCallType callType; - void* addr; - regNumber indCallReg; + EmitCallParams params; + params.methHnd = methHnd; + switch (addrInfo.accessType) { case IAT_VALUE: @@ -783,14 +671,13 @@ void CodeGen::genFnEpilog(BasicBlock* block) case IAT_PVALUE: // Load the address into a register, load indirect and call through a register // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use - callType = emitter::EC_INDIR_R; - indCallReg = REG_INDIRECT_CALL_TARGET_REG; - addr = NULL; - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + params.callType = EC_INDIR_R; + params.ireg = REG_INDIRECT_CALL_TARGET_REG; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, params.ireg, (ssize_t)addrInfo.addr); if (addrInfo.accessType == IAT_PVALUE) { - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, indCallReg, indCallReg, 0); - regSet.verifyRegUsed(indCallReg); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, params.ireg, params.ireg, 0); + regSet.verifyRegUsed(params.ireg); } break; @@ -800,11 +687,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) // We have to use R12 since we assume the argument registers are in use // LR is used as helper register right before it is restored from stack, thus, // all relative address calculations are performed before LR is restored. 
- callType = emitter::EC_INDIR_R; - indCallReg = REG_T2; - addr = NULL; + params.callType = EC_INDIR_R; + params.ireg = REG_T2; - regSet.verifyRegUsed(indCallReg); + regSet.verifyRegUsed(params.ireg); break; } @@ -819,24 +705,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) genPopCalleeSavedRegisters(true); - // clang-format off - GetEmitter()->emitIns_Call(callType, - methHnd, - INDEBUG_LDISASM_COMMA(nullptr) - addr, - 0, // argSize - EA_UNKNOWN // retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - DebugInfo(), - indCallReg, // ireg - REG_NA, // xreg - 0, // xmul - 0, // disp - true); // isJump - // clang-format on + params.isJump = true; + + genEmitCallWithCurrentGC(params); } #if FEATURE_FASTTAILCALL else @@ -857,33 +728,6 @@ void CodeGen::genFnEpilog(BasicBlock* block) compiler->unwindEndEpilog(); } -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - - int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); - - if (compiler->opts.IsOSR()) - { - SPtoCallerSPdelta += compiler->info.compPatchpointInfo->TotalFrameSize(); - } - - // We will just use the initReg since it is an available register - // and we are probably done using it anyway... - regNumber regTmp = initReg; - *pInitRegZeroed = false; - - genInstrWithConstant(INS_addi_d, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, regTmp, false); - GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); -} - //----------------------------------------------------------------------------- // genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case // `genUseBlockInit` is set. @@ -1037,21 +881,6 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) BasicBlock* const nextBlock = block->Next(); - // Generate a call to the finally, like this: - // mov a0,qword ptr [fp + 10H] / sp // Load a0 with PSPSym, or sp if PSPSym is not used - // bl finally-funclet - // b finally-return // Only for non-retless finally calls - // The 'b' can be a NOP if we're going to the next block. - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, REG_A0, compiler->lvaPSPSym, 0); - } - else - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_A0, REG_SPBASE, 0); - } - if (block->HasFlag(BBF_RETLESS_CALL)) { GetEmitter()->emitIns_J(INS_bl, block->GetTarget()); @@ -1339,7 +1168,7 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) #ifdef FEATURE_SIMD // storing of TYP_SIMD12 (i.e. Vector3) field - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genStoreLclTypeSIMD12(tree); return; @@ -1423,7 +1252,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) assert(varReg != REG_NA); unsigned fieldLclNum = varDsc->lvFieldLclStart + i; LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldLclNum); - assert(fieldVarDsc->TypeGet() == TYP_FLOAT); + assert(fieldVarDsc->TypeIs(TYP_FLOAT)); GetEmitter()->emitIns_R_R_I(INS_st_d, emitTypeSize(TYP_FLOAT), varReg, operandReg, i); } genProduceReg(lclNode); @@ -1437,7 +1266,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) #ifdef FEATURE_SIMD // storing of TYP_SIMD12 (i.e. 
Vector3) field - if (lclNode->TypeGet() == TYP_SIMD12) + if (lclNode->TypeIs(TYP_SIMD12)) { genStoreLclTypeSIMD12(lclNode); return; @@ -1525,7 +1354,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) // void CodeGen::genSimpleReturn(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); + assert(treeNode->OperIs(GT_RETURN) || treeNode->OperIs(GT_RETFILT)); GenTree* op1 = treeNode->gtGetOp1(); var_types targetType = treeNode->TypeGet(); @@ -1538,7 +1367,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) if (!movRequired) { - if (op1->OperGet() == GT_LCL_VAR) + if (op1->OperIs(GT_LCL_VAR)) { GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); bool isRegCandidate = compiler->lvaTable[lcl->GetLclNum()].lvIsRegCandidate(); @@ -1579,7 +1408,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) */ void CodeGen::genLclHeap(GenTree* tree) { - assert(tree->OperGet() == GT_LCLHEAP); + assert(tree->OperIs(GT_LCLHEAP)); assert(compiler->compLocallocUsed); emitter* emit = GetEmitter(); @@ -1588,7 +1417,6 @@ void CodeGen::genLclHeap(GenTree* tree) regNumber targetReg = tree->GetRegNum(); regNumber regCnt = REG_NA; - regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; // can optimize for loongarch. @@ -1924,7 +1752,40 @@ void CodeGen::genCodeForNegNot(GenTree* tree) // void CodeGen::genCodeForBswap(GenTree* tree) { - NYI_LOONGARCH64("genCodeForBswap unimpleement yet"); + assert(tree->OperIs(GT_BSWAP, GT_BSWAP16)); + + emitAttr attr = emitActualTypeSize(tree); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + + GenTree* operand = tree->gtGetOp1(); + assert(!operand->isContained()); + // The src must be a register. 
+ regNumber operandReg = genConsumeReg(operand); + instruction ins; + + if (tree->OperIs(GT_BSWAP16)) + { + ins = INS_revb_4h; + } + else if (attr == EA_8BYTE) + { + ins = INS_revb_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_revb_2w; + } + + emit->emitIns_R_R(ins, attr, targetReg, operandReg); + + if (tree->OperIs(GT_BSWAP16) && !genCanOmitNormalizationForBswap16(tree)) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, targetReg, targetReg, 15, 0); + } + + genProduceReg(tree); } //------------------------------------------------------------------------ @@ -2221,7 +2082,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) bool sourceIsLocal = false; assert(source->isContained()); - if (source->gtOper == GT_IND) + if (source->OperIs(GT_IND)) { GenTree* srcAddr = source->gtGetOp1(); assert(!srcAddr->isContained()); @@ -2814,7 +2675,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) // void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) { - assert(tree->OperGet() == GT_RETURNTRAP); + assert(tree->OperIs(GT_RETURNTRAP)); // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC // based on the contents of 'data' @@ -2825,45 +2686,42 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) BasicBlock* skipLabel = genCreateTempLabel(); GetEmitter()->emitIns_J_cond_la(INS_beq, skipLabel, data->GetRegNum(), REG_R0); - void* pAddr = nullptr; - void* addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); - emitter::EmitCallType callType; - regNumber callTarget; + EmitCallParams params; - if (addr == nullptr) + void* pAddr = nullptr; + params.addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); + + if (params.addr == nullptr) { - callType = emitter::EC_INDIR_R; - callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + params.callType = EC_INDIR_R; + params.ireg = REG_DEFAULT_HELPER_CALL_TARGET; if (compiler->opts.compReloc) { - GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, params.ireg, (ssize_t)pAddr); } else { // TODO-LOONGARCH64: maybe optimize further. // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); - GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, params.ireg, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, params.ireg, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, params.ireg, params.ireg, + ((ssize_t)pAddr & 0xfff) >> 2); } - regSet.verifyRegUsed(callTarget); + regSet.verifyRegUsed(params.ireg); } else { - callType = emitter::EC_FUNC_TOKEN; - callTarget = REG_NA; + params.callType = EC_FUNC_TOKEN; } // TODO-LOONGARCH64: can optimize further !!! - GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), - INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ - ); + // TODO-LOONGARCH64: Why does this not use genEmitHelperCall? 
+ params.methHnd = compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC); + + genEmitCallWithCurrentGC(params); regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); regSet.verifyRegistersUsed(killMask); @@ -2881,7 +2739,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) { #ifdef FEATURE_SIMD // Storing Vector3 of size 12 bytes through indirection - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genStoreIndTypeSIMD12(tree); return; @@ -2945,6 +2803,9 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) } GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), dataReg, tree); + + // If store was to a variable, update variable liveness after instruction was emitted. + genUpdateLife(tree); } } @@ -2976,7 +2837,7 @@ void CodeGen::genCodeForSwap(GenTreeOp* tree) void CodeGen::genIntToFloatCast(GenTree* treeNode) { // int type --> float/double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -3091,7 +2952,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) { // we don't expect to see overflow detecting float/double --> int type conversions here // as they should have been converted into helper calls by front-end. - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -3248,7 +3109,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // void CodeGen::genCkfinite(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_CKFINITE); + assert(treeNode->OperIs(GT_CKFINITE)); GenTree* op1 = treeNode->AsOp()->gtOp1; var_types targetType = treeNode->TypeGet(); @@ -3377,7 +3238,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) if (targetReg != REG_NA) { - assert(tree->TypeGet() != TYP_VOID); + assert(!tree->TypeIs(TYP_VOID)); assert(emitter::isGeneralRegister(targetReg)); emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); @@ -3388,7 +3249,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) else { assert(targetReg != REG_NA); - assert(tree->TypeGet() != TYP_VOID); + assert(!tree->TypeIs(TYP_VOID)); assert(!op1->isContainedIntOrIImmed()); assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); @@ -3458,7 +3319,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); } - else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) + else if (IsUnsigned && emitter::isValidUimm11(imm + 1) && (imm != (~0))) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); } @@ -3475,7 +3336,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); } - else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) + else if (IsUnsigned && emitter::isValidUimm11(imm + 1) && (imm != (~0))) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); @@ -3610,7 +3471,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) assert(tree->OperIs(GT_JCMP)); assert(!varTypeIsFloating(tree)); - assert(tree->TypeGet() == TYP_VOID); + assert(tree->TypeIs(TYP_VOID)); assert(tree->GetRegNum() == REG_NA); GenTree* op1 = tree->gtOp1; @@ -3830,10 +3691,6 @@ int CodeGenInterface::genSPtoFPdelta() const 
assert(compiler->compCalleeRegsPushed >= 2); // always FP/RA. int delta = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - delta -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { delta -= TARGET_POINTER_SIZE; @@ -3901,14 +3758,14 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() const void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) { - void* addr = nullptr; void* pAddr = nullptr; - emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; - addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); - regNumber callTarget = REG_NA; + EmitCallParams params; + params.callType = EC_FUNC_TOKEN; + params.addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regMaskTP killSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); - if (addr == nullptr) + if (params.addr == nullptr) { // This is call to a runtime helper. // li reg, pAddr #NOTE: this maybe multi-instructions. @@ -3923,41 +3780,37 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, } regMaskTP callTargetMask = genRegMask(callTargetReg); - regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); // assert that all registers in callTargetMask are in the callKillSet - noway_assert((callTargetMask & callKillSet) == callTargetMask); - - callTarget = callTargetReg; + noway_assert((callTargetMask & killSet) == callTargetMask); if (compiler->opts.compReloc) { // TODO-LOONGARCH64: here the bl is special flag rather than a real instruction. - GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTargetReg, (ssize_t)pAddr); } else { // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); - GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTargetReg, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTargetReg, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTargetReg, callTargetReg, + ((ssize_t)pAddr & 0xfff) >> 2); } - regSet.verifyRegUsed(callTarget); + regSet.verifyRegUsed(callTargetReg); - callType = emitter::EC_INDIR_R; + params.callType = EC_INDIR_R; + params.ireg = callTargetReg; } - GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, - retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ - ); + params.methHnd = compiler->eeFindHelper(helper); + params.argSize = argSize; + params.retSize = retSize; - regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); - regSet.verifyRegistersUsed(killMask); + genEmitCallWithCurrentGC(params); + + regSet.verifyRegistersUsed(killSet); } #ifdef FEATURE_SIMD @@ -4190,7 +4043,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) if 
(treeNode->IsReuseRegVal()) { // For now, this is only used for constant nodes. - assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); + assert(treeNode->OperIs(GT_CNS_INT) || treeNode->OperIs(GT_CNS_DBL)); JITDUMP(" TreeNode is marked ReuseReg\n"); return; } @@ -4422,10 +4275,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genPutArgReg(treeNode->AsOp()); break; - case GT_PUTARG_SPLIT: - genPutArgSplit(treeNode->AsPutArgSplit()); - break; - case GT_CALL: genCall(treeNode->AsCall()); break; @@ -4631,7 +4480,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } else { - //// Ngen case - GS cookie constant needs to be accessed through an indirection. + // AOT case - GS cookie constant needs to be accessed through an indirection. // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); if (compiler->opts.compReloc) @@ -4713,7 +4562,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) if (treeNode->putInIncomingArgArea()) { varNumOut = getFirstArgWithStackSlot(); - argOffsetMax = compiler->compArgSize; + argOffsetMax = compiler->lvaParameterStackSize; #if FEATURE_FASTTAILCALL // This must be a fast tail call. assert(treeNode->gtCall->IsFastTailCall()); @@ -4731,7 +4580,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) argOffsetMax = compiler->lvaOutgoingArgSpaceSize; } - bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); + bool isStruct = (targetType == TYP_STRUCT) || source->OperIs(GT_FIELD_LIST); if (!isStruct) // a normal non-Struct argument { @@ -4746,7 +4595,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // If it is contained then source must be the integer constant zero if (source->isContained()) { - assert(source->OperGet() == GT_CNS_INT); + assert(source->OperIs(GT_CNS_INT)); assert(source->AsIntConCommon()->IconValue() == 0); emit->emitIns_S_R(storeIns, storeAttr, REG_R0, varNumOut, argOffsetOut); @@ -4769,13 +4618,13 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { assert(source->isContained()); // We expect that this node was marked as contained in Lower - if (source->OperGet() == GT_FIELD_LIST) + if (source->OperIs(GT_FIELD_LIST)) { genPutArgStkFieldList(treeNode, varNumOut); } else // We must have a GT_BLK or a GT_LCL_VAR { - noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_BLK)); + noway_assert(source->OperIs(GT_LCL_VAR) || source->OperIs(GT_BLK)); var_types targetType = source->TypeGet(); noway_assert(varTypeIsStruct(targetType)); @@ -4788,13 +4637,13 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) GenTreeLclVarCommon* varNode = nullptr; GenTree* addrNode = nullptr; - if (source->OperGet() == GT_LCL_VAR) + if (source->OperIs(GT_LCL_VAR)) { varNode = source->AsLclVarCommon(); } else // we must have a GT_BLK { - assert(source->OperGet() == GT_BLK); + assert(source->OperIs(GT_BLK)); addrNode = source->AsOp()->gtOp1; @@ -4831,7 +4680,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // gcPtrCount = treeNode->gtNumSlots; // Setup the srcSize and layout - if (source->OperGet() == GT_LCL_VAR) + if (source->OperIs(GT_LCL_VAR)) { assert(varNode != nullptr); LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); @@ -4841,13 +4690,13 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) assert(varDsc->lvType == TYP_STRUCT); assert(varDsc->lvOnFrame && !varDsc->lvRegister); - srcSize = varDsc->lvSize(); 
// This yields the roundUp size, but that is fine - // as that is how much stack is allocated for this LclVar + srcSize = compiler->lvaLclStackHomeSize(varNode->GetLclNum()); + layout = varDsc->GetLayout(); } else // we must have a GT_BLK { - assert(source->OperGet() == GT_BLK); + assert(source->OperIs(GT_BLK)); // If the source is an BLK node then we need to use the type information // it provides (size and GC layout) even if the node wraps a lclvar. Due @@ -4870,8 +4719,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) if (varNode != nullptr) { // If we have a varNode, even if it was casted using `OBJ`, we can read its original memory size. - const LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); - const unsigned varStackSize = varDsc->lvSize(); + const unsigned varStackSize = compiler->lvaLclStackHomeSize(varNode->GetLclNum()); if (varStackSize >= srcSize) { srcSize = varStackSize; @@ -4995,213 +4843,6 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) genProduceReg(tree); } -//--------------------------------------------------------------------- -// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node -// -// Arguments -// tree - the GT_PUTARG_SPLIT node -// -// Return value: -// None -// -void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) -{ - assert(treeNode->OperIs(GT_PUTARG_SPLIT)); - - GenTree* source = treeNode->gtOp1; - emitter* emit = GetEmitter(); - unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; - unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; - - if (source->OperGet() == GT_FIELD_LIST) - { - // Evaluate each of the GT_FIELD_LIST items into their register - // and store their register into the outgoing argument area - unsigned regIndex = 0; - unsigned firstOnStackOffs = UINT_MAX; - - for (GenTreeFieldList::Use& use : source->AsFieldList()->Uses()) - { - GenTree* nextArgNode = use.GetNode(); - regNumber fieldReg = nextArgNode->GetRegNum(); - genConsumeReg(nextArgNode); - - if (regIndex >= treeNode->gtNumRegs) - { - if (firstOnStackOffs == UINT_MAX) - { - firstOnStackOffs = use.GetOffset(); - } - var_types type = nextArgNode->TypeGet(); - emitAttr attr = emitTypeSize(type); - - unsigned offset = treeNode->getArgOffset() + use.GetOffset() - firstOnStackOffs; - // We can't write beyond the outgoing arg area - assert(offset + EA_SIZE_IN_BYTES(attr) <= argOffsetMax); - - // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing - // argument area - emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, offset); - } - else - { - var_types type = treeNode->GetRegType(regIndex); - regNumber argReg = treeNode->GetRegNumByIdx(regIndex); - - // If child node is not already in the register we need, move it - if (argReg != fieldReg) - { - inst_RV_RV(ins_Copy(type), argReg, fieldReg, type); - } - regIndex++; - } - } - } - else - { - var_types targetType = source->TypeGet(); - assert(source->OperGet() == GT_BLK); - assert(varTypeIsStruct(targetType)); - - regNumber baseReg = internalRegisters.Extract(treeNode); - regNumber addrReg = REG_NA; - - GenTreeLclVarCommon* varNode = nullptr; - GenTree* addrNode = nullptr; - - addrNode = source->AsOp()->gtOp1; - - // addrNode can either be a GT_LCL_ADDR<0> or an address expression - // - if (addrNode->isContained() && addrNode->IsLclVarAddr()) - { - // We have a GT_BLK(GT_LCL_ADDR<0>) - // - // We will treat this case the same as above - // (i.e if we just had this GT_LCL_VAR directly as the source) - // so update 'source' to point this GT_LCL_ADDR node - // and 
continue to the codegen for the LCL_VAR node below - // - varNode = addrNode->AsLclVarCommon(); - addrNode = nullptr; - } - - // Either varNode or addrNOde must have been setup above, - // the xor ensures that only one of the two is setup, not both - assert((varNode != nullptr) ^ (addrNode != nullptr)); - - // This is the varNum for our load operations, - // only used when we have a struct with a LclVar source - unsigned srcVarNum = BAD_VAR_NUM; - - if (varNode != nullptr) - { - assert(varNode->isContained()); - srcVarNum = varNode->GetLclNum(); - LclVarDsc* varDsc = compiler->lvaGetDesc(srcVarNum); - - // This struct also must live in the stack frame. - // And it can't live in a register. - assert(varDsc->lvOnFrame && !varDsc->lvRegister); - } - else // addrNode is used - { - addrReg = genConsumeReg(addrNode); - - // If addrReg equal to baseReg, we use the last target register as alternative baseReg. - // Because the candidate mask for the internal baseReg does not include any of the target register, - // we can ensure that baseReg, addrReg, and the last target register are not all same. - assert(baseReg != addrReg); - } - - ClassLayout* layout = source->AsBlk()->GetLayout(); - - // Put on stack first - unsigned structOffset = treeNode->gtNumRegs * TARGET_POINTER_SIZE; - unsigned remainingSize = layout->GetSize() - structOffset; - unsigned argOffsetOut = treeNode->getArgOffset(); - - assert((remainingSize > 0) && (roundUp(remainingSize, TARGET_POINTER_SIZE) == treeNode->GetStackByteSize())); - while (remainingSize > 0) - { - var_types type; - if (remainingSize >= TARGET_POINTER_SIZE) - { - type = layout->GetGCPtrType(structOffset / TARGET_POINTER_SIZE); - } - else if (remainingSize >= 4) - { - type = TYP_INT; - } - else if (remainingSize >= 2) - { - type = TYP_USHORT; - } - else - { - assert(remainingSize == 1); - type = TYP_UBYTE; - } - - emitAttr attr = emitActualTypeSize(type); - unsigned moveSize = genTypeSize(type); - - instruction loadIns = ins_Load(type); - if (varNode != nullptr) - { - // Load from our local source - emit->emitIns_R_S(loadIns, attr, baseReg, srcVarNum, structOffset); - } - else - { - // check for case of destroying the addrRegister while we still need it - assert(baseReg != addrReg); - - // Load from our address expression source - emit->emitIns_R_R_I(loadIns, attr, baseReg, addrReg, structOffset); - } - - // Emit the instruction to store the register into the outgoing argument area - emit->emitIns_S_R(ins_Store(type), attr, baseReg, varNumOut, argOffsetOut); - argOffsetOut += moveSize; - assert(argOffsetOut <= argOffsetMax); - - remainingSize -= moveSize; - structOffset += moveSize; - } - - // We set up the registers in order, so that we assign the last target register `baseReg` is no longer in use, - // in case we had to reuse the last target register for it. 
- structOffset = 0; - for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++) - { - regNumber targetReg = treeNode->GetRegNumByIdx(idx); - var_types type = treeNode->GetRegType(idx); - - if (varNode != nullptr) - { - // Load from our local source - emit->emitIns_R_S(ins_Load(type), emitTypeSize(type), targetReg, srcVarNum, structOffset); - } - else - { - // check for case of destroying the addrRegister while we still need it - if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1) - { - assert(targetReg != baseReg); - emit->emitIns_R_R_I(INS_ori, emitActualTypeSize(type), baseReg, addrReg, 0); - addrReg = baseReg; - } - - // Load from our address expression source - emit->emitIns_R_R_I(ins_Load(type), emitTypeSize(type), targetReg, addrReg, structOffset); - } - structOffset += TARGET_POINTER_SIZE; - } - } - genProduceReg(treeNode); -} - //------------------------------------------------------------------------ // genRangeCheck: generate code for GT_BOUNDS_CHECK node. // @@ -5276,7 +4917,7 @@ void CodeGen::genRangeCheck(GenTree* oper) assert(src1ChkType == TYP_INT || src1ChkType == TYP_LONG); #endif // DEBUG - genJumpToThrowHlpBlk_la(bndsChk->gtThrowKind, INS_bgeu, reg1, bndsChk->gtIndRngFailBB, reg2); + genJumpToThrowHlpBlk_la(bndsChk->gtThrowKind, INS_bgeu, reg1, nullptr, reg2); } //--------------------------------------------------------------------- @@ -5510,7 +5151,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) // IndRngFail: // ... // RngChkExit: - genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), node->gtIndRngFailBB, REG_R21); + genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), nullptr, REG_R21); } emitAttr attr = emitActualTypeSize(node); @@ -5587,7 +5228,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) #ifdef FEATURE_SIMD // Handling of Vector3 type values loaded through indirection. - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genLoadIndTypeSIMD12(tree); return; @@ -5983,50 +5624,62 @@ void CodeGen::genCall(GenTreeCall* call) void CodeGen::genCallInstruction(GenTreeCall* call) { // Determine return value size(s). - const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - emitAttr retSize = EA_PTRSIZE; - emitAttr secondRetSize = EA_UNKNOWN; + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + EmitCallParams params; // unused values are of no interest to GC. if (!call->IsUnusedValue()) { if (call->HasMultiRegRetVal()) { - retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); - secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); + params.retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); + params.secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); + + if (pRetTypeDesc->GetABIReturnReg(1, call->GetUnmanagedCallConv()) == REG_INTRET) + { + // If the second return register is REG_INTRET, then the first return is expected to be in a floating + // register. The emitter has hardcoded belief that params.retSize corresponds to REG_INTRET and + // secondRetSize to REG_INTRET_1, so fix up the situation here. 
+ assert(!EA_IS_GCREF_OR_BYREF(params.retSize)); + params.retSize = params.secondRetSize; + params.secondRetSize = EA_UNKNOWN; + } } else { - assert(call->gtType != TYP_STRUCT); + assert(!call->TypeIs(TYP_STRUCT)); - if (call->gtType == TYP_REF) + if (call->TypeIs(TYP_REF)) { - retSize = EA_GCREF; + params.retSize = EA_GCREF; } - else if (call->gtType == TYP_BYREF) + else if (call->TypeIs(TYP_BYREF)) { - retSize = EA_BYREF; + params.retSize = EA_BYREF; } } } - DebugInfo di; + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); + // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. // We don't want tail call helper calls that were converted from normal calls to get a record, // so we skip this hash table lookup logic in that case. if (compiler->opts.compDbgInfo && compiler->genCallSite2DebugInfoMap != nullptr && !call->IsTailCall()) { + DebugInfo di; (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di); + params.debugInfo = di; } - CORINFO_SIG_INFO* sigInfo = nullptr; #ifdef DEBUG // Pass the call signature information down into the emitter so the emitter can associate // native call sites with the signatures they were generated from. if (!call->IsHelperCall()) { - sigInfo = call->callSig; + params.sigInfo = call->callSig; } if (call->IsFastTailCall()) @@ -6043,9 +5696,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call) for (CallArg& arg : call->gtArgs.Args()) { - for (unsigned i = 0; i < arg.NewAbiInfo.NumSegments; i++) + for (unsigned i = 0; i < arg.AbiInfo.NumSegments; i++) { - const ABIPassingSegment& seg = arg.NewAbiInfo.Segment(i); + const ABIPassingSegment& seg = arg.AbiInfo.Segment(i); if (seg.IsPassedInRegister() && ((trashedByEpilog & seg.GetRegisterMask()) != 0)) { JITDUMP("Tail call node:\n"); @@ -6057,8 +5710,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } #endif // DEBUG - CORINFO_METHOD_HANDLE methHnd; - GenTree* target = getCallTarget(call, &methHnd); + GenTree* target = getCallTarget(call, ¶ms.methHnd); if (target != nullptr) { @@ -6078,17 +5730,10 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // assert(genIsValidIntReg(target->GetRegNum())); - // clang-format off - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, // addr - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - target->GetRegNum(), - call->IsFastTailCall()); - // clang-format on + params.callType = EC_INDIR_R; + params.ireg = target->GetRegNum(); + + genEmitCallWithCurrentGC(params); } else { @@ -6126,60 +5771,42 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // assert(genIsValidIntReg(targetAddrReg)); - // clang-format off - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, // addr - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - targetAddrReg, - call->IsFastTailCall()); - // clang-format on + params.callType = EC_INDIR_R; + params.ireg = targetAddrReg; + genEmitCallWithCurrentGC(params); } else { // Generate a direct call to a non-virtual user defined or helper method assert(call->IsHelperCall() || (call->gtCallType == CT_USER_FUNC)); - void* addr = nullptr; #ifdef FEATURE_READYTORUN if (call->gtEntryPoint.addr != NULL) { assert(call->gtEntryPoint.accessType == IAT_VALUE); - addr = call->gtEntryPoint.addr; + params.addr = call->gtEntryPoint.addr; } else #endif // FEATURE_READYTORUN if 
(call->IsHelperCall()) { - CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(params.methHnd); noway_assert(helperNum != CORINFO_HELP_UNDEF); void* pAddr = nullptr; - addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + params.addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); assert(pAddr == nullptr); } else { // Direct call to a non-virtual user function. - addr = call->gtDirectCallAddress; + params.addr = call->gtDirectCallAddress; } - assert(addr != nullptr); + assert(params.addr != nullptr); - // clang-format off - genEmitCall(emitter::EC_FUNC_TOKEN, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - addr, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - REG_NA, - call->IsFastTailCall()); - // clang-format on + params.callType = EC_FUNC_TOKEN; + genEmitCallWithCurrentGC(params); } } } @@ -6203,11 +5830,20 @@ void CodeGen::genJmpPlaceVarArgs() // void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& desc, regNumber reg) { + assert(REG_R21 != reg); + switch (desc.CheckKind()) { case GenIntCastDesc::CHECK_POSITIVE: { - genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, REG_R0); + if (desc.CheckSrcSize() == 4) // (u)int + { + // If uint is UINT32_MAX then it will be treated as a signed + // number so overflow will also be triggered + GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_R21, reg, 0); + reg = REG_R21; + } + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg); } break; @@ -6215,11 +5851,7 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d { // We need to check if the value is not greater than 0xFFFFFFFF // if the upper 32 bits are zero. - ssize_t imm = -1; - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); - - GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 32); - GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21); + GetEmitter()->emitIns_R_R_I(INS_srli_d, EA_8BYTE, REG_R21, reg, 32); genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); } break; @@ -6228,62 +5860,42 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d { // We need to check if the value is not greater than 0x7FFFFFFF // if the upper 33 bits are zero. 
- // instGen_Set_Reg_To_Imm(EA_8BYTE, REG_R21, 0xFFFFFFFF80000000LL); - ssize_t imm = -1; - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); - - GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 31); - - GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21); + GetEmitter()->emitIns_R_R_I(INS_srli_d, EA_8BYTE, REG_R21, reg, 31); genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); } break; case GenIntCastDesc::CHECK_INT_RANGE: { - const regNumber tempReg = REG_R21; - assert(tempReg != reg); - GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MAX); - genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, tempReg, nullptr, reg); - - GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MIN); - genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, tempReg); + // Emit "if ((long)(int)x != x) goto OVERFLOW" + GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_R21, reg, 0); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, reg, nullptr, REG_R21); } break; default: { assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE); - const int castMaxValue = desc.CheckSmallIntMax(); - const int castMinValue = desc.CheckSmallIntMin(); - instruction ins; + const unsigned castSize = genTypeSize(cast->gtCastType); + const bool isSrcOrDstUnsigned = desc.CheckSmallIntMin() == 0; - if (castMaxValue > 2047) + if (isSrcOrDstUnsigned) { - assert((castMaxValue == 32767) || (castMaxValue == 65535)); - GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_R21, castMaxValue + 1); - ins = castMinValue == 0 ? INS_bgeu : INS_bge; - genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_R21); - } - else - { - GetEmitter()->emitIns_R_R_I(INS_addi_w, EA_ATTR(desc.CheckSrcSize()), REG_R21, REG_R0, castMaxValue); - ins = castMinValue == 0 ? INS_bltu : INS_blt; - genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_R21, nullptr, reg); + // Check if bits leading the actual small int are all zeros + // If destination type is signed then also check if MSB of it is zero + const bool isDstSigned = !varTypeIsUnsigned(cast->gtCastType); + const unsigned excludeMsb = isDstSigned ? 
1 : 0; + const unsigned typeSize = 8 * castSize - excludeMsb; + GetEmitter()->emitIns_R_R_I(INS_srli_d, EA_8BYTE, REG_R21, reg, typeSize); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); } - - if (castMinValue != 0) + else // Signed to signed cast { - if (emitter::isValidSimm12(castMinValue)) - { - GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, castMinValue); - } - else - { - GetEmitter()->emitIns_I_la(EA_8BYTE, REG_R21, castMinValue); - GetEmitter()->emitIns_R_R_R(INS_slt, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, REG_R21); - } - genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + // Extend sign of a small int on all of the bits above it and check whether the original type was same + const auto extensionSize = (8 - castSize) * 8; + GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, reg, extensionSize); + GetEmitter()->emitIns_R_R_I(INS_srai_d, EA_8BYTE, REG_R21, REG_R21, extensionSize); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, reg); } } break; @@ -6386,7 +5998,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) void CodeGen::genFloatToFloatCast(GenTree* treeNode) { // float <--> double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -6853,7 +6465,7 @@ inline void CodeGen::genJumpToThrowHlpBlk_la( { // Find the helper-block which raises the exception. Compiler::AddCodeDsc* add = compiler->fgFindExcptnTarget(codeKind, compiler->compCurBB); - PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block")); + assert((add != nullptr) && ("ERROR: failed to find exception throw block")); assert(add->acdUsed); excpRaisingBlock = add->acdDstBlk; #if !FEATURE_FIXED_OUT_ARGS @@ -6871,10 +6483,9 @@ inline void CodeGen::genJumpToThrowHlpBlk_la( // The code to throw the exception will be generated inline, and // we will jump around it in the normal non-exception case. - void* pAddr = nullptr; - void* addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); - emitter::EmitCallType callType; - regNumber callTarget; + void* pAddr = nullptr; + EmitCallParams params; + params.addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); // maybe optimize // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne))); @@ -6899,16 +6510,16 @@ inline void CodeGen::genJumpToThrowHlpBlk_la( ins = ins == INS_beq ? INS_bne : INS_beq; } - if (addr == nullptr) + if (params.addr == nullptr) { - callType = emitter::EC_INDIR_R; - callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + params.callType = EC_INDIR_R; + params.ireg = REG_DEFAULT_HELPER_CALL_TARGET; if (compiler->opts.compReloc) { ssize_t imm = (3 + 1) << 2; // to jirl's next instr. 
emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); - GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, params.ireg, (ssize_t)pAddr); } else { @@ -6917,16 +6528,15 @@ inline void CodeGen::genJumpToThrowHlpBlk_la( // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); - GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, params.ireg, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, params.ireg, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, params.ireg, params.ireg, ((ssize_t)pAddr & 0xfff) >> 2); } } else { // INS_OPTS_C - callType = emitter::EC_FUNC_TOKEN; - callTarget = REG_NA; + params.callType = EC_FUNC_TOKEN; ssize_t imm = 5 << 2; if (compiler->opts.compReloc) @@ -6939,13 +6549,10 @@ inline void CodeGen::genJumpToThrowHlpBlk_la( BasicBlock* skipLabel = genCreateTempLabel(); - emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)), - INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ - ); + params.methHnd = compiler->eeFindHelper(compiler->acdHelper(codeKind)); + + // TODO-LOONGARCH64: Why is this not using genEmitHelperCall? + genEmitCallWithCurrentGC(params); regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind))); regSet.verifyRegistersUsed(killMask); @@ -7001,8 +6608,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). * * For functions with GS and localloc, we had saved the frame pointer and RA at the top - * of the frame. Note that the funclet frames must follow the same rule, - * and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + * of the frame. * Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. 
* * The frames look like the following (simplified to only include components that matter for establishing the @@ -7022,8 +6628,6 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSPSym | // 8 bytes, Only for frames with EH, (omitted in NativeAOT ABI) - * |-----------------------| * |Callee saved registers | // not including FP/RA; multiple of 8 bytes * |-----------------------| * | Saved RA | // 8 bytes @@ -7076,7 +6680,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // - Generate fully interruptible code for loops that contains calls // - Generate fully interruptible code for leaf methods // - // Given the limited benefit from this optimization (<10k for SPCL NGen image), the extra complexity + // Given the limited benefit from this optimization (<10k for SPCL AOT image), the extra complexity // is not worth it. // @@ -7111,10 +6715,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe int totalFrameSize = genTotalFrameSize(); int leftFrameSize = 0; int localFrameSize = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - localFrameSize -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { localFrameSize -= TARGET_POINTER_SIZE; @@ -7166,7 +6766,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe if (leftFrameSize != 0) { - genStackPointerAdjustment(-leftFrameSize, initReg, pInitRegZeroed, /* reportUnwindData */ true); + // We've already established the frame pointer, so no need to report the stack pointer change to unwind info. + genStackPointerAdjustment(-leftFrameSize, initReg, pInitRegZeroed, /* reportUnwindData */ false); } } @@ -7180,10 +6781,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) int totalFrameSize = genTotalFrameSize(); int localFrameSize = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - localFrameSize -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { localFrameSize -= TARGET_POINTER_SIZE; diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 54cd716b1cce..680b18213f9a 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -318,7 +318,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * Funclets have the following incoming arguments: * * catch: a0 = the exception object that was caught (see GT_CATCH_ARG) - * filter: a0 = the exception object to filter (see GT_CATCH_ARG), a1 = CallerSP of the containing function + * filter: a0 = the exception object to filter (see GT_CATCH_ARG) * finally/fault: none * * Funclets set the following registers on exit: @@ -347,8 +347,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned * |-----------------------| * |Callee saved registers | // multiple of 8 bytes, not including FP/RA @@ -368,13 +366,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * function, even if the funclet doesn't have any calls, or 
has a much smaller, or larger, maximum number of * outgoing arguments for any call). * - * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, - * and that location is the same relative to Caller-SP as in the main function where higher than - * the callee-saved registers. - * That is to say, the PSPSym's relative offset to Caller-SP is not depended on the callee-saved registers. - * - * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we - * must add buffer space for the saved varargs/argument registers here, if the main function did the same. + * Funclets do not have varargs arguments. * * Note that localloc cannot be used in a funclet. * @@ -401,7 +393,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // TODO-RISCV64: Implement varargs (NYI_RISCV64) assert(block != NULL); - assert(block->HasFlag(BBF_FUNCLET_BEG)); + assert(compiler->bbIsFuncletBeg(block)); ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); @@ -464,45 +456,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - - // If there is no PSPSym (NativeAOT ABI), we are done. Otherwise, we need to set up the PSPSym in the functlet - // frame. - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - if (isFilter) - { - // This is the first block of a filter - // Note that register a1 = CallerSP of the containing function - // A1 is overwritten by the first Load (new callerSP) - // A2 is scratch when we have a large constant offset - - // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or - // function) - genInstrWithConstant(INS_ld, EA_PTRSIZE, REG_A1, REG_A1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, - REG_A2, false); - regSet.verifyRegUsed(REG_A1); - - // Store the PSP value (aka CallerSP) - genInstrWithConstant(INS_sd, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2, - false); - - // re-establish the frame pointer - genInstrWithConstant(INS_addi, EA_PTRSIZE, REG_FPBASE, REG_A1, - genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); - } - else // This is a non-filter funclet - { - // A3 is scratch, A2 can also become scratch. - - // compute the CallerSP, given the frame pointer. a3 is scratch? - genInstrWithConstant(INS_addi, EA_PTRSIZE, REG_A3, REG_FPBASE, - -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); - regSet.verifyRegUsed(REG_A3); - - genInstrWithConstant(INS_sd, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2, - false); - } - } } /***************************************************************************** @@ -580,21 +533,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_RA) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); - // Because a method and funclets must have the same caller-relative PSPSym offset, - // if there is a PSPSym, we have to pad the funclet frame size for OSR. - // - int osrPad = 0; - if (compiler->opts.IsOSR()) - { - osrPad -= compiler->info.compPatchpointInfo->TotalFrameSize(); - - // OSR pad must be already aligned to stack size. 
- assert((osrPad % STACK_ALIGN) == 0); - } - /* Now save it for future use */ - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() + osrPad; - int funcletFrameSize = compiler->lvaOutgoingArgSpaceSize; genFuncletInfo.fiSP_to_CalleeSaved_delta = funcletFrameSize; @@ -607,13 +546,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() delta_PSP -= TARGET_POINTER_SIZE; } - funcletFrameSize = funcletFrameSize - delta_PSP - osrPad; + funcletFrameSize = funcletFrameSize - delta_PSP; funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); - genFuncletInfo.fiSpDelta = -funcletFrameSize; - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSize + delta_PSP + osrPad; - genFuncletInfo.fiCallerSP_to_PSP_slot_delta = osrPad + delta_PSP; + genFuncletInfo.fiSpDelta = -funcletFrameSize; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; #ifdef DEBUG if (verbose) @@ -623,22 +560,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); - if (compiler->opts.IsOSR()) - { - printf(" OSR Pad: %d\n", osrPad); - } - printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); printf(" SP to CalleeSaved location delta: %d\n", genFuncletInfo.fiSP_to_CalleeSaved_delta); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); } assert(genFuncletInfo.fiSP_to_CalleeSaved_delta >= 0); - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! - } #endif // DEBUG } @@ -687,7 +612,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) addrInfo.addr = nullptr; addrInfo.accessType = IAT_VALUE; - if (jmpEpilog && (lastNode->gtOper == GT_JMP)) + if (jmpEpilog && lastNode->OperIs(GT_JMP)) { methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); @@ -705,19 +630,18 @@ void CodeGen::genFnEpilog(BasicBlock* block) /* figure out what jump we have */ GenTree* jmpNode = lastNode; #if !FEATURE_FASTTAILCALL - noway_assert(jmpNode->gtOper == GT_JMP); + noway_assert(jmpNode->OperIs(GT_JMP)); #else // FEATURE_FASTTAILCALL // armarch // If jmpNode is GT_JMP then gtNext must be null. // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. - noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + noway_assert(!jmpNode->OperIs(GT_JMP) || (jmpNode->gtNext == nullptr)); // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp - noway_assert((jmpNode->gtOper == GT_JMP) || - ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + noway_assert(jmpNode->OperIs(GT_JMP) || (jmpNode->OperIs(GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); // The next block is associated with this "if" stmt - if (jmpNode->gtOper == GT_JMP) + if (jmpNode->OperIs(GT_JMP)) #endif // FEATURE_FASTTAILCALL { // Simply emit a jump to the methodHnd. 
This is similar to a call so we can use @@ -725,9 +649,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) assert(methHnd != nullptr); assert(addrInfo.addr != nullptr); - emitter::EmitCallType callType; - void* addr; - regNumber indCallReg; + EmitCallParams params; + params.methHnd = methHnd; + switch (addrInfo.accessType) { case IAT_VALUE: @@ -735,14 +659,13 @@ void CodeGen::genFnEpilog(BasicBlock* block) case IAT_PVALUE: // Load the address into a register, load indirect and call through a register // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use - callType = emitter::EC_INDIR_R; - indCallReg = REG_INDIRECT_CALL_TARGET_REG; - addr = NULL; - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + params.callType = EC_INDIR_R; + params.ireg = REG_INDIRECT_CALL_TARGET_REG; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, params.ireg, (ssize_t)addrInfo.addr); if (addrInfo.accessType == IAT_PVALUE) { - GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, indCallReg, indCallReg, 0); - regSet.verifyRegUsed(indCallReg); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, params.ireg, params.ireg, 0); + regSet.verifyRegUsed(params.ireg); } break; @@ -752,11 +675,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) // We have to use R12 since we assume the argument registers are in use // LR is used as helper register right before it is restored from stack, thus, // all relative address calculations are performed before LR is restored. - callType = emitter::EC_INDIR_R; - indCallReg = REG_T2; - addr = NULL; + params.callType = EC_INDIR_R; + params.ireg = REG_T2; - regSet.verifyRegUsed(indCallReg); + regSet.verifyRegUsed(params.ireg); break; } @@ -771,24 +693,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) genPopCalleeSavedRegisters(true); - // clang-format off - GetEmitter()->emitIns_Call(callType, - methHnd, - INDEBUG_LDISASM_COMMA(nullptr) - addr, - 0, // argSize - EA_UNKNOWN // retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - DebugInfo(), - indCallReg, // ireg - REG_NA, // xreg - 0, // xmul - 0, // disp - true); // isJump - // clang-format on + params.isJump = true; + + genEmitCallWithCurrentGC(params); } #if FEATURE_FASTTAILCALL else @@ -809,37 +716,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) compiler->unwindEndEpilog(); } -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - - int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); - - if (compiler->opts.IsOSR()) - { - SPtoCallerSPdelta += compiler->info.compPatchpointInfo->TotalFrameSize(); - } - - // We will just use the initReg since it is an available register - // and we are probably done using it anyway... 
- regNumber regTmp = initReg; - *pInitRegZeroed = false; - - genInstrWithConstant(INS_addi, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, regTmp, false); - GetEmitter()->emitIns_S_R(INS_sd, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); -} - void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed) { regNumber rAddr; - regNumber rCnt = REG_NA; // Invalid regMaskTP regMask; regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers @@ -868,84 +747,77 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu *pInitRegZeroed = false; } - bool useLoop = false; - unsigned uCntBytes = untrLclHi - untrLclLo; - assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes. - unsigned int padding = untrLclLo & 0x7; + ssize_t uLclBytes = untrLclHi - untrLclLo; + assert((uLclBytes % 4) == 0); // The smallest stack slot is always 4 bytes. + ssize_t padding = untrLclLo & 0x7; if (padding) { assert(padding == 4); GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, 0); - uCntBytes -= 4; + uLclBytes -= 4; } - unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use. + ssize_t uRegSlots = uLclBytes / REGSIZE_BYTES; + ssize_t uAddrCurr = 0; - // When uCntSlots is 9 or less, we will emit a sequence of sd instructions inline. - // When it is 10 or greater, we will emit a loop containing a sd instruction. - // In both of these cases the sd instruction will write two zeros to memory - // and we will use a single str instruction at the end whenever we have an odd count. - if (uCntSlots >= 10) - useLoop = true; - - if (useLoop) + if (uRegSlots >= 12) { - // We pick the next lowest register number for rCnt + regNumber rEndAddr; noway_assert(availMask != RBM_NONE); - regMask = genFindLowestBit(availMask); - rCnt = genRegNumFromMask(regMask); + regMask = genFindLowestBit(availMask); + rEndAddr = genRegNumFromMask(regMask); availMask &= ~regMask; - noway_assert(uCntSlots >= 2); - assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming - // argument reg - instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); + // rEndAddr is not a live incoming argument reg + assert((genRegMask(rEndAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); - // TODO-RISCV64: maybe optimize further - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); + ssize_t uLoopBytes = (uRegSlots & ~0x3) * REGSIZE_BYTES; - // bne rCnt, zero, -4 * 4 - ssize_t imm = -16; - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); - GetEmitter()->emitIns_R_R_I(INS_bne, EA_PTRSIZE, rCnt, REG_R0, imm); + if (uLoopBytes) + { + if (emitter::isValidSimm12(uLoopBytes)) + { + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rEndAddr, rAddr, uLoopBytes); + } + else + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rEndAddr, uLoopBytes); + GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rEndAddr, rEndAddr, rAddr); + } - uCntBytes %= REGSIZE_BYTES * 2; + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + 2 * REGSIZE_BYTES); 
+ GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + 3 * REGSIZE_BYTES); + + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, rAddr, rEndAddr, -5 << 2); + + uLclBytes -= uLoopBytes; + uAddrCurr = 0; + } } - else + + while (uLclBytes >= REGSIZE_BYTES) { - while (uCntBytes >= REGSIZE_BYTES * 2) - { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); - uCntBytes -= REGSIZE_BYTES * 2; - padding = 0; - } + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, uAddrCurr + padding); + uLclBytes -= REGSIZE_BYTES; + uAddrCurr += REGSIZE_BYTES; } - if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number) + if (uAddrCurr != 0) { - if ((uCntBytes - REGSIZE_BYTES) == 0) - { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); - } - else - { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, REGSIZE_BYTES); - } - uCntBytes -= REGSIZE_BYTES; + uAddrCurr -= REGSIZE_BYTES; } - if (uCntBytes > 0) + + if (uLclBytes != 0) { - assert(uCntBytes == sizeof(int)); - GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, padding); - uCntBytes -= sizeof(int); + assert(uLclBytes == 4); + GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, uAddrCurr + padding); + uLclBytes -= 4; } - noway_assert(uCntBytes == 0); + noway_assert(uLclBytes == 0); } void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock) @@ -964,20 +836,10 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) BasicBlock* const nextBlock = block->Next(); // Generate a call to the finally, like this: - // mov a0,qword ptr [fp + 10H] / sp // Load a0 with PSPSym, or sp if PSPSym is not used // jal finally-funclet // j finally-return // Only for non-retless finally calls // The 'b' can be a NOP if we're going to the next block. - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - GetEmitter()->emitIns_R_S(INS_ld, EA_PTRSIZE, REG_A0, compiler->lvaPSPSym, 0); - } - else - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_A0, REG_SPBASE, 0); - } - if (block->HasFlag(BBF_RETLESS_CALL)) { GetEmitter()->emitIns_J(INS_jal, block->GetTarget()); @@ -1091,31 +953,23 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre double constValue = tree->AsDblCon()->DconValue(); assert(emitter::isFloatReg(targetReg)); - - // Make sure we use "fmv.w.x reg, zero" only for positive zero (0.0) and not for negative zero (-0.0) - if (FloatingPointUtils::isPositiveZero(constValue)) - { - // A faster/smaller way to generate 0.0 - // We will just zero out the entire register for both float and double - emit->emitIns_R_R(size == EA_4BYTE ? INS_fmv_w_x : INS_fmv_d_x, size, targetReg, REG_R0); - break; - } - - int64_t bits = - (size == EA_4BYTE) - ? (int32_t)BitOperations::SingleToUInt32Bits(FloatingPointUtils::convertToSingle(constValue)) - : (int64_t)BitOperations::DoubleToUInt64Bits(constValue); - bool fitsInLui = ((bits & 0xfff) == 0) && emitter::isValidSimm20(bits >> 12); - if (fitsInLui || emitter::isValidSimm12(bits)) // can we synthesize bits with a single instruction? 
+ int64_t bits; + if (emitter::isSingleInstructionFpImm(constValue, size, &bits)) { - regNumber temp = internalRegisters.GetSingle(tree); - if (fitsInLui) - { - emit->emitIns_R_I(INS_lui, size, temp, bits >> 12); - } - else + regNumber temp = REG_ZERO; + if (bits != 0) { - emit->emitIns_R_R_I(INS_addi, size, temp, REG_ZERO, bits); + temp = internalRegisters.GetSingle(tree); + if (emitter::isValidSimm12(bits)) + { + emit->emitIns_R_R_I(INS_addi, size, temp, REG_ZERO, bits); + } + else + { + int64_t upperBits = bits >> 12; + assert((upperBits << 12) == bits); + emit->emitIns_R_I(INS_lui, size, temp, upperBits); + } } emit->emitIns_R_R(size == EA_4BYTE ? INS_fmv_w_x : INS_fmv_d_x, size, targetReg, temp); @@ -1212,7 +1066,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) genProduceReg(treeNode); } -// Generate code for ADD, SUB, MUL, AND, AND_NOT, OR and XOR +// Generate code for ADD, SUB, MUL, AND, AND_NOT, OR, OR_NOT, XOR, and XOR_NOT // This method is expected to have called genConsumeOperands() before calling it. void CodeGen::genCodeForBinary(GenTreeOp* treeNode) { @@ -1220,7 +1074,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) regNumber targetReg = treeNode->GetRegNum(); emitter* emit = GetEmitter(); - assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_AND_NOT, GT_OR, GT_XOR)); + assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_AND_NOT, GT_OR, GT_OR_NOT, GT_XOR, GT_XOR_NOT)); GenTree* op1 = treeNode->gtGetOp1(); GenTree* op2 = treeNode->gtGetOp2(); @@ -1283,7 +1137,7 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) #ifdef FEATURE_SIMD // storing of TYP_SIMD12 (i.e. Vector3) field - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genStoreLclTypeSIMD12(tree); return; @@ -1371,7 +1225,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) #ifdef FEATURE_SIMD // storing of TYP_SIMD12 (i.e. Vector3) field - if (lclNode->TypeGet() == TYP_SIMD12) + if (lclNode->TypeIs(TYP_SIMD12)) { genStoreLclTypeSIMD12(lclNode); return; @@ -1447,7 +1301,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) void CodeGen::genSimpleReturn(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); + assert(treeNode->OperIs(GT_RETURN) || treeNode->OperIs(GT_RETFILT)); GenTree* op1 = treeNode->gtGetOp1(); var_types targetType = treeNode->TypeGet(); @@ -1460,7 +1314,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) if (!movRequired) { - if (op1->OperGet() == GT_LCL_VAR) + if (op1->OperIs(GT_LCL_VAR)) { GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); bool isRegCandidate = compiler->lvaTable[lcl->GetLclNum()].lvIsRegCandidate(); @@ -1500,7 +1354,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) */ void CodeGen::genLclHeap(GenTree* tree) { - assert(tree->OperGet() == GT_LCLHEAP); + assert(tree->OperIs(GT_LCLHEAP)); assert(compiler->compLocallocUsed); emitter* emit = GetEmitter(); @@ -1510,7 +1364,6 @@ void CodeGen::genLclHeap(GenTree* tree) regNumber targetReg = tree->GetRegNum(); regNumber regCnt = REG_NA; regNumber tempReg = REG_NA; - regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; // can optimize for riscv64. @@ -1635,7 +1488,7 @@ void CodeGen::genLclHeap(GenTree* tree) // The SP might already be in the guard page, so we must touch it BEFORE // the alloc, not after. 
- // ld_w r0, 0(SP) + // tickle the page - this triggers a page fault when on the guard page emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SP, 0); lastTouchDelta = amount; @@ -1680,8 +1533,7 @@ void CodeGen::genLclHeap(GenTree* tree) // and localloc size is a multiple of STACK_ALIGN. // Loop: - ssize_t imm = -16; - emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -16); emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 8); emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 0); @@ -1693,8 +1545,8 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_addi, emitActualTypeSize(type), regCnt, regCnt, -16); - assert(imm == (-4 << 2)); // goto loop. - emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2)); + // goto Loop + emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, -4 << 2); lastTouchDelta = 0; } @@ -1708,7 +1560,6 @@ void CodeGen::genLclHeap(GenTree* tree) // case SP is on the last byte of the guard page. Thus you must // touch SP-0 first not SP-0x1000. // - // This is similar to the prolog code in CodeGen::genAllocLclFrame(). // // Note that we go through a few hoops so that SP never points to // illegal pages at any time during the tickling process. @@ -1719,23 +1570,20 @@ void CodeGen::genLclHeap(GenTree* tree) // addi regCnt, REG_R0, 0 // // Skip: - // lui regTmp, eeGetPageSize()>>12 + // lui regPageSize, eeGetPageSize()>>12 + // addi regTmp, SP, 0 // Loop: - // lw r0, 0(SP) // tickle the page - read from the page - // sub RA, SP, regTmp // decrement SP by eeGetPageSize() - // bltu RA, regCnt, Done - // sub SP, SP,regTmp - // j Loop + // lw r0, 0(regTmp) // tickle the page - read from the page + // sub regTmp, regTmp, regPageSize + // bgeu regTmp, regCnt, Loop // // Done: - // mov SP, regCnt + // addi SP, regCnt, 0 // if (tempReg == REG_NA) tempReg = internalRegisters.Extract(tree); - regNumber rPageSize = internalRegisters.GetSingle(tree); - assert(regCnt != tempReg); emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); @@ -1746,35 +1594,24 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, tempReg, REG_R0, 2 << 2); emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regCnt, REG_R0, 0); - emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); - - // genDefineTempLabel(loop); - - // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page - emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SPBASE, 0); - - // decrement SP by eeGetPageSize() - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, tempReg, REG_SPBASE, rPageSize); - - assert(rPageSize != tempReg); - - ssize_t imm = 3 << 2; // goto done. 
- emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, tempReg, regCnt, imm); + regNumber rPageSize = internalRegisters.GetSingle(tree); - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rPageSize); + noway_assert(rPageSize != tempReg); - imm = -4 << 2; - // Jump to loop and tickle new stack address - emit->emitIns_I(INS_j, EA_PTRSIZE, imm); + emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); + regSet.verifyRegUsed(rPageSize); + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, REG_SPBASE, 0); - // Done with stack tickle loop - // genDefineTempLabel(done); + // tickle the page - this triggers a page fault when on the guard page + emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, tempReg, 0); + emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tempReg, tempReg, rPageSize); - // Now just move the final value to SP - emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0); + emit->emitIns_R_R_I(INS_bgeu, EA_PTRSIZE, tempReg, regCnt, -2 << 2); // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, // we're going to assume the worst and probe. + // Move the final value to SP + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, regCnt, 0); } ALLOC_DONE: @@ -1868,7 +1705,25 @@ void CodeGen::genCodeForNegNot(GenTree* tree) // void CodeGen::genCodeForBswap(GenTree* tree) { - NYI_RISCV64("genCodeForBswap-----unimplemented on RISCV64 yet----"); + assert(tree->OperIs(GT_BSWAP, GT_BSWAP16)); + var_types type = tree->gtGetOp1()->TypeGet(); + emitAttr size = emitTypeSize(type); + regNumber dest = tree->GetRegNum(); + regNumber src = genConsumeReg(tree->gtGetOp1()); + + assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)); + emitter& emit = *GetEmitter(); + emit.emitIns_R_R(INS_rev8, size, dest, src); + if (size < EA_PTRSIZE) + { + int shiftAmount = tree->OperIs(GT_BSWAP16) ? 48 : 32; + // TODO: we need to right-shift the byte-reversed register anyway. Remove the cast (in Lowering::LowerCast?) + // wrapping GT_BSWAP16 and pass the exact destination type here, so that this codegen could leave the register + // properly extended. + emit.emitIns_R_R_I(INS_srli, size, dest, dest, shiftAmount); + } + + genProduceReg(tree); } //------------------------------------------------------------------------ @@ -1892,7 +1747,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) // Floating point divide never raises an exception assert(varTypeIsFloating(tree->gtOp1)); assert(varTypeIsFloating(tree->gtOp2)); - assert(tree->gtOper == GT_DIV); + assert(tree->OperIs(GT_DIV)); instruction ins = genGetInsForOper(tree); emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(), @@ -2156,7 +2011,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) bool sourceIsLocal = false; assert(source->isContained()); - if (source->gtOper == GT_IND) + if (source->OperIs(GT_IND)) { GenTree* srcAddr = source->gtGetOp1(); assert(!srcAddr->isContained()); @@ -2371,12 +2226,12 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) GenTree* data = treeNode->AsOp()->gtOp2; GenTree* addr = treeNode->AsOp()->gtOp1; - regNumber dataReg = data->GetRegNum(); + regNumber dataReg = !data->isContained() ? 
data->GetRegNum() : REG_ZERO; regNumber addrReg = addr->GetRegNum(); regNumber targetReg = treeNode->GetRegNum(); if (targetReg == REG_NA) { - targetReg = REG_R0; + targetReg = REG_ZERO; } genConsumeAddress(addr); @@ -2385,8 +2240,6 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) emitAttr dataSize = emitActualTypeSize(data); bool is4 = (dataSize == EA_4BYTE); - assert(!data->isContainedIntOrIImmed()); - instruction ins = INS_none; switch (treeNode->gtOper) { @@ -2407,7 +2260,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) } GetEmitter()->emitIns_R_R_R(ins, dataSize, targetReg, addrReg, dataReg); - if (targetReg != REG_R0) + if (targetReg != REG_ZERO) { genProduceReg(treeNode); } @@ -2430,9 +2283,19 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) regNumber target = treeNode->GetRegNum(); regNumber loc = locOp->GetRegNum(); - regNumber val = valOp->GetRegNum(); - regNumber comparand = comparandOp->GetRegNum(); - regNumber storeErr = internalRegisters.Extract(treeNode, RBM_ALLINT); + regNumber val = !valOp->isContained() ? valOp->GetRegNum() : REG_ZERO; + regNumber comparand = REG_ZERO; + if (!comparandOp->isContained()) + { + comparand = comparandOp->GetRegNum(); + if (comparandOp->TypeIs(TYP_INT, TYP_UINT)) + { + regNumber signExtendedComparand = internalRegisters.Extract(treeNode); + GetEmitter()->emitIns_R_R(INS_sext_w, EA_4BYTE, signExtendedComparand, comparand); + comparand = signExtendedComparand; + } + } + regNumber storeErr = internalRegisters.GetSingle(treeNode); // Register allocator should have extended the lifetimes of all input and internal registers // They should all be different @@ -2443,16 +2306,12 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) noway_assert(loc != val); noway_assert(loc != comparand); noway_assert(loc != storeErr); - noway_assert(val != comparand); + noway_assert((val != comparand) || (val == REG_ZERO)); noway_assert(val != storeErr); noway_assert(comparand != storeErr); noway_assert(target != REG_NA); noway_assert(storeErr != REG_NA); - assert(locOp->isUsedFromReg()); - assert(valOp->isUsedFromReg()); - assert(!comparandOp->isUsedFromMemory()); - genConsumeAddress(locOp); genConsumeRegs(valOp); genConsumeRegs(comparandOp); @@ -2674,7 +2533,9 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) break; case GT_AND_NOT: - NYI_RISCV64("GT_AND_NOT-----unimplemented/unused on RISCV64 yet----"); + assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)); + assert(!isImmed(treeNode)); + ins = INS_andn; break; case GT_OR: @@ -2689,6 +2550,12 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) } break; + case GT_OR_NOT: + assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)); + assert(!isImmed(treeNode)); + ins = INS_orn; + break; + case GT_LSH: isImm = isImmed(treeNode); if (isImm) @@ -2786,6 +2653,36 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) } break; + case GT_SH1ADD: + ins = INS_sh1add; + break; + + case GT_SH2ADD: + ins = INS_sh2add; + break; + + case GT_SH3ADD: + ins = INS_sh3add; + break; + + case GT_SH1ADD_UW: + ins = INS_sh1add_uw; + break; + + case GT_SH2ADD_UW: + ins = INS_sh2add_uw; + break; + + case GT_SH3ADD_UW: + ins = INS_sh3add_uw; + break; + + case GT_XOR_NOT: + assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)); + assert(!isImmed(treeNode)); + ins = INS_xnor; + break; + default: NO_WAY("Unhandled oper in genGetInsForOper() - integer"); break; @@ -2802,7 +2699,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) 
// void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) { - assert(tree->OperGet() == GT_RETURNTRAP); + assert(tree->OperIs(GT_RETURNTRAP)); // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC // based on the contents of 'data' @@ -2813,42 +2710,38 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) BasicBlock* skipLabel = genCreateTempLabel(); GetEmitter()->emitIns_J_cond_la(INS_beq, skipLabel, data->GetRegNum(), REG_R0); - void* pAddr = nullptr; - void* addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); - emitter::EmitCallType callType; - regNumber callTarget; + EmitCallParams params; + + void* pAddr = nullptr; + params.addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); - if (addr == nullptr) + if (params.addr == nullptr) { - callType = emitter::EC_INDIR_R; - callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + params.callType = EC_INDIR_R; + params.ireg = REG_DEFAULT_HELPER_CALL_TARGET; if (compiler->opts.compReloc) { - GetEmitter()->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + GetEmitter()->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, params.ireg, (ssize_t)pAddr); } else { // TODO-RISCV64: maybe optimize further. - GetEmitter()->emitLoadImmediate(EA_PTRSIZE, callTarget, (ssize_t)pAddr); - GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, callTarget, callTarget, 0); + GetEmitter()->emitLoadImmediate(EA_PTRSIZE, params.ireg, (ssize_t)pAddr); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, params.ireg, params.ireg, 0); } - regSet.verifyRegUsed(callTarget); + regSet.verifyRegUsed(params.ireg); } else { - callType = emitter::EC_FUNC_TOKEN; - callTarget = REG_NA; + params.callType = EC_FUNC_TOKEN; } // TODO-RISCV64: can optimize further !!! - GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), - INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ - ); + // TODO-RISCV64: Why does this not use genEmitHelperCall? + params.methHnd = compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC); + + genEmitCallWithCurrentGC(params); regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); regSet.verifyRegistersUsed(killMask); @@ -2866,7 +2759,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) { #ifdef FEATURE_SIMD // Storing Vector3 of size 12 bytes through indirection - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genStoreIndTypeSIMD12(tree); return; @@ -2962,7 +2855,7 @@ void CodeGen::genCodeForSwap(GenTreeOp*) void CodeGen::genIntToFloatCast(GenTree* treeNode) { // int type --> float/double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -3064,7 +2957,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) void CodeGen::genFloatToIntCast(GenTree* treeNode) { // int type --> float/double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); assert(genIsValidIntReg(treeNode->GetRegNum())); // Must be a valid int reg. 
@@ -3176,7 +3069,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // void CodeGen::genCkfinite(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_CKFINITE); + assert(treeNode->OperIs(GT_CKFINITE)); GenTree* op1 = treeNode->AsOp()->gtOp1; var_types targetType = treeNode->TypeGet(); @@ -3221,6 +3114,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) assert(!op2->isUsedFromMemory()); emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + assert(cmpSize == EA_4BYTE || cmpSize == EA_8BYTE); assert(genTypeSize(op1Type) == genTypeSize(op2Type)); @@ -3228,7 +3122,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) regNumber targetReg = tree->GetRegNum(); assert(targetReg != REG_NA); - assert(tree->TypeGet() != TYP_VOID); + assert(!tree->TypeIs(TYP_VOID)); assert(!op1->isContainedIntOrIImmed()); assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); @@ -3240,7 +3134,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) if (isUnordered) { - BasicBlock* skipLabel = nullptr; if (tree->OperIs(GT_LT)) { emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_fle_s : INS_fle_d, cmpSize, targetReg, regOp2, regOp1); @@ -3249,19 +3142,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) { emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_flt_s : INS_flt_d, cmpSize, targetReg, regOp2, regOp1); } - else if (tree->OperIs(GT_EQ)) - { - regNumber tempReg = internalRegisters.GetSingle(tree); - skipLabel = genCreateTempLabel(); - emit->emitIns_R_R(cmpSize == EA_4BYTE ? INS_fclass_s : INS_fclass_d, cmpSize, targetReg, regOp1); - emit->emitIns_R_R(cmpSize == EA_4BYTE ? INS_fclass_s : INS_fclass_d, cmpSize, tempReg, regOp2); - emit->emitIns_R_R_R(INS_or, EA_8BYTE, tempReg, targetReg, tempReg); - emit->emitIns_R_R_I(INS_andi, EA_8BYTE, tempReg, tempReg, 0x300); - emit->emitIns_R_R_I(INS_addi, EA_8BYTE, targetReg, REG_R0, 1); - emit->emitIns_J(INS_bnez, skipLabel, tempReg); - emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_feq_s : INS_feq_d, cmpSize, targetReg, regOp1, regOp2); - genDefineTempLabel(skipLabel); - } else if (tree->OperIs(GT_NE)) { emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_feq_s : INS_feq_d, cmpSize, targetReg, regOp1, regOp2); @@ -3274,11 +3154,11 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) { emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_flt_s : INS_flt_d, cmpSize, targetReg, regOp1, regOp2); } - if (skipLabel == nullptr) + else { - emit->emitIns_R_R_R(INS_sub, EA_8BYTE, targetReg, REG_R0, targetReg); - emit->emitIns_R_R_I(INS_addi, EA_8BYTE, targetReg, targetReg, 1); + unreached(); } + emit->emitIns_R_R_I(INS_xori, EA_8BYTE, targetReg, targetReg, 1); } else { @@ -3294,21 +3174,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) { emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_feq_s : INS_feq_d, cmpSize, targetReg, regOp1, regOp2); } - else if (tree->OperIs(GT_NE)) - { - regNumber tempReg = internalRegisters.GetSingle(tree); - emit->emitIns_R_R(cmpSize == EA_4BYTE ? INS_fclass_s : INS_fclass_d, cmpSize, targetReg, regOp1); - emit->emitIns_R_R(cmpSize == EA_4BYTE ? INS_fclass_s : INS_fclass_d, cmpSize, tempReg, regOp2); - emit->emitIns_R_R_R(INS_or, EA_8BYTE, tempReg, targetReg, tempReg); - emit->emitIns_R_R_I(INS_andi, EA_8BYTE, tempReg, tempReg, 0x300); - emit->emitIns_R_R_I(INS_addi, EA_8BYTE, targetReg, REG_R0, 0); - BasicBlock* skipLabel = genCreateTempLabel(); - emit->emitIns_J(INS_bnez, skipLabel, tempReg); - emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? 
INS_feq_s : INS_feq_d, cmpSize, targetReg, regOp1, regOp2); - emit->emitIns_R_R_R(INS_sub, EA_8BYTE, targetReg, REG_R0, targetReg); - emit->emitIns_R_R_I(INS_addi, EA_8BYTE, targetReg, targetReg, 1); - genDefineTempLabel(skipLabel); - } else if (tree->OperIs(GT_GT)) { emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_flt_s : INS_flt_d, cmpSize, targetReg, regOp2, regOp1); @@ -3317,6 +3182,10 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) { emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_fle_s : INS_fle_d, cmpSize, targetReg, regOp2, regOp1); } + else + { + unreached(); + } } } else @@ -3328,132 +3197,82 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) { ssize_t imm = op2->AsIntCon()->gtIconVal; - switch (cmpSize) - { - case EA_4BYTE: - if (isUnsigned) - { - imm = static_cast(imm); - - regNumber tmpRegOp1 = internalRegisters.GetSingle(tree); - assert(regOp1 != tmpRegOp1); - - emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32); - emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32); - regOp1 = tmpRegOp1; - } - else - { - imm = static_cast(imm); - } - break; - case EA_8BYTE: - break; - default: - unreached(); - } + bool useAddSub = !(!tree->OperIs(GT_EQ, GT_NE) || (imm == -2048)); + bool useShiftRight = + !isUnsigned && ((tree->OperIs(GT_LT) && (imm == 0)) || (tree->OperIs(GT_LE) && (imm == -1))); + bool useLoadImm = isUnsigned && ((tree->OperIs(GT_LT, GT_GE) && (imm == 0)) || + (tree->OperIs(GT_LE, GT_GT) && (imm == -1))); - if (tree->OperIs(GT_LT)) - { - if (!isUnsigned && emitter::isValidSimm12(imm)) - { - emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); - } - else if (isUnsigned && emitter::isValidUimm11(imm)) - { - emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm); - } - else - { - emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm); - emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); - } - } - else if (tree->OperIs(GT_LE)) - { - if (!isUnsigned && emitter::isValidSimm12(imm + 1)) - { - emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); - } - else if (isUnsigned && emitter::isValidUimm11(imm + 1)) - { - emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm + 1); - } - else - { - emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm + 1); - emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); - } - } - else if (tree->OperIs(GT_GT)) + if (cmpSize == EA_4BYTE) { - if (!isUnsigned && emitter::isValidSimm12(imm + 1)) - { - emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); - emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); - } - else if (isUnsigned && emitter::isValidUimm11(imm + 1)) - { - emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm + 1); - emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); - } - else + if (!useAddSub && !useShiftRight && !useLoadImm) { - emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm); - emit->emitIns_R_R_R(isUnsigned ? 
INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1); + regNumber tmpRegOp1 = internalRegisters.GetSingle(tree); + assert(regOp1 != tmpRegOp1); + imm = static_cast(imm); + emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1); + regOp1 = tmpRegOp1; } } - else if (tree->OperIs(GT_GE)) + + if (tree->OperIs(GT_EQ, GT_NE)) { - if (!isUnsigned && emitter::isValidSimm12(imm)) + if ((imm != 0) || (cmpSize == EA_4BYTE)) { - emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + instruction diff = INS_xori; + if (imm != -2048) + { + assert(useAddSub); + diff = (cmpSize == EA_4BYTE) ? INS_addiw : INS_addi; + imm = -imm; + } + emit->emitIns_R_R_I(diff, cmpSize, targetReg, regOp1, imm); + regOp1 = targetReg; } - else if (isUnsigned && emitter::isValidUimm11(imm)) + assert(emitter::isValidSimm12(imm)); + + if (tree->OperIs(GT_EQ)) { - emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, 1); } else { - emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm); - emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + assert(tree->OperIs(GT_NE)); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_ZERO, regOp1); } - emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); } - else if (tree->OperIs(GT_NE)) + else { - if (!imm) - { - emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); - } - else if (emitter::isValidUimm12(imm)) + assert(tree->OperIs(GT_LT, GT_LE, GT_GT, GT_GE)); + if (useLoadImm) { - emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); - emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + // unsigned (a <= ~0), (a >= 0) / (a > ~0), (a < 0) is always true / false + imm = tree->OperIs(GT_GE, GT_LE) ? 1 : 0; + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, targetReg, REG_ZERO, imm); } - else - { - emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm); - emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); - emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); - } - } - else if (tree->OperIs(GT_EQ)) - { - if (!imm) + else if (useShiftRight) { - emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, 1); + // signed (a < 0) or (a <= -1) is just the sign bit + instruction srli = (cmpSize == EA_4BYTE) ? INS_srliw : INS_srli; + emit->emitIns_R_R_I(srli, cmpSize, targetReg, regOp1, cmpSize * 8 - 1); } - else if (emitter::isValidUimm12(imm)) + else if ((tree->OperIs(GT_GT) && (imm == 0)) || (tree->OperIs(GT_GE) && (imm == 1))) { - emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); - emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1); + instruction slt = isUnsigned ? INS_sltu : INS_slt; + emit->emitIns_R_R_R(slt, EA_PTRSIZE, targetReg, REG_ZERO, regOp1); } else { - emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm); - emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); - emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1); + instruction slti = isUnsigned ? 
INS_sltiu : INS_slti; + if (tree->OperIs(GT_LE, GT_GT)) + imm += 1; + assert(emitter::isValidSimm12(imm)); + assert(!isUnsigned || (imm != 0)); // should be handled in useLoadImm + + emit->emitIns_R_R_I(slti, EA_PTRSIZE, targetReg, regOp1, imm); + + if (tree->OperIs(GT_GT, GT_GE)) + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); } } } @@ -3461,58 +3280,43 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) { regNumber regOp2 = op2->GetRegNum(); - if (cmpSize == EA_4BYTE) + if (tree->OperIs(GT_EQ, GT_NE)) { - regNumber tmpRegOp1 = REG_RA; - regNumber tmpRegOp2 = internalRegisters.GetSingle(tree); - assert(regOp1 != tmpRegOp2); - assert(regOp2 != tmpRegOp2); - - if (isUnsigned) + instruction sub = (cmpSize == EA_4BYTE) ? INS_subw : INS_sub; + emit->emitIns_R_R_R(sub, EA_PTRSIZE, targetReg, regOp1, regOp2); + if (tree->OperIs(GT_EQ)) { - emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32); - emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32); - - emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp2, regOp2, 32); - emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp2, tmpRegOp2, 32); + emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1); } else { - emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp1, regOp1, 0); - emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp2, regOp2, 0); + assert(tree->OperIs(GT_NE)); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_ZERO, targetReg); } - - regOp1 = tmpRegOp1; - regOp2 = tmpRegOp2; - } - - if (tree->OperIs(GT_LT)) - { - emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); - } - else if (tree->OperIs(GT_LE)) - { - emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); - emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); } - else if (tree->OperIs(GT_GT)) - { - emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); - } - else if (tree->OperIs(GT_GE)) - { - emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); - emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); - } - else if (tree->OperIs(GT_NE)) - { - emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); - emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); - } - else if (tree->OperIs(GT_EQ)) + else { - emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); - emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1); + assert(tree->OperIs(GT_LT, GT_LE, GT_GT, GT_GE)); + if (cmpSize == EA_4BYTE) + { + regNumber tmpRegOp1 = REG_RA; + regNumber tmpRegOp2 = internalRegisters.GetSingle(tree); + assert(regOp1 != tmpRegOp2); + assert(regOp2 != tmpRegOp2); + emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1); + emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp2, regOp2); + regOp1 = tmpRegOp1; + regOp2 = tmpRegOp2; + } + + instruction slt = isUnsigned ? 
INS_sltu : INS_slt; + if (tree->OperIs(GT_LE, GT_GT)) + std::swap(regOp1, regOp2); + + emit->emitIns_R_R_R(slt, EA_8BYTE, targetReg, regOp1, regOp2); + + if (tree->OperIs(GT_LE, GT_GE)) + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); } } } @@ -3538,7 +3342,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) assert(tree->OperIs(GT_JCMP)); assert(!varTypeIsFloating(tree)); - assert(tree->TypeGet() == TYP_VOID); + assert(tree->TypeIs(TYP_VOID)); assert(tree->GetRegNum() == REG_NA); GenTree* op1 = tree->gtGetOp1(); @@ -3574,19 +3378,8 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) { regNumber tmpRegOp1 = rsGetRsvdReg(); assert(regOp1 != tmpRegOp1); - if (cond.IsUnsigned()) - { - imm = static_cast(imm); - - assert(regOp1 != tmpRegOp1); - emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32); - emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32); - } - else - { - imm = static_cast(imm); - emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1); - } + imm = static_cast(imm); + emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1); regOp1 = tmpRegOp1; break; } @@ -3622,15 +3415,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) { regNumber tmpRegOp1 = rsGetRsvdReg(); assert(regOp1 != tmpRegOp1); - if (cond.IsUnsigned()) - { - emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32); - emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32); - } - else - { - emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1); - } + emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1); regOp1 = tmpRegOp1; } } @@ -3679,20 +3464,8 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) regNumber tmpRegOp2 = rsGetRsvdReg(); assert(regOp1 != tmpRegOp2); assert(regOp2 != tmpRegOp2); - - if (cond.IsUnsigned()) - { - emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32); - emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32); - emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp2, regOp2, 32); - emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp2, tmpRegOp2, 32); - } - else - { - emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp1, regOp1, 0); - emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp2, regOp2, 0); - } - + emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1); + emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp2, regOp2); regOp1 = tmpRegOp1; regOp2 = tmpRegOp2; } @@ -3755,10 +3528,6 @@ int CodeGenInterface::genSPtoFPdelta() const assert(compiler->compCalleeRegsPushed >= 2); // always FP/RA. int delta = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - delta -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { delta -= TARGET_POINTER_SIZE; @@ -3845,14 +3614,14 @@ static void emitLoadConstAtAddr(emitter* emit, regNumber dstRegister, ssize_t im void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) { - void* addr = nullptr; void* pAddr = nullptr; - emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; - addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); - regNumber callTarget = REG_NA; + EmitCallParams params; + params.callType = EC_FUNC_TOKEN; + params.addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regMaskTP killSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); - if (addr == nullptr) + if (params.addr == nullptr) { // This is call to a runtime helper. 
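The rewritten immediate-compare path above leans on a few branch-free identities instead of the old per-operator ladders. A small self-contained check of those identities (editor-added illustration assuming 64-bit two's-complement arithmetic; not JIT code):

```cpp
#include <cassert>
#include <cstdint>

// Compare-lowering identities used above; sltiu/slt/sltu are modeled
// as ordinary C comparisons producing 0 or 1.
int main()
{
    const int64_t vals[] = { -5, -1, 0, 1, 7, 2047 };
    const int64_t imm    = 7;

    for (int64_t a : vals)
    {
        // GT_EQ: (a == imm)  ->  addi t, a, -imm ; sltiu rd, t, 1
        assert(((uint64_t)(a - imm) < 1) == (a == imm));

        // GT_NE: (a != 0)    ->  sltu rd, zero, a
        assert((0 < (uint64_t)a) == (a != 0));

        // GT_GT signed: (a > imm)  ->  slti rd, a, imm + 1 ; xori rd, rd, 1
        assert(((a < imm + 1) ^ 1) == (a > imm));

        // GT_LT against 0 (signed): (a < 0) is just the sign bit -> srli rd, a, 63
        assert(((uint64_t)a >> 63) == (uint64_t)(a < 0));
    }
    return 0;
}
```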
// lui reg, pAddr #NOTE: this maybe multi-instructions. @@ -3867,37 +3636,32 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, } regMaskTP callTargetMask = genRegMask(callTargetReg); - regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); // assert that all registers in callTargetMask are in the callKillSet - noway_assert((callTargetMask & callKillSet) == callTargetMask); - - callTarget = callTargetReg; + noway_assert((callTargetMask & killSet) == callTargetMask); if (compiler->opts.compReloc) { // TODO-RISCV64: here the jal is special flag rather than a real instruction. - GetEmitter()->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + GetEmitter()->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, callTargetReg, (ssize_t)pAddr); } else { - emitLoadConstAtAddr(GetEmitter(), callTarget, (ssize_t)pAddr); + emitLoadConstAtAddr(GetEmitter(), callTargetReg, (ssize_t)pAddr); } - regSet.verifyRegUsed(callTarget); + regSet.verifyRegUsed(callTargetReg); - callType = emitter::EC_INDIR_R; + params.callType = EC_INDIR_R; + params.ireg = callTargetReg; } - GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, - retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ - ); + params.methHnd = compiler->eeFindHelper(helper); + params.argSize = argSize; + params.retSize = retSize; - regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); - regSet.verifyRegistersUsed(killMask); + genEmitCallWithCurrentGC(params); + + regSet.verifyRegistersUsed(killSet); } #ifdef FEATURE_SIMD @@ -4298,7 +4062,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) if (treeNode->IsReuseRegVal()) { // For now, this is only used for constant nodes. - assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); + assert(treeNode->OperIs(GT_CNS_INT) || treeNode->OperIs(GT_CNS_DBL)); JITDUMP(" TreeNode is marked ReuseReg\n"); return; } @@ -4372,6 +4136,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_XOR: case GT_AND: case GT_AND_NOT: + case GT_OR_NOT: + case GT_XOR_NOT: assert(varTypeIsIntegralOrI(treeNode)); FALLTHROUGH; @@ -4512,10 +4278,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genPutArgReg(treeNode->AsOp()); break; - case GT_PUTARG_SPLIT: - genPutArgSplit(treeNode->AsPutArgSplit()); - break; - case GT_CALL: genCall(treeNode->AsCall()); break; @@ -4621,6 +4383,23 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) // Do nothing; these nodes are simply markers for debug info. break; + case GT_SH1ADD: + case GT_SH1ADD_UW: + case GT_SH2ADD: + case GT_SH2ADD_UW: + case GT_SH3ADD: + case GT_SH3ADD_UW: + genCodeForShxadd(treeNode->AsOp()); + break; + + case GT_ADD_UW: + genCodeForAddUw(treeNode->AsOp()); + break; + + case GT_SLLI_UW: + genCodeForSlliUw(treeNode->AsOp()); + break; + default: { #ifdef DEBUG @@ -4714,7 +4493,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } else { - //// Ngen case - GS cookie constant needs to be accessed through an indirection. + // AOT case - GS cookie constant needs to be accessed through an indirection. 
// instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); if (compiler->opts.compReloc) @@ -4767,7 +4546,65 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) // void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode) { - NYI_RISCV64("genIntrinsic-----unimplemented/unused on RISCV64 yet----"); + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2IfPresent(); + + emitAttr size = emitActualTypeSize(op1); + bool is4 = (size == EA_4BYTE); + + instruction instr = INS_invalid; + switch (treeNode->gtIntrinsicName) + { + case NI_System_Math_Abs: + instr = is4 ? INS_fsgnjx_s : INS_fsgnjx_d; + op2 = op1; // "fabs rd, rs" is a pseudo-instruction for "fsgnjx rd, rs, rs" + break; + case NI_System_Math_Sqrt: + instr = is4 ? INS_fsqrt_s : INS_fsqrt_d; + break; + case NI_System_Math_MinNumber: + instr = is4 ? INS_fmin_s : INS_fmin_d; + break; + case NI_System_Math_MaxNumber: + instr = is4 ? INS_fmax_s : INS_fmax_d; + break; + case NI_System_Math_Min: + instr = INS_min; + break; + case NI_System_Math_MinUnsigned: + instr = INS_minu; + break; + case NI_System_Math_Max: + instr = INS_max; + break; + case NI_System_Math_MaxUnsigned: + instr = INS_maxu; + break; + case NI_PRIMITIVE_LeadingZeroCount: + instr = is4 ? INS_clzw : INS_clz; + break; + case NI_PRIMITIVE_TrailingZeroCount: + instr = is4 ? INS_ctzw : INS_ctz; + break; + case NI_PRIMITIVE_PopCount: + instr = is4 ? INS_cpopw : INS_cpop; + break; + default: + NO_WAY("Unknown intrinsic"); + } + + genConsumeOperands(treeNode->AsOp()); + regNumber dest = treeNode->GetRegNum(); + regNumber src1 = op1->GetRegNum(); + if (op2 == nullptr) + { + GetEmitter()->emitIns_R_R(instr, size, dest, src1); + } + else + { + GetEmitter()->emitIns_R_R_R(instr, size, dest, src1, op2->GetRegNum()); + } + genProduceReg(treeNode); } //--------------------------------------------------------------------- @@ -4801,7 +4638,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) if (treeNode->putInIncomingArgArea()) { varNumOut = getFirstArgWithStackSlot(); - argOffsetMax = compiler->compArgSize; + argOffsetMax = compiler->lvaParameterStackSize; #if FEATURE_FASTTAILCALL // This must be a fast tail call. 
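The new `genIntrinsic` above maps each math intrinsic to a single instruction; the only non-obvious case is Abs, which reuses `op1` as `op2` because RISC-V has no dedicated fabs opcode and encodes it as `fsgnjx rd, rs, rs`. A small model of that sign-injection identity, added for illustration only:

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Model of "fsgnjx rd, rs1, rs2": the result takes rs1's magnitude and a sign
// bit equal to sign(rs1) XOR sign(rs2). With rs1 == rs2 the XOR clears the
// sign bit, i.e. fabs - which is why the Abs case aliases op2 to op1.
double fsgnjx(double rs1, double rs2)
{
    uint64_t b1, b2;
    std::memcpy(&b1, &rs1, sizeof(b1));
    std::memcpy(&b2, &rs2, sizeof(b2));
    uint64_t sign = (b1 ^ b2) & (1ull << 63);    // XOR of the two sign bits
    uint64_t res  = (b1 & ~(1ull << 63)) | sign; // rs1's magnitude, combined sign
    double out;
    std::memcpy(&out, &res, sizeof(out));
    return out;
}

int main()
{
    assert(fsgnjx(-3.5, -3.5) == std::fabs(-3.5)); // fabs rd, rs == fsgnjx rd, rs, rs
    assert(fsgnjx( 2.0,  2.0) ==  2.0);
    assert(fsgnjx( 2.0, -1.0) == -2.0);            // differing signs -> negated magnitude of rs1
    return 0;
}
```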
assert(treeNode->gtCall->IsFastTailCall()); @@ -4843,7 +4680,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // If it is contained then source must be the integer constant zero if (source->isContained()) { - assert(source->OperGet() == GT_CNS_INT); + assert(source->OperIs(GT_CNS_INT)); assert(source->AsIntConCommon()->IconValue() == 0); emit->emitIns_S_R(storeIns, storeAttr, REG_R0, varNumOut, argOffsetOut); } @@ -4859,7 +4696,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { assert(source->isContained()); // We expect that this node was marked as contained in Lower - if (source->OperGet() == GT_FIELD_LIST) + if (source->OperIs(GT_FIELD_LIST)) { genPutArgStkFieldList(treeNode, varNumOut); } @@ -5012,238 +4849,6 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) genProduceReg(tree); } -//--------------------------------------------------------------------- -// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node -// -// Arguments -// tree - the GT_PUTARG_SPLIT node -// -// Return value: -// None -// -void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) -{ - assert(treeNode->OperIs(GT_PUTARG_SPLIT)); - - GenTree* source = treeNode->gtOp1; - emitter* emit = GetEmitter(); - unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; - unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; - - if (source->OperGet() == GT_FIELD_LIST) - { - // Evaluate each of the GT_FIELD_LIST items into their register - // and store their register into the outgoing argument area - unsigned regIndex = 0; - unsigned firstOnStackOffs = UINT_MAX; - - for (GenTreeFieldList::Use& use : source->AsFieldList()->Uses()) - { - GenTree* nextArgNode = use.GetNode(); - regNumber fieldReg = nextArgNode->GetRegNum(); - genConsumeReg(nextArgNode); - - if (regIndex >= treeNode->gtNumRegs) - { - if (firstOnStackOffs == UINT_MAX) - { - firstOnStackOffs = use.GetOffset(); - } - var_types type = nextArgNode->TypeGet(); - emitAttr attr = emitTypeSize(type); - - unsigned offset = treeNode->getArgOffset() + use.GetOffset() - firstOnStackOffs; - // We can't write beyond the outgoing arg area - assert(offset + EA_SIZE_IN_BYTES(attr) <= argOffsetMax); - - // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing - // argument area - emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, offset); - } - else - { - var_types type = treeNode->GetRegType(regIndex); - regNumber argReg = treeNode->GetRegNumByIdx(regIndex); - - // If child node is not already in the register we need, move it - inst_Mov(type, argReg, fieldReg, /* canSkip */ true); - - regIndex++; - } - } - } - else - { - var_types targetType = source->TypeGet(); - assert(source->isContained() && varTypeIsStruct(targetType)); - - // We need a register to store intermediate values that we are loading - // from the source into. We can usually use one of the target registers - // that will be overridden anyway. The exception is when the source is - // in a register and that register is the unique target register we are - // placing. LSRA will always allocate an internal register when there - // is just one target register to handle this situation. 
- // - int firstRegToPlace; - regNumber valueReg = REG_NA; - unsigned srcLclNum = BAD_VAR_NUM; - unsigned srcLclOffset = 0; - regNumber addrReg = REG_NA; - var_types addrType = TYP_UNDEF; - ClassLayout* layout = nullptr; - - if (source->OperIsLocalRead()) - { - srcLclNum = source->AsLclVarCommon()->GetLclNum(); - srcLclOffset = source->AsLclVarCommon()->GetLclOffs(); - layout = source->AsLclVarCommon()->GetLayout(compiler); - LclVarDsc* varDsc = compiler->lvaGetDesc(srcLclNum); - - // This struct must live on the stack frame. - assert(varDsc->lvOnFrame && !varDsc->lvRegister); - - // No possible conflicts, just use the first register as the value register. - firstRegToPlace = 0; - valueReg = treeNode->GetRegNumByIdx(0); - } - else // we must have a GT_BLK - { - layout = source->AsBlk()->GetLayout(); - addrReg = genConsumeReg(source->AsBlk()->Addr()); - addrType = source->AsBlk()->Addr()->TypeGet(); - - regNumber allocatedValueReg = REG_NA; - if (treeNode->gtNumRegs == 1) - { - allocatedValueReg = internalRegisters.Extract(treeNode); - } - - // Pick a register to store intermediate values in for the to-stack - // copy. It must not conflict with addrReg. - valueReg = treeNode->GetRegNumByIdx(0); - if (valueReg == addrReg) - { - if (treeNode->gtNumRegs == 1) - { - valueReg = allocatedValueReg; - } - else - { - valueReg = treeNode->GetRegNumByIdx(1); - } - } - - // Find first register to place. If we are placing addrReg, then - // make sure we place it last to avoid clobbering its value. - // - // The loop below will start at firstRegToPlace and place - // treeNode->gtNumRegs registers in order, with wraparound. For - // example, if the registers to place are r0, r1, r2=addrReg, r3 - // then we will set firstRegToPlace = 3 (r3) and the loop below - // will place r3, r0, r1, r2. The last placement will clobber - // addrReg. - firstRegToPlace = 0; - for (unsigned i = 0; i < treeNode->gtNumRegs; i++) - { - if (treeNode->GetRegNumByIdx(i) == addrReg) - { - firstRegToPlace = i + 1; - break; - } - } - } - - // Put on stack first - unsigned structOffset = treeNode->gtNumRegs * TARGET_POINTER_SIZE; - unsigned remainingSize = layout->GetSize() - structOffset; - unsigned argOffsetOut = treeNode->getArgOffset(); - - assert((remainingSize > 0) && (roundUp(remainingSize, TARGET_POINTER_SIZE) == treeNode->GetStackByteSize())); - while (remainingSize > 0) - { - var_types type; - if (remainingSize >= TARGET_POINTER_SIZE) - { - type = layout->GetGCPtrType(structOffset / TARGET_POINTER_SIZE); - } - else if (remainingSize >= 4) - { - type = TYP_INT; - } - else if (remainingSize >= 2) - { - type = TYP_USHORT; - } - else - { - assert(remainingSize == 1); - type = TYP_UBYTE; - } - - emitAttr attr = emitActualTypeSize(type); - unsigned moveSize = genTypeSize(type); - - instruction loadIns = ins_Load(type); - if (srcLclNum != BAD_VAR_NUM) - { - // Load from our local source - emit->emitIns_R_S(loadIns, attr, valueReg, srcLclNum, srcLclOffset + structOffset); - } - else - { - assert(valueReg != addrReg); - - // Load from our address expression source - emit->emitIns_R_R_I(loadIns, attr, valueReg, addrReg, structOffset); - } - - // Emit the instruction to store the register into the outgoing argument area - emit->emitIns_S_R(ins_Store(type), attr, valueReg, varNumOut, argOffsetOut); - argOffsetOut += moveSize; - assert(argOffsetOut <= argOffsetMax); - - remainingSize -= moveSize; - structOffset += moveSize; - } - - // Place registers starting from firstRegToPlace. 
It should ensure we - // place addrReg last (if we place it at all). - structOffset = static_cast(firstRegToPlace) * TARGET_POINTER_SIZE; - unsigned curRegIndex = firstRegToPlace; - - for (unsigned regsPlaced = 0; regsPlaced < treeNode->gtNumRegs; regsPlaced++) - { - if (curRegIndex == treeNode->gtNumRegs) - { - curRegIndex = 0; - structOffset = 0; - } - - regNumber targetReg = treeNode->GetRegNumByIdx(curRegIndex); - var_types type = treeNode->GetRegType(curRegIndex); - - if (srcLclNum != BAD_VAR_NUM) - { - // Load from our local source - emit->emitIns_R_S(ins_Load(type), emitTypeSize(type), targetReg, srcLclNum, - srcLclOffset + structOffset); - } - else - { - assert((addrReg != targetReg) || (regsPlaced == treeNode->gtNumRegs - 1)); - - // Load from our address expression source - emit->emitIns_R_R_I(ins_Load(type), emitTypeSize(type), targetReg, addrReg, structOffset); - } - - curRegIndex++; - structOffset += TARGET_POINTER_SIZE; - } - } - - genProduceReg(treeNode); -} - //------------------------------------------------------------------------ // genRangeCheck: generate code for GT_BOUNDS_CHECK node. // @@ -5281,7 +4886,7 @@ void CodeGen::genRangeCheck(GenTree* oper) assert(indexType == TYP_INT || indexType == TYP_LONG); #endif // DEBUG - genJumpToThrowHlpBlk_la(bndsChk->gtThrowKind, INS_bgeu, indexReg, bndsChk->gtIndRngFailBB, lengthReg); + genJumpToThrowHlpBlk_la(bndsChk->gtThrowKind, INS_bgeu, indexReg, nullptr, lengthReg); } //--------------------------------------------------------------------- @@ -5349,48 +4954,81 @@ void CodeGen::genCodeForShift(GenTree* tree) GenTree* operand = tree->gtGetOp1(); GenTree* shiftBy = tree->gtGetOp2(); + unsigned immWidth = emitter::getBitWidth(size); // For RISCV64, immWidth will be set to 32 or 64 + if (tree->OperIs(GT_ROR, GT_ROL)) { - regNumber tempReg = internalRegisters.GetSingle(tree); - unsigned immWidth = emitter::getBitWidth(size); // For RISCV64, immWidth will be set to 32 or 64 - if (!shiftBy->IsCnsIntOrI()) + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)) { - regNumber shiftRight = tree->OperIs(GT_ROR) ? shiftBy->GetRegNum() : tempReg; - regNumber shiftLeft = tree->OperIs(GT_ROR) ? tempReg : shiftBy->GetRegNum(); - GetEmitter()->emitIns_R_R_I(INS_addi, size, tempReg, REG_R0, immWidth); - GetEmitter()->emitIns_R_R_R(INS_sub, size, tempReg, tempReg, shiftBy->GetRegNum()); - if (size == EA_8BYTE) + bool is4 = (size == EA_4BYTE); + bool isR = tree->OperIs(GT_ROR); + if (!shiftBy->IsCnsIntOrI()) { - GetEmitter()->emitIns_R_R_R(INS_srl, size, REG_RA, operand->GetRegNum(), shiftRight); - GetEmitter()->emitIns_R_R_R(INS_sll, size, tempReg, operand->GetRegNum(), shiftLeft); + instruction ins; + if (isR) + { + ins = is4 ? INS_rorw : INS_ror; + } + else + { + ins = is4 ? INS_rolw : INS_rol; + } + GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftBy->GetRegNum()); } else { - GetEmitter()->emitIns_R_R_R(INS_srlw, size, REG_RA, operand->GetRegNum(), shiftRight); - GetEmitter()->emitIns_R_R_R(INS_sllw, size, tempReg, operand->GetRegNum(), shiftLeft); + unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; + assert(shiftByImm < immWidth); + if (!isR) + { + shiftByImm = immWidth - shiftByImm; + } + instruction ins = is4 ? 
INS_roriw : INS_rori; + GetEmitter()->emitIns_R_R_I(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftByImm); } } else { - unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; - if (shiftByImm >= 32 && shiftByImm < 64) + regNumber tempReg = internalRegisters.GetSingle(tree); + if (!shiftBy->IsCnsIntOrI()) { - immWidth = 64; - } - unsigned shiftRight = tree->OperIs(GT_ROR) ? shiftByImm : immWidth - shiftByImm; - unsigned shiftLeft = tree->OperIs(GT_ROR) ? immWidth - shiftByImm : shiftByImm; - if ((shiftByImm >= 32 && shiftByImm < 64) || size == EA_8BYTE) - { - GetEmitter()->emitIns_R_R_I(INS_srli, size, REG_RA, operand->GetRegNum(), shiftRight); - GetEmitter()->emitIns_R_R_I(INS_slli, size, tempReg, operand->GetRegNum(), shiftLeft); + regNumber shiftRight = tree->OperIs(GT_ROR) ? shiftBy->GetRegNum() : tempReg; + regNumber shiftLeft = tree->OperIs(GT_ROR) ? tempReg : shiftBy->GetRegNum(); + GetEmitter()->emitIns_R_R_I(INS_addi, size, tempReg, REG_R0, immWidth); + GetEmitter()->emitIns_R_R_R(INS_sub, size, tempReg, tempReg, shiftBy->GetRegNum()); + if (size == EA_8BYTE) + { + GetEmitter()->emitIns_R_R_R(INS_srl, size, REG_RA, operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_R(INS_sll, size, tempReg, operand->GetRegNum(), shiftLeft); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_srlw, size, REG_RA, operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_R(INS_sllw, size, tempReg, operand->GetRegNum(), shiftLeft); + } } else { - GetEmitter()->emitIns_R_R_I(INS_srliw, size, REG_RA, operand->GetRegNum(), shiftRight); - GetEmitter()->emitIns_R_R_I(INS_slliw, size, tempReg, operand->GetRegNum(), shiftLeft); + unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; + if (shiftByImm >= 32 && shiftByImm < 64) + { + immWidth = 64; + } + unsigned shiftRight = tree->OperIs(GT_ROR) ? shiftByImm : immWidth - shiftByImm; + unsigned shiftLeft = tree->OperIs(GT_ROR) ? immWidth - shiftByImm : shiftByImm; + if ((shiftByImm >= 32 && shiftByImm < 64) || size == EA_8BYTE) + { + GetEmitter()->emitIns_R_R_I(INS_srli, size, REG_RA, operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_I(INS_slli, size, tempReg, operand->GetRegNum(), shiftLeft); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_srliw, size, REG_RA, operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_I(INS_slliw, size, tempReg, operand->GetRegNum(), shiftLeft); + } } + GetEmitter()->emitIns_R_R_R(INS_or, size, tree->GetRegNum(), REG_RA, tempReg); } - GetEmitter()->emitIns_R_R_R(INS_or, size, tree->GetRegNum(), REG_RA, tempReg); } else { @@ -5405,7 +5043,6 @@ void CodeGen::genCodeForShift(GenTree* tree) unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; // should check shiftByImm for riscv64-ins. - unsigned immWidth = emitter::getBitWidth(size); // For RISCV64, immWidth will be set to 32 or 64 shiftByImm &= (immWidth - 1); if (ins == INS_slliw && shiftByImm >= 32) @@ -5557,7 +5194,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) // IndRngFail: // ... 
// RngChkExit: - genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), node->gtIndRngFailBB, tempReg); + genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), nullptr, tempReg); } emitAttr attr = emitActualTypeSize(node); @@ -5571,7 +5208,16 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) // dest = base + (index << scale) if (node->gtElemSize <= 64) { - genScaledAdd(attr, node->GetRegNum(), base->GetRegNum(), index->GetRegNum(), scale, tempReg); + instruction shxaddIns = getShxaddVariant(scale, (genTypeSize(index) == 4)); + + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zba) && (shxaddIns != INS_none)) + { + GetEmitter()->emitIns_R_R_R(shxaddIns, attr, node->GetRegNum(), index->GetRegNum(), base->GetRegNum()); + } + else + { + genScaledAdd(attr, node->GetRegNum(), base->GetRegNum(), index->GetRegNum(), scale, tempReg); + } } else { @@ -5635,7 +5281,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) #ifdef FEATURE_SIMD // Handling of Vector3 type values loaded through indirection. - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genLoadIndTypeSIMD12(tree); return; @@ -6038,50 +5684,62 @@ void CodeGen::genCall(GenTreeCall* call) void CodeGen::genCallInstruction(GenTreeCall* call) { // Determine return value size(s). - const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - emitAttr retSize = EA_PTRSIZE; - emitAttr secondRetSize = EA_UNKNOWN; + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + EmitCallParams params; // unused values are of no interest to GC. if (!call->IsUnusedValue()) { if (call->HasMultiRegRetVal()) { - retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); - secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); + params.retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); + params.secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); + + if (pRetTypeDesc->GetABIReturnReg(1, call->GetUnmanagedCallConv()) == REG_INTRET) + { + // If the second return register is REG_INTRET, then the first return is expected to be in a floating + // register. The emitter has hardcoded belief that params.retSize corresponds to REG_INTRET and + // secondRetSize to REG_INTRET_1, so fix up the situation here. + assert(!EA_IS_GCREF_OR_BYREF(params.retSize)); + params.retSize = params.secondRetSize; + params.secondRetSize = EA_UNKNOWN; + } } else { - assert(call->gtType != TYP_STRUCT); + assert(!call->TypeIs(TYP_STRUCT)); - if (call->gtType == TYP_REF) + if (call->TypeIs(TYP_REF)) { - retSize = EA_GCREF; + params.retSize = EA_GCREF; } - else if (call->gtType == TYP_BYREF) + else if (call->TypeIs(TYP_BYREF)) { - retSize = EA_BYREF; + params.retSize = EA_BYREF; } } } - DebugInfo di; + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); + // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. // We don't want tail call helper calls that were converted from normal calls to get a record, // so we skip this hash table lookup logic in that case. 
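`genCodeForIndexAddr` above now folds `base + (index << scale)` into a single Zba `sh1add`/`sh2add`/`sh3add` when a variant exists for the scale (see `getShxaddVariant` later in this diff), instead of a shift-plus-add pair. Reference semantics, as an editor-added sketch:

```cpp
#include <cassert>
#include <cstdint>

// Zba address-generation instructions preferred by the index-address path:
// rd = (rs1 << n) + rs2, with n in {1, 2, 3}. The *_uw forms first zero-extend
// the low 32 bits of rs1 (useful for 32-bit unsigned indices).
uint64_t shNadd   (unsigned n, uint64_t rs1, uint64_t rs2) { return (rs1 << n) + rs2; }
uint64_t shNadd_uw(unsigned n, uint64_t rs1, uint64_t rs2) { return ((rs1 & 0xFFFFFFFFull) << n) + rs2; }

int main()
{
    const uint64_t base  = 0x10000; // array base
    const uint64_t index = 5;

    // element size 8 -> scale 3: addr = base + (index << 3), one sh3add instead of slli+add
    assert(shNadd(3, index, base) == base + index * 8);

    // a 32-bit index whose upper bits happen to contain garbage
    const uint64_t dirty = index | (0xDEADull << 48);
    assert(shNadd_uw(3, dirty, base) == base + index * 8);
    return 0;
}
```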
if (compiler->opts.compDbgInfo && compiler->genCallSite2DebugInfoMap != nullptr && !call->IsTailCall()) { + DebugInfo di; (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di); + params.debugInfo = di; } - CORINFO_SIG_INFO* sigInfo = nullptr; #ifdef DEBUG // Pass the call signature information down into the emitter so the emitter can associate // native call sites with the signatures they were generated from. if (!call->IsHelperCall()) { - sigInfo = call->callSig; + params.sigInfo = call->callSig; } if (call->IsFastTailCall()) @@ -6098,9 +5756,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call) for (CallArg& arg : call->gtArgs.Args()) { - for (unsigned i = 0; i < arg.NewAbiInfo.NumSegments; i++) + for (unsigned i = 0; i < arg.AbiInfo.NumSegments; i++) { - const ABIPassingSegment& seg = arg.NewAbiInfo.Segment(i); + const ABIPassingSegment& seg = arg.AbiInfo.Segment(i); if (seg.IsPassedInRegister() && ((trashedByEpilog & seg.GetRegisterMask()) != 0)) { JITDUMP("Tail call node:\n"); @@ -6112,8 +5770,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } #endif // DEBUG - CORINFO_METHOD_HANDLE methHnd; - GenTree* target = getCallTarget(call, &methHnd); + GenTree* target = getCallTarget(call, ¶ms.methHnd); if (target != nullptr) { @@ -6123,27 +5780,39 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // For fast tailcall we have already consumed the target. We ensure in // RA that the target was allocated into a volatile register that will // not be messed up by epilog sequence. - if (!call->IsFastTailCall()) + if (!call->IsFastTailCall() && !target->isContainedIntOrIImmed()) { genConsumeReg(target); } + regNumber targetReg; + ssize_t jalrOffset = 0; + + if (target->isContainedIntOrIImmed()) + { + // Load upper (64-12) bits to a temporary register. Lower 12 bits will be put inside JALR's instruction as + // offset. + targetReg = internalRegisters.GetSingle(call); + ssize_t imm = target->AsIntCon()->IconValue(); + jalrOffset = (imm << (64 - 12)) >> (64 - 12); + imm -= jalrOffset; + GetEmitter()->emitLoadImmediate(EA_PTRSIZE, targetReg, imm); + } + else + { + targetReg = target->GetRegNum(); + } + // We have already generated code for gtControlExpr evaluating it into a register. // We just need to emit "call reg" in this case. // - assert(genIsValidIntReg(target->GetRegNum())); - - // clang-format off - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, // addr - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - target->GetRegNum(), - call->IsFastTailCall()); - // clang-format on + assert(genIsValidIntReg(targetReg)); + + params.callType = EC_INDIR_R; + params.ireg = targetReg; + params.addr = (jalrOffset == 0) ? 
nullptr : (void*)jalrOffset; // We use addr to pass offset value + + genEmitCallWithCurrentGC(params); } else { @@ -6181,60 +5850,42 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // assert(genIsValidIntReg(targetAddrReg)); - // clang-format off - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, // addr - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - targetAddrReg, - call->IsFastTailCall()); - // clang-format on + params.callType = EC_INDIR_R; + params.ireg = targetAddrReg; + genEmitCallWithCurrentGC(params); } else { // Generate a direct call to a non-virtual user defined or helper method assert(call->IsHelperCall() || (call->gtCallType == CT_USER_FUNC)); - void* addr = nullptr; #ifdef FEATURE_READYTORUN if (call->gtEntryPoint.addr != NULL) { assert(call->gtEntryPoint.accessType == IAT_VALUE); - addr = call->gtEntryPoint.addr; + params.addr = call->gtEntryPoint.addr; } else #endif // FEATURE_READYTORUN if (call->IsHelperCall()) { - CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(params.methHnd); noway_assert(helperNum != CORINFO_HELP_UNDEF); void* pAddr = nullptr; - addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + params.addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); assert(pAddr == nullptr); } else { // Direct call to a non-virtual user function. - addr = call->gtDirectCallAddress; + params.addr = call->gtDirectCallAddress; } - assert(addr != nullptr); + assert(params.addr != nullptr); - // clang-format off - genEmitCall(emitter::EC_FUNC_TOKEN, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - addr, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - REG_NA, - call->IsFastTailCall()); - // clang-format on + params.callType = EC_FUNC_TOKEN; + genEmitCallWithCurrentGC(params); } } } @@ -6367,33 +6018,48 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: if (desc.ExtendSrcSize() == 1) { - emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 64 - 8); - emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, dstReg, dstReg, 64 - 8); - } - else - { - - emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 64 - 16); - emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, dstReg, dstReg, 64 - 16); + emit->emitIns_R_R_I(INS_andi, EA_PTRSIZE, dstReg, srcReg, 0xff); + break; } - break; + FALLTHROUGH; case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: - if (desc.ExtendSrcSize() == 1) + { + bool isSignExtend = (desc.ExtendKind() == GenIntCastDesc::SIGN_EXTEND_SMALL_INT); + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)) { - emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 64 - 8); - emit->emitIns_R_R_I(INS_srai, EA_PTRSIZE, dstReg, dstReg, 64 - 8); + instruction extend = INS_none; + bool isHalf = (desc.ExtendSrcSize() == 2); + if (isSignExtend) + { + extend = isHalf ? INS_sext_h : INS_sext_b; + } + else + { + assert(isHalf); + extend = INS_zext_h; + } + emit->emitIns_R_R(extend, EA_PTRSIZE, dstReg, srcReg); } else { - emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 64 - 16); - emit->emitIns_R_R_I(INS_srai, EA_PTRSIZE, dstReg, dstReg, 64 - 16); + instruction shiftRight = isSignExtend ? 
INS_srai : INS_srli; + unsigned shiftAmount = 64 - desc.ExtendSrcSize() * 8; + emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, shiftAmount); + emit->emitIns_R_R_I(shiftRight, EA_PTRSIZE, dstReg, dstReg, shiftAmount); } break; + } case GenIntCastDesc::ZERO_EXTEND_INT: - - emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 32); - emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, dstReg, dstReg, 32); + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zba)) + { + emit->emitIns_R_R_R(INS_add_uw, EA_PTRSIZE, dstReg, srcReg, REG_R0); + } + else + { + emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 32); + emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, dstReg, dstReg, 32); + } break; case GenIntCastDesc::SIGN_EXTEND_INT: emit->emitIns_R_R_I(INS_slliw, EA_4BYTE, dstReg, srcReg, 0); @@ -6433,7 +6099,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) void CodeGen::genFloatToFloatCast(GenTree* treeNode) { // float <--> double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -6681,193 +6347,101 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) genProduceReg(lea); } -//------------------------------------------------------------------------ -// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer. -// -// Arguments: -// delta - the offset to add to the current stack pointer to establish the frame pointer -// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data. - -void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) +instruction CodeGen::getShxaddVariant(int scale, bool useUnsignedVariant) { - assert(compiler->compGeneratingProlog); - - assert(emitter::isValidSimm12(delta)); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); - - if (reportUnwindData) + if (useUnsignedVariant) { - compiler->unwindSetFrameReg(REG_FPBASE, delta); - }; + switch (scale) + { + case 1: + return INS_sh1add_uw; + case 2: + return INS_sh2add_uw; + case 3: + return INS_sh3add_uw; + } + } + else + { + switch (scale) + { + case 1: + return INS_sh1add; + case 2: + return INS_sh2add; + case 3: + return INS_sh3add; + } + } + return INS_none; } -//------------------------------------------------------------------------ -// genStackProbe: Probe the stack without changing it -// -// Notes: -// This function is using loop to probe each memory page. 
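The cast path above picks the cheapest widening form available: `andi 0xff` for byte zero-extension, Zbb `sext.b`/`sext.h`/`zext.h` when present, the classic shift pair otherwise, and Zba `add.uw` against `x0` for 32-to-64-bit zero-extension. A quick editor-added check of those equivalences (assumes two's-complement wrapping and an arithmetic right shift for the signed case, as on all supported targets):

```cpp
#include <cassert>
#include <cstdint>

// Widening tricks selected in genIntToIntCast above, modeled in plain C++.
int main()
{
    const uint64_t src = 0xFFFFFFFF80F1F2F3ull; // arbitrary value with set sign bits

    // Zero-extend a byte: andi rd, rs, 0xff
    assert((src & 0xff) == 0xF3u);

    // Sign-extend a halfword without Zbb: slli rd, rs, 48 ; srai rd, rd, 48
    int64_t sext16 = (int64_t)(src << 48) >> 48;
    assert(sext16 == (int16_t)0xF2F3);

    // Zero-extend a halfword without Zbb: slli rd, rs, 48 ; srli rd, rd, 48
    assert(((src << 48) >> 48) == 0xF2F3u);

    // Zero-extend 32 bits with Zba: add.uw rd, rs, zero  ==  zext32(rs) + 0
    assert(((src & 0xFFFFFFFFull) + 0) == 0x80F1F2F3ull);
    return 0;
}
```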
-// -// Arguments: -// frameSize - total frame size -// rOffset - usually initial register number -// rLimit - an extra register for comparison -// rPageSize - register for storing page size -// -void CodeGen::genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize) +void CodeGen::genCodeForShxadd(GenTreeOp* tree) { - // make sure frameSize safely fits within 4 bytes - noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); - - const target_size_t pageSize = compiler->eeGetPageSize(); - - // According to RISC-V Privileged ISA page size should be equal 4KiB - noway_assert(pageSize == 0x1000); + instruction ins = genGetInsForOper(tree); - emitter* emit = GetEmitter(); - - emit->emitLoadImmediate(EA_PTRSIZE, rLimit, -frameSize); - regSet.verifyRegUsed(rLimit); - - emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, rLimit, rLimit, REG_SPBASE); + assert(ins == INS_sh1add || ins == INS_sh2add || ins == INS_sh3add || ins == INS_sh1add_uw || + ins == INS_sh2add_uw || ins == INS_sh3add_uw); - emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); - regSet.verifyRegUsed(rPageSize); + genConsumeOperands(tree); - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, REG_SPBASE, rPageSize); + emitAttr attr = emitActualTypeSize(tree); - // Loop: - // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page - emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, rOffset, 0); - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, rOffset, rPageSize); + GetEmitter()->emitIns_R_R_R(ins, attr, tree->GetRegNum(), tree->gtOp1->GetRegNum(), tree->gtOp2->GetRegNum()); - // each instr is 4 bytes - // if (rOffset >= rLimit) goto Loop; - emit->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, -2 << 2); + genProduceReg(tree); } -//------------------------------------------------------------------------ -// genAllocLclFrame: Probe the stack. -// -// Notes: -// This only does the probing; allocating the frame is done when callee-saved registers are saved. -// This is done before anything has been pushed. The previous frame might have a large outgoing argument -// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might -// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however, -// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard -// page by default, so we need to be more careful. We do an extra probe if we might not have probed -// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this -// on Windows as well just to be consistent, even though it should not be necessary. -// -// Arguments: -// frameSize - the size of the stack frame being allocated. -// initReg - register to use as a scratch register. -// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if -// this call sets 'initReg' to a non-zero value. Otherwise, it is unchanged. -// maskArgRegsLiveIn - incoming argument registers that are currently live. 
-// -// Return value: -// None -// -void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +void CodeGen::genCodeForAddUw(GenTreeOp* tree) { - assert(compiler->compGeneratingProlog); + assert(tree->OperIs(GT_ADD_UW)); - if (frameSize == 0) - { - return; - } - - // According to RISC-V Privileged ISA page size should be equal 4KiB - const target_size_t pageSize = compiler->eeGetPageSize(); - - assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg)); - - target_size_t lastTouchDelta = 0; - - emitter* emit = GetEmitter(); - - // Emit the following sequence to 'tickle' the pages. - // Note it is important that stack pointer not change until this is complete since the tickles - // could cause a stack overflow, and we need to be able to crawl the stack afterward - // (which means the stack pointer needs to be known). - - if (frameSize < pageSize) - { - // no probe needed - lastTouchDelta = frameSize; - } - else if (frameSize < 3 * pageSize) - { - // between 1 and 3 pages we will probe each page without a loop, - // because it is faster that way and doesn't cost us much - lastTouchDelta = frameSize; - - for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize) - { - emit->emitIns_R_I(INS_lui, EA_PTRSIZE, initReg, probeOffset >> 12); - regSet.verifyRegUsed(initReg); + genConsumeOperands(tree); - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg); - emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0); + emitAttr attr = emitActualTypeSize(tree); - lastTouchDelta -= pageSize; - } + GetEmitter()->emitIns_R_R_R(INS_add_uw, attr, tree->GetRegNum(), tree->gtOp1->GetRegNum(), + tree->gtOp2->GetRegNum()); - assert(pInitRegZeroed != nullptr); - *pInitRegZeroed = false; // The initReg does not contain zero + genProduceReg(tree); +} - assert(lastTouchDelta == frameSize % pageSize); - compiler->unwindPadding(); - } - else - { - // probe each page, that we need to allocate large stack frame - assert(frameSize >= 3 * pageSize); +void CodeGen::genCodeForSlliUw(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_SLLI_UW)); - regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED); - availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers - // as they are currently live - availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg + genConsumeOperands(tree); - noway_assert(availMask != RBM_NONE); + emitAttr attr = emitActualTypeSize(tree); + GenTree* shiftBy = tree->gtOp2; - regMaskTP regMask = genFindLowestBit(availMask); - regNumber rLimit = genRegNumFromMask(regMask); + assert(shiftBy->IsCnsIntOrI()); - availMask &= ~regMask; // Remove rLimit register + unsigned shamt = (unsigned)shiftBy->AsIntCon()->gtIconVal; - noway_assert(availMask != RBM_NONE); + GetEmitter()->emitIns_R_R_I(INS_slli_uw, attr, tree->GetRegNum(), tree->gtOp1->GetRegNum(), shamt); - regMask = genFindLowestBit(availMask); - regNumber rPageSize = genRegNumFromMask(regMask); + genProduceReg(tree); +} - genStackProbe((ssize_t)frameSize, initReg, rLimit, rPageSize); +//------------------------------------------------------------------------ +// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer. 
+// +// Arguments: +// delta - the offset to add to the current stack pointer to establish the frame pointer +// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data. - assert(pInitRegZeroed != nullptr); - *pInitRegZeroed = false; // The initReg does not contain zero +void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) +{ + assert(compiler->compGeneratingProlog); - lastTouchDelta = frameSize % pageSize; - compiler->unwindPadding(); - } + assert(emitter::isValidSimm12(delta)); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); -#if STACK_PROBE_BOUNDARY_THRESHOLD_BYTES != 0 - // if the last page was too far, we will make an extra probe at the bottom - if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize) + if (reportUnwindData) { - assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < pageSize << 1); - - emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, initReg, REG_R0, frameSize); - regSet.verifyRegUsed(initReg); - - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg); - emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0); - - assert(pInitRegZeroed != nullptr); - *pInitRegZeroed = false; // The initReg does not contain zero - - compiler->unwindPadding(); - } -#endif + compiler->unwindSetFrameReg(REG_FPBASE, delta); + }; } void CodeGen::genJumpToThrowHlpBlk_la( @@ -6903,7 +6477,7 @@ void CodeGen::genJumpToThrowHlpBlk_la( { // Find the helper-block which raises the exception. Compiler::AddCodeDsc* add = compiler->fgFindExcptnTarget(codeKind, compiler->compCurBB); - PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block")); + assert((add != nullptr) && ("ERROR: failed to find exception throw block")); assert(add->acdUsed); excpRaisingBlock = add->acdDstBlk; #if !FEATURE_FIXED_OUT_ARGS @@ -6921,10 +6495,9 @@ void CodeGen::genJumpToThrowHlpBlk_la( // The code to throw the exception will be generated inline, and // we will jump around it in the normal non-exception case. - void* pAddr = nullptr; - void* addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); - emitter::EmitCallType callType; - regNumber callTarget; + void* pAddr = nullptr; + EmitCallParams params; + params.addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); // maybe optimize // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne))); @@ -6949,15 +6522,15 @@ void CodeGen::genJumpToThrowHlpBlk_la( ins = ins == INS_beq ? INS_bne : INS_beq; } - if (addr == nullptr) + if (params.addr == nullptr) { - callType = emitter::EC_INDIR_R; - callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + params.callType = EC_INDIR_R; + params.ireg = REG_DEFAULT_HELPER_CALL_TARGET; if (compiler->opts.compReloc) { ssize_t imm = (3 + 1) << 2; emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); - emit->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + emit->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, params.ireg, (ssize_t)pAddr); } else { @@ -6965,14 +6538,13 @@ void CodeGen::genJumpToThrowHlpBlk_la( emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); // TODO-RISCV64-CQ: In the future we may consider using emitter::emitLoadImmediate instead, // which is less straightforward but offers slightly better codegen. 
- emitLoadConstAtAddr(GetEmitter(), callTarget, (ssize_t)pAddr); + emitLoadConstAtAddr(GetEmitter(), params.ireg, (ssize_t)pAddr); } - regSet.verifyRegUsed(callTarget); + regSet.verifyRegUsed(params.ireg); } else { // INS_OPTS_C - callType = emitter::EC_FUNC_TOKEN; - callTarget = REG_NA; + params.callType = EC_FUNC_TOKEN; ssize_t imm = 9 << 2; if (compiler->opts.compReloc) @@ -6985,13 +6557,10 @@ void CodeGen::genJumpToThrowHlpBlk_la( BasicBlock* skipLabel = genCreateTempLabel(); - emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)), - INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ - ); + params.methHnd = compiler->eeFindHelper(compiler->acdHelper(codeKind)); + + // TODO-RISCV64: Why is this not using genEmitHelperCall? + genEmitCallWithCurrentGC(params); regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind))); regSet.verifyRegistersUsed(killMask); @@ -7052,8 +6621,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). * * For functions with GS and localloc, we had saved the frame pointer and RA at the top - * of the frame. Note that the funclet frames must follow the same rule, - * and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + * of the frame. * Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. * * The frames look like the following (simplified to only include components that matter for establishing the @@ -7074,8 +6642,6 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * |Callee saved registers | // not including FP/RA; multiple of 8 bytes * |-----------------------| * | Saved FP | // 8 bytes @@ -7128,7 +6694,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // - Generate fully interruptible code for loops that contains calls // - Generate fully interruptible code for leaf methods // - // Given the limited benefit from this optimization (<10k for SPCL NGen image), the extra complexity + // Given the limited benefit from this optimization (<10k for SPCL AOT image), the extra complexity // is not worth it. 
// @@ -7163,10 +6729,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe int totalFrameSize = genTotalFrameSize(); int leftFrameSize = 0; int localFrameSize = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - localFrameSize -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { localFrameSize -= TARGET_POINTER_SIZE; @@ -7223,7 +6785,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe if (leftFrameSize != 0) { - genStackPointerAdjustment(-leftFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + genStackPointerAdjustment(-leftFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ false); } } @@ -7237,10 +6799,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) int totalFrameSize = genTotalFrameSize(); int localFrameSize = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - localFrameSize -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { localFrameSize -= TARGET_POINTER_SIZE; diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index bedab1fe6371..628406f8a2fb 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -156,7 +156,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } else { - // Ngen case - GS cookie value needs to be accessed through an indirection. + // AOT case - GS cookie value needs to be accessed through an indirection. pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs); @@ -182,25 +182,9 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) if (compiler->UsesFunclets()) { // Generate a call to the finally, like this: - // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym // call finally-funclet // jmp finally-return // Only for non-retless finally calls // The jmp can be a NOP if we're going to the next block. - // If we're generating code for the main function (not a funclet), and there is no localloc, - // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP - // instead of loading the PSPSym in this case, or if PSPSym is not used (NativeAOT ABI). - - if ((compiler->lvaPSPSym == BAD_VAR_NUM) || - (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))) - { -#ifndef UNIX_X86_ABI - inst_Mov(TYP_I_IMPL, REG_ARG_0, REG_SPBASE, /* canSkip */ false); -#endif // !UNIX_X86_ABI - } - else - { - GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0); - } if (block->HasFlag(BBF_RETLESS_CALL)) { @@ -218,14 +202,10 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) } else { -// TODO-Linux-x86: Do we need to handle the GC information for this NOP or JMP specially, as is done for other -// architectures? -#ifndef JIT32_GCENCODER // Because of the way the flowgraph is connected, the liveness info for this one instruction // after the call is not (can not be) correct in cases where a variable has a last use in the // handler. 
So turn off GC reporting once we execute the call and reenable after the jmp/nop GetEmitter()->emitDisableGC(); -#endif // JIT32_GCENCODER GetEmitter()->emitIns_J(INS_call, block->GetTarget()); @@ -245,9 +225,7 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) inst_JMP(EJ_jmp, finallyContinuation); } -#ifndef JIT32_GCENCODER GetEmitter()->emitEnableGC(); -#endif // JIT32_GCENCODER } } #if defined(FEATURE_EH_WINDOWS_X86) @@ -279,7 +257,8 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) unsigned filterEndOffsetSlotOffs; - filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); + filterEndOffsetSlotOffs = + (unsigned)(compiler->lvaLclStackHomeSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); unsigned curNestingSlotOffs; curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE)); @@ -452,8 +431,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t } else { - CORINFO_FIELD_HANDLE hnd = emit->emitSimd8Const(val8); - emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); + emit->emitSimdConstCompressedLoad(val, attr, targetReg); } break; } @@ -480,10 +458,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t } else { - simd16_t val16 = {}; + simd_t val16 = {}; memcpy(&val16, &val12, sizeof(val12)); - CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(val16); - emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); + emit->emitSimdConstCompressedLoad(val, EA_16BYTE, targetReg); } break; } @@ -510,8 +487,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t } else { - CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(val16); - emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); + emit->emitSimdConstCompressedLoad(val, attr, targetReg); } break; } @@ -538,8 +514,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t } else { - CORINFO_FIELD_HANDLE hnd = emit->emitSimd32Const(val32); - emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); + emit->emitSimdConstCompressedLoad(val, attr, targetReg); } break; } @@ -547,7 +522,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t case TYP_SIMD64: { simd64_t val64 = *(simd64_t*)val; - if (val64.IsAllBitsSet() && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (val64.IsAllBitsSet() && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg, static_cast(0xFF), INS_OPTS_NONE); @@ -564,8 +539,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t } else { - CORINFO_FIELD_HANDLE hnd = emit->emitSimd64Const(val64); - emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); + emit->emitSimdConstCompressedLoad(val, attr, targetReg); } break; } @@ -732,7 +706,7 @@ void CodeGen::genCodeForNegNot(GenTree* tree) if (varTypeIsFloating(targetType)) { - assert(tree->gtOper == GT_NEG); + assert(tree->OperIs(GT_NEG)); genSSE2BitwiseOp(tree); } else @@ -782,7 +756,30 @@ void CodeGen::genCodeForBswap(GenTree* tree) } else { - GetEmitter()->emitInsBinary(INS_movbe, emitTypeSize(operand), tree, operand); + instruction ins = INS_movbe; +#ifdef TARGET_AMD64 + bool needsEvex = false; + + if 
(GetEmitter()->IsExtendedGPReg(tree->GetRegNum())) + { + needsEvex = true; + } + else if (operand->isIndir()) + { + GenTreeIndir* indir = operand->AsIndir(); + if (indir->HasBase() && GetEmitter()->IsExtendedGPReg(indir->Base()->GetRegNum())) + { + needsEvex = true; + } + else if (indir->HasIndex() && GetEmitter()->IsExtendedGPReg(indir->Index()->GetRegNum())) + { + needsEvex = true; + } + } + + ins = needsEvex ? INS_movbe_apx : INS_movbe; +#endif + GetEmitter()->emitInsBinary(ins, emitTypeSize(operand), tree, operand); } if (tree->OperIs(GT_BSWAP16) && !genCanOmitNormalizationForBswap16(tree)) @@ -854,7 +851,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) emit->emitInsBinary(ins, size, treeNode, rmOp); // Move the result to the desired register, if necessary - if (treeNode->OperGet() == GT_MULHI) + if (treeNode->OperIs(GT_MULHI)) { inst_Mov(targetType, targetReg, REG_RDX, /* canSkip */ true); } @@ -873,11 +870,11 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) void CodeGen::genCodeForLongUMod(GenTreeOp* node) { assert(node != nullptr); - assert(node->OperGet() == GT_UMOD); - assert(node->TypeGet() == TYP_INT); + assert(node->OperIs(GT_UMOD)); + assert(node->TypeIs(TYP_INT)); GenTreeOp* const dividend = node->gtOp1->AsOp(); - assert(dividend->OperGet() == GT_LONG); + assert(dividend->OperIs(GT_LONG)); assert(varTypeIsLong(dividend)); genConsumeOperands(node); @@ -1352,7 +1349,7 @@ void CodeGen::genCodeForMul(GenTreeOp* treeNode) // src - The source of the return // retTypeDesc - The return type descriptor. // -void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) +void CodeGen::genSIMDSplitReturn(GenTree* src, const ReturnTypeDesc* retTypeDesc) { assert(varTypeIsSIMD(src)); assert(src->isUsedFromReg()); @@ -1572,6 +1569,46 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition) return s_table[condition]; } +//------------------------------------------------------------------------ +// JumpKindToCcmp: +// Convert an emitJumpKind to the corresponding ccmp instruction. +// +// Arguments: +// condition - the condition +// +// Returns: +// A ccmp instruction. 
+// +instruction CodeGen::JumpKindToCcmp(emitJumpKind condition) +{ + static constexpr instruction s_table[EJ_COUNT] = { + INS_none, INS_none, INS_ccmpo, INS_ccmpno, INS_ccmpb, INS_ccmpae, INS_ccmpe, INS_ccmpne, INS_ccmpbe, + INS_ccmpa, INS_ccmps, INS_ccmpns, INS_none, INS_none, INS_ccmpl, INS_ccmpge, INS_ccmple, INS_ccmpg, + }; + + static_assert_no_msg(s_table[EJ_NONE] == INS_none); + static_assert_no_msg(s_table[EJ_jmp] == INS_none); + static_assert_no_msg(s_table[EJ_jo] == INS_ccmpo); + static_assert_no_msg(s_table[EJ_jno] == INS_ccmpno); + static_assert_no_msg(s_table[EJ_jb] == INS_ccmpb); + static_assert_no_msg(s_table[EJ_jae] == INS_ccmpae); + static_assert_no_msg(s_table[EJ_je] == INS_ccmpe); + static_assert_no_msg(s_table[EJ_jne] == INS_ccmpne); + static_assert_no_msg(s_table[EJ_jbe] == INS_ccmpbe); + static_assert_no_msg(s_table[EJ_ja] == INS_ccmpa); + static_assert_no_msg(s_table[EJ_js] == INS_ccmps); + static_assert_no_msg(s_table[EJ_jns] == INS_ccmpns); + static_assert_no_msg(s_table[EJ_jp] == INS_none); + static_assert_no_msg(s_table[EJ_jnp] == INS_none); + static_assert_no_msg(s_table[EJ_jl] == INS_ccmpl); + static_assert_no_msg(s_table[EJ_jge] == INS_ccmpge); + static_assert_no_msg(s_table[EJ_jle] == INS_ccmple); + static_assert_no_msg(s_table[EJ_jg] == INS_ccmpg); + + assert((condition >= EJ_NONE) && (condition < EJ_COUNT)); + return s_table[condition]; +} + //------------------------------------------------------------------------ // genCodeForCompare: Produce code for a GT_SELECT/GT_SELECTCC node. // @@ -1668,7 +1705,7 @@ void CodeGen::genCodeForSelect(GenTreeOp* select) } // clang-format off -const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32] +const GenConditionDesc GenConditionDesc::map[32] { { }, // NONE { }, // 1 @@ -1782,7 +1819,7 @@ void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock, bool isRemovableJ // void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) { - assert(tree->OperGet() == GT_RETURNTRAP); + assert(tree->OperIs(GT_RETURNTRAP)); // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC // based on the contents of 'data' @@ -1816,7 +1853,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) { regNumber targetReg; #if !defined(TARGET_64BIT) - if (treeNode->TypeGet() == TYP_LONG) + if (treeNode->TypeIs(TYP_LONG)) { // All long enregistered nodes will have been decomposed into their // constituent lo and hi nodes. @@ -1859,11 +1896,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) switch (treeNode->gtOper) { -#ifndef JIT32_GCENCODER case GT_START_NONGC: GetEmitter()->emitDisableGC(); break; -#endif // !defined(JIT32_GCENCODER) case GT_START_PREEMPTGC: // Kill callee saves GC registers, and create a label @@ -2013,6 +2048,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; #endif // SWIFT_SUPPORT + case GT_RETURN_SUSPEND: + genReturnSuspend(treeNode->AsUnOp()); + break; + case GT_LEA: // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. 
genLeaInstruction(treeNode->AsAddrMode()); @@ -2197,29 +2236,42 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genConsumeReg(treeNode); break; + case GT_ASYNC_CONTINUATION: + genCodeForAsyncContinuation(treeNode); + break; + #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: + { + // Find the eh table entry via the eh ID + // + unsigned const ehID = (unsigned)treeNode->AsVal()->gtVal1; + assert(ehID < compiler->compEHID); + assert(compiler->m_EHIDtoEHblkDsc != nullptr); + + EHblkDsc* HBtab = nullptr; + bool found = compiler->m_EHIDtoEHblkDsc->Lookup(ehID, &HBtab); + assert(found); + assert(HBtab != nullptr); // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates: // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var - - size_t finallyNesting; - finallyNesting = treeNode->AsVal()->gtVal1; - noway_assert(treeNode->AsVal()->gtVal1 < compiler->compHndBBtabCount); + // + const size_t finallyNesting = HBtab->ebdHandlerNestingLevel; noway_assert(finallyNesting < compiler->compHndBBtabCount); // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) unsigned filterEndOffsetSlotOffs; - PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > TARGET_POINTER_SIZE); // below doesn't - // underflow. + assert(compiler->lvaLclStackHomeSize(compiler->lvaShadowSPslotsVar) > TARGET_POINTER_SIZE); filterEndOffsetSlotOffs = - (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); + (unsigned)(compiler->lvaLclStackHomeSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); size_t curNestingSlotOffs; curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE); GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, (unsigned)curNestingSlotOffs, 0); break; + } #endif // FEATURE_EH_WINDOWS_X86 case GT_PINVOKE_PROLOG: @@ -2260,6 +2312,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) // Do nothing; these nodes are simply markers for debug info. 
break; +#if defined(TARGET_AMD64) + case GT_CCMP: + genCodeForCCMP(treeNode->AsCCMP()); + break; +#endif + default: { #ifdef DEBUG @@ -2848,7 +2906,7 @@ void CodeGen::genCodeForMemmove(GenTreeBlk* tree) // void CodeGen::genLclHeap(GenTree* tree) { - assert(tree->OperGet() == GT_LCLHEAP); + assert(tree->OperIs(GT_LCLHEAP)); assert(compiler->compLocallocUsed); GenTree* size = tree->AsOp()->gtOp1; @@ -3117,9 +3175,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) { case GenTreeBlk::BlkOpKindCpObjRepInstr: case GenTreeBlk::BlkOpKindCpObjUnroll: -#ifndef JIT32_GCENCODER assert(!storeBlkNode->gtBlkOpGcUnsafe); -#endif genCodeForCpObj(storeBlkNode->AsBlk()); break; @@ -3129,9 +3185,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) break; case GenTreeBlk::BlkOpKindRepInstr: -#ifndef JIT32_GCENCODER assert(!storeBlkNode->gtBlkOpGcUnsafe); -#endif if (isCopyBlk) { genCodeForCpBlkRepMovs(storeBlkNode); @@ -3145,12 +3199,10 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) case GenTreeBlk::BlkOpKindUnroll: if (isCopyBlk) { -#ifndef JIT32_GCENCODER if (storeBlkNode->gtBlkOpGcUnsafe) { GetEmitter()->emitDisableGC(); } -#endif if (storeBlkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) { genCodeForCpBlkUnroll(storeBlkNode); @@ -3160,18 +3212,14 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) assert(storeBlkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnrollMemmove); genCodeForMemmove(storeBlkNode); } -#ifndef JIT32_GCENCODER if (storeBlkNode->gtBlkOpGcUnsafe) { GetEmitter()->emitEnableGC(); } -#endif } else { -#ifndef JIT32_GCENCODER assert(!storeBlkNode->gtBlkOpGcUnsafe); -#endif genCodeForInitBlkUnroll(storeBlkNode); } break; @@ -3254,7 +3302,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) // INITBLK zeroes a struct that contains GC pointers and can be observed by // other threads (i.e. when dstAddr is not an address of a local). // For example, this can happen when initializing a struct field of an object. - const bool canUse16BytesSimdMov = !node->IsOnHeapAndContainsReferences() && compiler->IsBaselineSimdIsaSupported(); + const bool canUse16BytesSimdMov = !node->IsOnHeapAndContainsReferences(); const bool willUseSimdMov = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES); if (!src->isContained()) @@ -3485,7 +3533,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) } } -#ifdef FEATURE_PUT_STRUCT_ARG_STK // Generate code for a load from some address + offset // base: tree node which can be either a local or an indir // offset: distance from the "base" location from which to load @@ -3502,7 +3549,6 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst GetEmitter()->emitIns_R_AR(ins, size, dst, base->AsIndir()->Addr()->GetRegNum(), offset); } } -#endif // FEATURE_PUT_STRUCT_ARG_STK //---------------------------------------------------------------------------------- // genCodeForCpBlkUnroll - Generate unrolled block copy code. @@ -3790,7 +3836,6 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode) instGen(INS_r_movsb); } -#ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area // @@ -3979,7 +4024,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode) // this probably needs to be changed. 
// Load - genCodeForLoadOffset(INS_movdqu, EA_16BYTE, xmmTmpReg, src, offset); + genCodeForLoadOffset(INS_movdqu32, EA_16BYTE, xmmTmpReg, src, offset); // Store genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset); @@ -4213,13 +4258,14 @@ void CodeGen::genClearStackVec3ArgUpperBits() { // Assume that for x64 linux, an argument is fully in registers // or fully on stack. - regNumber argReg = varDsc->GetOtherArgReg(); + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(varNum); + assert((abiInfo.NumSegments == 2) && !abiInfo.HasAnyStackSegment()); + regNumber argReg = abiInfo.Segment(1).GetRegister(); genSimd12UpperClear(argReg); } } } #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) -#endif // FEATURE_PUT_STRUCT_ARG_STK // // genCodeForCpObj - Generate code for CpObj nodes to copy structs that have interleaved @@ -4255,7 +4301,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) // This is because these registers are incremented as we go through the struct. if (!source->IsLocal()) { - assert(source->gtOper == GT_IND); + assert(source->OperIs(GT_IND)); GenTree* srcAddr = source->gtGetOp1(); srcAddrType = srcAddr->TypeGet(); @@ -4670,7 +4716,7 @@ void CodeGen::genRangeCheck(GenTree* oper) #endif // DEBUG GetEmitter()->emitInsBinary(cmpKind, emitTypeSize(bndsChkType), src1, src2); - genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB); + genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind); } //--------------------------------------------------------------------- @@ -4800,17 +4846,16 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) // tree - the bit shift node (that specifies the type of bit shift to perform). // // Assumptions: -// a) All GenTrees are register allocated. -// b) The shift-by-amount in tree->AsOp()->gtOp2 is either a contained constant or -// it's a register-allocated expression. If it is in a register that is -// not RCX, it will be moved to RCX (so RCX better not be in use!). +// The shift-by-amount in tree->AsOp()->gtOp2 is either a contained constant or it's a +// register-allocated expression. If not using BMI2 instructions and op2 is in a register +// that is not RCX, it will be moved to RCX (so RCX better not be in use!). // void CodeGen::genCodeForShift(GenTree* tree) { // Only the non-RMW case here. assert(tree->OperIsShiftOrRotate()); - assert(tree->AsOp()->gtOp1->isUsedFromReg()); assert(tree->GetRegNum() != REG_NA); + assert(tree->AsOp()->gtOp1->isUsedFromReg() || compiler->compIsaSupportedDebugOnly(InstructionSet_BMI2)); genConsumeOperands(tree->AsOp()); @@ -4821,12 +4866,13 @@ void CodeGen::genCodeForShift(GenTree* tree) regNumber operandReg = operand->GetRegNum(); GenTree* shiftBy = tree->gtGetOp2(); + emitAttr size = emitTypeSize(tree); if (shiftBy->isContainedIntOrIImmed()) { - emitAttr size = emitTypeSize(tree); + assert(tree->OperIsRotate() || (operandReg != REG_NA)); - bool mightOptimizeLsh = tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags(); + bool mightOptimizeLsh = tree->OperIs(GT_LSH) && !tree->gtSetFlags(); // Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg" if (mightOptimizeLsh && shiftBy->IsIntegralConst(1)) @@ -4840,14 +4886,14 @@ void CodeGen::genCodeForShift(GenTree* tree) GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), operandReg, operandReg, 1, 0); } } - // Optimize "X<<2" to "lea [reg*4]" - we only do this when the dst and src registers are different since it will - // remove a 'mov'. 
+ // Optimize "X<<2" to "lea [reg*4]" + // We only do this when the dst and src registers are different since it will remove a 'mov'. else if (mightOptimizeLsh && shiftBy->IsIntegralConst(2) && tree->GetRegNum() != operandReg) { GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), REG_NA, operandReg, 4, 0); } - // Optimize "X<<3" to "lea [reg*8]" - we only do this when the dst and src registers are different since it will - // remove a 'mov'. + // Optimize "X<<3" to "lea [reg*8]" + // We only do this when the dst and src registers are different since it will remove a 'mov'. else if (mightOptimizeLsh && shiftBy->IsIntegralConst(3) && tree->GetRegNum() != operandReg) { GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), REG_NA, operandReg, 8, 0); @@ -4856,53 +4902,56 @@ void CodeGen::genCodeForShift(GenTree* tree) { int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); -#if defined(TARGET_64BIT) - // Try to emit rorx if BMI2 is available instead of mov+rol - // it makes sense only for 64bit integers - if ((genActualType(targetType) == TYP_LONG) && (tree->GetRegNum() != operandReg) && - compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) && - (shiftByValue > 0) && (shiftByValue < 64)) + if (tree->OperIsRotate() && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && + !tree->gtSetFlags()) { - const int value = tree->OperIs(GT_ROL) ? (64 - shiftByValue) : shiftByValue; - GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value); - genProduceReg(tree); - return; + // If we have a contained source operand, we must emit rorx. + // We may also use rorx for 64bit values when a mov would otherwise be required, + // because rorx is smaller than mov+rol/ror when REX prefix is included. + + if ((operandReg == REG_NA) || ((varTypeIsLong(targetType) && (tree->GetRegNum() != operandReg)))) + { + // There is no 'rolx', so for rol, we use rorx with the shift value adjusted. + if (tree->OperIs(GT_ROL)) + { + shiftByValue &= (size * BITS_PER_BYTE - 1); + shiftByValue = (size * BITS_PER_BYTE - shiftByValue); + } + + inst_RV_TT_IV(INS_rorx, size, tree->GetRegNum(), operand, shiftByValue, INS_OPTS_NONE); + genProduceReg(tree); + return; + } } -#endif + ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue); GetEmitter()->emitIns_BASE_R_R_I(ins, emitTypeSize(tree), tree->GetRegNum(), operandReg, shiftByValue); genProduceReg(tree); return; } } -#if defined(TARGET_64BIT) - else if (tree->OperIsShift() && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2)) + else if (tree->OperIsShift() && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && + !tree->gtSetFlags()) { - // Try to emit shlx, sarx, shrx if BMI2 is available instead of mov+shl, mov+sar, mov+shr. + // Emit shlx, sarx, shrx if BMI2 is available instead of mov+shl, mov+sar, mov+shr. switch (tree->OperGet()) { case GT_LSH: ins = INS_shlx; break; - case GT_RSH: ins = INS_sarx; break; - case GT_RSZ: ins = INS_shrx; break; - default: unreached(); } - regNumber shiftByReg = shiftBy->GetRegNum(); - emitAttr size = emitTypeSize(tree); - // The order of operandReg and shiftByReg are swapped to follow shlx, sarx and shrx encoding spec. - GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), shiftByReg, operandReg); + // The order of operand and shiftBy are swapped to follow shlx, sarx and shrx encoding spec. 
+ inst_RV_RV_TT(ins, size, tree->GetRegNum(), shiftBy->GetRegNum(), operand, /*isRMW*/ false, INS_OPTS_NONE); } -#endif else { // We must have the number of bits to shift stored in ECX, since we constrained this node to @@ -4943,7 +4992,7 @@ void CodeGen::genCodeForShiftLong(GenTree* tree) assert(oper == GT_LSH_HI || oper == GT_RSH_LO); GenTree* operand = tree->AsOp()->gtOp1; - assert(operand->OperGet() == GT_LONG); + assert(operand->OperIs(GT_LONG)); assert(operand->AsOp()->gtOp1->isUsedFromReg()); assert(operand->AsOp()->gtOp2->isUsedFromReg()); @@ -5171,7 +5220,7 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) { #if defined(FEATURE_SIMD) && defined(TARGET_X86) // Loading of TYP_SIMD12 (i.e. Vector3) variable - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genLoadLclTypeSimd12(tree); return; @@ -5394,7 +5443,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) #ifdef TARGET_64BIT // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case that the index // is a native int on a 64-bit platform, we will need to widen the array length and then compare. - if (index->TypeGet() == TYP_I_IMPL) + if (index->TypeIs(TYP_I_IMPL)) { GetEmitter()->emitIns_R_AR(INS_mov, EA_4BYTE, tmpReg, baseReg, static_cast(node->gtLenOffset)); GetEmitter()->emitIns_R_R(INS_cmp, EA_8BYTE, indexReg, tmpReg); @@ -5405,11 +5454,11 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) GetEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, indexReg, baseReg, static_cast(node->gtLenOffset)); } - genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL, node->gtIndRngFailBB); + genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL); } #ifdef TARGET_64BIT - if (index->TypeGet() != TYP_I_IMPL) + if (!index->TypeIs(TYP_I_IMPL)) { // LEA needs 64-bit operands so we need to widen the index if it's TYP_INT. GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, tmpReg, indexReg, /* canSkip */ false); @@ -5464,7 +5513,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) #ifdef FEATURE_SIMD // Handling of Vector3 type values loaded through indirection. - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genLoadIndTypeSimd12(tree); return; @@ -5508,7 +5557,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) #ifdef FEATURE_SIMD // Storing Vector3 of size 12 bytes through indirection - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { genStoreIndTypeSimd12(tree); return; @@ -5653,6 +5702,22 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) if (data->OperIs(GT_BSWAP, GT_BSWAP16)) { ins = INS_movbe; +#ifdef TARGET_AMD64 + bool needsEvex = false; + if (GetEmitter()->IsExtendedGPReg(data->gtGetOp1()->GetRegNum())) + { + needsEvex = true; + } + else if (tree->HasBase() && GetEmitter()->IsExtendedGPReg(tree->Base()->GetRegNum())) + { + needsEvex = true; + } + else if (tree->HasIndex() && GetEmitter()->IsExtendedGPReg(tree->Index()->GetRegNum())) + { + needsEvex = true; + } + ins = needsEvex ? 
INS_movbe_apx : INS_movbe; +#endif // TARGET_AMD64 } #if defined(FEATURE_HW_INTRINSICS) else if (data->OperIsHWIntrinsic()) @@ -5666,16 +5731,23 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) case NI_Vector128_ToScalar: case NI_Vector256_ToScalar: case NI_Vector512_ToScalar: - case NI_SSE2_ConvertToInt32: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_X64_ConvertToUInt64: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_X64_ConvertToUInt64: case NI_AVX2_ConvertToInt32: case NI_AVX2_ConvertToUInt32: { // These intrinsics are "ins reg/mem, xmm" - ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); attr = emitActualTypeSize(baseType); +#if defined(TARGET_X86) + if (varTypeIsLong(baseType)) + { + ins = INS_movq; + attr = EA_8BYTE; + } +#endif // TARGET_X86 break; } @@ -5685,23 +5757,19 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) FALLTHROUGH; } - case NI_SSE2_Extract: + case NI_X86Base_Extract: case NI_SSE41_Extract: case NI_SSE41_X64_Extract: case NI_AVX_ExtractVector128: case NI_AVX2_ExtractVector128: - case NI_AVX512F_ExtractVector128: - case NI_AVX512F_ExtractVector256: - case NI_AVX512DQ_ExtractVector128: - case NI_AVX512DQ_ExtractVector256: - case NI_AVX10v1_V512_ExtractVector128: - case NI_AVX10v1_V512_ExtractVector256: + case NI_AVX512_ExtractVector128: + case NI_AVX512_ExtractVector256: { // These intrinsics are "ins reg/mem, xmm, imm8" - ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(hwintrinsic->GetSimdSize())); - if (intrinsicId == NI_SSE2_Extract) + if (intrinsicId == NI_X86Base_Extract) { // The encoding that supports containment is SSE4.1 only ins = INS_pextrw_sse41; @@ -5720,62 +5788,38 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) break; } - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { assert(!varTypeIsFloating(baseType)); FALLTHROUGH; } - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: - case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int32: - 
case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case NI_AVX512_ConvertToVector256Int32WithSaturation: + case NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: { // These intrinsics are "ins reg/mem, xmm" - ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(hwintrinsic->GetSimdSize())); break; } @@ -5978,7 +6022,7 @@ void CodeGen::genCall(GenTreeCall* call) genCallPlaceRegArgs(call); -#if defined(TARGET_X86) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_X86) // The call will pop its arguments. // for each putarg_stk: target_ssize_t stackArgBytes = 0; @@ -5992,14 +6036,12 @@ void CodeGen::genCall(GenTreeCall* call) stackArgBytes += argSize; #ifdef DEBUG - assert(argSize == arg.AbiInfo.ByteSize); -#ifdef FEATURE_PUT_STRUCT_ARG_STK + assert(argSize == arg.AbiInfo.StackBytesConsumed()); if (source->TypeIs(TYP_STRUCT) && !source->OperIs(GT_FIELD_LIST)) { unsigned loadSize = source->GetLayout(compiler)->GetSize(); assert(argSize == roundUp(loadSize, TARGET_POINTER_SIZE)); } -#endif // FEATURE_PUT_STRUCT_ARG_STK #endif // DEBUG } } @@ -6237,6 +6279,8 @@ void CodeGen::genCall(GenTreeCall* call) // void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackArgBytes)) { + EmitCallParams params; + #if defined(TARGET_X86) // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will // adjust its stack level accordingly. 
@@ -6247,60 +6291,73 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA { argSizeForEmitter = -stackArgBytes; } + params.argSize = argSizeForEmitter; #endif // defined(TARGET_X86) // Determine return value size(s). - const ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - emitAttr retSize = EA_PTRSIZE; - emitAttr secondRetSize = EA_UNKNOWN; + const ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); // unused values are of no interest to GC. if (!call->IsUnusedValue()) { if (call->HasMultiRegRetVal()) { - retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0)); - secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1)); + params.retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0)); + params.secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1)); + + if (retTypeDesc->GetABIReturnReg(1, call->GetUnmanagedCallConv()) == REG_INTRET) + { + // If the second return register is REG_INTRET, then the first + // return is expected to be in a SIMD register. + // The emitter has hardcoded belief that params.retSize corresponds to + // REG_INTRET and secondRetSize to REG_INTRET_1, so fix up the + // situation here. + assert(!EA_IS_GCREF_OR_BYREF(params.retSize)); + params.retSize = params.secondRetSize; + params.secondRetSize = EA_UNKNOWN; + } } else { assert(!varTypeIsStruct(call)); - if (call->gtType == TYP_REF) + if (call->TypeIs(TYP_REF)) { - retSize = EA_GCREF; + params.retSize = EA_GCREF; } - else if (call->gtType == TYP_BYREF) + else if (call->TypeIs(TYP_BYREF)) { - retSize = EA_BYREF; + params.retSize = EA_BYREF; } } } + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); + // We need to propagate the IL offset information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. // We don't want tail call helper calls that were converted from normal calls to get a record, // so we skip this hash table lookup logic in that case. - DebugInfo di; - if (compiler->opts.compDbgInfo && compiler->genCallSite2DebugInfoMap != nullptr && !call->IsTailCall()) { + DebugInfo di; (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di); + params.debugInfo = di; } - CORINFO_SIG_INFO* sigInfo = nullptr; #ifdef DEBUG // Pass the call signature information down into the emitter so the emitter can associate // native call sites with the signatures they were generated from. 
if (!call->IsHelperCall()) { - sigInfo = call->callSig; + params.sigInfo = call->callSig; } #endif // DEBUG - CORINFO_METHOD_HANDLE methHnd; - GenTree* target = getCallTarget(call, &methHnd); + GenTree* target = getCallTarget(call, ¶ms.methHnd); + if (target != nullptr) { #ifdef TARGET_X86 @@ -6317,7 +6374,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA assert(compiler->virtualStubParamInfo->GetReg() == REG_VIRTUAL_STUB_TARGET); assert(target->isContainedIndir()); - assert(target->OperGet() == GT_IND); + assert(target->OperIs(GT_IND)); GenTree* addr = target->AsIndir()->Addr(); assert(addr->isUsedFromReg()); @@ -6327,19 +6384,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA GetEmitter()->emitIns_Nop(3); - // clang-format off - GetEmitter()->emitIns_Call(emitter::EC_INDIR_ARD, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, - argSizeForEmitter, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - di, REG_VIRTUAL_STUB_TARGET, REG_NA, 1, 0); - // clang-format on + params.callType = EC_INDIR_ARD; + params.ireg = REG_VIRTUAL_STUB_TARGET; + genEmitCallWithCurrentGC(params); } else #endif @@ -6356,18 +6403,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA // contained only if it can be encoded as PC-relative offset. assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler)); - // clang-format off - genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue() - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - REG_NA, - call->IsFastTailCall()); - // clang-format on + params.callType = EC_FUNC_TOKEN_INDIR; + params.addr = (void*)target->AsIndir()->Base()->AsIntConCommon()->IconValue(); + genEmitCallWithCurrentGC(params); } else { @@ -6378,17 +6416,21 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA genConsumeAddress(target->AsIndir()->Addr()); } - // clang-format off - genEmitCallIndir(emitter::EC_INDIR_ARD, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - target->AsIndir() - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - call->IsFastTailCall()); - // clang-format on + GenTreeIndir* indir = target->AsIndir(); + regNumber iReg = indir->HasBase() ? indir->Base()->GetRegNum() : REG_NA; + regNumber xReg = indir->HasIndex() ? indir->Index()->GetRegNum() : REG_NA; + + // These should have been put in volatile registers to ensure they do not + // get overridden by epilog sequence during tailcall. 
+ assert(!params.isJump || (iReg == REG_NA) || ((RBM_CALLEE_TRASH & genRegMask(iReg)) != 0)); + assert(!params.isJump || (xReg == REG_NA) || ((RBM_CALLEE_TRASH & genRegMask(xReg)) != 0)); + + params.callType = EC_INDIR_ARD; + params.ireg = iReg; + params.xreg = xReg; + params.xmul = indir->Scale(); + params.disp = indir->Offset(); + genEmitCallWithCurrentGC(params); } } else @@ -6406,18 +6448,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA genConsumeReg(target); } - // clang-format off - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr // addr - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - target->GetRegNum(), - call->IsFastTailCall()); - // clang-format on + params.callType = EC_INDIR_R; + params.ireg = target->GetRegNum(); + genEmitCallWithCurrentGC(params); } else { @@ -6428,19 +6461,11 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA GetEmitter()->emitIns_Data16(); GetEmitter()->emitIns_Data16(); - // clang-format off - genEmitCall(emitter::EC_FUNC_TOKEN, - (CORINFO_METHOD_HANDLE)1, - INDEBUG_LDISASM_COMMA(sigInfo) - (void*)tlsGetAddr->AsIntCon()->gtIconVal // addr - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - target->GetRegNum(), - call->IsFastTailCall(), - true); // noSafePoint - // clang-format on + params.callType = EC_FUNC_TOKEN; + params.methHnd = (CORINFO_METHOD_HANDLE)1; + params.addr = (void*)tlsGetAddr->AsIntCon()->gtIconVal; + params.noSafePoint = true; + genEmitCallWithCurrentGC(params); } } } @@ -6456,39 +6481,16 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA regNumber indirCellReg = getCallIndirectionCellReg(call); if (indirCellReg != REG_NA) { - // clang-format off - GetEmitter()->emitIns_Call( - emitter::EC_INDIR_ARD, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, - 0, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - di, indirCellReg, REG_NA, 0, 0, - call->IsFastTailCall()); - // clang-format on + params.callType = EC_INDIR_ARD; + params.ireg = indirCellReg; + genEmitCallWithCurrentGC(params); } #ifdef FEATURE_READYTORUN else if (call->gtEntryPoint.addr != nullptr) { - emitter::EmitCallType type = - (call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR; - // clang-format off - genEmitCall(type, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - (void*)call->gtEntryPoint.addr - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - REG_NA, - call->IsFastTailCall()); - // clang-format on + params.callType = (call->gtEntryPoint.accessType == IAT_VALUE) ? EC_FUNC_TOKEN : EC_FUNC_TOKEN_INDIR; + params.addr = (void*)call->gtEntryPoint.addr; + genEmitCallWithCurrentGC(params); } #endif else @@ -6500,7 +6502,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA if (call->IsHelperCall()) { // Direct call to a helper method. 
- CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(params.methHnd); noway_assert(helperNum != CORINFO_HELP_UNDEF); void* pAddr = nullptr; @@ -6517,18 +6519,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA // Non-virtual direct calls to known addresses - // clang-format off - genEmitCall(emitter::EC_FUNC_TOKEN, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - addr - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - di, - REG_NA, - call->IsFastTailCall()); - // clang-format on + params.callType = EC_FUNC_TOKEN; + params.addr = addr; + genEmitCallWithCurrentGC(params); } } } @@ -6980,10 +6973,10 @@ GenTree* CodeGen::genTryFindFlagsConsumer(GenTree* producer, GenCondition** cond // void CodeGen::genLongToIntCast(GenTree* cast) { - assert(cast->OperGet() == GT_CAST); + assert(cast->OperIs(GT_CAST)); GenTree* src = cast->gtGetOp1(); - noway_assert(src->OperGet() == GT_LONG); + noway_assert(src->OperIs(GT_LONG)); genConsumeRegs(src); @@ -7215,7 +7208,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) void CodeGen::genFloatToFloatCast(GenTree* treeNode) { // float <--> double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); + assert(treeNode->OperIs(GT_CAST)); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->GetRegNum(); @@ -7235,11 +7228,16 @@ void CodeGen::genFloatToFloatCast(GenTree* treeNode) assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); genConsumeOperands(treeNode->AsOp()); - if (srcType == dstType && (op1->isUsedFromReg() && (targetReg == op1->GetRegNum()))) + if (srcType == dstType) { - // source and destinations types are the same and also reside in the same register. - // we just need to consume and produce the reg in this case. - ; + if (op1->isUsedFromReg()) + { + GetEmitter()->emitIns_Mov(INS_movaps, EA_16BYTE, targetReg, op1->GetRegNum(), /* canSkip */ true); + } + else + { + inst_RV_TT(ins_Move_Extend(dstType, /* srcInReg */ false), emitTypeSize(dstType), targetReg, op1); + } } else { @@ -7286,7 +7284,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) #endif var_types dstType = treeNode->CastToType(); - var_types srcType = op1->TypeGet(); + var_types srcType = genActualType(op1->TypeGet()); assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we @@ -7301,15 +7299,6 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) srcType = TYP_I_IMPL; } - // At this point, we should not see a srcType that is not int or long. - // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect - // either the front-end or lowering phase to have generated two levels of cast. - // The first one is for widening smaller int type to int32 and the second one is - // to the float/double. - // On 32-bit, we expect morph to replace long to float/double casts with helper calls, - // so we should only see int here. - noway_assert(varTypeIsIntOrI(srcType)); - // To convert integral type to floating, the cvt[u]si2ss/sd instruction is used // which does a partial write to lower 4/8 bytes of xmm register keeping the other // upper bytes unmodified. 
If "cvt[u]si2ss/sd xmmReg, r32/r64" occurs inside a loop, @@ -7353,9 +7342,11 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) // addsd xmm0, xmm0 //.LABEL // - regNumber argReg = treeNode->gtGetOp1()->GetRegNum(); - regNumber tmpReg1 = internalRegisters.Extract(treeNode); + regNumber argReg = treeNode->gtGetOp1()->GetRegNum(); + // Get the APXIncompatible register first regNumber tmpReg2 = internalRegisters.Extract(treeNode); + // tmpReg1 can be EGPR + regNumber tmpReg1 = internalRegisters.Extract(treeNode); inst_Mov(TYP_LONG, tmpReg1, argReg, /* canSkip */ false, EA_8BYTE); inst_RV_SH(INS_shr, EA_8BYTE, tmpReg1, 1); @@ -7433,7 +7424,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); // We shouldn't be seeing uint64 here as it should have been converted - // into a helper call by either front-end or lowering phase, unless we have AVX512F + // into a helper call by either front-end or lowering phase, unless we have AVX512 // accelerated conversions. assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || compiler->canUseEvexEncodingDebugOnly()); @@ -7471,7 +7462,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // void CodeGen::genCkfinite(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_CKFINITE); + assert(treeNode->OperIs(GT_CKFINITE)); GenTree* op1 = treeNode->AsOp()->gtOp1; var_types targetType = treeNode->TypeGet(); @@ -7772,7 +7763,7 @@ void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode) assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE41)); // ii) treeNode oper is a GT_INTRINSIC - assert(treeNode->OperGet() == GT_INTRINSIC); + assert(treeNode->OperIs(GT_INTRINSIC)); GenTree* srcNode = treeNode->gtGetOp1(); @@ -7785,7 +7776,7 @@ void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode) genConsumeOperands(treeNode); - instruction ins = (treeNode->TypeGet() == TYP_FLOAT) ? INS_roundss : INS_roundsd; + instruction ins = treeNode->TypeIs(TYP_FLOAT) ? INS_roundss : INS_roundsd; emitAttr size = emitTypeSize(treeNode); regNumber dstReg = treeNode->GetRegNum(); @@ -7855,7 +7846,7 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode) genConsumeOperands(treeNode->AsOp()); - const instruction ins = (treeNode->TypeGet() == TYP_FLOAT) ? INS_sqrtss : INS_sqrtsd; + const instruction ins = treeNode->TypeIs(TYP_FLOAT) ? INS_sqrtss : INS_sqrtsd; regNumber targetReg = treeNode->GetRegNum(); bool isRMW = !compiler->canUseVexEncoding(); @@ -7910,7 +7901,7 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode) // unsigned CodeGen::getBaseVarForPutArgStk(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_PUTARG_STK); + assert(treeNode->OperIs(GT_PUTARG_STK)); unsigned baseVarNum; @@ -7921,25 +7912,6 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTree* treeNode) { // See the note in the function header re: finding the first stack passed argument. baseVarNum = getFirstArgWithStackSlot(); - assert(baseVarNum != BAD_VAR_NUM); - -#ifdef DEBUG - // This must be a fast tail call. - assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall()); - - // Since it is a fast tail call, the existence of first incoming arg is guaranteed - // because fast tail call requires that in-coming arg area of caller is >= out-going - // arg area required for tail call. 
- LclVarDsc* varDsc = compiler->lvaGetDesc(baseVarNum); - assert(varDsc != nullptr); - -#ifdef UNIX_AMD64_ABI - assert(!varDsc->lvIsRegArg && varDsc->GetArgReg() == REG_STK); -#else // !UNIX_AMD64_ABI - // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0. - assert(varDsc->lvIsRegArg && (varDsc->GetArgReg() == REG_ARG_0 || varDsc->GetArgReg() == REG_FLTARG_0)); -#endif // !UNIX_AMD64_ABI -#endif // !DEBUG } else { @@ -8387,8 +8359,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) { unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk); -#ifdef UNIX_AMD64_ABI - if (data->OperIs(GT_FIELD_LIST)) { genPutArgStkFieldList(putArgStk, baseVarNum); @@ -8402,7 +8372,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) m_stkArgVarNum = BAD_VAR_NUM; return; } -#endif // UNIX_AMD64_ABI noway_assert(targetType != TYP_STRUCT); @@ -8414,7 +8383,8 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) #ifdef DEBUG CallArg* callArg = putArgStk->gtCall->gtArgs.FindByNode(putArgStk); assert(callArg != nullptr); - assert(argOffset == callArg->AbiInfo.ByteOffset); + assert(callArg->AbiInfo.HasExactlyOneStackSegment()); + assert(argOffset == callArg->AbiInfo.Segment(0).GetStackOffset()); #endif if (data->isContainedIntOrIImmed()) @@ -8504,7 +8474,6 @@ void CodeGen::genPushReg(var_types type, regNumber srcReg) } #endif // TARGET_X86 -#if defined(FEATURE_PUT_STRUCT_ARG_STK) // genStoreRegToStackArg: Store a register value into the stack argument area // // Arguments: @@ -8535,7 +8504,7 @@ void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset if (type == TYP_STRUCT) { - ins = INS_movdqu; + ins = INS_movdqu32; // This should be changed! attr = EA_16BYTE; size = 16; @@ -8644,7 +8613,6 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk) unreached(); } } -#endif // defined(FEATURE_PUT_STRUCT_ARG_STK) /***************************************************************************** * @@ -8865,15 +8833,14 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg) { - void* addr = nullptr; void* pAddr = nullptr; - emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; - addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); - regNumber callTarget = REG_NA; - regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + EmitCallParams params; + params.callType = EC_FUNC_TOKEN; + params.addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); - if (!addr) + if (params.addr == nullptr) { assert(pAddr != nullptr); @@ -8884,8 +8851,8 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr)) { // generate call whose target is specified by 32-bit offset relative to PC or zero. 
- callType = emitter::EC_FUNC_TOKEN_INDIR; - addr = pAddr; + params.callType = EC_FUNC_TOKEN_INDIR; + params.addr = pAddr; } else { @@ -8913,32 +8880,99 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, } #endif - callTarget = callTargetReg; - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, callTarget, (ssize_t)pAddr); - callType = emitter::EC_INDIR_ARD; - } - } - - // clang-format off - GetEmitter()->emitIns_Call(callType, - compiler->eeFindHelper(helper), - INDEBUG_LDISASM_COMMA(nullptr) addr, - argSize, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - DebugInfo(), - callTarget, // ireg - REG_NA, 0, 0, // xreg, xmul, disp - false // isJump - ); - // clang-format on + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, callTargetReg, (ssize_t)pAddr); + + params.ireg = callTargetReg; + params.callType = EC_INDIR_ARD; + } + } + + params.methHnd = compiler->eeFindHelper(helper); + params.argSize = argSize; + params.retSize = retSize; + genEmitCallWithCurrentGC(params); regSet.verifyRegistersUsed(killMask); } +//----------------------------------------------------------------------------------------- +// OptsFromCFlags - Convert condition flags into appropriate insOpts. +// +// Arguments: +// flags - The condition flags to be converted. +// +// Return Value: +// An insOpts value encoding the condition flags. +// +// Notes: +// This function maps the condition flags (e.g., CF, ZF, SF, OF) to the appropriate +// instruction options used for setting the default flag values in extended EVEX +// encoding conditional instructions. +// +insOpts CodeGen::OptsFromCFlags(insCflags flags) +{ + unsigned opts = 0x0; + if (flags & INS_FLAGS_CF) + opts |= INS_OPTS_EVEX_dfv_cf; + if (flags & INS_FLAGS_ZF) + opts |= INS_OPTS_EVEX_dfv_zf; + if (flags & INS_FLAGS_SF) + opts |= INS_OPTS_EVEX_dfv_sf; + if (flags & INS_FLAGS_OF) + opts |= INS_OPTS_EVEX_dfv_of; + return (insOpts)opts; +} + +#ifdef TARGET_AMD64 + +//----------------------------------------------------------------------------------------- +// genCodeForCCMP - Generate code for a conditional compare (CCMP) node. +// +// Arguments: +// ccmp - The GenTreeCCMP node representing the conditional compare. +// +// Return Value: +// None. +// +// Notes: +// This function generates code for a conditional compare operation. On X86, +// comparisons are performed using the extended EVEX encoding and the ccmp instruction. +void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp) +{ + emitter* emit = GetEmitter(); + assert(emit->UsePromotedEVEXEncoding()); + + genConsumeOperands(ccmp); + GenTree* op1 = ccmp->gtGetOp1(); + GenTree* op2 = ccmp->gtGetOp2(); + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + emitAttr cmpSize = emitActualTypeSize(op1Type); + regNumber srcReg1 = op1->GetRegNum(); + + // No float support or swapping op1 and op2 to generate cmp reg, imm. + assert(!varTypeIsFloating(op2Type)); + assert(!op1->isContainedIntOrIImmed()); + + // For the ccmp flags, invert the condition of the compare. + // For the condition, use the previous compare. 
+ const GenConditionDesc& condDesc = GenConditionDesc::Get(ccmp->gtCondition); + instruction ccmpIns = JumpKindToCcmp(condDesc.jumpKind1); + insOpts opts = OptsFromCFlags(ccmp->gtFlagsVal); + + if (op2->isContainedIntOrIImmed()) + { + GenTreeIntConCommon* intConst = op2->AsIntConCommon(); + emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts); + } + else + { + regNumber srcReg2 = op2->GetRegNum(); + emit->emitIns_R_R(ccmpIns, cmpSize, srcReg1, srcReg2, opts); + } +} +#endif // TARGET_AMD64 + #if defined(DEBUG) && defined(TARGET_AMD64) /***************************************************************************** @@ -9157,10 +9191,6 @@ void CodeGen::genAmd64EmitterUnitTestsApx() theEmitter->emitIns_R_R_I(INS_shld, EA_4BYTE, REG_EAX, REG_ECX, 5); theEmitter->emitIns_R_R_I(INS_shrd, EA_2BYTE, REG_EAX, REG_ECX, 5); - // TODO-XArch-apx: S_R_I path only accepts SEE or VEX instructions, - // so I assuem shld/shrd will not be taking the first argument from stack. - // theEmitter->emitIns_S_R_I(INS_shld, EA_2BYTE, 1, 2, REG_EAX, 5); - // theEmitter->emitIns_S_R_I(INS_shrd, EA_2BYTE, 1, 2, REG_EAX, 5); theEmitter->emitIns_AR_R(INS_cmpxchg, EA_2BYTE, REG_EAX, REG_EDX, 2); @@ -9295,8 +9325,6 @@ void CodeGen::genAmd64EmitterUnitTestsApx() theEmitter->emitIns_R(INS_shl_1, EA_2BYTE, REG_R11, INS_OPTS_EVEX_nf); theEmitter->emitIns_R_I(INS_shl_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf); theEmitter->emitIns_R_I(INS_shl_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf); - theEmitter->emitIns_R_I(INS_rcr_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf); - theEmitter->emitIns_R_I(INS_rcl_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf); theEmitter->emitIns_R_R(INS_imul, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); theEmitter->emitIns_R_S(INS_imul, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); @@ -9330,6 +9358,49 @@ void CodeGen::genAmd64EmitterUnitTestsApx() theEmitter->emitIns_BASE_R_R(INS_inc, EA_4BYTE, REG_R11, REG_R12); theEmitter->emitIns_BASE_R_R_I(INS_add, EA_4BYTE, REG_R11, REG_R12, 5); + + // testing for EGPR encodings. + GenTreePhysReg eGPR(REG_R16); + eGPR.SetRegNum(REG_R16); + GenTreeIndir loadGPR = indirForm(TYP_SIMD32, &eGPR); + + // // SIMD instructions + // // In most of the cases, EGPR will only be used as BASE/INDEX registers in SIMD instructions. 
+ theEmitter->emitIns_R_R_A(INS_addps, EA_32BYTE, REG_XMM16, REG_XMM16, &loadGPR); + + // // Legacy instructions + theEmitter->emitIns_R_ARX(INS_add, EA_4BYTE, REG_R16, REG_R17, REG_R18, 1, 0); + + theEmitter->emitIns_AR_R(INS_movnti64, EA_8BYTE, REG_R17, REG_R16, 10); + theEmitter->emitIns_R_R_R(INS_andn, EA_8BYTE, REG_R17, REG_R16, REG_R18); + + theEmitter->emitIns_Mov(INS_kmovb_gpr, EA_4BYTE, REG_R16, REG_K0, false); + theEmitter->emitIns_Mov(INS_kmovb_msk, EA_4BYTE, REG_K5, REG_K0, false); + theEmitter->emitIns_Mov(INS_kmovw_gpr, EA_4BYTE, REG_R16, REG_K0, false); + theEmitter->emitIns_Mov(INS_kmovw_msk, EA_4BYTE, REG_K5, REG_K0, false); + theEmitter->emitIns_Mov(INS_kmovd_gpr, EA_4BYTE, REG_R16, REG_K0, false); + theEmitter->emitIns_Mov(INS_kmovd_msk, EA_4BYTE, REG_K5, REG_K0, false); + theEmitter->emitIns_Mov(INS_kmovq_gpr, EA_8BYTE, REG_R16, REG_K0, false); + theEmitter->emitIns_Mov(INS_kmovq_msk, EA_8BYTE, REG_K5, REG_K0, false); + + theEmitter->emitIns_R_R(INS_crc32_apx, EA_1BYTE, REG_R16, REG_R17); + theEmitter->emitIns_R_R(INS_crc32_apx, EA_2BYTE, REG_R16, REG_R17); + theEmitter->emitIns_R_R(INS_crc32_apx, EA_8BYTE, REG_R16, REG_R17); + theEmitter->emitIns_R_A(INS_crc32_apx, EA_8BYTE, REG_R18, &loadGPR); + theEmitter->emitIns_R_S(INS_crc32_apx, EA_8BYTE, REG_R18, 0, 0); + + // Note that BZHI has a reversed src operands due to special handling at import. + theEmitter->emitIns_R_R_R(INS_bzhi, EA_4BYTE, REG_R16, REG_R18, REG_R17); + theEmitter->emitIns_R_R_R(INS_bzhi, EA_8BYTE, REG_R16, REG_R18, REG_R17); + theEmitter->emitIns_R_R_R(INS_mulx, EA_4BYTE, REG_R16, REG_R18, REG_R17); + theEmitter->emitIns_R_R_R(INS_mulx, EA_8BYTE, REG_R16, REG_R18, REG_R17); + theEmitter->emitIns_R_R_R(INS_pdep, EA_4BYTE, REG_R16, REG_R18, REG_R17); + theEmitter->emitIns_R_R_R(INS_pdep, EA_8BYTE, REG_R16, REG_R18, REG_R17); + theEmitter->emitIns_R_R_R(INS_pext, EA_4BYTE, REG_R16, REG_R18, REG_R17); + theEmitter->emitIns_R_R_R(INS_pext, EA_8BYTE, REG_R16, REG_R18, REG_R17); + + theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM0, false); + theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM16, false); } void CodeGen::genAmd64EmitterUnitTestsAvx10v2() @@ -9479,10 +9550,88 @@ void CodeGen::genAmd64EmitterUnitTestsAvx10v2() theEmitter->emitIns_R_R(INS_vucomxss, EA_16BYTE, REG_XMM0, REG_XMM1); // VMOVD - theEmitter->emitIns_R_R(INS_vmovd, EA_16BYTE, REG_XMM0, REG_XMM1); + theEmitter->emitIns_R_R(INS_vmovd_simd, EA_16BYTE, REG_XMM0, REG_XMM1); // VMOVW - theEmitter->emitIns_R_R(INS_vmovw, EA_16BYTE, REG_XMM0, REG_XMM1); + theEmitter->emitIns_R_R(INS_vmovw_simd, EA_16BYTE, REG_XMM0, REG_XMM1); +} + +/***************************************************************************** + * Unit tests for the CCMP instructions. 
+ */ + +void CodeGen::genAmd64EmitterUnitTestsCCMP() +{ + emitter* theEmitter = GetEmitter(); + genDefineTempLabel(genCreateTempLabel()); + + // ============ + // Test RR form + // ============ + + // Test all sizes + theEmitter->emitIns_R_R(INS_ccmpe, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_R(INS_ccmpe, EA_8BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_R(INS_ccmpe, EA_2BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_R(INS_ccmpe, EA_1BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + + // Test all CC codes + for (uint32_t ins = INS_FIRST_CCMP_INSTRUCTION + 1; ins < INS_LAST_CCMP_INSTRUCTION; ins++) + { + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + } + + // Test all dfv + for (int i = 0; i < 16; i++) + { + theEmitter->emitIns_R_R(INS_ccmpe, EA_4BYTE, REG_RAX, REG_RCX, (insOpts)(i << INS_OPTS_EVEX_dfv_byte_offset)); + } + + // ============ + // Test RS form + // ============ + + // Test all sizes + theEmitter->emitIns_R_S(INS_ccmpe, EA_4BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_S(INS_ccmpe, EA_8BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_S(INS_ccmpe, EA_2BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_S(INS_ccmpe, EA_1BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf); + + // Test all CC codes + for (uint32_t ins = INS_FIRST_CCMP_INSTRUCTION + 1; ins < INS_LAST_CCMP_INSTRUCTION; ins++) + { + theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf); + } + + // Test all dfv + for (int i = 0; i < 16; i++) + { + theEmitter->emitIns_R_S(INS_ccmpe, EA_4BYTE, REG_RAX, 0, 0, (insOpts)(i << INS_OPTS_EVEX_dfv_byte_offset)); + } + + // ============ + // Test RI form (test small and large sizes and constants) + // ============ + + theEmitter->emitIns_R_I(INS_ccmpe, EA_4BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_ccmpe, EA_4BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + theEmitter->emitIns_R_I(INS_ccmpe, EA_8BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_ccmpe, EA_8BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + theEmitter->emitIns_R_I(INS_ccmpe, EA_2BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_ccmpe, EA_2BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + theEmitter->emitIns_R_I(INS_ccmpe, EA_1BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_ccmpe, EA_1BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + // ============ + // Test RC form + // ============ + + CORINFO_FIELD_HANDLE hnd = theEmitter->emitFltOrDblConst(1.0f, EA_4BYTE); + theEmitter->emitIns_R_C(INS_ccmpe, EA_4BYTE, REG_RAX, hnd, 0, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_C(INS_ccmpe, EA_4BYTE, REG_RAX, hnd, 4, INS_OPTS_EVEX_dfv_cf); } #endif // defined(DEBUG) && defined(TARGET_AMD64) @@ -9552,9 +9701,6 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) 0, // argSize. Again, we have to lie about it EA_UNKNOWN); // retSize - // Check that we have place for the push. - assert(compiler->fgGetPtrArgCntMax() >= 1); - #if defined(UNIX_X86_ABI) // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall GetEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); @@ -9633,9 +9779,6 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #endif genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */); - // Check that we have place for the push. 
- assert(compiler->fgGetPtrArgCntMax() >= 1); - #if defined(UNIX_X86_ABI) // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall GetEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); @@ -9672,7 +9815,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) return; } -#if !defined(UNIX_AMD64_ABI) +#if defined(WINDOWS_AMD64_ABI) unsigned varNum; LclVarDsc* varDsc; @@ -9694,13 +9837,15 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) { noway_assert(varDsc->lvIsParam); - if (!varDsc->lvIsRegArg) + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(varNum); + if (!abiInfo.HasExactlyOneRegisterSegment()) { + assert(!abiInfo.HasAnyRegisterSegment()); continue; } var_types storeType = varDsc->GetRegisterType(); - regNumber argReg = varDsc->GetArgReg(); + regNumber argReg = abiInfo.Segment(0).GetRegister(); instruction store_ins = ins_Store(storeType); @@ -9719,7 +9864,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) // RCX = ProfilerMethHnd if (compiler->compProfilerMethHndIndirected) { - // Profiler hooks enabled during Ngen time. + // Profiler hooks enabled during AOT. // Profiler handle needs to be accessed through an indirection of a pointer. GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); } @@ -9755,13 +9900,16 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) { noway_assert(varDsc->lvIsParam); - if (!varDsc->lvIsRegArg) + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(varNum); + + if (!abiInfo.HasExactlyOneRegisterSegment()) { + assert(!abiInfo.HasAnyRegisterSegment()); continue; } var_types loadType = varDsc->GetRegisterType(); - regNumber argReg = varDsc->GetArgReg(); + regNumber argReg = abiInfo.Segment(0).GetRegister(); instruction load_ins = ins_Load(loadType); @@ -9795,7 +9943,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) // R14 = ProfilerMethHnd if (compiler->compProfilerMethHndIndirected) { - // Profiler hooks enabled during Ngen time. + // Profiler hooks enabled during AOT. // Profiler handle needs to be accessed through an indirection of a pointer. GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd); @@ -9877,7 +10025,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) // RCX = ProfilerMethHnd if (compiler->compProfilerMethHndIndirected) { - // Profiler hooks enabled during Ngen time. + // Profiler hooks enabled during AOT. // Profiler handle needs to be accessed through an indirection of an address. GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); } @@ -10327,8 +10475,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) } #endif + genClearAvxStateInEpilog(); + // Restore float registers that were saved to stack before SP is modified. - genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize); + genRestoreCalleeSavedFltRegs(); #ifdef JIT32_GCENCODER // When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after @@ -10414,10 +10564,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) if (frameSize > 0) { #ifdef TARGET_X86 - /* Add 'compiler->compLclFrameSize' to ESP */ - /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */ + // Add 'compiler->compLclFrameSize' to ESP. 
Use "pop ECX" for that, except in cases + // where ECX may contain some state. - if ((frameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed) + if ((frameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed && !compiler->compIsAsync()) { inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); regSet.verifyRegUsed(REG_ECX); @@ -10425,8 +10575,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) else #endif // TARGET_X86 { - /* Add 'compiler->compLclFrameSize' to ESP */ - /* Generate "add esp, " */ + // Add 'compiler->compLclFrameSize' to ESP + // Generate "add esp, " inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE); } } @@ -10504,7 +10654,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) // do nothing before popping the callee-saved registers } #ifdef TARGET_X86 - else if (compiler->compLclFrameSize == REGSIZE_BYTES) + else if ((compiler->compLclFrameSize == REGSIZE_BYTES) && !compiler->compJmpOpUsed && + !compiler->compIsAsync()) { // "pop ecx" will make ESP point to the callee-saved registers inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); @@ -10599,19 +10750,18 @@ void CodeGen::genFnEpilog(BasicBlock* block) GenTree* jmpNode = block->lastNode(); #if !FEATURE_FASTTAILCALL // x86 - noway_assert(jmpNode->gtOper == GT_JMP); + noway_assert(jmpNode->OperIs(GT_JMP)); #else // amd64 // If jmpNode is GT_JMP then gtNext must be null. // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. - noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + noway_assert(!jmpNode->OperIs(GT_JMP) || (jmpNode->gtNext == nullptr)); // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp - noway_assert((jmpNode->gtOper == GT_JMP) || - ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + noway_assert(jmpNode->OperIs(GT_JMP) || (jmpNode->OperIs(GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); // The next block is associated with this "if" stmt - if (jmpNode->gtOper == GT_JMP) + if (jmpNode->OperIs(GT_JMP)) #endif { // Simply emit a jump to the methodHnd. This is similar to a call so we can use @@ -10627,52 +10777,34 @@ void CodeGen::genFnEpilog(BasicBlock* block) // If we have IAT_PVALUE we might need to jump via register indirect, as sometimes the // indirection cell can't be reached by the jump. 
- emitter::EmitCallType callType; - void* addr; - regNumber indCallReg; + EmitCallParams params; + params.methHnd = methHnd; if (addrInfo.accessType == IAT_PVALUE) { if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)addrInfo.addr)) { // 32 bit displacement will work - callType = emitter::EC_FUNC_TOKEN_INDIR; - addr = addrInfo.addr; - indCallReg = REG_NA; + params.callType = EC_FUNC_TOKEN_INDIR; + params.addr = addrInfo.addr; } else { // 32 bit displacement won't work - callType = emitter::EC_INDIR_ARD; - indCallReg = REG_RAX; - addr = nullptr; - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); - regSet.verifyRegUsed(indCallReg); + params.callType = EC_INDIR_ARD; + params.ireg = REG_RAX; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_RAX, (ssize_t)addrInfo.addr); + regSet.verifyRegUsed(REG_RAX); } } else { - callType = emitter::EC_FUNC_TOKEN; - addr = addrInfo.addr; - indCallReg = REG_NA; - } - - // clang-format off - GetEmitter()->emitIns_Call(callType, - methHnd, - INDEBUG_LDISASM_COMMA(nullptr) - addr, - 0, // argSize - EA_UNKNOWN // retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - DebugInfo(), - indCallReg, REG_NA, 0, 0, /* ireg, xreg, xmul, disp */ - true /* isJump */ - ); - // clang-format on + params.callType = EC_FUNC_TOKEN; + params.addr = addrInfo.addr; + } + + params.isJump = true; + genEmitCallWithCurrentGC(params); } #if FEATURE_FASTTAILCALL else @@ -10697,10 +10829,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) if (fCalleePop) { - noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * REGSIZE_BYTES); - stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES; - - noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand + stkArgSize = compiler->lvaParameterStackSize; + noway_assert(stkArgSize < 0x10000); // "ret" only has 2 byte operand } #ifdef UNIX_X86_ABI @@ -10725,9 +10855,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) * * Funclets have the following incoming arguments: * - * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG) - * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG) - * finally/fault: rcx = InitialSP + * catch/filter-handler: rcx/rdi = the exception object that was caught (see GT_CATCH_ARG) + * filter: rcx/rdi = the exception object to filter (see GT_CATCH_ARG) + * finally/fault: none * * Funclets set the following registers on exit: * @@ -10737,34 +10867,13 @@ void CodeGen::genFnEpilog(BasicBlock* block) * * The AMD64 funclet prolog sequence is: * - * push ebp - * push callee-saved regs - * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use - * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for - * ; the entire function. - * sub sp, XXX ; Establish the rest of the frame. - * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned - * ; up to preserve stack alignment. If we push an odd number of registers, we also - * ; generate this, to keep the stack aligned. - * - * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested - * ; filters. - * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet - * ; epilog. 
- * ; Also, re-establish the frame pointer from the PSP. - * - * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the - * ; PSP of the dynamically containing funclet or function) - * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame - * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If - * ; Function_InitialSP_to_FP_delta==0, we don't need this - * ; instruction. + * sub sp, XXX ; Establish the frame. + * ; XXX is determined by lvaOutgoingArgSpaceSize, aligned up to preserve stack alignment. + * ; If we push an odd number of registers, we also generate this, to keep the stack aligned. * * The epilog sequence is then: * * add rsp, XXX - * pop callee-saved regs ; if necessary - * pop rbp * ret * * The funclet frame is thus: @@ -10776,15 +10885,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) * +=======================+ <---- Caller's SP * | Return address | * |-----------------------| - * | Saved EBP | - * |-----------------------| - * |Callee saved registers | - * |-----------------------| * ~ possible 8 byte pad ~ * ~ for alignment ~ * |-----------------------| - * | PSP slot | // Omitted in NativeAOT ABI - * |-----------------------| * | Outgoing arg space | // this only exists if the function makes a call * |-----------------------| <---- Initial SP * | | | @@ -10792,10 +10895,6 @@ void CodeGen::genFnEpilog(BasicBlock* block) * | | downward | * V * - * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this - * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64 - * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h - * "FRAMEPTR OFFSETS" for details. */ void CodeGen::genFuncletProlog(BasicBlock* block) @@ -10809,7 +10908,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(!regSet.rsRegsModified(RBM_FPBASE)); assert(block != nullptr); - assert(block->HasFlag(BBF_FUNCLET_BEG)); + assert(compiler->bbIsFuncletBeg(block)); assert(isFramePointerUsed()); ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); @@ -10818,19 +10917,10 @@ void CodeGen::genFuncletProlog(BasicBlock* block) compiler->unwindBegProlog(); - // We need to push ebp, since it's callee-saved. - // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't - // keep track of that on a per-funclet basis, so we push the same set as in the main function. - // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else + // We do not need to push callee-saved registers. The runtime takes care of preserving them. + // We do not need to allocate fixed-size frame, since nothing else // is stored here (all temps are allocated in the parent frame). - // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same - // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same. - - inst_RV(INS_push, REG_FPBASE, TYP_REF); - compiler->unwindPush(REG_FPBASE); - - // Callee saved int registers are pushed to stack. - genPushCalleeSavedRegisters(); + // We do need to allocate the outgoing argument space, in case there are calls here. 
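With the PSP slot and callee-saved pushes gone, the funclet frame described above is just the return address plus the outgoing argument area, padded to keep the stack 16-byte aligned. A standalone sketch of that arithmetic, using a made-up 40-byte outgoing area (REGSIZE_BYTES is 8 on AMD64; the padding computation below mirrors what the AlignmentPad helper does):

```cpp
#include <cstdio>

int main()
{
    unsigned outgoingArgSpace = 40;                                // lvaOutgoingArgSpaceSize (hypothetical)
    unsigned totalFrameSize   = 8 + outgoingArgSpace;              // return address + outgoing args = 48
    unsigned pad              = (16 - (totalFrameSize % 16)) % 16; // 0 here, since 48 is 16-byte aligned
    unsigned spDelta          = pad + outgoingArgSpace;            // 40: what "sub rsp, XXX" uses

    // With a 32-byte outgoing area instead: 8 + 32 = 40, pad = 8, spDelta = 40,
    // so SP still lands on a 16-byte boundary after the prolog.
    printf("sub rsp, %u\n", spDelta);                              // prints "sub rsp, 40"
    return 0;
}
```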
regMaskTP maskArgRegsLiveIn; if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) @@ -10842,38 +10932,13 @@ void CodeGen::genFuncletProlog(BasicBlock* block) maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2; } - regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed - bool initRegZeroed = false; - - genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn); - - // Callee saved float registers are copied to stack in their assigned stack slots - // after allocating space for them as part of funclet frame. - genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta); + bool initRegZeroed = false; + genAllocLclFrame(genFuncletInfo.fiSpDelta, REG_NA, &initRegZeroed, maskArgRegsLiveIn); // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - // If there is no PSPSym (NativeAOT ABI), we are done. - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - GetEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset); - - regSet.verifyRegUsed(REG_FPBASE); - - GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset); - - if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0) - { - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE, - genFuncletInfo.fiFunction_InitialSP_to_FP_delta); - } - - // We've modified EBP, but not really. Say that we haven't... - regSet.rsRemoveRegsModified(RBM_FPBASE); + genClearAvxStateInProlog(); } /***************************************************************************** @@ -10894,12 +10959,9 @@ void CodeGen::genFuncletEpilog() ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); - // Restore callee saved XMM regs from their stack slots before modifying SP - // to position at callee saved int regs. - genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta); + genClearAvxStateInEpilog(); + inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE); - genPopCalleeSavedRegisters(); - inst_RV(INS_pop, REG_EBP, TYP_I_IMPL); instGen_Return(0); } @@ -10921,13 +10983,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert(isFramePointerUsed()); assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be // finalized - assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized - - // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize, - // that's ok, because we're figuring out an offset in the parent frame. - genFuncletInfo.fiFunction_InitialSP_to_FP_delta = - compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame - // pointer. assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); #ifndef UNIX_AMD64_ABI @@ -10938,53 +10993,22 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() #endif // UNIX_AMD64_ABI unsigned offset = compiler->lvaOutgoingArgSpaceSize; - genFuncletInfo.fiPSP_slot_InitialSP_offset = offset; - // How much stack do we allocate in the funclet? // We need to 16-byte align the stack. - unsigned totalFrameSize = - REGSIZE_BYTES // return address - + REGSIZE_BYTES // pushed EBP - + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP - - // Entire 128-bits of XMM register is saved to stack due to ABI encoding requirement. 
- // Copying entire XMM register to/from memory will be performant if SP is aligned at XMM_REGSIZE_BYTES boundary. - unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES; - unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0; - - unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0; - - totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs - + calleeFPRegsSavedSize // pushed callee-saved float regs - // below calculated 'pad' will go here - + PSPSymSize // PSPSym - + compiler->lvaOutgoingArgSpaceSize // outgoing arg space - ; + unsigned totalFrameSize = REGSIZE_BYTES // return address + + compiler->lvaOutgoingArgSpaceSize; unsigned pad = AlignmentPad(totalFrameSize, 16); - genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary - + calleeFPRegsSavedSize // Callee saved xmm regs - + pad + PSPSymSize // PSPSym - + compiler->lvaOutgoingArgSpaceSize // outgoing arg space - ; + genFuncletInfo.fiSpDelta = pad + compiler->lvaOutgoingArgSpaceSize; #ifdef DEBUG if (verbose) { printf("\n"); printf("Funclet prolog / epilog info\n"); - printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); - printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset); - } - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(genFuncletInfo.fiPSP_slot_InitialSP_offset == - compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! } #endif // DEBUG } @@ -11030,12 +11054,19 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - // TODO We may need EBP restore sequence here if we introduce PSPSym - #ifdef UNIX_X86_ABI // Add a padding for 16-byte alignment inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE); +#else + if (!compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + { + // Funclet prologs need to have at least 1 byte or the IL->Native mapping data will not + // include the first IL instruction in the funclet. + instGen(INS_nop); + } #endif + + genClearAvxStateInProlog(); } /***************************************************************************** @@ -11054,6 +11085,8 @@ void CodeGen::genFuncletEpilog() ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + genClearAvxStateInEpilog(); + #ifdef UNIX_X86_ABI // Revert a padding that was added for 16-byte alignment inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE); @@ -11077,35 +11110,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() #endif // TARGET_X86 -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - -#if defined(TARGET_AMD64) - - // The PSP sym value is Initial-SP, not Caller-SP! - // We assume that RSP is Initial-SP when this function is called. That is, the stack frame - // has been established. 
- // - // We generate: - // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym - - GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0); - -#else // TARGET* - - NYI("Set function PSP sym"); - -#endif // TARGET* -} - //----------------------------------------------------------------------------- // genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case // `genUseBlockInit` is set. @@ -11257,29 +11261,45 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // movups xmmword ptr [ebp/esp-OFFS], xmm4 // ... // movups xmmword ptr [ebp/esp-OFFS], xmm4 - // mov qword ptr [ebp/esp-OFFS], rax - // + // NOTE: it implicitly zeroes YMM4 and ZMM4 as well. emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, zeroSIMDReg, zeroSIMDReg, zeroSIMDReg, INS_OPTS_NONE); - int i = 0; - if (maxSimdSize > XMM_REGSIZE_BYTES) + assert((blkSize % XMM_REGSIZE_BYTES) == 0); + + int regSize = (int)compiler->roundDownSIMDSize(blkSize); + int lenRemaining = blkSize; + while (lenRemaining > 0) { - for (; i <= blkSize - maxSimdSize; i += maxSimdSize) + // Overlap with the previously zeroed memory if we can clear the remainder + // with just single store. Example: say we have 112 bytes to clear: + // + // Option 1 (no overlapping): + // movups zmmword ptr [+0] + // movups ymmword ptr [+64] + // movups xmmword ptr [+96] + // + // Option 2 (overlapping): + // movups zmmword ptr [+0] + // movups zmmword ptr [+48] + // + if ((regSize > lenRemaining) && !isPow2(lenRemaining)) { - // We previously aligned data to 16 bytes which might not be aligned to maxSimdSize - emit->emitIns_AR_R(simdUnalignedMovIns(), EA_ATTR(maxSimdSize), zeroSIMDReg, frameReg, - alignedLclLo + i); + lenRemaining = regSize; } - // Remainder will be handled by the xmm loop below - } - for (; i < blkSize; i += XMM_REGSIZE_BYTES) - { - emit->emitIns_AR_R(simdMov, EA_ATTR(XMM_REGSIZE_BYTES), zeroSIMDReg, frameReg, alignedLclLo + i); - } + // Use the largest SIMD register size that fits in the remaining length + regSize = (int)compiler->roundDownSIMDSize(lenRemaining); + assert(regSize >= XMM_REGSIZE_BYTES); - assert(i == blkSize); + // frameReg is definitely not known to be 32B/64B aligned -> switch to unaligned movs + instruction ins = regSize > XMM_REGSIZE_BYTES ? simdUnalignedMovIns() : simdMov; + const int offset = blkSize - lenRemaining; + emit->emitIns_AR_R(ins, EA_ATTR(regSize), zeroSIMDReg, frameReg, alignedLclLo + offset); + + lenRemaining -= regSize; + } + assert(lenRemaining == 0); } else { @@ -11372,40 +11392,21 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // Save compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE] // Here offset = 16-byte aligned offset after pushing integer registers. -// -// Params -// lclFrameSize - Fixed frame size excluding callee pushed int regs. -// non-funclet: this will be compLclFrameSize. -// funclet frames: this will be FuncletInfo.fiSpDelta. 
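The overlapping-store zeroing strategy in genZeroInitFrameUsingBlockInit above can be seen in isolation with a small standalone sketch. Here roundDownSimdSize is a simplified stand-in for Compiler::roundDownSIMDSize assuming a 64-byte maximum vector, and the 112-byte block size is the figure from the comment's example; it reproduces the "Option 2" sequence of two 64-byte stores at offsets +0 and +48.

```cpp
#include <cstdio>

// Simplified stand-in for Compiler::roundDownSIMDSize: largest of 64/32/16 that fits.
static int roundDownSimdSize(int len)
{
    if (len >= 64) return 64;
    if (len >= 32) return 32;
    if (len >= 16) return 16;
    return 0;
}

static bool isPow2(int v) { return (v > 0) && ((v & (v - 1)) == 0); }

int main()
{
    const int blkSize      = 112; // assumed block size, multiple of 16
    int       regSize      = roundDownSimdSize(blkSize);
    int       lenRemaining = blkSize;
    while (lenRemaining > 0)
    {
        // Overlap with already-zeroed memory when one wider store can finish the job.
        if ((regSize > lenRemaining) && !isPow2(lenRemaining))
        {
            lenRemaining = regSize;
        }
        regSize = roundDownSimdSize(lenRemaining);
        printf("store %d bytes at offset +%d\n", regSize, blkSize - lenRemaining);
        lenRemaining -= regSize;
    }
    // Prints: "store 64 bytes at offset +0" then "store 64 bytes at offset +48".
    return 0;
}
```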
-void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize) +void CodeGen::genPreserveCalleeSavedFltRegs() { regMaskTP regMask = compiler->compCalleeFPRegsSavedMask; // Only callee saved floating point registers should be in regMask assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask); - if (GetEmitter()->ContainsCallNeedingVzeroupper() && !GetEmitter()->Contains256bitOrMoreAVX()) - { - // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states: - // Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean - // between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a - // VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX - // register) and before any call to an unknown function. - - // This method contains a call that needs vzeroupper but also doesn't use 256-bit or higher - // AVX itself. Thus we can optimize to only emitting a single vzeroupper in the function prologue - // This reduces the overall amount of codegen, particularly for more common paths not using any - // SIMD or floating-point. - - instGen(INS_vzeroupper); - } - // fast path return if (regMask == RBM_NONE) { return; } + unsigned lclFrameSize = compiler->compLclFrameSize; + #ifdef TARGET_AMD64 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0; unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES; @@ -11435,35 +11436,21 @@ void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize) // Save/Restore compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE] // Here offset = 16-byte aligned offset after pushing integer registers. -// -// Params -// lclFrameSize - Fixed frame size excluding callee pushed int regs. -// non-funclet: this will be compLclFrameSize. -// funclet frames: this will be FuncletInfo.fiSpDelta. -void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) +void CodeGen::genRestoreCalleeSavedFltRegs() { regMaskTP regMask = compiler->compCalleeFPRegsSavedMask; // Only callee saved floating point registers should be in regMask assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask); - if (GetEmitter()->Contains256bitOrMoreAVX()) - { - // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states: - // Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean - // between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a - // VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX - // register) and before any call to an unknown function. - - instGen(INS_vzeroupper); - } - // fast path return if (regMask == RBM_NONE) { return; } + unsigned lclFrameSize = compiler->compLclFrameSize; + #ifdef TARGET_AMD64 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? 
REGSIZE_BYTES : 0; instruction copyIns = ins_Copy(TYP_FLOAT); @@ -11505,6 +11492,45 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) } } +//----------------------------------------------------------------------------------- +// genClearAvxStateInProlog: Generate vzeroupper instruction to clear AVX state if necessary in a prolog +// +void CodeGen::genClearAvxStateInProlog() +{ + if (GetEmitter()->ContainsCallNeedingVzeroupper() && !GetEmitter()->Contains256bitOrMoreAVX()) + { + // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states: + // Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean + // between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a + // VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX + // register) and before any call to an unknown function. + + // This method contains a call that needs vzeroupper but also doesn't use 256-bit or higher + // AVX itself. Thus we can optimize to only emitting a single vzeroupper in the function prologue + // This reduces the overall amount of codegen, particularly for more common paths not using any + // SIMD or floating-point. + + instGen(INS_vzeroupper); + } +} + +//----------------------------------------------------------------------------------- +// genClearAvxStateInEpilog: Generate vzeroupper instruction to clear AVX state if necessary in an epilog +// +void CodeGen::genClearAvxStateInEpilog() +{ + if (GetEmitter()->Contains256bitOrMoreAVX()) + { + // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states: + // Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean + // between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a + // VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX + // register) and before any call to an unknown function. 
+ + instGen(INS_vzeroupper); + } +} + //----------------------------------------------------------------------------------- // instGen_MemoryBarrier: Emit a MemoryBarrier instruction // @@ -11615,7 +11641,7 @@ bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr) // bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr) { - // If generating relocatable ngen code, then all code addr should go through relocation + // If generating relocatable AOT code, then all code addr should go through relocation if (compiler->opts.compReloc) { return true; @@ -11648,7 +11674,7 @@ bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr) // bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr) { - // If generating relocatable ngen code, then all code addr should go through relocation + // If generating relocatable AOT code, then all code addr should go through relocation if (compiler->opts.compReloc) { return true; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index cf703e7c099a..c08a6dda5eae 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -22,6 +22,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "stacklevelsetter.h" #include "patchpointinfo.h" #include "jitstd/algorithm.h" +#include "minipal/time.h" extern ICorJitHost* g_jitHost; @@ -296,40 +297,6 @@ Histogram computeReachabilitySetsIterationTable(computeReachabilitySetsIteration #endif // COUNT_BASIC_BLOCKS -/***************************************************************************** - * - * Used by optFindNaturalLoops to gather statistical information such as - * - total number of natural loops - * - number of loops with 1, 2, ... exit conditions - * - number of loops that have an iterator (for like) - * - number of loops that have a constant iterator - */ - -#if COUNT_LOOPS - -unsigned totalLoopMethods; // counts the total number of methods that have natural loops -unsigned maxLoopsPerMethod; // counts the maximum number of loops a method has -unsigned totalLoopCount; // counts the total number of natural loops -unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops -unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent -unsigned iterLoopCount; // counts the # of loops with an iterator (for like) -unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like) -bool hasMethodLoops; // flag to keep track if we already counted a method as having loops -unsigned loopsThisMethod; // counts the number of loops in the current method -bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method. 
- -/* Histogram for number of loops in a method */ - -unsigned loopCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0}; -Histogram loopCountTable(loopCountBuckets); - -/* Histogram for number of loop exits */ - -unsigned loopExitCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 0}; -Histogram loopExitCountTable(loopExitCountBuckets); - -#endif // COUNT_LOOPS - Compiler::Compiler(ArenaAllocator* arena, CORINFO_METHOD_HANDLE methodHnd, COMP_HANDLE compHnd, @@ -506,8 +473,9 @@ Compiler::Compiler(ArenaAllocator* arena, info.compILCodeSize = methodInfo->ILCodeSize; info.compILImportSize = 0; - info.compHasNextCallRetAddr = false; - info.compIsVarArgs = false; + info.compHasNextCallRetAddr = false; + info.compIsVarArgs = false; + info.compUsesAsyncContinuation = false; } //------------------------------------------------------------------------ @@ -657,7 +625,7 @@ bool Compiler::isNativePrimitiveStructType(CORINFO_CLASS_HANDLE clsHnd) // For vector calling conventions, a vector is considered a "primitive" // type, as it is passed in a single register. // -var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd, bool isVarArg) +var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd) { assert(structSize != 0); @@ -666,37 +634,32 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS // Start by determining if we have an HFA/HVA with a single element. if (GlobalJitOptions::compFeatureHfa) { - // Arm64 Windows VarArg methods arguments will not classify HFA types, they will need to be treated - // as if they are not HFA types. - if (!(TargetArchitecture::IsArm64 && TargetOS::IsWindows && isVarArg)) + switch (structSize) { - switch (structSize) - { - case 4: - case 8: + case 4: + case 8: #ifdef TARGET_ARM64 - case 16: + case 16: #endif // TARGET_ARM64 + { + var_types hfaType = GetHfaType(clsHnd); + // We're only interested in the case where the struct size is equal to the size of the hfaType. + if (varTypeIsValidHfaType(hfaType)) { - var_types hfaType = GetHfaType(clsHnd); - // We're only interested in the case where the struct size is equal to the size of the hfaType. - if (varTypeIsValidHfaType(hfaType)) + if (genTypeSize(hfaType) == structSize) { - if (genTypeSize(hfaType) == structSize) - { - useType = hfaType; - } - else - { - return TYP_UNKNOWN; - } + useType = hfaType; + } + else + { + return TYP_UNKNOWN; } } } - if (useType != TYP_UNKNOWN) - { - return useType; - } + } + if (useType != TYP_UNKNOWN) + { + return useType; } } @@ -752,6 +715,7 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS } //----------------------------------------------------------------------------- +<<<<<<< HEAD // getArgTypeForStruct: // Get the type that is used to pass values of the given struct type. // If you have already retrieved the struct size then it should be @@ -981,6 +945,8 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, } //----------------------------------------------------------------------------- +======= +>>>>>>> upstream-jun // getReturnTypeForStruct: // Get the type that is used to return values of the given struct type. 
// If you have already retrieved the struct size then it should be @@ -1162,7 +1128,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // // The ABI for struct returns in varArg methods, is same as the normal case, // so pass false for isVararg - useType = getPrimitiveTypeForStruct(structSize, clsHnd, /*isVararg=*/false); + useType = getPrimitiveTypeForStruct(structSize, clsHnd); if (useType != TYP_UNKNOWN) { @@ -1792,32 +1758,6 @@ void Compiler::compShutdown() #endif // COUNT_BASIC_BLOCKS -#if COUNT_LOOPS - - jitprintf("\n"); - jitprintf("---------------------------------------------------\n"); - jitprintf("Loop stats\n"); - jitprintf("---------------------------------------------------\n"); - jitprintf("Total number of methods with loops is %5u\n", totalLoopMethods); - jitprintf("Total number of loops is %5u\n", totalLoopCount); - jitprintf("Maximum number of loops per method is %5u\n", maxLoopsPerMethod); - jitprintf("Total number of 'unnatural' loops is %5u\n", totalUnnatLoopCount); - jitprintf("# of methods overflowing unnat loop limit is %5u\n", totalUnnatLoopOverflows); - jitprintf("Total number of loops with an iterator is %5u\n", iterLoopCount); - jitprintf("Total number of loops with a constant iterator is %5u\n", constIterLoopCount); - - jitprintf("--------------------------------------------------\n"); - jitprintf("Loop count frequency table:\n"); - jitprintf("--------------------------------------------------\n"); - loopCountTable.dump(jitstdout()); - jitprintf("--------------------------------------------------\n"); - jitprintf("Loop exit count frequency table:\n"); - jitprintf("--------------------------------------------------\n"); - loopExitCountTable.dump(jitstdout()); - jitprintf("--------------------------------------------------\n"); - -#endif // COUNT_LOOPS - #if MEASURE_NODE_SIZE jitprintf("\n"); @@ -1951,7 +1891,7 @@ void Compiler::compShutdown() jitprintf(" NYI: %u\n", fatal_NYI); #endif // MEASURE_FATAL -#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC +#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC DumpOnShutdown::DumpAll(); #endif } @@ -2241,75 +2181,73 @@ void Compiler::compSetProcessor() opts.compSupportsISAReported.Reset(); opts.compSupportsISAExactly.Reset(); -// The VM will set the ISA flags depending on actual hardware support -// and any specified config switches specified by the user. The exception -// here is for certain "artificial ISAs" such as Vector64/128/256 where they -// don't actually exist. The JIT is in charge of adding those and ensuring -// the total sum of flags is still valid. + // The VM will set the ISA flags depending on actual hardware support and any + // config values specified by the user. Config may cause the VM to omit baseline + // ISAs from the supported set. We force their inclusion here so that JIT code + // can use them unconditionally, but we will honor the config when resolving + // managed HWIntrinsic methods. + // + // We also take care of adding the virtual vector ISAs (i.e. Vector64/128/256/512) + // here, based on the combination of hardware ISA support and config values. 
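The handshake described in the comment above boils down to: the VM picks at most one of the Vector128/256/512 markers to communicate a preferred width, the JIT consumes it into a byte length, and the virtual vector ISAs are then rebuilt purely from hardware support. A tiny standalone sketch of that mapping (the enum and function below are simplified stand-ins, not JIT types):

```cpp
#include <cstdio>

enum class MarkerIsa { None, Vector128, Vector256, Vector512 };

// Byte length implied by the marker ISA, i.e. bit width / BITS_PER_BYTE.
static unsigned PreferredVectorByteLength(MarkerIsa marker)
{
    switch (marker)
    {
        case MarkerIsa::Vector128: return 128 / 8; // 16
        case MarkerIsa::Vector256: return 256 / 8; // 32
        case MarkerIsa::Vector512: return 512 / 8; // 64
        default:                   return 0;       // no explicit preference
    }
}

int main()
{
    printf("%u\n", PreferredVectorByteLength(MarkerIsa::Vector256)); // prints 32
    return 0;
}
```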
+ #if defined(TARGET_XARCH) - // Get the preferred vector bitwidth, rounding down to the nearest multiple of 128-bits - uint32_t preferredVectorBitWidth = (ReinterpretHexAsDecimal(JitConfig.PreferredVectorBitWidth()) / 128) * 128; - uint32_t preferredVectorByteLength = preferredVectorBitWidth / 8; + // If the VM passed in a virtual vector ISA, it was done to communicate PreferredVectorBitWidth. + // No check is done for the validity of the value, since it will be clamped to max supported by + // hardware and config when queried. We will, therefore, remove the marker ISA and allow it to + // be re-added if appropriate based on the hardware ISA evaluations below. - if (instructionSetFlags.HasInstructionSet(InstructionSet_SSE)) + uint32_t preferredVectorBitWidth = 0; + if (instructionSetFlags.HasInstructionSet(InstructionSet_Vector128)) { - instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); + instructionSetFlags.RemoveInstructionSet(InstructionSet_Vector128); + preferredVectorBitWidth = 128; } - - if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX)) + else if (instructionSetFlags.HasInstructionSet(InstructionSet_Vector256)) { - instructionSetFlags.AddInstructionSet(InstructionSet_Vector256); + instructionSetFlags.RemoveInstructionSet(InstructionSet_Vector256); + preferredVectorBitWidth = 256; } - - if (instructionSetFlags.HasInstructionSet(InstructionSet_EVEX)) + else if (instructionSetFlags.HasInstructionSet(InstructionSet_Vector512)) { - if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)) - { - // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL - // These have been shipped together historically and at the time of this writing - // there exists no hardware which doesn't support the entire feature set. To simplify - // the overall JIT implementation, we currently require the entire set of ISAs to be - // supported and disable AVX512 support otherwise. + instructionSetFlags.RemoveInstructionSet(InstructionSet_Vector512); + preferredVectorBitWidth = 512; + } - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)); + opts.preferredVectorByteLength = preferredVectorBitWidth / BITS_PER_BYTE; - instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); + // Only one marker ISA should have been passed in, and it should now be cleared. + assert(!instructionSetFlags.HasInstructionSet(InstructionSet_Vector128) && + !instructionSetFlags.HasInstructionSet(InstructionSet_Vector256) && + !instructionSetFlags.HasInstructionSet(InstructionSet_Vector512)); - if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING)) - { - // Some architectures can experience frequency throttling when - // executing 512-bit width instructions. To account for this we set the - // default preferred vector width to 256-bits in some scenarios. 
Power - // users can override this with `DOTNET_PreferredVectorBitWidth=512` to - // allow using such instructions where hardware support is available. - // - // Do not condition this based on stress mode as it makes the support - // reported inconsistent across methods and breaks expectations/functionality + // Ensure required baseline ISAs are supported in JIT code, even if not passed in by the VM. + instructionSetFlags.AddInstructionSet(InstructionSet_X86Base); +#ifdef TARGET_AMD64 + instructionSetFlags.AddInstructionSet(InstructionSet_X86Base_X64); +#endif // TARGET_AMD64 - preferredVectorByteLength = 256 / 8; - } - } - else - { - // We shouldn't have EVEX enabled if neither AVX512 nor AVX10v1 are supported - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX10v1)); - } + // We can now add the virtual vector ISAs as appropriate. Vector128 is part of the required baseline. + instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); + + if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX)) + { + instructionSetFlags.AddInstructionSet(InstructionSet_Vector256); } - opts.preferredVectorByteLength = preferredVectorByteLength; -#elif defined(TARGET_ARM64) - if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd)) + if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512)) { - instructionSetFlags.AddInstructionSet(InstructionSet_Vector64); - instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); + instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); } +#elif defined(TARGET_ARM64) + // Ensure required baseline ISAs are supported in JIT code, even if not passed in by the VM. + instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase); + instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase_Arm64); + instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd); + instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd_Arm64); + + // Add virtual vector ISAs. These are both supported as part of the required baseline. + instructionSetFlags.AddInstructionSet(InstructionSet_Vector64); + instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); #endif // TARGET_ARM64 assert(instructionSetFlags.Equals(EnsureInstructionSetFlagsAreValid(instructionSetFlags))); @@ -2553,7 +2491,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #ifdef DEBUG const JitConfigValues::MethodSet* pfAltJit; - if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_AOT)) { pfAltJit = &JitConfig.AltJitNgen(); } @@ -2579,7 +2517,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #else // !DEBUG const char* altJitVal; - if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_AOT)) { altJitVal = JitConfig.AltJitNgen().list(); } @@ -2812,6 +2750,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) fgPgoDisabled = true; } } +#endif // DEBUG // A successful result implies a non-NULL fgPgoSchema // @@ -2831,6 +2770,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags) break; } } + + // Stash pointers to PGO info on the context so + // we can access contextually it later. 
+ // + compInlineContext->SetPgoInfo(PgoInfo(this)); } // A failed result implies a NULL fgPgoSchema @@ -2840,9 +2784,20 @@ void Compiler::compInitOptions(JitFlags* jitFlags) { assert(fgPgoSchema == nullptr); } -#endif // DEBUG } + bool enableInliningMethodsWithEH = JitConfig.JitInlineMethodsWithEH() > 0; + +#ifdef DEBUG + static ConfigMethodRange JitInlineMethodsWithEHRange; + JitInlineMethodsWithEHRange.EnsureInit(JitConfig.JitInlineMethodsWithEHRange()); + const unsigned hash = impInlineRoot()->info.compMethodHash(); + const bool inRange = JitInlineMethodsWithEHRange.Contains(hash); + enableInliningMethodsWithEH &= inRange; +#endif + + opts.compInlineMethodsWithEH = enableInliningMethodsWithEH; + if (compIsForInlining()) { return; @@ -3179,13 +3134,13 @@ void Compiler::compInitOptions(JitFlags* jitFlags) // Honour DOTNET_JitELTHookEnabled or STRESS_PROFILER_CALLBACKS stress mode // only if VM has not asked us to generate profiler hooks in the first place. // That is, override VM only if it hasn't asked for a profiler callback for this method. - // Don't run this stress mode when pre-JITing, as we would need to emit a relocation + // Don't run this stress mode under AOT, as we would need to emit a relocation // for the call to the fake ELT hook, which wouldn't make sense, as we can't store that - // in the pre-JIT image. + // in the AOT image. if (!compProfilerHookNeeded) { if ((JitConfig.JitELTHookEnabled() != 0) || - (!jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && compStressCompile(STRESS_PROFILER_CALLBACKS, 5))) + (!jitFlags->IsSet(JitFlags::JIT_FLAG_AOT) && compStressCompile(STRESS_PROFILER_CALLBACKS, 5))) { opts.compJitELTHookEnabled = true; } @@ -3301,7 +3256,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) if (opts.compProcedureSplitting) { - // Note that opts.compdbgCode is true under ngen for checked assemblies! + // Note that opts.compDbgCode is true under AOT for checked assemblies! opts.compProcedureSplitting = !opts.compDbgCode || enableFakeSplitting; #ifdef DEBUG @@ -3407,9 +3362,14 @@ void Compiler::compInitOptions(JitFlags* jitFlags) printf("OPTIONS: optimizer should use profile data\n"); } - if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_AOT)) { - printf("OPTIONS: Jit invoked for ngen\n"); + printf("OPTIONS: Jit invoked for AOT\n"); + } + + if (compIsAsync()) + { + printf("OPTIONS: compilation is an async state machine\n"); } } #endif @@ -3452,7 +3412,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) rbmAllInt |= RBM_HIGHINT; rbmIntCalleeTrash |= RBM_HIGHINT; cntCalleeTrashInt += CNT_CALLEE_TRASH_HIGHINT; - regIntLast = REG_R23; + regIntLast = REG_R31; } #endif // TARGET_AMD64 @@ -3486,15 +3446,15 @@ void Compiler::compInitOptions(JitFlags* jitFlags) bool Compiler::compJitHaltMethod() { - /* This method returns true when we use an INS_BREAKPOINT to allow us to step into the generated native code */ - /* Note that this these two "Jit" environment variables also work for ngen images */ + // This method returns true when we use an INS_BREAKPOINT to allow us to step into the generated native code. + // Note that these two "Jit" environment variables also work for AOT images. if (JitConfig.JitHalt().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args)) { return true; } - /* Use this Hash variant when there are a lot of method with the same name and different signatures */ + // Use this Hash variant when there are a lot of method with the same name and different signatures. 
unsigned fJitHashHaltVal = (unsigned)JitConfig.JitHashHalt();

if ((fJitHashHaltVal != (unsigned)-1) && (fJitHashHaltVal == info.compMethodHash()))
@@ -3973,9 +3933,8 @@ void Compiler::compSetOptimizationLevel()
{
theMinOptsValue = true;
}
- // For PREJIT we never drop down to MinOpts
- // unless unless CLFLG_MINOPT is set
- else if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
+ // For AOT we never drop down to MinOpts unless CLFLG_MINOPT is set
+ else if (!IsAot())
{
if ((unsigned)JitConfig.JitMinOptsCodeSize() < info.compILCodeSize)
{
@@ -4015,10 +3974,9 @@ void Compiler::compSetOptimizationLevel()
}
}
#else // !DEBUG
- // Retail check if we should force Minopts due to the complexity of the method
- // For PREJIT we never drop down to MinOpts
- // unless unless CLFLG_MINOPT is set
- if (!theMinOptsValue && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) &&
+ // Retail check if we should force Minopts due to the complexity of the method.
+ // For AOT we never drop down to MinOpts unless CLFLG_MINOPT is set.
+ if (!theMinOptsValue && !IsAot() &&
((DEFAULT_MIN_OPTS_CODE_SIZE < info.compILCodeSize) || (DEFAULT_MIN_OPTS_INSTR_COUNT < opts.instrCount) ||
(DEFAULT_MIN_OPTS_BB_COUNT < fgBBcount) || (DEFAULT_MIN_OPTS_LV_NUM_COUNT < lvaCount) ||
(DEFAULT_MIN_OPTS_LV_REF_COUNT < opts.lvRefCount)))
@@ -4088,14 +4046,13 @@ void Compiler::compSetOptimizationLevel()
codeGen->setFrameRequired(true);
#endif

- if (opts.OptimizationDisabled() ||
- (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && !IsTargetAbi(CORINFO_NATIVEAOT_ABI)))
+ if (opts.OptimizationDisabled() || IsReadyToRun())
{
- // The JIT doesn't currently support loop alignment for prejitted images outside NativeAOT.
+ // The JIT doesn't currently support loop alignment for AOT images outside NativeAOT.
// (The JIT doesn't know the final address of the code, hence
// it can't align code based on unknown addresses.)
- codeGen->SetAlignLoops(false); // loop alignment not supported for prejitted code
+ codeGen->SetAlignLoops(false); // loop alignment not supported for AOT code
}
else
{
@@ -4128,8 +4085,8 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState)
JITDUMP("\n"
"compRsvdRegCheck\n"
" frame size = %6d\n"
- " compArgSize = %6d\n",
- frameSize, compArgSize);
+ " lvaParameterStackSize = %6d\n",
+ frameSize, lvaParameterStackSize);

if (opts.MinOpts())
{
@@ -4199,7 +4156,7 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState)
JITDUMP(" maxR11NegativeEncodingOffset = %6d\n", maxR11NegativeEncodingOffset);

// -1 because otherwise we are computing the address just beyond the last argument, which we don't need to do.
- unsigned maxR11PositiveOffset = compArgSize + (2 * REGSIZE_BYTES) - 1;
+ unsigned maxR11PositiveOffset = lvaParameterStackSize + (2 * REGSIZE_BYTES) - 1;
JITDUMP(" maxR11PositiveOffset = %6d\n", maxR11PositiveOffset);

// The value is positive, but represents a negative offset from R11.
@@ -4230,8 +4187,8 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState)
JITDUMP(" maxSPPositiveEncodingOffset = %6d\n", maxSPPositiveEncodingOffset);

// -1 because otherwise we are computing the address just beyond the last argument, which we don't need to do.
- assert(compArgSize + frameSize > 0); - unsigned maxSPPositiveOffset = compArgSize + frameSize - 1; + assert(lvaParameterStackSize + frameSize > 0); + unsigned maxSPPositiveOffset = lvaParameterStackSize + frameSize - 1; if (codeGen->isFramePointerUsed()) { @@ -4916,6 +4873,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoopsPhase); + // Re-establish profile consistency, now that inlining and morph have run. + // + DoPhase(this, PHASE_REPAIR_PROFILE_POST_MORPH, &Compiler::fgRepairProfile); + // Scale block weights and mark run rarely blocks. // DoPhase(this, PHASE_SET_BLOCK_WEIGHTS, &Compiler::optSetBlockWeights); @@ -4977,6 +4938,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl bool doAssertionProp = true; bool doVNBasedIntrinExpansion = true; bool doRangeAnalysis = true; + bool doRangeCheckCloning = true; bool doVNBasedDeadStoreRemoval = true; #if defined(OPT_CONFIG) @@ -4989,6 +4951,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl doCse = doValueNum; doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0); doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0); + doRangeCheckCloning = doValueNum && doRangeAnalysis; doOptimizeIVs = doAssertionProp && (JitConfig.JitDoOptimizeIVs() != 0); doVNBasedDeadStoreRemoval = doValueNum && (JitConfig.JitDoVNBasedDeadStoreRemoval() != 0); doVNBasedIntrinExpansion = doValueNum; @@ -5101,6 +5064,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_VN_BASED_DEAD_STORE_REMOVAL, &Compiler::optVNBasedDeadStoreRemoval); } + if (doRangeCheckCloning) + { + // Clone blocks with subsequent bounds checks + // + DoPhase(this, PHASE_RANGE_CHECK_CLONING, &Compiler::optRangeCheckCloning); + } + if (doVNBasedIntrinExpansion) { // Expand some intrinsics based on VN data @@ -5206,13 +5176,17 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion); - // Optimize block order + // Conditional to switch conversion, and switch peeling // - DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout); + DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optRecognizeAndOptimizeSwitchJumps); - // Conditional to Switch conversion + // Run flow optimizations before reordering blocks // - DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optSwitchRecognition); + DoPhase(this, PHASE_OPTIMIZE_PRE_LAYOUT, &Compiler::optOptimizePreLayout); + + // Ensure profile is consistent before starting backend phases + // + DoPhase(this, PHASE_REPAIR_PROFILE_PRE_LAYOUT, &Compiler::fgRepairProfile); } #ifdef DEBUG @@ -5285,6 +5259,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl } #endif // TARGET_ARM + if (compIsAsync()) + { + DoPhase(this, PHASE_ASYNC, &Compiler::TransformAsync); + } + // Assign registers to variables, etc. // Create LinearScan before Lowering, so that Lowering can call LinearScan methods @@ -5302,6 +5281,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl stackLevelSetter.Run(); m_pLowering->FinalizeOutgoingArgSpace(); + FinalizeEH(); + // We can not add any new tracked variables after this point. 
lvaTrackedFixed = true; @@ -5320,40 +5301,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // We won't introduce new blocks from here on out, // so run the new block layout. // - if (JitConfig.JitDoReversePostOrderLayout()) - { - auto lateLayoutPhase = [this] { - // Skip preliminary reordering passes to create more work for 3-opt layout - if (compStressCompile(STRESS_THREE_OPT_LAYOUT, 10)) - { - m_dfsTree = fgComputeDfs(); - } - else - { - fgDoReversePostOrderLayout(); - fgMoveColdBlocks(); - } - - fgSearchImprovedLayout(); - fgInvalidateDfsTree(); - - if (compHndBBtabCount != 0) - { - fgRebuildEHRegions(); - } - - return PhaseStatus::MODIFIED_EVERYTHING; - }; - - DoPhase(this, PHASE_OPTIMIZE_LAYOUT, lateLayoutPhase); - } - else - { - // If we didn't run 3-opt, we might still have a profile-aware DFS tree computed during LSRA available. - // This tree's presence can trigger asserts if pre/postorder numbers are recomputed, - // so invalidate the tree either way. - fgInvalidateDfsTree(); - } + DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::fgSearchImprovedLayout); // Now that the flowgraph is finalized, run post-layout optimizations. // @@ -5458,6 +5406,100 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl #endif // FUNC_INFO_LOGGING } +//---------------------------------------------------------------------------------------------- +// FinalizeEH: Finalize EH information +// +void Compiler::FinalizeEH() +{ +#if defined(FEATURE_EH_WINDOWS_X86) + + // Grab space for exception handling info on the frame + // + if (!UsesFunclets() && ehNeedsShadowSPslots()) + { + // Recompute the handler nesting levels, as they may have changed. + // + unsigned const oldHandlerNestingCount = ehMaxHndNestingCount; + ehMaxHndNestingCount = 0; + + if (compHndBBtabCount > 0) + { + for (int XTnum = compHndBBtabCount - 1; XTnum >= 0; XTnum--) + { + EHblkDsc* const HBtab = &compHndBBtab[XTnum]; + unsigned const enclosingHndIndex = HBtab->ebdEnclosingHndIndex; + + if (enclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + EHblkDsc* const enclosingHBtab = &compHndBBtab[enclosingHndIndex]; + unsigned const newNestingLevel = enclosingHBtab->ebdHandlerNestingLevel + 1; + HBtab->ebdHandlerNestingLevel = (unsigned short)newNestingLevel; + + if (newNestingLevel > ehMaxHndNestingCount) + { + ehMaxHndNestingCount = newNestingLevel; + } + } + else + { + HBtab->ebdHandlerNestingLevel = 0; + } + } + + // When there is EH, we need to record nesting level + 1 + // + ehMaxHndNestingCount++; + } + + if (oldHandlerNestingCount != ehMaxHndNestingCount) + { + JITDUMP("Finalize EH: max handler nesting count now %u (was %u)\n", oldHandlerNestingCount, + ehMaxHndNestingCount); + } + + // The first slot is reserved for ICodeManager::FixContext(ppEndRegion) + // ie. 
the offset of the end-of-last-executed-filter + unsigned slotsNeeded = 1; + + unsigned handlerNestingLevel = ehMaxHndNestingCount; + + if (opts.compDbgEnC && (handlerNestingLevel < (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL)) + handlerNestingLevel = (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL; + + slotsNeeded += handlerNestingLevel; + + // For a filter (which can be active at the same time as a catch/finally handler) + slotsNeeded++; + // For zero-termination of the shadow-Stack-pointer chain + slotsNeeded++; + + lvaShadowSPslotsVar = lvaGrabTempWithImplicitUse(false DEBUGARG("lvaShadowSPslotsVar")); + lvaSetStruct(lvaShadowSPslotsVar, typGetBlkLayout(slotsNeeded * TARGET_POINTER_SIZE), false); + lvaSetVarAddrExposed(lvaShadowSPslotsVar DEBUGARG(AddressExposedReason::EXTERNALLY_VISIBLE_IMPLICITLY)); + } + + // Build up a mapping from EH IDs to EHblkDsc* + // + assert(m_EHIDtoEHblkDsc == nullptr); + + if (compHndBBtabCount > 0) + { + m_EHIDtoEHblkDsc = new (getAllocator()) EHIDtoEHblkDscMap(getAllocator()); + + for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++) + { + EHblkDsc* const HBtab = &compHndBBtab[XTnum]; + m_EHIDtoEHblkDsc->Set(HBtab->ebdID, HBtab); + } + } + +#endif // FEATURE_EH_WINDOWS_X86 + + // We should not make any more alterations to the EH table structure. + // + ehTableFinalized = true; +} + #if FEATURE_LOOP_ALIGN //------------------------------------------------------------------------ @@ -6218,6 +6260,7 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, if (enableAvailableIsas) { + CORINFO_InstructionSetFlags currentInstructionSetFlags = compileFlags->GetInstructionSetFlags(); CORINFO_InstructionSetFlags instructionSetFlags; // We need to assume, by default, that all flags coming from the VM are invalid. @@ -6231,15 +6274,17 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, // needing to have the hardware in question. #if defined(TARGET_ARM64) - if (JitConfig.EnableHWIntrinsic() != 0) + if (info.compMatchedVM) { - instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase); + // Keep the existing VectorT* ISAs. + if (currentInstructionSetFlags.HasInstructionSet(InstructionSet_VectorT128)) + { + instructionSetFlags.AddInstructionSet(InstructionSet_VectorT128); + } } - if (JitConfig.EnableArm64AdvSimd() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd); - } + instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase); + instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd); if (JitConfig.EnableArm64Aes() != 0) { @@ -6285,40 +6330,38 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, { instructionSetFlags.AddInstructionSet(InstructionSet_Sve); } -#elif defined(TARGET_XARCH) - if (JitConfig.EnableHWIntrinsic() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_X86Base); - } - if (JitConfig.EnableSSE() != 0) + if (JitConfig.EnableArm64Sve2() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_SSE); + instructionSetFlags.AddInstructionSet(InstructionSet_Sve2); } - - if (JitConfig.EnableSSE2() != 0) +#elif defined(TARGET_XARCH) + if (info.compMatchedVM) { - instructionSetFlags.AddInstructionSet(InstructionSet_SSE2); + // Keep the existing VectorT* ISAs. 
+ if (currentInstructionSetFlags.HasInstructionSet(InstructionSet_VectorT128)) + { + instructionSetFlags.AddInstructionSet(InstructionSet_VectorT128); + } + if (currentInstructionSetFlags.HasInstructionSet(InstructionSet_VectorT256)) + { + instructionSetFlags.AddInstructionSet(InstructionSet_VectorT256); + } + if (currentInstructionSetFlags.HasInstructionSet(InstructionSet_VectorT512)) + { + instructionSetFlags.AddInstructionSet(InstructionSet_VectorT512); + } } - if ((JitConfig.EnableSSE3() != 0) && (JitConfig.EnableSSE3_4() != 0)) - { - instructionSetFlags.AddInstructionSet(InstructionSet_SSE3); - } + instructionSetFlags.AddInstructionSet(InstructionSet_X86Base); - if (JitConfig.EnableSSSE3() != 0) + if (JitConfig.EnableSSE42() != 0) { + instructionSetFlags.AddInstructionSet(InstructionSet_SSE3); instructionSetFlags.AddInstructionSet(InstructionSet_SSSE3); - } - - if (JitConfig.EnableSSE41() != 0) - { instructionSetFlags.AddInstructionSet(InstructionSet_SSE41); - } - - if (JitConfig.EnableSSE42() != 0) - { instructionSetFlags.AddInstructionSet(InstructionSet_SSE42); + instructionSetFlags.AddInstructionSet(InstructionSet_POPCNT); } if (JitConfig.EnableAVX() != 0) @@ -6329,122 +6372,104 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, if (JitConfig.EnableAVX2() != 0) { instructionSetFlags.AddInstructionSet(InstructionSet_AVX2); - } - - if (JitConfig.EnableAES() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AES); - } - - if (JitConfig.EnableBMI1() != 0) - { instructionSetFlags.AddInstructionSet(InstructionSet_BMI1); - } - - if (JitConfig.EnableBMI2() != 0) - { instructionSetFlags.AddInstructionSet(InstructionSet_BMI2); - } - - if (JitConfig.EnableFMA() != 0) - { instructionSetFlags.AddInstructionSet(InstructionSet_FMA); + instructionSetFlags.AddInstructionSet(InstructionSet_LZCNT); + instructionSetFlags.AddInstructionSet(InstructionSet_MOVBE); } - if (JitConfig.EnableGFNI() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_GFNI); - instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V256); - instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V512); - } - - if (JitConfig.EnableLZCNT() != 0) + if (JitConfig.EnableAVX512() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_LZCNT); + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512); } - if (JitConfig.EnablePCLMULQDQ() != 0) + if (JitConfig.EnableAVX512v2() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ); + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI); } - if (JitConfig.EnableVPCLMULQDQ() != 0) + if (JitConfig.EnableAVX512v3() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V256); - instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V512); + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512v3); } - if (JitConfig.EnablePOPCNT() != 0) + if (JitConfig.EnableAVX10v1() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_POPCNT); + instructionSetFlags.AddInstructionSet(InstructionSet_AVX10v1); } - if (JitConfig.EnableAVXVNNI() != 0) + if (JitConfig.EnableAVX10v2() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVXVNNI); + instructionSetFlags.AddInstructionSet(InstructionSet_AVX10v2); } - if (JitConfig.EnableAVX512F() != 0) + if (JitConfig.EnableAPX() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F); - instructionSetFlags.AddInstructionSet(InstructionSet_EVEX); + instructionSetFlags.AddInstructionSet(InstructionSet_APX); } - if 
(JitConfig.EnableAVX512F_VL() != 0) + if (JitConfig.EnableAES() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F_VL); + instructionSetFlags.AddInstructionSet(InstructionSet_AES); + instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ); } - if (JitConfig.EnableAVX512BW() != 0) + if (JitConfig.EnableAVX512VP2INTERSECT() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW); + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VP2INTERSECT); } - if (JitConfig.EnableAVX512BW_VL() != 0) + if (JitConfig.EnableAVXIFMA() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW_VL); + instructionSetFlags.AddInstructionSet(InstructionSet_AVXIFMA); } - if (JitConfig.EnableAVX512CD() != 0) + if (JitConfig.EnableAVXVNNI() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD); + instructionSetFlags.AddInstructionSet(InstructionSet_AVXVNNI); } - if (JitConfig.EnableAVX512CD_VL() != 0) + if (JitConfig.EnableGFNI() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD_VL); + instructionSetFlags.AddInstructionSet(InstructionSet_GFNI); + instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V256); + instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V512); } - if (JitConfig.EnableAVX512DQ() != 0) + if (JitConfig.EnableSHA() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ); + instructionSetFlags.AddInstructionSet(InstructionSet_SHA); } - if (JitConfig.EnableAVX512DQ_VL() != 0) + if (JitConfig.EnableVAES() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ_VL); + instructionSetFlags.AddInstructionSet(InstructionSet_AES_V256); + instructionSetFlags.AddInstructionSet(InstructionSet_AES_V512); + instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V256); + instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V512); } - if (JitConfig.EnableAVX512VBMI() != 0) + if (JitConfig.EnableWAITPKG() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI); + instructionSetFlags.AddInstructionSet(InstructionSet_WAITPKG); } - if (JitConfig.EnableAVX512VBMI_VL() != 0) + if (JitConfig.EnableX86Serialize() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI_VL); + instructionSetFlags.AddInstructionSet(InstructionSet_X86Serialize); } +#elif defined(TARGET_RISCV64) + instructionSetFlags.AddInstructionSet(InstructionSet_RiscV64Base); - if (JitConfig.EnableAVX10v1() != 0) + if (JitConfig.EnableRiscV64Zba() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX10v1); - instructionSetFlags.AddInstructionSet(InstructionSet_AVX10v1_V512); - instructionSetFlags.AddInstructionSet(InstructionSet_EVEX); + instructionSetFlags.AddInstructionSet(InstructionSet_Zba); } - if (JitConfig.EnableAPX() != 0) + if (JitConfig.EnableRiscV64Zbb() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_APX); + instructionSetFlags.AddInstructionSet(InstructionSet_Zbb); } #endif @@ -6889,11 +6914,11 @@ void Compiler::compCompileFinish() } #endif // TRACK_ENREG_STATS - // Only call _DbgBreakCheck when we are jitting, not when we are ngen-ing - // For ngen the int3 or breakpoint instruction will be right at the - // start of the ngen method and we will stop when we execute it. + // Only call _DbgBreakCheck when we are jitting, not when we are generating AOT code. + // For AOT the int3 or breakpoint instruction will be right at the + // start of the AOT method and we will stop when we execute it. 
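A standalone sketch of the ISA-flag consolidation shown above, where a single EnableSSE42 switch now implies SSE3, SSSE3, SSE4.1, SSE4.2, and POPCNT, and EnableAVX2 similarly pulls in BMI1, BMI2, FMA, LZCNT, and MOVBE. The enum and bitset below are stand-ins for CORINFO_InstructionSetFlags, not its real layout.

    #include <bitset>
    #include <cstdio>
    #include <initializer_list>

    // Stand-ins for CORINFO_InstructionSetFlags; values are illustrative only.
    enum Isa { SSE3, SSSE3, SSE41, SSE42, POPCNT, AVX, AVX2, BMI1, BMI2, FMA, LZCNT, MOVBE, IsaCount };

    using IsaSet = std::bitset<IsaCount>;

    // Enabling the SSE4.2 switch brings in every ISA it implies, as in the hunk above.
    static void addSse42Group(IsaSet& s)
    {
        for (Isa isa : {SSE3, SSSE3, SSE41, SSE42, POPCNT})
            s.set(isa);
    }

    // Enabling the AVX2 switch likewise implies the scalar bit-manipulation ISAs.
    static void addAvx2Group(IsaSet& s)
    {
        for (Isa isa : {AVX2, BMI1, BMI2, FMA, LZCNT, MOVBE})
            s.set(isa);
    }

    int main()
    {
        IsaSet flags;
        addSse42Group(flags);
        addAvx2Group(flags);
        printf("POPCNT: %d, MOVBE: %d, AVX: %d\n", (int)flags.test(POPCNT), (int)flags.test(MOVBE), (int)flags.test(AVX));
        return 0;
    }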
// - if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + if (!IsAot()) { if (compJitHaltMethod()) { @@ -7162,9 +7187,9 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, const bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE); - if (!compIsForInlining() && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + if (!compIsForInlining() && IsAot()) { - // We're prejitting the root method. We also will analyze it as + // We're AOT compiling the root method. We also will analyze it as // a potential inline candidate. InlineResult prejitResult(this, info.compMethodHnd, "prejit"); @@ -8011,12 +8036,7 @@ int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd, pParam->pPrevComp = JitTls::GetCompiler(); JitTls::SetCompiler(pParam->pComp); - // PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here -#if defined(_PREFAST_) || defined(_PREFIX_) - PREFIX_ASSUME(pParam->pComp != NULL); -#else assert(pParam->pComp != nullptr); -#endif #ifdef DEBUG pParam->pComp->jitFallbackCompile = pParam->jitFallbackCompile; @@ -8344,7 +8364,7 @@ Compiler::NodeToIntMap* Compiler::FindReachableNodesInNodeTestData() TestLabelAndNum tlAndN; // For call nodes, translate late args to what they stand for. - if (tree->OperGet() == GT_CALL) + if (tree->OperIs(GT_CALL)) { GenTreeCall* call = tree->AsCall(); unsigned i = 0; @@ -8465,7 +8485,7 @@ void Compiler::compCallArgStats() { for (GenTree* const call : stmt->TreeList()) { - if (call->gtOper != GT_CALL) + if (!call->OperIs(GT_CALL)) continue; argNum = regArgNum = regArgDeferred = regArgTemp = regArgConst = regArgLclVar = argDWordNum = @@ -9434,16 +9454,7 @@ void Compiler::PrintPerMethodLoopHoistStats() void Compiler::RecordStateAtEndOfInlining() { #if defined(DEBUG) - - m_compCyclesAtEndOfInlining = 0; - m_compTickCountAtEndOfInlining = 0; - bool b = CycleTimer::GetThreadCyclesS(&m_compCyclesAtEndOfInlining); - if (!b) - { - return; // We don't have a thread cycle counter. - } - m_compTickCountAtEndOfInlining = GetTickCount(); - + m_compCyclesAtEndOfInlining = minipal_hires_ticks(); #endif // defined(DEBUG) } @@ -9454,19 +9465,16 @@ void Compiler::RecordStateAtEndOfInlining() void Compiler::RecordStateAtEndOfCompilation() { #if defined(DEBUG) - - // Common portion m_compCycles = 0; - uint64_t compCyclesAtEnd; - bool b = CycleTimer::GetThreadCyclesS(&compCyclesAtEnd); - if (!b) + + int64_t lpCycles = minipal_hires_ticks(); + if (lpCycles > m_compCyclesAtEndOfInlining) { - return; // We don't have a thread cycle counter. + int64_t lpFreq = minipal_hires_tick_frequency(); + m_compCycles = lpCycles - m_compCyclesAtEndOfInlining; + m_compCycles *= 1000000; + m_compCycles /= lpFreq; } - assert(compCyclesAtEnd >= m_compCyclesAtEndOfInlining); - - m_compCycles = compCyclesAtEnd - m_compCyclesAtEndOfInlining; - #endif // defined(DEBUG) } @@ -9569,6 +9577,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX * The following don't require a Compiler* to work: * dRegMask : Display a regMaskTP (call dspRegMask(mask)). * dBlockList : Display a BasicBlockList*. + * dIsa : Display a CORINFO_InstructionSet + * dIsaFlags : Display a CORINFO_InstructionSetFlags * * The following find an object in the IR and return it, as well as setting a global variable with the value that can * be used in the debugger (e.g., in the watch window, or as a way to get an address for data breakpoints). 
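A standalone sketch of the elapsed-time arithmetic used by the reworked RecordStateAtEndOfCompilation above: ticks since the end of inlining, scaled by 1,000,000 and divided by the tick frequency to get microseconds. The hunk uses minipal_hires_ticks and minipal_hires_tick_frequency; std::chrono stands in for that tick source here.

    #include <chrono>
    #include <cstdio>

    int main()
    {
        using Clock = std::chrono::steady_clock;

        const Clock::time_point start = Clock::now();

        // Some work to time.
        volatile long long sink = 0;
        for (int i = 0; i < 1000000; i++)
            sink += i;

        const Clock::time_point end = Clock::now();

        // ticks * 1,000,000 / frequency, i.e. the same scaling the hunk performs.
        const long long ticks = (end - start).count();
        const long long freq  = Clock::period::den / Clock::period::num;
        const long long usecs = ticks * 1000000 / freq;

        printf("elapsed: %lld us (sink=%lld)\n", usecs, (long long)sink);
        return 0;
    }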
@@ -9652,6 +9662,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Functions which don't require a Compiler* #pragma comment(linker, "/include:dRegMask") #pragma comment(linker, "/include:dBlockList") +#pragma comment(linker, "/include:dIsa") +#pragma comment(linker, "/include:dIsaFlags") // Functions which search for objects in the IR #pragma comment(linker, "/include:dFindTreeInTree") @@ -10536,6 +10548,41 @@ JITDBGAPI void __cdecl dBlockList(BasicBlockList* list) printf("\n"); } +JITDBGAPI void __cdecl dIsa(const CORINFO_InstructionSet isa) +{ + static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called + printf("===================================================================== dIsa %u\n", sequenceNumber++); + printf("%s\n", InstructionSetToString(isa)); +} + +JITDBGAPI void __cdecl dIsaFlags(const CORINFO_InstructionSetFlags& isaFlags) +{ + static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called + printf("===================================================================== dIsaFlags %u\n", sequenceNumber++); + if (isaFlags.IsEmpty()) + { + printf("\n"); + } + else + { + bool first = true; + // The flags start at '1'. We don't know the last flag, so compute the maximum. + // It would be better if CORINFO_InstructionSet defined InstructionSet_FIRST and InstructionSet_LAST, + // or even better, if CORINFO_InstructionSetFlags defined an iterator over the instruction sets in the flags. + CORINFO_InstructionSet isaFirst = (CORINFO_InstructionSet)1; + CORINFO_InstructionSet isaLast = (CORINFO_InstructionSet)(sizeof(CORINFO_InstructionSetFlags) * 8 - 1); + for (CORINFO_InstructionSet isa = isaFirst; isa <= isaLast; isa = (CORINFO_InstructionSet)((int)isa + 1)) + { + if (isaFlags.HasInstructionSet(isa)) + { + printf("%s%s", first ? "" : " ", InstructionSetToString(isa)); + first = false; + } + } + } + printf("\n"); +} + // Global variables available in debug mode. That are set by debug APIs for finding // Trees, Stmts, and/or Blocks using id or bbNum. // That can be used in watch window or as a way to get address of fields for data breakpoints. @@ -10862,15 +10909,19 @@ const char* Compiler::devirtualizationDetailToString(CORINFO_DEVIRTUALIZATION_DE // const char* Compiler::printfAlloc(const char* format, ...) 
{ - char str[512]; va_list args; va_start(args, format); - int result = vsprintf_s(str, ArrLen(str), format, args); + int count = _vscprintf(format, args); + va_end(args); + + assert(count >= 0); + char* resultStr = new (this, CMK_DebugOnly) char[count + 1]; + + va_start(args, format); + int result = vsprintf_s(resultStr, count + 1, format, args); va_end(args); - assert((result >= 0) && ((unsigned)result < ArrLen(str))); - char* resultStr = new (this, CMK_DebugOnly) char[result + 1]; - memcpy(resultStr, str, (unsigned)result + 1); + assert((result >= 0) && (result < (count + 1))); return resultStr; } @@ -10909,14 +10960,14 @@ Compiler::EnregisterStats Compiler::s_enregisterStats; void Compiler::EnregisterStats::RecordLocal(const LclVarDsc* varDsc) { m_totalNumberOfVars++; - if (varDsc->TypeGet() == TYP_STRUCT) + if (varDsc->TypeIs(TYP_STRUCT)) { m_totalNumberOfStructVars++; } if (!varDsc->lvDoNotEnregister) { m_totalNumberOfEnregVars++; - if (varDsc->TypeGet() == TYP_STRUCT) + if (varDsc->TypeIs(TYP_STRUCT)) { m_totalNumberOfStructEnregVars++; } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8d3a5681fc5a..6f2f40e9e410 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -72,21 +72,22 @@ inline var_types genActualType(T value); * Forward declarations */ -struct InfoHdr; // defined in GCInfo.h -struct escapeMapping_t; // defined in fgdiagnostic.cpp -class emitter; // defined in emit.h -struct ShadowParamVarInfo; // defined in GSChecks.cpp -struct InitVarDscInfo; // defined in registerargconvention.h -class FgStack; // defined in fgbasic.cpp -class Instrumentor; // defined in fgprofile.cpp -class SpanningTreeVisitor; // defined in fgprofile.cpp -class CSE_DataFlow; // defined in optcse.cpp -struct CSEdsc; // defined in optcse.h -class CSE_HeuristicCommon; // defined in optcse.h -class OptBoolsDsc; // defined in optimizer.cpp -struct JumpThreadInfo; // defined in redundantbranchopts.cpp -class ProfileSynthesis; // defined in profilesynthesis.h -class LoopLocalOccurrences; // defined in inductionvariableopts.cpp +struct InfoHdr; // defined in GCInfo.h +struct escapeMapping_t; // defined in fgdiagnostic.cpp +class emitter; // defined in emit.h +struct ShadowParamVarInfo; // defined in GSChecks.cpp +struct InitVarDscInfo; // defined in registerargconvention.h +class FgStack; // defined in fgbasic.cpp +class Instrumentor; // defined in fgprofile.cpp +class SpanningTreeVisitor; // defined in fgprofile.cpp +class CSE_DataFlow; // defined in optcse.cpp +struct CSEdsc; // defined in optcse.h +class CSE_HeuristicCommon; // defined in optcse.h +class OptBoolsDsc; // defined in optimizer.cpp +struct JumpThreadInfo; // defined in redundantbranchopts.cpp +class ProfileSynthesis; // defined in profilesynthesis.h +class PerLoopInfo; // defined in inductionvariableopts.cpp +class RangeCheck; // defined in rangecheck.h #ifdef DEBUG struct IndentStack; #endif @@ -118,6 +119,7 @@ void* operator new[](size_t n, Compiler* context, CompMemKind cmk); // Requires the definitions of "operator new" so including "LoopCloning.h" after the definitions. #include "loopcloning.h" +#include "rangecheckcloning.h" /*****************************************************************************/ @@ -508,6 +510,7 @@ enum class AddressExposedReason class LclVarDsc { public: +<<<<<<< HEAD // The constructor. Most things can just be zero'ed. // // Initialize the ArgRegs to REG_STK. 
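The printfAlloc change above switches to a measure-then-format approach: _vscprintf sizes the string, then vsprintf_s fills an arena buffer of exactly that size, removing the fixed 512-byte stack buffer. A portable standalone sketch of the same two-pass idiom, using vsnprintf and malloc rather than the JIT's allocator and CRT shims:

    #include <cstdarg>
    #include <cstdio>
    #include <cstdlib>

    static char* formatAlloc(const char* format, ...)
    {
        va_list args;
        va_start(args, format);

        va_list argsCopy;
        va_copy(argsCopy, args);

        // First pass: measure how many characters the formatted string needs.
        int count = vsnprintf(nullptr, 0, format, args);
        va_end(args);

        if (count < 0)
        {
            va_end(argsCopy);
            return nullptr;
        }

        // Second pass: format into a buffer of exactly the right size.
        char* result = (char*)malloc((size_t)count + 1);
        if (result != nullptr)
        {
            vsnprintf(result, (size_t)count + 1, format, argsCopy);
        }
        va_end(argsCopy);
        return result;
    }

    int main()
    {
        char* s = formatAlloc("local V%02u has weight %.2f", 3u, 1.5);
        if (s != nullptr)
        {
            puts(s);
            free(s);
        }
        return 0;
    }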
@@ -526,11 +529,13 @@ class LclVarDsc { } +======= +>>>>>>> upstream-jun // note this only packs because var_types is a typedef of unsigned char var_types lvType : 5; // TYP_INT/LONG/FLOAT/DOUBLE/REF unsigned char lvIsParam : 1; // is this a parameter? - unsigned char lvIsRegArg : 1; // is this an argument that was passed by register? + unsigned char lvIsRegArg : 1; // is any part of this parameter passed in a register? unsigned char lvIsParamRegTarget : 1; // is this the target of a param reg to local mapping? unsigned char lvFramePointerBased : 1; // 0 = off of REG_SPBASE (e.g., ESP), 1 = off of REG_FPBASE (e.g., EBP) @@ -577,10 +582,6 @@ class LclVarDsc unsigned char lvIsLastUseCopyOmissionCandidate : 1; #endif // FEATURE_IMPLICIT_BYREFS -#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - unsigned char lvIsSplit : 1; // Set if the argument is split across last integer register and stack. -#endif // defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - unsigned char lvSingleDef : 1; // variable has a single def. Used to identify ref type locals that can get type // updates @@ -620,8 +621,9 @@ class LclVarDsc unsigned char lvIsStructField : 1; // Is this local var a field of a promoted struct local? unsigned char lvContainsHoles : 1; // Is this a promoted struct whose fields do not cover the struct local? - unsigned char lvIsMultiRegArg : 1; // true if this is a multireg LclVar struct used in an argument context - unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call + unsigned char lvIsMultiRegArg : 1; // true if this is a multireg LclVar struct used in an argument context + unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call + unsigned char lvIsMultiRegDest : 1; // true if this is a multireg LclVar struct that is stored from a multireg node #ifdef DEBUG unsigned char lvHiddenBufferStructArg : 1; // True when this struct (or its field) are passed as hidden buffer @@ -633,7 +635,7 @@ class LclVarDsc #endif // FEATURE_HFA_FIELDS_PRESENT #ifdef DEBUG - // TODO-Cleanup: See the note on lvSize() - this flag is only in use by asserts that are checking for struct + // TODO-Cleanup: this flag is only in use by asserts that are checking for struct // types, and is needed because of cases where TYP_STRUCT is bashed to an integral type. // Consider cleaning this up so this workaround is not required. unsigned char lvUnusedStruct : 1; // All references to this promoted struct are through its field locals. @@ -712,109 +714,13 @@ class LclVarDsc unsigned char lvAllDefsAreNoGc : 1; // For pinned locals: true if all defs of this local are no-gc unsigned char lvStackAllocatedObject : 1; // Local is a stack allocated object (class, box, array, ...) 
-#if FEATURE_MULTIREG_ARGS - regNumber lvRegNumForSlot(unsigned slotNum) - { - if (slotNum == 0) - { - return (regNumber)_lvArgReg; - } - else if (slotNum == 1) - { - return GetOtherArgReg(); - } - else - { - assert(false && "Invalid slotNum!"); - } - - unreached(); - } -#endif // FEATURE_MULTIREG_ARGS - - CorInfoHFAElemType GetLvHfaElemKind() const + bool IsImplicitByRef() { -#ifdef FEATURE_HFA_FIELDS_PRESENT - return _lvHfaElemKind; -#else - NOWAY_MSG("GetLvHfaElemKind"); - return CORINFO_HFA_ELEM_NONE; -#endif // FEATURE_HFA_FIELDS_PRESENT - } - - void SetLvHfaElemKind(CorInfoHFAElemType elemKind) - { -#ifdef FEATURE_HFA_FIELDS_PRESENT - _lvHfaElemKind = elemKind; +#if FEATURE_IMPLICIT_BYREFS + return lvIsImplicitByRef; #else - NOWAY_MSG("SetLvHfaElemKind"); -#endif // FEATURE_HFA_FIELDS_PRESENT - } - - bool lvIsHfa() const - { - if (GlobalJitOptions::compFeatureHfa) - { - return IsHfa(GetLvHfaElemKind()); - } - else - { - return false; - } - } - - bool lvIsHfaRegArg() const - { - if (GlobalJitOptions::compFeatureHfa) - { - return lvIsRegArg && lvIsHfa(); - } - else - { - return false; - } - } - - //------------------------------------------------------------------------------ - // lvHfaSlots: Get the number of slots used by an HFA local - // - // Return Value: - // On Arm64 - Returns 1-4 indicating the number of register slots used by the HFA - // On Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8 - // - unsigned lvHfaSlots() const - { - assert(lvIsHfa()); - assert(varTypeIsStruct(lvType)); - unsigned slots = 0; -#ifdef TARGET_ARM - slots = lvExactSize() / sizeof(float); - assert(slots <= 8); -#elif defined(TARGET_ARM64) - switch (GetLvHfaElemKind()) - { - case CORINFO_HFA_ELEM_NONE: - assert(!"lvHfaSlots called for non-HFA"); - break; - case CORINFO_HFA_ELEM_FLOAT: - assert((lvExactSize() % 4) == 0); - slots = lvExactSize() >> 2; - break; - case CORINFO_HFA_ELEM_DOUBLE: - case CORINFO_HFA_ELEM_VECTOR64: - assert((lvExactSize() % 8) == 0); - slots = lvExactSize() >> 3; - break; - case CORINFO_HFA_ELEM_VECTOR128: - assert((lvExactSize() % 16) == 0); - slots = lvExactSize() >> 4; - break; - default: - unreached(); - } - assert(slots <= 4); -#endif // TARGET_ARM64 - return slots; + return false; +#endif } // lvIsMultiRegArgOrRet() @@ -825,6 +731,13 @@ class LclVarDsc return lvIsMultiRegArg || lvIsMultiRegRet; } + void SetIsMultiRegDest() + { + lvIsMultiRegDest = true; + // TODO-Quirk: Set the old lvIsMultiRegRet, which is used for heuristics + lvIsMultiRegRet = true; + } + bool IsStackAllocatedObject() const { return lvStackAllocatedObject; @@ -892,13 +805,6 @@ class LclVarDsc regNumberSmall _lvOtherReg; // Used for "upper half" of long var. #endif // !defined(TARGET_64BIT) - regNumberSmall _lvArgReg; // The (first) register in which this argument is passed. - -#if FEATURE_MULTIREG_ARGS - regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register. 
- // Note this is defined but not used by ARM32 -#endif // FEATURE_MULTIREG_ARGS - regNumberSmall _lvArgInitReg; // the register into which the argument is moved at entry public: @@ -950,31 +856,6 @@ class LclVarDsc ///////////////////// - regNumber GetArgReg() const - { - return (regNumber)_lvArgReg; - } - - void SetArgReg(regNumber reg) - { - _lvArgReg = (regNumberSmall)reg; - assert(_lvArgReg == reg); - } - -#if FEATURE_MULTIREG_ARGS - - regNumber GetOtherArgReg() const - { - return (regNumber)_lvOtherArgReg; - } - - void SetOtherArgReg(regNumber reg) - { - _lvOtherArgReg = (regNumberSmall)reg; - assert(_lvOtherArgReg == reg); - } -#endif // FEATURE_MULTIREG_ARGS - #ifdef FEATURE_SIMD // Is this is a SIMD struct which is used for SIMD intrinsic? bool lvIsUsedInSIMDIntrinsic() const @@ -1132,14 +1013,11 @@ class LclVarDsc } unsigned lvExactSize() const; - unsigned lvSize() const; - - size_t lvArgStackSize() const; unsigned lvSlotNum; // original slot # (if remapped) // class handle for the local or null if not known or not a class - CORINFO_CLASS_HANDLE lvClassHnd; + CORINFO_CLASS_HANDLE lvClassHnd = NO_CLASS_HANDLE; private: ClassLayout* m_layout; // layout info for structs @@ -1150,6 +1028,17 @@ class LclVarDsc return (var_types)lvType; } + bool TypeIs(var_types type) const + { + return TypeGet() == type; + } + + template + bool TypeIs(var_types type, T... rest) const + { + return TypeIs(type) || TypeIs(rest...); + } + // NormalizeOnLoad Rules: // 1. All small locals are actually TYP_INT locals. // 2. NOL locals are such that not all definitions can be controlled by the compiler and so the upper bits can @@ -1173,30 +1062,6 @@ class LclVarDsc void incRefCnts(weight_t weight, Compiler* pComp, RefCountState state = RCS_NORMAL, bool propagate = true); - var_types GetHfaType() const - { - if (GlobalJitOptions::compFeatureHfa) - { - assert(lvIsHfa()); - return HfaTypeFromElemKind(GetLvHfaElemKind()); - } - else - { - return TYP_UNDEF; - } - } - - void SetHfaType(var_types type) - { - if (GlobalJitOptions::compFeatureHfa) - { - CorInfoHFAElemType elemKind = HfaElemKindFromType(type); - SetLvHfaElemKind(elemKind); - // Ensure we've allocated enough bits. - assert(GetLvHfaElemKind() == elemKind); - } - } - // Returns true if this variable contains GC pointers (including being a GC pointer itself). bool HasGCPtr() const { @@ -1207,8 +1072,12 @@ class LclVarDsc ClassLayout* GetLayout() const { #if FEATURE_IMPLICIT_BYREFS +<<<<<<< HEAD assert(varTypeIsStruct(TypeGet()) || (lvIsImplicitByRef && ((TypeGet() == TYP_BYREF) || (TypeGet() == TYP_I_IMPL)))); +======= + assert(varTypeIsStruct(TypeGet()) || (lvIsImplicitByRef && TypeIs(TYP_BYREF))); +>>>>>>> upstream-jun #else assert(varTypeIsStruct(TypeGet())); #endif @@ -1223,6 +1092,13 @@ class LclVarDsc m_layout = layout; } + // Change the layout to one that may not be compatible. + void ChangeLayout(ClassLayout* layout) + { + assert(varTypeIsStruct(lvType)); + m_layout = layout; + } + // Grow the size of a block layout local. void GrowBlockLayout(ClassLayout* layout) { @@ -1883,7 +1759,7 @@ struct FuncInfoDsc UnwindInfo uwi; // Unwind information for this function/funclet's hot section UnwindInfo* uwiCold; // Unwind information for this function/funclet's cold section // Note: we only have a pointer here instead of the actual object, - // to save memory in the JIT case (compared to the NGEN case), + // to save memory in the JIT case (compared to the AOT case), // where we don't have any cold section. 
// Note 2: we currently don't support hot/cold splitting in functions // with EH, so uwiCold will be NULL for all funclets. @@ -2296,6 +2172,7 @@ class FlowGraphNaturalLoops FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs); static bool FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStack& worklist); + static bool IsLoopCanonicalizable(FlowGraphNaturalLoop* loop); public: const FlowGraphDfsTree* GetDfsTree() @@ -2371,7 +2248,7 @@ class FlowGraphNaturalLoops static FlowGraphNaturalLoops* Find(const FlowGraphDfsTree* dfs); // Number of blocks with DFS backedges that are not natural loop headers - // (indicates presence of "irreducible" loops) + // (indicates presence of "irreducible" or uncanonicalizable loops) unsigned ImproperLoopHeaders() const { return m_improperLoopHeaders; @@ -2883,7 +2760,7 @@ class Compiler bool ehNeedsShadowSPslots() { - return (info.compXcptnsCount || opts.compDbgEnC); + return ((compHndBBtabCount > 0) || opts.compDbgEnC); } // 0 for methods with no EH @@ -2892,8 +2769,15 @@ class Compiler // etc. unsigned ehMaxHndNestingCount = 0; + typedef JitHashTable, EHblkDsc*> EHIDtoEHblkDscMap; + EHIDtoEHblkDscMap* m_EHIDtoEHblkDsc = nullptr; + #endif // FEATURE_EH_WINDOWS_X86 + EHblkDsc* ehFindEHblkDscById(unsigned short ehID); + bool ehTableFinalized = false; + void FinalizeEH(); + static bool jitIsBetween(unsigned value, unsigned start, unsigned end); static bool jitIsBetweenInclusive(unsigned value, unsigned start, unsigned end); @@ -2916,6 +2800,9 @@ class Compiler // Returns true if "block" is the start of a handler or filter region. bool bbIsHandlerBeg(const BasicBlock* block); + // Returns true if "block" is the start of a funclet. + bool bbIsFuncletBeg(const BasicBlock* block); + bool ehHasCallableHandlers(); // Return the EH descriptor for the given region index. @@ -2954,6 +2841,9 @@ class Compiler // Find the true enclosing try index, ignoring 'mutual protect' try. Uses IL ranges to check. unsigned ehTrueEnclosingTryIndexIL(unsigned regionIndex); + // Find the true enclosing try index, ignoring 'mutual protect' try. Uses blocks to check. + unsigned ehTrueEnclosingTryIndex(unsigned regionIndex); + // Return the index of the most nested enclosing region for a particular EH region. Returns NO_ENCLOSING_INDEX // if there is no enclosing region. If the returned index is not NO_ENCLOSING_INDEX, then '*inTryRegion' // is set to 'true' if the enclosing region is a 'try', or 'false' if the enclosing region is a handler. @@ -2990,21 +2880,6 @@ class Compiler bool ehCallFinallyInCorrectRegion(BasicBlock* blockCallFinally, unsigned finallyIndex); #endif // DEBUG - // Do we need a PSPSym in the main function? For codegen purposes, we only need one - // if there is a filter that protects a region with a nested EH clause (such as a - // try/catch nested in the 'try' body of a try/filter/filter-handler). See - // genFuncletProlog() for more details. However, the VM seems to use it for more - // purposes, maybe including debugging. Until we are sure otherwise, always create - // a PSPSym for functions with any EH. - bool ehNeedsPSPSym() const - { -#ifdef TARGET_X86 - return false; -#else // TARGET_X86 - return compHndBBtabCount > 0; -#endif // TARGET_X86 - } - bool ehAnyFunclets(); // Are there any funclets in this function? 
unsigned ehFuncletCount(); // Return the count of funclets in the function @@ -3081,7 +2956,7 @@ class Compiler void fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast); - void fgRebuildEHRegions(); + void fgFindTryRegionEnds(); void fgSkipRmvdBlocks(EHblkDsc* handlerTab); @@ -3139,7 +3014,7 @@ class Compiler Statement* gtNewStmt(GenTree* expr, const DebugInfo& di); // For unary opers. - GenTree* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1); + GenTreeUnOp* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1); // For binary opers. GenTreeOp* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2); @@ -3180,6 +3055,7 @@ class Compiler GenTree* gtNewIconEmbHndNode(void* value, void* pValue, GenTreeFlags flags, void* compileTimeHandle); GenTree* gtNewIconEmbScpHndNode(CORINFO_MODULE_HANDLE scpHnd); + GenTree* gtNewIconEmbObjHndNode(CORINFO_OBJECT_HANDLE objHnd); GenTree* gtNewIconEmbClsHndNode(CORINFO_CLASS_HANDLE clsHnd); GenTree* gtNewIconEmbMethHndNode(CORINFO_METHOD_HANDLE methHnd); GenTree* gtNewIconEmbFldHndNode(CORINFO_FIELD_HANDLE fldHnd); @@ -3228,7 +3104,7 @@ class Compiler GenTree* gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg); - GenTree* gtNewBitCastNode(var_types type, GenTree* arg); + GenTreeUnOp* gtNewBitCastNode(var_types type, GenTree* arg); public: GenTreeCall* gtNewCallNode(gtCallTypes callType, @@ -3239,7 +3115,10 @@ class Compiler GenTreeCall* gtNewIndCallNode(GenTree* addr, var_types type, const DebugInfo& di = DebugInfo()); GenTreeCall* gtNewHelperCallNode( - unsigned helper, var_types type, GenTree* arg1 = nullptr, GenTree* arg2 = nullptr, GenTree* arg3 = nullptr); + unsigned helper, var_types type, GenTree* arg1 = nullptr, GenTree* arg2 = nullptr, GenTree* arg3 = nullptr, GenTree* arg4 = nullptr); + + GenTreeCall* gtNewVirtualFunctionLookupHelperCallNode( + unsigned helper, var_types type, GenTree* thisPtr, GenTree* methHnd, GenTree* clsHnd = nullptr); GenTreeCall* gtNewRuntimeLookupHelperCallNode(CORINFO_RUNTIME_LOOKUP* pRuntimeLookup, GenTree* ctxTree, @@ -3255,6 +3134,8 @@ class Compiler GenTreeConditional* gtNewConditionalNode( genTreeOps oper, GenTree* cond, GenTree* op1, GenTree* op2, var_types type); + GenTreeFieldList* gtNewFieldList(); + #ifdef FEATURE_SIMD void SetOpLclRelatedToSIMDIntrinsic(GenTree* op); #endif @@ -3307,6 +3188,7 @@ class Compiler #if defined(TARGET_ARM64) GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdFalseMaskByteNode(unsigned simdSize); #endif GenTree* gtNewSimdBinOpNode(genTreeOps op, @@ -3520,11 +3402,19 @@ class Compiler GenTree* gtNewSimdRoundNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdShuffleVariableNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isShuffleNative); + GenTree* gtNewSimdShuffleNode(var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, - unsigned simdSize); + unsigned simdSize, + bool isShuffleNative); GenTree* gtNewSimdSqrtNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); @@ -3830,7 +3720,7 @@ class Compiler bool gtStoreDefinesField( LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize); - void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq = nullptr); + void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq = nullptr) 
const; // Return true if call is a recursive call; return false otherwise. // Note when inlining, this looks for calls back to the root method. @@ -3951,6 +3841,7 @@ class Compiler const char* gtGetWellKnownArgNameForArgMsg(WellKnownArg arg); void gtGetArgMsg(GenTreeCall* call, CallArg* arg, char* bufp, unsigned bufLength); void gtGetLateArgMsg(GenTreeCall* call, CallArg* arg, char* bufp, unsigned bufLength); + void gtPrintABILocation(const ABIPassingInformation& abiInfo, char** bufp, unsigned* bufLength); void gtDispArgList(GenTreeCall* call, GenTree* lastCallOperand, IndentStack* indentStack); void gtDispFieldSeq(FieldSeq* fieldSeq, ssize_t offset); @@ -4126,7 +4017,7 @@ class Compiler unsigned lvaInlineeReturnSpillTemp = BAD_VAR_NUM; // The temp to spill the non-VOID return expression // in case there are multiple BBJ_RETURN blocks in the inlinee // or if the inlinee has GC ref locals. - + bool lvaInlineeReturnSpillTempFreshlyCreated = false; // True if the temp was freshly created for the inlinee return #if FEATURE_FIXED_OUT_ARGS @@ -4151,6 +4042,9 @@ class Compiler unsigned lvaSwiftErrorLocal; #endif + // Variable representing async continuation argument passed. + unsigned lvaAsyncContinuationArg = BAD_VAR_NUM; + #if defined(DEBUG) && defined(TARGET_XARCH) unsigned lvaReturnSpCheck = BAD_VAR_NUM; // Stores SP to confirm it is not corrupted on return. @@ -4256,12 +4150,13 @@ class Compiler void lvaInitTypeRef(); - void lvaInitArgs(InitVarDscInfo* varDscInfo); - void lvaInitThisPtr(InitVarDscInfo* varDscInfo); - void lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBufReg); - void lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, unsigned takeArgs); - void lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo); - void lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo); + void lvaInitArgs(bool hasRetBuffArg); + void lvaInitThisPtr(unsigned* curVarNum); + void lvaInitRetBuffArg(unsigned* curVarNum, bool useFixedRetBufReg); + void lvaInitUserArgs(unsigned* curVarNum, unsigned skipArgs, unsigned takeArgs); + void lvaInitGenericsCtxt(unsigned* curVarNum); + void lvaInitVarArgsHandle(unsigned* curVarNum); + void lvaInitAsyncContinuation(unsigned* curVarNum); void lvaInitVarDsc(LclVarDsc* varDsc, unsigned varNum, @@ -4275,7 +4170,7 @@ class Compiler void lvaClassifyParameterABI(); - bool lvaInitSpecialSwiftParam(CORINFO_ARG_LIST_HANDLE argHnd, InitVarDscInfo* varDscInfo, CorInfoType type, CORINFO_CLASS_HANDLE typeHnd); + bool lvaInitSpecialSwiftParam(CORINFO_ARG_LIST_HANDLE argHnd, unsigned lclNum, CorInfoType type, CORINFO_CLASS_HANDLE typeHnd); bool lvaHasAnySwiftStackParamToReassemble(); var_types lvaGetActualType(unsigned lclNum); @@ -4329,7 +4224,7 @@ class Compiler return varNum; } - unsigned lvaLclSize(unsigned varNum); + unsigned lvaLclStackHomeSize(unsigned varNum); unsigned lvaLclExactSize(unsigned varNum); bool lvaHaveManyLocals(float percent = 1.0f) const; @@ -4381,13 +4276,9 @@ class Compiler bool lvaIsImplicitByRefLocal(unsigned lclNum) const; bool lvaIsLocalImplicitlyAccessedByRef(unsigned lclNum) const; - // Returns true if this local var is a multireg struct - bool lvaIsMultiregStruct(LclVarDsc* varDsc, bool isVararg); - // If the local is a TYP_STRUCT, get/set a class handle describing it void lvaSetStruct(unsigned varNum, ClassLayout* layout, bool unsafeValueClsCheck); void lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool unsafeValueClsCheck); - void lvaSetStructUsedAsVarArg(unsigned varNum); // If the local is TYP_REF, set or update the 
associated class information. void lvaSetClass(unsigned varNum, CORINFO_CLASS_HANDLE clsHnd, bool isExact = false); @@ -4454,6 +4345,8 @@ class Compiler bool ShouldPromoteStructVar(unsigned lclNum); void PromoteStructVar(unsigned lclNum); void SortStructFields(); + bool IsArmHfaParameter(unsigned lclNum); + bool IsSysVMultiRegType(ClassLayout* layout); var_types TryPromoteValueClassAsPrimitive(CORINFO_TYPE_LAYOUT_NODE* treeNodes, size_t maxTreeNodes, size_t index); void AdvanceSubTree(CORINFO_TYPE_LAYOUT_NODE* treeNodes, size_t maxTreeNodes, size_t* index); @@ -4474,27 +4367,26 @@ class Compiler bool lvaIsGCTracked(const LclVarDsc* varDsc); #if defined(FEATURE_SIMD) - bool lvaMapSimd12ToSimd16(const LclVarDsc* varDsc) + bool lvaMapSimd12ToSimd16(unsigned varNum) { - assert(varDsc->lvType == TYP_SIMD12); + LclVarDsc* varDsc = lvaGetDesc(varNum); + assert(varDsc->TypeIs(TYP_SIMD12)); -#if defined(TARGET_64BIT) - assert(compAppleArm64Abi() || varDsc->lvSize() == 16); -#endif // defined(TARGET_64BIT) + unsigned stackHomeSize = lvaLclStackHomeSize(varNum); // We make local variable SIMD12 types 16 bytes instead of just 12. - // lvSize() will return 16 bytes for SIMD12, even for fields. + // lvaLclStackHomeSize() will return 16 bytes for SIMD12, even for fields. // However, we can't do that mapping if the var is a dependently promoted struct field. // Such a field must remain its exact size within its parent struct unless it is a single // field *and* it is the only field in a struct of 16 bytes. - if (varDsc->lvSize() != 16) + if (stackHomeSize != 16) { return false; } if (lvaIsFieldOfDependentlyPromotedStruct(varDsc)) { LclVarDsc* parentVarDsc = lvaGetDesc(varDsc->lvParentLcl); - return (parentVarDsc->lvFieldCnt == 1) && (parentVarDsc->lvSize() == 16); + return (parentVarDsc->lvFieldCnt == 1) && (lvaLclStackHomeSize(varDsc->lvParentLcl) == 16); } return true; } @@ -4515,8 +4407,6 @@ class Compiler unsigned lvaStubArgumentVar = BAD_VAR_NUM; // variable representing the secret stub argument - unsigned lvaPSPSym = BAD_VAR_NUM; // variable representing the PSPSym - InlineInfo* impInlineInfo; // Only present for inlinees InlineStrategy* m_inlineStrategy; @@ -4658,7 +4548,7 @@ class Compiler CompAllocator alloc(compiler->getAllocator(CMK_Generic)); compiler->impEnumeratorGdvLocalMap = new (alloc) NodeToUnsignedMap(alloc); } - + return compiler->impEnumeratorGdvLocalMap; } @@ -4850,7 +4740,11 @@ class Compiler bool mustExpand); #ifdef FEATURE_HW_INTRINSICS - bool IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_types simdBaseType) const; + bool IsValidForShuffle(GenTree* indices, + unsigned simdSize, + var_types simdBaseType, + bool* canBecomeValid, + bool isShuffleNative) const; GenTree* impHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, @@ -4958,6 +4852,7 @@ class Compiler Statement** pAfterStmt = nullptr, const DebugInfo& di = DebugInfo(), BasicBlock* block = nullptr); + bool impIsLegalRetBuf(GenTree* retBuf, GenTreeCall* call); GenTree* impStoreStructPtr(GenTree* destAddr, GenTree* value, unsigned curLevel, GenTreeFlags indirFlags = GTF_EMPTY); GenTree* impGetNodeAddr(GenTree* val, unsigned curLevel, GenTreeFlags* pDerefFlags); @@ -5002,6 +4897,8 @@ class Compiler bool impMatchIsInstBooleanConversion(const BYTE* codeAddr, const BYTE* codeEndp, int* consumed); + bool impMatchAwaitPattern(const BYTE * codeAddr, const BYTE * codeEndp, int* configVal); + GenTree* impCastClassOrIsInstToTree( GenTree* op1, GenTree* op2, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool 
isCastClass, bool* booleanCheck, IL_OFFSET ilOffset); @@ -5010,14 +4907,13 @@ class Compiler bool VarTypeIsMultiByteAndCanEnreg(var_types type, CORINFO_CLASS_HANDLE typeClass, unsigned* typeSize, - bool forReturn, bool isVarArg, CorInfoCallConvExtension callConv); bool IsIntrinsicImplementedByUserCall(NamedIntrinsic intrinsicName); bool IsTargetIntrinsic(NamedIntrinsic intrinsicName); bool IsMathIntrinsic(NamedIntrinsic intrinsicName); - bool IsMathIntrinsic(GenTree* tree); + bool IsBitCountingIntrinsic(NamedIntrinsic intrinsicName); private: //----------------- Importing the method ---------------------------------- @@ -5264,6 +5160,7 @@ class Compiler CORINFO_METHOD_HANDLE fncHandle, unsigned methAttr, CORINFO_CONTEXT_HANDLE exactContextHnd, + InlineContext* inlinersContext, InlineCandidateInfo** ppInlineCandidateInfo, InlineResult* inlineResult); @@ -5285,13 +5182,15 @@ class Compiler void impMarkInlineCandidate(GenTree* call, CORINFO_CONTEXT_HANDLE exactContextHnd, bool exactContextNeedsRuntimeLookup, - CORINFO_CALL_INFO* callInfo); + CORINFO_CALL_INFO* callInfo, + InlineContext* inlinersContext); void impMarkInlineCandidateHelper(GenTreeCall* call, uint8_t candidateIndex, CORINFO_CONTEXT_HANDLE exactContextHnd, bool exactContextNeedsRuntimeLookup, CORINFO_CALL_INFO* callInfo, + InlineContext* inlinersContext, InlineResult* inlineResult); bool impTailCallRetTypeCompatible(bool allowWidening, @@ -5459,6 +5358,8 @@ class Compiler // This is derived from the profile data // or is BB_UNITY_WEIGHT when we don't have profile data + bool fgImportDone = false; // true once importation has finished + bool fgFuncletsCreated = false; // true if the funclet creation phase has been run bool fgGlobalMorph = false; // indicates if we are during the global morphing phase @@ -5549,6 +5450,8 @@ class Compiler FoldResult fgFoldConditional(BasicBlock* block); + bool fgFoldCondToReturnBlock(BasicBlock* block); + struct MorphUnreachableInfo { MorphUnreachableInfo(Compiler* comp); @@ -5572,7 +5475,7 @@ class Compiler void fgMergeBlockReturn(BasicBlock* block); - bool fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg), bool invalidateDFSTreeOnFGChange = true); + bool fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg), bool allowFGChange = true, bool invalidateDFSTreeOnFGChange = true); void fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt); bool gtRemoveTreesAfterNoReturnCall(BasicBlock* block, Statement* stmt); @@ -5666,12 +5569,15 @@ class Compiler void fgExpandQmarkNodes(); bool fgSimpleLowerCastOfSmpOp(LIR::Range& range, GenTreeCast* cast); + bool fgSimpleLowerBswap16(LIR::Range& range, GenTree* op); #if FEATURE_LOOP_ALIGN bool shouldAlignLoop(FlowGraphNaturalLoop* loop, BasicBlock* top); PhaseStatus placeLoopAlignInstructions(); #endif + PhaseStatus TransformAsync(); + // This field keep the R2R helper call that would be inserted to trigger the constructor // of the static class. It is set as nongc or gc static base if they are imported, so // CSE can eliminate the repeated call, or the chepeast helper function that triggers it. @@ -6093,19 +5999,7 @@ class Compiler // A "primitive" type is one of the scalar types: byte, short, int, long, ref, float, double // If we can't or shouldn't use a "primitive" type then TYP_UNKNOWN is returned. // - // isVarArg is passed for use on Windows Arm64 to change the decision returned regarding - // hfa types. 
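A standalone sketch of the variadic TypeIs pattern added to LclVarDsc earlier in this hunk: a single-type overload plus a template overload that folds several candidate types into a chain of ORs. The enum and struct below are invented for illustration.

    #include <cstdio>

    enum var_kind { KIND_INT, KIND_LONG, KIND_FLOAT, KIND_STRUCT };

    struct Local
    {
        var_kind kind;

        // Base case: exactly one candidate type.
        bool KindIs(var_kind k) const
        {
            return kind == k;
        }

        // Variadic case: "is it any of these?" expands to KindIs(a) || KindIs(b) || ...
        template <typename... T>
        bool KindIs(var_kind k, T... rest) const
        {
            return KindIs(k) || KindIs(rest...);
        }
    };

    int main()
    {
        Local l{KIND_FLOAT};
        printf("%d\n", l.KindIs(KIND_INT, KIND_LONG, KIND_FLOAT)); // 1
        printf("%d\n", l.KindIs(KIND_STRUCT));                     // 0
        return 0;
    }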
- // - var_types getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd, bool isVarArg); - - // Get the type that is used to pass values of the given struct type. - // isVarArg is passed for use on Windows Arm64 to change the decision returned regarding - // hfa types. - // - var_types getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, - structPassingKind* wbPassStruct, - bool isVarArg, - unsigned structSize); + var_types getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd); // Get the type that is used to return values of the given struct type. // If the size is unknown, pass 0 and it will be determined from 'clsHnd'. @@ -6310,8 +6204,6 @@ class Compiler void fgCompactBlock(BasicBlock* block); - BasicBlock* fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst); - bool fgRenumberBlocks(); bool fgExpandRarelyRunBlocks(); @@ -6324,6 +6216,7 @@ class Compiler bool fgHeadMerge(BasicBlock* block, bool early); bool fgTryOneHeadMerge(BasicBlock* block, bool early); bool gtTreeContainsTailCall(GenTree* tree); + bool gtTreeContainsAsyncCall(GenTree* tree); bool fgCanMoveFirstStatementIntoPred(bool early, Statement* firstStmt, BasicBlock* pred); enum FG_RELOCATE_TYPE @@ -6338,9 +6231,6 @@ class Compiler void fgInsertFuncletPrologBlock(BasicBlock* block); void fgCreateFuncletPrologBlocks(); PhaseStatus fgCreateFunclets(); -#if defined(FEATURE_EH_WINDOWS_X86) - bool fgRelocateEHRegions(); -#endif // FEATURE_EH_WINDOWS_X86 bool fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock* target); bool fgFoldSimpleCondByForwardSub(BasicBlock* block); @@ -6357,18 +6247,16 @@ class Compiler bool fgOptimizeSwitchBranches(BasicBlock* block); - bool fgOptimizeSwitchJumps(); + void fgPeelSwitch(BasicBlock* block); #ifdef DEBUG void fgPrintEdgeWeights(); #endif PhaseStatus fgComputeBlockWeights(); bool fgComputeMissingBlockWeights(); - bool fgReorderBlocks(bool useProfile); - void fgDoReversePostOrderLayout(); - void fgMoveColdBlocks(); - void fgSearchImprovedLayout(); + PhaseStatus fgSearchImprovedLayout(); + template class ThreeOptLayout { static bool EdgeCmp(const FlowEdge* left, const FlowEdge* right); @@ -6380,33 +6268,34 @@ class Compiler BasicBlock** tempOrder; unsigned numCandidateBlocks; -#ifdef DEBUG - weight_t GetLayoutCost(unsigned startPos, unsigned endPos); -#endif // DEBUG + bool IsCandidateBlock(BasicBlock* block) const; + INDEBUG(weight_t GetLayoutCost(unsigned startPos, unsigned endPos);) weight_t GetCost(BasicBlock* block, BasicBlock* next); - weight_t GetPartitionCostDelta(unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End); + weight_t GetPartitionCostDelta(unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End); void SwapPartitions(unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End); - void ConsiderEdge(FlowEdge* edge); + template + bool ConsiderEdge(FlowEdge* edge); void AddNonFallthroughSuccs(unsigned blockPos); void AddNonFallthroughPreds(unsigned blockPos); bool RunGreedyThreeOptPass(unsigned startPos, unsigned endPos); - bool RunThreeOpt(); + void RunThreeOpt(); + void CompactHotJumps(); + bool ReorderBlockList(); public: - ThreeOptLayout(Compiler* comp); - void Run(); + ThreeOptLayout(Compiler* comp, BasicBlock** initialLayout, unsigned numHotBlocks); + bool Run(); }; - template - void fgMoveHotJumps(); - bool fgFuncletsAreCold(); PhaseStatus fgDetermineFirstColdBlock(); + bool fgDedupReturnComparison(BasicBlock* block); + bool fgIsForwardBranch(BasicBlock* 
bJump, BasicBlock* bDest, BasicBlock* bSrc = nullptr); bool fgUpdateFlowGraph(bool doTailDup = false, bool isPhase = false); @@ -6642,6 +6531,7 @@ class Compiler bool fgPgoSynthesized; bool fgPgoDynamic; bool fgPgoConsistent; + bool fgPgoSingleEdge = false; #ifdef DEBUG bool fgPgoDeferredInconsistency; @@ -6668,6 +6558,7 @@ class Compiler } void fgRemoveProfileData(const char* reason); + PhaseStatus fgRepairProfile(); void fgRepairProfileCondToUncond(BasicBlock* block, FlowEdge* retainedEdge, FlowEdge* removedEdge, int* metric = nullptr); //-------- Insert a statement at the start or end of a basic block -------- @@ -6689,12 +6580,12 @@ class Compiler void fgInsertStmtAfter(BasicBlock* block, Statement* insertionPoint, Statement* stmt); void fgInsertStmtBefore(BasicBlock* block, Statement* insertionPoint, Statement* stmt); -private: - Statement* fgInsertStmtListAfter(BasicBlock* block, Statement* stmtAfter, Statement* stmtList); - // Create a new temporary variable to hold the result of *ppTree, // and transform the graph accordingly. GenTree* fgInsertCommaFormTemp(GenTree** ppTree); + +private: + Statement* fgInsertStmtListAfter(BasicBlock* block, Statement* stmtAfter, Statement* stmtList); TempInfo fgMakeTemp(GenTree* value); GenTree* fgMakeMultiUse(GenTree** ppTree); @@ -6718,6 +6609,7 @@ class Compiler //------------------------- Morphing -------------------------------------- +<<<<<<< HEAD unsigned fgPtrArgCntMax = 0; public: @@ -6746,6 +6638,8 @@ class Compiler bool fgIsThrow(GenTree* tree); private: +======= +>>>>>>> upstream-jun hashBv* fgAvailableOutgoingArgTemps; ArrayStack* fgUsedSharedTemps = nullptr; @@ -6798,7 +6692,7 @@ class Compiler #endif // FEATURE_SIMD GenTree* fgMorphIndexAddr(GenTreeIndexAddr* tree); GenTree* fgMorphExpandCast(GenTreeCast* tree); - GenTreeFieldList* fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl); + GenTreeFieldList* fgMorphLclToFieldList(GenTreeLclVar* lcl); GenTreeCall* fgMorphArgs(GenTreeCall* call); void fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg); @@ -6873,7 +6767,7 @@ class Compiler GenTree* fgMorphCopyBlock(GenTree* tree); private: GenTree* fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optAssertionPropDone = nullptr); - void fgTryReplaceStructLocalWithField(GenTree* tree); + bool fgTryReplaceStructLocalWithFields(GenTree** use); GenTree* fgMorphFinalizeIndir(GenTreeIndir* indir); GenTree* fgOptimizeCast(GenTreeCast* cast); GenTree* fgOptimizeCastOnStore(GenTree* store); @@ -6885,6 +6779,15 @@ class Compiler GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree); GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node); GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node); +#if defined(FEATURE_MASKED_HW_INTRINSICS) + GenTreeHWIntrinsic* fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* node); +#endif // FEATURE_MASKED_HW_INTRINSICS +#ifdef TARGET_ARM64 + bool canMorphVectorOperandToMask(GenTree* node); + bool canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node); + GenTree* doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent); + GenTreeHWIntrinsic* fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node); +#endif // TARGET_ARM64 #endif // FEATURE_HW_INTRINSICS GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree); GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp); @@ -6932,6 +6835,7 @@ class Compiler PhaseStatus fgEarlyLiveness(); + template void fgMarkUseDef(GenTreeLclVarCommon* tree); //------------------------------------------------------------------------- @@ 
-7097,6 +7001,9 @@ class Compiler bool gtIsTypeHandleToRuntimeTypeHelper(GenTreeCall* call); bool gtIsTypeHandleToRuntimeTypeHandleHelper(GenTreeCall* call, CorInfoHelpFunc* pHelper = nullptr); + template + GenTree* gtFindNodeInTree(GenTree* tree, Predicate predicate); + bool gtTreeContainsOper(GenTree* tree, genTreeOps op); ExceptionSetFlags gtCollectExceptions(GenTree* tree); @@ -7219,13 +7126,13 @@ class Compiler public: PhaseStatus optOptimizeBools(); - PhaseStatus optSwitchRecognition(); + PhaseStatus optRecognizeAndOptimizeSwitchJumps(); bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest); - bool optSwitchDetectAndConvert(BasicBlock* firstBlock); + bool optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingForConversion = false); PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom. PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication - PhaseStatus optOptimizeLayout(); // Optimize the BasicBlock layout of the method + PhaseStatus optOptimizePreLayout(); // Optimize flow before running block layout PhaseStatus optOptimizePostLayout(); // Run optimizations after block layout is finalized PhaseStatus optSetBlockWeights(); PhaseStatus optFindLoopsPhase(); // Finds loops and records them in the loop table @@ -7241,9 +7148,11 @@ class Compiler bool optCanonicalizeExits(FlowGraphNaturalLoop* loop); bool optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit); + + bool optLoopComplexityExceeds(FlowGraphNaturalLoop* loop, unsigned limit); PhaseStatus optCloneLoops(); - bool optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); + PhaseStatus optRangeCheckCloning(); void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info) bool optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR); @@ -7265,12 +7174,8 @@ class Compiler #endif void optResetLoopInfo(); - void optFindAndScaleGeneralLoopBlocks(); - // Determine if there are any potential loops, and set BBF_LOOP_HEAD on potential loop heads. 
- void optMarkLoopHeads(); - - void optScaleLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk); + void optScaleLoopBlocks(FlowGraphNaturalLoop* loop); bool optIsLoopTestEvalIntoTemp(Statement* testStmt, Statement** newTestStmt); unsigned optIsLoopIncrTree(GenTree* incr); @@ -7302,7 +7207,7 @@ class Compiler OptInvertCountTreeInfoType optInvertCountTreeInfo(GenTree* tree); - bool optInvertWhileLoop(BasicBlock* block); + bool optTryInvertWhileLoop(FlowGraphNaturalLoop* loop); bool optIfConvert(BasicBlock* block); private: @@ -7377,6 +7282,7 @@ class Compiler void optPrintCSEDataFlowSet(EXPSET_VALARG_TP cseDataFlowSet, bool includeBits = true); EXPSET_TP cseCallKillsMask; // Computed once - A mask that is used to kill available CSEs at callsites + EXPSET_TP cseAsyncKillsMask; // Computed once - A mask that is used to kill available BYREF CSEs at async suspension points static const size_t s_optCSEhashSizeInitial; static const size_t s_optCSEhashGrowthFactor; @@ -7465,6 +7371,7 @@ class Compiler unsigned optValnumCSE_Index(GenTree* tree, Statement* stmt); bool optValnumCSE_Locate(CSE_HeuristicCommon* heuristic); void optValnumCSE_InitDataFlow(); + void optValnumCSE_SetUpAsyncByrefKills(); void optValnumCSE_DataFlow(); void optValnumCSE_Availability(); void optValnumCSE_Heuristic(CSE_HeuristicCommon* heuristic); @@ -7591,7 +7498,6 @@ class Compiler #define OMF_HAS_EXPRUNTIMELOOKUP 0x00000080 // Method contains a runtime lookup to an expandable dictionary. #define OMF_HAS_PATCHPOINT 0x00000100 // Method contains patchpoints #define OMF_NEEDS_GCPOLLS 0x00000200 // Method needs GC polls -#define OMF_HAS_FROZEN_OBJECTS 0x00000400 // Method has frozen objects (REF constant int) #define OMF_HAS_PARTIAL_COMPILATION_PATCHPOINT 0x00000800 // Method contains partial compilation patchpoints #define OMF_HAS_TAILCALL_SUCCESSOR 0x00001000 // Method has potential tail call in a non BBJ_RETURN block #define OMF_HAS_MDNEWARRAY 0x00002000 // Method contains 'new' of an MD array @@ -7602,6 +7508,7 @@ class Compiler #define OMF_HAS_RECURSIVE_TAILCALL 0x00040000 // Method contains recursive tail call #define OMF_HAS_EXPANDABLE_CAST 0x00080000 // Method contains casts eligible for late expansion #define OMF_HAS_STACK_ARRAY 0x00100000 // Method contains stack allocated arrays +#define OMF_HAS_BOUNDS_CHECKS 0x00200000 // Method contains bounds checks // clang-format on @@ -7622,24 +7529,24 @@ class Compiler void addFatPointerCandidate(GenTreeCall* call); - bool doesMethodHaveFrozenObjects() const + bool doesMethodHaveStaticInit() { - return (optMethodFlags & OMF_HAS_FROZEN_OBJECTS) != 0; + return (optMethodFlags & OMF_HAS_STATIC_INIT) != 0; } - void setMethodHasFrozenObjects() + void setMethodHasStaticInit() { - optMethodFlags |= OMF_HAS_FROZEN_OBJECTS; + optMethodFlags |= OMF_HAS_STATIC_INIT; } - bool doesMethodHaveStaticInit() + bool doesMethodHaveBoundsChecks() { - return (optMethodFlags & OMF_HAS_STATIC_INIT) != 0; + return (optMethodFlags & OMF_HAS_BOUNDS_CHECKS) != 0; } - void setMethodHasStaticInit() + void setMethodHasBoundsChecks() { - optMethodFlags |= OMF_HAS_STATIC_INIT; + optMethodFlags |= OMF_HAS_BOUNDS_CHECKS; } bool doesMethodHaveExpandableCasts() @@ -7708,7 +7615,8 @@ class Compiler CORINFO_CLASS_HANDLE* classGuesses, CORINFO_METHOD_HANDLE* methodGuesses, int* candidatesCount, - unsigned* likelihoods); + unsigned* likelihoods, + bool verboseLogging = true); void considerGuardedDevirtualization(GenTreeCall* call, IL_OFFSET ilOffset, @@ -7849,33 +7757,30 @@ class Compiler void 
optVisitBoundingExitingCondBlocks(FlowGraphNaturalLoop* loop, TFunctor func); bool optMakeLoopDownwardsCounted(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, - LoopLocalOccurrences* loopLocals); + PerLoopInfo* loopLocals); bool optMakeExitTestDownwardsCounted(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, BasicBlock* exiting, - LoopLocalOccurrences* loopLocals); + PerLoopInfo* loopLocals); bool optCanAndShouldChangeExitTest(GenTree* cond, bool dump); - bool optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals); + bool optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loop, PerLoopInfo* loopLocals); bool optLocalIsLiveIntoBlock(unsigned lclNum, BasicBlock* block); - bool optWidenIVs(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals); - bool optWidenPrimaryIV(FlowGraphNaturalLoop* loop, - unsigned lclNum, - ScevAddRec* addRec, - LoopLocalOccurrences* loopLocals); + bool optWidenIVs(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, PerLoopInfo* loopLocals); + bool optWidenPrimaryIV(FlowGraphNaturalLoop* loop, unsigned lclNum, ScevAddRec* addRec, PerLoopInfo* loopLocals); bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); bool optIsIVWideningProfitable(unsigned lclNum, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop, - LoopLocalOccurrences* loopLocals); + PerLoopInfo* loopLocals); void optBestEffortReplaceNarrowIVUses( unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt); void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt); void optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop); - bool optRemoveUnusedIVs(FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals); + bool optRemoveUnusedIVs(FlowGraphNaturalLoop* loop, PerLoopInfo* loopLocals); bool optIsUpdateOfIVWithoutSideEffects(GenTree* tree, unsigned lclNum); // Redundant branch opts @@ -7903,9 +7808,10 @@ class Compiler BitVecTraits* apTraits; ASSERT_TP apFull; ASSERT_TP apLocal; + ASSERT_TP apLocalPostorder; ASSERT_TP apLocalIfTrue; - enum optAssertionKind + enum optAssertionKind : uint8_t { OAK_INVALID, OAK_EQUAL, @@ -7915,10 +7821,11 @@ class Compiler OAK_COUNT }; - enum optOp1Kind + enum optOp1Kind : uint8_t { O1K_INVALID, O1K_LCLVAR, + O1K_VN, O1K_ARR_BND, O1K_BOUND_OPER_BND, O1K_BOUND_LOOP_BND, @@ -7927,15 +7834,15 @@ class Compiler O1K_EXACT_TYPE, O1K_SUBTYPE, O1K_COUNT + // NOTE: as of today, only LCLVAR is used by both Local and Global assertion prop + // the rest are used only by Global assertion prop. 
}; - enum optOp2Kind : uint16_t + enum optOp2Kind : uint8_t { O2K_INVALID, O2K_LCLVAR_COPY, - O2K_IND_CNS_INT, O2K_CONST_INT, - O2K_CONST_LONG, O2K_CONST_DOUBLE, O2K_ZEROOBJ, O2K_SUBRANGE, @@ -7945,11 +7852,6 @@ class Compiler struct AssertionDsc { optAssertionKind assertionKind; - struct SsaVar - { - unsigned lclNum; // assigned to or property of this local var number - unsigned ssaNum; - }; struct ArrBnd { ValueNum vnIdx; @@ -7961,8 +7863,8 @@ class Compiler ValueNum vn; union { - SsaVar lcl; - ArrBnd bnd; + unsigned lclNum; + ArrBnd bnd; }; } op1; struct AssertionDscOp2 @@ -7974,17 +7876,13 @@ class Compiler ValueNum vn; struct IntVal { - ssize_t iconVal; // integer -#if !defined(HOST_64BIT) - unsigned padding; // unused; ensures iconFlags does not overlap lconVal -#endif + ssize_t iconVal; // integer FieldSeq* fieldSeq; }; union { - SsaVar lcl; + unsigned lclNum; IntVal u1; - int64_t lconVal; double dconVal; IntegralRange u2; }; @@ -8043,7 +7941,8 @@ class Compiler bool IsConstantInt32Assertion() { - return ((assertionKind == OAK_EQUAL) || (assertionKind == OAK_NOT_EQUAL)) && (op2.kind == O2K_CONST_INT); + return ((assertionKind == OAK_EQUAL) || (assertionKind == OAK_NOT_EQUAL)) && (op2.kind == O2K_CONST_INT) && + ((op1.kind == O1K_LCLVAR) || (op1.kind == O1K_VN)); } bool CanPropLclVar() @@ -8063,7 +7962,7 @@ class Compiler bool CanPropBndsCheck() { - return op1.kind == O1K_ARR_BND; + return (op1.kind == O1K_ARR_BND) || (op1.kind == O1K_VN); } bool CanPropSubRange() @@ -8101,10 +8000,14 @@ class Compiler assert(vnBased); return (op1.bnd.vnIdx == that->op1.bnd.vnIdx) && (op1.bnd.vnLen == that->op1.bnd.vnLen); } + else if (op1.kind == O1K_VN) + { + assert(vnBased); + return (op1.vn == that->op1.vn); + } else { - return ((vnBased && (op1.vn == that->op1.vn)) || - (!vnBased && (op1.lcl.lclNum == that->op1.lcl.lclNum))); + return ((vnBased && (op1.vn == that->op1.vn)) || (!vnBased && (op1.lclNum == that->op1.lclNum))); } } @@ -8117,13 +8020,9 @@ class Compiler switch (op2.kind) { - case O2K_IND_CNS_INT: case O2K_CONST_INT: return ((op2.u1.iconVal == that->op2.u1.iconVal) && (op2.GetIconFlag() == that->op2.GetIconFlag())); - case O2K_CONST_LONG: - return (op2.lconVal == that->op2.lconVal); - case O2K_CONST_DOUBLE: // exact match because of positive and negative zero. 
return (memcmp(&op2.dconVal, &that->op2.dconVal, sizeof(double)) == 0); @@ -8132,8 +8031,7 @@ class Compiler return true; case O2K_LCLVAR_COPY: - return (op2.lcl.lclNum == that->op2.lcl.lclNum) && - (!vnBased || (op2.lcl.ssaNum == that->op2.lcl.ssaNum)); + return op2.lclNum == that->op2.lclNum; case O2K_SUBRANGE: return op2.u2.Equals(that->op2.u2); @@ -8197,6 +8095,9 @@ class Compiler bool optCanPropBndsChk; bool optCanPropSubRange; + RangeCheck* optRangeCheck = nullptr; + RangeCheck* GetRangeCheck(); + public: void optVnNonNullPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree); fgWalkResult optVNBasedFoldCurStmt(BasicBlock* block, Statement* stmt, GenTree* parent, GenTree* tree); @@ -8235,10 +8136,7 @@ class Compiler AssertionIndex optAssertionGenCast(GenTreeCast* cast); AssertionInfo optCreateJTrueBoundsAssertion(GenTree* tree); AssertionInfo optAssertionGenJtrue(GenTree* tree); - AssertionIndex optCreateJtrueAssertions(GenTree* op1, - GenTree* op2, - Compiler::optAssertionKind assertionKind, - bool helperCallArgs = false); + AssertionIndex optCreateJtrueAssertions(GenTree* op1, GenTree* op2, optAssertionKind assertionKind); AssertionIndex optFindComplementary(AssertionIndex assertionIndex); void optMapComplementary(AssertionIndex assertionIndex, AssertionIndex index); @@ -8247,19 +8145,13 @@ class Compiler ssize_t optCastConstantSmall(ssize_t iconVal, var_types smallType); // Assertion creation functions. - AssertionIndex optCreateAssertion(GenTree* op1, - GenTree* op2, - optAssertionKind assertionKind, - bool helperCallArgs = false); + AssertionIndex optCreateAssertion(GenTree* op1, GenTree* op2, optAssertionKind assertionKind); AssertionIndex optFinalizeCreatingAssertion(AssertionDsc* assertion); bool optTryExtractSubrangeAssertion(GenTree* source, IntegralRange* pRange); - void optCreateComplementaryAssertion(AssertionIndex assertionIndex, - GenTree* op1, - GenTree* op2, - bool helperCallArgs = false); + void optCreateComplementaryAssertion(AssertionIndex assertionIndex, GenTree* op1, GenTree* op2); bool optAssertionVnInvolvesNan(AssertionDsc* assertion); AssertionIndex optAddAssertion(AssertionDsc* assertion); @@ -8297,15 +8189,18 @@ class Compiler GenTree* optAssertionProp_LclFld(ASSERT_VALARG_TP assertions, GenTreeLclVarCommon* tree, Statement* stmt); GenTree* optAssertionProp_LocalStore(ASSERT_VALARG_TP assertions, GenTreeLclVarCommon* store, Statement* stmt); GenTree* optAssertionProp_BlockStore(ASSERT_VALARG_TP assertions, GenTreeBlk* store, Statement* stmt); - GenTree* optAssertionProp_ModDiv(ASSERT_VALARG_TP assertions, GenTreeOp* tree, Statement* stmt); + GenTree* optAssertionProp_ModDiv(ASSERT_VALARG_TP assertions, GenTreeOp* tree, Statement* stmt, BasicBlock* block); GenTree* optAssertionProp_Return(ASSERT_VALARG_TP assertions, GenTreeOp* ret, Statement* stmt); GenTree* optAssertionProp_Ind(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt); - GenTree* optAssertionProp_Cast(ASSERT_VALARG_TP assertions, GenTreeCast* cast, Statement* stmt); + GenTree* optAssertionProp_Cast(ASSERT_VALARG_TP assertions, GenTreeCast* cast, Statement* stmt, BasicBlock* block); GenTree* optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCall* call, Statement* stmt); - GenTree* optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt); + GenTree* optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt, BasicBlock* block); GenTree* optAssertionProp_Comma(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt); 
GenTree* optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt); - GenTree* optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt); + GenTree* optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, + GenTree* tree, + Statement* stmt, + BasicBlock* block); GenTree* optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt); GenTree* optAssertionProp_Update(GenTree* newTree, GenTree* tree, Statement* stmt); GenTree* optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCall* call); @@ -8322,13 +8217,15 @@ class Compiler void optAssertionProp_RangeProperties(ASSERT_VALARG_TP assertions, GenTree* tree, + Statement* stmt, + BasicBlock* block, bool* isKnownNonZero, bool* isKnownNonNegative); // Implied assertion functions. void optImpliedAssertions(AssertionIndex assertionIndex, ASSERT_TP& activeAssertions); void optImpliedByTypeOfAssertions(ASSERT_TP& activeAssertions); - void optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion, ASSERT_TP& result); + bool optCreateJumpTableImpliedAssertions(BasicBlock* switchBb); void optImpliedByConstAssertion(AssertionDsc* curAssertion, ASSERT_TP& result); #ifdef DEBUG @@ -8372,6 +8269,7 @@ class Compiler bool optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lclNum); bool optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); + bool optExtractSpanIndex(GenTree* tree, SpanIndex* result); bool optReconstructArrIndexHelp(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); bool optReconstructArrIndex(GenTree* tree, ArrIndex* result); bool optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context); @@ -8493,7 +8391,7 @@ class Compiler // and do not do any SPMI handling. There are then convenience printing // functions exposed on top that have SPMI handling and additional buffer // handling. Note that the strings returned are never truncated here. - void eePrintJitType(class StringPrinter* printer, var_types jitType); + void eePrintCorInfoType(class StringPrinter* printer, CorInfoType corInfoType); void eePrintType(class StringPrinter* printer, CORINFO_CLASS_HANDLE clsHnd, bool includeInstantiation); void eePrintTypeOrJitAlias(class StringPrinter* printer, CORINFO_CLASS_HANDLE clsHnd, bool includeInstantiation); void eePrintMethod(class StringPrinter* printer, @@ -8594,16 +8492,8 @@ class Compiler reg = REG_EAX; regMask = RBM_EAX; #elif defined(TARGET_AMD64) - if (isNativeAOT) - { - reg = REG_R10; - regMask = RBM_R10; - } - else - { - reg = REG_R11; - regMask = RBM_R11; - } + reg = REG_R11; + regMask = RBM_R11; #elif defined(TARGET_ARM) if (isNativeAOT) { @@ -8659,7 +8549,7 @@ class Compiler // We explicitly block these APIs from being expanded in R2R // since we know they are non-deterministic across hardware - if (opts.IsReadyToRun() && !IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + if (IsReadyToRun()) { if (mustExpand) { @@ -9138,46 +9028,6 @@ class Compiler XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ - bool IsBaselineSimdIsaSupported() - { -#ifdef FEATURE_SIMD -#if defined(TARGET_XARCH) - CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2; -#elif defined(TARGET_ARM64) - CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; -#elif defined(TARGET_LOONGARCH64) - // TODO: supporting SIMD feature for LoongArch64. 
- assert(!"unimplemented yet on LA"); - CORINFO_InstructionSet minimumIsa = 0; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_LOONGARCH64 - - return compOpportunisticallyDependsOn(minimumIsa); -#else - return false; -#endif - } - -#if defined(DEBUG) - bool IsBaselineSimdIsaSupportedDebugOnly() - { -#ifdef FEATURE_SIMD -#if defined(TARGET_XARCH) - CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2; -#elif defined(TARGET_ARM64) - CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - return compIsaSupportedDebugOnly(minimumIsa); -#else - return false; -#endif // FEATURE_SIMD - } -#endif // DEBUG - bool isIntrinsicType(CORINFO_CLASS_HANDLE clsHnd) { return info.compCompHnd->isIntrinsicType(clsHnd); @@ -9446,11 +9296,11 @@ class Compiler // X86.SSE: 16-byte Vector and Vector128 // X86.AVX: 16-byte Vector and Vector256 // X86.AVX2: 32-byte Vector and Vector256 - // X86.AVX512F: 32-byte Vector and Vector512 + // X86.AVX512: 32-byte Vector and Vector512 uint32_t getMaxVectorByteLength() const { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { return ZMM_REGSIZE_BYTES; } @@ -9458,29 +9308,12 @@ class Compiler { return YMM_REGSIZE_BYTES; } - else if (compOpportunisticallyDependsOn(InstructionSet_SSE)) - { - return XMM_REGSIZE_BYTES; - } else { - // TODO: We should be returning 0 here, but there are a number of - // places that don't quite get handled correctly in that scenario - return XMM_REGSIZE_BYTES; } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) - { - return FP_REGSIZE_BYTES; - } - else - { - // TODO: We should be returning 0 here, but there are a number of - // places that don't quite get handled correctly in that scenario - - return FP_REGSIZE_BYTES; - } + return FP_REGSIZE_BYTES; #else assert(!"getMaxVectorByteLength() unimplemented on target arch"); unreached(); @@ -9680,7 +9513,7 @@ class Compiler assert(size > 0); var_types result = TYP_UNDEF; #ifdef FEATURE_SIMD - if (IsBaselineSimdIsaSupported() && (roundDownSIMDSize(size) > 0)) + if (roundDownSIMDSize(size) > 0) { return getSIMDTypeForSize(roundDownSIMDSize(size)); } @@ -9904,7 +9737,7 @@ class Compiler // on which the function is executed (except for CoreLib, where there are special rules) bool compExactlyDependsOn(CORINFO_InstructionSet isa) const { -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) if ((opts.compSupportsISAReported.HasInstructionSet(isa)) == false) { if (notifyInstructionSetUsage(isa, (opts.compSupportsISA.HasInstructionSet(isa)))) @@ -9940,45 +9773,7 @@ class Compiler return opts.compSupportsISA.HasInstructionSet(isa); } - // Following cases should be taken into consideration when using the below APIs: - // InstructionSet_EVEX implies Avx10v1 -or- Avx512F+CD+DQ+BW+VL and can be used for 128-bit or 256-bit EVEX encoding - // instructions in these instruction sets InstructionSet_Avx10v1_V512 should never be queried directly, it is - // covered by querying Avx512* InstructionSet_Avx512F (and same for BW, CD, DQ) is only queried for 512-bit EVEX - // encoded instructions - // InstructionSet_Avx10v1 is only queried for cases like 128-bit/256-bit instructions that wouldn't be in - // F+CD+DQ+BW+VL (such as VBMI) and should appear 
with a corresponding query around AVX512*_VL (i.e. AVX512_VBMI_VL) - #ifdef DEBUG - //------------------------------------------------------------------------ - // IsBaselineVector256IsaSupportedDebugOnly - Does isa support exist for Vector256. - // - // Returns: - // `true` if AVX. - // - bool IsBaselineVector256IsaSupportedDebugOnly() const - { -#ifdef TARGET_XARCH - return compIsaSupportedDebugOnly(InstructionSet_AVX); -#else - return false; -#endif - } - - //------------------------------------------------------------------------ - // IsBaselineVector512IsaSupportedDebugOnly - Does isa support exist for Vector512. - // - // Returns: - // `true` if AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL are supported. - // - bool IsBaselineVector512IsaSupportedDebugOnly() const - { -#ifdef TARGET_XARCH - return compIsaSupportedDebugOnly(InstructionSet_AVX512F); -#else - return false; -#endif - } - //------------------------------------------------------------------------ // canUseEvexEncodingDebugOnly - Answer the question: Is Evex encoding supported on this target. // @@ -9988,64 +9783,13 @@ class Compiler bool canUseEvexEncodingDebugOnly() const { #ifdef TARGET_XARCH - return (compIsaSupportedDebugOnly(InstructionSet_EVEX)); -#else - return false; -#endif - } - - //------------------------------------------------------------------------ - // IsAvx10OrIsaSupportedDebugOnly - Answer the question: Is AVX10v1 or the given ISA supported. - // - // Returns: - // `true` if AVX10v1 or the given ISA is supported, `false` if not. - // - bool IsAvx10OrIsaSupportedDebugOnly(CORINFO_InstructionSet isa) const - { -#ifdef TARGET_XARCH - // For the below cases, check for evex encoding should be used. - assert(isa != InstructionSet_AVX512F || isa != InstructionSet_AVX512F_VL || isa != InstructionSet_AVX512BW || - isa != InstructionSet_AVX512BW_VL || isa != InstructionSet_AVX512CD || - isa != InstructionSet_AVX512CD_VL || isa != InstructionSet_AVX512DQ || - isa != InstructionSet_AVX512DQ_VL); - - return (compIsaSupportedDebugOnly(InstructionSet_AVX10v1) || compIsaSupportedDebugOnly(isa)); + return compIsaSupportedDebugOnly(InstructionSet_AVX512); #else return false; #endif } #endif // DEBUG - //------------------------------------------------------------------------ - // IsBaselineVector512IsaSupportedOpportunistically - Does opportunistic isa support exist for Vector512. - // - // Returns: - // `true` if AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL are supported. - // - bool IsBaselineVector512IsaSupportedOpportunistically() const - { -#ifdef TARGET_XARCH - return compOpportunisticallyDependsOn(InstructionSet_AVX512F); -#else - return false; -#endif - } - - //------------------------------------------------------------------------ - // IsAvx10OrIsaSupportedOpportunistically - Does opportunistic isa support exist for AVX10v1 or the given ISA. - // - // Returns: - // `true` if AVX10v1 or the given ISA is supported, `false` if not. 
- // - bool IsAvx10OrIsaSupportedOpportunistically(CORINFO_InstructionSet isa) const - { -#ifdef TARGET_XARCH - return (compOpportunisticallyDependsOn(InstructionSet_AVX10v1) || compOpportunisticallyDependsOn(isa)); -#else - return false; -#endif - } - bool canUseEmbeddedBroadcast() const { return JitConfig.EnableEmbeddedBroadcast(); @@ -10059,34 +9803,6 @@ class Compiler #ifdef TARGET_XARCH public: - //------------------------------------------------------------------------ - // compIsEvexOpportunisticallySupported - Checks for whether AVX10v1 or avx512InstructionSet is supported - // opportunistically. - // - // Returns: - // returns true if AVX10v1 or avx512InstructionSet is supported opportunistically and - // sets isV512Supported to true if AVX512F is supported, false otherwise. - // - bool compIsEvexOpportunisticallySupported(bool& isV512Supported, - CORINFO_InstructionSet avx512InstructionSet = InstructionSet_AVX512F) - { - assert(avx512InstructionSet == InstructionSet_AVX512F || avx512InstructionSet == InstructionSet_AVX512F_VL || - avx512InstructionSet == InstructionSet_AVX512BW || avx512InstructionSet == InstructionSet_AVX512BW_VL || - avx512InstructionSet == InstructionSet_AVX512CD || avx512InstructionSet == InstructionSet_AVX512CD_VL || - avx512InstructionSet == InstructionSet_AVX512DQ || avx512InstructionSet == InstructionSet_AVX512DQ_VL || - avx512InstructionSet == InstructionSet_AVX512VBMI || - avx512InstructionSet == InstructionSet_AVX512VBMI_VL); - - if (compOpportunisticallyDependsOn(avx512InstructionSet)) - { - isV512Supported = true; - return true; - } - - isV512Supported = false; - return compOpportunisticallyDependsOn(InstructionSet_AVX10v1); - } - bool canUseVexEncoding() const { return compOpportunisticallyDependsOn(InstructionSet_AVX); @@ -10100,7 +9816,7 @@ class Compiler // bool canUseEvexEncoding() const { - return (compOpportunisticallyDependsOn(InstructionSet_EVEX)); + return compOpportunisticallyDependsOn(InstructionSet_AVX512); } //------------------------------------------------------------------------ @@ -10126,25 +9842,7 @@ class Compiler #ifdef DEBUG // Using JitStressEVEXEncoding flag will force instructions which would // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding - // This requires AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL support - - if (JitStressEvexEncoding() && IsBaselineVector512IsaSupportedOpportunistically()) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW_VL)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD_VL)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ_VL)); - - return true; - } - else if (JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - return true; - } + return JitStressEvexEncoding() && canUseEvexEncoding(); #endif // DEBUG return false; @@ -10159,7 +9857,7 @@ class Compiler bool DoJitStressRex2Encoding() const { #ifdef DEBUG - if (JitConfig.JitStressRex2Encoding()) + if (JitConfig.JitStressRex2Encoding() && compOpportunisticallyDependsOn(InstructionSet_APX)) { // we should make sure EVEX is also stressed when REX2 is stressed, as we will need to guarantee EGPR // functionality is properly turned on for every 
instructions when REX2 is stress. @@ -10193,7 +9891,7 @@ class Compiler bool DoJitStressPromotedEvexEncoding() const { #ifdef DEBUG - if (JitConfig.JitStressPromotedEvexEncoding()) + if (JitConfig.JitStressPromotedEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_APX)) { return true; } @@ -10407,18 +10105,6 @@ class Compiler return !!(compFlags & optFlag); } -#ifdef FEATURE_READYTORUN - bool IsReadyToRun() const - { - return jitFlags->IsSet(JitFlags::JIT_FLAG_READYTORUN); - } -#else - bool IsReadyToRun() const - { - return false; - } -#endif - // Check if the compilation is control-flow guard enabled. bool IsCFGEnabled() const { @@ -10550,7 +10236,7 @@ class Compiler #endif // defined(DEBUG) && defined(TARGET_X86) - bool compReloc; // Generate relocs for pointers in code, true for all ngen/prejit codegen + bool compReloc; // Generate relocs for pointers in code, true for all AOT codegen #ifdef DEBUG #if defined(TARGET_XARCH) @@ -10599,7 +10285,7 @@ class Compiler bool disasmWithGC; // Display GC info interleaved with disassembly. bool disAddr; // Display process address next to each instruction in disassembly code bool disAsm2; // Display native code after it is generated using external disassembler - bool dspOrder; // Display names of each of the methods that we ngen/jit + bool dspOrder; // Display names of each of the methods that we compile bool dspUnwind; // Display the unwind info output bool compLongAddress; // Force using large pseudo instructions for long address // (IF_LARGEJMP/IF_LARGEADR/IF_LARGLDC) @@ -10702,8 +10388,26 @@ class Compiler // Collect 64 bit counts for PGO data. bool compCollect64BitCounts; + // Allow inlining of methods with EH. + bool compInlineMethodsWithEH; + } opts; + bool IsAot() const + { + return opts.jitFlags->IsSet(JitFlags::JIT_FLAG_AOT); + } + + bool IsNativeAot() + { + return IsAot() && IsTargetAbi(CORINFO_NATIVEAOT_ABI); + } + + bool IsReadyToRun() + { + return IsAot() && !IsTargetAbi(CORINFO_NATIVEAOT_ABI); + } + static bool s_pAltJitExcludeAssembliesListInitialized; static AssemblyNamesList2* s_pAltJitExcludeAssembliesList; @@ -10906,7 +10610,7 @@ class Compiler { #if 0 // Switching between size & speed has measurable throughput impact - // (3.5% on NGen CoreLib when measured). It used to be enabled for + // (3.5% on AOT CoreLib when measured). It used to be enabled for // DEBUG, but should generate identical code between CHK & RET builds, // so that's not acceptable. // TODO-Throughput: Figure out what to do about size vs. speed & throughput. @@ -10970,22 +10674,19 @@ class Compiler // (2) the code is hot/cold split, and we issued less code than we expected // in the cold section (the hot section will always be padded out to compTotalHotCodeSize). - bool compIsStatic : 1; // Is the method static (no 'this' pointer)? - bool compIsVarArgs : 1; // Does the method have varargs parameters? - bool compInitMem : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options? - bool compProfilerCallback : 1; // JIT inserted a profiler Enter callback - bool compPublishStubParam : 1; // EAX captured in prolog will be available through an intrinsic - bool compHasNextCallRetAddr : 1; // The NextCallReturnAddress intrinsic is used. + bool compIsStatic : 1; // Is the method static (no 'this' pointer)? + bool compIsVarArgs : 1; // Does the method have varargs parameters? + bool compInitMem : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options? 
+ bool compProfilerCallback : 1; // JIT inserted a profiler Enter callback + bool compPublishStubParam : 1; // EAX captured in prolog will be available through an intrinsic + bool compHasNextCallRetAddr : 1; // The NextCallReturnAddress intrinsic is used. + bool compUsesAsyncContinuation : 1; // The AsyncCallContinuation intrinsic is used. var_types compRetType; // Return type of the method as declared in IL (including SIMD normalization) var_types compRetNativeType; // Normalized return type as per target arch ABI unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden) unsigned compArgsCount; // Number of arguments (incl. implicit and hidden) -#if FEATURE_FASTTAILCALL - unsigned compArgStackSize; // Incoming argument stack size in bytes -#endif // FEATURE_FASTTAILCALL - unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present); unsigned compTypeCtxtArg; // position of hidden param for type context for generic code // (CORINFO_CALLCONV_PARAMTYPE) @@ -11041,7 +10742,15 @@ class Compiler { return info.compMethodSuperPMIIndex != -1; } -#endif // DEBUG +#else // !DEBUG + // Are we running a replay under SuperPMI? + // Note: you can certainly run a SuperPMI replay with a non-DEBUG JIT, and if necessary and useful we could + // make compMethodSuperPMIIndex always available. + bool RunningSuperPmiReplay() const + { + return false; + } +#endif // !DEBUG ReturnTypeDesc compRetTypeDesc; // ABI return type descriptor for the method @@ -11104,6 +10813,11 @@ class Compiler #endif // TARGET_AMD64 } + bool compIsAsync() const + { + return opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ASYNC); + } + //------------------------------------------------------------------------ // compMethodReturnsMultiRegRetType: Does this method return a multi-reg value? // @@ -11128,6 +10842,13 @@ class Compiler bool compObjectStackAllocation() { + if (compIsAsync()) + { + // Object stack allocation takes the address of locals around + // suspension points. Disable entirely for now. 
+ return false; + } + return (JitConfig.JitObjectStackAllocation() != 0); } @@ -11180,8 +10901,6 @@ class Compiler unsigned typGetBlkLayoutNum(unsigned blockSize); // Get the layout for the specified array of known length ClassLayout* typGetArrayLayout(CORINFO_CLASS_HANDLE classHandle, unsigned length); - // Get the number of a layout for the specified array of known length - unsigned typGetArrayLayoutNum(CORINFO_CLASS_HANDLE classHandle, unsigned length); var_types TypeHandleToVarType(CORINFO_CLASS_HANDLE handle, ClassLayout** pLayout = nullptr); var_types TypeHandleToVarType(CorInfoType jitType, CORINFO_CLASS_HANDLE handle, ClassLayout** pLayout = nullptr); @@ -11209,9 +10928,10 @@ class Compiler size_t compInfoBlkSize; BYTE* compInfoBlkAddr; - EHblkDsc* compHndBBtab = nullptr; // array of EH data - unsigned compHndBBtabCount = 0; // element count of used elements in EH data array - unsigned compHndBBtabAllocCount = 0; // element count of allocated elements in EH data array + EHblkDsc* compHndBBtab = nullptr; // array of EH data + unsigned compHndBBtabCount = 0; // element count of used elements in EH data array + unsigned compHndBBtabAllocCount = 0; // element count of allocated elements in EH data array + unsigned short compEHID = 0; // unique ID for EH data array entries #if defined(FEATURE_EH_WINDOWS_X86) @@ -11250,12 +10970,6 @@ class Compiler unsigned compVSQuirkStackPaddingNeeded; #endif - unsigned compArgSize; // total size of arguments in bytes (including register args (lvIsRegArg)) - -#if defined(TARGET_ARM) || defined(TARGET_RISCV64) - bool compHasSplitParam; -#endif - unsigned compMapILargNum(unsigned ILargNum); // map accounting for hidden args unsigned compMapILvarNum(unsigned ILvarNum); // map accounting for hidden args unsigned compMap2ILvarNum(unsigned varNum) const; // map accounting for hidden args @@ -11663,10 +11377,12 @@ class Compiler #define DEFAULT_MAX_INLINE_SIZE \ 100 // Methods with > DEFAULT_MAX_INLINE_SIZE IL bytes will never be inlined. - // This can be overwritten by setting DOTNET_JITInlineSize env variable. + // This can be overwritten by setting DOTNET_JitInlineSize env variable. #define DEFAULT_MAX_INLINE_DEPTH 20 // Methods at more than this level deep will not be inlined +#define DEFAULT_INLINE_BUDGET 20 // Maximum estimated compile time increase via inlining + #define DEFAULT_MAX_FORCE_INLINE_DEPTH 1 // Methods at more than this level deep will not be force inlined #define DEFAULT_MAX_LOCALLOC_TO_LOCAL_SIZE 32 // fixed locallocs of this size or smaller will convert to local buffers @@ -11696,12 +11412,10 @@ class Compiler #if defined(DEBUG) // These variables are associated with maintaining SQM data about compile time. - uint64_t m_compCyclesAtEndOfInlining; // The thread-virtualized cycle count at the end of the inlining phase - // in the current compilation. - uint64_t m_compCycles; // Net cycle count for current compilation - DWORD m_compTickCountAtEndOfInlining; // The result of GetTickCount() (# ms since some epoch marker) at the end of - // the inlining phase in the current compilation. -#endif // defined(DEBUG) + int64_t m_compCyclesAtEndOfInlining; // Raw timer count at the end of the inlining phase + // in the current compilation. + int64_t m_compCycles; // Wall clock elapsed time for current compilation (microseconds) +#endif // defined(DEBUG) // Records the SQM-relevant (cycles and tick count). Should be called after inlining is complete. 
// (We do this after inlining because this marks the last point at which the JIT is likely to cause @@ -11845,15 +11559,15 @@ class Compiler const CORINFO_FPSTRUCT_LOWERING* GetFpStructLowering(CORINFO_CLASS_HANDLE structHandle); #endif // defined(UNIX_AMD64_ABI) - void fgMorphMultiregStructArgs(GenTreeCall* call); - GenTree* fgMorphMultiregStructArg(CallArg* arg); + bool fgTryMorphStructArg(CallArg* arg); + bool FieldsMatchAbi(LclVarDsc* varDsc, const ABIPassingInformation& abiInfo); bool killGCRefs(GenTree* tree); #if defined(TARGET_AMD64) private: // The following are for initializing register allocator "constants" defined in targetamd64.h - // that now depend upon runtime ISA information, e.g., the presence of AVX512F/VL, which increases + // that now depend upon runtime ISA information, e.g., the presence of AVX512, which increases // the number of SIMD (xmm, ymm, and zmm) registers from 16 to 32. // As only 64-bit xarch has the capability to have the additional registers, we limit the changes // to TARGET_AMD64 only. @@ -11918,7 +11632,7 @@ class Compiler #if defined(TARGET_XARCH) private: // The following are for initializing register allocator "constants" defined in targetamd64.h - // that now depend upon runtime ISA information, e.g., the presence of AVX512F/VL, which adds + // that now depend upon runtime ISA information, e.g., the presence of AVX512, which adds // 8 mask registers for use. // // Users of these values need to define four accessor functions: @@ -12091,6 +11805,7 @@ class GenTreeVisitor // Leaf nodes case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -12124,6 +11839,7 @@ class GenTreeVisitor case GT_IL_OFFSET: case GT_NOP: case GT_SWIFT_ERROR: + case GT_GCPOLL: break; // Lclvar unary operators @@ -12166,6 +11882,7 @@ class GenTreeVisitor case GT_RETURNTRAP: case GT_FIELD_ADDR: case GT_RETURN: + case GT_RETURN_SUSPEND: case GT_RETFILT: case GT_RUNTIMELOOKUP: case GT_ARR_ADDR: @@ -12679,32 +12396,6 @@ extern Histogram bbOneBBSizeTable; extern Histogram computeReachabilitySetsIterationTable; #endif -/***************************************************************************** - * - * Used by optFindNaturalLoops to gather statistical information such as - * - total number of natural loops - * - number of loops with 1, 2, ... exit conditions - * - number of loops that have an iterator (for like) - * - number of loops that have a constant iterator - */ - -#if COUNT_LOOPS - -extern unsigned totalLoopMethods; // counts the total number of methods that have natural loops -extern unsigned maxLoopsPerMethod; // counts the maximum number of loops a method has -extern unsigned totalLoopCount; // counts the total number of natural loops -extern unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops -extern unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent -extern unsigned iterLoopCount; // counts the # of loops with an iterator (for like) -extern unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like) -extern bool hasMethodLoops; // flag to keep track if we already counted a method as having loops -extern unsigned loopsThisMethod; // counts the number of loops in the current method -extern bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method. 
-extern Histogram loopCountTable; // Histogram of loop counts -extern Histogram loopExitCountTable; // Histogram of loop exit counts - -#endif // COUNT_LOOPS - #if MEASURE_BLOCK_SIZE extern size_t genFlowNodeSize; extern size_t genFlowNodeCnt; diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index ccaa6f2bfe55..a82b8bed7b51 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -291,7 +291,7 @@ inline bool Compiler::jitIsBetweenInclusive(unsigned value, unsigned start, unsi #define HISTOGRAM_MAX_SIZE_COUNT 64 -#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC +#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC class Dumpable { @@ -388,7 +388,7 @@ class DumpOnShutdown static void DumpAll(); }; -#endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE +#endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || EMITTER_STATS || MEASURE_NODE_SIZE /****************************************************************************************** * Return the EH descriptor for the given region index. @@ -887,7 +887,7 @@ inline FuncInfoDsc* Compiler::funGetFunc(unsigned funcIdx) /***************************************************************************** * Get the funcIdx for the EH funclet that begins with block. * This is only valid after funclets are created. - * It is only valid for blocks marked with BBF_FUNCLET_BEG because + * It is only valid for blocks that begin a funclet because * otherwise we would have to do a more expensive check to determine * if this should return the filter funclet or the filter handler funclet. * @@ -896,8 +896,7 @@ inline unsigned Compiler::funGetFuncIdx(BasicBlock* block) { if (UsesFunclets()) { - assert(fgFuncletsCreated); - assert(block->HasFlag(BBF_FUNCLET_BEG)); + assert(bbIsFuncletBeg(block)); EHblkDsc* eh = ehGetDsc(block->getHndIndex()); unsigned int funcIdx = eh->ebdFuncIndex; @@ -1110,6 +1109,31 @@ inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask, var_typ return regNum; } +//------------------------------------------------------------------------------ +// genFirstRegNumFromMaskAndToggle : Maps first bit set in the register mask to a +// register number and also toggle the bit in the `mask`. +// Arguments: +// mask - the register mask +// type - type of the register mask +// +// Return Value: +// The number of the first register contained in the mask and updates the `mask` to toggle +// the bit. +// + +inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask) +{ + assert(mask != RBM_NONE); // Must have one bit set, so can't have a mask of zero + + /* Convert the mask to a register number */ + + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); + + mask ^= genSingleTypeRegMask(regNum); + + return regNum; +} + /***************************************************************************** * * Return the size in bytes of the given type. @@ -1191,8 +1215,6 @@ inline bool isRegParamType(var_types type) // type - the basic jit var_type for the item being queried // typeClass - the handle for the struct when 'type' is TYP_STRUCT // typeSize - Out param (if non-null) is updated with the size of 'type'. 
-// forReturn - this is true when we asking about a GT_RETURN context; -// this is false when we are asking about an argument context // isVarArg - whether or not this is a vararg fixed arg or variable argument // - if so on arm64 windows getArgTypeForStruct will ignore HFA // - types @@ -1201,7 +1223,6 @@ inline bool isRegParamType(var_types type) inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type, CORINFO_CLASS_HANDLE typeClass, unsigned* typeSize, - bool forReturn, bool isVarArg, CorInfoCallConvExtension callConv) { @@ -1212,16 +1233,8 @@ inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types typ { assert(typeClass != nullptr); size = info.compCompHnd->getClassSize(typeClass); - if (forReturn) - { - structPassingKind howToReturnStruct; - type = getReturnTypeForStruct(typeClass, callConv, &howToReturnStruct, size); - } - else - { - structPassingKind howToPassStruct; - type = getArgTypeForStruct(typeClass, &howToPassStruct, isVarArg, size); - } + structPassingKind howToReturnStruct; + type = getReturnTypeForStruct(typeClass, callConv, &howToReturnStruct, size); if (type != TYP_UNKNOWN) { result = true; @@ -1413,14 +1426,14 @@ inline Statement* Compiler::gtNewStmt(GenTree* expr, const DebugInfo& di) return stmt; } -inline GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1) +inline GenTreeUnOp* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1) { assert((GenTree::OperKind(oper) & (GTK_UNOP | GTK_BINOP)) != 0); assert((GenTree::OperKind(oper) & GTK_EXOP) == 0); // Can't use this to construct any types that extend unary/binary // operator. assert(op1 != nullptr || oper == GT_RETFILT || (oper == GT_RETURN && type == TYP_VOID)); - GenTree* node = new (this, oper) GenTreeOp(oper, type, op1, nullptr); + GenTreeUnOp* node = new (this, oper) GenTreeOp(oper, type, op1, nullptr); return node; } @@ -1502,6 +1515,13 @@ inline GenTree* Compiler::gtNewIconEmbClsHndNode(CORINFO_CLASS_HANDLE clsHnd) //----------------------------------------------------------------------------- +inline GenTree* Compiler::gtNewIconEmbObjHndNode(CORINFO_OBJECT_HANDLE objHnd) +{ + return gtNewIconEmbHndNode((void*)objHnd, nullptr, GTF_ICON_OBJ_HDL, nullptr); +} + +//----------------------------------------------------------------------------- + inline GenTree* Compiler::gtNewIconEmbMethHndNode(CORINFO_METHOD_HANDLE methHnd) { void *embedMethHnd, *pEmbedMethHnd; @@ -1541,7 +1561,7 @@ inline GenTree* Compiler::gtNewIconEmbFldHndNode(CORINFO_FIELD_HANDLE fldHnd) // New CT_HELPER node // inline GenTreeCall* Compiler::gtNewHelperCallNode( - unsigned helper, var_types type, GenTree* arg1, GenTree* arg2, GenTree* arg3) + unsigned helper, var_types type, GenTree* arg1, GenTree* arg2, GenTree* arg3, GenTree* arg4) { GenTreeCall* const result = gtNewCallNode(CT_HELPER, eeFindHelper(helper), type); @@ -1560,6 +1580,12 @@ inline GenTreeCall* Compiler::gtNewHelperCallNode( result->gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER; #endif + if (arg4 != nullptr) + { + result->gtArgs.PushFront(this, NewCallArg::Primitive(arg4)); + result->gtFlags |= arg4->gtFlags & GTF_ALL_EFFECT; + } + if (arg3 != nullptr) { result->gtArgs.PushFront(this, NewCallArg::Primitive(arg3)); @@ -1581,6 +1607,60 @@ inline GenTreeCall* Compiler::gtNewHelperCallNode( return result; } +/*****************************************************************************/ + +//------------------------------------------------------------------------------ +// gtNewVirtualFunctionLookupHelperCallNode : Helper 
to create a call helper node. +// +// +// Arguments: +// helper - Call helper +// type - Type of the node +// thisPtr - 'this' argument +// methHnd - Runtime method handle argument +// clsHnd - Class handle argument +// +// Return Value: +// New CT_HELPER node +// +inline GenTreeCall* Compiler::gtNewVirtualFunctionLookupHelperCallNode( + unsigned helper, var_types type, GenTree* thisPtr, GenTree* methHnd, GenTree* clsHnd) +{ + GenTreeCall* const result = gtNewCallNode(CT_HELPER, eeFindHelper(helper), type); + + if (!s_helperCallProperties.NoThrow((CorInfoHelpFunc)helper)) + { + result->gtFlags |= GTF_EXCEPT; + + if (s_helperCallProperties.AlwaysThrow((CorInfoHelpFunc)helper)) + { + setCallDoesNotReturn(result); + } + } +#if DEBUG + // Helper calls are never candidates. + + result->gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER; +#endif + + assert(methHnd != nullptr); + result->gtArgs.PushFront(this, NewCallArg::Primitive(methHnd).WellKnown(WellKnownArg::RuntimeMethodHandle)); + result->gtFlags |= methHnd->gtFlags & GTF_ALL_EFFECT; + + if (clsHnd != nullptr) + { + result->gtArgs.PushFront(this, NewCallArg::Primitive(clsHnd)); + result->gtFlags |= clsHnd->gtFlags & GTF_ALL_EFFECT; + } + + assert(thisPtr != nullptr); + + result->gtArgs.PushFront(this, NewCallArg::Primitive(thisPtr).WellKnown(WellKnownArg::ThisPointer)); + result->gtFlags |= thisPtr->gtFlags & GTF_ALL_EFFECT; + + return result; +} + //------------------------------------------------------------------------ // gtNewAllocObjNode: A little helper to create an object allocation node. // @@ -1792,7 +1872,7 @@ inline GenTree* Compiler::gtNewNothingNode() inline bool GenTree::IsNothingNode() const { - return (gtOper == GT_NOP && gtType == TYP_VOID); + return OperIs(GT_NOP) && TypeIs(TYP_VOID); } /***************************************************************************** @@ -1928,7 +2008,7 @@ inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate) assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || (gtDebugFlags & GTF_DEBUG_NODE_LARGE)); #if defined(HOST_64BIT) && !defined(TARGET_64BIT) - if (gtOper == GT_CNS_LNG && oper == GT_CNS_INT) + if (OperIs(GT_CNS_LNG) && oper == GT_CNS_INT) { // When casting from LONG to INT, we need to force cast of the value, // if the host architecture represents INT and LONG with the same data size. @@ -2187,6 +2267,74 @@ inline bool GenTree::gtOverflowEx() const return OperMayOverflow() && gtOverflow(); } +//------------------------------------------------------------------------ +// gtFindNodeInTree: +// Check if a tree contains a node matching the specified predicate. Descend +// only into subtrees with the specified flags set on them (can be GTF_EMPTY +// to descend into all nodes). +// +// Type parameters: +// RequiredFlagsToDescendIntoNode - Flags that must be set on the node to +// descend into it (GTF_EMPTY to descend into all nodes) +// Predicate - Type of the predicate (GenTree* -> bool) +// +// Parameters: +// tree - The tree +// pred - Predicate that the call must match +// +// Returns: +// Node matching the predicate, or nullptr if no such node was found. 
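+// Example (illustrative): find the first call anywhere in "tree", descending only into
+// subtrees that have GTF_CALL set on them:
+// GenTree* firstCall = gtFindNodeInTree<GTF_CALL>(tree, [](GenTree* node) { return node->IsCall(); });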
+// +template +GenTree* Compiler::gtFindNodeInTree(GenTree* tree, Predicate pred) +{ + struct FindNodeVisitor : GenTreeVisitor + { + private: + Predicate& m_pred; + + public: + GenTree* Result = nullptr; + + enum + { + DoPreOrder = true + }; + + FindNodeVisitor(Compiler* comp, Predicate& pred) + : GenTreeVisitor(comp) + , m_pred(pred) + { + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* node = *use; + if ((node->gtFlags & RequiredFlagsToDescendIntoNode) != RequiredFlagsToDescendIntoNode) + { + return WALK_SKIP_SUBTREES; + } + + if (m_pred(node)) + { + Result = node; + return WALK_ABORT; + } + + return WALK_CONTINUE; + } + }; + + if ((tree->gtFlags & RequiredFlagsToDescendIntoNode) != RequiredFlagsToDescendIntoNode) + { + return nullptr; + } + + FindNodeVisitor findNode(this, pred); + findNode.WalkTree(&tree, nullptr); + return findNode.Result; +} + /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -2495,21 +2643,17 @@ inline void LclVarDsc::incRefCnts(weight_t weight, Compiler* comp, RefCountState #endif } -/***************************************************************************** - Is this a synchronized instance method? If so, we will need to report "this" - in the GC information, so that the EE can release the object lock - in case of an exception - - We also need to report "this" and keep it alive for all shared generic - code that gets the actual generic context from the "this" pointer and - has exception handlers. - - For example, if List::m() is shared between T = object and T = string, - then inside m() an exception handler "catch E" needs to be able to fetch - the 'this' pointer to find out what 'T' is in order to tell if we - should catch the exception or not. - */ - +//------------------------------------------------------------------------ +// lvaKeepAliveAndReportThis: check if there implicit references to this during method execution +// +// Returns: +// true if this must remain alive throughout the method, even if unreferenced +// +// Notes: +// In a synchronized instance method we need to report "this" +// in the GC information, so that the EE can release the object lock +// in case of an exception +// inline bool Compiler::lvaKeepAliveAndReportThis() { if (info.compIsStatic || (lvaTable[0].TypeGet() != TYP_REF)) @@ -2520,17 +2664,11 @@ inline bool Compiler::lvaKeepAliveAndReportThis() const bool genericsContextIsThis = (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0; #ifdef JIT32_GCENCODER - if (info.compFlags & CORINFO_FLG_SYNCH) return true; if (genericsContextIsThis) { - // TODO: Check if any of the exception clauses are - // typed using a generic type. Else, we do not need to report this. - if (info.compXcptnsCount > 0) - return true; - if (opts.compDbgCode) return true; @@ -2922,6 +3060,12 @@ inline unsigned Compiler::compMapILargNum(unsigned ILargNum) assert(ILargNum < info.compLocalsCount); // compLocals count already adjusted. } + if (ILargNum >= lvaAsyncContinuationArg) + { + ILargNum++; + assert(ILargNum < info.compLocalsCount); // compLocals count already adjusted. 
+ } + if (ILargNum >= lvaVarargsHandleArg) { ILargNum++; @@ -3028,20 +3172,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -inline bool Compiler::compCanEncodePtrArgCntMax() -{ -#ifdef JIT32_GCENCODER - // DDB 204533: - // The GC encoding for fully interruptible methods does not - // support more than 1023 pushed arguments, so we have to - // use a partially interruptible GC info/encoding. - // - return (fgPtrArgCntMax < MAX_PTRARG_OFS); -#else // JIT32_GCENCODER - return true; -#endif -} - /***************************************************************************** * * Call the given function pointer for all nodes in the tree. The 'visitor' @@ -3231,7 +3361,7 @@ inline bool Compiler::fgIsThrowHlpBlk(BasicBlock* block) // GenTree* const call = block->lastNode(); - if ((call == nullptr) || (call->gtOper != GT_CALL)) + if ((call == nullptr) || !call->OperIs(GT_CALL)) { return false; } @@ -3740,14 +3870,14 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX inline void Compiler::optAssertionReset(AssertionIndex limit) { - PREFAST_ASSUME(optAssertionCount <= optMaxAssertionCount); + assert(optAssertionCount <= optMaxAssertionCount); while (optAssertionCount > limit) { AssertionIndex index = optAssertionCount; AssertionDsc* curAssertion = optGetAssertion(index); optAssertionCount--; - unsigned lclNum = curAssertion->op1.lcl.lclNum; + unsigned lclNum = curAssertion->op1.lclNum; assert(lclNum < lvaCount); BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1); @@ -3758,9 +3888,9 @@ inline void Compiler::optAssertionReset(AssertionIndex limit) (curAssertion->op2.kind == O2K_LCLVAR_COPY)) { // - // op2.lcl.lclNum no longer depends upon this assertion + // op2.lclNum no longer depends upon this assertion // - lclNum = curAssertion->op2.lcl.lclNum; + lclNum = curAssertion->op2.lclNum; BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1); } } @@ -3768,7 +3898,7 @@ inline void Compiler::optAssertionReset(AssertionIndex limit) { AssertionIndex index = ++optAssertionCount; AssertionDsc* curAssertion = optGetAssertion(index); - unsigned lclNum = curAssertion->op1.lcl.lclNum; + unsigned lclNum = curAssertion->op1.lclNum; BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), index - 1); // @@ -3778,9 +3908,9 @@ inline void Compiler::optAssertionReset(AssertionIndex limit) (curAssertion->op2.kind == O2K_LCLVAR_COPY)) { // - // op2.lcl.lclNum now depends upon this assertion + // op2.lclNum now depends upon this assertion // - lclNum = curAssertion->op2.lcl.lclNum; + lclNum = curAssertion->op2.lclNum; BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), index - 1); } } @@ -3796,7 +3926,7 @@ inline void Compiler::optAssertionRemove(AssertionIndex index) { assert(index > 0); assert(index <= optAssertionCount); - PREFAST_ASSUME(optAssertionCount <= optMaxAssertionCount); + assert(optAssertionCount <= optMaxAssertionCount); AssertionDsc* curAssertion = optGetAssertion(index); @@ -3811,7 +3941,7 @@ inline void Compiler::optAssertionRemove(AssertionIndex index) // if (index == optAssertionCount) { - unsigned lclNum = curAssertion->op1.lcl.lclNum; + unsigned lclNum = curAssertion->op1.lclNum; BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1); // @@ -3821,9 +3951,9 @@ inline void Compiler::optAssertionRemove(AssertionIndex index) (curAssertion->op2.kind == O2K_LCLVAR_COPY)) { // - // op2.lcl.lclNum no longer depends 
upon this assertion + // op2.lclNum no longer depends upon this assertion // - lclNum = curAssertion->op2.lcl.lclNum; + lclNum = curAssertion->op2.lclNum; BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1); } @@ -4058,7 +4188,7 @@ inline bool Compiler::impIsThis(GenTree* obj) } else { - return ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR) && + return ((obj != nullptr) && obj->OperIs(GT_LCL_VAR) && lvaIsOriginalThisArg(obj->AsLclVarCommon()->GetLclNum())); } } @@ -4103,6 +4233,17 @@ inline bool Compiler::impIsPrimitive(CorInfoType jitType) inline Compiler::lvaPromotionType Compiler::lvaGetPromotionType(const LclVarDsc* varDsc) { + // TODO-Review: Sometimes we get called on ARM with HFA struct variables that have been promoted, + // where the struct itself is no longer used because all access is via its member fields. + // When that happens, the struct is marked as unused and its type has been changed to + // TYP_INT (to keep the GC tracking code from looking at it). + // See Compiler::raAssignVars() for details. For example: + // N002 ( 4, 3) [00EA067C] ------------- return struct $346 + // N001 ( 3, 2) [00EA0628] ------------- lclVar struct(U) V03 loc2 + // float V03.f1 (offs=0x00) -> V12 tmp7 + // f8 (last use) (last use) $345 + // Here, the "struct(U)" shows that the "V03 loc2" variable is unused. Not shown is that V03 + // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree. assert(!varDsc->lvPromoted || varTypeIsPromotable(varDsc) || varDsc->lvUnusedStruct); if (!varDsc->lvPromoted) @@ -4276,8 +4417,6 @@ bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum) result = result || (varNum == lvaOutgoingArgSpaceVar); #endif - result = result || (varNum == lvaPSPSym); - return result; } @@ -4340,19 +4479,20 @@ bool Compiler::fgVarNeedsExplicitZeroInit(unsigned varNum, bool bbInALoop, bool return false; } -// Below conditions guarantee block initialization, which will initialize -// all struct fields. If the logic for block initialization in CodeGen::genCheckUseBlockInit() -// changes, these conditions need to be updated. + // Below conditions guarantee block initialization, which will initialize + // all struct fields. If the logic for block initialization in CodeGen::genCheckUseBlockInit() + // changes, these conditions need to be updated. 
+ unsigned stackHomeSize = lvaLclStackHomeSize(varNum); #ifdef TARGET_64BIT #if defined(TARGET_AMD64) // We can clear using aligned SIMD so the threshold is lower, // and clears in order which is better for auto-prefetching - if (roundUp(varDsc->lvSize(), TARGET_POINTER_SIZE) / sizeof(int) > 4) + if (roundUp(stackHomeSize, TARGET_POINTER_SIZE) / sizeof(int) > 4) #else // !defined(TARGET_AMD64) - if (roundUp(varDsc->lvSize(), TARGET_POINTER_SIZE) / sizeof(int) > 8) + if (roundUp(stackHomeSize, TARGET_POINTER_SIZE) / sizeof(int) > 8) #endif #else - if (roundUp(varDsc->lvSize(), TARGET_POINTER_SIZE) / sizeof(int) > 4) + if (roundUp(stackHomeSize, TARGET_POINTER_SIZE) / sizeof(int) > 4) #endif { return false; @@ -4377,6 +4517,7 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_LCL_FLD: case GT_LCL_ADDR: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -4410,6 +4551,7 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_IL_OFFSET: case GT_NOP: case GT_SWIFT_ERROR: + case GT_GCPOLL: return; // Unary operators with an optional operand @@ -4450,12 +4592,10 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_NULLCHECK: case GT_PUTARG_REG: case GT_PUTARG_STK: -#if FEATURE_ARG_SPLIT - case GT_PUTARG_SPLIT: -#endif // FEATURE_ARG_SPLIT case GT_RETURNTRAP: case GT_KEEPALIVE: case GT_INC_SATURATE: + case GT_RETURN_SUSPEND: visitor(this->AsUnOp()->gtOp1); return; diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index 6de55d070a6e..eb2c0dffc0ee 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -66,6 +66,8 @@ CompMemKindMacro(ZeroInit) CompMemKindMacro(Pgo) CompMemKindMacro(MaskConversionOpt) CompMemKindMacro(TryRegionClone) +CompMemKindMacro(Async) +CompMemKindMacro(RangeCheckCloning) //clang-format on #undef CompMemKindMacro diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index dc3fb380cdb5..975ddeea6c06 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -65,6 +65,7 @@ CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", CompPhaseNameMacro(PHASE_INVERT_LOOPS, "Invert loops", false, -1, false) CompPhaseNameMacro(PHASE_HEAD_TAIL_MERGE2, "Post-morph head and tail merge", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_FLOW, "Optimize control flow", false, -1, false) +CompPhaseNameMacro(PHASE_OPTIMIZE_PRE_LAYOUT, "Optimize pre-layout", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_POST_LAYOUT, "Optimize post-layout", false, -1, false) CompPhaseNameMacro(PHASE_COMPUTE_DOMINATORS, "Compute dominators", false, -1, false) @@ -98,6 +99,7 @@ CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", CompPhaseNameMacro(PHASE_VN_BASED_INTRINSIC_EXPAND, "VN based intrinsic expansion", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_BRANCHES, "Redundant branch opts", false, -1, false) CompPhaseNameMacro(PHASE_ASSERTION_PROP_MAIN, "Assertion prop", false, -1, false) +CompPhaseNameMacro(PHASE_RANGE_CHECK_CLONING, "Clone blocks with range checks", false, -1, false) CompPhaseNameMacro(PHASE_IF_CONVERSION, "If conversion", false, -1, false) CompPhaseNameMacro(PHASE_VN_BASED_DEAD_STORE_REMOVAL,"VN-based dead store removal", false, -1, false) CompPhaseNameMacro(PHASE_EMPTY_FINALLY_3, "Remove empty finally 3", false, -1, false) @@ -114,7 +116,10 @@ CompPhaseNameMacro(PHASE_INSERT_GC_POLLS, "Insert GC Polls", 
CompPhaseNameMacro(PHASE_CREATE_THROW_HELPERS, "Create throw helper blocks", false, -1, true) CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", false, -1, true) CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", false, -1, false) +CompPhaseNameMacro(PHASE_REPAIR_PROFILE_POST_MORPH, "Repair profile post-morph", false, -1, false) +CompPhaseNameMacro(PHASE_REPAIR_PROFILE_PRE_LAYOUT, "Repair profile pre-layout", false, -1, false) +CompPhaseNameMacro(PHASE_ASYNC, "Transform async", false, -1, true) CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", true, -1, false) CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", false, PHASE_LCLVARLIVENESS, false) CompPhaseNameMacro(PHASE_LCLVARLIVENESS_PERBLOCK, "Per block local var liveness", false, PHASE_LCLVARLIVENESS, false) diff --git a/src/coreclr/jit/copyprop.cpp b/src/coreclr/jit/copyprop.cpp index 7953cbd8f5f4..93ff8bd26761 100644 --- a/src/coreclr/jit/copyprop.cpp +++ b/src/coreclr/jit/copyprop.cpp @@ -164,22 +164,9 @@ bool Compiler::optCopyProp( bool madeChanges = false; LclVarDsc* const varDsc = lvaGetDesc(lclNum); LclSsaVarDsc* const varSsaDsc = varDsc->GetPerSsaData(tree->GetSsaNum()); - GenTree* const varDefTree = varSsaDsc->GetDefNode(); - BasicBlock* const varDefBlock = varSsaDsc->GetBlock(); ValueNum const lclDefVN = varSsaDsc->m_vnPair.GetConservative(); assert(lclDefVN != ValueNumStore::NoVN); - // See if this local is a candidate for phi dev equivalence checks - // - bool const varDefTreeIsPhiDef = (varDefTree != nullptr) && varDefTree->IsPhiDefn(); - bool varDefTreeIsPhiDefAtCycleEntry = false; - - if (varDefTreeIsPhiDef) - { - FlowGraphNaturalLoop* const loop = m_blockToLoop->GetLoop(varDefBlock); - varDefTreeIsPhiDefAtCycleEntry = (loop != nullptr) && (loop->GetHeader() == varDefBlock); - } - for (LclNumToLiveDefsMap::Node* const iter : LclNumToLiveDefsMap::KeyValueIteration(curSsaName)) { unsigned newLclNum = iter->GetKey(); @@ -202,17 +189,13 @@ bool Compiler::optCopyProp( ValueNum newLclDefVN = newLclSsaDef->m_vnPair.GetConservative(); assert(newLclDefVN != ValueNumStore::NoVN); + // If VNs don't match, they still can be the same entity, but we currently + // don't have tools to prove it. So we skip this case. if (newLclDefVN != lclDefVN) { - bool arePhiDefsEquivalent = - varDefTreeIsPhiDefAtCycleEntry && vnStore->AreVNsEquivalent(lclDefVN, newLclDefVN); - if (!arePhiDefsEquivalent) - { - continue; - } - JITDUMP("orig [%06u] copy [%06u] VNs proved equivalent\n", dspTreeID(tree), dspTreeID(newLclDef.GetDefNode())); + continue; } // It may not be profitable to propagate a 'doNotEnregister' lclVar to an existing use of an diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index 9280f459f978..afe6625c10dc 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -78,11 +78,11 @@ void DecomposeLongs::DecomposeBlock(BasicBlock* block) // Return Value: // None. 
// -void DecomposeLongs::DecomposeRange(Compiler* compiler, LIR::Range& range) +void DecomposeLongs::DecomposeRange(Compiler* compiler, Lowering* lowering, LIR::Range& range) { assert(compiler != nullptr); - DecomposeLongs decomposer(compiler); + DecomposeLongs decomposer(compiler, lowering); decomposer.m_range = &range; decomposer.DecomposeRangeHelper(); @@ -90,7 +90,7 @@ void DecomposeLongs::DecomposeRange(Compiler* compiler, LIR::Range& range) //------------------------------------------------------------------------ // DecomposeLongs::DecomposeRangeHelper: -// Decompiose each node in the current range. +// Decompose each node in the current range. // // Decomposition is done as an execution-order walk. Decomposition of // a particular node can create new nodes that need to be further @@ -122,44 +122,84 @@ void DecomposeLongs::DecomposeRangeHelper() GenTree* DecomposeLongs::DecomposeNode(GenTree* tree) { // Handle the case where we are implicitly using the lower half of a long lclVar. - if ((tree->TypeGet() == TYP_INT) && tree->OperIsLocal()) + if (tree->TypeIs(TYP_INT) && tree->OperIsLocal()) { LclVarDsc* varDsc = m_compiler->lvaGetDesc(tree->AsLclVarCommon()); if (varTypeIsLong(varDsc) && varDsc->lvPromoted) { -#ifdef DEBUG - if (m_compiler->verbose) - { - printf("Changing implicit reference to lo half of long lclVar to an explicit reference of its promoted " - "half:\n"); - m_compiler->gtDispTreeRange(Range(), tree); - } -#endif // DEBUG + JITDUMP("Changing implicit reference to lo half of long lclVar to an explicit reference of its promoted " + "half:\n"); + DISPTREERANGE(Range(), tree); + unsigned loVarNum = varDsc->lvFieldLclStart; tree->AsLclVarCommon()->SetLclNum(loVarNum); return tree->gtNext; } } - if (tree->TypeGet() != TYP_LONG) +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86) + if (!tree->TypeIs(TYP_LONG) && + !(tree->OperIs(GT_CAST) && varTypeIsLong(tree->AsCast()->CastOp()) && varTypeIsFloating(tree))) +#else + if (!tree->TypeIs(TYP_LONG)) +#endif // FEATURE_HW_INTRINSICS && TARGET_X86 { return tree->gtNext; } -#ifdef DEBUG - if (m_compiler->verbose) - { - printf("Decomposing TYP_LONG tree. BEFORE:\n"); - m_compiler->gtDispTreeRange(Range(), tree); - } -#endif // DEBUG - LIR::Use use; if (!Range().TryGetUse(tree, &use)) { LIR::Use::MakeDummyUse(Range(), tree, &use); } +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86) + if (!use.IsDummyUse()) + { + // HWIntrinsics can consume/produce a long directly, provided its source/target is memory. + // Here we do a conservative check for specific cases where it is certain the load/store + // can be contained. In those cases, we can skip decomposition.
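// To make the containment case above concrete: when the 8-byte source can be contained by its
// SIMD user (for example a Vector128 create), the value is read once as 64 bits instead of being
// split into two 32-bit reads. A minimal standalone sketch of the two shapes, assuming a
// little-endian target (illustrative only, not JIT code):
#include <cstdint>
#include <cstring>

static uint64_t loadContained(const uint8_t* src) // contained 64-bit load (movq-style)
{
    uint64_t value;
    std::memcpy(&value, src, sizeof(value));
    return value;
}

static uint64_t loadDecomposed(const uint8_t* src) // what decomposition would otherwise produce
{
    uint32_t lo;
    uint32_t hi;
    std::memcpy(&lo, src, sizeof(lo));     // low half
    std::memcpy(&hi, src + 4, sizeof(hi)); // high half
    return ((uint64_t)hi << 32) | lo;      // recombined, as GT_LONG(lo, hi) would be
}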
+ + GenTree* user = use.User(); + + if (tree->TypeIs(TYP_LONG) && (user->OperIsHWIntrinsic() || (user->OperIs(GT_CAST) && varTypeIsFloating(user)))) + { + if (tree->OperIs(GT_CNS_LNG) || + (tree->OperIs(GT_IND, GT_LCL_FLD) && m_lowering->IsSafeToContainMem(user, tree))) + { + if (user->OperIsHWIntrinsic()) + { + NamedIntrinsic intrinsicId = user->AsHWIntrinsic()->GetHWIntrinsicId(); + assert(HWIntrinsicInfo::IsVectorCreate(intrinsicId) || + HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId) || + HWIntrinsicInfo::IsVectorCreateScalarUnsafe(intrinsicId)); + } + + return tree->gtNext; + } + } + else if (user->OperIs(GT_STOREIND) && tree->OperIsHWIntrinsic() && m_compiler->opts.OptimizationEnabled()) + { + NamedIntrinsic intrinsicId = tree->AsHWIntrinsic()->GetHWIntrinsicId(); + if (HWIntrinsicInfo::IsVectorToScalar(intrinsicId) && m_lowering->IsSafeToContainMem(user, tree)) + { + return tree->gtNext; + } + } + } + + if (tree->OperIs(GT_STOREIND) && tree->AsStoreInd()->Data()->OperIsHWIntrinsic()) + { + // We should only get here if we matched the second pattern above. + assert(HWIntrinsicInfo::IsVectorToScalar(tree->AsStoreInd()->Data()->AsHWIntrinsic()->GetHWIntrinsicId())); + + return tree->gtNext; + } +#endif // FEATURE_HW_INTRINSICS && TARGET_X86 + + JITDUMP("Decomposing TYP_LONG tree. BEFORE:\n"); + DISPTREERANGE(Range(), tree); + GenTree* nextNode = nullptr; switch (tree->OperGet()) { @@ -270,19 +310,14 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree) // If we replaced the argument to a GT_FIELD_LIST element with a GT_LONG node, split that field list // element into two elements: one for each half of the GT_LONG. - if ((use.Def()->OperGet() == GT_LONG) && !use.IsDummyUse() && (use.User()->OperGet() == GT_FIELD_LIST)) + if (use.Def()->OperIs(GT_LONG) && !use.IsDummyUse() && use.User()->OperIs(GT_FIELD_LIST)) { DecomposeFieldList(use.User()->AsFieldList(), use.Def()->AsOp()); } -#ifdef DEBUG - if (m_compiler->verbose) - { - // NOTE: st_lcl_var doesn't dump properly afterwards. - printf("Decomposing TYP_LONG tree. AFTER:\n"); - m_compiler->gtDispTreeRange(Range(), use.Def()); - } -#endif + // NOTE: st_lcl_var doesn't dump properly afterwards. + JITDUMP("Decomposing TYP_LONG tree. AFTER:\n"); + DISPTREERANGE(Range(), use.Def()); // When casting from a decomposed long to a smaller integer we can discard the high part. if (m_compiler->opts.OptimizationEnabled() && !use.IsDummyUse() && use.User()->OperIs(GT_CAST) && @@ -427,7 +462,7 @@ GenTree* DecomposeLongs::DecomposeStoreLclVar(LIR::Use& use) return tree->gtNext; } - noway_assert(rhs->OperGet() == GT_LONG); + noway_assert(rhs->OperIs(GT_LONG)); const LclVarDsc* varDsc = m_compiler->lvaGetDesc(tree->AsLclVarCommon()); if (!varDsc->lvPromoted) @@ -505,7 +540,7 @@ GenTree* DecomposeLongs::DecomposeStoreLclFld(LIR::Use& use) GenTreeLclFld* store = use.Def()->AsLclFld(); GenTreeOp* value = store->gtOp1->AsOp(); - assert(value->OperGet() == GT_LONG); + assert(value->OperIs(GT_LONG)); Range().Remove(value); // The original store node will be repurposed to store the low half of the GT_LONG. 
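// In effect, the 8-byte store becomes two 4-byte stores: the original node keeps the low half at
// the original offset, and a new store writes the high half four bytes higher. A standalone model
// of the result, assuming a little-endian target (illustrative only, not JIT code):
#include <cstdint>
#include <cstring>

static void storeLongDecomposed(uint8_t* dst, uint64_t value)
{
    uint32_t lo = (uint32_t)value;         // kept by the repurposed store, offset +0
    uint32_t hi = (uint32_t)(value >> 32); // written by the new store, offset +4
    std::memcpy(dst, &lo, sizeof(lo));
    std::memcpy(dst + 4, &hi, sizeof(hi));
}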
@@ -535,28 +570,69 @@ GenTree* DecomposeLongs::DecomposeStoreLclFld(LIR::Use& use) GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use) { assert(use.IsInitialized()); - assert(use.Def()->OperGet() == GT_CAST); + assert(use.Def()->OperIs(GT_CAST)); - GenTree* cast = use.Def()->AsCast(); - GenTree* loResult = nullptr; - GenTree* hiResult = nullptr; - - var_types srcType = cast->CastFromType(); - var_types dstType = cast->CastToType(); + GenTreeCast* cast = use.Def()->AsCast(); + var_types srcType = cast->CastFromType(); + var_types dstType = cast->CastToType(); - if ((cast->gtFlags & GTF_UNSIGNED) != 0) + if (cast->IsUnsigned()) { srcType = varTypeToUnsigned(srcType); } - bool skipDecomposition = false; +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86) + if (varTypeIsFloating(dstType)) + { + // We will reach this path only if morph did not convert the cast to a helper call, + // meaning we can perform the cast using SIMD instructions. + // The sequence this creates is simply: + // AVX512DQ.VL.ConvertToVector128Single(Vector128.CreateScalarUnsafe(LONG)).ToScalar() + + NamedIntrinsic intrinsicId = NI_Illegal; + GenTree* srcOp = cast->CastOp(); + var_types dstType = cast->CastToType(); + CorInfoType baseFloatingType = (dstType == TYP_FLOAT) ? CORINFO_TYPE_FLOAT : CORINFO_TYPE_DOUBLE; + CorInfoType baseIntegralType = cast->IsUnsigned() ? CORINFO_TYPE_ULONG : CORINFO_TYPE_LONG; + + assert(!cast->gtOverflow()); + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512)); + + intrinsicId = (dstType == TYP_FLOAT) ? NI_AVX512_ConvertToVector128Single : NI_AVX512_ConvertToVector128Double; + + GenTree* createScalar = m_compiler->gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, srcOp, baseIntegralType, 16); + GenTree* convert = + m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, createScalar, intrinsicId, baseIntegralType, 16); + GenTree* toScalar = m_compiler->gtNewSimdToScalarNode(dstType, convert, baseFloatingType, 16); + + Range().InsertAfter(cast, createScalar, convert, toScalar); + Range().Remove(cast); + + if (createScalar->IsCnsVec()) + { + Range().Remove(srcOp); + } + + if (use.IsDummyUse()) + { + toScalar->SetUnusedValue(); + } + use.ReplaceWith(toScalar); + + return toScalar->gtNext; + } +#endif // FEATURE_HW_INTRINSICS && TARGET_X86 + + bool skipDecomposition = false; + GenTree* loResult = nullptr; + GenTree* hiResult = nullptr; if (varTypeIsLong(srcType)) { if (cast->gtOverflow() && (varTypeIsUnsigned(srcType) != varTypeIsUnsigned(dstType))) { - GenTree* srcOp = cast->gtGetOp1(); - noway_assert(srcOp->OperGet() == GT_LONG); + GenTree* srcOp = cast->CastOp(); + noway_assert(srcOp->OperIs(GT_LONG)); GenTree* loSrcOp = srcOp->gtGetOp1(); GenTree* hiSrcOp = srcOp->gtGetOp2(); @@ -568,13 +644,13 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use) // check provided by codegen. 
// - const bool signExtend = (cast->gtFlags & GTF_UNSIGNED) == 0; + const bool signExtend = !cast->IsUnsigned(); loResult = EnsureIntSized(loSrcOp, signExtend); hiResult = cast; hiResult->gtType = TYP_INT; hiResult->AsCast()->gtCastType = TYP_UINT; - hiResult->gtFlags &= ~GTF_UNSIGNED; + hiResult->ClearUnsigned(); hiResult->AsOp()->gtOp1 = hiSrcOp; Range().Remove(srcOp); @@ -604,7 +680,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use) } else { - if (!use.IsDummyUse() && (use.User()->OperGet() == GT_MUL)) + if (!use.IsDummyUse() && use.User()->OperIs(GT_MUL)) { // // This int->long cast is used by a GT_MUL that will be transformed by DecomposeMul into a @@ -619,7 +695,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use) } else if (varTypeIsUnsigned(srcType)) { - const bool signExtend = (cast->gtFlags & GTF_UNSIGNED) == 0; + const bool signExtend = !cast->IsUnsigned(); loResult = EnsureIntSized(cast->gtGetOp1(), signExtend); hiResult = m_compiler->gtNewZeroConNode(TYP_INT); @@ -698,7 +774,7 @@ GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use) // GenTree* DecomposeLongs::DecomposeFieldList(GenTreeFieldList* fieldList, GenTreeOp* longNode) { - assert(longNode->OperGet() == GT_LONG); + assert(longNode->OperIs(GT_LONG)); GenTreeFieldList::Use* loUse = nullptr; for (GenTreeFieldList::Use& use : fieldList->Uses()) @@ -755,7 +831,7 @@ GenTree* DecomposeLongs::DecomposeStoreInd(LIR::Use& use) GenTree* tree = use.Def(); - assert(tree->AsOp()->gtOp2->OperGet() == GT_LONG); + assert(tree->AsOp()->gtOp2->OperIs(GT_LONG)); // Example input (address expression omitted): // @@ -882,7 +958,7 @@ GenTree* DecomposeLongs::DecomposeNot(LIR::Use& use) GenTree* tree = use.Def(); GenTree* gtLong = tree->gtGetOp1(); - noway_assert(gtLong->OperGet() == GT_LONG); + noway_assert(gtLong->OperIs(GT_LONG)); GenTree* loOp1 = gtLong->gtGetOp1(); GenTree* hiOp1 = gtLong->gtGetOp2(); @@ -914,7 +990,7 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use) GenTree* tree = use.Def(); GenTree* gtLong = tree->gtGetOp1(); - noway_assert(gtLong->OperGet() == GT_LONG); + noway_assert(gtLong->OperIs(GT_LONG)); GenTree* loOp1 = gtLong->gtGetOp1(); GenTree* hiOp1 = gtLong->gtGetOp2(); @@ -972,7 +1048,7 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use) GenTree* op2 = tree->gtGetOp2(); // Both operands must have already been decomposed into GT_LONG operators. - noway_assert((op1->OperGet() == GT_LONG) && (op2->OperGet() == GT_LONG)); + noway_assert(op1->OperIs(GT_LONG) && op2->OperIs(GT_LONG)); // Capture the lo and hi halves of op1 and op2. 
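// For the arithmetic cases, the decomposed form pairs a low-half operation that produces a
// carry/borrow with a high-half operation that consumes it (e.g. GT_ADD_LO / GT_ADD_HI). A scalar
// model of what a decomposed 64-bit add computes (illustrative only, not JIT code):
#include <cstdint>

static uint64_t addDecomposed(uint64_t a, uint64_t b)
{
    uint32_t aLo = (uint32_t)a, aHi = (uint32_t)(a >> 32);
    uint32_t bLo = (uint32_t)b, bHi = (uint32_t)(b >> 32);

    uint32_t lo    = aLo + bLo;         // the low-half add sets the carry
    uint32_t carry = (lo < aLo) ? 1 : 0;
    uint32_t hi    = aHi + bHi + carry; // the high-half add consumes it

    return ((uint64_t)hi << 32) | lo;
}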
GenTree* loOp1 = op1->gtGetOp1(); @@ -1652,14 +1728,14 @@ GenTree* DecomposeLongs::DecomposeUMod(LIR::Use& use) GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); - assert(op1->OperGet() == GT_LONG); - assert(op2->OperGet() == GT_LONG); + assert(op1->OperIs(GT_LONG)); + assert(op2->OperIs(GT_LONG)); GenTree* loOp2 = op2->gtGetOp1(); GenTree* hiOp2 = op2->gtGetOp2(); - assert(loOp2->OperGet() == GT_CNS_INT); - assert(hiOp2->OperGet() == GT_CNS_INT); + assert(loOp2->OperIs(GT_CNS_INT)); + assert(hiOp2->OperIs(GT_CNS_INT)); assert((loOp2->AsIntCon()->gtIconVal >= 2) && (loOp2->AsIntCon()->gtIconVal <= 0x3fffffff)); assert(hiOp2->AsIntCon()->gtIconVal == 0); @@ -1707,7 +1783,14 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use) return DecomposeHWIntrinsicGetElement(use, hwintrinsicTree); } - case NI_EVEX_MoveMask: + case NI_Vector128_ToScalar: + case NI_Vector256_ToScalar: + case NI_Vector512_ToScalar: + { + return DecomposeHWIntrinsicToScalar(use, hwintrinsicTree); + } + + case NI_AVX512_MoveMask: { return DecomposeHWIntrinsicMoveMask(use, hwintrinsicTree); } @@ -1751,9 +1834,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW { assert(node == use.Def()); assert(varTypeIsLong(node)); - assert((node->GetHWIntrinsicId() == NI_Vector128_GetElement) || - (node->GetHWIntrinsicId() == NI_Vector256_GetElement) || - (node->GetHWIntrinsicId() == NI_Vector512_GetElement)); + assert(HWIntrinsicInfo::IsVectorGetElement(node->GetHWIntrinsicId())); GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); @@ -1836,7 +1917,74 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW } //------------------------------------------------------------------------ -// DecomposeHWIntrinsicMoveMask: Decompose GT_HWINTRINSIC -- NI_EVEX_MoveMask +// DecomposeHWIntrinsicToScalar: Decompose GT_HWINTRINSIC -- NI_Vector*_ToScalar. +// +// create: +// +// tmp_simd_var = simd_var +// lo_result = GT_HWINTRINSIC{ToScalar}[int](tmp_simd_var) +// hi_result = GT_HWINTRINSIC{GetElement}[int](tmp_simd_var, 1) +// - or - +// GT_HWINTRINSIC{ToScalar}[int](GT_RSZ(tmp_simd_var, 32)) +// return: GT_LONG(lo_result, hi_result) +// +// Arguments: +// use - the LIR::Use object for the def that needs to be decomposed. +// node - the hwintrinsic node to decompose +// +// Return Value: +// The GT_LONG node wrapping the upper and lower halves. 
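// Numerically, the two halves produced by this decomposition are simply the low and high 32 bits
// of the 64-bit lane; a scalar model of what the generated IR computes (illustrative only):
#include <cstdint>

static void toScalarDecomposed(uint64_t lane0, uint32_t* lo, uint32_t* hi)
{
    *lo = (uint32_t)lane0;         // lo_result: ToScalar[int] on the vector
    *hi = (uint32_t)(lane0 >> 32); // hi_result: GetElement[int](v, 1), or ToScalar after a 32-bit right shift
}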
+// +GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIntrinsic* node) +{ + assert(node == use.Def()); + assert(varTypeIsLong(node)); + assert(HWIntrinsicInfo::IsVectorToScalar(node->GetHWIntrinsicId())); + + GenTree* op1 = node->Op(1); + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); + + assert(varTypeIsLong(simdBaseType)); + assert(varTypeIsSIMD(op1)); + + GenTree* simdTmpVar = RepresentOpAsLocalVar(op1, node, &node->Op(1)); + unsigned simdTmpVarNum = simdTmpVar->AsLclVarCommon()->GetLclNum(); + JITDUMP("[DecomposeHWIntrinsicToScalar]: Saving op1 tree to a temp var:\n"); + DISPTREERANGE(Range(), simdTmpVar); + + GenTree* loResult = m_compiler->gtNewSimdToScalarNode(TYP_INT, simdTmpVar, CORINFO_TYPE_INT, simdSize); + Range().InsertAfter(simdTmpVar, loResult); + + simdTmpVar = m_compiler->gtNewLclLNode(simdTmpVarNum, simdTmpVar->TypeGet()); + Range().InsertAfter(loResult, simdTmpVar); + + GenTree* hiResult; + if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + GenTree* one = m_compiler->gtNewIconNode(1); + hiResult = m_compiler->gtNewSimdGetElementNode(TYP_INT, simdTmpVar, one, CORINFO_TYPE_INT, simdSize); + + Range().InsertAfter(simdTmpVar, one, hiResult); + } + else + { + GenTree* thirtyTwo = m_compiler->gtNewIconNode(32); + GenTree* shift = m_compiler->gtNewSimdBinOpNode(GT_RSZ, op1->TypeGet(), simdTmpVar, thirtyTwo, + node->GetSimdBaseJitType(), simdSize); + hiResult = m_compiler->gtNewSimdToScalarNode(TYP_INT, shift, CORINFO_TYPE_INT, simdSize); + + Range().InsertAfter(simdTmpVar, thirtyTwo, shift, hiResult); + } + + Range().Remove(node); + + return FinalizeDecomposition(use, loResult, hiResult, hiResult); +} + +//------------------------------------------------------------------------ +// DecomposeHWIntrinsicMoveMask: Decompose GT_HWINTRINSIC -- NI_AVX512_MoveMask // // Decompose a MoveMask(x) node on Vector512<*>. 
For: // @@ -1865,7 +2013,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIn { assert(node == use.Def()); assert(varTypeIsLong(node)); - assert(node->GetHWIntrinsicId() == NI_EVEX_MoveMask); + assert(node->GetHWIntrinsicId() == NI_AVX512_MoveMask); GenTree* op1 = node->Op(1); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); @@ -1873,7 +2021,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIn unsigned simdSize = node->GetSimdSize(); assert(varTypeIsArithmetic(simdBaseType)); - assert(op1->TypeGet() == TYP_MASK); + assert(op1->TypeIs(TYP_MASK)); assert(simdSize == 64); GenTree* loResult = nullptr; @@ -1895,7 +2043,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIn // Create: // loResult = GT_HWINTRINSIC{MoveMask}(simdTmpVar) - loResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar, NI_EVEX_MoveMask, simdBaseJitType, 32); + loResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar, NI_AVX512_MoveMask, simdBaseJitType, 32); Range().InsertBefore(node, loResult); simdTmpVar = m_compiler->gtNewLclLNode(simdTmpVarNum, simdTmpVar->TypeGet()); @@ -1908,11 +2056,11 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIn GenTree* shiftIcon = m_compiler->gtNewIconNode(32, TYP_INT); Range().InsertBefore(node, shiftIcon); - simdTmpVar = m_compiler->gtNewSimdHWIntrinsicNode(TYP_MASK, simdTmpVar, shiftIcon, NI_EVEX_ShiftRightMask, + simdTmpVar = m_compiler->gtNewSimdHWIntrinsicNode(TYP_MASK, simdTmpVar, shiftIcon, NI_AVX512_ShiftRightMask, simdBaseJitType, 64); Range().InsertBefore(node, simdTmpVar); - hiResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar, NI_EVEX_MoveMask, simdBaseJitType, 32); + hiResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar, NI_AVX512_MoveMask, simdBaseJitType, 32); Range().InsertBefore(node, hiResult); } else @@ -1920,7 +2068,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIn // Create: // loResult = GT_HWINTRINSIC{MoveMask}(op1) - loResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, op1, NI_EVEX_MoveMask, simdBaseJitType, simdSize); + loResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, op1, NI_AVX512_MoveMask, simdBaseJitType, simdSize); Range().InsertBefore(node, loResult); // Create: @@ -2049,16 +2197,16 @@ GenTree* DecomposeLongs::StoreNodeToVar(LIR::Use& use) GenTree* tree = use.Def(); GenTree* user = use.User(); - if (user->OperGet() == GT_STORE_LCL_VAR) + if (user->OperIs(GT_STORE_LCL_VAR)) { // If parent is already a STORE_LCL_VAR, just mark it lvIsMultiRegRet. 
- m_compiler->lvaGetDesc(user->AsLclVar())->lvIsMultiRegRet = true; + m_compiler->lvaGetDesc(user->AsLclVar())->SetIsMultiRegDest(); return tree->gtNext; } // Otherwise, we need to force var = call() - unsigned lclNum = use.ReplaceWithLclVar(m_compiler); - m_compiler->lvaTable[lclNum].lvIsMultiRegRet = true; + unsigned lclNum = use.ReplaceWithLclVar(m_compiler); + m_compiler->lvaGetDesc(lclNum)->SetIsMultiRegDest(); if (m_compiler->lvaEnregMultiRegVars) { @@ -2082,7 +2230,7 @@ GenTree* DecomposeLongs::StoreNodeToVar(LIR::Use& use) // GenTree* DecomposeLongs::RepresentOpAsLocalVar(GenTree* op, GenTree* user, GenTree** edge) { - if (op->OperGet() == GT_LCL_VAR) + if (op->OperIs(GT_LCL_VAR)) { return op; } @@ -2240,7 +2388,7 @@ void DecomposeLongs::TryPromoteLongVar(unsigned lclNum) { LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - assert(varDsc->TypeGet() == TYP_LONG); + assert(varDsc->TypeIs(TYP_LONG)); if (varDsc->lvDoNotEnregister) { @@ -2262,6 +2410,13 @@ void DecomposeLongs::TryPromoteLongVar(unsigned lclNum) { return; } +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86) + if (varDsc->lvIsParam) + { + // Promotion blocks combined read optimizations for SIMD loads of long params + return; + } +#endif // FEATURE_HW_INTRINSICS && TARGET_X86 varDsc->lvFieldCnt = 2; varDsc->lvFieldLclStart = m_compiler->lvaCount; @@ -2294,13 +2449,7 @@ void DecomposeLongs::TryPromoteLongVar(unsigned lclNum) fieldVarDsc->lvIsParam = true; m_compiler->lvaSetVarDoNotEnregister(fieldLclNum DEBUGARG(DoNotEnregisterReason::LongParamField)); -#if FEATURE_MULTIREG_ARGS - if (varDsc->lvIsRegArg) - { - fieldVarDsc->lvIsRegArg = 1; // Longs are never split. - fieldVarDsc->SetArgReg((index == 0) ? varDsc->GetArgReg() : varDsc->GetOtherArgReg()); - } -#endif // FEATURE_MULTIREG_ARGS + fieldVarDsc->lvIsRegArg = varDsc->lvIsRegArg; } } } diff --git a/src/coreclr/jit/decomposelongs.h b/src/coreclr/jit/decomposelongs.h index 02681322a552..e879292abf49 100644 --- a/src/coreclr/jit/decomposelongs.h +++ b/src/coreclr/jit/decomposelongs.h @@ -14,19 +14,21 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define _DECOMPOSELONGS_H_ #include "compiler.h" +#include "lower.h" class DecomposeLongs { public: - DecomposeLongs(Compiler* compiler) + DecomposeLongs(Compiler* compiler, Lowering* lowering) : m_compiler(compiler) + , m_lowering(lowering) { } void PrepareForDecomposition(); void DecomposeBlock(BasicBlock* block); - static void DecomposeRange(Compiler* compiler, LIR::Range& range); + static void DecomposeRange(Compiler* compiler, Lowering* lowering, LIR::Range& range); private: inline LIR::Range& Range() const @@ -64,6 +66,7 @@ class DecomposeLongs #ifdef FEATURE_HW_INTRINSICS GenTree* DecomposeHWIntrinsic(LIR::Use& use); GenTree* DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHWIntrinsic* node); + GenTree* DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIntrinsic* node); GenTree* DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIntrinsic* node); #endif // FEATURE_HW_INTRINSICS @@ -80,6 +83,7 @@ class DecomposeLongs // Data Compiler* m_compiler; + Lowering* m_lowering; LIR::Range* m_range; }; diff --git a/src/coreclr/jit/disasm.cpp b/src/coreclr/jit/disasm.cpp index 945254147652..8f6c8d1cfd1a 100644 --- a/src/coreclr/jit/disasm.cpp +++ b/src/coreclr/jit/disasm.cpp @@ -1704,6 +1704,10 @@ bool DisAssembler::InitCoredistoolsDisasm() coreDisTargetArchitecture = Target_X86; #elif defined(TARGET_AMD64) coreDisTargetArchitecture = Target_X64; +#elif 
defined(TARGET_LOONGARCH64) + coreDisTargetArchitecture = Target_LoongArch64; +#elif defined(TARGET_RISCV64) + coreDisTargetArchitecture = Target_RiscV64; #else #error Unsupported target for LATE_DISASM with USE_COREDISTOOLS #endif diff --git a/src/coreclr/jit/earlyprop.cpp b/src/coreclr/jit/earlyprop.cpp index ca57a44e7288..06385f3d5157 100644 --- a/src/coreclr/jit/earlyprop.cpp +++ b/src/coreclr/jit/earlyprop.cpp @@ -185,7 +185,7 @@ GenTree* Compiler::optEarlyPropRewriteTree(GenTree* tree, LocalNumberToNullCheck return nullptr; } - if (tree->OperGet() == GT_ARR_LENGTH) + if (tree->OperIs(GT_ARR_LENGTH)) { objectRefPtr = tree->AsOp()->gtOp1; propKind = optPropKind::OPK_ARRAYLEN; @@ -227,7 +227,7 @@ GenTree* Compiler::optEarlyPropRewriteTree(GenTree* tree, LocalNumberToNullCheck if ((actualConstVal < 0) || (actualConstVal > INT32_MAX)) { // Don't propagate array lengths that are beyond the maximum value of a GT_ARR_LENGTH or negative. - // node. CORINFO_HELP_NEWARR_1_OBJ helper call allows to take a long integer as the + // node. CORINFO_HELP_NEWARR_1_PTR helper call allows to take a long integer as the // array length argument, but the type of GT_ARR_LENGTH is always INT32. return nullptr; } @@ -282,8 +282,8 @@ GenTree* Compiler::optEarlyPropRewriteTree(GenTree* tree, LocalNumberToNullCheck if (actualValClone->gtType != tree->gtType) { - assert(actualValClone->gtType == TYP_LONG); - assert(tree->gtType == TYP_INT); + assert(actualValClone->TypeIs(TYP_LONG)); + assert(tree->TypeIs(TYP_INT)); assert((actualConstVal >= 0) && (actualConstVal <= INT32_MAX)); actualValClone->gtType = tree->gtType; } @@ -422,7 +422,7 @@ GenTree* Compiler::optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropK bool Compiler::optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap) { #ifdef DEBUG - if (tree->OperGet() == GT_NULLCHECK) + if (tree->OperIs(GT_NULLCHECK)) { optCheckFlagsAreSet(OMF_HAS_NULLCHECK, "OMF_HAS_NULLCHECK", BBF_HAS_NULLCHECK, "BBF_HAS_NULLCHECK", tree, compCurBB); @@ -457,6 +457,9 @@ bool Compiler::optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nu nullCheckTree->SetHasOrderingSideEffect(); nullCheckTree->gtFlags |= GTF_IND_NONFAULTING; + // The current indir is no longer non-faulting. + tree->gtFlags &= ~GTF_IND_NONFAULTING; + if (nullCheckParent != nullptr) { nullCheckParent->gtFlags &= ~GTF_DONT_CSE; @@ -466,14 +469,14 @@ bool Compiler::optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nu // Re-morph the statement. 
Statement* curStmt = compCurStmt; - fgMorphBlockStmt(compCurBB, nullCheckStmt DEBUGARG("optFoldNullCheck")); + fgMorphBlockStmt(compCurBB, nullCheckStmt DEBUGARG("optFoldNullCheck"), /* allowFGChange */ false); optRecordSsaUses(nullCheckStmt->GetRootNode(), compCurBB); compCurStmt = curStmt; folded = true; } - if ((tree->OperGet() == GT_NULLCHECK) && (tree->gtGetOp1()->OperGet() == GT_LCL_VAR)) + if (tree->OperIs(GT_NULLCHECK) && (tree->gtGetOp1()->OperGet() == GT_LCL_VAR)) { nullCheckMap->Set(tree->gtGetOp1()->AsLclVarCommon()->GetLclNum(), tree, LocalNumberToNullCheckTreeMap::SetKind::Overwrite); @@ -515,13 +518,13 @@ GenTree* Compiler::optFindNullCheckToFold(GenTree* tree, LocalNumberToNullCheckT ssize_t offsetValue = 0; - if ((addr->OperGet() == GT_ADD) && addr->gtGetOp2()->IsCnsIntOrI()) + if (addr->OperIs(GT_ADD) && addr->gtGetOp2()->IsCnsIntOrI()) { offsetValue += addr->gtGetOp2()->AsIntConCommon()->IconValue(); addr = addr->gtGetOp1(); } - if (addr->OperGet() != GT_LCL_VAR) + if (!addr->OperIs(GT_LCL_VAR)) { return nullptr; } @@ -543,7 +546,7 @@ GenTree* Compiler::optFindNullCheckToFold(GenTree* tree, LocalNumberToNullCheckT if (nullCheckMap->Lookup(lclNum, &nullCheckTree)) { GenTree* nullCheckAddr = nullCheckTree->AsIndir()->Addr(); - if ((nullCheckAddr->OperGet() != GT_LCL_VAR) || (nullCheckAddr->AsLclVarCommon()->GetSsaNum() != ssaNum)) + if (!nullCheckAddr->OperIs(GT_LCL_VAR) || (nullCheckAddr->AsLclVarCommon()->GetSsaNum() != ssaNum)) { nullCheckTree = nullptr; } @@ -572,21 +575,21 @@ GenTree* Compiler::optFindNullCheckToFold(GenTree* tree, LocalNumberToNullCheckT } GenTree* defValue = defNode->Data(); - if (defValue->OperGet() != GT_COMMA) + if (!defValue->OperIs(GT_COMMA)) { return nullptr; } GenTree* commaOp1EffectiveValue = defValue->gtGetOp1()->gtEffectiveVal(); - if (commaOp1EffectiveValue->OperGet() != GT_NULLCHECK) + if (!commaOp1EffectiveValue->OperIs(GT_NULLCHECK)) { return nullptr; } GenTree* nullCheckAddress = commaOp1EffectiveValue->gtGetOp1(); - if ((nullCheckAddress->OperGet() != GT_LCL_VAR) || (defValue->gtGetOp2()->OperGet() != GT_ADD)) + if (!nullCheckAddress->OperIs(GT_LCL_VAR) || (defValue->gtGetOp2()->OperGet() != GT_ADD)) { return nullptr; } @@ -596,7 +599,7 @@ GenTree* Compiler::optFindNullCheckToFold(GenTree* tree, LocalNumberToNullCheckT GenTree* additionNode = defValue->gtGetOp2(); GenTree* additionOp1 = additionNode->gtGetOp1(); GenTree* additionOp2 = additionNode->gtGetOp2(); - if ((additionOp1->OperGet() == GT_LCL_VAR) && + if (additionOp1->OperIs(GT_LCL_VAR) && (additionOp1->AsLclVarCommon()->GetLclNum() == nullCheckAddress->AsLclVarCommon()->GetLclNum()) && (additionOp2->IsCnsIntOrI())) { diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index bad38b78980f..4a3f2b88377f 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -29,6 +29,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define DLLEXPORT #endif // !DLLEXPORT +#include "minipal/log.h" + /*****************************************************************************/ ICorJitHost* g_jitHost = nullptr; @@ -146,10 +148,19 @@ FILE* jitstdout() // Like printf/logf, but only outputs to jitstdout -- skips call back into EE. int jitprintf(const char* fmt, ...) 
{ + int status; va_list vl; va_start(vl, fmt); - int status = vfprintf(jitstdout(), fmt, vl); + if (jitstdout() == procstdout()) + { + status = minipal_log_vprint_verbose(fmt, vl); + } + else + { + status = vfprintf(jitstdout(), fmt, vl); + } va_end(vl); + return status; } diff --git a/src/coreclr/jit/eeinterface.cpp b/src/coreclr/jit/eeinterface.cpp index 7ac73a22d27e..3bce6013df00 100644 --- a/src/coreclr/jit/eeinterface.cpp +++ b/src/coreclr/jit/eeinterface.cpp @@ -81,16 +81,30 @@ void StringPrinter::Append(char chr) } //------------------------------------------------------------------------ -// eePrintJitType: +// eePrintCorInfoType: // Print a JIT type. // // Arguments: // printer - the printer -// jitType - the JIT type +// corInfoType - the CorInfoType type // -void Compiler::eePrintJitType(StringPrinter* printer, var_types jitType) +void Compiler::eePrintCorInfoType(StringPrinter* printer, CorInfoType corInfoType) { - printer->Append(varTypeName(jitType)); + static const char* preciseVarTypeMap[CORINFO_TYPE_COUNT] = {// see the definition of enum CorInfoType in file + // inc/corinfo.h + "", "void", "bool", "char", "sbyte", + "byte", "short", "ushort", "int", "uint", + "long", "ulong", "nint", "nuint", "float", + "double", "string", "ptr", "byref", "struct", + "class", "typedbyref", "var"}; + + const char* corInfoTypeName = "CORINFO_TYPE_INVALID"; + if (corInfoType >= 0 && corInfoType < CORINFO_TYPE_COUNT) + { + corInfoTypeName = preciseVarTypeMap[corInfoType]; + } + + printer->Append(corInfoTypeName); } //------------------------------------------------------------------------ @@ -144,7 +158,7 @@ void Compiler::eePrintType(StringPrinter* printer, CORINFO_CLASS_HANDLE clsHnd, } else { - eePrintJitType(printer, JitType2PreciseVarType(childType)); + eePrintCorInfoType(printer, childType); } printer->Append('['); @@ -205,7 +219,7 @@ void Compiler::eePrintTypeOrJitAlias(StringPrinter* printer, CORINFO_CLASS_HANDL } else { - eePrintJitType(printer, JitType2PreciseVarType(typ)); + eePrintCorInfoType(printer, typ); } } @@ -215,6 +229,18 @@ static const char* s_jitHelperNames[CORINFO_HELP_COUNT] = { #include "jithelpers.h" }; +void AppendCorInfoTypeWithModModifiers(StringPrinter* printer, CorInfoTypeWithMod corInfoTypeWithMod) +{ + if ((corInfoTypeWithMod & CORINFO_TYPE_MOD_PINNED) == CORINFO_TYPE_MOD_PINNED) + { + printer->Append("PINNED__"); + } + if ((corInfoTypeWithMod & CORINFO_TYPE_MOD_COPY_WITH_HELPER) == CORINFO_TYPE_MOD_COPY_WITH_HELPER) + { + printer->Append("COPY_WITH_HELPER__"); + } +} + //------------------------------------------------------------------------ // eePrintMethod: // Print a method given by a method handle, its owning class handle and its @@ -285,7 +311,10 @@ void Compiler::eePrintMethod(StringPrinter* printer, printer->Append(','); CORINFO_CLASS_HANDLE vcClsHnd; - var_types type = JitType2PreciseVarType(strip(info.compCompHnd->getArgType(sig, argLst, &vcClsHnd))); + CorInfoTypeWithMod argTypeWithMod = info.compCompHnd->getArgType(sig, argLst, &vcClsHnd); + AppendCorInfoTypeWithModModifiers(printer, argTypeWithMod); + + var_types type = JitType2PreciseVarType(strip(argTypeWithMod)); switch (type) { case TYP_REF: @@ -302,7 +331,7 @@ void Compiler::eePrintMethod(StringPrinter* printer, FALLTHROUGH; default: - eePrintJitType(printer, type); + eePrintCorInfoType(printer, strip(argTypeWithMod)); break; } @@ -331,7 +360,7 @@ void Compiler::eePrintMethod(StringPrinter* printer, } FALLTHROUGH; default: - eePrintJitType(printer, retType); + eePrintCorInfoType(printer, 
sig->retType); break; } } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index bbf30da0c728..14652496a4d5 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1008,7 +1008,13 @@ insGroup* emitter::emitSavIG(bool emitAdd) emitPrevGCrefRegs = emitThisGCrefRegs; emitPrevByrefRegs = emitThisByrefRegs; - emitForceStoreGCState = false; + if (emitAddedLabel) + { + // Reset emitForceStoreGCState only after seeing label. It will keep + // marking IGs with IGF_GC_VARS flag until that. + emitForceStoreGCState = false; + emitAddedLabel = false; + } } #ifdef DEBUG @@ -1277,6 +1283,7 @@ void emitter::emitBegFN(bool hasFramePtr emitPrevByrefRegs = RBM_NONE; emitForceStoreGCState = false; + emitAddedLabel = false; #ifdef DEBUG @@ -1622,7 +1629,7 @@ void* emitter::emitAllocAnyInstr(size_t sz, emitAttr opsz) // ARM - This is currently broken on TARGET_ARM // When nopSize is odd we misalign emitCurIGsize // - if (!emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && !emitInInstrumentation && + if (!emitComp->IsAot() && !emitInInstrumentation && !emitIGisInProlog(emitCurIG) && // don't do this in prolog or epilog !emitIGisInEpilog(emitCurIG) && emitRandomNops // sometimes we turn off where exact codegen is needed (pinvoke inline) @@ -1755,7 +1762,7 @@ void* emitter::emitAllocAnyInstr(size_t sz, emitAttr opsz) id->idOpSize(EA_SIZE(opsz)); } - // Amd64: ip-relative addressing is supported even when not generating relocatable ngen code + // Amd64: ip-relative addressing is supported even when not generating relocatable AOT code if (EA_IS_DSP_RELOC(opsz) #ifndef TARGET_AMD64 && emitComp->opts.compReloc @@ -2811,6 +2818,8 @@ bool emitter::emitNoGChelper(CORINFO_METHOD_HANDLE methHnd) void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, BasicBlock* prevBlock) { + bool currIGWasNonEmpty = emitCurIGnonEmpty(); + // if starting a new block that can be a target of a branch and the last instruction was GC-capable call. if ((prevBlock != nullptr) && emitComp->compCurBB->HasFlag(BBF_HAS_LABEL) && emitLastInsIsCallWithGC()) { @@ -2839,10 +2848,27 @@ void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMas } } + emitAddedLabel = true; + /* Create a new IG if the current one is non-empty */ if (emitCurIGnonEmpty()) { +#if FEATURE_LOOP_ALIGN + + if (!currIGWasNonEmpty && (emitAlignLast != nullptr) && (emitAlignLast->idaLoopHeadPredIG != nullptr) && + (emitAlignLast->idaLoopHeadPredIG->igNext == emitCurIG)) + { + // If the emitCurIG was thought to be a loop-head, but if it didn't turn out that way and we end up + // creating a new IG from which the loop starts, make sure to update the LoopHeadPred of last align + // instruction emitted. This will guarantee that the information stays up-to-date. Later if we + // notice a loop that encloses another loop, this information helps in removing the align field from + // such loops. + // We need to only update emitAlignLast because we do not align intermingled or overlapping loops. + emitAlignLast->idaLoopHeadPredIG = emitCurIG; + } +#endif // FEATURE_LOOP_ALIGN + emitNxtIG(); } else @@ -3564,7 +3590,8 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSizeIn - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)) + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet) { emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? 
retSizeIn : EA_PTRSIZE; @@ -3588,7 +3615,8 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, (argCnt > ID_MAX_SMALL_CNS) || // too many args (argCnt < 0) // caller pops arguments // There is a second ref/byref return register. - MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize))) + MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)) || + hasAsyncRet) { instrDescCGCA* id; @@ -3605,6 +3633,7 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, #if MULTIREG_HAS_SECOND_GC_RET emitSetSecondRetRegGCType(id, secondRetSize); #endif // MULTIREG_HAS_SECOND_GC_RET + id->hasAsyncContinuationRet(hasAsyncRet); return id; } @@ -3648,7 +3677,8 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSizeIn - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)) + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet) { emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE; @@ -3668,7 +3698,8 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, (argCnt > ID_MAX_SMALL_CNS) || // too many args (argCnt < 0) // caller pops arguments // There is a second ref/byref return register. - MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize))) + MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)) || + hasAsyncRet) { instrDescCGCA* id = emitAllocInstrCGCA(retSize); @@ -3685,6 +3716,7 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, #if MULTIREG_HAS_SECOND_GC_RET emitSetSecondRetRegGCType(id, secondRetSize); #endif // MULTIREG_HAS_SECOND_GC_RET + id->hasAsyncContinuationRet(hasAsyncRet); return id; } @@ -6515,7 +6547,7 @@ void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) // Only to the first block of the finally (which is properly marked) BasicBlock* tgtBlk = tgtEH->ebdHndBeg; - assert(tgtBlk->HasFlag(BBF_FUNCLET_BEG)); + assert(emitComp->bbIsFuncletBeg(tgtBlk)); // And now we made it back to where we started assert(tgtIG == emitCodeGetCookie(tgtBlk)); @@ -6766,9 +6798,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, // These are the heuristics we use to decide whether or not to force the // code to be 16-byte aligned. // - // 1. For ngen code with IBC data, use 16-byte alignment if the method + // 1. For AOT code with IBC data, use 16-byte alignment if the method // has been called more than ScenarioHotWeight times. - // 2. For JITed code and ngen code without IBC data, use 16-byte alignment + // 2. For JITed code and AOT code without IBC data, use 16-byte alignment // when the code is 16 bytes or smaller. We align small getters/setters // because of they are penalized heavily on certain hardware when not 16-byte // aligned (VSWhidbey #373938). To minimize size impact of this optimization, @@ -6864,9 +6896,8 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #ifdef DEBUG if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) != 0) { - // For prejit, codeBlock will not be necessarily aligned, but it is aligned - // in final obj file. - assert((((size_t)codeBlock & 31) == 0) || emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)); + // For AOT, codeBlock will not be necessarily aligned, but it is aligned in final obj file. 
+ assert((((size_t)codeBlock & 31) == 0) || emitComp->IsAot()); } #if 0 // TODO: we should be able to assert the following, but it appears crossgen2 doesn't respect them, @@ -7066,7 +7097,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, assert(dsc->lvTracked); assert(dsc->lvRefCnt() != 0); - assert(dsc->TypeGet() == TYP_REF || dsc->TypeGet() == TYP_BYREF); + assert(dsc->TypeIs(TYP_REF, TYP_BYREF)); assert(indx < emitComp->lvaTrackedCount); @@ -7082,7 +7113,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #endif // JIT32_GCENCODER && FEATURE_EH_WINDOWS_X86 - if (dsc->TypeGet() == TYP_BYREF) + if (dsc->TypeIs(TYP_BYREF)) { offs |= byref_OFFSET_FLAG; } @@ -8164,35 +8195,142 @@ CORINFO_FIELD_HANDLE emitter::emitSimd16Const(simd16_t constValue) return emitComp->eeFindJitDataOffs(cnum); } -#if defined(TARGET_XARCH) -CORINFO_FIELD_HANDLE emitter::emitSimd32Const(simd32_t constValue) +#ifdef TARGET_XARCH +//------------------------------------------------------------------------ +// emitSimdConst: Create a simd data section constant. +// +// Arguments: +// constValue - constant value +// attr - The EA_SIZE for the constant type +// +// Return Value: +// A field handle representing the data offset to access the constant. +// +// Note: +// Access to inline data is 'abstracted' by a special type of static member +// (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference +// to constant data, not a real static field. +// +CORINFO_FIELD_HANDLE emitter::emitSimdConst(simd_t* constValue, emitAttr attr) { - unsigned cnsSize = 32; - unsigned cnsAlign = cnsSize; + unsigned cnsSize = EA_SIZE(attr); + unsigned cnsAlign = cnsSize; + var_types dataType = (cnsSize >= 8) ? emitComp->getSIMDTypeForSize(cnsSize) : TYP_FLOAT; +#ifdef TARGET_XARCH if (emitComp->compCodeOpt() == Compiler::SMALL_CODE) { cnsAlign = dataSection::MIN_DATA_ALIGN; } +#endif // TARGET_XARCH - UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD32); + UNATIVE_OFFSET cnum = emitDataConst(constValue, cnsSize, cnsAlign, dataType); return emitComp->eeFindJitDataOffs(cnum); } -CORINFO_FIELD_HANDLE emitter::emitSimd64Const(simd64_t constValue) +//------------------------------------------------------------------------ +// emitSimdConstCompressedLoad: Create a simd data section constant, +// compressing it if possible, and emit an appropiate instruction +// to load or broadcast the constant to a register. +// +// Arguments: +// constValue - constant value +// attr - The EA_SIZE for the constant type +// targetReg - The target register +// +void emitter::emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, regNumber targetReg) { - unsigned cnsSize = 64; - unsigned cnsAlign = cnsSize; + assert(EA_SIZE(attr) >= 8 && EA_SIZE(attr) <= 64); - if (emitComp->compCodeOpt() == Compiler::SMALL_CODE) + unsigned cnsSize = EA_SIZE(attr); + unsigned dataSize = cnsSize; + instruction ins = (cnsSize == 8) ? INS_movsd_simd : INS_movups; + + // Most constant vectors tend to have repeated values, so we will first check to see if + // we can replace a full vector load with a smaller broadcast. 
+ + if ((dataSize == 64) && (constValue->v256[1] == constValue->v256[0])) { - cnsAlign = dataSection::MIN_DATA_ALIGN; + assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); + dataSize = 32; + ins = INS_vbroadcastf32x8; } - UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD64); - return emitComp->eeFindJitDataOffs(cnum); -} + if ((dataSize == 32) && (constValue->v128[1] == constValue->v128[0])) + { + assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX)); + dataSize = 16; + ins = INS_vbroadcastf32x4; + } + + if ((dataSize == 16) && (constValue->u64[1] == constValue->u64[0])) + { + if (((cnsSize == 16) && emitComp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) || + emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX)) + { + dataSize = 8; + ins = (cnsSize == 16) ? INS_movddup : INS_vbroadcastsd; + } + } + + // `vbroadcastss` fills the full SIMD register, so we can't do this last step if the + // original constant was smaller than a full reg (e.g. TYP_SIMD8) + + if ((dataSize == 8) && (cnsSize >= 16) && (constValue->u32[1] == constValue->u32[0])) + { + if (emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX)) + { + dataSize = 4; + ins = INS_vbroadcastss; + } + } + + if (dataSize < cnsSize) + { + // We found a broadcast match, so emit the broadcast instruction and return. + // Here we use the original emitAttr for the instruction, because we need to + // produce a register of the original constant's size, filled with the pattern. + + CORINFO_FIELD_HANDLE hnd = emitSimdConst(constValue, EA_ATTR(dataSize)); + emitIns_R_C(ins, attr, targetReg, hnd, 0); + return; + } + + // Otherwise, if the upper lanes and/or elements of the constant are zero, we can use a + // smaller load, because all scalar and vector memory load instructions zero the uppers. + + simd32_t zeroValue = {}; + + if ((dataSize == 64) && (constValue->v256[1] == zeroValue)) + { + dataSize = 32; + } + + if ((dataSize == 32) && (constValue->v128[1] == zeroValue.v128[0])) + { + dataSize = 16; + } + + if ((dataSize == 16) && (constValue->u64[1] == 0)) + { + dataSize = 8; + ins = INS_movsd_simd; + } + if ((dataSize == 8) && (constValue->u32[1] == 0)) + { + dataSize = 4; + ins = INS_movss; + } + + // Here we set the emitAttr to the size of the actual load. It will zero extend + // up to the native SIMD register size. 
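// Summarizing the compression rules above as a standalone sketch: the constant is first checked
// for a repeated upper half (broadcast), then for an all-zero upper half (narrower plain load).
// The function name is illustrative, and the ISA and minimum-size checks of the real routine are
// omitted here; 'size' is the constant's byte size:
#include <algorithm>
#include <cstdint>
#include <cstring>

static unsigned compressedLoadSize(const uint8_t* data, unsigned size, bool* useBroadcast)
{
    unsigned dataSize = size;

    // Broadcast: halve while the upper half repeats the lower half.
    while ((dataSize > 4) && (std::memcmp(data, data + dataSize / 2, dataSize / 2) == 0))
    {
        dataSize /= 2;
    }

    *useBroadcast = (dataSize < size);
    if (*useBroadcast)
    {
        return dataSize;
    }

    // Zero uppers: halve while the upper half is all zero bytes.
    while ((dataSize > 4) &&
           std::all_of(data + dataSize / 2, data + dataSize, [](uint8_t b) { return b == 0; }))
    {
        dataSize /= 2;
    }

    return dataSize;
}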
+ + attr = EA_ATTR(dataSize); + + CORINFO_FIELD_HANDLE hnd = emitSimdConst(constValue, attr); + emitIns_R_C(ins, attr, targetReg, hnd, 0); +} #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) @@ -8470,7 +8608,10 @@ void emitter::emitDispDataSec(dataSecDsc* section, BYTE* dst) switch (data->dsDataType) { case TYP_FLOAT: - assert(data->dsSize >= 4); + if (data->dsSize < 4) + { + printf("\t<invalid data size %u, expected >= 4)\n", data->dsSize); + } printf("\tdd\t%08llXh\t", (UINT64) * reinterpret_cast(&data->dsCont[i])); printf("\t; %9.6g", FloatingPointUtils::convertToDouble(*reinterpret_cast(&data->dsCont[i]))); @@ -8478,7 +8619,10 @@ void emitter::emitDispDataSec(dataSecDsc* section, BYTE* dst) break; case TYP_DOUBLE: - assert(data->dsSize >= 8); + if (data->dsSize < 8) + { + printf("\t<invalid data size %u, expected >= 8)\n", data->dsSize); + } printf("\tdq\t%016llXh", *reinterpret_cast(&data->dsCont[i])); printf("\t; %12.9g", *reinterpret_cast(&data->dsCont[i])); i += 8; @@ -8499,7 +8643,10 @@ void emitter::emitDispDataSec(dataSecDsc* section, BYTE* dst) break; case 2: - assert((data->dsSize % 2) == 0); + if ((data->dsSize % 2) != 0) + { + printf("\t<invalid data size %u, expected a multiple of 2>\n", data->dsSize); + } printf("\tdw\t%04Xh", *reinterpret_cast(&data->dsCont[i])); for (j = 2; j < 24; j += 2) { @@ -8512,7 +8659,10 @@ void emitter::emitDispDataSec(dataSecDsc* section, BYTE* dst) case 12: case 4: - assert((data->dsSize % 4) == 0); + if ((data->dsSize % 4) != 0) + { + printf("\t<invalid data size %u, expected a multiple of 4>\n", data->dsSize); + } printf("\tdd\t%08Xh", *reinterpret_cast(&data->dsCont[i])); for (j = 4; j < 24; j += 4) { @@ -8527,7 +8677,10 @@ void emitter::emitDispDataSec(dataSecDsc* section, BYTE* dst) case 32: case 16: case 8: - assert((data->dsSize % 8) == 0); + if ((data->dsSize % 8) != 0) + { + printf("\t<invalid data size %u, expected a multiple of 8>\n", data->dsSize); + } printf("\tdq\t%016llXh", *reinterpret_cast(&data->dsCont[i])); for (j = 8; j < 64; j += 8) { @@ -8539,7 +8692,7 @@ void emitter::emitDispDataSec(dataSecDsc* section, BYTE* dst) break; default: - assert(!"unexpected elemSize"); + printf("\t<unexpected data size %u>\n", data->dsSize); + unsigned gcrefRegs = emitThisGCrefRegs.GetIntRegSet() >> REG_INT_FIRST; + unsigned byrefRegs = emitThisByrefRegs.GetIntRegSet() >> REG_INT_FIRST; - // We make a bitmask whose bits correspond to callee-saved register indices (in the sequence - // of callee-saved registers only). - for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALL_GC_REGS; calleeSavedRegIdx++) - { - regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; - if (emitThisGCrefRegs & calleeSavedRbm) - { - gcrefRegs |= (1 << calleeSavedRegIdx); - } - if (emitThisByrefRegs & calleeSavedRbm) - { - byrefRegs |= (1 << calleeSavedRegIdx); - } - } + assert(regMaskTP::FromIntRegSet(SingleTypeRegSet(gcrefRegs << REG_INT_FIRST)) == emitThisGCrefRegs); + assert(regMaskTP::FromIntRegSet(SingleTypeRegSet(byrefRegs << REG_INT_FIRST)) == emitThisByrefRegs); #ifdef JIT32_GCENCODER + // x86 does not report GC refs/byrefs in return registers at call sites + gcrefRegs &= ~(1u << (REG_INTRET - REG_INT_FIRST)); + byrefRegs &= ~(1u << (REG_INTRET - REG_INT_FIRST)); + // For the general encoder, we always have to record calls, so we don't take this early return. /* Are there any // args to pop at this call site? @@ -10452,7 +10597,6 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) return result; } -#if !defined(JIT32_GCENCODER) //------------------------------------------------------------------------ // emitDisableGC: Requests that the following instruction groups are not GC-interruptible.
// @@ -10544,4 +10688,3 @@ void emitter::emitEnableGC() JITDUMP("Enable GC: still %u no-gc requests\n", emitNoGCRequestCount); } } -#endif // !defined(JIT32_GCENCODER) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 3054f728e2b4..dd1fd19f8671 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -447,6 +447,46 @@ enum idAddrUnionTag iaut_SHIFT = 2 }; +enum EmitCallType +{ + EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method (call/bl addr with IP-relative encoding) +#ifdef TARGET_XARCH + EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method (call [addr]/call [rip+addr]) +#endif + EC_INDIR_R, // Indirect call via register (call/bl reg) +#ifdef TARGET_XARCH + EC_INDIR_ARD, // Indirect call via an addressing mode (call [rax+rdx*8+disp]) +#endif + + EC_COUNT +}; + +struct EmitCallParams +{ + EmitCallType callType = EC_COUNT; + CORINFO_METHOD_HANDLE methHnd = NO_METHOD_HANDLE; +#ifdef DEBUG + // Used to report call sites to the EE + CORINFO_SIG_INFO* sigInfo = nullptr; +#endif + void* addr = nullptr; + ssize_t argSize = 0; + emitAttr retSize = EA_PTRSIZE; + // For multi-reg args with GC returns in the second arg + emitAttr secondRetSize = EA_UNKNOWN; + bool hasAsyncRet = false; + BitVec ptrVars = BitVecOps::UninitVal(); + regMaskTP gcrefRegs = RBM_NONE; + regMaskTP byrefRegs = RBM_NONE; + DebugInfo debugInfo; + regNumber ireg = REG_NA; + regNumber xreg = REG_NA; + unsigned xmul = 0; + ssize_t disp = 0; + bool isJump = false; + bool noSafePoint = false; +}; + class emitter { friend class emitLocation; @@ -624,8 +664,8 @@ class emitter private: // The assembly instruction #if defined(TARGET_XARCH) - static_assert_no_msg(INS_count <= 1024); - instruction _idIns : 10; + static_assert_no_msg(INS_count <= 2048); + instruction _idIns : 11; #define MAX_ENCODED_SIZE 15 #elif defined(TARGET_ARM64) #define INSTR_ENCODED_SIZE 4 @@ -713,8 +753,8 @@ class emitter //////////////////////////////////////////////////////////////////////// // Space taken up to here: - // x86: 17 bits - // amd64: 17 bits + // x86: 18 bits + // amd64: 18 bits // arm: 16 bits // arm64: 21 bits // loongarch64: 14 bits @@ -747,19 +787,22 @@ class emitter // The idReg1 and idReg2 fields hold the first and second register // operand(s), whenever these are present. Note that currently the - // size of these fields is 6 bits on all targets, and care needs to - // be taken to make sure all of these fields stay reasonably packed. + // size of these fields is 6 bits on most targets, but 7 on others, + // and care needs to be taken to make sure all of these fields stay + // reasonably packed. 
// Note that we use the _idReg1 and _idReg2 fields to hold // the live gcrefReg mask for the call instructions on x86/x64 // +#if !defined(TARGET_XARCH) regNumber _idReg1 : REGNUM_BITS; // register num regNumber _idReg2 : REGNUM_BITS; +#endif //////////////////////////////////////////////////////////////////////// // Space taken up to here: - // x86: 38 bits - // amd64: 38 bits + // x86: 27 bits + // amd64: 27 bits // arm: 32 bits // arm64: 46 bits // loongarch64: 28 bits @@ -777,6 +820,10 @@ class emitter unsigned _idCustom1 : 1; unsigned _idCustom2 : 1; unsigned _idCustom3 : 1; +#if defined(TARGET_XARCH) + regNumber _idReg1 : REGNUM_BITS; // register num + regNumber _idReg2 : REGNUM_BITS; +#endif #define _idBound _idCustom1 /* jump target / frame offset bound */ #define _idTlsGD _idCustom2 /* Used to store information related to TLS GD access on linux */ @@ -804,6 +851,9 @@ class emitter #define _idEvexNdContext _idCustom5 /* bits used for the APX-EVEX.nd context for promoted legacy instructions */ #define _idEvexNfContext _idCustom6 /* bits used for the APX-EVEX.nf context for promoted legacy/vex instructions */ + // We repurpose 4 bits for the default flag value bits for ccmp instructions. +#define _idEvexDFV (_idCustom4 << 3) | (_idCustom3 << 2) | (_idCustom2 << 1) | _idCustom1 + // In certian cases, we do not allow instructions to be promoted to APX-EVEX. // e.g. instructions like add/and/or/inc/dec can be used with LOCK prefix, but cannot be prefixed by LOCK and // EVEX together. @@ -840,8 +890,8 @@ class emitter //////////////////////////////////////////////////////////////////////// // Space taken up to here: - // x86: 49 bits - // amd64: 49 bits + // x86: 50 bits + // amd64: 52 bits // arm: 48 bits // arm64: 55 bits // loongarch64: 46 bits @@ -858,8 +908,10 @@ class emitter #define ID_EXTRA_BITFIELD_BITS (23) #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #define ID_EXTRA_BITFIELD_BITS (14) -#elif defined(TARGET_XARCH) -#define ID_EXTRA_BITFIELD_BITS (17) +#elif defined(TARGET_X86) +#define ID_EXTRA_BITFIELD_BITS (18) +#elif defined(TARGET_AMD64) +#define ID_EXTRA_BITFIELD_BITS (20) #else #error Unsupported or unset target architecture #endif @@ -893,8 +945,8 @@ class emitter //////////////////////////////////////////////////////////////////////// // Space taken up to here (with/without prev offset, assuming host==target): - // x86: 55/51 bits - // amd64: 56/51 bits + // x86: 56/52 bits + // amd64: 59/54 bits // arm: 54/50 bits // arm64: 62/57 bits // loongarch64: 53/48 bits @@ -905,12 +957,11 @@ class emitter /* Use whatever bits are left over for small constants */ #define ID_BIT_SMALL_CNS (32 - ID_EXTRA_BITS) - C_ASSERT(ID_BIT_SMALL_CNS > 0); //////////////////////////////////////////////////////////////////////// // Small constant size (with/without prev offset, assuming host==target): - // x86: 10/14 bits - // amd64: 9/14 bits + // x86: 8/12 bits + // amd64: 5/10 bits // arm: 10/14 bits // arm64: 2/7 bits // loongarch64: 11/16 bits @@ -1401,6 +1452,13 @@ class emitter { return idHasMemGenWrite() || idHasMemStkWrite() || idHasMemAdrWrite(); } + + bool idHasMemAndCns() const + { + assert((unsigned)idInsFmt() < emitFmtCount); + ID_OPS idOp = (ID_OPS)emitFmtToOps[idInsFmt()]; + return ((idOp == ID_OP_CNS) || (idOp == ID_OP_DSP_CNS) || (idOp == ID_OP_AMD_CNS)); + } #endif // defined(TARGET_XARCH) #ifdef TARGET_ARMARCH insOpts idInsOpt() const @@ -1754,6 +1812,23 @@ class emitter assert(!idIsNoApxEvexPromotion()); _idNoApxEvexXPromotion = 1; } + + unsigned 
idGetEvexDFV() const + { + return _idEvexDFV; + } + + void idSetEvexDFV(insOpts instOptions) + { + unsigned value = static_cast((instOptions & INS_OPTS_EVEX_dfv_MASK) >> 8); + + _idCustom1 = ((value >> 0) & 1); + _idCustom2 = ((value >> 1) & 1); + _idCustom3 = ((value >> 2) & 1); + _idCustom4 = ((value >> 3) & 1); + + assert(value == idGetEvexDFV()); + } #endif #ifdef TARGET_ARMARCH @@ -2188,6 +2263,18 @@ class emitter }; #endif +#ifdef TARGET_RISCV64 + struct instrDescLoadImm : instrDescCns + { + instrDescLoadImm() = delete; + + static const int absMaxInsCount = 8; + + instruction ins[absMaxInsCount]; + int32_t values[absMaxInsCount]; + }; +#endif // TARGET_RISCV64 + struct instrDescCGCA : instrDesc // call with ... { instrDescCGCA() = delete; @@ -2208,8 +2295,19 @@ class emitter { _idcSecondRetRegGCType = gctype; } +#endif + + bool hasAsyncContinuationRet() const + { + return _hasAsyncContinuationRet; + } + void hasAsyncContinuationRet(bool value) + { + _hasAsyncContinuationRet = value; + } private: +#if MULTIREG_HAS_SECOND_GC_RET // This member stores the GC-ness of the second register in a 2 register returned struct on System V. // It is added to the call struct since it is not needed by the base instrDesc struct, which keeps GC-ness // of the first register for the instCall nodes. @@ -2219,6 +2317,7 @@ class emitter // The base struct's member keeping the GC-ness of the first return register is _idGCref. GCtype _idcSecondRetRegGCType : 2; // ... GC type for the second return register. #endif // MULTIREG_HAS_SECOND_GC_RET + bool _hasAsyncContinuationRet : 1; }; // TODO-Cleanup: Uses of stack-allocated instrDescs should be refactored to be unnecessary. @@ -2562,10 +2661,9 @@ class emitter CORINFO_FIELD_HANDLE emitSimd8Const(simd8_t constValue); CORINFO_FIELD_HANDLE emitSimd16Const(simd16_t constValue); #if defined(TARGET_XARCH) - CORINFO_FIELD_HANDLE emitSimd32Const(simd32_t constValue); - CORINFO_FIELD_HANDLE emitSimd64Const(simd64_t constValue); + CORINFO_FIELD_HANDLE emitSimdConst(simd_t* constValue, emitAttr attr); + void emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, regNumber targetReg); #endif // TARGET_XARCH - #if defined(FEATURE_MASKED_HW_INTRINSICS) CORINFO_FIELD_HANDLE emitSimdMaskConst(simdmask_t constValue); #endif // FEATURE_MASKED_HW_INTRINSICS @@ -2590,6 +2688,8 @@ class emitter bool emitDoesInsModifyFlags(instruction ins); #endif // TARGET_XARCH + void emitIns_Call(const EmitCallParams& params); + /************************************************************************/ /* The logic that creates and keeps track of instruction groups */ /************************************************************************/ @@ -2735,6 +2835,14 @@ class emitter bool emitForceStoreGCState; + // This flag is used together with `emitForceStoreGCState`. After we set + // emitForceStoreGCState = true, we will mark `emitAddedLabel` to true whenever + // we see a label IG. In emitSavIG, we will reset `emitForceStoreGCState = false` + // only after seeing `emitAddedLabel == true`. Until then, we will keep recording + // GC_VARS on the IGs. + + bool emitAddedLabel; + // emitThis* variables are used during emission, to track GC updates // on a per-instruction basis. 
During code generation, per-instruction // tracking is done with variables gcVarPtrSetCur, gcRegGCrefSetCur, @@ -2786,11 +2894,9 @@ class emitter void emitNewIG(); -#if !defined(JIT32_GCENCODER) void emitDisableGC(); void emitEnableGC(); bool emitGCDisabled(); -#endif // !defined(JIT32_GCENCODER) #if defined(TARGET_XARCH) static bool emitAlignInstHasNoCode(instrDesc* id); @@ -3123,6 +3229,10 @@ class emitter instrDesc* emitNewInstrLclVarPair(emitAttr attr, cnsval_ssize_t cns); #endif // !TARGET_ARM64 +#ifdef TARGET_RISCV64 + instrDesc* emitNewInstrLoadImm(emitAttr attr, cnsval_ssize_t cns); +#endif // TARGET_RISCV64 + static const BYTE emitFmtToOps[]; #ifdef DEBUG @@ -3955,6 +4065,18 @@ inline emitter::instrDesc* emitter::emitNewInstrReloc(emitAttr attr, BYTE* addr) #endif // TARGET_ARM +#ifdef TARGET_RISCV64 + +inline emitter::instrDesc* emitter::emitNewInstrLoadImm(emitAttr attr, cnsval_ssize_t cns) +{ + instrDescLoadImm* id = static_cast(emitAllocAnyInstr(sizeof(instrDescLoadImm), attr)); + id->idInsOpt(INS_OPTS_I); + id->idcCnsVal = cns; + return id; +} + +#endif // TARGET_RISCV64 + #ifdef TARGET_XARCH /***************************************************************************** @@ -4041,10 +4163,7 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id, bool ignoreEmbeddedBroadcast) // which case we load either a scalar or full vector; otherwise, // we load a 128-bit vector - assert((unsigned)id->idInsFmt() < emitFmtCount); - ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()]; - - if ((idOp != ID_OP_CNS) && (idOp != ID_OP_SCNS) && (idOp != ID_OP_DSP_CNS) && (idOp != ID_OP_AMD_CNS)) + if (!id->idHasMemAndCns()) { memSize = 16; } @@ -4079,10 +4198,7 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id, bool ignoreEmbeddedBroadcast) // Embedded broadcast is never supported so if we have a cns operand // we load a full vector; otherwise, we load a 128-bit vector - assert((unsigned)id->idInsFmt() < emitFmtCount); - ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()]; - - if ((idOp != ID_OP_CNS) && (idOp != ID_OP_SCNS) && (idOp != ID_OP_DSP_CNS) && (idOp != ID_OP_AMD_CNS)) + if (!id->idHasMemAndCns()) { memSize = 16; } diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index 902399f58e4f..59fea0490444 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -4662,46 +4662,31 @@ void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNu * Please consult the "debugger team notification" comment in genFnProlog(). 
*/ -void emitter::emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, // used for pretty printing - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - int argSize, - emitAttr retSize, - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di /* = DebugInfo() */, - regNumber ireg /* = REG_NA */, - regNumber xreg /* = REG_NA */, - unsigned xmul /* = 0 */, - ssize_t disp /* = 0 */, - bool isJump /* = false */, - bool noSafePoint /* = false */) +void emitter::emitIns_Call(const EmitCallParams& params) { /* Sanity check the arguments depending on callType */ - assert(callType < EC_COUNT); - assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA)); - assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT)); + assert(params.callType < EC_COUNT); + assert((params.callType != EC_FUNC_TOKEN) || (params.addr != nullptr && params.ireg == REG_NA)); + assert(params.callType != EC_INDIR_R || (params.addr == nullptr && params.ireg < REG_COUNT)); // ARM never uses these - assert(xreg == REG_NA && xmul == 0 && disp == 0); + assert(params.xreg == REG_NA && params.xmul == 0 && params.disp == 0); // Our stack level should be always greater than the bytes of arguments we push. Just // a sanity test. - assert((unsigned)abs(argSize) <= codeGen->genStackLevel); + assert((unsigned)abs(params.argSize) <= codeGen->genStackLevel); // Trim out any callee-trashed registers from the live set. - regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); - gcrefRegs &= savedSet; - byrefRegs &= savedSet; + regMaskTP savedSet = emitGetGCRegsSavedOrModified(params.methHnd); + regMaskTP gcrefRegs = params.gcrefRegs & savedSet; + regMaskTP byrefRegs = params.byrefRegs & savedSet; #ifdef DEBUG if (EMIT_GC_VERBOSE) { - printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); - dumpConvertedVarSet(emitComp, ptrVars); + printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, params.ptrVars)); + dumpConvertedVarSet(emitComp, params.ptrVars); printf(", gcrefRegs="); printRegMaskInt(gcrefRegs); emitDispRegSet(gcrefRegs); @@ -4713,9 +4698,9 @@ void emitter::emitIns_Call(EmitCallType callType, #endif /* Managed RetVal: emit sequence point for the call */ - if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid()) + if (emitComp->opts.compDbgInfo && params.debugInfo.GetLocation().IsValid()) { - codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + codeGen->genIPmappingAdd(IPmappingDscKind::Normal, params.debugInfo, false); } /* @@ -4733,43 +4718,44 @@ void emitter::emitIns_Call(EmitCallType callType, */ instrDesc* id; - assert(argSize % REGSIZE_BYTES == 0); - int argCnt = argSize / REGSIZE_BYTES; + assert(params.argSize % REGSIZE_BYTES == 0); + int argCnt = (int)params.argSize / REGSIZE_BYTES; - if (callType == EC_INDIR_R) + if (params.callType == EC_INDIR_R) { /* Indirect call, virtual calls */ - id = emitNewInstrCallInd(argCnt, 0 /* disp */, ptrVars, gcrefRegs, byrefRegs, retSize); + id = emitNewInstrCallInd(argCnt, 0 /* disp */, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, + params.hasAsyncRet); } else { /* Helper/static/nonvirtual/function calls (direct or through handle), and calls to an absolute addr. 
*/ - assert(callType == EC_FUNC_TOKEN); + assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ // If the method returns a GC ref, mark R0 appropriately - if (retSize == EA_GCREF) + if (params.retSize == EA_GCREF) { gcrefRegs |= RBM_R0; } - else if (retSize == EA_BYREF) + else if (params.retSize == EA_BYREF) { byrefRegs |= RBM_R0; } - VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); + VarSetOps::Assign(emitComp, emitThisGCrefVars, params.ptrVars); emitThisGCrefRegs = gcrefRegs; emitThisByrefRegs = byrefRegs; // for the purpose of GC safepointing tail-calls are not real calls - id->idSetIsNoGC(isJump || noSafePoint || emitNoGChelper(methHnd)); + id->idSetIsNoGC(params.isJump || params.noSafePoint || emitNoGChelper(params.methHnd)); /* Set the instruction - special case jumping a function */ instruction ins; @@ -4777,11 +4763,11 @@ void emitter::emitIns_Call(EmitCallType callType, /* Record the address: method, indirection, or funcptr */ - if (callType == EC_INDIR_R) + if (params.callType == EC_INDIR_R) { /* This is an indirect call (either a virtual call or func ptr call) */ - if (isJump) + if (params.isJump) { ins = INS_bx; // INS_bx Reg } @@ -4794,19 +4780,19 @@ void emitter::emitIns_Call(EmitCallType callType, id->idIns(ins); id->idInsFmt(fmt); id->idInsSize(emitInsSize(fmt)); - id->idReg3(ireg); - assert(xreg == REG_NA); + id->idReg3(params.ireg); + assert(params.xreg == REG_NA); } else { /* This is a simple direct call: "call helper/method/addr" */ - assert(callType == EC_FUNC_TOKEN); + assert(params.callType == EC_FUNC_TOKEN); // if addr is nullptr then this call is treated as a recursive call. - assert(addr == nullptr || codeGen->validImmForBL((ssize_t)addr)); + assert(params.addr == nullptr || codeGen->validImmForBL((ssize_t)params.addr)); - if (isJump) + if (params.isJump) { ins = INS_b; // INS_b imm24 } @@ -4821,7 +4807,7 @@ void emitter::emitIns_Call(EmitCallType callType, id->idInsFmt(fmt); id->idInsSize(emitInsSize(fmt)); - id->idAddr()->iiaAddr = (BYTE*)addr; + id->idAddr()->iiaAddr = (BYTE*)params.addr; if (emitComp->opts.compReloc) { @@ -4846,14 +4832,14 @@ void emitter::emitIns_Call(EmitCallType callType, if (m_debugInfoSize > 0) { - INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo); - id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token + INDEBUG(id->idDebugOnlyInfo()->idCallSig = params.sigInfo); + id->idDebugOnlyInfo()->idMemCookie = (size_t)params.methHnd; // method token } #ifdef LATE_DISASM - if (addr != nullptr) + if (params.addr != nullptr) { - codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); + codeGen->getDisAssembler().disSetMethod((size_t)params.addr, params.methHnd); } #endif // LATE_DISASM @@ -6546,6 +6532,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) else if (id->idGCref() == GCT_BYREF) byrefRegs |= RBM_R0; + if (id->idIsLargeCall() && ((instrDescCGCA*)id)->hasAsyncContinuationRet()) + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + // If the GC register set has changed, report the new set. 
if (gcrefRegs != emitThisGCrefRegs) emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); @@ -7875,7 +7864,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR // Handle unaligned floating point loads/stores if ((indir->gtFlags & GTF_IND_UNALIGNED)) { - if (indir->OperGet() == GT_STOREIND) + if (indir->OperIs(GT_STOREIND)) { var_types type = indir->AsStoreInd()->Data()->TypeGet(); if (type == TYP_FLOAT) @@ -7895,7 +7884,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR return; } } - else if (indir->OperGet() == GT_IND) + else if (indir->OperIs(GT_IND)) { var_types type = indir->TypeGet(); if (type == TYP_FLOAT) @@ -7931,7 +7920,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR DWORD lsl = 0; - if (addr->OperGet() == GT_LEA) + if (addr->OperIs(GT_LEA)) { offset += addr->AsAddrMode()->Offset(); if (addr->AsAddrMode()->gtScale > 0) @@ -7945,7 +7934,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (indir->HasIndex()) { - assert(addr->OperGet() == GT_LEA); + assert(addr->OperIs(GT_LEA)); GenTree* index = indir->Index(); @@ -8219,7 +8208,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, emitJumpKind jumpKind; - if (dst->OperGet() == GT_MUL) + if (dst->OperIs(GT_MUL)) { jumpKind = EJ_ne; } @@ -8229,7 +8218,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs; if (jumpKind == EJ_lo) { - if ((dst->OperGet() != GT_SUB) && (dst->OperGet() != GT_SUB_HI)) + if (!dst->OperIs(GT_SUB) && !dst->OperIs(GT_SUB_HI)) { jumpKind = EJ_hs; } diff --git a/src/coreclr/jit/emitarm.h b/src/coreclr/jit/emitarm.h index 20c7b851499c..6e3eb5793c54 100644 --- a/src/coreclr/jit/emitarm.h +++ b/src/coreclr/jit/emitarm.h @@ -65,10 +65,15 @@ void emitDispInsHelp(instrDesc* id, private: instrDesc* emitNewInstrCallDir( - int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize); + int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize, bool hasAsyncRet); -instrDesc* emitNewInstrCallInd( - int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize); +instrDesc* emitNewInstrCallInd(int argCnt, + ssize_t disp, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize, + bool hasAsyncRet); /************************************************************************/ /* Private helpers for instruction output */ @@ -314,30 +319,6 @@ void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg void emitIns_R_ARX( instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp); -enum EmitCallType -{ - EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method - EC_INDIR_R, // Indirect call via register - EC_COUNT -}; - -void emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, // used for pretty printing - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - int argSize, - emitAttr retSize, - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di = DebugInfo(), - regNumber ireg = REG_NA, - regNumber xreg = REG_NA, - unsigned xmul = 0, - ssize_t disp = 0, - bool isJump = false, - bool noSafePoint = false); - 
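As a side note on the `_idEvexDFV` / `idSetEvexDFV` additions in emit.h above: the four default-flag-value bits used by ccmp are scattered across the single-bit `_idCustom1`..`_idCustom4` fields and reassembled on read. A standalone illustration of that round trip (the bit-8 position of the dfv field inside the options word is inferred from the `>> 8` in the diff; the sample value is arbitrary):

```cpp
#include <cassert>

struct CustomBits
{
    unsigned c1 : 1, c2 : 1, c3 : 1, c4 : 1; // stand-ins for _idCustom1.._idCustom4
};

int main()
{
    const unsigned dfvMask = 0xF00; // assumed: dfv occupies bits 8..11 of insOpts
    unsigned       insOpts = 0xB00; // sample options word, dfv = 0b1011

    unsigned value = (insOpts & dfvMask) >> 8; // what idSetEvexDFV extracts

    CustomBits b{};
    b.c1 = (value >> 0) & 1;
    b.c2 = (value >> 1) & 1;
    b.c3 = (value >> 2) & 1;
    b.c4 = (value >> 3) & 1;

    // idGetEvexDFV reassembles the bits in the same order as the _idEvexDFV macro.
    unsigned readBack = (b.c4 << 3) | (b.c3 << 2) | (b.c2 << 1) | b.c1;
    assert(readBack == value);
    return 0;
}
```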
/***************************************************************************** * * Given an instrDesc, return true if it's a conditional jump. diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index d0dbc6367be2..71f070973bf1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -217,7 +217,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && id->idIsTlsGD()) { - assert(isGeneralRegister(id->idReg1())); assert(id->idAddr()->iiaAddr != nullptr); } else @@ -1087,6 +1086,22 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id) case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) return ins == INS_mrs_tpid0; + // Below SVE instructions write to GPR and hence GC reg + case IF_SVE_CO_3A: // clasta, clastb + case IF_SVE_BM_1A: // decb, decd, dech, decw, incb, incd, inch, incw + case IF_SVE_BO_1A: // sqdecb, sqdecd, sqdech, sqdecw, sqincb, sqincd, sqinch, sqincw, uqdecb, uqdecd, uqdech, + // uqdecw, uqincb, uqincd, uqinch, uqincw + case IF_SVE_CS_3A: // lasta, lastb + case IF_SVE_DK_3A: // cntp + case IF_SVE_DL_2A: // cntp + case IF_SVE_DM_2A: // decp, incp + case IF_SVE_DO_2A: // sqdecp, sqincp, uqdecp, uqincp + case IF_SVE_BB_2A: // addpl, addvl + case IF_SVE_BC_1A: // rdvl + case IF_SVE_BL_1A: // cntb, cntd, cnth, cntw + case IF_SVE_DS_2A: // ctermeq, ctermne + return true; + default: return false; } @@ -4680,13 +4695,13 @@ void emitter::emitIns_R_R(instruction ins, case INS_str: case INS_strb: case INS_strh: - case INS_cmn: case INS_tst: assert(insOptsNone(opt)); emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_OPTS_NONE); return; case INS_cmp: + case INS_cmn: emitIns_R_R_I(ins, attr, reg1, reg2, 0, opt); return; @@ -9084,47 +9099,31 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) * Please consult the "debugger team notification" comment in genFnProlog(). */ -void emitter::emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize, - emitAttr secondRetSize, - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di /* = DebugInfo() */, - regNumber ireg /* = REG_NA */, - regNumber xreg /* = REG_NA */, - unsigned xmul /* = 0 */, - ssize_t disp /* = 0 */, - bool isJump /* = false */, - bool noSafePoint /* = false */) +void emitter::emitIns_Call(const EmitCallParams& params) { /* Sanity check the arguments depending on callType */ - assert(callType < EC_COUNT); - assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA)); - assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT)); + assert(params.callType < EC_COUNT); + assert((params.callType != EC_FUNC_TOKEN) || (params.addr != nullptr && params.ireg == REG_NA)); + assert(params.callType != EC_INDIR_R || (params.addr == nullptr && params.ireg < REG_COUNT)); // ARM never uses these - assert(xreg == REG_NA && xmul == 0 && disp == 0); + assert(params.xreg == REG_NA && params.xmul == 0 && params.disp == 0); // Our stack level should be always greater than the bytes of arguments we push. Just // a sanity test. - assert((unsigned)std::abs(argSize) <= codeGen->genStackLevel); + assert((unsigned)std::abs(params.argSize) <= codeGen->genStackLevel); // Trim out any callee-trashed registers from the live set. 
- regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); - gcrefRegs &= savedSet; - byrefRegs &= savedSet; + regMaskTP savedSet = emitGetGCRegsSavedOrModified(params.methHnd); + regMaskTP gcrefRegs = params.gcrefRegs & savedSet; + regMaskTP byrefRegs = params.byrefRegs & savedSet; #ifdef DEBUG if (EMIT_GC_VERBOSE) { - printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); - dumpConvertedVarSet(emitComp, ptrVars); + printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, params.ptrVars)); + dumpConvertedVarSet(emitComp, params.ptrVars); printf(", gcrefRegs="); printRegMaskInt(gcrefRegs); emitDispRegSet(gcrefRegs); @@ -9136,9 +9135,9 @@ void emitter::emitIns_Call(EmitCallType callType, #endif /* Managed RetVal: emit sequence point for the call */ - if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid()) + if (emitComp->opts.compDbgInfo && params.debugInfo.GetLocation().IsValid()) { - codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + codeGen->genIPmappingAdd(IPmappingDscKind::Normal, params.debugInfo, false); } /* @@ -9148,53 +9147,55 @@ void emitter::emitIns_Call(EmitCallType callType, */ instrDesc* id; - assert(argSize % REGSIZE_BYTES == 0); - int argCnt = (int)(argSize / (int)REGSIZE_BYTES); + assert(params.argSize % REGSIZE_BYTES == 0); + int argCnt = (int)(params.argSize / (int)REGSIZE_BYTES); - if (callType == EC_INDIR_R) + if (params.callType == EC_INDIR_R) { /* Indirect call, virtual calls */ - id = emitNewInstrCallInd(argCnt, 0 /* disp */, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + id = emitNewInstrCallInd(argCnt, 0 /* disp */, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, + params.secondRetSize, params.hasAsyncRet); } else { /* Helper/static/nonvirtual/function calls (direct or through handle), and calls to an absolute addr. 
*/ - assert(callType == EC_FUNC_TOKEN); + assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize, + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ // If the method returns a GC ref, mark RBM_INTRET appropriately - if (retSize == EA_GCREF) + if (params.retSize == EA_GCREF) { gcrefRegs |= RBM_INTRET; } - else if (retSize == EA_BYREF) + else if (params.retSize == EA_BYREF) { byrefRegs |= RBM_INTRET; } // If is a multi-register return method is called, mark RBM_INTRET_1 appropriately - if (secondRetSize == EA_GCREF) + if (params.secondRetSize == EA_GCREF) { gcrefRegs |= RBM_INTRET_1; } - else if (secondRetSize == EA_BYREF) + else if (params.secondRetSize == EA_BYREF) { byrefRegs |= RBM_INTRET_1; } - VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); + VarSetOps::Assign(emitComp, emitThisGCrefVars, params.ptrVars); emitThisGCrefRegs = gcrefRegs; emitThisByrefRegs = byrefRegs; // for the purpose of GC safepointing tail-calls are not real calls - id->idSetIsNoGC(isJump || noSafePoint || emitNoGChelper(methHnd)); + id->idSetIsNoGC(params.isJump || params.noSafePoint || emitNoGChelper(params.methHnd)); /* Set the instruction - special case jumping a function */ instruction ins; @@ -9202,11 +9203,11 @@ void emitter::emitIns_Call(EmitCallType callType, /* Record the address: method, indirection, or funcptr */ - if (callType == EC_INDIR_R) + if (params.callType == EC_INDIR_R) { /* This is an indirect call (either a virtual call or func ptr call) */ - if (isJump) + if (params.isJump) { ins = INS_br_tail; // INS_br_tail Reg } @@ -9219,31 +9220,34 @@ void emitter::emitIns_Call(EmitCallType callType, id->idIns(ins); id->idInsFmt(fmt); - assert(xreg == REG_NA); - if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && EA_IS_CNS_TLSGD_RELOC(retSize)) + assert(params.xreg == REG_NA); + if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && EA_IS_CNS_TLSGD_RELOC(params.retSize)) { // For NativeAOT linux/arm64, we need to also record the relocation of methHnd. - // Since we do not have space to embed it in instrDesc, we store the register in - // reg1 and instead use the `iiaAdd` to store the method handle. Likewise, during - // emitOutputInstr, we retrieve the register from reg1 for this specific case. + // Since we do not have space to embed it in instrDesc, we use the `iiaAddr` to + // store the method handle. + // The target handle need to be always in R2 and hence the assert check. + // We cannot use reg1 and reg2 fields of instrDesc because they contain the gc + // registers (emitEncodeCallGCregs()) that are live across the call. 
+ + assert(params.ireg == REG_R2); id->idSetTlsGD(); - id->idReg1(ireg); - id->idAddr()->iiaAddr = (BYTE*)methHnd; + id->idAddr()->iiaAddr = (BYTE*)params.methHnd; } else { - id->idReg3(ireg); + id->idReg3(params.ireg); } } else { /* This is a simple direct call: "call helper/method/addr" */ - assert(callType == EC_FUNC_TOKEN); + assert(params.callType == EC_FUNC_TOKEN); - assert(addr != NULL); + assert(params.addr != NULL); - if (isJump) + if (params.isJump) { ins = INS_b_tail; // INS_b_tail imm28 } @@ -9256,7 +9260,7 @@ void emitter::emitIns_Call(EmitCallType callType, id->idIns(ins); id->idInsFmt(fmt); - id->idAddr()->iiaAddr = (BYTE*)addr; + id->idAddr()->iiaAddr = (BYTE*)params.addr; if (emitComp->opts.compReloc) { @@ -9277,14 +9281,14 @@ void emitter::emitIns_Call(EmitCallType callType, if (m_debugInfoSize > 0) { - INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo); - id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token + INDEBUG(id->idDebugOnlyInfo()->idCallSig = params.sigInfo); + id->idDebugOnlyInfo()->idMemCookie = (size_t)params.methHnd; // method token } #ifdef LATE_DISASM - if (addr != nullptr) + if (params.addr != nullptr) { - codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); + codeGen->getDisAssembler().disSetMethod((size_t)params.addr, params.methHnd); } #endif // LATE_DISASM @@ -10891,6 +10895,10 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t { byrefRegs |= RBM_INTRET_1; } + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } } // If the GC register set has changed, report the new set. @@ -11029,12 +11037,13 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { emitRecordRelocation(odst, (CORINFO_METHOD_HANDLE)id->idAddr()->iiaAddr, IMAGE_REL_AARCH64_TLSDESC_CALL); - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + code |= insEncodeReg_Rn(REG_R2); // nnnnn } else { code |= insEncodeReg_Rn(id->idReg3()); // nnnnn } + dst += emitOutputCall(ig, dst, id, code); sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc); break; @@ -13359,7 +13368,15 @@ void emitter::emitDispInsHelp( case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn // The size of a branch target is always EA_PTRSIZE assert(insOptsNone(id->idInsOpt())); - emitDispReg(id->idReg3(), EA_PTRSIZE, false); + + if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && id->idIsTlsGD()) + { + emitDispReg(REG_R2, EA_PTRSIZE, false); + } + else + { + emitDispReg(id->idReg3(), EA_PTRSIZE, false); + } break; case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) @@ -14490,7 +14507,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR int offset = 0; DWORD lsl = 0; - if (addr->OperGet() == GT_LEA) + if (addr->OperIs(GT_LEA)) { offset = addr->AsAddrMode()->Offset(); if (addr->AsAddrMode()->gtScale > 0) @@ -17379,6 +17396,12 @@ bool emitter::OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, e return false; } + if (emitComp->compGeneratingUnwindProlog || emitComp->compGeneratingUnwindEpilog) + { + // Don't remove instructions while generating "unwind" part of prologs or epilogs + return false; + } + // Cannot allow post indexing if the load itself is already modifying the // register. 
regNumber loadStoreDataReg = emitLastIns->idReg1(); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 8e2ed80c6cdf..c30ab5a57dec 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -98,7 +98,8 @@ instrDesc* emitNewInstrCallDir(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize, - emitAttr secondRetSize); + emitAttr secondRetSize, + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, @@ -106,7 +107,8 @@ instrDesc* emitNewInstrCallInd(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize, - emitAttr secondRetSize); + emitAttr secondRetSize, + bool hasAsyncRet); /************************************************************************/ /* enum to allow instruction optimisation to specify register order */ @@ -1734,31 +1736,6 @@ void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg void emitIns_R_ARX( instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp); -enum EmitCallType -{ - EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method - EC_INDIR_R, // Indirect call via register - EC_COUNT -}; - -void emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize, - emitAttr secondRetSize, - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di, - regNumber ireg, - regNumber xreg, - unsigned xmul, - ssize_t disp, - bool isJump, - bool noSafePoint = false); - BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i); unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* i, code_t code); BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id); diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 62e767d63205..8be176948c1a 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -2256,7 +2256,14 @@ void emitter::emitInsSve_R_R(instruction ins, // Thus, MOV is the preferred disassembly. ins = INS_sve_mov; break; - + case INS_sve_ldr: + case INS_sve_str: + { + // We might come here through emitIns_R_R() to emit "ldr Zx, [Xn]" and + // in the case, just generate the ldr variant, where offset is zero. 
+ emitInsSve_R_R_I(ins, attr, reg1, reg2, 0, opt, sopt); + return; + } default: unreached(); break; @@ -10387,6 +10394,10 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) { code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm } + else + { + code |= insEncodeReg_V<20, 16>(id->idReg2()); // mmmmm + } dst += emitOutput_Instr(dst, code); break; diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index a586193dd5b7..e32d52dd9e33 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -13,7 +13,7 @@ inline bool emitter::instrIs3opImul(instruction ins) #ifdef TARGET_X86 return ((ins >= INS_imul_AX) && (ins <= INS_imul_DI)); #else // TARGET_AMD64 - return ((ins >= INS_imul_AX) && (ins <= INS_imul_15)); + return ((ins >= INS_imul_AX) && (ins <= INS_imul_31)); #endif } @@ -23,7 +23,7 @@ inline bool emitter::instrIsExtendedReg3opImul(instruction ins) #ifdef TARGET_X86 return false; #else // TARGET_AMD64 - return ((ins >= INS_imul_08) && (ins <= INS_imul_15)); + return ((ins >= INS_imul_08) && (ins <= INS_imul_31)); #endif } @@ -55,6 +55,22 @@ inline void emitter::check3opImulValues() assert(INS_imul_13 - INS_imul_AX == REG_R13); assert(INS_imul_14 - INS_imul_AX == REG_R14); assert(INS_imul_15 - INS_imul_AX == REG_R15); + assert(INS_imul_16 - INS_imul_AX == REG_R16); + assert(INS_imul_17 - INS_imul_AX == REG_R17); + assert(INS_imul_18 - INS_imul_AX == REG_R18); + assert(INS_imul_19 - INS_imul_AX == REG_R19); + assert(INS_imul_20 - INS_imul_AX == REG_R20); + assert(INS_imul_21 - INS_imul_AX == REG_R21); + assert(INS_imul_22 - INS_imul_AX == REG_R22); + assert(INS_imul_23 - INS_imul_AX == REG_R23); + assert(INS_imul_24 - INS_imul_AX == REG_R24); + assert(INS_imul_25 - INS_imul_AX == REG_R25); + assert(INS_imul_26 - INS_imul_AX == REG_R26); + assert(INS_imul_27 - INS_imul_AX == REG_R27); + assert(INS_imul_28 - INS_imul_AX == REG_R28); + assert(INS_imul_29 - INS_imul_AX == REG_R29); + assert(INS_imul_30 - INS_imul_AX == REG_R30); + assert(INS_imul_31 - INS_imul_AX == REG_R31); #endif } @@ -585,10 +601,17 @@ inline bool insIsCMOV(instruction ins) * false. Returns the final result of the callback. */ template -bool emitter::emitGenNoGCLst(Callback& cb) +bool emitter::emitGenNoGCLst(Callback& cb, bool skipMainPrologsAndEpilogs /* = false */) { for (insGroup* ig = emitIGlist; ig; ig = ig->igNext) { + if (skipMainPrologsAndEpilogs) + { + if (ig == emitPrologIG) + continue; + if (ig->igFlags & IGF_EPILOG) + continue; + } if ((ig->igFlags & IGF_NOGCINTERRUPT) && ig->igSize > 0) { emitter::instrDesc* id = emitFirstInstrDesc(ig->igData); diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index cb3292e58782..85c5589212db 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2375,47 +2375,34 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) * Please consult the "debugger team notification" comment in genFnProlog(). 
*/ -void emitter::emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di /* = DebugInfo() */, - regNumber ireg /* = REG_NA */, - regNumber xreg /* = REG_NA */, - unsigned xmul /* = 0 */, - ssize_t disp /* = 0 */, - bool isJump /* = false */, - bool noSafePoint /* = false */) +void emitter::emitIns_Call(const EmitCallParams& params) { /* Sanity check the arguments depending on callType */ - assert(callType < EC_COUNT); - assert((callType != EC_FUNC_TOKEN) || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); - assert(callType < EC_INDIR_R || addr == NULL); - assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); + assert(params.callType < EC_COUNT); + assert((params.callType != EC_FUNC_TOKEN) || + (params.ireg == REG_NA && params.xreg == REG_NA && params.xmul == 0 && params.disp == 0)); + assert(params.callType < EC_INDIR_R || params.addr == nullptr); + assert(params.callType != EC_INDIR_R || + (params.ireg < REG_COUNT && params.xreg == REG_NA && params.xmul == 0 && params.disp == 0)); // LoongArch64 never uses these - assert(xreg == REG_NA && xmul == 0 && disp == 0); + assert(params.xreg == REG_NA && params.xmul == 0 && params.disp == 0); // Our stack level should be always greater than the bytes of arguments we push. Just // a sanity test. - assert((unsigned)std::abs(argSize) <= codeGen->genStackLevel); + assert((unsigned)std::abs(params.argSize) <= codeGen->genStackLevel); // Trim out any callee-trashed registers from the live set. 
- regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); - gcrefRegs &= savedSet; - byrefRegs &= savedSet; + regMaskTP savedSet = emitGetGCRegsSavedOrModified(params.methHnd); + regMaskTP gcrefRegs = params.gcrefRegs & savedSet; + regMaskTP byrefRegs = params.byrefRegs & savedSet; #ifdef DEBUG if (EMIT_GC_VERBOSE) { - printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); - dumpConvertedVarSet(emitComp, ptrVars); + printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, params.ptrVars)); + dumpConvertedVarSet(emitComp, params.ptrVars); printf(", gcrefRegs="); printRegMaskInt(gcrefRegs); emitDispRegSet(gcrefRegs); @@ -2427,9 +2414,9 @@ void emitter::emitIns_Call(EmitCallType callType, #endif /* Managed RetVal: emit sequence point for the call */ - if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid()) + if (emitComp->opts.compDbgInfo && params.debugInfo.GetLocation().IsValid()) { - codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + codeGen->genIPmappingAdd(IPmappingDscKind::Normal, params.debugInfo, false); } /* @@ -2439,55 +2426,57 @@ void emitter::emitIns_Call(EmitCallType callType, */ instrDesc* id; - assert(argSize % REGSIZE_BYTES == 0); - int argCnt = (int)(argSize / (int)REGSIZE_BYTES); + assert(params.argSize % REGSIZE_BYTES == 0); + int argCnt = (int)(params.argSize / (int)REGSIZE_BYTES); - if (callType >= EC_INDIR_R) + if (params.callType >= EC_INDIR_R) { /* Indirect call, virtual calls */ - assert(callType == EC_INDIR_R); + assert(params.callType == EC_INDIR_R); - id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + id = emitNewInstrCallInd(argCnt, params.disp, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, + params.secondRetSize, params.hasAsyncRet); } else { /* Helper/static/nonvirtual/function calls (direct or through handle), and calls to an absolute addr. */ - assert(callType == EC_FUNC_TOKEN); + assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize, + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ // If the method returns a GC ref, mark RBM_INTRET appropriately - if (retSize == EA_GCREF) + if (params.retSize == EA_GCREF) { gcrefRegs |= RBM_INTRET; } - else if (retSize == EA_BYREF) + else if (params.retSize == EA_BYREF) { byrefRegs |= RBM_INTRET; } // If is a multi-register return method is called, mark RBM_INTRET_1 appropriately - if (secondRetSize == EA_GCREF) + if (params.secondRetSize == EA_GCREF) { gcrefRegs |= RBM_INTRET_1; } - else if (secondRetSize == EA_BYREF) + else if (params.secondRetSize == EA_BYREF) { byrefRegs |= RBM_INTRET_1; } - VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); + VarSetOps::Assign(emitComp, emitThisGCrefVars, params.ptrVars); emitThisGCrefRegs = gcrefRegs; emitThisByrefRegs = byrefRegs; // for the purpose of GC safepointing tail-calls are not real calls - id->idSetIsNoGC(isJump || noSafePoint || emitNoGChelper(methHnd)); + id->idSetIsNoGC(params.isJump || params.noSafePoint || emitNoGChelper(params.methHnd)); /* Set the instruction - special case jumping a function */ instruction ins; @@ -2504,8 +2493,8 @@ void emitter::emitIns_Call(EmitCallType callType, // else if (callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR) // if reloc: // //pc + offset_38bits # only when reloc. 
- // pcaddu18i t2, addr-hi20 - // jilr r0/1,t2,addr-lo18 + // pcaddu18i t4, addr-hi20 + // jilr r0/1, t4, addr-lo18 // // else: // lu12i_w t2, dst_offset_lo32-hi @@ -2514,17 +2503,17 @@ void emitter::emitIns_Call(EmitCallType callType, // jirl REG_R0/REG_RA, t2, 0 /* Record the address: method, indirection, or funcptr */ - if (callType == EC_INDIR_R) + if (params.callType == EC_INDIR_R) { /* This is an indirect call (either a virtual call or func ptr call) */ // assert(callType == EC_INDIR_R); id->idSetIsCallRegPtr(); - regNumber reg_jirl = isJump ? REG_R0 : REG_RA; + regNumber reg_jirl = params.isJump ? REG_R0 : REG_RA; id->idReg4(reg_jirl); - id->idReg3(ireg); // NOTE: for EC_INDIR_R, using idReg3. - assert(xreg == REG_NA); + id->idReg3(params.ireg); // NOTE: for EC_INDIR_R, using idReg3. + assert(params.xreg == REG_NA); id->idCodeSize(4); } @@ -2532,11 +2521,12 @@ void emitter::emitIns_Call(EmitCallType callType, { /* This is a simple direct call: "call helper/method/addr" */ - assert(callType == EC_FUNC_TOKEN); - assert(addr != NULL); - assert((((size_t)addr) & 3) == 0); + assert(params.callType == EC_FUNC_TOKEN); + assert(params.addr != NULL); + assert((((size_t)params.addr) & 3) == 0); - addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 + void* addr = + (void*)(((size_t)params.addr) + (params.isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 id->idAddr()->iiaAddr = (BYTE*)addr; if (emitComp->opts.compReloc) @@ -2560,14 +2550,14 @@ void emitter::emitIns_Call(EmitCallType callType, } } - id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token - id->idDebugOnlyInfo()->idCallSig = sigInfo; + id->idDebugOnlyInfo()->idMemCookie = (size_t)params.methHnd; // method token + id->idDebugOnlyInfo()->idCallSig = params.sigInfo; #endif // DEBUG #ifdef LATE_DISASM - if (addr != nullptr) + if (params.addr != nullptr) { - codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); + codeGen->getDisAssembler().disSetMethod((size_t)params.addr, params.methHnd); } #endif // LATE_DISASM @@ -2634,16 +2624,15 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t // pc + offset_38bits // // pcaddu18i t4, addr-hi20 - // jilr r0/1,t4,addr-lo18 + // jilr r0/1, t4, addr-lo18 emitOutput_Instr(dst, 0x1e000000 | (int)REG_DEFAULT_HELPER_CALL_TARGET); size_t addr = (size_t)(id->idAddr()->iiaAddr); // get addr. int reg2 = (int)addr & 1; - addr = addr ^ 1; + addr -= reg2; - assert(isValidSimm38(addr - (ssize_t)dst)); assert((addr & 3) == 0); dst += 4; @@ -2725,6 +2714,10 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t { byrefRegs |= RBM_INTRET_1; } + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } } // If the GC register set has changed, report the new set. 
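The `hasAsyncContinuationRet` additions here (and the matching ones in the arm/arm64 emitters above) all follow the same pattern: after a call, the registers holding GC-tracked return values are reported live, and the async continuation register now joins that set. A minimal standalone sketch of the reporting pattern, with placeholder register masks (the real `RBM_*` values and return-kind plumbing are target-specific):

```cpp
#include <cstdint>

using regMask = uint64_t;

// Placeholder bit assignments; the real masks differ per target.
constexpr regMask RBM_INTRET                 = 1u << 0;
constexpr regMask RBM_INTRET_1               = 1u << 1;
constexpr regMask RBM_ASYNC_CONTINUATION_RET = 1u << 2;

enum class RetKind { None, GcRef, ByRef };

void reportCallReturnRegs(RetKind ret, RetKind secondRet, bool hasAsyncContinuationRet,
                          regMask& gcrefRegs, regMask& byrefRegs)
{
    if (ret == RetKind::GcRef)            gcrefRegs |= RBM_INTRET;   // primary return reg
    else if (ret == RetKind::ByRef)       byrefRegs |= RBM_INTRET;

    if (secondRet == RetKind::GcRef)      gcrefRegs |= RBM_INTRET_1; // multi-reg return
    else if (secondRet == RetKind::ByRef) byrefRegs |= RBM_INTRET_1;

    if (hasAsyncContinuationRet)          gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; // new in this change
}
```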
@@ -3989,10 +3982,14 @@ void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) printf(" "); - if (!emitComp->opts.disDiffable) + if (emitComp->opts.disCodeBytes && !emitComp->opts.disDiffable) { printf("%08X ", code); } + else + { + printf(" "); + } #else printf(" "); #endif diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index e07e9d47e12e..49f7bb702a74 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -117,14 +117,16 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); /************************************************************************/ /* Private helpers for instruction output */ @@ -170,42 +172,12 @@ static bool isValidSimm12(ssize_t value) return -(((int)1) << 11) <= value && value < (((int)1) << 11); }; -// Returns true if 'value' is a legal signed immediate 16 bit encoding. -static bool isValidSimm16(ssize_t value) -{ - return -(((int)1) << 15) <= value && value < (((int)1) << 15); -}; - -// Returns true if 'value' is a legal unsigned immediate 16 bit encoding. -static bool isValidUimm16(ssize_t value) -{ - return (0 == (value >> 16)); -}; - -// Returns true if 'value' is a legal signed immediate 18 bit encoding. -static bool isValidSimm18(ssize_t value) -{ - return -(((int)1) << 17) <= value && value < (((int)1) << 17); -}; - // Returns true if 'value' is a legal signed immediate 20 bit encoding. static bool isValidSimm20(ssize_t value) { return -(((int)1) << 19) <= value && value < (((int)1) << 19); }; -// Returns true if 'value' is a legal signed immediate 28 bit encoding. -static bool isValidSimm28(ssize_t value) -{ - return -(((int)1) << 27) <= value && value < (((int)1) << 27); -}; - -// Returns true if 'value' is a legal signed immediate 38 bit encoding. -static bool isValidSimm38(ssize_t value) -{ - return -(((ssize_t)1) << 37) <= value && value < (((ssize_t)1) << 37); -}; - // Returns the number of bits used by the given 'size'. inline static unsigned getBitWidth(emitAttr size) { @@ -316,42 +288,6 @@ void emitIns_R_AI(instruction ins, regNumber reg, ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); -enum EmitCallType -{ - - // I have included here, but commented out, all the values used by the x86 emitter. - // However, LOONGARCH has a much reduced instruction set, and so the LOONGARCH emitter only - // supports a subset of the x86 variants. By leaving them commented out, it becomes - // a compile time error if code tries to use them (and hopefully see this comment - // and know why they are unavailable on LOONGARCH), while making it easier to stay - // in-sync with x86 and possibly add them back in if needed. 
- - EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method - // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method - // EC_FUNC_ADDR, // Direct call to an absolute address - - EC_INDIR_R, // Indirect call via register - - EC_COUNT -}; - -void emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di, - regNumber ireg = REG_NA, - regNumber xreg = REG_NA, - unsigned xmul = 0, - ssize_t disp = 0, - bool isJump = false, - bool noSafePoint = false); - unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); unsigned get_curTotalCodeSize(); // bytes of code diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h index bf15ba33667c..037ea04bccf6 100644 --- a/src/coreclr/jit/emitpub.h +++ b/src/coreclr/jit/emitpub.h @@ -43,7 +43,7 @@ unsigned emitEndCodeGen(Compiler* comp, unsigned emitGetEpilogCnt(); template -bool emitGenNoGCLst(Callback& cb); +bool emitGenNoGCLst(Callback& cb, bool skipMainPrologsAndEpilogs = false); void emitBegProlog(); unsigned emitGetPrologOffsetEstimate(); diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 740e9b5fe040..6f772120f480 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -92,12 +92,13 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const return sizeof(instrDesc); } - case INS_OPTS_I: case INS_OPTS_RC: case INS_OPTS_RL: case INS_OPTS_RELOC: case INS_OPTS_NONE: return sizeof(instrDesc); + case INS_OPTS_I: + return sizeof(instrDescLoadImm); default: NO_WAY("unexpected instruction descriptor format"); break; @@ -138,6 +139,14 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) }; // clang-format on +emitter::MajorOpcode emitter::GetMajorOpcode(code_t code) +{ + assert((code & 0b11) == 0b11); // 16-bit instructions unsupported + code_t opcode = (code >> 2) & 0b11111; + assert((opcode & 0b111) != 0b111); // 48-bit and larger instructions unsupported + return (MajorOpcode)opcode; +} + inline bool emitter::emitInsMayWriteToGCReg(instruction ins) { assert(ins != INS_invalid); @@ -148,25 +157,28 @@ inline bool emitter::emitInsMayWriteToGCReg(instruction ins) return true; code_t code = emitInsCode(ins); - assert((code & 0b11) == 0b11); // 16-bit instructions unsupported - code_t majorOpcode = (code >> 2) & 0b11111; - assert((majorOpcode & 0b111) != 0b111); // 48-bit and larger instructions unsupported - switch (majorOpcode) - { - // Opcodes with no destination register or a floating-point destination register - case 0b00001: // LOAD-FP - case 0b01000: // STORE - case 0b01001: // STORE-FP - case 0b00011: // MISC-MEM - case 0b10000: // MADD - case 0b10001: // MSUB - case 0b10010: // NMSUB - case 0b10011: // NMADD - case 0b11000: // BRANCH - case 0b11100: // SYSTEM + switch (GetMajorOpcode(code)) + { + // Opcodes with no destination register + case MajorOpcode::Store: + case MajorOpcode::StoreFp: + case MajorOpcode::MiscMem: + case MajorOpcode::Branch: + // Opcodes with a floating-point destination register + case MajorOpcode::LoadFp: + case MajorOpcode::MAdd: + case MajorOpcode::MSub: + case MajorOpcode::NmSub: + case MajorOpcode::NmAdd: return false; - case 0b10100: // OP-FP + case MajorOpcode::System: + { + 
code_t funct3 = (code >> 12) & 0b111; + return (funct3 != 0); // CSR read/writes + } + + case MajorOpcode::OpFp: { // Lowest 2 bits of funct7 distinguish single, double, half, and quad floats; we don't care code_t funct7 = code >> (25 + 2); @@ -181,13 +193,13 @@ inline bool emitter::emitInsMayWriteToGCReg(instruction ins) } } - case 0b00010: // custom-0 - case 0b01010: // custom-1 - case 0b10101: // OP-V - case 0b10110: // custom-2/rv128 - case 0b11110: // custom-3/rv128 - case 0b11010: // reserved - case 0b11101: // reserved + case MajorOpcode::Custom0: + case MajorOpcode::Custom1: + case MajorOpcode::Custom2Rv128: + case MajorOpcode::Custom3Rv128: + case MajorOpcode::OpV: + case MajorOpcode::OpVe: + case MajorOpcode::Reserved: assert(!"unsupported major opcode"); FALLTHROUGH; @@ -237,7 +249,7 @@ bool emitter::emitInsIsLoadOrStore(instruction ins) * Returns the specific encoding of the given CPU instruction. */ -inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) const +inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) { code_t code = BAD_CODE; @@ -638,7 +650,7 @@ void emitter::emitIns_R_R( { code_t code = emitInsCode(ins); - if (INS_mov == ins || INS_sext_w == ins) + if (INS_mov == ins || INS_sext_w == ins || (INS_clz <= ins && ins <= INS_rev8)) { assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); @@ -678,7 +690,7 @@ void emitter::emitIns_R_R( if (INS_fcvt_d_w != ins && INS_fcvt_d_wu != ins) // fcvt.d.w[u] always produces an exact result code |= 0x7 << 12; // round according to frm status register } - else if (INS_fcvt_s_d == ins || INS_fcvt_d_s == ins) + else if (INS_fcvt_s_d == ins || INS_fcvt_d_s == ins || INS_fsqrt_s == ins || INS_fsqrt_d == ins) { assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); @@ -716,7 +728,7 @@ void emitter::emitIns_R_R_I( if ((INS_addi <= ins && INS_srai >= ins) || (INS_addiw <= ins && INS_sraiw >= ins) || (INS_lb <= ins && INS_lhu >= ins) || INS_ld == ins || INS_lw == ins || INS_jalr == ins || INS_fld == ins || - INS_flw == ins) + INS_flw == ins || INS_slli_uw == ins || INS_rori == ins || INS_roriw == ins) { assert(isGeneralRegister(reg2)); code |= (reg1 & 0x1f) << 7; // rd @@ -814,7 +826,8 @@ void emitter::emitIns_R_R_R( if ((INS_add <= ins && ins <= INS_and) || (INS_mul <= ins && ins <= INS_remuw) || (INS_addw <= ins && ins <= INS_sraw) || (INS_fadd_s <= ins && ins <= INS_fmax_s) || (INS_fadd_d <= ins && ins <= INS_fmax_d) || (INS_feq_s <= ins && ins <= INS_fle_s) || - (INS_feq_d <= ins && ins <= INS_fle_d) || (INS_lr_w <= ins && ins <= INS_amomaxu_d)) + (INS_feq_d <= ins && ins <= INS_fle_d) || (INS_lr_w <= ins && ins <= INS_amomaxu_d) || + (INS_sh1add <= ins && ins <= INS_sh3add_uw) || (INS_rol <= ins && ins <= INS_maxu)) { #ifdef DEBUG switch (ins) @@ -855,7 +868,6 @@ void emitter::emitIns_R_R_R( case INS_fsub_s: case INS_fmul_s: case INS_fdiv_s: - case INS_fsqrt_s: case INS_fsgnj_s: case INS_fsgnjn_s: case INS_fsgnjx_s: @@ -870,7 +882,6 @@ void emitter::emitIns_R_R_R( case INS_fsub_d: case INS_fmul_d: case INS_fdiv_d: - case INS_fsqrt_d: case INS_fsgnj_d: case INS_fsgnjn_d: case INS_fsgnjx_d: @@ -903,6 +914,26 @@ void emitter::emitIns_R_R_R( case INS_amominu_d: case INS_amomaxu_w: case INS_amomaxu_d: + + case INS_sh1add: + case INS_sh2add: + case INS_sh3add: + case INS_add_uw: + case INS_sh1add_uw: + case INS_sh2add_uw: + case INS_sh3add_uw: + + case INS_rol: + case INS_rolw: + case INS_ror: + case INS_rorw: + case INS_xnor: + case INS_orn: + case INS_andn: + case 
INS_min: + case INS_minu: + case INS_max: + case INS_maxu: break; default: NYI_RISCV64("illegal ins within emitIns_R_R_R!"); @@ -915,11 +946,15 @@ void emitter::emitIns_R_R_R( code |= ((reg1 & 0x1f) << 7); code |= ((reg2 & 0x1f) << 15); code |= ((reg3 & 0x1f) << 20); - if ((INS_fadd_s <= ins && INS_fsqrt_s >= ins) || (INS_fadd_d <= ins && INS_fsqrt_d >= ins)) + if ((INS_fadd_s <= ins && INS_fdiv_s >= ins) || (INS_fadd_d <= ins && INS_fdiv_d >= ins)) { code |= 0x7 << 12; } - else if (INS_lr_w <= ins && ins <= INS_amomaxu_d) + else if (ins == INS_sc_w || ins == INS_sc_d) + { + code |= 0b10 << 25; // release ordering, it ends the lr-sc loop + } + else if ((ins == INS_lr_w || ins == INS_lr_d) || (INS_amoswap_w <= ins && ins <= INS_amomaxu_d)) { // For now all atomics are seq. consistent as Interlocked.* APIs don't expose acquire/release ordering code |= 0b11 << 25; @@ -1000,14 +1035,18 @@ void emitter::emitIns_R_C( id->idSmallCns(offs); // usually is 0. id->idInsOpt(INS_OPTS_RC); - if (emitComp->opts.compReloc) + + if (emitComp->fgIsBlockCold(emitComp->compCurBB)) { - id->idSetIsDspReloc(); - id->idCodeSize(8); + // Loading constant from cold section might be arbitrarily far, + // use emitOutputInstr_OptsRcNoPcRel + id->idCodeSize(24); } else { - id->idCodeSize(24); + // Loading constant from hot section can use auipc, + // use emitOutputInstr_OptsRcPcRel + id->idCodeSize(8); } if (EA_IS_GCREF(attr)) @@ -1263,6 +1302,8 @@ void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 appendToCurIG(id); } +static inline constexpr unsigned WordMask(uint8_t bits); + /***************************************************************************** * * Emits load of 64-bit constant to register. @@ -1270,16 +1311,6 @@ void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 */ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm) { - // In the worst case a sequence of 8 instructions will be used: - // LUI + ADDIW + SLLI + ADDI + SLLI + ADDI + SLLI + ADDI - // - // First 2 instructions (LUI + ADDIW) load up to 31 bit. And followed - // sequence of (SLLI + ADDI) is used for loading remaining part by batches - // of up to 11 bits. - // - // Note that LUI, ADDI/W use sign extension so that's why we have to work - // with 31 and 11 bit batches there. - assert(!EA_IS_RELOC(size)); assert(isGeneralRegister(reg)); @@ -1289,53 +1320,287 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm) return; } - // TODO-RISCV64: maybe optimized via emitDataConst(), check #86790 + /* The following algorithm works based on the following equation: + * `imm = high32 + offset1` OR `imm = high32 - offset2` + * + * high32 will be loaded with `lui + addiw`, while offset + * will be loaded with `slli + addi` in 11-bits chunks + * + * First, determine at which position to partition imm into high32 and offset, + * so that it yields the least instruction. + * Where high32 = imm[y:x] and imm[63:y] are all zeroes or all ones. + * + * From the above equation, the value of offset1 & offset2 are: + * -> offset1 = imm[x-1:0] + * -> offset2 = ~(imm[x-1:0] - 1) + * The smaller offset should yield the least instruction. (is this correct?) 
*/ + + // STEP 1: Determine x & y + + int x; + int y; + if (((uint64_t)imm >> 63) & 0b1) + { + // last one position from MSB + y = 63 - BitOperations::LeadingZeroCount((uint64_t)~imm) + 1; + } + else + { + // last zero position from MSB + y = 63 - BitOperations::LeadingZeroCount((uint64_t)imm) + 1; + } + if (imm & 0b1) + { + // first zero position from LSB + x = BitOperations::TrailingZeroCount((uint64_t)~imm); + } + else + { + // first one position from LSB + x = BitOperations::TrailingZeroCount((uint64_t)imm); + } + + // STEP 2: Determine whether to utilize SRLI or not. + + /* SRLI can be utilized when the input has the following pattern: + * + * 0...01...10...x + * <-n-><-m-> + * + * It will emit instructions to load the left shifted immidiate then + * followed by a single SRLI instruction. + * + * Since it adds 1 instruction, loading the new form should at least remove + * two instruction. Two instructions can be removed IF: + * 1. y - x > 31, AND + * 2. (b - a) < 32, OR + * 3. (b - a) - (y - x) >= 11 + * + * Visualization aid: + * - Original immidiate + * 0...01...10...x + * y <-x + * - Left shifted immidiate + * 1...10...x0...0 + * b <-a + * */ + + constexpr int absMaxInsCount = instrDescLoadImm::absMaxInsCount; + constexpr int prefMaxInsCount = 5; + assert(prefMaxInsCount <= absMaxInsCount); + + // If we generate more instructions than the prefered maximum instruction count, we'll instead use emitDataConst + + // emitIns_R_C combination. + int insCountLimit = prefMaxInsCount; + // If we are currently generating prolog / epilog, we are currently not inside a method block, therefore, we should + // not use the emitDataConst + emitIns_R_C combination. + if (emitComp->compGeneratingProlog || emitComp->compGeneratingEpilog) + { + insCountLimit = absMaxInsCount; + } - UINT32 msb = BitOperations::BitScanReverse((uint64_t)imm); - UINT32 high31; - if (msb > 30) + bool utilizeSRLI = false; + int srliShiftAmount = 0; + uint64_t originalImm = imm; + bool cond1 = (y - x) > 31; + if ((((uint64_t)imm >> 63) & 0b1) == 0 && cond1) { - high31 = (imm >> (msb - 30)) & 0x7FffFFff; + srliShiftAmount = BitOperations::LeadingZeroCount((uint64_t)imm); + uint64_t tempImm = (uint64_t)imm << srliShiftAmount; + int m = BitOperations::LeadingZeroCount(~tempImm); + int b = 64 - m; + int a = BitOperations::TrailingZeroCount(tempImm); + bool cond2 = (b - a) < 32; + bool cond3 = ((y - x) - (b - a)) >= 11; + if (cond2 || cond3) + { + imm = tempImm; + y = b; + x = a; + utilizeSRLI = true; + insCountLimit -= 1; + } + } + + assert(y >= x); + assert((1 <= y) && (y <= 63)); + assert((1 <= x) && (x <= 63)); + + if (y < 32) + { + y = 31; + x = 0; + } + else if ((y - x) < 31) + { + y = x + 31; } else { - high31 = imm & 0x7FffFFff; + x = y - 31; + } + + uint32_t high32 = ((int64_t)imm >> x) & WordMask(32); + + // STEP 3: Determine whether to use high32 + offset1 or high32 - offset2 + + /* TODO: Instead of using subtract / add mode, assume that we're always adding + * 12-bit chunks. However, if we encounter such 12-bit chunk with MSB == 1, + * add 1 to the previous chunk, and add the 12-bit chunk as is, which + * essentially does a subtraction. It will generate the least instruction to + * load offset. 
+ * See the following discussion: + * https://github.com/dotnet/runtime/pull/113250#discussion_r1987576070 */ + + uint32_t offset1 = imm & WordMask((uint8_t)x); + uint32_t offset2 = (~(offset1 - 1)) & WordMask((uint8_t)x); + uint32_t offset = offset1; + bool isSubtractMode = false; + + if ((high32 == 0x7FFFFFFF) && (y != 63)) + { + /* Handle corner case: we cannot do subtract mode if high32 == 0x7FFFFFFF + * Since adding 1 to it will change the sign bit. Instead, shift x and y + * to the left by one. */ + int newX = x + 1; + uint32_t newOffset1 = imm & WordMask((uint8_t)newX); + uint32_t newOffset2 = (~(newOffset1 - 1)) & WordMask((uint8_t)newX); + if (newOffset2 < offset1) + { + x = newX; + high32 = ((int64_t)imm >> x) & WordMask(32); + offset2 = newOffset2; + isSubtractMode = true; + } + } + else if (offset2 < offset1) + { + isSubtractMode = true; } - // Since ADDIW use sign extension fo immediate - // we have to adjust higher 19 bit loaded by LUI - // for case when low part is bigger than 0x800. - INT32 high19 = ((int32_t)(high31 + 0x800)) >> 12; + if (isSubtractMode) + { + offset = offset2; + high32 = (high32 + 1) & WordMask(32); + } + + assert(absMaxInsCount >= 2); + int numberOfInstructions = 0; + instruction ins[absMaxInsCount]; + int32_t values[absMaxInsCount]; + + // STEP 4: Generate instructions to load high32 - emitIns_R_I(INS_lui, size, reg, high19); - if (high31 & 0xFFF) + uint32_t upper = (high32 >> 12) & WordMask(20); + uint32_t lower = high32 & WordMask(12); + int lowerMsb = (lower >> 11) & 0b1; + if (lowerMsb == 1) { - emitIns_R_R_I(INS_addiw, size, reg, reg, high31 & 0xFFF); + upper += 1; + upper &= WordMask(20); + } + if (upper != 0) + { + ins[numberOfInstructions] = INS_lui; + values[numberOfInstructions] = ((upper >> 19) & 0b1) ? (upper + 0xFFF00000) : upper; + numberOfInstructions += 1; + } + if (lower != 0) + { + ins[numberOfInstructions] = INS_addiw; + values[numberOfInstructions] = lower; + numberOfInstructions += 1; } - // And load remaining part part by batches of 11 bits size. - INT32 remainingShift = msb - 30; + // STEP 5: Generate instructions to load offset in 11-bits chunks - UINT32 shiftAccumulator = 0; - while (remainingShift > 0) + int chunkLsbPos = (x < 11) ? 0 : (x - 11); + int shift = (x < 11) ? x : 11; + int chunkMask = (x < 11) ? WordMask((uint8_t)x) : WordMask(11); + while (true) { - UINT32 shift = remainingShift >= 11 ? 11 : remainingShift % 11; - UINT32 mask = 0x7ff >> (11 - shift); - remainingShift -= shift; - ssize_t low11 = (imm >> remainingShift) & mask; - shiftAccumulator += shift; + uint32_t chunk = (offset >> chunkLsbPos) & chunkMask; - if (low11) + if (chunk != 0) + { + /* We could move our 11 bit chunk window to the right for as many as the + * leading zeros.*/ + int leadingZerosOn11BitsChunk = 11 - (32 - BitOperations::LeadingZeroCount(chunk)); + if (leadingZerosOn11BitsChunk > 0) + { + int maxAdditionalShift = + (chunkLsbPos < leadingZerosOn11BitsChunk) ? 
chunkLsbPos : leadingZerosOn11BitsChunk; + chunkLsbPos -= maxAdditionalShift; + shift += maxAdditionalShift; + chunk = (offset >> chunkLsbPos) & chunkMask; + } + + numberOfInstructions += 2; + if (numberOfInstructions > insCountLimit) + { + break; + } + ins[numberOfInstructions - 2] = INS_slli; + values[numberOfInstructions - 2] = shift; + if (isSubtractMode) + { + ins[numberOfInstructions - 1] = INS_addi; + values[numberOfInstructions - 1] = -(int32_t)chunk; + } + else + { + ins[numberOfInstructions - 1] = INS_addi; + values[numberOfInstructions - 1] = chunk; + } + shift = 0; + } + if (chunkLsbPos == 0) + { + break; + } + shift += (chunkLsbPos < 11) ? chunkLsbPos : 11; + chunkMask = (chunkLsbPos < 11) ? (chunkMask >> (11 - chunkLsbPos)) : WordMask(11); + chunkLsbPos -= (chunkLsbPos < 11) ? chunkLsbPos : 11; + } + if (shift > 0) + { + numberOfInstructions += 1; + if (numberOfInstructions <= insCountLimit) { - emitIns_R_R_I(INS_slli, size, reg, reg, shiftAccumulator); - shiftAccumulator = 0; + ins[numberOfInstructions - 1] = INS_slli; + values[numberOfInstructions - 1] = shift; + } - emitIns_R_R_I(INS_addi, size, reg, reg, low11); + } + + // STEP 6: Determine whether to use emitDataConst or emit generated instructions + + if (numberOfInstructions <= insCountLimit) + { + instrDescLoadImm* id = static_cast<instrDescLoadImm*>(emitNewInstrLoadImm(size, originalImm)); + id->idReg1(reg); + memcpy(id->ins, ins, sizeof(instruction) * numberOfInstructions); + memcpy(id->values, values, sizeof(int32_t) * numberOfInstructions); + if (utilizeSRLI) + { + numberOfInstructions += 1; + assert(numberOfInstructions < absMaxInsCount); + id->ins[numberOfInstructions - 1] = INS_srli; + id->values[numberOfInstructions - 1] = srliShiftAmount; } + id->idCodeSize(numberOfInstructions * 4); + id->idIns(id->ins[numberOfInstructions - 1]); + + appendToCurIG(id); + } + else if (size == EA_PTRSIZE) + { + assert(!emitComp->compGeneratingProlog && !emitComp->compGeneratingEpilog); + auto constAddr = emitDataConst(&originalImm, sizeof(long), sizeof(long), TYP_LONG); + emitIns_R_C(INS_ld, EA_PTRSIZE, reg, REG_NA, emitComp->eeFindJitDataOffs(constAddr), 0); } - if (shiftAccumulator) + else { - emitIns_R_R_I(INS_slli, size, reg, reg, shiftAccumulator); + assert(false && "If the number of instructions exceeds MAX_NUM_OF_LOAD_IMM_INS, imm must be 8 bytes"); } } @@ -1356,47 +1621,34 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm) * */ -void emitter::emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di /* = DebugInfo() */, - regNumber ireg /* = REG_NA */, - regNumber xreg /* = REG_NA */, - unsigned xmul /* = 0 */, - ssize_t disp /* = 0 */, - bool isJump /* = false */, - bool noSafePoint /* = false */) +void emitter::emitIns_Call(const EmitCallParams& params) { /* Sanity check the arguments depending on callType */ - assert(callType < EC_COUNT); - assert((callType != EC_FUNC_TOKEN) || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); - assert(callType < EC_INDIR_R || addr == NULL); - assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); + assert(params.callType < EC_COUNT); + assert((params.callType != EC_FUNC_TOKEN) || + (params.ireg == REG_NA && params.xreg == 
REG_NA && params.xmul == 0 && params.disp == 0)); + assert(params.callType < EC_INDIR_R || params.addr == nullptr || isValidSimm12((ssize_t)params.addr)); + assert(params.callType != EC_INDIR_R || + (params.ireg < REG_COUNT && params.xreg == REG_NA && params.xmul == 0 && params.disp == 0)); // RISCV64 never uses these - assert(xreg == REG_NA && xmul == 0 && disp == 0); + assert(params.xreg == REG_NA && params.xmul == 0 && params.disp == 0); // Our stack level should be always greater than the bytes of arguments we push. Just // a sanity test. - assert((unsigned)std::abs(argSize) <= codeGen->genStackLevel); + assert((unsigned)std::abs(params.argSize) <= codeGen->genStackLevel); // Trim out any callee-trashed registers from the live set. - regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); - gcrefRegs &= savedSet; - byrefRegs &= savedSet; + regMaskTP savedSet = emitGetGCRegsSavedOrModified(params.methHnd); + regMaskTP gcrefRegs = params.gcrefRegs & savedSet; + regMaskTP byrefRegs = params.byrefRegs & savedSet; #ifdef DEBUG if (EMIT_GC_VERBOSE) { - printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); - dumpConvertedVarSet(emitComp, ptrVars); + printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, params.ptrVars)); + dumpConvertedVarSet(emitComp, params.ptrVars); printf(", gcrefRegs="); printRegMaskInt(gcrefRegs); emitDispRegSet(gcrefRegs); @@ -1408,9 +1660,9 @@ void emitter::emitIns_Call(EmitCallType callType, #endif /* Managed RetVal: emit sequence point for the call */ - if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid()) + if (emitComp->opts.compDbgInfo && params.debugInfo.GetLocation().IsValid()) { - codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + codeGen->genIPmappingAdd(IPmappingDscKind::Normal, params.debugInfo, false); } /* @@ -1420,55 +1672,57 @@ void emitter::emitIns_Call(EmitCallType callType, */ instrDesc* id; - assert(argSize % REGSIZE_BYTES == 0); - int argCnt = (int)(argSize / (int)REGSIZE_BYTES); + assert(params.argSize % REGSIZE_BYTES == 0); + int argCnt = (int)(params.argSize / (int)REGSIZE_BYTES); - if (callType >= EC_INDIR_R) + if (params.callType >= EC_INDIR_R) { /* Indirect call, virtual calls */ - assert(callType == EC_INDIR_R); + assert(params.callType == EC_INDIR_R); - id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + id = emitNewInstrCallInd(argCnt, params.disp, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, + params.secondRetSize, params.hasAsyncRet); } else { /* Helper/static/nonvirtual/function calls (direct or through handle), and calls to an absolute addr. 
*/ - assert(callType == EC_FUNC_TOKEN); + assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize, + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ // If the method returns a GC ref, mark RBM_INTRET appropriately - if (retSize == EA_GCREF) + if (params.retSize == EA_GCREF) { gcrefRegs |= RBM_INTRET; } - else if (retSize == EA_BYREF) + else if (params.retSize == EA_BYREF) { byrefRegs |= RBM_INTRET; } // If is a multi-register return method is called, mark RBM_INTRET_1 appropriately - if (secondRetSize == EA_GCREF) + if (params.secondRetSize == EA_GCREF) { gcrefRegs |= RBM_INTRET_1; } - else if (secondRetSize == EA_BYREF) + else if (params.secondRetSize == EA_BYREF) { byrefRegs |= RBM_INTRET_1; } - VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); + VarSetOps::Assign(emitComp, emitThisGCrefVars, params.ptrVars); emitThisGCrefRegs = gcrefRegs; emitThisByrefRegs = byrefRegs; // for the purpose of GC safepointing tail-calls are not real calls - id->idSetIsNoGC(isJump || noSafePoint || emitNoGChelper(methHnd)); + id->idSetIsNoGC(params.isJump || params.noSafePoint || emitNoGChelper(params.methHnd)); /* Set the instruction - special case jumping a function */ instruction ins; @@ -1481,7 +1735,7 @@ void emitter::emitIns_Call(EmitCallType callType, // INS_OPTS_C: placeholders. 1/2/4-ins: // if (callType == EC_INDIR_R) - // jalr REG_R0/REG_RA, ireg, 0 <---- 1-ins + // jalr REG_R0/REG_RA, ireg, offset <---- 1-ins // else if (callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR) // if reloc: // //pc + offset_38bits # only when reloc. @@ -1495,17 +1749,23 @@ void emitter::emitIns_Call(EmitCallType callType, // jalr REG_R0/REG_RA, t2, 0 /* Record the address: method, indirection, or funcptr */ - if (callType == EC_INDIR_R) + if (params.callType == EC_INDIR_R) { /* This is an indirect call (either a virtual call or func ptr call) */ // assert(callType == EC_INDIR_R); id->idSetIsCallRegPtr(); - regNumber reg_jalr = isJump ? REG_R0 : REG_RA; + regNumber reg_jalr = params.isJump ? REG_R0 : REG_RA; id->idReg4(reg_jalr); - id->idReg3(ireg); // NOTE: for EC_INDIR_R, using idReg3. - assert(xreg == REG_NA); + id->idReg3(params.ireg); // NOTE: for EC_INDIR_R, using idReg3. + id->idSmallCns(0); // SmallCns will contain JALR's offset. + if (params.addr != nullptr) + { + // If addr is not NULL, it must contain JALR's offset, which is set to the lower 12 bits of address. + id->idSmallCns((size_t)params.addr); + } + assert(params.xreg == REG_NA); id->idCodeSize(4); } @@ -1513,10 +1773,11 @@ void emitter::emitIns_Call(EmitCallType callType, { /* This is a simple direct call: "call helper/method/addr" */ - assert(callType == EC_FUNC_TOKEN); - assert(addr != NULL); + assert(params.callType == EC_FUNC_TOKEN); + assert(params.addr != NULL); - addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jalr ra/r0,rd,0 + void* addr = + (void*)(((size_t)params.addr) + (params.isJump ? 
0 : 1)); // NOTE: low-bit0 is used for jalr ra/r0,rd,0 id->idAddr()->iiaAddr = (BYTE*)addr; if (emitComp->opts.compReloc) @@ -1543,14 +1804,14 @@ void emitter::emitIns_Call(EmitCallType callType, if (m_debugInfoSize > 0) { - INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo); - id->idDebugOnlyInfo()->idMemCookie = reinterpret_cast(methHnd); // method token + INDEBUG(id->idDebugOnlyInfo()->idCallSig = params.sigInfo); + id->idDebugOnlyInfo()->idMemCookie = reinterpret_cast(params.methHnd); // method token } #ifdef LATE_DISASM - if (addr != nullptr) + if (params.addr != nullptr) { - codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); + codeGen->getDisAssembler().disSetMethod((size_t)params.addr, params.methHnd); } #endif // LATE_DISASM @@ -1607,10 +1868,12 @@ unsigned emitter::emitOutputCall(const insGroup* ig, BYTE* dst, instrDesc* id, c assert(id->idIns() == INS_jalr); if (id->idIsCallRegPtr()) { // EC_INDIR_R + ssize_t offset = id->idSmallCns(); + assert(isValidSimm12(offset)); code = emitInsCode(id->idIns()); code |= (code_t)id->idReg4() << 7; code |= (code_t)id->idReg3() << 15; - // the offset default is 0; + code |= (code_t)offset << 20; emitOutput_Instr(dst, code); } else if (id->idIsReloc()) @@ -1750,6 +2013,10 @@ unsigned emitter::emitOutputCall(const insGroup* ig, BYTE* dst, instrDesc* id, c { byrefRegs |= RBM_INTRET_1; } + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } } // If the GC register set has changed, report the new set. @@ -2230,7 +2497,7 @@ static inline void assertCodeLength(size_t code, uint8_t size) * * Emit a 32-bit RISCV64 R-Type instruction * - * Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings + * Note: Instruction types as per RISC-V Spec, Chapter "RV32/64G Instruction Set Listings" * R-Type layout: * 31-------25-24---20-19--15-14------12-11-----------7-6------------0 * | funct7 | rs2 | rs1 | funct3 | rd | opcode | @@ -2254,7 +2521,7 @@ static inline void assertCodeLength(size_t code, uint8_t size) * * Emit a 32-bit RISCV64 I-Type instruction * - * Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings + * Note: Instruction types as per RISC-V Spec, Chapter "RV32/64G Instruction Set Listings" * I-Type layout: * 31------------20-19-----15-14------12-11-----------7-6------------0 * | imm[11:0] | rs1 | funct3 | rd | opcode | @@ -2278,7 +2545,7 @@ static inline void assertCodeLength(size_t code, uint8_t size) * * Emit a 32-bit RISCV64 S-Type instruction * - * Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings + * Note: Instruction types as per RISC-V Spec, Chapter "RV32/64G Instruction Set Listings" * S-Type layout: * 31-------25-24---20-19--15-14------12-11-----------7-6------------0 * |imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode | @@ -2308,7 +2575,7 @@ static inline void assertCodeLength(size_t code, uint8_t size) * * Emit a 32-bit RISCV64 U-Type instruction * - * Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings + * Note: Instruction types as per RISC-V Spec, Chapter "RV32/64G Instruction Set Listings" * U-Type layout: * 31---------------------------------12-11-----------7-6------------0 * | imm[31:12] | rd | opcode | @@ -2329,7 +2596,7 @@ static inline void assertCodeLength(size_t code, uint8_t size) * * Emit a 32-bit RISCV64 B-Type instruction * - * Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings + * Note: 
Instruction types as per RISC-V Spec, Chapter "RV32/64G Instruction Set Listings" * B-Type layout: * 31-------30-----25-24-20-19-15-14--12-11-------8----7----6--------0 * |imm[12]|imm[10:5]| rs2 | rs1 |funct3| imm[4:1]|imm[11]| opcode | @@ -2365,7 +2632,7 @@ static inline void assertCodeLength(size_t code, uint8_t size) * * Emit a 32-bit RISCV64 J-Type instruction * - * Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings + * Note: Instruction types as per RISC-V Spec, Chapter "RV32/64G Instruction Set Listings" * J-Type layout: * 31-------30--------21----20---19----------12-11----7-6------------0 * |imm[20]| imm[10:1] |imm[11]| imm[19:12] | rd | opcode | @@ -2963,56 +3230,6 @@ BYTE* emitter::emitOutputInstr_OptsReloc(BYTE* dst, const instrDesc* id, instruc return dst; } -BYTE* emitter::emitOutputInstr_OptsI(BYTE* dst, const instrDesc* id) -{ - ssize_t immediate = reinterpret_cast(id->idAddr()->iiaAddr); - const regNumber reg1 = id->idReg1(); - - switch (id->idCodeSize()) - { - case 8: - return emitOutputInstr_OptsI8(dst, id, immediate, reg1); - case 32: - return emitOutputInstr_OptsI32(dst, immediate, reg1); - default: - break; - } - unreached(); - return nullptr; -} - -BYTE* emitter::emitOutputInstr_OptsI8(BYTE* dst, const instrDesc* id, ssize_t immediate, regNumber reg1) -{ - if (id->idReg2()) - { - // special for INT64_MAX or UINT32_MAX - dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, REG_R0, WordMask(12)); - const unsigned shiftValue = (immediate == INT64_MAX) ? 1 : 32; - dst += emitOutput_ITypeInstr(dst, INS_srli, reg1, reg1, shiftValue); - } - else - { - dst += emitOutput_UTypeInstr(dst, INS_lui, reg1, UpperNBitsOfWordSignExtend<20>(immediate)); - dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<12>(immediate)); - } - return dst; -} - -BYTE* emitter::emitOutputInstr_OptsI32(BYTE* dst, ssize_t immediate, regNumber reg1) -{ - const unsigned upperWord = UpperWordOfDoubleWord(immediate); - dst += emitOutput_UTypeInstr(dst, INS_lui, reg1, UpperNBitsOfWordSignExtend<20>(upperWord)); - dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<12>(upperWord)); - const unsigned lowerWord = LowerWordOfDoubleWord(immediate); - dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 11); - dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<11>(lowerWord >> 21)); - dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 11); - dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<11>(lowerWord >> 10)); - dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 10); - dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<10>(lowerWord)); - return dst; -} - BYTE* emitter::emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instruction* ins) { assert(id->idAddr()->iiaIsJitDataOffset()); @@ -3030,14 +3247,14 @@ BYTE* emitter::emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instructio *ins = id->idIns(); const regNumber reg1 = id->idReg1(); - if (id->idIsReloc()) + if (id->idCodeSize() == 8) { - return emitOutputInstr_OptsRcReloc(dst, ins, offset, reg1); + return emitOutputInstr_OptsRcPcRel(dst, ins, offset, reg1); } - return emitOutputInstr_OptsRcNoReloc(dst, ins, offset, reg1); + return emitOutputInstr_OptsRcNoPcRel(dst, ins, offset, reg1); } -BYTE* emitter::emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1) +BYTE* emitter::emitOutputInstr_OptsRcPcRel(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1) { 
const ssize_t immediate = (emitConsBlock - dst) + offset; assert((immediate > 0) && ((immediate & 0x03) == 0)); @@ -3055,7 +3272,7 @@ BYTE* emitter::emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, unsigned return dst; } -BYTE* emitter::emitOutputInstr_OptsRcNoReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1) +BYTE* emitter::emitOutputInstr_OptsRcNoPcRel(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1) { const ssize_t immediate = reinterpret_cast<ssize_t>(emitConsBlock) + offset; assertCodeLength(static_cast<size_t>(immediate), 48); // RISC-V Linux Kernel SV48 @@ -3240,6 +3457,50 @@ BYTE* emitter::emitOutputInstr_OptsC(BYTE* dst, instrDesc* id, const insGroup* i return dst; } +BYTE* emitter::emitOutputInstr_OptsI(BYTE* dst, instrDesc* id, instruction* lastIns) +{ + assert(id->idInsOpt() == INS_OPTS_I); + + instrDescLoadImm* idli = static_cast<instrDescLoadImm*>(id); + instruction* ins = idli->ins; + int32_t* values = idli->values; + regNumber reg = idli->idReg1(); + + assert((reg != REG_NA) && (reg != REG_R0)); + + int numberOfInstructions = idli->idCodeSize() / sizeof(code_t); + for (int i = 0; i < numberOfInstructions; i++) + { + if ((i == 0) && (ins[0] == INS_lui)) + { + assert(isValidSimm20(values[i])); + dst += emitOutput_UTypeInstr(dst, ins[i], reg, values[i] & 0xfffff); + } + else if ((i == 0) && ((ins[0] == INS_addiw) || (ins[0] == INS_addi))) + { + assert(isValidSimm12(values[i]) || ((ins[i] == INS_addiw) && isValidUimm12(values[i]))); + dst += emitOutput_ITypeInstr(dst, ins[i], reg, REG_R0, values[i] & 0xfff); + } + else if (i == 0) + { + assert(false && "First instruction must be lui / addiw / addi"); + } + else if ((ins[i] == INS_addi) || (ins[i] == INS_addiw) || (ins[i] == INS_slli) || (ins[i] == INS_srli)) + { + assert(isValidSimm12(values[i]) || ((ins[i] == INS_addiw) && isValidUimm12(values[i]))); + dst += emitOutput_ITypeInstr(dst, ins[i], reg, reg, values[i] & 0xfff); + } + else + { + assert(false && "Remaining instructions must be addi / addiw / slli / srli"); + } + } + + *lastIns = ins[numberOfInstructions - 1]; + + return dst; +} + /***************************************************************************** * * Append the machine code corresponding to the given instruction descriptor @@ -3267,11 +3528,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst = emitOutputInstr_OptsReloc(dst, id, &ins); sz = sizeof(instrDesc); break; - case INS_OPTS_I: - dst = emitOutputInstr_OptsI(dst, id); - ins = INS_addi; - sz = sizeof(instrDesc); - break; case INS_OPTS_RC: dst = emitOutputInstr_OptsRc(dst, id, &ins); sz = sizeof(instrDesc); break; @@ -3298,6 +3554,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst2 = dst; ins = INS_nop; break; + case INS_OPTS_I: + dst = emitOutputInstr_OptsI(dst, id, &ins); + sz = sizeof(instrDescLoadImm); + break; default: // case INS_OPTS_NONE: dst += emitOutput_Instr(dst, id->idAddr()->iiaGetInstrEncode()); ins = id->idIns(); @@ -3351,31 +3611,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (vt == TYP_REF || vt == TYP_BYREF) emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum)); } - // if (emitInsWritesToLclVarStackLocPair(id)) - //{ - // unsigned ofs2 = ofs + TARGET_POINTER_SIZE; - // if (id->idGCrefReg2() != GCT_NONE) - // { - // emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp); - // } - // else - // { - // // If the type of the local is a gc ref type, update the liveness. 
- var_types vt; - if (varNum >= 0) - { - // "Regular" (non-spill-temp) local. - vt = var_types(emitComp->lvaTable[varNum].lvType); - } - else - { - TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); - vt = tmpDsc->tdTempType(); - } - if (vt == TYP_REF || vt == TYP_BYREF) - emitGCvarDeadUpd(adr + ofs2, *dp); - } - //} } #ifdef DEBUG @@ -3528,6 +3763,22 @@ bool emitter::emitDispBranch( void emitter::emitDispIllegalInstruction(code_t instructionCode) { printf("RISCV64 illegal instruction: 0x%08X\n", instructionCode); + assert(!"RISCV64 illegal instruction"); +} + +void emitter::emitDispImmediate(ssize_t imm, bool newLine /*= true*/, unsigned regBase /*= REG_ZERO*/) +{ + if (emitComp->opts.disDiffable && (regBase != REG_FP) && (regBase != REG_SP)) + { + printf("0xD1FFAB1E"); + } + else + { + printf("%li", imm); + } + + if (newLine) + printf("\n"); } //---------------------------------------------------------------------------------------- @@ -3554,10 +3805,6 @@ void emitter::emitDispInsName( static constexpr int kMaxInstructionLength = 14; const BYTE* insAdr = addr - writeableOffset; - - unsigned int opcode = code & 0x7f; - assert((opcode & 0x3) == 0x3); // only 32-bit encodings supported - emitDispInsAddr(insAdr); emitDispInsOffs(insOffset, doffs); @@ -3566,9 +3813,11 @@ void emitter::emitDispInsName( printf(" "); - switch (opcode) + bool willPrintLoadImmValue = (id->idInsOpt() == INS_OPTS_I) && !emitComp->opts.disDiffable; + + switch (GetMajorOpcode(code)) { - case 0x37: // LUI + case MajorOpcode::Lui: { const char* rd = RegNames[(code >> 7) & 0x1f]; int imm20 = (code >> 12) & 0xfffff; @@ -3576,10 +3825,11 @@ void emitter::emitDispInsName( { imm20 |= 0xfff00000; } - printf("lui %s, %d\n", rd, imm20); + printf("lui %s, ", rd); + emitDispImmediate(imm20, !willPrintLoadImmValue); return; } - case 0x17: // AUIPC + case MajorOpcode::Auipc: { const char* rd = RegNames[(code >> 7) & 0x1f]; int imm20 = (code >> 12) & 0xfffff; @@ -3587,16 +3837,16 @@ void emitter::emitDispInsName( { imm20 |= 0xfff00000; } - printf("auipc %s, %d\n", rd, imm20); + printf("auipc %s, ", rd); + emitDispImmediate(imm20); return; } - case 0x13: + case MajorOpcode::OpImm: { unsigned opcode2 = (code >> 12) & 0x7; unsigned rd = (code >> 7) & 0x1f; unsigned rs1 = (code >> 15) & 0x1f; int imm12 = static_cast<int>(code) >> 20; - bool isHex = false; bool hasImmediate = true; int printLength = 0; @@ -3618,18 +3868,32 @@ void emitter::emitDispInsName( hasImmediate = false; } break; - case 0x1: // SLLI + case 0x1: { - static constexpr unsigned kSlliFunct6 = 0b000000; - unsigned funct6 = (imm12 >> 6) & 0x3f; - // SLLI's instruction code's upper 6 bits have to be equal to zero - if (funct6 != kSlliFunct6) + unsigned funct6 = (imm12 >> 6) & 0x3f; + unsigned shamt = imm12 & 0x3f; // 6 BITS for SHAMT in RISCV64 + switch (funct6) { - return emitDispIllegalInstruction(code); + case 0b011000: + { + static const char* names[] = {"clz", "ctz", "cpop", nullptr, "sext.b", "sext.h"}; + // shift amount is treated as additional funct opcode + if (shamt >= ARRAY_SIZE(names) || shamt == 3) + return emitDispIllegalInstruction(code); + + assert(names[shamt] != nullptr); + printLength = printf("%s", names[shamt]); + hasImmediate = false; + break; + } + case 0b000000: + printLength = printf("slli"); + imm12 = shamt; + break; + + default: + return emitDispIllegalInstruction(code); } - printLength = printf("slli"); - imm12 &= 0x3f; // 6 BITS for SHAMT in RISCV64 } break; case 0x2: // SLTI @@ -3639,33 +3903,48 @@ void emitter::emitDispInsName( printLength = 
printf("sltiu"); break; case 0x4: // XORI - printLength = printf("xori"); - isHex = true; + if (imm12 == -1) + { + printLength = printf("not"); + hasImmediate = false; + } + else + { + printLength = printf("xori"); + } break; case 0x5: // SRLI & SRAI { - static constexpr unsigned kLogicalShiftFunct6 = 0b000000; - static constexpr unsigned kArithmeticShiftFunct6 = 0b010000; - - unsigned funct6 = (imm12 >> 6) & 0x3f; - bool isLogicalShift = funct6 == kLogicalShiftFunct6; - if ((!isLogicalShift) && (funct6 != kArithmeticShiftFunct6)) + unsigned funct6 = (imm12 >> 6) & 0x3f; + imm12 &= 0x3f; // 6BITS for SHAMT in RISCV64 + switch (funct6) { - return emitDispIllegalInstruction(code); + case 0b000000: + printLength = printf("srli"); + break; + case 0b010000: + printLength = printf("srai"); + break; + case 0b011000: + printLength = printf("rori"); + break; + case 0b011010: + if (imm12 != 0b111000) // shift amount is treated as additional funct opcode + return emitDispIllegalInstruction(code); + + printLength = printf("rev8"); + hasImmediate = false; + break; + default: + return emitDispIllegalInstruction(code); } - printLength = printf(isLogicalShift ? "srli" : "srai"); - imm12 &= 0x3f; // 6BITS for SHAMT in RISCV64 } break; case 0x6: // ORI printLength = printf("ori"); - imm12 &= 0xfff; - isHex = true; break; case 0x7: // ANDI printLength = printf("andi"); - imm12 &= 0xfff; - isHex = true; break; default: return emitDispIllegalInstruction(code); @@ -3676,22 +3955,29 @@ void emitter::emitDispInsName( printf("%*s %s, %s", paddingLength, "", RegNames[rd], RegNames[rs1]); if (hasImmediate) { - printf(isHex ? ", 0x%x" : ", %d", imm12); + printf(", "); + if (opcode2 == 0x0) // ADDI + { + emitDispImmediate(imm12, false, rs1); + } + else + { + printf("%d", imm12); + } + } + if (!willPrintLoadImmValue) + { + printf("\n"); } - printf("\n"); return; } - case 0x1b: + case MajorOpcode::OpImm32: { unsigned int opcode2 = (code >> 12) & 0x7; const char* rd = RegNames[(code >> 7) & 0x1f]; const char* rs1 = RegNames[(code >> 15) & 0x1f]; - int imm12 = (((int)code) >> 20); // & 0xfff; - // if (imm12 & 0x800) - //{ - // imm12 |= 0xfffff000; - //} + int imm12 = (((int)code) >> 20); switch (opcode2) { case 0x0: // ADDIW & SEXT.W @@ -3701,19 +3987,37 @@ void emitter::emitDispInsName( } else { - printf("addiw %s, %s, %d\n", rd, rs1, imm12); + printf("addiw %s, %s, ", rd, rs1); + emitDispImmediate(imm12, !willPrintLoadImmValue); } return; - case 0x1: // SLLIW + case 0x1: // SLLIW, SLLI.UW, CLZW, CTZW, & CPOPW { - static constexpr unsigned kSlliwFunct7 = 0b0000000; + static constexpr unsigned kSlliwFunct7 = 0b0000000; + static constexpr unsigned kSlliUwFunct6 = 0b000010; unsigned funct7 = (imm12 >> 5) & 0x7f; + unsigned funct6 = (imm12 >> 6) & 0x3f; // SLLIW's instruction code's upper 7 bits have to be equal to zero if (funct7 == kSlliwFunct7) { printf("slliw %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5 BITS for SHAMT in RISCV64 } + // SLLI.UW's instruction code's upper 6 bits have to be equal to 0b000010 + else if (funct6 == kSlliUwFunct6) + { + printf("slli.uw %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6 BITS for SHAMT in RISCV64 + } + else if (funct7 == 0b0110000) + { + static const char* names[] = {"clzw ", "ctzw ", "cpopw"}; + // shift amount is treated as funct additional opcode bits + unsigned shamt = imm12 & 0x1f; // 5 BITS for SHAMT in RISCV64 + if (shamt >= ARRAY_SIZE(names)) + return emitDispIllegalInstruction(code); + + printf("%s %s, %s\n", names[shamt], rd, rs1); + } else { emitDispIllegalInstruction(code); 
@@ -3722,21 +4026,21 @@ void emitter::emitDispInsName( return; case 0x5: // SRLIW & SRAIW { - static constexpr unsigned kLogicalShiftFunct7 = 0b0000000; - static constexpr unsigned kArithmeticShiftFunct7 = 0b0100000; - unsigned funct7 = (imm12 >> 5) & 0x7f; - if (funct7 == kLogicalShiftFunct7) - { - printf("srliw %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5BITS for SHAMT in RISCV64 - } - else if (funct7 == kArithmeticShiftFunct7) + imm12 &= 0x1f; // 5BITS for SHAMT in RISCV64 + switch (funct7) { - printf("sraiw %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5BITS for SHAMT in RISCV64 - } - else - { - emitDispIllegalInstruction(code); + case 0b0000000: + printf("srliw %s, %s, %d\n", rd, rs1, imm12); + return; + case 0b0100000: + printf("sraiw %s, %s, %d\n", rd, rs1, imm12); + return; + case 0b0110000: + printf("roriw %s, %s, %d\n", rd, rs1, imm12); + return; + default: + return emitDispIllegalInstruction(code); } } return; @@ -3744,7 +4048,7 @@ void emitter::emitDispInsName( return emitDispIllegalInstruction(code); } } - case 0x33: + case MajorOpcode::Op: { unsigned int opcode2 = (code >> 25) & 0x7f; unsigned int opcode3 = (code >> 12) & 0x7; @@ -3791,9 +4095,18 @@ void emitter::emitDispInsName( case 0x0: // SUB printf("sub %s, %s, %s\n", rd, rs1, rs2); return; + case 0x4: // XNOR + printf("xnor %s, %s, %s\n", rd, rs1, rs2); + return; case 0x5: // SRA printf("sra %s, %s, %s\n", rd, rs1, rs2); return; + case 0x6: // ORN + printf("orn %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x7: // ANDN + printf("andn %s, %s, %s\n", rd, rs1, rs2); + return; default: return emitDispIllegalInstruction(code); } @@ -3829,17 +4142,54 @@ void emitter::emitDispInsName( return emitDispIllegalInstruction(code); } return; + case 0b0010000: + switch (opcode3) + { + case 0x2: // SH1ADD + printf("sh1add %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x4: // SH2ADD + printf("sh2add %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x6: // SH3ADD + printf("sh3add %s, %s, %s\n", rd, rs1, rs2); + return; + } + return; + case 0b0110000: + switch (opcode3) + { + case 0b001: + printf("rol %s, %s, %s\n", rd, rs1, rs2); + return; + case 0b101: + printf("ror %s, %s, %s\n", rd, rs1, rs2); + return; + default: + return emitDispIllegalInstruction(code); + } + return; + case 0b0000101: + { + if ((opcode3 >> 2) != 1) // clmul[h] unsupported + return emitDispIllegalInstruction(code); + + static const char names[][5] = {"min ", "minu", "max ", "maxu"}; + printf("%s %s, %s, %s\n", names[opcode3 & 0b11], rd, rs1, rs2); + return; + } default: return emitDispIllegalInstruction(code); } } - case 0x3b: + case MajorOpcode::Op32: { unsigned int opcode2 = (code >> 25) & 0x7f; unsigned int opcode3 = (code >> 12) & 0x7; + unsigned int rs2Num = (code >> 20) & 0x1f; const char* rd = RegNames[(code >> 7) & 0x1f]; const char* rs1 = RegNames[(code >> 15) & 0x1f]; - const char* rs2 = RegNames[(code >> 20) & 0x1f]; + const char* rs2 = RegNames[rs2Num]; switch (opcode2) { @@ -3894,61 +4244,101 @@ void emitter::emitDispInsName( return emitDispIllegalInstruction(code); } return; + case 0b0010000: + switch (opcode3) + { + case 0x2: // SH1ADD.UW + printf("sh1add.uw %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x4: // SH2ADD.UW + printf("sh2add.uw %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x6: // SH3ADD.UW + printf("sh3add.uw %s, %s, %s\n", rd, rs1, rs2); + return; + default: + return emitDispIllegalInstruction(code); + } + return; + case 0b0110000: + switch (opcode3) + { + case 0b001: + printf("rolw %s, %s, %s\n", rd, rs1, rs2); + return; + case 0b101: + 
printf("rorw %s, %s, %s\n", rd, rs1, rs2); + return; + default: + return emitDispIllegalInstruction(code); + } + return; + case 0b0000100: + switch (opcode3) + { + case 0b000: // ZEXT.W & ADD.UW + if (rs2Num == REG_ZERO) + { + printf("zext.w %s, %s\n", rd, rs1); + } + else + { + printf("add.uw %s, %s, %s\n", rd, rs1, rs2); + } + return; + case 0b100: // ZEXT.H + // Note: zext.h is encoded as a pseudo for 'packw rd, rs1, zero' which is not in Zbb. + if (rs2Num != REG_ZERO) + return emitDispIllegalInstruction(code); + + printf("zext.h %s, %s\n", rd, rs1); + return; + default: + return emitDispIllegalInstruction(code); + } + return; + default: return emitDispIllegalInstruction(code); } } - case 0x23: + case MajorOpcode::Store: { unsigned int opcode2 = (code >> 12) & 0x7; - const char* rs1 = RegNames[(code >> 15) & 0x1f]; - const char* rs2 = RegNames[(code >> 20) & 0x1f]; - int offset = (((code >> 25) & 0x7f) << 5) | ((code >> 7) & 0x1f); + if (opcode2 >= 4) + return emitDispIllegalInstruction(code); + + unsigned rs1Num = (code >> 15) & 0x1f; + const char* rs1 = RegNames[rs1Num]; + const char* rs2 = RegNames[(code >> 20) & 0x1f]; + int offset = (((code >> 25) & 0x7f) << 5) | ((code >> 7) & 0x1f); if (offset & 0x800) { offset |= 0xfffff000; } - switch (opcode2) - { - case 0: // SB - printf("sb %s, %d(%s)\n", rs2, offset, rs1); - return; - case 1: // SH - printf("sh %s, %d(%s)\n", rs2, offset, rs1); - return; - case 2: // SW - printf("sw %s, %d(%s)\n", rs2, offset, rs1); - return; - case 3: // SD - printf("sd %s, %d(%s)\n", rs2, offset, rs1); - return; - default: - printf("RISCV64 illegal instruction: 0x%08X\n", code); - return; - } + char width = "bhwd"[opcode2]; + printf("s%c %s, ", width, rs2); + emitDispImmediate(offset, false, rs1Num); + printf("(%s)\n", rs1); + return; } - case 0x63: // BRANCH + case MajorOpcode::Branch: { unsigned opcode2 = (code >> 12) & 0x7; unsigned rs1 = (code >> 15) & 0x1f; unsigned rs2 = (code >> 20) & 0x1f; - // int offset = (((code >> 31) & 0x1) << 12) | (((code >> 7) & 0x1) << 11) | (((code >> 25) & 0x3f) << 5) | - // (((code >> 8) & 0xf) << 1); - // if (offset & 0x800) - // { - // offset |= 0xfffff000; - // } if (!emitDispBranch(opcode2, rs1, rs2, id, ig)) { emitDispIllegalInstruction(code); } return; } - case 0x03: + case MajorOpcode::Load: { unsigned int opcode2 = (code >> 12) & 0x7; - const char* rs1 = RegNames[(code >> 15) & 0x1f]; + unsigned rs1Num = (code >> 15) & 0x1f; + const char* rs1 = RegNames[rs1Num]; const char* rd = RegNames[(code >> 7) & 0x1f]; int offset = ((code >> 20) & 0xfff); if (offset & 0x800) @@ -3956,35 +4346,17 @@ void emitter::emitDispInsName( offset |= 0xfffff000; } - switch (opcode2) - { - case 0: // LB - printf("lb %s, %d(%s)\n", rd, offset, rs1); - return; - case 1: // LH - printf("lh %s, %d(%s)\n", rd, offset, rs1); - return; - case 2: // LW - printf("lw %s, %d(%s)\n", rd, offset, rs1); - return; - case 3: // LD - printf("ld %s, %d(%s)\n", rd, offset, rs1); - return; - case 4: // LBU - printf("lbu %s, %d(%s)\n", rd, offset, rs1); - return; - case 5: // LHU - printf("lhu %s, %d(%s)\n", rd, offset, rs1); - return; - case 6: // LWU - printf("lwu %s, %d(%s)\n", rd, offset, rs1); - return; - default: - printf("RISCV64 illegal instruction: 0x%08X\n", code); - return; - } + char width = "bhwd"[opcode2 & 0b011]; + char unsign = ((opcode2 & 0b100) != 0) ? 
'u' : ' '; + if (width == 'd' && unsign == 'u') + return emitDispIllegalInstruction(code); + + printf("l%c%c %s, ", width, unsign, rd); + emitDispImmediate(offset, false, rs1Num); + printf("(%s)\n", rs1); + return; } - case 0x67: + case MajorOpcode::Jalr: { const unsigned rs1 = (code >> 15) & 0x1f; const unsigned rd = (code >> 7) & 0x1f; int offset = ((code >> 20) & 0xfff); if (offset & 0x800) { offset |= 0xfffff000; } - if ((rs1 == REG_RA) && (rd == REG_ZERO)) + if ((offset == 0) && (rs1 == REG_RA) && (rd == REG_ZERO)) { printf("ret"); return; } - printf("jalr %s, %d(%s)", RegNames[rd], offset, RegNames[rs1]); + if ((offset == 0) && ((rd == REG_RA) || (rd == REG_ZERO))) + { + const char* name = (rd == REG_RA) ? "jalr" : "jr "; + printf("%s %s", name, RegNames[rs1]); + } + else + { + printf("jalr %s, ", RegNames[rd]); + emitDispImmediate(offset, false); + printf("(%s)", RegNames[rs1]); + } CORINFO_METHOD_HANDLE handle = (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie; // Target for ret call is unclear, e.g.: // jalr zero, 0(ra) @@ -4014,7 +4396,7 @@ void emitter::emitDispInsName( printf("\n"); return; } - case 0x6f: + case MajorOpcode::Jal: { unsigned rd = (code >> 7) & 0x1f; int offset = (((code >> 31) & 0x1) << 20) | (((code >> 12) & 0xff) << 12) | (((code >> 20) & 0x1) << 11) | @@ -4023,9 +4405,10 @@ void emitter::emitDispInsName( { offset |= 0xfff00000; } - if (rd == REG_ZERO) + if ((rd == REG_ZERO) || (rd == REG_RA)) { - printf("j "); + const char* name = (rd == REG_RA) ? "jal" : "j "; + printf("%s ", name); if (id->idIsBound()) { @@ -4033,12 +4416,15 @@ void emitter::emitDispInsName( } else { - printf("pc%+d instructions", offset >> 2); + printf("pc%s", (offset >= 0) ? "+" : ""); + emitDispImmediate(offset / (ssize_t)sizeof(code_t), false); + printf(" instructions"); } } else { - printf("jal %s, %d", RegNames[rd], offset); + printf("jal %s, ", RegNames[rd]); + emitDispImmediate(offset, false); } CORINFO_METHOD_HANDLE handle = (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie; if (handle != 0) @@ -4050,14 +4436,14 @@ void emitter::emitDispInsName( printf("\n"); return; } - case 0x0f: + case MajorOpcode::MiscMem: { int pred = ((code) >> 24) & 0xf; int succ = ((code) >> 20) & 0xf; printf("fence %d, %d\n", pred, succ); return; } - case 0x73: + case MajorOpcode::System: { unsigned int opcode2 = (code >> 12) & 0x7; if (opcode2 != 0) @@ -4108,13 +4494,17 @@ void emitter::emitDispInsName( { printf("ebreak\n"); } + else if (code == emitInsCode(INS_ecall)) + { + printf("ecall\n"); + } else { NYI_RISCV64("illegal ins within emitDisInsName!"); } return; } - case 0x53: + case MajorOpcode::OpFp: { unsigned int opcode2 = (code >> 25) & 0x7f; unsigned int opcode3 = (code >> 20) & 0x1f; @@ -4144,22 +4534,17 @@ void emitter::emitDispInsName( case 0x2C: // FSQRT.S printf("fsqrt.s %s, %s\n", fd, fs1); return; - case 0x10: // FSGNJ.S & FSGNJN.S & FSGNJX.S - if (opcode4 == 0) // FSGNJ.S + case 0x10: // FSGNJ.S & FSGNJN.S & FSGNJX.S + NYI_IF(opcode4 >= 3, "RISC-V illegal fsgnj.s variant"); + if (fs1 != fs2) { - printf("fsgnj.s %s, %s, %s\n", fd, fs1, fs2); + const char* variants[3] = {".s ", "n.s", "x.s"}; + printf("fsgnj%s %s, %s, %s\n", variants[opcode4], fd, fs1, fs2); } - else if (opcode4 == 1) // FSGNJN.S + else // pseudos { - printf("fsgnjn.s %s, %s, %s\n", fd, fs1, fs2); - } - else if (opcode4 == 2) // FSGNJX.S - { - printf("fsgnjx.s %s, %s, %s\n", fd, fs1, fs2); - } - else - { - NYI_RISCV64("illegal ins within emitDisInsName!"); + const char* names[3] = {"fmv.s ", "fneg.s", "fabs.s"}; + printf("%s %s, %s\n", names[opcode4], 
fd, fs1); } return; case 0x14: // FMIN.S & FMAX.S @@ -4247,7 +4632,6 @@ void emitter::emitDispInsName( { printf("fcvt.s.lu %s, %s\n", fd, xs1); } - else { NYI_RISCV64("illegal ins within emitDisInsName!"); @@ -4271,22 +4655,17 @@ void emitter::emitDispInsName( case 0x2d: // FSQRT.D printf("fsqrt.d %s, %s\n", fd, fs1); return; - case 0x11: // FSGNJ.D & FSGNJN.D & FSGNJX.D - if (opcode4 == 0) // FSGNJ.D + case 0x11: // FSGNJ.D & FSGNJN.D & FSGNJX.D + NYI_IF(opcode4 >= 3, "RISC-V illegal fsgnj.d variant"); + if (fs1 != fs2) { - printf("fsgnj.d %s, %s, %s\n", fd, fs1, fs2); + const char* variants[3] = {".d ", "n.d", "x.d"}; + printf("fsgnj%s %s, %s, %s\n", variants[opcode4], fd, fs1, fs2); } - else if (opcode4 == 1) // FSGNJN.D + else // pseudos { - printf("fsgnjn.d %s, %s, %s\n", fd, fs1, fs2); - } - else if (opcode4 == 2) // FSGNJX.D - { - printf("fsgnjx.d %s, %s, %s\n", fd, fs1, fs2); - } - else - { - NYI_RISCV64("illegal ins within emitDisInsName!"); + const char* names[3] = {"fmv.d ", "fneg.d", "fabs.d"}; + printf("%s %s, %s\n", names[opcode4], fd, fs1); } return; case 0x15: // FMIN.D & FMAX.D @@ -4412,56 +4791,49 @@ void emitter::emitDispInsName( } return; } - case 0x27: + case MajorOpcode::StoreFp: { unsigned int opcode2 = (code >> 12) & 0x7; + if ((opcode2 != 2) && (opcode2 != 3)) + return emitDispIllegalInstruction(code); - const char* rs1 = RegNames[(code >> 15) & 0x1f]; + unsigned rs1Num = (code >> 15) & 0x1f; + const char* rs1 = RegNames[rs1Num]; const char* rs2 = RegNames[((code >> 20) & 0x1f) | 0x20]; int offset = (((code >> 25) & 0x7f) << 5) | ((code >> 7) & 0x1f); if (offset & 0x800) { offset |= 0xfffff000; } - if (opcode2 == 2) // FSW - { - printf("fsw %s, %d(%s)\n", rs2, offset, rs1); - } - else if (opcode2 == 3) // FSD - { - printf("fsd %s, %d(%s)\n", rs2, offset, rs1); - } - else - { - NYI_RISCV64("illegal ins within emitDisInsName!"); - } + + char width = "bhwd"[opcode2]; + printf("fs%c %s, ", width, rs2); + emitDispImmediate(offset, false, rs1Num); + printf("(%s)\n", rs1); return; } - case 0x7: + case MajorOpcode::LoadFp: { unsigned int opcode2 = (code >> 12) & 0x7; - const char* rs1 = RegNames[(code >> 15) & 0x1f]; - const char* rd = RegNames[((code >> 7) & 0x1f) | 0x20]; - int offset = ((code >> 20) & 0xfff); + if ((opcode2 != 2) && (opcode2 != 3)) + return emitDispIllegalInstruction(code); + + unsigned rs1Num = (code >> 15) & 0x1f; + const char* rs1 = RegNames[rs1Num]; + const char* rd = RegNames[((code >> 7) & 0x1f) | 0x20]; + int offset = ((code >> 20) & 0xfff); if (offset & 0x800) { offset |= 0xfffff000; } - if (opcode2 == 2) // FLW - { - printf("flw %s, %d(%s)\n", rd, offset, rs1); - } - else if (opcode2 == 3) // FLD - { - printf("fld %s, %d(%s)\n", rd, offset, rs1); - } - else - { - NYI_RISCV64("illegal ins within emitDisInsName!"); - } + + char width = "bhwd"[opcode2]; + printf("fl%c %s, ", width, rd); + emitDispImmediate(offset, false, rs1Num); + printf("(%s)\n", rs1); return; } - case 0x2f: // AMO - atomic memory operation + case MajorOpcode::Amo: { bool hasDataReg = true; const char* name; @@ -4572,6 +4944,8 @@ void emitter::emitDispIns( emitDispInsInstrNum(id); + bool willPrintLoadImmValue = (id->idInsOpt() == INS_OPTS_I) && !emitComp->opts.disDiffable; + const BYTE* instr = pCode + writeableOffset; unsigned instrSize; for (size_t i = 0; i < sz; instr += instrSize, i += instrSize, offset += instrSize) @@ -4587,6 +4961,17 @@ void emitter::emitDispIns( } #endif emitDispInsName(instruction, instr, doffs, offset, id, ig); + + if (willPrintLoadImmValue && ((i + 
instrSize) < sz)) + { + printf("\n"); + } + } + + if (willPrintLoadImmValue) + { + instrDescLoadImm* liid = static_cast<instrDescLoadImm*>(id); + printf("\t\t;; load imm: hex=0x%016lX dec=%ld\n", liid->idcCnsVal, liid->idcCnsVal); + } } @@ -4613,12 +4998,12 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (addr->isContained()) { - assert(addr->OperIs(GT_LCL_ADDR, GT_LEA)); + assert(addr->OperIs(GT_LCL_ADDR, GT_LEA, GT_CNS_INT)); int offset = 0; DWORD lsl = 0; - if (addr->OperGet() == GT_LEA) + if (addr->OperIs(GT_LEA)) { offset = addr->AsAddrMode()->Offset(); if (addr->AsAddrMode()->gtScale > 0) @@ -4771,6 +5156,17 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR emitIns_R_S(ins, attr, dataReg, lclNum, offset); } } + else if (addr->OperIs(GT_CNS_INT)) + { + assert(memBase == indir->Addr()); + ssize_t cns = addr->AsIntCon()->IconValue(); + + ssize_t off = (cns << (64 - 12)) >> (64 - 12); // low 12 bits, sign-extended + cns -= off; + + emitLoadImmediate(EA_PTRSIZE, codeGen->rsGetRsvdReg(), cns); + emitIns_R_R_I(ins, attr, dataReg, codeGen->rsGetRsvdReg(), off); + } else if (isValidSimm12(offset)) { // Then load/store dataReg from/to [memBase + offset] @@ -5088,13 +5484,15 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, case GT_AND: case GT_AND_NOT: case GT_OR: + case GT_OR_NOT: case GT_XOR: + case GT_XOR_NOT: { emitIns_R_R_R(ins, attr, dstReg, src1Reg, src2Reg); // TODO-RISCV64-CQ: here sign-extend dst when deal with 32bit data is too conservative. if (EA_SIZE(attr) == EA_4BYTE) - emitIns_R_R_I(INS_slliw, attr, dstReg, dstReg, 0); + emitIns_R_R(INS_sext_w, attr, dstReg, dstReg); } break; @@ -5108,12 +5506,12 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, if ((dst->gtFlags & GTF_UNSIGNED) && (attr == EA_8BYTE)) { - if (src1->gtType == TYP_INT) + if (src1->TypeIs(TYP_INT)) { emitIns_R_R_I(INS_slli, EA_8BYTE, regOp1, regOp1, 32); emitIns_R_R_I(INS_srli, EA_8BYTE, regOp1, regOp1, 32); } - if (src2->gtType == TYP_INT) + if (src2->TypeIs(TYP_INT)) { emitIns_R_R_I(INS_slli, EA_8BYTE, regOp2, regOp2, 32); emitIns_R_R_I(INS_srli, EA_8BYTE, regOp2, regOp2, 32); @@ -5169,7 +5567,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { regNumber resultReg = REG_NA; - if (dst->OperGet() == GT_ADD) + if (dst->OperIs(GT_ADD)) { resultReg = dstReg; regOp1 = saveOperReg1; @@ -5201,8 +5599,8 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, if (attr == EA_4BYTE) { - assert(src1->gtType != TYP_LONG); - assert(src2->gtType != TYP_LONG); + assert(!src1->TypeIs(TYP_LONG)); + assert(!src2->TypeIs(TYP_LONG)); emitIns_R_R_R(INS_add, attr, tempReg1, regOp1, regOp2); @@ -5248,7 +5646,8 @@ unsigned emitter::get_curTotalCodeSize() //---------------------------------------------------------------------------------------- // getInsExecutionCharacteristics: -// Returns the current instruction execution characteristics +// Returns the current instruction execution characteristics based on the SiFive U74 core: +// https://www.starfivetech.com/uploads/u74_core_complex_manual_21G1.pdf // // Arguments: // id - The current instruction descriptor to be evaluated @@ -5262,12 +5661,179 @@ unsigned emitter::get_curTotalCodeSize() emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id) { insExecutionCharacteristics result; - - // TODO-RISCV64: support this function. 
- result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; - result.insLatency = PERFSCORE_LATENCY_ZERO; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_1C; result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; + unsigned codeSize = id->idCodeSize(); + assert((codeSize >= 4) && (codeSize % sizeof(code_t) == 0)); + + // Some instructions like jumps or loads may have not-yet-known simple auxiliary instructions (lui, addi, slli, + // etc) for building immediates, assume cost of one each. + // instrDescLoadImm consists of OpImm, OpImm32, and Lui instructions. + float immediateBuildingCost = ((codeSize / sizeof(code_t)) - 1) * PERFSCORE_LATENCY_1C; + + instruction ins = id->idIns(); + assert(ins != INS_invalid); + if ((ins == INS_lea) || (id->idInsOpt() == INS_OPTS_I)) + { + result.insLatency += immediateBuildingCost; + result.insThroughput += immediateBuildingCost; + return result; + } + + MajorOpcode opcode = GetMajorOpcode(emitInsCode(ins)); + switch (opcode) + { + case MajorOpcode::OpImm: + case MajorOpcode::OpImm32: + case MajorOpcode::Lui: + case MajorOpcode::Auipc: + result.insLatency = PERFSCORE_LATENCY_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + case MajorOpcode::Op: + case MajorOpcode::Op32: + if (id->idInsIs(INS_mul, INS_mulh, INS_mulhu, INS_mulhsu, INS_mulw)) + { + result.insLatency = PERFSCORE_LATENCY_3C; + } + else if (id->idInsIs(INS_div, INS_divu, INS_rem, INS_remu)) + { + result.insLatency = result.insThroughput = (6.0f + 68.0f) / 2; + } + else if (id->idInsIs(INS_divw, INS_divuw, INS_remw, INS_remuw)) + { + result.insLatency = result.insThroughput = (6.0f + 36.0f) / 2; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + } + break; + + case MajorOpcode::MAdd: + case MajorOpcode::MSub: + case MajorOpcode::NmAdd: + case MajorOpcode::NmSub: + case MajorOpcode::OpFp: + if (id->idInsIs(INS_fadd_s, INS_fsub_s, INS_fmul_s, INS_fmadd_s, INS_fmsub_s, INS_fnmadd_s, INS_fnmsub_s)) + { + result.insLatency = PERFSCORE_LATENCY_5C; + } + else if (id->idInsIs(INS_fadd_d, INS_fsub_d, INS_fmul_d, INS_fmadd_d, INS_fmsub_d, INS_fnmadd_d, + INS_fnmsub_d)) + { + result.insLatency = PERFSCORE_LATENCY_7C; + } + else if (id->idInsIs(INS_fdiv_s)) + { + result.insLatency = (9.0f + 36.0f) / 2; + result.insThroughput = (8.0f + 33.0f) / 2; + } + else if (id->idInsIs(INS_fsqrt_s)) + { + result.insLatency = (9.0f + 28.0f) / 2; + result.insThroughput = (8.0f + 33.0f) / 2; + } + else if (id->idInsIs(INS_fdiv_d)) + { + result.insLatency = (9.0f + 58.0f) / 2; + result.insThroughput = (8.0f + 58.0f) / 2; + } + else if (id->idInsIs(INS_fsqrt_d)) + { + result.insLatency = (9.0f + 57.0f) / 2; + result.insThroughput = (8.0f + 58.0f) / 2; + } + else if (id->idInsIs(INS_feq_s, INS_fle_s, INS_flt_s, INS_fclass_s, INS_feq_d, INS_fle_d, INS_flt_d, + INS_fclass_d, INS_fcvt_w_s, INS_fcvt_l_s, INS_fcvt_s_l, INS_fcvt_wu_s, INS_fcvt_lu_s, + INS_fcvt_s_lu, INS_fcvt_w_d, INS_fcvt_l_d, INS_fcvt_wu_d, INS_fcvt_lu_d)) + { + result.insLatency = PERFSCORE_LATENCY_4C; + } + else if (id->idInsIs(INS_fcvt_d_l, INS_fcvt_d_lu, INS_fmv_d_x)) + { + result.insLatency = PERFSCORE_LATENCY_6C; + } + else if (id->idInsIs(INS_fmv_x_w, INS_fmv_x_d)) + { + result.insLatency = PERFSCORE_LATENCY_1C; + } + else + { + result.insLatency = PERFSCORE_LATENCY_2C; + } + break; + + case MajorOpcode::Amo: + result.insLatency = result.insThroughput = PERFSCORE_LATENCY_5C; + result.insMemoryAccessKind = PERFSCORE_MEMORY_READ_WRITE; + break; + + case MajorOpcode::Branch: + result.insLatency = 
result.insThroughput = + immediateBuildingCost + (PERFSCORE_LATENCY_1C + PERFSCORE_LATENCY_6C) / 2; + break; + + case MajorOpcode::Jalr: + result.insLatency = result.insThroughput = + immediateBuildingCost + (PERFSCORE_LATENCY_1C + PERFSCORE_LATENCY_5C) / 2; + break; + + case MajorOpcode::Jal: + result.insLatency = result.insThroughput = + immediateBuildingCost + (PERFSCORE_LATENCY_1C + PERFSCORE_LATENCY_2C) / 2; + break; + + case MajorOpcode::System: + { + code_t code = id->idAddr()->iiaGetInstrEncode(); + code_t funct3 = (code >> 12) & 0b111; + if (funct3 != 0) + { + bool isCsrrw = ((funct3 & 0b11) == 0b01); + bool isZero = (((code >> 15) & 0b11111) == 0); // source register or 5-bit immediate is zero + bool isWrite = (isCsrrw || !isZero); + result.insLatency = isWrite ? PERFSCORE_LATENCY_7C : PERFSCORE_LATENCY_1C; + } + break; + } + + case MajorOpcode::Load: + case MajorOpcode::Store: + case MajorOpcode::LoadFp: + case MajorOpcode::StoreFp: + { + bool isLoad = (opcode == MajorOpcode::Load || opcode == MajorOpcode::LoadFp); + + result.insLatency = isLoad ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_4C; + if (isLoad) + { + code_t log2Size = (emitInsCode(ins) >> 12) & 0b11; + if (log2Size < 2) // sub-word loads + result.insLatency += PERFSCORE_LATENCY_1C; + } + + regNumber baseReg = id->idReg2(); + if ((baseReg != REG_SP) && (baseReg != REG_FP)) + result.insLatency += PERFSCORE_LATENCY_1C; // assume non-stack load/stores are more likely to cache-miss + + result.insThroughput += immediateBuildingCost; + result.insMemoryAccessKind = isLoad ? PERFSCORE_MEMORY_READ : PERFSCORE_MEMORY_WRITE; + break; + } + + case MajorOpcode::MiscMem: + result.insLatency = PERFSCORE_LATENCY_5C; + result.insThroughput = PERFSCORE_THROUGHPUT_5C; + break; + + default: + perfScoreUnhandledInstruction(id, &result); + } + return result; } diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h index b0ce40a49ec3..cbb2b11ec967 100644 --- a/src/coreclr/jit/emitriscv64.h +++ b/src/coreclr/jit/emitriscv64.h @@ -42,14 +42,16 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); /************************************************************************/ /* Private helpers for instruction output */ /************************************************************************/ @@ -68,8 +70,9 @@ void emitDispBranchOffset(const instrDesc* id, const insGroup* ig) const; void emitDispBranchLabel(const instrDesc* id) const; bool emitDispBranchInstrType(unsigned opcode2, bool is_zero_reg, bool& print_second_reg) const; void emitDispIllegalInstruction(code_t instructionCode); +void emitDispImmediate(ssize_t imm, bool newLine = true, unsigned regBase = REG_ZERO); -emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/) const; +static emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/); // Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + offset] void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir); @@ -119,12 +122,9 @@ unsigned 
emitOutput_BTypeInstr_InvertComparation( unsigned emitOutput_JTypeInstr(BYTE* dst, instruction ins, regNumber rd, unsigned imm21) const; BYTE* emitOutputInstr_OptsReloc(BYTE* dst, const instrDesc* id, instruction* ins); -BYTE* emitOutputInstr_OptsI(BYTE* dst, const instrDesc* id); -BYTE* emitOutputInstr_OptsI8(BYTE* dst, const instrDesc* id, ssize_t immediate, regNumber reg1); -BYTE* emitOutputInstr_OptsI32(BYTE* dst, ssize_t immediate, regNumber reg1); BYTE* emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instruction* ins); -BYTE* emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1); -BYTE* emitOutputInstr_OptsRcNoReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1); +BYTE* emitOutputInstr_OptsRcPcRel(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1); +BYTE* emitOutputInstr_OptsRcNoPcRel(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1); BYTE* emitOutputInstr_OptsRl(BYTE* dst, instrDesc* id, instruction* ins); BYTE* emitOutputInstr_OptsRlReloc(BYTE* dst, ssize_t igOffs, regNumber reg1); BYTE* emitOutputInstr_OptsRlNoReloc(BYTE* dst, ssize_t igOffs, regNumber reg1); @@ -135,12 +135,38 @@ BYTE* emitOutputInstr_OptsJalr28(BYTE* dst, const instrDescJmp* jmp, ssize_t imm BYTE* emitOutputInstr_OptsJCond(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins); BYTE* emitOutputInstr_OptsJ(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins); BYTE* emitOutputInstr_OptsC(BYTE* dst, instrDesc* id, const insGroup* ig, size_t* size); +BYTE* emitOutputInstr_OptsI(BYTE* dst, instrDesc* id, instruction* ins); static unsigned TrimSignedToImm12(ssize_t imm12); static unsigned TrimSignedToImm13(ssize_t imm13); static unsigned TrimSignedToImm20(ssize_t imm20); static unsigned TrimSignedToImm21(ssize_t imm21); +// Major opcode of a 32-bit instruction as per "The RISC-V Instruction Set Manual", Chapter "RV32/64G Instruction Set +// Listings", Table "RISC-V base opcode map" +enum class MajorOpcode +{ + // clang-format off + // inst[4:2] 000, 001, 010, 011, 100, 101, 110, 111 (>32Bit) + /* inst[6:5] */ + /* 00 */ Load, LoadFp, Custom0, MiscMem, OpImm, Auipc, OpImm32, Encoding48Bit1, + /* 01 */ Store, StoreFp, Custom1, Amo, Op, Lui, Op32, Encoding64Bit, + /* 10 */ MAdd, MSub, NmSub, NmAdd, OpFp, OpV, Custom2Rv128, Encoding48Bit2, + /* 11 */ Branch, Jalr, Reserved, Jal, System, OpVe, Custom3Rv128, Encoding80Bit, + // clang-format on +}; + +//------------------------------------------------------------------------ +// GetMajorOpcode: extracts major opcode from an instruction +// +// Arguments: +// instr - instruction encoded in 32-bit format +// +// Return Value: +// Major opcode +// +static MajorOpcode GetMajorOpcode(code_t instr); + /************************************************************************/ /* Public inline informational methods */ /************************************************************************/ @@ -200,6 +226,26 @@ static bool isValidSimm32(ssize_t value) return (-(((ssize_t)1) << 31) - 0x800) <= value && value < (((ssize_t)1) << 31) - 0x800; } +//------------------------------------------------------------------------ +// isSingleInstructionFpImm: checks if the floating-point constant can be synthesized with one instruction +// +// Arguments: +// value - the constant to be imm'ed +// size - size of the target immediate +// outBits - [out] the bits of the immediate +// +// Return Value: +// Whether the floating-point immediate can be synthesized with one instruction +// +static bool 
isSingleInstructionFpImm(double value, emitAttr size, int64_t* outBits) +{ + assert(size == EA_4BYTE || size == EA_8BYTE); + *outBits = (size == EA_4BYTE) + ? (int32_t)BitOperations::SingleToUInt32Bits(FloatingPointUtils::convertToSingle(value)) + : (int64_t)BitOperations::DoubleToUInt64Bits(value); + return isValidSimm12(*outBits) || (((*outBits & 0xfff) == 0) && isValidSimm20(*outBits >> 12)); +} + // Returns the number of bits used by the given 'size'. inline static unsigned getBitWidth(emitAttr size) { @@ -293,46 +339,6 @@ void emitIns_R_AI(instruction ins, regNumber reg, ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); -enum EmitCallType -{ - - // I have included here, but commented out, all the values used by the x86 emitter. - // However, RISCV64 has a much reduced instruction set, and so the RISCV64 emitter only - // supports a subset of the x86 variants. By leaving them commented out, it becomes - // a compile time error if code tries to use them (and hopefully see this comment - // and know why they are unavailable on RISCV64), while making it easier to stay - // in-sync with x86 and possibly add them back in if needed. - - EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method - // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method - // EC_FUNC_ADDR, // Direct call to an absolute address - - // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable) - EC_INDIR_R, // Indirect call via register - // EC_INDIR_SR, // Indirect call via stack-reference (local var) - // EC_INDIR_C, // Indirect call via static class var - // EC_INDIR_ARD, // Indirect call via an addressing mode - - EC_COUNT -}; - -void emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di, - regNumber ireg = REG_NA, - regNumber xreg = REG_NA, - unsigned xmul = 0, - ssize_t disp = 0, - bool isJump = false, - bool noSafePoint = false); - unsigned emitOutputCall(const insGroup* ig, BYTE* dst, instrDesc* id, code_t code); unsigned get_curTotalCodeSize(); // bytes of code diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 273dff1367bb..0f0a0bfaafe9 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -85,14 +85,38 @@ bool emitter::IsAvx512OnlyInstruction(instruction ins) return (ins >= INS_FIRST_AVX512_INSTRUCTION) && (ins <= INS_LAST_AVX512_INSTRUCTION); } -bool emitter::IsFMAInstruction(instruction ins) +bool emitter::IsApxOnlyInstruction(instruction ins) { - return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION); + return (ins >= INS_FIRST_APX_INSTRUCTION) && (ins <= INS_LAST_APX_INSTRUCTION); } -bool emitter::IsAVXVNNIInstruction(instruction ins) +bool emitter::Is3OpRmwInstruction(instruction ins) { - return (ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION); + switch (ins) + { + case INS_vpermi2d: + case INS_vpermi2pd: + case INS_vpermi2ps: + case INS_vpermi2q: + case INS_vpermt2d: + case INS_vpermt2pd: + case INS_vpermt2ps: + case INS_vpermt2q: + case INS_vpermi2w: + case INS_vpermt2w: + case INS_vpermi2b: + case INS_vpermt2b: + { + return true; + } + + default: + { + return ((ins >= 
INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION)) || + ((ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION)) || + ((ins >= INS_FIRST_AVXIFMA_INSTRUCTION) && (ins <= INS_LAST_AVXIFMA_INSTRUCTION)); + } + } } bool emitter::IsBMIInstruction(instruction ins) @@ -101,30 +125,26 @@ bool emitter::IsBMIInstruction(instruction ins) } //------------------------------------------------------------------------ -// IsPermuteVar2xInstruction: Is this an Avx512 permutex2var instruction? +// IsKMOVInstruction: Is this an Avx512 KMOV instruction? // // Arguments: // ins - The instruction to check. // // Returns: -// `true` if it is a permutex2var instruction. +// `true` if it is a KMOV instruction. // -bool emitter::IsPermuteVar2xInstruction(instruction ins) +bool emitter::IsKMOVInstruction(instruction ins) { switch (ins) { - case INS_vpermi2d: - case INS_vpermi2pd: - case INS_vpermi2ps: - case INS_vpermi2q: - case INS_vpermt2d: - case INS_vpermt2pd: - case INS_vpermt2ps: - case INS_vpermt2q: - case INS_vpermi2w: - case INS_vpermt2w: - case INS_vpermi2b: - case INS_vpermt2b: + case INS_kmovb_gpr: + case INS_kmovw_gpr: + case INS_kmovd_gpr: + case INS_kmovq_gpr: + case INS_kmovb_msk: + case INS_kmovw_msk: + case INS_kmovd_msk: + case INS_kmovq_msk: { return true; } @@ -218,31 +238,19 @@ regNumber emitter::getSseShiftRegNumber(instruction ins) } } -bool emitter::HasVexEncoding(instruction ins) const -{ - insFlags flags = CodeGenInterface::instInfo[ins]; - return (flags & Encoding_VEX) != 0; -} - -bool emitter::HasEvexEncoding(instruction ins) const -{ - insFlags flags = CodeGenInterface::instInfo[ins]; - return (flags & Encoding_EVEX) != 0; -} - -bool emitter::HasRex2Encoding(instruction ins) const +bool emitter::HasRex2Encoding(instruction ins) { insFlags flags = CodeGenInterface::instInfo[ins]; return (flags & Encoding_REX2) != 0; } -bool emitter::HasApxNdd(instruction ins) const +bool emitter::HasApxNdd(instruction ins) { insFlags flags = CodeGenInterface::instInfo[ins]; return (flags & INS_Flags_Has_NDD) != 0; } -bool emitter::HasApxNf(instruction ins) const +bool emitter::HasApxNf(instruction ins) { insFlags flags = CodeGenInterface::instInfo[ins]; return (flags & INS_Flags_Has_NF) != 0; @@ -254,7 +262,35 @@ bool emitter::IsVexEncodableInstruction(instruction ins) const { return false; } - return HasVexEncoding(ins); + + // These can use compSupportsHWIntrinsic as we'll get here for + // some NAOT scenarios and it will already have been recorded + // for appropriate usage. + + switch (ins) + { +#if defined(FEATURE_HW_INTRINSICS) + case INS_vpdpbusd: + case INS_vpdpwssd: + case INS_vpdpbusds: + case INS_vpdpwssds: + { + return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNI); + } + + case INS_vpmadd52huq: + case INS_vpmadd52luq: + { + return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXIFMA); + } +#endif // FEATURE_HW_INTRINSICS + + default: + { + insFlags flags = CodeGenInterface::instInfo[ins]; + return (flags & Encoding_VEX) != 0; + } + } } //------------------------------------------------------------------------ @@ -273,16 +309,45 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const return false; } + // These can use compSupportsHWIntrinsic as we'll get here for + // some NAOT scenarios and it will already have been recorded + // for appropriate usage. 
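Aside: the `Is3OpRmwInstruction` rework above replaces separate FMA/AVX-VNNI checks with one helper that mixes an explicit case list with contiguous `[INS_FIRST_*, INS_LAST_*]` range tests. A minimal, self-contained sketch of that classification pattern; the enum values and group boundaries below are invented for illustration and are not the JIT's real instruction tables:

```cpp
#include <cstdio>

// Hypothetical instruction ids laid out so related groups are contiguous,
// mirroring how the emitter's instruction enum groups its FMA/VNNI entries.
enum Ins
{
    INS_add,
    INS_first_fma, INS_vfmadd, INS_vfmsub, INS_last_fma,
    INS_first_vnni, INS_vpdpbusd, INS_last_vnni,
};

// A 3-operand read-modify-write form is either one of the explicitly listed
// cases or anything inside one of the contiguous [first, last] ranges.
static bool Is3OpRmw(Ins ins)
{
    switch (ins)
    {
        case INS_vpdpbusd: // explicit case, analogous to the vpermi2*/vpermt2* list
            return true;
        default:
            return (ins >= INS_first_fma && ins <= INS_last_fma) ||
                   (ins >= INS_first_vnni && ins <= INS_last_vnni);
    }
}

int main()
{
    std::printf("%d %d\n", Is3OpRmw(INS_vfmadd), Is3OpRmw(INS_add)); // prints "1 0"
}
```

Keeping related groups contiguous in the enum is what makes the range checks cheap; the explicit cases cover instructions that cannot sit inside such a range.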
+ switch (ins) { +#if defined(FEATURE_HW_INTRINSICS) + case INS_aesdec: + case INS_aesdeclast: + case INS_aesenc: + case INS_aesenclast: + { + return emitComp->compSupportsHWIntrinsic(InstructionSet_AES_V256); + } + case INS_pclmulqdq: { - return emitComp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256); + return emitComp->compSupportsHWIntrinsic(InstructionSet_PCLMULQDQ_V256); + } + + case INS_vpdpbusd: + case INS_vpdpwssd: + case INS_vpdpbusds: + case INS_vpdpwssds: + { + return emitComp->compSupportsHWIntrinsic(InstructionSet_AVX512v3); + } + + case INS_vpmadd52huq: + case INS_vpmadd52luq: + { + return emitComp->compSupportsHWIntrinsic(InstructionSet_AVX512VBMI); } +#endif // FEATURE_HW_INTRINSICS default: { - return HasEvexEncoding(ins); + insFlags flags = CodeGenInterface::instInfo[ins]; + return (flags & Encoding_EVEX) != 0; } } } @@ -354,12 +419,32 @@ bool emitter::IsApxNFEncodableInstruction(instruction ins) const // bool emitter::IsApxExtendedEvexInstruction(instruction ins) const { +#ifdef TARGET_AMD64 if (!UsePromotedEVEXEncoding()) { return false; } - return HasApxNdd(ins) || HasApxNf(ins); + if (HasApxNdd(ins) || HasApxNf(ins)) + { + return true; + } + + if (ins == INS_crc32_apx || ins == INS_movbe_apx) + { + // With the new opcode, CRC32 is promoted to EVEX with APX. + return true; + } + + if (IsApxOnlyInstruction(ins)) + { + return true; + } + + return false; +#else // !TARGET_AMD64 + return false; +#endif } //------------------------------------------------------------------------ @@ -457,13 +542,7 @@ bool emitter::IsLegacyMap1(code_t code) const // bool emitter::IsVexOrEvexEncodableInstruction(instruction ins) const { - if (!UseVEXEncoding()) - { - return false; - } - - insFlags flags = CodeGenInterface::instInfo[ins]; - return (flags & (Encoding_VEX | Encoding_EVEX)) != 0; + return IsVexEncodableInstruction(ins) || IsEvexEncodableInstruction(ins); } // Returns true if the AVX instruction is a binary operator that requires 3 operands. @@ -777,6 +856,65 @@ bool emitter::DoJitUseApxNDD(instruction ins) const #endif } +inline bool emitter::IsCCMP(instruction ins) +{ + return (ins > INS_FIRST_CCMP_INSTRUCTION && ins < INS_LAST_CCMP_INSTRUCTION); +} + +//------------------------------------------------------------------------ +// GetCCFromCCMP: Get a condition code from a ccmp instruction +// +// Arguments: +// ins - The instruction to check. +// +// Returns: +// `insCC` representing the condition code for a ccmp instruction. +// ccmpx instructions share the same instruction encoding unlike +// other x86 status bit instructions and instead have a CC coded into +// the EVEX prefix. 
+// +inline insCC emitter::GetCCFromCCMP(instruction ins) +{ + assert(IsCCMP(ins)); + switch (ins) + { + case INS_ccmpo: + return INS_CC_O; + case INS_ccmpno: + return INS_CC_NO; + case INS_ccmpb: + return INS_CC_B; + case INS_ccmpae: + return INS_CC_AE; + case INS_ccmpe: + return INS_CC_E; + case INS_ccmpne: + return INS_CC_NE; + case INS_ccmpbe: + return INS_CC_BE; + case INS_ccmpa: + return INS_CC_A; + case INS_ccmps: + return INS_CC_S; + case INS_ccmpns: + return INS_CC_NS; + case INS_ccmpt: + return INS_CC_TRUE; + case INS_ccmpf: + return INS_CC_FALSE; + case INS_ccmpl: + return INS_CC_L; + case INS_ccmpge: + return INS_CC_GE; + case INS_ccmple: + return INS_CC_LE; + case INS_ccmpg: + return INS_CC_G; + default: + unreached(); + } +} + #ifdef TARGET_64BIT //------------------------------------------------------------------------ // AreUpperBitsZero: check if some previously emitted @@ -986,6 +1124,22 @@ bool emitter::emitIsInstrWritingToReg(instrDesc* id, regNumber reg) case INS_imul_13: case INS_imul_14: case INS_imul_15: + case INS_imul_16: + case INS_imul_17: + case INS_imul_18: + case INS_imul_19: + case INS_imul_20: + case INS_imul_21: + case INS_imul_22: + case INS_imul_23: + case INS_imul_24: + case INS_imul_25: + case INS_imul_26: + case INS_imul_27: + case INS_imul_28: + case INS_imul_29: + case INS_imul_30: + case INS_imul_31: #endif // TARGET_AMD64 if (reg == inst3opImulReg(ins)) { @@ -1637,6 +1791,13 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasMaskReg(id)) { // Requires the EVEX encoding due to used registers + // A special case here is KMOV, the original KMOV introduced in Avx512 can only be encoded in VEX, APX promoted + // them to EVEX, so only return true when APX is available. + if (IsKMOVInstruction(ins)) + { + // Use EVEX only when needed. + return HasExtendedGPReg(id); + } return true; } @@ -1646,6 +1807,14 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const return true; } + if (HasExtendedGPReg(id)) + { + // TODO-XArch-apx: + // revisit this part: this may have some conflicts with REX2 prefix, we may prefer REX2 if only EGPR is + // involved. + return true; + } + if (id->idIsEvexNfContextSet() && IsBMIInstruction(ins)) { // Only a few BMI instructions shall be promoted to APX-EVEX due to NF feature. @@ -1663,6 +1832,13 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const return false; } + if (IsKMOVInstruction(ins)) + { + // KMOV should not be encoded in EVEX when stressing EVEX, as they are supposed to encded in EVEX only + // when APX is available, only stressing EVEX is not enough making the encoding valid. + return false; + } + // Requires the EVEX encoding due to STRESS mode and no change in semantics // // Some instructions, like VCMPEQW return the value in a SIMD register for @@ -1675,7 +1851,17 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const if (IsApxExtendedEvexInstruction(ins) && emitComp->DoJitStressPromotedEvexEncoding()) { // This path will be hit when we stress APX-EVEX and encode VEX with Extended EVEX. 
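Aside: the `TakesEvexPrefix` changes above give KMOV a narrower rule than other instructions: it stays on its VEX form and is only promoted to EVEX when an APX extended GPR forces it. A compilable sketch of that ordering, with invented stand-ins for the emitter's `HasExtendedGPReg`-style queries (none of these types exist in the JIT):

```cpp
#include <cstdio>

// Stand-ins for the emitter's queries; the real code inspects the instrDesc.
struct InsInfo
{
    bool isKmov;           // one of the kmov{b,w,d,q} forms
    bool usesExtendedGpr;  // touches r16..r31 (APX extended GPRs)
    bool needsEvexFeature; // e.g. 512-bit width, masking, embedded rounding
};

// KMOV keeps its VEX encoding unless an extended GPR leaves no legal VEX form;
// other instructions take EVEX whenever an EVEX-only feature or register is used.
static bool TakesEvex(const InsInfo& info)
{
    if (info.isKmov)
    {
        return info.usesExtendedGpr; // use EVEX only when needed
    }
    return info.needsEvexFeature || info.usesExtendedGpr;
}

int main()
{
    std::printf("%d %d\n", TakesEvex({true, false, false}), TakesEvex({true, true, false})); // "0 1"
}
```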
- return (IsBMIInstruction(ins) && HasApxNf(ins)); + if (IsKMOVInstruction(ins)) + { + return true; + } + + if (IsBMIInstruction(ins)) + { + return HasApxNf(ins); + } + + return false; } #endif // DEBUG @@ -1685,6 +1871,15 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const return id->idHasMem(); } + if ((insTupleTypeInfo(ins) & INS_TT_MEM128) != 0) + { + assert((ins == INS_pslld) || (ins == INS_psllq) || (ins == INS_psllw) || (ins == INS_psrad) || + (ins == INS_psraw) || (ins == INS_psrld) || (ins == INS_psrlq) || (ins == INS_psrlw)); + + // Memory operand with immediate can only be encoded using EVEX + return id->idHasMemAndCns(); + } + return false; } @@ -1699,6 +1894,7 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const // bool emitter::TakesRex2Prefix(const instrDesc* id) const { +#ifdef TARGET_AMD64 // Return true iff the instruction supports REX2 encoding, and it requires to access EGPRs. // TODO-xarch-apx: @@ -1721,6 +1917,12 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const return true; } + if (ins >= INS_imul_16 && ins <= INS_imul_31) + { + // The instructions have implicit use of EGPRs. + return true; + } + #if defined(DEBUG) if (emitComp->DoJitStressRex2Encoding()) { @@ -1729,6 +1931,9 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const #endif // DEBUG return false; +#else // !TARGET_AMD64 + return false; +#endif } //------------------------------------------------------------------------ @@ -1742,9 +1947,7 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const // bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const { - // TODO-XArch-APX: - // Isolating legacy-promoted-EVEX case out from VEX/EVEX-promoted-EVEX, - // as the latter ones are relatively simple, providing EGPRs functionality, +#ifdef TARGET_AMD64 instruction ins = id->idIns(); if (!IsApxExtendedEvexInstruction(ins)) { @@ -1772,14 +1975,26 @@ bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const return true; } + if (ins == INS_crc32_apx || ins == INS_movbe_apx) + { + return true; + } + #if defined(DEBUG) if (emitComp->DoJitStressPromotedEvexEncoding()) { return true; } #endif // DEBUG + if (IsApxOnlyInstruction(ins)) + { + return true; + } return false; +#else // !TARGET_AMD64 + return false; +#endif } // Intel AVX-512 encoding is defined in "Intel 64 and ia-32 architectures software developer's manual volume 2", Section @@ -1809,9 +2024,6 @@ bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const // - V'- bit to extend vvvv // - aaa - specifies mask register // Rest - reserved for future use and usage of them will uresult in Undefined instruction exception. -// - u - Bit to indicate YMM Embedded rounding. -// Set to 1 for isas Avx10.1 and below -// Needs to be set to 0 for AVX10.2 and above to indicate YMM embedded rounding // - B' - reserved as of now // set to 0 for future compatibility. 
// @@ -1822,7 +2034,6 @@ bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const #define LBIT_IN_BYTE_EVEX_PREFIX 0x0000002000000000ULL #define LPRIMEBIT_IN_BYTE_EVEX_PREFIX 0x0000004000000000ULL #define ZBIT_IN_BYTE_EVEX_PREFIX 0x0000008000000000ULL -#define uBIT_IN_BYTE_EVEX_PREFIX 0x0000040000000000ULL #define MAP4_IN_BYTE_EVEX_PREFIX 0x4000000000000ULL #define ND_BIT_IN_BYTE_EVEX_PREFIX 0x1000000000ULL @@ -1864,7 +2075,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt if (IsApxExtendedEvexInstruction(ins)) { - if (!HasEvexEncoding(ins)) + if (!IsEvexEncodableInstruction(ins)) { // Legacy-promoted insutrcions are not labeled with Encoding_EVEX. code |= MAP4_IN_BYTE_EVEX_PREFIX; @@ -1897,6 +2108,14 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt // it to EVEX when needed with some helper functions. code &= 0xFF7FFFFFFFFFFFFFULL; } +#ifdef TARGET_AMD64 + if (IsCCMP(ins)) + { + code &= 0xFFFF87F0FFFFFFFF; + code |= ((size_t)id->idGetEvexDFV()) << 43; + code |= ((size_t)GetCCFromCCMP(ins)) << 32; + } +#endif return code; } @@ -1921,13 +2140,6 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt if (!id->idHasMem()) { - // ymm embedded rounding case. - if (attr == EA_32BYTE) - { - assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2)); - code &= ~(uBIT_IN_BYTE_EVEX_PREFIX); - } - unsigned roundingMode = id->idGetEvexbContext(); if (roundingMode == 1) { @@ -2004,16 +2216,17 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt default: { +#ifdef TARGET_AMD64 + if (IsCCMP(id->idIns())) // Special case for conditional ins such as CCMP, CCMOV + { + break; + } +#endif unsigned aaaContext = id->idGetEvexAaaContext(); if (aaaContext != 0) { maskReg = static_cast(aaaContext + KBASE); - - if (id->idIsEvexZContextSet()) - { - code |= ZBIT_IN_BYTE_EVEX_PREFIX; - } } break; } @@ -2022,6 +2235,11 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt if (isMaskReg(maskReg)) { code |= (static_cast(maskReg - KBASE) << 32); + + if (id->idIsEvexZContextSet()) + { + code |= ZBIT_IN_BYTE_EVEX_PREFIX; + } } return code; } @@ -2136,14 +2354,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const { switch (ins) { - case INS_cvtss2si: - case INS_cvttss2si32: - case INS_cvttss2si64: - case INS_cvtsd2si: - case INS_cvttsd2si32: - case INS_cvttsd2si64: - case INS_movd: - case INS_movnti: case INS_andn: case INS_bextr: case INS_blsi: @@ -2154,13 +2364,9 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const case INS_pdep: case INS_pext: case INS_rorx: -#if defined(TARGET_AMD64) case INS_sarx: case INS_shlx: case INS_shrx: -#endif // TARGET_AMD64 - case INS_vcvtsd2usi: - case INS_vcvtss2usi: { if (attr == EA_8BYTE) { @@ -2295,7 +2501,12 @@ bool emitter::HasMaskReg(const instrDesc* id) const } #if defined(DEBUG) - assert(!isMaskReg(id->idReg2())); + // After APX, KMOV instructions can be encoded in EVEX. + if (isMaskReg(id->idReg2())) + { + assert(IsKInstruction(id->idIns())); + return UsePromotedEVEXEncoding(); + } if (!id->idIsSmallDsc()) { @@ -2345,7 +2556,7 @@ regNumber AbsRegNumber(regNumber reg) bool IsExtendedReg(regNumber reg) { #ifdef TARGET_AMD64 - return ((reg >= REG_R8) && (reg <= REG_R23)) || ((reg >= REG_XMM8) && (reg <= REG_XMM31)); + return ((reg >= REG_R8) && (reg <= REG_R31)) || ((reg >= REG_XMM8) && (reg <= REG_XMM31)); #else // X86 JIT operates in 32-bit mode and hence extended reg are not available. 
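Aside: the ccmp branch in `AddEvexPrefix` above is a clear-then-or field insertion: it masks out a slice of the 64-bit prefix word, then ors the DFV bits in at bit 43 and the condition code at bit 32. A generic sketch of that idiom (the offsets and values here are only for illustration, not the documented EVEX layout):

```cpp
#include <cstdint>
#include <cstdio>

// Insert an n-bit field into a 64-bit word at a given bit offset:
// clear the destination bits first, then or in the new value.
static uint64_t InsertField(uint64_t word, unsigned offset, unsigned width, uint64_t value)
{
    const uint64_t fieldMask = (width < 64 ? (1ull << width) - 1 : ~0ull) << offset;
    return (word & ~fieldMask) | ((value << offset) & fieldMask);
}

int main()
{
    uint64_t code = ~0ull;                // stand-in prefix word, all bits set
    code = InsertField(code, 43, 4, 0xA); // a 4-bit "dfv"-like field at bit 43
    code = InsertField(code, 32, 4, 0x3); // a 4-bit condition-code field at bit 32
    std::printf("0x%016llx\n", (unsigned long long)code);
}
```

Doing the clear and the or in one place avoids the classic bug of or-ing a new value over stale bits left by an earlier encoding pass.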
return false; @@ -2641,7 +2852,7 @@ emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code) emitter::code_t emitter::AddEvexVPrimePrefix(code_t code) { #if defined(TARGET_AMD64) - assert(UseEvexEncoding() && hasEvexPrefix(code)); + assert((UseEvexEncoding() || UsePromotedEVEXEncoding()) && hasEvexPrefix(code)); return emitter::code_t(code & 0xFFFFFFF7FFFFFFFFULL); #else unreached(); @@ -2661,7 +2872,7 @@ emitter::code_t emitter::AddEvexVPrimePrefix(code_t code) emitter::code_t emitter::AddEvexRPrimePrefix(code_t code) { #if defined(TARGET_AMD64) - assert(UseEvexEncoding() && hasEvexPrefix(code)); + assert((UseEvexEncoding() || UsePromotedEVEXEncoding()) && hasEvexPrefix(code)); return emitter::code_t(code & 0xFFEFFFFFFFFFFFFFULL); #else unreached(); @@ -2730,13 +2941,38 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co { case 0x66: { - // None of the existing BMI instructions should be EVEX encoded. - // After APX, BMI instructions can be EVEX encoded with NF feature. + // After APX, BMI instructions can be encoded in EVEX. if (IsBMIInstruction(ins)) { - // if BMI instructions reaches this part, then it should be APX-EVEX. - // although the opcode of all the BMI instructions are defined with 0x66, - // but it should not, skip this check. + switch (ins) + { + case INS_rorx: + case INS_pdep: + case INS_mulx: + case INS_shrx: + { + evexPrefix |= (0x03 << 8); + break; + } + + case INS_pext: + case INS_sarx: + { + evexPrefix |= (0x02 << 8); + break; + } + + case INS_shlx: + { + evexPrefix |= (0x01 << 8); + break; + } + + default: + { + break; + } + } break; } assert(!IsBMIInstruction(ins)); @@ -2920,32 +3156,25 @@ emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) con case INS_rorx: case INS_pdep: case INS_mulx: -// TODO: Unblock when enabled for x86 -#ifdef TARGET_AMD64 case INS_shrx: -#endif { vexPrefix |= 0x03; break; } case INS_pext: -// TODO: Unblock when enabled for x86 -#ifdef TARGET_AMD64 case INS_sarx: -#endif { vexPrefix |= 0x02; break; } -// TODO: Unblock when enabled for x86 -#ifdef TARGET_AMD64 + case INS_shlx: { vexPrefix |= 0x01; break; } -#endif + default: { vexPrefix |= 0x00; @@ -3647,10 +3876,13 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) case INS_cvttsd2si64: case INS_cvttss2si32: case INS_cvttss2si64: - case INS_cvtsd2si: - case INS_cvtss2si: + case INS_cvtsd2si32: + case INS_cvtsd2si64: + case INS_cvtss2si32: + case INS_cvtss2si64: case INS_extractps: - case INS_movd: + case INS_movd32: + case INS_movd64: case INS_movmskpd: case INS_movmskps: case INS_mulx: @@ -3663,13 +3895,13 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) case INS_pextrw: case INS_pextrw_sse41: case INS_rorx: -#ifdef TARGET_AMD64 case INS_shlx: case INS_sarx: case INS_shrx: -#endif - case INS_vcvtsd2usi: - case INS_vcvtss2usi: + case INS_vcvtsd2usi32: + case INS_vcvtsd2usi64: + case INS_vcvtss2usi32: + case INS_vcvtss2usi64: case INS_vcvttsd2usi32: case INS_vcvttsd2usi64: case INS_vcvttss2usi32: @@ -3916,10 +4148,8 @@ unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const switch (ins) { case INS_crc32: -#if defined(TARGET_AMD64) case INS_sarx: case INS_shrx: -#endif // TARGET_AMD64 { // When the prefix is 0x0F38 or 0x0F3A, we must use the 3-byte encoding // These are special cases where the pp-bit is 0xF2 or 0xF3 and not 0x66 @@ -3982,7 +4212,7 @@ unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const regFor012Bits = id->idReg1(); } } - else if (ins == INS_movd) + else if ((ins == 
INS_movd32) || (ins == INS_movd64)) { if (isFloatReg(regFor012Bits)) { @@ -4060,7 +4290,7 @@ inline bool hasTupleTypeInfo(instruction ins) // Return Value: // the tuple type info for a given CPU instruction. // -insTupleType emitter::insTupleTypeInfo(instruction ins) const +insTupleType emitter::insTupleTypeInfo(instruction ins) { assert((unsigned)ins < ArrLen(insTupleTypeInfos)); return insTupleTypeInfos[ins]; @@ -4139,8 +4369,19 @@ inline unsigned emitter::insEncodeReg012(const instrDesc* id, regNumber reg, emi if (IsExtendedGPReg(reg)) { // Seperate the encoding for REX2.B3/B4, REX2.B3 will be handled in `AddRexBPrefix`. - assert(TakesRex2Prefix(id)); - *code |= 0x001000000000ULL; // REX2.B4 + assert(TakesRex2Prefix(id) || TakesApxExtendedEvexPrefix(id) || TakesEvexPrefix(id)); + if (hasRex2Prefix(*code)) + { + *code |= 0x001000000000ULL; // REX2.B4 + } + else if (hasEvexPrefix(*code)) + { + *code |= 0x8000000000000ULL; // EVEX.B4 + } + else + { + // There are cases when this method is called before prefix is attached. + } } } else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr)) @@ -4188,8 +4429,19 @@ inline unsigned emitter::insEncodeReg345(const instrDesc* id, regNumber reg, emi if (IsExtendedGPReg(reg)) { // Seperate the encoding for REX2.R3/R4, REX2.R3 will be handled in `AddRexRPrefix`. - assert(TakesRex2Prefix(id)); - *code |= 0x004000000000ULL; // REX2.R4 + assert(TakesRex2Prefix(id) || TakesApxExtendedEvexPrefix(id) || TakesEvexPrefix(id)); + if (hasRex2Prefix(*code)) + { + *code |= 0x004000000000ULL; // REX2.R4 + } + else if (hasEvexPrefix(*code)) + { + *code = AddEvexRPrimePrefix(*code); // EVEX.R4 + } + else + { + // There are cases when this method is called before prefix is attached. + } } } else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr)) @@ -4247,6 +4499,12 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber // Have to set the EVEX V' bit code = AddEvexVPrimePrefix(code); } + + if (isHighGPReg(reg) && IsBMIInstruction(ins)) + { + // APX: BMI instructions use RVM operand encoding + code = AddEvexVPrimePrefix(code); + } #endif // Shift count = 5-bytes of opcode + 0-2 bits for EVEX @@ -4272,7 +4530,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber // Rather see these paths cleaned up. regBits = HighAwareRegEncoding(reg); - if (false /*reg >= REG_R16 && reg <= REG_R31*/) + if (isHighGPReg(reg)) { // Have to set the EVEX V' bit code = AddEvexVPrimePrefix(code); @@ -4318,8 +4576,21 @@ inline unsigned emitter::insEncodeRegSIB(const instrDesc* id, regNumber reg, cod if (IsExtendedGPReg(reg)) { // Separate the encoding for REX2.X3/X4, REX2.X3 will be handled in `AddRexXPrefix`. - assert(TakesRex2Prefix(id)); - *code |= 0x002000000000ULL; // REX2.X4 + assert(TakesRex2Prefix(id) || TakesApxExtendedEvexPrefix(id) || TakesEvexPrefix(id)); + if (hasRex2Prefix(*code)) + { + *code |= 0x002000000000ULL; // REX2.X4 + } + else if (hasEvexPrefix(*code)) + { + // Note that APX-EVEX use EVEX.X4 as the MSB of the INDEX register to address GPRs, and the original + // EVEX.V4 is used for VSIB addressing. + *code &= 0xFFFFFBFFFFFFFFFFULL; // EVEX.X4 + } + else + { + // There are cases when this method is called before prefix is attached. 
+ } } } unsigned regBits = RegEncoding(reg); @@ -4975,7 +5246,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var static bool baseRegisterRequiresSibByte(regNumber base) { #ifdef TARGET_AMD64 - return base == REG_ESP || base == REG_R12 || base == REG_R20; + return base == REG_ESP || base == REG_R12 || base == REG_R20 || base == REG_R28; #else return base == REG_ESP; #endif @@ -4984,7 +5255,7 @@ static bool baseRegisterRequiresSibByte(regNumber base) static bool baseRegisterRequiresDisplacement(regNumber base) { #ifdef TARGET_AMD64 - return base == REG_EBP || base == REG_R13 || base == REG_R21; + return base == REG_EBP || base == REG_R13 || base == REG_R21 || base == REG_R29; #else return base == REG_EBP; #endif @@ -5858,7 +6129,11 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m if (data->OperIs(GT_BSWAP, GT_BSWAP16) && data->isContained()) { +#ifdef TARGET_AMD64 + assert(ins == INS_movbe || ins == INS_movbe_apx); +#else assert(ins == INS_movbe); +#endif data = data->gtGetOp1(); } @@ -6843,6 +7118,7 @@ void emitter::emitIns_R_I(instruction ins, #endif SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); if (isSimdInsAndValInByte) { @@ -7084,13 +7360,15 @@ bool emitter::IsMovInstruction(instruction ins) case INS_mov: case INS_movapd: case INS_movaps: - case INS_movd: - case INS_movdqa: + case INS_movd32: + case INS_movd64: + case INS_movdqa32: case INS_vmovdqa64: - case INS_movdqu: + case INS_movdqu32: case INS_vmovdqu8: case INS_vmovdqu16: case INS_vmovdqu64: + case INS_movq: case INS_movsd_simd: case INS_movss: case INS_movsx: @@ -7110,7 +7388,6 @@ bool emitter::IsMovInstruction(instruction ins) } #if defined(TARGET_AMD64) - case INS_movq: case INS_movsxd: { return true; @@ -7165,10 +7442,10 @@ bool emitter::IsBitwiseInstruction(instruction ins) { switch (ins) { - case INS_pand: - case INS_pandn: - case INS_por: - case INS_pxor: + case INS_pandd: + case INS_pandnd: + case INS_pord: + case INS_pxord: return true; default: @@ -7201,8 +7478,8 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size) case INS_movapd: case INS_movaps: - case INS_movdqa: - case INS_movdqu: + case INS_movdqa32: + case INS_movdqu32: case INS_movupd: case INS_movups: { @@ -7238,7 +7515,8 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size) break; } - case INS_movd: + case INS_movd32: + case INS_movd64: { // Clears the upper bits hasSideEffect = true; @@ -7261,7 +7539,6 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size) break; } -#if defined(TARGET_AMD64) case INS_movq: { // Clears the upper bits @@ -7269,6 +7546,7 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size) break; } +#if defined(TARGET_AMD64) case INS_movsxd: { // Sign-extends the source @@ -7520,9 +7798,9 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN case INS_movapd: case INS_movaps: - case INS_movdqa: + case INS_movdqa32: case INS_vmovdqa64: - case INS_movdqu: + case INS_movdqu32: case INS_vmovdqu8: case INS_vmovdqu16: case INS_vmovdqu64: @@ -7535,19 +7813,20 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN break; } - case INS_movd: + case INS_movd32: + case INS_movd64: { assert(isFloatReg(dstReg) != isFloatReg(srcReg)); break; } -#if defined(TARGET_AMD64) case INS_movq: { assert(isFloatReg(dstReg) && isFloatReg(srcReg)); break; } +#if defined(TARGET_AMD64) case INS_movsxd: { assert(isGeneralRegister(dstReg) && isGeneralRegister(srcReg)); @@ -7640,6 
+7919,7 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum SetEvexNdIfNeeded(id, instOptions); SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); if (id->idIsEvexNdContextSet() && IsApxNDDEncodableInstruction(ins)) { @@ -8418,7 +8698,7 @@ void emitter::emitIns_R_R_C_R(instruction ins, } //------------------------------------------------------------------------ -// emitIns_R_R_R_S: emits the code for a instruction that takes a register operand, a variable index + +// emitIns_R_R_S_R: emits the code for a instruction that takes a register operand, a variable index + // offset, another register operand, and that returns a value in register // // Arguments: @@ -8562,6 +8842,7 @@ void emitter::emitIns_R_C( { SetEvexBroadcastIfNeeded(id, instOptions); SetEvexEmbMaskIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); sz = emitInsSizeCV(id, insCodeRM(ins)); } @@ -9749,7 +10030,7 @@ void emitter::emitIns_SIMD_R_R_R_A(instruction ins, GenTreeIndir* indir, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)); + assert(Is3OpRmwInstruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 @@ -9782,7 +10063,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, int offs, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)); + assert(Is3OpRmwInstruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 @@ -9813,7 +10094,7 @@ void emitter::emitIns_SIMD_R_R_R_R(instruction ins, regNumber op3Reg, insOpts instOptions) { - if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)) + if (Is3OpRmwInstruction(ins)) { assert(UseSimdEncoding()); @@ -9900,7 +10181,7 @@ void emitter::emitIns_SIMD_R_R_R_S(instruction ins, int offs, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)); + assert(Is3OpRmwInstruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 @@ -10492,6 +10773,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int va SetEvexBroadcastIfNeeded(id, instOptions); SetEvexEmbMaskIfNeeded(id, instOptions); SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs); id->idCodeSize(sz); @@ -10852,50 +11134,36 @@ void emitter::emitAdjustStackDepth(instruction ins, ssize_t val) */ // clang-format off -void emitter::emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di, - regNumber ireg, - regNumber xreg, - unsigned xmul, - ssize_t disp, - bool isJump, - bool noSafePoint) +void emitter::emitIns_Call(const EmitCallParams& params) // clang-format on { /* Sanity check the arguments depending on callType */ - assert(callType < EC_COUNT); + assert(params.callType < EC_COUNT); if (!emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI)) { - assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR) || - (addr != nullptr && ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); + assert((params.callType != EC_FUNC_TOKEN && 
params.callType != EC_FUNC_TOKEN_INDIR) || + (params.addr != nullptr && params.ireg == REG_NA && params.xreg == REG_NA && params.xmul == 0 && + params.disp == 0)); } - assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); - assert(callType != EC_INDIR_ARD || (addr == nullptr)); + assert(params.callType != EC_INDIR_R || (params.addr == nullptr && params.ireg < REG_COUNT && + params.xreg == REG_NA && params.xmul == 0 && params.disp == 0)); + assert(params.callType != EC_INDIR_ARD || (params.addr == nullptr)); // Our stack level should be always greater than the bytes of arguments we push. Just // a sanity test. - assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel); + assert((unsigned)abs((signed)params.argSize) <= codeGen->genStackLevel); // Trim out any callee-trashed registers from the live set. - regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); - gcrefRegs &= savedSet; - byrefRegs &= savedSet; + regMaskTP savedSet = emitGetGCRegsSavedOrModified(params.methHnd); + regMaskTP gcrefRegs = params.gcrefRegs & savedSet; + regMaskTP byrefRegs = params.byrefRegs & savedSet; #ifdef DEBUG if (EMIT_GC_VERBOSE) { - printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); - dumpConvertedVarSet(emitComp, ptrVars); + printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, params.ptrVars)); + dumpConvertedVarSet(emitComp, params.ptrVars); printf(", gcrefRegs="); printRegMaskInt(gcrefRegs); emitDispRegSet(gcrefRegs); @@ -10907,9 +11175,9 @@ void emitter::emitIns_Call(EmitCallType callType, #endif /* Managed RetVal: emit sequence point for the call */ - if (emitComp->opts.compDbgInfo && di.IsValid()) + if (emitComp->opts.compDbgInfo && params.debugInfo.IsValid()) { - codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + codeGen->genIPmappingAdd(IPmappingDscKind::Normal, params.debugInfo, false); } /* @@ -10928,61 +11196,63 @@ void emitter::emitIns_Call(EmitCallType callType, instrDesc* id; - assert(argSize % REGSIZE_BYTES == 0); - int argCnt = (int)(argSize / (int)REGSIZE_BYTES); // we need a signed-divide + assert(params.argSize % REGSIZE_BYTES == 0); + int argCnt = (int)(params.argSize / (int)REGSIZE_BYTES); // we need a signed-divide - if ((callType == EC_INDIR_R) || (callType == EC_INDIR_ARD)) + if ((params.callType == EC_INDIR_R) || (params.callType == EC_INDIR_ARD)) { /* Indirect call, virtual calls */ - id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize)); + id = emitNewInstrCallInd(argCnt, params.disp, params.ptrVars, gcrefRegs, byrefRegs, + params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize), + params.hasAsyncRet); } else { // Helper/static/nonvirtual/function calls (direct or through handle), // and calls to an absolute addr. 
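Aside: this hunk is largely a mechanical parameter-object refactor: the long `emitIns_Call` argument list becomes a single `EmitCallParams` read through `params.`, so call sites set only the fields they care about and new fields such as the async-return flag can be added without touching every caller. A tiny sketch of the same refactor on an invented function (the struct and fields below are illustrative, not the emitter's actual definition):

```cpp
#include <cstddef>
#include <cstdio>

// Before: void EmitCall(int callType, void* addr, ptrdiff_t argSize,
//                       bool isJump, bool noSafePoint, bool hasAsyncRet);
// After: one parameter object with defaults, passed by const reference.
struct CallParams
{
    int       callType    = 0;
    void*     addr        = nullptr;
    ptrdiff_t argSize     = 0;
    bool      isJump      = false;
    bool      noSafePoint = false;
    bool      hasAsyncRet = false; // new fields slot in without breaking existing call sites
};

static void EmitCall(const CallParams& params)
{
    std::printf("type=%d argSize=%td jump=%d\n", params.callType, params.argSize, (int)params.isJump);
}

int main()
{
    CallParams p;
    p.callType = 1;
    p.argSize  = 16;
    EmitCall(p); // only the relevant fields are spelled out at the call site
}
```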
- assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR); + assert(params.callType == EC_FUNC_TOKEN || params.callType == EC_FUNC_TOKEN_INDIR); - id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize)); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, + params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize), + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ // If the method returns a GC ref, mark EAX appropriately - if (retSize == EA_GCREF) + if (params.retSize == EA_GCREF) { gcrefRegs |= RBM_EAX; } - else if (retSize == EA_BYREF) + else if (params.retSize == EA_BYREF) { byrefRegs |= RBM_EAX; } #ifdef UNIX_AMD64_ABI // If is a multi-register return method is called, mark RDX appropriately (for System V AMD64). - if (secondRetSize == EA_GCREF) + if (params.secondRetSize == EA_GCREF) { gcrefRegs |= RBM_RDX; } - else if (secondRetSize == EA_BYREF) + else if (params.secondRetSize == EA_BYREF) { byrefRegs |= RBM_RDX; } #endif // UNIX_AMD64_ABI - VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); + VarSetOps::Assign(emitComp, emitThisGCrefVars, params.ptrVars); emitThisGCrefRegs = gcrefRegs; emitThisByrefRegs = byrefRegs; /* Set the instruction - special case jumping a function (tail call) */ instruction ins = INS_call; - if (isJump) + if (params.isJump) { - if (callType == EC_FUNC_TOKEN) + if (params.callType == EC_FUNC_TOKEN) { ins = INS_l_jmp; } @@ -10994,16 +11264,16 @@ void emitter::emitIns_Call(EmitCallType callType, id->idIns(ins); // for the purpose of GC safepointing tail-calls are not real calls - id->idSetIsNoGC(isJump || noSafePoint || emitNoGChelper(methHnd)); + id->idSetIsNoGC(params.isJump || params.noSafePoint || emitNoGChelper(params.methHnd)); UNATIVE_OFFSET sz; // Record the address: method, indirection, or funcptr - if ((callType == EC_INDIR_R) || (callType == EC_INDIR_ARD)) + if ((params.callType == EC_INDIR_R) || (params.callType == EC_INDIR_ARD)) { // This is an indirect call/jmp (either a virtual call or func ptr call) - if (callType == EC_INDIR_R) // call reg + if (params.callType == EC_INDIR_R) // call reg { id->idSetIsCallRegPtr(); } @@ -11013,9 +11283,9 @@ void emitter::emitIns_Call(EmitCallType callType, id->idInsFmt(emitInsModeFormat(ins, IF_ARD)); - id->idAddr()->iiaAddrMode.amBaseReg = ireg; - id->idAddr()->iiaAddrMode.amIndxReg = xreg; - id->idAddr()->iiaAddrMode.amScale = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1; + id->idAddr()->iiaAddrMode.amBaseReg = params.ireg; + id->idAddr()->iiaAddrMode.amIndxReg = params.xreg; + id->idAddr()->iiaAddrMode.amScale = params.xmul ? emitEncodeScale(params.xmul) : emitter::OPSZ1; code_t code = insCodeMR(ins); if (ins == INS_tail_i_jmp) @@ -11027,9 +11297,9 @@ void emitter::emitIns_Call(EmitCallType callType, sz = emitInsSizeAM(id, code); - if (ireg == REG_NA && xreg == REG_NA) + if (params.ireg == REG_NA && params.xreg == REG_NA) { - if (codeGen->genCodeIndirAddrNeedsReloc(disp)) + if (codeGen->genCodeIndirAddrNeedsReloc(params.disp)) { id->idSetIsDspReloc(); } @@ -11039,20 +11309,20 @@ void emitter::emitIns_Call(EmitCallType callType, // An absolute indir address that doesn't need reloc should fit within 32-bits // to be encoded as offset relative to zero. 
This addr mode requires an extra // SIB byte - noway_assert((size_t) static_cast(reinterpret_cast(addr)) == (size_t)addr); + noway_assert((size_t) static_cast(reinterpret_cast(params.addr)) == (size_t)params.addr); sz++; } #endif // TARGET_AMD64 } } - else if (callType == EC_FUNC_TOKEN_INDIR) + else if (params.callType == EC_FUNC_TOKEN_INDIR) { // call/jmp [method_addr] - assert(addr != nullptr); + assert(params.addr != nullptr); id->idInsFmt(IF_METHPTR); - id->idAddr()->iiaAddr = (BYTE*)addr; + id->idAddr()->iiaAddr = (BYTE*)params.addr; sz = 6; if (TakesRex2Prefix(id)) @@ -11063,7 +11333,7 @@ void emitter::emitIns_Call(EmitCallType callType, // Since this is an indirect call through a pointer and we don't // currently pass in emitAttr into this function, we query codegen // whether addr needs a reloc. - if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr)) + if (codeGen->genCodeIndirAddrNeedsReloc((size_t)params.addr)) { id->idSetIsDspReloc(); } @@ -11073,7 +11343,7 @@ void emitter::emitIns_Call(EmitCallType callType, // An absolute indir address that doesn't need reloc should fit within 32-bits // to be encoded as offset relative to zero. This addr mode requires an extra // SIB byte - noway_assert((size_t) static_cast(reinterpret_cast(addr)) == (size_t)addr); + noway_assert((size_t) static_cast(reinterpret_cast(params.addr)) == (size_t)params.addr); sz++; } #endif // TARGET_AMD64 @@ -11082,21 +11352,21 @@ void emitter::emitIns_Call(EmitCallType callType, { // This is a simple direct call/jmp: call/jmp helper/method/addr - assert(callType == EC_FUNC_TOKEN); + assert(params.callType == EC_FUNC_TOKEN); - assert(addr != nullptr || emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI)); + assert(params.addr != nullptr || emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI)); id->idInsFmt(IF_METHOD); sz = 5; - id->idAddr()->iiaAddr = (BYTE*)addr; + id->idAddr()->iiaAddr = (BYTE*)params.addr; // Direct call to a method and no addr indirection is needed. 
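Aside: the `sz = 5` and `sz = 6` values above match the x86 encodings being chosen: a direct near call is `E8` followed by a 32-bit displacement (5 bytes), while a call through a memory slot is `FF /2` with a ModRM byte and a 32-bit displacement (6 bytes). The displacement of the direct form is measured from the end of the call instruction, which a small sketch makes concrete:

```cpp
#include <cstdint>
#include <cstdio>

// A direct near call is E8 <rel32>, 5 bytes total; the displacement is relative
// to the address of the *next* instruction: rel32 = target - (callAddr + 5).
static int32_t CallRel32(uint64_t callAddr, uint64_t target)
{
    return (int32_t)(target - (callAddr + 5));
}

int main()
{
    // A call emitted at 0x1000 that targets 0x1234 encodes rel32 = 0x22f.
    std::printf("rel32 = 0x%x\n", (unsigned)CallRel32(0x1000, 0x1234));
}
```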
- if (codeGen->genCodeAddrNeedsReloc((size_t)addr)) + if (codeGen->genCodeAddrNeedsReloc((size_t)params.addr)) { id->idSetIsDspReloc(); - if ((size_t)methHnd == 1) + if ((size_t)params.methHnd == 1) { id->idSetTlsGD(); sz += 1; // For REX.W prefix @@ -11106,14 +11376,14 @@ void emitter::emitIns_Call(EmitCallType callType, if (m_debugInfoSize > 0) { - INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo); - id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token + INDEBUG(id->idDebugOnlyInfo()->idCallSig = params.sigInfo); + id->idDebugOnlyInfo()->idMemCookie = (size_t)params.methHnd; // method token } #ifdef LATE_DISASM - if (addr != nullptr) + if (params.addr != nullptr) { - codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); + codeGen->getDisAssembler().disSetMethod((size_t)params.addr, params.methHnd); } #endif // LATE_DISASM @@ -11126,10 +11396,10 @@ void emitter::emitIns_Call(EmitCallType callType, /* The call will pop the arguments */ - if (emitCntStackDepth && argSize > 0) + if (emitCntStackDepth && params.argSize > 0) { - noway_assert((ssize_t)emitCurStackLvl >= argSize); - emitCurStackLvl -= (int)argSize; + noway_assert((ssize_t)emitCurStackLvl >= params.argSize); + emitCurStackLvl -= (int)params.argSize; assert((int)emitCurStackLvl >= 0); } @@ -12394,6 +12664,27 @@ void emitter::emitDispIns( sstr = codeGen->genInsDisplayName(id); printf(" %-9s", sstr); +#ifdef TARGET_AMD64 + if (IsCCMP(id->idIns())) + { + // print finite set notation for DFV + unsigned dfv = id->idGetEvexDFV(); + char dfvstr[20] = {0}; + int len = 0; + if (dfv & INS_FLAGS_OF) + len += snprintf(dfvstr + len, 4, "of,"); + if (dfv & INS_FLAGS_SF) + len += snprintf(dfvstr + len, 4, "sf,"); + if (dfv & INS_FLAGS_ZF) + len += snprintf(dfvstr + len, 4, "zf,"); + if (dfv & INS_FLAGS_CF) + len += snprintf(dfvstr + len, 4, "cf,"); + if (len) + dfvstr[len - 1] = 0; + printf("{dfv=%s} ", dfvstr); + } +#endif // TARGET_AMD64 + #ifndef HOST_UNIX if (strnlen_s(sstr, 10) >= 9) #else // HOST_UNIX @@ -12610,9 +12901,9 @@ void emitter::emitDispIns( { switch (ins) { - case INS_vextractf128: + case INS_vextractf32x4: case INS_vextractf64x2: - case INS_vextracti128: + case INS_vextracti32x4: case INS_vextracti64x2: { // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr" @@ -12663,6 +12954,19 @@ void emitter::emitDispIns( case IF_RRW_RRD_ARD: case IF_RWR_RWR_ARD: { + if ((ins == INS_bextr) || (ins == INS_bzhi) || (ins == INS_sarx) || (ins == INS_shlx) || (ins == INS_shrx)) + { + // These instructions have their operands swapped to simplify the emitter implementation. + // They will appear here as IF_RWR_RRD_ARD but should actually + // display as if they were IF_RWR_ARD_RRD. + + printf("%s", emitRegName(id->idReg1(), attr)); + printf(", %s", sstr); + emitDispAddrMode(id); + printf(", %s", emitRegName(id->idReg2(), attr)); + break; + } + printf("%s", emitRegName(id->idReg1(), attr)); emitDispEmbMasking(id); printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); @@ -12960,6 +13264,20 @@ void emitter::emitDispIns( case IF_RRW_RRD_SRD: case IF_RWR_RWR_SRD: { + if ((ins == INS_bextr) || (ins == INS_bzhi) || (ins == INS_sarx) || (ins == INS_shlx) || (ins == INS_shrx)) + { + // These instructions have their operands swapped to simplify the emitter implementation. + // They will appear here as IF_RWR_RRD_SRD but should actually + // display as if they were IF_RWR_SRD_RRD. 
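Aside: the `{dfv=...}` printing above builds a comma-separated flag list with repeated `snprintf` calls and then trims the trailing comma. A standalone sketch of that formatting step; the flag names and bit values are invented here rather than taken from the JIT's `INS_FLAGS_*` constants:

```cpp
#include <cstddef>
#include <cstdio>

// Illustrative flag bits; the real code tests the instruction's dfv bits.
enum { FLAG_OF = 1, FLAG_SF = 2, FLAG_ZF = 4, FLAG_CF = 8 };

// Append "of,sf,zf,cf" for each set bit, then drop the trailing comma.
static void FormatFlags(unsigned flags, char* buf, std::size_t bufSize)
{
    int len = 0;
    if (flags & FLAG_OF) len += std::snprintf(buf + len, bufSize - len, "of,");
    if (flags & FLAG_SF) len += std::snprintf(buf + len, bufSize - len, "sf,");
    if (flags & FLAG_ZF) len += std::snprintf(buf + len, bufSize - len, "zf,");
    if (flags & FLAG_CF) len += std::snprintf(buf + len, bufSize - len, "cf,");
    buf[len > 0 ? len - 1 : 0] = '\0'; // trim the trailing comma (or produce an empty string)
}

int main()
{
    char buf[20];
    FormatFlags(FLAG_OF | FLAG_ZF, buf, sizeof(buf));
    std::printf("{dfv=%s}\n", buf); // prints "{dfv=of,zf}"
}
```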
+ + printf("%s", emitRegName(id->idReg1(), attr)); + printf(", %s", sstr); + emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), + id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + printf(", %s", emitRegName(id->idReg2(), attr)); + break; + } + printf("%s", emitRegName(id->idReg1(), attr)); emitDispEmbMasking(id); printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); @@ -13070,16 +13388,28 @@ void emitter::emitDispIns( case INS_cvttsd2si32: case INS_cvttsd2si64: - case INS_cvtss2si: - case INS_cvtsd2si: + case INS_cvtsd2si32: + case INS_cvtsd2si64: + case INS_cvtss2si32: + case INS_cvtss2si64: case INS_cvttss2si32: case INS_cvttss2si64: - case INS_vcvtsd2usi: - case INS_vcvtss2usi: + case INS_vcvtsd2usi32: + case INS_vcvtsd2usi64: + case INS_vcvtss2usi32: + case INS_vcvtss2usi64: case INS_vcvttsd2usi32: case INS_vcvttsd2usi64: case INS_vcvttss2usi32: case INS_vcvttss2usi64: + case INS_vcvttsd2sis32: + case INS_vcvttsd2sis64: + case INS_vcvttss2sis32: + case INS_vcvttss2sis64: + case INS_vcvttsd2usis32: + case INS_vcvttsd2usis64: + case INS_vcvttss2usis32: + case INS_vcvttss2usis64: { assert(!id->idIsEvexAaaContextSet()); printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); @@ -13178,21 +13508,16 @@ void emitter::emitDispIns( regNumber reg2 = id->idReg2(); regNumber reg3 = id->idReg3(); - if (ins == INS_bextr || ins == INS_bzhi -#ifdef TARGET_AMD64 - || ins == INS_shrx || ins == INS_shlx || ins == INS_sarx -#endif - ) + if ((ins == INS_bextr) || (ins == INS_bzhi) || (ins == INS_sarx) || (ins == INS_shlx) || (ins == INS_shrx)) { - // BMI bextr,bzhi, shrx, shlx and sarx encode the reg2 in VEX.vvvv and reg3 in modRM, - // which is different from most of other instructions - regNumber tmp = reg2; - reg2 = reg3; - reg3 = tmp; + // These instructions have their operands swapped to simplify the emitter implementation. + // They encode reg3 in VEX.vvvv and reg2 in modRM, which is opposite most instructions. + // We swap them back here so they will display in the correct order. + std::swap(reg2, reg3); } emitAttr attr3 = attr; - if (hasTupleTypeInfo(ins) && ((insTupleTypeInfo(ins) & INS_TT_MEM128) != 0)) + if ((insTupleTypeInfo(ins) & INS_TT_MEM128) != 0) { // Shift instructions take xmm for the 3rd operand regardless of instruction size. attr3 = EA_16BYTE; @@ -13226,9 +13551,9 @@ void emitter::emitDispIns( break; } - case INS_vinsertf128: + case INS_vinsertf32x4: case INS_vinsertf64x2: - case INS_vinserti128: + case INS_vinserti32x4: case INS_vinserti64x2: { attr = EA_16BYTE; @@ -13293,9 +13618,9 @@ void emitter::emitDispIns( switch (ins) { - case INS_vextractf128: + case INS_vextractf32x4: case INS_vextractf64x2: - case INS_vextracti128: + case INS_vextracti32x4: case INS_vextracti64x2: { tgtAttr = EA_16BYTE; @@ -13482,9 +13807,9 @@ void emitter::emitDispIns( { switch (ins) { - case INS_vextractf128: + case INS_vextractf32x4: case INS_vextractf64x2: - case INS_vextracti128: + case INS_vextracti32x4: case INS_vextracti64x2: { // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr" @@ -13535,6 +13860,20 @@ void emitter::emitDispIns( case IF_RRW_RRD_MRD: case IF_RWR_RWR_MRD: { + if ((ins == INS_bextr) || (ins == INS_bzhi) || (ins == INS_sarx) || (ins == INS_shlx) || (ins == INS_shrx)) + { + // These instructions have their operands swapped to simplify the emitter implementation. + // They will appear here as IF_RWR_RRD_MRD but should actually + // display as if they were IF_RWR_MRD_RRD. 
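Aside: per the comments above, the BMI shift/extract forms (`bextr`, `bzhi`, `sarx`, `shlx`, `shrx`) are stored with their second and third operands in encoding order (VEX.vvvv versus ModRM), so the display path swaps them back before printing. A minimal sketch of that display-side fix-up; the instruction list is taken from the diff, but the register strings and helper are invented:

```cpp
#include <cstdio>
#include <string>
#include <utility>

// True for instructions whose reg2/reg3 are stored in encoding order rather
// than the order the assembler prints them in.
static bool HasSwappedOperands(const std::string& ins)
{
    return ins == "bextr" || ins == "bzhi" || ins == "sarx" || ins == "shlx" || ins == "shrx";
}

static void Display(const std::string& ins, std::string reg1, std::string reg2, std::string reg3)
{
    if (HasSwappedOperands(ins))
    {
        std::swap(reg2, reg3); // undo the encoding-order storage for display only
    }
    std::printf("%s %s, %s, %s\n", ins.c_str(), reg1.c_str(), reg2.c_str(), reg3.c_str());
}

int main()
{
    Display("shlx", "rax", "rcx", "rbx"); // stored order (rcx, rbx) prints as "shlx rax, rbx, rcx"
}
```

Swapping at display time keeps the encoder simple (operands stay in the order the prefix fields want) while the listing still reads like normal assembly.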
+ + printf("%s", emitRegName(id->idReg1(), attr)); + printf(", %s", sstr); + offs = emitGetInsDsp(id); + emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + printf(", %s", emitRegName(id->idReg2(), attr)); + break; + } + printf("%s", emitRegName(id->idReg1(), attr)); emitDispEmbMasking(id); printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); @@ -13974,12 +14313,12 @@ BYTE* emitter::emitOutputAlign(insGroup* ig, instrDesc* id, BYTE* dst) #ifdef DEBUG // For cases where 'align' was placed behind a 'jmp' in an IG that does not - // immediately preced the loop IG, we do not know in advance the offset of + // immediately precede the loop IG, we do not know in advance the offset of // IG having loop. For such cases, skip the padding calculation validation. - // For prejit, `dst` is not aliged as requested, but the final assembly will have them aligned. + // For AOT, `dst` is not aligned as requested, but the final assembly will have them aligned. // So, just calculate the offset of the current `dst` from the start. - size_t offset = emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) ? emitCurCodeOffs(dst) : (size_t)dst; + size_t offset = emitComp->IsAot() ? emitCurCodeOffs(dst) : (size_t)dst; bool validatePadding = !alignInstr->isPlacedAfterJmp; #endif @@ -14409,6 +14748,12 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) NO_WAY("unexpected size"); break; } +#ifdef TARGET_AMD64 + if (ins == INS_crc32_apx || ins == INS_movbe_apx) + { + code |= (insEncodeReg345(id, id->idReg1(), size, &code) << 8); + } +#endif // TARGET_AMD64 } // Output the REX prefix @@ -14542,7 +14887,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } #else // TARGET_AMD64 // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero. - // This addr mode should never be used while generating relocatable ngen code nor if + // This addr mode should never be used while generating relocatable AOT code nor if // the addr can be encoded as pc-relative address. noway_assert(!emitComp->opts.compReloc); noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32); @@ -15017,8 +15362,14 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { case IF_RWR_ARD: case IF_RRW_ARD: + case IF_RWR_ARD_CNS: + case IF_RRW_ARD_CNS: + case IF_RWR_ARD_RRD: + case IF_RRW_ARD_RRD: case IF_RWR_RRD_ARD: case IF_RRW_RRD_ARD: + case IF_RWR_RRD_ARD_CNS: + case IF_RWR_RRD_ARD_RRD: { emitGCregDeadUpd(id->idReg1(), dst); break; @@ -15231,6 +15582,12 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { dst += emitOutputByte(dst, 0x66); } +#ifdef TARGET_AMD64 + else + { + code |= EXTENDED_EVEX_PP_BITS; + } +#endif // TARGET_AMD64 } FALLTHROUGH; @@ -15276,6 +15633,14 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) NO_WAY("unexpected size"); break; } +#ifdef TARGET_AMD64 + if (ins == INS_crc32_apx || ins == INS_movbe_apx) + { + // The promoted CRC32 is in 1-byte opcode, unlike other instructions on this path, the register encoding for + // CRC32 need to be done here. 
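Aside: the `insEncodeReg345(...) << 8` lines above are standard ModRM construction: the register field occupies bits 5..3 of the ModRM byte (hence the helper's name), and the `<< 8` moves that byte into the position one byte above the low opcode byte in the packed code word. A freestanding sketch of the byte layout, using plain integers rather than emitter types:

```cpp
#include <cstdint>
#include <cstdio>

// ModRM packs three fields into one byte: mod[7:6] | reg[5:3] | rm[2:0].
static uint8_t ModRM(unsigned mod, unsigned reg, unsigned rm)
{
    return (uint8_t)(((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7));
}

int main()
{
    // Register-to-register form (mod = 3), reg field = 2, rm field = 1.
    unsigned opcodeByte = 0x0F;                               // stand-in primary opcode
    unsigned packed     = opcodeByte | (ModRM(3, 2, 1) << 8); // ModRM lands in the next byte
    std::printf("modrm=%02x packed=%04x\n", (unsigned)ModRM(3, 2, 1), packed);
}
```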
+ code |= (insEncodeReg345(id, id->idReg1(), size, &code) << 8); + } +#endif // TARGET_AMD64 } // Output the REX prefix @@ -16331,7 +16696,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) if (IsSimdInstruction(ins)) { - assert((ins != INS_movd) || (isFloatReg(reg1) != isFloatReg(reg2))); + assert(((ins != INS_movd32) && (ins != INS_movd64)) || (isFloatReg(reg1) != isFloatReg(reg2))); if (ins == INS_kmovb_gpr || ins == INS_kmovw_gpr || ins == INS_kmovd_gpr || ins == INS_kmovq_gpr) { @@ -16344,7 +16709,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) code |= 0x01; } } - else if ((ins != INS_movd) || isFloatReg(reg1)) + else if (((ins != INS_movd32) && (ins != INS_movd64)) || isFloatReg(reg1)) { code = insCodeRM(ins); } @@ -16388,7 +16753,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || (ins == INS_tzcnt) #ifdef TARGET_AMD64 - || (ins == INS_lzcnt_apx) || (ins == INS_tzcnt_apx) || (ins == INS_popcnt_apx) + || (ins == INS_lzcnt_apx) || (ins == INS_tzcnt_apx) || (ins == INS_popcnt_apx) || (ins == INS_crc32_apx) #endif // TARGET_AMD64 ) { @@ -16400,11 +16765,24 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) { code |= 0x0100; } +#ifdef TARGET_AMD64 + if ((ins == INS_crc32_apx) && (size > EA_1BYTE)) + { + code |= 0x01; + } +#endif // TARGET_AMD64 - if (size == EA_2BYTE && !TakesApxExtendedEvexPrefix(id)) + if (size == EA_2BYTE) { - assert(ins == INS_crc32); - dst += emitOutputByte(dst, 0x66); + if (!TakesApxExtendedEvexPrefix(id)) + { + assert(ins == INS_crc32); + dst += emitOutputByte(dst, 0x66); + } + else + { + code |= EXTENDED_EVEX_PP_BITS; + } } else if (size == EA_8BYTE) { @@ -16423,7 +16801,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // So the logic should be: // checking if those new features are used, then check if EGPRs are involved. // EGPRs will be supported by EVEX anyway, so don't need to check in the first place. - assert(!TakesSimdPrefix(id)); + assert(!TakesSimdPrefix(id) || TakesApxExtendedEvexPrefix(id)); code = insCodeMR(ins); code = AddX86PrefixIfNeeded(id, code, size); code = insEncodeMRreg(id, code); @@ -16444,7 +16822,6 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // Output a size prefix for a 16-bit operand if (TakesApxExtendedEvexPrefix(id)) { - assert(IsApxExtendedEvexInstruction(ins)); assert(hasEvexPrefix(code)); // Evex.pp should already be added when adding the prefix. 
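Aside: the EA_2BYTE branch above either emits the legacy 0x66 operand-size prefix byte or sets the EVEX pp field instead, since VEX/EVEX fold the mandatory legacy prefixes into two pp bits (00 = none, 01 = 0x66, 10 = 0xF3, 11 = 0xF2). A tiny sketch of that fixed mapping; the prefix byte values and pp codes are architectural, the function itself is only illustrative:

```cpp
#include <cstdint>
#include <cstdio>

// Map a mandatory legacy prefix byte to the 2-bit pp field used by VEX/EVEX.
static unsigned PpFromLegacyPrefix(uint8_t prefix)
{
    switch (prefix)
    {
        case 0x66: return 0x1; // operand-size / SSE "66" prefix
        case 0xF3: return 0x2;
        case 0xF2: return 0x3;
        default:   return 0x0; // no mandatory prefix
    }
}

int main()
{
    std::printf("pp(0x66)=%u pp(0xF2)=%u\n", PpFromLegacyPrefix(0x66), PpFromLegacyPrefix(0xF2));
}
```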
assert((code & EXTENDED_EVEX_PP_BITS) != 0); @@ -16453,10 +16830,21 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) { dst += emitOutputByte(dst, 0x66); } - FALLTHROUGH; + + code |= 0x1; + break; case EA_4BYTE: // Set the 'w' bit to get the large version + +#ifdef TARGET_AMD64 + if (TakesApxExtendedEvexPrefix(id)) + { + assert(hasEvexPrefix(code)); + // Evex.pp should already be added when adding the prefix + assert((code & EXTENDED_EVEX_PP_BITS) == 0); + } +#endif code |= 0x1; break; @@ -16494,7 +16882,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) { regFor345Bits = reg1; } - if (ins == INS_movd) + if ((ins == INS_movd32) || (ins == INS_movd64)) { assert(isFloatReg(reg1) != isFloatReg(reg2)); if (isFloatReg(reg2)) @@ -16516,7 +16904,11 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) regCode = insEncodeReg012(id, reg2, size, &code); } +#ifdef TARGET_AMD64 + if (TakesSimdPrefix(id) && !IsCCMP(ins)) +#else if (TakesSimdPrefix(id)) +#endif { // In case of AVX instructions that take 3 operands, we generally want to encode reg1 // as first source. In this case, reg1 is both a source and a destination. @@ -16843,14 +17235,13 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) code = insCodeACC(ins); assert(code < 0x100); - code |= 0x08; // Set the 'w' bit - unsigned regcode = insEncodeReg012(id, reg, size, &code); - code |= regcode; - // This is INS_mov and will not take VEX prefix assert(!TakesVexPrefix(ins)); code = AddX86PrefixIfNeededAndNotPresent(id, code, size); + code |= 0x08; // Set the 'w' bit + unsigned regcode = insEncodeReg012(id, reg, size, &code); + code |= regcode; if (TakesRexWPrefix(id)) { @@ -17615,22 +18006,9 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) // ssize_t emitter::GetInputSizeInBytes(instrDesc* id) const { + assert((unsigned)id->idIns() < ArrLen(CodeGenInterface::instInfo)); insFlags inputSize = static_cast((CodeGenInterface::instInfo[id->idIns()] & Input_Mask)); - // INS_movd can represent either movd or movq(https://github.com/dotnet/runtime/issues/47943). - // As such, this is a special case and we need to calculate size based on emitAttr. - if (id->idIns() == INS_movd) - { - if (EA_SIZE(id->idOpSize()) == EA_8BYTE) - { - inputSize = Input_64Bit; - } - else - { - inputSize = Input_32Bit; - } - } - switch (inputSize) { case 0: @@ -17671,10 +18049,7 @@ ssize_t emitter::TryEvexCompressDisp8Byte(instrDesc* id, ssize_t dsp, bool* dspI // path, but for those instructions with no tuple information, // APX-EVEX treat the scaling factor to be 1 constantly. instruction ins = id->idIns(); - // TODO-XArch-APX: - // This assert may need tweak if BMI1 instructions are promoted - // into EVEX for multiple features, currently only EVEX.NF. - assert(IsApxExtendedEvexInstruction(id->idIns())); + assert(IsApxExtendedEvexInstruction(ins) || IsBMIInstruction(ins)); *dspInByte = ((signed char)dsp == (ssize_t)dsp); return dsp; } @@ -17888,11 +18263,6 @@ ssize_t emitter::TryEvexCompressDisp8Byte(instrDesc* id, ssize_t dsp, bool* dspI * point past the generated code, and returns the size of the instruction * descriptor in bytes. 
*/ - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { assert(emitIssuing); @@ -18133,7 +18503,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutputWord(dst, code | 0x0500); #else // TARGET_AMD64 // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero. - // This addr mode should never be used while generating relocatable ngen code nor if + // This addr mode should never be used while generating relocatable AOT code nor if // the addr can be encoded as pc-relative address. noway_assert(!emitComp->opts.compReloc); noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32); @@ -18234,11 +18604,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) byrefRegs |= RBM_EAX; } -#ifdef UNIX_AMD64_ABI // If is a multi-register return method is called, mark RDX appropriately (for System V AMD64). if (id->idIsLargeCall()) { instrDescCGCA* idCall = (instrDescCGCA*)id; +#ifdef UNIX_AMD64_ABI if (idCall->idSecondGCref() == GCT_GCREF) { gcrefRegs |= RBM_RDX; @@ -18247,8 +18617,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { byrefRegs |= RBM_RDX; } - } #endif // UNIX_AMD64_ABI + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } + } // If the GC register set has changed, report the new set if (gcrefRegs != emitThisGCrefRegs) @@ -19281,8 +19655,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_MWR_RRD_CNS: case IF_MRW_RRD_CNS: { - assert((ins == INS_vextractf128) || (ins == INS_vextractf32x8) || (ins == INS_vextractf64x2) || - (ins == INS_vextractf64x4) || (ins == INS_vextracti128) || (ins == INS_vextracti32x8) || + assert((ins == INS_vextractf32x4) || (ins == INS_vextractf32x8) || (ins == INS_vextractf64x2) || + (ins == INS_vextractf64x4) || (ins == INS_vextracti32x4) || (ins == INS_vextracti32x8) || (ins == INS_vextracti64x2) || (ins == INS_vextracti64x4)); assert(UseSimdEncoding()); emitGetInsDcmCns(id, &cnsVal); @@ -19704,9 +20078,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) return sz; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif emitter::insFormat emitter::getMemoryOperation(instrDesc* id) const { @@ -19765,6 +20136,14 @@ emitter::insFormat emitter::ExtractMemoryFormat(insFormat insFmt) const return IF_NONE; } +#ifdef TARGET_AMD64 +// true if this 'imm' can be encoded as a input operand to a ccmp instruction +/*static*/ bool emitter::emitIns_valid_imm_for_ccmp(INT64 imm) +{ + return (((INT32)imm) == imm); +} +#endif + #if defined(DEBUG) || defined(LATE_DISASM) //---------------------------------------------------------------------------------------- @@ -19951,6 +20330,26 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cmovge: case INS_cmovle: case INS_cmovg: +#ifdef TARGET_AMD64 + // todo-xarch-apx: we need to double check the logic for ccmp + case INS_ccmpo: + case INS_ccmpno: + case INS_ccmpb: + case INS_ccmpae: + case INS_ccmpe: + case INS_ccmpne: + case INS_ccmpbe: + case INS_ccmpa: + case INS_ccmps: + case INS_ccmpns: + case INS_ccmpt: + case INS_ccmpf: + case INS_ccmpl: + case INS_ccmpge: + case INS_ccmple: + case INS_ccmpg: +#endif + if (memFmt == IF_NONE) { result.insThroughput = PERFSCORE_THROUGHPUT_4X; @@ -20076,6 +20475,22 @@ 
emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_imul_13: case INS_imul_14: case INS_imul_15: + case INS_imul_16: + case INS_imul_17: + case INS_imul_18: + case INS_imul_19: + case INS_imul_20: + case INS_imul_21: + case INS_imul_22: + case INS_imul_23: + case INS_imul_24: + case INS_imul_25: + case INS_imul_26: + case INS_imul_27: + case INS_imul_28: + case INS_imul_29: + case INS_imul_30: + case INS_imul_31: #endif // TARGET_AMD64 case INS_imul: result.insThroughput = PERFSCORE_THROUGHPUT_1C; @@ -20451,37 +20866,47 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_25C; break; - case INS_movd: - case INS_movq: // only MOVQ xmm, xmm is different (emitted by Sse2.MoveScalar, should use MOVDQU instead) + case INS_movd32: + case INS_movd64: + case INS_movq: if (memAccessKind == PERFSCORE_MEMORY_NONE) { - // movd r32, xmm or xmm, r32 - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; + if (isFloatReg(id->idReg1()) && isFloatReg(id->idReg2())) + { + // movq xmm, xmm + result.insThroughput = PERFSCORE_THROUGHPUT_3X; + result.insLatency = PERFSCORE_LATENCY_1C; + } + else + { + // movd r32/64, xmm or xmm, r32/64 + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + } } else if (memAccessKind == PERFSCORE_MEMORY_READ) { - // movd xmm, m32 + // ins xmm, m32/64 result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency += PERFSCORE_LATENCY_2C; } else { - // movd m32, xmm + // ins m32/64, xmm assert(memAccessKind == PERFSCORE_MEMORY_WRITE); result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_2C; } break; - case INS_movdqa: + case INS_movdqa32: case INS_vmovdqa64: - case INS_movdqu: + case INS_movdqu32: case INS_vmovdqu8: case INS_vmovdqu16: case INS_vmovdqu64: - case INS_vmovd: - case INS_vmovw: + case INS_vmovd_simd: + case INS_vmovw_simd: case INS_movaps: case INS_movups: case INS_movapd: @@ -20530,7 +20955,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case INS_movntdq: - case INS_movnti: + case INS_movnti32: + case INS_movnti64: case INS_movntps: case INS_movntpd: assert(memAccessKind == PERFSCORE_MEMORY_WRITE); @@ -20818,12 +21244,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cvttsd2si32: case INS_cvttsd2si64: - case INS_cvtsd2si: + case INS_cvtsd2si32: + case INS_cvtsd2si64: case INS_cvtsi2sd32: case INS_cvtsi2ss32: case INS_cvtsi2sd64: case INS_cvtsi2ss64: - case INS_vcvtsd2usi: + case INS_vcvtsd2usi32: + case INS_vcvtsd2usi64: case INS_vcvtusi2ss32: case INS_vcvtusi2ss64: case INS_vcvttsd2usi32: @@ -20857,8 +21285,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cvttss2si32: case INS_cvttss2si64: - case INS_cvtss2si: - case INS_vcvtss2usi: + case INS_cvtss2si32: + case INS_cvtss2si64: + case INS_vcvtss2usi32: + case INS_vcvtss2usi64: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += opSize == EA_8BYTE ? 
PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C; break; @@ -20889,13 +21319,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_psubusb: case INS_paddusw: case INS_psubusw: - case INS_pand: + case INS_pandd: case INS_vpandq: - case INS_pandn: + case INS_pandnd: case INS_vpandnq: - case INS_por: + case INS_pord: case INS_vporq: - case INS_pxor: + case INS_pxord: case INS_vpxorq: case INS_andpd: case INS_andps: @@ -21152,19 +21582,19 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vpermq_reg: case INS_vperm2i128: case INS_vperm2f128: - case INS_vextractf128: + case INS_vextractf32x4: case INS_vextractf32x8: case INS_vextractf64x2: case INS_vextractf64x4: - case INS_vextracti128: + case INS_vextracti32x4: case INS_vextracti32x8: case INS_vextracti64x2: case INS_vextracti64x4: - case INS_vinsertf128: + case INS_vinsertf32x4: case INS_vinsertf32x8: case INS_vinsertf64x2: case INS_vinsertf64x4: - case INS_vinserti128: + case INS_vinserti32x4: case INS_vinserti32x8: case INS_vinserti64x2: case INS_vinserti64x4: @@ -21186,6 +21616,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_popcnt_apx: case INS_lzcnt_apx: case INS_tzcnt_apx: + case INS_crc32_apx: #endif // TARGET_AMD64 { result.insThroughput = PERFSCORE_THROUGHPUT_1C; @@ -21376,8 +21807,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vpbroadcastd_gpr: case INS_vpbroadcastq: case INS_vpbroadcastq_gpr: - case INS_vbroadcasti128: - case INS_vbroadcastf128: + case INS_vbroadcasti32x4: + case INS_vbroadcastf32x4: case INS_vbroadcastf64x2: case INS_vbroadcasti64x2: case INS_vbroadcastf64x4: @@ -21574,6 +22005,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } case INS_movbe: +#ifdef TARGET_AMD64 + case INS_movbe_apx: +#endif if (memAccessKind == PERFSCORE_MEMORY_READ) { result.insThroughput = PERFSCORE_THROUGHPUT_2X; @@ -21594,7 +22028,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; } -#ifdef TARGET_AMD64 case INS_shlx: case INS_sarx: case INS_shrx: @@ -21603,7 +22036,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_2X; break; } -#endif case INS_vpmovb2m: case INS_vpmovw2m: diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 62f1d580ae0f..942c3d719749 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -41,9 +41,7 @@ inline static bool isHighSimdReg(regNumber reg) inline static bool isHighGPReg(regNumber reg) { #ifdef TARGET_AMD64 - // TODO-apx: the definition here is incorrect, we will need to revisit this after we extend the register definition. - // for now, we can simply use REX2 as REX. - return ((reg >= REG_R16) && (reg <= REG_R23)); + return ((reg >= REG_R16) && (reg <= REG_R31)); #else // X86 JIT operates in 32-bit mode and hence extended regs are not available. 
return false; @@ -122,20 +120,18 @@ static bool IsSSEInstruction(instruction ins); static bool IsSSEOrAVXInstruction(instruction ins); static bool IsAVXOnlyInstruction(instruction ins); static bool IsAvx512OnlyInstruction(instruction ins); -static bool IsFMAInstruction(instruction ins); -static bool IsPermuteVar2xInstruction(instruction ins); -static bool IsAVXVNNIInstruction(instruction ins); +static bool IsKMOVInstruction(instruction ins); +static bool Is3OpRmwInstruction(instruction ins); static bool IsBMIInstruction(instruction ins); static bool IsKInstruction(instruction ins); static bool IsKInstructionWithLBit(instruction ins); +static bool IsApxOnlyInstruction(instruction ins); static regNumber getBmiRegNumber(instruction ins); static regNumber getSseShiftRegNumber(instruction ins); -bool HasVexEncoding(instruction ins) const; -bool HasEvexEncoding(instruction ins) const; -bool HasRex2Encoding(instruction ins) const; -bool HasApxNdd(instruction ins) const; -bool HasApxNf(instruction ins) const; +static bool HasRex2Encoding(instruction ins); +static bool HasApxNdd(instruction ins); +static bool HasApxNf(instruction ins); bool IsVexEncodableInstruction(instruction ins) const; bool IsEvexEncodableInstruction(instruction ins) const; bool IsRex2EncodableInstruction(instruction ins) const; @@ -228,7 +224,8 @@ code_t AddVexPrefixIfNeededAndNotPresent(instruction ins, code_t code, emitAttr return code; } -insTupleType insTupleTypeInfo(instruction ins) const; +static insTupleType insTupleTypeInfo(instruction ins); +static unsigned insKMaskBaseSize(instruction ins); // 2-byte REX2 prefix starts with byte 0xD5 #define REX2_PREFIX_MASK_2BYTE 0xFF0000000000ULL @@ -296,6 +293,11 @@ bool HasKMaskRegisterDest(instruction ins) const case INS_vgatherqps: case INS_vgatherdpd: case INS_vgatherqpd: + // KMOV can be promoted to EVEX with APX. + case INS_kmovb_msk: + case INS_kmovw_msk: + case INS_kmovd_msk: + case INS_kmovq_msk: { return true; } @@ -517,15 +519,12 @@ void SetEvexEmbMaskIfNeeded(instrDesc* id, insOpts instOptions) { assert(UseEvexEncoding()); id->idSetEvexAaaContext(instOptions); - - if ((instOptions & INS_OPTS_EVEX_z_MASK) == INS_OPTS_EVEX_em_zero) - { - id->idSetEvexZContext(); - } } - else + + if ((instOptions & INS_OPTS_EVEX_z_MASK) == INS_OPTS_EVEX_em_zero) { - assert((instOptions & INS_OPTS_EVEX_z_MASK) == 0); + assert(UseEvexEncoding()); + id->idSetEvexZContext(); } } @@ -571,6 +570,25 @@ void SetEvexNfIfNeeded(instrDesc* id, insOpts instOptions) } } +//------------------------------------------------------------------------ +// SetEvexDFVIfNeeded: set default flag values on an instrDesc +// +// Arguments: +// id - instruction descriptor +// instOptions - emit options +// +void SetEvexDFVIfNeeded(instrDesc* id, insOpts instOptions) +{ +#if defined(TARGET_AMD64) + if ((instOptions & INS_OPTS_EVEX_dfv_MASK) != 0) + { + assert(UsePromotedEVEXEncoding()); + assert(IsCCMP(id->idIns())); + id->idSetEvexDFV(instOptions); + } +#endif +} + //------------------------------------------------------------------------ // AddSimdPrefixIfNeeded: Add the correct SIMD prefix. // Check if the prefix already exists befpre adding. 
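Several of the APX additions in this header and in emitxarch.cpp revolve around the new ccmp forms; the immediate check they rely on, emitIns_valid_imm_for_ccmp, simply verifies that a 64-bit constant survives a round trip through a sign-extended 32-bit immediate. A minimal standalone sketch of that round-trip test follows; the helper name and the sample values are illustrative, not the JIT's own code.

#include <cassert>
#include <cstdint>

// Sketch of the "fits in a sign-extended imm32" test used by emitIns_valid_imm_for_ccmp.
static bool FitsInSignExtendedImm32(int64_t imm)
{
    // Truncate to 32 bits, sign-extend back, and compare with the original value.
    return static_cast<int64_t>(static_cast<int32_t>(imm)) == imm;
}

int main()
{
    assert(FitsInSignExtendedImm32(0));              // trivially encodable
    assert(FitsInSignExtendedImm32(-1));             // all-ones sign-extends from 0xFFFFFFFF
    assert(FitsInSignExtendedImm32(0x7FFFFFFF));     // largest positive 32-bit value
    assert(!FitsInSignExtendedImm32(0x123456789LL)); // needs more than 32 bits
    return 0;
}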
@@ -683,6 +701,9 @@ static bool IsRexW1Instruction(instruction ins); static bool IsRexWXInstruction(instruction ins); static bool IsRexW1EvexInstruction(instruction ins); +static bool IsCCMP(instruction ins); +static insCC GetCCFromCCMP(instruction ins); + bool isAvx512Blendv(instruction ins) { return ins == INS_vblendmps || ins == INS_vblendmpd || ins == INS_vpblendmb || ins == INS_vpblendmd || @@ -727,14 +748,16 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); void emitGetInsCns(const instrDesc* id, CnsVal* cv) const; ssize_t emitGetInsAmdCns(const instrDesc* id, CnsVal* cv) const; @@ -1198,36 +1221,6 @@ void emitIns_BASE_R_R_I(instruction ins, emitAttr attr, regNumber op1Reg, regNum regNumber emitIns_BASE_R_R_RM( instruction ins, emitAttr attr, regNumber targetReg, GenTree* treeNode, GenTree* regOp, GenTree* rmOp); -enum EmitCallType -{ - EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method (call addr with RIP-relative encoding) - EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method (call [addr]/call [rip+addr]) - EC_INDIR_R, // Indirect call via register (call rax) - EC_INDIR_ARD, // Indirect call via an addressing mode (call [rax+rdx*8+disp]) - - EC_COUNT -}; - -// clang-format off -void emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di = DebugInfo(), - regNumber ireg = REG_NA, - regNumber xreg = REG_NA, - unsigned xmul = 0, - ssize_t disp = 0, - bool isJump = false, - bool noSafePoint = false); -// clang-format on - #ifdef TARGET_AMD64 // Is the last instruction emitted a call instruction? 
bool emitIsLastInsCall(); @@ -1290,7 +1283,7 @@ inline bool HasEmbeddedBroadcast(const instrDesc* id) const // inline bool HasEmbeddedMask(const instrDesc* id) const { - return id->idIsEvexAaaContextSet(); + return id->idIsEvexAaaContextSet() || id->idIsEvexZContextSet(); } inline bool HasHighSIMDReg(const instrDesc* id) const; @@ -1298,4 +1291,9 @@ inline bool HasExtendedGPReg(const instrDesc* id) const; inline bool HasMaskReg(const instrDesc* id) const; +#ifdef TARGET_AMD64 +// true if this 'imm' can be encoded as a input operand to a ccmp instruction +static bool emitIns_valid_imm_for_ccmp(INT64 imm); +#endif // TARGET_AMD64 + #endif // TARGET_XARCH diff --git a/src/coreclr/jit/error.cpp b/src/coreclr/jit/error.cpp index 5ae6cea056ef..afd74afac250 100644 --- a/src/coreclr/jit/error.cpp +++ b/src/coreclr/jit/error.cpp @@ -15,6 +15,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma hdrstop #endif #include "compiler.h" +#include "minipal/log.h" #if MEASURE_FATAL unsigned fatal_badCode; @@ -257,7 +258,7 @@ LogEnv::LogEnv(ICorJitInfo* aCompHnd) } /*****************************************************************************/ -extern "C" void __cdecl assertAbort(const char* why, const char* file, unsigned line) +extern "C" void assertAbort(const char* why, const char* file, unsigned line) { const char* msg = why; LogEnv* env = JitTls::GetLogEnv(); @@ -298,7 +299,7 @@ extern "C" void __cdecl assertAbort(const char* why, const char* file, unsigned // leading to additional asserts, or (2) tell the VM that the AltJit wants to skip this function, // thus falling back to the fallback JIT. Setting DOTNET_AltJitSkipOnAssert=1 chooses this "skip" // to the fallback JIT behavior. This is useful when doing ASM diffs, where we only want to see - // the first assert for any function, but we don't want to kill the whole ngen process on the + // the first assert for any function, but we don't want to kill the whole process on the // first assert (which would happen if you used DOTNET_NoGuiOnAssert=1 for example). if (JitConfig.AltJitSkipOnAssert() != 0) { @@ -318,7 +319,14 @@ int vflogf(FILE* file, const char* fmt, va_list args) // 0-length string means flush if (fmt[0] == '\0') { - fflush(file); + if (file == procstdout()) + { + minipal_log_flush_verbose(); + } + else + { + fflush(file); + } return 0; } @@ -331,8 +339,15 @@ int vflogf(FILE* file, const char* fmt, va_list args) OutputDebugStringA(buffer); } - // We use fputs here so that this executes as fast a possible - fputs(&buffer[0], file); + if (file == procstdout()) + { + minipal_log_write_verbose(buffer); + } + else + { + fputs(&buffer[0], file); + } + return written; } diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 76a9fc04ad60..a88be6113c34 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -89,6 +89,10 @@ void Compiler::fgCreateNewInitBB() block->setBBProfileWeight(entryWeight); } } + else + { + block->inheritWeight(fgFirstBB); + } // The new scratch bb will fall through to the old first bb FlowEdge* const edge = fgAddRefPred(fgFirstBB, block); @@ -368,6 +372,19 @@ void Compiler::fgRemoveEhfSuccessor(BasicBlock* block, const unsigned succIndex) (succCount - succIndex - 1) * sizeof(FlowEdge*)); } + // Recompute the likelihoods of the block's other successor edges. 
+ const weight_t removedLikelihood = succEdge->getLikelihood(); + const unsigned newSuccCount = succCount - 1; + + for (unsigned i = 0; i < newSuccCount; i++) + { + // If we removed all of the flow out of 'block', distribute flow among the remaining edges evenly. + const weight_t currLikelihood = succTab[i]->getLikelihood(); + const weight_t newLikelihood = + (removedLikelihood == 1.0) ? (1.0 / newSuccCount) : (currLikelihood / (1.0 - removedLikelihood)); + succTab[i]->setLikelihood(min(1.0, newLikelihood)); + } + #ifdef DEBUG // We only expect to see a successor once in the table. for (unsigned i = succIndex; i < (succCount - 1); i++) @@ -427,6 +444,19 @@ void Compiler::fgRemoveEhfSuccessor(FlowEdge* succEdge) } } + // Recompute the likelihoods of the block's other successor edges. + const weight_t removedLikelihood = succEdge->getLikelihood(); + const unsigned newSuccCount = succCount - 1; + + for (unsigned i = 0; i < newSuccCount; i++) + { + // If we removed all of the flow out of 'block', distribute flow among the remaining edges evenly. + const weight_t currLikelihood = succTab[i]->getLikelihood(); + const weight_t newLikelihood = + (removedLikelihood == 1.0) ? (1.0 / newSuccCount) : (currLikelihood / (1.0 - removedLikelihood)); + succTab[i]->setLikelihood(min(1.0, newLikelihood)); + } + assert(found); ehfDesc->bbeCount--; } @@ -885,12 +915,14 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed var_types varType = DUMMY_INIT(TYP_UNDEF); // TYP_ type bool typeIsNormed = false; FgStack pushedStack; - const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0; - const bool isInlining = compIsForInlining(); - unsigned retBlocks = 0; - int prefixFlags = 0; - bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan(); - const bool resolveTokens = preciseScan; + const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0; + const bool isInlining = compIsForInlining(); + unsigned retBlocks = 0; + int prefixFlags = 0; + bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan(); + const bool resolveTokens = preciseScan; + bool isReturnsArrayKnown = false; + bool returnsArray = false; // Track offsets where IL instructions begin in DEBUG builds. Used to // validate debug info generated by the JIT. @@ -2411,6 +2443,36 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed retBlocks++; break; + case CEE_NEWARR: + + if (makeInlineObservations) + { + if (!isReturnsArrayKnown) + { + if (info.compRetType == TYP_REF) + { + CORINFO_CLASS_HANDLE retClass = info.compMethodInfo->args.retTypeClass; + if (retClass != NO_CLASS_HANDLE) + { + uint32_t retClassAttribs = info.compCompHnd->getClassAttribs(retClass); + returnsArray = (retClassAttribs & CORINFO_FLG_ARRAY) != 0; + } + } + isReturnsArrayKnown = true; + } + + if (returnsArray && pushedStack.IsStackAtLeastOneDeep()) + { + FgStack::FgSlot slot0 = pushedStack.GetSlot0(); + + if (FgStack::IsConstantOrConstArg(slot0, impInlineInfo)) + { + compInlineResult->Note(InlineObservation::CALLEE_MAY_RETURN_SMALL_ARRAY); + } + } + } + break; + default: break; } @@ -2548,7 +2610,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed // change. The original this (info.compThisArg) then remains // unmodified in the method. fgAddInternal is responsible for // adding the code to copy the initial this into the temp. 
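As context for the likelihood bookkeeping added to fgRemoveEhfSuccessor above: when one successor edge is removed, the surviving edges are rescaled by 1/(1 - removedLikelihood) so they still sum to one, and flow is split evenly if the removed edge carried all of it. A small self-contained sketch of that arithmetic, using illustrative names and numbers rather than the JIT's own types:

#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>

// Rescale the surviving successor likelihoods after one edge is dropped,
// mirroring the rule used in the fgRemoveEhfSuccessor change.
static void RenormalizeLikelihoods(std::vector<double>& remaining, double removedLikelihood)
{
    for (double& likelihood : remaining)
    {
        likelihood = (removedLikelihood == 1.0)
                         ? (1.0 / remaining.size())                  // removed edge had all the flow: split evenly
                         : (likelihood / (1.0 - removedLikelihood)); // otherwise scale up proportionally
        likelihood = std::min(1.0, likelihood);
    }
}

int main()
{
    std::vector<double> remaining = {0.3, 0.2}; // an edge with likelihood 0.5 was removed
    RenormalizeLikelihoods(remaining, 0.5);
    assert(std::fabs(remaining[0] - 0.6) < 1e-9); // 0.3 / (1 - 0.5)
    assert(std::fabs(remaining[1] - 0.4) < 1e-9); // 0.2 / (1 - 0.5)
    return 0;
}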
- +// void Compiler::fgAdjustForAddressExposedOrWrittenThis() { LclVarDsc* thisVarDsc = lvaGetDesc(info.compThisArg); @@ -3425,20 +3487,54 @@ void Compiler::fgFindBasicBlocks() unsigned XTnum; - /* Are there any exception handlers? */ - + // Are there any exception handlers? + // if (info.compXcptnsCount > 0) { - noway_assert(!compIsForInlining()); + assert(!compIsForInlining() || opts.compInlineMethodsWithEH); - /* Check and mark all the exception handlers */ + if (compIsForInlining()) + { + // Verify we can expand the EH table as needed to incorporate the callee's EH clauses. + // Failing here should be extremely rare. + // + EHblkDsc* const dsc = fgTryAddEHTableEntries(0, info.compXcptnsCount, /* deferAdding */ true); + if (dsc == nullptr) + { + compInlineResult->NoteFatal(InlineObservation::CALLSITE_EH_TABLE_FULL); + } + } + // Check and mark all the exception handlers + // for (XTnum = 0; XTnum < info.compXcptnsCount; XTnum++) { CORINFO_EH_CLAUSE clause; info.compCompHnd->getEHinfo(info.compMethodHnd, XTnum, &clause); noway_assert(clause.HandlerLength != (unsigned)-1); + // If we're inlining, and the inlinee has a catch clause, we are currently + // unable to convey the type of the catch properly, as it is represented + // by a token. So, abandon inlining. + // + // TODO: if inlining methods with catches is rare, consider + // transforming class catches into runtime filters like we do in + // fgCreateFiltersForGenericExceptions + // + if (compIsForInlining()) + { + const bool isFinallyFaultOrFilter = + (clause.Flags & (CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_FAULT | CORINFO_EH_CLAUSE_FILTER)) != + 0; + + if (!isFinallyFaultOrFilter) + { + JITDUMP("Inlinee EH clause %u is a catch; we can't inline these (yet)\n", XTnum); + compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH); + return; + } + } + if (clause.TryLength <= 0) { BADCODE("try block length <=0"); @@ -3513,13 +3609,6 @@ void Compiler::fgFindBasicBlocks() return; } - noway_assert(info.compXcptnsCount == 0); - compHndBBtab = impInlineInfo->InlinerCompiler->compHndBBtab; - compHndBBtabAllocCount = - impInlineInfo->InlinerCompiler->compHndBBtabAllocCount; // we probably only use the table, not add to it. - compHndBBtabCount = impInlineInfo->InlinerCompiler->compHndBBtabCount; - info.compXcptnsCount = impInlineInfo->InlinerCompiler->info.compXcptnsCount; - // Use a spill temp for the return value if there are multiple return blocks, // or if the inlinee has GC ref locals. if ((info.compRetNativeType != TYP_VOID) && ((fgReturnCount > 1) || impInlineInfo->HasGcRefLocals())) @@ -3584,8 +3673,6 @@ void Compiler::fgFindBasicBlocks() lvaInlineeReturnSpillTempFreshlyCreated = true; } } - - return; } /* Mark all blocks within 'try' blocks as such */ @@ -3653,6 +3740,7 @@ void Compiler::fgFindBasicBlocks() BADCODE3("end of hnd block beyond end of method for try", " at offset %04X", tryBegOff); } + HBtab->ebdID = impInlineRoot()->compEHID++; HBtab->ebdTryBegOffset = tryBegOff; HBtab->ebdTryEndOffset = tryEndOff; HBtab->ebdFilterBegOffset = filterBegOff; @@ -4046,18 +4134,13 @@ void Compiler::fgFixEntryFlowForOSR() fgRedirectTargetEdge(fgFirstBB, fgOSREntryBB); - // We don't know the right weight for this block, since - // execution of the method was interrupted within the - // loop containing fgOSREntryBB. - // - // A plausible guess might be to sum the non-backedge - // weights of fgOSREntryBB and use those, but we don't - // have edge weights available yet. Note that might be - // an underestimate. 
- // - // For now we just guess that the loop will execute 100x. - // - fgFirstBB->inheritWeightPercentage(fgOSREntryBB, 1); + fgFirstBB->bbWeight = fgCalledCount; + fgFirstBB->CopyFlags(fgEntryBB, BBF_PROF_WEIGHT); + + if (fgCalledCount == BB_ZERO_WEIGHT) + { + fgFirstBB->bbSetRunRarely(); + } JITDUMP("OSR: redirecting flow at method entry from " FMT_BB " to OSR entry " FMT_BB " for the importer\n", fgFirstBB->bbNum, fgOSREntryBB->bbNum); @@ -4621,8 +4704,7 @@ BasicBlock* Compiler::fgSplitBlockAtEnd(BasicBlock* curr) newBlock->CopyFlags(curr); // Remove flags that the new block can't have. - newBlock->RemoveFlags(BBF_LOOP_HEAD | BBF_FUNCLET_BEG | BBF_KEEP_BBJ_ALWAYS | BBF_PATCHPOINT | - BBF_BACKWARD_JUMP_TARGET | BBF_LOOP_ALIGN); + newBlock->RemoveFlags(BBF_KEEP_BBJ_ALWAYS | BBF_PATCHPOINT | BBF_BACKWARD_JUMP_TARGET | BBF_LOOP_ALIGN); // Remove the GC safe bit on the new block. It seems clear that if we split 'curr' at the end, // such that all the code is left in 'curr', and 'newBlock' just gets the control flow, then @@ -4782,7 +4864,7 @@ BasicBlock* Compiler::fgSplitBlockAfterNode(BasicBlock* curr, GenTree* node) LIR::Range::ReverseIterator riterEnd; for (riter = currBBRange.rbegin(), riterEnd = currBBRange.rend(); riter != riterEnd; ++riter) { - if ((*riter)->gtOper == GT_IL_OFFSET) + if ((*riter)->OperIs(GT_IL_OFFSET)) { GenTreeILOffset* ilOffset = (*riter)->AsILOffset(); DebugInfo rootDI = ilOffset->gtStmtDI.GetRoot(); @@ -4821,7 +4903,9 @@ BasicBlock* Compiler::fgSplitBlockAtBeginning(BasicBlock* curr) if (curr->IsLIR()) { newBlock->SetFirstLIRNode(curr->GetFirstLIRNode()); + newBlock->SetLastLIRNode(curr->GetLastLIRNode()); curr->SetFirstLIRNode(nullptr); + curr->SetLastLIRNode(nullptr); } else { @@ -4883,6 +4967,10 @@ BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ) } newBlock->CopyFlags(curr, succ->GetFlagsRaw() & BBF_BACKWARD_JUMP); + // Async resumption stubs are permitted to branch into EH regions, so if we + // split such a branch we should also copy this flag. + newBlock->CopyFlags(curr, BBF_ASYNC_RESUMPTION); + JITDUMP("Splitting edge from " FMT_BB " to " FMT_BB "; adding " FMT_BB "\n", curr->bbNum, succ->bbNum, newBlock->bbNum); @@ -5027,7 +5115,7 @@ BasicBlock* Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) if (unreachable) { - PREFIX_ASSUME(bPrev != nullptr); + assert(bPrev != nullptr); fgUnreachableBlock(block); @@ -5197,68 +5285,6 @@ void Compiler::fgPrepareCallFinallyRetForRemoval(BasicBlock* block) block->SetKind(BBJ_ALWAYS); } -//------------------------------------------------------------------------ -// fgConnectFallThrough: fix flow from a block that previously had a fall through -// -// Arguments: -// bSrc - source of fall through -// bDst - target of fall through -// -// Returns: -// Newly inserted block after bSrc that jumps to bDst, -// or nullptr if bSrc already falls through to bDst -// -BasicBlock* Compiler::fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst) -{ - assert(bSrc != nullptr); - assert(fgPredsComputed); - BasicBlock* jmpBlk = nullptr; - - /* If bSrc falls through to a block that is not bDst, we will insert a jump to bDst */ - - if (bSrc->KindIs(BBJ_COND) && bSrc->FalseTargetIs(bDst) && !bSrc->NextIs(bDst)) - { - // Add a new block after bSrc which jumps to 'bDst' - jmpBlk = fgNewBBafter(BBJ_ALWAYS, bSrc, true); - FlowEdge* const oldEdge = bSrc->GetFalseEdge(); - - // Access the likelihood of oldEdge before - // it gets reset by SetTargetEdge below. 
- // - FlowEdge* const newEdge = fgAddRefPred(jmpBlk, bSrc, oldEdge); - fgReplacePred(oldEdge, jmpBlk); - jmpBlk->SetTargetEdge(oldEdge); - assert(jmpBlk->TargetIs(bDst)); - bSrc->SetFalseEdge(newEdge); - - // When adding a new jmpBlk we will set the bbWeight and bbFlags - // - if (fgHaveProfileWeights()) - { - jmpBlk->setBBProfileWeight(newEdge->getLikelyWeight()); - } - else - { - // We set the bbWeight to the smaller of bSrc->bbWeight or bDst->bbWeight - if (bSrc->bbWeight < bDst->bbWeight) - { - jmpBlk->bbWeight = bSrc->bbWeight; - jmpBlk->CopyFlags(bSrc, BBF_RUN_RARELY); - } - else - { - jmpBlk->bbWeight = bDst->bbWeight; - jmpBlk->CopyFlags(bDst, BBF_RUN_RARELY); - } - } - - JITDUMP("Added an unconditional jump to " FMT_BB " after block " FMT_BB "\n", jmpBlk->GetTarget()->bbNum, - bSrc->bbNum); - } - - return jmpBlk; -} - //------------------------------------------------------------------------ // fgRenumberBlocks: update block bbNums to reflect bbNext order // @@ -5553,16 +5579,6 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r #endif // DEBUG - if (UsesFunclets()) - { - bStart->SetFlags(BBF_FUNCLET_BEG); // Mark the start block of the funclet - - if (bMiddle != nullptr) - { - bMiddle->SetFlags(BBF_FUNCLET_BEG); // Also mark the start block of a filter handler as a funclet - } - } - BasicBlock* bNext; bNext = bLast->Next(); @@ -6308,8 +6324,8 @@ BasicBlock* Compiler::fgNewBBinRegion(BBKinds jumpKind, else { noway_assert(tryIndex > 0 || hndIndex > 0); - PREFIX_ASSUME(tryIndex <= compHndBBtabCount); - PREFIX_ASSUME(hndIndex <= compHndBBtabCount); + assert(tryIndex <= compHndBBtabCount); + assert(hndIndex <= compHndBBtabCount); // Decide which region to put in, the "try" region or the "handler" region. if (tryIndex == 0) diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 5809aa3ffb9f..e954d7d8aa2b 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -409,6 +409,12 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, PhasePositi #ifdef DEBUG dumpFunction = JitConfig.JitDumpFg().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args); + dumpFunction |= ((unsigned)JitConfig.JitDumpFgHash() == info.compMethodHash()); + + if (opts.IsTier0()) + { + dumpFunction &= (JitConfig.JitDumpFgTier0() > 0); + } CompAllocator allocator = getAllocatorDebugOnly(); filename = JitConfig.JitDumpFgFile(); @@ -656,6 +662,8 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, PhasePositi // Here are the config values that control it: // DOTNET_JitDumpFg A string (ala the DOTNET_JitDump string) indicating what methods to dump // flowgraphs for. +// DOTNET_JitDumpFgHash Dump flowgraphs for methods with this hash +// DOTNET_JitDumpFgTier0 Dump tier-0 compilations // DOTNET_JitDumpFgDir A path to a directory into which the flowgraphs will be dumped. // DOTNET_JitDumpFgFile The filename to use. The default is "default.[xml|dot]". // Note that the new graphs will be appended to this file if it already exists. @@ -817,10 +825,10 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos) if (displayBlockFlags) { // Don't display the `[` `]` unless we're going to display something. 
- const bool isTryEntryBlock = bbIsTryBeg(block); + const bool isTryEntryBlock = bbIsTryBeg(block); + const bool isFuncletEntryBlock = fgFuncletsCreated && bbIsFuncletBeg(block); - if (isTryEntryBlock || - block->HasAnyFlag(BBF_FUNCLET_BEG | BBF_RUN_RARELY | BBF_LOOP_HEAD | BBF_LOOP_ALIGN)) + if (isTryEntryBlock || isFuncletEntryBlock || block->HasAnyFlag(BBF_RUN_RARELY | BBF_LOOP_ALIGN)) { // Display a very few, useful, block flags fprintf(fgxFile, " ["); @@ -828,7 +836,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos) { fprintf(fgxFile, "T"); } - if (block->HasFlag(BBF_FUNCLET_BEG)) + if (isFuncletEntryBlock) { fprintf(fgxFile, "F"); } @@ -836,10 +844,6 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos) { fprintf(fgxFile, "R"); } - if (block->HasFlag(BBF_LOOP_HEAD)) - { - fprintf(fgxFile, "L"); - } if (block->HasFlag(BBF_LOOP_ALIGN)) { fprintf(fgxFile, "A"); @@ -901,7 +905,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos) if (condStmt != nullptr) { GenTree* const condTree = condStmt->GetRootNode(); - noway_assert(condTree->gtOper == GT_JTRUE); + noway_assert(condTree->OperIs(GT_JTRUE)); GenTree* const compareTree = condTree->AsOp()->gtOp1; fgDumpTree(fgxFile, compareTree); } @@ -967,10 +971,6 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos) { fprintf(fgxFile, "\n callsNewArr=\"true\""); } - if (block->HasFlag(BBF_LOOP_HEAD)) - { - fprintf(fgxFile, "\n loopHead=\"true\""); - } const char* rootTreeOpName = "n/a"; if (block->IsLIR() || (block->lastStmt() != nullptr)) @@ -2106,15 +2106,6 @@ void Compiler::fgTableDispBasicBlock(const BasicBlock* block, printf(" "); } - if (flags & BBF_FUNCLET_BEG) - { - printf("F "); - } - else - { - printf(" "); - } - int cnt = 0; switch (block->bbCatchTyp) @@ -2573,7 +2564,7 @@ Compiler::fgWalkResult Compiler::fgStress64RsltMulCB(GenTree** pTree, fgWalkData GenTree* tree = *pTree; Compiler* pComp = data->compiler; - if (tree->gtOper != GT_MUL || tree->gtType != TYP_INT || (tree->gtOverflow())) + if (!tree->OperIs(GT_MUL) || !tree->TypeIs(TYP_INT) || (tree->gtOverflow())) { return WALK_CONTINUE; } @@ -2724,7 +2715,15 @@ bool BBPredsChecker::CheckEhTryDsc(BasicBlock* block, BasicBlock* blockPred, EHb return true; } - printf("Jump into the middle of try region: " FMT_BB " branches to " FMT_BB "\n", blockPred->bbNum, block->bbNum); + // Async resumptions are allowed to jump into try blocks at any point. They + // are introduced late enough that the invariant of single entry is no + // longer necessary. 
+ if (blockPred->HasFlag(BBF_ASYNC_RESUMPTION)) + { + return true; + } + + JITDUMP("Jump into the middle of try region: " FMT_BB " branches to " FMT_BB "\n", blockPred->bbNum, block->bbNum); assert(!"Jump into middle of try region"); return false; } @@ -2756,8 +2755,8 @@ bool BBPredsChecker::CheckEhHndDsc(BasicBlock* block, BasicBlock* blockPred, EHb return true; } - printf("Jump into the middle of handler region: " FMT_BB " branches to " FMT_BB "\n", blockPred->bbNum, - block->bbNum); + JITDUMP("Jump into the middle of handler region: " FMT_BB " branches to " FMT_BB "\n", blockPred->bbNum, + block->bbNum); assert(!"Jump into the middle of handler region"); return false; } @@ -2945,8 +2944,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef // if (fgFirstFuncletBB != nullptr) { - assert(fgFirstFuncletBB->hasHndIndex() == true); - assert(fgFirstFuncletBB->HasFlag(BBF_FUNCLET_BEG)); + assert(bbIsFuncletBeg(fgFirstFuncletBB)); } } @@ -3026,7 +3024,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef else if (block->KindIs(BBJ_SWITCH)) { assert((!allNodesLinked || (block->lastNode()->gtNext == nullptr)) && - (block->lastNode()->gtOper == GT_SWITCH || block->lastNode()->gtOper == GT_SWITCH_TABLE)); + block->lastNode()->OperIs(GT_SWITCH, GT_SWITCH_TABLE)); } } @@ -3121,7 +3119,8 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef // A branch or fall-through to a BBJ_CALLFINALLY block must come from the `try` region associated // with the finally block the BBJ_CALLFINALLY is targeting. There is one special case: if the // BBJ_CALLFINALLY is the first block of a `try`, then its predecessor can be outside the `try`: - // either a branch or fall-through to the first block. + // either a branch or fall-through to the first block. Similarly internal resumption blocks for + // async are allowed to do this as they are introduced late enough that we no longer need the invariant. // // Note that this IR condition is a choice. It naturally occurs when importing EH constructs. 
// This condition prevents flow optimizations from skipping blocks in a `try` and branching @@ -3159,7 +3158,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef } else { - assert(bbInTryRegions(finallyIndex, block)); + assert(bbInTryRegions(finallyIndex, block) || block->HasFlag(BBF_ASYNC_RESUMPTION)); } } } @@ -3288,9 +3287,20 @@ void Compiler::fgDebugCheckTypes(GenTree* tree) assert(!"TYP_ULONG and TYP_UINT are not legal in IR"); } - if (node->OperIs(GT_NOP)) + switch (node->OperGet()) { - assert(node->TypeIs(TYP_VOID) && "GT_NOP should be TYP_VOID."); + case GT_NOP: + case GT_JTRUE: + case GT_BOUNDS_CHECK: + if (!node->TypeIs(TYP_VOID)) + { + m_compiler->gtDispTree(node); + assert(!"The tree is expected to be of TYP_VOID type"); + } + break; + + default: + break; } if (varTypeIsSmall(node)) @@ -3365,6 +3375,7 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) break; case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: expectedFlags |= GTF_ORDER_SIDEEFF; break; @@ -3422,9 +3433,9 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) switch (intrinsicId) { #if defined(TARGET_XARCH) - case NI_SSE_StoreFence: - case NI_SSE2_LoadFence: - case NI_SSE2_MemoryFence: + case NI_X86Base_LoadFence: + case NI_X86Base_MemoryFence: + case NI_X86Base_StoreFence: case NI_X86Serialize_Serialize: { assert(tree->OperRequiresAsgFlag()); @@ -3433,15 +3444,21 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) } case NI_X86Base_Pause: - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { assert(tree->OperRequiresCallFlag(this)); expectedFlags |= GTF_GLOB_REF; break; } + + case NI_Vector128_op_Division: + case NI_Vector256_op_Division: + { + break; + } #endif // TARGET_XARCH #if defined(TARGET_ARM64) @@ -3450,10 +3467,10 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) case NI_Sve_GatherPrefetch32Bit: case NI_Sve_GatherPrefetch64Bit: case NI_Sve_GatherPrefetch8Bit: - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: + case NI_Sve_Prefetch16Bit: + case NI_Sve_Prefetch32Bit: + case NI_Sve_Prefetch64Bit: + case NI_Sve_Prefetch8Bit: case NI_Sve_GetFfrByte: case NI_Sve_GetFfrInt16: case NI_Sve_GetFfrInt32: @@ -3508,7 +3525,7 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) // void Compiler::fgDebugCheckDispFlags(GenTree* tree, GenTreeFlags dispFlags, GenTreeDebugFlags debugFlags) { - if (tree->OperGet() == GT_IND) + if (tree->OperIs(GT_IND)) { printf("%c", (dispFlags & GTF_IND_INVARIANT) ? '#' : '-'); printf("%c", (dispFlags & GTF_IND_NONFAULTING) ? 
'n' : '-'); @@ -3621,16 +3638,20 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, Statement* stmt) if (tree->OperIsLeaf()) { - if (tree->gtOper == GT_CATCH_ARG) + if (tree->OperIs(GT_CATCH_ARG)) { // The GT_CATCH_ARG should always have GTF_ORDER_SIDEEFF set noway_assert(tree->gtFlags & GTF_ORDER_SIDEEFF); // The GT_CATCH_ARG has to be the first thing evaluated noway_assert(stmt == block->FirstNonPhiDef()); - noway_assert(stmt->GetTreeList()->gtOper == GT_CATCH_ARG); + noway_assert(stmt->GetTreeList()->OperIs(GT_CATCH_ARG)); // The root of the tree should have GTF_ORDER_SIDEEFF set noway_assert(stmt->GetRootNode()->gtFlags & GTF_ORDER_SIDEEFF); } + else if (tree->OperIs(GT_ASYNC_CONTINUATION)) + { + assert(tree->gtFlags & GTF_ORDER_SIDEEFF); + } } if (tree->OperIsUnary() && tree->AsOp()->gtOp1) @@ -4732,7 +4753,12 @@ void Compiler::fgDebugCheckLoops() loop->VisitRegularExitBlocks([=](BasicBlock* exit) { for (BasicBlock* pred : exit->PredBlocks()) { - assert(loop->ContainsBlock(pred)); + if (!loop->ContainsBlock(pred)) + { + JITDUMP("Loop " FMT_LP " exit " FMT_BB " has non-loop predecessor " FMT_BB "\n", + loop->GetIndex(), exit->bbNum, pred->bbNum); + assert(!"Loop exit has non-loop predecessor"); + } } return BasicBlockVisit::Continue; }); diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 7ea85224a863..5ecd9936d8b4 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -112,7 +112,7 @@ PhaseStatus Compiler::fgRemoveEmptyFinally() { GenTree* stmtExpr = stmt->GetRootNode(); - if (stmtExpr->gtOper != GT_RETFILT) + if (!stmtExpr->OperIs(GT_RETFILT)) { isEmpty = false; break; @@ -292,6 +292,8 @@ void Compiler::fgUpdateACDsBeforeEHTableEntryRemoval(unsigned XTnum) return; } + JITDUMP("\nUpdating ACDs before removing EH#%u\n", XTnum); + EHblkDsc* const ebd = ehGetDsc(XTnum); AddCodeDscMap* const map = fgGetAddCodeDscMap(); for (AddCodeDsc* const add : AddCodeDscMap::ValueIteration(map)) @@ -411,6 +413,8 @@ void Compiler::fgUpdateACDsBeforeEHTableEntryRemoval(unsigned XTnum) JITDUMPEXEC(add->Dump()); } } + + JITDUMP("... done updating ACDs\n"); } //------------------------------------------------------------------------ @@ -690,7 +694,9 @@ PhaseStatus Compiler::fgRemoveEmptyTry() // Handler index of any nested blocks will update when we // remove the EH table entry. Change handler exits to jump to // the continuation. Clear catch type on handler entry. - // Decrement nesting level of enclosed GT_END_LFINs. + // + // GT_END_LFIN no longer need updates here, now their gtVal1 fields refer to EH IDs. + // for (BasicBlock* const block : Blocks(firstHandlerBlock, lastHandlerBlock)) { if (block == firstHandlerBlock) @@ -713,7 +719,7 @@ PhaseStatus Compiler::fgRemoveEmptyTry() { Statement* finallyRet = block->lastStmt(); GenTree* finallyRetExpr = finallyRet->GetRootNode(); - assert(finallyRetExpr->gtOper == GT_RETFILT); + assert(finallyRetExpr->OperIs(GT_RETFILT)); fgRemoveStmt(block, finallyRet); FlowEdge* const newEdge = fgAddRefPred(continuation, block); block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); @@ -725,25 +731,6 @@ PhaseStatus Compiler::fgRemoveEmptyTry() } } } - -#if defined(FEATURE_EH_WINDOWS_X86) - if (!UsesFunclets()) - { - // If we're in a non-funclet model, decrement the nesting - // level of any GT_END_LFIN we find in the handler region, - // since we're removing the enclosing handler. 
- for (Statement* const stmt : block->Statements()) - { - GenTree* expr = stmt->GetRootNode(); - if (expr->gtOper == GT_END_LFIN) - { - const size_t nestLevel = expr->AsVal()->gtVal1; - assert(nestLevel > 0); - expr->AsVal()->gtVal1 = nestLevel - 1; - } - } - } -#endif // FEATURE_EH_WINDOWS_X86 } // (6) Update any impacted ACDs. @@ -764,8 +751,10 @@ PhaseStatus Compiler::fgRemoveEmptyTry() assert(firstHandlerBlock->bbRefs >= 2); firstHandlerBlock->bbRefs -= 1; - // (8) The old try entry no longer needs special protection. + // (8) The old try/handler entries no longer need special protection. firstTryBlock->RemoveFlags(BBF_DONT_REMOVE); + assert(!bbIsHandlerBeg(firstHandlerBlock)); + firstHandlerBlock->RemoveFlags(BBF_DONT_REMOVE); // Another one bites the dust... emptyCount++; @@ -1489,9 +1478,6 @@ PhaseStatus Compiler::fgCloneFinally() { // Mark the block as the start of the cloned finally. newBlock->SetFlags(BBF_CLONED_FINALLY_BEGIN); - - // Cloned finally entry block does not need any special protection. - newBlock->RemoveFlags(BBF_DONT_REMOVE); } if (block == lastBlock) @@ -1500,6 +1486,7 @@ PhaseStatus Compiler::fgCloneFinally() newBlock->SetFlags(BBF_CLONED_FINALLY_END); } + // Cloned finally block does not need any special protection. newBlock->RemoveFlags(BBF_DONT_REMOVE); // Make sure clone block state hasn't munged the try region. @@ -1533,7 +1520,7 @@ PhaseStatus Compiler::fgCloneFinally() { Statement* finallyRet = newBlock->lastStmt(); GenTree* finallyRetExpr = finallyRet->GetRootNode(); - assert(finallyRetExpr->gtOper == GT_RETFILT); + assert(finallyRetExpr->OperIs(GT_RETFILT)); fgRemoveStmt(newBlock, finallyRet); FlowEdge* const newEdge = fgAddRefPred(normalCallFinallyReturn, newBlock); @@ -1661,27 +1648,28 @@ PhaseStatus Compiler::fgCloneFinally() for (BasicBlock* const block : Blocks(firstBlock, lastBlock)) { - if (block->hasProfileWeight()) - { - weight_t const blockWeight = block->bbWeight; - block->setBBProfileWeight(blockWeight * originalScale); - JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", block->bbNum, block->bbWeight); + weight_t const blockWeight = block->bbWeight; + block->setBBProfileWeight(blockWeight * originalScale); + JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", block->bbNum, block->bbWeight); - BasicBlock* const clonedBlock = blockMap[block]; - clonedBlock->setBBProfileWeight(blockWeight * clonedScale); - JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", clonedBlock->bbNum, clonedBlock->bbWeight); - } + BasicBlock* const clonedBlock = blockMap[block]; + clonedBlock->setBBProfileWeight(blockWeight * clonedScale); + JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", clonedBlock->bbNum, clonedBlock->bbWeight); + } + + if (!retargetedAllCalls) + { + JITDUMP( + "Reduced flow out of EH%u needs to be propagated to continuation block(s). Data %s inconsistent.\n", + XTnum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; } } // Update flow into normalCallFinallyReturn if (normalCallFinallyReturn->hasProfileWeight()) { - normalCallFinallyReturn->bbWeight = BB_ZERO_WEIGHT; - for (FlowEdge* const predEdge : normalCallFinallyReturn->PredEdges()) - { - normalCallFinallyReturn->increaseBBProfileWeight(predEdge->getLikelyWeight()); - } + normalCallFinallyReturn->setBBProfileWeight(normalCallFinallyReturn->computeIncomingWeight()); } // Done! @@ -1901,17 +1889,30 @@ void Compiler::fgCleanupContinuation(BasicBlock* continuation) // Remove the GT_END_LFIN from the continuation, // Note we only expect to see one such statement. 
+ // bool foundEndLFin = false; + bool isEmpty = true; for (Statement* const stmt : continuation->Statements()) { + isEmpty = false; GenTree* expr = stmt->GetRootNode(); - if (expr->gtOper == GT_END_LFIN) + if (expr->OperIs(GT_END_LFIN)) { assert(!foundEndLFin); fgRemoveStmt(continuation, stmt); foundEndLFin = true; } } + + // If the continuation is unreachable, morph may + // have changed the continuation to an empty BBJ_THROW. + // Tolerate. + // + if (isEmpty && continuation->KindIs(BBJ_THROW)) + { + return; + } + assert(foundEndLFin); } #endif // FEATURE_EH_WINDOWS_X86 @@ -2202,33 +2203,13 @@ bool Compiler::fgRetargetBranchesToCanonicalCallFinally(BasicBlock* block, // if (block->hasProfileWeight()) { - // Add weight to the canonical call finally pair. + // Add weight to the canonical call-finally. // - weight_t const canonicalWeight = - canonicalCallFinally->hasProfileWeight() ? canonicalCallFinally->bbWeight : BB_ZERO_WEIGHT; - weight_t const newCanonicalWeight = block->bbWeight + canonicalWeight; - - canonicalCallFinally->setBBProfileWeight(newCanonicalWeight); - - BasicBlock* const canonicalLeaveBlock = canonicalCallFinally->Next(); - - weight_t const canonicalLeaveWeight = - canonicalLeaveBlock->hasProfileWeight() ? canonicalLeaveBlock->bbWeight : BB_ZERO_WEIGHT; - weight_t const newLeaveWeight = block->bbWeight + canonicalLeaveWeight; - - canonicalLeaveBlock->setBBProfileWeight(newLeaveWeight); + canonicalCallFinally->increaseBBProfileWeight(block->bbWeight); - // Remove weight from the old call finally pair. + // Remove weight from the old call-finally. // - if (callFinally->hasProfileWeight()) - { - callFinally->decreaseBBProfileWeight(block->bbWeight); - } - - if (leaveBlock->hasProfileWeight()) - { - leaveBlock->decreaseBBProfileWeight(block->bbWeight); - } + callFinally->decreaseBBProfileWeight(block->bbWeight); } return true; @@ -2697,8 +2678,8 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, if (bbIsTryBeg(block)) { assert(added); - JITDUMP("==> found try entry for EH#%02u nested in handler at " FMT_BB "\n", block->bbNum, - block->getTryIndex()); + JITDUMP("==> found try entry for EH#%02u nested in handler at " FMT_BB "\n", block->getTryIndex(), + block->bbNum); regionsToProcess.Push(block->getTryIndex()); } } @@ -2736,7 +2717,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, break; } outermostEbd = ehGetDsc(enclosingTryIndex); - if (!EHblkDsc::ebdIsSameILTry(outermostEbd, tryEbd)) + if (!EHblkDsc::ebdIsSameTry(outermostEbd, tryEbd)) { break; } @@ -2778,6 +2759,12 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, assert(insertBeforeIndex == enclosingTryIndex); } + if (insertBeforeIndex != compHndBBtabCount) + { + JITDUMP("Existing EH region(s) EH#%02u...EH#%02u will become EH#%02u...EH#%02u\n", insertBeforeIndex, + compHndBBtabCount - 1, insertBeforeIndex + regionCount, compHndBBtabCount + regionCount - 1); + } + // Once we call fgTryAddEHTableEntries with deferCloning = false, // all the EH indicies at or above insertBeforeIndex will shift, // and the EH table may reallocate. @@ -2870,12 +2857,14 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, compHndBBtab[XTnum] = compHndBBtab[originalXTnum]; EHblkDsc* const ebd = &compHndBBtab[XTnum]; + ebd->ebdID = impInlineRoot()->compEHID++; + // Note the outermost region enclosing indices stay the same, because the original // clause entries got adjusted when we inserted the new clauses. 
// if (ebd->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) { - if (XTnum < clonedOutermostRegionIndex) + if (ebd->ebdEnclosingTryIndex < clonedOutermostRegionIndex) { ebd->ebdEnclosingTryIndex += (unsigned short)indexShift; } @@ -2888,7 +2877,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, if (ebd->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) { - if (XTnum < clonedOutermostRegionIndex) + if (ebd->ebdEnclosingHndIndex < clonedOutermostRegionIndex) { ebd->ebdEnclosingHndIndex += (unsigned short)indexShift; } @@ -3028,6 +3017,22 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, newBlock->bbRefs++; } } + +#if defined(FEATURE_EH_WINDOWS_X86) + // Update the EH ID for any cloned GT_END_LFIN. + // + for (Statement* const stmt : newBlock->Statements()) + { + GenTree* const rootNode = stmt->GetRootNode(); + if (rootNode->OperIs(GT_END_LFIN)) + { + GenTreeVal* const endNode = rootNode->AsVal(); + EHblkDsc* const oldEbd = ehFindEHblkDscById((unsigned short)endNode->gtVal1); + EHblkDsc* const newEbd = oldEbd + indexShift; + endNode->gtVal1 = newEbd->ebdID; + } + } +#endif } JITDUMP("Done fixing region indices\n"); diff --git a/src/coreclr/jit/fgflow.cpp b/src/coreclr/jit/fgflow.cpp index 85c3c560b6ba..730c77cd74be 100644 --- a/src/coreclr/jit/fgflow.cpp +++ b/src/coreclr/jit/fgflow.cpp @@ -287,7 +287,7 @@ FlowEdge* Compiler::fgRemoveAllRefPreds(BasicBlock* block, BasicBlock* blockPred // void Compiler::fgRemoveBlockAsPred(BasicBlock* block) { - PREFIX_ASSUME(block != nullptr); + assert(block != nullptr); switch (block->GetKind()) { diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index d66609dc788b..dce176b08b34 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -278,11 +278,11 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorData(); - if (value->OperGet() == GT_COMMA) + if (value->OperIs(GT_COMMA)) { GenTree* effectiveValue = value->gtEffectiveVal(); - noway_assert(!varTypeIsStruct(effectiveValue) || (effectiveValue->OperGet() != GT_RET_EXPR) || + noway_assert(!varTypeIsStruct(effectiveValue) || !effectiveValue->OperIs(GT_RET_EXPR) || !effectiveValue->AsRetExpr()->gtInlineCandidate->HasMultiRegRetVal()); } } @@ -357,37 +357,60 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorgtFoldExpr(inlineCandidate); - var_types retType = tree->TypeGet(); + inlineCandidate = m_compiler->gtFoldExpr(inlineCandidate); -#ifdef DEBUG - if (m_compiler->verbose) + // If this use is an unused ret expr, is the first child of a comma, the return value is ignored. + // Extract any side effects. 
+ // + if ((parent != nullptr) && parent->OperIs(GT_COMMA) && (parent->gtGetOp1() == *use)) { - printf("\nReplacing the return expression placeholder "); - Compiler::printTreeID(tree); - printf(" with "); - Compiler::printTreeID(inlineCandidate); - printf("\n"); - // Dump out the old return expression placeholder it will be overwritten by the ReplaceWith below - m_compiler->gtDispTree(tree); - } -#endif // DEBUG + JITDUMP("\nReturn expression placeholder [%06u] value [%06u] unused\n", m_compiler->dspTreeID(tree), + m_compiler->dspTreeID(inlineCandidate)); - var_types newType = inlineCandidate->TypeGet(); + GenTree* sideEffects = nullptr; + m_compiler->gtExtractSideEffList(inlineCandidate, &sideEffects); - // If we end up swapping type we may need to retype the tree: - if (retType != newType) + if (sideEffects == nullptr) + { + JITDUMP("\nInline return expression had no side effects\n"); + (*use)->gtBashToNOP(); + } + else + { + JITDUMP("\nInserting the inline return expression side effects\n"); + JITDUMPEXEC(m_compiler->gtDispTree(sideEffects)); + JITDUMP("\n"); + *use = sideEffects; + } + } + else { - if ((retType == TYP_BYREF) && (tree->OperGet() == GT_IND)) + JITDUMP("\nReplacing the return expression placeholder [%06u] with [%06u]\n", + m_compiler->dspTreeID(tree), m_compiler->dspTreeID(inlineCandidate)); + JITDUMPEXEC(m_compiler->gtDispTree(tree)); + + var_types retType = tree->TypeGet(); + var_types newType = inlineCandidate->TypeGet(); + + // If we end up swapping type we may need to retype the tree: + if (retType != newType) { - // - in an RVA static if we've reinterpreted it as a byref; - assert(newType == TYP_I_IMPL); - JITDUMP("Updating type of the return GT_IND expression to TYP_BYREF\n"); - inlineCandidate->gtType = TYP_BYREF; + if ((retType == TYP_BYREF) && tree->OperIs(GT_IND)) + { + // - in an RVA static if we've reinterpreted it as a byref; + assert(newType == TYP_I_IMPL); + JITDUMP("Updating type of the return GT_IND expression to TYP_BYREF\n"); + inlineCandidate->gtType = TYP_BYREF; + } } + + JITDUMP("\nInserting the inline return expression\n"); + JITDUMPEXEC(m_compiler->gtDispTree(inlineCandidate)); + JITDUMP("\n"); + + *use = inlineCandidate; } - *use = inlineCandidate; m_madeChanges = true; if (inlineeBB != nullptr) @@ -396,15 +419,6 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorcompCurBB->CopyFlags(inlineeBB, BBF_COPY_PROPAGATE); } - -#ifdef DEBUG - if (m_compiler->verbose) - { - printf("\nInserting the inline return expression\n"); - m_compiler->gtDispTree(inlineCandidate); - printf("\n"); - } -#endif // DEBUG } // If the inline was rejected and returns a retbuffer, then mark that @@ -530,6 +544,32 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorIsDevirtualizationCandidate(m_compiler)); + if (call->IsVirtual()) + { + return call->gtCallMethHnd; + } + else + { + GenTree* runtimeMethHndNode = + call->gtCallAddr->AsCall()->gtArgs.FindWellKnownArg(WellKnownArg::RuntimeMethodHandle)->GetNode(); + assert(runtimeMethHndNode != nullptr); + switch (runtimeMethHndNode->OperGet()) + { + case GT_RUNTIMELOOKUP: + return runtimeMethHndNode->AsRuntimeLookup()->GetMethodHandle(); + case GT_CNS_INT: + return CORINFO_METHOD_HANDLE(runtimeMethHndNode->AsIntCon()->IconValue()); + default: + assert(!"Unexpected type in RuntimeMethodHandle arg."); + return nullptr; + } + return nullptr; + } + } + //------------------------------------------------------------------------ // LateDevirtualization: re-examine calls after inlining to 
see if we // can do more devirtualization @@ -566,14 +606,15 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorOperGet() == GT_NOP); + assert((parent != nullptr) && parent->OperIs(GT_NOP)); return; } - if (tree->OperGet() == GT_CALL) + if (tree->OperIs(GT_CALL)) { - GenTreeCall* call = tree->AsCall(); - bool tryLateDevirt = call->IsVirtual() && (call->gtCallType == CT_USER_FUNC); + GenTreeCall* call = tree->AsCall(); + // TODO-CQ: Drop `call->gtCallType == CT_USER_FUNC` once we have GVM devirtualization + bool tryLateDevirt = call->IsDevirtualizationCandidate(m_compiler) && (call->gtCallType == CT_USER_FUNC); #ifdef DEBUG tryLateDevirt = tryLateDevirt && (JitConfig.JitEnableLateDevirtualization() == 1); @@ -589,34 +630,26 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorgtCallMethHnd; + CORINFO_CONTEXT_HANDLE context = call->gtLateDevirtualizationInfo->exactContextHnd; + InlineContext* inlinersContext = call->gtLateDevirtualizationInfo->inlinersContext; + CORINFO_METHOD_HANDLE method = GetMethodHandle(call); unsigned methodFlags = 0; const bool isLateDevirtualization = true; const bool explicitTailCall = call->IsTailPrefixedCall(); - if ((call->gtCallMoreFlags & GTF_CALL_M_HAS_LATE_DEVIRT_INFO) != 0) - { - context = call->gtLateDevirtualizationInfo->exactContextHnd; - // Note: we might call this multiple times for the same trees. - // If the devirtualization below succeeds, the call becomes - // non-virtual and we won't get here again. If it does not - // succeed we might get here again so we keep the late devirt - // info. - } - CORINFO_CONTEXT_HANDLE contextInput = context; context = nullptr; m_compiler->impDevirtualizeCall(call, nullptr, &method, &methodFlags, &contextInput, &context, isLateDevirtualization, explicitTailCall); - if (!call->IsVirtual()) + if (!call->IsDevirtualizationCandidate(m_compiler)) { assert(context != nullptr); + assert(inlinersContext != nullptr); CORINFO_CALL_INFO callInfo = {}; callInfo.hMethod = method; callInfo.methodFlags = methodFlags; - m_compiler->impMarkInlineCandidate(call, context, false, &callInfo); + m_compiler->impMarkInlineCandidate(call, context, false, &callInfo, inlinersContext); if (call->IsInlineCandidate()) { @@ -652,9 +685,6 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorGetSingleInlineCandidateInfo()->exactContextHandle = context; - INDEBUG(call->GetSingleInlineCandidateInfo()->inlinersContext = call->gtInlineContext); - JITDUMP("New inline candidate due to late devirtualization:\n"); DISPTREE(call); } @@ -669,7 +699,7 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorTypeGet() == TYP_REF) + if (tree->TypeIs(TYP_REF)) { LclVarDsc* lcl = m_compiler->lvaGetDesc(lclNum); @@ -697,14 +727,14 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorOperGet() == GT_JTRUE) + else if (tree->OperIs(GT_JTRUE)) { // See if this jtrue is now foldable. BasicBlock* block = m_compiler->compCurBB; GenTree* condTree = tree->AsOp()->gtOp1; assert(tree == block->lastStmt()->GetRootNode()); - if (condTree->OperGet() == GT_CNS_INT) + if (condTree->OperIs(GT_CNS_INT)) { JITDUMP(" ... 
found foldable jtrue at [%06u] in " FMT_BB "\n", m_compiler->dspTreeID(tree), block->bbNum); @@ -792,6 +822,17 @@ PhaseStatus Compiler::fgInline() Metrics.ProfileConsistentBeforeInline++; } + if (!fgHaveProfileWeights()) + { + JITDUMP("INLINER: no pgo data\n"); + } + else + { + JITDUMP("INLINER: pgo source is %s; pgo data is %sconsistent; %strusted; %ssufficient\n", + compGetPgoSourceName(), fgPgoConsistent ? "" : "not ", fgHaveTrustedProfileWeights() ? "" : "not ", + fgHaveSufficientProfileWeights() ? "" : "not "); + } + noway_assert(fgFirstBB != nullptr); BasicBlock* block = fgFirstBB; @@ -863,8 +904,7 @@ PhaseStatus Compiler::fgInline() // See if stmt is of the form GT_COMMA(call, nop) // If yes, we can get rid of GT_COMMA. - if (expr->OperGet() == GT_COMMA && expr->AsOp()->gtOp1->OperGet() == GT_CALL && - expr->AsOp()->gtOp2->OperGet() == GT_NOP) + if (expr->OperIs(GT_COMMA) && expr->AsOp()->gtOp1->OperIs(GT_CALL) && expr->AsOp()->gtOp2->OperIs(GT_NOP)) { madeChanges = true; stmt->SetRootNode(expr->AsOp()->gtOp1); @@ -1046,6 +1086,19 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, // Don't expect any surprises here. assert(result->IsCandidate()); +#if defined(DEBUG) + // Fail if we're inlining and we've reached the acceptance limit. + // + int limit = JitConfig.JitInlineLimit(); + unsigned current = m_inlineStrategy->GetInlineCount(); + + if ((limit >= 0) && (current >= static_cast(limit))) + { + result->NoteFatal(InlineObservation::CALLSITE_OVER_INLINE_LIMIT); + return; + } +#endif // defined(DEBUG) + if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING) { // For now, attributing this to call site, though it's really @@ -1070,6 +1123,14 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, return; } + if (call->IsAsync() && info.compUsesAsyncContinuation) + { + // Currently not supported. Could provide a nice perf benefit for + // Task -> runtime async thunks if we supported it. + result->NoteFatal(InlineObservation::CALLER_ASYNC_USED_CONTINUATION); + return; + } + // impMarkInlineCandidate() is expected not to mark tail prefixed calls // and recursive tail calls as inline candidates. 
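The `JitInlineLimit` check added above is a simple count-based gate on further inlining. The following standalone sketch (hypothetical `InlineBudget` type and `tryAccept` helper, not JIT code) shows the same pattern: a non-negative configured limit rejects every candidate once the accepted count reaches it, while a negative limit leaves inlining uncapped.

```cpp
// Minimal sketch of a debug-only, config-driven inline acceptance limit.
// InlineBudget and tryAccept are illustrative names, not JIT types.
#include <cstdio>

struct InlineBudget
{
    int limit;    // negative means "no limit", mirroring a -1 style config default
    int accepted; // how many inlines have been accepted so far

    bool tryAccept()
    {
        if ((limit >= 0) && (accepted >= limit))
        {
            return false; // over the acceptance limit; reject this candidate
        }
        accepted++;
        return true;
    }
};

int main()
{
    InlineBudget budget{2, 0};
    for (int i = 0; i < 4; i++)
    {
        std::printf("candidate %d: %s\n", i, budget.tryAccept() ? "inlined" : "rejected (over limit)");
    }
    return 0;
}
```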
noway_assert(!call->IsTailPrefixedCall()); @@ -1146,7 +1207,7 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTree** pTree, fgWalkData* data) { GenTree* tree = *pTree; - if (tree->gtOper == GT_CALL) + if (tree->OperIs(GT_CALL)) { Compiler* compiler = data->compiler; Statement* stmt = (Statement*)data->pCallbackData; @@ -1176,7 +1237,7 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call) return; } - InlineResult inlineResult(this, call, nullptr, "fgNoteNonInlineCandidate", false); + InlineResult inlineResult(this, call, nullptr, "fgNoteNonInlineCandidate", true); InlineObservation currentObservation = InlineObservation::CALLSITE_NOT_CANDIDATE; // Try and recover the reason left behind when the jit decided @@ -1209,13 +1270,13 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call) Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTree** pTree, fgWalkData* data) { GenTree* tree = *pTree; - if (tree->gtOper == GT_CALL) + if (tree->OperIs(GT_CALL)) { assert((tree->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0); } else { - assert(tree->gtOper != GT_RET_EXPR); + assert(!tree->OperIs(GT_RET_EXPR)); } return WALK_CONTINUE; @@ -1225,7 +1286,7 @@ Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTree** pTree, f void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineResult, InlineContext** createdContext) { - noway_assert(call->gtOper == GT_CALL); + noway_assert(call->OperIs(GT_CALL)); noway_assert(call->IsInlineCandidate()); noway_assert(opts.OptEnabled(CLFLG_INLINING)); @@ -1469,7 +1530,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) noway_assert(iciBlock->bbStmtList != nullptr); noway_assert(iciStmt->GetRootNode() != nullptr); assert(iciStmt->GetRootNode() == iciCall); - noway_assert(iciCall->gtOper == GT_CALL); + noway_assert(iciCall->OperIs(GT_CALL)); #ifdef DEBUG @@ -1506,7 +1567,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) { // When fgBBCount is 1 we will always have a non-NULL fgFirstBB // - PREFAST_ASSUME(InlineeCompiler->fgFirstBB != nullptr); + assert(InlineeCompiler->fgFirstBB != nullptr); // DDB 91389: Don't throw away the (only) inlinee block // when its return type is not BBJ_RETURN. @@ -1579,13 +1640,179 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // bottomBlock->RemoveFlags(BBF_DONT_REMOVE); + // If the inlinee has EH, merge the EH tables, and figure out how much of + // a shift we need to make in the inlinee blocks EH indicies. + // + unsigned const inlineeRegionCount = InlineeCompiler->compHndBBtabCount; + const bool inlineeHasEH = inlineeRegionCount > 0; + unsigned inlineeIndexShift = 0; + + if (inlineeHasEH) + { + // If the call site also has EH, we need to insert the inlinee clauses + // so they are a child of the call site's innermost enclosing region. + // Figure out what this is. + // + bool inTryRegion = false; + unsigned const enclosingRegion = ehGetMostNestedRegionIndex(iciBlock, &inTryRegion); + + // We will insert the inlinee clauses in bulk before this index. + // + unsigned insertBeforeIndex = 0; + + if (enclosingRegion == 0) + { + // The call site is not in an EH region, so we can put the inlinee EH clauses + // at the end of root method's the EH table. 
+ // + // For example, if the root method already has EH#0, and the inlinee has 2 regions + // + // enclosingRegion will be 0 + // inlineeIndexShift will be 1 + // insertBeforeIndex will be 1 + // + // inlinee eh0 -> eh1 + // inlinee eh1 -> eh2 + // + // root eh0 -> eh0 + // + inlineeIndexShift = compHndBBtabCount; + insertBeforeIndex = compHndBBtabCount; + } + else + { + // The call site is in an EH region, so we can put the inlinee EH clauses + // just before the enclosing region + // + // Note enclosingRegion is region index + 1. So EH#0 will be represented by 1 here. + // + // For example, if the enclosing EH regions are try#2 and hnd#3, and the inlinee has 2 eh clauses + // + // enclosingRegion will be 3 (try2 + 1) + // inlineeIndexShift will be 2 + // insertBeforeIndex will be 2 + // + // inlinee eh0 -> eh2 + // inlinee eh1 -> eh3 + // + // root eh0 -> eh0 + // root eh1 -> eh1 + // + // root eh2 -> eh4 + // root eh3 -> eh5 + // + inlineeIndexShift = enclosingRegion - 1; + insertBeforeIndex = enclosingRegion - 1; + } + + JITDUMP( + "Inlinee has EH. In root method, inlinee's %u EH region indices will shift by %u and become EH#%02u ... EH#%02u (%p)\n", + inlineeRegionCount, inlineeIndexShift, insertBeforeIndex, insertBeforeIndex + inlineeRegionCount - 1, + &inlineeIndexShift); + + if (enclosingRegion != 0) + { + JITDUMP("Inlinee is nested within current %s EH#%02u (which will become EH#%02u)\n", + inTryRegion ? "try" : "hnd", enclosingRegion - 1, enclosingRegion - 1 + inlineeRegionCount); + } + else + { + JITDUMP("Inlinee is not nested inside any EH region\n"); + } + + // Grow the EH table. + // + // TODO: verify earlier that this won't fail... + // + EHblkDsc* const outermostEbd = + fgTryAddEHTableEntries(insertBeforeIndex, inlineeRegionCount, /* deferAdding */ false); + assert(outermostEbd != nullptr); + + // fgTryAddEHTableEntries has adjusted the indices of all root method blocks and EH clauses + // to accommodate the new entries. No other changes to those are needed. + // + // We just need to add in and fix up the new entries from the inlinee. + // + // Fetch the new enclosing try/handler table indicies. + // + const unsigned enclosingTryIndex = + iciBlock->hasTryIndex() ? iciBlock->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; + const unsigned enclosingHndIndex = + iciBlock->hasHndIndex() ? iciBlock->getHndIndex() : EHblkDsc::NO_ENCLOSING_INDEX; + + // Copy over the EH table entries from inlinee->root and adjust their enclosing indicies. + // + for (unsigned XTnum = 0; XTnum < inlineeRegionCount; XTnum++) + { + unsigned newXTnum = XTnum + inlineeIndexShift; + compHndBBtab[newXTnum] = InlineeCompiler->compHndBBtab[XTnum]; + EHblkDsc* const ebd = &compHndBBtab[newXTnum]; + + if (ebd->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + ebd->ebdEnclosingTryIndex += (unsigned short)inlineeIndexShift; + } + else + { + ebd->ebdEnclosingTryIndex = (unsigned short)enclosingTryIndex; + } + + if (ebd->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + ebd->ebdEnclosingHndIndex += (unsigned short)inlineeIndexShift; + } + else + { + ebd->ebdEnclosingHndIndex = (unsigned short)enclosingHndIndex; + } + } + } + + // Fetch the new enclosing try/handler indicies for blocks. + // Note these are represented differently than the EH table indices. + // + const unsigned blockEnclosingTryIndex = iciBlock->hasTryIndex() ? iciBlock->getTryIndex() + 1 : 0; + const unsigned blockEnclosingHndIndex = iciBlock->hasHndIndex() ? 
iciBlock->getHndIndex() + 1 : 0; + // Set the try and handler index and fix the jump types of inlinee's blocks. // for (BasicBlock* const block : InlineeCompiler->Blocks()) { - noway_assert(!block->hasTryIndex()); - noway_assert(!block->hasHndIndex()); - block->copyEHRegion(iciBlock); + if (block->hasTryIndex()) + { + JITDUMP("Inlinee " FMT_BB " has old try index %u, shift %u, new try index %u\n", block->bbNum, + (unsigned)block->bbTryIndex, inlineeIndexShift, + (unsigned)(block->bbTryIndex + inlineeIndexShift)); + block->bbTryIndex += (unsigned short)inlineeIndexShift; + } + else + { + block->bbTryIndex = (unsigned short)blockEnclosingTryIndex; + } + + if (block->hasHndIndex()) + { + block->bbHndIndex += (unsigned short)inlineeIndexShift; + } + else + { + block->bbHndIndex = (unsigned short)blockEnclosingHndIndex; + } + + // Sanity checks + // + if (iciBlock->hasTryIndex()) + { + assert(block->hasTryIndex()); + assert(block->getTryIndex() <= iciBlock->getTryIndex()); + } + + if (iciBlock->hasHndIndex()) + { + assert(block->hasHndIndex()); + assert(block->getHndIndex() <= iciBlock->getHndIndex()); + } + block->CopyFlags(iciBlock, BBF_BACKWARD_JUMP | BBF_PROF_WEIGHT); // Update block nums appropriately @@ -1661,13 +1888,18 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) info.compNeedsConsecutiveRegisters |= InlineeCompiler->info.compNeedsConsecutiveRegisters; #endif - // If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler. - // TODO-CQ: Implement hot/cold splitting of methods with switch tables. - if (InlineeCompiler->fgHasSwitch && opts.compProcedureSplitting) + if (InlineeCompiler->fgHasSwitch) { - opts.compProcedureSplitting = false; - JITDUMP("Turning off procedure splitting for this method, as inlinee compiler encountered switch tables; " - "implementation limitation.\n"); + fgHasSwitch = true; + + // If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler. + // TODO-CQ: Implement hot/cold splitting of methods with switch tables. + if (opts.compProcedureSplitting) + { + opts.compProcedureSplitting = false; + JITDUMP("Turning off procedure splitting for this method, as inlinee compiler encountered switch tables; " + "implementation limitation.\n"); + } } #ifdef FEATURE_SIMD @@ -1769,9 +2001,6 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // If the call site is not in a try and the callee has a throw, // we may introduce inconsistency. // - // Technically we should check if the callee has a throw not in a try, but since - // we can't inline methods with EH yet we don't see those. - // if (InlineeCompiler->fgThrowCount > 0) { JITDUMP("INLINER: may-throw inlinee\n"); @@ -1855,7 +2084,7 @@ void Compiler::fgInsertInlineeArgument( { // Change the temp in-place to the actual argument. // We currently do not support this for struct arguments, so it must not be a GT_BLK. 
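To make the EH-table merge above easier to follow, here is a minimal standalone model of the index fix-up, using a hypothetical `EhClause` type rather than the JIT's `EHblkDsc`: inlinee clauses keep their relative nesting (each enclosing index is shifted by the insertion offset), and a clause that had no enclosing region inherits the call site's enclosing region.

```cpp
// Simplified model of the inlinee EH index fix-up (illustrative types and values).
#include <cstdio>
#include <vector>

constexpr unsigned NoEnclosing = 0xFFFF; // stands in for NO_ENCLOSING_INDEX

struct EhClause
{
    unsigned enclosingTry; // index of enclosing try clause, or NoEnclosing
    unsigned enclosingHnd; // index of enclosing handler clause, or NoEnclosing
};

int main()
{
    // Two inlinee clauses: clause 1 is nested in clause 0; clause 0 is top-level.
    std::vector<EhClause> inlinee = {{NoEnclosing, NoEnclosing}, {0, NoEnclosing}};

    const unsigned shift          = 2;           // insertion offset (inlineeIndexShift)
    const unsigned callSiteTryIdx = 1;           // call site sits inside root try #1
    const unsigned callSiteHndIdx = NoEnclosing; // call site not inside a handler

    for (EhClause& c : inlinee)
    {
        c.enclosingTry = (c.enclosingTry != NoEnclosing) ? c.enclosingTry + shift : callSiteTryIdx;
        c.enclosingHnd = (c.enclosingHnd != NoEnclosing) ? c.enclosingHnd + shift : callSiteHndIdx;
        std::printf("enclosingTry=%u enclosingHnd=%u (0x%X means none)\n", c.enclosingTry, c.enclosingHnd,
                    NoEnclosing);
    }
    return 0;
}
```

With a shift of 2 and the call site inside root try #1, the top-level inlinee clause ends up enclosed by try #1 and the nested clause by the shifted clause #2, matching the worked example in the comments above.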
- assert(argNode->gtOper != GT_BLK); + assert(!argNode->OperIs(GT_BLK)); argSingleUseNode->ReplaceWith(argNode, this); return; } @@ -1879,8 +2108,7 @@ void Compiler::fgInsertInlineeArgument( { // The argument is either not used or a const or lcl var noway_assert(!argInfo.argIsUsed || argInfo.argIsInvariant || argInfo.argIsLclVar); - noway_assert((argInfo.argIsLclVar == 0) == - (argNode->gtOper != GT_LCL_VAR || (argNode->gtFlags & GTF_GLOB_REF))); + noway_assert((argInfo.argIsLclVar == 0) == (!argNode->OperIs(GT_LCL_VAR) || (argNode->gtFlags & GTF_GLOB_REF))); // If the argument has side effects, append it if (argInfo.argHasSideEff) @@ -1889,7 +2117,7 @@ void Compiler::fgInsertInlineeArgument( *newStmt = nullptr; bool append = true; - if (argNode->gtOper == GT_BLK) + if (argNode->OperIs(GT_BLK)) { // Don't put GT_BLK node under a GT_COMMA. // Codegen can't deal with it. @@ -1918,7 +2146,7 @@ void Compiler::fgInsertInlineeArgument( // Look for the following tree shapes // prejit: (IND (ADD (CONST, CALL(special dce helper...)))) // jit : (COMMA (CALL(special dce helper...), (FIELD ...))) - if (argNode->gtOper == GT_COMMA) + if (argNode->OperIs(GT_COMMA)) { // Look for (COMMA (CALL(special dce helper...), (FIELD ...))) GenTree* op1 = argNode->AsOp()->gtOp1; @@ -1933,12 +2161,12 @@ void Compiler::fgInsertInlineeArgument( append = false; } } - else if (argNode->gtOper == GT_IND) + else if (argNode->OperIs(GT_IND)) { // Look for (IND (ADD (CONST, CALL(special dce helper...)))) GenTree* addr = argNode->AsOp()->gtOp1; - if (addr->gtOper == GT_ADD) + if (addr->OperIs(GT_ADD)) { GenTree* op1 = addr->AsOp()->gtOp1; GenTree* op2 = addr->AsOp()->gtOp2; @@ -2014,7 +2242,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) Statement* newStmt = nullptr; GenTreeCall* call = inlineInfo->iciCall->AsCall(); - noway_assert(call->gtOper == GT_CALL); + noway_assert(call->OperIs(GT_CALL)); // Prepend statements for any initialization / side effects @@ -2059,6 +2287,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) switch (arg.GetWellKnownArg()) { case WellKnownArg::RetBuffer: + case WellKnownArg::AsyncContinuation: continue; case WellKnownArg::InstParam: argInfo = inlineInfo->inlInstParamArgInfo; diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 5127ee82193d..5063a7c3f43a 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -814,8 +814,7 @@ bool Compiler::fgCanCompactBlock(BasicBlock* block) // If target has multiple incoming edges, we can still compact if block is empty. // However, not if it is the beginning of a handler. 
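The relaxed `fgCanCompactBlock` guard above boils down to a small predicate. This is a sketch under simplified, assumed types (a hypothetical `Block` struct, not `BasicBlock`): compaction is allowed when the successor has a single incoming edge, or when the block itself is empty and does not start a handler.

```cpp
// Illustrative predicate mirroring the compaction condition; not JIT code.
#include <cstdio>

struct Block
{
    int  succIncomingEdges; // in-edges of the unique successor
    bool isEmpty;
    bool isHandlerEntry;    // plays the role of bbCatchTyp != BBCT_NONE
};

static bool canCompact(const Block& b)
{
    if (b.succIncomingEdges == 1)
    {
        return true; // successor has a unique predecessor: always compactable
    }
    return b.isEmpty && !b.isHandlerEntry; // otherwise only empty non-handler-entry blocks
}

int main()
{
    std::printf("%d\n", canCompact({1, false, false})); // 1: unique in-edge
    std::printf("%d\n", canCompact({3, true, false}));  // 1: empty, not a handler start
    std::printf("%d\n", canCompact({3, true, true}));   // 0: empty but starts a handler
    return 0;
}
```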
// - if (target->countOfInEdges() != 1 && - (!block->isEmpty() || block->HasFlag(BBF_FUNCLET_BEG) || (block->bbCatchTyp != BBCT_NONE))) + if (target->countOfInEdges() != 1 && (!block->isEmpty() || (block->bbCatchTyp != BBCT_NONE))) { return false; } @@ -1695,17 +1694,17 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) blockRange = &LIR::AsRange(block); switchTree = blockRange->LastNode(); - assert(switchTree->OperGet() == GT_SWITCH_TABLE); + assert(switchTree->OperIs(GT_SWITCH_TABLE)); } else { switchStmt = block->lastStmt(); switchTree = switchStmt->GetRootNode(); - assert(switchTree->OperGet() == GT_SWITCH); + assert(switchTree->OperIs(GT_SWITCH)); } - noway_assert(switchTree->gtType == TYP_VOID); + noway_assert(switchTree->TypeIs(TYP_VOID)); // At this point all of the case jump targets have been updated such // that none of them go to block that is an empty unconditional block @@ -1770,7 +1769,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) #endif // DEBUG /* Replace the conditional statement with the list of side effects */ - noway_assert(sideEffList->gtOper != GT_SWITCH); + noway_assert(!sideEffList->OperIs(GT_SWITCH)); switchStmt->SetRootNode(sideEffList); @@ -1815,7 +1814,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) if (block->IsLIR()) { GenTree* jumpTable = switchTree->AsOp()->gtOp2; - assert(jumpTable->OperGet() == GT_JMPTABLE); + assert(jumpTable->OperIs(GT_JMPTABLE)); blockRange->Remove(jumpTable); } @@ -2015,7 +2014,7 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target, unsigne // GenTree* const lastTree = lastStmt->GetRootNode(); - if (lastTree->gtOper != GT_JTRUE) + if (!lastTree->OperIs(GT_JTRUE)) { return false; } @@ -2029,7 +2028,7 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target, unsigne // op1 must be some combinations of casts of local or constant GenTree* op1 = cond->AsOp()->gtOp1; - while (op1->gtOper == GT_CAST) + while (op1->OperIs(GT_CAST)) { op1 = op1->AsOp()->gtOp1; } @@ -2041,7 +2040,7 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target, unsigne // op2 must be some combinations of casts of local or constant GenTree* op2 = cond->AsOp()->gtOp2; - while (op2->gtOper == GT_CAST) + while (op2->OperIs(GT_CAST)) { op2 = op2->AsOp()->gtOp1; } @@ -2117,7 +2116,7 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target, unsigne // op1 must be some combinations of casts of local or constant // (or unary) op1 = data->AsOp()->gtOp1; - while (op1->gtOper == GT_CAST) + while (op1->OperIs(GT_CAST)) { op1 = op1->AsOp()->gtOp1; } @@ -2138,7 +2137,7 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target, unsigne return false; } - while (op2->gtOper == GT_CAST) + while (op2->OperIs(GT_CAST)) { op2 = op2->AsOp()->gtOp1; } @@ -2440,7 +2439,7 @@ void Compiler::fgRemoveConditionalJump(BasicBlock* block) { Statement* condStmt = block->lastStmt(); GenTree* cond = condStmt->GetRootNode(); - noway_assert(cond->gtOper == GT_JTRUE); + noway_assert(cond->OperIs(GT_JTRUE)); /* check for SIDE_EFFECTS */ if (cond->gtFlags & GTF_SIDE_EFFECT) @@ -2470,7 +2469,7 @@ void Compiler::fgRemoveConditionalJump(BasicBlock* block) #endif // DEBUG /* Replace the conditional statement with the list of side effects */ - noway_assert(sideEffList->gtOper != GT_JTRUE); + noway_assert(!sideEffList->OperIs(GT_JTRUE)); condStmt->SetRootNode(sideEffList); @@ -2523,17 +2522,13 @@ void Compiler::fgRemoveConditionalJump(BasicBlock* block) // bool 
Compiler::fgOptimizeBranch(BasicBlock* bJump) { - if (opts.MinOpts()) - { - return false; - } + assert(opts.OptimizationEnabled()); if (!bJump->KindIs(BBJ_ALWAYS)) { return false; } - // We might be able to compact blocks that always jump to the next block. if (bJump->JumpsToNext()) { return false; @@ -2544,7 +2539,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) return false; } - BasicBlock* bDest = bJump->GetTarget(); + BasicBlock* const bDest = bJump->GetTarget(); if (!bDest->KindIs(BBJ_COND)) { @@ -2563,17 +2558,13 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) return false; } - // do not jump into another try region - BasicBlock* bDestNormalTarget = bDest->GetFalseTarget(); - if (bDestNormalTarget->hasTryIndex() && !BasicBlock::sameTryRegion(bJump, bDestNormalTarget)) - { - return false; - } + // We should have already compacted 'bDest' into 'bJump', if it is possible. + assert(!fgCanCompactBlock(bJump)); + + BasicBlock* const trueTarget = bDest->GetTrueTarget(); + BasicBlock* const falseTarget = bDest->GetFalseTarget(); - // This function is only called by fgReorderBlocks, which we do not run in the backend. - // If we wanted to run block reordering in the backend, we would need to be able to - // calculate cost information for LIR on a per-node basis in order for this function - // to work. + // This function is only called in the frontend. assert(!bJump->IsLIR()); assert(!bDest->IsLIR()); @@ -2597,10 +2588,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) bool haveProfileWeights = false; weight_t weightJump = bJump->bbWeight; weight_t weightDest = bDest->bbWeight; - weight_t weightNext = bJump->Next()->bbWeight; + weight_t weightNext = trueTarget->bbWeight; bool rareJump = bJump->isRunRarely(); bool rareDest = bDest->isRunRarely(); - bool rareNext = bJump->Next()->isRunRarely(); + bool rareNext = trueTarget->isRunRarely(); // If we have profile data then we calculate the number of time // the loop will iterate into loopIterations @@ -2611,7 +2602,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // if (bJump->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY) && bDest->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY) && - bJump->Next()->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY)) + trueTarget->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY)) { haveProfileWeights = true; @@ -2649,12 +2640,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) } // - // We we are ngen-ing: - // If the uncondional branch is a rarely run block then - // we are willing to have more code expansion since we - // won't be running code from this page + // If we are AOT compiling: if the unconditional branch is a rarely run block then we are willing to have + // more code expansion since we won't be running code from this page. // - if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + if (IsAot()) { if (rareJump) { @@ -2676,9 +2665,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) } #endif // DEBUG + // Computing the duplication cost may have triggered node reordering, so return true to indicate we modified IR if (costIsTooHigh) { - return false; + return true; } /* Looks good - duplicate the conditional block */ @@ -2692,12 +2682,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) { // Clone/substitute the expression. Statement* stmt = gtCloneStmt(curStmt); - - // cloneExpr doesn't handle everything. 
- if (stmt == nullptr) - { - return false; - } + assert(stmt != nullptr); if (fgNodeThreading == NodeThreading::AllTrees) { @@ -2722,15 +2707,16 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // Get to the condition node from the statement tree. GenTree* condTree = newLastStmt->GetRootNode(); - noway_assert(condTree->gtOper == GT_JTRUE); + noway_assert(condTree->OperIs(GT_JTRUE)); // Set condTree to the operand to the GT_JTRUE. - condTree = condTree->AsOp()->gtOp1; + condTree = condTree->gtGetOp1(); // This condTree has to be a RelOp comparison. - if (condTree->OperIsCompare() == false) + // If not, return true since we created new nodes. + if (!condTree->OperIsCompare()) { - return false; + return true; } // Join the two linked lists. @@ -2749,40 +2735,20 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) newStmtList->SetPrevStmt(newLastStmt); } - // - // Reverse the sense of the compare - // - gtReverseCond(condTree); - // We need to update the following flags of the bJump block if they were set in the bDest block bJump->CopyFlags(bDest, BBF_COPY_PROPAGATE); // Update bbRefs and bbPreds // - // For now we set the likelihood of the new branch to match - // the likelihood of the old branch. - // - // This may or may not match the block weight adjustments we're - // making. All this becomes easier to reconcile once we rely on - // edge likelihoods more and have synthesis running. - // - // Until then we won't worry that edges and blocks are potentially - // out of sync. - // - FlowEdge* const destFalseEdge = bDest->GetFalseEdge(); - FlowEdge* const destTrueEdge = bDest->GetTrueEdge(); + FlowEdge* const falseEdge = bDest->GetFalseEdge(); + FlowEdge* const trueEdge = bDest->GetTrueEdge(); - // bJump now falls through into the next block - // - BasicBlock* const bDestFalseTarget = bJump->Next(); - FlowEdge* const falseEdge = fgAddRefPred(bDestFalseTarget, bJump, destFalseEdge); + fgRedirectTargetEdge(bJump, falseTarget); + bJump->GetTargetEdge()->setLikelihood(falseEdge->getLikelihood()); - // bJump now jumps to bDest's normal jump target - // - fgRedirectTargetEdge(bJump, bDestNormalTarget); - bJump->GetTargetEdge()->setLikelihood(destTrueEdge->getLikelihood()); + FlowEdge* const newTrueEdge = fgAddRefPred(trueTarget, bJump, trueEdge); - bJump->SetCond(bJump->GetTargetEdge(), falseEdge); + bJump->SetCond(newTrueEdge, bJump->GetTargetEdge()); // Update profile data // @@ -2791,18 +2757,16 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // bJump no longer flows into bDest // bDest->decreaseBBProfileWeight(bJump->bbWeight); - bDestNormalTarget->decreaseBBProfileWeight(bJump->bbWeight * destFalseEdge->getLikelihood()); - bDestFalseTarget->decreaseBBProfileWeight(bJump->bbWeight * destTrueEdge->getLikelihood()); // Propagate bJump's weight into its new successors // - bDestNormalTarget->increaseBBProfileWeight(bJump->GetTrueEdge()->getLikelyWeight()); - bDestFalseTarget->increaseBBProfileWeight(falseEdge->getLikelyWeight()); + trueTarget->setBBProfileWeight(trueTarget->computeIncomingWeight()); + falseTarget->setBBProfileWeight(falseTarget->computeIncomingWeight()); - if ((bDestNormalTarget->NumSucc() > 0) || (bDestFalseTarget->NumSucc() > 0)) + if ((trueTarget->NumSucc() > 0) || (falseTarget->NumSucc() > 0)) { JITDUMP("fgOptimizeBranch: New flow out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", - fgPgoConsistent ? "is now" : "was already"); + bJump->bbNum, fgPgoConsistent ? 
"is now" : "was already"); fgPgoConsistent = false; } } @@ -2824,144 +2788,125 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) } #endif // DEBUG + // Removing flow from 'bJump' into 'bDest' may have made it possible to compact the latter. + BasicBlock* const uniquePred = bDest->GetUniquePred(this); + if ((uniquePred != nullptr) && fgCanCompactBlock(uniquePred)) + { + JITDUMP(FMT_BB " can now be compacted into its remaining predecessor.\n", bDest->bbNum); + fgCompactBlock(uniquePred); + } + return true; } //----------------------------------------------------------------------------- -// fgOptimizeSwitchJump: see if a switch has a dominant case, and modify to -// check for that case up front (aka switch peeling). +// fgPeelSwitch: Modify a switch to check for its dominant case up front. // -// Returns: -// True if the switch now has an upstream check for the dominant case. +// Parameters: +// block - The switch block with a dominant case // -bool Compiler::fgOptimizeSwitchJumps() +void Compiler::fgPeelSwitch(BasicBlock* block) { - if (!fgHasSwitch) - { - return false; - } - - bool modified = false; - - for (BasicBlock* const block : Blocks()) - { - // Lowering expands switches, so calling this method on lowered IR - // does not make sense. - // - assert(!block->IsLIR()); - - if (!block->KindIs(BBJ_SWITCH)) - { - continue; - } - - if (block->isRunRarely()) - { - continue; - } - - if (!block->GetSwitchTargets()->bbsHasDominantCase) - { - continue; - } - - // We currently will only see dominant cases with PGO. - // - assert(block->hasProfileWeight()); - - const unsigned dominantCase = block->GetSwitchTargets()->bbsDominantCase; + assert(block->KindIs(BBJ_SWITCH)); + assert(block->GetSwitchTargets()->bbsHasDominantCase); + assert(!block->isRunRarely()); - JITDUMP(FMT_BB " has switch with dominant case %u, considering peeling\n", block->bbNum, dominantCase); + // Lowering expands switches, so calling this method on lowered IR + // does not make sense. + // + assert(!block->IsLIR()); - // The dominant case should not be the default case, as we already peel that one. - // - assert(dominantCase < (block->GetSwitchTargets()->bbsCount - 1)); - BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase]->getDestinationBlock(); - Statement* const switchStmt = block->lastStmt(); - GenTree* const switchTree = switchStmt->GetRootNode(); - assert(switchTree->OperIs(GT_SWITCH)); - GenTree* const switchValue = switchTree->AsOp()->gtGetOp1(); + // We currently will only see dominant cases with PGO. + // + assert(block->hasProfileWeight()); - // Split the switch block just before at the switch. - // - // After this, newBlock is the switch block, and - // block is the upstream block. - // - BasicBlock* newBlock = nullptr; + const unsigned dominantCase = block->GetSwitchTargets()->bbsDominantCase; + JITDUMP(FMT_BB " has switch with dominant case %u, considering peeling\n", block->bbNum, dominantCase); - if (block->firstStmt() == switchStmt) - { - newBlock = fgSplitBlockAtBeginning(block); - } - else - { - newBlock = fgSplitBlockAfterStatement(block, switchStmt->GetPrevStmt()); - } + // The dominant case should not be the default case, as we already peel that one. 
+ // + assert(dominantCase < (block->GetSwitchTargets()->bbsCount - 1)); + BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase]->getDestinationBlock(); + Statement* const switchStmt = block->lastStmt(); + GenTree* const switchTree = switchStmt->GetRootNode(); + assert(switchTree->OperIs(GT_SWITCH)); + GenTree* const switchValue = switchTree->gtGetOp1(); + + // Split the switch block just before at the switch. + // + // After this, newBlock is the switch block, and + // block is the upstream block. + // + BasicBlock* newBlock = nullptr; - // Set up a compare in the upstream block, "stealing" the switch value tree. - // - GenTree* const dominantCaseCompare = gtNewOperNode(GT_EQ, TYP_INT, switchValue, gtNewIconNode(dominantCase)); - GenTree* const jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, dominantCaseCompare); - Statement* const jmpStmt = fgNewStmtFromTree(jmpTree, switchStmt->GetDebugInfo()); - fgInsertStmtAtEnd(block, jmpStmt); + if (block->firstStmt() == switchStmt) + { + newBlock = fgSplitBlockAtBeginning(block); + } + else + { + newBlock = fgSplitBlockAfterStatement(block, switchStmt->GetPrevStmt()); + } - // Reattach switch value to the switch. This may introduce a comma - // in the upstream compare tree, if the switch value expression is complex. - // - switchTree->AsOp()->gtOp1 = fgMakeMultiUse(&dominantCaseCompare->AsOp()->gtOp1); + // Set up a compare in the upstream block, "stealing" the switch value tree. + // + GenTree* const dominantCaseCompare = gtNewOperNode(GT_EQ, TYP_INT, switchValue, gtNewIconNode(dominantCase)); + GenTree* const jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, dominantCaseCompare); + Statement* const jmpStmt = fgNewStmtFromTree(jmpTree, switchStmt->GetDebugInfo()); + fgInsertStmtAtEnd(block, jmpStmt); - // Update flags - // - switchTree->gtFlags = switchTree->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT; - dominantCaseCompare->gtFlags |= dominantCaseCompare->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT; - jmpTree->gtFlags |= dominantCaseCompare->gtFlags & GTF_ALL_EFFECT; - dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE; + // Reattach switch value to the switch. This may introduce a comma + // in the upstream compare tree, if the switch value expression is complex. + // + switchTree->AsOp()->gtOp1 = fgMakeMultiUse(&dominantCaseCompare->AsOp()->gtOp1); - // Wire up the new control flow. - // - FlowEdge* const blockToTargetEdge = fgAddRefPred(dominantTarget, block); - FlowEdge* const blockToNewBlockEdge = newBlock->bbPreds; - block->SetCond(blockToTargetEdge, blockToNewBlockEdge); + // Update flags + // + switchTree->gtFlags = switchTree->gtGetOp1()->gtFlags & GTF_ALL_EFFECT; + dominantCaseCompare->gtFlags |= dominantCaseCompare->gtGetOp1()->gtFlags & GTF_ALL_EFFECT; + jmpTree->gtFlags |= dominantCaseCompare->gtFlags & GTF_ALL_EFFECT; + dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE; - // Update profile data - // - const weight_t fraction = newBlock->GetSwitchTargets()->bbsDominantFraction; - const weight_t blockToTargetWeight = block->bbWeight * fraction; + // Wire up the new control flow. 
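For readers unfamiliar with switch peeling, the source-level effect of `fgPeelSwitch` looks roughly like the sketch below (plain C++, not JIT IR; the 90% case distribution is an assumed example): the dominant case is tested with a single compare up front, and the cold path falls back to the original switch. In the real transformation the new conditional edge is also given the dominant case's likelihood, as the profile update code that follows shows.

```cpp
// Source-level illustration of peeling a dominant switch case.
#include <cstdio>

static int dispatchOriginal(int x)
{
    switch (x)
    {
        case 0:  return 10;
        case 1:  return 20; // profile says ~90% of executions take this case
        case 2:  return 30;
        default: return -1;
    }
}

static int dispatchPeeled(int x)
{
    // Peeled dominant case: the hot path is a single compare-and-branch.
    if (x == 1)
    {
        return 20;
    }
    // Cold path keeps the full switch; the peeled case is effectively dead in it.
    return dispatchOriginal(x);
}

int main()
{
    for (int x = 0; x < 4; x++)
    {
        std::printf("%d -> %d\n", x, dispatchPeeled(x));
    }
    return 0;
}
```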
+ // + FlowEdge* const blockToTargetEdge = fgAddRefPred(dominantTarget, block); + FlowEdge* const blockToNewBlockEdge = newBlock->bbPreds; + block->SetCond(blockToTargetEdge, blockToNewBlockEdge); - newBlock->decreaseBBProfileWeight(blockToTargetWeight); + // Update profile data + // + const weight_t fraction = newBlock->GetSwitchTargets()->bbsDominantFraction; + const weight_t blockToTargetWeight = block->bbWeight * fraction; - blockToTargetEdge->setLikelihood(fraction); - blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction)); + newBlock->decreaseBBProfileWeight(blockToTargetWeight); - JITDUMP("fgOptimizeSwitchJumps: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n", - newBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); - fgPgoConsistent = false; + blockToTargetEdge->setLikelihood(fraction); + blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction)); - // For now we leave the switch as is, since there's no way - // to indicate that one of the cases is now unreachable. - // - // But it no longer has a dominant case. - // - newBlock->GetSwitchTargets()->bbsHasDominantCase = false; + JITDUMP("fgPeelSwitch: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n", + newBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; - if (fgNodeThreading == NodeThreading::AllTrees) - { - // The switch tree has been modified. - JITDUMP("Rethreading " FMT_STMT "\n", switchStmt->GetID()); - gtSetStmtInfo(switchStmt); - fgSetStmtSeq(switchStmt); + // For now we leave the switch as is, since there's no way + // to indicate that one of the cases is now unreachable. + // + // But it no longer has a dominant case. + // + newBlock->GetSwitchTargets()->bbsHasDominantCase = false; - // fgNewStmtFromTree() already threaded the tree, but calling fgMakeMultiUse() might have - // added new nodes if a COMMA was introduced. - JITDUMP("Rethreading " FMT_STMT "\n", jmpStmt->GetID()); - gtSetStmtInfo(jmpStmt); - fgSetStmtSeq(jmpStmt); - } + if (fgNodeThreading == NodeThreading::AllTrees) + { + // The switch tree has been modified. + JITDUMP("Rethreading " FMT_STMT "\n", switchStmt->GetID()); + gtSetStmtInfo(switchStmt); + fgSetStmtSeq(switchStmt); - modified = true; + // fgNewStmtFromTree() already threaded the tree, but calling fgMakeMultiUse() might have + // added new nodes if a COMMA was introduced. + JITDUMP("Rethreading " FMT_STMT "\n", jmpStmt->GetID()); + gtSetStmtInfo(jmpStmt); + fgSetStmtSeq(jmpStmt); } - - return modified; } //----------------------------------------------------------------------------- @@ -3212,7 +3157,7 @@ bool Compiler::fgExpandRarelyRunBlocks() if (block->isBBCallFinallyPair()) { BasicBlock* bNext = block->Next(); - PREFIX_ASSUME(bNext != nullptr); + assert(bNext != nullptr); bNext->bbSetRunRarely(); #ifdef DEBUG if (verbose) @@ -3274,1703 +3219,145 @@ bool Compiler::fgExpandRarelyRunBlocks() return result; } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif - //----------------------------------------------------------------------------- -// fgReorderBlocks: reorder blocks to favor frequent fall through paths -// and move rare blocks to the end of the method/eh region. +// Compiler::ThreeOptLayout::EdgeCmp: Comparator for the 'cutPoints' priority queue. +// If 'left' has a bigger edge weight than 'right', 3-opt will consider it first. +// Else, 3-opt will consider 'right' first. 
// -// Arguments: -// useProfile - if true, use profile data (if available) to more aggressively -// reorder the blocks. +// Parameters: +// left - One of the two edges to compare +// right - The other edge to compare // // Returns: -// True if anything got reordered. Reordering blocks may require changing -// IR to reverse branch conditions. -// -// Notes: -// We currently allow profile-driven switch opts even when useProfile is false, -// as they are unlikely to lead to reordering.. +// True if 'right' should be considered before 'left', and false otherwise // -bool Compiler::fgReorderBlocks(bool useProfile) +template +/* static */ bool Compiler::ThreeOptLayout::EdgeCmp(const FlowEdge* left, const FlowEdge* right) { - noway_assert(opts.compDbgCode == false); - - // We can't relocate anything if we only have one block - if (fgFirstBB->IsLast()) - { - return false; - } - - bool newRarelyRun = false; - bool movedBlocks = false; - bool optimizedSwitches = false; - bool optimizedBranches = false; - - // First let us expand the set of run rarely blocks - newRarelyRun |= fgExpandRarelyRunBlocks(); - -#if defined(FEATURE_EH_WINDOWS_X86) - if (!UsesFunclets()) - { - movedBlocks |= fgRelocateEHRegions(); - } -#endif // FEATURE_EH_WINDOWS_X86 + assert(left != right); + const weight_t leftWeight = left->getLikelyWeight(); + const weight_t rightWeight = right->getLikelyWeight(); - // - // If we are using profile weights we can change some - // switch jumps into conditional test and jump - // - if (fgIsUsingProfileWeights()) + // Break ties by comparing the source blocks' bbIDs. + // If both edges are out of the same source block, use the target blocks' bbIDs. + if (leftWeight == rightWeight) { - optimizedSwitches = fgOptimizeSwitchJumps(); - if (optimizedSwitches) + BasicBlock* const leftSrc = left->getSourceBlock(); + BasicBlock* const rightSrc = right->getSourceBlock(); + if (leftSrc == rightSrc) { - fgUpdateFlowGraph(); + return left->getDestinationBlock()->bbID < right->getDestinationBlock()->bbID; } + + return leftSrc->bbID < rightSrc->bbID; } - if (useProfile) - { - // Don't run the new layout until we get to the backend, - // since LSRA can introduce new blocks, and lowering can churn the flowgraph. - // - if (JitConfig.JitDoReversePostOrderLayout()) - { - return (newRarelyRun || movedBlocks || optimizedSwitches); - } + return leftWeight < rightWeight; +} - // We will be reordering blocks, so ensure the false target of a BBJ_COND block is its next block - for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next()) - { - if (block->KindIs(BBJ_COND) && !block->NextIs(block->GetFalseTarget())) - { - if (block->CanRemoveJumpToTarget(block->GetTrueTarget(), this)) - { - // Reverse the jump condition - GenTree* test = block->lastNode(); - assert(test->OperIsConditionalJump()); - test->AsOp()->gtOp1 = gtReverseCond(test->AsOp()->gtOp1); - - FlowEdge* const newFalseEdge = block->GetTrueEdge(); - FlowEdge* const newTrueEdge = block->GetFalseEdge(); - block->SetTrueEdge(newTrueEdge); - block->SetFalseEdge(newFalseEdge); - assert(block->CanRemoveJumpToTarget(block->GetFalseTarget(), this)); - } - else - { - BasicBlock* jmpBlk = fgConnectFallThrough(block, block->GetFalseTarget()); - assert(jmpBlk != nullptr); - assert(block->NextIs(jmpBlk)); +//----------------------------------------------------------------------------- +// Compiler::ThreeOptLayout::ThreeOptLayout: Constructs a ThreeOptLayout instance. 
+// +// Parameters: +// comp - The Compiler instance +// initialLayout - An array of the blocks to be reordered +// numHotBlocks - The number of hot blocks at the beginning of 'initialLayout' +// +// Notes: +// To save an allocation, we will reuse the DFS tree's underlying array for 'tempOrder'. +// This means we will trash the DFS tree. +// +template +Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp, BasicBlock** initialLayout, unsigned numHotBlocks) + : compiler(comp) + , cutPoints(comp->getAllocator(CMK_FlowEdge), &ThreeOptLayout::EdgeCmp) + , blockOrder(initialLayout) + , tempOrder(comp->m_dfsTree->GetPostOrder()) + , numCandidateBlocks(numHotBlocks) +{ +} - // Skip next block - block = jmpBlk; - } - } - } - } +//----------------------------------------------------------------------------- +// Compiler::ThreeOptLayout::IsCandidateBlock: Determines if a block is being considered for reordering +// by checking if it is in 'blockOrder'. +// +// Parameters: +// block - the block to check +// +// Returns: +// True if 'block' is in the set of candidate blocks, false otherwise +// +template +bool Compiler::ThreeOptLayout::IsCandidateBlock(BasicBlock* block) const +{ + assert(block != nullptr); + return (block->bbPreorderNum < numCandidateBlocks) && (blockOrder[block->bbPreorderNum] == block); +} #ifdef DEBUG - if (verbose) - { - printf("*************** In fgReorderBlocks()\n"); - - printf("\nInitial BasicBlocks"); - fgDispBasicBlocks(verboseTrees); - printf("\n"); - } -#endif // DEBUG - - BasicBlock* bNext; - BasicBlock* bPrev; - BasicBlock* block; - unsigned XTnum; - EHblkDsc* HBtab; +//----------------------------------------------------------------------------- +// Compiler::ThreeOptLayout::GetLayoutCost: Computes the cost of the layout for the region +// bounded by 'startPos' and 'endPos'. +// +// Parameters: +// startPos - The starting index of the region +// endPos - The inclusive ending index of the region +// +// Returns: +// The region's layout cost +// +template +weight_t Compiler::ThreeOptLayout::GetLayoutCost(unsigned startPos, unsigned endPos) +{ + assert(startPos <= endPos); + assert(endPos < numCandidateBlocks); + weight_t layoutCost = BB_ZERO_WEIGHT; - // Iterate over every block, remembering our previous block in bPrev - for (bPrev = fgFirstBB, block = bPrev->Next(); block != nullptr; bPrev = block, block = block->Next()) + for (unsigned position = startPos; position < endPos; position++) { - // - // Consider relocating the rarely run blocks such that they are at the end of the method. - // We also consider reversing conditional branches so that they become a not taken forwards branch. - // - - // Don't consider BBJ_CALLFINALLYRET; it should be processed together with BBJ_CALLFINALLY. - if (block->KindIs(BBJ_CALLFINALLYRET)) - { - continue; - } - - // If block is marked with a BBF_KEEP_BBJ_ALWAYS flag then we don't move the block - if (block->HasFlag(BBF_KEEP_BBJ_ALWAYS)) - { - continue; - } + layoutCost += GetCost(blockOrder[position], blockOrder[position + 1]); + } - // Finally and handlers blocks are to be kept contiguous. 
- // TODO-CQ: Allow reordering within the handler region - if (block->hasHndIndex()) - { - continue; - } + layoutCost += blockOrder[endPos]->bbWeight; + return layoutCost; +} +#endif // DEBUG - bool reorderBlock = useProfile; - const bool isRare = block->isRunRarely(); - BasicBlock* bDest = nullptr; - bool forwardBranch = false; - bool backwardBranch = false; +//----------------------------------------------------------------------------- +// Compiler::ThreeOptLayout::GetCost: Computes the cost of placing 'next' after 'block'. +// Layout cost is modeled as the sum of block weights, minus the weights of edges that fall through. +// +// Parameters: +// block - The block to consider creating fallthrough from +// next - The block to consider creating fallthrough into +// +// Returns: +// The cost +// +template +weight_t Compiler::ThreeOptLayout::GetCost(BasicBlock* block, BasicBlock* next) +{ + assert(block != nullptr); + assert(next != nullptr); - // Setup bDest - if (bPrev->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET)) - { - bDest = bPrev->GetTarget(); - forwardBranch = fgIsForwardBranch(bPrev, bDest); - backwardBranch = !forwardBranch; - } - else if (bPrev->KindIs(BBJ_COND)) - { - // fgReorderBlocks is called in more than one optimization phase, - // but only does any reordering in optOptimizeLayout. - // At that point, we expect implicit fallthrough to be restored for BBJ_COND blocks. - assert(bPrev->FalseTargetIs(block) || !reorderBlock); - bDest = bPrev->GetTrueTarget(); - forwardBranch = fgIsForwardBranch(bPrev, bDest); - backwardBranch = !forwardBranch; - } + const weight_t maxCost = block->bbWeight; + const FlowEdge* fallthroughEdge = compiler->fgGetPredForBlock(next, block); - // We will look for bPrev as a non rarely run block followed by block as a rarely run block - // - if (bPrev->isRunRarely()) - { - reorderBlock = false; - } + if (fallthroughEdge != nullptr) + { + // The edge's weight should never exceed its source block's weight, + // but handle negative results from rounding errors in getLikelyWeight(), just in case + return max(0.0, maxCost - fallthroughEdge->getLikelyWeight()); + } - // If the weights of the bPrev, block and bDest were all obtained from a profile run - // then we can use them to decide if it is useful to reverse this conditional branch - - weight_t profHotWeight = -1; - - if (useProfile && bPrev->hasProfileWeight() && block->hasProfileWeight() && - ((bDest == nullptr) || bDest->hasProfileWeight())) - { - // - // All blocks have profile information - // - if (forwardBranch) - { - if (bPrev->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET)) - { - if (bPrev->JumpsToNext()) - { - bDest = nullptr; - goto CHECK_FOR_RARE; - } - // We can pull up the blocks that the unconditional jump branches to - // if the weight of bDest is greater or equal to the weight of block - // also the weight of bDest can't be zero. - // Don't reorder if bPrev's jump destination is the next block. 
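The layout cost used by `GetLayoutCost`/`GetCost` above can be modeled independently of the JIT's types. The sketch below (hypothetical `LaidOutBlock` struct, weights chosen for illustration) charges each block its weight minus the weight of its fallthrough edge, plus the final block's weight, so an ordering that places a hot successor immediately after its predecessor scores lower.

```cpp
// Standalone model of the 3-opt layout cost; not the JIT's data structures.
#include <cstdio>
#include <vector>
#include <algorithm>

struct LaidOutBlock
{
    double weight;            // block weight
    double fallthroughWeight; // weight of the edge into the next laid-out block, 0 if none
};

static double layoutCost(const std::vector<LaidOutBlock>& order)
{
    double cost = 0.0;
    for (size_t i = 0; i + 1 < order.size(); i++)
    {
        // Fallthrough weight never exceeds the block weight here, but clamp anyway.
        cost += std::max(0.0, order[i].weight - order[i].fallthroughWeight);
    }
    cost += order.back().weight; // the last block never falls through within the region
    return cost;
}

int main()
{
    // Placing the hot successor next (fallthrough weight 90) is cheaper than not.
    std::printf("hot next:  %.1f\n", layoutCost({{100, 90}, {90, 0}, {10, 0}})); // 110
    std::printf("cold next: %.1f\n", layoutCost({{100, 0}, {10, 0}, {90, 0}}));  // 200
    return 0;
}
```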
- // - else if ((bDest->bbWeight < block->bbWeight) || (bDest->bbWeight == BB_ZERO_WEIGHT)) - { - reorderBlock = false; - } - else - { - // - // If this remains true then we will try to pull up bDest to succeed bPrev - // - bool moveDestUp = true; - - // - // The edge bPrev -> bDest must have a higher weight - // than every other edge into bDest - // - weight_t const weightToBeat = bPrev->GetTargetEdge()->getLikelyWeight(); - - // Examine all of the other edges into bDest - for (FlowEdge* const edge : bDest->PredEdges()) - { - if (edge->getLikelyWeight() > weightToBeat) - { - moveDestUp = false; - break; - } - } - - // Are we still good to move bDest up to bPrev? - if (moveDestUp) - { - // - // We will consider all blocks that have less weight than profHotWeight to be - // uncommonly run blocks as compared with the hot path of bPrev taken-jump to bDest - // - profHotWeight = bDest->bbWeight - 1; - } - else - { - if (block->isRunRarely()) - { - // We will move any rarely run blocks blocks - profHotWeight = 0; - } - else - { - // We will move all blocks that have a weight less or equal to our fall through block - profHotWeight = block->bbWeight + 1; - } - // But we won't try to connect with bDest - bDest = nullptr; - } - } - } - else // (bPrev->KindIs(BBJ_COND)) - { - noway_assert(bPrev->KindIs(BBJ_COND)); - // - // We will reverse branch if the true edge's likelihood is more than 51%. - // - // We will set up profHotWeight to be maximum bbWeight that a block - // could have for us not to want to reverse the conditional branch. - // - // We will consider all blocks that have less weight than profHotWeight to be - // uncommonly run blocks compared to the weight of bPrev's true edge. - // - // We will check if bPrev's true edge weight - // is more than twice bPrev's false edge weight. - // - // bPrev --> [BB04, weight 100] - // | \. - // falseEdge ---------------> O \. - // [likelihood=0.33] V \. - // block --> [BB05, weight 33] \. - // \. - // trueEdge ------------------------------> O - // [likelihood=0.67] | - // V - // bDest ---------------> [BB08, weight 67] - // - assert(bPrev->FalseTargetIs(block)); - FlowEdge* trueEdge = bPrev->GetTrueEdge(); - FlowEdge* falseEdge = bPrev->GetFalseEdge(); - noway_assert(trueEdge != nullptr); - noway_assert(falseEdge != nullptr); - - // If we take the true branch more than half the time, we will reverse the branch. - if (trueEdge->getLikelihood() < 0.51) - { - reorderBlock = false; - } - else - { - // set profHotWeight - profHotWeight = falseEdge->getLikelyWeight() - 1; - } - } - } - else // not a forwardBranch - { - if (bPrev->bbFallsThrough()) - { - goto CHECK_FOR_RARE; - } - - // Here we should pull up the highest weight block remaining - // and place it here since bPrev does not fall through. 
- - weight_t highestWeight = 0; - BasicBlock* candidateBlock = nullptr; - BasicBlock* lastNonFallThroughBlock = bPrev; - BasicBlock* bTmp = bPrev->Next(); - - while (bTmp != nullptr) - { - // Don't try to split a call finally pair - // - if (bTmp->isBBCallFinallyPair()) - { - // Move bTmp forward - bTmp = bTmp->Next(); - } - - // - // Check for loop exit condition - // - if (bTmp == nullptr) - { - break; - } - - // - // if its weight is the highest one we've seen and - // the EH regions allow for us to place bTmp after bPrev - // - if ((bTmp->bbWeight > highestWeight) && fgEhAllowsMoveBlock(bPrev, bTmp)) - { - // When we have a current candidateBlock that is a conditional (or unconditional) jump - // to bTmp (which is a higher weighted block) then it is better to keep our current - // candidateBlock and have it fall into bTmp - // - if ((candidateBlock == nullptr) || !candidateBlock->KindIs(BBJ_COND, BBJ_ALWAYS) || - (candidateBlock->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET) && - (!candidateBlock->TargetIs(bTmp) || candidateBlock->JumpsToNext())) || - (candidateBlock->KindIs(BBJ_COND) && !candidateBlock->TrueTargetIs(bTmp))) - { - // otherwise we have a new candidateBlock - // - highestWeight = bTmp->bbWeight; - candidateBlock = lastNonFallThroughBlock->Next(); - } - } - - const bool bTmpJumpsToNext = bTmp->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET) && bTmp->JumpsToNext(); - if ((!bTmp->bbFallsThrough() && !bTmpJumpsToNext) || (bTmp->bbWeight == BB_ZERO_WEIGHT)) - { - lastNonFallThroughBlock = bTmp; - } - - bTmp = bTmp->Next(); - } - - // If we didn't find a suitable block then skip this - if (highestWeight == 0) - { - reorderBlock = false; - } - else - { - noway_assert(candidateBlock != nullptr); - - // If the candidateBlock is the same a block then skip this - if (candidateBlock == block) - { - reorderBlock = false; - } - else - { - // Set bDest to the block that we want to come after bPrev - bDest = candidateBlock; - - // set profHotWeight - profHotWeight = highestWeight - 1; - } - } - } - } - else // we don't have good profile info (or we are falling through) - { - - CHECK_FOR_RARE:; - - /* We only want to reorder when we have a rarely run */ - /* block right after a normal block, */ - /* (bPrev is known to be a normal block at this point) */ - if (!isRare) - { - if (block->NextIs(bDest) && block->KindIs(BBJ_RETURN) && bPrev->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET)) - { - // This is a common case with expressions like "return Expr1 && Expr2" -- move the return - // to establish fall-through. - } - else - { - reorderBlock = false; - } - } - else - { - /* If the jump target bDest is also a rarely run block then we don't want to do the reversal */ - if (bDest && bDest->isRunRarely()) - { - reorderBlock = false; /* Both block and bDest are rarely run */ - } - else - { - // We will move any rarely run blocks blocks - profHotWeight = 0; - } - } - } - - if (reorderBlock == false) - { - // - // Check for an unconditional branch to a conditional branch - // which also branches back to our next block - // - const bool optimizedBranch = fgOptimizeBranch(bPrev); - if (optimizedBranch) - { - noway_assert(bPrev->KindIs(BBJ_COND)); - optimizedBranches = true; - } - continue; - } - - // Now we need to determine which blocks should be moved - // - // We consider one of two choices: - // - // 1. 
Moving the fall-through blocks (or rarely run blocks) down to - // later in the method and hopefully connecting the jump dest block - // so that it becomes the fall through block - // - // And when bDest is not NULL, we also consider: - // - // 2. Moving the bDest block (or blocks) up to bPrev - // so that it could be used as a fall through block - // - // We will prefer option #1 if we are able to connect the jump dest - // block as the fall though block otherwise will we try to use option #2 - // - - // - // Consider option #1: relocating blocks starting at 'block' - // to later in flowgraph - // - // We set bStart to the first block that will be relocated - // and bEnd to the last block that will be relocated - - BasicBlock* bStart = block; - BasicBlock* bEnd = bStart; - bNext = bEnd->Next(); - bool connected_bDest = false; - - if ((backwardBranch && !isRare) || block->HasFlag(BBF_DONT_REMOVE)) // Don't choose option #1 when block is the - // start of a try region - { - bStart = nullptr; - bEnd = nullptr; - } - else - { - while (true) - { - // Don't try to split a call finally pair - // - if (bEnd->isBBCallFinallyPair()) - { - // Move bEnd and bNext forward - bEnd = bNext; - bNext = bNext->Next(); - } - - // - // Check for loop exit condition - // - if (bNext == nullptr) - { - break; - } - - // Check if we've reached the funclets region, at the end of the function - if (bEnd->NextIs(fgFirstFuncletBB)) - { - break; - } - - if (bNext == bDest) - { - connected_bDest = true; - break; - } - - // All the blocks must have the same try index - // and must not have the BBF_DONT_REMOVE flag set - - if (!BasicBlock::sameTryRegion(bStart, bNext) || bNext->HasFlag(BBF_DONT_REMOVE)) - { - // exit the loop, bEnd is now set to the - // last block that we want to relocate - break; - } - - // If we are relocating rarely run blocks.. - if (isRare) - { - // ... then all blocks must be rarely run - if (!bNext->isRunRarely()) - { - // exit the loop, bEnd is now set to the - // last block that we want to relocate - break; - } - } - else - { - // If we are moving blocks that are hot then all - // of the blocks moved must be less than profHotWeight */ - if (bNext->bbWeight >= profHotWeight) - { - // exit the loop, bEnd is now set to the - // last block that we would relocate - break; - } - } - - // Move bEnd and bNext forward - bEnd = bNext; - bNext = bNext->Next(); - } - - // Set connected_bDest to true if moving blocks [bStart .. bEnd] - // connects with the jump dest of bPrev (i.e bDest) and - // thus allows bPrev fall through instead of jump. 
- if (bNext == bDest) - { - connected_bDest = true; - } - } - - // Now consider option #2: Moving the jump dest block (or blocks) - // up to bPrev - // - // The variables bStart2, bEnd2 and bPrev2 are used for option #2 - // - // We will setup bStart2 to the first block that will be relocated - // and bEnd2 to the last block that will be relocated - // and bPrev2 to be the lexical pred of bDest - // - // If after this calculation bStart2 is NULL we cannot use option #2, - // otherwise bStart2, bEnd2 and bPrev2 are all non-NULL and we will use option #2 - - BasicBlock* bStart2 = nullptr; - BasicBlock* bEnd2 = nullptr; - BasicBlock* bPrev2 = nullptr; - - // If option #1 didn't connect bDest and bDest isn't NULL - if ((connected_bDest == false) && (bDest != nullptr) && - // The jump target cannot be moved if it has the BBF_DONT_REMOVE flag set - !bDest->HasFlag(BBF_DONT_REMOVE)) - { - // We will consider option #2: relocating blocks starting at 'bDest' to succeed bPrev - // - // setup bPrev2 to be the lexical pred of bDest - - bPrev2 = block; - while (bPrev2 != nullptr) - { - if (bPrev2->NextIs(bDest)) - { - break; - } - - bPrev2 = bPrev2->Next(); - } - - if ((bPrev2 != nullptr) && fgEhAllowsMoveBlock(bPrev, bDest)) - { - // We have decided that relocating bDest to be after bPrev is best - // Set bStart2 to the first block that will be relocated - // and bEnd2 to the last block that will be relocated - // - // Assigning to bStart2 selects option #2 - // - bStart2 = bDest; - bEnd2 = bStart2; - bNext = bEnd2->Next(); - - while (true) - { - // Don't try to split a call finally pair - // - if (bEnd2->isBBCallFinallyPair()) - { - noway_assert(bNext->KindIs(BBJ_CALLFINALLYRET)); - // Move bEnd2 and bNext forward - bEnd2 = bNext; - bNext = bNext->Next(); - } - - // Check for the Loop exit conditions - - if (bNext == nullptr) - { - break; - } - - if (bEnd2->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET) && bEnd2->JumpsToNext()) - { - // Treat jumps to next block as fall-through - } - else if (bEnd2->bbFallsThrough() == false) - { - break; - } - - // If we are relocating rarely run blocks.. - // All the blocks must have the same try index, - // and must not have the BBF_DONT_REMOVE flag set - - if (!BasicBlock::sameTryRegion(bStart2, bNext) || bNext->HasFlag(BBF_DONT_REMOVE)) - { - // exit the loop, bEnd2 is now set to the - // last block that we want to relocate - break; - } - - if (isRare) - { - /* ... then all blocks must not be rarely run */ - if (bNext->isRunRarely()) - { - // exit the loop, bEnd2 is now set to the - // last block that we want to relocate - break; - } - } - else - { - // If we are relocating hot blocks - // all blocks moved must be greater than profHotWeight - if (bNext->bbWeight <= profHotWeight) - { - // exit the loop, bEnd2 is now set to the - // last block that we want to relocate - break; - } - } - - // Move bEnd2 and bNext forward - bEnd2 = bNext; - bNext = bNext->Next(); - } - } - } - - // If we are using option #1 then ... - if (bStart2 == nullptr) - { - // Don't use option #1 for a backwards branch - if (bStart == nullptr) - { - continue; - } - - // .... 
Don't move a set of blocks that are already at the end of the main method - if (bEnd == fgLastBBInMainFunction()) - { - continue; - } - } - -#ifdef DEBUG - if (verbose) - { - if (bDest != nullptr) - { - if (bPrev->KindIs(BBJ_COND)) - { - printf("Decided to reverse conditional branch at block " FMT_BB " branch to " FMT_BB " ", - bPrev->bbNum, bDest->bbNum); - } - else if (bPrev->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET)) - { - printf("Decided to straighten unconditional branch at block " FMT_BB " branch to " FMT_BB " ", - bPrev->bbNum, bDest->bbNum); - } - else - { - printf("Decided to place hot code after " FMT_BB ", placed " FMT_BB " after this block ", - bPrev->bbNum, bDest->bbNum); - } - - if (profHotWeight > 0) - { - printf("because of IBC profile data\n"); - } - else - { - if (bPrev->bbFallsThrough()) - { - printf("since it falls into a rarely run block\n"); - } - else - { - printf("since it is succeeded by a rarely run block\n"); - } - } - } - else - { - printf("Decided to relocate block(s) after block " FMT_BB " since they are %s block(s)\n", bPrev->bbNum, - block->isRunRarely() ? "rarely run" : "uncommonly run"); - } - } -#endif // DEBUG - - // We will set insertAfterBlk to the block the precedes our insertion range - // We will set bStartPrev to be the block that precedes the set of blocks that we are moving - BasicBlock* insertAfterBlk; - BasicBlock* bStartPrev; - - if (bStart2 != nullptr) - { - // Option #2: relocating blocks starting at 'bDest' to follow bPrev - - // Update bStart and bEnd so that we can use these two for all later operations - bStart = bStart2; - bEnd = bEnd2; - - // Set bStartPrev to be the block that comes before bStart - bStartPrev = bPrev2; - - // We will move [bStart..bEnd] to immediately after bPrev - insertAfterBlk = bPrev; - } - else - { - // option #1: Moving the fall-through blocks (or rarely run blocks) down to later in the method - - // Set bStartPrev to be the block that come before bStart - bStartPrev = bPrev; - - // We will move [bStart..bEnd] but we will pick the insert location later - insertAfterBlk = nullptr; - } - - // We are going to move [bStart..bEnd] so they can't be NULL - noway_assert(bStart != nullptr); - noway_assert(bEnd != nullptr); - - // bEnd can't be a BBJ_CALLFINALLY unless it is a RETLESS call - noway_assert(!bEnd->KindIs(BBJ_CALLFINALLY) || bEnd->HasFlag(BBF_RETLESS_CALL)); - - // bStartPrev must be set to the block that precedes bStart - noway_assert(bStartPrev->NextIs(bStart)); - - // Since we will be unlinking [bStart..bEnd], - // we need to compute and remember if bStart is in each of - // the try and handler regions - // - bool* fStartIsInTry = nullptr; - bool* fStartIsInHnd = nullptr; - - if (compHndBBtabCount > 0) - { - fStartIsInTry = new (this, CMK_Generic) bool[compHndBBtabCount]; - fStartIsInHnd = new (this, CMK_Generic) bool[compHndBBtabCount]; - - for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) - { - fStartIsInTry[XTnum] = HBtab->InTryRegionBBRange(bStart); - fStartIsInHnd[XTnum] = HBtab->InHndRegionBBRange(bStart); - } - } - - /* Temporarily unlink [bStart..bEnd] from the flow graph */ - const bool bStartPrevJumpsToNext = bStartPrev->KindIs(BBJ_ALWAYS) && bStartPrev->JumpsToNext(); - fgUnlinkRange(bStart, bEnd); - - if (insertAfterBlk == nullptr) - { - // Find new location for the unlinked block(s) - // Set insertAfterBlk to the block which will precede the insertion point - - if (!bStart->hasTryIndex() && isRare) - { - // We'll just insert the blocks at the end of the method. 
If the method - // has funclets, we will insert at the end of the main method but before - // any of the funclets. Note that we create funclets before we call - // fgReorderBlocks(). - - insertAfterBlk = fgLastBBInMainFunction(); - noway_assert(insertAfterBlk != bPrev); - } - else - { - BasicBlock* startBlk; - BasicBlock* lastBlk; - EHblkDsc* ehDsc = ehInitTryBlockRange(bStart, &startBlk, &lastBlk); - - BasicBlock* endBlk; - - /* Setup startBlk and endBlk as the range to search */ - - if (ehDsc != nullptr) - { - endBlk = lastBlk->Next(); - - /* - Multiple (nested) try regions might start from the same BB. - For example, - - try3 try2 try1 - |--- |--- |--- BB01 - | | | BB02 - | | |--- BB03 - | | BB04 - | |------------ BB05 - | BB06 - |------------------- BB07 - - Now if we want to insert in try2 region, we will start with startBlk=BB01. - The following loop will allow us to start from startBlk==BB04. - */ - while (!BasicBlock::sameTryRegion(startBlk, bStart) && (startBlk != endBlk)) - { - startBlk = startBlk->Next(); - } - - // startBlk cannot equal endBlk as it must come before endBlk - if (startBlk == endBlk) - { - goto CANNOT_MOVE; - } - - // we also can't start searching the try region at bStart - if (startBlk == bStart) - { - // if bEnd is the last block in the method or - // or if bEnd->bbNext is in a different try region - // then we cannot move the blocks - // - if (bEnd->IsLast() || !BasicBlock::sameTryRegion(startBlk, bEnd->Next())) - { - goto CANNOT_MOVE; - } - - startBlk = bEnd->Next(); - - // Check that the new startBlk still comes before endBlk - - // startBlk cannot equal endBlk as it must come before endBlk - if (startBlk == endBlk) - { - goto CANNOT_MOVE; - } - - BasicBlock* tmpBlk = startBlk; - while ((tmpBlk != endBlk) && (tmpBlk != nullptr)) - { - tmpBlk = tmpBlk->Next(); - } - - // when tmpBlk is NULL that means startBlk is after endBlk - // so there is no way to move bStart..bEnd within the try region - if (tmpBlk == nullptr) - { - goto CANNOT_MOVE; - } - } - } - else - { - noway_assert(isRare == false); - - /* We'll search through the entire main method */ - startBlk = fgFirstBB; - endBlk = fgEndBBAfterMainFunction(); - } - - // Calculate nearBlk and jumpBlk and then call fgFindInsertPoint() - // to find our insertion block - // - { - // If the set of blocks that we are moving ends with a BBJ_ALWAYS to - // another [rarely run] block that comes after bPrev (forward branch) - // then we can set up nearBlk to eliminate this jump sometimes - // - BasicBlock* nearBlk = nullptr; - BasicBlock* jumpBlk = nullptr; - - if (bEnd->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET) && !bEnd->JumpsToNext() && - (!isRare || bEnd->GetTarget()->isRunRarely()) && - fgIsForwardBranch(bEnd, bEnd->GetTarget(), bPrev)) - { - // Set nearBlk to be the block in [startBlk..endBlk] - // such that nearBlk->NextIs(bEnd->JumpDest) - // if no such block exists then set nearBlk to NULL - nearBlk = startBlk; - jumpBlk = bEnd; - do - { - // We do not want to set nearBlk to bPrev - // since then we will not move [bStart..bEnd] - // - if (nearBlk != bPrev) - { - // Check if nearBlk satisfies our requirement - if (nearBlk->NextIs(bEnd->GetTarget())) - { - break; - } - } - - // Did we reach the endBlk? - if (nearBlk == endBlk) - { - nearBlk = nullptr; - break; - } - - // advance nearBlk to the next block - nearBlk = nearBlk->Next(); - - } while (nearBlk != nullptr); - } - - // if nearBlk is NULL then we set nearBlk to be the - // first block that we want to insert after. 
- if (nearBlk == nullptr) - { - if (bDest != nullptr) - { - // we want to insert after bDest - nearBlk = bDest; - } - else - { - // we want to insert after bPrev - nearBlk = bPrev; - } - } - - /* Set insertAfterBlk to the block which we will insert after. */ - - insertAfterBlk = - fgFindInsertPoint(bStart->bbTryIndex, - true, // Insert in the try region. - startBlk, endBlk, nearBlk, jumpBlk, bStart->bbWeight == BB_ZERO_WEIGHT); - } - - /* See if insertAfterBlk is the same as where we started, */ - /* or if we could not find any insertion point */ - - if ((insertAfterBlk == bPrev) || (insertAfterBlk == nullptr)) - { - CANNOT_MOVE:; - /* We couldn't move the blocks, so put everything back */ - /* relink [bStart .. bEnd] into the flow graph */ - - bPrev->SetNext(bStart); - if (!bEnd->IsLast()) - { - bEnd->Next()->SetPrev(bEnd); - } -#ifdef DEBUG - if (verbose) - { - if (bStart != bEnd) - { - printf("Could not relocate blocks (" FMT_BB " .. " FMT_BB ")\n", bStart->bbNum, - bEnd->bbNum); - } - else - { - printf("Could not relocate block " FMT_BB "\n", bStart->bbNum); - } - } -#endif // DEBUG - continue; - } - } - } - - noway_assert(insertAfterBlk != nullptr); - noway_assert(bStartPrev != nullptr); - noway_assert(bStartPrev != insertAfterBlk); - -#ifdef DEBUG - movedBlocks = true; - - if (verbose) - { - const char* msg; - if (bStart2 != nullptr) - { - msg = "hot"; - } - else - { - if (isRare) - { - msg = "rarely run"; - } - else - { - msg = "uncommon"; - } - } - - printf("Relocated %s ", msg); - if (bStart != bEnd) - { - printf("blocks (" FMT_BB " .. " FMT_BB ")", bStart->bbNum, bEnd->bbNum); - } - else - { - printf("block " FMT_BB, bStart->bbNum); - } - - if (bPrev->KindIs(BBJ_COND)) - { - printf(" by reversing conditional jump at " FMT_BB "\n", bPrev->bbNum); - } - else - { - printf("\n", bPrev->bbNum); - } - } -#endif // DEBUG - - if (bPrev->KindIs(BBJ_COND)) - { - /* Reverse the bPrev jump condition */ - Statement* const condTestStmt = bPrev->lastStmt(); - GenTree* const condTest = condTestStmt->GetRootNode(); - - noway_assert(condTest->gtOper == GT_JTRUE); - condTest->AsOp()->gtOp1 = gtReverseCond(condTest->AsOp()->gtOp1); - - FlowEdge* const trueEdge = bPrev->GetTrueEdge(); - FlowEdge* const falseEdge = bPrev->GetFalseEdge(); - bPrev->SetTrueEdge(falseEdge); - bPrev->SetFalseEdge(trueEdge); - - // may need to rethread - // - if (fgNodeThreading == NodeThreading::AllTrees) - { - JITDUMP("Rethreading " FMT_STMT "\n", condTestStmt->GetID()); - gtSetStmtInfo(condTestStmt); - fgSetStmtSeq(condTestStmt); - } - - if (bStart2 != nullptr) - { - noway_assert(insertAfterBlk == bPrev); - noway_assert(insertAfterBlk->NextIs(block)); - } - } - - // If we are moving blocks that are at the end of a try or handler - // we will need to shorten ebdTryLast or ebdHndLast - // - ehUpdateLastBlocks(bEnd, bStartPrev); - - // If we are moving blocks into the end of a try region or handler region - // we will need to extend ebdTryLast or ebdHndLast so the blocks that we - // are moving are part of this try or handler region. - // - for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) - { - // Are we moving blocks to the end of a try region? - if (HBtab->ebdTryLast == insertAfterBlk) - { - if (fStartIsInTry[XTnum]) - { - // bStart..bEnd is in the try, so extend the try region - fgSetTryEnd(HBtab, bEnd); - } - } - - // Are we moving blocks to the end of a handler region? 
- if (HBtab->ebdHndLast == insertAfterBlk) - { - if (fStartIsInHnd[XTnum]) - { - // bStart..bEnd is in the handler, so extend the handler region - fgSetHndEnd(HBtab, bEnd); - } - } - } - - /* We have decided to insert the block(s) after 'insertAfterBlk' */ - fgMoveBlocksAfter(bStart, bEnd, insertAfterBlk); - - if (bDest) - { - /* We may need to insert an unconditional branch after bPrev to bDest */ - fgConnectFallThrough(bPrev, bDest); - } - else - { - /* If bPrev falls through, we must insert a jump to block */ - fgConnectFallThrough(bPrev, block); - } - - BasicBlock* bSkip = bEnd->Next(); - - /* If bEnd falls through, we must insert a jump to bNext */ - fgConnectFallThrough(bEnd, bNext); - - if (bStart2 == nullptr) - { - /* If insertAfterBlk falls through, we are forced to */ - /* add a jump around the block(s) we just inserted */ - fgConnectFallThrough(insertAfterBlk, bSkip); - } - else - { - /* We may need to insert an unconditional branch after bPrev2 to bStart */ - fgConnectFallThrough(bPrev2, bStart); - } - -#if DEBUG - if (verbose) - { - printf("\nAfter this change in fgReorderBlocks the BB graph is:"); - fgDispBasicBlocks(verboseTrees); - printf("\n"); - } - fgVerifyHandlerTab(); - - // Make sure that the predecessor lists are accurate - if (expensiveDebugCheckLevel >= 2) - { - fgDebugCheckBBlist(); - } -#endif // DEBUG - - // Set our iteration point 'block' to be the new bPrev->bbNext - // It will be used as the next bPrev - block = bPrev->Next(); - - } // end of for loop(bPrev,block) - - const bool changed = movedBlocks || newRarelyRun || optimizedSwitches || optimizedBranches; - - if (changed) - { -#if DEBUG - // Make sure that the predecessor lists are accurate - if (expensiveDebugCheckLevel >= 2) - { - fgDebugCheckBBlist(); - } -#endif // DEBUG - } - - return changed; -} -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - -//----------------------------------------------------------------------------- -// fgMoveHotJumps: Try to move jumps to fall into their successors, if the jump is sufficiently hot. -// -// Template parameters: -// hasEH - If true, method has EH regions, so check that we don't try to move blocks in different regions -// -template -void Compiler::fgMoveHotJumps() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In fgMoveHotJumps()\n"); - - printf("\nInitial BasicBlocks"); - fgDispBasicBlocks(verboseTrees); - printf("\n"); - } -#endif // DEBUG - - assert(m_dfsTree != nullptr); - BitVecTraits traits(m_dfsTree->PostOrderTraits()); - BitVec visitedBlocks = BitVecOps::MakeEmpty(&traits); - - // If we have a funclet region, don't bother reordering anything in it. 
- // - BasicBlock* next; - for (BasicBlock* block = fgFirstBB; block != fgFirstFuncletBB; block = next) - { - next = block->Next(); - if (!m_dfsTree->Contains(block)) - { - continue; - } - - BitVecOps::AddElemD(&traits, visitedBlocks, block->bbPostorderNum); - - // Don't bother trying to move cold blocks - // - if (block->isBBWeightCold(this)) - { - continue; - } - - FlowEdge* targetEdge; - FlowEdge* unlikelyEdge; - - if (block->KindIs(BBJ_ALWAYS)) - { - targetEdge = block->GetTargetEdge(); - unlikelyEdge = nullptr; - } - else if (block->KindIs(BBJ_COND)) - { - // Consider conditional block's most likely branch for moving - // - if (block->GetTrueEdge()->getLikelihood() > 0.5) - { - targetEdge = block->GetTrueEdge(); - unlikelyEdge = block->GetFalseEdge(); - } - else - { - targetEdge = block->GetFalseEdge(); - unlikelyEdge = block->GetTrueEdge(); - } - - // If we aren't sure which successor is hotter, and we already fall into one of them, - // do nothing - if ((unlikelyEdge->getLikelihood() == 0.5) && block->NextIs(unlikelyEdge->getDestinationBlock())) - { - continue; - } - } - else - { - // Don't consider other block kinds - // - continue; - } - - BasicBlock* target = targetEdge->getDestinationBlock(); - bool isBackwardJump = BitVecOps::IsMember(&traits, visitedBlocks, target->bbPostorderNum); - assert(m_dfsTree->Contains(target)); - - if (isBackwardJump) - { - // We don't want to change the first block, so if block is a backward jump to the first block, - // don't try moving block before it. - // - if (target->IsFirst()) - { - continue; - } - - if (block->KindIs(BBJ_COND)) - { - // This could be a loop exit, so don't bother moving this block up. - // Instead, try moving the unlikely target up to create fallthrough. - // - targetEdge = unlikelyEdge; - target = targetEdge->getDestinationBlock(); - isBackwardJump = BitVecOps::IsMember(&traits, visitedBlocks, target->bbPostorderNum); - assert(m_dfsTree->Contains(target)); - - if (isBackwardJump) - { - continue; - } - } - // Check for single-block loop case - // - else if (block == target) - { - continue; - } - } - - // Check if block already falls into target - // - if (block->NextIs(target)) - { - continue; - } - - if (target->isBBWeightCold(this)) - { - // If target is block's most-likely successor, and block is not rarely-run, - // perhaps the profile data is misleading, and we need to run profile repair? - // - continue; - } - - if (hasEH) - { - // Don't move blocks in different EH regions - // - if (!BasicBlock::sameEHRegion(block, target)) - { - continue; - } - - if (isBackwardJump) - { - // block and target are in the same try/handler regions, and target is behind block, - // so block cannot possibly be the start of the region. - // - assert(!bbIsTryBeg(block) && !bbIsHandlerBeg(block)); - - // Don't change the entry block of an EH region - // - if (bbIsTryBeg(target) || bbIsHandlerBeg(target)) - { - continue; - } - } - else - { - // block and target are in the same try/handler regions, and block is behind target, - // so target cannot possibly be the start of the region. 
- // - assert(!bbIsTryBeg(target) && !bbIsHandlerBeg(target)); - } - } - - // If moving block will break up existing fallthrough behavior into target, make sure it's worth it - // - FlowEdge* const fallthroughEdge = fgGetPredForBlock(target, target->Prev()); - if ((fallthroughEdge != nullptr) && (fallthroughEdge->getLikelyWeight() >= targetEdge->getLikelyWeight())) - { - continue; - } - - if (isBackwardJump) - { - // Move block to before target - // - fgUnlinkBlock(block); - fgInsertBBbefore(target, block); - } - else if (hasEH && target->isBBCallFinallyPair()) - { - // target is a call-finally pair, so move the pair up to block - // - fgUnlinkRange(target, target->Next()); - fgMoveBlocksAfter(target, target->Next(), block); - next = target->Next(); - } - else - { - // Move target up to block - // - fgUnlinkBlock(target); - fgInsertBBafter(block, target); - next = target; - } - } -} - -//----------------------------------------------------------------------------- -// fgDoReversePostOrderLayout: Reorder blocks using a greedy RPO traversal, -// taking care to keep loop bodies compact. -// -void Compiler::fgDoReversePostOrderLayout() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In fgDoReversePostOrderLayout()\n"); - - printf("\nInitial BasicBlocks"); - fgDispBasicBlocks(verboseTrees); - printf("\n"); - } -#endif // DEBUG - - // If LSRA didn't create any new blocks, we can reuse its flowgraph annotations. - // - if (m_dfsTree == nullptr) - { - m_dfsTree = fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - } - else - { - assert(m_loops != nullptr); - } - - BasicBlock** const rpoSequence = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()]; - unsigned numBlocks = 0; - auto addToSequence = [rpoSequence, &numBlocks](BasicBlock* block) { - // Exclude handler regions from being reordered. - // - if (!block->hasHndIndex()) - { - rpoSequence[numBlocks++] = block; - } - }; - - fgVisitBlocksInLoopAwareRPO(m_dfsTree, m_loops, addToSequence); - - // Reorder blocks. - // - for (unsigned i = 1; i < numBlocks; i++) - { - BasicBlock* block = rpoSequence[i - 1]; - BasicBlock* const blockToMove = rpoSequence[i]; - - if (block->NextIs(blockToMove)) - { - continue; - } - - // Only reorder blocks within the same try region. We don't want to make them non-contiguous. - // - if (!BasicBlock::sameTryRegion(block, blockToMove)) - { - continue; - } - - // Don't move call-finally pair tails independently. - // When we encounter the head, we will move the entire pair. - // - if (blockToMove->isBBCallFinallyPairTail()) - { - continue; - } - - // Don't break up call-finally pairs by inserting something in the middle. - // - if (block->isBBCallFinallyPair()) - { - block = block->Next(); - } - - if (blockToMove->isBBCallFinallyPair()) - { - BasicBlock* const callFinallyRet = blockToMove->Next(); - fgUnlinkRange(blockToMove, callFinallyRet); - fgMoveBlocksAfter(blockToMove, callFinallyRet, block); - } - else - { - fgUnlinkBlock(blockToMove); - fgInsertBBafter(block, blockToMove); - } - } - - if (compHndBBtabCount == 0) - { - fgMoveHotJumps(); - } - else - { - fgMoveHotJumps(); - } -} - -//----------------------------------------------------------------------------- -// fgMoveColdBlocks: Move rarely-run blocks to the end of their respective regions. -// -// Notes: -// Exception handlers are assumed to be cold, so we won't move blocks within them. -// On platforms that don't use funclets, we should use Compiler::fgRelocateEHRegions to move cold handlers. 
-// Note that Compiler::fgMoveColdBlocks will break up EH regions to facilitate intermediate transformations. -// To reestablish contiguity of EH regions, callers need to follow this with Compiler::fgRebuildEHRegions. -// -void Compiler::fgMoveColdBlocks() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In fgMoveColdBlocks()\n"); - - printf("\nInitial BasicBlocks"); - fgDispBasicBlocks(verboseTrees); - printf("\n"); - } -#endif // DEBUG - - auto moveBlock = [this](BasicBlock* block, BasicBlock* insertionPoint) { - assert(block != insertionPoint); - // Don't move handler blocks. - // Also, leave try entries behind as a breadcrumb for where to reinsert try blocks. - if (!bbIsTryBeg(block) && !block->hasHndIndex()) - { - if (block->isBBCallFinallyPair()) - { - BasicBlock* const callFinallyRet = block->Next(); - if (callFinallyRet != insertionPoint) - { - fgUnlinkRange(block, callFinallyRet); - fgMoveBlocksAfter(block, callFinallyRet, insertionPoint); - } - } - else - { - fgUnlinkBlock(block); - fgInsertBBafter(insertionPoint, block); - } - } - }; - - BasicBlock* lastMainBB = fgLastBBInMainFunction(); - if (lastMainBB->IsFirst()) - { - return; - } - - // Search the main method body for rarely-run blocks to move - // - for (BasicBlock *block = lastMainBB->Prev(), *prev; !block->IsFirst(); block = prev) - { - prev = block->Prev(); - - // We only want to move cold blocks. - // Also, don't move block if it is the end of a call-finally pair, - // as we want to keep these pairs contiguous - // (if we encounter the beginning of a pair, we'll move the whole pair). - // - if (!block->isBBWeightCold(this) || block->isBBCallFinallyPairTail()) - { - continue; - } - - moveBlock(block, lastMainBB); - } - - // We have moved all cold main blocks before lastMainBB to after lastMainBB. - // If lastMainBB itself is cold, move it to the end of the method to restore its relative ordering. - // But first, we can't move just the tail of a call-finally pair, - // so point lastMainBB to the pair's head, if necessary. - // - if (lastMainBB->isBBCallFinallyPairTail()) - { - lastMainBB = lastMainBB->Prev(); - } - - BasicBlock* lastHotBB = nullptr; - if (lastMainBB->isBBWeightCold(this)) - { - // lastMainBB is cold, so the block behind it (if there is one) is the last hot block - // - lastHotBB = lastMainBB->Prev(); - - // Move lastMainBB - // - BasicBlock* const newLastMainBB = fgLastBBInMainFunction(); - if (lastMainBB != newLastMainBB) - { - moveBlock(lastMainBB, newLastMainBB); - } - } - else - { - // lastMainBB isn't cold, so it (or its call-finally pair tail) the last hot block - // - lastHotBB = lastMainBB->isBBCallFinallyPair() ? lastMainBB->Next() : lastMainBB; - } - - // Save the beginning of the cold section for later. - // If lastHotBB is null, there isn't a hot section, - // so there's no point in differentiating between sections for layout purposes. - // - fgFirstColdBlock = (lastHotBB == nullptr) ? nullptr : lastHotBB->Next(); -} - -//----------------------------------------------------------------------------- -// Compiler::ThreeOptLayout::EdgeCmp: Comparator for the 'cutPoints' priority queue. -// If 'left' has a bigger edge weight than 'right', 3-opt will consider it first. -// Else, 3-opt will consider 'right' first. 
-// -// Parameters: -// left - One of the two edges to compare -// right - The other edge to compare -// -// Returns: -// True if 'right' should be considered before 'left', and false otherwise -// -/* static */ bool Compiler::ThreeOptLayout::EdgeCmp(const FlowEdge* left, const FlowEdge* right) -{ - assert(left != right); - const weight_t leftWeight = left->getLikelyWeight(); - const weight_t rightWeight = right->getLikelyWeight(); - - // Break ties by comparing the source blocks' bbIDs. - // If both edges are out of the same source block, use the target blocks' bbIDs. - if (leftWeight == rightWeight) - { - BasicBlock* const leftSrc = left->getSourceBlock(); - BasicBlock* const rightSrc = right->getSourceBlock(); - if (leftSrc == rightSrc) - { - return left->getDestinationBlock()->bbID < right->getDestinationBlock()->bbID; - } - - return leftSrc->bbID < rightSrc->bbID; - } - - return leftWeight < rightWeight; -} - -//----------------------------------------------------------------------------- -// Compiler::ThreeOptLayout::ThreeOptLayout: Constructs a ThreeOptLayout instance. -// -// Parameters: -// comp - The Compiler instance -// -Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp) - : compiler(comp) - , cutPoints(comp->getAllocator(CMK_FlowEdge), &ThreeOptLayout::EdgeCmp) - , blockOrder(nullptr) - , tempOrder(nullptr) - , numCandidateBlocks(0) -{ -} - -#ifdef DEBUG -//----------------------------------------------------------------------------- -// Compiler::ThreeOptLayout::GetLayoutCost: Computes the cost of the layout for the region -// bounded by 'startPos' and 'endPos'. -// -// Parameters: -// startPos - The starting index of the region -// endPos - The inclusive ending index of the region -// -// Returns: -// The region's layout cost -// -weight_t Compiler::ThreeOptLayout::GetLayoutCost(unsigned startPos, unsigned endPos) -{ - assert(startPos <= endPos); - assert(endPos < numCandidateBlocks); - weight_t layoutCost = BB_ZERO_WEIGHT; - - for (unsigned position = startPos; position < endPos; position++) - { - layoutCost += GetCost(blockOrder[position], blockOrder[position + 1]); - } - - layoutCost += blockOrder[endPos]->bbWeight; - return layoutCost; -} -#endif // DEBUG - -//----------------------------------------------------------------------------- -// Compiler::ThreeOptLayout::GetCost: Computes the cost of placing 'next' after 'block'. -// Layout cost is modeled as the sum of block weights, minus the weights of edges that fall through. -// -// Parameters: -// block - The block to consider creating fallthrough from -// next - The block to consider creating fallthrough into -// -// Returns: -// The cost -// -weight_t Compiler::ThreeOptLayout::GetCost(BasicBlock* block, BasicBlock* next) -{ - assert(block != nullptr); - assert(next != nullptr); - - const weight_t maxCost = block->bbWeight; - const FlowEdge* fallthroughEdge = compiler->fgGetPredForBlock(next, block); - - if (fallthroughEdge != nullptr) - { - // The edge's weight should never exceed its source block's weight, - // but handle negative results from rounding errors in getLikelyWeight(), just in case - return max(0.0, maxCost - fallthroughEdge->getLikelyWeight()); - } - - return maxCost; -} + return maxCost; +} //----------------------------------------------------------------------------- // Compiler::ThreeOptLayout::GetPartitionCostDelta: Computes the current cost of the given partitions, // and the cost of swapping S2 and S3, returning the difference between them. 
 //
 // Parameters:
-//   s1Start - The starting position of the first partition
 //   s2Start - The starting position of the second partition
 //   s3Start - The starting position of the third partition
 //   s3End - The ending position (inclusive) of the third partition
@@ -4980,8 +3367,11 @@ weight_t Compiler::ThreeOptLayout::GetCost(BasicBlock* block, BasicBlock* next)
 //   The difference in cost between the current and proposed layouts.
 //   A negative delta indicates the proposed layout is an improvement.
 //
-weight_t Compiler::ThreeOptLayout::GetPartitionCostDelta(
-    unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End)
+template <bool hasEH>
+weight_t Compiler::ThreeOptLayout<hasEH>::GetPartitionCostDelta(unsigned s2Start,
+                                                                unsigned s3Start,
+                                                                unsigned s3End,
+                                                                unsigned s4End)
 {
     BasicBlock* const s2Block     = blockOrder[s2Start];
     BasicBlock* const s2BlockPrev = blockOrder[s2Start - 1];
@@ -5036,7 +3426,8 @@ weight_t Compiler::ThreeOptLayout::GetPartitionCostDelta(
 //
 // If 's3End' and 's4End' are the same, the fourth partition doesn't exist.
 //
-void Compiler::ThreeOptLayout::SwapPartitions(
+template <bool hasEH>
+void Compiler::ThreeOptLayout<hasEH>::SwapPartitions(
     unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End)
 {
     INDEBUG(const weight_t currLayoutCost = GetLayoutCost(s1Start, s4End));
@@ -5060,6 +3451,12 @@ void Compiler::ThreeOptLayout::SwapPartitions(
 
     std::swap(blockOrder, tempOrder);
 
+    // Update the ordinals for the blocks we moved
+    for (unsigned i = s2Start; i <= s4End; i++)
+    {
+        blockOrder[i]->bbPreorderNum = i;
+    }
+
 #ifdef DEBUG
     // Don't bother checking if the cost improved for exceptionally costly layouts.
     // Imprecision from summing large floating-point values can falsely trigger the below assert.
@@ -5083,65 +3480,59 @@ void Compiler::ThreeOptLayout::SwapPartitions(
 // Parameters:
 //   edge - The branch to consider creating fallthrough for
 //
-void Compiler::ThreeOptLayout::ConsiderEdge(FlowEdge* edge)
+// Template parameters:
+//   addToQueue - If true, adds valid edges to the 'cutPoints' queue
+//
+// Returns:
+//   True if 'edge' can be considered for aligning, false otherwise
+//
+template <bool hasEH>
+template <bool addToQueue>
+bool Compiler::ThreeOptLayout<hasEH>::ConsiderEdge(FlowEdge* edge)
 {
     assert(edge != nullptr);
 
-    // Don't add an edge that we've already considered
-    // (For exceptionally branchy methods, we want to avoid exploding 'cutPoints' in size)
-    if (edge->visited())
+    // Don't add an edge that we've already considered.
+    // For exceptionally branchy methods, we want to avoid exploding 'cutPoints' in size.
+    if (addToQueue && edge->visited())
     {
-        return;
+        return false;
     }
 
     BasicBlock* const srcBlk = edge->getSourceBlock();
     BasicBlock* const dstBlk = edge->getDestinationBlock();
 
-    // Ignore cross-region branches
-    if (!BasicBlock::sameTryRegion(srcBlk, dstBlk))
-    {
-        return;
-    }
-
-    // For backward jumps, we will consider partitioning before 'srcBlk'.
-    // If 'srcBlk' is a BBJ_CALLFINALLYRET, this partition will split up a call-finally pair.
-    // Thus, don't consider edges out of BBJ_CALLFINALLYRET blocks.
-    if (srcBlk->KindIs(BBJ_CALLFINALLYRET))
+    // Don't consider edges to or from outside the hot range.
+    if (!IsCandidateBlock(srcBlk) || !IsCandidateBlock(dstBlk))
     {
-        return;
+        return false;
     }
 
-    const unsigned srcPos = srcBlk->bbPostorderNum;
-    const unsigned dstPos = dstBlk->bbPostorderNum;
-    assert(srcPos < compiler->m_dfsTree->GetPostOrderCount());
-    assert(dstPos < compiler->m_dfsTree->GetPostOrderCount());
-
-    // Don't consider edges to or from outside the hot range (i.e.
ordinal doesn't match 'blockOrder' position). - if ((srcPos >= numCandidateBlocks) || (srcBlk != blockOrder[srcPos])) + // Don't consider single-block loop backedges. + if (srcBlk == dstBlk) { - return; + return false; } - if ((dstPos >= numCandidateBlocks) || (dstBlk != blockOrder[dstPos])) + // Don't move the method entry block. + if (dstBlk->IsFirst()) { - return; + return false; } - // Don't consider edges to blocks outside the hot range (i.e. ordinal number isn't set), - // or backedges to the first block in a region; we don't want to change the entry point. - if ((dstPos == 0) || compiler->bbIsTryBeg(dstBlk)) + // Ignore cross-region branches, and don't try to change the region's entry block. + if (hasEH && (!BasicBlock::sameTryRegion(srcBlk, dstBlk) || compiler->bbIsTryBeg(dstBlk))) { - return; + return false; } - // Don't consider backedges for single-block loops - if (srcPos == dstPos) + if (addToQueue) { - return; + edge->markVisited(); + cutPoints.Push(edge); } - edge->markVisited(); - cutPoints.Push(edge); + return true; } //----------------------------------------------------------------------------- @@ -5151,7 +3542,8 @@ void Compiler::ThreeOptLayout::ConsiderEdge(FlowEdge* edge) // Parameters: // blockPos - The index into 'blockOrder' of the source block // -void Compiler::ThreeOptLayout::AddNonFallthroughSuccs(unsigned blockPos) +template +void Compiler::ThreeOptLayout::AddNonFallthroughSuccs(unsigned blockPos) { assert(blockPos < numCandidateBlocks); BasicBlock* const block = blockOrder[blockPos]; @@ -5173,7 +3565,8 @@ void Compiler::ThreeOptLayout::AddNonFallthroughSuccs(unsigned blockPos) // Parameters: // blockPos - The index into 'blockOrder' of the target block // -void Compiler::ThreeOptLayout::AddNonFallthroughPreds(unsigned blockPos) +template +void Compiler::ThreeOptLayout::AddNonFallthroughPreds(unsigned blockPos) { assert(blockPos < numCandidateBlocks); BasicBlock* const block = blockOrder[blockPos]; @@ -5192,82 +3585,15 @@ void Compiler::ThreeOptLayout::AddNonFallthroughPreds(unsigned blockPos) // Compiler::ThreeOptLayout::Run: Runs 3-opt on the candidate span of hot blocks. // We skip reordering handler regions for now, as these are assumed to be cold. // -void Compiler::ThreeOptLayout::Run() +// Returns: +// True if any blocks were moved +// +template +bool Compiler::ThreeOptLayout::Run() { - // Since we moved all cold blocks to the end of the method already, - // we should have a span of hot blocks to consider reordering at the beginning of the method - // (unless none of the blocks are cold relative to the rest of the method, - // in which case we will reorder the whole main method body). - BasicBlock* const finalBlock = (compiler->fgFirstColdBlock != nullptr) ? compiler->fgFirstColdBlock->Prev() - : compiler->fgLastBBInMainFunction(); - - // Reset cold section pointer, in case we decide to do hot/cold splitting later - compiler->fgFirstColdBlock = nullptr; - - // We better have an end block for the hot section, and it better not be the start of a call-finally pair. - assert(finalBlock != nullptr); - assert(!finalBlock->isBBCallFinallyPair()); - - // Get an upper bound on the number of hot blocks without walking the whole block list. - // We will only consider blocks reachable via normal flow. 
- const unsigned numBlocksUpperBound = compiler->m_dfsTree->GetPostOrderCount(); - assert(numBlocksUpperBound != 0); - blockOrder = new (compiler, CMK_BasicBlock) BasicBlock*[numBlocksUpperBound * 2]; - tempOrder = (blockOrder + numBlocksUpperBound); - - // Initialize the current block order - for (BasicBlock* const block : compiler->Blocks(compiler->fgFirstBB, finalBlock)) - { - // Exclude unreachable blocks and handler blocks from being reordered - if (!compiler->m_dfsTree->Contains(block) || block->hasHndIndex()) - { - continue; - } - - assert(numCandidateBlocks < numBlocksUpperBound); - blockOrder[numCandidateBlocks] = block; - - // Repurpose 'bbPostorderNum' for the block's ordinal - block->bbPostorderNum = numCandidateBlocks++; - } - - // For methods with fewer than three candidate blocks, we cannot partition anything - if (numCandidateBlocks < 3) - { - JITDUMP("Not enough blocks to partition anything. Skipping reordering.\n"); - return; - } - - const bool modified = RunThreeOpt(); - - if (modified) - { - for (unsigned i = 1; i < numCandidateBlocks; i++) - { - BasicBlock* const block = blockOrder[i - 1]; - BasicBlock* const next = blockOrder[i]; - - if (block->NextIs(next)) - { - continue; - } - - // Only reorder within try regions to maintain contiguity. - if (!BasicBlock::sameTryRegion(block, next)) - { - continue; - } - - // Don't move the entry of a try region. - if (compiler->bbIsTryBeg(next)) - { - continue; - } - - compiler->fgUnlinkBlock(next); - compiler->fgInsertBBafter(block, next); - } - } + assert(numCandidateBlocks > 0); + RunThreeOpt(); + return ReorderBlockList(); } //----------------------------------------------------------------------------- @@ -5288,7 +3614,8 @@ void Compiler::ThreeOptLayout::Run() // and try to create fallthrough on each edge via partition swaps, starting with the hottest edges. // For each swap, repopulate the priority queue with edges along the modified cut points. 
// -bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned endPos) +template +bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned endPos) { assert(cutPoints.Empty()); assert(startPos < endPos); @@ -5314,8 +3641,8 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned BasicBlock* const srcBlk = candidateEdge->getSourceBlock(); BasicBlock* const dstBlk = candidateEdge->getDestinationBlock(); - const unsigned srcPos = srcBlk->bbPostorderNum; - const unsigned dstPos = dstBlk->bbPostorderNum; + const unsigned srcPos = srcBlk->bbPreorderNum; + const unsigned dstPos = dstBlk->bbPreorderNum; // This edge better be between blocks in the current region assert((srcPos >= startPos) && (srcPos <= endPos)); @@ -5364,7 +3691,7 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned s2Start = srcPos + 1; s3Start = dstPos; s3End = endPos; - costChange = GetPartitionCostDelta(startPos, s2Start, s3Start, s3End, endPos); + costChange = GetPartitionCostDelta(s2Start, s3Start, s3End, endPos); } else { @@ -5405,96 +3732,402 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned BasicBlock* const s3Block = blockOrder[position]; BasicBlock* const s3BlockPrev = blockOrder[position - 1]; - // Don't consider any cut points that would break up call-finally pairs - if (s3Block->KindIs(BBJ_CALLFINALLYRET)) - { - continue; - } + // Don't consider any cut points that would break up call-finally pairs + if (hasEH && s3Block->KindIs(BBJ_CALLFINALLYRET)) + { + continue; + } + + // Compute the cost delta of this partition + const weight_t currCost = currCostBase + GetCost(s3BlockPrev, s3Block); + const weight_t newCost = + newCostBase + GetCost(s2BlockPrev, s3Block) + + ((s3End < endPos) ? GetCost(s3BlockPrev, blockOrder[s3End + 1]) : s3BlockPrev->bbWeight); + const weight_t delta = newCost - currCost; + + if (delta < costChange) + { + costChange = delta; + s3Start = position; + } + } + } + + // Continue evaluating partitions if this one isn't profitable + if ((costChange >= BB_ZERO_WEIGHT) || Compiler::fgProfileWeightsEqual(costChange, BB_ZERO_WEIGHT, 0.001)) + { + continue; + } + + JITDUMP("Swapping partitions [" FMT_BB ", " FMT_BB "] and [" FMT_BB ", " FMT_BB "] (cost change = %f)\n", + blockOrder[s2Start]->bbNum, blockOrder[s3Start - 1]->bbNum, blockOrder[s3Start]->bbNum, + blockOrder[s3End]->bbNum, costChange); + + SwapPartitions(startPos, s2Start, s3Start, s3End, endPos); + + // Ensure this move created fallthrough from 'srcBlk' to 'dstBlk' + assert((srcBlk->bbPreorderNum + 1) == dstBlk->bbPreorderNum); + + // At every cut point is an opportunity to consider more candidate edges. + // To the left of each cut point, consider successor edges that don't fall through. + // Ditto predecessor edges to the right of each cut point. + AddNonFallthroughSuccs(s2Start - 1); + AddNonFallthroughPreds(s2Start); + AddNonFallthroughSuccs(s3Start - 1); + AddNonFallthroughPreds(s3Start); + AddNonFallthroughSuccs(s3End); + + if (s3End < endPos) + { + AddNonFallthroughPreds(s3End + 1); + } + + modified = true; + numSwaps++; + } + + cutPoints.Clear(); + return modified; +} + +//----------------------------------------------------------------------------- +// Compiler::ThreeOptLayout::RunThreeOpt: Runs 3-opt on the candidate span of blocks. 
+// +template +void Compiler::ThreeOptLayout::RunThreeOpt() +{ + // For methods with fewer than three candidate blocks, we cannot partition anything + if (numCandidateBlocks < 3) + { + JITDUMP("Not enough blocks to partition anything. Skipping reordering.\n"); + return; + } + + CompactHotJumps(); + + const unsigned startPos = 0; + const unsigned endPos = numCandidateBlocks - 1; + + JITDUMP("Initial layout cost: %f\n", GetLayoutCost(startPos, endPos)); + const bool modified = RunGreedyThreeOptPass(startPos, endPos); + + if (modified) + { + JITDUMP("Final layout cost: %f\n", GetLayoutCost(startPos, endPos)); + } + else + { + JITDUMP("No changes made.\n"); + } +} + +//----------------------------------------------------------------------------- +// Compiler::ThreeOptLayout::ReorderBlockList: Reorders blocks within their regions +// using the order 3-opt came up with. +// If the method has try regions, this will also move them to try to create fallthrough into their entries. +// +// Returns: +// True if any blocks were moved +// +template +bool Compiler::ThreeOptLayout::ReorderBlockList() +{ + // As we reorder blocks, remember the last candidate block we found in each region. + // In case we cannot place two blocks next to each other because they are in different regions, + // we will instead place the latter block after the last one we saw in its region. + // This ensures cold blocks sink to the end of their respective regions. + // This will also push nested regions further down the method, but we will move them later, anyway. + BasicBlock** lastHotBlocks = nullptr; + + if (hasEH) + { + lastHotBlocks = new (compiler, CMK_BasicBlock) BasicBlock* [compiler->compHndBBtabCount + 1] {}; + lastHotBlocks[0] = compiler->fgFirstBB; + + for (EHblkDsc* const HBtab : EHClauses(compiler)) + { + lastHotBlocks[HBtab->ebdTryBeg->bbTryIndex] = HBtab->ebdTryBeg; + } + } + + // Reorder the block list. + JITDUMP("Reordering block list\n"); + bool modified = false; + for (unsigned i = 1; i < numCandidateBlocks; i++) + { + BasicBlock* const block = blockOrder[i - 1]; + BasicBlock* const blockToMove = blockOrder[i]; + + if (!hasEH) + { + if (!block->NextIs(blockToMove)) + { + compiler->fgUnlinkBlock(blockToMove); + compiler->fgInsertBBafter(block, blockToMove); + modified = true; + } + + continue; + } + + lastHotBlocks[block->bbTryIndex] = block; - // Compute the cost delta of this partition - const weight_t currCost = currCostBase + GetCost(s3BlockPrev, s3Block); - const weight_t newCost = - newCostBase + GetCost(s2BlockPrev, s3Block) + - ((s3End < endPos) ? GetCost(s3BlockPrev, blockOrder[s3End + 1]) : s3BlockPrev->bbWeight); - const weight_t delta = newCost - currCost; + // Don't move call-finally pair tails independently. + // When we encounter the head, we will move the entire pair. + if (blockToMove->isBBCallFinallyPairTail()) + { + continue; + } - if (delta < costChange) - { - costChange = delta; - s3Start = position; - } - } + // Only reorder blocks within the same try region. We don't want to make them non-contiguous. + if (compiler->bbIsTryBeg(blockToMove)) + { + continue; } - // Continue evaluating partitions if this one isn't profitable - if ((costChange >= BB_ZERO_WEIGHT) || Compiler::fgProfileWeightsEqual(costChange, BB_ZERO_WEIGHT, 0.001)) + // If these blocks aren't in the same try region, use the last block seen in the same region as 'blockToMove' + // for the insertion point. + // This will push the region containing 'block' down the method, but we will fix this after. 
+ BasicBlock* insertionPoint = + BasicBlock::sameTryRegion(block, blockToMove) ? block : lastHotBlocks[blockToMove->bbTryIndex]; + + // Don't break up call-finally pairs by inserting something in the middle. + if (insertionPoint->isBBCallFinallyPair()) + { + insertionPoint = insertionPoint->Next(); + assert(blockToMove != insertionPoint); + } + + if (insertionPoint->NextIs(blockToMove)) { continue; } - JITDUMP("Swapping partitions [" FMT_BB ", " FMT_BB "] and [" FMT_BB ", " FMT_BB "] (cost change = %f)\n", - blockOrder[s2Start]->bbNum, blockOrder[s3Start - 1]->bbNum, blockOrder[s3Start]->bbNum, - blockOrder[s3End]->bbNum, costChange); + // Move call-finallies together. + if (blockToMove->isBBCallFinallyPair()) + { + BasicBlock* const callFinallyRet = blockToMove->Next(); + if (callFinallyRet != insertionPoint) + { + compiler->fgUnlinkRange(blockToMove, callFinallyRet); + compiler->fgMoveBlocksAfter(blockToMove, callFinallyRet, insertionPoint); + modified = true; + } + } + else + { + compiler->fgUnlinkBlock(blockToMove); + compiler->fgInsertBBafter(insertionPoint, blockToMove); + modified = true; + } + } - SwapPartitions(startPos, s2Start, s3Start, s3End, endPos); + if (!hasEH) + { + return modified; + } + + // If we reordered within any try regions, make sure the EH table is up-to-date. + if (modified) + { + compiler->fgFindTryRegionEnds(); + } + + JITDUMP("Moving try regions\n"); - // Update the ordinals for the blocks we moved - for (unsigned i = s2Start; i <= endPos; i++) + // We only ordered blocks within regions above. + // Now, move entire try regions up to their ideal predecessors, if possible. + for (EHblkDsc* const HBtab : EHClauses(compiler)) + { + // If this try region isn't in the candidate span of blocks, don't consider it. + // Also, if this try region's entry is also the method entry, don't move it. + BasicBlock* const tryBeg = HBtab->ebdTryBeg; + if (!IsCandidateBlock(tryBeg) || tryBeg->IsFirst()) { - blockOrder[i]->bbPostorderNum = i; + continue; } - // Ensure this move created fallthrough from 'srcBlk' to 'dstBlk' - assert((srcBlk->bbPostorderNum + 1) == dstBlk->bbPostorderNum); + // We will try using 3-opt's chosen predecessor for the try region. + BasicBlock* insertionPoint = blockOrder[tryBeg->bbPreorderNum - 1]; + const unsigned parentIndex = + insertionPoint->hasTryIndex() ? insertionPoint->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; - // At every cut point is an opportunity to consider more candidate edges. - // To the left of each cut point, consider successor edges that don't fall through. - // Ditto predecessor edges to the right of each cut point. - AddNonFallthroughSuccs(s2Start - 1); - AddNonFallthroughPreds(s2Start); - AddNonFallthroughSuccs(s3Start - 1); - AddNonFallthroughPreds(s3Start); - AddNonFallthroughSuccs(s3End); + // Can we move this try to after 'insertionPoint' without breaking EH nesting invariants? + if (parentIndex != HBtab->ebdEnclosingTryIndex) + { + // We cannot. + continue; + } - if (s3End < endPos) + // Don't break up call-finally pairs. + if (insertionPoint->isBBCallFinallyPair()) { - AddNonFallthroughPreds(s3End + 1); + insertionPoint = insertionPoint->Next(); } + // Nothing to do if we already fall through. 
+ if (insertionPoint->NextIs(tryBeg)) + { + continue; + } + + BasicBlock* const tryLast = HBtab->ebdTryLast; + compiler->fgUnlinkRange(tryBeg, tryLast); + compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); modified = true; - numSwaps++; + + // If we moved this region within another region, recompute the try region end blocks. + if (parentIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + compiler->fgFindTryRegionEnds(); + } } - cutPoints.Clear(); return modified; } //----------------------------------------------------------------------------- -// Compiler::ThreeOptLayout::RunThreeOpt: Runs 3-opt on the candidate span of blocks. -// -// Returns: -// True if we reordered anything, false otherwise +// Compiler::ThreeOptLayout::CompactHotJumps: Move blocks in the candidate span +// closer to their most-likely successors. // -bool Compiler::ThreeOptLayout::RunThreeOpt() +template +void Compiler::ThreeOptLayout::CompactHotJumps() { - // We better have enough blocks to create partitions - assert(numCandidateBlocks > 2); - const unsigned startPos = 0; - const unsigned endPos = numCandidateBlocks - 1; + JITDUMP("Compacting hot jumps\n"); - JITDUMP("Initial layout cost: %f\n", GetLayoutCost(startPos, endPos)); - const bool modified = RunGreedyThreeOptPass(startPos, endPos); + auto isBackwardJump = [&](BasicBlock* block, BasicBlock* target) { + assert(IsCandidateBlock(block)); + assert(IsCandidateBlock(target)); + return block->bbPreorderNum >= target->bbPreorderNum; + }; - if (modified) - { - JITDUMP("Final layout cost: %f\n", GetLayoutCost(startPos, endPos)); - } - else + for (unsigned i = 0; i < numCandidateBlocks; i++) { - JITDUMP("No changes made.\n"); - } + BasicBlock* const block = blockOrder[i]; + FlowEdge* edge; + FlowEdge* unlikelyEdge; - return modified; + if (block->KindIs(BBJ_ALWAYS)) + { + edge = block->GetTargetEdge(); + unlikelyEdge = nullptr; + } + else if (block->KindIs(BBJ_COND)) + { + // Consider conditional block's most likely branch for moving. + if (block->GetTrueEdge()->getLikelihood() > 0.5) + { + edge = block->GetTrueEdge(); + unlikelyEdge = block->GetFalseEdge(); + } + else + { + edge = block->GetFalseEdge(); + unlikelyEdge = block->GetTrueEdge(); + } + + // If we aren't sure which successor is hotter, and we already fall into one of them, + // do nothing. + BasicBlock* const unlikelyTarget = unlikelyEdge->getDestinationBlock(); + if ((unlikelyEdge->getLikelihood() == 0.5) && IsCandidateBlock(unlikelyTarget) && + (unlikelyTarget->bbPreorderNum == (i + 1))) + { + continue; + } + } + else + { + // Don't consider other block kinds. + continue; + } + + // Ensure we won't break any ordering invariants by creating fallthrough on this edge. + if (!ConsiderEdge(edge)) + { + continue; + } + + if (block->KindIs(BBJ_COND) && isBackwardJump(block, edge->getDestinationBlock())) + { + // This could be a loop exit, so don't bother moving this block up. + // Instead, try moving the unlikely target up to create fallthrough. + if (!ConsiderEdge(unlikelyEdge) || + isBackwardJump(block, unlikelyEdge->getDestinationBlock())) + { + continue; + } + + edge = unlikelyEdge; + } + + BasicBlock* const target = edge->getDestinationBlock(); + const unsigned srcPos = i; + const unsigned dstPos = target->bbPreorderNum; + + // We don't need to do anything if this edge already falls through. + if ((srcPos + 1) == dstPos) + { + continue; + } + + // If this move will break up existing fallthrough into 'target', make sure it's worth it. 
+ assert(dstPos != 0); + FlowEdge* const fallthroughEdge = compiler->fgGetPredForBlock(target, blockOrder[dstPos - 1]); + if ((fallthroughEdge != nullptr) && (fallthroughEdge->getLikelyWeight() >= edge->getLikelyWeight())) + { + continue; + } + + JITDUMP("Creating fallthrough along " FMT_BB " -> " FMT_BB "\n", block->bbNum, target->bbNum); + + const bool isForwardJump = !isBackwardJump(block, target); + if (isForwardJump) + { + // Before swap: | ..srcBlk | ... | dstBlk | ... | + // After swap: | ..srcBlk | dstBlk | ... | + + // First, shift all blocks between 'block' and 'target' rightward to make space for the latter. + // If 'target' is a call-finally pair, include space for the pair's tail. + const unsigned offset = target->isBBCallFinallyPair() ? 2 : 1; + for (unsigned pos = dstPos - 1; pos != srcPos; pos--) + { + BasicBlock* const blockToMove = blockOrder[pos]; + blockOrder[pos + offset] = blockOrder[pos]; + blockToMove->bbPreorderNum += offset; + } + + // Now, insert 'target' in the space after 'block'. + blockOrder[srcPos + 1] = target; + target->bbPreorderNum = srcPos + 1; + + // Move call-finally pairs in tandem. + if (target->isBBCallFinallyPair()) + { + blockOrder[srcPos + 2] = target->Next(); + target->Next()->bbPreorderNum = srcPos + 2; + } + } + else + { + // Before swap: | ... | dstBlk.. | srcBlk | ... | + // After swap: | ... | srcBlk | dstBlk.. | ... | + + // First, shift everything between 'target' and 'block' (including 'target') over + // to make space for 'block'. + for (unsigned pos = srcPos - 1; pos >= dstPos; pos--) + { + BasicBlock* const blockToMove = blockOrder[pos]; + blockOrder[pos + 1] = blockOrder[pos]; + blockToMove->bbPreorderNum++; + } + + // Now, insert 'block' before 'target'. + blockOrder[dstPos] = block; + block->bbPreorderNum = dstPos; + } + + assert((block->bbPreorderNum + 1) == target->bbPreorderNum); + } } //----------------------------------------------------------------------------- @@ -5504,7 +4137,10 @@ bool Compiler::ThreeOptLayout::RunThreeOpt() // - Evaluate cost of swapped layout: S1 - S3 - S2 // - If the cost improves, keep this layout // -void Compiler::fgSearchImprovedLayout() +// Returns: +// Suitable phase status +// +PhaseStatus Compiler::fgSearchImprovedLayout() { #ifdef DEBUG if (verbose) @@ -5517,8 +4153,70 @@ void Compiler::fgSearchImprovedLayout() } #endif // DEBUG - ThreeOptLayout layoutRunner(this); - layoutRunner.Run(); + // Before running 3-opt, compute a loop-aware RPO (if not already available) to get a sensible starting layout. + if (m_dfsTree == nullptr) + { + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + } + else + { + assert(m_loops != nullptr); + } + + BasicBlock** const initialLayout = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()]; + + // When walking the RPO-based layout, compact the hot blocks, and remember the end of the hot section. + // We don't want to waste time running 3-opt on cold blocks, or on handler sections. + unsigned numHotBlocks = 0; + auto addToSequence = [this, initialLayout, &numHotBlocks](BasicBlock* block) { + // The first block really shouldn't be cold, but if it is, ensure it's still placed first. + if (!block->hasHndIndex() && (!block->isBBWeightCold(this) || block->IsFirst())) + { + // Set the block's ordinal. + block->bbPreorderNum = numHotBlocks; + initialLayout[numHotBlocks++] = block; + } + }; + + // Stress 3-opt by giving it the post-order traversal as its initial layout. 
+    if (compStressCompile(STRESS_THREE_OPT_LAYOUT, 10))
+    {
+        for (unsigned i = 0; i < m_dfsTree->GetPostOrderCount(); i++)
+        {
+            addToSequence(m_dfsTree->GetPostOrder(i));
+        }
+
+        // Keep the method entry block at the beginning.
+        // Update the swapped blocks' ordinals, too.
+        std::swap(initialLayout[0], initialLayout[numHotBlocks - 1]);
+        std::swap(initialLayout[0]->bbPreorderNum, initialLayout[numHotBlocks - 1]->bbPreorderNum);
+    }
+    else
+    {
+        fgVisitBlocksInLoopAwareRPO(m_dfsTree, m_loops, addToSequence);
+    }
+
+    bool modified = false;
+    if (numHotBlocks == 0)
+    {
+        JITDUMP("No hot blocks found. Skipping reordering.\n");
+    }
+    else if (compHndBBtabCount == 0)
+    {
+        ThreeOptLayout<false> layoutRunner(this, initialLayout, numHotBlocks);
+        modified = layoutRunner.Run();
+    }
+    else
+    {
+        ThreeOptLayout<true> layoutRunner(this, initialLayout, numHotBlocks);
+        modified = layoutRunner.Run();
+    }
+
+    // 3-opt will mess with post-order numbers regardless of whether it modifies anything,
+    // so we always need to invalidate the flowgraph annotations after.
+    fgInvalidateDfsTree();
+    return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
 }
 
 //-------------------------------------------------------------
@@ -5537,6 +4235,64 @@ PhaseStatus Compiler::fgUpdateFlowGraphPhase()
     return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
 }
 
+//-------------------------------------------------------------
+// fgDedupReturnComparison: Expands BBJ_RETURN into BBJ_COND with two
+// BBJ_RETURN blocks ("return true" and "return false"). Such transformation
+// helps other phases to focus only on BBJ_COND (normalization).
+//
+// Arguments:
+//    block - the BBJ_RETURN block to convert into BBJ_COND
+//
+// Returns:
+//    true if the block was converted into BBJ_COND
+//
+bool Compiler::fgDedupReturnComparison(BasicBlock* block)
+{
+#ifdef JIT32_GCENCODER
+    // JIT32_GCENCODER has a hard limit on the number of epilogues, let's not add more.
+    return false;
+#endif
+
+    assert(block->KindIs(BBJ_RETURN));
+
+    // We're only interested in boolean returns
+    if ((info.compRetType != TYP_UBYTE) || (block == genReturnBB) || (block->lastStmt() == nullptr))
+    {
+        return false;
+    }
+
+    GenTree* rootNode = block->lastStmt()->GetRootNode();
+    if (!rootNode->OperIs(GT_RETURN) || !rootNode->gtGetOp1()->OperIsCmpCompare())
+    {
+        return false;
+    }
+
+    GenTree* cmp = rootNode->gtGetOp1();
+    cmp->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
+    rootNode->ChangeOper(GT_JTRUE);
+    rootNode->ChangeType(TYP_VOID);
+
+    GenTree* retTrue  = gtNewOperNode(GT_RETURN, TYP_INT, gtNewTrue());
+    GenTree* retFalse = gtNewOperNode(GT_RETURN, TYP_INT, gtNewFalse());
+
+    // Create RETURN 1/0 blocks. We expect fgHeadTailMerge to handle them if there are similar returns.
+    DebugInfo   dbgInfo    = block->lastStmt()->GetDebugInfo();
+    BasicBlock* retTrueBb  = fgNewBBFromTreeAfter(BBJ_RETURN, block, retTrue, dbgInfo);
+    BasicBlock* retFalseBb = fgNewBBFromTreeAfter(BBJ_RETURN, block, retFalse, dbgInfo);
+
+    FlowEdge* trueEdge  = fgAddRefPred(retTrueBb, block);
+    FlowEdge* falseEdge = fgAddRefPred(retFalseBb, block);
+    block->SetCond(trueEdge, falseEdge);
+
+    // We might want to instrument 'return ' too in the future. For now apply 50%/50%.
+ trueEdge->setLikelihood(0.5); + falseEdge->setLikelihood(0.5); + retTrueBb->inheritWeightPercentage(block, 50); + retFalseBb->inheritWeightPercentage(block, 50); + + return true; +} + //------------------------------------------------------------- // fgUpdateFlowGraph: Removes any empty blocks, unreachable blocks, and redundant jumps. // Most of those appear after dead store removal and folding of conditionals. @@ -5632,6 +4388,15 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh bDest = nullptr; bFalseDest = nullptr; + // Expand BBJ_RETURN into BBJ_COND when doTailDuplication is enabled + if (doTailDuplication && block->KindIs(BBJ_RETURN) && fgDedupReturnComparison(block)) + { + assert(block->KindIs(BBJ_COND)); + change = true; + modified = true; + bNext = block->Next(); + } + if (block->KindIs(BBJ_ALWAYS)) { bDest = block->GetTarget(); @@ -5814,7 +4579,7 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh GenTree* test = block->lastNode(); noway_assert(test->OperIsConditionalJump()); - if (test->OperGet() == GT_JTRUE) + if (test->OperIs(GT_JTRUE)) { GenTree* cond = gtReverseCond(test->AsOp()->gtOp1); assert(cond == test->AsOp()->gtOp1); // Ensure `gtReverseCond` did not create a new node. @@ -6855,37 +5620,34 @@ bool Compiler::fgHeadMerge(BasicBlock* block, bool early) // bool Compiler::gtTreeContainsTailCall(GenTree* tree) { - struct HasTailCallCandidateVisitor : GenTreeVisitor - { - enum - { - DoPreOrder = true - }; - - HasTailCallCandidateVisitor(Compiler* comp) - : GenTreeVisitor(comp) - { - } + auto isTailCall = [](GenTree* tree) { + return tree->IsCall() && (tree->AsCall()->CanTailCall() || tree->AsCall()->IsTailCall()); + }; - fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) - { - GenTree* node = *use; - if ((node->gtFlags & GTF_CALL) == 0) - { - return WALK_SKIP_SUBTREES; - } + return gtFindNodeInTree(tree, isTailCall) != nullptr; +} - if (node->IsCall() && (node->AsCall()->CanTailCall() || node->AsCall()->IsTailCall())) - { - return WALK_ABORT; - } +//------------------------------------------------------------------------ +// gtTreeContainsAsyncCall: Check if a tree contains any async call. +// +// Parameters: +// tree - The tree to check +// +// Returns: +// True if any node in the tree is an async call, false otherwise. +// +bool Compiler::gtTreeContainsAsyncCall(GenTree* tree) +{ + if (!compIsAsync()) + { + return false; + } - return WALK_CONTINUE; - } + auto isAsyncCall = [](GenTree* tree) { + return tree->IsCall() && tree->AsCall()->IsAsync(); }; - HasTailCallCandidateVisitor visitor(this); - return visitor.WalkTree(&tree, nullptr) == WALK_ABORT; + return gtFindNodeInTree(tree, isAsyncCall) != nullptr; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 152e8bb5fd1a..9dd8ad2c0272 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -89,6 +89,11 @@ bool Compiler::fgHaveSufficientProfileWeights() case ICorJitInfo::PgoSource::Blend: return true; + case ICorJitInfo::PgoSource::Synthesis: + // Single-edge methods always have sufficient profile data. + // Assuming we don't synthesize value and class profile data (which we don't currently). + return fgPgoSingleEdge; + case ICorJitInfo::PgoSource::Static: { // We sometimes call this very early, eg evaluating the prejit root. 
@@ -134,6 +139,12 @@ bool Compiler::fgHaveTrustedProfileWeights() case ICorJitInfo::PgoSource::Blend: case ICorJitInfo::PgoSource::Text: return true; + + case ICorJitInfo::PgoSource::Synthesis: + // Single-edge methods with synthetic profile are trustful. + // Assuming we don't synthesize value and class profile data (which we don't currently). + return fgPgoSingleEdge; + default: return false; } @@ -2504,10 +2515,10 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod() // We enable edge profiling by default, except when: // // * disabled by option - // * we are prejitting + // * we are AOT compiling // const bool edgesEnabled = (JitConfig.JitEdgeProfiling() > 0); - const bool prejit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT); + const bool prejit = IsAot(); const bool useEdgeProfiles = edgesEnabled && !prejit; const bool minimalProfiling = prejit ? (JitConfig.JitMinimalPrejitProfiling() > 0) : (JitConfig.JitMinimalJitProfiling() > 0); @@ -2666,14 +2677,14 @@ PhaseStatus Compiler::fgInstrumentMethod() // Optionally, when jitting, if there were no class probes, no value probes and only one count probe, // suppress instrumentation. // - // We leave instrumentation in place when prejitting as the sample hits in the method - // may be used to determine if the method should be prejitted or not. + // We leave instrumentation in place for AOT as the sample hits in the method + // may be used to determine if the method should be AOT or not. // // For jitting, no information is conveyed by the count in a single=block method. // bool minimalProbeMode = false; - if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + if (IsAot()) { minimalProbeMode = (JitConfig.JitMinimalPrejitProfiling() > 0); } @@ -4787,20 +4798,12 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block, ProfileChecks weight_t incomingLikelyWeight = 0; unsigned missingLikelyWeight = 0; bool foundPreds = false; - bool foundEHPreds = false; for (FlowEdge* const predEdge : block->PredEdges()) { if (predEdge->hasLikelihood()) { - if (BasicBlock::sameHndRegion(block, predEdge->getSourceBlock())) - { - incomingLikelyWeight += predEdge->getLikelyWeight(); - } - else - { - foundEHPreds = true; - } + incomingLikelyWeight += predEdge->getLikelyWeight(); } else { @@ -4812,29 +4815,11 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block, ProfileChecks foundPreds = true; } - // We almost certainly won't get the likelihoods on a BBJ_EHFINALLYRET right, - // so special-case BBJ_CALLFINALLYRET incoming flow. - // - if (block->isBBCallFinallyPairTail()) - { - incomingLikelyWeight = block->Prev()->bbWeight; - foundEHPreds = false; - } - - // We almost certainly won't get the likelihoods on a BBJ_EHFINALLYRET right, - // so special-case BBJ_CALLFINALLYRET incoming flow. - // - if (block->isBBCallFinallyPairTail()) - { - incomingLikelyWeight = block->Prev()->bbWeight; - foundEHPreds = false; - } - bool likelyWeightsValid = true; // If we have EH preds we may not have consistent incoming flow. 
// - if (foundPreds && !foundEHPreds) + if (foundPreds) { if (verifyLikelyWeights) { @@ -4890,7 +4875,7 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks // const unsigned numSuccs = block->NumSucc(this); - if ((numSuccs > 0) && !block->KindIs(BBJ_EHFINALLYRET, BBJ_EHFAULTRET, BBJ_EHFILTERRET)) + if ((numSuccs > 0) && !block->KindIs(BBJ_EHFAULTRET, BBJ_EHFILTERRET)) { weight_t const blockWeight = block->bbWeight; weight_t outgoingLikelihood = 0; @@ -4977,9 +4962,38 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks #endif // DEBUG +//------------------------------------------------------------------------ +// fgRepairProfile: If we have PGO data and the profile is inconsistent, +// run synthesis to re-establish consistency. +// +// Returns: +// PhaseStatus indicating if profile synthesis ran or not. +// +PhaseStatus Compiler::fgRepairProfile() +{ + if (fgIsUsingProfileWeights()) + { + if (fgPgoConsistent) + { + JITDUMP("Profile is already consistent.\n"); + } + else + { + ProfileSynthesis::Run(this, ProfileSynthesisOption::RetainLikelihoods); + return PhaseStatus::MODIFIED_EVERYTHING; + } + } + else + { + JITDUMP("No PGO data. Skipping profile repair.\n"); + } + + return PhaseStatus::MODIFIED_NOTHING; +} + //------------------------------------------------------------------------ // fgRepairProfileCondToUncond: attempt to repair profile after modifying -// a conditinal branch to an unconditional branch. +// a conditional branch to an unconditional branch. // // Arguments: // block - block that was just altered diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index 1647925e270f..63fd3fd489d2 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -30,11 +30,6 @@ // void ProfileSynthesis::Run(ProfileSynthesisOption option) { - m_dfsTree = m_comp->fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - m_improperLoopHeaders = m_loops->ImproperLoopHeaders(); - m_entryBlock = m_comp->opts.IsOSR() ? m_comp->fgEntryBB : m_comp->fgFirstBB; - // Retain or compute edge likelihood information // switch (option) @@ -104,7 +99,7 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option) // belief that the profile should be somewhat flatter. // unsigned retries = 0; - while (m_approximate && (retries < maxRepairRetries)) + while ((option != ProfileSynthesisOption::RetainLikelihoods) && m_approximate && (retries < maxRepairRetries)) { JITDUMP("\n\n[%d] Retrying reconstruction with blend factor " FMT_WT ", because %s\n", retries, m_blendFactor, m_cappedCyclicProbabilities ? "capped cyclic probabilities" : "solver failed to converge"); @@ -153,12 +148,26 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option) m_comp->fgPgoSynthesized = true; m_comp->fgPgoConsistent = !m_approximate; + // A simple check whether the current method has more than one edge. + m_comp->fgPgoSingleEdge = true; + for (BasicBlock* const block : m_comp->Blocks()) + { + if (block->NumSucc() > 1) + { + m_comp->fgPgoSingleEdge = false; + break; + } + } + m_comp->Metrics.ProfileSynthesizedBlendedOrRepaired++; if (m_approximate) { JITDUMP("Profile is inconsistent. 
Bypassing post-phase consistency checks.\n"); - m_comp->Metrics.ProfileInconsistentInitially++; + if (!m_comp->fgImportDone) + { + m_comp->Metrics.ProfileInconsistentInitially++; + } } // Derive the method's call count from the entry block's weight @@ -198,7 +207,7 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option) // Leave a note so we can bypass the post-phase check, and // instead assert at the end of fgImport, if we make it that far. // - if (!isConsistent) + if (!isConsistent && !m_comp->fgImportDone) { m_comp->fgPgoDeferredInconsistency = true; JITDUMP("Will defer asserting until after importation\n"); @@ -735,13 +744,6 @@ void ProfileSynthesis::RandomizeLikelihoods() // void ProfileSynthesis::ComputeCyclicProbabilities() { - m_cyclicProbabilities = nullptr; - if (m_loops->NumLoops() == 0) - { - return; - } - - m_cyclicProbabilities = new (m_comp, CMK_Pgo) weight_t[m_loops->NumLoops()]; // Walk loops in post order to visit inner loops before outer loops. for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) { @@ -818,10 +820,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop) for (FlowEdge* const edge : nestedLoop->EntryEdges()) { - if (BasicBlock::sameHndRegion(block, edge->getSourceBlock())) - { - newWeight += edge->getLikelyWeight(); - } + newWeight += edge->getLikelyWeight(); } newWeight *= m_cyclicProbabilities[nestedLoop->GetIndex()]; @@ -837,10 +836,10 @@ void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop) { BasicBlock* const sourceBlock = edge->getSourceBlock(); - // Ignore flow across EH, or from preds not in the loop. - // Latter can happen if there are unreachable blocks that flow into the loop. + // Ignore flow from preds not in the loop. + // This can happen if there are unreachable blocks that flow into the loop. // - if (BasicBlock::sameHndRegion(block, sourceBlock) && loop->ContainsBlock(sourceBlock)) + if (loop->ContainsBlock(sourceBlock)) { newWeight += edge->getLikelyWeight(); } @@ -1198,13 +1197,14 @@ void ProfileSynthesis::GaussSeidelSolver() weight_t relResidual = 0; weight_t oldRelResidual = 0; weight_t eigenvalue = 0; - weight_t const stopRelResidual = 0.002; + weight_t const stopRelResidual = 0.001; BasicBlock* residualBlock = nullptr; BasicBlock* relResidualBlock = nullptr; const FlowGraphDfsTree* const dfs = m_loops->GetDfsTree(); unsigned const blockCount = dfs->GetPostOrderCount(); bool checkEntryExitWeight = true; - bool showDetails = false; + bool const showDetails = false; + bool const callFinalliesCreated = m_comp->fgImportDone; JITDUMP("Synthesis solver: flow graph has %u improper loop headers\n", m_improperLoopHeaders); @@ -1280,9 +1280,10 @@ void ProfileSynthesis::GaussSeidelSolver() { newWeight = block->bbWeight; - // Finallies also add in the weight of their try. + // If we haven't added flow edges into/out of finallies yet, + // add in the weight of their corresponding try regions. 
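For context on the "capped cyclic probabilities" the solver retries on, here is a rough model of the quantity being capped. This is a simplification, not the JIT's actual ComputeCyclicProbabilities code: it assumes a single back edge and an arbitrary cap value.

```cpp
// If a loop's back edge is taken with probability p, each entry into the loop
// executes the body roughly 1 / (1 - p) times. As p approaches 1 that factor
// blows up, which is why synthesis caps cyclic probabilities and remembers
// that it had to (making the resulting profile approximate).
static double CyclicWeightMultiplier(double backEdgeLikelihood, double cap = 100.0)
{
    if (backEdgeLikelihood >= 1.0)
    {
        // "Never exits" according to the likelihoods: fall back to the cap.
        return cap;
    }

    double multiplier = 1.0 / (1.0 - backEdgeLikelihood);
    return (multiplier < cap) ? multiplier : cap;
}
```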
// - if (ehDsc->HasFinallyHandler()) + if (!callFinalliesCreated && ehDsc->HasFinallyHandler()) { newWeight += countVector[ehDsc->ebdTryBeg->bbNum]; } @@ -1308,11 +1309,7 @@ void ProfileSynthesis::GaussSeidelSolver() for (FlowEdge* const edge : loop->EntryEdges()) { BasicBlock* const predBlock = edge->getSourceBlock(); - - if (BasicBlock::sameHndRegion(block, predBlock)) - { - newWeight += edge->getLikelihood() * countVector[predBlock->bbNum]; - } + newWeight += edge->getLikelihood() * countVector[predBlock->bbNum]; } // Scale by cyclic probability @@ -1344,10 +1341,7 @@ void ProfileSynthesis::GaussSeidelSolver() continue; } - if (BasicBlock::sameHndRegion(block, predBlock)) - { - newWeight += edge->getLikelihood() * countVector[predBlock->bbNum]; - } + newWeight += edge->getLikelihood() * countVector[predBlock->bbNum]; } if (selfEdge != nullptr) @@ -1401,12 +1395,14 @@ void ProfileSynthesis::GaussSeidelSolver() countVector[block->bbNum] = newWeight; // Remember max absolute and relative change - // (note rel residual will be infinite at times, that's ok) + // (note rel residual will be as large as 1e9 at times, that's ok) // // Note we are using a "point" bound here ("infinity norm") rather than say // computing the L2-norm of the entire residual vector. // - weight_t const blockRelResidual = change / oldWeight; + // Avoid dividing by zero if oldWeight is very small. + // + weight_t const blockRelResidual = change / max(oldWeight, 1e-12); if ((relResidualBlock == nullptr) || (blockRelResidual > relResidual)) { @@ -1427,10 +1423,12 @@ void ProfileSynthesis::GaussSeidelSolver() } } - // If there were no improper headers, we will have converged in one pass. + // If there were no improper headers, we will have converged in one pass // (profile may still be inconsistent, if there were capped cyclic probabilities). + // After the importer runs, we require that synthesis achieves profile consistency + // unless the resultant profile is approximate, so don't skip the below checks. // - if (m_improperLoopHeaders == 0) + if ((m_improperLoopHeaders == 0) && !m_comp->fgImportDone) { converged = true; break; diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h index f27466835be5..b1425167c752 100644 --- a/src/coreclr/jit/fgprofilesynthesis.h +++ b/src/coreclr/jit/fgprofilesynthesis.h @@ -42,7 +42,29 @@ class ProfileSynthesis private: ProfileSynthesis(Compiler* compiler) : m_comp(compiler) + , m_dfsTree(compiler->m_dfsTree) + , m_loops(compiler->m_loops) + // Profile synthesis can be run before or after morph, so tolerate (non-)canonical method entries + , m_entryBlock((compiler->opts.IsOSR() && (compiler->fgEntryBB != nullptr)) ? 
compiler->fgEntryBB + : compiler->fgFirstBB) { + // If the Compiler object didn't give us flowgraph annotations to use, re-compute them + if (m_dfsTree == nullptr) + { + m_dfsTree = compiler->fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + } + else + { + assert(m_loops != nullptr); + } + + m_improperLoopHeaders = m_loops->ImproperLoopHeaders(); + + if (m_loops->NumLoops() > 0) + { + m_cyclicProbabilities = new (compiler, CMK_Pgo) weight_t[m_loops->NumLoops()]; + } } static constexpr weight_t exceptionWeight = 0.00001; diff --git a/src/coreclr/jit/fgstmt.cpp b/src/coreclr/jit/fgstmt.cpp index 85809339965f..f5ab387e2624 100644 --- a/src/coreclr/jit/fgstmt.cpp +++ b/src/coreclr/jit/fgstmt.cpp @@ -539,6 +539,7 @@ inline bool OperIsControlFlow(genTreeOps oper) case GT_RETURN: case GT_RETFILT: case GT_SWIFT_ERROR_RET: + case GT_RETURN_SUSPEND: #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: #endif // FEATURE_EH_WINDOWS_X86 diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index bb836a6f61ac..97d6285b5461 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -37,7 +37,7 @@ static bool blockNeedsGCPoll(BasicBlock* block) { for (GenTree* const tree : stmt->TreeList()) { - if (tree->OperGet() == GT_CALL) + if (tree->OperIs(GT_CALL)) { GenTreeCall* call = tree->AsCall(); if (call->IsUnmanaged()) @@ -52,6 +52,10 @@ static bool blockNeedsGCPoll(BasicBlock* block) blockMayNeedGCPoll = true; } } + else if (tree->OperIs(GT_GCPOLL)) + { + blockMayNeedGCPoll = true; + } } } } @@ -277,7 +281,7 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block) // We need to keep a few flags... // - noway_assert((originalFlags & (BBF_SPLIT_NONEXIST & ~(BBF_LOOP_HEAD | BBF_RETLESS_CALL))) == 0); + noway_assert((originalFlags & (BBF_SPLIT_NONEXIST & ~BBF_RETLESS_CALL)) == 0); top->SetFlagsRaw(originalFlags & (~(BBF_SPLIT_LOST | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT)); bottom->SetFlags(originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | BBF_RETLESS_CALL)); bottom->inheritWeight(top); @@ -594,6 +598,8 @@ PhaseStatus Compiler::fgImport() INDEBUG(fgPgoDeferredInconsistency = false); } + fgImportDone = true; + return PhaseStatus::MODIFIED_EVERYTHING; } @@ -665,7 +671,7 @@ bool Compiler::fgIsCommaThrow(GenTree* tree, bool forFolding /* = false */) } /* Check for cast of a GT_COMMA with a throw overflow */ - if ((tree->gtOper == GT_COMMA) && (tree->gtFlags & GTF_CALL) && (tree->gtFlags & GTF_EXCEPT)) + if (tree->OperIs(GT_COMMA) && (tree->gtFlags & GTF_CALL) && (tree->gtFlags & GTF_EXCEPT)) { return (fgIsThrow(tree->AsOp()->gtOp1)); } @@ -823,7 +829,7 @@ void Compiler::fgSetPreferredInitCctor() GenTreeCall* Compiler::fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls) { #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { CORINFO_RESOLVED_TOKEN resolvedToken; memset(&resolvedToken, 0, sizeof(resolvedToken)); @@ -867,6 +873,9 @@ bool Compiler::fgAddrCouldBeNull(GenTree* addr) case GT_ARR_ADDR: return (addr->gtFlags & GTF_ARR_ADDR_NONNULL) == 0; + case GT_BOX: + return !addr->IsBoxedValue(); + case GT_LCL_VAR: return !lvaIsImplicitByRefLocal(addr->AsLclVar()->GetLclNum()); @@ -877,7 +886,7 @@ bool Compiler::fgAddrCouldBeNull(GenTree* addr) return !addr->IsHelperCall() || !s_helperCallProperties.NonNullReturn(addr->AsCall()->GetHelperNum()); case GT_ADD: - if (addr->AsOp()->gtOp1->gtOper == GT_CNS_INT) + if (addr->AsOp()->gtOp1->OperIs(GT_CNS_INT)) { GenTree* cns1Tree = addr->AsOp()->gtOp1; if 
(!cns1Tree->IsIconHandle()) @@ -891,7 +900,7 @@ bool Compiler::fgAddrCouldBeNull(GenTree* addr) else // Op1 was a handle represented as a constant { // Is Op2 also a constant? - if (addr->AsOp()->gtOp2->gtOper == GT_CNS_INT) + if (addr->AsOp()->gtOp2->OperIs(GT_CNS_INT)) { GenTree* cns2Tree = addr->AsOp()->gtOp2; // Is this an addition of a handle and constant @@ -909,7 +918,7 @@ bool Compiler::fgAddrCouldBeNull(GenTree* addr) else { // Op1 is not a constant. What about Op2? - if (addr->AsOp()->gtOp2->gtOper == GT_CNS_INT) + if (addr->AsOp()->gtOp2->OperIs(GT_CNS_INT)) { GenTree* cns2Tree = addr->AsOp()->gtOp2; // Is this an addition of a small constant @@ -994,7 +1003,7 @@ GenTree* Compiler::fgOptimizeDelegateConstructor(GenTreeCall* call, assert(call->gtArgs.CountArgs() == 3); assert(!call->gtArgs.AreArgsComplete()); GenTree* targetMethod = call->gtArgs.GetArgByIndex(2)->GetNode(); - noway_assert(targetMethod->TypeGet() == TYP_I_IMPL); + noway_assert(targetMethod->TypeIs(TYP_I_IMPL)); genTreeOps oper = targetMethod->OperGet(); CORINFO_METHOD_HANDLE targetMethodHnd = nullptr; GenTree* qmarkNode = nullptr; @@ -1009,14 +1018,14 @@ GenTree* Compiler::fgOptimizeDelegateConstructor(GenTreeCall* call, assert(targetMethod->AsCall()->gtArgs.CountArgs() == 3); GenTree* handleNode = targetMethod->AsCall()->gtArgs.GetArgByIndex(2)->GetNode(); - if (handleNode->OperGet() == GT_CNS_INT) + if (handleNode->OperIs(GT_CNS_INT)) { // it's a ldvirtftn case, fetch the methodhandle off the helper for ldvirtftn. It's the 3rd arg targetMethodHnd = CORINFO_METHOD_HANDLE(handleNode->AsIntCon()->gtCompileTimeHandle); } // Sometimes the argument to this is the result of a generic dictionary lookup, which shows // up as a GT_QMARK. - else if (handleNode->OperGet() == GT_QMARK) + else if (handleNode->OperIs(GT_QMARK)) { qmarkNode = handleNode; } @@ -1029,7 +1038,7 @@ GenTree* Compiler::fgOptimizeDelegateConstructor(GenTreeCall* call, } if (qmarkNode) { - noway_assert(qmarkNode->OperGet() == GT_QMARK); + noway_assert(qmarkNode->OperIs(GT_QMARK)); // The argument is actually a generic dictionary lookup. For delegate creation it looks // like: // GT_QMARK @@ -1041,13 +1050,13 @@ GenTree* Compiler::fgOptimizeDelegateConstructor(GenTreeCall* call, // // In this case I can find the token (which is a method handle) and that is the compile time // handle. - noway_assert(qmarkNode->AsOp()->gtOp2->OperGet() == GT_COLON); - noway_assert(qmarkNode->AsOp()->gtOp2->AsOp()->gtOp1->OperGet() == GT_CALL); + noway_assert(qmarkNode->AsOp()->gtOp2->OperIs(GT_COLON)); + noway_assert(qmarkNode->AsOp()->gtOp2->AsOp()->gtOp1->OperIs(GT_CALL)); GenTreeCall* runtimeLookupCall = qmarkNode->AsOp()->gtOp2->AsOp()->gtOp1->AsCall(); // This could be any of CORINFO_HELP_RUNTIMEHANDLE_(METHOD|CLASS)(_LOG?) 
GenTree* tokenNode = runtimeLookupCall->gtArgs.GetArgByIndex(1)->GetNode(); - noway_assert(tokenNode->OperGet() == GT_CNS_INT); + noway_assert(tokenNode->OperIs(GT_CNS_INT)); targetMethodHnd = CORINFO_METHOD_HANDLE(tokenNode->AsIntCon()->gtCompileTimeHandle); } @@ -1070,7 +1079,7 @@ GenTree* Compiler::fgOptimizeDelegateConstructor(GenTreeCall* call, } #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { if (IsTargetAbi(CORINFO_NATIVEAOT_ABI)) { @@ -1279,10 +1288,9 @@ GenTree* Compiler::fgGetCritSectOfStaticMethod() if (!kind.needsRuntimeLookup) { CORINFO_OBJECT_HANDLE ptr = info.compCompHnd->getRuntimeTypePointer(info.compClassHnd); - if (ptr != NULL) + if (ptr != NO_OBJECT_HANDLE) { - setMethodHasFrozenObjects(); - tree = gtNewIconEmbHndNode((void*)ptr, nullptr, GTF_ICON_OBJ_HDL, nullptr); + tree = gtNewIconEmbObjHndNode(ptr); } else { @@ -1364,13 +1372,13 @@ GenTree* Compiler::fgGetCritSectOfStaticMethod() * { * unsigned byte acquired = 0; * try { - * JIT_MonEnterWorker(, &acquired); + * Monitor.Enter(, &acquired); * * *** all the preexisting user code goes here *** * - * JIT_MonExitWorker(, &acquired); + * Monitor.ExitIfTaken(, &acquired); * } fault { - * JIT_MonExitWorker(, &acquired); + * Monitor.ExitIfTaken(, &acquired); * } * L_return: * ret @@ -1462,6 +1470,7 @@ void Compiler::fgAddSyncMethodEnterExit() // Initialize the new entry + newEntry->ebdID = impInlineRoot()->compEHID++; newEntry->ebdHandlerType = EH_HANDLER_FAULT; newEntry->ebdTryBeg = tryBegBB; @@ -1962,13 +1971,12 @@ class MergedReturns // True if any returns were impacted. // // Notes: - // The goal is to set things up favorably for a reasonable layout without - // putting too much burden on fgReorderBlocks; in particular, since that - // method doesn't (currently) shuffle non-profile, non-rare code to create - // fall-through and reduce gotos, this method places each const return - // block immediately after its last predecessor, so that the flow from - // there to it can become fallthrough without requiring any motion to be - // performed by fgReorderBlocks. + // Prematurely optimizing the block layout is unnecessary. + // However, 'ReturnCountHardLimit' is small enough such that + // any throughput savings from skipping this pass are negated + // by the need to emit branches to these blocks in MinOpts. + // If we decide to increase the number of epilogues allowed, + // we should consider removing this pass. // bool PlaceReturns() { @@ -2449,7 +2457,18 @@ PhaseStatus Compiler::fgAddInternal() } else { - merger.SetMaxReturns(MergedReturns::ReturnCountHardLimit); + unsigned limit = MergedReturns::ReturnCountHardLimit; +#ifdef JIT32_GCENCODER + // For the jit32 GC encoder the limit is an actual hard limit. In + // async functions we will be introducing another return during + // the async transformation, so make sure there's a free epilog + // for it. + if (compIsAsync()) + { + limit--; + } +#endif + merger.SetMaxReturns(limit); } } @@ -2811,6 +2830,58 @@ bool Compiler::fgSimpleLowerCastOfSmpOp(LIR::Range& range, GenTreeCast* cast) return false; } +//------------------------------------------------------------------------ +// fgSimpleLowerBswap16 : Optimization to remove CAST nodes from operands of small ops that depents on +// lower bits only (currently only BSWAP16). +// Example: +// BSWAP16(CAST(x)) transforms to BSWAP16(x) +// +// Returns: +// True or false, representing changes were made. 
+// +// Notes: +// This optimization could be done in morph, but it cannot because there are correctness +// problems with NOLs (normalized-on-load locals) and how they are handled in VN. +// Simple put, you cannot remove a CAST from CAST(LCL_VAR{nol}) in HIR. +// +// Because the optimization happens during rationalization, turning into LIR, it is safe to remove the CAST. +// +bool Compiler::fgSimpleLowerBswap16(LIR::Range& range, GenTree* op) +{ + assert(op->OperIs(GT_BSWAP16)); + + if (opts.OptimizationDisabled()) + return false; + + // When openrand is a integral cast + // When both source and target sizes are at least the operation size + bool madeChanges = false; + + if (op->gtGetOp1()->OperIs(GT_CAST)) + { + GenTreeCast* op1 = op->gtGetOp1()->AsCast(); + + if (!op1->gtOverflow() && (genTypeSize(op1->CastToType()) >= 2) && + genActualType(op1->CastFromType()) == TYP_INT) + { + // This cast does not affect the lower 16 bits. It can be removed. + op->AsOp()->gtOp1 = op1->CastOp(); + range.Remove(op1); + madeChanges = true; + } + } + +#ifdef DEBUG + if (madeChanges) + { + JITDUMP("Lower - Downcast of Small Op %s:\n", GenTree::OpName(op->OperGet())); + DISPTREE(op); + } +#endif // DEBUG + + return madeChanges; +} + //------------------------------------------------------------------------------ // fgGetDomSpeculatively: Try determine a more accurate dominator than cached bbIDom // @@ -3041,18 +3112,6 @@ PhaseStatus Compiler::fgCreateFunclets() assert(UsesFunclets()); assert(!fgFuncletsCreated); - // Allocate the PSPSym, if needed. PSPSym is not used by the NativeAOT ABI - if (!IsTargetAbi(CORINFO_NATIVEAOT_ABI)) - { - if (ehNeedsPSPSym()) - { - lvaPSPSym = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym")); - LclVarDsc* lclPSPSym = lvaGetDesc(lvaPSPSym); - lclPSPSym->lvType = TYP_I_IMPL; - lvaSetVarDoNotEnregister(lvaPSPSym DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr)); - } - } - fgCreateFuncletPrologBlocks(); unsigned XTnum; @@ -3862,6 +3921,8 @@ bool Compiler::AddCodeDsc::UpdateKeyDesignator(Compiler* compiler) // check changes from being enclosed in a try to being // enclosed in a finally. // + // Filter ACDs should always remain in filter regions. + // const bool inHnd = acdHndIndex > 0; const bool inTry = acdTryIndex > 0; @@ -3871,26 +3932,35 @@ bool Compiler::AddCodeDsc::UpdateKeyDesignator(Compiler* compiler) { // Non-funclet case // + assert(acdKeyDsg != AcdKeyDesignator::KD_FLT); newDsg = inTry ? AcdKeyDesignator::KD_TRY : AcdKeyDesignator::KD_NONE; } else if (!inTry && !inHnd) { // Moved outside of all EH regions. // + assert(acdKeyDsg != AcdKeyDesignator::KD_FLT); newDsg = AcdKeyDesignator::KD_NONE; } else if (inTry && (!inHnd || (acdTryIndex < acdHndIndex))) { // Moved into a parent try region. // + assert(acdKeyDsg != AcdKeyDesignator::KD_FLT); newDsg = AcdKeyDesignator::KD_TRY; } else { - // Moved into a parent handler region. - // Note this cannot be a filter region. + // Moved into a parent or renumbered handler or filter region. 
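Returning to the fgSimpleLowerBswap16 change above: the cast is removable because a 16-bit byte swap reads only the low 16 bits of its operand, so a narrowing cast feeding it cannot change the result. A tiny self-contained check of that claim (plain C++, not JIT IR):

```cpp
#include <cassert>
#include <cstdint>

// Only the low 16 bits of the operand participate in a 16-bit byte swap.
static uint16_t Bswap16Low(uint32_t value)
{
    uint16_t low = static_cast<uint16_t>(value);
    return static_cast<uint16_t>((low << 8) | (low >> 8));
}

int main()
{
    uint32_t x = 0xDEADBEEF;

    // Narrowing the operand first (what the removed CAST did) cannot change the
    // answer, because the swap never looks at the upper 16 bits:
    // both calls yield 0xEFBE.
    assert(Bswap16Low(x) == Bswap16Low(static_cast<uint16_t>(x)));
    return 0;
}
```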
// - newDsg = AcdKeyDesignator::KD_HND; + if (acdKeyDsg == AcdKeyDesignator::KD_FLT) + { + newDsg = AcdKeyDesignator::KD_FLT; + } + else + { + newDsg = AcdKeyDesignator::KD_HND; + } } bool result = (newDsg != acdKeyDsg); @@ -4029,7 +4099,7 @@ PhaseStatus Compiler::fgSetBlockOrder() BasicBlock::s_nMaxTrees = 0; #endif - if (compCanEncodePtrArgCntMax() && fgHasCycleWithoutGCSafePoint()) + if (fgHasCycleWithoutGCSafePoint()) { JITDUMP("Marking method as fully interruptible\n"); SetInterruptible(true); @@ -4819,7 +4889,7 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfsTr BitVecTraits loopTraits = loop->LoopBlockTraits(); loop->m_blocks = BitVecOps::MakeEmpty(&loopTraits); - if (!FindNaturalLoopBlocks(loop, worklist)) + if (!FindNaturalLoopBlocks(loop, worklist) || !IsLoopCanonicalizable(loop)) { loops->m_improperLoopHeaders++; @@ -5017,6 +5087,40 @@ bool FlowGraphNaturalLoops::FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, Ar return true; } +//------------------------------------------------------------------------ +// FlowGraphNaturalLoops::IsLoopCanonicalizable: +// Check if a loop will be able to be canonicalized if we record it. +// +// Parameters: +// loop - Loop structure (partially filled by caller) +// +// Returns: +// True if the loop header can be canonicalized: +// - Can have a preheader created +// - Exits can be made unique from the loop +// +bool FlowGraphNaturalLoops::IsLoopCanonicalizable(FlowGraphNaturalLoop* loop) +{ + Compiler* comp = loop->GetDfsTree()->GetCompiler(); + // The only (known) problematic case is when a backedge is a callfinally edge. + if (!comp->bbIsHandlerBeg(loop->GetHeader())) + { + return true; + } + + for (FlowEdge* backedge : loop->BackEdges()) + { + if (backedge->getSourceBlock()->KindIs(BBJ_CALLFINALLY)) + { + // It would not be possible to create a preheader for this loop + // since this backedge could not be redirected. + return false; + } + } + + return true; +} + #ifdef DEBUG //------------------------------------------------------------------------ @@ -5635,7 +5739,7 @@ bool FlowGraphNaturalLoop::MatchLimit(unsigned iterVar, GenTree* test, NaturalLo return false; } - if (iterOp->gtType != TYP_INT) + if (!iterOp->TypeIs(TYP_INT)) { return false; } @@ -6145,14 +6249,30 @@ bool FlowGraphNaturalLoop::CanDuplicateWithEH(INDEBUG(const char** reason)) // Check if this is an "outermost" try within the loop. // If so, we have more checking to do later on. // - const bool headerInTry = header->hasTryIndex(); - unsigned blockIndex = block->getTryIndex(); - unsigned outermostBlockIndex = comp->ehTrueEnclosingTryIndexIL(blockIndex); + bool const headerIsInTry = header->hasTryIndex(); + unsigned const blockTryIndex = block->getTryIndex(); + unsigned const enclosingTryIndex = comp->ehTrueEnclosingTryIndex(blockTryIndex); - if ((headerInTry && (outermostBlockIndex == header->getTryIndex())) || - (!headerInTry && (outermostBlockIndex == EHblkDsc::NO_ENCLOSING_INDEX))) + if ((headerIsInTry && (enclosingTryIndex == header->getTryIndex())) || + (!headerIsInTry && (enclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX))) { - tryRegionsToClone.Push(block); + // When we clone a try we also clone its handler. + // + // This try may be enclosed in a handler whose try begin is in the loop. + // If so we'll clone this try when we clone (the handler of) that try. 
+ // + bool isInHandlerOfInLoopTry = false; + if (block->hasHndIndex()) + { + unsigned const enclosingHndIndex = block->getHndIndex(); + BasicBlock* const associatedTryBeg = comp->ehGetDsc(enclosingHndIndex)->ebdTryBeg; + isInHandlerOfInLoopTry = this->ContainsBlock(associatedTryBeg); + } + + if (!isInHandlerOfInLoopTry) + { + tryRegionsToClone.Push(block); + } } } @@ -6670,7 +6790,7 @@ unsigned NaturalLoopIterInfo::VarLimit() assert(HasInvariantLocalLimit); GenTree* limit = Limit(); - assert(limit->OperGet() == GT_LCL_VAR); + assert(limit->OperIs(GT_LCL_VAR)); return limit->AsLclVarCommon()->GetLclNum(); } diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 9d57fa3a4a6d..8bfa7af4f245 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -498,19 +498,19 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) // GenTree* fwdSubNode = defNode->AsLclVarCommon()->Data(); - // Can't substitute GT_CATCH_ARG. - // Can't substitute GT_LCLHEAP. + // Can't substitute GT_CATCH_ARG, GT_LCLHEAP or GT_ASYNC_CONTINUATION. // - // Don't substitute a no return call (trips up morph in some cases). - if (fwdSubNode->OperIs(GT_CATCH_ARG, GT_LCLHEAP)) + if (fwdSubNode->OperIs(GT_CATCH_ARG, GT_LCLHEAP, GT_ASYNC_CONTINUATION)) { - JITDUMP(" tree to sub is catch arg, or lcl heap\n"); + JITDUMP(" tree to sub is %s\n", GenTree::OpName(fwdSubNode->OperGet())); return false; } - if (fwdSubNode->IsCall() && fwdSubNode->AsCall()->IsNoReturn()) + // Do not substitute async calls; if the target node has a temp BYREF node, + // that creates illegal IR. + if (gtTreeContainsAsyncCall(fwdSubNode)) { - JITDUMP(" tree to sub is a 'no return' call\n"); + JITDUMP(" tree has an async call\n"); return false; } @@ -784,8 +784,8 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) unsigned const dstLclNum = parentNode->AsLclVar()->GetLclNum(); LclVarDsc* const dstVarDsc = lvaGetDesc(dstLclNum); - JITDUMP(" [marking V%02u as multi-reg-ret]", dstLclNum); - dstVarDsc->lvIsMultiRegRet = true; + JITDUMP(" [marking V%02u as multi-reg-dest]", dstLclNum); + dstVarDsc->SetIsMultiRegDest(); } // If a method returns a multi-reg type, only forward sub locals, @@ -801,7 +801,7 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) fsv.GetParentNode()->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)) { #if defined(TARGET_X86) - if (fwdSubNode->TypeGet() == TYP_LONG) + if (fwdSubNode->TypeIs(TYP_LONG)) { JITDUMP(" TYP_LONG fwd sub node, target is x86\n"); return false; @@ -835,6 +835,7 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) LclVarDsc* const fwdVarDsc = lvaGetDesc(fwdLclNum); JITDUMP(" [marking V%02u as multi-reg-ret]", fwdLclNum); + // TODO-Quirk: Only needed for heuristics fwdVarDsc->lvIsMultiRegRet = true; fwdSubNodeLocal->gtFlags |= GTF_DONT_CSE; } diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index 12917dbc1b9b..042948955f75 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -17,6 +17,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma hdrstop #endif +#include #include "gcinfotypes.h" #include "patchpointinfo.h" @@ -374,8 +375,8 @@ void GCInfo::gcDumpVarPtrDsc(varPtrDsc* desc) // find . 
-name regen.txt | xargs cat | grep CallSite | sort | uniq -c | sort -r | head -80 #if REGEN_SHORTCUTS || REGEN_CALLPAT -static FILE* logFile = NULL; -CRITICAL_SECTION logFileLock; +static FILE* logFile = NULL; +minipal_mutex logFileLock; #endif #if REGEN_CALLPAT @@ -398,12 +399,12 @@ static void regenLog(unsigned codeDelta, if (logFile == NULL) { logFile = fopen_utf8("regen.txt", "a"); - InitializeCriticalSection(&logFileLock); + minipal_mutex_init(&logFileLock); } assert(((enSize > 0) && (enSize < 256)) && ((pat.val & 0xffffff) != 0xffffff)); - EnterCriticalSection(&logFileLock); + minipal_mutex_enter(&logFileLock); fprintf(logFile, "CallSite( 0x%08x, 0x%02x%02x, 0x", pat.val, byrefArgMask, byrefRegMask); @@ -415,7 +416,7 @@ static void regenLog(unsigned codeDelta, fprintf(logFile, "),\n"); fflush(logFile); - LeaveCriticalSection(&logFileLock); + minipal_mutex_leave(&logFileLock); } #endif @@ -425,10 +426,10 @@ static void regenLog(unsigned encoding, InfoHdr* header, InfoHdr* state) if (logFile == NULL) { logFile = fopen_utf8("regen.txt", "a"); - InitializeCriticalSection(&logFileLock); + minipal_mutex_init(&logFileLock); } - EnterCriticalSection(&logFileLock); + minipal_mutex_enter(&logFileLock); fprintf(logFile, "InfoHdr( %2d, %2d, %1d, %1d, %1d," @@ -451,7 +452,7 @@ static void regenLog(unsigned encoding, InfoHdr* header, InfoHdr* state) fflush(logFile); - LeaveCriticalSection(&logFileLock); + minipal_mutex_leave(&logFileLock); } #endif @@ -886,7 +887,7 @@ BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state, BYTE& code state->returnKind = header.returnKind; codeSet = 2; // Two byte encoding encoding = header.returnKind; - _ASSERTE(encoding < SET_RET_KIND_MAX); + _ASSERTE(encoding <= SET_RET_KIND_MAX); goto DO_RETURN; } @@ -950,6 +951,27 @@ BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state, BYTE& code } } + if (state->noGCRegionCnt != header.noGCRegionCnt) + { + assert(state->noGCRegionCnt <= SET_NOGCREGIONS_MAX || state->noGCRegionCnt == HAS_NOGCREGIONS); + + // We have two-byte encodings for 0..4 + if (header.noGCRegionCnt <= SET_NOGCREGIONS_MAX) + { + state->noGCRegionCnt = header.noGCRegionCnt; + codeSet = 2; + encoding = (BYTE)(SET_NOGCREGIONS_CNT + header.noGCRegionCnt); + goto DO_RETURN; + } + else if (state->noGCRegionCnt != HAS_NOGCREGIONS) + { + state->noGCRegionCnt = HAS_NOGCREGIONS; + codeSet = 2; + encoding = FFFF_NOGCREGION_CNT; + goto DO_RETURN; + } + } + DO_RETURN: _ASSERTE(encoding < MORE_BYTES_TO_FOLLOW); if (!state->isHeaderMatch(header)) @@ -964,7 +986,7 @@ static int measureDistance(const InfoHdr& header, const InfoHdrSmall* p, int clo if (p->untrackedCnt != header.untrackedCnt) { - if (header.untrackedCnt > 3) + if (header.untrackedCnt > SET_UNTRACKED_MAX) { if (p->untrackedCnt != HAS_UNTRACKED) distance += 1; @@ -1199,6 +1221,13 @@ static int measureDistance(const InfoHdr& header, const InfoHdrSmall* p, int clo return distance; } + if (header.noGCRegionCnt > 0) + { + distance += 2; + if (distance >= closeness) + return distance; + } + return distance; } @@ -1546,7 +1575,7 @@ size_t GCInfo::gcInfoBlockHdrSave( ReturnKind returnKind = getReturnKind(); _ASSERTE(IsValidReturnKind(returnKind) && "Return Kind must be valid"); _ASSERTE(!IsStructReturnKind(returnKind) && "Struct Return Kinds Unexpected for JIT32"); - _ASSERTE(((int)returnKind < (int)SET_RET_KIND_MAX) && "ReturnKind has no legal encoding"); + _ASSERTE(((int)returnKind <= (int)SET_RET_KIND_MAX) && "ReturnKind has no legal encoding"); header->returnKind = returnKind; 
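The new noGCRegionCnt handling in encodeHeaderNext follows the same scheme the header already uses for untrackedCnt: a handful of small counts get their own compact encodings, and anything larger sets an escape value whose real count is emitted after the header as a variable-length integer. A sketch of that split, with made-up opcode constants (the real SET_NOGCREGIONS_* values live in gcinfotypes.h):

```cpp
#include <cstdint>
#include <vector>

// Invented opcode values for illustration only.
constexpr unsigned kSmallCountMax = 4;    // counts 0..4 fit directly in the opcode
constexpr uint8_t  kSetCountBase  = 0x40; // opcode meaning "count is N" (N added to the base)
constexpr uint8_t  kHasBigCount   = 0x45; // escape opcode: real count is written later

static void EncodeRegionCount(unsigned count, std::vector<uint8_t>& opcodes,
                              std::vector<unsigned>& deferredCounts)
{
    if (count <= kSmallCountMax)
    {
        // Small count: fold it into the opcode itself.
        opcodes.push_back(static_cast<uint8_t>(kSetCountBase + count));
    }
    else
    {
        // Large count: emit the escape opcode now, the varint-encoded count later.
        opcodes.push_back(kHasBigCount);
        deferredCounts.push_back(count);
    }
}
```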
header->gsCookieOffset = INVALID_GS_COOKIE_OFFSET; @@ -1578,6 +1607,22 @@ size_t GCInfo::gcInfoBlockHdrSave( assert(header->epilogCount <= 1); } #endif + if (compiler->UsesFunclets() && compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + // While the sync start offset and end offset are not used by the stackwalker/EH system + // in funclets mode, we do need to know if the code is synchronized if we are generating + // an edit and continue method, so that we can properly manage the stack during a Remap + // operation, for determining the ParamTypeArg for collectible generics purposes, and + // for determining the offset of the localloc variable in the stack frame. + // Instead of inventing a new encoding, just encode some non-0 offsets into these fields. + // to indicate that the method is synchronized. + // + // Use 1 for both offsets, since that doesn't actually make sense and implies that the + // sync region is 0 bytes long. The JIT will never emit a sync region of 0 bytes in non- + // funclet mode. + header->syncStartOffset = 1; + header->syncEndOffset = 1; + } header->revPInvokeOffset = INVALID_REV_PINVOKE_OFFSET; if (compiler->opts.IsReversePInvoke()) @@ -1588,10 +1633,7 @@ size_t GCInfo::gcInfoBlockHdrSave( assert(header->revPInvokeOffset != INVALID_REV_PINVOKE_OFFSET); } - assert((compiler->compArgSize & 0x3) == 0); - - size_t argCount = - (compiler->compArgSize - (compiler->codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES; + size_t argCount = compiler->lvaParameterStackSize / REGSIZE_BYTES; assert(argCount <= MAX_USHORT_SIZE_T); header->argCount = static_cast(argCount); @@ -1602,7 +1644,8 @@ size_t GCInfo::gcInfoBlockHdrSave( if (mask == 0) { gcCountForHeader((UNALIGNED unsigned int*)&header->untrackedCnt, - (UNALIGNED unsigned int*)&header->varPtrTableSize); + (UNALIGNED unsigned int*)&header->varPtrTableSize, + (UNALIGNED unsigned int*)&header->noGCRegionCnt); } // @@ -1699,6 +1742,14 @@ size_t GCInfo::gcInfoBlockHdrSave( dest += (sz & mask); } + if (header->noGCRegionCnt > SET_NOGCREGIONS_MAX) + { + unsigned count = header->noGCRegionCnt; + unsigned sz = encodeUnsigned(mask ? dest : NULL, count); + size += sz; + dest += (sz & mask); + } + if (header->epilogCount) { /* Generate table unless one epilog at the end of the method */ @@ -2091,6 +2142,29 @@ unsigned PendingArgsStack::pasEnumGCoffs(unsigned iter, unsigned* offs) return pasENUM_END; } +// Small helper class to handle the No-GC-Interrupt callbacks +// when reporting interruptible ranges. +class NoGCRegionEncoder +{ + BYTE* dest; +public: + size_t totalSize; + + NoGCRegionEncoder(BYTE* dest) + : dest(dest) + , totalSize(0) + { + } + + // This callback is called for each insGroup marked with IGF_NOGCINTERRUPT. + bool operator()(unsigned igFuncIdx, unsigned igOffs, unsigned igSize, unsigned firstInstrSize, bool isInProlog) + { + totalSize += encodeUnsigned(dest == NULL ? NULL : dest + totalSize, igOffs); + totalSize += encodeUnsigned(dest == NULL ? NULL : dest + totalSize, igSize); + return true; + } +}; + /***************************************************************************** * * Generate the register pointer map, and return its total size in bytes. If @@ -2098,11 +2172,6 @@ unsigned PendingArgsStack::pasEnumGCoffs(unsigned iter, unsigned* offs) * entry, which is never more than 10 bytes), so this can be used to merely * compute the size of the table. 
*/ - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset) { unsigned varNum; @@ -2117,7 +2186,8 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un /* Start computing the total size of the table */ - bool emitArgTabOffset = (header.varPtrTableSize != 0 || header.untrackedCnt > SET_UNTRACKED_MAX); + bool emitArgTabOffset = + (header.varPtrTableSize != 0 || header.untrackedCnt > SET_UNTRACKED_MAX || header.noGCRegionCnt != 0); if (mask != 0 && emitArgTabOffset) { assert(*pArgTabOffset <= MAX_UNSIGNED_SIZE_T); @@ -2137,18 +2207,29 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un /************************************************************************** * - * Untracked ptr variables + * Untracked ptr variables and no GC regions * ************************************************************************** */ #if DEBUG unsigned untrackedCount = 0; unsigned varPtrTableSize = 0; - gcCountForHeader(&untrackedCount, &varPtrTableSize); + unsigned noGCRegionCount = 0; + gcCountForHeader(&untrackedCount, &varPtrTableSize, &noGCRegionCount); assert(untrackedCount == header.untrackedCnt); assert(varPtrTableSize == header.varPtrTableSize); + assert(noGCRegionCount == header.noGCRegionCnt); #endif // DEBUG + if (header.noGCRegionCnt != 0) + { + NoGCRegionEncoder encoder(mask != 0 ? dest : NULL); + compiler->GetEmitter()->emitGenNoGCLst(encoder, /* skipMainPrologsAndEpilogs = */ true); + totalSize += encoder.totalSize; + if (mask != 0) + dest += encoder.totalSize; + } + if (header.untrackedCnt != 0) { // Write the table of untracked pointer variables. @@ -2184,7 +2265,7 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un assert(~OFFSET_MASK % sizeof(offset) == 0); - if (varDsc->TypeGet() == TYP_BYREF) + if (varDsc->TypeIs(TYP_BYREF)) { // Or in byref_OFFSET_FLAG for 'byref' pointer tracking offset |= byref_OFFSET_FLAG; @@ -2208,7 +2289,7 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un totalSize += sz; } } - else if ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->lvOnFrame && varDsc->HasGCPtr()) + else if (varDsc->TypeIs(TYP_STRUCT) && varDsc->lvOnFrame && varDsc->HasGCPtr()) { ClassLayout* layout = varDsc->GetLayout(); unsigned slots = layout->GetSlotCount(); @@ -2323,7 +2404,7 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un // "this". So report it as a tracked variable with a liveness // extending over the entire method. - assert(compiler->lvaTable[compiler->info.compThisArg].TypeGet() == TYP_REF); + assert(compiler->lvaTable[compiler->info.compThisArg].TypeIs(TYP_REF)); unsigned varOffs = compiler->lvaTable[compiler->info.compThisArg].GetStackOffset(); @@ -3202,9 +3283,24 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un callArgCnt = genRegPtrTemp->rpdPtrArg; - unsigned gcrefRegMask = genRegPtrTemp->rpdCallGCrefRegs; + unsigned gcrefRegMask = 0; - byrefRegMask = genRegPtrTemp->rpdCallByrefRegs; + byrefRegMask = 0; + + // The order here is fixed: it must agree with the order assumed in eetwain. + // NB: x86 GC decoder does not report return registers at call sites. 
+ static const regNumber calleeSaveOrder[] = {REG_EDI, REG_ESI, REG_EBX, REG_EBP}; + for (unsigned i = 0; i < ArrLen(calleeSaveOrder); i++) + { + if ((genRegPtrTemp->rpdCallGCrefRegs & (1 << (calleeSaveOrder[i] - REG_INT_FIRST))) != 0) + { + gcrefRegMask |= 1u << i; + } + if ((genRegPtrTemp->rpdCallByrefRegs & (1 << (calleeSaveOrder[i] - REG_INT_FIRST))) != 0) + { + byrefRegMask |= 1u << i; + } + } assert((gcrefRegMask & byrefRegMask) == 0); @@ -3535,9 +3631,6 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un return totalSize; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif /*****************************************************************************/ #if DUMP_GC_TABLES @@ -3578,7 +3671,14 @@ size_t GCInfo::gcInfoBlockHdrDump(const BYTE* table, InfoHdr* header, unsigned* size_t GCInfo::gcDumpPtrTable(const BYTE* table, const InfoHdr& header, unsigned methodSize) { - printf("Pointer table:\n"); + if (header.noGCRegionCnt > 0) + { + printf("No GC regions and pointer table:\n"); + } + else + { + printf("Pointer table:\n"); + } GCDump gcDump(GCINFO_VERSION); @@ -3737,15 +3837,6 @@ class GcInfoEncoderWithLogging } } - void SetPSPSymStackSlot(INT32 spOffsetPSPSym) - { - m_gcInfoEncoder->SetPSPSymStackSlot(spOffsetPSPSym); - if (m_doLogging) - { - printf("Set PSPSym stack slot to %d.\n", spOffsetPSPSym); - } - } - void SetGenericsInstContextStackSlot(INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type) { m_gcInfoEncoder->SetGenericsInstContextStackSlot(spOffsetGenericsContext, type); @@ -3858,12 +3949,13 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz assert(false); } - const int offset = compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(), - compiler->isFramePointerUsed()); + const int offset = compiler->lvaCachedGenericContextArgOffset(); #ifdef DEBUG if (compiler->opts.IsOSR()) { + const int callerSpOffset = compiler->lvaToCallerSPRelativeOffset(offset, compiler->isFramePointerUsed()); + // Sanity check the offset vs saved patchpoint info. // const PatchpointInfo* const ppInfo = compiler->info.compPatchpointInfo; @@ -3872,12 +3964,12 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz // subtract off 2 register slots (saved FP, saved RA). // const int osrOffset = ppInfo->GenericContextArgOffset() - 2 * REGSIZE_BYTES; - assert(offset == osrOffset); + assert(callerSpOffset == osrOffset); #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // PP info has virtual offset. This is also the caller SP offset. // const int osrOffset = ppInfo->GenericContextArgOffset(); - assert(offset == osrOffset); + assert(callerSpOffset == osrOffset); #endif } #endif @@ -3890,23 +3982,16 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz { assert(compiler->info.compThisArg != BAD_VAR_NUM); + const int offset = compiler->lvaCachedGenericContextArgOffset(); + +#ifdef DEBUG // OSR can report the root method's frame slot, if that method reported context. // If not, the OSR frame will have saved the needed context. 
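The calleeSaveOrder loop added above maps the emitter's full per-register liveness masks down to the four-bit callee-saved encoding that the x86 GC info format (and the runtime's stackwalker) assumes. A standalone sketch of the same compaction, with an invented register enum in place of regNumber:

```cpp
#include <cstdint>

// Invented register numbering; only the fixed EDI/ESI/EBX/EBP order matters,
// since that order must match what the decoder on the runtime side assumes.
enum ToyReg
{
    TOY_EAX, TOY_ECX, TOY_EDX, TOY_EBX, TOY_ESP, TOY_EBP, TOY_ESI, TOY_EDI
};

static const ToyReg kCalleeSaveOrder[] = {TOY_EDI, TOY_ESI, TOY_EBX, TOY_EBP};

// Compress a full per-register mask down to one bit per callee-saved register,
// with bit i corresponding to kCalleeSaveOrder[i].
static uint8_t CompactCalleeSavedMask(uint32_t fullRegMask)
{
    uint8_t compact = 0;
    for (unsigned i = 0; i < sizeof(kCalleeSaveOrder) / sizeof(kCalleeSaveOrder[0]); i++)
    {
        if ((fullRegMask & (1u << kCalleeSaveOrder[i])) != 0)
        {
            compact |= static_cast<uint8_t>(1u << i);
        }
    }
    return compact;
}
```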
- // - bool useRootFrameSlot = true; - if (compiler->opts.IsOSR()) + if (compiler->opts.IsOSR() && compiler->info.compPatchpointInfo->HasKeptAliveThis()) { - const PatchpointInfo* const ppInfo = compiler->info.compPatchpointInfo; + const int callerSpOffset = + compiler->lvaToCallerSPRelativeOffset(offset, compiler->isFramePointerUsed(), true); - useRootFrameSlot = ppInfo->HasKeptAliveThis(); - } - - const int offset = compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(), - compiler->isFramePointerUsed(), useRootFrameSlot); - -#ifdef DEBUG - if (compiler->opts.IsOSR() && useRootFrameSlot) - { // Sanity check the offset vs saved patchpoint info. // const PatchpointInfo* const ppInfo = compiler->info.compPatchpointInfo; @@ -3915,12 +4000,12 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz // subtract off 2 register slots (saved FP, saved RA). // const int osrOffset = ppInfo->KeptAliveThisOffset() - 2 * REGSIZE_BYTES; - assert(offset == osrOffset); + assert(callerSpOffset == osrOffset); #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // PP info has virtual offset. This is also the caller SP offset. // const int osrOffset = ppInfo->KeptAliveThisOffset(); - assert(offset == osrOffset); + assert(callerSpOffset == osrOffset); #endif } #endif @@ -3945,16 +4030,6 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz gcInfoEncoderWithLog->SetPrologSize(prologSize); } - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { -#ifdef TARGET_AMD64 - // The PSPSym is relative to InitialSP on X64 and CallerSP on other platforms. - gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); -#else // !TARGET_AMD64 - gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); -#endif // !TARGET_AMD64 - } - #ifdef TARGET_AMD64 if (compiler->ehAnyFunclets()) { @@ -4132,7 +4207,7 @@ void GCInfo::gcMakeRegPtrTable( GcSlotFlags flags = GC_SLOT_UNTRACKED; - if (varDsc->TypeGet() == TYP_BYREF) + if (varDsc->TypeIs(TYP_BYREF)) { // Or in byref_OFFSET_FLAG for 'byref' pointer tracking flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); @@ -4174,7 +4249,7 @@ void GCInfo::gcMakeRegPtrTable( // If this is a TYP_STRUCT, handle its GC pointers. // Note that the enregisterable struct types cannot have GC pointers in them. - if ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->GetLayout()->HasGCPtr() && varDsc->lvOnFrame && + if (varDsc->TypeIs(TYP_STRUCT) && varDsc->GetLayout()->HasGCPtr() && varDsc->lvOnFrame && (varDsc->lvExactSize() >= TARGET_POINTER_SIZE)) { ClassLayout* layout = varDsc->GetLayout(); @@ -4284,7 +4359,7 @@ void GCInfo::gcMakeRegPtrTable( assert(!compiler->lvaReportParamTypeArg()); GcSlotFlags flags = GC_SLOT_UNTRACKED; - if (compiler->lvaTable[compiler->info.compThisArg].TypeGet() == TYP_BYREF) + if (compiler->lvaTable[compiler->info.compThisArg].TypeIs(TYP_BYREF)) { // Or in GC_SLOT_INTERIOR for 'byref' pointer tracking flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); @@ -4468,8 +4543,8 @@ void GCInfo::gcMakeRegPtrTable( assert(call->u1.cdArgMask == 0 && call->cdArgCnt == 0); // Other than that, we just have to deal with the regmasks. 
- regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); - regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); + regMaskSmall gcrefRegMask = call->cdGCrefRegs; + regMaskSmall byrefRegMask = call->cdByrefRegs; assert((gcrefRegMask & byrefRegMask) == 0); @@ -4555,11 +4630,8 @@ void GCInfo::gcMakeRegPtrTable( { // This is a true call site. - regMaskSmall gcrefRegMask = - genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs).GetIntRegSet(); - - regMaskSmall byrefRegMask = - genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs).GetIntRegSet(); + regMaskSmall gcrefRegMask = regMaskSmall(genRegPtrTemp->rpdCallGCrefRegs << REG_INT_FIRST); + regMaskSmall byrefRegMask = regMaskSmall(genRegPtrTemp->rpdCallByrefRegs << REG_INT_FIRST); assert((gcrefRegMask & byrefRegMask) == 0); diff --git a/src/coreclr/jit/gcinfo.cpp b/src/coreclr/jit/gcinfo.cpp index 8fc398a1d1b3..7cb4326dae06 100644 --- a/src/coreclr/jit/gcinfo.cpp +++ b/src/coreclr/jit/gcinfo.cpp @@ -293,13 +293,13 @@ GCInfo::WriteBarrierForm GCInfo::gcWriteBarrierFormFromTargetAddress(GenTree* tg } // No point in trying to further deconstruct a TYP_I_IMPL address. - if (tgtAddr->TypeGet() == TYP_I_IMPL) + if (tgtAddr->TypeIs(TYP_I_IMPL)) { return GCInfo::WBF_BarrierUnknown; } // Otherwise... - assert(tgtAddr->TypeGet() == TYP_BYREF); + assert(tgtAddr->TypeIs(TYP_BYREF)); bool simplifiedExpr = true; while (simplifiedExpr) { @@ -311,7 +311,7 @@ GCInfo::WriteBarrierForm GCInfo::gcWriteBarrierFormFromTargetAddress(GenTree* tg // source. while (tgtAddr->OperIs(GT_ADD, GT_LEA)) { - if (tgtAddr->OperGet() == GT_ADD) + if (tgtAddr->OperIs(GT_ADD)) { GenTree* addOp1 = tgtAddr->AsOp()->gtGetOp1(); GenTree* addOp2 = tgtAddr->AsOp()->gtGetOp2(); @@ -320,7 +320,7 @@ GCInfo::WriteBarrierForm GCInfo::gcWriteBarrierFormFromTargetAddress(GenTree* tg if (addOp1Type == TYP_BYREF || addOp1Type == TYP_REF) { - assert(((addOp2Type != TYP_BYREF) || (addOp2->OperIs(GT_CNS_INT))) && (addOp2Type != TYP_REF)); + assert(((addOp2Type != TYP_BYREF) || addOp2->OperIs(GT_CNS_INT)) && (addOp2Type != TYP_REF)); tgtAddr = addOp1; simplifiedExpr = true; } @@ -341,9 +341,9 @@ GCInfo::WriteBarrierForm GCInfo::gcWriteBarrierFormFromTargetAddress(GenTree* tg else { // Must be an LEA (i.e., an AddrMode) - assert(tgtAddr->OperGet() == GT_LEA); + assert(tgtAddr->OperIs(GT_LEA)); tgtAddr = tgtAddr->AsAddrMode()->Base(); - if (tgtAddr->TypeGet() == TYP_BYREF || tgtAddr->TypeGet() == TYP_REF) + if (tgtAddr->TypeIs(TYP_BYREF, TYP_REF)) { simplifiedExpr = true; } @@ -356,7 +356,7 @@ GCInfo::WriteBarrierForm GCInfo::gcWriteBarrierFormFromTargetAddress(GenTree* tg } } - if (tgtAddr->TypeGet() == TYP_REF) + if (tgtAddr->TypeIs(TYP_REF)) { return GCInfo::WBF_BarrierUnchecked; } @@ -421,12 +421,33 @@ GCInfo::regPtrDsc* GCInfo::gcRegPtrAllocDsc() #ifdef JIT32_GCENCODER +// Small helper class to handle the No-GC-Interrupt callbacks +// when reporting interruptible ranges. +struct NoGCRegionCounter +{ + unsigned noGCRegionCount; + + NoGCRegionCounter() + : noGCRegionCount(0) + { + } + + // This callback is called for each insGroup marked with IGF_NOGCINTERRUPT. + bool operator()(unsigned igFuncIdx, unsigned igOffs, unsigned igSize, unsigned firstInstrSize, bool isInProlog) + { + noGCRegionCount++; + return true; + } +}; + /***************************************************************************** * * Compute the various counts that get stored in the info block header. 
*/ -void GCInfo::gcCountForHeader(UNALIGNED unsigned int* pUntrackedCount, UNALIGNED unsigned int* pVarPtrTableSize) +void GCInfo::gcCountForHeader(UNALIGNED unsigned int* pUntrackedCount, + UNALIGNED unsigned int* pVarPtrTableSize, + UNALIGNED unsigned int* pNoGCRegionCount) { unsigned varNum; LclVarDsc* varDsc; @@ -475,7 +496,7 @@ void GCInfo::gcCountForHeader(UNALIGNED unsigned int* pUntrackedCount, UNALIGNED untrackedCount++; } - else if ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->lvOnFrame) + else if (varDsc->TypeIs(TYP_STRUCT) && varDsc->lvOnFrame) { untrackedCount += varDsc->GetLayout()->GetGCPtrCount(); } @@ -558,6 +579,19 @@ void GCInfo::gcCountForHeader(UNALIGNED unsigned int* pUntrackedCount, UNALIGNED #endif *pVarPtrTableSize = varPtrTableSize; + + // Count the number of no GC regions + + unsigned int noGCRegionCount = 0; + + if (compiler->codeGen->GetInterruptible()) + { + NoGCRegionCounter counter; + compiler->GetEmitter()->emitGenNoGCLst(counter, /* skipMainPrologsAndEpilogs = */ true); + noGCRegionCount = counter.noGCRegionCount; + } + + *pNoGCRegionCount = noGCRegionCount; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 8b6b3cc2f140..a7f098e82090 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -169,8 +169,8 @@ static void printIndent(IndentStack* indentStack) #endif -#if defined(DEBUG) || CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_MEM_ALLOC || \ - NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS || DUMP_FLOWGRAPHS +#if defined(DEBUG) || CALL_ARG_STATS || COUNT_BASIC_BLOCKS || EMITTER_STATS || MEASURE_MEM_ALLOC || NODEBASH_STATS || \ + MEASURE_NODE_SIZE || COUNT_AST_OPERS || DUMP_FLOWGRAPHS static const char* opNames[] = { #define GTNODE(en, st, cm, ivn, ok) #en, @@ -261,14 +261,9 @@ void GenTree::InitNodeSize() GenTree::s_gtNodeSizes[GT_MOD] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_UMOD] = TREE_NODE_SZ_LARGE; #endif -#ifdef FEATURE_PUT_STRUCT_ARG_STK // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE; -#if FEATURE_ARG_SPLIT - GenTree::s_gtNodeSizes[GT_PUTARG_SPLIT] = TREE_NODE_SZ_LARGE; -#endif // FEATURE_ARG_SPLIT -#endif // FEATURE_PUT_STRUCT_ARG_STK // This list of assertions should come to contain all GenTree subtypes that are declared // "small". @@ -324,16 +319,9 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeILOffset) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node -#ifndef FEATURE_PUT_STRUCT_ARG_STK - static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL); -#else // FEATURE_PUT_STRUCT_ARG_STK // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. 
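NoGCRegionCounter here and NoGCRegionEncoder earlier in gcencode.cpp are two callbacks driven by the same enumeration (emitGenNoGCLst): one pass sizes the header, a later pass emits the table. A generic sketch of that count-then-encode pattern over invented region data, not the emitter's real instruction groups:

```cpp
#include <cstddef>
#include <vector>

// Invented stand-in for a no-GC-interrupt range.
struct ToyNoGCRegion
{
    unsigned offset;
    unsigned size;
};

// Stand-in for the enumerator: invoke the callback once per region and let the
// callback decide whether it is counting or encoding.
template <typename TCallback>
static void ForEachNoGCRegion(const std::vector<ToyNoGCRegion>& regions, TCallback callback)
{
    for (const ToyNoGCRegion& region : regions)
    {
        callback(region.offset, region.size);
    }
}

static unsigned CountNoGCRegions(const std::vector<ToyNoGCRegion>& regions)
{
    unsigned count = 0;
    ForEachNoGCRegion(regions, [&](unsigned, unsigned) { count++; });
    return count;
}

static size_t MeasureEncoding(const std::vector<ToyNoGCRegion>& regions)
{
    size_t totalBytes = 0;
    ForEachNoGCRegion(regions, [&](unsigned offset, unsigned size) {
        // Pretend varint: one byte for small values, two otherwise.
        totalBytes += (offset < 0x80 ? 1 : 2) + (size < 0x80 ? 1 : 2);
    });
    return totalBytes;
}
```

Keeping the traversal in one place and varying only the callback is what lets the counting and encoding paths stay in sync.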
static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE); -#if FEATURE_ARG_SPLIT - static_assert_no_msg(sizeof(GenTreePutArgSplit) <= TREE_NODE_SZ_LARGE); -#endif // FEATURE_ARG_SPLIT -#endif // FEATURE_PUT_STRUCT_ARG_STK #ifdef FEATURE_HW_INTRINSICS static_assert_no_msg(sizeof(GenTreeHWIntrinsic) <= TREE_NODE_SZ_SMALL); @@ -881,12 +869,6 @@ int GenTree::GetRegisterDstCount(Compiler* compiler) const { return gtGetOp1()->GetRegisterDstCount(compiler); } -#if FEATURE_ARG_SPLIT - else if (OperIsPutArgSplit()) - { - return (const_cast(this))->AsPutArgSplit()->gtNumRegs; - } -#endif #if !defined(TARGET_64BIT) else if (OperIsMultiRegOp()) { @@ -937,13 +919,6 @@ bool GenTree::IsMultiRegNode() const return true; } -#if FEATURE_ARG_SPLIT - if (OperIsPutArgSplit()) - { - return true; - } -#endif - #if !defined(TARGET_64BIT) if (OperIsMultiRegOp()) { @@ -988,13 +963,6 @@ unsigned GenTree::GetMultiRegCount(Compiler* comp) const return AsCall()->GetReturnTypeDesc()->GetReturnRegCount(); } -#if FEATURE_ARG_SPLIT - if (OperIsPutArgSplit()) - { - return AsPutArgSplit()->gtNumRegs; - } -#endif - #if !defined(TARGET_64BIT) if (OperIsMultiRegOp()) { @@ -1101,21 +1069,6 @@ regMaskTP GenTree::gtGetRegMask() const } } } -#if FEATURE_ARG_SPLIT - else if (compFeatureArgSplit() && OperIsPutArgSplit()) - { - const GenTreePutArgSplit* splitArg = AsPutArgSplit(); - const unsigned regCount = splitArg->gtNumRegs; - - resultMask = RBM_NONE; - for (unsigned i = 0; i < regCount; ++i) - { - regNumber reg = splitArg->GetRegNumByIdx(i); - assert(reg != REG_NA); - resultMask |= genRegMask(reg); - } - } -#endif // FEATURE_ARG_SPLIT else { resultMask = genRegMask(GetRegNum()); @@ -1148,131 +1101,24 @@ void GenTreeFieldList::InsertFieldLIR( } //--------------------------------------------------------------- -// IsHfaArg: Is this arg considered a homogeneous floating-point aggregate? -// -bool CallArgABIInformation::IsHfaArg() const -{ - if (GlobalJitOptions::compFeatureHfa) - { - return IsHfa(GetHfaElemKind()); - } - else - { - return false; - } -} - -//--------------------------------------------------------------- -// IsHfaRegArg: Is this an HFA argument passed in registers? -// -bool CallArgABIInformation::IsHfaRegArg() const -{ - if (GlobalJitOptions::compFeatureHfa) - { - return IsHfa(GetHfaElemKind()) && IsPassedInRegisters(); - } - else - { - return false; - } -} - -//--------------------------------------------------------------- -// GetHfaType: Get the type of each element of the HFA arg. -// -var_types CallArgABIInformation::GetHfaType() const -{ - if (GlobalJitOptions::compFeatureHfa) - { - return HfaTypeFromElemKind(GetHfaElemKind()); - } - else - { - return TYP_UNDEF; - } -} - -//--------------------------------------------------------------- -// SetHfaType: Set the type of each element of the HFA arg. -// -// Arguments: -// type - The new type for each element -// hfaSlots - How many registers are used by the HFA. -// -// Remarks: -// This can only be called after the passing mode of the argument (registers -// or stack) has been determined. When passing HFAs of doubles on ARM it is -// expected that `hfaSlots` refers to the number of float registers used, -// i.e. twice the number of doubles being passed. This function will convert -// that into double registers and set `NumRegs` appropriately. -// -void CallArgABIInformation::SetHfaType(var_types type, unsigned hfaSlots) -{ - if (GlobalJitOptions::compFeatureHfa) - { - if (type != TYP_UNDEF) - { - // We must already have set the passing mode. 
- assert(NumRegs != 0 || GetStackByteSize() != 0); - // We originally set numRegs according to the size of the struct, but if the size of the - // hfaType is not the same as the pointer size, we need to correct it. - // Note that hfaSlots is the number of registers we will use. For ARM, that is twice - // the number of "double registers". - unsigned numHfaRegs = hfaSlots; -#ifdef TARGET_ARM - if (type == TYP_DOUBLE) - { - // Must be an even number of registers. - assert((NumRegs & 1) == 0); - numHfaRegs = hfaSlots / 2; - } -#endif // TARGET_ARM - - if (!IsHfaArg()) - { - // We haven't previously set this; do so now. - CorInfoHFAElemType elemKind = HfaElemKindFromType(type); - SetHfaElemKind(elemKind); - // Ensure we've allocated enough bits. - assert(GetHfaElemKind() == elemKind); - if (IsPassedInRegisters()) - { - NumRegs = numHfaRegs; - } - } - else - { - // We've already set this; ensure that it's consistent. - if (IsPassedInRegisters()) - { - assert(NumRegs == numHfaRegs); - } - assert(type == HfaTypeFromElemKind(GetHfaElemKind())); - } - } - } -} - -//--------------------------------------------------------------- -// GetStackByteSize: Get the number of stack bytes used to pass this argument. +// SoleFieldOrThis: +// If this FIELD_LIST has only one field, then return it; otherwise return +// the field list. // // Returns: -// For pure register arguments, this returns 0. -// For pure stack arguments, this returns ByteSize. -// For split arguments the return value is between 0 and ByteSize. +// Sole field, or "this". // -unsigned CallArgABIInformation::GetStackByteSize() const +GenTree* GenTreeFieldList::SoleFieldOrThis() { - if (!IsSplit() && NumRegs > 0) + Use* head = m_uses.GetHead(); + assert(head != nullptr); + + if (head->GetNext() == nullptr) { - return 0; + return head->GetNode(); } - assert(!IsHfaArg() || !IsSplit()); - - assert(ByteSize > TARGET_POINTER_SIZE * NumRegs); - const unsigned stackByteSize = ByteSize - TARGET_POINTER_SIZE * NumRegs; - return stackByteSize; + return this; } #ifdef DEBUG @@ -1282,13 +1128,13 @@ void NewCallArg::ValidateTypes() if (varTypeIsStruct(SignatureType)) { - assert(SignatureClsHnd != NO_CLASS_HANDLE); + assert(SignatureLayout != nullptr); assert(SignatureType == Node->TypeGet()); - if (SignatureType == TYP_STRUCT) + if ((SignatureType == TYP_STRUCT) && !Node->OperIsFieldList()) { Compiler* comp = JitTls::GetCompiler(); - assert(ClassLayout::AreCompatible(comp->typGetObjLayout(SignatureClsHnd), Node->GetLayout(comp))); + assert(ClassLayout::AreCompatible(SignatureLayout, Node->GetLayout(comp))); } } } @@ -1344,41 +1190,6 @@ bool CallArg::IsUserArg() const } } -#ifdef DEBUG -//--------------------------------------------------------------- -// CheckIsStruct: Verify that the struct ABI information is consistent with the IR node. -// -void CallArg::CheckIsStruct() -{ - GenTree* node = GetNode(); - if (varTypeIsStruct(GetSignatureType())) - { - if (!varTypeIsStruct(node) && !node->OperIs(GT_FIELD_LIST)) - { - // This is the case where we are passing a struct as a primitive type. - // On most targets, this is always a single register or slot. - // However, on ARM this could be two slots if it is TYP_DOUBLE. 
-            bool isPassedAsPrimitiveType =
-                ((AbiInfo.NumRegs == 1) || ((AbiInfo.NumRegs == 0) && (AbiInfo.ByteSize <= TARGET_POINTER_SIZE)));
-#ifdef TARGET_ARM
-            if (!isPassedAsPrimitiveType)
-            {
-                if (node->TypeGet() == TYP_DOUBLE && AbiInfo.NumRegs == 0 && (AbiInfo.GetStackSlotsNumber() == 2))
-                {
-                    isPassedAsPrimitiveType = true;
-                }
-            }
-#endif // TARGET_ARM
-            assert(isPassedAsPrimitiveType);
-        }
-    }
-    else
-    {
-        assert(!varTypeIsStruct(node));
-    }
-}
-#endif
-
 CallArgs::CallArgs()
     : m_head(nullptr)
     , m_lateHead(nullptr)
@@ -1391,7 +1202,7 @@ CallArgs::CallArgs()
     , m_hasRetBuffer(false)
     , m_isVarArgs(false)
     , m_abiInformationDetermined(false)
-    , m_newAbiInformationDetermined(false)
+    , m_hasAddedFinalArgs(false)
     , m_hasRegArgs(false)
     , m_hasStackArgs(false)
     , m_argsComplete(false)
@@ -2002,9 +1813,13 @@ void CallArgs::PushLateBack(CallArg* arg)
 //
 void CallArgs::Remove(CallArg* arg)
 {
 #ifndef TARGET_WASM
-    assert(!m_abiInformationDetermined && !m_argsComplete);
+    assert(!m_hasAddedFinalArgs && !m_argsComplete);
 #endif // !TARGET_WASM
 
     CallArg** slot = &m_head;
     while (*slot != nullptr)
@@ -2206,7 +2021,7 @@ GenTree* Compiler::getArrayLengthFromAllocation(GenTree* tree DEBUGARG(BasicBloc
     GenTree* arrayLength = nullptr;
 
-    if (tree->OperGet() == GT_CALL)
+    if (tree->OperIs(GT_CALL))
     {
         GenTreeCall* call = tree->AsCall();
 
@@ -2217,7 +2032,7 @@ GenTree* Compiler::getArrayLengthFromAllocation(GenTree* tree DEBUGARG(BasicBloc
             {
                 case CORINFO_HELP_NEWARR_1_MAYBEFROZEN:
                 case CORINFO_HELP_NEWARR_1_DIRECT:
-                case CORINFO_HELP_NEWARR_1_OBJ:
+                case CORINFO_HELP_NEWARR_1_PTR:
                 case CORINFO_HELP_NEWARR_1_VC:
                 case CORINFO_HELP_NEWARR_1_ALIGN8:
                 {
@@ -2420,6 +2235,36 @@ bool GenTreeCall::HasSideEffects(Compiler* compiler, bool ignoreExceptions, bool
            (!helperProperties.IsAllocator(helper) || ((gtCallMoreFlags & GTF_CALL_M_ALLOC_SIDE_EFFECTS) != 0));
 }
 
+//-------------------------------------------------------------------------
+// IsAsync: Whether or not this call is to an async function.
+//
+// Return Value:
+//   True if so.
+//
+// Remarks:
+//   async involves passing an async continuation as a separate argument and
+//   returning an async continuation in REG_ASYNC_CONTINUATION_RET.
+//
+//   The async continuation is usually JIT added
+//   (WellKnownArg::AsyncContinuation). This is the case for an async method
+//   calling another async method by normal means. However, the VM also creates
+//   stubs that call async methods through calli where the async continuations
+//   are passed explicitly. See CEEJitInfo::getAsyncResumptionStub and
+//   MethodDesc::EmitTaskReturningThunk for examples. In
+//   those cases the JIT does not know (and does not need to know) which arg is
+//   the async continuation.
+//
+//   The VM also uses the StubHelpers.AsyncCallContinuation() intrinsic in the
+//   stubs discussed above. The JIT must take care in those cases to still mark
+//   the preceding call as an async call; this is required for correct LSRA
+//   behavior and GC reporting around the returned async continuation. This is
+//   currently done in lowering; see LowerAsyncContinuation().
+//
+bool GenTreeCall::IsAsync() const
+{
+    return (gtCallMoreFlags & GTF_CALL_M_ASYNC) != 0;
+}
+
 //-------------------------------------------------------------------------
 // HasNonStandardAddedArgs: Return true if the method has non-standard args added to the call
 // argument list during argument morphing (fgMorphArgs), e.g., passed in R10 or R11 on AMD64.
@@ -2477,6 +2322,24 @@ int GenTreeCall::GetNonStandardAddedArgCount(Compiler* compiler) const return 0; } +//------------------------------------------------------------------------- +// IsDevirtualizationCandidate: Determine if this GT_CALL node is a devirtualization candidate. +// A call will be unmarked from devirtualization candidate if it +// is devirtualized. +// +// Arguments: +// compiler - the compiler instance so that we can call eeFindHelper +// +// Return Value: +// Returns true if this GT_CALL node is a devirtualization candidate. +// +bool GenTreeCall::IsDevirtualizationCandidate(Compiler* compiler) const +{ + return IsVirtual() || + (gtCallType == CT_INDIRECT && (gtCallAddr->IsHelperCall(compiler, CORINFO_HELP_VIRTUAL_FUNC_PTR) || + gtCallAddr->IsHelperCall(compiler, CORINFO_HELP_GVMLOOKUP_FOR_SLOT))); +} + //------------------------------------------------------------------------- // IsHelperCall: Determine if this GT_CALL node is a specific helper call. // @@ -2646,9 +2509,7 @@ bool GenTreeCall::Equals(GenTreeCall* c1, GenTreeCall* c2) // void CallArgs::ResetFinalArgsAndABIInfo() { - m_newAbiInformationDetermined = false; - - if (!IsAbiInformationDetermined()) + if (!m_hasAddedFinalArgs) { return; } @@ -2678,16 +2539,10 @@ void CallArgs::ResetFinalArgsAndABIInfo() } } + m_hasAddedFinalArgs = false; m_abiInformationDetermined = false; } -#if !defined(FEATURE_PUT_STRUCT_ARG_STK) -unsigned GenTreePutArgStk::GetStackByteSize() const -{ - return genTypeSize(genActualType(gtOp1->gtType)); -} -#endif // !defined(FEATURE_PUT_STRUCT_ARG_STK) - /***************************************************************************** * * Returns non-zero if the two trees are identical. @@ -2851,6 +2706,7 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) case GT_NOP: case GT_LABEL: case GT_SWIFT_ERROR: + case GT_GCPOLL: return true; default: @@ -3108,6 +2964,25 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) return false; } +//------------------------------------------------------------------------ +// EffectiveUse: Return the use pointer to the "effective val". +// +// Arguments: +// use - Use edge +// +// Return Value: +// Edge pointing to non-comma node. +// +GenTree** GenTree::EffectiveUse(GenTree** use) +{ + while ((*use)->OperIs(GT_COMMA)) + { + use = &(*use)->AsOp()->gtOp2; + } + + return use; +} + //------------------------------------------------------------------------ // gtHasRef: Find out whether the given tree contains a local. 
// @@ -4288,7 +4163,7 @@ GenTree* Compiler::gtWalkOpEffectiveVal(GenTree* op) { op = op->gtEffectiveVal(); - if ((op->gtOper != GT_ADD) || op->gtOverflow() || !op->AsOp()->gtOp2->IsCnsIntOrI()) + if (!op->OperIs(GT_ADD) || op->gtOverflow() || !op->AsOp()->gtOp2->IsCnsIntOrI()) { break; } @@ -5156,19 +5031,19 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ GenTree* tmpOp2 = tmp->gtGetOp2(); assert(tmpOp2 != nullptr); - if ((tmpOp1 != base) && (tmpOp1->OperGet() == GT_ADD)) + if ((tmpOp1 != base) && tmpOp1->OperIs(GT_ADD)) { tmp = tmpOp1; } - else if (tmpOp2->OperGet() == GT_LSH) + else if (tmpOp2->OperIs(GT_LSH)) { tmp = tmpOp2; } - else if (tmpOp1->OperGet() == GT_LSH) + else if (tmpOp1->OperIs(GT_LSH)) { tmp = tmpOp1; } - else if (tmpOp2->OperGet() == GT_ADD) + else if (tmpOp2->OperIs(GT_ADD)) { tmp = tmpOp2; } @@ -5186,7 +5061,7 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ { addrModeCostEx += base->GetCostEx(); addrModeCostSz += base->GetCostSz(); - if ((base->gtOper == GT_LCL_VAR) && ((idx == NULL) || (cns == 0))) + if (base->OperIs(GT_LCL_VAR) && ((idx == NULL) || (cns == 0))) { addrModeCostSz -= 1; } @@ -5287,7 +5162,7 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ #error "Unknown TARGET" #endif - assert(addr->gtOper == GT_ADD); + assert(addr->OperIs(GT_ADD)); assert(!addr->gtOverflow()); assert(mul != 1); @@ -5410,11 +5285,6 @@ static void SetIndirectStoreEvalOrder(Compiler* comp, GenTreeIndir* store, bool* * 2. GetCostSz() to the code size estimate * 3. Sometimes sets GTF_ADDRMODE_NO_CSE on nodes in the tree. */ - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif unsigned Compiler::gtSetEvalOrder(GenTree* tree) { assert(tree); @@ -5843,6 +5713,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) break; case GT_NOP: + case GT_GCPOLL: level = 0; costEx = 0; costSz = 0; @@ -6016,10 +5887,12 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) case NI_System_Math_MaxMagnitude: case NI_System_Math_MaxMagnitudeNumber: case NI_System_Math_MaxNumber: + case NI_System_Math_MaxUnsigned: case NI_System_Math_Min: case NI_System_Math_MinMagnitude: case NI_System_Math_MinMagnitudeNumber: case NI_System_Math_MinNumber: + case NI_System_Math_MinUnsigned: case NI_System_Math_Pow: case NI_System_Math_Round: case NI_System_Math_Sin: @@ -6028,6 +5901,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) case NI_System_Math_Tan: case NI_System_Math_Tanh: case NI_System_Math_Truncate: + case NI_PRIMITIVE_LeadingZeroCount: + case NI_PRIMITIVE_TrailingZeroCount: + case NI_PRIMITIVE_PopCount: { // Giving intrinsics a large fixed execution cost is because we'd like to CSE // them, even if they are implemented by calls. 
This is different from modeling @@ -6262,7 +6138,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) } #ifdef TARGET_X86 - if ((tree->gtType == TYP_LONG) || tree->gtOverflow()) + if (tree->TypeIs(TYP_LONG) || tree->gtOverflow()) { /* We use imulEAX for TYP_LONG and overflow multiplications */ // Encourage the first operand to be evaluated (into EAX/EDX) first */ @@ -6306,7 +6182,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) #ifndef TARGET_64BIT // Variable sized LONG shifts require the use of a helper call // - if (tree->gtType == TYP_LONG) + if (tree->TypeIs(TYP_LONG)) { level += 5; lvl2 += 5; @@ -6368,10 +6244,12 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) case NI_System_Math_MaxMagnitude: case NI_System_Math_MaxMagnitudeNumber: case NI_System_Math_MaxNumber: + case NI_System_Math_MaxUnsigned: case NI_System_Math_Min: case NI_System_Math_MinMagnitude: case NI_System_Math_MinMagnitudeNumber: case NI_System_Math_MinNumber: + case NI_System_Math_MinUnsigned: { level++; break; @@ -6580,8 +6458,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) if (call->gtCallType == CT_INDIRECT) { // pinvoke-calli cookie is a constant, or constant indirection - assert(call->gtCallCookie == nullptr || call->gtCallCookie->gtOper == GT_CNS_INT || - call->gtCallCookie->gtOper == GT_IND); + assert(call->gtCallCookie == nullptr || call->gtCallCookie->OperIs(GT_CNS_INT, GT_IND)); GenTree* indirect = call->gtCallAddr; @@ -6621,7 +6498,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz += 2; } } - else if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + else if (!IsAot()) { costEx += 2; costSz += 6; @@ -6767,9 +6644,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) return level; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //------------------------------------------------------------------------ // gtSetEvalOrderMinOpts: A MinOpts specific version of gtSetEvalOrder. 
We don't @@ -7131,6 +7005,9 @@ unsigned GenTree::GetScaledIndex() case GT_MUL: return AsOp()->gtOp2->GetScaleIndexMul(); +#ifdef TARGET_RISCV64 + case GT_SLLI_UW: +#endif case GT_LSH: return AsOp()->gtOp2->GetScaleIndexShf(); @@ -7165,6 +7042,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) case GT_LCL_FLD: case GT_LCL_ADDR: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -7198,6 +7076,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) case GT_IL_OFFSET: case GT_NOP: case GT_SWIFT_ERROR: + case GT_GCPOLL: return false; // Standard unary operators @@ -7229,6 +7108,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) case GT_RETURNTRAP: case GT_RETURN: case GT_RETFILT: + case GT_RETURN_SUSPEND: case GT_BSWAP: case GT_BSWAP16: case GT_KEEPALIVE: @@ -7240,21 +7120,6 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) } return false; -// Variadic nodes -#if FEATURE_ARG_SPLIT - case GT_PUTARG_SPLIT: - if (this->AsUnOp()->gtOp1->gtOper == GT_FIELD_LIST) - { - return this->AsUnOp()->gtOp1->TryGetUse(operand, pUse); - } - if (operand == this->AsUnOp()->gtOp1) - { - *pUse = &this->AsUnOp()->gtOp1; - return true; - } - return false; -#endif // FEATURE_ARG_SPLIT - #if defined(FEATURE_HW_INTRINSICS) case GT_HWINTRINSIC: for (GenTree** opUse : this->AsMultiOp()->UseEdges()) @@ -7524,9 +7389,10 @@ bool GenTree::OperRequiresCallFlag(Compiler* comp) const switch (gtOper) { case GT_CALL: - return true; - + case GT_GCPOLL: case GT_KEEPALIVE: + case GT_ASYNC_CONTINUATION: + case GT_RETURN_SUSPEND: return true; case GT_SWIFT_ERROR: @@ -7555,7 +7421,7 @@ bool GenTree::OperRequiresCallFlag(Compiler* comp) const // could mark the trees just before argument processing, but it would require a full // tree walk of the argument tree, so we just do it when morphing, instead, even though we'll // mark non-argument trees (that will still get converted to calls, anyway). - return (this->TypeGet() == TYP_LONG) && (gtGetOp2()->OperGet() != GT_CNS_INT); + return this->TypeIs(TYP_LONG) && !gtGetOp2()->OperIs(GT_CNS_INT); #endif // FEATURE_FIXED_OUT_ARGS && !TARGET_64BIT default: @@ -7738,15 +7604,27 @@ ExceptionSetFlags GenTree::OperExceptions(Compiler* comp) GenTreeHWIntrinsic* hwIntrinsicNode = this->AsHWIntrinsic(); + ExceptionSetFlags flags = ExceptionSetFlags::None; if (hwIntrinsicNode->OperIsMemoryLoadOrStore()) { // TODO-CQ: We should use comp->fgAddrCouldBeNull on the address operand // to determine if this can actually produce an NRE or not + flags |= ExceptionSetFlags::NullReferenceException; + } - return ExceptionSetFlags::NullReferenceException; +#ifdef TARGET_XARCH + NamedIntrinsic intrinsicId = hwIntrinsicNode->GetHWIntrinsicId(); + if ((intrinsicId == NI_Vector128_op_Division) || (intrinsicId == NI_Vector256_op_Division) || + (intrinsicId == NI_Vector512_op_Division)) + { + // We currently don't try to avoid setting these flags and GTF_EXCEPT when + // we know that the operation in fact cannot overflow/divide by zero. 
+ assert(varTypeIsInt(AsHWIntrinsic()->GetSimdBaseType())); + flags |= ExceptionSetFlags::OverflowException | ExceptionSetFlags::DivideByZeroException; } +#endif - return ExceptionSetFlags::None; + return flags; } #endif // FEATURE_HW_INTRINSICS @@ -7781,6 +7659,16 @@ bool GenTree::OperMayThrow(Compiler* comp) { return true; } + +#ifdef TARGET_XARCH + NamedIntrinsic intrinsicId = this->AsHWIntrinsic()->GetHWIntrinsicId(); + if (intrinsicId == NI_Vector128_op_Division || intrinsicId == NI_Vector256_op_Division || + intrinsicId == NI_Vector512_op_Division) + { + assert(varTypeIsInt(AsHWIntrinsic()->GetSimdBaseType())); + return true; + } +#endif // TARGET_XARCH } #endif // FEATURE_HW_INTRINSICS @@ -7834,7 +7722,10 @@ bool GenTree::OperRequiresGlobRefFlag(Compiler* comp) const case GT_CMPXCHG: case GT_MEMORYBARRIER: case GT_KEEPALIVE: + case GT_ASYNC_CONTINUATION: + case GT_RETURN_SUSPEND: case GT_SWIFT_ERROR: + case GT_GCPOLL: return true; case GT_CALL: @@ -7878,6 +7769,7 @@ bool GenTree::OperSupportsOrderingSideEffect() const switch (OperGet()) { + case GT_ARR_ADDR: case GT_BOUNDS_CHECK: case GT_IND: case GT_BLK: @@ -7892,6 +7784,8 @@ bool GenTree::OperSupportsOrderingSideEffect() const case GT_CMPXCHG: case GT_MEMORYBARRIER: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: + case GT_RETURN_SUSPEND: case GT_SWIFT_ERROR: return true; default: @@ -7979,7 +7873,7 @@ var_types GenTreeLclVar::GetFieldTypeByIndex(Compiler* compiler, unsigned idx) assert(IsMultiReg()); LclVarDsc* varDsc = compiler->lvaGetDesc(GetLclNum()); LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + idx); - assert(fieldVarDsc->TypeGet() != TYP_STRUCT); // Don't expect struct fields. + assert(!fieldVarDsc->TypeIs(TYP_STRUCT)); // Don't expect struct fields. return fieldVarDsc->TypeGet(); } @@ -8320,27 +8214,19 @@ GenTree* Compiler::gtNewStringLiteralNode(InfoAccessType iat, void* pValue) switch (iat) { case IAT_VALUE: - setMethodHasFrozenObjects(); - tree = gtNewIconEmbHndNode(pValue, nullptr, GTF_ICON_OBJ_HDL, nullptr); -#ifdef DEBUG - tree->AsIntCon()->gtTargetHandle = (size_t)pValue; -#endif + tree = gtNewIconEmbObjHndNode((CORINFO_OBJECT_HANDLE)pValue); break; case IAT_PVALUE: // The value needs to be accessed via an indirection // Create an indirection tree = gtNewIndOfIconHandleNode(TYP_REF, (size_t)pValue, GTF_ICON_STR_HDL, true); -#ifdef DEBUG - tree->gtGetOp1()->AsIntCon()->gtTargetHandle = (size_t)pValue; -#endif + INDEBUG(tree->gtGetOp1()->AsIntCon()->gtTargetHandle = (size_t)pValue); break; case IAT_PPVALUE: // The value needs to be accessed via a double indirection // Create the first indirection. tree = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pValue, GTF_ICON_CONST_PTR, true); -#ifdef DEBUG - tree->gtGetOp1()->AsIntCon()->gtTargetHandle = (size_t)pValue; -#endif + INDEBUG(tree->gtGetOp1()->AsIntCon()->gtTargetHandle = (size_t)pValue); // Create the second indirection. tree = gtNewIndir(TYP_REF, tree, GTF_IND_NONFAULTING | GTF_IND_INVARIANT | GTF_IND_NONNULL); break; @@ -8692,18 +8578,7 @@ GenTree* Compiler::gtNewGenericCon(var_types type, uint8_t* cnsVal) case TYP_REF: { READ_VALUE(ssize_t); - if (val == 0) - { - return gtNewNull(); - } - else - { - // Even if the caller doesn't need the resulting tree let's still conservatively call - // setMethodHasFrozenObjects here to make caller's life easier. - setMethodHasFrozenObjects(); - GenTree* tree = gtNewIconEmbHndNode((void*)val, nullptr, GTF_ICON_OBJ_HDL, nullptr); - return tree; - } + return val == 0 ? 
gtNewNull() : gtNewIconEmbObjHndNode((CORINFO_OBJECT_HANDLE)val); } #ifdef FEATURE_SIMD case TYP_SIMD8: @@ -8896,9 +8771,11 @@ GenTreeCall* Compiler::gtNewCallNode(gtCallTypes callType, // These get updated after call node is built. node->gtInlineObservation = InlineObservation::CALLEE_UNUSED_INITIAL; node->gtRawILOffset = BAD_IL_OFFSET; - node->gtInlineContext = compInlineContext; + #endif + node->gtInlineContext = compInlineContext; + // Spec: Managed Retval sequence points needs to be generated while generating debug info for debuggable code. // // Implementation note: if not generating MRV info genCallSite2ILOffsetMap will be NULL and @@ -9033,6 +8910,11 @@ GenTreeConditional* Compiler::gtNewConditionalNode( return node; } +GenTreeFieldList* Compiler::gtNewFieldList() +{ + return new (this, GT_FIELD_LIST) GenTreeFieldList(); +} + GenTreeLclFld* Compiler::gtNewLclFldNode(unsigned lnum, var_types type, unsigned offset, ClassLayout* layout) { GenTreeLclFld* node = new (this, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, type, lnum, offset, layout); @@ -9253,8 +9135,7 @@ GenTree* Compiler::gtNewLoadValueNode(var_types type, ClassLayout* layout, GenTr { unsigned lclNum = addr->AsLclFld()->GetLclNum(); LclVarDsc* varDsc = lvaGetDesc(lclNum); - if ((varDsc->TypeGet() == type) && - ((type != TYP_STRUCT) || ClassLayout::AreCompatible(layout, varDsc->GetLayout()))) + if ((varDsc->TypeGet() == type) && ((type != TYP_STRUCT) || layout->CanAssignFrom(varDsc->GetLayout()))) { return gtNewLclvNode(lclNum, type); } @@ -9278,7 +9159,7 @@ GenTree* Compiler::gtNewLoadValueNode(var_types type, ClassLayout* layout, GenTr GenTreeBlk* Compiler::gtNewStoreBlkNode(ClassLayout* layout, GenTree* addr, GenTree* value, GenTreeFlags indirFlags) { assert((indirFlags & GTF_IND_INVARIANT) == 0); - assert(value->IsInitVal() || ClassLayout::AreCompatible(layout, value->GetLayout(this))); + assert(value->IsInitVal() || layout->CanAssignFrom(value->GetLayout(this))); GenTreeBlk* store = new (this, GT_STORE_BLK) GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, addr, value, layout); store->gtFlags |= GTF_ASG; @@ -9335,8 +9216,7 @@ GenTree* Compiler::gtNewStoreValueNode( { unsigned lclNum = addr->AsLclFld()->GetLclNum(); LclVarDsc* varDsc = lvaGetDesc(lclNum); - if ((varDsc->TypeGet() == type) && - ((type != TYP_STRUCT) || ClassLayout::AreCompatible(layout, varDsc->GetLayout()))) + if ((varDsc->TypeGet() == type) && ((type != TYP_STRUCT) || varDsc->GetLayout()->CanAssignFrom(layout))) { return gtNewStoreLclVarNode(lclNum, value); } @@ -9633,20 +9513,12 @@ GenTree* Compiler::gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg // Notes: // The node is generated as GenTreeMultiRegOp on RyuJIT/arm, as GenTreeOp on all the other archs. // -GenTree* Compiler::gtNewBitCastNode(var_types type, GenTree* arg) +GenTreeUnOp* Compiler::gtNewBitCastNode(var_types type, GenTree* arg) { assert(arg != nullptr); assert(type != TYP_STRUCT); - GenTree* node = nullptr; -#if defined(TARGET_ARM) - // A BITCAST could be a MultiRegOp on arm since we could move a double register to two int registers. 
- node = new (this, GT_BITCAST) GenTreeMultiRegOp(GT_BITCAST, type, arg, nullptr); -#else - node = gtNewOperNode(GT_BITCAST, type, arg); -#endif - - return node; + return gtNewOperNode(GT_BITCAST, type, arg); } //------------------------------------------------------------------------ @@ -9677,7 +9549,7 @@ GenTreeAllocObj* Compiler::gtNewAllocObjNode(CORINFO_RESOLVED_TOKEN* pResolvedTo #ifdef FEATURE_READYTORUN CORINFO_CONST_LOOKUP lookup = {}; - if (opts.IsReadyToRun()) + if (IsAot()) { helper = CORINFO_HELP_READYTORUN_NEW; CORINFO_LOOKUP_KIND* const pGenericLookupKind = nullptr; @@ -10021,10 +9893,12 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree) goto DONE; case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_NO_OP: case GT_NOP: case GT_LABEL: case GT_SWIFT_ERROR: + case GT_GCPOLL: copy = new (this, oper) GenTree(oper, tree->gtType); goto DONE; @@ -10116,7 +9990,6 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree) GenTreeIndexAddr(asIndAddr->Arr(), asIndAddr->Index(), asIndAddr->gtElemType, asIndAddr->gtStructElemClass, asIndAddr->gtElemSize, asIndAddr->gtLenOffset, asIndAddr->gtElemOffset, asIndAddr->IsBoundsChecked()); - copy->AsIndexAddr()->gtIndRngFailBB = asIndAddr->gtIndRngFailBB; } break; @@ -10214,8 +10087,7 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree) copy = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(tree->AsBoundsChk()->GetIndex(), tree->AsBoundsChk()->GetArrayLength(), tree->AsBoundsChk()->gtThrowKind); - copy->AsBoundsChk()->gtIndRngFailBB = tree->AsBoundsChk()->gtIndRngFailBB; - copy->AsBoundsChk()->gtInxType = tree->AsBoundsChk()->gtInxType; + copy->AsBoundsChk()->gtInxType = tree->AsBoundsChk()->gtInxType; break; case GT_LEA: @@ -10395,6 +10267,7 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co m_hasRetBuffer = other->m_hasRetBuffer; m_isVarArgs = other->m_isVarArgs; m_abiInformationDetermined = other->m_abiInformationDetermined; + m_hasAddedFinalArgs = other->m_hasAddedFinalArgs; m_hasRegArgs = other->m_hasRegArgs; m_hasStackArgs = other->m_hasStackArgs; m_argsComplete = other->m_argsComplete; @@ -10412,14 +10285,13 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co #endif // TARGET_WASM carg->m_earlyNode = arg.m_earlyNode != nullptr ? copyNode(arg.m_earlyNode) : nullptr; carg->m_lateNode = arg.m_lateNode != nullptr ? 
copyNode(arg.m_lateNode) : nullptr; - carg->m_signatureClsHnd = arg.m_signatureClsHnd; + carg->m_signatureLayout = arg.m_signatureLayout; carg->m_signatureType = arg.m_signatureType; carg->m_wellKnownArg = arg.m_wellKnownArg; carg->m_needTmp = arg.m_needTmp; carg->m_needPlace = arg.m_needPlace; carg->m_processed = arg.m_processed; carg->AbiInfo = arg.AbiInfo; - carg->NewAbiInfo = arg.NewAbiInfo; *tail = carg; tail = &carg->m_next; } @@ -10493,6 +10365,8 @@ GenTreeCall* Compiler::gtCloneExprCallHelper(GenTreeCall* tree) copy->gtInlineInfoCount = tree->gtInlineInfoCount; } + copy->gtLateDevirtualizationInfo = tree->gtLateDevirtualizationInfo; + copy->gtCallType = tree->gtCallType; copy->gtReturnType = tree->gtReturnType; #ifdef TARGET_WASM @@ -10510,9 +10384,10 @@ GenTreeCall* Compiler::gtCloneExprCallHelper(GenTreeCall* tree) #if defined(DEBUG) copy->gtInlineObservation = tree->gtInlineObservation; copy->gtRawILOffset = tree->gtRawILOffset; - copy->gtInlineContext = tree->gtInlineContext; #endif + copy->gtInlineContext = tree->gtInlineContext; + copy->CopyOtherRegFlags(tree); // We keep track of the number of no return calls, so if we've cloned @@ -10767,6 +10642,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_LCL_FLD: case GT_LCL_ADDR: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -10800,6 +10676,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_IL_OFFSET: case GT_NOP: case GT_SWIFT_ERROR: + case GT_GCPOLL: m_state = -1; return; @@ -10833,10 +10710,8 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_BSWAP16: case GT_KEEPALIVE: case GT_INC_SATURATE: -#if FEATURE_ARG_SPLIT - case GT_PUTARG_SPLIT: -#endif // FEATURE_ARG_SPLIT case GT_RETURNTRAP: + case GT_RETURN_SUSPEND: m_edge = &m_node->AsUnOp()->gtOp1; assert(*m_edge != nullptr); m_advance = &GenTreeUseEdgeIterator::Terminate; @@ -11524,11 +11399,11 @@ void Compiler::gtDispNodeName(GenTree* tree) { sprintf_s(bufp, sizeof(buf), " %s(h)%c", name, 0); } - else if (tree->gtOper == GT_PUTARG_STK) + else if (tree->OperIs(GT_PUTARG_STK)) { sprintf_s(bufp, sizeof(buf), " %s [+0x%02x]%c", name, tree->AsPutArgStk()->getArgOffset(), 0); } - else if (tree->gtOper == GT_CALL) + else if (tree->OperIs(GT_CALL)) { const char* callType = "CALL"; const char* gtfType = ""; @@ -11594,7 +11469,7 @@ void Compiler::gtDispNodeName(GenTree* tree) sprintf_s(bufp, sizeof(buf), " %s%s%s%c", callType, ctType, gtfType, 0); } - else if (tree->gtOper == GT_ARR_ELEM) + else if (tree->OperIs(GT_ARR_ELEM)) { bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " %s[", name); for (unsigned rank = tree->AsArrElem()->gtArrRank - 1; rank; rank--) @@ -11603,7 +11478,7 @@ void Compiler::gtDispNodeName(GenTree* tree) } SimpleSprintf_s(bufp, buf, sizeof(buf), "]"); } - else if (tree->gtOper == GT_LEA) + else if (tree->OperIs(GT_LEA)) { GenTreeAddrMode* lea = tree->AsAddrMode(); bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " %s(", name); @@ -11617,20 +11492,13 @@ void Compiler::gtDispNodeName(GenTree* tree) } bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), "%d)", lea->Offset()); } - else if (tree->gtOper == GT_BOUNDS_CHECK) + else if (tree->OperIs(GT_BOUNDS_CHECK)) { switch (tree->AsBoundsChk()->gtThrowKind) { case SCK_RNGCHK_FAIL: - { bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " %s_Rng", name); - if (tree->AsBoundsChk()->gtIndRngFailBB != nullptr) - { - bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " -> " FMT_BB, - 
tree->AsBoundsChk()->gtIndRngFailBB->bbNum); - } break; - } case SCK_ARG_EXCPN: sprintf_s(bufp, sizeof(buf), " %s_Arg", name); break; @@ -12068,7 +11936,7 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, _In_ _In_opt_ if (tree) { /* print the type of the node */ - if (tree->gtOper != GT_CAST) + if (!tree->OperIs(GT_CAST)) { printf(" %-6s", varTypeName(tree->TypeGet())); @@ -12149,7 +12017,7 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, _In_ _In_opt_ } } - if (tree->gtOper == GT_RUNTIMELOOKUP) + if (tree->OperIs(GT_RUNTIMELOOKUP)) { #ifdef TARGET_64BIT printf(" 0x%llx", dspPtr(tree->AsRuntimeLookup()->gtHnd)); @@ -12361,10 +12229,6 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons ilName = "LocAllocSP"; } #endif // JIT32_GCENCODER - else if (lclNum == lvaPSPSym) - { - ilName = "PSPSym"; - } else { ilKind = "tmp"; @@ -12385,6 +12249,10 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons { ilName = "this"; } + else if (lclNum == lvaAsyncContinuationArg) + { + ilName = "AsyncCont"; + } else { ilKind = "arg"; @@ -12611,7 +12479,7 @@ void Compiler::gtDispConst(GenTree* tree) ssize_t iconVal = tree->AsIntCon()->gtIconVal; ssize_t dspIconVal = tree->IsIconHandle() ? dspPtr(iconVal) : iconVal; - if (tree->TypeGet() == TYP_REF) + if (tree->TypeIs(TYP_REF)) { if (iconVal == 0) { @@ -12619,7 +12487,6 @@ void Compiler::gtDispConst(GenTree* tree) } else { - assert(doesMethodHaveFrozenObjects()); printf(" 0x%llx", dspIconVal); } } @@ -12660,7 +12527,7 @@ void Compiler::gtDispConst(GenTree* tree) printf(" scope"); break; case GTF_ICON_CLASS_HDL: - if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun()) + if (IsAot()) { printf(" class"); } @@ -12670,7 +12537,7 @@ void Compiler::gtDispConst(GenTree* tree) } break; case GTF_ICON_METHOD_HDL: - if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun()) + if (IsAot()) { printf(" method"); } @@ -12680,7 +12547,7 @@ void Compiler::gtDispConst(GenTree* tree) } break; case GTF_ICON_FIELD_HDL: - if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun()) + if (IsAot()) { printf(" field"); } @@ -12934,7 +12801,7 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack) #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: - printf(" endNstLvl=%d", tree->AsVal()->gtVal1); + printf(" ehID=%d", tree->AsVal()->gtVal1); break; #endif // FEATURE_EH_WINDOWS_X86 @@ -12946,10 +12813,12 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack) case GT_START_PREEMPTGC: case GT_PROF_HOOK: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_MEMORYBARRIER: case GT_PINVOKE_PROLOG: case GT_JMPTABLE: case GT_SWIFT_ERROR: + case GT_GCPOLL: break; case GT_RET_EXPR: @@ -13184,7 +13053,7 @@ void Compiler::gtDispTree(GenTree* tree, indentStack->Push(lowerArc); } - if (tree->gtOper == GT_CAST) + if (tree->OperIs(GT_CAST)) { /* Format a message that explains the effect of this GT_CAST */ @@ -13253,8 +13122,7 @@ void Compiler::gtDispTree(GenTree* tree, } } } -#if FEATURE_PUT_STRUCT_ARG_STK - else if (tree->OperGet() == GT_PUTARG_STK) + else if (tree->OperIs(GT_PUTARG_STK)) { const GenTreePutArgStk* putArg = tree->AsPutArgStk(); printf(" (%d stackByteSize), (%d byteOffset)", putArg->GetStackByteSize(), putArg->getArgOffset()); @@ -13279,14 +13147,6 @@ void Compiler::gtDispTree(GenTree* tree, } } } -#if FEATURE_ARG_SPLIT - else if (tree->OperGet() == GT_PUTARG_SPLIT) - { - const GenTreePutArgSplit* putArg = tree->AsPutArgSplit(); - printf(" 
(%d stackByteSize), (%d numRegs)", putArg->GetStackByteSize(), putArg->gtNumRegs); - } -#endif // FEATURE_ARG_SPLIT -#endif // FEATURE_PUT_STRUCT_ARG_STK if (tree->OperIs(GT_FIELD_ADDR)) { @@ -13297,7 +13157,7 @@ void Compiler::gtDispTree(GenTree* tree, disp(); } - if (tree->gtOper == GT_INTRINSIC) + if (tree->OperIs(GT_INTRINSIC)) { GenTreeIntrinsic* intrinsic = tree->AsIntrinsic(); @@ -13372,6 +13232,9 @@ void Compiler::gtDispTree(GenTree* tree, case NI_System_Math_MaxNumber: printf(" maxNumber"); break; + case NI_System_Math_MaxUnsigned: + printf(" maxUnsigned"); + break; case NI_System_Math_Min: printf(" min"); break; @@ -13384,6 +13247,9 @@ void Compiler::gtDispTree(GenTree* tree, case NI_System_Math_MinNumber: printf(" minNumber"); break; + case NI_System_Math_MinUnsigned: + printf(" minUnsigned"); + break; case NI_System_Math_Pow: printf(" pow"); break; @@ -13443,7 +13309,6 @@ void Compiler::gtDispTree(GenTree* tree, InsCflagsToString(tree->AsCCMP()->gtFlagsVal)); } #endif - gtDispCommonEndLine(tree); if (!topOnly) @@ -13452,11 +13317,11 @@ void Compiler::gtDispTree(GenTree* tree, { // Label the child of the GT_COLON operator // op1 is the else part - if (tree->gtOper == GT_COLON) + if (tree->OperIs(GT_COLON)) { childMsg = "else"; } - else if (tree->gtOper == GT_QMARK) + else if (tree->OperIs(GT_QMARK)) { childMsg = " if"; } @@ -13469,7 +13334,7 @@ void Compiler::gtDispTree(GenTree* tree, // Label the childMsgs of the GT_COLON operator // op2 is the then part - if (tree->gtOper == GT_COLON) + if (tree->OperIs(GT_COLON)) { childMsg = "then"; } @@ -13545,6 +13410,11 @@ void Compiler::gtDispTree(GenTree* tree, disp(); } + if (call->IsAsync()) + { + printf(" (async)"); + } + if ((call->gtFlags & GTF_CALL_UNMANAGED) && (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH)) { printf(" (FramesRoot last use)"); @@ -13568,6 +13438,20 @@ void Compiler::gtDispTree(GenTree* tree, } } + // Dump profile if any + if (call->IsHelperCall() && impIsCastHelperMayHaveProfileData(eeGetHelperNum(call->gtCallMethHnd))) + { + CORINFO_CLASS_HANDLE likelyClasses[MAX_GDV_TYPE_CHECKS] = {}; + unsigned likelyLikelihoods[MAX_GDV_TYPE_CHECKS] = {}; + int likelyClassCount = 0; + pickGDV(call, call->gtCastHelperILOffset, false, likelyClasses, nullptr, &likelyClassCount, + likelyLikelihoods, false); + if (likelyClassCount > 0) + { + printf(" (%d%% likely '%s')", likelyLikelihoods[0], eeGetClassName(likelyClasses[0])); + } + } + gtDispCommonEndLine(tree); if (!topOnly) @@ -13600,14 +13484,18 @@ void Compiler::gtDispTree(GenTree* tree, #if defined(FEATURE_HW_INTRINSICS) case GT_HWINTRINSIC: - if (tree->OperIs(GT_HWINTRINSIC)) + { + GenTreeHWIntrinsic* node = tree->AsHWIntrinsic(); + printf(" %u", node->GetSimdSize()); + if (node->GetSimdBaseType() != TYP_UNKNOWN) { - printf(" %s %s", - tree->AsHWIntrinsic()->GetSimdBaseType() == TYP_UNKNOWN - ? "" - : varTypeName(tree->AsHWIntrinsic()->GetSimdBaseType()), - HWIntrinsicInfo::lookupName(tree->AsHWIntrinsic()->GetHWIntrinsicId())); + printf(" %s", varTypeName(node->GetSimdBaseType())); } + if (node->GetAuxiliaryType() != TYP_UNKNOWN) + { + printf(" (aux %s)", varTypeName(node->GetAuxiliaryType())); + } + printf(" %s", HWIntrinsicInfo::lookupName(node->GetHWIntrinsicId())); gtDispCommonEndLine(tree); @@ -13620,7 +13508,8 @@ void Compiler::gtDispTree(GenTree* tree, gtDispChild(operand, indentStack, ++index < count ? 
IIArc : IIArcBottom, nullptr, topOnly); } } - break; + } + break; #endif // defined(FEATURE_HW_INTRINSICS) case GT_ARR_ELEM: @@ -13689,6 +13578,8 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg) return "va cookie"; case WellKnownArg::InstParam: return "gctx"; + case WellKnownArg::AsyncContinuation: + return "async"; case WellKnownArg::RetBuffer: return "retbuf"; case WellKnownArg::PInvokeFrame: @@ -13718,6 +13609,8 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg) return "tail call"; case WellKnownArg::StackArrayLocal: return "&lcl arr"; + case WellKnownArg::RuntimeMethodHandle: + return "meth hnd"; default: return nullptr; } @@ -13770,41 +13663,7 @@ void Compiler::gtGetArgMsg(GenTreeCall* call, CallArg* arg, char* bufp, unsigned } else if (call->gtArgs.IsAbiInformationDetermined()) { -#ifdef TARGET_ARM - if (arg->AbiInfo.IsSplit()) - { - regNumber firstReg = arg->AbiInfo.GetRegNum(); - if (arg->AbiInfo.NumRegs == 1) - { - sprintf_s(bufp, bufLength, " %s out+%02x", compRegVarName(firstReg), arg->AbiInfo.ByteOffset); - } - else - { - regNumber lastReg = REG_STK; - char separator = (arg->AbiInfo.NumRegs == 2) ? ',' : '-'; - if (arg->AbiInfo.IsHfaRegArg()) - { - unsigned lastRegNum = genMapFloatRegNumToRegArgNum(firstReg) + arg->AbiInfo.NumRegs - 1; - lastReg = genMapFloatRegArgNumToRegNum(lastRegNum); - } - else - { - unsigned lastRegNum = - genMapIntRegNumToRegArgNum(firstReg, call->GetUnmanagedCallConv()) + arg->AbiInfo.NumRegs - 1; - lastReg = genMapIntRegArgNumToRegNum(lastRegNum, call->GetUnmanagedCallConv()); - } - sprintf_s(bufp, bufLength, " %s%c%s out+%02x", compRegVarName(firstReg), separator, - compRegVarName(lastReg), arg->AbiInfo.ByteOffset); - } - - return; - } -#endif // TARGET_ARM -#if FEATURE_FIXED_OUT_ARGS - sprintf_s(bufp, bufLength, " out+%02x", arg->AbiInfo.ByteOffset); -#else - sprintf_s(bufp, bufLength, " on STK"); -#endif + gtPrintABILocation(arg->AbiInfo, &bufp, &bufLength); } } @@ -13819,14 +13678,16 @@ void Compiler::gtGetArgMsg(GenTreeCall* call, CallArg* arg, char* bufp, unsigned // // Return Value: // No return value, but bufp is written. - +// void Compiler::gtGetLateArgMsg(GenTreeCall* call, CallArg* arg, char* bufp, unsigned bufLength) { assert(arg->GetLateNode() != nullptr); - regNumber argReg = arg->AbiInfo.GetRegNum(); gtPrintArgPrefix(call, arg, &bufp, &bufLength); + gtPrintABILocation(arg->AbiInfo, &bufp, &bufLength); +} +<<<<<<< HEAD #if defined(FEATURE_FIXED_OUT_ARGS) && !defined(TARGET_WASM) if (argReg == REG_STK) { @@ -13837,47 +13698,105 @@ void Compiler::gtGetLateArgMsg(GenTreeCall* call, CallArg* arg, char* bufp, unsi { #ifdef TARGET_ARM if (arg->AbiInfo.IsSplit()) +======= +//------------------------------------------------------------------------ +// gtPrintABILocation: Print location that an argument is being passed in. +// +// Arguments: +// abiInfo - Passing information +// bufp - Pointer to buffer +// bufLength - Remaining length of buffer +// +void Compiler::gtPrintABILocation(const ABIPassingInformation& abiInfo, char** bufp, unsigned* bufLength) +{ + regNumber firstReg = REG_NA; + regNumber lastReg = REG_NA; + + int numPrinted; +#define PRINTF_BUF(...) 
\ + numPrinted = sprintf_s(*bufp, *bufLength, __VA_ARGS__); \ + assert((numPrinted > 0) && ((unsigned)numPrinted < *bufLength)); \ + *bufp += numPrinted; \ + *bufLength -= (unsigned)numPrinted; + + auto printRegs = [&]() { + if (firstReg == REG_NA) +>>>>>>> upstream-jun { - regNumber firstReg = arg->AbiInfo.GetRegNum(); - if (arg->AbiInfo.NumRegs == 1) - { - sprintf_s(bufp, bufLength, " %s out+%02x", compRegVarName(firstReg), arg->AbiInfo.ByteOffset); - } - else - { - regNumber lastReg = REG_STK; - char separator = (arg->AbiInfo.NumRegs == 2) ? ',' : '-'; - if (arg->AbiInfo.IsHfaRegArg()) - { - unsigned lastRegNum = genMapFloatRegNumToRegArgNum(firstReg) + arg->AbiInfo.NumRegs - 1; - lastReg = genMapFloatRegArgNumToRegNum(lastRegNum); - } - else + return; + } + + bool printSeparately = firstReg == lastReg; + +#ifdef TARGET_XARCH + // No numeric arg regs, always print separately + printSeparately = true; +#endif + + if (printSeparately) + { + regNumber reg = firstReg; + while (true) + { + PRINTF_BUF(" %s", getRegName(reg)); + if (reg == lastReg) { - unsigned lastRegNum = - genMapIntRegNumToRegArgNum(firstReg, call->GetUnmanagedCallConv()) + arg->AbiInfo.NumRegs - 1; - lastReg = genMapIntRegArgNumToRegNum(lastRegNum, call->GetUnmanagedCallConv()); + break; } - sprintf_s(bufp, bufLength, " %s%c%s out+%02x", compRegVarName(firstReg), separator, - compRegVarName(lastReg), arg->AbiInfo.ByteOffset); + reg = REG_NEXT(reg); } - - return; } -#endif // TARGET_ARM -#if FEATURE_MULTIREG_ARGS - if (arg->AbiInfo.NumRegs >= 2) + else { - char separator = (arg->AbiInfo.NumRegs == 2) ? ',' : '-'; - sprintf_s(bufp, bufLength, " %s%c%s", compRegVarName(argReg), separator, - compRegVarName(arg->AbiInfo.GetRegNum(arg->AbiInfo.NumRegs - 1))); + // Numeric arg regs, print as a range + PRINTF_BUF(" %s%c%s", getRegName(firstReg), REG_NEXT(firstReg) == lastReg ? 
' ' : '-', getRegName(lastReg)); + } + + firstReg = REG_NA; + lastReg = REG_NA; + }; + + for (const ABIPassingSegment& segment : abiInfo.Segments()) + { + if (segment.IsPassedInRegister()) + { + regMaskTP regs = segment.GetRegisterMask(); + while (regs != RBM_NONE) + { + regNumber reg = genFirstRegNumFromMaskAndToggle(regs); + if (firstReg == REG_NA) + { + firstReg = reg; + lastReg = reg; + } + else if (REG_NEXT(lastReg) == reg) + { + lastReg = reg; + } + else + { + printRegs(); + firstReg = reg; + lastReg = reg; + } + } } else -#endif { - sprintf_s(bufp, bufLength, " in %s", compRegVarName(argReg)); + printRegs(); + +#if FEATURE_FIXED_OUT_ARGS + int numPrinted = sprintf_s(*bufp, *bufLength, " out+%02x", segment.GetStackOffset()); +#else + int numPrinted = sprintf_s(*bufp, *bufLength, " STK"); +#endif + assert((numPrinted > 0) && ((unsigned)numPrinted < *bufLength)); + *bufp += numPrinted; + *bufLength -= (unsigned)numPrinted; } } + + printRegs(); } //------------------------------------------------------------------------ @@ -14264,7 +14183,7 @@ GenTree* Compiler::gtFoldExprCall(GenTreeCall* call) case NI_System_Type_op_Equality: case NI_System_Type_op_Inequality: { - noway_assert(call->TypeGet() == TYP_INT); + noway_assert(call->TypeIs(TYP_INT)); GenTree* op1 = call->gtArgs.GetArgByIndex(0)->GetNode(); GenTree* op2 = call->gtArgs.GetArgByIndex(1)->GetNode(); @@ -14636,7 +14555,7 @@ GenTree* Compiler::gtFoldTypeCompare(GenTree* tree) { GenTree* arg1; - if (op1->OperGet() == GT_INTRINSIC) + if (op1->OperIs(GT_INTRINSIC)) { arg1 = op1->AsUnOp()->gtOp1; } @@ -14649,7 +14568,7 @@ GenTree* Compiler::gtFoldTypeCompare(GenTree* tree) GenTree* arg2; - if (op2->OperGet() == GT_INTRINSIC) + if (op2->OperIs(GT_INTRINSIC)) { arg2 = op2->AsUnOp()->gtOp1; } @@ -14715,7 +14634,7 @@ GenTree* Compiler::gtFoldTypeCompare(GenTree* tree) GenTree* objOp = nullptr; // Note we may see intrinsified or regular calls to GetType - if (opOther->OperGet() == GT_INTRINSIC) + if (opOther->OperIs(GT_INTRINSIC)) { objOp = opOther->AsUnOp()->gtOp1; } @@ -14785,18 +14704,18 @@ CORINFO_CLASS_HANDLE Compiler::gtGetHelperArgClassHandle(GenTree* tree) CORINFO_CLASS_HANDLE result = NO_CLASS_HANDLE; // The handle could be a literal constant - if ((tree->OperGet() == GT_CNS_INT) && (tree->TypeGet() == TYP_I_IMPL)) + if (tree->OperIs(GT_CNS_INT) && tree->TypeIs(TYP_I_IMPL)) { assert(tree->IsIconHandle(GTF_ICON_CLASS_HDL)); result = (CORINFO_CLASS_HANDLE)tree->AsIntCon()->gtCompileTimeHandle; } // Or the result of a runtime lookup - else if (tree->OperGet() == GT_RUNTIMELOOKUP) + else if (tree->OperIs(GT_RUNTIMELOOKUP)) { result = tree->AsRuntimeLookup()->GetClassHandle(); } // Or something reached indirectly - else if (tree->gtOper == GT_IND) + else if (tree->OperIs(GT_IND)) { // The handle indirs we are looking for will be marked as non-faulting. // Certain others (eg from refanytype) may not be. @@ -14804,7 +14723,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetHelperArgClassHandle(GenTree* tree) { GenTree* handleTreeInternal = tree->AsOp()->gtOp1; - if ((handleTreeInternal->OperGet() == GT_CNS_INT) && (handleTreeInternal->TypeGet() == TYP_I_IMPL)) + if (handleTreeInternal->OperIs(GT_CNS_INT) && handleTreeInternal->TypeIs(TYP_I_IMPL)) { // These handle constants should be class handles. 
assert(handleTreeInternal->IsIconHandle(GTF_ICON_CLASS_HDL)); @@ -14831,18 +14750,18 @@ CORINFO_METHOD_HANDLE Compiler::gtGetHelperArgMethodHandle(GenTree* tree) CORINFO_METHOD_HANDLE result = NO_METHOD_HANDLE; // The handle could be a literal constant - if ((tree->OperGet() == GT_CNS_INT) && (tree->TypeGet() == TYP_I_IMPL)) + if (tree->OperIs(GT_CNS_INT) && tree->TypeIs(TYP_I_IMPL)) { assert(tree->IsIconHandle(GTF_ICON_METHOD_HDL)); result = (CORINFO_METHOD_HANDLE)tree->AsIntCon()->gtCompileTimeHandle; } // Or the result of a runtime lookup - else if (tree->OperGet() == GT_RUNTIMELOOKUP) + else if (tree->OperIs(GT_RUNTIMELOOKUP)) { result = tree->AsRuntimeLookup()->GetMethodHandle(); } // Or something reached indirectly - else if (tree->gtOper == GT_IND) + else if (tree->OperIs(GT_IND)) { // The handle indirs we are looking for will be marked as non-faulting. // Certain others (eg from refanytype) may not be. @@ -14850,7 +14769,7 @@ CORINFO_METHOD_HANDLE Compiler::gtGetHelperArgMethodHandle(GenTree* tree) { GenTree* handleTreeInternal = tree->AsOp()->gtOp1; - if ((handleTreeInternal->OperGet() == GT_CNS_INT) && (handleTreeInternal->TypeGet() == TYP_I_IMPL)) + if (handleTreeInternal->OperIs(GT_CNS_INT) && handleTreeInternal->TypeIs(TYP_I_IMPL)) { // These handle constants should be method handles. assert(handleTreeInternal->IsIconHandle(GTF_ICON_METHOD_HDL)); @@ -15014,9 +14933,10 @@ GenTree* Compiler::gtFoldExprSpecial(GenTree* tree) // Optimize boxed value classes; these are always false. This IL is // generated when a generic value is tested against null: // ... foo(T x) { ... if ((object)x == null) ... - if ((val == 0) && op->IsBoxedValue()) + // Also fold checks against known non-null data like static readonlys + if ((val == 0) && !fgAddrCouldBeNull(op)) { - JITDUMP("\nAttempting to optimize BOX(valueType) %s null [%06u]\n", GenTree::OpName(oper), + JITDUMP("\nAttempting to optimize BOX(valueType)/non-null %s null [%06u]\n", GenTree::OpName(oper), dspTreeID(tree)); // We don't expect GT_GT with signed compares, and we @@ -15028,44 +14948,46 @@ GenTree* Compiler::gtFoldExprSpecial(GenTree* tree) } else { - // The tree under the box must be side effect free - // since we will drop it if we optimize. - assert(!gtTreeHasSideEffects(op->AsBox()->BoxOp(), GTF_SIDE_EFFECT)); + bool wrapEffects = true; + if (op->IsBoxedValue()) + { + // The tree under the box must be side effect free + // since we will drop it if we optimize. + assert(!gtTreeHasSideEffects(op->AsBox()->BoxOp(), GTF_SIDE_EFFECT)); - // See if we can optimize away the box and related statements. - GenTree* boxSourceTree = gtTryRemoveBoxUpstreamEffects(op); - bool didOptimize = (boxSourceTree != nullptr); + // See if we can optimize away the box and related statements. + wrapEffects = (gtTryRemoveBoxUpstreamEffects(op) == nullptr); + } - // If optimization succeeded, remove the box. - if (didOptimize) + // Set up the result of the compare. + int compareResult; + if (oper == GT_GT) { - // Set up the result of the compare. 
- int compareResult = 0; - if (oper == GT_GT) - { - // GT_GT(null, box) == false - // GT_GT(box, null) == true - compareResult = (op1 == op); - } - else if (oper == GT_EQ) - { - // GT_EQ(box, null) == false - // GT_EQ(null, box) == false - compareResult = 0; - } - else - { - assert(oper == GT_NE); - // GT_NE(box, null) == true - // GT_NE(null, box) == true - compareResult = 1; - } - - JITDUMP("\nSuccess: replacing BOX(valueType) %s null with %d\n", GenTree::OpName(oper), - compareResult); + // GT_GT(null, op) == false + // GT_GT(op, null) == true + compareResult = (op1 == op); + } + else if (oper == GT_EQ) + { + // GT_EQ(op, null) == false + // GT_EQ(null, op) == false + compareResult = 0; + } + else + { + assert(oper == GT_NE); + // GT_NE(op, null) == true + // GT_NE(null, op) == true + compareResult = 1; + } - return NewMorphedIntConNode(compareResult); + GenTree* newTree = NewMorphedIntConNode(compareResult); + if (wrapEffects) + { + newTree = gtWrapWithSideEffects(newTree, op, GTF_ALL_EFFECT); } + op = newTree; + goto DONE_FOLD; } } else @@ -15176,7 +15098,7 @@ GenTree* Compiler::gtFoldExprSpecial(GenTree* tree) case GT_QMARK: { - assert(op1 == cons && op2 == op && op2->gtOper == GT_COLON); + assert(op1 == cons && op2 == op && op2->OperIs(GT_COLON)); assert(op2->AsOp()->gtOp1 && op2->AsOp()->gtOp2); assert(val == 0 || val == 1); @@ -15640,7 +15562,7 @@ GenTree* Compiler::gtTryRemoveBoxUpstreamEffects(GenTree* op, BoxRemovalOptions // GT_RET_EXPR is a tolerable temporary failure. // The jit will revisit this optimization after // inlining is done. - if (copy->gtOper == GT_RET_EXPR) + if (copy->OperIs(GT_RET_EXPR)) { JITDUMP(" bailing; must wait for replacement of copy %s\n", GenTree::OpName(copy->gtOper)); } @@ -15671,14 +15593,14 @@ GenTree* Compiler::gtTryRemoveBoxUpstreamEffects(GenTree* op, BoxRemovalOptions // over in impImportAndPushBox for the inlined box case. // GenTree* copyDstAddr = copy->AsIndir()->Addr(); - if (copyDstAddr->OperGet() != GT_ADD) + if (!copyDstAddr->OperIs(GT_ADD)) { JITDUMP("Unexpected copy dest address tree\n"); return nullptr; } GenTree* copyDstAddrOp1 = copyDstAddr->AsOp()->gtOp1; - if ((copyDstAddrOp1->OperGet() != GT_LCL_VAR) || (copyDstAddrOp1->AsLclVarCommon()->GetLclNum() != boxTempLcl)) + if (!copyDstAddrOp1->OperIs(GT_LCL_VAR) || (copyDstAddrOp1->AsLclVarCommon()->GetLclNum() != boxTempLcl)) { JITDUMP("Unexpected copy dest address 1st addend\n"); return nullptr; @@ -15716,7 +15638,7 @@ GenTree* Compiler::gtTryRemoveBoxUpstreamEffects(GenTree* op, BoxRemovalOptions GenTree* copySrc = copy->Data(); // If the copy source is from a pending inline, wait for it to resolve. - if (copySrc->gtOper == GT_RET_EXPR) + if (copySrc->OperIs(GT_RET_EXPR)) { JITDUMP(" bailing; must wait for replacement of copy source %s\n", GenTree::OpName(copySrc->gtOper)); return nullptr; @@ -15995,11 +15917,6 @@ GenTree* Compiler::gtOptimizeEnumHasFlag(GenTree* thisOp, GenTree* flagOp) * * Fold the given constant tree. 
*/ - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif GenTree* Compiler::gtFoldExprConst(GenTree* tree) { SSIZE_T i1, i2, itemp; @@ -16236,7 +16153,6 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_FLOAT: { -#ifdef TARGET_64BIT if (tree->IsUnsigned() && (lval1 < 0)) { f1 = FloatingPointUtils::convertUInt64ToFloat((uint64_t)lval1); @@ -16245,20 +16161,6 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) { f1 = (float)lval1; } -#else - // 32-bit currently does a 2-step conversion, which is incorrect - // but which we are going to take a breaking change around early - // in a release cycle. - - if (tree->IsUnsigned() && (lval1 < 0)) - { - f1 = forceCastToFloat(FloatingPointUtils::convertUInt64ToDouble((uint64_t)lval1)); - } - else - { - f1 = forceCastToFloat((double)lval1); - } -#endif d1 = f1; goto CNS_DOUBLE; @@ -17141,9 +17043,6 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) return tree; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //------------------------------------------------------------------------ // gtFoldIndirConst: Attempt to fold an "IND(addr)" expression to a constant. @@ -17210,20 +17109,20 @@ GenTree* Compiler::gtNewTempStore( unsigned tmp, GenTree* val, unsigned curLevel, Statement** pAfterStmt, const DebugInfo& di, BasicBlock* block) { // Self-assignment is a nop. - if (val->OperGet() == GT_LCL_VAR && val->AsLclVarCommon()->GetLclNum() == tmp) + if (val->OperIs(GT_LCL_VAR) && val->AsLclVarCommon()->GetLclNum() == tmp) { return gtNewNothingNode(); } LclVarDsc* varDsc = lvaGetDesc(tmp); - if (varDsc->TypeGet() == TYP_I_IMPL && val->TypeGet() == TYP_BYREF) + if (varDsc->TypeIs(TYP_I_IMPL) && val->TypeIs(TYP_BYREF)) { impBashVarAddrsToI(val); } var_types valTyp = val->TypeGet(); - if (val->OperGet() == GT_LCL_VAR && lvaTable[val->AsLclVar()->GetLclNum()].lvNormalizeOnLoad()) + if (val->OperIs(GT_LCL_VAR) && lvaTable[val->AsLclVar()->GetLclNum()].lvNormalizeOnLoad()) { valTyp = lvaGetRealType(val->AsLclVar()->GetLclNum()); val->gtType = valTyp; @@ -17335,11 +17234,11 @@ GenTree* Compiler::gtNewRefCOMfield(GenTree* objPtr, if (access & CORINFO_ACCESS_SET) { assert(value != nullptr); - if ((lclTyp == TYP_DOUBLE) && (value->TypeGet() == TYP_FLOAT)) + if ((lclTyp == TYP_DOUBLE) && value->TypeIs(TYP_FLOAT)) { value = gtNewCastNode(TYP_DOUBLE, value, false, TYP_DOUBLE); } - else if (lclTyp == TYP_FLOAT && value->TypeGet() == TYP_DOUBLE) + else if (lclTyp == TYP_FLOAT && value->TypeIs(TYP_DOUBLE)) { value = gtNewCastNode(TYP_FLOAT, value, false, TYP_FLOAT); } @@ -17441,7 +17340,7 @@ bool Compiler::gtNodeHasSideEffects(GenTree* tree, GenTreeFlags flags, bool igno { GenTree* potentialCall = tree; - if (potentialCall->OperIs(GT_RET_EXPR)) + while (potentialCall->OperIs(GT_RET_EXPR)) { // We need to preserve return expressions where the underlying call // has side effects. Otherwise early folding can result in us dropping @@ -18087,7 +17986,7 @@ Compiler::fgWalkResult Compiler::gtClearColonCond(GenTree** pTree, fgWalkData* d assert(data->pCallbackData == nullptr); - if (tree->OperGet() == GT_COLON) + if (tree->OperIs(GT_COLON)) { // Nodes below this will be conditionally executed. 
return WALK_SKIP_SUBTREES; @@ -18190,7 +18089,7 @@ bool Compiler::gtHasCatchArg(GenTree* tree) Compiler::TypeProducerKind Compiler::gtGetTypeProducerKind(GenTree* tree) { - if (tree->gtOper == GT_CALL) + if (tree->OperIs(GT_CALL)) { if (tree->AsCall()->IsHelperCall()) { @@ -18207,11 +18106,11 @@ Compiler::TypeProducerKind Compiler::gtGetTypeProducerKind(GenTree* tree) } } } - else if ((tree->gtOper == GT_INTRINSIC) && (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Object_GetType)) + else if (tree->OperIs(GT_INTRINSIC) && (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Object_GetType)) { return TPK_GetType; } - else if ((tree->gtOper == GT_CNS_INT) && (tree->AsIntCon()->gtIconVal == 0)) + else if (tree->OperIs(GT_CNS_INT) && (tree->AsIntCon()->gtIconVal == 0)) { return TPK_Null; } @@ -18289,35 +18188,10 @@ bool Compiler::gtIsTypeHandleToRuntimeTypeHandleHelper(GenTreeCall* call, CorInf // bool Compiler::gtTreeContainsOper(GenTree* tree, genTreeOps oper) { - class Visitor final : public GenTreeVisitor - { - genTreeOps m_oper; - - public: - Visitor(Compiler* comp, genTreeOps oper) - : GenTreeVisitor(comp) - , m_oper(oper) - { - } - - enum - { - DoPreOrder = true, - }; - - fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) - { - if ((*use)->OperIs(m_oper)) - { - return WALK_ABORT; - } - - return WALK_CONTINUE; - } + auto hasOper = [oper](GenTree* tree) { + return tree->OperGet() == oper; }; - - Visitor visitor(this, oper); - return visitor.WalkTree(&tree, nullptr) == WALK_ABORT; + return gtFindNodeInTree(tree, hasOper) != nullptr; } //------------------------------------------------------------------------ @@ -18670,7 +18544,7 @@ unsigned GenTree::IsLclVarUpdateTree(GenTree** pOtherTree, genTreeOps* pOper) // Some operators, such as LEA, are currently declared as binary but may // not have two operands. We must check that both operands actually exist. - if ((op1 != nullptr) && (op2 != nullptr) && (op1->OperGet() == GT_LCL_VAR) && + if ((op1 != nullptr) && (op2 != nullptr) && op1->OperIs(GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == lclNum)) { *pOtherTree = op2; @@ -18927,7 +18801,7 @@ bool GenTreeIntConCommon::FitsInAddrBase(Compiler* comp) if (comp->opts.compReloc) { - // During Ngen JIT is always asked to generate relocatable code. + // During AOT JIT is always asked to generate relocatable code. // Hence JIT will try to encode only icon handles as pc-relative offsets. return IsIconHandle() && (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue())); } @@ -18959,7 +18833,7 @@ bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp) { if (comp->opts.compReloc) { - // During Ngen JIT is always asked to generate relocatable code. + // During AOT JIT is always asked to generate relocatable code. // Hence JIT will try to encode only icon handles as pc-relative offsets. 
return IsIconHandle() && (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue())); } @@ -18999,9 +18873,14 @@ unsigned GenTreeVecCon::ElementCount(unsigned simdSize, var_types simdBaseType) return simdSize / genTypeSize(simdBaseType); } -bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_types simdBaseType) const +bool Compiler::IsValidForShuffle( + GenTree* indices, unsigned simdSize, var_types simdBaseType, bool* canBecomeValid, bool isShuffleNative) const { #if defined(TARGET_XARCH) + if (canBecomeValid != nullptr) + { + *canBecomeValid = false; + } size_t elementSize = genTypeSize(simdBaseType); size_t elementCount = simdSize / elementSize; @@ -19013,45 +18892,10 @@ bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_t // it's likely not worth it overall given that IsHardwareAccelerated reports false return false; } - else if ((varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI_VL)) || - (varTypeIsShort(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512BW_VL))) - { - bool crossLane = false; - - for (size_t index = 0; index < elementCount; index++) - { - uint64_t value = vecCon->GetIntegralVectorConstElement(index, simdBaseType); - - if (value >= elementCount) - { - continue; - } - - if (index < (elementCount / 2)) - { - if (value >= (elementCount / 2)) - { - crossLane = true; - break; - } - } - else if (value < (elementCount / 2)) - { - crossLane = true; - break; - } - } - - if (crossLane) - { - // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort - return false; - } - } } else if (simdSize == 64) { - if (varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI)) + if (varTypeIsByte(simdBaseType) && (!compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI))) { // TYP_BYTE, TYP_UBYTE need AVX512VBMI. return false; @@ -19061,14 +18905,43 @@ bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_t { assert(simdSize == 16); - if (varTypeIsSmall(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSSE3)) + if (varTypeIsSmall(simdBaseType) && (!compOpportunisticallyDependsOn(InstructionSet_SSSE3))) { // TYP_BYTE, TYP_UBYTE, TYP_SHORT, and TYP_USHORT need SSSE3 to be able to shuffle any operation return false; } + + bool isVariableShuffle = !indices->IsCnsVec(); + if ((!isVariableShuffle) && isShuffleNative) + { + // ShuffleNative with constant indices with 1 or more out of range indices is emitted as variable indices. + for (size_t index = 0; index < elementCount; index++) + { + uint64_t value = indices->GetIntegralVectorConstElement(index, simdBaseType); + if (value >= elementCount) + { + isVariableShuffle = true; + break; + } + } + } + if (isVariableShuffle && (!compOpportunisticallyDependsOn(InstructionSet_SSSE3))) + { + // the variable implementation for Vector128 Shuffle always needs SSSE3 + // however, this can become valid later if it becomes constant + if (canBecomeValid != nullptr) + { + *canBecomeValid = true; + } + return false; + } } #endif // TARGET_XARCH + if (canBecomeValid != nullptr) + { + *canBecomeValid = true; + } return true; } @@ -19803,7 +19676,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, b objClass = gtGetArrayElementClassHandle(base->AsArrElem()->gtArrObj); } } - else if (base->OperGet() == GT_ADD) + else if (base->OperIs(GT_ADD)) { // TODO-VNTypes: use "IsFieldAddr" here instead. 
@@ -20022,7 +19895,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetHelperCallClassHandle(GenTreeCall* call, boo case CORINFO_HELP_NEWARR_1_DIRECT: case CORINFO_HELP_NEWARR_1_MAYBEFROZEN: - case CORINFO_HELP_NEWARR_1_OBJ: + case CORINFO_HELP_NEWARR_1_PTR: case CORINFO_HELP_NEWARR_1_VC: case CORINFO_HELP_NEWARR_1_ALIGN8: case CORINFO_HELP_READYTORUN_NEWARR_1: @@ -20372,14 +20245,38 @@ void GenTreeArrAddr::ParseArrayAddress(Compiler* comp, GenTree** pArr, ValueNum* ValueNum vn = comp->GetValueNumStore()->VNLiberalNormalValue(tree->gtVNPair); VNFuncApp vnf; + bool treeIsArrayRef = false; + if (tree->TypeIs(TYP_REF) || comp->GetValueNumStore()->IsVNNewArr(vn, &vnf)) { // This must be the array pointer. assert(*pArr == nullptr); *pArr = tree; assert(inputMul == 1); // Can't multiply the array pointer by anything. + treeIsArrayRef = true; } - else + else if (tree->OperIs(GT_LCL_VAR) && tree->TypeIs(TYP_BYREF, TYP_I_IMPL)) + { + // This is sort of like gtGetClassHandle, but that requires TYP_REF + // + CORINFO_CLASS_HANDLE hnd = comp->lvaGetDesc(tree->AsLclVar())->lvClassHnd; + + if (hnd != NO_CLASS_HANDLE) + { + DWORD attribs = comp->info.compCompHnd->getClassAttribs(hnd); + treeIsArrayRef = (attribs & CORINFO_FLG_ARRAY) != 0; + + if (treeIsArrayRef) + { + // This must be the array pointer. + assert(*pArr == nullptr); + *pArr = tree; + assert(inputMul == 1); // Can't multiply the array pointer by anything. + } + } + } + + if (!treeIsArrayRef) { switch (tree->OperGet()) { @@ -20409,7 +20306,7 @@ void GenTreeArrAddr::ParseArrayAddress(Compiler* comp, GenTree** pArr, ValueNum* { // If the other arg is an int constant, and is a "not-a-field", choose // that as the multiplier, thus preserving constant index offsets... - if (tree->AsOp()->gtOp2->OperGet() == GT_CNS_INT && + if (tree->AsOp()->gtOp2->OperIs(GT_CNS_INT) && tree->AsOp()->gtOp2->AsIntCon()->gtFieldSeq == nullptr) { assert(!tree->AsOp()->gtOp2->AsIntCon()->ImmedValNeedsReloc(comp)); @@ -20541,13 +20438,13 @@ bool GenTree::IsArrayAddr(GenTreeArrAddr** pArrAddr) bool GenTree::SupportsSettingZeroFlag() { #if defined(TARGET_XARCH) - if (OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG)) + if (OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG, GT_LSH, GT_RSH, GT_RSZ, GT_ROL, GT_ROR)) { return true; } #ifdef FEATURE_HW_INTRINSICS - if (OperIs(GT_HWINTRINSIC) && emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(AsHWIntrinsic()))) + if (OperIs(GT_HWINTRINSIC) && emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(AsHWIntrinsic(), nullptr))) { return true; } @@ -20559,6 +20456,11 @@ bool GenTree::SupportsSettingZeroFlag() } // We do not support setting zero flag for madd/msub. 
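    // [Editor's note, illustrative — not part of the patch] On arm64 a negate, add,
    // or subtract whose multiply operand is contained presumably lowers to
    // mneg/madd/msub, and those instructions do not write NZCV, so a following
    // compare-against-zero cannot be folded into them. The new GT_NEG case below
    // therefore mirrors the existing GT_ADD/GT_SUB case: only claim zero-flag
    // support when the multiply operand is not contained. Conceptually:
    //
    //   negs w0, w1          ; standalone NEG       -> flags set, cmp folds away
    //   mneg w0, w1, w2      ; NEG of contained MUL -> no flags, keep the cmp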
+ if (OperIs(GT_NEG) && (!gtGetOp1()->OperIs(GT_MUL) || !gtGetOp1()->isContained())) + { + return true; + } + if (OperIs(GT_ADD, GT_SUB) && (!gtGetOp2()->OperIs(GT_MUL) || !gtGetOp2()->isContained())) { return true; @@ -20818,11 +20720,11 @@ var_types GenTreeJitIntrinsic::GetSimdBaseType() const // isCommutativeHWIntrinsic: Checks if the intrinsic is commutative // // Return Value: -// true if the intrisic is commutative +// true if the intrinsic is commutative // bool GenTree::isCommutativeHWIntrinsic() const { - assert(gtOper == GT_HWINTRINSIC); + assert(OperIs(GT_HWINTRINSIC)); const GenTreeHWIntrinsic* node = AsHWIntrinsic(); NamedIntrinsic id = node->GetHWIntrinsicId(); @@ -20837,14 +20739,8 @@ bool GenTree::isCommutativeHWIntrinsic() const switch (id) { #ifdef TARGET_XARCH - case NI_SSE_Max: - case NI_SSE_Min: - { - return false; - } - - case NI_SSE2_Max: - case NI_SSE2_Min: + case NI_X86Base_Max: + case NI_X86Base_Min: { return !varTypeIsFloating(node->GetSimdBaseType()); } @@ -20855,14 +20751,14 @@ bool GenTree::isCommutativeHWIntrinsic() const return false; } - case NI_AVX512F_Max: - case NI_AVX512F_Min: + case NI_AVX512_Max: + case NI_AVX512_Min: { return !varTypeIsFloating(node->GetSimdBaseType()); } - case NI_AVX512F_Add: - case NI_AVX512F_Multiply: + case NI_AVX512_Add: + case NI_AVX512_Multiply: case NI_BMI2_MultiplyNoFlags: case NI_BMI2_X64_MultiplyNoFlags: { @@ -20882,28 +20778,24 @@ bool GenTree::isCommutativeHWIntrinsic() const bool GenTree::isContainableHWIntrinsic() const { - assert(gtOper == GT_HWINTRINSIC); + assert(OperIs(GT_HWINTRINSIC)); #ifdef TARGET_XARCH switch (AsHWIntrinsic()->GetHWIntrinsicId()) { - case NI_SSE_LoadAlignedVector128: - case NI_SSE_LoadScalarVector128: - case NI_SSE2_LoadAlignedVector128: - case NI_SSE2_LoadScalarVector128: + case NI_X86Base_LoadAlignedVector128: + case NI_X86Base_LoadScalarVector128: case NI_AVX_LoadAlignedVector256: - case NI_AVX512F_LoadAlignedVector512: + case NI_AVX512_LoadAlignedVector512: { // These loads are contained as part of a HWIntrinsic operation return true; } - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { if (varTypeIsFloating(AsHWIntrinsic()->GetSimdBaseType())) { @@ -20916,70 +20808,47 @@ bool GenTree::isContainableHWIntrinsic() const case NI_Vector128_ToScalar: case NI_Vector256_ToScalar: case NI_Vector512_ToScalar: - case NI_SSE2_ConvertToInt32: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_X64_ConvertToUInt64: - case NI_SSE2_Extract: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_X64_ConvertToUInt64: + case NI_X86Base_Extract: case NI_SSE41_Extract: case NI_SSE41_X64_Extract: case NI_AVX_ExtractVector128: case NI_AVX2_ConvertToInt32: case NI_AVX2_ConvertToUInt32: case NI_AVX2_ExtractVector128: - case NI_AVX512F_ExtractVector128: - case NI_AVX512F_ExtractVector256: - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case 
NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: - case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int32: - case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512DQ_ExtractVector128: - case NI_AVX512DQ_ExtractVector256: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: - case NI_AVX10v1_V512_ExtractVector128: - case NI_AVX10v1_V512_ExtractVector256: + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case NI_AVX512_ConvertToVector256Int32WithSaturation: + case NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: + case NI_AVX512_ExtractVector128: + case NI_AVX512_ExtractVector256: { // These HWIntrinsic operations are contained as part of a store return true; } + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: + case NI_Vector512_CreateScalar: case NI_Vector128_CreateScalarUnsafe: case NI_Vector256_CreateScalarUnsafe: case NI_Vector512_CreateScalarUnsafe: @@ -20991,10 +20860,10 @@ bool GenTree::isContainableHWIntrinsic() const case 
NI_SSE3_LoadAndDuplicateToVector128: case NI_SSE3_MoveAndDuplicate: case NI_AVX_BroadcastScalarToVector128: - case NI_AVX2_BroadcastScalarToVector128: case NI_AVX_BroadcastScalarToVector256: + case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { // These intrinsic operations are contained as part of the operand of embedded broadcast compatible // instruction @@ -21015,7 +20884,7 @@ bool GenTree::isContainableHWIntrinsic() const bool GenTree::isRMWHWIntrinsic(Compiler* comp) { - assert(gtOper == GT_HWINTRINSIC); + assert(OperIs(GT_HWINTRINSIC)); assert(comp != nullptr); #if defined(TARGET_XARCH) @@ -21034,7 +20903,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) switch (intrinsicId) { - case NI_EVEX_BlendVariableMask: + case NI_AVX512_BlendVariableMask: { GenTree* op2 = hwintrinsic->Op(2); @@ -21056,11 +20925,8 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) return false; } - case NI_AVX512F_Fixup: - case NI_AVX512F_FixupScalar: - case NI_AVX512F_VL_Fixup: - case NI_AVX10v1_Fixup: - case NI_AVX10v1_FixupScalar: + case NI_AVX512_Fixup: + case NI_AVX512_FixupScalar: { // We are actually only RMW in the case where the lookup table // has any value that could result in `op1` being picked. So @@ -21088,7 +20954,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) uint32_t count = simdSize / sizeof(uint32_t); uint32_t incSize = (simdBaseType == TYP_FLOAT) ? 1 : 2; - if (intrinsicId == NI_AVX512F_FixupScalar || intrinsicId == NI_AVX10v1_FixupScalar) + if (intrinsicId == NI_AVX512_FixupScalar) { // Upper elements come from op2 count = 1; @@ -21109,9 +20975,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) return false; } - case NI_AVX512F_TernaryLogic: - case NI_AVX512F_VL_TernaryLogic: - case NI_AVX10v1_TernaryLogic: + case NI_AVX512_TernaryLogic: { // We may not be RMW depending on the control byte as there // are many operations that do not use all three inputs. @@ -21142,6 +21006,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) #endif } +#if defined(TARGET_XARCH) //------------------------------------------------------------------------ // isEvexCompatibleHWIntrinsic: Checks if the intrinsic has a compatible // EVEX form for its intended lowering instruction. @@ -21151,7 +21016,6 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) // bool GenTree::isEvexCompatibleHWIntrinsic(Compiler* comp) const { -#if defined(TARGET_XARCH) if (OperIsHWIntrinsic()) { NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId(); @@ -21169,16 +21033,47 @@ bool GenTree::isEvexCompatibleHWIntrinsic(Compiler* comp) const } } } -#endif return false; } +//------------------------------------------------------------------------ +// isEmbeddedBroadcastCompatibleHWIntrinsic: Checks if the intrinsic is compatible +// with the EVEX embedded broadcast form for its intended lowering instruction. 
+// +// Return Value: +// true if the intrinsic node lowering instruction has a EVEX embedded broadcast support +// +bool GenTree::isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const +{ + if (OperIsHWIntrinsic()) + { + NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId(); + var_types simdBaseType = AsHWIntrinsic()->GetSimdBaseType(); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, nullptr); + + if (comp->codeGen->instIsEmbeddedBroadcastCompatible(ins)) + { + insTupleType tupleType = emitter::insTupleTypeInfo(ins); + + if ((tupleType & INS_TT_MEM128) != 0) + { + assert(AsHWIntrinsic()->GetOperandCount() == 2); + return HWIntrinsicInfo::isImmOp(intrinsicId, AsHWIntrinsic()->Op(2)); + } + + return true; + } + } + return false; +} +#endif // TARGET_XARCH + //------------------------------------------------------------------------ // isEmbeddedMaskingCompatibleHWIntrinsic : Checks if the intrinsic is compatible // with the EVEX embedded masking form for its intended lowering instruction. // // Return Value: -// true if the intrinsic node lowering instruction has an EVEX embedded masking +// true if the intrinsic node lowering instruction has a EVEX embedded masking support // bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic() const { @@ -21188,23 +21083,21 @@ bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic() const #if defined(TARGET_XARCH) var_types simdBaseType = AsHWIntrinsic()->GetSimdBaseType(); - switch (intrinsicId) + if (simdBaseType == TYP_UNKNOWN) { - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_ConvertToVector512Int32: - case NI_AVX512F_ConvertToVector512UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32: - { - return varTypeIsFloating(simdBaseType); - } + // Various scalar intrinsics don't support masking + return false; + } - default: - { - return HWIntrinsicInfo::IsEmbMaskingCompatible(intrinsicId); - } + if (AsHWIntrinsic()->OperIsMemoryLoadOrStore()) + { + // Direct loads and stores cannot be embedded masking compatible + // as they may suppress faults that should otherwise be raised + return false; } + + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, nullptr); + return CodeGenInterface::instIsEmbeddedMaskingCompatible(ins); #elif defined(TARGET_ARM64) return HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinsicId) || HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinsicId); @@ -21314,8 +21207,6 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types ty GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -21355,18 +21246,13 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si if (simdBaseType == TYP_LONG) { - if (simdSize == 64) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Abs; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX10v1_Abs; + intrinsic = NI_AVX512_Abs; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + else { - intrinsic = NI_AVX512F_VL_Abs; + assert(simdSize != 64); } } else if (simdSize == 32) @@ -21376,17 +21262,8 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, 
GenTree* op1, CorInfoType si } else if (simdSize == 64) { - if (simdBaseType == TYP_INT) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Abs; - } - else - { - assert(varTypeIsSmall(simdBaseType)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - intrinsic = NI_AVX512BW_Abs; - } + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_Abs; } else if (compOpportunisticallyDependsOn(InstructionSet_SSSE3)) { @@ -21434,8 +21311,6 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si GenTree* Compiler::gtNewSimdBinOpNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -21637,13 +21512,6 @@ GenTree* Compiler::gtNewSimdBinOpNode( std::swap(op1, op2); #endif // TARGET_XARCH } -#ifdef TARGET_XARCH - if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic) && varTypeIsSmall(simdBaseType)) - { - simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT; - simdBaseType = JitType2PreciseVarType(simdBaseJitType); - } -#endif // TARGET_XARCH return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } @@ -21730,38 +21598,57 @@ GenTree* Compiler::gtNewSimdBinOpNode( } } #endif // TARGET_XARCH +#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) + case GT_DIV: + { + if (simdBaseType == TYP_INT) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX) || + compIsaSupportedDebugOnly(InstructionSet_AVX512)); + + assert(simdSize == 16 || simdSize == 32); + + NamedIntrinsic divIntrinsic = simdSize == 16 ? 
NI_Vector128_op_Division : NI_Vector256_op_Division; + unsigned int divideOpSimdSize = simdSize * 2; + + GenTree* divOp = + gtNewSimdHWIntrinsicNode(op1->TypeGet(), op1, op2, divIntrinsic, simdBaseJitType, divideOpSimdSize); + return divOp; + } + unreached(); + } +#endif // defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) case GT_MUL: { #if defined(TARGET_XARCH) if (varTypeIsByte(simdBaseType)) { - assert((simdSize != 64) || IsBaselineVector512IsaSupportedDebugOnly()); + assert((simdSize != 64) || compIsaSupportedDebugOnly(InstructionSet_AVX512)); CorInfoType widenedSimdBaseJitType; NamedIntrinsic widenIntrinsic; NamedIntrinsic narrowIntrinsic; var_types widenedType; unsigned widenedSimdSize; - bool isV512Supported = false; - if (simdSize == 32 && IsBaselineVector512IsaSupportedOpportunistically()) + if (simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - // Input is SIMD32 [U]Byte and AVX512BW is supported: + // Input is SIMD32 [U]Byte and AVX512 is supported: // - Widen inputs as SIMD64 [U]Short // - Multiply widened inputs (SIMD64 [U]Short) as widened product (SIMD64 [U]Short) // - Narrow widened product (SIMD64 [U]Short) as SIMD32 [U]Byte if (simdBaseType == TYP_BYTE) { widenedSimdBaseJitType = CORINFO_TYPE_SHORT; - widenIntrinsic = NI_AVX512BW_ConvertToVector512Int16; - narrowIntrinsic = NI_AVX512BW_ConvertToVector256SByte; + widenIntrinsic = NI_AVX512_ConvertToVector512Int16; + narrowIntrinsic = NI_AVX512_ConvertToVector256SByte; } else { widenedSimdBaseJitType = CORINFO_TYPE_USHORT; - widenIntrinsic = NI_AVX512BW_ConvertToVector512UInt16; - narrowIntrinsic = NI_AVX512BW_ConvertToVector256Byte; + widenIntrinsic = NI_AVX512_ConvertToVector512UInt16; + narrowIntrinsic = NI_AVX512_ConvertToVector256Byte; } widenedType = TYP_SIMD64; @@ -21785,9 +21672,9 @@ GenTree* Compiler::gtNewSimdBinOpNode( } else if (simdSize == 16 && compOpportunisticallyDependsOn(InstructionSet_AVX2)) { - if (compIsEvexOpportunisticallySupported(isV512Supported)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - // Input is SIMD16 [U]Byte and AVX512BW_VL is supported: + // Input is SIMD16 [U]Byte and AVX512 is supported: // - Widen inputs as SIMD32 [U]Short // - Multiply widened inputs (SIMD32 [U]Short) as widened product (SIMD32 [U]Short) // - Narrow widened product (SIMD32 [U]Short) as SIMD16 [U]Byte @@ -21796,14 +21683,12 @@ GenTree* Compiler::gtNewSimdBinOpNode( if (simdBaseType == TYP_BYTE) { widenedSimdBaseJitType = CORINFO_TYPE_SHORT; - narrowIntrinsic = !isV512Supported ? NI_AVX10v1_ConvertToVector128SByte - : NI_AVX512BW_VL_ConvertToVector128SByte; + narrowIntrinsic = NI_AVX512_ConvertToVector128SByte; } else { widenedSimdBaseJitType = CORINFO_TYPE_USHORT; - narrowIntrinsic = !isV512Supported ? 
NI_AVX10v1_ConvertToVector128Byte - : NI_AVX512BW_VL_ConvertToVector128Byte; + narrowIntrinsic = NI_AVX512_ConvertToVector128Byte; } widenedType = TYP_SIMD32; @@ -21827,7 +21712,7 @@ GenTree* Compiler::gtNewSimdBinOpNode( } else { - // Input is SIMD16 [U]Byte and AVX512BW_VL is NOT supported (only AVX2 will be used): + // Input is SIMD16 [U]Byte and AVX512 is NOT supported (only AVX2 will be used): // - Widen inputs as SIMD32 [U]Short // - Multiply widened inputs (SIMD32 [U]Short) as widened product (SIMD32 [U]Short) // - Mask widened product (SIMD32 [U]Short) to select relevant bits @@ -21928,73 +21813,74 @@ GenTree* Compiler::gtNewSimdBinOpNode( // op1Dup = Sse2.ShiftRightLogical128BitLane(op1Dup, 4) op1Dup = gtNewSimdHWIntrinsicNode(type, op1Dup, gtNewIconNode(4, TYP_INT), - NI_SSE2_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); + NI_X86Base_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); // op2Dup = Sse2.ShiftRightLogical128BitLane(op2Dup, 4) op2Dup = gtNewSimdHWIntrinsicNode(type, op2Dup, gtNewIconNode(4, TYP_INT), - NI_SSE2_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); + NI_X86Base_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); // op2Dup = Sse2.Multiply(op1Dup.AsUInt32(), op2Dup.AsUInt32()).AsInt32() - op2Dup = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_SSE2_Multiply, CORINFO_TYPE_ULONG, simdSize); + op2Dup = + gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_X86Base_Multiply, CORINFO_TYPE_ULONG, simdSize); // op2Dup = Sse2.Shuffle(op2Dup, (0, 0, 2, 0)) - op2Dup = gtNewSimdHWIntrinsicNode(type, op2Dup, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), NI_SSE2_Shuffle, - simdBaseJitType, simdSize); + op2Dup = gtNewSimdHWIntrinsicNode(type, op2Dup, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), + NI_X86Base_Shuffle, simdBaseJitType, simdSize); // op1 = Sse2.Multiply(op1.AsUInt32(), op2.AsUInt32()).AsInt32() - op1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSE2_Multiply, CORINFO_TYPE_ULONG, simdSize); + op1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_Multiply, CORINFO_TYPE_ULONG, simdSize); // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0)) - op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), NI_SSE2_Shuffle, + op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), NI_X86Base_Shuffle, simdBaseJitType, simdSize); // op2 = op2Dup; op2 = op2Dup; // result = Sse2.UnpackLow(op1, op2) - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); } else if (varTypeIsLong(simdBaseType)) { - assert((simdSize == 16) || (simdSize == 32) || (simdSize == 64)); + // This fallback path will be used only if the vpmullq instruction is not available. + // The implementation is a simple decomposition using pmuludq, which multiplies + // two uint32s and returns a uint64 result. + // + // aLo * bLo + ((aLo * bHi + aHi * bLo) << 32) - assert(((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41)) || - ((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2))); + assert(!canUseEvexEncodingDebugOnly()); + assert((simdSize == 16) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); - // Make op1 and op2 multi-use: - GenTree* op1Dup = fgMakeMultiUse(&op1); - GenTree* op2Dup = fgMakeMultiUse(&op2); + NamedIntrinsic muludq = (simdSize == 16) ? 
NI_X86Base_Multiply : NI_AVX2_Multiply; + + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op1Dup2 = gtCloneExpr(op1Dup1); + GenTree* op2Dup1 = fgMakeMultiUse(&op2); + GenTree* op2Dup2 = gtCloneExpr(op2Dup1); - const bool is256 = simdSize == 32; + // Vector128 low = Sse2.Multiply(a.AsUInt32(), b.AsUInt32()); + GenTree* low = gtNewSimdHWIntrinsicNode(type, op1, op2, muludq, CORINFO_TYPE_ULONG, simdSize); - // Vector256 tmp0 = Avx2.Multiply(left, right); - GenTreeHWIntrinsic* tmp0 = - gtNewSimdHWIntrinsicNode(type, op1, op2, is256 ? NI_AVX2_Multiply : NI_SSE2_Multiply, - CORINFO_TYPE_ULONG, simdSize); + // Vector128 mid = (b >>> 32).AsUInt64(); + GenTree* mid = gtNewSimdBinOpNode(GT_RSZ, type, op2Dup1, gtNewIconNode(32), simdBaseJitType, simdSize); - // Vector256 tmp1 = Avx2.Shuffle(right.AsUInt32(), ZWXY); - GenTree* shuffleMask = gtNewIconNode(SHUFFLE_ZWXY, TYP_INT); - GenTreeHWIntrinsic* tmp1 = - gtNewSimdHWIntrinsicNode(type, op2Dup, shuffleMask, is256 ? NI_AVX2_Shuffle : NI_SSE2_Shuffle, - CORINFO_TYPE_UINT, simdSize); + // mid = Sse2.Multiply(mid.AsUInt32(), a.AsUInt32()); + mid = gtNewSimdHWIntrinsicNode(type, mid, op1Dup1, muludq, CORINFO_TYPE_ULONG, simdSize); - // Vector256 tmp2 = Avx2.MultiplyLow(left.AsUInt32(), tmp1); - GenTree* tmp2 = gtNewSimdBinOpNode(GT_MUL, type, op1Dup, tmp1, CORINFO_TYPE_UINT, simdSize); + // Vector128 tmp = (a >>> 32).AsUInt64(); + GenTree* tmp = gtNewSimdBinOpNode(GT_RSZ, type, op1Dup2, gtNewIconNode(32), simdBaseJitType, simdSize); - // Vector256 tmp3 = Avx2.HorizontalAdd(tmp2.AsInt32(), Vector256.Zero); - GenTreeHWIntrinsic* tmp3 = - gtNewSimdHWIntrinsicNode(type, tmp2, gtNewZeroConNode(type), - is256 ? NI_AVX2_HorizontalAdd : NI_SSSE3_HorizontalAdd, CORINFO_TYPE_UINT, - simdSize); + // tmp = Sse2.Multiply(tmp.AsUInt32(), b.AsUInt32()); + tmp = gtNewSimdHWIntrinsicNode(type, tmp, op2Dup2, muludq, CORINFO_TYPE_ULONG, simdSize); - // Vector256 tmp4 = Avx2.Shuffle(tmp3, YWXW); - shuffleMask = gtNewIconNode(SHUFFLE_YWXW, TYP_INT); - GenTreeHWIntrinsic* tmp4 = - gtNewSimdHWIntrinsicNode(type, tmp3, shuffleMask, is256 ? 
NI_AVX2_Shuffle : NI_SSE2_Shuffle, - CORINFO_TYPE_UINT, simdSize); + // mid += tmp; + mid = gtNewSimdBinOpNode(GT_ADD, type, mid, tmp, simdBaseJitType, simdSize); - // result = tmp0 + tmp4; - return gtNewSimdBinOpNode(GT_ADD, type, tmp0, tmp4, simdBaseJitType, simdSize); + // mid <<= 32; + mid = gtNewSimdBinOpNode(GT_LSH, type, mid, gtNewIconNode(32), simdBaseJitType, simdSize); + + // return low + mid; + return gtNewSimdBinOpNode(GT_ADD, type, low, mid, simdBaseJitType, simdSize); } #elif defined(TARGET_ARM64) if (varTypeIsLong(simdBaseType)) @@ -22057,8 +21943,6 @@ GenTree* Compiler::gtNewSimdBinOpNode( GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -22078,9 +21962,9 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op2 = gtNewIconNode(static_cast(FloatRoundingMode::ToPositiveInfinity)); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseJitType, simdSize); } else { @@ -22106,7 +21990,7 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s #if defined(FEATURE_MASKED_HW_INTRINSICS) //------------------------------------------------------------------------ -// gtNewSimdCvtMaskToVectorNode: Convert a HW instrinsic mask node to a vector +// gtNewSimdCvtMaskToVectorNode: Convert a HW intrinsic mask node to a vector // // Arguments: // type -- The type of the node to convert to @@ -22127,7 +22011,7 @@ GenTree* Compiler::gtNewSimdCvtMaskToVectorNode(var_types type, compMaskConvertUsed = true; #if defined(TARGET_XARCH) - return gtNewSimdHWIntrinsicNode(type, op1, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, NI_AVX512_ConvertMaskToVector, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) return gtNewSimdHWIntrinsicNode(type, op1, NI_Sve_ConvertMaskToVector, simdBaseJitType, simdSize); #else @@ -22152,18 +22036,42 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type, assert(varTypeIsFloating(simdSourceBaseType)); assert(varTypeIsIntegral(simdTargetBaseType)); - assert(IsBaselineSimdIsaSupportedDebugOnly()); - #if defined(TARGET_XARCH) - assert(IsBaselineVector512IsaSupportedDebugOnly() || - (simdSize != 64 && compIsaSupportedDebugOnly(InstructionSet_AVX10v1)) || + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512) || ((simdTargetBaseType == TYP_INT) && ((simdSize == 16 && compIsaSupportedDebugOnly(InstructionSet_SSE41)) || (simdSize == 32 && compIsaSupportedDebugOnly(InstructionSet_AVX))))); GenTree* fixupVal; - bool isV512Supported = false; - if (compIsEvexOpportunisticallySupported(isV512Supported)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) + { + NamedIntrinsic cvtIntrinsic = NI_Illegal; + switch (simdTargetBaseType) + { + case TYP_INT: + cvtIntrinsic = NI_AVX10v2_ConvertToVectorInt32WithTruncationSaturation; + break; + + case TYP_UINT: + cvtIntrinsic = NI_AVX10v2_ConvertToVectorUInt32WithTruncationSaturation; + break; + + case TYP_LONG: + cvtIntrinsic = NI_AVX10v2_ConvertToVectorInt64WithTruncationSaturation; + break; + + case TYP_ULONG: + cvtIntrinsic = 
NI_AVX10v2_ConvertToVectorUInt64WithTruncationSaturation; + break; + + default: + { + unreached(); + } + } + return gtNewSimdHWIntrinsicNode(type, op1, cvtIntrinsic, simdSourceBaseJitType, simdSize); + } + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { /*Generate the control table for VFIXUPIMMSD/SS - For conversion to unsigned @@ -22189,17 +22097,8 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type, GenTree* tblCon = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(iconVal), simdTargetBaseJitType, simdSize); // We need op1Clone to run fixup - GenTree* op1Clone = fgMakeMultiUse(&op1); - NamedIntrinsic fixupHwIntrinsicID; - - if (simdSize == 64) - { - fixupHwIntrinsicID = NI_AVX512F_Fixup; - } - else - { - fixupHwIntrinsicID = !isV512Supported ? NI_AVX10v1_Fixup : NI_AVX512F_VL_Fixup; - } + GenTree* op1Clone = fgMakeMultiUse(&op1); + NamedIntrinsic fixupHwIntrinsicID = NI_AVX512_Fixup; // run vfixupimmsd base on table and no flags reporting fixupVal = gtNewSimdHWIntrinsicNode(type, op1, op1Clone, tblCon, gtNewIconNode(0), fixupHwIntrinsicID, simdSourceBaseJitType, simdSize); @@ -22275,14 +22174,11 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, assert(varTypeIsFloating(simdSourceBaseType)); assert(varTypeIsIntegral(simdTargetBaseType)); - assert(IsBaselineSimdIsaSupportedDebugOnly()); - // Generate intrinsic needed for conversion NamedIntrinsic hwIntrinsicID = NI_Illegal; #if defined(TARGET_XARCH) - assert(IsBaselineVector512IsaSupportedDebugOnly() || - (simdSize != 64 && compIsaSupportedDebugOnly(InstructionSet_AVX10v1)) || + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512) || ((simdTargetBaseType == TYP_INT) && ((simdSize == 16) || (simdSize == 32 && compIsaSupportedDebugOnly(InstructionSet_AVX))))); @@ -22298,7 +22194,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, { case 64: { - hwIntrinsicID = NI_AVX512F_ConvertToVector512Int32WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector512Int32WithTruncation; break; } @@ -22310,7 +22206,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, case 16: { - hwIntrinsicID = NI_SSE2_ConvertToVector128Int32WithTruncation; + hwIntrinsicID = NI_X86Base_ConvertToVector128Int32WithTruncation; break; } @@ -22326,23 +22222,19 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, { case 64: { - hwIntrinsicID = NI_AVX512F_ConvertToVector512UInt32WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector512UInt32WithTruncation; break; } case 32: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector256UInt32WithTruncation - : NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector256UInt32WithTruncation; break; } case 16: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector128UInt32WithTruncation - : NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector128UInt32WithTruncation; break; } @@ -22368,23 +22260,19 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, { case 64: { - hwIntrinsicID = NI_AVX512DQ_ConvertToVector512Int64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector512Int64WithTruncation; break; } case 32: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? 
NI_AVX10v1_ConvertToVector256Int64WithTruncation - : NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector256Int64WithTruncation; break; } case 16: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector128Int64WithTruncation - : NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector128Int64WithTruncation; break; } @@ -22400,23 +22288,19 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, { case 64: { - hwIntrinsicID = NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector512UInt64WithTruncation; break; } case 32: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector256UInt64WithTruncation - : NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector256UInt64WithTruncation; break; } case 16: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector128UInt64WithTruncation - : NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector128UInt64WithTruncation; break; } @@ -22499,7 +22383,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, #if defined(FEATURE_MASKED_HW_INTRINSICS) //------------------------------------------------------------------------ -// gtNewSimdCvtVectorToMaskNode: Convert a HW instrinsic vector node to a mask +// gtNewSimdCvtVectorToMaskNode: Convert a HW intrinsic vector node to a mask // // Arguments: // type -- The type of the mask to produce. @@ -22520,7 +22404,7 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type, compMaskConvertUsed = true; #if defined(TARGET_XARCH) - return gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_EVEX_ConvertVectorToMask, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_AVX512_ConvertVectorToMask, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) // We use cmpne which requires an embedded mask. 
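    // [Editor's note, illustrative — not part of the patch] On SVE the vector is
    // converted to a mask by comparing every lane against zero under an all-true
    // governing predicate (the `trueMask` built just below); lanes holding a non-zero
    // value become active in the resulting predicate. Roughly, in assembly (mnemonics
    // are an assumption about the eventual code, not taken from the diff):
    //
    //   ptrue  p0.b                   ; gtNewSimdAllTrueMaskNode
    //   cmpne  p1.b, p0/z, z0.b, #0   ; p1 = (z0 != 0) per lane
    //
    // which is why an embedded all-true mask operand has to be materialized here
    // rather than being implied by the compare itself.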
GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); @@ -22534,8 +22418,6 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type, GenTree* Compiler::gtNewSimdCmpOpNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -22592,7 +22474,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode( GenTree* tmp = gtNewSimdCmpOpNode(op, type, op1, op2, CORINFO_TYPE_INT, simdSize); op1 = fgMakeMultiUse(&tmp); - op2 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(SHUFFLE_ZWXY), NI_SSE2_Shuffle, CORINFO_TYPE_INT, + op2 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(SHUFFLE_ZWXY), NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); return gtNewSimdBinOpNode(GT_AND, type, tmp, op2, simdBaseJitType, simdSize); @@ -22796,11 +22678,11 @@ GenTree* Compiler::gtNewSimdCmpOpNode( GenTree* u = gtNewSimdCmpOpNode(GT_EQ, type, op1Dup1, op2Dup1, CORINFO_TYPE_INT, simdSize); GenTree* v = gtNewSimdCmpOpNode(op, type, op1Dup2, op2Dup2, CORINFO_TYPE_UINT, simdSize); - op1 = gtNewSimdHWIntrinsicNode(type, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, + op1 = gtNewSimdHWIntrinsicNode(type, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); - u = gtNewSimdHWIntrinsicNode(type, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, + u = gtNewSimdHWIntrinsicNode(type, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); - v = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle, + v = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); op2 = gtNewSimdBinOpNode(GT_AND, type, u, v, simdBaseJitType, simdSize); @@ -22831,7 +22713,6 @@ GenTree* Compiler::gtNewSimdCmpOpNode( GenTree* Compiler::gtNewSimdCmpOpAllNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); assert(type == TYP_INT); var_types simdType = getSIMDTypeForSize(simdSize); @@ -22862,7 +22743,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } else if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_op_Equality; } else @@ -22891,7 +22772,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } else if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_op_Equality; } else @@ -22970,7 +22851,6 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( GenTree* Compiler::gtNewSimdCmpOpAnyNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); assert(type == TYP_INT); var_types simdType = getSIMDTypeForSize(simdSize); @@ -23011,7 +22891,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( } else if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_op_Inequality; } else @@ -23039,7 +22919,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( { if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + 
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_op_Inequality; } else if (simdSize == 32) @@ -23105,8 +22985,6 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( GenTree* Compiler::gtNewSimdCndSelNode( var_types type, GenTree* op1, GenTree* op2, GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23253,16 +23131,6 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types type, } #if defined(TARGET_XARCH) -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType) && !op1->IsIntegralConst()) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. - unreached(); - } -#endif // TARGET_X86 - if (simdSize == 64) { hwIntrinsicID = NI_Vector512_Create; @@ -23366,16 +23234,6 @@ GenTree* Compiler::gtNewSimdCreateScalarNode(var_types type, } #if defined(TARGET_XARCH) -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType) && !op1->IsIntegralConst()) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. - unreached(); - } -#endif // TARGET_X86 - if (simdSize == 32) { hwIntrinsicID = NI_Vector256_CreateScalar; @@ -23511,16 +23369,6 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type, } #if defined(TARGET_XARCH) -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType) && !op1->IsIntegralConst()) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. - unreached(); - } -#endif // TARGET_X86 - if (simdSize == 32) { hwIntrinsicID = NI_Vector256_CreateScalarUnsafe; @@ -23557,7 +23405,7 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type, GenTree* Compiler::gtNewSimdCreateSequenceNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - // This effectively doees: (Indices * op2) + Create(op1) + // This effectively does: (Indices * op2) + Create(op1) // // When both op2 and op1 are constant we can fully fold this to a constant. Additionally, // if only op2 is a constant we can simplify the computation by a lot. 
However, if only op1 @@ -23735,8 +23583,6 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode( GenTree* Compiler::gtNewSimdDotProdNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - var_types simdType = getSIMDTypeForSize(simdSize); assert(varTypeIsSIMD(simdType)); @@ -23779,8 +23625,6 @@ GenTree* Compiler::gtNewSimdDotProdNode( GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23799,9 +23643,9 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op2 = gtNewIconNode(static_cast(FloatRoundingMode::ToNegativeInfinity)); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseJitType, simdSize); } else { @@ -23848,8 +23692,8 @@ GenTree* Compiler::gtNewSimdFmaNode( #if defined(TARGET_XARCH) if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_FusedMultiplyAdd; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_FusedMultiplyAdd; } else { @@ -23857,8 +23701,6 @@ GenTree* Compiler::gtNewSimdFmaNode( intrinsic = NI_FMA_MultiplyAdd; } #elif defined(TARGET_ARM64) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - if (simdBaseType == TYP_DOUBLE) { intrinsic = (simdSize == 8) ? 
NI_AdvSimd_FusedMultiplyAddScalar : NI_AdvSimd_Arm64_FusedMultiplyAdd; @@ -23889,14 +23731,7 @@ GenTree* Compiler::gtNewSimdGetElementNode( assert(varTypeIsArithmetic(simdBaseType)); #if defined(TARGET_XARCH) - bool useToScalar = op2->IsIntegralConst(0); - -#if defined(TARGET_X86) - // We handle decomposition via GetElement for simplicity - useToScalar &= !varTypeIsLong(simdBaseType); -#endif // TARGET_X86 - - if (useToScalar) + if (op2->IsIntegralConst(0)) { return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize); } @@ -23920,7 +23755,7 @@ GenTree* Compiler::gtNewSimdGetElementNode( case TYP_SHORT: case TYP_USHORT: { - assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + // Supported by baseline ISA requirement break; } @@ -24135,8 +23970,6 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24164,8 +23997,6 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type, // GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24217,8 +24048,6 @@ GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoTy // GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24250,8 +24079,6 @@ GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, CorInfo // GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24292,8 +24119,6 @@ GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoT // GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24325,8 +24150,6 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType // GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24369,8 +24192,6 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24419,8 +24240,6 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type, // GenTree* Compiler::gtNewSimdIsNormalNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24484,8 +24303,6 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - 
assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24513,8 +24330,6 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type, // GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24557,8 +24372,6 @@ GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24610,8 +24423,6 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24669,8 +24480,6 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type, // GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24726,8 +24535,6 @@ GenTree* Compiler::gtNewSimdLoadAlignedNode(var_types type, unsigned simdSize) { #if defined(TARGET_XARCH) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24740,21 +24547,17 @@ GenTree* Compiler::gtNewSimdLoadAlignedNode(var_types type, if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_LoadAlignedVector512; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_LoadAlignedVector512; } else if (simdSize == 32) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); intrinsic = NI_AVX_LoadAlignedVector256; } - else if (simdBaseType != TYP_FLOAT) - { - intrinsic = NI_SSE2_LoadAlignedVector128; - } else { - intrinsic = NI_SSE_LoadAlignedVector128; + intrinsic = NI_X86Base_LoadAlignedVector128; } assert(intrinsic != NI_Illegal); @@ -24789,8 +24592,6 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, unsigned simdSize) { #if defined(TARGET_XARCH) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24820,24 +24621,18 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, } else if (simdSize == 64) { - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) - { - intrinsic = NI_AVX512F_LoadAlignedVector512NonTemporal; - isNonTemporal = true; - } + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_LoadAlignedVector512NonTemporal; + isNonTemporal = true; } else if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) { intrinsic = NI_SSE41_LoadAlignedVector128NonTemporal; isNonTemporal = true; } - else if (simdBaseType != TYP_FLOAT) - { - intrinsic = NI_SSE2_LoadAlignedVector128; - } else { - intrinsic = NI_SSE_LoadAlignedVector128; + intrinsic = NI_X86Base_LoadAlignedVector128; } if (isNonTemporal) @@ -24872,8 +24667,6 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, GenTree* Compiler::gtNewSimdMaxNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - 
assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24889,6 +24682,45 @@ GenTree* Compiler::gtNewSimdMaxNode( #if defined(TARGET_XARCH) if (varTypeIsFloating(simdBaseType)) { + if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) + { + NamedIntrinsic minMaxIntrinsic = NI_AVX10v2_MinMax; + return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x05), minMaxIntrinsic, simdBaseJitType, + simdSize); + } + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // If AVX512 is supported, we can use vrangeps/vrangepd to correctly handle the Vector.Max(-0.0, 0.0) = 0.0 + // case. We still need to check for NaN as vrangeps/vrangepd does not handle NaN as specified in IEEE 754 + // 2019. + // + // This can be represented as the following managed code: + // Vector128 range = Avx512DQ.VL.Range(op1, op2, 0x5); + // Vector128 fixup1 = Avx512F.VL.Fixup(op1, op2, Vector128.One, 0); + // Vector128 fixup2 = Avx512F.VL.Fixup(range, fixup1, Vector128.One, 0); + // return fixup2; + // + // 0x5 is the control byte for vrangeps/vrangepd: + // Imm8[1:0] = 01b : Select Max value + // Imm8[3:2] = 01b : Select sign(Compare_Result) + + GenTree* op1Dup = fgMakeMultiUse(&op1); + GenTree* op2Dup = fgMakeMultiUse(&op2); + GenTree* rangeOp = gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x5), NI_AVX512_Range, + simdBaseJitType, simdSize); + GenTreeVecCon* tblVecCon1 = gtNewVconNode(type); + GenTreeVecCon* tblVecCon2 = gtNewVconNode(type); + const int64_t tblValue = 0x1; + tblVecCon1->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue); + tblVecCon2->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue); + NamedIntrinsic fixupIntrinsic = NI_AVX512_Fixup; + GenTree* fixup1 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, tblVecCon1, gtNewIconNode(0), + fixupIntrinsic, simdBaseJitType, simdSize); + GenTree* fixup2 = gtNewSimdHWIntrinsicNode(type, rangeOp, fixup1, tblVecCon2, gtNewIconNode(0), + fixupIntrinsic, simdBaseJitType, simdSize); + return fixup2; + } + GenTree* op1Dup1 = fgMakeMultiUse(&op1); GenTree* op1Dup2 = gtCloneExpr(op1Dup1); GenTree* op1Dup3 = gtCloneExpr(op1Dup2); @@ -24916,8 +24748,6 @@ GenTree* Compiler::gtNewSimdMaxNode( GenTree* Compiler::gtNewSimdMaxNativeNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24949,28 +24779,16 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( { intrinsic = NI_AVX2_Max; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX10v1_Max; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) - { - intrinsic = NI_AVX512F_VL_Max; + intrinsic = NI_AVX512_Max; } } } else if (simdSize == 64) { - if (varTypeIsSmall(simdBaseType)) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - intrinsic = NI_AVX512BW_Max; - } - else - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Max; - } + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_Max; } else { @@ -25066,28 +24884,19 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( case TYP_LONG: case TYP_ULONG: { - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - intrinsic = NI_AVX10v1_Max; - } - else if 
(compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX512F_VL_Max; + intrinsic = NI_AVX512_Max; } break; } case TYP_FLOAT: - { - intrinsic = NI_SSE_Max; - break; - } - case TYP_UBYTE: case TYP_SHORT: case TYP_DOUBLE: { - intrinsic = NI_SSE2_Max; + intrinsic = NI_X86Base_Max; break; } @@ -25131,8 +24940,6 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( GenTree* Compiler::gtNewSimdMinNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25148,6 +24955,45 @@ GenTree* Compiler::gtNewSimdMinNode( #if defined(TARGET_XARCH) if (varTypeIsFloating(simdBaseType)) { + if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) + { + NamedIntrinsic minMaxIntrinsic = NI_AVX10v2_MinMax; + return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x04), minMaxIntrinsic, simdBaseJitType, + simdSize); + } + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // If AVX512 is supported, we can use vrangeps/vrangepd to correctly handle the Vector.Min(-0.0, 0.0) = -0.0 + // case. We still need to check for NaN as vrangeps/vrangepd does not handle NaN as specified in IEEE 754 + // 2019. + // + // This can be represented as the following managed code: + // Vector128 range = Avx512DQ.VL.Range(op1, op2, 0x4); + // Vector128 fixup1 = Avx512F.VL.Fixup(op1, op2, Vector128.One, 0); + // Vector128 fixup2 = Avx512F.VL.Fixup(range, fixup1, Vector128.One, 0); + // return fixup2; + // + // 0x4 is the control byte for vrangeps/vrangepd: + // Imm8[1:0] = 00b : Select Min value + // Imm8[3:2] = 01b : Select sign(Compare_Result) + + GenTree* op1Dup = fgMakeMultiUse(&op1); + GenTree* op2Dup = fgMakeMultiUse(&op2); + GenTree* rangeOp = gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x4), NI_AVX512_Range, + simdBaseJitType, simdSize); + GenTreeVecCon* tblVecCon1 = gtNewVconNode(type); + GenTreeVecCon* tblVecCon2 = gtNewVconNode(type); + const int64_t tblValue = 0x1; + tblVecCon1->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue); + tblVecCon2->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? 
TYP_INT : TYP_LONG, tblValue); + NamedIntrinsic fixupIntrinsic = NI_AVX512_Fixup; + GenTree* fixup1 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, tblVecCon1, gtNewIconNode(0), + fixupIntrinsic, simdBaseJitType, simdSize); + GenTree* fixup2 = gtNewSimdHWIntrinsicNode(type, rangeOp, fixup1, tblVecCon2, gtNewIconNode(0), + fixupIntrinsic, simdBaseJitType, simdSize); + return fixup2; + } + GenTree* op1Dup1 = fgMakeMultiUse(&op1); GenTree* op1Dup2 = gtCloneExpr(op1Dup1); GenTree* op1Dup3 = gtCloneExpr(op1Dup2); @@ -25175,8 +25021,6 @@ GenTree* Compiler::gtNewSimdMinNode( GenTree* Compiler::gtNewSimdMinNativeNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25208,28 +25052,16 @@ GenTree* Compiler::gtNewSimdMinNativeNode( { intrinsic = NI_AVX2_Min; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - intrinsic = NI_AVX10v1_Min; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX512F_VL_Min; + intrinsic = NI_AVX512_Min; } } } else if (simdSize == 64) { - if (varTypeIsSmall(simdBaseType)) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - intrinsic = NI_AVX512BW_Min; - } - else - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Min; - } + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_Min; } else { @@ -25321,28 +25153,19 @@ GenTree* Compiler::gtNewSimdMinNativeNode( case TYP_LONG: case TYP_ULONG: { - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - intrinsic = NI_AVX10v1_Min; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX512F_VL_Min; + intrinsic = NI_AVX512_Min; } break; } case TYP_FLOAT: - { - intrinsic = NI_SSE_Min; - break; - } - case TYP_UBYTE: case TYP_SHORT: case TYP_DOUBLE: { - intrinsic = NI_SSE2_Min; + intrinsic = NI_X86Base_Min; break; } @@ -25386,8 +25209,6 @@ GenTree* Compiler::gtNewSimdMinNativeNode( GenTree* Compiler::gtNewSimdNarrowNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25403,12 +25224,10 @@ GenTree* Compiler::gtNewSimdNarrowNode( GenTree* tmp1; GenTree* tmp2; - bool isV512Supported = false; - #if defined(TARGET_XARCH) GenTree* tmp3; GenTree* tmp4; - if (compIsEvexOpportunisticallySupported(isV512Supported)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // This is the same in principle to the other comments below, however due to // code formatting, its too long to reasonably display here. @@ -25425,12 +25244,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512BW_ConvertToVector256SByte; + intrinsicId = NI_AVX512_ConvertToVector256SByte; } else { - intrinsicId = - !isV512Supported ? 
NI_AVX10v1_ConvertToVector128SByte : NI_AVX512BW_VL_ConvertToVector128SByte; + intrinsicId = NI_AVX512_ConvertToVector128SByte; } opBaseJitType = CORINFO_TYPE_SHORT; @@ -25441,12 +25259,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512BW_ConvertToVector256Byte; + intrinsicId = NI_AVX512_ConvertToVector256Byte; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128Byte : NI_AVX512BW_VL_ConvertToVector128Byte; + intrinsicId = NI_AVX512_ConvertToVector128Byte; } opBaseJitType = CORINFO_TYPE_USHORT; @@ -25457,12 +25274,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256Int16; + intrinsicId = NI_AVX512_ConvertToVector256Int16; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128Int16 : NI_AVX512F_VL_ConvertToVector128Int16; + intrinsicId = NI_AVX512_ConvertToVector128Int16; } opBaseJitType = CORINFO_TYPE_INT; @@ -25473,12 +25289,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256UInt16; + intrinsicId = NI_AVX512_ConvertToVector256UInt16; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128UInt16 : NI_AVX512F_VL_ConvertToVector128UInt16; + intrinsicId = NI_AVX512_ConvertToVector128UInt16; } opBaseJitType = CORINFO_TYPE_UINT; @@ -25489,12 +25304,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256Int32; + intrinsicId = NI_AVX512_ConvertToVector256Int32; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128Int32 : NI_AVX512F_VL_ConvertToVector128Int32; + intrinsicId = NI_AVX512_ConvertToVector128Int32; } opBaseJitType = CORINFO_TYPE_LONG; @@ -25505,12 +25319,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256UInt32; + intrinsicId = NI_AVX512_ConvertToVector256UInt32; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128UInt32 : NI_AVX512F_VL_ConvertToVector128UInt32; + intrinsicId = NI_AVX512_ConvertToVector128UInt32; } opBaseJitType = CORINFO_TYPE_ULONG; @@ -25521,7 +25334,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256Single; + intrinsicId = NI_AVX512_ConvertToVector256Single; } else if (simdSize == 32) { @@ -25529,7 +25342,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( } else { - intrinsicId = NI_SSE2_ConvertToVector128Single; + intrinsicId = NI_X86Base_ConvertToVector128Single; } opBaseJitType = CORINFO_TYPE_DOUBLE; @@ -25547,7 +25360,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( if (simdSize == 16) { - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE_MoveLowToHigh, CORINFO_TYPE_FLOAT, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_MoveLowToHigh, CORINFO_TYPE_FLOAT, simdSize); } intrinsicId = (simdSize == 64) ? 
NI_Vector256_ToVector512Unsafe : NI_Vector128_ToVector256Unsafe; @@ -25719,7 +25532,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize); tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE, + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_PackUnsignedSaturate, CORINFO_TYPE_UBYTE, simdSize); } @@ -25777,18 +25590,18 @@ GenTree* Compiler::gtNewSimdNarrowNode( GenTree* op1Dup = fgMakeMultiUse(&op1); GenTree* op2Dup = fgMakeMultiUse(&op2); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); - tmp2 = - gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); + tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_X86Base_UnpackHigh, simdBaseJitType, + simdSize); GenTree* tmp1Dup = fgMakeMultiUse(&tmp1); GenTree* tmp2Dup = fgMakeMultiUse(&tmp2); - tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); - tmp4 = - gtNewSimdHWIntrinsicNode(type, tmp1Dup, tmp2Dup, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize); + tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); + tmp4 = gtNewSimdHWIntrinsicNode(type, tmp1Dup, tmp2Dup, NI_X86Base_UnpackHigh, simdBaseJitType, + simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp3, tmp4, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp3, tmp4, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); } } @@ -25809,10 +25622,10 @@ GenTree* Compiler::gtNewSimdNarrowNode( GenTree* op1Dup = fgMakeMultiUse(&op1); GenTree* op2Dup = fgMakeMultiUse(&op2); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); - tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); + tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_X86Base_UnpackHigh, simdBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); } case TYP_FLOAT: @@ -25830,10 +25643,12 @@ GenTree* Compiler::gtNewSimdNarrowNode( CorInfoType opBaseJitType = CORINFO_TYPE_DOUBLE; - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, NI_SSE2_ConvertToVector128Single, opBaseJitType, simdSize); - tmp2 = gtNewSimdHWIntrinsicNode(type, op2, NI_SSE2_ConvertToVector128Single, opBaseJitType, simdSize); + tmp1 = + gtNewSimdHWIntrinsicNode(type, op1, NI_X86Base_ConvertToVector128Single, opBaseJitType, simdSize); + tmp2 = + gtNewSimdHWIntrinsicNode(type, op2, NI_X86Base_ConvertToVector128Single, opBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE_MoveLowToHigh, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_MoveLowToHigh, simdBaseJitType, simdSize); } default: @@ -25880,14 +25695,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( else { // var tmp1 = op1.ToVector128Unsafe(); - // var tmp2 = AdvSimd.InsertScalar(tmp1.AsUInt64(), 1, op2.AsUInt64()).As(); - signed integer use int64, - // unsigned 
integer use uint64 + // var tmp2 = tmp1.WithUpper(op2); // return AdvSimd.ExtractNarrowingLower(tmp2); - CorInfoType tmp2BaseJitType = varTypeIsSigned(simdBaseType) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG; - tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128Unsafe, simdBaseJitType, simdSize); - tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, tmp2BaseJitType, 16); + tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, simdBaseJitType, 16); return gtNewSimdHWIntrinsicNode(type, tmp2, NI_AdvSimd_ExtractNarrowingLower, simdBaseJitType, simdSize); } @@ -25910,8 +25722,6 @@ GenTree* Compiler::gtNewSimdNarrowNode( // GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25931,9 +25741,9 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op2 = gtNewIconNode(static_cast(FloatRoundingMode::ToNearestInteger)); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseJitType, simdSize); } else { @@ -25957,216 +25767,1055 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); } -GenTree* Compiler::gtNewSimdShuffleNode( - var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) +//------------------------------------------------------------------------ +// gtNewSimdShuffleVariableNode: Creates a new simd shuffle node (with variable indices, or a case isn't handled in +// gtNewSimdShuffleNode for ShuffleUnsafe with out of bounds indices) - this is a helper function for +// gtNewSimdShuffleNode & should just be invoked by it indirectly, instead of other callers using it +// +// Arguments: +// type -- The type of the node +// op1 -- The values to shuffle +// op2 -- The indices to pick from (variable) +// simdBaseJitType -- The base jit type of the node +// simdSize -- The simd size of the node +// isShuffleNative -- Whether we're making a ShuffleNative node vs a Shuffle one +// +// Return Value: +// The shuffle node +// +GenTree* Compiler::gtNewSimdShuffleVariableNode( + var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); assert(op1 != nullptr); assert(op1->TypeIs(type)); + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(op2 != nullptr); assert(op2->TypeIs(type)); - assert(op2->IsCnsVec()); - - var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); - assert(varTypeIsArithmetic(simdBaseType)); - - if (op2->IsVectorAllBitsSet()) - { - // AllBitsSet represents indices that are always "out of range" which means zero should be - // selected for every element. We can special-case this down to just returning a zero node - return gtNewZeroConNode(type); - } - - if (op2->IsVectorZero()) - { - // TODO-XARCH-CQ: Zero represents indices that select the first element of op1 each time. 
We can simplify - // this down to basically a broadcast equivalent. - } + assert((!op2->IsCnsVec()) || isShuffleNative); - GenTree* retNode = nullptr; - GenTreeIntConCommon* cnsNode = nullptr; + GenTree* retNode = nullptr; + GenTree* cnsNode = nullptr; size_t elementSize = genTypeSize(simdBaseType); size_t elementCount = simdSize / elementSize; + // duplicate operand 2 for non-ShuffleNative implementation later + // (also, on arm64, byte ShuffleNative is same as Shuffle) + GenTree* op2DupSafe = nullptr; #if defined(TARGET_XARCH) - uint8_t control = 0; - bool crossLane = false; - bool needsZero = varTypeIsSmall(simdBaseType) && (simdSize <= 16); - uint64_t value = 0; - simd_t vecCns = {}; - simd_t mskCns = {}; - - for (size_t index = 0; index < elementCount; index++) + if (!isShuffleNative) +#elif defined(TARGET_ARM64) + if ((!isShuffleNative) && (elementSize > 1)) +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 { - value = op2->GetIntegralVectorConstElement(index, simdBaseType); - - if (value < elementCount) - { - if (simdSize == 32) - { - // Most of the 256-bit shuffle/permute instructions operate as if - // the inputs were 2x 128-bit values. If the selected indices cross - // the respective 128-bit "lane" we may need to specialize the codegen - - if (index < (elementCount / 2)) - { - crossLane |= (value >= (elementCount / 2)); - } - else - { - crossLane |= (value < (elementCount / 2)); - } - } + op2DupSafe = fgMakeMultiUse(&op2); + } - // Setting the control for byte/sbyte and short/ushort is unnecessary - // and will actually compute an incorrect control word. But it simplifies - // the overall logic needed here and will remain unused. +#if defined(TARGET_XARCH) + // on xarch, signed comparison is cheaper, so whenever we are able to use it in the + // result & (indices < elementCount) step for Shuffle, we do. Specifically, we are + // able to use it when the top bit causes zeroing (then we can compare indices as + // if they were signed, since negative cases are already handled). + bool canUseSignedComparisonHint = false; - control |= (value << (index * (elementCount / 2))); + // TODO-XARCH-CQ: If we have known set/unset bits for the indices, we could further optimise many cases + // below. - // When Ssse3 is supported, we may need vecCns to accurately select the relevant - // bytes if some index is outside the valid range. Since x86/x64 is little-endian - // we can simplify this down to a for loop that scales the value and selects count - // sequential bytes. + if (simdSize == 64) + { + if (elementSize == 1) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512VBMI)); - for (uint32_t i = 0; i < elementSize; i++) - { - vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i); + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512VBMI_PermuteVar64x8, simdBaseJitType, simdSize); + retNode->SetReverseOp(); + } + else if (elementSize == 2) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); - // When Ssse3 is not supported, we need to adjust the constant to be AllBitsSet - // so that we can emit a ConditionalSelect(op2, retNode, zeroNode). 
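For reference, the contract this variable-index path has to honour (and the reason op2 is duplicated up front for the non-ShuffleNative case): Shuffle must produce zero for every element whose index is out of range, while ShuffleNative leaves such elements unspecified. A minimal scalar sketch of that contract, assuming 16 byte-sized elements; the helper name is illustrative, not JIT code:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Scalar reference model of the managed Shuffle contract the path above lowers:
// any index outside [0, elementCount) selects zero. ShuffleNative makes no such
// guarantee, which is why the "mask out-of-range elements to zero" step is only
// appended for Shuffle.
std::array<uint8_t, 16> ShuffleReference(const std::array<uint8_t, 16>& values,
                                         const std::array<uint8_t, 16>& indices)
{
    std::array<uint8_t, 16> result{};
    for (size_t i = 0; i < 16; i++)
    {
        result[i] = (indices[i] < 16) ? values[indices[i]] : 0;
    }
    return result;
}

int main()
{
    std::array<uint8_t, 16> v{};
    for (size_t i = 0; i < 16; i++)
    {
        v[i] = static_cast<uint8_t>(i * 10);
    }
    std::array<uint8_t, 16> idx = {15, 14, 200, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
    std::array<uint8_t, 16> r   = ShuffleReference(v, idx);
    printf("%d %d %d\n", r[0], r[1], r[2]); // 150 140 0 (index 200 is out of range)
    return 0;
}
```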
+ // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar32x16, simdBaseJitType, simdSize); + retNode->SetReverseOp(); + } + else if (elementSize == 4) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); - mskCns.u8[(index * elementSize) + i] = 0xFF; - } + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar16x32, simdBaseJitType, simdSize); + retNode->SetReverseOp(); } else { - needsZero = true; - - // When Ssse3 is supported, we may need vecCns to accurately select the relevant - // bytes if some index is outside the valid range. We can do this by just zeroing - // out each byte in the element. This only requires the most significant bit to be - // set, but we use 0xFF instead since that will be the equivalent of AllBitsSet - - for (uint32_t i = 0; i < elementSize; i++) - { - vecCns.u8[(index * elementSize) + i] = 0xFF; - - // When Ssse3 is not supported, we need to adjust the constant to be Zero - // so that we can emit a ConditionalSelect(op2, retNode, zeroNode). + assert(elementSize == 8); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); - mskCns.u8[(index * elementSize) + i] = 0x00; - } + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar8x64, simdBaseJitType, simdSize); + retNode->SetReverseOp(); } } - - if (simdSize == 32) + else if ((elementSize == 1) && (simdSize == 16)) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - bool isV512Supported = false; - if ((varTypeIsByte(simdBaseType) && - !compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512VBMI_VL)) || - (varTypeIsShort(simdBaseType) && - !compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL))) - { - if (crossLane) - { - // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort - unreached(); - } - - // If we aren't crossing lanes, then we can decompose the byte/sbyte - // and short/ushort operations into 2x 128-bit operations - - // We want to build what is essentially the following managed code: - // var op1Lower = op1.GetLower(); - // op1Lower = Ssse3.Shuffle(op1Lower, Vector128.Create(...)); - // - // var op1Upper = op1.GetUpper(); - // op1Upper = Ssse3.Shuffle(op1Upper, Vector128.Create(...)); - // - // return Vector256.Create(op1Lower, op1Upper); - - simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + assert(compIsaSupportedDebugOnly(InstructionSet_SSSE3)); - GenTree* op1Dup = fgMakeMultiUse(&op1); - GenTree* op1Lower = gtNewSimdGetLowerNode(TYP_SIMD16, op1, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize); - op2 = gtNewVconNode(TYP_SIMD16); - op2->AsVecCon()->gtSimd16Val = vecCns.v128[0]; + // high bit on index gives 0 already + canUseSignedComparisonHint = true; + } + else if ((elementSize == 1) && (simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI)) + { + NamedIntrinsic intrinsic = NI_AVX512VBMI_PermuteVar32x8; - op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16); + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize); + retNode->SetReverseOp(); + } + else if ((elementSize == 2) && compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + assert((simdSize == 16) || (simdSize == 32)); + NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512_PermuteVar8x16 : NI_AVX512_PermuteVar16x16; - GenTree* op1Upper = gtNewSimdGetUpperNode(TYP_SIMD16, op1Dup, simdBaseJitType, simdSize); + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize); + retNode->SetReverseOp(); + } + else if ((elementSize == 4) && ((simdSize == 32) || compOpportunisticallyDependsOn(InstructionSet_AVX))) + { + assert((simdSize == 16) || (simdSize == 32)); - op2 = gtNewVconNode(TYP_SIMD16); - op2->AsVecCon()->gtSimd16Val = vecCns.v128[1]; + if (simdSize == 32) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - op1Upper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16); + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize); + retNode->SetReverseOp(); + } + else + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); - return gtNewSimdWithUpperNode(type, op1Lower, op1Upper, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX_PermuteVar, CORINFO_TYPE_FLOAT, simdSize); } + } + else if ((elementSize == 8) && (simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + NamedIntrinsic intrinsic = NI_AVX512_PermuteVar4x64; - if (elementSize == 4) + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize); + retNode->SetReverseOp(); + } + else if ((elementSize == 8) && (simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + GenTree* op1Copy = fgMakeMultiUse(&op1); // just use op1 again for the other variable + NamedIntrinsic intrinsic = NI_AVX512_PermuteVar2x64x2; + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, op1Copy, intrinsic, simdBaseJitType, simdSize); + } + else + { + assert(((elementSize == 1) && (simdSize == 32)) || (elementSize == 2) || + ((elementSize == 4) && (simdSize == 16)) || (elementSize == 8)); + + if ((elementSize == 8) && ((simdSize == 32) || compOpportunisticallyDependsOn(InstructionSet_AVX))) { - for (uint32_t i = 0; i < elementCount; i++) + assert((simdSize == 16) || (simdSize == 32)); + if (simdSize == 32) { - vecCns.u32[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize); + 
assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + } + else + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); } - op2 = gtNewVconNode(type); - op2->AsVecCon()->gtSimdVal = vecCns; + // the below is implemented for integral types + if (varTypeIsFloating(simdBaseType)) + { + assert(elementSize == 8); + simdBaseJitType = CORINFO_TYPE_LONG; + } - // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize); - } - else if (elementSize == 2) - { - assert(canUseEvexEncodingDebugOnly()); - for (uint32_t i = 0; i < elementCount; i++) + // shift all indices to the left by 1 (long to int index, first step of converting long->int indices) + cnsNode = gtNewIconNode(1, TYP_INT); + if (simdSize == 32) { - vecCns.u16[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_ShiftLeftLogical, simdBaseJitType, simdSize); + } + else + { + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_X86Base_ShiftLeftLogical, simdBaseJitType, + simdSize); + } + + // the below are implemented with float/int/uint + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT; + if (varTypeIsFloating(simdBaseType)) + { + simdBaseJitType = CORINFO_TYPE_FLOAT; + } + + // shuffle & manipulate the long indices to int indices (e.g., 3 2 1 0 -> 6 7 4 5 2 3 0 1) + unsigned immediate = 0b10100000; + cnsNode = gtNewIconNode(immediate); + if (simdSize == 32) + { + if (varTypeIsFloating(simdBaseType)) + { + GenTree* op2Dup = fgMakeMultiUse(&op2); + op2 = + gtNewSimdHWIntrinsicNode(type, op2, op2Dup, cnsNode, NI_AVX_Shuffle, simdBaseJitType, simdSize); + } + else + { + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + } + } + else + { + if (varTypeIsFloating(simdBaseType)) + { + GenTree* op2Dup = fgMakeMultiUse(&op2); + op2 = gtNewSimdHWIntrinsicNode(type, op2, op2Dup, cnsNode, NI_X86Base_Shuffle, simdBaseJitType, + simdSize); + } + else + { + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_X86Base_Shuffle, simdBaseJitType, simdSize); + } + } + + simd_t orCns = {}; + for (size_t index = 0; index < simdSize / 4; index++) + { + orCns.u32[index] = index & 1; + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = orCns; + + op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize); + + // perform the shuffle with our int indices + if (simdSize == 32) + { + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize); + retNode->SetReverseOp(); + } + else + { + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX_PermuteVar, CORINFO_TYPE_FLOAT, simdSize); + } + } + else if (simdSize == 32) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + assert(elementSize <= 2); + + // the idea is the following (for bytes, short indices are first converted to byte indices): + // 1. we have vector, and we create a vectorSwapped from it (for which the 128-bit lanes are swapped) + // 2. we then shuffle each using Avx2.Shuffle + // 3. we now have vector and vectorSwapped shuffled with Avx2.Shuffle - which only shuffles within the lane + // 4. for Shuffle, invalid indices are explicitly zeroed later, so no need to worry about anything outside + // [0, 31], and for ShuffleNative, we don't guarantee any particular index (or 0 value) is selected. 
+ // 5. since we only care about [0, 31], notably we have that for each element either vector or vectorSwapped + // (not both) will have the value we actually want, since one is effectively index A = i & 0x0F, and + // the other is effectively B = (i & 0x0F) | 0x10. (vector is A for left lane and B for right lane, + // and vectorSwapped is B for left lane and A for right lane) + // 6. we can use a conditional select to get the appropriate value if we know what mask to use. + // 7. we can use the following mask: + // (indices ^ V256.Create(V128.Create((byte)0), V128.Create((byte)0x10))) > V256.Create((byte)0x0F) + // since this detects whether the index value is in the same lane as V256.Indices + // would be (which we know we can always use vector for). this is because it normalises the 0x10 bit + // to mean '0 = in vector, 1 = in vectorSwapped', and then we can use > 0x0F to detect when this is + // the case (we use > on sbyte, since it is the primitive operation on x86/x64 avx2 hardware). + // 8. for Shuffle, we explicitly normalise the out of range indices later, so we are done. + + // high bit on index gives 0 already + if (elementSize == 1) + { + canUseSignedComparisonHint = true; + } + + // declare required clones of op2 + GenTree *op2Dup1, *op2Dup2; + + // if we have elementSize > 1, we need to convert op2 (short indices) to byte indices + if (elementSize > 1) + { + // shift all indices to the left by tzcnt(size) = 1 + cnsNode = gtNewIconNode(1, TYP_INT); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_ShiftLeftLogical, simdBaseJitType, simdSize); + + // the below are implemented with byte/sbyte + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + + // shuffle with a pattern like 0 0 2 2 4 4 6 6 ... 0 0 2 2 ... (for shorts) + // (note: the 0x10 bit is ignored for Avx2.Shuffle) + simd_t shufCns = {}; + shufCns.u64[0] = 0x0606040402020000; + shufCns.u64[1] = 0x0E0E0C0C0A0A0808; + shufCns.u64[2] = 0x0606040402020000; + shufCns.u64[3] = 0x0E0E0C0C0A0A0808; + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = shufCns; + + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + + // or every second index with 1 (short) + simd_t orCns = {}; + for (size_t index = 0; index < simdSize; index++) + { + orCns.u8[index] = static_cast(index & (elementSize - 1)); + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = orCns; + + op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize); + + // create required clones of op2 + op2Dup1 = fgMakeMultiUse(&op2); + op2Dup2 = gtCloneExpr(op2Dup1); + } + + else + { + // create required clones of op2 + op2Dup1 = (op2DupSafe != nullptr) ? 
gtCloneExpr(op2DupSafe) : fgMakeMultiUse(&op2); + op2Dup2 = gtCloneExpr(op2Dup1); + } + + // swap the low and high 128-bit lanes + // Vector256 swap = Avx2.Permute2x128(vector, vector, 0b00000001); + GenTree* swap; + if (!op1->IsCnsVec()) + { + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op1Dup2 = gtCloneExpr(op1Dup1); + + uint8_t control = 1; + cnsNode = gtNewIconNode(control, TYP_INT); + swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, simdBaseJitType, + simdSize); + } + else + { + // if we have a constant, keep it constant + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + swap = op1Dup1; + + simd_t* cnsPtr = &op1Dup1->AsVecCon()->gtSimdVal; + std::swap(cnsPtr->u64[0], cnsPtr->u64[2]); + std::swap(cnsPtr->u64[1], cnsPtr->u64[3]); + } + + // shuffle with both the normal and swapped values + // Vector256 shuf1 = Avx2.Shuffle(vector, indices); + // Vector256 shuf2 = Avx2.Shuffle(swap, indices); + GenTree* shuf1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + GenTree* shuf2 = gtNewSimdHWIntrinsicNode(type, swap, op2Dup1, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + + // get the indices, and xor the cross-lane bit on the high 128-bit lane part of indices. + // V256 indicesXord = indices ^ V256.Create(V128.Create((byte)0), V128.Create((byte)0x10))); + simd_t xorCns = {}; + xorCns.u64[0] = 0; + xorCns.u64[1] = 0; + xorCns.u64[2] = 0x1010101010101010; + xorCns.u64[3] = 0x1010101010101010; + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = xorCns; + + GenTree* indicesXord = gtNewSimdBinOpNode(GT_XOR, type, op2Dup2, cnsNode, simdBaseJitType, simdSize); + + // compare our modified indices to 0x0F (highest value not swapping lane), we get 0xFF when we are swapping + // lane and 0x00 otherwise. we will also get "swapping lane" also when index is more than 32 + // (but no high bit), but this is normalised later for Shuffle, and acceptable for ShuffleNative. 
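The mask construction here can be read as a per-element predicate: an element of the result must be taken from the lane-swapped shuffle exactly when its source index lives in the other 128-bit lane from the destination slot. A scalar sketch of that predicate, with an illustrative helper name (not JIT code); the signed compare deliberately rejects indices with the top bit set, since vpshufb already zeroes those elements:

```cpp
#include <cstdint>

// Element destPos of the 32-byte result should come from the lane-swapped copy when
// its source index, after XOR-ing on the destination lane's 0x10 bias, compares
// greater than 0x0F as a signed byte (the vpcmpgtb form used above).
bool SelectsSwappedLane(uint8_t index, unsigned destPos)
{
    uint8_t laneBias = (destPos < 16) ? 0x00 : 0x10; // Create(V128(0), V128(0x10))
    uint8_t xord     = static_cast<uint8_t>(index ^ laneBias);
    return static_cast<int8_t>(xord) > 0x0F;
}

int main()
{
    // Source index 21 sits in the upper lane: a lower-lane destination must read it
    // from the swapped copy, while an upper-lane destination reads it directly.
    return (SelectsSwappedLane(21, 0) && !SelectsSwappedLane(21, 20)) ? 0 : 1;
}
```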
+ // V256 selection = Avx2.CompareGreaterThan(indicesXord.AsSByte(), V256.Create((sbyte)0x0F)).AsByte(); + simd_t comparandCnd = {}; + comparandCnd.u64[0] = 0x0F0F0F0F0F0F0F0F; + comparandCnd.u64[1] = 0x0F0F0F0F0F0F0F0F; + comparandCnd.u64[2] = 0x0F0F0F0F0F0F0F0F; + comparandCnd.u64[3] = 0x0F0F0F0F0F0F0F0F; + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = comparandCnd; + GenTree* selection = gtNewSimdCmpOpNode(GT_GT, type, indicesXord, cnsNode, CORINFO_TYPE_BYTE, simdSize); + + // blend our two shuffles based on whether each element swaps lanes or not + // return Avx2.BlendVariable(shuf1, shuf2, selection); + retNode = gtNewSimdHWIntrinsicNode(type, shuf1, shuf2, selection, NI_AVX2_BlendVariable, simdBaseJitType, + simdSize); + } + else + { + assert(compIsaSupportedDebugOnly(InstructionSet_SSSE3)); + assert(simdSize == 16); + assert(elementSize > 1); + + // we want to convert our non-byte indices to byte indices, + // e.g., 3 2 1 0 (int) -> 12 13 14 15 8 9 10 11 4 5 6 7 0 1 2 3 (byte) + + // the below is implemented for integral types + if (varTypeIsFloating(simdBaseType)) + { + if (elementSize == 4) + { + simdBaseJitType = CORINFO_TYPE_UINT; + } + else + { + assert(elementSize == 8); + simdBaseJitType = CORINFO_TYPE_ULONG; + } + } + + // shift all indices to the left by tzcnt(size) + cnsNode = gtNewIconNode(BitOperations::TrailingZeroCount(static_cast(elementSize)), TYP_INT); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_X86Base_ShiftLeftLogical, simdBaseJitType, simdSize); + + // the below are implemented with byte/sbyte + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + + // we need to convert the indices to byte indices + // shuffle with a pattern like 0 0 2 2 4 4 6 6 ... 
(for short, and similar for larger) + + simd_t shufCns = {}; + for (size_t index = 0; index < elementCount; index++) + { + for (size_t i = 0; i < elementSize; i++) + { + shufCns.u8[(index * elementSize) + i] = static_cast(index * elementSize); + } + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = shufCns; + + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSSE3_Shuffle, simdBaseJitType, simdSize); + + // or the relevant bits + + simd_t orCns = {}; + for (size_t index = 0; index < simdSize; index++) + { + orCns.u8[index] = static_cast(index & (elementSize - 1)); + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = orCns; + + op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize); + + // apply normal byte shuffle now that we've converted it + + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize); + } + } +#elif defined(TARGET_ARM64) + NamedIntrinsic lookupIntrinsic = NI_AdvSimd_VectorTableLookup; + + if (simdSize == 16) + { + lookupIntrinsic = NI_AdvSimd_Arm64_VectorTableLookup; + } + + // fix-up indices for non-byte sized element types: + // if we have short / int / long, then we want to VectorTableLookup the least-significant byte to all bytes of that + // index element, and then shift left by the applicable amount, then or on the bits for the elements + // if it's not ShuffleNative, we also need to then fix-up the out-of-range indices (only for non-byte though) + // e.g., 3 2 1 0 (int) -> 12 13 14 15 8 9 10 11 4 5 6 7 0 1 2 3 (byte) + if (elementSize > 1) + { + // AdvSimd.ShiftLeftLogical is only valid on integral types, excluding Vector128 + if (varTypeIsFloating(simdBaseType)) + { + if (elementSize == 4) + { + simdBaseJitType = CORINFO_TYPE_INT; + } + else + { + assert(elementSize == 8); + simdBaseJitType = CORINFO_TYPE_LONG; + } + } + if ((simdSize == 16) && (simdBaseJitType == CORINFO_TYPE_INT)) + { + simdBaseJitType = CORINFO_TYPE_UINT; + } + + // shift all indices to the left by tzcnt(size) + cnsNode = gtNewIconNode(BitOperations::TrailingZeroCount(static_cast(elementSize)), TYP_INT); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AdvSimd_ShiftLeftLogical, simdBaseJitType, simdSize); + + // VectorTableLookup is only valid on byte/sbyte + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + + simd_t shufCns = {}; + for (size_t index = 0; index < elementCount; index++) + { + for (size_t i = 0; i < elementSize; i++) + { + shufCns.u8[(index * elementSize) + i] = static_cast(index * elementSize); + } + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = shufCns; + + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, lookupIntrinsic, simdBaseJitType, simdSize); + + // or the relevant bits + simd_t orCns = {}; + for (size_t index = 0; index < simdSize; index++) + { + orCns.u8[index] = static_cast(index & (elementSize - 1)); + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = orCns; + + op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize); + } + + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + assert(retNode != nullptr); + +#if defined(TARGET_XARCH) + if (!isShuffleNative) +#elif defined(TARGET_ARM64) + if ((!isShuffleNative) && (elementSize > 1)) +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + { + // we need to ensure indices larger than elementCount become 0 for larger element types + + assert(op2DupSafe != nullptr); + + // get the CorInfoType used for the index comparison + CorInfoType corType = CORINFO_TYPE_UBYTE; + if (elementSize == 2) + { + corType = CORINFO_TYPE_USHORT; + } + else if (elementSize == 4) + { + corType = CORINFO_TYPE_UINT; + } + else if (elementSize == 8) + { + corType = CORINFO_TYPE_ULONG; + } + + // track whether we need to xor the high bit from the comparand + bool subComparandNode = false; + +#if defined(TARGET_XARCH) + // check if we have hardware accelerated unsigned comparison + bool hardwareAcceleratedUnsignedComparison = compOpportunisticallyDependsOn(InstructionSet_AVX512); + + // if the hardware doesn't support direct unsigned comparison, we attempt to use signed comparison + if (!hardwareAcceleratedUnsignedComparison) + { + corType = CORINFO_TYPE_BYTE; + if (elementSize == 2) + { + corType = CORINFO_TYPE_SHORT; + } + else if (elementSize == 4) + { + corType = CORINFO_TYPE_INT; + } + else if (elementSize == 8) + { + corType = CORINFO_TYPE_LONG; + } + + // if we can't use signed comparison for free, update the comparand and op2DupSafe appropriately. + // doing this manually allows the comparand to still be a constant. 
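The rewrite applied when only signed element compares are available is the standard bias trick: the unsigned test "index < elementCount" is equivalent to a signed compare after subtracting 2^(bits-1) from both sides, which is why both op2DupSafe and the comparand get adjusted. A brute-force self-check of that identity for byte elements (illustrative only, not JIT code):

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    const uint8_t elementCount = 16;
    for (int v = 0; v < 256; v++)
    {
        uint8_t x            = static_cast<uint8_t>(v);
        bool    unsignedLess = x < elementCount;

        // Bias both sides by 0x80 (wrapping, like the SIMD subtraction), then
        // compare as signed bytes.
        int8_t biasedX = static_cast<int8_t>(static_cast<uint8_t>(x - 0x80));
        int8_t biasedN = static_cast<int8_t>(static_cast<uint8_t>(elementCount - 0x80));
        bool   signedLess = biasedX < biasedN;

        assert(unsignedLess == signedLess);
    }
    return 0;
}
```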
+ if (!canUseSignedComparisonHint) + { + subComparandNode = true; + uint64_t subtractionValue = static_cast(1) << (elementSize * 8 - 1); + GenTree* subtraction = + gtNewSimdCreateBroadcastNode(type, gtNewLconNode(subtractionValue), corType, simdSize); + + op2DupSafe = gtNewSimdBinOpNode(GT_SUB, type, op2DupSafe, subtraction, corType, simdSize); + } + } +#endif + + // create the comparand node + uint64_t comparandValue = static_cast(elementCount); + if (subComparandNode) + { + uint64_t subtraction = (uint64_t)1 << (elementSize * 8 - 1); + comparandValue -= subtraction; + } + GenTree* comparand = gtNewSimdCreateBroadcastNode(type, gtNewLconNode(comparandValue), corType, simdSize); + + assert(genTypeSize(JitType2PreciseVarType(corType)) == elementSize); + + // create the mask node (op2 < comparand), and the result node (mask & nativeResult) + GenTree* mask = gtNewSimdCmpOpNode(GT_LT, type, op2DupSafe, comparand, corType, simdSize); + retNode = gtNewSimdBinOpNode(GT_AND, type, retNode, mask, simdBaseJitType, simdSize); + } + else + { + assert(op2DupSafe == nullptr); + } + + return retNode; +} + +//------------------------------------------------------------------------ +// gtNewSimdShuffleNode: Creates a new simd shuffle node +// +// Arguments: +// type -- The type of the node +// op1 -- The values to shuffle +// op2 -- The indices to pick from +// simdBaseJitType -- The base jit type of the node +// simdSize -- The simd size of the node +// isShuffleNative -- Whether we're making a ShuffleNative node vs a Shuffle one +// +// Return Value: +// The shuffle node +// +GenTree* Compiler::gtNewSimdShuffleNode( + var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + assert(op2 != nullptr); + assert(op2->TypeIs(type)); + + // If op2 is not constant, call into the gtNewSimdShuffleVariableNode routine + if (!op2->IsCnsVec()) + { + return gtNewSimdShuffleVariableNode(type, op1, op2, simdBaseJitType, simdSize, isShuffleNative); + } + + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + assert(varTypeIsArithmetic(simdBaseType)); + + size_t elementSize = genTypeSize(simdBaseType); + size_t elementCount = simdSize / elementSize; + + // For ShuffleNative, delegate to the variable implementation to get the same behaviour for + // ShuffleNative with constant vs variable indices for free. + // We want ShuffleNative to be at least as good as Shuffle (at least in non out-of-range cases), + // so if we have all values in range, then just treat it like Shuffle. + // We may as well also track whether we have the identity shuffle and all out-of-range here. + bool gotInvalidIndex = false; + bool hasIdentityShuffle = true; + bool allOutOfRange = true; + for (size_t index = 0; index < elementCount; index++) + { + uint64_t value = op2->GetIntegralVectorConstElement(index, simdBaseType); + if (value >= elementCount) + { + gotInvalidIndex = true; + } + else + { + allOutOfRange = false; + } + if (value != static_cast(index)) + { + hasIdentityShuffle = false; + } + } + if (isShuffleNative && gotInvalidIndex) + { + // Call variable implementation. + return gtNewSimdShuffleVariableNode(type, op1, op2, simdBaseJitType, simdSize, isShuffleNative); + } + if (hasIdentityShuffle) + { + // We have 0 1 2 ... 
for our indices, so just return op1 + return op1; + } + if (allOutOfRange) + { + // allOutOfRange represents indices that are always "out of range" which means zero should be + // selected for every element. We can special-case this down to just returning a zero node + return gtWrapWithSideEffects(gtNewZeroConNode(type), op1, GTF_ALL_EFFECT); + } + + if (op2->IsVectorZero()) + { + // TODO-XARCH-CQ: Zero represents indices that select the first element of op1 each time. We can simplify + // this down to basically a broadcast equivalent. + } + + GenTree* retNode = nullptr; + GenTreeIntConCommon* cnsNode = nullptr; + +#if defined(TARGET_XARCH) + uint8_t control = 0; + bool crossLane = false; + bool needsZero = varTypeIsSmall(simdBaseType) && (simdSize <= 16); + bool differsByLane = false; + uint64_t value = 0; + simd_t vecCns = {}; + simd_t mskCns = {}; + + for (size_t index = 0; index < elementCount; index++) + { + value = op2->GetIntegralVectorConstElement(index, simdBaseType); + + if (value < elementCount) + { + // Most of the 256-bit shuffle/permute instructions operate as if + // the inputs were 2x 128-bit values. If the selected indices cross + // the respective 128-bit "lane" we may need to specialize the codegen. + // Also, for Vector512: If we don't cross 128-bit lanes, then we can emit vpshufb + // instead of vperm* - which has lower latency & allows zeroing in 1 step. + // We also do the same for Vector256. + + crossLane |= ((((uint64_t)index ^ value) * elementSize) & ~(uint64_t)15) != 0; + + // Setting the control for byte/sbyte and short/ushort is unnecessary + // and will actually compute an incorrect control word. But it simplifies + // the overall logic needed here and will remain unused. + + control |= (value << (index * (elementCount / 2))); + + // When Ssse3 is supported, we may need vecCns to accurately select the relevant + // bytes if some index is outside the valid range. Since x86/x64 is little-endian + // we can simplify this down to a for loop that scales the value and selects count + // sequential bytes. + + for (uint32_t i = 0; i < elementSize; i++) + { + vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i); + + // When Ssse3 is not supported, we need to adjust the constant to be AllBitsSet + // so that we can emit a ConditionalSelect(op2, retNode, zeroNode). + + mskCns.u8[(index * elementSize) + i] = 0xFF; + } + } + else + { + needsZero = true; + + // When Ssse3 is supported, we may need vecCns to accurately select the relevant + // bytes if some index is outside the valid range. We can do this by just zeroing + // out each byte in the element. This only requires the most significant bit to be + // set, but we use 0xFF instead since that will be the equivalent of AllBitsSet + + for (uint32_t i = 0; i < elementSize; i++) + { + vecCns.u8[(index * elementSize) + i] = 0xFF; + + // When Ssse3 is not supported, we need to adjust the constant to be Zero + // so that we can emit a ConditionalSelect(op2, retNode, zeroNode). + + mskCns.u8[(index * elementSize) + i] = 0x00; + } + } + + // Check if the value differs in this lane vs any other lane (note: lane is 128 bits, or 16 bytes) + if (index * elementSize >= 16) + { + // Check if the element, masked to the lane, is the same as the element in the same position of earlier + // lanes. If it differs, differsByLane will be set to true. We just compare to the first lane, as we already + // compared it to any other in between lanes. 
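The crossLane expression in this loop works because elementSize is a power of two, so multiplying (index ^ value) by it is the same as XOR-ing the two byte offsets, and masking with ~15 leaves exactly the bits that name the 128-bit lane. A brute-force check of that equivalence across all element sizes up to a Vector512 worth of elements (illustrative only, not JIT code):

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    for (uint64_t elementSize : {1ull, 2ull, 4ull, 8ull})
    {
        uint64_t elementCount = 64 / elementSize; // Vector512 worth of elements
        for (uint64_t index = 0; index < elementCount; index++)
        {
            for (uint64_t value = 0; value < elementCount; value++)
            {
                bool formula = ((((index ^ value) * elementSize) & ~(uint64_t)15) != 0);
                bool direct  = ((index * elementSize) / 16) != ((value * elementSize) / 16);
                assert(formula == direct);
            }
        }
    }
    return 0;
}
```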
+ differsByLane |= ((vecCns.u8[index * elementSize] ^ vecCns.u8[(index * elementSize) & 15]) & 15) != 0; + } + } + + if (simdSize == 32) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + if ((varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI)) || + (varTypeIsShort(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512)) || + // This condition is the condition for when we'd have to emit something slower than what we can do with + // NI_AVX2_Shuffle directly: + ((!crossLane) && (needsZero || (elementSize < 4) || ((elementSize == 4) && differsByLane)))) + { + // we want to treat our type like byte here + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + + uint8_t leftWants = 0; // result left lane wants which lanes bitfield (1 - left, 2 - right) + uint8_t rightWants = 0; // result right lane wants which lanes bitfield (1 - left, 2 - right) + bool nonDefaultShuffleMask = + false; // tracks whether any element in vecCns is not the default value: 0->15, 0->15 + + simd_t selCns = {}; + for (size_t index = 0; index < simdSize; index++) + { + // get pointer to our leftWants/rightWants + uint8_t* wants = (index < 16) ? (&leftWants) : (&rightWants); + + // update our wants based on which values we use + value = vecCns.u8[index]; + if (value < 16) + { + *wants |= 1; + } + else if (value < 32) + { + *wants |= 2; + } + + // update our conditional select mask for if we need 2 shuffles + value ^= static_cast(index & 0x10); + selCns.u8[index] = ((value < 32) && (value >= 16)) ? 0xFF : 0; + + // normalise our shuffle mask, and check if it's default + if (vecCns.u8[index] < 32) + { + vecCns.u8[index] &= 0x0F; + } + if (vecCns.u8[index] != (index & 0x0F)) + { + nonDefaultShuffleMask = true; + } + } + + // we might be able to get away with only 1 shuffle, this is the case if neither leftWants nor + // rightWants are 3 (indicating only 0/1 side used) + if ((leftWants != 3) && (rightWants != 3)) + { + // set result to its initial value + retNode = op1; + + // get the permutation control + uint8_t control = 0; + if (leftWants == 2) + { + // if left wants right lane, then set that bit + control |= 1; + } + if (rightWants != 1) + { + // if right wants right lane (or neither), then set the bit for right lane + control |= 16; + } + + // create the permutation node + // if we have 16, then we don't need to actually permute, since that's what we start with + if (control != 16) + { + GenTree* retNodeDup = fgMakeMultiUse(&retNode); + + cnsNode = gtNewIconNode(control); + retNode = gtNewSimdHWIntrinsicNode(type, retNode, retNodeDup, cnsNode, NI_AVX2_Permute2x128, + simdBaseJitType, simdSize); + } + + // if we have a non-default shuffle mask, we need to do Avx2.Shuffle + if (nonDefaultShuffleMask) + { + op2 = gtNewVconNode(type); + op2->AsVecCon()->gtSimdVal = vecCns; + + retNode = gtNewSimdHWIntrinsicNode(type, retNode, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + } + } + else + { + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op1Dup2 = gtCloneExpr(op1Dup1); + + // create the control for swapping + uint8_t control = 1; // 0b00000001 + cnsNode = gtNewIconNode(control); + GenTree* swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, + simdBaseJitType, simdSize); + + // if we have non-default shuffle mask + if (nonDefaultShuffleMask) + { + // create the shuffle indices node + op2 = gtNewVconNode(type); + op2->AsVecCon()->gtSimdVal = vecCns; + + GenTree* op2Dup = 
fgMakeMultiUse(&op2); + + // shuffle both op1 and swap(op1) + op1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + swap = gtNewSimdHWIntrinsicNode(type, swap, op2Dup, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + } + + // select the appropriate values + GenTree* selNode = gtNewVconNode(type); + selNode->AsVecCon()->gtSimdVal = selCns; + retNode = gtNewSimdHWIntrinsicNode(type, op1, swap, selNode, NI_AVX2_BlendVariable, simdBaseJitType, + simdSize); + } + + assert(retNode != nullptr); + return retNode; + } + + if (elementSize == 4) + { + // try to use vpshufd/vshufps instead of vpermd/vpermps. + if ((!crossLane) && (!differsByLane)) + { + assert(!needsZero); + unsigned immediate = (unsigned)0; + for (size_t i = 0; i < 4; i++) + { + value = op2->GetIntegralVectorConstElement(i, simdBaseType); + immediate |= static_cast((value & (uint64_t)3) << (i * 2)); + } + if (varTypeIsFloating(simdBaseType)) + { + op2 = gtNewIconNode(immediate); + GenTree* op1Copy = fgMakeMultiUse(&op1); + return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX_Shuffle, simdBaseJitType, simdSize); + } + else + { + op2 = gtNewIconNode(immediate); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + } + } + + // otherwise, use vpermd/vpermps. + else + { + for (uint32_t i = 0; i < elementCount; i++) + { + vecCns.u32[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize); + } + + op2 = gtNewVconNode(type); + op2->AsVecCon()->gtSimdVal = vecCns; + + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize); + } + } + else if (elementSize == 2) + { + assert(crossLane); + assert(canUseEvexEncodingDebugOnly()); + for (uint32_t i = 0; i < elementCount; i++) + { + vecCns.u16[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize); + } + + op2 = gtNewVconNode(type); + op2->AsVecCon()->gtSimdVal = vecCns; + + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar16x16, simdBaseJitType, simdSize); + } + else if (elementSize == 1) + { + assert(crossLane); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512VBMI)); + op2 = gtNewVconNode(type); + op2->AsVecCon()->gtSimdVal = vecCns; + + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512VBMI_PermuteVar32x8, simdBaseJitType, simdSize); + } + else + { + assert(elementSize == 8); + + // try to use vshufpd instead of vpermpd. + if (!crossLane) + { + assert(!needsZero); + unsigned immediate = (unsigned)0; + for (size_t i = 0; i < elementCount; i++) + { + value = op2->GetIntegralVectorConstElement(i, simdBaseType); + immediate |= static_cast((value & (uint64_t)1) << i); + } + op2 = gtNewIconNode(immediate); + GenTree* op1Copy = fgMakeMultiUse(&op1); + return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX_Shuffle, CORINFO_TYPE_DOUBLE, simdSize); + } + + // otherwise, use vpermpd. + else + { + cnsNode = gtNewIconNode(control); + retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_AVX2_Permute4x64, simdBaseJitType, simdSize); + } + } + } + else if (simdSize == 64) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + + if (!crossLane) + { + // if element size is 64-bit, try to use vshufpd instead of vpshufb. 
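Both the 8-byte path just above (for 32-byte vectors) and the 64-byte path that follows build the vshufpd-style immediate the same way: one selector bit per result element, taken from the low bit of its source index, because a 128-bit lane only holds two 64-bit slots. A small sketch of that packing with an illustrative helper name (not JIT code):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Pack one low-order index bit per element, as done for the non-lane-crossing
// 8-byte element cases.
uint32_t BuildShufpdImmediate(const uint64_t* indices, size_t elementCount)
{
    uint32_t immediate = 0;
    for (size_t i = 0; i < elementCount; i++)
    {
        immediate |= static_cast<uint32_t>((indices[i] & 1) << i);
    }
    return immediate;
}

int main()
{
    uint64_t indices[4] = {1, 0, 3, 2}; // swap the two 64-bit halves within each lane
    printf("0x%X\n", BuildShufpdImmediate(indices, 4)); // prints 0x5
    return 0;
}
```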
+ if ((elementSize == 8) && (!needsZero)) + { + unsigned immediate = (unsigned)0; + for (size_t i = 0; i < elementCount; i++) + { + value = op2->GetIntegralVectorConstElement(i, simdBaseType); + immediate |= static_cast((value & (uint64_t)1) << i); + } + op2 = gtNewIconNode(immediate); + GenTree* op1Copy = fgMakeMultiUse(&op1); + return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX512_Shuffle, CORINFO_TYPE_DOUBLE, + simdSize); + } + + // if the element size is 32-bit, try to use vpshufd/vshufps instead of vpshufb, + // if the indices (when masked to within the lane) are the same for every lane. + if ((elementSize == 4) && (!needsZero) && (!differsByLane)) + { + unsigned immediate = (unsigned)0; + for (size_t i = 0; i < 4; i++) + { + value = op2->GetIntegralVectorConstElement(i, simdBaseType); + immediate |= static_cast((value & (uint64_t)3) << (i * 2)); + } + if (varTypeIsFloating(simdBaseType)) + { + op2 = gtNewIconNode(immediate); + GenTree* op1Copy = fgMakeMultiUse(&op1); + return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX512_Shuffle, simdBaseJitType, + simdSize); + } + else + { + op2 = gtNewIconNode(immediate); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_Shuffle, simdBaseJitType, simdSize); + } } op2 = gtNewVconNode(type); op2->AsVecCon()->gtSimdVal = vecCns; - // swap the operands to match the encoding requirements - retNode = !isV512Supported ? gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX10v1_PermuteVar16x16, - simdBaseJitType, simdSize) - : gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512BW_VL_PermuteVar16x16, - simdBaseJitType, simdSize); - } - else if (elementSize == 1) - { - assert(IsAvx10OrIsaSupportedDebugOnly(InstructionSet_AVX512VBMI_VL)); - op2 = gtNewVconNode(type); - op2->AsVecCon()->gtSimdVal = vecCns; - - // swap the operands to match the encoding requirements - retNode = !isV512Supported ? gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX10v1_PermuteVar32x8, - simdBaseJitType, simdSize) - : gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512VBMI_VL_PermuteVar32x8, - simdBaseJitType, simdSize); - } - else - { - assert(elementSize == 8); - - cnsNode = gtNewIconNode(control); - retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_AVX2_Permute4x64, simdBaseJitType, simdSize); + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_Shuffle, simdBaseJitType, simdSize); } - } - else if (simdSize == 64) - { - assert(IsBaselineVector512IsaSupportedDebugOnly()); - if (elementSize == 4) + else if (elementSize == 4) { for (uint32_t i = 0; i < elementCount; i++) { @@ -26177,7 +26826,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( op2->AsVecCon()->gtSimdVal = vecCns; // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar16x32, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar16x32, simdBaseJitType, simdSize); } else if (elementSize == 2) { @@ -26190,7 +26839,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( op2->AsVecCon()->gtSimdVal = vecCns; // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512BW_PermuteVar32x16, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar32x16, simdBaseJitType, simdSize); } else if (elementSize == 1) { @@ -26214,7 +26863,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( op2->AsVecCon()->gtSimdVal = vecCns; // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar8x64, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar8x64, simdBaseJitType, simdSize); } assert(retNode != nullptr); @@ -26255,7 +26904,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( if (varTypeIsIntegral(simdBaseType)) { - retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_SSE2_Shuffle, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_X86Base_Shuffle, simdBaseJitType, simdSize); } else if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { @@ -26264,7 +26913,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( else { // for double we need SSE2, but we can't use the integral path ^ because we still need op1Dup here - NamedIntrinsic ni = simdBaseType == TYP_DOUBLE ? 
NI_SSE2_Shuffle : NI_SSE_Shuffle; + NamedIntrinsic ni = NI_X86Base_Shuffle; GenTree* op1Dup = fgMakeMultiUse(&op1); retNode = gtNewSimdHWIntrinsicNode(type, op1, op1Dup, cnsNode, ni, simdBaseJitType, simdSize); } @@ -26274,7 +26923,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( if (needsZero) { - assert((simdSize == 32) || !compIsaSupportedDebugOnly(InstructionSet_SSSE3)); + assert((simdSize == 32) || (!compIsaSupportedDebugOnly(InstructionSet_SSSE3))); op2 = gtNewVconNode(type); op2->AsVecCon()->gtSimdVal = mskCns; @@ -26286,18 +26935,6 @@ GenTree* Compiler::gtNewSimdShuffleNode( uint64_t value = 0; simd_t vecCns = {}; - if (simdSize == 16) - { - // Vector128.Shuffle(a, Vector128.Create(2, 3, 0, 1)) -> ExtractVector128(v.AsUInt64(), v.AsUInt64(), 1) - if ((op2->GetIntegralVectorConstElement(0, TYP_ULONG) == 0x300000002) && - (op2->GetIntegralVectorConstElement(1, TYP_ULONG) == 0x100000000)) - { - GenTree* op1Clone = fgMakeMultiUse(&op1); - return gtNewSimdHWIntrinsicNode(type, op1, op1Clone, gtNewIconNode(1), NI_AdvSimd_ExtractVector128, - CORINFO_TYPE_ULONG, simdSize); - } - } - for (size_t index = 0; index < elementCount; index++) { value = op2->GetIntegralVectorConstElement(index, simdBaseType); @@ -26318,6 +26955,17 @@ GenTree* Compiler::gtNewSimdShuffleNode( } } + if (simdSize == 16) + { + // Vector128.Shuffle(a, Vector128.Create(2, 3, 0, 1)) -> ExtractVector128(v.AsUInt64(), v.AsUInt64(), 1) + if ((vecCns.u64[0] == 0x0F0E0D0C0B0A0908) && (vecCns.u64[1] == 0x0706050403020100)) + { + GenTree* op1Clone = fgMakeMultiUse(&op1); + return gtNewSimdHWIntrinsicNode(type, op1, op1Clone, gtNewIconNode(1), NI_AdvSimd_ExtractVector128, + CORINFO_TYPE_ULONG, simdSize); + } + } + NamedIntrinsic lookupIntrinsic = NI_AdvSimd_VectorTableLookup; if (simdSize == 16) @@ -26339,8 +26987,6 @@ GenTree* Compiler::gtNewSimdShuffleNode( GenTree* Compiler::gtNewSimdSqrtNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -26360,16 +27006,12 @@ GenTree* Compiler::gtNewSimdSqrtNode(var_types type, GenTree* op1, CorInfoType s } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Sqrt; - } - else if (simdBaseType == TYP_FLOAT) - { - intrinsic = NI_SSE_Sqrt; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_Sqrt; } else { - intrinsic = NI_SSE2_Sqrt; + intrinsic = NI_X86Base_Sqrt; } #elif defined(TARGET_ARM64) if ((simdSize == 8) && (simdBaseType == TYP_DOUBLE)) @@ -26429,8 +27071,6 @@ GenTree* Compiler::gtNewSimdStoreNode(GenTree* op1, GenTree* op2, CorInfoType si GenTree* Compiler::gtNewSimdStoreAlignedNode(GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { #if defined(TARGET_XARCH) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(op1 != nullptr); assert(op2 != nullptr); @@ -26449,16 +27089,12 @@ GenTree* Compiler::gtNewSimdStoreAlignedNode(GenTree* op1, GenTree* op2, CorInfo } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_StoreAligned; - } - else if (simdBaseType != TYP_FLOAT) - { - intrinsic = NI_SSE2_StoreAligned; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_StoreAligned; } else { - intrinsic = NI_SSE_StoreAligned; + intrinsic = NI_X86Base_StoreAligned; } return gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, intrinsic, 
simdBaseJitType, simdSize); @@ -26492,8 +27128,6 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree* op1, unsigned simdSize) { #if defined(TARGET_XARCH) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(op1 != nullptr); assert(op2 != nullptr); @@ -26507,21 +27141,17 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree* op1, if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_StoreAlignedNonTemporal; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_StoreAlignedNonTemporal; } else if (simdSize == 32) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); intrinsic = NI_AVX_StoreAlignedNonTemporal; } - else if (simdBaseType != TYP_FLOAT) - { - intrinsic = NI_SSE2_StoreAlignedNonTemporal; - } else { - intrinsic = NI_SSE_StoreAlignedNonTemporal; + intrinsic = NI_X86Base_StoreAlignedNonTemporal; } return gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, intrinsic, simdBaseJitType, simdSize); @@ -26539,8 +27169,6 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree* op1, GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - var_types simdType = getSIMDTypeForSize(simdSize); assert(varTypeIsSIMD(simdType)); @@ -26557,7 +27185,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op1Dup = fgMakeMultiUse(&op1); op1 = gtNewSimdGetLowerNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); @@ -26581,7 +27209,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si if (simdSize == 32) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); GenTree* op1Dup = fgMakeMultiUse(&op1); op1 = gtNewSimdGetLowerNode(TYP_SIMD16, op1, simdBaseJitType, simdSize); @@ -26609,7 +27237,6 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { if (simdBaseType == TYP_FLOAT) { - assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); if (compOpportunisticallyDependsOn(InstructionSet_AVX)) @@ -26627,17 +27254,16 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si } else { - assert(compIsaSupportedDebugOnly(InstructionSet_SSE)); // The shuffle below gives us [0, 1, 2, 3] -> [1, 0, 3, 2] op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op1Shuffled, gtNewIconNode((int)0b10110001, TYP_INT), - NI_SSE_Shuffle, simdBaseJitType, simdSize); + NI_X86Base_Shuffle, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1Shuffled); // The add below now results in [0 + 1, 1 + 0, 2 + 3, 3 + 2] op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, op1, op1Shuffled, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1); // The shuffle below gives us [0 + 1, 1 + 0, 2 + 3, 3 + 2] -> [2 + 3, 3 + 2, 0 + 1, 1 + 0] op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op1Shuffled, gtNewIconNode((int)0b01001110, TYP_INT), - NI_SSE_Shuffle, simdBaseJitType, simdSize); + NI_X86Base_Shuffle, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1Shuffled); } // Finally adding the results gets us [(0 + 1) + (2 + 3), (1 + 0) + (3 + 2), (2 + 3) + (0 + 1), (3 + 2) + (1 @@ -26647,7 +27273,6 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, 
CorInfoType si } else { - assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); if (compOpportunisticallyDependsOn(InstructionSet_AVX)) @@ -26661,7 +27286,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { // The shuffle below gives us [0, 1] -> [1, 0] op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op1Shuffled, gtNewIconNode((int)0b0001, TYP_INT), - NI_SSE2_Shuffle, simdBaseJitType, simdSize); + NI_X86Base_Shuffle, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1Shuffled); } // Finally adding the results gets us [0 + 1, 1 + 0] @@ -26684,7 +27309,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { tmp = fgMakeMultiUse(&op1); opShifted = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(shiftVal, TYP_INT), - NI_SSE2_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); + NI_X86Base_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, opShifted, tmp, simdBaseJitType, simdSize); shiftVal = shiftVal / 2; } @@ -26785,8 +27410,7 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineVector512IsaSupportedDebugOnly() || - ((simdSize != 64) && compIsaSupportedDebugOnly(InstructionSet_AVX10v1))); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -26806,31 +27430,7 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type, var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsArithmetic(simdBaseType)); - NamedIntrinsic intrinsic = NI_Illegal; - - if (simdSize == 64) - { - intrinsic = NI_AVX512F_TernaryLogic; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - assert((simdSize == 16) || (simdSize == 32)); - intrinsic = NI_AVX10v1_TernaryLogic; - } - else - { - assert((simdSize == 16) || (simdSize == 32)); - intrinsic = NI_AVX512F_VL_TernaryLogic; - } - -#ifdef TARGET_XARCH - assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic)); - if (varTypeIsSmall(simdBaseType)) - { - simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UINT : CORINFO_TYPE_INT; - simdBaseType = JitType2PreciseVarType(simdBaseJitType); - } -#endif // TARGET_XARCH + NamedIntrinsic intrinsic = NI_AVX512_TernaryLogic; return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize); } @@ -26850,7 +27450,6 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type, // GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); assert(varTypeIsArithmetic(type)); assert(op1 != nullptr); @@ -26862,21 +27461,9 @@ GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoTy NamedIntrinsic intrinsic = NI_Illegal; #ifdef TARGET_XARCH -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType)) - { - // We need SSE41 to handle long, use software fallback - assert(compIsaSupportedDebugOnly(InstructionSet_SSE41)); - - // Create a GetElement node which handles decomposition - GenTree* op2 = gtNewIconNode(0); - return gtNewSimdGetElementNode(type, op1, op2, simdBaseJitType, simdSize); - } -#endif // TARGET_X86 - if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_ToScalar; } else if (simdSize == 32) @@ -26919,8 +27506,6 @@ GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoTy // GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -26940,9 +27525,9 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op2 = gtNewIconNode(static_cast(FloatRoundingMode::ToZero)); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseJitType, simdSize); } else { @@ -26969,8 +27554,6 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType GenTree* Compiler::gtNewSimdUnOpNode( genTreeOps op, var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -27066,8 +27649,6 @@ GenTree* Compiler::gtNewSimdUnOpNode( GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -27084,7 +27665,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo #if defined(TARGET_XARCH) if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); tmp1 = gtNewSimdGetLowerNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); @@ -27092,43 +27673,43 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo { case TYP_BYTE: { - intrinsic = NI_AVX512BW_ConvertToVector512Int16; + intrinsic = NI_AVX512_ConvertToVector512Int16; break; } case TYP_UBYTE: { - intrinsic = NI_AVX512BW_ConvertToVector512UInt16; + intrinsic 
= NI_AVX512_ConvertToVector512UInt16; break; } case TYP_SHORT: { - intrinsic = NI_AVX512F_ConvertToVector512Int32; + intrinsic = NI_AVX512_ConvertToVector512Int32; break; } case TYP_USHORT: { - intrinsic = NI_AVX512F_ConvertToVector512UInt32; + intrinsic = NI_AVX512_ConvertToVector512UInt32; break; } case TYP_INT: { - intrinsic = NI_AVX512F_ConvertToVector512Int64; + intrinsic = NI_AVX512_ConvertToVector512Int64; break; } case TYP_UINT: { - intrinsic = NI_AVX512F_ConvertToVector512UInt64; + intrinsic = NI_AVX512_ConvertToVector512UInt64; break; } case TYP_FLOAT: { - intrinsic = NI_AVX512F_ConvertToVector512Double; + intrinsic = NI_AVX512_ConvertToVector512Double; break; } @@ -27213,7 +27794,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo case TYP_FLOAT: { - intrinsic = NI_SSE2_ConvertToVector128Double; + intrinsic = NI_X86Base_ConvertToVector128Double; break; } @@ -27234,10 +27815,10 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo { GenTree* op1Dup = fgMakeMultiUse(&op1); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1, NI_SSE2_CompareLessThan, simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1, NI_X86Base_CompareLessThan, simdBaseJitType, simdSize); } - return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); } #elif defined(TARGET_ARM64) if (simdSize == 16) @@ -27280,8 +27861,6 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -27298,7 +27877,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo #if defined(TARGET_XARCH) if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); tmp1 = gtNewSimdGetUpperNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); @@ -27306,43 +27885,43 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo { case TYP_BYTE: { - intrinsic = NI_AVX512BW_ConvertToVector512Int16; + intrinsic = NI_AVX512_ConvertToVector512Int16; break; } case TYP_UBYTE: { - intrinsic = NI_AVX512BW_ConvertToVector512UInt16; + intrinsic = NI_AVX512_ConvertToVector512UInt16; break; } case TYP_SHORT: { - intrinsic = NI_AVX512F_ConvertToVector512Int32; + intrinsic = NI_AVX512_ConvertToVector512Int32; break; } case TYP_USHORT: { - intrinsic = NI_AVX512F_ConvertToVector512UInt32; + intrinsic = NI_AVX512_ConvertToVector512UInt32; break; } case TYP_INT: { - intrinsic = NI_AVX512F_ConvertToVector512Int64; + intrinsic = NI_AVX512_ConvertToVector512Int64; break; } case TYP_UINT: { - intrinsic = NI_AVX512F_ConvertToVector512UInt64; + intrinsic = NI_AVX512_ConvertToVector512UInt64; break; } case TYP_FLOAT: { - intrinsic = NI_AVX512F_ConvertToVector512Double; + intrinsic = NI_AVX512_ConvertToVector512Double; break; } @@ -27406,12 +27985,12 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo GenTree* op1Dup = fgMakeMultiUse(&op1); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op1Dup, NI_SSE_MoveHighToLow, simdBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp1, NI_SSE2_ConvertToVector128Double, 
simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op1Dup, NI_X86Base_MoveHighToLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp1, NI_X86Base_ConvertToVector128Double, simdBaseJitType, simdSize); } else if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) { - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(8), NI_SSE2_ShiftRightLogical128BitLane, + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(8), NI_X86Base_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); switch (simdBaseType) @@ -27454,10 +28033,10 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo { GenTree* op1Dup = fgMakeMultiUse(&op1); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1, NI_SSE2_CompareLessThan, simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1, NI_X86Base_CompareLessThan, simdBaseJitType, simdSize); } - return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_X86Base_UnpackHigh, simdBaseJitType, simdSize); } #elif defined(TARGET_ARM64) if (simdSize == 16) @@ -27515,14 +28094,8 @@ GenTree* Compiler::gtNewSimdWithElementNode( var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsArithmetic(simdBaseType)); - assert(op2->IsCnsIntOrI()); assert(varTypeIsArithmetic(op3)); - ssize_t imm8 = op2->AsIntCon()->IconValue(); - ssize_t count = simdSize / genTypeSize(simdBaseType); - - assert((0 <= imm8) && (imm8 < count)); - #if defined(TARGET_XARCH) switch (simdBaseType) { @@ -27543,7 +28116,7 @@ GenTree* Compiler::gtNewSimdWithElementNode( case TYP_FLOAT: case TYP_SHORT: case TYP_USHORT: - assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + // Supported by baseline ISA requirement break; default: @@ -27588,6 +28161,20 @@ GenTree* Compiler::gtNewSimdWithElementNode( #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 + int immUpperBound = getSIMDVectorLength(simdSize, simdBaseType) - 1; + bool rangeCheckNeeded = !op2->OperIsConst(); + + if (!rangeCheckNeeded) + { + ssize_t imm8 = op2->AsIntCon()->IconValue(); + rangeCheckNeeded = (imm8 < 0) || (imm8 > immUpperBound); + } + + if (rangeCheckNeeded) + { + op2 = addRangeCheckForHWIntrinsic(op2, 0, immUpperBound); + } + return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); } @@ -27605,9 +28192,9 @@ GenTree* Compiler::gtNewSimdWithElementNode( // GenTreeFieldList* Compiler::gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount) { - LclVarDsc* opVarDsc = lvaGetDesc(op->AsLclVar()); - unsigned lclNum = lvaGetLclNum(opVarDsc); - unsigned fieldSize = opVarDsc->lvSize() / fieldCount; + unsigned lclNum = op->AsLclVar()->GetLclNum(); + LclVarDsc* opVarDsc = lvaGetDesc(lclNum); + unsigned fieldSize = opVarDsc->lvExactSize() / fieldCount; var_types fieldType = Compiler::getSIMDTypeForSize(fieldSize); GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); @@ -27636,9 +28223,9 @@ GenTreeFieldList* Compiler::gtConvertTableOpToFieldList(GenTree* op, unsigned fi // GenTreeFieldList* Compiler::gtConvertParamOpToFieldList(GenTree* op, unsigned fieldCount, CORINFO_CLASS_HANDLE clsHnd) { - LclVarDsc* opVarDsc = lvaGetDesc(op->AsLclVar()); - unsigned lclNum = lvaGetLclNum(opVarDsc); - unsigned fieldSize = opVarDsc->lvSize() / fieldCount; + unsigned lclNum = op->AsLclVar()->GetLclNum(); + LclVarDsc* opVarDsc = lvaGetDesc(lclNum); + unsigned 
fieldSize = opVarDsc->lvExactSize() / fieldCount; GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); int offset = 0; unsigned sizeBytes = 0; @@ -27845,10 +28432,8 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const switch (intrinsicId) { #ifdef TARGET_XARCH - case NI_SSE_LoadLow: - case NI_SSE_LoadHigh: - case NI_SSE2_LoadLow: - case NI_SSE2_LoadHigh: + case NI_X86Base_LoadLow: + case NI_X86Base_LoadHigh: addr = Op(2); break; #endif // TARGET_XARCH @@ -27957,9 +28542,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const case NI_AVX2_ConvertToVector256Int16: case NI_AVX2_ConvertToVector256Int32: case NI_AVX2_ConvertToVector256Int64: - case NI_AVX2_BroadcastVector128ToVector256: - case NI_AVX512F_BroadcastVector128ToVector512: - case NI_AVX512F_BroadcastVector256ToVector512: if (GetAuxiliaryJitType() == CORINFO_TYPE_PTR) { addr = Op(1); @@ -28033,7 +28615,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const } //------------------------------------------------------------------------ -// OperIsMemoryLoad: Does this HWI node have memory store semantics? +// OperIsMemoryStore: Does this HWI node have memory store semantics? // // Arguments: // pAddr - optional [out] parameter for the address @@ -28055,7 +28637,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore(GenTree** pAddr) const switch (intrinsicId) { #ifdef TARGET_XARCH - case NI_SSE2_MaskMove: + case NI_X86Base_MaskMove: addr = Op(3); break; @@ -28167,45 +28749,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryStoreOrBarrier() const } } -//------------------------------------------------------------------------ -// OperIsEmbBroadcastCompatible: Checks if the intrinsic is a embedded broadcast compatible inintrsic. -// -// Return Value: -// true if the intrinsic node lowering instruction is embedded broadcast compatible. -// -bool GenTreeHWIntrinsic::OperIsEmbBroadcastCompatible() const -{ -#if defined(TARGET_XARCH) - NamedIntrinsic intrinsicId = GetHWIntrinsicId(); - var_types simdBaseType = GetSimdBaseType(); - - // MaybeImm intrinsics support embedded broadcasts only for their IMM variants (e.g. PSLLQ) - if (HWIntrinsicInfo::MaybeImm(intrinsicId) && - !HWIntrinsicInfo::isImmOp(intrinsicId, GetOperandArray()[GetOperandCount() - 1])) - { - return false; - } - - switch (intrinsicId) - { - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32: - { - return varTypeIsFloating(simdBaseType); - } - - default: - { - return !varTypeIsSmall(simdBaseType) && HWIntrinsicInfo::IsEmbBroadcastCompatible(intrinsicId); - } - } -#else - return false; -#endif // TARGET_XARCH -} - //------------------------------------------------------------------------ // OperIsBroadcastScalar: Is this HWIntrinsic a broadcast node from scalar. // @@ -28224,7 +28767,7 @@ bool GenTreeHWIntrinsic::OperIsBroadcastScalar() const case NI_AVX_BroadcastScalarToVector256: case NI_SSE3_LoadAndDuplicateToVector128: case NI_SSE3_MoveAndDuplicate: - case NI_AVX512F_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: return true; default: return false; @@ -28234,37 +28777,6 @@ bool GenTreeHWIntrinsic::OperIsBroadcastScalar() const #endif } -//------------------------------------------------------------------------ -// OperIsCreateScalarUnsafe: Is this HWIntrinsic a CreateScalarUnsafe node. -// -// Return Value: -// Whether "this" is a CreateScalarUnsafe node. 
-// -bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const -{ - NamedIntrinsic intrinsicId = GetHWIntrinsicId(); - - switch (intrinsicId) - { -#if defined(TARGET_ARM64) - case NI_Vector64_CreateScalarUnsafe: -#endif // TARGET_ARM64 - case NI_Vector128_CreateScalarUnsafe: -#if defined(TARGET_XARCH) - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: -#endif // TARGET_XARCH - { - return true; - } - - default: - { - return false; - } - } -} - //------------------------------------------------------------------------ // OperIsBitwiseHWIntrinsic: Is the operation a bitwise logic operation. // @@ -28313,103 +28825,64 @@ bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const switch (intrinsicId) { // these intrinsics only have the embedded rounding enabled implementation. - case NI_AVX512F_AddScalar: - case NI_AVX512F_DivideScalar: - case NI_AVX512F_MultiplyScalar: - case NI_AVX512F_SubtractScalar: - case NI_AVX512F_SqrtScalar: - case NI_AVX10v1_AddScalar: - case NI_AVX10v1_DivideScalar: - case NI_AVX10v1_MultiplyScalar: - case NI_AVX10v1_SubtractScalar: - case NI_AVX10v1_SqrtScalar: - case NI_AVX10v2_Add: - case NI_AVX10v2_ConvertToVector128Int32: - case NI_AVX10v2_ConvertToVector128Single: - case NI_AVX10v2_ConvertToVector128UInt32: - case NI_AVX10v2_ConvertToVector256Double: - case NI_AVX10v2_ConvertToVector256Int32: - case NI_AVX10v2_ConvertToVector256Int64: - case NI_AVX10v2_ConvertToVector256Single: - case NI_AVX10v2_ConvertToVector256UInt32: - case NI_AVX10v2_ConvertToVector256UInt64: - case NI_AVX10v2_Divide: - case NI_AVX10v2_Multiply: - case NI_AVX10v2_Scale: - case NI_AVX10v2_Sqrt: - case NI_AVX10v2_Subtract: + case NI_AVX512_AddScalar: + case NI_AVX512_DivideScalar: + case NI_AVX512_MultiplyScalar: + case NI_AVX512_SubtractScalar: + case NI_AVX512_SqrtScalar: { return true; } - case NI_AVX512F_FusedMultiplyAdd: - case NI_AVX512F_FusedMultiplyAddScalar: - case NI_AVX512F_FusedMultiplyAddNegated: - case NI_AVX512F_FusedMultiplyAddNegatedScalar: - case NI_AVX512F_FusedMultiplyAddSubtract: - case NI_AVX512F_FusedMultiplySubtract: - case NI_AVX512F_FusedMultiplySubtractAdd: - case NI_AVX512F_FusedMultiplySubtractNegated: - case NI_AVX512F_FusedMultiplySubtractNegatedScalar: - case NI_AVX512F_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplyAddScalar: - case NI_AVX10v1_FusedMultiplyAddNegatedScalar: - case NI_AVX10v1_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplySubtractNegatedScalar: + case NI_AVX512_FusedMultiplyAdd: + case NI_AVX512_FusedMultiplyAddScalar: + case NI_AVX512_FusedMultiplyAddNegated: + case NI_AVX512_FusedMultiplyAddNegatedScalar: + case NI_AVX512_FusedMultiplyAddSubtract: + case NI_AVX512_FusedMultiplySubtract: + case NI_AVX512_FusedMultiplySubtractAdd: + case NI_AVX512_FusedMultiplySubtractNegated: + case NI_AVX512_FusedMultiplySubtractNegatedScalar: + case NI_AVX512_FusedMultiplySubtractScalar: { return numArgs == 4; } - case NI_AVX512F_Add: - case NI_AVX512F_Divide: - case NI_AVX512F_Multiply: - case NI_AVX512F_Subtract: + case NI_AVX512_Add: + case NI_AVX512_Divide: + case NI_AVX512_Multiply: + case NI_AVX512_Subtract: - case NI_AVX512F_Scale: - case NI_AVX512F_ScaleScalar: - case NI_AVX10v1_ScaleScalar: + case NI_AVX512_Scale: + case NI_AVX512_ScaleScalar: - case NI_AVX512F_ConvertScalarToVector128Single: + case NI_AVX512_ConvertScalarToVector128Single: #if defined(TARGET_AMD64) - case NI_AVX512F_X64_ConvertScalarToVector128Double: - case NI_AVX512F_X64_ConvertScalarToVector128Single: - case 
NI_AVX10v1_X64_ConvertScalarToVector128Double: - case NI_AVX10v1_X64_ConvertScalarToVector128Single: + case NI_AVX512_X64_ConvertScalarToVector128Double: + case NI_AVX512_X64_ConvertScalarToVector128Single: #endif // TARGET_AMD64 - case NI_AVX10v1_ConvertScalarToVector128Single: { return numArgs == 3; } - case NI_AVX512F_Sqrt: - case NI_AVX512F_ConvertToInt32: - case NI_AVX512F_ConvertToUInt32: - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256Single: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_ConvertToVector512Single: - case NI_AVX512F_ConvertToVector512UInt32: - case NI_AVX512F_ConvertToVector512Int32: + case NI_AVX512_ConvertToInt32: + case NI_AVX512_ConvertToUInt32: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256Single: + case NI_AVX512_ConvertToVector256UInt32: + case NI_AVX512_ConvertToVector512Double: + case NI_AVX512_ConvertToVector512Int32: + case NI_AVX512_ConvertToVector512Int64: + case NI_AVX512_ConvertToVector512Single: + case NI_AVX512_ConvertToVector512UInt32: + case NI_AVX512_ConvertToVector512UInt64: + case NI_AVX512_Sqrt: #if defined(TARGET_AMD64) - case NI_AVX512F_X64_ConvertToInt64: - case NI_AVX512F_X64_ConvertToUInt64: - case NI_AVX10v1_X64_ConvertToInt64: - case NI_AVX10v1_X64_ConvertToUInt64: + case NI_AVX512_X64_ConvertToInt64: + case NI_AVX512_X64_ConvertToUInt64: #endif // TARGET_AMD64 - case NI_AVX512DQ_ConvertToVector256Single: - case NI_AVX512DQ_ConvertToVector512Double: - case NI_AVX512DQ_ConvertToVector512Int64: - case NI_AVX512DQ_ConvertToVector512UInt64: - case NI_AVX10v1_ConvertToInt32: - case NI_AVX10v1_ConvertToUInt32: - case NI_AVX10v1_V512_ConvertToVector256Single: - case NI_AVX10v1_V512_ConvertToVector512Double: - case NI_AVX10v1_V512_ConvertToVector512Int64: - case NI_AVX10v1_V512_ConvertToVector512UInt64: case NI_AVX10v2_ConvertToSByteWithSaturationAndZeroExtendToInt32: case NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32: - case NI_AVX10v2_V512_ConvertToSByteWithSaturationAndZeroExtendToInt32: - case NI_AVX10v2_V512_ConvertToByteWithSaturationAndZeroExtendToInt32: { return numArgs == 2; } @@ -28447,10 +28920,10 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const { #if defined(TARGET_XARCH) case NI_X86Base_Pause: - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { return true; } @@ -28458,10 +28931,6 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const #if defined(TARGET_ARM64) case NI_ArmBase_Yield: - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: case NI_Sve_GatherPrefetch16Bit: case NI_Sve_GatherPrefetch32Bit: case NI_Sve_GatherPrefetch64Bit: @@ -28474,6 +28943,10 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const case NI_Sve_GetFfrUInt16: case NI_Sve_GetFfrUInt32: case NI_Sve_GetFfrUInt64: + case NI_Sve_Prefetch16Bit: + case NI_Sve_Prefetch32Bit: + case NI_Sve_Prefetch64Bit: + case NI_Sve_Prefetch8Bit: case NI_Sve_SetFfr: { return true; @@ -28606,6 +29079,15 @@ void GenTreeHWIntrinsic::SetHWIntrinsicId(NamedIntrinsic intrinsicId) #endif // DEBUG gtHWIntrinsicId = intrinsicId; + +#ifdef TARGET_XARCH + var_types simdBaseType = GetSimdBaseType(); + + if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) && varTypeIsSmall(simdBaseType)) + { + 
SetSimdBaseJitType(varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT); + } +#endif // TARGET_XARCH } /* static */ bool GenTreeHWIntrinsic::Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2) @@ -28632,9 +29114,9 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) switch (intrinsicId) { #if defined(TARGET_XARCH) - case NI_SSE_StoreFence: - case NI_SSE2_LoadFence: - case NI_SSE2_MemoryFence: + case NI_X86Base_LoadFence: + case NI_X86Base_MemoryFence: + case NI_X86Base_StoreFence: case NI_X86Serialize_Serialize: { // Mark as a store and global reference, much as is done for GT_MEMORYBARRIER @@ -28643,15 +29125,22 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) } case NI_X86Base_Pause: - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { // Mark as a call and global reference, much as is done for GT_KEEPALIVE gtFlags |= (GTF_CALL | GTF_GLOB_REF); break; } + + case NI_Vector128_op_Division: + case NI_Vector256_op_Division: + { + gtFlags |= GTF_EXCEPT; + break; + } #endif // TARGET_XARCH #if defined(TARGET_ARM64) @@ -28660,10 +29149,10 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) case NI_Sve_GatherPrefetch32Bit: case NI_Sve_GatherPrefetch64Bit: case NI_Sve_GatherPrefetch8Bit: - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: + case NI_Sve_Prefetch16Bit: + case NI_Sve_Prefetch32Bit: + case NI_Sve_Prefetch64Bit: + case NI_Sve_Prefetch8Bit: case NI_Sve_GetFfrByte: case NI_Sve_GetFfrInt16: case NI_Sve_GetFfrInt32: @@ -28707,14 +29196,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty switch (id) { #if defined(TARGET_XARCH) - case NI_SSE_And: - case NI_SSE2_And: + case NI_X86Base_And: case NI_AVX_And: case NI_AVX2_And: - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX10v1_V512_And: - case NI_EVEX_AndMask: + case NI_AVX512_And: + case NI_AVX512_AndMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_And: #endif @@ -28723,7 +29209,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_EVEX_NotMask: + case NI_AVX512_NotMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_Not: #endif @@ -28732,14 +29218,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_Xor: - case NI_SSE2_Xor: + case NI_X86Base_Xor: case NI_AVX_Xor: case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: - case NI_EVEX_XorMask: + case NI_AVX512_Xor: + case NI_AVX512_XorMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_Xor: #endif @@ -28748,14 +29231,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_Or: - case NI_SSE2_Or: + case NI_X86Base_Or: case NI_AVX_Or: case NI_AVX2_Or: - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX10v1_V512_Or: - case NI_EVEX_OrMask: + case NI_AVX512_Or: + case NI_AVX512_OrMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_Or: #endif @@ -28764,14 +29244,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_AndNot: - case NI_SSE2_AndNot: + case NI_X86Base_AndNot: case NI_AVX_AndNot: case NI_AVX2_AndNot: - 
case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: - case NI_EVEX_AndNotMask: + case NI_AVX512_AndNot: + case NI_AVX512_AndNotMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_BitwiseClear: #endif @@ -28780,13 +29257,10 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_Add: - case NI_SSE2_Add: + case NI_X86Base_Add: case NI_AVX_Add: case NI_AVX2_Add: - case NI_AVX512F_Add: - case NI_AVX10v2_Add: - case NI_AVX512BW_Add: + case NI_AVX512_Add: #elif defined(TARGET_ARM64) case NI_AdvSimd_Add: case NI_AdvSimd_Arm64_Add: @@ -28796,10 +29270,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_AddScalar: - case NI_SSE2_AddScalar: - case NI_AVX512F_AddScalar: - case NI_AVX10v1_AddScalar: + case NI_X86Base_AddScalar: + case NI_AVX512_AddScalar: { *isScalar = true; return GT_ADD; @@ -28818,11 +29290,9 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_Divide: - case NI_SSE2_Divide: + case NI_X86Base_Divide: case NI_AVX_Divide: - case NI_AVX512F_Divide: - case NI_AVX10v2_Divide: + case NI_AVX512_Divide: #elif defined(TARGET_ARM64) case NI_AdvSimd_Arm64_Divide: #endif @@ -28831,10 +29301,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_DivideScalar: - case NI_SSE2_DivideScalar: - case NI_AVX512F_DivideScalar: - case NI_AVX10v1_DivideScalar: + case NI_X86Base_DivideScalar: + case NI_AVX512_DivideScalar: { *isScalar = true; return GT_DIV; @@ -28853,17 +29321,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_Multiply: - case NI_SSE2_MultiplyLow: + case NI_X86Base_MultiplyLow: case NI_SSE41_MultiplyLow: case NI_AVX_Multiply: case NI_AVX2_MultiplyLow: - case NI_AVX512F_MultiplyLow: - case NI_AVX512BW_MultiplyLow: - case NI_AVX512DQ_MultiplyLow: - case NI_AVX512DQ_VL_MultiplyLow: - case NI_AVX10v1_MultiplyLow: - case NI_AVX10v1_V512_MultiplyLow: + case NI_AVX512_MultiplyLow: #elif defined(TARGET_ARM64) case NI_AdvSimd_Multiply: case NI_AdvSimd_Arm64_Multiply: @@ -28873,9 +29335,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE2_Multiply: - case NI_AVX512F_Multiply: - case NI_AVX10v2_Multiply: + case NI_X86Base_Multiply: + case NI_AVX512_Multiply: { if (varTypeIsFloating(simdBaseType)) { @@ -28886,10 +29347,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_MultiplyScalar: - case NI_SSE2_MultiplyScalar: - case NI_AVX512F_MultiplyScalar: - case NI_AVX10v1_MultiplyScalar: + case NI_X86Base_MultiplyScalar: + case NI_AVX512_MultiplyScalar: { *isScalar = true; return GT_MUL; @@ -28926,37 +29385,25 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_AVX512F_RotateLeft: - case NI_AVX512F_RotateLeftVariable: - case NI_AVX512F_VL_RotateLeft: - case NI_AVX512F_VL_RotateLeftVariable: - case NI_AVX10v1_RotateLeft: - case NI_AVX10v1_RotateLeftVariable: + case NI_AVX512_RotateLeft: + case NI_AVX512_RotateLeftVariable: { return GT_ROL; } - case NI_AVX512F_RotateRight: - case NI_AVX512F_RotateRightVariable: - case NI_AVX512F_VL_RotateRight: - case 
NI_AVX512F_VL_RotateRightVariable: - case NI_AVX10v1_RotateRight: - case NI_AVX10v1_RotateRightVariable: + case NI_AVX512_RotateRight: + case NI_AVX512_RotateRightVariable: { return GT_ROR; } #endif // TARGET_XARCH #if defined(TARGET_XARCH) - case NI_SSE2_ShiftLeftLogical: + case NI_X86Base_ShiftLeftLogical: case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftLeftLogicalVariable: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512F_ShiftLeftLogicalVariable: - case NI_AVX512BW_ShiftLeftLogical: - case NI_AVX512BW_ShiftLeftLogicalVariable: - case NI_AVX512BW_VL_ShiftLeftLogicalVariable: - case NI_AVX10v1_ShiftLeftLogicalVariable: + case NI_AVX512_ShiftLeftLogical: + case NI_AVX512_ShiftLeftLogicalVariable: #elif defined(TARGET_ARM64) case NI_AdvSimd_ShiftLeftLogical: #endif @@ -28976,18 +29423,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE2_ShiftRightArithmetic: + case NI_X86Base_ShiftRightArithmetic: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightArithmeticVariable: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightArithmeticVariable: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512F_VL_ShiftRightArithmeticVariable: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightArithmeticVariable: - case NI_AVX512BW_VL_ShiftRightArithmeticVariable: - case NI_AVX10v1_ShiftRightArithmetic: - case NI_AVX10v1_ShiftRightArithmeticVariable: + case NI_AVX512_ShiftRightArithmetic: + case NI_AVX512_ShiftRightArithmeticVariable: #elif defined(TARGET_ARM64) case NI_AdvSimd_ShiftRightArithmetic: #endif @@ -29007,15 +29447,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE2_ShiftRightLogical: + case NI_X86Base_ShiftRightLogical: case NI_AVX2_ShiftRightLogical: case NI_AVX2_ShiftRightLogicalVariable: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_ShiftRightLogicalVariable: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX512BW_ShiftRightLogicalVariable: - case NI_AVX512BW_VL_ShiftRightLogicalVariable: - case NI_AVX10v1_ShiftRightLogicalVariable: + case NI_AVX512_ShiftRightLogical: + case NI_AVX512_ShiftRightLogicalVariable: #elif defined(TARGET_ARM64) case NI_AdvSimd_ShiftRightLogical: #endif @@ -29035,13 +29471,10 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_Subtract: - case NI_SSE2_Subtract: + case NI_X86Base_Subtract: case NI_AVX_Subtract: case NI_AVX2_Subtract: - case NI_AVX512F_Subtract: - case NI_AVX512BW_Subtract: - case NI_AVX10v2_Subtract: + case NI_AVX512_Subtract: #elif defined(TARGET_ARM64) case NI_AdvSimd_Subtract: case NI_AdvSimd_Arm64_Subtract: @@ -29051,10 +29484,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_SubtractScalar: - case NI_SSE2_SubtractScalar: - case NI_AVX512F_SubtractScalar: - case NI_AVX10v1_SubtractScalar: + case NI_X86Base_SubtractScalar: + case NI_AVX512_SubtractScalar: { *isScalar = true; return GT_SUB; @@ -29073,12 +29504,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareEqual: - case NI_SSE2_CompareEqual: + case NI_X86Base_CompareEqual: case NI_SSE41_CompareEqual: case NI_AVX_CompareEqual: case NI_AVX2_CompareEqual: - case NI_EVEX_CompareEqualMask: + case NI_AVX512_CompareEqualMask: #elif 
defined(TARGET_ARM64) case NI_AdvSimd_CompareEqual: case NI_AdvSimd_Arm64_CompareEqual: @@ -29088,8 +29518,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarEqual: - case NI_SSE2_CompareScalarEqual: + case NI_X86Base_CompareScalarEqual: { *isScalar = true; return GT_EQ; @@ -29108,12 +29537,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareGreaterThan: - case NI_SSE2_CompareGreaterThan: + case NI_X86Base_CompareGreaterThan: case NI_SSE42_CompareGreaterThan: case NI_AVX_CompareGreaterThan: case NI_AVX2_CompareGreaterThan: - case NI_EVEX_CompareGreaterThanMask: + case NI_AVX512_CompareGreaterThanMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_CompareGreaterThan: case NI_AdvSimd_Arm64_CompareGreaterThan: @@ -29123,8 +29551,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarGreaterThan: - case NI_SSE2_CompareScalarGreaterThan: + case NI_X86Base_CompareScalarGreaterThan: { *isScalar = true; return GT_GT; @@ -29143,10 +29570,9 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE2_CompareGreaterThanOrEqual: + case NI_X86Base_CompareGreaterThanOrEqual: case NI_AVX_CompareGreaterThanOrEqual: - case NI_EVEX_CompareGreaterThanOrEqualMask: + case NI_AVX512_CompareGreaterThanOrEqualMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_CompareGreaterThanOrEqual: case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual: @@ -29156,8 +29582,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarGreaterThanOrEqual: - case NI_SSE2_CompareScalarGreaterThanOrEqual: + case NI_X86Base_CompareScalarGreaterThanOrEqual: { *isScalar = true; return GT_GE; @@ -29176,12 +29601,11 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareLessThan: - case NI_SSE2_CompareLessThan: + case NI_X86Base_CompareLessThan: case NI_SSE42_CompareLessThan: case NI_AVX_CompareLessThan: case NI_AVX2_CompareLessThan: - case NI_EVEX_CompareLessThanMask: + case NI_AVX512_CompareLessThanMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_CompareLessThan: case NI_AdvSimd_Arm64_CompareLessThan: @@ -29191,8 +29615,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarLessThan: - case NI_SSE2_CompareScalarLessThan: + case NI_X86Base_CompareScalarLessThan: { *isScalar = true; return GT_LT; @@ -29211,10 +29634,9 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareLessThanOrEqual: - case NI_SSE2_CompareLessThanOrEqual: + case NI_X86Base_CompareLessThanOrEqual: case NI_AVX_CompareLessThanOrEqual: - case NI_EVEX_CompareLessThanOrEqualMask: + case NI_AVX512_CompareLessThanOrEqualMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_CompareLessThanOrEqual: case NI_AdvSimd_Arm64_CompareLessThanOrEqual: @@ -29224,8 +29646,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarLessThanOrEqual: - case NI_SSE2_CompareScalarLessThanOrEqual: + case 
NI_X86Base_CompareScalarLessThanOrEqual: { *isScalar = true; return GT_LE; @@ -29244,16 +29665,14 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareNotEqual: - case NI_SSE2_CompareNotEqual: + case NI_X86Base_CompareNotEqual: case NI_AVX_CompareNotEqual: - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareNotEqualMask: { return GT_NE; } - case NI_SSE_CompareScalarNotEqual: - case NI_SSE2_CompareScalarNotEqual: + case NI_X86Base_CompareScalarNotEqual: { *isScalar = true; return GT_NE; @@ -29289,24 +29708,25 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp( assert(varTypeIsArithmetic(simdBaseType)); assert(varTypeIsSIMD(simdType)); +#if defined(TARGET_XARCH) if (simdSize == 64) { assert(!isScalar); - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); } else if (simdSize == 32) { assert(!isScalar); - assert(comp->IsBaselineVector256IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); } else +#endif // TARGET_XARCH { #if defined(TARGET_ARM64) assert(!isScalar || (simdSize == 8)); #endif // TARGET_ARM64 assert(!isScalar || varTypeIsFloating(simdBaseType)); - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); } assert(op1 != nullptr); @@ -29392,24 +29812,25 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, assert(op1->TypeIs(simdType)); assert(op2 != nullptr); +#if defined(TARGET_XARCH) if (simdSize == 64) { assert(!isScalar); - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); } else if (simdSize == 32) { assert(!isScalar); - assert(comp->IsBaselineVector256IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); } else +#endif // TARGET_XARCH { #if defined(TARGET_ARM64) assert(!isScalar || (simdSize == 8)); #endif // TARGET_ARM64 assert(!isScalar || varTypeIsFloating(simdBaseType)); - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); } NamedIntrinsic id = NI_Illegal; @@ -29425,11 +29846,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsSmall(simdBaseType)) { - id = NI_AVX512BW_Add; + id = NI_AVX512_Add; } else { - id = NI_AVX512F_Add; + id = NI_AVX512_Add; } } else if (simdSize == 32) @@ -29444,14 +29865,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_Add; } } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_AddScalar : NI_SSE_Add; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_AddScalar : NI_SSE2_Add; + id = isScalar ? 
NI_X86Base_AddScalar : NI_X86Base_Add; } #elif defined(TARGET_ARM64) if ((simdSize == 8) && (isScalar || (genTypeSize(simdBaseType) == 8))) @@ -29480,11 +29896,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512DQ_And; + id = NI_AVX512_And; } else { - id = NI_AVX512F_And; + id = NI_AVX512_And; } } else if (simdSize == 32) @@ -29499,14 +29915,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_And; } } - else if (simdBaseType == TYP_FLOAT) - { - id = NI_SSE_And; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_And; + id = NI_X86Base_And; } #elif defined(TARGET_ARM64) id = NI_AdvSimd_And; @@ -29532,11 +29943,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512DQ_AndNot; + id = NI_AVX512_AndNot; } else { - id = NI_AVX512F_AndNot; + id = NI_AVX512_AndNot; } } else if (simdSize == 32) @@ -29551,14 +29962,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_AndNot; } } - else if (simdBaseType == TYP_FLOAT) - { - id = NI_SSE_AndNot; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_AndNot; + id = NI_X86Base_AndNot; } #elif defined(TARGET_ARM64) @@ -29569,26 +29975,28 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, case GT_DIV: { +#if defined(TARGET_XARCH) + assert(varTypeIsFloating(simdBaseType) || varTypeIsInt(simdBaseType)); +#else assert(varTypeIsFloating(simdBaseType)); +#endif assert(op2->TypeIs(simdType)); #if defined(TARGET_XARCH) - if (simdSize == 64) - { - id = NI_AVX512F_Divide; - } - else if (simdSize == 32) - { - id = NI_AVX_Divide; - } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_DivideScalar : NI_SSE_Divide; - } - else + if (varTypeIsFloating(simdBaseType)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_DivideScalar : NI_SSE2_Divide; + if (simdSize == 64) + { + id = NI_AVX512_Divide; + } + else if (simdSize == 32) + { + id = NI_AVX_Divide; + } + else + { + id = isScalar ? NI_X86Base_DivideScalar : NI_X86Base_Divide; + } } #elif defined(TARGET_ARM64) if ((simdSize == 8) && (isScalar || (simdBaseType == TYP_DOUBLE))) @@ -29614,11 +30022,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsShort(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512BW_ShiftLeftLogical : NI_AVX512BW_ShiftLeftLogicalVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftLeftLogical : NI_AVX512_ShiftLeftLogicalVariable; } else if (!varTypeIsByte(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_ShiftLeftLogical : NI_AVX512F_ShiftLeftLogicalVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftLeftLogical : NI_AVX512_ShiftLeftLogicalVariable; } } else if (varTypeIsShort(simdBaseType)) @@ -29632,18 +30040,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftLeftLogical; + id = NI_X86Base_ShiftLeftLogical; } } - else + else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)) - { - id = isV512Supported ? 
NI_AVX512BW_VL_ShiftLeftLogicalVariable - : NI_AVX10v1_ShiftLeftLogicalVariable; - } + id = NI_AVX512_ShiftLeftLogicalVariable; } } else if (!varTypeIsByte(simdBaseType)) @@ -29655,8 +30057,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsInt(op2)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftLeftLogical; + id = NI_X86Base_ShiftLeftLogical; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { @@ -29685,27 +30086,26 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512F_Multiply; + id = NI_AVX512_Multiply; } else if (varTypeIsLong(simdBaseType)) { - id = NI_AVX512DQ_MultiplyLow; + id = NI_AVX512_MultiplyLow; } else if (varTypeIsInt(simdBaseType)) { - id = NI_AVX512F_MultiplyLow; + id = NI_AVX512_MultiplyLow; } else if (varTypeIsShort(simdBaseType)) { - id = NI_AVX512BW_MultiplyLow; + id = NI_AVX512_MultiplyLow; } } else if (varTypeIsLong(simdBaseType)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ_VL)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - id = isV512Supported ? NI_AVX512DQ_VL_MultiplyLow : NI_AVX10v1_MultiplyLow; + id = NI_AVX512_MultiplyLow; } } else if (simdSize == 32) @@ -29720,14 +30120,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX2_MultiplyLow; } } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_MultiplyScalar : NI_SSE_Multiply; - } - else if (simdBaseType == TYP_DOUBLE) + else if (varTypeIsFloating(simdBaseType)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_MultiplyScalar : NI_SSE2_Multiply; + id = isScalar ? NI_X86Base_MultiplyScalar : NI_X86Base_Multiply; } else if (varTypeIsInt(simdBaseType)) { @@ -29738,8 +30133,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsShort(simdBaseType)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_MultiplyLow; + id = NI_X86Base_MultiplyLow; } #elif defined(TARGET_ARM64) if ((simdSize == 8) && (isScalar || (simdBaseType == TYP_DOUBLE))) @@ -29768,11 +30162,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512DQ_Or; + id = NI_AVX512_Or; } else { - id = NI_AVX512F_Or; + id = NI_AVX512_Or; } } else if (simdSize == 32) @@ -29787,14 +30181,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_Or; } } - else if (simdBaseType == TYP_FLOAT) - { - id = NI_SSE_Or; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_Or; + id = NI_X86Base_Or; } #elif defined(TARGET_ARM64) @@ -29814,22 +30203,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (!varTypeIsSmall(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_RotateLeft : NI_AVX512F_RotateLeftVariable; + id = varTypeIsInt(op2) ? NI_AVX512_RotateLeft : NI_AVX512_RotateLeftVariable; } } else if (!varTypeIsSmall(simdBaseType)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ_VL)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - if (isV512Supported) - { - id = varTypeIsInt(op2) ? 
NI_AVX512F_VL_RotateLeft : NI_AVX512F_VL_RotateLeftVariable; - } - else - { - id = varTypeIsInt(op2) ? NI_AVX10v1_RotateLeft : NI_AVX10v1_RotateLeftVariable; - } + id = varTypeIsInt(op2) ? NI_AVX512_RotateLeft : NI_AVX512_RotateLeftVariable; } } #endif // TARGET_XARCH @@ -29847,22 +30228,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (!varTypeIsSmall(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_RotateRight : NI_AVX512F_RotateRightVariable; + id = varTypeIsInt(op2) ? NI_AVX512_RotateRight : NI_AVX512_RotateRightVariable; } } else if (!varTypeIsSmall(simdBaseType)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ_VL)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - if (isV512Supported) - { - id = varTypeIsInt(op2) ? NI_AVX512F_VL_RotateRight : NI_AVX512F_VL_RotateRightVariable; - } - else - { - id = varTypeIsInt(op2) ? NI_AVX10v1_RotateRight : NI_AVX10v1_RotateRightVariable; - } + id = varTypeIsInt(op2) ? NI_AVX512_RotateRight : NI_AVX512_RotateRightVariable; } } #endif // TARGET_XARCH @@ -29880,29 +30253,18 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsShort(simdBaseType)) { - id = - varTypeIsInt(op2) ? NI_AVX512BW_ShiftRightArithmetic : NI_AVX512BW_ShiftRightArithmeticVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftRightArithmetic : NI_AVX512_ShiftRightArithmeticVariable; } else if (!varTypeIsByte(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_ShiftRightArithmetic : NI_AVX512F_ShiftRightArithmeticVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftRightArithmetic : NI_AVX512_ShiftRightArithmeticVariable; } } else if (genTypeSize(simdBaseType) == 8) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512F_VL)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - if (isV512Supported) - { - id = varTypeIsInt(op2) ? NI_AVX512F_VL_ShiftRightArithmetic - : NI_AVX512F_VL_ShiftRightArithmeticVariable; - } - else - { - id = varTypeIsInt(op2) ? NI_AVX10v1_ShiftRightArithmetic - : NI_AVX10v1_ShiftRightArithmeticVariable; - } + id = varTypeIsInt(op2) ? NI_AVX512_ShiftRightArithmetic : NI_AVX512_ShiftRightArithmeticVariable; } } else if (varTypeIsShort(simdBaseType)) @@ -29916,18 +30278,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftRightArithmetic; + id = NI_X86Base_ShiftRightArithmetic; } } - else + else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)) - { - id = isV512Supported ? 
NI_AVX512BW_VL_ShiftRightArithmeticVariable - : NI_AVX10v1_ShiftRightArithmeticVariable; - } + id = NI_AVX512_ShiftRightArithmeticVariable; } } else if (!varTypeIsByte(simdBaseType)) @@ -29939,8 +30295,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsInt(op2)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftRightArithmetic; + id = NI_X86Base_ShiftRightArithmetic; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { @@ -29971,11 +30326,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsShort(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512BW_ShiftRightLogical : NI_AVX512BW_ShiftRightLogicalVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftRightLogical : NI_AVX512_ShiftRightLogicalVariable; } else if (!varTypeIsByte(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_ShiftRightLogical : NI_AVX512F_ShiftRightLogicalVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftRightLogical : NI_AVX512_ShiftRightLogicalVariable; } } else if (varTypeIsShort(simdBaseType)) @@ -29989,18 +30344,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftRightLogical; + id = NI_X86Base_ShiftRightLogical; } } - else + else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)) - { - id = isV512Supported ? NI_AVX512BW_VL_ShiftRightLogicalVariable - : NI_AVX10v1_ShiftRightLogicalVariable; - } + id = NI_AVX512_ShiftRightLogicalVariable; } } else if (!varTypeIsByte(simdBaseType)) @@ -30012,8 +30361,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsInt(op2)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftRightLogical; + id = NI_X86Base_ShiftRightLogical; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { @@ -30042,11 +30390,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsSmall(simdBaseType)) { - id = NI_AVX512BW_Subtract; + id = NI_AVX512_Subtract; } else { - id = NI_AVX512F_Subtract; + id = NI_AVX512_Subtract; } } else if (simdSize == 32) @@ -30061,14 +30409,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_Subtract; } } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_SubtractScalar : NI_SSE_Subtract; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_SubtractScalar : NI_SSE2_Subtract; + id = isScalar ? 
NI_X86Base_SubtractScalar : NI_X86Base_Subtract; } #elif defined(TARGET_ARM64) if ((simdSize == 8) && (isScalar || (genTypeSize(simdBaseType) == 8))) @@ -30097,11 +30440,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512DQ_Xor; + id = NI_AVX512_Xor; } else { - id = NI_AVX512F_Xor; + id = NI_AVX512_Xor; } } else if (simdSize == 32) @@ -30116,14 +30459,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_Xor; } } - else if (simdBaseType == TYP_FLOAT) - { - id = NI_SSE_Xor; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_Xor; + id = NI_X86Base_Xor; } #elif defined(TARGET_ARM64) id = NI_AdvSimd_Xor; @@ -30175,6 +30513,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, assert(op1->TypeIs(simdType)); assert(op2 != nullptr); +#if defined(TARGET_XARCH) if (varTypeIsMask(type)) { assert(!isScalar); @@ -30183,9 +30522,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, else if (simdSize == 32) { assert(!isScalar); - assert(comp->IsBaselineVector256IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); } else +#endif // TARGET_XARCH { assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16)); @@ -30194,7 +30534,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #endif // TARGET_ARM64 assert(!isScalar || varTypeIsFloating(simdBaseType)); - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); } NamedIntrinsic id = NI_Illegal; @@ -30208,7 +30547,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareEqualMask; + id = NI_AVX512_CompareEqualMask; } else if (simdSize == 32) { @@ -30222,10 +30561,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, id = NI_AVX_CompareEqual; } } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarEqual : NI_SSE_CompareEqual; - } else if (varTypeIsLong(simdBaseType)) { if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) @@ -30235,8 +30570,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarEqual : NI_SSE2_CompareEqual; + id = isScalar ? NI_X86Base_CompareScalarEqual : NI_X86Base_CompareEqual; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30258,7 +30592,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareGreaterThanOrEqualMask; + id = NI_AVX512_CompareGreaterThanOrEqualMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30269,14 +30603,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareGreaterThanOrEqual; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarGreaterThanOrEqual : NI_SSE_CompareGreaterThanOrEqual; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarGreaterThanOrEqual : NI_SSE2_CompareGreaterThanOrEqual; + id = isScalar ? 
NI_X86Base_CompareScalarGreaterThanOrEqual : NI_X86Base_CompareGreaterThanOrEqual; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30299,7 +30628,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareGreaterThanMask; + id = NI_AVX512_CompareGreaterThanMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30319,8 +30648,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_CompareGreaterThan; + id = NI_X86Base_CompareGreaterThan; } } else @@ -30333,14 +30661,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareGreaterThan; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarGreaterThan : NI_SSE_CompareGreaterThan; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarGreaterThan : NI_SSE2_CompareGreaterThan; + id = isScalar ? NI_X86Base_CompareScalarGreaterThan : NI_X86Base_CompareGreaterThan; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30362,7 +30685,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareLessThanOrEqualMask; + id = NI_AVX512_CompareLessThanOrEqualMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30373,14 +30696,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareLessThanOrEqual; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarLessThanOrEqual : NI_SSE_CompareLessThanOrEqual; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarLessThanOrEqual : NI_SSE2_CompareLessThanOrEqual; + id = isScalar ? NI_X86Base_CompareScalarLessThanOrEqual : NI_X86Base_CompareLessThanOrEqual; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30403,7 +30721,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareLessThanMask; + id = NI_AVX512_CompareLessThanMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30423,8 +30741,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_CompareLessThan; + id = NI_X86Base_CompareLessThan; } } else @@ -30437,14 +30754,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareLessThan; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarLessThan : NI_SSE_CompareLessThan; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarLessThan : NI_SSE2_CompareLessThan; + id = isScalar ? 
NI_X86Base_CompareScalarLessThan : NI_X86Base_CompareLessThan; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30466,7 +30778,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareNotEqualMask; + id = NI_AVX512_CompareNotEqualMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30477,14 +30789,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareNotEqual; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarNotEqual : NI_SSE_CompareNotEqual; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarNotEqual : NI_SSE2_CompareNotEqual; + id = isScalar ? NI_X86Base_CompareScalarNotEqual : NI_X86Base_CompareNotEqual; } #endif // TARGET_XARCH break; @@ -30643,8 +30950,8 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec return (simdBaseType == TYP_FLOAT) && vecCon->IsZero(); } - case NI_EVEX_CompareEqualMask: - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareEqualMask: + case NI_AVX512_CompareNotEqualMask: { // We can optimize when the constant is zero, but only // for non floating-point since +0.0 == -0.0 @@ -30653,27 +30960,33 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec #endif // TARGET_XARCH case NI_Vector128_Shuffle: + case NI_Vector128_ShuffleNative: + case NI_Vector128_ShuffleNativeFallback: #if defined(TARGET_XARCH) case NI_Vector256_Shuffle: + case NI_Vector256_ShuffleNative: + case NI_Vector256_ShuffleNativeFallback: case NI_Vector512_Shuffle: + case NI_Vector512_ShuffleNative: + case NI_Vector512_ShuffleNativeFallback: #elif defined(TARGET_ARM64) case NI_Vector64_Shuffle: + case NI_Vector64_ShuffleNative: + case NI_Vector64_ShuffleNativeFallback: #endif { - // The shuffle indices need to be constant so we can preserve - // the node as a hwintrinsic instead of rewriting as a user call. + // The shuffle indices ideally are constant so we can get the best + // codegen possible. There are also some case/s where it would have + // to rewrite as a user call instead depending on available intrinsics. assert(GetOperandCount() == 2); return IsUserCall() && (operand == Op(2)); } #if defined(TARGET_XARCH) - case NI_SSE_Xor: - case NI_SSE2_Xor: + case NI_X86Base_Xor: case NI_AVX_Xor: case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: + case NI_AVX512_Xor: { // We recognize this as GT_NOT which can enable other optimizations assert(GetOperandCount() == 2); @@ -30982,6 +31295,30 @@ void ReturnTypeDesc::InitializeReturnType(Compiler* comp, } } +//------------------------------------------------------------------- +// GetReturnFieldOffset: +// For the N'th returned register, identified by "index", returns the +// starting offset in the struct return type of the data being returned. +// +// Arguments: +// index - The register whose offset to get +// +// Return Value: +// Starting offset of data returned in that register. 
+// +unsigned ReturnTypeDesc::GetReturnFieldOffset(unsigned index) const +{ + assert(m_regType[index] != TYP_UNKNOWN); +#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) + return m_fieldOffset[index]; +#else + unsigned offset = 0; + for (unsigned i = 0; i < index; i++) + offset += genTypeSize(m_regType[i]); + return offset; +#endif +} + //------------------------------------------------------------------- // GetABIReturnReg: Return i'th return register as per target ABI // @@ -31341,7 +31678,7 @@ unsigned* SsaNumInfo::GetOutlinedNumSlot(Compiler* compiler, unsigned index) con // Copy over all of the already encoded numbers. if (!baseNum.IsInvalid()) { - for (int i = 0; i < SIMPLE_NUM_COUNT; i++) + for (int i = 0; i < count; i++) { pFirstSlot[i] = baseNum.GetNum(compiler, i); } @@ -31431,7 +31768,7 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree { #if defined(TARGET_XARCH) assert(HWIntrinsicInfo::IsFmaIntrinsic(gtHWIntrinsicId) || HWIntrinsicInfo::IsPermuteVar2x(gtHWIntrinsicId) || - HWIntrinsicInfo::IsTernaryLogic(gtHWIntrinsicId)); + (gtHWIntrinsicId == NI_AVX512_TernaryLogic)); #elif defined(TARGET_ARM64) assert(HWIntrinsicInfo::IsFmaIntrinsic(gtHWIntrinsicId)); #endif @@ -32046,7 +32383,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector64_ToVector128: { assert(retType == TYP_SIMD16); - assert(cnsNode->gtType == TYP_SIMD8); + assert(cnsNode->TypeIs(TYP_SIMD8)); cnsNode->AsVecCon()->gtSimd16Val.v64[1] = {}; cnsNode->gtType = retType; @@ -32057,7 +32394,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector128_ToVector256: { assert(retType == TYP_SIMD32); - assert(cnsNode->gtType == TYP_SIMD16); + assert(cnsNode->TypeIs(TYP_SIMD16)); cnsNode->AsVecCon()->gtSimd32Val.v128[1] = {}; cnsNode->gtType = retType; @@ -32068,7 +32405,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector128_ToVector512: { assert(retType == TYP_SIMD64); - assert(cnsNode->gtType == TYP_SIMD16); + assert(cnsNode->TypeIs(TYP_SIMD16)); cnsNode->AsVecCon()->gtSimd64Val.v128[1] = {}; cnsNode->AsVecCon()->gtSimd64Val.v256[1] = {}; @@ -32080,7 +32417,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector256_ToVector512: { assert(retType == TYP_SIMD64); - assert(cnsNode->gtType == TYP_SIMD32); + assert(cnsNode->TypeIs(TYP_SIMD32)); cnsNode->AsVecCon()->gtSimd64Val.v256[1] = {}; cnsNode->gtType = retType; @@ -32093,7 +32430,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector128_GetUpper: { assert(retType == TYP_SIMD8); - assert(cnsNode->gtType == TYP_SIMD16); + assert(cnsNode->TypeIs(TYP_SIMD16)); cnsNode->AsVecCon()->gtSimd8Val = cnsNode->AsVecCon()->gtSimd16Val.v64[1]; cnsNode->gtType = retType; @@ -32104,7 +32441,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector256_GetUpper: { assert(retType == TYP_SIMD16); - assert(cnsNode->gtType == TYP_SIMD32); + assert(cnsNode->TypeIs(TYP_SIMD32)); cnsNode->AsVecCon()->gtSimd16Val = cnsNode->AsVecCon()->gtSimd32Val.v128[1]; cnsNode->gtType = retType; @@ -32115,7 +32452,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector512_GetUpper: { assert(retType == TYP_SIMD32); - assert(cnsNode->gtType == TYP_SIMD64); + assert(cnsNode->TypeIs(TYP_SIMD64)); cnsNode->AsVecCon()->gtSimd32Val = cnsNode->AsVecCon()->gtSimd64Val.v256[1]; cnsNode->gtType = retType; @@ -32344,8 +32681,8 @@ GenTree* 
Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector128_WithLower: { assert(retType == TYP_SIMD16); - assert(cnsNode->gtType == TYP_SIMD16); - assert(otherNode->gtType == TYP_SIMD8); + assert(cnsNode->TypeIs(TYP_SIMD16)); + assert(otherNode->TypeIs(TYP_SIMD8)); cnsNode->AsVecCon()->gtSimd16Val.v64[0] = otherNode->AsVecCon()->gtSimd8Val; resultNode = cnsNode; @@ -32355,8 +32692,8 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector256_WithLower: { assert(retType == TYP_SIMD32); - assert(cnsNode->gtType == TYP_SIMD32); - assert(otherNode->gtType == TYP_SIMD16); + assert(cnsNode->TypeIs(TYP_SIMD32)); + assert(otherNode->TypeIs(TYP_SIMD16)); cnsNode->AsVecCon()->gtSimd32Val.v128[0] = otherNode->AsVecCon()->gtSimd16Val; resultNode = cnsNode; @@ -32366,8 +32703,8 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector512_WithLower: { assert(retType == TYP_SIMD64); - assert(cnsNode->gtType == TYP_SIMD64); - assert(otherNode->gtType == TYP_SIMD32); + assert(cnsNode->TypeIs(TYP_SIMD64)); + assert(otherNode->TypeIs(TYP_SIMD32)); cnsNode->AsVecCon()->gtSimd64Val.v256[0] = otherNode->AsVecCon()->gtSimd32Val; resultNode = cnsNode; @@ -32379,8 +32716,8 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector128_WithUpper: { assert(retType == TYP_SIMD16); - assert(cnsNode->gtType == TYP_SIMD16); - assert(otherNode->gtType == TYP_SIMD8); + assert(cnsNode->TypeIs(TYP_SIMD16)); + assert(otherNode->TypeIs(TYP_SIMD8)); cnsNode->AsVecCon()->gtSimd16Val.v64[1] = otherNode->AsVecCon()->gtSimd8Val; resultNode = cnsNode; @@ -32390,8 +32727,8 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector256_WithUpper: { assert(retType == TYP_SIMD32); - assert(cnsNode->gtType == TYP_SIMD32); - assert(otherNode->gtType == TYP_SIMD16); + assert(cnsNode->TypeIs(TYP_SIMD32)); + assert(otherNode->TypeIs(TYP_SIMD16)); cnsNode->AsVecCon()->gtSimd32Val.v128[1] = otherNode->AsVecCon()->gtSimd16Val; resultNode = cnsNode; @@ -32401,8 +32738,8 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector512_WithUpper: { assert(retType == TYP_SIMD64); - assert(cnsNode->gtType == TYP_SIMD64); - assert(otherNode->gtType == TYP_SIMD32); + assert(cnsNode->TypeIs(TYP_SIMD64)); + assert(otherNode->TypeIs(TYP_SIMD32)); cnsNode->AsVecCon()->gtSimd64Val.v256[1] = otherNode->AsVecCon()->gtSimd32Val; resultNode = cnsNode; @@ -33056,7 +33393,7 @@ bool Compiler::gtCanSkipCovariantStoreCheck(GenTree* value, GenTree* array) // Check for store of NULL. if (value->OperIs(GT_CNS_INT)) { - assert(value->gtType == TYP_REF); + assert(value->TypeIs(TYP_REF)); if (value->AsIntCon()->gtIconVal == 0) { JITDUMP("\nstelem of null: skipping covariant store check\n"); @@ -33064,8 +33401,6 @@ bool Compiler::gtCanSkipCovariantStoreCheck(GenTree* value, GenTree* array) } // Non-0 const refs can only occur with frozen objects assert(value->IsIconHandle(GTF_ICON_OBJ_HDL)); - assert(doesMethodHaveFrozenObjects() || - (compIsForInlining() && impInlineInfo->InlinerCompiler->doesMethodHaveFrozenObjects())); } // Try and get a class handle for the array diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 281376639f17..98b726245fb8 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -485,7 +485,7 @@ enum GenTreeFlags : unsigned int // This flag is useful in cases where it is required to generate register // indirect addressing mode. 
One such case is virtual stub calls on xarch. GTF_IND_UNALIGNED = 0x02000000, // OperIsIndir() -- the load or store is unaligned (we assume worst case alignment of 1 byte) - GTF_IND_INVARIANT = 0x01000000, // GT_IND -- the target is invariant (a prejit indirection) + GTF_IND_INVARIANT = 0x01000000, // GT_IND -- the target is invariant (an AOT indirection) GTF_IND_NONNULL = 0x00400000, // GT_IND -- the indirection never returns null (zero) GTF_IND_INITCLASS = 0x00200000, // OperIsIndir() -- the indirection requires preceding static cctor GTF_IND_ALLOW_NON_ATOMIC = 0x00100000, // GT_IND -- this memory access does not need to be atomic @@ -1073,7 +1073,7 @@ struct GenTree return false; } - if (gtType == TYP_VOID) + if (TypeIs(TYP_VOID)) { // These are the only operators which can produce either VOID or non-VOID results. assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsHWIntrinsic() || @@ -1181,7 +1181,7 @@ struct GenTree static bool OperIsLocalField(genTreeOps gtOper) { - return (gtOper == GT_LCL_FLD || gtOper == GT_LCL_ADDR || gtOper == GT_STORE_LCL_FLD); + return StaticOperIs(gtOper, GT_LCL_FLD, GT_LCL_ADDR, GT_STORE_LCL_FLD); } bool OperIsLocalField() const @@ -1191,7 +1191,7 @@ struct GenTree static bool OperIsScalarLocal(genTreeOps gtOper) { - return (gtOper == GT_LCL_VAR || gtOper == GT_STORE_LCL_VAR); + return StaticOperIs(gtOper, GT_LCL_VAR, GT_STORE_LCL_VAR); } static bool OperIsNonPhiLocal(genTreeOps gtOper) @@ -1206,17 +1206,17 @@ struct GenTree static bool OperIsLocalStore(genTreeOps gtOper) { - return (gtOper == GT_STORE_LCL_VAR || gtOper == GT_STORE_LCL_FLD); + return StaticOperIs(gtOper, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD); } static bool OperIsAddrMode(genTreeOps gtOper) { - return (gtOper == GT_LEA); + return gtOper == GT_LEA; } static bool OperIsInitVal(genTreeOps gtOper) { - return (gtOper == GT_INIT_VAL); + return gtOper == GT_INIT_VAL; } bool OperIsInitVal() const @@ -1231,7 +1231,7 @@ struct GenTree bool IsConstInitVal() const { - return (gtOper == GT_CNS_INT) || (OperIsInitVal() && (gtGetOp1()->gtOper == GT_CNS_INT)); + return (OperIs(GT_CNS_INT)) || (OperIsInitVal() && (gtGetOp1()->OperIs(GT_CNS_INT))); } bool OperIsBlkOp(); @@ -1258,34 +1258,19 @@ struct GenTree return OperIsStoreBlk(OperGet()); } - bool OperIsPutArgSplit() const - { -#if FEATURE_ARG_SPLIT - assert((gtOper != GT_PUTARG_SPLIT) || compFeatureArgSplit()); - return gtOper == GT_PUTARG_SPLIT; -#else // !FEATURE_ARG_SPLIT - return false; -#endif - } - bool OperIsPutArgStk() const { - return gtOper == GT_PUTARG_STK; - } - - bool OperIsPutArgStkOrSplit() const - { - return OperIsPutArgStk() || OperIsPutArgSplit(); + return OperIs(GT_PUTARG_STK); } bool OperIsPutArgReg() const { - return gtOper == GT_PUTARG_REG; + return OperIs(GT_PUTARG_REG); } bool OperIsPutArg() const { - return OperIsPutArgStk() || OperIsPutArgReg() || OperIsPutArgSplit(); + return OperIsPutArgStk() || OperIsPutArgReg(); } bool OperIsFieldList() const @@ -1382,7 +1367,7 @@ struct GenTree static bool OperIsCC(genTreeOps gtOper) { - return (gtOper == GT_JCC) || (gtOper == GT_SETCC); + return StaticOperIs(gtOper, GT_JCC, GT_SETCC); } bool OperIsCC() const @@ -1392,7 +1377,7 @@ struct GenTree static bool OperIsShift(genTreeOps gtOper) { - return (gtOper == GT_LSH) || (gtOper == GT_RSH) || (gtOper == GT_RSZ); + return StaticOperIs(gtOper, GT_LSH, GT_RSH, GT_RSZ); } bool OperIsShift() const @@ -1405,7 +1390,7 @@ struct GenTree #ifdef TARGET_64BIT return false; #else - return (gtOper == GT_LSH_HI) || (gtOper == 
GT_RSH_LO); + return StaticOperIs(gtOper, GT_LSH_HI, GT_RSH_LO); #endif } @@ -1416,7 +1401,7 @@ struct GenTree static bool OperIsRotate(genTreeOps gtOper) { - return (gtOper == GT_ROL) || (gtOper == GT_ROR); + return StaticOperIs(gtOper, GT_ROL, GT_ROR); } bool OperIsRotate() const @@ -1436,7 +1421,7 @@ struct GenTree static bool OperIsMul(genTreeOps gtOper) { - return (gtOper == GT_MUL) || (gtOper == GT_MULHI) + return StaticOperIs(gtOper, GT_MUL, GT_MULHI) #if !defined(TARGET_64BIT) || defined(TARGET_ARM64) || (gtOper == GT_MUL_LONG) #endif @@ -1452,8 +1437,8 @@ struct GenTree static bool OperIsRMWMemOp(genTreeOps gtOper) { // Return if binary op is one of the supported operations for RMW of memory. - return (gtOper == GT_ADD || gtOper == GT_SUB || gtOper == GT_AND || gtOper == GT_OR || gtOper == GT_XOR || - gtOper == GT_NOT || gtOper == GT_NEG || OperIsShiftOrRotate(gtOper)); + return StaticOperIs(gtOper, GT_ADD, GT_SUB, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG) || + OperIsShiftOrRotate(gtOper); } bool OperIsRMWMemOp() const { @@ -1501,7 +1486,10 @@ struct GenTree bool isCommutativeHWIntrinsic() const; bool isContainableHWIntrinsic() const; bool isRMWHWIntrinsic(Compiler* comp); +#if defined(TARGET_XARCH) bool isEvexCompatibleHWIntrinsic(Compiler* comp) const; + bool isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const; +#endif // TARGET_XARCH bool isEmbeddedMaskingCompatibleHWIntrinsic() const; #else bool isCommutativeHWIntrinsic() const @@ -1519,10 +1507,17 @@ struct GenTree return false; } - bool isEvexCompatibleHWIntrinsic() const +#if defined(TARGET_XARCH) + bool isEvexCompatibleHWIntrinsic(Compiler* comp) const + { + return false; + } + + bool isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const { return false; } +#endif // TARGET_XARCH bool isEmbeddedMaskingCompatibleHWIntrinsic() const { @@ -1542,11 +1537,11 @@ struct GenTree static bool OperMayOverflow(genTreeOps gtOper) { - return ((gtOper == GT_ADD) || (gtOper == GT_SUB) || (gtOper == GT_MUL) || (gtOper == GT_CAST) + return StaticOperIs(gtOper, GT_ADD, GT_SUB, GT_MUL, GT_CAST) #if !defined(TARGET_64BIT) - || (gtOper == GT_ADD_HI) || (gtOper == GT_SUB_HI) + || StaticOperIs(gtOper, GT_ADD_HI, GT_SUB_HI) #endif - ); + ; } bool OperMayOverflow() const @@ -1566,18 +1561,18 @@ struct GenTree static bool OperIsArrLength(genTreeOps gtOper) { - return (gtOper == GT_ARR_LENGTH) || (gtOper == GT_MDARR_LENGTH); + return StaticOperIs(gtOper, GT_ARR_LENGTH, GT_MDARR_LENGTH); } static bool OperIsMDArr(genTreeOps gtOper) { - return (gtOper == GT_MDARR_LENGTH) || (gtOper == GT_MDARR_LOWER_BOUND); + return StaticOperIs(gtOper, GT_MDARR_LENGTH, GT_MDARR_LOWER_BOUND); } // Is this an access of an SZ array length, MD array length, or MD array lower bounds? 
static bool OperIsArrMetaData(genTreeOps gtOper) { - return (gtOper == GT_ARR_LENGTH) || (gtOper == GT_MDARR_LENGTH) || (gtOper == GT_MDARR_LOWER_BOUND); + return StaticOperIs(gtOper, GT_ARR_LENGTH, GT_MDARR_LENGTH, GT_MDARR_LOWER_BOUND); } static bool OperIsIndirOrArrMetaData(genTreeOps gtOper) @@ -1638,7 +1633,7 @@ struct GenTree static bool OperIsLoad(genTreeOps gtOper) { - return (gtOper == GT_IND) || (gtOper == GT_BLK); + return StaticOperIs(gtOper, GT_IND, GT_BLK); } bool OperIsLoad() const @@ -1752,7 +1747,7 @@ struct GenTree case GT_FIELD_ADDR: return true; case GT_RETURN: - return gtType == TYP_VOID; + return TypeIs(TYP_VOID); default: return false; } @@ -1777,7 +1772,7 @@ struct GenTree return true; case GT_SWIFT_ERROR_RET: - return (gtType == TYP_VOID); + return (TypeIs(TYP_VOID)); default: return false; } @@ -1796,7 +1791,6 @@ struct GenTree bool OperSupportsReverseOpEvalOrder(Compiler* comp) const; static bool RequiresNonNullOp2(genTreeOps oper); - bool IsValidCallArgument(); #endif // DEBUG inline bool IsIntegralConst(ssize_t constVal) const; @@ -1940,8 +1934,10 @@ struct GenTree //--------------------------------------------------------------------- -#if defined(DEBUG) || CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_MEM_ALLOC || \ - NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS || DUMP_FLOWGRAPHS + static GenTree** EffectiveUse(GenTree** use); + +#if defined(DEBUG) || CALL_ARG_STATS || COUNT_BASIC_BLOCKS || EMITTER_STATS || MEASURE_MEM_ALLOC || NODEBASH_STATS || \ + MEASURE_NODE_SIZE || COUNT_AST_OPERS || DUMP_FLOWGRAPHS static const char* OpName(genTreeOps op); #endif @@ -1970,7 +1966,7 @@ struct GenTree var_types oldType = gtType; gtType = newType; GenTree* node = this; - while (node->gtOper == GT_COMMA) + while (node->OperIs(GT_COMMA)) { node = node->gtGetOp2(); if (node->gtType != newType) @@ -2233,7 +2229,7 @@ struct GenTree bool IsIconHandle() const { - return (gtOper == GT_CNS_INT) && ((gtFlags & GTF_ICON_HDL_MASK) != 0); + return (OperIs(GT_CNS_INT)) && ((gtFlags & GTF_ICON_HDL_MASK) != 0); } bool IsIconHandle(GenTreeFlags handleType) const @@ -2241,7 +2237,7 @@ struct GenTree // check that handleType is one of the valid GTF_ICON_* values assert((handleType & GTF_ICON_HDL_MASK) != 0); assert((handleType & ~GTF_ICON_HDL_MASK) == 0); - return (gtOper == GT_CNS_INT) && ((gtFlags & GTF_ICON_HDL_MASK) == handleType); + return (OperIs(GT_CNS_INT)) && ((gtFlags & GTF_ICON_HDL_MASK) == handleType); } template @@ -2254,7 +2250,7 @@ struct GenTree // For non-icon handle trees, returns GTF_EMPTY. GenTreeFlags GetIconHandleFlag() const { - return (gtOper == GT_CNS_INT) ? (gtFlags & GTF_ICON_HDL_MASK) : GTF_EMPTY; + return (OperIs(GT_CNS_INT)) ? (gtFlags & GTF_ICON_HDL_MASK) : GTF_EMPTY; } bool IsTlsIconHandle() @@ -2270,7 +2266,7 @@ struct GenTree // Mark this node as no longer being a handle; clear its GTF_ICON_*_HDL bits. 
void ClearIconHandleMask() { - assert(gtOper == GT_CNS_INT); + assert(OperIs(GT_CNS_INT)); gtFlags &= ~GTF_ICON_HDL_MASK; } @@ -2725,16 +2721,10 @@ struct GenTreeFieldList : public GenTree class UseList { - Use* m_head; - Use* m_tail; + Use* m_head = nullptr; + Use* m_tail = nullptr; public: - UseList() - : m_head(nullptr) - , m_tail(nullptr) - { - } - Use* GetHead() const { return m_head; @@ -2792,6 +2782,12 @@ struct GenTreeFieldList : public GenTree } } + void Clear() + { + m_head = nullptr; + m_tail = nullptr; + } + bool IsSorted() const { unsigned offset = 0; @@ -2831,6 +2827,8 @@ struct GenTreeFieldList : public GenTree // Insert a new field use after the specified use without updating side effect flags. void InsertFieldLIR(Compiler* compiler, Use* insertAfter, GenTree* node, unsigned offset, var_types type); + GenTree* SoleFieldOrThis(); + //-------------------------------------------------------------------------- // Equals: Check if 2 FIELD_LIST nodes are equal. // @@ -2844,8 +2842,8 @@ struct GenTreeFieldList : public GenTree // static bool Equals(GenTreeFieldList* list1, GenTreeFieldList* list2) { - assert(list1->TypeGet() == TYP_STRUCT); - assert(list2->TypeGet() == TYP_STRUCT); + assert(list1->TypeIs(TYP_STRUCT)); + assert(list2->TypeIs(TYP_STRUCT)); UseIterator i1 = list1->Uses().begin(); UseIterator end1 = list1->Uses().end(); @@ -3094,6 +3092,19 @@ struct GenTreeOp : public GenTreeUnOp // then sets the flag GTF_DIV_BY_CNS_OPT and GTF_DONT_CSE on the constant void CheckDivideByConstOptimized(Compiler* comp); + GenTree*& ReturnValueRef() + { + assert(OperIs(GT_RETURN, GT_RETFILT, GT_SWIFT_ERROR_RET)); +#ifdef SWIFT_SUPPORT + if (OperIs(GT_SWIFT_ERROR_RET)) + { + return gtOp2; + } +#endif // SWIFT_SUPPORT + + return gtOp1; + } + GenTree* GetReturnValue() const { assert(OperIs(GT_RETURN, GT_RETFILT, GT_SWIFT_ERROR_RET)); @@ -3247,7 +3258,7 @@ struct GenTreeIntCon : public GenTreeIntConCommon /* The InitializeArray intrinsic needs to go back to the newarray statement to find the class handle of the array so that we can get its size. However, - in ngen mode, the handle in that statement does not correspond to the compile + in AOT mode, the handle in that statement does not correspond to the compile time handle (rather it lets you get a handle at run-time). In that case, we also need to store a compile time handle, which goes in this gtCompileTimeHandle field. */ @@ -3324,7 +3335,7 @@ struct GenTreeLngCon : public GenTreeIntConCommon inline INT64 GenTreeIntConCommon::LngValue() const { #ifndef TARGET_64BIT - assert(gtOper == GT_CNS_LNG); + assert(OperIs(GT_CNS_LNG)); return AsLngCon()->gtLconVal; #else return IconValue(); @@ -3334,7 +3345,7 @@ inline INT64 GenTreeIntConCommon::LngValue() const inline void GenTreeIntConCommon::SetLngValue(INT64 val) { #ifndef TARGET_64BIT - assert(gtOper == GT_CNS_LNG); + assert(OperIs(GT_CNS_LNG)); AsLngCon()->gtLconVal = val; #else // Compile time asserts that these two fields overlap and have the same offsets: gtIconVal and gtLconVal @@ -3347,13 +3358,13 @@ inline void GenTreeIntConCommon::SetLngValue(INT64 val) inline ssize_t GenTreeIntConCommon::IconValue() const { - assert(gtOper == GT_CNS_INT); // We should never see a GT_CNS_LNG for a 64-bit target! + assert(OperIs(GT_CNS_INT)); // We should never see a GT_CNS_LNG for a 64-bit target! return AsIntCon()->gtIconVal; } inline void GenTreeIntConCommon::SetIconValue(ssize_t val) { - assert(gtOper == GT_CNS_INT); // We should never see a GT_CNS_LNG for a 64-bit target! 
+ assert(OperIs(GT_CNS_INT)); // We should never see a GT_CNS_LNG for a 64-bit target! AsIntCon()->gtIconVal = val; } @@ -3362,7 +3373,7 @@ inline INT64 GenTreeIntConCommon::IntegralValue() const #ifdef TARGET_64BIT return LngValue(); #else - return gtOper == GT_CNS_LNG ? LngValue() : (INT64)IconValue(); + return OperIs(GT_CNS_LNG) ? LngValue() : (INT64)IconValue(); #endif // TARGET_64BIT } @@ -4223,13 +4234,13 @@ enum GenTreeCallFlags : unsigned int GTF_CALL_M_GUARDED_DEVIRT_CHAIN = 0x00080000, // this call is a candidate for chained guarded devirtualization GTF_CALL_M_ALLOC_SIDE_EFFECTS = 0x00100000, // this is a call to an allocator with side effects GTF_CALL_M_SUPPRESS_GC_TRANSITION = 0x00200000, // suppress the GC transition (i.e. during a pinvoke) but a separate GC safe point is required. + GTF_CALL_M_ASYNC = 0x00400000, // this call is a runtime async method call and thus a suspension point GTF_CALL_M_EXPANDED_EARLY = 0x00800000, // the Virtual Call target address is expanded and placed in gtControlExpr in Morph rather than in Lower - GTF_CALL_M_HAS_LATE_DEVIRT_INFO = 0x01000000, // this call has late devirtualzation info - GTF_CALL_M_LDVIRTFTN_INTERFACE = 0x02000000, // ldvirtftn on an interface type - GTF_CALL_M_CAST_CAN_BE_EXPANDED = 0x04000000, // this cast (helper call) can be expanded if it's profitable. To be removed. - GTF_CALL_M_CAST_OBJ_NONNULL = 0x08000000, // if we expand this specific cast we don't need to check the input object for null + GTF_CALL_M_LDVIRTFTN_INTERFACE = 0x01000000, // ldvirtftn on an interface type + GTF_CALL_M_CAST_CAN_BE_EXPANDED = 0x02000000, // this cast (helper call) can be expanded if it's profitable. To be removed. + GTF_CALL_M_CAST_OBJ_NONNULL = 0x04000000, // if we expand this specific cast we don't need to check the input object for null // NOTE: if needed, this flag can be removed, and we can introduce new _NONNUL cast helpers - GTF_CALL_M_STACK_ARRAY = 0x10000000, // this call is a new array helper for a stack allocated array. + GTF_CALL_M_STACK_ARRAY = 0x08000000, // this call is a new array helper for a stack allocated array. }; inline constexpr GenTreeCallFlags operator ~(GenTreeCallFlags a) @@ -4466,13 +4477,7 @@ struct ReturnTypeDesc #endif } -#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - unsigned GetReturnFieldOffset(unsigned index) const - { - assert(m_regType[index] != TYP_UNKNOWN); - return m_fieldOffset[index]; - } -#endif + unsigned GetReturnFieldOffset(unsigned index) const; // Get i'th ABI return register regNumber GetABIReturnReg(unsigned idx, CorInfoCallConvExtension callConv) const; @@ -4555,6 +4560,7 @@ enum class WellKnownArg : unsigned ThisPointer, VarArgsCookie, InstParam, + AsyncContinuation, RetBuffer, PInvokeFrame, WrapperDelegateCell, @@ -4570,12 +4576,14 @@ enum class WellKnownArg : unsigned SwiftSelf, X86TailCallSpecialArg, StackArrayLocal, + RuntimeMethodHandle, }; #ifdef DEBUG const char* getWellKnownArgName(WellKnownArg arg); #endif +<<<<<<< HEAD struct CallArgABIInformation { CallArgABIInformation() @@ -4733,14 +4741,16 @@ struct CallArgABIInformation } }; +======= +>>>>>>> upstream-jun struct NewCallArg { // The node being passed. GenTree* Node = nullptr; // The signature type of the node. var_types SignatureType = TYP_UNDEF; - // The class handle if SignatureType == TYP_STRUCT. - CORINFO_CLASS_HANDLE SignatureClsHnd = NO_CLASS_HANDLE; + // The class layout if varTypeIsStruct(SignatureType). 
+ ClassLayout* SignatureLayout = nullptr; // The type of well known arg enum WellKnownArg WellKnownArg = ::WellKnownArg::None; @@ -4756,13 +4766,13 @@ struct NewCallArg return copy; } - static NewCallArg Struct(GenTree* node, var_types type, CORINFO_CLASS_HANDLE clsHnd) + static NewCallArg Struct(GenTree* node, var_types type, ClassLayout* layout) { assert(varTypeIsStruct(node) && varTypeIsStruct(type)); NewCallArg arg; arg.Node = node; arg.SignatureType = type; - arg.SignatureClsHnd = clsHnd; + arg.SignatureLayout = layout; arg.ValidateTypes(); return arg; } @@ -4805,8 +4815,8 @@ class CallArg CallArg* m_next; CallArg* m_lateNext; - // The class handle for the signature type (when varTypeIsStruct(SignatureType)). - CORINFO_CLASS_HANDLE m_signatureClsHnd; + // The class layout for the signature type (when varTypeIsStruct(SignatureType)). + ClassLayout* m_signatureLayout; // The type of the argument in the signature. var_types m_signatureType : 5; #ifdef TARGET_WASM @@ -4828,7 +4838,7 @@ class CallArg , m_lateNode(nullptr) , m_next(nullptr) , m_lateNext(nullptr) - , m_signatureClsHnd(NO_CLASS_HANDLE) + , m_signatureLayout(nullptr) , m_signatureType(TYP_UNDEF) #ifdef TARGET_WASM , m_signatureCorInfoType(CORINFO_TYPE_UNDEF) @@ -4841,8 +4851,7 @@ class CallArg } public: - CallArgABIInformation AbiInfo; - ABIPassingInformation NewAbiInfo; + ABIPassingInformation AbiInfo; CallArg(const NewCallArg& arg) : CallArg() @@ -4850,10 +4859,14 @@ class CallArg m_earlyNode = arg.Node; m_wellKnownArg = arg.WellKnownArg; m_signatureType = arg.SignatureType; +<<<<<<< HEAD #ifdef TARGET_WASM m_signatureCorInfoType = arg.SignatureCorInfoType; #endif m_signatureClsHnd = arg.SignatureClsHnd; +======= + m_signatureLayout = arg.SignatureLayout; +>>>>>>> upstream-jun } CallArg(const CallArg&) = delete; @@ -4872,7 +4885,8 @@ class CallArg CallArg*& LateNextRef() { return m_lateNext; } CallArg* GetLateNext() { return m_lateNext; } void SetLateNext(CallArg* lateNext) { m_lateNext = lateNext; } - CORINFO_CLASS_HANDLE GetSignatureClassHandle() { return m_signatureClsHnd; } + ClassLayout* GetSignatureLayout() { return m_signatureLayout; } + CORINFO_CLASS_HANDLE GetSignatureClassHandle() { return m_signatureLayout == nullptr ? NO_CLASS_HANDLE : m_signatureLayout->GetClassHandle(); } var_types GetSignatureType() { return m_signatureType; } #ifdef TARGET_WASM CorInfoType GetSignatureCorInfoType() { return m_signatureCorInfoType; } @@ -4899,14 +4913,6 @@ class CallArg #ifdef DEBUG void Dump(Compiler* comp); - // Check that the value of 'AbiInfo.IsStruct' is consistent. - // A struct arg must be one of the following: - // - A node of struct type, - // - A GT_FIELD_LIST, or - // - A node of a scalar type, passed in a single register or slot - // (or two slots in the case of a struct pass on the stack as TYP_DOUBLE). - // - void CheckIsStruct(); #endif }; @@ -4925,11 +4931,11 @@ class CallArgs // made for this call. unsigned m_padStkAlign; #endif - bool m_hasThisPointer : 1; - bool m_hasRetBuffer : 1; - bool m_isVarArgs : 1; - bool m_abiInformationDetermined : 1; - bool m_newAbiInformationDetermined : 1; + bool m_hasThisPointer : 1; + bool m_hasRetBuffer : 1; + bool m_isVarArgs : 1; + bool m_abiInformationDetermined : 1; + bool m_hasAddedFinalArgs : 1; // True if we have one or more register arguments. bool m_hasRegArgs : 1; // True if we have one or more stack arguments. 
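The GetReturnFieldOffset declaration above replaces the RISCV64/LoongArch64-only accessor with one available on all targets; per the implementation earlier in this diff, targets without a per-field offset table derive the offset by summing the sizes of the preceding return registers. A minimal, self-contained sketch of that prefix-sum idea, using illustrative names and plain byte sizes rather than the JIT's types:

#include <cassert>
#include <cstddef>
#include <vector>

// Illustrative only: each element is the byte size occupied by one return register.
unsigned ReturnFieldOffsetSketch(const std::vector<unsigned>& regSizes, size_t index)
{
    assert(index < regSizes.size());
    unsigned offset = 0;
    // The Nth field starts after the bytes occupied by registers 0..N-1.
    for (size_t i = 0; i < index; i++)
    {
        offset += regSizes[i];
    }
    return offset;
}

For example, with register sizes {8, 4}, index 1 yields offset 8.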
@@ -4994,15 +5000,13 @@ class CallArgs void AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call); void ResetFinalArgsAndABIInfo(); - void DetermineNewABIInfo(Compiler* comp, GenTreeCall* call); + void DetermineABIInfo(Compiler* comp, GenTreeCall* call); void ArgsComplete(Compiler* comp, GenTreeCall* call); void EvalArgsToTemps(Compiler* comp, GenTreeCall* call); void SetNeedsTemp(CallArg* arg); bool IsNonStandard(Compiler* comp, GenTreeCall* call, CallArg* arg); - GenTree* MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum); - // clang-format off bool HasThisPointer() const { return m_hasThisPointer; } bool HasRetBuffer() const { return m_hasRetBuffer; } @@ -5010,7 +5014,6 @@ class CallArgs void SetIsVarArgs() { m_isVarArgs = true; } void ClearIsVarArgs() { m_isVarArgs = false; } bool IsAbiInformationDetermined() const { return m_abiInformationDetermined; } - bool IsNewAbiInformationDetermined() const { return m_newAbiInformationDetermined; } // TODO-Remove: Workaround for bad codegen in MSVC versions < 19.41, see // https://github.com/dotnet/runtime/pull/104370#issuecomment-2222910359 @@ -5211,6 +5214,13 @@ struct GenTreeCall final : public GenTree #endif } + void SetIsAsync() + { + gtCallMoreFlags |= GTF_CALL_M_ASYNC; + } + + bool IsAsync() const; + //--------------------------------------------------------------------------- // GetRegNumByIdx: get i'th return register allocated to this call node. // @@ -5384,6 +5394,9 @@ struct GenTreeCall final : public GenTree { return (gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_VTABLE; } + + bool IsDevirtualizationCandidate(Compiler* compiler) const; + bool IsInlineCandidate() const { return (gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0; @@ -5434,7 +5447,7 @@ struct GenTreeCall final : public GenTree { #ifdef FEATURE_MULTIREG_RET #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); + return (TypeIs(TYP_STRUCT)) && (gtReturnTypeDesc.GetReturnRegCount() > 1); #else #if defined(TARGET_X86) || defined(TARGET_ARM) @@ -5803,11 +5816,13 @@ struct GenTreeCall final : public GenTree jitstd::vector* gtInlineCandidateInfoList; HandleHistogramProfileCandidateInfo* gtHandleHistogramProfileCandidateInfo; - LateDevirtualizationInfo* gtLateDevirtualizationInfo; + CORINFO_GENERIC_HANDLE compileTimeHelperArgumentHandle; // Used to track type handle argument of dynamic helpers void* gtDirectCallAddress; // Used to pass direct call address between lower and codegen }; + LateDevirtualizationInfo* gtLateDevirtualizationInfo; // Always available for user virtual calls + // expression evaluated after args are placed which determines the control target GenTree* gtControlExpr; @@ -5831,12 +5846,9 @@ struct GenTreeCall final : public GenTree // IL offset of the call wrt its parent method. IL_OFFSET gtRawILOffset; +#endif // defined(DEBUG) - // In DEBUG we report even non inline candidates in the inline tree in - // fgNoteNonInlineCandidate. We need to keep around the inline context for - // this as normally it's part of the candidate info. class InlineContext* gtInlineContext; -#endif // defined(DEBUG) bool IsHelperCall() const { @@ -5898,7 +5910,7 @@ struct GenTreeMultiRegOp : public GenTreeOp unsigned GetRegCount() const { - return (TypeGet() == TYP_LONG) ? 2 : 1; + return TypeIs(TYP_LONG) ? 
2 : 1; } //--------------------------------------------------------------------------- @@ -6626,9 +6638,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsMemoryStore(GenTree** pAddr = nullptr) const; bool OperIsMemoryLoadOrStore() const; bool OperIsMemoryStoreOrBarrier() const; - bool OperIsEmbBroadcastCompatible() const; bool OperIsBroadcastScalar() const; - bool OperIsCreateScalarUnsafe() const; bool OperIsBitwiseHWIntrinsic() const; bool OperIsEmbRoundingEnabled() const; @@ -6640,7 +6650,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsConvertMaskToVector() const { #if defined(TARGET_XARCH) - return OperIsHWIntrinsic(NI_EVEX_ConvertMaskToVector); + return OperIsHWIntrinsic(NI_AVX512_ConvertMaskToVector); #elif defined(TARGET_ARM64) return OperIsHWIntrinsic(NI_Sve_ConvertMaskToVector); #else @@ -6651,7 +6661,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsConvertVectorToMask() const { #if defined(TARGET_XARCH) - return OperIsHWIntrinsic(NI_EVEX_ConvertVectorToMask); + return OperIsHWIntrinsic(NI_AVX512_ConvertVectorToMask); #elif defined(TARGET_ARM64) return OperIsHWIntrinsic(NI_Sve_ConvertVectorToMask); #else @@ -6797,20 +6807,6 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool ShouldConstantProp(GenTree* operand, GenTreeVecCon* vecCon); - void NormalizeJitBaseTypeToInt(NamedIntrinsic id, var_types simdBaseType) - { - assert(varTypeIsSmall(simdBaseType)); - - if (varTypeIsUnsigned(simdBaseType)) - { - SetSimdBaseJitType(CORINFO_TYPE_UINT); - } - else - { - SetSimdBaseJitType(CORINFO_TYPE_UINT); - } - } - private: void SetHWIntrinsicId(NamedIntrinsic intrinsicId); @@ -6982,26 +6978,25 @@ struct GenTreeVecCon : public GenTree case TYP_LONG: case TYP_ULONG: { -#if defined(TARGET_64BIT) - if (arg->IsCnsIntOrI()) + if (arg->IsIntegralConst()) { - simdVal.i64[argIdx] = static_cast(arg->AsIntCon()->gtIconVal); + simdVal.i64[argIdx] = arg->AsIntConCommon()->IntegralValue(); return true; } -#else - if (arg->OperIsLong() && arg->AsOp()->gtOp1->IsCnsIntOrI() && arg->AsOp()->gtOp2->IsCnsIntOrI()) +#if !defined(TARGET_64BIT) + else if (arg->OperIsLong() && arg->gtGetOp1()->IsCnsIntOrI() && arg->gtGetOp2()->IsCnsIntOrI()) { - // 32-bit targets will decompose GT_CNS_LNG into two GT_CNS_INT + // 32-bit targets may decompose GT_CNS_LNG into two GT_CNS_INT // We need to reconstruct the 64-bit value in order to handle this - INT64 gtLconVal = arg->AsOp()->gtOp2->AsIntCon()->gtIconVal; + INT64 gtLconVal = arg->gtGetOp2()->AsIntCon()->gtIconVal; gtLconVal <<= 32; - gtLconVal |= arg->AsOp()->gtOp1->AsIntCon()->gtIconVal; + gtLconVal |= static_cast(arg->gtGetOp1()->AsIntCon()->gtIconVal); simdVal.i64[argIdx] = gtLconVal; return true; } -#endif // TARGET_64BIT +#endif // !TARGET_64BIT else { // We expect the constant to have been already zeroed @@ -7569,8 +7564,6 @@ struct GenTreeIndexAddr : public GenTreeOp CORINFO_CLASS_HANDLE gtStructElemClass; // If the element type is a struct, this is the struct type. - BasicBlock* gtIndRngFailBB; // Basic block to jump to for array-index-out-of-range - var_types gtElemType; // The element type of the array. unsigned gtElemSize; // size of elements in the array unsigned gtLenOffset; // The offset from the array's base address to its length. 
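The GenTreeVecCon change above rebuilds a 64-bit constant on 32-bit targets from the two halves of a decomposed GT_CNS_LNG, treating the low half as unsigned so its sign bit does not leak into the upper word. A small self-contained sketch of that reconstruction, with a hypothetical helper name:

#include <cstdint>

// Rebuild a 64-bit value from its high and low 32-bit halves.
// Casting the low half to uint32_t first prevents sign extension from
// overwriting the bits contributed by the high half.
int64_t Reconstruct64(int32_t hi, int32_t lo)
{
    uint64_t value = (static_cast<uint64_t>(static_cast<uint32_t>(hi)) << 32) |
                     static_cast<uint32_t>(lo);
    return static_cast<int64_t>(value);
}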
@@ -7586,7 +7579,6 @@ struct GenTreeIndexAddr : public GenTreeOp bool boundsCheck) : GenTreeOp(GT_INDEX_ADDR, TYP_BYREF, arr, ind) , gtStructElemClass(structElemClass) - , gtIndRngFailBB(nullptr) , gtElemType(elemType) , gtElemSize(elemSize) , gtLenOffset(lenOffset) @@ -7783,8 +7775,7 @@ struct GenTreeMDArr : public GenTreeArrCommon // struct GenTreeBoundsChk : public GenTreeOp { - BasicBlock* gtIndRngFailBB; // Basic block to jump to for index-out-of-range - SpecialCodeKind gtThrowKind; // Kind of throw block to branch to on failure + SpecialCodeKind gtThrowKind; // Kind of throw block to branch to on failure // Store some information about the array element type that was in the GT_INDEX_ADDR node before morphing. // Note that this information is also stored in the ARR_ADDR node of the morphed tree, but that can be hard @@ -7793,7 +7784,6 @@ struct GenTreeBoundsChk : public GenTreeOp GenTreeBoundsChk(GenTree* index, GenTree* length, SpecialCodeKind kind) : GenTreeOp(GT_BOUNDS_CHECK, TYP_VOID, index, length) - , gtIndRngFailBB(nullptr) , gtThrowKind(kind) , gtInxType(TYP_UNKNOWN) { @@ -8058,6 +8048,7 @@ struct GenTreeBlk : public GenTreeIndir return m_layout; } +<<<<<<< HEAD #ifdef TARGET_WASM void SetLayout(ClassLayout* layout) { @@ -8065,6 +8056,14 @@ struct GenTreeBlk : public GenTreeIndir m_layout = layout; } #endif // TARGET_WASM +======= + void SetLayout(ClassLayout* newLayout) + { + assert(newLayout != nullptr); + assert(newLayout->GetSize() == m_layout->GetSize()); + m_layout = newLayout; + } +>>>>>>> upstream-jun // The data to be stored (null for GT_BLK) GenTree*& Data() @@ -8099,9 +8098,7 @@ struct GenTreeBlk : public GenTreeIndir BlkOpKindUnrollMemmove, } gtBlkOpKind; -#ifndef JIT32_GCENCODER bool gtBlkOpGcUnsafe; -#endif bool ContainsReferences() { @@ -8139,11 +8136,9 @@ struct GenTreeBlk : public GenTreeIndir assert(layout != nullptr); assert(layout->GetSize() != 0); - m_layout = layout; - gtBlkOpKind = BlkOpKindInvalid; -#ifndef JIT32_GCENCODER + m_layout = layout; + gtBlkOpKind = BlkOpKindInvalid; gtBlkOpGcUnsafe = false; -#endif } #if DEBUGGABLE_GENTREE @@ -8718,9 +8713,7 @@ struct GenTreePutArgStk : public GenTreeUnOp { private: unsigned m_byteOffset; -#ifdef FEATURE_PUT_STRUCT_ARG_STK unsigned m_byteSize; // The number of bytes that this argument is occupying on the stack with padding. -#endif public: #if defined(UNIX_X86_ABI) @@ -8738,7 +8731,6 @@ struct GenTreePutArgStk : public GenTreeUnOp // In future if we need to add more such bool fields consider bit fields. #endif -#ifdef FEATURE_PUT_STRUCT_ARG_STK // Instruction selection: during codegen time, what code sequence we will be using // to encode this operation. 
// TODO-Throughput: The following information should be obtained from the child @@ -8758,23 +8750,18 @@ struct GenTreePutArgStk : public GenTreeUnOp private: uint8_t m_argLoadSizeDelta; #endif // TARGET_XARCH -#endif // FEATURE_PUT_STRUCT_ARG_STK public: - GenTreePutArgStk(genTreeOps oper, - var_types type, - GenTree* op1, - unsigned stackByteOffset, -#if defined(FEATURE_PUT_STRUCT_ARG_STK) - unsigned stackByteSize, -#endif + GenTreePutArgStk(genTreeOps oper, + var_types type, + GenTree* op1, + unsigned stackByteOffset, + unsigned stackByteSize, GenTreeCall* callNode, bool putInIncomingArgArea) : GenTreeUnOp(oper, type, op1 DEBUGARG(/*largeNode*/ false)) , m_byteOffset(stackByteOffset) -#if defined(FEATURE_PUT_STRUCT_ARG_STK) , m_byteSize(stackByteSize) -#endif #if defined(UNIX_X86_ABI) , gtPadAlign(0) #endif @@ -8784,12 +8771,10 @@ struct GenTreePutArgStk : public GenTreeUnOp #if FEATURE_FASTTAILCALL , gtPutInIncomingArgArea(putInIncomingArgArea) #endif // FEATURE_FASTTAILCALL -#if defined(FEATURE_PUT_STRUCT_ARG_STK) , gtPutArgStkKind(Kind::Invalid) #if defined(TARGET_XARCH) , m_argLoadSizeDelta(UINT8_MAX) #endif -#endif // FEATURE_PUT_STRUCT_ARG_STK { } @@ -8830,7 +8815,6 @@ struct GenTreePutArgStk : public GenTreeUnOp } #endif -#ifdef FEATURE_PUT_STRUCT_ARG_STK unsigned GetStackByteSize() const { return m_byteSize; @@ -8877,9 +8861,6 @@ struct GenTreePutArgStk : public GenTreeUnOp { return gtPutArgStkKind == Kind::Push; } -#else // !FEATURE_PUT_STRUCT_ARG_STK - unsigned GetStackByteSize() const; -#endif // !FEATURE_PUT_STRUCT_ARG_STK #if DEBUGGABLE_GENTREE GenTreePutArgStk() @@ -8889,158 +8870,6 @@ struct GenTreePutArgStk : public GenTreeUnOp #endif }; -#if FEATURE_ARG_SPLIT -// Represent the struct argument: split value in register(s) and stack -struct GenTreePutArgSplit : public GenTreePutArgStk -{ - unsigned gtNumRegs; - - GenTreePutArgSplit(GenTree* op1, - unsigned stackByteOffset, -#if defined(FEATURE_PUT_STRUCT_ARG_STK) - unsigned stackByteSize, -#endif - unsigned numRegs, - GenTreeCall* callNode, - bool putIncomingArgArea) - : GenTreePutArgStk(GT_PUTARG_SPLIT, - TYP_STRUCT, - op1, - stackByteOffset, -#if defined(FEATURE_PUT_STRUCT_ARG_STK) - stackByteSize, -#endif - callNode, - putIncomingArgArea) - , gtNumRegs(numRegs) - { - ClearOtherRegs(); - ClearOtherRegFlags(); - } - - // Type required to support multi-reg struct arg. - var_types m_regType[MAX_REG_ARG]; - - // First reg of struct is always given by GetRegNum(). - // gtOtherRegs holds the other reg numbers of struct. - regNumberSmall gtOtherRegs[MAX_REG_ARG - 1]; - - MultiRegSpillFlags gtSpillFlags; - - //--------------------------------------------------------------------------- - // GetRegNumByIdx: get i'th register allocated to this struct argument. 
- // - // Arguments: - // idx - index of the struct - // - // Return Value: - // Return regNumber of i'th register of this struct argument - // - regNumber GetRegNumByIdx(unsigned idx) const - { - assert(idx < MAX_REG_ARG); - - if (idx == 0) - { - return GetRegNum(); - } - - return (regNumber)gtOtherRegs[idx - 1]; - } - - //---------------------------------------------------------------------- - // SetRegNumByIdx: set i'th register of this struct argument - // - // Arguments: - // reg - reg number - // idx - index of the struct - // - // Return Value: - // None - // - void SetRegNumByIdx(regNumber reg, unsigned idx) - { - assert(idx < MAX_REG_ARG); - if (idx == 0) - { - SetRegNum(reg); - } - else - { - gtOtherRegs[idx - 1] = (regNumberSmall)reg; - assert(gtOtherRegs[idx - 1] == reg); - } - } - - //---------------------------------------------------------------------------- - // ClearOtherRegs: clear multi-reg state to indicate no regs are allocated - // - // Arguments: - // None - // - // Return Value: - // None - // - void ClearOtherRegs() - { - for (unsigned i = 0; i < MAX_REG_ARG - 1; ++i) - { - gtOtherRegs[i] = REG_NA; - } - } - - GenTreeFlags GetRegSpillFlagByIdx(unsigned idx) const - { - return GetMultiRegSpillFlagsByIdx(gtSpillFlags, idx); - } - - void SetRegSpillFlagByIdx(GenTreeFlags flags, unsigned idx) - { -#if FEATURE_MULTIREG_RET - gtSpillFlags = SetMultiRegSpillFlagsByIdx(gtSpillFlags, flags, idx); -#endif - } - - //-------------------------------------------------------------------------- - // GetRegType: Get var_type of the register specified by index. - // - // Arguments: - // index - Index of the register. - // First register will have an index 0 and so on. - // - // Return Value: - // var_type of the register specified by its index. - - var_types GetRegType(unsigned index) const - { - assert(index < gtNumRegs); - var_types result = m_regType[index]; - return result; - } - - //------------------------------------------------------------------- - // clearOtherRegFlags: clear GTF_* flags associated with gtOtherRegs - // - // Arguments: - // None - // - // Return Value: - // None - // - void ClearOtherRegFlags() - { - gtSpillFlags = 0; - } - -#if DEBUGGABLE_GENTREE - GenTreePutArgSplit() - : GenTreePutArgStk() - { - } -#endif -}; -#endif // FEATURE_ARG_SPLIT - // Represents GT_COPY or GT_RELOAD node // // Needed to support multi-reg ops. 
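The deleted GenTreePutArgSplit code above used the multi-reg layout that other nodes in this diff still rely on: register 0 lives in the node's main register field and registers 1..N-1 live in a small side array indexed with an offset of one. A self-contained sketch of that indexing pattern, with hypothetical types and a fixed illustrative capacity:

#include <cassert>
#include <cstdint>

constexpr unsigned kMaxRegs = 4;    // illustrative capacity
using RegNum = uint8_t;             // stand-in for the JIT's regNumber
constexpr RegNum kRegNA = 0xFF;     // "no register" sentinel

struct MultiRegSketch
{
    RegNum mainReg = kRegNA;                                    // register 0
    RegNum otherRegs[kMaxRegs - 1] = {kRegNA, kRegNA, kRegNA};  // registers 1..N-1

    RegNum GetRegByIdx(unsigned idx) const
    {
        assert(idx < kMaxRegs);
        // Register 0 is stored separately; the rest are offset by one in the side array.
        return (idx == 0) ? mainReg : otherRegs[idx - 1];
    }

    void SetRegByIdx(RegNum reg, unsigned idx)
    {
        assert(idx < kMaxRegs);
        if (idx == 0)
        {
            mainReg = reg;
        }
        else
        {
            otherRegs[idx - 1] = reg;
        }
    }
};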
@@ -9579,7 +9408,71 @@ enum insCflags : unsigned INS_FLAGS_NZC, INS_FLAGS_NZCV, }; +#elif defined(TARGET_XARCH) +enum insCflags : unsigned +{ + INS_FLAGS_NONE = 0x0, + INS_FLAGS_CF = 0x1, + INS_FLAGS_ZF = 0x2, + INS_FLAGS_SF = 0x4, + INS_FLAGS_OF = 0x8 +}; + +// todo-apx-xarch : this data structure might not be necessary, but nice to have the CC +// encoded somewhere +enum insCC : unsigned +{ + INS_CC_O = 0x0, // OF = 1 + + INS_CC_NO = 0x1, // OF = 0 + + INS_CC_B = 0x2, // CF = 1 + INS_CC_C = 0x2, // CF = 1 + INS_CC_NAE = 0x2, // CF = 1 + + INS_CC_NB = 0x3, // CF = 0 + INS_CC_NC = 0x3, // CF = 0 + INS_CC_AE = 0x3, // CF = 0 + + INS_CC_E = 0x4, // ZF = 1 + INS_CC_Z = 0x4, // ZF = 1 + + INS_CC_NE = 0x5, // ZF = 0 + INS_CC_NZ = 0x5, // ZF = 0 + + INS_CC_BE = 0x6, // (CF OR ZF) = 1 + INS_CC_NA = 0x6, // (CF OR ZF) = 1 + + INS_CC_NBE = 0x7, // (CF OR ZF) = 0 + INS_CC_A = 0x7, // (CF OR ZF) = 0 + + INS_CC_S = 0x8, // (SF = 1) + + INS_CC_NS = 0x9, // (SF = 0) + + // no parity flag in ccmp/ctest + + // 0b1010 special always evals to true + INS_CC_TRUE = 0xA, + + // 0b1011 special always evals to false + INS_CC_FALSE = 0xB, + INS_CC_L = 0xC, // (SF XOR OF) = 1 + INS_CC_NGE = 0xC, // (SF XOR OF) = 1 + + INS_CC_NL = 0xD, // (SF XOR OF) = 0 + INS_CC_GE = 0xD, // (SF XOR OF) = 0 + + INS_CC_LE = 0xE, // (SF XOR OF) OR ZF) = 1 + INS_CC_NG = 0xE, // (SF XOR OF) OR ZF) = 1 + + INS_CC_NLE = 0xF, // (SF XOR OF) OR ZF) = 0 + INS_CC_G = 0xF, // (SF XOR OF) OR ZF) = 0 +}; +#endif + +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) struct GenTreeCCMP final : public GenTreeOpCC { insCflags gtFlagsVal; @@ -9648,12 +9541,12 @@ inline bool GenTree::OperIsCopyBlkOp() inline bool GenTree::IsIntegralConst(ssize_t constVal) const { - if ((gtOper == GT_CNS_INT) && (AsIntConCommon()->IconValue() == constVal)) + if ((OperIs(GT_CNS_INT)) && (AsIntConCommon()->IconValue() == constVal)) { return true; } - if ((gtOper == GT_CNS_LNG) && (AsIntConCommon()->LngValue() == constVal)) + if ((OperIs(GT_CNS_LNG)) && (AsIntConCommon()->LngValue() == constVal)) { return true; } @@ -9988,52 +9881,8 @@ inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index, var_types s inline bool GenTree::IsBoxedValue() { assert(gtOper != GT_BOX || AsBox()->BoxOp() != nullptr); - return (gtOper == GT_BOX) && (gtFlags & GTF_BOX_VALUE); -} - -#ifdef DEBUG -//------------------------------------------------------------------------ -// IsValidCallArgument: Given an GenTree node that represents an argument -// enforce (or don't enforce) the following invariant. -// -// Arguments: -// instance method for a GenTree node -// -// Return values: -// true: the GenTree node is accepted as a valid argument -// false: the GenTree node is not accepted as a valid argument -// -// Notes: -// For targets that don't support arguments as a list of fields, we do not support GT_FIELD_LIST. -// -// Currently for AMD64 UNIX we allow a limited case where a GT_FIELD_LIST is -// allowed but every element must be a GT_LCL_FLD. -// -// For the future targets that allow for Multireg args (and this includes the current ARM64 target), -// or that allow for passing promoted structs, we allow a GT_FIELD_LIST of arbitrary nodes. -// These would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs, -// but could be changed into constants or GT_COMMA trees by the later -// optimization phases. 
- -inline bool GenTree::IsValidCallArgument() -{ - if (OperIs(GT_FIELD_LIST)) - { -#if !FEATURE_MULTIREG_ARGS && !FEATURE_PUT_STRUCT_ARG_STK - - return false; - -#else // FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK - - // We allow this GT_FIELD_LIST as an argument - return true; - -#endif // FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK - } - // We don't have either kind of list, so it satisfies the invariant. - return true; + return (OperIs(GT_BOX)) && (gtFlags & GTF_BOX_VALUE); } -#endif // DEBUG inline GenTree* GenTree::gtGetOp1() const { @@ -10229,13 +10078,6 @@ inline regNumber GenTree::GetRegByIndex(int regIndex) const return AsCall()->GetRegNumByIdx(regIndex); } -#if FEATURE_ARG_SPLIT - if (OperIsPutArgSplit()) - { - return AsPutArgSplit()->GetRegNumByIdx(regIndex); - } -#endif - #if !defined(TARGET_64BIT) if (OperIsMultiRegOp()) { @@ -10291,13 +10133,6 @@ inline var_types GenTree::GetRegTypeByIndex(int regIndex) const return AsCall()->AsCall()->GetReturnTypeDesc()->GetReturnRegType(regIndex); } -#if FEATURE_ARG_SPLIT - if (OperIsPutArgSplit()) - { - return AsPutArgSplit()->GetRegType(regIndex); - } -#endif // FEATURE_ARG_SPLIT - #if !defined(TARGET_64BIT) if (OperIsMultiRegOp()) { @@ -10309,7 +10144,7 @@ inline var_types GenTree::GetRegTypeByIndex(int regIndex) const #ifdef FEATURE_HW_INTRINSICS if (OperIsHWIntrinsic()) { - assert(TypeGet() == TYP_STRUCT); + assert(TypeIs(TYP_STRUCT)); #ifdef TARGET_ARM64 if (AsHWIntrinsic()->GetSimdSize() == 16) { @@ -10330,11 +10165,11 @@ inline var_types GenTree::GetRegTypeByIndex(int regIndex) const if (OperIsScalarLocal()) { - if (TypeGet() == TYP_LONG) + if (TypeIs(TYP_LONG)) { return TYP_INT; } - assert(TypeGet() == TYP_STRUCT); + assert(TypeIs(TYP_STRUCT)); assert((gtFlags & GTF_VAR_MULTIREG) != 0); // The register type for a multireg lclVar requires looking at the LclVarDsc, // which requires a Compiler instance. 
The caller must use the GetFieldTypeByIndex @@ -10368,6 +10203,7 @@ inline GenTreeFlags GenTree::GetRegSpillFlagByIdx(int regIndex) const return AsCall()->GetRegSpillFlagByIdx(regIndex); } +<<<<<<< HEAD #if FEATURE_ARG_SPLIT if (OperIsPutArgSplit()) { @@ -10376,6 +10212,9 @@ inline GenTreeFlags GenTree::GetRegSpillFlagByIdx(int regIndex) const #endif // FEATURE_ARG_SPLIT #if !defined(TARGET_64BIT) && !defined(TARGET_WASM32) && !defined(TARGET_WASM64) +======= +#if !defined(TARGET_64BIT) +>>>>>>> upstream-jun if (OperIsMultiRegOp()) { return AsMultiRegOp()->GetRegSpillFlagByIdx(regIndex); @@ -10421,14 +10260,6 @@ inline void GenTree::SetRegSpillFlagByIdx(GenTreeFlags flags, int regIndex) return; } -#if FEATURE_ARG_SPLIT - if (OperIsPutArgSplit()) - { - AsPutArgSplit()->SetRegSpillFlagByIdx(flags, regIndex); - return; - } -#endif // FEATURE_ARG_SPLIT - #if !defined(TARGET_64BIT) if (OperIsMultiRegOp()) { @@ -10548,7 +10379,7 @@ inline void GenTree::ClearLastUse(int fieldIndex) // inline bool GenTree::IsCopyOrReload() const { - return (gtOper == GT_COPY || gtOper == GT_RELOAD); + return (OperIs(GT_COPY) || OperIs(GT_RELOAD)); } //----------------------------------------------------------------------------------- @@ -10573,7 +10404,7 @@ inline bool GenTree::IsCopyOrReloadOfMultiRegCall() const inline bool GenTree::IsCnsIntOrI() const { - return (gtOper == GT_CNS_INT); + return (OperIs(GT_CNS_INT)); } inline bool GenTree::IsIntegralConst() const @@ -10581,7 +10412,7 @@ inline bool GenTree::IsIntegralConst() const #ifdef TARGET_64BIT return IsCnsIntOrI(); #else // !TARGET_64BIT - return ((gtOper == GT_CNS_INT) || (gtOper == GT_CNS_LNG)); + return ((OperIs(GT_CNS_INT)) || (OperIs(GT_CNS_LNG))); #endif // !TARGET_64BIT } diff --git a/src/coreclr/jit/gschecks.cpp b/src/coreclr/jit/gschecks.cpp index 558c30f6b357..75d5a365a587 100644 --- a/src/coreclr/jit/gschecks.cpp +++ b/src/coreclr/jit/gschecks.cpp @@ -418,8 +418,9 @@ void Compiler::gsParamsToShadows() // We don't need unsafe value cls check here since we are copying the params and this flag // would have been set on the original param before reaching here. lvaSetStruct(shadowVarNum, varDsc->GetLayout(), false); - shadowVarDsc->lvIsMultiRegArg = varDsc->lvIsMultiRegArg; - shadowVarDsc->lvIsMultiRegRet = varDsc->lvIsMultiRegRet; + shadowVarDsc->lvIsMultiRegArg = varDsc->lvIsMultiRegArg; + shadowVarDsc->lvIsMultiRegRet = varDsc->lvIsMultiRegRet; + shadowVarDsc->lvIsMultiRegDest = varDsc->lvIsMultiRegDest; } shadowVarDsc->lvIsUnsafeBuffer = varDsc->lvIsUnsafeBuffer; shadowVarDsc->lvIsPtr = varDsc->lvIsPtr; @@ -511,7 +512,7 @@ void Compiler::gsParamsToShadows() } #if defined(TARGET_X86) && defined(FEATURE_IJW) - if (lclNum < info.compArgsCount && argRequiresSpecialCopy(lclNum) && (varDsc->TypeGet() == TYP_STRUCT)) + if (lclNum < info.compArgsCount && argRequiresSpecialCopy(lclNum) && varDsc->TypeIs(TYP_STRUCT)) { JITDUMP("arg%02u requires special copy, using special copy helper to copy to shadow var V%02u\n", lclNum, shadowVarNum); @@ -536,37 +537,11 @@ void Compiler::gsParamsToShadows() // inserting reverse pinvoke transitions way too early in the // JIT. 
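The xarch insCC enum added in gentree.h above describes each condition in terms of the CF/ZF/SF/OF flags. The following stand-alone sketch, unrelated to the JIT's encodings, just spells out a few of those formulas as boolean expressions over individual flag bits:

struct FlagsSketch
{
    bool cf, zf, sf, of;
};

// Signed comparisons.
bool IsGE(const FlagsSketch& f) { return f.sf == f.of; }            // (SF XOR OF) = 0
bool IsLE(const FlagsSketch& f) { return (f.sf != f.of) || f.zf; }  // ((SF XOR OF) OR ZF) = 1

// Unsigned comparisons.
bool IsA(const FlagsSketch& f)  { return !f.cf && !f.zf; }          // (CF OR ZF) = 0
bool IsBE(const FlagsSketch& f) { return f.cf || f.zf; }            // (CF OR ZF) = 1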
- struct HasReversePInvokeEnterVisitor : GenTreeVisitor - { - enum - { - DoPreOrder = true, - }; - - HasReversePInvokeEnterVisitor(Compiler* comp) - : GenTreeVisitor(comp) - { - } - - fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) - { - if (((*use)->gtFlags & GTF_CALL) == 0) - { - return fgWalkResult::WALK_SKIP_SUBTREES; - } - - if ((*use)->IsHelperCall(m_compiler, CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER) || - (*use)->IsHelperCall(m_compiler, CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER_TRACK_TRANSITIONS)) - { - return fgWalkResult::WALK_ABORT; - } - - return fgWalkResult::WALK_CONTINUE; - } + auto isReversePInvoke = [=](GenTree* tree) { + return tree->IsHelperCall(this, CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER) || + tree->IsHelperCall(this, CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER_TRACK_TRANSITIONS); }; - HasReversePInvokeEnterVisitor checker(this); - Statement* reversePInvokeStmt = nullptr; for (Statement* const stmt : fgFirstBB->Statements()) { @@ -574,7 +549,7 @@ void Compiler::gsParamsToShadows() // at the point before we insert the shadow copy statement. assert(!gtHasRef(stmt->GetRootNode(), lclNum)); - if (checker.WalkTree(stmt->GetRootNodePointer(), nullptr) == fgWalkResult::WALK_ABORT) + if (gtFindNodeInTree(stmt->GetRootNode(), isReversePInvoke) != nullptr) { reversePInvokeStmt = stmt; break; diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 75726fc2ef71..67964f7a3865 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -33,10 +33,12 @@ GTNODE(LCL_ADDR , GenTreeLclFld ,0,0,GTK_LEAF) // local //----------------------------------------------------------------------------- GTNODE(CATCH_ARG , GenTree ,0,0,GTK_LEAF) // Exception object in a catch block +GTNODE(ASYNC_CONTINUATION, GenTree ,0,0,GTK_LEAF) // Access returned continuation by an async call GTNODE(LABEL , GenTree ,0,0,GTK_LEAF) // Jump-target GTNODE(JMP , GenTreeVal ,0,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function GTNODE(FTN_ADDR , GenTreeFptrVal ,0,0,GTK_LEAF) // Address of a function GTNODE(RET_EXPR , GenTreeRetExpr ,0,0,GTK_LEAF|DBK_NOTLIR) // Place holder for the return expression from an inline candidate +GTNODE(GCPOLL , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTLIR) //----------------------------------------------------------------------------- // Constant nodes: @@ -244,11 +246,16 @@ GTNODE(JCC , GenTreeCC ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI GTNODE(SETCC , GenTreeCC ,0,0,GTK_LEAF|DBK_NOTHIR) // Variant of SELECT that reuses flags computed by a previous node with the specified condition. GTNODE(SELECTCC , GenTreeOpCC ,0,0,GTK_BINOP|DBK_NOTHIR) -#ifdef TARGET_ARM64 -// The arm64 ccmp instruction. If the specified condition is true, compares two + +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) +// The arm64 and x86 ccmp instruction. If the specified condition is true, compares two // operands and sets the condition flags according to the result. Otherwise // sets the condition flags to the specified immediate value. GTNODE(CCMP , GenTreeCCMP ,0,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) +#endif + + +#ifdef TARGET_ARM64 // Maps to arm64 csinc/cinc instruction. Computes result = condition ? op1 : op2 + 1. // If op2 is null, computes result = condition ? op1 + 1 : op1. 
GTNODE(SELECT_INC , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) @@ -266,6 +273,29 @@ GTNODE(SELECT_NEG , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) GTNODE(SELECT_NEGCC , GenTreeOpCC ,0,0,GTK_BINOP|DBK_NOTHIR) #endif +//----------------------------------------------------------------------------- +// LIR specific arithmetic nodes: +//----------------------------------------------------------------------------- + +#ifdef TARGET_RISCV64 +// Maps to riscv64 sh1add instruction. Computes result = op2 + (op1 << 1). +GTNODE(SH1ADD , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) +// Maps to riscv64 sh1add.uw instruction. Computes result = op2 + zext(op1[31..0] << 1). +GTNODE(SH1ADD_UW , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) +// Maps to riscv64 sh2add instruction. Computes result = op2 + (op1 << 2). +GTNODE(SH2ADD , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) +// Maps to riscv64 sh2add.uw instruction. Computes result = op2 + zext(op1[31..0] << 2). +GTNODE(SH2ADD_UW , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) +// Maps to riscv64 sh3add instruction. Computes result = op2 + (op1 << 3). +GTNODE(SH3ADD , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) +// Maps to riscv64 sh3add.uw instruction. Computes result = op2 + zext(op1[31..0] << 3). +GTNODE(SH3ADD_UW , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) +// Maps to riscv64 add.uw instruction. Computes result = op2 + zext(op1[31..0]). +GTNODE(ADD_UW , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) +// Maps to riscv64 slli.uw instruction. Computes result = zext(op1[31..0] << imm). +GTNODE(SLLI_UW , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) +#endif + //----------------------------------------------------------------------------- // Other nodes that look like unary/binary operators: //----------------------------------------------------------------------------- @@ -284,6 +314,11 @@ GTNODE(RETURN , GenTreeOp ,0,1,GTK_UNOP|GTK_NOVALUE) GTNODE(SWITCH , GenTreeOp ,0,1,GTK_UNOP|GTK_NOVALUE) GTNODE(NO_OP , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE) // A NOP that cannot be deleted. +// Suspend an async method, returning a continuation. +// Before lowering this is a seemingly normal TYP_VOID node with a lot of side effects (GTF_CALL | GTF_GLOB_REF | GTF_ORDER_SIDEEFF). +// Lowering then removes all successor nodes and leaves it as the terminator node. +GTNODE(RETURN_SUSPEND , GenTreeOp ,0,1,GTK_UNOP|GTK_NOVALUE) // Return a continuation in an async method + GTNODE(START_NONGC , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Starts a new instruction group that will be non-gc interruptible. GTNODE(START_PREEMPTGC , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Starts a new instruction group where preemptive GC is enabled. GTNODE(PROF_HOOK , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Profiler Enter/Leave/TailCall hook. 
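The RISCV64 nodes added above map to the Zba shifted-add instructions: each computes op2 plus op1 shifted left by a small constant, and the .uw forms zero-extend the low 32 bits of op1 before the shift. A minimal sketch of the arithmetic, illustrative only:

#include <cstdint>

// shNadd: result = op2 + (op1 << n), with n in {1, 2, 3}.
uint64_t ShNAdd(uint64_t op1, uint64_t op2, unsigned n)
{
    return op2 + (op1 << n);
}

// shNadd.uw / add.uw (n == 0): zero-extend op1's low 32 bits, then shift and add.
uint64_t ShNAddUw(uint64_t op1, uint64_t op2, unsigned n)
{
    return op2 + (static_cast<uint64_t>(static_cast<uint32_t>(op1)) << n);
}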
@@ -319,9 +354,6 @@ GTNODE(PINVOKE_EPILOG , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI GTNODE(RETURNTRAP , GenTreeOp ,0,0,GTK_UNOP|GTK_NOVALUE|DBK_NOTHIR) // a conditional call to wait on gc GTNODE(PUTARG_REG , GenTreeOp ,0,0,GTK_UNOP|DBK_NOTHIR) // operator that places outgoing arg in register GTNODE(PUTARG_STK , GenTreePutArgStk ,0,0,GTK_UNOP|GTK_NOVALUE|DBK_NOTHIR) // operator that places outgoing arg in stack -#if FEATURE_ARG_SPLIT -GTNODE(PUTARG_SPLIT , GenTreePutArgSplit ,0,0,GTK_UNOP|DBK_NOTHIR) // operator that places outgoing arg in registers with stack (split struct in ARM32) -#endif // FEATURE_ARG_SPLIT GTNODE(SWAP , GenTreeOp ,0,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) // op1 and op2 swap (registers) GTNODE(COPY , GenTreeCopyOrReload,0,0,GTK_UNOP|DBK_NOTHIR) // Copies a variable from its current location to a register that satisfies GTNODE(RELOAD , GenTreeCopyOrReload,0,0,GTK_UNOP|DBK_NOTHIR) // code generation constraints. The operand is the actual lclVar node. diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h index 26f88d179099..b785279801c7 100644 --- a/src/coreclr/jit/gtstructs.h +++ b/src/coreclr/jit/gtstructs.h @@ -102,12 +102,7 @@ GTSTRUCT_N(Conditional , GT_SELECT, GT_SELECT_INC, GT_SELECT_INV, GT_SELECT_NEG) #else GTSTRUCT_N(Conditional , GT_SELECT) #endif //TARGET_ARM64 -#if FEATURE_ARG_SPLIT -GTSTRUCT_2_SPECIAL(PutArgStk, GT_PUTARG_STK, GT_PUTARG_SPLIT) -GTSTRUCT_1(PutArgSplit , GT_PUTARG_SPLIT) -#else // !FEATURE_ARG_SPLIT GTSTRUCT_1(PutArgStk , GT_PUTARG_STK) -#endif // !FEATURE_ARG_SPLIT GTSTRUCT_1(PhysReg , GT_PHYSREG) #ifdef FEATURE_HW_INTRINSICS GTSTRUCT_1(HWIntrinsic , GT_HWINTRINSIC) @@ -116,8 +111,10 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ) GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP) GTSTRUCT_1(ArrAddr , GT_ARR_ADDR) GTSTRUCT_2(CC , GT_JCC, GT_SETCC) -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) GTSTRUCT_1(CCMP , GT_CCMP) +#endif +#ifdef TARGET_ARM64 GTSTRUCT_N(OpCC , GT_SELECTCC, GT_SELECT_INCCC, GT_JCMP, GT_JTEST, GT_SELECT_INVCC, GT_SELECT_NEGCC) #else GTSTRUCT_3(OpCC , GT_SELECTCC, GT_JCMP, GT_JTEST) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 731dbd477ba6..aedb31ba0c8c 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -789,7 +789,7 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St // bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { - assert(!opts.IsReadyToRun()); + assert(!IsAot()); BasicBlock* block = *pBlock; @@ -801,7 +801,6 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* { return false; } - assert(!opts.IsReadyToRun()); if (TargetOS::IsUnix) { @@ -1943,10 +1942,6 @@ enum class TypeCheckPassedAction CallHelper_AlwaysThrows, }; -// Some arbitrary limit on the number of guesses we can make -// The actual number of guesses is usually much smaller -#define MAX_CAST_GUESSES 8 - //------------------------------------------------------------------------------ // PickCandidatesForTypeCheck: picks classes to use as fast type checks against // the object being casted. 
The function also defines the strategy to follow @@ -1955,7 +1950,7 @@ enum class TypeCheckPassedAction // Arguments: // comp - Compiler instance // castHelper - Cast helper call to expand -// candidates - [out] Classes (guesses) to use in the fast path (up to MAX_CAST_GUESSES) +// candidates - [out] Classes (guesses) to use in the fast path (up to MAX_GDV_TYPE_CHECKS) // commonCls - [out] Common denominator class for the fast and the fallback paths. // likelihoods - [out] Likelihoods of successful type checks [0..100] // typeCheckFailed - [out] Action to perform if the type check fails @@ -2161,9 +2156,9 @@ static int PickCandidatesForTypeCheck(Compiler* comp, ///////////////////////////////////////////////////////////////////////////////////////////////////// // Let's re-use GDV's threshold on how many guesses we can make (can be 3 by default). - const int maxTypeChecks = min(comp->getGDVMaxTypeChecks(), MAX_CAST_GUESSES); + const int maxTypeChecks = min(comp->getGDVMaxTypeChecks(), MAX_GDV_TYPE_CHECKS); - CORINFO_CLASS_HANDLE exactClasses[MAX_CAST_GUESSES] = {}; + CORINFO_CLASS_HANDLE exactClasses[MAX_GDV_TYPE_CHECKS] = {}; const int numExactClasses = comp->info.compCompHnd->getExactClasses(castToCls, maxTypeChecks, exactClasses); bool allTrulyExact = true; for (int i = 0; i < numExactClasses; i++) @@ -2235,9 +2230,9 @@ static int PickCandidatesForTypeCheck(Compiler* comp, // 3) Consult with PGO data ///////////////////////////////////////////////////////////////////////////////////////////////////// - CORINFO_CLASS_HANDLE likelyClasses[MAX_CAST_GUESSES] = {}; - unsigned likelyLikelihoods[MAX_CAST_GUESSES] = {}; - int likelyClassCount = 0; + CORINFO_CLASS_HANDLE likelyClasses[MAX_GDV_TYPE_CHECKS] = {}; + unsigned likelyLikelihoods[MAX_GDV_TYPE_CHECKS] = {}; + int likelyClassCount = 0; comp->pickGDV(castHelper, castHelper->gtCastHelperILOffset, false, likelyClasses, nullptr, &likelyClassCount, likelyLikelihoods); @@ -2365,8 +2360,8 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, TypeCheckFailedAction typeCheckFailedAction; TypeCheckPassedAction typeCheckPassedAction; CORINFO_CLASS_HANDLE commonCls; - CORINFO_CLASS_HANDLE expectedExactClasses[MAX_CAST_GUESSES] = {}; - unsigned likelihoods[MAX_CAST_GUESSES] = {}; + CORINFO_CLASS_HANDLE expectedExactClasses[MAX_GDV_TYPE_CHECKS] = {}; + unsigned likelihoods[MAX_GDV_TYPE_CHECKS] = {}; const int numOfCandidates = PickCandidatesForTypeCheck(this, call, expectedExactClasses, &commonCls, likelihoods, &typeCheckFailedAction, &typeCheckPassedAction); @@ -2451,8 +2446,8 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // Block 2: typeCheckBb(s) // TODO-InlineCast: if likelyCls == expectedCls we can consider saving to a local to re-use. 
- BasicBlock* typeChecksBbs[MAX_CAST_GUESSES] = {}; - BasicBlock* lastTypeCheckBb = nullcheckBb; + BasicBlock* typeChecksBbs[MAX_GDV_TYPE_CHECKS] = {}; + BasicBlock* lastTypeCheckBb = nullcheckBb; for (int candidateId = 0; candidateId < numOfCandidates; candidateId++) { const CORINFO_CLASS_HANDLE expectedCls = expectedExactClasses[candidateId]; @@ -2822,18 +2817,16 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, const CorInfoHelpFunc helper = eeGetHelperNum(call->gtCallMethHnd); int lengthArgIndex = -1; + int typeArgIndex = -1; switch (helper) { case CORINFO_HELP_NEWARR_1_DIRECT: case CORINFO_HELP_NEWARR_1_VC: - case CORINFO_HELP_NEWARR_1_OBJ: + case CORINFO_HELP_NEWARR_1_PTR: case CORINFO_HELP_NEWARR_1_ALIGN8: lengthArgIndex = 1; - break; - - case CORINFO_HELP_READYTORUN_NEWARR_1: - lengthArgIndex = 0; + typeArgIndex = 0; break; default: @@ -2871,9 +2864,7 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, // Initialize the array method table pointer. // - CORINFO_CLASS_HANDLE arrayHnd = (CORINFO_CLASS_HANDLE)call->compileTimeHelperArgumentHandle; - - GenTree* const mt = gtNewIconEmbClsHndNode(arrayHnd); + GenTree* const mt = call->gtArgs.GetArgByIndex(typeArgIndex)->GetNode(); GenTree* const mtStore = gtNewStoreValueNode(TYP_I_IMPL, stackLocalAddress, mt); Statement* const mtStmt = fgNewStmtFromTree(mtStore); diff --git a/src/coreclr/jit/host.h b/src/coreclr/jit/host.h index d10eb93ca9a1..6f5b4427403e 100644 --- a/src/coreclr/jit/host.h +++ b/src/coreclr/jit/host.h @@ -36,7 +36,7 @@ void gcDump_logf(const char* fmt, ...); void logf(unsigned level, const char* fmt, ...); -extern "C" void ANALYZER_NORETURN __cdecl assertAbort(const char* why, const char* file, unsigned line); +extern "C" void ANALYZER_NORETURN assertAbort(const char* why, const char* file, unsigned line); #undef assert #define assert(p) (void)((p) || (assertAbort(#p, __FILE__, __LINE__), 0)) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index a00d57962d75..4aa159c52fc6 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -58,6 +58,142 @@ const HWIntrinsicInfo& HWIntrinsicInfo::lookup(NamedIntrinsic id) return hwIntrinsicInfoArray[id - NI_HW_INTRINSIC_START - 1]; } +//------------------------------------------------------------------------ +// lookupIns: Gets the instruction associated with a given NamedIntrinsic and base type +// +// Arguments: +// id -- The NamedIntrinsic associated for which to lookup its instruction +// type -- The base type for which to lookup the instruction +// comp -- The optional compiler instance which is used to special case instruction lookup +// +// Return Value: +// The instruction for id and type +instruction HWIntrinsicInfo::lookupIns(NamedIntrinsic id, var_types type, Compiler* comp) +{ + if ((type < TYP_BYTE) || (type > TYP_DOUBLE)) + { + assert(!"Unexpected type"); + return INS_invalid; + } + + uint16_t result = lookup(id).ins[type - TYP_BYTE]; + instruction ins = static_cast(result); + +#if defined(TARGET_X86) + if (ins == INS_movd64) + { + ins = INS_movd32; + } +#endif // TARGET_X86 + +#if defined(TARGET_XARCH) + instruction evexIns = ins; + + switch (ins) + { + case INS_movdqa32: + { + if (varTypeIsLong(type)) + { + evexIns = INS_vmovdqa64; + } + break; + } + + case INS_movdqu32: + { + if (varTypeIsLong(type)) + { + evexIns = INS_vmovdqu64; + } + break; + } + + case INS_vbroadcastf32x4: + { + if (type == TYP_DOUBLE) + { + evexIns = INS_vbroadcastf64x2; + 
} + break; + } + + case INS_vbroadcasti32x4: + { + if (varTypeIsLong(type)) + { + evexIns = INS_vbroadcasti64x2; + } + break; + } + + case INS_vextractf32x4: + { + if (type == TYP_DOUBLE) + { + evexIns = INS_vextractf64x2; + } + else if (varTypeIsInt(type)) + { + evexIns = INS_vextracti32x4; + } + else if (varTypeIsLong(type)) + { + evexIns = INS_vextracti64x2; + } + break; + } + + case INS_vextracti32x4: + { + if (varTypeIsLong(type)) + { + evexIns = INS_vextracti64x2; + } + break; + } + + case INS_vinsertf32x4: + { + if (type == TYP_DOUBLE) + { + evexIns = INS_vinsertf64x2; + } + else if (varTypeIsInt(type)) + { + evexIns = INS_vinserti32x4; + } + else if (varTypeIsLong(type)) + { + evexIns = INS_vinserti64x2; + } + break; + } + + case INS_vinserti32x4: + { + if (varTypeIsLong(type)) + { + evexIns = INS_vinserti64x2; + } + break; + } + + default: + { + break; + } + } + + if ((evexIns != ins) && (comp != nullptr) && comp->canUseEvexEncoding()) + { + ins = evexIns; + } +#endif // TARGET_XARCH + + return ins; +} + #if defined(TARGET_XARCH) const TernaryLogicInfo& TernaryLogicInfo::lookup(uint8_t control) { @@ -766,106 +902,101 @@ struct HWIntrinsicIsaRange static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { // clang-format off #if defined(TARGET_XARCH) - { FIRST_NI_X86Base, LAST_NI_X86Base }, - { FIRST_NI_SSE, LAST_NI_SSE }, - { FIRST_NI_SSE2, LAST_NI_SSE2 }, - { FIRST_NI_SSE3, LAST_NI_SSE3 }, - { FIRST_NI_SSSE3, LAST_NI_SSSE3 }, - { FIRST_NI_SSE41, LAST_NI_SSE41 }, - { FIRST_NI_SSE42, LAST_NI_SSE42 }, - { FIRST_NI_AVX, LAST_NI_AVX }, - { FIRST_NI_AVX2, LAST_NI_AVX2 }, - { FIRST_NI_AES, LAST_NI_AES }, - { FIRST_NI_BMI1, LAST_NI_BMI1 }, - { FIRST_NI_BMI2, LAST_NI_BMI2 }, - { FIRST_NI_FMA, LAST_NI_FMA }, - { FIRST_NI_LZCNT, LAST_NI_LZCNT }, - { FIRST_NI_PCLMULQDQ, LAST_NI_PCLMULQDQ }, - { FIRST_NI_PCLMULQDQ_V256, LAST_NI_PCLMULQDQ_V256 }, - { FIRST_NI_PCLMULQDQ_V512, LAST_NI_PCLMULQDQ_V512 }, - { FIRST_NI_POPCNT, LAST_NI_POPCNT }, - { FIRST_NI_Vector128, LAST_NI_Vector128 }, - { FIRST_NI_Vector256, LAST_NI_Vector256 }, - { FIRST_NI_Vector512, LAST_NI_Vector512 }, - { FIRST_NI_AVXVNNI, LAST_NI_AVXVNNI }, - { NI_Illegal, NI_Illegal }, // MOVBE - { FIRST_NI_X86Serialize, LAST_NI_X86Serialize }, - { NI_Illegal, NI_Illegal }, // EVEX - { FIRST_NI_AVX512F, LAST_NI_AVX512F }, - { FIRST_NI_AVX512F_VL, LAST_NI_AVX512F_VL }, - { FIRST_NI_AVX512BW, LAST_NI_AVX512BW }, - { FIRST_NI_AVX512BW_VL, LAST_NI_AVX512BW_VL }, - { FIRST_NI_AVX512CD, LAST_NI_AVX512CD }, - { FIRST_NI_AVX512CD_VL, LAST_NI_AVX512CD_VL }, - { FIRST_NI_AVX512DQ, LAST_NI_AVX512DQ }, - { FIRST_NI_AVX512DQ_VL, LAST_NI_AVX512DQ_VL }, - { FIRST_NI_AVX512VBMI, LAST_NI_AVX512VBMI }, - { FIRST_NI_AVX512VBMI_VL, LAST_NI_AVX512VBMI_VL }, - { FIRST_NI_AVX10v1, LAST_NI_AVX10v1 }, - { FIRST_NI_AVX10v1_V512, LAST_NI_AVX10v1_V512 }, - { NI_Illegal, NI_Illegal }, // VectorT128 - { NI_Illegal, NI_Illegal }, // VectorT256 - { NI_Illegal, NI_Illegal }, // VectorT512 - { NI_Illegal, NI_Illegal }, // APX - { FIRST_NI_AVX10v2, LAST_NI_AVX10v2 }, // AVX10v2 - { FIRST_NI_AVX10v2_V512, LAST_NI_AVX10v2_V512 }, // AVX10v2_V512 - { FIRST_NI_GFNI, LAST_NI_GFNI }, - { FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 }, - { FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 }, - { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, - { FIRST_NI_SSE_X64, LAST_NI_SSE_X64 }, - { FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 }, - { NI_Illegal, NI_Illegal }, // SSE3_X64 - { NI_Illegal, NI_Illegal }, // SSSE3_X64 - { FIRST_NI_SSE41_X64, LAST_NI_SSE41_X64 }, - { FIRST_NI_SSE42_X64, LAST_NI_SSE42_X64 }, 
- { NI_Illegal, NI_Illegal }, // AVX_X64 - { NI_Illegal, NI_Illegal }, // AVX2_X64 - { NI_Illegal, NI_Illegal }, // AES_X64 - { FIRST_NI_BMI1_X64, LAST_NI_BMI1_X64 }, - { FIRST_NI_BMI2_X64, LAST_NI_BMI2_X64 }, - { NI_Illegal, NI_Illegal }, // FMA_X64 - { FIRST_NI_LZCNT_X64, LAST_NI_LZCNT_X64 }, - { NI_Illegal, NI_Illegal }, // PCLMULQDQ_X64 - { FIRST_NI_POPCNT_X64, LAST_NI_POPCNT_X64 }, - { NI_Illegal, NI_Illegal }, // AVXVNNI_X64 - { NI_Illegal, NI_Illegal }, // X86Serialize_X64 - { FIRST_NI_AVX512F_X64, LAST_NI_AVX512F_X64 }, - { NI_Illegal, NI_Illegal }, // AVX512BW_X64 - { NI_Illegal, NI_Illegal }, // AVX512CD_X64 - { NI_Illegal, NI_Illegal }, // AVX512DQ_X64 - { NI_Illegal, NI_Illegal }, // AVX512VBMI_X64 - { FIRST_NI_AVX10v1_X64, LAST_NI_AVX10v1_X64 }, - { NI_Illegal, NI_Illegal }, // AVX10v1_V512_X64 - { NI_Illegal, NI_Illegal }, // AVX10v2_X64 - { NI_Illegal, NI_Illegal }, // AVX10v2_V512_X64 - { NI_Illegal, NI_Illegal }, // GFNI_X64 + { FIRST_NI_X86Base, LAST_NI_X86Base }, // X86Base + { FIRST_NI_SSE3, LAST_NI_SSE3 }, // SSE3 + { FIRST_NI_SSSE3, LAST_NI_SSSE3 }, // SSSE3 + { FIRST_NI_SSE41, LAST_NI_SSE41 }, // SSE41 + { FIRST_NI_SSE42, LAST_NI_SSE42 }, // SSE42 + { FIRST_NI_POPCNT, LAST_NI_POPCNT }, // POPCNT + { FIRST_NI_AVX, LAST_NI_AVX }, // AVX + { FIRST_NI_AVX2, LAST_NI_AVX2 }, // AVX2 + { FIRST_NI_BMI1, LAST_NI_BMI1 }, // BMI1 + { FIRST_NI_BMI2, LAST_NI_BMI2 }, // BMI2 + { FIRST_NI_FMA, LAST_NI_FMA }, // FMA + { FIRST_NI_LZCNT, LAST_NI_LZCNT }, // LZCNT + { NI_Illegal, NI_Illegal }, // MOVBE + { FIRST_NI_AVX512, LAST_NI_AVX512 }, // AVX512 + { FIRST_NI_AVX512VBMI, LAST_NI_AVX512VBMI }, // AVX512VBMI + { NI_Illegal, NI_Illegal }, // AVX512v3 + { NI_Illegal, NI_Illegal }, // AVX10v1 + { FIRST_NI_AVX10v2, LAST_NI_AVX10v2 }, // AVX10v2 + { NI_Illegal, NI_Illegal }, // APX + { FIRST_NI_AES, LAST_NI_AES }, // AES + { FIRST_NI_PCLMULQDQ, LAST_NI_PCLMULQDQ }, // PCLMULQDQ + { NI_Illegal, NI_Illegal }, // AVX512VP2INTERSECT + { NI_Illegal, NI_Illegal }, // AVXIFMA + { FIRST_NI_AVXVNNI, LAST_NI_AVXVNNI }, // AVXVNNI + { FIRST_NI_GFNI, LAST_NI_GFNI }, // GFNI + { FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 }, // GFNI_V256 + { FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 }, // GFNI_V512 + { NI_Illegal, NI_Illegal }, // SHA + { NI_Illegal, NI_Illegal }, // AES_V256 + { NI_Illegal, NI_Illegal }, // AES_V512 + { FIRST_NI_PCLMULQDQ_V256, LAST_NI_PCLMULQDQ_V256 }, // PCLMULQDQ_V256 + { FIRST_NI_PCLMULQDQ_V512, LAST_NI_PCLMULQDQ_V512 }, // PCLMULQDQ_V512 + { NI_Illegal, NI_Illegal }, // WAITPKG + { FIRST_NI_X86Serialize, LAST_NI_X86Serialize }, // X86Serialize + { FIRST_NI_Vector128, LAST_NI_Vector128 }, // Vector128 + { FIRST_NI_Vector256, LAST_NI_Vector256 }, // Vector256 + { FIRST_NI_Vector512, LAST_NI_Vector512 }, // Vector512 + { NI_Illegal, NI_Illegal }, // VectorT128 + { NI_Illegal, NI_Illegal }, // VectorT256 + { NI_Illegal, NI_Illegal }, // VectorT512 + + { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, // X86Base_X64 + { NI_Illegal, NI_Illegal }, // SSE3_X64 + { NI_Illegal, NI_Illegal }, // SSSE3_X64 + { FIRST_NI_SSE41_X64, LAST_NI_SSE41_X64 }, // SSE41_X64 + { FIRST_NI_SSE42_X64, LAST_NI_SSE42_X64 }, // SSE42_X64 + { FIRST_NI_POPCNT_X64, LAST_NI_POPCNT_X64 }, // POPCNT_X64 + { NI_Illegal, NI_Illegal }, // AVX_X64 + { NI_Illegal, NI_Illegal }, // AVX2_X64 + { FIRST_NI_BMI1_X64, LAST_NI_BMI1_X64 }, // BMI1_X64 + { FIRST_NI_BMI2_X64, LAST_NI_BMI2_X64 }, // BMI2_X64 + { NI_Illegal, NI_Illegal }, // FMA_X64 + { FIRST_NI_LZCNT_X64, LAST_NI_LZCNT_X64 }, // LZCNT_X64 + { FIRST_NI_AVX512_X64, 
LAST_NI_AVX512_X64 }, // AVX512_X64 + { NI_Illegal, NI_Illegal }, // AVX512VBMI_X64 + { NI_Illegal, NI_Illegal }, // AVX512v3_X64 + { NI_Illegal, NI_Illegal }, // AVX10v1_X64 + { NI_Illegal, NI_Illegal }, // AVX10v2_X64 + { NI_Illegal, NI_Illegal }, // AES_X64 + { NI_Illegal, NI_Illegal }, // PCLMULQDQ_X64 + { NI_Illegal, NI_Illegal }, // AVX512VP2INTERSECT_X64 + { NI_Illegal, NI_Illegal }, // AVXIFMA_X64 + { NI_Illegal, NI_Illegal }, // AVXVNNI_X64 + { NI_Illegal, NI_Illegal }, // GFNI_X64 + { NI_Illegal, NI_Illegal }, // SHA_X64 + { NI_Illegal, NI_Illegal }, // WAITPKG_X64 + { NI_Illegal, NI_Illegal }, // X86Serialize_X64 #elif defined (TARGET_ARM64) - { FIRST_NI_ArmBase, LAST_NI_ArmBase }, - { FIRST_NI_AdvSimd, LAST_NI_AdvSimd }, - { FIRST_NI_Aes, LAST_NI_Aes }, - { FIRST_NI_Crc32, LAST_NI_Crc32 }, - { FIRST_NI_Dp, LAST_NI_Dp }, - { FIRST_NI_Rdm, LAST_NI_Rdm }, - { FIRST_NI_Sha1, LAST_NI_Sha1 }, - { FIRST_NI_Sha256, LAST_NI_Sha256 }, - { NI_Illegal, NI_Illegal }, // Atomics - { FIRST_NI_Vector64, LAST_NI_Vector64 }, - { FIRST_NI_Vector128, LAST_NI_Vector128 }, - { NI_Illegal, NI_Illegal }, // Dczva - { NI_Illegal, NI_Illegal }, // Rcpc - { NI_Illegal, NI_Illegal }, // VectorT128 - { NI_Illegal, NI_Illegal }, // Rcpc2 - { FIRST_NI_Sve, LAST_NI_Sve }, - { FIRST_NI_ArmBase_Arm64, LAST_NI_ArmBase_Arm64 }, - { FIRST_NI_AdvSimd_Arm64, LAST_NI_AdvSimd_Arm64 }, - { NI_Illegal, NI_Illegal }, // Aes_Arm64 - { FIRST_NI_Crc32_Arm64, LAST_NI_Crc32_Arm64 }, - { NI_Illegal, NI_Illegal }, // Dp_Arm64 - { FIRST_NI_Rdm_Arm64, LAST_NI_Rdm_Arm64 }, - { NI_Illegal, NI_Illegal }, // Sha1_Arm64 - { NI_Illegal, NI_Illegal }, // Sha256_Arm64 - { NI_Illegal, NI_Illegal }, // Sve_Arm64 + { FIRST_NI_ArmBase, LAST_NI_ArmBase }, // ArmBase + { FIRST_NI_AdvSimd, LAST_NI_AdvSimd }, // AdvSimd + { FIRST_NI_Aes, LAST_NI_Aes }, // Aes + { FIRST_NI_Crc32, LAST_NI_Crc32 }, // Crc32 + { FIRST_NI_Dp, LAST_NI_Dp }, // Dp + { FIRST_NI_Rdm, LAST_NI_Rdm }, // Rdm + { FIRST_NI_Sha1, LAST_NI_Sha1 }, // Sha1 + { FIRST_NI_Sha256, LAST_NI_Sha256 }, // Sha256 + { NI_Illegal, NI_Illegal }, // Atomics + { FIRST_NI_Vector64, LAST_NI_Vector64 }, // Vector64 + { FIRST_NI_Vector128, LAST_NI_Vector128 }, // Vector128 + { NI_Illegal, NI_Illegal }, // Dczva + { NI_Illegal, NI_Illegal }, // Rcpc + { NI_Illegal, NI_Illegal }, // VectorT128 + { NI_Illegal, NI_Illegal }, // Rcpc2 + { FIRST_NI_Sve, LAST_NI_Sve }, // Sve + { FIRST_NI_Sve2, LAST_NI_Sve2 }, // Sve2 + { FIRST_NI_ArmBase_Arm64, LAST_NI_ArmBase_Arm64 }, // ArmBase_Arm64 + { FIRST_NI_AdvSimd_Arm64, LAST_NI_AdvSimd_Arm64 }, // AdvSimd_Arm64 + { NI_Illegal, NI_Illegal }, // Aes_Arm64 + { FIRST_NI_Crc32_Arm64, LAST_NI_Crc32_Arm64 }, // Crc32_Arm64 + { NI_Illegal, NI_Illegal }, // Dp_Arm64 + { FIRST_NI_Rdm_Arm64, LAST_NI_Rdm_Arm64 }, // Rdm_Arm64 + { NI_Illegal, NI_Illegal }, // Sha1_Arm64 + { NI_Illegal, NI_Illegal }, // Sha256_Arm64 + { NI_Illegal, NI_Illegal }, // Sve_Arm64 + { NI_Illegal, NI_Illegal }, // Sve2_Arm64 #else #error Unsupported platform #endif @@ -966,6 +1097,75 @@ static void ValidateHWIntrinsicIsaRangeArray() } #endif +//------------------------------------------------------------------------ +// binarySearchId: Does a binary search through a given ISA for the NamedIntrinsic matching a given name +// +// Arguments: +// isa -- The instruction set to search +// sig -- The signature of the intrinsic +// methodName -- The name of the method associated with the HWIntrinsic to lookup +// isLimitedVector256Isa -- true if Vector256 has limited acceleration support +// +// Return 
Value: +// The NamedIntrinsic associated with methodName and isa +static NamedIntrinsic binarySearchId(CORINFO_InstructionSet isa, + CORINFO_SIG_INFO* sig, + const char* methodName, + bool isLimitedVector256Isa) +{ + size_t isaIndex = static_cast(isa) - 1; + assert(isaIndex < ARRAY_SIZE(hwintrinsicIsaRangeArray)); + + const HWIntrinsicIsaRange& isaRange = hwintrinsicIsaRangeArray[isaIndex]; + + if (isaRange.FirstId == NI_Illegal) + { + return NI_Illegal; + } + + size_t rangeLower = isaRange.FirstId; + size_t rangeUpper = isaRange.LastId; + + while (rangeLower <= rangeUpper) + { + // This is safe since rangeLower and rangeUpper will never be negative + size_t rangeIndex = (rangeUpper + rangeLower) / 2; + + NamedIntrinsic ni = static_cast(rangeIndex); + const HWIntrinsicInfo& intrinsicInfo = HWIntrinsicInfo::lookup(ni); + + int sortOrder = strcmp(methodName, intrinsicInfo.name); + + if (sortOrder < 0) + { + rangeUpper = rangeIndex - 1; + } + else if (sortOrder > 0) + { + rangeLower = rangeIndex + 1; + } + else + { + assert(sortOrder == 0); + assert((intrinsicInfo.numArgs == -1) || (sig->numArgs == static_cast(intrinsicInfo.numArgs))); + +#if defined(TARGET_XARCH) + // on AVX1-only CPUs we only support a subset of intrinsics in Vector256 + if (isLimitedVector256Isa && !HWIntrinsicInfo::AvxOnlyCompatible(ni)) + { + return NI_Illegal; + } +#endif // TARGET_XARCH + + return ni; + } + } + + // There are several helper intrinsics that are implemented in managed code + // Those intrinsics will hit this code path and need to return NI_Illegal + return NI_Illegal; +} + //------------------------------------------------------------------------ // lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet // @@ -1008,7 +1208,8 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, return NI_Illegal; } - bool isIsaSupported = comp->compSupportsHWIntrinsic(isa); + bool isHWIntrinsicEnabled = (JitConfig.EnableHWIntrinsic() != 0); + bool isIsaSupported = isHWIntrinsicEnabled && comp->compSupportsHWIntrinsic(isa); bool isHardwareAcceleratedProp = false; bool isSupportedProp = false; uint32_t vectorByteLength = 0; @@ -1035,7 +1236,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, if (isa == InstructionSet_Vector128) { - isa = InstructionSet_SSE2; + isa = InstructionSet_X86Base; vectorByteLength = 16; } else if (isa == InstructionSet_Vector256) @@ -1045,7 +1246,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, } else if (isa == InstructionSet_Vector512) { - isa = InstructionSet_AVX512F; + isa = InstructionSet_AVX512; vectorByteLength = 64; } else @@ -1089,7 +1290,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, { return NI_IsSupported_True; } - else + else if (isSupportedProp) { assert(comp->IsTargetAbi(CORINFO_NATIVEAOT_ABI)); return NI_IsSupported_Dynamic; @@ -1113,7 +1314,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, if (isa == InstructionSet_Vector128) { - if (!comp->IsBaselineSimdIsaSupported()) + if (!isHWIntrinsicEnabled) { return NI_Illegal; } @@ -1135,7 +1336,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, } else if (isa == InstructionSet_Vector512) { - if (!comp->IsBaselineVector512IsaSupportedOpportunistically()) + if (!comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { return NI_Illegal; } @@ -1143,64 +1344,36 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, #elif defined(TARGET_ARM64) else if (isa == InstructionSet_Vector64) { - if (!comp->IsBaselineSimdIsaSupported()) + if 
(!isHWIntrinsicEnabled) { return NI_Illegal; } } #endif - size_t isaIndex = static_cast(isa) - 1; - assert(isaIndex < ARRAY_SIZE(hwintrinsicIsaRangeArray)); - - const HWIntrinsicIsaRange& isaRange = hwintrinsicIsaRangeArray[isaIndex]; - - if (isaRange.FirstId == NI_Illegal) - { - return NI_Illegal; - } - - size_t rangeLower = isaRange.FirstId; - size_t rangeUpper = isaRange.LastId; +#if defined(TARGET_XARCH) + // AVX10v1 is a strict superset of all AVX512 ISAs + // + // The original design was that it exposed the AVX512VL instructions without requiring V512 support + // however, later iterations changed this and it is now just a unifying ISA instead - while (rangeLower <= rangeUpper) + if (isa == InstructionSet_AVX10v1) { - // This is safe since rangeLower and rangeUpper will never be negative - size_t rangeIndex = (rangeUpper + rangeLower) / 2; - - NamedIntrinsic ni = static_cast(rangeIndex); - const HWIntrinsicInfo& intrinsicInfo = HWIntrinsicInfo::lookup(ni); - - int sortOrder = strcmp(methodName, intrinsicInfo.name); + NamedIntrinsic ni = binarySearchId(InstructionSet_AVX512, sig, methodName, isLimitedVector256Isa); - if (sortOrder < 0) - { - rangeUpper = rangeIndex - 1; - } - else if (sortOrder > 0) - { - rangeLower = rangeIndex + 1; - } - else + if (ni != NI_Illegal) { - assert(sortOrder == 0); - assert((intrinsicInfo.numArgs == -1) || (sig->numArgs == static_cast(intrinsicInfo.numArgs))); - -#if defined(TARGET_XARCH) - // on AVX1-only CPUs we only support a subset of intrinsics in Vector256 - if (isLimitedVector256Isa && !AvxOnlyCompatible(ni)) - { - return NI_Illegal; - } -#endif // TARGET_XARCH - return ni; } + return binarySearchId(InstructionSet_AVX512VBMI, sig, methodName, isLimitedVector256Isa); + } + else if (isa == InstructionSet_AVX10v1_X64) + { + return binarySearchId(InstructionSet_AVX512_X64, sig, methodName, isLimitedVector256Isa); } +#endif // TARGET_XARCH - // There are several helper intrinsics that are implemented in managed code - // Those intrinsics will hit this code path and need to return NI_Illegal - return NI_Illegal; + return binarySearchId(isa, sig, methodName, isLimitedVector256Isa); } //------------------------------------------------------------------------ @@ -1843,6 +2016,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, if (simdBaseJitType != CORINFO_TYPE_UNDEF) { simdBaseType = JitType2PreciseVarType(simdBaseJitType); + #ifdef TARGET_XARCH if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic) && varTypeIsSmall(simdBaseType)) { @@ -1991,7 +2165,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, if (!isScalar) { - if (HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType) == INS_invalid) + if (HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType, this) == INS_invalid) { assert(!"Unexpected HW intrinsic"); return nullptr; @@ -2058,7 +2232,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { // Although the API specifies a pointer, if what we have is a BYREF, that's what // we really want, so throw away the cast. 
- if (op1->gtGetOp1()->TypeGet() == TYP_BYREF) + if (op1->gtGetOp1()->TypeIs(TYP_BYREF)) { op1 = op1->gtGetOp1(); } @@ -2078,9 +2252,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, case NI_AVX2_ConvertToVector256Int16: case NI_AVX2_ConvertToVector256Int32: case NI_AVX2_ConvertToVector256Int64: - case NI_AVX2_BroadcastVector128ToVector256: - case NI_AVX512F_BroadcastVector128ToVector512: - case NI_AVX512F_BroadcastVector256ToVector512: { // These intrinsics have both pointer and vector overloads // We want to be able to differentiate between them so lets @@ -2205,7 +2376,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { // Although the API specifies a pointer, if what we have is a BYREF, that's what // we really want, so throw away the cast. - if (op1->gtGetOp1()->TypeGet() == TYP_BYREF) + if (op1->gtGetOp1()->TypeIs(TYP_BYREF)) { op1 = op1->gtGetOp1(); } diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index d8bf386eb600..8523bf0befb6 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -6,6 +6,8 @@ #ifdef FEATURE_HW_INTRINSICS +class Compiler; + #ifdef TARGET_XARCH enum HWIntrinsicCategory : uint8_t { @@ -162,14 +164,12 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic is a PermuteVar2x intrinsic HW_Flag_PermuteVar2x = 0x400000, - // The intrinsic is an embedded broadcast compatible intrinsic - HW_Flag_EmbBroadcastCompatible = 0x800000, + // UNUSED = 0x800000, // The intrinsic is an embedded rounding compatible intrinsic HW_Flag_EmbRoundingCompatible = 0x1000000, - // The intrinsic is an embedded masking compatible intrinsic - HW_Flag_EmbMaskingCompatible = 0x2000000, + // UNUSED = 0x2000000, // The base type of this intrinsic needs to be normalized to int/uint unless it is long/ulong. HW_Flag_NormalizeSmallTypeToInt = 0x4000000, @@ -233,6 +233,11 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic is a reduce operation. HW_Flag_ReduceOperation = 0x2000000, + // This intrinsic could be implemented with another intrinsic when it is operating on operands that are all of + // type TYP_MASK, and this other intrinsic will produce a value of this type. Used in morph to convert vector + // operations into mask operations when the intrinsic is operating on mask vectors (mainly bitwise operations). 
+ HW_Flag_HasAllMaskVariant = 0x4000000, + #else #error Unsupported platform #endif @@ -552,6 +557,26 @@ struct HWIntrinsicInfo FloatComparisonMode comparison, var_types simdBaseType, unsigned simdSize); + + //------------------------------------------------------------------------ + // genIsTableDrivenHWIntrinsic: + // + // Arguments: + // intrinsicId - The identifier for the hwintrinsic to check + // category - The category of intrinsicId + // + // Return Value: + // returns true if this category can be table-driven in CodeGen + // + static bool genIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicCategory category) + { + // TODO - make more categories to the table-driven framework + // HW_Category_Helper and HW_Flag_SpecialCodeGen usually need manual codegen + const bool tableDrivenCategory = + (category != HW_Category_Special) && (category != HW_Category_Scalar) && (category != HW_Category_Helper); + const bool tableDrivenFlag = !HWIntrinsicInfo::HasSpecialCodegen(intrinsicId); + return tableDrivenCategory && tableDrivenFlag; + } #endif // Member lookup @@ -594,19 +619,9 @@ struct HWIntrinsicInfo return lookup(id).numArgs; } - static instruction lookupIns(NamedIntrinsic id, var_types type) - { - if ((type < TYP_BYTE) || (type > TYP_DOUBLE)) - { - assert(!"Unexpected type"); - return INS_invalid; - } - - uint16_t result = lookup(id).ins[type - TYP_BYTE]; - return static_cast(result); - } + static instruction lookupIns(NamedIntrinsic id, var_types type, Compiler* comp); - static instruction lookupIns(GenTreeHWIntrinsic* intrinsicNode) + static instruction lookupIns(GenTreeHWIntrinsic* intrinsicNode, Compiler* comp) { assert(intrinsicNode != nullptr); @@ -622,7 +637,7 @@ struct HWIntrinsicInfo type = intrinsicNode->GetSimdBaseType(); } - return lookupIns(intrinsic, type); + return lookupIns(intrinsic, type, comp); } static HWIntrinsicCategory lookupCategory(NamedIntrinsic id) @@ -644,23 +659,11 @@ struct HWIntrinsicInfo } #if defined(TARGET_XARCH) - static bool IsEmbBroadcastCompatible(NamedIntrinsic id) - { - HWIntrinsicFlag flags = lookupFlags(id); - return (flags & HW_Flag_EmbBroadcastCompatible) != 0; - } - static bool IsEmbRoundingCompatible(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); return (flags & HW_Flag_EmbRoundingCompatible) != 0; } - - static bool IsEmbMaskingCompatible(NamedIntrinsic id) - { - HWIntrinsicFlag flags = lookupFlags(id); - return (flags & HW_Flag_EmbMaskingCompatible) != 0; - } #endif // TARGET_XARCH static bool CanBenefitFromConstantProp(NamedIntrinsic id) @@ -902,19 +905,12 @@ struct HWIntrinsicInfo #ifdef TARGET_XARCH switch (id) { + case NI_AVX2_ShiftLeftLogicalVariable: case NI_AVX2_ShiftRightArithmeticVariable: - case NI_AVX512F_ShiftRightArithmeticVariable: - case NI_AVX512F_VL_ShiftRightArithmeticVariable: - case NI_AVX512BW_ShiftRightArithmeticVariable: - case NI_AVX512BW_VL_ShiftRightArithmeticVariable: - case NI_AVX10v1_ShiftRightArithmeticVariable: case NI_AVX2_ShiftRightLogicalVariable: - case NI_AVX512F_ShiftRightLogicalVariable: - case NI_AVX512BW_ShiftRightLogicalVariable: - case NI_AVX512BW_VL_ShiftRightLogicalVariable: - case NI_AVX10v1_ShiftRightLogicalVariable: - case NI_AVX2_ShiftLeftLogicalVariable: - case NI_AVX512BW_VL_ShiftLeftLogicalVariable: + case NI_AVX512_ShiftLeftLogicalVariable: + case NI_AVX512_ShiftRightArithmeticVariable: + case NI_AVX512_ShiftRightLogicalVariable: return true; default: return false; @@ -923,6 +919,96 @@ struct HWIntrinsicInfo return false; } + static bool 
IsVectorCreate(NamedIntrinsic id) + { + switch (id) + { +#if defined(TARGET_ARM64) + case NI_Vector64_Create: +#endif // TARGET_ARM64 + case NI_Vector128_Create: +#if defined(TARGET_XARCH) + case NI_Vector256_Create: + case NI_Vector512_Create: +#endif // TARGET_XARCH + return true; + default: + return false; + } + } + + static bool IsVectorCreateScalar(NamedIntrinsic id) + { + switch (id) + { +#if defined(TARGET_ARM64) + case NI_Vector64_CreateScalar: +#endif // TARGET_ARM64 + case NI_Vector128_CreateScalar: +#if defined(TARGET_XARCH) + case NI_Vector256_CreateScalar: + case NI_Vector512_CreateScalar: +#endif // TARGET_XARCH + return true; + default: + return false; + } + } + + static bool IsVectorCreateScalarUnsafe(NamedIntrinsic id) + { + switch (id) + { +#if defined(TARGET_ARM64) + case NI_Vector64_CreateScalarUnsafe: +#endif // TARGET_ARM64 + case NI_Vector128_CreateScalarUnsafe: +#if defined(TARGET_XARCH) + case NI_Vector256_CreateScalarUnsafe: + case NI_Vector512_CreateScalarUnsafe: +#endif // TARGET_XARCH + return true; + default: + return false; + } + } + + static bool IsVectorGetElement(NamedIntrinsic id) + { + switch (id) + { +#if defined(TARGET_ARM64) + case NI_Vector64_GetElement: +#endif // TARGET_ARM64 + case NI_Vector128_GetElement: +#if defined(TARGET_XARCH) + case NI_Vector256_GetElement: + case NI_Vector512_GetElement: +#endif // TARGET_XARCH + return true; + default: + return false; + } + } + + static bool IsVectorToScalar(NamedIntrinsic id) + { + switch (id) + { +#if defined(TARGET_ARM64) + case NI_Vector64_ToScalar: +#endif // TARGET_ARM64 + case NI_Vector128_ToScalar: +#if defined(TARGET_XARCH) + case NI_Vector256_ToScalar: + case NI_Vector512_ToScalar: +#endif // TARGET_XARCH + return true; + default: + return false; + } + } + static bool HasImmediateOperand(NamedIntrinsic id) { #if defined(TARGET_ARM64) @@ -1043,6 +1129,67 @@ struct HWIntrinsicInfo } } +#ifdef FEATURE_MASKED_HW_INTRINSICS + // HasAllMaskVariant: Does the intrinsic have an intrinsic variant that operates on mask types? + // + // Arguments: + // id -- the intrinsic to check for a mask-type variant. + // + // Return Value: + // true when the intrinsic has a mask-type variant, else false + // + static bool HasAllMaskVariant(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_HasAllMaskVariant) != 0; + } + + // GetMaskVariant: Given an intrinsic that has a variant that operates on mask types, return the ID of + // this variant intrinsic. Call HasAllMaskVariant before using this function, as it will + // assert if no match is found. + // + // Arguments: + // id -- the intrinsic with a mask-type variant. 
+ // + // Return Value: + // The ID of the mask-type variant for the given intrinsic + // + static NamedIntrinsic GetMaskVariant(NamedIntrinsic id) + { + assert(HasAllMaskVariant(id)); + switch (id) + { + case NI_Sve_And: + return NI_Sve_And_Predicates; + case NI_Sve_BitwiseClear: + return NI_Sve_BitwiseClear_Predicates; + case NI_Sve_Xor: + return NI_Sve_Xor_Predicates; + case NI_Sve_Or: + return NI_Sve_Or_Predicates; + case NI_Sve_ZipHigh: + return NI_Sve_ZipHigh_Predicates; + case NI_Sve_ZipLow: + return NI_Sve_ZipLow_Predicates; + case NI_Sve_UnzipOdd: + return NI_Sve_UnzipOdd_Predicates; + case NI_Sve_UnzipEven: + return NI_Sve_UnzipEven_Predicates; + case NI_Sve_TransposeEven: + return NI_Sve_TransposeEven_Predicates; + case NI_Sve_TransposeOdd: + return NI_Sve_TransposeOdd_Predicates; + case NI_Sve_ReverseElement: + return NI_Sve_ReverseElement_Predicates; + case NI_Sve_ConditionalSelect: + return NI_Sve_ConditionalSelect_Predicates; + + default: + unreached(); + } + } +#endif // FEATURE_MASKED_HW_INTRINSICS + #endif // TARGET_ARM64 static bool HasSpecialSideEffect(NamedIntrinsic id) @@ -1081,11 +1228,6 @@ struct HWIntrinsicInfo HWIntrinsicFlag flags = lookupFlags(id); return (flags & HW_Flag_PermuteVar2x) != 0; } - - static bool IsTernaryLogic(NamedIntrinsic id) - { - return (id == NI_AVX512F_TernaryLogic) || (id == NI_AVX512F_VL_TernaryLogic) || (id == NI_AVX10v1_TernaryLogic); - } #endif // TARGET_XARCH #if defined(TARGET_ARM64) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index be1c577d4bdf..b87aa304a22f 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -36,6 +36,8 @@ static CORINFO_InstructionSet Arm64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_Rdm_Arm64; case InstructionSet_Sve: return InstructionSet_Sve_Arm64; + case InstructionSet_Sve2: + return InstructionSet_Sve2_Arm64; default: return InstructionSet_NONE; } @@ -99,6 +101,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { return InstructionSet_Sha256; } + if (strcmp(className, "Sve2") == 0) + { + return InstructionSet_Sve2; + } if (strcmp(className, "Sve") == 0) { return InstructionSet_Sve; @@ -212,6 +218,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_Sha256_Arm64: case InstructionSet_Sve: case InstructionSet_Sve_Arm64: + case InstructionSet_Sve2: + case InstructionSet_Sve2_Arm64: case InstructionSet_Vector64: case InstructionSet_Vector128: return true; @@ -500,10 +508,10 @@ void HWIntrinsicInfo::lookupImmBounds( case NI_Sve_GatherPrefetch16Bit: case NI_Sve_GatherPrefetch32Bit: case NI_Sve_GatherPrefetch64Bit: - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: + case NI_Sve_Prefetch16Bit: + case NI_Sve_Prefetch32Bit: + case NI_Sve_Prefetch64Bit: + case NI_Sve_Prefetch8Bit: immLowerBound = (int)SVE_PRFOP_PLDL1KEEP; immUpperBound = (int)SVE_PRFOP_CONST15; break; @@ -709,6 +717,32 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector64_AddSaturate: + case NI_Vector128_AddSaturate: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (varTypeIsFloating(simdBaseType)) + { + retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); + } + else + { + intrinsic = NI_AdvSimd_AddSaturate; + + if ((simdSize == 8) && varTypeIsLong(simdBaseType)) + { + intrinsic = 
NI_AdvSimd_AddSaturateScalar; + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + } + break; + } + case NI_AdvSimd_BitwiseClear: case NI_Vector64_AndNot: case NI_Vector128_AndNot: @@ -2114,6 +2148,39 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector64_NarrowWithSaturation: + case NI_Vector128_NarrowWithSaturation: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (varTypeIsFloating(simdBaseType)) + { + retNode = gtNewSimdNarrowNode(retType, op1, op2, simdBaseJitType, simdSize); + } + else if (simdSize == 16) + { + intrinsic = NI_AdvSimd_ExtractNarrowingSaturateLower; + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, intrinsic, simdBaseJitType, 8); + + intrinsic = NI_AdvSimd_ExtractNarrowingSaturateUpper; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + } + else + { + intrinsic = NI_Vector64_ToVector128Unsafe; + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, simdBaseJitType, simdSize); + + op1 = gtNewSimdWithUpperNode(TYP_SIMD16, op1, op2, simdBaseJitType, 16); + + intrinsic = NI_AdvSimd_ExtractNarrowingSaturateLower; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); + } + break; + } + case NI_Vector64_op_UnaryNegation: case NI_Vector128_op_UnaryNegation: { @@ -2251,38 +2318,56 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector64_Shuffle: case NI_Vector128_Shuffle: + case NI_Vector64_ShuffleNative: + case NI_Vector128_ShuffleNative: + case NI_Vector64_ShuffleNativeFallback: + case NI_Vector128_ShuffleNativeFallback: { assert((sig->numArgs == 2) || (sig->numArgs == 3)); assert((simdSize == 8) || (simdSize == 16)); + // The Native variants are non-deterministic on arm64 (for element size > 1) + bool isShuffleNative = (intrinsic != NI_Vector64_Shuffle) && (intrinsic != NI_Vector128_Shuffle); + if (isShuffleNative && (genTypeSize(simdBaseType) > 1) && BlockNonDeterministicIntrinsics(mustExpand)) + { + break; + } + GenTree* indices = impStackTop(0).val; - if (!indices->IsCnsVec() || !IsValidForShuffle(indices->AsVecCon(), simdSize, simdBaseType)) + // Check if the required intrinsics to emit are available. + bool canBecomeValidForShuffle = false; + if (!IsValidForShuffle(indices, simdSize, simdBaseType, &canBecomeValidForShuffle, isShuffleNative)) + { + // All cases on arm64 are either valid or invalid, they cannot become valid later + assert(!canBecomeValidForShuffle); + break; + } + + // If the indices might become constant later, then we don't emit for now, delay until later. 
+ if (!indices->IsCnsVec()) { assert(sig->numArgs == 2); - if (!opts.OptimizationEnabled()) + if (opts.OptimizationEnabled()) { // Only enable late stage rewriting if optimizations are enabled // as we won't otherwise encounter a constant at the later point - return nullptr; - } + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - break; + retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); + break; + } } if (sig->numArgs == 2) { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize); + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, isShuffleNative); } break; } @@ -2310,7 +2395,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = impPopStack().val; - if (op2->TypeGet() == TYP_STRUCT) + if (op2->TypeIs(TYP_STRUCT)) { info.compNeedsConsecutiveRegisters = true; unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); @@ -2330,7 +2415,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { // Although the API specifies a pointer, if what we have is a BYREF, that's what // we really want, so throw away the cast. - if (op1->gtGetOp1()->TypeGet() == TYP_BYREF) + if (op1->gtGetOp1()->TypeIs(TYP_BYREF)) { op1 = op1->gtGetOp1(); } @@ -2340,7 +2425,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } else { - if (op2->TypeGet() == TYP_SIMD16) + if (op2->TypeIs(TYP_SIMD16)) { // Update the simdSize explicitly as Vector128 variant of Store() is present in AdvSimd instead of // AdvSimd.Arm64. @@ -2478,12 +2563,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass); - assert(op2->TypeGet() == TYP_STRUCT); + assert(op2->TypeIs(TYP_STRUCT)); if (op1->OperIs(GT_CAST)) { // Although the API specifies a pointer, if what we have is a BYREF, that's what // we really want, so throw away the cast. - if (op1->gtGetOp1()->TypeGet() == TYP_BYREF) + if (op1->gtGetOp1()->TypeIs(TYP_BYREF)) { op1 = op1->gtGetOp1(); } @@ -2511,7 +2596,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(sig->numArgs == 3); assert(retType == TYP_VOID); - if (!mustExpand && !impStackTop(0).val->IsCnsIntOrI() && (impStackTop(1).val->TypeGet() == TYP_STRUCT)) + if (!mustExpand && !impStackTop(0).val->IsCnsIntOrI() && impStackTop(1).val->TypeIs(TYP_STRUCT)) { // TODO-ARM64-CQ: Support rewriting nodes that involves // GenTreeFieldList as user calls during rationalization @@ -2531,7 +2616,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, int immLowerBound = 0; int immUpperBound = 0; - if (op2->TypeGet() == TYP_STRUCT) + if (op2->TypeIs(TYP_STRUCT)) { info.compNeedsConsecutiveRegisters = true; intrinsic = simdSize == 8 ? 
NI_AdvSimd_StoreSelectedScalar : NI_AdvSimd_Arm64_StoreSelectedScalar; @@ -2562,7 +2647,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { // Although the API specifies a pointer, if what we have is a BYREF, that's what // we really want, so throw away the cast. - if (op1->gtGetOp1()->TypeGet() == TYP_BYREF) + if (op1->gtGetOp1()->TypeIs(TYP_BYREF)) { op1 = op1->gtGetOp1(); } @@ -2572,6 +2657,32 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector64_SubtractSaturate: + case NI_Vector128_SubtractSaturate: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (varTypeIsFloating(simdBaseType)) + { + retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); + } + else + { + intrinsic = NI_AdvSimd_SubtractSaturate; + + if ((simdSize == 8) && varTypeIsLong(simdBaseType)) + { + intrinsic = NI_AdvSimd_SubtractSaturateScalar; + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + } + break; + } + case NI_Vector64_Sum: case NI_Vector128_Sum: { @@ -2726,7 +2837,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { // Although the API specifies a pointer, if what we have is a BYREF, that's what // we really want, so throw away the cast. - if (op1->gtGetOp1()->TypeGet() == TYP_BYREF) + if (op1->gtGetOp1()->TypeIs(TYP_BYREF)) { op1 = op1->gtGetOp1(); } @@ -2754,7 +2865,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { // Although the API specifies a pointer, if what we have is a BYREF, that's what // we really want, so throw away the cast. - if (op2->gtGetOp1()->TypeGet() == TYP_BYREF) + if (op2->gtGetOp1()->TypeIs(TYP_BYREF)) { op2 = op2->gtGetOp1(); } @@ -2800,14 +2911,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { // Although the API specifies a pointer, if what we have is a BYREF, that's what // we really want, so throw away the cast. 
- if (op3->gtGetOp1()->TypeGet() == TYP_BYREF) + if (op3->gtGetOp1()->TypeIs(TYP_BYREF)) { op3 = op3->gtGetOp1(); } } assert(HWIntrinsicInfo::IsMultiReg(intrinsic)); - assert(op1->TypeGet() == TYP_STRUCT); + assert(op1->TypeIs(TYP_STRUCT)); info.compNeedsConsecutiveRegisters = true; unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); @@ -2840,7 +2951,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); op1 = impPopStack().val; - if (op1->TypeGet() == TYP_STRUCT) + if (op1->TypeIs(TYP_STRUCT)) { info.compNeedsConsecutiveRegisters = true; unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); @@ -2880,7 +2991,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impPopStack().val; - if (op2->TypeGet() == TYP_STRUCT) + if (op2->TypeIs(TYP_STRUCT)) { info.compNeedsConsecutiveRegisters = true; unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); @@ -2919,7 +3030,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op3 = impPopStack().val; unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); - if (op3->TypeGet() == TYP_STRUCT) + if (op3->TypeIs(TYP_STRUCT)) { info.compNeedsConsecutiveRegisters = true; switch (fieldCount) @@ -3071,10 +3182,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Sve_GatherPrefetch16Bit: case NI_Sve_GatherPrefetch32Bit: case NI_Sve_GatherPrefetch64Bit: - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: + case NI_Sve_Prefetch16Bit: + case NI_Sve_Prefetch32Bit: + case NI_Sve_Prefetch64Bit: + case NI_Sve_Prefetch8Bit: { assert((sig->numArgs == 3) || (sig->numArgs == 4)); assert(!isScalar); @@ -3173,6 +3284,32 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Sve_ExtractAfterLastActiveElementScalar: + case NI_Sve_ExtractLastActiveElementScalar: + { + assert(sig->numArgs == 2); + +#ifdef DEBUG + isValidScalarIntrinsic = true; +#endif + + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + CorInfoType op2BaseJitType = getBaseJitTypeOfSIMDType(argClass); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass); + + retNode = gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic); + + retNode->AsHWIntrinsic()->SetSimdBaseJitType(simdBaseJitType); + break; + } + case NI_Sve_MultiplyAddRotateComplexBySelectedScalar: { assert(sig->numArgs == 5); @@ -3230,7 +3367,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } //------------------------------------------------------------------------ -// gtNewSimdEmbeddedMaskNode: Create an embedded mask +// gtNewSimdAllTrueMaskNode: Create an embedded mask with all bits set to true // // Arguments: // simdBaseJitType -- the base jit type of the nodes being masked @@ -3244,4 +3381,18 @@ GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigne return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize); } 
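For reference, a minimal scalar sketch (outside the patch) of the per-element saturating arithmetic that the Vector64/128 AddSaturate and SubtractSaturate expansions above lean on; the helper name is illustrative and models only the signed byte case.

```cpp
#include <cstdint>
#include <algorithm>

// Signed saturating subtract for one int8_t lane: compute in a wider type, then clamp.
static int8_t SaturatingSubInt8(int8_t a, int8_t b)
{
    int wide = static_cast<int>(a) - static_cast<int>(b);
    wide     = std::clamp(wide, -128, 127);
    return static_cast<int8_t>(wide);
}
```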
+//------------------------------------------------------------------------ +// gtNewSimdFalseMaskByteNode: Create an embedded mask with all bits set to false +// +// Arguments: +// simdSize -- the simd size of the nodes being masked +// +// Return Value: +// The mask +// +GenTree* Compiler::gtNewSimdFalseMaskByteNode(unsigned simdSize) +{ + return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_UBYTE, simdSize); +} + #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 2751839a89b5..e83fb55e1a18 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -359,7 +359,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (intrin.codeGenIsTableDriven()) { - const instruction ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType); + const instruction ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType, compiler); assert(ins != INS_invalid); if (intrin.category == HW_Category_SIMDByIndexedElement) @@ -459,8 +459,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // Get the registers and intrinsics that needs embedded mask const HWIntrinsic intrinEmbMask(op2->AsHWIntrinsic()); - instruction insEmbMask = HWIntrinsicInfo::lookupIns(intrinEmbMask.id, intrinEmbMask.baseType); - const bool instrIsRMW = op2->isRMWHWIntrinsic(compiler); + instruction insEmbMask = HWIntrinsicInfo::lookupIns(intrinEmbMask.id, intrinEmbMask.baseType, compiler); + const bool instrIsRMW = op2->isRMWHWIntrinsic(compiler); regNumber maskReg = op1Reg; regNumber embMaskOp1Reg = REG_NA; @@ -702,6 +702,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Sve_And_Predicates: + case NI_Sve_BitwiseClear_Predicates: + case NI_Sve_Or_Predicates: + case NI_Sve_Xor_Predicates: + GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, + embMaskOp2Reg, INS_OPTS_SCALABLE_B); + break; + default: { GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, @@ -1158,26 +1166,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { case NI_AdvSimd_AddWideningLower: assert(varTypeIsIntegral(intrin.baseType)); - if (intrin.op1->TypeGet() == TYP_SIMD8) + if (intrin.op1->TypeIs(TYP_SIMD8)) { ins = varTypeIsUnsigned(intrin.baseType) ? INS_uaddl : INS_saddl; } else { - assert(intrin.op1->TypeGet() == TYP_SIMD16); + assert(intrin.op1->TypeIs(TYP_SIMD16)); ins = varTypeIsUnsigned(intrin.baseType) ? INS_uaddw : INS_saddw; } break; case NI_AdvSimd_SubtractWideningLower: assert(varTypeIsIntegral(intrin.baseType)); - if (intrin.op1->TypeGet() == TYP_SIMD8) + if (intrin.op1->TypeIs(TYP_SIMD8)) { ins = varTypeIsUnsigned(intrin.baseType) ? INS_usubl : INS_ssubl; } else { - assert(intrin.op1->TypeGet() == TYP_SIMD16); + assert(intrin.op1->TypeIs(TYP_SIMD16)); ins = varTypeIsUnsigned(intrin.baseType) ? 
INS_usubw : INS_ssubw; } break; @@ -1221,11 +1229,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Sve_StoreNarrowing: - ins = HWIntrinsicInfo::lookupIns(intrin.id, node->GetAuxiliaryType()); + ins = HWIntrinsicInfo::lookupIns(intrin.id, node->GetAuxiliaryType(), compiler); break; default: - ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType); + ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType, compiler); break; } @@ -1778,10 +1786,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: + case NI_Sve_Prefetch16Bit: + case NI_Sve_Prefetch32Bit: + case NI_Sve_Prefetch64Bit: + case NI_Sve_Prefetch8Bit: { assert(hasImmediateOperand); HWIntrinsicImmOpHelper helper(this, intrin.op3, node); @@ -2478,6 +2486,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_CreateBreakAfterPropagateMask: case NI_Sve_CreateBreakBeforePropagateMask: + case NI_Sve_ConditionalSelect_Predicates: { GetEmitter()->emitInsSve_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, INS_OPTS_SCALABLE_B); break; @@ -2561,13 +2570,42 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(varTypeIsFloating(node->gtType) || varTypeIsSIMD(node->gtType)); assert((targetReg == op2Reg) || (targetReg != op1Reg)); assert((targetReg == op2Reg) || (targetReg != op3Reg)); - GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op2Reg, - /* canSkip */ true); + + GetEmitter()->emitIns_Mov(INS_sve_mov, EA_SCALABLE, targetReg, op2Reg, /* canSkip */ true, opt); GetEmitter()->emitInsSve_R_R_R(ins, EA_SCALABLE, targetReg, op1Reg, op3Reg, opt, INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); break; } + case NI_Sve_ExtractAfterLastActiveElementScalar: + case NI_Sve_ExtractLastActiveElementScalar: + { + opt = emitter::optGetSveInsOpt(emitTypeSize(node->GetSimdBaseType())); + + if (emitter::isGeneralRegisterOrZR(targetReg)) + { + assert(varTypeIsIntegralOrI(intrin.baseType)); + + emitSize = emitTypeSize(node); + GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt, + INS_SCALABLE_OPTS_NONE); + break; + } + + // FP scalars are processed by the INS_SCALABLE_OPTS_WITH_SIMD_SCALAR variant of the instructions + FALLTHROUGH; + } + case NI_Sve_ExtractAfterLastActiveElement: + case NI_Sve_ExtractLastActiveElement: + { + assert(emitter::isFloatReg(targetReg)); + assert(varTypeIsFloating(node->gtType) || varTypeIsSIMD(node->gtType)); + + GetEmitter()->emitInsSve_R_R_R(ins, EA_SCALABLE, targetReg, op1Reg, op2Reg, opt, + INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); + break; + } + case NI_Sve_TrigonometricMultiplyAddCoefficient: { assert(isRMW); @@ -2649,6 +2687,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Sve2_BitwiseClearXor: + case NI_Sve2_Xor: + if (targetReg != op1Reg) + { + assert(targetReg != op2Reg && targetReg != op3Reg); + GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg); + } + // Always use the lane size D. It's a bitwise operation so this is fine for all integer vector types. + GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, INS_OPTS_SCALABLE_D); + break; + + case NI_Sve2_BitwiseSelect: + case NI_Sve2_BitwiseSelectLeftInverted: + case NI_Sve2_BitwiseSelectRightInverted: + // op1: select, op2: left, op3: right + // Operation is destructive on the 'left' operand. 
+ if (targetReg != op2Reg) + { + assert(targetReg != op3Reg && targetReg != op1Reg); + GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op2Reg); + } + // Always use the lane size D. It's a bitwise operation so this is fine for all integer vector types. + GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op3Reg, op1Reg, INS_OPTS_SCALABLE_D); + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 96d40a9e43b5..78bed523d9d0 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -62,25 +62,6 @@ static void assertIsContainableHWIntrinsicOp(Lowering* lowering, #endif // DEBUG } -//------------------------------------------------------------------------ -// genIsTableDrivenHWIntrinsic: -// -// Arguments: -// category - category of a HW intrinsic -// -// Return Value: -// returns true if this category can be table-driven in CodeGen -// -static bool genIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicCategory category) -{ - // TODO - make more categories to the table-driven framework - // HW_Category_Helper and HW_Flag_SpecialCodeGen usually need manual codegen - const bool tableDrivenCategory = - (category != HW_Category_Special) && (category != HW_Category_Scalar) && (category != HW_Category_Helper); - const bool tableDrivenFlag = !HWIntrinsicInfo::HasSpecialCodegen(intrinsicId); - return tableDrivenCategory && tableDrivenFlag; -} - //------------------------------------------------------------------------ // AddEmbRoundingMode: Adds the embedded rounding mode to the insOpts // @@ -168,6 +149,226 @@ static insOpts AddEmbMaskingMode(insOpts instOptions, regNumber maskReg, bool me return static_cast(result); } +//------------------------------------------------------------------------ +// GetImmediateMaxAndMask: Returns the max valid value and a bit mask for +// a full-range immediate of an instruction that has documented +// masking or clamping of the immediate. +// +// Arguments: +// instruction - The instruction to look up +// simdSize - The vector size for the instruction +// maskOut - A pointer to the location to return the mask +// +// Return Value: +// The max useful immediate value +// +static unsigned GetImmediateMaxAndMask(instruction ins, unsigned simdSize, unsigned* maskOut) +{ + assert(maskOut != nullptr); + assert((simdSize >= 16) && (simdSize <= 64)); + + unsigned lanes = simdSize / genTypeSize(TYP_SIMD16); + unsigned mask = 0xFF; + unsigned max = 0; + + switch (ins) + { + // These byte-wise shift instructions are documented to return a zero vector + // for shift amounts 16 or greater. + case INS_pslldq: + case INS_psrldq: + { + max = 16; + break; + } + + // palignr concatenates two 16-byte lanes and shifts the result by imm8 bytes. + // It is documented to return a zero vector for shift amounts 32 or greater. + case INS_palignr: + { + max = 32; + break; + } + + // The following groups of instructions extract/insert a scalar value from/to a + // 128-bit vector and use a documented range of bits for element index. 
+ case INS_pextrq: + case INS_pinsrq: + { + mask = 0b00000001; + max = mask; + break; + } + + case INS_extractps: + case INS_pextrd: + case INS_pinsrd: + { + mask = 0b00000011; + max = mask; + break; + } + + case INS_pextrw: + case INS_pinsrw: + { + mask = 0b00000111; + max = mask; + break; + } + + case INS_pextrb: + case INS_pinsrb: + { + mask = 0b00001111; + max = mask; + break; + } + + // The following instructions concatenate 128- or 256-bit vectors and shift the + // result right by imm8 elements. The number of bits used depends on the + // vector size / element size. + case INS_valignd: + { + mask = (simdSize / genTypeSize(TYP_INT)) - 1; + max = mask; + break; + } + + case INS_valignq: + { + mask = (simdSize / genTypeSize(TYP_LONG)) - 1; + max = mask; + break; + } + + // The following groups of instructions operate in 128-bit lanes but use a + // different range of bits from the immediate for each lane. + case INS_blendpd: + case INS_shufpd: + case INS_vpermilpd: + { + assert(lanes <= 4); + + // two bits per lane + mask = (1 << (lanes * 2)) - 1; + max = mask; + break; + } + + case INS_blendps: + case INS_vpblendd: + { + assert(lanes <= 2); + + // four bits per lane + mask = (1 << (lanes * 4)) - 1; + max = mask; + break; + } + + case INS_mpsadbw: + { + assert(lanes <= 2); + + // three bits per lane + mask = (1 << (lanes * 3)) - 1; + max = mask; + break; + } + + // These instructions extract/insert a 128-bit vector from/to either a 256-bit or + // 512-bit vector. The number of positions is equal to the number of 128-bit lanes. + case INS_vextractf32x4: + case INS_vextracti32x4: + case INS_vextractf64x2: + case INS_vextracti64x2: + case INS_vinsertf32x4: + case INS_vinserti32x4: + case INS_vinsertf64x2: + case INS_vinserti64x2: + { + assert(lanes >= 2); + + mask = lanes - 1; + max = mask; + break; + } + + // These instructions shuffle 128-bit lanes within a larger vector. + // The number of bits used depends on the number of possible lanes. + case INS_vshuff32x4: + case INS_vshufi32x4: + case INS_vshuff64x2: + case INS_vshufi64x2: + { + assert(lanes >= 2); + + // log2(lanes) bits per lane for src selection + mask = (1 << (lanes * BitOperations::Log2(lanes))) - 1; + max = mask; + break; + } + + // These instructions extract/insert a 256-bit vector from/to a 512-bit vector + // and therefore only have two possible positions. + case INS_vextractf32x8: + case INS_vextracti32x8: + case INS_vextractf64x4: + case INS_vextracti64x4: + case INS_vinsertf32x8: + case INS_vinserti32x8: + case INS_vinsertf64x4: + case INS_vinserti64x4: + { + assert(simdSize == 64); + + mask = 0b00000001; + max = mask; + break; + } + + // The following instructions use documented ranges of bits with gaps in them. 
+ case INS_dppd: + { + // bits [1:0] are the result broadcast mask + // bits [5:4] are the element selection mask + mask = 0b00110011; + max = mask; + break; + } + + case INS_pclmulqdq: + { + // bit 0 selects the src1 qword + // bit 4 selects the src2 qword + mask = 0b00010001; + max = mask; + break; + } + + case INS_vperm2f128: + case INS_vperm2i128: + { + // bits [1:0] select the src index for the low lane result + // bits [5:4] select the src index for the high lane result + // bits 3 and 7, if set, will zero the low or high lane, respectively + mask = 0b10111011; + max = mask; + break; + } + + default: + { + max = 255; + break; + } + } + + *maskOut = mask; + return max; +} + //------------------------------------------------------------------------ // genHWIntrinsic: Generates the code for a given hardware intrinsic node. // @@ -188,7 +389,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId)); assert(!HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) || !varTypeIsSmall(node->GetSimdBaseType())); - bool isTableDriven = genIsTableDrivenHWIntrinsic(intrinsicId, category); + bool isTableDriven = HWIntrinsicInfo::genIsTableDrivenHWIntrinsic(intrinsicId, category); insOpts instOptions = INS_OPTS_NONE; if (GetEmitter()->UseEvexEncoding()) @@ -199,7 +400,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (op2->IsEmbMaskOp()) { - assert(intrinsicId == NI_EVEX_BlendVariableMask); + assert(intrinsicId == NI_AVX512_BlendVariableMask); assert(op2->isContained()); assert(op2->OperIsHWIntrinsic()); @@ -309,7 +510,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { var_types baseType = node->GetSimdBaseType(); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); assert(ins != INS_invalid); emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); @@ -320,6 +521,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (isTableDriven) { + if (embMaskOp != nullptr) + { + // Handle an extra operand we need to consume so that + // embedded masking can work without making the overall + // logic significantly more complex. + + assert(embMaskNode != nullptr); + genConsumeReg(embMaskOp); + } + switch (numArgs) { case 1: @@ -332,8 +543,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) }; regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, - emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, lastOp->GetRegNum(), baseReg, + offsReg, emitSwCase); break; } case 2: @@ -344,8 +555,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) }; regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, - emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, lastOp->GetRegNum(), baseReg, + offsReg, emitSwCase); break; } @@ -357,11 +568,25 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { + // We shouldn't encounter embedded masking for non table-driven intrinsics + assert((embMaskNode == nullptr) && (embMaskOp == nullptr)); + // There are a few embedded rounding intrinsics that need to be emitted with special handling. 
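Aside (annotation, not part of the patch): the new GetImmediateMaxAndMask helper above encodes, per instruction, how much of the 8-bit immediate the hardware actually consumes. The snippet below is a minimal standalone sketch in plain C++ — the variable names and the two chosen instructions are just illustrative assumptions — showing how the mask/max pair comes out for a byte-wise shift that saturates at 16 and for valignd, whose element index only uses the low bits needed to address an element.

```cpp
#include <cassert>
#include <cstdio>

// Standalone illustration of the mask/max idea (not the JIT helper itself).
// pslldq/psrldq: any shift amount >= 16 yields a zero vector, so only
// values 0..16 need distinct handling.
// valignd: the hardware reads only enough imm8 bits to index an element,
// so the remaining bits can be masked off up front.
int main()
{
    // pslldq xmm, imm8: full 8-bit immediate accepted, behavior saturates at 16.
    unsigned pslldqMask = 0xFF;
    unsigned pslldqMax  = 16;

    // valignd on a 32-byte (256-bit) vector: 8 int32 elements,
    // so the useful immediate is imm8 & 0b111.
    unsigned simdSize    = 32;
    unsigned valigndMask = simdSize / (unsigned)sizeof(int) - 1; // 0b111
    unsigned valigndMax  = valigndMask;

    assert(valigndMask == 7);
    printf("pslldq:        max=%u mask=0x%X\n", pslldqMax, pslldqMask);
    printf("valignd (32B): max=%u mask=0x%X\n", valigndMax, valigndMask);
    return 0;
}
```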
genNonTableDrivenHWIntrinsicsJumpTableFallback(node, lastOp); } genProduceReg(node); + + if (embMaskNode != nullptr) + { + // Similarly to the mask operand, we need to handle the + // mask node to ensure everything works correctly, particularly + // lifetimes and spilling if required. Doing it this way avoids + // needing to duplicate all our existing handling. + + assert(embMaskOp != nullptr); + genProduceReg(embMaskNode); + } return; } } @@ -369,6 +594,18 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (isTableDriven) { + genConsumeMultiOpOperands(node); + + if (embMaskOp != nullptr) + { + // Handle an extra operand we need to consume so that + // embedded masking can work without making the overall + // logic significantly more complex. + + assert(embMaskNode != nullptr); + genConsumeReg(embMaskOp); + } + regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); @@ -385,7 +622,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(numArgs >= 0); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); assert(ins != INS_invalid); emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); @@ -401,15 +638,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (node->OperIsMemoryLoad()) { - genConsumeAddress(op1); // Until we improve the handling of addressing modes in the emitter, we'll create a // temporary GT_IND to generate code with. + + assert(instOptions == INS_OPTS_NONE); GenTreeIndir load = indirForm(node->TypeGet(), op1); emit->emitInsLoadInd(ins, simdSize, node->GetRegNum(), &load); } else { - genConsumeRegs(op1); op1Reg = op1->GetRegNum(); if (ival != -1) @@ -446,18 +683,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (category == HW_Category_MemoryStore) { - genConsumeAddress(op1); - genConsumeReg(op2); - // Until we improve the handling of addressing modes in the emitter, we'll create a // temporary GT_STORE_IND to generate code with. + assert(instOptions == INS_OPTS_NONE); GenTreeStoreInd store = storeIndirForm(node->TypeGet(), op1, op2); emit->emitInsStoreInd(ins, simdSize, &store); break; } - genConsumeRegs(op1); - genConsumeRegs(op2); op1Reg = op1->GetRegNum(); op2Reg = op2->GetRegNum(); @@ -533,10 +766,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // constant value. regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, op2Reg, baseReg, offsReg, emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, op2Reg, baseReg, offsReg, + emitSwCase); } } - else if (node->TypeGet() == TYP_VOID) + else if (node->TypeIs(TYP_VOID)) { genHWIntrinsic_R_RM(node, ins, simdSize, op1Reg, op2, instOptions); } @@ -553,13 +787,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) op2 = node->Op(2); op3 = node->Op(3); - genConsumeRegs(op1); op1Reg = op1->GetRegNum(); - - genConsumeRegs(op2); op2Reg = op2->GetRegNum(); - - genConsumeRegs(op3); op3Reg = op3->GetRegNum(); assert(ival == -1); @@ -583,22 +812,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // can also occur if the consumer calls it directly and just doesn't pass a constant value. 
regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, op3Reg, baseReg, offsReg, emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, op3Reg, baseReg, offsReg, + emitSwCase); } } else if (category == HW_Category_MemoryStore) { - assert(instOptions == INS_OPTS_NONE); - // The Mask instructions do not currently support containment of the address. assert(!op2->isContained()); + if (intrinsicId == NI_AVX_MaskStore || intrinsicId == NI_AVX2_MaskStore) { emit->emitIns_AR_R_R(ins, simdSize, op2Reg, op3Reg, op1Reg, 0, instOptions); } else { - assert(intrinsicId == NI_SSE2_MaskMove); + assert(intrinsicId == NI_X86Base_MaskMove); assert(targetReg == REG_NA); // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI @@ -614,7 +843,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE41_BlendVariable: case NI_AVX_BlendVariable: case NI_AVX2_BlendVariable: - case NI_EVEX_BlendVariableMask: + case NI_AVX512_BlendVariableMask: { genHWIntrinsic_R_R_RM_R(node, ins, simdSize, instOptions); break; @@ -648,16 +877,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) op3 = node->Op(3); op4 = node->Op(4); - genConsumeRegs(op1); op1Reg = op1->GetRegNum(); - - genConsumeRegs(op2); op2Reg = op2->GetRegNum(); - - genConsumeRegs(op3); op3Reg = op3->GetRegNum(); - - genConsumeRegs(op4); op4Reg = op4->GetRegNum(); assert(ival == -1); @@ -681,7 +903,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // can also occur if the consumer calls it directly and just doesn't pass a constant value. regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, op4Reg, baseReg, offsReg, emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, op4Reg, baseReg, offsReg, + emitSwCase); } } else @@ -696,16 +919,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - if (embMaskOp != nullptr) - { - // Handle an extra operand we need to consume so that - // embedded masking can work without making the overall - // logic significantly more complex. 
- - assert(embMaskNode != nullptr); - genConsumeReg(embMaskOp); - } - genProduceReg(node); if (embMaskNode != nullptr) @@ -721,8 +934,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) return; } - assert(embMaskNode == nullptr); - assert(embMaskOp == nullptr); + // We shouldn't encounter embedded masking for non table-driven intrinsics + assert((embMaskNode == nullptr) && (embMaskOp == nullptr)); switch (isa) { @@ -741,19 +954,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case InstructionSet_SSE: - case InstructionSet_SSE_X64: - { - genSSEIntrinsic(node, instOptions); - break; - } - case InstructionSet_SSE2: - case InstructionSet_SSE2_X64: - { - genSSE2Intrinsic(node, instOptions); - break; - } - case InstructionSet_SSE41: case InstructionSet_SSE41_X64: { @@ -770,18 +970,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case InstructionSet_AVX: case InstructionSet_AVX2: - case InstructionSet_AVX512F: - case InstructionSet_AVX512F_VL: - case InstructionSet_AVX512F_X64: - case InstructionSet_AVX512BW: - case InstructionSet_AVX512BW_VL: + case InstructionSet_AVX512: + case InstructionSet_AVX512_X64: case InstructionSet_AVX512VBMI: - case InstructionSet_AVX512VBMI_VL: - case InstructionSet_AVX10v1: - case InstructionSet_AVX10v1_X64: - case InstructionSet_AVX10v1_V512: - case InstructionSet_AVX10v1_V512_X64: - case InstructionSet_EVEX: { genAvxFamilyIntrinsic(node, instOptions); break; @@ -853,11 +1044,11 @@ void CodeGen::genHWIntrinsic_R_RM( instOptions = AddEmbBroadcastMode(instOptions); } - OperandDesc rmOpDesc = genOperandDesc(rmOp); + OperandDesc rmOpDesc = genOperandDesc(ins, rmOp); if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (rmOpDesc.GetKind() == OperandKind::Reg)) { - // As embedded rounding only appies in R_R case, we can skip other checks for different paths. + // As embedded rounding only applies in R_R case, we can skip other checks for different paths. regNumber op1Reg = rmOp->GetRegNum(); assert(op1Reg != REG_NA); @@ -923,8 +1114,7 @@ void CodeGen::genHWIntrinsic_R_RM( break; } - case NI_AVX512F_BroadcastScalarToVector512: - case NI_AVX512BW_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { needsInstructionFixup = true; break; @@ -948,11 +1138,18 @@ void CodeGen::genHWIntrinsic_R_RM( // that failed and we either didn't get marked regOptional or we did and didn't get spilled // // As such, we need to emulate the removed CreateScalarUnsafe to ensure that op1 is in a - // SIMD register so the broadcast instruction can execute succesfully. We'll just move + // SIMD register so the broadcast instruction can execute successfully. We'll just move // the value into the target register and then broadcast it out from that. emitAttr movdAttr = emitActualTypeSize(node->GetSimdBaseType()); - emit->emitIns_Mov(INS_movd, movdAttr, reg, rmOpReg, /* canSkip */ false); + +#if defined(TARGET_AMD64) + instruction movdIns = (movdAttr == EA_4BYTE) ? 
INS_movd32 : INS_movd64; +#else + instruction movdIns = INS_movd32; +#endif + + emit->emitIns_Mov(movdIns, movdAttr, reg, rmOpReg, /* canSkip */ false); rmOpReg = reg; } else if (needsInstructionFixup) @@ -1126,9 +1323,22 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, GenTree* op3 = node->Op(3); emitter* emit = GetEmitter(); - regNumber op1Reg = op1->GetRegNum(); + regNumber op1Reg = REG_NA; regNumber op3Reg = op3->GetRegNum(); + if (op1->isContained()) + { + assert(node->GetHWIntrinsicId() == NI_AVX512_BlendVariableMask); + assert(op1->IsVectorZero()); + + instOptions = AddEmbMaskingMode(instOptions, REG_K0, true); + op1Reg = targetReg; + } + else + { + op1Reg = op1->GetRegNum(); + } + assert(targetReg != REG_NA); assert(op1Reg != REG_NA); assert(op3Reg != REG_NA); @@ -1138,7 +1348,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, instOptions = AddEmbBroadcastMode(instOptions); } - OperandDesc op2Desc = genOperandDesc(op2); + OperandDesc op2Desc = genOperandDesc(ins, op2); if (op2Desc.IsContained()) { @@ -1208,11 +1418,11 @@ void CodeGen::genHWIntrinsic_R_R_R_RM(instruction ins, instOptions = AddEmbBroadcastMode(instOptions); } - OperandDesc op3Desc = genOperandDesc(op3); + OperandDesc op3Desc = genOperandDesc(ins, op3); if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (op3Desc.GetKind() == OperandKind::Reg)) { - // As embedded rounding only appies in R_R case, we can skip other checks for different paths. + // As embedded rounding only applies in R_R case, we can skip other checks for different paths. regNumber op3Reg = op3->GetRegNum(); assert(op3Reg != REG_NA); @@ -1287,7 +1497,7 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I( // non-RMW based codegen. #if defined(DEBUG) - assert(HWIntrinsicInfo::IsTernaryLogic(node->GetHWIntrinsicId())); + assert(node->GetHWIntrinsicId() == NI_AVX512_TernaryLogic); uint8_t control = static_cast(ival); const TernaryLogicInfo& info = TernaryLogicInfo::lookup(control); @@ -1301,7 +1511,7 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I( else { #if defined(DEBUG) - if (HWIntrinsicInfo::IsTernaryLogic(node->GetHWIntrinsicId())) + if (node->GetHWIntrinsicId() == NI_AVX512_TernaryLogic) { uint8_t control = static_cast(ival); const TernaryLogicInfo& info = TernaryLogicInfo::lookup(control); @@ -1324,7 +1534,7 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I( instOptions = AddEmbBroadcastMode(instOptions); } - OperandDesc op3Desc = genOperandDesc(op3); + OperandDesc op3Desc = genOperandDesc(ins, op3); switch (op3Desc.GetKind()) { @@ -1368,6 +1578,8 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I( // // Arguments: // intrinsic - intrinsic ID +// ins - the instruction chosen for the intrinsic and base type +// attr - the emit attributes for the instruction // nonConstImmReg - the register contains non-constant imm8 argument // baseReg - a register for the start of the switch table // offsReg - a register for the offset into the switch table @@ -1382,6 +1594,8 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I( // template void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsic, + instruction ins, + emitAttr attr, regNumber nonConstImmReg, regNumber baseReg, regNumber offsReg, @@ -1389,18 +1603,44 @@ void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsi { assert(nonConstImmReg != REG_NA); // AVX2 Gather intrinsics use managed non-const fallback since they have discrete imm8 value range - // that does work with the current compiler generated jump-table fallback + // that does not work 
with the current compiler generated jump-table fallback assert(!HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsic)); emitter* emit = GetEmitter(); - const unsigned maxByte = (unsigned)HWIntrinsicInfo::lookupImmUpperBound(intrinsic) + 1; - assert(maxByte <= 256); - BasicBlock* jmpTable[256]; + unsigned maxByte = (unsigned)HWIntrinsicInfo::lookupImmUpperBound(intrinsic); + unsigned mask = 0xFF; + + // Some instructions allow full-range immediates but are documented to ignore ranges of bits + // or to clamp the value. We can implement the same masking/clamping here in order to reduce + // the size of the generated code and jump table. + + if (HWIntrinsicInfo::HasFullRangeImm(intrinsic)) + { + maxByte = GetImmediateMaxAndMask(ins, EA_SIZE(attr), &mask); + + if (mask != 0xFF) + { + emit->emitIns_R_I(INS_and, EA_4BYTE, nonConstImmReg, mask); + } + else if (maxByte < 255) + { + emit->emitIns_R_I(INS_cmp, EA_4BYTE, nonConstImmReg, maxByte); + + BasicBlock* skipLabel = genCreateTempLabel(); + inst_JMP(EJ_jbe, skipLabel); + + instGen_Set_Reg_To_Imm(EA_4BYTE, nonConstImmReg, maxByte); - unsigned jmpTableBase = emit->emitBBTableDataGenBeg(maxByte, true); + genDefineTempLabel(skipLabel); + } + } + + assert(maxByte <= 255); + BasicBlock* jmpTable[256]; + unsigned jmpTableBase = emit->emitBBTableDataGenBeg(maxByte + 1, true); // Emit the jump table - for (unsigned i = 0; i < maxByte; i++) + for (unsigned i = 0; i <= maxByte; i++) { jmpTable[i] = genCreateTempLabel(); emit->emitDataGenData(i, jmpTable[i]); @@ -1423,9 +1663,18 @@ void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsi genDefineTempLabel(switchTableBeg); - for (unsigned i = 0; i < maxByte; i++) + for (unsigned i = 0; i <= maxByte; i++) { genDefineTempLabel(jmpTable[i]); + + if ((i & mask) != i) + { + // This is a jump table entry that won't be hit, because the value can't exist after + // masking. We define the labels for these values in order to pad out the jump table + // so that the valid entries fall at the correct offsets, but we don't emit any code. + continue; + } + emitSwCase((int8_t)i); emit->emitIns_J(INS_jmp, switchTableEnd); } @@ -1440,19 +1689,19 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* assert(HWIntrinsicInfo::IsEmbRoundingCompatible(intrinsicId)); assert(!lastOp->isContained()); - assert(!genIsTableDrivenHWIntrinsic(intrinsicId, category)); + assert(!HWIntrinsicInfo::genIsTableDrivenHWIntrinsic(intrinsicId, category)); var_types baseType = node->GetSimdBaseType(); emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); var_types targetType = node->TypeGet(); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); regNumber targetReg = node->GetRegNum(); insOpts instOptions = INS_OPTS_NONE; switch (intrinsicId) { - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { // This intrinsic has several overloads, only the ones with floating number inputs should reach this part. 
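Aside (annotation, not part of the patch): the reworked jump-table fallback above uses the mask/max information in two different ways before indexing the table — it either ANDs the runtime immediate down to the bits the hardware reads, or clamps it to the documented maximum, and any table slot that can never be reached after masking is emitted as empty padding so the live entries keep their offsets. The sketch below restates that decision in plain C++ under assumed names (`NormalizeImmediate`, `mask`, `max`); the real code emits `and`/`cmp`/`jbe` instructions rather than running this logic itself.

```cpp
#include <cassert>

// Sketch of the guard emitted in front of the jump table.
// 'mask' and 'max' correspond to the values GetImmediateMaxAndMask returns.
unsigned NormalizeImmediate(unsigned imm, unsigned mask, unsigned max)
{
    if (mask != 0xFF)
    {
        // The hardware ignores the other bits, so mask them off; jump-table
        // entries whose index cannot survive the mask become empty padding.
        return imm & mask;
    }
    if (max < 255)
    {
        // The hardware clamps (e.g. byte shifts >= 16 all produce zero),
        // so any larger runtime value can be redirected to the 'max' case.
        return (imm > max) ? max : imm;
    }
    return imm;
}

int main()
{
    assert(NormalizeImmediate(0xF3, 0b00000111, 7) == 3); // masked, pextrw-style
    assert(NormalizeImmediate(200, 0xFF, 16) == 16);      // clamped, pslldq-style
    return 0;
}
```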
assert(varTypeIsFloating(baseType)); @@ -1463,19 +1712,15 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* }; regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, attr, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase); break; } - case NI_AVX512F_ConvertToInt32: - case NI_AVX512F_ConvertToUInt32: - case NI_AVX10v1_ConvertToInt32: - case NI_AVX10v1_ConvertToUInt32: + case NI_AVX512_ConvertToInt32: + case NI_AVX512_ConvertToUInt32: #if defined(TARGET_AMD64) - case NI_AVX512F_X64_ConvertToInt64: - case NI_AVX512F_X64_ConvertToUInt64: - case NI_AVX10v1_X64_ConvertToInt64: - case NI_AVX10v1_X64_ConvertToUInt64: + case NI_AVX512_X64_ConvertToInt64: + case NI_AVX512_X64_ConvertToUInt64: #endif // TARGET_AMD64 { assert(varTypeIsFloating(baseType)); @@ -1488,14 +1733,12 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* }; regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, attr, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase); break; } - case NI_AVX512F_X64_ConvertScalarToVector128Single: - case NI_AVX512F_X64_ConvertScalarToVector128Double: - case NI_AVX10v1_X64_ConvertScalarToVector128Single: - case NI_AVX10v1_X64_ConvertScalarToVector128Double: + case NI_AVX512_X64_ConvertScalarToVector128Single: + case NI_AVX512_X64_ConvertScalarToVector128Double: { assert(varTypeIsLong(baseType)); auto emitSwCase = [&](int8_t i) { @@ -1504,24 +1747,20 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* }; regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, attr, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase); break; } - case NI_AVX512F_FusedMultiplyAdd: - case NI_AVX512F_FusedMultiplyAddScalar: - case NI_AVX512F_FusedMultiplyAddNegated: - case NI_AVX512F_FusedMultiplyAddNegatedScalar: - case NI_AVX512F_FusedMultiplyAddSubtract: - case NI_AVX512F_FusedMultiplySubtract: - case NI_AVX512F_FusedMultiplySubtractAdd: - case NI_AVX512F_FusedMultiplySubtractNegated: - case NI_AVX512F_FusedMultiplySubtractNegatedScalar: - case NI_AVX512F_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplyAddScalar: - case NI_AVX10v1_FusedMultiplyAddNegatedScalar: - case NI_AVX10v1_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplySubtractNegatedScalar: + case NI_AVX512_FusedMultiplyAdd: + case NI_AVX512_FusedMultiplyAddScalar: + case NI_AVX512_FusedMultiplyAddNegated: + case NI_AVX512_FusedMultiplyAddNegatedScalar: + case NI_AVX512_FusedMultiplyAddSubtract: + case NI_AVX512_FusedMultiplySubtract: + case NI_AVX512_FusedMultiplySubtractAdd: + case NI_AVX512_FusedMultiplySubtractNegated: + case NI_AVX512_FusedMultiplySubtractNegatedScalar: + case NI_AVX512_FusedMultiplySubtractScalar: { // For FMA intrinsics, since it is not possible to get any contained operand in this case: embedded rounding // is limited in register-to-register form, and the control byte is dynamic, we don't need to do any swap. 
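Aside (annotation, not part of the patch): the non-table-driven embedded-rounding cases above all follow the same `emitSwCase` pattern — the rounding control byte must be baked into each encoding, so when it is not a compile-time constant the JIT emits one variant per possible value and dispatches through a jump table. The snippet below is only a loose conceptual analogy in plain C++ (all names are assumptions, and it conflates JIT time with run time for brevity); the real code builds a table of labels and the generated code indexes it at run time.

```cpp
#include <cstdio>
#include <functional>
#include <vector>

// Conceptual sketch of the "one case per immediate" dispatch idea.
void EmitJumpTableFallback(unsigned runtimeImm, unsigned maxImm,
                           const std::function<void(int)>& emitSwCase)
{
    std::vector<std::function<void()>> table;
    for (unsigned i = 0; i <= maxImm; i++)
    {
        table.push_back([=] { emitSwCase((int)i); });
    }
    table[runtimeImm](); // stand-in for the generated indexed jump
}

int main()
{
    // e.g. an embedded-rounding control byte chosen at run time
    EmitJumpTableFallback(2, 3, [](int i) { printf("encode variant with imm %d\n", i); });
    return 0;
}
```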
@@ -1540,7 +1779,7 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* }; regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, attr, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase); break; } @@ -1565,11 +1804,11 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE)); assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE)); GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr; GenTree* op2 = (node->GetOperandCount() >= 2) ? node->Op(2) : nullptr; + GenTree* op3 = (node->GetOperandCount() >= 3) ? node->Op(3) : nullptr; genConsumeMultiOpOperands(node); regNumber op1Reg = (op1 == nullptr) ? REG_NA : op1->GetRegNum(); @@ -1577,17 +1816,75 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) emitter* emit = GetEmitter(); var_types simdType = Compiler::getSIMDTypeForSize(node->GetSimdSize()); emitAttr attr = emitActualTypeSize(simdType); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); switch (intrinsicId) { + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: + case NI_Vector512_CreateScalar: case NI_Vector128_CreateScalarUnsafe: case NI_Vector256_CreateScalarUnsafe: case NI_Vector512_CreateScalarUnsafe: { if (varTypeIsIntegral(baseType)) { - genHWIntrinsic_R_RM(node, ins, emitActualTypeSize(baseType), targetReg, op1, instOptions); + emitAttr baseAttr = emitActualTypeSize(baseType); + +#if defined(TARGET_X86) + if (varTypeIsLong(baseType)) + { + assert(op1->isContained()); + + if (op1->OperIsLong()) + { + node->SetSimdBaseJitType(CORINFO_TYPE_INT); + + bool canCombineLoad = false; + GenTree* loPart = op1->gtGetOp1(); + GenTree* hiPart = op1->gtGetOp2(); + + if ((loPart->isContained() && hiPart->isContained()) && + (loPart->OperIs(GT_LCL_FLD) && hiPart->OperIs(GT_LCL_FLD))) + { + GenTreeLclFld* loFld = loPart->AsLclFld(); + GenTreeLclFld* hiFld = hiPart->AsLclFld(); + + canCombineLoad = (hiFld->GetLclNum() == loFld->GetLclNum()) && + (hiFld->GetLclOffs() == (loFld->GetLclOffs() + 4)); + } + + if (!canCombineLoad) + { + if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, loPart, instOptions); + inst_RV_RV_TT_IV(INS_pinsrd, EA_16BYTE, targetReg, targetReg, hiPart, 0x01, + !compiler->canUseVexEncoding(), instOptions); + } + else + { + regNumber tmpReg = internalRegisters.GetSingle(node); + genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, loPart, instOptions); + genHWIntrinsic_R_RM(node, ins, baseAttr, tmpReg, hiPart, instOptions); + emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, targetReg, tmpReg, instOptions); + } + break; + } + + op1 = loPart; + } + + baseAttr = EA_8BYTE; + } +#endif // TARGET_X86 + + if (op1->isUsedFromMemory() && (baseAttr == EA_8BYTE)) + { + ins = INS_movq; + } + + genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, op1, instOptions); } else { @@ -1602,6 +1899,45 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) else { assert(instOptions == INS_OPTS_NONE); + + if 
(HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId)) + { + // If this is CreateScalar, we need to ensure the upper elements are zeroed. + // Scalar integer loads and loads from memory always zero the upper elements, + // so reg to reg copies of floating types are the only place we need to + // do anything different. + + if (baseType == TYP_FLOAT) + { + if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + // insertps imm8 is: + // * Bits 0-3: zmask + // * Bits 4-5: count_d + // * Bits 6-7: count_s (register form only) + // + // We want zmask 0b1110 (0xE) to zero elements 1/2/3 + // We want count_d 0b00 (0x0) to insert the value to element 0 + // We want count_s 0b00 (0x0) as we're just taking element 0 of the source + + emit->emitIns_SIMD_R_R_R_I(INS_insertps, attr, targetReg, targetReg, op1Reg, 0x0E, + instOptions); + } + else + { + assert(targetReg != op1Reg); + emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg, instOptions); + emit->emitIns_Mov(INS_movss, attr, targetReg, op1Reg, /* canSkip */ false); + } + } + else + { + // `movq xmm xmm` zeroes the upper 64 bits. + emit->emitIns_Mov(INS_movq, attr, targetReg, op1Reg, /* canSkip */ false); + } + break; + } + // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); } @@ -1609,6 +1945,58 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } + case NI_Vector128_WithElement: + case NI_Vector256_WithElement: + case NI_Vector512_WithElement: + { + // Optimize the case where op2 is not a constant. + + assert(!op1->isContained()); + assert(!op2->OperIsConst()); + + // We don't have an instruction to implement this intrinsic if the index is not a constant. + // So we will use the SIMD temp location to store the vector, set the value and then reload it. + // The range check will already have been performed, so at this point we know we have an index + // within the bounds of the vector. + + unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum; + noway_assert(simdInitTempVarNum != BAD_VAR_NUM); + + bool isEBPbased; + unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased); + +#if !FEATURE_FIXED_OUT_ARGS + if (!isEBPbased) + { + // Adjust the offset by the amount currently pushed on the CPU stack + offs += genStackLevel; + } +#else + assert(genStackLevel == 0); +#endif // !FEATURE_FIXED_OUT_ARGS + + regNumber indexReg = op2->GetRegNum(); + regNumber valueReg = op3->GetRegNum(); // New element value to be stored + + // Store the vector to the temp location. + GetEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), + emitTypeSize(simdType), op1Reg, simdInitTempVarNum, 0); + + // Set the desired element. + GetEmitter()->emitIns_ARX_R(ins_Store(op3->TypeGet()), // Store + emitTypeSize(baseType), // Of the vector baseType + valueReg, // From valueReg + (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based + indexReg, // Indexed + genTypeSize(baseType), // by the size of the baseType + offs); // Offset + + // Write back the modified vector to the original location. 
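Aside (annotation, not part of the patch): the CreateScalar(float) path above builds its `insertps` immediate from three fields — zmask in bits 0-3, the destination element in bits 4-5, and the source element in bits 6-7. The worked computation below just checks that the 0x0E used there is the value those fields imply; the field names follow the comment in the patch.

```cpp
#include <cassert>

// imm8 = (count_s << 6) | (count_d << 4) | zmask
int main()
{
    unsigned count_s = 0b00;   // take element 0 of the source
    unsigned count_d = 0b00;   // write it to element 0 of the destination
    unsigned zmask   = 0b1110; // zero destination elements 1, 2 and 3

    unsigned imm8 = (count_s << 6) | (count_d << 4) | zmask;
    assert(imm8 == 0x0E);
    return 0;
}
```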
+ GetEmitter()->emitIns_R_S(ins_Load(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), + emitTypeSize(simdType), targetReg, simdInitTempVarNum, 0); + break; + } + case NI_Vector128_GetElement: case NI_Vector256_GetElement: case NI_Vector512_GetElement: @@ -1783,6 +2171,20 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) } genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions); } + else if (varTypeIsIntegral(baseType)) + { + assert(!varTypeIsLong(baseType) || TargetArchitecture::Is64Bit); + assert(HWIntrinsicInfo::IsVectorToScalar(intrinsicId)); + + attr = emitActualTypeSize(baseType); + genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions); + + if (varTypeIsSmall(baseType)) + { + emit->emitIns_Mov(ins_Move_Extend(baseType, /* srcInReg */ true), emitTypeSize(baseType), targetReg, + targetReg, /* canSkip */ false); + } + } else { assert(varTypeIsFloating(baseType)); @@ -1871,6 +2273,70 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } + case NI_Vector128_op_Division: + case NI_Vector256_op_Division: + { + // We can emulate SIMD integer division by converting the 32-bit integer -> 64-bit double, + // perform a 64-bit double divide, then convert back to a 32-bit integer. This is generating + // something similar to the following managed code: + // if (Vector128.EqualsAny(op2, Vector128.Zero)) + // { + // throw new DivideByZeroException(); + // } + // + // Vector128 overflowMask = + // Vector128.Equals(op1, Vector128.Create(int.MinValue) + // & Vector128.Equals(op2, Vector128.Create(-1)); + // if (!Vector128.EqualsAll(overflowMask, Vector128.Zero)) + // { + // throw new OverflowException(); + // } + // + // Vector256 op1_f64 = + // Vector256.ConvertToDouble(Vector256.WidenLower(Vector128.ToVector256Unsafe(op1)))); + // Vector256 op2_f64 = + // Vector256.ConvertToDouble(Vector256.WidenLower(Vector128.ToVector256Unsafe(op2)))); + // Vector256 div_f64 = op1_f64 / op2_f64; + // Vector256 div_i64 = Vector256.ConvertToInt64(div_f64); + // Vector128 div_i32 = Vector256.Narrow(div_i64.GetLower(), div_i64.GetUpper()); + // return div_i32; + regNumber op2Reg = op2->GetRegNum(); + regNumber tmpReg1 = internalRegisters.Extract(node, RBM_ALLFLOAT); + regNumber tmpReg2 = internalRegisters.Extract(node, RBM_ALLFLOAT); + emitAttr typeSize = emitTypeSize(node->TypeGet()); + noway_assert(typeSize == EA_16BYTE || typeSize == EA_32BYTE); + emitAttr divTypeSize = typeSize == EA_16BYTE ? 
EA_32BYTE : EA_64BYTE; + + simd_t negOneIntVec = simd_t::AllBitsSet(); + simd_t minValueInt{}; + int numElements = genTypeSize(node->TypeGet()) / 4; + for (int i = 0; i < numElements; i++) + { + minValueInt.i32[i] = INT_MIN; + } + CORINFO_FIELD_HANDLE minValueFld = emit->emitSimdConst(&minValueInt, typeSize); + CORINFO_FIELD_HANDLE negOneFld = emit->emitSimdConst(&negOneIntVec, typeSize); + + // div-by-zero check + emit->emitIns_SIMD_R_R_R(INS_xorpd, typeSize, tmpReg1, tmpReg1, tmpReg1, instOptions); + emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, typeSize, tmpReg1, tmpReg1, op2Reg, instOptions); + emit->emitIns_R_R(INS_ptest, typeSize, tmpReg1, tmpReg1, instOptions); + genJumpToThrowHlpBlk(EJ_jne, SCK_DIV_BY_ZERO); + + // overflow check + emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg1, op1Reg, minValueFld, 0, instOptions); + emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg2, op2Reg, negOneFld, 0, instOptions); + emit->emitIns_SIMD_R_R_R(INS_pandd, typeSize, tmpReg1, tmpReg1, tmpReg2, instOptions); + emit->emitIns_R_R(INS_ptest, typeSize, tmpReg1, tmpReg1, instOptions); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + + emit->emitIns_R_R(INS_cvtdq2pd, divTypeSize, tmpReg1, op1Reg, instOptions); + emit->emitIns_R_R(INS_cvtdq2pd, divTypeSize, tmpReg2, op2Reg, instOptions); + emit->emitIns_SIMD_R_R_R(INS_divpd, divTypeSize, targetReg, tmpReg1, tmpReg2, instOptions); + emit->emitIns_R_R(INS_cvttpd2dq, divTypeSize, targetReg, targetReg, instOptions); + break; + } + default: { unreached(); @@ -1890,6 +2356,10 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + regNumber targetReg = node->GetRegNum(); + var_types targetType = node->TypeGet(); + var_types baseType = node->GetSimdBaseType(); + emitter* emit = GetEmitter(); genConsumeMultiOpOperands(node); @@ -1900,10 +2370,8 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) case NI_X86Base_X64_BitScanForward: case NI_X86Base_X64_BitScanReverse: { - GenTree* op1 = node->Op(1); - regNumber targetReg = node->GetRegNum(); - var_types targetType = node->TypeGet(); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType); + GenTree* op1 = node->Op(1); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType, compiler); genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType), targetReg, op1, instOptions); break; @@ -1912,7 +2380,7 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) case NI_X86Base_Pause: { assert(node->GetSimdBaseType() == TYP_UNKNOWN); - GetEmitter()->emitIns(INS_pause); + emit->emitIns(INS_pause); break; } @@ -1923,18 +2391,19 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) assert(instOptions == INS_OPTS_NONE); // SIMD base type is from signature and can distinguish signed and unsigned - var_types targetType = node->GetSimdBaseType(); - GenTree* op1 = node->Op(1); - GenTree* op2 = node->Op(2); - GenTree* op3 = node->Op(3); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType); + targetType = node->GetSimdBaseType(); + + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); + + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType, compiler); regNumber op1Reg = op1->GetRegNum(); regNumber op2Reg = op2->GetRegNum(); regNumber op3Reg = op3->GetRegNum(); emitAttr attr = 
emitTypeSize(targetType); - emitter* emit = GetEmitter(); // op1: EAX, op2: EDX, op3: free assert(op1Reg != REG_EDX); @@ -1954,122 +2423,45 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - default: - unreached(); - break; - } - - genProduceReg(node); -} - -//------------------------------------------------------------------------ -// genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node -// -// Arguments: -// node - The hardware intrinsic node -// instOptions - The options used to when generating the instruction. -// -void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) -{ - NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - regNumber targetReg = node->GetRegNum(); - var_types targetType = node->TypeGet(); - var_types baseType = node->GetSimdBaseType(); - emitter* emit = GetEmitter(); - - genConsumeMultiOpOperands(node); - - switch (intrinsicId) - { - case NI_SSE_X64_ConvertToInt64: - case NI_SSE_X64_ConvertToInt64WithTruncation: + case NI_X86Base_X64_ConvertScalarToVector128Double: + case NI_X86Base_X64_ConvertScalarToVector128Single: { - assert(targetType == TYP_LONG); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - genHWIntrinsic_R_RM(node, ins, EA_8BYTE, targetReg, node->Op(1), instOptions); - break; - } - - case NI_SSE_X64_ConvertScalarToVector128Single: - { - assert(baseType == TYP_LONG); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + assert(baseType == TYP_LONG || baseType == TYP_ULONG); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE, instOptions); break; } - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { assert(baseType == TYP_UBYTE); assert(instOptions == INS_OPTS_NONE); // These do not support containment. assert(!node->Op(1)->isContained()); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->GetSimdBaseType()); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->GetSimdBaseType(), compiler); emit->emitIns_AR(ins, emitTypeSize(baseType), node->Op(1)->GetRegNum(), 0); break; } - case NI_SSE_StoreFence: + case NI_X86Base_StoreFence: { assert(baseType == TYP_UNKNOWN); emit->emitIns(INS_sfence); break; } - default: - unreached(); - break; - } - - genProduceReg(node); -} - -//------------------------------------------------------------------------ -// genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node -// -// Arguments: -// node - The hardware intrinsic node -// instOptions - The options used to when generating the instruction. 
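Aside (annotation, not part of the patch): stepping back to the new Vector128/Vector256 `op_Division` case a little further up — after the divide-by-zero and `int.MinValue / -1` overflow checks, each int32 lane is widened to double, divided, and truncated back. The standalone scalar sketch below shows why that round trip is exact: a 64-bit double represents every int32 value exactly, and the rounding error of the double divide is always smaller than the distance from a non-integer quotient to the nearest integer, so truncation reproduces integer division for every input the checks allow. The function name is an illustrative assumption, not the managed or JIT code.

```cpp
#include <cassert>
#include <climits>
#include <cmath>

// Scalar model of one lane of the emulated SIMD integer division.
int EmulatedDiv(int a, int b)
{
    assert(b != 0);                      // the vector path throws DivideByZeroException
    assert(!(a == INT_MIN && b == -1));  // the vector path throws OverflowException
    return (int)std::trunc((double)a / (double)b);
}

int main()
{
    assert(EmulatedDiv(7, -2) == -3);            // truncates toward zero, like idiv
    assert(EmulatedDiv(INT_MIN, 2) == INT_MIN / 2);
    return 0;
}
```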
-// -void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) -{ - NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - regNumber targetReg = node->GetRegNum(); - var_types targetType = node->TypeGet(); - var_types baseType = node->GetSimdBaseType(); - emitter* emit = GetEmitter(); - - genConsumeMultiOpOperands(node); - - switch (intrinsicId) - { - case NI_SSE2_X64_ConvertScalarToVector128Double: - { - assert(baseType == TYP_LONG); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE, instOptions); - break; - } - - case NI_SSE2_X64_ConvertScalarToVector128Int64: - case NI_SSE2_X64_ConvertScalarToVector128UInt64: - { - assert(baseType == TYP_LONG || baseType == TYP_ULONG); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType), targetReg, node->Op(1), instOptions); - break; - } - - case NI_SSE2_ConvertToInt32: - case NI_SSE2_ConvertToInt32WithTruncation: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_X64_ConvertToInt64WithTruncation: - case NI_SSE2_X64_ConvertToUInt64: + case NI_X86Base_X64_ConvertScalarToVector128Int64: + case NI_X86Base_X64_ConvertScalarToVector128UInt64: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_ConvertToInt32WithTruncation: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_X64_ConvertToInt64WithTruncation: + case NI_X86Base_X64_ConvertToUInt64: { emitAttr attr; if (varTypeIsIntegral(baseType)) @@ -2083,30 +2475,30 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) attr = emitTypeSize(targetType); } - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); genHWIntrinsic_R_RM(node, ins, attr, targetReg, node->Op(1), instOptions); break; } - case NI_SSE2_LoadFence: + case NI_X86Base_LoadFence: { assert(baseType == TYP_UNKNOWN); emit->emitIns(INS_lfence); break; } - case NI_SSE2_MemoryFence: + case NI_X86Base_MemoryFence: { assert(baseType == TYP_UNKNOWN); emit->emitIns(INS_mfence); break; } - case NI_SSE2_StoreNonTemporal: - case NI_SSE2_X64_StoreNonTemporal: + case NI_X86Base_StoreNonTemporal: + case NI_X86Base_X64_StoreNonTemporal: { assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); GenTreeStoreInd store = storeIndirForm(node->TypeGet(), node->Op(1), node->Op(2)); emit->emitInsStoreInd(ins, emitTypeSize(baseType), &store); break; @@ -2143,7 +2535,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) case NI_SSE41_ConvertToVector128Int32: case NI_SSE41_ConvertToVector128Int64: { - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); if (!varTypeIsSIMD(op1->gtType)) { @@ -2165,7 +2557,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) assert(!varTypeIsFloating(baseType)); GenTree* op2 = node->Op(2); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); emitAttr attr = emitActualTypeSize(node->TypeGet()); auto emitSwCase = [&](int8_t i) { @@ -2185,7 +2577,8 
@@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) // can also occur if the consumer calls it directly and just doesn't pass a constant value. regNumber baseReg = internalRegisters.Extract(node); regNumber offsReg = internalRegisters.GetSingle(node); - genHWIntrinsicJumpTableFallback(intrinsicId, op2->GetRegNum(), baseReg, offsReg, emitSwCase); + genHWIntrinsicJumpTableFallback(intrinsicId, ins, EA_16BYTE, op2->GetRegNum(), baseReg, offsReg, + emitSwCase); } break; } @@ -2231,15 +2624,47 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) assert(!op2->isUsedFromReg() || (op2->GetRegNum() != targetReg) || (op1Reg == targetReg)); emit->emitIns_Mov(INS_mov, emitTypeSize(targetType), targetReg, op1Reg, /* canSkip */ true); + instruction ins = INS_crc32; +#ifdef TARGET_AMD64 + bool needsEvex = false; + if (emit->IsExtendedGPReg(targetReg)) + { + needsEvex = true; + } + else if (op2->isUsedFromReg() && emit->IsExtendedGPReg(op2->GetRegNum())) + { + needsEvex = true; + } + else if (op2->isIndir()) + { + GenTreeIndir* indir = op2->AsIndir(); + + // We don't need to check if they are actually enregistered. + if (indir->HasBase() && emit->IsExtendedGPReg(indir->Base()->GetRegNum())) + { + needsEvex = true; + } + + if (indir->HasIndex() && emit->IsExtendedGPReg(indir->Index()->GetRegNum())) + { + needsEvex = true; + } + } + + if (needsEvex) + { + ins = INS_crc32_apx; + } +#endif // TARGET_AMD64 if ((baseType == TYP_UBYTE) || (baseType == TYP_USHORT)) // baseType is the type of the second argument { assert(targetType == TYP_INT); - genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(baseType), targetReg, op2, instOptions); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType), targetReg, op2, instOptions); } else { assert((targetType == TYP_INT) || (targetType == TYP_LONG)); - genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(targetType), targetReg, op2, instOptions); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType), targetReg, op2, instOptions); } break; @@ -2281,7 +2706,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption var_types baseType = node->GetSimdBaseType(); emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); var_types targetType = node->TypeGet(); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); size_t numArgs = node->GetOperandCount(); GenTree* op1 = node->Op(1); regNumber op1Reg = REG_NA; @@ -2299,7 +2724,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption op1Reg = op1->GetRegNum(); assert((baseType == TYP_INT) || (baseType == TYP_UINT)); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); emit->emitIns_Mov(ins, emitActualTypeSize(baseType), targetReg, op1Reg, /* canSkip */ false); break; } @@ -2308,7 +2733,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX2_ConvertToVector256Int32: case NI_AVX2_ConvertToVector256Int64: { - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); if (!varTypeIsSIMD(op1->gtType)) { @@ -2376,7 +2801,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, maskReg, maskReg, 
maskReg, instOptions); } - bool isVector128GatherWithVector256Index = (targetType == TYP_SIMD16) && (indexOp->TypeGet() == TYP_SIMD32); + bool isVector128GatherWithVector256Index = (targetType == TYP_SIMD16) && indexOp->TypeIs(TYP_SIMD32); // hwintrinsiclistxarch.h uses Dword index instructions in default if (varTypeIsLong(node->GetAuxiliaryType())) @@ -2422,7 +2847,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_AddMask: + case NI_AVX512_AddMask: { assert(instOptions == INS_OPTS_NONE); @@ -2462,7 +2887,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_AndMask: + case NI_AVX512_AndMask: { assert(instOptions == INS_OPTS_NONE); @@ -2502,7 +2927,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_AndNotMask: + case NI_AVX512_AndNotMask: { assert(instOptions == INS_OPTS_NONE); @@ -2542,7 +2967,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_MoveMask: + case NI_AVX512_MoveMask: { assert(instOptions == INS_OPTS_NONE); @@ -2579,7 +3004,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_KORTEST: + case NI_AVX512_KORTEST: { assert(instOptions == INS_OPTS_NONE); @@ -2621,7 +3046,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_KTEST: + case NI_AVX512_KTEST: { assert(instOptions == INS_OPTS_NONE); @@ -2659,7 +3084,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_NotMask: + case NI_AVX512_NotMask: { assert(instOptions == INS_OPTS_NONE); @@ -2694,7 +3119,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_OrMask: + case NI_AVX512_OrMask: { assert(instOptions == INS_OPTS_NONE); @@ -2734,7 +3159,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_ShiftLeftMask: + case NI_AVX512_ShiftLeftMask: { assert(instOptions == INS_OPTS_NONE); @@ -2775,7 +3200,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_ShiftRightMask: + case NI_AVX512_ShiftRightMask: { assert(instOptions == INS_OPTS_NONE); @@ -2816,7 +3241,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_XorMask: + case NI_AVX512_XorMask: { assert(instOptions == INS_OPTS_NONE); @@ -2856,7 +3281,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_XnorMask: + case NI_AVX512_XnorMask: { assert(instOptions == INS_OPTS_NONE); @@ -2896,87 +3321,57 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_AVX512F_ConvertToInt32: - case NI_AVX512F_ConvertToUInt32: - case NI_AVX512F_ConvertToUInt32WithTruncation: - case NI_AVX512F_X64_ConvertToInt64: - case NI_AVX512F_X64_ConvertToUInt64: - case NI_AVX512F_X64_ConvertToUInt64WithTruncation: - case NI_AVX10v1_X64_ConvertToInt64: - case NI_AVX10v1_X64_ConvertToUInt64: - case NI_AVX10v1_X64_ConvertToUInt64WithTruncation: - case NI_AVX10v1_ConvertToInt32: - case NI_AVX10v1_ConvertToUInt32: - case NI_AVX10v1_ConvertToUInt32WithTruncation: + case NI_AVX512_ConvertToInt32: + case NI_AVX512_ConvertToUInt32: + case NI_AVX512_ConvertToUInt32WithTruncation: + case 
NI_AVX512_X64_ConvertToInt64: + case NI_AVX512_X64_ConvertToUInt64: + case NI_AVX512_X64_ConvertToUInt64WithTruncation: { assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT); emitAttr attr = emitTypeSize(targetType); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions); break; } - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { if (varTypeIsFloating(baseType)) { - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions); break; } FALLTHROUGH; } - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: - case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int32: - case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: - { - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case 
NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case NI_AVX512_ConvertToVector256Int32WithSaturation: + case NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: + { + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); // These instructions are RM_R and so we need to ensure the targetReg // is passed in as the RM register and op1 is passed as the R register @@ -2986,13 +3381,11 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_AVX512F_X64_ConvertScalarToVector128Double: - case NI_AVX512F_X64_ConvertScalarToVector128Single: - case NI_AVX10v1_X64_ConvertScalarToVector128Double: - case NI_AVX10v1_X64_ConvertScalarToVector128Single: + case NI_AVX512_X64_ConvertScalarToVector128Double: + case NI_AVX512_X64_ConvertScalarToVector128Single: { assert(baseType == TYP_ULONG || baseType == TYP_LONG); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE, instOptions); break; } @@ -3017,7 +3410,7 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptio NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); regNumber targetReg = node->GetRegNum(); var_types targetType = node->TypeGet(); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType, compiler); emitter* emit = GetEmitter(); assert(targetReg != REG_NA); @@ -3140,7 +3533,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) var_types baseType = node->GetSimdBaseType(); emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); - instruction _213form = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form + instruction _213form = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); // 213 form instruction _132form = (instruction)(_213form - 1); instruction _231form = (instruction)(_213form + 1); GenTree* op1 = node->Op(1); @@ -3276,7 +3669,7 @@ void CodeGen::genPermuteVar2x(GenTreeHWIntrinsic* node, insOpts instOptions) assert(!op1->isContained()); assert(!op2->isContained()); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // vpermt2 + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); // vpermt2 if (targetReg == op2NodeReg) { diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index efad7ee09802..e54f77c13c64 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -17,6 +17,7 @@ // Vector64 Intrinsics #define FIRST_NI_Vector64 NI_Vector64_Abs HARDWARE_INTRINSIC(Vector64, Abs, 8, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, AddSaturate, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, AndNot, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, As, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, AsByte, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -88,13 +89,17 @@ HARDWARE_INTRINSIC(Vector64, Min, HARDWARE_INTRINSIC(Vector64, MinNative, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, MultiplyAddEstimate, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, NarrowWithSaturation, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Round, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector64, ShuffleNative, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector64, ShuffleNativeFallback, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, StoreAlignedNonTemporal, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, StoreUnsafe, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, SubtractSaturate, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Sum, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, ToScalar, 8, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, ToVector128, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) @@ -131,6 +136,7 @@ HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift, // Vector128 Intrinsics #define FIRST_NI_Vector128 NI_Vector128_Abs HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, AddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, As, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -210,13 +216,17 @@ HARDWARE_INTRINSIC(Vector128, Min, HARDWARE_INTRINSIC(Vector128, MinNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, SubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 9bb76b0ad038..47c73db897ad 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -27,9 +27,9 @@ HARDWARE_INTRINSIC(Sve, AddAcross, HARDWARE_INTRINSIC(Sve, AddRotateComplex, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcadd, INS_sve_fcadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, AddSaturate, -1, 2, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, AddSequentialAcross, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fadda, INS_sve_fadda}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, And, -1, -1, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, And, -1, -1, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, AndAcross, -1, -1, {INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, BooleanNot, -1, -1, {INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Compact, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, CompareEqual, -1, -1, {INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, 
INS_sve_cmpeq, INS_sve_fcmeq, INS_sve_fcmeq}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) @@ -47,7 +47,7 @@ HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElement, HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementAndReplicate, -1, 3, {INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElement, -1, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementAndReplicate, -1, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, ConvertToDouble, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_fcvt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) @@ -97,6 +97,10 @@ HARDWARE_INTRINSIC(Sve, Divide, HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, 
HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, DuplicateSelectedScalarToVector, -1, 2, {INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve, ExtractAfterLastActiveElement, -1, 2, {INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ExtractAfterLastActiveElementScalar, 0, 2, {INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, ExtractLastActiveElement, -1, 2, {INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ExtractLastActiveElementScalar, 0, 2, {INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Sve, ExtractVector, -1, 3, {INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, FloatingPointExponentialAccelerator, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fexpa, INS_invalid, INS_sve_fexpa, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) @@ -224,20 +228,20 @@ HARDWARE_INTRINSIC(Sve, MultiplyExtended, HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, Negate, -1, -1, {INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_fneg, INS_sve_fneg}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Not, -1, -1, {INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) -HARDWARE_INTRINSIC(Sve, Or, -1, -1, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Or, -1, -1, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, PrefetchBytes, -1, 3, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, PrefetchInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, PrefetchInt32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, PrefetchInt64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, Prefetch16Bit, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, Prefetch32Bit, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, Prefetch64Bit, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, Prefetch8Bit, -1, 3, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) HARDWARE_INTRINSIC(Sve, ReciprocalEstimate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecpe, INS_sve_frecpe}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, ReciprocalExponent, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecpx, INS_sve_frecpx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ReciprocalSqrtEstimate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frsqrte, INS_sve_frsqrte}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, ReciprocalSqrtStep, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frsqrts, INS_sve_frsqrts}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, ReciprocalStep, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecps, INS_sve_frecps}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, ReverseBits, -1, -1, {INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ReverseElement8, -1, -1, 
{INS_invalid, INS_invalid, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) @@ -284,31 +288,53 @@ HARDWARE_INTRINSIC(Sve, SubtractSaturate, HARDWARE_INTRINSIC(Sve, TestAnyTrue, -1, 2, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, TestFirstTrue, -1, 2, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, TestLastTrue, -1, 2, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, TransposeOdd, -1, 2, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, TransposeOdd, -1, 2, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, TrigonometricMultiplyAddCoefficient, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftmad, INS_sve_ftmad}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, TrigonometricSelectCoefficient, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftssel, INS_sve_ftssel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, TrigonometricStartingValue, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftsmul, INS_sve_ftsmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, 
INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, VectorTableLookup, -1, 2, {INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, Xor, -1, -1, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Xor, -1, -1, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, XorAcross, -1, -1, {INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ZeroExtendWideningLower, -1, 1, {INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, ZeroExtendWideningUpper, -1, 1, {INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, 
INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) #define LAST_NI_Sve NI_Sve_ZipLow // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Special intrinsics that are generated during importing or lowering +// SVE2 Intrinsics +#define FIRST_NI_Sve2 NI_Sve2_AbsoluteDifferenceAdd +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAdd, -1, 3, {INS_sve_saba, INS_sve_uaba, INS_sve_saba, INS_sve_uaba, INS_sve_saba, INS_sve_uaba, INS_sve_saba, INS_sve_uaba, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAddWideningLower, -1, 3, {INS_invalid, INS_invalid, INS_sve_sabalb, INS_sve_uabalb, INS_sve_sabalb, INS_sve_uabalb, INS_sve_sabalb, INS_sve_uabalb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAddWideningUpper, -1, 3, {INS_invalid, INS_invalid, INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningLower, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningUpper, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, BitwiseClearXor, -1, 3, {INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, BitwiseSelect, -1, 
3, {INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted, -1, 3, {INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, BitwiseSelectRightInverted, -1, 3, {INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftArithmeticRounded, -1, -1, {INS_sve_srshl, INS_invalid, INS_sve_srshl, INS_invalid, INS_sve_srshl, INS_invalid, INS_sve_srshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ShiftArithmeticRoundedSaturate, -1, -1, {INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ShiftArithmeticSaturate, -1, -1, {INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ShiftLeftAndInsert, -1, 3, {INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, Xor, -1, 3, {INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, XorRotateRight, -1, 3, {INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) +#define LAST_NI_Sve2 NI_Sve2_XorRotateRight +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// Special intrinsics that are generated during importing or lowering #define SPECIAL_NI_Sve NI_Sve_ConditionalExtractAfterLastActiveElementScalar HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementScalar, 0, 3, {INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementScalar, 0, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) @@ -325,7 +351,19 @@ HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCountScalar, HARDWARE_INTRINSIC(Sve, StoreAndZipx2, -1, 3, {INS_sve_st2b, INS_sve_st2b, INS_sve_st2h, INS_sve_st2h, INS_sve_st2w, INS_sve_st2w, INS_sve_st2d, INS_sve_st2d, INS_sve_st2w, INS_sve_st2d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(Sve, StoreAndZipx3, -1, 3, {INS_sve_st3b, INS_sve_st3b, INS_sve_st3h, INS_sve_st3h, INS_sve_st3w, INS_sve_st3w, INS_sve_st3d, INS_sve_st3d, INS_sve_st3w, INS_sve_st3d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(Sve, StoreAndZipx4, -1, 3, {INS_sve_st4b, INS_sve_st4b, INS_sve_st4h, INS_sve_st4h, INS_sve_st4w, INS_sve_st4w, INS_sve_st4d, INS_sve_st4d, INS_sve_st4w, INS_sve_st4d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) - +// Predicate variants of intrinsics, these are specialized for operating on TYP_MASK type values. 
+HARDWARE_INTRINSIC(Sve, And_Predicates, -1, 2, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, BitwiseClear_Predicates, -1, 2, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Or_Predicates, -1, 2, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Xor_Predicates, -1, 2, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ConditionalSelect_Predicates, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ZipHigh_Predicates, -1, 2, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, ZipLow_Predicates, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, UnzipEven_Predicates, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, UnzipOdd_Predicates, -1, 2, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, TransposeEven_Predicates, -1, 2, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, TransposeOdd_Predicates, -1, 2, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, ReverseElement_Predicates, -1, 1, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 5d27ce50f49d..b0e67846b9a7 100644 --- 
a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -24,11 +24,12 @@ // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector128 Intrinsics #define FIRST_NI_Vector128 NI_Vector128_Abs HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, AddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, As, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -62,8 +63,8 @@ HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) 
-HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128, Equals, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
@@ -106,18 +107,22 @@ HARDWARE_INTRINSIC(Vector128, Min,
 HARDWARE_INTRINSIC(Vector128, MinNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
 HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector128, SubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128, ToVector256, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128, ToVector512, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128, ToVector256, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt)
+HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt)
+HARDWARE_INTRINSIC(Vector128, ToVector512, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt)
 HARDWARE_INTRINSIC(Vector128, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -129,7 +134,7 @@ HARDWARE_INTRINSIC(Vector128, get_Zero,
 HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
-HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
 HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
@@ -150,6 +155,7 @@ HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift,
 // Vector256 Intrinsics
 #define FIRST_NI_Vector256 NI_Vector256_Abs
 HARDWARE_INTRINSIC(Vector256, Abs, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector256, AddSaturate, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, As, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, AsByte, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
@@ -179,8 +185,8 @@ HARDWARE_INTRINSIC(Vector256, ConvertToUInt32Native,
 HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector256, ConvertToUInt64Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
-HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
-HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible)
+HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible)
+HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, CreateSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector256, Equals, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
@@ -189,7 +195,7 @@ HARDWARE_INTRINSIC(Vector256, ExtractMostSignificantBits,
 HARDWARE_INTRINSIC(Vector256, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, FusedMultiplyAdd, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
-HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible)
+HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible|HW_Flag_NormalizeSmallTypeToInt)
 HARDWARE_INTRINSIC(Vector256, GetUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, GreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, GreaterThanAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -225,17 +231,21 @@ HARDWARE_INTRINSIC(Vector256, Min,
 HARDWARE_INTRINSIC(Vector256, MinNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector256, NarrowWithSaturation, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector256, Round, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector256, ShuffleNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector256, ShuffleNativeFallback, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
 HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, StoreUnsafe, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
+HARDWARE_INTRINSIC(Vector256, SubtractSaturate, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, Sum, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
-HARDWARE_INTRINSIC(Vector256, ToVector512, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
+HARDWARE_INTRINSIC(Vector256, ToVector512, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt)
+HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt)
 HARDWARE_INTRINSIC(Vector256, Truncate, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, WidenLower, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector256, WidenUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -249,7 +259,7 @@ HARDWARE_INTRINSIC(Vector256, get_Zero,
 HARDWARE_INTRINSIC(Vector256, op_Addition, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector256, op_BitwiseAnd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, op_BitwiseOr, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
-HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
 HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
 HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
@@ -270,6 +280,7 @@ HARDWARE_INTRINSIC(Vector256, op_UnsignedRightShift,
 // Vector512 Intrinsics
 #define FIRST_NI_Vector512 NI_Vector512_Abs
 HARDWARE_INTRINSIC(Vector512, Abs, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector512, AddSaturate, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, As, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, AsByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -299,17 +310,18 @@ HARDWARE_INTRINSIC(Vector512, ConvertToUInt32Native,
 HARDWARE_INTRINSIC(Vector512, ConvertToUInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, ConvertToUInt64Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector512, CreateSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector512, Dot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, Equals, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, EqualsAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, ExtractMostSignificantBits, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, Floor, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, FusedMultiplyAdd, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, GetElement, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector512, GetLower, 64, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(Vector512, GetLower128, 64, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512, GetLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt)
+HARDWARE_INTRINSIC(Vector512, GetLower128, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, GetUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector512, GreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, GreaterThanAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -345,15 +357,19 @@ HARDWARE_INTRINSIC(Vector512, Min,
 HARDWARE_INTRINSIC(Vector512, MinNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector512, NarrowWithSaturation, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, Round, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector512, ShuffleNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector512, ShuffleNativeFallback, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
 HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512, SubtractSaturate, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, Sum, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector512, ToScalar, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512, ToScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, Truncate, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector512, WidenLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512, WidenUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -386,12 +402,126 @@ HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift,
 // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 // X86Base Intrinsics
-#define FIRST_NI_X86Base NI_X86Base_BitScanForward
-HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic)
-HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
-#define LAST_NI_X86Base NI_X86Base_Pause
+#define FIRST_NI_X86Base NI_X86Base_Add
+HARDWARE_INTRINSIC(X86Base, Add, 16, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(X86Base, AddSaturate, 16, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(X86Base, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(X86Base, And, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt)
+HARDWARE_INTRINSIC(X86Base, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt)
+HARDWARE_INTRINSIC(X86Base, Average, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_cvttsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic)
+HARDWARE_INTRINSIC(X86Base, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(X86Base, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(X86Base, Extract, 16, 2, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, Insert, 16, 3, {INS_invalid, INS_invalid, INS_pinsrw, INS_pinsrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt)
+HARDWARE_INTRINSIC(X86Base, LoadFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier)
+HARDWARE_INTRINSIC(X86Base, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(X86Base, MaskMove, 16, 3, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(X86Base, Max, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
+HARDWARE_INTRINSIC(X86Base, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(X86Base, MemoryFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier)
+HARDWARE_INTRINSIC(X86Base, Min, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
+HARDWARE_INTRINSIC(X86Base, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(X86Base, MoveHighToLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(X86Base, MoveLowToHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(X86Base, MoveMask, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(X86Base, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(X86Base, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(X86Base, MultiplyHigh, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(X86Base, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(X86Base, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(X86Base, Or, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt)
+HARDWARE_INTRINSIC(X86Base, PackSignedSaturate, 16, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(X86Base, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
+HARDWARE_INTRINSIC(X86Base, Prefetch0, 0, 1, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
+HARDWARE_INTRINSIC(X86Base, Prefetch1, 0, 1, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
+HARDWARE_INTRINSIC(X86Base, Prefetch2, 0, 1, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
+HARDWARE_INTRINSIC(X86Base, PrefetchNonTemporal, 0, 1, {INS_invalid, INS_prefetchnta, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
+HARDWARE_INTRINSIC(X86Base, Reciprocal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, ReciprocalScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, ReciprocalSqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, ReciprocalSqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
+HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt)
+HARDWARE_INTRINSIC(X86Base, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(X86Base, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
{INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShiftRightLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShuffleHigh, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShuffleLow, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, StoreFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, StoreHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti32, INS_movnti32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreScalar, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, Subtract, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, SubtractSaturate, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, SumAbsoluteDifferences, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, UnpackHigh, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, UnpackLow, 16, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, Xor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +#define LAST_NI_X86Base NI_X86Base_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -399,239 +529,18 @@ HARDWARE_INTRINSIC(X86Base, Pause, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // X86Base 64-bit-only Intrinsics #define FIRST_NI_X86Base_X64 NI_X86Base_X64_BitScanForward -HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) -#define LAST_NI_X86Base_X64 NI_X86Base_X64_DivRem - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// SSE Intrinsics -#define FIRST_NI_SSE NI_SSE_Add -HARDWARE_INTRINSIC(SSE, Add, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, And, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, 
INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, LoadAlignedVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, LoadScalarVector128, 16, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(SSE, Max, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, Min, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, MoveHighToLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE, MoveLowToHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE, MoveMask, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, MoveScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, Or, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, Prefetch0, 0, 1, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(SSE, Prefetch1, 0, 1, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(SSE, Prefetch2, 0, 1, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(SSE, PrefetchNonTemporal, 0, 1, {INS_invalid, INS_prefetchnta, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(SSE, Reciprocal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, ReciprocalScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, ReciprocalSqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, ReciprocalSqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, Shuffle, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(SSE, StoreHigh, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, Subtract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, UnpackHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, UnpackLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE, Xor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_SSE NI_SSE_Xor - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// SSE 64-bit-only Intrinsics -#define FIRST_NI_SSE_X64 NI_SSE_X64_ConvertScalarToVector128Single -HARDWARE_INTRINSIC(SSE_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE_X64, ConvertToInt64, 16, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -#define LAST_NI_SSE_X64 NI_SSE_X64_ConvertToInt64WithTruncation - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// SSE2 Intrinsics -#define FIRST_NI_SSE2 NI_SSE2_Add -HARDWARE_INTRINSIC(SSE2, Add, 16, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, AddSaturate, 16, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, And, 16, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, AndNot, 16, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, Average, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, 
CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, 
INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, Extract, 16, 2, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, Insert, 16, 3, {INS_invalid, INS_invalid, INS_pinsrw, INS_pinsrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(SSE2, LoadAlignedVector128, 16, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, LoadFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, 
HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(SSE2, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsd_simd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(SSE2, MaskMove, 16, 3, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, Max, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, MemoryFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(SSE2, Min, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, MoveMask, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE2, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, MultiplyHigh, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, Or, 16, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, PackSignedSaturate, 16, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, ShiftLeftLogical, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, ShiftLeftLogical128BitLane, 16, 2, {INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, ShiftRightLogical128BitLane, 16, 2, {INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, ShuffleHigh, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, ShuffleLow, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, 
INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreAligned, 16, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreAlignedNonTemporal, 16, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti, INS_movnti, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsd_simd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, Subtract, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, SubtractSaturate, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, SumAbsoluteDifferences, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, UnpackHigh, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, 
INS_punpckhqdq, INS_invalid, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, UnpackLow, 16, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE2, Xor, 16, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) -#define LAST_NI_SSE2 NI_SSE2_Xor - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// SSE2 64-bit-only Intrinsics -#define FIRST_NI_SSE2_X64 NI_SSE2_X64_ConvertScalarToVector128Double -HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti, INS_movnti, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -#define LAST_NI_SSE2_X64 NI_SSE2_X64_StoreNonTemporal +HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_cvttsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti64, INS_movnti64, INS_invalid, INS_invalid}, HW_Category_MemoryStore, 
HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +#define LAST_NI_X86Base_X64 NI_X86Base_X64_StoreNonTemporal // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -643,7 +552,7 @@ HARDWARE_INTRINSIC(SSE3, AddSubtract, HARDWARE_INTRINSIC(SSE3, HorizontalAdd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE3, HorizontalSubtract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE3, LoadAndDuplicateToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE3, LoadDquVector128, 16, 1, {INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(SSE3, LoadDquVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(SSE3, MoveAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE3, MoveHighAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE3, MoveLowAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) @@ -655,15 +564,15 @@ HARDWARE_INTRINSIC(SSE3, MoveLowAndDuplicate, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSSE3 Intrinsics #define FIRST_NI_SSSE3 NI_SSSE3_Abs -HARDWARE_INTRINSIC(SSSE3, Abs, 16, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSSE3, AlignRight, 16, 3, {INS_palignr, INS_palignr, INS_palignr, 
INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSSE3, Abs, 16, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSSE3, AlignRight, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(SSSE3, HorizontalAdd, 16, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSSE3, HorizontalAddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSSE3, HorizontalSubtract, 16, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSSE3, HorizontalSubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSSE3, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSSE3, MultiplyHighRoundScale, 16, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSSE3, Shuffle, 16, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSSE3, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSSE3, MultiplyHighRoundScale, 16, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSSE3, Shuffle, 16, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSSE3, Sign, 16, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) #define LAST_NI_SSSE3 NI_SSSE3_Sign @@ -674,8 +583,8 @@ HARDWARE_INTRINSIC(SSSE3, Sign, // SSE41 Intrinsics #define FIRST_NI_SSE41 NI_SSE41_Blend HARDWARE_INTRINSIC(SSE41, Blend, 16, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE41, BlendVariable, 16, 3, {INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_blendvps, INS_blendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE41, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, BlendVariable, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_blendvps, INS_blendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(SSE41, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, CeilingScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE41, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE41, ConvertToVector128Int16, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) @@ -683,30 +592,30 @@ HARDWARE_INTRINSIC(SSE41, ConvertToVector128Int32, HARDWARE_INTRINSIC(SSE41, ConvertToVector128Int64, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(SSE41, DotProduct, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE41, Extract, 16, 2, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, FloorScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE41, 
Insert, 16, 3, {INS_pinsrb, INS_pinsrb, INS_invalid, INS_invalid, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(SSE41, LoadAlignedVector128NonTemporal, 16, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41, Max, 16, 2, {INS_pmaxsb, INS_invalid, INS_invalid, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE41, Min, 16, 2, {INS_pminsb, INS_invalid, INS_invalid, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) +HARDWARE_INTRINSIC(SSE41, LoadAlignedVector128NonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(SSE41, Max, 16, 2, {INS_pmaxsb, INS_invalid, INS_invalid, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(SSE41, Min, 16, 2, {INS_pminsb, INS_invalid, INS_invalid, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(SSE41, MinHorizontal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_phminposuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE41, MultipleSumAbsoluteDifferences, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE41, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE41, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE41, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(SSE41, RoundCurrentDirection, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(SSE41, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(SSE41, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE41, RoundCurrentDirection, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, RoundCurrentDirectionScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, RoundToNearestInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, RoundToNearestInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, RoundToNearestIntegerScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, RoundToNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, RoundToNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, RoundToNegativeInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, RoundToPositiveInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, 
HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, RoundToZeroScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, TestC, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE41, TestNotZAndNotC, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE41, TestZ, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(SSE41, TestC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(SSE41, TestNotZAndNotC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(SSE41, TestZ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) #define LAST_NI_SSE41 NI_SSE41_TestZ // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -745,16 +654,16 @@ HARDWARE_INTRINSIC(SSE42_X64, Crc32, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX Intrinsics #define FIRST_NI_AVX NI_AVX_Add -HARDWARE_INTRINSIC(AVX, Add, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Add, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, 
HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX, AddSubtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, And, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, And, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) HARDWARE_INTRINSIC(AVX, Blend, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, BlendVariable, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vblendvps, INS_vblendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastf128, INS_vbroadcastf128}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Ceiling, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastf32x4, INS_vbroadcastf32x4}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Ceiling, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, Compare, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(AVX, CompareEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, CompareGreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) @@ -769,54 +678,54 @@ HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, CompareScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_IMM, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, Divide, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) 
+HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, Divide, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, DotProduct, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, DuplicateEvenIndexed, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, DuplicateOddIndexed, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ExtractVector128, 32, 2, {INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, ExtractVector128, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, HorizontalAdd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, HorizontalSubtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, InsertVector128, 32, 3, {INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, LoadAlignedVector256, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, LoadDquVector256, 32, 1, {INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, InsertVector128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, LoadAlignedVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, LoadDquVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(AVX, LoadVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(AVX, MaskLoad, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, Max, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Max, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, 
HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) HARDWARE_INTRINSIC(AVX, MoveMask, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, Or, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, Permute, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, Or, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, Permute, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX, Permute2x128, 32, 3, {INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, PermuteVar, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, PermuteVar, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, Reciprocal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, ReciprocalSqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, RoundCurrentDirection, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, RoundToNearestInteger, 32, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, RoundToNegativeInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, RoundToPositiveInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, RoundToZero, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, Shuffle, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, RoundCurrentDirection, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToNearestInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToNegativeInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToPositiveInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToZero, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Shuffle, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, Store, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX, StoreAligned, 32, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) 
-HARDWARE_INTRINSIC(AVX, StoreAlignedNonTemporal, 32, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX, Subtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, TestC, -1, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, TestNotZAndNotC, -1, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, TestZ, -1, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, UnpackHigh, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, UnpackLow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX, Xor, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, Subtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, TestC, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, TestNotZAndNotC, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, TestZ, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, UnpackHigh, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, UnpackLow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Xor, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) #define LAST_NI_AVX NI_AVX_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -825,27 +734,27 @@ HARDWARE_INTRINSIC(AVX, Xor, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX2 Intrinsics #define FIRST_NI_AVX2 NI_AVX2_Abs -HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, And, 32, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, AndNot, 32, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, Average, 32, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, And, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, Average, 32, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, Blend, -1, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_vpblendd, INS_vpblendd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, BlendVariable, 32, 3, {INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, BlendVariable, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector128, 16, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_movddup}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector256, 32, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, BroadcastVector128ToVector256, 32, 1, {INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(AVX2, CompareEqual, 32, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(AVX2, GatherMaskVector128, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, 
HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, GatherMaskVector256, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, GatherVector128, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) @@ -854,43 +763,43 @@ HARDWARE_INTRINSIC(AVX2, HorizontalAdd, HARDWARE_INTRINSIC(AVX2, HorizontalAddSaturate, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, HorizontalSubtract, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, HorizontalSubtractSaturate, 32, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, InsertVector128, 32, 3, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, LoadAlignedVector256NonTemporal, 32, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, InsertVector128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, LoadAlignedVector256NonTemporal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(AVX2, MaskLoad, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Max, 32, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, Min, 32, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, Max, 32, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, Min, 32, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, MoveMask, 32, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, MultipleSumAbsoluteDifferences, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, MultiplyAddAdjacent, 32, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, MultiplyHigh, 32, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, MultiplyHighRoundScale, 32, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, MultiplyLow, 32, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, Or, 32, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, PackSignedSaturate, 32, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, PackUnsignedSaturate, 32, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MultiplyAddAdjacent, 32, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, MultiplyHigh, 32, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MultiplyHighRoundScale, 32, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, MultiplyLow, 32, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, Or, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, PackSignedSaturate, 32, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, PackUnsignedSaturate, 32, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, Permute2x128, 32, 3, {INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Permute4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, PermuteVar8x32, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical, 32, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical128BitLane, 32, 2, {INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, ShiftRightArithmetic, 32, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_vpsravd, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogical, 32, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogical128BitLane, 32, 2, {INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, Shuffle, 32, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_MaybeIMM) -HARDWARE_INTRINSIC(AVX2, ShuffleHigh, 32, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, ShuffleLow, 32, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, Permute4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, PermuteVar8x32, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical, 32, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical128BitLane, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, ShiftRightArithmetic, 32, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_vpsravd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogical, 32, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, 
HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogical128BitLane, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, Shuffle, 32, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM) +HARDWARE_INTRINSIC(AVX2, ShuffleHigh, 32, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShuffleLow, 32, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2, Sign, 32, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Subtract, 32, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, SubtractSaturate, 32, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, Subtract, 32, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, SubtractSaturate, 32, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, SumAbsoluteDifferences, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, 
INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) #define LAST_NI_AVX2 NI_AVX2_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -898,362 +807,210 @@ HARDWARE_INTRINSIC(AVX2, Xor, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512F Intrinsics -#define FIRST_NI_AVX512F NI_AVX512F_Abs -HARDWARE_INTRINSIC(AVX512F, Abs, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, Add, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, AlignRight32, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, AlignRight64, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, And, 64, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, AndNot, 64, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, 
INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, BlendVariable, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid, INS_invalid, INS_vbroadcastf128, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX512F, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_invalid, INS_vbroadcastf64x4}, HW_Category_SimpleSIMD, HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX512F, Compare, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareGreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareLessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareLessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotLessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotLessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareOrdered, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareUnordered, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Byte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128SByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Double, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int64, 64, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt64, 64, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, Divide, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, DuplicateEvenIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, DuplicateOddIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, ExtractVector128, 64, 2, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ExtractVector256, 64, 2, {INS_vextracti32x8, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf64x4, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, Fixup, 64, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddSubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, GetExponent, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, GetExponentScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, GetMantissa, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, InsertVector128, 64, 3, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, {INS_vinserti32x8, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf64x4, INS_vinsertf64x4}, 
HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512NonTemporal, 64, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, Max, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX512F, Min, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX512F, Multiply, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, Or, 64, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, Permute2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, Permute4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, Permute4x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, PermuteVar16x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, PermuteVar16x32x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, PermuteVar2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, PermuteVar4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, PermuteVar8x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, PermuteVar8x64x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, Reciprocal14, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, Reciprocal14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, ReciprocalSqrt14, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ReciprocalSqrt14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, RotateLeft, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, RotateLeftVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, RotateRight, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, 
RotateRightVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, RoundScale, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, RoundScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, Scale, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogicalVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmetic, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmeticVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftRightLogicalVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, Shuffle4x128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, 
INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, Sqrt, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512F, StoreAligned, 64, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512F, StoreAlignedNonTemporal, 64, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512F, Subtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, TernaryLogic, 64, 4, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, UnpackHigh, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, UnpackLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F, Xor, 64, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) -#define LAST_NI_AVX512F NI_AVX512F_Xor - -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512F.VL Intrinsics -#define FIRST_NI_AVX512F_VL NI_AVX512F_VL_Abs -HARDWARE_INTRINSIC(AVX512F_VL, Abs, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, AlignRight32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, AlignRight64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, Fixup, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, GetExponent, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, GetMantissa, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512F_VL, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar2x64x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x32x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x64x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar8x32x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, Reciprocal14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, ReciprocalSqrt14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, RotateLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, RotateLeftVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, RotateRight, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, RotateRightVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, RoundScale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, Scale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmetic, 
-1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, TernaryLogic, -1, 4, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -#define LAST_NI_AVX512F_VL NI_AVX512F_VL_TernaryLogic - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512F.X64 Intrinsics -#define FIRST_NI_AVX512F_X64 NI_AVX512F_X64_ConvertScalarToVector128Double -HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -#define LAST_NI_AVX512F_X64 NI_AVX512F_X64_ConvertToUInt64WithTruncation - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512BW Intrinsics -#define FIRST_NI_AVX512BW NI_AVX512BW_Abs -HARDWARE_INTRINSIC(AVX512BW, Abs, 64, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, Add, 64, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, AddSaturate, 64, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, AlignRight, 64, 3, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, Average, 64, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, BlendVariable, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, BroadcastScalarToVector512, 64, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, CompareEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, CompareGreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, CompareGreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, CompareLessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, CompareLessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, CompareNotEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256Byte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256SByte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512Int16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512UInt16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512BW, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, Max, 64, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, Min, 64, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, MultiplyAddAdjacent, 64, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, MultiplyHigh, 64, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, MultiplyHighRoundScale, 64, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, PackSignedSaturate, 64, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, PackUnsignedSaturate, 64, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, PermuteVar32x16, 64, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, PermuteVar32x16x2, 64, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogical128BitLane, 64, 2, {INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogicalVariable, 64, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightArithmetic, 64, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightArithmeticVariable, 64, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogical, 64, 2, 
{INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogical128BitLane, 64, 2, {INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogicalVariable, 64, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, Shuffle, 64, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, ShuffleHigh, 64, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, ShuffleLow, 64, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512BW, Subtract, 64, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, SubtractSaturate, 64, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, SumAbsoluteDifferences, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, SumAbsoluteDifferencesInBlock32, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, UnpackHigh, 64, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512BW, UnpackLow, 64, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -#define LAST_NI_AVX512BW NI_AVX512BW_UnpackLow - -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512BW.VL Intrinsics -#define FIRST_NI_AVX512BW_VL NI_AVX512BW_VL_CompareGreaterThan -HARDWARE_INTRINSIC(AVX512BW_VL, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar16x16x2, 32, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar8x16 , 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar8x16x2, 16, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX512BW_VL, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX512BW_VL, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX512BW_VL, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX512BW_VL, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
-#define LAST_NI_AVX512BW_VL NI_AVX512BW_VL_SumAbsoluteDifferencesInBlock32
+#define FIRST_NI_AVX512 NI_AVX512_Abs
+HARDWARE_INTRINSIC(AVX512, Abs, -1, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512, Add, 64, -1, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512, AddSaturate, 64, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512, AlignRight, 64, 3, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512, AlignRight32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM,
HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, AlignRight64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Average, 64, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, BlendVariable, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastScalarToVector512, 64, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_vbroadcastf32x4, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_vbroadcastf32x8, INS_vbroadcastf64x4}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Compare, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqual, -1, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareOrdered, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareUnordered, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi32, INS_vcvtsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, 
ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, 
ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Byte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) 
+HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Single, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Double, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, DetectConflicts, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Divide, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, DuplicateEvenIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, DuplicateOddIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti64x2, INS_vextracti64x2, INS_vextractf32x4, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf32x8, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Fixup, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, 
HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddSubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, GetExponent, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, GetExponentScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, GetMantissa, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti64x2, INS_vinserti64x2, INS_vinsertf32x4, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf32x8, INS_vinsertf64x4}, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512NonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, Max, -1, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512, Min, -1, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512, Multiply, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, MultiplyAddAdjacent, 64, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MultiplyHigh, 64, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, MultiplyHighRoundScale, 64, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, PackSignedSaturate, 64, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PackUnsignedSaturate, 64, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(AVX512, Permute2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Permute4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Permute4x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x16x2, 32, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x32x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PermuteVar2x64x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar32x16, 64, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar32x16x2, 64, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x32x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x64x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x16 , 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x16x2, 16, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x32x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x64x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, Range, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RangeScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd}, HW_Category_IMM, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Reciprocal14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Reciprocal14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Reduce, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ReduceScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, RotateLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) 
+HARDWARE_INTRINSIC(AVX512, RotateLeftVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RotateRight, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RotateRightVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RoundScale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RoundScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Scale, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical128BitLane, 64, 2, {INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogical128BitLane, 64, 2, {INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, 
INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Shuffle, 64, -1, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Shuffle4x128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShuffleHigh, 64, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShuffleLow, 64, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Sqrt, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Subtract, 64, -1, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SubtractSaturate, 64, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferences, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, TernaryLogic, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, UnpackHigh, 64, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, UnpackLow, 64, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +#define LAST_NI_AVX512 NI_AVX512_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512CD Intrinsics -#define FIRST_NI_AVX512CD NI_AVX512CD_DetectConflicts -HARDWARE_INTRINSIC(AVX512CD, DetectConflicts, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512CD, LeadingZeroCount, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -#define LAST_NI_AVX512CD NI_AVX512CD_LeadingZeroCount - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512CD.VL Intrinsics -#define FIRST_NI_AVX512CD_VL NI_AVX512CD_VL_DetectConflicts -HARDWARE_INTRINSIC(AVX512CD_VL, DetectConflicts, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512CD_VL, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -#define LAST_NI_AVX512CD_VL NI_AVX512CD_VL_LeadingZeroCount - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512DQ Intrinsics -#define FIRST_NI_AVX512DQ NI_AVX512DQ_And -HARDWARE_INTRINSIC(AVX512DQ, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_invalid, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_invalid, INS_invalid, INS_vbroadcastf32x8, 
INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector256Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Double, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti64x2, INS_vextracti64x2, INS_invalid, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_invalid, INS_invalid, INS_vextractf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti64x2, INS_vinserti64x2, INS_invalid, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_invalid, INS_invalid, INS_vinsertf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, Range, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, RangeScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512DQ, Reduce, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ReduceScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512DQ, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_AVX512DQ NI_AVX512DQ_Xor - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512DQ.VL Intrinsics -#define FIRST_NI_AVX512DQ_VL NI_AVX512DQ_VL_BroadcastPairScalarToVector128 -HARDWARE_INTRINSIC(AVX512DQ_VL, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ_VL, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) 
-HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, 
Range, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512DQ_VL, Reduce, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -#define LAST_NI_AVX512DQ_VL NI_AVX512DQ_VL_Reduce +// AVX512.X64 Intrinsics +#define FIRST_NI_AVX512_X64 NI_AVX512_X64_ConvertScalarToVector128Double +HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi64, INS_vcvtsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +#define LAST_NI_AVX512_X64 NI_AVX512_X64_ConvertToUInt64WithTruncation // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -1261,227 +1018,35 @@ HARDWARE_INTRINSIC(AVX512DQ_VL, Reduce, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512VBMI Intrinsics #define FIRST_NI_AVX512VBMI NI_AVX512VBMI_MultiShift -HARDWARE_INTRINSIC(AVX512VBMI, MultiShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2, 64, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512VBMI, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar32x8, 32, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar32x8x2, 32, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2, 64, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) #define LAST_NI_AVX512VBMI NI_AVX512VBMI_PermuteVar64x8x2 -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512VBMI.VL Intrinsics -#define FIRST_NI_AVX512VBMI_VL NI_AVX512VBMI_VL_MultiShift -HARDWARE_INTRINSIC(AVX512VBMI_VL, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8, 32, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8x2, 32, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) -#define LAST_NI_AVX512VBMI_VL NI_AVX512VBMI_VL_PermuteVar32x8x2 - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX10V1 Intrinsics -#define FIRST_NI_AVX10v1 NI_AVX10v1_Abs -HARDWARE_INTRINSIC(AVX10v1, Abs, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, AlignRight32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, AlignRight64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) 
-HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256UInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256UInt32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, DetectConflicts, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, Fixup, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, FusedMultiplyAddNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, FusedMultiplyAddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, FusedMultiplySubtractNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, 
HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, FusedMultiplySubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, GetExponent, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, GetExponentScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, GetMantissa, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX10v1, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX10v1, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16x2, 32, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar2x64x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar32x8, 32, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar32x8x2, 32, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar4x32x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar4x64x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar8x16, 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar8x16x2, 16, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar8x32x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, Range, -1, 3, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, RangeScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, Reciprocal14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, Reciprocal14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, ReciprocalSqrt14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ReciprocalSqrt14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, Reduce, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ReduceScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, RotateLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, RotateLeftVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, RotateRight, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, RotateRightVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, RoundScale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, RoundScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, Scale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX10v1, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1, TernaryLogic, -1, 4, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt) -#define LAST_NI_AVX10v1 NI_AVX10v1_TernaryLogic - -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX10V1_V512 Intrinsics -#define FIRST_NI_AVX10v1_V512 NI_AVX10v1_V512_And -HARDWARE_INTRINSIC(AVX10v1_V512, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1_V512, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_invalid, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1_V512, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_invalid, INS_invalid, INS_vbroadcastf32x8, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector256Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512Double, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512Int64, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) 
-HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512UInt64, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, DetectConflicts, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti64x2, INS_vextracti64x2, INS_invalid, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_invalid, INS_invalid, INS_vextractf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti64x2, INS_vinserti64x2, INS_invalid, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_invalid, INS_invalid, INS_vinsertf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, LeadingZeroCount, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, MultiShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, 
HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, PermuteVar64x8x2, 64, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, Range, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, Reduce, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_AVX10v1_V512 NI_AVX10v1_V512_Xor - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512F.X64 Intrinsics -#define FIRST_NI_AVX10v1_X64 NI_AVX10v1_X64_ConvertScalarToVector128Double -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_cvtss2si, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -#define LAST_NI_AVX10v1_X64 NI_AVX10v1_X64_ConvertToUInt64WithTruncation - // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX10v2 Intrinsics -#define FIRST_NI_AVX10v2 NI_AVX10v2_Add -HARDWARE_INTRINSIC(AVX10v2, Add, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative |HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVector128Int32, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVector128Single, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVector128UInt32, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256Double, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256Int32, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256Int64, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256Single, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256UInt32, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256UInt64, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2, 
ConvertToVectorInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt32WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2, Divide, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2, Multiply, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative |HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, Scale, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, Sqrt, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, Subtract, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -#define LAST_NI_AVX10v2 NI_AVX10v2_Subtract - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg 
Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX10v2_V512 Intrinsics -#define FIRST_NI_AVX10v2_V512 NI_AVX10v2_V512_ConvertToByteWithSaturationAndZeroExtendToInt32 -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToByteWithSaturationAndZeroExtendToInt32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToSByteWithSaturationAndZeroExtendToInt32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorInt32WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorInt64WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorUInt32WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorUInt64WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, MinMax, 64, 3, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX10v2_V512, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
-#define LAST_NI_AVX10v2_V512 NI_AVX10v2_V512_MultipleSumAbsoluteDifferences
+#define FIRST_NI_AVX10v2 NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32
+HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt32WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX10v2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_vmovd_simd, INS_vmovd_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+#define LAST_NI_AVX10v2 NI_AVX10v2_StoreScalar
// ************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
@@ -1565,14 +1130,14 @@ HARDWARE_INTRINSIC(BMI2_X64, ZeroHighBits,
// ************************************************************************************************************************************************************************************
// FMA Intrinsics
#define FIRST_NI_FMA NI_FMA_MultiplyAdd
-HARDWARE_INTRINSIC(FMA, MultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(FMA, MultiplyAddNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(FMA, MultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
+HARDWARE_INTRINSIC(FMA, MultiplyAddNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
HARDWARE_INTRINSIC(FMA, MultiplyAddNegatedScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(FMA, MultiplyAddScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(FMA, MultiplyAddSubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(FMA, MultiplySubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(FMA, MultiplySubtractAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(FMA, MultiplySubtractNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(FMA, MultiplyAddSubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
+HARDWARE_INTRINSIC(FMA, MultiplySubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
+HARDWARE_INTRINSIC(FMA, MultiplySubtractAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
+HARDWARE_INTRINSIC(FMA, MultiplySubtractNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
HARDWARE_INTRINSIC(FMA, MultiplySubtractNegatedScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(FMA, MultiplySubtractScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
#define LAST_NI_FMA NI_FMA_MultiplySubtractScalar
@@ -1655,9 +1220,9 @@ HARDWARE_INTRINSIC(X86Serialize, Serialize,
// ************************************************************************************************************************************************************************************
// GFNI Intrinsics
#define FIRST_NI_GFNI NI_GFNI_GaloisFieldAffineTransform
-HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransform, 16, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransformInverse, 16, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(GFNI, GaloisFieldMultiply, 16, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransform, 16, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransformInverse, 16, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(GFNI, GaloisFieldMultiply, 16, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
#define LAST_NI_GFNI NI_GFNI_GaloisFieldMultiply
// ************************************************************************************************************************************************************************************
@@ -1666,9 +1231,9 @@ HARDWARE_INTRINSIC(GFNI, GaloisFieldMultiply,
// ************************************************************************************************************************************************************************************
// GFNI Intrinsics
#define FIRST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldAffineTransform
-HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransform, 32, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransformInverse, 32, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldMultiply, 32, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransform, 32, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransformInverse, 32, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldMultiply, 32, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
#define LAST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldMultiply
// ************************************************************************************************************************************************************************************
@@ -1677,9 +1242,9 @@ HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldMultiply,
// ************************************************************************************************************************************************************************************
// GFNI Intrinsics
#define FIRST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldAffineTransform
-HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransform, 64, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransformInverse, 64, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply, 64, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransform, 64, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransformInverse, 64, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply, 64, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
#define LAST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldMultiply
// ************************************************************************************************************************************************************************************
@@ -1687,46 +1252,43 @@ HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply,
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ************************************************************************************************************************************************************************************
// Special intrinsics that are generated during lowering
-HARDWARE_INTRINSIC(SSE, COMISS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE, UCOMISS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2, COMISD, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2, UCOMISD, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, COMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(X86Base, UCOMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41, PTEST, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
-HARDWARE_INTRINSIC(EVEX, KORTEST, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(EVEX, KTEST, 0, 2,
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(EVEX, PTESTM, 0, 2, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, PTESTNM, 0, 2, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) - -HARDWARE_INTRINSIC(EVEX, AddMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, AndMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, AndNotMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, BlendVariableMask, -1, 3, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareEqualMask, -1, 2, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(EVEX, CompareGreaterThanMask, -1, 2, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareNotEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, 
INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(EVEX, CompareNotGreaterThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareNotGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareNotLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareNotLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareOrderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, CompareUnorderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(EVEX, ConvertMaskToVector, -1, 1, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, ConvertVectorToMask, -1, 1, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, MoveMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(EVEX, NotMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, op_EqualityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) -HARDWARE_INTRINSIC(EVEX, op_InequalityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) -HARDWARE_INTRINSIC(EVEX, OrMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, 
HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, ShiftLeftMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(EVEX, ShiftRightMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(EVEX, XorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, XnorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, KORTEST, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, KTEST, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, PTESTM, 0, 2, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, PTESTNM, 0, 2, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, AddMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, AndMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, AndNotMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, BlendVariableMask, -1, 3, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, CompareMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareEqualMask, -1, 2, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanMask, -1, 2, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareOrderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareUnorderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ConvertMaskToVector, -1, 1, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ConvertVectorToMask, -1, 1, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, MoveMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, NotMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, op_EqualityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, op_InequalityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, OrMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ShiftLeftMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ShiftRightMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, XorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, XnorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC #undef HARDWARE_INTRINSIC diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 838aafa85ae5..feddb6bafbbf 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -20,10 +20,6 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) { case InstructionSet_X86Base: return InstructionSet_X86Base_X64; - case InstructionSet_SSE: - return InstructionSet_SSE_X64; - case InstructionSet_SSE2: - return InstructionSet_SSE2_X64; case InstructionSet_SSE3: return InstructionSet_SSE3_X64; case InstructionSet_SSSE3: @@ -32,46 +28,46 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_SSE41_X64; case InstructionSet_SSE42: return InstructionSet_SSE42_X64; + case InstructionSet_POPCNT: + return InstructionSet_POPCNT_X64; case InstructionSet_AVX: return InstructionSet_AVX_X64; case InstructionSet_AVX2: return InstructionSet_AVX2_X64; - case InstructionSet_AVX512BW: - return InstructionSet_AVX512BW_X64; - case InstructionSet_AVX512CD: - return InstructionSet_AVX512CD_X64; - case InstructionSet_AVX512DQ: - return InstructionSet_AVX512DQ_X64; - case InstructionSet_AVX512F: - return InstructionSet_AVX512F_X64; - case InstructionSet_AVX512VBMI: - return InstructionSet_AVX512VBMI_X64; - case InstructionSet_AVX10v1: - return InstructionSet_AVX10v1_X64; - case InstructionSet_AVX10v1_V512: - 
return InstructionSet_AVX10v1_V512_X64; - case InstructionSet_AVX10v2: - return InstructionSet_AVX10v2_X64; - case InstructionSet_AVX10v2_V512: - return InstructionSet_AVX10v2_V512_X64; - case InstructionSet_AVXVNNI: - return InstructionSet_AVXVNNI_X64; - case InstructionSet_AES: - return InstructionSet_AES_X64; case InstructionSet_BMI1: return InstructionSet_BMI1_X64; case InstructionSet_BMI2: return InstructionSet_BMI2_X64; case InstructionSet_FMA: return InstructionSet_FMA_X64; - case InstructionSet_GFNI: - return InstructionSet_GFNI_X64; case InstructionSet_LZCNT: return InstructionSet_LZCNT_X64; + case InstructionSet_AVX512: + return InstructionSet_AVX512_X64; + case InstructionSet_AVX512VBMI: + return InstructionSet_AVX512VBMI_X64; + case InstructionSet_AVX512v3: + return InstructionSet_AVX512v3_X64; + case InstructionSet_AVX10v1: + return InstructionSet_AVX10v1_X64; + case InstructionSet_AVX10v2: + return InstructionSet_AVX10v2_X64; + case InstructionSet_AES: + return InstructionSet_AES_X64; case InstructionSet_PCLMULQDQ: return InstructionSet_PCLMULQDQ_X64; - case InstructionSet_POPCNT: - return InstructionSet_POPCNT_X64; + case InstructionSet_AVX512VP2INTERSECT: + return InstructionSet_AVX512VP2INTERSECT_X64; + case InstructionSet_AVXIFMA: + return InstructionSet_AVXIFMA_X64; + case InstructionSet_AVXVNNI: + return InstructionSet_AVXVNNI_X64; + case InstructionSet_GFNI: + return InstructionSet_GFNI_X64; + case InstructionSet_SHA: + return InstructionSet_SHA_X64; + case InstructionSet_WAITPKG: + return InstructionSet_WAITPKG_X64; case InstructionSet_X86Serialize: return InstructionSet_X86Serialize_X64; default: @@ -91,18 +87,19 @@ static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa) { switch (isa) { - case InstructionSet_AVX512BW: - return InstructionSet_AVX512BW_VL; - case InstructionSet_AVX512CD: - return InstructionSet_AVX512CD_VL; - case InstructionSet_AVX512DQ: - return InstructionSet_AVX512DQ_VL; - case InstructionSet_AVX512F: - return InstructionSet_AVX512F_VL; + case InstructionSet_AVX512: case InstructionSet_AVX512VBMI: - return InstructionSet_AVX512VBMI_VL; + case InstructionSet_AVX512v3: + case InstructionSet_AVX10v1: + { + // These nested ISAs aren't tracked by the JIT support + return isa; + } + default: + { return InstructionSet_NONE; + } } } @@ -118,12 +115,25 @@ static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) { switch (isa) { + case InstructionSet_AES: + { + return InstructionSet_AES_V256; + } + case InstructionSet_GFNI: + { return InstructionSet_GFNI_V256; + } + case InstructionSet_PCLMULQDQ: + { return InstructionSet_PCLMULQDQ_V256; + } + default: + { return InstructionSet_NONE; + } } } @@ -140,19 +150,33 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) switch (isa) { case InstructionSet_AVX10v1: - return InstructionSet_AVX10v1_V512; case InstructionSet_AVX10v1_X64: - return InstructionSet_AVX10v1_V512_X64; case InstructionSet_AVX10v2: - return InstructionSet_AVX10v2_V512; case InstructionSet_AVX10v2_X64: - return InstructionSet_AVX10v2_V512_X64; + { + // These nested ISAs aren't tracked by the JIT support + return isa; + } + + case InstructionSet_AES: + { + return InstructionSet_AES_V512; + } + case InstructionSet_GFNI: + { return InstructionSet_GFNI_V512; + } + case InstructionSet_PCLMULQDQ: + { return InstructionSet_PCLMULQDQ_V512; + } + default: + { return InstructionSet_NONE; + } } } @@ -170,11 +194,11 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) if 
(className[0] == 'A') { - if (strcmp(className, "Aes") == 0) + if (strcmp(className + 1, "es") == 0) { return InstructionSet_AES; } - else if (strncmp(className, "Avx", 3) == 0) + else if (strncmp(className + 1, "vx", 2) == 0) { if (className[3] == '\0') { @@ -197,27 +221,66 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) } else if (strncmp(className + 3, "512", 3) == 0) { - if (strcmp(className + 6, "BW") == 0) - { - return InstructionSet_AVX512BW; - } - else if (strcmp(className + 6, "CD") == 0) + if (className[6] == 'B') { - return InstructionSet_AVX512CD; + if (strcmp(className + 7, "italg") == 0) + { + return InstructionSet_AVX512v3; + } + else if (strcmp(className + 7, "f16") == 0) + { + return InstructionSet_AVX10v1; + } + else if (strcmp(className + 7, "W") == 0) + { + return InstructionSet_AVX512; + } } - else if (strcmp(className + 6, "DQ") == 0) + else if ((strcmp(className + 6, "CD") == 0) || (strcmp(className + 6, "DQ") == 0)) { - return InstructionSet_AVX512DQ; + return InstructionSet_AVX512; } - else if (strcmp(className + 6, "F") == 0) + else if (className[6] == 'F') { - return InstructionSet_AVX512F; + if (className[7] == '\0') + { + return InstructionSet_AVX512; + } + else if (strcmp(className + 7, "p16") == 0) + { + return InstructionSet_AVX10v1; + } } - else if (strcmp(className + 6, "Vbmi") == 0) + else if (className[6] == 'V') { - return InstructionSet_AVX512VBMI; + if (strncmp(className + 7, "bmi", 3) == 0) + { + if (className[10] == '\0') + { + return InstructionSet_AVX512VBMI; + } + else if (strcmp(className + 10, "2") == 0) + { + return InstructionSet_AVX512v3; + } + } + else if (className[7] == 'p') + { + if (strcmp(className + 8, "p2intersect") == 0) + { + return InstructionSet_AVX512VP2INTERSECT; + } + else if (strcmp(className + 8, "opcntdq") == 0) + { + return InstructionSet_AVX512v3; + } + } } } + else if (strcmp(className + 3, "Ifma") == 0) + { + return InstructionSet_AVXIFMA; + } else if (strcmp(className + 3, "Vnni") == 0) { return InstructionSet_AVXVNNI; @@ -226,7 +289,7 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) } else if (className[0] == 'B') { - if (strncmp(className, "Bmi", 3) == 0) + if (strncmp(className + 1, "mi", 2) == 0) { if (strcmp(className + 3, "1") == 0) { @@ -240,47 +303,51 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) } else if (className[0] == 'F') { - if (strcmp(className, "Fma") == 0) + if (strcmp(className + 1, "ma") == 0) { return InstructionSet_FMA; } + else if (strcmp(className + 1, "16c") == 0) + { + return InstructionSet_AVX2; + } } else if (className[0] == 'G') { - if (strcmp(className, "Gfni") == 0) + if (strcmp(className + 1, "fni") == 0) { return InstructionSet_GFNI; } } else if (className[0] == 'L') { - if (strcmp(className, "Lzcnt") == 0) + if (strcmp(className + 1, "zcnt") == 0) { return InstructionSet_LZCNT; } } else if (className[0] == 'P') { - if (strcmp(className, "Pclmulqdq") == 0) + if (strcmp(className + 1, "clmulqdq") == 0) { return InstructionSet_PCLMULQDQ; } - else if (strcmp(className, "Popcnt") == 0) + else if (strcmp(className + 1, "opcnt") == 0) { return InstructionSet_POPCNT; } } else if (className[0] == 'S') { - if (strncmp(className, "Sse", 3) == 0) + if (strcmp(className + 1, "ha") == 0) { - if (className[3] == '\0') - { - return InstructionSet_SSE; - } - else if (strcmp(className + 3, "2") == 0) + return InstructionSet_SHA; + } + else if (strncmp(className + 1, "se", 2) == 0) + { + if ((className[3] == '\0') || 
(strcmp(className + 3, "2") == 0)) { - return InstructionSet_SSE2; + return InstructionSet_X86Base; } else if (strcmp(className + 3, "3") == 0) { @@ -295,14 +362,14 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) return InstructionSet_SSE42; } } - else if (strcmp(className, "Ssse3") == 0) + else if (strcmp(className + 1, "sse3") == 0) { return InstructionSet_SSSE3; } } else if (className[0] == 'V') { - if (strncmp(className, "Vector", 6) == 0) + if (strncmp(className + 1, "ector", 5) == 0) { if (strncmp(className + 6, "128", 3) == 0) { @@ -326,12 +393,16 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) } } } - else if (strcmp(className, "VL") == 0) + else if (strcmp(className + 1, "L") == 0) { assert(!"VL.X64 support doesn't exist in the managed libraries and so is not yet implemented"); return InstructionSet_ILLEGAL; } } + else if (strcmp(className, "WaitPkg") == 0) + { + return InstructionSet_WAITPKG; + } else if (strncmp(className, "X86", 3) == 0) { if (strcmp(className + 3, "Base") == 0) @@ -425,11 +496,10 @@ int HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic id) { case NI_AVX_Compare: case NI_AVX_CompareScalar: - case NI_AVX512F_Compare: - case NI_EVEX_CompareMask: + case NI_AVX512_Compare: + case NI_AVX512_CompareMask: case NI_AVX10v2_MinMaxScalar: case NI_AVX10v2_MinMax: - case NI_AVX10v2_V512_MinMax: { assert(!HWIntrinsicInfo::HasFullRangeImm(id)); return 31; // enum FloatComparisonMode has 32 values @@ -444,17 +514,10 @@ int HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic id) return 8; } - case NI_AVX512F_GetMantissa: - case NI_AVX512F_GetMantissaScalar: - case NI_AVX512F_VL_GetMantissa: - case NI_AVX512DQ_Range: - case NI_AVX512DQ_RangeScalar: - case NI_AVX512DQ_VL_Range: - case NI_AVX10v1_GetMantissa: - case NI_AVX10v1_GetMantissaScalar: - case NI_AVX10v1_Range: - case NI_AVX10v1_RangeScalar: - case NI_AVX10v1_V512_Range: + case NI_AVX512_GetMantissa: + case NI_AVX512_GetMantissaScalar: + case NI_AVX512_Range: + case NI_AVX512_RangeScalar: { assert(!HWIntrinsicInfo::HasFullRangeImm(id)); return 15; @@ -604,13 +667,13 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { case FloatComparisonMode::OrderedEqualNonSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareEqualMask; + return NI_AVX512_CompareEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarEqual : NI_SSE2_CompareScalarEqual; + return NI_X86Base_CompareScalarEqual; } assert(intrinsic == NI_AVX_Compare); @@ -619,18 +682,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareEqual : NI_SSE2_CompareEqual; + return NI_X86Base_CompareEqual; } case FloatComparisonMode::OrderedGreaterThanSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareGreaterThanMask; + return NI_AVX512_CompareGreaterThanMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarGreaterThan : NI_SSE2_CompareScalarGreaterThan; + return NI_X86Base_CompareScalarGreaterThan; } assert(intrinsic == NI_AVX_Compare); @@ -639,19 +702,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareGreaterThan; } - return (simdBaseType == TYP_FLOAT) ? 
NI_SSE_CompareGreaterThan : NI_SSE2_CompareGreaterThan; + return NI_X86Base_CompareGreaterThan; } case FloatComparisonMode::OrderedGreaterThanOrEqualSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareGreaterThanOrEqualMask; + return NI_AVX512_CompareGreaterThanOrEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarGreaterThanOrEqual - : NI_SSE2_CompareScalarGreaterThanOrEqual; + return NI_X86Base_CompareScalarGreaterThanOrEqual; } assert(intrinsic == NI_AVX_Compare); @@ -660,18 +722,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareGreaterThanOrEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareGreaterThanOrEqual : NI_SSE2_CompareGreaterThanOrEqual; + return NI_X86Base_CompareGreaterThanOrEqual; } case FloatComparisonMode::OrderedLessThanSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareLessThanMask; + return NI_AVX512_CompareLessThanMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarLessThan : NI_SSE2_CompareScalarLessThan; + return NI_X86Base_CompareScalarLessThan; } assert(intrinsic == NI_AVX_Compare); @@ -680,19 +742,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareLessThan; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareLessThan : NI_SSE2_CompareLessThan; + return NI_X86Base_CompareLessThan; } case FloatComparisonMode::OrderedLessThanOrEqualSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareLessThanOrEqualMask; + return NI_AVX512_CompareLessThanOrEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarLessThanOrEqual - : NI_SSE2_CompareScalarLessThanOrEqual; + return NI_X86Base_CompareScalarLessThanOrEqual; } assert(intrinsic == NI_AVX_Compare); @@ -701,18 +762,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareLessThanOrEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareLessThanOrEqual : NI_SSE2_CompareLessThanOrEqual; + return NI_X86Base_CompareLessThanOrEqual; } case FloatComparisonMode::UnorderedNotEqualNonSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotEqualMask; + return NI_AVX512_CompareNotEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarNotEqual : NI_SSE2_CompareScalarNotEqual; + return NI_X86Base_CompareScalarNotEqual; } assert(intrinsic == NI_AVX_Compare); @@ -721,19 +782,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareNotEqual : NI_SSE2_CompareNotEqual; + return NI_X86Base_CompareNotEqual; } case FloatComparisonMode::UnorderedNotGreaterThanSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotGreaterThanMask; + return NI_AVX512_CompareNotGreaterThanMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? 
NI_SSE_CompareScalarNotGreaterThan - : NI_SSE2_CompareScalarNotGreaterThan; + return NI_X86Base_CompareScalarNotGreaterThan; } assert(intrinsic == NI_AVX_Compare); @@ -742,19 +802,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotGreaterThan; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareNotGreaterThan : NI_SSE2_CompareNotGreaterThan; + return NI_X86Base_CompareNotGreaterThan; } case FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotGreaterThanOrEqualMask; + return NI_AVX512_CompareNotGreaterThanOrEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarNotGreaterThanOrEqual - : NI_SSE2_CompareScalarNotGreaterThanOrEqual; + return NI_X86Base_CompareScalarNotGreaterThanOrEqual; } assert(intrinsic == NI_AVX_Compare); @@ -763,19 +822,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotGreaterThanOrEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareNotGreaterThanOrEqual - : NI_SSE2_CompareNotGreaterThanOrEqual; + return NI_X86Base_CompareNotGreaterThanOrEqual; } case FloatComparisonMode::UnorderedNotLessThanSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotLessThanMask; + return NI_AVX512_CompareNotLessThanMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarNotLessThan : NI_SSE2_CompareScalarNotLessThan; + return NI_X86Base_CompareScalarNotLessThan; } assert(intrinsic == NI_AVX_Compare); @@ -784,19 +842,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotLessThan; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareNotLessThan : NI_SSE2_CompareNotLessThan; + return NI_X86Base_CompareNotLessThan; } case FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotLessThanOrEqualMask; + return NI_AVX512_CompareNotLessThanOrEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarNotLessThanOrEqual - : NI_SSE2_CompareScalarNotLessThanOrEqual; + return NI_X86Base_CompareScalarNotLessThanOrEqual; } assert(intrinsic == NI_AVX_Compare); @@ -805,18 +862,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotLessThanOrEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareNotLessThanOrEqual : NI_SSE2_CompareNotLessThanOrEqual; + return NI_X86Base_CompareNotLessThanOrEqual; } case FloatComparisonMode::OrderedNonSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareOrderedMask; + return NI_AVX512_CompareOrderedMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarOrdered : NI_SSE2_CompareScalarOrdered; + return NI_X86Base_CompareScalarOrdered; } assert(intrinsic == NI_AVX_Compare); @@ -825,18 +882,18 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareOrdered; } - return (simdBaseType == TYP_FLOAT) ? 
NI_SSE_CompareOrdered : NI_SSE2_CompareOrdered; + return NI_X86Base_CompareOrdered; } case FloatComparisonMode::UnorderedNonSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareUnorderedMask; + return NI_AVX512_CompareUnorderedMask; } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarUnordered : NI_SSE2_CompareScalarUnordered; + return NI_X86Base_CompareScalarUnordered; } assert(intrinsic == NI_AVX_Compare); @@ -845,7 +902,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareUnordered; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareUnordered : NI_SSE2_CompareUnordered; + return NI_X86Base_CompareUnordered; } default: @@ -865,87 +922,8 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic // true if isa is supported; otherwise, false bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) { - switch (isa) - { - // These ISAs are fully implemented - case InstructionSet_AES: - case InstructionSet_AES_X64: - case InstructionSet_AVX: - case InstructionSet_AVX_X64: - case InstructionSet_AVX2: - case InstructionSet_AVX2_X64: - case InstructionSet_AVX512F: - case InstructionSet_AVX512F_VL: - case InstructionSet_AVX512F_X64: - case InstructionSet_AVX512BW: - case InstructionSet_AVX512BW_VL: - case InstructionSet_AVX512BW_X64: - case InstructionSet_AVX512CD: - case InstructionSet_AVX512CD_VL: - case InstructionSet_AVX512CD_X64: - case InstructionSet_AVX512DQ: - case InstructionSet_AVX512DQ_VL: - case InstructionSet_AVX512DQ_X64: - case InstructionSet_AVX512VBMI: - case InstructionSet_AVX512VBMI_VL: - case InstructionSet_AVX512VBMI_X64: - case InstructionSet_AVXVNNI: - case InstructionSet_AVXVNNI_X64: - case InstructionSet_BMI1: - case InstructionSet_BMI1_X64: - case InstructionSet_BMI2: - case InstructionSet_BMI2_X64: - case InstructionSet_FMA: - case InstructionSet_FMA_X64: - case InstructionSet_LZCNT: - case InstructionSet_LZCNT_X64: - case InstructionSet_PCLMULQDQ: - case InstructionSet_PCLMULQDQ_X64: - case InstructionSet_PCLMULQDQ_V256: - case InstructionSet_PCLMULQDQ_V512: - case InstructionSet_POPCNT: - case InstructionSet_POPCNT_X64: - case InstructionSet_SSE: - case InstructionSet_SSE_X64: - case InstructionSet_SSE2: - case InstructionSet_SSE2_X64: - case InstructionSet_SSE3: - case InstructionSet_SSE3_X64: - case InstructionSet_SSSE3: - case InstructionSet_SSSE3_X64: - case InstructionSet_SSE41: - case InstructionSet_SSE41_X64: - case InstructionSet_SSE42: - case InstructionSet_SSE42_X64: - case InstructionSet_Vector128: - case InstructionSet_Vector256: - case InstructionSet_Vector512: - case InstructionSet_X86Base: - case InstructionSet_X86Base_X64: - case InstructionSet_X86Serialize: - case InstructionSet_X86Serialize_X64: - case InstructionSet_AVX10v1: - case InstructionSet_AVX10v1_X64: - case InstructionSet_AVX10v1_V512: - case InstructionSet_AVX10v1_V512_X64: - case InstructionSet_AVX10v2: - case InstructionSet_AVX10v2_X64: - case InstructionSet_AVX10v2_V512: - case InstructionSet_AVX10v2_V512_X64: - case InstructionSet_EVEX: - case InstructionSet_GFNI: - case InstructionSet_GFNI_X64: - case InstructionSet_GFNI_V256: - case InstructionSet_GFNI_V512: - { - return true; - } - - default: - { - return false; - } - } + // All ISAs are currently fully implemented + return true; } //------------------------------------------------------------------------ @@ -996,12 
+974,10 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim { switch (id) { - case NI_SSE_CompareEqual: - case NI_SSE_CompareScalarEqual: - case NI_SSE2_CompareEqual: - case NI_SSE2_CompareScalarEqual: + case NI_X86Base_CompareEqual: + case NI_X86Base_CompareScalarEqual: case NI_AVX_CompareEqual: - case NI_EVEX_CompareEqualMask: + case NI_AVX512_CompareEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1014,12 +990,10 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareGreaterThan: - case NI_SSE_CompareScalarGreaterThan: - case NI_SSE2_CompareGreaterThan: - case NI_SSE2_CompareScalarGreaterThan: + case NI_X86Base_CompareGreaterThan: + case NI_X86Base_CompareScalarGreaterThan: case NI_AVX_CompareGreaterThan: - case NI_EVEX_CompareGreaterThanMask: + case NI_AVX512_CompareGreaterThanMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1029,7 +1003,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); return static_cast(FloatComparisonMode::OrderedGreaterThanSignaling); } - else if ((id == NI_EVEX_CompareGreaterThanMask) && varTypeIsUnsigned(simdBaseType)) + else if ((id == NI_AVX512_CompareGreaterThanMask) && varTypeIsUnsigned(simdBaseType)) { // TODO-XARCH-CQ: Allow the other integer paths to use the EVEX encoding return static_cast(IntComparisonMode::GreaterThan); @@ -1037,18 +1011,16 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareLessThan: - case NI_SSE_CompareScalarLessThan: - case NI_SSE2_CompareLessThan: - case NI_SSE2_CompareScalarLessThan: + case NI_X86Base_CompareLessThan: + case NI_X86Base_CompareScalarLessThan: case NI_AVX_CompareLessThan: - case NI_EVEX_CompareLessThanMask: + case NI_AVX512_CompareLessThanMask: { if (varTypeIsFloating(simdBaseType)) { return static_cast(FloatComparisonMode::OrderedLessThanSignaling); } - else if (id == NI_EVEX_CompareLessThanMask) + else if (id == NI_AVX512_CompareLessThanMask) { // TODO-XARCH-CQ: Allow the other integer paths to use the EVEX encoding return static_cast(IntComparisonMode::LessThan); @@ -1056,12 +1028,10 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE_CompareScalarGreaterThanOrEqual: - case NI_SSE2_CompareGreaterThanOrEqual: - case NI_SSE2_CompareScalarGreaterThanOrEqual: + case NI_X86Base_CompareGreaterThanOrEqual: + case NI_X86Base_CompareScalarGreaterThanOrEqual: case NI_AVX_CompareGreaterThanOrEqual: - case NI_EVEX_CompareGreaterThanOrEqualMask: + case NI_AVX512_CompareGreaterThanOrEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1073,18 +1043,16 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareGreaterThanOrEqualMask); + assert(id == NI_AVX512_CompareGreaterThanOrEqualMask); return static_cast(IntComparisonMode::GreaterThanOrEqual); } break; } - case NI_SSE_CompareLessThanOrEqual: - case NI_SSE_CompareScalarLessThanOrEqual: - case NI_SSE2_CompareLessThanOrEqual: - case NI_SSE2_CompareScalarLessThanOrEqual: + case NI_X86Base_CompareLessThanOrEqual: + case NI_X86Base_CompareScalarLessThanOrEqual: case NI_AVX_CompareLessThanOrEqual: - case NI_EVEX_CompareLessThanOrEqualMask: + case NI_AVX512_CompareLessThanOrEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1092,18 +1060,16 @@ int 
HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareLessThanOrEqualMask); + assert(id == NI_AVX512_CompareLessThanOrEqualMask); return static_cast(IntComparisonMode::LessThanOrEqual); } break; } - case NI_SSE_CompareNotEqual: - case NI_SSE_CompareScalarNotEqual: - case NI_SSE2_CompareNotEqual: - case NI_SSE2_CompareScalarNotEqual: + case NI_X86Base_CompareNotEqual: + case NI_X86Base_CompareScalarNotEqual: case NI_AVX_CompareNotEqual: - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareNotEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1111,18 +1077,16 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareNotEqualMask); + assert(id == NI_AVX512_CompareNotEqualMask); return static_cast(IntComparisonMode::NotEqual); } break; } - case NI_SSE_CompareNotGreaterThan: - case NI_SSE_CompareScalarNotGreaterThan: - case NI_SSE2_CompareNotGreaterThan: - case NI_SSE2_CompareScalarNotGreaterThan: + case NI_X86Base_CompareNotGreaterThan: + case NI_X86Base_CompareScalarNotGreaterThan: case NI_AVX_CompareNotGreaterThan: - case NI_EVEX_CompareNotGreaterThanMask: + case NI_AVX512_CompareNotGreaterThanMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1134,18 +1098,16 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareNotGreaterThanMask); + assert(id == NI_AVX512_CompareNotGreaterThanMask); return static_cast(IntComparisonMode::LessThanOrEqual); } break; } - case NI_SSE_CompareNotLessThan: - case NI_SSE_CompareScalarNotLessThan: - case NI_SSE2_CompareNotLessThan: - case NI_SSE2_CompareScalarNotLessThan: + case NI_X86Base_CompareNotLessThan: + case NI_X86Base_CompareScalarNotLessThan: case NI_AVX_CompareNotLessThan: - case NI_EVEX_CompareNotLessThanMask: + case NI_AVX512_CompareNotLessThanMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1153,18 +1115,16 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareNotLessThanMask); + assert(id == NI_AVX512_CompareNotLessThanMask); return static_cast(IntComparisonMode::GreaterThanOrEqual); } break; } - case NI_SSE_CompareNotGreaterThanOrEqual: - case NI_SSE_CompareScalarNotGreaterThanOrEqual: - case NI_SSE2_CompareNotGreaterThanOrEqual: - case NI_SSE2_CompareScalarNotGreaterThanOrEqual: + case NI_X86Base_CompareNotGreaterThanOrEqual: + case NI_X86Base_CompareScalarNotGreaterThanOrEqual: case NI_AVX_CompareNotGreaterThanOrEqual: - case NI_EVEX_CompareNotGreaterThanOrEqualMask: + case NI_AVX512_CompareNotGreaterThanOrEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1176,18 +1136,16 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareNotGreaterThanOrEqualMask); + assert(id == NI_AVX512_CompareNotGreaterThanOrEqualMask); return static_cast(IntComparisonMode::LessThan); } break; } - case NI_SSE_CompareNotLessThanOrEqual: - case NI_SSE_CompareScalarNotLessThanOrEqual: - case NI_SSE2_CompareNotLessThanOrEqual: - case NI_SSE2_CompareScalarNotLessThanOrEqual: + case NI_X86Base_CompareNotLessThanOrEqual: + case NI_X86Base_CompareScalarNotLessThanOrEqual: case NI_AVX_CompareNotLessThanOrEqual: - case NI_EVEX_CompareNotLessThanOrEqualMask: + case NI_AVX512_CompareNotLessThanOrEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1195,29 +1153,25 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, 
var_types sim } else { - assert(id == NI_EVEX_CompareNotLessThanOrEqualMask); + assert(id == NI_AVX512_CompareNotLessThanOrEqualMask); return static_cast(IntComparisonMode::GreaterThan); } break; } - case NI_SSE_CompareOrdered: - case NI_SSE_CompareScalarOrdered: - case NI_SSE2_CompareOrdered: - case NI_SSE2_CompareScalarOrdered: + case NI_X86Base_CompareOrdered: + case NI_X86Base_CompareScalarOrdered: case NI_AVX_CompareOrdered: - case NI_EVEX_CompareOrderedMask: + case NI_AVX512_CompareOrderedMask: { assert(varTypeIsFloating(simdBaseType)); return static_cast(FloatComparisonMode::OrderedNonSignaling); } - case NI_SSE_CompareUnordered: - case NI_SSE_CompareScalarUnordered: - case NI_SSE2_CompareUnordered: - case NI_SSE2_CompareScalarUnordered: + case NI_X86Base_CompareUnordered: + case NI_X86Base_CompareScalarUnordered: case NI_AVX_CompareUnordered: - case NI_EVEX_CompareUnorderedMask: + case NI_AVX512_CompareUnorderedMask: { assert(varTypeIsFloating(simdBaseType)); return static_cast(FloatComparisonMode::UnorderedNonSignaling); @@ -1303,20 +1257,15 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT assert(HWIntrinsicInfo::NoJmpTableImm(intrinsic) || HWIntrinsicInfo::MaybeNoJmpTableImm(intrinsic)); switch (intrinsic) { - case NI_SSE2_ShiftLeftLogical: - case NI_SSE2_ShiftRightArithmetic: - case NI_SSE2_ShiftRightLogical: + case NI_X86Base_ShiftLeftLogical: + case NI_X86Base_ShiftRightArithmetic: + case NI_X86Base_ShiftRightLogical: case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightLogical: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512BW_ShiftLeftLogical: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX10v1_ShiftRightArithmetic: + case NI_AVX512_ShiftLeftLogical: + case NI_AVX512_ShiftRightArithmetic: + case NI_AVX512_ShiftRightLogical: { // These intrinsics have overloads that take op2 in a simd register and just read the lowest 8-bits @@ -1329,24 +1278,16 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT return gtNewSimdHWIntrinsicNode(simdType, op1, tmpOp, intrinsic, simdBaseJitType, genTypeSize(simdType)); } - case NI_AVX512F_RotateLeft: - case NI_AVX512F_RotateRight: - case NI_AVX512F_VL_RotateLeft: - case NI_AVX512F_VL_RotateRight: - case NI_AVX10v1_RotateLeft: - case NI_AVX10v1_RotateRight: + case NI_AVX512_RotateLeft: + case NI_AVX512_RotateRight: { var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); // These intrinsics have variants that take op2 in a simd register and read a unique shift per element intrinsic = static_cast(intrinsic + 1); - static_assert_no_msg(NI_AVX512F_RotateLeftVariable == (NI_AVX512F_RotateLeft + 1)); - static_assert_no_msg(NI_AVX512F_RotateRightVariable == (NI_AVX512F_RotateRight + 1)); - static_assert_no_msg(NI_AVX512F_VL_RotateLeftVariable == (NI_AVX512F_VL_RotateLeft + 1)); - static_assert_no_msg(NI_AVX512F_VL_RotateRightVariable == (NI_AVX512F_VL_RotateRight + 1)); - static_assert_no_msg(NI_AVX10v1_RotateLeftVariable == (NI_AVX10v1_RotateLeft + 1)); - static_assert_no_msg(NI_AVX10v1_RotateRightVariable == (NI_AVX10v1_RotateRight + 1)); + static_assert_no_msg(NI_AVX512_RotateLeftVariable == (NI_AVX512_RotateLeft + 1)); + static_assert_no_msg(NI_AVX512_RotateRightVariable == (NI_AVX512_RotateRight + 1)); impSpillSideEffect(true, stackState.esStackDepth - 2 
DEBUGARG("Spilling op1 side effects for HWIntrinsic")); @@ -1457,13 +1398,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_AndNot: - case NI_SSE2_AndNot: + case NI_X86Base_AndNot: case NI_AVX_AndNot: case NI_AVX2_AndNot: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: + case NI_AVX512_AndNot: { assert(sig->numArgs == 2); @@ -1476,23 +1414,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - if (IsBaselineSimdIsaSupported()) - { - op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize)); - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize); - } - else - { - // We need to ensure we import even if SSE2 is disabled - assert(intrinsic == NI_SSE_AndNot); - - op3 = gtNewAllBitsSetConNode(retType); - - op1 = gtNewSimdHWIntrinsicNode(retType, op1, op3, NI_SSE_Xor, simdBaseJitType, simdSize); - op1 = gtFoldExpr(op1); - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_SSE_And, simdBaseJitType, simdSize); - } + op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize)); + retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize); break; } @@ -1512,6 +1435,133 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector128_AddSaturate: + case NI_Vector256_AddSaturate: + case NI_Vector512_AddSaturate: + { + assert(sig->numArgs == 2); + + if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || + compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (varTypeIsFloating(simdBaseType)) + { + retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); + } + else if (varTypeIsSmall(simdBaseType)) + { + if (simdSize == 64) + { + intrinsic = NI_AVX512_AddSaturate; + } + else if (simdSize == 32) + { + intrinsic = NI_AVX2_AddSaturate; + } + else + { + assert(simdSize == 16); + intrinsic = NI_X86Base_AddSaturate; + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + } + else if (varTypeIsUnsigned(simdBaseType)) + { + // For unsigned we simply have to detect `(x + y) < x` + // and in that scenario return MaxValue (AllBitsSet) + + GenTree* cns = gtNewAllBitsSetConNode(retType); + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + + GenTree* tmp = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); + GenTree* tmpDup1 = fgMakeMultiUse(&tmp); + GenTree* msk = gtNewSimdCmpOpNode(GT_LT, retType, tmp, op1Dup1, simdBaseJitType, simdSize); + + retNode = gtNewSimdCndSelNode(retType, msk, cns, tmpDup1, simdBaseJitType, simdSize); + } + else + { + // For signed the logic is a bit more complex, but is + // explained on the managed side as part of Scalar.AddSaturate + + GenTreeVecCon* minCns = gtNewVconNode(retType); + GenTreeVecCon* maxCns = gtNewVconNode(retType); + + switch (simdBaseType) + { + case TYP_SHORT: + { + minCns->EvaluateBroadcastInPlace(INT16_MIN); + maxCns->EvaluateBroadcastInPlace(INT16_MAX); + break; + } + + case TYP_INT: + { + minCns->EvaluateBroadcastInPlace(INT32_MIN); + maxCns->EvaluateBroadcastInPlace(INT32_MAX); + break; + } + + case TYP_LONG: + { + minCns->EvaluateBroadcastInPlace(INT64_MIN); + maxCns->EvaluateBroadcastInPlace(INT64_MAX); + break; + } + + default: + { + unreached(); + } + } + + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op2Dup1 = 
fgMakeMultiUse(&op2); + + GenTree* tmp = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); + + GenTree* tmpDup1 = fgMakeMultiUse(&tmp); + GenTree* tmpDup2 = gtCloneExpr(tmpDup1); + + GenTree* msk = gtNewSimdIsNegativeNode(retType, tmpDup1, simdBaseJitType, simdSize); + GenTree* ovf = gtNewSimdCndSelNode(retType, msk, maxCns, minCns, simdBaseJitType, simdSize); + + // The mask we need is ((a ^ b) & ~(b ^ c)) < 0 + + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // tmpDup1 = a: 0xF0 + // op1Dup1 = b: 0xCC + // op2Dup2 = c: 0xAA + // + // 0x18 = A ? norBC : andBC + // a ? ~(b | c) : (b & c) + msk = gtNewSimdTernaryLogicNode(retType, tmp, op1Dup1, op2Dup1, gtNewIconNode(0x18), + simdBaseJitType, simdSize); + } + else + { + GenTree* op1Dup2 = gtCloneExpr(op1Dup1); + + GenTree* msk2 = gtNewSimdBinOpNode(GT_XOR, retType, tmp, op1Dup1, simdBaseJitType, simdSize); + GenTree* msk3 = + gtNewSimdBinOpNode(GT_XOR, retType, op1Dup2, op2Dup1, simdBaseJitType, simdSize); + + msk = gtNewSimdBinOpNode(GT_AND_NOT, retType, msk2, msk3, simdBaseJitType, simdSize); + } + + msk = gtNewSimdIsNegativeNode(retType, msk, simdBaseJitType, simdSize); + retNode = gtNewSimdCndSelNode(retType, msk, ovf, tmpDup2, simdBaseJitType, simdSize); + } + } + break; + } + case NI_Vector128_AndNot: case NI_Vector256_AndNot: case NI_Vector512_AndNot: @@ -1887,25 +1937,21 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); assert(varTypeIsLong(simdBaseType)); - if (IsBaselineVector512IsaSupportedOpportunistically() || - ((simdSize != 64) && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { if (simdSize == 64) { - intrinsic = NI_AVX512DQ_ConvertToVector512Double; + intrinsic = NI_AVX512_ConvertToVector512Double; } else if (simdSize == 32) { - intrinsic = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector256Double - : NI_AVX512DQ_VL_ConvertToVector256Double; + intrinsic = NI_AVX512_ConvertToVector256Double; } else { assert(simdSize == 16); - intrinsic = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? 
NI_AVX10v1_ConvertToVector128Double - : NI_AVX512DQ_VL_ConvertToVector128Double; + intrinsic = NI_AVX512_ConvertToVector128Double; } op1 = impSIMDPopStack(); retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); @@ -1952,8 +1998,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(sig->numArgs == 1); assert(simdBaseType == TYP_DOUBLE); - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); @@ -1973,8 +2018,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); @@ -1994,45 +2038,30 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, switch (simdSize) { case 16: - intrinsic = NI_SSE2_ConvertToVector128Single; + intrinsic = NI_X86Base_ConvertToVector128Single; break; case 32: intrinsic = NI_AVX_ConvertToVector256Single; break; case 64: - intrinsic = NI_AVX512F_ConvertToVector512Single; - break; - default: - unreached(); - } - } - else if (simdBaseType == TYP_UINT && simdSize != 64 && - compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - switch (simdSize) - { - case 16: - intrinsic = NI_AVX10v1_ConvertToVector128Single; - break; - case 32: - intrinsic = NI_AVX10v1_ConvertToVector256Single; + intrinsic = NI_AVX512_ConvertToVector512Single; break; default: unreached(); } } - else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically()) + else if (simdBaseType == TYP_UINT && compOpportunisticallyDependsOn(InstructionSet_AVX512)) { switch (simdSize) { case 16: - intrinsic = NI_AVX512F_VL_ConvertToVector128Single; + intrinsic = NI_AVX512_ConvertToVector128Single; break; case 32: - intrinsic = NI_AVX512F_VL_ConvertToVector256Single; + intrinsic = NI_AVX512_ConvertToVector256Single; break; case 64: - intrinsic = NI_AVX512F_ConvertToVector512Single; + intrinsic = NI_AVX512_ConvertToVector512Single; break; default: unreached(); @@ -2053,8 +2082,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(sig->numArgs == 1); assert(simdBaseType == TYP_FLOAT); - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); @@ -2074,8 +2102,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); @@ -2089,8 +2116,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); assert(simdBaseType == TYP_DOUBLE); - if (IsBaselineVector512IsaSupportedOpportunistically() || - 
(simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); @@ -2110,8 +2137,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); @@ -2125,16 +2151,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { if (sig->numArgs == 1) { -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->IsIntegralConst()) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. - break; - } -#endif // TARGET_X86 - op1 = impPopStack().val; retNode = gtNewSimdCreateBroadcastNode(retType, op1, simdBaseJitType, simdSize); break; @@ -2266,16 +2282,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType)) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. - break; - } -#endif // TARGET_X86 - IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs); // TODO-CQ: We don't handle contiguous args for anything except TYP_FLOAT today @@ -2321,16 +2327,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->IsIntegralConst()) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. - break; - } -#endif // TARGET_X86 - op1 = impPopStack().val; retNode = gtNewSimdCreateScalarNode(retType, op1, simdBaseJitType, simdSize); break; @@ -2342,16 +2338,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->IsIntegralConst()) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. - break; - } -#endif // TARGET_X86 - op1 = impPopStack().val; retNode = gtNewSimdCreateScalarUnsafeNode(retType, op1, simdBaseJitType, simdSize); break; @@ -2376,27 +2362,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } } - - if (varTypeIsLong(simdBaseType)) - { - if (!impStackTop(0).val->OperIsConst()) - { - // When op2 is a constant, we can skip the multiplication allowing us to always - // generate better code. However, if it isn't then we need to fallback in the - // cases where multiplication isn't supported. 
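// ---------------------------------------------------------------------------
// [Editor's note] The Vector128/256/512 AddSaturate expansion imported earlier
// in this function detects signed overflow with the mask
// ((a ^ b) & ~(b ^ c)) < 0, where a is the wrapping sum and b, c are the
// operands, then selects MaxValue/MinValue based on the sign of the wrapped
// sum. The helper below is a minimal scalar sketch of that same check for
// int32 (illustration only, not part of this patch; names are made up).
// ---------------------------------------------------------------------------
#include <cstdint>
#include <cstdio>

static int32_t AddSaturateInt32(int32_t x, int32_t y)
{
    uint32_t ux   = static_cast<uint32_t>(x);
    uint32_t uy   = static_cast<uint32_t>(y);
    uint32_t usum = ux + uy;                    // wrapping add (well defined on unsigned)
    int32_t  sum  = static_cast<int32_t>(usum);

    // Overflow occurred iff the operands have the same sign but the wrapped
    // sum's sign differs: ((sum ^ x) & ~(x ^ y)) has its sign bit set.
    bool overflowed = (((usum ^ ux) & ~(ux ^ uy)) >> 31) != 0;

    if (overflowed)
    {
        // A negative wrapped sum means the true result overflowed upwards.
        return (sum < 0) ? INT32_MAX : INT32_MIN;
    }
    return sum;
}

int main()
{
    printf("%d\n", AddSaturateInt32(INT32_MAX, 1));  // 2147483647
    printf("%d\n", AddSaturateInt32(INT32_MIN, -1)); // -2147483648
    printf("%d\n", AddSaturateInt32(5, 7));          // 12
    return 0;
}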
- - if ((simdSize != 64) && !canUseEvexEncoding()) - { - // TODO-XARCH-CQ: We should support long/ulong multiplication - break; - } - } - -#if defined(TARGET_X86) - // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast - break; -#endif // TARGET_X86 - } } impSpillSideEffect(true, stackState.esStackDepth - @@ -2417,8 +2382,19 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (!varTypeIsFloating(simdBaseType)) { - // We can't trivially handle division for integral types using SIMD +#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) + // Check to see if it is possible to emulate the integer division + if (!(simdBaseType == TYP_INT && + ((simdSize == 16 && compOpportunisticallyDependsOn(InstructionSet_AVX)) || + (simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512))))) + { + break; + } + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op1 side effects for vector integer division")); +#else break; +#endif // defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) } CORINFO_ARG_LIST_HANDLE arg1 = sig->args; @@ -2433,45 +2409,38 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = getArgForHWIntrinsic(argType, argClass); retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize); + break; } case NI_Vector128_Dot: case NI_Vector256_Dot: + case NI_Vector512_Dot: { assert(sig->numArgs == 2); var_types simdType = getSIMDTypeForSize(simdSize); - if (varTypeIsByte(simdBaseType) || varTypeIsLong(simdBaseType)) + if ((simdSize == 32) && !varTypeIsFloating(simdBaseType) && + !compOpportunisticallyDependsOn(InstructionSet_AVX2)) { - // TODO-XARCH-CQ: We could support dot product for 8-bit and - // 64-bit integers if we support multiplication for the same + // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 break; } - if (simdSize == 32) - { - if (!varTypeIsFloating(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 - break; - } - } - else if ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // TODO-XARCH-CQ: We can support 32-bit integers if we updating multiplication - // to be lowered rather than imported as the relevant operations. 
- break; - } - } - op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); + if ((simdSize == 64) || varTypeIsByte(simdBaseType) || varTypeIsLong(simdBaseType) || + (varTypeIsInt(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSE41))) + { + // The lowering for Dot doesn't handle these cases, so import as Sum(left * right) + retNode = gtNewSimdBinOpNode(GT_MUL, simdType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdSumNode(retType, retNode, simdBaseJitType, simdSize); + break; + } + retNode = gtNewSimdDotProdNode(simdType, op1, op2, simdBaseJitType, simdSize); - retNode = gtNewSimdGetElementNode(retType, retNode, gtNewIconNode(0), simdBaseJitType, simdSize); + retNode = gtNewSimdToScalarNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -2539,7 +2508,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); } - retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_EVEX_MoveMask, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_AVX512_MoveMask, simdBaseJitType, simdSize); break; } @@ -2557,7 +2526,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case TYP_UBYTE: { op1 = impSIMDPopStack(); - moveMaskIntrinsic = (simdSize == 32) ? NI_AVX2_MoveMask : NI_SSE2_MoveMask; + moveMaskIntrinsic = (simdSize == 32) ? NI_AVX2_MoveMask : NI_X86Base_MoveMask; break; } @@ -2585,12 +2554,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, simdVal.u64[3] = 0x8080808080808080; shuffleIntrinsic = NI_AVX2_Shuffle; - moveMaskIntrinsic = NI_SSE2_MoveMask; + moveMaskIntrinsic = NI_X86Base_MoveMask; } else if (compOpportunisticallyDependsOn(InstructionSet_SSSE3)) { shuffleIntrinsic = NI_SSSE3_Shuffle; - moveMaskIntrinsic = NI_SSE2_MoveMask; + moveMaskIntrinsic = NI_X86Base_MoveMask; } else { @@ -2631,7 +2600,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { simdBaseJitType = CORINFO_TYPE_FLOAT; op1 = impSIMDPopStack(); - moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_SSE_MoveMask; + moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_X86Base_MoveMask; break; } @@ -2641,7 +2610,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { simdBaseJitType = CORINFO_TYPE_DOUBLE; op1 = impSIMDPopStack(); - moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_SSE2_MoveMask; + moveMaskIntrinsic = (simdSize == 32) ? 
NI_AVX_MoveMask : NI_X86Base_MoveMask; break; } @@ -2753,13 +2722,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case TYP_LONG: case TYP_ULONG: { - bool useToScalar = op2->IsIntegralConst(0); - -#if defined(TARGET_X86) - useToScalar &= !varTypeIsLong(simdBaseType); -#endif // TARGET_X86 - - if (!useToScalar && !compOpportunisticallyDependsOn(InstructionSet_SSE41)) + if (!op2->IsIntegralConst(0) && !compOpportunisticallyDependsOn(InstructionSet_SSE41)) { // Using software fallback if simdBaseType is not supported by hardware return nullptr; @@ -3173,11 +3136,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_LoadVector128: - case NI_SSE2_LoadVector128: + case NI_X86Base_LoadVector128: case NI_AVX_LoadVector256: - case NI_AVX512F_LoadVector512: - case NI_AVX512BW_LoadVector512: + case NI_AVX512_LoadVector512: case NI_Vector128_LoadUnsafe: case NI_Vector256_LoadUnsafe: case NI_Vector512_LoadUnsafe: @@ -3337,29 +3298,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (varTypeIsLong(simdBaseType)) - { - if (TARGET_POINTER_SIZE == 4) - { - // TODO-XARCH-CQ: 32bit support - break; - } - - if ((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // Emulate NI_AVX512DQ_VL_MultiplyLow with AVX2 for SIMD32 - } - else if ((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // Emulate NI_AVX512DQ_VL_MultiplyLow with SSE41 for SIMD16 - } - else if (simdSize != 64) - { - // Software fallback - break; - } - } - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); var_types argType = TYP_UNKNOWN; @@ -3393,29 +3331,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (varTypeIsLong(simdBaseType)) - { - if (TARGET_POINTER_SIZE == 4) - { - // TODO-XARCH-CQ: 32bit support - break; - } - - if ((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // Emulate NI_AVX512DQ_VL_MultiplyLow with AVX2 for SIMD32 - } - else if ((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // Emulate NI_AVX512DQ_VL_MultiplyLow with SSE41 for SIMD16 - } - else if (simdSize != 64) - { - // Software fallback - break; - } - } - op3 = impSIMDPopStack(); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); @@ -3449,6 +3364,219 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector128_NarrowWithSaturation: + case NI_Vector256_NarrowWithSaturation: + case NI_Vector512_NarrowWithSaturation: + { + assert(sig->numArgs == 2); + + if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || + compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (simdBaseType == TYP_DOUBLE) + { + // gtNewSimdNarrowNode uses the base type of the return for the simdBaseType + retNode = gtNewSimdNarrowNode(retType, op1, op2, CORINFO_TYPE_FLOAT, simdSize); + } + else if ((simdSize == 16) && ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_INT))) + { + // PackSignedSaturate uses the base type of the return for the simdBaseType + simdBaseJitType = (simdBaseType == TYP_SHORT) ? 
CORINFO_TYPE_BYTE : CORINFO_TYPE_SHORT; + + intrinsic = NI_X86Base_PackSignedSaturate; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + } + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + if ((simdSize == 32) || (simdSize == 64)) + { + if (simdSize == 32) + { + intrinsic = NI_Vector256_ToVector512Unsafe; + + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD64, op1, intrinsic, simdBaseJitType, simdSize); + op1 = gtNewSimdWithUpperNode(TYP_SIMD64, op1, op2, simdBaseJitType, simdSize * 2); + } + + switch (simdBaseType) + { + case TYP_SHORT: + { + intrinsic = NI_AVX512_ConvertToVector256SByteWithSaturation; + break; + } + + case TYP_USHORT: + { + intrinsic = NI_AVX512_ConvertToVector256ByteWithSaturation; + break; + } + + case TYP_INT: + { + intrinsic = NI_AVX512_ConvertToVector256Int16WithSaturation; + break; + } + + case TYP_UINT: + { + intrinsic = NI_AVX512_ConvertToVector256UInt16WithSaturation; + break; + } + + case TYP_LONG: + { + intrinsic = NI_AVX512_ConvertToVector256Int32WithSaturation; + break; + } + + case TYP_ULONG: + { + intrinsic = NI_AVX512_ConvertToVector256UInt32WithSaturation; + break; + } + + default: + { + unreached(); + } + } + } + else + { + assert(simdSize == 16); + intrinsic = NI_Vector128_ToVector256Unsafe; + + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, intrinsic, simdBaseJitType, simdSize); + op1 = gtNewSimdWithUpperNode(TYP_SIMD32, op1, op2, simdBaseJitType, simdSize * 2); + + switch (simdBaseType) + { + case TYP_USHORT: + { + intrinsic = NI_AVX512_ConvertToVector128ByteWithSaturation; + break; + } + + case TYP_UINT: + { + intrinsic = NI_AVX512_ConvertToVector128UInt16WithSaturation; + break; + } + + case TYP_LONG: + { + intrinsic = NI_AVX512_ConvertToVector128Int32WithSaturation; + break; + } + + case TYP_ULONG: + { + intrinsic = NI_AVX512_ConvertToVector128UInt32WithSaturation; + break; + } + + default: + { + unreached(); + } + } + } + + if (simdSize == 64) + { + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, intrinsic, simdBaseJitType, simdSize); + op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op2, intrinsic, simdBaseJitType, simdSize); + + retNode = gtNewSimdWithUpperNode(retType, op1, op2, simdBaseJitType, simdSize); + } + else + { + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize * 2); + } + } + else + { + // gtNewSimdNarrowNode uses the base type of the return for the simdBaseType + CorInfoType narrowSimdBaseJitType; + + GenTreeVecCon* minCns = varTypeIsSigned(simdBaseType) ? 
gtNewVconNode(retType) : nullptr; + GenTreeVecCon* maxCns = gtNewVconNode(retType); + + switch (simdBaseType) + { + case TYP_SHORT: + { + minCns->EvaluateBroadcastInPlace(INT8_MIN); + maxCns->EvaluateBroadcastInPlace(INT8_MAX); + + narrowSimdBaseJitType = CORINFO_TYPE_BYTE; + break; + } + + case TYP_USHORT: + { + maxCns->EvaluateBroadcastInPlace(UINT8_MAX); + narrowSimdBaseJitType = CORINFO_TYPE_UBYTE; + break; + } + + case TYP_INT: + { + minCns->EvaluateBroadcastInPlace(INT16_MIN); + maxCns->EvaluateBroadcastInPlace(INT16_MAX); + + narrowSimdBaseJitType = CORINFO_TYPE_SHORT; + break; + } + + case TYP_UINT: + { + maxCns->EvaluateBroadcastInPlace(UINT16_MAX); + narrowSimdBaseJitType = CORINFO_TYPE_USHORT; + break; + } + + case TYP_LONG: + { + minCns->EvaluateBroadcastInPlace(INT32_MIN); + maxCns->EvaluateBroadcastInPlace(INT32_MAX); + + narrowSimdBaseJitType = CORINFO_TYPE_INT; + break; + } + + case TYP_ULONG: + { + maxCns->EvaluateBroadcastInPlace(UINT32_MAX); + narrowSimdBaseJitType = CORINFO_TYPE_UINT; + break; + } + + default: + { + unreached(); + } + } + + if (minCns != nullptr) + { + op1 = gtNewSimdMaxNode(retType, op1, minCns, simdBaseJitType, simdSize); + op2 = gtNewSimdMaxNode(retType, op2, gtCloneExpr(minCns), simdBaseJitType, simdSize); + } + + op1 = gtNewSimdMinNode(retType, op1, maxCns, simdBaseJitType, simdSize); + op2 = gtNewSimdMinNode(retType, op2, gtCloneExpr(maxCns), simdBaseJitType, simdSize); + + retNode = gtNewSimdNarrowNode(retType, op1, op2, narrowSimdBaseJitType, simdSize); + } + } + break; + } + case NI_Vector128_op_UnaryNegation: case NI_Vector256_op_UnaryNegation: case NI_Vector512_op_UnaryNegation: @@ -3539,18 +3667,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 2); -#if defined(TARGET_X86) - if ((simdBaseType == TYP_LONG) || (simdBaseType == TYP_DOUBLE)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_EVEX) && !impStackTop(0).val->IsCnsIntOrI()) - { - // If vpsraq is available, we can use that. We can also trivially emulate arithmetic shift by const - // amount. Otherwise, more work is required for long types, so we fall back to managed for now. - break; - } - } -#endif // TARGET_X86 - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) { genTreeOps op = varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH; @@ -3625,7 +3741,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (simdSize == 64) { - intrinsic = NI_AVX512F_ShiftLeftLogicalVariable; + intrinsic = NI_AVX512_ShiftLeftLogicalVariable; } else { @@ -3641,37 +3757,68 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_Shuffle: case NI_Vector256_Shuffle: case NI_Vector512_Shuffle: + case NI_Vector128_ShuffleNative: + case NI_Vector256_ShuffleNative: + case NI_Vector512_ShuffleNative: + case NI_Vector128_ShuffleNativeFallback: + case NI_Vector256_ShuffleNativeFallback: + case NI_Vector512_ShuffleNativeFallback: { assert((sig->numArgs == 2) || (sig->numArgs == 3)); + // The Native variants are non-deterministic on xarch + bool isShuffleNative = (intrinsic != NI_Vector128_Shuffle) && (intrinsic != NI_Vector256_Shuffle) && + (intrinsic != NI_Vector512_Shuffle); + if (isShuffleNative && BlockNonDeterministicIntrinsics(mustExpand)) + { + break; + } + GenTree* indices = impStackTop(0).val; - if (!indices->IsCnsVec() || !IsValidForShuffle(indices->AsVecCon(), simdSize, simdBaseType)) + // Check if the required intrinsics are available to emit now (validForShuffle). 
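// ---------------------------------------------------------------------------
// [Editor's note] The NarrowWithSaturation fallback imported earlier in this
// hunk (taken when the AVX512 saturating-narrow instructions are not
// available) clamps each element into the destination type's range via
// Max/Min against broadcast constants and then performs the ordinary
// truncating narrow. A scalar sketch of that clamp-then-truncate rule for
// int32 -> int16 (illustration only, not part of this patch):
// ---------------------------------------------------------------------------
#include <cstdint>
#include <cstdio>

static int16_t NarrowWithSaturationInt32(int32_t value)
{
    if (value < INT16_MIN) value = INT16_MIN; // clamp low  (Max against the min constant)
    if (value > INT16_MAX) value = INT16_MAX; // clamp high (Min against the max constant)
    return static_cast<int16_t>(value);       // the truncation is now lossless
}

int main()
{
    printf("%d\n", NarrowWithSaturationInt32(100000));  // 32767
    printf("%d\n", NarrowWithSaturationInt32(-100000)); // -32768
    printf("%d\n", NarrowWithSaturationInt32(1234));    // 1234
    return 0;
}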
If we have variable + // indices that might become possible to emit later (due to them becoming constant), this will be + // indicated in canBecomeValidForShuffle; otherwise, it's just the same as validForShuffle. + bool canBecomeValidForShuffle = false; + bool validForShuffle = + IsValidForShuffle(indices, simdSize, simdBaseType, &canBecomeValidForShuffle, isShuffleNative); + + // If it isn't valid for shuffle (and can't become valid later), then give up now. + if (!canBecomeValidForShuffle) + { + return nullptr; + } + + // If the indices might become constant later, then we don't emit for now, delay until later. + if ((!validForShuffle) || (!indices->IsCnsVec())) { assert(sig->numArgs == 2); - if (!opts.OptimizationEnabled()) + if (opts.OptimizationEnabled()) { // Only enable late stage rewriting if optimizations are enabled // as we won't otherwise encounter a constant at the later point - return nullptr; - } + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); + break; + } - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - break; + // If we're not doing late stage rewriting, just return null now as it won't become valid. + if (!validForShuffle) + { + return nullptr; + } } if (sig->numArgs == 2) { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize); + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, isShuffleNative); } break; } @@ -3690,11 +3837,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_Store: - case NI_SSE2_Store: + case NI_X86Base_Store: case NI_AVX_Store: - case NI_AVX512F_Store: - case NI_AVX512BW_Store: + case NI_AVX512_Store: { assert(retType == TYP_VOID); assert(sig->numArgs == 2); @@ -3800,29 +3945,139 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector128_SubtractSaturate: + case NI_Vector256_SubtractSaturate: + case NI_Vector512_SubtractSaturate: + { + assert(sig->numArgs == 2); + + if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || + compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (varTypeIsFloating(simdBaseType)) + { + retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); + } + else if (varTypeIsSmall(simdBaseType)) + { + if (simdSize == 64) + { + intrinsic = NI_AVX512_SubtractSaturate; + } + else if (simdSize == 32) + { + intrinsic = NI_AVX2_SubtractSaturate; + } + else + { + assert(simdSize == 16); + intrinsic = NI_X86Base_SubtractSaturate; + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + } + else if (varTypeIsUnsigned(simdBaseType)) + { + // For unsigned we simply have to detect `(x - y) > x` + // and in that scenario return MinValue (Zero) + + GenTree* cns = gtNewZeroConNode(retType); + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + + GenTree* tmp = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); + GenTree* tmpDup1 = fgMakeMultiUse(&tmp); + GenTree* msk = gtNewSimdCmpOpNode(GT_GT, retType, 
tmp, op1Dup1, simdBaseJitType, simdSize); + + retNode = gtNewSimdCndSelNode(retType, msk, cns, tmpDup1, simdBaseJitType, simdSize); + } + else + { + // For signed the logic is a bit more complex, but is + // explained on the managed side as part of Scalar.SubtractSaturate + + GenTreeVecCon* minCns = gtNewVconNode(retType); + GenTreeVecCon* maxCns = gtNewVconNode(retType); + + switch (simdBaseType) + { + case TYP_SHORT: + { + minCns->EvaluateBroadcastInPlace(INT16_MIN); + maxCns->EvaluateBroadcastInPlace(INT16_MAX); + break; + } + + case TYP_INT: + { + minCns->EvaluateBroadcastInPlace(INT32_MIN); + maxCns->EvaluateBroadcastInPlace(INT32_MAX); + break; + } + + case TYP_LONG: + { + minCns->EvaluateBroadcastInPlace(INT64_MIN); + maxCns->EvaluateBroadcastInPlace(INT64_MAX); + break; + } + + default: + { + unreached(); + } + } + + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op2Dup1 = fgMakeMultiUse(&op2); + + GenTree* tmp = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); + + GenTree* tmpDup1 = fgMakeMultiUse(&tmp); + GenTree* tmpDup2 = gtCloneExpr(tmpDup1); + + GenTree* msk = gtNewSimdIsNegativeNode(retType, tmpDup1, simdBaseJitType, simdSize); + GenTree* ovf = gtNewSimdCndSelNode(retType, msk, maxCns, minCns, simdBaseJitType, simdSize); + + // The mask we need is ((a ^ b) & (b ^ c)) < 0 + + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // tmpDup1 = a: 0xF0 + // op1Dup1 = b: 0xCC + // op2Dup2 = c: 0xAA + // + // 0x18 = B ? norAC : andAC + // b ? ~(a | c) : (a & c) + msk = gtNewSimdTernaryLogicNode(retType, tmp, op1Dup1, op2Dup1, gtNewIconNode(0x24), + simdBaseJitType, simdSize); + } + else + { + GenTree* op1Dup2 = gtCloneExpr(op1Dup1); + + GenTree* msk2 = gtNewSimdBinOpNode(GT_XOR, retType, tmp, op1Dup1, simdBaseJitType, simdSize); + GenTree* msk3 = + gtNewSimdBinOpNode(GT_XOR, retType, op1Dup2, op2Dup1, simdBaseJitType, simdSize); + + msk = gtNewSimdBinOpNode(GT_AND, retType, msk2, msk3, simdBaseJitType, simdSize); + } + + msk = gtNewSimdIsNegativeNode(retType, msk, simdBaseJitType, simdSize); + retNode = gtNewSimdCndSelNode(retType, msk, ovf, tmpDup2, simdBaseJitType, simdSize); + } + } + break; + } + case NI_Vector128_Sum: case NI_Vector256_Sum: case NI_Vector512_Sum: { assert(sig->numArgs == 1); - if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // Vector256 requires AVX2 - break; - } - else if ((simdSize == 16) && !compOpportunisticallyDependsOn(InstructionSet_SSE2)) - { - break; - } -#if defined(TARGET_X86) - else if (varTypeIsLong(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // We need SSE41 to handle long, use software fallback - break; - } -#endif // TARGET_X86 - op1 = impSIMDPopStack(); retNode = gtNewSimdSumNode(retType, op1, simdBaseJitType, simdSize); break; @@ -3834,14 +4089,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // We need SSE41 to handle long, use software fallback - break; - } -#endif // TARGET_X86 - op1 = impSIMDPopStack(); retNode = gtNewSimdToScalarNode(retType, op1, simdBaseJitType, simdSize); break; @@ -3889,7 +4136,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) { - assert((simdSize != 64) || IsBaselineVector512IsaSupportedDebugOnly()); + 
assert((simdSize != 64) || compIsaSupportedDebugOnly(InstructionSet_AVX512)); op1 = impSIMDPopStack(); @@ -3921,34 +4168,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(sig->numArgs == 3); GenTree* indexOp = impStackTop(1).val; - if (!indexOp->OperIsConst()) - { - if (!opts.OptimizationEnabled()) - { - // Only enable late stage rewriting if optimizations are enabled - // as we won't otherwise encounter a constant at the later point - return nullptr; - } - - op3 = impPopStack().val; - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - break; - } - - ssize_t imm8 = indexOp->AsIntCon()->IconValue(); - ssize_t count = simdSize / genTypeSize(simdBaseType); - - if ((imm8 >= count) || (imm8 < 0)) - { - // Using software fallback if index is out of range (throw exception) - return nullptr; - } - switch (simdBaseType) { // Using software fallback if simdBaseType is not supported by hardware @@ -4057,10 +4276,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_CompareScalarGreaterThan: - case NI_SSE_CompareScalarGreaterThanOrEqual: - case NI_SSE_CompareScalarNotGreaterThan: - case NI_SSE_CompareScalarNotGreaterThanOrEqual: + case NI_X86Base_CompareScalarGreaterThan: + case NI_X86Base_CompareScalarGreaterThanOrEqual: + case NI_X86Base_CompareScalarNotGreaterThan: + case NI_X86Base_CompareScalarNotGreaterThanOrEqual: { assert(sig->numArgs == 2); @@ -4072,10 +4291,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); } - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - simdBaseJitType = getBaseJitTypeOfSIMDType(sig->retTypeSigClass); - assert(JitType2PreciseVarType(simdBaseJitType) == TYP_FLOAT); + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + assert(varTypeIsFloating(JitType2PreciseVarType(simdBaseJitType))); if (supportsAvx) { @@ -4090,31 +4308,31 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { GenTree* clonedOp1 = nullptr; op1 = impCloneExpr(op1, &clonedOp1, CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse.CompareScalarGreaterThan")); + nullptr DEBUGARG("Clone op1 for CompareScalarGreaterThan")); switch (intrinsic) { - case NI_SSE_CompareScalarGreaterThan: + case NI_X86Base_CompareScalarGreaterThan: { - intrinsic = NI_SSE_CompareScalarLessThan; + intrinsic = NI_X86Base_CompareScalarLessThan; break; } - case NI_SSE_CompareScalarGreaterThanOrEqual: + case NI_X86Base_CompareScalarGreaterThanOrEqual: { - intrinsic = NI_SSE_CompareScalarLessThanOrEqual; + intrinsic = NI_X86Base_CompareScalarLessThanOrEqual; break; } - case NI_SSE_CompareScalarNotGreaterThan: + case NI_X86Base_CompareScalarNotGreaterThan: { - intrinsic = NI_SSE_CompareScalarNotLessThan; + intrinsic = NI_X86Base_CompareScalarNotLessThan; break; } - case NI_SSE_CompareScalarNotGreaterThanOrEqual: + case NI_X86Base_CompareScalarNotGreaterThanOrEqual: { - intrinsic = NI_SSE_CompareScalarNotLessThanOrEqual; + intrinsic = NI_X86Base_CompareScalarNotLessThanOrEqual; break; } @@ -4125,16 +4343,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, simdBaseJitType, simdSize); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE_MoveScalar, 
simdBaseJitType, - simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_X86Base_MoveScalar, + simdBaseJitType, simdSize); } break; } - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { assert(sig->numArgs == 1); assert(JITtype2varType(sig->retType) == TYP_VOID); @@ -4143,87 +4361,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_StoreFence: + case NI_X86Base_StoreFence: assert(sig->numArgs == 0); assert(JITtype2varType(sig->retType) == TYP_VOID); retNode = gtNewScalarHWIntrinsicNode(TYP_VOID, intrinsic); break; - case NI_SSE2_CompareScalarGreaterThan: - case NI_SSE2_CompareScalarGreaterThanOrEqual: - case NI_SSE2_CompareScalarNotGreaterThan: - case NI_SSE2_CompareScalarNotGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - - bool supportsAvx = compOpportunisticallyDependsOn(InstructionSet_AVX); - - if (!supportsAvx) - { - impSpillSideEffect(true, - stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - } - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - assert(JitType2PreciseVarType(simdBaseJitType) == TYP_DOUBLE); - - if (supportsAvx) - { - // These intrinsics are "special import" because the non-AVX path isn't directly - // hardware supported. Instead, they start with "swapped operands" and we fix that here. - - int ival = HWIntrinsicInfo::lookupIval(this, intrinsic, simdBaseType); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(ival), NI_AVX_CompareScalar, - simdBaseJitType, simdSize); - } - else - { - GenTree* clonedOp1 = nullptr; - op1 = impCloneExpr(op1, &clonedOp1, CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarGreaterThan")); - - switch (intrinsic) - { - case NI_SSE2_CompareScalarGreaterThan: - { - intrinsic = NI_SSE2_CompareScalarLessThan; - break; - } - - case NI_SSE2_CompareScalarGreaterThanOrEqual: - { - intrinsic = NI_SSE2_CompareScalarLessThanOrEqual; - break; - } - - case NI_SSE2_CompareScalarNotGreaterThan: - { - intrinsic = NI_SSE2_CompareScalarNotLessThan; - break; - } - - case NI_SSE2_CompareScalarNotGreaterThanOrEqual: - { - intrinsic = NI_SSE2_CompareScalarNotLessThanOrEqual; - break; - } - - default: - { - unreached(); - } - } - - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, simdBaseJitType, simdSize); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE2_MoveScalar, simdBaseJitType, - simdSize); - } - break; - } - - case NI_SSE2_LoadFence: - case NI_SSE2_MemoryFence: + case NI_X86Base_LoadFence: + case NI_X86Base_MemoryFence: { assert(sig->numArgs == 0); assert(JITtype2varType(sig->retType) == TYP_VOID); @@ -4233,7 +4378,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE2_StoreNonTemporal: + case NI_X86Base_StoreNonTemporal: { assert(sig->numArgs == 2); assert(JITtype2varType(sig->retType) == TYP_VOID); @@ -4244,26 +4389,20 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, argJitType, 0); + retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_X86Base_StoreNonTemporal, argJitType, 0); break; } case NI_AVX2_PermuteVar8x32: - case NI_AVX512BW_PermuteVar32x16: - case 
NI_AVX512BW_VL_PermuteVar8x16: - case NI_AVX512BW_VL_PermuteVar16x16: - case NI_AVX512F_PermuteVar8x64: - case NI_AVX512F_PermuteVar16x32: - case NI_AVX512F_VL_PermuteVar4x64: + case NI_AVX512_PermuteVar4x64: + case NI_AVX512_PermuteVar8x16: + case NI_AVX512_PermuteVar8x64: + case NI_AVX512_PermuteVar16x16: + case NI_AVX512_PermuteVar16x32: + case NI_AVX512_PermuteVar32x16: + case NI_AVX512VBMI_PermuteVar16x8: + case NI_AVX512VBMI_PermuteVar32x8: case NI_AVX512VBMI_PermuteVar64x8: - case NI_AVX512VBMI_VL_PermuteVar16x8: - case NI_AVX512VBMI_VL_PermuteVar32x8: - case NI_AVX10v1_PermuteVar16x8: - case NI_AVX10v1_PermuteVar8x16: - case NI_AVX10v1_PermuteVar16x16: - case NI_AVX10v1_PermuteVar32x8: - case NI_AVX10v1_PermuteVar4x64: - case NI_AVX10v1_V512_PermuteVar64x8: { simdBaseJitType = getBaseJitTypeOfSIMDType(sig->retTypeSigClass); @@ -4277,11 +4416,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_AVX512F_Fixup: - case NI_AVX512F_FixupScalar: - case NI_AVX512F_VL_Fixup: - case NI_AVX10v1_Fixup: - case NI_AVX10v1_FixupScalar: + case NI_AVX512_Fixup: + case NI_AVX512_FixupScalar: { assert(sig->numArgs == 4); @@ -4313,9 +4449,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_AVX512F_TernaryLogic: - case NI_AVX512F_VL_TernaryLogic: - case NI_AVX10v1_TernaryLogic: + case NI_AVX512_TernaryLogic: { assert(sig->numArgs == 4); @@ -4520,6 +4654,65 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } } + // Some normalization cases require us to swap the operands, which might require + // spilling side effects. Check that here. + // + // cast in switch clause is needed for old gcc + switch ((TernaryLogicOperKind)info.oper1) + { + case TernaryLogicOperKind::Not: + { + assert(info.oper1Use != TernaryLogicUseFlags::None); + + bool needSideEffectSpill = false; + + if (info.oper2 == TernaryLogicOperKind::And) + { + assert(info.oper2Use != TernaryLogicUseFlags::None); + + if ((control == static_cast(~0xCC & 0xF0)) || // ~B & A + (control == static_cast(~0xAA & 0xF0)) || // ~C & A + (control == static_cast(~0xAA & 0xCC))) // ~C & B + { + // We're normalizing to ~B & C, so we need another swap + std::swap(val2, val3); + needSideEffectSpill = (control == static_cast(~0xAA & 0xCC)); // ~C & B + } + } + else if (info.oper2 == TernaryLogicOperKind::Or) + { + assert(info.oper2Use != TernaryLogicUseFlags::None); + + if ((control == static_cast(~0xCC | 0xF0)) || // ~B | A + (control == static_cast(~0xAA | 0xF0)) || // ~C | A + (control == static_cast(~0xAA | 0xCC))) // ~C | B + { + // We're normalizing to ~B | C, so we need another swap + std::swap(val2, val3); + needSideEffectSpill = (control == static_cast(~0xAA | 0xCC)); // ~C | B + } + } + + if (needSideEffectSpill) + { + // Side-effect cases: + // ~B op A ; order before swap C A B + // op1 & op2 already set to be spilled; no further spilling necessary + // ~C op A ; order before swap B A C + // op1 already set to be spilled; no further spilling necessary + // ~C op B ; order before swap A B C + // nothing already set to be spilled; op1 & op2 need to be spilled + + spillOp1 = true; + spillOp2 = true; + } + break; + } + + default: + break; + } + if (spillOp1) { impSpillSideEffect(true, stackState.esStackDepth - @@ -4644,8 +4837,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, (control == static_cast(~0xAA & 0xF0)) || // ~C & A (control == static_cast(~0xAA & 0xCC))) // ~C & B { - // We're normalizing to ~B & C, so we need another swap - 
std::swap(*val2, *val3); + // We already normalized to ~B & C above. } else { @@ -4671,8 +4863,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, (control == static_cast(~0xAA | 0xF0)) || // ~C | A (control == static_cast(~0xAA | 0xCC))) // ~C | B { - // We're normalizing to ~B & C, so we need another swap - std::swap(*val2, *val3); + // We already normalized to ~B | C above. } else { @@ -4849,8 +5040,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_AVX512F_BlendVariable: - case NI_AVX512BW_BlendVariable: + case NI_AVX512_BlendVariable: { assert(sig->numArgs == 3); @@ -4862,20 +5052,20 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { op3 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op3, simdBaseJitType, simdSize); } - retNode = - gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, NI_EVEX_BlendVariableMask, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, NI_AVX512_BlendVariableMask, simdBaseJitType, + simdSize); break; } case NI_AVX_Compare: case NI_AVX_CompareScalar: - case NI_AVX512F_Compare: + case NI_AVX512_Compare: { assert(sig->numArgs == 3); - if (intrinsic == NI_AVX512F_Compare) + if (intrinsic == NI_AVX512_Compare) { - intrinsic = NI_EVEX_CompareMask; + intrinsic = NI_AVX512_CompareMask; retType = TYP_MASK; } @@ -4917,58 +5107,46 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_AVX512F_CompareEqual: - case NI_AVX512BW_CompareEqual: + case NI_AVX512_CompareEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareEqualMask, simdBaseJitType, simdSize); + retNode = + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareGreaterThan: - case NI_AVX512F_VL_CompareGreaterThan: - case NI_AVX10v1_CompareGreaterThan: - case NI_AVX512BW_CompareGreaterThan: - case NI_AVX512BW_VL_CompareGreaterThan: + case NI_AVX512_CompareGreaterThan: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareGreaterThanMask, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareGreaterThanMask, simdBaseJitType, + simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareGreaterThanOrEqual: - case NI_AVX512F_VL_CompareGreaterThanOrEqual: - case NI_AVX512BW_CompareGreaterThanOrEqual: - case NI_AVX512BW_VL_CompareGreaterThanOrEqual: - case NI_AVX10v1_CompareGreaterThanOrEqual: + case NI_AVX512_CompareGreaterThanOrEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareGreaterThanOrEqualMask, + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareGreaterThanOrEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareLessThan: - case NI_AVX512F_VL_CompareLessThan: - case NI_AVX512BW_CompareLessThan: - case NI_AVX512BW_VL_CompareLessThan: - case NI_AVX10v1_CompareLessThan: + case NI_AVX512_CompareLessThan: { assert(sig->numArgs == 2); @@ -4976,33 +5154,25 @@ GenTree* 
Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareLessThanMask, simdBaseJitType, simdSize); + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareLessThanMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareLessThanOrEqual: - case NI_AVX512F_VL_CompareLessThanOrEqual: - case NI_AVX512BW_CompareLessThanOrEqual: - case NI_AVX512BW_VL_CompareLessThanOrEqual: - case NI_AVX10v1_CompareLessThanOrEqual: + case NI_AVX512_CompareLessThanOrEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareLessThanOrEqualMask, simdBaseJitType, - simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareLessThanOrEqualMask, + simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotEqual: - case NI_AVX512F_VL_CompareNotEqual: - case NI_AVX512BW_CompareNotEqual: - case NI_AVX512BW_VL_CompareNotEqual: - case NI_AVX10v1_CompareNotEqual: + case NI_AVX512_CompareNotEqual: { assert(sig->numArgs == 2); @@ -5010,64 +5180,64 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotEqualMask, simdBaseJitType, simdSize); + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareNotEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotGreaterThan: + case NI_AVX512_CompareNotGreaterThan: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotGreaterThanMask, simdBaseJitType, + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareNotGreaterThanMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotGreaterThanOrEqual: + case NI_AVX512_CompareNotGreaterThanOrEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotGreaterThanOrEqualMask, + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareNotGreaterThanOrEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotLessThan: + case NI_AVX512_CompareNotLessThan: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotLessThanMask, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareNotLessThanMask, simdBaseJitType, + simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotLessThanOrEqual: + case NI_AVX512_CompareNotLessThanOrEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotLessThanOrEqualMask, + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, 
NI_AVX512_CompareNotLessThanOrEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareOrdered: + case NI_AVX512_CompareOrdered: { assert(sig->numArgs == 2); @@ -5075,12 +5245,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareOrderedMask, simdBaseJitType, simdSize); + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareOrderedMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareUnordered: + case NI_AVX512_CompareUnordered: { assert(sig->numArgs == 2); @@ -5088,7 +5258,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareUnorderedMask, simdBaseJitType, simdSize); + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareUnorderedMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp index d193d133be76..ef81c2580a19 100644 --- a/src/coreclr/jit/ifconversion.cpp +++ b/src/coreclr/jit/ifconversion.cpp @@ -704,9 +704,27 @@ bool OptIfConversionDsc::optIfConvert() selectType = genActualType(m_thenOperation.node); } - // Create a select node. - GenTreeConditional* select = - m_comp->gtNewConditionalNode(GT_SELECT, m_cond, selectTrueInput, selectFalseInput, selectType); + GenTree* select = nullptr; + if (selectTrueInput->TypeIs(TYP_INT) && selectFalseInput->TypeIs(TYP_INT)) + { + if (selectTrueInput->IsIntegralConst(1) && selectFalseInput->IsIntegralConst(0)) + { + // compare ? true : false --> compare + select = m_cond; + } + else if (selectTrueInput->IsIntegralConst(0) && selectFalseInput->IsIntegralConst(1)) + { + // compare ? false : true --> reversed_compare + select = m_comp->gtReverseCond(m_cond); + } + } + + if (select == nullptr) + { + // Create a select node + select = m_comp->gtNewConditionalNode(GT_SELECT, m_cond, selectTrueInput, selectFalseInput, selectType); + } + m_thenOperation.node->AddAllEffectsFlags(select); // Use the select as the source of the Then operation. diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 6d573d197b15..35c0108502fb 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -38,11 +38,11 @@ void Compiler::impPushOnStack(GenTree* tree, typeInfo ti) stackState.esStack[stackState.esStackDepth].seTypeInfo = ti; stackState.esStack[stackState.esStackDepth++].val = tree; - if (tree->gtType == TYP_LONG) + if (tree->TypeIs(TYP_LONG)) { compLongUsed = true; } - else if ((tree->gtType == TYP_FLOAT) || (tree->gtType == TYP_DOUBLE)) + else if (tree->TypeIs(TYP_FLOAT) || tree->TypeIs(TYP_DOUBLE)) { compFloatingPointUsed = true; } @@ -174,7 +174,7 @@ unsigned Compiler::impStackHeight() #ifdef DEBUG // only used in asserts static bool impValidSpilledStackEntry(GenTree* tree) { - if (tree->gtOper == GT_LCL_VAR) + if (tree->OperIs(GT_LCL_VAR)) { return true; } @@ -845,9 +845,7 @@ GenTree* Compiler::impStoreStruct(GenTree* store, GenTreeFlags indirFlags = GTF_EMPTY; GenTree* destAddr = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags); - // Make sure we don't pass something other than a local address to the return buffer arg. 
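// ---------------------------------------------------------------------------
// [Editor's note] The ifconversion.cpp change in this hunk recognizes two
// special select shapes and drops the GT_SELECT node entirely:
//   cond ? 1 : 0  ==>  cond             (the relop already produces 0/1)
//   cond ? 0 : 1  ==>  reversed cond
// A trivial standalone illustration of the identity for integer comparisons
// (illustration only, not part of this patch):
// ---------------------------------------------------------------------------
#include <cassert>

static int WithSelect(int x, int y)
{
    return (x < y) ? 1 : 0; // select form
}

static int WithoutSelect(int x, int y)
{
    return x < y;           // the comparison itself already yields 0 or 1
}

static int ReversedWithoutSelect(int x, int y)
{
    return x >= y;          // (x < y) ? 0 : 1 is just the reversed comparison
}

int main()
{
    for (int x = -2; x <= 2; x++)
    {
        for (int y = -2; y <= 2; y++)
        {
            assert(WithSelect(x, y) == WithoutSelect(x, y));
            assert(((x < y) ? 0 : 1) == ReversedWithoutSelect(x, y));
        }
    }
    return 0;
}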
- // It is allowed to pass current's method return buffer as it is a local too. - if (fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(srcCall->gtRetClsHnd)) + if (!impIsLegalRetBuf(destAddr, srcCall)) { unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer")); lvaSetStruct(tmp, srcCall->gtRetClsHnd, false); @@ -971,9 +969,7 @@ GenTree* Compiler::impStoreStruct(GenTree* store, GenTreeFlags indirFlags = GTF_EMPTY; GenTree* destAddr = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags); - // Make sure we don't pass something other than a local address to the return buffer arg. - // It is allowed to pass current's method return buffer as it is a local too. - if (fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(call->gtRetClsHnd)) + if (!impIsLegalRetBuf(destAddr, call)) { unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer")); lvaSetStruct(tmp, call->gtRetClsHnd, false); @@ -1005,9 +1001,24 @@ GenTree* Compiler::impStoreStruct(GenTree* store, } else if (src->OperIs(GT_COMMA)) { + GenTree* sideEffectAddressStore = nullptr; + if (store->OperIs(GT_STORE_BLK, GT_STOREIND) && ((store->AsIndir()->Addr()->gtFlags & GTF_ALL_EFFECT) != 0)) + { + TempInfo addrTmp = fgMakeTemp(store->AsIndir()->Addr()); + sideEffectAddressStore = addrTmp.store; + store->AsIndir()->Addr() = addrTmp.load; + } + if (pAfterStmt) { // Insert op1 after '*pAfterStmt' + if (sideEffectAddressStore != nullptr) + { + Statement* addrStmt = gtNewStmt(sideEffectAddressStore, usedDI); + fgInsertStmtAfter(block, *pAfterStmt, addrStmt); + *pAfterStmt = addrStmt; + } + Statement* newStmt = gtNewStmt(src->AsOp()->gtOp1, usedDI); fgInsertStmtAfter(block, *pAfterStmt, newStmt); *pAfterStmt = newStmt; @@ -1015,6 +1026,10 @@ GenTree* Compiler::impStoreStruct(GenTree* store, else if (impLastStmt != nullptr) { // Do the side-effect as a separate statement. + if (sideEffectAddressStore != nullptr) + { + impAppendTree(sideEffectAddressStore, curLevel, usedDI); + } impAppendTree(src->AsOp()->gtOp1, curLevel, usedDI); } else @@ -1027,6 +1042,10 @@ GenTree* Compiler::impStoreStruct(GenTree* store, gtUpdateNodeSideEffects(store); src->SetAllEffectsFlags(src->AsOp()->gtOp1, src->AsOp()->gtOp2); + if (sideEffectAddressStore != nullptr) + { + src = gtNewOperNode(GT_COMMA, src->TypeGet(), sideEffectAddressStore, src); + } return src; } @@ -1038,12 +1057,52 @@ GenTree* Compiler::impStoreStruct(GenTree* store, if (store->OperIs(GT_STORE_LCL_VAR) && src->IsMultiRegNode()) { - lvaGetDesc(store->AsLclVar())->lvIsMultiRegRet = true; + lvaGetDesc(store->AsLclVar())->SetIsMultiRegDest(); } return store; } +//------------------------------------------------------------------------ +// impIsLegalRetbuf: +// Check if a return buffer is of a legal shape. +// +// Arguments: +// retBuf - The return buffer +// call - The call that is passed the return buffer +// +// Return Value: +// True if it is legal according to ABI and IR invariants. +// +// Notes: +// ABI requires all return buffers to point to stack. Also, we have an IR +// invariant for async calls that return buffers must be the address of a +// local. +// +bool Compiler::impIsLegalRetBuf(GenTree* retBuf, GenTreeCall* call) +{ + if (call->IsAsync()) + { + // Async calls require LCL_ADDR shape for the retbuf to know where to + // save the value on resumption. + if (!retBuf->OperIs(GT_LCL_ADDR)) + { + return false; + } + + // LCL_ADDR on an implicit byref will turn into LCL_VAR in morph. 
+ if (lvaIsImplicitByRefLocal(retBuf->AsLclVarCommon()->GetLclNum())) + { + return false; + } + + return true; + } + + // The ABI requires the retbuffer to point to stack. + return !fgAddrCouldBeHeap(retBuf) || eeIsByrefLike(call->gtRetClsHnd); +} + //------------------------------------------------------------------------ // impStoreStructPtr: Store (copy) the structure from 'src' to 'destAddr'. // @@ -1219,7 +1278,7 @@ GenTree* Compiler::impNormStructVal(GenTree* structVal, unsigned curLevel) // Is this GT_COMMA(op1, GT_COMMA())? GenTree* parent = structVal; - if (blockNode->OperGet() == GT_COMMA) + if (blockNode->OperIs(GT_COMMA)) { // Find the last node in the comma chain. do @@ -1227,7 +1286,7 @@ GenTree* Compiler::impNormStructVal(GenTree* structVal, unsigned curLevel) assert(blockNode->gtType == structType); parent = blockNode; blockNode = blockNode->AsOp()->gtOp2; - } while (blockNode->OperGet() == GT_COMMA); + } while (blockNode->OperIs(GT_COMMA)); } if (blockNode->OperIsBlk()) @@ -1521,7 +1580,7 @@ GenTree* Compiler::impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORI op1 = new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, pCallInfo->hMethod); #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { op1->AsFptrVal()->gtEntryPoint = pCallInfo->codePointerLookup.constLookup; } @@ -1640,7 +1699,7 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken if (pRuntimeLookup->indirections == CORINFO_USEHELPER) { #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { return impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_GENERIC_HANDLE, TYP_I_IMPL, &pLookup->lookupKind, ctxTree); @@ -1800,7 +1859,7 @@ bool Compiler::impSpillStackEntry(unsigned level, // If we're assigning a GT_RET_EXPR, note the temp over on the call, // so the inliner can use it in case it needs a return spill temp. - if (tree->OperGet() == GT_RET_EXPR) + if (tree->OperIs(GT_RET_EXPR)) { JITDUMP("\n*** see V%02u = GT_RET_EXPR, noting temp\n", tnum); GenTreeCall* call = tree->AsRetExpr()->gtInlineCandidate->AsCall(); @@ -1846,8 +1905,7 @@ void Compiler::impSpillStackEnsure(bool spillLeaves) // Temps introduced by the importer itself don't need to be spilled - bool isTempLcl = - (tree->OperGet() == GT_LCL_VAR) && (tree->AsLclVarCommon()->GetLclNum() >= info.compLocalsCount); + bool isTempLcl = tree->OperIs(GT_LCL_VAR) && (tree->AsLclVarCommon()->GetLclNum() >= info.compLocalsCount); if (isTempLcl) { @@ -2562,7 +2620,7 @@ typeInfo Compiler::makeTypeInfoForLocal(unsigned lclNum) { LclVarDsc* varDsc = lvaGetDesc(lclNum); - if (varDsc->TypeGet() == TYP_REF) + if (varDsc->TypeIs(TYP_REF)) { return typeInfo(varDsc->lvClassHnd); } @@ -2597,9 +2655,6 @@ bool Compiler::checkTailCallConstraint(OPCODE opcode, { DWORD mflags; CORINFO_SIG_INFO sig; - unsigned int popCount = 0; // we can't pop the stack since impImportCall needs it, so - // this counter is used to keep track of how many items have been - // virtually popped CORINFO_METHOD_HANDLE methodHnd = nullptr; CORINFO_CLASS_HANDLE methodClassHnd = nullptr; @@ -2670,19 +2725,15 @@ bool Compiler::checkTailCallConstraint(OPCODE opcode, args = info.compCompHnd->getArgNext(args); } - // Update popCount. - popCount += sig.numArgs; + unsigned popCount = sig.totalILArgs(); // Check for 'this' which is on non-static methods, not called via NEWOBJ - if (!(mflags & CORINFO_FLG_STATIC)) + if ((mflags & CORINFO_FLG_STATIC) == 0) { - // Always update the popCount. 
This is crucial for the stack calculation to be correct. - popCount++; - if (opcode == CEE_CALLI) { // For CALLI, we don't know the methodClassHnd. Therefore, let's check the "this" object on the stack. - if (impStackTop(popCount).val->TypeGet() != TYP_REF) + if (!impStackTop(popCount).val->TypeIs(TYP_REF)) { return false; } @@ -2707,7 +2758,7 @@ bool Compiler::checkTailCallConstraint(OPCODE opcode, // Get the exact view of the signature for an array method if (sig.retType != CORINFO_TYPE_VOID) { - if (methodClassFlgs & CORINFO_FLG_ARRAY) + if ((methodClassFlgs & CORINFO_FLG_ARRAY) != 0) { assert(opcode != CEE_CALLI); eeGetCallSiteSig(pResolvedToken->token, pResolvedToken->tokenScope, pResolvedToken->tokenContext, &sig); @@ -2758,11 +2809,12 @@ GenTree* Compiler::impImportLdvirtftn(GenTree* thisPtr, { GenTree* runtimeMethodHandle = impLookupToTree(pResolvedToken, &pCallInfo->codePointerLookup, GTF_ICON_METHOD_HDL, pCallInfo->hMethod); - call = gtNewHelperCallNode(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, TYP_I_IMPL, thisPtr, runtimeMethodHandle); + call = gtNewVirtualFunctionLookupHelperCallNode(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, TYP_I_IMPL, thisPtr, + runtimeMethodHandle); } #ifdef FEATURE_READYTORUN - else if (opts.IsReadyToRun()) + else if (IsAot()) { if (!pCallInfo->exactContextNeedsRuntimeLookup) { @@ -2799,7 +2851,8 @@ GenTree* Compiler::impImportLdvirtftn(GenTree* thisPtr, // Call helper function. This gets the target address of the final destination callsite. // - call = gtNewHelperCallNode(CORINFO_HELP_VIRTUAL_FUNC_PTR, TYP_I_IMPL, thisPtr, exactTypeDesc, exactMethodDesc); + call = gtNewVirtualFunctionLookupHelperCallNode(CORINFO_HELP_VIRTUAL_FUNC_PTR, TYP_I_IMPL, thisPtr, + exactMethodDesc, exactTypeDesc); } assert(call != nullptr); @@ -2882,9 +2935,13 @@ GenTree* Compiler::impInlineUnboxNullable(CORINFO_CLASS_HANDLE nullableCls, GenT // result._hasValue = true; // result._value = MethodTableLookup(obj); // - CORINFO_FIELD_HANDLE valueFldHnd = info.compCompHnd->getFieldInClass(nullableCls, 1); - CORINFO_CLASS_HANDLE valueStructCls; - var_types valueType = JITtype2varType(info.compCompHnd->getFieldType(valueFldHnd, &valueStructCls)); + CORINFO_FIELD_HANDLE valueFldHnd = info.compCompHnd->getFieldInClass(nullableCls, 1); + CORINFO_CLASS_HANDLE valueStructCls = NO_CLASS_HANDLE; + ClassLayout* layout = nullptr; + + CorInfoType corFldType = info.compCompHnd->getFieldType(valueFldHnd, &valueStructCls); + var_types valueType = TypeHandleToVarType(corFldType, valueStructCls, &layout); + static_assert_no_msg(OFFSETOF__CORINFO_NullableOfT__hasValue == 0); unsigned hasValOffset = OFFSETOF__CORINFO_NullableOfT__hasValue; unsigned valueOffset = info.compCompHnd->getFieldOffset(valueFldHnd); @@ -2892,8 +2949,8 @@ GenTree* Compiler::impInlineUnboxNullable(CORINFO_CLASS_HANDLE nullableCls, GenT GenTree* boxedContentAddr = gtNewOperNode(GT_ADD, TYP_BYREF, gtCloneExpr(objClone), gtNewIconNode(TARGET_POINTER_SIZE, TYP_I_IMPL)); // Load the boxed content from the object (op1): - GenTree* boxedContent = valueType == TYP_STRUCT ? 
gtNewBlkIndir(typGetObjLayout(valueStructCls), boxedContentAddr) - : gtNewIndir(valueType, boxedContentAddr); + GenTree* boxedContent = gtNewLoadValueNode(valueType, layout, boxedContentAddr); + // Now do two stores via a comma: GenTree* setHasValue = gtNewStoreLclFldNode(resultTmp, TYP_UBYTE, hasValOffset, gtNewIconNode(1)); GenTree* setValue = gtNewStoreLclFldNode(resultTmp, valueType, valueOffset, boxedContent); @@ -3837,16 +3894,16 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI #ifdef FEATURE_SIMD // First, let's check whether field is a SIMD vector and import it as GT_CNS_VEC int simdWidth = getSIMDTypeSizeInBytes(fieldClsHnd); - if ((simdWidth > 0) && IsBaselineSimdIsaSupported()) + if (simdWidth > 0) { - assert((totalSize <= 32) && (totalSize <= MaxStructSize)); + assert((totalSize <= 64) && (totalSize <= MaxStructSize)); var_types simdType = getSIMDTypeForSize(simdWidth); bool hwAccelerated = true; #ifdef TARGET_XARCH if (simdType == TYP_SIMD64) { - hwAccelerated = compOpportunisticallyDependsOn(InstructionSet_AVX512F); + hwAccelerated = compOpportunisticallyDependsOn(InstructionSet_AVX512); } else if (simdType == TYP_SIMD32) { @@ -3855,7 +3912,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI else #endif // TARGET_XARCH { - // SIMD8, SIMD12, SIMD16 are covered by IsBaselineSimdIsaSupported check + // SIMD8, SIMD12, SIMD16 are covered by baseline ISA requirement assert((simdType == TYP_SIMD8) || (simdType == TYP_SIMD12) || (simdType == TYP_SIMD16)); } @@ -4054,7 +4111,7 @@ GenTree* Compiler::impImportStaticFieldAddress(CORINFO_RESOLVED_TOKEN* pResolved case CORINFO_FIELD_STATIC_TLS_MANAGED: #ifdef FEATURE_READYTORUN - if (!opts.IsReadyToRun()) + if (!IsAot()) #endif // FEATURE_READYTORUN { if ((pFieldInfo->helper == CORINFO_HELP_GETDYNAMIC_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || @@ -4074,7 +4131,7 @@ GenTree* Compiler::impImportStaticFieldAddress(CORINFO_RESOLVED_TOKEN* pResolved case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER: { #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { GenTreeFlags callFlags = GTF_EMPTY; @@ -4157,7 +4214,7 @@ GenTree* Compiler::impImportStaticFieldAddress(CORINFO_RESOLVED_TOKEN* pResolved case CORINFO_FIELD_STATIC_READYTORUN_HELPER: { #ifdef FEATURE_READYTORUN - assert(opts.IsReadyToRun()); + assert(IsAot()); assert(!compIsForInlining()); CORINFO_LOOKUP_KIND kind; info.compCompHnd->getLocationOfThisType(info.compMethodHnd, &kind); @@ -4725,12 +4782,15 @@ void Compiler::impImportLeaveEHRegions(BasicBlock* block) } #endif - unsigned finallyNesting = compHndBBtab[XTnum].ebdHandlerNestingLevel; - assert(finallyNesting <= compHndBBtabCount); + // We now record the EH region ID on GT_END_LFIN instead of the finally nesting depth, + // as the later can change as we optimize the code. 
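The GT_END_LFIN change just above replaces a positional property (the finally's nesting depth, which later EH optimizations can shift) with the stable `ebdID` of the EH table entry. A small stand-alone sketch of why keying on a stable id is robust to table churn (EhDescriptor and findById are simplified stand-ins, not the JIT's EH table):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

struct EhDescriptor
{
    unsigned id;           // stable identity, assigned at import time
    unsigned nestingLevel; // positional property; may change as regions are removed or merged
};

const EhDescriptor* findById(const std::vector<EhDescriptor>& table, unsigned id)
{
    auto it = std::find_if(table.begin(), table.end(),
                           [id](const EhDescriptor& d) { return d.id == id; });
    return (it == table.end()) ? nullptr : &*it;
}

int main()
{
    std::vector<EhDescriptor> table = {{1, 0}, {2, 1}, {3, 2}};

    // An optimization removes the middle region; nesting levels shift...
    table.erase(table.begin() + 1);
    table[1].nestingLevel = 1;

    // ...but a lookup keyed by the recorded id still finds the right region.
    if (const EhDescriptor* d = findById(table, 3))
        std::printf("region id=%u now at nesting level %u\n", d->id, d->nestingLevel);
    return 0;
}
```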
+ // + unsigned const ehID = compHndBBtab[XTnum].ebdID; + assert(ehID <= impInlineRoot()->compEHID); - GenTree* endLFin = new (this, GT_END_LFIN) GenTreeVal(GT_END_LFIN, TYP_VOID, finallyNesting); - endLFinStmt = gtNewStmt(endLFin); - endCatches = NULL; + GenTree* const endLFin = new (this, GT_END_LFIN) GenTreeVal(GT_END_LFIN, TYP_VOID, ehID); + endLFinStmt = gtNewStmt(endLFin); + endCatches = NULL; encFinallies++; } @@ -5607,7 +5667,7 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr GenTree* Compiler::impOptimizeCastClassOrIsInst(GenTree* op1, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool isCastClass) { - assert(op1->TypeGet() == TYP_REF); + assert(op1->TypeIs(TYP_REF)); // Don't optimize for minopts or debug codegen. if (opts.OptimizationDisabled()) @@ -5783,7 +5843,7 @@ GenTree* Compiler::impCastClassOrIsInstToTree(GenTree* op1, bool* booleanCheck, IL_OFFSET ilOffset) { - assert(op1->TypeGet() == TYP_REF); + assert(op1->TypeIs(TYP_REF)); // Optimistically assume the jit should expand this as an inline test bool isClassExact = info.compCompHnd->isExactType(pResolvedToken->hClass); @@ -5952,10 +6012,85 @@ bool Compiler::impBlockIsInALoop(BasicBlock* block) block->HasFlag(BBF_BACKWARD_JUMP); } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif +//------------------------------------------------------------------------ +// impMatchAwaitPattern: check if a method call starts an Await pattern +// that can be optimized for runtime async +// +// Arguments: +// codeAddr - IL after call[virt] +// codeEndp - End of IL code stream +// configVal - [out] set to 0 or 1, accordingly, if we saw ConfigureAwait(0|1) +// +// Returns: +// true if this is an Await that we can optimize +// +bool Compiler::impMatchAwaitPattern(const BYTE* codeAddr, const BYTE* codeEndp, int* configVal) +{ + // If we see the following code pattern in runtime async methods: + // + // call[virt] + // [ OPTIONAL ] + // ldc.i4.0 / ldc.i4.1 + // call[virt] + // call + // + // We emit an eqivalent of: + // + // call[virt] + // + // where "RtMethod" is the runtime-async counterpart of a Task-returning method. + // + // NOTE: we could potentially check if Method is not a thunk and, in cases when we can tell, + // bypass this optimization. Otherwise in a non-thunk case we would be + // replacing the pattern with a call to a thunk, which contains roughly the same code. + + const BYTE* nextOpcode = codeAddr + sizeof(mdToken); + // There must be enough space after ldc for {call + tk + call + tk} + if (nextOpcode + 2 * (1 + sizeof(mdToken)) < codeEndp) + { + uint8_t nextOp = getU1LittleEndian(nextOpcode); + uint8_t nextNextOp = getU1LittleEndian(nextOpcode + 1); + if ((nextOp != CEE_LDC_I4_0 && nextOp != CEE_LDC_I4_1) || + (nextNextOp != CEE_CALL && nextNextOp != CEE_CALLVIRT)) + { + goto checkForAwait; + } + + // check if the token after {ldc, call[virt]} is ConfigAwait + CORINFO_RESOLVED_TOKEN nextCallTok; + impResolveToken(nextOpcode + 2, &nextCallTok, CORINFO_TOKENKIND_Method); + + if (!eeIsIntrinsic(nextCallTok.hMethod) || + lookupNamedIntrinsic(nextCallTok.hMethod) != NI_System_Threading_Tasks_Task_ConfigureAwait) + { + goto checkForAwait; + } + + *configVal = nextOp == CEE_LDC_I4_0 ? 
0 : 1; + // skip {ldc; call; } + nextOpcode += 1 + 1 + sizeof(mdToken); + } + +checkForAwait: + + if ((nextOpcode + sizeof(mdToken) < codeEndp) && (getU1LittleEndian(nextOpcode) == CEE_CALL)) + { + // resolve the next token + CORINFO_RESOLVED_TOKEN nextCallTok; + impResolveToken(nextOpcode + 1, &nextCallTok, CORINFO_TOKENKIND_Method); + + // check if it is an Await intrinsic + if (eeIsIntrinsic(nextCallTok.hMethod) && + lookupNamedIntrinsic(nextCallTok.hMethod) == NI_System_Runtime_CompilerServices_AsyncHelpers_Await) + { + // yes, this is an Await + return true; + } + } + + return false; +} + /***************************************************************************** * Import the instr for the given basic block */ @@ -6672,7 +6807,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (compIsForInlining()) { op1 = impInlineFetchArg(impInlineInfo->inlArgInfo[lclNum], impInlineInfo->lclVarInfo[lclNum]); - noway_assert(op1->gtOper == GT_LCL_VAR); + noway_assert(op1->OperIs(GT_LCL_VAR)); lclNum = op1->AsLclVar()->GetLclNum(); goto VAR_ST_VALID; @@ -6735,7 +6870,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) VAR_ST_VALID: /* if it is a struct store, make certain we don't overflow the buffer */ - assert(lclTyp != TYP_STRUCT || lvaLclSize(lclNum) >= info.compCompHnd->getClassSize(clsHnd)); + assert(lclTyp != TYP_STRUCT || lvaLclStackHomeSize(lclNum) >= info.compCompHnd->getClassSize(clsHnd)); if (lvaTable[lclNum].lvNormalizeOnLoad()) { @@ -6760,13 +6895,12 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impImplicitR4orR8Cast(op1, lclTyp); // We had better assign it a value of the correct type - assertImp( - genActualType(lclTyp) == genActualType(op1->gtType) || - (genActualType(lclTyp) == TYP_I_IMPL && op1->OperIs(GT_LCL_ADDR)) || - (genActualType(lclTyp) == TYP_I_IMPL && (op1->gtType == TYP_BYREF || op1->gtType == TYP_REF)) || - (genActualType(op1->gtType) == TYP_I_IMPL && lclTyp == TYP_BYREF) || - (varTypeIsFloating(lclTyp) && varTypeIsFloating(op1->TypeGet())) || - ((genActualType(lclTyp) == TYP_BYREF) && genActualType(op1->TypeGet()) == TYP_REF)); + assertImp(genActualType(lclTyp) == genActualType(op1->gtType) || + (genActualType(lclTyp) == TYP_I_IMPL && op1->OperIs(GT_LCL_ADDR)) || + (genActualType(lclTyp) == TYP_I_IMPL && (op1->TypeIs(TYP_BYREF) || op1->TypeIs(TYP_REF))) || + (genActualType(op1->gtType) == TYP_I_IMPL && lclTyp == TYP_BYREF) || + (varTypeIsFloating(lclTyp) && varTypeIsFloating(op1->TypeGet())) || + ((genActualType(lclTyp) == TYP_BYREF) && genActualType(op1->TypeGet()) == TYP_REF)); // If op1 is "&var" then its type is the transient "*" and it can // be used either as BYREF or TYP_I_IMPL. @@ -6818,7 +6952,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) /* Filter out simple stores to itself */ - if (op1->gtOper == GT_LCL_VAR && lclNum == op1->AsLclVarCommon()->GetLclNum()) + if (op1->OperIs(GT_LCL_VAR) && lclNum == op1->AsLclVarCommon()->GetLclNum()) { if (opts.compDbgCode) { @@ -6897,7 +7031,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) // followed by a ldfld to load the field. 
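impMatchAwaitPattern above is a byte-level peek at the IL stream: after the current call's token it looks for an optional `ldc.i4.0|1` plus `call ConfigureAwait`, then a `call Await`, bailing out whenever there is not enough room left before the end of the method body. A stripped-down stand-alone sketch of that kind of bounded scan (the opcode byte values are the real single-byte IL encodings; `readU1`, `Token` and `matchesLdcThenCall` are simplified stand-ins, and no token resolution is attempted here):

```cpp
#include <cstdint>
#include <cstdio>

constexpr uint8_t IL_LDC_I4_0 = 0x16;
constexpr uint8_t IL_LDC_I4_1 = 0x17;
constexpr uint8_t IL_CALL     = 0x28;
using Token = uint32_t; // metadata tokens occupy 4 bytes in the IL stream

static uint8_t readU1(const uint8_t* p) { return *p; }

// Returns true if, starting right after a call's 4-byte token, the stream
// continues with "ldc.i4.0|1; call <token>". 'end' is one past the last byte.
bool matchesLdcThenCall(const uint8_t* afterCallToken, const uint8_t* end, int* ldcValue)
{
    const uint8_t* p = afterCallToken;
    // Need 1 byte for ldc.i4.x plus 1 + sizeof(Token) bytes for the call.
    if (p + 1 + 1 + sizeof(Token) > end)
        return false;

    uint8_t op = readU1(p);
    if ((op != IL_LDC_I4_0) && (op != IL_LDC_I4_1))
        return false;
    if (readU1(p + 1) != IL_CALL)
        return false;

    *ldcValue = (op == IL_LDC_I4_0) ? 0 : 1;
    return true;
}

int main()
{
    // call <tok>; ldc.i4.1; call <tok>
    uint8_t il[]  = {IL_CALL, 0, 0, 0, 0x0A, IL_LDC_I4_1, IL_CALL, 0, 0, 0, 0x0A};
    int ldcValue  = -1;
    bool matched  = matchesLdcThenCall(il + 1 + sizeof(Token), il + sizeof(il), &ldcValue);
    std::printf("matched=%d ldc=%d\n", matched, ldcValue);
    return 0;
}
```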
op1 = impInlineFetchArg(impInlineInfo->inlArgInfo[lclNum], impInlineInfo->lclVarInfo[lclNum]); - if (op1->gtOper != GT_LCL_VAR) + if (!op1->OperIs(GT_LCL_VAR)) { compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDARGA_NOT_LOCAL_VAR); return; @@ -6952,9 +7086,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) case CEE_ENDFINALLY: - if (compIsForInlining()) + if (compIsForInlining() && !opts.compInlineMethodsWithEH) { - assert(!"Shouldn't have exception handlers in the inliner!"); + assert(!"Shouldn't have exception handlers in the inlinee!"); compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_ENDFINALLY); return; } @@ -6976,9 +7110,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) case CEE_ENDFILTER: - if (compIsForInlining()) + if (compIsForInlining() && !opts.compInlineMethodsWithEH) { - assert(!"Shouldn't have exception handlers in the inliner!"); + assert(!"Shouldn't have exception handlers in the inlinee!"); compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_ENDFILTER); return; } @@ -6995,7 +7129,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } op1 = impPopStack().val; - assertImp(op1->gtType == TYP_INT); + assertImp(op1->TypeIs(TYP_INT)); if (!bbInFilterILRange(block)) { BADCODE("EndFilter outside a filter handler"); @@ -7428,7 +7562,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) /* Special case: "int+0", "int-0", "int*1", "int/1" */ - if (op2->gtOper == GT_CNS_INT) + if (op2->OperIs(GT_CNS_INT)) { if ((op2->IsIntegralConst(0) && (oper == GT_ADD || oper == GT_SUB)) || (op2->IsIntegralConst(1) && (oper == GT_MUL || oper == GT_DIV))) @@ -7550,7 +7684,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) LEAVE: - if (compIsForInlining()) + if (compIsForInlining() && !opts.compInlineMethodsWithEH) { compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_LEAVE); return; @@ -7932,7 +8066,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) assertImp(genActualTypeIsIntOrI(op1->TypeGet())); // Fold Switch for GT_CNS_INT - if (opts.OptimizationEnabled() && (op1->gtOper == GT_CNS_INT)) + if (opts.OptimizationEnabled() && op1->OperIs(GT_CNS_INT)) { // Find the jump target size_t switchVal = (size_t)op1->AsIntCon()->gtIconVal; @@ -8126,7 +8260,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) goto CONV; case CEE_CONV_R_UN: - lclTyp = TYP_DOUBLE; + // Because there is no IL instruction conv.r4.un, compilers consistently + // emit conv.r.un followed immediately by conv.r4 for unsigned->float casts. + // We recognize this pattern and create the intended cast. + // Otherwise, conv.r.un is treated as a cast to double. + lclTyp = ((OPCODE)getU1LittleEndian(codeAddr) == CEE_CONV_R4) ? TYP_FLOAT : TYP_DOUBLE; goto CONV_UN; CONV_UN: @@ -8152,7 +8290,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) // TYP_BYREF could be used as TYP_I_IMPL which is long. // TODO-CQ: remove this when we lower casts long/ulong --> float/double // and generate SSE2 code instead of going through helper calls. - || (impStackTop().val->TypeGet() == TYP_BYREF) + || impStackTop().val->TypeIs(TYP_BYREF) #endif ; } @@ -8173,11 +8311,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) // At this point uns, ovf, callNode are all set. 
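The conv.r.un tweak above matters because taking `conv.r.un; conv.r4` literally would round twice (unsigned to double, then double to float), whereas the compiler-emitted pair is meant as a single unsigned-to-float conversion. A small stand-alone illustration of the difference double rounding can make; the constant is a standard double-rounding witness, and the exact outcome depends on how the platform converts uint64 to float:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    // 2^63 + 2^39 + 1: just above the halfway point between two adjacent floats,
    // but the "+1" that breaks the tie is lost when rounding to double first.
    uint64_t v = 0x8000008000000001ULL;

    float direct = static_cast<float>(v);                       // one rounding (uint64 -> float)
    float viaDbl = static_cast<float>(static_cast<double>(v));  // two roundings (uint64 -> double -> float)

    // On platforms where the direct conversion is correctly rounded these differ.
    std::printf("direct  = %.1f\nvia dbl = %.1f\n", direct, viaDbl);
    return 0;
}
```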
- if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) + if (varTypeIsSmall(lclTyp) && !ovfl && op1->TypeIs(TYP_INT) && op1->OperIs(GT_AND)) { op2 = op1->AsOp()->gtOp2; - if (op2->gtOper == GT_CNS_INT) + if (op2->OperIs(GT_CNS_INT)) { ssize_t ival = op2->AsIntCon()->gtIconVal; ssize_t mask, umask; @@ -8307,12 +8445,12 @@ void Compiler::impImportBlockCode(BasicBlock* block) // implicit tail calls when the operand of pop is GT_CAST(GT_CALL(..)). // The cast gets added as part of importing GT_CALL, which gets in the way // of fgMorphCall() on the forms of tail call nodes that we assert. - if ((op1->gtOper == GT_CAST) && !op1->gtOverflow()) + if (op1->OperIs(GT_CAST) && !op1->gtOverflow()) { op1 = op1->AsOp()->gtOp1; } - if (op1->gtOper != GT_CALL) + if (!op1->OperIs(GT_CALL)) { if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0) { @@ -8438,7 +8576,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impPopStack().val; // address to store to // you can indirect off of a TYP_I_IMPL (if we are in C) or a BYREF - assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->gtType == TYP_BYREF); + assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->TypeIs(TYP_BYREF)); impBashVarAddrsToI(op1, op2); @@ -8456,15 +8594,15 @@ void Compiler::impImportBlockCode(BasicBlock* block) // Check target type. #ifdef DEBUG - if (op2->gtType == TYP_BYREF || lclTyp == TYP_BYREF) + if (op2->TypeIs(TYP_BYREF) || lclTyp == TYP_BYREF) { - if (op2->gtType == TYP_BYREF) + if (op2->TypeIs(TYP_BYREF)) { assertImp(lclTyp == TYP_BYREF || lclTyp == TYP_I_IMPL); } else if (lclTyp == TYP_BYREF) { - assertImp(op2->gtType == TYP_BYREF || varTypeIsIntOrI(op2->gtType)); + assertImp(op2->TypeIs(TYP_BYREF) || varTypeIsIntOrI(op2->gtType)); } } else @@ -8523,7 +8661,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } #endif - assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->gtType == TYP_BYREF); + assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->TypeIs(TYP_BYREF)); op1 = gtNewIndir(lclTyp, op1, impPrefixFlagsToIndirFlags(prefixFlags)); impPushOnStack(op1, tiRetVal); @@ -8634,9 +8772,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) /* Get the object-ref */ op1 = impPopStack().val; - assertImp(op1->gtType == TYP_REF); + assertImp(op1->TypeIs(TYP_REF)); - if (opts.IsReadyToRun()) + if (IsAot()) { if (callInfo.kind != CORINFO_VIRTUALCALL_LDVIRTFTN) { @@ -8863,7 +9001,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) { // Append a tree to zero-out the temp GenTree* newObjInit = - gtNewZeroConNode((lclDsc->TypeGet() == TYP_STRUCT) ? TYP_INT : lclDsc->TypeGet()); + gtNewZeroConNode(lclDsc->TypeIs(TYP_STRUCT) ? TYP_INT : lclDsc->TypeGet()); impStoreToTemp(lclNum, newObjInit, CHECK_SPILL_NONE); } @@ -8968,7 +9106,41 @@ void Compiler::impImportBlockCode(BasicBlock* block) // many other places. We unfortunately embed that knowledge here. if (opcode != CEE_CALLI) { - _impResolveToken(CORINFO_TOKENKIND_Method); + bool isAwait = false; + // TODO: The configVal should be wired to the actual implementation + // that control the flow of sync context. + // We do not have that yet. + int configVal = -1; // -1 not configured, 0/1 configured to false/true + if (compIsAsync() && JitConfig.JitOptimizeAwait()) + { + isAwait = impMatchAwaitPattern(codeAddr, codeEndp, &configVal); + } + + if (isAwait) + { + _impResolveToken(CORINFO_TOKENKIND_Await); + if (resolvedToken.hMethod != NULL) + { + // There is a runtime async variant that is implicitly awaitable, just call that. 
+ // if configured, skip {ldc call ConfigureAwait} + if (configVal >= 0) + codeAddr += 2 + sizeof(mdToken); + + // Skip the call to `Await` + codeAddr += 1 + sizeof(mdToken); + } + else + { + // This can happen in rare cases when the Task-returning method is not a runtime Async + // function. For example "T M1(T arg) => arg" when called with a Task argument. Treat + // that as a regualr call that is Awaited + _impResolveToken(CORINFO_TOKENKIND_Method); + } + } + else + { + _impResolveToken(CORINFO_TOKENKIND_Method); + } eeGetCallInfo(&resolvedToken, (prefixFlags & PREFIX_CONSTRAINED) ? &constrainedResolvedToken : nullptr, @@ -9463,8 +9635,8 @@ void Compiler::impImportBlockCode(BasicBlock* block) { bool isHoistable = info.compCompHnd->getClassAttribs(resolvedToken.hClass) & CORINFO_FLG_BEFOREFIELDINIT; - unsigned check_spill = isHoistable ? CHECK_SPILL_NONE : CHECK_SPILL_ALL; - impAppendTree(helperNode, check_spill, impCurStmtDI); + unsigned checkSpill = isHoistable ? CHECK_SPILL_NONE : CHECK_SPILL_ALL; + impAppendTree(helperNode, checkSpill, impCurStmtDI); } } @@ -9498,6 +9670,12 @@ void Compiler::impImportBlockCode(BasicBlock* block) { impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("value for stsfld with typeinit")); } + else if (compIsAsync() && op1->TypeIs(TYP_BYREF)) + { + // TODO-Async: We really only need to spill if + // there is a possibility of an async call in op2. + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("byref address in async method")); + } break; default: @@ -9676,7 +9854,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) JITDUMP(" %08X", resolvedToken.token); - if (!opts.IsReadyToRun()) + if (!IsAot()) { // Need to restore array classes before creating array objects on the heap op1 = impTokenToHandle(&resolvedToken, nullptr, true /*mustRestoreHandle*/); @@ -9733,7 +9911,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (((fi.fieldFlags & flagsToCheck) == flagsToCheck) && !eeIsSharedInst(info.compClassHnd)) { #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { // Need to restore array classes before creating array objects on the heap op1 = impTokenToHandle(&resolvedToken, nullptr, true /*mustRestoreHandle*/); @@ -9747,7 +9925,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun() && !isFrozenAllocator) + if (IsAot() && !isFrozenAllocator) { helper = CORINFO_HELP_READYTORUN_NEWARR_1; op1 = impReadyToRunHelperToTree(&resolvedToken, helper, TYP_REF, nullptr, op2); @@ -9807,9 +9985,10 @@ void Compiler::impImportBlockCode(BasicBlock* block) impPushOnStack(gtNewLclvNode(lclNum, TYP_REF), tiRetVal); #ifdef DEBUG - // Under SPMI, look up info we might ask for if we stack allocate this array + // Under SPMI, look up info we might ask for if we stack allocate this array, + // but only if we know the precise type // - if (JitConfig.EnableExtraSuperPmiQueries()) + if (JitConfig.EnableExtraSuperPmiQueries() && !eeIsSharedInst(resolvedToken.hClass)) { void* pEmbedClsHnd; info.compCompHnd->embedClassHandle(resolvedToken.hClass, &pEmbedClsHnd); @@ -9959,7 +10138,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) JITDUMP(" %08X", resolvedToken.token); - if (!opts.IsReadyToRun()) + if (!IsAot()) { op2 = impTokenToHandle(&resolvedToken, nullptr, false); if (op2 == nullptr) @@ -9984,7 +10163,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) { #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { GenTreeCall* opLookup = 
impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_ISINSTANCEOF, TYP_REF, @@ -10054,7 +10233,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) // Call helper GETREFANY(classHandle, op1); GenTreeCall* helperCall = gtNewHelperCallNode(CORINFO_HELP_GETREFANY, TYP_BYREF); NewCallArg clsHandleArg = NewCallArg::Primitive(op2); - NewCallArg typedRefArg = NewCallArg::Struct(op1, TYP_STRUCT, impGetRefAnyClass()); + NewCallArg typedRefArg = NewCallArg::Struct(op1, TYP_STRUCT, typGetObjLayout(impGetRefAnyClass())); helperCall->gtArgs.PushFront(this, clsHandleArg, typedRefArg); helperCall->gtFlags |= (op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT; op1 = helperCall; @@ -10171,7 +10350,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) /* instruction. BLK then creates the appropriate tiRetVal. */ op1 = impPopStack().val; - assertImp(op1->gtType == TYP_REF); + assertImp(op1->TypeIs(TYP_REF)); helper = info.compCompHnd->getUnBoxHelper(resolvedToken.hClass); assert(helper == CORINFO_HELP_UNBOX || helper == CORINFO_HELP_UNBOX_NULLABLE); @@ -10301,7 +10480,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = gtNewHelperCallNode(helper, TYP_BYREF, op2, op1); } - assert((helper == CORINFO_HELP_UNBOX && op1->gtType == TYP_BYREF) || // Unbox helper returns a byref. + assert((helper == CORINFO_HELP_UNBOX && op1->TypeIs(TYP_BYREF)) || // Unbox helper returns a byref. (helper == CORINFO_HELP_UNBOX_NULLABLE && op1->TypeIs(TYP_STRUCT)) // UnboxNullable helper // returns a struct. ); @@ -10444,7 +10623,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) JITDUMP(" %08X", resolvedToken.token); - if (!opts.IsReadyToRun()) + if (!IsAot()) { op2 = impTokenToHandle(&resolvedToken, nullptr, false); if (op2 == nullptr) @@ -10475,7 +10654,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) { #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { GenTreeCall* opLookup = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_CHKCAST, TYP_REF, nullptr, @@ -10851,9 +11030,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) return; #undef _impResolveToken } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //------------------------------------------------------------------------ // impCreateLocal: create a GT_LCL_VAR node to access a local that might need to be normalized on load @@ -10986,8 +11162,7 @@ GenTree* Compiler::impStoreMultiRegValueToVar(GenTree* op, LclVarDsc* varDsc = lvaGetDesc(tmpNum); - // Set "lvIsMultiRegRet" to block promotion under "!lvaEnregMultiRegVars". - varDsc->lvIsMultiRegRet = true; + varDsc->SetIsMultiRegDest(); GenTreeLclVar* ret = gtNewLclvNode(tmpNum, varDsc->lvType); @@ -11044,8 +11219,8 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) op2 = impImplicitR4orR8Cast(op2, info.compRetType); assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || - ((op2->TypeGet() == TYP_I_IMPL) && (info.compRetType == TYP_BYREF)) || - ((op2->TypeGet() == TYP_BYREF) && (info.compRetType == TYP_I_IMPL)) || + (op2->TypeIs(TYP_I_IMPL) && (info.compRetType == TYP_BYREF)) || + (op2->TypeIs(TYP_BYREF) && (info.compRetType == TYP_I_IMPL)) || (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType))); @@ -11056,7 +11231,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) // VSW 440513: Incorrect gcinfo on the return value under DOTNET_JitGCChecks=1 for methods with // one-return BB. 
- assert(op2->gtType == TYP_REF); + assert(op2->TypeIs(TYP_REF)); // confirm that the argument is a GC pointer (for debugging (GC stress)) op2 = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_REF, op2); @@ -11201,7 +11376,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) { // Some other block(s) have seen the CEE_RET first. // Better they spilled to the same temp. - assert(inlRetExpr->gtSubstExpr->gtOper == GT_LCL_VAR); + assert(inlRetExpr->gtSubstExpr->OperIs(GT_LCL_VAR)); assert(inlRetExpr->gtSubstExpr->AsLclVarCommon()->GetLclNum() == op2->AsLclVarCommon()->GetLclNum()); } @@ -11465,7 +11640,7 @@ inline void Compiler::impReimportMarkBlock(BasicBlock* block) void Compiler::impVerifyEHBlock(BasicBlock* block) { assert(block->hasTryIndex()); - assert(!compIsForInlining()); + assert(!compIsForInlining() || opts.compInlineMethodsWithEH); unsigned tryIndex = block->getTryIndex(); EHblkDsc* HBtab = ehGetDsc(tryIndex); @@ -11563,11 +11738,6 @@ void Compiler::impVerifyEHBlock(BasicBlock* block) // Import the instructions for the given basic block. Perform // verification, throwing an exception on failure. Push any successor blocks that are enabled for the first // time, or whose verification pre-state is changed. - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif void Compiler::impImportBlock(BasicBlock* block) { // BBF_INTERNAL blocks only exist during importation due to EH canonicalization. We need to @@ -11648,7 +11818,7 @@ void Compiler::impImportBlock(BasicBlock* block) addStmt = impExtractLastStmt(); - assert(addStmt->GetRootNode()->gtOper == GT_JTRUE); + assert(addStmt->GetRootNode()->OperIs(GT_JTRUE)); /* Note if the next block has more than one ancestor */ @@ -11679,7 +11849,7 @@ void Compiler::impImportBlock(BasicBlock* block) case BBJ_SWITCH: addStmt = impExtractLastStmt(); - assert(addStmt->GetRootNode()->gtOper == GT_SWITCH); + assert(addStmt->GetRootNode()->OperIs(GT_SWITCH)); for (BasicBlock* const tgtBlock : block->SwitchTargets()) { @@ -11736,21 +11906,21 @@ void Compiler::impImportBlock(BasicBlock* block) // the stack. Thus the value would not get GC-tracked. Hence, // change the temp to TYP_BYREF and reimport the clique. LclVarDsc* tempDsc = lvaGetDesc(tempNum); - if (tree->TypeIs(TYP_BYREF) && (tempDsc->TypeGet() == TYP_I_IMPL)) + if (tree->TypeIs(TYP_BYREF) && tempDsc->TypeIs(TYP_I_IMPL)) { tempDsc->lvType = TYP_BYREF; reimportSpillClique = true; } #ifdef TARGET_64BIT - if ((genActualType(tree) == TYP_I_IMPL) && (tempDsc->TypeGet() == TYP_INT)) + if ((genActualType(tree) == TYP_I_IMPL) && tempDsc->TypeIs(TYP_INT)) { // Some other block in the spill clique set this to "int", but now we have "native int". // Change the type and go back to re-import any blocks that used the wrong type. tempDsc->lvType = TYP_I_IMPL; reimportSpillClique = true; } - else if ((genActualType(tree) == TYP_INT) && (tempDsc->TypeGet() == TYP_I_IMPL)) + else if ((genActualType(tree) == TYP_INT) && tempDsc->TypeIs(TYP_I_IMPL)) { // Spill clique has decided this should be "native int", but this block only pushes an "int". // Insert a sign-extension to "native int" so we match the clique. @@ -11765,14 +11935,14 @@ void Compiler::impImportBlock(BasicBlock* block) // imported already, we need to change the type of the local and reimport the spill clique. // If the 'byref' side has imported, we insert a cast from int to 'native int' to match // the 'byref' size. 
- if ((genActualType(tree) == TYP_BYREF) && (tempDsc->TypeGet() == TYP_INT)) + if ((genActualType(tree) == TYP_BYREF) && tempDsc->TypeIs(TYP_INT)) { // Some other block in the spill clique set this to "int", but now we have "byref". // Change the type and go back to re-import any blocks that used the wrong type. tempDsc->lvType = TYP_BYREF; reimportSpillClique = true; } - else if ((genActualType(tree) == TYP_INT) && (tempDsc->TypeGet() == TYP_BYREF)) + else if ((genActualType(tree) == TYP_INT) && tempDsc->TypeIs(TYP_BYREF)) { // Spill clique has decided this should be "byref", but this block only pushes an "int". // Insert a sign-extension to "native int" so we match the clique size. @@ -11788,7 +11958,7 @@ void Compiler::impImportBlock(BasicBlock* block) tempDsc->lvType = TYP_DOUBLE; reimportSpillClique = true; } - else if (tree->TypeIs(TYP_FLOAT) && (tempDsc->TypeGet() == TYP_DOUBLE)) + else if (tree->TypeIs(TYP_FLOAT) && tempDsc->TypeIs(TYP_DOUBLE)) { // Spill clique has decided this should be "double", but this block only pushes a "float". // Insert a cast to "double" so we match the clique. @@ -11908,9 +12078,6 @@ void Compiler::impImportBlock(BasicBlock* block) } } } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //------------------------------------------------------------------------ // impImportBlockPending: ensure that block will be imported @@ -12243,7 +12410,7 @@ void Compiler::impRetypeEntryStateTemps(BasicBlock* blk) for (unsigned level = 0; level < es->esStackDepth; level++) { GenTree* tree = es->esStack[level].val; - if ((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_LCL_FLD)) + if (tree->OperIs(GT_LCL_VAR) || tree->OperIs(GT_LCL_FLD)) { es->esStack[level].val->gtType = lvaGetDesc(tree->AsLclVarCommon())->TypeGet(); } @@ -12536,9 +12703,8 @@ void Compiler::impImport() // If the method had EH, we may be missing some pred edges // (notably those from BBJ_EHFINALLYRET blocks). Add them. - // Only needed for the root method, since inlinees can't have EH. // - if (!compIsForInlining() && (info.compXcptnsCount > 0)) + if (info.compXcptnsCount > 0) { impFixPredLists(); JITDUMP("\nAfter impImport() added blocks for try,catch,finally"); @@ -12558,8 +12724,9 @@ void Compiler::impImport() // void Compiler::impFixPredLists() { - unsigned XTnum = 0; - bool added = false; + unsigned XTnum = 0; + bool added = false; + const bool usingProfileWeights = fgIsUsingProfileWeights(); for (EHblkDsc* HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) { @@ -12568,6 +12735,7 @@ void Compiler::impFixPredLists() BasicBlock* const finallyBegBlock = HBtab->ebdHndBeg; BasicBlock* const finallyLastBlock = HBtab->ebdHndLast; unsigned predCount = (unsigned)-1; + const weight_t finallyWeight = finallyBegBlock->bbWeight; for (BasicBlock* const finallyBlock : BasicBlockRangeList(finallyBegBlock, finallyLastBlock)) { @@ -12611,7 +12779,8 @@ void Compiler::impFixPredLists() jumpEhf->bbeCount = predCount; jumpEhf->bbeSuccs = new (this, CMK_FlowEdge) FlowEdge*[predCount]; - unsigned predNum = 0; + unsigned predNum = 0; + weight_t remainingLikelihood = 1.0; for (BasicBlock* const predBlock : finallyBegBlock->PredBlocks()) { // We only care about preds that are callfinallies. 
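The spill-clique retyping above unifies the type of a shared spill temp when different predecessor blocks push compatible but distinct types (int vs native int, int vs byref, float vs double), widening the temp and inserting a cast or re-importing blocks as needed. A toy stand-alone sketch of that join using a simplified type lattice rather than the JIT's var_types:

```cpp
#include <cstdio>

enum class StackType { Int, NativeInt, ByRef, Float, Double };

const char* name(StackType t)
{
    switch (t)
    {
        case StackType::Int:       return "int";
        case StackType::NativeInt: return "native int";
        case StackType::ByRef:     return "byref";
        case StackType::Float:     return "float";
        case StackType::Double:    return "double";
    }
    return "?";
}

// Join two contributions to the same spill temp; the wider type wins.
StackType join(StackType a, StackType b)
{
    if (a == b)
        return a;
    auto widens = [](StackType narrow, StackType wide) {
        return (narrow == StackType::Int && (wide == StackType::NativeInt || wide == StackType::ByRef)) ||
               (narrow == StackType::NativeInt && wide == StackType::ByRef) ||
               (narrow == StackType::Float && wide == StackType::Double);
    };
    if (widens(a, b))
        return b;
    if (widens(b, a))
        return a;
    return a; // incompatible: the real code would reject the IL instead
}

int main()
{
    StackType blockA = StackType::Int;       // one predecessor pushes "int"
    StackType blockB = StackType::NativeInt; // another pushes "native int"
    StackType temp   = join(blockA, blockB);
    std::printf("spill temp retyped to '%s'; the '%s' side needs an extension cast\n",
                name(temp), name(blockA));
    return 0;
}
```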
@@ -12623,7 +12792,22 @@ void Compiler::impFixPredLists() BasicBlock* const continuation = predBlock->Next(); FlowEdge* const newEdge = fgAddRefPred(continuation, finallyBlock); - newEdge->setLikelihood(1.0 / predCount); + + if (usingProfileWeights && (finallyWeight != BB_ZERO_WEIGHT)) + { + // Derive edge likelihood from the entry block's weight relative to other entries. + // + const weight_t callFinallyWeight = predBlock->bbWeight; + const weight_t likelihood = min(callFinallyWeight / finallyWeight, 1.0); + newEdge->setLikelihood(min(likelihood, remainingLikelihood)); + remainingLikelihood = max(BB_ZERO_WEIGHT, remainingLikelihood - likelihood); + } + else + { + // If we don't have profile data, evenly distribute the likelihoods. + // + newEdge->setLikelihood(1.0 / predCount); + } jumpEhf->bbeSuccs[predNum] = newEdge; ++predNum; @@ -12639,6 +12823,27 @@ void Compiler::impFixPredLists() finallyBlock->SetEhfTargets(jumpEhf); } + + if (usingProfileWeights) + { + // Compute new flow into the finally region's continuation successors. + // + bool profileConsistent = true; + for (BasicBlock* const callFinally : finallyBegBlock->PredBlocks()) + { + BasicBlock* const callFinallyRet = callFinally->Next(); + callFinallyRet->setBBProfileWeight(callFinallyRet->computeIncomingWeight()); + profileConsistent &= + fgProfileWeightsConsistentOrSmall(callFinally->bbWeight, callFinallyRet->bbWeight); + } + + if (!profileConsistent) + { + JITDUMP("Flow into finally handler EH%u does not match outgoing flow. Data %s inconsistent.\n", + XTnum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; + } + } } } } @@ -12709,7 +12914,7 @@ bool Compiler::impIsAddressInLocal(const GenTree* tree, GenTree** lclVarTreeOut) void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult) { assert((pInlineInfo != nullptr && compIsForInlining()) || // Perform the actual inlining. - (pInlineInfo == nullptr && !compIsForInlining()) // Calculate the static inlining hint for ngen. + (pInlineInfo == nullptr && !compIsForInlining()) // Calculate the static inlining hint for AOT. ); // If we're really inlining, we should just have one result in play. @@ -12923,7 +13128,7 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I if ((pInlineInfo != nullptr) && rootCompiler->fgHaveSufficientProfileWeights()) { const weight_t callSiteWeight = pInlineInfo->iciBlock->bbWeight; - const weight_t entryWeight = rootCompiler->fgFirstBB->bbWeight; + const weight_t entryWeight = rootCompiler->fgCalledCount; profileFreq = fgProfileWeightsEqual(entryWeight, 0.0) ? 0.0 : callSiteWeight / entryWeight; hasProfile = true; @@ -12946,7 +13151,7 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I // // Arguments: // fncHandle -- inline candidate method -// methInfo -- method info from VN +// methInfo -- method info from VM // forceInline -- true if method is marked with AggressiveInlining // inlineResult -- ongoing inline evaluation // @@ -12960,10 +13165,13 @@ void Compiler::impCanInlineIL(CORINFO_METHOD_HANDLE fncHandle, // We shouldn't have made up our minds yet... 
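With profile data available, the new impFixPredLists logic above sizes each continuation edge's likelihood from its callfinally block's weight relative to the finally entry's weight, clamping so the running total never exceeds 1.0, and falls back to an even split otherwise. A small stand-alone sketch of that arithmetic with plain doubles in place of weight_t and FlowEdge:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Derive continuation-edge likelihoods from block weights, clamping so the
// total never exceeds 1.0, with an even split when there is no usable profile.
std::vector<double> deriveLikelihoods(double finallyWeight, const std::vector<double>& callFinallyWeights)
{
    std::vector<double> likelihoods;
    const size_t count = callFinallyWeights.size();

    if (finallyWeight <= 0.0)
    {
        likelihoods.assign(count, 1.0 / static_cast<double>(count));
        return likelihoods;
    }

    double remaining = 1.0;
    for (double w : callFinallyWeights)
    {
        double likelihood = std::min(w / finallyWeight, 1.0);
        likelihoods.push_back(std::min(likelihood, remaining));
        remaining = std::max(0.0, remaining - likelihood);
    }
    return likelihoods;
}

int main()
{
    // A finally entered 100 times from three callfinally blocks weighted 60/30/20
    // (slightly inconsistent profile data, as happens in practice): prints 0.60 0.30 0.10.
    for (double p : deriveLikelihoods(100.0, {60.0, 30.0, 20.0}))
        std::printf("%.2f ", p);
    std::printf("\n");
    return 0;
}
```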
assert(!inlineResult->IsDecided()); - if (methInfo->EHcount) + if (methInfo->EHcount > 0) { - inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH); - return; + if (!opts.compInlineMethodsWithEH) + { + inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH); + return; + } } if ((methInfo->ILCode == nullptr) || (codeSize == 0)) @@ -13080,7 +13288,7 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, argInfo->argHasSideEff = (curArgVal->gtFlags & (GTF_ALL_EFFECT & ~GTF_GLOB_REF)) != 0; } - if (curArgVal->gtOper == GT_LCL_VAR) + if (curArgVal->OperIs(GT_LCL_VAR)) { argInfo->argIsLclVar = true; } @@ -13090,7 +13298,7 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, if (impIsInvariant(curArgVal)) { argInfo->argIsInvariant = true; - if (argInfo->argIsThis && (curArgVal->gtOper == GT_CNS_INT) && (curArgVal->AsIntCon()->gtIconVal == 0)) + if (argInfo->argIsThis && curArgVal->OperIs(GT_CNS_INT) && (curArgVal->AsIntCon()->gtIconVal == 0)) { // Abort inlining at this call site inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_HAS_NULL_THIS); @@ -13220,7 +13428,8 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) switch (arg.GetWellKnownArg()) { case WellKnownArg::RetBuffer: - // This does not appear in the table of inline arg info; do not include them + case WellKnownArg::AsyncContinuation: + // These do not appear in the table of inline arg info; do not include them continue; case WellKnownArg::InstParam: pInlineInfo->inlInstParamArgInfo = argInfo = new (this, CMK_Inlining) InlArgInfo{}; @@ -13269,8 +13478,8 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) GenTree* thisArgNode = thisArg->GetEarlyNode(); - assert(varTypeIsGC(thisArgNode->TypeGet()) || // "this" is managed - ((thisArgNode->TypeGet() == TYP_I_IMPL) && // "this" is unmgd but the method's class doesnt care + assert(varTypeIsGC(thisArgNode->TypeGet()) || // "this" is managed + (thisArgNode->TypeIs(TYP_I_IMPL) && // "this" is unmgd but the method's class doesnt care isValueClassThis)); if (genActualType(thisArgNode) != genActualType(sigType)) @@ -13285,7 +13494,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) /* This can only happen with byrefs <-> ints/shorts */ assert(sigType == TYP_BYREF); - assert((genActualType(thisArgNode) == TYP_I_IMPL) || (thisArgNode->TypeGet() == TYP_BYREF)); + assert((genActualType(thisArgNode) == TYP_I_IMPL) || thisArgNode->TypeIs(TYP_BYREF)); } } @@ -13343,7 +13552,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) // encountering bad IL. 
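impInlineRecordArgInfo above summarizes each argument (invariant, plain local, has side effects) and aborts the inline outright when the `this` argument is a provably null constant. A very small stand-alone sketch of that classification with simplified stand-in types (ArgNode and InlArgSummary are not the JIT's structures):

```cpp
#include <cstdio>

struct ArgNode
{
    bool isConstant;
    bool isLocalVar;
    bool hasSideEffects;
    long constValue;
};

struct InlArgSummary
{
    bool argIsInvariant;
    bool argIsLclVar;
    bool argHasSideEff;
};

// Classify one argument; reject the inline attempt when 'this' is a null constant,
// since the callee would always throw anyway.
bool recordArgInfo(const ArgNode& arg, bool isThisArg, InlArgSummary* summary)
{
    summary->argIsInvariant = arg.isConstant;
    summary->argIsLclVar    = arg.isLocalVar;
    summary->argHasSideEff  = arg.hasSideEffects;

    if (isThisArg && arg.isConstant && (arg.constValue == 0))
    {
        std::printf("abort inlining: null 'this'\n");
        return false;
    }
    return true;
}

int main()
{
    InlArgSummary summary{};
    ArgNode nullThis{/*isConstant*/ true, /*isLocalVar*/ false, /*hasSideEffects*/ false, /*constValue*/ 0};
    bool ok = recordArgInfo(nullThis, /*isThisArg*/ true, &summary);
    std::printf("inline still viable: %s\n", ok ? "yes" : "no");
    return 0;
}
```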
bool isPlausibleTypeMatch = (genActualType(sigType) == genActualType(inlArgNode->gtType)) || - (genActualTypeIsIntOrI(sigType) && inlArgNode->gtType == TYP_BYREF) || + (genActualTypeIsIntOrI(sigType) && inlArgNode->TypeIs(TYP_BYREF)) || (sigType == TYP_BYREF && genActualTypeIsIntOrI(inlArgNode->gtType)); if (!isPlausibleTypeMatch) @@ -13360,7 +13569,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) // normalized to an int (on the IL stack) if (genTypeSize(inlArgNode) >= genTypeSize(sigType)) { - if ((sigType != TYP_BYREF) && (inlArgNode->TypeGet() == TYP_BYREF)) + if ((sigType != TYP_BYREF) && inlArgNode->TypeIs(TYP_BYREF)) { assert(varTypeIsIntOrI(sigType)); @@ -13630,6 +13839,20 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l GenTree* argNode = argInfo.arg->GetNode(); assert(!argNode->OperIs(GT_RET_EXPR)); + // For TYP_REF args, if the argNode doesn't have any class information + // we will lose some type info if we directly substitute it. + // We can at least rely on the declared type of the arg here. + // + bool argLosesTypeInfo = false; + if (argNode->TypeIs(TYP_REF)) + { + bool isExact; + bool isNeverNull; + CORINFO_CLASS_HANDLE argClass = gtGetClassHandle(argNode, &isExact, &isNeverNull); + + argLosesTypeInfo = (argClass == NO_CLASS_HANDLE); + } + if (argInfo.argIsInvariant && !argCanBeModified) { // Directly substitute constants or addresses of locals @@ -13641,7 +13864,7 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l // further references to the argument working off of the // bashed copy. op1 = gtCloneExpr(argNode); - PREFIX_ASSUME(op1 != nullptr); + assert(op1 != nullptr); argInfo.argTmpNum = BAD_VAR_NUM; // We may need to retype to ensure we match the callee's view of the type. @@ -13655,7 +13878,7 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l op1->gtType = genActualType(lclTyp); } } - else if (argInfo.argIsLclVar && !argCanBeModified && !argInfo.argHasCallerLocalRef) + else if (argInfo.argIsLclVar && !argCanBeModified && !argInfo.argHasCallerLocalRef && !argLosesTypeInfo) { // Directly substitute unaliased caller locals for args that cannot be modified // @@ -13673,9 +13896,9 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l // So here we may have argument type mismatches that are benign, for instance // passing a TYP_SHORT local (eg. normalized-on-load) as a TYP_INT arg. // The exception is when the inlining means we should start tracking the argument. - if (argInfo.argIsUsed || ((lclTyp == TYP_BYREF) && (op1->TypeGet() != TYP_BYREF))) + if (argInfo.argIsUsed || ((lclTyp == TYP_BYREF) && !op1->TypeIs(TYP_BYREF))) { - assert(op1->gtOper == GT_LCL_VAR); + assert(op1->OperIs(GT_LCL_VAR)); // Create a new lcl var node - remember the argument lclNum op1 = impCreateLocalNode(argLclNum DEBUGARG(op1->AsLclVar()->gtLclILoffs)); @@ -13705,7 +13928,7 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l then we change the argument tree (of "ldloca.s V_1") to TYP_I_IMPL to match the callee signature. We'll soon afterwards reject the inlining anyway, since the tree we return isn't a GT_LCL_VAR. 
*/ - assert(argNode->TypeGet() == TYP_BYREF || argNode->TypeGet() == TYP_I_IMPL); + assert(argNode->TypeIs(TYP_BYREF, TYP_I_IMPL)); op1 = gtCloneExpr(argNode); } else @@ -13743,8 +13966,10 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l assert(lvaTable[tmpNum].lvSingleDef == 0); lvaTable[tmpNum].lvSingleDef = 1; JITDUMP("Marked V%02u as a single def temp\n", tmpNum); + if (lclTyp == TYP_REF) { + // Use argNode type (when it exists) or lclInfo type lvaSetClass(tmpNum, argNode, lclInfo.lclTypeHandle); } } @@ -13752,7 +13977,7 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l { if (lclTyp == TYP_REF) { - // Arg might be modified, use the declared type of the argument. + // Arg might be modified. Use the declared type of the argument. lvaSetClass(tmpNum, lclInfo.lclTypeHandle); } } @@ -13766,10 +13991,6 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l if (varTypeIsStruct(lclTyp)) { lvaSetStruct(tmpNum, lclInfo.lclTypeHandle, true /* unsafe value cls check */); - if (info.compIsVarArgs) - { - lvaSetStructUsedAsVarArg(tmpNum); - } } argInfo.argHasTmp = true; @@ -13822,7 +14043,7 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l bool Compiler::impInlineIsThis(GenTree* tree, InlArgInfo* inlArgInfo) { assert(compIsForInlining()); - return (tree->gtOper == GT_LCL_VAR && tree->AsLclVarCommon()->GetLclNum() == inlArgInfo[0].argTmpNum); + return (tree->OperIs(GT_LCL_VAR) && tree->AsLclVarCommon()->GetLclNum() == inlArgInfo[0].argTmpNum); } //----------------------------------------------------------------------------- diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 43f5a5d9a551..a5c87aafa3a7 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -30,12 +30,6 @@ // // For CEE_NEWOBJ, newobjThis should be the temp grabbed for the allocated // uninitialized object. - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif - var_types Compiler::impImportCall(OPCODE opcode, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken, @@ -100,7 +94,8 @@ var_types Compiler::impImportCall(OPCODE opcode, bool bIntrinsicImported = false; CORINFO_SIG_INFO calliSig; - NewCallArg extraArg; + GenTree* varArgsCookie = nullptr; + GenTree* instParam = nullptr; // Swift calls that might throw use a SwiftError* arg that requires additional IR to handle, // so if we're importing a Swift call, look for this type in the signature @@ -114,9 +109,7 @@ var_types Compiler::impImportCall(OPCODE opcode, { if (IsTargetAbi(CORINFO_NATIVEAOT_ABI)) { - // See comment in impCheckForPInvokeCall - BasicBlock* block = compIsForInlining() ? 
impInlineInfo->iciBlock : compCurBB; - if (info.compCompHnd->convertPInvokeCalliToCall(pResolvedToken, !impCanPInvokeInlineCallSite(block))) + if (info.compCompHnd->convertPInvokeCalliToCall(pResolvedToken, !impCanPInvokeInlineCallSite(compCurBB))) { eeGetCallInfo(pResolvedToken, nullptr, CORINFO_CALLINFO_ALLOWINSTPARAM, callInfo); return impImportCall(CEE_CALL, pResolvedToken, nullptr, nullptr, prefixFlags, callInfo, rawILOffset); @@ -225,17 +218,19 @@ var_types Compiler::impImportCall(OPCODE opcode, // Factor this into getCallInfo bool isSpecialIntrinsic = false; - if (isIntrinsic || !info.compMatchedVM) + if (isIntrinsic || (!info.compMatchedVM && !RunningSuperPmiReplay())) { // For mismatched VM (AltJit) we want to check all methods as intrinsic to ensure - // we get more accurate codegen. This particularly applies to HWIntrinsic usage + // we get more accurate codegen. This particularly applies to HWIntrinsic usage. + // But don't do this under SuperPMI replay, because it's unlikely we'll have + // the right data in the MethodContext in that case. const bool isTailCall = canTailCall && (tailCallFlags != 0); #if defined(FEATURE_READYTORUN) CORINFO_CONST_LOOKUP entryPoint; - if (opts.IsReadyToRun() && (callInfo->kind == CORINFO_CALL)) + if (IsAot() && (callInfo->kind == CORINFO_CALL)) { entryPoint = callInfo->codePointerLookup.constLookup; } @@ -359,7 +354,7 @@ var_types Compiler::impImportCall(OPCODE opcode, } #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { // Null check is sometimes needed for ready to run to handle // non-virtual <-> virtual changes between versions @@ -436,7 +431,7 @@ var_types Compiler::impImportCall(OPCODE opcode, addFatPointerCandidate(call->AsCall()); } #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { // Null check is needed for ready to run to handle // non-virtual <-> virtual changes between versions @@ -465,7 +460,7 @@ var_types Compiler::impImportCall(OPCODE opcode, } #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { call->AsCall()->setEntryPoint(callInfo->codePointerLookup.constLookup); } @@ -516,7 +511,7 @@ var_types Compiler::impImportCall(OPCODE opcode, //------------------------------------------------------------------------- // Set more flags - PREFIX_ASSUME(call != nullptr); + assert(call != nullptr); if (mflags & CORINFO_FLG_NOGCCHECK) { @@ -582,8 +577,6 @@ var_types Compiler::impImportCall(OPCODE opcode, if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG || (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG) { - assert(!compIsForInlining()); - /* Set the right flags */ call->gtFlags |= GTF_CALL_POP_ARGS; @@ -641,17 +634,9 @@ var_types Compiler::impImportCall(OPCODE opcode, } //--------------------------- Inline NDirect ------------------------------ - - // For inline cases we technically should look at both the current - // block and the call site block (or just the latter if we've - // fused the EH trees). However the block-related checks pertain to - // EH and we currently won't inline a method with EH. So for - // inlinees, just checking the call site block is sufficient. - { - // New lexical block here to avoid compilation errors because of GOTOs. - BasicBlock* block = compIsForInlining() ? impInlineInfo->iciBlock : compCurBB; - impCheckForPInvokeCall(call->AsCall(), methHnd, sig, mflags, block); - } + // If this is a call to a PInvoke method, we may be able to inline the invocation frame. 
+ // + impCheckForPInvokeCall(call->AsCall(), methHnd, sig, mflags, compCurBB); #ifdef UNIX_X86_ABI // On Unix x86 we use caller-cleaned convention. @@ -686,7 +671,7 @@ var_types Compiler::impImportCall(OPCODE opcode, { // Normally this only happens with inlining. // However, a generic method (or type) being NGENd into another module - // can run into this issue as well. There's not an easy fall-back for NGEN + // can run into this issue as well. There's not an easy fall-back for AOT // so instead we fallback to JIT. if (compIsForInlining()) { @@ -706,11 +691,11 @@ var_types Compiler::impImportCall(OPCODE opcode, // an indirection of a GT_CNS_INT // GenTree* cookieConst = cookie; - if (cookie->gtOper == GT_IND) + if (cookie->OperIs(GT_IND)) { cookieConst = cookie->AsOp()->gtOp1; } - assert(cookieConst->gtOper == GT_CNS_INT); + assert(cookieConst->OperIs(GT_CNS_INT)); // Setting GTF_DONT_CSE on the GT_CNS_INT as well as on the GT_IND (if it exists) will ensure that // we won't allow this tree to participate in any CSE logic @@ -727,17 +712,18 @@ var_types Compiler::impImportCall(OPCODE opcode, } } - /*------------------------------------------------------------------------- - * Create the argument list - */ + if (sig->isAsyncCall()) + { + call->AsCall()->SetIsAsync(); + } + + // Now create the argument list. //------------------------------------------------------------------------- - // Special case - for varargs we have an implicit last argument + // Special case - for varargs we have an extra argument if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG) { - assert(!compIsForInlining()); - void *varCookie, *pVarCookie; if (!info.compCompHnd->canGetVarArgsHandle(sig)) { @@ -747,9 +733,7 @@ var_types Compiler::impImportCall(OPCODE opcode, varCookie = info.compCompHnd->getVarArgsHandle(sig, &pVarCookie); assert((!varCookie) != (!pVarCookie)); - GenTree* cookieNode = gtNewIconEmbHndNode(varCookie, pVarCookie, GTF_ICON_VARG_HDL, sig); - assert(extraArg.Node == nullptr); - extraArg = NewCallArg::Primitive(cookieNode).WellKnown(WellKnownArg::VarArgsCookie); + varArgsCookie = gtNewIconEmbHndNode(varCookie, pVarCookie, GTF_ICON_VARG_HDL, sig); } //------------------------------------------------------------------------- @@ -769,7 +753,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // We also set the exact type context associated with the call so we can // inline the call correctly later on. 
- if (sig->callConv & CORINFO_CALLCONV_PARAMTYPE) + if (sig->hasTypeArg()) { assert(call->AsCall()->gtCallType == CT_USER_FUNC); if (clsHnd == nullptr) @@ -779,8 +763,7 @@ var_types Compiler::impImportCall(OPCODE opcode, assert(opcode != CEE_CALLI); - GenTree* instParam; - bool runtimeLookup; + bool runtimeLookup; // Instantiated generic method if (((SIZE_T)exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_METHOD) @@ -793,7 +776,7 @@ var_types Compiler::impImportCall(OPCODE opcode, if (!exactContextNeedsRuntimeLookup) { #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { instParam = impReadyToRunLookupToTree(&callInfo->instParamLookup, GTF_ICON_METHOD_HDL, exactMethodHandle); @@ -843,7 +826,7 @@ var_types Compiler::impImportCall(OPCODE opcode, else if (!exactContextNeedsRuntimeLookup) { #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { instParam = impReadyToRunLookupToTree(&callInfo->instParamLookup, GTF_ICON_CLASS_HDL, exactClassHandle); @@ -870,9 +853,6 @@ var_types Compiler::impImportCall(OPCODE opcode, } } } - - assert(extraArg.Node == nullptr); - extraArg = NewCallArg::Primitive(instParam).WellKnown(WellKnownArg::InstParam); } if ((opcode == CEE_NEWOBJ) && ((clsFlags & CORINFO_FLG_DELEGATE) != 0)) @@ -908,18 +888,50 @@ var_types Compiler::impImportCall(OPCODE opcode, } impPopCallArgs(sig, call->AsCall()); - if (extraArg.Node != nullptr) + + // Extra args + if ((instParam != nullptr) || call->AsCall()->IsAsync() || (varArgsCookie != nullptr)) { if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L) { - call->AsCall()->gtArgs.PushFront(this, extraArg); + if (varArgsCookie != nullptr) + { + call->AsCall()->gtArgs.PushFront(this, NewCallArg::Primitive(varArgsCookie) + .WellKnown(WellKnownArg::VarArgsCookie)); + } + + if (call->AsCall()->IsAsync()) + { + call->AsCall()->gtArgs.PushFront(this, NewCallArg::Primitive(gtNewNull(), TYP_REF) + .WellKnown(WellKnownArg::AsyncContinuation)); + } + + if (instParam != nullptr) + { + call->AsCall()->gtArgs.PushFront(this, + NewCallArg::Primitive(instParam).WellKnown(WellKnownArg::InstParam)); + } } else { - call->AsCall()->gtArgs.PushBack(this, extraArg); - } + if (instParam != nullptr) + { + call->AsCall()->gtArgs.PushBack(this, + NewCallArg::Primitive(instParam).WellKnown(WellKnownArg::InstParam)); + } + + if (call->AsCall()->IsAsync()) + { + call->AsCall()->gtArgs.PushBack(this, NewCallArg::Primitive(gtNewNull(), TYP_REF) + .WellKnown(WellKnownArg::AsyncContinuation)); + } - call->gtFlags |= extraArg.Node->gtFlags & GTF_GLOB_EFFECT; + if (varArgsCookie != nullptr) + { + call->AsCall()->gtArgs.PushBack(this, NewCallArg::Primitive(varArgsCookie) + .WellKnown(WellKnownArg::VarArgsCookie)); + } + } } //------------------------------------------------------------------------- @@ -959,7 +971,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // See if we can devirt if we aren't probing. if (!probing && opts.OptimizationEnabled()) { - if (call->AsCall()->IsVirtual()) + if (call->AsCall()->IsDevirtualizationCandidate(this)) { // only true object pointers can be virtual assert(call->AsCall()->gtArgs.HasThisPointer() && @@ -975,7 +987,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // inlinees. 
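The extra-argument handling above replaces the single `extraArg` with up to three well-known extras (instantiation parameter, async continuation, varargs cookie) and mirrors the insertion order by target argument order so the extras keep the same relative order either way. A toy illustration of why the PushFront calls appear in reverse, with std::deque standing in for the call's argument list:

```cpp
#include <cstdio>
#include <deque>
#include <string>

int main()
{
    std::deque<std::string> r2l = {"userArg0", "userArg1"};
    std::deque<std::string> l2r = {"userArg0", "userArg1"};

    // Right-to-left targets: prepend the extras, last one first, so the relative
    // order instParam -> asyncContinuation -> varargsCookie survives.
    r2l.push_front("varargsCookie");
    r2l.push_front("asyncContinuation");
    r2l.push_front("instParam");

    // Left-to-right targets: append the extras in their natural order.
    l2r.push_back("instParam");
    l2r.push_back("asyncContinuation");
    l2r.push_back("varargsCookie");

    auto dump = [](const char* label, const std::deque<std::string>& args) {
        std::printf("%s:", label);
        for (const std::string& a : args)
            std::printf(" %s", a.c_str());
        std::printf("\n");
    };
    dump("R2L", r2l);
    dump("L2R", l2r);
    return 0;
}
```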
rawILOffset); - const bool wasDevirtualized = !call->AsCall()->IsVirtual(); + const bool wasDevirtualized = !call->AsCall()->IsDevirtualizationCandidate(this); if (wasDevirtualized) { @@ -1036,7 +1048,8 @@ var_types Compiler::impImportCall(OPCODE opcode, INDEBUG(call->AsCall()->gtRawILOffset = rawILOffset); // Is it an inline candidate? - impMarkInlineCandidate(call, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo); + impMarkInlineCandidate(call, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, + compInlineContext); } // append the call node. @@ -1058,13 +1071,13 @@ var_types Compiler::impImportCall(OPCODE opcode, } else { - if (newobjThis->gtOper == GT_COMMA) + if (newobjThis->OperIs(GT_COMMA)) { // We must have inserted the callout. Get the real newobj. newobjThis = newobjThis->AsOp()->gtOp2; } - assert(newobjThis->gtOper == GT_LCL_VAR); + assert(newobjThis->OperIs(GT_LCL_VAR)); impPushOnStack(gtNewLclvNode(newobjThis->AsLclVarCommon()->GetLclNum(), TYP_REF), typeInfo(clsHnd)); } } @@ -1226,7 +1239,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // Things needed to be checked when bIntrinsicImported is false. // - assert(call->gtOper == GT_CALL); + assert(call->OperIs(GT_CALL)); assert(callInfo != nullptr); if (compIsForInlining() && opcode == CEE_CALLVIRT) @@ -1246,7 +1259,20 @@ var_types Compiler::impImportCall(OPCODE opcode, INDEBUG(call->AsCall()->gtRawILOffset = rawILOffset); // Is it an inline candidate? - impMarkInlineCandidate(call, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo); + impMarkInlineCandidate(call, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, compInlineContext); + + // If the call is virtual, record the inliner's context for possible use during late devirt inlining. + // Also record the generics context if there is any. + // + if (call->AsCall()->IsDevirtualizationCandidate(this)) + { + JITDUMP("\nSaving generic context %p and inline context %p for call [%06u]\n", dspPtr(exactContextHnd), + dspPtr(compInlineContext), dspTreeID(call->AsCall())); + LateDevirtualizationInfo* const info = new (this, CMK_Inlining) LateDevirtualizationInfo; + info->exactContextHnd = exactContextHnd; + info->inlinersContext = compInlineContext; + call->AsCall()->gtLateDevirtualizationInfo = info; + } } // Extra checks for tail calls and tail recursion. @@ -1441,22 +1467,6 @@ var_types Compiler::impImportCall(OPCODE opcode, } else { - // If the call is virtual, and has a generics context, and is not going to have a class probe, - // record the context for possible use during late devirt. - // - // If we ever want to devirt at Tier0, and/or see issues where OSR methods under PGO lose - // important devirtualizations, we'll want to allow both a class probe and a captured context. - // - if (origCall->IsVirtual() && (origCall->gtCallType != CT_INDIRECT) && (exactContextHnd != nullptr) && - (origCall->gtHandleHistogramProfileCandidateInfo == nullptr)) - { - JITDUMP("\nSaving context %p for call [%06u]\n", dspPtr(exactContextHnd), dspTreeID(origCall)); - origCall->gtCallMoreFlags |= GTF_CALL_M_HAS_LATE_DEVIRT_INFO; - LateDevirtualizationInfo* const info = new (this, CMK_Inlining) LateDevirtualizationInfo; - info->exactContextHnd = exactContextHnd; - origCall->gtLateDevirtualizationInfo = info; - } - if (isFatPointerCandidate) { // fatPointer candidates should be in statements of the form call() or var = call(). 
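The LateDevirtualizationInfo recording above follows a simple pattern: the exact generic context and the inliner's context are cheap to capture at import time but hard to reconstruct later, so devirtualization candidates stash them on the call for a later phase. A minimal stand-alone sketch of that save-now-use-later shape (all types below are simplified stand-ins, and a real compiler would allocate the info from its arena):

```cpp
#include <cstdio>

struct LateDevirtInfo
{
    const char* exactContext;    // generic context handle in the real JIT
    const char* inlinersContext; // inline context of the importing method
};

struct CallNode
{
    bool                  isVirtual = true;
    const LateDevirtInfo* lateInfo  = nullptr;
};

// Late phase: if the receiver type becomes known, use the stashed contexts.
void lateDevirtualize(CallNode& call, bool exactTypeNowKnown)
{
    if (exactTypeNowKnown && (call.lateInfo != nullptr))
    {
        call.isVirtual = false;
        std::printf("devirtualized under context '%s', inliner '%s'\n",
                    call.lateInfo->exactContext, call.lateInfo->inlinersContext);
    }
}

int main()
{
    static const LateDevirtInfo info{"List<int> context", "root method"};
    CallNode call;
    call.lateInfo = &info; // recorded at import time
    lateDevirtualize(call, /* exactTypeNowKnown */ true);
    return 0;
}
```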
@@ -1567,9 +1577,6 @@ var_types Compiler::impImportCall(OPCODE opcode, return callRetTyp; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //------------------------------------------------------------------------ // impThrowIfNull: Remove redundandant boxing from ArgumentNullException_ThrowIfNull @@ -1833,7 +1840,7 @@ var_types Compiler::impImportJitTestLabelMark(int numArgs) // a GT_IND of a static field address, which should be the sum of a (hoistable) helper call and possibly some // offset within the static field block whose address is returned by the helper call. // The annotation is saying that this address calculation, but not the entire access, should be hoisted. - assert(node->OperGet() == GT_IND); + assert(node->OperIs(GT_IND)); tlAndN.m_num -= 100; GetNodeTestData()->Set(node->AsOp()->gtOp1, tlAndN); GetNodeTestData()->Remove(node); @@ -1967,7 +1974,7 @@ GenTreeCall* Compiler::impImportIndirectCall(CORINFO_SIG_INFO* sig, const DebugI // because that can introduce a call to the cast helper after the // arguments have already been evaluated. - if (fptr->OperGet() == GT_LCL_VAR) + if (fptr->OperIs(GT_LCL_VAR)) { lvaTable[fptr->AsLclVarCommon()->GetLclNum()].lvKeepType = 1; } @@ -2070,7 +2077,7 @@ void Compiler::impPopArgsForUnmanagedCall(GenTreeCall* call, CORINFO_SIG_INFO* s { GenTree* thisPtr = call->gtArgs.GetArgByIndex(0)->GetNode(); impBashVarAddrsToI(thisPtr); - assert(thisPtr->TypeGet() == TYP_I_IMPL || thisPtr->TypeGet() == TYP_BYREF); + assert(thisPtr->TypeIs(TYP_I_IMPL, TYP_BYREF)); } impRetypeUnmanagedCallArgs(call); @@ -2101,7 +2108,7 @@ void Compiler::impRetypeUnmanagedCallArgs(GenTreeCall* call) // for this arg at the call site (gc info says byref, // pinvoke sig says native int). // - if (argNode->TypeGet() == TYP_BYREF) + if (argNode->TypeIs(TYP_BYREF)) { GenTree* cast = gtNewCastNode(TYP_I_IMPL, argNode, false, TYP_I_IMPL); arg.SetEarlyNode(cast); @@ -2586,13 +2593,13 @@ GenTree* Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig) // Strip helper call away fieldTokenNode = fieldTokenNode->AsCall()->gtArgs.GetArgByIndex(0)->GetEarlyNode(); - if (fieldTokenNode->gtOper == GT_IND) + if (fieldTokenNode->OperIs(GT_IND)) { fieldTokenNode = fieldTokenNode->AsOp()->gtOp1; } // Check for constant - if (fieldTokenNode->gtOper != GT_CNS_INT) + if (!fieldTokenNode->OperIs(GT_CNS_INT)) { return nullptr; } @@ -2665,7 +2672,7 @@ GenTree* Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig) switch (newArrayCall->AsCall()->GetHelperNum()) { case CORINFO_HELP_NEWARR_1_DIRECT: - case CORINFO_HELP_NEWARR_1_OBJ: + case CORINFO_HELP_NEWARR_1_PTR: case CORINFO_HELP_NEWARR_1_MAYBEFROZEN: case CORINFO_HELP_NEWARR_1_VC: case CORINFO_HELP_NEWARR_1_ALIGN8: @@ -2766,7 +2773,7 @@ GenTree* Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig) static bool IsComma(GenTree* tree) { - return (tree != nullptr) && (tree->OperGet() == GT_COMMA); + return (tree != nullptr) && tree->OperIs(GT_COMMA); } }; @@ -2847,7 +2854,7 @@ GenTree* Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig) // // This optimization is only valid for a constant array size. 
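The Span/ReadOnlySpan get_Item expansion sketched in the comment above amounts to a single unsigned bounds check plus a byref computed into the span's backing reference. A scalar C++ sketch of that shape, where SpanLayout and the exception are stand-ins for the managed Span<T> layout and the JIT's range-check throw block:

```cpp
#include <cstdint>
#include <stdexcept>

template <typename T>
struct SpanLayout { T* reference; int32_t length; }; // assumed Span<T> field layout

template <typename T>
T& spanGetItem(SpanLayout<T>& s, int32_t index)
{
    // The unsigned compare folds the negative-index case into one bounds check.
    if ((uint32_t)index >= (uint32_t)s.length)
    {
        throw std::out_of_range("index"); // stands in for the range-check throw helper
    }
    return s.reference[index];            // byref = _reference + index * sizeof(T)
}
```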
// - if (arrayLengthNode->gtOper != GT_CNS_INT) + if (!arrayLengthNode->OperIs(GT_CNS_INT)) { return nullptr; } @@ -2949,13 +2956,13 @@ GenTree* Compiler::impCreateSpanIntrinsic(CORINFO_SIG_INFO* sig) // Strip helper call away fieldTokenNode = fieldTokenNode->AsCall()->gtArgs.GetArgByIndex(0)->GetNode(); - if (fieldTokenNode->gtOper == GT_IND) + if (fieldTokenNode->OperIs(GT_IND)) { fieldTokenNode = fieldTokenNode->AsOp()->gtOp1; } // Check for constant - if (fieldTokenNode->gtOper != GT_CNS_INT) + if (!fieldTokenNode->OperIs(GT_CNS_INT)) { return nullptr; } @@ -3327,6 +3334,32 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, return new (this, GT_LABEL) GenTree(GT_LABEL, TYP_I_IMPL); } + if (ni == NI_System_StubHelpers_AsyncCallContinuation) + { + GenTree* node = new (this, GT_ASYNC_CONTINUATION) GenTree(GT_ASYNC_CONTINUATION, TYP_REF); + node->SetHasOrderingSideEffect(); + node->gtFlags |= GTF_CALL | GTF_GLOB_REF; + info.compUsesAsyncContinuation = true; + return node; + } + + if (ni == NI_System_Runtime_CompilerServices_AsyncHelpers_AsyncSuspend) + { + GenTree* node = gtNewOperNode(GT_RETURN_SUSPEND, TYP_VOID, impPopStack().val); + node->SetHasOrderingSideEffect(); + node->gtFlags |= GTF_CALL | GTF_GLOB_REF; + return node; + } + + if (ni == NI_System_Runtime_CompilerServices_AsyncHelpers_Await) + { + // These are marked intrinsics simply to match them by name in + // the Await pattern optimization. Make sure we keep pIntrinsicName assigned + // (it would be overridden if we left this up to the rest of this function). + *pIntrinsicName = ni; + return nullptr; + } + bool betterToExpand = false; // Allow some lightweight intrinsics in Tier0 which can improve throughput @@ -3697,6 +3730,8 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, case NI_System_Span_get_Item: case NI_System_ReadOnlySpan_get_Item: { + optMethodFlags |= OMF_HAS_ARRAYREF; + // Have index, stack pointer-to Span s on the stack. Expand to: // // For Span @@ -3724,7 +3759,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, GenTree* indexClone = nullptr; GenTree* ptrToSpanClone = nullptr; assert(genActualType(index) == TYP_INT); - assert(ptrToSpan->TypeGet() == TYP_BYREF || ptrToSpan->TypeGet() == TYP_I_IMPL); + assert(ptrToSpan->TypeIs(TYP_BYREF, TYP_I_IMPL)); #if defined(DEBUG) if (verbose) @@ -3874,7 +3909,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, { GenTree* op1 = impStackTop(0).val; CorInfoHelpFunc typeHandleHelper; - if (op1->gtOper == GT_CALL && op1->AsCall()->IsHelperCall() && + if (op1->OperIs(GT_CALL) && op1->AsCall()->IsHelperCall() && gtIsTypeHandleToRuntimeTypeHandleHelper(op1->AsCall(), &typeHandleHelper)) { op1 = impPopStack().val; @@ -4114,7 +4149,27 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, break; } +<<<<<<< HEAD #if defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_XARCH) || defined(TARGET_WASM) +======= + case NI_System_Threading_Thread_FastPollGC: + { + optMethodFlags |= OMF_NEEDS_GCPOLLS; + compCurBB->SetFlags(BBF_NEEDS_GCPOLL); + + GenTree* gcpoll = new (this, GT_GCPOLL) GenTree(GT_GCPOLL, TYP_VOID); + // Prevent both reordering and removal. Invalid optimizations of Thread.FastPollGC are + // very subtle and hard to observe. Thus we are conservatively marking it with both + // GTF_CALL and GTF_GLOB_REF side-effects even though it may be more than strictly + // necessary. The conservative side-effects are unlikely to have negative impact + // on code quality in this case. 
+ gcpoll->gtFlags |= (GTF_CALL | GTF_GLOB_REF); + retNode = gcpoll; + break; + } + +#if defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_XARCH) +>>>>>>> upstream-jun case NI_System_Threading_Interlocked_Or: case NI_System_Threading_Interlocked_And: { @@ -4258,11 +4313,10 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, #ifdef FEATURE_HW_INTRINSICS case NI_System_Math_FusedMultiplyAdd: { + assert(varTypeIsFloating(callType)); #ifdef TARGET_XARCH - if (IsAvx10OrIsaSupportedOpportunistically(InstructionSet_FMA)) + if (compOpportunisticallyDependsOn(InstructionSet_FMA)) { - assert(varTypeIsFloating(callType)); - // We are constructing a chain of intrinsics similar to: // return FMA.MultiplyAddScalar( // Vector128.CreateScalarUnsafe(x), @@ -4285,39 +4339,34 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, break; } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) - { - assert(varTypeIsFloating(callType)); + // We are constructing a chain of intrinsics similar to: + // return AdvSimd.FusedMultiplyAddScalar( + // Vector64.Create{ScalarUnsafe}(z), + // Vector64.Create{ScalarUnsafe}(y), + // Vector64.Create{ScalarUnsafe}(x) + // ).ToScalar(); - // We are constructing a chain of intrinsics similar to: - // return AdvSimd.FusedMultiplyAddScalar( - // Vector64.Create{ScalarUnsafe}(z), - // Vector64.Create{ScalarUnsafe}(y), - // Vector64.Create{ScalarUnsafe}(x) - // ).ToScalar(); + impSpillSideEffect(true, stackState.esStackDepth - + 3 DEBUGARG("Spilling op1 side effects for FusedMultiplyAdd")); - impSpillSideEffect(true, stackState.esStackDepth - - 3 DEBUGARG("Spilling op1 side effects for FusedMultiplyAdd")); + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op2 side effects for FusedMultiplyAdd")); - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op2 side effects for FusedMultiplyAdd")); + GenTree* op3 = impImplicitR4orR8Cast(impPopStack().val, callType); + GenTree* op2 = impImplicitR4orR8Cast(impPopStack().val, callType); + GenTree* op1 = impImplicitR4orR8Cast(impPopStack().val, callType); - GenTree* op3 = impImplicitR4orR8Cast(impPopStack().val, callType); - GenTree* op2 = impImplicitR4orR8Cast(impPopStack().val, callType); - GenTree* op1 = impImplicitR4orR8Cast(impPopStack().val, callType); + op3 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op3, callJitType, 8); + op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op2, callJitType, 8); + op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op1, callJitType, 8); - op3 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op3, callJitType, 8); - op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op2, callJitType, 8); - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op1, callJitType, 8); + // Note that AdvSimd.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 + op2 * op3 + // while Math{F}.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 * op2 + op3 + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op3, op2, op1, NI_AdvSimd_FusedMultiplyAddScalar, + callJitType, 8); - // Note that AdvSimd.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 + op2 * op3 - // while Math{F}.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 * op2 + op3 - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op3, op2, op1, NI_AdvSimd_FusedMultiplyAddScalar, - callJitType, 8); - - retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 8); - break; - } + retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 8); + break; 
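The operand-order note in the ARM64 FusedMultiplyAdd block above is easy to invert, so here is a worked instance with concrete values (illustrative only):

```cpp
#include <cassert>

// Math.FusedMultiplyAdd(x, y, z) computes x * y + z, while the ARM64 scalar
// intrinsic FusedMultiplyAddScalar(a, b, c) computes a + b * c; hence the
// importer passes (op3, op2, op1) = (z, y, x).
void fmaOperandOrderExample()
{
    double x = 2.0, y = 3.0, z = 4.0;
    double managed = x * y + z; // Math.FusedMultiplyAdd(2, 3, 4) == 10
    double advsimd = z + y * x; // FusedMultiplyAddScalar(4, 3, 2) == 10
    assert(managed == advsimd && managed == 10.0);
}
```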
#endif // TODO-CQ-XArch: Ideally we would create a GT_INTRINSIC node for fma, however, that currently @@ -4677,9 +4726,11 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, { case CorInfoType::CORINFO_TYPE_SHORT: case CorInfoType::CORINFO_TYPE_USHORT: + { retNode = gtNewCastNode(TYP_INT, gtNewOperNode(GT_BSWAP16, TYP_INT, impPopStack().val), false, callType); break; + } case CorInfoType::CORINFO_TYPE_INT: case CorInfoType::CORINFO_TYPE_UINT: @@ -5053,11 +5104,9 @@ GenTree* Compiler::impSRCSUnsafeIntrinsic(NamedIntrinsic intrinsic, ClassLayout* toLayout = nullptr; var_types toType = TypeHandleToVarType(toTypeHnd, &toLayout); - if (fromType == TYP_REF || info.compCompHnd->isNullableType(fromTypeHnd) != TypeCompareState::MustNot || - toType == TYP_REF || info.compCompHnd->isNullableType(toTypeHnd) != TypeCompareState::MustNot) + if (fromType == TYP_REF || toType == TYP_REF) { - // Fallback to the software implementation to throw when the types fail a "default(T) is not null" - // check. + // Fallback to the software implementation to throw for reference types return nullptr; } @@ -5531,7 +5580,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, GenTree* op1 = nullptr; #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) - if ((intrinsic == NI_PRIMITIVE_ConvertToIntegerNative) && IsBaselineSimdIsaSupported()) + if (intrinsic == NI_PRIMITIVE_ConvertToIntegerNative) { NamedIntrinsic hwIntrinsicId = NI_Illegal; @@ -5541,15 +5590,11 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, { if (!uns) { - hwIntrinsicId = NI_SSE_ConvertToInt32WithTruncation; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - hwIntrinsicId = NI_AVX10v1_ConvertToUInt32WithTruncation; + hwIntrinsicId = NI_X86Base_ConvertToInt32WithTruncation; } - else if (IsBaselineVector512IsaSupportedOpportunistically()) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - hwIntrinsicId = NI_AVX512F_ConvertToUInt32WithTruncation; + hwIntrinsicId = NI_AVX512_ConvertToUInt32WithTruncation; } } else @@ -5558,15 +5603,11 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, if (!uns) { - hwIntrinsicId = NI_SSE2_ConvertToInt32WithTruncation; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - hwIntrinsicId = NI_AVX10v1_ConvertToUInt32WithTruncation; + hwIntrinsicId = NI_X86Base_ConvertToInt32WithTruncation; } - else if (IsBaselineVector512IsaSupportedOpportunistically()) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - hwIntrinsicId = NI_AVX512F_ConvertToUInt32WithTruncation; + hwIntrinsicId = NI_AVX512_ConvertToUInt32WithTruncation; } } } @@ -5579,15 +5620,11 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, { if (!uns) { - hwIntrinsicId = NI_SSE_X64_ConvertToInt64WithTruncation; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - hwIntrinsicId = NI_AVX10v1_X64_ConvertToUInt64WithTruncation; + hwIntrinsicId = NI_X86Base_X64_ConvertToInt64WithTruncation; } - else if (IsBaselineVector512IsaSupportedOpportunistically()) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - hwIntrinsicId = NI_AVX512F_X64_ConvertToUInt64WithTruncation; + hwIntrinsicId = NI_AVX512_X64_ConvertToUInt64WithTruncation; } } else @@ -5596,15 +5633,11 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, if (!uns) { - hwIntrinsicId = NI_SSE2_X64_ConvertToInt64WithTruncation; + hwIntrinsicId = 
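For the 16-bit ReverseEndianness case above, the value is byte-swapped and then cast back to the call type because the swap happens on a 32-bit node. A scalar sketch of the value transformation (illustrative):

```cpp
#include <cstdint>

// GT_BSWAP16 semantics on the low 16 bits: 0x1234 -> 0x3412. The extra cast in
// the hunk models normalizing the upper bits of the 32-bit intermediate.
uint16_t reverseEndianness16(uint16_t value)
{
    return (uint16_t)((value << 8) | (value >> 8));
}
```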
NI_X86Base_X64_ConvertToInt64WithTruncation; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - hwIntrinsicId = NI_AVX10v1_X64_ConvertToUInt64WithTruncation; - } - else if (IsBaselineVector512IsaSupportedOpportunistically()) - { - hwIntrinsicId = NI_AVX512F_X64_ConvertToUInt64WithTruncation; + hwIntrinsicId = NI_AVX512_X64_ConvertToUInt64WithTruncation; } } } @@ -5742,7 +5775,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, } #endif // !TARGET_64BIT -#if defined(FEATURE_HW_INTRINSICS) +#ifdef TARGET_RISCV64 + if (compOpportunisticallyDependsOn(InstructionSet_Zbb)) + { + impPopStack(); + result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_LeadingZeroCount, + nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE})); + } +#elif defined(FEATURE_HW_INTRINSICS) #if defined(TARGET_XARCH) if (compOpportunisticallyDependsOn(InstructionSet_LZCNT)) { @@ -5752,7 +5792,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, hwintrinsic = varTypeIsLong(baseType) ? NI_LZCNT_X64_LeadingZeroCount : NI_LZCNT_LeadingZeroCount; result = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); } - else if (compOpportunisticallyDependsOn(InstructionSet_X86Base)) + else { // Pop the value from the stack impPopStack(); @@ -5793,15 +5833,12 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, result = gtNewLclvNode(tmp, baseType); } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_ArmBase)) - { - // Pop the value from the stack - impPopStack(); + // Pop the value from the stack + impPopStack(); - hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount; - result = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic); - baseType = TYP_INT; - } + hwintrinsic = varTypeIsLong(baseType) ? 
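All of the LeadingZeroCount paths above must agree on the zero input: the managed contract returns the operand's full bit width, which is exactly the case the legacy bit-scan instructions leave undefined. A scalar sketch of the required semantics (illustrative; the __builtin_clz guard mirrors that special case):

```cpp
#include <cstdint>

// LeadingZeroCount semantics: a zero input yields the operand's bit width.
uint32_t leadingZeroCount32(uint32_t value)
{
    // __builtin_clz(0) is undefined, just as bsr is undefined for zero, which
    // is why hardware paths without lzcnt need an explicit zero check.
    return (value == 0) ? 32u : (uint32_t)__builtin_clz(value);
}
```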
NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount; + result = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic); + baseType = TYP_INT; #endif // TARGET_* #endif // FEATURE_HW_INTRINSICS @@ -5918,7 +5955,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, } #endif // !TARGET_64BIT -#if defined(FEATURE_HW_INTRINSICS) +#ifdef TARGET_RISCV64 + if (compOpportunisticallyDependsOn(InstructionSet_Zbb)) + { + impPopStack(); + result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_PopCount, + nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE})); + } +#elif defined(FEATURE_HW_INTRINSICS) #if defined(TARGET_XARCH) if (compOpportunisticallyDependsOn(InstructionSet_POPCNT)) { @@ -5929,10 +5973,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, result = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) - { - // TODO-ARM64-CQ: PopCount should be handled as an intrinsic for non-constant cases - } + // TODO-ARM64-CQ: PopCount should be handled as an intrinsic for non-constant cases #endif // TARGET_* #endif // FEATURE_HW_INTRINSICS @@ -6075,7 +6116,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, } #endif // !TARGET_64BIT -#if defined(FEATURE_HW_INTRINSICS) +#ifdef TARGET_RISCV64 + if (compOpportunisticallyDependsOn(InstructionSet_Zbb)) + { + impPopStack(); + result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_TrailingZeroCount, + nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE})); + } +#elif defined(FEATURE_HW_INTRINSICS) #if defined(TARGET_XARCH) if (compOpportunisticallyDependsOn(InstructionSet_BMI1)) { @@ -6085,7 +6133,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, hwintrinsic = varTypeIsLong(baseType) ? NI_BMI1_X64_TrailingZeroCount : NI_BMI1_TrailingZeroCount; result = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); } - else if (compOpportunisticallyDependsOn(InstructionSet_X86Base)) + else { // Pop the value from the stack impPopStack(); @@ -6124,19 +6172,15 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, result = gtNewLclvNode(tmp, baseType); } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_ArmBase)) - { - // Pop the value from the stack - impPopStack(); + // Pop the value from the stack + impPopStack(); - hwintrinsic = - varTypeIsLong(baseType) ? NI_ArmBase_Arm64_ReverseElementBits : NI_ArmBase_ReverseElementBits; - op1 = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); + hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_ReverseElementBits : NI_ArmBase_ReverseElementBits; + op1 = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); - hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount; - result = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic); - baseType = TYP_INT; - } + hwintrinsic = varTypeIsLong(baseType) ? 
NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount; + result = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic); + baseType = TYP_INT; #endif // TARGET_* #endif // FEATURE_HW_INTRINSICS @@ -6280,11 +6324,20 @@ void Compiler::impPopCallArgs(CORINFO_SIG_INFO* sig, GenTreeCall* call) NewCallArg arg; if (varTypeIsStruct(jitSigType)) { - arg = NewCallArg::Struct(argNode, jitSigType, classHnd); + arg = NewCallArg::Struct(argNode, jitSigType, typGetObjLayout(classHnd)); } else { +<<<<<<< HEAD arg = NewCallArg::Primitive(argNode, params[i - 1].CorType); +======= + arg = NewCallArg::Primitive(argNode, jitSigType); + + if (i == 1 && (sig->callConv & CORINFO_CALLCONV_EXPLICITTHIS)) + { + arg = arg.WellKnown(WellKnownArg::ThisPointer); + } +>>>>>>> upstream-jun } call->gtArgs.PushFront(this, arg); @@ -6319,7 +6372,7 @@ GenTree* Compiler::impTransformThis(GenTree* thisPtr, // This does a LDIND on the obj, which should be a byref. pointing to a ref impBashVarAddrsToI(obj); - assert(genActualType(obj->gtType) == TYP_I_IMPL || obj->gtType == TYP_BYREF); + assert(genActualType(obj->gtType) == TYP_I_IMPL || obj->TypeIs(TYP_BYREF)); CorInfoType constraintTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass); obj = gtNewIndir(JITtype2varType(constraintTyp), obj); @@ -6337,7 +6390,7 @@ GenTree* Compiler::impTransformThis(GenTree* thisPtr, GenTree* obj = thisPtr; - assert(obj->TypeGet() == TYP_BYREF || obj->TypeGet() == TYP_I_IMPL); + assert(obj->TypeIs(TYP_BYREF, TYP_I_IMPL)); ClassLayout* layout; var_types objType = TypeHandleToVarType(pConstrainedResolvedToken->hClass, &layout); obj = (objType == TYP_STRUCT) ? gtNewBlkIndir(layout, obj) : gtNewIndir(objType, obj); @@ -6385,8 +6438,7 @@ bool Compiler::impCanPInvokeInline() // from a call to see if the call qualifies as an inline pinvoke. // // Arguments: -// block - block containing the call, or for inlinees, block -// containing the call being inlined +// block - block containing the call // // Return Value: // true if this call can legally qualify as an inline pinvoke, false otherwise @@ -6403,9 +6455,9 @@ bool Compiler::impCanPInvokeInline() // TODO-CQ: The inlining frame no longer has a GSCookie, so the common on this // restriction is out of date. However, given that there is a comment // about protecting the framelet, I'm not confident about what this -// is actually protecteing, so I don't want to remove this +// is actually protecting, so I don't want to remove this // restriction without further analysis analysis. -// * We disable pinvoke inlini1ng inside handlers since the GSCookie +// * We disable pinvoke inlining inside handlers since the GSCookie // is in the inlined Frame (see // CORINFO_EE_INFO::InlinedCallFrameInfo::offsetOfGSCookie), but // this would not protect framelets/return-address of handlers. @@ -6420,49 +6472,53 @@ bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block) return false; } - // The remaining limitations do not apply to NativeAOT - if (IsTargetAbi(CORINFO_NATIVEAOT_ABI)) - { - return true; - } - - // The VM assumes that the PInvoke frame in IL Stub is only going to be used - // for the PInvoke target call. The PInvoke frame cannot be reused by marshalling helper - // calls (see InlinedCallFrame::GetActualInteropMethodDesc and related stackwalking code). 
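The ARM64 TrailingZeroCount expansion that finishes just above uses the usual rbit + clz identity: reverse the bits, then count leading zeros. A portable sketch of the same identity (illustrative; the helpers are stand-ins for ReverseElementBits and LeadingZeroCount):

```cpp
#include <cstdint>

static uint32_t reverseBits32(uint32_t v) // stand-in for ReverseElementBits
{
    uint32_t r = 0;
    for (int i = 0; i < 32; i++)
    {
        r = (r << 1) | (v & 1);
        v >>= 1;
    }
    return r;
}

uint32_t trailingZeroCount32(uint32_t value)
{
    uint32_t reversed = reverseBits32(value);
    // clz of the reversed value counts the trailing zeros of the original.
    return (reversed == 0) ? 32u : (uint32_t)__builtin_clz(reversed);
}
```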
- if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) + // The following limitations do not apply to NativeAOT + // + if (!IsTargetAbi(CORINFO_NATIVEAOT_ABI)) { - return false; - } + // The VM assumes that the PInvoke frame in IL Stub is only going to be used + // for the PInvoke target call. The PInvoke frame cannot be reused by marshalling helper + // calls (see InlinedCallFrame::GetActualInteropMethodDesc and related stackwalking code). + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) + { + return false; + } #ifdef USE_PER_FRAME_PINVOKE_INIT - // For platforms that use per-P/Invoke InlinedCallFrame initialization, - // we can't inline P/Invokes inside of try blocks where we can resume execution in the same function. - // The runtime can correctly unwind out of an InlinedCallFrame and out of managed code. However, - // it cannot correctly unwind out of an InlinedCallFrame and stop at that frame without also unwinding - // at least one managed frame. In particular, the runtime struggles to restore non-volatile registers - // from the top-most unmanaged call before the InlinedCallFrame. As a result, the runtime does not support - // re-entering the same method frame as the InlinedCallFrame after an exception in unmanaged code. - if (block->hasTryIndex()) - { - // Check if this block's try block or any containing try blocks have catch handlers. - // If any of the containing try blocks have catch handlers, - // we cannot inline a P/Invoke for reasons above. If the handler is a fault or finally handler, - // we can inline a P/Invoke into this block in the try since the code will not resume execution - // in the same method after throwing an exception if only fault or finally handlers are executed. - for (unsigned int ehIndex = block->getTryIndex(); ehIndex != EHblkDsc::NO_ENCLOSING_INDEX; - ehIndex = ehGetEnclosingTryIndex(ehIndex)) - { - if (ehGetDsc(ehIndex)->HasCatchHandler()) + // For platforms that use per-P/Invoke InlinedCallFrame initialization, + // we can't inline P/Invokes inside of try blocks where we can resume execution in the same function. + // The runtime can correctly unwind out of an InlinedCallFrame and out of managed code. However, + // it cannot correctly unwind out of an InlinedCallFrame and stop at that frame without also unwinding + // at least one managed frame. In particular, the runtime struggles to restore non-volatile registers + // from the top-most unmanaged call before the InlinedCallFrame. As a result, the runtime does not support + // re-entering the same method frame as the InlinedCallFrame after an exception in unmanaged code. + if (block->hasTryIndex()) + { + // Check if this block's try block or any containing try blocks have catch handlers. + // If any of the containing try blocks have catch handlers, + // we cannot inline a P/Invoke for reasons above. If the handler is a fault or finally handler, + // we can inline a P/Invoke into this block in the try since the code will not resume execution + // in the same method after throwing an exception if only fault or finally handlers are executed. + for (unsigned int ehIndex = block->getTryIndex(); ehIndex != EHblkDsc::NO_ENCLOSING_INDEX; + ehIndex = ehGetEnclosingTryIndex(ehIndex)) { - return false; + if (ehGetDsc(ehIndex)->HasCatchHandler()) + { + return false; + } } } +#endif // USE_PER_FRAME_PINVOKE_INIT + } + if (!compIsForInlining()) + { return true; } -#endif // USE_PER_FRAME_PINVOKE_INIT - return true; + // If inlining, verify conditions for the call site block too. 
+ // + return impInlineRoot()->impCanPInvokeInlineCallSite(impInlineInfo->iciBlock); } //------------------------------------------------------------------------ @@ -6474,8 +6530,7 @@ bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block) // methHnd - handle for the method being called (may be null) // sig - signature of the method being called // mflags - method flags for the method being called -// block - block containing the call, or for inlinees, block -// containing the call being inlined +// block - block containing the call // // Notes: // Sets GTF_CALL_M_PINVOKE on the call for pinvokes. @@ -6546,8 +6601,7 @@ void Compiler::impCheckForPInvokeCall( // PInvoke CALLI in IL stubs must be inlined } - else if (!IsTargetAbi(CORINFO_NATIVEAOT_ABI) && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB) && - opts.IsReadyToRun()) + else if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB) && IsReadyToRun()) { // The raw PInvoke call that is inside the no marshalling R2R compiled pinvoke ILStub must // be inlined into the stub, otherwise we would end up with a stub that recursively calls @@ -6573,7 +6627,11 @@ void Compiler::impCheckForPInvokeCall( // Size-speed tradeoff: don't use inline pinvoke at rarely // executed call sites. The non-inline version is more // compact. - if (block->isRunRarely()) + // + // Zero-diff quirk: the first clause below should simply be block->isRunRarely() + // + if ((!compIsForInlining() && block->isRunRarely()) || + (compIsForInlining() && impInlineInfo->iciBlock->isRunRarely())) { return; } @@ -6632,7 +6690,7 @@ class SpillRetExprHelper // Trees with ret_expr are marked as GTF_CALL. return Compiler::WALK_SKIP_SUBTREES; } - if (tree->OperGet() == GT_RET_EXPR) + if (tree->OperIs(GT_RET_EXPR)) { SpillRetExprHelper* walker = static_cast(fgWalkPre->pCallbackData); walker->StoreRetExprAsLocalVar(pTree); @@ -6643,7 +6701,7 @@ class SpillRetExprHelper void StoreRetExprAsLocalVar(GenTree** pRetExpr) { GenTree* retExpr = *pRetExpr; - assert(retExpr->OperGet() == GT_RET_EXPR); + assert(retExpr->OperIs(GT_RET_EXPR)); const unsigned tmp = comp->lvaGrabTemp(true DEBUGARG("spilling ret_expr")); JITDUMP("Storing return expression [%06u] to a local var V%02u.\n", comp->dspTreeID(retExpr), tmp); comp->impStoreToTemp(tmp, retExpr, Compiler::CHECK_SPILL_NONE); @@ -6652,7 +6710,7 @@ class SpillRetExprHelper assert(comp->lvaTable[tmp].lvSingleDef == 0); comp->lvaTable[tmp].lvSingleDef = 1; JITDUMP("Marked V%02u as a single def temp\n", tmp); - if (retExpr->TypeGet() == TYP_REF) + if (retExpr->TypeIs(TYP_REF)) { bool isExact = false; bool isNonNull = false; @@ -6695,6 +6753,7 @@ void Compiler::addFatPointerCandidate(GenTreeCall* call) // methodGuesses - [out] the methods to guess for (mutually exclusive with classGuess) // candidatesCount - [out] number of guesses // likelihoods - [out] estimates of the likelihoods that the guesses will succeed +// verboseLogging - whether or not to do verbose logging // void Compiler::pickGDV(GenTreeCall* call, IL_OFFSET ilOffset, @@ -6702,17 +6761,22 @@ void Compiler::pickGDV(GenTreeCall* call, CORINFO_CLASS_HANDLE* classGuesses, CORINFO_METHOD_HANDLE* methodGuesses, int* candidatesCount, - unsigned* likelihoods) + unsigned* likelihoods, + bool verboseLogging) { *candidatesCount = 0; + // Get the relevant pgo info for this call + // + PgoInfo pgoInfo(call->gtInlineContext); + const int maxLikelyClasses = MAX_GDV_TYPE_CHECKS; LikelyClassMethodRecord likelyClasses[maxLikelyClasses]; unsigned numberOfClasses = 0; if (call->IsVirtualStub() || 
call->IsVirtualVtable() || call->IsHelperCall()) { - numberOfClasses = - getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset); + numberOfClasses = getLikelyClasses(likelyClasses, maxLikelyClasses, pgoInfo.PgoSchema, pgoInfo.PgoSchemaCount, + pgoInfo.PgoData, ilOffset); } const int maxLikelyMethods = MAX_GDV_TYPE_CHECKS; @@ -6725,21 +6789,24 @@ void Compiler::pickGDV(GenTreeCall* call, // impDevirtualizeCall and what happens in // GuardedDevirtualizationTransformer::CreateThen for method GDV. // - if (!opts.IsReadyToRun() && (call->IsVirtualVtable() || call->IsDelegateInvoke())) + if (!IsAot() && (call->IsVirtualVtable() || call->IsDelegateInvoke())) { assert(!call->IsHelperCall()); - numberOfMethods = - getLikelyMethods(likelyMethods, maxLikelyMethods, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset); + numberOfMethods = getLikelyMethods(likelyMethods, maxLikelyMethods, pgoInfo.PgoSchema, pgoInfo.PgoSchemaCount, + pgoInfo.PgoData, ilOffset); } if ((numberOfClasses < 1) && (numberOfMethods < 1)) { - JITDUMP("No likely class or method, sorry\n"); + if (verboseLogging) + { + JITDUMP("No likely class or method, sorry\n"); + } return; } #ifdef DEBUG - if ((verbose || JitConfig.EnableExtraSuperPmiQueries()) && (numberOfClasses > 0)) + if ((verbose || JitConfig.EnableExtraSuperPmiQueries()) && (numberOfClasses > 0) && verboseLogging) { JITDUMP("Likely classes for call [%06u]", dspTreeID(call)); if (!call->IsHelperCall()) @@ -6909,8 +6976,12 @@ void Compiler::pickGDV(GenTreeCall* call, classGuesses[guessIdx] = (CORINFO_CLASS_HANDLE)likelyClasses[guessIdx].handle; likelihoods[guessIdx] = likelyClasses[guessIdx].likelihood; *candidatesCount = *candidatesCount + 1; - JITDUMP("Accepting type %s with likelihood %u as a candidate\n", eeGetClassName(classGuesses[guessIdx]), - likelihoods[guessIdx]) + + if (verboseLogging) + { + JITDUMP("Accepting type %s with likelihood %u as a candidate\n", + eeGetClassName(classGuesses[guessIdx]), likelihoods[guessIdx]) + } } else { @@ -6933,8 +7004,11 @@ void Compiler::pickGDV(GenTreeCall* call, return; } - JITDUMP("Not guessing for method; likelihood is below %s call threshold %u\n", - call->IsDelegateInvoke() ? "delegate" : "virtual", likelihoodThreshold); + if (verboseLogging) + { + JITDUMP("Not guessing for method; likelihood is below %s call threshold %u\n", + call->IsDelegateInvoke() ? 
"delegate" : "virtual", likelihoodThreshold); + } } } @@ -7468,6 +7542,7 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, // exactContextHnd -- context handle for inlining // exactContextNeedsRuntimeLookup -- true if context required runtime lookup // callInfo -- call info from VM +// inlinersContext -- the inliner's context // // Notes: // Mostly a wrapper for impMarkInlineCandidateHelper that also undoes @@ -7477,7 +7552,8 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, void Compiler::impMarkInlineCandidate(GenTree* callNode, CORINFO_CONTEXT_HANDLE exactContextHnd, bool exactContextNeedsRuntimeLookup, - CORINFO_CALL_INFO* callInfo) + CORINFO_CALL_INFO* callInfo, + InlineContext* inlinersContext) { if (!opts.OptEnabled(CLFLG_INLINING)) { @@ -7500,7 +7576,7 @@ void Compiler::impMarkInlineCandidate(GenTree* callNode, // Do the actual evaluation impMarkInlineCandidateHelper(call, candidateId, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, - &inlineResult); + inlinersContext, &inlineResult); // Ignore non-inlineable candidates // TODO: Consider keeping them to just devirtualize without inlining, at least for interface // calls on NativeAOT, but that requires more changes elsewhere too. @@ -7523,7 +7599,8 @@ void Compiler::impMarkInlineCandidate(GenTree* callNode, const uint8_t candidatesCount = call->GetInlineCandidatesCount(); assert(candidatesCount <= 1); InlineResult inlineResult(this, call, nullptr, "impMarkInlineCandidate"); - impMarkInlineCandidateHelper(call, 0, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, &inlineResult); + impMarkInlineCandidateHelper(call, 0, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, + inlinersContext, &inlineResult); } // If this call is an inline candidate or is not a guarded devirtualization @@ -7556,6 +7633,7 @@ void Compiler::impMarkInlineCandidate(GenTree* callNode, // exactContextHnd -- context handle for inlining // exactContextNeedsRuntimeLookup -- true if context required runtime lookup // callInfo -- call info from VM +// inlinersContext -- the inliner's context // // Notes: // If callNode is an inline candidate, this method sets the flag @@ -7572,6 +7650,7 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call, CORINFO_CONTEXT_HANDLE exactContextHnd, bool exactContextNeedsRuntimeLookup, CORINFO_CALL_INFO* callInfo, + InlineContext* inlinersContext, InlineResult* inlineResult) { // Let the strategy know there's another call @@ -7653,6 +7732,15 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call, return; } + // The inliner gets confused when the unmanaged convention reverses arg order (like x86). + // Just suppress for all targets for now. + // + if (call->GetUnmanagedCallConv() != CorInfoCallConvExtension::Managed) + { + inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_UNMANAGED_CALLCONV); + return; + } + /* I removed the check for BBJ_THROW. BBJ_THROW is usually marked as rarely run. This more or less * restricts the inliner to non-expanding inlines. I removed the check to allow for non-expanding * inlining in throw blocks. I should consider the same thing for catch and filter regions. */ @@ -7756,9 +7844,7 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call, if (methAttr & CORINFO_FLG_PINVOKE) { - // See comment in impCheckForPInvokeCall - BasicBlock* block = compIsForInlining() ? 
impInlineInfo->iciBlock : compCurBB; - if (!impCanPInvokeInlineCallSite(block)) + if (!impCanPInvokeInlineCallSite(compCurBB)) { inlineResult->NoteFatal(InlineObservation::CALLSITE_PINVOKE_EH); return; @@ -7766,13 +7852,33 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call, } InlineCandidateInfo* inlineCandidateInfo = nullptr; - impCheckCanInline(call, candidateIndex, fncHandle, methAttr, exactContextHnd, &inlineCandidateInfo, inlineResult); + impCheckCanInline(call, candidateIndex, fncHandle, methAttr, exactContextHnd, inlinersContext, &inlineCandidateInfo, + inlineResult); if (inlineResult->IsFailure()) { return; } + if (inlineCandidateInfo->methInfo.EHcount > 0) + { + // We cannot inline methods with EH into filter clauses, even if marked as aggressive inline + // + if (bbInFilterBBRange(compCurBB)) + { + inlineResult->NoteFatal(InlineObservation::CALLSITE_IS_WITHIN_FILTER); + return; + } + + // Do not inline pinvoke stubs with EH. + // + if ((methAttr & CORINFO_FLG_PINVOKE) != 0) + { + inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH); + return; + } + } + // The old value should be null OR this call should be a guarded devirtualization candidate. assert(call->IsGuardedDevirtualizationCandidate() || (call->GetSingleInlineCandidateInfo() == nullptr)); @@ -7847,6 +7953,7 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_Abs: case NI_System_Math_Ceiling: case NI_System_Math_Floor: + case NI_System_Math_FusedMultiplyAdd: case NI_System_Math_Max: case NI_System_Math_Min: case NI_System_Math_MultiplyAddEstimate: @@ -7857,9 +7964,6 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_Truncate: return true; - case NI_System_Math_FusedMultiplyAdd: - return compOpportunisticallyDependsOn(InstructionSet_AdvSimd); - default: return false; } @@ -7876,19 +7980,50 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) default: return false; } +<<<<<<< HEAD #elif defined(TARGET_WASM) switch (intrinsicName) { +======= +#elif defined(TARGET_RISCV64) + switch (intrinsicName) + { + case NI_System_Math_Abs: + case NI_System_Math_Sqrt: + case NI_System_Math_MinNumber: + case NI_System_Math_MinMagnitudeNumber: + case NI_System_Math_MaxNumber: + case NI_System_Math_MaxMagnitudeNumber: + case NI_System_Math_Min: + case NI_System_Math_MinMagnitude: + case NI_System_Math_Max: + case NI_System_Math_MaxMagnitude: +>>>>>>> upstream-jun case NI_System_Math_MultiplyAddEstimate: case NI_System_Math_ReciprocalEstimate: case NI_System_Math_ReciprocalSqrtEstimate: return true; +<<<<<<< HEAD default: break; } return m_llvm->IsLlvmIntrinsic(intrinsicName); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +======= + + case NI_System_Math_MinUnsigned: + case NI_System_Math_MaxUnsigned: + case NI_PRIMITIVE_LeadingZeroCount: + case NI_PRIMITIVE_TrailingZeroCount: + case NI_PRIMITIVE_PopCount: + return compOpportunisticallyDependsOn(InstructionSet_Zbb); + + default: + return false; + } +#elif defined(TARGET_LOONGARCH64) +>>>>>>> upstream-jun switch (intrinsicName) { case NI_System_Math_Abs: @@ -7896,8 +8031,6 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_ReciprocalSqrtEstimate: { // TODO-LoongArch64: support these standard intrinsics - // TODO-RISCV64: support these standard intrinsics - return false; } @@ -7956,10 +8089,12 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_MaxMagnitude: case NI_System_Math_MaxMagnitudeNumber: case 
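For reference, the new RISC-V cases above correspond to single instructions in the Zbb extension, which is why they are only treated as target intrinsics when InstructionSet_Zbb is available (the mapping below is stated as an assumption about Zbb, not taken from this file):

```cpp
// Assumed Zbb lowering for the intrinsics gated on InstructionSet_Zbb above:
//   NI_PRIMITIVE_LeadingZeroCount   -> clz  / clzw
//   NI_PRIMITIVE_TrailingZeroCount  -> ctz  / ctzw
//   NI_PRIMITIVE_PopCount           -> cpop / cpopw
//   NI_System_Math_MinUnsigned      -> minu
//   NI_System_Math_MaxUnsigned      -> maxu
```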
NI_System_Math_MaxNumber: + case NI_System_Math_MaxUnsigned: case NI_System_Math_Min: case NI_System_Math_MinMagnitude: case NI_System_Math_MinMagnitudeNumber: case NI_System_Math_MinNumber: + case NI_System_Math_MinUnsigned: case NI_System_Math_MultiplyAddEstimate: case NI_System_Math_Pow: case NI_System_Math_ReciprocalEstimate: @@ -7984,9 +8119,18 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName) } } -bool Compiler::IsMathIntrinsic(GenTree* tree) +bool Compiler::IsBitCountingIntrinsic(NamedIntrinsic intrinsicName) { - return (tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->AsIntrinsic()->gtIntrinsicName); + switch (intrinsicName) + { + case NI_PRIMITIVE_LeadingZeroCount: + case NI_PRIMITIVE_TrailingZeroCount: + case NI_PRIMITIVE_PopCount: + return true; + + default: + return false; + } } //------------------------------------------------------------------------ @@ -8049,9 +8193,9 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, assert(methodFlags != nullptr); assert(pContextHandle != nullptr); - // This should be a virtual vtable or virtual stub call. + // This should be a devirtualization candidate. // - assert(call->IsVirtual()); + assert(call->IsDevirtualizationCandidate(this)); assert(opts.OptimizationEnabled()); #if defined(DEBUG) @@ -8111,7 +8255,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, if ((baseMethodAttribs & CORINFO_FLG_VIRTUAL) == 0) { assert(call->IsVirtualStub()); - assert(opts.IsReadyToRun()); + assert(IsAot()); JITDUMP("\nimpDevirtualizeCall: [R2R] base method not virtual, sorry\n"); return; } @@ -8272,7 +8416,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // We don't expect R2R to end up here, since it does not (yet) support // array interface devirtualization. // - assert(!opts.IsReadyToRun()); + assert(!IsAot()); // We don't expect there to be an existing inst param arg. // @@ -8293,7 +8437,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // If we failed to get a method handle, we can't directly devirtualize. // - // This can happen when prejitting, if the devirtualization crosses + // This can happen with AOT, if the devirtualization crosses // servicing bubble boundaries, or if objClass is a shared class. // if (derivedMethod == nullptr) @@ -8391,8 +8535,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // Note different embedding would be needed for NAOT/R2R, // but we have ruled those out above. // - GenTree* const instParam = - gtNewIconEmbHndNode(instantiatingStub, nullptr, GTF_ICON_METHOD_HDL, instantiatingStub); + GenTree* const instParam = gtNewIconEmbMethHndNode(instantiatingStub); call->gtArgs.InsertInstParam(this, instParam); } @@ -8407,7 +8550,6 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // it's a union field used for other things by virtual // stubs) call->ClearInlineInfo(); - call->gtCallMoreFlags &= ~GTF_CALL_M_HAS_LATE_DEVIRT_INFO; #if defined(DEBUG) if (verbose) @@ -8455,7 +8597,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // // Also, AOT may have a more nuanced notion of class equality. // - if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + if (!IsAot()) { bool mismatch = true; @@ -8735,7 +8877,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, } #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (IsAot()) { // For R2R, getCallInfo triggers bookkeeping on the zap // side and acquires the actual symbol to call so we need to call it here. 
@@ -8823,7 +8965,7 @@ Compiler::GDVProbeType Compiler::compClassifyGDVProbeType(GenTreeCall* call) return GDVProbeType::None; } - if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) || opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) || IsAot()) { return GDVProbeType::None; } @@ -9041,7 +9183,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni return true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#ifdef TARGET_64BIT // Jit64 compat: if (callerRetType == TYP_VOID) { @@ -9063,15 +9205,15 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni unsigned callerRetTypeSize = 0; unsigned calleeRetTypeSize = 0; bool isCallerRetTypMBEnreg = VarTypeIsMultiByteAndCanEnreg(callerRetType, callerRetTypeClass, &callerRetTypeSize, - true, info.compIsVarArgs, callerCallConv); + info.compIsVarArgs, callerCallConv); bool isCalleeRetTypMBEnreg = VarTypeIsMultiByteAndCanEnreg(calleeRetType, calleeRetTypeClass, &calleeRetTypeSize, - true, info.compIsVarArgs, calleeCallConv); + info.compIsVarArgs, calleeCallConv); if (varTypeIsIntegral(callerRetType) || isCallerRetTypMBEnreg) { return (varTypeIsIntegral(calleeRetType) || isCalleeRetTypMBEnreg) && (callerRetTypeSize == calleeRetTypeSize); } -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 +#endif // TARGET_64BIT return false; } @@ -9086,6 +9228,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni // fncHandle - method that will be called // methAttr - attributes for the method // exactContextHnd - exact context for the method +// inlinersContext - the inliner's context // ppInlineCandidateInfo [out] - information needed later for inlining // inlineResult - result of ongoing inline evaluation // @@ -9098,6 +9241,7 @@ void Compiler::impCheckCanInline(GenTreeCall* call, CORINFO_METHOD_HANDLE fncHandle, unsigned methAttr, CORINFO_CONTEXT_HANDLE exactContextHnd, + InlineContext* inlinersContext, InlineCandidateInfo** ppInlineCandidateInfo, InlineResult* inlineResult) { @@ -9112,6 +9256,7 @@ void Compiler::impCheckCanInline(GenTreeCall* call, CORINFO_METHOD_HANDLE fncHandle; unsigned methAttr; CORINFO_CONTEXT_HANDLE exactContextHnd; + InlineContext* inlinersContext; InlineResult* result; InlineCandidateInfo** ppInlineCandidateInfo; } param; @@ -9123,6 +9268,7 @@ void Compiler::impCheckCanInline(GenTreeCall* call, param.fncHandle = fncHandle; param.methAttr = methAttr; param.exactContextHnd = (exactContextHnd != nullptr) ? exactContextHnd : MAKE_METHODCONTEXT(fncHandle); + param.inlinersContext = inlinersContext; param.result = inlineResult; param.ppInlineCandidateInfo = ppInlineCandidateInfo; @@ -9273,7 +9419,7 @@ void Compiler::impCheckCanInline(GenTreeCall* call, pInfo->methAttr = pParam->methAttr; pInfo->initClassResult = initClassResult; pInfo->exactContextNeedsRuntimeLookup = false; - pInfo->inlinersContext = pParam->pThis->compInlineContext; + pInfo->inlinersContext = pParam->inlinersContext; // Note exactContextNeedsRuntimeLookup is reset later on, // over in impMarkInlineCandidate. 
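The size comparison that now sits under the single TARGET_64BIT guard above is easiest to read with concrete cases (illustrative; only the final integral/enregisterable-size check is shown, the earlier exact-match and widening checks still apply):

```cpp
// Illustrative outcomes of the trailing size check in impTailCallRetTypeCompatible:
//   caller returns int32,  callee returns uint32 -> compatible (both integral, same 4-byte size)
//   caller returns int64,  callee returns int32  -> rejected   (8 vs 4 bytes)
//   caller returns a 4-byte struct enregistered as an integer, callee returns int32
//                                                 -> compatible (same enregistered size)
```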
@@ -9363,30 +9509,15 @@ GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method, assert(sig->numArgs == 1); #if defined(TARGET_XARCH) - if (compExactlyDependsOn(InstructionSet_AVX10v1)) + if (compExactlyDependsOn(InstructionSet_AVX512)) { simdType = TYP_SIMD16; - intrinsicId = NI_AVX10v1_Reciprocal14Scalar; + intrinsicId = NI_AVX512_Reciprocal14Scalar; } - else if (compExactlyDependsOn(InstructionSet_AVX512F)) + else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_X86Base)) { simdType = TYP_SIMD16; - intrinsicId = NI_AVX512F_Reciprocal14Scalar; - } - else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE)) - { - if (!IsBaselineSimdIsaSupported()) - { - // While the actual intrinsic only requires SSE, the - // ToScalar intrinsic asserts that the BaselineSimdIsa - // (SSE2) is supported to help simplify the overall logic - // it has to maintain - assert(intrinsicId == NI_Illegal); - break; - } - - simdType = TYP_SIMD16; - intrinsicId = NI_SSE_ReciprocalScalar; + intrinsicId = NI_X86Base_ReciprocalScalar; } #elif defined(TARGET_ARM64) if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64)) @@ -9403,25 +9534,15 @@ GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method, assert(sig->numArgs == 1); #if defined(TARGET_XARCH) - if (compExactlyDependsOn(InstructionSet_AVX512F)) + if (compExactlyDependsOn(InstructionSet_AVX512)) { simdType = TYP_SIMD16; - intrinsicId = NI_AVX512F_ReciprocalSqrt14Scalar; + intrinsicId = NI_AVX512_ReciprocalSqrt14Scalar; } - else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE)) + else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_X86Base)) { - if (!IsBaselineSimdIsaSupported()) - { - // While the actual intrinsic only requires SSE, the - // ToScalar intrinsic asserts that the BaselineSimdIsa - // (SSE2) is supported to help simplify the overall logic - // it has to maintain - assert(intrinsicId == NI_Illegal); - break; - } - simdType = TYP_SIMD16; - intrinsicId = NI_SSE_ReciprocalSqrtScalar; + intrinsicId = NI_X86Base_ReciprocalSqrtScalar; } #elif defined(TARGET_ARM64) if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64)) @@ -9667,457 +9788,570 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, { var_types callType = JITtype2varType(callJitType); - assert(varTypeIsFloating(callType)); + assert(varTypeIsArithmetic(callType)); assert(sig->numArgs == 2); - GenTreeDblCon* cnsNode = nullptr; - GenTree* otherNode = nullptr; + if (varTypeIsFloating(callType)) + { + GenTreeDblCon* cnsNode = nullptr; + GenTree* otherNode = nullptr; - GenTree* op2 = impImplicitR4orR8Cast(impStackTop().val, callType); - GenTree* op1 = impImplicitR4orR8Cast(impStackTop(1).val, callType); + GenTree* op2 = impImplicitR4orR8Cast(impStackTop().val, callType); + GenTree* op1 = impImplicitR4orR8Cast(impStackTop(1).val, callType); - if (op2->IsCnsFltOrDbl()) - { - cnsNode = op2->AsDblCon(); - otherNode = op1; - } - else if (op1->IsCnsFltOrDbl()) - { - cnsNode = op1->AsDblCon(); - otherNode = op2; - } +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + // If Avx10.2 is enabled, the min/max operations can be done using the + // new minmax instructions which is faster than using the combination + // of instructions for lower ISAs. We can use the minmax instructions - if (cnsNode != nullptr) - { - if (otherNode->IsCnsFltOrDbl()) + if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) { - // both are constant, we can fold this operation completely. 
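For context on the two reciprocal-estimate mappings above, the instructions differ mainly in their guaranteed accuracy; the bounds below are quoted from the ISA documentation as an assumption, not something this change depends on:

```cpp
// Approximate relative-error bounds for the scalar reciprocal estimates:
//   rcpss    (NI_X86Base_ReciprocalScalar)  : |error| <= 1.5 * 2^-12
//   vrcp14ss (NI_AVX512_Reciprocal14Scalar) : |error| <  2^-14
// e.g. Math.ReciprocalEstimate(3.0f) may come back as roughly 0.33325f
// rather than the correctly rounded 0.33333334f.
```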
Pop both peeked values + impPopStack(); + impPopStack(); + /** + * ctrlByte A control byte (imm8) that specifies the type of min/max operation and sign behavior: + * - Bits [1:0] (Op-select): Determines the operation performed: + * - 0b00: minimum - Returns x if x ≤ y, otherwise y; NaN handling applies. + * - 0b01: maximum - Returns x if x ≥ y, otherwise y; NaN handling applies. + * - 0b10: minimumMagnitude - Compares absolute values, returns the smaller magnitude. + * - 0b11: maximumMagnitude - Compares absolute values, returns the larger magnitude. + * - Bit [4] (min/max mode): Determines whether the instruction follows IEEE-compliant NaN + * handling: + * - 0: Standard min/max (propagates NaNs). + * - 1: Number-preferential min/max (ignores signaling NaNs). + * - Bits [3:2] (Sign control): Defines how the result’s sign is determined: + * - 0b00: Select sign from the first operand (src1). + * - 0b01: Select sign from the comparison result. + * - 0b10: Force result sign to 0 (positive). + * - 0b11: Force result sign to 1 (negative). + */ + uint8_t ctrlByte = 0x04; // Select sign from comparison result + ctrlByte |= isMax ? 0x01 : 0x00; + ctrlByte |= isMagnitude ? 0x02 : 0x00; + ctrlByte |= isNumber ? 0x10 : 0x00; + + GenTree* retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(ctrlByte), + NI_AVX10v2_MinMaxScalar, callJitType, 16); + return gtNewSimdToScalarNode(genActualType(callType), retNode, callJitType, 16); + } +#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH - double x = cnsNode->DconValue(); - double y = otherNode->AsDblCon()->DconValue(); - double z; + if (op2->IsCnsFltOrDbl()) + { + cnsNode = op2->AsDblCon(); + otherNode = op1; + } + else if (op1->IsCnsFltOrDbl()) + { + cnsNode = op1->AsDblCon(); + otherNode = op2; + } - if (isMax) + if (cnsNode != nullptr) + { + if (otherNode->IsCnsFltOrDbl()) { - if (isMagnitude) + // both are constant, we can fold this operation completely. 
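Two worked encodings of the control byte documented above, mirroring the three bit-ORs in the added code (the helper exists only for illustration):

```cpp
#include <cstdint>

constexpr uint8_t makeCtrlByte(bool isMax, bool isMagnitude, bool isNumber)
{
    // 0x04 selects "sign from the comparison result", matching the code above.
    return uint8_t(0x04 | (isMax ? 0x01 : 0x00) | (isMagnitude ? 0x02 : 0x00) | (isNumber ? 0x10 : 0x00));
}

static_assert(makeCtrlByte(true, true, true) == 0x17, "Math.MaxMagnitudeNumber");
static_assert(makeCtrlByte(false, false, false) == 0x04, "Math.Min (NaN-propagating)");
```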
Pop both peeked values + + double x = cnsNode->DconValue(); + double y = otherNode->AsDblCon()->DconValue(); + double z; + + if (isMax) { - if (isNumber) + if (isMagnitude) + { + if (isNumber) + { + z = FloatingPointUtils::maximumMagnitudeNumber(x, y); + } + else + { + z = FloatingPointUtils::maximumMagnitude(x, y); + } + } + else if (isNumber) { - z = FloatingPointUtils::maximumMagnitudeNumber(x, y); + z = FloatingPointUtils::maximumNumber(x, y); } else { - z = FloatingPointUtils::maximumMagnitude(x, y); + z = FloatingPointUtils::maximum(x, y); } } - else if (isNumber) - { - z = FloatingPointUtils::maximumNumber(x, y); - } else { - z = FloatingPointUtils::maximum(x, y); - } - } - else - { - if (isMagnitude) - { - if (isNumber) + if (isMagnitude) { - z = FloatingPointUtils::minimumMagnitudeNumber(x, y); + if (isNumber) + { + z = FloatingPointUtils::minimumMagnitudeNumber(x, y); + } + else + { + z = FloatingPointUtils::minimumMagnitude(x, y); + } + } + else if (isNumber) + { + z = FloatingPointUtils::minimumNumber(x, y); } else { - z = FloatingPointUtils::minimumMagnitude(x, y); + z = FloatingPointUtils::minimum(x, y); } } - else if (isNumber) - { - z = FloatingPointUtils::minimumNumber(x, y); - } - else - { - z = FloatingPointUtils::minimum(x, y); - } - } - cnsNode->SetDconValue(z); - - impPopStack(); - impPopStack(); - - DEBUG_DESTROY_NODE(otherNode); - return cnsNode; - } - - // only one is constant, we can fold in specialized scenarios + cnsNode->SetDconValue(z); - if (cnsNode->IsFloatNaN()) - { - impSpillSideEffects(false, CHECK_SPILL_ALL DEBUGARG("spill side effects before propagating NaN")); - - impPopStack(); - impPopStack(); + impPopStack(); + impPopStack(); - if (isNumber) - { - DEBUG_DESTROY_NODE(cnsNode); - return otherNode; - } - else - { DEBUG_DESTROY_NODE(otherNode); return cnsNode; } - } -#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - if (!isMagnitude && compOpportunisticallyDependsOn(InstructionSet_SSE2)) - { - bool needsFixup = false; - bool canHandle = false; - bool isV512Supported = false; + // only one is constant, we can fold in specialized scenarios - if (isMax) + if (cnsNode->IsFloatNaN()) { - // maxsd, maxss return op2 if both inputs are 0 of either sign - // we require +0 to be greater than -0 we also require NaN to - // not be propagated for isNumber and to be propagated otherwise. - // - // This means for isNumber we want to do `max other, cns` and - // can only handle cns being -0 if Avx512F is supported. This is - // because if other was NaN, we want to return the non-NaN cns. - // But if cns was -0 and other was +0 we'd want to return +0 and - // so need to be able to fixup the result. - // - // For !isNumber we have the inverse and want `max cns, other` and - // can only handle cns being +0 if Avx512F is supported. This is - // because if other was NaN, we want to return other and if cns - // was +0 and other was -0 we'd want to return +0 and so need - // so need to be able to fixup the result. 
+ impSpillSideEffects(false, CHECK_SPILL_ALL DEBUGARG("spill side effects before propagating NaN")); + + impPopStack(); + impPopStack(); if (isNumber) { - needsFixup = cnsNode->IsFloatNegativeZero(); + DEBUG_DESTROY_NODE(cnsNode); + return otherNode; } else { - needsFixup = cnsNode->IsFloatPositiveZero(); + DEBUG_DESTROY_NODE(otherNode); + return cnsNode; } + } - if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported)) +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + if (!isMagnitude) + { + bool needsFixup = false; + bool canHandle = false; + + if (isMax) { - // Given the checks, op1 can safely be the cns and op2 the other node + // maxsd, maxss return op2 if both inputs are 0 of either sign + // we require +0 to be greater than -0 we also require NaN to + // not be propagated for isNumber and to be propagated otherwise. + // + // This means for isNumber we want to do `max other, cns` and + // can only handle cns being -0 if Avx512F is supported. This is + // because if other was NaN, we want to return the non-NaN cns. + // But if cns was -0 and other was +0 we'd want to return +0 and + // so need to be able to fixup the result. + // + // For !isNumber we have the inverse and want `max cns, other` and + // can only handle cns being +0 if Avx512F is supported. This is + // because if other was NaN, we want to return other and if cns + // was +0 and other was -0 we'd want to return +0 and so need + // so need to be able to fixup the result. - intrinsicName = (callType == TYP_DOUBLE) ? NI_SSE2_MaxScalar : NI_SSE_MaxScalar; + if (isNumber) + { + needsFixup = cnsNode->IsFloatNegativeZero(); + } + else + { + needsFixup = cnsNode->IsFloatPositiveZero(); + } - // one is constant and we know its something we can handle, so pop both peeked values + if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // Given the checks, op1 can safely be the cns and op2 the other node - op1 = cnsNode; - op2 = otherNode; + intrinsicName = NI_X86Base_MaxScalar; - canHandle = true; - } - } - else - { - // minsd, minss return op2 if both inputs are 0 of either sign - // we require -0 to be lesser than +0, we also require NaN to - // not be propagated for isNumber and to be propagated otherwise. - // - // This means for isNumber we want to do `min other, cns` and - // can only handle cns being +0 if Avx512F is supported. This is - // because if other was NaN, we want to return the non-NaN cns. - // But if cns was +0 and other was -0 we'd want to return -0 and - // so need to be able to fixup the result. - // - // For !isNumber we have the inverse and want `min cns, other` and - // can only handle cns being -0 if Avx512F is supported. This is - // because if other was NaN, we want to return other and if cns - // was -0 and other was +0 we'd want to return -0 and so need - // so need to be able to fixup the result. + // one is constant and we know its something we can handle, so pop both peeked values - if (isNumber) - { - needsFixup = cnsNode->IsFloatPositiveZero(); + op1 = cnsNode; + op2 = otherNode; + + canHandle = true; + } } else { - needsFixup = cnsNode->IsFloatNegativeZero(); - } - - if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported)) - { - // Given the checks, op1 can safely be the cns and op2 the other node - - intrinsicName = (callType == TYP_DOUBLE) ? 
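A few of the concrete behaviors the operand ordering and fixup described above are preserving (illustrative):

```cpp
// Managed semantics vs. the raw maxss/maxsd behavior:
//   Math.Max(-0.0, +0.0)     == +0.0  // managed Max orders +0 above -0
//   Math.Max(NaN,  1.0)      == NaN   // Max/Min propagate NaN
//   Math.MaxNumber(NaN, 1.0) == 1.0   // MaxNumber/MinNumber return the non-NaN operand
// maxss/maxsd, by contrast, simply return the second operand both when the
// inputs are zeros of opposite sign and when either input is NaN, which is why
// the constant operand's position matters and why a fixup may be required.
```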
NI_SSE2_MinScalar : NI_SSE_MinScalar; - - // one is constant and we know its something we can handle, so pop both peeked values + // minsd, minss return op2 if both inputs are 0 of either sign + // we require -0 to be lesser than +0, we also require NaN to + // not be propagated for isNumber and to be propagated otherwise. + // + // This means for isNumber we want to do `min other, cns` and + // can only handle cns being +0 if Avx512F is supported. This is + // because if other was NaN, we want to return the non-NaN cns. + // But if cns was +0 and other was -0 we'd want to return -0 and + // so need to be able to fixup the result. + // + // For !isNumber we have the inverse and want `min cns, other` and + // can only handle cns being -0 if Avx512F is supported. This is + // because if other was NaN, we want to return other and if cns + // was -0 and other was +0 we'd want to return -0 and so need + // so need to be able to fixup the result. - op1 = cnsNode; - op2 = otherNode; + if (isNumber) + { + needsFixup = cnsNode->IsFloatPositiveZero(); + } + else + { + needsFixup = cnsNode->IsFloatNegativeZero(); + } - canHandle = true; - } - } + if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // Given the checks, op1 can safely be the cns and op2 the other node - if (canHandle) - { - assert(op1->IsCnsFltOrDbl() && !op2->IsCnsFltOrDbl()); + intrinsicName = NI_X86Base_MinScalar; - impPopStack(); - impPopStack(); + // one is constant and we know its something we can handle, so pop both peeked values - GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD16); + op1 = cnsNode; + op2 = otherNode; - if (callJitType == CORINFO_TYPE_FLOAT) - { - vecCon->gtSimdVal.f32[0] = static_cast(op1->AsDblCon()->DconValue()); - } - else - { - vecCon->gtSimdVal.f64[0] = op1->AsDblCon()->DconValue(); + canHandle = true; + } } - op1 = vecCon; - op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, callJitType, 16); - - GenTree* retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsicName, callJitType, 16); - - if (needsFixup) + if (canHandle) { - GenTree* op2Clone; - op2 = impCloneExpr(op2, &op2Clone, CHECK_SPILL_ALL, - nullptr DEBUGARG("Cloning non-constant for Math.Max/Min")); - - retNode->AsHWIntrinsic()->Op(2) = op2; + assert(op1->IsCnsFltOrDbl() && !op2->IsCnsFltOrDbl()); - GenTreeVecCon* tbl = gtNewVconNode(TYP_SIMD16); + impPopStack(); + impPopStack(); - // FixupScalar(left, right, table, control) computes the input type of right - // adjusts it based on the table and then returns - // - // In our case, left is going to be the result of the RangeScalar operation - // and right is going to be op1 or op2. In the case op1/op2 is QNaN or SNaN - // we want to preserve it instead. Otherwise we want to preserve the original - // result computed by RangeScalar. - // - // If both inputs are NaN, then we'll end up taking op1 by virtue of it being - // the latter fixup. 
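The comments above explain why a ±0 constant operand forces a FixupScalar when lowering to maxss/maxsd (and the mirrored rule for minss/minsd). A hedged scalar model of the x86 max semantics makes those corner cases visible; maxsd_model is an illustrative name, not a JIT helper.

```cpp
// Illustrative scalar model of x86 MAXSD: when the operands compare equal
// (which includes +0.0 vs -0.0) or when either operand is NaN, the second
// operand is returned as-is.
double maxsd_model(double op1, double op2)
{
    return (op1 > op2) ? op1 : op2;
}

// Consequences the importer comments above are working around:
//   maxsd_model(+0.0, -0.0) == -0.0   // but Math.Max wants +0.0 > -0.0
//   maxsd_model( NaN,  2.0) ==  2.0   // a NaN in op1 is dropped
//   maxsd_model( 2.0,  NaN) ==  NaN   // a NaN in op2 is propagated
```

So for the Number variants the constant is best placed where a NaN in the non-constant operand gets discarded, and for the NaN-propagating variants it is best placed where that NaN flows through; the leftover +0/-0 ordering case is what the FixupScalar pass below repairs.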
+ GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD16); - if (isMax) + if (callJitType == CORINFO_TYPE_FLOAT) { - // QNAN: 0b0000: Preserve left - // SNAN: 0b0000 - // ZERO: 0b1000: +0 - // +ONE: 0b0000 - // -INF: 0b0000 - // +INF: 0b0000 - // -VAL: 0b0000 - // +VAL: 0b0000 - tbl->gtSimdVal.i32[0] = 0x0800; + vecCon->gtSimdVal.f32[0] = static_cast(op1->AsDblCon()->DconValue()); } else { - // QNAN: 0b0000: Preserve left - // SNAN: 0b0000 - // ZERO: 0b0111: -0 - // +ONE: 0b0000 - // -INF: 0b0000 - // +INF: 0b0000 - // -VAL: 0b0000 - // +VAL: 0b0000 - tbl->gtSimdVal.i32[0] = 0x0700; + vecCon->gtSimdVal.f64[0] = op1->AsDblCon()->DconValue(); } - NamedIntrinsic fixupScalarId = isV512Supported ? NI_AVX512F_FixupScalar : NI_AVX10v1_FixupScalar; + op1 = vecCon; + op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, callJitType, 16); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, retNode, op2Clone, tbl, gtNewIconNode(0), - fixupScalarId, callJitType, 16); - } + GenTree* retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsicName, callJitType, 16); - if (isNumber) - { - // Swap the operands so that the cnsNode is op1, this prevents - // the unknown value (which could be NaN) from being selected. + if (needsFixup) + { + GenTree* op2Clone; + op2 = impCloneExpr(op2, &op2Clone, CHECK_SPILL_ALL, + nullptr DEBUGARG("Cloning non-constant for Math.Max/Min")); - retNode->AsHWIntrinsic()->Op(1) = op2; - retNode->AsHWIntrinsic()->Op(2) = op1; - } + retNode->AsHWIntrinsic()->Op(2) = op2; + + GenTreeVecCon* tbl = gtNewVconNode(TYP_SIMD16); + + // FixupScalar(left, right, table, control) computes the input type of right + // adjusts it based on the table and then returns + // + // In our case, left is going to be the result of the RangeScalar operation + // and right is going to be op1 or op2. In the case op1/op2 is QNaN or SNaN + // we want to preserve it instead. Otherwise we want to preserve the original + // result computed by RangeScalar. + // + // If both inputs are NaN, then we'll end up taking op1 by virtue of it being + // the latter fixup. + + if (isMax) + { + // QNAN: 0b0000: Preserve left + // SNAN: 0b0000 + // ZERO: 0b1000: +0 + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b0000 + // +VAL: 0b0000 + tbl->gtSimdVal.i32[0] = 0x0800; + } + else + { + // QNAN: 0b0000: Preserve left + // SNAN: 0b0000 + // ZERO: 0b0111: -0 + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b0000 + // +VAL: 0b0000 + tbl->gtSimdVal.i32[0] = 0x0700; + } - return gtNewSimdToScalarNode(genActualType(callType), retNode, callJitType, 16); + NamedIntrinsic fixupScalarId = NI_AVX512_FixupScalar; + + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, retNode, op2Clone, tbl, gtNewIconNode(0), + fixupScalarId, callJitType, 16); + } + + if (isNumber) + { + // Swap the operands so that the cnsNode is op1, this prevents + // the unknown value (which could be NaN) from being selected. 
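The two table constants above (0x0800 for max, 0x0700 for min) drive FixupScalar: the second operand is classified into one of eight classes, each class selects a 4-bit entry from the 32-bit table, and the entry picks the result. Below is a rough decoder, under the nibble-per-class layout spelled out in the comments (class 0 = QNaN in bits 0-3 up through class 7 = +VAL in bits 28-31); only the response codes used here are modeled, and the helper name is illustrative.

```cpp
#include <cstdint>

// Rough decoder for the FixupScalar tables built above. Only the response
// codes the JIT uses here are modeled; the hardware defines more.
// classIndex: 0=QNaN, 1=SNaN, 2=zero, 3=+1, 4=-inf, 5=+inf, 6=-val, 7=+val
// Returns true via 'out' when the table dictates the result.
bool DecodeFixupEntry(uint32_t table, int classIndex, double left, double right, double* out)
{
    uint32_t entry = (table >> (classIndex * 4)) & 0xF;
    switch (entry)
    {
        case 0x0: *out = left;  return true; // preserve the first operand
        case 0x1: *out = right; return true; // preserve the second operand
        case 0x7: *out = -0.0;  return true; // force negative zero
        case 0x8: *out = +0.0;  return true; // force positive zero
        default:  return false;              // other encodings not modeled
    }
}
```

With table 0x0800, only class 2 (a zero input) is rewritten, to +0.0, and every other class keeps the max result computed before the fixup; 0x0700 is the min-side mirror, rewriting zero inputs to -0.0.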
+ + retNode->AsHWIntrinsic()->Op(1) = op2; + retNode->AsHWIntrinsic()->Op(2) = op1; + } + + return gtNewSimdToScalarNode(genActualType(callType), retNode, callJitType, 16); + } } - } #endif // FEATURE_HW_INTRINSICS && TARGET_XARCH - } + } #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - bool isV512Supported = false; - if (compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ)) - { - // We are constructing a chain of intrinsics similar to: - // var op1 = Vector128.CreateScalarUnsafe(x); - // var op2 = Vector128.CreateScalarUnsafe(y); - // - // var tmp = Avx512DQ.RangeScalar(op1, op2, imm8); - // var tbl = Vector128.CreateScalarUnsafe(0x00); - // - // tmp = Avx512F.FixupScalar(tmp, op2, tbl, 0x00); - // tmp = Avx512F.FixupScalar(tmp, op1, tbl, 0x00); - // - // return tmp.ToScalar(); + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // We are constructing a chain of intrinsics similar to: + // var op1 = Vector128.CreateScalarUnsafe(x); + // var op2 = Vector128.CreateScalarUnsafe(y); + // + // var tmp = Avx512DQ.RangeScalar(op1, op2, imm8); + // var tbl = Vector128.CreateScalarUnsafe(0x00); + // + // tmp = Avx512F.FixupScalar(tmp, op2, tbl, 0x00); + // tmp = Avx512F.FixupScalar(tmp, op1, tbl, 0x00); + // + // return tmp.ToScalar(); - // RangeScalar operates by default almost as MaxNumber or MinNumber - // but, it propagates sNaN and does not propagate qNaN. So we need - // an additional fixup to ensure we propagate qNaN as well. + // RangeScalar operates by default almost as MaxNumber or MinNumber + // but, it propagates sNaN and does not propagate qNaN. So we need + // an additional fixup to ensure we propagate qNaN as well. - uint8_t imm8; + uint8_t imm8; - if (isMax) - { - if (isMagnitude) + if (isMax) + { + if (isMagnitude) + { + // 0b01_11: Sign(CompareResult), Max-Abs Value + imm8 = 0x07; + } + else + { + // 0b01_01: Sign(CompareResult), Max Value + imm8 = 0x05; + } + } + else if (isMagnitude) { - // 0b01_11: Sign(CompareResult), Max-Abs Value - imm8 = 0x07; + // 0b01_10: Sign(CompareResult), Min-Abs Value + imm8 = 0x06; } else { - // 0b01_01: Sign(CompareResult), Max Value - imm8 = 0x05; + // 0b01_00: Sign(CompareResult), Min Value + imm8 = 0x04; } - } - else if (isMagnitude) - { - // 0b01_10: Sign(CompareResult), Min-Abs Value - imm8 = 0x06; - } - else - { - // 0b01_00: Sign(CompareResult), Min Value - imm8 = 0x04; - } - GenTree* op3 = gtNewIconNode(imm8); - GenTree* op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, impPopStack().val, callJitType, 16); - GenTree* op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, impPopStack().val, callJitType, 16); + GenTree* op3 = gtNewIconNode(imm8); + GenTree* op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, impPopStack().val, callJitType, 16); + GenTree* op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, impPopStack().val, callJitType, 16); - GenTree* op2Clone; - op2 = impCloneExpr(op2, &op2Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning op2 for Math.Max/Min")); + GenTree* op2Clone; + op2 = impCloneExpr(op2, &op2Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning op2 for Math.Max/Min")); - GenTree* op1Clone; - op1 = impCloneExpr(op1, &op1Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning op1 for Math.Max/Min")); + GenTree* op1Clone; + op1 = impCloneExpr(op1, &op1Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning op1 for Math.Max/Min")); - GenTree* tmp = - !isV512Supported - ? 
gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX10v1_RangeScalar, callJitType, 16) - : gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX512DQ_RangeScalar, callJitType, 16); - - // FixupScalar(left, right, table, control) computes the input type of right - // adjusts it based on the table and then returns - // - // In our case, left is going to be the result of the RangeScalar operation, - // which is either sNaN or a normal value, and right is going to be op1 or op2. + GenTree* tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX512_RangeScalar, callJitType, 16); - GenTree* tbl1 = gtNewVconNode(TYP_SIMD16); - GenTree* tbl2; + // FixupScalar(left, right, table, control) computes the input type of right + // adjusts it based on the table and then returns + // + // In our case, left is going to be the result of the RangeScalar operation, + // which is either sNaN or a normal value, and right is going to be op1 or op2. - // We currently have (commutative) - // * snan, snan = snan - // * snan, qnan = snan - // * snan, norm = snan - // * qnan, qnan = qnan - // * qnan, norm = norm - // * norm, norm = norm + GenTree* tbl1 = gtNewVconNode(TYP_SIMD16); + GenTree* tbl2; - NamedIntrinsic fixupHwIntrinsicID = !isV512Supported ? NI_AVX10v1_FixupScalar : NI_AVX512F_FixupScalar; - if (isNumber) - { - // We need to fixup the case of: + // We currently have (commutative) + // * snan, snan = snan + // * snan, qnan = snan // * snan, norm = snan - // - // Instead, it should be: - // * snan, norm = norm + // * qnan, qnan = qnan + // * qnan, norm = norm + // * norm, norm = norm - // First look at op1 and op2 using op2 as the classification - // - // If op2 is norm, we take op2 (norm) - // If op2 is nan, we take op1 ( nan or norm) - // - // Thus, if one input was norm the fixup is now norm - - // QNAN: 0b0000: Preserve left - // SNAN: 0b0000 - // ZERO: 0b0001: Preserve right - // +ONE: 0b0001 - // -INF: 0b0001 - // +INF: 0b0001 - // -VAL: 0b0001 - // +VAL: 0b0001 - tbl1->AsVecCon()->gtSimdVal.i32[0] = 0x11111100; - - // Next look at result and fixup using result as the classification - // - // If result is norm, we take the result (norm) - // If result is nan, we take the fixup ( nan or norm) - // - // Thus if either input was snan, we now have norm as expected - // Otherwise, the result was already correct + NamedIntrinsic fixupHwIntrinsicID = NI_AVX512_FixupScalar; + if (isNumber) + { + // We need to fixup the case of: + // * snan, norm = snan + // + // Instead, it should be: + // * snan, norm = norm + + // First look at op1 and op2 using op2 as the classification + // + // If op2 is norm, we take op2 (norm) + // If op2 is nan, we take op1 ( nan or norm) + // + // Thus, if one input was norm the fixup is now norm + + // QNAN: 0b0000: Preserve left + // SNAN: 0b0000 + // ZERO: 0b0001: Preserve right + // +ONE: 0b0001 + // -INF: 0b0001 + // +INF: 0b0001 + // -VAL: 0b0001 + // +VAL: 0b0001 + tbl1->AsVecCon()->gtSimdVal.i32[0] = 0x11111100; + + // Next look at result and fixup using result as the classification + // + // If result is norm, we take the result (norm) + // If result is nan, we take the fixup ( nan or norm) + // + // Thus if either input was snan, we now have norm as expected + // Otherwise, the result was already correct + + tbl1 = impCloneExpr(tbl1, &tbl2, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning tbl for Math.Max/Min")); + + op1Clone = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, op2Clone, tbl1, gtNewIconNode(0), + fixupHwIntrinsicID, callJitType, 16); - tbl1 = 
impCloneExpr(tbl1, &tbl2, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning tbl for Math.Max/Min")); + tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, tmp, tbl2, gtNewIconNode(0), fixupHwIntrinsicID, + callJitType, 16); + } + else + { + // We need to fixup the case of: + // * qnan, norm = norm + // + // Instead, it should be: + // * qnan, norm = qnan - op1Clone = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, op2Clone, tbl1, gtNewIconNode(0), - fixupHwIntrinsicID, callJitType, 16); + // First look at op1 and op2 using op2 as the classification + // + // If op2 is norm, we take op1 ( nan or norm) + // If op2 is snan, we take op1 ( nan or norm) + // If op2 is qnan, we take op2 (qnan) + // + // Thus, if either input was qnan the fixup is now qnan + + // QNAN: 0b0001: Preserve right + // SNAN: 0b0000: Preserve left + // ZERO: 0b0000 + // +ONE: 0b0000 + // -INF: 0b0000 + // +INF: 0b0000 + // -VAL: 0b0000 + // +VAL: 0b0000 + tbl1->AsVecCon()->gtSimdVal.i32[0] = 0x00000001; + + // Next look at result and fixup using fixup as the classification + // + // If fixup is norm, we take the result (norm) + // If fixup is sNaN, we take the result (sNaN) + // If fixup is qNaN, we take the fixup (qNaN) + // + // Thus if the fixup was qnan, we now have qnan as expected + // Otherwise, the result was already correct - tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, tmp, tbl2, gtNewIconNode(0), fixupHwIntrinsicID, - callJitType, 16); + tbl1 = impCloneExpr(tbl1, &tbl2, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning tbl for Math.Max/Min")); + + op1Clone = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, op2Clone, tbl1, gtNewIconNode(0), + fixupHwIntrinsicID, callJitType, 16); + + tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp, op1Clone, tbl2, gtNewIconNode(0), fixupHwIntrinsicID, + callJitType, 16); + } + + return gtNewSimdToScalarNode(genActualType(callType), tmp, callJitType, 16); } - else +#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH + +#ifdef TARGET_RISCV64 + GenTree *op1Clone = nullptr, *op2Clone = nullptr; + + op2 = impPopStack().val; + if (!isNumber) { - // We need to fixup the case of: - // * qnan, norm = norm - // - // Instead, it should be: - // * qnan, norm = qnan + op2 = impCloneExpr(op2, &op2Clone, CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op2 for Math.Min/Max non-Number")); + } - // First look at op1 and op2 using op2 as the classification - // - // If op2 is norm, we take op1 ( nan or norm) - // If op2 is snan, we take op1 ( nan or norm) - // If op2 is qnan, we take op2 (qnan) - // - // Thus, if either input was qnan the fixup is now qnan - - // QNAN: 0b0001: Preserve right - // SNAN: 0b0000: Preserve left - // ZERO: 0b0000 - // +ONE: 0b0000 - // -INF: 0b0000 - // +INF: 0b0000 - // -VAL: 0b0000 - // +VAL: 0b0000 - tbl1->AsVecCon()->gtSimdVal.i32[0] = 0x00000001; - - // Next look at result and fixup using fixup as the classification - // - // If fixup is norm, we take the result (norm) - // If fixup is sNaN, we take the result (sNaN) - // If fixup is qNaN, we take the fixup (qNaN) - // - // Thus if the fixup was qnan, we now have qnan as expected - // Otherwise, the result was already correct + op1 = impPopStack().val; + if (!isNumber) + { + op1 = impCloneExpr(op1, &op1Clone, CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Math.Min/Max non-Number")); + } - tbl1 = impCloneExpr(tbl1, &tbl2, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning tbl for Math.Max/Min")); + static const CORINFO_CONST_LOOKUP nullEntry = {IAT_VALUE}; + if (isMagnitude) + { + op1 = + new (this, GT_INTRINSIC) 
GenTreeIntrinsic(callType, op1, NI_System_Math_Abs, nullptr R2RARG(nullEntry)); + op2 = + new (this, GT_INTRINSIC) GenTreeIntrinsic(callType, op2, NI_System_Math_Abs, nullptr R2RARG(nullEntry)); + } + NamedIntrinsic name = isMax ? NI_System_Math_MaxNumber : NI_System_Math_MinNumber; + GenTree* minMax = + new (this, GT_INTRINSIC) GenTreeIntrinsic(callType, op1, op2, name, nullptr R2RARG(nullEntry)); - op1Clone = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, op2Clone, tbl1, gtNewIconNode(0), - fixupHwIntrinsicID, callJitType, 16); + if (!isNumber) + { + GenTreeOp* isOp1Number = gtNewOperNode(GT_EQ, TYP_INT, op1Clone, gtCloneExpr(op1Clone)); + GenTreeOp* isOp2Number = gtNewOperNode(GT_EQ, TYP_INT, op2Clone, gtCloneExpr(op2Clone)); + GenTreeOp* isOkForMinMax = gtNewOperNode(GT_EQ, TYP_INT, isOp1Number, isOp2Number); + + GenTreeOp* nanPropagator = gtNewOperNode(GT_ADD, callType, gtCloneExpr(op1Clone), gtCloneExpr(op2Clone)); - tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp, op1Clone, tbl2, gtNewIconNode(0), fixupHwIntrinsicID, - callJitType, 16); + GenTreeQmark* qmark = + gtNewQmarkNode(callType, isOkForMinMax, gtNewColonNode(callType, minMax, nanPropagator)); + // QMARK has to be a root node + unsigned tmp = lvaGrabTemp(true DEBUGARG("Temp for Qmark in Math.Min/Max non-Number")); + impStoreToTemp(tmp, qmark, CHECK_SPILL_NONE); + minMax = gtNewLclvNode(tmp, callType); } + return minMax; +#endif // TARGET_RISCV64 + } + else + { + assert(varTypeIsIntegral(callType)); + assert(!isNumber && !isMagnitude); +#ifdef TARGET_RISCV64 + if (compOpportunisticallyDependsOn(InstructionSet_Zbb)) + { + GenTree* op2 = impPopStack().val; + GenTree* op1 = impPopStack().val; + + // RISC-V integer min/max instructions operate on whole registers with preferrably ABI-extended values. + // We currently don't know if a register is ABI-extended so always cast, even for 'int' and 'uint'. + var_types preciseType = JitType2PreciseVarType(callJitType); + if (genTypeSize(preciseType) < REGSIZE_BYTES) + { + // Zero-extended 'uint' is unnatural on RISC-V + bool zeroExtend = varTypeIsUnsigned(preciseType) && (preciseType != TYP_UINT); - return gtNewSimdToScalarNode(genActualType(callType), tmp, callJitType, 16); + op2 = gtNewCastNode(TYP_I_IMPL, op2, zeroExtend, TYP_I_IMPL); + op1 = gtNewCastNode(TYP_I_IMPL, op1, zeroExtend, TYP_I_IMPL); + } + if (varTypeIsUnsigned(preciseType)) + intrinsicName = isMax ? 
NI_System_Math_MaxUnsigned : NI_System_Math_MinUnsigned; + + GenTreeIntrinsic* minMax = new (this, GT_INTRINSIC) + GenTreeIntrinsic(TYP_I_IMPL, op1, op2, intrinsicName, nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE})); + + return minMax; + } +#endif // TARGET_RISCV64 } -#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH // TODO-CQ: Returning this as an intrinsic blocks inlining and is undesirable // return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall, isSpecial); @@ -10571,19 +10805,22 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { namespaceName += 1; -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (strcmp(namespaceName, "Buffers.Binary") == 0) { if (strcmp(className, "BinaryPrimitives") == 0) { if (strcmp(methodName, "ReverseEndianness") == 0) { - result = NI_System_Buffers_Binary_BinaryPrimitives_ReverseEndianness; + RISCV64_ONLY(if (compOpportunisticallyDependsOn(InstructionSet_Zbb))) + { + result = NI_System_Buffers_Binary_BinaryPrimitives_ReverseEndianness; + } } } } else -#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) +#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (strcmp(namespaceName, "Collections.Generic") == 0) { if (strcmp(className, "Comparer`1") == 0) @@ -10621,17 +10858,16 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) if (isVectorT || (strcmp(className, "Vector") == 0)) { - if (strncmp(methodName, - "System.Runtime.Intrinsics.ISimdVector APIs to still be expanded where // possible but, they all prefix the qualified name of the interface first, so we'll // check for that and skip the prefix before trying to resolve the method. 
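Stepping back to the RISC-V Zbb integer Min/Max hunk above: the cast to TYP_I_IMPL sign-extends even 'uint', because sign-extending both 32-bit unsigned operands preserves their unsigned ordering under a 64-bit unsigned compare, and sign-extended values are the natural register form on RV64. A small self-contained check of that property, with illustrative names:

```cpp
#include <cassert>
#include <cstdint>

// Sign-extend a 32-bit value into 64 bits, the natural register form on RV64.
static uint64_t SignExtend32(uint32_t v)
{
    return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(v)));
}

int main()
{
    // For any pair of 32-bit unsigned values, comparing their sign-extended
    // 64-bit forms as unsigned gives the same ordering as comparing the
    // original 32-bit values, so a 64-bit MINU/MAXU produces the correct
    // 32-bit answer without requiring a zero-extension first.
    uint32_t samples[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
    for (uint32_t a : samples)
    {
        for (uint32_t b : samples)
        {
            bool narrow = a < b;
            bool wide   = SignExtend32(a) < SignExtend32(b);
            assert(narrow == wide);
        }
    }
    return 0;
}
```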
- if (strncmp(methodName + 70, ",T>.", 7) == 0) + if (strncmp(methodName + 60, ",T>.", 7) == 0) { - methodName += 77; + methodName += 67; } } @@ -10808,6 +11044,17 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) result = NI_System_Runtime_CompilerServices_RuntimeHelpers_GetMethodTable; } } + else if (strcmp(className, "AsyncHelpers") == 0) + { + if (strcmp(methodName, "AsyncSuspend") == 0) + { + result = NI_System_Runtime_CompilerServices_AsyncHelpers_AsyncSuspend; + } + else if (strcmp(methodName, "Await") == 0) + { + result = NI_System_Runtime_CompilerServices_AsyncHelpers_Await; + } + } else if (strcmp(className, "StaticsHelpers") == 0) { if (strcmp(methodName, "VolatileReadAsByref") == 0) @@ -11056,6 +11303,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_StubHelpers_NextCallReturnAddress; } + else if (strcmp(methodName, "AsyncCallContinuation") == 0) + { + result = NI_System_StubHelpers_AsyncCallContinuation; + } } } else if (strcmp(namespaceName, "Text") == 0) @@ -11108,6 +11359,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_Threading_Thread_get_ManagedThreadId; } + else if (strcmp(methodName, "FastPollGC") == 0) + { + result = NI_System_Threading_Thread_FastPollGC; + } } else if (strcmp(className, "Volatile") == 0) { @@ -11129,6 +11384,17 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) } } } + else if (strcmp(namespaceName, "Threading.Tasks") == 0) + { + if (strcmp(methodName, "ConfigureAwait") == 0) + { + if (strcmp(className, "Task`1") == 0 || strcmp(className, "Task") == 0 || + strcmp(className, "ValuTask`1") == 0 || strcmp(className, "ValueTask") == 0) + { + result = NI_System_Threading_Tasks_Task_ConfigureAwait; + } + } + } } } else if (strcmp(namespaceName, "Internal.Runtime") == 0) @@ -11698,9 +11964,8 @@ GenTree* Compiler::impArrayAccessIntrinsic( val = impPopStack().val; assert((genActualType(elemType) == genActualType(val->gtType)) || - (elemType == TYP_FLOAT && val->gtType == TYP_DOUBLE) || - (elemType == TYP_INT && val->gtType == TYP_BYREF) || - (elemType == TYP_DOUBLE && val->gtType == TYP_FLOAT)); + (elemType == TYP_FLOAT && val->TypeIs(TYP_DOUBLE)) || (elemType == TYP_INT && val->TypeIs(TYP_BYREF)) || + (elemType == TYP_DOUBLE && val->TypeIs(TYP_FLOAT))); } // Here, we're committed to expanding the intrinsic and creating a GT_ARR_ELEM node. @@ -11719,7 +11984,7 @@ GenTree* Compiler::impArrayAccessIntrinsic( } GenTree* arr = impPopStack().val; - assert(arr->gtType == TYP_REF); + assert(arr->TypeIs(TYP_REF)); GenTree* arrElem = new (this, GT_ARR_ELEM) GenTreeArrElem(TYP_BYREF, arr, static_cast(rank), static_cast(arrayElemSize), &inds[0]); diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 9e47eff499ab..ea8c7698f0d4 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -490,6 +490,7 @@ class IndirectCallTransformer GuardedDevirtualizationTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt) : Transformer(compiler, block, stmt) , returnTemp(BAD_VAR_NUM) + , returnValueUnused(false) { } @@ -793,9 +794,52 @@ class IndirectCallTransformer // // Note implicit by-ref returns should have already been converted // so any struct copy we induce here should be cheap. 
- InlineCandidateInfo* const inlineInfo = origCall->GetGDVCandidateInfo(0); + InlineCandidateInfo* const inlineInfo = origCall->GetGDVCandidateInfo(0); + GenTree* const retExprNode = inlineInfo->retExpr; - if (!origCall->TypeIs(TYP_VOID)) + if (retExprNode == nullptr) + { + // We do not produce GT_RET_EXPRs for CTOR calls, so there is nothing to patch. + return; + } + + GenTreeRetExpr* const retExpr = retExprNode->AsRetExpr(); + bool const noReturnValue = origCall->TypeIs(TYP_VOID); + + // If there is a return value, search the next statement to see if we can find + // retExprNode's parent. If we find it, see if retExprNode's value is unused. + // + // If we fail to find it, we will assume the return value is used. + // + if (!noReturnValue) + { + Statement* const nextStmt = stmt->GetNextStmt(); + if (nextStmt != nullptr) + { + Compiler::FindLinkData fld = compiler->gtFindLink(nextStmt, retExprNode); + GenTree* const parent = fld.parent; + + if ((parent != nullptr) && parent->OperIs(GT_COMMA) && (parent->AsOp()->gtGetOp1() == retExprNode)) + { + returnValueUnused = true; + JITDUMP("GT_RET_EXPR [%06u] value is unused\n", compiler->dspTreeID(retExprNode)); + } + } + } + + if (noReturnValue) + { + JITDUMP("Linking GT_RET_EXPR [%06u] for VOID return to NOP\n", + compiler->dspTreeID(inlineInfo->retExpr)); + inlineInfo->retExpr->gtSubstExpr = compiler->gtNewNothingNode(); + } + else if (returnValueUnused) + { + JITDUMP("Linking GT_RET_EXPR [%06u] for UNUSED return to NOP\n", + compiler->dspTreeID(inlineInfo->retExpr)); + inlineInfo->retExpr->gtSubstExpr = compiler->gtNewNothingNode(); + } + else { // If there's a spill temp already associated with this inline candidate, // use that instead of allocating a new temp. @@ -834,6 +878,16 @@ class IndirectCallTransformer { returnTemp = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt return temp")); JITDUMP("Reworking call(s) to return value via a new temp V%02u\n", returnTemp); + + // Keep the information about small typedness to avoid + // inserting unnecessary casts for normalization, which can + // make tailcall invariants unhappy. This is the same logic + // that impImportCall uses when it introduces call temps. + if (varTypeIsSmall(origCall->gtReturnType)) + { + assert(origCall->NormalizesSmallTypesOnReturn()); + compiler->lvaGetDesc(returnTemp)->lvType = origCall->gtReturnType; + } } if (varTypeIsStruct(origCall)) @@ -848,23 +902,6 @@ class IndirectCallTransformer inlineInfo->retExpr->gtSubstExpr = tempTree; } - else if (inlineInfo->retExpr != nullptr) - { - // We still oddly produce GT_RET_EXPRs for some void - // returning calls. Just bash the ret expr to a NOP. - // - // Todo: consider bagging creation of these RET_EXPRs. The only possible - // benefit they provide is stitching back larger trees for failed inlines - // of void-returning methods. But then the calls likely sit in commas and - // the benefit of a larger tree is unclear. - JITDUMP("Linking GT_RET_EXPR [%06u] for VOID return to NOP\n", - compiler->dspTreeID(inlineInfo->retExpr)); - inlineInfo->retExpr->gtSubstExpr = compiler->gtNewNothingNode(); - } - else - { - // We do not produce GT_RET_EXPRs for CTOR calls, so there is nothing to patch. 
- } } //------------------------------------------------------------------------ @@ -1052,7 +1089,6 @@ class IndirectCallTransformer if (oldRetExpr != nullptr) { inlineInfo->retExpr = compiler->gtNewInlineCandidateReturnExpr(call, call->TypeGet()); - GenTree* newRetExpr = inlineInfo->retExpr; if (returnTemp != BAD_VAR_NUM) @@ -1062,7 +1098,9 @@ class IndirectCallTransformer else { // We should always have a return temp if we return results by value - assert(origCall->TypeGet() == TYP_VOID); + // and that value is used. + assert(origCall->TypeIs(TYP_VOID) || returnValueUnused); + newRetExpr = compiler->gtUnusedValNode(newRetExpr); } compiler->fgNewStmtAtEnd(block, newRetExpr); } @@ -1467,6 +1505,7 @@ class IndirectCallTransformer unsigned returnTemp; Statement* lastStmt; bool checkFallsThrough; + bool returnValueUnused; //------------------------------------------------------------------------ // CreateTreeForLookup: Create a tree representing a lookup of a method address. diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 77f15adfb07a..e107701219ed 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1,11 +1,12 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// // This file contains code to optimize induction variables in loops based on // scalar evolution analysis (see scev.h and scev.cpp for more information // about the scalar evolution analysis). // -// Currently the following optimizations are done: +// Currently the following optimizations are implemented: // // IV widening: // This widens primary induction variables from 32 bits into 64 bits. This is @@ -37,21 +38,26 @@ // single instruction, bypassing the need to do a separate comparison with a // bound. // -// Strength reduction (disabled): -// This changes the stride of primary IVs in a loop to avoid more expensive -// multiplications inside the loop. Commonly the primary IVs are only used -// for indexing memory at some element size, which can end up with these -// multiplications. +// Strength reduction: +// Strength reduction identifies cases where all uses of a primary IV compute +// a common derived value. Commonly this happens when indexing memory at some +// element size, resulting in multiplications. It introduces a new primary IV +// that directly computes this derived value, avoiding the need for the +// original primary IV and its associated calculations. The optimization +// handles GC pointers carefully, ensuring all accesses remain within managed +// objects. // -// Strength reduction frequently relies on reversing the loop to remove the -// last non-multiplied use of the primary IV. +// Unused IV removal: +// This removes induction variables that are only used for self-updates with +// no external uses. This commonly happens after other IV optimizations have +// replaced all meaningful uses of an IV with a different, more efficient IV. // #include "jitpch.h" #include "scev.h" -// Data structure that keeps track of local occurrences inside loops. -class LoopLocalOccurrences +// Data structure that keeps track of per-loop info, like occurrences and suspension-points inside loops. 
+class PerLoopInfo { struct Occurrence { @@ -63,19 +69,26 @@ class LoopLocalOccurrences typedef JitHashTable, Occurrence*> LocalToOccurrenceMap; + struct LoopInfo + { + LocalToOccurrenceMap* LocalToOccurrences = nullptr; + bool HasSuspensionPoint = false; + }; + FlowGraphNaturalLoops* m_loops; - // For every loop, we track all occurrences exclusive to that loop. - // Occurrences in descendant loops are not kept in their ancestor's maps. - LocalToOccurrenceMap** m_maps; + // For every loop, we track all occurrences exclusive to that loop, and + // whether or not the loop has a suspension point. + // Occurrences/suspensions in descendant loops are not kept in their ancestor's maps. + LoopInfo* m_info; // Blocks whose IR we have visited to find local occurrences in. BitVec m_visitedBlocks; - LocalToOccurrenceMap* GetOrCreateMap(FlowGraphNaturalLoop* loop); + LoopInfo* GetOrCreateInfo(FlowGraphNaturalLoop* loop); template - bool VisitLoopNestMaps(FlowGraphNaturalLoop* loop, TFunc& func); + bool VisitLoopNestInfo(FlowGraphNaturalLoop* loop, TFunc& func); public: - LoopLocalOccurrences(FlowGraphNaturalLoops* loops); + PerLoopInfo(FlowGraphNaturalLoops* loops); template bool VisitOccurrences(FlowGraphNaturalLoop* loop, unsigned lclNum, TFunc func); @@ -85,38 +98,40 @@ class LoopLocalOccurrences template bool VisitStatementsWithOccurrences(FlowGraphNaturalLoop* loop, unsigned lclNum, TFunc func); + bool HasSuspensionPoint(FlowGraphNaturalLoop* loop); + void Invalidate(FlowGraphNaturalLoop* loop); }; -LoopLocalOccurrences::LoopLocalOccurrences(FlowGraphNaturalLoops* loops) +PerLoopInfo::PerLoopInfo(FlowGraphNaturalLoops* loops) : m_loops(loops) { - Compiler* comp = loops->GetDfsTree()->GetCompiler(); - m_maps = loops->NumLoops() == 0 ? nullptr : new (comp, CMK_LoopOpt) LocalToOccurrenceMap* [loops->NumLoops()] {}; + Compiler* comp = loops->GetDfsTree()->GetCompiler(); + m_info = loops->NumLoops() == 0 ? nullptr : new (comp, CMK_LoopOpt) LoopInfo[loops->NumLoops()]; BitVecTraits poTraits = loops->GetDfsTree()->PostOrderTraits(); m_visitedBlocks = BitVecOps::MakeEmpty(&poTraits); } //------------------------------------------------------------------------------ -// LoopLocalOccurrences:GetOrCreateMap: -// Get or create the map of occurrences exclusive to a single loop. +// PerLoopInfo:GetOrCreateInfo: +// Get or create the info exclusive to a single loop. // // Parameters: // loop - The loop // // Returns: -// Map of occurrences. +// Loop information. // // Remarks: // As a precondition occurrences of all descendant loops must already have // been found. 
// -LoopLocalOccurrences::LocalToOccurrenceMap* LoopLocalOccurrences::GetOrCreateMap(FlowGraphNaturalLoop* loop) +PerLoopInfo::LoopInfo* PerLoopInfo::GetOrCreateInfo(FlowGraphNaturalLoop* loop) { - LocalToOccurrenceMap* map = m_maps[loop->GetIndex()]; - if (map != nullptr) + LoopInfo& info = m_info[loop->GetIndex()]; + if (info.LocalToOccurrences != nullptr) { - return map; + return &info; } BitVecTraits poTraits = m_loops->GetDfsTree()->PostOrderTraits(); @@ -132,11 +147,10 @@ LoopLocalOccurrences::LocalToOccurrenceMap* LoopLocalOccurrences::GetOrCreateMap } #endif - Compiler* comp = m_loops->GetDfsTree()->GetCompiler(); - map = new (comp, CMK_LoopOpt) LocalToOccurrenceMap(comp->getAllocator(CMK_LoopOpt)); - m_maps[loop->GetIndex()] = map; + Compiler* comp = m_loops->GetDfsTree()->GetCompiler(); + info.LocalToOccurrences = new (comp, CMK_LoopOpt) LocalToOccurrenceMap(comp->getAllocator(CMK_LoopOpt)); - loop->VisitLoopBlocksReversePostOrder([=, &poTraits](BasicBlock* block) { + loop->VisitLoopBlocksReversePostOrder([=, &poTraits, &info](BasicBlock* block) { if (!BitVecOps::TryAddElemD(&poTraits, m_visitedBlocks, block->bbPostorderNum)) { return BasicBlockVisit::Continue; @@ -146,13 +160,15 @@ LoopLocalOccurrences::LocalToOccurrenceMap* LoopLocalOccurrences::GetOrCreateMap { for (GenTree* node : stmt->TreeList()) { + info.HasSuspensionPoint |= node->IsCall() && node->AsCall()->IsAsync(); + if (!node->OperIsAnyLocal()) { continue; } - GenTreeLclVarCommon* lcl = node->AsLclVarCommon(); - Occurrence** occurrence = map->LookupPointerOrAdd(lcl->GetLclNum(), nullptr); + GenTreeLclVarCommon* lcl = node->AsLclVarCommon(); + Occurrence** occurrence = info.LocalToOccurrences->LookupPointerOrAdd(lcl->GetLclNum(), nullptr); Occurrence* newOccurrence = new (comp, CMK_LoopOpt) Occurrence; newOccurrence->Block = block; @@ -166,15 +182,15 @@ LoopLocalOccurrences::LocalToOccurrenceMap* LoopLocalOccurrences::GetOrCreateMap return BasicBlockVisit::Continue; }); - return map; + return &info; } //------------------------------------------------------------------------------ -// LoopLocalOccurrences:VisitLoopNestMaps: -// Visit all occurrence maps of the specified loop nest. +// PerLoopInfo:VisitLoopNestInfo: +// Visit all info of the specified loop nest. // // Type parameters: -// TFunc - bool(LocalToOccurrenceMap*) functor that returns true to continue +// TFunc - bool(LoopInfo*) functor that returns true to continue // the visit and false to abort. // // Parameters: @@ -185,21 +201,21 @@ LoopLocalOccurrences::LocalToOccurrenceMap* LoopLocalOccurrences::GetOrCreateMap // True if the visit completed; false if "func" returned false for any map. // template -bool LoopLocalOccurrences::VisitLoopNestMaps(FlowGraphNaturalLoop* loop, TFunc& func) +bool PerLoopInfo::VisitLoopNestInfo(FlowGraphNaturalLoop* loop, TFunc& func) { for (FlowGraphNaturalLoop* child = loop->GetChild(); child != nullptr; child = child->GetSibling()) { - if (!VisitLoopNestMaps(child, func)) + if (!VisitLoopNestInfo(child, func)) { return false; } } - return func(GetOrCreateMap(loop)); + return func(GetOrCreateInfo(loop)); } //------------------------------------------------------------------------------ -// LoopLocalOccurrences:VisitOccurrences: +// PerLoopInfo:VisitOccurrences: // Visit all occurrences of the specified local inside the loop. // // Type parameters: @@ -216,11 +232,11 @@ bool LoopLocalOccurrences::VisitLoopNestMaps(FlowGraphNaturalLoop* loop, TFunc& // returning false. 
// template -bool LoopLocalOccurrences::VisitOccurrences(FlowGraphNaturalLoop* loop, unsigned lclNum, TFunc func) +bool PerLoopInfo::VisitOccurrences(FlowGraphNaturalLoop* loop, unsigned lclNum, TFunc func) { - auto visitor = [=, &func](LocalToOccurrenceMap* map) { + auto visitor = [=, &func](LoopInfo* info) { Occurrence* occurrence; - if (!map->Lookup(lclNum, &occurrence)) + if (!info->LocalToOccurrences->Lookup(lclNum, &occurrence)) { return true; } @@ -240,11 +256,11 @@ bool LoopLocalOccurrences::VisitOccurrences(FlowGraphNaturalLoop* loop, unsigned return true; }; - return VisitLoopNestMaps(loop, visitor); + return VisitLoopNestInfo(loop, visitor); } //------------------------------------------------------------------------------ -// LoopLocalOccurrences:HasAnyOccurrences: +// PerLoopInfo:HasAnyOccurrences: // Check if this loop has any occurrences of the specified local. // // Parameters: @@ -257,7 +273,7 @@ bool LoopLocalOccurrences::VisitOccurrences(FlowGraphNaturalLoop* loop, unsigned // Remarks: // Does not take promotion into account. // -bool LoopLocalOccurrences::HasAnyOccurrences(FlowGraphNaturalLoop* loop, unsigned lclNum) +bool PerLoopInfo::HasAnyOccurrences(FlowGraphNaturalLoop* loop, unsigned lclNum) { if (!VisitOccurrences(loop, lclNum, [](BasicBlock* block, Statement* stmt, GenTreeLclVarCommon* tree) { return false; @@ -270,7 +286,7 @@ bool LoopLocalOccurrences::HasAnyOccurrences(FlowGraphNaturalLoop* loop, unsigne } //------------------------------------------------------------------------------ -// LoopLocalOccurrences:VisitStatementsWithOccurrences: +// PerLoopInfo:VisitStatementsWithOccurrences: // Visit all statements with occurrences of the specified local inside // the loop. // @@ -292,11 +308,11 @@ bool LoopLocalOccurrences::HasAnyOccurrences(FlowGraphNaturalLoop* loop, unsigne // once. // template -bool LoopLocalOccurrences::VisitStatementsWithOccurrences(FlowGraphNaturalLoop* loop, unsigned lclNum, TFunc func) +bool PerLoopInfo::VisitStatementsWithOccurrences(FlowGraphNaturalLoop* loop, unsigned lclNum, TFunc func) { - auto visitor = [=, &func](LocalToOccurrenceMap* map) { + auto visitor = [=, &func](LoopInfo* info) { Occurrence* occurrence; - if (!map->Lookup(lclNum, &occurrence)) + if (!info->LocalToOccurrences->Lookup(lclNum, &occurrence)) { return true; } @@ -330,7 +346,43 @@ bool LoopLocalOccurrences::VisitStatementsWithOccurrences(FlowGraphNaturalLoop* return true; }; - return VisitLoopNestMaps(loop, visitor); + return VisitLoopNestInfo(loop, visitor); +} + +//------------------------------------------------------------------------------ +// PerLoopInfo:HasSuspensionPoint: +// Check if a loop has a suspension point. +// +// Parameters: +// loop - The loop +// +// Returns: +// True if so. 
+// +bool PerLoopInfo::HasSuspensionPoint(FlowGraphNaturalLoop* loop) +{ + if (!loop->GetDfsTree()->GetCompiler()->compIsAsync()) + { + return false; + } + + auto visitor = [](LoopInfo* info) { + if (info->HasSuspensionPoint) + { + // Abort now that we've found a suspension point + return false; + } + + return true; + }; + + if (!VisitLoopNestInfo(loop, visitor)) + { + // Aborted, so has a suspension point + return true; + } + + return false; } //------------------------------------------------------------------------ @@ -340,16 +392,18 @@ bool LoopLocalOccurrences::VisitStatementsWithOccurrences(FlowGraphNaturalLoop* // Parameters: // loop - The loop // -void LoopLocalOccurrences::Invalidate(FlowGraphNaturalLoop* loop) +void PerLoopInfo::Invalidate(FlowGraphNaturalLoop* loop) { for (FlowGraphNaturalLoop* child = loop->GetChild(); child != nullptr; child = child->GetSibling()) { Invalidate(child); } - if (m_maps[loop->GetIndex()] != nullptr) + LoopInfo& info = m_info[loop->GetIndex()]; + if (info.LocalToOccurrences != nullptr) { - m_maps[loop->GetIndex()] = nullptr; + info.LocalToOccurrences = nullptr; + info.HasSuspensionPoint = false; BitVecTraits poTraits = m_loops->GetDfsTree()->PostOrderTraits(); loop->VisitLoopBlocks([=, &poTraits](BasicBlock* block) { @@ -446,7 +500,7 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // initBlock - The block in where the new IV would be initialized // initedToConstant - Whether or not the new IV will be initialized to a constant // loop - The loop -// loopLocals - Data structure tracking local uses inside the loop +// loopInfo - Data structure tracking loop info, like local occurrences // // // Returns: @@ -462,11 +516,8 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // 2. We need to store the wide IV back into the narrow one in each of // the exits where the narrow IV is live-in. // -bool Compiler::optIsIVWideningProfitable(unsigned lclNum, - BasicBlock* initBlock, - bool initedToConstant, - FlowGraphNaturalLoop* loop, - LoopLocalOccurrences* loopLocals) +bool Compiler::optIsIVWideningProfitable( + unsigned lclNum, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop, PerLoopInfo* loopInfo) { for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder()) { @@ -522,7 +573,7 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, return true; }; - loopLocals->VisitOccurrences(loop, lclNum, measure); + loopInfo->VisitOccurrences(loop, lclNum, measure); if (!initedToConstant) { @@ -763,14 +814,12 @@ void Compiler::optBestEffortReplaceNarrowIVUses( // Parameters: // scevContext - Context for scalar evolution // loop - The loop -// loopLocals - Data structure for locals occurrences +// loopInfo - Data structure for tracking loop info, like locals occurrences // // Returns: // True if any primary IV was widened. // -bool Compiler::optWidenIVs(ScalarEvolutionContext& scevContext, - FlowGraphNaturalLoop* loop, - LoopLocalOccurrences* loopLocals) +bool Compiler::optWidenIVs(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, PerLoopInfo* loopInfo) { JITDUMP("Considering primary IVs of " FMT_LP " for widening\n", loop->GetIndex()); @@ -811,14 +860,14 @@ bool Compiler::optWidenIVs(ScalarEvolutionContext& scevContext, // For a struct field with occurrences of the parent local we won't // be able to do much. 
- if (lclDsc->lvIsStructField && loopLocals->HasAnyOccurrences(loop, lclDsc->lvParentLcl)) + if (lclDsc->lvIsStructField && loopInfo->HasAnyOccurrences(loop, lclDsc->lvParentLcl)) { JITDUMP(" V%02u is a struct field whose parent local V%02u has occurrences inside the loop\n", lclNum, lclDsc->lvParentLcl); continue; } - if (optWidenPrimaryIV(loop, lclNum, addRec, loopLocals)) + if (optWidenPrimaryIV(loop, lclNum, addRec, loopInfo)) { numWidened++; } @@ -835,15 +884,12 @@ bool Compiler::optWidenIVs(ScalarEvolutionContext& scevContext, // loop - The loop // lclNum - The primary IV // addRec - The add recurrence for the primary IV -// loopLocals - Data structure for locals occurrences +// loopInfo - Data structure for tracking loop info like locals occurrences // -bool Compiler::optWidenPrimaryIV(FlowGraphNaturalLoop* loop, - unsigned lclNum, - ScevAddRec* addRec, - LoopLocalOccurrences* loopLocals) +bool Compiler::optWidenPrimaryIV(FlowGraphNaturalLoop* loop, unsigned lclNum, ScevAddRec* addRec, PerLoopInfo* loopInfo) { LclVarDsc* lclDsc = lvaGetDesc(lclNum); - if (lclDsc->TypeGet() != TYP_INT) + if (!lclDsc->TypeIs(TYP_INT)) { JITDUMP(" Type is %s, no widening to be done\n", varTypeName(lclDsc->TypeGet())); return false; @@ -882,7 +928,7 @@ bool Compiler::optWidenPrimaryIV(FlowGraphNaturalLoop* loop, initBlock = startSsaDsc->GetBlock(); } - if (!optIsIVWideningProfitable(lclNum, initBlock, initToConstant, loop, loopLocals)) + if (!optIsIVWideningProfitable(lclNum, initBlock, initToConstant, loop, loopInfo)) { return false; } @@ -977,10 +1023,10 @@ bool Compiler::optWidenPrimaryIV(FlowGraphNaturalLoop* loop, return true; }; - loopLocals->VisitStatementsWithOccurrences(loop, lclNum, replace); + loopInfo->VisitStatementsWithOccurrences(loop, lclNum, replace); optSinkWidenedIV(lclNum, newLclNum, loop); - loopLocals->Invalidate(loop); + loopInfo->Invalidate(loop); return true; } @@ -1031,21 +1077,21 @@ void Compiler::optVisitBoundingExitingCondBlocks(FlowGraphNaturalLoop* loop, TFu // Parameters: // scevContext - Context for scalar evolution // loop - Loop to transform -// loopLocals - Data structure that tracks occurrences of locals in the loop +// loopInfo - Data structure that tracks occurrences of locals in the loop // // Returns: // True if the loop was made downwards counted; otherwise false. // bool Compiler::optMakeLoopDownwardsCounted(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, - LoopLocalOccurrences* loopLocals) + PerLoopInfo* loopInfo) { JITDUMP("Checking if we should make " FMT_LP " downwards counted\n", loop->GetIndex()); bool changed = false; optVisitBoundingExitingCondBlocks(loop, [=, &scevContext, &changed](BasicBlock* exiting) { JITDUMP(" Considering exiting block " FMT_BB "\n", exiting->bbNum); - changed |= optMakeExitTestDownwardsCounted(scevContext, loop, exiting, loopLocals); + changed |= optMakeExitTestDownwardsCounted(scevContext, loop, exiting, loopInfo); }); return changed; @@ -1060,7 +1106,7 @@ bool Compiler::optMakeLoopDownwardsCounted(ScalarEvolutionContext& scevContext, // scevContext - SCEV context // loop - The specific loop // exiting - Exiting block -// loopLocals - Data structure tracking local uses +// loopInfo - Data structure tracking local uses // // Returns: // True if any modification was made. 
@@ -1068,7 +1114,7 @@ bool Compiler::optMakeLoopDownwardsCounted(ScalarEvolutionContext& scevContext, bool Compiler::optMakeExitTestDownwardsCounted(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, BasicBlock* exiting, - LoopLocalOccurrences* loopLocals) + PerLoopInfo* loopInfo) { // Note: keep the heuristics here in sync with // `StrengthReductionContext::IsUseExpectedToBeRemoved`. @@ -1099,7 +1145,7 @@ bool Compiler::optMakeExitTestDownwardsCounted(ScalarEvolutionContext& scevConte unsigned candidateLclNum = stmt->GetRootNode()->AsLclVarCommon()->GetLclNum(); - if (optLocalHasNonLoopUses(candidateLclNum, loop, loopLocals)) + if (optLocalHasNonLoopUses(candidateLclNum, loop, loopInfo)) { continue; } @@ -1122,7 +1168,7 @@ bool Compiler::optMakeExitTestDownwardsCounted(ScalarEvolutionContext& scevConte return false; }; - if (!loopLocals->VisitStatementsWithOccurrences(loop, candidateLclNum, checkRemovableUse)) + if (!loopInfo->VisitStatementsWithOccurrences(loop, candidateLclNum, checkRemovableUse)) { // Aborted means we found a non-removable use continue; @@ -1215,7 +1261,7 @@ bool Compiler::optMakeExitTestDownwardsCounted(ScalarEvolutionContext& scevConte DISPSTMT(jtrueStmt); JITDUMP("\n"); - loopLocals->Invalidate(loop); + loopInfo->Invalidate(loop); return true; } @@ -1270,16 +1316,16 @@ bool Compiler::optCanAndShouldChangeExitTest(GenTree* cond, bool dump) // Parameters: // lclNum - The local // loop - The loop -// loopLocals - Data structure tracking local uses +// loopInfo - Data structure tracking local uses // // Returns: // True if the local may have non-loop uses (or if it is a field with uses of // the parent struct). // -bool Compiler::optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals) +bool Compiler::optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loop, PerLoopInfo* loopInfo) { LclVarDsc* varDsc = lvaGetDesc(lclNum); - if (varDsc->lvIsStructField && loopLocals->HasAnyOccurrences(loop, varDsc->lvParentLcl)) + if (varDsc->lvIsStructField && loopInfo->HasAnyOccurrences(loop, varDsc->lvParentLcl)) { return true; } @@ -1362,7 +1408,7 @@ class StrengthReductionContext Compiler* m_comp; ScalarEvolutionContext& m_scevContext; FlowGraphNaturalLoop* m_loop; - LoopLocalOccurrences& m_loopLocals; + PerLoopInfo& m_loopInfo; ArrayStack m_backEdgeBounds; SimplificationAssumptions m_simplAssumptions; @@ -1403,11 +1449,11 @@ class StrengthReductionContext StrengthReductionContext(Compiler* comp, ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, - LoopLocalOccurrences& loopLocals) + PerLoopInfo& loopInfo) : m_comp(comp) , m_scevContext(scevContext) , m_loop(loop) - , m_loopLocals(loopLocals) + , m_loopInfo(loopInfo) , m_backEdgeBounds(comp->getAllocator(CMK_LoopIVOpts)) , m_cursors1(comp->getAllocator(CMK_LoopIVOpts)) , m_cursors2(comp->getAllocator(CMK_LoopIVOpts)) @@ -1484,7 +1530,7 @@ bool StrengthReductionContext::TryStrengthReduce() continue; } - if (m_comp->optLocalHasNonLoopUses(primaryIVLcl->GetLclNum(), m_loop, &m_loopLocals)) + if (m_comp->optLocalHasNonLoopUses(primaryIVLcl->GetLclNum(), m_loop, &m_loopInfo)) { // We won't be able to remove this primary IV JITDUMP(" Has non-loop uses\n"); @@ -1524,12 +1570,20 @@ bool StrengthReductionContext::TryStrengthReduce() assert(nextIV != nullptr); - if (varTypeIsGC(nextIV->Type) && !StaysWithinManagedObject(nextCursors, nextIV)) + if (varTypeIsGC(nextIV->Type)) { - JITDUMP( - " Next IV computes a GC pointer that we cannot prove to be inside a 
managed object. Bailing.\n", - varTypeName(nextIV->Type)); - break; + if (m_loopInfo.HasSuspensionPoint(m_loop)) + { + JITDUMP(" Next IV computes a GC pointer in a loop with a suspension point. Bailing.\n"); + break; + } + + if (!StaysWithinManagedObject(nextCursors, nextIV)) + { + JITDUMP( + " Next IV computes a GC pointer that we cannot prove to be inside a managed object. Bailing.\n"); + break; + } } ExpandStoredCursors(nextCursors, cursors); @@ -1573,7 +1627,7 @@ bool StrengthReductionContext::TryStrengthReduce() if (TryReplaceUsesWithNewPrimaryIV(cursors, currentIV)) { strengthReducedAny = true; - m_loopLocals.Invalidate(m_loop); + m_loopInfo.Invalidate(m_loop); } } @@ -1686,7 +1740,7 @@ bool StrengthReductionContext::InitializeCursors(GenTreeLclVarCommon* primaryIVL return true; }; - if (!m_loopLocals.VisitOccurrences(m_loop, primaryIVLcl->GetLclNum(), visitor) || (m_cursors1.Height() <= 0)) + if (!m_loopInfo.VisitOccurrences(m_loop, primaryIVLcl->GetLclNum(), visitor) || (m_cursors1.Height() <= 0)) { JITDUMP(" Could not create cursors for all loop uses of primary IV\n"); return false; @@ -1882,7 +1936,7 @@ void StrengthReductionContext::ExpandStoredCursors(ArrayStack* curso GenTreeLclVarCommon* storedLcl = parent->AsLclVarCommon(); if ((storedLcl->Data() == cur) && ((cur->gtFlags & GTF_SIDE_EFFECT) == 0) && storedLcl->HasSsaIdentity() && - !m_comp->optLocalHasNonLoopUses(storedLcl->GetLclNum(), m_loop, &m_loopLocals)) + !m_comp->optLocalHasNonLoopUses(storedLcl->GetLclNum(), m_loop, &m_loopInfo)) { int numCreated = 0; ScevAddRec* cursorIV = cursor->IV; @@ -1922,7 +1976,7 @@ void StrengthReductionContext::ExpandStoredCursors(ArrayStack* curso return true; }; - if (m_loopLocals.VisitOccurrences(m_loop, storedLcl->GetLclNum(), createExtraCursor)) + if (m_loopInfo.VisitOccurrences(m_loop, storedLcl->GetLclNum(), createExtraCursor)) { JITDUMP( " [%06u] was the data of store [%06u]; expanded to %d new cursors, and will replace with a store of 0\n", @@ -2678,12 +2732,12 @@ bool StrengthReductionContext::InsertionPointPostDominatesUses(BasicBlock* // // Parameters: // loop - The loop -// loopLocals - Locals of the loop +// loopInfo - Locals of the loop // // Returns: // True if any primary IV was removed. 
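To make the unused-IV scenario concrete before the body of optRemoveUnusedIVs below, here is a hedged source-level example of the kind of loop where, once the other IV optimizations have rewritten the real uses and the exit test, the original counter is left with only its self-update; the function name is illustrative.

```cpp
// Illustrative only: after strength reduction rewrites p[i] to advance a
// derived pointer, and after the exit test is made downwards-counted, the
// 32-bit counter 'i' is left with nothing but its own 'i++' update. That is
// exactly the "only used for self-updates with no external uses" shape that
// unused-IV removal deletes.
int SumArray(const int* p, int n)
{
    int sum = 0;
    for (int i = 0; i < n; i++)
    {
        sum += p[i];
    }
    return sum;
}
```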
// -bool Compiler::optRemoveUnusedIVs(FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals) +bool Compiler::optRemoveUnusedIVs(FlowGraphNaturalLoop* loop, PerLoopInfo* loopInfo) { JITDUMP(" Now looking for unnecessary primary IVs\n"); @@ -2697,7 +2751,7 @@ bool Compiler::optRemoveUnusedIVs(FlowGraphNaturalLoop* loop, LoopLocalOccurrenc unsigned lclNum = stmt->GetRootNode()->AsLclVarCommon()->GetLclNum(); JITDUMP(" V%02u", lclNum); - if (optLocalHasNonLoopUses(lclNum, loop, loopLocals)) + if (optLocalHasNonLoopUses(lclNum, loop, loopInfo)) { JITDUMP(" has non-loop uses, cannot remove\n"); continue; @@ -2707,7 +2761,7 @@ bool Compiler::optRemoveUnusedIVs(FlowGraphNaturalLoop* loop, LoopLocalOccurrenc return optIsUpdateOfIVWithoutSideEffects(stmt->GetRootNode(), lclNum); }; - if (!loopLocals->VisitStatementsWithOccurrences(loop, lclNum, visit)) + if (!loopInfo->VisitStatementsWithOccurrences(loop, lclNum, visit)) { JITDUMP(" has essential uses, cannot remove\n"); continue; @@ -2720,9 +2774,9 @@ bool Compiler::optRemoveUnusedIVs(FlowGraphNaturalLoop* loop, LoopLocalOccurrenc return true; }; - loopLocals->VisitStatementsWithOccurrences(loop, lclNum, remove); + loopInfo->VisitStatementsWithOccurrences(loop, lclNum, remove); numRemoved++; - loopLocals->Invalidate(loop); + loopInfo->Invalidate(loop); } Metrics.UnusedIVsRemoved += numRemoved; @@ -2810,7 +2864,7 @@ PhaseStatus Compiler::optInductionVariables() m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); } - LoopLocalOccurrences loopLocals(m_loops); + PerLoopInfo loopInfo(m_loops); ScalarEvolutionContext scevContext(this); JITDUMP("Optimizing induction variables:\n"); @@ -2830,14 +2884,14 @@ PhaseStatus Compiler::optInductionVariables() continue; } - StrengthReductionContext strengthReductionContext(this, scevContext, loop, loopLocals); + StrengthReductionContext strengthReductionContext(this, scevContext, loop, loopInfo); if (strengthReductionContext.TryStrengthReduce()) { Metrics.LoopsStrengthReduced++; changed = true; } - if (optMakeLoopDownwardsCounted(scevContext, loop, &loopLocals)) + if (optMakeLoopDownwardsCounted(scevContext, loop, &loopInfo)) { Metrics.LoopsMadeDownwardsCounted++; changed = true; @@ -2847,14 +2901,14 @@ PhaseStatus Compiler::optInductionVariables() // addressing modes can include the zero/sign-extension of the index // for free. #if defined(TARGET_XARCH) && defined(TARGET_64BIT) - if (optWidenIVs(scevContext, loop, &loopLocals)) + if (optWidenIVs(scevContext, loop, &loopInfo)) { Metrics.LoopsIVWidened++; changed = true; } #endif - if (optRemoveUnusedIVs(loop, &loopLocals)) + if (optRemoveUnusedIVs(loop, &loopInfo)) { changed = true; } diff --git a/src/coreclr/jit/inline.cpp b/src/coreclr/jit/inline.cpp index aba964f2ce61..7dfa4ba474ab 100644 --- a/src/coreclr/jit/inline.cpp +++ b/src/coreclr/jit/inline.cpp @@ -332,6 +332,7 @@ InlineContext::InlineContext(InlineStrategy* strategy) , m_Code(nullptr) , m_Callee(nullptr) , m_RuntimeContext(nullptr) + , m_PgoInfo() , m_ILSize(0) , m_ImportedILSize(0) , m_ActualCallOffset(BAD_IL_OFFSET) @@ -792,16 +793,39 @@ void InlineResult::Report() // IS_NOINLINE, then we've uncovered a reason why this method // can't ever be inlined. Update the callee method attributes // so that future inline attempts for this callee fail faster. 
- + // InlineObservation obs = m_Policy->GetObservation(); - if ((m_Callee != nullptr) && (obs != InlineObservation::CALLEE_IS_NOINLINE)) + bool report = (m_Callee != nullptr); + bool suppress = (obs == InlineObservation::CALLEE_IS_NOINLINE); + bool dynamicPgo = m_RootCompiler->fgPgoDynamic; + + // If dynamic pgo is active, only propagate noinline back to metadata + // when there is a CALLEE FATAL observation. We want to make sure + // not to block future inlines based on performance or throughput considerations. + // + // Note fgPgoDynamic (and hence dynamicPgo) is true iff TieredPGO is enabled globally. + // In particular this value does not depend on the root method having PGO data. + // + if (dynamicPgo) { - JITDUMP("\nINLINER: Marking %s as NOINLINE because of %s\n", callee, InlGetObservationString(obs)); + InlineTarget target = InlGetTarget(obs); + InlineImpact impact = InlGetImpact(obs); + suppress = (target != InlineTarget::CALLEE) || (impact != InlineImpact::FATAL); + } + + if (report && !suppress) + { + JITDUMP("\nINLINER: Marking %s as NOINLINE (observation %s)\n", callee, InlGetObservationString(obs)); COMP_HANDLE comp = m_RootCompiler->info.compCompHnd; comp->setMethodAttribs(m_Callee, CORINFO_FLG_BAD_INLINEE); } + else if (suppress) + { + JITDUMP("\nINLINER: Not marking %s NOINLINE; %s (observation %s)\n", callee, + dynamicPgo ? "pgo active" : "already known", InlGetObservationString(obs)); + } } if (IsDecided() || m_reportFailureAsVmFailure || m_successResult != INLINE_PASS) @@ -927,7 +951,14 @@ InlineContext* InlineStrategy::GetRootContext() // Set the initial budget for inlining. Note this is // deliberately set very high and is intended to catch // only pathological runaway inline cases. - m_InitialTimeBudget = BUDGET * m_InitialTimeEstimate; + const unsigned budget = JitConfig.JitInlineBudget(); + + if (budget != DEFAULT_INLINE_BUDGET) + { + JITDUMP("Using non-default inline budget %u\n", budget); + } + + m_InitialTimeBudget = budget * m_InitialTimeEstimate; m_CurrentTimeBudget = m_InitialTimeBudget; // Estimate the code size if there's no inlining @@ -1452,14 +1483,13 @@ void InlineStrategy::DumpData() void InlineStrategy::DumpDataEnsurePolicyIsSet() { // Cache references to compiler substructures. - const Compiler::Info& info = m_Compiler->info; - const Compiler::Options& opts = m_Compiler->opts; + const Compiler::Info& info = m_Compiler->info; // If there weren't any successful inlines, we won't have a // successful policy, so fake one up. if (m_LastSuccessfulPolicy == nullptr) { - const bool isPrejitRoot = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT); + const bool isPrejitRoot = m_Compiler->IsAot(); m_LastSuccessfulPolicy = InlinePolicy::GetPolicy(m_Compiler, isPrejitRoot); // Add in a bit of data.... @@ -1597,10 +1627,9 @@ void InlineStrategy::DumpXml(FILE* file, unsigned indent) } // Cache references to compiler substructures. - const Compiler::Info& info = m_Compiler->info; - const Compiler::Options& opts = m_Compiler->opts; + const Compiler::Info& info = m_Compiler->info; - const bool isPrejitRoot = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT); + const bool isPrejitRoot = m_Compiler->IsAot(); // We'd really like the method identifier to be unique and // durable across crossgen invocations. 
Not clear how to @@ -1795,3 +1824,22 @@ bool InlineStrategy::IsInliningDisabled() #endif // defined(DEBUG) } + +PgoInfo::PgoInfo() +{ + PgoSchema = nullptr; + PgoSchemaCount = 0; + PgoData = nullptr; +} + +PgoInfo::PgoInfo(Compiler* compiler) +{ + PgoSchema = compiler->fgPgoSchema; + PgoSchemaCount = compiler->fgPgoSchemaCount; + PgoData = compiler->fgPgoData; +} + +PgoInfo::PgoInfo(InlineContext* context) +{ + *this = context->GetPgoInfo(); +} diff --git a/src/coreclr/jit/inline.def b/src/coreclr/jit/inline.def index efacbd4deb27..6ce57ccfeed7 100644 --- a/src/coreclr/jit/inline.def +++ b/src/coreclr/jit/inline.def @@ -28,6 +28,7 @@ INLINE_OBSERVATION(UNUSED_INITIAL, bool, "unused initial observatio INLINE_OBSERVATION(BAD_ARGUMENT_NUMBER, bool, "invalid argument number", FATAL, CALLEE) INLINE_OBSERVATION(BAD_LOCAL_NUMBER, bool, "invalid local number", FATAL, CALLEE) INLINE_OBSERVATION(COMPILATION_ERROR, bool, "compilation error", FATAL, CALLEE) +INLINE_OBSERVATION(EXPLICIT_TAIL_PREFIX, bool, "explicit tail prefix in callee", FATAL, CALLEE) INLINE_OBSERVATION(HAS_EH, bool, "has exception handling", FATAL, CALLEE) INLINE_OBSERVATION(HAS_ENDFILTER, bool, "has endfilter", FATAL, CALLEE) INLINE_OBSERVATION(HAS_ENDFINALLY, bool, "has endfinally", FATAL, CALLEE) @@ -36,6 +37,7 @@ INLINE_OBSERVATION(HAS_MANAGED_VARARGS, bool, "managed varargs", INLINE_OBSERVATION(HAS_NATIVE_VARARGS, bool, "native varargs", FATAL, CALLEE) INLINE_OBSERVATION(HAS_NO_BODY, bool, "has no body", FATAL, CALLEE) INLINE_OBSERVATION(HAS_NULL_FOR_LDELEM, bool, "has null pointer for ldelem", FATAL, CALLEE) +INLINE_OBSERVATION(HAS_UNMANAGED_CALLCONV, bool, "has unmanaged calling convention", FATAL, CALLEE) INLINE_OBSERVATION(IS_ARRAY_METHOD, bool, "is array method", FATAL, CALLEE) INLINE_OBSERVATION(IS_GENERIC_VIRTUAL, bool, "generic virtual", FATAL, CALLEE) INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoinline", FATAL, CALLEE) @@ -55,7 +57,6 @@ INLINE_OBSERVATION(STACK_CRAWL_MARK, bool, "uses stack crawl mark", INLINE_OBSERVATION(STFLD_NEEDS_HELPER, bool, "stfld needs helper", FATAL, CALLEE) INLINE_OBSERVATION(TOO_MANY_ARGUMENTS, bool, "too many arguments", FATAL, CALLEE) INLINE_OBSERVATION(TOO_MANY_LOCALS, bool, "too many locals", FATAL, CALLEE) -INLINE_OBSERVATION(EXPLICIT_TAIL_PREFIX, bool, "explicit tail prefix in callee", FATAL, CALLEE) // ------ Callee Performance ------- @@ -100,6 +101,7 @@ INLINE_OBSERVATION(IS_SIZE_DECREASING_INLINE, bool, "size decreasing inline", INLINE_OBSERVATION(LOG_REPLAY_ACCEPT, bool, "accepted by log replay", INFORMATION, CALLEE) INLINE_OBSERVATION(LOOKS_LIKE_WRAPPER, bool, "thin wrapper around a call", INFORMATION, CALLEE) INLINE_OBSERVATION(MAXSTACK, int, "maxstack", INFORMATION, CALLEE) +INLINE_OBSERVATION(MAY_RETURN_SMALL_ARRAY, bool, "may return a small new array", INFORMATION, CALLEE) INLINE_OBSERVATION(OPCODE, int, "next opcode in IL stream", INFORMATION, CALLEE) INLINE_OBSERVATION(OPCODE_NORMED, int, "next opcode in IL stream", INFORMATION, CALLEE) INLINE_OBSERVATION(NUMBER_OF_ARGUMENTS, int, "number of arguments", INFORMATION, CALLEE) @@ -114,6 +116,7 @@ INLINE_OBSERVATION(UNSUPPORTED_OPCODE, bool, "unsupported opcode", INLINE_OBSERVATION(DEBUG_CODEGEN, bool, "debug codegen", FATAL, CALLER) INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoInlineRange", FATAL, CALLER) INLINE_OBSERVATION(USES_NEXT_CALL_RET_ADDR, bool, "uses NextCallReturnAddress intrinsic", FATAL, CALLER) +INLINE_OBSERVATION(ASYNC_USED_CONTINUATION, bool, "uses AsyncCallContinuation 
intrinsic", FATAL, CALLER) // ------ Caller Information ------- @@ -132,6 +135,7 @@ INLINE_OBSERVATION(CANT_CLASS_INIT, bool, "can't class init", INLINE_OBSERVATION(COMPILATION_ERROR, bool, "compilation error", FATAL, CALLSITE) INLINE_OBSERVATION(COMPILATION_FAILURE, bool, "failed to compile", FATAL, CALLSITE) INLINE_OBSERVATION(EXPLICIT_TAIL_PREFIX, bool, "explicit tail prefix", FATAL, CALLSITE) +INLINE_OBSERVATION(EH_TABLE_FULL, bool, "callee has eh, eh table is full", FATAL, CALLSITE) INLINE_OBSERVATION(GENERIC_DICTIONARY_LOOKUP, bool, "runtime dictionary lookup", FATAL, CALLSITE) INLINE_OBSERVATION(HAS_CALL_VIA_LDVIRTFTN, bool, "call via ldvirtftn", FATAL, CALLSITE) INLINE_OBSERVATION(HAS_COMPLEX_HANDLE, bool, "complex handle access", FATAL, CALLSITE) diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h index 426e6575973d..6d869ab3bb80 100644 --- a/src/coreclr/jit/inline.h +++ b/src/coreclr/jit/inline.h @@ -634,6 +634,7 @@ struct InlineCandidateInfo : public HandleHistogramProfileCandidateInfo struct LateDevirtualizationInfo { CORINFO_CONTEXT_HANDLE exactContextHnd; + InlineContext* inlinersContext; }; // InlArgInfo describes inline candidate argument properties. @@ -714,6 +715,21 @@ struct InlineInfo BasicBlock* iciBlock; // The basic block iciStmt is in. }; +//------------------------------------------------------------------------ +// PgoInfo +// Schema and data for a method's PGO data. +// +struct PgoInfo +{ + PgoInfo(); + PgoInfo(Compiler* compiler); + PgoInfo(InlineContext* inlineContext); + + ICorJitInfo::PgoInstrumentationSchema* PgoSchema; // pgo schema for method + BYTE* PgoData; // pgo data for the method + unsigned PgoSchemaCount; // count of schema elements +}; + // InlineContext tracks the inline history in a method. // // Notes: @@ -869,6 +885,21 @@ class InlineContext } #endif + const PgoInfo& GetPgoInfo() + { + return m_PgoInfo; + } + + void SetPgoInfo(const PgoInfo& info) + { + m_PgoInfo = info; + } + + bool HasPgoInfo() const + { + return (m_PgoInfo.PgoSchema != nullptr) && (m_PgoInfo.PgoSchemaCount > 0) && (m_PgoInfo.PgoData != nullptr); + } + private: InlineContext(InlineStrategy* strategy); @@ -879,6 +910,7 @@ class InlineContext const BYTE* m_Code; // address of IL buffer for the method CORINFO_METHOD_HANDLE m_Callee; // handle to the method CORINFO_CONTEXT_HANDLE m_RuntimeContext; // handle to the exact context + PgoInfo m_PgoInfo; // profile data unsigned m_ILSize; // size of IL buffer for the method unsigned m_ImportedILSize; // estimated size of imported IL ILLocation m_Location; // inlining statement location within parent @@ -1072,14 +1104,6 @@ class InlineStrategy // Accounting updates for a successful or failed inline. void NoteOutcome(InlineContext* context); - // Cap on allowable increase in jit time due to inlining. - // Multiplicative, so BUDGET = 10 means up to 10x increase - // in jit time. - enum - { - BUDGET = 10 - }; - // Estimate the jit time change because of this inline. 
int EstimateTime(InlineContext* context); diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index d22676a62a3e..6ddd8c6278fe 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -88,7 +88,7 @@ InlinePolicy* InlinePolicy::GetPolicy(Compiler* compiler, bool isPrejitRoot) return new (compiler, CMK_Inlining) ProfilePolicy(compiler, isPrejitRoot); } - const bool isPrejit = compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT); + const bool isPrejit = compiler->IsAot(); const bool isSpeedOpt = compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SPEED_OPT); if ((JitConfig.JitExtDefaultPolicy() != 0)) @@ -911,21 +911,6 @@ int DefaultPolicy::DetermineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* meth void DefaultPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) { - -#if defined(DEBUG) - - // Punt if we're inlining and we've reached the acceptance limit. - int limit = JitConfig.JitInlineLimit(); - unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount(); - - if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast(limit))) - { - SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT); - return; - } - -#endif // defined(DEBUG) - assert(InlDecisionIsCandidate(m_Decision)); assert(m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE); @@ -1134,20 +1119,6 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) assert(InlDecisionIsCandidate(m_Decision)); assert(m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE); -#if defined(DEBUG) - - // Punt if we're inlining and we've reached the acceptance limit. - int limit = JitConfig.JitInlineLimit(); - unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount(); - - if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast(limit))) - { - SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT); - return; - } - -#endif // defined(DEBUG) - // Budget check. const bool overBudget = this->BudgetCheck(); if (overBudget) @@ -1370,6 +1341,10 @@ void ExtendedDefaultPolicy::NoteBool(InlineObservation obs, bool value) m_ArgUnboxExact++; break; + case InlineObservation::CALLEE_MAY_RETURN_SMALL_ARRAY: + m_MayReturnSmallArray = true; + break; + default: DefaultPolicy::NoteBool(obs, value); break; @@ -1397,13 +1372,24 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) // TODO: Enable for PgoSource::Static as well if it's not the generic profile we bundle. if (m_HasProfileWeights && (m_RootCompiler->fgHaveTrustedProfileWeights())) { + JITDUMP("Callee and root has trusted profile\n"); maxCodeSize = static_cast(JitConfig.JitExtDefaultPolicyMaxILProf()); } + else if (m_RootCompiler->fgHaveSufficientProfileWeights()) + { + JITDUMP("Root has sufficient profile\n"); + maxCodeSize = static_cast(JitConfig.JitExtDefaultPolicyMaxILRoot()); + } + else + { + JITDUMP("Callee has %s profile\n", m_HasProfileWeights ? "untrusted" : "no"); + } unsigned alwaysInlineSize = InlineStrategy::ALWAYS_INLINE_SIZE; if (m_InsideThrowBlock) { // Inline only small code in BBJ_THROW blocks, e.g. 
<= 8 bytes of IL + JITDUMP("Call site in throw block\n"); alwaysInlineSize /= 2; maxCodeSize = min(alwaysInlineSize + 1, maxCodeSize); } @@ -1426,6 +1412,7 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) else { // Callee too big, not a candidate + JITDUMP("Callee IL size %u exceeds maxCodeSize %u\n", m_CodeSize, maxCodeSize); SetNever(InlineObservation::CALLEE_TOO_MUCH_IL); } break; @@ -1450,6 +1437,7 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) if ((unsigned)value > bbLimit) { + JITDUMP("Callee BB count %u exceeds bbLimit %u\n", value, bbLimit); SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS); } } @@ -1805,6 +1793,12 @@ double ExtendedDefaultPolicy::DetermineMultiplier() } } + if (m_MayReturnSmallArray) + { + multiplier += 4.0; + JITDUMP("\nInline candidate may return small known-size array. Multiplier increased to %g.", multiplier); + } + if (m_HasProfileWeights) { // There are cases when Profile Data can be misleading or polluted: @@ -1918,6 +1912,7 @@ void ExtendedDefaultPolicy::OnDumpXml(FILE* file, unsigned indent) const XATTR_B(m_IsCallsiteInNoReturnRegion) XATTR_B(m_HasProfileWeights) XATTR_B(m_InsideThrowBlock) + XATTR_B(m_MayReturnSmallArray) } #endif @@ -2400,21 +2395,6 @@ bool DiscretionaryPolicy::PropagateNeverToRuntime() const void DiscretionaryPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) { - -#if defined(DEBUG) - - // Punt if we're inlining and we've reached the acceptance limit. - int limit = JitConfig.JitInlineLimit(); - unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount(); - - if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast(limit))) - { - SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT); - return; - } - -#endif // defined(DEBUG) - // Make additional observations based on the method info MethodInfoObservations(methodInfo); diff --git a/src/coreclr/jit/inlinepolicy.h b/src/coreclr/jit/inlinepolicy.h index d08fbf7b3230..4277a61fa781 100644 --- a/src/coreclr/jit/inlinepolicy.h +++ b/src/coreclr/jit/inlinepolicy.h @@ -226,6 +226,7 @@ class ExtendedDefaultPolicy : public DefaultPolicy , m_NonGenericCallsGeneric(false) , m_IsCallsiteInNoReturnRegion(false) , m_HasProfileWeights(false) + , m_MayReturnSmallArray(false) { // Empty } @@ -281,6 +282,7 @@ class ExtendedDefaultPolicy : public DefaultPolicy bool m_NonGenericCallsGeneric : 1; bool m_IsCallsiteInNoReturnRegion : 1; bool m_HasProfileWeights : 1; + bool m_MayReturnSmallArray : 1; }; // DiscretionaryPolicy is a variant of the default policy. 
It diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 2b0f34ebb49b..4b62a65f9e94 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -128,32 +128,32 @@ const char* CodeGen::genInsDisplayName(emitter::instrDesc* id) { switch (ins) { - case INS_movdqa: + case INS_movdqa32: { return "vmovdqa32"; } - case INS_movdqu: + case INS_movdqu32: { return "vmovdqu32"; } - case INS_pand: + case INS_pandd: { return "vpandd"; } - case INS_pandn: + case INS_pandnd: { return "vpandnd"; } - case INS_por: + case INS_pord: { return "vpord"; } - case INS_pxor: + case INS_pxord: { return "vpxord"; } @@ -178,32 +178,32 @@ const char* CodeGen::genInsDisplayName(emitter::instrDesc* id) return "vrndscaless"; } - case INS_vbroadcastf128: + case INS_vbroadcastf32x4: { return "vbroadcastf32x4"; } - case INS_vextractf128: + case INS_vextractf32x4: { return "vextractf32x4"; } - case INS_vinsertf128: + case INS_vinsertf32x4: { return "vinsertf32x4"; } - case INS_vbroadcasti128: + case INS_vbroadcasti32x4: { return "vbroadcasti32x4"; } - case INS_vextracti128: + case INS_vextracti32x4: { return "vextracti32x4"; } - case INS_vinserti128: + case INS_vinserti32x4: { return "vinserti32x4"; } @@ -355,12 +355,25 @@ bool CodeGenInterface::instIsFP(instruction ins) * compatible instruction. */ -// static inline bool CodeGenInterface::instIsEmbeddedBroadcastCompatible(instruction ins) { - assert((unsigned)ins < ArrLen(instInfo)); + if (GetEmitter()->IsEvexEncodableInstruction(ins)) + { + insTupleType tupleType = emitter::insTupleTypeInfo(ins); + return (tupleType & INS_TT_IS_BROADCAST) != 0; + } + return false; +} + +/***************************************************************************** + * + * Returns non-zero if the given CPU instruction is an embedded masking + * compatible instruction. + */ - return (instInfo[ins] & INS_Flags_EmbeddedBroadcastSupported) != 0; +bool CodeGenInterface::instIsEmbeddedMaskingCompatible(instruction ins) +{ + return (ins != INS_invalid) && (instKMaskBaseSize(ins) != 0); } /***************************************************************************** @@ -371,8 +384,8 @@ bool CodeGenInterface::instIsEmbeddedBroadcastCompatible(instruction ins) unsigned CodeGenInterface::instInputSize(instruction ins) { assert((unsigned)ins < ArrLen(instInfo)); - insFlags inputSize = static_cast((instInfo[ins] & Input_Mask)); + switch (inputSize) { case Input_8Bit: @@ -387,6 +400,33 @@ unsigned CodeGenInterface::instInputSize(instruction ins) unreached(); } } + +/***************************************************************************** + * + * Returns the value of the given instruction's KMask base size attribute, in bits. + */ + +unsigned CodeGenInterface::instKMaskBaseSize(instruction ins) +{ + assert((unsigned)ins < ArrLen(instInfo)); + insFlags kmaskBaseSize = static_cast((instInfo[ins] & KMask_BaseMask)); + + switch (kmaskBaseSize) + { + case KMask_Base1: + return 1; + case KMask_Base2: + return 2; + case KMask_Base4: + return 4; + case KMask_Base8: + return 8; + case KMask_Base16: + return 16; + default: + return 0; + } +} #endif // TARGET_XARCH /***************************************************************************** @@ -733,7 +773,7 @@ void CodeGen::inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumbe { // Is this the special case of a write-thru lclVar? // We mark it as SPILLED to denote that its value is valid in memory. 
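`instKMaskBaseSize` above reports how many k-mask bits an instruction uses per 128-bit vector (for example, `addps` carries `KMask_Base4` while scalar `addss` carries `KMask_Base1` in the instruction table further below). Assuming that mask width simply scales linearly with the packed vector width, which is my reading of the "base kmask size used for a 128-bit vector" flag rather than code from this diff, the relationship looks like:

```cpp
// Sketch: if a packed instruction uses `kmaskBaseSize` mask bits per 128 bits of
// vector (the KMask_Base* attribute read by instKMaskBaseSize), the full mask
// element count scales with the encoded vector width. The scaling rule itself is
// an assumption about how the flag is consumed, not code taken from the diff.
#include <cassert>

unsigned MaskElementCount(unsigned kmaskBaseSize, unsigned vectorBitWidth)
{
    assert(vectorBitWidth % 128 == 0);
    return kmaskBaseSize * (vectorBitWidth / 128);
}

int main()
{
    assert(MaskElementCount(4, 128) == 4);  // e.g. a float op like addps on XMM: 4 lanes
    assert(MaskElementCount(4, 512) == 16); // same base size at ZMM width: 16 lanes
    assert(MaskElementCount(2, 256) == 4);  // a double-width base of 2 at YMM width: 4 lanes
    return 0;
}
```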
- if (((tree->gtFlags & GTF_SPILL) != 0) && tree->gtOper == GT_STORE_LCL_VAR) + if (((tree->gtFlags & GTF_SPILL) != 0) && tree->OperIs(GT_STORE_LCL_VAR)) { isValidInReg = true; } @@ -809,6 +849,7 @@ void CodeGen::inst_RV_SH( // logic for determining what "kind" of operand "op" is. // // Arguments: +// ins - The instruction that will consume the operand. // op - The operand node for which to obtain the descriptor. // // Return Value: @@ -818,7 +859,7 @@ void CodeGen::inst_RV_SH( // This method is not idempotent - it can only be called once for a // given node. // -CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) +CodeGen::OperandDesc CodeGen::genOperandDesc(instruction ins, GenTree* op) { if (!op->isContained() && !op->isUsedFromSpillTemp()) { @@ -887,7 +928,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) case NI_SSE3_MoveAndDuplicate: case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { assert(hwintrinsic->isContained()); if (intrinsicId == NI_SSE3_MoveAndDuplicate) @@ -901,7 +942,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) // handle other cases recursively. GenTree* hwintrinsicChild = hwintrinsic->Op(1); assert(hwintrinsicChild->isContained()); - if (hwintrinsicChild->OperIs(GT_CNS_INT, GT_CNS_LNG)) + if (hwintrinsicChild->IsIntegralConst()) { // a special case is when the operand of CreateScalarUnsafe is an integer type, // CreateScalarUnsafe node will be folded, so we directly match a pattern of @@ -915,11 +956,14 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) { // If the operand of broadcast is not a constant integer, // we handle all the other cases recursively. - return genOperandDesc(hwintrinsicChild); + return genOperandDesc(ins, hwintrinsicChild); } break; } + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: + case NI_Vector512_CreateScalar: case NI_Vector128_CreateScalarUnsafe: case NI_Vector256_CreateScalarUnsafe: case NI_Vector512_CreateScalarUnsafe: @@ -927,12 +971,12 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) // The hwintrinsic should be contained and its // op1 should be either contained or spilled. This // allows us to transparently "look through" the - // CreateScalarUnsafe and treat it directly like + // CreateScalar/Unsafe and treat it directly like // a load from memory. 
assert(hwintrinsic->isContained()); op = hwintrinsic->Op(1); - return genOperandDesc(op); + return genOperandDesc(ins, op); } default: @@ -986,59 +1030,26 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) #if defined(FEATURE_SIMD) case GT_CNS_VEC: { - switch (op->TypeGet()) - { - case TYP_SIMD8: - { - simd8_t constValue; - memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd8_t)); - return OperandDesc(emit->emitSimd8Const(constValue)); - } + insTupleType tupleType = emitter::insTupleTypeInfo(ins); + unsigned cnsSize = genTypeSize(op); - case TYP_SIMD12: - { - simd16_t constValue = {}; - memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd12_t)); - return OperandDesc(emit->emitSimd16Const(constValue)); - } - case TYP_SIMD16: - { - simd16_t constValue; - memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd16_t)); - return OperandDesc(emit->emitSimd16Const(constValue)); - } - -#if defined(TARGET_XARCH) - case TYP_SIMD32: - { - simd32_t constValue; - memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd32_t)); - return OperandDesc(emit->emitSimd32Const(constValue)); - } - - case TYP_SIMD64: - { - simd64_t constValue; - memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd64_t)); - return OperandDesc(emit->emitSimd64Const(constValue)); - } - -#endif // TARGET_XARCH + if ((tupleType == INS_TT_TUPLE1_SCALAR) || (tupleType == INS_TT_TUPLE1_FIXED)) + { + // We have a vector const, but the instruction will only read a scalar from it, + // so don't waste space putting the entire vector to the data section. - default: - { - unreached(); - } + cnsSize = max(CodeGenInterface::instInputSize(ins), 4U); + assert(cnsSize <= genTypeSize(op)); } + + return OperandDesc(emit->emitSimdConst(&op->AsVecCon()->gtSimdVal, EA_TYPE(cnsSize))); } #endif // FEATURE_SIMD #if defined(FEATURE_MASKED_HW_INTRINSICS) case GT_CNS_MSK: { - simdmask_t constValue; - memcpy(&constValue, &op->AsMskCon()->gtSimdMaskVal, sizeof(simdmask_t)); - return OperandDesc(emit->emitSimdMaskConst(constValue)); + return OperandDesc(emit->emitSimdMaskConst(op->AsMskCon()->gtSimdMaskVal)); } #endif // FEATURE_MASKED_HW_INTRINSICS @@ -1068,7 +1079,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) void CodeGen::inst_TT(instruction ins, emitAttr size, GenTree* op1) { emitter* emit = GetEmitter(); - OperandDesc op1Desc = genOperandDesc(op1); + OperandDesc op1Desc = genOperandDesc(ins, op1); switch (op1Desc.GetKind()) { @@ -1117,7 +1128,7 @@ void CodeGen::inst_TT(instruction ins, emitAttr size, GenTree* op1) void CodeGen::inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTree* op2) { emitter* emit = GetEmitter(); - OperandDesc op2Desc = genOperandDesc(op2); + OperandDesc op2Desc = genOperandDesc(ins, op2); switch (op2Desc.GetKind()) { @@ -1199,7 +1210,7 @@ void CodeGen::inst_RV_TT_IV( } #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS - OperandDesc rmOpDesc = genOperandDesc(rmOp); + OperandDesc rmOpDesc = genOperandDesc(ins, rmOp); switch (rmOpDesc.GetKind()) { @@ -1244,14 +1255,17 @@ bool CodeGenInterface::IsEmbeddedBroadcastEnabled(instruction ins, GenTree* op) // 1. EVEX enabled. // 2. Embedded broadcast compatible intrinsics // 3. 
A contained broadcast scalar node + if (!GetEmitter()->UseEvexEncoding()) { return false; } + if (!instIsEmbeddedBroadcastCompatible(ins)) { return false; } + if (!op->isContained() || !op->OperIsHWIntrinsic()) { return false; @@ -1313,19 +1327,19 @@ void CodeGen::inst_RV_RV_TT(instruction ins, { switch (ins) { - case INS_pand: + case INS_pandd: ins = INS_vpandq; break; - case INS_pandn: + case INS_pandnd: ins = INS_vpandnq; break; - case INS_por: + case INS_pord: ins = INS_vporq; break; - case INS_pxor: + case INS_pxord: ins = INS_vpxorq; break; @@ -1336,7 +1350,7 @@ void CodeGen::inst_RV_RV_TT(instruction ins, } #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS - OperandDesc op2Desc = genOperandDesc(op2); + OperandDesc op2Desc = genOperandDesc(ins, op2); switch (op2Desc.GetKind()) { @@ -1423,7 +1437,7 @@ void CodeGen::inst_RV_RV_TT_IV(instruction ins, } #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS - OperandDesc op2Desc = genOperandDesc(op2); + OperandDesc op2Desc = genOperandDesc(ins, op2); switch (op2Desc.GetKind()) { @@ -1614,9 +1628,9 @@ bool CodeGen::arm_Valid_Imm_For_Add_SP(target_ssize_t imm) bool CodeGenInterface::validImmForBL(ssize_t addr) { return - // If we are running the altjit for NGEN, then assume we can use the "BL" instruction. - // This matches the usual behavior for NGEN, since we normally do generate "BL". - (!compiler->info.compMatchedVM && compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) || + // If we are running the altjit for AOT, then assume we can use the "BL" instruction. + // This matches the usual behavior for AOT, since we normally do generate "BL". + (!compiler->info.compMatchedVM && compiler->IsAot()) || (compiler->eeGetRelocTypeHint((void*)addr) == IMAGE_REL_BASED_THUMB_BRANCH24); } @@ -1628,7 +1642,7 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) // On arm64, we always assume a call target is in range and generate a 28-bit relative // 'bl' instruction. If this isn't sufficient range, the VM will generate a jump stub when // we call recordRelocation(). See the IMAGE_REL_ARM64_BRANCH26 case in jitinterface.cpp - // (for JIT) or zapinfo.cpp (for NGEN). If we cannot allocate a jump stub, it is fatal. + // (for JIT) or zapinfo.cpp (for AOT). If we cannot allocate a jump stub, it is fatal. return true; } #endif // TARGET_ARM64 @@ -2082,8 +2096,10 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) // float to int assert(genIsValidFloatReg(srcReg)); -#if defined(TARGET_XARCH) - return INS_movd; +#if defined(TARGET_AMD64) + return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? INS_movd32 : INS_movd64; +#elif defined(TARGET_X86) + return INS_movd32; #elif defined(TARGET_ARM64) return INS_mov; #elif defined(TARGET_ARM) @@ -2133,8 +2149,10 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) // int to float assert(genIsValidIntOrFakeReg(srcReg)); -#if defined(TARGET_XARCH) - return INS_movd; +#if defined(TARGET_AMD64) + return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? 
INS_movd32 : INS_movd64; +#elif defined(TARGET_X86) + return INS_movd32; #elif defined(TARGET_ARM64) return INS_fmov; #elif defined(TARGET_ARM) @@ -2420,6 +2438,11 @@ instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type) // instruction CodeGen::ins_FloatConv(var_types to, var_types from) { + // AVX: Supports following conversions + // srcType = int16/int64 castToType = float + // AVX512: Supports following conversions + // srcType = ulong castToType = double/float + bool isAvx10v2 = false; switch (from) { case TYP_INT: @@ -2471,40 +2494,52 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from) break; case TYP_FLOAT: + if (to == TYP_FLOAT) + { + return ins_Move_Extend(TYP_FLOAT, false); + } + else if (to == TYP_DOUBLE) + { + return INS_cvtss2sd; + } + isAvx10v2 = compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2); + switch (to) { case TYP_INT: - return INS_cvttss2si32; + return isAvx10v2 ? INS_vcvttss2sis32 : INS_cvttss2si32; case TYP_LONG: - return INS_cvttss2si64; - case TYP_FLOAT: - return ins_Move_Extend(TYP_FLOAT, false); - case TYP_DOUBLE: - return INS_cvtss2sd; + return isAvx10v2 ? INS_vcvttss2sis64 : INS_cvttss2si64; case TYP_ULONG: - return INS_vcvttss2usi64; + return isAvx10v2 ? INS_vcvttss2usis64 : INS_vcvttss2usi64; case TYP_UINT: - return INS_vcvttss2usi32; + return isAvx10v2 ? INS_vcvttss2usis32 : INS_vcvttss2usi32; default: unreached(); } break; case TYP_DOUBLE: + if (to == TYP_FLOAT) + { + return INS_cvtsd2ss; + } + else if (to == TYP_DOUBLE) + { + return ins_Move_Extend(TYP_DOUBLE, false); + } + isAvx10v2 = compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2); + switch (to) { case TYP_INT: - return INS_cvttsd2si32; + return isAvx10v2 ? INS_vcvttsd2sis32 : INS_cvttsd2si32; case TYP_LONG: - return INS_cvttsd2si64; - case TYP_FLOAT: - return INS_cvtsd2ss; - case TYP_DOUBLE: - return ins_Move_Extend(TYP_DOUBLE, false); + return isAvx10v2 ? INS_vcvttsd2sis64 : INS_cvttsd2si64; case TYP_ULONG: - return INS_vcvttsd2usi64; + return isAvx10v2 ? INS_vcvttsd2usis64 : INS_vcvttsd2usi64; case TYP_UINT: - return INS_vcvttsd2usi32; + return isAvx10v2 ? 
INS_vcvttsd2usis32 : INS_vcvttsd2usi32; default: unreached(); } diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index c77f52717748..d36fad39e567 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -188,8 +188,9 @@ enum insFlags : uint64_t INS_FLAGS_Has_Wbit = 1ULL << 29, INS_FLAGS_Has_Sbit = 1ULL << 30, - // instruction input size - // if not input size is set, instruction defaults to using + // instruction input size which is used to determine + // the scalar or broadcast load amount for SIMD instructions + // if this flag is not present, we default to using // the emitAttr for size Input_8Bit = 1ULL << 31, Input_16Bit = 1ULL << 32, @@ -216,18 +217,26 @@ enum insFlags : uint64_t KInstruction = 1ULL << 41, KInstructionWithLBit = 1ULL << 42, - // EVEX feature: embedded broadcast - INS_Flags_EmbeddedBroadcastSupported = 1ULL << 43, + // UNUSED = 1ULL << 43, // APX: REX2 prefix: Encoding_REX2 = 1ULL << 44, // APX: EVEX.ND: - INS_Flags_Has_NDD = 1ULL << 45, - + INS_Flags_Has_NDD = 1ULL << 45, + // APX: EVEX.NF: INS_Flags_Has_NF = 1ULL << 46, + // base kmask size used for a 128-bit vector + // used to determine if we can use embedded masking + KMask_Base1 = 1ULL << 47, + KMask_Base2 = 1ULL << 48, + KMask_Base4 = 1ULL << 49, + KMask_Base8 = 1ULL << 50, + KMask_Base16 = 1ULL << 51, + KMask_BaseMask = (0x1FULL) << 47, + // TODO-Cleanup: Remove this flag and its usage from TARGET_XARCH INS_FLAGS_DONT_CARE = 0x00ULL, }; @@ -276,8 +285,16 @@ enum insOpts: unsigned INS_OPTS_EVEX_nf_MASK = 0x80, // mask for APX-EVEX.nf related features INS_OPTS_EVEX_nf = 1 << 7, // NDD form for legacy instructions + INS_OPTS_EVEX_dfv_byte_offset = 8, // save the bit offset for first dfv flag pos + + INS_OPTS_EVEX_dfv_cf = 1 << 8, + INS_OPTS_EVEX_dfv_zf = 1 << 9, + INS_OPTS_EVEX_dfv_sf = 1 << 10, + INS_OPTS_EVEX_dfv_of = 1 << 11, + + INS_OPTS_EVEX_dfv_MASK = 0xF00, - INS_OPTS_EVEX_NoApxPromotion = 1 << 8, // Do not promote to APX-EVEX + INS_OPTS_EVEX_NoApxPromotion = 1 << 12, // Do not promote to APX-EVEX }; @@ -431,16 +448,16 @@ enum insSvePattern : unsigned enum insSvePrfop : unsigned { SVE_PRFOP_PLDL1KEEP = 0b0000, - SVE_PRFOP_PLDL1STRM = 0b0001, - SVE_PRFOP_PLDL2KEEP = 0b0010, - SVE_PRFOP_PLDL2STRM = 0b0011, - SVE_PRFOP_PLDL3KEEP = 0b0100, - SVE_PRFOP_PLDL3STRM = 0b0101, - SVE_PRFOP_PSTL1KEEP = 0b1000, - SVE_PRFOP_PSTL1STRM = 0b1001, - SVE_PRFOP_PSTL2KEEP = 0b1010, - SVE_PRFOP_PSTL2STRM = 0b1011, - SVE_PRFOP_PSTL3KEEP = 0b1100, + SVE_PRFOP_PLDL1STRM = 0b0001, + SVE_PRFOP_PLDL2KEEP = 0b0010, + SVE_PRFOP_PLDL2STRM = 0b0011, + SVE_PRFOP_PLDL3KEEP = 0b0100, + SVE_PRFOP_PLDL3STRM = 0b0101, + SVE_PRFOP_PSTL1KEEP = 0b1000, + SVE_PRFOP_PSTL1STRM = 0b1001, + SVE_PRFOP_PSTL2KEEP = 0b1010, + SVE_PRFOP_PSTL2STRM = 0b1011, + SVE_PRFOP_PSTL3KEEP = 0b1100, SVE_PRFOP_PSTL3STRM = 0b1101, SVE_PRFOP_CONST6 = 0b0110, diff --git a/src/coreclr/jit/instrsriscv64.h b/src/coreclr/jit/instrsriscv64.h index c61d6ad9103e..62169e5831be 100644 --- a/src/coreclr/jit/instrsriscv64.h +++ b/src/coreclr/jit/instrsriscv64.h @@ -260,6 +260,53 @@ INST(amominu_w, "amominu.w", 0, 0xc000202f) // funct5:11000 INST(amominu_d, "amominu.d", 0, 0xc000302f) // funct5:11000 INST(amomaxu_w, "amomaxu.w", 0, 0xe000202f) // funct5:11100 INST(amomaxu_d, "amomaxu.d", 0, 0xe000302f) // funct5:11100 + +// Zbb (RV32 + RV64) +//// R_R +INST(clz, "clz", 0, 0x60001013) +INST(clzw, "clzw", 0, 0x6000101b) +INST(ctz, "ctz", 0, 0x60101013) +INST(ctzw, "ctzw", 0, 0x6010101b) +INST(cpop, "cpop", 0, 0x60201013) +INST(cpopw, "cpopw", 0, 0x6020101b) 
+INST(sext_b, "sext.b", 0, 0x60401013) +INST(sext_h, "sext.h", 0, 0x60501013) +INST(zext_h, "zext.h", 0, 0x0800403b) +INST(rev8, "rev8", 0, 0x6b805013) + +//// R_R_R +INST(rol, "rol", 0, 0x60001033) +INST(rolw, "rolw", 0, 0x6000103b) +INST(ror, "ror", 0, 0x60005033) +INST(rorw, "rorw", 0, 0x6000503b) +INST(xnor, "xnor", 0, 0x40004033) +INST(orn, "orn", 0, 0x40006033) +INST(andn, "andn", 0, 0x40007033) +INST(min, "min", 0, 0x0a004033) +INST(minu, "minu", 0, 0x0a005033) +INST(max, "max", 0, 0x0a006033) +INST(maxu, "maxu", 0, 0x0a007033) + +//// R_R_I +INST(rori, "rori", 0, 0x60005013) +INST(roriw, "roriw", 0, 0x6000501b) + +// Zba (RV64 + RV32) + +//// R_R_R +INST(sh1add, "sh1add", 0, 0x20002033) +INST(sh2add, "sh2add", 0, 0x20004033) +INST(sh3add, "sh3add", 0, 0x20006033) + +// Zba (RV64) + +//// R_R_R +INST(add_uw, "add.uw", 0, 0x0800003b) +INST(sh1add_uw, "sh1add_uw", 0, 0x2000203b) +INST(sh2add_uw, "sh2add_uw", 0, 0x2000403b) +INST(sh3add_uw, "sh3add_uw", 0, 0x2000603b) +INST(slli_uw, "slli_uw", 0, 0x0800101b) + // clang-format on /*****************************************************************************/ #undef INST diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index f4c5df821190..a2d9ff191150 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -117,7 +117,7 @@ INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD) INST3(xchg, "xchg", IUM_RW, 0x000086, BAD_CODE, 0x000086, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2) -INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NDD | INS_Flags_Has_NF | Encoding_REX2) // id nm um mr mi rm tt flags @@ -125,28 +125,46 @@ INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, // as 2-operand instructions with the target register being implicit // implicit_reg = op1*op2_icon #define INSTMUL INST3 -INSTMUL(imul_AX, "imul", IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_CX, "imul", IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_DX, "imul", IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_BX, "imul", IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_SP, "imul", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_BP, "imul", IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_SI, "imul", IUM_RD, 
BAD_CODE, 0x003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_DI, "imul", IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) +INSTMUL(imul_AX, "imul", IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_CX, "imul", IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_DX, "imul", IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_BX, "imul", IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_SP, "imul", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_BP, "imul", IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_SI, "imul", IUM_RD, BAD_CODE, 0x003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_DI, "imul", IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) #ifdef TARGET_AMD64 -INSTMUL(imul_08, "imul", IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_09, "imul", IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_10, "imul", IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_11, "imul", IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_12, "imul", IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_13, "imul", IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) -INSTMUL(imul_14, "imul", IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) 
-INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF) - +INSTMUL(imul_08, "imul", IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_09, "imul", IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_10, "imul", IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_11, "imul", IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_12, "imul", IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_13, "imul", IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_14, "imul", IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) + +INSTMUL(imul_16, "imul", IUM_RD, BAD_CODE, 0xD54000000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_17, "imul", IUM_RD, BAD_CODE, 0xD54000000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_18, "imul", IUM_RD, BAD_CODE, 0xD54000001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_19, "imul", IUM_RD, BAD_CODE, 0xD54000001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_20, "imul", IUM_RD, BAD_CODE, 0xD54000002068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_21, "imul", IUM_RD, BAD_CODE, 0xD54000002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_22, "imul", IUM_RD, BAD_CODE, 0xD54000003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_23, "imul", 
IUM_RD, BAD_CODE, 0xD54000003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_24, "imul", IUM_RD, BAD_CODE, 0xD54400000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_25, "imul", IUM_RD, BAD_CODE, 0xD54400000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_26, "imul", IUM_RD, BAD_CODE, 0xD54400001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_27, "imul", IUM_RD, BAD_CODE, 0xD54400001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_28, "imul", IUM_RD, BAD_CODE, 0xD54400002068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_29, "imul", IUM_RD, BAD_CODE, 0xD54400002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_30, "imul", IUM_RD, BAD_CODE, 0xD54400003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) +INSTMUL(imul_31, "imul", IUM_RD, BAD_CODE, 0xD54400003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF | Encoding_REX2) #endif // TARGET_AMD64 + + // the hex codes in this file represent the instruction encoding as follows: // 0x0000ff00 - modrm byte position // 0x000000ff - last byte of opcode (before modrm) @@ -196,420 +214,443 @@ INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, INST3(FIRST_SSE_INSTRUCTION, "FIRST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) // SSE -INST3(addps, "addps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Add packed singles -INST3(addss, "addss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar singles -INST3(andnps, "andnps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // And-Not packed singles -INST3(andps, "andps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // AND packed singles -INST3(cmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_TT_FULL, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles -INST3(cmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_TT_TUPLE1_SCALAR, 
Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles -INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // ordered compare singles -INST3(cvtsi2ss32, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar single -INST3(cvtsi2ss64, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar single -INST3(cvtss2si, "cvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // cvt scalar single to DWORD/QWORD -INST3(cvttss2si32, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to DWORD -INST3(cvttss2si64, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to DWORD -INST3(divps, "divps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Divide packed singles -INST3(divss, "divss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles -INST3(maxps, "maxps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Return Maximum packed singles -INST3(maxss, "maxss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar single -INST3(minps, "minps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Return Minimum packed singles -INST3(minss, "minss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar single -INST3(movaps, "movaps", IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movhlps, "movhlps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), INS_TT_NONE, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(movhps, "movhps", IUM_WR, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16), INS_TT_TUPLE2, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movlhps, "movlhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), INS_TT_NONE, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(movlps, "movlps", IUM_WR, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12), INS_TT_TUPLE2, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movmskps, "movmskps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), INS_TT_NONE, REX_WIG | Encoding_VEX) -INST3(movntps, "movntps", IUM_WR, PCKFLT(0x2B), BAD_CODE, BAD_CODE, INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movss, "movss", IUM_WR, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movups, "movups", IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(mulps, "mulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Multiply packed singles -INST3(mulss, "mulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single -INST3(orps, "orps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Or packed singles -INST3(prefetchnta, "prefetchnta", IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) -INST3(prefetcht0, "prefetcht0", IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) -INST3(prefetcht1, "prefetcht1", IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) -INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) -INST3(rcpps, "rcpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), INS_TT_NONE, REX_WIG | Encoding_VEX) // Reciprocal of packed singles -INST3(rcpss, "rcpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal of scalar single -INST3(rsqrtps, "rsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), INS_TT_NONE, REX_WIG | Encoding_VEX) // Reciprocal Sqrt of packed singles -INST3(rsqrtss, "rsqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal Sqrt of scalar single -INST3(shufps, "shufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) -INST3(sfence, "sfence", IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) -INST3(sqrtps, "sqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Sqrt of packed singles -INST3(sqrtss, "sqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar single -INST3(subps, "subps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Subtract packed singles -INST3(subss, "subss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar singles -INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), 
INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare singles
-INST3(unpckhps, "unpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)
-INST3(unpcklps, "unpcklps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)
-INST3(xorps, "xorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // XOR packed singles
+INST3(addps, "addps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed singles
+INST3(addss, "addss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar singles
+INST3(andnps, "andnps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed singles
+INST3(andps, "andps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // AND packed singles
+INST3(cmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles
+INST3(cmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles
+INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // ordered compare singles
+INST3(cvtsi2ss32, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar single
+INST3(cvtsi2ss64, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar single
+INST3(cvtss2si32, "cvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt scalar single to DWORD/QWORD
+INST3(cvtss2si64, "cvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt scalar single to DWORD/QWORD
+INST3(cvttss2si32, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to DWORD
+INST3(cvttss2si64, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to DWORD
+INST3(divps, "divps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed singles
+INST3(divss, "divss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles
+INST3(maxps, "maxps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed singles
+INST3(maxss, "maxss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar single
+INST3(minps, "minps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed singles
+INST3(minss, "minss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5D), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar single
+INST3(movaps, "movaps", IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
+INST3(movhlps, "movhlps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), INS_TT_NONE, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction)
+INST3(movhps, "movhps", IUM_WR, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16), INS_TT_TUPLE2, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction)
+INST3(movlhps, "movlhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), INS_TT_NONE, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction)
+INST3(movlps, "movlps", IUM_WR, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12), INS_TT_TUPLE2, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction)
+INST3(movmskps, "movmskps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), INS_TT_NONE, REX_WIG | Encoding_VEX)
+INST3(movntps, "movntps", IUM_WR, PCKFLT(0x2B), BAD_CODE, BAD_CODE, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
+INST3(movss, "movss", IUM_WR, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction)
+INST3(movups, "movups", IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
+INST3(mulps, "mulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed singles
+INST3(mulss, "mulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single
+INST3(orps, "orps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Or packed singles
+INST3(prefetchnta, "prefetchnta", IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2)
+INST3(prefetcht0, "prefetcht0", IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2)
+INST3(prefetcht1, "prefetcht1", IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2)
+INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2)
+INST3(rcpps, "rcpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Reciprocal of packed singles
+INST3(rcpss, "rcpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal of scalar single
+INST3(rsqrtps, "rsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Reciprocal Sqrt of packed singles
+INST3(rsqrtss, "rsqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal Sqrt of scalar single
+INST3(shufps, "shufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction)
+INST3(sfence, "sfence", IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG)
+INST3(sqrtps, "sqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Sqrt of packed singles
+INST3(sqrtss, "sqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar single
+INST3(subps, "subps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed singles
+INST3(subss, "subss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar singles
+INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare singles
+INST3(unpckhps, "unpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction)
+INST3(unpcklps, "unpcklps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction)
+INST3(xorps, "xorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed singles
// SSE2
-INST3(addpd, "addpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Add packed doubles
-INST3(addsd, "addsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar doubles
-INST3(andnpd, "andnpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction |
INS_Flags_EmbeddedBroadcastSupported) // And-Not packed doubles -INST3(andpd, "andpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // AND packed doubles -INST3(cmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_TT_FULL, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles -INST3(cmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles -INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // ordered compare doubles -INST3(cvtdq2pd, "cvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed DWORDs to doubles -INST3(cvtdq2ps, "cvtdq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed DWORDs to singles -INST3(cvtpd2dq, "cvtpd2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to DWORDs -INST3(cvtpd2ps, "cvtpd2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to singles -INST3(cvtps2dq, "cvtps2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to DWORDs -INST3(cvtps2pd, "cvtps2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to doubles -INST3(cvtsd2si, "cvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // cvt scalar double to DWORD -INST3(cvtsd2ss, "cvtsd2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar double to scalar singles -INST3(cvtsi2sd32, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar double -INST3(cvtsi2sd64, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar double -INST3(cvtss2sd, "cvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles -INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with trunc packed doubles to DWORDs -INST3(cvttps2dq, "cvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, 
SSEFLT(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with trunc packed singles to DWORDs -INST3(cvttsd2si32, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs -INST3(cvttsd2si64, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs -INST3(divpd, "divpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Divide packed doubles -INST3(divsd, "divsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles -INST3(lfence, "lfence", IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) -INST3(maskmovdqu, "maskmovdqu", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), INS_TT_NONE, REX_WIG) -INST3(maxpd, "maxpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Return Maximum packed doubles -INST3(maxsd, "maxsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar double -INST3(mfence, "mfence", IUM_RD, 0x000FF0AE, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) -INST3(minpd, "minpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Return Minimum packed doubles -INST3(minsd, "minsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar double -INST3(movapd, "movapd", IUM_WR, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // Move DWORD/QWORD between xmm regs <-> memory/r32/r64 regs -INST3(movdqa, "movdqa", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movdqu, "movdqu", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movhpd, "movhpd", IUM_WR, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movlpd, "movlpd", IUM_WR, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movmskpd, "movmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), INS_TT_NONE, REX_WIG | Encoding_VEX) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros. 
-INST3(movntdq, "movntdq", IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movnti, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WX) -INST3(movntpd, "movntpd", IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Move Quadword between memory/mm <-> regs -INST3(movsd_simd, "movsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movupd, "movupd", IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(mulpd, "mulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Multiply packed doubles -INST3(mulsd, "mulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles -INST3(orpd, "orpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Or packed doubles -INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Pack (narrow) int to short with saturation -INST3(packsswb, "packsswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation -INST3(packuswb, "packuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation -INST3(paddb, "paddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers -INST3(paddd, "paddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Add packed double-word (32-bit) integers -INST3(paddq, "paddq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Add packed quad-word (64-bit) integers -INST3(paddsb, "paddsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed byte integers and saturate the results -INST3(paddsw, "paddsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed word integers and saturate the results -INST3(paddusb, "paddusb", IUM_WR, BAD_CODE, BAD_CODE, 
PCKDBL(0xDC), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned byte integers and saturate the results -INST3(paddusw, "paddusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned word integers and saturate the results -INST3(paddw, "paddw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed word (16-bit) integers -INST3(pand, "pand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise AND of two xmm regs -INST3(pandn, "pandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise AND NOT of two xmm regs -INST3(pavgb, "pavgb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed byte integers -INST3(pavgw, "pavgw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed word integers -INST3(pcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality -INST3(pcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_TT_FULL, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality -INST3(pcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality -INST3(pcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than -INST3(pcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_TT_FULL, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than -INST3(pcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than -INST3(pextrw, "pextrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 16-bit value into a r32 with zero extended to 32-bits -INST3(pinsrw, "pinsrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert word at index -INST3(pmaddwd, "pmaddwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. 
Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst -INST3(pmaxsw, "pmaxsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed words -INST3(pmaxub, "pmaxub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum unsigned bytes -INST3(pminsw, "pminsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed words -INST3(pminub, "pminub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum unsigned bytes -INST3(pmovmskb, "pmovmskb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), INS_TT_NONE, REX_WIG | Encoding_VEX) // Move the MSB bits of all bytes in a xmm reg to an int reg -INST3(pmulhuw, "pmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers -INST3(pmulhw, "pmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers -INST3(pmullw, "pmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result -INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_TT_FULL, Input_32Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed multiply 32-bit unsigned integers and store 64-bit result -INST3(por, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise OR of two xmm regs -INST3(psadbw, "psadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers -INST3(pshufd, "pshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Packed shuffle of 32-bit integers -INST3(pshufhw, "pshufhw", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. -INST3(pshuflw, "pshuflw", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. 
-INST3(pslld, "pslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift left logical of 32-bit integers -INST3(pslldq, "pslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes -INST3(psllq, "psllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift left logical of 64-bit integers -INST3(psllw, "psllw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), INS_TT_FULL_MEM | INS_TT_MEM128, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 16-bit integers -INST3(psrad, "psrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift right arithmetic of 32-bit integers -INST3(psraw, "psraw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), INS_TT_FULL_MEM | INS_TT_MEM128, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 16-bit integers -INST3(psrld, "psrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift right logical of 32-bit integers -INST3(psrldq, "psrldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift right logical of xmm reg by given number of bytes -INST3(psrlq, "psrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift right logical of 64-bit integers -INST3(psrlw, "psrlw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), INS_TT_FULL_MEM | INS_TT_MEM128, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 16-bit integers -INST3(psubb, "psubb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers -INST3(psubd, "psubd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Subtract packed double-word (32-bit) integers -INST3(psubq, "psubq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFB), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // subtract packed quad-word (64-bit) integers -INST3(psubw, "psubw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers -INST3(psubsb, 
"psubsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation -INST3(psubsw, "psubsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation -INST3(psubusb, "psubusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation -INST3(psubusw, "psubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation -INST3(punpckhbw, "punpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) -INST3(punpckhdq, "punpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) -INST3(punpckhqdq, "punpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed logical (unsigned) widen uint to ulong (hi) -INST3(punpckhwd, "punpckhwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (hi) -INST3(punpcklbw, "punpcklbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (lo) -INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) -INST3(punpcklqdq, "punpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed logical (unsigned) widen uint to ulong (lo) -INST3(punpcklwd, "punpcklwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (lo) -INST3(pxor, "pxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise XOR of two xmm regs -INST3(shufpd, "shufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) -INST3(sqrtpd, "sqrtpd", IUM_WR, BAD_CODE, BAD_CODE, 
PCKDBL(0x51), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Sqrt of packed doubles -INST3(sqrtsd, "sqrtsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar double -INST3(subpd, "subpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Subtract packed doubles -INST3(subsd, "subsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar doubles -INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare doubles -INST3(unpckhpd, "unpckhpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed logical (unsigned) widen ubyte to ushort (hi) -INST3(unpcklpd, "unpcklpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed logical (unsigned) widen ubyte to ushort (hi) -INST3(xorpd, "xorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // XOR packed doubles +INST3(addpd, "addpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed doubles +INST3(addsd, "addsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar doubles +INST3(andnpd, "andnpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed doubles +INST3(andpd, "andpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // AND packed doubles +INST3(cmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles +INST3(cmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles +INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // ordered compare doubles +INST3(cvtdq2pd, "cvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed DWORDs to doubles +INST3(cvtdq2ps, "cvtdq2ps", IUM_WR, BAD_CODE, BAD_CODE, 
PCKFLT(0x5B), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed DWORDs to singles +INST3(cvtpd2dq, "cvtpd2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed doubles to DWORDs +INST3(cvtpd2ps, "cvtpd2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed doubles to singles +INST3(cvtps2dq, "cvtps2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed singles to DWORDs +INST3(cvtps2pd, "cvtps2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), INS_TT_HALF, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed singles to doubles +INST3(cvtsd2si32, "cvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt scalar double to DWORD +INST3(cvtsd2si64, "cvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt scalar double to DWORD +INST3(cvtsd2ss, "cvtsd2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar double to scalar singles +INST3(cvtsi2sd32, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar double +INST3(cvtsi2sd64, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar double +INST3(cvtss2sd, "cvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles +INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed doubles to DWORDs +INST3(cvttps2dq, "cvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed singles to DWORDs +INST3(cvttsd2si32, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs +INST3(cvttsd2si64, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs +INST3(divpd, "divpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed doubles +INST3(divsd, "divsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles +INST3(lfence, "lfence", IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) +INST3(maskmovdqu, "maskmovdqu", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), 
INS_TT_NONE, REX_WIG) +INST3(maxpd, "maxpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed doubles +INST3(maxsd, "maxsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar double +INST3(mfence, "mfence", IUM_RD, 0x000FF0AE, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) +INST3(minpd, "minpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed doubles +INST3(minsd, "minsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar double +INST3(movapd, "movapd", IUM_WR, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28), INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(movd32, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move DWORD between xmm regs <-> memory/r32 regs +INST3(movd64, "movq", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move QWORD between xmm regs <-> memory/r64 regs +INST3(movdqa32, "movdqa", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) +INST3(movdqu32, "movdqu", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) +INST3(movhpd, "movhpd", IUM_WR, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) +INST3(movlpd, "movlpd", IUM_WR, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) +INST3(movmskpd, "movmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), INS_TT_NONE, REX_WIG | Encoding_VEX) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros. 
+INST3(movntdq, "movntdq", IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(movnti32, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_REX2) +INST3(movnti64, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_REX2) +INST3(movntpd, "movntpd", IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move Quadword between memory/mm <-> regs +INST3(movsd_simd, "movsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) +INST3(movupd, "movupd", IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(mulpd, "mulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed doubles +INST3(mulsd, "mulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles +INST3(orpd, "orpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Or packed doubles +INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with saturation +INST3(packsswb, "packsswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation +INST3(packuswb, "packuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation +INST3(paddb, "paddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers +INST3(paddd, "paddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed double-word (32-bit) integers +INST3(paddq, "paddq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed quad-word (64-bit) integers +INST3(paddsb, "paddsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed byte integers and saturate the results +INST3(paddsw, "paddsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed word integers and saturate the results +INST3(paddusb, 
"paddusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned byte integers and saturate the results +INST3(paddusw, "paddusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned word integers and saturate the results +INST3(paddw, "paddw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed word (16-bit) integers +INST3(pandd, "pand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs +INST3(pandnd, "pandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs +INST3(pavgb, "pavgb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed byte integers +INST3(pavgw, "pavgw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed word integers +INST3(pcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality +INST3(pcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality +INST3(pcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality +INST3(pcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than +INST3(pcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than +INST3(pcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than +INST3(pextrw, "pextrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 16-bit value into a r32 with zero extended to 32-bits +INST3(pinsrw, "pinsrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert word at index +INST3(pmaddwd, "pmaddwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), INS_TT_FULL_MEM, KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. 
Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst +INST3(pmaxsw, "pmaxsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed words +INST3(pmaxub, "pmaxub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum unsigned bytes +INST3(pminsw, "pminsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed words +INST3(pminub, "pminub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum unsigned bytes +INST3(pmovmskb, "pmovmskb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), INS_TT_NONE, REX_WIG | Encoding_VEX) // Move the MSB bits of all bytes in a xmm reg to an int reg +INST3(pmulhuw, "pmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers +INST3(pmulhw, "pmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers +INST3(pmullw, "pmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result +INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_TT_FULL, Input_32Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit unsigned integers and store 64-bit result +INST3(pord, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs +INST3(psadbw, "psadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers +INST3(pshufd, "pshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed shuffle of 32-bit integers +INST3(pshufhw, "pshufhw", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. +INST3(pshuflw, "pshuflw", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. 
+INST3(pslld, "pslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 32-bit integers +INST3(pslldq, "pslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes +INST3(psllq, "psllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 64-bit integers +INST3(psllw, "psllw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), INS_TT_FULL_MEM | INS_TT_MEM128, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 16-bit integers +INST3(psrad, "psrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 32-bit integers +INST3(psraw, "psraw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), INS_TT_FULL_MEM | INS_TT_MEM128, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 16-bit integers +INST3(psrld, "psrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 32-bit integers +INST3(psrldq, "psrldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift right logical of xmm reg by given number of bytes +INST3(psrlq, "psrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 64-bit integers +INST3(psrlw, "psrlw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), INS_TT_FULL_MEM | INS_TT_MEM128, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 16-bit integers +INST3(psubb, "psubb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers +INST3(psubd, "psubd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed double-word (32-bit) integers +INST3(psubq, "psubq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFB), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // subtract packed quad-word (64-bit) integers +INST3(psubw, "psubw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers +INST3(psubsb, "psubsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract 
packed 8-bit integers in b from packed 8-bit integers in a using saturation +INST3(psubsw, "psubsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation +INST3(psubusb, "psubusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation +INST3(psubusw, "psubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation +INST3(punpckhbw, "punpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) +INST3(punpckhdq, "punpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(punpckhqdq, "punpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (hi) +INST3(punpckhwd, "punpckhwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (hi) +INST3(punpcklbw, "punpcklbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (lo) +INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(punpcklqdq, "punpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (lo) +INST3(punpcklwd, "punpcklwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (lo) +INST3(pxord, "pxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs +INST3(shufpd, "shufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(sqrtpd, "sqrtpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Sqrt of packed doubles +INST3(sqrtsd, "sqrtsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) 
// Sqrt of scalar double +INST3(subpd, "subpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed doubles +INST3(subsd, "subsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar doubles +INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare doubles +INST3(unpckhpd, "unpckhpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) +INST3(unpcklpd, "unpcklpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) +INST3(xorpd, "xorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed doubles // SSE3 -INST3(addsubpd, "addsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles -INST3(addsubps, "addsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles -INST3(haddpd, "haddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles -INST3(haddps, "haddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats -INST3(hsubpd, "hsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles -INST3(hsubps, "hsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats -INST3(lddqu, "lddqu", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), INS_TT_NONE, REX_WIG | Encoding_VEX) // Load Unaligned integer -INST3(movddup, "movddup", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), INS_TT_MOVDDUP, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate Double FP Values -INST3(movshdup, "movshdup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate odd-indexed Single FP Values -INST3(movsldup, "movsldup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate even-indexed Single FP Values +INST3(addsubpd, "addsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles +INST3(addsubps, "addsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles
+INST3(haddpd, "haddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles
+INST3(haddps, "haddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats
+INST3(hsubpd, "hsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles
+INST3(hsubps, "hsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats
+INST3(lddqu, "lddqu", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Load Unaligned integer
+INST3(movddup, "movddup", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), INS_TT_MOVDDUP, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate Double FP Values
+INST3(movshdup, "movshdup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate odd-indexed Single FP Values
+INST3(movsldup, "movsldup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate even-indexed Single FP Values
// SSSE3
-INST3(pabsb, "pabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of bytes
-INST3(pabsd, "pabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Packed absolute value of 32-bit integers
-INST3(pabsw, "pabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 16-bit integers
-INST3(palignr, "palignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right
-INST3(phaddd, "phaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add
-INST3(phaddsw, "phaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation
-INST3(phaddw, "phaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers
-INST3(phsubd, "phsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers
-INST3(phsubsw, "phsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation
-INST3(phsubw, "phsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers
-INST3(pmaddubsw, "pmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04),
INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes -INST3(pmulhrsw, "pmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale -INST3(pshufb, "pshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes -INST3(psignb, "psignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), INS_TT_NONE, Input_8Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN -INST3(psignd, "psignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN -INST3(psignw, "psignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN - -// AESNI, PCLMULQDQ, & GFNI -INST3(aesdec, "aesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow -INST3(aesdeclast, "aesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow -INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow -INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow -INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation -INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist -INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords -INST3(gf2p8affineinvqb, "gf2p8affineinvqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCF), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Galois Field Affine Transformation Inverse -INST3(gf2p8affineqb, "gf2p8affineqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCE), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Galois Field Affine Transformation -INST3(gf2p8mulb, "gf2p8mulb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xCF), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Multiply Bytes +INST3(pabsb, "pabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of bytes +INST3(pabsd, "pabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 32-bit 
integers +INST3(pabsw, "pabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 16-bit integers +INST3(palignr, "palignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right +INST3(phaddd, "phaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add +INST3(phaddsw, "phaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation +INST3(phaddw, "phaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers +INST3(phsubd, "phsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers +INST3(phsubsw, "phsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation +INST3(phsubw, "phsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers +INST3(pmaddubsw, "pmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes +INST3(pmulhrsw, "pmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale +INST3(pshufb, "pshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes +INST3(psignb, "psignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN +INST3(psignd, "psignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN +INST3(psignw, "psignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN + +// AESNI +INST3(aesdec, "aesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow +INST3(aesdeclast, "aesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow +INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow +INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow +INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, 
SSE38(0xDB), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation
+INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist
+
+// PCLMULQDQ
+INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, KMask_Base1 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords
+
+// SHA
+INST3(sha1msg1, "sha1msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC9), INS_TT_FULL, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA1 Message Dwords
+INST3(sha1msg2, "sha1msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCA), INS_TT_FULL, REX_WIG) // Perform a Final Calculation for the Next Four SHA1 Message Dwords
+INST3(sha1nexte, "sha1nexte", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC8), INS_TT_FULL, REX_WIG) // Calculate SHA1 State Variable E After Four Rounds
+INST3(sha1rnds4, "sha1rnds4", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0xCC), INS_TT_FULL, REX_WIG) // Perform Four Rounds of SHA1 Operation
+INST3(sha256msg1, "sha256msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCC), INS_TT_FULL, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA256 Message Dwords
+INST3(sha256msg2, "sha256msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCD), INS_TT_FULL, REX_WIG) // Perform a Final Calculation for the Next Four SHA256 Message Dwords
+INST3(sha256rnds2, "sha256rnds2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCB), INS_TT_FULL, REX_WIG) // Perform Two Rounds of SHA256 Operation
+
+// GFNI
+INST3(gf2p8affineinvqb, "gf2p8affineinvqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCF), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Affine Transformation Inverse
+INST3(gf2p8affineqb, "gf2p8affineqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCE), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Affine Transformation
+INST3(gf2p8mulb, "gf2p8mulb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xCF), INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Multiply Bytes
// SSE4.1
-INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values
-INST3(blendps, "blendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values
-INST3(blendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_NONE, Input_64Bit | REX_W0) // Variable Blend Packed Doubles
-INST3(blendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_NONE, Input_32Bit | REX_W0) // Variable Blend Packed Singles
-INST3(dppd, "dppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs
-INST3(dpps, "dpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs
-INST3(extractps, "extractps", IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG |
Encoding_VEX | Encoding_EVEX) // Extract Packed Floating-Point Values -INST3(insertps, "insertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value -INST3(movntdqa, "movntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Load Double Quadword Non-Temporal Aligned Hint -INST3(mpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_TT_NONE, Input_8Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference -INST3(packusdw, "packusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Pack (narrow) int to unsigned short with saturation -INST3(pblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_TT_NONE, Input_8Bit | REX_W0) // Variable Blend Packed Bytes -INST3(pblendw, "pblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words -INST3(pcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_TT_FULL, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality -INST3(pextrb, "pextrb", IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Byte -INST3(pextrd, "pextrd", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Dword -INST3(pextrq, "pextrq", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // Extract Qword -INST3(pextrw_sse41, "pextrw", IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Word -INST3(phminposuw, "phminposuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX) // Packed Horizontal Word Minimum -INST3(pinsrb, "pinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte -INST3(pinsrd, "pinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword -INST3(pinsrq, "pinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword -INST3(pmaxsb, "pmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed bytes -INST3(pmaxsd, "pmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed maximum 32-bit signed integers -INST3(pmaxud, "pmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed 
maximum 32-bit unsigned integers -INST3(pmaxuw, "pmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 16-bit unsigned integers -INST3(pminsb, "pminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed bytes -INST3(pminsd, "pminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed minimum 32-bit signed integers -INST3(pminud, "pminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed minimum 32-bit unsigned integers -INST3(pminuw, "pminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 16-bit unsigned integers -INST3(pmovsxbd, "pmovsxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), INS_TT_QUARTER_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to int -INST3(pmovsxbq, "pmovsxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), INS_TT_EIGHTH_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to long -INST3(pmovsxbw, "pmovsxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), INS_TT_HALF_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to short -INST3(pmovsxdq, "pmovsxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), INS_TT_HALF_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed sign extend int to long -INST3(pmovsxwd, "pmovsxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), INS_TT_HALF_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to int -INST3(pmovsxwq, "pmovsxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), INS_TT_QUARTER_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to long -INST3(pmovzxbd, "pmovzxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x31), INS_TT_QUARTER_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to intg -INST3(pmovzxbq, "pmovzxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x32), INS_TT_EIGHTH_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to lon -INST3(pmovzxbw, "pmovzxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), INS_TT_HALF_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to short -INST3(pmovzxdq, "pmovzxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), INS_TT_HALF_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed zero extend int to long -INST3(pmovzxwd, "pmovzxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), INS_TT_HALF_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to int -INST3(pmovzxwq, "pmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), INS_TT_QUARTER_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to long -INST3(pmuldq, "pmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), INS_TT_FULL, Input_32Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed multiply 
32-bit signed integers and store 64-bit result -INST3(pmulld, "pmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result -INST3(ptest, "ptest", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x17), INS_TT_NONE, REX_WIG | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed logical compare -INST3(roundpd, "roundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Round packed double precision floating-point values -INST3(roundps, "roundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Round packed single precision floating-point values -INST3(roundsd, "roundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double precision floating-point values -INST3(roundss, "roundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single precision floating-point values +INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values +INST3(blendps, "blendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values +INST3(blendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Doubles +INST3(blendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Singles +INST3(dppd, "dppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs +INST3(dpps, "dpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs +INST3(extractps, "extractps", IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Extract Packed Floating-Point Values +INST3(insertps, "insertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value +INST3(movntdqa, "movntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Load Double Quadword Non-Temporal Aligned Hint +INST3(mpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference +INST3(packusdw, "packusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to 
unsigned short with saturation +INST3(pblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Bytes +INST3(pblendw, "pblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words +INST3(pcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality +INST3(pextrb, "pextrb", IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Byte +INST3(pextrd, "pextrd", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Dword +INST3(pextrq, "pextrq", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // Extract Qword +INST3(pextrw_sse41, "pextrw", IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Word +INST3(phminposuw, "phminposuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Packed Horizontal Word Minimum +INST3(pinsrb, "pinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte +INST3(pinsrd, "pinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword +INST3(pinsrq, "pinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword +INST3(pmaxsb, "pmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed bytes +INST3(pmaxsd, "pmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit signed integers +INST3(pmaxud, "pmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit unsigned integers +INST3(pmaxuw, "pmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 16-bit unsigned integers +INST3(pminsb, "pminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed bytes +INST3(pminsd, "pminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit signed integers +INST3(pminud, "pminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit unsigned integers +INST3(pminuw, "pminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | 
Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 16-bit unsigned integers +INST3(pmovsxbd, "pmovsxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to int +INST3(pmovsxbq, "pmovsxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to long +INST3(pmovsxbw, "pmovsxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to short +INST3(pmovsxdq, "pmovsxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed sign extend int to long +INST3(pmovsxwd, "pmovsxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to int +INST3(pmovsxwq, "pmovsxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to long +INST3(pmovzxbd, "pmovzxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x31), INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to intg +INST3(pmovzxbq, "pmovzxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x32), INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to lon +INST3(pmovzxbw, "pmovzxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to short +INST3(pmovzxdq, "pmovzxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed zero extend int to long +INST3(pmovzxwd, "pmovzxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to int +INST3(pmovzxwq, "pmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to long +INST3(pmuldq, "pmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), INS_TT_FULL, Input_32Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit signed integers and store 64-bit result +INST3(pmulld, "pmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result +INST3(ptest, "ptest", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x17), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed logical compare +INST3(roundpd, "roundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Round packed double precision floating-point values +INST3(roundps, "roundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Round packed single precision floating-point values 
+INST3(roundsd, "roundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double precision floating-point values
+INST3(roundss, "roundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single precision floating-point values
// SSE4.2
-INST3(pcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_TT_FULL, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality
+INST3(pcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality
INST3(LAST_SSE_INSTRUCTION, "LAST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
// AVX
-INST3(vblendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Doubles
-INST3(vblendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Singles
-INST3(vbroadcastf128, "broadcastf128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_TT_TUPLE4, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast packed float values read from memory to entire ymm register
-INST3(vbroadcastsd, "broadcastsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register
-INST3(vbroadcastss, "broadcastss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x18), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register
-INST3(vextractf128, "extractf128", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 128-bit packed floating point values
-INST3(vinsertf128, "insertf128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_TT_TUPLE4, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed floating point values
-INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_TT_NONE, Input_64Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
-INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
-INST3(vpblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_TT_NONE, Input_8Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes
-INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_TT_NONE, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values
-INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05),
INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values -INST3(vpermilpdvar, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values -INST3(vpermilps, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values -INST3(vpermilpsvar, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values -INST3(vtestpd, "testpd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0F), INS_TT_NONE, Input_64Bit | REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test -INST3(vtestps, "testps", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0E), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test -INST3(vzeroupper, "zeroupper", IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG | Encoding_VEX) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix) +INST3(vblendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Doubles +INST3(vblendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Singles +INST3(vbroadcastf32x4, "broadcastf128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast packed float values read from memory to entire ymm register +INST3(vbroadcastsd, "broadcastsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register +INST3(vbroadcastss, "broadcastss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x18), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register +INST3(vextractf32x4, "extractf128", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 128-bit packed floating point values +INST3(vinsertf32x4, "insertf128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed floating point values +INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores +INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed 
Single-Precision Floating-Point Loads and Stores
+INST3(vpblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes
+INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values
+INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
+INST3(vpermilpdvar, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
+INST3(vpermilps, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
+INST3(vpermilpsvar, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
+INST3(vtestpd, "testpd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0F), INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test
+INST3(vtestps, "testps", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0E), INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test
+INST3(vzeroupper, "zeroupper", IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG | Encoding_VEX) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
// AVX2
-INST3(vbroadcasti128, "broadcasti128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_TT_TUPLE4, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast packed integer values read from memory to entire ymm register
-INST3(vextracti128, "extracti128", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 128-bit packed integer values
-INST3(vgatherdpd, "gatherdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_TT_NONE, Input_64Bit | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices
-INST3(vgatherdps, "gatherdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices
-INST3(vgatherqpd, "gatherqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_TT_NONE, Input_64Bit | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices
-INST3(vgatherqps, "gatherqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices
-INST3(vinserti128, "inserti128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_TT_TUPLE4, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed integer values
-INST3(vpblendd, "pblendd", IUM_WR, BAD_CODE, BAD_CODE,
SSE3A(0x02), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed DWORDs -INST3(vpbroadcastb, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x78), INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int8 value from reg/memory to entire ymm register -INST3(vpbroadcastd, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x58), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int32 value from reg/memory to entire ymm register -INST3(vpbroadcastq, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Broadcast int64 value from reg/memory to entire ymm register -INST3(vpbroadcastw, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int16 value from reg/memory to entire ymm register -INST3(vperm2i128, "perm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register -INST3(vpermd, "permd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute Packed Doublewords Elements -INST3(vpermpd, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute Double-Precision Floating-Point Values -INST3(vpermps, "permps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute Single-Precision Floating-Point Elements -INST3(vpermq, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute 64-bit of input register -INST3(vpgatherdd, "pgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword -INST3(vpgatherdq, "pgatherdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword with Signed Dword Indices -INST3(vpgatherqd, "pgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword -INST3(vpgatherqq, "pgatherqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword with Signed Dword Indices -INST3(vpmaskmovd, "pmaskmovd", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Dword Loads and Stores -INST3(vpmaskmovq, "pmaskmovq", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_TT_NONE, Input_64Bit | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Qword Loads and Stores -INST3(vpsllvd, "psllvd", IUM_WR, BAD_CODE, 
BAD_CODE, SSE38(0x47), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Variable Bit Shift Left Logical -INST3(vpsllvq, "psllvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Variable Bit Shift Left Logical -INST3(vpsravd, "psravd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Variable Bit Shift Right Arithmetic -INST3(vpsrlvd, "psrlvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Variable Bit Shift Right Logical -INST3(vpsrlvq, "psrlvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Variable Bit Shift Right Logical +INST3(vbroadcasti32x4, "broadcasti128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast packed integer values read from memory to entire ymm register +INST3(vextracti32x4, "extracti128", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 128-bit packed integer values +INST3(vgatherdpd, "gatherdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices +INST3(vgatherdps, "gatherdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices +INST3(vgatherqpd, "gatherqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices +INST3(vgatherqps, "gatherqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices +INST3(vinserti32x4, "inserti128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed integer values +INST3(vpblendd, "pblendd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed DWORDs +INST3(vpbroadcastb, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x78), INS_TT_TUPLE1_SCALAR, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int8 value from reg/memory to entire ymm register +INST3(vpbroadcastd, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x58), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int32 value from reg/memory to entire ymm register +INST3(vpbroadcastq, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX 
| Encoding_EVEX) // Broadcast int64 value from reg/memory to entire ymm register +INST3(vpbroadcastw, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int16 value from reg/memory to entire ymm register +INST3(vperm2i128, "perm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register +INST3(vpermd, "permd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Packed Doublewords Elements +INST3(vpermpd, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX) // Permute Double-Precision Floating-Point Values +INST3(vpermps, "permps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Single-Precision Floating-Point Elements +INST3(vpermq, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX) // Permute 64-bit of input register +INST3(vpgatherdd, "pgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword +INST3(vpgatherdq, "pgatherdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword with Signed Dword Indices +INST3(vpgatherqd, "pgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword +INST3(vpgatherqq, "pgatherqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword with Signed Dword Indices +INST3(vpmaskmovd, "pmaskmovd", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Dword Loads and Stores +INST3(vpmaskmovq, "pmaskmovq", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_TT_FULL_MEM, REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Qword Loads and Stores +INST3(vpsllvd, "psllvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical +INST3(vpsllvq, "psllvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical +INST3(vpsravd, "psravd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic +INST3(vpsrlvd, "psrlvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // 
Variable Bit Shift Right Logical +INST3(vpsrlvq, "psrlvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical + +// F16C +INST3(vcvtph2ps, "cvtph2ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x13), INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Convert Packed FP16 Values to Single Precision Floating-Point Values +INST3(vcvtps2ph, "cvtps2ph", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1D), INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Convert Single Precision FP Value to 16-bit FP Value INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) // id nm um mr mi rm flags -INST3(vfmadd132pd, "fmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values -INST3(vfmadd213pd, "fmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmadd231pd, "fmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmadd132ps, "fmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values -INST3(vfmadd213ps, "fmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmadd231ps, "fmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmadd132sd, "fmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values -INST3(vfmadd213sd, "fmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd231sd, "fmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd132ss, "fmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values -INST3(vfmadd213ss, "fmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd231ss, "fmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmaddsub132pd, "fmaddsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values -INST3(vfmaddsub213pd, "fmaddsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmaddsub231pd, "fmaddsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmaddsub132ps, "fmaddsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values -INST3(vfmaddsub213ps, "fmaddsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmaddsub231ps, "fmaddsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmsubadd132pd, "fmsubadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values -INST3(vfmsubadd213pd, "fmsubadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmsubadd231pd, "fmsubadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmsubadd132ps, "fmsubadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values -INST3(vfmsubadd213ps, "fmsubadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmsubadd231ps, "fmsubadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmsub132pd, "fmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values -INST3(vfmsub213pd, "fmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX 
| INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmsub231pd, "fmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmsub132ps, "fmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values -INST3(vfmsub213ps, "fmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmsub231ps, "fmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfmsub132sd, "fmsub132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values -INST3(vfmsub213sd, "fmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub231sd, "fmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub132ss, "fmsub132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values -INST3(vfmsub213ss, "fmsub213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub231ss, "fmsub231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd132pd, "fnmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values -INST3(vfnmadd213pd, "fnmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfnmadd231pd, "fnmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfnmadd132ps, "fnmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values -INST3(vfnmadd213ps, "fnmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfnmadd231ps, "fnmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfnmadd132sd, "fnmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values -INST3(vfnmadd213sd, "fnmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd231sd, "fnmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd132ss, "fnmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values -INST3(vfnmadd213ss, "fnmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd231ss, "fnmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub132pd, "fnmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values -INST3(vfnmsub213pd, "fnmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfnmsub231pd, "fnmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfnmsub132ps, "fnmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values -INST3(vfnmsub213ps, "fnmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfnmsub231ps, "fnmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // -INST3(vfnmsub132sd, "fnmsub132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values -INST3(vfnmsub213sd, "fnmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub231sd, "fnmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub132ss, "fnmsub132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values -INST3(vfnmsub213ss, "fnmsub213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub231ss, "fnmsub231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd132pd, "fmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values +INST3(vfmadd213pd, "fmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd231pd, "fmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd132ps, "fmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values +INST3(vfmadd213ps, "fmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd231ps, "fmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd132sd, "fmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values +INST3(vfmadd213sd, "fmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd231sd, "fmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd132ss, "fmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values +INST3(vfmadd213ss, "fmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd231ss, "fmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmaddsub132pd, "fmaddsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values +INST3(vfmaddsub213pd, "fmaddsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmaddsub231pd, "fmaddsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmaddsub132ps, "fmaddsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values +INST3(vfmaddsub213ps, "fmaddsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmaddsub231ps, "fmaddsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsubadd132pd, "fmsubadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values +INST3(vfmsubadd213pd, "fmsubadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsubadd231pd, "fmsubadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsubadd132ps, "fmsubadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values +INST3(vfmsubadd213ps, "fmsubadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsubadd231ps, "fmsubadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub132pd, "fmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values +INST3(vfmsub213pd, "fmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub231pd, "fmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub132ps, "fmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, 
SSE38(0x9A), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values +INST3(vfmsub213ps, "fmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub231ps, "fmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub132sd, "fmsub132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values +INST3(vfmsub213sd, "fmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub231sd, "fmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub132ss, "fmsub132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values +INST3(vfmsub213ss, "fmsub213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub231ss, "fmsub231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd132pd, "fnmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values +INST3(vfnmadd213pd, "fnmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd231pd, "fnmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd132ps, "fnmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values +INST3(vfnmadd213ps, "fnmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd231ps, "fnmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd132sd, "fnmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of 
Scalar Double-Precision Floating-Point Values +INST3(vfnmadd213sd, "fnmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd231sd, "fnmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd132ss, "fnmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values +INST3(vfnmadd213ss, "fnmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd231ss, "fnmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub132pd, "fnmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values +INST3(vfnmsub213pd, "fnmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub231pd, "fnmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub132ps, "fnmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values +INST3(vfnmsub213ps, "fnmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub231ps, "fnmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub132sd, "fnmsub132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values +INST3(vfnmsub213sd, "fnmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub231sd, "fnmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub132ss, "fnmsub132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values +INST3(vfnmsub213ss, "fnmsub213ss", IUM_RW, 
BAD_CODE, BAD_CODE, SSE38(0xAF), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub231ss, "fnmsub231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVXVNNI_INSTRUCTION, "FIRST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) -INST3(vpdpbusd, "pdpbusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes -INST3(vpdpwssd, "pdpwssd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x52), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers -INST3(vpdpbusds, "pdpbusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes with Saturation -INST3(vpdpwssds, "pdpwssds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x53), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation +INST3(vpdpbusd, "pdpbusd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x50), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes +INST3(vpdpbusds, "pdpbusds", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x51), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes with Saturation +INST3(vpdpwssd, "pdpwssd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x52), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers +INST3(vpdpwssds, "pdpwssds", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x53), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) +INST3(FIRST_AVXIFMA_INSTRUCTION, "FIRST_AVXIFMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) +INST3(vpmadd52huq, "pmadd52huq", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB5), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply of Unsigned 52-Bit Unsigned Integers and Add High 52-Bit Products to 64-Bit Accumulators +INST3(vpmadd52luq, "pmadd52luq", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB4), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply of Unsigned 52-Bit Integers and Add the Low 52-Bit Products to Qword Accumulators +INST3(LAST_AVXIFMA_INSTRUCTION, "LAST_AVXIFMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) // BMI1 -INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF 
| Resets_CF | INS_Flags_Has_NF) // Logical AND NOT -INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_Has_NF) // Bit Field Extract -INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Extract Lowest Set Isolated Bit -INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Resets_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Get Mask Up to Lowest Set Bit -INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Reset Lowest Set Bit +INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_Has_NF) // Logical AND NOT +INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_Has_NF) // Bit Field Extract +INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Extract Lowest Set Isolated Bit +INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Resets_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Get Mask Up to Lowest Set Bit +INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Reset Lowest Set Bit // BMI2 -INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Zero High Bits Starting with Specified Bit Position -INST3(mulx, "mulx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags -INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit -INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract -INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_TT_NONE, REX_WX | Encoding_VEX) -#ifdef TARGET_AMD64 -INST3(sarx, "sarx", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0xF7), INS_TT_NONE, REX_WX | 
Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Arithmetic Right Without Affecting Flags -INST3(shlx, "shlx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Logical Left Without Affecting Flags -INST3(shrx, "shrx", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Logical Right Without Affecting Flags -#endif +INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Zero High Bits Starting with Specified Bit Position +INST3(mulx, "mulx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags +INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit +INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract +INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX) +INST3(sarx, "sarx", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Arithmetic Right Without Affecting Flags +INST3(shlx, "shlx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Logical Left Without Affecting Flags +INST3(shrx, "shrx", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Logical Right Without Affecting Flags INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) @@ -618,345 +659,565 @@ INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, INST3(FIRST_AVX512_INSTRUCTION, "FIRST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) // AVX512F -INST3(kandw, "kandw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks -INST3(kandnw, "kandnw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks -INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(knotw, "knotw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register -INST3(korw, "korw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks -INST3(kortestw, "kortestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | 
KInstruction) // OR masks and set flags -INST3(kshiftlw, "kshiftlw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x32), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift left mask registers -INST3(kshiftrw, "kshiftrw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x30), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift right mask registers -INST3(kunpckbw, "kunpckbw", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x4B), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Unpack for mask registers -INST3(kxnorw, "kxnorw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XNOR masks -INST3(kxorw, "kxorw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks -INST3(valignd, "alignd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Align doubleword vectors -INST3(valignq, "alignq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Align quadword vectors -INST3(vblendmpd, "blendmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Blend Float64 vectors using an OpMask control -INST3(vblendmps, "blendmps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Blend Float32 vectors using an OpMask control -INST3(vpblendmq, "pblendmq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Blend Int32 vectors using an OpMask control -INST3(vpblendmb, "pblendmb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Int64 vectors using an OpMask control -INST3(vbroadcastf64x2, "broadcastf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register -INST3(vbroadcasti64x2, "broadcasti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register -INST3(vbroadcastf64x4, "broadcastf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register -INST3(vbroadcasti64x4, "broadcasti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register -INST3(vcmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // compare packed singles -INST3(vcmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles -INST3(vcmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), 
INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // compare packed doubles -INST3(vcmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles -INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to unsigned DWORDs -INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to unsigned DWORDs -INST3(vcvtsd2usi, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD -INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD -INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed doubles to unsigned DWORDs -INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed singles to unsigned DWORDs -INST3(vcvttsd2usi32, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD -INST3(vcvttsd2usi64, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned QWORD -INST3(vcvttss2usi32, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD -INST3(vcvttss2usi64, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD -INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed unsigned DWORDs to doubles -INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed unsigned DWORDs to singles -INST3(vcvtusi2sd32, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to double -INST3(vcvtusi2sd64, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to double -INST3(vcvtusi2ss32, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to single -INST3(vcvtusi2ss64, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt 
scalar unsigned QWORD to single -INST3(vextractf64x4, "extractf64x4", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_64Bit | REX_W1 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values -INST3(vextracti64x4, "extracti64x4", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_64Bit | REX_W1 | Encoding_EVEX) // Extract 256-bit packed quadword integer values -INST3(vfixupimmpd, "fixupimmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x54), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fixup special packed double-precision floating-point values -INST3(vfixupimmps, "fixupimmps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x54), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fixup special packed single-precision floating-point values -INST3(vfixupimmsd, "fixupimmsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x55), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar double-precision floating-point value -INST3(vfixupimmss, "fixupimmss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x55), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar single-precision floating-point value -INST3(vgetexppd, "getexppd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Extract exponents of packed double-precision floating-point values -INST3(vgetexpps, "getexpps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Extract exponents of packed single-precision floating-point values -INST3(vgetexpsd, "getexpsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar double-precision floating-point value -INST3(vgetexpss, "getexpss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar single-precision floating-point value -INST3(vgetmantpd, "getmantpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Extract mantissas of packed double-precision floating-point values -INST3(vgetmantps, "getmantps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Extract mantissas of packed single-precision floating-point values -INST3(vgetmantsd, "getmantsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar double-precision floating-point value -INST3(vgetmantss, "getmantss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar single-precision floating-point value -INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values -INST3(vinserti64x4, 
"inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vmovdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) -INST3(vmovdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) -INST3(vpabsq, "pabsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Packed absolute value of 64-bit integers -INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise AND of two xmm regs -INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise AND NOT of two xmm regs -INST3(vpbroadcastd_gpr, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast int32 value from gpr to entire register -INST3(vpbroadcastq_gpr, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast int64 value from gpr to entire register -INST3(vpcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed compare 32-bit integers for equality -INST3(vpcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed compare 32-bit signed integers for greater than -INST3(vpcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed compare 64-bit integers for equality -INST3(vpcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed compare 64-bit integers for equality -INST3(vpermq_reg, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute 64-bit of input register -INST3(vpermpd_reg, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute 64-bit of input register -INST3(vpermi2d, "permi2d", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x76), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2pd, "permi2pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x77), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2ps, "permi2ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x77), 
INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2q, "permi2q", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x76), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting the Index -INST3(vpermt2d, "permt2d", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2pd, "permt2pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2ps, "permt2ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7F), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2q, "permt2q", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting one Table -INST3(vpmaxsq, "pmaxsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed maximum 64-bit signed integers -INST3(vpmaxuq, "pmaxuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed maximum 64-bit unsigned integers -INST3(vpminsq, "pminsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed minimum 64-bit signed integers -INST3(vpminuq, "pminuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed minimum 64-bit unsigned integers -INST3(vpmovdb, "pmovdb", IUM_WR, PSSE38(0xF3, 0x31), BAD_CODE, PSSE38(0xF3, 0x31), INS_TT_QUARTER_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovdw, "pmovdw", IUM_WR, PSSE38(0xF3, 0x33), BAD_CODE, PSSE38(0xF3, 0x33), INS_TT_HALF_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovqb, "pmovqb", IUM_WR, PSSE38(0xF3, 0x32), BAD_CODE, PSSE38(0xF3, 0x32), INS_TT_EIGHTH_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovqd, "pmovqd", IUM_WR, PSSE38(0xF3, 0x35), BAD_CODE, PSSE38(0xF3, 0x35), INS_TT_HALF_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovqw, "pmovqw", IUM_WR, PSSE38(0xF3, 0x34), BAD_CODE, PSSE38(0xF3, 0x34), INS_TT_QUARTER_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovsdb, "pmovsdb", IUM_WR, PSSE38(0xF3, 0x21), BAD_CODE, PSSE38(0xF3, 0x21), INS_TT_QUARTER_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovsdw, "pmovsdw", IUM_WR, PSSE38(0xF3, 0x23), BAD_CODE, PSSE38(0xF3, 0x23), INS_TT_HALF_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovsqb, "pmovsqb", IUM_WR, PSSE38(0xF3, 0x22), BAD_CODE, PSSE38(0xF3, 0x22), INS_TT_EIGHTH_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovsqd, "pmovsqd", 
IUM_WR, PSSE38(0xF3, 0x25), BAD_CODE, PSSE38(0xF3, 0x25), INS_TT_HALF_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovsqw, "pmovsqw", IUM_WR, PSSE38(0xF3, 0x24), BAD_CODE, PSSE38(0xF3, 0x24), INS_TT_QUARTER_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovusdb, "pmovusdb", IUM_WR, PSSE38(0xF3, 0x11), BAD_CODE, PSSE38(0xF3, 0x11), INS_TT_QUARTER_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovusdw, "pmovusdw", IUM_WR, PSSE38(0xF3, 0x13), BAD_CODE, PSSE38(0xF3, 0x13), INS_TT_HALF_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovusqb, "pmovusqb", IUM_WR, PSSE38(0xF3, 0x12), BAD_CODE, PSSE38(0xF3, 0x12), INS_TT_EIGHTH_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovusqd, "pmovusqd", IUM_WR, PSSE38(0xF3, 0x15), BAD_CODE, PSSE38(0xF3, 0x15), INS_TT_HALF_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovusqw, "pmovusqw", IUM_WR, PSSE38(0xF3, 0x14), BAD_CODE, PSSE38(0xF3, 0x14), INS_TT_QUARTER_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) -INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise OR of two xmm regs -INST3(vprold, "prold", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate left -INST3(vprolq, "prolq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate left -INST3(vprolvd, "prolvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate left -INST3(vprolvq, "prolvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate left -INST3(vprord, "prord", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate right -INST3(vprorq, "prorq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate right -INST3(vprorvd, "prorvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate right -INST3(vprorvq, "prorvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate right -INST3(vpsraq, "psraq", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift right arithmetic of 64-bit integers -INST3(vpsravq, "psravq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Variable Bit Shift Right Arithmetic -INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bitwise Ternary Logic -INST3(vpternlogq, "pternlogq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bitwise Ternary Logic -INST3(vptestmd, "ptestmd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Logical AND and set mask -INST3(vptestmq, "ptestmq", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Logical AND and set mask -INST3(vptestnmd, "ptestnmd", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Logical NAND and set mask -INST3(vptestnmq, "ptestnmq", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Logical NAND and set mask -INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise XOR of two xmm regs -INST3(vrangepd, "rangepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Range restriction calculation from a pair of packed double-precision floating-point values -INST3(vrangeps, "rangeps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Range restriction calculation from a pair of packed single-precision floating-point values -INST3(vrangesd, "rangesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar double-precision floating-point value -INST3(vrangess, "rangess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar single-precision floating-point value -INST3(vrcp14pd, "rcp14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Compute approximate reciprocals of packed double-precision floating-point values -INST3(vrcp14ps, "rcp14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Compute approximate reciprocals of packed single-precision floating-point values -INST3(vrcp14sd, "rcp14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar double-precision floating-point value -INST3(vrcp14ss, "rcp14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar single-precision 
floating-point value -INST3(vreducepd, "reducepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Perform a reduction transformation on packed double-precision floating-point values -INST3(vreduceps, "reduceps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Perform a reduction transformation on packed single-precision floating-point values -INST3(vreducesd, "reducesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar double-precision floating-point value -INST3(vreducess, "reducess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar single-precision floating-point value -INST3(vrndscalepd, "rndscalepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Round packed double-precision floating-point values to include a given number of fraction bits -INST3(vrndscaleps, "rndscaleps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Round packed single-precision floating-point values to include a given number of fraction bits -INST3(vrndscalesd, "rndscalesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double-precision floating-point value to include a given number of fraction bits -INST3(vrndscaless, "rndscaless", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single-precision floating-point value to include a given number of fraction bits -INST3(vrsqrt14pd, "rsqrt14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Compute approximate reciprocals of square roots of packed double-precision floating-point values -INST3(vrsqrt14ps, "rsqrt14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Compute approximate reciprocals of square roots of packed single-precision floating-point values -INST3(vrsqrt14sd, "rsqrt14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of square roots of scalar double-precision floating-point value -INST3(vrsqrt14ss, "rsqrt14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of square roots of scalar single-precision floating-point value -INST3(vscalefpd, "scalefpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Scale packed double-precision floating-point values -INST3(vscalefps, "scalefps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Scale packed single-precision floating-point values -INST3(vscalefsd, "scalefsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar double-precision floating-point value -INST3(vscalefss, "scalefss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar single-precision floating-point value -INST3(vshuff32x4, "shuff32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Shuffle packed values at 128-bit granularity -INST3(vshuff64x2, "shuff64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Shuffle packed values at 128-bit granularity -INST3(vshufi32x4, "shufi32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Shuffle packed values at 128-bit granularity -INST3(vshufi64x2, "shufi64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Shuffle packed values at 128-bit granularity +INST3(kandw, "kandw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks +INST3(kandnw, "kandnw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks +INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers +INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers +INST3(knotw, "knotw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register +INST3(korw, "korw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks +INST3(kortestw, "kortestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kshiftlw, "kshiftlw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x32), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift left mask registers +INST3(kshiftrw, "kshiftrw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x30), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift right mask registers +INST3(kunpckbw, "kunpckbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4B), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Unpack for mask registers +INST3(kxnorw, "kxnorw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XNOR masks +INST3(kxorw, "kxorw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | 
KInstructionWithLBit) // Bitwise logical XOR masks
+INST3(valignd, "alignd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Align doubleword vectors
+INST3(valignq, "alignq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Align quadword vectors
+INST3(vblendmpd, "blendmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Float64 vectors using an OpMask control
+INST3(vblendmps, "blendmps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Float32 vectors using an OpMask control
+INST3(vpblendmq, "pblendmq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Int32 vectors using an OpMask control
+INST3(vpblendmb, "pblendmb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), INS_TT_FULL_MEM, REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Int64 vectors using an OpMask control
+INST3(vbroadcastf64x2, "broadcastf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register
+INST3(vbroadcasti64x2, "broadcasti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register
+INST3(vbroadcastf64x4, "broadcastf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register
+INST3(vbroadcasti64x4, "broadcasti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register
+INST3(vcmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles
+INST3(vcmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles
+INST3(vcmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles
+INST3(vcmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles
+INST3(vcompressps, "compressps", IUM_WR, SSE38(0x8A), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Store sparse packed singles into dense memory
+INST3(vcompresspd, "compresspd", IUM_WR, SSE38(0x8A), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Store sparse packed doubles into dense memory
+INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned DWORDs
+INST3(vcvtps2udq, "cvtps2udq",
IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned DWORDs +INST3(vcvtsd2usi32, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD +INST3(vcvtsd2usi64, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD +INST3(vcvtss2usi32, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD +INST3(vcvtss2usi64, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD +INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs +INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs +INST3(vcvttsd2usi32, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD +INST3(vcvttsd2usi64, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned QWORD +INST3(vcvttss2usi32, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD +INST3(vcvttss2usi64, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD +INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to doubles +INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to singles +INST3(vcvtusi2sd32, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to double +INST3(vcvtusi2sd64, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to double +INST3(vcvtusi2ss32, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to single +INST3(vcvtusi2ss64, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to single +INST3(vexpandps, "expandps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x88), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Load sparse packed singles from dense memory +INST3(vexpandpd, "expandpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x88), INS_TT_TUPLE1_SCALAR, Input_64Bit | 
KMask_Base2 | REX_W1 | Encoding_EVEX) // Load sparse packed doubles from dense memory +INST3(vextractf64x4, "extractf64x4", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values +INST3(vextracti64x4, "extracti64x4", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed quadword integer values +INST3(vfixupimmpd, "fixupimmpd", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x54), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special packed double-precision floating-point values +INST3(vfixupimmps, "fixupimmps", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x54), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special packed single-precision floating-point values +INST3(vfixupimmsd, "fixupimmsd", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x55), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar double-precision floating-point value +INST3(vfixupimmss, "fixupimmss", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x55), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar single-precision floating-point value +INST3(vgatherdpd_msk, "gatherdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices +INST3(vgatherdps_msk, "gatherdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices +INST3(vgatherqpd_msk, "gatherqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices +INST3(vgatherqps_msk, "gatherqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices +INST3(vgetexppd, "getexppd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract exponents of packed double-precision floating-point values +INST3(vgetexpps, "getexpps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract exponents of packed single-precision floating-point values +INST3(vgetexpsd, "getexpsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar double-precision floating-point value +INST3(vgetexpss, "getexpss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar single-precision floating-point value +INST3(vgetmantpd, "getmantpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract mantissas of packed 
double-precision floating-point values +INST3(vgetmantps, "getmantps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract mantissas of packed single-precision floating-point values +INST3(vgetmantsd, "getmantsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar double-precision floating-point value +INST3(vgetmantss, "getmantss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar single-precision floating-point value +INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values +INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values +INST3(vmovdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX) +INST3(vmovdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX) +INST3(vpabsq, "pabsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1F), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Packed absolute value of 64-bit integers +INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs +INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs +INST3(vpbroadcastd_gpr, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast int32 value from gpr to entire register +INST3(vpbroadcastq_gpr, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast int64 value from gpr to entire register +INST3(vpcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality +INST3(vpcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than +INST3(vpcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality +INST3(vpcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality +INST3(vpcompressd, "pcompressd", IUM_WR, SSE38(0x8B), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Store sparse packed doublewords 
into dense memory +INST3(vpcompressq, "pcompressq", IUM_WR, SSE38(0x8B), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Store sparse packed quadwords into dense memory +INST3(vpermq_reg, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute 64-bit of input register +INST3(vpermpd_reg, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute 64-bit of input register +INST3(vpermi2d, "permi2d", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x76), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2pd, "permi2pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x77), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2ps, "permi2ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x77), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2q, "permi2q", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x76), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermt2d, "permt2d", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7E), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2pd, "permt2pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7F), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2ps, "permt2ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7F), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2q, "permt2q", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7E), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpexpandd, "pexpandd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x89), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Load sparse packed doublewords from dense memory +INST3(vpexpandq, "pexpandq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x89), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Load sparse packed quadwords from dense memory +INST3(vpgatherdd_msk, "pgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword +INST3(vpgatherdq_msk, "pgatherdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword with Signed Dword Indices +INST3(vpgatherqd_msk, "pgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed 
Qword +INST3(vpgatherqq_msk, "pgatherqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword with Signed Dword Indices +INST3(vpmaxsq, "pmaxsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 64-bit signed integers +INST3(vpmaxuq, "pmaxuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 64-bit unsigned integers +INST3(vpminsq, "pminsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit signed integers +INST3(vpminuq, "pminuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit unsigned integers +INST3(vpmovdb, "pmovdb", IUM_WR, PSSE38(0xF3, 0x31), BAD_CODE, PSSE38(0xF3, 0x31), INS_TT_QUARTER_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovdw, "pmovdw", IUM_WR, PSSE38(0xF3, 0x33), BAD_CODE, PSSE38(0xF3, 0x33), INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovqb, "pmovqb", IUM_WR, PSSE38(0xF3, 0x32), BAD_CODE, PSSE38(0xF3, 0x32), INS_TT_EIGHTH_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovqd, "pmovqd", IUM_WR, PSSE38(0xF3, 0x35), BAD_CODE, PSSE38(0xF3, 0x35), INS_TT_HALF_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovqw, "pmovqw", IUM_WR, PSSE38(0xF3, 0x34), BAD_CODE, PSSE38(0xF3, 0x34), INS_TT_QUARTER_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovsdb, "pmovsdb", IUM_WR, PSSE38(0xF3, 0x21), BAD_CODE, PSSE38(0xF3, 0x21), INS_TT_QUARTER_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovsdw, "pmovsdw", IUM_WR, PSSE38(0xF3, 0x23), BAD_CODE, PSSE38(0xF3, 0x23), INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovsqb, "pmovsqb", IUM_WR, PSSE38(0xF3, 0x22), BAD_CODE, PSSE38(0xF3, 0x22), INS_TT_EIGHTH_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovsqd, "pmovsqd", IUM_WR, PSSE38(0xF3, 0x25), BAD_CODE, PSSE38(0xF3, 0x25), INS_TT_HALF_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovsqw, "pmovsqw", IUM_WR, PSSE38(0xF3, 0x24), BAD_CODE, PSSE38(0xF3, 0x24), INS_TT_QUARTER_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovusdb, "pmovusdb", IUM_WR, PSSE38(0xF3, 0x11), BAD_CODE, PSSE38(0xF3, 0x11), INS_TT_QUARTER_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovusdw, "pmovusdw", IUM_WR, PSSE38(0xF3, 0x13), BAD_CODE, PSSE38(0xF3, 0x13), INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovusqb, "pmovusqb", IUM_WR, PSSE38(0xF3, 0x12), BAD_CODE, PSSE38(0xF3, 0x12), INS_TT_EIGHTH_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovusqd, "pmovusqd", IUM_WR, PSSE38(0xF3, 0x15), BAD_CODE, PSSE38(0xF3, 0x15), INS_TT_HALF_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovusqw, "pmovusqw", IUM_WR, PSSE38(0xF3, 0x14), BAD_CODE, PSSE38(0xF3, 0x14), INS_TT_QUARTER_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | 
Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs +INST3(vprold, "prold", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left +INST3(vprolq, "prolq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left +INST3(vprolvd, "prolvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left +INST3(vprolvq, "prolvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left +INST3(vprord, "prord", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right +INST3(vprorq, "prorq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right +INST3(vprorvd, "prorvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right +INST3(vprorvq, "prorvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right +INST3(vpsraq, "psraq", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 64-bit integers +INST3(vpsravq, "psravq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic +INST3(vpscatterdd_msk, "pscatterdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA0), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Dword Values Using Signed Dword +INST3(vpscatterdq_msk, "pscatterdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA0), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Dword with Signed Dword Indices +INST3(vpscatterqd_msk, "pscatterqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA1), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Dword Values Using Signed Qword +INST3(vpscatterqq_msk, "pscatterqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA1), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Qword with Signed Dword Indices +INST3(vpternlogd, "pternlogd", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bitwise Ternary Logic +INST3(vpternlogq, "pternlogq", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bitwise Ternary Logic +INST3(vptestmd, "ptestmd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask +INST3(vptestmq, "ptestmq", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask +INST3(vptestnmd, "ptestnmd", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask +INST3(vptestnmq, "ptestnmq", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask +INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs +INST3(vrangepd, "rangepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of packed double-precision floating-point values +INST3(vrangeps, "rangeps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of packed single-precision floating-point values +INST3(vrangesd, "rangesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar double-precision floating-point value +INST3(vrangess, "rangess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar single-precision floating-point value +INST3(vrcp14pd, "rcp14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute approximate reciprocals of packed double-precision floating-point values +INST3(vrcp14ps, "rcp14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute approximate reciprocals of packed single-precision floating-point values +INST3(vrcp14sd, "rcp14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar double-precision floating-point value +INST3(vrcp14ss, "rcp14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar single-precision floating-point value +INST3(vreducepd, "reducepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Perform a reduction transformation on packed double-precision floating-point values +INST3(vreduceps, "reduceps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Perform a reduction transformation on packed single-precision floating-point values +INST3(vreducesd, "reducesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | 
INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar double-precision floating-point value +INST3(vreducess, "reducess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar single-precision floating-point value +INST3(vrndscalepd, "rndscalepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Round packed double-precision floating-point values to include a given number of fraction bits +INST3(vrndscaleps, "rndscaleps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Round packed single-precision floating-point values to include a given number of fraction bits +INST3(vrndscalesd, "rndscalesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double-precision floating-point value to include a given number of fraction bits +INST3(vrndscaless, "rndscaless", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single-precision floating-point value to include a given number of fraction bits +INST3(vrsqrt14pd, "rsqrt14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute approximate reciprocals of square roots of packed double-precision floating-point values +INST3(vrsqrt14ps, "rsqrt14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute approximate reciprocals of square roots of packed single-precision floating-point values +INST3(vrsqrt14sd, "rsqrt14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of square roots of scalar double-precision floating-point value +INST3(vrsqrt14ss, "rsqrt14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of square roots of scalar single-precision floating-point value +INST3(vscalefpd, "scalefpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale packed double-precision floating-point values +INST3(vscalefps, "scalefps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale packed single-precision floating-point values +INST3(vscalefsd, "scalefsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar double-precision floating-point value +INST3(vscalefss, "scalefss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar single-precision floating-point value +INST3(vscatterdps_msk, "scatterdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA2), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | 
Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float32 Values Using Signed Dword
+INST3(vscatterdpd_msk, "scatterdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA2), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float64 with Signed Dword Indices
+INST3(vscatterqps_msk, "scatterqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA3), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float32 Values Using Signed Qword
+INST3(vscatterqpd_msk, "scatterqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA3), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float64 with Signed Dword Indices
+INST3(vshuff32x4, "shuff32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity
+INST3(vshuff64x2, "shuff64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity
+INST3(vshufi32x4, "shufi32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity
+INST3(vshufi64x2, "shufi64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity
// AVX512BW
-INST3(kaddd, "kaddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4A), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Add two masks
-INST3(kaddq, "kaddq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4A), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Add two masks
-INST3(kandd, "kandd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x41), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks
-INST3(kandq, "kandq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks
-INST3(kandnd, "kandnd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks
-INST3(kandnq, "kandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks
-INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers
-INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers
-INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers
-INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers
-INST3(knotd, "knotd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register
-INST3(knotq, "knotq", IUM_WR, BAD_CODE, BAD_CODE,
PCKFLT(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register -INST3(kord, "kord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks -INST3(korq, "korq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks -INST3(kortestd, "kortestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags -INST3(kortestq, "kortestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags -INST3(kshiftld, "kshiftld", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x33), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers -INST3(kshiftlq, "kshiftlq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x33), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift left mask registers -INST3(kshiftrd, "kshiftrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift right mask registers -INST3(kshiftrq, "kshiftrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift right mask registers -INST3(ktestd, "ktestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(ktestq, "ktestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(kunpckdq, "kunpckdq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x4B), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Unpack for mask registers -INST3(kunpckwd, "kunpckwd", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x4B), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Unpack for mask registers -INST3(kxnord, "kxnord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x46), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XNOR masks -INST3(kxnorq, "kxnorq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x46), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XNOR masks -INST3(kxord, "kxord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x47), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks -INST3(kxorq, "kxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks -INST3(vpblendmd, "pblendmd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Blend Byte vectors using an OpMask control -INST3(vpblendmw, "pblendmw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Word vectors using an OpMask control -INST3(vdbpsadbw, "dbpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Double block packed Sum-Absolute-Differences (SAD) on unsigned bytes -INST3(vmovdqu8, "movdqu8", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX) -INST3(vmovdqu16, "movdqu16", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX) -INST3(vpbroadcastb_gpr, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7A), INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_EVEX) // Broadcast int8 value from gpr to entire register -INST3(vpbroadcastw_gpr, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7B), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Broadcast int16 value from gpr to entire register -INST3(vpcmpb, "pcmpb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality -INST3(vpcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality -INST3(vpcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than -INST3(vpcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than -INST3(vpcmpw, "pcmpw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpcmpub, "pcmpub", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpcmpuw, "pcmpuw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpermw, "permw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Doublewords Elements -INST3(vpermi2w, "permi2w", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x75), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermt2w, "permt2w", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7D), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table -INST3(vpmovb2m, "pmovb2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), INS_TT_NONE, Input_8Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovm2b, "pmovm2b", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), INS_TT_NONE, Input_8Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovm2w, "pmovm2w", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), INS_TT_NONE, Input_16Bit | REX_W1 | Encoding_EVEX) -INST3(vpmovw2m, "pmovw2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), INS_TT_NONE, Input_16Bit | REX_W1 | Encoding_EVEX) -INST3(vpmovwb, "pmovwb", IUM_WR, PSSE38(0xF3, 0x30), BAD_CODE, PSSE38(0xF3, 0x30), INS_TT_HALF_MEM, Input_16Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovswb, "pmovswb", 
IUM_WR, PSSE38(0xF3, 0x20), BAD_CODE, PSSE38(0xF3, 0x20), INS_TT_HALF_MEM, Input_16Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovuswb, "pmovuswb", IUM_WR, PSSE38(0xF3, 0x10), BAD_CODE, PSSE38(0xF3, 0x10), INS_TT_HALF_MEM, Input_16Bit | REX_W0 | Encoding_EVEX) -INST3(vpsllvw, "psllvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x12), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical -INST3(vpsravw, "psravw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x11), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic -INST3(vpsrlvw, "psrlvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical -INST3(vptestmb, "ptestmb", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x26), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask -INST3(vptestmw, "ptestmw", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x26), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask -INST3(vptestnmb, "ptestnmb", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x26), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask -INST3(vptestnmw, "ptestnmw", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x26), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask +INST3(kaddd, "kaddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4A), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Add two masks +INST3(kaddq, "kaddq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4A), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Add two masks +INST3(kandd, "kandd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x41), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks +INST3(kandq, "kandq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks +INST3(kandnd, "kandnd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks +INST3(kandnq, "kandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks +INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers +INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers +INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W1 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers +INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers +INST3(knotd, "knotd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register +INST3(knotq, "knotq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | 
KInstruction) // NOT mask register +INST3(kord, "kord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks +INST3(korq, "korq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks +INST3(kortestd, "kortestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kortestq, "kortestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kshiftld, "kshiftld", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x33), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers +INST3(kshiftlq, "kshiftlq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x33), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift left mask registers +INST3(kshiftrd, "kshiftrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift right mask registers +INST3(kshiftrq, "kshiftrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift right mask registers +INST3(ktestd, "ktestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(ktestq, "ktestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(kunpckdq, "kunpckdq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4B), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Unpack for mask registers +INST3(kunpckwd, "kunpckwd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4B), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Unpack for mask registers +INST3(kxnord, "kxnord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x46), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XNOR masks +INST3(kxnorq, "kxnorq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x46), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XNOR masks +INST3(kxord, "kxord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x47), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks +INST3(kxorq, "kxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks +INST3(vpblendmd, "pblendmd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Byte vectors using an OpMask control +INST3(vpblendmw, "pblendmw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Word vectors using an OpMask control +INST3(vdbpsadbw, "dbpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_TT_FULL_MEM, KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Double block packed Sum-Absolute-Differences (SAD) on unsigned bytes +INST3(vmovdqu8, 
"movdqu8", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, REX_W0 | Encoding_EVEX) +INST3(vmovdqu16, "movdqu16", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX) +INST3(vpbroadcastb_gpr, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7A), INS_TT_TUPLE1_SCALAR, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Broadcast int8 value from gpr to entire register +INST3(vpbroadcastw_gpr, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7B), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Broadcast int16 value from gpr to entire register +INST3(vpcmpb, "pcmpb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality +INST3(vpcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality +INST3(vpcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than +INST3(vpcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than +INST3(vpcmpw, "pcmpw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpcmpub, "pcmpub", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpcmpuw, "pcmpuw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpermw, "permw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Doublewords Elements +INST3(vpermi2w, "permi2w", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x75), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermt2w, "permt2w", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7D), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpmovb2m, "pmovb2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), INS_TT_NONE, REX_W0 | Encoding_EVEX) +INST3(vpmovm2b, "pmovm2b", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), INS_TT_NONE, REX_W0 | Encoding_EVEX) +INST3(vpmovm2w, "pmovm2w", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), INS_TT_NONE, REX_W1 | Encoding_EVEX) +INST3(vpmovw2m, "pmovw2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), INS_TT_NONE, REX_W1 | Encoding_EVEX) +INST3(vpmovwb, "pmovwb", IUM_WR, PSSE38(0xF3, 0x30), BAD_CODE, PSSE38(0xF3, 0x30), INS_TT_HALF_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) +INST3(vpmovswb, "pmovswb", IUM_WR, PSSE38(0xF3, 0x20), BAD_CODE, PSSE38(0xF3, 0x20), INS_TT_HALF_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) +INST3(vpmovuswb, "pmovuswb", 
IUM_WR, PSSE38(0xF3, 0x10), BAD_CODE, PSSE38(0xF3, 0x10), INS_TT_HALF_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX)
+INST3(vpsllvw, "psllvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x12), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical
+INST3(vpsravw, "psravw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x11), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic
+INST3(vpsrlvw, "psrlvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical
+INST3(vptestmb, "ptestmb", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x26), INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask
+INST3(vptestmw, "ptestmw", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x26), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask
+INST3(vptestnmb, "ptestnmb", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x26), INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask
+INST3(vptestnmw, "ptestnmw", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x26), INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask
// AVX512CD
-INST3(vpconflictd, "pconflictd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Detect conflicts within a vector of packed dword values into dense memory/register
-INST3(vpconflictq, "pconflictq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Detect conflicts within a vector of packed qword values into dense memory/register
-INST3(vplzcntd, "plzcntd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Count the number of leading zero bits for packed dword values
-INST3(vplzcntq, "plzcntq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Count the number of leading zero bits for packed qword values
+INST3(vpconflictd, "pconflictd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Detect conflicts within a vector of packed dword values into dense memory/register
+INST3(vpconflictq, "pconflictq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Detect conflicts within a vector of packed qword values into dense memory/register
+INST3(vplzcntd, "plzcntd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Count the number of leading zero bits for packed dword values
+INST3(vplzcntq, "plzcntq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Count the number of leading zero bits for packed qword values
// AVX512DQ
-INST3(kaddb, "kaddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4A), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Add two masks
-INST3(kaddw, "kaddw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4A),
INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Add two masks -INST3(kandb, "kandb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks -INST3(kandnb, "kandnb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks -INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(knotb, "knotb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register -INST3(korb, "korb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks -INST3(kortestb, "kortestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags -INST3(kshiftlb, "kshiftlb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x32), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers -INST3(kshiftrb, "kshiftrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x30), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift right mask registers -INST3(ktestb, "ktestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(ktestw, "ktestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(kxnorb, "kxnorb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XNOR masks -INST3(kxorb, "kxorb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x47), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks -INST3(vbroadcastf32x2, "broadcastf32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_TT_TUPLE2, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register -INST3(vbroadcasti32x2, "broadcasti32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_TT_TUPLE2, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register -INST3(vbroadcastf32x8, "broadcastf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register -INST3(vbroadcasti32x8, "broadcasti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register -INST3(vcvtpd2qq, "cvtpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to signed QWORDs -INST3(vcvtpd2uqq, "cvtpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | 
INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to unsigned QWORDs -INST3(vcvtps2qq, "cvtps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to signed QWORDs -INST3(vcvtps2uqq, "cvtps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to unsigned QWORDs -INST3(vcvtqq2pd, "cvtqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed signed QWORDs to doubles -INST3(vcvtqq2ps, "cvtqq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed signed QWORDs to singles -INST3(vcvttpd2qq, "cvttpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed doubles to signed QWORDs -INST3(vcvttpd2uqq, "cvttpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed doubles to unsigned QWORDs -INST3(vcvttps2qq, "cvttps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed singles to signed QWORDs -INST3(vcvttps2uqq, "cvttps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed singles to unsigned QWORDs -INST3(vcvtuqq2pd, "cvtuqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed signed QWORDs to doubles -INST3(vcvtuqq2ps, "cvtuqq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed signed QWORDs to singles -INST3(vextractf32x8, "extractf32x8", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values -INST3(vextractf64x2, "extractf64x2", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values -INST3(vextracti32x8, "extracti32x8", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Extract 256-bit packed quadword integer values -INST3(vextracti64x2, "extracti64x2", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Extract 256-bit packed quadword integer values -INST3(vinsertf32x8, "insertf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values -INST3(vinsertf64x2, "insertf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values -INST3(vinserti32x8, "inserti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vinserti64x2, "inserti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vpcmpd, "pcmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask | INS_Flags_EmbeddedBroadcastSupported) -INST3(vpcmpq, "pcmpq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask | INS_Flags_EmbeddedBroadcastSupported) -INST3(vpcmpud, "pcmpud", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask | INS_Flags_EmbeddedBroadcastSupported) -INST3(vpcmpuq, "pcmpuq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask | INS_Flags_EmbeddedBroadcastSupported) -INST3(vpmovd2m, "pmovd2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovm2d, "pmovm2d", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_EVEX) -INST3(vpmovm2q, "pmovm2q", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), INS_TT_NONE, Input_64Bit | REX_W1 | Encoding_EVEX) -INST3(vpmovq2m, "pmovq2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), INS_TT_NONE, Input_64Bit | REX_W1 | Encoding_EVEX) -INST3(vpmullq, "pmullq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed multiply 64 bit unsigned integers and store lower 64 bits of each result - -// AVX512VBMI -INST3(vpermb, "permb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Byte Elements -INST3(vpermi2b, "permi2b", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x75), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting the Index -INST3(vpermt2b, "permt2b", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7D), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table -INST3(vpmultishiftqb, "pmultishiftqb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x83), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Select Packed Unaligned Bytes From Quadword Sources +INST3(kaddb, "kaddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4A), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Add two masks +INST3(kaddw, "kaddw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4A), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Add two masks +INST3(kandb, "kandb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks +INST3(kandnb, "kandnb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks +INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | 
Encoding_EVEX | KInstruction) // Move from and to mask registers +INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers +INST3(knotb, "knotb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register +INST3(korb, "korb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks +INST3(kortestb, "kortestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kshiftlb, "kshiftlb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x32), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers +INST3(kshiftrb, "kshiftrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x30), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift right mask registers +INST3(ktestb, "ktestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(ktestw, "ktestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(kxnorb, "kxnorb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XNOR masks +INST3(kxorb, "kxorb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x47), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks +INST3(vbroadcastf32x2, "broadcastf32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_TT_TUPLE2, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register +INST3(vbroadcasti32x2, "broadcasti32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_TT_TUPLE2, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register +INST3(vbroadcastf32x8, "broadcastf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register +INST3(vbroadcasti32x8, "broadcasti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register +INST3(vcvtpd2qq, "cvtpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed doubles to signed QWORDs +INST3(vcvtpd2uqq, "cvtpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned QWORDs +INST3(vcvtps2qq, "cvtps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_HALF, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt packed singles to signed QWORDs +INST3(vcvtps2uqq, "cvtps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_HALF, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned QWORDs +INST3(vcvtqq2pd, "cvtqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_TT_FULL, Input_64Bit | 
KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to doubles +INST3(vcvtqq2ps, "cvtqq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to singles +INST3(vcvttpd2qq, "cvttpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to signed QWORDs +INST3(vcvttpd2uqq, "cvttpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned QWORDs +INST3(vcvttps2qq, "cvttps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_HALF, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to signed QWORDs +INST3(vcvttps2uqq, "cvttps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_HALF, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned QWORDs +INST3(vcvtuqq2pd, "cvtuqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to doubles +INST3(vcvtuqq2ps, "cvtuqq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to singles +INST3(vextractf32x8, "extractf32x8", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values +INST3(vextractf64x2, "extractf64x2", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values +INST3(vextracti32x8, "extracti32x8", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract 256-bit packed quadword integer values +INST3(vextracti64x2, "extracti64x2", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed quadword integer values +INST3(vfpclasspd, "fpclasspd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x66), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Test Types of Packed Float64 Values +INST3(vfpclassps, "fpclassps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x66), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Test Types of Packed Float32 Values +INST3(vfpclasssd, "fpclasssd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x67), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Test Types of Scalar Float64 Values +INST3(vfpclassss, "fpclassss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x67), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Test Types of Scalar Float32 Values +INST3(vinsertf32x8, "insertf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values +INST3(vinsertf64x2, "insertf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values +INST3(vinserti32x8, "inserti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | 
Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values +INST3(vinserti64x2, "inserti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values +INST3(vpcmpd, "pcmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) +INST3(vpcmpq, "pcmpq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) +INST3(vpcmpud, "pcmpud", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) +INST3(vpcmpuq, "pcmpuq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) +INST3(vpmovd2m, "pmovd2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), INS_TT_NONE, REX_W0 | Encoding_EVEX) +INST3(vpmovm2d, "pmovm2d", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), INS_TT_NONE, REX_W0 | Encoding_EVEX) +INST3(vpmovm2q, "pmovm2q", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), INS_TT_NONE, REX_W1 | Encoding_EVEX) +INST3(vpmovq2m, "pmovq2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), INS_TT_NONE, REX_W1 | Encoding_EVEX) +INST3(vpmullq, "pmullq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 64 bit unsigned integers and store lower 64 bits of each result + +// AVX512-BITALG +INST3(vpopcntb, "popcntb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x54), INS_TT_FULL_MEM, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in BYTE +INST3(vpopcntw, "popcntw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x54), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in WORD +INST3(vpshufbitqmb, "pshufbitqmb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8F), INS_TT_FULL_MEM, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Shuffle Bits From Quadword Elements Using Byte Indexes Into Mask + +// AVX512-BF16 +INST3(vcvtne2ps2bf16, "cvtne2ps2bf16", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x72), INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Two Packed Single Data to One Packed BF16 Data +INST3(vcvtneps2bf16, "cvtneps2bf16", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x72), INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Two Packed Single Data to One Packed BF16 Data +INST3(vdpbf16ps, "dpbf16ps", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x52), INS_TT_FULL, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Dot Product of BF16 Pairs Accumulated Into Packed Single Precision + +// AVX512-FP16 +INST3(vaddph, "addph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x58), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Add Packed FP16 Values +INST3(vaddsh, "addsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x58), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Add Scalar FP16 Values +INST3(vcmpph, "cmpph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0xC2), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compare Packed FP16 Values +INST3(vcmpsh, "cmpsh", IUM_WR, 
BAD_CODE, BAD_CODE, PSSE3A(0xF3, 0xC2), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compare Scalar FP16 Values +INST3(vcomish, "comish", IUM_RD, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x2F), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Compare Scalar Ordered FP16 Values and Set EFLAGS +INST3(vcvtdq2ph, "cvtdq2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5B), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Signed DWORD Integers to Packed FP16 Values +INST3(vcvtpd2ph, "cvtpd2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x5A), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Double Precision FP Values to Packed FP16 Values +INST3(vcvtph2dq, "cvtph2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x5B), INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed DWORD Integers +INST3(vcvtph2pd, "cvtph2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5A), INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Double Precision FP Values +INST3(vcvtph2psx, "cvtph2psx", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x13), INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Single Precision FP Values +INST3(vcvtph2qq, "cvtph2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7B), INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed QWORD Integers +INST3(vcvtph2udq, "cvtph2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x79), INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Unsigned DWORD Integers +INST3(vcvtph2uqq, "cvtph2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x79), INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Unsigned QWORD Integers +INST3(vcvtph2uw, "cvtph2uw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x7D), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Unsigned WORD Integers +INST3(vcvtph2w, "cvtph2w", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7D), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed WORD Integers +INST3(vcvtps2phx, "cvtps2phx", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x1D), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values +INST3(vcvtqq2ph, "cvtqq2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5B), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Signed QWORD Integers to Packed FP16 Values +INST3(vcvtsd2sh, "cvtsd2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x5A), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Double Precision FP Value to Scalar FP16 Value +INST3(vcvtsh2sd, "cvtsh2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5A), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Double Precision FP Value +INST3(vcvtsh2si32, "cvtsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2D), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Signed DWORD 
Integer +INST3(vcvtsh2si64, "cvtsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2D), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Signed QWORD Integer +INST3(vcvtsh2ss, "cvtsh2ss", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x13), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Single Precision FP Value +INST3(vcvtsh2usi32, "cvtsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x79), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Unsigned DWORD Integer +INST3(vcvtsh2usi64, "cvtsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x79), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Unsigned QWORD Integer +INST3(vcvtsi2sh32, "cvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Signed DWORD Integer to Scalar FP16 Value +INST3(vcvtsi2sh64, "cvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Signed QWORD Integer to Scalar FP16 Value +INST3(vcvtss2sh, "cvtss2sh", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x1D), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Single Precision FP Value to Scalar FP16 Value +INST3(vcvttph2dq, "cvttph2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5B), INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed DWORD Integers +INST3(vcvttph2qq, "cvttph2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7A), INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed QWORD Integers +INST3(vcvttph2udq, "cvttph2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x78), INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned DWORD Integers +INST3(vcvttph2uqq, "cvttph2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x78), INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned QWORD Integers +INST3(vcvttph2uw, "cvttph2uw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x7C), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned WORD Integers +INST3(vcvttph2w, "cvttph2w", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7C), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed WORD Integers +INST3(vcvttsh2si32, "cvttsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2C), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Signed DWORD Integer +INST3(vcvttsh2si64, "cvttsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2C), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Signed QWORD Integer +INST3(vcvttsh2usi32, "cvttsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x78), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | 
Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Unsigned DWORD Integer +INST3(vcvttsh2usi64, "cvttsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x78), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Unsigned QWORD Integer +INST3(vcvtudq2ph, "cvtudq2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7A), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values +INST3(vcvtuqq2ph, "cvtuqq2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7A), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values +INST3(vcvtusi2sh32, "cvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Unsigned DWORD Integer to Scalar FP16 Value +INST3(vcvtusi2sh64, "cvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Unsigned QWORD Integer to Scalar FP16 Value +INST3(vcvtuw2ph, "cvtuw2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7D), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values +INST3(vcvtw2ph, "cvtw2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7D), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values +INST3(vdivph, "divph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5E), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Divide Packed FP16 Values +INST3(vdivsh, "divsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5E), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Divide Scalar FP16 Values +INST3(vfcmaddcph, "fcmaddcph", IUM_RW, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0x56), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Packed FP16 Values +INST3(vfcmaddcsh, "fcmaddcsh", IUM_RW, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0x57), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Scalar FP16 Values +INST3(vfmaddcph, "fmaddcph", IUM_RW, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0x56), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Packed FP16 Values +INST3(vfmaddcsh, "fmaddcsh", IUM_RW, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0x57), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Scalar FP16 Values +INST3(vfcmulcph, "fcmulcph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0xD6), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply Packed FP16 Values +INST3(vfcmulcsh, "fcmulcsh", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0xD7), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply Scalar FP16 Values +INST3(vfmulcph, "fmulcph", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0xD6), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply Packed FP16 Values +INST3(vfmulcsh, "fmulcsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0xD7), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply Scalar 
FP16 Values +INST3(vfmadd132ph, "vfmadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x98), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values +INST3(vfmadd132sh, "vfmadd132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x99), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values +INST3(vfmadd213ph, "vfmadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA8), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values +INST3(vfmadd213sh, "vfmadd213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA9), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values +INST3(vfmadd231ph, "vfmadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB8), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values +INST3(vfmadd231sh, "vfmadd231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB9), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values +INST3(vfnmadd132ph, "vfnmadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9C), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Packed FP16 Values +INST3(vfnmadd132sh, "vfnmadd132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9D), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Scalar FP16 Values +INST3(vfnmadd213ph, "vfnmadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAC), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Packed FP16 Values +INST3(vfnmadd213sh, "vfnmadd213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAD), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Scalar FP16 Values +INST3(vfnmadd231ph, "vfnmadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBC), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Packed FP16 Values +INST3(vfnmadd231sh, "vfnmadd231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBD), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Scalar FP16 Values +INST3(vfmaddsub132ph, "vfmaddsub132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x96), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Add/Subtract of Packed FP16 Values +INST3(vfmaddsub213ph, "vfmaddsub213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA6), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Add/Subtract of Packed FP16 Values +INST3(vfmaddsub231ph, "vfmaddsub231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB6), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Add/Subtract of Packed FP16 Values +INST3(vfmsub132ph, "vfmsub132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9A), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Packed FP16 Values +INST3(vfmsub132sh, "vfmsub132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9B), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused 
Multiply-Subtract of Scalar FP16 Values +INST3(vfmsub213ph, "vfmsub213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAA), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Packed FP16 Values +INST3(vfmsub213sh, "vfmsub213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAB), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Scalar FP16 Values +INST3(vfmsub231ph, "vfmsub231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBA), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Packed FP16 Values +INST3(vfmsub231sh, "vfmsub231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBB), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Scalar FP16 Values +INST3(vfnmsub132ph, "vfnmsub132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9E), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values +INST3(vfnmsub132sh, "vfnmsub132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9F), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values +INST3(vfnmsub213ph, "vfnmsub213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAE), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values +INST3(vfnmsub213sh, "vfnmsub213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAF), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values +INST3(vfnmsub231ph, "vfnmsub231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBE), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values +INST3(vfnmsub231sh, "vfnmsub231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBF), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values +INST3(vfmsubadd132ph, "vfmsubadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x97), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Subtract/Add of Packed FP16 Values +INST3(vfmsubadd213ph, "vfmsubadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA7), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Subtract/Add of Packed FP16 Values +INST3(vfmsubadd231ph, "vfmsubadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB7), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Subtract/Add of Packed FP16 Values +INST3(vfpclassph, "fpclassph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x66), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Test Types of Packed FP16 Values +INST3(vfpclasssh, "fpclasssh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x67), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Test Types of Scalar FP16 Values +INST3(vgetexpph, "getexpph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x42), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Exponents of Packed FP16 Values to FP16 Values +INST3(vgetexpsh, "getexpsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x43), INS_TT_TUPLE1_SCALAR, Input_16Bit | 
KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Exponents of Scalar FP16 Values to FP16 Values +INST3(vgetmantph, "getmantph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x26), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Extract Normalized Mantissas from Packed FP16 Values +INST3(vgetmantsh, "getmantsh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x27), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Extract Normalized Mantissas from Scalar FP16 Values +INST3(vmaxph, "maxph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5F), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Return Maximum of Packed FP16 Values +INST3(vmaxsh, "maxsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5F), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Return Maximum of Scalar FP16 Values +INST3(vminph, "minph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x00, 0x5D), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Return Minimum of Packed FP16 Values +INST3(vminsh, "minsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x00, 0x5D), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Return Minimum of Scalar FP16 Values +INST3(vmovsh, "movsh", IUM_WR, SSEFLTMAP(0x00, 0x11), BAD_CODE, SSEFLTMAP(0x00, 0x10), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move Scalar FP16 Value +INST3(vmovw, "movw", IUM_WR, PCKDBLMAP(0x06, 0x7E), BAD_CODE, PCKDBLMAP(0x00, 0x6E), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_WIG | Encoding_EVEX) // Move Word +INST3(vmulph, "mulph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x59), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Multiply Packed FP16 Values +INST3(vmulsh, "mulsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x59), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Multiply Scalar FP16 Values +INST3(vrcpph, "rcpph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4C), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Packed FP16 Values +INST3(vrcpsh, "rcpsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4D), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Scalar FP16 Values +INST3(vreduceph, "reduceph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x56), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Perform Reduction Transformation on Packed FP16 Values +INST3(vreducesh, "reducesh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x57), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Perform Reduction Transformation on Scalar FP16 Values +INST3(vrndscaleph, "rndscaleph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x08), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Round Packed FP16 Values to Include a Given Number of Fraction Bits +INST3(vrndscalesh, "rndscalesh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x0A), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Round Scalar FP16 Values to Include a Given Number of Fraction Bits +INST3(vrsqrtph, "rsqrtph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4E), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Square Roots of Packed FP16 Values +INST3(vrsqrtsh, "rsqrtsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4F), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) 
// Compute REciprocals of Square Roots of Scalar FP16 Values +INST3(vscalefph, "scalefph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x2C), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Scale Packed FP16 Values with FP16 Values +INST3(vscalefsh, "scalefsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x2D), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Scale Scalar FP16 Values with FP16 Values +INST3(vsqrtph, "sqrtph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x51), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute Square Root of Packed FP16 Values +INST3(vsqrtsh, "sqrtsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x51), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute Square Root of Scalar FP16 Values +INST3(vsubph, "subph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5C), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Subtract Packed FP16 Values +INST3(vsubsh, "subsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5C), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Subtract Scalar FP16 Values +INST3(vucomish, "ucomish", IUM_RD, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x2E), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Compare Scalar Unordered FP16 Values and Set EFLAGS + +// AVX512-VBMI +INST3(vpermb, "permb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Byte Elements +INST3(vpermi2b, "permi2b", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x75), INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting the Index +INST3(vpermt2b, "permt2b", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7D), INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table +INST3(vpmultishiftqb, "pmultishiftqb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x83), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Select Packed Unaligned Bytes From Quadword Sources + +// AVX512-VBMI2 +INST3(vpcompressb, "pcompressb", IUM_WR, SSE38(0x63), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Store sparse packed bytes into dense memory +INST3(vpcompressw, "pcompressw", IUM_WR, SSE38(0x63), BAD_CODE, BAD_CODE, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Store sparse packed words into dense memory +INST3(vpexpandb, "pexpandb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x62), INS_TT_TUPLE1_SCALAR, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Load sparse packed bytes from dense memory +INST3(vpexpandw, "pexpandw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x62), INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Load sparse packed words from dense memory +INST3(vpshldd, "pshldd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x71), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Shift Packed Data Left Logical +INST3(vpshldq, "pshldq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x71), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Left Logical +INST3(vpshldw, "pshldw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x70), INS_TT_FULL_MEM, Input_16Bit | 
KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Left Logical +INST3(vpshldvd, "pshldvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x71), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Left Logical +INST3(vpshldvq, "pshldvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x71), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Left Logical +INST3(vpshldvw, "pshldvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x70), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Left Logical +INST3(vpshrdd, "pshrdd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x73), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Shift Packed Data Right Logical +INST3(vpshrdq, "pshrdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x73), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Right Logical +INST3(vpshrdw, "pshrdw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x72), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Right Logical +INST3(vpshrdvd, "pshrdvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x73), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Right Logical +INST3(vpshrdvq, "pshrdvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x73), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Right Logical +INST3(vpshrdvw, "pshrdvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x72), INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Right Logical + +// AVX512-VP2INTERSECT +INST3(vp2intersectd, "p2intersectd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute Intersection Between DWORDS to a Pair of Mask Registers +INST3(vp2intersectq, "p2intersectq", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute Intersection Between QWORDS to a Pair of Mask Registers + +// AVX512-VPOPCNTDQ +INST3(vpopcntd, "popcntd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x55), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in DWORD +INST3(vpopcntq, "popcntq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x55), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in QWORD INST3(LAST_AVX512_INSTRUCTION, "LAST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVX10v2_INSTRUCTION, "FIRST_AVX10v2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) -INST3(vcomxsd, "comxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare double precision floating point values and set flags -INST3(vcomxss, "comxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2f), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare single precision floating point values and set flags -INST3(vucomxsd, "ucomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), INS_TT_TUPLE1_SCALAR, 
Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags -INST3(vucomxss, "ucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags -INST3(vcvttps2dqs, "cvttps2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed singles to DWORDs -INST3(vcvttps2udqs, "cvttps2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed singles to unsigned DWORDs -INST3(vcvttps2qqs, "cvttps2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed singles to signed QWORDs -INST3(vcvttps2uqqs, "cvttps2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed singles to unsigned QWORDs -INST3(vcvttpd2dqs, "cvttpd2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed doubles to DWORDs -INST3(vcvttpd2udqs, "cvttpd2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed doubles to unsigned DWORDs -INST3(vcvttpd2qqs, "cvttpd2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed doubles to signed QWORDs -INST3(vcvttpd2uqqs, "cvttpd2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed doubles to signed QWORDs -INST3(vcvttsd2sis32, "cvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed DWORDs -INST3(vcvttsd2sis64, "cvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed DWORDs -INST3(vcvttsd2usis32, "cvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned DWORD -INST3(vcvttsd2usis64, "cvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned QWORD -INST3(vcvttss2sis32, "cvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to DWORD -INST3(vcvttss2sis64, "cvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 
0x6D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to DWORD -INST3(vcvttss2usis32, "cvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD -INST3(vcvttss2usis64, "cvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD - -INST3(vcvtps2ibs, "cvtps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x69), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) -INST3(vcvtps2iubs, "cvtps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6B), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD -INST3(vcvttps2ibs, "cvttps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x68), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD -INST3(vcvttps2iubs, "cvttps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD -INST3(vmpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference - -INST3(vminmaxsd, "minmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar double -INST3(vminmaxss, "minmaxss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar single -INST3(vminmaxpd, "minmaxpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum packed doubles -INST3(vminmaxps, "minmaxps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum packed singles -INST3(vmovd, "movd", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Move DWORD between xmm regs <-> memory/xmm regs -INST3(vmovw, "movw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_CODE, SSEFLTMAP(0x05, 0x6E), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move WORD between xmm regs <-> memory/xmm regs -INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | 
Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vcomxsd, "comxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare double precision floating point values and set flags +INST3(vcomxss, "comxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2f), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | 
Writes_PF | Writes_CF | Resets_AF) // Compare single precision floating point values and set flags +INST3(vucomxsd, "ucomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags +INST3(vucomxss, "ucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags +INST3(vcvttpd2dqs, "cvttpd2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to DWORDs +INST3(vcvttpd2qqs, "cvttpd2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to signed QWORDs +INST3(vcvttpd2udqs, "cvttpd2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to unsigned DWORDs +INST3(vcvttpd2uqqs, "cvttpd2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to signed QWORDs +INST3(vcvttps2dqs, "cvttps2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to DWORDs +INST3(vcvttps2qqs, "cvttps2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), INS_TT_HALF, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to signed QWORDs +INST3(vcvttps2udqs, "cvttps2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to unsigned DWORDs +INST3(vcvttps2uqqs, "cvttps2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), INS_TT_HALF, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to unsigned QWORDs +INST3(vcvttsd2sis32, "cvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed DWORDs +INST3(vcvttsd2sis64, "cvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed DWORDs +INST3(vcvttsd2usis32, "cvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned DWORD +INST3(vcvttsd2usis64, "cvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned QWORD +INST3(vcvttss2sis32, "cvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to DWORD +INST3(vcvttss2sis64, "cvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6D), 
INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to DWORD +INST3(vcvttss2usis32, "cvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD +INST3(vcvttss2usis64, "cvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD +INST3(vcvtps2ibs, "cvtps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x69), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vcvtps2iubs, "cvtps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6B), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD +INST3(vcvttps2ibs, "cvttps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x68), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD +INST3(vcvttps2iubs, "cvttps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6A), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD +INST3(vmpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), INS_TT_FULL_MEM, KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference +INST3(vminmaxpd, "minmaxpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum packed doubles +INST3(vminmaxps, "minmaxps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum packed singles +INST3(vminmaxsd, "minmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar double +INST3(vminmaxss, "minmaxss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar single +INST3(vmovd_simd, "movd", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Move DWORD between xmm regs <-> memory/xmm regs +INST3(vmovw_simd, "movw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_CODE, SSEFLTMAP(0x05, 0x6E), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move WORD between xmm regs <-> memory/xmm regs +INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, Input_8Bit | KMask_Base4 | 
REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results INST3(LAST_AVX10v2_INSTRUCTION, "LAST_AVX10v2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) +// id nm um mr mi rm tt flags + +INST3(FIRST_APX_INSTRUCTION, "FIRST_APX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) + +INST3(FIRST_CCMP_INSTRUCTION, "FIRST_CCMP_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) + +INST3(ccmpo, "ccmpo", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpno, "ccmpno", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | 
INS_FLAGS_Has_Sbit) +INST3(ccmpb, "ccmpb", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpae, "ccmpae", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpe, "ccmpe", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpne, "ccmpne", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpbe, "ccmpbe", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpa, "ccmpa", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmps, "ccmps", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpns, "ccmpns", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpt, "ccmpt", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpf, "ccmpf", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpl, "ccmpl", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpge, "ccmpge", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmple, "ccmple", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ccmpg, "ccmpg", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) + +INST3(LAST_CCMP_INSTRUCTION, "LAST_CCMP_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) + +INST3(LAST_APX_INSTRUCTION, "LAST_APX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) + + // Scalar instructions in SSE4.2 INST3(crc32, "crc32", IUM_RW, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF0), INS_TT_NONE, INS_FLAGS_None) +#ifdef TARGET_AMD64 +INST3(crc32_apx, "crc32", IUM_RW, BAD_CODE, BAD_CODE, 0x0000F0, INS_TT_NONE, INS_FLAGS_None) +#endif // BMI1 INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | Encoding_REX2) // Count the Number of Trailing Zero Bits +#ifdef TARGET_AMD64 +INST3(tzcnt_apx, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F4, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Count the Number of Trailing Zero Bits +#endif // LZCNT INST3(lzcnt, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | Encoding_REX2) +#ifdef TARGET_AMD64 +INST3(lzcnt_apx, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F5, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) +#endif // MOVBE INST3(movbe, "movbe", IUM_WR, PCKMVB(0xF1), BAD_CODE, PCKMVB(0xF0), INS_TT_NONE, INS_FLAGS_None) +#ifdef TARGET_AMD64 +INST3(movbe_apx, 
"movbe", IUM_WR, 0x000061, BAD_CODE, 0x000060, INS_TT_NONE, INS_FLAGS_None) +#endif // POPCNT INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_TT_NONE, Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF | Encoding_REX2) +#ifdef TARGET_AMD64 +INST3(popcnt_apx, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x000088, INS_TT_NONE, Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF | INS_Flags_Has_NF) +#endif -#if defined(TARGET_AMD64) -INST3(tzcnt_apx, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F4, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Count the Number of Trailing Zero Bits -INST3(lzcnt_apx, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F5, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) -INST3(popcnt_apx, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x000088, INS_TT_NONE, Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF | INS_Flags_Has_NF) -#endif // TARGET_AMD64 +// WAITPKG +INST3(tpause, "tpause", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0xAE), INS_TT_NONE, Resets_OF | Resets_SF | Resets_ZF | Resets_AF | Resets_PF | Writes_CF) // Timed PAUSE +INST3(umonitor, "umonitor", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0xAE), INS_TT_NONE, INS_FLAGS_None) // User Level Set Up Monitor Address +INST3(umwait, "umwait", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0xAE), INS_TT_NONE, Resets_OF | Resets_SF | Resets_ZF | Resets_AF | Resets_PF | Writes_CF) // User Level Monitor Wait INST3(neg, "neg", IUM_RW, 0x0018F6, BAD_CODE, 0x0018F6, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) INST3(not, "not", IUM_RW, 0x0010F6, BAD_CODE, 0x0010F6, INS_TT_NONE, INS_FLAGS_None | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD) @@ -968,12 +1229,12 @@ INST3(ror, "ror", IUM_RW, 0x0008D2, BAD_CODE, INST3(ror_1, "ror", IUM_RW, 0x0008D0, 0x0008D0, 0x0008D0, INS_TT_NONE, Writes_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) INST3(ror_N, "ror", IUM_RW, 0x0008C0, 0x0008C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) -INST3(rcl, "rcl", IUM_RW, 0x0010D2, BAD_CODE, 0x0010D2, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) -INST3(rcl_1, "rcl", IUM_RW, 0x0010D0, 0x0010D0, 0x0010D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) -INST3(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) -INST3(rcr, "rcr", IUM_RW, 0x0018D2, BAD_CODE, 0x0018D2, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) -INST3(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, 0x0018D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) -INST3(rcr_N, "rcr", IUM_RW, 0x0018C0, 0x0018C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(rcl, "rcl", IUM_RW, 0x0010D2, BAD_CODE, 0x0010D2, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | 
INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(rcl_1, "rcl", IUM_RW, 0x0010D0, 0x0010D0, 0x0010D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(rcr, "rcr", IUM_RW, 0x0018D2, BAD_CODE, 0x0018D2, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, 0x0018D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(rcr_N, "rcr", IUM_RW, 0x0018C0, 0x0018C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD) INST3(shl, "shl", IUM_RW, 0x0020D2, BAD_CODE, 0x0020D2, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) INST3(shl_1, "shl", IUM_RW, 0x0020D0, 0x0020D0, 0x0020D0, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) INST3(shl_N, "shl", IUM_RW, 0x0020C0, 0x0020C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) @@ -1023,7 +1284,7 @@ INST1(serialize, "serialize", IUM_RD, 0x0fe801, INST1(cwde, "cwde", IUM_RD, 0x000098, INS_TT_NONE, INS_FLAGS_None) INST1(cdq, "cdq", IUM_RD, 0x000099, INS_TT_NONE, INS_FLAGS_None) INST1(idiv, "idiv", IUM_RD, 0x0038F6, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF) -INST1(imulEAX, "imul", IUM_RD, 0x0028F6, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Wbit | INS_Flags_Has_NF) +INST1(imulEAX, "imul", IUM_RD, 0x0028F6, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF) INST1(div, "div", IUM_RD, 0x0030F6, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF) INST1(mulEAX, "mul", IUM_RD, 0x0020F6, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF) diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index a6b6c9d16f28..dbd6730d8fa9 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -357,11 +357,6 @@ typedef ptrdiff_t ssize_t; #define UNIX_LOONGARCH64_ONLY(x) #endif // TARGET_LOONGARCH64 -#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || \ - defined(TARGET_RISCV64) -#define FEATURE_PUT_STRUCT_ARG_STK 1 -#endif - #if defined(UNIX_AMD64_ABI) #define UNIX_AMD64_ABI_ONLY_ARG(x) , x #define UNIX_AMD64_ABI_ONLY(x) x @@ -509,11 +504,8 @@ class GlobalJitOptions #define CALL_ARG_STATS 0 // Collect stats about calls and call arguments. #define COUNT_BASIC_BLOCKS \ - 0 // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple - // case of single block methods. 
-#define COUNT_LOOPS \ - 0 // Collect stats about loops, such as the total number of natural loops, a histogram of - // the number of loop exits, etc. + 0 // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple + // case of single block methods. #define DISPLAY_SIZES 0 // Display generated code, data, and GC information sizes. #define MEASURE_BLOCK_SIZE 0 // Collect stats about basic block and FlowEdge node sizes and memory allocations. #define MEASURE_FATAL 0 // Count the number of calls to fatal(), including NYIs and noway_asserts. diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index b4c3fcaf4a4a..587ad46fec89 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -116,9 +116,13 @@ CONFIG_INTEGER(JitHashBreak, "JitHashBreak", -1) // Same as JitBreak, but CONFIG_INTEGER(JitHashHalt, "JitHashHalt", -1) // Same as JitHalt, but for a method hash CONFIG_INTEGER(JitInlineAdditionalMultiplier, "JitInlineAdditionalMultiplier", 0) CONFIG_INTEGER(JitInlinePrintStats, "JitInlinePrintStats", 0) -CONFIG_INTEGER(JitInlineSize, "JITInlineSize", DEFAULT_MAX_INLINE_SIZE) -CONFIG_INTEGER(JitInlineDepth, "JITInlineDepth", DEFAULT_MAX_INLINE_DEPTH) -CONFIG_INTEGER(JitForceInlineDepth, "JITForceInlineDepth", DEFAULT_MAX_FORCE_INLINE_DEPTH) +CONFIG_INTEGER(JitInlineSize, "JitInlineSize", DEFAULT_MAX_INLINE_SIZE) +CONFIG_INTEGER(JitInlineDepth, "JitInlineDepth", DEFAULT_MAX_INLINE_DEPTH) +RELEASE_CONFIG_INTEGER(JitInlineBudget, "JitInlineBudget", DEFAULT_INLINE_BUDGET) +CONFIG_INTEGER(JitForceInlineDepth, "JitForceInlineDepth", DEFAULT_MAX_FORCE_INLINE_DEPTH) +RELEASE_CONFIG_INTEGER(JitInlineMethodsWithEH, "JitInlineMethodsWithEH", 1) +CONFIG_STRING(JitInlineMethodsWithEHRange, "JitInlineMethodsWithEHRange") + CONFIG_INTEGER(JitLongAddress, "JitLongAddress", 0) // Force using the large pseudo instruction form for long address CONFIG_INTEGER(JitMaxUncheckedOffset, "JitMaxUncheckedOffset", 8) @@ -258,13 +262,16 @@ CONFIG_METHODSET(JitUnwindDump, "JitUnwindDump") // Dump the unwind codes for th // JitDumpFg - dump flowgraph // -CONFIG_METHODSET(JitDumpFg, "JitDumpFg") // Dumps Xml/Dot Flowgraph for specified method -CONFIG_STRING(JitDumpFgDir, "JitDumpFgDir") // Directory for Xml/Dot flowgraph dump(s) -CONFIG_STRING(JitDumpFgFile, "JitDumpFgFile") // Filename for Xml/Dot flowgraph dump(s) (default: "default") -CONFIG_STRING(JitDumpFgPhase, "JitDumpFgPhase") // Phase-based Xml/Dot flowgraph support. Set to the short name of a - // phase to see the flowgraph after that phase. Leave unset to dump - // after COLD-BLK (determine first cold block) or set to * for all - // phases +CONFIG_METHODSET(JitDumpFg, "JitDumpFg") // Dumps Xml/Dot Flowgraph for specified method +CONFIG_INTEGER(JitDumpFgHash, "JitDumpFgHash", 0) // Dumps Xml/Dot Flowgraph for specified method +CONFIG_INTEGER(JitDumpFgTier0, "JitDumpFgTier0", 1) // Dumps Xml/Dot Flowgraph for tier-0 compilations of specified + // methods +CONFIG_STRING(JitDumpFgDir, "JitDumpFgDir") // Directory for Xml/Dot flowgraph dump(s) +CONFIG_STRING(JitDumpFgFile, "JitDumpFgFile") // Filename for Xml/Dot flowgraph dump(s) (default: "default") +CONFIG_STRING(JitDumpFgPhase, "JitDumpFgPhase") // Phase-based Xml/Dot flowgraph support. Set to the short name of a + // phase to see the flowgraph after that phase. 
Leave unset to dump + // after COLD-BLK (determine first cold block) or set to * for all + // phases CONFIG_STRING(JitDumpFgPrePhase, "JitDumpFgPrePhase") // Same as JitDumpFgPhase, but specifies to dump pre-phase, not // post-phase. CONFIG_INTEGER(JitDumpFgDot, "JitDumpFgDot", 1) // 0 == dump XML format; non-zero == dump DOT format @@ -381,54 +388,38 @@ CONFIG_INTEGER(JitStressPromotedEvexEncoding, "JitStressPromotedEvexEncoding", 0 CONFIG_INTEGER(JitStressEvexEncoding, "JitStressEvexEncoding", 0) #endif -RELEASE_CONFIG_INTEGER(PreferredVectorBitWidth, "PreferredVectorBitWidth", 0) // The preferred decimal width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default. - // // Hardware Intrinsic ISAs; keep in sync with clrconfigvalues.h // -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_LOONGARCH64) //TODO: should implement LoongArch64's features. -//TODO-RISCV64-CQ: should implement RISCV64's features. RELEASE_CONFIG_INTEGER(EnableHWIntrinsic, "EnableHWIntrinsic", 0) // Allows Base+ hardware intrinsics to be disabled #else RELEASE_CONFIG_INTEGER(EnableHWIntrinsic, "EnableHWIntrinsic", 1) // Allows Base+ hardware intrinsics to be disabled -#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#endif // defined(TARGET_LOONGARCH64) #if defined(TARGET_AMD64) || defined(TARGET_X86) -RELEASE_CONFIG_INTEGER(EnableAES, "EnableAES", 1) // Allows AES+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX, "EnableAVX", 1) // Allows AVX+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX2, "EnableAVX2", 1) // Allows AVX2+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512BW, "EnableAVX512BW", 1) // Allows AVX512BW+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512BW_VL, "EnableAVX512BW_VL", 1) // Allows AVX512BW+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512CD, "EnableAVX512CD", 1) // Allows AVX512CD+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512CD_VL, "EnableAVX512CD_VL", 1) // Allows AVX512CD+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512DQ, "EnableAVX512DQ", 1) // Allows AVX512DQ+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512DQ_VL, "EnableAVX512DQ_VL", 1) // Allows AVX512DQ+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512F, "EnableAVX512F", 1) // Allows AVX512F+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512F_VL, "EnableAVX512F_VL", 1) // Allows AVX512F+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512VBMI, "EnableAVX512VBMI", 1) // Allows AVX512VBMI+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512VBMI_VL, "EnableAVX512VBMI_VL", 1) // Allows AVX512VBMI_VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX10v1, "EnableAVX10v1", 1) // Allows AVX10v1+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX10v2, "EnableAVX10v2", 1) // Allows AVX10v2+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVXVNNI, "EnableAVXVNNI", 1) // Allows AVXVNNI+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableBMI1, "EnableBMI1", 1) // Allows BMI1+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableBMI2, "EnableBMI2", 1) // Allows BMI2+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableFMA, "EnableFMA", 1) // Allows 
FMA+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableGFNI, "EnableGFNI", 1) // Allows GFNI+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableLZCNT, "EnableLZCNT", 1) // Allows LZCNT+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnablePCLMULQDQ, "EnablePCLMULQDQ", 1) // Allows PCLMULQDQ+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableVPCLMULQDQ, "EnableVPCLMULQDQ", 1) // Allows VPCLMULQDQ+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnablePOPCNT, "EnablePOPCNT", 1) // Allows POPCNT+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE, "EnableSSE", 1) // Allows SSE+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE2, "EnableSSE2", 1) // Allows SSE2+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE3, "EnableSSE3", 1) // Allows SSE3+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE3_4, "EnableSSE3_4", 1) // Allows SSE3+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE41, "EnableSSE41", 1) // Allows SSE4.1+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE42, "EnableSSE42", 1) // Allows SSE4.2+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSSE3, "EnableSSSE3", 1) // Allows SSSE3+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAPX, "EnableAPX", 0) // Allows APX+ features to be disabled +RELEASE_CONFIG_INTEGER(EnableSSE42, "EnableSSE42", 1) // Allows SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX, "EnableAVX", 1) // Allows AVX and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX2, "EnableAVX2", 1) // Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512, "EnableAVX512", 1) // Allows AVX512 F+BW+CD+DQ+VL and dependent hardware intrinsics to be disabled + +RELEASE_CONFIG_INTEGER(EnableAVX512v2, "EnableAVX512v2", 1) // Allows AVX512 IFMA+VBMI and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512v3, "EnableAVX512v3", 1) // Allows AVX512 BITALG+VBMI2+VNNI+VPOPCNTDQ and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX10v1, "EnableAVX10v1", 1) // Allows AVX10v1 and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX10v2, "EnableAVX10v2", 0) // Allows AVX10v2 and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAPX, "EnableAPX", 0) // Allows APX and dependent features to be disabled + +RELEASE_CONFIG_INTEGER(EnableAES, "EnableAES", 1) // Allows AES, PCLMULQDQ, and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512VP2INTERSECT, "EnableAVX512VP2INTERSECT", 1) // Allows AVX512VP2INTERSECT and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVXIFMA, "EnableAVXIFMA", 1) // Allows AVXIFMA and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVXVNNI, "EnableAVXVNNI", 1) // Allows AVXVNNI and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableGFNI, "EnableGFNI", 1) // Allows GFNI and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableSHA, "EnableSHA", 1) // Allows SHA and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableVAES, "EnableVAES", 1) // Allows VAES, VPCLMULQDQ, and dependent hardware intrinsics to be disabled
+RELEASE_CONFIG_INTEGER(EnableWAITPKG, "EnableWAITPKG", 1) // Allows WAITPKG and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableX86Serialize, "EnableX86Serialize", 1) // Allows X86Serialize and dependent hardware intrinsics to be disabled #elif defined(TARGET_ARM64) -RELEASE_CONFIG_INTEGER(EnableArm64AdvSimd, "EnableArm64AdvSimd", 1) // Allows Arm64 AdvSimd+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableArm64Aes, "EnableArm64Aes", 1) // Allows Arm64 Aes+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableArm64Atomics, "EnableArm64Atomics", 1) // Allows Arm64 Atomics+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableArm64Crc32, "EnableArm64Crc32", 1) // Allows Arm64 Crc32+ hardware intrinsics to be disabled @@ -438,11 +429,16 @@ RELEASE_CONFIG_INTEGER(EnableArm64Rdm, "EnableArm64Rdm", RELEASE_CONFIG_INTEGER(EnableArm64Sha1, "EnableArm64Sha1", 1) // Allows Arm64 Sha1+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableArm64Sha256, "EnableArm64Sha256", 1) // Allows Arm64 Sha256+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableArm64Sve, "EnableArm64Sve", 1) // Allows Arm64 Sve+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableArm64Sve2, "EnableArm64Sve2", 1) // Allows Arm64 Sve2+ hardware intrinsics to be disabled +#elif defined(TARGET_RISCV64) +RELEASE_CONFIG_INTEGER(EnableRiscV64Zba, "EnableRiscV64Zba", 1) // Allows RiscV64 Zba hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableRiscV64Zbb, "EnableRiscV64Zbb", 1) // Allows RiscV64 Zbb hardware intrinsics to be disabled #endif RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, "EnableEmbeddedBroadcast", 1) // Allows embedded broadcasts to be disabled RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", 1) // Allows embedded masking to be disabled RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled +RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining // clang-format on @@ -565,7 +561,9 @@ OPT_CONFIG_INTEGER(JitDoOptimizeIVs, "JitDoOptimizeIVs", 1) // Perform optim OPT_CONFIG_INTEGER(JitDoEarlyProp, "JitDoEarlyProp", 1) // Perform Early Value Propagation OPT_CONFIG_INTEGER(JitDoLoopHoisting, "JitDoLoopHoisting", 1) // Perform loop hoisting on loop invariant values OPT_CONFIG_INTEGER(JitDoLoopInversion, "JitDoLoopInversion", 1) // Perform loop inversion on "for/while" loops -OPT_CONFIG_INTEGER(JitDoRangeAnalysis, "JitDoRangeAnalysis", 1) // Perform range check analysis +RELEASE_CONFIG_INTEGER(JitLoopInversionSizeLimit, "JitLoopInversionSizeLimit", 100) // limit inversion to loops with no + // more than this many tree nodes +OPT_CONFIG_INTEGER(JitDoRangeAnalysis, "JitDoRangeAnalysis", 1) // Perform range check analysis OPT_CONFIG_INTEGER(JitDoVNBasedDeadStoreRemoval, "JitDoVNBasedDeadStoreRemoval", 1) // Perform VN-based dead store // removal OPT_CONFIG_INTEGER(JitDoRedundantBranchOpts, "JitDoRedundantBranchOpts", 1) // Perform redundant branch optimizations @@ -589,6 +587,8 @@ OPT_CONFIG_INTEGER(JitDoIfConversion, "JitDoIfConversion", 1) OPT_CONFIG_INTEGER(JitDoOptimizeMaskConversions, "JitDoOptimizeMaskConversions", 1) // Perform optimization of mask // conversions +RELEASE_CONFIG_INTEGER(JitOptimizeAwait, "JitOptimizeAwait", 1) // Perform optimization of Await intrinsics + RELEASE_CONFIG_INTEGER(JitEnableOptRepeat, "JitEnableOptRepeat", 1) // If zero, do not allow JitOptRepeat 
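The consolidated `Enable*` knobs above each gate an entire dependency chain rather than a single ISA: disabling a baseline knob also disables everything that requires it. A minimal sketch of that relationship follows (the struct and names are illustrative assumptions only, not code from the runtime or from this change); in practice these knobs are typically surfaced to users as `DOTNET_Enable*` environment variables.

```cpp
// Sketch only: models the "and dependent hardware intrinsics" wording above.
// None of these names come from the runtime; they just illustrate the chain.
struct IsaKnobsSketch
{
    bool enableSSE42  = true; // SSE3/SSSE3/SSE4.1/SSE4.2/POPCNT
    bool enableAVX    = true;
    bool enableAVX2   = true; // AVX2, BMI1/2, F16C, FMA, LZCNT, MOVBE
    bool enableAVX512 = true; // AVX512 F+BW+CD+DQ+VL

    // A dependent ISA is usable only if its entire prerequisite chain is enabled,
    // so setting enableAVX2 = false also rules out AVX512 and AVX10.
    bool Avx512Usable() const
    {
        return enableSSE42 && enableAVX && enableAVX2 && enableAVX512;
    }
};
```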
RELEASE_CONFIG_METHODSET(JitOptRepeat, "JitOptRepeat") // Runs optimizer multiple times on specified methods RELEASE_CONFIG_INTEGER(JitOptRepeatCount, "JitOptRepeatCount", 2) // Number of times to repeat opts when repeating @@ -598,7 +598,7 @@ RELEASE_CONFIG_INTEGER(JitVNMapSelBudget, "JitVNMapSelBudget", DEFAULT_MAP_SELEC RELEASE_CONFIG_INTEGER(TailCallLoopOpt, "TailCallLoopOpt", 1) // Convert recursive tail calls to loops RELEASE_CONFIG_METHODSET(AltJit, "AltJit") // Enables AltJit and selectively limits it to the specified methods. -RELEASE_CONFIG_METHODSET(AltJitNgen, "AltJitNgen") // Enables AltJit for NGEN and selectively limits it +RELEASE_CONFIG_METHODSET(AltJitNgen, "AltJitNgen") // Enables AltJit for AOT and selectively limits it // to the specified methods. // Do not use AltJit on this semicolon-delimited list of assemblies. @@ -650,6 +650,7 @@ CONFIG_STRING(JitInlineReplayFile, "JitInlineReplayFile") // relies on PGO if it exists and generally is more aggressive. RELEASE_CONFIG_INTEGER(JitExtDefaultPolicy, "JitExtDefaultPolicy", 1) RELEASE_CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, "JitExtDefaultPolicyMaxIL", 0x80) +RELEASE_CONFIG_INTEGER(JitExtDefaultPolicyMaxILRoot, "JitExtDefaultPolicyMaxILRoot", 0x100) RELEASE_CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, "JitExtDefaultPolicyMaxILProf", 0x400) RELEASE_CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, "JitExtDefaultPolicyMaxBB", 7) @@ -675,6 +676,9 @@ RELEASE_CONFIG_INTEGER(JitObjectStackAllocationConditionalEscape, "JitObjectStac CONFIG_STRING(JitObjectStackAllocationConditionalEscapeRange, "JitObjectStackAllocationConditionalEscapeRange") RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, "JitObjectStackAllocationArray", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationSize, "JitObjectStackAllocationSize", 528) +RELEASE_CONFIG_INTEGER(JitObjectStackAllocationTrackFields, "JitObjectStackAllocationTrackFields", 1) +CONFIG_STRING(JitObjectStackAllocationTrackFieldsRange, "JitObjectStackAllocationTrackFieldsRange") +CONFIG_INTEGER(JitObjectStackAllocationDumpConnGraph, "JitObjectStackAllocationDumpConnGraph", 0) RELEASE_CONFIG_INTEGER(JitEECallTimingInfo, "JitEECallTimingInfo", 0) @@ -697,11 +701,11 @@ CONFIG_STRING(JitGuardedDevirtualizationRange, "JitGuardedDevirtualizationRange" CONFIG_INTEGER(JitRandomGuardedDevirtualization, "JitRandomGuardedDevirtualization", 0) // Enable insertion of patchpoints into Tier0 methods, switching to optimized where needed. -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#ifdef FEATURE_ON_STACK_REPLACEMENT RELEASE_CONFIG_INTEGER(TC_OnStackReplacement, "TC_OnStackReplacement", 1) #else RELEASE_CONFIG_INTEGER(TC_OnStackReplacement, "TC_OnStackReplacement", 0) -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#endif // FEATURE_ON_STACK_REPLACEMENT // Initial patchpoint counter value used by jitted code RELEASE_CONFIG_INTEGER(TC_OnStackReplacement_InitialCounter, "TC_OnStackReplacement_InitialCounter", 1000) @@ -798,8 +802,8 @@ RELEASE_CONFIG_INTEGER(JitEnablePhysicalPromotion, "JitEnablePhysicalPromotion", // Enable cross-block local assertion prop RELEASE_CONFIG_INTEGER(JitEnableCrossBlockLocalAssertionProp, "JitEnableCrossBlockLocalAssertionProp", 1) -// Do greedy RPO-based layout in Compiler::fgReorderBlocks. 
-RELEASE_CONFIG_INTEGER(JitDoReversePostOrderLayout, "JitDoReversePostOrderLayout", 1); +// Enable postorder local assertion prop +RELEASE_CONFIG_INTEGER(JitEnablePostorderLocalAssertionProp, "JitEnablePostorderLocalAssertionProp", 1) // Enable strength reduction RELEASE_CONFIG_INTEGER(JitEnableStrengthReduction, "JitEnableStrengthReduction", 1) diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index edfbafc917d4..c8f0e64f1f4a 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -20,10 +20,10 @@ class JitFlags JIT_FLAG_ALT_JIT = 8, // JIT should consider itself an ALT_JIT JIT_FLAG_FROZEN_ALLOC_ALLOWED = 9, // JIT is allowed to use *_MAYBEFROZEN allocators // JIT_FLAG_UNUSED = 10, - JIT_FLAG_READYTORUN = 11, // Use version-resilient code generation + JIT_FLAG_AOT = 11, // Do ahead-of-time code generation (ReadyToRun or NativeAOT) JIT_FLAG_PROF_ENTERLEAVE = 12, // Instrument prologues/epilogues JIT_FLAG_PROF_NO_PINVOKE_INLINE = 13, // Disables PInvoke inlining - JIT_FLAG_PREJIT = 14, // prejit is the execution engine. + // JIT_FLAG_UNUSED = 14, JIT_FLAG_RELOC = 15, // Generate relocatable code JIT_FLAG_IL_STUB = 16, // method is an IL stub JIT_FLAG_PROCSPLIT = 17, // JIT should separate code into hot and cold sections @@ -44,10 +44,7 @@ class JitFlags JIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention #endif -#if defined(TARGET_XARCH) - JIT_FLAG_VECTOR512_THROTTLING = 31, // On Xarch, 512-bit vector usage may incur CPU frequency throttling -#endif - + JIT_FLAG_ASYNC = 31, // Generate code for use as an async function // Note: the mcs tool uses the currently unused upper flags bits when outputting SuperPMI MC file flags. // See EXTRA_JIT_FLAGS and spmidumphelper.cpp. Currently, these are bits 56 through 63. If they overlap, // something needs to change. 
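As context for the flag rework above, here is a minimal sketch of how callers would consume the new flags (the helpers below are hypothetical and not part of this diff; only the `JitFlags::IsSet` query and the flag names come from the surrounding code): the former READYTORUN/PREJIT pair collapses into a single AOT test, and async code generation gets its own query.

```cpp
// Sketch, not code from this change: JitFlags::IsSet is the existing query API,
// JIT_FLAG_AOT / JIT_FLAG_ASYNC are the flags introduced above.
bool IsAotCompilation(const JitFlags& flags)
{
    // Covers both ReadyToRun and NativeAOT, replacing separate READYTORUN/PREJIT checks.
    return flags.IsSet(JitFlags::JIT_FLAG_AOT);
}

bool IsAsyncMethodCompilation(const JitFlags& flags)
{
    return flags.IsSet(JitFlags::JIT_FLAG_ASYNC);
}
```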
@@ -123,10 +120,9 @@ class JitFlags FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_OSR, JIT_FLAG_OSR); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT, JIT_FLAG_ALT_JIT); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_FROZEN_ALLOC_ALLOWED, JIT_FLAG_FROZEN_ALLOC_ALLOWED); - FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_READYTORUN, JIT_FLAG_READYTORUN); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_AOT, JIT_FLAG_AOT); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROF_ENTERLEAVE, JIT_FLAG_PROF_ENTERLEAVE); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROF_NO_PINVOKE_INLINE, JIT_FLAG_PROF_NO_PINVOKE_INLINE); - FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PREJIT, JIT_FLAG_PREJIT); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_RELOC, JIT_FLAG_RELOC); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_IL_STUB, JIT_FLAG_IL_STUB); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROCSPLIT, JIT_FLAG_PROCSPLIT); @@ -146,10 +142,7 @@ class JitFlags FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_RELATIVE_CODE_RELOCS, JIT_FLAG_RELATIVE_CODE_RELOCS); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SOFTFP_ABI, JIT_FLAG_SOFTFP_ABI); #endif // TARGET_ARM - -#if defined(TARGET_X86) || defined(TARGET_AMD64) - FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING, JIT_FLAG_VECTOR512_THROTTLING); -#endif // TARGET_ARM + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_ASYNC, JIT_FLAG_ASYNC); #undef FLAGS_EQUAL } diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index c7d5950dcd52..4f15ce1afe4b 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -241,7 +241,7 @@ bool EHblkDsc::ebdIsSameTry(BasicBlock* ebdTryBeg, BasicBlock* ebdTryLast) void EHblkDsc::DispEntry(unsigned XTnum) { - printf(" %2u ::", XTnum); + printf(" %2u %2u ::", ebdID, XTnum); #if defined(FEATURE_EH_WINDOWS_X86) if (ebdHandlerNestingLevel == 0) @@ -640,6 +640,20 @@ bool Compiler::bbIsHandlerBeg(const BasicBlock* block) return (ehDsc != nullptr) && ((block == ehDsc->ebdHndBeg) || (ehDsc->HasFilter() && (block == ehDsc->ebdFilter))); } +// bbIsFuncletBeg() returns true if "block" is the start of a handler or filter region, +// and if the handler/filter is a funclet. +// +bool Compiler::bbIsFuncletBeg(const BasicBlock* block) +{ + if (UsesFunclets()) + { + assert(fgFuncletsCreated); + return bbIsHandlerBeg(block); + } + + return false; +} + bool Compiler::ehHasCallableHandlers() { if (UsesFunclets()) { @@ -799,14 +813,24 @@ unsigned Compiler::ehGetMostNestedRegionIndex(BasicBlock* block, bool* inTryRegi return mostNestedRegion; } -/***************************************************************************** - * Returns the try index of the enclosing try, skipping all EH regions with the - * same try region (that is, all 'mutual protect' regions). If there is no such - * enclosing try, returns EHblkDsc::NO_ENCLOSING_INDEX. - */ +//------------------------------------------------------------- +// ehTrueEnclosingTryIndexIL: find the outermost enclosing try +// region that is not a mutual-protect try +// +// Arguments: +// regionIndex - index of interest +// +// Returns: +// Index of enclosing non-mutual protect try region, or EHblkDsc::NO_ENCLOSING_INDEX. +// +// Notes: +// Only safe to use during importation, before we have normalized the +// EH in the flow graph. Post importation, use the non-IL version.
+// unsigned Compiler::ehTrueEnclosingTryIndexIL(unsigned regionIndex) { assert(regionIndex != EHblkDsc::NO_ENCLOSING_INDEX); + assert(!fgImportDone); EHblkDsc* ehDscRoot = ehGetDsc(regionIndex); EHblkDsc* HBtab = ehDscRoot; @@ -832,6 +856,49 @@ unsigned Compiler::ehTrueEnclosingTryIndexIL(unsigned regionIndex) return regionIndex; } +//------------------------------------------------------------- +// ehTrueEnclosingTryIndex: find the closest enclosing try +// region that is not a mutual-protect try +// +// Arguments: +// regionIndex - index of interest +// +// Returns: +// Index of enclosng non-mutual protect try region, or EHblkDsc::NO_ENCLOSING_INDEX. +// +// Notes: +// Only safe to use after importation, once we have normalized the +// EH in the flow graph. For importation, use the IL version. +// +unsigned Compiler::ehTrueEnclosingTryIndex(unsigned regionIndex) +{ + assert(regionIndex != EHblkDsc::NO_ENCLOSING_INDEX); + assert(fgImportDone); + + EHblkDsc* ehDscRoot = ehGetDsc(regionIndex); + EHblkDsc* HBtab = ehDscRoot; + + for (;;) + { + regionIndex = HBtab->ebdEnclosingTryIndex; + if (regionIndex == EHblkDsc::NO_ENCLOSING_INDEX) + { + // No enclosing 'try'; we're done + break; + } + + HBtab = ehGetDsc(regionIndex); + if (!EHblkDsc::ebdIsSameTry(ehDscRoot, HBtab)) + { + // Found an enclosing 'try' that has a different 'try' region (is not mutually-protect with the + // original region). Return it. + break; + } + } + + return regionIndex; +} + unsigned Compiler::ehGetEnclosingRegionIndex(unsigned regionIndex, bool* inTryRegion) { assert(regionIndex != EHblkDsc::NO_ENCLOSING_INDEX); @@ -1207,6 +1274,30 @@ EHblkDsc* Compiler::ehInitTryBlockRange(BasicBlock* blk, BasicBlock** tryBeg, Ba return tryTab; } +//------------------------------------------------------------------------ +// ehFindEHblkDscById: find an eh table entry by its ID +// +// Argument: +// ID to use in search +// +// Returns: +// Pointer to the entry, or nullptr +// +EHblkDsc* Compiler::ehFindEHblkDscById(unsigned short id) +{ + EHblkDsc* result = nullptr; + for (EHblkDsc* const xtab : EHClauses(this)) + { + if (xtab->ebdID == id) + { + result = xtab; + break; + } + } + + return result; +} + /***************************************************************************** * This method updates the value of ebdTryBeg */ @@ -1278,62 +1369,24 @@ void Compiler::fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast) } //------------------------------------------------------------- -// fgRebuildEHRegions: After reordering blocks, make EH regions contiguous -// while maintaining relative block order, and update each region's end pointer. +// fgFindTryRegionEnds: Walk the main method body, and set each try region's end block. // -void Compiler::fgRebuildEHRegions() +void Compiler::fgFindTryRegionEnds() { assert(compHndBBtabCount != 0); - unsigned unsetTryEnds = compHndBBtabCount; - unsigned unsetHndEnds = compHndBBtabCount; + unsigned unsetTryEnds = 0; - // Null out try/handler end pointers to signify the given clause hasn't been visited yet. + // Null out try end pointers to signify the given clause hasn't been visited yet. for (EHblkDsc* const HBtab : EHClauses(this)) { - HBtab->ebdTryLast = nullptr; - HBtab->ebdHndLast = nullptr; - } - - // Walk the main method body, and move try blocks to re-establish contiguity. 
- for (BasicBlock *block = fgFirstBB, *next; block != fgFirstFuncletBB; block = next) - { - next = block->Next(); - EHblkDsc* HBtab = ehGetBlockTryDsc(block); - if (HBtab != nullptr) + // Ignore try regions inside funclet regions. + if (!UsesFunclets() || !HBtab->ebdTryLast->hasHndIndex()) { - // Move this block up to the previous block in the same try region. - BasicBlock* const insertionPoint = HBtab->ebdTryLast; - if ((insertionPoint != nullptr) && !insertionPoint->NextIs(block)) - { - assert(block != HBtab->ebdTryLast); - fgUnlinkBlock(block); - fgInsertBBafter(HBtab->ebdTryLast, block); - } - - // Update this try region's (and all parent try regions') end pointer with the last block visited - for (unsigned XTnum = block->getTryIndex(); XTnum != EHblkDsc::NO_ENCLOSING_INDEX; - XTnum = ehGetEnclosingTryIndex(XTnum)) - { - HBtab = ehGetDsc(XTnum); - if (HBtab->ebdTryLast == nullptr) - { - assert(HBtab->ebdTryBeg == block); - assert(unsetTryEnds != 0); - unsetTryEnds--; - HBtab->ebdTryLast = block; - } - else if (HBtab->ebdTryLast->NextIs(block)) - { - HBtab->ebdTryLast = block; - } - } + HBtab->ebdTryLast = nullptr; + unsetTryEnds++; } } - // The above logic rebuilt the try regions in the main method body. - // Now, resolve the regions in the funclet section, if there is one. - assert((unsetTryEnds == 0) || (fgFirstFuncletBB != nullptr)); - // Updates the try region's (and all of its parent regions') end block to 'block,' // if the try region's end block hasn't been updated yet. auto setTryEnd = [this, &unsetTryEnds](BasicBlock* block) { @@ -1354,29 +1407,8 @@ void Compiler::fgRebuildEHRegions() } }; - // Updates the handler region's (and all of its parent regions') end block to 'block,' - // if the handler region's end block hasn't been updated yet. - auto setHndEnd = [this, &unsetHndEnds](BasicBlock* block) { - for (unsigned hndIndex = block->getHndIndex(); hndIndex != EHblkDsc::NO_ENCLOSING_INDEX; - hndIndex = ehGetEnclosingHndIndex(hndIndex)) - { - EHblkDsc* const HBtab = ehGetDsc(hndIndex); - if (HBtab->ebdHndLast == nullptr) - { - assert(unsetHndEnds != 0); - HBtab->ebdHndLast = block; - unsetHndEnds--; - } - else - { - break; - } - } - }; - - // If we have a funclet section, update the ends of any try regions nested in funclets - for (BasicBlock* block = fgLastBB; (unsetTryEnds != 0) && (block != fgLastBBInMainFunction()); - block = block->Prev()) + // Iterate backwards through the main method body, and update each try region's end block. + for (BasicBlock* block = fgLastBBInMainFunction(); (unsetTryEnds != 0) && (block != nullptr); block = block->Prev()) { if (block->hasTryIndex()) { @@ -1384,17 +1416,7 @@ void Compiler::fgRebuildEHRegions() } } - // Finally, update the handler regions' ends - for (BasicBlock* block = fgLastBB; (unsetHndEnds != 0) && (block != nullptr); block = block->Prev()) - { - if (block->hasHndIndex()) - { - setHndEnd(block); - } - } - assert(unsetTryEnds == 0); - assert(unsetHndEnds == 0); } /***************************************************************************** @@ -1515,6 +1537,9 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum) { assert(compHndBBtabCount > 0); assert(XTnum < compHndBBtabCount); + assert(!ehTableFinalized); + + JITDUMP("\nRemoving EH#%u\n", XTnum); EHblkDsc* HBtab; @@ -1625,6 +1650,7 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum) } else { + JITDUMP("Updating ACD entries after EH removal\n"); // There are three possibilities for each ACD entry // // 1. 
remains as is (stays in same region with same indices) @@ -1697,6 +1723,8 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum) JITDUMPEXEC(add->Dump()); } } + + JITDUMP("... done updating ACD entries after EH removal\n"); } } @@ -1729,6 +1757,8 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum) // EHblkDsc* Compiler::fgTryAddEHTableEntries(unsigned XTnum, unsigned count, bool deferAdding) { + assert(!ehTableFinalized); + bool reallocate = false; bool const insert = (XTnum != compHndBBtabCount); unsigned const newCount = compHndBBtabCount + count; @@ -1743,8 +1773,9 @@ EHblkDsc* Compiler::fgTryAddEHTableEntries(unsigned XTnum, unsigned count, bool if (deferAdding) { // We can add count entries... + // (we may not have allocated a table, so return a dummy non-null entry) // - return compHndBBtab; + return (EHblkDsc*)(0x1); } if (newCount > compHndBBtabAllocCount) @@ -3170,8 +3201,8 @@ void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& claus // Note: the flags field is kind of weird. It should be compared for equality // to determine the type of clause, even though it looks like a bitfield. In - // Particular, CORINFO_EH_CLAUSE_NONE is zero, so you can "&" to check it. - // You do need to mask off the bits, though, because CORINFO_EH_CLAUSE_DUPLICATE + // particular, CORINFO_EH_CLAUSE_NONE is zero, so you cannot "&" to check it. + // You do need to mask off the bits, though, because CORINFO_EH_CLAUSE_SAMETRY // is and'ed in. const DWORD CORINFO_EH_CLAUSE_TYPE_MASK = 0x7; switch (clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK) @@ -3206,17 +3237,7 @@ void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& claus break; } - if ((clause.TryOffset == clause.TryLength) && (clause.TryOffset == clause.HandlerOffset) && - ((clause.Flags & (CORINFO_EH_CLAUSE_DUPLICATE | CORINFO_EH_CLAUSE_FINALLY)) == - (CORINFO_EH_CLAUSE_DUPLICATE | CORINFO_EH_CLAUSE_FINALLY))) - { - printf(" cloned finally"); - } - else if (clause.Flags & CORINFO_EH_CLAUSE_DUPLICATE) - { - printf(" duplicated"); - } - else if (clause.Flags & CORINFO_EH_CLAUSE_SAMETRY) + if (clause.Flags & CORINFO_EH_CLAUSE_SAMETRY) { printf(" same try"); } @@ -3227,12 +3248,6 @@ void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& claus void Compiler::fgVerifyHandlerTab() { - if (compIsForInlining()) - { - // We don't inline functions with EH. Don't bother verifying the EH table in the inlinee Compiler. - return; - } - if (compHndBBtabCount == 0) { return; @@ -3249,6 +3264,9 @@ void Compiler::fgVerifyHandlerTab() // block (case 3)? 
bool multipleLastBlockNormalizationDone = false; // Currently disabled + BitVecTraits traits(impInlineRoot()->compEHID, this); + BitVec ids(BitVecOps::MakeEmpty(&traits)); + assert(compHndBBtabCount <= compHndBBtabAllocCount); unsigned XTnum; @@ -3256,6 +3274,11 @@ void Compiler::fgVerifyHandlerTab() for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) { + // EH IDs should be unique and in range + // + assert(HBtab->ebdID < impInlineRoot()->compEHID); + assert(BitVecOps::TryAddElemD(&traits, ids, HBtab->ebdID)); + assert(HBtab->ebdTryBeg != nullptr); assert(HBtab->ebdTryLast != nullptr); assert(HBtab->ebdHndBeg != nullptr); @@ -3279,11 +3302,11 @@ void Compiler::fgVerifyHandlerTab() if (fgFuncletsCreated) { - assert(HBtab->ebdHndBeg->HasFlag(BBF_FUNCLET_BEG)); + assert(bbIsFuncletBeg(HBtab->ebdHndBeg)); if (HBtab->HasFilter()) { - assert(HBtab->ebdFilter->HasFlag(BBF_FUNCLET_BEG)); + assert(bbIsFuncletBeg(HBtab->ebdFilter)); } } } @@ -3686,8 +3709,8 @@ void Compiler::fgVerifyHandlerTab() // on the block. for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) { - unsigned enclosingTryIndex = ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index, - // ignoring 'mutual protect' trys + unsigned enclosingTryIndex = ehTrueEnclosingTryIndex(XTnum); // find the true enclosing try index, + // ignoring 'mutual protect' trys if (enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) { // The handler funclet for 'XTnum' has a try index of 'enclosingTryIndex' (at least, the parts of the @@ -3720,11 +3743,9 @@ void Compiler::fgVerifyHandlerTab() { assert(block->bbCatchTyp == BBCT_NONE); - if (fgFuncletsCreated) - { - // Make sure blocks that aren't the first block of a funclet do not have the BBF_FUNCLET_BEG flag set. - assert(!block->HasFlag(BBF_FUNCLET_BEG)); - } + // If this block wasn't marked as an EH handler 'begin' block, + // it shouldn't be the beginning of a funclet. + assert(!fgFuncletsCreated || !bbIsFuncletBeg(block)); } // Check for legal block types @@ -3782,7 +3803,7 @@ void Compiler::fgDispHandlerTab() return; } - printf("\nindex "); + printf("\n id, index "); #if defined(FEATURE_EH_WINDOWS_X86) if (!UsesFunclets()) { @@ -4405,124 +4426,6 @@ bool Compiler::fgAnyIntraHandlerPreds(BasicBlock* block) return false; } -#if defined(FEATURE_EH_WINDOWS_X86) - -/***************************************************************************** - * - * Function called to relocate any and all EH regions. - * Only entire consecutive EH regions will be moved and they will be kept together. - * Except for the first block, the range can not have any blocks that jump into or out of the region. - */ - -bool Compiler::fgRelocateEHRegions() -{ - bool result = false; // Our return value - - assert(!UsesFunclets()); - -#ifdef DEBUG - if (verbose) - printf("*************** In fgRelocateEHRegions()\n"); -#endif - - unsigned XTnum; - EHblkDsc* HBtab; - - for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) - { - // Nested EH regions cannot be moved. 
- // Also we don't want to relocate an EH region that has a filter - if ((HBtab->ebdHandlerNestingLevel == 0) && !HBtab->HasFilter()) - { - bool movedTry = false; -#if DEBUG - bool movedHnd = false; -#endif // DEBUG - - // Only try to move the outermost try region - if (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) - { - // Move the entire try region if it can be moved - if (HBtab->ebdTryBeg->isRunRarely()) - { - BasicBlock* bTryLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_TRY); - if (bTryLastBB != NULL) - { - result = true; - movedTry = true; - } - } -#if DEBUG - if (verbose && movedTry) - { - printf("\nAfter relocating an EH try region"); - fgDispBasicBlocks(); - fgDispHandlerTab(); - - // Make sure that the predecessor lists are accurate - if (expensiveDebugCheckLevel >= 2) - { - fgDebugCheckBBlist(); - } - } -#endif // DEBUG - } - - // Currently it is not good to move the rarely run handler regions to the end of the method - // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot - // section. - -#if 0 - // Now try to move the entire handler region if it can be moved. - // Don't try to move a finally handler unless we already moved the try region. - if (HBtab->ebdHndBeg->isRunRarely() && - !HBtab->ebdHndBeg->hasTryIndex() && - (movedTry || !HBtab->HasFinallyHandler())) - { - BasicBlock* bHndLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_HANDLER); - if (bHndLastBB != NULL) - { - result = true; - movedHnd = true; - } - } -#endif // 0 - -#if DEBUG - if (verbose && movedHnd) - { - printf("\nAfter relocating an EH handler region"); - fgDispBasicBlocks(); - fgDispHandlerTab(); - - // Make sure that the predecessor lists are accurate - if (expensiveDebugCheckLevel >= 2) - { - fgDebugCheckBBlist(); - } - } -#endif // DEBUG - } - } - -#if DEBUG - fgVerifyHandlerTab(); - - if (verbose && result) - { - printf("\nAfter fgRelocateEHRegions()"); - fgDispBasicBlocks(); - fgDispHandlerTab(); - // Make sure that the predecessor lists are accurate - fgDebugCheckBBlist(); - } -#endif // DEBUG - - return result; -} - -#endif // FEATURE_EH_WINDOWS_X86 - //------------------------------------------------------------------------ // fgExtendEHRegionBefore: Modify the EH table to account for a new block. // @@ -4579,13 +4482,6 @@ void Compiler::fgExtendEHRegionBefore(BasicBlock* block) block->bbRefs--; bPrev->bbRefs++; - if (fgFuncletsCreated) - { - assert(block->HasFlag(BBF_FUNCLET_BEG)); - bPrev->SetFlags(BBF_FUNCLET_BEG); - block->RemoveFlags(BBF_FUNCLET_BEG); - } - // If this is a handler for a filter, the last block of the filter will end with // a BBJ_EHFILTERRET block that jumps to the first block of its handler. // So we need to update it to keep things in sync. 
@@ -4624,13 +4520,6 @@ void Compiler::fgExtendEHRegionBefore(BasicBlock* block) HBtab->ebdFilter = bPrev; bPrev->SetFlags(BBF_DONT_REMOVE); - if (fgFuncletsCreated) - { - assert(block->HasFlag(BBF_FUNCLET_BEG)); - bPrev->SetFlags(BBF_FUNCLET_BEG); - block->RemoveFlags(BBF_FUNCLET_BEG); - } - bPrev->bbRefs++; } } diff --git a/src/coreclr/jit/jiteh.h b/src/coreclr/jit/jiteh.h index 482e6796f549..0d84e5b986b9 100644 --- a/src/coreclr/jit/jiteh.h +++ b/src/coreclr/jit/jiteh.h @@ -90,6 +90,8 @@ struct EHblkDsc unsigned ebdTyp; // Exception type (a class token), otherwise }; + unsigned short ebdID; // Unique ID for this eh descriptor (stable across add/delete/inlining) + EHHandlerType ebdHandlerType; #if defined(FEATURE_EH_WINDOWS_X86) diff --git a/src/coreclr/jit/jitgcinfo.h b/src/coreclr/jit/jitgcinfo.h index 16729f18470c..a99b020700f5 100644 --- a/src/coreclr/jit/jitgcinfo.h +++ b/src/coreclr/jit/jitgcinfo.h @@ -162,7 +162,13 @@ class GCInfo regMaskSmall rpdDel; // regptr bitset being removed } rpdCompiler; - unsigned short rpdPtrArg; // arg offset or popped arg count + struct + { + // Registers after call containing GC/byref (index 0 = REG_INT_FIRST) + unsigned int rpdCallGCrefRegs; + unsigned int rpdCallByrefRegs; + unsigned short rpdPtrArg; // arg offset or popped arg count + }; }; #ifndef JIT32_GCENCODER @@ -184,11 +190,8 @@ class GCInfo } #endif // !TARGET_WASM - unsigned short rpdIsThis : 1; // is it the 'this' pointer - unsigned short rpdCall : 1; // is this a true call site? - unsigned short : 1; // Padding bit, so next two start on a byte boundary - unsigned short rpdCallGCrefRegs : CNT_CALL_GC_REGS; // Callee-saved and return registers containing GC pointers. - unsigned short rpdCallByrefRegs : CNT_CALL_GC_REGS; // Callee-saved and return registers containing byrefs. + unsigned short rpdIsThis : 1; // is it the 'this' pointer + unsigned short rpdCall : 1; // is this a true call site? #ifndef JIT32_GCENCODER bool rpdIsCallInstr() @@ -288,7 +291,9 @@ class GCInfo //------------------------------------------------------------------------- #ifdef JIT32_GCENCODER - void gcCountForHeader(UNALIGNED unsigned int* pUntrackedCount, UNALIGNED unsigned int* pVarPtrTableSize); + void gcCountForHeader(UNALIGNED unsigned int* pUntrackedCount, + UNALIGNED unsigned int* pVarPtrTableSize, + UNALIGNED unsigned int* pNoGCRegionCount); bool gcIsUntrackedLocalOrNonEnregisteredArg(unsigned varNum, bool* pThisKeptAliveIsInUntracked = nullptr); diff --git a/src/coreclr/jit/layout.cpp b/src/coreclr/jit/layout.cpp index e84f55781ce1..a1ad460435ca 100644 --- a/src/coreclr/jit/layout.cpp +++ b/src/coreclr/jit/layout.cpp @@ -414,17 +414,48 @@ ClassLayout* Compiler::typGetBlkLayout(unsigned blockSize) return typGetCustomLayout(ClassLayoutBuilder(this, blockSize)); } -unsigned Compiler::typGetArrayLayoutNum(CORINFO_CLASS_HANDLE classHandle, unsigned length) +ClassLayout* Compiler::typGetArrayLayout(CORINFO_CLASS_HANDLE classHandle, unsigned length) { ClassLayoutBuilder b = ClassLayoutBuilder::BuildArray(this, classHandle, length); - return typGetCustomLayoutNum(b); + return typGetCustomLayout(b); } -ClassLayout* Compiler::typGetArrayLayout(CORINFO_CLASS_HANDLE classHandle, unsigned length) +#ifdef DEBUG +//------------------------------------------------------------------------ +// CopyNameFrom: Copy layout names, with optional prefix. 
+// +// Parameters: +// layout - layout to copy from +// prefix - prefix to add (or nullptr) +// +void ClassLayoutBuilder::CopyNameFrom(ClassLayout* layout, const char* prefix) { - ClassLayoutBuilder b = ClassLayoutBuilder::BuildArray(this, classHandle, length); - return typGetCustomLayout(b); + const char* layoutName = layout->GetClassName(); + const char* layoutShortName = layout->GetShortClassName(); + + if (prefix != nullptr) + { + const char* newName = nullptr; + const char* newShortName = nullptr; + + if (layoutName != nullptr) + { + newName = m_compiler->printfAlloc("%s%.100s", prefix, layoutName); + } + + if (layoutShortName != nullptr) + { + newShortName = m_compiler->printfAlloc("%s%.100s", prefix, layoutShortName); + } + + SetName(newName, newShortName); + } + else + { + SetName(layoutName, layoutShortName); + } } +#endif // DEBUG //------------------------------------------------------------------------ // Create: Create a ClassLayout from an EE side class handle. @@ -536,6 +567,31 @@ ClassLayout* ClassLayout::Create(Compiler* compiler, const ClassLayoutBuilder& b return newLayout; } +//------------------------------------------------------------------------ +// HasGCByRef: // Check if this classlayout has a TYP_BYREF GC pointer in it. +// +// Return value: +// True if so. +// +bool ClassLayout::HasGCByRef() const +{ + if (!HasGCPtr()) + { + return false; + } + + unsigned numSlots = GetSlotCount(); + for (unsigned i = 0; i < numSlots; i++) + { + if (GetGCPtrType(i) == TYP_BYREF) + { + return true; + } + } + + return false; +} + //------------------------------------------------------------------------ // IsStackOnly: does the layout represent a block that can never be on the heap? // @@ -646,8 +702,8 @@ const SegmentList& ClassLayout::GetNonPadding(Compiler* comp) // AreCompatible: check if 2 layouts are the same for copying. // // Arguments: -// layout1 - the first layout; -// layout2 - the second layout. +// layout1 - the first layout +// layout2 - the second layout // // Return value: // true if compatible, false otherwise. @@ -656,6 +712,11 @@ const SegmentList& ClassLayout::GetNonPadding(Compiler* comp) // Layouts are called compatible if they are equal or if // they have the same size and the same GC slots. // +// This is an equivalence relation: +// AreCompatible(a, b) == AreCompatible(b, a) +// AreCompatible(a, a) == true +// AreCompatible(a, b) && AreCompatible(b, c) ==> AreCompatible(a, c) +// // static bool ClassLayout::AreCompatible(const ClassLayout* layout1, const ClassLayout* layout2) { @@ -706,8 +767,6 @@ bool ClassLayout::AreCompatible(const ClassLayout* layout1, const ClassLayout* l return true; } - assert(clsHnd1 != NO_CLASS_HANDLE); - assert(clsHnd2 != NO_CLASS_HANDLE); assert(layout1->HasGCPtr() && layout2->HasGCPtr()); if (layout1->GetGCPtrCount() != layout2->GetGCPtrCount()) @@ -728,6 +787,106 @@ bool ClassLayout::AreCompatible(const ClassLayout* layout1, const ClassLayout* l return true; } +//------------------------------------------------------------------------ +// CanAssignFrom: true if assignment to this layout from the indicated layout is sensible +// +// Arguments: +// layout - the source of a possible assigment +// +// Return value: +// true if assignable, false otherwise. +// +// Notes: +// This may not be an equivalence relation: +// a->CanAssignFrom(b) and b->CanAssignFrom(a) may differ. 
+// +bool ClassLayout::CanAssignFrom(const ClassLayout* layout) +{ + if (this == layout) + { + return true; + } + + // Do the normal compatibility check first + // + const bool areCompatible = AreCompatible(this, layout); + + if (areCompatible) + { + return true; + } + + // Must be same size + // + if (GetSize() != layout->GetSize()) + { + return false; + } + + // Must be same IR type + // + if (GetType() != layout->GetType()) + { + return false; + } + + // Dest is GC, source is GC. Allow, slotwise: + // + // byref <- ref, byref, nint + // ref <- ref + // nint <- nint + // + if (HasGCPtr() && layout->HasGCPtr()) + { + const unsigned slotsCount = GetSlotCount(); + assert(slotsCount == layout->GetSlotCount()); + + for (unsigned i = 0; i < slotsCount; ++i) + { + var_types slotType = GetGCPtrType(i); + var_types layoutSlotType = layout->GetGCPtrType(i); + + if ((slotType != TYP_BYREF) && (slotType != layoutSlotType)) + { + return false; + } + } + return true; + } + + // Dest is GC, source is noGC. Allow, slotwise: + // + // byref <- nint + // nint <- nint + // + if (HasGCPtr() && !layout->HasGCPtr()) + { + const unsigned slotsCount = GetSlotCount(); + + for (unsigned i = 0; i < slotsCount; ++i) + { + var_types slotType = GetGCPtrType(i); + if (slotType == TYP_REF) + { + return false; + } + } + return true; + } + + // Dest is noGC, source is GC. Disallow. + // + if (!HasGCPtr() && layout->HasGCPtr()) + { + assert(!HasGCPtr()); + return false; + } + + // Dest is noGC, source is noGC, and they're not compatible. + // + return false; +} + //------------------------------------------------------------------------ // ClassLayoutBuilder: Construct a new builder for a class layout of the // specified size. @@ -778,6 +937,7 @@ ClassLayoutBuilder ClassLayoutBuilder::BuildArray(Compiler* compiler, CORINFO_CL ClrSafeInt totalSize(elementSize); totalSize *= static_cast(length); + totalSize.AlignUp(TARGET_POINTER_SIZE); totalSize += static_cast(OFFSETOF__CORINFO_Array__data); assert(!totalSize.IsOverflow()); @@ -790,7 +950,7 @@ ClassLayoutBuilder ClassLayoutBuilder::BuildArray(Compiler* compiler, CORINFO_CL unsigned offset = OFFSETOF__CORINFO_Array__data; for (unsigned i = 0; i < length; i++) { - builder.CopyInfoFrom(offset, elementLayout, /* copy padding */ false); + builder.CopyGCInfoFrom(offset, elementLayout); offset += elementSize; } } @@ -895,14 +1055,13 @@ void ClassLayoutBuilder::SetGCPtrType(unsigned slot, var_types type) } //------------------------------------------------------------------------ -// CopyInfoFrom: Copy GC pointers and padding information from another layout. +// CopyInfoGCFrom: Copy GC pointers from another layout. // // Arguments: // offset - Offset in this builder to start copy information into. // layout - Layout to get information from. -// copyPadding - Whether padding info should also be copied from the layout. // -void ClassLayoutBuilder::CopyInfoFrom(unsigned offset, ClassLayout* layout, bool copyPadding) +void ClassLayoutBuilder::CopyGCInfoFrom(unsigned offset, ClassLayout* layout) { assert(offset + layout->GetSize() <= m_size); @@ -915,15 +1074,22 @@ void ClassLayoutBuilder::CopyInfoFrom(unsigned offset, ClassLayout* layout, bool SetGCPtr(startSlot + slot, layout->GetGCPtr(slot)); } } +} - if (copyPadding) - { - AddPadding(SegmentList::Segment(offset, offset + layout->GetSize())); +//------------------------------------------------------------------------ +// CopyInfoPaddingFrom: Copy padding from another layout. 
+// +// Arguments: +// offset - Offset in this builder to start copy information into. +// layout - Layout to get information from. +// +void ClassLayoutBuilder::CopyPaddingFrom(unsigned offset, ClassLayout* layout) +{ + AddPadding(SegmentList::Segment(offset, offset + layout->GetSize())); - for (const SegmentList::Segment& nonPadding : layout->GetNonPadding(m_compiler)) - { - RemovePadding(SegmentList::Segment(offset + nonPadding.Start, offset + nonPadding.End)); - } + for (const SegmentList::Segment& nonPadding : layout->GetNonPadding(m_compiler)) + { + RemovePadding(SegmentList::Segment(offset + nonPadding.Start, offset + nonPadding.End)); } } diff --git a/src/coreclr/jit/layout.h b/src/coreclr/jit/layout.h index 03708a007052..ca367d5fb563 100644 --- a/src/coreclr/jit/layout.h +++ b/src/coreclr/jit/layout.h @@ -34,12 +34,14 @@ class ClassLayoutBuilder ClassLayoutBuilder(Compiler* compiler, unsigned size); void SetGCPtrType(unsigned slot, var_types type); - void CopyInfoFrom(unsigned offset, ClassLayout* layout, bool copyPadding); + void CopyGCInfoFrom(unsigned offset, ClassLayout* layout); + void CopyPaddingFrom(unsigned offset, ClassLayout* layout); void AddPadding(const SegmentList::Segment& padding); void RemovePadding(const SegmentList::Segment& nonPadding); #ifdef DEBUG void SetName(const char* name, const char* shortName); + void CopyNameFrom(ClassLayout* layout, const char* prefix); #endif static ClassLayoutBuilder BuildArray(Compiler* compiler, CORINFO_CLASS_HANDLE arrayType, unsigned length); @@ -222,6 +224,8 @@ class ClassLayout return m_gcPtrCount != 0; } + bool HasGCByRef() const; + bool IsStackOnly(Compiler* comp) const; bool IsGCPtr(unsigned slot) const @@ -260,6 +264,8 @@ class ClassLayout static bool AreCompatible(const ClassLayout* layout1, const ClassLayout* layout2); + bool CanAssignFrom(const ClassLayout* sourceLayout); + private: const BYTE* GetGCPtrs() const { diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 4d1b26972153..7366dfe9bce1 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1240,38 +1240,6 @@ class LocalAddressVisitor final : public GenTreeVisitor PopValue(); break; - case GT_RETURN: - if (TopValue(0).Node() != node) - { - assert(TopValue(1).Node() == node); - assert(TopValue(0).Node() == node->gtGetOp1()); - GenTreeUnOp* ret = node->AsUnOp(); - GenTree* retVal = ret->gtGetOp1(); - if (retVal->OperIs(GT_LCL_VAR)) - { - // TODO-1stClassStructs: this block is a temporary workaround to keep diffs small, - // having `doNotEnreg` affect block init and copy transformations that affect many methods. - // I have a change that introduces more precise and effective solution for that, but it would - // be merged separately. 
- GenTreeLclVar* lclVar = retVal->AsLclVar(); - unsigned lclNum = lclVar->GetLclNum(); - if (!m_compiler->compMethodReturnsMultiRegRetType() && - !m_compiler->lvaIsImplicitByRefLocal(lclVar->GetLclNum())) - { - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - if (varDsc->lvFieldCnt > 1) - { - m_compiler->lvaSetVarDoNotEnregister( - lclNum DEBUGARG(DoNotEnregisterReason::BlockOpRet)); - } - } - } - - EscapeValue(TopValue(0), node); - PopValue(); - } - break; - case GT_CALL: while (TopValue(0).Node() != node) { @@ -1797,7 +1765,7 @@ class LocalAddressVisitor final : public GenTreeVisitor case TYP_SIMD12: { // Handle the Vector3 field of case 2 - assert(varDsc->TypeGet() == TYP_SIMD16); + assert(varDsc->TypeIs(TYP_SIMD16)); // We effectively inverse the operands here and take elementNode as the main value and // simdLclNode[3] as the new value. This gives us a new TYP_SIMD16 with all elements in the @@ -1946,7 +1914,7 @@ class LocalAddressVisitor final : public GenTreeVisitor LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - if (indir->TypeGet() != TYP_STRUCT) + if (!indir->TypeIs(TYP_STRUCT)) { if (indir->TypeGet() == varDsc->TypeGet()) { @@ -1975,14 +1943,14 @@ class LocalAddressVisitor final : public GenTreeVisitor if (indir->TypeIs(TYP_FLOAT)) { - if (((offset % genTypeSize(TYP_FLOAT)) == 0) && m_compiler->IsBaselineSimdIsaSupported()) + if ((offset % genTypeSize(TYP_FLOAT)) == 0) { return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; } } else if (indir->TypeIs(TYP_SIMD12)) { - if ((offset == 0) && (varDsc->TypeGet() == TYP_SIMD16) && m_compiler->IsBaselineSimdIsaSupported()) + if ((offset == 0) && varDsc->TypeIs(TYP_SIMD16)) { return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; } @@ -1990,8 +1958,7 @@ class LocalAddressVisitor final : public GenTreeVisitor #ifdef TARGET_ARM64 else if (indir->TypeIs(TYP_SIMD8)) { - if ((varDsc->TypeGet() == TYP_SIMD16) && ((offset % 8) == 0) && - m_compiler->IsBaselineSimdIsaSupported()) + if (varDsc->TypeIs(TYP_SIMD16) && ((offset % 8) == 0)) { return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; } @@ -2001,7 +1968,7 @@ class LocalAddressVisitor final : public GenTreeVisitor else if (((indir->TypeIs(TYP_SIMD16) && m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX)) || (indir->TypeIs(TYP_SIMD32) && - m_compiler->IsBaselineVector512IsaSupportedOpportunistically())) && + m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512))) && (genTypeSize(indir) * 2 == genTypeSize(varDsc)) && ((offset % genTypeSize(indir)) == 0)) { return isDef ? 
IndirTransform::WithElement : IndirTransform::GetElement; @@ -2027,12 +1994,12 @@ class LocalAddressVisitor final : public GenTreeVisitor return IndirTransform::LclFld; } - if (varDsc->TypeGet() != TYP_STRUCT) + if (!varDsc->TypeIs(TYP_STRUCT)) { return IndirTransform::LclFld; } - if ((offset == 0) && ClassLayout::AreCompatible(indir->AsBlk()->GetLayout(), varDsc->GetLayout())) + if ((offset == 0) && indir->AsBlk()->GetLayout()->CanAssignFrom(varDsc->GetLayout())) { return IndirTransform::LclVar; } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 72df49af1b07..7ed68c365cac 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -18,7 +18,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif #include "emit.h" -#include "registerargconvention.h" #include "jitstd/algorithm.h" #include "patchpointinfo.h" @@ -37,19 +36,19 @@ void Compiler::lvaInitTypeRef() { /* x86 args look something like this: - [this ptr] [hidden return buffer] [declared arguments]* [generic context] [var arg cookie] + [this ptr] [hidden return buffer] [declared arguments]* [generic context] [async continuation] [var arg cookie] x64 is closer to the native ABI: - [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]* + [this ptr] [hidden return buffer] [generic context] [async continuation] [var arg cookie] [declared arguments]* (Note: prior to .NET Framework 4.5.1 for Windows 8.1 (but not .NET Framework 4.5.1 "downlevel"), the "hidden return buffer" came before the "this ptr". Now, the "this ptr" comes first. This is different from the C++ order, where the "hidden return buffer" always comes first.) ARM and ARM64 are the same as the current x64 convention: - [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]* + [this ptr] [hidden return buffer] [generic context] [async continuation] [var arg cookie] [declared arguments]* Key difference: - The var arg cookie and generic context are swapped with respect to the user arguments + The var arg cookie, generic context and async continuations are swapped with respect to the user arguments */ /* Set compArgsCount and compLocalsCount */ @@ -95,6 +94,22 @@ void Compiler::lvaInitTypeRef() info.compRetNativeType = hasRetBuffArg ? TYP_STRUCT : TYP_VOID; } +#ifdef DEBUG + if (verbose) + { + CORINFO_CLASS_HANDLE retClass = info.compMethodInfo->args.retTypeClass; + printf("%u return registers for return type %s %s\n", returnRegCount, varTypeName(info.compRetType), + varTypeIsStruct(info.compRetType) ? eeGetClassName(retClass) : ""); + for (unsigned i = 0; i < returnRegCount; i++) + { + unsigned offset = compRetTypeDesc.GetReturnFieldOffset(i); + unsigned size = genTypeSize(compRetTypeDesc.GetReturnRegType(i)); + printf(" [%02u..%02u) reg %s\n", offset, offset + size, + getRegName(compRetTypeDesc.GetABIReturnReg(i, info.compCallConv))); + } + } +#endif + // Do we have a RetBuffArg? 
if (hasRetBuffArg) { @@ -146,6 +161,11 @@ void Compiler::lvaInitTypeRef() info.compTypeCtxtArg = BAD_VAR_NUM; } + if (compIsAsync()) + { + info.compArgsCount++; + } + lvaCount = info.compLocalsCount = info.compArgsCount + info.compMethodInfo->locals.numArgs; info.compILlocalsCount = info.compILargsCount + info.compMethodInfo->locals.numArgs; @@ -180,54 +200,22 @@ void Compiler::lvaInitTypeRef() //------------------------------------------------------------------------- // Count the arguments and initialize the respective lvaTable[] entries // - // First the implicit arguments + // First the arguments //------------------------------------------------------------------------- - InitVarDscInfo varDscInfo; -#ifdef TARGET_X86 - // x86 unmanaged calling conventions limit the number of registers supported - // for accepting arguments. As a result, we need to modify the number of registers - // when we emit a method with an unmanaged calling convention. - switch (info.compCallConv) - { - case CorInfoCallConvExtension::Thiscall: - // In thiscall the this parameter goes into a register. - varDscInfo.Init(lvaTable, hasRetBuffArg, 1, 0); - break; - case CorInfoCallConvExtension::C: - case CorInfoCallConvExtension::Stdcall: - case CorInfoCallConvExtension::CMemberFunction: - case CorInfoCallConvExtension::StdcallMemberFunction: - varDscInfo.Init(lvaTable, hasRetBuffArg, 0, 0); - break; - case CorInfoCallConvExtension::Managed: - case CorInfoCallConvExtension::Fastcall: - case CorInfoCallConvExtension::FastcallMemberFunction: - default: - varDscInfo.Init(lvaTable, hasRetBuffArg, MAX_REG_ARG, MAX_FLOAT_REG_ARG); - break; - } -#else - varDscInfo.Init(lvaTable, hasRetBuffArg, MAX_REG_ARG, MAX_FLOAT_REG_ARG); -#endif - - lvaInitArgs(&varDscInfo); + lvaInitArgs(hasRetBuffArg); //------------------------------------------------------------------------- - // Finally the local variables + // Then the local variables //------------------------------------------------------------------------- - unsigned varNum = varDscInfo.varNum; - LclVarDsc* varDsc = varDscInfo.varDsc; + unsigned varNum = info.compArgsCount; CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args; -#if defined(TARGET_ARM) || defined(TARGET_RISCV64) - compHasSplitParam = varDscInfo.hasSplitParam; -#endif // TARGET_ARM || TARGET_RISCV64 - for (unsigned i = 0; i < info.compMethodInfo->locals.numArgs; - i++, varNum++, varDsc++, localsSig = info.compCompHnd->getArgNext(localsSig)) + i++, varNum++, localsSig = info.compCompHnd->getArgNext(localsSig)) { + LclVarDsc* varDsc = lvaGetDesc(varNum); CORINFO_CLASS_HANDLE typeHnd; CorInfoTypeWithMod corInfoTypeWithMod = info.compCompHnd->getArgType(&info.compMethodInfo->locals, localsSig, &typeHnd); @@ -335,10 +323,8 @@ void Compiler::lvaInitTypeRef() } /*****************************************************************************/ -void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo) +void Compiler::lvaInitArgs(bool hasRetBuffArg) { - compArgSize = 0; - #if defined(TARGET_ARM) && defined(PROFILING_SUPPORTED) // Prespill all argument regs on to stack in case of Arm when under profiler. // We do this as the arm32 CORINFO_HELP_FCN_ENTER helper does not preserve @@ -351,8 +337,9 @@ void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo) //---------------------------------------------------------------------- + unsigned varNum = 0; // Is there a "this" pointer ? 
-    lvaInitThisPtr(varDscInfo);
+    lvaInitThisPtr(&varNum);
 
     unsigned numUserArgsToSkip = 0;
     unsigned numUserArgs       = info.compMethodInfo->args.numArgs;
@@ -363,17 +350,23 @@
         // (the unmanaged this parameter) and then handle the hidden
         // return buffer parameter.
         assert(numUserArgs >= 1);
-        lvaInitUserArgs(varDscInfo, 0, 1);
+        lvaInitUserArgs(&varNum, 0, 1);
         numUserArgsToSkip++;
         numUserArgs--;
 
-        lvaInitRetBuffArg(varDscInfo, false);
+        if (hasRetBuffArg)
+        {
+            lvaInitRetBuffArg(&varNum, false);
+        }
     }
     else
 #endif
     {
-        /* If we have a hidden return-buffer parameter, that comes here */
-        lvaInitRetBuffArg(varDscInfo, true);
+        if (hasRetBuffArg)
+        {
+            // If we have a hidden return-buffer parameter, that comes here
+            lvaInitRetBuffArg(&varNum, true);
+        }
     }
 
     //======================================================================
@@ -381,28 +374,33 @@
 #if USER_ARGS_COME_LAST
     //@GENERICS: final instantiation-info argument for shared generic methods
     // and shared generic struct instance methods
-    lvaInitGenericsCtxt(varDscInfo);
+    lvaInitGenericsCtxt(&varNum);
+
+    lvaInitAsyncContinuation(&varNum);
 
     /* If the method is varargs, process the varargs cookie */
-    lvaInitVarArgsHandle(varDscInfo);
+    lvaInitVarArgsHandle(&varNum);
 #endif
 
     //-------------------------------------------------------------------------
     // Now walk the function signature for the explicit user arguments
     //-------------------------------------------------------------------------
-    lvaInitUserArgs(varDscInfo, numUserArgsToSkip, numUserArgs);
+    lvaInitUserArgs(&varNum, numUserArgsToSkip, numUserArgs);
 
 #if !USER_ARGS_COME_LAST
     //@GENERICS: final instantiation-info argument for shared generic methods
     // and shared generic struct instance methods
-    lvaInitGenericsCtxt(varDscInfo);
+    lvaInitGenericsCtxt(&varNum);
+
+    lvaInitAsyncContinuation(&varNum);
 
     /* If the method is varargs, process the varargs cookie */
-    lvaInitVarArgsHandle(varDscInfo);
+    lvaInitVarArgsHandle(&varNum);
 #endif
 
     //----------------------------------------------------------------------
     // We have set info.compArgsCount in compCompile()
-    noway_assert(varDscInfo->varNum == info.compArgsCount);
-    assert(varDscInfo->intRegArgNum <= MAX_REG_ARG);
-    // codeGen->floatRegState
-    info.compArgStackSize = varDscInfo->stackArgSize;
-#endif // FEATURE_FASTTAILCALL
+    noway_assert(varNum == info.compArgsCount);
 
     // Now we have parameters created in the right order. Figure out how they're passed.
     lvaClassifyParameterABI();
 
     // The total argument size must be aligned.
-    noway_assert((compArgSize % TARGET_POINTER_SIZE) == 0);
+    noway_assert((lvaParameterStackSize % TARGET_POINTER_SIZE) == 0);
 
 #ifdef TARGET_X86
     /* We can not pass more than 2^16 dwords as arguments as the "ret"
       instruction can only pop 2^16 arguments.
Could be handled correctly but it will be very difficult for fully interruptible code */ - if (compArgSize != (size_t)(unsigned short)compArgSize) + if (lvaParameterStackSize != (size_t)(unsigned short)lvaParameterStackSize) IMPL_LIMITATION("Too many arguments for the \"ret\" instruction to pop"); #endif } /*****************************************************************************/ -void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo) +void Compiler::lvaInitThisPtr(unsigned* curVarNum) { - LclVarDsc* varDsc = varDscInfo->varDsc; - if (!info.compIsStatic) + if (info.compIsStatic) { - varDsc->lvIsParam = 1; - varDsc->lvIsPtr = 1; - - lvaArg0Var = info.compThisArg = varDscInfo->varNum; - noway_assert(info.compThisArg == 0); - - if (eeIsValueClass(info.compClassHnd)) - { - varDsc->lvType = TYP_BYREF; - } - else - { - varDsc->lvType = TYP_REF; - lvaSetClass(varDscInfo->varNum, info.compClassHnd); - } - - varDsc->lvIsRegArg = 1; - noway_assert(varDscInfo->intRegArgNum == 0); + return; + } - varDsc->SetArgReg( - genMapRegArgNumToRegNum(varDscInfo->allocRegArg(TYP_INT), varDsc->TypeGet(), info.compCallConv)); -#if FEATURE_MULTIREG_ARGS - varDsc->SetOtherArgReg(REG_NA); -#endif - varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame + LclVarDsc* varDsc = lvaGetDesc(*curVarNum); + varDsc->lvIsParam = 1; + varDsc->lvIsPtr = 1; -#ifdef DEBUG - if (verbose) - { - printf("'this' passed in register %s\n", getRegName(varDsc->GetArgReg())); - } -#endif - compArgSize += TARGET_POINTER_SIZE; + lvaArg0Var = info.compThisArg = *curVarNum; + noway_assert(info.compThisArg == 0); - varDscInfo->nextParam(); + if (eeIsValueClass(info.compClassHnd)) + { + varDsc->lvType = TYP_BYREF; } + else + { + varDsc->lvType = TYP_REF; + lvaSetClass(*curVarNum, info.compClassHnd); + } + + varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame + (*curVarNum)++; } /*****************************************************************************/ -void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBufReg) +void Compiler::lvaInitRetBuffArg(unsigned* curVarNum, bool useFixedRetBufReg) { - if (varDscInfo->hasRetBufArg) - { - info.compRetBuffArg = varDscInfo->varNum; - - LclVarDsc* varDsc = varDscInfo->varDsc; - varDsc->lvType = TYP_I_IMPL; - varDsc->lvIsParam = 1; - varDsc->lvIsRegArg = 0; - - if (useFixedRetBufReg && hasFixedRetBuffReg(info.compCallConv)) - { - varDsc->lvIsRegArg = 1; - varDsc->SetArgReg(theFixedRetBuffReg(info.compCallConv)); - } - else if (varDscInfo->canEnreg(TYP_INT)) - { - varDsc->lvIsRegArg = 1; - unsigned retBuffArgNum = varDscInfo->allocRegArg(TYP_INT); - varDsc->SetArgReg(genMapIntRegArgNumToRegNum(retBuffArgNum, info.compCallConv)); - } - else - { - varDscInfo->stackArgSize = roundUp(varDscInfo->stackArgSize, TARGET_POINTER_SIZE); - varDsc->SetStackOffset(varDscInfo->stackArgSize); - varDscInfo->stackArgSize += TARGET_POINTER_SIZE; - } - -#if FEATURE_MULTIREG_ARGS - varDsc->SetOtherArgReg(REG_NA); -#endif - varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame - - assert(!varDsc->lvIsRegArg || isValidIntArgReg(varDsc->GetArgReg(), info.compCallConv)); - -#ifdef DEBUG - if (varDsc->lvIsRegArg && verbose) - { - printf("'__retBuf' passed in register %s\n", getRegName(varDsc->GetArgReg())); - } -#endif + info.compRetBuffArg = *curVarNum; - compArgSize += TARGET_POINTER_SIZE; + LclVarDsc* varDsc = lvaGetDesc(*curVarNum); + varDsc->lvType = 
TYP_I_IMPL; + varDsc->lvIsParam = 1; + varDsc->lvIsRegArg = 0; + varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame - varDscInfo->nextParam(); - } + (*curVarNum)++; } //----------------------------------------------------------------------------- @@ -534,27 +484,16 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf // Initialize local var descriptions for incoming user arguments // // Arguments: -// varDscInfo - the local var descriptions +// curVarNum - the current local // skipArgs - the number of user args to skip processing. // takeArgs - the number of user args to process (after skipping skipArgs number of args) // -void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, unsigned takeArgs) +void Compiler::lvaInitUserArgs(unsigned* curVarNum, unsigned skipArgs, unsigned takeArgs) { //------------------------------------------------------------------------- // Walk the function signature for the explicit arguments //------------------------------------------------------------------------- -#if defined(TARGET_X86) - // Only (some of) the implicit args are enregistered for varargs - if (info.compIsVarArgs) - { - varDscInfo->maxIntRegArgNum = varDscInfo->intRegArgNum; - } -#elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - // On System V type environment the float registers are not indexed together with the int ones. - varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; -#endif // TARGET* - CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args; const unsigned argSigLen = info.compMethodInfo->args.numArgs; @@ -568,10 +507,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un return; } -#ifdef TARGET_ARM - regMaskTP doubleAlignMask = RBM_NONE; -#endif // TARGET_ARM - // Skip skipArgs arguments from the signature. for (unsigned i = 0; i < skipArgs; i++, argLst = info.compCompHnd->getArgNext(argLst)) { @@ -579,9 +514,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } // Process each user arg. - for (unsigned i = 0; i < numUserArgs; i++, varDscInfo->nextParam(), argLst = info.compCompHnd->getArgNext(argLst)) + for (unsigned i = 0; i < numUserArgs; i++, (*curVarNum)++, argLst = info.compCompHnd->getArgNext(argLst)) { - LclVarDsc* varDsc = varDscInfo->varDsc; + LclVarDsc* varDsc = lvaGetDesc(*curVarNum); CORINFO_CLASS_HANDLE typeHnd = nullptr; CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); @@ -600,12 +535,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } #endif // TARGET_X86 && FEATURE_IJW - lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); + lvaInitVarDsc(varDsc, *curVarNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); if (strip(corInfoType) == CORINFO_TYPE_CLASS) { CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->args, argLst); - lvaSetClass(varDscInfo->varNum, clsHnd); + lvaSetClass(*curVarNum, clsHnd); } // The final home for this incoming parameter might be our local stack frame. 
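
In the hunks above, the lvaInit* helpers no longer consult InitVarDscInfo for register bookkeeping; each one only fills in the descriptor for the current parameter and advances a running `varNum`, leaving register/stack placement to the later `lvaClassifyParameterABI()` pass. A minimal sketch of that "create first, classify later" shape follows; `ParamDesc`, `InitParams`, `ClassifyParams`, and `kIntArgRegs` are invented for illustration and do not correspond to JIT types:

```cpp
#include <initializer_list>
#include <vector>

// Illustrative parameter descriptor; the real LclVarDsc carries far more state.
struct ParamDesc
{
    const char* name;
    bool        inRegister = false; // filled in by the classification pass
    int         regOrSlot  = 0;
};

// Phase 1: create descriptors in signature order, tracking only a running index.
unsigned InitParams(std::vector<ParamDesc>& table, std::initializer_list<const char*> names)
{
    unsigned varNum = 0;
    for (const char* name : names)
    {
        table.push_back(ParamDesc{name});
        varNum++; // mirrors "(*curVarNum)++" in the lvaInit* helpers
    }
    return varNum;
}

// Phase 2: a separate pass decides how each parameter is passed.
void ClassifyParams(std::vector<ParamDesc>& table)
{
    constexpr int kIntArgRegs    = 4; // hypothetical register budget
    int           nextReg        = 0;
    int           nextStackSlot  = 0;
    for (ParamDesc& p : table)
    {
        if (nextReg < kIntArgRegs)
        {
            p.inRegister = true;
            p.regOrSlot  = nextReg++;
        }
        else
        {
            p.regOrSlot = nextStackSlot++;
        }
    }
}
```

Keeping creation and classification separate is what lets parameters such as the generics context and the async continuation be added with a plain counter bump, without re-deriving register state at each insertion point.
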
@@ -619,949 +554,211 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un IMPL_LIMITATION("SIMD types are currently unsupported in Swift reverse pinvokes"); } - if (lvaInitSpecialSwiftParam(argLst, varDscInfo, strip(corInfoType), typeHnd)) + if (lvaInitSpecialSwiftParam(argLst, *curVarNum, strip(corInfoType), typeHnd)) { continue; } - if (varDsc->TypeGet() == TYP_STRUCT) + if (varDsc->TypeIs(TYP_STRUCT)) { // Struct parameters are lowered to separate primitives in the // Swift calling convention. We cannot handle these patterns // efficiently, so we always DNER them and home them to stack // in the prolog. - lvaSetVarDoNotEnregister(varDscInfo->varNum DEBUGARG(DoNotEnregisterReason::IsStructArg)); + lvaSetVarDoNotEnregister(*curVarNum DEBUGARG(DoNotEnregisterReason::IsStructArg)); } } #endif - // For ARM, ARM64, LOONGARCH64, RISCV64 and AMD64 varargs, all arguments go in integer registers - var_types argType = mangleVarArgsType(varDsc->TypeGet()); - - var_types origArgType = argType; - - // ARM softfp calling convention should affect only the floating point arguments. - // Otherwise there appear too many surplus pre-spills and other memory operations - // with the associated locations . - bool isSoftFPPreSpill = opts.compUseSoftFP && varTypeIsFloating(varDsc->TypeGet()); - unsigned argSize = eeGetArgSize(strip(corInfoType), typeHnd); - unsigned cSlots = - (argSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; // the total number of slots of this argument - bool isHfaArg = false; - var_types hfaType = TYP_UNDEF; - - // Methods that use VarArg or SoftFP cannot have HFA arguments except - // Native varargs on arm64 unix use the regular calling convention. - if (((TargetOS::IsUnix && TargetArchitecture::IsArm64) || !info.compIsVarArgs) && !opts.compUseSoftFP) - { - // If the argType is a struct, then check if it is an HFA - if (varTypeIsStruct(argType)) - { - // hfaType is set to float, double, or SIMD type if it is an HFA, otherwise TYP_UNDEF - hfaType = GetHfaType(typeHnd); - isHfaArg = varTypeIsValidHfaType(hfaType); - } - } - else if (info.compIsVarArgs) - { - // Currently native varargs is not implemented on non windows targets. - // - // Note that some targets like Arm64 Unix should not need much work as - // the ABI is the same. While other targets may only need small changes - // such as amd64 Unix, which just expects RAX to pass numFPArguments. - if (TargetOS::IsUnix) - { - NYI("InitUserArgs for Vararg callee is not yet implemented on non Windows targets."); - } - } - - if (isHfaArg) + if (info.compIsVarArgs || (opts.compUseSoftFP && varTypeIsFloating(varDsc))) { - // We have an HFA argument, so from here on out treat the type as a float, double, or vector. - // The original struct type is available by using origArgType. - // We also update the cSlots to be the number of float/double/vector fields in the HFA. - argType = hfaType; // TODO-Cleanup: remove this assignment and mark `argType` as const. - varDsc->SetHfaType(hfaType); - cSlots = varDsc->lvHfaSlots(); +#ifndef TARGET_X86 + // TODO-CQ: We shouldn't have to go as far as to declare these + // address-exposed -- DoNotEnregister should suffice. + lvaSetVarAddrExposed(*curVarNum DEBUGARG(AddressExposedReason::TOO_CONSERVATIVE)); +#endif // !TARGET_X86 } + } +} - // The number of slots that must be enregistered if we are to consider this argument enregistered. - // This is normally the same as cSlots, since we normally either enregister the entire object, - // or none of it. 
For structs on ARM, however, we only need to enregister a single slot to consider - // it enregistered, as long as we can split the rest onto the stack. - unsigned cSlotsToEnregister = cSlots; - -#if defined(TARGET_ARM64) +#ifdef SWIFT_SUPPORT +//----------------------------------------------------------------------------- +// lvaInitSpecialSwiftParam: Initialize SwiftSelf/SwiftError* parameters. +// +// Parameters: +// argHnd - Handle for this parameter in the method's signature +// lclNum - The parameter local +// type - Type of the parameter +// typeHnd - Class handle for the type of the parameter +// +// Returns: +// true if parameter was initialized +// +bool Compiler::lvaInitSpecialSwiftParam(CORINFO_ARG_LIST_HANDLE argHnd, + unsigned lclNum, + CorInfoType type, + CORINFO_CLASS_HANDLE typeHnd) +{ + const bool argIsByrefOrPtr = (type == CORINFO_TYPE_BYREF) || (type == CORINFO_TYPE_PTR); - if (compFeatureArgSplit()) - { - // On arm64 Windows we will need to properly handle the case where a >8byte <=16byte - // struct (or vector) is split between register r7 and virtual stack slot s[0]. - // We will only do this for calls to vararg methods on Windows Arm64. - // SIMD types (for which `varTypeIsStruct()` returns `true`) are also passed in general-purpose - // registers and can be split between registers and stack with Windows arm64 native varargs. - // - // !!This does not affect the normal arm64 calling convention or Unix Arm64!! - if (info.compIsVarArgs && (cSlots > 1)) - { - if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register - !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register - { - cSlotsToEnregister = 1; // Force the split - varDscInfo->stackArgSize += TARGET_POINTER_SIZE; - } - } - } + if (argIsByrefOrPtr) + { + // For primitive types, we don't expect to be passed a CORINFO_CLASS_HANDLE; look up the actual handle + assert(typeHnd == nullptr); + CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->args, argHnd); + type = info.compCompHnd->getChildType(clsHnd, &typeHnd); + } -#endif // defined(TARGET_ARM64) + if (type != CORINFO_TYPE_VALUECLASS) + { + return false; + } -#ifdef TARGET_ARM - // On ARM we pass the first 4 words of integer arguments and non-HFA structs in registers. - // But we pre-spill user arguments in varargs methods and structs. - // - unsigned cAlign; - bool preSpill = info.compIsVarArgs || isSoftFPPreSpill; + if (!info.compCompHnd->isIntrinsicType(typeHnd)) + { + return false; + } - switch (origArgType) + const char* namespaceName; + const char* className = info.compCompHnd->getClassNameFromMetadata(typeHnd, &namespaceName); + if ((strcmp(className, "SwiftSelf") == 0) && (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0)) + { + if (argIsByrefOrPtr) { - case TYP_STRUCT: - assert(varDsc->lvSize() == argSize); - cAlign = varDsc->lvStructDoubleAlign ? 2 : 1; - - // HFA arguments go on the stack frame. They don't get spilled in the prolog like struct - // arguments passed in the integer registers but get homed immediately after the prolog. - if (!isHfaArg) - { - cSlotsToEnregister = 1; // HFAs must be totally enregistered or not, but other structs can be split. 
- preSpill = true; - } - break; - - case TYP_DOUBLE: - case TYP_LONG: - cAlign = 2; - break; - - default: - cAlign = 1; - break; + BADCODE("Expected SwiftSelf struct, got pointer/reference"); } - if (isRegParamType(argType)) + if (lvaSwiftSelfArg != BAD_VAR_NUM) { - compArgSize += varDscInfo->alignReg(argType, cAlign) * REGSIZE_BYTES; + BADCODE("Duplicate SwiftSelf parameter"); } - if (argType == TYP_STRUCT) - { - // Are we going to split the struct between registers and stack? We can do that as long as - // no floating-point arguments have been put on the stack. - // - // From the ARM Procedure Call Standard: - // Rule C.5: "If the NCRN is less than r4 **and** the NSAA is equal to the SP," - // then split the argument between registers and stack. Implication: if something - // has already been spilled to the stack, then anything that would normally be - // split between the core registers and the stack will be put on the stack. - // Anything that follows will also be on the stack. However, if something from - // floating point regs has been spilled to the stack, we can still use r0-r3 until they are full. - - if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register - !varDscInfo->canEnreg(TYP_INT, cSlots) && // The end of the struct can't fit in a register - varDscInfo->existAnyFloatStackArgs()) // There's at least one stack-based FP arg already - { - varDscInfo->setAllRegArgUsed(TYP_INT); // Prevent all future use of integer registers - preSpill = false; // This struct won't be prespilled, since it will go on the stack - } - } + lvaSwiftSelfArg = lclNum; + return true; + } - if (preSpill) + if ((strcmp(className, "SwiftIndirectResult") == 0) && + (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0)) + { + if (argIsByrefOrPtr) { - for (unsigned ix = 0; ix < cSlots; ix++) - { - if (!varDscInfo->canEnreg(TYP_INT, ix + 1)) - { - break; - } - regMaskTP regMask = genMapArgNumToRegMask(varDscInfo->regArgNum(TYP_INT) + ix, TYP_INT); - if (cAlign == 2) - { - doubleAlignMask |= regMask; - } - codeGen->regSet.rsMaskPreSpillRegArg |= regMask; - } + BADCODE("Expected SwiftIndirectResult struct, got pointer/reference"); } -#endif // TARGET_ARM -#if defined(UNIX_AMD64_ABI) - SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; - if (varTypeIsStruct(argType)) + if (info.compRetType != TYP_VOID) { - assert(typeHnd != nullptr); - eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); - if (structDesc.passedInRegisters) - { - unsigned intRegCount = 0; - unsigned floatRegCount = 0; - - for (unsigned int i = 0; i < structDesc.eightByteCount; i++) - { - if (structDesc.IsIntegralSlot(i)) - { - intRegCount++; - } - else if (structDesc.IsSseSlot(i)) - { - floatRegCount++; - } - else - { - assert(false && "Invalid eightbyte classification type."); - break; - } - } - - if (intRegCount != 0 && !varDscInfo->canEnreg(TYP_INT, intRegCount)) - { - structDesc.passedInRegisters = false; // No register to enregister the eightbytes. - } - - if (floatRegCount != 0 && !varDscInfo->canEnreg(TYP_FLOAT, floatRegCount)) - { - structDesc.passedInRegisters = false; // No register to enregister the eightbytes. 
- } - } + BADCODE("Functions with SwiftIndirectResult parameters must return void"); } -#endif // UNIX_AMD64_ABI - - bool canPassArgInRegisters = false; -#if defined(UNIX_AMD64_ABI) - if (varTypeIsStruct(argType)) - { - canPassArgInRegisters = structDesc.passedInRegisters; - } - else -#elif defined(TARGET_X86) - if (varTypeIsStruct(argType) && isTrivialPointerSizedStruct(typeHnd)) + if (lvaSwiftIndirectResultArg != BAD_VAR_NUM) { - canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); + BADCODE("Duplicate SwiftIndirectResult parameter"); } - else -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - const CORINFO_FPSTRUCT_LOWERING* lowering = nullptr; - var_types argRegTypeInStruct1 = TYP_UNKNOWN; - var_types argRegTypeInStruct2 = TYP_UNKNOWN; + lvaSwiftIndirectResultArg = lclNum; + return true; + } - if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) + if ((strcmp(className, "SwiftError") == 0) && (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0)) + { + if (!argIsByrefOrPtr) { - lowering = GetFpStructLowering(typeHnd); + BADCODE("Expected SwiftError pointer/reference, got struct"); } - if ((lowering != nullptr) && !lowering->byIntegerCallConv) + if (lvaSwiftErrorArg != BAD_VAR_NUM) { - assert(varTypeIsStruct(argType)); - assert((lowering->numLoweredElements == 1) || (lowering->numLoweredElements == 2)); - if (lowering->numLoweredElements == 1) - assert(varDsc->lvExactSize() <= argSize); - - cSlotsToEnregister = static_cast(lowering->numLoweredElements); - argRegTypeInStruct1 = JITtype2varType(lowering->loweredElements[0]); - if (lowering->numLoweredElements == 2) - argRegTypeInStruct2 = JITtype2varType(lowering->loweredElements[1]); + BADCODE("Duplicate SwiftError* parameter"); + } - int floatNum = (int)varTypeIsFloating(argRegTypeInStruct1) + (int)varTypeIsFloating(argRegTypeInStruct2); - assert(floatNum > 0); + lvaSwiftErrorArg = lclNum; - canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, floatNum); - if (canPassArgInRegisters && ((unsigned)floatNum < lowering->numLoweredElements)) - { - assert(floatNum == 1); - assert(lowering->numLoweredElements == 2); - assert(varTypeIsIntegralOrI(argRegTypeInStruct1) || varTypeIsIntegralOrI(argRegTypeInStruct2)); - canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); - } + // Instead, all usages of the SwiftError* parameter will be redirected to this pseudolocal. + lvaSwiftErrorLocal = lvaGrabTempWithImplicitUse(false DEBUGARG("SwiftError pseudolocal")); + lvaSetStruct(lvaSwiftErrorLocal, typeHnd, false); + return true; + } - if (!canPassArgInRegisters) - { - // If a struct eligible for passing according to floating-point calling convention cannot be fully - // enregistered, it is passed according to integer calling convention -- in up to two integer registers - // and/or stack slots, as a lump of bits laid out like in memory. - cSlotsToEnregister = cSlots; - argRegTypeInStruct1 = TYP_UNKNOWN; - argRegTypeInStruct2 = TYP_UNKNOWN; - - canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); - if (cSlotsToEnregister == 2) - { - if (!canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1)) - { - // Here a struct-arg which needs two registers but only one integer register available, - // it has to be split. 
- argRegTypeInStruct1 = TYP_I_IMPL; - canPassArgInRegisters = true; - } - } - } - } - else -#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - { - canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // On LoongArch64 and RISCV64, if there aren't any remaining floating-point registers to pass the - // argument, integer registers (if any) are used instead. - if (!canPassArgInRegisters && varTypeIsFloating(argType)) - { - canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); - argType = canPassArgInRegisters ? TYP_I_IMPL : argType; - } - if (!canPassArgInRegisters && (cSlots > 1)) - { - // If a struct-arg which needs two registers but only one integer register available, - // it has to be split. - canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); - argRegTypeInStruct1 = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; - } + return false; +} #endif - } - if (canPassArgInRegisters) - { - /* Another register argument */ +/*****************************************************************************/ +void Compiler::lvaInitGenericsCtxt(unsigned* curVarNum) +{ + //@GENERICS: final instantiation-info argument for shared generic methods + // and shared generic struct instance methods + if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) == 0) + { + return; + } - // Allocate the registers we need. allocRegArg() returns the first argument register number of the set. - // For non-HFA structs, we still "try" to enregister the whole thing; it will just max out if splitting - // to the stack happens. - unsigned firstAllocatedRegArgNum = 0; + info.compTypeCtxtArg = *curVarNum; -#if FEATURE_MULTIREG_ARGS - varDsc->SetOtherArgReg(REG_NA); -#endif // FEATURE_MULTIREG_ARGS + LclVarDsc* varDsc = lvaGetDesc(*curVarNum); + varDsc->lvIsParam = 1; + varDsc->lvType = TYP_I_IMPL; + varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame -#if defined(UNIX_AMD64_ABI) - unsigned secondAllocatedRegArgNum = 0; - var_types firstEightByteType = TYP_UNDEF; - var_types secondEightByteType = TYP_UNDEF; + (*curVarNum)++; +} - if (varTypeIsStruct(argType)) - { - if (structDesc.eightByteCount >= 1) - { - firstEightByteType = GetEightByteType(structDesc, 0); - firstAllocatedRegArgNum = varDscInfo->allocRegArg(firstEightByteType, 1); - } - } - else -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - unsigned secondAllocatedRegArgNum = 0; - if (argRegTypeInStruct1 != TYP_UNKNOWN) - { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct1, 1); - } - else -#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots); - } - - if (isHfaArg) - { - // We need to save the fact that this HFA is enregistered. - // Note that we can have HVAs of SIMD types even if we are not recognizing intrinsics. - // In that case, we won't have normalized the vector types on the varDsc, so if we have a single vector - // register, we need to set the type now. Otherwise, later we'll assume this is passed by reference. 
- if (varDsc->lvHfaSlots() != 1) - { - varDsc->lvIsMultiRegArg = true; - } - } - - varDsc->lvIsRegArg = 1; - -#if FEATURE_MULTIREG_ARGS -#ifdef TARGET_ARM64 - if (argType == TYP_STRUCT) - { - varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL, info.compCallConv)); - if (cSlots == 2) - { - varDsc->SetOtherArgReg( - genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL, info.compCallConv)); - varDsc->lvIsMultiRegArg = true; - } - } -#elif defined(UNIX_AMD64_ABI) - if (varTypeIsStruct(argType)) - { - varDsc->SetArgReg( - genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType, info.compCallConv)); - - // If there is a second eightbyte, get a register for it too and map the arg to the reg number. - if (structDesc.eightByteCount >= 2) - { - secondEightByteType = GetEightByteType(structDesc, 1); - secondAllocatedRegArgNum = varDscInfo->allocRegArg(secondEightByteType, 1); - varDsc->lvIsMultiRegArg = true; - } - - if (secondEightByteType != TYP_UNDEF) - { - varDsc->SetOtherArgReg( - genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType, info.compCallConv)); - } - - assert(structDesc.eightByteCount <= 2); - } -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (argType == TYP_STRUCT) - { - if (argRegTypeInStruct1 != TYP_UNKNOWN) - { - varDsc->SetArgReg( - genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1, info.compCallConv)); - if (argRegTypeInStruct2 != TYP_UNKNOWN) - { - secondAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct2, 1); - varDsc->SetOtherArgReg( - genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2, info.compCallConv)); - - varDsc->lvIsMultiRegArg = true; - } - else if (cSlotsToEnregister > 1) - { - // Here a struct-arg which needs two registers but only one integer register available, - // it has to be split. But we reserved extra 8-bytes for the whole struct. - varDsc->lvIsSplit = 1; - varDsc->SetOtherArgReg(REG_STK); - varDscInfo->setAllRegArgUsed(argRegTypeInStruct1); - varDscInfo->stackArgSize += TARGET_POINTER_SIZE; -#ifdef TARGET_RISCV64 - varDscInfo->hasSplitParam = true; -#endif - } - } - else - { - varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL, info.compCallConv)); - if (cSlots == 2) - { - varDsc->SetOtherArgReg( - genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL, info.compCallConv)); - - varDsc->lvIsMultiRegArg = true; - } - - assert(cSlots <= 2); - } - } -#else // ARM32 - if (varTypeIsStruct(argType)) - { - varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL, info.compCallConv)); - } -#endif // ARM32 - else -#endif // FEATURE_MULTIREG_ARGS - { - varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType, info.compCallConv)); - } - -#ifdef TARGET_ARM - if (varDsc->TypeGet() == TYP_LONG) - { - varDsc->SetOtherArgReg( - genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_INT, info.compCallConv)); - } - - unsigned numEnregistered = 0; - unsigned stackSize = 0; - // Check if arg was split between registers and stack. 
- if (varTypeUsesIntReg(argType)) - { - unsigned firstRegArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg(), info.compCallConv); - unsigned lastRegArgNum = firstRegArgNum + cSlots - 1; - if (lastRegArgNum >= varDscInfo->maxIntRegArgNum) - { - assert(varDscInfo->stackArgSize == 0); - numEnregistered = varDscInfo->maxIntRegArgNum - firstRegArgNum; - varDsc->SetStackOffset(-(int)numEnregistered * REGSIZE_BYTES); - stackSize = (cSlots - numEnregistered) * REGSIZE_BYTES; - varDscInfo->stackArgSize += stackSize; - varDscInfo->hasSplitParam = true; - JITDUMP("set user arg V%02u offset to %d\n", varDscInfo->varNum, varDsc->GetStackOffset()); - } - else - { - numEnregistered = cSlots; - } - } - else - { - numEnregistered = cSlots; - } -#endif // TARGET_ARM - -#ifdef DEBUG - if (verbose) - { - printf("Arg #%u passed in register(s) ", varDscInfo->varNum); - -#if defined(UNIX_AMD64_ABI) - if (varTypeIsStruct(argType)) - { - // Print both registers, just to be clear - if (firstEightByteType == TYP_UNDEF) - { - printf("firstEightByte: "); - } - else - { - printf("firstEightByte: %s", - getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType, - info.compCallConv))); - } - - if (secondEightByteType == TYP_UNDEF) - { - printf(", secondEightByte: "); - } - else - { - printf(", secondEightByte: %s", - getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType, - info.compCallConv))); - } - } - else -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (varTypeIsStruct(argType)) - { - if (argRegTypeInStruct1 == TYP_UNKNOWN) - { - printf("first: "); - } - else - { - printf("first: %s", - getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1, - info.compCallConv))); - } - if (argRegTypeInStruct2 == TYP_UNKNOWN) - { - printf(", second: "); - } - else - { - printf(", second: %s", - getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2, - info.compCallConv))); - } - } - else -#endif // UNIX_AMD64_ABI, TARGET_LOONGARCH64, TARGET_RISCV64 - { - assert(varTypeUsesFloatReg(argType) || varTypeUsesIntReg(argType)); - - bool isFloat = varTypeUsesFloatReg(argType); - unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), argType, info.compCallConv); - - for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++) - { - if (ix > 0) - { - printf(","); - } - - if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) - { - // a struct has been split between registers and stack - printf(" stack slots:%d", cSlots - ix); - break; - } - -#ifdef TARGET_ARM - if (isFloat) - { - // Print register size prefix - if (argType == TYP_DOUBLE) - { - // Print both registers, just to be clear - printf("%s/%s", - getRegName(genMapRegArgNumToRegNum(regArgNum, argType, info.compCallConv)), - getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType, info.compCallConv))); - - // doubles take 2 slots - assert(ix + 1 < cSlots); - ++ix; - ++regArgNum; - } - else - { - printf("%s", - getRegName(genMapRegArgNumToRegNum(regArgNum, argType, info.compCallConv))); - } - } - else -#endif // TARGET_ARM - { - printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType, info.compCallConv))); - } - } - } - printf("\n"); - } -#endif // DEBUG - } // end if (canPassArgInRegisters) - else - { -#if defined(TARGET_ARM) - varDscInfo->setAllRegArgUsed(argType); - - if (varTypeUsesFloatReg(argType)) - { - varDscInfo->setAnyFloatStackArgs(); - } - else - { - assert(varTypeUsesIntReg(argType)); - } - -#elif defined(TARGET_ARM64) || 
defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - - // If we needed to use the stack in order to pass this argument then - // record the fact that we have used up any remaining registers of this 'type' - // This prevents any 'backfilling' from occurring on ARM64/LoongArch64. - // - varDscInfo->setAllRegArgUsed(argType); - -#endif // TARGET_XXX - -#ifdef TARGET_ARM - unsigned argAlignment = cAlign * TARGET_POINTER_SIZE; -#else - unsigned argAlignment = eeGetArgSizeAlignment(origArgType, (hfaType == TYP_FLOAT)); - // We expect the following rounding operation to be a noop on all - // ABIs except ARM (where we have 8-byte aligned args) and Apple - // ARM64 (that allows to pack multiple smaller parameters in a - // single stack slot). - assert(compAppleArm64Abi() || ((varDscInfo->stackArgSize % argAlignment) == 0)); -#endif - varDscInfo->stackArgSize = roundUp(varDscInfo->stackArgSize, argAlignment); - - JITDUMP("set user arg V%02u offset to %u\n", varDscInfo->varNum, varDscInfo->stackArgSize); - varDsc->SetStackOffset(varDscInfo->stackArgSize); - varDscInfo->stackArgSize += argSize; - } - -#ifdef UNIX_AMD64_ABI - // The arg size is returning the number of bytes of the argument. For a struct it could return a size not a - // multiple of TARGET_POINTER_SIZE. The stack allocated space should always be multiple of TARGET_POINTER_SIZE, - // so round it up. - compArgSize += roundUp(argSize, TARGET_POINTER_SIZE); -#else // !UNIX_AMD64_ABI - compArgSize += argSize; -#endif // !UNIX_AMD64_ABI - if (info.compIsVarArgs || isSoftFPPreSpill) - { -#if defined(TARGET_X86) - varDsc->SetStackOffset(compArgSize); -#else // !TARGET_X86 - // TODO-CQ: We shouldn't have to go as far as to declare these - // address-exposed -- DoNotEnregister should suffice. - - lvaSetVarAddrExposed(varDscInfo->varNum DEBUGARG(AddressExposedReason::TOO_CONSERVATIVE)); -#endif // !TARGET_X86 - } - } - - compArgSize = GetOutgoingArgByteSize(compArgSize); - -#ifdef TARGET_ARM - if (doubleAlignMask != RBM_NONE) - { - assert(RBM_ARG_REGS == 0xF); - assert((doubleAlignMask & RBM_ARG_REGS) == doubleAlignMask); - if (doubleAlignMask != RBM_NONE && doubleAlignMask != RBM_ARG_REGS) - { - // 'double aligned types' can begin only at r0 or r2 and we always expect at least two registers to be used - // Note that in rare cases, we can have double-aligned structs of 12 bytes (if specified explicitly with - // attributes) - assert((doubleAlignMask == 0b0011) || (doubleAlignMask == 0b1100) || - (doubleAlignMask == 0b0111) /* || 0b1111 is if'ed out */); - - // Now if doubleAlignMask is xyz1 i.e., the struct starts in r0, and we prespill r2 or r3 - // but not both, then the stack would be misaligned for r0. So spill both - // r2 and r3. - // - // ; +0 --- caller SP double aligned ---- - // ; -4 r2 r3 - // ; -8 r1 r1 - // ; -c r0 r0 <-- misaligned. - // ; callee saved regs - bool startsAtR0 = (doubleAlignMask & 1) == 1; - bool r2XorR3 = ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R2) == 0) != - ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R3) == 0); - if (startsAtR0 && r2XorR3) - { - codeGen->regSet.rsMaskPreSpillAlign = - (~codeGen->regSet.rsMaskPreSpillRegArg & ~doubleAlignMask) & RBM_ARG_REGS; - } - } - } -#endif // TARGET_ARM -} - -#ifdef SWIFT_SUPPORT //----------------------------------------------------------------------------- -// lvaInitSpecialSwiftParam: Initialize SwiftSelf/SwiftError* parameters. 
-// -// Parameters: -// argHnd - Handle for this parameter in the method's signature -// varDsc - LclVarDsc* for the parameter -// type - Type of the parameter -// typeHnd - Class handle for the type of the parameter +// lvaInitAsyncContinuation: +// Initialize the async continuation parameter. // -// Returns: -// true if parameter was initialized +// Type parameters: +// curVarNum - [in, out] The current local variable number for parameters // -bool Compiler::lvaInitSpecialSwiftParam(CORINFO_ARG_LIST_HANDLE argHnd, - InitVarDscInfo* varDscInfo, - CorInfoType type, - CORINFO_CLASS_HANDLE typeHnd) +void Compiler::lvaInitAsyncContinuation(unsigned* curVarNum) { - const bool argIsByrefOrPtr = (type == CORINFO_TYPE_BYREF) || (type == CORINFO_TYPE_PTR); - - if (argIsByrefOrPtr) - { - // For primitive types, we don't expect to be passed a CORINFO_CLASS_HANDLE; look up the actual handle - assert(typeHnd == nullptr); - CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->args, argHnd); - type = info.compCompHnd->getChildType(clsHnd, &typeHnd); - } - - if (type != CORINFO_TYPE_VALUECLASS) - { - return false; - } - - if (!info.compCompHnd->isIntrinsicType(typeHnd)) - { - return false; - } - - const char* namespaceName; - const char* className = info.compCompHnd->getClassNameFromMetadata(typeHnd, &namespaceName); - if ((strcmp(className, "SwiftSelf") == 0) && (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0)) + if (!compIsAsync()) { - if (argIsByrefOrPtr) - { - BADCODE("Expected SwiftSelf struct, got pointer/reference"); - } - - if (lvaSwiftSelfArg != BAD_VAR_NUM) - { - BADCODE("Duplicate SwiftSelf parameter"); - } - - LclVarDsc* const varDsc = varDscInfo->varDsc; - varDsc->SetArgReg(REG_SWIFT_SELF); - varDsc->SetOtherArgReg(REG_NA); - varDsc->lvIsRegArg = true; - - compArgSize += TARGET_POINTER_SIZE; - - lvaSwiftSelfArg = varDscInfo->varNum; - return true; - } - - if ((strcmp(className, "SwiftIndirectResult") == 0) && - (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0)) - { - if (argIsByrefOrPtr) - { - BADCODE("Expected SwiftIndirectResult struct, got pointer/reference"); - } - - if (info.compRetType != TYP_VOID) - { - BADCODE("Functions with SwiftIndirectResult parameters must return void"); - } - - if (lvaSwiftIndirectResultArg != BAD_VAR_NUM) - { - BADCODE("Duplicate SwiftIndirectResult parameter"); - } - - LclVarDsc* const varDsc = varDscInfo->varDsc; - varDsc->SetArgReg(theFixedRetBuffReg(CorInfoCallConvExtension::Swift)); - varDsc->lvIsRegArg = true; - - compArgSize += TARGET_POINTER_SIZE; - - lvaSwiftIndirectResultArg = varDscInfo->varNum; - return true; + return; } - if ((strcmp(className, "SwiftError") == 0) && (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0)) - { - if (!argIsByrefOrPtr) - { - BADCODE("Expected SwiftError pointer/reference, got struct"); - } - - if (lvaSwiftErrorArg != BAD_VAR_NUM) - { - BADCODE("Duplicate SwiftError* parameter"); - } + lvaAsyncContinuationArg = *curVarNum; + LclVarDsc* varDsc = lvaGetDesc(*curVarNum); + varDsc->lvType = TYP_REF; + varDsc->lvIsParam = true; - // We won't actually be passing this SwiftError* in REG_SWIFT_ERROR (or any register, for that matter). 
- // We will check for this quirk when generating the prolog, - // and ensure this fake parameter doesn't take any registers/stack space - LclVarDsc* const varDsc = varDscInfo->varDsc; - varDsc->SetArgReg(REG_SWIFT_ERROR); - varDsc->SetOtherArgReg(REG_NA); - varDsc->lvIsRegArg = true; - lvaSwiftErrorArg = varDscInfo->varNum; + // The final home for this incoming register might be our local stack frame + varDsc->lvOnFrame = true; - // Instead, all usages of the SwiftError* parameter will be redirected to this pseudolocal. - lvaSwiftErrorLocal = lvaGrabTempWithImplicitUse(false DEBUGARG("SwiftError pseudolocal")); - lvaSetStruct(lvaSwiftErrorLocal, typeHnd, false); - return true; - } + INDEBUG(varDsc->lvReason = "Async continuation arg"); - return false; + (*curVarNum)++; } -#endif /*****************************************************************************/ -void Compiler::lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo) +void Compiler::lvaInitVarArgsHandle(unsigned* curVarNum) { - //@GENERICS: final instantiation-info argument for shared generic methods - // and shared generic struct instance methods - if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) + if (!info.compIsVarArgs) { - info.compTypeCtxtArg = varDscInfo->varNum; - - LclVarDsc* varDsc = varDscInfo->varDsc; - varDsc->lvIsParam = 1; - varDsc->lvType = TYP_I_IMPL; - - if (varDscInfo->canEnreg(TYP_I_IMPL)) - { - /* Another register argument */ - - varDsc->lvIsRegArg = 1; - varDsc->SetArgReg( - genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet(), info.compCallConv)); -#if FEATURE_MULTIREG_ARGS - varDsc->SetOtherArgReg(REG_NA); -#endif - varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame - - varDscInfo->intRegArgNum++; - -#ifdef DEBUG - if (verbose) - { - printf("'GenCtxt' passed in register %s\n", getRegName(varDsc->GetArgReg())); - } -#endif - } - else - { - // We need to mark these as being on the stack, as this is not done elsewhere in the case that canEnreg - // returns false. - varDsc->lvOnFrame = true; - varDsc->SetStackOffset(varDscInfo->stackArgSize); - varDscInfo->stackArgSize += TARGET_POINTER_SIZE; - } - - compArgSize += TARGET_POINTER_SIZE; - -#if defined(TARGET_X86) - if (info.compIsVarArgs) - varDsc->SetStackOffset(compArgSize); -#endif // TARGET_X86 - - varDscInfo->nextParam(); + return; } -} -/*****************************************************************************/ -void Compiler::lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo) -{ - if (info.compIsVarArgs) - { - lvaVarargsHandleArg = varDscInfo->varNum; + lvaVarargsHandleArg = *curVarNum; - LclVarDsc* varDsc = varDscInfo->varDsc; - varDsc->lvType = TYP_I_IMPL; - varDsc->lvIsParam = 1; + LclVarDsc* varDsc = lvaGetDesc(*curVarNum); + varDsc->lvType = TYP_I_IMPL; + varDsc->lvIsParam = 1; + varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame #if defined(TARGET_X86) - // Codegen will need it for x86 scope info. - varDsc->lvImplicitlyReferenced = 1; + // Codegen will need it for x86 scope info. 
+ varDsc->lvImplicitlyReferenced = 1; #endif // TARGET_X86 - varDsc->lvHasLdAddrOp = 1; - - lvaSetVarDoNotEnregister(lvaVarargsHandleArg DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr)); - - assert(mostRecentlyActivePhase == PHASE_PRE_IMPORT); + varDsc->lvHasLdAddrOp = 1; - if (varDscInfo->canEnreg(TYP_I_IMPL)) - { - /* Another register argument */ - - unsigned varArgHndArgNum = varDscInfo->allocRegArg(TYP_I_IMPL); - - varDsc->lvIsRegArg = 1; - varDsc->SetArgReg(genMapRegArgNumToRegNum(varArgHndArgNum, TYP_I_IMPL, info.compCallConv)); -#if FEATURE_MULTIREG_ARGS - varDsc->SetOtherArgReg(REG_NA); -#endif - varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame -#ifdef TARGET_ARM - // This has to be spilled right in front of the real arguments and we have - // to pre-spill all the argument registers explicitly because we only have - // have symbols for the declared ones, not any potential variadic ones. - for (unsigned ix = varArgHndArgNum; ix < ArrLen(intArgMasks); ix++) - { - codeGen->regSet.rsMaskPreSpillRegArg |= intArgMasks[ix]; - } -#endif // TARGET_ARM - -#ifdef DEBUG - if (verbose) - { - printf("'VarArgHnd' passed in register %s\n", getRegName(varDsc->GetArgReg())); - } -#endif // DEBUG - } - else - { - // We need to mark these as being on the stack, as this is not done elsewhere in the case that canEnreg - // returns false. - varDsc->lvOnFrame = true; - varDsc->SetStackOffset(varDscInfo->stackArgSize); - varDscInfo->stackArgSize += TARGET_POINTER_SIZE; - } - - /* Update the total argument size, count and varDsc */ - - compArgSize += TARGET_POINTER_SIZE; - - varDscInfo->nextParam(); + lvaSetVarDoNotEnregister(lvaVarargsHandleArg DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr)); #if defined(TARGET_X86) - varDsc->SetStackOffset(compArgSize); - - // Allocate a temp to point at the beginning of the args - - lvaVarargsBaseOfStkArgs = lvaGrabTemp(false DEBUGARG("Varargs BaseOfStkArgs")); - lvaTable[lvaVarargsBaseOfStkArgs].lvType = TYP_I_IMPL; - + // Allocate a temp to point at the beginning of the args + lvaVarargsBaseOfStkArgs = lvaGrabTemp(false DEBUGARG("Varargs BaseOfStkArgs")); + lvaTable[lvaVarargsBaseOfStkArgs].lvType = TYP_I_IMPL; #endif // TARGET_X86 - } + + (*curVarNum)++; } /*****************************************************************************/ @@ -1602,6 +799,7 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, compFloatingPointUsed = true; } +<<<<<<< HEAD #if FEATURE_IMPLICIT_BYREFS varDsc->lvIsImplicitByRef = 0; #endif // FEATURE_IMPLICIT_BYREFS @@ -1613,19 +811,12 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, varDsc->lvCorInfoType = corInfoType; #endif // TARGET_WASM +======= +>>>>>>> upstream-jun // Set the lvType (before this point it is TYP_UNDEF). - - if (GlobalJitOptions::compFeatureHfa) - { - varDsc->SetHfaType(TYP_UNDEF); - } if ((varTypeIsStruct(type))) { lvaSetStruct(varNum, typeHnd, typeHnd != NO_CLASS_HANDLE); - if (info.compIsVarArgs) - { - lvaSetStructUsedAsVarArg(varNum); - } } else { @@ -1635,10 +826,6 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, #ifdef DEBUG varDsc->SetStackOffset(BAD_STK_OFFS); #endif - -#if FEATURE_MULTIREG_ARGS - varDsc->SetOtherArgReg(REG_NA); -#endif // FEATURE_MULTIREG_ARGS } //----------------------------------------------------------------------------- @@ -1657,6 +844,7 @@ void Compiler::lvaClassifyParameterABI(Classifier& classifier) lvaParameterPassingInfo = info.compArgsCount == 0 ? 
nullptr : new (this, CMK_LvaTable) ABIPassingInformation[info.compArgsCount]; + regMaskTP argRegs = RBM_NONE; for (unsigned i = 0; i < info.compArgsCount; i++) { LclVarDsc* dsc = lvaGetDesc(i); @@ -1682,18 +870,109 @@ void Compiler::lvaClassifyParameterABI(Classifier& classifier) } #endif - lvaParameterPassingInfo[i] = classifier.Classify(this, dsc->TypeGet(), structLayout, wellKnownArg); + ABIPassingInformation abiInfo = classifier.Classify(this, dsc->TypeGet(), structLayout, wellKnownArg); + lvaParameterPassingInfo[i] = abiInfo; -#ifdef DEBUG - if (verbose) + JITDUMP("Parameter V%02u ABI info: ", i); + DBEXEC(verbose, abiInfo.Dump()); + +#if FEATURE_IMPLICIT_BYREFS + dsc->lvIsImplicitByRef = abiInfo.IsPassedByReference(); +#endif // FEATURE_IMPLICIT_BYREFS + + unsigned numRegisters = 0; + for (const ABIPassingSegment& segment : abiInfo.Segments()) { - printf("Parameter V%02u ABI info: ", i); - lvaParameterPassingInfo[i].Dump(); + if (segment.IsPassedInRegister()) + { + argRegs |= segment.GetRegisterMask(); + numRegisters++; + } } -#endif + + dsc->lvIsRegArg = numRegisters > 0; + dsc->lvIsMultiRegArg = numRegisters > 1; } lvaParameterStackSize = classifier.StackSize(); + + // genFnPrologCalleeRegArgs expect these to be the counts of registers it knows how to handle. + // TODO-Cleanup: Recompute these values in the backend instead, where they are used. + codeGen->intRegState.rsCalleeRegArgCount = genCountBits(argRegs & RBM_ARG_REGS); + codeGen->floatRegState.rsCalleeRegArgCount = genCountBits(argRegs & RBM_FLTARG_REGS); + +#ifdef TARGET_ARM + // Prespill all argument regs on to stack in case of Arm when under profiler. + // We do this as the arm32 CORINFO_HELP_FCN_ENTER helper does not preserve + // these registers, and is called very early. + if (compIsProfilerHookNeeded()) + { + codeGen->regSet.rsMaskPreSpillRegArg |= RBM_ARG_REGS; + } + + regMaskTP doubleAlignMask = RBM_NONE; + + // Also prespill struct parameters. + for (unsigned i = 0; i < info.compArgsCount; i++) + { + const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(i); + LclVarDsc* varDsc = lvaGetDesc(i); + bool preSpill = opts.compUseSoftFP && varTypeIsFloating(varDsc); + preSpill |= varDsc->TypeIs(TYP_STRUCT); + + if (!preSpill) + { + continue; + } + + regMaskTP regs = RBM_NONE; + for (const ABIPassingSegment& segment : abiInfo.Segments()) + { + if (segment.IsPassedInRegister() && genIsValidIntReg(segment.GetRegister())) + { + regs |= segment.GetRegisterMask(); + } + } + + codeGen->regSet.rsMaskPreSpillRegArg |= regs; + if (varDsc->lvStructDoubleAlign || varDsc->TypeIs(TYP_DOUBLE)) + { + doubleAlignMask |= regs; + } + } + + if (doubleAlignMask != RBM_NONE) + { + assert(RBM_ARG_REGS == 0xF); + assert((doubleAlignMask & RBM_ARG_REGS) == doubleAlignMask); + if (doubleAlignMask != RBM_NONE && doubleAlignMask != RBM_ARG_REGS) + { + // 'double aligned types' can begin only at r0 or r2 and we always expect at least two registers to be used + // Note that in rare cases, we can have double-aligned structs of 12 bytes (if specified explicitly with + // attributes) + assert((doubleAlignMask == 0b0011) || (doubleAlignMask == 0b1100) || + (doubleAlignMask == 0b0111) /* || 0b1111 is if'ed out */); + + // Now if doubleAlignMask is xyz1 i.e., the struct starts in r0, and we prespill r2 or r3 + // but not both, then the stack would be misaligned for r0. So spill both + // r2 and r3. + // + // ; +0 --- caller SP double aligned ---- + // ; -4 r2 r3 + // ; -8 r1 r1 + // ; -c r0 r0 <-- misaligned. 
+ // ; callee saved regs + bool startsAtR0 = (doubleAlignMask & 1) == 1; + bool r2XorR3 = ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R2) == 0) != + ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R3) == 0); + if (startsAtR0 && r2XorR3) + { + codeGen->regSet.rsMaskPreSpillAlign = + (~codeGen->regSet.rsMaskPreSpillRegArg & ~doubleAlignMask) & RBM_ARG_REGS; + } + } + } +#endif } //----------------------------------------------------------------------------- @@ -1713,58 +992,6 @@ void Compiler::lvaClassifyParameterABI() { SwiftABIClassifier classifier(cInfo); lvaClassifyParameterABI(classifier); - - regMaskTP argRegs = RBM_NONE; - - // The calling convention details computed by the old ABI classifier - // are wrong since it does not handle the Swift ABI for structs - // appropriately. Grab them from the new ABI information. - for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) - { - LclVarDsc* dsc = lvaGetDesc(lclNum); - const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(lclNum); - - if (dsc->TypeGet() == TYP_STRUCT) - { - const CORINFO_SWIFT_LOWERING* lowering = GetSwiftLowering(dsc->GetLayout()->GetClassHandle()); - dsc->lvIsImplicitByRef = lowering->byReference; - } - - if ((dsc->TypeGet() == TYP_STRUCT) && !lvaIsImplicitByRefLocal(lclNum) && - !abiInfo.HasExactlyOneStackSegment()) - { - dsc->lvIsRegArg = false; - } - else - { - assert(abiInfo.NumSegments == 1); - if (abiInfo.Segment(0).IsPassedInRegister()) - { - dsc->lvIsRegArg = true; - dsc->SetArgReg(abiInfo.Segment(0).GetRegister()); - dsc->SetOtherArgReg(REG_NA); - } - else - { - dsc->lvIsRegArg = false; - dsc->SetArgReg(REG_STK); - dsc->SetOtherArgReg(REG_NA); - dsc->SetStackOffset(abiInfo.Segment(0).GetStackOffset()); - } - } - - for (const ABIPassingSegment& segment : abiInfo.Segments()) - { - if (segment.IsPassedInRegister()) - { - argRegs |= segment.GetRegisterMask(); - } - } - } - - // genFnPrologCalleeRegArgs expect these to be the counts of registers it knows how to handle. - codeGen->intRegState.rsCalleeRegArgCount = genCountBits(argRegs & RBM_ARG_REGS); - codeGen->floatRegState.rsCalleeRegArgCount = genCountBits(argRegs & RBM_FLTARG_REGS); } else #endif @@ -1774,6 +1001,7 @@ void Compiler::lvaClassifyParameterABI() } #ifdef DEBUG +<<<<<<< HEAD if (lvaParameterPassingInfo == nullptr) { return; @@ -1858,6 +1086,8 @@ void Compiler::lvaClassifyParameterABI() } } +======= +>>>>>>> upstream-jun for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) { const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(lclNum); @@ -1893,23 +1123,6 @@ void Compiler::lvaClassifyParameterABI() } } } - -#if FEATURE_FASTTAILCALL - // Swift doesn't have correct ABI info computed by the old classification, - // so skip this validation there. - if (info.compCallConv != CorInfoCallConvExtension::Swift) - { - unsigned oldStackSize = info.compArgStackSize; - -#ifdef WINDOWS_AMD64_ABI - // Old info does not take 4 shadow slots on win-x64 into account. - oldStackSize += 32; -#endif - - assert(lvaParameterStackSize == roundUp(oldStackSize, TARGET_POINTER_SIZE)); - } -#endif - #endif // DEBUG } @@ -2553,7 +1766,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum) // promotion of non FP or SIMD type fields is disallowed. // TODO-1stClassStructs: add support in Lowering and prolog generation // to enable promoting these types. 
- if (varDsc->lvIsParam && (varDsc->lvIsHfa() != varTypeUsesFloatReg(fieldType))) + if (varDsc->lvIsParam && (IsArmHfaParameter(lclNum) != varTypeUsesFloatReg(fieldType))) { canPromote = false; } @@ -2610,6 +1823,50 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum) return canPromote; } +//-------------------------------------------------------------------------------------------- +// IsArmHfaParameter - Check if a local is an ARM or ARM64 HFA parameter. +// This is a quirk to match old promotion behavior. +// +// Arguments: +// lclNum - The local +// +// Return value: +// True if it is an HFA parameter. +// +bool Compiler::StructPromotionHelper::IsArmHfaParameter(unsigned lclNum) +{ + if (!GlobalJitOptions::compFeatureHfa) + { + return false; + } + + CorInfoHFAElemType hfaType = + compiler->info.compCompHnd->getHFAType(compiler->lvaGetDesc(lclNum)->GetLayout()->GetClassHandle()); + return hfaType != CORINFO_HFA_ELEM_NONE; +} + +//-------------------------------------------------------------------------------------------- +// IsSysVMultiRegType - Check if a type is one that could be passed in 2 +// registers in some cases. +// This is a quirk to match old promotion behavior. +// +// Arguments: +// lclNum - The local +// +// Return value: +// True if it sometimes may be passed in two registers. +// +bool Compiler::StructPromotionHelper::IsSysVMultiRegType(ClassLayout* layout) +{ +#ifdef UNIX_AMD64_ABI + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(layout->GetClassHandle(), &structDesc); + return structDesc.passedInRegisters && (structDesc.eightByteCount == 2); +#else + return false; +#endif +} + //-------------------------------------------------------------------------------------------- // ShouldPromoteStructVar - Should a struct var be promoted if it can be promoted? // This routine mainly performs profitability checks. Right now it also has @@ -2664,15 +1921,15 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) shouldPromote = false; } #endif // TARGET_LOONGARCH64 || TARGET_RISCV64 - else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa()) + else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !IsArmHfaParameter(lclNum)) { #if FEATURE_MULTIREG_STRUCT_PROMOTE // Is this a variable holding a value with exactly two fields passed in // multiple registers? 
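// For instance (an illustrative case, not taken from this change): on SysV x64 a
// 16-byte struct such as struct { double d; long l; } is laid out as two
// eightbytes -- one SSE and one integer -- so IsSysVMultiRegType reports true for
// its layout even when this particular parameter ended up on the stack, whereas
// lvIsMultiRegArg is only set when the parameter actually took more than one register.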
- if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) + if (varDsc->lvIsMultiRegArg || IsSysVMultiRegType(varDsc->GetLayout())) { if ((structPromotionInfo.fieldCnt != 2) && - !((structPromotionInfo.fieldCnt == 1) && varTypeIsSIMD(structPromotionInfo.fields[0].fldType))) + ((structPromotionInfo.fieldCnt != 1) || !varTypeIsSIMD(structPromotionInfo.fields[0].fldType))) { JITDUMP("Not promoting multireg struct local V%02u, because lvIsParam is true, #fields != 2 and it's " "not a single SIMD.\n", @@ -2680,7 +1937,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) shouldPromote = false; } #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - else if (varDsc->lvIsSplit) + else if (compiler->lvaGetParameterABIInfo(lclNum).IsSplitAcrossRegistersAndStack()) { JITDUMP("Not promoting multireg struct local V%02u, because it is splitted.\n", lclNum); shouldPromote = false; @@ -2689,6 +1946,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) } else #endif // !FEATURE_MULTIREG_STRUCT_PROMOTE + { // TODO-PERF - Implement struct promotion for incoming single-register structs. // Also the implementation of jmp uses the 4 byte move to store @@ -2702,6 +1960,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) lclNum, structPromotionInfo.fieldCnt); shouldPromote = false; } + } } else if ((lclNum == compiler->genReturnLocal) && (structPromotionInfo.fieldCnt > 1)) { @@ -2838,60 +2097,10 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum) } #if FEATURE_IMPLICIT_BYREFS - // Reset the implicitByRef flag. fieldVarDsc->lvIsImplicitByRef = 0; #endif // FEATURE_IMPLICIT_BYREFS - // Do we have a parameter that can be enregistered? - // - if (varDsc->lvIsRegArg) - { - fieldVarDsc->lvIsRegArg = true; - regNumber parentArgReg = varDsc->GetArgReg(); -#if FEATURE_MULTIREG_ARGS - if (!compiler->lvaIsImplicitByRefLocal(lclNum)) - { -#ifdef UNIX_AMD64_ABI - if (varTypeIsSIMD(fieldVarDsc) && (varDsc->lvFieldCnt == 1)) - { - // This SIMD typed field may be passed in multiple registers. - fieldVarDsc->SetArgReg(parentArgReg); - fieldVarDsc->SetOtherArgReg(varDsc->GetOtherArgReg()); - } - else -#endif // UNIX_AMD64_ABI - { - regNumber fieldRegNum; - if (index == 0) - { - fieldRegNum = parentArgReg; - } - else if (varDsc->lvIsHfa()) - { - unsigned regIncrement = fieldVarDsc->lvFldOrdinal; -#ifdef TARGET_ARM - // TODO: Need to determine if/how to handle split args. 
- if (varDsc->GetHfaType() == TYP_DOUBLE) - { - regIncrement *= 2; - } -#endif // TARGET_ARM - fieldRegNum = (regNumber)(parentArgReg + regIncrement); - } - else - { - assert(index == 1); - fieldRegNum = varDsc->GetOtherArgReg(); - } - fieldVarDsc->SetArgReg(fieldRegNum); - } - } - else -#endif // FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) - { - fieldVarDsc->SetArgReg(parentArgReg); - } - } + fieldVarDsc->lvIsRegArg = varDsc->lvIsRegArg; #ifdef FEATURE_SIMD if (varTypeIsSIMD(pFieldInfo->fldType)) @@ -2909,8 +2118,8 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum) var_types hfaType = compiler->GetHfaType(pFieldInfo->fldSIMDTypeHnd); if (varTypeIsValidHfaType(hfaType)) { - fieldVarDsc->SetHfaType(hfaType); - fieldVarDsc->lvIsMultiRegArg = (varDsc->lvIsMultiRegArg != 0) && (fieldVarDsc->lvHfaSlots() > 1); + fieldVarDsc->lvIsMultiRegArg = + (varDsc->lvIsMultiRegArg != 0) && (fieldVarDsc->lvExactSize() > genTypeSize(hfaType)); } } } @@ -3248,7 +2457,11 @@ bool Compiler::lvaIsImplicitByRefLocal(unsigned lclNum) const { assert(varDsc->lvIsParam); +<<<<<<< HEAD assert(varTypeIsStruct(varDsc) || (varDsc->TypeGet() == TYP_BYREF) || (varDsc->TypeGet() == TYP_I_IMPL)); +======= + assert(varTypeIsStruct(varDsc) || varDsc->TypeIs(TYP_BYREF)); +>>>>>>> upstream-jun return true; } #endif // FEATURE_IMPLICIT_BYREFS @@ -3279,39 +2492,6 @@ bool Compiler::lvaIsLocalImplicitlyAccessedByRef(unsigned lclNum) const return lvaIsImplicitByRefLocal(lclNum); } -// Returns true if this local var is a multireg struct. -// TODO-Throughput: This does a lookup on the class handle, and in the outgoing arg context -// this information is already available on the CallArgABIInformation, and shouldn't need to be -// recomputed. -// -// Also seems like this info could be cached in the layout. -// -bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc, bool isVarArg) -{ - if (varTypeIsStruct(varDsc->TypeGet())) - { - CORINFO_CLASS_HANDLE clsHnd = varDsc->GetLayout()->GetClassHandle(); - structPassingKind howToPassStruct; - - var_types type = getArgTypeForStruct(clsHnd, &howToPassStruct, isVarArg, varDsc->lvExactSize()); - - if (howToPassStruct == SPK_ByValueAsHfa) - { - assert(type == TYP_STRUCT); - return true; - } - -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (howToPassStruct == SPK_ByValue) - { - assert(type == TYP_STRUCT); - return true; - } -#endif - } - return false; -} - //------------------------------------------------------------------------ // lvaSetStruct: Set the type of a local to a struct, given a layout. 
// @@ -3336,40 +2516,6 @@ void Compiler::lvaSetStruct(unsigned varNum, ClassLayout* layout, bool unsafeVal if (layout->IsValueClass()) { varDsc->lvType = layout->GetType(); - -#if FEATURE_IMPLICIT_BYREFS - // Mark implicit byref struct parameters - if (varDsc->lvIsParam && !varDsc->lvIsStructField) - { - structPassingKind howToReturnStruct; - getArgTypeForStruct(layout->GetClassHandle(), &howToReturnStruct, info.compIsVarArgs, - varDsc->lvExactSize()); - - if (howToReturnStruct == SPK_ByReference) - { - JITDUMP("Marking V%02i as a byref parameter\n", varNum); - varDsc->lvIsImplicitByRef = 1; - } - } -#endif // FEATURE_IMPLICIT_BYREFS - - // For structs that are small enough, we check and set HFA element type - if (GlobalJitOptions::compFeatureHfa && (layout->GetSize() <= MAX_PASS_MULTIREG_BYTES)) - { - // hfaType is set to float, double or SIMD type if it is an HFA, otherwise TYP_UNDEF - var_types hfaType = GetHfaType(layout->GetClassHandle()); - if (varTypeIsValidHfaType(hfaType)) - { - varDsc->SetHfaType(hfaType); - - // hfa variables can never contain GC pointers - assert(!layout->HasGCPtr()); - // The size of this struct should be evenly divisible by 4 or 8 - assert((varDsc->lvExactSize() % genTypeSize(hfaType)) == 0); - // The number of elements in the HFA should fit into our MAX_ARG_REG_COUNT limit - assert((varDsc->lvExactSize() / genTypeSize(hfaType)) <= MAX_ARG_REG_COUNT); - } - } } } else @@ -3481,7 +2627,7 @@ void Compiler::makeExtraStructQueries(CORINFO_CLASS_HANDLE structHandle, int lev // In R2R we cannot query arbitrary information about struct fields, so // skip it there. Note that the getTypeLayout call above is enough to cover // us for promotion at least. - if (!opts.IsReadyToRun()) + if (!IsAot()) { for (unsigned int i = 0; i < fieldCnt; i++) { @@ -3502,32 +2648,6 @@ void Compiler::makeExtraStructQueries(CORINFO_CLASS_HANDLE structHandle, int lev } #endif // DEBUG -//------------------------------------------------------------------------ -// lvaSetStructUsedAsVarArg: update hfa information for vararg struct args -// -// Arguments: -// varNum -- number of the variable -// -// Notes: -// This only affects arm64 varargs on windows where we need to pass -// hfa arguments as if they are not HFAs. -// -// This function should only be called if the struct is used in a varargs -// method. - -void Compiler::lvaSetStructUsedAsVarArg(unsigned varNum) -{ - if (GlobalJitOptions::compFeatureHfa && TargetOS::IsWindows) - { -#if defined(TARGET_ARM64) - LclVarDsc* varDsc = lvaGetDesc(varNum); - // For varargs methods incoming and outgoing arguments should not be treated - // as HFA. - varDsc->SetHfaType(TYP_UNDEF); -#endif // defined(TARGET_ARM64) - } -} - //------------------------------------------------------------------------ // lvaSetClass: set class information for a local var. // @@ -3723,7 +2843,7 @@ void Compiler::lvaUpdateClass(unsigned varNum, GenTree* tree, CORINFO_CLASS_HAND } //------------------------------------------------------------------------ -// lvaLclSize: returns size of a local variable, in bytes +// lvaLclStackHomeSize: returns size of stack home of a local variable, in bytes // // Arguments: // varNum -- variable to query @@ -3731,26 +2851,56 @@ void Compiler::lvaUpdateClass(unsigned varNum, GenTree* tree, CORINFO_CLASS_HAND // Returns: // Number of bytes needed on the frame for such a local. 
// -unsigned Compiler::lvaLclSize(unsigned varNum) +unsigned Compiler::lvaLclStackHomeSize(unsigned varNum) { assert(varNum < lvaCount); - var_types varType = lvaTable[varNum].TypeGet(); + LclVarDsc* varDsc = lvaGetDesc(varNum); + var_types varType = varDsc->TypeGet(); - if (varType == TYP_STRUCT) + if (!varTypeIsStruct(varType)) { - return lvaTable[varNum].lvSize(); +#ifdef TARGET_64BIT + // We only need this Quirk for TARGET_64BIT + if (varDsc->lvQuirkToLong) + { + noway_assert(varDsc->IsAddressExposed()); + return genTypeStSz(TYP_LONG) * sizeof(int); // return 8 (2 * 4) + } +#endif + + return genTypeStSz(varType) * sizeof(int); } -#ifdef TARGET_64BIT - // We only need this Quirk for TARGET_64BIT - if (lvaTable[varNum].lvQuirkToLong) + if (varDsc->lvIsParam && !varDsc->lvIsStructField) { - noway_assert(lvaTable[varNum].IsAddressExposed()); - return genTypeStSz(TYP_LONG) * sizeof(int); // return 8 (2 * 4) + // If this parameter was passed on the stack then we often reuse that + // space for its home. Take into account that this space might actually + // not be pointer-sized for some cases (macos-arm64 ABI currently). + const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(varNum); + if (abiInfo.HasExactlyOneStackSegment()) + { + return abiInfo.Segment(0).GetStackSize(); + } + + // There are other cases where the caller has allocated space for the + // parameter, like windows-x64 with shadow space for register + // parameters, but in those cases this rounding is fine. + return roundUp(varDsc->lvExactSize(), TARGET_POINTER_SIZE); } -#endif - return genTypeStSz(varType) * sizeof(int); + +#if defined(FEATURE_SIMD) && !defined(TARGET_64BIT) + // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do + // this for arguments, which must be passed according the defined ABI. We don't want to do this for + // dependently promoted struct fields, but we don't know that here. See lvaMapSimd12ToSimd16(). + // (Note that for 64-bits, we are already rounding up to 16.) + if (varDsc->TypeIs(TYP_SIMD12)) + { + return 16; + } +#endif // defined(FEATURE_SIMD) && !defined(TARGET_64BIT) + + return roundUp(varDsc->lvExactSize(), TARGET_POINTER_SIZE); } // @@ -4079,22 +3229,6 @@ void Compiler::lvaSortByRefCount() { lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::NoRegVars)); } -#if defined(JIT32_GCENCODER) - if (UsesFunclets() && lvaIsOriginalThisArg(lclNum) && - (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0) - { - // For x86/Linux, we need to track "this". - // However we cannot have it in tracked variables, so we set "this" pointer always untracked - varDsc->lvTracked = 0; - } -#endif - - // No benefit in tracking the PSPSym (if any) - // - if (lclNum == lvaPSPSym) - { - varDsc->lvTracked = 0; - } // Are we not optimizing and we have exception handlers? // if so mark all args and locals "do not enregister". @@ -4218,93 +3352,6 @@ unsigned LclVarDsc::lvExactSize() const return (lvType == TYP_STRUCT) ? GetLayout()->GetSize() : genTypeSize(lvType); } -//------------------------------------------------------------------------ -// lvSize: Get the size of a struct local on the stack frame. -// -// Return Value: -// Size in bytes. -// -unsigned LclVarDsc::lvSize() const // Size needed for storage representation. Only used for structs. 
-{ - // TODO-Review: Sometimes we get called on ARM with HFA struct variables that have been promoted, - // where the struct itself is no longer used because all access is via its member fields. - // When that happens, the struct is marked as unused and its type has been changed to - // TYP_INT (to keep the GC tracking code from looking at it). - // See Compiler::raAssignVars() for details. For example: - // N002 ( 4, 3) [00EA067C] ------------- return struct $346 - // N001 ( 3, 2) [00EA0628] ------------- lclVar struct(U) V03 loc2 - // float V03.f1 (offs=0x00) -> V12 tmp7 - // f8 (last use) (last use) $345 - // Here, the "struct(U)" shows that the "V03 loc2" variable is unused. Not shown is that V03 - // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree. - - assert(varTypeIsStruct(lvType) || (lvPromoted && lvUnusedStruct)); - - if (lvIsParam) - { - assert(varTypeIsStruct(lvType)); - const bool isFloatHfa = (lvIsHfa() && (GetHfaType() == TYP_FLOAT)); - const unsigned argSizeAlignment = Compiler::eeGetArgSizeAlignment(lvType, isFloatHfa); - return roundUp(lvExactSize(), argSizeAlignment); - } - -#if defined(FEATURE_SIMD) && !defined(TARGET_64BIT) - // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do - // this for arguments, which must be passed according the defined ABI. We don't want to do this for - // dependently promoted struct fields, but we don't know that here. See lvaMapSimd12ToSimd16(). - // (Note that for 64-bits, we are already rounding up to 16.) - if (lvType == TYP_SIMD12) - { - assert(!lvIsParam); - return 16; - } -#endif // defined(FEATURE_SIMD) && !defined(TARGET_64BIT) - - return roundUp(lvExactSize(), TARGET_POINTER_SIZE); -} - -/********************************************************************************** - * Get stack size of the varDsc. - */ -size_t LclVarDsc::lvArgStackSize() const -{ - // Make sure this will have a stack size - assert(!this->lvIsRegArg); - - size_t stackSize = 0; - if (varTypeIsStruct(this)) - { -#if defined(WINDOWS_AMD64_ABI) - // Structs are either passed by reference or can be passed by value using one pointer - stackSize = TARGET_POINTER_SIZE; -#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // lvSize performs a roundup. - stackSize = this->lvSize(); - -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if ((stackSize > TARGET_POINTER_SIZE * 2) && (!this->lvIsHfa())) - { - // If the size is greater than 16 bytes then it will - // be passed by reference. - stackSize = TARGET_POINTER_SIZE; - } -#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - -#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI !TARGET_LOONGARCH64 !TARGET_RISCV64 - - NYI("Unsupported target."); - unreached(); - -#endif // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI - } - else - { - stackSize = TARGET_POINTER_SIZE; - } - - return stackSize; -} - //------------------------------------------------------------------------ // GetRegisterType: Determine register type for this local var. 
// @@ -4327,7 +3374,7 @@ var_types LclVarDsc::GetRegisterType(const GenTreeLclVarCommon* tree) const } else { - assert((TypeGet() == TYP_STRUCT) && tree->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR)); + assert(TypeIs(TYP_STRUCT) && tree->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR)); layout = GetLayout(); } @@ -4354,10 +3401,10 @@ var_types LclVarDsc::GetRegisterType(const GenTreeLclVarCommon* tree) const // var_types LclVarDsc::GetRegisterType() const { - if (TypeGet() != TYP_STRUCT) + if (!TypeIs(TYP_STRUCT)) { #if !defined(TARGET_64BIT) - if (TypeGet() == TYP_LONG) + if (TypeIs(TYP_LONG)) { return TYP_UNDEF; } @@ -4629,8 +3676,8 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt, // Check that the LCL_VAR node has the same type as the underlying variable, save a few mismatches we allow. assert(tree->TypeIs(varDsc->TypeGet(), genActualType(varDsc)) || - (tree->TypeIs(TYP_BYREF) && (varDsc->TypeGet() == TYP_I_IMPL)) || // Created by inliner substitution. - (tree->TypeIs(TYP_INT) && (varDsc->TypeGet() == TYP_LONG))); // Created by "optNarrowTree". + (tree->TypeIs(TYP_BYREF) && varDsc->TypeIs(TYP_I_IMPL)) || // Created by inliner substitution. + (tree->TypeIs(TYP_INT) && varDsc->TypeIs(TYP_LONG))); // Created by "optNarrowTree". } } @@ -4741,35 +3788,6 @@ PhaseStatus Compiler::lvaMarkLocalVars() unsigned const lvaCountOrig = lvaCount; -#if defined(FEATURE_EH_WINDOWS_X86) - - // Grab space for exception handling - - if (!UsesFunclets() && ehNeedsShadowSPslots()) - { - // The first slot is reserved for ICodeManager::FixContext(ppEndRegion) - // ie. the offset of the end-of-last-executed-filter - unsigned slotsNeeded = 1; - - unsigned handlerNestingLevel = ehMaxHndNestingCount; - - if (opts.compDbgEnC && (handlerNestingLevel < (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL)) - handlerNestingLevel = (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL; - - slotsNeeded += handlerNestingLevel; - - // For a filter (which can be active at the same time as a catch/finally handler) - slotsNeeded++; - // For zero-termination of the shadow-Stack-pointer chain - slotsNeeded++; - - lvaShadowSPslotsVar = lvaGrabTempWithImplicitUse(false DEBUGARG("lvaShadowSPslotsVar")); - lvaSetStruct(lvaShadowSPslotsVar, typGetBlkLayout(slotsNeeded * TARGET_POINTER_SIZE), false); - lvaSetVarAddrExposed(lvaShadowSPslotsVar DEBUGARG(AddressExposedReason::EXTERNALLY_VISIBLE_IMPLICITLY)); - } - -#endif // FEATURE_EH_WINDOWS_X86 - #ifdef JIT32_GCENCODER // LocAllocSPvar is only required by the implicit frame layout expected by the VM on x86. 
Whether // a function contains a Localloc is conveyed in the GC information, in the InfoHdrSmall.localloc @@ -4780,6 +3798,9 @@ PhaseStatus Compiler::lvaMarkLocalVars() // saved EBP <-- EBP points here // other callee-saved registers // InfoHdrSmall.savedRegsCountExclFP specifies this size // optional GS cookie // InfoHdrSmall.security is 1 if this exists + // if FEATURE_EH_FUNCLETS + // issynchronized bool if it is a synchronized method + // endif // FEATURE_EH_FUNCLETS // LocAllocSP slot // -- lower addresses -- // @@ -5191,6 +4212,7 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * | security object | * |-----------------------| * | ParamTypeArg | +// If funclet support is disabled * |-----------------------| * | Last-executed-filter | * |-----------------------| @@ -5198,6 +4220,7 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * ~ Shadow SPs ~ * | | * |-----------------------| +// Endif funclet support is disabled * | | * ~ Variables ~ * | | @@ -5284,9 +4307,6 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * | | * ~ localloc ~ // not in frames with EH * | | - * |-----------------------| - * | PSPSym | // only in frames with EH (thus no localloc) - * | | * |-----------------------| <---- RBP in localloc frames (max 240 bytes from Initial-SP) * | Arguments for the | * ~ next function ~ @@ -5353,8 +4373,6 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * |-----------------------| <---- Virtual '0' * |Callee saved registers | * |-----------------------| - * | PSPSym | // Only for frames with EH, which means FP-based frames - * |-----------------------| * ~ possible double align ~ * |-----------------------| * | security object | @@ -5444,8 +4462,6 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * |Callee saved registers | * | except fp/lr | * |-----------------------| - * | PSPSym | // Only for frames with EH, which requires FP-based frames - * |-----------------------| * | security object | * |-----------------------| * | ParamTypeArg | @@ -5492,8 +4508,6 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * |-----------------------| * |Callee saved registers | * |-----------------------| - * | PSPSym | // Only for frames with EH, which requires FP-based frames - * |-----------------------| * | security object | * |-----------------------| * | ParamTypeArg | @@ -5637,27 +4651,6 @@ void Compiler::lvaFixVirtualFrameOffsets() { LclVarDsc* varDsc; -#if defined(TARGET_AMD64) - if (lvaPSPSym != BAD_VAR_NUM) - { - // We need to fix the offset of the PSPSym so there is no padding between it and the outgoing argument space. - // Without this code, lvaAlignFrame might have put the padding lower than the PSPSym, which would be between - // the PSPSym and the outgoing argument space. - varDsc = lvaGetDesc(lvaPSPSym); - assert(varDsc->lvFramePointerBased); // We always access it RBP-relative. - assert(!varDsc->lvMustInit); // It is never "must init". - varDsc->SetStackOffset(codeGen->genCallerSPtoInitialSPdelta() + lvaLclSize(lvaOutgoingArgSpaceVar)); - - if (opts.IsOSR()) - { - // With OSR RBP points at the base of the OSR frame, but the virtual offsets - // are from the base of the Tier0 frame. Adjust. 
- // - varDsc->SetStackOffset(varDsc->GetStackOffset() - info.compPatchpointInfo->TotalFrameSize()); - } - } -#endif - // The delta to be added to virtual offset to adjust it relative to frame pointer or SP int delta = 0; int frameLocalsDelta = 0; @@ -5721,21 +4714,7 @@ void Compiler::lvaFixVirtualFrameOffsets() { int offset = lvaTable[lvaMonAcquired].GetStackOffset() + delta; lvaTable[lvaMonAcquired].SetStackOffset(offset); - - if (lvaPSPSym != BAD_VAR_NUM) - { - int offset = lvaTable[lvaPSPSym].GetStackOffset() + delta; - lvaTable[lvaPSPSym].SetStackOffset(offset); - delta += TARGET_POINTER_SIZE; - } - - delta += lvaLclSize(lvaMonAcquired); - } - else if (lvaPSPSym != BAD_VAR_NUM) - { - int offset = lvaTable[lvaPSPSym].GetStackOffset() + delta; - lvaTable[lvaPSPSym].SetStackOffset(offset); - delta += TARGET_POINTER_SIZE; + delta += lvaLclStackHomeSize(lvaMonAcquired); } JITDUMP("--- delta bump %d for FP frame\n", delta); @@ -5870,8 +4849,21 @@ void Compiler::lvaFixVirtualFrameOffsets() #ifdef TARGET_ARM bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask) { - const LclVarDsc& desc = lvaTable[lclNum]; - return desc.lvIsRegArg && (preSpillMask & genRegMask(desc.GetArgReg())); + LclVarDsc* dsc = lvaGetDesc(lclNum); + if (dsc->lvIsStructField) + { + lclNum = dsc->lvParentLcl; + } + const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(lclNum); + for (const ABIPassingSegment& segment : abiInfo.Segments()) + { + if (segment.IsPassedInRegister() && ((preSpillMask & segment.GetRegisterMask()) != RBM_NONE)) + { + return true; + } + } + + return false; } #endif // TARGET_ARM @@ -5884,7 +4876,7 @@ bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask) // void Compiler::lvaUpdateArgWithInitialReg(LclVarDsc* varDsc) { - noway_assert(varDsc->lvIsParam); + assert(varDsc->lvIsParam || varDsc->lvIsParamRegTarget); if (varDsc->lvIsRegCandidate()) { @@ -5903,20 +4895,11 @@ void Compiler::lvaUpdateArgsWithInitialReg() return; } - for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) + for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++) { LclVarDsc* varDsc = lvaGetDesc(lclNum); - if (varDsc->lvPromoted) - { - for (unsigned fieldVarNum = varDsc->lvFieldLclStart; - fieldVarNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldVarNum) - { - LclVarDsc* fieldVarDsc = lvaGetDesc(fieldVarNum); - lvaUpdateArgWithInitialReg(fieldVarDsc); - } - } - else + if (varDsc->lvIsParam || varDsc->lvIsParamRegTarget) { lvaUpdateArgWithInitialReg(varDsc); } @@ -6285,21 +5268,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // This var must go first, in what is called the 'frame header' for EnC so that it is // preserved when remapping occurs. See vm\eetwain.cpp for detailed comment specifying frame // layout requirements for EnC to work. - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaMonAcquired, lvaLclSize(lvaMonAcquired), stkOffs); + stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaMonAcquired, lvaLclStackHomeSize(lvaMonAcquired), stkOffs); } } -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (lvaPSPSym != BAD_VAR_NUM) - { - // On ARM/ARM64, if we need a PSPSym we allocate it early since funclets - // will need to have it at the same caller-SP relative offset so anything - // allocated before this will also leak into the funclet's frame. 
- noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); - } -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 - if (mustDoubleAlign) { if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) @@ -6410,7 +5382,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() stkOffs -= TARGET_POINTER_SIZE; } } - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaShadowSPslotsVar, lvaLclSize(lvaShadowSPslotsVar), stkOffs); + stkOffs = + lvaAllocLocalAndSetVirtualOffset(lvaShadowSPslotsVar, lvaLclStackHomeSize(lvaShadowSPslotsVar), stkOffs); } #endif // FEATURE_EH_WINDOWS_X86 @@ -6420,7 +5393,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() if (!opts.IsOSR() || !info.compPatchpointInfo->HasSecurityCookie()) { - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs); + stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclStackHomeSize(lvaGSSecurityCookie), + stkOffs); } } @@ -6530,7 +5504,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() bool allocateOnFrame = varDsc->lvOnFrame; if (varDsc->lvRegister && (lvaDoneFrameLayout == REGALLOC_FRAME_LAYOUT) && - ((varDsc->TypeGet() != TYP_LONG) || (varDsc->GetOtherReg() != REG_STK))) + (!varDsc->TypeIs(TYP_LONG) || (varDsc->GetOtherReg() != REG_STK))) { allocateOnFrame = false; } @@ -6613,7 +5587,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // These need to be located as the very first variables (highest memory address) // and so they have already been assigned an offset - if (lclNum == lvaPSPSym || + if ( #if defined(FEATURE_EH_WINDOWS_X86) lclNum == lvaShadowSPslotsVar || #endif // FEATURE_EH_WINDOWS_X86 @@ -6634,12 +5608,31 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() if (varDsc->lvIsParam) { #ifdef TARGET_ARM64 - if (info.compIsVarArgs && varDsc->lvIsRegArg && - (varDsc->GetArgReg() != theFixedRetBuffReg(info.compCallConv))) + if (info.compIsVarArgs && varDsc->lvIsRegArg && (lclNum != info.compRetBuffArg)) { - // Stack offset to varargs (parameters) should point to home area which will be preallocated. - const unsigned regArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg(), info.compCallConv); - varDsc->SetStackOffset(-initialStkOffs + regArgNum * REGSIZE_BYTES); + const ABIPassingInformation& abiInfo = + lvaGetParameterABIInfo(varDsc->lvIsStructField ? varDsc->lvParentLcl : lclNum); + bool found = false; + for (const ABIPassingSegment& segment : abiInfo.Segments()) + { + if (!segment.IsPassedInRegister()) + { + continue; + } + + if (varDsc->lvIsStructField && (segment.Offset != varDsc->lvFldOffset)) + { + continue; + } + + found = true; + // Stack offset to varargs (parameters) should point to home area which will be preallocated. 
+ const unsigned regArgNum = genMapIntRegNumToRegArgNum(segment.GetRegister(), info.compCallConv); + varDsc->SetStackOffset(-initialStkOffs + regArgNum * REGSIZE_BYTES); + break; + } + + assert(found); continue; } #endif @@ -6728,7 +5721,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } // Reserve the stack space for this variable - stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs); + stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclStackHomeSize(lclNum), stkOffs); #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // If we have an incoming register argument that has a promoted field then we // need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar @@ -6751,7 +5744,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() if (!opts.IsOSR() || !info.compPatchpointInfo->HasSecurityCookie()) { // LOCALLOC used, but we have no unsafe buffer. Allocated cookie last, close to localloc buffer. - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs); + stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclStackHomeSize(lvaGSSecurityCookie), + stkOffs); } } @@ -6818,17 +5812,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } } -#if defined(TARGET_AMD64) - if (lvaPSPSym != BAD_VAR_NUM) - { - // On AMD64, if we need a PSPSym, allocate it last, immediately above the outgoing argument - // space. Any padding will be higher on the stack than this - // (including the padding added by lvaAlignFrame()). - noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); - } -#endif // TARGET_AMD64 - #if FEATURE_FIXED_OUT_ARGS if (lvaOutgoingArgSpaceSize > 0) { @@ -6841,7 +5824,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // Since this will always use an SP relative offset of zero // at the end of lvaFixVirtualFrameOffsets, it will be set to absolute '0' - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaOutgoingArgSpaceVar, lvaLclSize(lvaOutgoingArgSpaceVar), stkOffs); + stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaOutgoingArgSpaceVar, lvaLclStackHomeSize(lvaOutgoingArgSpaceVar), + stkOffs); } #endif // FEATURE_FIXED_OUT_ARGS @@ -6946,7 +5930,7 @@ bool Compiler::lvaParamHasLocalStackSpace(unsigned lclNum) // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg // in the prolog, thus they don't need stack frame space. 
// - if ((codeGen->regSet.rsMaskPreSpillRegs(false) & genRegMask(varDsc->GetArgReg())) != 0) + if (lvaIsPreSpilled(lclNum, codeGen->regSet.rsMaskPreSpillRegs(false))) { assert(varDsc->GetStackOffset() != BAD_STK_OFFS); return false; @@ -7368,7 +6352,7 @@ void Compiler::lvaDumpRegLocation(unsigned lclNum) const LclVarDsc* varDsc = lvaGetDesc(lclNum); #ifdef TARGET_ARM - if (varDsc->TypeGet() == TYP_DOUBLE) + if (varDsc->TypeIs(TYP_DOUBLE)) { // The assigned registers are `lvRegNum:RegNext(lvRegNum)` printf("%3s:%-3s ", getRegName(varDsc->GetRegNum()), getRegName(REG_NEXT(varDsc->GetRegNum()))); @@ -7473,7 +6457,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r printf(" %7s ", varTypeName(type)); if (genTypeSize(type) == 0) { - printf("(%2d) ", lvaLclSize(lclNum)); + printf("(%2d) ", lvaLclStackHomeSize(lclNum)); } else { @@ -7513,11 +6497,6 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r } } - if (varDsc->lvIsHfa()) - { - printf(" HFA(%s) ", varTypeName(varDsc->GetHfaType())); - } - if (varDsc->lvDoNotEnregister) { printf(" do-not-enreg["); @@ -7557,6 +6536,10 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r { printf("R"); } + if (varDsc->lvIsMultiRegDest) + { + printf("M"); + } #ifdef JIT32_GCENCODER if (varDsc->lvPinned) printf("P"); @@ -7572,6 +6555,10 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r { printf(" multireg-ret"); } + if (varDsc->lvIsMultiRegDest) + { + printf(" multireg-dest"); + } if (varDsc->lvMustInit) { printf(" must-init"); @@ -7935,7 +6922,7 @@ int Compiler::lvaToCallerSPRelativeOffset(int offset, bool isFpBased, bool forRo offset += codeGen->genCallerSPtoInitialSPdelta(); } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#ifdef FEATURE_ON_STACK_REPLACEMENT if (forRootFrame && opts.IsOSR()) { const PatchpointInfo* const ppInfo = info.compPatchpointInfo; @@ -7953,9 +6940,7 @@ int Compiler::lvaToCallerSPRelativeOffset(int offset, bool isFpBased, bool forRo // is simply TotalFrameSize plus one register. 
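// (Illustrative numbers, not taken from this change: with a Tier0 TotalFrameSize()
// of 0x90 on a 64-bit target, the first path below adjusts the offset by
// 0x90 + REGSIZE_BYTES = 0x98, while the second path adjusts it by 0x90 only.)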
// const int adjustment = ppInfo->TotalFrameSize() + REGSIZE_BYTES; - -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - +#else const int adjustment = ppInfo->TotalFrameSize(); #endif @@ -8198,7 +7183,7 @@ Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData* if (varType != TYP_STRUCT) { // Change the variable to a custom layout struct - unsigned size = roundUp(padding + pComp->lvaLclSize(lclNum), TARGET_POINTER_SIZE); + unsigned size = roundUp(padding + pComp->lvaLclStackHomeSize(lclNum), TARGET_POINTER_SIZE); ClassLayoutBuilder builder(pComp, size); #ifdef DEBUG builder.SetName(pComp->printfAlloc("%s_%u_Stress", varTypeName(varType), size), diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp index cafb6be5d1f2..faf16dc42642 100644 --- a/src/coreclr/jit/lir.cpp +++ b/src/coreclr/jit/lir.cpp @@ -490,7 +490,7 @@ GenTree* LIR::Range::FirstNonCatchArgNode() const { continue; } - else if ((node->OperIs(GT_STORE_LCL_VAR)) && (node->gtGetOp1()->OperIs(GT_CATCH_ARG))) + else if (node->OperIs(GT_STORE_LCL_VAR) && (node->gtGetOp1()->OperIs(GT_CATCH_ARG))) { continue; } @@ -1686,9 +1686,8 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const // It may be useful to remove these from being call operands, but that may also impact // other code that relies on being able to reach all the operands from a call node. // The argument of a JTRUE doesn't produce a value (just sets a flag). - assert(((node->OperGet() == GT_CALL) && def->OperIs(GT_PUTARG_STK)) || - ((node->OperGet() == GT_JTRUE) && (def->TypeGet() == TYP_VOID) && - ((def->gtFlags & GTF_SET_FLAGS) != 0))); + assert((node->OperIs(GT_CALL) && def->OperIs(GT_PUTARG_STK)) || + (node->OperIs(GT_JTRUE) && def->TypeIs(TYP_VOID) && ((def->gtFlags & GTF_SET_FLAGS) != 0))); continue; } @@ -1735,8 +1734,17 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const for (auto kvp : unusedDefs) { GenTree* node = kvp.Key(); - assert(node->IsUnusedValue() && "found an unmarked unused value"); - assert(!node->isContained() && "a contained node should have a user"); + if (!node->IsUnusedValue()) + { + JITDUMP("[%06u] is an unmarked unused value\n", Compiler::dspTreeID(node)); + assert(!"Found an unmarked unused value"); + } + + if (node->isContained()) + { + JITDUMP("[%06u] is a contained node with no user\n", Compiler::dspTreeID(node)); + assert(!"A contained node should have a user"); + } } } diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index 691336212815..6975963c1094 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -286,6 +286,20 @@ class LIR final void InsertAtBeginning(Range&& range); void InsertAtEnd(Range&& range); + template + void InsertAtBeginning(GenTree* tree, Trees&&... rest) + { + InsertAtBeginning(std::forward(rest)...); + InsertAtBeginning(tree); + } + + template + void InsertAtEnd(GenTree* tree, Trees&&... 
rest) + { + InsertAtEnd(tree); + InsertAtEnd(std::forward<Trees>(rest)...); + } + void Remove(GenTree* node, bool markOperandsUnused = false); Range Remove(GenTree* firstNode, GenTree* lastNode); Range Remove(ReadOnlyRange&& range); diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index 4055d6cb8c67..24b4bedad58e 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -15,14 +15,30 @@ #endif #include "lower.h" // for LowerRange() -/***************************************************************************** - * - * Helper for Compiler::fgPerBlockLocalVarLiveness(). - * The goal is to compute the USE and DEF sets for a basic block. - */ +//------------------------------------------------------------------------ +// fgMarkUseDef: +// Mark a local in the current def/use set. +// +// Parameters: +// tree - The local +// +// Template parameters: +// ssaLiveness - Whether the liveness computed is for SSA and should follow +// the same modelling rules as SSA. SSA models partial defs like (v.x = 123) as +// (v = v with x = 123), which also implies that these partial definitions +// become uses. For dead-code elimination this is more conservative than +// needed, so outside SSA we do not model partial defs in this way: +// +// * In SSA: Partial defs are full defs but are also uses. They impact both +// bbVarUse and bbVarDef. +// +// * Outside SSA: Partial defs are _not_ full defs and are also not +// considered uses. They do not get included in bbVarUse/bbVarDef. +// +template <bool ssaLiveness> void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree) { - assert((tree->OperIsLocal() && (tree->OperGet() != GT_PHI_ARG)) || tree->OperIs(GT_LCL_ADDR)); + assert((tree->OperIsLocal() && !tree->OperIs(GT_PHI_ARG)) || tree->OperIs(GT_LCL_ADDR)); const unsigned lclNum = tree->GetLclNum(); LclVarDsc* const varDsc = lvaGetDesc(lclNum); @@ -35,8 +51,9 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree) varDsc->setLvRefCnt(1); } - const bool isDef = (tree->gtFlags & GTF_VAR_DEF) != 0; - const bool isUse = !isDef || ((tree->gtFlags & GTF_VAR_USEASG) != 0); + const bool isDef = ((tree->gtFlags & GTF_VAR_DEF) != 0); + const bool isFullDef = isDef && ((tree->gtFlags & GTF_VAR_USEASG) == 0); + const bool isUse = ssaLiveness ? !isFullDef : !isDef; if (varDsc->lvTracked) { @@ -64,7 +81,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree) VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex); } - if (isDef) + if (ssaLiveness ? isDef : isFullDef) { // This is a def, add it to the set of defs. VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex); @@ -110,7 +127,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree) VarSetOps::AddElemD(this, fgCurUseSet, varIndex); } - if (isDef) + if (ssaLiveness ? isDef : isFullDef) { VarSetOps::AddElemD(this, fgCurDefSet, varIndex); } @@ -120,7 +137,10 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree) } } -/*****************************************************************************/
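A worked example of the partial-def modelling described above (illustrative only, not part of the change), for a hypothetical tracked struct local v:

    // v.x = 123 (partial def: GTF_VAR_DEF and GTF_VAR_USEASG both set)
    //   ssaLiveness == true  -> isDef = true, isFullDef = false, isUse = true  (counted as use and def)
    //   ssaLiveness == false -> isDef = true, isFullDef = false, isUse = false (neither set is updated)
    // v = <whole new value> (full def: GTF_VAR_DEF only)
    //   either mode          -> isFullDef = true, isUse = false               (def only)

+//------------------------------------------------------------------------ +// fgLocalVarLiveness: +// Compute block def/use sets, liveness, and do dead code elimination.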
+// void Compiler::fgLocalVarLiveness() { #ifdef DEBUG @@ -220,7 +240,7 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree) case GT_LCL_FLD: case GT_STORE_LCL_VAR: case GT_STORE_LCL_FLD: - fgMarkUseDef(tree->AsLclVarCommon()); + fgMarkUseDef(tree->AsLclVarCommon()); break; case GT_LCL_ADDR: @@ -233,7 +253,7 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree) break; } - fgMarkUseDef(tree->AsLclVarCommon()); + fgMarkUseDef(tree->AsLclVarCommon()); } break; @@ -329,7 +349,7 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree) GenTreeLclVarCommon* definedLcl = gtCallGetDefinedRetBufLclAddr(call); if (definedLcl != nullptr) { - fgMarkUseDef(definedLcl); + fgMarkUseDef(definedLcl); } break; } @@ -360,7 +380,10 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTreeHWIntrinsic* hwintrinsic) } #endif // FEATURE_HW_INTRINSICS -/*****************************************************************************/ +//------------------------------------------------------------------------ +// fgPerBlockLocalVarLiveness: +// Compute def and use sets for the IR. +// void Compiler::fgPerBlockLocalVarLiveness() { #ifdef DEBUG @@ -423,7 +446,7 @@ void Compiler::fgPerBlockLocalVarLiveness() { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - fgMarkUseDef(lcl); + fgMarkUseDef(lcl); } } else @@ -440,12 +463,12 @@ void Compiler::fgPerBlockLocalVarLiveness() // qmark arms. for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - bool isUse = ((lcl->gtFlags & GTF_VAR_DEF) == 0) || ((lcl->gtFlags & GTF_VAR_USEASG) != 0); + bool isUse = (lcl->gtFlags & GTF_VAR_DEF) == 0; // We can still handle the pure def at the top level. bool conditional = lcl != dst; if (isUse || !conditional) { - fgMarkUseDef(lcl); + fgMarkUseDef(lcl); } } } @@ -457,7 +480,7 @@ void Compiler::fgPerBlockLocalVarLiveness() { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - fgMarkUseDef(lcl); + fgMarkUseDef(lcl); } } } @@ -1179,7 +1202,7 @@ void Compiler::fgComputeLife(VARSET_TP& life, for (GenTree* tree = startNode; tree != endNode; tree = tree->gtPrev) { AGAIN: - assert(tree->OperGet() != GT_QMARK); + assert(!tree->OperIs(GT_QMARK)); bool isUse = false; bool doAgain = false; @@ -1270,7 +1293,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR case GT_CALL: { GenTreeCall* const call = node->AsCall(); - if (((call->TypeGet() == TYP_VOID) || call->IsUnusedValue()) && !call->HasSideEffects(this)) + if ((call->TypeIs(TYP_VOID) || call->IsUnusedValue()) && !call->HasSideEffects(this)) { JITDUMP("Removing dead call:\n"); DISPNODE(call); @@ -1462,6 +1485,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR case GT_JCC: case GT_JTRUE: case GT_RETURN: + case GT_RETURN_SUSPEND: case GT_SWITCH: case GT_RETFILT: case GT_START_NONGC: @@ -1478,6 +1502,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR case GT_IL_OFFSET: case GT_KEEPALIVE: case GT_SWIFT_ERROR_RET: + case GT_GCPOLL: // Never remove these nodes, as they are always side-effecting. 
// // NOTE: the only side-effect of some of these nodes (GT_CMP, GT_SUB_HI) is a write to the flags @@ -1652,8 +1677,7 @@ bool Compiler::fgTryRemoveDeadStoreLIR(GenTree* store, GenTreeLclVarCommon* lclN if ((lclNode->gtFlags & GTF_VAR_USEASG) == 0) { LclVarDsc* varDsc = lvaGetDesc(lclNode); - if (varDsc->lvHasExplicitInit && (varDsc->TypeGet() == TYP_STRUCT) && varDsc->HasGCPtr() && - (varDsc->lvRefCnt() > 1)) + if (varDsc->lvHasExplicitInit && varDsc->TypeIs(TYP_STRUCT) && varDsc->HasGCPtr() && (varDsc->lvRefCnt() > 1)) { JITDUMP("Not removing a potential explicit init [%06u] of V%02u\n", dspTreeID(store), lclNode->GetLclNum()); return false; @@ -2096,7 +2120,7 @@ void Compiler::fgInterBlockLocalVarLiveness() for (GenTree* cur = stmt->GetTreeListEnd(); cur != nullptr;) { assert(cur->OperIsAnyLocal()); - bool isDef = ((cur->gtFlags & GTF_VAR_DEF) != 0) && ((cur->gtFlags & GTF_VAR_USEASG) == 0); + bool isDef = (cur->gtFlags & GTF_VAR_DEF) != 0; bool conditional = cur != dst; // Ignore conditional defs that would otherwise // (incorrectly) interfere with liveness in other diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index c999e1b1721c..470370455340 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -45,6 +45,22 @@ void ArrIndex::PrintBoundsCheckNodes(unsigned dim /* = -1 */) } } +//-------------------------------------------------------------------------------------------------- +// Print: debug print an SpanIndex struct in form: `V01[V02]`. +// +void SpanIndex::Print() +{ + printf("V%02d[V%02d]", lenLcl, indLcl); +} + +//-------------------------------------------------------------------------------------------------- +// PrintBoundsCheckNode: - debug print an SpanIndex struct bounds check node tree id +// +void SpanIndex::PrintBoundsCheckNode() +{ + Compiler::printTreeID(bndsChk); +} + #endif // DEBUG //-------------------------------------------------------------------------------------------------- @@ -80,6 +96,10 @@ GenTree* LC_Array::ToGenTree(Compiler* comp, BasicBlock* bb) arrAddr->gtFlags |= GTF_INX_ADDR_NONNULL; arr = comp->gtNewIndexIndir(arrAddr->AsIndexAddr()); + + // We don't really need to call morph here if we import arr[i] directly + // without gtNewArrayIndexAddr (but it's a bit of verbose). + arr = comp->fgMorphTree(arr); } // If asked for arrlen invoke arr length operator. if (oper == ArrLen) @@ -111,6 +131,20 @@ GenTree* LC_Array::ToGenTree(Compiler* comp, BasicBlock* bb) return nullptr; } +//-------------------------------------------------------------------------------------------------- +// ToGenTree: Convert a Span.Length operation into a GenTree node. +// +// Arguments: +// comp - Compiler instance to allocate trees +// +// Return Values: +// Returns the gen tree representation for Span.Length +// +GenTree* LC_Span::ToGenTree(Compiler* comp) +{ + return comp->gtNewLclvNode(spanIndex->lenLcl, comp->lvaTable[spanIndex->lenLcl].lvType); +} + //-------------------------------------------------------------------------------------------------- // ToGenTree - Convert an "identifier" into a GenTree node. 
// @@ -134,6 +168,8 @@ GenTree* LC_Ident::ToGenTree(Compiler* comp, BasicBlock* bb) return comp->gtNewLclvNode(lclNum, comp->lvaTable[lclNum].lvType); case ArrAccess: return arrAccess.ToGenTree(comp, bb); + case SpanAccess: + return spanAccess.ToGenTree(comp); case Null: return comp->gtNewIconNode(0, TYP_REF); case ClassHandle: @@ -861,54 +897,12 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* // const weight_t fastLikelihood = fastPathWeightScaleFactor; - // Choose how to generate the conditions - const bool generateOneConditionPerBlock = true; - - if (generateOneConditionPerBlock) - { - // N = conds.Size() branches must all be true to execute the fast loop. - // Use the N'th root.... - // - const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)conds.Size()); - - for (unsigned i = 0; i < conds.Size(); ++i) - { - BasicBlock* newBlk = comp->fgNewBBafter(BBJ_COND, insertAfter, /*extendRegion*/ true); - newBlk->inheritWeight(insertAfter); - - JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, slowPreheader->bbNum); - FlowEdge* const trueEdge = comp->fgAddRefPred(slowPreheader, newBlk); - newBlk->SetTrueEdge(trueEdge); - trueEdge->setLikelihood(1 - fastLikelihoodPerBlock); - - if (insertAfter->KindIs(BBJ_COND)) - { - JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", insertAfter->bbNum, newBlk->bbNum); - FlowEdge* const falseEdge = comp->fgAddRefPred(newBlk, insertAfter); - insertAfter->SetFalseEdge(falseEdge); - falseEdge->setLikelihood(fastLikelihoodPerBlock); - } - - JITDUMP("Adding conditions %u to " FMT_BB "\n", i, newBlk->bbNum); - - GenTree* cond = conds[i].ToGenTree(comp, newBlk, /* invert */ true); - GenTree* jmpTrueTree = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, cond); - Statement* stmt = comp->fgNewStmtFromTree(jmpTrueTree); - - comp->fgInsertStmtAtEnd(newBlk, stmt); - - // Remorph. - JITDUMP("Loop cloning condition tree before morphing:\n"); - DBEXEC(comp->verbose, comp->gtDispTree(jmpTrueTree)); - JITDUMP("\n"); - comp->fgMorphBlockStmt(newBlk, stmt DEBUGARG("Loop cloning condition")); - - insertAfter = newBlk; - } + // N = conds.Size() branches must all be true to execute the fast loop. + // Use the N'th root.... + // + const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)conds.Size()); - return insertAfter; - } - else + for (unsigned i = 0; i < conds.Size(); ++i) { BasicBlock* newBlk = comp->fgNewBBafter(BBJ_COND, insertAfter, /*extendRegion*/ true); newBlk->inheritWeight(insertAfter); @@ -916,43 +910,28 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, slowPreheader->bbNum); FlowEdge* const trueEdge = comp->fgAddRefPred(slowPreheader, newBlk); newBlk->SetTrueEdge(trueEdge); - trueEdge->setLikelihood(1.0 - fastLikelihood); + trueEdge->setLikelihood(1 - fastLikelihoodPerBlock); if (insertAfter->KindIs(BBJ_COND)) { JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", insertAfter->bbNum, newBlk->bbNum); FlowEdge* const falseEdge = comp->fgAddRefPred(newBlk, insertAfter); insertAfter->SetFalseEdge(falseEdge); - falseEdge->setLikelihood(fastLikelihood); - } - - JITDUMP("Adding conditions to " FMT_BB "\n", newBlk->bbNum); - - // Get the first condition. - GenTree* cond = conds[0].ToGenTree(comp, newBlk, /* invert */ false); - for (unsigned i = 1; i < conds.Size(); ++i) - { - // Append all conditions using AND operator. 
- cond = comp->gtNewOperNode(GT_AND, TYP_INT, cond, conds[i].ToGenTree(comp, newBlk, /* invert */ false)); + falseEdge->setLikelihood(fastLikelihoodPerBlock); } - // Add "cond == 0" node - cond = comp->gtNewOperNode(GT_EQ, TYP_INT, cond, comp->gtNewIconNode(0)); + JITDUMP("Adding conditions %u to " FMT_BB "\n", i, newBlk->bbNum); - // Add jmpTrue "cond == 0" + GenTree* cond = conds[i].ToGenTree(comp, newBlk, /* invert */ true); + cond->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); GenTree* jmpTrueTree = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, cond); Statement* stmt = comp->fgNewStmtFromTree(jmpTrueTree); comp->fgInsertStmtAtEnd(newBlk, stmt); - - // Remorph. - JITDUMP("Loop cloning condition tree before morphing:\n"); - DBEXEC(comp->verbose, comp->gtDispTree(jmpTrueTree)); - JITDUMP("\n"); - comp->fgMorphBlockStmt(newBlk, stmt DEBUGARG("Loop cloning condition")); - - return newBlk; + insertAfter = newBlk; } + + return insertAfter; } //-------------------------------------------------------------------------------------------------- @@ -1133,6 +1112,10 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loop->GetIndex()); assert(optInfos->Size() > 0); + // If we have spans, that means we have to be careful about the stride (see below). + // + bool hasSpans = false; + // We only need to check for iteration behavior if we have array checks. // bool checkIterationBehavior = false; @@ -1147,6 +1130,11 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl checkIterationBehavior = true; break; + case LcOptInfo::LcSpan: + checkIterationBehavior = true; + hasSpans = true; + break; + case LcOptInfo::LcTypeTest: { LcTypeTestOptInfo* ttInfo = optInfo->AsLcTypeTestOptInfo(); @@ -1207,16 +1195,22 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl } const bool isIncreasingLoop = iterInfo->IsIncreasingLoop(); - assert(isIncreasingLoop || iterInfo->IsDecreasingLoop()); + if (!isIncreasingLoop && !iterInfo->IsDecreasingLoop()) + { + // Normally, we reject weird-looking loops in optIsLoopClonable, but it's not the case + // when we have both GDVs and array checks inside such loops. + return false; + } // We already know that this is either increasing or decreasing loop and the // stride is (> 0) or (< 0). Here, just take the abs() value and check if it // is beyond the limit. int stride = abs(iterInfo->IterConst()); - if (stride >= 58) + static_assert_no_msg(INT32_MAX >= CORINFO_Array_MaxLength); + if (stride >= (INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1)) { - // Array.MaxLength can have maximum of 0X7FFFFFC7 elements, so make sure + // Array.MaxLength can have maximum of 0x7fffffc7 elements, so make sure // the stride increment doesn't overflow or underflow the index. Hence, // the maximum stride limit is set to // (int.MaxValue - (Array.MaxLength - 1) + 1), which is @@ -1224,6 +1218,14 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl return false; } + // We don't know exactly whether we might be dealing with a Span or not, + // but if we suspect we are, we need to be careful about the stride: + // As Span<>.Length can be INT32_MAX unlike arrays. 
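Two of the quantities above are easier to follow with the arithmetic written out (illustrative numbers only). For the condition likelihoods: with N = conds.Size() cloning conditions, each generated BBJ_COND block keeps the fast path with likelihood fastLikelihood^(1/N) = exp(log(fastLikelihood) / N); for example, with fastLikelihood = 0.8 and N = 4, each block's edge to the slow preheader gets likelihood 1 - 0.8^(1/4), roughly 0.054, and the product over the four fast edges is the intended 0.8.

For the stride bound: CORINFO_Array_MaxLength is 0x7fffffc7 (2,147,483,591), so INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1 = 2,147,483,647 - 2,147,483,590 + 1 = 58, i.e. the new expression computes exactly the limit that the old hard-coded `stride >= 58` check used, just without the magic number. Spans are the exception: a Span<T>.Length can be as large as int.MaxValue, so a stride greater than 1 could overflow the index arithmetic, which is why the check that follows bails out when hasSpans is set and the stride exceeds 1.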
+ if (hasSpans && (stride > 1)) + { + return false; + } + LC_Ident ident; // Init conditions if (iterInfo->HasConstInit) @@ -1366,6 +1368,15 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } break; + case LcOptInfo::LcSpan: + { + LcSpanOptInfo* spanInfo = optInfo->AsLcSpanOptInfo(); + LC_Span spanLen(&spanInfo->spanIndex); + LC_Ident spanLenIdent = LC_Ident::CreateSpanAccess(spanLen); + LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(spanLenIdent)); + context->EnsureConditions(loop->GetIndex())->Push(cond); + } + break; case LcOptInfo::LcMdArray: { LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); @@ -1508,10 +1519,6 @@ bool Compiler::optComputeDerefConditions(FlowGraphNaturalLoop* loop, LoopCloneCo JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loop->GetIndex()); JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loop->GetIndex()); - // We currently expect to have at least one of these. - // - assert((arrayDeref->Size() != 0) || (objDeref->Size() != 0)); - // Generate the array dereference checks. // // For each array in the dereference list, construct a tree, @@ -1732,6 +1739,39 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt)); } break; + case LcOptInfo::LcSpan: + { + LcSpanOptInfo* spanIndexInfo = optInfo->AsLcSpanOptInfo(); + compCurBB = spanIndexInfo->spanIndex.useBlock; + GenTree* bndsChkNode = spanIndexInfo->spanIndex.bndsChk; + +#ifdef DEBUG + if (verbose) + { + printf("Remove bounds check "); + printTreeID(bndsChkNode->gtGetOp1()); + printf(" for " FMT_STMT ", ", spanIndexInfo->stmt->GetID()); + spanIndexInfo->spanIndex.Print(); + printf(", bounds check nodes: "); + spanIndexInfo->spanIndex.PrintBoundsCheckNode(); + printf("\n"); + } +#endif // DEBUG + + if (bndsChkNode->gtGetOp1()->OperIs(GT_BOUNDS_CHECK)) + { + optRemoveCommaBasedRangeCheck(bndsChkNode, spanIndexInfo->stmt); + } + else + { + JITDUMP(" Bounds check already removed\n"); + + // If the bounds check node isn't there, it better have been converted to a GT_NOP. + assert(bndsChkNode->gtGetOp1()->OperIs(GT_NOP)); + } + DBEXEC(dynamicPath, optDebugLogLoopCloning(spanIndexInfo->spanIndex.useBlock, spanIndexInfo->stmt)); + } + break; case LcOptInfo::LcMdArray: // TODO-CQ: CLONE: Implement. break; @@ -1775,60 +1815,6 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, } } -//------------------------------------------------------------------------ -// optShouldCloneLoop: Decide if a loop that can be cloned should be cloned. -// -// Arguments: -// loop - the current loop for which the optimizations are performed. -// context - data structure where all loop cloning info is kept. -// -// Returns: -// true if expected performance gain from cloning is worth the potential -// size increase. -// -// Remarks: -// This is a simple-minded heuristic meant to avoid "runaway" cloning -// where large loops are cloned. -// -// We estimate the size cost of cloning by summing up the number of -// tree nodes in all statements in all blocks in the loop. -// -// This value is compared to a hard-coded threshold, and if bigger, -// then the method returns false. -// -bool Compiler::optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context) -{ - // See if loop size exceeds the limit. 
- // - const int sizeConfig = JitConfig.JitCloneLoopsSizeLimit(); - unsigned const sizeLimit = (sizeConfig >= 0) ? (unsigned)sizeConfig : UINT_MAX; - unsigned size = 0; - - BasicBlockVisit result = loop->VisitLoopBlocks([&](BasicBlock* block) { - assert(sizeLimit >= size); - unsigned const slack = sizeLimit - size; - unsigned blockSize = 0; - if (block->ComplexityExceeds(this, slack, &blockSize)) - { - return BasicBlockVisit::Abort; - } - - size += blockSize; - return BasicBlockVisit::Continue; - }); - - if (result == BasicBlockVisit::Abort) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ": exceeds size limit %u\n", loop->GetIndex(), sizeLimit); - return false; - } - - JITDUMP("Loop cloning: loop " FMT_LP ": size %u does not exceed size limit %u\n", loop->GetIndex(), size, - sizeLimit); - - return true; -} - //---------------------------------------------------------------------------- // optIsLoopClonable: Determine whether this loop can be cloned. // @@ -1845,6 +1831,12 @@ bool Compiler::optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* // bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { + if (loop->GetHeader()->isRunRarely()) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Loop is cold.\n", loop->GetIndex()); + return false; + } + const bool requireIterable = !doesMethodHaveGuardedDevirtualization(); NaturalLoopIterInfo* iterInfo = context->GetLoopIterInfo(loop->GetIndex()); @@ -1930,7 +1922,7 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c #ifdef DEBUG const unsigned ivLclNum = iterInfo->IterVar; GenTree* const op1 = iterInfo->Iterator(); - assert((op1->gtOper == GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == ivLclNum)); + assert(op1->OperIs(GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == ivLclNum)); #endif } @@ -1966,7 +1958,6 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* contex BasicBlock* insertAfter) { JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loop->GetIndex()); - assert(context->HasBlockConditions(loop->GetIndex())); assert(slowPreheader != nullptr); if (context->HasBlockConditions(loop->GetIndex())) @@ -2140,9 +2131,6 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // ... // slowPreheader --> slowHeader // - // We should always have block conditions. - - assert(context->HasBlockConditions(loop->GetIndex())); // If any condition is false, go to slowPreheader (which branches or falls through to header of the slow loop). BasicBlock* slowHeader = nullptr; @@ -2275,7 +2263,7 @@ bool Compiler::optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lcl // bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal) { - if (tree->gtOper != GT_COMMA) + if (!tree->OperIs(GT_COMMA)) { return false; } @@ -2325,6 +2313,44 @@ bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsN return true; } +//--------------------------------------------------------------------------------------------------------------- +// optExtractSpanIndex: Try to extract the Span element access from "tree". +// +// Arguments: +// tree - the tree to be checked if it is the Span [] operation. +// result - the extracted information is updated in result. +// +// Return Value: +// Returns true if Span index can be extracted, else, return false. 
+// +// Notes: +// The way loop cloning works for Span is that we don't actually know (or care) +// if it's a Span or an array, we just extract index and length locals out +/// of the GT_BOUNDS_CHECK node. The fact that the length is a local var +/// allows us to not worry about array/span dereferencing. +// +bool Compiler::optExtractSpanIndex(GenTree* tree, SpanIndex* result) +{ + // Bounds checks are almost always wrapped in a comma node + // and are the first operand. + if (!tree->OperIs(GT_COMMA) || !tree->gtGetOp1()->OperIs(GT_BOUNDS_CHECK)) + { + return false; + } + + GenTreeBoundsChk* arrBndsChk = tree->gtGetOp1()->AsBoundsChk(); + if (!arrBndsChk->GetIndex()->OperIs(GT_LCL_VAR) || !arrBndsChk->GetArrayLength()->OperIs(GT_LCL_VAR)) + { + return false; + } + + result->lenLcl = arrBndsChk->GetArrayLength()->AsLclVarCommon()->GetLclNum(); + result->indLcl = arrBndsChk->GetIndex()->AsLclVarCommon()->GetLclNum(); + result->bndsChk = tree; + result->useBlock = compCurBB; + return true; +} + //--------------------------------------------------------------------------------------------------------------- // optReconstructArrIndexHelp: Helper function for optReconstructArrIndex. See that function for more details. // @@ -2346,7 +2372,7 @@ bool Compiler::optReconstructArrIndexHelp(GenTree* tree, ArrIndex* result, unsig return true; } // We have a comma (check if array base expr is computed in "before"), descend further. - else if (tree->OperGet() == GT_COMMA) + else if (tree->OperIs(GT_COMMA)) { GenTree* before = tree->gtGetOp1(); @@ -2529,7 +2555,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop // if (info->cloneForArrayBounds && optReconstructArrIndex(tree, &arrIndex)) { - assert(tree->gtOper == GT_COMMA); + assert(tree->OperIs(GT_COMMA)); #ifdef DEBUG if (verbose) @@ -2588,6 +2614,30 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_SKIP_SUBTREES; } + SpanIndex spanIndex = SpanIndex(); + if (info->cloneForArrayBounds && optExtractSpanIndex(tree, &spanIndex)) + { + // Check that the span's length local variable is invariant within the loop body. + if (!optIsStackLocalInvariant(info->loop, spanIndex.lenLcl)) + { + JITDUMP("Span.Length V%02d is not loop invariant\n", spanIndex.lenLcl); + return WALK_SKIP_SUBTREES; + } + + unsigned iterVar = info->context->GetLoopIterInfo(info->loop->GetIndex())->IterVar; + if (spanIndex.indLcl == iterVar) + { + // Update the loop context. + info->context->EnsureLoopOptInfo(info->loop->GetIndex()) + ->Push(new (this, CMK_LoopOpt) LcSpanOptInfo(spanIndex, info->stmt)); + } + else + { + JITDUMP("Induction V%02d is not used as index\n", iterVar); + } + return WALK_SKIP_SUBTREES; + } + if (info->cloneForGDVTests && tree->OperIs(GT_JTRUE)) { JITDUMP("...GDV considering [%06u]\n", dspTreeID(tree)); @@ -3027,8 +3077,9 @@ PhaseStatus Compiler::optCloneLoops() } else { - bool allTrue = false; - bool anyFalse = false; + bool allTrue = false; + bool anyFalse = false; + const int sizeLimit = JitConfig.JitCloneLoopsSizeLimit(); context.EvaluateConditions(loop->GetIndex(), &allTrue, &anyFalse DEBUGARG(verbose)); if (anyFalse) { @@ -3045,7 +3096,13 @@ PhaseStatus Compiler::optCloneLoops() // No need to clone. context.CancelLoopOptInfo(loop->GetIndex()); } - else if (!optShouldCloneLoop(loop, &context)) + // This is a simple-minded heuristic meant to avoid "runaway" cloning + // where large loops are cloned. 
+ // We estimate the size cost of cloning by summing up the number of + // tree nodes in all statements in all blocks in the loop. + // This value is compared to a hard-coded threshold, and if bigger, + // then the method returns false. + else if ((sizeLimit >= 0) && optLoopComplexityExceeds(loop, (unsigned)sizeLimit)) { context.CancelLoopOptInfo(loop->GetIndex()); } diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index cfda1be87a8b..ecdda09775f8 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -211,6 +211,28 @@ struct ArrIndex #endif }; +// SpanIndex represents a span element access and associated bounds check. +struct SpanIndex +{ + unsigned lenLcl; // The Span length local num + unsigned indLcl; // The index local num + GenTree* bndsChk; // The bounds check node + BasicBlock* useBlock; // Block where the [] occurs + + SpanIndex() + : lenLcl(BAD_VAR_NUM) + , indLcl(BAD_VAR_NUM) + , bndsChk(nullptr) + , useBlock(nullptr) + { + } + +#ifdef DEBUG + void Print(); + void PrintBoundsCheckNode(); +#endif +}; + // Forward declarations #define LC_OPT(en) struct en##OptInfo; #include "loopcloningopts.h" @@ -317,6 +339,21 @@ struct LcJaggedArrayOptInfo : public LcOptInfo } }; +// Optimization info for a Span +// +struct LcSpanOptInfo : public LcOptInfo +{ + SpanIndex spanIndex; // SpanIndex representation of the Span. + Statement* stmt; // "stmt" where the optimization opportunity occurs. + + LcSpanOptInfo(SpanIndex& spanIndex, Statement* stmt) + : LcOptInfo(LcSpan) + , spanIndex(spanIndex) + , stmt(stmt) + { + } +}; + // Optimization info for a type test // struct LcTypeTestOptInfo : public LcOptInfo @@ -481,6 +518,38 @@ struct LC_Array GenTree* ToGenTree(Compiler* comp, BasicBlock* bb); }; +// Symbolic representation of Span.Length +struct LC_Span +{ + SpanIndex* spanIndex; + +#ifdef DEBUG + void Print() + { + spanIndex->Print(); + } +#endif + + LC_Span() + : spanIndex(nullptr) + { + } + + LC_Span(SpanIndex* arrIndex) + : spanIndex(arrIndex) + { + } + + // Equality operator + bool operator==(const LC_Span& that) const + { + return (spanIndex->lenLcl == that.spanIndex->lenLcl) && (spanIndex->indLcl == that.spanIndex->indLcl); + } + + // Get a tree representation for this symbolic Span.Length + GenTree* ToGenTree(Compiler* comp); +}; + //------------------------------------------------------------------------ // LC_Ident: symbolic representation of "a value" // @@ -492,6 +561,7 @@ struct LC_Ident Const, Var, ArrAccess, + SpanAccess, Null, ClassHandle, IndirOfLocal, @@ -509,6 +579,7 @@ struct LC_Ident unsigned indirOffs; }; LC_Array arrAccess; + LC_Span spanAccess; CORINFO_CLASS_HANDLE clsHnd; struct { @@ -553,6 +624,8 @@ struct LC_Ident return (lclNum == that.lclNum) && (indirOffs == that.indirOffs); case ArrAccess: return (arrAccess == that.arrAccess); + case SpanAccess: + return (spanAccess == that.spanAccess); case Null: return true; case MethodAddr: @@ -598,6 +671,9 @@ struct LC_Ident case ArrAccess: arrAccess.Print(); break; + case SpanAccess: + spanAccess.Print(); + break; case Null: printf("null"); break; @@ -646,6 +722,13 @@ struct LC_Ident return id; } + static LC_Ident CreateSpanAccess(const LC_Span& spanLen) + { + LC_Ident id(SpanAccess); + id.spanAccess = spanLen; + return id; + } + static LC_Ident CreateNull() { return LC_Ident(Null); diff --git a/src/coreclr/jit/loopcloningopts.h b/src/coreclr/jit/loopcloningopts.h index 2fb13937e2f8..e27a3d802e11 100644 --- a/src/coreclr/jit/loopcloningopts.h +++ 
b/src/coreclr/jit/loopcloningopts.h @@ -13,5 +13,6 @@ LC_OPT(LcMdArray) LC_OPT(LcJaggedArray) LC_OPT(LcTypeTest) LC_OPT(LcMethodAddrTest) +LC_OPT(LcSpan) #undef LC_OPT diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 34c8bd16b310..b30230b64adc 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -393,6 +393,61 @@ bool Lowering::IsSafeToMarkRegOptional(GenTree* parentNode, GenTree* childNode) return false; } +//------------------------------------------------------------------------ +// LowerRange: +// Lower the specified range of nodes. +// +// Arguments: +// firstNode - First node to lower +// lastNode - Last node to lower +// +void Lowering::LowerRange(GenTree* firstNode, GenTree* lastNode) +{ + assert(lastNode != nullptr); + + // Multiple possible behaviors of LowerNode are possible: + // 1. The node being lowered may be removed + // 2. The node being lowered may be replaced by a new region of nodes and + // ask lowering to go back to those nodes + // 3. The node being lowered may have its user removed + // + // The solution here does not actually try to handle the 3rd case. + // + GenTree* stopNode = lastNode->gtNext; + + for (GenTree* cur = firstNode; cur != stopNode;) + { + cur = LowerNode(cur); + } +} + +//------------------------------------------------------------------------ +// IsProfitableToSetZeroFlag: Checks if it's profitable to optimize an shift +// and rotate operations to set the zero flag. +// +// Arguments: +// op - The operation node to check. +// +// Return value: +// true if it's profitable to set the zero flag; otherwise false. +// +bool Lowering::IsProfitableToSetZeroFlag(GenTree* op) const +{ +#ifdef TARGET_XARCH + if (op->OperIs(GT_LSH, GT_RSH, GT_RSZ, GT_ROR, GT_ROL)) + { + // BMI2 instructions (SHLX, SARX, SHRX, RORX) do not set zero flag. + if (!op->AsOp()->gtGetOp2()->OperIsConst()) + { + return false; + } + } + +#endif // TARGET_XARCH + + return true; +} + //------------------------------------------------------------------------ // LowerNode: this is the main entry point for Lowering. 
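A sketch of why the xarch carve-out in IsProfitableToSetZeroFlag matters, assuming the shift feeds a compare against zero (illustrative assembly, not necessarily the exact code generated):

// Constant shift count: the shift instruction itself sets ZF, so the compare
// against zero can be elided and the branch consumes the shift's flags:
//     shl   eax, 3
//     jne   L_taken
// Variable shift count with BMI2 available: the JIT prefers shlx, which does
// not update the flags, so a separate test is still needed and marking the
// shift as setting the zero flag gains nothing:
//     shlx  eax, eax, ecx
//     test  eax, eax
//     jne   L_taken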
// @@ -437,14 +492,20 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_OR: case GT_XOR: { - if (comp->opts.OptimizationEnabled() && node->OperIs(GT_AND)) + if (comp->opts.OptimizationEnabled()) { GenTree* nextNode = nullptr; - if (TryLowerAndNegativeOne(node->AsOp(), &nextNode)) + if (node->OperIs(GT_AND) && TryLowerAndNegativeOne(node->AsOp(), &nextNode)) { return nextNode; } assert(nextNode == nullptr); + + nextNode = node->gtNext; + if (node->OperIs(GT_AND, GT_OR, GT_XOR) && TryFoldBinop(node->AsOp())) + { + return nextNode; + } } return LowerBinaryArithmetic(node->AsOp()); @@ -538,19 +599,25 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_CAST: { - GenTree* nextNode = LowerCast(node); -#if defined(TARGET_XARCH) - if (nextNode != nullptr) + GenTree* nextNode = node->gtNext; + if (TryRemoveCast(node->AsCast())) { return nextNode; } -#endif // TARGET_XARCH + + LowerCast(node); + break; } - break; case GT_BITCAST: - ContainCheckBitCast(node); - break; + { + GenTree* next = node->gtNext; + if (!TryRemoveBitCast(node->AsUnOp())) + { + ContainCheckBitCast(node->AsUnOp()); + } + return next; + } #if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_BOUNDS_CHECK: @@ -560,8 +627,16 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_ROL: case GT_ROR: + { + GenTree* next = node->gtNext; + if (comp->opts.OptimizationEnabled() && TryFoldBinop(node->AsOp())) + { + return next; + } + LowerRotate(node); break; + } #ifndef TARGET_64BIT case GT_LSH_HI: @@ -573,12 +648,20 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_LSH: case GT_RSH: case GT_RSZ: + { + GenTree* next = node->gtNext; + if (comp->opts.OptimizationEnabled() && TryFoldBinop(node->AsOp())) + { + return next; + } + #if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) LowerShift(node->AsOp()); #else ContainCheckShiftRotate(node->AsOp()); #endif break; + } case GT_STORE_BLK: if (node->AsBlk()->Data()->IsCall()) @@ -644,12 +727,14 @@ GenTree* Lowering::LowerNode(GenTree* node) #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_CMPXCHG: + RISCV64_ONLY(CheckImmedAndMakeContained(node, node->AsCmpXchg()->Data())); CheckImmedAndMakeContained(node, node->AsCmpXchg()->Comparand()); break; case GT_XORR: case GT_XAND: case GT_XADD: + case GT_XCHG: CheckImmedAndMakeContained(node, node->AsOp()->gtOp2); break; #elif defined(TARGET_XARCH) @@ -696,6 +781,12 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_MDARR_LENGTH: case GT_MDARR_LOWER_BOUND: return LowerArrLength(node->AsArrCommon()); + + case GT_ASYNC_CONTINUATION: + return LowerAsyncContinuation(node); + + case GT_RETURN_SUSPEND: + LowerReturnSuspend(node); break; default: @@ -750,7 +841,7 @@ GenTree* Lowering::LowerArrLength(GenTreeArrCommon* node) GenTree* addr; noway_assert(arr->gtNext == node); - if ((arr->gtOper == GT_CNS_INT) && (arr->AsIntCon()->gtIconVal == 0)) + if (arr->OperIs(GT_CNS_INT) && (arr->AsIntCon()->gtIconVal == 0)) { // If the array is NULL, then we should get a NULL reference // exception when computing its length. We need to maintain @@ -825,7 +916,7 @@ GenTree* Lowering::LowerArrLength(GenTreeArrCommon* node) * and LinearCodeGen will be responsible to generate downstream). * * This way there are no implicit temporaries. 
- * + * b) For small-sized switches, we will actually morph them into a series of conditionals of the form * if (case falls into the default){ goto jumpTable[size]; // last entry in the jump table is the default case } * (For the default case conditional, we'll be constructing the exact same code as the jump table case one). @@ -840,7 +931,7 @@ GenTree* Lowering::LowerArrLength(GenTreeArrCommon* node) GenTree* Lowering::LowerSwitch(GenTree* node) { - assert(node->gtOper == GT_SWITCH); + assert(node->OperIs(GT_SWITCH)); // The first step is to build the default case conditional construct that is // shared between both kinds of expansion of the switch node. @@ -907,9 +998,9 @@ GenTree* Lowering::LowerSwitch(GenTree* node) // 1. a statement containing temp = indexExpression // 2. and a statement with GT_SWITCH(temp) - assert(node->gtOper == GT_SWITCH); + assert(node->OperIs(GT_SWITCH)); GenTree* temp = node->AsOp()->gtOp1; - assert(temp->gtOper == GT_LCL_VAR); + assert(temp->OperIs(GT_LCL_VAR)); unsigned tempLclNum = temp->AsLclVarCommon()->GetLclNum(); var_types tempLclType = temp->TypeGet(); @@ -1533,498 +1624,485 @@ bool Lowering::TryLowerSwitchToBitTest(FlowEdge* jumpTable[], } //------------------------------------------------------------------------ -// ReplaceArgWithPutArgOrBitcast: Insert a PUTARG_* node in the right location -// and replace the call operand with that node. +// LowerArg: +// Lower one argument of a call. This entails inserting putarg nodes between +// the call and the argument. This is the point at which the source is +// consumed and the value transitions from control of the register allocator +// to the calling convention. // // Arguments: -// argSlot - slot in call of argument -// putArgOrBitcast - the node that is being inserted +// call - The call node +// callArg - Call argument // -void Lowering::ReplaceArgWithPutArgOrBitcast(GenTree** argSlot, GenTree* putArgOrBitcast) +void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg) { - assert(argSlot != nullptr); - assert(*argSlot != nullptr); - assert(putArgOrBitcast->OperIsPutArg() || putArgOrBitcast->OperIs(GT_BITCAST)); + GenTree** ppArg = &callArg->NodeRef(); + GenTree* arg = *ppArg; + assert(arg != nullptr); - GenTree* arg = *argSlot; + JITDUMP("Lowering arg: \n"); + DISPTREERANGE(BlockRange(), arg); + assert(arg->IsValue()); - // Replace the argument with the putarg/copy - *argSlot = putArgOrBitcast; - putArgOrBitcast->AsOp()->gtOp1 = arg; + // If we hit this we are probably double-lowering. + assert(!arg->OperIsPutArg()); - // Insert the putarg/copy into the block - BlockRange().InsertAfter(arg, putArgOrBitcast); -} + const ABIPassingInformation& abiInfo = callArg->AbiInfo; + JITDUMP("Passed in "); + DBEXEC(comp->verbose, abiInfo.Dump()); -//------------------------------------------------------------------------ -// NewPutArg: rewrites the tree to put an arg in a register or on the stack. -// -// Arguments: -// call - the call whose arg is being rewritten. -// arg - the arg being rewritten. -// callArg - the CallArg for the argument. -// type - the type of the argument. -// -// Return Value: -// The new tree that was created to put the arg in the right place -// or the incoming arg if the arg tree was not rewritten. -// -// Assumptions: -// call, arg, and info must be non-null. -// -// Notes: -// For System V systems with native struct passing (i.e. 
UNIX_AMD64_ABI defined) -// this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_FIELD_LIST of two GT_PUTARG_REGs -// for two eightbyte structs. For STK passed structs the method generates GT_PUTARG_STK tree. -// -GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, var_types type) -{ - assert(call != nullptr); - assert(arg != nullptr); - assert(callArg != nullptr); +#if !defined(TARGET_64BIT) + if (comp->opts.compUseSoftFP && arg->TypeIs(TYP_DOUBLE)) + { + // Unlike TYP_LONG we do no decomposition for doubles, yet we maintain + // it as a primitive type until lowering. So we need to get it into the + // right form here. - GenTree* putArg = nullptr; + unsigned argLclNum = comp->lvaGrabTemp(false DEBUGARG("double arg on softFP")); + GenTree* store = comp->gtNewTempStore(argLclNum, arg); + GenTree* low = comp->gtNewLclFldNode(argLclNum, TYP_INT, 0); + GenTree* high = comp->gtNewLclFldNode(argLclNum, TYP_INT, 4); + GenTree* longNode = new (comp, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, low, high); + BlockRange().InsertAfter(arg, store, low, high, longNode); - bool isOnStack = (callArg->AbiInfo.GetRegNum() == REG_STK); + *ppArg = arg = longNode; -#if FEATURE_ARG_SPLIT - // Struct can be split into register(s) and stack on ARM - if (compFeatureArgSplit() && callArg->AbiInfo.IsSplit()) - { - assert(arg->OperIs(GT_BLK, GT_FIELD_LIST) || arg->OperIsLocalRead()); - // TODO: Need to check correctness for FastTailCall - if (call->IsFastTailCall()) - { -#ifdef TARGET_ARM - NYI_ARM("lower: struct argument by fast tail call"); -#endif // TARGET_ARM - } + comp->lvaSetVarDoNotEnregister(argLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); - const unsigned slotNumber = callArg->AbiInfo.ByteOffset / TARGET_POINTER_SIZE; - const bool putInIncomingArgArea = call->IsFastTailCall(); + JITDUMP("Transformed double-typed arg on softFP to LONG node\n"); + } - putArg = new (comp, GT_PUTARG_SPLIT) GenTreePutArgSplit(arg, callArg->AbiInfo.ByteOffset, -#ifdef FEATURE_PUT_STRUCT_ARG_STK - callArg->AbiInfo.GetStackByteSize(), -#endif - callArg->AbiInfo.NumRegs, call, putInIncomingArgArea); + if (varTypeIsLong(arg)) + { + assert(callArg->AbiInfo.CountRegsAndStackSlots() == 2); - GenTreePutArgSplit* argSplit = putArg->AsPutArgSplit(); - for (unsigned regIndex = 0; regIndex < callArg->AbiInfo.NumRegs; regIndex++) - { - argSplit->SetRegNumByIdx(callArg->AbiInfo.GetRegNum(regIndex), regIndex); - } + noway_assert(arg->OperIs(GT_LONG)); + GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(); + fieldList->AddFieldLIR(comp, arg->gtGetOp1(), 0, TYP_INT); + fieldList->AddFieldLIR(comp, arg->gtGetOp2(), 4, TYP_INT); + BlockRange().InsertBefore(arg, fieldList); - if (arg->OperIs(GT_FIELD_LIST)) - { - unsigned regIndex = 0; - for (GenTreeFieldList::Use& use : arg->AsFieldList()->Uses()) - { - if (regIndex >= callArg->AbiInfo.NumRegs) - { - break; - } - var_types regType = use.GetNode()->TypeGet(); - // Account for the possibility that float fields may be passed in integer registers. - if (varTypeIsFloating(regType) && !genIsValidFloatReg(argSplit->GetRegNumByIdx(regIndex))) - { - regType = (regType == TYP_FLOAT) ? TYP_INT : TYP_LONG; - } - argSplit->m_regType[regIndex] = regType; - regIndex++; - } + BlockRange().Remove(arg); + *ppArg = arg = fieldList; - // Clear the register assignment on the fieldList node, as these are contained. 
- arg->SetRegNum(REG_NA); - } - else - { - ClassLayout* layout = arg->GetLayout(comp); + JITDUMP("Transformed long arg on 32-bit to FIELD_LIST node\n"); + } +#endif - // Set type of registers - for (unsigned index = 0; index < callArg->AbiInfo.NumRegs; index++) - { - argSplit->m_regType[index] = layout->GetGCPtrType(index); - } - } +#if FEATURE_ARG_SPLIT + // Structs can be split into register(s) and stack on some targets + if (compFeatureArgSplit() && abiInfo.IsSplitAcrossRegistersAndStack()) + { + SplitArgumentBetweenRegistersAndStack(call, callArg); + LowerArg(call, callArg); + return; } else #endif // FEATURE_ARG_SPLIT { - if (!isOnStack) + if (abiInfo.HasAnyRegisterSegment()) { -#if FEATURE_MULTIREG_ARGS - if ((callArg->AbiInfo.NumRegs > 1) && (arg->OperGet() == GT_FIELD_LIST)) + if (arg->OperIs(GT_FIELD_LIST) || (abiInfo.NumSegments > 1)) { - unsigned int regIndex = 0; - for (GenTreeFieldList::Use& use : arg->AsFieldList()->Uses()) + if (!arg->OperIs(GT_FIELD_LIST)) { - regNumber argReg = callArg->AbiInfo.GetRegNum(regIndex); - GenTree* curOp = use.GetNode(); - var_types curTyp = curOp->TypeGet(); - - // Create a new GT_PUTARG_REG node with op1 - GenTree* newOper = comp->gtNewPutArgReg(curTyp, curOp, argReg); - - // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST - ReplaceArgWithPutArgOrBitcast(&use.NodeRef(), newOper); - regIndex++; + // Primitive arg, but the ABI requires it to be split into + // registers. Insert the field list here. + GenTreeFieldList* fieldList = comp->gtNewFieldList(); + fieldList->AddFieldLIR(comp, arg, 0, genActualType(arg->TypeGet())); + BlockRange().InsertAfter(arg, fieldList); + arg = *ppArg = fieldList; } - // Just return arg. The GT_FIELD_LIST is not replaced. - // Nothing more to do. - return arg; + LowerArgFieldList(callArg, arg->AsFieldList()); + arg = *ppArg; } else -#endif // FEATURE_MULTIREG_ARGS { - putArg = comp->gtNewPutArgReg(type, arg, callArg->AbiInfo.GetRegNum()); + assert(abiInfo.HasExactlyOneRegisterSegment()); + InsertPutArgReg(ppArg, abiInfo.Segment(0)); + arg = *ppArg; } } else { - // Mark this one as tail call arg if it is a fast tail call. - // This provides the info to put this argument in in-coming arg area slot - // instead of in out-going arg area slot. + assert(abiInfo.NumSegments == 1); + const ABIPassingSegment& stackSeg = abiInfo.Segment(0); + const bool putInIncomingArgArea = call->IsFastTailCall(); -#ifdef DEBUG - // Make sure state is correct. The PUTARG_STK has TYP_VOID, as it doesn't produce - // a result. So the type of its operand must be the correct type to push on the stack. 
- callArg->CheckIsStruct(); -#endif + GenTree* putArg = + new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg, stackSeg.GetStackOffset(), + stackSeg.GetStackSize(), call, putInIncomingArgArea); - if ((arg->OperGet() != GT_FIELD_LIST)) - { -#if defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK) - if (type == TYP_SIMD12) - { -#if !defined(TARGET_64BIT) - assert(callArg->AbiInfo.ByteSize == 12); -#else // TARGET_64BIT - if (compAppleArm64Abi()) - { - assert(callArg->AbiInfo.ByteSize == 12); - } - else - { - assert(callArg->AbiInfo.ByteSize == 16); - } -#endif // TARGET_64BIT - } - else -#endif // defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK) - { - assert(genActualType(arg->TypeGet()) == type); - } - } - const unsigned slotNumber = callArg->AbiInfo.ByteOffset / TARGET_POINTER_SIZE; - const bool putInIncomingArgArea = call->IsFastTailCall(); - - putArg = - new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg, callArg->AbiInfo.ByteOffset, -#ifdef FEATURE_PUT_STRUCT_ARG_STK - callArg->AbiInfo.GetStackByteSize(), -#endif - call, putInIncomingArgArea); - -#if defined(DEBUG) && defined(FEATURE_PUT_STRUCT_ARG_STK) - if (varTypeIsStruct(callArg->GetSignatureType())) - { - // We use GT_BLK only for non-SIMD struct arguments. - if (arg->OperIs(GT_BLK)) - { - assert(!varTypeIsSIMD(arg)); - } - else if (!arg->TypeIs(TYP_STRUCT)) - { -#ifdef TARGET_ARM - assert((callArg->AbiInfo.GetStackSlotsNumber() == 1) || - ((arg->TypeGet() == TYP_DOUBLE) && (callArg->AbiInfo.GetStackSlotsNumber() == 2))); -#else - assert(varTypeIsSIMD(arg) || (callArg->AbiInfo.GetStackSlotsNumber() == 1)); -#endif - } - } -#endif // defined(DEBUG) && defined(FEATURE_PUT_STRUCT_ARG_STK) + BlockRange().InsertAfter(arg, putArg); + *ppArg = arg = putArg; } } - JITDUMP("new node is : "); - DISPNODE(putArg); - JITDUMP("\n"); + if (arg->OperIsPutArgStk()) + { + LowerPutArgStk(arg->AsPutArgStk()); + } - return putArg; + DISPTREERANGE(BlockRange(), arg); } //------------------------------------------------------------------------ -// LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between -// the argument evaluation and the call. This is the point at which the source is -// consumed and the value transitions from control of the register allocator to the calling -// convention. +// SplitArgumentBetweenRegistersAndStack: +// Split an argument that is passed in both registers and stack into two +// separate arguments, one for registers and one for stack. // -// Arguments: -// call - The call node -// callArg - Call argument -// late - Whether it is the late arg that is being lowered. +// Parameters: +// call - The call node +// callArg - Call argument // -// Return Value: -// None. +// Remarks: +// The argument is changed to be its stack part, and a new argument is +// inserted after it representing its registers. // -void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late) +void Lowering::SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg* callArg) { - GenTree** ppArg = late ? 
&callArg->LateNodeRef() : &callArg->EarlyNodeRef(); + GenTree** ppArg = &callArg->NodeRef(); GenTree* arg = *ppArg; - assert(arg != nullptr); - JITDUMP("lowering arg : "); - DISPNODE(arg); - assert(arg->IsValue()); + assert(arg->OperIs(GT_BLK, GT_FIELD_LIST) || arg->OperIsLocalRead()); + assert(!call->IsFastTailCall()); - var_types type = genActualType(arg); + const ABIPassingInformation& abiInfo = callArg->AbiInfo; + assert(abiInfo.IsSplitAcrossRegistersAndStack()); -#if defined(FEATURE_SIMD) -#if defined(TARGET_X86) - // Non-param TYP_SIMD12 local var nodes are massaged in Lower to TYP_SIMD16 to match their - // allocated size (see lvSize()). However, when passing the variables as arguments, and - // storing the variables to the outgoing argument area on the stack, we must use their - // actual TYP_SIMD12 type, so exactly 12 bytes is allocated and written. - if (type == TYP_SIMD16) +#ifdef DEBUG + for (unsigned i = 0; i < abiInfo.NumSegments; i++) { - if ((arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_STORE_LCL_VAR)) + assert((i < abiInfo.NumSegments - 1) == abiInfo.Segment(i).IsPassedInRegister()); + } +#endif + + unsigned numRegs = abiInfo.NumSegments - 1; + const ABIPassingSegment& stackSeg = abiInfo.Segment(abiInfo.NumSegments - 1); + + JITDUMP("Dividing split arg [%06u] with %u registers, %u stack space into two arguments\n", + Compiler::dspTreeID(arg), numRegs, stackSeg.Size); + + ClassLayout* registersLayout = SliceLayout(callArg->GetSignatureLayout(), 0, stackSeg.Offset); + ClassLayout* stackLayout = SliceLayout(callArg->GetSignatureLayout(), stackSeg.Offset, + callArg->GetSignatureLayout()->GetSize() - stackSeg.Offset); + + GenTree* stackNode = nullptr; + GenTree* registersNode = nullptr; + + if (arg->OperIsFieldList()) + { + JITDUMP("Argument is a FIELD_LIST\n", numRegs, stackSeg.Size); + + GenTreeFieldList::Use* splitPoint = nullptr; + // Split the field list into its register and stack parts. + for (GenTreeFieldList::Use& use : arg->AsFieldList()->Uses()) { - const LclVarDsc* varDsc = comp->lvaGetDesc(arg->AsLclVarCommon()); - type = varDsc->lvType; + if (use.GetOffset() >= stackSeg.Offset) + { + splitPoint = &use; + JITDUMP("Found split point at offset %u\n", splitPoint->GetOffset()); + break; + } + + if (use.GetOffset() + genTypeSize(use.GetType()) > stackSeg.Offset) + { + // Field overlaps partially into the stack segment, cannot + // handle this without spilling. 
+ break; + } } - else if (arg->OperIs(GT_HWINTRINSIC)) + + if (splitPoint == nullptr) { - GenTreeHWIntrinsic* hwintrinsic = arg->AsHWIntrinsic(); + JITDUMP("No clean split point found, spilling FIELD_LIST\n", splitPoint->GetOffset()); - // For HWIntrinsic, there are some intrinsics like ExtractVector128 which have - // a gtType of TYP_SIMD16 but a SimdSize of 32, so we can't necessarily assert - // the simd size + unsigned int newLcl = + StoreFieldListToNewLocal(comp->typGetObjLayout(callArg->GetSignatureClassHandle()), arg->AsFieldList()); + stackNode = comp->gtNewLclFldNode(newLcl, TYP_STRUCT, stackSeg.Offset, stackLayout); + registersNode = comp->gtNewLclFldNode(newLcl, TYP_STRUCT, 0, registersLayout); + BlockRange().InsertBefore(arg, stackNode); + BlockRange().InsertBefore(arg, registersNode); + } + else + { + stackNode = comp->gtNewFieldList(); + registersNode = comp->gtNewFieldList(); + + BlockRange().InsertBefore(arg, stackNode); + BlockRange().InsertBefore(arg, registersNode); - if (hwintrinsic->GetSimdSize() == 12) + for (GenTreeFieldList::Use& use : arg->AsFieldList()->Uses()) { - if (hwintrinsic->GetHWIntrinsicId() != NI_Vector128_AsVector128Unsafe) + if (&use == splitPoint) { - // Most nodes that have a simdSize of 12 are actually producing a TYP_SIMD12 - // and have been massaged to TYP_SIMD16 to match the actual product size. This - // is not the case for NI_Vector128_AsVector128Unsafe which is explicitly taking - // a TYP_SIMD12 and producing a TYP_SIMD16. - - type = TYP_SIMD12; + break; } + + registersNode->AsFieldList()->AddFieldLIR(comp, use.GetNode(), use.GetOffset(), use.GetType()); + } + + for (GenTreeFieldList::Use* use = splitPoint; use != nullptr; use = use->GetNext()) + { + stackNode->AsFieldList()->AddFieldLIR(comp, use->GetNode(), use->GetOffset() - stackSeg.Offset, + use->GetType()); } } - } -#elif defined(TARGET_AMD64) - // TYP_SIMD8 parameters that are passed as longs - if (type == TYP_SIMD8 && genIsValidIntReg(callArg->AbiInfo.GetRegNum())) - { - GenTree* bitcast = comp->gtNewBitCastNode(TYP_LONG, arg); - BlockRange().InsertAfter(arg, bitcast); - *ppArg = arg = bitcast; - type = TYP_LONG; + BlockRange().Remove(arg); } -#endif // defined(TARGET_X86) -#endif // defined(FEATURE_SIMD) + else if (arg->OperIs(GT_BLK)) + { + JITDUMP("Argument is a BLK\n", numRegs, stackSeg.Size); - // If we hit this we are probably double-lowering. - assert(!arg->OperIsPutArg()); + GenTree* blkAddr = arg->AsBlk()->Addr(); + target_ssize_t offset = 0; + comp->gtPeelOffsets(&blkAddr, &offset); -#if !defined(TARGET_64BIT) - if (comp->opts.compUseSoftFP && (type == TYP_DOUBLE)) - { - // Unlike TYP_LONG we do no decomposition for doubles, yet we maintain - // it as a primitive type until lowering. So we need to get it into the - // right form here. 
+ LIR::Use addrUse; + bool gotUse = BlockRange().TryGetUse(blkAddr, &addrUse); + assert(gotUse); - unsigned argLclNum = comp->lvaGrabTemp(false DEBUGARG("double arg on softFP")); - GenTree* store = comp->gtNewTempStore(argLclNum, arg); - GenTree* low = comp->gtNewLclFldNode(argLclNum, TYP_INT, 0); - GenTree* high = comp->gtNewLclFldNode(argLclNum, TYP_INT, 4); - GenTree* longNode = new (comp, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, low, high); - BlockRange().InsertAfter(arg, store, low, high, longNode); + unsigned addrLcl; + if (addrUse.Def()->OperIsScalarLocal() && + !comp->lvaGetDesc(addrUse.Def()->AsLclVarCommon())->IsAddressExposed() && + IsInvariantInRange(addrUse.Def(), arg)) + { + JITDUMP("Reusing LCL_VAR\n", numRegs, stackSeg.Size); + addrLcl = addrUse.Def()->AsLclVarCommon()->GetLclNum(); + } + else + { + JITDUMP("Spilling address\n", numRegs, stackSeg.Size); + addrLcl = addrUse.ReplaceWithLclVar(comp); + } - *ppArg = arg = longNode; - type = TYP_LONG; + auto createAddr = [=](unsigned offs) { + GenTree* addr = comp->gtNewLclVarNode(addrLcl); + offs += (unsigned)offset; + if (offs != 0) + { + GenTree* addrOffs = comp->gtNewIconNode((ssize_t)offs, TYP_I_IMPL); + addr = comp->gtNewOperNode(GT_ADD, varTypeIsGC(addr) ? TYP_BYREF : TYP_I_IMPL, addr, addrOffs); + } - comp->lvaSetVarDoNotEnregister(argLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + return addr; + }; - JITDUMP("Created new nodes for double-typed arg on softFP:\n"); - DISPRANGE(LIR::ReadOnlyRange(store, longNode)); - } + GenTree* addr = createAddr(stackSeg.Offset); + stackNode = comp->gtNewBlkIndir(stackLayout, addr, arg->gtFlags & GTF_IND_COPYABLE_FLAGS); + BlockRange().InsertBefore(arg, LIR::SeqTree(comp, stackNode)); + LowerRange(addr, stackNode); - if (varTypeIsLong(type)) - { - noway_assert(arg->OperIs(GT_LONG)); - GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(); - fieldList->AddFieldLIR(comp, arg->AsOp()->gtGetOp1(), 0, TYP_INT); - fieldList->AddFieldLIR(comp, arg->AsOp()->gtGetOp2(), 4, TYP_INT); - GenTree* newArg = NewPutArg(call, fieldList, callArg, type); + registersNode = comp->gtNewFieldList(); + BlockRange().InsertBefore(arg, registersNode); - if (callArg->AbiInfo.GetRegNum() != REG_STK) - { - assert(callArg->AbiInfo.NumRegs == 2); - // In the register argument case, NewPutArg replaces the original field list args with new - // GT_PUTARG_REG nodes, inserts them in linear order and returns the field list. So the - // only thing left to do is to insert the field list itself in linear order. - assert(newArg == fieldList); - BlockRange().InsertBefore(arg, newArg); - } - else + for (unsigned i = 0; i < numRegs; i++) { - // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK. - // Although the hi argument needs to be pushed first, that will be handled by the general case, - // in which the fields will be reversed. 
- assert(callArg->AbiInfo.GetStackSlotsNumber() == 2); - newArg->SetRegNum(REG_STK); - BlockRange().InsertBefore(arg, fieldList, newArg); + const ABIPassingSegment& seg = abiInfo.Segment(i); + + GenTree* addr = createAddr(seg.Offset); + GenTree* indir = comp->gtNewIndir(seg.GetRegisterType(callArg->GetSignatureLayout()), addr, + arg->gtFlags & GTF_IND_COPYABLE_FLAGS); + registersNode->AsFieldList()->AddFieldLIR(comp, indir, seg.Offset, indir->TypeGet()); + BlockRange().InsertBefore(registersNode, LIR::SeqTree(comp, indir)); + LowerRange(addr, indir); } - *ppArg = newArg; - BlockRange().Remove(arg); + BlockRange().Remove(arg, /* markOperandsUnused */ true); } else -#endif // !defined(TARGET_64BIT) { + assert(arg->OperIsLocalRead()); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (call->IsVarargs() || comp->opts.compUseSoftFP || callArg->AbiInfo.IsMismatchedArgType()) - { - // Insert copies as needed to move float value to integer register - // if the ABI requires it. - GenTree* newNode = LowerFloatArg(ppArg, callArg); - if (newNode != nullptr) - { - type = newNode->TypeGet(); - } - } -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 + JITDUMP("Argument is a local\n", numRegs, stackSeg.Size); - GenTree* putArg = NewPutArg(call, arg, callArg, type); + GenTreeLclVarCommon* lcl = arg->AsLclVarCommon(); - // In the case of register passable struct (in one or two registers) - // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs.) - // If an extra node is returned, splice it in the right place in the tree. - if (arg != putArg) + stackNode = + comp->gtNewLclFldNode(lcl->GetLclNum(), TYP_STRUCT, lcl->GetLclOffs() + stackSeg.Offset, stackLayout); + BlockRange().InsertBefore(arg, stackNode); + + registersNode = comp->gtNewFieldList(); + BlockRange().InsertBefore(arg, registersNode); + + for (unsigned i = 0; i < numRegs; i++) { - ReplaceArgWithPutArgOrBitcast(ppArg, putArg); + const ABIPassingSegment& seg = abiInfo.Segment(i); + GenTree* fldNode = + comp->gtNewLclFldNode(lcl->GetLclNum(), seg.GetRegisterType(callArg->GetSignatureLayout()), + lcl->GetLclOffs() + seg.Offset); + registersNode->AsFieldList()->AddFieldLIR(comp, fldNode, seg.Offset, fldNode->TypeGet()); + BlockRange().InsertBefore(registersNode, fldNode); } + + BlockRange().Remove(arg); } - arg = *ppArg; + JITDUMP("New stack node is:\n"); + DISPTREERANGE(BlockRange(), stackNode); + + JITDUMP("New registers node is:\n"); + DISPTREERANGE(BlockRange(), registersNode); - if (arg->OperIsPutArgStk() || arg->OperIsPutArgSplit()) + ABIPassingSegment newStackSeg = ABIPassingSegment::OnStack(stackSeg.GetStackOffset(), 0, stackSeg.Size); + ABIPassingInformation newStackAbi = ABIPassingInformation::FromSegment(comp, false, newStackSeg); + + ABIPassingInformation newRegistersAbi(comp, numRegs); + for (unsigned i = 0; i < numRegs; i++) { - LowerPutArgStkOrSplit(arg->AsPutArgStk()); + newRegistersAbi.Segment(i) = abiInfo.Segment(i); } + + callArg->AbiInfo = newStackAbi; + *ppArg = arg = stackNode; + + NewCallArg newRegisterArgAdd = NewCallArg::Struct(registersNode, TYP_STRUCT, registersLayout); + CallArg* newRegisterArg = call->gtArgs.InsertAfter(comp, callArg, newRegisterArgAdd); + + newRegisterArg->AbiInfo = newRegistersAbi; + + if (callArg->GetLateNode() != nullptr) + { + newRegisterArg->SetLateNext(callArg->GetLateNext()); + callArg->SetLateNext(newRegisterArg); + + newRegisterArg->SetLateNode(registersNode); + newRegisterArg->SetEarlyNode(nullptr); + 
} + + JITDUMP("Added a new call arg. New call is:\n"); + DISPTREERANGE(BlockRange(), call); } -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) //------------------------------------------------------------------------ -// LowerFloatArg: Lower float call arguments on the arm/LoongArch64/RiscV64 platform. -// -// Arguments: -// arg - The arg node -// callArg - call argument info +// SliceLayout: +// Slice a class layout into the specified range. // -// Return Value: -// Return nullptr, if no transformation was done; -// return arg if there was in place transformation; -// return a new tree if the root was changed. +// Parameters: +// layout - The layout +// offset - Start offset of the slice +// size - Size of the slice // -// Notes: -// This must handle scalar float arguments as well as GT_FIELD_LISTs -// with floating point fields. +// Returns: +// New layout of size 'size' // -GenTree* Lowering::LowerFloatArg(GenTree** pArg, CallArg* callArg) +ClassLayout* Lowering::SliceLayout(ClassLayout* layout, unsigned offset, unsigned size) { - GenTree* arg = *pArg; - if (callArg->AbiInfo.GetRegNum() != REG_STK) + ClassLayoutBuilder builder(comp, size); + INDEBUG(builder.SetName(comp->printfAlloc("%s[%03u..%03u)", layout->GetClassName(), offset, offset + size), + comp->printfAlloc("%s[%03u..%03u)", layout->GetShortClassName(), offset, offset + size))); + + if (((offset % TARGET_POINTER_SIZE) == 0) && ((size % TARGET_POINTER_SIZE) == 0) && layout->HasGCPtr()) { - if (arg->OperIs(GT_FIELD_LIST)) + for (unsigned i = 0; i < size; i += TARGET_POINTER_SIZE) { - // Transform fields that are passed as registers in place. - regNumber currRegNumber = callArg->AbiInfo.GetRegNum(); - unsigned regIndex = 0; - for (GenTreeFieldList::Use& use : arg->AsFieldList()->Uses()) - { - if (regIndex >= callArg->AbiInfo.NumRegs) - { - break; - } - GenTree* node = use.GetNode(); - if (varTypeUsesFloatReg(node)) - { - GenTree* intNode = LowerFloatArgReg(node, currRegNumber); - assert(intNode != nullptr); + builder.SetGCPtrType(i / TARGET_POINTER_SIZE, layout->GetGCPtrType((offset + i) / TARGET_POINTER_SIZE)); + } + } + else + { + assert(!layout->HasGCPtr()); + } - ReplaceArgWithPutArgOrBitcast(&use.NodeRef(), intNode); - } + builder.AddPadding(SegmentList::Segment(0, size)); - if (node->TypeGet() == TYP_DOUBLE) - { - currRegNumber = REG_NEXT(REG_NEXT(currRegNumber)); - regIndex += 2; - } - else - { - currRegNumber = REG_NEXT(currRegNumber); - regIndex += 1; - } - } - // List fields were replaced in place. - return arg; - } - else if (varTypeUsesFloatReg(arg)) + for (const SegmentList::Segment& nonPadding : layout->GetNonPadding(comp)) + { + if ((nonPadding.End <= offset) || (nonPadding.Start >= offset + size)) { - GenTree* intNode = LowerFloatArgReg(arg, callArg->AbiInfo.GetRegNum()); - assert(intNode != nullptr); - ReplaceArgWithPutArgOrBitcast(pArg, intNode); - return *pArg; + continue; } + + unsigned start = nonPadding.Start <= offset ? 0 : (nonPadding.Start - offset); + unsigned end = nonPadding.End >= (offset + size) ? size : (nonPadding.End - offset); + + builder.RemovePadding(SegmentList::Segment(start, end)); } - return nullptr; + return comp->typGetCustomLayout(builder); } //------------------------------------------------------------------------ -// LowerFloatArgReg: Lower the float call argument node that is passed via register. 
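A worked example of SliceLayout, using a hypothetical 24-byte struct on a 64-bit target (the struct and its field offsets are made up for illustration):

// Original layout (24 bytes):
//   [ 0.. 8)  object reference   (GC slot 0 = ref)
//   [ 8..16)  long               (GC slot 1 = non-GC)
//   [16..20)  int, [20..24) trailing padding
//
// SliceLayout(layout, 8, 16):
//   - offset and size are pointer-aligned, so GC slot types are copied from the
//     original slots covering [8..24); the slice therefore has no GC references.
//   - the original non-padding range that intersects [8..24) is rebased into the
//     slice as [0..12); [12..16) of the slice remains padding.
//
// SliceLayout(layout, 0, 8) would instead produce an 8-byte layout whose single
// pointer-sized slot is a GC reference.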
+// InsertBitCastIfNecessary: +// Insert a bitcast if a primitive argument being passed in a register is not +// evaluated in the right type of register. // // Arguments: -// arg - The arg node -// regNum - register number -// -// Return Value: -// Return new bitcast node, that moves float to int register. +// argNode - Edge for the argument +// registerSegment - Register that the argument is going into // -GenTree* Lowering::LowerFloatArgReg(GenTree* arg, regNumber regNum) +void Lowering::InsertBitCastIfNecessary(GenTree** argNode, const ABIPassingSegment& registerSegment) { - assert(varTypeUsesFloatReg(arg)); + if (varTypeUsesIntReg(*argNode) == genIsValidIntReg(registerSegment.GetRegister())) + { + return; + } + + JITDUMP("Argument node [%06u] needs to be passed in %s; inserting bitcast\n", Compiler::dspTreeID(*argNode), + getRegName(registerSegment.GetRegister())); + + // Due to padding the node may be smaller than the register segment. In + // such cases we cut off the end of the segment to get an appropriate + // register type for the bitcast. + ABIPassingSegment cutRegisterSegment = registerSegment; + unsigned argNodeSize = genTypeSize(genActualType(*argNode)); + if (registerSegment.Size > argNodeSize) + { + cutRegisterSegment = + ABIPassingSegment::InRegister(registerSegment.GetRegister(), registerSegment.Offset, argNodeSize); + } + + var_types bitCastType = cutRegisterSegment.GetRegisterType(); - var_types floatType = arg->TypeGet(); - var_types intType = (floatType == TYP_FLOAT) ? TYP_INT : TYP_LONG; - GenTree* intArg = comp->gtNewBitCastNode(intType, arg); - intArg->SetRegNum(regNum); + GenTreeUnOp* bitCast = comp->gtNewBitCastNode(bitCastType, *argNode); + BlockRange().InsertAfter(*argNode, bitCast); - return intArg; + *argNode = bitCast; + if (!TryRemoveBitCast(bitCast)) + { + ContainCheckBitCast(bitCast); + } +} + +//------------------------------------------------------------------------ +// InsertPutArgReg: +// Insert a PUTARG_REG node for the specified edge. If the argument node does +// not fit the register type, then also insert a bitcast. +// +// Arguments: +// argNode - Edge for the argument +// registerSegment - Register that the argument is going into +// +void Lowering::InsertPutArgReg(GenTree** argNode, const ABIPassingSegment& registerSegment) +{ + assert(registerSegment.IsPassedInRegister()); + + InsertBitCastIfNecessary(argNode, registerSegment); + GenTree* putArg = comp->gtNewPutArgReg(genActualType(*argNode), *argNode, registerSegment.GetRegister()); + BlockRange().InsertAfter(*argNode, putArg); + *argNode = putArg; } -#endif -// do lowering steps for each arg of a call +//------------------------------------------------------------------------ +// LowerArgsForCall: +// Lower the arguments of a call node. +// +// Arguments: +// call - Call node +// void Lowering::LowerArgsForCall(GenTreeCall* call) { - JITDUMP("args:\n======\n"); + JITDUMP("Args:\n======\n"); for (CallArg& arg : call->gtArgs.EarlyArgs()) { - LowerArg(call, &arg, false); + LowerArg(call, &arg); } - JITDUMP("\nlate:\n======\n"); + JITDUMP("\nLate args:\n======\n"); for (CallArg& arg : call->gtArgs.LateArgs()) { - LowerArg(call, &arg, true); + LowerArg(call, &arg); } #if defined(TARGET_X86) && defined(FEATURE_IJW) @@ -2181,9 +2259,9 @@ void Lowering::InsertSpecialCopyArg(GenTreePutArgStk* putArgStk, CORINFO_CLASS_H // argument values. 
However, there are constraints on how the PUTARG nodes // can appear: // -// - No other GT_CALL nodes are allowed between a PUTARG_REG/PUTARG_SPLIT -// node and the call. For FEATURE_FIXED_OUT_ARGS this condition is also true -// for PUTARG_STK. +// - No other GT_CALL nodes are allowed between a PUTARG_REG node and the +// call. For FEATURE_FIXED_OUT_ARGS this condition is also true for +// PUTARG_STK. // - For !FEATURE_FIXED_OUT_ARGS, the PUTARG_STK nodes must come in push // order. // @@ -2570,10 +2648,11 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next) GenTree* lArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); GenTree* rArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); - ssize_t MaxUnrollSize = comp->IsBaselineSimdIsaSupported() ? 32 : 16; + ssize_t MaxUnrollSize = 16; -#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) - if (comp->IsBaselineVector512IsaSupportedOpportunistically()) +#ifdef FEATURE_SIMD +#ifdef TARGET_XARCH + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { MaxUnrollSize = 128; } @@ -2582,7 +2661,12 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next) // We need AVX2 for NI_Vector256_op_Equality, fallback to Vector128 if only AVX is available MaxUnrollSize = 64; } -#endif + else +#endif // TARGET_XARCH + { + MaxUnrollSize = 32; + } +#endif // FEATURE_SIMD if (cnsSize <= MaxUnrollSize) { @@ -2951,6 +3035,15 @@ GenTree* Lowering::LowerCall(GenTree* node) BlockRange().InsertBefore(call, std::move(controlExprRange)); call->gtControlExpr = controlExpr; + +#ifdef TARGET_RISCV64 + // If controlExpr is a constant, we should contain it inside the call so that we can move the lower 12-bits of + // the value to call instruction's (JALR) offset. + if (controlExpr->IsCnsIntOrI() && !controlExpr->AsIntCon()->ImmedValNeedsReloc(comp) && !call->IsFastTailCall()) + { + MakeSrcContained(call, controlExpr); + } +#endif // TARGET_RISCV64 } if (comp->opts.IsCFGEnabled()) @@ -3261,7 +3354,6 @@ void Lowering::LowerFastTailCall(GenTreeCall* call) unsigned int overwrittenStart = put->getArgOffset(); unsigned int overwrittenEnd = overwrittenStart + put->GetStackByteSize(); - int baseOff = -1; // Stack offset of first arg on stack for (unsigned callerArgLclNum = 0; callerArgLclNum < comp->info.compArgsCount; callerArgLclNum++) { @@ -3272,34 +3364,12 @@ void Lowering::LowerFastTailCall(GenTreeCall* call) continue; } - unsigned int argStart; - unsigned int argEnd; -#if defined(TARGET_AMD64) - if (TargetOS::IsWindows) - { - // On Windows x64, the argument position determines the stack slot uniquely, and even the - // register args take up space in the stack frame (shadow space). - argStart = callerArgLclNum * TARGET_POINTER_SIZE; - argEnd = argStart + static_cast(callerArgDsc->lvArgStackSize()); - } - else -#endif // TARGET_AMD64 - { - assert(callerArgDsc->GetStackOffset() != BAD_STK_OFFS); - - if (baseOff == -1) - { - baseOff = callerArgDsc->GetStackOffset(); - } + const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(callerArgLclNum); + assert(abiInfo.HasExactlyOneStackSegment()); + const ABIPassingSegment& seg = abiInfo.Segment(0); - // On all ABIs where we fast tail call the stack args should come in order. - assert(baseOff <= callerArgDsc->GetStackOffset()); - - // Compute offset of this stack argument relative to the first stack arg. - // This will be its offset into the incoming arg space area. 
- argStart = static_cast(callerArgDsc->GetStackOffset() - baseOff); - argEnd = argStart + comp->lvaLclSize(callerArgLclNum); - } + unsigned argStart = seg.GetStackOffset(); + unsigned argEnd = argStart + seg.GetStackSize(); // If ranges do not overlap then this PUTARG_STK will not mess up the arg. if ((overwrittenEnd <= argStart) || (overwrittenStart >= argEnd)) @@ -3559,7 +3629,7 @@ GenTree* Lowering::LowerTailCallViaJitHelper(GenTreeCall* call, GenTree* callTar argEntry = call->gtArgs.GetArgByIndex(numArgs - 2); assert(argEntry != nullptr); GenTree* arg1 = argEntry->GetEarlyNode()->AsPutArgStk()->gtGetOp1(); - assert(arg1->gtOper == GT_CNS_INT); + assert(arg1->OperIs(GT_CNS_INT)); ssize_t tailCallHelperFlags = 1 | // always restore EDI,ESI,EBX (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag @@ -3569,7 +3639,7 @@ GenTree* Lowering::LowerTailCallViaJitHelper(GenTreeCall* call, GenTree* callTar argEntry = call->gtArgs.GetArgByIndex(numArgs - 3); assert(argEntry != nullptr); GenTree* arg2 = argEntry->GetEarlyNode()->AsPutArgStk()->gtGetOp1(); - assert(arg2->gtOper == GT_CNS_INT); + assert(arg2->OperIs(GT_CNS_INT)); arg2->AsIntCon()->gtIconVal = nNewStkArgsWords; @@ -3578,7 +3648,7 @@ GenTree* Lowering::LowerTailCallViaJitHelper(GenTreeCall* call, GenTree* callTar argEntry = call->gtArgs.GetArgByIndex(numArgs - 4); assert(argEntry != nullptr); GenTree* arg3 = argEntry->GetEarlyNode()->AsPutArgStk()->gtGetOp1(); - assert(arg3->gtOper == GT_CNS_INT); + assert(arg3->OperIs(GT_CNS_INT)); #endif // DEBUG // Transform this call node into a call to Jit tail call helper. @@ -3765,16 +3835,13 @@ void Lowering::LowerCFGCall(GenTreeCall* call) call->gtArgs.PushLateBack(targetArg); // Set up ABI information for this arg. - targetArg->NewAbiInfo = - ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_DISPATCH_INDIRECT_CALL_ADDR, - 0, TARGET_POINTER_SIZE)); - targetArg->AbiInfo.ArgType = callTarget->TypeGet(); - targetArg->AbiInfo.SetRegNum(0, REG_DISPATCH_INDIRECT_CALL_ADDR); - targetArg->AbiInfo.NumRegs = 1; - targetArg->AbiInfo.ByteSize = TARGET_POINTER_SIZE; + targetArg->AbiInfo = + ABIPassingInformation::FromSegmentByValue(comp, + ABIPassingSegment::InRegister(REG_DISPATCH_INDIRECT_CALL_ADDR, + 0, TARGET_POINTER_SIZE)); // Lower the newly added args now that call is updated - LowerArg(call, targetArg, true /* late */); + LowerArg(call, targetArg); // Finally update the call to be a helper call call->gtCallType = CT_HELPER; @@ -3944,7 +4011,7 @@ void Lowering::MoveCFGCallArgs(GenTreeCall* call) // GenTree* Lowering::DecomposeLongCompare(GenTree* cmp) { - assert(cmp->gtGetOp1()->TypeGet() == TYP_LONG); + assert(cmp->gtGetOp1()->TypeIs(TYP_LONG)); GenTree* src1 = cmp->gtGetOp1(); GenTree* src2 = cmp->gtGetOp2(); @@ -4346,6 +4413,20 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) } #endif } + else if (andOp2->IsIntegralConst() && GenTree::Compare(andOp2, op2)) + { + // + // Transform EQ|NE(AND(x, y), y) into EQ|NE(AND(NOT(x), y), 0) when y is a constant. + // + + GenTree* notNode = comp->gtNewOperNode(GT_NOT, andOp1->TypeGet(), andOp1); + cmp->gtGetOp1()->AsOp()->gtOp1 = notNode; + BlockRange().InsertAfter(andOp1, notNode); + op2->BashToZeroConst(op2->TypeGet()); + + andOp1 = notNode; + op2Value = 0; + } } #ifdef TARGET_XARCH @@ -4408,6 +4489,26 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) } } + // Optimize EQ/NE(op_that_sets_zf, 0) into op_that_sets_zf with GTF_SET_FLAGS + SETCC. 
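The EQ/NE(AND(x, y), y) rewrite added above relies on the bitwise identity (x & y) == y  <=>  (~x & y) == 0: "every bit of y is present in x" is the same as "no bit of y is missing from x". A tiny standalone check of that identity in plain C++ (no JIT types involved; the constant-y restriction in the transform is a codegen choice, the identity itself holds for all values):

#include <cassert>
#include <cstdint>

bool ContainsAllBits(uint32_t x, uint32_t y)
{
    return (x & y) == y;
}

bool ContainsAllBitsRewritten(uint32_t x, uint32_t y)
{
    return (~x & y) == 0;
}

void CheckIdentity()
{
    const uint32_t samples[] = {0u, 1u, 0x80000000u, 0xFFu, 0xF0F0F0F0u, 0xFFFFFFFFu};
    for (uint32_t x : samples)
        for (uint32_t y : samples)
            assert(ContainsAllBits(x, y) == ContainsAllBitsRewritten(x, y));
}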
+ LIR::Use use; + if (cmp->OperIs(GT_EQ, GT_NE) && op2->IsIntegralConst(0) && op1->SupportsSettingZeroFlag() && + BlockRange().TryGetUse(cmp, &use) && IsProfitableToSetZeroFlag(op1)) + { + op1->gtFlags |= GTF_SET_FLAGS; + op1->SetUnusedValue(); + + GenTree* next = cmp->gtNext; + BlockRange().Remove(cmp); + BlockRange().Remove(op2); + + GenCondition cmpCondition = GenCondition::FromRelop(cmp); + GenTreeCC* setcc = comp->gtNewCC(GT_SETCC, cmp->TypeGet(), cmpCondition); + BlockRange().InsertAfter(op1, setcc); + + use.ReplaceWith(setcc); + return next; + } + return cmp; } @@ -4423,7 +4524,7 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) GenTree* Lowering::LowerCompare(GenTree* cmp) { #ifndef TARGET_64BIT - if (cmp->gtGetOp1()->TypeGet() == TYP_LONG) + if (cmp->gtGetOp1()->TypeIs(TYP_LONG)) { return DecomposeLongCompare(cmp); } @@ -4640,7 +4741,10 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select) // Return Value: // True if relop was transformed and is now right before 'parent'; otherwise false. // -bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond) +bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, + GenTree* condition, + GenCondition* cond, + bool allowMultipleFlagsChecks) { JITDUMP("Lowering condition:\n"); DISPTREERANGE(BlockRange(), condition); @@ -4672,50 +4776,47 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, } #endif - // Optimize EQ/NE(op_that_sets_zf, 0) into op_that_sets_zf with GTF_SET_FLAGS. - if (optimizing && relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0) && - relopOp1->SupportsSettingZeroFlag() && IsInvariantInRange(relopOp1, parent)) +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) + if (!allowMultipleFlagsChecks) { - relopOp1->gtFlags |= GTF_SET_FLAGS; - relopOp1->SetUnusedValue(); + const GenConditionDesc& desc = GenConditionDesc::Get(*cond); - BlockRange().Remove(relopOp1); - BlockRange().InsertBefore(parent, relopOp1); - BlockRange().Remove(relop); - BlockRange().Remove(relopOp2); + if (desc.oper != GT_NONE) + { + return false; + } } - else +#endif + + relop->gtType = TYP_VOID; + relop->gtFlags |= GTF_SET_FLAGS; + + if (relop->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT)) { - relop->gtType = TYP_VOID; - relop->gtFlags |= GTF_SET_FLAGS; + relop->SetOper(GT_CMP); - if (relop->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT)) + if (cond->PreferSwap()) { - relop->SetOper(GT_CMP); - - if (cond->PreferSwap()) - { - std::swap(relop->gtOp1, relop->gtOp2); - *cond = GenCondition::Swap(*cond); - } + std::swap(relop->gtOp1, relop->gtOp2); + *cond = GenCondition::Swap(*cond); } + } #ifdef TARGET_XARCH - else if (relop->OperIs(GT_BITTEST_EQ, GT_BITTEST_NE)) - { - relop->SetOper(GT_BT); - } + else if (relop->OperIs(GT_BITTEST_EQ, GT_BITTEST_NE)) + { + relop->SetOper(GT_BT); + } #endif - else - { - assert(relop->OperIs(GT_TEST_EQ, GT_TEST_NE)); - relop->SetOper(GT_TEST); - } + else + { + assert(relop->OperIs(GT_TEST_EQ, GT_TEST_NE)); + relop->SetOper(GT_TEST); + } - if (relop->gtNext != parent) - { - BlockRange().Remove(relop); - BlockRange().InsertBefore(parent, relop); - } + if (relop->gtNext != parent) + { + BlockRange().Remove(relop); + BlockRange().InsertBefore(parent, relop); } return true; @@ -4725,7 +4826,7 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, { assert((condition->gtPrev->gtFlags & GTF_SET_FLAGS) != 0); GenTree* flagsDef = condition->gtPrev; -#ifdef TARGET_ARM64 +#if 
defined(TARGET_ARM64) || defined(TARGET_AMD64) // CCMP is a flag producing node that also consumes flags, so find the // "root" of the flags producers and move the entire range. // We limit this to 10 nodes look back to avoid quadratic behavior. @@ -4742,6 +4843,18 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, *cond = condition->AsCC()->gtCondition; +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) + if (!allowMultipleFlagsChecks) + { + const GenConditionDesc& desc = GenConditionDesc::Get(*cond); + + if (desc.oper != GT_NONE) + { + return false; + } + } +#endif + LIR::Range range = BlockRange().Remove(flagsDef, condition->gtPrev); BlockRange().InsertBefore(parent, std::move(range)); BlockRange().Remove(condition); @@ -4856,7 +4969,7 @@ GenTreeCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition) // Lower "jmp " tail call to insert PInvoke method epilog if required. void Lowering::LowerJmpMethod(GenTree* jmp) { - assert(jmp->OperGet() == GT_JMP); + assert(jmp->OperIs(GT_JMP)); JITDUMP("lowering GT_JMP\n"); DISPNODE(jmp); @@ -4885,7 +4998,7 @@ void Lowering::LowerRet(GenTreeOp* ret) // - We're returning a floating type as an integral type or vice-versa, or // - If we're returning a struct as a primitive type, we change the type of // 'retval' in 'LowerRetStructLclVar()' - bool needBitcast = (ret->TypeGet() != TYP_VOID) && !varTypeUsesSameRegType(ret, retVal); + bool needBitcast = !ret->TypeIs(TYP_VOID) && !varTypeUsesSameRegType(ret, retVal); bool doPrimitiveBitcast = false; if (needBitcast) { @@ -4900,12 +5013,12 @@ void Lowering::LowerRet(GenTreeOp* ret) assert(!varTypeIsStruct(ret) && !varTypeIsStruct(retVal)); #endif - GenTree* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), retVal); + GenTreeUnOp* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), retVal); ret->SetReturnValue(bitcast); BlockRange().InsertBefore(ret, bitcast); ContainCheckBitCast(bitcast); } - else if (ret->TypeGet() != TYP_VOID) + else if (!ret->TypeIs(TYP_VOID)) { #if FEATURE_MULTIREG_RET if (comp->compMethodReturnsMultiRegRetType() && retVal->OperIs(GT_LCL_VAR)) @@ -4934,7 +5047,11 @@ void Lowering::LowerRet(GenTreeOp* ret) } #endif // DEBUG - if (varTypeIsStruct(ret)) + if (retVal->OperIsFieldList()) + { + LowerRetFieldList(ret, retVal->AsFieldList()); + } + else if (varTypeIsStruct(ret)) { LowerRetStruct(ret); } @@ -4946,7 +5063,6 @@ void Lowering::LowerRet(GenTreeOp* ret) } } - // Method doing PInvokes has exactly one return block unless it has tail calls. if (comp->compMethodRequiresPInvokeFrame()) { InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret)); @@ -4954,6 +5070,364 @@ void Lowering::LowerRet(GenTreeOp* ret) ContainCheckRet(ret); } +struct LowerFieldListRegisterInfo +{ + unsigned Offset; + var_types RegType; + + LowerFieldListRegisterInfo(unsigned offset, var_types regType) + : Offset(offset) + , RegType(regType) + { + } +}; + +//---------------------------------------------------------------------------------------------- +// LowerRetFieldList: +// Lower a returned FIELD_LIST node. 
+// +// Arguments: +// ret - The return node +// fieldList - The field list +// +void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList) +{ + const ReturnTypeDesc& retDesc = comp->compRetTypeDesc; + unsigned numRegs = retDesc.GetReturnRegCount(); + + auto getRegInfo = [=, &retDesc](unsigned regIndex) { + unsigned offset = retDesc.GetReturnFieldOffset(regIndex); + var_types regType = genActualType(retDesc.GetReturnRegType(regIndex)); + return LowerFieldListRegisterInfo(offset, regType); + }; + + bool isCompatible = IsFieldListCompatibleWithRegisters(fieldList, numRegs, getRegInfo); + if (!isCompatible) + { + unsigned lclNum = + StoreFieldListToNewLocal(comp->typGetObjLayout(comp->info.compMethodInfo->args.retTypeClass), fieldList); + LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); + + GenTree* retValue = comp->gtNewLclvNode(lclNum, varDsc->TypeGet()); + ret->SetReturnValue(retValue); + BlockRange().InsertBefore(ret, retValue); + LowerNode(retValue); + + BlockRange().Remove(fieldList); + + if (numRegs == 1) + { + var_types nativeReturnType = comp->info.compRetNativeType; + ret->ChangeType(genActualType(nativeReturnType)); + LowerRetSingleRegStructLclVar(ret); + } + else + { + varDsc->lvIsMultiRegRet = true; + } + + return; + } + + LowerFieldListToFieldListOfRegisters(fieldList, numRegs, getRegInfo); +} + +//---------------------------------------------------------------------------------------------- +// StoreFieldListToNewLocal: +// Create a new local with the specified layout and store the specified +// fields of the specified FIELD_LIST into it. +// +// Arguments: +// layout - Layout of the new local +// fieldList - Fields to store to it +// +// Returns: +// Var number of new local. +// +unsigned Lowering::StoreFieldListToNewLocal(ClassLayout* layout, GenTreeFieldList* fieldList) +{ + JITDUMP("Spilling field list [%06u] to stack\n", Compiler::dspTreeID(fieldList)); + unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Spilled local for field list")); + LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); + comp->lvaSetStruct(lclNum, layout, false); + comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + GenTree* store = comp->gtNewStoreLclFldNode(lclNum, use.GetType(), use.GetOffset(), use.GetNode()); + BlockRange().InsertAfter(use.GetNode(), store); + LowerNode(store); + } + + return lclNum; +} + +//---------------------------------------------------------------------------------------------- +// LowerArgFieldList: +// Lower an argument FIELD_LIST node. 
+//
+// Arguments:
+//   arg       - The argument
+//   fieldList - The FIELD_LIST node
+//
+void Lowering::LowerArgFieldList(CallArg* arg, GenTreeFieldList* fieldList)
+{
+    assert(!arg->AbiInfo.HasAnyStackSegment());
+
+    auto getRegInfo = [=](unsigned regIndex) {
+        const ABIPassingSegment& seg = arg->AbiInfo.Segment(regIndex);
+        return LowerFieldListRegisterInfo(seg.Offset, seg.GetRegisterType());
+    };
+
+    bool isCompatible = IsFieldListCompatibleWithRegisters(fieldList, arg->AbiInfo.NumSegments, getRegInfo);
+    if (!isCompatible)
+    {
+        ClassLayout* layout = comp->typGetObjLayout(arg->GetSignatureClassHandle());
+        unsigned     lclNum = StoreFieldListToNewLocal(layout, fieldList);
+        fieldList->Uses().Clear();
+        for (const ABIPassingSegment& seg : arg->AbiInfo.Segments())
+        {
+            GenTreeLclFld* fld = comp->gtNewLclFldNode(lclNum, seg.GetRegisterType(layout), seg.Offset);
+            fieldList->AddFieldLIR(comp, fld, seg.Offset, fld->TypeGet());
+            BlockRange().InsertBefore(fieldList, fld);
+        }
+    }
+    else
+    {
+        LowerFieldListToFieldListOfRegisters(fieldList, arg->AbiInfo.NumSegments, getRegInfo);
+    }
+
+    GenTreeFieldList::Use* field = fieldList->Uses().GetHead();
+    for (const ABIPassingSegment& seg : arg->AbiInfo.Segments())
+    {
+        assert((field != nullptr) && "Ran out of fields while inserting PUTARG_REG");
+        InsertPutArgReg(&field->NodeRef(), seg);
+        field = field->GetNext();
+    }
+
+    assert((field == nullptr) && "Missed fields while inserting PUTARG_REG");
+
+    arg->NodeRef() = fieldList->SoleFieldOrThis();
+    if (arg->GetNode() != fieldList)
+    {
+        BlockRange().Remove(fieldList);
+    }
+}
+
+//----------------------------------------------------------------------------------------------
+// IsFieldListCompatibleWithRegisters:
+//   Check if the fields of a FIELD_LIST are compatible with the target
+//   registers.
+//
+// Arguments:
+//   fieldList  - The FIELD_LIST node
+//   numRegs    - Number of registers the value is placed in
+//   getRegInfo - Functor returning the offset and type of each register
+//
+// Returns:
+//   True if the fields of the FIELD_LIST are all direct insertions into the
+//   target registers.
+//
+template <typename GetRegisterInfoFunc>
+bool Lowering::IsFieldListCompatibleWithRegisters(GenTreeFieldList*   fieldList,
+                                                  unsigned            numRegs,
+                                                  GetRegisterInfoFunc getRegInfo)
+{
+    JITDUMP("Checking if field list [%06u] is compatible with registers: ", Compiler::dspTreeID(fieldList));
+
+    GenTreeFieldList::Use* use = fieldList->Uses().GetHead();
+    for (unsigned i = 0; i < numRegs; i++)
+    {
+        LowerFieldListRegisterInfo regInfo  = getRegInfo(i);
+        unsigned                   regStart = regInfo.Offset;
+        var_types                  regType  = regInfo.RegType;
+        unsigned                   regEnd   = regStart + genTypeSize(regType);
+
+        if ((i == numRegs - 1) && !varTypeUsesFloatReg(regType))
+        {
+            // Allow tail end to pass undefined bits into the register
+            regEnd = regStart + REGSIZE_BYTES;
+        }
+
+        // TODO-CQ: Could just create a 0 for this.
+        if ((use == nullptr) || (use->GetOffset() >= regEnd))
+        {
+            JITDUMP("it is not; register %u has no corresponding field\n", i);
+            return false;
+        }
+
+        do
+        {
+            unsigned fieldStart = use->GetOffset();
+
+            if (fieldStart < regStart)
+            {
+                // Not fully contained in a register.
+                // TODO-CQ: Could just remove these fields if they don't partially overlap with the next register.
+                JITDUMP("it is not; field [%06u] starts before register %u\n", Compiler::dspTreeID(use->GetNode()), i);
+                return false;
+            }
+
+            if (fieldStart >= regEnd)
+            {
+                break;
+            }
+
+            unsigned fieldEnd = fieldStart + genTypeSize(use->GetType());
+            if (fieldEnd > regEnd)
+            {
+                JITDUMP("it is not; field [%06u] ends after register %u\n", Compiler::dspTreeID(use->GetNode()), i);
+                return false;
+            }
+
+            // float -> float insertions are not yet supported
+            if (varTypeUsesFloatReg(use->GetNode()) && varTypeUsesFloatReg(regType) && (fieldStart != regStart))
+            {
+                JITDUMP("it is not; field [%06u] requires an insertion into register %u\n",
+                        Compiler::dspTreeID(use->GetNode()), i);
+                return false;
+            }
+
+            use = use->GetNext();
+        } while (use != nullptr);
+    }
+
+    if (use != nullptr)
+    {
+        // TODO-CQ: Could just remove these fields.
+        JITDUMP("it is not; field [%06u] corresponds to no register\n", Compiler::dspTreeID(use->GetNode()));
+        return false;
+    }
+
+    JITDUMP("it is\n");
+    return true;
+}
+
+//----------------------------------------------------------------------------------------------
+// LowerFieldListToFieldListOfRegisters:
+//   Lower the specified field list into one that is compatible with the
+//   target registers.
+//
+// Arguments:
+//   fieldList  - The field list
+//   numRegs    - Number of registers the value is placed in
+//   getRegInfo - Functor returning the offset and type of each register
+//
+template <typename GetRegisterInfoFunc>
+void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList*   fieldList,
+                                                    unsigned            numRegs,
+                                                    GetRegisterInfoFunc getRegInfo)
+{
+    GenTreeFieldList::Use* use = fieldList->Uses().GetHead();
+    assert(fieldList->Uses().IsSorted());
+
+    for (unsigned i = 0; i < numRegs; i++)
+    {
+        LowerFieldListRegisterInfo regInfo  = getRegInfo(i);
+        unsigned                   regStart = regInfo.Offset;
+        var_types                  regType  = regInfo.RegType;
+        unsigned                   regEnd   = regStart + genTypeSize(regType);
+
+        if ((i == numRegs - 1) && !varTypeUsesFloatReg(regType))
+        {
+            // Allow tail end to pass undefined bits into the register
+            regEnd = regStart + REGSIZE_BYTES;
+        }
+
+        GenTreeFieldList::Use* regEntry = use;
+
+        assert(use != nullptr);
+
+        GenTree* fieldListPrev = fieldList->gtPrev;
+
+        do
+        {
+            unsigned fieldStart = use->GetOffset();
+
+            assert(fieldStart >= regStart);
+
+            if (fieldStart >= regEnd)
+            {
+                break;
+            }
+
+            var_types fieldType = use->GetType();
+            GenTree*  value     = use->GetNode();
+
+            unsigned               insertOffset = fieldStart - regStart;
+            GenTreeFieldList::Use* nextUse      = use->GetNext();
+
+            // First ensure the value does not have upper bits set that
+            // interfere with the next field.
+            if ((nextUse != nullptr) && (nextUse->GetOffset() < regEnd) &&
+                (fieldStart + genTypeSize(genActualType(fieldType)) > nextUse->GetOffset()))
+            {
+                assert(varTypeIsSmall(fieldType));
+                // This value may interfere with the next field. Ensure that doesn't happen.
+                if (comp->fgCastNeeded(value, varTypeToUnsigned(fieldType)))
+                {
+                    value = comp->gtNewCastNode(TYP_INT, value, true, varTypeToUnsigned(fieldType));
+                    BlockRange().InsertBefore(fieldList, value);
+                }
+            }
+
+            // If this is a float -> int insertion, then we need the bitcast now.
+            if (varTypeUsesFloatReg(value) && varTypeUsesIntReg(regInfo.RegType))
+            {
+                assert((genTypeSize(value) == 4) || (genTypeSize(value) == 8));
+                var_types castType = genTypeSize(value) == 4 ?
TYP_INT : TYP_LONG; + value = comp->gtNewBitCastNode(castType, value); + BlockRange().InsertBefore(fieldList, value); + } + + if (insertOffset + genTypeSize(fieldType) > genTypeSize(genActualType(value))) + { + value = comp->gtNewCastNode(TYP_LONG, value, true, TYP_LONG); + BlockRange().InsertBefore(fieldList, value); + } + + if (fieldStart != regStart) + { + GenTree* shiftAmount = comp->gtNewIconNode((ssize_t)insertOffset * BITS_PER_BYTE); + value = comp->gtNewOperNode(GT_LSH, genActualType(value), value, shiftAmount); + BlockRange().InsertBefore(fieldList, shiftAmount, value); + } + + if (regEntry != use) + { + GenTree* prevValue = regEntry->GetNode(); + if (genActualType(value) != genActualType(regEntry->GetNode())) + { + prevValue = comp->gtNewCastNode(TYP_LONG, prevValue, true, TYP_LONG); + BlockRange().InsertBefore(fieldList, prevValue); + regEntry->SetNode(prevValue); + } + + value = comp->gtNewOperNode(GT_OR, genActualType(value), prevValue, value); + BlockRange().InsertBefore(fieldList, value); + + // Remove this field from the FIELD_LIST. + regEntry->SetNext(use->GetNext()); + } + + regEntry->SetNode(value); + regEntry->SetType(genActualType(value)); + use = regEntry->GetNext(); + } while (use != nullptr); + + assert(regEntry != nullptr); + if (varTypeUsesIntReg(regEntry->GetNode()) != varTypeUsesIntReg(regType)) + { + GenTree* bitCast = comp->gtNewBitCastNode(regType, regEntry->GetNode()); + BlockRange().InsertBefore(fieldList, bitCast); + regEntry->SetNode(bitCast); + } + + if (fieldListPrev->gtNext != fieldList) + { + LowerRange(fieldListPrev->gtNext, fieldList->gtPrev); + } + } + + assert(use == nullptr); +} + //---------------------------------------------------------------------------------------------- // LowerStoreLocCommon: platform independent part of local var or field store lowering. 
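The per-register merging above (mask off stray upper bits, bitcast float fields headed for an integer register, shift each field to its byte offset, then OR the pieces together) is ordinary little-endian bit insertion. A minimal standalone sketch that builds the image of one 8-byte register from two fields at offsets 0 and 4 (illustrative plain C++, not the JIT's representation):

#include <cstdint>
#include <cstring>

uint64_t BuildRegisterImage(uint32_t fieldAtOffset0, float fieldAtOffset4)
{
    // float -> int "bitcast", as done above for float fields going into an integer register.
    uint32_t floatBits;
    std::memcpy(&floatBits, &fieldAtOffset4, sizeof(floatBits));

    uint64_t reg = fieldAtOffset0;                      // field at byte offset 0
    reg |= static_cast<uint64_t>(floatBits) << (4 * 8); // field at byte offset 4, shifted to bits 32..63
    return reg;
}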
// @@ -5003,14 +5477,14 @@ GenTree* Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore) const var_types lclRegType = varDsc->GetRegisterType(lclStore); - if ((lclStore->TypeGet() == TYP_STRUCT) && !srcIsMultiReg) + if (lclStore->TypeIs(TYP_STRUCT) && !srcIsMultiReg) { bool convertToStoreObj; if (lclStore->OperIs(GT_STORE_LCL_FLD)) { convertToStoreObj = true; } - else if (src->OperGet() == GT_CALL) + else if (src->OperIs(GT_CALL)) { GenTreeCall* call = src->AsCall(); @@ -5158,9 +5632,9 @@ GenTree* Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore) assert(lclStore->OperIsLocalStore()); assert(lclRegType != TYP_UNDEF); - GenTree* bitcast = comp->gtNewBitCastNode(lclRegType, src); - lclStore->gtOp1 = bitcast; - src = lclStore->gtGetOp1(); + GenTreeUnOp* bitcast = comp->gtNewBitCastNode(lclRegType, src); + lclStore->gtOp1 = bitcast; + src = lclStore->gtGetOp1(); BlockRange().InsertBefore(lclStore, bitcast); ContainCheckBitCast(bitcast); } @@ -5275,8 +5749,8 @@ void Lowering::LowerRetStruct(GenTreeUnOp* ret) assert(varTypeIsEnregisterable(retVal)); if (!varTypeUsesSameRegType(ret, retVal)) { - GenTree* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), retVal); - ret->gtOp1 = bitcast; + GenTreeUnOp* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), retVal); + ret->gtOp1 = bitcast; BlockRange().InsertBefore(ret, bitcast); ContainCheckBitCast(bitcast); } @@ -5347,7 +5821,7 @@ void Lowering::LowerRetSingleRegStructLclVar(GenTreeUnOp* ret) if (!varTypeUsesSameRegType(ret, lclVarType)) { - GenTree* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), ret->gtOp1); + GenTreeUnOp* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), ret->gtOp1); ret->AsOp()->SetReturnValue(bitcast); BlockRange().InsertBefore(ret, bitcast); ContainCheckBitCast(bitcast); @@ -5355,6 +5829,83 @@ void Lowering::LowerRetSingleRegStructLclVar(GenTreeUnOp* ret) } } +//---------------------------------------------------------------------------------------------- +// LowerAsyncContinuation: Lower a GT_ASYNC_CONTINUATION node +// +// Arguments: +// asyncCont - Async continuation node +// +// Returns: +// Next node to lower. +// +GenTree* Lowering::LowerAsyncContinuation(GenTree* asyncCont) +{ + assert(asyncCont->OperIs(GT_ASYNC_CONTINUATION)); + + GenTree* next = asyncCont->gtNext; + + // + // ASYNC_CONTINUATION is created from two sources: + // + // 1. The async resumption stubs are IL stubs created by the VM. These call + // runtime async functions via "calli", passing the continuation manually. + // They use the AsyncHelpers.AsyncCallContinuation intrinsic after the + // calli, which turns into the ASYNC_CONTINUATION node during import. + // + // 2. In the async transformation, ASYNC_CONTINUATION nodes are inserted + // after calls to async calls. + // + // In the former case nothing has marked the previous call as an "async" + // method. We need to do that here to ensure that the backend knows that + // the call has a non-standard calling convention that returns an + // additional GC ref. This requires additional GC tracking that we would + // otherwise not get. 
+ // + GenTree* node = asyncCont; + while (true) + { + node = node->gtPrev; + noway_assert((node != nullptr) && "Ran out of nodes while looking for call before async continuation"); + + if (node->IsCall()) + { + if (!node->AsCall()->IsAsync()) + { + JITDUMP("Marking the call [%06u] before async continuation [%06u] as an async call\n", + Compiler::dspTreeID(node), Compiler::dspTreeID(asyncCont)); + node->AsCall()->SetIsAsync(); + } + + BlockRange().Remove(asyncCont); + BlockRange().InsertAfter(node, asyncCont); + break; + } + } + + return next; +} + +//---------------------------------------------------------------------------------------------- +// LowerReturnSuspend: +// Lower a GT_RETURN_SUSPEND by making it a terminator node. +// +// Arguments: +// node - The node +// +void Lowering::LowerReturnSuspend(GenTree* node) +{ + assert(node->OperIs(GT_RETURN_SUSPEND)); + while (BlockRange().LastNode() != node) + { + BlockRange().Remove(BlockRange().LastNode(), true); + } + + if (comp->compMethodRequiresPInvokeFrame()) + { + InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(node)); + } +} + //---------------------------------------------------------------------------------------------- // LowerCallStruct: Lowers a call node that returns a struct. // @@ -5421,7 +5972,8 @@ void Lowering::LowerCallStruct(GenTreeCall* call) break; case GT_CALL: - // Argument lowering will deal with register file mismatches if needed. + case GT_FIELD_LIST: + // Argument/return lowering will deal with register file mismatches if needed. assert(varTypeIsSIMD(origType)); break; @@ -5445,7 +5997,7 @@ void Lowering::LowerCallStruct(GenTreeCall* call) { if (!varTypeUsesSameRegType(returnType, origType)) { - GenTree* bitCast = comp->gtNewBitCastNode(origType, call); + GenTreeUnOp* bitCast = comp->gtNewBitCastNode(origType, call); BlockRange().InsertAfter(call, bitCast); callUse.ReplaceWith(bitCast); ContainCheckBitCast(bitCast); @@ -5694,7 +6246,7 @@ GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call) } assert(thisArgNode != nullptr); - assert(thisArgNode->gtOper == GT_PUTARG_REG); + assert(thisArgNode->OperIs(GT_PUTARG_REG)); GenTree* thisExpr = thisArgNode->AsOp()->gtOp1; // We're going to use the 'this' expression multiple times, so make a local to copy it. @@ -5914,7 +6466,7 @@ void Lowering::InsertPInvokeMethodProlog() noway_assert(comp->info.compUnmanagedCallCountWithGCTransition); noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); - if (comp->opts.ShouldUsePInvokeHelpers()) + if (!comp->info.compPublishStubParam && comp->opts.ShouldUsePInvokeHelpers()) { return; } @@ -5943,6 +6495,13 @@ void Lowering::InsertPInvokeMethodProlog() DISPTREERANGE(firstBlockRange, store); } + // If we use P/Invoke helper calls then the hidden stub initialization + // is all we need to do. Rest will get initialized by the helper. + if (comp->opts.ShouldUsePInvokeHelpers()) + { + return; + } + // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list: // TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart); GenTree* frameAddr = comp->gtNewLclVarAddrNode(comp->lvaInlinedPInvokeFrameVar); @@ -6404,7 +6963,7 @@ GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call) // fit into int32 and we will have to turn fAllowRel32 off globally. To prevent that // we'll create a wrapper node and force LSRA to allocate a register so RIP relative // isn't used and we don't need to pessimize other callsites. 
- if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) || !IsCallTargetInRange(addr)) + if (!comp->IsAot() || !IsCallTargetInRange(addr)) { result = AddrGen(addr); } @@ -7131,7 +7690,22 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) return next; } } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 + +#ifdef TARGET_RISCV64 + if (comp->compOpportunisticallyDependsOn(InstructionSet_Zba)) + { + GenTree* next; + if (TryLowerShiftAddToShxadd(node, &next)) + { + return next; + } + else if (TryLowerZextAddToAddUw(node, &next)) + { + return next; + } + } +#endif if (node->OperIs(GT_ADD)) { @@ -7168,7 +7742,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) assert(varTypeIsFloating(divMod->TypeGet())); #endif // USE_HELPERS_FOR_INT_DIV #if defined(TARGET_ARM64) - assert(divMod->OperGet() != GT_UMOD); + assert(!divMod->OperIs(GT_UMOD)); #endif // TARGET_ARM64 GenTree* dividend = divMod->gtGetOp1(); @@ -7462,7 +8036,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) // bool Lowering::TryLowerConstIntDivOrMod(GenTree* node, GenTree** nextNode) { - assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD)); + assert(node->OperIs(GT_DIV) || node->OperIs(GT_MOD)); assert(nextNode != nullptr); GenTree* divMod = node; @@ -7482,7 +8056,7 @@ bool Lowering::TryLowerConstIntDivOrMod(GenTree* node, GenTree** nextNode) *nextNode = node->gtNext; return true; } - assert(node->OperGet() != GT_MOD); + assert(!node->OperIs(GT_MOD)); #endif // TARGET_ARM64 if (!divisor->IsCnsIntOrI()) @@ -7514,7 +8088,7 @@ bool Lowering::TryLowerConstIntDivOrMod(GenTree* node, GenTree** nextNode) return false; } - bool isDiv = divMod->OperGet() == GT_DIV; + bool isDiv = divMod->OperIs(GT_DIV); if (isDiv) { @@ -7732,7 +8306,7 @@ bool Lowering::TryLowerConstIntDivOrMod(GenTree* node, GenTree** nextNode) // GenTree* Lowering::LowerSignedDivOrMod(GenTree* node) { - assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD)); + assert(node->OperIs(GT_DIV) || node->OperIs(GT_MOD)); if (varTypeIsIntegral(node->TypeGet())) { @@ -7748,6 +8322,64 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTree* node) return node->gtNext; } +//------------------------------------------------------------------------ +// TryFoldBinop: Try removing a binop node by constant folding. +// +// Parameters: +// node - the node +// +// Returns: +// True if the node was removed +// +bool Lowering::TryFoldBinop(GenTreeOp* node) +{ + if (node->gtSetFlags()) + { + return false; + } + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + if (op1->IsIntegralConst() && op2->IsIntegralConst()) + { + GenTree* folded = comp->gtFoldExprConst(node); + assert(folded == node); + if (!folded->OperIsConst()) + { + return false; + } + + BlockRange().Remove(op1); + BlockRange().Remove(op2); + return true; + } + + if ((node->OperIs(GT_LSH, GT_RSH, GT_RSZ, GT_ROL, GT_ROR) && op2->IsIntegralConst(0)) || + (node->OperIs(GT_OR, GT_XOR) && (op1->IsIntegralConst(0) || op2->IsIntegralConst(0)))) + { + GenTree* zeroOp = op2->IsIntegralConst(0) ? op2 : op1; + GenTree* otherOp = zeroOp == op1 ? 
op2 : op1; + + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(otherOp); + } + else + { + otherOp->SetUnusedValue(); + } + + BlockRange().Remove(node); + BlockRange().Remove(zeroOp); + + return true; + } + + return false; +} + //------------------------------------------------------------------------ // LowerShift: Lower shift nodes // @@ -7757,7 +8389,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTree* node) // Notes: // Remove unnecessary shift count masking, xarch shift instructions // mask the shift count to 5 bits (or 6 bits for 64 bit operations). - +// void Lowering::LowerShift(GenTreeOp* shift) { assert(shift->OperIs(GT_LSH, GT_RSH, GT_RSZ)); @@ -7823,12 +8455,20 @@ void Lowering::LowerShift(GenTreeOp* shift) } } #endif + +#ifdef TARGET_RISCV64 + if (comp->compOpportunisticallyDependsOn(InstructionSet_Zba)) + { + GenTree* next; + TryLowerZextLeftShiftToSlliUw(shift, &next); + } +#endif } void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node) { #ifdef FEATURE_SIMD - if (node->TypeGet() == TYP_SIMD12) + if (node->TypeIs(TYP_SIMD12)) { // Assumption 1: // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off @@ -7858,9 +8498,7 @@ void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node) // as a return buffer pointer. The callee doesn't write the high 4 bytes, and we don't need to clear // it either. - LclVarDsc* varDsc = comp->lvaGetDesc(node->AsLclVarCommon()); - - if (comp->lvaMapSimd12ToSimd16(varDsc)) + if (comp->lvaMapSimd12ToSimd16(node->AsLclVarCommon()->GetLclNum())) { JITDUMP("Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n"); DISPNODE(node); @@ -7883,7 +8521,7 @@ PhaseStatus Lowering::DoPhase() } #if !defined(TARGET_64BIT) - DecomposeLongs decomp(comp); // Initialize the long decomposition class. + DecomposeLongs decomp(comp, this); // Initialize the long decomposition class. 
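TryFoldBinop above drops operations that cannot change their operand: a shift or rotate by zero, or an OR/XOR with zero, simply forwards the other value. A tiny standalone illustration of the identities being relied on:

#include <cassert>
#include <cstdint>

void CheckFoldIdentities(uint32_t x)
{
    assert((x << 0) == x);
    assert((x >> 0) == x);
    assert((x | 0u) == x);
    assert((x ^ 0u) == x);
}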
if (comp->compLongUsed) { decomp.PrepareForDecomposition(); @@ -7900,7 +8538,7 @@ PhaseStatus Lowering::DoPhase() comp->lvSetMinOptsDoNotEnreg(); } - if (comp->opts.OptimizationEnabled()) + if (comp->opts.OptimizationEnabled() && !comp->opts.IsOSR()) { MapParameterRegisterLocals(); } @@ -8034,10 +8672,7 @@ void Lowering::MapParameterRegisterLocals() } } - if (!comp->opts.IsOSR()) - { - FindInducedParameterRegisterLocals(); - } + FindInducedParameterRegisterLocals(); #ifdef DEBUG if (comp->verbose) @@ -8296,7 +8931,7 @@ unsigned Lowering::TryReuseLocalForParameterAccess(const LIR::Use& use, const Lo LclVarDsc* destLclDsc = comp->lvaGetDesc(useNode->AsLclVarCommon()); - if (destLclDsc->lvIsParamRegTarget) + if (destLclDsc->lvIsParam || destLclDsc->lvIsParamRegTarget) { return BAD_VAR_NUM; } @@ -8306,7 +8941,7 @@ unsigned Lowering::TryReuseLocalForParameterAccess(const LIR::Use& use, const Lo return BAD_VAR_NUM; } - if (destLclDsc->TypeGet() == TYP_STRUCT) + if (destLclDsc->TypeIs(TYP_STRUCT)) { return BAD_VAR_NUM; } @@ -8402,7 +9037,7 @@ void Lowering::CheckNode(Compiler* compiler, GenTree* node) #ifdef FEATURE_SIMD case GT_HWINTRINSIC: - assert(node->TypeGet() != TYP_SIMD12); + assert(!node->TypeIs(TYP_SIMD12)); break; #endif // FEATURE_SIMD @@ -8413,12 +9048,13 @@ void Lowering::CheckNode(Compiler* compiler, GenTree* node) #if defined(FEATURE_SIMD) && defined(TARGET_64BIT) if (node->TypeIs(TYP_SIMD12)) { - assert(compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc) || (varDsc->lvSize() == 12)); + assert(compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc) || + (compiler->lvaLclStackHomeSize(node->AsLclVar()->GetLclNum()) == 12)); } #endif // FEATURE_SIMD && TARGET_64BIT if (varDsc->lvPromoted) { - assert(varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegRet); + assert(varDsc->lvDoNotEnregister || (node->OperIs(GT_STORE_LCL_VAR) && varDsc->lvIsMultiRegDest)); } } break; @@ -8536,8 +9172,8 @@ void Lowering::LowerBlock(BasicBlock* block) */ bool Lowering::IndirsAreEquivalent(GenTree* candidate, GenTree* storeInd) { - assert(candidate->OperGet() == GT_IND); - assert(storeInd->OperGet() == GT_STOREIND); + assert(candidate->OperIs(GT_IND)); + assert(storeInd->OperIs(GT_STOREIND)); // We should check the size of the indirections. If they are // different, say because of a cast, then we can't call them equivalent. Doing so could cause us @@ -8699,7 +9335,7 @@ bool Lowering::CheckMultiRegLclVar(GenTreeLclVar* lclNode, int registerCount) for (int i = 0; i < varDsc->lvFieldCnt; i++) { - if (comp->lvaGetDesc(varDsc->lvFieldLclStart + i)->TypeGet() == TYP_SIMD12) + if (comp->lvaGetDesc(varDsc->lvFieldLclStart + i)->TypeIs(TYP_SIMD12)) { canEnregisterAsMultiReg = false; break; @@ -8808,7 +9444,7 @@ void Lowering::ContainCheckNode(GenTree* node) ContainCheckCast(node->AsCast()); break; case GT_BITCAST: - ContainCheckBitCast(node); + ContainCheckBitCast(node->AsUnOp()); break; case GT_LCLHEAP: ContainCheckLclHeap(node->AsOp()); @@ -8827,10 +9463,7 @@ void Lowering::ContainCheckNode(GenTree* node) break; case GT_PUTARG_REG: case GT_PUTARG_STK: -#if FEATURE_ARG_SPLIT - case GT_PUTARG_SPLIT: -#endif // FEATURE_ARG_SPLIT - // The regNum must have been set by the lowering of the call. + // The regNum must have been set by the lowering of the call. 
assert(node->GetRegNum() != REG_NA); break; #ifdef TARGET_XARCH @@ -8893,10 +9526,10 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret) assert(ret->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)); #if !defined(TARGET_64BIT) - if (ret->TypeGet() == TYP_LONG) + if (ret->TypeIs(TYP_LONG)) { GenTree* op1 = ret->AsOp()->GetReturnValue(); - noway_assert(op1->OperGet() == GT_LONG); + noway_assert(op1->OperIs(GT_LONG)); MakeSrcContained(ret, op1); } #endif // !defined(TARGET_64BIT) @@ -8905,11 +9538,11 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret) { GenTree* op1 = ret->AsOp()->GetReturnValue(); // op1 must be either a lclvar or a multi-reg returning call - if (op1->OperGet() == GT_LCL_VAR) + if (op1->OperIs(GT_LCL_VAR)) { const LclVarDsc* varDsc = comp->lvaGetDesc(op1->AsLclVarCommon()); // This must be a multi-reg return or an HFA of a single element. - assert(varDsc->lvIsMultiRegRet || (varDsc->lvIsHfa() && varTypeIsValidHfaType(varDsc->lvType))); + assert(varDsc->lvIsMultiRegRet); // Mark var as contained if not enregisterable. if (!varDsc->IsEnregisterableLcl()) @@ -8924,15 +9557,144 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret) #endif // FEATURE_MULTIREG_RET } +//------------------------------------------------------------------------ +// TryRemoveCast: +// Try to remove a cast node by changing its operand. +// +// Arguments: +// node - Cast node +// +// Returns: +// True if the cast was removed. +// +bool Lowering::TryRemoveCast(GenTreeCast* node) +{ + if (comp->opts.OptimizationDisabled()) + { + return false; + } + + if (node->gtOverflow()) + { + return false; + } + + GenTree* op = node->CastOp(); + if (!op->OperIsConst()) + { + return false; + } + + GenTree* folded = comp->gtFoldExprConst(node); + assert(folded == node); + if (folded->OperIs(GT_CAST)) + { + return false; + } + + op->SetUnusedValue(); + return true; +} + +//------------------------------------------------------------------------ +// TryRemoveBitCast: +// Try to remove a bitcast node by changing its operand. +// +// Arguments: +// node - Bitcast node +// +// Returns: +// True if the bitcast was removed. 
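TryRemoveBitCast, whose body follows below, folds BITCAST(constant) by re-interpreting the constant's raw bits in the destination type, exactly the memcpy-style copy it performs on the constant payload. A minimal standalone sketch of that reinterpretation (plain C++, no GenTree involved):

#include <cstdint>
#include <cstring>

uint32_t FloatBitsAsInt(float value)
{
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(bits)); // e.g. 1.0f -> 0x3F800000
    return bits;
}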
+// +bool Lowering::TryRemoveBitCast(GenTreeUnOp* node) +{ + if (comp->opts.OptimizationDisabled()) + { + return false; + } + + GenTree* op = node->gtGetOp1(); + assert(genTypeSize(node) == genTypeSize(genActualType(op))); + + bool changed = false; +#ifdef FEATURE_SIMD + bool isConst = op->OperIs(GT_CNS_INT, GT_CNS_DBL, GT_CNS_VEC); +#else + bool isConst = op->OperIs(GT_CNS_INT, GT_CNS_DBL); +#endif + + if (isConst) + { + uint8_t bits[sizeof(simd_t)]; + assert(sizeof(bits) >= genTypeSize(genActualType(op))); + if (op->OperIs(GT_CNS_INT)) + { + ssize_t cns = op->AsIntCon()->IconValue(); + assert(sizeof(ssize_t) >= genTypeSize(genActualType(op))); + memcpy(bits, &cns, genTypeSize(genActualType(op))); + } +#ifdef FEATURE_SIMD + else if (op->OperIs(GT_CNS_VEC)) + { + memcpy(bits, &op->AsVecCon()->gtSimdVal, genTypeSize(op)); + } +#endif + else + { + if (op->TypeIs(TYP_FLOAT)) + { + float floatVal = FloatingPointUtils::convertToSingle(op->AsDblCon()->DconValue()); + memcpy(bits, &floatVal, sizeof(float)); + } + else + { + double doubleVal = op->AsDblCon()->DconValue(); + memcpy(bits, &doubleVal, sizeof(double)); + } + } + + GenTree* newCon = comp->gtNewGenericCon(node->TypeGet(), bits); + BlockRange().InsertAfter(op, newCon); + BlockRange().Remove(op); + + node->gtOp1 = op = newCon; + + changed = true; + } + else if (op->OperIs(GT_LCL_FLD, GT_IND)) + { + op->ChangeType(node->TypeGet()); + changed = true; + } + + if (!changed) + { + return false; + } + + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(op); + } + else + { + op->SetUnusedValue(); + } + + BlockRange().Remove(node); + return true; +} + //------------------------------------------------------------------------ // ContainCheckBitCast: determine whether the source of a BITCAST should be contained. // // Arguments: // node - pointer to the node // -void Lowering::ContainCheckBitCast(GenTree* node) +void Lowering::ContainCheckBitCast(GenTreeUnOp* node) { - GenTree* const op1 = node->AsOp()->gtOp1; + GenTree* const op1 = node->gtGetOp1(); if (op1->OperIs(GT_LCL_VAR) && (genTypeSize(op1) == genTypeSize(node))) { if (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1)) @@ -9319,7 +10081,7 @@ bool Lowering::GetLoadStoreCoalescingData(GenTreeIndir* ind, LoadStoreCoalescing // void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind) { -// LA, RISC-V and ARM32 more likely to recieve a terrible performance hit from +// LA, RISC-V and ARM32 more likely to receive a terrible performance hit from // unaligned accesses making this optimization questionable. #if defined(TARGET_XARCH) || defined(TARGET_ARM64) if (!comp->opts.OptimizationEnabled()) @@ -9439,9 +10201,16 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind) // // IND is always fine (and all IND created here from such) // IND is not required to be atomic per our Memory Model - const bool allowsNonAtomic = + bool allowsNonAtomic = ((ind->gtFlags & GTF_IND_ALLOW_NON_ATOMIC) != 0) && ((prevInd->gtFlags & GTF_IND_ALLOW_NON_ATOMIC) != 0); + if (!allowsNonAtomic && currData.baseAddr->OperIs(GT_LCL_VAR) && + (currData.baseAddr->AsLclVar()->GetLclNum() == comp->info.compRetBuffArg)) + { + // RetBuf is a private stack memory, so we don't need to worry about atomicity. 
+ allowsNonAtomic = true; + } + if (!allowsNonAtomic && (genTypeSize(ind) > 1) && !varTypeIsSIMD(ind)) { // TODO-CQ: if we see that the target is a local memory (non address exposed) @@ -9510,29 +10279,25 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind) #if defined(FEATURE_HW_INTRINSICS) case TYP_LONG: case TYP_REF: - if (comp->IsBaselineSimdIsaSupported()) + // TLDR: we should be here only if one of the conditions is true: + // 1) Both GT_INDs have GTF_IND_ALLOW_NON_ATOMIC flag + // 2) ARM64: Data is at least 8-byte aligned + // 3) AMD64: Data is at least 16-byte aligned on AMD/Intel with AVX+ + // + newType = TYP_SIMD16; + if ((oldType == TYP_REF) && + (!currData.value->IsIntegralConst(0) || !prevData.value->IsIntegralConst(0))) { - // TLDR: we should be here only if one of the conditions is true: - // 1) Both GT_INDs have GTF_IND_ALLOW_NON_ATOMIC flag - // 2) ARM64: Data is at least 8-byte aligned - // 3) AMD64: Data is at least 16-byte aligned on AMD/Intel with AVX+ + // For TYP_REF we only support null values. In theory, we can also support frozen handles, e.g.: // - newType = TYP_SIMD16; - if ((oldType == TYP_REF) && - (!currData.value->IsIntegralConst(0) || !prevData.value->IsIntegralConst(0))) - { - // For TYP_REF we only support null values. In theory, we can also support frozen handles, e.g.: - // - // arr[1] = "hello"; - // arr[0] = "world"; - // - // but we don't want to load managed references into SIMD registers (we can only do so - // when we can issue a nongc region for a block) - return; - } - break; + // arr[1] = "hello"; + // arr[0] = "world"; + // + // but we don't want to load managed references into SIMD registers (we can only do so + // when we can issue a nongc region for a block) + return; } - return; + break; #if defined(TARGET_AMD64) case TYP_SIMD16: @@ -9693,7 +10458,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind) // GenTree* Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) { - assert(ind->TypeGet() != TYP_STRUCT); + assert(!ind->TypeIs(TYP_STRUCT)); TryRetypingFloatingPointStoreToIntegerStore(ind); @@ -9780,7 +10545,7 @@ GenTree* Lowering::LowerIndir(GenTreeIndir* ind) #endif // TODO-Cleanup: We're passing isContainable = true but ContainCheckIndir rejects - // address containment in some cases so we end up creating trivial (reg + offfset) + // address containment in some cases so we end up creating trivial (reg + offset) // or (reg + reg) LEAs that are not necessary. #if defined(TARGET_ARM64) @@ -10610,11 +11375,6 @@ void Lowering::LowerBlockStoreCommon(GenTreeBlk* blkNode) // Return value: // true if the replacement was made, false otherwise. // -// Notes: -// TODO-CQ: this method should do the transformation when possible -// and STOREIND should always generate better or the same code as -// STORE_BLK for the same copy. -// bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode) { assert(blkNode->OperIs(GT_STORE_BLK)); @@ -10630,12 +11390,6 @@ bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode) } GenTree* src = blkNode->Data(); - if (varTypeIsSIMD(regType) && src->IsConstInitVal()) - { - // TODO-CQ: support STORE_IND SIMD16(SIMD16, CNT_INT 0). 
- return false; - } - if (varTypeIsGC(regType)) { // TODO-CQ: STOREIND does not try to contain src if we need a barrier, @@ -10648,29 +11402,43 @@ bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode) return false; } - JITDUMP("Replacing STORE_BLK with STOREIND for [%06u]\n", blkNode->gtTreeID); - blkNode->ChangeOper(GT_STOREIND); - blkNode->ChangeType(regType); + if (src->IsConstInitVal()) + { +#if !defined(TARGET_XARCH) + if (varTypeIsSIMD(regType)) + { + // Platforms with zero-regs may produce better/more compact codegen + return false; + } +#endif + + assert(!blkNode->ContainsReferences()); + if (src->OperIsInitVal()) + { + BlockRange().Remove(src); + src = src->gtGetOp1(); + } - if (varTypeIsStruct(src)) + uint8_t initVal = static_cast(src->AsIntCon()->IconValue()); + GenTree* cnsVec = comp->gtNewConWithPattern(regType, initVal); + BlockRange().InsertAfter(src, cnsVec); + BlockRange().Remove(src); + blkNode->SetData(cnsVec); + } + else if (varTypeIsStruct(src)) { src->ChangeType(regType); LowerNode(blkNode->Data()); } - else if (src->OperIsInitVal()) - { - GenTreeUnOp* initVal = src->AsUnOp(); - src = src->gtGetOp1(); - assert(src->IsCnsIntOrI()); - src->AsIntCon()->FixupInitBlkValue(regType); - blkNode->SetData(src); - BlockRange().Remove(initVal); - } else { - assert(src->TypeIs(regType) || src->IsCnsIntOrI() || src->IsCall()); + unreached(); } + JITDUMP("Replacing STORE_BLK with STOREIND for [%06u]\n", blkNode->gtTreeID); + blkNode->ChangeOper(GT_STOREIND); + blkNode->ChangeType(regType); + #if defined(TARGET_XARCH) if (varTypeIsSmall(regType) && src->OperIs(GT_IND, GT_LCL_FLD)) { @@ -10830,6 +11598,134 @@ bool Lowering::TryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode) return true; } +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +//------------------------------------------------------------------------ +// TryLowerAndOrToCCMP : Lower AND/OR of two conditions into test + CCMP + SETCC nodes. +// +// Arguments: +// tree - pointer to the node +// next - [out] Next node to lower if this function returns true +// +// Return Value: +// false if no changes were made +// +bool Lowering::TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next) +{ + assert(tree->OperIs(GT_AND, GT_OR)); + + if (!comp->opts.OptimizationEnabled()) + { + return false; + } + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + if ((op1->OperIsCmpCompare() && varTypeIsIntegralOrI(op1->gtGetOp1())) || + (op2->OperIsCmpCompare() && varTypeIsIntegralOrI(op2->gtGetOp1()))) + { + JITDUMP("[%06u] is a potential candidate for CCMP:\n", Compiler::dspTreeID(tree)); + DISPTREERANGE(BlockRange(), tree); + JITDUMP("\n"); + } + + // Find out whether an operand is eligible to be converted to a conditional + // compare. It must be a normal integral relop; for example, we cannot + // conditionally perform a floating point comparison and there is no "ctst" + // instruction that would allow us to conditionally implement + // TEST_EQ/TEST_NE. + // + // For the other operand we can allow more arbitrary operations that set + // the condition flags; the final transformation into the flags def is done + // by TryLowerConditionToFlagsNode. 
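What the compare + CCMP + SETCC sequence produced by TryLowerAndOrToCCMP computes, written out directly in C++: for AND, the conditional compare only performs the second comparison when the first one passed, otherwise it forces flags that fail the final SETCC; for OR it is the dual. This is a standalone illustration of the semantics under assumed int operands, not the emitted code:

bool AndOfCompares(int a, int b, int c, int d)
{
    bool first = (a == b);          // first relop, lowered to a flags-setting compare
    return first ? (c < d) : false; // CCMP: real compare if 'first' held, else "always false" flags
}

bool OrOfCompares(int a, int b, int c, int d)
{
    bool first = (a == b);         // first relop
    return first ? true : (c < d); // CCMP only performs the compare when the first one failed
}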
+ // + GenCondition cond1; + if (op2->OperIsCmpCompare() && varTypeIsIntegralOrI(op2->gtGetOp1()) && IsInvariantInRange(op2, tree) && + (op2->gtGetOp1()->IsIntegralConst() || !op2->gtGetOp1()->isContained()) && + (op2->gtGetOp2() == nullptr || op2->gtGetOp2()->IsIntegralConst() || !op2->gtGetOp2()->isContained()) && + TryLowerConditionToFlagsNode(tree, op1, &cond1, false)) + { + // Fall through, converting op2 to the CCMP + } + else if (op1->OperIsCmpCompare() && varTypeIsIntegralOrI(op1->gtGetOp1()) && IsInvariantInRange(op1, tree) && + (op1->gtGetOp1()->IsIntegralConst() || !op1->gtGetOp1()->isContained()) && + (op1->gtGetOp2() == nullptr || op1->gtGetOp2()->IsIntegralConst() || !op1->gtGetOp2()->isContained()) && + TryLowerConditionToFlagsNode(tree, op2, &cond1, false)) + { + std::swap(op1, op2); + } + else + { + JITDUMP(" ..could not turn [%06u] or [%06u] into a def of flags, bailing\n", Compiler::dspTreeID(op1), + Compiler::dspTreeID(op2)); + return false; + } + + BlockRange().Remove(op2); + BlockRange().InsertBefore(tree, op2); + + GenCondition cond2 = GenCondition::FromRelop(op2); + op2->SetOper(GT_CCMP); + op2->gtType = TYP_VOID; + op2->gtFlags |= GTF_SET_FLAGS; + + op2->gtGetOp1()->ClearContained(); + op2->gtGetOp2()->ClearContained(); + + GenTreeCCMP* ccmp = op2->AsCCMP(); + + if (tree->OperIs(GT_AND)) + { + // If the first comparison succeeds then do the second comparison. + ccmp->gtCondition = cond1; + // Otherwise set the condition flags to something that makes the second + // one fail. + ccmp->gtFlagsVal = TruthifyingFlags(GenCondition::Reverse(cond2)); + } + else + { + // If the first comparison fails then do the second comparison. + ccmp->gtCondition = GenCondition::Reverse(cond1); + // Otherwise set the condition flags to something that makes the second + // one succeed. + ccmp->gtFlagsVal = TruthifyingFlags(cond2); + } + + ContainCheckConditionalCompare(ccmp); + + tree->SetOper(GT_SETCC); + tree->AsCC()->gtCondition = cond2; + + JITDUMP("Conversion was legal. Result:\n"); + DISPTREERANGE(BlockRange(), tree); + JITDUMP("\n"); + + *next = tree->gtNext; + return true; +} + +//------------------------------------------------------------------------ +// ContainCheckConditionalCompare: determine whether the source of a compare within a compare chain should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckConditionalCompare(GenTreeCCMP* cmp) +{ + GenTree* op2 = cmp->gtOp2; + + if (op2->IsCnsIntOrI() && !op2->AsIntCon()->ImmedValNeedsReloc(comp)) + { + target_ssize_t immVal = (target_ssize_t)op2->AsIntCon()->gtIconVal; + + if (emitter::emitIns_valid_imm_for_ccmp(immVal)) + { + MakeSrcContained(cmp, op2); + } + } +} +#endif + #if defined(FEATURE_HW_INTRINSICS) //---------------------------------------------------------------------------------------------- // Lowering::InsertNewSimdCreateScalarUnsafeNode: Inserts a new simd CreateScalarUnsafe node @@ -10937,5 +11833,38 @@ void Lowering::FinalizeOutgoingArgSpace() comp->lvaOutgoingArgSpaceSize = m_outgoingArgSpaceSize; comp->lvaOutgoingArgSpaceSize.MarkAsReadOnly(); comp->lvaGetDesc(comp->lvaOutgoingArgSpaceVar)->GrowBlockLayout(comp->typGetBlkLayout(m_outgoingArgSpaceSize)); + + SetFramePointerFromArgSpaceSize(); #endif } + +//---------------------------------------------------------------------------------------------- +// Lowering::SetFramePointerFromArgSpaceSize: +// Set the frame pointer from the arg space size. 
This is a quirk because +// StackLevelSetter used to do this even outside x86. +// +void Lowering::SetFramePointerFromArgSpaceSize() +{ + unsigned stackLevelSpace = m_outgoingArgSpaceSize; + + if (comp->compTailCallUsed) + { + // StackLevelSetter also used to count tailcalls. + for (BasicBlock* block : comp->Blocks()) + { + GenTreeCall* tailCall; + if (block->endsWithTailCall(comp, true, false, &tailCall)) + { + stackLevelSpace = max(stackLevelSpace, tailCall->gtArgs.OutgoingArgsStackSize()); + } + } + } + + unsigned stackLevel = + (max(stackLevelSpace, (unsigned)MIN_ARG_AREA_FOR_CALL) - MIN_ARG_AREA_FOR_CALL) / TARGET_POINTER_SIZE; + + if (stackLevel >= 4) + { + comp->codeGen->setFramePointerRequired(true); + } +} diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index e2f5ff9967f4..f44902a476db 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -45,21 +45,15 @@ class Lowering final : public Phase } void FinalizeOutgoingArgSpace(); + void SetFramePointerFromArgSpaceSize(); private: // LowerRange handles new code that is introduced by or after Lowering. void LowerRange(LIR::ReadOnlyRange& range) { - for (GenTree* newNode : range) - { - LowerNode(newNode); - } - } - void LowerRange(GenTree* firstNode, GenTree* lastNode) - { - LIR::ReadOnlyRange range(firstNode, lastNode); - LowerRange(range); + LowerRange(range.FirstNode(), range.LastNode()); } + void LowerRange(GenTree* firstNode, GenTree* lastNode); // ContainCheckRange handles new code that is introduced by or after Lowering, // and that is known to be already in Lowered form. @@ -89,7 +83,7 @@ class Lowering final : public Phase void ContainCheckReturnTrap(GenTreeOp* node); void ContainCheckLclHeap(GenTreeOp* node); void ContainCheckRet(GenTreeUnOp* ret); -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) bool TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next); insCflags TruthifyingFlags(GenCondition cond); void ContainCheckConditionalCompare(GenTreeCCMP* ccmp); @@ -100,9 +94,14 @@ class Lowering final : public Phase bool TryLowerAddSubToMulLongOp(GenTreeOp* op, GenTree** next); bool TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next); bool TryContainingCselOp(GenTreeHWIntrinsic* parentNode, GenTreeHWIntrinsic* childNode); +#endif +#ifdef TARGET_RISCV64 + bool TryLowerShiftAddToShxadd(GenTreeOp* tree, GenTree** next); + bool TryLowerZextAddToAddUw(GenTreeOp* tree, GenTree** next); + bool TryLowerZextLeftShiftToSlliUw(GenTreeOp* tree, GenTree** next); #endif void ContainCheckSelect(GenTreeOp* select); - void ContainCheckBitCast(GenTree* node); + void ContainCheckBitCast(GenTreeUnOp* node); void ContainCheckCallOperands(GenTreeCall* call); void ContainCheckIndir(GenTreeIndir* indirNode); void ContainCheckStoreIndir(GenTreeStoreInd* indirNode); @@ -122,7 +121,6 @@ class Lowering final : public Phase void ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node); #ifdef TARGET_XARCH void TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, GenTreeVecCon* childNode); - void TryCompressConstVecData(GenTreeStoreInd* node); #endif // TARGET_XARCH #endif // FEATURE_HW_INTRINSICS @@ -161,15 +159,27 @@ class Lowering final : public Phase GenTree* LowerCompare(GenTree* cmp); GenTree* LowerJTrue(GenTreeOp* jtrue); GenTree* LowerSelect(GenTreeConditional* cond); - bool TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* code); + bool TryLowerConditionToFlagsNode(GenTree* parent, + GenTree* condition, + GenCondition* code, + bool allowMultipleFlagChecks = true); 
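SetFramePointerFromArgSpaceSize keeps the old StackLevelSetter behavior: the outgoing argument area (grown to the largest tail-call argument area) is converted into a stack level in pointer-sized slots beyond the minimum call area, and a frame pointer is forced once that level reaches four. A standalone sketch of the arithmetic; the MIN_ARG_AREA_FOR_CALL and pointer-size values below are illustrative assumptions, as the real ones are target-specific:

```cpp
#include <algorithm>
#include <cstdio>

// Illustrative stand-ins for MIN_ARG_AREA_FOR_CALL and TARGET_POINTER_SIZE.
constexpr unsigned kMinArgAreaForCall = 32;
constexpr unsigned kPointerSize       = 8;

// Returns true when the outgoing arg area is large enough that the quirk
// forces a frame pointer (>= 4 slots beyond the minimum call area).
static bool FramePointerRequired(unsigned outgoingArgSpaceSize)
{
    unsigned level =
        (std::max(outgoingArgSpaceSize, kMinArgAreaForCall) - kMinArgAreaForCall) / kPointerSize;
    return level >= 4;
}

int main()
{
    std::printf("%d\n", FramePointerRequired(32) ? 1 : 0); // 0: nothing beyond the minimum area
    std::printf("%d\n", FramePointerRequired(64) ? 1 : 0); // 1: four extra slots
    return 0;
}
```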
GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition); void LowerJmpMethod(GenTree* jmp); void LowerRet(GenTreeOp* ret); GenTree* LowerStoreLocCommon(GenTreeLclVarCommon* lclVar); void LowerRetStruct(GenTreeUnOp* ret); void LowerRetSingleRegStructLclVar(GenTreeUnOp* ret); - void LowerCallStruct(GenTreeCall* call); - void LowerStoreSingleRegCallStruct(GenTreeBlk* store); + GenTree* LowerAsyncContinuation(GenTree* asyncCont); + void LowerReturnSuspend(GenTree* retSuspend); + void LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList); + unsigned StoreFieldListToNewLocal(ClassLayout* layout, GenTreeFieldList* fieldList); + void LowerArgFieldList(CallArg* arg, GenTreeFieldList* fieldList); + template + bool IsFieldListCompatibleWithRegisters(GenTreeFieldList* fieldList, unsigned numRegs, GetRegisterInfoFunc func); + template + void LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList, unsigned numRegs, GetRegisterInfoFunc func); + void LowerCallStruct(GenTreeCall* call); + void LowerStoreSingleRegCallStruct(GenTreeBlk* store); #if !defined(WINDOWS_AMD64_ABI) GenTreeLclVar* SpillStructCallResult(GenTreeCall* call) const; #endif // WINDOWS_AMD64_ABI @@ -190,18 +200,16 @@ class Lowering final : public Phase GenTree* LowerVirtualVtableCall(GenTreeCall* call); GenTree* LowerVirtualStubCall(GenTreeCall* call); void LowerArgsForCall(GenTreeCall* call); - void ReplaceArgWithPutArgOrBitcast(GenTree** ppChild, GenTree* newNode); #if defined(TARGET_X86) && defined(FEATURE_IJW) void LowerSpecialCopyArgs(GenTreeCall* call); void InsertSpecialCopyArg(GenTreePutArgStk* putArgStk, CORINFO_CLASS_HANDLE argType, unsigned lclNum); #endif // defined(TARGET_X86) && defined(FEATURE_IJW) - GenTree* NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, var_types type); - void LowerArg(GenTreeCall* call, CallArg* callArg, bool late); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - GenTree* LowerFloatArg(GenTree** pArg, CallArg* callArg); - GenTree* LowerFloatArgReg(GenTree* arg, regNumber regNum); -#endif - void LegalizeArgPlacement(GenTreeCall* call); + void LowerArg(GenTreeCall* call, CallArg* callArg); + void SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg* callArg); + ClassLayout* SliceLayout(ClassLayout* layout, unsigned offset, unsigned size); + void InsertBitCastIfNecessary(GenTree** argNode, const ABIPassingSegment& registerSegment); + void InsertPutArgReg(GenTree** node, const ABIPassingSegment& registerSegment); + void LegalizeArgPlacement(GenTreeCall* call); void InsertPInvokeCallProlog(GenTreeCall* call); void InsertPInvokeCallEpilog(GenTreeCall* call); @@ -235,19 +243,19 @@ class Lowering final : public Phase GenTree* Offset(GenTree* base, unsigned offset) { - var_types resultType = (base->TypeGet() == TYP_REF) ? TYP_BYREF : base->TypeGet(); + var_types resultType = base->TypeIs(TYP_REF) ? TYP_BYREF : base->TypeGet(); return new (comp, GT_LEA) GenTreeAddrMode(resultType, base, nullptr, 0, offset); } GenTree* OffsetByIndex(GenTree* base, GenTree* index) { - var_types resultType = (base->TypeGet() == TYP_REF) ? TYP_BYREF : base->TypeGet(); + var_types resultType = base->TypeIs(TYP_REF) ? TYP_BYREF : base->TypeGet(); return new (comp, GT_LEA) GenTreeAddrMode(resultType, base, index, 0, 0); } GenTree* OffsetByIndexWithScale(GenTree* base, GenTree* index, unsigned scale) { - var_types resultType = (base->TypeGet() == TYP_REF) ? TYP_BYREF : base->TypeGet(); + var_types resultType = base->TypeIs(TYP_REF) ? 
TYP_BYREF : base->TypeGet(); return new (comp, GT_LEA) GenTreeAddrMode(resultType, base, index, scale, 0); } @@ -256,7 +264,7 @@ class Lowering final : public Phase GenTreeLclVar* ReplaceWithLclVar(LIR::Use& use, unsigned tempNum = BAD_VAR_NUM) { GenTree* oldUseNode = use.Def(); - if ((oldUseNode->gtOper != GT_LCL_VAR) || (tempNum != BAD_VAR_NUM)) + if (!oldUseNode->OperIs(GT_LCL_VAR) || (tempNum != BAD_VAR_NUM)) { GenTree* store; use.ReplaceWithLclVar(comp, tempNum, &store); @@ -385,11 +393,13 @@ class Lowering final : public Phase bool TryLowerBlockStoreAsGcBulkCopyCall(GenTreeBlk* blkNode); void LowerLclHeap(GenTree* node); void ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr, GenTree* addrParent); - void LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode); + void LowerPutArgStk(GenTreePutArgStk* putArgNode); GenTree* LowerArrLength(GenTreeArrCommon* node); + bool TryRemoveCast(GenTreeCast* node); + bool TryRemoveBitCast(GenTreeUnOp* node); + #ifdef TARGET_XARCH - void LowerPutArgStk(GenTreePutArgStk* putArgStk); GenTree* TryLowerMulWithConstant(GenTreeOp* node); #endif // TARGET_XARCH @@ -407,7 +417,7 @@ class Lowering final : public Phase GenTree* switchValue, weight_t defaultLikelihood); - GenTree* LowerCast(GenTree* node); + void LowerCast(GenTree* node); #if !CPU_LOAD_STORE_ARCH bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd); @@ -421,6 +431,7 @@ class Lowering final : public Phase GenTree* LowerStoreLoc(GenTreeLclVarCommon* tree); void LowerRotate(GenTree* tree); void LowerShift(GenTreeOp* shift); + bool TryFoldBinop(GenTreeOp* node); #ifdef FEATURE_HW_INTRINSICS GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node); void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition); @@ -452,61 +463,6 @@ class Lowering final : public Phase unsigned simdSize); #endif // FEATURE_HW_INTRINSICS - //---------------------------------------------------------------------------------------------- - // TryRemoveCastIfPresent: Removes op it is a cast operation and the size of its input is at - // least the size of expectedType - // - // Arguments: - // expectedType - The expected type of the cast operation input if it is to be removed - // op - The tree to remove if it is a cast op whose input is at least the size of expectedType - // - // Returns: - // op if it was not a cast node or if its input is not at least the size of expected type; - // Otherwise, it returns the underlying operation that was being casted - GenTree* TryRemoveCastIfPresent(var_types expectedType, GenTree* op) - { - if (!op->OperIs(GT_CAST) || !comp->opts.OptimizationEnabled()) - { - return op; - } - - GenTreeCast* cast = op->AsCast(); - GenTree* castOp = cast->CastOp(); - - // FP <-> INT casts should be kept - if (varTypeIsFloating(castOp) ^ varTypeIsFloating(expectedType)) - { - return op; - } - - // Keep casts which can overflow - if (cast->gtOverflow()) - { - return op; - } - - // Keep casts with operands usable from memory. 
- if (castOp->isContained() || castOp->IsRegOptional()) - { - return op; - } - - if (genTypeSize(cast->CastToType()) >= genTypeSize(expectedType)) - { -#ifndef TARGET_64BIT - // Don't expose TYP_LONG on 32bit - if (castOp->TypeIs(TYP_LONG)) - { - return op; - } -#endif - BlockRange().Remove(op); - return castOp; - } - - return op; - } - // Utility functions public: static bool IndirsAreEquivalent(GenTree* pTreeA, GenTree* pTreeB); @@ -556,6 +512,13 @@ class Lowering final : public Phase bool IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTree* childNode, bool* supportsRegOptional); #endif // FEATURE_HW_INTRINSICS + // Checks for memory conflicts in the instructions between childNode and parentNode, and returns true if childNode + // can be contained. + bool IsSafeToContainMem(GenTree* parentNode, GenTree* childNode) const; + + // Similar to above, but allows bypassing a "transparent" parent. + bool IsSafeToContainMem(GenTree* grandparentNode, GenTree* parentNode, GenTree* childNode) const; + static void TransformUnusedIndirection(GenTreeIndir* ind, Compiler* comp, BasicBlock* block); private: @@ -587,16 +550,12 @@ class Lowering final : public Phase GenTree* endExclusive, GenTree* ignoreNode) const; - // Checks for memory conflicts in the instructions between childNode and parentNode, and returns true if childNode - // can be contained. - bool IsSafeToContainMem(GenTree* parentNode, GenTree* childNode) const; - - // Similar to above, but allows bypassing a "transparent" parent. - bool IsSafeToContainMem(GenTree* grandparentNode, GenTree* parentNode, GenTree* childNode) const; - // Check if marking an operand of a node as reg-optional is safe. bool IsSafeToMarkRegOptional(GenTree* parentNode, GenTree* node) const; + // Checks if it's profitable to optimize an shift and rotate operations to set the zero flag. + bool IsProfitableToSetZeroFlag(GenTree* op) const; + inline LIR::Range& BlockRange() const { return LIR::AsRange(m_block); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index d06bfaa892ab..222fe046963c 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -384,7 +384,7 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN if (childNode->OperIs(GT_NEG)) { // If we have a contained LSH, RSH or RSZ, we can still contain NEG if the parent is a EQ or NE. - if (childNode->gtGetOp1()->isContained() && !childNode->gtGetOp1()->OperIs(GT_LSH, GT_RSH, GT_RSZ)) + if (childNode->gtGetOp1()->isContained() && !childNode->gtGetOp1()->OperIs(GT_LSH, GT_RSH, GT_RSZ, GT_CAST)) { // Cannot contain if the childs op1 is already contained return false; @@ -399,10 +399,36 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN // EQ and NE are the only valid comparison ops that can contain NEG. if (parentNode->OperIs(GT_EQ, GT_NE)) { - if (IsInvariantInRange(childNode, parentNode)) + if (!IsInvariantInRange(childNode, parentNode)) { - return true; + return false; + } + + if (childNode->gtGetOp1()->OperIs(GT_CAST)) + { + // Grab the cast as well, we can contain this with cmn (extended-register). + GenTreeCast* cast = childNode->gtGetOp1()->AsCast(); + GenTree* castOp = cast->CastOp(); + + // Cannot contain the cast from floating point. + if (!varTypeIsIntegral(castOp)) + { + return false; + } + + // Cannot contain the cast if it already contains it's CastOp. 
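The containment above lets an EQ/NE consume a NEG whose operand is a cast, so codegen can use cmn/cmp in their extended-register forms, where the narrow operand is widened as part of the comparison itself. In plain C++ terms the one-instruction compare is equivalent to widening first; a small standalone sketch of the two equivalences being relied on (ordinary integers, not the JIT's node types):

```cpp
#include <cassert>
#include <cstdint>

// cmp x0, w1, uxtw  ~  compare x against the zero-extended 32-bit value.
static bool CmpExtended(uint64_t x, uint32_t y)
{
    return x == static_cast<uint64_t>(y);
}

// cmn x0, w1, sxtw  ~  test x + sext(y) against zero, i.e. x == -sext(y).
static bool CmnExtended(int64_t x, int32_t y)
{
    return x == -static_cast<int64_t>(y);
}

int main()
{
    assert(CmpExtended(0x00000000FFFFFFFFull, 0xFFFFFFFFu)); // widening happens inside the compare
    assert(CmnExtended(5, -5));                              // NEG(CAST(...)) folded into cmn
    return 0;
}
```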
+ if (castOp->isContained()) + { + return false; + } + + assert(!cast->gtOverflow()); + assert(varTypeIsIntegral(cast) && varTypeIsIntegral(cast->CastToType())); + + MakeSrcContained(childNode, cast); } + + return true; } return false; @@ -435,14 +461,15 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN return false; } + if (!IsInvariantInRange(childNode, parentNode)) + { + return false; + } + if (parentNode->OperIs(GT_ADD, GT_SUB)) { // These operations can still report flags - - if (IsInvariantInRange(childNode, parentNode)) - { - return true; - } + return true; } if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0) @@ -453,13 +480,29 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN if (parentNode->OperIs(GT_CMP)) { - if (IsInvariantInRange(childNode, parentNode)) + return true; + } + + if (parentNode->OperIsCmpCompare()) + { + if (castOp->isContained()) { - return true; + return false; + } + + if (IsContainableMemoryOp(castOp)) + { + // The cast node will contain a memory operation which will perform + // the cast on load/store, so we don't need to contain it here. + // This check won't catch spills, so if register pressure is high + // this can result in cmp (extended-register) taking higher priority + // over a load/store with extension. + return false; } + + return true; } - // TODO: Handle CMN return false; } @@ -816,7 +859,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) return; } - assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL)); + assert(dstAddr->TypeIs(TYP_BYREF, TYP_I_IMPL)); blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll; } else if (blkNode->OperIs(GT_STORE_BLK) && (size <= copyBlockUnrollLimit)) @@ -895,12 +938,12 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT } //------------------------------------------------------------------------ -// LowerPutArgStkOrSplit: Lower a GT_PUTARG_STK/GT_PUTARG_SPLIT. +// LowerPutArgStk: Lower a GT_PUTARG_STK. // // Arguments: // putArgStk - The node to lower // -void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode) +void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgNode) { GenTree* src = putArgNode->Data(); @@ -925,22 +968,11 @@ void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode) // tree - GT_CAST node to be lowered // // Return Value: -// nextNode to be lowered if tree is modified else returns nullptr -// -// Notes: -// Casts from float/double to a smaller int type are transformed as follows: -// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) -// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) -// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) -// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) -// -// Note that for the overflow conversions we still depend on helper calls and -// don't expect to see them here. -// i) GT_CAST(float/double, int type with overflow detection) +// None. // -GenTree* Lowering::LowerCast(GenTree* tree) +void Lowering::LowerCast(GenTree* tree) { - assert(tree->OperGet() == GT_CAST); + assert(tree->OperIs(GT_CAST)); JITDUMP("LowerCast for: "); DISPNODE(tree); @@ -952,17 +984,16 @@ GenTree* Lowering::LowerCast(GenTree* tree) if (varTypeIsFloating(srcType)) { + // Overflow casts should have been converted to helper call in morph. 
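The assert above relies on morph having already split float/double-to-small-int casts into a float-to-int32 cast followed by an int32-to-small cast, so Lowering never sees a small destination type directly. A scalar sketch of the equivalent two-step conversion, with ordinary C++ casts standing in for the GT_CAST nodes:

```cpp
#include <cassert>
#include <cstdint>

// GT_CAST(double, int16) is morphed into GT_CAST(GT_CAST(double, int32), int16):
// convert to int32 first, then narrow to the small type.
static int16_t CastDoubleToInt16(double d)
{
    int32_t wide = static_cast<int32_t>(d); // float/double -> int32
    return static_cast<int16_t>(wide);      // int32 -> small int
}

int main()
{
    assert(CastDoubleToInt16(123.9) == 123);
    assert(CastDoubleToInt16(-7.5) == -7);
    return 0;
}
```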
noway_assert(!tree->gtOverflow()); - assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small - // int. + // Small types should have had an intermediate int cast inserted in morph. + assert(!varTypeIsSmall(dstType)); } assert(!varTypeIsSmall(srcType)); // Now determine if we have operands that should be contained. ContainCheckCast(tree->AsCast()); - - return nullptr; } //------------------------------------------------------------------------ @@ -976,7 +1007,7 @@ GenTree* Lowering::LowerCast(GenTree* tree) // void Lowering::LowerRotate(GenTree* tree) { - if (tree->OperGet() == GT_ROL) + if (tree->OperIs(GT_ROL)) { // There is no ROL instruction on ARM. Convert ROL into ROR. GenTree* rotatedValue = tree->AsOp()->gtOp1; @@ -1490,7 +1521,7 @@ void Lowering::LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node) // GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { - if (node->TypeGet() == TYP_SIMD12) + if (node->TypeIs(TYP_SIMD12)) { // GT_HWINTRINSIC node requiring to produce TYP_SIMD12 in fact // produces a TYP_SIMD16 result @@ -1985,11 +2016,9 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) { - assert((node->GetHWIntrinsicId() == NI_Vector64_Create) || (node->GetHWIntrinsicId() == NI_Vector128_Create) || - (node->GetHWIntrinsicId() == NI_Vector64_CreateScalar) || - (node->GetHWIntrinsicId() == NI_Vector128_CreateScalar) || - (node->GetHWIntrinsicId() == NI_Vector64_CreateScalarUnsafe) || - (node->GetHWIntrinsicId() == NI_Vector128_CreateScalarUnsafe) || + assert(HWIntrinsicInfo::IsVectorCreate(node->GetHWIntrinsicId()) || + HWIntrinsicInfo::IsVectorCreateScalar(node->GetHWIntrinsicId()) || + HWIntrinsicInfo::IsVectorCreateScalarUnsafe(node->GetHWIntrinsicId()) || (node->GetHWIntrinsicId() == NI_AdvSimd_DuplicateToVector64) || (node->GetHWIntrinsicId() == NI_AdvSimd_DuplicateToVector128) || (node->GetHWIntrinsicId() == NI_AdvSimd_Arm64_DuplicateToVector64) || @@ -2248,7 +2277,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) assert(simdSize != 0); bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simdVal); - bool isCreateScalar = (intrinsicId == NI_Vector64_CreateScalar) || (intrinsicId == NI_Vector128_CreateScalar); + bool isCreateScalar = HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId); size_t argCnt = node->GetOperandCount(); // Check if we have a cast that we can remove. Note that "IsValidConstForMovImm" @@ -2736,7 +2765,7 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) { // If this is the rhs of a block copy it will be handled when we handle the store. - if (indirNode->TypeGet() == TYP_STRUCT) + if (indirNode->TypeIs(TYP_STRUCT)) { return; } @@ -2749,7 +2778,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) // // TODO-ARM64-CQ: handle other addr mode expressions that could be marked // as contained. 
- if (indirNode->TypeGet() == TYP_SIMD12) + if (indirNode->TypeIs(TYP_SIMD12)) { return; } @@ -2757,7 +2786,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) GenTree* addr = indirNode->Addr(); - if ((addr->OperGet() == GT_LEA) && IsInvariantInRange(addr, indirNode)) + if (addr->OperIs(GT_LEA) && IsInvariantInRange(addr, indirNode)) { bool makeContained = true; @@ -2769,14 +2798,14 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) int cns = lea->Offset(); if (lea->HasIndex() || !emitter::emitIns_valid_imm_for_vldst_offset(cns)) { - if (indirNode->OperGet() == GT_STOREIND) + if (indirNode->OperIs(GT_STOREIND)) { if (varTypeIsFloating(indirNode->AsStoreInd()->Data())) { makeContained = false; } } - else if (indirNode->OperGet() == GT_IND) + else if (indirNode->OperIs(GT_IND)) { if (varTypeIsFloating(indirNode)) { @@ -2834,25 +2863,24 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) { if (IsContainableUnaryOrBinaryOp(node, op2)) { - if (op2->OperIs(GT_CAST)) + if (node->OperIs(GT_ADD, GT_SUB, GT_CMP) && op2->OperIs(GT_CAST)) { // We want to prefer the combined op here over containment of the cast op op2->AsCast()->CastOp()->ClearContained(); } - MakeSrcContained(node, op2); + MakeSrcContained(node, op2); return; } - if (node->OperIsCommutative() && IsContainableUnaryOrBinaryOp(node, op1)) { - if (op1->OperIs(GT_CAST)) + if (node->OperIs(GT_ADD, GT_SUB, GT_CMP) && op1->OperIs(GT_CAST)) { // We want to prefer the combined op here over containment of the cast op op1->AsCast()->CastOp()->ClearContained(); } - MakeSrcContained(node, op1); + MakeSrcContained(node, op1); std::swap(node->gtOp1, node->gtOp2); return; } @@ -2899,7 +2927,7 @@ void Lowering::ContainCheckShiftRotate(GenTreeOp* node) GenTree* source = node->gtOp1; if (node->OperIs(GT_LSH_HI, GT_RSH_LO)) { - assert(source->OperGet() == GT_LONG); + assert(source->OperIs(GT_LONG)); MakeSrcContained(node, source); } #endif // TARGET_ARM @@ -2962,7 +2990,7 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const { MakeSrcContained(storeLoc, op1); } - else if (op1->OperGet() == GT_LONG) + else if (op1->OperIs(GT_LONG)) { MakeSrcContained(storeLoc, op1); } @@ -3036,7 +3064,7 @@ void Lowering::ContainCheckCast(GenTreeCast* node) #ifdef TARGET_ARM if (varTypeIsLong(castOp)) { - assert(castOp->OperGet() == GT_LONG); + assert(castOp->OperIs(GT_LONG)); MakeSrcContained(node, castOp); } #endif // TARGET_ARM @@ -3066,14 +3094,45 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) #ifdef TARGET_ARM64 if (comp->opts.OptimizationEnabled() && (cmp->OperIsCompare() || cmp->OperIs(GT_CMP))) { + auto forceCastOpInRegister = [](GenTree* op) { + // If the compare contains a cast, make sure that cast node definitely does not become + // a memory operation, as we won't be able to contain it in CodeGen if this happens. + // The node being cast must have a register assigned. + GenTree* cast = nullptr; + if (op->OperIs(GT_CAST)) + { + // cmp (extended-register): GT_EQ -> GT_CAST -> ... + cast = op; + } + else if (op->OperIs(GT_NEG) && op->gtGetOp1()->OperIs(GT_CAST)) + { + // cmn (extended-register): GT_EQ -> GT_NEG -> GT_CAST -> ... 
+ cast = op->gtGetOp1(); + } + if (cast) + { + cast->AsCast()->CastOp()->ClearRegOptional(); + } + }; + if (IsContainableUnaryOrBinaryOp(cmp, op2)) { + if (cmp->OperIsCmpCompare()) + { + forceCastOpInRegister(op2); + } + MakeSrcContained(cmp, op2); return; } if (IsContainableUnaryOrBinaryOp(cmp, op1)) { + if (cmp->OperIsCmpCompare()) + { + forceCastOpInRegister(op1); + } + MakeSrcContained(cmp, op1); std::swap(cmp->gtOp1, cmp->gtOp2); if (cmp->OperIsCompare()) @@ -3087,107 +3146,6 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) } #ifdef TARGET_ARM64 -//------------------------------------------------------------------------ -// TryLowerAndOrToCCMP : Lower AND/OR of two conditions into test + CCMP + SETCC nodes. -// -// Arguments: -// tree - pointer to the node -// next - [out] Next node to lower if this function returns true -// -// Return Value: -// false if no changes were made -// -bool Lowering::TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next) -{ - assert(tree->OperIs(GT_AND, GT_OR)); - - if (!comp->opts.OptimizationEnabled()) - { - return false; - } - - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); - - if ((op1->OperIsCmpCompare() && varTypeIsIntegralOrI(op1->gtGetOp1())) || - (op2->OperIsCmpCompare() && varTypeIsIntegralOrI(op2->gtGetOp1()))) - { - JITDUMP("[%06u] is a potential candidate for CCMP:\n", Compiler::dspTreeID(tree)); - DISPTREERANGE(BlockRange(), tree); - JITDUMP("\n"); - } - - // Find out whether an operand is eligible to be converted to a conditional - // compare. It must be a normal integral relop; for example, we cannot - // conditionally perform a floating point comparison and there is no "ctst" - // instruction that would allow us to conditionally implement - // TEST_EQ/TEST_NE. - // - // For the other operand we can allow more arbitrary operations that set - // the condition flags; the final transformation into the flags def is done - // by TryLowerConditionToFlagsNode. - // - GenCondition cond1; - if (op2->OperIsCmpCompare() && varTypeIsIntegralOrI(op2->gtGetOp1()) && IsInvariantInRange(op2, tree) && - TryLowerConditionToFlagsNode(tree, op1, &cond1)) - { - // Fall through, converting op2 to the CCMP - } - else if (op1->OperIsCmpCompare() && varTypeIsIntegralOrI(op1->gtGetOp1()) && IsInvariantInRange(op1, tree) && - TryLowerConditionToFlagsNode(tree, op2, &cond1)) - { - std::swap(op1, op2); - } - else - { - JITDUMP(" ..could not turn [%06u] or [%06u] into a def of flags, bailing\n", Compiler::dspTreeID(op1), - Compiler::dspTreeID(op2)); - return false; - } - - BlockRange().Remove(op2); - BlockRange().InsertBefore(tree, op2); - - GenCondition cond2 = GenCondition::FromRelop(op2); - op2->SetOper(GT_CCMP); - op2->gtType = TYP_VOID; - op2->gtFlags |= GTF_SET_FLAGS; - - op2->gtGetOp1()->ClearContained(); - op2->gtGetOp2()->ClearContained(); - - GenTreeCCMP* ccmp = op2->AsCCMP(); - - if (tree->OperIs(GT_AND)) - { - // If the first comparison succeeds then do the second comparison. - ccmp->gtCondition = cond1; - // Otherwise set the condition flags to something that makes the second - // one fail. - ccmp->gtFlagsVal = TruthifyingFlags(GenCondition::Reverse(cond2)); - } - else - { - // If the first comparison fails then do the second comparison. - ccmp->gtCondition = GenCondition::Reverse(cond1); - // Otherwise set the condition flags to something that makes the second - // one succeed. 
- ccmp->gtFlagsVal = TruthifyingFlags(cond2); - } - - ContainCheckConditionalCompare(ccmp); - - tree->SetOper(GT_SETCC); - tree->AsCC()->gtCondition = cond2; - - JITDUMP("Conversion was legal. Result:\n"); - DISPTREERANGE(BlockRange(), tree); - JITDUMP("\n"); - - *next = tree->gtNext; - return true; -} - //------------------------------------------------------------------------ // TruthifyingFlags: Get a flags immediate that will make a specified condition true. // @@ -3226,28 +3184,6 @@ insCflags Lowering::TruthifyingFlags(GenCondition condition) return INS_FLAGS_NONE; } } - -//------------------------------------------------------------------------ -// ContainCheckConditionalCompare: determine whether the source of a compare within a compare chain should be contained. -// -// Arguments: -// node - pointer to the node -// -void Lowering::ContainCheckConditionalCompare(GenTreeCCMP* cmp) -{ - GenTree* op2 = cmp->gtOp2; - - if (op2->IsCnsIntOrI() && !op2->AsIntCon()->ImmedValNeedsReloc(comp)) - { - target_ssize_t immVal = (target_ssize_t)op2->AsIntCon()->gtIconVal; - - if (emitter::emitIns_valid_imm_for_ccmp(immVal)) - { - MakeSrcContained(cmp, op2); - } - } -} - #endif // TARGET_ARM64 //------------------------------------------------------------------------ @@ -3567,9 +3503,6 @@ bool Lowering::TryLowerAddSubToMulLongOp(GenTreeOp* op, GenTree** next) if (!comp->opts.OptimizationEnabled()) return false; - if (!comp->compOpportunisticallyDependsOn(InstructionSet_ArmBase_Arm64)) - return false; - if (op->isContained()) return false; @@ -3673,9 +3606,6 @@ bool Lowering::TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next) if (!comp->opts.OptimizationEnabled()) return false; - if (!comp->compOpportunisticallyDependsOn(InstructionSet_ArmBase_Arm64)) - return false; - if (op->isContained()) return false; @@ -3884,13 +3814,14 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_ExtractVector128: case NI_AdvSimd_StoreSelectedScalar: case NI_AdvSimd_Arm64_StoreSelectedScalar: - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: + case NI_Sve_Prefetch16Bit: + case NI_Sve_Prefetch32Bit: + case NI_Sve_Prefetch64Bit: + case NI_Sve_Prefetch8Bit: case NI_Sve_ExtractVector: case NI_Sve_AddRotateComplex: case NI_Sve_TrigonometricMultiplyAddCoefficient: + case NI_Sve2_ShiftLeftAndInsert: assert(hasImmediateOperand); assert(varTypeIsIntegral(intrin.op3)); if (intrin.op3->IsCnsIntOrI()) @@ -4210,14 +4141,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) // CndSel(mask, embedded(trueValOp2), op3) // cndSelNode->Op(2) = nestedCndSel->Op(2); - if (nestedOp3->IsMaskZero()) - { - BlockRange().Remove(nestedOp3); - } - else - { - nestedOp3->SetUnusedValue(); - } + nestedOp3->SetUnusedValue(); BlockRange().Remove(nestedOp1); BlockRange().Remove(nestedCndSel); @@ -4254,14 +4178,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) op2->SetUnusedValue(); } - if (op3->IsMaskZero()) - { - BlockRange().Remove(op3); - } - else - { - op3->SetUnusedValue(); - } + op3->SetUnusedValue(); op1->SetUnusedValue(); GenTree* next = cndSelNode->gtNext; diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index ef06d2ce4179..46954a5d5b1f 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -38,8 +38,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // bool 
Lowering::IsCallTargetInRange(void* addr) { + // The CallTarget is always in range on LA64. // TODO-LOONGARCH64-CQ: using B/BL for optimization. - return false; + return true; } //------------------------------------------------------------------------ @@ -381,7 +382,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) return; } - assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL)); + assert(dstAddr->TypeIs(TYP_BYREF, TYP_I_IMPL)); blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll; } else if (blkNode->OperIs(GT_STORE_BLK) && (size <= copyBlockUnrollLimit)) @@ -453,12 +454,12 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT } //------------------------------------------------------------------------ -// LowerPutArgStkOrSplit: Lower a GT_PUTARG_STK/GT_PUTARG_SPLIT. +// LowerPutArgStk: Lower a GT_PUTARG_STK // // Arguments: // putArgNode - The node to lower // -void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode) +void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgNode) { GenTree* src = putArgNode->Data(); @@ -514,21 +515,9 @@ void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode) // Return Value: // None. // -// Notes: -// Casts from float/double to a smaller int type are transformed as follows: -// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) -// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) -// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) -// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) -// -// Note that for the overflow conversions we still depend on helper calls and -// don't expect to see them here. -// i) GT_CAST(float/double, int type with overflow detection) -// - -GenTree* Lowering::LowerCast(GenTree* tree) +void Lowering::LowerCast(GenTree* tree) { - assert(tree->OperGet() == GT_CAST); + assert(tree->OperIs(GT_CAST)); JITDUMP("LowerCast for: "); DISPNODE(tree); @@ -540,17 +529,16 @@ GenTree* Lowering::LowerCast(GenTree* tree) if (varTypeIsFloating(srcType)) { + // Overflow casts should have been converted to helper call in morph. noway_assert(!tree->gtOverflow()); - assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small - // int. + // Small types should have had an intermediate int cast inserted in morph. + assert(!varTypeIsSmall(dstType)); } assert(!varTypeIsSmall(srcType)); // Now determine if we have operands that should be contained. ContainCheckCast(tree->AsCast()); - - return nullptr; } //------------------------------------------------------------------------ @@ -564,7 +552,7 @@ GenTree* Lowering::LowerCast(GenTree* tree) // void Lowering::LowerRotate(GenTree* tree) { - if (tree->OperGet() == GT_ROL) + if (tree->OperIs(GT_ROL)) { // Convert ROL into ROR. GenTree* rotatedValue = tree->AsOp()->gtOp1; @@ -704,7 +692,7 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) { // If this is the rhs of a block copy it will be handled when we handle the store. 
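As in the ARM path earlier, LowerRotate here rewrites GT_ROL into GT_ROR, relying on the identity that a left rotate by n equals a right rotate by width - n. A standalone check of that identity on 32-bit values:

```cpp
#include <cassert>
#include <cstdint>

static uint32_t RotateLeft(uint32_t x, unsigned n)
{
    n &= 31;
    return (n == 0) ? x : ((x << n) | (x >> (32 - n)));
}

static uint32_t RotateRight(uint32_t x, unsigned n)
{
    n &= 31;
    return (n == 0) ? x : ((x >> n) | (x << (32 - n)));
}

int main()
{
    // rol(x, n) == ror(x, 32 - n), which is how GT_ROL is rewritten as GT_ROR.
    uint32_t x = 0x12345678u;
    for (unsigned n = 1; n < 32; n++)
    {
        assert(RotateLeft(x, n) == RotateRight(x, 32 - n));
    }
    return 0;
}
```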
- if (indirNode->TypeGet() == TYP_STRUCT) + if (indirNode->TypeIs(TYP_STRUCT)) { return; } @@ -714,7 +702,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) #endif // FEATURE_SIMD GenTree* addr = indirNode->Addr(); - if ((addr->OperGet() == GT_LEA) && IsInvariantInRange(addr, indirNode)) + if (addr->OperIs(GT_LEA) && IsInvariantInRange(addr, indirNode)) { MakeSrcContained(indirNode, addr); } diff --git a/src/coreclr/jit/lowerriscv64.cpp b/src/coreclr/jit/lowerriscv64.cpp index 32302506ae55..87508c8dc5dd 100644 --- a/src/coreclr/jit/lowerriscv64.cpp +++ b/src/coreclr/jit/lowerriscv64.cpp @@ -64,26 +64,32 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const switch (parentNode->OperGet()) { - case GT_ADD: case GT_EQ: case GT_NE: + return emitter::isValidSimm12(-immVal) || (immVal == -2048); + + case GT_LE: // a <= N -> a < N+1 + case GT_GT: // a > N -> !(a <= N) -> !(a < N+1) + immVal += 1; + FALLTHROUGH; case GT_LT: - case GT_LE: case GT_GE: - case GT_GT: - return emitter::isValidSimm12(immVal); + case GT_ADD: case GT_AND: case GT_OR: case GT_XOR: - return emitter::isValidUimm11(immVal); + return emitter::isValidSimm12(immVal); case GT_JCMP: return true; + case GT_CMPXCHG: + case GT_XORR: + case GT_XAND: + case GT_XADD: + case GT_XCHG: case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: - if (immVal == 0) - return true; - break; + return (immVal == 0); default: break; @@ -183,6 +189,73 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) // GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) { + if (comp->opts.OptimizationEnabled()) + { + GenTree*& op1 = binOp->gtOp1; + GenTree*& op2 = binOp->gtOp2; + + bool isOp1Negated = op1->OperIs(GT_NOT); + bool isOp2Negated = op2->OperIs(GT_NOT); + if (binOp->OperIs(GT_AND, GT_OR, GT_XOR) && (isOp1Negated || isOp2Negated)) + { + if ((isOp1Negated && isOp2Negated) || comp->compOpportunisticallyDependsOn(InstructionSet_Zbb)) + { + if (isOp1Negated) + { + BlockRange().Remove(op1); + op1 = op1->AsUnOp()->gtGetOp1(); + } + if (isOp2Negated) + { + BlockRange().Remove(op2); + op2 = op2->AsUnOp()->gtGetOp1(); + } + + if (isOp1Negated != isOp2Negated) + { + assert(comp->compOpportunisticallyDependsOn(InstructionSet_Zbb)); + if (isOp1Negated) + std::swap(op1, op2); + + genTreeOps operNot = GT_NONE; + switch (binOp->OperGet()) + { + case GT_AND: + operNot = GT_AND_NOT; + break; + case GT_OR: + operNot = GT_OR_NOT; + break; + default: + assert(binOp->OperIs(GT_XOR)); + operNot = GT_XOR_NOT; + break; + } + binOp->ChangeOper(operNot); + } + else if (binOp->OperIs(GT_AND, GT_OR)) // XOR is good after negation removal, (~a ^ ~b) == (a ^ b) + { + assert(isOp1Negated && isOp2Negated); + LIR::Use use; + if (BlockRange().TryGetUse(binOp, &use)) + { + // (~a | ~b) == ~(a & b), (~a & ~b) == ~(a | b) + genTreeOps reverseOper = binOp->OperIs(GT_AND) ? 
GT_OR : GT_AND; + binOp->ChangeOper(reverseOper); + + GenTreeUnOp* negation = comp->gtNewOperNode(GT_NOT, binOp->gtType, binOp); + BlockRange().InsertAfter(binOp, negation); + use.ReplaceWith(negation); + } + else + { + binOp->SetUnusedValue(); + } + } + } + } + } + ContainCheckBinary(binOp); return binOp->gtNext; @@ -330,7 +403,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) return; } - assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL)); + assert(dstAddr->TypeIs(TYP_BYREF, TYP_I_IMPL)); blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll; } else if (blkNode->OperIs(GT_STORE_BLK) && (size <= copyBlockUnrollLimit)) @@ -401,12 +474,12 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT } //------------------------------------------------------------------------ -// LowerPutArgStkOrSplit: Lower a GT_PUTARG_STK/GT_PUTARG_SPLIT. +// LowerPutArgStk: Lower a GT_PUTARG_STK. // // Arguments: // putArgNode - The node to lower // -void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode) +void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgNode) { GenTree* src = putArgNode->Data(); @@ -433,21 +506,9 @@ void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode) // Return Value: // None. // -// Notes: -// Casts from float/double to a smaller int type are transformed as follows: -// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) -// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) -// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) -// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) -// -// Note that for the overflow conversions we still depend on helper calls and -// don't expect to see them here. -// i) GT_CAST(float/double, int type with overflow detection) -// - -GenTree* Lowering::LowerCast(GenTree* tree) +void Lowering::LowerCast(GenTree* tree) { - assert(tree->OperGet() == GT_CAST); + assert(tree->OperIs(GT_CAST)); JITDUMP("LowerCast for: "); DISPNODE(tree); @@ -459,17 +520,16 @@ GenTree* Lowering::LowerCast(GenTree* tree) if (varTypeIsFloating(srcType)) { + // Overflow casts should have been converted to helper call in morph. noway_assert(!tree->gtOverflow()); - assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small - // int. + // Small types should have had an intermediate int cast inserted in morph. + assert(!varTypeIsSmall(dstType)); } assert(!varTypeIsSmall(srcType)); // Now determine if we have operands that should be contained. 
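The new RISC-V LowerBinaryArithmetic logic above strips GT_NOT operands: with Zbb a single negated operand folds into AND_NOT/OR_NOT/XOR_NOT, and when both operands are negated the expression is rewritten with De Morgan's laws (for XOR the negations simply cancel). A quick standalone check of the identities being used, on ordinary integers rather than GenTree nodes:

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    const uint32_t vals[] = {0x00000000u, 0x0000005Au, 0x80000000u, 0xFFFFFFFFu};

    for (uint32_t a : vals)
    {
        for (uint32_t b : vals)
        {
            assert((~a & ~b) == ~(a | b)); // both operands negated: AND(NOT,NOT) -> NOT(OR)
            assert((~a | ~b) == ~(a & b)); // both operands negated: OR(NOT,NOT)  -> NOT(AND)
            assert((~a ^ ~b) == (a ^ b));  // XOR: the two negations cancel outright
            assert((~a & b) == (b & ~a));  // single negation: swap so the negated value is op2 (ANDN form)
        }
    }
    return 0;
}
```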
ContainCheckCast(tree->AsCast()); - - return nullptr; } //------------------------------------------------------------------------ @@ -486,6 +546,313 @@ void Lowering::LowerRotate(GenTree* tree) ContainCheckShiftRotate(tree->AsOp()); } +// Determine if cast type is 32-bit zero extension +bool IsIntZeroExtCast(GenTreeCast* cast) +{ + GenTree* const src = cast->CastOp(); + const var_types srcType = genActualType(src); + const bool srcUnsigned = cast->IsUnsigned(); + const unsigned srcSize = genTypeSize(srcType); + const var_types castType = cast->gtCastType; + const bool castUnsigned = varTypeIsUnsigned(castType); + const unsigned castSize = genTypeSize(castType); + + return varTypeIsIntegralOrI(srcType) && varTypeIsIntegralOrI(castType) && srcSize == 4 && castSize == 8 && + (castUnsigned || srcUnsigned); +} + +// Determine SH(X)ADD(_UW) node for the given shift amount and signedness +genTreeOps GetShxaddOp(unsigned int shamt, bool isUnsigned) +{ + if (isUnsigned) + { + switch (shamt) + { + case 1: + return GT_SH1ADD_UW; + case 2: + return GT_SH2ADD_UW; + case 3: + return GT_SH3ADD_UW; + default: + unreached(); + } + } + else + { + switch (shamt) + { + case 1: + return GT_SH1ADD; + case 2: + return GT_SH2ADD; + case 3: + return GT_SH3ADD; + default: + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// TryLowerShiftAddToShxadd : Lower ADD(LSH) node to SH(X)ADD(.UW) node. +// +// Arguments: +// tree - pointer to the node +// next - [out] Next node to lower if this function returns true +// +// Return Value: +// false if no changes were made +// +bool Lowering::TryLowerShiftAddToShxadd(GenTreeOp* tree, GenTree** next) +{ + if (comp->opts.OptimizationDisabled()) + { + return false; + } + + if (tree->isContained() || ((tree->gtFlags & GTF_ALL_EFFECT) != 0) || !tree->OperIs(GT_ADD) || + ((emitActualTypeSize(tree) != EA_8BYTE) && (emitActualTypeSize(tree) != EA_BYREF))) + { + return false; + } + + GenTree* base = nullptr; + GenTree* shift = nullptr; + + if (tree->gtOp1->OperIs(GT_LSH, GT_MUL, GT_SLLI_UW)) + { + shift = tree->gtOp1; + base = tree->gtOp2; + } + else if (tree->gtOp2->OperIs(GT_LSH, GT_MUL, GT_SLLI_UW)) + { + shift = tree->gtOp2; + base = tree->gtOp1; + } + else + { + return false; + } + + bool isSlliUw = false; + if (shift->OperIs(GT_SLLI_UW)) + { + isSlliUw = true; + } + + GenTree* index = shift->gtGetOp1(); + unsigned int scale = shift->GetScaledIndex(); + if (scale == 0) + { + return false; + } + + assert(base->IsValue()); + assert(index->IsValue()); + + if (base->isContained() || index->isContained() || !varTypeIsIntegralOrI(base) || !varTypeIsIntegralOrI(index) || + base->IsCnsIntOrI() || index->IsCnsIntOrI()) + { + return false; + } + + JITDUMP("Removing unused node:\n "); + DISPNODE(shift->gtGetOp2()); + BlockRange().Remove(shift->gtGetOp2()); + DEBUG_DESTROY_NODE(shift->gtGetOp2()); + + JITDUMP("Removing unused node:\n "); + DISPNODE(shift); + BlockRange().Remove(shift); + DEBUG_DESTROY_NODE(shift); + + DWORD shamt; + BitScanForward(&shamt, scale); + + tree->gtOp1 = index; + tree->gtOp2 = base; + tree->ChangeOper(GetShxaddOp(shamt, isSlliUw)); + + JITDUMP("Base:\n "); + DISPNODE(tree->gtOp2); + JITDUMP("Index:\n "); + DISPNODE(tree->gtOp1); + + JITDUMP("New SHXADD node:\n "); + DISPNODE(tree); + JITDUMP("\n"); + + if (index->OperIs(GT_CAST)) + { + GenTreeCast* const cast = index->AsCast(); + GenTree* const src = cast->CastOp(); + + if (IsIntZeroExtCast(cast)) + { + JITDUMP("Removing unused node:\n "); + DISPNODE(cast); + 
BlockRange().Remove(cast); + DEBUG_DESTROY_NODE(cast); + + tree->gtOp1 = src; + tree->ChangeOper(GetShxaddOp(shamt, true)); + + JITDUMP("Index:\n "); + DISPNODE(tree->gtOp1); + + JITDUMP("Transformed SH(X)ADD node to SH(X)ADD_UW node:\n "); + DISPNODE(tree); + JITDUMP("\n"); + } + } + + *next = tree->gtNext; + return true; +} + +//------------------------------------------------------------------------ +// TryLowerZextAddToAddUw : Lower ADD(CAST) node to ADD_UW node. +// +// Arguments: +// tree - pointer to the node +// next - [out] Next node to lower if this function returns true +// +// Return Value: +// false if no changes were made +// +bool Lowering::TryLowerZextAddToAddUw(GenTreeOp* tree, GenTree** next) +{ + if (comp->opts.OptimizationDisabled()) + { + return false; + } + + if (tree->isContained() || ((tree->gtFlags & GTF_ALL_EFFECT) != 0) || !tree->OperIs(GT_ADD) || + ((emitActualTypeSize(tree) != EA_8BYTE) && (emitActualTypeSize(tree) != EA_BYREF))) + { + return false; + } + + GenTree* base = nullptr; + GenTree* index = nullptr; + + if (tree->gtOp1->OperIs(GT_CAST)) + { + index = tree->gtOp1; + base = tree->gtOp2; + } + else if (tree->gtOp2->OperIs(GT_CAST)) + { + index = tree->gtOp2; + base = tree->gtOp1; + } + else + { + return false; + } + + assert(base->IsValue()); + assert(index->IsValue()); + + if (base->isContained() || index->isContained() || !varTypeIsIntegralOrI(base) || !varTypeIsIntegralOrI(index) || + base->IsCnsIntOrI() || index->IsCnsIntOrI()) + { + return false; + } + + GenTreeCast* const cast = index->AsCast(); + GenTree* const src = cast->CastOp(); + + if (IsIntZeroExtCast(cast)) + { + JITDUMP("Removing unused node:\n "); + DISPNODE(cast); + BlockRange().Remove(cast); + DEBUG_DESTROY_NODE(cast); + + tree->gtOp1 = src; + tree->gtOp2 = base; + tree->ChangeOper(GT_ADD_UW); + + JITDUMP("Base:\n "); + DISPNODE(tree->gtOp2); + JITDUMP("Index:\n "); + DISPNODE(tree->gtOp1); + + JITDUMP("New ADD_UW node:\n "); + DISPNODE(tree); + JITDUMP("\n"); + + *next = tree->gtNext; + + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// TryLowerZextLeftShiftToSlliUw : Lower LSH(CAST) node to SLLI_UW node. 
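TryLowerShiftAddToShxadd and TryLowerZextAddToAddUw fold a scaled or zero-extended index into the Zba address-forming operations: the shNadd family adds a shifted index to a base, and the _UW/ADD_UW forms first zero-extend the low 32 bits of the index. A standalone model of what the rewritten nodes compute; the instruction names are used only as mnemonics here:

```cpp
#include <cassert>
#include <cstdint>

// shNadd  ->  base + (index << N)
static uint64_t ShNAdd(uint64_t index, uint64_t base, unsigned n)
{
    return (index << n) + base;
}

// shNadd.uw / add.uw  ->  base + (zext32(index) << N), with N == 0 for add.uw.
static uint64_t ShNAddUw(uint64_t index, uint64_t base, unsigned n)
{
    return ((index & 0xFFFFFFFFull) << n) + base;
}

int main()
{
    uint64_t base = 0x1000;
    uint64_t idx  = 0xFFFFFFFF00000003ull; // garbage in the upper half

    assert(ShNAdd(3, base, 2) == base + (3u << 2));        // base + index * 4
    assert(ShNAddUw(idx, base, 3) == base + (3ull << 3));  // upper 32 bits of the index discarded first
    assert(ShNAddUw(idx, base, 0) == base + 3);            // the unscaled ADD_UW case
    return 0;
}
```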
+// +// Arguments: +// tree - pointer to the node +// next - [out] Next node to lower if this function returns true +// +// Return Value: +// false if no changes were made +// +bool Lowering::TryLowerZextLeftShiftToSlliUw(GenTreeOp* tree, GenTree** next) +{ + if (comp->opts.OptimizationDisabled()) + { + return false; + } + + if (tree->isContained() || ((tree->gtFlags & GTF_ALL_EFFECT) != 0) || !tree->OperIs(GT_LSH) || + !tree->gtOp1->OperIs(GT_CAST) || !tree->gtOp2->IsCnsIntOrI() || + ((emitActualTypeSize(tree) != EA_8BYTE) && (emitActualTypeSize(tree) != EA_BYREF))) + { + return false; + } + + GenTree* index = tree->gtOp1; + + assert(index->IsValue()); + + if (index->isContained() || !varTypeIsIntegralOrI(index) || index->IsCnsIntOrI()) + { + return false; + } + + GenTreeCast* const cast = index->AsCast(); + GenTree* const src = cast->CastOp(); + + if (IsIntZeroExtCast(cast)) + { + JITDUMP("Removing unused node:\n "); + DISPNODE(cast); + BlockRange().Remove(cast); + DEBUG_DESTROY_NODE(cast); + + tree->gtOp1 = src; + tree->ChangeOper(GT_SLLI_UW); + + JITDUMP("Index:\n "); + DISPNODE(tree->gtOp1); + + JITDUMP("New SLLI_UW node:\n "); + DISPNODE(tree); + JITDUMP("\n"); + + *next = tree->gtNext; + + return true; + } + + return false; +} + #ifdef FEATURE_SIMD //---------------------------------------------------------------------------------------------- // Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. @@ -615,7 +982,7 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) { // If this is the rhs of a block copy it will be handled when we handle the store. - if (indirNode->TypeGet() == TYP_STRUCT) + if (indirNode->TypeIs(TYP_STRUCT)) { return; } @@ -625,7 +992,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) #endif // FEATURE_SIMD GenTree* addr = indirNode->Addr(); - if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) + if (addr->OperIs(GT_LEA) && IsSafeToContainMem(indirNode, addr)) { MakeSrcContained(indirNode, addr); } @@ -635,6 +1002,10 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) // - GT_LCL_ADDR is a stack addr mode. MakeSrcContained(indirNode, addr); } + else if (addr->IsCnsIntOrI() && !addr->AsIntCon()->ImmedValNeedsReloc(comp)) + { + MakeSrcContained(indirNode, addr); + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index f3d6ad039bcf..e835de2393df 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -107,30 +107,6 @@ GenTree* Lowering::LowerStoreIndir(GenTreeStoreInd* node) } ContainCheckStoreIndir(node); -#if defined(FEATURE_HW_INTRINSICS) - if (comp->IsBaselineVector512IsaSupportedOpportunistically() || - comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - if (!node->Data()->IsCnsVec()) - { - return node->gtNext; - } - - if (!node->Data()->AsVecCon()->TypeIs(TYP_SIMD32, TYP_SIMD64)) - { - return node->gtNext; - } - - if (node->Data()->IsVectorAllBitsSet() || node->Data()->IsVectorZero()) - { - // To avoid some unexpected regression, this optimization only applies to non-all 1/0 constant vectors. 
- return node->gtNext; - } - - TryCompressConstVecData(node); - } -#endif - return node->gtNext; } @@ -320,9 +296,65 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) ContainCheckBinary(binOp); +#ifdef TARGET_AMD64 + if (JitConfig.EnableApxConditionalChaining()) + { + if (binOp->OperIs(GT_AND, GT_OR)) + { + GenTree* next; + if (TryLowerAndOrToCCMP(binOp, &next)) + { + return next; + } + } + } +#endif // TARGET_AMD64 + return binOp->gtNext; } +#ifdef TARGET_AMD64 +//------------------------------------------------------------------------ +// TruthifyingFlags: Get a flags immediate that will make a specified condition true. +// +// Arguments: +// condition - the condition. +// +// Returns: +// A flags immediate that, if those flags were set, would cause the specified condition to be true. +// (NOTE: This just has to make the condition be true, i.e., if the condition calls for (SF ^ OF), then +// returning one will suffice +insCflags Lowering::TruthifyingFlags(GenCondition condition) +{ + switch (condition.GetCode()) + { + case GenCondition::EQ: + return INS_FLAGS_ZF; + case GenCondition::NE: + return INS_FLAGS_NONE; + case GenCondition::SGE: // !(SF ^ OF) + return INS_FLAGS_NONE; + case GenCondition::SGT: // !(SF ^ OF) && !ZF + return INS_FLAGS_NONE; + case GenCondition::SLE: // !(SF ^ OF) || ZF + return INS_FLAGS_ZF; + case GenCondition::SLT: // (SF ^ OF) + return INS_FLAGS_SF; + case GenCondition::UGE: // !CF + return INS_FLAGS_NONE; + case GenCondition::UGT: // !CF && !ZF + return INS_FLAGS_NONE; + case GenCondition::ULE: // CF || ZF + return INS_FLAGS_ZF; + case GenCondition::ULT: // CF + return INS_FLAGS_CF; + default: + NO_WAY("unexpected condition type"); + return INS_FLAGS_NONE; + } +} +#endif // TARGET_AMD64 + //------------------------------------------------------------------------ // LowerBlockStore: Lower a block store node // @@ -375,7 +407,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ssize_t fill = src->AsIntCon()->IconValue() & 0xFF; - const bool canUseSimd = !blkNode->IsOnHeapAndContainsReferences() && comp->IsBaselineSimdIsaSupported(); + const bool canUseSimd = !blkNode->IsOnHeapAndContainsReferences(); if (size > comp->getUnrollThreshold(Compiler::UnrollKind::Memset, canUseSimd)) { // It turns out we can't use SIMD so the default threshold is too big @@ -470,7 +502,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) return; } - assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL)); + assert(dstAddr->TypeIs(TYP_BYREF, TYP_I_IMPL)); // If we have a long enough sequence of slots that do not require write barriers then // we can use REP MOVSD/Q instead of a sequence of MOVSD/Q instructions. According to the @@ -592,18 +624,6 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT addrMode->SetContained(); } -//------------------------------------------------------------------------ -// LowerPutArgStkOrSplit: Lower a GT_PUTARG_STK/GT_PUTARG_SPLIT. -// -// Arguments: -// putArgNode - The node of interest -// -void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode) -{ - assert(putArgNode->OperIs(GT_PUTARG_STK)); // No split args on XArch. - LowerPutArgStk(putArgNode); -} - //------------------------------------------------------------------------ // LowerPutArgStk: Lower a GT_PUTARG_STK. 
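The AMD64 TruthifyingFlags above picks which of ZF/SF/CF to write when the APX conditional compare is skipped, so that the requested condition still evaluates true. A tiny check of a few of those mappings against the usual x86 condition predicates; the Flags struct is purely illustrative:

```cpp
#include <cassert>

struct Flags
{
    bool zf = false, sf = false, cf = false, of = false;
};

// x86 condition predicates over the flag bits.
static bool Eq(Flags f)  { return f.zf; }
static bool Ne(Flags f)  { return !f.zf; }
static bool Slt(Flags f) { return f.sf != f.of; }
static bool Sge(Flags f) { return f.sf == f.of; }
static bool Ule(Flags f) { return f.cf || f.zf; }
static bool Ult(Flags f) { return f.cf; }

int main()
{
    // EQ is made true by setting ZF; NE and SGE need no flags set at all.
    assert(Eq(Flags{/*zf*/ true}));
    assert(Ne(Flags{}) && Sge(Flags{}));

    // SLT needs SF (with OF clear); ULT needs CF; ULE is satisfied by ZF alone.
    assert(Slt(Flags{false, /*sf*/ true}));
    assert(Ult(Flags{false, false, /*cf*/ true}));
    assert(Ule(Flags{/*zf*/ true}));
    return 0;
}
```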
// @@ -669,7 +689,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) return; } -#ifdef FEATURE_PUT_STRUCT_ARG_STK if (src->TypeIs(TYP_STRUCT)) { assert(src->OperIs(GT_BLK) || src->OperIsLocalRead()); @@ -756,7 +775,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) { return; } -#endif // FEATURE_PUT_STRUCT_ARG_STK assert(!src->TypeIs(TYP_STRUCT)); @@ -793,39 +811,22 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) #endif // TARGET_X86 } -/* Lower GT_CAST(srcType, DstType) nodes. - * - * Casts from small int type to float/double are transformed as follows: - * GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double) - * GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double) - * GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double) - * GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double) - * - * Unless the EVEX conversion instructions are available, casts from Uint32 - * are morphed as follows by front-end and hence should not be seen here. - * GT_CAST(uint32, float/double) = GT_CAST(GT_CAST(uint32, long), float/double) - * - * - * Similarly casts from float/double to a smaller int type are transformed as follows: - * GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) - * GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) - * GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) - * GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) - * - * Note that for the following conversions we still depend on helper calls and - * don't expect to see them here. - * i) GT_CAST(float/double, uint64) when EVEX is not available - * ii) GT_CAST(float/double, int type with overflow detection) - */ -GenTree* Lowering::LowerCast(GenTree* tree) +//------------------------------------------------------------------------ +// LowerCast: Lower GT_CAST(srcType, DstType) nodes. +// +// Arguments: +// tree - GT_CAST node to be lowered +// +// Return Value: +// None. +// +void Lowering::LowerCast(GenTree* tree) { assert(tree->OperIs(GT_CAST)); - GenTree* castOp = tree->AsCast()->CastOp(); - var_types castToType = tree->CastToType(); - var_types dstType = castToType; - var_types srcType = castOp->TypeGet(); - var_types tmpType = TYP_UNDEF; + GenTree* castOp = tree->AsCast()->CastOp(); + var_types dstType = tree->CastToType(); + var_types srcType = castOp->TypeGet(); // force the srcType to unsigned if GT_UNSIGNED flag is set if (tree->IsUnsigned()) @@ -833,366 +834,442 @@ GenTree* Lowering::LowerCast(GenTree* tree) srcType = varTypeToUnsigned(srcType); } - // We should not see the following casts unless directly supported by hardware, - // as they are expected to be lowered appropriately or converted into helper calls by front-end. - // srcType = float/double castToType = * and overflow detecting cast - // Reason: must be converted to a helper call - // srcType = float/double, castToType = ulong - // Reason: must be converted to a helper call - // srcType = uint castToType = float/double - // Reason: uint -> float/double = uint -> long -> float/double if (varTypeIsFloating(srcType)) { + // Overflow casts should have been converted to helper call in morph. noway_assert(!tree->gtOverflow()); - assert(castToType != TYP_ULONG || comp->canUseEvexEncodingDebugOnly()); + // Small types should have had an intermediate int cast inserted in morph. 
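The saturation expansion below (and the AVX10v2 instructions that can replace it) implements the saturating float-to-integer semantics the comments describe: NaN becomes zero, and out-of-range inputs clamp to the destination's maximum or minimum instead of producing the x86 "integer indefinite" value. A scalar reference sketch of that behavior in plain C++, not the SIMD fixup/compare sequence:

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

// Saturating double -> int32 conversion: NaN -> 0, clamp at the type bounds.
static int32_t SaturatingConvert(double d)
{
    if (std::isnan(d))
    {
        return 0;
    }
    if (d >= static_cast<double>(std::numeric_limits<int32_t>::max()))
    {
        return std::numeric_limits<int32_t>::max();
    }
    if (d <= static_cast<double>(std::numeric_limits<int32_t>::min()))
    {
        return std::numeric_limits<int32_t>::min();
    }
    return static_cast<int32_t>(d);
}

int main()
{
    assert(SaturatingConvert(std::nan("")) == 0);
    assert(SaturatingConvert(1e30) == std::numeric_limits<int32_t>::max());
    assert(SaturatingConvert(-1e30) == std::numeric_limits<int32_t>::min());
    assert(SaturatingConvert(42.9) == 42);
    return 0;
}
```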
+ assert(!varTypeIsSmall(dstType)); + // Long types should have been handled by helper call or in DecomposeLongs on x86. + assert(!varTypeIsLong(dstType) || TargetArchitecture::Is64Bit); } else if (srcType == TYP_UINT) { - assert(castToType != TYP_FLOAT || comp->canUseEvexEncodingDebugOnly()); + // uint->float casts should have an intermediate cast to long unless + // we have the EVEX unsigned conversion instructions available. + assert(dstType != TYP_FLOAT || comp->canUseEvexEncodingDebugOnly()); } -#if defined(TARGET_AMD64) - // Handle saturation logic for X64 - if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType) && !varTypeIsSmall(dstType)) - { - // We should have filtered out float -> long conversion and - // converted it to float -> double -> long conversion. - assert((dstType != TYP_LONG) || (srcType != TYP_FLOAT)); - - // we should have handled overflow cases in morph itself - assert(!tree->gtOverflow()); - - CorInfoType fieldType = (srcType == TYP_DOUBLE) ? CORINFO_TYPE_DOUBLE : CORINFO_TYPE_FLOAT; - GenTree* castOutput = nullptr; - LIR::Use castOpUse(BlockRange(), &(tree->AsCast()->CastOp()), tree); - ReplaceWithLclVar(castOpUse); - castOp = tree->AsCast()->CastOp(); - bool isV512Supported = false; - /*The code below is to introduce saturating conversions on X86/X64. - The C# equivalence of the code is given below --> - - // Replace QNaN and SNaN with Zero - op1 = Avx512F.Fixup(op1, op1, Vector128.Create(0x88), 0); - - // Convert from double to long, replacing any values that were greater than or equal to MaxValue - with MaxValue - // Values that were less than or equal to MinValue will already be MinValue - return Vector128.ConditionalSelect( - Vector128.LessThan(op1, Vector128.Create(long.MaxValue)).AsInt64(), - Avx512DQ.VL.ConvertToVector128Int64(op1), - Vector128.Create(long.MaxValue) - ); - */ - if (comp->compIsEvexOpportunisticallySupported(isV512Supported)) - { - // Clone the cast operand for usage. - GenTree* op1Clone1 = comp->gtClone(castOp); - BlockRange().InsertAfter(castOp, op1Clone1); - - // Generate the control table for VFIXUPIMMSD - // The behavior we want is to saturate negative values to 0. - GenTreeVecCon* tbl = comp->gtNewVconNode(TYP_SIMD16); - tbl->gtSimdVal.i32[0] = (varTypeIsUnsigned(dstType)) ? 0x08080088 : 0x00000088; - BlockRange().InsertAfter(op1Clone1, tbl); - - // get a zero int node for control table - GenTree* ctrlByte = comp->gtNewIconNode(0); - BlockRange().InsertAfter(tbl, ctrlByte); - - NamedIntrinsic fixupHwIntrinsicID = !isV512Supported ? NI_AVX10v1_FixupScalar : NI_AVX512F_FixupScalar; - if (varTypeIsUnsigned(dstType)) - { - // run vfixupimmsd base on table and no flags reporting - GenTree* oper1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, castOp, op1Clone1, tbl, ctrlByte, - fixupHwIntrinsicID, fieldType, 16); - BlockRange().InsertAfter(ctrlByte, oper1); - LowerNode(oper1); - - // Convert to scalar - // Here, we try to insert a Vector128 to Scalar node so that the input - // can be provided to the scalar cast - GenTree* oper2 = comp->gtNewSimdHWIntrinsicNode(srcType, oper1, NI_Vector128_ToScalar, fieldType, 16); - BlockRange().InsertAfter(oper1, oper2); - LowerNode(oper2); - - castOutput = comp->gtNewCastNode(genActualType(dstType), oper2, false, dstType); - BlockRange().InsertAfter(oper2, castOutput); - } - else - { - CorInfoType destFieldType = (dstType == TYP_INT) ? CORINFO_TYPE_INT : CORINFO_TYPE_LONG; - - ssize_t actualMaxVal = (dstType == TYP_INT) ? 
INT32_MAX : INT64_MAX; - - // run vfixupimmsd base on table and no flags reporting - GenTree* fixupVal = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, castOp, op1Clone1, tbl, ctrlByte, - fixupHwIntrinsicID, fieldType, 16); - BlockRange().InsertAfter(ctrlByte, fixupVal); - LowerNode(fixupVal); - - // get the max value vector - GenTree* maxValScalar = (srcType == TYP_DOUBLE) - ? comp->gtNewDconNodeD(static_cast(actualMaxVal)) - : comp->gtNewDconNodeF(static_cast(actualMaxVal)); - GenTree* maxVal = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxValScalar, fieldType, 16); - BlockRange().InsertAfter(fixupVal, maxVal); - - GenTree* maxValDstTypeScalar = (dstType == TYP_INT) ? comp->gtNewIconNode(actualMaxVal, dstType) - : comp->gtNewLconNode(actualMaxVal); - GenTree* maxValDstType = - comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxValDstTypeScalar, destFieldType, 16); - BlockRange().InsertAfter(maxVal, maxValDstType); - - // usage 1 --> compare with max value of integer - GenTree* compMask = comp->gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, fixupVal, maxVal, fieldType, 16); - BlockRange().InsertAfter(maxValDstType, compMask); - - // convert fixupVal to local variable and clone it for further use - LIR::Use fixupValUse(BlockRange(), &(compMask->AsHWIntrinsic()->Op(1)), compMask); - ReplaceWithLclVar(fixupValUse); - fixupVal = compMask->AsHWIntrinsic()->Op(1); - GenTree* fixupValClone = comp->gtClone(fixupVal); - LowerNode(compMask); - BlockRange().InsertAfter(fixupVal, fixupValClone); - - GenTree* FixupValCloneScalar = - comp->gtNewSimdHWIntrinsicNode(srcType, fixupValClone, NI_Vector128_ToScalar, fieldType, 16); - BlockRange().InsertAfter(compMask, FixupValCloneScalar); - LowerNode(FixupValCloneScalar); - - // cast it - GenTreeCast* newCast = comp->gtNewCastNode(dstType, FixupValCloneScalar, false, dstType); - BlockRange().InsertAfter(FixupValCloneScalar, newCast); - - GenTree* newTree = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, newCast, destFieldType, 16); - BlockRange().InsertAfter(newCast, newTree); - LowerNode(newTree); - - // usage 2 --> use the compared mask with input value and max value to blend - GenTree* control = comp->gtNewIconNode(0xCA); // (B & A) | (C & ~A) - BlockRange().InsertAfter(newTree, control); - GenTree* cndSelect = comp->gtNewSimdTernaryLogicNode(TYP_SIMD16, compMask, maxValDstType, newTree, - control, destFieldType, 16); - BlockRange().InsertAfter(control, cndSelect); - LowerNode(cndSelect); - - castOutput = - comp->gtNewSimdHWIntrinsicNode(dstType, cndSelect, NI_Vector128_ToScalar, destFieldType, 16); - BlockRange().InsertAfter(cndSelect, castOutput); - LowerNode(castOutput); - } - } - else if (varTypeIsSigned(dstType) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - CorInfoType destFieldType = (dstType == TYP_INT) ? CORINFO_TYPE_INT : CORINFO_TYPE_LONG; - - ssize_t actualMaxVal = (dstType == TYP_INT) ? 
INT32_MAX : INT64_MAX; - - // create clones for usage - GenTree* castOpClone1 = comp->gtClone(castOp); - GenTree* castOpClone2 = comp->gtClone(castOp); - BlockRange().InsertAfter(castOp, castOpClone1); - BlockRange().InsertAfter(castOpClone1, castOpClone2); - - GenTree* oper = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOp, fieldType, 16); - BlockRange().InsertAfter(castOpClone2, oper); - LowerNode(oper); - GenTree* op1Clone1 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone1, fieldType, 16); - BlockRange().InsertAfter(oper, op1Clone1); - LowerNode(op1Clone1); - GenTree* op1Clone2 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone2, fieldType, 16); - BlockRange().InsertAfter(op1Clone1, op1Clone2); - LowerNode(op1Clone2); - - // check NaN - GenTree* mask1 = comp->gtNewSimdCmpOpNode(GT_EQ, TYP_SIMD16, oper, op1Clone1, fieldType, 16); - BlockRange().InsertAfter(op1Clone2, mask1); - LowerNode(mask1); - // inp = inp & mask - GenTree* maskNaN = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, op1Clone2, mask1, fieldType, 16); - BlockRange().InsertAfter(mask1, maskNaN); - LowerNode(maskNaN); - - // get the max value vector - GenTree* maxVal = (srcType == TYP_DOUBLE) ? comp->gtNewDconNodeD(static_cast(actualMaxVal)) - : comp->gtNewDconNodeF(static_cast(actualMaxVal)); - GenTree* maxValDup = - (dstType == TYP_INT) ? comp->gtNewIconNode(actualMaxVal) : comp->gtNewLconNode(actualMaxVal); - maxVal = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxVal, fieldType, 16); - BlockRange().InsertAfter(maskNaN, maxVal); - maxValDup = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxValDup, destFieldType, 16); - BlockRange().InsertAfter(maxVal, maxValDup); - - // usage 1 --> compare with max value of integer - GenTree* compMask = comp->gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, maskNaN, maxVal, fieldType, 16); - BlockRange().InsertAfter(maxValDup, compMask); - - // we will be using the maskNaN value twice - LIR::Use maskNaNUse(BlockRange(), &(compMask->AsHWIntrinsic()->Op(1)), compMask); - ReplaceWithLclVar(maskNaNUse); - maskNaN = compMask->AsHWIntrinsic()->Op(1); - GenTree* maskNaNClone = comp->gtClone(maskNaN); - LowerNode(compMask); - BlockRange().InsertAfter(maskNaN, maskNaNClone); - - // convert to scalar for conversion - GenTree* maskNaNCloneScalar = - comp->gtNewSimdHWIntrinsicNode(srcType, maskNaNClone, NI_Vector128_ToScalar, fieldType, 16); - BlockRange().InsertAfter(compMask, maskNaNCloneScalar); - LowerNode(maskNaNCloneScalar); - - // cast it - GenTreeCast* newCast = comp->gtNewCastNode(dstType, maskNaNCloneScalar, false, dstType); - BlockRange().InsertAfter(maskNaNCloneScalar, newCast); - GenTree* newTree = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, newCast, destFieldType, 16); - BlockRange().InsertAfter(newCast, newTree); - LowerNode(newTree); - - // usage 2 --> use thecompared mask with input value and max value to blend - GenTree* cndSelect = comp->gtNewSimdCndSelNode(TYP_SIMD16, compMask, maxValDup, newTree, destFieldType, 16); - BlockRange().InsertAfter(newTree, cndSelect); - LowerNode(cndSelect); - - castOutput = comp->gtNewSimdHWIntrinsicNode(dstType, cndSelect, NI_Vector128_ToScalar, destFieldType, 16); - BlockRange().InsertAfter(cndSelect, castOutput); - LowerNode(castOutput); - } - else +#ifdef FEATURE_HW_INTRINSICS + if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType) && + !comp->compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) + { + // If we don't have AVX10v2 saturating conversion instructions for + // floating->integral, we have to handle the 
saturation logic here. + + JITDUMP("LowerCast before:\n"); + DISPTREERANGE(BlockRange(), tree); + + CorInfoType srcBaseType = (srcType == TYP_FLOAT) ? CORINFO_TYPE_FLOAT : CORINFO_TYPE_DOUBLE; + LIR::Range castRange = LIR::EmptyRange(); + + // We'll be using SIMD instructions to fix up castOp before conversion. + // + // This creates the equivalent of the following C# code: + // var srcVec = Vector128.CreateScalarUnsafe(castOp); + + GenTree* srcVector = comp->gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, castOp, srcBaseType, 16); + castRange.InsertAtEnd(srcVector); + + if (srcVector->IsCnsVec()) { - // The remaining case not handled above should be conversion - // to TYP_UINT in case where SSE41 is supported. - // We should have converted float -> uint conversion to - // float -> double -> uint during morph. - assert((dstType == TYP_UINT) && comp->compIsaSupportedDebugOnly(InstructionSet_SSE41) && - (srcType != TYP_FLOAT)); - - ssize_t actualMaxVal = UINT32_MAX; - CorInfoType destFieldType = CORINFO_TYPE_LONG; - - GenTree* castOpClone1 = comp->gtClone(castOp); - GenTree* castOpClone2 = comp->gtClone(castOp); - GenTree* castOpClone3 = comp->gtClone(castOp); - BlockRange().InsertAfter(castOp, castOpClone1); - BlockRange().InsertAfter(castOpClone1, castOpClone2); - BlockRange().InsertAfter(castOpClone2, castOpClone3); - - GenTree* oper = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOp, fieldType, 16); - BlockRange().InsertAfter(castOpClone3, oper); - LowerNode(oper); - GenTree* op1Clone1 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone1, fieldType, 16); - BlockRange().InsertAfter(oper, op1Clone1); - LowerNode(op1Clone1); - GenTree* op1Clone2 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone2, fieldType, 16); - BlockRange().InsertAfter(op1Clone1, op1Clone2); - LowerNode(op1Clone2); - GenTree* op1Clone3 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone3, fieldType, 16); - BlockRange().InsertAfter(op1Clone2, op1Clone3); - LowerNode(op1Clone3); - - // get the max/min value vector - GenTree* minVal = comp->gtNewDconNodeD(static_cast(0)); - minVal = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, minVal, fieldType, 16); - BlockRange().InsertAfter(op1Clone3, minVal); - GenTree* maxVal = comp->gtNewDconNodeD(static_cast(actualMaxVal)); - maxVal = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxVal, fieldType, 16); - BlockRange().InsertAfter(minVal, maxVal); - - // check NaN - GenTree* mask1 = comp->gtNewSimdCmpOpNode(GT_EQ, TYP_SIMD16, oper, op1Clone1, fieldType, 16); - BlockRange().InsertAfter(maxVal, mask1); - LowerNode(mask1); - - // check negative - GenTree* mask2 = comp->gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, op1Clone2, minVal, fieldType, 16); - BlockRange().InsertAfter(mask1, mask2); - LowerNode(mask2); - - // and mask - GenTree* mask12 = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, mask1, mask2, fieldType, 16); - BlockRange().InsertAfter(mask2, mask12); - LowerNode(mask12); - - // inp = inp & mask - GenTree* saturatedVal = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, op1Clone3, mask12, fieldType, 16); - BlockRange().InsertAfter(mask12, saturatedVal); - LowerNode(saturatedVal); - - // compare with max value of uint - GenTree* mask3 = comp->gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, saturatedVal, maxVal, fieldType, 16); - BlockRange().InsertAfter(saturatedVal, mask3); - - // Convert both the operands of mask3 to local variables for reusage - LIR::Use saturatedValUse(BlockRange(), &(mask3->AsHWIntrinsic()->Op(1)), mask3); - ReplaceWithLclVar(saturatedValUse); - 
saturatedVal = mask3->AsHWIntrinsic()->Op(1); - GenTree* saturatedValDup = comp->gtClone(saturatedVal); - BlockRange().InsertAfter(saturatedVal, saturatedValDup); - - LIR::Use maxValUse(BlockRange(), &(mask3->AsHWIntrinsic()->Op(2)), mask3); - ReplaceWithLclVar(maxValUse); - maxVal = mask3->AsHWIntrinsic()->Op(2); - GenTree* maxValDup = comp->gtClone(maxVal); - LowerNode(mask3); - BlockRange().InsertAfter(maxVal, maxValDup); - - // Select based on mask3 - GenTree* castOpVal = - comp->gtNewSimdCndSelNode(TYP_SIMD16, mask3, maxValDup, saturatedValDup, fieldType, 16); - BlockRange().InsertAfter(mask3, castOpVal); - LowerNode(castOpVal); - - // scalar - GenTree* castOpValScalar = - comp->gtNewSimdHWIntrinsicNode(srcType, castOpVal, NI_Vector128_ToScalar, fieldType, 16); - BlockRange().InsertAfter(castOpVal, castOpValScalar); - LowerNode(castOpValScalar); - - // cast it - castOutput = comp->gtNewCastNode(TYP_INT, castOpValScalar, false, dstType); - BlockRange().InsertAfter(castOpValScalar, castOutput); - } - assert(castOutput != nullptr); - LIR::Use use; - if (BlockRange().TryGetUse(tree, &use)) + castOp->SetUnusedValue(); + } + + if (varTypeIsUnsigned(dstType) && comp->canUseEvexEncoding()) { - use.ReplaceWith(castOutput); + // EVEX unsigned conversion instructions saturate positive overflow properly, so as + // long as we fix up NaN and negative values, we can preserve the existing cast node. + // + // maxs[sd] will take the value from the second operand if the first operand's value is + // NaN, which allows us to fix up both negative and NaN values with a single instruction. + // + // This creates the equivalent of the following C# code: + // castOp = Sse.MaxScalar(srcVec, Vector128.Zero).ToScalar(); + + NamedIntrinsic maxScalarIntrinsic = NI_X86Base_MaxScalar; + + GenTree* zero = comp->gtNewZeroConNode(TYP_SIMD16); + GenTree* fixupVal = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcVector, zero, maxScalarIntrinsic, srcBaseType, 16); + + GenTree* toScalar = comp->gtNewSimdToScalarNode(srcType, fixupVal, srcBaseType, 16); + + castRange.InsertAtEnd(zero); + castRange.InsertAtEnd(fixupVal); + castRange.InsertAtEnd(toScalar); + + tree->AsCast()->CastOp() = toScalar; } else { - castOutput->SetUnusedValue(); + // We need to fix up NaN as well as handle possible overflow. Signed conversions + // return int/long.MinValue for any overflow, which is correct for saturation of + // negative, but the result must be replaced with MaxValue for positive overflow. + + CorInfoType dstBaseType = CORINFO_TYPE_UNDEF; + NamedIntrinsic convertIntrinsic = NI_Illegal; + GenTree* maxIntegralValue = nullptr; + GenTree* maxFloatingValue = comp->gtNewVconNode(TYP_SIMD16); + simd_t* maxFloatSimdVal = &maxFloatingValue->AsVecCon()->gtSimdVal; + + switch (dstType) + { + case TYP_INT: + { + dstBaseType = CORINFO_TYPE_INT; + maxIntegralValue = comp->gtNewIconNode(INT32_MAX); + if (srcType == TYP_FLOAT) + { + maxFloatSimdVal->f32[0] = 2147483648.0f; + convertIntrinsic = NI_X86Base_ConvertToInt32WithTruncation; + } + else + { + maxFloatSimdVal->f64[0] = 2147483648.0; + convertIntrinsic = NI_X86Base_ConvertToInt32WithTruncation; + } + break; + } + case TYP_UINT: + { + dstBaseType = CORINFO_TYPE_UINT; + maxIntegralValue = comp->gtNewIconNode(static_cast(UINT32_MAX)); + if (srcType == TYP_FLOAT) + { + maxFloatSimdVal->f32[0] = 4294967296.0f; + convertIntrinsic = TargetArchitecture::Is64Bit + ? 
NI_X86Base_X64_ConvertToInt64WithTruncation + : NI_X86Base_ConvertToVector128Int32WithTruncation; + } + else + { + maxFloatSimdVal->f64[0] = 4294967296.0; + convertIntrinsic = TargetArchitecture::Is64Bit + ? NI_X86Base_X64_ConvertToInt64WithTruncation + : NI_X86Base_ConvertToVector128Int32WithTruncation; + } + break; + } + case TYP_LONG: + { + dstBaseType = CORINFO_TYPE_LONG; + maxIntegralValue = comp->gtNewLconNode(INT64_MAX); + if (srcType == TYP_FLOAT) + { + maxFloatSimdVal->f32[0] = 9223372036854775808.0f; + convertIntrinsic = NI_X86Base_X64_ConvertToInt64WithTruncation; + } + else + { + maxFloatSimdVal->f64[0] = 9223372036854775808.0; + convertIntrinsic = NI_X86Base_X64_ConvertToInt64WithTruncation; + } + break; + } + case TYP_ULONG: + { + dstBaseType = CORINFO_TYPE_ULONG; + maxIntegralValue = comp->gtNewLconNode(static_cast(UINT64_MAX)); + if (srcType == TYP_FLOAT) + { + maxFloatSimdVal->f32[0] = 18446744073709551616.0f; + convertIntrinsic = NI_X86Base_X64_ConvertToInt64WithTruncation; + } + else + { + maxFloatSimdVal->f64[0] = 18446744073709551616.0; + convertIntrinsic = NI_X86Base_X64_ConvertToInt64WithTruncation; + } + break; + } + default: + { + unreached(); + } + } + + // We will use the input value at least twice, so we preemptively replace it with a lclVar. + LIR::Use srcUse; + LIR::Use::MakeDummyUse(castRange, srcVector, &srcUse); + srcUse.ReplaceWithLclVar(comp); + srcVector = srcUse.Def(); + + GenTree* srcClone = nullptr; + GenTree* convertResult = nullptr; + + if (varTypeIsSigned(dstType)) + { + // Fix up NaN values before conversion. Saturation is handled after conversion, + // because MaxValue may not be precisely representable in the floating format. + // + // This creates the equivalent of the following C# code: + // var nanMask = Sse.CompareScalarOrdered(srcVec, srcVec); + // var fixupVal = Sse.And(srcVec, nanMask); + // convertResult = Sse.ConvertToInt32WithTruncation(fixupVal); + + NamedIntrinsic compareNaNIntrinsic = NI_X86Base_CompareScalarOrdered; + + srcClone = comp->gtClone(srcVector); + GenTree* nanMask = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcVector, srcClone, compareNaNIntrinsic, + srcBaseType, 16); + + castRange.InsertAtEnd(srcClone); + castRange.InsertAtEnd(nanMask); + + srcClone = comp->gtClone(srcVector); + GenTree* fixupVal = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, nanMask, srcClone, srcBaseType, 16); + + castRange.InsertAtEnd(srcClone); + castRange.InsertAtEnd(fixupVal); + + convertResult = comp->gtNewSimdHWIntrinsicNode(dstType, fixupVal, convertIntrinsic, srcBaseType, 16); + } + else + { + // maxs[sd] will take the value from the second operand if the first operand's value is + // NaN, which allows us to fix up both negative and NaN values with a single instruction. + // + // This creates the equivalent of the following C# code: + // var fixupVal = Sse.MaxScalar(srcVec, Vector128.Zero); + + NamedIntrinsic maxScalarIntrinsic = NI_X86Base_MaxScalar; + + GenTree* zero = comp->gtNewZeroConNode(TYP_SIMD16); + GenTree* fixupVal = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcVector, zero, maxScalarIntrinsic, srcBaseType, 16); + + castRange.InsertAtEnd(zero); + castRange.InsertAtEnd(fixupVal); + + if ((dstType == TYP_UINT) && (convertIntrinsic == NI_X86Base_X64_ConvertToInt64WithTruncation)) + { + // On x64, we can use long conversion to handle uint directly. 
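// In C# terms this step is roughly (an illustrative sketch, mirroring the style of the other
// equivalences in this function; the fixup above has already removed NaN and negative inputs):
//     long wide = Sse.X64.ConvertToInt64WithTruncation(fixupVal);
//     uint converted = (uint)wide;   // every in-range uint value fits in the signed 64-bit result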
+ convertResult = + comp->gtNewSimdHWIntrinsicNode(TYP_LONG, fixupVal, convertIntrinsic, srcBaseType, 16); + } + else + { + // We're doing a conversion that isn't supported directly by hardware. We will emulate + // the unsigned conversion by using the signed instruction on both the fixed-up input + // value and a negative value that has the same bit representation when converted to + // integer. If the conversion overflows as a signed integer, the negative conversion + // result is selected. + // + // This creates the equivalent of the following C# code: + // var wrapVal = Sse.SubtractScalar(srcVec, maxFloatingValue); + + NamedIntrinsic subtractIntrinsic = NI_X86Base_SubtractScalar; + + // We're going to use maxFloatingValue twice, so replace the constant with a lclVar. + castRange.InsertAtEnd(maxFloatingValue); + + LIR::Use maxFloatUse; + LIR::Use::MakeDummyUse(castRange, maxFloatingValue, &maxFloatUse); + maxFloatUse.ReplaceWithLclVar(comp); + maxFloatingValue = maxFloatUse.Def(); + + GenTree* floorVal = comp->gtClone(srcVector); + castRange.InsertAtEnd(floorVal); + + if ((srcType == TYP_DOUBLE) && (dstType == TYP_UINT)) + { + // This technique works only if the truncating conversion of the positive and negative + // values causes them to round in the same direction. i.e. there is no rounding, because + // we have a whole number. This is always true if the exponent is larger than the number + // of significand bits, which will always be the case for double->ulong or float->uint. + // + // For double->uint, the double has enough precision to exactly represent any whole number + // in range, with bits left over. e.g. we might have a value of 4294967295.9999995. + // We must, therefore, truncate the value before wrapping it to negative. + + if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + // This creates the equivalent of the following C# code: + // floorVal = Sse41.RoundToZeroScalar(srcVector); + + floorVal = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, floorVal, NI_SSE41_RoundToZeroScalar, + srcBaseType, 16); + castRange.InsertAtEnd(floorVal); + } + else + { + // We don't have `roundsd` available, but we can truncate the value by simply zeroing out + // the low 21 bits of the double. This works because we know we will only use the negative + // value when the exponent is exactly 31, meaning 31 of the 52 bits in the significand are + // used for the whole portion of the number, and the remaining 21 bits are fractional. + // + // This creates the equivalent of the following C# code: + // floorVal = ((srcVector.AsUInt64() >>> 21) << 21).AsDouble(); + + GenTree* twentyOne = comp->gtNewIconNode(21); + GenTree* rightShift = comp->gtNewSimdBinOpNode(GT_RSZ, TYP_SIMD16, floorVal, twentyOne, + CORINFO_TYPE_ULONG, 16); + castRange.InsertAtEnd(twentyOne); + castRange.InsertAtEnd(rightShift); + + twentyOne = comp->gtClone(twentyOne); + floorVal = comp->gtNewSimdBinOpNode(GT_LSH, TYP_SIMD16, rightShift, twentyOne, + CORINFO_TYPE_ULONG, 16); + castRange.InsertAtEnd(twentyOne); + castRange.InsertAtEnd(floorVal); + } + } + + GenTree* wrapVal = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, floorVal, maxFloatingValue, + subtractIntrinsic, srcBaseType, 16); + castRange.InsertAtEnd(wrapVal); + + maxFloatingValue = comp->gtClone(maxFloatingValue); + + if (dstType == TYP_UINT) + { + // We can keep the conversion results in SIMD registers to make selection of the + // correct result simpler. 
+ // + // This creates the equivalent of the following C# code: + // var result = Sse2.ConvertToVector128Int32WithTruncation(fixupVal); + // var negated = Sse2.ConvertToVector128Int32WithTruncation(wrapVal); + + GenTree* result = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, fixupVal, convertIntrinsic, srcBaseType, 16); + GenTree* negated = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, wrapVal, convertIntrinsic, srcBaseType, 16); + + castRange.InsertAtEnd(result); + castRange.InsertAtEnd(negated); + + // We need the result twice -- one for the mask bit and one for the blend. + LIR::Use resultUse; + LIR::Use::MakeDummyUse(castRange, result, &resultUse); + resultUse.ReplaceWithLclVar(comp); + result = resultUse.Def(); + + GenTree* resultClone = comp->gtClone(result); + castRange.InsertAtEnd(resultClone); + + if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + // If the conversion of the fixed-up value overflowed, the result wil be + // int.MinValue. Since `blendvps` uses only the MSB for result selection, + // this is adequate to force selection of the negated result. + // + // This creates the equivalent of the following C# code: + // convertResult = Sse41.BlendVariable(result, negated, result); + + convertResult = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, result, negated, resultClone, + NI_SSE41_BlendVariable, CORINFO_TYPE_FLOAT, 16); + } + else + { + // If we can't use `blendvps`, we do a bit-wise selection. This works + // using only and+or because if we choose the negated value, both it + // and the overflowed result have MSB set. + // + // This creates the equivalent of the following C# code: + // var mask = Sse2.ShiftRightArithmetic(result, 31); + // convertResult = Sse.Or(result, Sse.And(negated, mask)); + + GenTree* thirtyOne = comp->gtNewIconNode(31); + GenTree* mask = + comp->gtNewSimdBinOpNode(GT_RSH, TYP_SIMD16, result, thirtyOne, CORINFO_TYPE_INT, 16); + GenTree* andMask = + comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, mask, negated, dstBaseType, 16); + + castRange.InsertAtEnd(thirtyOne); + castRange.InsertAtEnd(mask); + castRange.InsertAtEnd(andMask); + + convertResult = + comp->gtNewSimdBinOpNode(GT_OR, TYP_SIMD16, andMask, resultClone, dstBaseType, 16); + } + + // Because the results are in a SIMD register, we need to ToScalar() them out. + castRange.InsertAtEnd(convertResult); + convertResult = comp->gtNewSimdToScalarNode(TYP_INT, convertResult, dstBaseType, 16); + } + else + { + assert(dstType == TYP_ULONG); + + // We're emulating floating->ulong conversion on x64. The logic is the same as for + // uint on x86, except that we don't have conversion instructions that keep the + // results in SIMD registers, so we do the final result selection in scalar code. + // + // This creates the equivalent of the following C# code: + // long result = Sse.X64.ConvertToInt64WithTruncation(fixupVal); + // long negated = Sse.X64.ConvertToInt64WithTruncation(wrapVal); + // convertResult = (ulong)(result | (negated & (result >> 63))); + + GenTree* result = + comp->gtNewSimdHWIntrinsicNode(TYP_LONG, fixupVal, convertIntrinsic, srcBaseType, 16); + GenTree* negated = + comp->gtNewSimdHWIntrinsicNode(TYP_LONG, wrapVal, convertIntrinsic, srcBaseType, 16); + + castRange.InsertAtEnd(result); + castRange.InsertAtEnd(negated); + + // We need the result twice -- once for the mask bit and once for the blend. 
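// Worked example of the result | (negated & (result >> 63)) selection described above
// (illustrative, double -> ulong): for an input of exactly 2^63, the signed conversion
// overflows to long.MinValue (0x8000_0000_0000_0000), while the wrapped input
// (2^63 - 2^63 == 0.0) converts to 0. The mask (result >> 63) is then all ones, so
// result | (negated & mask) == 0x8000_0000_0000_0000, i.e. (ulong)2^63 as desired.
// For inputs below 2^63 the mask is zero and the signed conversion result is used unchanged.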
+ LIR::Use resultUse; + LIR::Use::MakeDummyUse(castRange, result, &resultUse); + resultUse.ReplaceWithLclVar(comp); + result = resultUse.Def(); + + GenTree* sixtyThree = comp->gtNewIconNode(63); + GenTree* mask = comp->gtNewOperNode(GT_RSH, TYP_LONG, result, sixtyThree); + GenTree* andMask = comp->gtNewOperNode(GT_AND, TYP_LONG, mask, negated); + GenTree* resultClone = comp->gtClone(result); + + castRange.InsertAtEnd(sixtyThree); + castRange.InsertAtEnd(mask); + castRange.InsertAtEnd(andMask); + castRange.InsertAtEnd(resultClone); + + convertResult = comp->gtNewOperNode(GT_OR, TYP_LONG, andMask, resultClone); + } + } + } + + // Now we handle saturation of the result for positive overflow. + // + // This creates the equivalent of the following C# code: + // bool isOverflow = Sse.CompareScalarUnorderedGreaterThanOrEqual(srcVec, maxFloatingValue); + // return isOverflow ? maxIntegralValue : convertResult; + + NamedIntrinsic compareIntrinsic = NI_X86Base_CompareScalarUnorderedGreaterThanOrEqual; + + // These nodes were all created above but not used until now. + castRange.InsertAtEnd(maxFloatingValue); + castRange.InsertAtEnd(maxIntegralValue); + castRange.InsertAtEnd(convertResult); + + srcClone = comp->gtClone(srcVector); + GenTree* compareMax = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcClone, maxFloatingValue, + compareIntrinsic, srcBaseType, 16); + GenTree* select = comp->gtNewConditionalNode(GT_SELECT, compareMax, maxIntegralValue, convertResult, + genActualType(dstType)); + + castRange.InsertAtEnd(srcClone); + castRange.InsertAtEnd(compareMax); + castRange.InsertAtEnd(select); + + // The original cast becomes a no-op, because its input is already the correct type. + tree->AsCast()->CastOp() = select; } - BlockRange().Remove(tree); - return castOutput->gtNext; - } -#endif // TARGET_AMD64 - // Case of src is a small type and dst is a floating point type. - if (varTypeIsSmall(srcType) && varTypeIsFloating(castToType)) - { - // These conversions can never be overflow detecting ones. - noway_assert(!tree->gtOverflow()); - tmpType = TYP_INT; - } - // case of src is a floating point type and dst is a small type. - else if (varTypeIsFloating(srcType) && varTypeIsSmall(castToType)) - { - tmpType = TYP_INT; - } + LIR::ReadOnlyRange lowerRange(castRange.FirstNode(), castRange.LastNode()); + BlockRange().InsertBefore(tree, std::move(castRange)); - if (tmpType != TYP_UNDEF) - { - GenTree* tmp = comp->gtNewCastNode(tmpType, castOp, tree->IsUnsigned(), tmpType); - tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); + JITDUMP("LowerCast after:\n"); + DISPTREERANGE(BlockRange(), tree); - tree->gtFlags &= ~GTF_UNSIGNED; - tree->AsOp()->gtOp1 = tmp; - BlockRange().InsertAfter(castOp, tmp); - ContainCheckCast(tmp->AsCast()); + LowerRange(lowerRange); } +#endif // FEATURE_HW_INTRINSICS // Now determine if we have operands that should be contained. 
ContainCheckCast(tree->AsCast()); - return nullptr; } #ifdef FEATURE_HW_INTRINSICS @@ -1210,7 +1287,7 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn { GenTreeCC* cc = LowerNodeCC(node, condition); - assert((HWIntrinsicInfo::lookupNumArgs(newIntrinsicId) == 2) || (newIntrinsicId == NI_EVEX_KORTEST)); + assert((HWIntrinsicInfo::lookupNumArgs(newIntrinsicId) == 2) || (newIntrinsicId == NI_AVX512_KORTEST)); node->ChangeHWIntrinsicId(newIntrinsicId); node->gtType = TYP_VOID; node->ClearUnusedValue(); @@ -1220,10 +1297,8 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn switch (newIntrinsicId) { - case NI_SSE_COMISS: - case NI_SSE_UCOMISS: - case NI_SSE2_COMISD: - case NI_SSE2_UCOMISD: + case NI_X86Base_COMIS: + case NI_X86Base_UCOMIS: // In some cases we can generate better code if we swap the operands: // - If the condition is not one of the "preferred" floating point conditions we can swap // the operands and change the condition to avoid generating an extra JP/JNP branch. @@ -1254,8 +1329,8 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn break; } - case NI_EVEX_KORTEST: - case NI_EVEX_KTEST: + case NI_AVX512_KORTEST: + case NI_AVX512_KTEST: { // No containment support, so no reason to swap operands canSwapOperands = false; @@ -1353,7 +1428,7 @@ void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node) // GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { - if (node->TypeGet() == TYP_SIMD12) + if (node->TypeIs(TYP_SIMD12)) { // GT_HWINTRINSIC node requiring to produce TYP_SIMD12 in fact // produces a TYP_SIMD16 result @@ -1403,8 +1478,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* op3 = nullptr; // We want to specially recognize this pattern as GT_NOT - bool isOperNot = (oper == GT_XOR) && op2->IsVectorAllBitsSet(); - bool isV512Supported = false; + bool isOperNot = (oper == GT_XOR) && op2->IsVectorAllBitsSet(); LIR::Use use; if (BlockRange().TryGetUse(node, &use)) @@ -1442,29 +1516,16 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) op2 = userIntrin->Op(1); } - NamedIntrinsic intrinsic = NI_Illegal; - - if (comp->IsBaselineSimdIsaSupported()) - { - intrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2, - simdBaseType, simdSize, false); - } - else - { - // We need to ensure we optimize even if SSE2 is disabled - - assert(simdBaseType == TYP_FLOAT); - assert(simdSize <= 16); - - intrinsic = NI_SSE_AndNot; - } + NamedIntrinsic intrinsic = + GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2, simdBaseType, + simdSize, false); userIntrin->ResetHWIntrinsicId(intrinsic, comp, op1, op2); return nextNode; } - if (comp->compIsEvexOpportunisticallySupported(isV512Supported)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // For everything else we want to lower it to a standard TernaryLogic node GenTree* nextNode = node->gtNext; @@ -1628,29 +1689,19 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, A, controlByte); } - NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic; - - if (simdSize != 64) - { - ternaryLogicId = isV512Supported ? 
NI_AVX512F_VL_TernaryLogic : NI_AVX10v1_TernaryLogic; - } + NamedIntrinsic ternaryLogicId = NI_AVX512_TernaryLogic; GenTree* op4 = comp->gtNewIconNode(controlByte); BlockRange().InsertBefore(userIntrin, op4); userIntrin->ResetHWIntrinsicId(ternaryLogicId, comp, op1, op2, op3, op4); - if (varTypeIsSmall(simdBaseType)) - { - assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId)); - userIntrin->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType); - } return nextNode; } } } } - if (isOperNot && comp->compIsEvexOpportunisticallySupported(isV512Supported)) + if (isOperNot && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // Lowering this to TernaryLogic(zero, zero, op1, ~C) is smaller // and faster than emitting the pcmpeqd; pxor sequence. @@ -1661,7 +1712,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { GenTreeHWIntrinsic* opIntrin = op1->AsHWIntrinsic(); - if (HWIntrinsicInfo::IsTernaryLogic(opIntrin->GetHWIntrinsicId())) + if (opIntrin->GetHWIntrinsicId() == NI_AVX512_TernaryLogic) { GenTree* opControl = opIntrin->Op(4); @@ -1690,12 +1741,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } } - NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic; - - if (simdSize != 64) - { - ternaryLogicId = isV512Supported ? NI_AVX512F_VL_TernaryLogic : NI_AVX10v1_TernaryLogic; - } + NamedIntrinsic ternaryLogicId = NI_AVX512_TernaryLogic; op3 = op1; @@ -1709,11 +1755,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) BlockRange().InsertBefore(node, control); node->ResetHWIntrinsicId(ternaryLogicId, comp, op1, op2, op3, control); - if (varTypeIsSmall(simdBaseType)) - { - assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId)); - node->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType); - } return LowerNode(node); } } @@ -1783,15 +1824,10 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Vector512_GetUpper: { - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); var_types simdBaseType = node->GetSimdBaseType(); - intrinsicId = NI_AVX512F_ExtractVector256; - - if ((genTypeSize(simdBaseType) == 4) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - intrinsicId = NI_AVX512DQ_ExtractVector256; - } + intrinsicId = NI_AVX512_ExtractVector256; GenTree* op1 = node->Op(1); @@ -1800,10 +1836,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) LowerNode(op2); node->ResetHWIntrinsicId(intrinsicId, comp, op1, op2); - if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) && varTypeIsSmall(simdBaseType)) - { - node->NormalizeJitBaseTypeToInt(intrinsicId, simdBaseType); - } break; } @@ -1844,16 +1876,11 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Vector512_WithLower: case NI_Vector512_WithUpper: { - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); var_types simdBaseType = node->GetSimdBaseType(); int index = (intrinsicId == NI_Vector512_WithUpper) ? 
1 : 0; - intrinsicId = NI_AVX512F_InsertVector256; - - if ((genTypeSize(simdBaseType) == 4) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - intrinsicId = NI_AVX512DQ_InsertVector256; - } + intrinsicId = NI_AVX512_InsertVector256; GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); @@ -1863,10 +1890,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) LowerNode(op3); node->ResetHWIntrinsicId(intrinsicId, comp, op1, op2, op3); - if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) && varTypeIsSmall(simdBaseType)) - { - node->NormalizeJitBaseTypeToInt(intrinsicId, simdBaseType); - } break; } @@ -1884,11 +1907,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return LowerHWIntrinsicCmpOp(node, GT_NE); } - case NI_AVX512F_Fixup: - case NI_AVX512F_FixupScalar: - case NI_AVX512F_VL_Fixup: - case NI_AVX10v1_Fixup: - case NI_AVX10v1_FixupScalar: + case NI_AVX512_Fixup: + case NI_AVX512_FixupScalar: { if (!node->isRMWHWIntrinsic(comp)) { @@ -1911,8 +1931,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_EVEX_CompareEqualMask: - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareEqualMask: + case NI_AVX512_CompareNotEqualMask: { GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); @@ -1921,14 +1941,14 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic testIntrinsicId; - if (intrinsicId == NI_EVEX_CompareEqualMask) + if (intrinsicId == NI_AVX512_CompareEqualMask) { // We have `CompareEqual(x, Zero)` where a given element // equaling zero returns 1. We can therefore use `vptestnm(x, x)` // since it does `(x & x) == 0`, thus giving us `1` if zero and `0` // if non-zero - testIntrinsicId = NI_EVEX_PTESTNM; + testIntrinsicId = NI_AVX512_PTESTNM; } else { @@ -1937,8 +1957,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // since it does `(x & x) != 0`, thus giving us `1` if non-zero and `0` // if zero - assert(intrinsicId == NI_EVEX_CompareNotEqualMask); - testIntrinsicId = NI_EVEX_PTESTM; + assert(intrinsicId == NI_AVX512_CompareNotEqualMask); + testIntrinsicId = NI_AVX512_PTESTM; } node->Op(1) = op1; @@ -1958,7 +1978,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_EVEX_AndMask: + case NI_AVX512_AndMask: { // We want to recognize (~op1 & op2) and transform it // into Evex.AndNotMask(op1, op2) as well as (op1 & ~op2) @@ -1971,7 +1991,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); - if (op1->OperIsHWIntrinsic(NI_EVEX_NotMask)) + if (op1->OperIsHWIntrinsic(NI_AVX512_NotMask)) { GenTreeHWIntrinsic* opIntrin = op1->AsHWIntrinsic(); unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); @@ -1985,7 +2005,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } } - if (!transform && op2->OperIsHWIntrinsic(NI_EVEX_NotMask)) + if (!transform && op2->OperIsHWIntrinsic(NI_AVX512_NotMask)) { GenTreeHWIntrinsic* opIntrin = op2->AsHWIntrinsic(); unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); @@ -2003,27 +2023,27 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) if (transform) { - intrinsicId = NI_EVEX_AndNotMask; + intrinsicId = NI_AVX512_AndNotMask; node->ChangeHWIntrinsicId(intrinsicId, op1, op2); } break; } - case NI_EVEX_NotMask: + case NI_AVX512_NotMask: { // We want to recognize ~(op1 ^ op2) and transform it // into Evex.XnorMask(op1, op2) GenTree* op1 = node->Op(1); - if 
(op1->OperIsHWIntrinsic(NI_EVEX_XorMask)) + if (op1->OperIsHWIntrinsic(NI_AVX512_XorMask)) { GenTreeHWIntrinsic* opIntrin = op1->AsHWIntrinsic(); unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); if (genTypeSize(opIntrin->GetSimdBaseType()) == simdBaseTypeSize) { - intrinsicId = NI_EVEX_XnorMask; + intrinsicId = NI_AVX512_XnorMask; node->ResetHWIntrinsicId(intrinsicId, comp, opIntrin->Op(1), opIntrin->Op(2)); BlockRange().Remove(opIntrin); } @@ -2068,26 +2088,14 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_SSE2_Insert: case NI_SSE41_Insert: - case NI_SSE41_X64_Insert: { assert(node->GetOperandCount() == 3); - var_types simdBaseType = node->GetSimdBaseType(); - - // Insert takes either a 32-bit register or a memory operand. - // In either case, only SimdBaseType bits are read and so - // widening or narrowing the operand may be unnecessary and it - // can just be used directly. - - node->Op(2) = TryRemoveCastIfPresent(simdBaseType, node->Op(2)); - - if (simdBaseType != TYP_FLOAT) + if (node->GetSimdBaseType() != TYP_FLOAT) { break; } - assert(intrinsicId == NI_SSE41_Insert); // We have Sse41.Insert in which case we can specially handle // a couple of interesting scenarios involving chains of Inserts @@ -2296,40 +2304,17 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_SSE42_Crc32: - { - assert(node->GetOperandCount() == 2); - - // Crc32 takes either a bit register or a memory operand. - // In either case, only gtType bits are read and so widening - // or narrowing the operand may be unnecessary and it can - // just be used directly. - - node->Op(2) = TryRemoveCastIfPresent(node->TypeGet(), node->Op(2)); - break; - } - - case NI_SSE2_CompareGreaterThan: + case NI_X86Base_CompareGreaterThan: + case NI_X86Base_CompareGreaterThanOrEqual: + case NI_X86Base_CompareNotGreaterThan: + case NI_X86Base_CompareNotGreaterThanOrEqual: { - if (node->GetSimdBaseType() != TYP_DOUBLE) + if (!varTypeIsFloating(node->GetSimdBaseType())) { assert(varTypeIsIntegral(node->GetSimdBaseType())); break; } - FALLTHROUGH; - } - - case NI_SSE_CompareGreaterThan: - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE_CompareNotGreaterThan: - case NI_SSE_CompareNotGreaterThanOrEqual: - case NI_SSE2_CompareGreaterThanOrEqual: - case NI_SSE2_CompareNotGreaterThan: - case NI_SSE2_CompareNotGreaterThanOrEqual: - { - assert((node->GetSimdBaseType() == TYP_FLOAT) || (node->GetSimdBaseType() == TYP_DOUBLE)); - if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) { break; @@ -2340,59 +2325,35 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_SSE_CompareGreaterThan: + case NI_X86Base_CompareGreaterThan: { - newIntrinsicId = NI_SSE_CompareLessThan; + newIntrinsicId = NI_X86Base_CompareLessThan; break; } - case NI_SSE_CompareGreaterThanOrEqual: + case NI_X86Base_CompareGreaterThanOrEqual: { - newIntrinsicId = NI_SSE_CompareLessThanOrEqual; + newIntrinsicId = NI_X86Base_CompareLessThanOrEqual; break; } - case NI_SSE_CompareNotGreaterThan: + case NI_X86Base_CompareNotGreaterThan: { - newIntrinsicId = NI_SSE_CompareNotLessThan; + newIntrinsicId = NI_X86Base_CompareNotLessThan; break; } - case NI_SSE_CompareNotGreaterThanOrEqual: + case NI_X86Base_CompareNotGreaterThanOrEqual: { - newIntrinsicId = NI_SSE_CompareNotLessThanOrEqual; + newIntrinsicId = NI_X86Base_CompareNotLessThanOrEqual; break; } - case NI_SSE2_CompareGreaterThan: - { - newIntrinsicId = NI_SSE2_CompareLessThan; - break; - } - 
- case NI_SSE2_CompareGreaterThanOrEqual: + default: { - newIntrinsicId = NI_SSE2_CompareLessThanOrEqual; - break; + unreached(); } - - case NI_SSE2_CompareNotGreaterThan: - { - newIntrinsicId = NI_SSE2_CompareNotLessThan; - break; - } - - case NI_SSE2_CompareNotGreaterThanOrEqual: - { - newIntrinsicId = NI_SSE2_CompareNotLessThanOrEqual; - break; - } - - default: - { - unreached(); - } - } + } assert(newIntrinsicId != NI_Illegal); assert(intrinsicId != newIntrinsicId); @@ -2402,11 +2363,11 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_SSE2_CompareLessThan: + case NI_X86Base_CompareLessThan: case NI_SSE42_CompareLessThan: case NI_AVX2_CompareLessThan: { - if (node->GetSimdBaseType() == TYP_DOUBLE) + if (varTypeIsFloating(node->GetSimdBaseType())) { break; } @@ -2417,9 +2378,9 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_SSE2_CompareLessThan: + case NI_X86Base_CompareLessThan: { - newIntrinsicId = NI_SSE2_CompareGreaterThan; + newIntrinsicId = NI_X86Base_CompareGreaterThan; break; } @@ -2449,80 +2410,42 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_SSE_CompareScalarOrderedEqual: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FEQ); - break; - case NI_SSE_CompareScalarOrderedNotEqual: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FNEU); - break; - case NI_SSE_CompareScalarOrderedLessThan: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FLT); - break; - case NI_SSE_CompareScalarOrderedLessThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FLE); - break; - case NI_SSE_CompareScalarOrderedGreaterThan: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FGT); - break; - case NI_SSE_CompareScalarOrderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FGE); - break; - - case NI_SSE_CompareScalarUnorderedEqual: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FEQ); + case NI_X86Base_CompareScalarOrderedEqual: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FEQ); break; - case NI_SSE_CompareScalarUnorderedNotEqual: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FNEU); + case NI_X86Base_CompareScalarOrderedNotEqual: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FNEU); break; - case NI_SSE_CompareScalarUnorderedLessThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FLE); + case NI_X86Base_CompareScalarOrderedLessThan: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FLT); break; - case NI_SSE_CompareScalarUnorderedLessThan: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FLT); + case NI_X86Base_CompareScalarOrderedLessThanOrEqual: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FLE); break; - case NI_SSE_CompareScalarUnorderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FGE); + case NI_X86Base_CompareScalarOrderedGreaterThan: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FGT); break; - case NI_SSE_CompareScalarUnorderedGreaterThan: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FGT); + case NI_X86Base_CompareScalarOrderedGreaterThanOrEqual: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FGE); break; - case NI_SSE2_CompareScalarOrderedEqual: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FEQ); + case NI_X86Base_CompareScalarUnorderedEqual: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FEQ); break; - case 
NI_SSE2_CompareScalarOrderedNotEqual: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FNEU); + case NI_X86Base_CompareScalarUnorderedNotEqual: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FNEU); break; - case NI_SSE2_CompareScalarOrderedLessThan: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FLT); + case NI_X86Base_CompareScalarUnorderedLessThanOrEqual: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FLE); break; - case NI_SSE2_CompareScalarOrderedLessThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FLE); + case NI_X86Base_CompareScalarUnorderedLessThan: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FLT); break; - case NI_SSE2_CompareScalarOrderedGreaterThan: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FGT); + case NI_X86Base_CompareScalarUnorderedGreaterThanOrEqual: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FGE); break; - case NI_SSE2_CompareScalarOrderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FGE); - break; - - case NI_SSE2_CompareScalarUnorderedEqual: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FEQ); - break; - case NI_SSE2_CompareScalarUnorderedNotEqual: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FNEU); - break; - case NI_SSE2_CompareScalarUnorderedLessThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FLE); - break; - case NI_SSE2_CompareScalarUnorderedLessThan: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FLT); - break; - case NI_SSE2_CompareScalarUnorderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FGE); - break; - case NI_SSE2_CompareScalarUnorderedGreaterThan: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FGT); + case NI_X86Base_CompareScalarUnorderedGreaterThan: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FGT); break; case NI_SSE41_TestC: @@ -2549,9 +2472,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) LowerFusedMultiplyAdd(node); break; - case NI_AVX512F_TernaryLogic: - case NI_AVX512F_VL_TernaryLogic: - case NI_AVX10v1_TernaryLogic: + case NI_AVX512_TernaryLogic: { return LowerHWIntrinsicTernaryLogic(node); } @@ -2780,7 +2701,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm // to detect and account for those differences is not likely to be worth the tradeoff. // // TODO-XARCH-AVX512: Given the above don't emit the PTEST path above when AVX-512 is available - // This will require exposing `NI_AVX512F_TestZ` so that we can keep codegen optimized to just + // This will require exposing `NI_AVX512_TestZ` so that we can keep codegen optimized to just // `vptestm` followed by `kortest`. This will be one instruction more than just `vptest` but // it has the advantages detailed above. 
// @@ -2790,7 +2711,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm GenTree* maskNode = node; GenTree* nextNode = node->gtNext; - NamedIntrinsic maskIntrinsicId = NI_EVEX_CompareEqualMask; + NamedIntrinsic maskIntrinsicId = NI_AVX512_CompareEqualMask; uint32_t count = simdSize / genTypeSize(maskBaseType); // KORTEST does a bitwise or on the result and sets ZF if it is zero and CF if it is all @@ -2892,75 +2813,75 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm switch (maskIntrinsicId) { - case NI_EVEX_CompareEqualMask: + case NI_AVX512_CompareEqualMask: { - maskIntrinsicId = NI_EVEX_CompareNotEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotEqualMask; break; } - case NI_EVEX_CompareGreaterThanMask: + case NI_AVX512_CompareGreaterThanMask: { - maskIntrinsicId = NI_EVEX_CompareNotGreaterThanMask; + maskIntrinsicId = NI_AVX512_CompareNotGreaterThanMask; break; } - case NI_EVEX_CompareGreaterThanOrEqualMask: + case NI_AVX512_CompareGreaterThanOrEqualMask: { - maskIntrinsicId = NI_EVEX_CompareNotGreaterThanOrEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotGreaterThanOrEqualMask; break; } - case NI_EVEX_CompareLessThanMask: + case NI_AVX512_CompareLessThanMask: { - maskIntrinsicId = NI_EVEX_CompareNotLessThanMask; + maskIntrinsicId = NI_AVX512_CompareNotLessThanMask; break; } - case NI_EVEX_CompareLessThanOrEqualMask: + case NI_AVX512_CompareLessThanOrEqualMask: { - maskIntrinsicId = NI_EVEX_CompareNotLessThanOrEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotLessThanOrEqualMask; break; } - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareNotEqualMask: { - maskIntrinsicId = NI_EVEX_CompareEqualMask; + maskIntrinsicId = NI_AVX512_CompareEqualMask; break; } - case NI_EVEX_CompareNotGreaterThanMask: + case NI_AVX512_CompareNotGreaterThanMask: { - maskIntrinsicId = NI_EVEX_CompareGreaterThanMask; + maskIntrinsicId = NI_AVX512_CompareGreaterThanMask; break; } - case NI_EVEX_CompareNotGreaterThanOrEqualMask: + case NI_AVX512_CompareNotGreaterThanOrEqualMask: { - maskIntrinsicId = NI_EVEX_CompareGreaterThanOrEqualMask; + maskIntrinsicId = NI_AVX512_CompareGreaterThanOrEqualMask; break; } - case NI_EVEX_CompareNotLessThanMask: + case NI_AVX512_CompareNotLessThanMask: { - maskIntrinsicId = NI_EVEX_CompareLessThanMask; + maskIntrinsicId = NI_AVX512_CompareLessThanMask; break; } - case NI_EVEX_CompareNotLessThanOrEqualMask: + case NI_AVX512_CompareNotLessThanOrEqualMask: { - maskIntrinsicId = NI_EVEX_CompareLessThanOrEqualMask; + maskIntrinsicId = NI_AVX512_CompareLessThanOrEqualMask; break; } - case NI_EVEX_CompareOrderedMask: + case NI_AVX512_CompareOrderedMask: { - maskIntrinsicId = NI_EVEX_CompareUnorderedMask; + maskIntrinsicId = NI_AVX512_CompareUnorderedMask; break; } - case NI_EVEX_CompareUnorderedMask: + case NI_AVX512_CompareUnorderedMask: { - maskIntrinsicId = NI_EVEX_CompareOrderedMask; + maskIntrinsicId = NI_AVX512_CompareOrderedMask; break; } @@ -2975,15 +2896,16 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm GenTree* cnsNode; - maskNode = comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, NI_EVEX_NotMask, + maskNode = comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, NI_AVX512_NotMask, maskBaseJitType, simdSize); BlockRange().InsertBefore(node, maskNode); cnsNode = comp->gtNewIconNode(8 - count); BlockRange().InsertAfter(maskNode, cnsNode); - maskNode = comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, cnsNode, - NI_EVEX_ShiftLeftMask, maskBaseJitType, simdSize); + 
maskNode = + comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, cnsNode, NI_AVX512_ShiftLeftMask, + maskBaseJitType, simdSize); BlockRange().InsertAfter(cnsNode, maskNode); LowerNode(maskNode); @@ -2991,11 +2913,11 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm BlockRange().InsertAfter(maskNode, cnsNode); maskNode = - comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, cnsNode, NI_EVEX_ShiftRightMask, + comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, cnsNode, NI_AVX512_ShiftRightMask, maskBaseJitType, simdSize); BlockRange().InsertAfter(cnsNode, maskNode); - maskIntrinsicId = NI_EVEX_ShiftRightMask; + maskIntrinsicId = NI_AVX512_ShiftRightMask; break; } } @@ -3042,7 +2964,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm if (!varTypeIsFloating(simdBaseType) && (op2 != nullptr) && op2->IsVectorZero()) { - NamedIntrinsic testIntrinsicId = NI_EVEX_PTESTM; + NamedIntrinsic testIntrinsicId = NI_AVX512_PTESTM; bool skipReplaceOperands = false; if (op1->OperIsHWIntrinsic()) @@ -3073,7 +2995,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm // will then set `ZF: 1` if all elements were 0 and `ZF: 0` if any elements were // non-zero. The default GenCondition then remain correct - assert(testIntrinsicId == NI_EVEX_PTESTM); + assert(testIntrinsicId == NI_AVX512_PTESTM); GenTree* nestedOp1 = op1Intrinsic->Op(1); GenTree* nestedOp2 = op1Intrinsic->Op(2); @@ -3086,7 +3008,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm if ((nestedIntrinId == NI_SSE3_MoveAndDuplicate) || (nestedIntrinId == NI_AVX2_BroadcastScalarToVector128) || (nestedIntrinId == NI_AVX2_BroadcastScalarToVector256) || - (nestedIntrinId == NI_AVX512F_BroadcastScalarToVector512)) + (nestedIntrinId == NI_AVX512_BroadcastScalarToVector512)) { // We need to rewrite the embedded broadcast back to a regular constant // so that the subsequent containment check for ptestm can determine @@ -3106,6 +3028,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm comp->gtNewSimdCreateBroadcastNode(simdType, broadcastOp, op1Intrinsic->GetSimdBaseJitType(), simdSize); + assert(vecCns->IsCnsVec()); BlockRange().InsertAfter(broadcastOp, vecCns); nestedOp2 = vecCns; @@ -3123,10 +3046,18 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm if (varTypeIsSmall(simdBaseType)) { // Fixup the base type so embedded broadcast and the mask size checks still work - node->NormalizeJitBaseTypeToInt(testIntrinsicId, simdBaseType); - simdBaseJitType = node->GetSimdBaseJitType(); - simdBaseType = node->GetSimdBaseType(); + if (varTypeIsUnsigned(simdBaseType)) + { + simdBaseJitType = CORINFO_TYPE_UINT; + simdBaseType = TYP_UINT; + } + else + { + simdBaseJitType = CORINFO_TYPE_INT; + simdBaseType = TYP_INT; + } + node->SetSimdBaseJitType(simdBaseJitType); maskBaseJitType = simdBaseJitType; maskBaseType = simdBaseType; @@ -3168,7 +3099,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm maskNode = node; } - if (maskNode->gtType != TYP_MASK) + if (!maskNode->TypeIs(TYP_MASK)) { assert(node == maskNode); @@ -3188,7 +3119,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm if (count < 8) { assert((count == 1) || (count == 2) || (count == 4)); - maskIntrinsicId = NI_EVEX_CompareNotEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotEqualMask; } else { @@ -3200,7 +3131,7 @@ GenTree* 
Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm } else { - maskIntrinsicId = NI_EVEX_CompareNotEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotEqualMask; } } @@ -3216,11 +3147,11 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm { GenTreeHWIntrinsic* cc; - cc = comp->gtNewSimdHWIntrinsicNode(simdType, maskNode, NI_EVEX_KORTEST, maskBaseJitType, simdSize); + cc = comp->gtNewSimdHWIntrinsicNode(simdType, maskNode, NI_AVX512_KORTEST, maskBaseJitType, simdSize); BlockRange().InsertBefore(nextNode, cc); use.ReplaceWith(cc); - LowerHWIntrinsicCC(cc, NI_EVEX_KORTEST, cmpCnd); + LowerHWIntrinsicCC(cc, NI_AVX512_KORTEST, cmpCnd); nextNode = cc->gtNext; } @@ -3257,8 +3188,8 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm { assert(simdSize == 16); - cmpIntrinsic = NI_SSE2_CompareEqual; - mskIntrinsic = NI_SSE2_MoveMask; + cmpIntrinsic = NI_X86Base_CompareEqual; + mskIntrinsic = NI_X86Base_MoveMask; mskConstant = 0xFFFF; } break; @@ -3286,10 +3217,10 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm } else { - cmpIntrinsic = NI_SSE2_CompareEqual; + cmpIntrinsic = NI_X86Base_CompareEqual; cmpJitType = CORINFO_TYPE_UINT; } - mskIntrinsic = NI_SSE2_MoveMask; + mskIntrinsic = NI_X86Base_MoveMask; mskConstant = 0xFFFF; } break; @@ -3308,8 +3239,8 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm } else { - cmpIntrinsic = NI_SSE_CompareEqual; - mskIntrinsic = NI_SSE_MoveMask; + cmpIntrinsic = NI_X86Base_CompareEqual; + mskIntrinsic = NI_X86Base_MoveMask; if (simdSize == 16) { @@ -3343,8 +3274,8 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm { assert(simdSize == 16); - cmpIntrinsic = NI_SSE2_CompareEqual; - mskIntrinsic = NI_SSE2_MoveMask; + cmpIntrinsic = NI_X86Base_CompareEqual; + mskIntrinsic = NI_X86Base_MoveMask; mskConstant = 0x3; } break; @@ -3458,10 +3389,41 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) BlockRange().InsertBefore(node, maskNode); } - assert(maskNode->TypeGet() == TYP_MASK); - blendVariableId = NI_EVEX_BlendVariableMask; + assert(maskNode->TypeIs(TYP_MASK)); + blendVariableId = NI_AVX512_BlendVariableMask; op1 = maskNode; } + else if (op2->IsVectorZero() || op3->IsVectorZero()) + { + // If either of the value operands is const zero, we can optimize down to AND or AND_NOT. + GenTree* binOp = nullptr; + + if (op3->IsVectorZero()) + { + binOp = comp->gtNewSimdBinOpNode(GT_AND, simdType, op1, op2, simdBaseJitType, simdSize); + BlockRange().Remove(op3); + } + else + { + binOp = comp->gtNewSimdBinOpNode(GT_AND_NOT, simdType, op3, op1, simdBaseJitType, simdSize); + BlockRange().Remove(op2); + } + + BlockRange().InsertAfter(node, binOp); + + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(binOp); + } + else + { + binOp->SetUnusedValue(); + } + + BlockRange().Remove(node); + return LowerNode(binOp); + } else if (simdSize == 32) { // For Vector256 (simdSize == 32), BlendVariable for floats/doubles @@ -3493,26 +3455,15 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) } } - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // We can't use the mask, but we can emit a ternary logic node - NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic; - - if (simdSize != 64) - { - ternaryLogicId = !isV512Supported ? 
NI_AVX10v1_TernaryLogic : NI_AVX512F_VL_TernaryLogic; - } + NamedIntrinsic ternaryLogicId = NI_AVX512_TernaryLogic; GenTree* control = comp->gtNewIconNode(0xCA); // (B & A) | (C & ~A) BlockRange().InsertBefore(node, control); node->ResetHWIntrinsicId(ternaryLogicId, comp, op1, op2, op3, control); - if (varTypeIsSmall(simdBaseType)) - { - assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId)); - node->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType); - } return LowerNode(node); } @@ -3712,109 +3663,97 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) { case NI_AVX_Compare: { - cndId = NI_EVEX_CompareMask; + cndId = NI_AVX512_CompareMask; break; } - case NI_SSE_CompareEqual: - case NI_SSE2_CompareEqual: + case NI_X86Base_CompareEqual: case NI_SSE41_CompareEqual: case NI_AVX_CompareEqual: case NI_AVX2_CompareEqual: { - cndId = NI_EVEX_CompareEqualMask; + cndId = NI_AVX512_CompareEqualMask; break; } - case NI_SSE_CompareGreaterThan: - case NI_SSE2_CompareGreaterThan: + case NI_X86Base_CompareGreaterThan: case NI_SSE42_CompareGreaterThan: case NI_AVX_CompareGreaterThan: case NI_AVX2_CompareGreaterThan: { - cndId = NI_EVEX_CompareGreaterThanMask; + cndId = NI_AVX512_CompareGreaterThanMask; break; } - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE2_CompareGreaterThanOrEqual: + case NI_X86Base_CompareGreaterThanOrEqual: case NI_AVX_CompareGreaterThanOrEqual: { - cndId = NI_EVEX_CompareGreaterThanOrEqualMask; + cndId = NI_AVX512_CompareGreaterThanOrEqualMask; break; } - case NI_SSE_CompareLessThan: - case NI_SSE2_CompareLessThan: + case NI_X86Base_CompareLessThan: case NI_SSE42_CompareLessThan: case NI_AVX_CompareLessThan: case NI_AVX2_CompareLessThan: { - cndId = NI_EVEX_CompareLessThanMask; + cndId = NI_AVX512_CompareLessThanMask; break; } - case NI_SSE_CompareLessThanOrEqual: - case NI_SSE2_CompareLessThanOrEqual: + case NI_X86Base_CompareLessThanOrEqual: case NI_AVX_CompareLessThanOrEqual: { - cndId = NI_EVEX_CompareLessThanOrEqualMask; + cndId = NI_AVX512_CompareLessThanOrEqualMask; break; } - case NI_SSE_CompareNotEqual: - case NI_SSE2_CompareNotEqual: + case NI_X86Base_CompareNotEqual: case NI_AVX_CompareNotEqual: { - cndId = NI_EVEX_CompareNotEqualMask; + cndId = NI_AVX512_CompareNotEqualMask; break; } - case NI_SSE_CompareNotGreaterThan: - case NI_SSE2_CompareNotGreaterThan: + case NI_X86Base_CompareNotGreaterThan: case NI_AVX_CompareNotGreaterThan: { - cndId = NI_EVEX_CompareGreaterThanMask; + cndId = NI_AVX512_CompareGreaterThanMask; break; } - case NI_SSE_CompareNotGreaterThanOrEqual: - case NI_SSE2_CompareNotGreaterThanOrEqual: + case NI_X86Base_CompareNotGreaterThanOrEqual: case NI_AVX_CompareNotGreaterThanOrEqual: { - cndId = NI_EVEX_CompareNotGreaterThanOrEqualMask; + cndId = NI_AVX512_CompareNotGreaterThanOrEqualMask; break; } - case NI_SSE_CompareNotLessThan: - case NI_SSE2_CompareNotLessThan: + case NI_X86Base_CompareNotLessThan: case NI_AVX_CompareNotLessThan: { - cndId = NI_EVEX_CompareNotLessThanMask; + cndId = NI_AVX512_CompareNotLessThanMask; break; } - case NI_SSE_CompareNotLessThanOrEqual: - case NI_SSE2_CompareNotLessThanOrEqual: + case NI_X86Base_CompareNotLessThanOrEqual: case NI_AVX_CompareNotLessThanOrEqual: { - cndId = NI_EVEX_CompareNotLessThanOrEqualMask; + cndId = NI_AVX512_CompareNotLessThanOrEqualMask; break; } - case NI_SSE_CompareOrdered: - case NI_SSE2_CompareOrdered: + case NI_X86Base_CompareOrdered: case NI_AVX_CompareOrdered: { - cndId = NI_EVEX_CompareOrderedMask; + cndId = 
NI_AVX512_CompareOrderedMask; break; } - case NI_SSE_CompareUnordered: - case NI_SSE2_CompareUnordered: + case NI_X86Base_CompareUnordered: case NI_AVX_CompareUnordered: { - cndId = NI_EVEX_CompareUnorderedMask; + cndId = NI_AVX512_CompareUnorderedMask; break; } @@ -3836,7 +3775,20 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) } assert(varTypeIsMask(condition)); - node->ResetHWIntrinsicId(NI_EVEX_BlendVariableMask, comp, selectFalse, selectTrue, condition); + + // The TernaryLogic node normalizes small SIMD base types on import. To optimize + // to BlendVariableMask, we need to "un-normalize". We no longer have the original + // base type, so we use the mask base type instead. + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + + if (!condition->OperIsHWIntrinsic()) + { + break; + } + + node->SetSimdBaseJitType(condition->AsHWIntrinsic()->GetSimdBaseJitType()); + + node->ResetHWIntrinsicId(NI_AVX512_BlendVariableMask, comp, selectFalse, selectTrue, condition); BlockRange().Remove(op4); break; } @@ -4008,6 +3960,11 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) { replacementNode->SetUnusedValue(); } + + GenTree* next = node->gtNext; + BlockRange().Remove(op4); + BlockRange().Remove(node); + return next; } break; } @@ -4056,10 +4013,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTree* tmp2 = nullptr; GenTree* tmp3 = nullptr; - bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simdVal); - bool isCreateScalar = (intrinsicId == NI_Vector128_CreateScalar) || (intrinsicId == NI_Vector256_CreateScalar) || - (intrinsicId == NI_Vector512_CreateScalar); - size_t argCnt = node->GetOperandCount(); + bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simdVal); + bool isCreateScalar = HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId); + size_t argCnt = node->GetOperandCount(); if (isConstant) { @@ -4070,8 +4026,8 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) #if !defined(TARGET_64BIT) if (arg->OperIsLong()) { - BlockRange().Remove(arg->AsOp()->gtGetOp1()); - BlockRange().Remove(arg->AsOp()->gtGetOp2()); + BlockRange().Remove(arg->gtGetOp1()); + BlockRange().Remove(arg->gtGetOp2()); } #endif // !TARGET_64BIT BlockRange().Remove(arg); @@ -4093,171 +4049,67 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().Remove(node); - return LowerNode(vecCon); + return vecCon->gtNext; } else if (argCnt == 1) { if (isCreateScalar) { - node->gtType = TYP_SIMD16; - node->SetSimdSize(16); - switch (simdBaseType) { case TYP_BYTE: case TYP_UBYTE: - { - // Types need to be explicitly zero-extended to ensure upper-bits are zero - // - // We need to explicitly use TYP_UBYTE since unsigned is ignored for small types - // Explicitly handle both BYTE and UBYTE to account for reinterpret casts and the like - // - // The from type is INT since that is the input type tracked by IR, where-as the target - // type needs to be UBYTE so it implicitly zero-extends back to TYP_INT - - tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_UBYTE); - BlockRange().InsertAfter(op1, tmp1); - LowerNode(tmp1); - - node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1); - node->SetSimdBaseJitType(CORINFO_TYPE_INT); - break; - } - case TYP_SHORT: case TYP_USHORT: { - // Types need to be explicitly zero-extended to ensure upper-bits are zero + // The smallest scalar SIMD load that zeroes upper elements is 32 bits, so for CreateScalar, + 
// we must ensure that the upper bits of that 32-bit value are zero if the base type is small. // - // We need to explicitly use TYP_USHORT since unsigned is ignored for small types - // Explicitly handle both SHORT and USHORT to account for reinterpret casts and the like + // The most likely case is that op1 is a cast from int/long to the base type: + // * CAST int <- short <- int/long + // If the base type is signed, that cast will be sign-extending, but we need zero extension, + // so we may be able to simply retype the cast to the unsigned type of the same size. + // This is valid only if the cast is not checking overflow and is not containing a load. // - // The from type is INT since that is the input type tracked by IR, where-as the target - // type needs to be USHORT so it implicitly zero-extends back to TYP_INT - - tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_USHORT); - BlockRange().InsertAfter(op1, tmp1); - LowerNode(tmp1); + // It's also possible we have a memory load of the base type: + // * IND short + // We can likewise change the type of the indir to force zero extension on load. + // + // If we can't safely retype one of the above patterns and don't already have a cast to the + // correct unsigned type, we will insert our own cast. - node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1); node->SetSimdBaseJitType(CORINFO_TYPE_INT); - break; - } - - case TYP_INT: - { - node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32); - break; - } - - case TYP_UINT: - { - node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128UInt32); - break; - } - -#if defined(TARGET_AMD64) - case TYP_LONG: - { - node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128Int64); - break; - } - case TYP_ULONG: - { - node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128UInt64); - break; - } -#endif // TARGET_AMD64 - - case TYP_FLOAT: - { - tmp1 = comp->gtNewZeroConNode(simdType); - BlockRange().InsertBefore(op1, tmp1); - LowerNode(tmp1); + var_types unsignedType = varTypeToUnsigned(simdBaseType); - if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + if (op1->OperIs(GT_CAST) && !op1->gtOverflow() && !op1->AsCast()->CastOp()->isContained() && + (genTypeSize(op1->CastToType()) == genTypeSize(simdBaseType))) { - // Sse41.Insert has: - // * Bits 0-3: zmask - // * Bits 4-5: count_d - // * Bits 6-7: count_s (register form only) - // - // We want zmask 0b1110 (0xE) to zero elements 1/2/3 - // We want count_d 0b00 (0x0) to insert the value to element 0 - // We want count_s 0b00 (0x0) as we're just taking element 0 of the source - - idx = comp->gtNewIconNode(0x0E); - BlockRange().InsertAfter(op1, idx); - LowerNode(idx); - - node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, op1, idx); + op1->AsCast()->gtCastType = unsignedType; } - else + else if (op1->OperIs(GT_IND, GT_LCL_FLD) && (genTypeSize(op1) == genTypeSize(simdBaseType))) { - node->ResetHWIntrinsicId(NI_SSE_MoveScalar, comp, tmp1, op1); + op1->gtType = unsignedType; + } + else if (!op1->OperIs(GT_CAST) || (op1->AsCast()->CastToType() != unsignedType)) + { + tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* fromUnsigned */ false, unsignedType); + node->Op(1) = tmp1; + BlockRange().InsertAfter(op1, tmp1); + LowerNode(tmp1); } - break; - } - - case TYP_DOUBLE: - { - tmp1 = comp->gtNewZeroConNode(simdType); - BlockRange().InsertBefore(op1, tmp1); - LowerNode(tmp1); - node->ResetHWIntrinsicId(NI_SSE2_MoveScalar, comp, tmp1, op1); break; } default: { - unreached(); - } - } - - if 
(simdSize > 16) - { - assert((simdSize == 32) || (simdSize == 64)); - - // We're creating a Vector256/512 scalar so we need to treat the original op as Vector128, - // we need to unsafely extend up to Vector256/512 (which is actually safe since the 128-bit - // op will zero extend up to 256/512-bits), and then we need to replace the original use - // with the new TYP_SIMD32/64 node. - - node->ChangeType(TYP_SIMD16); - node->SetSimdSize(16); - LowerNode(node); - - LIR::Use use; - bool foundUse = BlockRange().TryGetUse(node, &use); - - tmp2 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node, NI_Vector128_ToVector256Unsafe, simdBaseJitType, - 16); - BlockRange().InsertAfter(node, tmp2); - - if (simdSize == 64) - { - tmp3 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD64, tmp2, NI_Vector256_ToVector512Unsafe, - simdBaseJitType, 32); - BlockRange().InsertAfter(tmp2, tmp3); - tmp2 = tmp3; - } - - if (foundUse) - { - use.ReplaceWith(tmp2); - } - else - { - node->ClearUnusedValue(); - tmp2->SetUnusedValue(); + break; } - - node = tmp2->AsHWIntrinsic(); } - return LowerNode(node); + ContainCheckHWIntrinsic(node); + return node->gtNext; } // We have the following (where simd is simd16, simd32 or simd64): @@ -4266,7 +4118,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if (intrinsicId == NI_Vector512_Create) { - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); // We will be constructing the following parts: // /--* op1 T // tmp1 = * HWINTRINSIC simd32 T CreateScalarUnsafe @@ -4279,34 +4131,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp1 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, simdBaseJitType, 16); LowerNode(tmp1); - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - case TYP_SHORT: - case TYP_USHORT: - { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - node->ResetHWIntrinsicId(NI_AVX512BW_BroadcastScalarToVector512, tmp1); - break; - } - - case TYP_INT: - case TYP_UINT: - case TYP_FLOAT: - case TYP_DOUBLE: - case TYP_LONG: - case TYP_ULONG: - { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - node->ResetHWIntrinsicId(NI_AVX512F_BroadcastScalarToVector512, tmp1); - break; - } - default: - { - unreached(); - } - } + node->ResetHWIntrinsicId(NI_AVX512_BroadcastScalarToVector512, tmp1); return LowerNode(node); } @@ -4436,8 +4261,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - // We will be constructing the following parts: // ... // /--* tmp1 simd16 @@ -4463,7 +4286,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_X86Base_UnpackLow, CORINFO_TYPE_UBYTE, simdSize); BlockRange().InsertAfter(tmp2, tmp1); LowerNode(tmp1); @@ -4491,8 +4314,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp1 = Sse2.UnpackLow(tmp1, tmp2); // ... 
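Note on the CreateScalar small-type handling above: the 32-bit scalar move copies a full GPR lane into the vector, so sign-extended upper bits of a byte/short value would spill into the neighboring element positions. A minimal scalar model of the zero-extension guarantee (illustrative only, not JIT code):

    #include <cstdint>

    // Before the 32-bit scalar move (movd), a small element must be zero-extended so
    // that every bit above its natural width is zero.
    uint32_t ZeroExtendForCreateScalar(int32_t value, unsigned elemSizeInBytes)
    {
        uint32_t mask = (elemSizeInBytes == 1) ? 0xFFu
                      : (elemSizeInBytes == 2) ? 0xFFFFu
                                               : 0xFFFFFFFFu;
        return static_cast<uint32_t>(value) & mask;
    }

    // Example: CreateScalar((sbyte)-1) needs element 0 == 0xFF with all other bytes zero,
    // i.e. ZeroExtendForCreateScalar(-1, 1) == 0xFFu rather than 0xFFFFFFFFu.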
- assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); @@ -4501,7 +4322,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, CORINFO_TYPE_USHORT, + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_X86Base_UnpackLow, CORINFO_TYPE_USHORT, simdSize); BlockRange().InsertAfter(tmp2, tmp1); LowerNode(tmp1); @@ -4523,50 +4344,14 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // ... // return Sse2.Shuffle(tmp1, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp1, idx); - node->ResetHWIntrinsicId(NI_SSE2_Shuffle, tmp1, idx); + node->ResetHWIntrinsicId(NI_X86Base_Shuffle, tmp1, idx); node->SetSimdBaseJitType(CORINFO_TYPE_UINT); break; } -#if defined(TARGET_AMD64) - case TYP_LONG: - case TYP_ULONG: - { - // We will be constructing the following parts: - // ... - // /--* tmp1 simd16 - // * STORE_LCL_VAR simd16 - // tmp1 = LCL_VAR simd16 - // tmp2 = LCL_VAR simd16 - // /--* tmp1 simd16 - // +--* tmp2 simd16 - // node = * HWINTRINSIC simd16 ulong UnpackLow - - // This is roughly the following managed code: - // ... - // var tmp2 = tmp1; - // return Sse2.UnpackLow(tmp1, tmp2); - - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - - node->Op(1) = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); - ReplaceWithLclVar(tmp1Use); - tmp1 = node->Op(1); - - tmp2 = comp->gtClone(tmp1); - BlockRange().InsertAfter(tmp1, tmp2); - - node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); - break; - } -#endif // TARGET_AMD64 - case TYP_FLOAT: { if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) @@ -4606,8 +4391,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = tmp1; // return Sse.Shuffle(tmp1, tmp2, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE)); - node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); @@ -4619,13 +4402,16 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp2, idx); - node->ResetHWIntrinsicId(NI_SSE_Shuffle, comp, tmp1, tmp2, idx); + node->ResetHWIntrinsicId(NI_X86Base_Shuffle, comp, tmp1, tmp2, idx); break; } + case TYP_LONG: + case TYP_ULONG: case TYP_DOUBLE: { - if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) + if ((IsContainableMemoryOp(op1) || simdBaseType == TYP_DOUBLE) && + comp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) { // We will be constructing the following parts: // ... @@ -4637,11 +4423,10 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // return Sse3.MoveAndDuplicate(tmp1); node->ChangeHWIntrinsicId(NI_SSE3_MoveAndDuplicate, tmp1); + node->SetSimdBaseJitType(CORINFO_TYPE_DOUBLE); break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - // We will be constructing the following parts: // ... 
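A scalar model of the small-type broadcast shape used above, where the value is unpacked with itself to widen the repetition and a 32-bit shuffle then fills the vector (an illustration of the element arithmetic only, not JIT code):

    #include <cstdint>

    // punpcklbw x, x duplicates the low byte into a 16-bit pair;
    // punpcklwd x, x duplicates that pair into a 32-bit group;
    // pshufd with control 0x00 then copies that 32-bit group into every lane.
    uint32_t SplatByteToUInt32(uint8_t b)
    {
        uint16_t pair  = static_cast<uint16_t>(b) | (static_cast<uint16_t>(b) << 8);
        uint32_t group = static_cast<uint32_t>(pair) | (static_cast<uint32_t>(pair) << 16);
        return group; // each 32-bit lane of the broadcast result holds this value
    }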
// /--* tmp1 simd16 @@ -4650,12 +4435,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp2 = LCL_VAR simd16 // /--* tmp1 simd16 // +--* tmp2 simd16 - // node = * HWINTRINSIC simd16 float MoveLowToHigh + // node = * HWINTRINSIC simd16 T UnpackLow // This is roughly the following managed code: // ... // var tmp2 = tmp1; - // return Sse.MoveLowToHigh(tmp1, tmp2); + // return Sse2.UnpackLow(tmp1, tmp2); node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); @@ -4665,8 +4450,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); - node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2); - node->SetSimdBaseJitType(CORINFO_TYPE_FLOAT); + node->ResetHWIntrinsicId(NI_X86Base_UnpackLow, tmp1, tmp2); break; } @@ -4679,19 +4463,16 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) return LowerNode(node); } - GenTree* op2 = node->Op(2); - - // TODO-XArch-AVX512 : Merge the NI_Vector512_Create and NI_Vector256_Create paths below. - // We have the following (where simd is simd16 or simd32): - // /--* op1 T - // +--* ... T - // +--* opN T - // node = * HWINTRINSIC simd T Create - if (intrinsicId == NI_Vector512_Create) + if (intrinsicId == NI_Vector512_Create || intrinsicId == NI_Vector256_Create) { - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(argCnt >= (simdSize / genTypeSize(TYP_LONG))); + assert(((simdSize == 64) && comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)) || + ((simdSize == 32) && comp->compIsaSupportedDebugOnly(InstructionSet_AVX))); - // We will be constructing the following parts: + // The larger vector implementation is simplified by splitting the + // job in half and delegating to the next smaller vector size. + // + // For example, for Vector512, we construct the following: // /--* op1 T // +--* ... T // lo = * HWINTRINSIC simd32 T Create @@ -4721,86 +4502,35 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // lo = Vector256.Create(op1, ..., op16); // hi = Vector256.Create(op17, ..., op32); + var_types halfType = comp->getSIMDTypeForSize(simdSize / 2); + NamedIntrinsic halfCreate = (simdSize == 64) ? NI_Vector256_Create : NI_Vector128_Create; + NamedIntrinsic withUpper = (simdSize == 64) ? 
NI_Vector512_WithUpper : NI_Vector256_WithUpper; + size_t halfArgCnt = argCnt / 2; assert((halfArgCnt * 2) == argCnt); GenTree* loInsertionPoint = LIR::LastNode(node->GetOperandArray(), halfArgCnt); - - GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(), halfArgCnt, - NI_Vector256_Create, simdBaseJitType, 32); - BlockRange().InsertAfter(loInsertionPoint, lo); - GenTree* hiInsertionPoint = LIR::LastNode(node->GetOperandArray(halfArgCnt), halfArgCnt); - GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(halfArgCnt), halfArgCnt, - NI_Vector256_Create, simdBaseJitType, 32); - BlockRange().InsertAfter(hiInsertionPoint, hi); - assert(argCnt >= 7); - node->ResetHWIntrinsicId(NI_Vector512_WithUpper, comp, lo, hi); + GenTree* lo = comp->gtNewSimdHWIntrinsicNode(halfType, node->GetOperandArray(), halfArgCnt, halfCreate, + simdBaseJitType, simdSize / 2); - LowerNode(lo); - LowerNode(hi); + GenTree* hi = comp->gtNewSimdHWIntrinsicNode(halfType, node->GetOperandArray(halfArgCnt), halfArgCnt, + halfCreate, simdBaseJitType, simdSize / 2); - return LowerNode(node); - } - else if (intrinsicId == NI_Vector256_Create) - { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); - - // We will be constructing the following parts: - // /--* op1 T - // +--* ... T - // lo = * HWINTRINSIC simd16 T Create - // /--* ... T - // +--* opN T - // hi = * HWINTRINSIC simd16 T Create - // /--* lo simd32 - // +--* hi simd16 - // node = * HWINTRINSIC simd32 T WithUpper - - // This is roughly the following managed code: - // ... - // var lo = Vector128.Create(op1, ...); - // var hi = Vector128.Create(..., opN); - // return lo.WithUpper(hi); - - // Each Vector128.Create call gets half the operands. That is: - // lo = Vector128.Create(op1, op2); - // hi = Vector128.Create(op3, op4); - // -or- - // lo = Vector128.Create(op1, ..., op4); - // hi = Vector128.Create(op5, ..., op8); - // -or- - // lo = Vector128.Create(op1, ..., op8); - // hi = Vector128.Create(op9, ..., op16); - // -or- - // lo = Vector128.Create(op1, ..., op16); - // hi = Vector128.Create(op17, ..., op32); - - size_t halfArgCnt = argCnt / 2; - assert((halfArgCnt * 2) == argCnt); + node->ResetHWIntrinsicId(withUpper, comp, lo, hi); - GenTree* loInsertionPoint = LIR::LastNode(node->GetOperandArray(), halfArgCnt); - - GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(), halfArgCnt, - NI_Vector128_Create, simdBaseJitType, 16); BlockRange().InsertAfter(loInsertionPoint, lo); - - GenTree* hiInsertionPoint = LIR::LastNode(node->GetOperandArray(halfArgCnt), halfArgCnt); - - GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(halfArgCnt), halfArgCnt, - NI_Vector128_Create, simdBaseJitType, 16); BlockRange().InsertAfter(hiInsertionPoint, hi); - assert(argCnt >= 3); - node->ResetHWIntrinsicId(NI_Vector256_WithUpper, comp, lo, hi); - LowerNode(lo); LowerNode(hi); return LowerNode(node); } + assert(intrinsicId == NI_Vector128_Create); + // We will be constructing the following parts: // /--* op1 T // tmp1 = * HWINTRINSIC simd16 T CreateScalarUnsafe @@ -4826,8 +4556,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_USHORT)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - insIntrinsic = NI_SSE2_Insert; + insIntrinsic = NI_X86Base_Insert; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) { @@ -4886,7 +4615,6 @@ GenTree* 
Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } assert((simdBaseType != TYP_SHORT) && (simdBaseType != TYP_USHORT)); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); GenTree* op[16]; op[0] = tmp1; @@ -4935,18 +4663,18 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) size_t P = N + 2; size_t Q = N + 3; - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[N], op[O], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, - simdSize); + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[N], op[O], NI_X86Base_UnpackLow, + CORINFO_TYPE_UBYTE, simdSize); BlockRange().InsertAfter(LIR::LastNode(op[N], op[O]), tmp1); LowerNode(tmp1); - tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[P], op[Q], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, - simdSize); + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[P], op[Q], NI_X86Base_UnpackLow, + CORINFO_TYPE_UBYTE, simdSize); BlockRange().InsertAfter(LIR::LastNode(op[P], op[Q]), tmp2); LowerNode(tmp2); - tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, CORINFO_TYPE_USHORT, - simdSize); + tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_X86Base_UnpackLow, + CORINFO_TYPE_USHORT, simdSize); BlockRange().InsertAfter(LIR::LastNode(tmp1, tmp2), tmp3); LowerNode(tmp3); @@ -4984,69 +4712,21 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp2 = Sse2.UnpackLow(opP, opQ); // return Sse2.UnpackLow(tmp1, tmp2); - tmp1 = - comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_X86Base_UnpackLow, CORINFO_TYPE_UINT, + simdSize); BlockRange().InsertAfter(LIR::LastNode(op[0], op[1]), tmp1); LowerNode(tmp1); - tmp2 = - comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_X86Base_UnpackLow, CORINFO_TYPE_UINT, + simdSize); BlockRange().InsertAfter(LIR::LastNode(op[2], op[3]), tmp2); LowerNode(tmp2); - node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); + node->ResetHWIntrinsicId(NI_X86Base_UnpackLow, tmp1, tmp2); node->SetSimdBaseJitType(CORINFO_TYPE_ULONG); break; } -#if defined(TARGET_AMD64) - case TYP_LONG: - case TYP_ULONG: - { - if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41_X64)) - { - // We will be constructing the following parts: - // ... - // idx = CNS_INT int 1 - // /--* tmp1 simd16 - // +--* op2 T - // +--* idx int - // node = * HWINTRINSIC simd16 T Insert - - // This is roughly the following managed code: - // ... - // return Sse41.X64.Insert(tmp1, op2, 0x01); - - idx = comp->gtNewIconNode(0x01, TYP_INT); - BlockRange().InsertBefore(node, idx); - - node->ResetHWIntrinsicId(NI_SSE41_X64_Insert, comp, tmp1, op2, idx); - break; - } - - // We will be constructing the following parts: - // ... - // /--* op2 T - // tmp2 = * HWINTRINSIC simd16 T CreateScalarUnsafe - // /--* tmp1 simd16 - // +--* tmp2 simd16 - // node = * HWINTRINSIC simd16 T UnpackLow - - // This is roughly the following managed code: - // ... 
- // var tmp2 = Vector128.CreateScalarUnsafe(op2); - // return Sse2.UnpackLow(tmp1, tmp2); - - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - - tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, simdBaseJitType, 16); - LowerNode(tmp2); - - node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); - break; - } -#endif // TARGET_AMD64 - case TYP_FLOAT: { unsigned N = 0; @@ -5161,8 +4841,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp2 = Sse.UnpackLow(opP, opQ); // return Sse.MoveLowToHigh(tmp1, tmp2); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE)); - GenTree* op[4]; op[0] = tmp1; @@ -5174,40 +4852,64 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(op[N]); } - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE_UnpackLow, simdBaseJitType, simdSize); + tmp1 = + comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_X86Base_UnpackLow, simdBaseJitType, simdSize); BlockRange().InsertAfter(LIR::LastNode(op[0], op[1]), tmp1); LowerNode(tmp1); - tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_SSE_UnpackLow, simdBaseJitType, simdSize); + tmp2 = + comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_X86Base_UnpackLow, simdBaseJitType, simdSize); BlockRange().InsertAfter(LIR::LastNode(op[2], op[3]), tmp2); LowerNode(tmp2); - node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2); + node->ResetHWIntrinsicId(NI_X86Base_MoveLowToHigh, tmp1, tmp2); break; } + case TYP_LONG: + case TYP_ULONG: case TYP_DOUBLE: { + GenTree* op2 = node->Op(2); + + if (varTypeIsLong(simdBaseType) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41_X64)) + { + // We will be constructing the following parts: + // ... + // idx = CNS_INT int 1 + // /--* tmp1 simd16 + // +--* op2 T + // +--* idx int + // node = * HWINTRINSIC simd16 T Insert + + // This is roughly the following managed code: + // ... + // return Sse41.X64.Insert(tmp1, op2, 0x01); + + idx = comp->gtNewIconNode(0x01, TYP_INT); + BlockRange().InsertBefore(node, idx); + + node->ResetHWIntrinsicId(NI_SSE41_X64_Insert, comp, tmp1, op2, idx); + break; + } + // We will be constructing the following parts: // ... // /--* op2 T // tmp2 = * HWINTRINSIC simd16 T CreateScalarUnsafe // /--* tmp1 simd16 // +--* tmp2 simd16 - // node = * HWINTRINSIC simd16 T MoveLowToHigh + // node = * HWINTRINSIC simd16 T UnpackLow // This is roughly the following managed code: // ... 
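For reference, the four-element float Create shape built above corresponds roughly to the following C++ intrinsics sketch (an illustration of the data movement, not code the JIT calls):

    #include <xmmintrin.h>

    __m128 CreateFloat4(float a, float b, float c, float d)
    {
        __m128 va = _mm_set_ss(a);            // [a 0 0 0]
        __m128 vb = _mm_set_ss(b);            // [b 0 0 0]
        __m128 vc = _mm_set_ss(c);            // [c 0 0 0]
        __m128 vd = _mm_set_ss(d);            // [d 0 0 0]
        __m128 lo = _mm_unpacklo_ps(va, vb);  // [a b 0 0]
        __m128 hi = _mm_unpacklo_ps(vc, vd);  // [c d 0 0]
        return _mm_movelh_ps(lo, hi);         // [a b c d]
    }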
// var tmp2 = Vector128.CreateScalarUnsafe(op2); - // return Sse.MoveLowToHigh(tmp1, tmp2); - - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + // return Sse.UnpackLow(tmp1, tmp2); tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, simdBaseJitType, 16); LowerNode(tmp2); - node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2); - node->SetSimdBaseJitType(CORINFO_TYPE_FLOAT); + node->ResetHWIntrinsicId(NI_X86Base_UnpackLow, tmp1, tmp2); break; } @@ -5234,9 +4936,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); - assert((intrinsicId == NI_Vector128_GetElement) || (intrinsicId == NI_Vector256_GetElement) || - (intrinsicId == NI_Vector512_GetElement)); - + assert(HWIntrinsicInfo::IsVectorGetElement(intrinsicId)); assert(!varTypeIsSIMD(simdType)); assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); @@ -5266,8 +4966,8 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) return LowerNode(node); } - uint32_t count = simdSize / genTypeSize(simdBaseType); uint32_t elemSize = genTypeSize(simdBaseType); + uint32_t count = simdSize / elemSize; if (op1->OperIs(GT_IND)) { @@ -5497,37 +5197,6 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) } } - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: -#if defined(TARGET_AMD64) - case TYP_LONG: - case TYP_ULONG: -#endif // TARGET_AMD64 - { - // Using software fallback if simdBaseType is not supported by hardware - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41)); - break; - } - - case TYP_DOUBLE: - case TYP_FLOAT: - case TYP_SHORT: - case TYP_USHORT: - { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - break; - } - - default: - { - unreached(); - } - } - // Remove the index node up front to simplify downstream logic BlockRange().Remove(op2); @@ -5539,7 +5208,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) if (intrinsicId == NI_Vector512_GetElement) { - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); if (simd16Idx == 0) { @@ -5577,12 +5246,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) BlockRange().InsertBefore(node, idx); LowerNode(idx); - NamedIntrinsic extractIntrinsicId = NI_AVX512F_ExtractVector128; - - if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - extractIntrinsicId = NI_AVX512DQ_ExtractVector128; - } + NamedIntrinsic extractIntrinsicId = NI_AVX512_ExtractVector128; tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, idx, extractIntrinsicId, simdBaseJitType, simdSize); BlockRange().InsertBefore(node, tmp1); @@ -5655,6 +5319,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_LONG: case TYP_ULONG: { + assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41_X64)); resIntrinsic = NI_SSE41_X64_Extract; break; } @@ -5672,6 +5337,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_INT: case TYP_UINT: { + assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41)); resIntrinsic = NI_SSE41_Extract; break; } @@ -5679,7 +5345,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_SHORT: case TYP_USHORT: { - resIntrinsic = NI_SSE2_Extract; + resIntrinsic = NI_X86Base_Extract; break; } @@ 
-5752,40 +5418,24 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) GenTree* op2 = node->Op(2); GenTree* op3 = node->Op(3); - assert(op2->OperIsConst()); - - ssize_t count = simdSize / genTypeSize(simdBaseType); - ssize_t imm8 = op2->AsIntCon()->IconValue(); - ssize_t simd16Cnt = 16 / genTypeSize(simdBaseType); - ssize_t simd16Idx = imm8 / simd16Cnt; - - assert(0 <= imm8 && imm8 < count); - - switch (simdBaseType) + if (!op2->OperIsConst()) { - // Using software fallback if simdBaseType is not supported by hardware - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41)); - break; + // We will specially handle WithElement in codegen when op2 isn't a constant + ContainCheckHWIntrinsic(node); + return node->gtNext; + } - case TYP_LONG: - case TYP_ULONG: - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41_X64)); - break; + // We should have a bounds check inserted for any index outside the allowed range + // but we need to generate some code anyways, and so we'll simply mask here for simplicity. - case TYP_DOUBLE: - case TYP_FLOAT: - case TYP_SHORT: - case TYP_USHORT: - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - break; + uint32_t elemSize = genTypeSize(simdBaseType); + uint32_t count = simdSize / elemSize; + + uint32_t imm8 = static_cast(op2->AsIntCon()->IconValue()) % count; + uint32_t simd16Cnt = 16 / elemSize; + uint32_t simd16Idx = imm8 / simd16Cnt; - default: - unreached(); - } + assert((0 <= imm8) && (imm8 < count)); // Remove the index node up front to simplify downstream logic BlockRange().Remove(op2); @@ -5804,7 +5454,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // it and then operate on that. At the end, we will insert the simd16 // result back into the simd64 local, producing our final value. - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); // This copy of "node" will have the simd16 value we need. 
result = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, intrinsicId, simdBaseJitType, 16); @@ -5861,12 +5511,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(op1, idx); LowerNode(idx); - NamedIntrinsic extractIntrinsicId = NI_AVX512F_ExtractVector128; - - if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - extractIntrinsicId = NI_AVX512DQ_ExtractVector128; - } + NamedIntrinsic extractIntrinsicId = NI_AVX512_ExtractVector128; tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, idx, extractIntrinsicId, simdBaseJitType, simdSize); BlockRange().InsertAfter(idx, tmp1); @@ -5881,12 +5526,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) BlockRange().InsertBefore(node, idx); LowerNode(idx); - NamedIntrinsic insertIntrinsicId = NI_AVX512F_InsertVector128; - - if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - insertIntrinsicId = NI_AVX512DQ_InsertVector128; - } + NamedIntrinsic insertIntrinsicId = NI_AVX512_InsertVector128; node->ResetHWIntrinsicId(insertIntrinsicId, comp, tmp64, result, idx); } @@ -5976,6 +5616,8 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) case TYP_LONG: case TYP_ULONG: { + assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41_X64)); + idx = comp->gtNewIconNode(imm8); BlockRange().InsertBefore(result, idx); result->ChangeHWIntrinsicId(NI_SSE41_X64_Insert, op1, op3, idx); @@ -6010,7 +5652,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // ... // node = Sse.MoveScalar(op1, op2); - result->ResetHWIntrinsicId(NI_SSE_MoveScalar, op1, tmp1); + result->ResetHWIntrinsicId(NI_X86Base_MoveScalar, op1, tmp1); } else { @@ -6092,7 +5734,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) std::swap(tmp1, tmp2); } - op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, tmp2, idx, NI_SSE_Shuffle, + op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, tmp2, idx, NI_X86Base_Shuffle, CORINFO_TYPE_FLOAT, 16); BlockRange().InsertAfter(idx, op1); LowerNode(op1); @@ -6105,7 +5747,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) std::swap(op1, op2); } - result->ChangeHWIntrinsicId(NI_SSE_Shuffle, op1, op2, idx); + result->ChangeHWIntrinsicId(NI_X86Base_Shuffle, op1, op2, idx); } break; } @@ -6122,6 +5764,8 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) case TYP_INT: case TYP_UINT: { + assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41)); + idx = comp->gtNewIconNode(imm8); BlockRange().InsertBefore(result, idx); result->ChangeHWIntrinsicId(NI_SSE41_Insert, op1, op3, idx); @@ -6133,7 +5777,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) { idx = comp->gtNewIconNode(imm8); BlockRange().InsertBefore(result, idx); - result->ChangeHWIntrinsicId(NI_SSE2_Insert, op1, op3, idx); + result->ChangeHWIntrinsicId(NI_X86Base_Insert, op1, op3, idx); break; } @@ -6151,7 +5795,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) tmp1 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op3, CORINFO_TYPE_DOUBLE, 16); LowerNode(tmp1); - result->ResetHWIntrinsicId((imm8 == 0) ? NI_SSE2_MoveScalar : NI_SSE2_UnpackLow, op1, tmp1); + result->ResetHWIntrinsicId((imm8 == 0) ? 
NI_X86Base_MoveScalar : NI_X86Base_UnpackLow, op1, tmp1); break; } @@ -6166,8 +5810,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) { // Now that we have finalized the shape of the tree, lower the insertion node as well. - assert((node->GetHWIntrinsicId() == NI_AVX512F_InsertVector128) || - (node->GetHWIntrinsicId() == NI_AVX512DQ_InsertVector128)); + assert(node->GetHWIntrinsicId() == NI_AVX512_InsertVector128); assert(node != result); nextNode = LowerNode(node); @@ -6327,8 +5970,6 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - switch (simdBaseType) { case TYP_SHORT: @@ -6338,7 +5979,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSSE3)) { - shuffle = NI_SSE2_ShuffleLow; + shuffle = NI_X86Base_ShuffleLow; } break; } @@ -6411,7 +6052,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) if ((simdSize == 8) || !comp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) { // We also do this for simdSize == 8 to ensure we broadcast the result as expected - shuffle = NI_SSE_Shuffle; + shuffle = NI_X86Base_Shuffle; } break; } @@ -6462,7 +6103,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) horizontalAdd = NI_SSE3_HorizontalAdd; // We need to ensure we broadcast the result as expected - shuffle = NI_SSE2_Shuffle; + shuffle = NI_X86Base_Shuffle; break; } @@ -6725,7 +6366,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // tmp2 = Isa.Shuffle(tmp1, shuffleConst); // ... - tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_SSE2_ShuffleLow, simdBaseJitType, + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_X86Base_ShuffleLow, simdBaseJitType, simdSize); BlockRange().InsertAfter(idx, tmp2); LowerNode(tmp2); @@ -6733,7 +6374,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode(shuffleConst, TYP_INT); BlockRange().InsertAfter(tmp2, idx); - tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_SSE2_ShuffleHigh, simdBaseJitType, + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_X86Base_ShuffleHigh, simdBaseJitType, simdSize); } else @@ -6753,7 +6394,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // tmp2 = Isa.Shuffle(tmp1, shuffleConst); // ... - tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_SSE2_Shuffle, CORINFO_TYPE_INT, + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); } } @@ -6859,24 +6500,25 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) unsigned simdSize = node->GetSimdSize(); var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - assert((intrinsicId == NI_Vector128_ToScalar) || (intrinsicId == NI_Vector256_ToScalar) || - (intrinsicId == NI_Vector512_ToScalar)); - + assert(HWIntrinsicInfo::IsVectorToScalar(intrinsicId)); assert(varTypeIsSIMD(simdType)); assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); GenTree* op1 = node->Op(1); - if (IsContainableMemoryOp(op1)) + if (IsContainableMemoryOp(op1) && (!varTypeIsLong(simdBaseType) || TargetArchitecture::Is64Bit)) { - // We will specially handle ToScalar when op1 is already in memory + // If op1 is already in memory, we'd like the consumer of ToScalar to be able to look + // through to the memory directly. 
Early folding is preferable, as it unlocks additional + // containment opportunities for the consuming nodes. If we can't fold away ToScalar, + // we will still contain op1 if possible, and let codegen try to peek through to it. + // + // However, we specifically need to avoid doing this for long on 32-bit because we are + // already past DecomposeLongs, and codegen wouldn't be able to handle it. if (op1->OperIs(GT_IND)) { - // We want to optimize ToScalar down to an Indir where possible as - // this unlocks additional containment opportunities for various nodes - GenTreeIndir* indir = op1->AsIndir(); GenTreeIndir* newIndir = @@ -6903,9 +6545,6 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) { uint32_t elemSize = genTypeSize(simdBaseType); - // We want to optimize ToScalar down to a LclFld where possible as - // this unlocks additional containment opportunities for various nodes - GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon(); uint32_t lclOffs = lclVar->GetLclOffs() + (0 * elemSize); LclVarDsc* lclDsc = comp->lvaGetDesc(lclVar); @@ -6932,92 +6571,10 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) return LowerNode(lclFld); } } - - if (IsSafeToContainMem(node, op1)) - { - // Handle other cases in codegen - ContainCheckHWIntrinsic(node); - return node->gtNext; - } - } - - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_SHORT: - case TYP_INT: - { - node->gtType = TYP_INT; - node->SetSimdBaseJitType(CORINFO_TYPE_INT); - node->ChangeHWIntrinsicId(NI_SSE2_ConvertToInt32); - break; - } - - case TYP_UBYTE: - case TYP_USHORT: - case TYP_UINT: - { - node->gtType = TYP_INT; - node->SetSimdBaseJitType(CORINFO_TYPE_UINT); - node->ChangeHWIntrinsicId(NI_SSE2_ConvertToUInt32); - break; - } - -#if defined(TARGET_AMD64) - case TYP_LONG: - { - node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertToInt64); - break; - } - - case TYP_ULONG: - { - node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertToUInt64); - break; - } -#endif // TARGET_AMD64 - - case TYP_FLOAT: - case TYP_DOUBLE: - { - ContainCheckHWIntrinsic(node); - return node->gtNext; - } - - default: - { - unreached(); - } - } - - GenTree* next = LowerNode(node); - - if (genTypeSize(simdBaseType) < 4) - { - // The move intrinsics do not touch the upper bits, so we need an explicit - // cast to ensure the result is properly sign extended - - LIR::Use use; - - bool foundUse = BlockRange().TryGetUse(node, &use); - bool fromUnsigned = varTypeIsUnsigned(simdBaseType); - - GenTreeCast* cast = comp->gtNewCastNode(TYP_INT, node, fromUnsigned, simdBaseType); - BlockRange().InsertAfter(node, cast); - - if (foundUse) - { - use.ReplaceWith(cast); - } - else - { - node->ClearUnusedValue(); - cast->SetUnusedValue(); - } - next = LowerNode(cast); } - return next; + ContainCheckHWIntrinsic(node); + return node->gtNext; } //---------------------------------------------------------------------------------------------- @@ -7093,9 +6650,9 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode) JITDUMP("to:\n"); DISPNODE(blsrNode); + BlockRange().InsertBefore(andNode, blsrNode); use.ReplaceWith(blsrNode); - BlockRange().InsertBefore(andNode, blsrNode); BlockRange().Remove(andNode); BlockRange().Remove(op2); BlockRange().Remove(addOp1); @@ -7178,9 +6735,9 @@ GenTree* Lowering::TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode) JITDUMP("to:\n"); DISPNODE(blsiNode); + BlockRange().InsertBefore(andNode, blsiNode); use.ReplaceWith(blsiNode); - BlockRange().InsertBefore(andNode, blsiNode); 
BlockRange().Remove(andNode); BlockRange().Remove(negNode); BlockRange().Remove(negOp); @@ -7265,9 +6822,9 @@ GenTree* Lowering::TryLowerAndOpToAndNot(GenTreeOp* andNode) JITDUMP("to:\n"); DISPNODE(andnNode); + BlockRange().InsertBefore(andNode, andnNode); use.ReplaceWith(andnNode); - BlockRange().InsertBefore(andNode, andnNode); BlockRange().Remove(andNode); BlockRange().Remove(notNode); @@ -7350,9 +6907,9 @@ GenTree* Lowering::TryLowerXorOpToGetMaskUpToLowestSetBit(GenTreeOp* xorNode) JITDUMP("to:\n"); DISPNODE(blsmskNode); + BlockRange().InsertBefore(xorNode, blsmskNode); use.ReplaceWith(blsmskNode); - BlockRange().InsertBefore(xorNode, blsmskNode); BlockRange().Remove(xorNode); BlockRange().Remove(op2); BlockRange().Remove(addOp1); @@ -7404,7 +6961,7 @@ void Lowering::LowerBswapOp(GenTreeOp* node) bool Lowering::IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd) { // If the operand isn't an indirection, it's trivially not a candidate. - if (operand->OperGet() != GT_IND) + if (!operand->OperIs(GT_IND)) { return false; } @@ -7906,7 +7463,7 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) if (ctrlExpr != nullptr) { // we should never see a gtControlExpr whose type is void. - assert(ctrlExpr->TypeGet() != TYP_VOID); + assert(!ctrlExpr->TypeIs(TYP_VOID)); #ifdef TARGET_X86 // On x86, we need to generate a very specific pattern for indirect VSD calls: @@ -7951,7 +7508,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node) GenTree* addr = node->Addr(); // If this is the rhs of a block copy it will be handled when we handle the store. - if (node->TypeGet() == TYP_STRUCT) + if (node->TypeIs(TYP_STRUCT)) { return; } @@ -7972,7 +7529,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node) GenTreeIntConCommon* icon = addr->AsIntConCommon(); #if defined(FEATURE_SIMD) - if (((addr->TypeGet() != TYP_SIMD12) || !icon->ImmedValNeedsReloc(comp)) && icon->FitsInAddrBase(comp)) + if ((!addr->TypeIs(TYP_SIMD12) || !icon->ImmedValNeedsReloc(comp)) && icon->FitsInAddrBase(comp)) #else if (icon->FitsInAddrBase(comp)) #endif @@ -7986,7 +7543,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node) MakeSrcContained(node, addr); } } - else if ((addr->OperGet() == GT_LEA) && IsInvariantInRange(addr, node)) + else if (addr->OperIs(GT_LEA) && IsInvariantInRange(addr, node)) { MakeSrcContained(node, addr); } @@ -8040,28 +7597,63 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) case NI_Vector256_ToScalar: case NI_Vector512_ToScalar: { - if (varTypeIsFloating(simdBaseType)) + // These intrinsics are "ins reg/mem, xmm" or "ins xmm, reg/mem" + // + // In the case we are coming from and going to memory, we want to + // preserve the original containment as we'll end up emitting a pair + // of scalar moves. e.g. for float: + // movss xmm0, [addr1] ; Size: 4, Latency: 4-7, TP: 0.5 + // movss [addr2], xmm0 ; Size: 4, Latency: 4-10, TP: 1 + // + // However, we want to prefer containing the store over allowing the + // input to be regOptional, so track and clear containment if required. 
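The three BMI1 rewrites adjusted above match the usual bit-twiddling identities; a small scalar reference of the patterns being recognized (plain C++, for orientation only):

    #include <cstdint>

    uint32_t ResetLowestSetBit(uint32_t x)       { return x & (x - 1); }  // blsr
    uint32_t ExtractLowestSetBit(uint32_t x)     { return x & (0u - x); } // blsi: x & -x
    uint32_t GetMaskUpToLowestSetBit(uint32_t x) { return x ^ (x - 1); }  // blsmsk

    // Example: x = 0b10100 -> blsr = 0b10000, blsi = 0b00100, blsmsk = 0b00111.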
+ + GenTree* op1 = hwintrinsic->Op(1); + clearContainedNode = op1; + isContainable = !clearContainedNode->isContained(); + + if (isContainable && varTypeIsIntegral(simdBaseType)) { - // These intrinsics are "ins reg/mem, xmm" or "ins xmm, reg/mem" - // - // In the case we are coming from and going to memory, we want to - // preserve the original containment as we'll end up emitting: - // movss xmm0, [addr1] ; Size: 4, Latency: 4-7, TP: 0.5 - // movss [addr2], xmm0 ; Size: 4, Latency: 4-10, TP: 1 - // - // However, we want to prefer containing the store over allowing the - // input to be regOptional, so track and clear containment if required. + isContainable = (genTypeSize(simdBaseType) == genTypeSize(node)) && + (!varTypeIsSmall(simdBaseType) || + comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)); - clearContainedNode = hwintrinsic->Op(1); - isContainable = !clearContainedNode->isContained(); + if (isContainable && varTypeIsSmall(simdBaseType)) + { + CorInfoType baseJitType = varTypeIsByte(node) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_USHORT; + + if (intrinsicId == NI_Vector512_ToScalar) + { + op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector512_GetLower128, + baseJitType, 64); + BlockRange().InsertBefore(hwintrinsic, op1); + LowerNode(op1); + } + else if (intrinsicId == NI_Vector256_ToScalar) + { + op1 = comp->gtNewSimdGetLowerNode(TYP_SIMD16, op1, baseJitType, 32); + BlockRange().InsertBefore(hwintrinsic, op1); + LowerNode(op1); + } + + intrinsicId = varTypeIsByte(node) ? NI_SSE41_Extract : NI_X86Base_Extract; + + GenTree* zero = comp->gtNewZeroConNode(TYP_INT); + BlockRange().InsertBefore(hwintrinsic, zero); + + hwintrinsic->SetSimdBaseJitType(baseJitType); + hwintrinsic->SetSimdSize(16); + hwintrinsic->ResetHWIntrinsicId(intrinsicId, op1, zero); + zero->SetContained(); + } } break; } - case NI_SSE2_ConvertToInt32: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_X64_ConvertToUInt64: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_X64_ConvertToUInt64: case NI_AVX2_ConvertToInt32: case NI_AVX2_ConvertToUInt32: { @@ -8108,24 +7700,20 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) else { // TODO-XArch-CQ: We really should specially handle TYP_DOUBLE here but - // it requires handling GetElement(1) and GT_STOREIND as NI_SSE2_StoreHigh + // it requires handling GetElement(1) and GT_STOREIND as NI_X86Base_StoreHigh assert(!isContainable); } } break; } - case NI_SSE2_Extract: + case NI_X86Base_Extract: case NI_SSE41_Extract: case NI_SSE41_X64_Extract: case NI_AVX_ExtractVector128: case NI_AVX2_ExtractVector128: - case NI_AVX512F_ExtractVector128: - case NI_AVX512F_ExtractVector256: - case NI_AVX512DQ_ExtractVector128: - case NI_AVX512DQ_ExtractVector256: - case NI_AVX10v1_V512_ExtractVector128: - case NI_AVX10v1_V512_ExtractVector256: + case NI_AVX512_ExtractVector128: + case NI_AVX512_ExtractVector256: { // These intrinsics are "ins reg/mem, xmm, imm8" @@ -8135,7 +7723,7 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) isContainable = HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI() && (genTypeSize(simdBaseType) == genTypeSize(node)); - if (isContainable && (intrinsicId == NI_SSE2_Extract)) + if (isContainable && (intrinsicId == NI_X86Base_Extract)) { // The encoding that supports containment is SSE4.1 only isContainable = comp->compOpportunisticallyDependsOn(InstructionSet_SSE41); @@ -8143,12 +7731,10 @@ void 
Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) break; } - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { if (varTypeIsFloating(simdBaseType)) { @@ -8157,52 +7743,30 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) FALLTHROUGH; } - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: - case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int32: - case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case 
NI_AVX512_ConvertToVector256Int32WithSaturation: + case NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: { // These intrinsics are "ins reg/mem, xmm" - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType); - insTupleType tupleType = comp->GetEmitter()->insTupleTypeInfo(ins); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, comp); + insTupleType tupleType = emitter::insTupleTypeInfo(ins); unsigned simdSize = hwintrinsic->GetSimdSize(); unsigned memSize = 0; @@ -8308,12 +7872,12 @@ void Lowering::ContainCheckMul(GenTreeOp* node) { hasImpliedFirstOperand = true; } - else if (node->OperGet() == GT_MULHI) + else if (node->OperIs(GT_MULHI)) { hasImpliedFirstOperand = true; } #if defined(TARGET_X86) - else if (node->OperGet() == GT_MUL_LONG) + else if (node->OperIs(GT_MUL_LONG)) { hasImpliedFirstOperand = true; } @@ -8448,7 +8012,7 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node) bool divisorCanBeRegOptional = true; #ifdef TARGET_X86 GenTree* dividend = node->gtGetOp1(); - if (dividend->OperGet() == GT_LONG) + if (dividend->OperIs(GT_LONG)) { divisorCanBeRegOptional = false; MakeSrcContained(node, dividend); @@ -8477,21 +8041,40 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node) void Lowering::ContainCheckShiftRotate(GenTreeOp* node) { assert(node->OperIsShiftOrRotate()); + + GenTree* source = node->gtOp1; + GenTree* shiftBy = node->gtOp2; + #ifdef TARGET_X86 - GenTree* source = node->gtOp1; if (node->OperIsShiftLong()) { - assert(source->OperGet() == GT_LONG); + assert(source->OperIs(GT_LONG)); MakeSrcContained(node, source); } -#endif +#endif // TARGET_X86 - GenTree* shiftBy = node->gtOp2; if (IsContainableImmed(node, shiftBy) && (shiftBy->AsIntConCommon()->IconValue() <= 255) && (shiftBy->AsIntConCommon()->IconValue() >= 0)) { MakeSrcContained(node, shiftBy); } + + bool canContainSource = !source->isContained() && (genTypeSize(source) >= genTypeSize(node)); + + // BMI2 rotate and shift instructions take memory operands but do not set flags. + // rorx takes imm8 for the rotate amount; shlx/shrx/sarx take r32/64 for shift amount. + if (canContainSource && !node->gtSetFlags() && (shiftBy->isContained() != node->OperIsShift()) && + comp->compOpportunisticallyDependsOn(InstructionSet_BMI2)) + { + if (IsContainableMemoryOp(source) && IsSafeToContainMem(node, source)) + { + MakeSrcContained(node, source); + } + else if (IsSafeToMarkRegOptional(node, source)) + { + MakeSrcRegOptional(node, source); + } + } } //------------------------------------------------------------------------ @@ -8537,7 +8120,7 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const MakeSrcContained(storeLoc, op1); } #ifdef TARGET_X86 - else if (op1->OperGet() == GT_LONG) + else if (op1->OperIs(GT_LONG)) { MakeSrcContained(storeLoc, op1); } @@ -8569,26 +8152,14 @@ void Lowering::ContainCheckCast(GenTreeCast* node) if (varTypeIsFloating(castToType) || varTypeIsFloating(srcType)) { -#ifdef DEBUG - // If converting to float/double, the operand must be 4 or 8 byte in size. - if (varTypeIsFloating(castToType)) + if (castOp->IsCnsNonZeroFltOrDbl()) { - unsigned opSize = genTypeSize(srcType); - assert(opSize == 4 || opSize == 8); + MakeSrcContained(node, castOp); } -#endif // DEBUG - - // U8 -> R8 conversion requires that the operand be in a register. 
- if (srcType != TYP_ULONG) + else { - if (castOp->IsCnsNonZeroFltOrDbl()) - { - MakeSrcContained(node, castOp); - } - else - { - srcIsContainable = true; - } + // The ulong->floating SSE2 fallback requires the source to be in register + srcIsContainable = !varTypeIsSmall(srcType) && ((srcType != TYP_ULONG) || comp->canUseEvexEncoding()); } } else if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(castOp) && varTypeIsIntegral(castToType)) @@ -8609,7 +8180,7 @@ void Lowering::ContainCheckCast(GenTreeCast* node) #if !defined(TARGET_64BIT) if (varTypeIsLong(srcType)) { - noway_assert(castOp->OperGet() == GT_LONG); + noway_assert(castOp->OperIs(GT_LONG)); castOp->SetContained(); } #endif // !defined(TARGET_64BIT) @@ -8820,7 +8391,7 @@ void Lowering::ContainCheckSelect(GenTreeOp* select) // bool Lowering::LowerRMWMemOp(GenTreeIndir* storeInd) { - assert(storeInd->OperGet() == GT_STOREIND); + assert(storeInd->OperIs(GT_STOREIND)); // SSE2 doesn't support RMW on float values assert(!varTypeIsFloating(storeInd)); @@ -8892,7 +8463,7 @@ bool Lowering::LowerRMWMemOp(GenTreeIndir* storeInd) GenTree* indirCandidateChild = indirCandidate->gtGetOp1(); indirCandidateChild->SetContained(); - if (indirCandidateChild->OperGet() == GT_LEA) + if (indirCandidateChild->OperIs(GT_LEA)) { GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode(); @@ -9132,8 +8703,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case HW_Category_SimpleSIMD: case HW_Category_IMM: { - instruction ins = HWIntrinsicInfo::lookupIns(parentIntrinsicId, parentBaseType); - insTupleType tupleType = comp->GetEmitter()->insTupleTypeInfo(ins); + instruction ins = HWIntrinsicInfo::lookupIns(parentIntrinsicId, parentBaseType, comp); + insTupleType tupleType = emitter::insTupleTypeInfo(ins); switch (parentIntrinsicId) { @@ -9143,9 +8714,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_AVX2_ConvertToVector256Int16: case NI_AVX2_ConvertToVector256Int32: case NI_AVX2_ConvertToVector256Int64: - case NI_AVX2_BroadcastVector128ToVector256: - case NI_AVX512F_BroadcastVector128ToVector512: - case NI_AVX512F_BroadcastVector256ToVector512: { // These can have either pointer or vector operands. For the pointer case, we can't check // size, so just assume it matches. Otherwise, do normal size check based on tuple type. 
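For context on the ulong-to-floating comment above: without the EVEX unsigned converts, the conversion is expanded into a multi-step sequence that reads the source value more than once, so it cannot simply be folded as a single memory operand. A rough scalar equivalent of such a fallback (illustrative only, not the sequence the JIT emits):

    #include <stdint.h>

    // Convert uint64_t to double using only a signed 64-bit convert.
    double UInt64ToDouble(uint64_t v)
    {
        if ((int64_t)v >= 0)
        {
            return (double)(int64_t)v;          // value fits the signed convert
        }
        uint64_t half = (v >> 1) | (v & 1);     // halve, keeping the low bit for rounding
        return (double)(int64_t)half * 2.0;     // convert, then scale back up
    }

The two reads of v (the sign test and the halving) mirror why the non-EVEX path wants the operand in a register.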
@@ -9158,8 +8726,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre goto SIZE_FROM_TUPLE_TYPE; } - case NI_SSE2_ShiftLeftLogical128BitLane: - case NI_SSE2_ShiftRightLogical128BitLane: + case NI_X86Base_ShiftLeftLogical128BitLane: + case NI_X86Base_ShiftRightLogical128BitLane: case NI_AVX2_ShiftLeftLogical128BitLane: case NI_AVX2_ShiftRightLogical128BitLane: { @@ -9172,20 +8740,15 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre goto SIZE_FROM_TUPLE_TYPE; } - case NI_SSE2_ShiftLeftLogical: - case NI_SSE2_ShiftRightArithmetic: - case NI_SSE2_ShiftRightLogical: + case NI_X86Base_ShiftLeftLogical: + case NI_X86Base_ShiftRightArithmetic: + case NI_X86Base_ShiftRightLogical: case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightLogical: - case NI_AVX512BW_ShiftLeftLogical: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX10v1_ShiftRightArithmetic: + case NI_AVX512_ShiftLeftLogical: + case NI_AVX512_ShiftRightArithmetic: + case NI_AVX512_ShiftRightLogical: { assert((tupleType & INS_TT_MEM128) != 0); @@ -9196,23 +8759,25 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre if (!HWIntrinsicInfo::isImmOp(parentIntrinsicId, parentNode->Op(2))) { + tupleType = static_cast<insTupleType>(INS_TT_MEM128); expectedSize = genTypeSize(TYP_SIMD16); break; } - else if ((expectedSize < genTypeSize(TYP_SIMD64)) && (ins != INS_vpsraq)) + else { - // TODO-XArch-CQ: This should really only be checking EVEX capability, however - // emitter::TakesEvexPrefix doesn't currently handle requiring EVEX based on presence - // of an immediate operand. For now we disable containment of op1 unless EVEX is - // required for some other reason. - supportsMemoryOp = false; - break; + tupleType = static_cast<insTupleType>(tupleType & ~INS_TT_MEM128); + + if (!comp->canUseEvexEncoding()) + { + supportsMemoryOp = false; + break; + } } goto SIZE_FROM_TUPLE_TYPE; } - case NI_SSE2_Insert: + case NI_X86Base_Insert: case NI_SSE41_Insert: case NI_SSE41_X64_Insert: { @@ -9250,7 +8815,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre default: SIZE_FROM_TUPLE_TYPE: { - tupleType = static_cast<insTupleType>(tupleType & ~INS_TT_MEM128); switch (tupleType) { case INS_TT_NONE: @@ -9331,6 +8895,9 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre switch (parentIntrinsicId) { + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: + case NI_Vector512_CreateScalar: case NI_Vector128_CreateScalarUnsafe: case NI_Vector256_CreateScalarUnsafe: case NI_Vector512_CreateScalarUnsafe: @@ -9347,8 +8914,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: - case NI_AVX512BW_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { // These can have either pointer or vector operands. For the pointer case, we can't check // size, so just assume it matches.
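The tupleType handling in the shift cases above follows a flag-mask pattern: a non-immediate shift count is always a full 16-byte vector operand, while the immediate form clears the MEM128 flag so the default SIZE_FROM_TUPLE_TYPE path sizes the memory operand from what remains. A minimal sketch of that pattern with hypothetical flag values (the real ones are the emitter's insTupleType):

    enum TupleFlags : unsigned
    {
        TT_FULL   = 0x01, // memory operand is the full vector width
        TT_MEM128 = 0x02, // memory operand is a fixed 16-byte load
    };

    unsigned ExpectedMemSize(unsigned tuple, bool shiftByIsImm, unsigned simdSize)
    {
        if (!shiftByIsImm)
        {
            return 16;            // variable shift count: always an XMM-sized operand
        }
        tuple &= ~TT_MEM128;      // immediate form: size comes from the remaining flags
        return ((tuple & TT_FULL) != 0) ? simdSize : 0;
    }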
@@ -9435,6 +9001,9 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre switch (intrinsicId) { + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: + case NI_Vector512_CreateScalar: case NI_Vector128_CreateScalarUnsafe: case NI_Vector256_CreateScalarUnsafe: case NI_Vector512_CreateScalarUnsafe: @@ -9449,7 +9018,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre if (IsInvariantInRange(op1, parentNode, hwintrinsic)) { - if (op1->isContained()) + if (op1->isContained() && !op1->OperIsLong()) { // We have CreateScalarUnsafe where the underlying scalar is contained // As such, we can contain the CreateScalarUnsafe and consume the value @@ -9474,10 +9043,9 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre return false; } - case NI_SSE_LoadAlignedVector128: - case NI_SSE2_LoadAlignedVector128: + case NI_X86Base_LoadAlignedVector128: case NI_AVX_LoadAlignedVector256: - case NI_AVX512F_LoadAlignedVector512: + case NI_AVX512_LoadAlignedVector512: { // In minOpts, we need to ensure that an unaligned address will fault when an explicit LoadAligned is used. // Non-VEX encoded instructions will fault if an unaligned SIMD16 load is contained but will not for scalar @@ -9490,8 +9058,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre ((!comp->canUseVexEncoding() && expectedSize == genTypeSize(TYP_SIMD16)) || !comp->opts.MinOpts())); } - case NI_SSE_LoadScalarVector128: - case NI_SSE2_LoadScalarVector128: + case NI_X86Base_LoadScalarVector128: { // These take only pointer operands. assert(hwintrinsic->OperIsMemoryLoad()); @@ -9502,7 +9069,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_SSE3_MoveAndDuplicate: case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { if (comp->opts.MinOpts() || !comp->canUseEmbeddedBroadcast()) { @@ -9522,17 +9089,19 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre assert(childBaseType == TYP_DOUBLE); } - if (parentNode->OperIsEmbBroadcastCompatible() && comp->canUseEvexEncoding()) + if (parentNode->isEmbeddedBroadcastCompatibleHWIntrinsic(comp)) { GenTree* broadcastOperand = hwintrinsic->Op(1); if (broadcastOperand->OperIsHWIntrinsic()) { GenTreeHWIntrinsic* hwintrinsicOperand = broadcastOperand->AsHWIntrinsic(); + NamedIntrinsic operandIntrinsicId = hwintrinsicOperand->GetHWIntrinsicId(); - if (hwintrinsicOperand->OperIsCreateScalarUnsafe()) + if (HWIntrinsicInfo::IsVectorCreateScalar(operandIntrinsicId) || + HWIntrinsicInfo::IsVectorCreateScalarUnsafe(operandIntrinsicId)) { - // CreateScalarUnsafe can contain non-memory operands such as enregistered + // CreateScalar/Unsafe can contain non-memory operands such as enregistered // locals, so we want to check if its operand is containable instead. This // will result in such enregistered locals returning `false`. 
broadcastOperand = hwintrinsicOperand->Op(1); @@ -9562,8 +9131,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre assert(hwintrinsic->OperIsMemoryLoad()); assert(varTypeIsFloating(childBaseType)); - return (parentBaseType == childBaseType) && parentNode->OperIsEmbBroadcastCompatible() && - comp->canUseEvexEncoding(); + return (parentBaseType == childBaseType) && parentNode->isEmbeddedBroadcastCompatibleHWIntrinsic(comp); } default: @@ -9622,7 +9190,7 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, } else if (simdType == TYP_SIMD64) { - broadcastName = NI_AVX512F_BroadcastScalarToVector512; + broadcastName = NI_AVX512_BroadcastScalarToVector512; } else { @@ -9702,83 +9270,6 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, MakeSrcContained(parentNode, childNode); } -//---------------------------------------------------------------------------------------------- -// TryCompressConstVecData: -// Try to compress the constant vector input if it has duplicated parts and can be optimized by -// broadcast -// -// Arguments: -// node - the storeind node. -// -// Return: -// return true if compress success. -void Lowering::TryCompressConstVecData(GenTreeStoreInd* node) -{ - assert(node->Data()->IsCnsVec()); - assert(node->Data()->AsVecCon()->TypeIs(TYP_SIMD32, TYP_SIMD64)); - - GenTreeVecCon* vecCon = node->Data()->AsVecCon(); - GenTreeHWIntrinsic* broadcast = nullptr; - - if (vecCon->TypeIs(TYP_SIMD32)) - { - assert(comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)); - if (vecCon->gtSimd32Val.v128[0] == vecCon->gtSimdVal.v128[1]) - { - simd16_t simd16Val = {}; - simd16Val.f64[0] = vecCon->gtSimd32Val.f64[0]; - simd16Val.f64[1] = vecCon->gtSimd32Val.f64[1]; - GenTreeVecCon* compressedVecCon = comp->gtNewVconNode(TYP_SIMD16); - memcpy(&compressedVecCon->gtSimdVal, &simd16Val, sizeof(simd16_t)); - BlockRange().InsertBefore(node->Data(), compressedVecCon); - BlockRange().Remove(vecCon); - broadcast = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, compressedVecCon, - NI_AVX2_BroadcastVector128ToVector256, CORINFO_TYPE_UINT, 32); - } - } - else - { - assert(vecCon->TypeIs(TYP_SIMD64)); - assert(comp->IsBaselineVector512IsaSupportedOpportunistically()); - if (vecCon->gtSimd64Val.v128[0] == vecCon->gtSimd64Val.v128[1] && - vecCon->gtSimd64Val.v128[0] == vecCon->gtSimd64Val.v128[2] && - vecCon->gtSimd64Val.v128[0] == vecCon->gtSimd64Val.v128[3]) - { - simd16_t simd16Val = {}; - simd16Val.f64[0] = vecCon->gtSimd64Val.f64[0]; - simd16Val.f64[1] = vecCon->gtSimd64Val.f64[1]; - GenTreeVecCon* compressedVecCon = comp->gtNewVconNode(TYP_SIMD16); - memcpy(&compressedVecCon->gtSimdVal, &simd16Val, sizeof(simd16_t)); - BlockRange().InsertBefore(node->Data(), compressedVecCon); - BlockRange().Remove(vecCon); - broadcast = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD64, compressedVecCon, - NI_AVX512F_BroadcastVector128ToVector512, CORINFO_TYPE_UINT, 64); - } - else if (vecCon->gtSimd64Val.v256[0] == vecCon->gtSimd64Val.v256[1]) - { - simd32_t simd32Val = {}; - simd32Val.v128[0] = vecCon->gtSimd32Val.v128[0]; - simd32Val.v128[1] = vecCon->gtSimd32Val.v128[1]; - GenTreeVecCon* compressedVecCon = comp->gtNewVconNode(TYP_SIMD32); - memcpy(&compressedVecCon->gtSimdVal, &simd32Val, sizeof(simd32_t)); - BlockRange().InsertBefore(node->Data(), compressedVecCon); - BlockRange().Remove(vecCon); - broadcast = - comp->gtNewSimdHWIntrinsicNode(TYP_SIMD64, compressedVecCon, NI_AVX512F_BroadcastVector256ToVector512, - 
CORINFO_TYPE_ULONG, 64); - } - } - - if (broadcast == nullptr) - { - return; - } - - BlockRange().InsertBefore(node, broadcast); - node->Data() = broadcast; - LowerNode(broadcast); -} - //------------------------------------------------------------------------ // TryMakeSrcContainedOrRegOptional: Tries to make "childNode" a contained or regOptional node // @@ -9792,7 +9283,7 @@ void Lowering::TryMakeSrcContainedOrRegOptional(GenTreeHWIntrinsic* parentNode, if (IsContainableHWIntrinsicOp(parentNode, childNode, &supportsRegOptional)) { - if (childNode->IsCnsVec() && parentNode->OperIsEmbBroadcastCompatible() && comp->canUseEvexEncoding()) + if (childNode->IsCnsVec() && parentNode->isEmbeddedBroadcastCompatibleHWIntrinsic(comp)) { TryFoldCnsVecForEmbeddedBroadcast(parentNode, childNode->AsVecCon()); } @@ -9818,7 +9309,7 @@ void Lowering::TryMakeSrcContainedOrRegOptional(GenTreeHWIntrinsic* parentNode, // void Lowering::ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* addr, unsigned size) { - assert((genActualType(addr) == TYP_I_IMPL) || (addr->TypeGet() == TYP_BYREF)); + assert((genActualType(addr) == TYP_I_IMPL) || addr->TypeIs(TYP_BYREF)); if ((addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), size)) || (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp))) { @@ -9877,9 +9368,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if ((simdSize == 8) || (simdSize == 12)) { // We want to handle GetElement/ToScalar still for Vector2/3 - if ((intrinsicId != NI_Vector128_GetElement) && (intrinsicId != NI_Vector128_ToScalar) && - (intrinsicId != NI_Vector256_GetElement) && (intrinsicId != NI_Vector256_ToScalar) && - (intrinsicId != NI_Vector512_GetElement) && (intrinsicId != NI_Vector512_ToScalar)) + if (!HWIntrinsicInfo::IsVectorToScalar(intrinsicId) && !HWIntrinsicInfo::IsVectorGetElement(intrinsicId)) { // TODO-XArch-CQ: Ideally we would key this off of the size the containing node // expects vs the size node actually is or would be if spilled to the stack @@ -9915,10 +9404,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { switch (intrinsicId) { - case NI_SSE_ReciprocalScalar: - case NI_SSE_ReciprocalSqrtScalar: - case NI_SSE_SqrtScalar: - case NI_SSE2_SqrtScalar: + case NI_X86Base_ReciprocalScalar: + case NI_X86Base_ReciprocalSqrtScalar: + case NI_X86Base_SqrtScalar: case NI_SSE41_CeilingScalar: case NI_SSE41_FloorScalar: case NI_SSE41_RoundCurrentDirectionScalar: @@ -9926,12 +9414,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE41_RoundToNegativeInfinityScalar: case NI_SSE41_RoundToPositiveInfinityScalar: case NI_SSE41_RoundToZeroScalar: - case NI_AVX512F_GetExponentScalar: - case NI_AVX512F_Reciprocal14Scalar: - case NI_AVX512F_ReciprocalSqrt14Scalar: - case NI_AVX10v1_GetExponentScalar: - case NI_AVX10v1_Reciprocal14Scalar: - case NI_AVX10v1_ReciprocalSqrt14Scalar: + case NI_AVX512_GetExponentScalar: + case NI_AVX512_Reciprocal14Scalar: + case NI_AVX512_ReciprocalSqrt14Scalar: { // These intrinsics have both 1 and 2-operand overloads. 
// @@ -9942,10 +9427,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) return; } - case NI_SSE2_ConvertToInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToUInt64: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToUInt64: case NI_AVX2_ConvertToInt32: case NI_AVX2_ConvertToUInt32: { @@ -9975,8 +9460,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: - case NI_AVX512BW_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { if (node->OperIsMemoryLoad()) { @@ -9986,20 +9470,28 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (varTypeIsIntegral(simdBaseType) && op1->OperIsHWIntrinsic()) { - GenTreeHWIntrinsic* childNode = op1->AsHWIntrinsic(); + GenTreeHWIntrinsic* childNode = op1->AsHWIntrinsic(); + NamedIntrinsic childIntrinsic = childNode->GetHWIntrinsicId(); - if (childNode->OperIsCreateScalarUnsafe()) + if (HWIntrinsicInfo::IsVectorCreateScalar(childIntrinsic) || + HWIntrinsicInfo::IsVectorCreateScalarUnsafe(childIntrinsic)) { - // We have a very special case of BroadcastScalarToVector(CreateScalarUnsafe(op1)) + // We have a very special case of BroadcastScalarToVector(CreateScalar/Unsafe(op1)) // // This is one of the only instructions where it supports taking integer types from // a SIMD register or directly as a scalar from memory. Most other instructions, in // comparison, take such values from general-purpose registers instead. // - // Because of this, we're going to remove the CreateScalarUnsafe and try to contain + // Because of this, we're going to remove the CreateScalar/Unsafe and try to contain // op1 directly, we'll then special case the codegen to materialize the value into a // SIMD register in the case it is marked optional and doesn't get spilled. 
+ if (childNode->Op(1)->OperIsLong()) + { + // Decomposed longs require special codegen + return; + } + node->Op(1) = childNode->Op(1); BlockRange().Remove(op1); @@ -10011,7 +9503,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { GenTreeCast* cast = op1->AsCast(); if (!varTypeIsFloating(cast->CastToType()) && - !varTypeIsFloating(cast->CastFromType()) && + !varTypeIsFloating(cast->CastFromType()) && !cast->CastOp()->OperIsLong() && (genTypeSize(cast->CastToType()) >= genTypeSize(simdBaseType)) && (genTypeSize(cast->CastFromType()) >= genTypeSize(simdBaseType))) { @@ -10026,26 +9518,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVX2_BroadcastVector128ToVector256: - case NI_AVX512F_BroadcastVector128ToVector512: - case NI_AVX512F_BroadcastVector256ToVector512: - { - if (node->OperIsMemoryLoad()) - { - ContainCheckHWIntrinsicAddr(node, op1, /* conservative maximum */ 32); - return; - } - - assert(op1->IsCnsVec()); - break; - } - - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { if (varTypeIsFloating(simdBaseType)) { @@ -10056,54 +9532,77 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) FALLTHROUGH; } - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: - case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int32: - case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case 
NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case NI_AVX512_ConvertToVector256Int32WithSaturation: + case NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: { // These intrinsics are "ins reg/mem, xmm" and get // contained by the relevant store operation instead. return; } +#ifdef TARGET_X86 + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: + case NI_Vector512_CreateScalar: + case NI_Vector128_CreateScalarUnsafe: + case NI_Vector256_CreateScalarUnsafe: + case NI_Vector512_CreateScalarUnsafe: + { + if (op1->OperIsLong()) + { + // Contain decomposed longs and handle them in codegen + assert(varTypeIsLong(simdBaseType)); + + for (GenTree* longOp : op1->Operands()) + { + if (!varTypeIsSmall(longOp) && IsContainableMemoryOp(longOp) && + IsSafeToContainMem(node, longOp)) + { + MakeSrcContained(node, longOp); + } + else if (IsSafeToMarkRegOptional(node, longOp)) + { + MakeSrcRegOptional(node, longOp); + } + } + + MakeSrcContained(node, op1); + return; + } + break; + } + + case NI_Vector128_ToScalar: + case NI_Vector256_ToScalar: + case NI_Vector512_ToScalar: + { + // These will be contained by a STOREIND + if (varTypeIsLong(simdBaseType)) + { + return; + } + break; + } +#endif + default: { break; @@ -10187,8 +9686,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (containedOperand != nullptr) { - if (containedOperand->IsCnsVec() && node->OperIsEmbBroadcastCompatible() && - comp->canUseEvexEncoding()) + if (containedOperand->IsCnsVec() && node->isEmbeddedBroadcastCompatibleHWIntrinsic(comp)) { TryFoldCnsVecForEmbeddedBroadcast(node, containedOperand->AsVecCon()); } @@ -10217,15 +9715,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_SSE2_Extract: + case NI_X86Base_Extract: case NI_AVX_ExtractVector128: case NI_AVX2_ExtractVector128: - case NI_AVX512F_ExtractVector128: - case NI_AVX512F_ExtractVector256: - case NI_AVX512DQ_ExtractVector128: - case NI_AVX512DQ_ExtractVector256: - case NI_AVX10v1_V512_ExtractVector128: - case NI_AVX10v1_V512_ExtractVector256: + case NI_AVX512_ExtractVector128: + case NI_AVX512_ExtractVector256: { // These intrinsics are "ins reg/mem, xmm, imm8" and get // contained by the relevant store operation instead. 
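The TARGET_X86 CreateScalar cases above handle decomposed longs: on a 32-bit target a 64-bit scalar arrives as two 32-bit halves, and codegen recombines them inside the vector register. A rough sketch of that recombination in plain SSE2 intrinsics (illustrative only, not the JIT's codegen):

    #include <emmintrin.h>
    #include <stdint.h>

    // Build a vector whose low 64-bit lane holds hi:lo, starting from the two
    // 32-bit halves of a decomposed long.
    __m128i CreateScalarFromHalves(uint32_t lo, uint32_t hi)
    {
        __m128i vlo = _mm_cvtsi32_si128((int)lo); // lo -> lane 0, upper lanes zeroed
        __m128i vhi = _mm_cvtsi32_si128((int)hi); // hi -> lane 0 of a second vector
        return _mm_unpacklo_epi32(vlo, vhi);      // interleave: lane 0 = lo, lane 1 = hi
    }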
@@ -10233,6 +9727,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } case NI_AVX2_Shuffle: + case NI_AVX512_Shuffle: { if (varTypeIsByte(simdBaseType)) { @@ -10245,24 +9740,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) FALLTHROUGH; } - case NI_SSE2_Shuffle: - case NI_SSE2_ShuffleHigh: - case NI_SSE2_ShuffleLow: + case NI_X86Base_Shuffle: + case NI_X86Base_ShuffleHigh: + case NI_X86Base_ShuffleLow: case NI_AVX2_Permute4x64: case NI_AVX2_ShuffleHigh: case NI_AVX2_ShuffleLow: - case NI_AVX512F_Permute2x64: - case NI_AVX512F_Permute4x32: - case NI_AVX512F_Permute4x64: - case NI_AVX512F_Shuffle: - case NI_AVX512BW_ShuffleHigh: - case NI_AVX512BW_ShuffleLow: - case NI_AVX512F_RotateLeft: - case NI_AVX512F_RotateRight: - case NI_AVX512F_VL_RotateLeft: - case NI_AVX512F_VL_RotateRight: - case NI_AVX10v1_RotateLeft: - case NI_AVX10v1_RotateRight: + case NI_AVX512_Permute2x64: + case NI_AVX512_Permute4x32: + case NI_AVX512_Permute4x64: + case NI_AVX512_ShuffleHigh: + case NI_AVX512_ShuffleLow: + case NI_AVX512_RotateLeft: + case NI_AVX512_RotateRight: { // These intrinsics have op2 as an imm and op1 as a reg/mem @@ -10287,20 +9777,15 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } case NI_AVX_Permute: - case NI_SSE2_ShiftLeftLogical: - case NI_SSE2_ShiftRightArithmetic: - case NI_SSE2_ShiftRightLogical: + case NI_X86Base_ShiftLeftLogical: + case NI_X86Base_ShiftRightArithmetic: + case NI_X86Base_ShiftRightLogical: case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightLogical: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512BW_ShiftLeftLogical: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX10v1_ShiftRightArithmetic: + case NI_AVX512_ShiftLeftLogical: + case NI_AVX512_ShiftRightArithmetic: + case NI_AVX512_ShiftRightLogical: { // These intrinsics can have op2 be imm or reg/mem // They also can have op1 be reg/mem and op2 be imm @@ -10323,16 +9808,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } case NI_AES_KeygenAssist: - case NI_AVX512F_GetMantissa: - case NI_AVX512F_VL_GetMantissa: - case NI_AVX512F_RoundScale: - case NI_AVX512F_VL_RoundScale: - case NI_AVX512DQ_Reduce: - case NI_AVX512DQ_VL_Reduce: - case NI_AVX10v1_GetMantissa: - case NI_AVX10v1_Reduce: - case NI_AVX10v1_RoundScale: - case NI_AVX10v1_V512_Reduce: + case NI_AVX512_GetMantissa: + case NI_AVX512_RoundScale: + case NI_AVX512_Reduce: { if (!isContainedImm) { @@ -10344,12 +9822,12 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_SSE2_ShiftLeftLogical128BitLane: - case NI_SSE2_ShiftRightLogical128BitLane: + case NI_X86Base_ShiftLeftLogical128BitLane: + case NI_X86Base_ShiftRightLogical128BitLane: case NI_AVX2_ShiftLeftLogical128BitLane: case NI_AVX2_ShiftRightLogical128BitLane: - case NI_AVX512BW_ShiftLeftLogical128BitLane: - case NI_AVX512BW_ShiftRightLogical128BitLane: + case NI_AVX512_ShiftLeftLogical128BitLane: + case NI_AVX512_ShiftRightLogical128BitLane: { // These intrinsics have op2 as an imm and op1 as a reg/mem when AVX512BW+VL is supported @@ -10363,12 +9841,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVX512F_GetMantissaScalar: - case NI_AVX512F_RoundScaleScalar: - case NI_AVX512DQ_ReduceScalar: - case NI_AVX10v1_GetMantissaScalar: - case 
NI_AVX10v1_ReduceScalar: - case NI_AVX10v1_RoundScaleScalar: + case NI_AVX512_GetMantissaScalar: + case NI_AVX512_RoundScaleScalar: + case NI_AVX512_ReduceScalar: { // These intrinsics have both 2 and 3-operand overloads. // @@ -10379,8 +9854,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) return; } - case NI_EVEX_ShiftLeftMask: - case NI_EVEX_ShiftRightMask: + case NI_AVX512_ShiftLeftMask: + case NI_AVX512_ShiftRightMask: { // These intrinsics don't support a memory operand and // we don't currently generate a jmp table fallback. @@ -10422,6 +9897,12 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Vector128_op_Division: + case NI_Vector256_op_Division: + { + break; + } + default: { assert(!"Unhandled containment for helper binary hardware intrinsic"); @@ -10523,8 +10004,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (containedOperand != nullptr) { - if (containedOperand->IsCnsVec() && node->OperIsEmbBroadcastCompatible() && - comp->canUseEvexEncoding()) + if (containedOperand->IsCnsVec() && node->isEmbeddedBroadcastCompatibleHWIntrinsic(comp)) { TryFoldCnsVecForEmbeddedBroadcast(node, containedOperand->AsVecCon()); } @@ -10607,7 +10087,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (containedOperand != nullptr) { - if (containedOperand->IsCnsVec() && node->OperIsEmbBroadcastCompatible()) + if (containedOperand->IsCnsVec() && node->isEmbeddedBroadcastCompatibleHWIntrinsic(comp)) { TryFoldCnsVecForEmbeddedBroadcast(node, containedOperand->AsVecCon()); } @@ -10747,7 +10227,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_EVEX_BlendVariableMask: + case NI_AVX512_BlendVariableMask: { // BlendVariableMask represents one of the following instructions: // * vblendmpd @@ -10778,37 +10258,231 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // contained and not a memory operand and know to invoke the special handling // so that the embedded masking can work as expected. - bool isEmbeddedMask = false; + if (op1->IsVectorZero()) + { + // When we are merging with zero, we can specialize + // and avoid instantiating the vector constant. + MakeSrcContained(node, op1); + } if (op2->isEmbeddedMaskingCompatibleHWIntrinsic()) { - isEmbeddedMask = !comp->opts.MinOpts() && comp->canUseEmbeddedMasking(); + bool isEmbeddedMask = !comp->opts.MinOpts() && comp->canUseEmbeddedMasking(); if (op2->isRMWHWIntrinsic(comp)) { // TODO-AVX512-CQ: Ensure we can support embedded operations on RMW intrinsics isEmbeddedMask = false; } - } - if (isEmbeddedMask) - { - uint32_t maskSize = genTypeSize(simdBaseType); - uint32_t operSize = genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType()); + GenTreeHWIntrinsic* op2Intrinsic = op2->AsHWIntrinsic(); + NamedIntrinsic op2IntrinsicId = NI_Illegal; + HWIntrinsicCategory category = HW_Category_Special; - if ((maskSize == operSize) && IsInvariantInRange(op2, node)) + if (isEmbeddedMask) { - MakeSrcContained(node, op2); - op2->MakeEmbMaskOp(); + // TODO-AVX512-CQ: Codegen is currently limited to only handling embedded + // masking for table driven intrinsics. This can be relaxed once that is fixed. 
+ + op2IntrinsicId = op2Intrinsic->GetHWIntrinsicId(); + category = HWIntrinsicInfo::lookupCategory(op2IntrinsicId); + isEmbeddedMask = + HWIntrinsicInfo::genIsTableDrivenHWIntrinsic(op2IntrinsicId, category); + + size_t numArgs = node->GetOperandCount(); + + if (numArgs == 1) + { + if (op2Intrinsic->OperIsMemoryLoad()) + { + isEmbeddedMask = false; + } + } + else if (numArgs == 2) + { + if (category == HW_Category_MemoryStore) + { + isEmbeddedMask = false; + } + } + } + + if (isEmbeddedMask) + { + var_types op2SimdBaseType = op2Intrinsic->GetSimdBaseType(); + + instruction ins = + HWIntrinsicInfo::lookupIns(op2IntrinsicId, op2SimdBaseType, comp); + + unsigned expectedMaskBaseSize = CodeGenInterface::instKMaskBaseSize(ins); + + // It's safe to use the return and base type of the BlendVariableMask node + // since anything which lowered to it will have validated compatibility itself + unsigned actualMaskSize = + genTypeSize(node->TypeGet()) / genTypeSize(simdBaseType); + unsigned actualMaskBaseSize = + actualMaskSize / (genTypeSize(node->TypeGet()) / 16); - if (op1->IsVectorZero()) + CorInfoType op2AdjustedSimdBaseJitType = CORINFO_TYPE_UNDEF; + + if (actualMaskBaseSize != expectedMaskBaseSize) + { + // Some intrinsics are effectively bitwise operations and so we + // can freely update them to match the size of the actual mask + + bool supportsMaskBaseSize4Or8 = false; + + switch (ins) + { + case INS_andpd: + case INS_andps: + case INS_andnpd: + case INS_andnps: + case INS_orpd: + case INS_orps: + case INS_pandd: + case INS_pandnd: + case INS_pord: + case INS_pxord: + case INS_vpandq: + case INS_vpandnq: + case INS_vporq: + case INS_vpxorq: + case INS_vshuff32x4: + case INS_vshuff64x2: + case INS_vshufi32x4: + case INS_vshufi64x2: + case INS_xorpd: + case INS_xorps: + { + // These intrinsics support embedded broadcast and have masking + // support for 4 or 8 + assert((expectedMaskBaseSize == 4) || (expectedMaskBaseSize == 8)); + + if (!comp->codeGen->IsEmbeddedBroadcastEnabled(ins, + op2Intrinsic->Op(2))) + { + // We cannot change the base type if we've already contained a + // broadcast + supportsMaskBaseSize4Or8 = true; + } + break; + } + + case INS_vpternlogd: + case INS_vpternlogq: + { + // These intrinsics support embedded broadcast and have masking + // support for 4 or 8 + assert((expectedMaskBaseSize == 4) || (expectedMaskBaseSize == 8)); + + if (!comp->codeGen->IsEmbeddedBroadcastEnabled(ins, + op2Intrinsic->Op(3))) + { + // We cannot change the base type if we've already contained a + // broadcast + supportsMaskBaseSize4Or8 = true; + } + break; + } + + case INS_vbroadcastf32x4: + case INS_vbroadcastf32x8: + case INS_vbroadcastf64x2: + case INS_vbroadcastf64x4: + case INS_vbroadcasti32x4: + case INS_vbroadcasti32x8: + case INS_vbroadcasti64x2: + case INS_vbroadcasti64x4: + case INS_vextractf32x4: + case INS_vextractf32x8: + case INS_vextractf64x2: + case INS_vextractf64x4: + case INS_vextracti32x4: + case INS_vextracti32x8: + case INS_vextracti64x2: + case INS_vextracti64x4: + case INS_vinsertf32x4: + case INS_vinsertf32x8: + case INS_vinsertf64x2: + case INS_vinsertf64x4: + case INS_vinserti32x4: + case INS_vinserti32x8: + case INS_vinserti64x2: + case INS_vinserti64x4: + { + // These intrinsics don't support embedded broadcast and have + // masking support for 4 or 8 + assert((expectedMaskBaseSize == 4) || (expectedMaskBaseSize == 8)); + supportsMaskBaseSize4Or8 = true; + break; + } + + default: + { + break; + } + } + + if (supportsMaskBaseSize4Or8) + { + if (actualMaskBaseSize == 
8) + { + if (varTypeIsFloating(op2SimdBaseType)) + { + op2AdjustedSimdBaseJitType = CORINFO_TYPE_DOUBLE; + } + else if (varTypeIsSigned(op2SimdBaseType)) + { + op2AdjustedSimdBaseJitType = CORINFO_TYPE_LONG; + } + else + { + op2AdjustedSimdBaseJitType = CORINFO_TYPE_ULONG; + } + } + else if (actualMaskBaseSize == 4) + { + if (varTypeIsFloating(op2SimdBaseType)) + { + op2AdjustedSimdBaseJitType = CORINFO_TYPE_FLOAT; + } + else if (varTypeIsSigned(op2SimdBaseType)) + { + op2AdjustedSimdBaseJitType = CORINFO_TYPE_INT; + } + else + { + op2AdjustedSimdBaseJitType = CORINFO_TYPE_UINT; + } + } + } + } + + if (op2AdjustedSimdBaseJitType != CORINFO_TYPE_UNDEF) { - // When we are merging with zero, we can specialize - // and avoid instantiating the vector constant. + ins = HWIntrinsicInfo::lookupIns(op2IntrinsicId, op2SimdBaseType, comp); + expectedMaskBaseSize = CodeGenInterface::instKMaskBaseSize(ins); + } + + unsigned expectedMaskSize = + expectedMaskBaseSize * (genTypeSize(op2->TypeGet()) / 16); + assert(expectedMaskSize != 0); - assert(!op2->TypeIs(TYP_MASK)); - MakeSrcContained(node, op1); + if (actualMaskSize != expectedMaskSize) + { + isEmbeddedMask = false; + } + else if (op2AdjustedSimdBaseJitType != CORINFO_TYPE_UNDEF) + { + op2Intrinsic->SetSimdBaseJitType(op2AdjustedSimdBaseJitType); } + } + + if (isEmbeddedMask && IsInvariantInRange(op2, node)) + { + MakeSrcContained(node, op2); + op2->MakeEmbMaskOp(); break; } } @@ -10901,9 +10575,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { switch (intrinsicId) { - case NI_SSE_Shuffle: - case NI_SSE2_Insert: - case NI_SSE2_Shuffle: + case NI_X86Base_Shuffle: + case NI_X86Base_Insert: case NI_SSSE3_AlignRight: case NI_SSE41_Blend: case NI_SSE41_DotProduct: @@ -10921,46 +10594,27 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_InsertVector128: case NI_AVX2_MultipleSumAbsoluteDifferences: case NI_AVX2_Permute2x128: - case NI_AVX512F_AlignRight32: - case NI_AVX512F_AlignRight64: - case NI_EVEX_CompareMask: - case NI_AVX512F_GetMantissaScalar: - case NI_AVX512F_InsertVector128: - case NI_AVX512F_InsertVector256: - case NI_AVX512F_RoundScaleScalar: - case NI_AVX512F_Shuffle: - case NI_AVX512F_Shuffle4x128: - case NI_AVX512F_VL_AlignRight32: - case NI_AVX512F_VL_AlignRight64: - case NI_AVX512F_VL_Shuffle2x128: - case NI_AVX512BW_AlignRight: - case NI_AVX512BW_SumAbsoluteDifferencesInBlock32: - case NI_AVX512BW_VL_SumAbsoluteDifferencesInBlock32: - case NI_AVX512DQ_InsertVector128: - case NI_AVX512DQ_InsertVector256: - case NI_AVX512DQ_Range: - case NI_AVX512DQ_RangeScalar: - case NI_AVX512DQ_VL_Range: - case NI_AVX512DQ_ReduceScalar: + case NI_AVX512_AlignRight32: + case NI_AVX512_AlignRight64: + case NI_AVX512_AlignRight: + case NI_AVX512_GetMantissaScalar: + case NI_AVX512_InsertVector128: + case NI_AVX512_InsertVector256: + case NI_AVX512_Range: + case NI_AVX512_RangeScalar: + case NI_AVX512_ReduceScalar: + case NI_AVX512_RoundScaleScalar: + case NI_AVX512_Shuffle2x128: + case NI_AVX512_Shuffle4x128: + case NI_AVX512_Shuffle: + case NI_AVX512_SumAbsoluteDifferencesInBlock32: + case NI_AVX512_CompareMask: case NI_PCLMULQDQ_CarrylessMultiply: case NI_PCLMULQDQ_V256_CarrylessMultiply: case NI_PCLMULQDQ_V512_CarrylessMultiply: - case NI_AVX10v1_AlignRight32: - case NI_AVX10v1_AlignRight64: - case NI_AVX10v1_GetMantissaScalar: - case NI_AVX10v1_Range: - case NI_AVX10v1_RangeScalar: - case NI_AVX10v1_ReduceScalar: - case NI_AVX10v1_RoundScaleScalar: - case NI_AVX10v1_SumAbsoluteDifferencesInBlock32: - 
case NI_AVX10v1_Shuffle2x128: - case NI_AVX10v1_V512_InsertVector128: - case NI_AVX10v1_V512_InsertVector256: - case NI_AVX10v1_V512_Range: - case NI_AVX10v2_MinMaxScalar: case NI_AVX10v2_MinMax: - case NI_AVX10v2_V512_MinMax: - case NI_AVX10v2_V512_MultipleSumAbsoluteDifferences: + case NI_AVX10v2_MinMaxScalar: + case NI_AVX10v2_MultipleSumAbsoluteDifferences: case NI_GFNI_GaloisFieldAffineTransform: case NI_GFNI_GaloisFieldAffineTransformInverse: case NI_GFNI_V256_GaloisFieldAffineTransform: @@ -11082,11 +10736,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { switch (intrinsicId) { - case NI_AVX512F_Fixup: - case NI_AVX512F_FixupScalar: - case NI_AVX512F_VL_Fixup: - case NI_AVX10v1_Fixup: - case NI_AVX10v1_FixupScalar: + case NI_AVX512_Fixup: + case NI_AVX512_FixupScalar: { if (!isContainedImm) { @@ -11109,9 +10760,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVX512F_TernaryLogic: - case NI_AVX512F_VL_TernaryLogic: - case NI_AVX10v1_TernaryLogic: + case NI_AVX512_TernaryLogic: { assert(comp->canUseEvexEncodingDebugOnly()); @@ -11310,7 +10959,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (containedOperand != nullptr) { - if (containedOperand->IsCnsVec() && node->OperIsEmbBroadcastCompatible()) + if (containedOperand->IsCnsVec() && + node->isEmbeddedBroadcastCompatibleHWIntrinsic(comp)) { TryFoldCnsVecForEmbeddedBroadcast(node, containedOperand->AsVecCon()); } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 3958dd2df29d..915019445fe6 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -138,20 +138,13 @@ void lsraAssignRegToTree(GenTree* tree, regNumber reg, unsigned regIdx) } #endif // TARGET_64BIT #if FEATURE_MULTIREG_RET - else if (tree->OperGet() == GT_COPY) + else if (tree->OperIs(GT_COPY)) { assert(regIdx == 1); GenTreeCopyOrReload* copy = tree->AsCopyOrReload(); copy->gtOtherRegs[0] = (regNumberSmall)reg; } #endif // FEATURE_MULTIREG_RET -#if FEATURE_ARG_SPLIT - else if (tree->OperIsPutArgSplit()) - { - GenTreePutArgSplit* putArg = tree->AsPutArgSplit(); - putArg->SetRegNumByIdx(reg, regIdx); - } -#endif // FEATURE_ARG_SPLIT #ifdef FEATURE_HW_INTRINSICS else if (tree->OperIs(GT_HWINTRINSIC)) { @@ -275,30 +268,66 @@ SingleTypeRegSet LinearScan::lowSIMDRegs() #endif } +template <bool isLow> void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition, RefPosition* nextKill) { LsraLocation nextLocation = nextRefPosition == nullptr ? MaxLocation : nextRefPosition->nodeLocation; RefPosition* kill = nextKill; + +#ifdef HAS_MORE_THAN_64_REGISTERS + SingleTypeRegSet regMask = isLow ?
genSingleTypeRegMask(regRecord->regNum) + : genSingleTypeRegMask((regNumber)(regRecord->regNum - REG_HIGH_BASE)); +#else + SingleTypeRegSet regMask = genSingleTypeRegMask(regRecord->regNum); +#endif while ((kill != nullptr) && (kill->nodeLocation < nextLocation)) { - if (kill->killedRegisters.IsRegNumInMask(regRecord->regNum)) + if (isLow) { - nextLocation = kill->nodeLocation; - break; + if ((kill->killedRegisters.getLow() & regMask) != RBM_NONE) + { + nextLocation = kill->nodeLocation; + break; + } } - +#ifdef HAS_MORE_THAN_64_REGISTERS + else + { + if ((kill->killedRegisters.getHigh() & regMask) != RBM_NONE) + { + nextLocation = kill->nodeLocation; + break; + } + } +#endif kill = kill->nextRefPosition; } - if (nextLocation == MaxLocation) + if (isLow) { - fixedRegs.RemoveRegNumFromMask(regRecord->regNum); + if (nextLocation == MaxLocation) + { + fixedRegsLow &= ~regMask; + } + else + { + fixedRegsLow |= regMask; + } } +#ifdef HAS_MORE_THAN_64_REGISTERS else { - fixedRegs.AddRegNumInMask(regRecord->regNum); + if (nextLocation == MaxLocation) + { + fixedRegsHigh &= ~regMask; + } + else + { + fixedRegsHigh |= regMask; + } } +#endif nextFixedRef[regRecord->regNum] = nextLocation; } @@ -536,7 +565,8 @@ static const regMaskTP LsraLimitUpperSimdSet = (RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 | RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31); static const regMaskTP LsraLimitExtGprSet = - (RBM_R16 | RBM_R17 | RBM_R18 | RBM_R19 | RBM_R20 | RBM_R21 | RBM_R22 | RBM_R23 | RBM_ETW_FRAMED_EBP); + (RBM_R16 | RBM_R17 | RBM_R18 | RBM_R19 | RBM_R20 | RBM_R21 | RBM_R22 | RBM_R23 | RBM_R24 | RBM_R25 | RBM_R26 | + RBM_R27 | RBM_R28 | RBM_R29 | RBM_R30 | RBM_R31 | RBM_ETW_FRAMED_EBP); #elif defined(TARGET_ARM) // On ARM, we may need two registers to set up the target register for a virtual call, so we need // to have at least the maximum number of arg registers, plus 2. 
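The isLow template parameter and the HAS_MORE_THAN_64_REGISTERS paths above exist because one 64-bit word can no longer describe every register: masks are kept as a low word (registers 0-63) and a high word (register 64 and up), and a register number is tested against whichever word it falls in. A minimal sketch of that mapping with stand-in types (not the JIT's regMaskTP):

    #include <stdint.h>

    const int REG_HIGH_BASE = 64; // matches REG_HIGH_BASE used above

    struct SplitRegMask
    {
        uint64_t low;  // registers 0..63
        uint64_t high; // registers 64 and up
    };

    inline bool IsRegInMask(const SplitRegMask& mask, int regNum)
    {
        if (regNum < REG_HIGH_BASE)
        {
            return (mask.low & (1ull << regNum)) != 0;
        }
        return (mask.high & (1ull << (regNum - REG_HIGH_BASE))) != 0;
    }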
@@ -793,21 +823,45 @@ LinearScan::LinearScan(Compiler* theCompiler) availableRegCount = ACTUAL_REG_COUNT; needNonIntegerRegisters = false; +#if defined(TARGET_XARCH) + evexIsSupported = compiler->canUseEvexEncoding(); + #if defined(TARGET_AMD64) rbmAllFloat = compiler->rbmAllFloat; rbmFltCalleeTrash = compiler->rbmFltCalleeTrash; rbmAllInt = compiler->rbmAllInt; rbmIntCalleeTrash = compiler->rbmIntCalleeTrash; regIntLast = compiler->regIntLast; - isApxSupported = compiler->canUseApxEncoding(); + apxIsSupported = compiler->canUseApxEncoding(); + + if (apxIsSupported) + { + int size = (int)ACTUAL_REG_COUNT + 1; + regIndices = theCompiler->getAllocator(CMK_LSRA).allocate<regNumber>(size); + for (int i = 0; i < size; i++) + { + regIndices[i] = static_cast<regNumber>(i); + } + } + else + { + regIndices = + new regNumber[]{REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, + REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, + REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7, + REG_XMM8, REG_XMM9, REG_XMM10, REG_XMM11, REG_XMM12, REG_XMM13, REG_XMM14, REG_XMM15, + REG_XMM16, REG_XMM17, REG_XMM18, REG_XMM19, REG_XMM20, REG_XMM21, REG_XMM22, REG_XMM23, + REG_XMM24, REG_XMM25, REG_XMM26, REG_XMM27, REG_XMM28, REG_XMM29, REG_XMM30, REG_XMM31, + REG_K0, REG_K1, REG_K2, REG_K3, REG_K4, REG_K5, REG_K6, REG_K7, + REG_COUNT}; + } #endif // TARGET_AMD64 -#if defined(TARGET_XARCH) rbmAllMask = compiler->rbmAllMask; rbmMskCalleeTrash = compiler->rbmMskCalleeTrash; memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); - if (!compiler->canUseEvexEncoding()) + if (!evexIsSupported) { availableRegCount -= (CNT_HIGHFLOAT + CNT_MASK_REGS); } @@ -902,7 +956,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #endif // TARGET_AMD64 || TARGET_ARM64 #if defined(TARGET_AMD64) - if (compiler->canUseEvexEncoding()) + if (evexIsSupported) { availableFloatRegs |= RBM_HIGHFLOAT.GetFloatRegSet(); availableDoubleRegs |= RBM_HIGHFLOAT.GetFloatRegSet(); @@ -956,7 +1010,7 @@ LinearScan::LinearScan(Compiler* theCompiler) // Notes: // On return, the blockSequence array contains the blocks in reverse post-order. // This method clears the bbVisitedSet on LinearScan, and when it returns the set -// contains all the bbNums for the block. +// contains all the bbPostorderNums for the block. // void LinearScan::setBlockSequence() { @@ -967,30 +1021,33 @@ void LinearScan::setBlockSequence() bbVisitedSet = BitVecOps::MakeEmpty(traits); assert((blockSequence == nullptr) && (bbSeqCount == 0)); - - compiler->m_dfsTree = compiler->fgComputeDfs(); - FlowGraphDfsTree* const dfsTree = compiler->m_dfsTree; - blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount]; + blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount]; if (compiler->opts.OptimizationEnabled()) { - // Ensure loop bodies are compact in the visitation order. - compiler->m_loops = FlowGraphNaturalLoops::Find(dfsTree); + // If optimizations are enabled, allocate blocks in reverse post-order. + // This ensures each block's predecessors are visited first. + // Also, ensure loop bodies are compact in the visitation order.
+ compiler->m_dfsTree = compiler->fgComputeDfs(); + compiler->m_loops = FlowGraphNaturalLoops::Find(compiler->m_dfsTree); FlowGraphNaturalLoops* const loops = compiler->m_loops; - unsigned index = 0; - auto addToSequence = [this, &index](BasicBlock* block) { - blockSequence[index++] = block; + auto addToSequence = [this](BasicBlock* block) { + blockSequence[bbSeqCount++] = block; }; - compiler->fgVisitBlocksInLoopAwareRPO(dfsTree, loops, addToSequence); + compiler->fgVisitBlocksInLoopAwareRPO(compiler->m_dfsTree, loops, addToSequence); } else { - // TODO: Just use lexical block order in MinOpts - for (unsigned i = 0; i < dfsTree->GetPostOrderCount(); i++) + // If we aren't optimizing, we won't have any cross-block live registers, + // so the order of blocks allocated shouldn't matter. + // Just use the linear order. + for (BasicBlock* const block : compiler->Blocks()) { - blockSequence[i] = dfsTree->GetPostOrder(dfsTree->GetPostOrderCount() - i - 1); + // Give this block a unique post-order number that can be used as a key into bbVisitedSet + block->bbPostorderNum = bbSeqCount; + blockSequence[bbSeqCount++] = block; } } @@ -1094,30 +1151,29 @@ void LinearScan::setBlockSequence() }; JITDUMP("Start LSRA Block Sequence: \n"); - for (unsigned i = 0; i < dfsTree->GetPostOrderCount(); i++) + for (unsigned i = 0; i < bbSeqCount; i++) { visitBlock(blockSequence[i]); } - // If the DFS didn't visit any blocks, add them to the end of blockSequence - if (dfsTree->GetPostOrderCount() < compiler->fgBBcount) + // If any blocks remain unvisited, add them to the end of blockSequence. + // Unvisited blocks are more likely to be at the back of the list, so iterate backwards. + for (BasicBlock* block = compiler->fgLastBB; bbSeqCount < compiler->fgBBcount; block = block->Prev()) { - // Unvisited blocks are more likely to be at the back of the list, so iterate backwards - unsigned i = dfsTree->GetPostOrderCount(); - for (BasicBlock* block = compiler->fgLastBB; i < compiler->fgBBcount; block = block->Prev()) + assert(compiler->opts.OptimizationEnabled()); + assert(block != nullptr); + assert(compiler->m_dfsTree != nullptr); + + if (!compiler->m_dfsTree->Contains(block)) { - assert(block != nullptr); - if (!dfsTree->Contains(block)) - { - // Give this block a unique post-order number that can be used as a key into bbVisitedSet - block->bbPostorderNum = i; - visitBlock(block); - blockSequence[i++] = block; - } + // Give this block a unique post-order number that can be used as a key into bbVisitedSet + block->bbPostorderNum = bbSeqCount; + visitBlock(block); + blockSequence[bbSeqCount++] = block; } } - bbSeqCount = compiler->fgBBcount; + assert(bbSeqCount == compiler->fgBBcount); blockSequencingDone = true; #ifdef DEBUG @@ -1619,13 +1675,7 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) // vars will have `lvMustInit` set, because emitter has poor support for struct liveness, // but if the variable is tracked the prolog generator would expect it to be in liveIn set, // so an assert in `genFnProlog` will fire. - bool isRegCandidate = compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // The LoongArch64's ABI which the float args within a struct maybe passed by integer register - // when no float register left but free integer register. 
- isRegCandidate &= !genIsValidFloatReg(varDsc->GetOtherArgReg()); -#endif - return isRegCandidate; + return compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); } case TYP_UNDEF: @@ -1822,9 +1872,9 @@ void LinearScan::identifyCandidates() if (varDsc->lvIsStructField) { LclVarDsc* parentVarDsc = compiler->lvaGetDesc(varDsc->lvParentLcl); - if (parentVarDsc->lvIsMultiRegRet && !parentVarDsc->lvDoNotEnregister) + if (parentVarDsc->lvIsMultiRegDest && !parentVarDsc->lvDoNotEnregister) { - JITDUMP("Setting multi-reg struct V%02u as not enregisterable:", varDsc->lvParentLcl); + JITDUMP("Setting multi-reg-dest struct V%02u as not enregisterable:", varDsc->lvParentLcl); compiler->lvaSetVarDoNotEnregister(varDsc->lvParentLcl DEBUGARG(DoNotEnregisterReason::BlockOp)); for (unsigned int i = 0; i < parentVarDsc->lvFieldCnt; i++) { @@ -3860,9 +3910,37 @@ void LinearScan::processKills(RefPosition* killRefPosition) RefPosition* nextKill = killRefPosition->nextRefPosition; regMaskTP killedRegs = killRefPosition->getKilledRegisters(); - while (killedRegs.IsNonEmpty()) + + freeKilledRegs(killRefPosition, killedRegs.getLow(), nextKill, REG_LOW_BASE); + +#ifdef HAS_MORE_THAN_64_REGISTERS + freeKilledRegs(killRefPosition, killedRegs.getHigh(), nextKill, REG_HIGH_BASE); +#endif + + regsBusyUntilKill &= ~killRefPosition->getKilledRegisters(); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE, + killRefPosition->getKilledRegisters())); +} + +//------------------------------------------------------------------------ +// freeKilledRegs: Handle registers that are being killed. +// +// Arguments: +// killRefPosition - The RefPosition for the kill +// killedRegs - Registers to kill +// nextKill - The RefPosition for next kill +// regBase - `0` or `64` based on the `killedRegs` being processed +// +template +void LinearScan::freeKilledRegs(RefPosition* killRefPosition, + SingleTypeRegSet killedRegs, + RefPosition* nextKill, + int regBase) +{ + + while (killedRegs != RBM_NONE) { - regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); + regNumber killedReg = (regNumber)(genFirstRegNumFromMaskAndToggle(killedRegs) + regBase); RegRecord* regRecord = getRegisterRecord(killedReg); Interval* assignedInterval = regRecord->assignedInterval; if (assignedInterval != nullptr) @@ -3876,12 +3954,8 @@ void LinearScan::processKills(RefPosition* killRefPosition) RefPosition* regNextRefPos = regRecord->recentRefPosition == nullptr ? 
regRecord->firstRefPosition : regRecord->recentRefPosition->nextRefPosition; - updateNextFixedRef(regRecord, regNextRefPos, nextKill); + updateNextFixedRef(regRecord, regNextRefPos, nextKill); } - - regsBusyUntilKill &= ~killRefPosition->getKilledRegisters(); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE, - killRefPosition->getKilledRegisters())); } //------------------------------------------------------------------------ @@ -4228,8 +4302,8 @@ void LinearScan::resetAllRegistersState() resetAvailableRegs(); clearAllNextIntervalRef(); clearAllSpillCost(); - - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + int regIndex = REG_FIRST; + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; NEXT_REGISTER(reg, regIndex)) { RegRecord* physRegRecord = getRegisterRecord(reg); #ifdef DEBUG @@ -4559,14 +4633,34 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) } } #else + regMaskTP deadCandidates = ~liveRegs; // Only focus on actual registers present deadCandidates &= actualRegistersMask; + handleDeadCandidates(deadCandidates.getLow(), REG_LOW_BASE, inVarToRegMap); +#ifdef HAS_MORE_THAN_64_REGISTERS + handleDeadCandidates(deadCandidates.getHigh(), REG_HIGH_BASE, inVarToRegMap); +#endif // HAS_MORE_THAN_64_REGISTERS +#endif // TARGET_ARM +} - while (deadCandidates.IsNonEmpty()) +//------------------------------------------------------------------------ +// handleDeadCandidates: Handle registers that are assigned to local variables. +// +// Arguments: +// deadCandidates - mask of registers. +// regBase - base register number. +// inVarToRegMap - variable to register map. +// +// Return Value: +// None +// +void LinearScan::handleDeadCandidates(SingleTypeRegSet deadCandidates, int regBase, VarToRegMap inVarToRegMap) +{ + while (deadCandidates != RBM_NONE) { - regNumber reg = genFirstRegNumFromMaskAndToggle(deadCandidates); + regNumber reg = (regNumber)(genFirstRegNumFromMaskAndToggle(deadCandidates) + regBase); RegRecord* physRegRecord = getRegisterRecord(reg); makeRegAvailable(reg, physRegRecord->registerType); @@ -4596,7 +4690,6 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) } } } -#endif // TARGET_ARM } //------------------------------------------------------------------------ @@ -4745,6 +4838,22 @@ void LinearScan::freeRegister(RegRecord* physRegRecord) } } +//------------------------------------------------------------------------ +// LinearScan::freeRegisters: Free the registers in 'regsToFree' +// +// Arguments: +// regsToFree - the mask of registers to free, separated into low and high parts. +// regBase - `0` or `64` depending on if the registers to be freed are in the lower or higher bank. 
+// +void LinearScan::freeRegistersSingleType(SingleTypeRegSet regsToFree, int regBase) +{ + while (regsToFree != RBM_NONE) + { + regNumber nextReg = (regNumber)(genFirstRegNumFromMaskAndToggle(regsToFree) + regBase); + RegRecord* regRecord = getRegisterRecord(nextReg); + freeRegister(regRecord); + } +} //------------------------------------------------------------------------ // LinearScan::freeRegisters: Free the registers in 'regsToFree' // @@ -4760,20 +4869,26 @@ void LinearScan::freeRegisters(regMaskTP regsToFree) INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS)); makeRegsAvailable(regsToFree); +#ifdef TARGET_ARM while (regsToFree.IsNonEmpty()) { regNumber nextReg = genFirstRegNumFromMaskAndToggle(regsToFree); RegRecord* regRecord = getRegisterRecord(nextReg); -#ifdef TARGET_ARM if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE)) { assert(genIsValidDoubleReg(nextReg)); regsToFree.RemoveRegNumFromMask(regNumber(nextReg + 1)); } -#endif freeRegister(regRecord); } +#else + freeRegistersSingleType(regsToFree.getLow(), REG_LOW_BASE); +#ifdef HAS_MORE_THAN_64_REGISTERS + freeRegistersSingleType(regsToFree.getHigh(), REG_HIGH_BASE); +#endif + +#endif } //------------------------------------------------------------------------ @@ -4798,11 +4913,12 @@ void LinearScan::allocateRegistersMinimal() clearAllNextIntervalRef(); clearAllSpillCost(); - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + int regIndex = REG_FIRST; + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; NEXT_REGISTER(reg, regIndex)) { RegRecord* physRegRecord = getRegisterRecord(reg); physRegRecord->recentRefPosition = nullptr; - updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition, killHead); + updateNextFixedRefDispatch(physRegRecord, physRegRecord->firstRefPosition, killHead); assert(physRegRecord->assignedInterval == nullptr); } @@ -5020,8 +5136,7 @@ void LinearScan::allocateRegistersMinimal() { RegRecord* regRecord = currentRefPosition.getReg(); Interval* assignedInterval = regRecord->assignedInterval; - - updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition, nextKill); + updateNextFixedRefDispatch(regRecord, currentRefPosition.nextRefPosition, nextKill); // This is a FixedReg. Disassociate any inactive constant interval from this register. if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) @@ -5441,7 +5556,7 @@ void LinearScan::allocateRegisters() if (currentInterval->isLocalVar && !stressInitialParamReg()) { LclVarDsc* varDsc = currentInterval->getLocalVar(compiler); - if (varDsc->lvIsRegArg && currentInterval->firstRefPosition != nullptr) + if (varDsc->lvIsRegArg && (currentInterval->firstRefPosition != nullptr) && !compiler->opts.IsOSR()) { currentInterval->isActive = true; } @@ -5462,11 +5577,12 @@ void LinearScan::allocateRegisters() #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE resetRegState(); - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + int regIndex = REG_FIRST; + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; NEXT_REGISTER(reg, regIndex)) { RegRecord* physRegRecord = getRegisterRecord(reg); physRegRecord->recentRefPosition = nullptr; - updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition, killHead); + updateNextFixedRefDispatch(physRegRecord, physRegRecord->firstRefPosition, killHead); // Is this an incoming arg register? 
(Note that we don't, currently, consider reassigning // an incoming arg register as having spill cost.) @@ -5734,8 +5850,7 @@ void LinearScan::allocateRegisters() { RegRecord* regRecord = currentRefPosition.getReg(); Interval* assignedInterval = regRecord->assignedInterval; - - updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition, nextKill); + updateNextFixedRefDispatch(regRecord, currentRefPosition.nextRefPosition, nextKill); // This is a FixedReg. Disassociate any inactive constant interval from this register. if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) @@ -7553,8 +7668,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree, noway_assert(!blockRange.IsEmpty()); GenTree* branch = blockRange.LastNode(); - assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE || - branch->OperGet() == GT_SWITCH); + assert(branch->OperIsConditionalJump() || branch->OperIs(GT_SWITCH_TABLE) || branch->OperIs(GT_SWITCH)); blockRange.InsertBefore(branch, LIR::SeqTree(compiler, simdUpperRestore)); } @@ -7782,7 +7896,8 @@ void LinearScan::resolveRegisters() // are encountered. if (localVarsEnregistered) { - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + int regIndex = REG_FIRST; + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; NEXT_REGISTER(reg, regIndex)) { RegRecord* physRegRecord = getRegisterRecord(reg); Interval* assignedInterval = physRegRecord->assignedInterval; @@ -8222,7 +8337,7 @@ void LinearScan::resolveRegisters() // Determine initial position for parameters - if (varDsc->lvIsParam) + if (varDsc->lvIsParam || varDsc->lvIsParamRegTarget) { SingleTypeRegSet initialRegMask = interval->firstRefPosition->registerAssignment; regNumber initialReg = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter) @@ -8391,7 +8506,7 @@ void LinearScan::insertMove( var_types typ = varDsc->TypeGet(); #if defined(FEATURE_SIMD) - if ((typ == TYP_SIMD12) && compiler->lvaMapSimd12ToSimd16(varDsc)) + if ((typ == TYP_SIMD12) && compiler->lvaMapSimd12ToSimd16(lclNum)) { typ = TYP_SIMD16; } @@ -8524,8 +8639,7 @@ void LinearScan::insertSwap( noway_assert(!blockRange.IsEmpty()); GenTree* branch = blockRange.LastNode(); - assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE || - branch->OperGet() == GT_SWITCH); + assert(branch->OperIsConditionalJump() || branch->OperIs(GT_SWITCH_TABLE) || branch->OperIs(GT_SWITCH)); blockRange.InsertBefore(branch, std::move(swapRange)); } @@ -8871,7 +8985,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // At this point, Lowering has transformed any non-switch-table blocks into // cascading ifs. GenTree* switchTable = LIR::AsRange(block).LastNode(); - assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE); + assert(switchTable != nullptr && switchTable->OperIs(GT_SWITCH_TABLE)); consumedRegs = compiler->codeGen->internalRegisters.GetAll(switchTable).GetRegSetForType(IntRegisterType); GenTree* op1 = switchTable->gtGetOp1(); @@ -10781,7 +10895,21 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) const LclVarDsc* varDsc = compiler->lvaGetDesc(interval->varNum); printf("("); regNumber assignedReg = varDsc->GetRegNum(); - regNumber argReg = (varDsc->lvIsRegArg) ? 
varDsc->GetArgReg() : REG_STK; + + regNumber argReg = REG_STK; + if (varDsc->lvIsParamRegTarget) + { + const ParameterRegisterLocalMapping* mapping = + compiler->FindParameterRegisterLocalMappingByLocal(interval->varNum, 0); + assert(mapping != nullptr); + argReg = mapping->RegisterSegment->GetRegister(); + } + else if (varDsc->lvIsRegArg && !varDsc->lvIsStructField) + { + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo( + varDsc->lvIsStructField ? varDsc->lvParentLcl : interval->varNum); + argReg = abiInfo.Segment(0).GetRegister(); + } assert(reg == assignedReg || varDsc->lvRegister == false); if (reg != argReg) @@ -11671,7 +11799,7 @@ bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node) return true; } - if (!IsLsraAdded(node) || (node->OperGet() != GT_LCL_VAR)) + if (!IsLsraAdded(node) || !node->OperIs(GT_LCL_VAR)) { return false; } @@ -11693,7 +11821,8 @@ bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node) // void LinearScan::verifyFreeRegisters(regMaskTP regsToFree) { - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + int regIndex = REG_FIRST; + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; NEXT_REGISTER(reg, regIndex)) { regMaskTP regMask = genRegMask(reg); // If this isn't available or if it's still waiting to be freed (i.e. it was in @@ -11813,7 +11942,8 @@ void LinearScan::verifyFinalAllocation() } // Clear register assignments. - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + int regIndex = REG_FIRST; + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; NEXT_REGISTER(reg, regIndex)) { RegRecord* physRegRecord = getRegisterRecord(reg); physRegRecord->assignedInterval = nullptr; @@ -11916,7 +12046,8 @@ void LinearScan::verifyFinalAllocation() } // Clear register assignments. - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + int regIndex = REG_FIRST; + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; NEXT_REGISTER(reg, regIndex)) { RegRecord* physRegRecord = getRegisterRecord(reg); physRegRecord->assignedInterval = nullptr; @@ -12241,7 +12372,8 @@ void LinearScan::verifyFinalAllocation() } // Clear register assignments. - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + int regIndex = REG_FIRST; + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; NEXT_REGISTER(reg, regIndex)) { RegRecord* physRegRecord = getRegisterRecord(reg); physRegRecord->assignedInterval = nullptr; @@ -12326,7 +12458,7 @@ void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation curr GenTree* dst = resolutionMove; assert(IsResolutionMove(dst)); - if (dst->OperGet() == GT_SWAP) + if (dst->OperIs(GT_SWAP)) { GenTreeLclVarCommon* left = dst->gtGetOp1()->AsLclVarCommon(); GenTreeLclVarCommon* right = dst->gtGetOp2()->AsLclVarCommon(); @@ -12361,7 +12493,7 @@ void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation curr regNumber dstRegNum = dst->GetRegNum(); regNumber srcRegNum; GenTreeLclVarCommon* lcl; - if (dst->OperGet() == GT_COPY) + if (dst->OperIs(GT_COPY)) { lcl = dst->gtGetOp1()->AsLclVarCommon(); srcRegNum = lcl->GetRegNum(); @@ -13519,14 +13651,29 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. 
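+// With HAS_MORE_THAN_64_REGISTERS, fixed-reference tracking is split into 'fixedRegsLow' and 'fixedRegsHigh'; mask registers are the ones in the high bank, so the matching bank (and register base) is selected before scanning the conflict mask.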
- SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); + SingleTypeRegSet checkConflictMask = candidates; + int regBase = REG_LOW_BASE; +#ifdef HAS_MORE_THAN_64_REGISTERS + if (!varTypeIsMask(regType)) + { + checkConflictMask &= linearScan->fixedRegsLow; + } + else + { + regBase = REG_HIGH_BASE; + checkConflictMask &= linearScan->fixedRegsHigh; + } +#else + checkConflictMask &= linearScan->fixedRegsLow; +#endif while (checkConflictMask != RBM_NONE) { - regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask, regType); - SingleTypeRegSet checkConflictBit = genSingleTypeRegMask(checkConflictReg); + regNumber checkConflictRegSingle = (regNumber)BitOperations::BitScanForward(checkConflictMask); + SingleTypeRegSet checkConflictBit = genSingleTypeRegMask(checkConflictRegSingle); checkConflictMask ^= checkConflictBit; - LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; + LsraLocation checkConflictLocation = + linearScan->nextFixedRef[(regNumber)(checkConflictRegSingle + regBase)]; if ((checkConflictLocation == refPosition->nodeLocation) || (refPosition->delayRegFree && (checkConflictLocation == (refPosition->nodeLocation + 1)))) @@ -13838,14 +13985,28 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. - SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); + SingleTypeRegSet checkConflictMask = candidates; + int regBase = REG_LOW_BASE; +#ifdef HAS_MORE_THAN_64_REGISTERS + if (!varTypeIsMask(regType)) + { + checkConflictMask &= linearScan->fixedRegsLow; + } + else + { + regBase = REG_HIGH_BASE; + checkConflictMask &= linearScan->fixedRegsHigh; + } +#else + checkConflictMask &= linearScan->fixedRegsLow; +#endif while (checkConflictMask != RBM_NONE) { - regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask, regType); - SingleTypeRegSet checkConflictBit = genSingleTypeRegMask(checkConflictReg); + regNumber checkConflictRegSingle = (regNumber)BitOperations::BitScanForward(checkConflictMask); + SingleTypeRegSet checkConflictBit = genSingleTypeRegMask(checkConflictRegSingle); checkConflictMask ^= checkConflictBit; - LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; + LsraLocation checkConflictLocation = linearScan->nextFixedRef[(regNumber)(checkConflictRegSingle + regBase)]; if ((checkConflictLocation == refPosition->nodeLocation) || (refPosition->delayRegFree && (checkConflictLocation == (refPosition->nodeLocation + 1)))) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 35c08455e946..a866e9937687 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -38,6 +38,13 @@ typedef var_types RegisterType; #define FloatRegisterType TYP_FLOAT #define MaskRegisterType TYP_MASK +// NEXT_REGISTER : update reg to next active registers. 
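+// On TARGET_AMD64 the allocatable register numbers may not be contiguous (e.g. when APX or EVEX registers are not available), so the macro steps through the 'regIndices' table by index; other targets simply use REG_NEXT.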
+#ifdef TARGET_AMD64 +#define NEXT_REGISTER(reg, index) index++, reg = regIndices[index] +#else +#define NEXT_REGISTER(reg, index) reg = REG_NEXT(reg) +#endif + //------------------------------------------------------------------------ // regType: Return the RegisterType to use for a given type // @@ -582,7 +589,7 @@ inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple) inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1) { - if (leaf->OperGet() != GT_ADD) + if (!leaf->OperIs(GT_ADD)) { return false; } @@ -741,15 +748,16 @@ class LinearScan : public LinearScanInterface void updateMaxSpill(RefPosition* refPosition); void recordMaxSpill(); +private: // max simultaneous spill locations used of every type unsigned int maxSpill[TYP_COUNT]; unsigned int currentSpill[TYP_COUNT]; bool needFloatTmpForFPCall; bool needDoubleTmpForFPCall; bool needNonIntegerRegisters; + bool needToKillFloatRegs; #ifdef DEBUG -private: //------------------------------------------------------------------------ // Should we stress lsra? This uses the DOTNET_JitStressRegs variable. // @@ -1004,8 +1012,10 @@ class LinearScan : public LinearScanInterface // Record variable locations at start/end of block void processBlockStartLocations(BasicBlock* current); - void processBlockEndLocations(BasicBlock* current); - void resetAllRegistersState(); + + FORCEINLINE void handleDeadCandidates(SingleTypeRegSet deadCandidates, int regBase, VarToRegMap inVarToRegMap); + void processBlockEndLocations(BasicBlock* current); + void resetAllRegistersState(); #ifdef TARGET_ARM bool isSecondHalfReg(RegRecord* regRec, Interval* interval); @@ -1027,7 +1037,7 @@ class LinearScan : public LinearScanInterface // insert refpositions representing prolog zero-inits which will be added later void insertZeroInitRefPositions(); - void addKillForRegs(regMaskTP mask, LsraLocation currentLoc); + RefPosition* addKillForRegs(regMaskTP mask, LsraLocation currentLoc); void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); @@ -1078,9 +1088,10 @@ class LinearScan : public LinearScanInterface SingleTypeRegSet lowSIMDRegs(); SingleTypeRegSet internalFloatRegCandidates(); - void makeRegisterInactive(RegRecord* physRegRecord); - void freeRegister(RegRecord* physRegRecord); - void freeRegisters(regMaskTP regsToFree); + void makeRegisterInactive(RegRecord* physRegRecord); + void freeRegister(RegRecord* physRegRecord); + void freeRegisters(regMaskTP regsToFree); + FORCEINLINE void freeRegistersSingleType(SingleTypeRegSet regsToFree, int regBase); // Get the type that this tree defines. 
var_types getDefType(GenTree* tree) @@ -1192,7 +1203,12 @@ class LinearScan : public LinearScanInterface void spillInterval(Interval* interval, RefPosition* fromRefPosition DEBUGARG(RefPosition* toRefPosition)); void processKills(RefPosition* killRefPosition); - void spillGCRefs(RefPosition* killRefPosition); + template + FORCEINLINE void freeKilledRegs(RefPosition* killRefPosition, + SingleTypeRegSet killedRegs, + RefPosition* nextKill, + int regBase); + void spillGCRefs(RefPosition* killRefPosition); /***************************************************************************** * Register selection @@ -1814,9 +1830,28 @@ class LinearScan : public LinearScanInterface } SingleTypeRegSet getMatchingConstants(SingleTypeRegSet mask, Interval* currentInterval, RefPosition* refPosition); - regMaskTP fixedRegs; + SingleTypeRegSet fixedRegsLow; +#ifdef HAS_MORE_THAN_64_REGISTERS + SingleTypeRegSet fixedRegsHigh; +#endif LsraLocation nextFixedRef[REG_COUNT]; - void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition, RefPosition* nextKill); + template + void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition, RefPosition* nextKill); + void updateNextFixedRefDispatch(RegRecord* regRecord, RefPosition* nextRefPosition, RefPosition* nextKill) + { +#ifdef HAS_MORE_THAN_64_REGISTERS + if (regRecord->regNum < 64) + { + updateNextFixedRef(regRecord, nextRefPosition, nextKill); + } + else + { + updateNextFixedRef(regRecord, nextRefPosition, nextKill); + } +#else + updateNextFixedRef(regRecord, nextRefPosition, nextKill); +#endif + } LsraLocation getNextFixedRef(regNumber regNum, var_types regType) { LsraLocation loc = nextFixedRef[regNum]; @@ -1938,9 +1973,12 @@ class LinearScan : public LinearScanInterface int BuildRMWUses( GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet op1Candidates, SingleTypeRegSet op2Candidates); inline SingleTypeRegSet BuildEvexIncompatibleMask(GenTree* tree); - inline SingleTypeRegSet BuildApxIncompatibleGPRMask(GenTree* tree, - SingleTypeRegSet candidates = RBM_NONE, - bool isGPR = false); + inline SingleTypeRegSet ForceLowGprForApx(GenTree* tree, + SingleTypeRegSet candidates = RBM_NONE, + bool isGPR = false); + inline SingleTypeRegSet ForceLowGprForApxIfNeeded(GenTree* tree, + SingleTypeRegSet candidates = RBM_NONE, + bool UseApxRegs = false); inline bool DoesThisUseGPR(GenTree* op); #endif // !TARGET_XARCH int BuildSelect(GenTreeOp* select); @@ -1986,6 +2024,7 @@ class LinearScan : public LinearScanInterface int BuildPutArgReg(GenTreeUnOp* node); int BuildCall(GenTreeCall* call); void MarkSwiftErrorBusyForCall(GenTreeCall* call); + void MarkAsyncContinuationBusyForCall(GenTreeCall* call); int BuildCmp(GenTree* tree); int BuildCmpOperands(GenTree* tree); int BuildBlockStore(GenTreeBlk* blkNode); @@ -2045,9 +2084,6 @@ class LinearScan : public LinearScanInterface #endif // DEBUG int BuildPutArgStk(GenTreePutArgStk* argNode); -#if FEATURE_ARG_SPLIT - int BuildPutArgSplit(GenTreePutArgSplit* tree); -#endif // FEATURE_ARG_SPLIT int BuildLclHeap(GenTree* tree); #if defined(TARGET_AMD64) @@ -2056,7 +2092,7 @@ class LinearScan : public LinearScanInterface regMaskTP rbmAllInt; regMaskTP rbmIntCalleeTrash; regNumber regIntLast; - bool isApxSupported; + bool apxIsSupported; FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const { @@ -2078,9 +2114,9 @@ class LinearScan : public LinearScanInterface { return this->regIntLast; } - FORCEINLINE bool getIsApxSupported() const + FORCEINLINE bool getApxIsSupported() const { - return this->isApxSupported; + 
return this->apxIsSupported; } #else FORCEINLINE regNumber get_REG_INT_LAST() const @@ -2093,6 +2129,7 @@ class LinearScan : public LinearScanInterface regMaskTP rbmAllMask; regMaskTP rbmMskCalleeTrash; SingleTypeRegSet lowGprRegs; + bool evexIsSupported; FORCEINLINE regMaskTP get_RBM_ALLMASK() const { @@ -2102,9 +2139,14 @@ class LinearScan : public LinearScanInterface { return this->rbmMskCalleeTrash; } + FORCEINLINE bool getEvexIsSupported() const + { + return this->evexIsSupported; + } #endif // TARGET_XARCH - unsigned availableRegCount; + unsigned availableRegCount; + regNumber* regIndices; FORCEINLINE unsigned get_AVAILABLE_REG_COUNT() const { diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 815f0149aede..195996383106 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -131,11 +131,11 @@ int LinearScan::BuildLclHeap(GenTree* tree) // int LinearScan::BuildShiftLongCarry(GenTree* tree) { - assert(tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO); + assert(tree->OperIs(GT_LSH_HI) || tree->OperIs(GT_RSH_LO)); int srcCount = 2; GenTree* source = tree->AsOp()->gtOp1; - assert((source->OperGet() == GT_LONG) && source->isContained()); + assert(source->OperIs(GT_LONG) && source->isContained()); GenTree* sourceLo = source->gtGetOp1(); GenTree* sourceHi = source->gtGetOp2(); @@ -146,7 +146,7 @@ int LinearScan::BuildShiftLongCarry(GenTree* tree) if (!tree->isContained()) { - if (tree->OperGet() == GT_LSH_HI) + if (tree->OperIs(GT_LSH_HI)) { setDelayFree(sourceLoUse); } @@ -440,7 +440,7 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_CNS_DBL: - if (tree->TypeGet() == TYP_FLOAT) + if (tree->TypeIs(TYP_FLOAT)) { // An int register for float constant buildInternalIntRegisterDefForNode(tree); @@ -448,7 +448,7 @@ int LinearScan::BuildNode(GenTree* tree) else { // TYP_DOUBLE - assert(tree->TypeGet() == TYP_DOUBLE); + assert(tree->TypeIs(TYP_DOUBLE)); // Two int registers for double constant buildInternalIntRegisterDefForNode(tree); @@ -474,13 +474,13 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETFILT: assert(dstCount == 0); - if (tree->TypeGet() == TYP_VOID) + if (tree->TypeIs(TYP_VOID)) { srcCount = 0; } else { - assert(tree->TypeGet() == TYP_INT); + assert(tree->TypeIs(TYP_INT)); srcCount = 1; BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } @@ -631,12 +631,18 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_COPY: srcCount = 1; #ifdef TARGET_ARM // This case currently only occurs for double types that are passed as TYP_LONG; // actual long types would have been decomposed by now. 
- if (tree->TypeGet() == TYP_LONG) + if (tree->TypeIs(TYP_LONG)) { dstCount = 2; } @@ -649,11 +655,6 @@ int LinearScan::BuildNode(GenTree* tree) BuildDefs(tree, dstCount); break; - case GT_PUTARG_SPLIT: - srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); - dstCount = tree->AsPutArgSplit()->gtNumRegs; - break; - case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; @@ -693,6 +694,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_JCC: case GT_SETCC: case GT_MEMORYBARRIER: + case GT_RETURN_SUSPEND: srcCount = BuildSimple(tree); break; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9af6bef2f17f..cb19df849ada 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -641,7 +641,7 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; #ifdef FEATURE_SIMD // Need an additional register to read upper 4 bytes of Vector3. - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { // We need an internal register different from targetReg in which 'tree' produces its result // because both targetReg and internal reg will be in use at the same time. @@ -796,13 +796,13 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETFILT: assert(dstCount == 0); - if (tree->TypeGet() == TYP_VOID) + if (tree->TypeIs(TYP_VOID)) { srcCount = 0; } else { - assert(tree->TypeGet() == TYP_INT); + assert(tree->TypeIs(TYP_INT)); srcCount = 1; BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } @@ -1059,7 +1059,7 @@ int LinearScan::BuildNode(GenTree* tree) { // GT_XCHG requires a single internal register; the others require two. buildInternalIntRegisterDefForNode(tree); - if (tree->OperGet() != GT_XCHG) + if (!tree->OperIs(GT_XCHG)) { buildInternalIntRegisterDefForNode(tree); } @@ -1108,13 +1108,6 @@ int LinearScan::BuildNode(GenTree* tree) } break; -#if FEATURE_ARG_SPLIT - case GT_PUTARG_SPLIT: - srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); - dstCount = tree->AsPutArgSplit()->gtNumRegs; - break; -#endif // FEATURE_ARG_SPLIT - case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; @@ -1320,6 +1313,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); @@ -1481,6 +1479,23 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { srcCount += BuildContainedCselUses(containedCselOp, delayFreeOp, candidates); } + else if ((intrin.category == HW_Category_SIMDByIndexedElement) && (genTypeSize(intrin.baseType) == 2) && + !HWIntrinsicInfo::HasImmediateOperand(intrin.id)) + { + // Some "Advanced SIMD scalar x indexed element" and "Advanced SIMD vector x indexed element" instructions + // (e.g. "MLA (by element)") have an encoding that restricts which registers can be used for the indexed + // element when the element size is H (i.e. 2 bytes). + if (((opNum == 2) || (opNum == 3))) + { + // For those intrinsics, just build the uses as delay-free, so they do not conflict with the + // definition. 
+ srcCount += BuildDelayFreeUses(operand, nullptr, candidates); + } + else + { + srcCount += BuildOperandUses(operand, candidates); + } + } // Only build as delay free use if register types match else if ((delayFreeOp != nullptr) && (varTypeUsesSameRegType(delayFreeOp->TypeGet(), operand->TypeGet()) || @@ -1633,10 +1648,10 @@ void LinearScan::BuildHWIntrinsicImmediate(GenTreeHWIntrinsic* intrinsicTree, co case NI_AdvSimd_ExtractVector128: case NI_AdvSimd_StoreSelectedScalar: case NI_AdvSimd_Arm64_StoreSelectedScalar: - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: + case NI_Sve_Prefetch16Bit: + case NI_Sve_Prefetch32Bit: + case NI_Sve_Prefetch64Bit: + case NI_Sve_Prefetch8Bit: case NI_Sve_ExtractVector: case NI_Sve_TrigonometricMultiplyAddCoefficient: needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed(); @@ -1845,7 +1860,7 @@ int LinearScan::BuildContainedCselUses(GenTreeHWIntrinsic* containedCselOpNode, for (size_t opNum = 1; opNum <= containedCselOpNode->GetOperandCount(); opNum++) { GenTree* currentOp = containedCselOpNode->Op(opNum); - if (currentOp->TypeGet() == TYP_MASK) + if (currentOp->TypeIs(TYP_MASK)) { srcCount += BuildOperandUses(currentOp, candidates); } @@ -2243,7 +2258,7 @@ GenTree* LinearScan::getDelayFreeOperand(GenTreeHWIntrinsic* intrinsicTree, bool break; case NI_AdvSimd_Arm64_DuplicateToVector64: - if (intrinsicTree->Op(1)->TypeGet() == TYP_DOUBLE) + if (intrinsicTree->Op(1)->TypeIs(TYP_DOUBLE)) { delayFreeOp = intrinsicTree->Op(1); assert(delayFreeOp != nullptr); @@ -2268,6 +2283,9 @@ GenTree* LinearScan::getDelayFreeOperand(GenTreeHWIntrinsic* intrinsicTree, bool break; case NI_Sve_CreateBreakPropagateMask: + case NI_Sve2_BitwiseSelect: + case NI_Sve2_BitwiseSelectLeftInverted: + case NI_Sve2_BitwiseSelectRightInverted: // RMW operates on the second op. 
assert(isRMW); delayFreeOp = intrinsicTree->Op(2); @@ -2331,14 +2349,14 @@ GenTree* LinearScan::getVectorAddrOperand(GenTreeHWIntrinsic* intrinsicTree) // Operands that are not loads or stores but do require an address switch (intrinsicTree->GetHWIntrinsicId()) { - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: case NI_Sve_GatherPrefetch8Bit: case NI_Sve_GatherPrefetch16Bit: case NI_Sve_GatherPrefetch32Bit: case NI_Sve_GatherPrefetch64Bit: + case NI_Sve_Prefetch16Bit: + case NI_Sve_Prefetch32Bit: + case NI_Sve_Prefetch64Bit: + case NI_Sve_Prefetch8Bit: if (!varTypeIsSIMD(intrinsicTree->Op(2)->gtType)) { return intrinsicTree->Op(2); @@ -2395,7 +2413,7 @@ GenTree* LinearScan::getConsecutiveRegistersOperand(const HWIntrinsic intrin, bo case NI_AdvSimd_StoreSelectedScalar: case NI_AdvSimd_Arm64_StoreSelectedScalar: - if (intrin.op2->gtType == TYP_STRUCT) + if (intrin.op2->TypeIs(TYP_STRUCT)) { consecutiveOp = intrin.op2; assert(consecutiveOp != nullptr); @@ -2484,7 +2502,7 @@ GenTreeHWIntrinsic* LinearScan::getContainedCselOperand(GenTreeHWIntrinsic* intr { GenTree* currentOp = intrinsicTree->Op(opNum); - if ((currentOp->OperGet() == GT_HWINTRINSIC) && + if (currentOp->OperIs(GT_HWINTRINSIC) && (currentOp->AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConditionalSelect) && currentOp->isContained()) { return currentOp->AsHWIntrinsic(); diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index b3c6c7d4cf78..0c06c07f8e32 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -39,7 +39,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) { // struct typed indirs are expected only on rhs of a block copy, // but in this case they must be contained. - assert(indirTree->TypeGet() != TYP_STRUCT); + assert(!indirTree->TypeIs(TYP_STRUCT)); GenTree* addr = indirTree->Addr(); GenTree* index = nullptr; @@ -50,11 +50,11 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) if (indirTree->gtFlags & GTF_IND_UNALIGNED) { var_types type = TYP_UNDEF; - if (indirTree->OperGet() == GT_STOREIND) + if (indirTree->OperIs(GT_STOREIND)) { type = indirTree->AsStoreInd()->Data()->TypeGet(); } - else if (indirTree->OperGet() == GT_IND) + else if (indirTree->OperIs(GT_IND)) { type = indirTree->TypeGet(); } @@ -73,7 +73,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) if (addr->isContained()) { - if (addr->OperGet() == GT_LEA) + if (addr->OperIs(GT_LEA)) { GenTreeAddrMode* lea = addr->AsAddrMode(); index = lea->Index(); @@ -95,7 +95,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } #ifdef FEATURE_SIMD - if (indirTree->TypeGet() == TYP_SIMD12) + if (indirTree->TypeIs(TYP_SIMD12)) { // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). assert(!addr->isContained()); @@ -133,7 +133,7 @@ int LinearScan::BuildCall(GenTreeCall* call) int srcCount = 0; int dstCount = 0; - if (call->TypeGet() != TYP_VOID) + if (!call->TypeIs(TYP_VOID)) { hasMultiRegRetVal = call->HasMultiRegRetVal(); if (hasMultiRegRetVal) @@ -163,7 +163,7 @@ int LinearScan::BuildCall(GenTreeCall* call) if (ctrlExpr != nullptr) { // we should never see a gtControlExpr whose type is void. - assert(ctrlExpr->TypeGet() != TYP_VOID); + assert(!ctrlExpr->TypeIs(TYP_VOID)); // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into a register. 
@@ -274,6 +274,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { @@ -322,7 +327,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) { - assert(argNode->gtOper == GT_PUTARG_STK); + assert(argNode->OperIs(GT_PUTARG_STK)); GenTree* src = argNode->Data(); int srcCount = 0; @@ -392,118 +397,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) return srcCount; } -//------------------------------------------------------------------------ -// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node -// -// Arguments: -// argNode - a GT_PUTARG_SPLIT node -// -// Return Value: -// The number of sources consumed by this node. -// -// Notes: -// Set the child node(s) to be contained -// -int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) -{ - int srcCount = 0; - assert(argNode->gtOper == GT_PUTARG_SPLIT); - - GenTree* src = argNode->gtGetOp1(); - - // Registers for split argument corresponds to source - int dstCount = argNode->gtNumRegs; - - regNumber argReg = argNode->GetRegNum(); - SingleTypeRegSet argMask = RBM_NONE; - for (unsigned i = 0; i < argNode->gtNumRegs; i++) - { - regNumber thisArgReg = (regNumber)((unsigned)argReg + i); - argMask |= genSingleTypeRegMask(thisArgReg); - argNode->SetRegNumByIdx(thisArgReg, i); - } - assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || - ((argMask & availableFloatRegs) != RBM_NONE)); - - if (src->OperGet() == GT_FIELD_LIST) - { - // Generated code: - // 1. Consume all of the items in the GT_FIELD_LIST (source) - // 2. Store to target slot and move to target registers (destination) from source - // - unsigned sourceRegCount = 0; - - // To avoid redundant moves, have the argument operand computed in the - // register in which the argument is passed to the call. - - for (GenTreeFieldList::Use& use : src->AsFieldList()->Uses()) - { - GenTree* node = use.GetNode(); - assert(!node->isContained()); - // The only multi-reg nodes we should see are OperIsMultiRegOp() - unsigned currentRegCount; -#ifdef TARGET_ARM - if (node->OperIsMultiRegOp()) - { - currentRegCount = node->AsMultiRegOp()->GetRegCount(); - } - else -#endif // TARGET_ARM - { - assert(!node->IsMultiRegNode()); - currentRegCount = 1; - } - // Consume all the registers, setting the appropriate register mask for the ones that - // go into registers. - for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++) - { - if (sourceRegCount < argNode->gtNumRegs) - { - regNumber nextArgReg = (regNumber)((unsigned)argReg + sourceRegCount); - SingleTypeRegSet sourceMask = genSingleTypeRegMask(nextArgReg); - BuildUse(node, sourceMask, regIndex); - placedArgRegs.AddRegNumInMask(nextArgReg); - } - else - { - BuildUse(node, RBM_NONE, regIndex); - } - - sourceRegCount++; - } - } - srcCount += sourceRegCount; - assert(src->isContained()); - } - else - { - assert(src->TypeIs(TYP_STRUCT) && src->isContained()); - - if (src->OperIs(GT_BLK)) - { - // If the PUTARG_SPLIT clobbers only one register we may need an - // extra internal register in case there is a conflict between the - // source address register and target register. 
- if (argNode->gtNumRegs == 1) - { - // We can use a ldr/str sequence so we need an internal register - buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); - } - - // We will generate code that loads from the OBJ's address, which must be in a register. - srcCount = BuildOperandUses(src->AsBlk()->Addr()); - } - else - { - // We will generate all of the code for the GT_PUTARG_SPLIT and LCL_VAR/LCL_FLD as one contained operation. - assert(src->OperIsLocalRead()); - } - } - buildInternalRegisterUses(); - BuildDefs(argNode, dstCount, argMask); - return srcCount; -} - //------------------------------------------------------------------------ // BuildBlockStore: Build the RefPositions for a block store node. // @@ -592,7 +485,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); } - if (size >= 4 * REGSIZE_BYTES && compiler->IsBaselineSimdIsaSupported()) + if (size >= 4 * REGSIZE_BYTES) { // We can use 128-bit SIMD ldp/stp for larger block sizes buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates()); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index a3a0ad88b80f..493a163961ef 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -686,7 +686,7 @@ bool LinearScan::isContainableMemoryOp(GenTree* node) // mask - the mask (set) of registers. // currentLoc - the location at which they should be added // -void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) +RefPosition* LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) { // The mask identifies a set of registers that will be used during // codegen. Mark these as modified here, so when we do final frame @@ -705,6 +705,8 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) *killTail = pos; killTail = &pos->nextRefPosition; + + return pos; } //------------------------------------------------------------------------ @@ -824,11 +826,11 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) #ifdef TARGET_X86 if (compiler->compFloatingPointUsed) { - if (call->TypeGet() == TYP_DOUBLE) + if (call->TypeIs(TYP_DOUBLE)) { needDoubleTmpForFPCall = true; } - else if (call->TypeGet() == TYP_FLOAT) + else if (call->TypeIs(TYP_FLOAT)) { needFloatTmpForFPCall = true; } @@ -841,8 +843,9 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) } // if there is no FP used, we can ignore the FP kills - if (!compiler->compFloatingPointUsed) + if (!needToKillFloatRegs) { + assert(!compiler->compFloatingPointUsed || !enregisterLocalVars); #if defined(TARGET_XARCH) #ifdef TARGET_AMD64 @@ -956,7 +959,7 @@ regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node) #ifdef TARGET_XARCH switch (node->GetHWIntrinsicId()) { - case NI_SSE2_MaskMove: + case NI_X86Base_MaskMove: // maskmovdqu uses edi as the implicit address register. // Although it is set as the srcCandidate on the address, if there is also a fixed // assignment for the definition of the address, resolveConflictingDefAndUse() may @@ -1823,13 +1826,6 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc } } - if (tree->OperIsPutArgSplit()) - { - // While we have attempted to account for any "specialPutArg" defs above, we're only looking at RefPositions - // created for this node. We must be defining at least one register in the PutArgSplit, so conservatively - // add one less than the maximum number of registers args to 'minRegCount'. 
- minRegCount += MAX_REG_ARG - 1; - } for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++) { RefPosition* newRefPosition = &(*refPositionMark); @@ -1959,7 +1955,7 @@ void LinearScan::buildPhysRegRecords() // callee trash and should appear at the end up the existing callee // trash set - if (compiler->canUseEvexEncoding()) + if (getEvexIsSupported()) { regOrderFlt = &lsraRegOrderFltEvex[0]; regOrderFltSize = lsraRegOrderFltEvexSize; @@ -1984,7 +1980,7 @@ void LinearScan::buildPhysRegRecords() #if defined(TARGET_XARCH) // xarch has mask registers available when EVEX is supported - if (compiler->canUseEvexEncoding()) + if (getEvexIsSupported()) { for (unsigned int i = 0; i < lsraRegOrderMskSize; i++) { @@ -2266,7 +2262,12 @@ void LinearScan::buildIntervals() } else if (lclDsc->lvIsParam) { - if (lclDsc->lvIsStructField) + if (compiler->opts.IsOSR()) + { + // Fall through with no preferred register since parameters are + // not passed in registers for OSR + } + else if (lclDsc->lvIsStructField) { // All fields passed in registers should be assigned via the // lvIsParamRegTarget mechanism, so this must be a stack @@ -2321,7 +2322,6 @@ void LinearScan::buildIntervals() numPlacedArgLocals = 0; placedArgRegs = RBM_NONE; - BasicBlock* predBlock = nullptr; BasicBlock* prevBlock = nullptr; // Initialize currentLiveVars to the empty set. We will set it to the current @@ -2334,19 +2334,19 @@ void LinearScan::buildIntervals() JITDUMP("\nNEW BLOCK " FMT_BB "\n", block->bbNum); compiler->compCurBB = block; - bool predBlockIsAllocated = false; - predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated)); - if (predBlock != nullptr) - { - JITDUMP("\n\nSetting " FMT_BB " as the predecessor for determining incoming variable registers of " FMT_BB - "\n", - predBlock->bbNum, block->bbNum); - assert(predBlock->bbNum <= bbNumMaxBeforeResolution); - blockInfo[block->bbNum].predBBNum = predBlock->bbNum; - } - if (localVarsEnregistered) { + needToKillFloatRegs = compiler->compFloatingPointUsed; + bool predBlockIsAllocated = false; + BasicBlock* const predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated)); + if (predBlock != nullptr) + { + JITDUMP("\n\nSetting " FMT_BB + " as the predecessor for determining incoming variable registers of " FMT_BB "\n", + predBlock->bbNum, block->bbNum); + assert(predBlock->bbNum <= bbNumMaxBeforeResolution); + blockInfo[block->bbNum].predBBNum = predBlock->bbNum; + } VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn)); @@ -2406,6 +2406,11 @@ void LinearScan::buildIntervals() } } } + else + { + // No enregistered state is live across blocks here, so reset the FP register kill switch per block. + needToKillFloatRegs = false; + } // Add a dummy RefPosition to mark the block boundary. // Note that we do this AFTER adding the exposed uses above, because the @@ -2438,7 +2443,13 @@ void LinearScan::buildIntervals() // do that in the prolog. We handle registers in the prolog and the // stack args in the scratch BB that we have ensured exists. The // handling clobbers REG_SCRATCH, so kill it here. - if ((block == compiler->fgFirstBB) && compiler->lvaHasAnySwiftStackParamToReassemble()) + bool prologUsesScratchReg = compiler->lvaHasAnySwiftStackParamToReassemble(); +#ifdef TARGET_X86 + // On x86, CodeGen::genFnProlog performs varargs preprocessing that uses + // the scratch register. 
+ prologUsesScratchReg |= compiler->info.compIsVarArgs; +#endif + if ((block == compiler->fgFirstBB) && prologUsesScratchReg) { addKillForRegs(genRegMask(REG_SCRATCH), currentLoc + 1); currentLoc += 2; } @@ -3003,6 +3014,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates, if (!varTypeUsesIntReg(type)) { compiler->compFloatingPointUsed = true; + needToKillFloatRegs = true; } Interval* interval = newInterval(type); @@ -3094,19 +3106,6 @@ int LinearScan::BuildCallArgUses(GenTreeCall* call) } #endif -#if FEATURE_ARG_SPLIT - if (argNode->OperIs(GT_PUTARG_SPLIT)) - { - unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; - for (unsigned int i = 0; i < regCount; i++) - { - BuildUse(argNode, genSingleTypeRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); - } - srcCount += regCount; - continue; - } -#endif - // Each register argument corresponds to one source. if (argNode->OperIsPutArgReg()) { @@ -3115,7 +3114,7 @@ int LinearScan::BuildCallArgUses(GenTreeCall* call) continue; } - assert(!arg.NewAbiInfo.HasAnyRegisterSegment()); + assert(!arg.AbiInfo.HasAnyRegisterSegment()); assert(argNode->OperIs(GT_PUTARG_STK)); } @@ -3816,6 +3815,14 @@ int LinearScan::BuildDelayFreeUses(GenTree* node, // Notes: // The operands must already have been processed by buildRefPositionsForNode, and their // RefInfoListNodes placed in the defList. +// For TARGET_XARCH: +// Case 1: APX is not supported. The high GPRs never come into play, so in effect +// candidates are already limited to lowGPRs. +// Case 2: APX is supported but EVEX is not. In this case, we need to restrict +// candidates to just lowGPRs. +// Case 3: Both APX and EVEX are supported. In this case, we do not need +// to do anything. 
Can give LSRA access to all registers for this node // int LinearScan::BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates) { @@ -3827,14 +3834,18 @@ int LinearScan::BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates) assert(op2 != nullptr); if (candidates == RBM_NONE && varTypeUsesFloatReg(node) && (op1->isContainedIndir() || op2->isContainedIndir())) { - if (op1->isContainedIndir()) + if (op1->isContainedIndir() && !getEvexIsSupported()) { return BuildRMWUses(node, op1, op2, lowGprRegs, candidates); } - else + else if (op2->isContainedIndir() && !getEvexIsSupported()) { return BuildRMWUses(node, op1, op2, candidates, lowGprRegs); } + else + { + return BuildRMWUses(node, op1, op2, candidates, candidates); + } } return BuildRMWUses(node, op1, op2, candidates, candidates); } @@ -3844,11 +3855,17 @@ int LinearScan::BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates) { #ifdef TARGET_XARCH // BSWAP creates movbe - if (op1->isContainedIndir() && - ((varTypeUsesFloatReg(node) || node->OperGet() == GT_BSWAP || node->OperGet() == GT_BSWAP16)) && - candidates == RBM_NONE) + if (op1->isContainedIndir() && !getEvexIsSupported()) { - srcCount += BuildOperandUses(op1, lowGprRegs); + if (candidates == RBM_NONE) + { + srcCount += BuildOperandUses(op1, lowGprRegs); + } + else + { + assert((candidates & lowGprRegs) != RBM_NONE); + srcCount += BuildOperandUses(op1, candidates & lowGprRegs); + } } else #endif @@ -3858,11 +3875,18 @@ int LinearScan::BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates) } if (op2 != nullptr) { - #ifdef TARGET_XARCH - if (op2->isContainedIndir() && varTypeUsesFloatReg(op1) && candidates == RBM_NONE) + if (op2->isContainedIndir() && !getEvexIsSupported()) { - candidates = lowGprRegs; + if (candidates == RBM_NONE) + { + candidates = lowGprRegs; + } + else + { + assert((candidates & lowGprRegs) != RBM_NONE); + srcCount += BuildOperandUses(op1, candidates & lowGprRegs); + } } #endif srcCount += BuildOperandUses(op2, candidates); @@ -3922,7 +3946,6 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, unsigned varIndex = varDsc->lvVarIndex; Interval* varDefInterval = getIntervalForLocalVar(varIndex); - GenTree* op1 = storeLoc->gtGetOp1(); if (!storeLoc->IsLastUse(index)) { VarSetOps::AddElemD(compiler, currentLiveVars, varIndex); @@ -3963,14 +3986,6 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, defCandidates = allRegs(type); #endif // TARGET_X86 -#ifdef TARGET_AMD64 - if (op1->isContained() && op1->OperIs(GT_BITCAST) && varTypeUsesIntReg(varDsc->GetRegisterType(storeLoc))) - { - defCandidates = lowGprRegs; - } - -#endif // TARGET_AMD64 - RefPosition* def = newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, defCandidates, index); if (varDefInterval->isWriteThru) { @@ -4003,7 +4018,7 @@ int LinearScan::BuildMultiRegStoreLoc(GenTreeLclVar* storeLoc) LclVarDsc* varDsc = compiler->lvaGetDesc(storeLoc); assert(compiler->lvaEnregMultiRegVars); - assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); + assert(storeLoc->OperIs(GT_STORE_LCL_VAR)); bool isMultiRegSrc = op1->IsMultiRegNode(); // The source must be: // - a multi-reg source @@ -4088,7 +4103,7 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) // First, define internal registers. 
#ifdef FEATURE_SIMD - if (varTypeIsSIMD(storeLoc) && !op1->IsVectorZero() && (storeLoc->TypeGet() == TYP_SIMD12)) + if (varTypeIsSIMD(storeLoc) && !op1->IsVectorZero() && storeLoc->TypeIs(TYP_SIMD12)) { #ifdef TARGET_ARM64 // Need an additional register to extract upper 4 bytes of Vector3, @@ -4108,7 +4123,7 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) { // This is the case where the source produces multiple registers. // This must be a store lclvar. - assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); + assert(storeLoc->OperIs(GT_STORE_LCL_VAR)); srcCount = op1->GetMultiRegCount(compiler); for (int i = 0; i < srcCount; ++i) @@ -4134,20 +4149,9 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) } else if (op1->isContained() && op1->OperIs(GT_BITCAST)) { - GenTree* bitCastSrc = op1->gtGetOp1(); - RegisterType registerType = regType(bitCastSrc->TypeGet()); - SingleTypeRegSet candidates = RBM_NONE; -#ifdef TARGET_AMD64 - if (registerType == IntRegisterType) - { - candidates = lowGprRegs; - } - else -#endif // TARGET_AMD64 - { - candidates = allRegs(registerType); - } - singleUseRef = BuildUse(bitCastSrc, candidates); + GenTree* bitCastSrc = op1->gtGetOp1(); + RegisterType registerType = regType(bitCastSrc->TypeGet()); + singleUseRef = BuildUse(bitCastSrc, allRegs(registerType)); Interval* srcInterval = singleUseRef->getInterval(); assert(regType(srcInterval->registerType) == registerType); @@ -4229,16 +4233,7 @@ int LinearScan::BuildSimple(GenTree* tree) } if (tree->IsValue()) { -#ifdef TARGET_AMD64 - if ((tree->OperGet() == GT_BSWAP || tree->OperGet() == GT_BSWAP16) && varTypeUsesIntReg(tree)) - { - BuildDef(tree, lowGprRegs); - } - else -#endif // TARGET_AMD64 - { - BuildDef(tree); - } + BuildDef(tree); } return srcCount; } @@ -4257,9 +4252,9 @@ int LinearScan::BuildReturn(GenTree* tree) GenTree* op1 = tree->AsOp()->GetReturnValue(); #if !defined(TARGET_64BIT) - if (tree->TypeGet() == TYP_LONG) + if (tree->TypeIs(TYP_LONG)) { - assert((op1->OperGet() == GT_LONG) && op1->isContained()); + assert(op1->OperIs(GT_LONG) && op1->isContained()); GenTree* loVal = op1->gtGetOp1(); GenTree* hiVal = op1->gtGetOp2(); BuildUse(loVal, RBM_LNGRET_LO.GetIntRegSet()); @@ -4268,7 +4263,7 @@ int LinearScan::BuildReturn(GenTree* tree) } else #endif // !defined(TARGET_64BIT) - if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) + if (!tree->TypeIs(TYP_VOID) && !op1->isContained()) { SingleTypeRegSet useCandidates = RBM_NONE; @@ -4284,7 +4279,7 @@ int LinearScan::BuildReturn(GenTree* tree) if (varTypeIsStruct(tree)) { // op1 has to be either a lclvar or a multi-reg returning call - if ((op1->OperGet() == GT_LCL_VAR) && !op1->IsMultiRegLclVar()) + if (op1->OperIs(GT_LCL_VAR) && !op1->IsMultiRegLclVar()) { BuildUse(op1, useCandidates); } @@ -4387,6 +4382,22 @@ int LinearScan::BuildReturn(GenTree* tree) return 1; } } + else if (!tree->TypeIs(TYP_VOID) && op1->OperIsFieldList()) + { + const ReturnTypeDesc& retDesc = compiler->compRetTypeDesc; + + unsigned regIndex = 0; + for (const GenTreeFieldList::Use& use : op1->AsFieldList()->Uses()) + { + GenTree* tree = use.GetNode(); + regNumber retReg = retDesc.GetABIReturnReg(regIndex, compiler->info.compCallConv); + BuildUse(tree, genSingleTypeRegMask(retReg)); + + regIndex++; + } + + return regIndex; + } else { // In other cases we require the incoming operand to be in the @@ -4570,10 +4581,14 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) // int LinearScan::BuildCmp(GenTree* tree) { -#if defined(TARGET_XARCH) +#if 
defined(TARGET_AMD64) + assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_BT, GT_CCMP)); +#elif defined(TARGET_X86) assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_BT)); #elif defined(TARGET_ARM64) assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_JCMP, GT_JTEST, GT_CCMP)); +#elif defined(TARGET_RISCV64) + assert(tree->OperIsCmpCompare() || tree->OperIs(GT_JCMP)); #else assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_JCMP)); #endif @@ -4706,3 +4721,24 @@ void LinearScan::MarkSwiftErrorBusyForCall(GenTreeCall* call) setDelayFree(swiftErrorRegRecord->lastRefPosition); } #endif + +//------------------------------------------------------------------------ +// MarkAsyncContinuationBusyForCall: +// Add a ref position that marks the async continuation register as busy +// until it is killed. +// +// Arguments: +// call - The call node +// +void LinearScan::MarkAsyncContinuationBusyForCall(GenTreeCall* call) +{ + // We model the async continuation like the swift error register: we ensure + // the node follows the call in lowering, and make it delay freed to ensure + // nothing is allocated into the register between the call and + // ASYNC_CONTINUATION node. We need to add a kill here in the right spot as + // not all targets may naturally have one created. + assert(call->gtNext != nullptr); + assert(call->gtNext->OperIs(GT_ASYNC_CONTINUATION)); + RefPosition* refPos = addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc + 1); + setDelayFree(refPos); +} diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 529e6d8127b6..a7e0ea0d6520 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -89,7 +89,7 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; #ifdef FEATURE_SIMD // Need an additional register to read upper 4 bytes of Vector3. - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { // We need an internal register different from targetReg in which 'tree' produces its result // because both targetReg and internal reg will be in use at the same time. @@ -175,13 +175,13 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETFILT: assert(dstCount == 0); - if (tree->TypeGet() == TYP_VOID) + if (tree->TypeIs(TYP_VOID)) { srcCount = 0; } else { - assert(tree->TypeGet() == TYP_INT); + assert(tree->TypeIs(TYP_INT)); srcCount = 1; BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } @@ -366,11 +366,6 @@ int LinearScan::BuildNode(GenTree* tree) } break; - case GT_PUTARG_SPLIT: - srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); - dstCount = tree->AsPutArgSplit()->gtNumRegs; - break; - case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; @@ -563,6 +558,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); @@ -617,7 +617,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) { // struct typed indirs are expected only on rhs of a block copy, // but in this case they must be contained. 
- assert(indirTree->TypeGet() != TYP_STRUCT); + assert(!indirTree->TypeIs(TYP_STRUCT)); GenTree* addr = indirTree->Addr(); GenTree* index = nullptr; @@ -625,7 +625,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) if (addr->isContained()) { - if (addr->OperGet() == GT_LEA) + if (addr->OperIs(GT_LEA)) { GenTreeAddrMode* lea = addr->AsAddrMode(); index = lea->Index(); @@ -647,7 +647,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } #ifdef FEATURE_SIMD - if (indirTree->TypeGet() == TYP_SIMD12) + if (indirTree->TypeIs(TYP_SIMD12)) { // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). assert(!addr->isContained()); @@ -685,7 +685,7 @@ int LinearScan::BuildCall(GenTreeCall* call) int srcCount = 0; int dstCount = 0; - if (call->TypeGet() != TYP_VOID) + if (!call->TypeIs(TYP_VOID)) { hasMultiRegRetVal = call->HasMultiRegRetVal(); if (hasMultiRegRetVal) @@ -715,7 +715,7 @@ int LinearScan::BuildCall(GenTreeCall* call) if (ctrlExpr != nullptr) { // we should never see a gtControlExpr whose type is void. - assert(ctrlExpr->TypeGet() != TYP_VOID); + assert(!ctrlExpr->TypeIs(TYP_VOID)); // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into a register. @@ -782,6 +782,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { @@ -823,7 +828,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) { - assert(argNode->gtOper == GT_PUTARG_STK); + assert(argNode->OperIs(GT_PUTARG_STK)); GenTree* putArgChild = argNode->gtGetOp1(); @@ -850,7 +855,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) buildInternalIntRegisterDefForNode(argNode); buildInternalIntRegisterDefForNode(argNode); - if (putArgChild->OperGet() == GT_BLK) + if (putArgChild->OperIs(GT_BLK)) { assert(putArgChild->isContained()); GenTree* objChild = putArgChild->gtGetOp1(); @@ -885,97 +890,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) return srcCount; } -//------------------------------------------------------------------------ -// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node -// -// Arguments: -// argNode - a GT_PUTARG_SPLIT node -// -// Return Value: -// The number of sources consumed by this node. -// -// Notes: -// Set the child node(s) to be contained -// -int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) -{ - int srcCount = 0; - assert(argNode->gtOper == GT_PUTARG_SPLIT); - - GenTree* putArgChild = argNode->gtGetOp1(); - - // Registers for split argument corresponds to source - int dstCount = argNode->gtNumRegs; - - regNumber argReg = argNode->GetRegNum(); - SingleTypeRegSet argMask = RBM_NONE; - for (unsigned i = 0; i < argNode->gtNumRegs; i++) - { - regNumber thisArgReg = (regNumber)((unsigned)argReg + i); - argMask |= genSingleTypeRegMask(thisArgReg); - argNode->SetRegNumByIdx(thisArgReg, i); - } - assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || - ((argMask & availableFloatRegs) != RBM_NONE)); - - if (putArgChild->OperGet() == GT_FIELD_LIST) - { - // Generated code: - // 1. Consume all of the items in the GT_FIELD_LIST (source) - // 2. 
Store to target slot and move to target registers (destination) from source - // - unsigned sourceRegCount = 0; - - // To avoid redundant moves, have the argument operand computed in the - // register in which the argument is passed to the call. - - for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) - { - GenTree* node = use.GetNode(); - assert(!node->isContained()); - // The only multi-reg nodes we should see are OperIsMultiRegOp() - assert(!node->IsMultiRegNode()); - - // Consume all the registers, setting the appropriate register mask for the ones that - // go into registers. - SingleTypeRegSet sourceMask = RBM_NONE; - if (sourceRegCount < argNode->gtNumRegs) - { - sourceMask = genSingleTypeRegMask((regNumber)((unsigned)argReg + sourceRegCount)); - } - sourceRegCount++; - BuildUse(node, sourceMask, 0); - } - srcCount += sourceRegCount; - assert(putArgChild->isContained()); - } - else - { - assert(putArgChild->TypeGet() == TYP_STRUCT); - assert(putArgChild->OperGet() == GT_BLK); - - // We can use a ld/st sequence so we need an internal register - buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); - - GenTree* objChild = putArgChild->gtGetOp1(); - if (objChild->IsLclVarAddr()) - { - // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_BLK and the GT_LCL_ADDR<0> - // as one contained operation - // - assert(objChild->isContained()); - } - else - { - srcCount = BuildIndirUses(putArgChild->AsIndir()); - } - assert(putArgChild->isContained()); - } - buildInternalRegisterUses(); - BuildDefs(argNode, dstCount, argMask); - return srcCount; -} - //------------------------------------------------------------------------ // BuildBlockStore: Build the RefPositions for a block store node. // diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 7583d7687d54..213a6ebefbd0 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -90,7 +90,7 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; #ifdef FEATURE_SIMD // Need an additional register to read upper 4 bytes of Vector3. - if (tree->TypeGet() == TYP_SIMD12) + if (tree->TypeIs(TYP_SIMD12)) { // We need an internal register different from targetReg in which 'tree' produces its result // because both targetReg and internal reg will be in use at the same time. @@ -144,20 +144,11 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CNS_DBL: { emitAttr size = emitActualTypeSize(tree); - - double constValue = tree->AsDblCon()->DconValue(); - if (!FloatingPointUtils::isPositiveZero(constValue)) + int64_t bits; + if (emitter::isSingleInstructionFpImm(tree->AsDblCon()->DconValue(), size, &bits) && bits != 0) { - int64_t bits = - (size == EA_4BYTE) - ? (int32_t)BitOperations::SingleToUInt32Bits(FloatingPointUtils::convertToSingle(constValue)) - : (int64_t)BitOperations::DoubleToUInt64Bits(constValue); - bool fitsInLui = ((bits & 0xfff) == 0) && emitter::isValidSimm20(bits >> 12); - if (fitsInLui || emitter::isValidSimm12(bits)) // can we synthesize bits with a single instruction? 
- { - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); - } + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); } } FALLTHROUGH; @@ -188,13 +179,13 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETFILT: assert(dstCount == 0); - if (tree->TypeGet() == TYP_VOID) + if (tree->TypeIs(TYP_VOID)) { srcCount = 0; } else { - assert(tree->TypeGet() == TYP_INT); + assert(tree->TypeIs(TYP_INT)); srcCount = 1; BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } @@ -270,7 +261,15 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RSZ: case GT_ROR: case GT_ROL: - if (tree->OperIs(GT_ROR, GT_ROL)) + case GT_SH1ADD: + case GT_SH1ADD_UW: + case GT_SH2ADD: + case GT_SH2ADD_UW: + case GT_SH3ADD: + case GT_SH3ADD_UW: + case GT_ADD_UW: + case GT_SLLI_UW: + if (tree->OperIs(GT_ROR, GT_ROL) && !compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)) buildInternalIntRegisterDefForNode(tree); srcCount = BuildBinaryUses(tree->AsOp()); buildInternalRegisterUses(); @@ -355,19 +354,56 @@ int LinearScan::BuildNode(GenTree* tree) case GT_INTRINSIC: { - noway_assert((tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs) || - (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Ceiling) || - (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Floor) || - (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) || - (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Sqrt)); - - // Both operand and its result must be of the same floating point type. GenTree* op1 = tree->gtGetOp1(); - assert(varTypeIsFloating(op1)); - assert(op1->TypeGet() == tree->TypeGet()); + GenTree* op2 = tree->gtGetOp2IfPresent(); + + switch (tree->AsIntrinsic()->gtIntrinsicName) + { + // Both operands and its result must be of the same floating-point type. 
+ case NI_System_Math_MinNumber: + case NI_System_Math_MaxNumber: + assert(op2 != nullptr); + assert(op2->TypeIs(tree->TypeGet())); + FALLTHROUGH; + case NI_System_Math_Abs: + case NI_System_Math_Sqrt: + assert(op1->TypeIs(tree->TypeGet())); + assert(varTypeIsFloating(tree)); + break; + + // Integer Min/Max + case NI_System_Math_Min: + case NI_System_Math_Max: + case NI_System_Math_MinUnsigned: + case NI_System_Math_MaxUnsigned: + assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)); + assert(op2 != nullptr); + assert(op2->TypeIs(tree->TypeGet())); + assert(op1->TypeIs(tree->TypeGet())); + assert(tree->TypeIs(TYP_I_IMPL)); + break; + + // Operand and its result must be integers + case NI_PRIMITIVE_LeadingZeroCount: + case NI_PRIMITIVE_TrailingZeroCount: + case NI_PRIMITIVE_PopCount: + assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)); + assert(op2 == nullptr); + assert(varTypeIsIntegral(op1)); + assert(varTypeIsIntegral(tree)); + break; + + default: + NO_WAY("Unknown intrinsic"); + } BuildUse(op1); srcCount = 1; + if (op2 != nullptr) + { + BuildUse(op2); + srcCount++; + } assert(dstCount == 1); BuildDef(tree); } @@ -406,32 +442,21 @@ int LinearScan::BuildNode(GenTree* tree) case GT_GT: { var_types op1Type = genActualType(tree->gtGetOp1()->TypeGet()); - if (varTypeIsFloating(op1Type)) - { - bool isUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; - if (isUnordered) - { - if (tree->OperIs(GT_EQ)) - buildInternalIntRegisterDefForNode(tree); - } - else - { - if (tree->OperIs(GT_NE)) - buildInternalIntRegisterDefForNode(tree); - } - } - else + if (!varTypeIsFloating(op1Type)) { emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); - if (tree->gtGetOp2()->isContainedIntOrIImmed()) + if (cmpSize == EA_4BYTE) { - bool isUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; - if (cmpSize == EA_4BYTE && isUnsigned) - buildInternalIntRegisterDefForNode(tree); - } - else - { - if (cmpSize == EA_4BYTE) + GenTree* op2 = tree->gtGetOp2(); + + bool isUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + bool useAddSub = !(!tree->OperIs(GT_EQ, GT_NE) || op2->IsIntegralConst(-2048)); + bool useShiftRight = !isUnsigned && ((tree->OperIs(GT_LT) && op2->IsIntegralConst(0)) || + (tree->OperIs(GT_LE) && op2->IsIntegralConst(-1))); + bool useLoadImm = isUnsigned && ((tree->OperIs(GT_LT, GT_GE) && op2->IsIntegralConst(0)) || + (tree->OperIs(GT_LE, GT_GT) && op2->IsIntegralConst(-1))); + + if (!useAddSub && !useShiftRight && !useLoadImm) buildInternalIntRegisterDefForNode(tree); } } @@ -455,16 +480,44 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CMPXCHG: { GenTreeCmpXchg* cas = tree->AsCmpXchg(); - assert(!cas->Comparand()->isContained()); - srcCount = 3; assert(dstCount == 1); - buildInternalIntRegisterDefForNode(tree); // temp reg for store conditional error + srcCount = 1; // Extend lifetimes of argument regs because they may be reused during retries + assert(!cas->Addr()->isContained()); setDelayFree(BuildUse(cas->Addr())); - setDelayFree(BuildUse(cas->Data())); - setDelayFree(BuildUse(cas->Comparand())); + GenTree* data = cas->Data(); + if (!data->isContained()) + { + srcCount++; + setDelayFree(BuildUse(data)); + } + else + { + assert(data->IsIntegralConst(0)); + } + + GenTree* comparand = cas->Comparand(); + if (!comparand->isContained()) + { + srcCount++; + RefPosition* use = BuildUse(comparand); + if (comparand->TypeIs(TYP_INT, TYP_UINT)) + { + buildInternalIntRegisterDefForNode(tree); // temp reg for sign-extended comparand + } + else + { + setDelayFree(use); + } + } + else + 
{ + assert(comparand->IsIntegralConst(0)); + } + + buildInternalIntRegisterDefForNode(tree); // temp reg for store conditional error // Internals may not collide with target setInternalRegsDelayFree = true; buildInternalRegisterUses(); @@ -484,11 +537,20 @@ int LinearScan::BuildNode(GenTree* tree) assert(dstCount == (tree->TypeIs(TYP_VOID) ? 0 : 1)); GenTree* addr = tree->gtGetOp1(); GenTree* data = tree->gtGetOp2(); - assert(!addr->isContained() && !data->isContained()); - srcCount = 2; + assert(!addr->isContained()); + srcCount = 1; BuildUse(addr); - BuildUse(data); + if (!data->isContained()) + { + srcCount++; + BuildUse(data); + } + else + { + assert(data->IsIntegralConst(0)); + } + if (dstCount == 1) { BuildDef(tree); @@ -496,11 +558,6 @@ int LinearScan::BuildNode(GenTree* tree) } break; - case GT_PUTARG_SPLIT: - srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); - dstCount = tree->AsPutArgSplit()->gtNumRegs; - break; - case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; @@ -717,6 +774,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); @@ -788,7 +850,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) { // struct typed indirs are expected only on rhs of a block copy, // but in this case they must be contained. - assert(indirTree->TypeGet() != TYP_STRUCT); + assert(!indirTree->TypeIs(TYP_STRUCT)); GenTree* addr = indirTree->Addr(); GenTree* index = nullptr; @@ -796,7 +858,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) if (addr->isContained()) { - if (addr->OperGet() == GT_LEA) + if (addr->OperIs(GT_LEA)) { GenTreeAddrMode* lea = addr->AsAddrMode(); index = lea->Index(); @@ -815,10 +877,14 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) buildInternalIntRegisterDefForNode(indirTree); } } + else if (addr->OperIs(GT_CNS_INT)) + { + buildInternalIntRegisterDefForNode(indirTree); + } } #ifdef FEATURE_SIMD - if (indirTree->TypeGet() == TYP_SIMD12) + if (indirTree->TypeIs(TYP_SIMD12)) { // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). assert(!addr->isContained()); @@ -856,7 +922,7 @@ int LinearScan::BuildCall(GenTreeCall* call) int srcCount = 0; int dstCount = 0; - if (call->TypeGet() != TYP_VOID) + if (!call->TypeIs(TYP_VOID)) { hasMultiRegRetVal = call->HasMultiRegRetVal(); if (hasMultiRegRetVal) @@ -886,7 +952,7 @@ int LinearScan::BuildCall(GenTreeCall* call) if (ctrlExpr != nullptr) { // we should never see a gtControlExpr whose type is void. - assert(ctrlExpr->TypeGet() != TYP_VOID); + assert(!ctrlExpr->TypeIs(TYP_VOID)); // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into a register. @@ -902,6 +968,12 @@ int LinearScan::BuildCall(GenTreeCall* call) } assert(ctrlExprCandidates != RBM_NONE); } + + // In case ctrlExpr is a contained constant, we need a register to store the value. 
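For context on why the GT_CMPXCHG operands above are built delay-free: the RISC-V sequence is an LR/SC loop that may retry, so the address, data, and comparand must stay live for every iteration, not just the first. A rough C++ model of those semantics, assuming a weak compare-exchange that can fail spuriously the way SC can; this is not the JIT's emitted code:

#include <atomic>
#include <cstdint>

static int32_t cmpXchgSketch(std::atomic<int32_t>& location, int32_t value, int32_t comparand)
{
    int32_t observed = comparand;
    while (!location.compare_exchange_weak(observed, value))
    {
        if (observed != comparand)
        {
            break;  // a different value was there; that value is the result
        }
        // spurious failure: retry with the same comparand and value,
        // which is why their registers must remain available
    }
    return observed;
}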
+ if (ctrlExpr->isContainedIntOrIImmed()) + { + buildInternalIntRegisterDefForNode(call); + } } else if (call->IsR2ROrVirtualStubRelativeIndir()) { @@ -939,7 +1011,7 @@ int LinearScan::BuildCall(GenTreeCall* call) srcCount += BuildCallArgUses(call); - if (ctrlExpr != nullptr) + if (ctrlExpr != nullptr && !ctrlExpr->isContainedIntOrIImmed()) { BuildUse(ctrlExpr, ctrlExprCandidates); srcCount++; @@ -948,6 +1020,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { @@ -989,7 +1066,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) { - assert(argNode->gtOper == GT_PUTARG_STK); + assert(argNode->OperIs(GT_PUTARG_STK)); GenTree* src = argNode->gtGetOp1(); @@ -1018,7 +1095,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) assert(src->isContained()); - if (src->OperGet() == GT_BLK) + if (src->OperIs(GT_BLK)) { srcCount = BuildOperandUses(src->AsBlk()->Addr()); } @@ -1038,103 +1115,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) return srcCount; } -//------------------------------------------------------------------------ -// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node -// -// Arguments: -// argNode - a GT_PUTARG_SPLIT node -// -// Return Value: -// The number of sources consumed by this node. -// -// Notes: -// Set the child node(s) to be contained -// -int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) -{ - int srcCount = 0; - assert(argNode->gtOper == GT_PUTARG_SPLIT); - - GenTree* src = argNode->gtGetOp1(); - - // Registers for split argument corresponds to source - int dstCount = argNode->gtNumRegs; - - regNumber argReg = argNode->GetRegNum(); - SingleTypeRegSet argMask = RBM_NONE; - for (unsigned i = 0; i < argNode->gtNumRegs; i++) - { - regNumber thisArgReg = (regNumber)((unsigned)argReg + i); - argMask |= genSingleTypeRegMask(thisArgReg); - argNode->SetRegNumByIdx(thisArgReg, i); - } - assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || - ((argMask & availableFloatRegs) != RBM_NONE)); - - if (src->OperGet() == GT_FIELD_LIST) - { - // Generated code: - // 1. Consume all of the items in the GT_FIELD_LIST (source) - // 2. Store to target slot and move to target registers (destination) from source - // - unsigned sourceRegCount = 0; - - // To avoid redundant moves, have the argument operand computed in the - // register in which the argument is passed to the call. - - for (GenTreeFieldList::Use& use : src->AsFieldList()->Uses()) - { - GenTree* node = use.GetNode(); - assert(!node->isContained()); - // The only multi-reg nodes we should see are OperIsMultiRegOp() - unsigned currentRegCount = 1; - assert(!node->IsMultiRegNode()); - - // Consume all the registers, setting the appropriate register mask for the ones that - // go into registers. 
- for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++) - { - SingleTypeRegSet sourceMask = RBM_NONE; - if (sourceRegCount < argNode->gtNumRegs) - { - sourceMask = genSingleTypeRegMask((regNumber)((unsigned)argReg + sourceRegCount)); - } - sourceRegCount++; - BuildUse(node, sourceMask, regIndex); - } - } - srcCount += sourceRegCount; - assert(src->isContained()); - } - else - { - assert(src->TypeIs(TYP_STRUCT) && src->isContained()); - - if (src->OperIs(GT_BLK)) - { - // If the PUTARG_SPLIT clobbers only one register we may need an - // extra internal register in case there is a conflict between the - // source address register and target register. - if (argNode->gtNumRegs == 1) - { - // We can use a ldr/str sequence so we need an internal register - buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); - } - - // We will generate code that loads from the OBJ's address, which must be in a register. - srcCount = BuildOperandUses(src->AsBlk()->Addr()); - } - else - { - // We will generate all of the code for the GT_PUTARG_SPLIT and LCL_VAR/LCL_FLD as one contained operation. - assert(src->OperIsLocalRead()); - } - } - buildInternalRegisterUses(); - BuildDefs(argNode, dstCount, argMask); - return srcCount; -} - //------------------------------------------------------------------------ // BuildBlockStore: Build the RefPositions for a block store node. // diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 294f67e612fb..1e9763c6c5fe 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -210,13 +210,13 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETFILT: assert(dstCount == 0); - if (tree->TypeGet() == TYP_VOID) + if (tree->TypeIs(TYP_VOID)) { srcCount = 0; } else { - assert(tree->TypeGet() == TYP_INT); + assert(tree->TypeIs(TYP_INT)); srcCount = 1; BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } @@ -275,14 +275,14 @@ int LinearScan::BuildNode(GenTree* tree) case GT_JMPTABLE: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, BuildApxIncompatibleGPRMask(tree)); + BuildDef(tree); break; case GT_SWITCH_TABLE: { assert(dstCount == 0); - buildInternalIntRegisterDefForNode(tree, BuildApxIncompatibleGPRMask(tree)); - srcCount = BuildBinaryUses(tree->AsOp(), BuildApxIncompatibleGPRMask(tree->AsOp())); + buildInternalIntRegisterDefForNode(tree); + srcCount = BuildBinaryUses(tree->AsOp()); buildInternalRegisterUses(); assert(srcCount == 2); } @@ -307,9 +307,8 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETURNTRAP: { // This just turns into a compare of its child with an int + a conditional call. - RefPosition* internalDef = - buildInternalIntRegisterDefForNode(tree, BuildApxIncompatibleGPRMask(tree, RBM_NONE, true)); - srcCount = BuildOperandUses(tree->gtGetOp1()); + RefPosition* internalDef = buildInternalIntRegisterDefForNode(tree); + srcCount = BuildOperandUses(tree->gtGetOp1()); buildInternalRegisterUses(); killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); BuildKills(tree, killMask); @@ -441,16 +440,17 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CMP: case GT_TEST: case GT_BT: +#ifdef TARGET_AMD64 + case GT_CCMP: +#endif srcCount = BuildCmp(tree); break; case GT_CKFINITE: { assert(dstCount == 1); - // TODO-Xarch-apx: Revisit. 
This internally creates a float -> int which is a movd - RefPosition* internalDef = - buildInternalIntRegisterDefForNode(tree, BuildApxIncompatibleGPRMask(tree, RBM_NONE, true)); - srcCount = BuildOperandUses(tree->gtGetOp1()); + RefPosition* internalDef = buildInternalIntRegisterDefForNode(tree); + srcCount = BuildOperandUses(tree->gtGetOp1()); buildInternalRegisterUses(); BuildDef(tree); } @@ -536,11 +536,9 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; break; -#ifdef FEATURE_PUT_STRUCT_ARG_STK case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; -#endif // FEATURE_PUT_STRUCT_ARG_STK case GT_STORE_BLK: srcCount = BuildBlockStore(tree->AsBlk()); @@ -626,6 +624,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: srcCount = 0; @@ -643,8 +646,7 @@ int LinearScan::BuildNode(GenTree* tree) // length into a register to widen it to `native int` // - if the index is `int` (or smaller) then we need to widen // it to `long` to perform the address calculation - // TODO-Xarch-apx: Revisit. This internally created instructions that require extended EVEX. - internalDef = buildInternalIntRegisterDefForNode(tree, BuildApxIncompatibleGPRMask(tree, RBM_NONE, true)); + internalDef = buildInternalIntRegisterDefForNode(tree); #else // !TARGET_64BIT assert(!varTypeIsLong(tree->AsIndexAddr()->Index()->TypeGet())); switch (tree->AsIndexAddr()->gtElemSize) @@ -660,25 +662,7 @@ int LinearScan::BuildNode(GenTree* tree) break; } #endif // !TARGET_64BIT - // TODO-Xarch-apx: Might have to mask away eGPR if imul is likely to be created. - // see - // https://github.com/dotnet/runtime/blob/31733b9a35185785427bac69ef80a4eb56b727c2/src/coreclr/jit/codegenxarch.cpp#L1303 - SingleTypeRegSet ApxAwareMask = RBM_NONE; -#ifdef TARGET_64BIT - switch (tree->AsIndexAddr()->gtElemSize) - { - case 1: - case 2: - case 4: - case 8: - break; - - default: - ApxAwareMask = BuildApxIncompatibleGPRMask(tree, ApxAwareMask, true); - break; - } -#endif - srcCount = BuildBinaryUses(tree->AsOp(), ApxAwareMask); + srcCount = BuildBinaryUses(tree->AsOp()); if (internalDef != nullptr) { buildInternalRegisterUses(); @@ -1102,31 +1086,27 @@ int LinearScan::BuildShiftRotate(GenTree* tree) if ((genActualType(targetType) == TYP_LONG) && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) && (shiftByValue > 0) && (shiftByValue < 64)) { - srcCandidates = BuildApxIncompatibleGPRMask(source, srcCandidates, true); - dstCandidates = BuildApxIncompatibleGPRMask(tree, dstCandidates, true); + srcCandidates = ForceLowGprForApxIfNeeded(source, srcCandidates, getEvexIsSupported()); + dstCandidates = ForceLowGprForApxIfNeeded(tree, dstCandidates, getEvexIsSupported()); } + #endif } -#if defined(TARGET_64BIT) - else if (tree->OperIsShift() && !tree->isContained() && - compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2)) - { - // shlx (as opposed to mov+shl) instructions handles all register forms, but it does not handle contained form - // for memory operand. Likewise for sarx and shrx. 
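The rewritten BMI2 path above relies on shlx/shrx/sarx being true three-operand instructions that leave the flags alone, which is why neither the RCX reservation nor the source preferencing is needed when they apply. A trivial compiler-level illustration, assuming a build with -mbmi2; the register choice is then up to the register allocator rather than fixed to CL:

#include <cstdint>

static uint64_t shiftLeftSketch(uint64_t value, unsigned count)
{
    // With BMI2 this is a single shlx dst, value, count using any registers;
    // without it the count must first be moved into CL for shl.
    return value << (count & 63);
}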
- // ToDo-APX : Remove when extended EVEX support is available - srcCount += BuildOperandUses(source, BuildApxIncompatibleGPRMask(source, srcCandidates)); - srcCount += BuildOperandUses(shiftBy, BuildApxIncompatibleGPRMask(shiftBy, dstCandidates)); - BuildDef(tree, BuildApxIncompatibleGPRMask(tree, dstCandidates, true)); + else if (!tree->isContained() && (tree->OperIsShift() || source->isContained()) && + compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && !tree->gtSetFlags()) + { + // We don'thave any specific register requirements here, so skip the logic that + // reserves RCX or preferences the source reg. + srcCount += BuildOperandUses(source, ForceLowGprForApxIfNeeded(source, srcCandidates, getEvexIsSupported())); + srcCount += BuildOperandUses(shiftBy, ForceLowGprForApxIfNeeded(shiftBy, dstCandidates, getEvexIsSupported())); + BuildDef(tree, ForceLowGprForApxIfNeeded(tree, dstCandidates, getEvexIsSupported())); return srcCount; } -#endif else { // This ends up being BMI srcCandidates = availableIntRegs & ~SRBM_RCX; - srcCandidates = BuildApxIncompatibleGPRMask(source, srcCandidates, true); dstCandidates = availableIntRegs & ~SRBM_RCX; - dstCandidates = BuildApxIncompatibleGPRMask(tree, dstCandidates, true); } // Note that Rotate Left/Right instructions don't set ZF and SF flags. @@ -1143,9 +1123,9 @@ int LinearScan::BuildShiftRotate(GenTree* tree) #ifdef TARGET_X86 // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that // we can have a three operand form. - if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO) + if (tree->OperIs(GT_LSH_HI) || tree->OperIs(GT_RSH_LO)) { - assert((source->OperGet() == GT_LONG) && source->isContained()); + assert(source->OperIs(GT_LONG) && source->isContained()); GenTree* sourceLo = source->gtGetOp1(); GenTree* sourceHi = source->gtGetOp2(); @@ -1155,7 +1135,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree) if (!tree->isContained()) { - if (tree->OperGet() == GT_LSH_HI) + if (tree->OperIs(GT_LSH_HI)) { setDelayFree(sourceLoUse); } @@ -1176,6 +1156,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree) { srcCount += BuildOperandUses(source, srcCandidates); } + if (!tree->isContained()) { if (!shiftBy->isContained()) @@ -1214,7 +1195,7 @@ int LinearScan::BuildCall(GenTreeCall* call) SingleTypeRegSet singleDstCandidates = RBM_NONE; assert(!call->isContained()); - if (call->TypeGet() != TYP_VOID) + if (!call->TypeIs(TYP_VOID)) { hasMultiRegRetVal = call->HasMultiRegRetVal(); if (hasMultiRegRetVal) @@ -1285,7 +1266,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { for (CallArg& arg : call->gtArgs.LateArgs()) { - for (const ABIPassingSegment& seg : arg.NewAbiInfo.Segments()) + for (const ABIPassingSegment& seg : arg.AbiInfo.Segments()) { if (seg.IsPassedInRegister() && genIsValidFloatReg(seg.GetRegister())) { @@ -1339,9 +1320,6 @@ int LinearScan::BuildCall(GenTreeCall* call) // by Amd64 ABI. ctrlExprCandidates = availableIntRegs & ~(RBM_ARG_REGS.GetIntRegSet()); } -#if defined(TARGET_AMD64) - ctrlExprCandidates = BuildApxIncompatibleGPRMask(ctrlExpr, ctrlExprCandidates, true); -#endif // TARGET_AMD64 srcCount += BuildOperandUses(ctrlExpr, ctrlExprCandidates); } @@ -1358,6 +1336,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. 
+ if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { @@ -1432,7 +1415,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { case GenTreeBlk::BlkOpKindUnroll: { - bool willUseSimdMov = compiler->IsBaselineSimdIsaSupported() && (size >= XMM_REGSIZE_BYTES); + bool willUseSimdMov = (size >= XMM_REGSIZE_BYTES); if (willUseSimdMov && blkNode->IsOnHeapAndContainsReferences()) { ClassLayout* layout = blkNode->GetLayout(); @@ -1481,9 +1464,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) case GenTreeBlk::BlkOpKindLoop: // Needed for offsetReg - // TODO-XArch-apx: Revert. We vectorized these. Cannot use eGPR currently - buildInternalIntRegisterDefForNode(blkNode, - BuildApxIncompatibleGPRMask(blkNode, availableIntRegs, true)); + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); break; default: @@ -1532,7 +1513,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // or if are but the remainder is a power of 2 and less than the // size of a register - SingleTypeRegSet regMask = BuildApxIncompatibleGPRMask(blkNode, availableIntRegs, true); + SingleTypeRegSet regMask = availableIntRegs; #ifdef TARGET_X86 if ((size & 1) != 0) { @@ -1589,16 +1570,13 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) else if (isPow2(size)) { // Single GPR for 1,2,4,8 - buildInternalIntRegisterDefForNode(blkNode, - BuildApxIncompatibleGPRMask(blkNode, availableIntRegs, true)); + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); } else { // Any size from 3 to 15 can be handled via two GPRs - buildInternalIntRegisterDefForNode(blkNode, - BuildApxIncompatibleGPRMask(blkNode, availableIntRegs, true)); - buildInternalIntRegisterDefForNode(blkNode, - BuildApxIncompatibleGPRMask(blkNode, availableIntRegs, true)); + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); } } break; @@ -1632,11 +1610,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (!dstAddr->isContained()) { useCount++; - BuildUse(dstAddr, BuildApxIncompatibleGPRMask(dstAddr, dstAddrRegMask)); + BuildUse(dstAddr, ForceLowGprForApxIfNeeded(dstAddr, dstAddrRegMask, getEvexIsSupported())); } else if (dstAddr->OperIsAddrMode()) { - useCount += BuildAddrUses(dstAddr, BuildApxIncompatibleGPRMask(dstAddr)); + useCount += BuildAddrUses(dstAddr, ForceLowGprForApxIfNeeded(dstAddr, RBM_NONE, getEvexIsSupported())); } if (srcAddrOrFill != nullptr) @@ -1644,11 +1622,12 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (!srcAddrOrFill->isContained()) { useCount++; - BuildUse(srcAddrOrFill, BuildApxIncompatibleGPRMask(srcAddrOrFill, srcRegMask)); + BuildUse(srcAddrOrFill, ForceLowGprForApxIfNeeded(srcAddrOrFill, srcRegMask, getEvexIsSupported())); } else if (srcAddrOrFill->OperIsAddrMode()) { - useCount += BuildAddrUses(srcAddrOrFill, BuildApxIncompatibleGPRMask(srcAddrOrFill)); + useCount += + BuildAddrUses(srcAddrOrFill, ForceLowGprForApxIfNeeded(srcAddrOrFill, RBM_NONE, getEvexIsSupported())); } } @@ -1679,7 +1658,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) return useCount; } -#ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK. 
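On the BuildBlockStore sizing above ("Single GPR for 1,2,4,8", "Any size from 3 to 15 can be handled via two GPRs"): the non-power-of-two remainder is presumably covered by two overlapping power-of-two-wide accesses, which is why two temporaries suffice. A standalone sketch of that trick with hypothetical helper names:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Copies a remainder of 3..15 bytes using two possibly overlapping loads/stores
// whose width is the largest power of two not exceeding size.
static void copyRemainderSketch(uint8_t* dst, const uint8_t* src, size_t size)
{
    size_t half = 1;
    while ((half << 1) <= size)
    {
        half <<= 1;                                 // 2, 4, or 8 for size in [3, 15]
    }
    uint64_t lo = 0, hi = 0;
    std::memcpy(&lo, src, half);                    // covers [0, half)
    std::memcpy(&hi, src + size - half, half);      // covers [size - half, size), may overlap
    std::memcpy(dst, &lo, half);
    std::memcpy(dst + size - half, &hi, half);
}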
// @@ -1692,7 +1670,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) { int srcCount = 0; - if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST) + if (putArgStk->gtOp1->OperIs(GT_FIELD_LIST)) { assert(putArgStk->gtOp1->isContained()); @@ -1853,7 +1831,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) return srcCount; } -#endif // FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // BuildLclHeap: Set the NodeInfo for a GT_LCLHEAP. @@ -1919,7 +1896,7 @@ int LinearScan::BuildModDiv(GenTree* tree) // Dividend in RAX:RDX and computes // Quotient in RAX, Remainder in RDX - if (tree->OperGet() == GT_MOD || tree->OperGet() == GT_UMOD) + if (tree->OperIs(GT_MOD) || tree->OperIs(GT_UMOD)) { // We are interested in just the remainder. // RAX is used as a trashable register during computation of remainder. @@ -1933,7 +1910,7 @@ int LinearScan::BuildModDiv(GenTree* tree) } #ifdef TARGET_X86 - if (op1->OperGet() == GT_LONG) + if (op1->OperIs(GT_LONG)) { assert(op1->isContained()); @@ -1943,7 +1920,7 @@ int LinearScan::BuildModDiv(GenTree* tree) assert(!loVal->isContained() && !hiVal->isContained()); assert(op2->IsCnsIntOrI()); - assert(tree->OperGet() == GT_UMOD); + assert(tree->OperIs(GT_UMOD)); // This situation also requires an internal register. buildInternalIntRegisterDefForNode(tree); @@ -2026,11 +2003,28 @@ int LinearScan::BuildIntrinsic(GenTree* tree) { SingleTypeRegSet op1RegCandidates = RBM_NONE; - // NI_System_Math_Abs is the only one likely to use a GPR - op1RegCandidates = BuildApxIncompatibleGPRMask(op1, op1RegCandidates); - if (op1RegCandidates == RBM_NONE) + switch (tree->AsIntrinsic()->gtIntrinsicName) { - op1RegCandidates = BuildEvexIncompatibleMask(op1); + case NI_System_Math_Ceiling: + case NI_System_Math_Floor: + case NI_System_Math_Truncate: + case NI_System_Math_Round: + case NI_System_Math_Sqrt: + { + op1RegCandidates = ForceLowGprForApx(op1); + break; + } + case NI_System_Math_Abs: + { + op1RegCandidates = ForceLowGprForApxIfNeeded(op1, RBM_NONE, getEvexIsSupported()); + break; + } + default: + { + noway_assert(!"Unsupported math intrinsic"); + unreached(); + break; + } } srcCount = BuildOperandUses(op1, op1RegCandidates); @@ -2050,39 +2044,47 @@ int LinearScan::BuildIntrinsic(GenTree* tree) #ifdef FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ -// SkipContainedCreateScalarUnsafe: Skips a contained CreateScalarUnsafe node +// SkipContainedUnaryOp: Skips a contained non-memory or const node // and gets the underlying op1 instead // // Arguments: // node - The node to handle // // Return Value: -// If node is a contained CreateScalarUnsafe, it's op1 is returned; +// If node is a contained non-memory or const unary op, its op1 is returned; // otherwise node is returned unchanged. 
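The generalization from SkipContainedCreateScalarUnsafe to SkipContainedUnaryOp keeps the same idea: when a contained node merely re-tags its single operand, register preferencing should look through it. A minimal structural sketch with hypothetical types (not GenTree):

struct Node
{
    bool  contained;
    bool  isTransparentWrapper; // CreateScalar / CreateScalarUnsafe in the real JIT
    Node* op1;
};

static Node* skipContainedUnaryOpSketch(Node* node)
{
    if (node->contained && node->isTransparentWrapper && (node->op1 != nullptr))
    {
        return node->op1;       // preference the underlying operand instead
    }
    return node;
}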
-static GenTree* SkipContainedCreateScalarUnsafe(GenTree* node) +static GenTree* SkipContainedUnaryOp(GenTree* node) { - if (!node->OperIsHWIntrinsic() || !node->isContained()) + if (!node->isContained()) { return node; } - GenTreeHWIntrinsic* hwintrinsic = node->AsHWIntrinsic(); - NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); - - switch (intrinsicId) + if (node->OperIsHWIntrinsic()) { - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: - { - return hwintrinsic->Op(1); - } + GenTreeHWIntrinsic* hwintrinsic = node->AsHWIntrinsic(); + NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); - default: + switch (intrinsicId) { - return node; + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: + case NI_Vector512_CreateScalar: + case NI_Vector128_CreateScalarUnsafe: + case NI_Vector256_CreateScalarUnsafe: + case NI_Vector512_CreateScalarUnsafe: + { + return hwintrinsic->Op(1); + } + + default: + { + break; + } } } + + return node; } //------------------------------------------------------------------------ @@ -2139,8 +2141,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - // A contained CreateScalarUnsafe is special in that we're not containing it to load from - // memory and it isn't a constant. Instead, its essentially a "transparent" node we're ignoring + // In a few cases, we contain an operand that isn't a load from memory or a constant. Instead, + // it is essentially a "transparent" node we're ignoring or handling specially in codegen // to simplify the overall IR handling. As such, we need to "skip" such nodes when present and // get the underlying op1 so that delayFreeUse and other preferencing remains correct. @@ -2149,37 +2151,37 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou GenTree* op3 = nullptr; GenTree* op4 = nullptr; GenTree* op5 = nullptr; - GenTree* lastOp = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(numArgs)); + GenTree* lastOp = SkipContainedUnaryOp(intrinsicTree->Op(numArgs)); switch (numArgs) { case 5: { - op5 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(5)); + op5 = SkipContainedUnaryOp(intrinsicTree->Op(5)); FALLTHROUGH; } case 4: { - op4 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(4)); + op4 = SkipContainedUnaryOp(intrinsicTree->Op(4)); FALLTHROUGH; } case 3: { - op3 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(3)); + op3 = SkipContainedUnaryOp(intrinsicTree->Op(3)); FALLTHROUGH; } case 2: { - op2 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(2)); + op2 = SkipContainedUnaryOp(intrinsicTree->Op(2)); FALLTHROUGH; } case 1: { - op1 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(1)); + op1 = SkipContainedUnaryOp(intrinsicTree->Op(1)); break; } @@ -2191,6 +2193,18 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou bool buildUses = true; + // Determine whether this is an RMW operation where op2+ must be marked delayFree so that it + // is not allocated the same register as the target. + bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler); + + const bool isEvexCompatible = intrinsicTree->isEvexCompatibleHWIntrinsic(compiler); +#ifdef TARGET_AMD64 + const bool canHWIntrinsicUseApxRegs = isEvexCompatible && getEvexIsSupported(); +#else + // We can never use EGPRs on non-64-bit platforms. 
+ const bool canHWIntrinsicUseApxRegs = false; +#endif // TARGET_AMD64 + if ((category == HW_Category_IMM) && !HWIntrinsicInfo::NoJmpTableImm(intrinsicId)) { if (HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && !lastOp->isContainedIntOrIImmed()) @@ -2200,39 +2214,31 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // We need two extra reg when lastOp isn't a constant so // the offset into the jump table for the fallback path // can be computed. - buildInternalIntRegisterDefForNode(intrinsicTree, - BuildApxIncompatibleGPRMask(intrinsicTree, availableIntRegs, true)); - buildInternalIntRegisterDefForNode(intrinsicTree, - BuildApxIncompatibleGPRMask(intrinsicTree, availableIntRegs, true)); + buildInternalIntRegisterDefForNode(intrinsicTree); + buildInternalIntRegisterDefForNode(intrinsicTree); } } if (intrinsicTree->OperIsEmbRoundingEnabled() && !lastOp->IsCnsIntOrI()) { - buildInternalIntRegisterDefForNode(intrinsicTree, - BuildApxIncompatibleGPRMask(intrinsicTree, availableIntRegs, true)); - buildInternalIntRegisterDefForNode(intrinsicTree, - BuildApxIncompatibleGPRMask(intrinsicTree, availableIntRegs, true)); + buildInternalIntRegisterDefForNode(intrinsicTree); + buildInternalIntRegisterDefForNode(intrinsicTree); } - // Determine whether this is an RMW operation where op2+ must be marked delayFree so that it - // is not allocated the same register as the target. - bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler); -#if defined(TARGET_AMD64) - bool isEvexCompatible = intrinsicTree->isEvexCompatibleHWIntrinsic(compiler); -#endif // TARGET_AMD64 - // Create internal temps, and handle any other special requirements. // Note that the default case for building uses will handle the RMW flag, but if the uses // are built in the individual cases, buildUses is set to false, and any RMW handling (delayFree) // must be handled within the case. switch (intrinsicId) { + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: + case NI_Vector512_CreateScalar: case NI_Vector128_CreateScalarUnsafe: - case NI_Vector128_ToScalar: case NI_Vector256_CreateScalarUnsafe: - case NI_Vector256_ToScalar: case NI_Vector512_CreateScalarUnsafe: + case NI_Vector128_ToScalar: + case NI_Vector256_ToScalar: case NI_Vector512_ToScalar: { assert(numArgs == 1); @@ -2241,22 +2247,44 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { if (op1->isContained()) { - SingleTypeRegSet apxAwareRegCandidates = BuildApxIncompatibleGPRMask(op1); + SingleTypeRegSet apxAwareRegCandidates = + ForceLowGprForApxIfNeeded(op1, RBM_NONE, canHWIntrinsicUseApxRegs); srcCount += BuildOperandUses(op1, apxAwareRegCandidates); } else { - // We will either be in memory and need to be moved - // into a register of the appropriate size or we - // are already in an XMM/YMM/ZMM register and can stay - // where we are. + // CreateScalarUnsafe and ToScalar are essentially no-ops for floating point types and can reuse + // the op1 register. CreateScalar needs to clear the upper elements, so if we have a float and + // can't use insertps to zero the upper elements in-place, we'll need a different target reg. 
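The CreateScalar comment below is about upper-lane semantics: CreateScalar must zero lanes 1..3, while CreateScalarUnsafe may leave them as they were, and without SSE4.1's insertps that zeroing cannot be done in place, hence the delay-free target register. An intrinsics-level illustration of the difference, assuming an x86 host; this is not the JIT's emitter sequence:

#include <immintrin.h>

static __m128 createScalarSketch(float value)
{
    return _mm_set_ss(value);   // lane 0 = value, lanes 1..3 zeroed
}

static __m128 createScalarUnsafeSketch(__m128 existing, float value)
{
    // lane 0 replaced, lanes 1..3 keep whatever was already in 'existing'
    return _mm_move_ss(existing, _mm_set_ss(value));
}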
- tgtPrefUse = BuildUse(op1); + RefPosition* op1Use = BuildUse(op1); srcCount += 1; + + if ((baseType == TYP_FLOAT) && HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId) && + !compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + setDelayFree(op1Use); + } + else + { + tgtPrefUse = op1Use; + } } buildUses = false; } +#if TARGET_X86 + else if (varTypeIsByte(baseType) && HWIntrinsicInfo::IsVectorToScalar(intrinsicId)) + { + dstCandidates = allByteRegs(); + } + else if (varTypeIsLong(baseType) && !compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + // For SSE2 fallbacks, we will need a temp register to insert the upper half of a long + buildInternalFloatRegisterDefForNode(intrinsicTree); + setInternalRegsDelayFree = true; + } +#endif // TARGET_X86 break; } @@ -2277,6 +2305,33 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } + case NI_Vector128_WithElement: + case NI_Vector256_WithElement: + case NI_Vector512_WithElement: + { + assert(numArgs == 3); + + assert(!op1->isContained()); + assert(!op2->OperIsConst()); + + // If the index is not a constant + // we will use the SIMD temp location to store the vector. + + var_types requiredSimdTempType = intrinsicTree->TypeGet(); + compiler->getSIMDInitTempVarNum(requiredSimdTempType); + + // We then customize the uses as we will effectively be spilling + // op1, storing op3 to that spill location based on op2. Then + // reloading the updated value to the destination + + srcCount += BuildOperandUses(op1); + srcCount += BuildOperandUses(op2); + srcCount += BuildOperandUses(op3, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE); + + buildUses = false; + break; + } + case NI_Vector128_AsVector128Unsafe: case NI_Vector128_AsVector2: case NI_Vector128_AsVector3: @@ -2290,10 +2345,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_Vector512_GetLower128: { assert(numArgs == 1); - + SingleTypeRegSet apxAwareRegCandidates = + ForceLowGprForApxIfNeeded(op1, RBM_NONE, canHWIntrinsicUseApxRegs); if (op1->isContained()) { - SingleTypeRegSet apxAwareRegCandidates = BuildApxIncompatibleGPRMask(op1); srcCount += BuildOperandUses(op1, apxAwareRegCandidates); } else @@ -2303,7 +2358,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // are already in an XMM/YMM register and can stay // where we are. 
- tgtPrefUse = BuildUse(op1, BuildApxIncompatibleGPRMask(op1)); + tgtPrefUse = BuildUse(op1, apxAwareRegCandidates); srcCount += 1; } @@ -2311,7 +2366,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_SSE2_MaskMove: + case NI_X86Base_MaskMove: { assert(numArgs == 3); assert(!isRMW); @@ -2338,7 +2393,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount += 1; - SingleTypeRegSet op2RegCandidates = BuildApxIncompatibleGPRMask(op2); + SingleTypeRegSet op2RegCandidates = ForceLowGprForApx(op2); if (op2RegCandidates == RBM_NONE) { op2RegCandidates = BuildEvexIncompatibleMask(op2); @@ -2413,7 +2468,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - SingleTypeRegSet apxAwareRegCandidates = BuildApxIncompatibleGPRMask(op3); + SingleTypeRegSet apxAwareRegCandidates = + ForceLowGprForApxIfNeeded(op3, RBM_NONE, canHWIntrinsicUseApxRegs); srcCount += BuildOperandUses(op3, apxAwareRegCandidates); } @@ -2430,15 +2486,16 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { assert(numArgs == 2 || numArgs == 3); srcCount += BuildOperandUses(op1, SRBM_EDX); - srcCount += BuildOperandUses(op2, BuildApxIncompatibleGPRMask(op2)); + SingleTypeRegSet apxAwareRegCandidates = + ForceLowGprForApxIfNeeded(op2, RBM_NONE, canHWIntrinsicUseApxRegs); + srcCount += BuildOperandUses(op2, apxAwareRegCandidates); if (numArgs == 3) { // op3 reg should be different from target reg to // store the lower half result after executing the instruction srcCount += BuildDelayFreeUses(op3, op1); // Need a internal register different from the dst to take the lower half result - buildInternalIntRegisterDefForNode(intrinsicTree, - BuildApxIncompatibleGPRMask(intrinsicTree, availableIntRegs)); + buildInternalIntRegisterDefForNode(intrinsicTree); setInternalRegsDelayFree = true; } buildUses = false; @@ -2455,20 +2512,16 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_FMA_MultiplySubtractNegated: case NI_FMA_MultiplySubtractNegatedScalar: case NI_FMA_MultiplySubtractScalar: - case NI_AVX512F_FusedMultiplyAdd: - case NI_AVX512F_FusedMultiplyAddScalar: - case NI_AVX512F_FusedMultiplyAddNegated: - case NI_AVX512F_FusedMultiplyAddNegatedScalar: - case NI_AVX512F_FusedMultiplyAddSubtract: - case NI_AVX512F_FusedMultiplySubtract: - case NI_AVX512F_FusedMultiplySubtractScalar: - case NI_AVX512F_FusedMultiplySubtractAdd: - case NI_AVX512F_FusedMultiplySubtractNegated: - case NI_AVX512F_FusedMultiplySubtractNegatedScalar: - case NI_AVX10v1_FusedMultiplyAddNegatedScalar: - case NI_AVX10v1_FusedMultiplyAddScalar: - case NI_AVX10v1_FusedMultiplySubtractNegatedScalar: - case NI_AVX10v1_FusedMultiplySubtractScalar: + case NI_AVX512_FusedMultiplyAdd: + case NI_AVX512_FusedMultiplyAddScalar: + case NI_AVX512_FusedMultiplyAddNegated: + case NI_AVX512_FusedMultiplyAddNegatedScalar: + case NI_AVX512_FusedMultiplyAddSubtract: + case NI_AVX512_FusedMultiplySubtract: + case NI_AVX512_FusedMultiplySubtractScalar: + case NI_AVX512_FusedMultiplySubtractAdd: + case NI_AVX512_FusedMultiplySubtractNegated: + case NI_AVX512_FusedMultiplySubtractNegatedScalar: { assert((numArgs == 3) || (intrinsicTree->OperIsEmbRoundingEnabled())); assert(isRMW); @@ -2577,7 +2630,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (op == emitOp3) { - SingleTypeRegSet apxAwareRegCandidates = BuildApxIncompatibleGPRMask(op); + 
SingleTypeRegSet apxAwareRegCandidates = + ForceLowGprForApxIfNeeded(op, RBM_NONE, canHWIntrinsicUseApxRegs); srcCount += op->isContained() ? BuildOperandUses(op, apxAwareRegCandidates) : BuildDelayFreeUses(op, emitOp1); } @@ -2592,7 +2646,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_EVEX_BlendVariableMask: + case NI_AVX512_BlendVariableMask: { assert(numArgs == 3); @@ -2612,7 +2666,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou for (GenTree* operand : op2->AsHWIntrinsic()->Operands()) { - assert(varTypeIsSIMD(operand) || varTypeIsInt(operand)); srcCount += BuildDelayFreeUses(operand, op1); } } @@ -2625,7 +2678,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou for (GenTree* operand : op2->AsHWIntrinsic()->Operands()) { - assert(varTypeIsSIMD(operand) || varTypeIsInt(operand)); srcCount += BuildOperandUses(operand); } } @@ -2638,27 +2690,18 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_AVX512F_PermuteVar8x64x2: - case NI_AVX512F_PermuteVar16x32x2: - case NI_AVX512F_VL_PermuteVar2x64x2: - case NI_AVX512F_VL_PermuteVar4x32x2: - case NI_AVX512F_VL_PermuteVar4x64x2: - case NI_AVX512F_VL_PermuteVar8x32x2: - case NI_AVX512BW_PermuteVar32x16x2: - case NI_AVX512BW_VL_PermuteVar8x16x2: - case NI_AVX512BW_VL_PermuteVar16x16x2: + case NI_AVX512_PermuteVar2x64x2: + case NI_AVX512_PermuteVar4x32x2: + case NI_AVX512_PermuteVar4x64x2: + case NI_AVX512_PermuteVar8x32x2: + case NI_AVX512_PermuteVar8x64x2: + case NI_AVX512_PermuteVar8x16x2: + case NI_AVX512_PermuteVar16x16x2: + case NI_AVX512_PermuteVar16x32x2: + case NI_AVX512_PermuteVar32x16x2: + case NI_AVX512VBMI_PermuteVar16x8x2: + case NI_AVX512VBMI_PermuteVar32x8x2: case NI_AVX512VBMI_PermuteVar64x8x2: - case NI_AVX512VBMI_VL_PermuteVar16x8x2: - case NI_AVX512VBMI_VL_PermuteVar32x8x2: - case NI_AVX10v1_PermuteVar16x8x2: - case NI_AVX10v1_PermuteVar2x64x2: - case NI_AVX10v1_PermuteVar4x32x2: - case NI_AVX10v1_PermuteVar8x16x2: - case NI_AVX10v1_PermuteVar32x8x2: - case NI_AVX10v1_PermuteVar4x64x2: - case NI_AVX10v1_PermuteVar8x32x2: - case NI_AVX10v1_PermuteVar16x16x2: - case NI_AVX10v1_V512_PermuteVar64x8x2: { assert(numArgs == 3); assert(isRMW); @@ -2695,11 +2738,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (op == emitOp2) { - srcCount += BuildDelayFreeUses(op, emitOp1, BuildApxIncompatibleGPRMask(op)); + srcCount += BuildDelayFreeUses(op, emitOp1, ForceLowGprForApx(op)); } else if (op == emitOp3) { - srcCount += op->isContained() ? BuildOperandUses(op, BuildApxIncompatibleGPRMask(op)) + srcCount += op->isContained() ? BuildOperandUses(op, ForceLowGprForApx(op)) : BuildDelayFreeUses(op, emitOp1); } } @@ -2716,8 +2759,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou tgtPrefUse = BuildUse(op1); srcCount += 1; srcCount += BuildDelayFreeUses(op2, op1); - srcCount += op3->isContained() ? BuildOperandUses(op3, BuildApxIncompatibleGPRMask(op3)) - : BuildDelayFreeUses(op3, op1); + srcCount += + op3->isContained() ? 
BuildOperandUses(op3, ForceLowGprForApx(op3)) : BuildDelayFreeUses(op3, op1); buildUses = false; break; @@ -2730,14 +2773,14 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(!isRMW); // Any pair of the index, mask, or destination registers should be different - SingleTypeRegSet op1RegCandidates = BuildApxIncompatibleGPRMask(op1); + SingleTypeRegSet op1RegCandidates = ForceLowGprForApx(op1); if (op1RegCandidates == RBM_NONE) { op1RegCandidates = BuildEvexIncompatibleMask(op1); } srcCount += BuildOperandUses(op1, op1RegCandidates); - SingleTypeRegSet op2RegCandidates = BuildApxIncompatibleGPRMask(op2); + SingleTypeRegSet op2RegCandidates = ForceLowGprForApx(op2); if (op2RegCandidates == RBM_NONE) { op2RegCandidates = BuildEvexIncompatibleMask(op2); @@ -2760,13 +2803,13 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { assert(!isRMW); // Any pair of the index, mask, or destination registers should be different - SingleTypeRegSet op1RegCandidates = BuildApxIncompatibleGPRMask(op1); + SingleTypeRegSet op1RegCandidates = ForceLowGprForApx(op1); if (op1RegCandidates == RBM_NONE) { op1RegCandidates = BuildEvexIncompatibleMask(op2); } srcCount += BuildOperandUses(op1, op1RegCandidates); - SingleTypeRegSet op2RegCandidates = BuildApxIncompatibleGPRMask(op2); + SingleTypeRegSet op2RegCandidates = ForceLowGprForApx(op2); if (op2RegCandidates == RBM_NONE) { op2RegCandidates = BuildEvexIncompatibleMask(op2); @@ -2786,6 +2829,23 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } + case NI_Vector128_op_Division: + case NI_Vector256_op_Division: + { + srcCount = BuildOperandUses(op1, lowSIMDRegs()); + srcCount += BuildOperandUses(op2, lowSIMDRegs()); + + // get a tmp register for div-by-zero check + buildInternalFloatRegisterDefForNode(intrinsicTree, lowSIMDRegs()); + + // get a tmp register for overflow check + buildInternalFloatRegisterDefForNode(intrinsicTree, lowSIMDRegs()); + setInternalRegsDelayFree = true; + + buildUses = false; + break; + } + default: { assert((intrinsicId > NI_HW_INTRINSIC_START) && (intrinsicId < NI_HW_INTRINSIC_END)); @@ -2800,12 +2860,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou SingleTypeRegSet op1RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) - op1RegCandidates = BuildApxIncompatibleGPRMask(op1, op1RegCandidates); - if (!isEvexCompatible && (op1RegCandidates == RBM_NONE)) + if (!isEvexCompatible) { op1RegCandidates = BuildEvexIncompatibleMask(op1); } - + op1RegCandidates = ForceLowGprForApxIfNeeded(op1, op1RegCandidates, canHWIntrinsicUseApxRegs); #endif // TARGET_AMD64 if (intrinsicTree->OperIsMemoryLoadOrStore()) @@ -2827,12 +2886,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou SingleTypeRegSet op2RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) - op2RegCandidates = BuildApxIncompatibleGPRMask(op2, op2RegCandidates); - if (!isEvexCompatible && (op2RegCandidates == RBM_NONE)) + if (!isEvexCompatible) { op2RegCandidates = BuildEvexIncompatibleMask(op2); } + if (!isEvexCompatible || !getEvexIsSupported()) + { + op2RegCandidates = ForceLowGprForApx(op2, op2RegCandidates); + } #endif // TARGET_AMD64 if (op2->OperIs(GT_HWINTRINSIC) && op2->AsHWIntrinsic()->OperIsMemoryLoad() && op2->isContained()) @@ -2874,15 +2936,28 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou SingleTypeRegSet op3RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) - 
op3RegCandidates = BuildApxIncompatibleGPRMask(op3, op3RegCandidates); - if (!isEvexCompatible && (op3RegCandidates == RBM_NONE)) + if (!isEvexCompatible) { op3RegCandidates = BuildEvexIncompatibleMask(op3); } + if (!isEvexCompatible || !getEvexIsSupported()) + { + op3RegCandidates = ForceLowGprForApx(op3, op3RegCandidates); + } #endif // TARGET_AMD64 - srcCount += isRMW ? BuildDelayFreeUses(op3, op1, op3RegCandidates) - : BuildOperandUses(op3, op3RegCandidates); + if (op3->OperIs(GT_HWINTRINSIC) && op3->AsHWIntrinsic()->OperIsMemoryLoad() && op3->isContained()) + { + srcCount += BuildAddrUses(op3->AsHWIntrinsic()->Op(1), op3RegCandidates); + } + else if (isRMW && !op3->isContained()) + { + srcCount += BuildDelayFreeUses(op3, op1, op3RegCandidates); + } + else + { + srcCount += BuildOperandUses(op3, op3RegCandidates); + } if (op4 != nullptr) { @@ -2890,7 +2965,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou #if defined(TARGET_AMD64) assert(isEvexCompatible); - op4RegCandidates = BuildApxIncompatibleGPRMask(op4, op4RegCandidates); #endif // TARGET_AMD64 srcCount += isRMW ? BuildDelayFreeUses(op4, op1, op4RegCandidates) @@ -2906,13 +2980,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (dstCount == 1) { #if defined(TARGET_AMD64) + // TODO-xarch-apx: there might be some problem if we allow EGPR as the dst of some instructions. bool isEvexCompatible = intrinsicTree->isEvexCompatibleHWIntrinsic(compiler); - dstCandidates = BuildApxIncompatibleGPRMask(intrinsicTree, dstCandidates); - if (!isEvexCompatible && (dstCandidates == RBM_NONE)) + if (!isEvexCompatible) { dstCandidates = BuildEvexIncompatibleMask(intrinsicTree); } + + // TODO-xarch-apx: revisit this part to check if we can merge this 2 checks. + if (!isEvexCompatible || !getEvexIsSupported()) + { + dstCandidates = ForceLowGprForApx(intrinsicTree, dstCandidates); + } #endif BuildDef(intrinsicTree, dstCandidates); @@ -2946,12 +3026,14 @@ int LinearScan::BuildCast(GenTreeCast* cast) const var_types srcType = src->TypeGet(); const var_types castType = cast->gtCastType; - if (cast->IsUnsigned() && varTypeIsLong(srcType) && varTypeIsFloating(castType) && !compiler->canUseEvexEncoding()) + if (cast->IsUnsigned() && varTypeIsLong(srcType) && varTypeIsFloating(castType) && !getEvexIsSupported()) { // We need two extra temp regs for LONG->DOUBLE cast // if we don't have EVEX unsigned conversions available. - buildInternalIntRegisterDefForNode(cast, BuildApxIncompatibleGPRMask(cast, availableIntRegs, true)); - buildInternalIntRegisterDefForNode(cast, BuildApxIncompatibleGPRMask(cast, availableIntRegs, true)); + // We need to reserve one APXIncompatible register for + // cvtt* instruction. Second temp can use EGPR. + buildInternalIntRegisterDefForNode(cast, ForceLowGprForApx(cast, availableIntRegs, true)); + buildInternalIntRegisterDefForNode(cast); } SingleTypeRegSet candidates = RBM_NONE; @@ -2970,43 +3052,19 @@ int LinearScan::BuildCast(GenTreeCast* cast) { // Here we don't need internal register to be different from targetReg, // rather require it to be different from operand's reg. 
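On the two temporaries reserved above for the unsigned LONG->DOUBLE cast when EVEX (vcvtusi2sd) is unavailable: without an unsigned convert instruction the value has to be massaged so the signed cvtsi2sd form can be reused, and that massaging needs scratch registers. One common expansion, shown only as a C-level sketch of the idea; the JIT's actual sequence may differ:

#include <cstdint>

static double ulongToDoubleSketch(uint64_t v)
{
    if ((int64_t)v >= 0)
    {
        return (double)(int64_t)v;                 // fits the signed range, direct convert
    }
    uint64_t halved = (v >> 1) | (v & 1);          // halve, folding the low bit in as a sticky bit
    return (double)(int64_t)halved * 2.0;          // convert, then double the result
}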
- // movsxd - candidates = BuildApxIncompatibleGPRMask(cast, candidates, true); - buildInternalIntRegisterDefForNode(cast, candidates); + buildInternalIntRegisterDefForNode(cast); } - // ToDo-APX: movsxd doesn't have REX2 support in .NET - const unsigned srcSize = genTypeSize(srcType); - const unsigned castSize = genTypeSize(castType); - if (varTypeUsesIntReg(srcType) && !varTypeIsUnsigned(srcType) && !varTypeIsUnsigned(castType)) - { - if ((castSize > 4) && (castSize < srcSize)) - { - // case 1 : movsdx : CHECK_INT_RANGE - candidates = BuildApxIncompatibleGPRMask(cast, candidates, true); - } - - if (castSize > srcSize) - { - // case 2 : movsdx : SIGN_EXTEND_INT or LOAD_SIGN_EXTEND_INT - candidates = BuildApxIncompatibleGPRMask(cast, candidates, true); - } - } // skipping eGPR use for cvt* - if ((varTypeUsesIntReg(src) || src->isContainedIndir()) && varTypeUsesFloatReg(cast)) + if ((varTypeUsesIntReg(src) || src->isContainedIndir()) && varTypeUsesFloatReg(cast) && !getEvexIsSupported()) { - candidates = BuildApxIncompatibleGPRMask(cast, candidates, true); + candidates = ForceLowGprForApx(cast, candidates, true); } #endif int srcCount = BuildCastUses(cast, candidates); buildInternalRegisterUses(); #ifdef TARGET_AMD64 candidates = RBM_NONE; - if ((varTypeIsFloating(srcType) && !varTypeIsFloating(castType)) || - (varTypeUsesIntReg(castType) && cast->GetRegNum() == REG_NA)) - { - candidates = BuildApxIncompatibleGPRMask(cast, candidates, true); - } #endif BuildDef(cast, candidates); @@ -3026,7 +3084,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) { // struct typed indirs are expected only on rhs of a block copy, // but in this case they must be contained. - assert(indirTree->TypeGet() != TYP_STRUCT); + assert(!indirTree->TypeIs(TYP_STRUCT)); SingleTypeRegSet useCandidates = RBM_NONE; #ifdef FEATURE_SIMD @@ -3038,12 +3096,14 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } #endif // FEATURE_SIMD +#ifdef TARGET_AMD64 if (varTypeUsesIntReg(indirTree->Addr())) { - useCandidates = BuildApxIncompatibleGPRMask(indirTree->Addr(), useCandidates, true); + useCandidates = ForceLowGprForApxIfNeeded(indirTree->Addr(), useCandidates, getEvexIsSupported()); } +#endif // TARGET_AMD64 int srcCount = BuildIndirUses(indirTree, useCandidates); - if (indirTree->gtOper == GT_STOREIND) + if (indirTree->OperIs(GT_STOREIND)) { GenTree* source = indirTree->gtGetOp2(); @@ -3098,7 +3158,6 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) CheckAndMoveRMWLastUse(index, dstIndex); } #endif // TARGET_X86 - srcCandidates = BuildApxIncompatibleGPRMask(source->AsOp(), srcCandidates, true); srcCount += BuildBinaryUses(source->AsOp(), srcCandidates); } } @@ -3113,16 +3172,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) else #endif { - GenTree* data = indirTree->Data(); - if (data->isContained() && (data->OperIs(GT_BSWAP, GT_BSWAP16)) && (int)varTypeUsesIntReg(source)) - { - /// movbe cannot use eGPR - srcCount += BuildOperandUses(source, BuildApxIncompatibleGPRMask(source, RBM_NONE, true)); - } - else - { - srcCount += BuildOperandUses(source); - } + srcCount += BuildOperandUses(source); } } } @@ -3145,7 +3195,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) assert(srcCount <= BYTE_REG_COUNT); #endif - if (indirTree->gtOper != GT_STOREIND) + if (!indirTree->OperIs(GT_STOREIND)) { BuildDef(indirTree); } @@ -3174,8 +3224,8 @@ int LinearScan::BuildMul(GenTree* tree) } // ToDo-APX : imul currently doesn't have rex2 support. So, cannot use R16-R31. 
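The GT_MULHI handling just below pins the destination to RDX because the x86 widening mul/imul forms always produce their 128-bit result in RDX:RAX. A portable one-liner for what the node computes, using the GCC/Clang __int128 extension for illustration:

#include <cstdint>

static uint64_t mulHiSketch(uint64_t a, uint64_t b)
{
    // The high 64 bits of the full product; on x64 this lowers to mul plus a read of RDX.
    return (uint64_t)(((unsigned __int128)a * b) >> 64);
}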
- int srcCount = BuildBinaryUses(tree->AsOp(), BuildApxIncompatibleGPRMask(tree->AsOp(), RBM_NONE, true)); - int dstCount = 1; + int srcCount = BuildBinaryUses(tree->AsOp()); + int dstCount = 1; SingleTypeRegSet dstCandidates = RBM_NONE; bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0); @@ -3188,7 +3238,7 @@ int LinearScan::BuildMul(GenTree* tree) // This special widening 32x32->64 MUL is not used on x64 #if defined(TARGET_X86) - if (tree->OperGet() != GT_MUL_LONG) + if (!tree->OperIs(GT_MUL_LONG)) #endif { assert((tree->gtFlags & GTF_MUL_64RSLT) == 0); @@ -3206,25 +3256,20 @@ int LinearScan::BuildMul(GenTree* tree) // dstCandidates = SRBM_RAX; } - else if (tree->OperGet() == GT_MULHI) + else if (tree->OperIs(GT_MULHI)) { // Have to use the encoding:RDX:RAX = RAX * rm. Since we only care about the // upper 32 bits of the result set the destination candidate to REG_RDX. dstCandidates = SRBM_RDX; } #if defined(TARGET_X86) - else if (tree->OperGet() == GT_MUL_LONG) + else if (tree->OperIs(GT_MUL_LONG)) { // have to use the encoding:RDX:RAX = RAX * rm dstCandidates = SRBM_RAX | SRBM_RDX; dstCount = 2; } #endif - else - { - // ToDo-APX : imul currently doesn't have rex2 support. So, cannot use R16-R31. - dstCandidates = BuildApxIncompatibleGPRMask(tree, dstCandidates, true); - } GenTree* containedMemOp = nullptr; if (op1->isContained() && !op1->IsCnsIntOrI()) { @@ -3340,7 +3385,7 @@ inline bool LinearScan::DoesThisUseGPR(GenTree* op) } //------------------------------------------------------------------------------ -// BuildApxIncompatibleGPRMask: Returns candidates or a mask representing the +// ForceLowGprForApx: Returns candidates or a mask representing the // lower GPR registers for a node that lowers to an instruction that does not // have APX support(via REX2 or eEVEX) currently (thus cannot use the eGPR registers). // The caller invokes this function when it knows the node is APX incompatible. @@ -3353,13 +3398,11 @@ inline bool LinearScan::DoesThisUseGPR(GenTree* op) // // Return Value: // updated register mask. -inline SingleTypeRegSet LinearScan::BuildApxIncompatibleGPRMask(GenTree* tree, - SingleTypeRegSet candidates, - bool forceLowGpr) +inline SingleTypeRegSet LinearScan::ForceLowGprForApx(GenTree* tree, SingleTypeRegSet candidates, bool forceLowGpr) { #if defined(TARGET_AMD64) - if (!getIsApxSupported()) + if (!getApxIsSupported()) { return candidates; } @@ -3382,4 +3425,32 @@ inline SingleTypeRegSet LinearScan::BuildApxIncompatibleGPRMask(GenTree* #endif } +//------------------------------------------------------------------------------ +// ForceLowGprForApxIfNeeded: Returns candidates or a mask representing the +// lower GPR registers for a node that lowers to an instruction that does not +// have EGPR supports via EVEX. +// +// +// Arguments: +// tree - tree to check for APX compatibility +// candidates - currently computed mask for the node +// UseApxRegs - if this is true, take out eGPR without checking any other conditions. +// +// Return Value: +// updated register mask. +inline SingleTypeRegSet LinearScan::ForceLowGprForApxIfNeeded(GenTree* tree, + SingleTypeRegSet candidates, + bool useApxRegs) +{ + // All the HWIntrinsics cannot access EGPRs when EVEX is disabled. 
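The renamed ForceLowGprForApx / ForceLowGprForApxIfNeeded helpers boil down to intersecting a candidate set with the low sixteen GPRs whenever the instruction cannot encode r16-r31. A toy model with hypothetical mask constants; real JIT register masks are richer than a uint32_t:

#include <cstdint>

using RegSet = uint32_t;
constexpr RegSet kAllGprs = 0xFFFFFFFFu;   // r0-r31
constexpr RegSet kLowGprs = 0x0000FFFFu;   // r0-r15, encodable without REX2/EVEX

static RegSet forceLowGprSketch(RegSet candidates, bool instructionCanUseApxRegs)
{
    if (instructionCanUseApxRegs)
    {
        return candidates;                                        // eGPRs are fine, leave the mask alone
    }
    return (candidates == 0 ? kAllGprs : candidates) & kLowGprs;  // empty mask means "any register"
}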
+ if (!useApxRegs) + { + return ForceLowGprForApx(tree, candidates); + } + else + { + return candidates; + } +} + #endif // TARGET_XARCH diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 265aebdccc67..1c25e892c30d 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -62,7 +62,7 @@ PhaseStatus Compiler::fgMorphInit() { for (unsigned i = 0; i < info.compArgsCount; i++) { - if (lvaGetDesc(i)->TypeGet() == TYP_REF) + if (lvaGetDesc(i)->TypeIs(TYP_REF)) { // confirm that the argument is a GC pointer (for debugging (GC stress)) GenTree* op = gtNewLclvNode(i, TYP_REF); @@ -130,7 +130,7 @@ GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* ope // assert that oper is unchanged and that it is still a GT_CAST node noway_assert(tree->AsCast()->CastOp() == oper); - noway_assert(tree->gtOper == GT_CAST); + noway_assert(tree->OperIs(GT_CAST)); } result = fgMorphIntoHelperCall(tree, helper, true /* morphArgs */, oper); assert(result == tree); @@ -297,26 +297,14 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // Arm64: src = float, dst is overflow conversion. +#ifdef TARGET_64BIT + // 64-bit: src = float, dst is overflow conversion. // This goes through helper and hence src needs to be converted to double. && tree->gtOverflow() -#elif defined(TARGET_AMD64) - // Amd64: src = float, dst = uint64 or overflow conversion. - // src needs to be converted to double except for the following cases - // dstType = int/uint/ulong for AVX512F - // dstType = int for SSE41 - // For pre-SSE41, the all src is converted to TYP_DOUBLE - // and goes through helpers. - && (tree->gtOverflow() || (dstType == TYP_LONG) || - !(canUseEvexEncoding() || (dstType == TYP_INT && compOpportunisticallyDependsOn(InstructionSet_SSE41)))) -#elif defined(TARGET_ARM) - // Arm: src = float, dst = int64/uint64 or overflow conversion. - && (tree->gtOverflow() || varTypeIsLong(dstType)) #else - // x86: src = float, dst = uint32/int64/uint64 or overflow conversion. - && (tree->gtOverflow() || varTypeIsIntegral(dstType)) -#endif + // 32-bit: src = float, dst = int64/uint64 or overflow conversion. + && (tree->gtOverflow() || varTypeIsLong(dstType)) +#endif // TARGET_64BIT ) { oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE); @@ -337,50 +325,24 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -// ARM64 and LoongArch64 optimize all non-overflow checking conversions -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#ifdef TARGET_64BIT return nullptr; #else -#if defined(TARGET_AMD64) - // Following nodes are handled when lowering the nodes - // float -> ulong/uint/int for AVX512F - // double -> ulong/uint/long/int for AVX512F - // float -> int for SSE41 - // double -> int/uint/long for SSE41 - // For all other conversions, we use helper functions. 
- if (canUseEvexEncoding() || - ((dstType != TYP_ULONG) && compOpportunisticallyDependsOn(InstructionSet_SSE41))) - { - if (tree->CastOp() != oper) - { - tree->CastOp() = oper; - } + if (!varTypeIsLong(dstType)) + { return nullptr; } -#endif // TARGET_AMD64 + switch (dstType) { - case TYP_INT: -#ifdef TARGET_XARCH - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper); -#endif // TARGET_XARCH - return nullptr; - - case TYP_UINT: -#if defined(TARGET_ARM) - return nullptr; -#endif - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); - case TYP_LONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); - case TYP_ULONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); default: unreached(); } -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 +#endif // TARGET_64BIT } else { @@ -417,11 +379,16 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) // Because there is no IL instruction conv.r4.un, uint/ulong -> float // casts are always imported as CAST(float <- CAST(double <- uint/ulong)). // We can usually eliminate the redundant intermediate cast as an optimization. + // // AArch and xarch+EVEX have instructions that can cast directly from - // all integers (except for longs on 32-bit of course) to floats. + // all integers (except for longs on ARM32) to floats. // On x64, we also have the option of widening uint -> long and // using the signed conversion instructions, and ulong -> float/double // is handled directly in codegen, so we can allow all casts. + // + // This logic will also catch CAST(float <- CAST(double <- float)) + // and reduce it to CAST(float <- float), which is handled in codegen as + // an optional mov. else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && oper->OperIs(GT_CAST) #ifndef TARGET_64BIT && !varTypeIsLong(oper->AsCast()->CastOp()) @@ -442,22 +409,16 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) // converts long/ulong --> float/double casts into helper calls. else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType)) { + CorInfoHelpFunc helper = CORINFO_HELP_UNDEF; if (dstType == TYP_FLOAT) { - // there is only a double helper, so we - // - change the dsttype to double - // - insert a cast from double to float - // - recurse into the resulting tree - tree->CastToType() = TYP_DOUBLE; - tree->gtType = TYP_DOUBLE; - - tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); - - return fgMorphTree(tree); + helper = tree->IsUnsigned() ? CORINFO_HELP_ULNG2FLT : CORINFO_HELP_LNG2FLT; + } + else + { + helper = tree->IsUnsigned() ? CORINFO_HELP_ULNG2DBL : CORINFO_HELP_LNG2DBL; } - if (tree->gtFlags & GTF_UNSIGNED) - return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); - return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); + return fgMorphCastIntoHelper(tree, helper, oper); } #endif // TARGET_ARM @@ -481,6 +442,15 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) #endif // TARGET_AMD64 #ifdef TARGET_X86 +#ifdef FEATURE_HW_INTRINSICS + else if (varTypeIsLong(srcType) && varTypeIsFloating(dstType) && canUseEvexEncoding()) + { + // We can handle these casts directly using SIMD instructions. + // The transform to SIMD is done in DecomposeLongs. + return nullptr; + } +#endif // FEATURE_HW_INTRINSICS + // Do we have to do two step U4/8 -> R4/8 ? 
else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) { @@ -488,41 +458,23 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (srcType == TYP_ULONG) { - return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); + CorInfoHelpFunc helper = (dstType == TYP_FLOAT) ? CORINFO_HELP_ULNG2FLT : CORINFO_HELP_ULNG2DBL; + return fgMorphCastIntoHelper(tree, helper, oper); } else if (srcType == TYP_UINT && !canUseEvexEncoding()) { oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - tree->gtFlags &= ~GTF_UNSIGNED; - return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); + tree->ClearUnsigned(); + + CorInfoHelpFunc helper = (dstType == TYP_FLOAT) ? CORINFO_HELP_LNG2FLT : CORINFO_HELP_LNG2DBL; + return fgMorphCastIntoHelper(tree, helper, oper); } } else if (!tree->IsUnsigned() && (srcType == TYP_LONG) && varTypeIsFloating(dstType)) { - oper = fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); - - // Since we don't have a Jit Helper that converts to a TYP_FLOAT - // we just use the one that converts to a TYP_DOUBLE - // and then add a cast to TYP_FLOAT - // - if ((dstType == TYP_FLOAT) && oper->OperIs(GT_CALL)) - { - // Fix the return type to be TYP_DOUBLE - // - oper->gtType = TYP_DOUBLE; - oper->SetMorphed(this); - - // Add a Cast to TYP_FLOAT - // - tree = gtNewCastNode(TYP_FLOAT, oper, false, TYP_FLOAT); - tree->SetMorphed(this); - return tree; - } - else - { - return oper; - } + CorInfoHelpFunc helper = (dstType == TYP_FLOAT) ? CORINFO_HELP_LNG2FLT : CORINFO_HELP_LNG2DBL; + return fgMorphCastIntoHelper(tree, helper, oper); } #endif // TARGET_X86 else @@ -677,7 +629,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) } // Clear the GT_MUL_64RSLT if it is set. - if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT)) + if (oper->OperIs(GT_MUL) && (oper->gtFlags & GTF_MUL_64RSLT)) { oper->gtFlags &= ~GTF_MUL_64RSLT; } @@ -711,6 +663,8 @@ const char* getWellKnownArgName(WellKnownArg arg) return "VarArgsCookie"; case WellKnownArg::InstParam: return "InstParam"; + case WellKnownArg::AsyncContinuation: + return "AsyncContinuation"; case WellKnownArg::RetBuffer: return "RetBuffer"; case WellKnownArg::PInvokeFrame: @@ -741,6 +695,8 @@ const char* getWellKnownArgName(WellKnownArg arg) return "X86TailCallSpecialArg"; case WellKnownArg::StackArrayLocal: return "StackArrayLocal"; + case WellKnownArg::RuntimeMethodHandle: + return "RuntimeMethodHandle"; } return "N/A"; @@ -753,26 +709,13 @@ void CallArg::Dump(Compiler* comp) { printf("CallArg[[%06u].%s", comp->dspTreeID(GetNode()), GenTree::OpName(GetNode()->OperGet())); printf(" %s", varTypeName(m_signatureType)); - printf(" (%s)", AbiInfo.PassedByRef ? "By ref" : "By value"); - if (AbiInfo.GetRegNum() != REG_STK) - { - printf(", %u reg%s:", AbiInfo.NumRegs, AbiInfo.NumRegs == 1 ? "" : "s"); - for (unsigned i = 0; i < AbiInfo.NumRegs; i++) - { - printf(" %s", getRegName(AbiInfo.GetRegNum(i))); - } - } - if (AbiInfo.GetStackByteSize() > 0) - { - printf(", byteSize=%u, byteOffset=%u", AbiInfo.ByteSize, AbiInfo.ByteOffset); - } - if (GetLateNode() != nullptr) - { - printf(", isLate"); - } - if (AbiInfo.IsSplit()) + printf(" (%s)", AbiInfo.IsPassedByReference() ? 
"By ref" : "By value"); + printf(", %u segments:", AbiInfo.NumSegments); + for (const ABIPassingSegment& segment : AbiInfo.Segments()) { - printf(", isSplit"); + printf(" <"); + segment.Dump(); + printf(">"); } if (m_needPlace) { @@ -782,10 +725,6 @@ void CallArg::Dump(Compiler* comp) { printf(", processed"); } - if (AbiInfo.IsHfaRegArg()) - { - printf(", isHfa(%s)", varTypeName(AbiInfo.GetHfaType())); - } if (m_wellKnownArg != WellKnownArg::None) { printf(", wellKnown[%s]", getWellKnownArgName(m_wellKnownArg)); @@ -813,21 +752,14 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) assert(argx != nullptr); bool canEvalToTemp = true; - if (arg.AbiInfo.GetRegNum() == REG_STK) - { - assert(m_hasStackArgs); #if !FEATURE_FIXED_OUT_ARGS + if (!arg.AbiInfo.HasAnyRegisterSegment()) + { // Non-register arguments are evaluated and pushed in order; they // should never go in the late arg list. canEvalToTemp = false; -#endif } -#if FEATURE_ARG_SPLIT - else if (arg.AbiInfo.IsSplit()) - { - assert(m_hasStackArgs); - } -#endif // FEATURE_ARG_SPLIT +#endif // If the argument tree contains a store (GTF_ASG) then the argument and // and every earlier argument (except constants) must be evaluated into temps @@ -861,7 +793,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) } #if !FEATURE_FIXED_OUT_ARGS - if (prevArg.AbiInfo.GetRegNum() == REG_STK) + if (!prevArg.AbiInfo.HasAnyRegisterSegment()) { // All stack args are already evaluated and placed in order // in this case. @@ -906,7 +838,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) continue; } - if (otherArg.AbiInfo.GetRegNum() == REG_STK) + if (!otherArg.AbiInfo.HasAnyRegisterSegment()) { treatLikeCall = true; break; @@ -931,7 +863,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) { SetNeedsTemp(&arg); } - else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL)) + else if (varTypeIsFloating(argx->TypeGet()) && argx->OperIs(GT_CALL)) { // Spill all arguments that are floating point calls SetNeedsTemp(&arg); @@ -947,7 +879,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) } #if !FEATURE_FIXED_OUT_ARGS - if (prevArg.AbiInfo.GetRegNum() == REG_STK) + if (!prevArg.AbiInfo.HasAnyRegisterSegment()) { // All stack args are already evaluated and placed in order // in this case. @@ -964,12 +896,12 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) #if FEATURE_FIXED_OUT_ARGS // Or, if they are stored into the FIXED_OUT_ARG area // we require that they be moved to the late list - else if (prevArg.AbiInfo.GetRegNum() == REG_STK) + else if (!prevArg.AbiInfo.HasAnyRegisterSegment()) { prevArg.m_needPlace = true; } #if FEATURE_ARG_SPLIT - else if (prevArg.AbiInfo.IsSplit()) + else if (prevArg.AbiInfo.IsSplitAcrossRegistersAndStack()) { prevArg.m_needPlace = true; } @@ -1010,7 +942,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) } #if !FEATURE_FIXED_OUT_ARGS - if (prevArg.AbiInfo.GetRegNum() == REG_STK) + if (!prevArg.AbiInfo.HasAnyRegisterSegment()) { // All stack args are already evaluated and placed in order // in this case. @@ -1035,64 +967,6 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) prevExceptionTree = argx; prevExceptionFlags = exceptionFlags; } - -#if FEATURE_MULTIREG_ARGS - // In "fgMorphMultiRegStructArg" we will expand the arg into a GT_FIELD_LIST with multiple indirections, so - // here we consider spilling it into a local. 
We also need to spill it in case we have a node that we do not - // currently handle in multi-reg morphing. - // This logic can be skipped when the arg is already in the right multireg arg shape. - // - if (varTypeIsStruct(argx) && !arg.m_needTmp && !argx->OperIs(GT_FIELD_LIST)) - { - if ((arg.AbiInfo.NumRegs > 0) && ((arg.AbiInfo.NumRegs + arg.AbiInfo.GetStackSlotsNumber()) > 1)) - { - if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) - { - // Spill multireg struct arguments that have stores or calls embedded in them. - SetNeedsTemp(&arg); - } - else if (!argx->OperIsLocalRead() && !argx->OperIsLoad()) - { - // TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing. - SetNeedsTemp(&arg); - } - else if (comp->opts.OptimizationEnabled()) - { - // Finally, we call gtPrepareCost to measure the cost of evaluating this tree. - comp->gtPrepareCost(argx); - - if (argx->GetCostEx() > (6 * IND_COST_EX)) - { - // Spill multireg struct arguments that are expensive to evaluate twice. - SetNeedsTemp(&arg); - } - } - } - - // We are only able to expand certain "BLK"s into field lists, so here we spill all the - // "mis-sized" ones. We could in theory support them directly with some arithmetic and - // shifts, but these cases are rare enough that it is probably not worth the complexity. - // No need to do this for stack args as they are directly supported by codegen. - // - if (argx->OperIs(GT_BLK) && (arg.AbiInfo.GetRegNum() != REG_STK)) - { - GenTreeBlk* argObj = argx->AsBlk(); - unsigned structSize = argObj->Size(); - unsigned lastLoadSize = structSize % TARGET_POINTER_SIZE; - - if ((lastLoadSize != 0) && !isPow2(lastLoadSize)) - { -#ifdef TARGET_ARM - // On ARM we don't expand split args larger than 16 bytes into field lists. - if (!arg.AbiInfo.IsSplit() || (structSize <= 16)) -#endif // TARGET_ARM - { - SetNeedsTemp(&arg); - } - } - } - } -#endif // FEATURE_MULTIREG_ARGS } #if FEATURE_FIXED_OUT_ARGS @@ -1125,7 +999,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // Examine the register args that are currently not marked needTmp // - if (!arg.m_needTmp && (arg.AbiInfo.GetRegNum() != REG_STK)) + if (!arg.m_needTmp && arg.AbiInfo.HasAnyRegisterSegment()) { if (hasStackArgsWeCareAbout) { @@ -1241,7 +1115,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) CallArg* arg = sortedArgs[curInx]; - if (arg->AbiInfo.GetRegNum() != REG_STK) + if (arg->AbiInfo.HasAnyRegisterSegment()) { regCount++; } @@ -1255,7 +1129,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) assert(argx != nullptr); // put constants at the end of the table // - if (argx->gtOper == GT_CNS_INT) + if (argx->OperIs(GT_CNS_INT)) { noway_assert(curInx <= endTab); @@ -1480,51 +1354,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) assert(argsRemaining == 0); } -//------------------------------------------------------------------------------ -// MakeTmpArgNode: -// Create a temp for an argument if needed. We usually need this to be done -// in order to enforce ordering of the evaluation of arguments. -// -// Return Value: -// the newly created temp var tree. 
-// -GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum) -{ - LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); - var_types argType = varDsc->TypeGet(); - assert(genActualType(argType) == genActualType(arg->GetSignatureType())); - - GenTree* argNode = nullptr; - - if (varTypeIsStruct(argType)) - { - if (arg->AbiInfo.PassedByRef) - { - argNode = comp->gtNewLclVarAddrNode(lclNum); - comp->lvaSetVarAddrExposed(lclNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); - } - // TODO-CQ: currently this mirrors the logic in "fgMorphArgs", but actually we only need - // this retyping for args passed in a single register: "(NumRegs == 1) && !IsSplit()". - else if (arg->AbiInfo.ArgType != TYP_STRUCT) - { - argNode = comp->gtNewLclFldNode(lclNum, arg->AbiInfo.ArgType, 0); - comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg)); - } - else - { - // We are passing this struct by value in multiple registers and/or on stack. - argNode = comp->gtNewLclvNode(lclNum, argType); - } - } - else - { - assert(!arg->AbiInfo.PassedByRef); - argNode = comp->gtNewLclvNode(lclNum, argType); - } - - return argNode; -} - //------------------------------------------------------------------------------ // EvalArgsToTemps: Handle arguments that were marked as requiring temps. // @@ -1589,7 +1418,7 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) // Only the register arguments need to be replaced with placeholder nodes. // Stacked arguments are evaluated and pushed (or stored into the stack) in order. // - if (arg.AbiInfo.GetRegNum() == REG_STK) + if (!arg.AbiInfo.HasAnyRegisterSegment()) continue; #endif @@ -1606,29 +1435,36 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) } #endif -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - noway_assert(argx->gtType != TYP_STRUCT); -#endif - - if (argx->OperIs(GT_FIELD_LIST)) + GenTree* argxEffectiveVal = argx->gtEffectiveVal(); + if (argxEffectiveVal->OperIs(GT_FIELD_LIST)) { - GenTreeFieldList* fieldList = argx->AsFieldList(); + GenTreeFieldList* fieldList = argxEffectiveVal->AsFieldList(); fieldList->gtFlags &= ~GTF_ALL_EFFECT; - for (GenTreeFieldList::Use& use : fieldList->Uses()) - { - unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); - GenTree* store = comp->gtNewTempStore(tmpVarNum, use.GetNode()); - store->SetMorphed(comp); + auto appendEffect = [=, &setupArg](GenTree* effect) { if (setupArg == nullptr) { - setupArg = store; + setupArg = effect; } else { - setupArg = comp->gtNewOperNode(GT_COMMA, TYP_VOID, setupArg, store); + setupArg = comp->gtNewOperNode(GT_COMMA, TYP_VOID, setupArg, effect); setupArg->SetMorphed(comp); } + }; + + for (GenTree* comma = argx; comma->OperIs(GT_COMMA); comma = comma->gtGetOp2()) + { + appendEffect(comma->gtGetOp1()); + } + + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); + GenTree* store = comp->gtNewTempStore(tmpVarNum, use.GetNode()); + store->SetMorphed(comp); + + appendEffect(store); GenTree* setupUse = comp->gtNewLclvNode(tmpVarNum, genActualType(use.GetNode())); setupUse->SetMorphed(comp); @@ -1648,31 +1484,14 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum); var_types lclVarType = genActualType(argx->gtType); - var_types scalarType = TYP_UNKNOWN; if (setupArg->OperIsCopyBlkOp()) { setupArg = comp->fgMorphCopyBlock(setupArg); -#if 
defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT)) - { - scalarType = arg.AbiInfo.ArgType; - } -#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) } - // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => - // 8) - if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) - { - // Create a GT_LCL_FLD using the wider type to go to the late argument list - defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); - } - else - { - // Create a copy of the temp to go to the late argument list - defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType); - } + // Create a copy of the temp to go to the late argument list + defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType); defArg->SetMorphed(comp); } @@ -1696,7 +1515,7 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) // this argument so we have to replace it in the gtCallArgs list // (the initial argument evaluation list) with a placeholder. // - if ((arg.AbiInfo.GetRegNum() == REG_STK) && !arg.m_needPlace) + if (!arg.AbiInfo.HasAnyRegisterSegment() && !arg.m_needPlace) { continue; } @@ -1705,24 +1524,16 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) defArg = argx; -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - - // All structs are either passed (and retyped) as integral types, OR they - // are passed by reference. - noway_assert(argx->gtType != TYP_STRUCT); - -#endif // !(defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) - #ifdef DEBUG if (comp->verbose) { - if (arg.AbiInfo.GetRegNum() == REG_STK) + if (arg.AbiInfo.HasAnyRegisterSegment()) { - printf("Deferred stack argument :\n"); + printf("Deferred argument:\n"); } else { - printf("Deferred argument ('%s'):\n", getRegName(arg.AbiInfo.GetRegNum())); + printf("Deferred stack argument:\n"); } comp->gtDispTree(argx); @@ -1753,12 +1564,15 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) #ifdef DEBUG if (comp->verbose) { - printf("\nRegister placement order: "); + printf("\nRegister placement order:"); for (CallArg& arg : LateArgs()) { - if (arg.AbiInfo.GetRegNum() != REG_STK) + for (const ABIPassingSegment& segment : arg.AbiInfo.Segments()) { - printf("%s ", getRegName(arg.AbiInfo.GetRegNum())); + if (segment.IsPassedInRegister()) + { + printf(" %s", getRegName(segment.GetRegister())); + } } } printf("\n"); @@ -1870,12 +1684,11 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call { assert(&call->gtArgs == this); - if (IsAbiInformationDetermined()) + if (m_hasAddedFinalArgs) { - // We've already determined ABI information. 
return; } - JITDUMP("Initializing arg info for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); + JITDUMP("Adding final args and determining ABI info for [%06u]:\n", Compiler::dspTreeID(call)); m_hasRegArgs = false; m_hasStackArgs = false; @@ -2063,181 +1876,27 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call else { ABIPassingSegment segment = ABIPassingSegment::InRegister(nonStdRegNum, 0, TARGET_POINTER_SIZE); - abiInfo = ABIPassingInformation::FromSegment(comp, segment); + abiInfo = ABIPassingInformation::FromSegmentByValue(comp, segment); } JITDUMP("Argument %u ABI info: ", GetIndex(&arg)); DBEXEC(VERBOSE, abiInfo.Dump()); - arg.NewAbiInfo = abiInfo; - arg.AbiInfo = CallArgABIInformation(); - - if (varTypeIsStruct(argSigType)) - { - assert(argx == arg.GetEarlyNode()); - - Compiler::structPassingKind howToPassStruct; - var_types structBaseType = - comp->getArgTypeForStruct(argSigClass, &howToPassStruct, IsVarArgs(), argLayout->GetSize()); -#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - if (arg.NewAbiInfo.HasAnyFloatingRegisterSegment()) - { - // Struct passed according to hardware floating-point calling convention - assert(!arg.NewAbiInfo.HasAnyStackSegment()); - assert(howToPassStruct == Compiler::SPK_ByValue || howToPassStruct == Compiler::SPK_PrimitiveType); - if (arg.NewAbiInfo.NumSegments == 2) - { - // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" - // for "struct { float, float }", and retyping to a primitive here will cause the - // multi-reg morphing to not kick in (the struct in question needs to be passed in - // two FP registers). Here is just keep "structBaseType" as "TYP_STRUCT". - // TODO-LoongArch64: fix "getPrimitiveTypeForStruct". - structBaseType = TYP_STRUCT; - } - else - { - assert(arg.NewAbiInfo.NumSegments == 1); - structBaseType = arg.NewAbiInfo.Segment(0).GetRegisterType(); - } - } -#endif // defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - arg.AbiInfo.PassedByRef = howToPassStruct == Compiler::SPK_ByReference; - arg.AbiInfo.ArgType = structBaseType == TYP_UNKNOWN ? argx->TypeGet() : structBaseType; - } - else - { - arg.AbiInfo.ArgType = argx->TypeGet(); - } - - if (abiInfo.IsSplitAcrossRegistersAndStack()) - { - m_hasStackArgs = true; - m_hasRegArgs = true; - - arg.AbiInfo.SetSplit(true); - arg.AbiInfo.ByteOffset = 0; - unsigned regNumIndex = 0; - for (const ABIPassingSegment& segment : abiInfo.Segments()) - { - if (segment.IsPassedInRegister()) - { - if (regNumIndex < MAX_ARG_REG_COUNT) - { - arg.AbiInfo.SetRegNum(regNumIndex, segment.GetRegister()); - regNumIndex++; - } - - arg.AbiInfo.NumRegs++; - } - else - { - assert(segment.GetStackOffset() == 0); - } - } - } - else if (abiInfo.HasAnyRegisterSegment()) - { - // This is a register argument - m_hasRegArgs = true; - - unsigned regNumIndex = 0; - for (const ABIPassingSegment& segment : abiInfo.Segments()) - { - if (regNumIndex < MAX_ARG_REG_COUNT) - { - arg.AbiInfo.SetRegNum(regNumIndex, segment.GetRegister()); - regNumIndex++; - } - - arg.AbiInfo.NumRegs++; - -#ifdef TARGET_ARM - // Old style ABI info expects two registers counted for these segments. 
- if (segment.GetRegisterType() == TYP_DOUBLE) - { - arg.AbiInfo.NumRegs++; - - if (argSigType == TYP_DOUBLE) - { - arg.AbiInfo.SetRegNum(regNumIndex, REG_NEXT(segment.GetRegister())); - regNumIndex++; - } - } -#endif - } - -#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - INDEBUG(arg.CheckIsStruct()); -#endif - } - else - { - assert(abiInfo.HasAnyStackSegment()); - // We only expect to see one stack segment in these cases. - assert(abiInfo.NumSegments == 1); - // This is a stack argument - m_hasStackArgs = true; - const ABIPassingSegment& segment = abiInfo.Segment(0); - arg.AbiInfo.SetRegNum(0, REG_STK); - arg.AbiInfo.ByteOffset = segment.GetStackOffset(); - } - - // TODO-Cleanup: remove HFA information from VarDsc. - var_types hfaType = TYP_UNDEF; - bool isHfaArg = false; - unsigned hfaSlots = 0; + arg.AbiInfo = abiInfo; - if (GlobalJitOptions::compFeatureHfa) + for (const ABIPassingSegment& segment : abiInfo.Segments()) { - hfaType = comp->GetHfaType(argSigClass); - isHfaArg = varTypeIsValidHfaType(hfaType); - - if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && IsVarArgs()) + if (segment.IsPassedOnStack()) { - // Make sure for vararg methods isHfaArg is not true. - isHfaArg = false; - } - - if (isHfaArg) - { - hfaSlots = comp->GetHfaCount(argSigClass); - - // If we have a HFA struct it's possible we transition from a method that originally - // only had integer types to now start having FP types. We have to communicate this - // through this flag since LSRA later on will use this flag to determine whether - // or not to track the FP register set. - // - comp->compFloatingPointUsed = true; - } - } - - if (arg.AbiInfo.PassedByRef) - { - arg.AbiInfo.ByteSize = TARGET_POINTER_SIZE; - } - else - { - unsigned size = argLayout != nullptr ? argLayout->GetSize() : genTypeSize(argSigType); - - // Apple arm64 reuses the same stack slot for multiple args in some - // cases; old ABI info reflects that in the size. - // Primitives and float HFAs do not necessarily take up full stack - // slots. - if (compAppleArm64Abi() && (!varTypeIsStruct(argSigType) || (isHfaArg && (hfaType == TYP_FLOAT)))) - { - arg.AbiInfo.ByteSize = size; + m_hasStackArgs = true; } else { - arg.AbiInfo.ByteSize = roundUp(size, TARGET_POINTER_SIZE); + m_hasRegArgs = true; + comp->compFloatingPointUsed |= genIsValidFloatReg(segment.GetRegister()); } } - - if (isHfaArg) - { - arg.AbiInfo.SetHfaType(hfaType, hfaSlots); - } - } // end foreach argument loop + } m_argsStackSize = classifier.StackSize(); @@ -2254,8 +1913,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } #endif - m_abiInformationDetermined = true; - m_newAbiInformationDetermined = true; + m_abiInformationDetermined = true; + m_hasAddedFinalArgs = true; } //------------------------------------------------------------------------ @@ -2267,7 +1926,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call // comp - The compiler object. // call - The call to which the CallArgs belongs. 
// -void CallArgs::DetermineNewABIInfo(Compiler* comp, GenTreeCall* call) +void CallArgs::DetermineABIInfo(Compiler* comp, GenTreeCall* call) { ClassifierInfo info; info.CallConv = call->GetUnmanagedCallConv(); @@ -2291,26 +1950,17 @@ void CallArgs::DetermineNewABIInfo(Compiler* comp, GenTreeCall* call) if (nonStdRegNum == REG_NA) { - arg.NewAbiInfo = classifier.Classify(comp, argSigType, argLayout, arg.GetWellKnownArg()); + arg.AbiInfo = classifier.Classify(comp, argSigType, argLayout, arg.GetWellKnownArg()); } else { ABIPassingSegment segment = ABIPassingSegment::InRegister(nonStdRegNum, 0, TARGET_POINTER_SIZE); - arg.NewAbiInfo = ABIPassingInformation::FromSegment(comp, segment); + arg.AbiInfo = ABIPassingInformation::FromSegmentByValue(comp, segment); } - - // TODO-Cleanup: This should be added to the new ABI info. - Compiler::structPassingKind passingKind = Compiler::SPK_ByValue; - if (argLayout != nullptr) - { - comp->getArgTypeForStruct(argSigClass, &passingKind, call->IsVarargs(), argLayout->GetSize()); - } - - arg.AbiInfo.PassedByRef = passingKind == Compiler::SPK_ByReference; } - m_argsStackSize = classifier.StackSize(); - m_newAbiInformationDetermined = true; + m_argsStackSize = classifier.StackSize(); + m_abiInformationDetermined = true; } //------------------------------------------------------------------------ @@ -2399,10 +2049,6 @@ unsigned CallArgs::CountUserArgs() // argument and replaced in the "early" arg list with a placeholder node. // Also see `CallArgs::EvalArgsToTemps`. // -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) { GenTreeFlags flagsSummary = GTF_EMPTY; @@ -2427,9 +2073,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // information about late arguments in CallArgs. // This information is used later to construct the late args - // Note that this name a misnomer - it indicates that there are struct args - // that are passed by value in more than one register or on stack. - bool hasMultiregStructArgs = false; for (CallArg& arg : call->gtArgs.Args()) { GenTree** parentArgx = &arg.EarlyNodeRef(); @@ -2457,14 +2100,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } - // TODO-ARGS: Review this, is it really necessary to treat them specially here? - if (call->gtArgs.IsNonStandard(this, call, &arg) && arg.AbiInfo.IsPassedInRegisters()) - { - flagsSummary |= argx->gtFlags; - continue; - } - assert(arg.AbiInfo.ByteSize > 0); - // For pointers to locals we can skip reporting GC info and also skip zero initialization. // NOTE: We deferred this from the importer because of the inliner. if (argx->OperIs(GT_LCL_ADDR)) @@ -2472,65 +2107,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) argx->gtType = TYP_I_IMPL; } - bool isStructArg = varTypeIsStruct(arg.GetSignatureType()); - GenTree* argObj = argx->gtEffectiveVal(); - bool makeOutArgCopy = false; - - if (argObj->OperIs(GT_FIELD_LIST)) - { - // FIELD_LISTs can be created directly by physical promotion. - // Physical promotion will create this shape even for single-reg - // arguments. We strip the field list here for that case as the - // rest of the JIT does not expect single-reg args to be wrapped - // like that. 
-        if (arg.NewAbiInfo.HasExactlyOneRegisterSegment())
-        {
-            GenTreeFieldList* fieldList = argObj->AsFieldList();
-            assert(fieldList->Uses().GetHead()->GetNext() == nullptr);
-            GenTree* node = fieldList->Uses().GetHead()->GetNode();
-
-            JITDUMP("Replacing single-field FIELD_LIST [%06u] by sole field [%06u]\n", dspTreeID(fieldList),
-                    dspTreeID(node));
-
-            assert(varTypeUsesSameRegType(node, arg.AbiInfo.ArgType));
-            GenTree** effectiveUse = parentArgx;
-            while ((*effectiveUse)->OperIs(GT_COMMA))
-            {
-                effectiveUse = &(*effectiveUse)->AsOp()->gtOp2;
-            }
-            *effectiveUse = node;
-
-            argx   = *parentArgx;
-            argObj = node;
-        }
-        }
-        else if (isStructArg && !reMorphing)
+        if (varTypeIsStruct(arg.GetSignatureType()) && !reMorphing)
         {
-            unsigned originalSize;
-            if (argObj->TypeIs(TYP_STRUCT))
-            {
-                assert(argObj->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD));
-                originalSize = argObj->GetLayout(this)->GetSize();
-            }
-            else
-            {
-                originalSize = genTypeSize(argx);
-            }
-
-            assert(argx->TypeGet() == arg.GetSignatureType());
-            assert(originalSize == info.compCompHnd->getClassSize(arg.GetSignatureClassHandle()));
-
-            // First, handle the case where the argument is passed by reference.
-            if (arg.AbiInfo.PassedByRef)
+            bool makeOutArgCopy = false;
+            if (arg.AbiInfo.IsPassedByReference())
             {
-                assert(arg.AbiInfo.ByteSize == TARGET_POINTER_SIZE);
                 makeOutArgCopy = true;
-#ifdef UNIX_AMD64_ABI
-                assert(!"Structs are not passed by reference on x64/ux");
-#endif // UNIX_AMD64_ABI
             }
-            else // This is passed by value.
+            else if (fgTryMorphStructArg(&arg))
             {
+                argx = *parentArgx;
             }
             else
-#endif
             {
-                // We must have exactly one register or slot.
-                assert(((arg.AbiInfo.NumRegs == 1) && (arg.AbiInfo.GetStackSlotsNumber() == 0)) ||
-                       ((arg.AbiInfo.NumRegs == 0) && (arg.AbiInfo.GetStackSlotsNumber() == 1)));
+                makeOutArgCopy = true;
             }
-        }
-#endif
+            if (makeOutArgCopy)
             {
-                argx = fgMorphLclArgToFieldlist(argx->AsLclVar());
-                arg.SetEarlyNode(argx);
-            }
-            else if (argx->OperIs(GT_LCL_FLD))
-            {
-                lvaSetVarDoNotEnregister(argx->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
+                fgMakeOutgoingStructArgCopy(call, &arg);
+
+                if (arg.GetEarlyNode() != nullptr)
+                {
+                    flagsSummary |= arg.GetEarlyNode()->gtFlags;
+                }
             }
         }
-#endif // TARGET_X86

         flagsSummary |= arg.GetEarlyNode()->gtFlags;
@@ -2798,11 +2385,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
         call->gtArgs.EvalArgsToTemps(this, call);
     }

-    if (hasMultiregStructArgs)
-    {
-        fgMorphMultiregStructArgs(call);
-    }
-
 #ifdef DEBUG
     if (verbose)
     {
@@ -2816,94 +2398,36 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 #endif
     return call;
 }
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif

 //-----------------------------------------------------------------------------
-// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
-//    call fgMorphMultiregStructArg on each of them.
+// fgTryMorphStructArg: +// Given a varTypeIsStruct argument, try to morph it into a shape that the +// backend supports. // // Arguments: -// call : a GenTreeCall node that has one or more TYP_STRUCT arguments\. +// arg - The argument // -// Notes: -// We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types. -// It will ensure that the struct arguments are in the correct form. -// If this method fails to find any TYP_STRUCT arguments it will assert. -// -void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) -{ - bool foundStructArg = false; - GenTreeFlags flagsSummary = GTF_EMPTY; - -#ifdef TARGET_X86 - assert(!"Logic error: no MultiregStructArgs for X86"); -#endif -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI"); -#endif - - for (CallArg& arg : call->gtArgs.Args()) - { - if ((arg.AbiInfo.ArgType == TYP_STRUCT) && !arg.AbiInfo.PassedByRef) - { - foundStructArg = true; - GenTree*& argx = arg.NodeRef(); - - if (!argx->OperIs(GT_FIELD_LIST)) - { - argx = fgMorphMultiregStructArg(&arg); - } - } - } - - // We should only call this method when we actually have one or more multireg struct args - assert(foundStructArg); - - // Update the flags - call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); -} - -//----------------------------------------------------------------------------- -// fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list, -// morph the argument as needed to be passed correctly. +// Returns: +// False if the argument cannot be put into a shape supported by the backend. // -// Arguments: -// arg - The argument containing a struct node. +// Remarks: +// The backend requires register-passed arguments to be of FIELD_LIST shape. +// For split arguments it is additionally required that registers and stack +// slots have clean mappings to fields. +// For stack-passed arguments the backend supports struct-typed arguments +// directly. // -// Notes: -// The arg node must be a GT_BLK or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT. -// If arg node is a lclVar passed on the stack, we will ensure that any lclVars that must be on the -// stack are marked as doNotEnregister, and then we return. -// -// If it is passed by register, we mutate the argument into the GT_FIELD_LIST form -// which is only used for struct arguments. -// -// If arg is a LclVar we check if it is struct promoted and has the right number of fields -// and if they are at the appropriate offsets we will use the struct promted fields -// in the GT_FIELD_LIST nodes that we create. -// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements -// we will use a set of GT_LCL_FLDs nodes to access the various portions of the struct -// this also forces the struct to be stack allocated into the local frame. -// For the GT_BLK case will clone the address expression and generate two (or more) -// indirections. 
-// -GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) +bool Compiler::fgTryMorphStructArg(CallArg* arg) { - GenTree* argNode = arg->GetNode(); + GenTree** use = GenTree::EffectiveUse(&arg->NodeRef()); + GenTree* argNode = *use; assert(varTypeIsStruct(argNode)); -#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - NYI("fgMorphMultiregStructArg requires implementation for this target"); -#endif - - bool isSplit = arg->NewAbiInfo.IsSplitAcrossRegistersAndStack(); + bool isSplit = arg->AbiInfo.IsSplitAcrossRegistersAndStack(); #ifdef TARGET_ARM - if ((isSplit && (arg->NewAbiInfo.CountRegsAndStackSlots() > 4)) || - (!isSplit && arg->NewAbiInfo.HasAnyStackSegment())) + if ((isSplit && (arg->AbiInfo.CountRegsAndStackSlots() > 4)) || (!isSplit && arg->AbiInfo.HasAnyStackSegment())) #else - if (!arg->NewAbiInfo.HasAnyRegisterSegment()) + if (!arg->AbiInfo.HasAnyRegisterSegment()) #endif { if (argNode->OperIs(GT_LCL_VAR) && @@ -2912,7 +2436,15 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // TODO-Arm-CQ: support decomposing "large" promoted structs into field lists. if (!isSplit) { - argNode = fgMorphLclArgToFieldlist(argNode->AsLclVar()); + GenTreeFieldList* fieldList = fgMorphLclToFieldList(argNode->AsLclVar()); + // TODO-Cleanup: The containment/reg optionality for x86 is + // conservative in the "no field list" case. +#ifdef TARGET_X86 + *use = fieldList; +#else + *use = fieldList->SoleFieldOrThis(); +#endif + *use = fgMorphTree(*use); } else { @@ -2924,89 +2456,59 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { lvaSetVarDoNotEnregister(argNode->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); } + else if (argNode->OperIs(GT_BLK)) + { + ClassLayout* layout = argNode->AsBlk()->GetLayout(); - return argNode; - } + var_types primitiveType = layout->GetRegisterType(); + if (primitiveType != TYP_UNDEF) + { + JITDUMP("Converting argument [%06u] to primitive indirection\n", dspTreeID(argNode)); -#if FEATURE_MULTIREG_ARGS - ClassLayout* layout = argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(this) : nullptr; - unsigned structSize = argNode->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(argNode); + argNode->SetOper(GT_IND); + argNode->gtType = primitiveType; + } + } - if (layout != nullptr) - { - assert(ClassLayout::AreCompatible(typGetObjLayout(arg->GetSignatureClassHandle()), layout)); - } - else - { - assert(varTypeIsSIMD(argNode) && varTypeIsSIMD(arg->GetSignatureType())); + // Potentially update commas + arg->GetNode()->ChangeType((*use)->TypeGet()); + return true; } - // We should still have a TYP_STRUCT - assert(varTypeIsStruct(argNode)); - - GenTreeFieldList* newArg = nullptr; + GenTree* newArg = nullptr; - // Are we passing a struct LclVar? - // if (argNode->OperIs(GT_LCL_VAR)) { - GenTreeLclVarCommon* lclNode = argNode->AsLclVarCommon(); - unsigned lclNum = lclNode->GetLclNum(); - LclVarDsc* varDsc = lvaGetDesc(lclNum); + GenTreeLclVar* lclNode = argNode->AsLclVar(); + unsigned lclNum = lclNode->GetLclNum(); + LclVarDsc* varDsc = lvaGetDesc(lclNum); - varDsc->lvIsMultiRegArg = true; + if (!arg->AbiInfo.HasExactlyOneRegisterSegment()) + { + varDsc->lvIsMultiRegArg = true; + } - JITDUMP("Multireg struct argument V%02u : ", lclNum); + JITDUMP("Struct argument V%02u: ", lclNum); JITDUMPEXEC(arg->Dump(this)); - // Try to see if we can use the promoted fields to pass this argument. + // Try to see if we can and should use promoted fields to pass this + // argument. 
// - if (varDsc->lvPromoted && (varDsc->lvFieldCnt == arg->NewAbiInfo.CountRegsAndStackSlots())) + if (varDsc->lvPromoted && !varDsc->lvDoNotEnregister && (!isSplit || FieldsMatchAbi(varDsc, arg->AbiInfo))) { - bool fieldsMatch = true; - - for (const ABIPassingSegment& seg : arg->NewAbiInfo.Segments()) - { - if (seg.IsPassedInRegister()) - { - unsigned fieldLclNum = lvaGetFieldLocal(varDsc, seg.Offset); - if (fieldLclNum == BAD_VAR_NUM) - { - fieldsMatch = false; - break; - } - - var_types fieldType = lvaGetDesc(fieldLclNum)->TypeGet(); - var_types regType = genActualType(seg.GetRegisterType()); - - if (!varTypeUsesSameRegType(fieldType, regType)) - { - // TODO-CQ: We should be able to tolerate mismatches by inserting GT_BITCAST in lowering. - // - JITDUMP("Multireg struct V%02u will be passed using GT_LCL_FLD because of type mismatch: " - "register type is %s, field local V%02u's type is %s\n", - lclNum, varTypeName(regType), fieldLclNum, varTypeName(fieldType)); - fieldsMatch = false; - break; - } - } - else - { - for (unsigned offset = 0; offset < seg.Size; offset += TARGET_POINTER_SIZE) - { - if (lvaGetFieldLocal(varDsc, seg.Offset + offset) == BAD_VAR_NUM) - { - fieldsMatch = false; - break; - } - } - } - } - - if (fieldsMatch) - { - newArg = fgMorphLclArgToFieldlist(lclNode); - } + newArg = fgMorphLclToFieldList(lclNode)->SoleFieldOrThis(); + newArg = fgMorphTree(newArg); + } + } + else if (argNode->OperIsFieldList()) + { + // We can already see a field list here if physical promotion created it. + // Physical promotion will also create single-field field lists which + // not everything treats the same as a single node, so fix that here. + newArg = argNode->AsFieldList()->SoleFieldOrThis(); + if (newArg == argNode) + { + return true; } } @@ -3014,10 +2516,47 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // if (newArg == nullptr) { - if (!arg->NewAbiInfo.HasAnyRegisterSegment()) + if (!argNode->TypeIs(TYP_STRUCT) && arg->AbiInfo.HasExactlyOneRegisterSegment()) + { + // This can be treated primitively. Leave it alone. + return true; + } + + if (!argNode->OperIsLocalRead() && !argNode->OperIsLoad()) + { + // A node we do not know how to turn into multiple registers. + // Usually HWINTRINSIC. Bail. + return false; + } + + ClassLayout* layout = argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(this) : nullptr; + unsigned structSize = argNode->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(argNode); + + if (layout != nullptr) + { + assert(ClassLayout::AreCompatible(typGetObjLayout(arg->GetSignatureClassHandle()), layout)); + } + else { - // We leave this stack passed argument alone. - return argNode; + assert(varTypeIsSIMD(argNode) && varTypeIsSIMD(arg->GetSignatureType())); + } + + if (argNode->OperIsLoad()) + { + unsigned lastLoadSize = structSize % TARGET_POINTER_SIZE; + if ((lastLoadSize != 0) && !isPow2(lastLoadSize)) + { + // Cannot read this size from a non-local. Bail. + return false; + } + + GenTree* indirAddr = argNode->AsIndir()->Addr(); + if (((indirAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) && + (arg->AbiInfo.CountRegsAndStackSlots() > 1)) + { + // Cannot create multiple uses of the address. Bail. 
+ return false; + } } auto createSlotAccess = [=](unsigned offset, var_types type) -> GenTree* { @@ -3025,16 +2564,10 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) if (type == TYP_UNDEF) { - if ((structSize - offset) < TARGET_POINTER_SIZE) + unsigned sizeLeft = structSize - offset; + if (sizeLeft < TARGET_POINTER_SIZE) { - // ArgsComplete has made it so that for loads from memory - // we will only see the easily handleable cases here, For - // locals we may see odd sizes, but for those we can load - // "too much" from the stack frame, and thus can just round - // up the size. - assert(isPow2(structSize - offset) || argNode->OperIsLocalRead()); - - switch (structSize - offset) + switch (sizeLeft) { case 1: type = TYP_UBYTE; @@ -3057,7 +2590,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) } #ifdef TARGET_ARM64 - if (argNode->OperIsLocalRead()) + if ((offset > 0) && argNode->OperIsLocalRead()) { // For arm64 it's beneficial to consider all tails to // be TYP_I_IMPL to allow more ldp's. @@ -3077,10 +2610,27 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) if (argNode->OperIsLocalRead()) { - GenTreeLclFld* lclFld = gtNewLclFldNode(argNode->AsLclVarCommon()->GetLclNum(), genActualType(type), - argNode->AsLclVarCommon()->GetLclOffs() + offset); - lclFld->SetMorphed(this); - return lclFld; + GenTreeLclVarCommon* lclVar = argNode->AsLclVarCommon(); + LclVarDsc* dsc = lvaGetDesc(lclVar); + GenTree* result; + // We sometimes end up with struct reinterpretations where the + // retyping into a primitive allows us to replace by a scalar + // local here, so make sure we do that if possible. + if ((lclVar->GetLclOffs() == 0) && (offset == 0) && (genTypeSize(type) == genTypeSize(dsc))) + { + result = gtNewLclVarNode(lclVar->GetLclNum()); + } + else + { + result = gtNewLclFldNode(lclVar->GetLclNum(), type, lclVar->GetLclOffs() + offset); + + if (!dsc->lvDoNotEnregister) + { + lvaSetVarDoNotEnregister(lclVar->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + } + } + result = fgMorphTree(result); + return result; } else { @@ -3094,15 +2644,13 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) } else { - assert((indirAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); - GenTree* indirAddrDup = gtCloneExpr(indirAddr); GenTree* offsetNode = gtNewIconNode(offset, TYP_I_IMPL); addr = gtNewOperNode(GT_ADD, indirAddr->TypeGet(), indirAddrDup, offsetNode); } GenTree* indir = gtNewIndir(type, addr); - indir->SetMorphed(this, /* doChildren*/ true); + indir->SetMorphed(this, /* doChildren */ true); return indir; } }; @@ -3110,7 +2658,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); newArg->SetMorphed(this); - for (const ABIPassingSegment& seg : arg->NewAbiInfo.Segments()) + for (const ABIPassingSegment& seg : arg->AbiInfo.Segments()) { if (seg.IsPassedInRegister()) { @@ -3119,7 +2667,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // createSlotAccess get the type from the layout. var_types slotType = varTypeUsesFloatReg(regType) ? 
regType : TYP_UNDEF; GenTree* access = createSlotAccess(seg.Offset, slotType); - newArg->AddField(this, access, seg.Offset, access->TypeGet()); + + newArg->AsFieldList()->AddField(this, access, seg.Offset, access->TypeGet()); } else { @@ -3127,33 +2676,71 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { unsigned layoutOffset = seg.Offset + slotOffset; GenTree* access = createSlotAccess(layoutOffset, TYP_UNDEF); - newArg->AddField(this, access, layoutOffset, access->TypeGet()); + + newArg->AsFieldList()->AddField(this, access, layoutOffset, access->TypeGet()); } } } - if (argNode->OperIsLocalRead()) - { - lvaSetVarDoNotEnregister(argNode->AsLclVarCommon()->GetLclNum() - DEBUGARG(DoNotEnregisterReason::LocalField)); - } + newArg = newArg->AsFieldList()->SoleFieldOrThis(); } - // If we reach here we should have set newArg to something - noway_assert(newArg != nullptr); - - JITDUMP("fgMorphMultiregStructArg created tree:\n"); + JITDUMP("fgTryMorphStructArg created tree:\n"); DISPTREE(newArg); - argNode = newArg; // consider calling fgMorphTree(newArg); + *use = newArg; + // Potentially update commas + arg->GetNode()->ChangeType((*use)->TypeGet()); + return true; +} -#endif // FEATURE_MULTIREG_ARGS +//----------------------------------------------------------------------------- +// FieldsMatchAbi: +// Check if the fields of a local map cleanly (in terms of offsets) to the +// specified ABI info. +// +// Arguments: +// varDsc - promoted local +// abiInfo - ABI information +// +// Returns: +// True if it does. In that case FIELD_LIST usage is allowed for split args +// by the backend. +// +bool Compiler::FieldsMatchAbi(LclVarDsc* varDsc, const ABIPassingInformation& abiInfo) +{ + if (varDsc->lvFieldCnt != abiInfo.CountRegsAndStackSlots()) + { + return false; + } - return argNode; + for (const ABIPassingSegment& seg : abiInfo.Segments()) + { + if (seg.IsPassedInRegister()) + { + unsigned fieldLclNum = lvaGetFieldLocal(varDsc, seg.Offset); + if (fieldLclNum == BAD_VAR_NUM) + { + return false; + } + } + else + { + for (unsigned offset = 0; offset < seg.Size; offset += TARGET_POINTER_SIZE) + { + if (lvaGetFieldLocal(varDsc, seg.Offset + offset) == BAD_VAR_NUM) + { + return false; + } + } + } + } + + return true; } //------------------------------------------------------------------------ -// fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields +// fgMorphLclToFieldList: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields // // Arguments: // lcl - The GT_LCL_VAR node we will transform @@ -3161,7 +2748,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // Return value: // The new GT_FIELD_LIST that we have created. 
// -GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) +GenTreeFieldList* Compiler::fgMorphLclToFieldList(GenTreeLclVar* lcl) { LclVarDsc* varDsc = lvaGetDesc(lcl); assert(varDsc->lvPromoted); @@ -3169,15 +2756,15 @@ GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) unsigned fieldLclNum = varDsc->lvFieldLclStart; GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + for (unsigned i = 0; i < fieldCount; i++) { LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet()); - lclVar->SetMorphed(this); fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); fieldLclNum++; } - fieldList->SetMorphed(this); + return fieldList; } @@ -3200,7 +2787,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) // // We don't need a copy if this is the last use of the local. // - if (opts.OptimizationEnabled() && arg->AbiInfo.PassedByRef) + if (opts.OptimizationEnabled() && arg->AbiInfo.IsPassedByReference()) { GenTree* implicitByRefLclAddr; target_ssize_t implicitByRefLclOffs; @@ -3288,7 +2875,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) #endif JITDUMP("making an outgoing copy for struct arg\n"); - assert(!call->IsTailCall() || !arg->AbiInfo.PassedByRef); + assert(!call->IsTailCall() || !arg->AbiInfo.IsPassedByReference()); CORINFO_CLASS_HANDLE copyBlkClass = arg->GetSignatureClassHandle(); unsigned tmp = 0; @@ -3320,10 +2907,6 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) // Here We don't need unsafe value cls check, since the addr of this temp is used only in copyblk. tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument")); lvaSetStruct(tmp, copyBlkClass, false); - if (call->IsVarargs()) - { - lvaSetStructUsedAsVarArg(tmp); - } } if (fgUsedSharedTemps != nullptr) @@ -3341,29 +2924,42 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) GenTree* copyBlk = gtNewStoreLclVarNode(tmp, argx); copyBlk = fgMorphCopyBlock(copyBlk); + GenTree* argNode; + if (arg->AbiInfo.IsPassedByReference()) + { + argNode = gtNewLclVarAddrNode(tmp); + lvaSetVarAddrExposed(tmp DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); + } + else + { + argNode = gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet()); + } + argNode->SetMorphed(this); + #if FEATURE_FIXED_OUT_ARGS // For fixed out args we create the setup node here; EvalArgsToTemps knows // to handle the case of "already have a setup node" properly. arg->SetEarlyNode(copyBlk); - GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg, tmp); - argNode->SetMorphed(this); arg->SetLateNode(argNode); -#else // !FEATURE_FIXED_OUT_ARGS +#else // !FEATURE_FIXED_OUT_ARGS // Structs are always on the stack, and thus never need temps // so we have to put the copy and temp all into one expression. - GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg, tmp); - argNode->SetMorphed(this); - // Change the expression to "(tmp=val),tmp" argNode = gtNewOperNode(GT_COMMA, argNode->TypeGet(), copyBlk, argNode); argNode->SetMorphed(this); arg->SetEarlyNode(argNode); - #endif // !FEATURE_FIXED_OUT_ARGS + + if (!arg->AbiInfo.IsPassedByReference()) + { + bool morphed = fgTryMorphStructArg(arg); + // Should always succeed for an unpromoted local. 
+ assert(morphed); + } } /***************************************************************************** @@ -3434,7 +3030,7 @@ void Compiler::fgMoveOpsLeft(GenTree* tree) // is negative. It also requires the address generation be in a fully-interruptible // code region. // - if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL) + if (varTypeIsGC(op1->TypeGet()) && op2->TypeIs(TYP_I_IMPL)) { assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD)); break; @@ -3464,17 +3060,17 @@ void Compiler::fgMoveOpsLeft(GenTree* tree) if (varTypeIsGC(op1->TypeGet())) { - noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && - oper == GT_ADD) || // byref(ref + (int+int)) - (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && - oper == GT_OR)); // int(gcref | int(gcref|intval)) + noway_assert( + (varTypeIsGC(tree->TypeGet()) && op2->TypeIs(TYP_I_IMPL) && oper == GT_ADD) || // byref(ref + (int+int)) + (varTypeIsI(tree->TypeGet()) && op2->TypeIs(TYP_I_IMPL) && oper == GT_OR)); // int(gcref | + // int(gcref|intval)) new_op1->gtType = tree->gtType; } else if (varTypeIsGC(ad2->TypeGet())) { // Neither ad1 nor op1 are GC. So new_op1 isnt either - noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL); + noway_assert(op1->TypeIs(TYP_I_IMPL) && ad1->TypeIs(TYP_I_IMPL)); new_op1->gtType = TYP_I_IMPL; } @@ -3654,7 +3250,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr) // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case, // the comparison will have to be widened to 64 bits. - if (index->TypeGet() == TYP_I_IMPL) + if (index->TypeIs(TYP_I_IMPL)) { bndsChkType = TYP_I_IMPL; } @@ -3681,9 +3277,9 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr) #ifdef TARGET_64BIT // Widen 'index' on 64-bit targets - if (index->TypeGet() != TYP_I_IMPL) + if (!index->TypeIs(TYP_I_IMPL)) { - if (index->OperGet() == GT_CNS_INT) + if (index->OperIs(GT_CNS_INT)) { index->gtType = TYP_I_IMPL; } @@ -3722,16 +3318,21 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr) // the partial byref will not point within the object, and thus not get updated correctly during a GC. // This is mostly a risk in fully-interruptible code regions. - // We can generate two types of trees for "addr": + // We can generate three types of trees for "addr": // // 1) "arrRef + (index + elemOffset)" // 2) "(arrRef + elemOffset) + index" + // 3) "(arrRef + index) + elemOffset" // // XArch has powerful addressing modes such as [base + index*scale + offset] so it's fine with 1), // while for Arm we better try to make an invariant sub-tree as large as possible, which is usually // "(arrRef + elemOffset)" and is CSE/LoopHoisting friendly => produces better codegen. // 2) should still be safe from GC's point of view since both ADD operations are byref and point to // within the object so GC will be able to correctly track and update them. + // + // RISC-V has very minimal addressing mode: [base + offset] which won't benefit much from CSE/LoopHoisting. However, + // RISC-V has the SH(X)ADD_(UW) instruction that represents [base + index] well. Therefore, 3) lends itself more + // naturally to RISC-V addressing mode. 
bool groupArrayRefWithElemOffset = false; #ifdef TARGET_ARMARCH @@ -3746,18 +3347,37 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr) groupArrayRefWithElemOffset = false; } #endif + bool groupArrayRefWithIndex = false; +#if defined(TARGET_RISCV64) + groupArrayRefWithIndex = true; + + // Don't use 3) for structs to reduce number of size regressions + if (varTypeIsStruct(elemTyp)) + { + groupArrayRefWithIndex = false; + } +#endif + + // Note the array reference may now be TYP_I_IMPL, TYP_BYREF, or TYP_REF + // + var_types const arrPtrType = arrRef->TypeIs(TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF; // First element's offset GenTree* elemOffset = gtNewIconNode(elemOffs, TYP_I_IMPL); if (groupArrayRefWithElemOffset) { - GenTree* basePlusOffset = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, elemOffset); - addr = gtNewOperNode(GT_ADD, TYP_BYREF, basePlusOffset, addr); + GenTree* basePlusOffset = gtNewOperNode(GT_ADD, arrPtrType, arrRef, elemOffset); + addr = gtNewOperNode(GT_ADD, arrPtrType, basePlusOffset, addr); + } + else if (groupArrayRefWithIndex) + { + addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr); + addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, elemOffset); } else { addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, elemOffset); - addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr); + addr = gtNewOperNode(GT_ADD, arrPtrType, arrRef, addr); } // TODO-Throughput: bash the INDEX_ADDR to ARR_ADDR here instead of creating a new node. @@ -3910,10 +3530,8 @@ GenTree* Compiler::fgMorphExpandStackArgForVarArgs(GenTreeLclVarCommon* lclNode) GenTree* argsBaseAddr = gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL); ssize_t offset = (ssize_t)abiInfo.Segment(0).GetStackOffset() - lclNode->GetLclOffs(); - assert(abiInfo.Segment(0).GetStackOffset() == - (varDsc->GetStackOffset() - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)); - GenTree* offsetNode = gtNewIconNode(offset, TYP_I_IMPL); - GenTree* argAddr = gtNewOperNode(GT_SUB, TYP_I_IMPL, argsBaseAddr, offsetNode); + GenTree* offsetNode = gtNewIconNode(offset, TYP_I_IMPL); + GenTree* argAddr = gtNewOperNode(GT_SUB, TYP_I_IMPL, argsBaseAddr, offsetNode); GenTree* argNode; if (lclNode->OperIsLocalStore()) @@ -4720,24 +4338,26 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) #endif // DEBUG }; -#if defined(TARGET_ARM) || defined(TARGET_RISCV64) +#if defined(TARGET_ARM) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) for (CallArg& arg : callee->gtArgs.Args()) { - if (arg.NewAbiInfo.IsSplitAcrossRegistersAndStack()) + if (arg.AbiInfo.IsSplitAcrossRegistersAndStack()) { reportFastTailCallDecision("Argument splitting in callee is not supported on " TARGET_READABLE_NAME); return false; } } -#endif // TARGET_ARM || TARGET_RISCV64 -#if defined(TARGET_ARM) || defined(TARGET_RISCV64) - if (compHasSplitParam) + for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) { - reportFastTailCallDecision("Argument splitting in caller is not supported on " TARGET_READABLE_NAME); - return false; + const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(lclNum); + if (abiInfo.IsSplitAcrossRegistersAndStack()) + { + reportFastTailCallDecision("Argument splitting in caller is not supported on " TARGET_READABLE_NAME); + return false; + } } -#endif // TARGET_ARM || TARGET_RISCV64 +#endif // TARGET_ARM || TARGET_RISCV64 || defined(TARGET_LOONGARCH64) #ifdef TARGET_ARM if (compIsProfilerHookNeeded()) @@ -4904,7 +4524,7 @@ bool Compiler::fgCallHasMustCopyByrefParameter(GenTreeCall* call) // bool 
Compiler::fgCallArgWillPointIntoLocalFrame(GenTreeCall* call, CallArg& arg) { - if (!arg.AbiInfo.PassedByRef) + if (!arg.AbiInfo.IsPassedByReference()) { return false; } @@ -5051,6 +4671,12 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } #endif + if (compIsAsync() != call->IsAsync()) + { + failTailCall("Caller and callee do not agree on async-ness"); + return nullptr; + } + // We have to ensure to pass the incoming retValBuf as the // outgoing one. Using a temp will not do as this function will // not regain control to do the copy. This can happen when inlining @@ -5059,10 +4685,10 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // result. TODO-CQ: Fix this. if (info.compRetBuffArg != BAD_VAR_NUM) { - noway_assert(call->TypeGet() == TYP_VOID); + noway_assert(call->TypeIs(TYP_VOID)); noway_assert(call->gtArgs.HasRetBuffer()); GenTree* retValBuf = call->gtArgs.GetRetBufferArg()->GetNode(); - if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg) + if (!retValBuf->OperIs(GT_LCL_VAR) || retValBuf->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg) { failTailCall("Need to copy return buffer"); return nullptr; @@ -5252,8 +4878,13 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the // generic type parameters of both caller and callee generic method are the same. - if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() && - !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet())) + // + // For OSR, we prefer to tailcall for call counting + potential transition + // into the actual tier1 version. + // + if (opts.compTailCallLoopOpt && canFastTailCall && !opts.IsOSR() && gtIsRecursiveCall(call) && + !lvaReportParamTypeArg() && !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && + !varTypeIsStruct(call->TypeGet())) { fastTailCallToLoop = true; } @@ -5446,7 +5077,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } // Peel off casts - while (treeWithCall->gtOper == GT_CAST) + while (treeWithCall->OperIs(GT_CAST)) { assert(!treeWithCall->gtOverflow()); treeWithCall = treeWithCall->gtGetOp1(); @@ -5676,6 +5307,10 @@ void Compiler::fgValidateIRForTailCall(GenTreeCall* call) { assert(ValidateUse(tree) && "Expected use of local to be tailcall value"); } + else if (IsCommaNop(tree)) + { + // COMMA(NOP,NOP) + } else { DISPTREE(tree); @@ -5685,6 +5320,16 @@ void Compiler::fgValidateIRForTailCall(GenTreeCall* call) return WALK_CONTINUE; } + bool IsCommaNop(GenTree* node) + { + if (!node->OperIs(GT_COMMA)) + { + return false; + } + + return node->AsOp()->gtGetOp1()->OperIs(GT_NOP) && node->AsOp()->gtGetOp2()->OperIs(GT_NOP); + } + bool ValidateUse(GenTree* node) { if (m_lclNum != BAD_VAR_NUM) @@ -5769,7 +5414,7 @@ GenTree* Compiler::fgMorphTailCallViaHelpers(GenTreeCall* call, CORINFO_TAILCALL // R2R requires different handling but we don't support tailcall via // helpers in R2R yet, so just leave it for now. 
// TODO: R2R: TailCallViaHelper - assert(!opts.IsReadyToRun()); + assert(!IsAot()); JITDUMP("fgMorphTailCallViaHelpers (before):\n"); DISPTREE(call); @@ -6019,12 +5664,12 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig retValArg = retBufArg; - if (origCall->gtType != TYP_VOID) + if (!origCall->TypeIs(TYP_VOID)) { retVal = gtClone(retBufArg); } } - else if (origCall->gtType != TYP_VOID) + else if (!origCall->TypeIs(TYP_VOID)) { JITDUMP("Creating a new temp for the return value\n"); newRetLcl = lvaGrabTemp(false DEBUGARG("Return value for tail call dispatcher")); @@ -6074,7 +5719,7 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig NewCallArg retValCallArg = NewCallArg::Primitive(retValArg); callDispatcherNode->gtArgs.PushFront(this, retAddrSlotArg, callTargetArg, retValCallArg); - if (origCall->gtType == TYP_VOID) + if (origCall->TypeIs(TYP_VOID)) { return callDispatcherNode; } @@ -6432,9 +6077,8 @@ void Compiler::fgMorphTailCallViaJitHelper(GenTreeCall* call) call->gtArgs.Remove(thisArg); } - unsigned nOldStkArgsWords = - (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES; - GenTree* arg3Node = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL); + unsigned nOldStkArgsWords = lvaParameterStackSize / REGSIZE_BYTES; + GenTree* arg3Node = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL); CallArg* arg3 = call->gtArgs.PushBack(this, NewCallArg::Primitive(arg3Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate. @@ -6731,23 +6375,12 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa // Remove the call fgRemoveStmt(block, lastStmt); + assert(!opts.IsOSR()); // Set the loop edge. - BasicBlock* entryBB; - if (opts.IsOSR()) - { - // Todo: this may not look like a viable loop header. - // Might need the moral equivalent of an init BB. - entryBB = fgEntryBB; - } - else - { - assert(doesMethodHaveRecursiveTailcall()); - - // TODO-Cleanup: We should really be expanding tailcalls into loops - // much earlier than this, at a place where we do not need to have - // hacky workarounds to figure out what the actual IL entry block is. - entryBB = fgGetFirstILBlock(); - } + // TODO-Cleanup: We should really be expanding tailcalls into loops much + // earlier than this, at a place where we can just use the init BB here. 
+ BasicBlock* entryBB = fgGetFirstILBlock(); + assert(doesMethodHaveRecursiveTailcall()); FlowEdge* const newEdge = fgAddRefPred(entryBB, block); block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); @@ -6948,7 +6581,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) return fgMorphTree(nullCheck); } - noway_assert(call->gtOper == GT_CALL); + noway_assert(call->OperIs(GT_CALL)); // // Only count calls once (only in the global morph phase) @@ -7028,7 +6661,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) // Process the "normal" argument list call = fgMorphArgs(call); - noway_assert(call->gtOper == GT_CALL); + noway_assert(call->OperIs(GT_CALL)); // Try to replace CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE with a constant gc handle // pointing to a frozen segment @@ -7041,9 +6674,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) CORINFO_OBJECT_HANDLE ptr = info.compCompHnd->getRuntimeTypePointer(hClass); if (ptr != NULL) { - setMethodHasFrozenObjects(); - GenTree* retNode = gtNewIconEmbHndNode((void*)ptr, nullptr, GTF_ICON_OBJ_HDL, nullptr); - return fgMorphTree(retNode); + return fgMorphTree(gtNewIconEmbObjHndNode(ptr)); } } } @@ -7584,10 +7215,6 @@ GenTreeOp* Compiler::fgMorphCommutative(GenTreeOp* tree) // Returns: // Tree, possibly updated // -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optAssertionPropDone) { assert(tree->OperKind() & GTK_SMPOP); @@ -7659,8 +7286,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA else { GenTree* effOp1 = op1->gtEffectiveVal(); - noway_assert((effOp1->gtOper == GT_CNS_INT) && - (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); + noway_assert(effOp1->OperIs(GT_CNS_INT) && (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); } break; @@ -7668,6 +7294,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA if (optLocalAssertionProp) { isQmarkColon = true; + BitVecOps::ClearD(apTraits, apLocalPostorder); } break; @@ -7844,9 +7471,9 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA { helper = CORINFO_HELP_DBLREM; noway_assert(op2); - if (op1->TypeGet() == TYP_FLOAT) + if (op1->TypeIs(TYP_FLOAT)) { - if (op2->TypeGet() == TYP_FLOAT) + if (op2->TypeIs(TYP_FLOAT)) { helper = CORINFO_HELP_FLTREM; } @@ -7855,7 +7482,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA tree->AsOp()->gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE); } } - else if (op2->TypeGet() == TYP_FLOAT) + else if (op2->TypeIs(TYP_FLOAT)) { tree->AsOp()->gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE); } @@ -8018,7 +7645,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA } // Did we fold it into a comma node with throw? - if (tree->gtOper == GT_COMMA) + if (tree->OperIs(GT_COMMA)) { noway_assert(fgIsCommaThrow(tree)); return fgMorphTree(tree); @@ -8030,16 +7657,19 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA case GT_RETURN: case GT_SWIFT_ERROR_RET: { - GenTree* retVal = tree->AsOp()->GetReturnValue(); + GenTree*& retVal = tree->AsOp()->ReturnValueRef(); - if (!tree->TypeIs(TYP_VOID)) + // Apply some optimizations that change the type of the return. + // These are not applicable when this is a merged return that will + // be changed into a store and jump to the return BB. 
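+ // (That is, the retyping below only runs when there is no merged return block, or when this
+ // block is the merged return block itself and so still ends in a real return.)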
+ if (!tree->TypeIs(TYP_VOID) && ((genReturnBB == nullptr) || (compCurBB == genReturnBB))) { if (retVal->OperIs(GT_LCL_FLD)) { retVal = fgMorphRetInd(tree->AsOp()); } - fgTryReplaceStructLocalWithField(retVal); + fgTryReplaceStructLocalWithFields(&retVal); } // normalize small integer return values @@ -8065,7 +7695,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA retVal->gtFlags |= (tree->gtFlags & GTF_COLON_COND); retVal = fgMorphTree(retVal); - tree->AsOp()->SetReturnValue(retVal); // Propagate side effect flags tree->SetAllEffectsFlags(retVal); @@ -8154,7 +7783,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA return fgMorphTree(op1); case GT_COMMA: - if (op2->OperIsStore() || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2)) + if (op2->OperIsStore() || (op2->OperIs(GT_COMMA) && op2->TypeIs(TYP_VOID)) || fgIsThrow(op2)) { typ = tree->gtType = TYP_VOID; } @@ -8285,7 +7914,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA // children have been unmarked, unmark the tree too. // Remember that GT_COMMA inherits it's type only from op2 - if (tree->gtOper == GT_COMMA) + if (tree->OperIs(GT_COMMA)) { tree->gtType = genActualType(op2->TypeGet()); } @@ -8300,12 +7929,46 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA GenTree* qmarkOp1 = nullptr; GenTree* qmarkOp2 = nullptr; - if ((tree->OperGet() == GT_QMARK) && (tree->AsOp()->gtOp2->OperGet() == GT_COLON)) + if (tree->OperIs(GT_QMARK) && (tree->AsOp()->gtOp2->OperIs(GT_COLON))) { qmarkOp1 = oldTree->AsOp()->gtOp2->AsOp()->gtOp1; qmarkOp2 = oldTree->AsOp()->gtOp2->AsOp()->gtOp2; } + // During global morph, give assertion prop another shot at this tree. + // + // We need to use the "postorder" assertion set here, because apLocal + // may reflect results from subtrees that have since been reordered. + // + // apLocalPostorder only includes live assertions from prior statements. + // + if (fgGlobalMorph && optLocalAssertionProp && (optAssertionCount > 0)) + { + GenTree* optimizedTree = tree; + bool again = JitConfig.JitEnablePostorderLocalAssertionProp() > 0; + bool didOptimize = false; + + if (!again) + { + JITDUMP("*** Postorder assertion prop disabled by config\n"); + } + + while (again) + { + tree = optimizedTree; + optimizedTree = optAssertionProp(apLocalPostorder, tree, nullptr, nullptr); + again = (optimizedTree != nullptr); + didOptimize |= again; + } + + assert(tree != nullptr); + + if (didOptimize) + { + gtUpdateNodeSideEffects(tree); + } + } + // Try to fold it, maybe we get lucky, tree = gtFoldExpr(tree); @@ -8434,9 +8097,8 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA { tree = fgOptimizeRelationalComparisonWithConst(tree->AsOp()); oper = tree->OperGet(); - - assert(op1 == tree->AsOp()->gtGetOp1()); - assert(op2 == tree->AsOp()->gtGetOp2()); + op1 = tree->gtGetOp1(); + op2 = tree->gtGetOp2(); } if (opts.OptimizationEnabled() && fgGlobalMorph && tree->OperIs(GT_GT, GT_LT, GT_LE, GT_GE)) @@ -8715,7 +8377,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA break; case GT_BOUNDS_CHECK: - + setMethodHasBoundsChecks(); fgAddCodeRef(compCurBB, tree->AsBoundsChk()->gtThrowKind); break; @@ -8753,7 +8415,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA // least. 
commaNode->SetMorphed(this); - while (commaNode->AsOp()->gtOp2->gtOper == GT_COMMA) + while (commaNode->AsOp()->gtOp2->OperIs(GT_COMMA)) { commaNode = commaNode->AsOp()->gtOp2; commaNode->gtType = typ; @@ -8826,7 +8488,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA case GT_COMMA: { /* Special case: trees that don't produce a value */ - if (op2->OperIsStore() || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2)) + if (op2->OperIsStore() || (op2->OperIs(GT_COMMA) && op2->TypeIs(TYP_VOID)) || fgIsThrow(op2)) { typ = tree->gtType = TYP_VOID; } @@ -8931,11 +8593,15 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA { // Retry updating return operand to a field -- assertion // prop done when morphing this operand changed the local. - // - GenTree* const retVal = tree->AsOp()->GetReturnValue(); - if (retVal != nullptr) + // Skip this for merged returns that will be changed to a store and + // jump to the return BB. + GenTree*& retVal = tree->AsOp()->ReturnValueRef(); + if ((retVal != nullptr) && ((genReturnBB == nullptr) || (compCurBB == genReturnBB))) { - fgTryReplaceStructLocalWithField(retVal); + if (fgTryReplaceStructLocalWithFields(&retVal)) + { + retVal = fgMorphTree(retVal); + } } break; } @@ -8993,36 +8659,22 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA // Notes: // Currently only called when the tree parent is a GT_RETURN/GT_SWIFT_ERROR_RET. // -void Compiler::fgTryReplaceStructLocalWithField(GenTree* tree) +bool Compiler::fgTryReplaceStructLocalWithFields(GenTree** use) { - if (!tree->OperIs(GT_LCL_VAR)) + if (!(*use)->OperIs(GT_LCL_VAR)) { - return; + return false; } - // With a `genReturnBB` this `RETURN(src)` tree will be replaced by a `STORE_LCL_VAR(src)` - // and `STORE_LCL_VAR` will be transformed into field by field copy without parent local referencing if - // possible. - GenTreeLclVar* lclVar = tree->AsLclVar(); - unsigned lclNum = lclVar->GetLclNum(); - if ((genReturnLocal == BAD_VAR_NUM) || (genReturnLocal == lclNum)) - { - LclVarDsc* const varDsc = lvaGetDesc(lclVar); - if (varDsc->CanBeReplacedWithItsField(this)) - { - // We can replace the struct with its only field and allow copy propagation to replace - // return value that was written as a field. - unsigned const fieldLclNum = varDsc->lvFieldLclStart; - LclVarDsc* const fieldDsc = lvaGetDesc(fieldLclNum); + LclVarDsc* varDsc = lvaGetDesc((*use)->AsLclVar()); - JITDUMP("Replacing an independently promoted local var V%02u with its only field " - "V%02u for " - "the return [%06u]\n", - lclVar->GetLclNum(), fieldLclNum, dspTreeID(tree)); - lclVar->SetLclNum(fieldLclNum); - lclVar->ChangeType(fieldDsc->lvType); - } + if (varDsc->lvDoNotEnregister || !varDsc->lvPromoted) + { + return false; } + + *use = fgMorphLclToFieldList((*use)->AsLclVar()); + return true; } //------------------------------------------------------------------------ @@ -9366,7 +9018,7 @@ GenTree* Compiler::fgOptimizeEqualityComparisonWithConst(GenTreeOp* cmp) // Here we reverse the RELOP if necessary. - bool reverse = ((op2Value == 0) == (cmp->OperIs(GT_EQ))); + bool reverse = ((op2Value == 0) == cmp->OperIs(GT_EQ)); if (reverse) { @@ -9747,6 +9399,18 @@ GenTree* Compiler::fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp) oper = (oper == GT_LE) ? GT_GE : GT_LT; cmp->gtFlags &= ~GTF_UNSIGNED; } + // LE_UN/GT_UN(expr, int.MaxValue) => EQ/NE(RSZ(expr, 32), 0). 
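+ // For example, for an unsigned 64-bit value x, (x <=u 0xFFFFFFFF) holds exactly when the upper
+ // 32 bits of x are zero, i.e. ((x >> 32) == 0); likewise (x >u 0xFFFFFFFF) becomes ((x >> 32) != 0).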
+ else if (opts.OptimizationEnabled() && (op1->TypeIs(TYP_LONG) && (op2Value == UINT_MAX))) + { + oper = (oper == GT_GT) ? GT_NE : GT_EQ; + GenTree* icon32 = gtNewIconNode(32, TYP_INT); + icon32->SetMorphed(this); + + GenTreeOp* shiftNode = gtNewOperNode(GT_RSZ, TYP_LONG, op1, icon32); + shiftNode->SetMorphed(this); + + cmp->gtOp1 = shiftNode; + } } } @@ -9864,210 +9528,70 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) { break; } - - GenTreeHWIntrinsic* hwop1 = op1->AsHWIntrinsic(); - -#if defined(TARGET_ARM64) - if ((hwop1->GetHWIntrinsicId() == NI_Vector64_ToScalar) || - (hwop1->GetHWIntrinsicId() == NI_Vector128_ToScalar)) -#else - if (hwop1->GetHWIntrinsicId() == NI_Vector128_ToScalar) -#endif - { - op1 = hwop1->Op(1); - - if (!op1->OperIs(GT_HWINTRINSIC)) - { - break; - } - - toScalar = hwop1; - hwop1 = op1->AsHWIntrinsic(); - } - -#if defined(TARGET_ARM64) - if ((hwop1->GetHWIntrinsicId() != NI_Vector64_Dot) && (hwop1->GetHWIntrinsicId() != NI_Vector128_Dot)) -#else - if (hwop1->GetHWIntrinsicId() != NI_Vector128_Dot) -#endif - { - break; - } - - // Must be working with the same types of vectors. - if (hwop1->TypeGet() != retType) - { - break; - } - - if (toScalar != nullptr) - { - DEBUG_DESTROY_NODE(toScalar); - } - - if (sqrt != nullptr) - { - var_types simdType = getSIMDTypeForSize(simdSize); - - node = gtNewSimdSqrtNode(simdType, hwop1, simdBaseJitType, simdSize)->AsHWIntrinsic(); - DEBUG_DESTROY_NODE(sqrt); - } - else - { - node = hwop1; - } - node->SetMorphed(this); - return node; - } - - default: - { -#if defined(FEATURE_MASKED_HW_INTRINSICS) - bool isScalar = false; - genTreeOps actualOper = node->GetOperForHWIntrinsicId(&isScalar); - genTreeOps oper = actualOper; - - // We shouldn't find AND_NOT, OR_NOT or XOR_NOT nodes since it should only be produced in lowering - assert((oper != GT_AND_NOT) && (oper != GT_OR_NOT) && (oper != GT_XOR_NOT)); - - if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper)) - { - GenTree* op1 = node->Op(1); - - GenTree* op2; - GenTree* actualOp2; - - if (oper == GT_NOT) - { - op2 = op1; - actualOp2 = nullptr; - } - else - { - op2 = node->Op(2); - actualOp2 = op2; - } - - // We need both operands to be ConvertMaskToVector in - // order to optimize this to a direct mask operation - - if (!op1->OperIsConvertMaskToVector()) - { - break; - } - - if (!op2->OperIsHWIntrinsic()) - { -#if defined(TARGET_XARCH) - if ((oper != GT_XOR) || !op2->IsVectorAllBitsSet()) - { - break; - } - - // We want to explicitly recognize op1 ^ AllBitsSet as - // some platforms don't have direct support for ~op1 - - oper = GT_NOT; - op2 = op1; -#else - break; -#endif - } - - GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); - GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic(); - - if (!cvtOp2->OperIsConvertMaskToVector()) - { - break; - } - - unsigned simdBaseTypeSize = genTypeSize(simdBaseType); - - if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize) || - (genTypeSize(cvtOp2->GetSimdBaseType()) != simdBaseTypeSize)) - { - // We need both operands to be the same kind of mask; otherwise - // the bitwise operation can differ in how it performs - break; - } - - NamedIntrinsic maskIntrinsicId = NI_Illegal; - -#if defined(TARGET_XARCH) - switch (oper) - { - case GT_AND: - { - maskIntrinsicId = NI_EVEX_AndMask; - break; - } - - case GT_NOT: - { - maskIntrinsicId = NI_EVEX_NotMask; - break; - } - - case GT_OR: - { - maskIntrinsicId = NI_EVEX_OrMask; - break; - } - - case GT_XOR: - { - maskIntrinsicId = NI_EVEX_XorMask; - break; - } - - default: - { 
- unreached(); - } - } -#elif defined(TARGET_ARM64) - // TODO-ARM64-CQ: Support transforming bitwise operations on masks - break; + + GenTreeHWIntrinsic* hwop1 = op1->AsHWIntrinsic(); + +#if defined(TARGET_ARM64) + if ((hwop1->GetHWIntrinsicId() == NI_Vector64_ToScalar) || + (hwop1->GetHWIntrinsicId() == NI_Vector128_ToScalar)) #else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 + if (hwop1->GetHWIntrinsicId() == NI_Vector128_ToScalar) +#endif + { + op1 = hwop1->Op(1); - if (maskIntrinsicId == NI_Illegal) + if (!op1->OperIs(GT_HWINTRINSIC)) { break; } - if (oper == actualOper) - { - node->ChangeHWIntrinsicId(maskIntrinsicId); - node->Op(1) = cvtOp1->Op(1); - } - else - { - assert(oper == GT_NOT); - node->ResetHWIntrinsicId(maskIntrinsicId, this, cvtOp1->Op(1)); - node->gtFlags &= ~GTF_REVERSE_OPS; - } + toScalar = hwop1; + hwop1 = op1->AsHWIntrinsic(); + } - node->gtType = TYP_MASK; - DEBUG_DESTROY_NODE(op1); +#if defined(TARGET_ARM64) + if ((hwop1->GetHWIntrinsicId() != NI_Vector64_Dot) && (hwop1->GetHWIntrinsicId() != NI_Vector128_Dot)) +#else + if (hwop1->GetHWIntrinsicId() != NI_Vector128_Dot) +#endif + { + break; + } - if (oper != GT_NOT) - { - assert(actualOp2 != nullptr); - node->Op(2) = cvtOp2->Op(1); - } + // Must be working with the same types of vectors. + if (hwop1->TypeGet() != retType) + { + break; + } - if (actualOp2 != nullptr) - { - DEBUG_DESTROY_NODE(actualOp2); - } + if (toScalar != nullptr) + { + DEBUG_DESTROY_NODE(toScalar); + } + if (sqrt != nullptr) + { + var_types simdType = getSIMDTypeForSize(simdSize); + + node = gtNewSimdSqrtNode(simdType, hwop1, simdBaseJitType, simdSize)->AsHWIntrinsic(); + DEBUG_DESTROY_NODE(sqrt); + } + else + { + node = hwop1; + } + node->SetMorphed(this); + return node; + } + + default: + { +#if defined(FEATURE_MASKED_HW_INTRINSICS) + GenTreeHWIntrinsic* maskedIntrinsic = fgOptimizeForMaskedIntrinsic(node); + if (maskedIntrinsic != nullptr) + { + node = maskedIntrinsic; node->SetMorphed(this); - node = gtNewSimdCvtMaskToVectorNode(retType, node, simdBaseJitType, simdSize)->AsHWIntrinsic(); - node->SetMorphed(this); - return node; } #endif // FEATURE_MASKED_HW_INTRINSICS break; @@ -10252,6 +9776,284 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) return node; } +#if defined(FEATURE_MASKED_HW_INTRINSICS) +//------------------------------------------------------------------------ +// fgOptimizeForMaskedIntrinsic: Tries to recognize intrinsics that are operating +// on mask types and morphs the tree to use intrinsics +// better suited to this. +// +// Arguments: +// node - the hardware intrinsic tree to try and optimize. +// This tree will be mutated if it is possible to optimize the tree. +// +// Return Value: +// The optimized tree, nullptr if no change was made. 
+// +GenTreeHWIntrinsic* Compiler::fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* node) +{ +#if defined(TARGET_XARCH) + bool isScalar = false; + genTreeOps actualOper = node->GetOperForHWIntrinsicId(&isScalar); + genTreeOps oper = actualOper; + var_types retType = node->TypeGet(); + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); + + // We shouldn't find AND_NOT, OR_NOT or XOR_NOT nodes since it should only be produced in lowering + assert((oper != GT_AND_NOT) && (oper != GT_OR_NOT) && (oper != GT_XOR_NOT)); + + if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper)) + { + GenTree* op1 = node->Op(1); + + GenTree* op2; + GenTree* actualOp2; + + if (oper == GT_NOT) + { + op2 = op1; + actualOp2 = nullptr; + } + else + { + op2 = node->Op(2); + actualOp2 = op2; + } + + // We need both operands to be ConvertMaskToVector in + // order to optimize this to a direct mask operation + + if (!op1->OperIsConvertMaskToVector()) + { + return nullptr; + } + + if (!op2->OperIsHWIntrinsic()) + { + if ((oper != GT_XOR) || !op2->IsVectorAllBitsSet()) + { + return nullptr; + } + + // We want to explicitly recognize op1 ^ AllBitsSet as + // some platforms don't have direct support for ~op1 + + oper = GT_NOT; + op2 = op1; + } + + GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); + GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic(); + + if (!cvtOp2->OperIsConvertMaskToVector()) + { + return nullptr; + } + + unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); + + if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize) || + (genTypeSize(cvtOp2->GetSimdBaseType()) != simdBaseTypeSize)) + { + // We need both operands to be the same kind of mask; otherwise + // the bitwise operation can differ in how it performs + return nullptr; + } + + NamedIntrinsic maskIntrinsicId = NI_Illegal; + + switch (oper) + { + case GT_AND: + { + maskIntrinsicId = NI_AVX512_AndMask; + break; + } + + case GT_NOT: + { + maskIntrinsicId = NI_AVX512_NotMask; + break; + } + + case GT_OR: + { + maskIntrinsicId = NI_AVX512_OrMask; + break; + } + + case GT_XOR: + { + maskIntrinsicId = NI_AVX512_XorMask; + break; + } + + default: + { + unreached(); + } + } + + if (maskIntrinsicId == NI_Illegal) + { + return nullptr; + } + + if (oper == actualOper) + { + node->ChangeHWIntrinsicId(maskIntrinsicId); + node->Op(1) = cvtOp1->Op(1); + } + else + { + assert(oper == GT_NOT); + node->ResetHWIntrinsicId(maskIntrinsicId, this, cvtOp1->Op(1)); + node->gtFlags &= ~GTF_REVERSE_OPS; + } + + node->gtType = TYP_MASK; + DEBUG_DESTROY_NODE(op1); + + if (oper != GT_NOT) + { + assert(actualOp2 != nullptr); + node->Op(2) = cvtOp2->Op(1); + } + + if (actualOp2 != nullptr) + { + DEBUG_DESTROY_NODE(actualOp2); + } + + node->SetMorphed(this); + node = gtNewSimdCvtMaskToVectorNode(retType, node, simdBaseJitType, simdSize)->AsHWIntrinsic(); + node->SetMorphed(this); + return node; + } +#elif defined(TARGET_ARM64) + return fgMorphTryUseAllMaskVariant(node); +#else +#error Unsupported platform +#endif + return nullptr; +} + +#ifdef TARGET_ARM64 +//------------------------------------------------------------------------ +// canMorphVectorOperandToMask: Can this vector operand be converted to a +// node with type TYP_MASK easily? 
+// +bool Compiler::canMorphVectorOperandToMask(GenTree* node) +{ + return varTypeIsMask(node) || node->OperIsConvertMaskToVector() || node->IsVectorZero(); +} + +//------------------------------------------------------------------------ +// canMorphAllVectorOperandsToMasks: Can all vector operands to this node +// be converted to a node with type +// TYP_MASK easily? +// +bool Compiler::canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node) +{ + bool allMaskConversions = true; + for (size_t i = 1; i <= node->GetOperandCount() && allMaskConversions; i++) + { + allMaskConversions &= canMorphVectorOperandToMask(node->Op(i)); + } + + return allMaskConversions; +} + +//------------------------------------------------------------------------ +// doMorphVectorOperandToMask: Morph a vector node that is close to a mask +// node into a mask node. +// +// Return value: +// The morphed tree, or nullptr if the transform is not applicable. +// +GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent) +{ + if (varTypeIsMask(node)) + { + // Already a mask, nothing to do. + return node; + } + else if (node->OperIsConvertMaskToVector()) + { + // Replace node with op1. + return node->AsHWIntrinsic()->Op(1); + } + else if (node->IsVectorZero()) + { + // Morph the vector of zeroes into mask of zeroes. + GenTree* mask = gtNewSimdFalseMaskByteNode(parent->GetSimdSize()); + mask->SetMorphed(this); + return mask; + } + + return nullptr; +} + +//----------------------------------------------------------------------------------------------------- +// fgMorphTryUseAllMaskVariant: For NamedIntrinsics that have a variant where all operands are +// mask nodes. If all operands to this node are 'suggesting' that they +// originate closely from a mask, but are of vector types, then morph the +// operands as appropriate to use mask types instead. 'Suggesting' +// is defined by the canMorphVectorOperandToMask function. +// +// Arguments: +// tree - The HWIntrinsic to try and optimize. +// +// Return Value: +// The fully morphed tree if a change was made, else nullptr. +// +GenTreeHWIntrinsic* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node) +{ + if (HWIntrinsicInfo::HasAllMaskVariant(node->GetHWIntrinsicId())) + { + NamedIntrinsic maskVariant = HWIntrinsicInfo::GetMaskVariant(node->GetHWIntrinsicId()); + + // As some intrinsics have many variants, check that the count of operands on the node + // matches the number of operands required for the mask variant of the intrinsic. The mask + // variant of the intrinsic must have a fixed number of operands. + int numArgs = HWIntrinsicInfo::lookupNumArgs(maskVariant); + assert(numArgs >= 0); + if (node->GetOperandCount() == (size_t)numArgs) + { + // We're sure it will work at this point, so perform the pattern match on operands. 
+ if (canMorphAllVectorOperandsToMasks(node)) + { + switch (node->GetOperandCount()) + { + case 1: + node->ResetHWIntrinsicId(maskVariant, doMorphVectorOperandToMask(node->Op(1), node)); + break; + case 2: + node->ResetHWIntrinsicId(maskVariant, doMorphVectorOperandToMask(node->Op(1), node), + doMorphVectorOperandToMask(node->Op(2), node)); + break; + case 3: + node->ResetHWIntrinsicId(maskVariant, this, doMorphVectorOperandToMask(node->Op(1), node), + doMorphVectorOperandToMask(node->Op(2), node), + doMorphVectorOperandToMask(node->Op(3), node)); + break; + default: + unreached(); + } + + node->gtType = TYP_MASK; + return node; + } + } + } + + return nullptr; +} +#endif // TARGET_ARM64 + +#endif // FEATURE_MASKED_HW_INTRINSICS + //------------------------------------------------------------------------ // fgOptimizeHWIntrinsicAssociative: Morph an associative GenTreeHWIntrinsic tree. // @@ -11056,9 +10858,7 @@ GenTree* Compiler::fgMorphRetInd(GenTreeOp* ret) bool canFold = (indSize == lclVarSize) && (lclVarSize <= REGSIZE_BYTES); #endif - // If we have a shared return temp we cannot represent the store properly with these retyped values, - // so skip the optimization in that case. - if (canFold && (genReturnBB == nullptr)) + if (canFold) { // Fold even if types do not match, lowering will handle it. This allows the local // to remain DNER-free and be enregistered. @@ -11075,9 +10875,6 @@ GenTree* Compiler::fgMorphRetInd(GenTreeOp* ret) return lclFld; } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //------------------------------------------------------------- // fgMorphSmpOpOptional: optional post-order morping of some SMP trees // @@ -11134,7 +10931,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropD /* Change "((x+icon)+y)" to "((x+y)+icon)" Don't reorder floating-point operations */ - if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() && + if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && op1->OperIs(GT_ADD) && !op1->gtOverflow() && varTypeIsIntegralOrI(typ)) { GenTree* ad1 = op1->AsOp()->gtOp1; @@ -11225,7 +11022,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropD break; } - if (op2->gtOper == GT_CAST && !op2->gtOverflow()) + if (op2->OperIs(GT_CAST) && !op2->gtOverflow()) { var_types srct; var_types cast; @@ -11249,7 +11046,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropD /* Check for the case "(val + icon) * icon" */ - if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD) + if (op2->OperIs(GT_CNS_INT) && op1->OperIs(GT_ADD)) { GenTree* add = op1->AsOp()->gtOp2; @@ -11298,7 +11095,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropD /* Check for the case "(val + icon) << icon" */ - if (op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow()) + if (op2->IsCnsIntOrI() && op1->OperIs(GT_ADD) && !op1->gtOverflow()) { GenTree* cns = op1->AsOp()->gtOp2; @@ -11417,6 +11214,14 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree) tree->AddAllEffectsFlags(operand); } +#ifdef TARGET_XARCH + if (intrinsicId == NI_Vector128_op_Division || intrinsicId == NI_Vector256_op_Division) + { + fgAddCodeRef(compCurBB, SCK_DIV_BY_ZERO); + fgAddCodeRef(compCurBB, SCK_OVERFLOW); + } +#endif // TARGET_XARCH + if (opts.OptimizationEnabled()) { var_types retType = tree->TypeGet(); @@ -11601,11 +11406,11 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) { 
JITDUMP("\nMorphing MOD/UMOD [%06u] to Sub/Mul/Div\n", dspTreeID(tree)); - if (tree->OperGet() == GT_MOD) + if (tree->OperIs(GT_MOD)) { tree->SetOper(GT_DIV); } - else if (tree->OperGet() == GT_UMOD) + else if (tree->OperIs(GT_UMOD)) { tree->SetOper(GT_UDIV); } @@ -11804,12 +11609,12 @@ GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree) GenTree* op2 = tree->gtGetOp2(); GenTree* leftShiftTree = nullptr; GenTree* rightShiftTree = nullptr; - if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ)) + if (op1->OperIs(GT_LSH) && op2->OperIs(GT_RSZ)) { leftShiftTree = op1; rightShiftTree = op2; } - else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH)) + else if (op1->OperIs(GT_RSZ) && op2->OperIs(GT_LSH)) { leftShiftTree = op2; rightShiftTree = op1; @@ -11839,7 +11644,7 @@ GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree) ssize_t leftShiftMask = -1; ssize_t rightShiftMask = -1; - if ((leftShiftIndex->OperGet() == GT_AND)) + if (leftShiftIndex->OperIs(GT_AND)) { if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI()) { @@ -11852,7 +11657,7 @@ GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree) } } - if ((rightShiftIndex->OperGet() == GT_AND)) + if (rightShiftIndex->OperIs(GT_AND)) { if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI()) { @@ -11879,13 +11684,13 @@ GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree) genTreeOps rotateOp = GT_NONE; GenTree* rotateIndex = nullptr; - if (leftShiftIndex->OperGet() == GT_ADD) + if (leftShiftIndex->OperIs(GT_ADD)) { shiftIndexWithAdd = leftShiftIndex; shiftIndexWithoutAdd = rightShiftIndex; rotateOp = GT_ROR; } - else if (rightShiftIndex->OperGet() == GT_ADD) + else if (rightShiftIndex->OperIs(GT_ADD)) { shiftIndexWithAdd = rightShiftIndex; shiftIndexWithoutAdd = leftShiftIndex; @@ -12151,7 +11956,7 @@ GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac) assert(tree != nullptr); } } - PREFAST_ASSUME(tree != nullptr); + assert(tree != nullptr); } /* Figure out what kind of a node we have */ @@ -12293,36 +12098,36 @@ void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* { // Active dependent assertions are killed here // - ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum)); - BitVecOps::IntersectionD(apTraits, killed, apLocal); - - if (killed) - { + ASSERT_TP killed = GetAssertionDep(lclNum); #ifdef DEBUG + bool hasKills = !BitVecOps::IsEmptyIntersection(apTraits, apLocal, killed); + if (hasKills) + { AssertionIndex index = optAssertionCount; while (killed && (index > 0)) { if (BitVecOps::IsMember(apTraits, killed, index - 1)) { AssertionDsc* curAssertion = optGetAssertion(index); - noway_assert((curAssertion->op1.lcl.lclNum == lclNum) || - ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum))); + noway_assert((curAssertion->op1.lclNum == lclNum) || + ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lclNum == lclNum))); if (verbose) { printf("\nThe store "); printTreeID(tree); - printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum); + printf(" using V%02u removes: ", curAssertion->op1.lclNum); optPrintAssertion(curAssertion, index); } } index--; } + } #endif - BitVecOps::DiffD(apTraits, apLocal, killed); - } + BitVecOps::DiffD(apTraits, apLocal, killed); + BitVecOps::DiffD(apTraits, apLocalPostorder, killed); } //------------------------------------------------------------------------ @@ -12339,7 +12144,7 @@ void 
Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* // void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree)) { - if (BitVecOps::IsEmpty(apTraits, apLocal)) + if (BitVecOps::IsEmpty(apTraits, apLocal) && BitVecOps::IsEmpty(apTraits, apLocalPostorder)) { return; } @@ -12384,6 +12189,7 @@ void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree) // void Compiler::fgAssertionGen(GenTree* tree) { + assert(optLocalAssertionProp); INDEBUG(unsigned oldAssertionCount = optAssertionCount;); optAssertionGen(tree); @@ -12428,18 +12234,18 @@ void Compiler::fgAssertionGen(GenTree* tree) if ((assertion->assertionKind == OAK_EQUAL) && (assertion->op1.kind == O1K_LCLVAR) && (assertion->op2.kind == O2K_CONST_INT)) { - LclVarDsc* const lclDsc = lvaGetDesc(assertion->op1.lcl.lclNum); + LclVarDsc* const lclDsc = lvaGetDesc(assertion->op1.lclNum); if (varTypeIsIntegral(lclDsc->TypeGet())) { ssize_t iconVal = assertion->op2.u1.iconVal; if ((iconVal == 0) || (iconVal == 1)) { - AssertionDsc extraAssertion = {OAK_SUBRANGE}; - extraAssertion.op1.kind = O1K_LCLVAR; - extraAssertion.op1.lcl.lclNum = assertion->op1.lcl.lclNum; - extraAssertion.op2.kind = O2K_SUBRANGE; - extraAssertion.op2.u2 = IntegralRange(SymbolicIntegerValue::Zero, SymbolicIntegerValue::One); + AssertionDsc extraAssertion = {OAK_SUBRANGE}; + extraAssertion.op1.kind = O1K_LCLVAR; + extraAssertion.op1.lclNum = assertion->op1.lclNum; + extraAssertion.op2.kind = O2K_SUBRANGE; + extraAssertion.op2.u2 = IntegralRange(SymbolicIntegerValue::Zero, SymbolicIntegerValue::One); AssertionIndex extraIndex = optFinalizeCreatingAssertion(&extraAssertion); if (extraIndex != NO_ASSERTION_INDEX) @@ -12621,7 +12427,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) noway_assert(lastStmt->GetNextStmt() == nullptr); - if (lastStmt->GetRootNode()->gtOper == GT_CALL) + if (lastStmt->GetRootNode()->OperIs(GT_CALL)) { noway_assert(fgRemoveRestOfBlock); @@ -12635,7 +12441,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) return result; } - noway_assert(lastStmt->GetRootNode()->gtOper == GT_JTRUE); + noway_assert(lastStmt->GetRootNode()->OperIs(GT_JTRUE)); /* Did we fold the conditional */ @@ -12650,7 +12456,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) /* Yupee - we folded the conditional! * Remove the conditional statement */ - noway_assert(cond->gtOper == GT_CNS_INT); + noway_assert(cond->OperIs(GT_CNS_INT)); noway_assert((block->GetFalseTarget()->countOfInEdges() > 0) && (block->GetTrueTarget()->countOfInEdges() > 0)); @@ -12704,7 +12510,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) noway_assert(lastStmt->GetNextStmt() == nullptr); - if (lastStmt->GetRootNode()->gtOper == GT_CALL) + if (lastStmt->GetRootNode()->OperIs(GT_CALL)) { noway_assert(fgRemoveRestOfBlock); @@ -12718,7 +12524,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) return result; } - noway_assert(lastStmt->GetRootNode()->gtOper == GT_SWITCH); + noway_assert(lastStmt->GetRootNode()->OperIs(GT_SWITCH)); // Did we fold the conditional @@ -12731,7 +12537,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) // Yupee - we folded the conditional! 
// Remove the conditional statement - noway_assert(cond->gtOper == GT_CNS_INT); + noway_assert(cond->OperIs(GT_CNS_INT)); if (condTree != cond) { @@ -12819,6 +12625,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) // block - block containing the statement // stmt - statement to morph // msg - string to identify caller in a dump +// allowFGChange - whether or not the flow graph can be changed // invalidateDFSTreeOnFGChange - whether or not the DFS tree should be invalidated // by this function if it makes a flow graph change // @@ -12831,6 +12638,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) // bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg), + bool allowFGChange, bool invalidateDFSTreeOnFGChange) { assert(block != nullptr); @@ -12857,7 +12665,7 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, #endif // Use the call as the new stmt morph = morph->AsOp()->gtOp1; - noway_assert(morph->gtOper == GT_CALL); + noway_assert(morph->OperIs(GT_CALL)); } // we can get a throw as a statement root @@ -12880,7 +12688,7 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, bool removedStmt = fgCheckRemoveStmt(block, stmt); // Or this is the last statement of a conditional branch that was just folded? - if (!removedStmt && (stmt->GetNextStmt() == nullptr) && !fgRemoveRestOfBlock) + if (allowFGChange && !removedStmt && (stmt->GetNextStmt() == nullptr) && !fgRemoveRestOfBlock) { FoldResult const fr = fgFoldConditional(block); if (invalidateDFSTreeOnFGChange && (fr != FoldResult::FOLD_DID_NOTHING)) @@ -12923,7 +12731,7 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, // // For compDbgCode, we prepend an empty BB as the firstBB, it is BBJ_ALWAYS. // We should not convert it to a ThrowBB. - if ((block != fgFirstBB) || !fgFirstBB->HasFlag(BBF_INTERNAL)) + if (allowFGChange && ((block != fgFirstBB) || !fgFirstBB->HasFlag(BBF_INTERNAL))) { // Convert block to a throw bb, or make it rarely run if already a throw. // @@ -13022,6 +12830,11 @@ void Compiler::fgMorphStmts(BasicBlock* block) compCurStmt = stmt; GenTree* oldTree = stmt->GetRootNode(); + if (optLocalAssertionProp) + { + BitVecOps::Assign(apTraits, apLocalPostorder, apLocal); + } + #ifdef DEBUG unsigned oldHash = verbose ? 
gtHashValue(oldTree) : DUMMY_INIT(~0); @@ -13050,7 +12863,7 @@ void Compiler::fgMorphStmts(BasicBlock* block) } noway_assert(compTailCallUsed); - noway_assert(morphedTree->gtOper == GT_CALL); + noway_assert(morphedTree->OperIs(GT_CALL)); GenTreeCall* call = morphedTree->AsCall(); // Could be // - a fast call made as jmp in which case block will be ending with @@ -13104,7 +12917,7 @@ void Compiler::fgMorphStmts(BasicBlock* block) { /* Use the call as the new stmt */ morphedTree = morphedTree->AsOp()->gtOp1; - noway_assert(morphedTree->gtOper == GT_CALL); + noway_assert(morphedTree->OperIs(GT_CALL)); noway_assert((morphedTree->gtFlags & GTF_COLON_COND) == 0); fgRemoveRestOfBlock = true; @@ -13158,8 +12971,8 @@ void Compiler::fgMorphStmts(BasicBlock* block) noway_assert(lastStmt && lastStmt->GetNextStmt() == nullptr); GenTree* last = lastStmt->GetRootNode(); - if ((block->KindIs(BBJ_COND) && (last->gtOper == GT_JTRUE)) || - (block->KindIs(BBJ_SWITCH) && (last->gtOper == GT_SWITCH))) + if ((block->KindIs(BBJ_COND) && last->OperIs(GT_JTRUE)) || + (block->KindIs(BBJ_SWITCH) && last->OperIs(GT_SWITCH))) { GenTree* op1 = last->AsOp()->gtOp1; @@ -13178,10 +12991,10 @@ void Compiler::fgMorphStmts(BasicBlock* block) } #if FEATURE_FASTTAILCALL - GenTree* recursiveTailCall = nullptr; + GenTreeCall* recursiveTailCall = nullptr; if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall)) { - fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall()); + fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall); } #endif @@ -13242,7 +13055,8 @@ void Compiler::fgMorphBlock(BasicBlock* block, MorphUnreachableInfo* unreachable // Each block starts with an empty table, and no available assertions // optAssertionReset(0); - apLocal = BitVecOps::MakeEmpty(apTraits); + BitVecOps::ClearD(apTraits, apLocal); + BitVecOps::ClearD(apTraits, apLocalPostorder); } else { @@ -13380,6 +13194,8 @@ void Compiler::fgMorphBlock(BasicBlock* block, MorphUnreachableInfo* unreachable apLocal = BitVecOps::MakeEmpty(apTraits); } + BitVecOps::Assign(apTraits, apLocalPostorder, apLocal); + JITDUMPEXEC(optDumpAssertionIndices("Assertions in: ", apLocal)); } } @@ -13448,6 +13264,8 @@ PhaseStatus Compiler::fgMorphBlocks() // Local assertion prop is enabled if we are optimizing. 
// optAssertionInit(/* isLocalProp*/ true); + apLocal = BitVecOps::MakeEmpty(apTraits); + apLocalPostorder = BitVecOps::MakeEmpty(apTraits); } else { @@ -13583,10 +13401,12 @@ PhaseStatus Compiler::fgMorphBlocks() if (optLocalAssertionProp) { - Metrics.LocalAssertionCount = optAssertionCount; - Metrics.LocalAssertionOverflow = optAssertionOverflow; - Metrics.MorphTrackedLocals = lvaTrackedCount; - Metrics.MorphLocals = lvaCount; + Metrics.LocalAssertionCount = optAssertionCount; + Metrics.LocalAssertionOverflow = optAssertionOverflow; + Metrics.MorphTrackedLocals = lvaTrackedCount; + Metrics.MorphLocals = lvaCount; + optLocalAssertionProp = false; + optCrossBlockLocalAssertionProp = false; } // We may have converted a tailcall into a loop, in which case the first BB @@ -13892,7 +13712,7 @@ void Compiler::fgMergeBlockReturn(BasicBlock* block) // Must be a void return node with null operand; delete it as this block branches to // oneReturn block GenTree* const retVal = ret->AsOp()->GetReturnValue(); - noway_assert(ret->TypeGet() == TYP_VOID); + noway_assert(ret->TypeIs(TYP_VOID)); noway_assert(retVal == nullptr); if (opts.compDbgCode && lastStmt->GetDebugInfo().IsValid()) @@ -13965,47 +13785,22 @@ void Compiler::fgSetOptions() codeGen->setFramePointerRequired(true); } - // Assert that the EH table has been initialized by now. Note that - // compHndBBtabAllocCount never decreases; it is a high-water mark - // of table allocation. In contrast, compHndBBtabCount does shrink - // if we delete a dead EH region, and if it shrinks to zero, the - // table pointer compHndBBtab is unreliable. - assert(compHndBBtabAllocCount >= info.compXcptnsCount); - -#ifdef TARGET_X86 - - // Note: this case, and the !X86 case below, should both use the - // !X86 path. This would require a few more changes for X86 to use - // compHndBBtabCount (the current number of EH clauses) instead of - // info.compXcptnsCount (the number of EH clauses in IL), such as - // in ehNeedsShadowSPslots(). This is because sometimes the IL has - // an EH clause that we delete as statically dead code before we - // get here, leaving no EH clauses left, and thus no requirement - // to use a frame pointer because of EH. But until all the code uses - // the same test, leave info.compXcptnsCount here. Also test for - // CORINFO_FLG_SYNCH methods which are converted into try-finally - // with Monitor helper calls in funclet ABI and need to be treated - // as methods with EH. - if (info.compXcptnsCount > 0 || (UsesFunclets() && (info.compFlags & CORINFO_FLG_SYNCH))) + // If there is EH, we need a frame pointer. + // Note this may premature... we can eliminate all EH after morph, sometimes. + // + if (compHndBBtabCount > 0) { codeGen->setFramePointerRequiredEH(true); +#ifdef TARGET_X86 if (UsesFunclets()) { assert(!codeGen->isGCTypeFixed()); // Enforce fully interruptible codegen for funclet unwinding SetInterruptible(true); } - } - -#else // !TARGET_X86 - - if (compHndBBtabCount > 0) - { - codeGen->setFramePointerRequiredEH(true); - } - #endif // TARGET_X86 + } if (compMethodRequiresPInvokeFrame()) { @@ -14054,7 +13849,7 @@ GenTree* Compiler::fgInitThisClass() { #ifdef FEATURE_READYTORUN // Only NativeAOT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR. 
- if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + if (IsNativeAot()) { CORINFO_RESOLVED_TOKEN resolvedToken; memset(&resolvedToken, 0, sizeof(resolvedToken)); @@ -14198,7 +13993,7 @@ GenTreeQmark* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = N GenTreeQmark* topQmark = nullptr; - if (expr->gtOper == GT_QMARK) + if (expr->OperIs(GT_QMARK)) { topQmark = expr->AsQmark(); } @@ -14303,8 +14098,8 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) assert(!varTypeIsFloating(condExpr->TypeGet())); - bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP); - bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP); + bool hasTrueExpr = !trueExpr->OperIs(GT_NOP); + bool hasFalseExpr = !falseExpr->OperIs(GT_NOP); assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark! // Create remainder, cond and "else" blocks. After this, the blocks are in this order: @@ -14450,7 +14245,7 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) } else { - assert(qmark->TypeGet() == TYP_VOID); + assert(qmark->TypeIs(TYP_VOID)); } if (hasTrueExpr) @@ -14758,14 +14553,12 @@ PhaseStatus Compiler::fgMarkImplicitByRefCopyOmissionCandidates() continue; } - unsigned structSize = - argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(m_compiler)->GetSize() : genTypeSize(argNode); - - Compiler::structPassingKind passKind; - m_compiler->getArgTypeForStruct(arg.GetSignatureClassHandle(), &passKind, call->IsVarargs(), - structSize); + if (!call->gtArgs.IsAbiInformationDetermined()) + { + call->gtArgs.DetermineABIInfo(m_compiler, call); + } - if (passKind != SPK_ByReference) + if (!arg.AbiInfo.IsPassedByReference()) { continue; } @@ -14846,10 +14639,6 @@ PhaseStatus Compiler::fgRetypeImplicitByRefArgs() varDsc = lvaGetDesc(lclNum); lvaSetStruct(newLclNum, varDsc->GetLayout(), true); - if (info.compIsVarArgs) - { - lvaSetStructUsedAsVarArg(newLclNum); - } // Copy the struct promotion annotations to the new temp. LclVarDsc* newVarDsc = lvaGetDesc(newLclNum); @@ -14914,8 +14703,8 @@ PhaseStatus Compiler::fgRetypeImplicitByRefArgs() // which is a precondition for this phase when optimizing. assert(fgFirstBB->bbPreds == nullptr); GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF); - GenTree* data = (varDsc->TypeGet() == TYP_STRUCT) ? gtNewBlkIndir(varDsc->GetLayout(), addr) - : gtNewIndir(varDsc->TypeGet(), addr); + GenTree* data = varDsc->TypeIs(TYP_STRUCT) ? gtNewBlkIndir(varDsc->GetLayout(), addr) + : gtNewIndir(varDsc->TypeGet(), addr); GenTree* store = gtNewStoreLclVarNode(newLclNum, data); fgNewStmtAtBeg(fgFirstBB, store); } @@ -14946,10 +14735,6 @@ PhaseStatus Compiler::fgRetypeImplicitByRefArgs() // the parameter which is really a pointer to the struct. fieldVarDsc->lvIsRegArg = false; fieldVarDsc->lvIsMultiRegArg = false; - fieldVarDsc->SetArgReg(REG_NA); -#if FEATURE_MULTIREG_ARGS - fieldVarDsc->SetOtherArgReg(REG_NA); -#endif // Promoted fields of implicit byrefs can't be OSR locals. // if (fieldVarDsc->lvIsOSRLocal) @@ -15109,7 +14894,7 @@ bool Compiler::fgCanTailCallViaJitHelper(GenTreeCall* call) #else // For R2R make sure we go through portable mechanism that the 'EE' side // will properly turn into a runtime JIT. - if (opts.IsReadyToRun()) + if (IsAot()) { return false; } @@ -15152,7 +14937,7 @@ GenTree* Compiler::fgMorphReduceAddOps(GenTree* tree) #ifndef TARGET_64BIT // Transforming 64-bit ADD to 64-bit MUL on 32-bit system results in replacing // ADD ops with a helper function call. Don't apply optimization in that case. 
- if (tree->TypeGet() == TYP_LONG) + if (tree->TypeIs(TYP_LONG)) { return tree; } diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index c5ab836fdc22..ac234686a636 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -242,8 +242,6 @@ void MorphInitBlockHelper::PropagateBlockAssertions() // void MorphInitBlockHelper::PropagateExpansionAssertions() { - // Consider doing this for FieldByField as well - // if (m_comp->optLocalAssertionProp && (m_transformationDecision == BlockTransformation::OneStoreBlock)) { m_comp->fgAssertionGen(m_store); @@ -400,6 +398,7 @@ void MorphInitBlockHelper::TryInitFieldByField() if (m_comp->fgGlobalMorph && m_dstLclNode->IsLastUse(i)) { JITDUMP("Field-by-field init skipping write to dead field V%02u\n", fieldLclNum); + m_comp->fgKillDependentAssertionsSingle(m_dstLclNum DEBUGARG(m_store)); continue; } @@ -666,7 +665,7 @@ void MorphCopyBlockHelper::PrepareSrc() assert(m_store->TypeGet() == m_src->TypeGet()); if (m_store->TypeIs(TYP_STRUCT)) { - assert(ClassLayout::AreCompatible(m_blockLayout, m_src->GetLayout(m_comp))); + assert(m_blockLayout->CanAssignFrom(m_src->GetLayout(m_comp))); } } @@ -679,7 +678,7 @@ void MorphCopyBlockHelper::TrySpecialCases() { assert(m_store->OperIs(GT_STORE_LCL_VAR)); - m_dstVarDsc->lvIsMultiRegRet = true; + m_dstVarDsc->SetIsMultiRegDest(); JITDUMP("Not morphing a multireg node return\n"); m_transformationDecision = BlockTransformation::SkipMultiRegSrc; @@ -1242,6 +1241,7 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField() { INDEBUG(unsigned dstFieldLclNum = m_comp->lvaGetDesc(m_dstLclNum)->lvFieldLclStart + i); JITDUMP("Field-by-field copy skipping write to dead field V%02u\n", dstFieldLclNum); + m_comp->fgKillDependentAssertionsSingle(m_dstLclNum DEBUGARG(m_store)); continue; } diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index baf060aaec2e..d5867d9f8086 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -48,10 +48,12 @@ enum NamedIntrinsic : unsigned short NI_System_Math_MaxMagnitude, NI_System_Math_MaxMagnitudeNumber, NI_System_Math_MaxNumber, + NI_System_Math_MaxUnsigned, NI_System_Math_Min, NI_System_Math_MinMagnitude, NI_System_Math_MinMagnitudeNumber, NI_System_Math_MinNumber, + NI_System_Math_MinUnsigned, NI_System_Math_MultiplyAddEstimate, NI_System_Math_Pow, NI_System_Math_ReciprocalEstimate, @@ -75,6 +77,7 @@ enum NamedIntrinsic : unsigned short NI_System_Threading_Thread_get_CurrentThread, NI_System_Threading_Thread_get_ManagedThreadId, + NI_System_Threading_Thread_FastPollGC, NI_System_Threading_Volatile_Read, NI_System_Threading_Volatile_Write, NI_System_Threading_Volatile_ReadBarrier, @@ -102,6 +105,7 @@ enum NamedIntrinsic : unsigned short NI_System_RuntimeType_get_TypeHandle, NI_System_StubHelpers_GetStubContext, NI_System_StubHelpers_NextCallReturnAddress, + NI_System_StubHelpers_AsyncCallContinuation, NI_Array_Address, NI_Array_Get, @@ -118,6 +122,9 @@ enum NamedIntrinsic : unsigned short NI_System_Runtime_CompilerServices_RuntimeHelpers_IsReferenceOrContainsReferences, NI_System_Runtime_CompilerServices_RuntimeHelpers_GetMethodTable, + NI_System_Runtime_CompilerServices_AsyncHelpers_AsyncSuspend, + NI_System_Runtime_CompilerServices_AsyncHelpers_Await, + NI_System_Runtime_CompilerServices_StaticsHelpers_VolatileReadAsByref, NI_System_Runtime_InteropService_MemoryMarshal_GetArrayDataReference, @@ -149,6 +156,8 @@ enum NamedIntrinsic : unsigned short 
NI_System_Threading_Interlocked_ExchangeAdd, NI_System_Threading_Interlocked_MemoryBarrier, + NI_System_Threading_Tasks_Task_ConfigureAwait, + // These two are special marker IDs so that we still get the inlining profitability boost NI_System_Numerics_Intrinsic, NI_System_Runtime_Intrinsics_Intrinsic, diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index fce4152d0a06..2c4ca928b971 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -18,6 +18,160 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gentree.h" #include "jitstd/algorithm.h" +//------------------------------------------------------------------------ +// ObjectAllocator: construct the object allocator object +// +// Arguments: +// comp - compiler instance +// +// Notes: +// Runs only if Compiler::optMethodFlags has flag OMF_HAS_NEWOBJ or OMF_HAS_NEWARR. +// +// Builds a connection graph where nodes mostly represent gc typed local vars, +// showing how these locals can assign values to one another. +// +// The graph also includes a abstract node types: a node representing an unknown source of values, +// pseudo nodes representing assignments that only happen under particular conditions, +// and nodes representing fields of local structs. +// +ObjectAllocator::ObjectAllocator(Compiler* comp) + : Phase(comp, PHASE_ALLOCATE_OBJECTS) + , m_IsObjectStackAllocationEnabled(false) + , m_AnalysisDone(false) + , m_isR2R(comp->IsReadyToRun()) + , m_bvCount(0) + , m_bitVecTraits(BitVecTraits(comp->lvaCount, comp)) + , m_unknownSourceIndex(BAD_VAR_NUM) + , m_HeapLocalToStackObjLocalMap(comp->getAllocator(CMK_ObjectAllocator)) + , m_HeapLocalToStackArrLocalMap(comp->getAllocator(CMK_ObjectAllocator)) + , m_ConnGraphAdjacencyMatrix(nullptr) + , m_StackAllocMaxSize(0) + , m_stackAllocationCount(0) + , m_EnumeratorLocalToPseudoIndexMap(comp->getAllocator(CMK_ObjectAllocator)) + , m_CloneMap(comp->getAllocator(CMK_ObjectAllocator)) + , m_nextLocalIndex(0) + , m_firstPseudoIndex(BAD_VAR_NUM) + , m_numPseudos(0) + , m_maxPseudos(0) + , m_regionsToClone(0) + , m_trackFields(false) + , m_StoreAddressToIndexMap(comp->getAllocator(CMK_ObjectAllocator)) +{ + m_EscapingPointers = BitVecOps::UninitVal(); + m_PossiblyStackPointingPointers = BitVecOps::UninitVal(); + m_DefinitelyStackPointingPointers = BitVecOps::UninitVal(); + m_ConnGraphAdjacencyMatrix = nullptr; + m_StackAllocMaxSize = (unsigned)JitConfig.JitObjectStackAllocationSize(); + m_trackFields = JitConfig.JitObjectStackAllocationTrackFields() > 0; +} + +//------------------------------------------------------------------------ +// IsTrackedType: see if this type is being tracked by escape analysis +// +// Arguments: +// type - type of interest +// +// Returns: +// true if so +// +bool ObjectAllocator::IsTrackedType(var_types type) +{ + const bool isTrackableScalar = (type == TYP_REF) || (type == TYP_BYREF); + const bool isTrackableStruct = (type == TYP_STRUCT) && m_trackFields; + + return isTrackableScalar || isTrackableStruct; +} + +//------------------------------------------------------------------------ +// IsTrackedLocal: see if this local is being tracked by escape analysis +// +// Arguments: +// lclNum - local of interest +// +// Returns: +// true if so +// +bool ObjectAllocator::IsTrackedLocal(unsigned lclNum) +{ + assert(lclNum < comp->lvaCount); + LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum); + return varDsc->lvTracked; +} + 
+//------------------------------------------------------------------------ +// LocalToIndex: get the bit vector index for a local +// +// Arguments: +// lclNum -- local var num +// +// Returns: +// bvIndex to use, or BAD_VAR_NUM if local is not tracked +// +unsigned ObjectAllocator::LocalToIndex(unsigned lclNum) +{ + assert(IsTrackedLocal(lclNum)); + LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum); + unsigned const result = varDsc->lvVarIndex; + assert(result < m_bvCount); + return result; +} + +//------------------------------------------------------------------------ +// IndexToLocal: get the local num for a bv index +// +// Arguments: +// bvIndex -- bit vector index +// +// Returns: +// local num or BAD_VAR_NUM if index is not a tracked local var +// +unsigned ObjectAllocator::IndexToLocal(unsigned bvIndex) +{ + assert(bvIndex < m_bvCount); + unsigned result = BAD_VAR_NUM; + + if (bvIndex < m_firstPseudoIndex) + { + result = comp->lvaTrackedToVarNum[bvIndex]; + assert(IsTrackedLocal(result)); + } + return result; +} + +#ifdef DEBUG +//------------------------------------------------------------------------------ +// DumpIndex: write a description of a given bv index +// +// Arguments: +// bvIndex - index to describe +// +// Notes: +// includes leading space +// +void ObjectAllocator::DumpIndex(unsigned bvIndex) +{ + if (bvIndex < m_firstPseudoIndex) + { + printf(" V%02u", IndexToLocal(bvIndex)); + return; + } + + if (bvIndex < m_unknownSourceIndex) + { + printf(" P%02u", bvIndex); + return; + } + + if (bvIndex == m_unknownSourceIndex) + { + printf(" U%02u", bvIndex); + return; + } + + printf(" ?%02u", bvIndex); +} +#endif + //------------------------------------------------------------------------ // DoPhase: Run analysis (if object stack allocation is enabled) and then // morph each GT_ALLOCOBJ node either into an allocation helper @@ -98,10 +252,23 @@ PhaseStatus ObjectAllocator::DoPhase() // // Arguments: // lclNum - Escaping pointing local variable number - +// void ObjectAllocator::MarkLclVarAsEscaping(unsigned int lclNum) { - BitVecOps::AddElemD(&m_bitVecTraits, m_EscapingPointers, lclNum); + const unsigned bvIndex = LocalToIndex(lclNum); + MarkIndexAsEscaping(bvIndex); +} + +//------------------------------------------------------------------------------ +// MarkIndexAsEscaping : Mark resource as escaping. +// +// +// Arguments: +// index - bv index for the resource +// +void ObjectAllocator::MarkIndexAsEscaping(unsigned int bvIndex) +{ + BitVecOps::AddElemD(&m_bitVecTraits, m_EscapingPointers, bvIndex); } //------------------------------------------------------------------------------ @@ -111,10 +278,24 @@ void ObjectAllocator::MarkLclVarAsEscaping(unsigned int lclNum) // // Arguments: // lclNum - Possibly stack-object-pointing local variable number - +// void ObjectAllocator::MarkLclVarAsPossiblyStackPointing(unsigned int lclNum) { - BitVecOps::AddElemD(&m_bitVecTraits, m_PossiblyStackPointingPointers, lclNum); + const unsigned bvIndex = LocalToIndex(lclNum); + MarkIndexAsPossiblyStackPointing(bvIndex); +} + +//------------------------------------------------------------------------------ +// MarkIndexAsPossiblyStackPointing : Mark resource as possibly pointing +// to a stack-allocated object. 
+// +// +// Arguments: +// index - bv index for the resource +// +void ObjectAllocator::MarkIndexAsPossiblyStackPointing(unsigned int bvIndex) +{ + BitVecOps::AddElemD(&m_bitVecTraits, m_PossiblyStackPointingPointers, bvIndex); } //------------------------------------------------------------------------------ @@ -124,10 +305,24 @@ void ObjectAllocator::MarkLclVarAsPossiblyStackPointing(unsigned int lclNum) // // Arguments: // lclNum - Definitely stack-object-pointing local variable number - +// void ObjectAllocator::MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum) { - BitVecOps::AddElemD(&m_bitVecTraits, m_DefinitelyStackPointingPointers, lclNum); + const unsigned bvIndex = LocalToIndex(lclNum); + MarkIndexAsDefinitelyStackPointing(bvIndex); +} + +//------------------------------------------------------------------------------ +// MarkIndexAsDefinitelyStackPointing : Mark resource as definitely pointing +// to a stack-allocated object. +// +// +// Arguments: +// index - bv index for the resource +// +void ObjectAllocator::MarkIndexAsDefinitelyStackPointing(unsigned int bvIndex) +{ + BitVecOps::AddElemD(&m_bitVecTraits, m_DefinitelyStackPointingPointers, bvIndex); } //------------------------------------------------------------------------------ @@ -137,40 +332,271 @@ void ObjectAllocator::MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum) // Arguments: // sourceLclNum - Local variable number of the edge source // targetLclNum - Local variable number of the edge target - +// void ObjectAllocator::AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum) { - BitVecOps::AddElemD(&m_bitVecTraits, m_ConnGraphAdjacencyMatrix[sourceLclNum], targetLclNum); + const unsigned sourceBvIndex = LocalToIndex(sourceLclNum); + const unsigned targetBvIndex = LocalToIndex(targetLclNum); + AddConnGraphEdgeIndex(sourceBvIndex, targetBvIndex); +} + +//------------------------------------------------------------------------------ +// AddConnGraphEdgeIndex : Record that the source resource may point to the same set of objects +// as the set pointed to by target resource +// +// Arguments: +// sourceBvIndex - index of the edge source +// targetBvIndex - index of the edge target +// +void ObjectAllocator::AddConnGraphEdgeIndex(unsigned int sourceBvIndex, unsigned int targetBvIndex) +{ + BitVecOps::AddElemD(&m_bitVecTraits, m_ConnGraphAdjacencyMatrix[sourceBvIndex], targetBvIndex); +} + +//------------------------------------------------------------------------ +// PrepareAnalysis: determine how to model the escape analysis problem +// with bit vectors. +// +void ObjectAllocator::PrepareAnalysis() +{ + // Determine how locals map to indices in the bit vectors / connection graph. + // + // In "lcl num" space + // + // We reserve the range [0...L-1] for the initial set of locals. + // Here L is the initial lvaCount. + // + // If conditional escape analysis is enabled, we reserve the range [L...L+M-1] + // for locals allocated during the conditional escape analysis expansions, + // where M is the maximum number of pseudos. + // + // In "bv" space + // + // We reserve the range [0...N-1] for the initial set of tracked locals. + // Here N <= L is the number of tracked locals, determined below, and each + // tracked local has an index assigned in this range. + // + // If conditional escape analysis is enabled, we reserve the range [N...N+M-1] + // for locals allocated during the conditional escape analysis expansions, + // where M is the maximum number of pseudos.
+ // + // We reserve the range [N+M ... N+2M-1] for pseudos. + // + // We reserve the singleton [N+2M] for the "unknown source" local + // + // LocalToIndex translates from "lcl num" space to "bv" space + // IndexToLocal translates from "bv" space space to "lcl num" space + // + const unsigned localCount = comp->lvaCount; + unsigned bvNext = 0; + + // Enumerate which locals are going to appear in our connection + // graph, and assign them BV indicies. + // + for (unsigned lclNum = 0; lclNum < localCount; lclNum++) + { + LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum); + + if (IsTrackedType(varDsc->TypeGet())) + { + varDsc->lvTracked = 1; + varDsc->lvVarIndex = (unsigned short)bvNext; + bvNext++; + } + else + { + varDsc->lvTracked = 0; + varDsc->lvVarIndex = 0; + } + } + + m_nextLocalIndex = bvNext; + + // If we are going to do any conditional escape analysis, determine + // how much extra BV space we'll need. + // + bool const hasEnumeratorLocals = comp->hasImpEnumeratorGdvLocalMap(); + + if (hasEnumeratorLocals) + { + unsigned const enumeratorLocalCount = comp->getImpEnumeratorGdvLocalMap()->GetCount(); + assert(enumeratorLocalCount > 0); + + // For now, disable conditional escape analysis with OSR + // since the dominance picture is muddled at this point. + // + // The conditionally escaping allocation sites will likely be in loops anyways. + // + bool const enableConditionalEscape = JitConfig.JitObjectStackAllocationConditionalEscape() > 0; + bool const isOSR = comp->opts.IsOSR(); + + if (enableConditionalEscape && !isOSR) + { + +#ifdef DEBUG + static ConfigMethodRange JitObjectStackAllocationConditionalEscapeRange; + JitObjectStackAllocationConditionalEscapeRange.EnsureInit( + JitConfig.JitObjectStackAllocationConditionalEscapeRange()); + const unsigned hash = comp->info.compMethodHash(); + const bool inRange = JitObjectStackAllocationConditionalEscapeRange.Contains(hash); +#else + const bool inRange = true; +#endif + + if (inRange) + { + JITDUMP("Enabling conditional escape analysis [%u pseudos]\n", enumeratorLocalCount); + m_maxPseudos = enumeratorLocalCount; + } + else + { + JITDUMP("Not enabling conditional escape analysis (disabled by range config)\n"); + } + } + else + { + JITDUMP("Not enabling conditional escape analysis [%u pseudos]: %s\n", enumeratorLocalCount, + enableConditionalEscape ? "OSR" : "disabled by config"); + } + } + +#ifdef DEBUG + if (m_trackFields) + { + static ConfigMethodRange JitObjectStackAllocationTrackFieldsRange; + JitObjectStackAllocationTrackFieldsRange.EnsureInit(JitConfig.JitObjectStackAllocationTrackFieldsRange()); + const unsigned hash = comp->info.compMethodHash(); + const bool inRange = JitObjectStackAllocationTrackFieldsRange.Contains(hash); + + if (!inRange) + { + JITDUMP("Disabling field wise escape analysis per range config\n"); + m_trackFields = false; + } + } +#endif + + // When we clone to prevent conditional escape, we'll also create a new local + // var that we will track. So we need to leave room for these vars. There can + // be as many of these as there are pseudos. + // + const unsigned maxTrackedLclNum = localCount + m_maxPseudos; + m_firstPseudoIndex = bvNext + m_maxPseudos; // N, per above + bvNext += 2 * m_maxPseudos; + + // A bv index for an unknown source of values + // + m_unknownSourceIndex = bvNext; + bvNext++; + + // Now set up the BV traits. 
+ // + m_bvCount = bvNext; + m_bitVecTraits = BitVecTraits(m_bvCount, comp); + + // Create the reverse mapping from bvIndex to local var index + // (leave room for locals we may allocate) + // + if (comp->lvaTrackedToVarNumSize < maxTrackedLclNum) + { + comp->lvaTrackedToVarNumSize = maxTrackedLclNum; + comp->lvaTrackedToVarNum = new (comp->getAllocator(CMK_LvaTable)) unsigned[comp->lvaTrackedToVarNumSize]; + } + + for (unsigned lclNum = 0; lclNum < localCount; lclNum++) + { + LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum); + + if (varDsc->lvTracked) + { + comp->lvaTrackedToVarNum[varDsc->lvVarIndex] = lclNum; + } + } + + JITDUMP("%u locals, %u tracked by escape analysis\n", localCount, m_nextLocalIndex); + JITDUMP("Local field tracking is %s\n", m_trackFields ? "enabled" : "disabled"); + + if (m_nextLocalIndex > 0) + { + JITDUMP("\nLocal var range [%02u...%02u]\n", 0, localCount - 1); + if (m_maxPseudos > 0) + { + JITDUMP("Enumerator var range [%02u...%02u]\n", localCount, localCount + m_maxPseudos - 1); + } + + JITDUMP("\nLocal var bv range [%02u...%02u]\n", 0, m_nextLocalIndex - 1); + if (m_maxPseudos > 0) + { + JITDUMP("Enumerator var bv range [%02u...%02u]\n", m_nextLocalIndex, m_nextLocalIndex + m_maxPseudos - 1); + JITDUMP("Pseudo var bv range [%02u...%02u]\n", m_nextLocalIndex + m_maxPseudos, + m_nextLocalIndex + 2 * m_maxPseudos - 1); + } + JITDUMP("Unknown var bv range [%02u...%02u]\n", m_unknownSourceIndex, m_unknownSourceIndex); + } } //------------------------------------------------------------------------ // DoAnalysis: Walk over basic blocks of the method and detect all local // variables that can be allocated on the stack. - +// void ObjectAllocator::DoAnalysis() { assert(m_IsObjectStackAllocationEnabled); assert(!m_AnalysisDone); - if (comp->lvaCount > 0) + PrepareAnalysis(); + + if (m_bvCount > 0) { - m_EscapingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits); - m_ConnGraphAdjacencyMatrix = - new (comp->getAllocator(CMK_ObjectAllocator)) BitSetShortLongRep[comp->lvaCount + m_maxPseudoLocals + 1]; + m_EscapingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits); + m_ConnGraphAdjacencyMatrix = new (comp->getAllocator(CMK_ObjectAllocator)) BitSetShortLongRep[m_bvCount]; // If we are doing conditional escape analysis, we also need to compute dominance. 
// - if (CanHavePseudoLocals()) + if (CanHavePseudos()) { assert(comp->m_dfsTree != nullptr); assert(comp->m_domTree == nullptr); comp->m_domTree = FlowGraphDominatorTree::Build(comp->m_dfsTree); } + for (unsigned int i = 0; i < m_bvCount; i++) + { + m_ConnGraphAdjacencyMatrix[i] = BitVecOps::MakeEmpty(&m_bitVecTraits); + } + MarkEscapingVarsAndBuildConnGraph(); ComputeEscapingNodes(&m_bitVecTraits, m_EscapingPointers); } +#ifdef DEBUG + // Print the connection graph + // + if (JitConfig.JitObjectStackAllocationDumpConnGraph() > 0) + { + JITDUMP("digraph ConnectionGraph {\n"); + for (unsigned int i = 0; i < m_bvCount; i++) + { + BitVecOps::Iter iterator(&m_bitVecTraits, m_ConnGraphAdjacencyMatrix[i]); + unsigned int lclIndex; + while (iterator.NextElem(&lclIndex)) + { + JITDUMPEXEC(DumpIndex(lclIndex)); + JITDUMP(" -> "); + JITDUMPEXEC(DumpIndex(i)); + JITDUMP(";\n"); + } + + if (CanIndexEscape(i)) + { + JITDUMPEXEC(DumpIndex(i)); + JITDUMP(" -> E;\n"); + } + } + JITDUMP("}\n"); + } +#endif + m_AnalysisDone = true; } @@ -202,6 +628,7 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph() enum { DoPreOrder = true, + DoPostOrder = true, DoLclVarsOnly = true, ComputeStack = true, }; @@ -216,41 +643,55 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph() Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) { - GenTree* const tree = *use; - unsigned const lclNum = tree->AsLclVarCommon()->GetLclNum(); + GenTree* const tree = *use; + unsigned const lclNum = tree->AsLclVarCommon()->GetLclNum(); + LclVarDsc* const lclDsc = m_compiler->lvaGetDesc(lclNum); - // If this local already escapes, no need to look further. + // Are we tracking this local? // - if (m_allocator->CanLclVarEscape(lclNum)) + if (!m_allocator->IsTrackedLocal(lclNum)) { return Compiler::fgWalkResult::WALK_CONTINUE; } - bool lclEscapes = true; + const unsigned lclIndex = m_allocator->LocalToIndex(lclNum); + + // If this local already escapes, no need to look further. + // + if (m_allocator->CanIndexEscape(lclIndex)) + { + return Compiler::fgWalkResult::WALK_CONTINUE; + } if (tree->OperIsLocalStore()) { - lclEscapes = false; m_allocator->CheckForGuardedAllocationOrCopy(m_block, m_stmt, use, lclNum); } - else if (tree->OperIs(GT_LCL_VAR) && tree->TypeIs(TYP_REF, TYP_BYREF, TYP_I_IMPL)) + else if (tree->OperIs(GT_LCL_VAR)) { assert(tree == m_ancestors.Top()); - if (!m_allocator->CanLclVarEscapeViaParentStack(&m_ancestors, lclNum, m_block)) - { - lclEscapes = false; - } + m_allocator->AnalyzeParentStack(&m_ancestors, lclIndex, m_block); } - - if (lclEscapes) + else if (tree->OperIs(GT_LCL_ADDR) && lclDsc->TypeIs(TYP_STRUCT)) { - if (!m_allocator->CanLclVarEscape(lclNum)) - { - JITDUMP("V%02u first escapes via [%06u]\n", lclNum, m_compiler->dspTreeID(tree)); - } + assert(tree == m_ancestors.Top()); + m_allocator->AnalyzeParentStack(&m_ancestors, lclIndex, m_block); + } + else if (tree->OperIs(GT_LCL_FLD)) + { + // We generally don't see these in early IR. Bail for now. + // + JITDUMP("V%02u local field at [%06u]\n", lclNum, m_compiler->dspTreeID(tree)); + m_allocator->MarkLclVarAsEscaping(lclNum); + } + else + { + assert((tree->OperIs(GT_LCL_ADDR) && !lclDsc->TypeIs(TYP_STRUCT))); + JITDUMP("V%02u address taken at [%06u]\n", lclNum, m_compiler->dspTreeID(tree)); m_allocator->MarkLclVarAsEscaping(lclNum); } - else if (!tree->OperIsLocalStore()) + + if (!m_allocator->CanIndexEscape(lclIndex) && !tree->OperIsLocalStore()) { // Note uses of variables of interest to conditional escape analysis. 
// @@ -259,34 +700,171 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph() return Compiler::fgWalkResult::WALK_CONTINUE; } - }; - - for (unsigned int lclNum = 0; lclNum < comp->lvaCount; ++lclNum) - { - var_types type = comp->lvaTable[lclNum].TypeGet(); - if (type == TYP_REF || genActualType(type) == TYP_I_IMPL || type == TYP_BYREF) + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { - m_ConnGraphAdjacencyMatrix[lclNum] = BitVecOps::MakeEmpty(&m_bitVecTraits); + GenTree* const tree = *use; - if (comp->lvaTable[lclNum].IsAddressExposed()) + if (tree->OperIsLocalStore()) { - JITDUMP(" V%02u is address exposed\n", lclNum); - MarkLclVarAsEscaping(lclNum); - } - } - else - { - // Variable that may not point to objects will not participate in our analysis. - m_ConnGraphAdjacencyMatrix[lclNum] = BitVecOps::UninitVal(); - } - } + GenTreeLclVarCommon* const lclTree = tree->AsLclVarCommon(); + unsigned const lclNum = lclTree->GetLclNum(); + if (m_allocator->IsTrackedLocal(lclNum) && !m_allocator->CanLclVarEscape(lclNum)) + { + // See if we connected it to a source. + // + StoreInfo* const info = m_allocator->m_StoreAddressToIndexMap.LookupPointer(tree); - for (unsigned int p = 0; p < m_maxPseudoLocals; p++) - { - m_ConnGraphAdjacencyMatrix[p + comp->lvaCount] = BitVecOps::MakeEmpty(&m_bitVecTraits); + if ((info == nullptr) || !info->m_connected) + { + // This store was not modelled in the connection graph. + // + // If the stored value was was not a stack-viable allocation or null, + // add an edge to unknown source. This will ensure this local does + // not get retyped as TYP_I_IMPL. + // + GenTree* const data = lclTree->Data(); + ObjectAllocationType const oat = m_allocator->AllocationKind(data); + bool const valueIsUnknown = + (oat == OAT_NEWOBJ_HEAP) || ((oat == OAT_NONE) && !data->IsIntegralConst(0)); + + if (valueIsUnknown) + { + // Add a connection to the unknown source. + // + JITDUMP("V%02u value unknown at [%06u]\n", lclNum, m_compiler->dspTreeID(tree)); + m_allocator->AddConnGraphEdgeIndex(m_allocator->LocalToIndex(lclNum), + m_allocator->m_unknownSourceIndex); + } + } + else + { + JITDUMP(" ... Already connected at [%06u]\n", m_compiler->dspTreeID(tree)); + } + } + else + { + JITDUMP(" ... Not a GC store at [%06u]\n", m_compiler->dspTreeID(tree)); + } + } + else if (tree->OperIs(GT_STOREIND, GT_STORE_BLK)) + { + // Is this a GC store? + // + bool isGCStore = true; + + if (!m_allocator->IsTrackedType(tree->TypeGet())) + { + isGCStore = false; + } + else if (tree->OperIs(GT_STORE_BLK)) + { + isGCStore = tree->AsBlk()->GetLayout()->HasGCPtr(); + } + + // If so, did we model it yet? + // + if (isGCStore) + { + // See if we have an index for the destination, and if we connected it to a source. + // + StoreInfo* const info = m_allocator->m_StoreAddressToIndexMap.LookupPointer(tree); + + // Note here, unlike the local case above, we do not implicitly know the destination + // of the store. So if we have no info, we assume the store is to some place we don't track. + // + if ((info != nullptr) && !info->m_connected) + { + assert(info->m_index != BAD_VAR_NUM); + const unsigned dstIndex = info->m_index; + + JITDUMP(" ... Unmodelled GC store to"); + JITDUMPEXEC(m_allocator->DumpIndex(dstIndex)); + JITDUMP(" at [%06u]\n", m_compiler->dspTreeID(tree)); + + // Look for stores of nullptrs; these do not need to create a connection. 
+ // + GenTree* const data = tree->AsIndir()->Data(); + bool const valueIsUnknown = !data->IsIntegralConst(0); + + if (valueIsUnknown) + { + m_allocator->AddConnGraphEdgeIndex(dstIndex, m_allocator->m_unknownSourceIndex); + JITDUMPEXEC(m_allocator->DumpIndex(dstIndex)) + JITDUMP(" ... value unknown at [%06u]\n", m_compiler->dspTreeID(tree)); + } + else + { + JITDUMP(" ... Store of nullptr(s) at [%06u]\n", m_compiler->dspTreeID(tree)); + } + + info->m_connected = true; + } + else if (info == nullptr) + { + JITDUMP(" ... No store info for [%06u]\n", m_compiler->dspTreeID(tree)); + } + else + { + JITDUMP(" ... Already connected at [%06u]\n", m_compiler->dspTreeID(tree)); + } + } + else + { + JITDUMP(" ... Not a GC store at [%06u]\n", m_compiler->dspTreeID(tree)); + } + } + return Compiler::fgWalkResult::WALK_CONTINUE; + } + }; + + for (unsigned int lclNum = 0; lclNum < comp->lvaCount; ++lclNum) + { + if (!IsTrackedLocal(lclNum)) + { + continue; + } + + LclVarDsc* const lclDsc = comp->lvaGetDesc(lclNum); + const unsigned bvIndex = LocalToIndex(lclNum); + + if (lclDsc->IsAddressExposed()) + { + JITDUMP(" V%02u is address exposed\n", lclNum); + MarkIndexAsEscaping(bvIndex); + continue; + } + + if (lclNum == comp->info.compRetBuffArg) + { + JITDUMP(" V%02u is retbuff\n", lclNum); + MarkIndexAsEscaping(bvIndex); + continue; + } + +#if FEATURE_IMPLICIT_BYREFS + // We have to mark all implicit byref params as escaping, because + // their GC reporting is controlled by the caller + // + if (lclDsc->lvIsParam && lclDsc->lvIsImplicitByRef) + { + JITDUMP(" V%02u is an implicit byref param\n", lclNum); + MarkIndexAsEscaping(bvIndex); + continue; + } +#endif + + // Parameters have unknown initial values. + // OSR locals have unknown initial values. + // + if (lclDsc->lvIsParam || lclDsc->lvIsOSRLocal) + { + AddConnGraphEdgeIndex(bvIndex, m_unknownSourceIndex); + } } + MarkIndexAsEscaping(m_unknownSourceIndex); + // We should have computed the DFS tree already. 
// FlowGraphDfsTree* const dfs = comp->m_dfsTree; @@ -321,29 +899,29 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e JITDUMP("\nComputing escape closure\n\n"); bool doOneMoreIteration = true; BitSetShortLongRep newEscapingNodes = BitVecOps::UninitVal(); - unsigned int lclNum; + unsigned int lclIndex; while (doOneMoreIteration) { BitVecOps::Iter iterator(bitVecTraits, escapingNodesToProcess); doOneMoreIteration = false; - while (iterator.NextElem(&lclNum)) + while (iterator.NextElem(&lclIndex)) { - if (m_ConnGraphAdjacencyMatrix[lclNum] != nullptr) + if (m_ConnGraphAdjacencyMatrix[lclIndex] != nullptr) { doOneMoreIteration = true; // newEscapingNodes = adjacentNodes[lclNum] - BitVecOps::Assign(bitVecTraits, newEscapingNodes, m_ConnGraphAdjacencyMatrix[lclNum]); + BitVecOps::Assign(bitVecTraits, newEscapingNodes, m_ConnGraphAdjacencyMatrix[lclIndex]); // newEscapingNodes = newEscapingNodes \ escapingNodes BitVecOps::DiffD(bitVecTraits, newEscapingNodes, escapingNodes); // escapingNodesToProcess = escapingNodesToProcess U newEscapingNodes BitVecOps::UnionD(bitVecTraits, escapingNodesToProcess, newEscapingNodes); // escapingNodes = escapingNodes U newEscapingNodes BitVecOps::UnionD(bitVecTraits, escapingNodes, newEscapingNodes); - // escapingNodesToProcess = escapingNodesToProcess \ { lclNum } - BitVecOps::RemoveElemD(bitVecTraits, escapingNodesToProcess, lclNum); + // escapingNodesToProcess = escapingNodesToProcess \ { lclIndex } + BitVecOps::RemoveElemD(bitVecTraits, escapingNodesToProcess, lclIndex); #ifdef DEBUG // Print the first witness to new escapes. @@ -351,12 +929,13 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e if (!BitVecOps::IsEmpty(bitVecTraits, newEscapingNodes)) { BitVecOps::Iter iterator(bitVecTraits, newEscapingNodes); - unsigned int newLclNum; - while (iterator.NextElem(&newLclNum)) + unsigned int newLclIndex; + while (iterator.NextElem(&newLclIndex)) { - // Note P's never are sources of assignments... - JITDUMP("%c%02u causes V%02u to escape\n", lclNum >= comp->lvaCount ? 
'P' : 'V', lclNum, - newLclNum); + JITDUMPEXEC(DumpIndex(lclIndex)); + JITDUMP(" causes "); + JITDUMPEXEC(DumpIndex(newLclIndex)); + JITDUMP(" to escape\n"); } } #endif @@ -367,7 +946,7 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e computeClosure(); - if (m_numPseudoLocals > 0) + if (m_numPseudos > 0) { bool newEscapes = AnalyzeIfCloningCanPreventEscape(bitVecTraits, escapingNodes, escapingNodesToProcess); if (newEscapes) @@ -388,77 +967,251 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits) { - bool changed = true; + // Keep track of locals that we know may point at the heap + // + BitVec possiblyHeapPointingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits); + BitVecOps::AddElemD(bitVecTraits, possiblyHeapPointingPointers, m_unknownSourceIndex); + bool changed = true; + unsigned pass = 0; while (changed) { + JITDUMP("\n---- computing stack pointing locals, pass %u\n", pass++); changed = false; - for (unsigned int lclNum = 0; lclNum < comp->lvaCount; ++lclNum) + for (unsigned int index = 0; index < m_bvCount; index++) { - LclVarDsc* lclVarDsc = comp->lvaGetDesc(lclNum); - var_types type = lclVarDsc->TypeGet(); + if (!MayIndexPointToStack(index) && + !BitVecOps::IsEmptyIntersection(bitVecTraits, m_PossiblyStackPointingPointers, + m_ConnGraphAdjacencyMatrix[index])) + { + // We discovered a new pointer that may point to the stack. + JITDUMPEXEC(DumpIndex(index)); + JITDUMP(" may point to the stack\n"); + MarkIndexAsPossiblyStackPointing(index); + changed = true; + } - if (type == TYP_REF || type == TYP_I_IMPL || type == TYP_BYREF) + if (!BitVecOps::IsMember(bitVecTraits, possiblyHeapPointingPointers, index) && + !BitVecOps::IsEmptyIntersection(bitVecTraits, possiblyHeapPointingPointers, + m_ConnGraphAdjacencyMatrix[index])) { - if (!MayLclVarPointToStack(lclNum) && - !BitVecOps::IsEmptyIntersection(bitVecTraits, m_PossiblyStackPointingPointers, - m_ConnGraphAdjacencyMatrix[lclNum])) - { - // We discovered a new pointer that may point to the stack. - MarkLclVarAsPossiblyStackPointing(lclNum); + // We discovered a new pointer that may point to the heap. + JITDUMPEXEC(DumpIndex(index)); + JITDUMP(" may point to the heap\n"); + BitVecOps::AddElemD(bitVecTraits, possiblyHeapPointingPointers, index); + changed = true; + } + } + } + JITDUMP("\n---- done computing stack pointing locals\n"); - // Check if this pointer always points to the stack. - // For OSR the reference may be pointing at the heap-allocated Tier0 version. - // - if ((lclVarDsc->lvSingleDef == 1) && !comp->opts.IsOSR()) - { - // Check if we know what is assigned to this pointer. - unsigned bitCount = BitVecOps::Count(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]); - assert(bitCount <= 1); - if (bitCount == 1) - { - BitVecOps::Iter iter(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]); - unsigned rhsLclNum = 0; - iter.NextElem(&rhsLclNum); + // If a local is possibly stack pointing and not possibly heap pointing, then it is definitely stack pointing. 
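The reworked ComputeStackObjectPointers above runs two propagations in one pass, "may point to stack" and "may point to heap", with the heap side seeded from the unknown-source index; the comment closing this hunk then derives "definitely stack pointing" as the difference of the two sets. A small sketch of that set arithmetic follows; it uses plain std::bitset and invented local numbers rather than BitVecOps, purely as an illustration.

// Illustrative sketch: definitely-stack-pointing = possiblyStack \ possiblyHeap,
// matching the derivation described in the comment above.
#include <bitset>
#include <cstddef>
#include <cstdio>

int main()
{
    constexpr std::size_t N = 8;
    std::bitset<N> possiblyStack;
    std::bitset<N> possiblyHeap;

    possiblyHeap.set(7);  // the "unknown source" slot seeds the heap-pointing side
    possiblyStack.set(2); // V02 may point at a stack allocation
    possiblyStack.set(3); // V03 may point at a stack allocation...
    possiblyHeap.set(3);  // ...but V03 may also point at the heap

    std::bitset<N> definitelyStack = possiblyStack & ~possiblyHeap;

    printf("V02 definitely stack-pointing: %d\n", (int)definitelyStack.test(2)); // 1
    printf("V03 definitely stack-pointing: %d\n", (int)definitelyStack.test(3)); // 0
    return 0;
}

Tracking the heap-pointing side explicitly is what lets the pass classify a local with several reaching definitions, a case the previous single-definition check handled more narrowly.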
+ // + BitVec newDefinitelyStackPointingPointers = BitVecOps::UninitVal(); + BitVecOps::Assign(bitVecTraits, newDefinitelyStackPointingPointers, m_PossiblyStackPointingPointers); + BitVecOps::DiffD(bitVecTraits, newDefinitelyStackPointingPointers, possiblyHeapPointingPointers); - if (DoesLclVarPointToStack(rhsLclNum)) - { - // The only store to lclNum local is the definitely-stack-pointing - // rhsLclNum local so lclNum local is also definitely-stack-pointing. - MarkLclVarAsDefinitelyStackPointing(lclNum); - } - } - } - changed = true; + // We should have only added to the set of things that are definitely stack pointing. + // + assert(BitVecOps::IsSubset(bitVecTraits, m_DefinitelyStackPointingPointers, newDefinitelyStackPointingPointers)); + BitVecOps::AssignNoCopy(bitVecTraits, m_DefinitelyStackPointingPointers, newDefinitelyStackPointingPointers); + +#ifdef DEBUG + if (comp->verbose) + { + printf("Definitely stack-pointing locals:"); + { + BitVecOps::Iter iter(bitVecTraits, m_DefinitelyStackPointingPointers); + unsigned index = 0; + while (iter.NextElem(&index)) + { + DumpIndex(index); + } + printf("\n"); + } + + printf("Possibly stack-pointing locals:"); + { + BitVecOps::Iter iter(bitVecTraits, m_PossiblyStackPointingPointers); + unsigned index = 0; + while (iter.NextElem(&index)) + { + if (!BitVecOps::IsMember(bitVecTraits, m_DefinitelyStackPointingPointers, index)) + { + DumpIndex(index); } } + printf("\n"); } } +#endif +} - JITDUMP("Definitely stack-pointing locals:"); +//------------------------------------------------------------------------ +// CanAllocateLclVarOnStack: Returns true iff local variable can be +// allocated on the stack. +// +// Arguments: +// lclNum - Local variable number +// clsHnd - Class/struct handle of the variable class +// allocType - Type of allocation (newobj or newarr) +// length - Length of the array (for newarr) +// blockSize - [out, optional] exact size of the object +// reason - [out, required] if result is false, reason why +// preliminaryCheck - if true, allow checking before analysis is done +// (for things that inherently disqualify the local) +// +// Return Value: +// Returns true iff local variable can be allocated on the stack. 
+// +bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum, + CORINFO_CLASS_HANDLE clsHnd, + ObjectAllocationType allocType, + ssize_t length, + unsigned int* blockSize, + const char** reason, + bool preliminaryCheck) +{ + assert(preliminaryCheck || m_AnalysisDone); + + bool enableBoxedValueClasses = true; + bool enableRefClasses = true; + bool enableArrays = true; + *reason = "[ok]"; + +#ifdef DEBUG + enableBoxedValueClasses = (JitConfig.JitObjectStackAllocationBoxedValueClass() != 0); + enableRefClasses = (JitConfig.JitObjectStackAllocationRefClass() != 0); + enableArrays = (JitConfig.JitObjectStackAllocationArray() != 0); +#endif + + unsigned classSize = 0; + + if (allocType == OAT_NEWOBJ_HEAP) { - BitVecOps::Iter iter(bitVecTraits, m_DefinitelyStackPointingPointers); - unsigned lclNum = 0; - while (iter.NextElem(&lclNum)) + *reason = "[runtime disallows]"; + return false; + } + if (allocType == OAT_NEWARR) + { + if (!enableArrays) { - JITDUMP(" V%02u", lclNum); + *reason = "[disabled by config]"; + return false; } - JITDUMP("\n"); + + if ((length < 0) || (length > CORINFO_Array_MaxLength)) + { + *reason = "[invalid array length]"; + return false; + } + + ClassLayout* const layout = comp->typGetArrayLayout(clsHnd, (unsigned)length); + classSize = layout->GetSize(); } + else if (allocType == OAT_NEWOBJ) + { + if (comp->info.compCompHnd->isValueClass(clsHnd)) + { + if (!enableBoxedValueClasses) + { + *reason = "[disabled by config]"; + return false; + } - JITDUMP("Possibly stack-pointing locals:"); + classSize = comp->info.compCompHnd->getClassSize(clsHnd); + } + else + { + if (!enableRefClasses) + { + *reason = "[disabled by config]"; + return false; + } + + assert(comp->info.compCompHnd->canAllocateOnStack(clsHnd)); + classSize = comp->info.compCompHnd->getHeapClassSize(clsHnd); + } + } + else + { + assert(!"Unexpected allocation type"); + return false; + } + + if (classSize > m_StackAllocMaxSize) + { + *reason = "[too large]"; + return false; + } + + if (preliminaryCheck) + { + return true; + } + + const bool escapes = CanLclVarEscape(lclNum); + + if (escapes) + { + *reason = "[escapes]"; + return false; + } + + if (blockSize != nullptr) + { + *blockSize = classSize; + } + + return true; +} + +//------------------------------------------------------------------------ +// AllocationKind: return kind of stack-allocatable object made by this tree (if any) +// +// Arguments: +// tree -- tree in question +// +// Returns: +// value indicating type of allocation +// +ObjectAllocator::ObjectAllocationType ObjectAllocator::AllocationKind(GenTree* tree) +{ + ObjectAllocationType allocType = OAT_NONE; + if (tree->OperIs(GT_ALLOCOBJ)) { - BitVecOps::Iter iter(bitVecTraits, m_PossiblyStackPointingPointers); - unsigned lclNum = 0; - while (iter.NextElem(&lclNum)) + GenTreeAllocObj* const allocObj = tree->AsAllocObj(); + CORINFO_CLASS_HANDLE clsHnd = allocObj->gtAllocObjClsHnd; + assert(clsHnd != NO_CLASS_HANDLE); + const bool isValueClass = comp->info.compCompHnd->isValueClass(clsHnd); + bool const canBeOnStack = isValueClass || comp->info.compCompHnd->canAllocateOnStack(clsHnd); + allocType = canBeOnStack ? 
OAT_NEWOBJ : OAT_NEWOBJ_HEAP; + } + else if (!m_isR2R && tree->IsHelperCall()) + { + GenTreeCall* const call = tree->AsCall(); + switch (call->GetHelperNum()) { - if (!BitVecOps::IsMember(bitVecTraits, m_DefinitelyStackPointingPointers, lclNum)) + case CORINFO_HELP_NEWARR_1_VC: + case CORINFO_HELP_NEWARR_1_PTR: + case CORINFO_HELP_NEWARR_1_DIRECT: + case CORINFO_HELP_NEWARR_1_ALIGN8: { - JITDUMP(" V%02u", lclNum); + if ((call->gtArgs.CountUserArgs() == 2) && call->gtArgs.GetUserArgByIndex(1)->GetNode()->IsCnsIntOrI()) + { + allocType = OAT_NEWARR; + } + break; + } + + default: + { + break; } } - JITDUMP("\n"); } + + return allocType; } //------------------------------------------------------------------------ @@ -473,10 +1226,9 @@ void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits) bool ObjectAllocator::MorphAllocObjNodes() { - bool didStackAllocate = false; + m_stackAllocationCount = 0; m_PossiblyStackPointingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits); m_DefinitelyStackPointingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits); - const bool isReadyToRun = comp->opts.IsReadyToRun() && !comp->IsTargetAbi(CORINFO_NATIVEAOT_ABI); for (BasicBlock* const block : comp->Blocks()) { @@ -491,249 +1243,292 @@ bool ObjectAllocator::MorphAllocObjNodes() for (Statement* const stmt : block->Statements()) { - GenTree* stmtExpr = stmt->GetRootNode(); - GenTree* data = nullptr; + GenTree* const stmtExpr = stmt->GetRootNode(); + + if (!stmtExpr->OperIs(GT_STORE_LCL_VAR) || !stmtExpr->TypeIs(TYP_REF)) + { + // We assume that GT_ALLOCOBJ nodes are always present in the canonical form. + assert(!comp->gtTreeContainsOper(stmtExpr, GT_ALLOCOBJ)); + continue; + } - ObjectAllocationType allocType = OAT_NONE; + const unsigned int lclNum = stmtExpr->AsLclVar()->GetLclNum(); + GenTree* const data = stmtExpr->AsLclVar()->Data(); + ObjectAllocationType const allocType = AllocationKind(data); + + if (allocType == OAT_NONE) + { + continue; + } + + AllocationCandidate c(block, stmt, stmtExpr, lclNum, allocType); + MorphAllocObjNode(c); + } + } + + return (m_stackAllocationCount > 0); +} + +//------------------------------------------------------------------------ +// MorphAllocObjNode: Transform an allocation site, possibly into as stack allocation +// +// Arguments: +// candidate -- allocation candidate +// +// Return Value: +// True if candidate was stack allocated +// If false, candidate reason is updated to explain why not +// +void ObjectAllocator::MorphAllocObjNode(AllocationCandidate& candidate) +{ + const bool didStackAllocate = MorphAllocObjNodeHelper(candidate); + const unsigned lclNum = candidate.m_lclNum; + + if (didStackAllocate) + { + // We keep the set of possibly-stack-pointing pointers as a superset of the set of + // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both + // sets. + MarkLclVarAsDefinitelyStackPointing(lclNum); + MarkLclVarAsPossiblyStackPointing(lclNum); + + // If this was conditionally escaping enumerator, establish a connection between this local + // and the enumeratorLocal we already allocated. This is needed because we do early rewriting + // in the conditional clone. 
+ // + unsigned pseudoIndex = BAD_VAR_NUM; + if (m_EnumeratorLocalToPseudoIndexMap.TryGetValue(lclNum, &pseudoIndex)) + { + CloneInfo* info = nullptr; + if (m_CloneMap.Lookup(pseudoIndex, &info)) + { + if (info->m_willClone) + { + JITDUMP("Connecting stack allocated enumerator V%02u to its address var V%02u\n", lclNum, + info->m_enumeratorLocal); + AddConnGraphEdge(lclNum, info->m_enumeratorLocal); + MarkLclVarAsPossiblyStackPointing(info->m_enumeratorLocal); + MarkLclVarAsDefinitelyStackPointing(info->m_enumeratorLocal); + } + } + } + + if (candidate.m_bashCall) + { + candidate.m_statement->GetRootNode()->gtBashToNOP(); + } + + comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC; + m_stackAllocationCount++; + } + else + { + assert(candidate.m_onHeapReason != nullptr); + JITDUMP("Allocating V%02u on the heap: %s\n", lclNum, candidate.m_onHeapReason); + if ((candidate.m_allocType == OAT_NEWOBJ) || (candidate.m_allocType == OAT_NEWOBJ_HEAP)) + { + GenTree* const stmtExpr = candidate.m_tree; + GenTree* const oldData = stmtExpr->AsLclVar()->Data(); + GenTree* const newData = MorphAllocObjNodeIntoHelperCall(oldData->AsAllocObj()); + stmtExpr->AsLclVar()->Data() = newData; + stmtExpr->AddAllEffectsFlags(newData); + } + + if (IsTrackedLocal(lclNum)) + { + AddConnGraphEdgeIndex(LocalToIndex(lclNum), m_unknownSourceIndex); + } + } +} + +//------------------------------------------------------------------------ +// MorphAllocObjNodeHelper: See if we can stack allocate a GT_ALLOCOBJ or GT_NEWARR +// +// Arguments: +// candidate -- allocation candidate +// +// Return Value: +// True if candidate was stack allocated +// If false, candidate reason is updated to explain why not +// +bool ObjectAllocator::MorphAllocObjNodeHelper(AllocationCandidate& candidate) +{ + if (!IsObjectStackAllocationEnabled()) + { + candidate.m_onHeapReason = "[object stack allocation disabled]"; + return false; + } + + // Don't attempt to do stack allocations inside basic blocks that may be in a loop. + // + if (candidate.m_block->HasFlag(BBF_BACKWARD_JUMP)) + { + candidate.m_onHeapReason = "[alloc in loop]"; + return false; + } + + switch (candidate.m_allocType) + { + case OAT_NEWARR: + return MorphAllocObjNodeHelperArr(candidate); + case OAT_NEWOBJ: + return MorphAllocObjNodeHelperObj(candidate); + case OAT_NEWOBJ_HEAP: + candidate.m_onHeapReason = "[runtime disallows]"; + return false; + default: + unreached(); + } +} + +//------------------------------------------------------------------------ +// MorphAllocObjNodeHelperObj: See if we can stack allocate a GT_NEWARR +// +// Arguments: +// candidate -- allocation candidate +// +// Return Value: +// True if candidate was stack allocated +// If false, candidate reason is updated to explain why not +// +bool ObjectAllocator::MorphAllocObjNodeHelperArr(AllocationCandidate& candidate) +{ + assert(candidate.m_block->HasFlag(BBF_HAS_NEWARR)); + + // R2R not yet supported + // + if (m_isR2R) + { + candidate.m_onHeapReason = "[R2R array not yet supported]"; + return false; + } + + GenTree* const data = candidate.m_tree->AsLclVar()->Data(); + + //------------------------------------------------------------------------ + // We expect the following expression tree at this point + // For non-ReadyToRun: + // STMTx (IL 0x... ???) + // * STORE_LCL_VAR ref + // \--* CALL help ref + // +--* CNS_INT(h) long + // \--* CNS_INT long + // For ReadyToRun: + // STMTx (IL 0x... ???) 
+ // * STORE_LCL_VAR ref + // \--* CALL help ref + // \--* CNS_INT long + //------------------------------------------------------------------------ + + bool isExact = false; + bool isNonNull = false; + CORINFO_CLASS_HANDLE clsHnd = comp->gtGetHelperCallClassHandle(data->AsCall(), &isExact, &isNonNull); + GenTree* const len = data->AsCall()->gtArgs.GetUserArgByIndex(1)->GetNode(); + + assert(len != nullptr); + + unsigned int blockSize = 0; + comp->Metrics.NewArrayHelperCalls++; + + if (!isExact || !isNonNull) + { + candidate.m_onHeapReason = "[array type is either non-exact or null]"; + return false; + } + + if (!len->IsCnsIntOrI()) + { + candidate.m_onHeapReason = "[non-constant array size]"; + return false; + } + + if (!CanAllocateLclVarOnStack(candidate.m_lclNum, clsHnd, candidate.m_allocType, len->AsIntCon()->IconValue(), + &blockSize, &candidate.m_onHeapReason)) + { + // reason set by the call + return false; + } - if (stmtExpr->OperIs(GT_STORE_LCL_VAR) && stmtExpr->TypeIs(TYP_REF)) - { - data = stmtExpr->AsLclVar()->Data(); + JITDUMP("Allocating V%02u on the stack\n", candidate.m_lclNum); + const unsigned int stackLclNum = + MorphNewArrNodeIntoStackAlloc(data->AsCall(), clsHnd, (unsigned int)len->AsIntCon()->IconValue(), blockSize, + candidate.m_block, candidate.m_statement); - if (data->OperGet() == GT_ALLOCOBJ) - { - allocType = OAT_NEWOBJ; - } - else if (!isReadyToRun && data->IsHelperCall()) - { - switch (data->AsCall()->GetHelperNum()) - { - case CORINFO_HELP_NEWARR_1_VC: - case CORINFO_HELP_NEWARR_1_OBJ: - case CORINFO_HELP_NEWARR_1_DIRECT: - case CORINFO_HELP_NEWARR_1_ALIGN8: - { - if ((data->AsCall()->gtArgs.CountUserArgs() == 2) && - data->AsCall()->gtArgs.GetUserArgByIndex(1)->GetNode()->IsCnsIntOrI()) - { - allocType = OAT_NEWARR; - } - break; - } + // Keep track of this new local for later type updates. + // + m_HeapLocalToStackArrLocalMap.AddOrUpdate(candidate.m_lclNum, stackLclNum); + comp->Metrics.StackAllocatedArrays++; - default: - { - break; - } - } - } - } + return true; +} - if (allocType != OAT_NONE) - { - bool canStack = false; - bool bashCall = false; - const char* onHeapReason = nullptr; - unsigned int lclNum = stmtExpr->AsLclVar()->GetLclNum(); +//------------------------------------------------------------------------ +// MorphAllocObjNodeHelperObj: See if we can stack allocate a GT_ALLOCOBJ +// +// Arguments: +// candidate -- allocation candidate +// +// Return Value: +// True if candidate was stack allocated +// If false, candidate reason is updated to explain why not +// +bool ObjectAllocator::MorphAllocObjNodeHelperObj(AllocationCandidate& candidate) +{ + assert(candidate.m_block->HasFlag(BBF_HAS_NEWOBJ)); - // Don't attempt to do stack allocations inside basic blocks that may be in a loop. - // - if (!IsObjectStackAllocationEnabled()) - { - onHeapReason = "[object stack allocation disabled]"; - canStack = false; - } - else if (basicBlockHasBackwardJump) - { - onHeapReason = "[alloc in loop]"; - canStack = false; - } - else - { - if (allocType == OAT_NEWARR) - { - assert(basicBlockHasNewArr); + //------------------------------------------------------------------------ + // We expect the following expression tree at this point + // STMTx (IL 0x... ???) 
+ // * STORE_LCL_VAR ref + // \--* ALLOCOBJ ref + // \--* CNS_INT(h) long + //------------------------------------------------------------------------ - // R2R not yet supported - // - assert(!isReadyToRun); - - //------------------------------------------------------------------------ - // We expect the following expression tree at this point - // For non-ReadyToRun: - // STMTx (IL 0x... ???) - // * STORE_LCL_VAR ref - // \--* CALL help ref - // +--* CNS_INT(h) long - // \--* CNS_INT long - // For ReadyToRun: - // STMTx (IL 0x... ???) - // * STORE_LCL_VAR ref - // \--* CALL help ref - // \--* CNS_INT long - //------------------------------------------------------------------------ - - bool isExact = false; - bool isNonNull = false; - CORINFO_CLASS_HANDLE clsHnd = - comp->gtGetHelperCallClassHandle(data->AsCall(), &isExact, &isNonNull); - GenTree* const len = data->AsCall()->gtArgs.GetUserArgByIndex(1)->GetNode(); - - assert(len != nullptr); - - unsigned int blockSize = 0; - comp->Metrics.NewArrayHelperCalls++; - - if (!isExact || !isNonNull) - { - onHeapReason = "[array type is either non-exact or null]"; - canStack = false; - } - else if (!len->IsCnsIntOrI()) - { - onHeapReason = "[non-constant size]"; - canStack = false; - } - else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, len->AsIntCon()->IconValue(), - &blockSize, &onHeapReason)) - { - // reason set by the call - canStack = false; - } - else - { - JITDUMP("Allocating V%02u on the stack\n", lclNum); - canStack = true; - const unsigned int stackLclNum = - MorphNewArrNodeIntoStackAlloc(data->AsCall(), clsHnd, - (unsigned int)len->AsIntCon()->IconValue(), blockSize, - block, stmt); - - // Note we do not want to rewrite uses of the array temp, so we - // do not update m_HeapLocalToStackLocalMap. - // - comp->Metrics.StackAllocatedArrays++; - } - } - else if (allocType == OAT_NEWOBJ) - { - assert(basicBlockHasNewObj); - //------------------------------------------------------------------------ - // We expect the following expression tree at this point - // STMTx (IL 0x... ???) 
- // * STORE_LCL_VAR ref - // \--* ALLOCOBJ ref - // \--* CNS_INT(h) long - //------------------------------------------------------------------------ - - CORINFO_CLASS_HANDLE clsHnd = data->AsAllocObj()->gtAllocObjClsHnd; - CORINFO_CLASS_HANDLE stackClsHnd = clsHnd; - const bool isValueClass = comp->info.compCompHnd->isValueClass(clsHnd); - - if (isValueClass) - { - comp->Metrics.NewBoxedValueClassHelperCalls++; - stackClsHnd = comp->info.compCompHnd->getTypeForBoxOnStack(clsHnd); - } - else - { - comp->Metrics.NewRefClassHelperCalls++; - } + unsigned const lclNum = candidate.m_lclNum; + GenTree* const data = candidate.m_tree->AsLclVar()->Data(); + CORINFO_CLASS_HANDLE clsHnd = data->AsAllocObj()->gtAllocObjClsHnd; + const bool isValueClass = comp->info.compCompHnd->isValueClass(clsHnd); - if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, 0, nullptr, &onHeapReason)) - { - // reason set by the call - canStack = false; - } - else if (stackClsHnd == NO_CLASS_HANDLE) - { - assert(isValueClass); - onHeapReason = "[no class handle for this boxed value class]"; - canStack = false; - } - else - { - JITDUMP("Allocating V%02u on the stack\n", lclNum); - canStack = true; - const unsigned int stackLclNum = - MorphAllocObjNodeIntoStackAlloc(data->AsAllocObj(), stackClsHnd, isValueClass, block, - stmt); - m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum); - - if (isValueClass) - { - comp->Metrics.StackAllocatedBoxedValueClasses++; - } - else - { - comp->Metrics.StackAllocatedRefClasses++; - } + if (isValueClass) + { + comp->Metrics.NewBoxedValueClassHelperCalls++; + } + else + { + comp->Metrics.NewRefClassHelperCalls++; + } - bashCall = true; - } - } - } + if (!CanAllocateLclVarOnStack(lclNum, clsHnd, candidate.m_allocType, 0, nullptr, &candidate.m_onHeapReason)) + { + // reason set by the call + return false; + } - if (canStack) - { - // We keep the set of possibly-stack-pointing pointers as a superset of the set of - // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both - // sets. - MarkLclVarAsDefinitelyStackPointing(lclNum); - MarkLclVarAsPossiblyStackPointing(lclNum); - - // If this was conditionally escaping enumerator, establish a connection between this local - // and the enumeratorLocal we already allocated. This is needed because we do early rewriting - // in the conditional clone. - // - unsigned pseudoLocal = BAD_VAR_NUM; - if (m_EnumeratorLocalToPseudoLocalMap.TryGetValue(lclNum, &pseudoLocal)) - { - CloneInfo* info = nullptr; - if (m_CloneMap.Lookup(pseudoLocal, &info)) - { - if (info->m_willClone) - { - JITDUMP("Connecting stack allocated enumerator V%02u to its address var V%02u\n", - lclNum, info->m_enumeratorLocal); - AddConnGraphEdge(lclNum, info->m_enumeratorLocal); - MarkLclVarAsPossiblyStackPointing(info->m_enumeratorLocal); - MarkLclVarAsDefinitelyStackPointing(info->m_enumeratorLocal); - } - } - } + JITDUMP("Allocating V%02u on the stack\n", lclNum); - if (bashCall) - { - stmt->GetRootNode()->gtBashToNOP(); - } + ClassLayout* layout = nullptr; - comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC; - didStackAllocate = true; - } - else - { - assert(onHeapReason != nullptr); - JITDUMP("Allocating V%02u on the heap: %s\n", lclNum, onHeapReason); - if (allocType == OAT_NEWOBJ) - { - data = MorphAllocObjNodeIntoHelperCall(data->AsAllocObj()); - stmtExpr->AsLclVar()->Data() = data; - stmtExpr->AddAllEffectsFlags(data); - } - } - } -#ifdef DEBUG - else - { - // We assume that GT_ALLOCOBJ nodes are always present in the canonical form. 
- assert(!comp->gtTreeContainsOper(stmt->GetRootNode(), GT_ALLOCOBJ)); - } -#endif // DEBUG - } + if (isValueClass) + { + CORINFO_CLASS_HANDLE boxedClsHnd = comp->info.compCompHnd->getTypeForBox(clsHnd); + assert(boxedClsHnd != NO_CLASS_HANDLE); + ClassLayout* structLayout = comp->typGetObjLayout(boxedClsHnd); + layout = GetBoxedLayout(structLayout); + comp->Metrics.StackAllocatedBoxedValueClasses++; + } + else + { + layout = comp->typGetObjLayout(clsHnd); + comp->Metrics.StackAllocatedRefClasses++; } - return didStackAllocate; + const unsigned int stackLclNum = + MorphAllocObjNodeIntoStackAlloc(data->AsAllocObj(), layout, candidate.m_block, candidate.m_statement); + m_HeapLocalToStackObjLocalMap.AddOrUpdate(lclNum, stackLclNum); + + candidate.m_bashCall = true; + + return true; } //------------------------------------------------------------------------ @@ -775,7 +1570,7 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc #ifdef FEATURE_READYTORUN if (entryPoint.addr != nullptr) { - assert(comp->opts.IsReadyToRun()); + assert(comp->IsAot()); helperCall->AsCall()->setEntryPoint(entryPoint); } else @@ -882,8 +1677,7 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall* // allocation. // Arguments: // allocObj - GT_ALLOCOBJ that will be replaced by a stack allocation -// clsHnd - class representing the stack allocated object -// isValueClass - we are stack allocating a boxed value class +// layout - layout for the stack allocated objectd // block - a basic block where allocObj is // stmt - a statement where allocObj is // @@ -893,24 +1687,29 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall* // Notes: // This function can insert additional statements before stmt. // -unsigned int ObjectAllocator::MorphAllocObjNodeIntoStackAlloc( - GenTreeAllocObj* allocObj, CORINFO_CLASS_HANDLE clsHnd, bool isValueClass, BasicBlock* block, Statement* stmt) +unsigned int ObjectAllocator::MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, + ClassLayout* layout, + BasicBlock* block, + Statement* stmt) { assert(allocObj != nullptr); assert(m_AnalysisDone); - assert(clsHnd != NO_CLASS_HANDLE); - const bool shortLifetime = false; - const unsigned int lclNum = comp->lvaGrabTemp(shortLifetime DEBUGARG( - isValueClass ? "stack allocated boxed value class temp" : "stack allocated ref class temp")); +#ifdef DEBUG + const char* lclName = comp->printfAlloc("stack allocated %.110s", layout->GetShortClassName()); +#endif - comp->lvaSetStruct(lclNum, clsHnd, /* unsafeValueClsCheck */ false); + const bool shortLifetime = false; + const unsigned int lclNum = comp->lvaGrabTemp(shortLifetime DEBUGARG(lclName)); + comp->lvaSetStruct(lclNum, layout, /* unsafeValueClsCheck */ false); - // Initialize the object memory if necessary. - bool bbInALoop = block->HasFlag(BBF_BACKWARD_JUMP); - bool bbIsReturn = block->KindIs(BBJ_RETURN); LclVarDsc* const lclDsc = comp->lvaGetDesc(lclNum); lclDsc->lvStackAllocatedObject = true; + + // Initialize the object memory if necessary. 
+ bool bbInALoop = block->HasFlag(BBF_BACKWARD_JUMP); + bool bbIsReturn = block->KindIs(BBJ_RETURN); + if (comp->fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn)) { //------------------------------------------------------------------------ @@ -1001,32 +1800,28 @@ unsigned int ObjectAllocator::MorphAllocObjNodeIntoStackAlloc( } //------------------------------------------------------------------------ -// CanLclVarEscapeViaParentStack: Check if the local variable escapes via the given parent stack. +// AnalyzeParentStack: Check if the local variable escapes via the given parent stack. // Update the connection graph as necessary. // // Arguments: // parentStack - Parent stack of the current visit -// lclNum - Local variable number +// lclIndex - Index for a tracked, unescaped local referenced at the top of the stack // block - basic block holding the trees // -// Return Value: -// true if the local can escape via the parent stack; false otherwise -// -// Notes: -// The method currently treats all locals assigned to a field as escaping. -// The can potentially be tracked by special field edges in the connection graph. -// -bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parentStack, - unsigned int lclNum, - BasicBlock* block) +void ObjectAllocator::AnalyzeParentStack(ArrayStack* parentStack, unsigned int lclIndex, BasicBlock* block) { assert(parentStack != nullptr); - int parentIndex = 1; + assert(!CanIndexEscape(lclIndex)); - bool keepChecking = true; - bool canLclVarEscapeViaParentStack = true; - bool isCopy = true; - bool isEnumeratorLocal = comp->lvaGetDesc(lclNum)->lvIsEnumerator; + int parentIndex = 1; + const unsigned lclNum = IndexToLocal(lclIndex); + LclVarDsc* const lclDsc = comp->lvaGetDesc(lclNum); + + bool keepChecking = true; + bool canLclVarEscapeViaParentStack = true; + bool isCopy = true; + bool const isEnumeratorLocal = lclDsc->lvIsEnumerator; + bool isAddress = parentStack->Top()->OperIs(GT_LCL_ADDR); while (keepChecking) { @@ -1048,15 +1843,30 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent switch (parent->OperGet()) { - // Update the connection graph if we are storing to a local. - // For all other stores we mark the local as escaping. case GT_STORE_LCL_VAR: { - // Add an edge to the connection graph. + // If the store value is a local address, anything assigned to that local escapes + // + if (isAddress) + { + break; + } + const unsigned int dstLclNum = parent->AsLclVar()->GetLclNum(); - const unsigned int srcLclNum = lclNum; - AddConnGraphEdge(dstLclNum, srcLclNum); + // If we're not tracking stores to the dest local, the value does not escape. + // + if (!IsTrackedLocal(dstLclNum)) + { + canLclVarEscapeViaParentStack = false; + break; + } + + const unsigned dstIndex = LocalToIndex(dstLclNum); + + // Add an edge to the connection graph. 
+ // + AddConnGraphEdgeIndex(dstIndex, lclIndex); canLclVarEscapeViaParentStack = false; // If the source of this store is an enumerator local, @@ -1064,8 +1874,12 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent // if (isCopy) { - CheckForEnumeratorUse(srcLclNum, dstLclNum); + CheckForEnumeratorUse(lclNum, dstLclNum); } + + // Note that we modelled this store in the connection graph + // + m_StoreAddressToIndexMap.Set(parent, StoreInfo(dstIndex, /* connected */ true)); } break; @@ -1077,6 +1891,7 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent case GT_GE: case GT_NULLCHECK: case GT_ARR_LENGTH: + case GT_BOUNDS_CHECK: canLclVarEscapeViaParentStack = false; break; @@ -1091,9 +1906,22 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent case GT_COLON: case GT_QMARK: case GT_ADD: - case GT_SUB: case GT_FIELD_ADDR: - // Check whether the local escapes via its grandparent. + // Check whether the local escapes higher up + ++parentIndex; + keepChecking = true; + break; + + case GT_SUB: + // Sub of two GC refs is no longer a GC ref. + if (!parent->TypeIs(TYP_BYREF, TYP_REF)) + { + canLclVarEscapeViaParentStack = false; + break; + } + + // Check whether the local escapes higher up + isAddress = false; ++parentIndex; keepChecking = true; break; @@ -1118,26 +1946,214 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent case GT_STOREIND: case GT_STORE_BLK: - case GT_BLK: - if (tree != parent->AsIndir()->Addr()) + { + // Is this a GC store? + // + if (!IsTrackedType(parent->TypeGet())) { - // TODO-ObjectStackAllocation: track stores to fields. + canLclVarEscapeViaParentStack = false; break; } - FALLTHROUGH; + + if (tree->OperIs(GT_STORE_BLK)) + { + ClassLayout* const layout = parent->AsBlk()->GetLayout(); + + if (!layout->HasGCPtr()) + { + canLclVarEscapeViaParentStack = false; + break; + } + } + GenTree* const addr = parent->AsIndir()->Addr(); + if (tree == addr) + { + if (isAddress) + { + // Remember the resource being stored to. + // + JITDUMP("... store address is local\n"); + m_StoreAddressToIndexMap.Set(parent, StoreInfo(lclIndex)); + } + + // The address does not escape + // + canLclVarEscapeViaParentStack = false; + break; + } + + // If we're walking the value tree, model the store. + // + StoreInfo* const dstInfo = m_StoreAddressToIndexMap.LookupPointer(parent); + if (dstInfo != nullptr) + { + assert(dstInfo->m_index != BAD_VAR_NUM); + assert(!dstInfo->m_connected); + JITDUMP("... local.field store\n"); + + // Note that we will model this store + // + dstInfo->m_connected = true; + + JITDUMP(" ... Modelled GC store to"); + JITDUMPEXEC(DumpIndex(dstInfo->m_index)); + JITDUMP(" at [%06u]\n", comp->dspTreeID(parent)); + + if (isAddress) + { + AddConnGraphEdgeIndex(dstInfo->m_index, m_unknownSourceIndex); + } + else + { + AddConnGraphEdgeIndex(dstInfo->m_index, lclIndex); + canLclVarEscapeViaParentStack = false; + break; + } + } + + // We're storing the value somewhere unknown. Assume the worst. + break; + } + + case GT_STORE_LCL_FLD: + { + // Does this store a type we're tracking? + // + if (!IsTrackedType(tree->TypeGet())) + { + canLclVarEscapeViaParentStack = false; + break; + } + + unsigned const dstLclNum = parent->AsLclVarCommon()->GetLclNum(); + + if (IsTrackedLocal(dstLclNum)) + { + JITDUMP("... 
local V%02u.f store\n", dstLclNum); + const unsigned dstIndex = LocalToIndex(dstLclNum); + AddConnGraphEdgeIndex(dstIndex, lclIndex); + canLclVarEscapeViaParentStack = false; + + // Note that we modelled this store in the connection graph + // + m_StoreAddressToIndexMap.Set(parent, StoreInfo(dstIndex, /* connected */ true)); + } + + // Else we're storing the value somewhere unknown. + // Assume the worst. + break; + } + case GT_IND: - // Address of the field/ind is not taken so the local doesn't escape. + case GT_BLK: + { + // Does this load a type we're tracking? + // + if (!IsTrackedType(parent->TypeGet())) + { + canLclVarEscapeViaParentStack = false; + break; + } + + // For structs we need to check the layout as well + // + if (parent->OperIs(GT_BLK)) + { + ClassLayout* const layout = parent->AsBlk()->GetLayout(); + + if (!layout->HasGCPtr()) + { + canLclVarEscapeViaParentStack = false; + break; + } + } + + GenTree* const addr = parent->AsIndir()->Addr(); + + // For loads from local structs we may be tracking the underlying fields. + // + // We can assume that the local being read is lclNum, + // since we have walked up to this node from a leaf local. + // + // We only track through the first indir. + // + if (m_trackFields && isAddress) + { + JITDUMP("... load local.field\n"); + ++parentIndex; + isAddress = false; + keepChecking = true; + break; + } + + // Address doesn't refer to any location we track + // + canLclVarEscapeViaParentStack = false; + break; + } + + case GT_LCL_FLD: + { + // Does this load a type we're tracking? + // + if (!IsTrackedType(parent->TypeGet())) + { + canLclVarEscapeViaParentStack = false; + break; + } + + // For loads from local structs we may be tracking the underlying fields. + // + if (m_trackFields && (lclDsc->TypeGet() == TYP_STRUCT)) + { + JITDUMP("... load local.field\n"); + ++parentIndex; + isAddress = false; + keepChecking = true; + break; + } + + // Load from some untracked local's fields. + // canLclVarEscapeViaParentStack = false; break; + } case GT_CALL: { - GenTreeCall* const asCall = parent->AsCall(); + GenTreeCall* const call = parent->AsCall(); + + if (call->IsHelperCall()) + { + canLclVarEscapeViaParentStack = + !Compiler::s_helperCallProperties.IsNoEscape(comp->eeGetHelperNum(call->gtCallMethHnd)); + } + else if (call->IsSpecialIntrinsic()) + { + // Some known special intrinsics don't escape. At this moment, only the ones accepting byrefs + // are supported. In order to support more intrinsics accepting objects, we need extra work + // on the VM side which is not ready for that yet. + // + switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd)) + { + case NI_System_SpanHelpers_ClearWithoutReferences: + case NI_System_SpanHelpers_Fill: + case NI_System_SpanHelpers_Memmove: + case NI_System_SpanHelpers_SequenceEqual: + canLclVarEscapeViaParentStack = false; + break; - if (asCall->IsHelperCall()) + default: + break; + } + } + else if (call->IsDelegateInvoke()) { - canLclVarEscapeViaParentStack = - !Compiler::s_helperCallProperties.IsNoEscape(comp->eeGetHelperNum(asCall->gtCallMethHnd)); + if (tree == call->gtArgs.GetThisArg()->GetNode()) + { + JITDUMP("Delegate invoke this...\n"); + canLclVarEscapeViaParentStack = false; + } } // Note there is nothing special here about the parent being a call. 
We could move all this processing @@ -1148,7 +2164,7 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent // // In particular it might be tempting to look for references in uncatchable BBJ_THROWs or similar // and enable a kind of "partial escape analysis" where we copy from stack to heap just before the - // point of escape. We would have to add pseudo-locals for this like we do for GDV, but we wouldn't + // point of escape. We would have to add pseudos for this like we do for GDV, but we wouldn't // necessarily need to do the predicate analysis or cloning. // if (isEnumeratorLocal) @@ -1164,7 +2180,13 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent } } - return canLclVarEscapeViaParentStack; + if (canLclVarEscapeViaParentStack && !CanIndexEscape(lclIndex)) + { + JITDUMPEXEC(DumpIndex(lclIndex)); + JITDUMP(" first escapes via [%06u]...[%06u]\n", comp->dspTreeID(parentStack->Top()), + comp->dspTreeID(parentStack->Top(parentIndex))); + MarkLclVarAsEscaping(lclNum); + } } //------------------------------------------------------------------------ @@ -1175,6 +2197,8 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent // tree - Possibly-stack-pointing tree // parentStack - Parent stack of the possibly-stack-pointing tree // newType - New type of the possibly-stack-pointing tree +// newLayout - Layout for a retyped local struct +// retypeFields - Inspiring local is a retyped local struct; retype fields. // // Notes: // If newType is TYP_I_IMPL, the tree is definitely pointing to the stack (or is null); @@ -1182,13 +2206,14 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent // In addition to updating types this method may set GTF_IND_TGT_NOT_HEAP on ancestor // indirections to help codegen with write barrier selection. // -void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack* parentStack, var_types newType) +void ObjectAllocator::UpdateAncestorTypes( + GenTree* tree, ArrayStack* parentStack, var_types newType, ClassLayout* newLayout, bool retypeFields) { assert(newType == TYP_BYREF || newType == TYP_I_IMPL); assert(parentStack != nullptr); - int parentIndex = 1; - + int parentIndex = 1; bool keepChecking = true; + bool sawIndir = false; while (keepChecking && (parentStack->Height() > parentIndex)) { @@ -1198,12 +2223,21 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack* p switch (parent->OperGet()) { case GT_STORE_LCL_VAR: - case GT_BOX: - if (parent->TypeGet() == TYP_REF) + { + if (parent->TypeGet() != newType) { - parent->ChangeType(newType); + // If we have retyped the local, retype the store. + // Else keep TYP_BYREF. + // + GenTreeLclVarCommon* const lclParent = parent->AsLclVarCommon(); + LclVarDsc* const lclDsc = comp->lvaGetDesc(lclParent); + if (parent->TypeIs(TYP_REF) || (lclDsc->TypeGet() == newType)) + { + parent->ChangeType(newType); + } } break; + } case GT_EQ: case GT_NE: @@ -1211,6 +2245,29 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack* p case GT_GT: case GT_LE: case GT_GE: + { + // We may see sibling null refs. Retype them as appropriate. 
+ // + GenTree* const lhs = parent->AsOp()->gtGetOp1(); + GenTree* const rhs = parent->AsOp()->gtGetOp2(); + + if (lhs == tree) + { + if (rhs->IsIntegralConst(0)) + { + rhs->ChangeType(newType); + } + } + else if (rhs == tree) + { + if (lhs->IsIntegralConst(0)) + { + lhs->ChangeType(newType); + } + } + break; + } + case GT_NULLCHECK: case GT_ARR_LENGTH: break; @@ -1224,10 +2281,9 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack* p FALLTHROUGH; case GT_QMARK: case GT_ADD: - case GT_SUB: case GT_FIELD_ADDR: - case GT_INDEX_ADDR: - if (parent->TypeGet() == TYP_REF) + case GT_BOX: + if (parent->TypeGet() != newType) { parent->ChangeType(newType); } @@ -1235,6 +2291,43 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack* p keepChecking = true; break; + case GT_INDEX_ADDR: + // We are not retyping array "fields" yet + // so we can stop updating here. + // + if (parent->TypeGet() != newType) + { + parent->ChangeType(newType); + } + break; + + case GT_SUB: + { + // Parent type can be TYP_I_IMPL, TYP_BYREF. + // But not TYP_REF. + // + var_types parentType = parent->TypeGet(); + assert(parentType != TYP_REF); + + // New type can be TYP_I_IMPL, TYP_BYREF. + // But TYP_BYREF only if parent is also + // + if (parentType != newType) + { + // We must be retyping TYP_BYREF to TYP_I_IMPL. + // + assert(newType == TYP_I_IMPL); + assert(parentType == TYP_BYREF); + parent->ChangeType(newType); + + // Propagate that upwards. + // + ++parentIndex; + keepChecking = true; + } + break; + } + case GT_COLON: { GenTree* const lhs = parent->AsOp()->gtGetOp1(); @@ -1263,26 +2356,124 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack* p case GT_STOREIND: case GT_STORE_BLK: - case GT_BLK: - assert(tree == parent->AsIndir()->Addr()); - - // The new target could be *not* on the heap. - parent->gtFlags &= ~GTF_IND_TGT_HEAP; + { + if (tree == parent->AsIndir()->Addr()) + { + // The new target could be *not* on the heap. + parent->gtFlags &= ~GTF_IND_TGT_HEAP; - if (newType != TYP_BYREF) + if (newType != TYP_BYREF) + { + // This indicates that a write barrier is not needed when writing + // to this field/indirection since the address is not pointing to the heap. + // It's either null or points to inside a stack-allocated object. + parent->gtFlags |= GTF_IND_TGT_NOT_HEAP; + } + } + else { - // This indicates that a write barrier is not needed when writing - // to this field/indirection since the address is not pointing to the heap. - // It's either null or points to inside a stack-allocated object. 
- parent->gtFlags |= GTF_IND_TGT_NOT_HEAP; + assert(tree == parent->AsIndir()->Data()); + + // If we are storing to a GC struct field, we may need to retype the store + // + if (varTypeIsGC(parent->TypeGet())) + { + parent->ChangeType(newType); + } + else if (retypeFields && parent->OperIs(GT_STORE_BLK)) + { + GenTreeBlk* const block = parent->AsBlk(); + ClassLayout* const oldLayout = block->GetLayout(); + + if (oldLayout->HasGCPtr()) + { + if (newLayout->GetSize() == oldLayout->GetSize()) + { + block->SetLayout(newLayout); + } + else + { + // We must be storing just a portion of the original local + // + assert(newLayout->GetSize() > oldLayout->GetSize()); + + if (newLayout->HasGCPtr()) + { + block->SetLayout(GetByrefLayout(oldLayout)); + } + else + { + block->SetLayout(GetNonGCLayout(oldLayout)); + } + } + } + } } break; + } case GT_IND: + case GT_BLK: + { + // If we are loading from a GC struct field, we may need to retype the load + // + if (retypeFields && !sawIndir) + { + bool didRetype = false; + + if (varTypeIsGC(parent->TypeGet())) + { + parent->ChangeType(newType); + didRetype = true; + } + else if (parent->OperIs(GT_BLK)) + { + GenTreeBlk* const block = parent->AsBlk(); + ClassLayout* const oldLayout = block->GetLayout(); + + if (oldLayout->HasGCPtr()) + { + if (newLayout->GetSize() == oldLayout->GetSize()) + { + block->SetLayout(newLayout); + } + else + { + // We must be loading just a portion of the original local + // + assert(newLayout->GetSize() > oldLayout->GetSize()); + + if (newLayout->HasGCPtr()) + { + block->SetLayout(GetByrefLayout(oldLayout)); + } + else + { + block->SetLayout(GetNonGCLayout(oldLayout)); + } + } + + didRetype = true; + } + } + + if (didRetype) + { + ++parentIndex; + keepChecking = true; + sawIndir = true; + } + } + + break; + } + case GT_CALL: break; default: + JITDUMP("UpdateAncestorTypes: unexpected op %s in [%06u]\n", GenTree::OpName(parent->OperGet()), + comp->dspTreeID(parent)); unreached(); } @@ -1298,7 +2489,10 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack* p //------------------------------------------------------------------------ // RewriteUses: Find uses of the newobj temp for stack-allocated // objects and replace with address of the stack local. - +// +// Notes: +// Also retypes GC typed locals that now may or must refer to stack objects +// void ObjectAllocator::RewriteUses() { class RewriteUsesVisitor final : public GenTreeVisitor @@ -1328,47 +2522,43 @@ void ObjectAllocator::RewriteUses() return Compiler::fgWalkResult::WALK_CONTINUE; } - const unsigned int lclNum = tree->AsLclVarCommon()->GetLclNum(); - unsigned int newLclNum = BAD_VAR_NUM; - LclVarDsc* lclVarDsc = m_compiler->lvaGetDesc(lclNum); + const unsigned int lclNum = tree->AsLclVarCommon()->GetLclNum(); + LclVarDsc* lclVarDsc = m_compiler->lvaGetDesc(lclNum); + bool retypeFields = false; - if ((lclNum < BitVecTraits::GetSize(&m_allocator->m_bitVecTraits)) && - m_allocator->MayLclVarPointToStack(lclNum)) + // Revise IR for local that were retyped or are mapped to stack locals + // + if (!lclVarDsc->lvTracked) { - // Analysis does not handle indirect access to pointer locals. - assert(tree->OperIsScalarLocal()); + return Compiler::fgWalkResult::WALK_CONTINUE; + } - var_types newType; - if (m_allocator->m_HeapLocalToStackLocalMap.TryGetValue(lclNum, &newLclNum)) - { - assert(tree->OperIs(GT_LCL_VAR)); // Must be a use. 
- newType = TYP_I_IMPL; - tree = m_compiler->gtNewLclVarAddrNode(newLclNum); - *use = tree; - } - else - { - newType = m_allocator->DoesLclVarPointToStack(lclNum) ? TYP_I_IMPL : TYP_BYREF; - if (tree->TypeGet() == TYP_REF) - { - tree->ChangeType(newType); - } - } + unsigned int newLclNum = BAD_VAR_NUM; + var_types newType = lclVarDsc->TypeGet(); + ClassLayout* newLayout = nullptr; - if (lclVarDsc->lvType != newType) - { - JITDUMP("Changing the type of V%02u from %s to %s\n", lclNum, varTypeName(lclVarDsc->lvType), - varTypeName(newType)); - lclVarDsc->lvType = newType; - } - m_allocator->UpdateAncestorTypes(tree, &m_ancestors, newType); + if (m_allocator->m_HeapLocalToStackObjLocalMap.TryGetValue(lclNum, &newLclNum)) + { + assert(tree->OperIs(GT_LCL_VAR)); // Must be a use. + newType = TYP_I_IMPL; + tree = m_compiler->gtNewLclVarAddrNode(newLclNum); + *use = tree; - if (newLclNum != BAD_VAR_NUM) - { - JITDUMP("Update V%02u to V%02u from use [%06u]\n", lclNum, newLclNum, m_compiler->dspTreeID(tree)); - DISPTREE(tree); - } + JITDUMP("Update V%02u to V%02u in use [%06u]\n", lclNum, newLclNum, m_compiler->dspTreeID(tree)); + DISPTREE(tree); } + else if (newType == TYP_STRUCT) + { + newLayout = lclVarDsc->GetLayout(); + newType = newLayout->HasGCPtr() ? TYP_BYREF : TYP_I_IMPL; + retypeFields = true; + } + else + { + tree->ChangeType(newType); + } + + m_allocator->UpdateAncestorTypes(tree, &m_ancestors, newType, newLayout, retypeFields); return Compiler::fgWalkResult::WALK_CONTINUE; } @@ -1390,6 +2580,7 @@ void ObjectAllocator::RewriteUses() } } // Make box accesses explicit for UNBOX_HELPER + // Expand delegate invoke for calls where "this" is possibly stack pointing // else if (tree->IsCall()) { @@ -1438,6 +2629,54 @@ void ObjectAllocator::RewriteUses() } } } + else if (call->IsDelegateInvoke()) + { + CallArg* const thisArg = call->gtArgs.GetThisArg(); + GenTree* const delegateThis = thisArg->GetNode(); + + if (delegateThis->OperIs(GT_LCL_VAR, GT_LCL_ADDR)) + { + GenTreeLclVarCommon* const lcl = delegateThis->AsLclVarCommon(); + bool const isStackAllocatedDelegate = + delegateThis->OperIs(GT_LCL_ADDR) || m_allocator->DoesLclVarPointToStack(lcl->GetLclNum()); + + if (isStackAllocatedDelegate) + { + JITDUMP("Expanding delegate invoke [%06u]\n", m_compiler->dspTreeID(call)); + + // Expand the delegate invoke early, so that physical promotion has + // a chance to promote the delegate fields. + // + // Note the instance field may also be stack allocatable (someday) + // + GenTree* const cloneThis = m_compiler->gtClone(lcl, /* complexOk */ true); + unsigned const instanceOffset = m_compiler->eeGetEEInfo()->offsetOfDelegateInstance; + GenTree* const newThisAddr = + m_compiler->gtNewOperNode(GT_ADD, TYP_I_IMPL, cloneThis, + m_compiler->gtNewIconNode(instanceOffset, TYP_I_IMPL)); + + // For now assume the instance field is on the heap... 
+ // + GenTree* const newThis = m_compiler->gtNewIndir(TYP_REF, newThisAddr); + thisArg->SetEarlyNode(newThis); + + // the control target is + // [originalThis + firstTgtOffs] + // + unsigned const targetOffset = m_compiler->eeGetEEInfo()->offsetOfDelegateFirstTarget; + GenTree* const targetAddr = + m_compiler->gtNewOperNode(GT_ADD, TYP_I_IMPL, lcl, + m_compiler->gtNewIconNode(targetOffset, TYP_I_IMPL)); + GenTree* const target = m_compiler->gtNewIndir(TYP_I_IMPL, targetAddr); + + // Update call state -- now an indirect call to the delegate target + // + call->gtCallAddr = target; + call->gtCallType = CT_INDIRECT; + call->gtCallMoreFlags &= ~(GTF_CALL_M_DELEGATE_INV | GTF_CALL_M_WRAPPER_DELEGATE_INV); + } + } + } } else if (tree->OperIsIndir()) { @@ -1473,6 +2712,107 @@ void ObjectAllocator::RewriteUses() } }; + // Determine which locals should be retyped, and retype them. + // Use lvTracked to remember which locals were retyped or will be replaced. + // + for (unsigned lclNum = 0; lclNum < comp->lvaCount; lclNum++) + { + LclVarDsc* const lclVarDsc = comp->lvaGetDesc(lclNum); + + if (!lclVarDsc->lvTracked) + { + JITDUMP("V%02u not tracked\n", lclNum); + continue; + } + + if (!MayLclVarPointToStack(lclNum)) + { + JITDUMP("V%02u not possibly stack pointing\n", lclNum); + lclVarDsc->lvTracked = 0; + continue; + } + + var_types newType = TYP_UNDEF; + if (m_HeapLocalToStackObjLocalMap.Contains(lclNum)) + { + // Appearances of lclNum will be replaced. We need to retype. + // + newType = TYP_I_IMPL; + } + else if (m_HeapLocalToStackArrLocalMap.Contains(lclNum)) + { + // Appearances of lclNum will be NOT be replaced. We need to retype. + // + newType = TYP_I_IMPL; + } + else + { + newType = DoesLclVarPointToStack(lclNum) ? TYP_I_IMPL : TYP_BYREF; + } + + // For local structs, retype the GC fields. + // + if (lclVarDsc->lvType == TYP_STRUCT) + { + assert(m_trackFields); + + ClassLayout* const layout = lclVarDsc->GetLayout(); + ClassLayout* newLayout = nullptr; + + if (!layout->HasGCPtr()) + { + assert(newType == TYP_I_IMPL); + JITDUMP("V%02u not GC\n", lclNum); + lclVarDsc->lvTracked = 0; + continue; + } + + if (newType == TYP_I_IMPL) + { + // New layout with no gc refs + padding + newLayout = GetNonGCLayout(layout); + JITDUMP("Changing layout of struct V%02u to block\n", lclNum); + lclVarDsc->ChangeLayout(newLayout); + } + else + { + // New layout with all gc refs as byrefs + padding + // (todo, perhaps: see if old layout was already all byrefs) + newLayout = GetByrefLayout(layout); + JITDUMP("Changing layout of struct V%02u to byref\n", lclNum); + lclVarDsc->ChangeLayout(newLayout); + } + } + else + { + // For non-struct locals, retype the local + // + if (!varTypeIsGC(lclVarDsc->TypeGet())) + { + JITDUMP("V%02u not GC\n", lclNum); + lclVarDsc->lvTracked = 0; + continue; + } + + if (lclVarDsc->lvType != newType) + { + // Params should only retype from ref->byref as they have unknown initial value + // + assert(!(lclVarDsc->lvIsParam && (newType == TYP_I_IMPL))); + JITDUMP("Changing the type of V%02u from %s to %s\n", lclNum, varTypeName(lclVarDsc->lvType), + varTypeName(newType)); + lclVarDsc->lvType = newType; + } + else + { + JITDUMP("V%02u already properly typed\n", lclNum); + lclVarDsc->lvTracked = 0; + } + } + } + + // Update locals and types in the IR to match. 
+ // for (BasicBlock* const block : comp->Blocks()) { for (Statement* const stmt : block->Statements()) @@ -1497,14 +2837,14 @@ void ObjectAllocator::RewriteUses() // // Notes: // During our analysis we may have noted conditionally escaping objects -// and var references and connected them to a pseduolocal, along with information +// and var references and connected them to a pseudo, along with information // about how we could clone blocks to ensure that the object could be stack allocated. // // The current assumption is that these nodes do not escape, but to ensure // that we must be able to clone the code and remove the potential for escape // -// So, we verify for each case that we can clone; if not, mark we the pseudolocal -// as escaping. If any pseudlocal now escapes, we return true so that the main +// So, we verify for each case that we can clone; if not, we mark the pseudo +// as escaping. If any pseudo now escapes, we return true so that the main // analysis can update its closure. // // We may choose not to clone a candidate for several reasons: @@ -1518,45 +2858,53 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait { bool newEscapes = false; - for (unsigned p = 0; p < m_numPseudoLocals; p++) + for (unsigned p = 0; p < m_numPseudos; p++) { - unsigned const pseudoLocal = p + comp->lvaCount; + unsigned const pseudoIndex = p + m_firstPseudoIndex; bool canClone = true; CloneInfo* info = nullptr; - const bool hasInfo = m_CloneMap.Lookup(pseudoLocal, &info); + const bool hasInfo = m_CloneMap.Lookup(pseudoIndex, &info); if (!hasInfo) { - // We never found any conditional allocation attached to this pseudoLocal. + // We never found any conditional allocation attached to this pseudoIndex. // - JITDUMP(" P%02u has no guard info\n", pseudoLocal); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP(" has no guard info\n"); canClone = false; break; } - unsigned lclNum = BAD_VAR_NUM; - BitVec pseudoLocalAdjacencies = m_ConnGraphAdjacencyMatrix[pseudoLocal]; + // See what locals were "assigned" to the pseudo. + // + BitVec pseudoAdjacencies = m_ConnGraphAdjacencyMatrix[pseudoIndex]; // If we found an allocation but didn't find any conditionally escaping uses, then cloning is of no use // - if (BitVecOps::IsEmpty(bitVecTraits, pseudoLocalAdjacencies)) + if (BitVecOps::IsEmpty(bitVecTraits, pseudoAdjacencies)) { - JITDUMP(" No conditionally escaping uses under P%02u, so no reason to clone\n", pseudoLocal); + JITDUMP(" No conditionally escaping uses under"); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP(", so no reason to clone\n"); canClone = false; break; } // Check if each conditionally escaping local escapes on its own; if so cloning is of no use // - BitVecOps::Iter iterator(bitVecTraits, pseudoLocalAdjacencies); - while (canClone && iterator.NextElem(&lclNum)) + BitVecOps::Iter iterator(bitVecTraits, pseudoAdjacencies); + unsigned lclNumIndex = BAD_VAR_NUM; + while (canClone && iterator.NextElem(&lclNumIndex)) { - if (BitVecOps::IsMember(bitVecTraits, escapingNodes, lclNum)) + if (BitVecOps::IsMember(bitVecTraits, escapingNodes, lclNumIndex)) { // The enumerator var or a related var had escaping uses somewhere in the method, // not under a failing GDV or any GDV. 
// - JITDUMP(" V%02u escapes independently of P%02u\n", lclNum, pseudoLocal); + JITDUMPEXEC(DumpIndex(lclNumIndex)); + JITDUMP(" escapes independently of", IndexToLocal(lclNumIndex)); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP("\n"); canClone = false; break; } @@ -1564,13 +2912,18 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait // Also check the alloc temps // - if (info->m_allocTemps != nullptr) + if (canClone && (info->m_allocTemps != nullptr)) { for (unsigned v : *(info->m_allocTemps)) { - if (BitVecOps::IsMember(bitVecTraits, escapingNodes, v)) + if (BitVecOps::IsMember(bitVecTraits, escapingNodes, LocalToIndex(v))) { - JITDUMP(" alloc temp V%02u escapes independently of P%02u\n", v, pseudoLocal) + JITDUMP(" alloc temp"); + JITDUMPEXEC(DumpIndex(v)); + JITDUMP(" escapes independently of", IndexToLocal(lclNumIndex)); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP("\n"); + canClone = false; break; } @@ -1582,7 +2935,8 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait // We may be able to clone and specialize the enumerator uses to ensure // that the allocated enumerator does not escape. // - JITDUMP(" P%02u is guarding the escape of V%02u\n", pseudoLocal, lclNum); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP(" is guarding the escape of V%02u\n", info->m_local); if (info->m_allocTemps != nullptr) { JITDUMP(" along with "); @@ -1623,15 +2977,20 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait // if (canClone) { - JITDUMP("\n*** Can prevent escape under P%02u via cloning ***\n", pseudoLocal); + JITDUMP("\n*** Can prevent escape under"); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP(" via cloning ***\n"); + info->m_willClone = true; m_regionsToClone++; } else { - JITDUMP(" not optimizing, so will mark P%02u as escaping\n", pseudoLocal); - MarkLclVarAsEscaping(pseudoLocal); - BitVecOps::AddElemD(bitVecTraits, escapingNodesToProcess, pseudoLocal); + JITDUMP(" not optimizing, so will mark"); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP(" as escaping\n"); + MarkIndexAsEscaping(pseudoIndex); + BitVecOps::AddElemD(bitVecTraits, escapingNodesToProcess, pseudoIndex); newEscapes = true; } } @@ -1640,18 +2999,22 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait } //------------------------------------------------------------------------------ -// NewPseudoLocal: return index of a new pseudo local. +// NewPseudoIndex: return index of a new pseudo. // // Returns: // index to use, or BAD_VAR_NUM if no more indices are available. // -unsigned ObjectAllocator::NewPseudoLocal() +unsigned ObjectAllocator::NewPseudoIndex() { unsigned result = BAD_VAR_NUM; - if (m_numPseudoLocals < m_maxPseudoLocals) + if (m_numPseudos >= m_maxPseudos) + { + assert(!"unexpected number of pseudos"); + } + else { - result = comp->lvaCount + m_numPseudoLocals; - m_numPseudoLocals++; + result = m_firstPseudoIndex + m_numPseudos; + m_numPseudos++; } return result; } @@ -1860,12 +3223,12 @@ GenTree* ObjectAllocator::IsGuard(BasicBlock* block, GuardInfo* info) // bool ObjectAllocator::CheckForGuardedUse(BasicBlock* block, GenTree* tree, unsigned lclNum) { - // Find pseudo local... + // Find pseudo... // - unsigned pseudoLocal = BAD_VAR_NUM; - if (!m_EnumeratorLocalToPseudoLocalMap.TryGetValue(lclNum, &pseudoLocal)) + unsigned pseudoIndex = BAD_VAR_NUM; + if (!m_EnumeratorLocalToPseudoIndexMap.TryGetValue(lclNum, &pseudoIndex)) { - JITDUMP("... 
no pseudo local?\n"); + JITDUMP("... no pseudo?\n"); return false; } @@ -1878,10 +3241,10 @@ bool ObjectAllocator::CheckForGuardedUse(BasicBlock* block, GenTree* tree, unsig return false; } - // Find the GDV guard for the pseudo-local + // Find the GDV guard for the pseudo // CloneInfo* pseudoGuardInfo; - if (!m_CloneMap.Lookup(pseudoLocal, &pseudoGuardInfo)) + if (!m_CloneMap.Lookup(pseudoIndex, &pseudoGuardInfo)) { JITDUMP("... under non-gdv guard?\n"); return false; @@ -1891,14 +3254,16 @@ bool ObjectAllocator::CheckForGuardedUse(BasicBlock* block, GenTree* tree, unsig // if ((info.m_local == lclNum) && (pseudoGuardInfo->m_local == lclNum) && (info.m_type == pseudoGuardInfo->m_type)) { - // If so, track this as an assignment pseudoLocal = ... + // If so, track this as an assignment pseudoIndex = ... // // Later if we don't clone and split off the failing GDV paths, - // we will mark pseudoLocal as escaped, and that will lead + // we will mark pseudoIndex as escaped, and that will lead // to lclNum escaping as well. // - JITDUMP("... under GDV; tracking via pseudo-local P%02u\n", pseudoLocal); - AddConnGraphEdge(pseudoLocal, lclNum); + JITDUMP("... under GDV; tracking via pseudo index"); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP("\n") + AddConnGraphEdgeIndex(pseudoIndex, LocalToIndex(lclNum)); return true; } @@ -1929,7 +3294,7 @@ void ObjectAllocator::CheckForGuardedAllocationOrCopy(BasicBlock* block, GenTree* const tree = *use; assert(tree->OperIsLocalStore()); - if (!CanHavePseudoLocals()) + if (!CanHavePseudos()) { // We didn't flag any allocations of interest during importation, // so there is nothing to do here. @@ -1973,14 +3338,15 @@ void ObjectAllocator::CheckForGuardedAllocationOrCopy(BasicBlock* block, const char* reason = nullptr; unsigned size = 0; unsigned length = TARGET_POINTER_SIZE; - if (CanAllocateLclVarOnStack(enumeratorLocal, clsHnd, OAT_NEWOBJ, length, &size, &reason, + ObjectAllocationType oat = AllocationKind(data); + if (CanAllocateLclVarOnStack(enumeratorLocal, clsHnd, oat, length, &size, &reason, /* preliminaryCheck */ true)) { - // We are going to conditionally track accesses to the enumerator local via a pseudo local. + // We are going to conditionally track accesses to the enumerator local via a pseudo. 
// - const unsigned pseudoLocal = NewPseudoLocal(); - assert(pseudoLocal != BAD_VAR_NUM); - bool added = m_EnumeratorLocalToPseudoLocalMap.AddOrUpdate(enumeratorLocal, pseudoLocal); + const unsigned pseudoIndex = NewPseudoIndex(); + assert(pseudoIndex != BAD_VAR_NUM); + bool added = m_EnumeratorLocalToPseudoIndexMap.AddOrUpdate(enumeratorLocal, pseudoIndex); if (!added) { @@ -2002,16 +3368,18 @@ void ObjectAllocator::CheckForGuardedAllocationOrCopy(BasicBlock* block, CloneInfo* info = new (alloc) CloneInfo(); info->m_local = enumeratorLocal; info->m_type = clsHnd; - info->m_pseudoLocal = pseudoLocal; + info->m_pseudoIndex = pseudoIndex; info->m_appearanceMap = new (alloc) EnumeratorVarMap(alloc); info->m_allocBlock = block; info->m_allocStmt = stmt; info->m_allocTree = data; info->m_domBlock = controllingGDV.m_block; - m_CloneMap.Set(pseudoLocal, info); + m_CloneMap.Set(pseudoIndex, info); - JITDUMP("Enumerator allocation [%06u]: will track accesses to V%02u guarded by type %s via P%02u\n", - comp->dspTreeID(data), enumeratorLocal, comp->eeGetClassName(clsHnd), pseudoLocal); + JITDUMP("Enumerator allocation [%06u]: will track accesses to V%02u guarded by type %s via", + comp->dspTreeID(data), enumeratorLocal, comp->eeGetClassName(clsHnd)); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP("\n"); // If this is not a direct assignment to the enumerator var we also need to // track the temps that will appear in between. Later we will rewrite these @@ -2081,16 +3449,16 @@ void ObjectAllocator::CheckForGuardedAllocationOrCopy(BasicBlock* block, // bool ObjectAllocator::CheckForEnumeratorUse(unsigned lclNum, unsigned dstLclNum) { - unsigned pseudoLocal = BAD_VAR_NUM; + unsigned pseudoIndex = BAD_VAR_NUM; - if (m_EnumeratorLocalToPseudoLocalMap.TryGetValue(dstLclNum, &pseudoLocal)) + if (m_EnumeratorLocalToPseudoIndexMap.TryGetValue(dstLclNum, &pseudoIndex)) { // We already knew dstLclNum was a potential copy // return true; } - if (!m_EnumeratorLocalToPseudoLocalMap.TryGetValue(lclNum, &pseudoLocal)) + if (!m_EnumeratorLocalToPseudoIndexMap.TryGetValue(lclNum, &pseudoIndex)) { // lclNum is not a potential source // @@ -2098,7 +3466,7 @@ bool ObjectAllocator::CheckForEnumeratorUse(unsigned lclNum, unsigned dstLclNum) } CloneInfo* info = nullptr; - if (!m_CloneMap.Lookup(pseudoLocal, &info)) + if (!m_CloneMap.Lookup(pseudoIndex, &info)) { // We aren't interested in locals under this guard // @@ -2107,11 +3475,13 @@ bool ObjectAllocator::CheckForEnumeratorUse(unsigned lclNum, unsigned dstLclNum) // lclNum is an interesting enumerator var, so now so is dstLclNum. 
// - const bool added = m_EnumeratorLocalToPseudoLocalMap.AddOrUpdate(dstLclNum, pseudoLocal); + const bool added = m_EnumeratorLocalToPseudoIndexMap.AddOrUpdate(dstLclNum, pseudoIndex); assert(added); - JITDUMP("Enumerator allocation: will also track accesses to V%02u via P%02u\n", dstLclNum, pseudoLocal); + JITDUMP("Enumerator allocation: will also track accesses to V%02u via", dstLclNum); + JITDUMPEXEC(DumpIndex(pseudoIndex)); + JITDUMP("\n"); if (info->m_allocTemps == nullptr) { @@ -2135,14 +3505,14 @@ bool ObjectAllocator::CheckForEnumeratorUse(unsigned lclNum, unsigned dstLclNum) // void ObjectAllocator::RecordAppearance(unsigned lclNum, BasicBlock* block, Statement* stmt, GenTree** use) { - unsigned pseudoLocal = BAD_VAR_NUM; - if (!m_EnumeratorLocalToPseudoLocalMap.TryGetValue(lclNum, &pseudoLocal)) + unsigned Pseudo = BAD_VAR_NUM; + if (!m_EnumeratorLocalToPseudoIndexMap.TryGetValue(lclNum, &Pseudo)) { return; } CloneInfo* info; - if (!m_CloneMap.Lookup(pseudoLocal, &info)) + if (!m_CloneMap.Lookup(Pseudo, &info)) { return; } @@ -2223,8 +3593,11 @@ bool ObjectAllocator::CloneOverlaps(CloneInfo* info) continue; } - JITDUMP("Cloned blocks for P%02u overlap with those for P%02u; unable to clone\n", info->m_pseudoLocal, - c->m_pseudoLocal); + JITDUMP("Cloned blocks for"); + JITDUMPEXEC(DumpIndex(info->m_pseudoIndex)); + JITDUMP(" overlap with those for"); + JITDUMPEXEC(DumpIndex(c->m_pseudoIndex)); + JITDUMP(" unable to clone\n"); overlaps = true; break; @@ -2260,7 +3633,9 @@ bool ObjectAllocator::ShouldClone(CloneInfo* info) unsigned blockSize = 0; if (block->ComplexityExceeds(comp, slack, &blockSize)) { - JITDUMP("Rejecting P%02u cloning: exceeds size limit %u\n", info->m_pseudoLocal, sizeLimit); + JITDUMP("Rejecting"); + JITDUMPEXEC(DumpIndex(info->m_pseudoIndex)); + JITDUMP(" cloning: exceeds size limit %u\n", sizeLimit); return false; } size += blockSize; @@ -2268,7 +3643,9 @@ bool ObjectAllocator::ShouldClone(CloneInfo* info) // TODO: some kind of profile check... 
// - JITDUMP("Accepting P%02u cloning: size %u does not exceed size limit %u\n", info->m_pseudoLocal, size, sizeLimit); + JITDUMP("Accepting"); + JITDUMPEXEC(DumpIndex(info->m_pseudoIndex)); + JITDUMP(" cloning: size %u does not exceed size limit %u\n", size, sizeLimit); return true; } @@ -2918,10 +4295,23 @@ void ObjectAllocator::CloneAndSpecialize(CloneInfo* info) // Type for now as TYP_REF; this will get rewritten later during RewriteUses // - comp->lvaTable[newEnumeratorLocal].lvType = TYP_REF; - comp->lvaTable[newEnumeratorLocal].lvSingleDef = 1; + LclVarDsc* const newEnumeratorDsc = comp->lvaGetDesc(newEnumeratorLocal); + + newEnumeratorDsc->lvType = TYP_REF; + newEnumeratorDsc->lvSingleDef = 1; comp->lvaSetClass(newEnumeratorLocal, info->m_type, /* isExact */ true); + newEnumeratorDsc->lvTracked = 1; + newEnumeratorDsc->lvVarIndex = (unsigned short)m_nextLocalIndex; // grr + assert(newEnumeratorDsc->lvVarIndex < comp->lvaTrackedToVarNumSize); + comp->lvaTrackedToVarNum[newEnumeratorDsc->lvVarIndex] = newEnumeratorLocal; + m_ConnGraphAdjacencyMatrix[newEnumeratorDsc->lvVarIndex] = BitVecOps::MakeEmpty(&m_bitVecTraits); + m_nextLocalIndex++; + assert(m_maxPseudos > 0); + assert(newEnumeratorDsc->lvVarIndex < m_firstPseudoIndex); + + JITDUMP("Tracking V%02u via 0x%02x\n", newEnumeratorLocal, newEnumeratorDsc->lvVarIndex); + class ReplaceVisitor final : public GenTreeVisitor { CloneInfo* m_info; @@ -3180,3 +4570,80 @@ void ObjectAllocator::CloneAndSpecialize() assert(numberOfClonedRegions == m_regionsToClone); } + +//------------------------------------------------------------------------------ +// GetBoxedLayout: get a layout for a boxed version of a struct +// +// Arguments: +// layout - layout of the struct +// +// Notes: +// For Nullable, layout class should be T +// +ClassLayout* ObjectAllocator::GetBoxedLayout(ClassLayout* layout) +{ + assert(layout->IsValueClass()); + + ClassLayoutBuilder b(comp, TARGET_POINTER_SIZE + layout->GetSize()); + b.CopyPaddingFrom(TARGET_POINTER_SIZE, layout); + b.CopyGCInfoFrom(TARGET_POINTER_SIZE, layout); + +#ifdef DEBUG + b.CopyNameFrom(layout, "[boxed] "); +#endif + + return comp->typGetCustomLayout(b); +} + +//------------------------------------------------------------------------------ +// GetNonGCLayout: get a layout with the same size and padding as an existing +// layout, but with no GC fields. +// +// Arguments: +// layout - existing layout to use as template +// +ClassLayout* ObjectAllocator::GetNonGCLayout(ClassLayout* layout) +{ + assert(layout->HasGCPtr()); + ClassLayoutBuilder b(comp, layout->GetSize()); + b.CopyPaddingFrom(0, layout); + +#ifdef DEBUG + b.CopyNameFrom(layout, "[nongc] "); +#endif + + return comp->typGetCustomLayout(b); +} + +//------------------------------------------------------------------------------ +// GetByrefLayout: get a layout with the same size and padding as an existing +// layout, but with all GC fields retyped to byref. 
+// +// Arguments: +// layout - existing layout to use as template +// +ClassLayout* ObjectAllocator::GetByrefLayout(ClassLayout* layout) +{ + assert(layout->HasGCPtr()); + ClassLayoutBuilder b(comp, layout->GetSize()); + b.CopyPaddingFrom(0, layout); + + if (layout->GetGCPtrCount() > 0) + { + for (unsigned slot = 0; slot < layout->GetSlotCount(); slot++) + { + var_types gcType = layout->GetGCPtrType(slot); + if (gcType == TYP_REF) + { + gcType = TYP_BYREF; + } + b.SetGCPtrType(slot, gcType); + } + } + +#ifdef DEBUG + b.CopyNameFrom(layout, "[byref] "); +#endif + + return comp->typGetCustomLayout(b); +} diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h index 99dd89659246..b06d39c7eb53 100644 --- a/src/coreclr/jit/objectalloc.h +++ b/src/coreclr/jit/objectalloc.h @@ -75,8 +75,8 @@ struct CloneInfo : public GuardInfo m_blocks = BitVecOps::UninitVal(); } - // Pseudo-local tracking conditinal escapes - unsigned m_pseudoLocal = BAD_VAR_NUM; + // Pseudo-local tracking conditional escapes + unsigned m_pseudoIndex = BAD_VAR_NUM; // Local allocated for the address of the enumerator unsigned m_enumeratorLocal = BAD_VAR_NUM; @@ -109,39 +109,87 @@ struct CloneInfo : public GuardInfo bool m_willClone = false; }; +struct StoreInfo +{ + StoreInfo(unsigned index, bool connected = false) + : m_index(index) + , m_connected(connected) + { + } + unsigned m_index; + bool m_connected; +}; + typedef JitHashTable, CloneInfo*> CloneMap; +typedef JitHashTable, StoreInfo> NodeToIndexMap; class ObjectAllocator final : public Phase { - typedef SmallHashTable LocalToLocalMap; enum ObjectAllocationType { OAT_NONE, OAT_NEWOBJ, + OAT_NEWOBJ_HEAP, OAT_NEWARR }; + struct AllocationCandidate + { + AllocationCandidate( + BasicBlock* block, Statement* statement, GenTree* tree, unsigned lclNum, ObjectAllocationType allocType) + : m_block(block) + , m_statement(statement) + , m_tree(tree) + , m_lclNum(lclNum) + , m_allocType(allocType) + , m_onHeapReason(nullptr) + , m_bashCall(false) + { + } + + BasicBlock* const m_block; + Statement* const m_statement; + GenTree* const m_tree; + unsigned const m_lclNum; + ObjectAllocationType const m_allocType; + const char* m_onHeapReason; + bool m_bashCall; + }; + + typedef SmallHashTable LocalToLocalMap; + //=============================================================================== // Data members bool m_IsObjectStackAllocationEnabled; bool m_AnalysisDone; + bool m_isR2R; + unsigned m_bvCount; BitVecTraits m_bitVecTraits; + unsigned m_unknownSourceIndex; BitVec m_EscapingPointers; // We keep the set of possibly-stack-pointing pointers as a superset of the set of // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets. 
BitVec m_PossiblyStackPointingPointers; BitVec m_DefinitelyStackPointingPointers; - LocalToLocalMap m_HeapLocalToStackLocalMap; + LocalToLocalMap m_HeapLocalToStackObjLocalMap; + LocalToLocalMap m_HeapLocalToStackArrLocalMap; BitSetShortLongRep* m_ConnGraphAdjacencyMatrix; unsigned int m_StackAllocMaxSize; + unsigned m_stackAllocationCount; // Info for conditionally-escaping locals - LocalToLocalMap m_EnumeratorLocalToPseudoLocalMap; + LocalToLocalMap m_EnumeratorLocalToPseudoIndexMap; CloneMap m_CloneMap; - unsigned m_maxPseudoLocals; - unsigned m_numPseudoLocals; + unsigned m_nextLocalIndex; + unsigned m_firstPseudoIndex; + unsigned m_numPseudos; + unsigned m_maxPseudos; unsigned m_regionsToClone; + // Struct fields + bool m_trackFields; + NodeToIndexMap m_StoreAddressToIndexMap; + //=============================================================================== // Methods public: @@ -160,22 +208,40 @@ class ObjectAllocator final : public Phase virtual PhaseStatus DoPhase() override; private: + bool IsTrackedType(var_types type); + bool IsTrackedLocal(unsigned lclNum); + unsigned LocalToIndex(unsigned lclNum); + unsigned IndexToLocal(unsigned bvIndex); bool CanLclVarEscape(unsigned int lclNum); + bool CanIndexEscape(unsigned int index); void MarkLclVarAsPossiblyStackPointing(unsigned int lclNum); + void MarkIndexAsPossiblyStackPointing(unsigned int index); void MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum); + void MarkIndexAsDefinitelyStackPointing(unsigned int index); bool MayLclVarPointToStack(unsigned int lclNum); bool DoesLclVarPointToStack(unsigned int lclNum); + bool MayIndexPointToStack(unsigned int index); + bool DoesIndexPointToStack(unsigned int index); + void PrepareAnalysis(); void DoAnalysis(); void MarkLclVarAsEscaping(unsigned int lclNum); + void MarkIndexAsEscaping(unsigned int lclNum); void MarkEscapingVarsAndBuildConnGraph(); void AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum); + void AddConnGraphEdgeIndex(unsigned int sourceIndex, unsigned int targetIndex); void ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& escapingNodes); void ComputeStackObjectPointers(BitVecTraits* bitVecTraits); bool MorphAllocObjNodes(); + void MorphAllocObjNode(AllocationCandidate& candidate); + bool MorphAllocObjNodeHelper(AllocationCandidate& candidate); + bool MorphAllocObjNodeHelperArr(AllocationCandidate& candidate); + bool MorphAllocObjNodeHelperObj(AllocationCandidate& candidate); void RewriteUses(); GenTree* MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj); - unsigned int MorphAllocObjNodeIntoStackAlloc( - GenTreeAllocObj* allocObj, CORINFO_CLASS_HANDLE clsHnd, bool isValueClass, BasicBlock* block, Statement* stmt); + unsigned int MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, + ClassLayout* layout, + BasicBlock* block, + Statement* stmt); unsigned int MorphNewArrNodeIntoStackAlloc(GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, unsigned int length, @@ -183,8 +249,10 @@ class ObjectAllocator final : public Phase BasicBlock* block, Statement* stmt); struct BuildConnGraphVisitorCallbackData; - bool CanLclVarEscapeViaParentStack(ArrayStack* parentStack, unsigned int lclNum, BasicBlock* block); - void UpdateAncestorTypes(GenTree* tree, ArrayStack* parentStack, var_types newType); + void AnalyzeParentStack(ArrayStack* parentStack, unsigned int lclNum, BasicBlock* block); + void UpdateAncestorTypes( + GenTree* tree, ArrayStack* parentStack, var_types newType, ClassLayout* newLayout, bool retypeFields); + ObjectAllocationType 
AllocationKind(GenTree* tree); // Conditionally escaping allocation support // @@ -193,11 +261,11 @@ class ObjectAllocator final : public Phase bool CheckForEnumeratorUse(unsigned lclNum, unsigned dstLclNum); bool IsGuarded(BasicBlock* block, GenTree* tree, GuardInfo* info, bool testOutcome); GenTree* IsGuard(BasicBlock* block, GuardInfo* info); - unsigned NewPseudoLocal(); + unsigned NewPseudoIndex(); - bool CanHavePseudoLocals() + bool CanHavePseudos() { - return (m_maxPseudoLocals > 0); + return (m_maxPseudos > 0); } void RecordAppearance(unsigned lclNum, BasicBlock* block, Statement* stmt, GenTree** use); @@ -212,79 +280,15 @@ class ObjectAllocator final : public Phase void CloneAndSpecialize(); static const unsigned int s_StackAllocMaxSize = 0x2000U; -}; -//=============================================================================== - -inline ObjectAllocator::ObjectAllocator(Compiler* comp) - : Phase(comp, PHASE_ALLOCATE_OBJECTS) - , m_IsObjectStackAllocationEnabled(false) - , m_AnalysisDone(false) - , m_bitVecTraits(BitVecTraits(comp->lvaCount, comp)) - , m_HeapLocalToStackLocalMap(comp->getAllocator(CMK_ObjectAllocator)) - , m_EnumeratorLocalToPseudoLocalMap(comp->getAllocator(CMK_ObjectAllocator)) - , m_CloneMap(comp->getAllocator(CMK_ObjectAllocator)) - , m_maxPseudoLocals(0) - , m_numPseudoLocals(0) - , m_regionsToClone(0) - -{ - // If we are going to do any conditional escape analysis, allocate - // extra BV space for the "pseudo" locals we'll need. - // - // For now, disable conditional escape analysis with OSR - // since the dominance picture is muddled at this point. - // - // The conditionally escaping allocation sites will likely be in loops anyways. - // - bool const hasEnumeratorLocals = comp->hasImpEnumeratorGdvLocalMap(); - - if (hasEnumeratorLocals) - { - unsigned const enumeratorLocalCount = comp->getImpEnumeratorGdvLocalMap()->GetCount(); - assert(enumeratorLocalCount > 0); - - bool const enableConditionalEscape = JitConfig.JitObjectStackAllocationConditionalEscape() > 0; - bool const isOSR = comp->opts.IsOSR(); - - if (enableConditionalEscape && !isOSR) - { + ClassLayout* GetBoxedLayout(ClassLayout* structLayout); + ClassLayout* GetNonGCLayout(ClassLayout* existingLayout); + ClassLayout* GetByrefLayout(ClassLayout* existingLayout); #ifdef DEBUG - static ConfigMethodRange JitObjectStackAllocationConditionalEscapeRange; - JitObjectStackAllocationConditionalEscapeRange.EnsureInit( - JitConfig.JitObjectStackAllocationConditionalEscapeRange()); - const unsigned hash = comp->info.compMethodHash(); - const bool inRange = JitObjectStackAllocationConditionalEscapeRange.Contains(hash); -#else - const bool inRange = true; + void DumpIndex(unsigned bvIndex); #endif - - if (inRange) - { - m_maxPseudoLocals = enumeratorLocalCount; - m_bitVecTraits = BitVecTraits(comp->lvaCount + enumeratorLocalCount + 1, comp); - JITDUMP("Enabling conditional escape analysis [%u pseudo-vars]\n", enumeratorLocalCount); - } - else - { - JITDUMP("Not enabling conditional escape analysis (disabled by range config)\n"); - } - } - else - { - JITDUMP("Not enabling conditional escape analysis [%u pseudo-vars]: %s\n", enumeratorLocalCount, - enableConditionalEscape ? 
"OSR" : "disabled by config"); - } - } - - m_EscapingPointers = BitVecOps::UninitVal(); - m_PossiblyStackPointingPointers = BitVecOps::UninitVal(); - m_DefinitelyStackPointingPointers = BitVecOps::UninitVal(); - m_ConnGraphAdjacencyMatrix = nullptr; - - m_StackAllocMaxSize = (unsigned)JitConfig.JitObjectStackAllocationSize(); -} +}; //------------------------------------------------------------------------ // IsObjectStackAllocationEnabled: Returns true iff object stack allocation is enabled @@ -306,128 +310,18 @@ inline void ObjectAllocator::EnableObjectStackAllocation() } //------------------------------------------------------------------------ -// CanAllocateLclVarOnStack: Returns true iff local variable can be -// allocated on the stack. +// CanIndexEscape: Returns true iff resource described by index can +// potentially escape from the method // // Arguments: -// lclNum - Local variable number -// clsHnd - Class/struct handle of the variable class -// allocType - Type of allocation (newobj or newarr) -// length - Length of the array (for newarr) -// blockSize - [out, optional] exact size of the object -// reason - [out, required] if result is false, reason why -// preliminaryCheck - if true, allow checking before analysis is done -// (for things that inherently disqualify the local) +// index - bv index // // Return Value: -// Returns true iff local variable can be allocated on the stack. -// -inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum, - CORINFO_CLASS_HANDLE clsHnd, - ObjectAllocationType allocType, - ssize_t length, - unsigned int* blockSize, - const char** reason, - bool preliminaryCheck) -{ - assert(preliminaryCheck || m_AnalysisDone); - - bool enableBoxedValueClasses = true; - bool enableRefClasses = true; - bool enableArrays = true; - *reason = "[ok]"; - -#ifdef DEBUG - enableBoxedValueClasses = (JitConfig.JitObjectStackAllocationBoxedValueClass() != 0); - enableRefClasses = (JitConfig.JitObjectStackAllocationRefClass() != 0); - enableArrays = (JitConfig.JitObjectStackAllocationArray() != 0); -#endif - - unsigned classSize = 0; +// Returns true if so - if (allocType == OAT_NEWARR) - { - if (!enableArrays) - { - *reason = "[disabled by config]"; - return false; - } - - if ((length < 0) || (length > CORINFO_Array_MaxLength)) - { - *reason = "[invalid array length]"; - return false; - } - - ClassLayout* const layout = comp->typGetArrayLayout(clsHnd, (unsigned)length); - classSize = layout->GetSize(); - } - else if (allocType == OAT_NEWOBJ) - { - if (comp->info.compCompHnd->isValueClass(clsHnd)) - { - if (!enableBoxedValueClasses) - { - *reason = "[disabled by config]"; - return false; - } - - if (comp->info.compCompHnd->getTypeForBoxOnStack(clsHnd) == NO_CLASS_HANDLE) - { - *reason = "[no boxed type available]"; - return false; - } - - classSize = comp->info.compCompHnd->getClassSize(clsHnd); - } - else - { - if (!enableRefClasses) - { - *reason = "[disabled by config]"; - return false; - } - - if (!comp->info.compCompHnd->canAllocateOnStack(clsHnd)) - { - *reason = "[runtime disallows]"; - return false; - } - - classSize = comp->info.compCompHnd->getHeapClassSize(clsHnd); - } - } - else - { - assert(!"Unexpected allocation type"); - return false; - } - - if (classSize > m_StackAllocMaxSize) - { - *reason = "[too large]"; - return false; - } - - if (preliminaryCheck) - { - return true; - } - - const bool escapes = CanLclVarEscape(lclNum); - - if (escapes) - { - *reason = "[escapes]"; - return false; - } - - if (blockSize != nullptr) - { - *blockSize 
= classSize; - } - - return true; +inline bool ObjectAllocator::CanIndexEscape(unsigned int index) +{ + return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, index); } //------------------------------------------------------------------------ @@ -442,7 +336,28 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNu inline bool ObjectAllocator::CanLclVarEscape(unsigned int lclNum) { - return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, lclNum); + if (!IsTrackedLocal(lclNum)) + { + return true; + } + + return CanIndexEscape(LocalToIndex(lclNum)); +} + +//------------------------------------------------------------------------ +// MayIndexPointToStack: Returns true iff the resource described by index may +// point to a stack-allocated object +// +// Arguments: +// index - bv index +// +// Return Value: +// Returns true if so. +// +inline bool ObjectAllocator::MayIndexPointToStack(unsigned int index) +{ + assert(m_AnalysisDone); + return BitVecOps::IsMember(&m_bitVecTraits, m_PossiblyStackPointingPointers, index); } //------------------------------------------------------------------------ @@ -454,11 +369,33 @@ inline bool ObjectAllocator::CanLclVarEscape(unsigned int lclNum) // // Return Value: // Returns true iff local variable may point to a stack-allocated object - +// inline bool ObjectAllocator::MayLclVarPointToStack(unsigned int lclNum) { assert(m_AnalysisDone); - return BitVecOps::IsMember(&m_bitVecTraits, m_PossiblyStackPointingPointers, lclNum); + + if (!IsTrackedLocal(lclNum)) + { + return false; + } + + return MayIndexPointToStack(LocalToIndex(lclNum)); +} + +//------------------------------------------------------------------------ +// DoesIndexPointToStack: Returns true iff the resource described by index definitely +// points to a stack-allocated object (or is null) +// +// Arguments: +// index - bv index +// +// Return Value: +// Returns true if so. 
+// +inline bool ObjectAllocator::DoesIndexPointToStack(unsigned int index) +{ + assert(m_AnalysisDone); + return BitVecOps::IsMember(&m_bitVecTraits, m_DefinitelyStackPointingPointers, index); } //------------------------------------------------------------------------ @@ -471,11 +408,17 @@ inline bool ObjectAllocator::MayLclVarPointToStack(unsigned int lclNum) // Return Value: // Returns true iff local variable definitely points to a stack-allocated object // (or is null) - +// inline bool ObjectAllocator::DoesLclVarPointToStack(unsigned int lclNum) { assert(m_AnalysisDone); - return BitVecOps::IsMember(&m_bitVecTraits, m_DefinitelyStackPointingPointers, lclNum); + + if (!IsTrackedLocal(lclNum)) + { + return false; + } + + return DoesIndexPointToStack(LocalToIndex(lclNum)); } //=============================================================================== diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 8de3609aa85e..214276e555de 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -298,8 +298,8 @@ bool Compiler::optCSE_canSwap(GenTree* op1, GenTree* op2) /* static */ bool Compiler::optCSEcostCmpEx::operator()(const CSEdsc* dsc1, const CSEdsc* dsc2) { - GenTree* exp1 = dsc1->csdTree; - GenTree* exp2 = dsc2->csdTree; + GenTree* exp1 = dsc1->csdTreeList.tslTree; + GenTree* exp2 = dsc2->csdTreeList.tslTree; auto expCost1 = exp1->GetCostEx(); auto expCost2 = exp2->GetCostEx(); @@ -334,8 +334,8 @@ bool Compiler::optCSEcostCmpEx::operator()(const CSEdsc* dsc1, const CSEdsc* dsc /* static */ bool Compiler::optCSEcostCmpSz::operator()(const CSEdsc* dsc1, const CSEdsc* dsc2) { - GenTree* exp1 = dsc1->csdTree; - GenTree* exp2 = dsc2->csdTree; + GenTree* exp1 = dsc1->csdTreeList.tslTree; + GenTree* exp2 = dsc2->csdTreeList.tslTree; auto expCost1 = exp1->GetCostSz(); auto expCost2 = exp2->GetCostSz(); @@ -434,7 +434,7 @@ void CSEdsc::ComputeNumLocals(Compiler* compiler) }; LocalCountingVisitor lcv(compiler); - lcv.WalkTree(&csdTree, nullptr); + lcv.WalkTree(&csdTreeList.tslTree, nullptr); numDistinctLocals = lcv.m_count; numLocalOccurrences = lcv.m_occurrences; @@ -615,99 +615,85 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) for (hashDsc = optCSEhash[hval]; hashDsc; hashDsc = hashDsc->csdNextInBucket) { - if (hashDsc->csdHashKey == key) + if (hashDsc->csdHashKey != key) { - // Check for mismatched types on GT_CNS_INT nodes - if ((tree->OperGet() == GT_CNS_INT) && (tree->TypeGet() != hashDsc->csdTree->TypeGet())) - { - continue; - } + continue; + } + + assert(hashDsc->csdTreeList.tslTree != nullptr); - treeStmtLst* newElem; + // Check for mismatched types on GT_CNS_INT nodes + if (tree->OperIs(GT_CNS_INT) && (tree->TypeGet() != hashDsc->csdTreeList.tslTree->TypeGet())) + { + continue; + } - // Have we started the list of matching nodes? + // Have we started the list of matching nodes? - if (hashDsc->csdTreeList == nullptr) + if (hashDsc->csdTreeList.tslNext == nullptr) + { + // This is the second time we see this value. Handle cases + // where the first value dominates the second one and we can + // already prove that the first one is _not_ going to be a + // valid def for the second one, due to the second one having + // more exceptions. This happens for example in code like + // CASTCLASS(x, y) where the "CASTCLASS" just adds exceptions + // on top of "x". In those cases it is always better to let the + // second value be the def. 
+ // It also happens for GT_COMMA, but that one is special cased + // above; this handling is a less special-casey version of the + // GT_COMMA handling above. However, it is quite limited since + // it only handles the def/use being in the same block. + if (compCurBB == hashDsc->csdTreeList.tslBlock) { - // This is the second time we see this value. Handle cases - // where the first value dominates the second one and we can - // already prove that the first one is _not_ going to be a - // valid def for the second one, due to the second one having - // more exceptions. This happens for example in code like - // CASTCLASS(x, y) where the "CASTCLASS" just adds exceptions - // on top of "x". In those cases it is always better to let the - // second value be the def. - // It also happens for GT_COMMA, but that one is special cased - // above; this handling is a less special-casey version of the - // GT_COMMA handling above. However, it is quite limited since - // it only handles the def/use being in the same block. - if (compCurBB == hashDsc->csdBlock) + GenTree* prevTree = hashDsc->csdTreeList.tslTree; + ValueNum prevVnLib = prevTree->GetVN(VNK_Liberal); + if (prevVnLib != vnLib) { - GenTree* prevTree = hashDsc->csdTree; - ValueNum prevVnLib = prevTree->GetVN(VNK_Liberal); - if (prevVnLib != vnLib) + ValueNum prevExceptionSet = vnStore->VNExceptionSet(prevVnLib); + ValueNum curExceptionSet = vnStore->VNExceptionSet(vnLib); + if ((prevExceptionSet != curExceptionSet) && + vnStore->VNExcIsSubset(curExceptionSet, prevExceptionSet)) { - ValueNum prevExceptionSet = vnStore->VNExceptionSet(prevVnLib); - ValueNum curExceptionSet = vnStore->VNExceptionSet(vnLib); - if ((prevExceptionSet != curExceptionSet) && - vnStore->VNExcIsSubset(curExceptionSet, prevExceptionSet)) - { - JITDUMP("Skipping CSE candidate for tree [%06u]; tree [%06u] is a better candidate with " - "more exceptions\n", - prevTree->gtTreeID, tree->gtTreeID); - prevTree->gtCSEnum = 0; - hashDsc->csdStmt = stmt; - hashDsc->csdTree = tree; - tree->gtCSEnum = (signed char)hashDsc->csdIndex; - return hashDsc->csdIndex; - } + JITDUMP("Skipping CSE candidate for tree [%06u]; tree [%06u] is a better candidate with " + "more exceptions\n", + prevTree->gtTreeID, tree->gtTreeID); + prevTree->gtCSEnum = 0; + hashDsc->csdTreeList.tslStmt = stmt; + hashDsc->csdTreeList.tslTree = tree; + tree->gtCSEnum = (signed char)hashDsc->csdIndex; + return hashDsc->csdIndex; } } - - // Create the new element based upon the matching hashDsc element. 
- - newElem = new (this, CMK_TreeStatementList) treeStmtLst; - - newElem->tslTree = hashDsc->csdTree; - newElem->tslStmt = hashDsc->csdStmt; - newElem->tslBlock = hashDsc->csdBlock; - newElem->tslNext = nullptr; - - /* Start the list with the first CSE candidate recorded */ - - hashDsc->csdTreeList = newElem; - hashDsc->csdTreeLast = newElem; - - hashDsc->csdIsSharedConst = isSharedConst; } - noway_assert(hashDsc->csdTreeList); - - /* Append this expression to the end of the list */ + hashDsc->csdIsSharedConst = isSharedConst; + } - newElem = new (this, CMK_TreeStatementList) treeStmtLst; + // Append this expression to the end of the list - newElem->tslTree = tree; - newElem->tslStmt = stmt; - newElem->tslBlock = compCurBB; - newElem->tslNext = nullptr; + treeStmtLst* newElem = new (this, CMK_TreeStatementList) treeStmtLst; - hashDsc->csdTreeLast->tslNext = newElem; - hashDsc->csdTreeLast = newElem; + newElem->tslTree = tree; + newElem->tslStmt = stmt; + newElem->tslBlock = compCurBB; + newElem->tslNext = nullptr; - optDoCSE = true; // Found a duplicate CSE tree + hashDsc->csdTreeLast->tslNext = newElem; + hashDsc->csdTreeLast = newElem; - /* Have we assigned a CSE index? */ - if (hashDsc->csdIndex == 0) - { - newCSE = true; - break; - } + optDoCSE = true; // Found a duplicate CSE tree - assert(FitsIn(hashDsc->csdIndex)); - tree->gtCSEnum = ((signed char)hashDsc->csdIndex); - return hashDsc->csdIndex; + /* Have we assigned a CSE index? */ + if (hashDsc->csdIndex == 0) + { + newCSE = true; + break; } + + assert(FitsIn(hashDsc->csdIndex)); + tree->gtCSEnum = ((signed char)hashDsc->csdIndex); + return hashDsc->csdIndex; } if (!newCSE) @@ -763,10 +749,12 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->defExcSetPromise = vnStore->VNForEmptyExcSet(); hashDsc->defExcSetCurrent = vnStore->VNForNull(); // uninit value - hashDsc->csdTree = tree; - hashDsc->csdStmt = stmt; - hashDsc->csdBlock = compCurBB; - hashDsc->csdTreeList = nullptr; + hashDsc->csdTreeList.tslTree = tree; + hashDsc->csdTreeList.tslStmt = stmt; + hashDsc->csdTreeList.tslBlock = compCurBB; + hashDsc->csdTreeList.tslNext = nullptr; + + hashDsc->csdTreeLast = &hashDsc->csdTreeList; /* Append the entry to the hash bucket */ @@ -801,11 +789,11 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->csdIndex = CSEindex; /* Update the gtCSEnum field in the original tree */ - noway_assert(hashDsc->csdTreeList->tslTree->gtCSEnum == 0); + noway_assert(hashDsc->csdTreeList.tslTree->gtCSEnum == 0); assert(FitsIn(CSEindex)); - hashDsc->csdTreeList->tslTree->gtCSEnum = ((signed char)CSEindex); - noway_assert(((unsigned)hashDsc->csdTreeList->tslTree->gtCSEnum) == CSEindex); + hashDsc->csdTreeList.tslTree->gtCSEnum = ((signed char)CSEindex); + noway_assert(((unsigned)hashDsc->csdTreeList.tslTree->gtCSEnum) == CSEindex); tree->gtCSEnum = ((signed char)CSEindex); @@ -975,7 +963,7 @@ void Compiler::optValnumCSE_InitDataFlow() { CSEdsc* dsc = optCSEtab[inx]; unsigned CSEindex = dsc->csdIndex; - treeStmtLst* lst = dsc->csdTreeList; + treeStmtLst* lst = &dsc->csdTreeList; noway_assert(lst); while (lst != nullptr) @@ -999,6 +987,11 @@ void Compiler::optValnumCSE_InitDataFlow() } } + if (compIsAsync()) + { + optValnumCSE_SetUpAsyncByrefKills(); + } + for (BasicBlock* const block : Blocks()) { // If the block doesn't contains a call then skip it... 
@@ -1037,7 +1030,7 @@ void Compiler::optValnumCSE_InitDataFlow() unsigned cseAvailCrossCallBit = getCSEAvailCrossCallBit(CSEnum); BitVecOps::AddElemD(cseLivenessTraits, block->bbCseGen, cseAvailCrossCallBit); } - if (tree->OperGet() == GT_CALL) + if (tree->OperIs(GT_CALL)) { // Any cse's that we haven't placed in the block->bbCseGen set // aren't currently alive (using cseAvailCrossCallBit) @@ -1082,6 +1075,112 @@ void Compiler::optValnumCSE_InitDataFlow() #endif // DEBUG } +//--------------------------------------------------------------------------- +// optValnumCSE_SetUpAsyncByrefKills: +// Compute kills because of async calls requiring byrefs not to be live +// across them. +// +void Compiler::optValnumCSE_SetUpAsyncByrefKills() +{ + bool anyAsyncKills = false; + cseAsyncKillsMask = BitVecOps::MakeFull(cseLivenessTraits); + for (unsigned inx = 1; inx <= optCSECandidateCount; inx++) + { + CSEdsc* dsc = optCSEtab[inx - 1]; + assert(dsc->csdIndex == inx); + bool isByRef = false; + if (dsc->csdTreeList.tslTree->TypeIs(TYP_BYREF)) + { + isByRef = true; + } + else if (dsc->csdTreeList.tslTree->TypeIs(TYP_STRUCT)) + { + ClassLayout* layout = dsc->csdTreeList.tslTree->GetLayout(this); + isByRef = layout->HasGCByRef(); + } + + if (isByRef) + { + // We generate a bit pattern like: 1111111100111100 where there + // are 0s only for the byref CSEs. + BitVecOps::RemoveElemD(cseLivenessTraits, cseAsyncKillsMask, getCSEAvailBit(inx)); + BitVecOps::RemoveElemD(cseLivenessTraits, cseAsyncKillsMask, getCSEAvailCrossCallBit(inx)); + anyAsyncKills = true; + } + } + + if (!anyAsyncKills) + { + return; + } + + for (BasicBlock* block : Blocks()) + { + Statement* asyncCallStmt = nullptr; + GenTree* asyncCall = nullptr; + // Find last async call in block + Statement* stmt = block->lastStmt(); + if (stmt == nullptr) + { + continue; + } + + while (asyncCall == nullptr) + { + if ((stmt->GetRootNode()->gtFlags & GTF_CALL) != 0) + { + for (GenTree* tree = stmt->GetRootNode(); tree != nullptr; tree = tree->gtPrev) + { + if (tree->IsCall() && tree->AsCall()->IsAsync()) + { + asyncCallStmt = stmt; + asyncCall = tree; + break; + } + } + } + + if (stmt == block->firstStmt()) + break; + + stmt = stmt->GetPrevStmt(); + } + + if (asyncCall == nullptr) + { + continue; + } + + // This block has a suspension point. Make all BYREF CSEs unavailable. + BitVecOps::IntersectionD(cseLivenessTraits, block->bbCseGen, cseAsyncKillsMask); + BitVecOps::IntersectionD(cseLivenessTraits, block->bbCseOut, cseAsyncKillsMask); + + // Now make all byref CSEs after the suspension point available. 
+ Statement* curStmt = asyncCallStmt; + GenTree* curTree = asyncCall; + while (true) + { + do + { + if (IS_CSE_INDEX(curTree->gtCSEnum)) + { + unsigned CSEnum = GET_CSE_INDEX(curTree->gtCSEnum); + BitVecOps::AddElemD(cseLivenessTraits, block->bbCseGen, getCSEAvailBit(CSEnum)); + BitVecOps::AddElemD(cseLivenessTraits, block->bbCseOut, getCSEAvailBit(CSEnum)); + } + + curTree = curTree->gtNext; + } while (curTree != nullptr); + + curStmt = curStmt->GetNextStmt(); + if (curStmt == nullptr) + break; + + curTree = curStmt->GetTreeList(); + } + } +} + /***************************************************************************** * * CSE Dataflow, so that all helper methods for dataflow are in a single place @@ -1420,6 +1519,49 @@ void Compiler::optValnumCSE_Availability() // This is the first time visited, so record this defs exception set desc->defExcSetCurrent = theLiberalExcSet; } + else if (desc->defExcSetCurrent != theLiberalExcSet) + { + // We will change the value of desc->defExcSetCurrent to be the intersection of + // these two sets. + // This is the set of exceptions that all CSE defs have (that we have visited so + // far) + // + ValueNum intersectionExcSet = + vnStore->VNExcSetIntersection(desc->defExcSetCurrent, theLiberalExcSet); +#ifdef DEBUG + if (this->verbose) + { + VNFuncApp excSeq; + + vnStore->GetVNFunc(desc->defExcSetCurrent, &excSeq); + printf(">>> defExcSetCurrent is "); + vnStore->vnDumpExcSeq(this, &excSeq, true); + printf("\n"); + + vnStore->GetVNFunc(theLiberalExcSet, &excSeq); + printf(">>> theLiberalExcSet is "); + vnStore->vnDumpExcSeq(this, &excSeq, true); + printf("\n"); + + if (intersectionExcSet == vnStore->VNForEmptyExcSet()) + { + printf(">>> the intersectionExcSet is the EmptyExcSet\n"); + } + else + { + vnStore->GetVNFunc(intersectionExcSet, &excSeq); + printf(">>> the intersectionExcSet is "); + vnStore->vnDumpExcSeq(this, &excSeq, true); + printf("\n"); + } + } +#endif // DEBUG + + // Change the defExcSetCurrent to be a subset of its prior value + // + assert(vnStore->VNExcIsSubset(desc->defExcSetCurrent, intersectionExcSet)); + desc->defExcSetCurrent = intersectionExcSet; + } // Have we seen a CSE use and made a promise of an exception set? // @@ -1432,51 +1574,6 @@ void Compiler::optValnumCSE_Availability() // This new def still satisfies any promise made to all the CSE uses that we have // encountered // - - // no update is needed when these are the same VN - if (desc->defExcSetCurrent != theLiberalExcSet) - { - // We will change the value of desc->defExcSetCurrent to be the intersection of - // these two sets. 
- // This is the set of exceptions that all CSE defs have (that we have visited so - // far) - // - ValueNum intersectionExcSet = - vnStore->VNExcSetIntersection(desc->defExcSetCurrent, theLiberalExcSet); -#ifdef DEBUG - if (this->verbose) - { - VNFuncApp excSeq; - - vnStore->GetVNFunc(desc->defExcSetCurrent, &excSeq); - printf(">>> defExcSetCurrent is "); - vnStore->vnDumpExcSeq(this, &excSeq, true); - printf("\n"); - - vnStore->GetVNFunc(theLiberalExcSet, &excSeq); - printf(">>> theLiberalExcSet is "); - vnStore->vnDumpExcSeq(this, &excSeq, true); - printf("\n"); - - if (intersectionExcSet == vnStore->VNForEmptyExcSet()) - { - printf(">>> the intersectionExcSet is the EmptyExcSet\n"); - } - else - { - vnStore->GetVNFunc(intersectionExcSet, &excSeq); - printf(">>> the intersectionExcSet is "); - vnStore->vnDumpExcSeq(this, &excSeq, true); - printf("\n"); - } - } -#endif // DEBUG - - // Change the defExcSetCurrent to be a subset of its prior value - // - assert(vnStore->VNExcIsSubset(desc->defExcSetCurrent, intersectionExcSet)); - desc->defExcSetCurrent = intersectionExcSet; - } } else // This CSE def doesn't satisfy one of the exceptions already promised to a CSE use { @@ -1577,7 +1674,7 @@ void Compiler::optValnumCSE_Availability() // kill all of the cseAvailCrossCallBit for each CSE whenever we see a GT_CALL (unless the call // generates a CSE). // - if (tree->OperGet() == GT_CALL) + if (tree->OperIs(GT_CALL)) { // Check for the common case of an already empty available_cses set // and thus nothing needs to be killed @@ -1595,6 +1692,12 @@ void Compiler::optValnumCSE_Availability() // BitVecOps::IntersectionD(cseLivenessTraits, available_cses, cseCallKillsMask); + // In async state machines, make all byref CSEs unavailable after suspension points. 
+ if (tree->AsCall()->IsAsync() && compIsAsync()) + { + BitVecOps::IntersectionD(cseLivenessTraits, available_cses, cseAsyncKillsMask); + } + if (isDef) { // We can have a GT_CALL that produces a CSE, @@ -1773,7 +1876,7 @@ bool CSE_HeuristicCommon::CanConsiderTree(GenTree* tree, bool isReturn) "GT_IND(GT_ARR_ELEM) = GT_IND(GT_ARR_ELEM) + xyz", whereas doing the second would not allow it */ - if (tree->AsOp()->gtOp1->gtOper == GT_ARR_ELEM) + if (tree->AsOp()->gtOp1->OperIs(GT_ARR_ELEM)) { return false; } @@ -1908,6 +2011,7 @@ bool CSE_HeuristicCommon::CanConsiderTree(GenTree* tree, bool isReturn) case GT_COLON: case GT_QMARK: case GT_NOP: + case GT_GCPOLL: case GT_RETURN: return false; // Currently the only special nodes that we hit // that we know that we don't want to CSE @@ -2412,14 +2516,14 @@ void CSE_HeuristicParameterized::GetFeatures(CSEdsc* cse, double* features) return; } - const unsigned char costEx = cse->csdTree->GetCostEx(); + const unsigned char costEx = cse->csdTreeList.tslTree->GetCostEx(); const double deMinimis = 1e-3; const double deMinimusAdj = -log(deMinimis); features[0] = costEx; features[1] = deMinimusAdj + log(max(deMinimis, cse->csdUseWtCnt)); features[2] = deMinimusAdj + log(max(deMinimis, cse->csdDefWtCnt)); - features[3] = cse->csdTree->GetCostSz(); + features[3] = cse->csdTreeList.tslTree->GetCostSz(); features[4] = cse->csdUseCount; features[5] = cse->csdDefCount; @@ -2429,9 +2533,9 @@ void CSE_HeuristicParameterized::GetFeatures(CSEdsc* cse, double* features) const bool isLiveAcrossCall = cse->csdLiveAcrossCall; features[6] = booleanScale * isLiveAcrossCall; - features[7] = booleanScale * varTypeUsesIntReg(cse->csdTree->TypeGet()); + features[7] = booleanScale * varTypeUsesIntReg(cse->csdTreeList.tslTree->TypeGet()); - const bool isConstant = cse->csdTree->OperIsConst(); + const bool isConstant = cse->csdTreeList.tslTree->OperIsConst(); const bool isSharedConstant = cse->csdIsSharedConst; features[8] = booleanScale * (isConstant & !isSharedConstant); @@ -2457,7 +2561,7 @@ void CSE_HeuristicParameterized::GetFeatures(CSEdsc* cse, double* features) unsigned maxPostorderNum = 0; BasicBlock* minPostorderBlock = nullptr; BasicBlock* maxPostorderBlock = nullptr; - for (treeStmtLst* treeList = cse->csdTreeList; treeList != nullptr; treeList = treeList->tslNext) + for (treeStmtLst* treeList = &cse->csdTreeList; treeList != nullptr; treeList = treeList->tslNext) { BasicBlock* const treeBlock = treeList->tslBlock; unsigned postorderNum = treeBlock->bbPostorderNum; @@ -2486,12 +2590,12 @@ void CSE_HeuristicParameterized::GetFeatures(CSEdsc* cse, double* features) // More // - features[17] = booleanScale * ((cse->csdTree->gtFlags & GTF_CALL) != 0); + features[17] = booleanScale * ((cse->csdTreeList.tslTree->gtFlags & GTF_CALL) != 0); features[18] = deMinimusAdj + log(max(deMinimis, cse->csdUseCount * cse->csdUseWtCnt)); features[19] = deMinimusAdj + log(max(deMinimis, cse->numLocalOccurrences * cse->csdUseWtCnt)); features[20] = booleanScale * ((double)(blockSpread) / numBBs); - const bool isContainable = cse->csdTree->OperIs(GT_ADD, GT_NOT, GT_MUL, GT_LSH); + const bool isContainable = cse->csdTreeList.tslTree->OperIs(GT_ADD, GT_NOT, GT_MUL, GT_LSH); features[21] = booleanScale * isContainable; features[22] = booleanScale * (isContainable && isLowCost); @@ -3056,7 +3160,7 @@ void CSE_HeuristicRLHook::GetFeatures(CSEdsc* cse, int* features) unsigned maxPostorderNum = 0; BasicBlock* minPostorderBlock = nullptr; BasicBlock* maxPostorderBlock = nullptr; - for (treeStmtLst* 
treeList = cse->csdTreeList; treeList != nullptr; treeList = treeList->tslNext) + for (treeStmtLst* treeList = &cse->csdTreeList; treeList != nullptr; treeList = treeList->tslNext) { BasicBlock* const treeBlock = treeList->tslBlock; unsigned postorderNum = treeBlock->bbPostorderNum; @@ -3116,13 +3220,13 @@ void CSE_HeuristicRLHook::GetFeatures(CSEdsc* cse, int* features) features[i++] = type; features[i++] = cse->IsViable() ? 1 : 0; features[i++] = cse->csdLiveAcrossCall ? 1 : 0; - features[i++] = cse->csdTree->OperIsConst() ? 1 : 0; + features[i++] = cse->csdTreeList.tslTree->OperIsConst() ? 1 : 0; features[i++] = cse->csdIsSharedConst ? 1 : 0; features[i++] = isMakeCse ? 1 : 0; - features[i++] = ((cse->csdTree->gtFlags & GTF_CALL) != 0) ? 1 : 0; - features[i++] = cse->csdTree->OperIs(GT_ADD, GT_NOT, GT_MUL, GT_LSH) ? 1 : 0; - features[i++] = cse->csdTree->GetCostEx(); - features[i++] = cse->csdTree->GetCostSz(); + features[i++] = ((cse->csdTreeList.tslTree->gtFlags & GTF_CALL) != 0) ? 1 : 0; + features[i++] = cse->csdTreeList.tslTree->OperIs(GT_ADD, GT_NOT, GT_MUL, GT_LSH) ? 1 : 0; + features[i++] = cse->csdTreeList.tslTree->GetCostEx(); + features[i++] = cse->csdTreeList.tslTree->GetCostSz(); features[i++] = cse->csdUseCount; features[i++] = cse->csdDefCount; features[i++] = (int)cse->csdUseWtCnt; @@ -3933,7 +4037,7 @@ void CSE_Heuristic::Initialize() if (onStack) { - frameSize += m_pCompiler->lvaLclSize(lclNum); + frameSize += m_pCompiler->lvaLclStackHomeSize(lclNum); } else { @@ -4155,7 +4259,7 @@ void CSE_Heuristic::SortCandidates() for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++) { CSEdsc* dsc = sortTab[cnt]; - GenTree* expr = dsc->csdTree; + GenTree* expr = dsc->csdTreeList.tslTree; weight_t def; weight_t use; @@ -4165,13 +4269,13 @@ void CSE_Heuristic::SortCandidates() { def = dsc->csdDefCount; // def count use = dsc->csdUseCount; // use count (excluding the implicit uses at defs) - cost = dsc->csdTree->GetCostSz(); + cost = dsc->csdTreeList.tslTree->GetCostSz(); } else { def = dsc->csdDefWtCnt; // weighted def count use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) - cost = dsc->csdTree->GetCostEx(); + cost = dsc->csdTreeList.tslTree->GetCostEx(); } if (!Compiler::Is_Shared_Const_CSE(dsc->csdHashKey)) @@ -4724,7 +4828,7 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) ValueNum bestVN = ValueNumStore::NoVN; bool bestIsDef = false; ssize_t bestConstValue = 0; - treeStmtLst* lst = dsc->csdTreeList; + treeStmtLst* lst = &dsc->csdTreeList; while (lst != nullptr) { @@ -4815,7 +4919,7 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) } else // !isSharedConst { - lst = dsc->csdTreeList; + lst = &dsc->csdTreeList; GenTree* firstTree = lst->tslTree; printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", m_pCompiler->info.compFullName, GenTree::OpName(firstTree->OperGet()), varTypeName(firstTree->TypeGet())); @@ -4840,7 +4944,7 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) ArrayStack defUses(m_pCompiler->getAllocator(CMK_CSE)); // First process the defs. - for (lst = dsc->csdTreeList; lst != nullptr; lst = lst->tslNext) + for (lst = &dsc->csdTreeList; lst != nullptr; lst = lst->tslNext) { GenTree* const exp = lst->tslTree; Statement* const stmt = lst->tslStmt; @@ -4949,7 +5053,7 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) } // Now process the actual uses. 
- for (lst = dsc->csdTreeList; lst != nullptr; lst = lst->tslNext) + for (lst = &dsc->csdTreeList; lst != nullptr; lst = lst->tslNext) { GenTree* const exp = lst->tslTree; Statement* const stmt = lst->tslStmt; diff --git a/src/coreclr/jit/optcse.h b/src/coreclr/jit/optcse.h index b6d9cda3b927..a6afa85c87cb 100644 --- a/src/coreclr/jit/optcse.h +++ b/src/coreclr/jit/optcse.h @@ -362,11 +362,7 @@ struct CSEdsc weight_t csdDefWtCnt; // weighted def count weight_t csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) - GenTree* csdTree; // treenode containing the 1st occurrence - Statement* csdStmt; // stmt containing the 1st occurrence - BasicBlock* csdBlock; // block containing the 1st occurrence - - treeStmtLst* csdTreeList; // list of matching tree nodes: head + treeStmtLst csdTreeList; // list of matching tree nodes: head treeStmtLst* csdTreeLast; // list of matching tree nodes: tail // The exception set that is now required for all defs of this CSE. @@ -500,7 +496,7 @@ class CSE_Candidate // TODO-CQ: With ValNum CSE's the Expr and its cost can vary. GenTree* Expr() { - return m_CseDsc->csdTree; + return m_CseDsc->csdTreeList.tslTree; } unsigned Cost() { diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 4a6057e122d0..c748aa5b7394 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -67,20 +67,17 @@ class OptBoolsDsc { m_b1 = b1; m_b2 = b2; - m_b3 = nullptr; m_comp = comp; } private: BasicBlock* m_b1; // The first basic block with the BBJ_COND conditional jump type - BasicBlock* m_b2; // The next basic block of m_b1. Either BBJ_COND or BBJ_RETURN type - BasicBlock* m_b3; // m_b1's target block. Null if m_b2 is not a return block. + BasicBlock* m_b2; // The next basic block of m_b1. 
BBJ_COND type Compiler* m_comp; // The pointer to the Compiler instance OptTestInfo m_testInfo1; // The first test info OptTestInfo m_testInfo2; // The second test info - GenTree* m_t3; // The root node of the first statement of m_b3 GenTree* m_c1; // The first operand of m_testInfo1.compTree GenTree* m_c2; // The first operand of m_testInfo2.compTree @@ -95,7 +92,6 @@ class OptBoolsDsc bool optOptimizeBoolsCondBlock(); bool optOptimizeCompareChainCondBlock(); bool optOptimizeRangeTests(); - bool optOptimizeBoolsReturnBlock(BasicBlock* b3); #ifdef DEBUG void optOptimizeBoolsGcStress(); #endif @@ -125,16 +121,6 @@ class OptBoolsDsc // B1 : brtrue(t1|t2, BX) // B3 : // -// For example, (x == 0 && y == 0 && z == 0) generates -// B1: GT_JTRUE (BBJ_COND), jump to B4 -// B2: GT_JTRUE (BBJ_COND), jump to B4 -// B3: GT_RETURN/GT_SWIFT_ERROR_RET (BBJ_RETURN) -// B4: GT_RETURN/GT_SWIFT_ERROR_RET (BBJ_RETURN) -// and B1 and B2 are folded into B1: -// B1: GT_JTRUE (BBJ_COND), jump to B4 -// B3: GT_RETURN/GT_SWIFT_ERROR_RET (BBJ_RETURN) -// B4: GT_RETURN/GT_SWIFT_ERROR_RET (BBJ_RETURN) -// // Case 2: if B2->FalseTargetIs(B1->GetTarget()), it transforms // B1 : brtrue(t1, B3) // B2 : brtrue(t2, Bx) @@ -145,12 +131,9 @@ class OptBoolsDsc // bool OptBoolsDsc::optOptimizeBoolsCondBlock() { - assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); + assert(m_b1 != nullptr && m_b2 != nullptr); // Check if m_b1 and m_b2 jump to the same target and get back pointers to m_testInfo1 and t2 tree nodes - - m_t3 = nullptr; - // Check if m_b1 and m_b2 have the same target if (m_b1->TrueTargetIs(m_b2->GetTrueTarget())) @@ -223,11 +206,11 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() if (m_sameTarget) { - if (m_c1->gtOper == GT_LCL_VAR && m_c2->gtOper == GT_LCL_VAR && + if (m_c1->OperIs(GT_LCL_VAR) && m_c2->OperIs(GT_LCL_VAR) && m_c1->AsLclVarCommon()->GetLclNum() == m_c2->AsLclVarCommon()->GetLclNum()) { - if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_EQ) || - (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_LT)) + if ((m_testInfo1.compTree->OperIs(GT_LT) && m_testInfo2.compTree->OperIs(GT_EQ)) || + (m_testInfo1.compTree->OperIs(GT_EQ) && m_testInfo2.compTree->OperIs(GT_LT))) { // Case: t1:c1<0 t2:c1==0 // So we will branch to BX if c1<=0 @@ -236,8 +219,8 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() // So we will branch to BX if c1<=0 cmpOp = GT_LE; } - else if ((m_testInfo1.compTree->gtOper == GT_GT && m_testInfo2.compTree->gtOper == GT_EQ) || - (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_GT)) + else if ((m_testInfo1.compTree->OperIs(GT_GT) && m_testInfo2.compTree->OperIs(GT_EQ)) || + (m_testInfo1.compTree->OperIs(GT_EQ) && m_testInfo2.compTree->OperIs(GT_GT))) { // Case: t1:c1>0 t2:c1==0 // So we will branch to BX if c1>=0 @@ -253,7 +236,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() foldOp = GT_NONE; } - else if (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_EQ) + else if (m_testInfo1.compTree->OperIs(GT_EQ) && m_testInfo2.compTree->OperIs(GT_EQ)) { // t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0 // So we will branch to BX if (c1&c2)==0 @@ -261,7 +244,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() foldOp = GT_AND; cmpOp = GT_EQ; } - else if (m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_LT && + else if (m_testInfo1.compTree->OperIs(GT_LT) && m_testInfo2.compTree->OperIs(GT_LT) && (!m_testInfo1.GetTestOp()->IsUnsigned() && 
!m_testInfo2.GetTestOp()->IsUnsigned())) { // t1:c1<0 t2:c2<0 ==> Branch to BX if either value < 0 @@ -270,7 +253,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() foldOp = GT_OR; cmpOp = GT_LT; } - else if (m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_NE) + else if (m_testInfo1.compTree->OperIs(GT_NE) && m_testInfo2.compTree->OperIs(GT_NE)) { // t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0 // So we will branch to BX if (c1|c2)!=0 @@ -285,11 +268,11 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() } else { - if (m_c1->gtOper == GT_LCL_VAR && m_c2->gtOper == GT_LCL_VAR && + if (m_c1->OperIs(GT_LCL_VAR) && m_c2->OperIs(GT_LCL_VAR) && m_c1->AsLclVarCommon()->GetLclNum() == m_c2->AsLclVarCommon()->GetLclNum()) { - if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_NE) || - (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_GE)) + if ((m_testInfo1.compTree->OperIs(GT_LT) && m_testInfo2.compTree->OperIs(GT_NE)) || + (m_testInfo1.compTree->OperIs(GT_EQ) && m_testInfo2.compTree->OperIs(GT_GE))) { // Case: t1:c1<0 t2:c1!=0 // So we will branch to BX if c1>0 @@ -298,8 +281,8 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() // So we will branch to BX if c1>0 cmpOp = GT_GT; } - else if ((m_testInfo1.compTree->gtOper == GT_GT && m_testInfo2.compTree->gtOper == GT_NE) || - (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_LE)) + else if ((m_testInfo1.compTree->OperIs(GT_GT) && m_testInfo2.compTree->OperIs(GT_NE)) || + (m_testInfo1.compTree->OperIs(GT_EQ) && m_testInfo2.compTree->OperIs(GT_LE))) { // Case: t1:c1>0 t2:c1!=0 // So we will branch to BX if c1<0 @@ -315,7 +298,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() foldOp = GT_NONE; } - else if (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) + else if (m_testInfo1.compTree->OperIs(GT_EQ) && m_testInfo2.compTree->OperIs(GT_NE)) { // t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0 // So we will branch to BX if (c1&c2)!=0 @@ -323,7 +306,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() foldOp = GT_AND; cmpOp = GT_NE; } - else if (m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE && + else if (m_testInfo1.compTree->OperIs(GT_LT) && m_testInfo2.compTree->OperIs(GT_GE) && (!m_testInfo1.GetTestOp()->IsUnsigned() && !m_testInfo2.GetTestOp()->IsUnsigned())) { // t1:c1<0 t2:c2>=0 ==> Branch to BX if both values >= 0 @@ -332,7 +315,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() foldOp = GT_OR; cmpOp = GT_GE; } - else if (m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) + else if (m_testInfo1.compTree->OperIs(GT_NE) && m_testInfo2.compTree->OperIs(GT_EQ)) { // t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0 // So we will branch to BX if (c1|c2)==0 @@ -751,7 +734,7 @@ bool FoldRangeTests(Compiler* comp, GenTreeOp* cmp1, bool cmp1IsReversed, GenTre bool OptBoolsDsc::optOptimizeRangeTests() { // At this point we have two consecutive conditional blocks (BBJ_COND): m_b1 and m_b2 - assert((m_b1 != nullptr) && (m_b2 != nullptr) && (m_b3 == nullptr)); + assert((m_b1 != nullptr) && (m_b2 != nullptr)); assert(m_b1->KindIs(BBJ_COND) && m_b2->KindIs(BBJ_COND) && m_b1->FalseTargetIs(m_b2)); if (m_b2->isRunRarely()) @@ -962,8 +945,7 @@ bool OptBoolsDsc::optOptimizeRangeTests() // bool OptBoolsDsc::optOptimizeCompareChainCondBlock() { - assert((m_b1 != nullptr) && (m_b2 != nullptr) && (m_b3 == nullptr)); - m_t3 = nullptr; + 
assert((m_b1 != nullptr) && (m_b2 != nullptr)); bool foundEndOfOrConditions = false; if (m_b1->FalseTargetIs(m_b2) && m_b2->FalseTargetIs(m_b1->GetTrueTarget())) @@ -1121,15 +1103,9 @@ Statement* OptBoolsDsc::optOptimizeBoolsChkBlkCond() { assert(m_b1 != nullptr && m_b2 != nullptr); - bool optReturnBlock = false; - if (m_b3 != nullptr) - { - optReturnBlock = true; - } - // Find the block conditions of m_b1 and m_b2 - if (m_b2->countOfInEdges() > 1 || (optReturnBlock && m_b3->countOfInEdges() > 1)) + if (m_b2->countOfInEdges() > 1) { return nullptr; } @@ -1150,49 +1126,7 @@ Statement* OptBoolsDsc::optOptimizeBoolsChkBlkCond() } GenTree* testTree2 = s2->GetRootNode(); - - if (!optReturnBlock) - { - assert(testTree2->OperIs(GT_JTRUE)); - } - else - { - if (!testTree2->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)) - { - return nullptr; - } - - Statement* s3 = m_b3->firstStmt(); - if (s3->GetPrevStmt() != s3) - { - return nullptr; - } - - GenTree* testTree3 = s3->GetRootNode(); - if (!testTree3->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)) - { - return nullptr; - } - - if (!varTypeIsIntegral(testTree2->TypeGet()) || !varTypeIsIntegral(testTree3->TypeGet())) - { - return nullptr; - } - - // The third block is Return with "CNS_INT int 0/1" - GenTree* const retVal = testTree3->AsOp()->GetReturnValue(); - if (!retVal->OperIs(GT_CNS_INT)) - { - return nullptr; - } - - if (!retVal->TypeIs(TYP_INT)) - { - return nullptr; - } - - m_t3 = testTree3; - } + assert(testTree2->OperIs(GT_JTRUE)); m_testInfo1.testStmt = s1; m_testInfo1.testTree = testTree1; @@ -1263,13 +1197,6 @@ bool OptBoolsDsc::optOptimizeBoolsChkTypeCostCond() void OptBoolsDsc::optOptimizeBoolsUpdateTrees() { assert(m_b1 != nullptr && m_b2 != nullptr); - - bool optReturnBlock = false; - if (m_b3 != nullptr) - { - optReturnBlock = true; - } - assert(m_cmpOp != GT_NONE && m_c1 != nullptr && m_c2 != nullptr); GenTree* cmpOp1 = m_foldOp == GT_NONE ? m_c1 : m_comp->gtNewOperNode(m_foldOp, m_foldType, m_c1, m_c2); @@ -1278,17 +1205,6 @@ void OptBoolsDsc::optOptimizeBoolsUpdateTrees() t1Comp->SetOper(m_cmpOp); t1Comp->AsOp()->gtOp1 = cmpOp1; t1Comp->AsOp()->gtOp2->gtType = m_foldType; // Could have been varTypeIsGC() - if (optReturnBlock) - { - // Update tree when m_b1 is BBJ_COND and m_b2 and m_b3 are GT_RETURN/GT_SWIFT_ERROR_RET (BBJ_RETURN) - t1Comp->AsOp()->gtOp2->AsIntCon()->gtIconVal = 0; - m_testInfo1.testTree->gtOper = m_testInfo2.testTree->OperGet(); - m_testInfo1.testTree->gtType = m_testInfo2.testTree->TypeGet(); - - // Update the return count of flow graph - assert(m_comp->fgReturnCount >= 2); - --m_comp->fgReturnCount; - } // Recost/rethread the tree if necessary // @@ -1300,15 +1216,6 @@ void OptBoolsDsc::optOptimizeBoolsUpdateTrees() /* Modify the target of the conditional jump and update bbRefs and bbPreds */ - if (optReturnBlock) - { - assert(m_b1->KindIs(BBJ_COND)); - assert(m_b2->KindIs(BBJ_RETURN)); - assert(m_b1->FalseTargetIs(m_b2)); - assert(m_b3 != nullptr); - m_b1->SetKindAndTargetEdge(BBJ_RETURN); - } - else { // Modify b1, if necessary, so it has the same // true target as b2. @@ -1386,253 +1293,8 @@ void OptBoolsDsc::optOptimizeBoolsUpdateTrees() // If m_b2 was the last block of a try or handler, update the EH table. m_comp->ehUpdateForDeletedBlock(m_b2); - if (optReturnBlock) - { - // Get rid of the third block - m_comp->fgUnlinkBlockForRemoval(m_b3); - m_b3->SetFlags(BBF_REMOVED); - // If m_b3 was the last block of a try or handler, update the EH table. 
- m_comp->ehUpdateForDeletedBlock(m_b3); - } - // Update IL range of first block - m_b1->bbCodeOffsEnd = optReturnBlock ? m_b3->bbCodeOffsEnd : m_b2->bbCodeOffsEnd; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsReturnBlock: Optimize boolean when m_b1 is BBJ_COND and m_b2 and m_b3 are BBJ_RETURN -// -// Arguments: -// b3: Pointer to basic block b3 -// -// Returns: -// true if boolean optimization is done and m_b1, m_b2 and m_b3 are folded into m_b1, else false. -// -// Notes: -// m_b1, m_b2 and m_b3 of OptBoolsDsc are set on entry. -// -// if B1->TargetIs(b3), it transforms -// B1 : brtrue(t1, B3) -// B2 : ret(t2) -// B3 : ret(0) -// to -// B1 : ret((!t1) && t2) -// -// For example, (x==0 && y==0) generates: -// B1: GT_JTRUE (BBJ_COND), jumps to B3 -// B2: GT_RETURN/GT_SWIFT_ERROR (BBJ_RETURN) -// B3: GT_RETURN/GT_SWIFT_ERROR (BBJ_RETURN), -// and it is folded into -// B1: GT_RETURN/GT_SWIFT_ERROR (BBJ_RETURN) -// -bool OptBoolsDsc::optOptimizeBoolsReturnBlock(BasicBlock* b3) -{ - assert(m_b1 != nullptr && m_b2 != nullptr); - - // m_b3 is set for cond/return/return case - m_b3 = b3; - - m_sameTarget = false; - Statement* const s1 = optOptimizeBoolsChkBlkCond(); - if (s1 == nullptr) - { - return false; - } - - // Find the branch conditions of m_b1 and m_b2 - - m_c1 = optIsBoolComp(&m_testInfo1); - if (m_c1 == nullptr) - { - return false; - } - - m_c2 = optIsBoolComp(&m_testInfo2); - if (m_c2 == nullptr) - { - return false; - } - - // Find the type and cost conditions of m_testInfo1 and m_testInfo2 - - if (!optOptimizeBoolsChkTypeCostCond()) - { - return false; - } - - // Get the fold operator (m_foldOp, e.g., GT_OR/GT_AND) and - // the comparison operator (m_cmpOp, e.g., GT_EQ/GT_NE/GT_GE/GT_LT) - - var_types foldType = genActualType(m_c1->TypeGet()); - if (varTypeIsGC(foldType)) - { - foldType = TYP_I_IMPL; - } - m_foldType = foldType; - - m_foldOp = GT_NONE; - m_cmpOp = GT_NONE; - - genTreeOps foldOp; - genTreeOps cmpOp; - - ssize_t it1val = m_testInfo1.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; - ssize_t it2val = m_testInfo2.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; - ssize_t it3val = m_t3->AsOp()->gtOp1->AsIntCon()->gtIconVal; - - if (m_c1->gtOper == GT_LCL_VAR && m_c2->gtOper == GT_LCL_VAR && - m_c1->AsLclVarCommon()->GetLclNum() == m_c2->AsLclVarCommon()->GetLclNum()) - { - if (((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_EQ) || - (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_LT)) && - it3val == 1) - { - // Case: x < 0 || x == 0 - // t1:c1<0 t2:c2==0 t3:c3==1 - // ==> true if c1<=0 - // - // Case: x == 0 || x < 0 - // t1:c1==0 t2:c2<0 t3:c3==1 - // ==> true if c1 <= 0 - cmpOp = GT_LE; - } - else if (((m_testInfo1.compTree->gtOper == GT_GT && m_testInfo2.compTree->gtOper == GT_EQ) || - (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_GT)) && - it3val == 1) - { - // Case: x > 0 || x == 0 - // t1:c1<0 t2:c2==0 t3:c3==1 - // ==> true if c1>=0 - // - // Case: x == 0 || x > 0 - // t1:c1==0 t2:c2>0 t3:c3==1 - // ==> true if c1 >= 0 - cmpOp = GT_GE; - } - else if (((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_NE) || - (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_GE)) && - it3val == 0) - { - // Case: x >= 0 && x != 0 - // t1:c1<0 t2:c2==0 t3:c3==0 - // ==> true if c1>0 - // - // Case: x != 0 && x >= 0 - // t1:c1==0 t2:c2>=0 t3:c3==0 - // ==> true if c1>0 - cmpOp = GT_GT; 
- } - else if (((m_testInfo1.compTree->gtOper == GT_GT && m_testInfo2.compTree->gtOper == GT_NE) || - (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_LE)) && - it3val == 0) - { - // Case: x <= 0 && x != 0 - // t1:c1<0 t2:c2==0 t3:c3==0 - // ==> true if c1<0 - // - // Case: x != 0 && x <= 0 - // t1:c1==0 t2:c2<=0 t3:c3==0 - // ==> true if c1<0 - cmpOp = GT_LT; - } - else - { - return false; - } - - foldOp = GT_NONE; - } - else if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) && - (it1val == 0 && it2val == 0 && it3val == 0)) - { - // Case: x == 0 && y == 0 - // t1:c1!=0 t2:c2==0 t3:c3==0 - // ==> true if (c1|c2)==0 - foldOp = GT_OR; - cmpOp = GT_EQ; - } - else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) && - (it1val == 0 && it2val == 0 && it3val == 0)) - { - // Case: x == 1 && y ==1 - // t1:c1!=1 t2:c2==1 t3:c3==0 is reversed from optIsBoolComp() to: t1:c1==0 t2:c2!=0 t3:c3==0 - // ==> true if (c1&c2)!=0 - foldOp = GT_AND; - cmpOp = GT_NE; - } - else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) && - (it1val == 0 && it2val == 0 && it3val == 0) && - (!m_testInfo1.GetTestOp()->IsUnsigned() && !m_testInfo2.GetTestOp()->IsUnsigned())) - { - // Case: x >= 0 && y >= 0 - // t1:c1<0 t2:c2>=0 t3:c3==0 - // ==> true if (c1|c2)>=0 - - foldOp = GT_OR; - cmpOp = GT_GE; - } - else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_EQ) && - (it1val == 0 && it2val == 0 && it3val == 1)) - { - // Case: x == 0 || y == 0 - // t1:c1==0 t2:c2==0 t3:c3==1 - // ==> true if (c1&c2)==0 - foldOp = GT_AND; - cmpOp = GT_EQ; - } - else if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_NE) && - (it1val == 0 && it2val == 0 && it3val == 1)) - { - // Case: x == 1 || y == 1 - // t1:c1==1 t2:c2==1 t3:c3==1 is reversed from optIsBoolComp() to: t1:c1!=0 t2:c2!=0 t3:c3==1 - // ==> true if (c1|c2)!=0 - foldOp = GT_OR; - cmpOp = GT_NE; - } - else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_LT) && - (it1val == 0 && it2val == 0 && it3val == 1) && - (!m_testInfo1.GetTestOp()->IsUnsigned() && !m_testInfo2.GetTestOp()->IsUnsigned())) - { - // Case: x < 0 || y < 0 - // t1:c1<0 t2:c2<0 t3:c3==1 - // ==> true if (c1|c2)<0 - - foldOp = GT_OR; - cmpOp = GT_LT; - } - else - { - // Require NOT operation for operand(s). Do Not fold. - return false; - } - - if ((foldOp == GT_AND || (cmpOp == GT_NE && foldOp != GT_OR)) && (!m_testInfo1.isBool || !m_testInfo2.isBool)) - { - // x == 1 && y == 1: Skip cases where x or y is greater than 1, e.g., x=3, y=1 - // x == 0 || y == 0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 - // x == 1 || y == 1: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 - return false; - } - - m_foldOp = foldOp; - m_cmpOp = cmpOp; - - // Now update the trees - - optOptimizeBoolsUpdateTrees(); - -#ifdef DEBUG - if (m_comp->verbose) - { - printf("Folded %sboolean conditions of " FMT_BB ", " FMT_BB " and " FMT_BB " to :\n", - m_c2->OperIsLeaf() ? 
"" : "non-leaf ", m_b1->bbNum, m_b2->bbNum, m_b3->bbNum); - m_comp->gtDispStmt(s1); - printf("\n"); - } -#endif - - // Return true to continue the bool optimization for the rest of the BB chain - return true; + m_b1->bbCodeOffsEnd = m_b2->bbCodeOffsEnd; } //----------------------------------------------------------------------------- @@ -1652,7 +1314,7 @@ void OptBoolsDsc::optOptimizeBoolsGcStress() Statement* const stmt = m_b1->lastStmt(); GenTree* const cond = stmt->GetRootNode(); - assert(cond->gtOper == GT_JTRUE); + assert(cond->OperIs(GT_JTRUE)); OptTestInfo test; test.testStmt = stmt; @@ -1680,7 +1342,7 @@ void OptBoolsDsc::optOptimizeBoolsGcStress() // Comparand type is already checked, and we have const int, there is no harm // morphing it into a TYP_I_IMPL. - noway_assert(relop->AsOp()->gtOp2->gtOper == GT_CNS_INT); + noway_assert(relop->AsOp()->gtOp2->OperIs(GT_CNS_INT)); relop->AsOp()->gtOp2->gtType = TYP_I_IMPL; // Recost/rethread the tree if necessary @@ -1737,7 +1399,7 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) GenTree* opr1 = cond->AsOp()->gtOp1; GenTree* opr2 = cond->AsOp()->gtOp2; - if (opr2->gtOper != GT_CNS_INT) + if (!opr2->OperIs(GT_CNS_INT)) { return nullptr; } @@ -1752,7 +1414,7 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) // Is the value a boolean? // We can either have a boolean expression (marked GTF_BOOLEAN) or a constant 0/1. - if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) + if (opr1->OperIs(GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) { pOptTest->isBool = true; } @@ -1918,7 +1580,6 @@ PhaseStatus Compiler::optOptimizeBools() bool change = false; bool retry = false; unsigned numCond = 0; - unsigned numReturn = 0; unsigned numPasses = 0; unsigned stress = false; @@ -1930,6 +1591,11 @@ PhaseStatus Compiler::optOptimizeBools() for (BasicBlock* b1 = fgFirstBB; b1 != nullptr; b1 = retry ? b1 : b1->Next()) { retry = false; + if (b1->KindIs(BBJ_COND) && fgFoldCondToReturnBlock(b1)) + { + change = true; + numCond++; + } // We're only interested in conditional jumps here @@ -1985,32 +1651,23 @@ PhaseStatus Compiler::optOptimizeBools() retry = true; numCond++; } -#endif - } - else if (b2->KindIs(BBJ_RETURN)) - { - // Set b3 to b1 jump destination - BasicBlock* b3 = b1->GetTrueTarget(); - - // b3 must not be marked as BBF_DONT_REMOVE - - if (b3->HasFlag(BBF_DONT_REMOVE)) - { - continue; - } - - // b3 must be RETURN type - - if (!b3->KindIs(BBJ_RETURN)) - { - continue; - } - - if (optBoolsDsc.optOptimizeBoolsReturnBlock(b3)) +#elif defined(TARGET_AMD64) + // todo-xarch-apx: when we have proper CPUID (hardware) support, we can switch the below from an OR + // condition to an AND, for now, `JitConfig.JitEnableApxIfConv` will drive whether the optimization + // trigger or not + // else if ((compOpportunisticallyDependsOn(InstructionSet_APX) || JitConfig.JitEnableApxIfConv()) && + // optBoolsDsc.optOptimizeCompareChainCondBlock()) + else if (JitConfig.EnableApxConditionalChaining() && !optSwitchDetectAndConvert(b1, true) && + optBoolsDsc.optOptimizeCompareChainCondBlock()) { + // The optimization will have merged b1 and b2. Retry the loop so that + // b1 and b2->bbNext can be tested. 
change = true; - numReturn++; + retry = true; + numCond++; } + +#endif } else { @@ -2022,8 +1679,142 @@ PhaseStatus Compiler::optOptimizeBools() } } while (change); - JITDUMP("\noptimized %u BBJ_COND cases, %u BBJ_RETURN cases in %u passes\n", numCond, numReturn, numPasses); + JITDUMP("\noptimized %u BBJ_COND cases in %u passes\n", numCond, numPasses); - const bool modified = stress || ((numCond + numReturn) > 0); + const bool modified = stress || (numCond > 0); return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } + +//------------------------------------------------------------- +// fgFoldCondToReturnBlock: Folds BBJ_COND into BBJ_RETURN +// This operation is the opposite of what fgDedupReturnComparison does. +// We don't fold such conditionals if both return blocks have multiple predecessors. +// +// Arguments: +// block - the BBJ_COND block to convert into BBJ_RETURN +// +// Returns: +// true if the block was converted into BBJ_RETURN +// +bool Compiler::fgFoldCondToReturnBlock(BasicBlock* block) +{ + bool modified = false; + + assert(block->KindIs(BBJ_COND)); + +#ifdef JIT32_GCENCODER + // JIT32_GCENCODER has a hard limit on the number of epilogues. + return modified; +#endif + + // Early out if the current method is not returning a boolean. + if ((info.compRetType != TYP_UBYTE)) + { + return modified; + } + + // Both edges must be BBJ_RETURN + BasicBlock* const retFalseBb = block->GetFalseTarget(); + BasicBlock* const retTrueBb = block->GetTrueTarget(); + + // We might want to compact BBJ_ALWAYS blocks first, + // but don't compact the conditional block away in the process + if (fgCanCompactBlock(retTrueBb) && !retTrueBb->TargetIs(block)) + { + fgCompactBlock(retTrueBb); + modified = true; + } + // By the time we get to the retFalseBb, it might be removed by fgCompactBlock() + // so we need to check if it is still valid. + if (!retFalseBb->HasFlag(BBF_REMOVED) && fgCanCompactBlock(retFalseBb) && !retFalseBb->TargetIs(block)) + { + fgCompactBlock(retFalseBb); + modified = true; + } + // Same here - bail out if the block is no longer BBJ_COND after compacting. + if (!block->KindIs(BBJ_COND)) + { + return modified; + } + + assert(block->TrueTargetIs(retTrueBb)); + assert(block->FalseTargetIs(retFalseBb)); + if (!retTrueBb->KindIs(BBJ_RETURN) || !retFalseBb->KindIs(BBJ_RETURN) || + !BasicBlock::sameEHRegion(block, retTrueBb) || !BasicBlock::sameEHRegion(block, retFalseBb) || + (retTrueBb == genReturnBB) || (retFalseBb == genReturnBB)) + { + // Both edges must be BBJ_RETURN + return modified; + } + + // The last statement has to be either JTRUE(cond) or JTRUE(comma(cond)), + // but let's be resilient just in case. + assert(block->lastStmt() != nullptr); + GenTree* node = block->lastStmt()->GetRootNode(); + GenTree* cond = node->gtGetOp1(); + if (!cond->OperIsCompare()) + { + return modified; + } + assert(cond->TypeIs(TYP_INT)); + + if ((retTrueBb->GetUniquePred(this) == nullptr) && (retFalseBb->GetUniquePred(this) == nullptr)) + { + // Both return blocks have multiple predecessors - bail out. + // We don't want to introduce a new epilogue. + return modified; + } + + // Is block a BBJ_RETURN(1/0) ? (single statement) + auto isReturnBool = [](const BasicBlock* block, bool value) { + if (block->KindIs(BBJ_RETURN) && block->hasSingleStmt() && (block->lastStmt() != nullptr)) + { + GenTree* node = block->lastStmt()->GetRootNode(); + return node->OperIs(GT_RETURN) && node->gtGetOp1()->IsIntegralConst(value ? 
1 : 0); + } + return false; + }; + + // Make sure we deal with true/false return blocks (or false/true) + bool retTrueFalse = isReturnBool(retTrueBb, true) && isReturnBool(retFalseBb, false); + bool retFalseTrue = isReturnBool(retTrueBb, false) && isReturnBool(retFalseBb, true); + if (!retTrueFalse && !retFalseTrue) + { + return modified; + } + + // Reverse the condition if we jump to "return false" on true. + if (retFalseTrue) + { + gtReverseCond(cond); + } + modified = true; + + // Decrease the weight of the return blocks since we no longer have edges to them. + // Although one might still be reachable from other blocks. + if (retTrueBb->hasProfileWeight()) + { + retTrueBb->decreaseBBProfileWeight(block->GetTrueEdge()->getLikelyWeight()); + } + if (retFalseBb->hasProfileWeight()) + { + retFalseBb->decreaseBBProfileWeight(block->GetFalseEdge()->getLikelyWeight()); + } + + // Unlink the return blocks + fgRemoveRefPred(block->GetTrueEdge()); + fgRemoveRefPred(block->GetFalseEdge()); + block->SetKindAndTargetEdge(BBJ_RETURN); + node->ChangeOper(GT_RETURN); + node->ChangeType(TYP_INT); + cond->gtFlags &= ~GTF_RELOP_JMP_USED; + + block->bbCodeOffsEnd = max(retTrueBb->bbCodeOffsEnd, retFalseBb->bbCodeOffsEnd); + gtSetStmtInfo(block->lastStmt()); + fgSetStmtSeq(block->lastStmt()); + gtUpdateStmtSideEffects(block->lastStmt()); + + JITDUMP("fgFoldCondToReturnBlock: folding " FMT_BB " from BBJ_COND into BBJ_RETURN:", block->bbNum); + DISPBLOCK(block) + return modified; +} diff --git a/src/coreclr/jit/optimizemaskconversions.cpp b/src/coreclr/jit/optimizemaskconversions.cpp index 1685581b0523..b328e884637e 100644 --- a/src/coreclr/jit/optimizemaskconversions.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -195,7 +195,7 @@ class MaskConversionsCheckVisitor final : public GenTreeVisitorisEmbeddedMaskingCompatibleHWIntrinsic() + // We notably don't check that op2 supported embedded masking directly // because we can still consume the mask directly in such cases. We'll just // emit `vblendmps zmm1 {k1}, zmm2, zmm3` instead of containing the CndSel // as part of something like `vaddps zmm1 {k1}, zmm2, zmm3` @@ -383,7 +383,7 @@ class MaskConversionsUpdateVisitor final : public GenTreeVisitorDumpTotalWeight(); // Fix up the type of the lcl and the lclvar. - assert(lclOp->gtType != TYP_MASK); + assert(!lclOp->TypeIs(TYP_MASK)); var_types lclOrigType = lclOp->gtType; lclOp->gtType = TYP_MASK; diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 8541db6c512e..86ddd21c0595 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -41,14 +41,14 @@ PhaseStatus Compiler::optSetBlockWeights() { noway_assert(opts.OptimizationEnabled()); assert(m_dfsTree != nullptr); - const bool usingProfileWeights = fgIsUsingProfileWeights(); + + // Leave breadcrumb for loop alignment + fgHasLoops = m_dfsTree->HasCycle(); // Rely on profile synthesis to propagate weights when we have PGO data. // TODO: Replace optSetBlockWeights with profile synthesis entirely. 
- if (usingProfileWeights) + if (fgIsUsingProfileWeights()) { - // Leave breadcrumb for loop alignment - fgHasLoops = m_dfsTree->HasCycle(); return PhaseStatus::MODIFIED_NOTHING; } @@ -63,11 +63,10 @@ PhaseStatus Compiler::optSetBlockWeights() m_reachabilitySets = BlockReachabilitySets::Build(m_dfsTree); } - if (m_dfsTree->HasCycle()) + for (FlowGraphNaturalLoop* const loop : m_loops->InReversePostOrder()) { - madeChanges = fgRenumberBlocks(); - optMarkLoopHeads(); - optFindAndScaleGeneralLoopBlocks(); + optScaleLoopBlocks(loop); + madeChanges = true; } bool firstBBDominatesAllReturns = true; @@ -108,7 +107,7 @@ PhaseStatus Compiler::optSetBlockWeights() block->bbSetRunRarely(); } - if (!usingProfileWeights && firstBBDominatesAllReturns) + if (firstBBDominatesAllReturns) { // If the weight is already zero (and thus rarely run), there's no point scaling it. if (block->bbWeight != BB_ZERO_WEIGHT) @@ -163,11 +162,10 @@ PhaseStatus Compiler::optSetBlockWeights() } //------------------------------------------------------------------------ -// optScaleLoopBlocks: Scale the weight of loop blocks from 'begBlk' to 'endBlk'. +// optScaleLoopBlocks: Scale the weight of the blocks in 'loop'. // // Arguments: -// begBlk - first block of range. Must be marked as a loop head (BBF_LOOP_HEAD). -// endBlk - last block of range (inclusive). Must be reachable from `begBlk`. +// loop - the loop to scale the weight of. // // Operation: // Calculate the 'loop weight'. This is the amount to scale the weight of each block in the loop. @@ -179,124 +177,58 @@ PhaseStatus Compiler::optSetBlockWeights() // 64 -- double loop nesting // 512 -- triple loop nesting // -void Compiler::optScaleLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) +void Compiler::optScaleLoopBlocks(FlowGraphNaturalLoop* loop) { - noway_assert(begBlk->bbNum <= endBlk->bbNum); - noway_assert(begBlk->isLoopHead()); - noway_assert(m_reachabilitySets->CanReach(begBlk, endBlk)); - noway_assert(!opts.MinOpts()); - -#ifdef DEBUG - if (verbose) - { - printf("\nMarking a loop from " FMT_BB " to " FMT_BB, begBlk->bbNum, endBlk->bbNum); - } -#endif - - // Build list of back edges for block begBlk. - FlowEdge* backedgeList = nullptr; - - for (BasicBlock* const predBlock : begBlk->PredBlocks()) - { - // Is this a back edge? - if (predBlock->bbNum >= begBlk->bbNum) - { - backedgeList = new (this, CMK_FlowEdge) FlowEdge(predBlock, begBlk, backedgeList); - -#if MEASURE_BLOCK_SIZE - genFlowNodeCnt += 1; - genFlowNodeSize += sizeof(FlowEdge); -#endif // MEASURE_BLOCK_SIZE - } - } - - // At least one backedge must have been found (the one from endBlk). - noway_assert(backedgeList); - - auto reportBlockWeight = [&](BasicBlock* blk, const char* message) { -#ifdef DEBUG - if (verbose) - { - printf("\n " FMT_BB "(wt=" FMT_WT ")%s", blk->bbNum, blk->getBBWeight(this), message); - } -#endif // DEBUG - }; + loop->VisitLoopBlocks([&](BasicBlock* curBlk) -> BasicBlockVisit { + auto reportBlockWeight = [&](const char* message) { + DBEXEC(verbose, + printf("\n " FMT_BB "(wt=" FMT_WT ")%s", curBlk->bbNum, curBlk->getBBWeight(this), message)); + }; - for (BasicBlock* const curBlk : BasicBlockRangeList(begBlk, endBlk)) - { // Don't change the block weight if it came from profile data. if (curBlk->hasProfileWeight() && fgHaveProfileWeights()) { - reportBlockWeight(curBlk, "; unchanged: has profile weight"); - continue; + reportBlockWeight("; unchanged: has profile weight"); + return BasicBlockVisit::Continue; } // Don't change the block weight if it's known to be rarely run. 
if (curBlk->isRunRarely()) { - reportBlockWeight(curBlk, "; unchanged: run rarely"); - continue; - } - - // Don't change the block weight if it's unreachable. - if (!m_reachabilitySets->GetDfsTree()->Contains(curBlk)) - { - reportBlockWeight(curBlk, "; unchanged: unreachable"); - continue; + reportBlockWeight("; unchanged: run rarely"); + return BasicBlockVisit::Continue; } - // For curBlk to be part of a loop that starts at begBlk, curBlk must be reachable from begBlk and - // (since this is a loop) begBlk must likewise be reachable from curBlk. + // If `curBlk` dominates any of the back edge blocks we set `dominates`. + bool dominates = false; - if (m_reachabilitySets->CanReach(curBlk, begBlk) && m_reachabilitySets->CanReach(begBlk, curBlk)) + for (FlowEdge* const backEdge : loop->BackEdges()) { - // If `curBlk` reaches any of the back edge blocks we set `reachable`. - // If `curBlk` dominates any of the back edge blocks we set `dominates`. - bool reachable = false; - bool dominates = false; + BasicBlock* const backEdgeSource = backEdge->getSourceBlock(); + dominates |= m_domTree->Dominates(curBlk, backEdgeSource); - for (FlowEdge* tmp = backedgeList; tmp != nullptr; tmp = tmp->getNextPredEdge()) + if (dominates) { - BasicBlock* backedge = tmp->getSourceBlock(); - - reachable |= m_reachabilitySets->CanReach(curBlk, backedge); - dominates |= m_domTree->Dominates(curBlk, backedge); - - if (dominates && reachable) - { - // No need to keep looking; we've already found all the info we need. - break; - } + // No need to keep looking; we've already found all the info we need. + break; } + } - if (reachable) - { - // If the block has BB_ZERO_WEIGHT, then it should be marked as rarely run, and skipped, above. - noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT); + weight_t scale = BB_LOOP_WEIGHT_SCALE; - weight_t scale = BB_LOOP_WEIGHT_SCALE; + if (!dominates) + { + // If `curBlk` reaches but doesn't dominate any back edge to `endBlk` then there must be at least + // some other path to `endBlk`, so don't give `curBlk` all the execution weight. + scale = scale / 2; + } - if (!dominates) - { - // If `curBlk` reaches but doesn't dominate any back edge to `endBlk` then there must be at least - // some other path to `endBlk`, so don't give `curBlk` all the execution weight. - scale = scale / 2; - } + curBlk->scaleBBWeight(scale); - curBlk->scaleBBWeight(scale); + reportBlockWeight(""); - reportBlockWeight(curBlk, ""); - } - else - { - reportBlockWeight(curBlk, "; unchanged: back edge unreachable"); - } - } - else - { - reportBlockWeight(curBlk, "; unchanged: block not in loop"); - } - } + return BasicBlockVisit::Continue; + }); } //---------------------------------------------------------------------------------- @@ -330,7 +262,7 @@ unsigned Compiler::optIsLoopIncrTree(GenTree* incr) // Increment should be by a const int. // TODO-CQ: CLONE: allow variable increments. 
- if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT)) + if (!incrVal->OperIs(GT_CNS_INT) || !incrVal->TypeIs(TYP_INT)) { return BAD_VAR_NUM; } @@ -365,7 +297,7 @@ bool Compiler::optIsLoopTestEvalIntoTemp(Statement* testStmt, Statement** newTes { GenTree* test = testStmt->GetRootNode(); - if (test->gtOper != GT_JTRUE) + if (!test->OperIs(GT_JTRUE)) { return false; } @@ -377,8 +309,7 @@ bool Compiler::optIsLoopTestEvalIntoTemp(Statement* testStmt, Statement** newTes GenTree* opr2 = relop->AsOp()->gtOp2; // Make sure we have jtrue (vtmp != 0) - if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) && - opr2->IsIntegralConst(0)) + if (relop->OperIs(GT_NE) && opr1->OperIs(GT_LCL_VAR) && opr2->OperIs(GT_CNS_INT) && opr2->IsIntegralConst(0)) { // Get the previous statement to get the def (rhs) of Vtmp to see // if the "test" is evaluated into Vtmp. @@ -1390,6 +1321,12 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0); assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0); + if (loop->GetHeader()->isRunRarely()) + { + JITDUMP("Failed to unroll loop " FMT_LP ": Loop is cold.\n", loop->GetIndex()); + return false; + } + NaturalLoopIterInfo iterInfo; if (!loop->AnalyzeIteration(&iterInfo)) { @@ -1731,7 +1668,7 @@ void Compiler::optRedirectPrevUnrollIteration(FlowGraphNaturalLoop* loop, BasicB assert(prevTestBlock->KindIs(BBJ_COND)); Statement* testCopyStmt = prevTestBlock->lastStmt(); GenTree* testCopyExpr = testCopyStmt->GetRootNode(); - assert(testCopyExpr->gtOper == GT_JTRUE); + assert(testCopyExpr->OperIs(GT_JTRUE)); GenTree* sideEffList = nullptr; gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF); if (sideEffList == nullptr) @@ -1870,13 +1807,11 @@ Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* t } //----------------------------------------------------------------------------- -// optInvertWhileLoop: modify flow and duplicate code so that for/while loops are +// optTryInvertWhileLoop: modify flow and duplicate code so that for/while loops are // entered at top and tested at bottom (aka loop rotation or bottom testing). // Creates a "zero trip test" condition which guards entry to the loop. // Enables loop invariant hoisting and loop cloning, which depend on -// `do {} while` format loops. Enables creation of a pre-header block after the -// zero trip test to place code that only runs if the loop is guaranteed to -// run at least once. +// `do {} while` format loops. // // Arguments: // block -- block that may be the predecessor of the un-rotated loop's test block. @@ -1884,156 +1819,112 @@ Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* t // Returns: // true if any IR changes possibly made (used to determine phase return status) // -// Notes: -// Uses a simple lexical screen to detect likely loops. -// -// Specifically, we're looking for the following case: -// -// block: -// ... -// jmp test // `block` argument -// top: -// ... -// ... -// test: -// ..stmts.. -// cond -// jtrue top -// -// If we find this, and the condition is simple enough, we change -// the loop to the following: -// -// block: -// ... -// jmp bNewCond -// bNewCond: -// ..stmts.. // duplicated cond block statements -// cond // duplicated cond -// jfalse join -// // else fall-through -// top: -// ... -// ... -// test: -// ..stmts.. 
-// cond -// jtrue top -// join: -// -// Makes no changes if the flow pattern match fails. -// -// May not modify a loop if profile is unfavorable, if the cost of duplicating -// code is large (factoring in potential CSEs). -// -bool Compiler::optInvertWhileLoop(BasicBlock* block) +bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) { - assert(opts.OptimizationEnabled()); - assert(compCodeOpt() != SMALL_CODE); + // Should have preheaders at this point + assert(loop->EntryEdges().size() == 1); + BasicBlock* const preheader = loop->EntryEdge(0)->getSourceBlock(); - // Does the BB end with an unconditional jump? + ArrayStack duplicatedBlocks(getAllocator(CMK_LoopOpt)); - if (!block->KindIs(BBJ_ALWAYS) || block->JumpsToNext()) + BasicBlock* condBlock = loop->GetHeader(); + while (true) { - return false; - } + if (!BasicBlock::sameEHRegion(preheader, condBlock)) + { + JITDUMP("No loop-inversion for " FMT_LP + " since we could not find a condition block in the same EH region as the preheader\n", + loop->GetIndex()); + return false; + } - if (block->HasFlag(BBF_KEEP_BBJ_ALWAYS)) - { - // It can't be one of the ones we use for our exception magic - return false; - } + duplicatedBlocks.Push(condBlock); - // Get hold of the jump target - BasicBlock* const bTest = block->GetTarget(); + if (condBlock->KindIs(BBJ_ALWAYS)) + { + condBlock = condBlock->GetTarget(); - // Does the bTest consist of 'jtrue(cond) block' ? - if (!bTest->KindIs(BBJ_COND)) - { - return false; - } + if (!loop->ContainsBlock(condBlock) || (condBlock == loop->GetHeader())) + { + JITDUMP("No loop-inversion for " FMT_LP "; ran out of blocks following BBJ_ALWAYS blocks\n", + loop->GetIndex()); + return false; + } - // bTest must be a backwards jump to block->bbNext - // This will be the top of the loop. - // - BasicBlock* const bTop = bTest->GetTrueTarget(); + continue; + } - if (!block->NextIs(bTop)) - { - return false; + if (!condBlock->KindIs(BBJ_COND)) + { + JITDUMP("No loop-inversion for " FMT_LP " since we could not find any BBJ_COND block\n", loop->GetIndex()); + return false; + } + + break; } - // Since bTest is a BBJ_COND it will have a false target - // - BasicBlock* const bJoin = bTest->GetFalseTarget(); - noway_assert(bJoin != nullptr); + const bool trueExits = !loop->ContainsBlock(condBlock->GetTrueTarget()); + const bool falseExits = !loop->ContainsBlock(condBlock->GetFalseTarget()); - // 'block' must be in the same try region as the condition, since we're going to insert a duplicated condition - // in a new block after 'block', and the condition might include exception throwing code. - // On non-funclet platforms (x86), the catch exit is a BBJ_ALWAYS, but we don't want that to - // be considered as the head of a loop, so also disallow different handler regions. - if (!BasicBlock::sameEHRegion(block, bTest)) + if (trueExits == falseExits) { + JITDUMP("No loop-inversion for " FMT_LP " since we could not find any exiting BBJ_COND block\n", + loop->GetIndex()); return false; } - // The duplicated condition block will branch to bTest->GetFalseTarget(), so that also better be in the - // same try region (or no try region) to avoid generating illegal flow. - if (bJoin->hasTryIndex() && !BasicBlock::sameTryRegion(block, bJoin)) + BasicBlock* const exit = trueExits ? condBlock->GetTrueTarget() : condBlock->GetFalseTarget(); + BasicBlock* const stayInLoopSucc = trueExits ? 
condBlock->GetFalseTarget() : condBlock->GetTrueTarget(); + + // If the condition is already a latch, then the loop is already inverted + if (stayInLoopSucc == loop->GetHeader()) { + JITDUMP("No loop-inversion for " FMT_LP " since it is already inverted\n", loop->GetIndex()); return false; } - // It has to be a forward jump. Defer this check until after all the cheap checks - // are done, since it iterates forward in the block list looking for block's target. - // TODO-CQ: Check if we can also optimize the backwards jump as well. - // - if (!fgIsForwardBranch(block, block->GetTarget())) + // Exiting the loop may enter a new try-region. However, to keep exits canonical, we will + // have to split the exit such that old loop edges exit to one half, while the duplicated condition + // exits to the other half. This will result in jump into the middle of a try-region, which is illegal. + // TODO: We can fix this by placing the first half of the split (which will be an empty block) outside + // the try region. + if (!BasicBlock::sameEHRegion(preheader, exit)) { + JITDUMP("No loop-inversion for " FMT_LP " since the preheader " FMT_BB " and exit " FMT_BB + " are in different EH regions\n", + loop->GetIndex(), preheader->bbNum, exit->bbNum); return false; } - // Find the loop termination test at the bottom of the loop. - Statement* const condStmt = bTest->lastStmt(); + JITDUMP("Condition in block " FMT_BB " of loop " FMT_LP " is a candidate for duplication to invert the loop\n", + condBlock->bbNum, loop->GetIndex()); - // Verify the test block ends with a conditional that we can manipulate. - GenTree* const condTree = condStmt->GetRootNode(); - noway_assert(condTree->gtOper == GT_JTRUE); - if (!condTree->AsOp()->gtOp1->OperIsCompare()) + // Check if loop is small enough to consider for inversion. + // Large loops are less likely to benefit from inversion. + const int sizeLimit = JitConfig.JitLoopInversionSizeLimit(); + if ((sizeLimit >= 0) && optLoopComplexityExceeds(loop, (unsigned)sizeLimit)) { return false; } - JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n", - block->bbNum, bTop->bbNum, bTest->bbNum); - - // Estimate the cost of cloning the entire test block. - // - // Note: it would help throughput to compute the maximum cost - // first and early out for large bTest blocks, as we are doing two - // tree walks per tree. But because of this helper call scan, the - // maximum cost depends on the trees in the block. - // - // We might consider flagging blocks with hoistable helper calls - // during importation, so we can avoid the helper search and - // implement an early bail out for large blocks with no helper calls. - // - // Note that gtPrepareCost can cause operand swapping, so we must - // return `true` (possible IR change) from here on. 
- unsigned estDupCostSz = 0; - for (Statement* const stmt : bTest->Statements()) + for (int i = 0; i < duplicatedBlocks.Height(); i++) { - GenTree* tree = stmt->GetRootNode(); - gtPrepareCost(tree); - estDupCostSz += tree->GetCostSz(); + BasicBlock* block = duplicatedBlocks.Bottom(i); + for (Statement* stmt : block->Statements()) + { + GenTree* tree = stmt->GetRootNode(); + gtPrepareCost(tree); + estDupCostSz += tree->GetCostSz(); + } } - weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; - bool haveProfileWeights = false; - weight_t const weightBlock = block->bbWeight; - weight_t const weightTest = bTest->bbWeight; - weight_t const weightTop = bTop->bbWeight; + weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; + bool haveProfileWeights = false; + weight_t const weightPreheader = preheader->bbWeight; + weight_t const weightCond = condBlock->bbWeight; + weight_t const weightStayInLoopSucc = stayInLoopSucc->bbWeight; // If we have profile data then we calculate the number of times // the loop will iterate into loopIterations @@ -2041,25 +1932,28 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) { // Only rely upon the profile weight when all three of these blocks // have good profile weights - if (block->hasProfileWeight() && bTest->hasProfileWeight() && bTop->hasProfileWeight()) + if (preheader->hasProfileWeight() && condBlock->hasProfileWeight() && stayInLoopSucc->hasProfileWeight()) { // If this while loop never iterates then don't bother transforming // - if (weightTop == BB_ZERO_WEIGHT) + if (weightStayInLoopSucc == BB_ZERO_WEIGHT) { - return true; + JITDUMP("No loop-inversion for " FMT_LP " since the in-loop successor " FMT_BB " has 0 weight\n", + loop->GetIndex(), preheader->bbNum); + return false; } haveProfileWeights = true; - // We generally expect weightTest > weightTop + // We generally expect weightCond > weightStayInLoopSucc // // Tolerate small inconsistencies... // - if (!fgProfileWeightsConsistent(weightBlock + weightTop, weightTest)) + if (!fgProfileWeightsConsistent(weightPreheader + weightStayInLoopSucc, weightCond)) { - JITDUMP("Profile weights locally inconsistent: block " FMT_WT ", next " FMT_WT ", test " FMT_WT "\n", - weightBlock, weightTop, weightTest); + JITDUMP("Profile weights locally inconsistent: preheader " FMT_WT ", stayInLoopSucc " FMT_WT + ", cond " FMT_WT "\n", + weightPreheader, weightStayInLoopSucc, weightCond); } else { @@ -2069,17 +1963,17 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // weightTest is the number of times that we consider entering or remaining in the loop // loopIterations is the average number of times that this loop iterates // - weight_t loopEntries = weightTest - weightTop; + weight_t loopEntries = weightCond - weightStayInLoopSucc; // If profile is inaccurate, try and use other data to provide a credible estimate. // The value should at least be >= weightBlock. // - if (loopEntries < weightBlock) + if (loopEntries < weightPreheader) { - loopEntries = weightBlock; + loopEntries = weightPreheader; } - loopIterations = weightTop / loopEntries; + loopIterations = weightStayInLoopSucc / loopEntries; } } else @@ -2105,8 +1999,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } } - // If the compare has too high cost then we don't want to dup. - bool costIsTooHigh = (estDupCostSz > maxDupCostSz); OptInvertCountTreeInfoType optInvertTotalInfo = {}; @@ -2121,32 +2013,37 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // // If the condition has array.Length operations, also boost, as they are likely to be CSE'd. 
- for (Statement* const stmt : bTest->Statements()) + for (int i = 0; i < duplicatedBlocks.Height() && costIsTooHigh; i++) { - GenTree* tree = stmt->GetRootNode(); - - OptInvertCountTreeInfoType optInvertInfo = optInvertCountTreeInfo(tree); - optInvertTotalInfo.sharedStaticHelperCount += optInvertInfo.sharedStaticHelperCount; - optInvertTotalInfo.arrayLengthCount += optInvertInfo.arrayLengthCount; - - if ((optInvertInfo.sharedStaticHelperCount > 0) || (optInvertInfo.arrayLengthCount > 0)) + BasicBlock* block = duplicatedBlocks.Bottom(i); + for (Statement* const stmt : block->Statements()) { - // Calculate a new maximum cost. We might be able to early exit. + GenTree* tree = stmt->GetRootNode(); - unsigned newMaxDupCostSz = - maxDupCostSz + 24 * min(optInvertTotalInfo.sharedStaticHelperCount, (int)(loopIterations + 1.5)) + - 8 * optInvertTotalInfo.arrayLengthCount; + OptInvertCountTreeInfoType optInvertInfo = optInvertCountTreeInfo(tree); + optInvertTotalInfo.sharedStaticHelperCount += optInvertInfo.sharedStaticHelperCount; + optInvertTotalInfo.arrayLengthCount += optInvertInfo.arrayLengthCount; - // Is the cost too high now? - costIsTooHigh = (estDupCostSz > newMaxDupCostSz); - if (!costIsTooHigh) + if ((optInvertInfo.sharedStaticHelperCount > 0) || (optInvertInfo.arrayLengthCount > 0)) { - // No need counting any more trees; we're going to do the transformation. - JITDUMP("Decided to duplicate loop condition block after counting helpers in tree [%06u] in " - "block " FMT_BB, - dspTreeID(tree), bTest->bbNum); - maxDupCostSz = newMaxDupCostSz; // for the JitDump output below - break; + // Calculate a new maximum cost. We might be able to early exit. + + unsigned newMaxDupCostSz = + maxDupCostSz + + 24 * min(optInvertTotalInfo.sharedStaticHelperCount, (int)(loopIterations + 1.5)) + + 8 * optInvertTotalInfo.arrayLengthCount; + + // Is the cost too high now? + costIsTooHigh = (estDupCostSz > newMaxDupCostSz); + if (!costIsTooHigh) + { + // No need counting any more trees; we're going to do the transformation. + JITDUMP("Decided to duplicate loop condition block after counting helpers in tree [%06u] in " + "block " FMT_BB, + dspTreeID(tree), block->bbNum); + maxDupCostSz = newMaxDupCostSz; // for the JitDump output below + break; + } } } } @@ -2160,7 +2057,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) printf( "\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i," "\n loopIterations = %7.3f, optInvertTotalInfo.sharedStaticHelperCount >= %d, haveProfileWeights = %s\n", - dspTreeID(condTree), costIsTooHigh ? "not done" : "performed", estDupCostSz, + dspTreeID(condBlock->lastStmt()->GetRootNode()), costIsTooHigh ? "not done" : "performed", estDupCostSz, costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, optInvertTotalInfo.sharedStaticHelperCount, dspBool(haveProfileWeights)); } @@ -2171,148 +2068,103 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) return true; } - bool foundCondTree = false; - - // Create a new block after `block` to put the copied condition code. - // - BasicBlock* const bNewCond = fgNewBBafter(BBJ_COND, block, /*extendRegion*/ true); - - // Clone each statement in bTest and append to bNewCond. 
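
The boost above raises the duplication budget when the cloned condition contains hoistable shared-static helper calls or array.Length nodes that are likely to be CSE'd. With hypothetical inputs (the constants 24 and 8 come from the code above; everything else here is made up), the budget computation looks like this:

```cpp
#include <algorithm>
#include <cstdio>

int main()
{
    unsigned maxDupCostSz            = 34;  // hypothetical base budget
    unsigned estDupCostSz            = 70;  // hypothetical size of the blocks to duplicate
    int      sharedStaticHelperCount = 2;   // hoistable shared-static helper calls seen so far
    int      arrayLengthCount        = 1;   // array.Length nodes seen so far (likely CSE'd)
    double   loopIterations          = 4.0; // estimated average trip count

    // Same formula as above: each helper call is assumed to be worth up to
    // ~24 size units of duplication (capped by the trip count), each array
    // length ~8 units.
    unsigned newMaxDupCostSz = maxDupCostSz +
                               24 * std::min(sharedStaticHelperCount, (int)(loopIterations + 1.5)) +
                               8 * arrayLengthCount;

    bool costIsTooHigh = (estDupCostSz > newMaxDupCostSz);
    printf("budget %u -> %u, duplicate? %s\n", maxDupCostSz, newMaxDupCostSz, costIsTooHigh ? "no" : "yes");
    return 0;
}
```
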
- for (Statement* const stmt : bTest->Statements()) - { - GenTree* originalTree = stmt->GetRootNode(); - GenTree* clonedTree = gtCloneExpr(originalTree); - - // Special case handling needed for the conditional jump tree - if (originalTree == condTree) - { - foundCondTree = true; - - // Get the compare subtrees - GenTree* originalCompareTree = originalTree->AsOp()->gtOp1; - GenTree* clonedCompareTree = clonedTree->AsOp()->gtOp1; - assert(originalCompareTree->OperIsCompare()); - assert(clonedCompareTree->OperIsCompare()); - - // The original test branches to remain in the loop. The - // new cloned test will branch to avoid the loop. So the - // cloned compare needs to reverse the branch condition. - gtReverseCond(clonedCompareTree); - } + // Split the preheader so we can duplicate the statements into it. The new + // block will be the new preheader. + BasicBlock* const newPreheader = fgSplitBlockAtEnd(preheader); - Statement* clonedStmt = fgNewStmtAtEnd(bNewCond, clonedTree); + // Make sure exit stays canonical + BasicBlock* nonEnterBlock = fgSplitBlockAtBeginning(exit); - if (opts.compDbgInfo) - { - clonedStmt->SetDebugInfo(stmt->GetDebugInfo()); - } - } + JITDUMP("New preheader is " FMT_BB "\n", newPreheader->bbNum); + JITDUMP("Duplicated condition block is " FMT_BB "\n", preheader->bbNum); + JITDUMP("Old exit is " FMT_BB ", new non-enter block is " FMT_BB "\n", exit->bbNum, nonEnterBlock->bbNum); - assert(foundCondTree); + // Get the newCond -> newPreheader edge + FlowEdge* const newCondToNewPreheader = preheader->GetTargetEdge(); - // Flag the block that received the copy as potentially having various constructs. - bNewCond->CopyFlags(bTest, BBF_COPY_PROPAGATE); + // Add newCond -> nonEnterBlock + FlowEdge* const newCondToNewExit = fgAddRefPred(nonEnterBlock, preheader); - // Update pred info - // - // For now we set the likelihood of the newCond branch to match - // the likelihood of the test branch (though swapped, since we're - // currently reversing the condition). This may or may not match - // the block weight adjustments we're making. All this becomes - // easier to reconcile once we rely on edge likelihoods more and - // have synthesis running (so block weights ==> frequencies). - // - // Until then we won't worry that edges and blocks are potentially - // out of sync. - // - FlowEdge* const testTopEdge = bTest->GetTrueEdge(); - FlowEdge* const testJoinEdge = bTest->GetFalseEdge(); - FlowEdge* const newCondJoinEdge = fgAddRefPred(bJoin, bNewCond, testJoinEdge); - FlowEdge* const newCondTopEdge = fgAddRefPred(bTop, bNewCond, testTopEdge); + preheader->SetCond(trueExits ? newCondToNewExit : newCondToNewPreheader, + trueExits ? newCondToNewPreheader : newCondToNewExit); - bNewCond->SetTrueEdge(newCondJoinEdge); - bNewCond->SetFalseEdge(newCondTopEdge); + preheader->GetTrueEdge()->setLikelihood(condBlock->GetTrueEdge()->getLikelihood()); + preheader->GetFalseEdge()->setLikelihood(condBlock->GetFalseEdge()->getLikelihood()); - fgRedirectTargetEdge(block, bNewCond); - assert(block->JumpsToNext()); + // Redirect newPreheader from header to stayInLoopSucc + fgRedirectTargetEdge(newPreheader, stayInLoopSucc); - // Fix flow and profile - // - bNewCond->inheritWeight(block); - - // Move all predecessor edges that look like loop entry edges to point to the new cloned condition - // block, not the existing condition block. 
The idea is that if we only move `block` to point to - // `bNewCond`, but leave other `bTest` predecessors still pointing to `bTest`, when we eventually - // recognize loops, the loop will appear to have multiple entries, which will prevent optimization. - // We don't have loops yet, but blocks should be in increasing lexical numbered order, so use that - // as the proxy for predecessors that are "in" versus "out" of the potential loop. Note that correctness - // is maintained no matter which condition block we point to, but we'll lose optimization potential - // (and create spaghetti code) if we get it wrong. - // - unsigned const loopFirstNum = bTop->bbNum; - unsigned const loopBottomNum = bTest->bbNum; - for (FlowEdge* const predEdge : bTest->PredEdgesEditing()) + // Duplicate all the code now + for (int i = 0; i < duplicatedBlocks.Height(); i++) { - BasicBlock* const predBlock = predEdge->getSourceBlock(); - unsigned const bNum = predBlock->bbNum; - if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum)) + BasicBlock* block = duplicatedBlocks.Bottom(i); + for (Statement* stmt : block->Statements()) { - // Looks like the predecessor is from within the potential loop; skip it. - continue; + GenTree* clonedTree = gtCloneExpr(stmt->GetRootNode()); + Statement* clonedStmt = fgNewStmtAtEnd(preheader, clonedTree, stmt->GetDebugInfo()); + + if (stmt == condBlock->lastStmt()) + { + // TODO: This ought not to be necessary, but has large negative diffs if we don't do it + assert(clonedStmt->GetRootNode()->OperIs(GT_JTRUE)); + clonedStmt->GetRootNode()->AsUnOp()->gtOp1 = gtReverseCond(clonedStmt->GetRootNode()->gtGetOp1()); + preheader->SetCond(preheader->GetFalseEdge(), preheader->GetTrueEdge()); + } + + DISPSTMT(clonedStmt); } - // Redirect the predecessor to the new block. - JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, - bTest->bbNum, predBlock->bbNum, bNewCond->bbNum); + preheader->CopyFlags(block, BBF_COPY_PROPAGATE); + } - switch (predBlock->GetKind()) - { - case BBJ_ALWAYS: - case BBJ_CALLFINALLY: - case BBJ_CALLFINALLYRET: - case BBJ_COND: - case BBJ_SWITCH: - case BBJ_EHFINALLYRET: - fgReplaceJumpTarget(predBlock, bTest, bNewCond); - break; + if (haveProfileWeights) + { + // Reduce flow into the new loop entry/exit blocks + newPreheader->setBBProfileWeight(newCondToNewPreheader->getLikelyWeight()); + exit->decreaseBBProfileWeight(newCondToNewExit->getLikelyWeight()); - case BBJ_EHCATCHRET: - case BBJ_EHFILTERRET: - // These block types should not need redirecting - break; + // Update the weight for the duplicated blocks. Normally, this reduces + // the weight of condBlock, except in odd cases of stress modes with + // inconsistent weights. - default: - assert(!"Unexpected bbKind for predecessor block"); - break; + for (int i = 0; i < (duplicatedBlocks.Height() - 1); i++) + { + BasicBlock* block = duplicatedBlocks.Bottom(i); + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", block->bbNum, weightCond, + weightStayInLoopSucc); + block->setBBProfileWeight(weightStayInLoopSucc); } + + condBlock->setBBProfileWeight(condBlock->computeIncomingWeight()); } - if (haveProfileWeights) + // Finally compact the condition with its pred if that is possible now. + // TODO-Cleanup: This compensates for limitations in analysis of downstream + // phases, particularly the pattern-based IV analysis. 
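
On the duplicated guard the compare is rewritten with gtReverseCond and the true/false edges are swapped, which preserves behavior while normalizing the guard's polarity for downstream phases (see the TODO above). Reversing a compare simply flips its relational operator; a toy standalone sketch of that mapping (the enum below is illustrative, not the real GenTree opers):

```cpp
#include <cassert>
#include <cstdio>

// Toy relational operators standing in for the JIT's compare opers.
enum RelOp { LT, LE, GT, GE, EQ, NE };

// Reverse the sense of a comparison: "stay in the loop if i < n" becomes
// "leave the loop if i >= n" on the duplicated guard.
RelOp Reverse(RelOp op)
{
    switch (op)
    {
        case LT: return GE;
        case LE: return GT;
        case GT: return LE;
        case GE: return LT;
        case EQ: return NE;
        case NE: return EQ;
    }
    assert(false);
    return EQ;
}

int main()
{
    // Original bottom test: JTRUE(i < n) with the true edge staying in the loop.
    // Cloned guard:         JTRUE(i >= n) with the true edge leaving the loop.
    RelOp original = LT;
    RelOp cloned   = Reverse(original);
    printf("original=%d cloned=%d\n", (int)original, (int)cloned);
    return 0;
}
```
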
+ BasicBlock* const condPred = condBlock->GetUniquePred(this); + if (condPred != nullptr) { - // The above change should have moved some flow out of 'bTest', and into 'bNewCond'. - // Check that no extraneous flow was lost or gained in the process. - // - const weight_t totalWeight = bTest->bbWeight; - bTest->setBBProfileWeight(bTest->computeIncomingWeight()); - bNewCond->setBBProfileWeight(bNewCond->computeIncomingWeight()); - - if (!fgProfileWeightsConsistent(totalWeight, bTest->bbWeight + bNewCond->bbWeight)) + JITDUMP("Cond block " FMT_BB " has a unique pred now, seeing if we can compact...\n", condBlock->bbNum); + if (fgCanCompactBlock(condPred)) { - JITDUMP("Redirecting flow from " FMT_BB " to " FMT_BB " introduced inconsistency. Data %s inconsistent.\n", - bTest->bbNum, bNewCond->bbNum, fgPgoConsistent ? "is now" : "was already"); - fgPgoConsistent = false; + JITDUMP(" ..we can!\n"); + fgCompactBlock(condPred); + condBlock = condPred; + } + else + { + JITDUMP(" ..we cannot\n"); } } #ifdef DEBUG if (verbose) { - printf("\nDuplicated loop exit block at " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")\n", bNewCond->bbNum, - bNewCond->GetFalseTarget()->bbNum, bTest->bbNum); + printf("\nDuplicated loop exit block at " FMT_BB " for loop " FMT_LP "\n", preheader->bbNum, loop->GetIndex()); printf("Estimated code size expansion is %d\n", estDupCostSz); - fgDumpBlock(bNewCond); - fgDumpBlock(bTest); + fgDumpBlock(preheader); + fgDumpBlock(condBlock); } #endif // DEBUG @@ -2328,8 +2180,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // PhaseStatus Compiler::optInvertLoops() { - noway_assert(opts.OptimizationEnabled()); - #if defined(OPT_CONFIG) if (!JitConfig.JitDoLoopInversion()) { @@ -2338,87 +2188,74 @@ PhaseStatus Compiler::optInvertLoops() } #endif // OPT_CONFIG - bool madeChanges = fgRenumberBlocks(); - - if (compCodeOpt() == SMALL_CODE) - { - // do not invert any loops - } - else + if (compCodeOpt() != SMALL_CODE) { - for (BasicBlock* const block : Blocks()) + fgDfsBlocksAndRemove(); + optFindLoops(); + for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) { - // Make sure the appropriate fields are initialized - // - if (block->bbWeight == BB_ZERO_WEIGHT) - { - // Zero weighted block can't have a LOOP_HEAD flag - noway_assert(block->isLoopHead() == false); - continue; - } - - if (optInvertWhileLoop(block)) - { - madeChanges = true; - } + optTryInvertWhileLoop(loop); } + + fgInvalidateDfsTree(); + return PhaseStatus::MODIFIED_EVERYTHING; } - return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; + return PhaseStatus::MODIFIED_NOTHING; } //----------------------------------------------------------------------------- -// optOptimizeFlow: simplify flow graph +// optOptimizeFlow: Simplify flowgraph, and run a few flow optimizations // // Returns: // suitable phase status // -// Notes: -// Does not do profile-based reordering to try and ensure that -// that we recognize and represent as many loops as possible. -// PhaseStatus Compiler::optOptimizeFlow() { noway_assert(opts.OptimizationEnabled()); - fgUpdateFlowGraph(/* doTailDuplication */ true); - fgReorderBlocks(/* useProfile */ false); + bool modified = fgUpdateFlowGraph(/* doTailDuplication */ true); - // fgReorderBlocks can cause IR changes even if it does not modify - // the flow graph. It calls gtPrepareCost which can cause operand swapping. - // Work around this for now. - // - // Note phase status only impacts dumping and checking done post-phase, - // it has no impact on a release build. 
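
The rewritten optInvertLoops driver above walks the natural loops found by optFindLoops in post order, which for a loop-nest tree visits inner loops before the loops that contain them. A generic standalone illustration of that traversal order (hand-built tree, purely illustrative):

```cpp
#include <cstdio>
#include <vector>

struct LoopNode
{
    const char*            name;
    std::vector<LoopNode*> children; // loops nested directly inside this one
};

// Post-order visit: children (inner loops) first, then the loop itself.
void VisitPostOrder(LoopNode* loop)
{
    for (LoopNode* inner : loop->children)
    {
        VisitPostOrder(inner);
    }
    printf("processing %s\n", loop->name);
}

int main()
{
    LoopNode inner1{"inner1", {}};
    LoopNode inner2{"inner2", {}};
    LoopNode outer{"outer", {&inner1, &inner2}};

    // Prints inner1, inner2, outer: nested loops are handled before the outer one.
    VisitPostOrder(&outer);
    return 0;
}
```
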
- // - return PhaseStatus::MODIFIED_EVERYTHING; + // TODO: Always rely on profile synthesis to identify cold blocks. + if (!fgIsUsingProfileWeights()) + { + modified |= fgExpandRarelyRunBlocks(); + } + + return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } //----------------------------------------------------------------------------- -// optOptimizeLayout: reorder blocks to reduce cost of control flow +// optOptimizePreLayout: Optimizes flow before reordering blocks. // // Returns: // suitable phase status // -// Notes: -// Reorders using profile data, if available. -// -PhaseStatus Compiler::optOptimizeLayout() +PhaseStatus Compiler::optOptimizePreLayout() { - noway_assert(opts.OptimizationEnabled()); + assert(opts.OptimizationEnabled()); - fgUpdateFlowGraph(/* doTailDuplication */ false); - fgReorderBlocks(/* useProfile */ true); - fgUpdateFlowGraph(/* doTailDuplication */ false, /* isPhase */ false); + bool modified = fgUpdateFlowGraph(); - // fgReorderBlocks can cause IR changes even if it does not modify - // the flow graph. It calls gtPrepareCost which can cause operand swapping. - // Work around this for now. - // - // Note phase status only impacts dumping and checking done post-phase, - // it has no impact on a release build. - // - return PhaseStatus::MODIFIED_EVERYTHING; + // TODO: Always rely on profile synthesis to identify cold blocks. + if (!fgIsUsingProfileWeights()) + { + modified |= fgExpandRarelyRunBlocks(); + } + + // Run a late pass of unconditional-to-conditional branch optimization, skipping handler blocks. + for (BasicBlock* block = fgFirstBB; block != fgFirstFuncletBB; block = block->Next()) + { + if (!UsesFunclets() && block->hasHndIndex()) + { + block = ehGetDsc(block->getHndIndex())->ebdHndLast; + continue; + } + + modified |= fgOptimizeBranch(block); + } + + return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } //----------------------------------------------------------------------------- @@ -2471,62 +2308,6 @@ PhaseStatus Compiler::optOptimizePostLayout() return status; } -//------------------------------------------------------------------------ -// optMarkLoopHeads: Mark all potential loop heads as BBF_LOOP_HEAD. A potential loop head is a block -// targeted by a lexical back edge, where the source of the back edge is reachable from the block. -// Note that if there are no lexical back edges, there can't be any loops. -// -// If there are any potential loop heads, set `fgHasLoops` to `true`. -// -// Assumptions: -// The reachability sets must be computed and valid. -// -void Compiler::optMarkLoopHeads() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In optMarkLoopHeads()\n"); - } - fgDebugCheckBBNumIncreasing(); - - int loopHeadsMarked = 0; -#endif - - assert((m_dfsTree != nullptr) && (m_reachabilitySets != nullptr)); - - bool hasLoops = false; - - for (BasicBlock* const block : Blocks()) - { - // Set BBF_LOOP_HEAD if we have backwards branches to this block. - - for (BasicBlock* const predBlock : block->PredBlocks()) - { - if (block->bbNum <= predBlock->bbNum) - { - if (predBlock->KindIs(BBJ_CALLFINALLY)) - { - // Loops never have BBJ_CALLFINALLY as the source of their "back edge". 
- continue; - } - - // If block can reach predBlock then we have a loop head - if (m_reachabilitySets->CanReach(block, predBlock)) - { - hasLoops = true; - block->SetFlags(BBF_LOOP_HEAD); - INDEBUG(++loopHeadsMarked); - break; // No need to look at more `block` predecessors - } - } - } - } - - JITDUMP("%d loop heads marked\n", loopHeadsMarked); - fgHasLoops = hasLoops; -} - //----------------------------------------------------------------------------- // optResetLoopInfo: reset all loop info in preparation for refinding the loops // and scaling blocks based on it. @@ -2548,103 +2329,9 @@ void Compiler::optResetLoopInfo() block->bbWeight = BB_UNITY_WEIGHT; block->RemoveFlags(BBF_RUN_RARELY); } - - block->RemoveFlags(BBF_LOOP_HEAD); } } -//----------------------------------------------------------------------------- -// optFindAndScaleGeneralLoopBlocks: scale block weights based on loop nesting depth. -// Note that this uses a very general notion of "loop": any block targeted by a reachable -// back-edge is considered a loop. -// -void Compiler::optFindAndScaleGeneralLoopBlocks() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In optFindAndScaleGeneralLoopBlocks()\n"); - } -#endif - - // This code depends on block number ordering. - INDEBUG(fgDebugCheckBBNumIncreasing()); - - assert((m_dfsTree != nullptr) && (m_domTree != nullptr) && (m_reachabilitySets != nullptr)); - - unsigned generalLoopCount = 0; - - // We will use the following terminology: - // top - the first basic block in the loop (i.e. the head of the backward edge) - // bottom - the last block in the loop (i.e. the block from which we jump to the top) - // lastBottom - used when we have multiple back edges to the same top - - for (BasicBlock* const top : Blocks()) - { - // Only consider `top` blocks already determined to be potential loop heads. - if (!top->isLoopHead()) - { - continue; - } - - BasicBlock* foundBottom = nullptr; - - for (BasicBlock* const bottom : top->PredBlocks()) - { - // Is this a loop candidate? - We look for "back edges" - - // Is this a backward edge? (from BOTTOM to TOP) - if (top->bbNum > bottom->bbNum) - { - continue; - } - - // We only consider back-edges of these kinds for loops. - if (!bottom->KindIs(BBJ_COND, BBJ_ALWAYS, BBJ_CALLFINALLYRET)) - { - continue; - } - - /* the top block must be able to reach the bottom block */ - if (!m_reachabilitySets->CanReach(top, bottom)) - { - continue; - } - - /* Found a new loop, record the longest backedge in foundBottom */ - - if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum)) - { - foundBottom = bottom; - } - } - - if (foundBottom) - { - generalLoopCount++; - - /* Mark all blocks between 'top' and 'bottom' */ - - optScaleLoopBlocks(top, foundBottom); - } - - // We track at most 255 loops - if (generalLoopCount == 255) - { -#if COUNT_LOOPS - totalUnnatLoopOverflows++; -#endif - break; - } - } - - JITDUMP("\nFound a total of %d general loops.\n", generalLoopCount); - -#if COUNT_LOOPS - totalUnnatLoopCount += generalLoopCount; -#endif -} - //----------------------------------------------------------------------------- // optFindLoopsPhase: find loops in the function. // @@ -2866,7 +2553,7 @@ bool Compiler::optCreatePreheader(FlowGraphNaturalLoop* loop) { // Preheader should be in the true enclosing region of the header. 
// - preheaderEHRegion = ehTrueEnclosingTryIndexIL(preheaderEHRegion); + preheaderEHRegion = ehTrueEnclosingTryIndex(preheaderEHRegion); inSameRegionAsHeader = false; break; } @@ -3152,6 +2839,47 @@ bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit) return true; } +//------------------------------------------------------------------------ +// optLoopComplexityExceeds: Check if the number of nodes in the loop exceeds some limit +// +// Arguments: +// loop - the loop to compute the number of nodes in +// limit - limit on the number of nodes +// +// Returns: +// true if the number of nodes exceeds the limit +// +bool Compiler::optLoopComplexityExceeds(FlowGraphNaturalLoop* loop, unsigned limit) +{ + assert(loop != nullptr); + + // See if loop size exceeds the limit. + // + unsigned size = 0; + + BasicBlockVisit const result = loop->VisitLoopBlocks([this, limit, &size](BasicBlock* block) { + assert(limit >= size); + unsigned const slack = limit - size; + unsigned blockSize = 0; + if (block->ComplexityExceeds(this, slack, &blockSize)) + { + return BasicBlockVisit::Abort; + } + + size += blockSize; + return BasicBlockVisit::Continue; + }); + + if (result == BasicBlockVisit::Abort) + { + JITDUMP("Loop " FMT_LP ": exceeds size limit %u\n", loop->GetIndex(), limit); + return true; + } + + JITDUMP("Loop " FMT_LP ": size %u does not exceed size limit %u\n", loop->GetIndex(), size, limit); + return false; +} + //----------------------------------------------------------------------------- // optSetWeightForPreheaderOrExit: Set the weight of a newly created preheader // or exit, after it has been added to the flowgraph. @@ -3372,7 +3100,7 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu // If 'dstt' is unsigned and one of the operands can be narrowed into 'dsst', // the result of the GT_AND will also fit into 'dstt' and can be narrowed. // The same is true if one of the operands is an int const and can be narrowed into 'dsst'. - if ((op2->gtOper == GT_CNS_INT) || varTypeIsUnsigned(dstt)) + if (op2->OperIs(GT_CNS_INT) || varTypeIsUnsigned(dstt)) { if (optNarrowTree(op2, srct, dstt, NoVNPair, false)) { @@ -3385,7 +3113,7 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu } } - if ((opToNarrow == nullptr) && ((op1->gtOper == GT_CNS_INT) || varTypeIsUnsigned(dstt))) + if ((opToNarrow == nullptr) && (op1->OperIs(GT_CNS_INT) || varTypeIsUnsigned(dstt))) { if (optNarrowTree(op1, srct, dstt, NoVNPair, false)) { @@ -3411,7 +3139,7 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu // We may also need to cast away the upper bits of *otherOpPtr if (srcSize == 8) { - assert(tree->gtType == TYP_INT); + assert(tree->TypeIs(TYP_INT)); GenTree* castOp = gtNewCastNode(TYP_INT, *otherOpPtr, false, TYP_INT); castOp->SetMorphed(this); *otherOpPtr = castOp; @@ -3453,7 +3181,7 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu if (doit) { - if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT)) + if (tree->OperIs(GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT)) { tree->gtFlags &= ~GTF_MUL_64RSLT; } @@ -3668,30 +3396,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, FlowGr preheader->CopyFlags(exprBb, BBF_COPY_PROPAGATE); - Statement* hoistStmt = gtNewStmt(hoist); - - // Simply append the statement at the end of the preHead's list. 
- Statement* firstStmt = preheader->firstStmt(); - if (firstStmt != nullptr) - { - /* append after last statement */ - - Statement* lastStmt = preheader->lastStmt(); - assert(lastStmt->GetNextStmt() == nullptr); - - lastStmt->SetNextStmt(hoistStmt); - hoistStmt->SetPrevStmt(lastStmt); - firstStmt->SetPrevStmt(hoistStmt); - } - else - { - /* Empty pre-header - store the single statement in the block */ - - preheader->bbStmtList = hoistStmt; - hoistStmt->SetPrevStmt(hoistStmt); - } - - hoistStmt->SetNextStmt(nullptr); + fgInsertStmtAtEnd(preheader, fgNewStmtFromTree(hoist)); #ifdef DEBUG if (verbose) @@ -3702,12 +3407,6 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, FlowGr } #endif - if (fgNodeThreading == NodeThreading::AllTrees) - { - gtSetStmtInfo(hoistStmt); - fgSetStmtSeq(hoistStmt); - } - #ifdef DEBUG if (m_nodeTestData != nullptr) { @@ -4298,7 +3997,7 @@ void Compiler::optRecordLoopMemoryDependence(GenTree* tree, BasicBlock* block, V } //------------------------------------------------------------------------ -// optCopyLoopMemoryDependence: record that tree's loop memory dependence +// optCopyLoopMemoryDependence: Recursively record that tree's loop memory dependence // is the same as some other tree. // // Arguments: @@ -4307,6 +4006,8 @@ void Compiler::optRecordLoopMemoryDependence(GenTree* tree, BasicBlock* block, V // void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) { + assert(fromTree->OperGet() == toTree->OperGet()); + NodeToLoopMemoryBlockMap* const map = GetNodeToLoopMemoryBlockMap(); BasicBlock* mapBlock = nullptr; @@ -4314,6 +4015,20 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) { map->Set(toTree, mapBlock); } + + GenTreeOperandIterator fromIterCur = fromTree->OperandsBegin(); + GenTreeOperandIterator fromIterEnd = fromTree->OperandsEnd(); + GenTreeOperandIterator toIterCur = toTree->OperandsBegin(); + GenTreeOperandIterator toIterEnd = toTree->OperandsEnd(); + + while (fromIterCur != fromIterEnd) + { + optCopyLoopMemoryDependence(*fromIterCur, *toIterCur); + ++fromIterCur; + ++toIterCur; + } + + assert(toIterCur == toIterEnd); } //------------------------------------------------------------------------ @@ -4426,7 +4141,7 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop, { // TODO-CQ: This is a more restrictive version of a check that optIsCSEcandidate already does - it allows // a struct typed node if a class handle can be recovered from it. - if (node->TypeGet() == TYP_STRUCT) + if (node->TypeIs(TYP_STRUCT)) { return false; } @@ -5208,7 +4923,7 @@ void Compiler::fgSetEHRegionForNewPreheaderOrExit(BasicBlock* block) { // `next` is the beginning of a try block. Figure out the EH region to use. assert(next->hasTryIndex()); - unsigned newTryIndex = ehTrueEnclosingTryIndexIL(next->getTryIndex()); + unsigned newTryIndex = ehTrueEnclosingTryIndex(next->getTryIndex()); if (newTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) { // No EH try index. @@ -5402,7 +5117,7 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk, FlowGraphNatura GenTree* addr = tree->AsIndir()->Addr()->gtEffectiveVal(); - if (addr->TypeGet() == TYP_BYREF && addr->OperGet() == GT_LCL_VAR) + if (addr->TypeIs(TYP_BYREF) && addr->OperIs(GT_LCL_VAR)) { // If it's a local byref for which we recorded a value number, use that... 
GenTreeLclVar* argLcl = addr->AsLclVar(); @@ -5738,19 +5453,19 @@ void Compiler::optRemoveCommaBasedRangeCheck(GenTree* comma, Statement* stmt) ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEBUGARG(bool bRngChk)) { assert(mul); - assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH); + assert(mul->OperIs(GT_MUL) || mul->OperIs(GT_LSH)); assert(mul->AsOp()->gtOp2->IsCnsIntOrI()); ssize_t scale = mul->AsOp()->gtOp2->AsIntConCommon()->IconValue(); - if (mul->gtOper == GT_LSH) + if (mul->OperIs(GT_LSH)) { scale = ((ssize_t)1) << scale; } GenTree* index = mul->AsOp()->gtOp1; - if (index->gtOper == GT_MUL && index->AsOp()->gtOp2->IsCnsIntOrI()) + if (index->OperIs(GT_MUL) && index->AsOp()->gtOp2->IsCnsIntOrI()) { // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4): // When index->gtOper is GT_MUL and index->AsOp()->gtOp2->gtOper is GT_CNS_INT (i.e. * 5), @@ -5760,7 +5475,7 @@ ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEB index = index->AsOp()->gtOp1; } - assert(!bRngChk || index->gtOper != GT_COMMA); + assert(!bRngChk || !index->OperIs(GT_COMMA)); if (pIndex) { @@ -5899,8 +5614,10 @@ void Compiler::optRemoveRedundantZeroInits() defsInBlock.Set(lclNum, 1); } } - else if (varTypeIsStruct(lclDsc) && ((tree->gtFlags & GTF_VAR_USEASG) == 0) && - lvaGetPromotionType(lclDsc) != PROMOTION_TYPE_NONE) + // Here we treat both "full" and "partial" tracked field defs as defs + // (that is, we ignore the state of GTF_VAR_USEASG). + // + else if (varTypeIsStruct(lclDsc) && lvaGetPromotionType(lclDsc) != PROMOTION_TYPE_NONE) { for (unsigned i = lclDsc->lvFieldLclStart; i < lclDsc->lvFieldLclStart + lclDsc->lvFieldCnt; ++i) @@ -6080,6 +5797,14 @@ PhaseStatus Compiler::optVNBasedDeadStoreRemoval() continue; } + if (compIsAsync() && ((varDsc->TypeGet() == TYP_BYREF) || + ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->GetLayout()->HasGCByRef()))) + { + // A dead store to a byref local may not actually be dead if it + // crosses a suspension point. + continue; + } + for (unsigned defIndex = 1; defIndex < defCount; defIndex++) { LclSsaVarDsc* defDsc = varDsc->lvPerSsaData.GetSsaDefByIndex(defIndex); @@ -6117,7 +5842,7 @@ PhaseStatus Compiler::optVNBasedDeadStoreRemoval() // CQ heuristic: avoid removing defs of enregisterable locals where this is likely to // make them "must-init", extending live ranges. Here we assume the first SSA def was // the implicit "live-in" one, which is not guaranteed, but very likely. 
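
optGetArrayRefScaleAndIndex above peels a constant scale off a GT_MUL/GT_LSH address computation, including the cascaded case mentioned in the comment (`* 20` morphed into `* 5 * 4`). A standalone sketch of the same peeling on a toy expression tree (the types and helpers here are illustrative, not the real GenTree API):

```cpp
#include <cassert>
#include <cstdio>

// Toy expression node: either a leaf "index" or op(op1, constant).
struct Node
{
    enum Kind { Index, Mul, Lsh } kind;
    Node* op1;
    long  cns;
};

// Mirror of the logic above: return the combined constant scale and the
// underlying index expression.
long GetScaleAndIndex(Node* mul, Node** pIndex)
{
    assert(mul->kind == Node::Mul || mul->kind == Node::Lsh);
    long scale = mul->cns;
    if (mul->kind == Node::Lsh)
    {
        scale = 1L << scale; // LSH by c is a multiply by 2^c
    }

    Node* index = mul->op1;
    if (index->kind == Node::Mul)
    {
        // Cascaded constant multiply, e.g. (i * 5) * 4 for an original i * 20.
        scale *= index->cns;
        index = index->op1;
    }

    *pIndex = index;
    return scale;
}

int main()
{
    Node i{Node::Index, nullptr, 0};
    Node mul5{Node::Mul, &i, 5};
    Node shl2{Node::Lsh, &mul5, 2}; // ((i * 5) << 2) == i * 20

    Node* index = nullptr;
    long  scale = GetScaleAndIndex(&shl2, &index);
    printf("scale=%ld, index is the leaf: %s\n", scale, index == &i ? "yes" : "no");
    return 0;
}
```
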
- if ((defIndex == 1) && (varDsc->TypeGet() != TYP_STRUCT)) + if ((defIndex == 1) && !varDsc->TypeIs(TYP_STRUCT)) { JITDUMP(" -- no; first explicit def of a non-STRUCT local\n", lclNum); continue; diff --git a/src/coreclr/jit/patchpoint.cpp b/src/coreclr/jit/patchpoint.cpp index ab695c0a5c3b..3649ee4e72d6 100644 --- a/src/coreclr/jit/patchpoint.cpp +++ b/src/coreclr/jit/patchpoint.cpp @@ -238,8 +238,7 @@ class PatchpointTransformer // call PartialCompilationPatchpointHelper(ilOffset) // GenTree* ilOffsetNode = compiler->gtNewIconNode(ilOffset, TYP_INT); - GenTreeCall* helperCall = - compiler->gtNewHelperCallNode(CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, TYP_VOID, ilOffsetNode); + GenTreeCall* helperCall = compiler->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffsetNode); compiler->fgNewStmtAtEnd(block, helperCall); } diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 7db809bc5cad..b4e2e284d051 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1,6 +1,37 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// +// Physical promotion is an optimization where struct fields accessed as +// LCL_FLD nodes are promoted to individual primitive-typed local variables +// accessed as LCL_VAR, allowing register allocation and removing unnecessary +// memory operations. +// +// Key components: +// +// 1. Candidate Identification: +// - Identifies struct locals that aren't already promoted and aren't address-exposed +// - Analyzes access patterns to determine which fields are good promotion candidates +// - Uses weighted cost models to balance performance and code size and to take PGO +// data into account +// +// 2. Field Promotion: +// - Creates primitive-typed replacement locals for selected fields +// - Records which parts of the struct remains unpromoted +// +// 3. Access Transformation: +// - Transforms local field accesses to use promoted field variables +// - Decomposes struct stores and copies to operate on the primitive fields +// - Handles call argument passing and returns with field lists where appropriate +// - Tracks when values in promoted fields vs. original struct are fresher +// - Inserts read-backs when the struct field is fresher than the promoted local +// - Inserts write-backs when the promoted local is fresher than the struct field +// - Ensures proper state across basic block boundaries and exception flow +// +// The transformation carefully handles OSR locals, parameters, and call arguments, +// while maintaining correct behavior for exception handling and control flow. +// + #include "jitpch.h" #include "promotion.h" #include "jitstd/algorithm.h" @@ -777,7 +808,7 @@ class LocalUses else if (lcl->lvIsParam) { // For parameters, the backend may be able to map it directly from a register. - if (MapsToRegister(comp, access, lclNum)) + if (Promotion::MapsToParameterRegister(comp, lclNum, access.Offset, access.AccessType)) { // No promotion will result in a store to stack in the prolog. costWithout += COST_STRUCT_ACCESS_CYCLES * comp->fgFirstBB->getBBWeight(comp); @@ -1022,46 +1053,6 @@ class LocalUses return nullptr; } - - //------------------------------------------------------------------------ - // MapsToRegister: - // Check if a specific access in the specified parameter local is - // expected to map to a register. 
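
The new file header above summarizes physical promotion at the IR level. As a rough source-level analogy (plain C++, purely illustrative — the JIT operates on LCL_VAR/LCL_FLD nodes, not source), promoting a struct's fields amounts to keeping the hot fields in separate scalar locals and writing them back into the struct only where the whole struct is needed:

```cpp
#include <cstdio>

struct Span
{
    int* ptr;
    int  length;
};

void TakeWholeStruct(Span s)
{
    printf("len=%d\n", s.length);
}

int SumBeforePromotion(Span s)
{
    int sum = 0;
    for (int i = 0; i < s.length; i++) // every access reads a struct field
    {
        sum += s.ptr[i];
    }
    TakeWholeStruct(s);
    return sum;
}

int SumAfterPromotion(Span s)
{
    // "Promoted" field replacements: primitive locals standing in for s.ptr / s.length.
    int* ptr    = s.ptr;
    int  length = s.length;

    int sum = 0;
    for (int i = 0; i < length; i++) // field accesses now hit enregisterable scalars
    {
        sum += ptr[i];
    }

    // Write-back before a use of the whole struct, so the struct is up to date.
    s.ptr    = ptr;
    s.length = length;
    TakeWholeStruct(s);
    return sum;
}

int main()
{
    int  data[] = {1, 2, 3};
    Span s{data, 3};
    printf("%d %d\n", SumBeforePromotion(s), SumAfterPromotion(s));
    return 0;
}
```
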
- // - // Parameters: - // comp - Compiler instance - // access - Access in the local - // lclNum - Parameter lcl num - // - // Returns: - // Pointer to a matching access, or nullptr if no match was found. - // - bool MapsToRegister(Compiler* comp, const Access& access, unsigned lclNum) - { - assert(lclNum < comp->info.compArgsCount); - - if (comp->lvaIsImplicitByRefLocal(lclNum)) - { - return false; - } - - const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(lclNum); - if (abiInfo.HasAnyStackSegment()) - { - return false; - } - - for (const ABIPassingSegment& seg : abiInfo.Segments()) - { - if ((access.Offset == seg.Offset) && (genTypeSize(access.AccessType) == seg.Size) && - (varTypeUsesIntReg(access.AccessType) == genIsValidIntReg(seg.GetRegister()))) - { - return true; - } - } - - return false; - } }; // Struct used to save all struct stores involving physical promotion candidates. @@ -1347,6 +1338,13 @@ class LocalsUseVisitor : public GenTreeVisitor rep.LclNum = m_compiler->lvaGrabTemp(false DEBUGARG(rep.Description)); LclVarDsc* dsc = m_compiler->lvaGetDesc(rep.LclNum); dsc->lvType = rep.AccessType; + + // Are we promoting Span<>._length field? + if ((rep.Offset == OFFSETOF__CORINFO_Span__length) && (rep.AccessType == TYP_INT) && + m_compiler->lvaGetDesc(agg->LclNum)->IsSpan()) + { + dsc->SetIsNeverNegative(true); + } } #ifdef DEBUG @@ -1553,12 +1551,12 @@ class LocalsUseVisitor : public GenTreeVisitor flags |= AccessKindFlags::IsCallArg; - if (!call->gtArgs.IsNewAbiInformationDetermined()) + if (!call->gtArgs.IsAbiInformationDetermined()) { - call->gtArgs.DetermineNewABIInfo(m_compiler, call); + call->gtArgs.DetermineABIInfo(m_compiler, call); } - if (!arg.NewAbiInfo.HasAnyStackSegment() && !arg.AbiInfo.PassedByRef) + if (!arg.AbiInfo.HasAnyStackSegment() && !arg.AbiInfo.IsPassedByReference()) { flags |= AccessKindFlags::IsRegCallArg; } @@ -1674,7 +1672,10 @@ GenTree* Promotion::CreateReadBack(Compiler* compiler, unsigned structLclNum, co // Parameters: // block - The block // -void ReplaceVisitor::StartBlock(BasicBlock* block) +// Returns: +// Statement in block to start from. +// +Statement* ReplaceVisitor::StartBlock(BasicBlock* block) { m_currentBlock = block; @@ -1697,9 +1698,11 @@ void ReplaceVisitor::StartBlock(BasicBlock* block) // when we start the initial BB. if (block != m_compiler->fgFirstBB) { - return; + return block->firstStmt(); } + Statement* lastInsertedStmt = nullptr; + for (AggregateInfo* agg : m_aggregates) { LclVarDsc* dsc = m_compiler->lvaGetDesc(agg->LclNum); @@ -1708,24 +1711,48 @@ void ReplaceVisitor::StartBlock(BasicBlock* block) continue; } - JITDUMP("Marking fields of %s V%02u as needing read-back in entry BB " FMT_BB "\n", - dsc->lvIsParam ? "parameter" : "OSR-local", agg->LclNum, block->bbNum); + JITDUMP("Processing fields of %s V%02u in entry BB " FMT_BB "\n", dsc->lvIsParam ? 
"parameter" : "OSR-local", + agg->LclNum, block->bbNum); for (size_t i = 0; i < agg->Replacements.size(); i++) { Replacement& rep = agg->Replacements[i]; ClearNeedsWriteBack(rep); - if (m_liveness->IsReplacementLiveIn(block, agg->LclNum, (unsigned)i)) + if (!m_liveness->IsReplacementLiveIn(block, agg->LclNum, (unsigned)i)) + { + JITDUMP(" V%02u (%s) ignored because it is not live-in to entry BB\n", rep.LclNum, rep.Description); + continue; + } + + if (!dsc->lvIsParam || + !Promotion::MapsToParameterRegister(m_compiler, agg->LclNum, rep.Offset, rep.AccessType)) { SetNeedsReadBack(rep); - JITDUMP(" V%02u (%s) marked\n", rep.LclNum, rep.Description); + JITDUMP(" V%02u (%s) marked as needing read back\n", rep.LclNum, rep.Description); + continue; + } + + // Insert read backs of parameters mapping to registers eagerly to + // set the backend up for recognizing these as register accesses. + GenTree* readBack = Promotion::CreateReadBack(m_compiler, agg->LclNum, rep); + Statement* stmt = m_compiler->fgNewStmtFromTree(readBack); + JITDUMP(" V%02u (%s) is read back eagerly because it is a register parameter\n", rep.LclNum, + rep.Description); + DISPSTMT(stmt); + if (lastInsertedStmt == nullptr) + { + m_compiler->fgInsertStmtAtBeg(block, stmt); } else { - JITDUMP(" V%02u (%s) not marked (not live-in to entry BB)\n", rep.LclNum, rep.Description); + m_compiler->fgInsertStmtAfter(block, lastInsertedStmt, stmt); } + lastInsertedStmt = stmt; } } + + // Skip all the eager read-backs if any were inserted. + return lastInsertedStmt == nullptr ? block->firstStmt() : lastInsertedStmt->GetNextStmt(); } //------------------------------------------------------------------------ @@ -2201,9 +2228,151 @@ GenTree** ReplaceVisitor::InsertMidTreeReadBacks(GenTree** use) return use; } +//------------------------------------------------------------------------ +// ReplaceStructLocal: +// Try to replace a promoted struct local with uses of its fields. +// +// Parameters: +// user - The user +// value - The struct local +// +// Returns: +// True if the local was replaced and no more work needs to be done; false if +// the use will need to be handled via write-backs. +// +// Remarks: +// Usually this amounts to replacing the struct local by a FIELD_LIST with +// the promoted fields, but merged returns require more complicated handling. +// +bool ReplaceVisitor::ReplaceStructLocal(GenTree* user, GenTreeLclVarCommon* value) +{ + if (user->IsCall()) + { + return ReplaceCallArgWithFieldList(user->AsCall(), value); + } + else + { + assert(user->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)); + return ReplaceReturnedStructLocal(user->AsOp(), value); + } +} + +//------------------------------------------------------------------------ +// ReplaceReturnedStructLocal: +// Try to replace a returned promoted struct local. +// +// Parameters: +// ret - The return node +// value - The struct local +// +// Returns: +// True if the local was used and no more work needs to be done; false if the +// use will need to be handled via write-backs. +// +// Remarks: +// The backend supports arbitrary FIELD_LIST for returns, i.e. there is no +// requirement that the fields map cleanly to registers. However, morph does +// not support introducing a returned FIELD_LIST in cases where returns are +// being merged. Due to that, and for CQ, we instead decompose a store to the +// return local for that case. 
+// +bool ReplaceVisitor::ReplaceReturnedStructLocal(GenTreeOp* ret, GenTreeLclVarCommon* value) +{ + if (m_compiler->genReturnLocal != BAD_VAR_NUM) + { + JITDUMP("Replacing merged return by store to merged return local\n"); + // If we have merged returns then replace with a store to the return + // local, and switch out the GT_RETURN to return that local. + GenTree* sideEffects = nullptr; + m_compiler->gtExtractSideEffList(ret, &sideEffects, GTF_SIDE_EFFECT, true); + m_currentStmt->SetRootNode(sideEffects == nullptr ? m_compiler->gtNewNothingNode() : sideEffects); + DISPSTMT(m_currentStmt); + m_madeChanges = true; + + GenTree* store = m_compiler->gtNewStoreLclVarNode(m_compiler->genReturnLocal, value); + Statement* storeStmt = m_compiler->fgNewStmtFromTree(store); + m_compiler->fgInsertStmtAfter(m_currentBlock, m_currentStmt, storeStmt); + DISPSTMT(storeStmt); + + ret->SetReturnValue(m_compiler->gtNewLclVarNode(m_compiler->genReturnLocal)); + Statement* retStmt = m_compiler->fgNewStmtFromTree(ret); + m_compiler->fgInsertStmtAfter(m_currentBlock, storeStmt, retStmt); + DISPSTMT(retStmt); + + return true; + } + + AggregateInfo* agg = m_aggregates.Lookup(value->GetLclNum()); + ClassLayout* layout = value->GetLayout(m_compiler); + assert(layout != nullptr); + + unsigned startOffset = value->GetLclOffs(); + unsigned returnValueSize = layout->GetSize(); + if (agg->Unpromoted.Intersects(SegmentList::Segment(startOffset, startOffset + returnValueSize))) + { + // TODO-CQ: We could handle cases where the intersected remainder is simple + return false; + } + + auto checkPartialOverlap = [=](Replacement& rep) { + bool contained = + (rep.Offset >= startOffset) && (rep.Offset + genTypeSize(rep.AccessType) <= startOffset + returnValueSize); + + if (contained) + { + // Keep visiting overlapping replacements + return true; + } + + // Partial overlap, abort the visit and give up + return false; + }; + + if (!VisitOverlappingReplacements(value->GetLclNum(), startOffset, returnValueSize, checkPartialOverlap)) + { + return false; + } + + StructDeaths deaths = m_liveness->GetDeathsForStructLocal(value); + GenTreeFieldList* fieldList = m_compiler->gtNewFieldList(); + + auto addField = [=](Replacement& rep) { + GenTree* fieldValue; + if (!rep.NeedsReadBack) + { + fieldValue = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + + assert(deaths.IsReplacementDying(static_cast(&rep - agg->Replacements.data()))); + fieldValue->gtFlags |= GTF_VAR_DEATH; + CheckForwardSubForLastUse(rep.LclNum); + } + else + { + // Replacement local is not up to date. + fieldValue = m_compiler->gtNewLclFldNode(value->GetLclNum(), rep.AccessType, rep.Offset); + + if (!m_compiler->lvaGetDesc(value->GetLclNum())->lvDoNotEnregister) + { + m_compiler->lvaSetVarDoNotEnregister(value->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + } + } + + fieldList->AddField(m_compiler, fieldValue, rep.Offset - startOffset, rep.AccessType); + + return true; + }; + + VisitOverlappingReplacements(value->GetLclNum(), startOffset, returnValueSize, addField); + + ret->SetReturnValue(fieldList); + + m_madeChanges = true; + return true; +} + //------------------------------------------------------------------------ // ReplaceCallArgWithFieldList: -// Handle a call that may pass a struct local with replacements as the +// Handle a call that may pass a struct local with replacements as the // retbuf. 
// // Parameters: @@ -2228,8 +2397,8 @@ bool ReplaceVisitor::ReplaceCallArgWithFieldList(GenTreeCall* call, GenTreeLclVa ClassLayout* layout = argNode->GetLayout(m_compiler); assert(layout != nullptr); StructDeaths deaths = m_liveness->GetDeathsForStructLocal(argNode); - GenTreeFieldList* fieldList = new (m_compiler, GT_FIELD_LIST) GenTreeFieldList; - for (const ABIPassingSegment& seg : callArg->NewAbiInfo.Segments()) + GenTreeFieldList* fieldList = m_compiler->gtNewFieldList(); + for (const ABIPassingSegment& seg : callArg->AbiInfo.Segments()) { Replacement* rep = nullptr; if (agg->OverlappingReplacements(argNode->GetLclOffs() + seg.Offset, seg.Size, &rep, nullptr) && @@ -2306,9 +2475,9 @@ bool ReplaceVisitor::CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* GenTreeLclVarCommon* lcl) { // We should have computed ABI information during the costing phase. - assert(call->gtArgs.IsNewAbiInformationDetermined()); + assert(call->gtArgs.IsAbiInformationDetermined()); - if (callArg->NewAbiInfo.HasAnyStackSegment() || callArg->AbiInfo.PassedByRef) + if (callArg->AbiInfo.HasAnyStackSegment() || callArg->AbiInfo.IsPassedByReference()) { return false; } @@ -2317,7 +2486,7 @@ bool ReplaceVisitor::CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* assert(agg != nullptr); bool anyReplacements = false; - for (const ABIPassingSegment& seg : callArg->NewAbiInfo.Segments()) + for (const ABIPassingSegment& seg : callArg->AbiInfo.Segments()) { assert(seg.IsPassedInRegister()); @@ -2348,12 +2517,6 @@ bool ReplaceVisitor::CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* return false; } - // Finally, the backend requires the register types to match. - if (!varTypeUsesSameRegType(rep.AccessType, seg.GetRegisterType())) - { - return false; - } - return true; }; @@ -2533,7 +2696,7 @@ void ReplaceVisitor::ReplaceLocal(GenTree** use, GenTree* user) assert(effectiveUser->OperIs(GT_CALL, GT_RETURN, GT_SWIFT_ERROR_RET)); - if (!effectiveUser->IsCall() || !ReplaceCallArgWithFieldList(effectiveUser->AsCall(), lcl)) + if (!ReplaceStructLocal(effectiveUser, lcl)) { unsigned size = lcl->GetLayout(m_compiler)->GetSize(); WriteBackBeforeUse(use, lclNum, lcl->GetLclOffs(), size); @@ -2847,13 +3010,13 @@ PhaseStatus Promotion::Run() ReplaceVisitor replacer(this, aggregates, &liveness); for (BasicBlock* bb : m_compiler->Blocks()) { - replacer.StartBlock(bb); + Statement* firstStmt = replacer.StartBlock(bb); JITDUMP("\nReplacing in "); DBEXEC(m_compiler->verbose, bb->dspBlockHeader(m_compiler)); JITDUMP("\n"); - for (Statement* stmt : bb->Statements()) + for (Statement* stmt : StatementList(firstStmt)) { replacer.StartStatement(stmt); @@ -2930,7 +3093,7 @@ bool Promotion::HaveCandidateLocals() // bool Promotion::IsCandidateForPhysicalPromotion(LclVarDsc* dsc) { - return (dsc->TypeGet() == TYP_STRUCT) && !dsc->lvPromoted && !dsc->IsAddressExposed(); + return dsc->TypeIs(TYP_STRUCT) && !dsc->lvPromoted && !dsc->IsAddressExposed(); } //------------------------------------------------------------------------ @@ -2959,6 +3122,47 @@ GenTree* Promotion::EffectiveUser(Compiler::GenTreeStack& ancestors) return nullptr; } +//------------------------------------------------------------------------ +// MapsToParameterRegister: +// Check if a specific access in the specified parameter local is +// expected to map to a register. 
+// +// Parameters: +// comp - Compiler instance +// lclNum - Local being accessed into +// offset - Offset being accessed at +// accessType - Type of access +// +// Returns: +// True if the access can be efficiently done via a parameter register. +// +bool Promotion::MapsToParameterRegister(Compiler* comp, unsigned lclNum, unsigned offset, var_types accessType) +{ + assert(lclNum < comp->info.compArgsCount); + + if (comp->opts.IsOSR()) + { + return false; + } + + const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(lclNum); + if (abiInfo.IsPassedByReference() || abiInfo.HasAnyStackSegment()) + { + return false; + } + + for (const ABIPassingSegment& seg : abiInfo.Segments()) + { + if ((offset == seg.Offset) && (genTypeSize(accessType) == seg.Size) && + (varTypeUsesIntReg(accessType) == genIsValidIntReg(seg.GetRegister()))) + { + return true; + } + } + + return false; +} + // Promotion::ExplicitlyZeroInitReplacementLocals: // Insert IR to zero out replacement locals if necessary. // diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index b71f50bfc107..aad98bb3c1f2 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -154,7 +154,7 @@ class Promotion static bool IsCandidateForPhysicalPromotion(LclVarDsc* dsc); static GenTree* EffectiveUser(Compiler::GenTreeStack& ancestors); - + static bool MapsToParameterRegister(Compiler* comp, unsigned lclNum, unsigned offs, var_types accessType); public: explicit Promotion(Compiler* compiler) : m_compiler(compiler) @@ -274,9 +274,9 @@ class ReplaceVisitor : public GenTreeVisitor return m_mayHaveForwardSub; } - void StartBlock(BasicBlock* block); - void EndBlock(); - void StartStatement(Statement* stmt); + Statement* StartBlock(BasicBlock* block); + void EndBlock(); + void StartStatement(Statement* stmt); fgWalkResult PostOrderVisit(GenTree** use, GenTree* user); @@ -294,6 +294,8 @@ class ReplaceVisitor : public GenTreeVisitor void InsertPreStatementWriteBacks(); GenTree** InsertMidTreeReadBacks(GenTree** use); + bool ReplaceStructLocal(GenTree* user, GenTreeLclVarCommon* value); + bool ReplaceReturnedStructLocal(GenTreeOp* ret, GenTreeLclVarCommon* value); bool ReplaceCallArgWithFieldList(GenTreeCall* call, GenTreeLclVarCommon* callArg); bool CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* call, CallArg* callArg, GenTreeLclVarCommon* lcl); void ReadBackAfterCall(GenTreeCall* call, GenTree* user); diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index e4db1ff6fa07..7f2dbf0257fa 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -1,6 +1,33 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// +// This file provides the machinery to decompose stores and initializations +// involving physically promoted structs into stores/initialization involving +// individual fields. +// +// Key components include: +// +// 1. DecompositionStatementList +// - Collects statement trees during decomposition +// - Converts them to a single comma tree at the end +// +// 2. DecompositionPlan +// - Plans the decomposition of block operations +// - Manages mappings between source and destination replacements +// - Supports both copies between structs and initializations +// - Creates specialized access plans for remainders (unpromoted parts) +// +// 3. 
Field-by-field copying and initialization +// - Determines optimal order and strategy for field operations +// - Handles cases where replacements partially overlap +// - Optimizes GC pointer handling to minimize write barriers +// - Special cases primitive fields when possible +// +// This works in coordination with the ReplaceVisitor from promotion.cpp to +// transform IR after physical promotion decisions have been made. +// + #include "jitpch.h" #include "promotion.h" #include "jitstd/algorithm.h" @@ -588,7 +615,17 @@ class DecompositionPlan numAddrUses++; } - if (numAddrUses > 1) + if (numAddrUses == 0) + { + GenTree* sideEffects = nullptr; + m_compiler->gtExtractSideEffList(addr, &sideEffects); + + if (sideEffects != nullptr) + { + statements->AddStatement(sideEffects); + } + } + else if (numAddrUses > 1) { m_compiler->gtPeelOffsets(&addr, &addrBaseOffs, &addrBaseOffsFldSeq); @@ -1206,7 +1243,7 @@ class DecompositionPlan // offset - [out] The sum of offset peeled such that ADD(addr, offset) is equivalent to the original addr. // fldSeq - [out, optional] The combined field sequence for all the peeled offsets. // -void Compiler::gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq) +void Compiler::gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq) const { assert((*addr)->TypeIs(TYP_I_IMPL, TYP_BYREF, TYP_REF)); *offset = 0; diff --git a/src/coreclr/jit/promotionliveness.cpp b/src/coreclr/jit/promotionliveness.cpp index 93ac0c1eaf6e..e137caab8275 100644 --- a/src/coreclr/jit/promotionliveness.cpp +++ b/src/coreclr/jit/promotionliveness.cpp @@ -4,6 +4,42 @@ #include "jitpch.h" #include "promotion.h" +// +// This file implements a specialized liveness analysis for physically promoted struct fields +// and remainders. Unlike standard JIT liveness analysis, it focuses on accurately tracking +// which fields are live at specific program points to optimize physically promoted struct operations. +// +// Key characteristics: +// +// 1. Separate Bit Vectors: +// - Maintains its own liveness bit vectors separate from the main compiler's bbLiveIn/bbLiveOut +// - Uses "dense" indices: bit vectors only contain entries for the remainder and replacement +// fields of physically promoted structs (allocating 1 + num_fields indices per local) +// - Does not update BasicBlock::bbLiveIn or other standard liveness storage, as this would +// require allocating regular tracked indices (lvVarIndex) for all new fields +// +// 2. Liveness Representation: +// - Writes liveness into IR using normal GTF_VAR_DEATH flags +// - Important: After liveness is computed but before replacement phase completes, +// GTF_VAR_DEATH semantics temporarily differ from the rest of the JIT +// (e.g., "LCL_FLD int V16 [+8] (last use)" indicates that specific field is dying, +// not the whole variable) +// - For struct uses that can indicate deaths of multiple fields or remainder parts, +// maintains side information accessed via GetDeathsForStructLocal() +// +// 3. 
Analysis Process: +// - Single-pass dataflow computation (no DCE iterations, unlike other liveness passes) +// - Handles QMark nodes specially for conditional execution +// - Accounts for implicit exception flow +// - Distinguishes between full definitions and partial definitions +// +// The liveness information is critical for: +// - Avoiding creation of dead stores (especially to remainders, which the SSA liveness +// pass handles very conservatively as partial definitions) +// - Marking replacement fields with proper liveness flags for subsequent compiler phases +// - Optimizing read-back operations by determining when they're unnecessary +// + struct BasicBlockLiveness { // Variables used before a full definition. @@ -22,54 +58,6 @@ struct BasicBlockLiveness // Run: // Compute liveness information pertaining the promoted structs. // -// Remarks: -// For each promoted aggregate we compute the liveness for its remainder and -// all of its fields. Unlike regular liveness we currently do not do any DCE -// here and so only do the dataflow computation once. -// -// The liveness information is written into the IR using the normal -// GTF_VAR_DEATH flag. Note that the semantics of GTF_VAR_DEATH differs from -// the rest of the JIT for a short while between the liveness is computed and -// the replacement phase has run: in particular, after this liveness pass you -// may see a node like: -// -// LCL_FLD int V16 tmp9 [+8] (last use) -// -// that indicates that this particular field (or the remainder if it wasn't -// promoted) is dying, not that V16 itself is dying. After replacement has -// run the semantics align with the rest of the JIT: in the promoted case V16 -// [+8] would be replaced by its promoted field local, and in the remainder -// case all non-remainder uses of V16 would also be. -// -// There is one catch which is struct uses of the local. These can indicate -// deaths of multiple fields and also the remainder, so this information is -// stored on the side. PromotionLiveness::GetDeathsForStructLocal is used to -// query this information. -// -// The liveness information is used by decomposition to avoid creating dead -// stores, and also to mark the replacement field uses/defs with proper -// up-to-date liveness information to be used by future phases (forward sub -// and morph, as of writing this). It is also used to avoid creating -// unnecessary read-backs; this is mostly just a TP optimization as future -// liveness passes would be expected to DCE these anyway. -// -// Avoiding the creation of dead stores to the remainder is especially -// important as these otherwise would often end up looking like partial -// definitions, and the other liveness passes handle partial definitions very -// conservatively and are not able to DCE them. -// -// Unlike the other liveness passes we keep the per-block liveness -// information on the side and we do not update BasicBlock::bbLiveIn et al. -// This relies on downstream phases not requiring/wanting to use per-basic -// block live-in/live-out/var-use/var-def sets. To be able to update these we -// would need to give the new locals "regular" tracked indices (i.e. allocate -// a lvVarIndex). -// -// The indices allocated and used internally within the liveness computation -// are "dense" in the sense that the bit vectors only have indices for -// remainders and the replacement fields introduced by this pass. In other -// words, we allocate 1 + num_fields indices for each promoted struct local). 
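
The liveness notes above describe the "dense" indexing scheme: each promoted aggregate gets one bit for its remainder plus one per replacement field, kept apart from the regular lvVarIndex numbering. A standalone sketch of that allocation (hypothetical locals and field counts):

```cpp
#include <cstdio>
#include <vector>

struct Aggregate
{
    unsigned lclNum;
    unsigned numFields; // number of promoted replacement fields
};

int main()
{
    // Hypothetical promoted locals: V03 with 2 fields, V07 with 3 fields.
    std::vector<Aggregate> aggregates = {{3, 2}, {7, 3}};

    unsigned nextIndex = 0;
    for (const Aggregate& agg : aggregates)
    {
        // Index 0 of each group tracks the unpromoted remainder, followed by
        // one index per replacement field: 1 + numFields indices per aggregate.
        unsigned base = nextIndex;
        nextIndex += 1 + agg.numFields;
        printf("V%02u: remainder index %u, field indices [%u..%u]\n", agg.lclNum, base, base + 1,
               base + agg.numFields);
    }
    printf("total dense indices: %u\n", nextIndex);
    return 0;
}
```
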
-// void PromotionLiveness::Run() { m_structLclToTrackedIndex = new (m_compiler, CMK_Promotion) unsigned[m_compiler->lvaCount]{}; diff --git a/src/coreclr/jit/rangecheck.cpp b/src/coreclr/jit/rangecheck.cpp index 6d022778e198..938d3b0f0c30 100644 --- a/src/coreclr/jit/rangecheck.cpp +++ b/src/coreclr/jit/rangecheck.cpp @@ -14,11 +14,30 @@ // PhaseStatus Compiler::rangeCheckPhase() { - RangeCheck rc(this); - const bool madeChanges = rc.OptimizeRangeChecks(); + if (!doesMethodHaveBoundsChecks() || (fgSsaPassesCompleted == 0)) + { + return PhaseStatus::MODIFIED_NOTHING; + } + + const bool madeChanges = GetRangeCheck()->OptimizeRangeChecks(); return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } +//------------------------------------------------------------------------ +// GetRangeCheck: get the RangeCheck instance +// +// Returns: +// The range check object +// +RangeCheck* Compiler::GetRangeCheck() +{ + if (optRangeCheck == nullptr) + { + optRangeCheck = new (this, CMK_Generic) RangeCheck(this); + } + return optRangeCheck; +} + // Max stack depth (path length) in walking the UD chain. static const int MAX_SEARCH_DEPTH = 100; @@ -27,13 +46,10 @@ static const int MAX_VISIT_BUDGET = 8192; // RangeCheck constructor. RangeCheck::RangeCheck(Compiler* pCompiler) - : m_pOverflowMap(nullptr) + : m_preferredBound(ValueNumStore::NoVN) + , m_pOverflowMap(nullptr) , m_pRangeMap(nullptr) , m_pSearchPath(nullptr) -#ifdef DEBUG - , m_fMappedDefs(false) - , m_pDefTable(nullptr) -#endif , m_pCompiler(pCompiler) , m_alloc(pCompiler->getAllocator(CMK_RangeCheck)) , m_nVisitBudget(MAX_VISIT_BUDGET) @@ -56,6 +72,14 @@ RangeCheck::RangeMap* RangeCheck::GetRangeMap() return m_pRangeMap; } +void RangeCheck::ClearRangeMap() +{ + if (m_pRangeMap != nullptr) + { + m_pRangeMap->RemoveAll(); + } +} + // Get the overflow map in which computed overflows are cached. RangeCheck::OverflowMap* RangeCheck::GetOverflowMap() { @@ -66,6 +90,31 @@ RangeCheck::OverflowMap* RangeCheck::GetOverflowMap() return m_pOverflowMap; } +void RangeCheck::ClearOverflowMap() +{ + if (m_pOverflowMap != nullptr) + { + m_pOverflowMap->RemoveAll(); + } +} + +RangeCheck::SearchPath* RangeCheck::GetSearchPath() +{ + if (m_pSearchPath == nullptr) + { + m_pSearchPath = new (m_alloc) SearchPath(m_alloc); + } + return m_pSearchPath; +} + +void RangeCheck::ClearSearchPath() +{ + if (m_pSearchPath != nullptr) + { + m_pSearchPath->RemoveAll(); + } +} + // Get the length of the array vn, if it is new. int RangeCheck::GetArrLength(ValueNum vn) { @@ -218,7 +267,7 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, Statement* stmt, GenTree* GenTree* comma = treeParent->OperIs(GT_COMMA) ? treeParent : nullptr; GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); - m_pCurBndsChk = bndsChk; + m_preferredBound = m_pCompiler->vnStore->VNConservativeNormalValue(bndsChk->GetArrayLength()->gtVNPair); GenTree* treeIndex = bndsChk->GetIndex(); // Take care of constant index first, like a[2], for example. 
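
RangeCheck is now created lazily, cached via GetRangeCheck, and its per-query maps are cleared between uses instead of rebuilding the whole object for each phase invocation. The get-or-create-and-reset pattern in isolation (toy types; the real instance lives on the Compiler and uses the JIT allocator):

```cpp
#include <cstdio>
#include <unordered_map>

struct Analysis
{
    std::unordered_map<int, int> rangeMap; // cached per-query results

    void ClearRangeMap()
    {
        rangeMap.clear();
    }
};

struct Owner
{
    Analysis* cached = nullptr;

    // Create the analysis object lazily and reuse it for later queries.
    Analysis* GetAnalysis()
    {
        if (cached == nullptr)
        {
            cached = new Analysis();
        }
        return cached;
    }
};

int main()
{
    Owner owner;

    Analysis* a    = owner.GetAnalysis();
    a->rangeMap[42] = 7;

    // The same instance is handed back; per-query state is reset explicitly.
    Analysis* b = owner.GetAnalysis();
    printf("same instance: %s, entries before clear: %zu\n", a == b ? "yes" : "no", b->rangeMap.size());
    b->ClearRangeMap();
    printf("entries after clear: %zu\n", b->rangeMap.size());

    delete owner.cached;
    return 0;
}
```
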
@@ -246,7 +295,7 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, Statement* stmt, GenTree* { JITDUMP("Looking for array size assertions for: " FMT_VN "\n", arrLenVn); Range arrLength = Range(Limit(Limit::keDependent)); - MergeEdgeAssertions(arrLenVn, block->bbAssertionIn, &arrLength); + MergeEdgeAssertions(m_pCompiler, arrLenVn, arrLenVn, block->bbAssertionIn, &arrLength); if (arrLength.lLimit.IsConstant()) { arrSize = arrLength.lLimit.GetConstant(); @@ -275,10 +324,6 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, Statement* stmt, GenTree* } } - GetRangeMap()->RemoveAll(); - GetOverflowMap()->RemoveAll(); - m_pSearchPath = new (m_alloc) SearchPath(m_alloc); - // Special case: arr[arr.Length - CNS] if we know that arr.Length >= CNS // We assume that SUB(x, CNS) is canonized into ADD(x, -CNS) VNFuncApp funcApp; @@ -306,7 +351,7 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, Statement* stmt, GenTree* // bool result = (arr.Length == 0) || (arr[arr.Length - 1] == 0); // // here for the array access we know that arr.Length >= 1 - Range arrLenRange = GetRange(block, bndsChk->GetArrayLength(), false DEBUGARG(0)); + Range arrLenRange = GetRangeWorker(block, bndsChk->GetArrayLength(), false DEBUGARG(0)); if (arrLenRange.LowerLimit().IsConstant()) { // Lower known limit of ArrLen: @@ -342,25 +387,24 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, Statement* stmt, GenTree* } // Get the range for this index. - Range range = GetRange(block, treeIndex, false DEBUGARG(0)); + Range range = Range(Limit(Limit::keUndef)); + if (!TryGetRange(block, treeIndex, &range)) + { + JITDUMP("Failed to get range\n"); + return; + } // If upper or lower limit is found to be unknown (top), or it was found to // be unknown because of over budget or a deep search, then return early. if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown()) { - // Note: If we had stack depth too deep in the GetRange call, we'd be + // Note: If we had stack depth too deep in the GetRangeWorker call, we'd be // too deep even in the DoesOverflow call. So return early. return; } - if (DoesOverflow(block, treeIndex, range)) - { - JITDUMP("Method determined to overflow.\n"); - return; - } - JITDUMP("Range value %s\n", range.ToString(m_pCompiler)); - m_pSearchPath->RemoveAll(); + ClearSearchPath(); Widen(block, treeIndex, &range); // If upper or lower limit is unknown, then return. @@ -399,8 +443,8 @@ void RangeCheck::Widen(BasicBlock* block, GenTree* tree, Range* pRange) if (increasing) { JITDUMP("[%06d] is monotonically increasing.\n", Compiler::dspTreeID(tree)); - GetRangeMap()->RemoveAll(); - *pRange = GetRange(block, tree, true DEBUGARG(0)); + ClearRangeMap(); + *pRange = GetRangeWorker(block, tree, true DEBUGARG(0)); } } } @@ -416,12 +460,12 @@ bool RangeCheck::IsBinOpMonotonicallyIncreasing(GenTreeOp* binop) Compiler::dspTreeID(op2)); // Check if we have a var + const or var * const. - if (binop->OperIs(GT_ADD, GT_MUL) && op2->OperGet() == GT_LCL_VAR) + if (binop->OperIs(GT_ADD, GT_MUL) && op2->OperIs(GT_LCL_VAR)) { std::swap(op1, op2); } - if (op1->OperGet() != GT_LCL_VAR) + if (!op1->OperIs(GT_LCL_VAR)) { JITDUMP("Not monotonically increasing because op1 is not lclVar.\n"); return false; @@ -454,7 +498,7 @@ bool RangeCheck::IsMonotonicallyIncreasing(GenTree* expr, bool rejectNegativeCon JITDUMP("[RangeCheck::IsMonotonicallyIncreasing] [%06d]\n", Compiler::dspTreeID(expr)); // Add hashtable entry for expr. 
- bool alreadyPresent = m_pSearchPath->Set(expr, nullptr, SearchPath::Overwrite); + bool alreadyPresent = GetSearchPath()->Set(expr, nullptr, SearchPath::Overwrite); if (alreadyPresent) { return true; @@ -462,11 +506,11 @@ bool RangeCheck::IsMonotonicallyIncreasing(GenTree* expr, bool rejectNegativeCon // Remove hashtable entry for expr when we exit the present scope. auto code = [this, expr] { - m_pSearchPath->Remove(expr); + GetSearchPath()->Remove(expr); }; jitstd::utility::scoped_code finally(code); - if (m_pSearchPath->GetCount() > MAX_SEARCH_DEPTH) + if (GetSearchPath()->GetCount() > MAX_SEARCH_DEPTH) { return false; } @@ -496,12 +540,12 @@ bool RangeCheck::IsMonotonicallyIncreasing(GenTree* expr, bool rejectNegativeCon { return IsBinOpMonotonicallyIncreasing(expr->AsOp()); } - else if (expr->OperGet() == GT_PHI) + else if (expr->OperIs(GT_PHI)) { for (GenTreePhi::Use& use : expr->AsPhi()->Uses()) { // If the arg is already in the path, skip. - if (m_pSearchPath->Lookup(use.GetNode())) + if (GetSearchPath()->Lookup(use.GetNode())) { continue; } @@ -513,7 +557,7 @@ bool RangeCheck::IsMonotonicallyIncreasing(GenTree* expr, bool rejectNegativeCon } return true; } - else if (expr->OperGet() == GT_COMMA) + else if (expr->OperIs(GT_COMMA)) { return IsMonotonicallyIncreasing(expr->gtEffectiveVal(), rejectNegativeConst); } @@ -555,69 +599,9 @@ LclSsaVarDsc* RangeCheck::GetSsaDefStore(GenTreeLclVarCommon* lclUse) return nullptr; } -#ifdef DEBUG - Location* loc = GetDef(lclUse); - assert(loc != nullptr); - assert(loc->tree == defStore); - assert(loc->block == ssaDef->GetBlock()); -#endif - return ssaDef; } -#ifdef DEBUG -UINT64 RangeCheck::HashCode(unsigned lclNum, unsigned ssaNum) -{ - assert(ssaNum != SsaConfig::RESERVED_SSA_NUM); - return UINT64(lclNum) << 32 | ssaNum; -} - -// Get the def location of a given variable. -RangeCheck::Location* RangeCheck::GetDef(unsigned lclNum, unsigned ssaNum) -{ - Location* loc = nullptr; - if (ssaNum == SsaConfig::RESERVED_SSA_NUM) - { - return nullptr; - } - if (!m_fMappedDefs) - { - MapMethodDefs(); - } - // No defs. - if (m_pDefTable == nullptr) - { - return nullptr; - } - m_pDefTable->Lookup(HashCode(lclNum, ssaNum), &loc); - return loc; -} - -RangeCheck::Location* RangeCheck::GetDef(GenTreeLclVarCommon* lcl) -{ - return GetDef(lcl->GetLclNum(), lcl->GetSsaNum()); -} - -// Add the def location to the hash table. 
-void RangeCheck::SetDef(UINT64 hash, Location* loc) -{ - if (m_pDefTable == nullptr) - { - m_pDefTable = new (m_alloc) VarToLocMap(m_alloc); - } -#ifdef DEBUG - Location* loc2; - if (m_pDefTable->Lookup(hash, &loc2)) - { - JITDUMP("Already have " FMT_BB ", " FMT_STMT ", [%06d] for hash => %0I64X", loc2->block->bbNum, - loc2->stmt->GetID(), Compiler::dspTreeID(loc2->tree), hash); - assert(false); - } -#endif - m_pDefTable->Set(hash, loc); -} -#endif - //------------------------------------------------------------------------ // MergeEdgeAssertions: Merge assertions on the edge flowing into the block about a variable // @@ -635,20 +619,48 @@ void RangeCheck::MergeEdgeAssertions(GenTreeLclVarCommon* lcl, ASSERT_VALARG_TP LclSsaVarDsc* ssaData = m_pCompiler->lvaGetDesc(lcl)->GetPerSsaData(lcl->GetSsaNum()); ValueNum normalLclVN = m_pCompiler->vnStore->VNConservativeNormalValue(ssaData->m_vnPair); - MergeEdgeAssertions(normalLclVN, assertions, pRange); + MergeEdgeAssertions(m_pCompiler, normalLclVN, m_preferredBound, assertions, pRange); +} + +//------------------------------------------------------------------------ +// TryGetRangeFromAssertions: Cheaper version of TryGetRange that is based purely on assertions +// and does not require a full range analysis based on SSA. +// +// Arguments: +// comp - the compiler instance +// num - the value number to analyze range for +// assertions - the assertions to use +// pRange - the range to tighten with assertions +// +// Return Value: +// True if the range was successfully computed +// +bool RangeCheck::TryGetRangeFromAssertions(Compiler* comp, ValueNum num, ASSERT_VALARG_TP assertions, Range* pRange) +{ + MergeEdgeAssertions(comp, num, ValueNumStore::NoVN, assertions, pRange, false); + return !pRange->LowerLimit().IsUnknown() || !pRange->UpperLimit().IsUnknown(); } //------------------------------------------------------------------------ // MergeEdgeAssertions: Merge assertions on the edge flowing into the block about a variable // // Arguments: -// normalLclVN - the value number to look for assertions for -// assertions - the assertions to use -// pRange - the range to tighten with assertions +// comp - the compiler instance +// normalLclVN - the value number to look for assertions for +// preferredBoundVN - when this VN is set, it will be given preference over constant limits +// assertions - the assertions to use +// pRange - the range to tighten with assertions +// canUseCheckedBounds - true if we can use checked bounds assertions (cache) // -void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP assertions, Range* pRange) +void RangeCheck::MergeEdgeAssertions(Compiler* comp, + ValueNum normalLclVN, + ValueNum preferredBoundVN, + ASSERT_VALARG_TP assertions, + Range* pRange, + bool canUseCheckedBounds) { - if (BitVecOps::IsEmpty(m_pCompiler->apTraits, assertions)) + Range assertedRange = Range(Limit(Limit::keUnknown)); + if (BitVecOps::IsEmpty(comp->apTraits, assertions)) { return; } @@ -658,14 +670,14 @@ void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP asse return; } - // Walk through the "assertions" to check if the apply. - BitVecOps::Iter iter(m_pCompiler->apTraits, assertions); + // Walk through the "assertions" to check if they apply. 
+ BitVecOps::Iter iter(comp->apTraits, assertions); unsigned index = 0; while (iter.NextElem(&index)) { AssertionIndex assertionIndex = GetAssertionIndex(index); - Compiler::AssertionDsc* curAssertion = m_pCompiler->optGetAssertion(assertionIndex); + Compiler::AssertionDsc* curAssertion = comp->optGetAssertion(assertionIndex); Limit limit(Limit::keUndef); genTreeOps cmpOper = GT_NONE; @@ -673,12 +685,12 @@ void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP asse bool isUnsigned = false; // Current assertion is of the form (i < len - cns) != 0 - if (curAssertion->IsCheckedBoundArithBound()) + if (canUseCheckedBounds && curAssertion->IsCheckedBoundArithBound()) { ValueNumStore::CompareCheckedBoundArithInfo info; // Get i, len, cns and < as "info." - m_pCompiler->vnStore->GetCompareCheckedBoundArithInfo(curAssertion->op1.vn, &info); + comp->vnStore->GetCompareCheckedBoundArithInfo(curAssertion->op1.vn, &info); // If we don't have the same variable we are comparing against, bail. if (normalLclVN != info.cmpOp) @@ -692,22 +704,22 @@ void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP asse } // If the operand that operates on the bound is not constant, then done. - if (!m_pCompiler->vnStore->IsVNInt32Constant(info.arrOp)) + if (!comp->vnStore->IsVNInt32Constant(info.arrOp)) { continue; } - int cons = m_pCompiler->vnStore->ConstantValue(info.arrOp); + int cons = comp->vnStore->ConstantValue(info.arrOp); limit = Limit(Limit::keBinOpArray, info.vnBound, info.arrOper == GT_SUB ? -cons : cons); cmpOper = (genTreeOps)info.cmpOper; } // Current assertion is of the form (i < len) != 0 - else if (curAssertion->IsCheckedBoundBound()) + else if (canUseCheckedBounds && curAssertion->IsCheckedBoundBound()) { ValueNumStore::CompareCheckedBoundArithInfo info; // Get the info as "i", "<" and "len" - m_pCompiler->vnStore->GetCompareCheckedBound(curAssertion->op1.vn, &info); + comp->vnStore->GetCompareCheckedBound(curAssertion->op1.vn, &info); // If we don't have the same variable we are comparing against, bail. if (normalLclVN == info.cmpOp) @@ -731,7 +743,7 @@ void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP asse ValueNumStore::ConstantBoundInfo info; // Get the info as "i", "<" and "100" - m_pCompiler->vnStore->GetConstantBoundInfo(curAssertion->op1.vn, &info); + comp->vnStore->GetConstantBoundInfo(curAssertion->op1.vn, &info); // If we don't have the same variable we are comparing against, bail. 
if (normalLclVN != info.cmpOpVN) @@ -751,10 +763,17 @@ void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP asse continue; } - int cnstLimit = m_pCompiler->vnStore->CoercedConstantValue(curAssertion->op2.vn); + // Ignore GC values/NULL caught by IsConstantInt32Assertion assertion (may happen on 32bit) + if (varTypeIsGC(comp->vnStore->TypeOfVN(curAssertion->op2.vn))) + { + continue; + } + + int cnstLimit = (int)curAssertion->op2.u1.iconVal; + assert(cnstLimit == comp->vnStore->CoercedConstantValue(curAssertion->op2.vn)); - if ((cnstLimit == 0) && (curAssertion->assertionKind == Compiler::OAK_NOT_EQUAL) && - m_pCompiler->vnStore->IsVNCheckedBound(curAssertion->op1.vn)) + if ((cnstLimit == 0) && (curAssertion->assertionKind == Compiler::OAK_NOT_EQUAL) && canUseCheckedBounds && + comp->vnStore->IsVNCheckedBound(curAssertion->op1.vn)) { // we have arr.Len != 0, so the length must be atleast one limit = Limit(Limit::keConstant, 1); @@ -784,6 +803,20 @@ void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP asse cmpOper = GT_LT; limit = Limit(Limit::keBinOpArray, lenVN, 0); } + else if ((normalLclVN == lenVN) && comp->vnStore->IsVNInt32Constant(indexVN)) + { + // We have "Const < arr.Length" assertion, it means that "arr.Length > Const" + int indexCns = comp->vnStore->GetConstantInt32(indexVN); + if (indexCns >= 0) + { + cmpOper = GT_GT; + limit = Limit(Limit::keConstant, indexCns); + } + else + { + continue; + } + } else { continue; @@ -799,31 +832,31 @@ void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP asse // Make sure the assertion is of the form != 0 or == 0 if it isn't a constant assertion. if (!isConstantAssertion && (curAssertion->assertionKind != Compiler::OAK_NO_THROW) && - (curAssertion->op2.vn != m_pCompiler->vnStore->VNZeroForType(TYP_INT))) + (curAssertion->op2.vn != comp->vnStore->VNZeroForType(TYP_INT))) { continue; } #ifdef DEBUG - if (m_pCompiler->verbose) + if (comp->verbose) { - m_pCompiler->optPrintAssertion(curAssertion, assertionIndex); + comp->optPrintAssertion(curAssertion, assertionIndex); } #endif // Limits are sometimes made with the form vn + constant, where vn is a known constant // see if we can simplify this to just a constant - if (limit.IsBinOpArray() && m_pCompiler->vnStore->IsVNInt32Constant(limit.vn)) + if (limit.IsBinOpArray() && comp->vnStore->IsVNInt32Constant(limit.vn)) { - Limit tempLimit = Limit(Limit::keConstant, m_pCompiler->vnStore->ConstantValue(limit.vn)); + Limit tempLimit = Limit(Limit::keConstant, comp->vnStore->ConstantValue(limit.vn)); if (tempLimit.AddConstant(limit.cns)) { limit = tempLimit; } } - ValueNum arrLenVN = m_pCompiler->vnStore->VNConservativeNormalValue(m_pCurBndsChk->GetArrayLength()->gtVNPair); + ValueNum arrLenVN = preferredBoundVN; - if (m_pCompiler->vnStore->IsVNConstant(arrLenVN)) + if (comp->vnStore->IsVNConstant(arrLenVN)) { // Set arrLenVN to NoVN; this will make it match the "vn" recorded on // constant limits (where we explicitly track the constant and don't @@ -866,105 +899,117 @@ void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP asse continue; } - // Skip if it doesn't tighten the current bound: - if (pRange->uLimit.IsConstant() && ((cmpOper == GT_LE) || (cmpOper == GT_LT))) - { - if (!limit.IsConstant() && (limit.vn != arrLenVN)) - { - // If our new limit is not constant and doesn't represent the array's length - bail out. - // NOTE: it's fine to replace the current constant limit with a non-constant arrLenVN. 
- continue; - } - if (limit.IsConstant() && (limit.cns > pRange->uLimit.cns)) - { - // The new constant limit doesn't tighten the current constant bound. - // E.g. current is "X < 10" and the new one is "X < 100" - continue; - } - } - // Same for the lower bound: - if (pRange->lLimit.IsConstant() && ((cmpOper == GT_GE) || (cmpOper == GT_GT))) - { - if (!limit.IsConstant() && (limit.vn != arrLenVN)) - { - // If our new limit is not constant and doesn't represent the array's length - bail out. - // NOTE: it's fine to replace the current constant limit with a non-constant arrLenVN. - continue; - } - if (limit.IsConstant() && (limit.cns < pRange->lLimit.cns)) - { - // The new constant limit doesn't tighten the current constant bound. - // E.g. current is "X > 10" and the new one is "X > 5" - continue; - } - } - - // Check if the incoming limit from assertions tightens the existing upper limit. - if (pRange->uLimit.IsBinOpArray() && (pRange->uLimit.vn == arrLenVN)) - { - // We have checked the current range's (pRange's) upper limit is either of the form: - // length + cns - // and length == the bndsChkCandidate's arrLen - // - // We want to check if the incoming limit tightens the bound, and for that - // we need to make sure that incoming limit is also on the same length (or - // length + cns) and not some other length. - - if (limit.vn != arrLenVN) - { - JITDUMP("Array length VN did not match arrLen=" FMT_VN ", limit=" FMT_VN "\n", arrLenVN, limit.vn); - continue; - } - - int curCns = pRange->uLimit.cns; - int limCns = limit.IsBinOpArray() ? limit.cns : 0; - - // Incoming limit doesn't tighten the existing upper limit. - if (limCns >= curCns) - { - JITDUMP("Bound limit %d doesn't tighten current bound %d\n", limCns, curCns); - continue; - } - } - else - { - // Current range's upper bound is not "length + cns" and the - // incoming limit is not on the same length as the bounds check candidate. - // So we could skip this assertion. But in cases, of Dependent or Unknown - // type of upper limit, the incoming assertion still tightens the upper - // bound to a saner value. So do not skip the assertion. - } - // cmpOp (loop index i) cmpOper len +/- cns switch (cmpOper) { case GT_LT: case GT_LE: - pRange->uLimit = limit; + assertedRange.uLimit = limit; if (isUnsigned) { - pRange->lLimit = Limit(Limit::keConstant, 0); + assertedRange.lLimit = Limit(Limit::keConstant, 0); } break; case GT_GT: case GT_GE: - pRange->lLimit = limit; - // it doesn't matter if it's isUnsigned or not here - it's not negative anyway. + // GT/GE being unsigned creates a non-contiguous range which we can't represent + // using single Range object. + if (!isUnsigned) + { + assertedRange.lLimit = limit; + } break; case GT_EQ: - pRange->uLimit = limit; - pRange->lLimit = limit; + assertedRange.uLimit = limit; + assertedRange.lLimit = limit; break; default: // All other 'cmpOper' kinds leave lLimit/uLimit unchanged break; } - JITDUMP("The range after edge merging:"); - JITDUMP(pRange->ToString(m_pCompiler)); - JITDUMP("\n"); + + // We have two ranges - we need to merge (tighten) them. + + auto tightenLimit = [](Limit l1, Limit l2, ValueNum preferredBound, bool isLower) -> Limit { + // 1) One of the limits is undef, unknown or dependent + if (l1.IsUndef() || l2.IsUndef()) + { + // Anything is better than undef. + return l1.IsUndef() ? l2 : l1; + } + if (l1.IsUnknown() || l2.IsUnknown()) + { + // Anything is better than unknown. + return l1.IsUnknown() ? 
l2 : l1; + } + if (l1.IsDependent() || l2.IsDependent()) + { + // Anything is better than dependent. + return l1.IsDependent() ? l2 : l1; + } + + // 2) Both limits are constants + if (l1.IsConstant() && l2.IsConstant()) + { + // isLower: whatever is higher is better. + // !isLower: whatever is lower is better. + return isLower ? (l1.cns > l2.cns ? l1 : l2) : (l1.cns < l2.cns ? l1 : l2); + } + + // 3) Both limits are BinOpArray (which is "arrLen + cns") + if (l1.IsBinOpArray() && l2.IsBinOpArray()) + { + // If one of them is preferredBound and the other is not, use the preferredBound. + if (preferredBound != ValueNumStore::NoVN) + { + if ((l1.vn == preferredBound) && (l2.vn != preferredBound)) + { + return l1; + } + if ((l2.vn == preferredBound) && (l1.vn != preferredBound)) + { + return l2; + } + } + + // Otherwise, just use the one with the higher/lower constant. + // even if they use different arrLen. + return isLower ? (l1.cns > l2.cns ? l1 : l2) : (l1.cns < l2.cns ? l1 : l2); + } + + // 4) One of the limits is a constant and the other is BinOpArray + if ((l1.IsConstant() && l2.IsBinOpArray()) || (l2.IsConstant() && l1.IsBinOpArray())) + { + // l1 - BinOpArray, l2 - constant + if (l1.IsConstant()) + { + std::swap(l1, l2); + } + + if (((preferredBound == ValueNumStore::NoVN) || (l1.vn != preferredBound))) + { + // if we don't have a preferred bound, + // or it doesn't match l1.vn, use the constant (l2). + return l2; + } + + // Otherwise, prefer the BinOpArray(preferredBound) over the constant for the upper bound + // and the constant for the lower bound. + return isLower ? l2 : l1; + } + unreached(); + }; + + JITDUMP("Tightening pRange: [%s] with assertedRange: [%s] into ", pRange->ToString(comp), + assertedRange.ToString(comp)); + + pRange->lLimit = tightenLimit(assertedRange.lLimit, pRange->lLimit, preferredBoundVN, true); + pRange->uLimit = tightenLimit(assertedRange.uLimit, pRange->uLimit, preferredBoundVN, false); + + JITDUMP("[%s]\n", pRange->ToString(comp)); } } @@ -1003,7 +1048,20 @@ void RangeCheck::MergeAssertion(BasicBlock* block, GenTree* op, Range* pRange DE // Compute the range for a binary operation. Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block, GenTreeOp* binop, bool monIncreasing DEBUGARG(int indent)) { - assert(binop->OperIs(GT_ADD, GT_AND, GT_RSH, GT_RSZ, GT_LSH, GT_UMOD, GT_MUL)); + assert(binop->OperIs(GT_ADD, GT_XOR, GT_AND, GT_RSH, GT_RSZ, GT_LSH, GT_UMOD, GT_MUL)); + + // For XOR we only care about Log2 pattern for now + if (binop->OperIs(GT_XOR)) + { + int upperBound; + if (m_pCompiler->vnStore->IsVNLog2(m_pCompiler->vnStore->VNConservativeNormalValue(binop->gtVNPair), + &upperBound)) + { + assert(upperBound > 0); + return Range(Limit(Limit::keConstant, 0), Limit(Limit::keConstant, upperBound)); + } + return Range(Limit(Limit::keUnknown)); + } GenTree* op1 = binop->gtGetOp1(); GenTree* op2 = binop->gtGetOp2(); @@ -1105,13 +1163,13 @@ Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block, GenTreeOp* binop, bool { // If we already have the op in the path, then, just rely on assertions, else // find the range. 
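As a side note on the `tightenLimit` merge in `MergeEdgeAssertions` above: once the incoming assertions have been folded into `assertedRange`, combining it with the already-computed `pRange` simply keeps the stronger bound on each side. A minimal standalone sketch of that rule for constant limits only (plain `int`s rather than the JIT's `Limit`/`Range` types; `SimpleRange` and `Tighten` are illustrative names, not JIT code):

#include <algorithm>
#include <cassert>
#include <climits>

struct SimpleRange
{
    int lo;
    int hi;
};

// Keep the stronger bound on each side: the larger lower bound and the
// smaller upper bound.
static SimpleRange Tighten(const SimpleRange& computed, const SimpleRange& asserted)
{
    return {std::max(computed.lo, asserted.lo), std::min(computed.hi, asserted.hi)};
}

int main()
{
    // The computed range of an index is [0..INT_MAX]; an "i < 10" assertion
    // contributes [INT_MIN..9]; the tightened result is [0..9].
    SimpleRange r = Tighten({0, INT_MAX}, {INT_MIN, 9});
    assert((r.lo == 0) && (r.hi == 9));
    return 0;
}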
- if (m_pSearchPath->Lookup(op1)) + if (GetSearchPath()->Lookup(op1)) { op1Range = Range(Limit(Limit::keDependent)); } else { - op1Range = GetRange(block, op1, monIncreasing DEBUGARG(indent)); + op1Range = GetRangeWorker(block, op1, monIncreasing DEBUGARG(indent)); } MergeAssertion(block, op1, &op1Range DEBUGARG(indent + 1)); } @@ -1127,13 +1185,13 @@ Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block, GenTreeOp* binop, bool { // If we already have the op in the path, then, just rely on assertions, else // find the range. - if (m_pSearchPath->Lookup(op2)) + if (GetSearchPath()->Lookup(op2)) { op2Range = Range(Limit(Limit::keDependent)); } else { - op2Range = GetRange(block, op2, monIncreasing DEBUGARG(indent)); + op2Range = GetRangeWorker(block, op2, monIncreasing DEBUGARG(indent)); } MergeAssertion(block, op2, &op2Range DEBUGARG(indent + 1)); } @@ -1216,7 +1274,7 @@ Range RangeCheck::ComputeRangeForLocalDef(BasicBlock* block, JITDUMP("----------------------------------------------------\n"); } #endif - Range range = GetRange(ssaDef->GetBlock(), ssaDef->GetDefNode()->Data(), monIncreasing DEBUGARG(indent)); + Range range = GetRangeWorker(ssaDef->GetBlock(), ssaDef->GetDefNode()->Data(), monIncreasing DEBUGARG(indent)); if (!BitVecOps::MayBeUninit(block->bbAssertionIn) && (m_pCompiler->GetAssertionCount() > 0)) { JITDUMP("Merge assertions from " FMT_BB ": ", block->bbNum); @@ -1316,12 +1374,12 @@ bool RangeCheck::DoesBinOpOverflow(BasicBlock* block, GenTreeOp* binop, const Ra GenTree* op1 = binop->gtGetOp1(); GenTree* op2 = binop->gtGetOp2(); - if (!m_pSearchPath->Lookup(op1) && DoesOverflow(block, op1, range)) + if (!GetSearchPath()->Lookup(op1) && DoesOverflow(block, op1, range)) { return true; } - if (!m_pSearchPath->Lookup(op2) && DoesOverflow(block, op2, range)) + if (!GetSearchPath()->Lookup(op2) && DoesOverflow(block, op2, range)) { return true; } @@ -1390,7 +1448,7 @@ bool RangeCheck::DoesPhiOverflow(BasicBlock* block, GenTree* expr, const Range& for (GenTreePhi::Use& use : expr->AsPhi()->Uses()) { GenTree* arg = use.GetNode(); - if (m_pSearchPath->Lookup(arg)) + if (GetSearchPath()->Lookup(arg)) { continue; } @@ -1427,21 +1485,23 @@ bool RangeCheck::DoesOverflow(BasicBlock* block, GenTree* expr, const Range& ran bool RangeCheck::ComputeDoesOverflow(BasicBlock* block, GenTree* expr, const Range& range) { + ValueNumStore* vnStore = m_pCompiler->vnStore; + JITDUMP("Does overflow [%06d]?\n", Compiler::dspTreeID(expr)); - m_pSearchPath->Set(expr, block, SearchPath::Overwrite); + GetSearchPath()->Set(expr, block, SearchPath::Overwrite); bool overflows = true; - if (m_pSearchPath->GetCount() > MAX_SEARCH_DEPTH) + if (GetSearchPath()->GetCount() > MAX_SEARCH_DEPTH) { overflows = true; } // If the definition chain resolves to a constant, it doesn't overflow. - else if (m_pCompiler->vnStore->IsVNConstant(expr->gtVNPair.GetConservative())) + else if (vnStore->IsVNConstant(expr->gtVNPair.GetConservative())) { overflows = false; } - else if (expr->OperIs(GT_IND)) + else if (expr->OperIs(GT_IND, GT_ARR_LENGTH)) { overflows = false; } @@ -1465,6 +1525,11 @@ bool RangeCheck::ComputeDoesOverflow(BasicBlock* block, GenTree* expr, const Ran { overflows = false; } + else if (expr->OperIs(GT_XOR) && vnStore->IsVNLog2(m_pCompiler->vnStore->VNConservativeNormalValue(expr->gtVNPair))) + { + // For XOR we only care about Log2 pattern for now, which never overflows. + overflows = false; + } // Walk through phi arguments to check if phi arguments involve arithmetic that overflows. 
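The `IsVNLog2` special cases above (in `ComputeRangeForBinOp` and in the overflow check) rely on how `BitOperations.Log2` is typically expanded: for a non-zero 32-bit input it becomes `31 ^ lzcnt(x)`, which always lands in [0..31], so the XOR can neither overflow nor require an unknown range. A standalone sketch of that identity (using GCC/Clang's `__builtin_clz` purely for illustration; the exact pattern matched by the value-number store is an implementation detail):

#include <cassert>

// For a non-zero 32-bit value, 31 - clz(x) equals 31 ^ clz(x), because clz(x)
// is in [0..31] and 31 is all ones in those five bits.
static int Log2ViaXor(unsigned x)
{
    assert(x != 0);
    return 31 ^ __builtin_clz(x);
}

int main()
{
    assert(Log2ViaXor(1) == 0);
    assert(Log2ViaXor(2) == 1);
    assert(Log2ViaXor(0x80000000u) == 31); // always within [0..31]
    return 0;
}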
else if (expr->OperIs(GT_PHI)) { @@ -1475,7 +1540,7 @@ bool RangeCheck::ComputeDoesOverflow(BasicBlock* block, GenTree* expr, const Ran overflows = ComputeDoesOverflow(block, expr->gtGetOp1(), range); } GetOverflowMap()->Set(expr, overflows, OverflowMap::Overwrite); - m_pSearchPath->Remove(expr); + GetSearchPath()->Remove(expr); JITDUMP("[%06d] %s\n", Compiler::dspTreeID(expr), ((overflows) ? "overflows" : "does not overflow")); return overflows; } @@ -1503,7 +1568,7 @@ bool RangeCheck::ComputeDoesOverflow(BasicBlock* block, GenTree* expr, const Ran // Range RangeCheck::ComputeRange(BasicBlock* block, GenTree* expr, bool monIncreasing DEBUGARG(int indent)) { - bool newlyAdded = !m_pSearchPath->Set(expr, block, SearchPath::Overwrite); + bool newlyAdded = !GetSearchPath()->Set(expr, block, SearchPath::Overwrite); Range range = Limit(Limit::keUndef); ValueNum vn = m_pCompiler->vnStore->VNConservativeNormalValue(expr->gtVNPair); @@ -1523,23 +1588,20 @@ Range RangeCheck::ComputeRange(BasicBlock* block, GenTree* expr, bool monIncreas // searches. This is because anything that merges with Unknown will // yield Unknown. Unknown is lattice top. range = Range(Limit(Limit::keUnknown)); - JITDUMP("GetRange not tractable within max node visit budget.\n"); + JITDUMP("GetRangeWorker not tractable within max node visit budget.\n"); } // Prevent unbounded recursion. - else if (m_pSearchPath->GetCount() > MAX_SEARCH_DEPTH) + else if (GetSearchPath()->GetCount() > MAX_SEARCH_DEPTH) { // Unknown is lattice top, anything that merges with Unknown will yield Unknown. range = Range(Limit(Limit::keUnknown)); - JITDUMP("GetRange not tractable within max stack depth.\n"); + JITDUMP("GetRangeWorker not tractable within max stack depth.\n"); } - // TODO-CQ: The current implementation is reliant on integer storage types - // for constants. It could use INT64. Still, representing ULONG constants - // might require preserving the var_type whether it is a un/signed 64-bit. - // JIT64 doesn't do anything for "long" either. No asm diffs. - else if (expr->TypeGet() == TYP_LONG || expr->TypeGet() == TYP_ULONG) + // TYP_LONG is not supported anyway. + else if (expr->TypeIs(TYP_LONG)) { range = Range(Limit(Limit::keUnknown)); - JITDUMP("GetRange long or ulong, setting to unknown value.\n"); + JITDUMP("GetRangeWorker long, setting to unknown value.\n"); } // If VN is constant return range as constant. else if (m_pCompiler->vnStore->IsVNConstant(vn)) @@ -1555,14 +1617,14 @@ Range RangeCheck::ComputeRange(BasicBlock* block, GenTree* expr, bool monIncreas MergeAssertion(block, expr, &range DEBUGARG(indent + 1)); } // compute the range for binary operation - else if (expr->OperIs(GT_ADD, GT_AND, GT_RSH, GT_RSZ, GT_LSH, GT_UMOD, GT_MUL)) + else if (expr->OperIs(GT_XOR, GT_ADD, GT_AND, GT_RSH, GT_RSZ, GT_LSH, GT_UMOD, GT_MUL)) { range = ComputeRangeForBinOp(block, expr->AsOp(), monIncreasing DEBUGARG(indent + 1)); } else if (expr->OperIs(GT_NEG)) { // Compute range for negation, e.g.: [0..8] -> [-8..0] - Range op1Range = GetRange(block, expr->gtGetOp1(), monIncreasing DEBUGARG(indent + 1)); + Range op1Range = GetRangeWorker(block, expr->gtGetOp1(), monIncreasing DEBUGARG(indent + 1)); range = RangeOps::Negate(op1Range); } // If phi, then compute the range for arguments, calling the result "dependent" when looping begins. 
@@ -1571,14 +1633,14 @@ Range RangeCheck::ComputeRange(BasicBlock* block, GenTree* expr, bool monIncreas for (GenTreePhi::Use& use : expr->AsPhi()->Uses()) { Range argRange = Range(Limit(Limit::keUndef)); - if (m_pSearchPath->Lookup(use.GetNode())) + if (GetSearchPath()->Lookup(use.GetNode())) { JITDUMP("PhiArg [%06d] is already being computed\n", Compiler::dspTreeID(use.GetNode())); argRange = Range(Limit(Limit::keDependent)); } else { - argRange = GetRange(block, use.GetNode(), monIncreasing DEBUGARG(indent + 1)); + argRange = GetRangeWorker(block, use.GetNode(), monIncreasing DEBUGARG(indent + 1)); } assert(!argRange.LowerLimit().IsUndef()); assert(!argRange.UpperLimit().IsUndef()); @@ -1595,13 +1657,18 @@ Range RangeCheck::ComputeRange(BasicBlock* block, GenTree* expr, bool monIncreas } else if (expr->OperIs(GT_COMMA)) { - range = GetRange(block, expr->gtEffectiveVal(), monIncreasing DEBUGARG(indent + 1)); + range = GetRangeWorker(block, expr->gtEffectiveVal(), monIncreasing DEBUGARG(indent + 1)); } else if (expr->OperIs(GT_CAST)) { // TODO: consider computing range for CastOp and intersect it with this. range = GetRangeFromType(expr->AsCast()->CastToType()); } + else if (expr->OperIs(GT_ARR_LENGTH)) + { + // Better than keUnknown + range = Range(Limit(Limit::keConstant, 0), Limit(Limit::keConstant, CORINFO_Array_MaxLength)); + } else { // The expression is not recognized, so the result is unknown. @@ -1609,7 +1676,7 @@ Range RangeCheck::ComputeRange(BasicBlock* block, GenTree* expr, bool monIncreas } GetRangeMap()->Set(expr, new (m_alloc) Range(range), RangeMap::Overwrite); - m_pSearchPath->Remove(expr); + GetSearchPath()->Remove(expr); return range; } @@ -1623,14 +1690,61 @@ void Indent(int indent) } #endif -// Get the range, if it is already computed, use the cached range value, else compute it. -Range RangeCheck::GetRange(BasicBlock* block, GenTree* expr, bool monIncreasing DEBUGARG(int indent)) +//------------------------------------------------------------------------ +// TryGetRange: Try to obtain the range of an expression. +// +// Arguments: +// block - the block that contains `expr`; +// expr - expression to compute the range for; +// pRange - [Out] range of the expression; +// +// Return Value: +// false if the range is unknown or determined to overflow. +// +bool RangeCheck::TryGetRange(BasicBlock* block, GenTree* expr, Range* pRange) +{ + // Reset the maps. + ClearRangeMap(); + ClearOverflowMap(); + ClearSearchPath(); + + Range range = GetRangeWorker(block, expr, false DEBUGARG(0)); + if (range.UpperLimit().IsUnknown() && range.LowerLimit().IsUnknown()) + { + JITDUMP("Range is completely unknown.\n"); + return false; + } + + if (DoesOverflow(block, expr, range)) + { + JITDUMP("Range determined to overflow.\n"); + return false; + } + + *pRange = range; + return true; +} + +//------------------------------------------------------------------------ +// GetRangeWorker: Internal worker for TryGetRange. Does not reset the internal state +// needed to obtain cached ranges quickly. +// +// Arguments: +// block - the block that contains `expr`; +// expr - expression to compute the range for; +// monIncreasing - true if `expr` is proven to be monotonically increasing; +// indent - debug printing indent. 
+// +// Return Value: +// expr's range +// +Range RangeCheck::GetRangeWorker(BasicBlock* block, GenTree* expr, bool monIncreasing DEBUGARG(int indent)) { #ifdef DEBUG if (m_pCompiler->verbose) { Indent(indent); - JITDUMP("[RangeCheck::GetRange] " FMT_BB " ", block->bbNum); + JITDUMP("[RangeCheck::GetRangeWorker] " FMT_BB " ", block->bbNum); m_pCompiler->gtDispTree(expr); Indent(indent); JITDUMP("{\n", expr); @@ -1654,67 +1768,12 @@ Range RangeCheck::GetRange(BasicBlock* block, GenTree* expr, bool monIncreasing return range; } -#ifdef DEBUG -// If this is a tree local definition add its location to the def map. -void RangeCheck::MapStmtDefs(const Location& loc) -{ - GenTreeLclVarCommon* tree = loc.tree; - - if (tree->HasSsaName() && tree->OperIsLocalStore()) - { - SetDef(HashCode(tree->GetLclNum(), tree->GetSsaNum()), new (m_alloc) Location(loc)); - } -} - -struct MapMethodDefsData -{ - RangeCheck* rc; - BasicBlock* block; - Statement* stmt; - - MapMethodDefsData(RangeCheck* rc, BasicBlock* block, Statement* stmt) - : rc(rc) - , block(block) - , stmt(stmt) - { - } -}; - -Compiler::fgWalkResult MapMethodDefsVisitor(GenTree** ptr, Compiler::fgWalkData* data) -{ - GenTree* tree = *ptr; - MapMethodDefsData* rcd = ((MapMethodDefsData*)data->pCallbackData); - - if (tree->IsLocal()) - { - rcd->rc->MapStmtDefs(RangeCheck::Location(rcd->block, rcd->stmt, tree->AsLclVarCommon())); - } - - return Compiler::WALK_CONTINUE; -} - -void RangeCheck::MapMethodDefs() -{ - // First, gather where all definitions occur in the program and store it in a map. - for (BasicBlock* const block : m_pCompiler->Blocks()) - { - for (Statement* const stmt : block->Statements()) - { - MapMethodDefsData data(this, block, stmt); - m_pCompiler->fgWalkTreePre(stmt->GetRootNodePointer(), MapMethodDefsVisitor, &data, false, true); - } - } - m_fMappedDefs = true; -} -#endif - // Entry point to range check optimizations. bool RangeCheck::OptimizeRangeChecks() { - if (m_pCompiler->fgSsaPassesCompleted == 0) - { - return false; - } + // Reset the budget in case of JitOptRepeat. + m_nVisitBudget = MAX_VISIT_BUDGET; + m_preferredBound = ValueNumStore::NoVN; bool madeChanges = false; @@ -1732,6 +1791,14 @@ bool RangeCheck::OptimizeRangeChecks() return madeChanges; } + if (tree->OperIs(GT_BOUNDS_CHECK)) + { + // Leave a hint for optRangeCheckCloning to improve the JIT TP. + // NOTE: it doesn't have to be precise and being properly maintained + // during transformations, it's just a hint. + block->SetFlags(BBF_MAY_HAVE_BOUNDS_CHECKS); + } + OptimizeRangeCheck(block, stmt, tree); } diff --git a/src/coreclr/jit/rangecheck.h b/src/coreclr/jit/rangecheck.h index de44b88d3ae5..8df2fc19ea79 100644 --- a/src/coreclr/jit/rangecheck.h +++ b/src/coreclr/jit/rangecheck.h @@ -510,10 +510,15 @@ struct RangeOps { result.lLimit = r1lo; } - // Widen Upper Limit => Max(k, (a.len + n)) yields (a.len + n), - // This is correct if k >= 0 and n >= k, since a.len always >= 0 - // (a.len + n) could overflow, but the result (a.len + n) also + + // NOTE: in some of the calculations below, we assume that $bnd is never negative + // and we have to be careful by not masking possible overflows. + + // Widen Upper Limit => Max(k, ($bnd + n)) yields ($bnd + n), + // This is correct if k >= 0 and n >= k, since $bnd always >= 0 + // ($bnd + n) could overflow, but the result ($bnd + n) also // preserves the overflow. 
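The widening rule spelled out in the comment above (`Max(k, $bnd + n)` collapses to `$bnd + n` when `k >= 0` and `n >= k`) can be spot-checked with ordinary integers, since an array length is never negative. A small standalone check (illustrative only; it deliberately ignores the overflow caveat the comment calls out):

#include <algorithm>
#include <cassert>
#include <initializer_list>

// Max(k, len + n) == len + n for every non-negative array length 'len',
// provided k >= 0 and n >= k.
static bool WidenUpperHolds(int k, int n, int len)
{
    return std::max(k, len + n) == len + n;
}

int main()
{
    const int k = 3;
    const int n = 5; // n >= k >= 0
    for (int len : {0, 1, 2, 10, 1000})
    {
        assert(WidenUpperHolds(k, n, len));
    }
    return 0;
}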
+ // if (r1hi.IsConstant() && r1hi.GetConstant() >= 0 && r2hi.IsBinOpArray() && r2hi.GetConstant() >= r1hi.GetConstant()) { @@ -524,14 +529,38 @@ struct RangeOps { result.uLimit = r1hi; } + + // Rule: <$bnd + cns1, ...> U = when cns1 <= 0 + // + // Example: <$bnd - 3, ...> U <0, ...> = <-3, ...> + // + if (r1lo.IsBinOpArray() && r2lo.IsConstant() && (r1lo.cns <= 0)) + { + result.lLimit = Limit(Limit::keConstant, min(r1lo.cns, r2lo.cns)); + } + if (r2lo.IsBinOpArray() && r1lo.IsConstant() && (r2lo.cns <= 0)) + { + result.lLimit = Limit(Limit::keConstant, min(r2lo.cns, r1lo.cns)); + } + + // Rule: <..., $bnd + cns1> U <..., $bnd + cns2> = <..., $bnd + max(cns1, cns2)> + // + // Example: <..., $bnd + 10> U <..., $bnd + 20> = <..., $bnd + 20> + // if (r1hi.IsBinOpArray() && r2hi.IsBinOpArray() && r1hi.vn == r2hi.vn) { - result.uLimit = r1hi; - // Widen the upper bound if the other constant is greater. - if (r2hi.GetConstant() > r1hi.GetConstant()) - { - result.uLimit = r2hi; - } + result.uLimit = r1hi; // copy $bnd and kind info + result.uLimit.cns = max(r1hi.cns, r2hi.cns); + } + + // Rule: <$bnd + cns1, ...> U <$bnd + cns2, ...> = <$bnd + min(cns1, cns2), ...> + // + // Example: <$bnd + 10, ...> U <$bnd + 20, ...> = <$bnd + 10, ...> + // + if (r1lo.IsBinOpArray() && r2lo.IsBinOpArray() && r1lo.vn == r2lo.vn) + { + result.lLimit = r1lo; // copy $bnd and kind info + result.lLimit.cns = min(r1lo.cns, r2lo.cns); } return result; } @@ -585,60 +614,110 @@ struct RangeOps result.uLimit = Limit(Limit::keConstant, -lo); return result; } -}; -class RangeCheck -{ -public: - // Constructor - RangeCheck(Compiler* pCompiler); - - typedef JitHashTable, bool> OverflowMap; - typedef JitHashTable, Range*> RangeMap; - typedef JitHashTable, BasicBlock*> SearchPath; + enum class RelationKind + { + AlwaysTrue, + AlwaysFalse, + Unknown + }; -#ifdef DEBUG - // TODO-Cleanup: This code has been kept around just to ensure that the SSA data is still - // valid when RangeCheck runs. It should be removed at some point (and perhaps replaced - // by a proper SSA validity checker). + //------------------------------------------------------------------------ + // EvalRelop: Evaluate the relation between two ranges for the given relop + // Example: "x >= y" is AlwaysTrue when "x.LowerLimit() >= y.UpperLimit()" + // + // Arguments: + // relop - The relational operator (LE,LT,GE,GT,EQ,NE) + // isUnsigned - True if the comparison is unsigned + // x - The left range + // y - The right range + // + // Returns: + // AlwaysTrue when the given relop always evaluates to true for the given ranges + // AlwaysFalse when the given relop always evaluates to false for the given ranges + // Otherwise Unknown + // + static RelationKind EvalRelop(const genTreeOps relop, bool isUnsigned, const Range& x, const Range& y) + { + const Limit& xLower = x.LowerLimit(); + const Limit& yLower = y.LowerLimit(); + const Limit& xUpper = x.UpperLimit(); + const Limit& yUpper = y.UpperLimit(); + + // For unsigned comparisons, we only support non-negative ranges. + if (isUnsigned) + { + if (!xLower.IsConstant() || !yUpper.IsConstant() || (xLower.GetConstant() < 0) || + (yLower.GetConstant() < 0)) + { + return RelationKind::Unknown; + } + } - // Location information is used to map where the defs occur in the method. 
-    struct Location
-    {
-        BasicBlock*          block;
-        Statement*           stmt;
-        GenTreeLclVarCommon* tree;
-        Location(BasicBlock* block, Statement* stmt, GenTreeLclVarCommon* tree)
-            : block(block)
-            , stmt(stmt)
-            , tree(tree)
+        switch (relop)
         {
-        }
+            case GT_GE:
+            case GT_LT:
+                if (xLower.IsConstant() && yUpper.IsConstant() && (xLower.GetConstant() >= yUpper.GetConstant()))
+                {
+                    return relop == GT_GE ? RelationKind::AlwaysTrue : RelationKind::AlwaysFalse;
+                }
-    private:
-        Location();
-    };
+                if (xUpper.IsConstant() && yLower.IsConstant() && (xUpper.GetConstant() < yLower.GetConstant()))
+                {
+                    return relop == GT_GE ? RelationKind::AlwaysFalse : RelationKind::AlwaysTrue;
+                }
+                break;
+
+            case GT_GT:
+            case GT_LE:
+                if (xLower.IsConstant() && yUpper.IsConstant() && (xLower.GetConstant() > yUpper.GetConstant()))
+                {
+                    return relop == GT_GT ? RelationKind::AlwaysTrue : RelationKind::AlwaysFalse;
+                }
-    typedef JitHashTable<UINT64, JitLargePrimitiveKeyFuncs<UINT64>, Location*> VarToLocMap;
+                if (xUpper.IsConstant() && yLower.IsConstant() && (xUpper.GetConstant() <= yLower.GetConstant()))
+                {
+                    return relop == GT_GT ? RelationKind::AlwaysFalse : RelationKind::AlwaysTrue;
+                }
+                break;
-    // Generate a hashcode unique for this ssa var.
-    UINT64 HashCode(unsigned lclNum, unsigned ssaNum);
+            case GT_EQ:
+            case GT_NE:
+                if ((xLower.IsConstant() && yUpper.IsConstant() && (xLower.GetConstant() > yUpper.GetConstant())) ||
+                    (xUpper.IsConstant() && yLower.IsConstant() && (xUpper.GetConstant() < yLower.GetConstant())))
+                {
+                    return relop == GT_EQ ? RelationKind::AlwaysFalse : RelationKind::AlwaysTrue;
+                }
+                break;
-    // Add a location of the definition of ssa var to the location map.
-    // Requires "hash" to be computed using HashCode.
-    // Requires "location" to be the local definition.
-    void SetDef(UINT64 hash, Location* loc);
+            default:
+                assert(!"unknown comparison operator");
+                break;
+        }
+        return RelationKind::Unknown;
+    }
+};
-    // Given a tree node that is a local, return the Location defining the local.
-    Location* GetDef(GenTreeLclVarCommon* lcl);
-    Location* GetDef(unsigned lclNum, unsigned ssaNum);
+class RangeCheck
+{
+public:
+    // Constructor
+    RangeCheck(Compiler* pCompiler);
-    // Given a statement, check if it is a def and add its locations in a map.
-    void MapStmtDefs(const Location& loc);
+    // Entry point to optimize range checks in the method. Assumes value numbering
+    // and assertion prop phases are completed.
+    bool OptimizeRangeChecks();
-    // Given the CFG, check if it has defs and add their locations in a map.
-    void MapMethodDefs();
-#endif
+    bool TryGetRange(BasicBlock* block, GenTree* expr, Range* pRange);
+
+    // Cheaper version of TryGetRange that is based only on incoming assertions.
+    static bool TryGetRangeFromAssertions(Compiler* comp, ValueNum num, ASSERT_VALARG_TP assertions, Range* pRange);
+
+private:
+    typedef JitHashTable<GenTree*, JitPtrKeyFuncs<GenTree>, bool> OverflowMap;
+    typedef JitHashTable<GenTree*, JitPtrKeyFuncs<GenTree>, Range*> RangeMap;
+    typedef JitHashTable<GenTree*, JitPtrKeyFuncs<GenTree>, BasicBlock*> SearchPath;
     int GetArrLength(ValueNum vn);
@@ -648,30 +727,22 @@ class RangeCheck
     // TODO-CQ: This is not general enough.
     bool BetweenBounds(Range& range, GenTree* upper, int arrSize);
-    // Entry point to optimize range checks in the method. Assumes value numbering
-    // and assertion prop phases are completed.
-    bool OptimizeRangeChecks();
-
     // Given a "tree" node, check if it contains array bounds check node and
     // optimize to remove it, if possible. Requires "stmt" and "block" that
    // contain the tree.
void OptimizeRangeCheck(BasicBlock* block, Statement* stmt, GenTree* tree); - // Given the index expression try to find its range. - // The range of a variable depends on its rhs which in turn depends on its constituent variables. - // The "path" is the path taken in the search for the rhs' range and its constituents' range. - // If "monIncreasing" is true, the calculations are made more liberally assuming initial values - // at phi definitions for the lower bound. - Range GetRange(BasicBlock* block, GenTree* expr, bool monIncreasing DEBUGARG(int indent)); + // Internal worker for GetRange. + Range GetRangeWorker(BasicBlock* block, GenTree* expr, bool monIncreasing DEBUGARG(int indent)); // Compute the range from the given type Range GetRangeFromType(var_types type); // Given the local variable, first find the definition of the local and find the range of the rhs. - // Helper for GetRange. + // Helper for GetRangeWorker. Range ComputeRangeForLocalDef(BasicBlock* block, GenTreeLclVarCommon* lcl, bool monIncreasing DEBUGARG(int indent)); - // Compute the range, rather than retrieve a cached value. Helper for GetRange. + // Compute the range, rather than retrieve a cached value. Helper for GetRangeWorker. Range ComputeRange(BasicBlock* block, GenTree* expr, bool monIncreasing DEBUGARG(int indent)); // Compute the range for the op1 and op2 for the given binary operator. @@ -686,7 +757,12 @@ class RangeCheck void MergeEdgeAssertions(GenTreeLclVarCommon* lcl, ASSERT_VALARG_TP assertions, Range* pRange); // Inspect the assertions about the current ValueNum to refine pRange - void MergeEdgeAssertions(ValueNum num, ASSERT_VALARG_TP assertions, Range* pRange); + static void MergeEdgeAssertions(Compiler* comp, + ValueNum num, + ValueNum preferredBoundVN, + ASSERT_VALARG_TP assertions, + Range* pRange, + bool canUseCheckedBounds = true); // The maximum possible value of the given "limit". If such a value could not be determined // return "false". For example: CORINFO_Array_MaxLength for array length. @@ -708,11 +784,11 @@ class RangeCheck // calculation that overflows. bool DoesVarDefOverflow(BasicBlock* block, GenTreeLclVarCommon* lcl, const Range& range); - bool ComputeDoesOverflow(BasicBlock* block, GenTree* expr, const Range& range); - // Does the current "expr", which is a use, involve a definition that overflows. bool DoesOverflow(BasicBlock* block, GenTree* tree, const Range& range); + bool ComputeDoesOverflow(BasicBlock* block, GenTree* expr, const Range& range); + // Widen the range by first checking if the induction variable is monotonically increasing. // Requires "pRange" to be partially computed. void Widen(BasicBlock* block, GenTree* tree, Range* pRange); @@ -728,27 +804,26 @@ class RangeCheck // will be applied for the currently compiled method. bool IsOverBudget(); -private: // Given a lclvar use, try to find the lclvar's defining store and its containing block. LclSsaVarDsc* GetSsaDefStore(GenTreeLclVarCommon* lclUse); - GenTreeBoundsChk* m_pCurBndsChk; + // When we have this bound and a constant, we prefer to use this bound (if set) + ValueNum m_preferredBound; // Get the cached overflow values. OverflowMap* GetOverflowMap(); + void ClearOverflowMap(); OverflowMap* m_pOverflowMap; // Get the cached range values. 
RangeMap* GetRangeMap(); + void ClearRangeMap(); RangeMap* m_pRangeMap; + SearchPath* GetSearchPath(); + void ClearSearchPath(); SearchPath* m_pSearchPath; -#ifdef DEBUG - bool m_fMappedDefs; - VarToLocMap* m_pDefTable; -#endif - Compiler* m_pCompiler; CompAllocator m_alloc; diff --git a/src/coreclr/jit/rangecheckcloning.cpp b/src/coreclr/jit/rangecheckcloning.cpp new file mode 100644 index 000000000000..2064f873c511 --- /dev/null +++ b/src/coreclr/jit/rangecheckcloning.cpp @@ -0,0 +1,647 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "jitpch.h" +#include "rangecheckcloning.h" + +// This file contains the definition of the "Range check cloning" phase. +// +// The goal of this phase is to pick up range checks which were not optimized by the +// range check optimization phase and clone them to have "fast" and "slow" paths. +// This is similar to what the "Loop Cloning" phase does for loops. Example: +// +// arr[i + 1] = x; +// arr[i + 3] = y; +// arr[i + 5] = z; +// arr[i + 8] = w; +// +// assertprop/rangecheck phases give up on the above bounds checks because of the +// increasing offsets and there are no assertions that they can rely on. +// This phase handles such cases by cloning the entire block (only the affected statements +// to be precise) into "fast" and "slow" under a "cloned" condition: +// +// if (i >= 0 && i < arr.Length - 8) +// { +// // Fast path +// arr[i + 1] = x; // no bounds check +// arr[i + 3] = y; // no bounds check +// arr[i + 5] = z; // no bounds check +// arr[i + 8] = w; // no bounds check +// } +// else +// { +// // Slow path +// arr[i + 1] = x; // bounds check +// arr[i + 3] = y; // bounds check +// arr[i + 5] = w; // bounds check +// arr[i + 8] = w; // bounds check +// } +// +// The phase scans all statements in a block and groups the bounds checks based on +// "Base Index and Length" pairs (VNs). Then the phase takes the largest group and +// clones the block to have fast and slow paths. + +//------------------------------------------------------------------------------------ +// Initialize: Initialize the BoundsCheckInfo with the given bounds check node. +// and perform some basic legality checks. +// +// Arguments: +// comp - The compiler instance +// statement - The statement containing the bounds check +// statementIdx - The index of the statement in the block +// bndChk - The bounds check node (its use edge) +// +// Return Value: +// true if the initialization was successful, false otherwise. 
+// +bool BoundsCheckInfo::Initialize(const Compiler* comp, Statement* statement, int statementIdx, GenTree** bndChk) +{ + assert((bndChk != nullptr) && ((*bndChk) != nullptr)); + + stmt = statement; + stmtIdx = statementIdx; + bndChkUse = bndChk; + idxVN = comp->vnStore->VNConservativeNormalValue(BndChk()->GetIndex()->gtVNPair); + lenVN = comp->vnStore->VNConservativeNormalValue(BndChk()->GetArrayLength()->gtVNPair); + if ((idxVN == ValueNumStore::NoVN) || (lenVN == ValueNumStore::NoVN)) + { + return false; + } + + if (BndChk()->GetIndex()->IsIntCnsFitsInI32()) + { + // Index being a constant means we have index=0 and cns offset + offset = static_cast(BndChk()->GetIndex()->AsIntCon()->IconValue()); + idxVN = comp->vnStore->VNZeroForType(TYP_INT); + } + else + { + if (comp->vnStore->TypeOfVN(idxVN) != TYP_INT) + { + return false; + } + + // Otherwise, peel the offset from the index using VN + comp->vnStore->PeelOffsetsI32(&idxVN, &offset); + assert(idxVN != ValueNumStore::NoVN); + } + assert(comp->vnStore->TypeOfVN(idxVN) == TYP_INT); + + if (offset < 0) + { + // Not supported yet. + return false; + } + return true; +} + +//------------------------------------------------------------------------------------ +// RemoveBoundsChk - Remove the given bounds check from the statement and the block. +// +// Arguments: +// comp - compiler instance +// treeUse - the bounds check node to remove (its use edge) +// stmt - the statement containing the bounds check +// +static void RemoveBoundsChk(Compiler* comp, GenTree** treeUse, Statement* stmt) +{ + JITDUMP("Before RemoveBoundsChk:\n"); + DISPTREE(*treeUse); + + GenTree* sideEffList = nullptr; + comp->gtExtractSideEffList(*treeUse, &sideEffList, GTF_SIDE_EFFECT, /*ignoreRoot*/ true); + *treeUse = (sideEffList != nullptr) ? sideEffList : comp->gtNewNothingNode(); + + comp->gtUpdateStmtSideEffects(stmt); + comp->gtSetStmtInfo(stmt); + comp->fgSetStmtSeq(stmt); + + JITDUMP("After RemoveBoundsChk:\n"); + DISPTREE(stmt->GetRootNode()); +} + +// ----------------------------------------------------------------------------- +// optRangeCheckCloning_DoClone: Perform the actual range check cloning for the given range +// of bounds checks. All the legality checks are done before calling this function. +// This function effectively converts a single block (containing bounds checks) into: +// +// prevBb: +// goto lowerBndBb +// +// lowerBndBb: +// if (idx < 0) +// goto fallbackBb +// else +// goto upperBndBb +// +// upperBndBb: +// if (idx < len - maxConstOffset) +// goto fastpathBb +// else +// goto fallbackBb +// +// fallbackBb: +// [Original block with bounds checks] +// goto nextBb +// +// fastpathBb: +// [Cloned block with no bounds checks] +// goto nextBb +// +// nextBb: +// ... +// +// Arguments: +// comp - The compiler instance +// block - The block to clone +// bndChkStack - The stack of bounds checks to clone +// lastStmt - The last statement in the block (the block is split after this statement) +// +// Return Value: +// The next block to visit after the cloning. 
+// +static BasicBlock* optRangeCheckCloning_DoClone(Compiler* comp, + BasicBlock* block, + BoundsCheckInfoStack* bndChkStack, + Statement* lastStmt) +{ + assert(block != nullptr); + assert(bndChkStack->Height() > 0); + + // The bound checks are in the execution order (top of the stack is the last check) + BoundsCheckInfo firstCheck = bndChkStack->Bottom(); + BasicBlock* prevBb = block; + + // First, split the block at the first bounds check using gtSplitTree (via fgSplitBlockBeforeTree): + GenTree** bndChkUse; + Statement* newFirstStmt; + BasicBlock* fastpathBb = + comp->fgSplitBlockBeforeTree(block, firstCheck.stmt, firstCheck.BndChk(), &newFirstStmt, &bndChkUse); + + // Perform the usual routine after gtSplitTree: + while ((newFirstStmt != nullptr) && (newFirstStmt != firstCheck.stmt)) + { + comp->fgMorphStmtBlockOps(fastpathBb, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + comp->fgMorphStmtBlockOps(fastpathBb, firstCheck.stmt); + comp->gtUpdateStmtSideEffects(firstCheck.stmt); + + // Now split the block at the last bounds check using fgSplitBlockAfterStatement: + // TODO-RangeCheckCloning: call gtSplitTree for lastBndChkStmt as well, to cut off + // the stuff we don't have to clone. + BasicBlock* lastBb = comp->fgSplitBlockAfterStatement(fastpathBb, lastStmt); + + DebugInfo debugInfo = fastpathBb->firstStmt()->GetDebugInfo(); + + // Find the maximum offset + int offset = 0; + for (int i = 0; i < bndChkStack->Height(); i++) + { + offset = max(offset, bndChkStack->Top(i).offset); + } + assert(offset >= 0); + + GenTree* idx = comp->gtCloneExpr(firstCheck.BndChk()->GetIndex()); + GenTree* arrLen = comp->gtCloneExpr(firstCheck.BndChk()->GetArrayLength()); + + // gtSplitTree is expected to spill the side effects of the index and array length expressions + assert((idx->gtFlags & GTF_ALL_EFFECT) == 0); + assert((arrLen->gtFlags & GTF_ALL_EFFECT) == 0); + + // Since we're re-using the index node from the first bounds check and its value was spilled + // by the tree split, we need to restore the base index by subtracting the offset. + // Hopefully, someone will fold this back into the index expression. 
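For reference, the pair of guard blocks constructed below implements the single fast-path condition `i >= 0 && i < arr.Length - maxOffset`; under that condition every grouped access `arr[i + k]` with `0 <= k <= maxOffset` is in bounds, which is what allows all of the checks to be dropped on the fast path. A standalone sanity check of that arithmetic (illustrative only, not JIT code):

#include <cassert>

// The fast-path guard emitted by the cloning transformation.
static bool PassesGuard(int i, int len, int maxOffset)
{
    return (i >= 0) && (i < len - maxOffset);
}

int main()
{
    const int len       = 10;
    const int maxOffset = 8;
    for (int i = -2; i < len + 2; i++)
    {
        if (PassesGuard(i, len, maxOffset))
        {
            // Every access i + k with 0 <= k <= maxOffset is then in range.
            for (int k = 0; k <= maxOffset; k++)
            {
                assert((i + k >= 0) && (i + k < len));
            }
        }
    }
    return 0;
}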
+ // + GenTree* idxClone; + if (firstCheck.offset > 0) + { + GenTree* offsetNode = comp->gtNewIconNode(-firstCheck.offset); // never overflows + idx = comp->gtNewOperNode(GT_ADD, TYP_INT, idx, offsetNode); + idxClone = comp->fgInsertCommaFormTemp(&idx); + } + else + { + idxClone = comp->gtCloneExpr(idx); + } + + // 1) lowerBndBb: + // + // if (i < 0) + // goto fallbackBb + // else + // goto upperBndBb + // + GenTreeOp* idxLowerBoundTree = comp->gtNewOperNode(GT_LT, TYP_INT, comp->gtCloneExpr(idx), comp->gtNewIconNode(0)); + idxLowerBoundTree->gtFlags |= GTF_RELOP_JMP_USED; + GenTree* jtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, idxLowerBoundTree); + BasicBlock* lowerBndBb = comp->fgNewBBFromTreeAfter(BBJ_COND, prevBb, jtrue, debugInfo); + + JITDUMP("\nLower bound check:\n"); + DISPTREE(jtrue); + + // 2) upperBndBb: + // + // if (i < arrLen - indexOffset) + // goto fastpathBb + // else + // goto fallbackBb + // + GenTreeOp* idxUpperBoundTree; + if (idx->IsIntegralConst(0)) + { + // if the index is just 0, then we can simplify the condition to "arrLen > indexOffset" + idxUpperBoundTree = comp->gtNewOperNode(GT_GT, TYP_INT, arrLen, comp->gtNewIconNode(offset)); + } + else + { + // "i < arrLen + (-indexOffset)" + GenTree* negOffset = comp->gtNewIconNode(-offset); // never overflows + GenTreeOp* subNode = comp->gtNewOperNode(GT_ADD, TYP_INT, arrLen, negOffset); + idxUpperBoundTree = comp->gtNewOperNode(GT_LT, TYP_INT, idxClone, subNode); + } + idxUpperBoundTree->gtFlags |= GTF_RELOP_JMP_USED; + jtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, idxUpperBoundTree); + BasicBlock* upperBndBb = comp->fgNewBBFromTreeAfter(BBJ_COND, lowerBndBb, jtrue, debugInfo); + + JITDUMP("\nUpper bound check:\n"); + DISPTREE(jtrue); + + // 3) fallbackBb: + // + // For the fallback (slow path), we just entirely clone the fast path. + // + BasicBlock* fallbackBb = comp->fgNewBBafter(BBJ_ALWAYS, upperBndBb, false); + BasicBlock::CloneBlockState(comp, fallbackBb, fastpathBb); + + // 4) fastBlockBb: + // + // No actions needed - it's our current block as is. + + // Wire up the edges + // + comp->fgRedirectTargetEdge(prevBb, lowerBndBb); + FlowEdge* fallbackToNextBb = comp->fgAddRefPred(lastBb, fallbackBb); + FlowEdge* lowerBndToUpperBndEdge = comp->fgAddRefPred(upperBndBb, lowerBndBb); + FlowEdge* lowerBndToFallbackEdge = comp->fgAddRefPred(fallbackBb, lowerBndBb); + FlowEdge* upperBndToFastPathEdge = comp->fgAddRefPred(fastpathBb, upperBndBb); + FlowEdge* upperBndToFallbackEdge = comp->fgAddRefPred(fallbackBb, upperBndBb); + fallbackBb->SetTargetEdge(fallbackToNextBb); + lowerBndBb->SetTrueEdge(lowerBndToFallbackEdge); + lowerBndBb->SetFalseEdge(lowerBndToUpperBndEdge); + upperBndBb->SetTrueEdge(upperBndToFastPathEdge); + upperBndBb->SetFalseEdge(upperBndToFallbackEdge); + + // Set the weights. We assume that the fallback is rarely taken. 
+    lowerBndBb->inheritWeight(prevBb);
+    upperBndBb->inheritWeight(prevBb);
+    fastpathBb->inheritWeight(prevBb);
+    fallbackBb->bbSetRunRarely();
+    fallbackToNextBb->setLikelihood(1.0f);
+    lowerBndToUpperBndEdge->setLikelihood(1.0f);
+    lowerBndToFallbackEdge->setLikelihood(0.0f);
+    upperBndToFastPathEdge->setLikelihood(1.0f);
+    upperBndToFallbackEdge->setLikelihood(0.0f);
+
+    lowerBndBb->SetFlags(BBF_INTERNAL);
+    upperBndBb->SetFlags(BBF_INTERNAL | BBF_HAS_IDX_LEN);
+
+    // Now drop the bounds checks from the fast path
+    while (!bndChkStack->Empty())
+    {
+        BoundsCheckInfo info = bndChkStack->Pop();
+#if DEBUG
+        // Ensure that the bounds check that we're removing is in the fast path:
+        bool statementFound = false;
+        for (Statement* const stmt : fastpathBb->Statements())
+        {
+            if (stmt == info.stmt)
+            {
+                statementFound = true;
+
+                // Find the bndChk in the statement
+                Compiler::fgWalkResult result = comp->fgWalkTreePre(
+                    stmt->GetRootNodePointer(),
+                    [](GenTree** pTree, Compiler::fgWalkData* data) -> Compiler::fgWalkResult {
+                        return (*pTree == (GenTree*)data->pCallbackData) ? Compiler::WALK_ABORT : Compiler::WALK_CONTINUE;
+                    },
+                    info.BndChk());
+                // We don't need to validate bndChkParent - RemoveBoundsChk will do it for us
+                assert(result == Compiler::WALK_ABORT);
+                break;
+            }
+        }
+        assert(statementFound);
+#endif
+        RemoveBoundsChk(comp, info.bndChkUse, info.stmt);
+    }
+
+    comp->fgMorphBlockStmt(lowerBndBb, lowerBndBb->lastStmt() DEBUGARG("Morph lowerBnd"));
+    comp->fgMorphBlockStmt(upperBndBb, upperBndBb->lastStmt() DEBUGARG("Morph upperBnd"));
+    if (lowerBndBb->lastStmt() != nullptr)
+    {
+        // lowerBndBb might be converted into a no-op by fgMorphBlockStmt(lowerBndBb);
+        // this happens when we emit a fake BBJ_COND(0 >= 0) block (for simplicity).
+        comp->gtUpdateStmtSideEffects(lowerBndBb->lastStmt());
+    }
+    if (upperBndBb->lastStmt() != nullptr)
+    {
+        // In rare cases, upperBndBb can also be folded into an empty block
+        // by fgMorphBlockStmt.
+        comp->gtUpdateStmtSideEffects(upperBndBb->lastStmt());
+    }
+
+    // All blocks must be in the same EH region
+    assert(BasicBlock::sameEHRegion(prevBb, lowerBndBb));
+    assert(BasicBlock::sameEHRegion(prevBb, upperBndBb));
+    assert(BasicBlock::sameEHRegion(prevBb, fastpathBb));
+    assert(BasicBlock::sameEHRegion(prevBb, fallbackBb));
+    assert(BasicBlock::sameEHRegion(prevBb, lastBb));
+
+    return fastpathBb;
+}
+
+// A visitor to record all the bounds checks in a statement in the execution order
+class BoundsChecksVisitor final : public GenTreeVisitor<BoundsChecksVisitor>
+{
+    Statement*                      m_stmt;
+    ArrayStack<BoundCheckLocation>* m_boundsChks;
+    int                             m_stmtIdx;
+
+public:
+    enum
+    {
+        DoPostOrder       = true,
+        DoPreOrder        = true,
+        UseExecutionOrder = true
+    };
+
+    BoundsChecksVisitor(Compiler*                       compiler,
+                        Statement*                      stmt,
+                        int                             stmtIdx,
+                        ArrayStack<BoundCheckLocation>* bndChkLocations)
+        : GenTreeVisitor(compiler)
+        , m_stmt(stmt)
+        , m_boundsChks(bndChkLocations)
+        , m_stmtIdx(stmtIdx)
+    {
+    }
+
+    fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
+    {
+        // No GTF_EXCEPT - no bounds check down the tree
+        if (((*use)->gtFlags & GTF_EXCEPT) == 0)
+        {
+            return fgWalkResult::WALK_SKIP_SUBTREES;
+        }
+        return fgWalkResult::WALK_CONTINUE;
+    }
+
+    fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
+    {
+        // For now, we only handle SCK_RNGCHK_FAIL
+        if ((*use)->OperIs(GT_BOUNDS_CHECK) && ((*use)->AsBoundsChk()->gtThrowKind == SCK_RNGCHK_FAIL))
+        {
+            m_boundsChks->Push(BoundCheckLocation(m_stmt, use, m_stmtIdx));
+        }
+        return fgWalkResult::WALK_CONTINUE;
+    }
+};
+
+// -----------------------------------------------------------------------------
----------------------------------------------------------------------------- +// DoesComplexityExceed: Check if the complexity of the bounds checks exceeds the budget. +// We want to avoid cloning blocks with too many unrelated trees/statements between +// the bounds checks. +// +// Arguments: +// comp - The compiler instance +// bndChks - The stack of bounds checks +// +// Return Value: +// true if the complexity exceeds the budget, false otherwise. +// +static bool DoesComplexityExceed(Compiler* comp, ArrayStack* bndChks) +{ + Statement* firstBndChkStmt = bndChks->Bottom().stmt; + Statement* lastBndChkStmt = bndChks->Top().stmt; + + JITDUMP("Checking complexity from " FMT_STMT " to " FMT_STMT "\n", firstBndChkStmt->GetID(), + lastBndChkStmt->GetID()); + + assert(bndChks->Height() <= MAX_CHECKS_PER_GROUP); + + // An average statement with a bounds check is ~20 nodes. There can be statements + // between the bounds checks (i.e. bounds checks from another groups). So let's say + // our budget is 40 nodes per bounds check. + unsigned budget = bndChks->Height() * BUDGET_MULTIPLIER; + JITDUMP("\tBudget: %d nodes.\n", budget); + + Statement* currentStmt = firstBndChkStmt; + while (currentStmt != lastBndChkStmt) + { + GenTree* rootNode = currentStmt->GetRootNode(); + if (rootNode != nullptr) + { + unsigned actual = 0; + if (comp->gtComplexityExceeds(rootNode, budget, &actual)) + { + JITDUMP("\tExceeded budget!"); + return true; + } + JITDUMP("\t\tSubtracting %d from budget in " FMT_STMT " statement\n", actual, currentStmt->GetID()); + budget -= actual; + } + currentStmt = currentStmt->GetNextStmt(); + } + + JITDUMP("Complexity is within budget: %d\n", budget); + return false; +} + +// ----------------------------------------------------------------------------- +// optRangeCheckCloning: The main entry point for the range check cloning phase. +// This phase scans all the blocks in the method and groups the bounds checks +// in each block by the "Base Index and Length" pairs (VNs). Then it picks up +// the largest group and clones the block to have fast and slow paths in order +// to optimize the bounds checks in the fast path. +// See the overview at the top of the file and the comments in the optRangeCheckCloning_DoClone +// function for more details. +// +// Return Value: +// The status of the phase after the transformation. +// +PhaseStatus Compiler::optRangeCheckCloning() +{ + if (!doesMethodHaveBoundsChecks()) + { + JITDUMP("Current method has no bounds checks\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + const bool preferSize = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT); + if (preferSize) + { + // The optimization comes with a codegen size increase + JITDUMP("Optimized for size - bail out.\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + bool modified = false; + + // An array to keep all the bounds checks in the block + // Strictly speaking, we don't need this array and can group the bounds checks + // right as we walk them, but this helps to improve the TP/Memory usage + // as many blocks don't have enough bounds checks to clone anyway. 
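As a concrete illustration of the budget arithmetic above: a group of 4 bounds checks gets a budget of 4 × BUDGET_MULTIPLIER = 160 nodes, and every statement spanning the group is charged against that budget. A minimal standalone sketch, with made-up per-statement node counts:

```cpp
#include <cstdio>

int main()
{
    const unsigned BUDGET_MULTIPLIER = 40; // nodes allowed per bounds check (see rangecheckcloning.h)
    const unsigned numChecksInGroup  = 4;  // a MIN_CHECKS_PER_GROUP-sized group
    unsigned budget = numChecksInGroup * BUDGET_MULTIPLIER; // 160 nodes

    // Hypothetical node counts of the statements spanning the group, in order.
    const unsigned stmtNodeCounts[] = {22, 18, 75, 30};
    for (unsigned nodes : stmtNodeCounts)
    {
        if (nodes > budget)
        {
            printf("budget exceeded - the group is not cloned\n");
            return 0;
        }
        budget -= nodes; // mirrors "budget -= actual" above
    }
    printf("within budget, %u nodes to spare - the group is eligible\n", budget);
    return 0;
}
```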
+ ArrayStack bndChkLocations(getAllocator(CMK_RangeCheckCloning)); + + // A map to group the bounds checks by the base index and length VNs + BoundsCheckInfoMap bndChkMap(getAllocator(CMK_RangeCheckCloning)); + + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next()) + { + if (!block->HasFlag(BBF_MAY_HAVE_BOUNDS_CHECKS)) + { + // TP optimization - skip blocks that *likely* don't have bounds checks + continue; + } + + if (block->isRunRarely() || block->KindIs(BBJ_THROW)) + { + continue; + } + + bndChkLocations.Reset(); + bndChkMap.RemoveAll(); + + int stmtIdx = -1; + for (Statement* const stmt : block->Statements()) + { + stmtIdx++; + if (block->HasTerminator() && (stmt == block->lastStmt())) + { + // TODO-RangeCheckCloning: Splitting these blocks at the last statements + // require using gtSplitTree for the last bounds check. + break; + } + + // Now just record all the bounds checks in the block (in the execution order) + // + BoundsChecksVisitor visitor(this, stmt, stmtIdx, &bndChkLocations); + visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); + } + + if (bndChkLocations.Height() < MIN_CHECKS_PER_GROUP) + { + JITDUMP("Not enough bounds checks in the block - bail out.\n"); + continue; + } + + // Now we need to group the bounds checks by the base index and length VNs. + // We could do it directly in the visitor above and avoid this O(n) pass, + // but it's more TP/Memory wise to use stack-allocated ArrayStack first and + // bail out on Height() < MAX_CHECKS_PER_GROUP) + { + (*value)->Push(bci); + } + } + } + + if (bndChkMap.GetCount() == 0) + { + JITDUMP("No bounds checks in the block - bail out.\n"); + continue; + } + + // Now choose the largest group of bounds checks (the one with the most checks) + ArrayStack groups(getAllocator(CMK_RangeCheckCloning)); + + for (BoundsCheckInfoMap::Node* keyValuePair : BoundsCheckInfoMap::KeyValueIteration(&bndChkMap)) + { + ArrayStack* value = keyValuePair->GetValue(); + if ((value->Height() >= MIN_CHECKS_PER_GROUP) && !DoesComplexityExceed(this, value)) + { + groups.Push(value); + } + } + + if (groups.Height() == 0) + { + JITDUMP("No suitable group of bounds checks in the block - bail out.\n"); + continue; + } + + // We have multiple groups of bounds checks in the block. + // let's pick a group that appears first in the block and the one whose last bounds check + // appears last in the block. + // + BoundsCheckInfoStack* firstGroup = groups.Top(); + BoundsCheckInfoStack* lastGroup = groups.Top(); + for (int i = 0; i < groups.Height(); i++) + { + BoundsCheckInfoStack* group = groups.Bottom(i); + int firstStmt = group->Bottom().stmtIdx; + int secondStmt = group->Top().stmtIdx; + if (firstStmt < firstGroup->Bottom().stmtIdx) + { + firstGroup = group; + } + if (secondStmt > lastGroup->Top().stmtIdx) + { + lastGroup = group; + } + } + + // We're going to clone for the first group. + // But let's see if we can extend the end of the group so future iterations + // can fit more groups in the same block. + // + Statement* lastStmt = firstGroup->Top().stmt; + + int firstGroupStarts = firstGroup->Bottom().stmtIdx; + int firstGroupEnds = firstGroup->Top().stmtIdx; + int lastGroupStarts = lastGroup->Bottom().stmtIdx; + int lastGroupEnds = lastGroup->Top().stmtIdx; + + // The only requirement is that both groups must overlap - we don't want to + // end up cloning unrelated statements between them (not a correctness issue, + // just a heuristic to avoid cloning too much). 
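To make the grouping step above more tangible, here is a sketch using standard containers in place of the JIT's JitHashTable/ArrayStack. The value numbers are stand-in integers; in the real phase the key is the (index VN, length VN) pair computed per bounds check.

```cpp
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main()
{
    // Pretend VN(i) = 10, VN(a.Length) = 20, VN(j) = 11, VN(b.Length) = 21.
    // Bounds checks in execution order: a[i], a[i+1], a[i+2], b[j], a[i+3], a[i+4]
    const std::pair<int, int> checkKeys[] = {{10, 20}, {10, 20}, {10, 20}, {11, 21}, {10, 20}, {10, 20}};

    std::map<std::pair<int, int>, std::vector<int>> groups; // (idxVN, lenVN) -> check ids
    int id = 0;
    for (const auto& key : checkKeys)
    {
        groups[key].push_back(id++);
    }

    const size_t MIN_CHECKS_PER_GROUP = 4; // same threshold as rangecheckcloning.h
    for (const auto& [key, checks] : groups)
    {
        printf("group (idxVN=%d, lenVN=%d): %zu checks -> %s\n", key.first, key.second, checks.size(),
               (checks.size() >= MIN_CHECKS_PER_GROUP) ? "clone candidate" : "ignored");
    }
    return 0;
}
```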
+ // + if (firstGroupEnds < lastGroupEnds && firstGroupEnds >= lastGroupStarts) + { + lastStmt = lastGroup->Top().stmt; + } + + JITDUMP("Cloning bounds checks in " FMT_BB " from " FMT_STMT " to " FMT_STMT "\n", block->bbNum, + firstGroup->Bottom().stmt->GetID(), lastStmt->GetID()); + + BasicBlock* nextBbToVisit = optRangeCheckCloning_DoClone(this, block, firstGroup, lastStmt); + assert(nextBbToVisit != nullptr); + // optRangeCheckCloning_DoClone wants us to visit nextBbToVisit next + block = nextBbToVisit->Prev(); + assert(block != nullptr); + modified = true; + } + + if (modified) + { + return PhaseStatus::MODIFIED_EVERYTHING; + } + + return PhaseStatus::MODIFIED_NOTHING; +} diff --git a/src/coreclr/jit/rangecheckcloning.h b/src/coreclr/jit/rangecheckcloning.h new file mode 100644 index 000000000000..724bbe19f77b --- /dev/null +++ b/src/coreclr/jit/rangecheckcloning.h @@ -0,0 +1,95 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#pragma once + +// This file contains the definition of the "Range check cloning" phase. +// +// See rangecheckcloning.cpp for context and overview. +// + +// Min number of bounds checks required to form a group +#define MIN_CHECKS_PER_GROUP 4 + +// Max number of bounds checks allowed in a group. +// This is just an arbitrary number to avoid cloning too many checks. +#define MAX_CHECKS_PER_GROUP 64 + +// See comments in DoesComplexityExceed function for more details. +#define BUDGET_MULTIPLIER 40 + +struct BoundCheckLocation +{ + Statement* stmt; + GenTree** bndChkUse; + int stmtIdx; + + BoundCheckLocation(Statement* stmt, GenTree** bndChkUse, int stmtIdx) + : stmt(stmt) + , bndChkUse(bndChkUse) + , stmtIdx(stmtIdx) + { + assert(stmt != nullptr); + assert((bndChkUse != nullptr)); + assert((*bndChkUse) != nullptr); + assert((*bndChkUse)->OperIs(GT_BOUNDS_CHECK)); + assert(stmtIdx >= 0); + } +}; + +struct BoundsCheckInfo +{ + Statement* stmt; + GenTree** bndChkUse; + ValueNum lenVN; + ValueNum idxVN; + int offset; + int stmtIdx; + + BoundsCheckInfo() + : stmt(nullptr) + , bndChkUse(nullptr) + , lenVN(ValueNumStore::NoVN) + , idxVN(ValueNumStore::NoVN) + , offset(0) + , stmtIdx(0) + { + } + + bool Initialize(const Compiler* comp, Statement* statement, int statementIdx, GenTree** bndChkUse); + + GenTreeBoundsChk* BndChk() const + { + return (*bndChkUse)->AsBoundsChk(); + } +}; + +struct IdxLenPair +{ + IdxLenPair(ValueNum idx, ValueNum len) + : idxVN(idx) + , lenVN(len) + { + } + + ValueNum idxVN; + ValueNum lenVN; +}; + +struct LargePrimitiveKeyFuncsIdxLenPair +{ + static unsigned GetHashCode(const IdxLenPair& val) + { + // VNs are mostly small integers + return val.idxVN ^ (val.lenVN << 16); + } + + static bool Equals(const IdxLenPair& x, const IdxLenPair& y) + { + return (x.idxVN == y.idxVN) && (x.lenVN == y.lenVN); + } +}; + +typedef ArrayStack BoundsCheckInfoStack; + +typedef JitHashTable BoundsCheckInfoMap; diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 1e8e56a36a72..27b1acfd6ee0 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -131,7 +131,7 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, unreached(); #endif // FEATURE_HW_INTRINSICS } - arg = NewCallArg::Struct(operand, sigTyp, clsHnd); + arg = NewCallArg::Struct(operand, sigTyp, comp->typGetObjLayout(clsHnd)); } else { @@ -341,11 +341,19 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStackgtFlags & 
GTF_REVERSE_OPS) == 0); // gtNewSimdShuffleNode with reverse ops is not supported GenTree* op1 = operands[0]; GenTree* op2 = operands[1]; - if (op2->IsCnsVec() && comp->IsValidForShuffle(op2->AsVecCon(), simdSize, simdBaseType)) - { - result = comp->gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_WithElement: + bool isShuffleNative = intrinsicId != NI_Vector128_Shuffle; #if defined(TARGET_XARCH) - case NI_Vector256_WithElement: - case NI_Vector512_WithElement: + isShuffleNative = + isShuffleNative && (intrinsicId != NI_Vector256_Shuffle) && (intrinsicId != NI_Vector512_Shuffle); #elif defined(TARGET_ARM64) - case NI_Vector64_WithElement: + isShuffleNative = isShuffleNative && (intrinsicId != NI_Vector64_Shuffle); #endif - { - assert(operandCount == 3); - GenTree* op1 = operands[0]; - GenTree* op2 = operands[1]; - GenTree* op3 = operands[2]; - - if (op2->OperIsConst()) + // Check if the required intrinsics to emit are available. + if (!comp->IsValidForShuffle(op2, simdSize, simdBaseType, nullptr, isShuffleNative)) { - ssize_t imm8 = op2->AsIntCon()->IconValue(); - ssize_t count = simdSize / genTypeSize(simdBaseType); - - if ((imm8 >= count) || (imm8 < 0)) - { - // Using software fallback if index is out of range (throw exception) - break; - } - -#if defined(TARGET_XARCH) - if (varTypeIsIntegral(simdBaseType)) - { - if (varTypeIsLong(simdBaseType)) - { - if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41_X64)) - { - break; - } - } - else if (!varTypeIsShort(simdBaseType)) - { - if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - break; - } - } - } -#endif // TARGET_XARCH - - result = comp->gtNewSimdWithElementNode(retType, op1, op2, op3, simdBaseJitType, simdSize); break; } + + result = comp->gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, isShuffleNative); break; } @@ -718,6 +687,13 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge } break; + case GT_GCPOLL: + { + // GCPOLL is essentially a no-op, we used it as a hint for fgCreateGCPoll + node->gtBashToNOP(); + return Compiler::WALK_CONTINUE; + } + case GT_COMMA: { GenTree* op1 = node->gtGetOp1(); @@ -790,6 +766,7 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge } break; +<<<<<<< HEAD #ifdef TARGET_WASM case GT_RETURN: // LLVM lowering needs to know whether the struct is dependently promoted or not in all cases. @@ -805,6 +782,14 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge } break; #endif // TARGET_WASM +======= + case GT_BSWAP16: + if (node->gtGetOp1()->OperIs(GT_CAST)) + { + comp->fgSimpleLowerBswap16(BlockRange(), node); + } + break; +>>>>>>> upstream-jun default: // Check that we don't have nodes not allowed in HIR here. 
@@ -849,7 +834,7 @@ Compiler::fgWalkResult Rationalizer::RationalizeVisitor::PreOrderVisit(GenTree** { GenTree* const node = *use; - if (node->OperGet() == GT_INTRINSIC) + if (node->OperIs(GT_INTRINSIC)) { if (m_rationalizer.comp->IsIntrinsicImplementedByUserCall(node->AsIntrinsic()->gtIntrinsicName)) { diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index 3d028bbc764d..d430abe5171c 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -980,7 +980,8 @@ bool Compiler::optRedundantBranch(BasicBlock* const block) JITDUMP("\nRedundant branch opt in " FMT_BB ":\n", block->bbNum); - fgMorphBlockStmt(block, stmt DEBUGARG(__FUNCTION__), /* invalidateDFSTreeOnFGChange */ false); + fgMorphBlockStmt(block, stmt DEBUGARG(__FUNCTION__), /* allowFGChange */ true, + /* invalidateDFSTreeOnFGChange */ false); Metrics.RedundantBranchesEliminated++; return true; } diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index 1de4dd8bd669..a2e3c28005ae 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -256,9 +256,9 @@ void Compiler::raMarkStkVars() noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN)); #if FEATURE_FIXED_OUT_ARGS - noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0); + noway_assert((lclNum == lvaOutgoingArgSpaceVar) || (lvaLclStackHomeSize(lclNum) != 0)); #else // FEATURE_FIXED_OUT_ARGS - noway_assert(lvaLclSize(lclNum) != 0); + noway_assert(lvaLclStackHomeSize(lclNum) != 0); #endif // FEATURE_FIXED_OUT_ARGS varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the diff --git a/src/coreclr/jit/register.h b/src/coreclr/jit/register.h index 7dc6baf8e31c..2fecf847a5c4 100644 --- a/src/coreclr/jit/register.h +++ b/src/coreclr/jit/register.h @@ -36,32 +36,41 @@ REGALIAS(RDI, EDI) #else // !defined(TARGET_X86) +#define GPRMASK(x) (1ULL << (x)) /* REGDEF(name, rnum, mask, sname) */ -REGDEF(RAX, 0, 0x00000001, "rax" ) -REGDEF(RCX, 1, 0x00000002, "rcx" ) -REGDEF(RDX, 2, 0x00000004, "rdx" ) -REGDEF(RBX, 3, 0x00000008, "rbx" ) -REGDEF(RSP, 4, 0x00000010, "rsp" ) -REGDEF(RBP, 5, 0x00000020, "rbp" ) -REGDEF(RSI, 6, 0x00000040, "rsi" ) -REGDEF(RDI, 7, 0x00000080, "rdi" ) -REGDEF(R8, 8, 0x00000100, "r8" ) -REGDEF(R9, 9, 0x00000200, "r9" ) -REGDEF(R10, 10, 0x00000400, "r10" ) -REGDEF(R11, 11, 0x00000800, "r11" ) -REGDEF(R12, 12, 0x00001000, "r12" ) -REGDEF(R13, 13, 0x00002000, "r13" ) -REGDEF(R14, 14, 0x00004000, "r14" ) -REGDEF(R15, 15, 0x00008000, "r15" ) -REGDEF(R16, 16, 0x00010000, "r16" ) -REGDEF(R17, 17, 0x00020000, "r17" ) -REGDEF(R18, 18, 0x00040000, "r18" ) -REGDEF(R19, 19, 0x00080000, "r19" ) -REGDEF(R20, 20, 0x00100000, "r20" ) -REGDEF(R21, 21, 0x00200000, "r21" ) -REGDEF(R22, 22, 0x00400000, "r22" ) -REGDEF(R23, 23, 0x00800000, "r23" ) +REGDEF(RAX, 0, GPRMASK(0), "rax" ) +REGDEF(RCX, 1, GPRMASK(1), "rcx" ) +REGDEF(RDX, 2, GPRMASK(2), "rdx" ) +REGDEF(RBX, 3, GPRMASK(3), "rbx" ) +REGDEF(RSP, 4, GPRMASK(4), "rsp" ) +REGDEF(RBP, 5, GPRMASK(5), "rbp" ) +REGDEF(RSI, 6, GPRMASK(6), "rsi" ) +REGDEF(RDI, 7, GPRMASK(7), "rdi" ) +REGDEF(R8, 8, GPRMASK(8), "r8" ) +REGDEF(R9, 9, GPRMASK(9), "r9" ) +REGDEF(R10, 10, GPRMASK(10), "r10" ) +REGDEF(R11, 11, GPRMASK(11), "r11" ) +REGDEF(R12, 12, GPRMASK(12), "r12" ) +REGDEF(R13, 13, GPRMASK(13), "r13" ) +REGDEF(R14, 14, GPRMASK(14), "r14" ) +REGDEF(R15, 15, GPRMASK(15), "r15" ) +REGDEF(R16, 16, 
GPRMASK(16), "r16" ) +REGDEF(R17, 17, GPRMASK(17), "r17" ) +REGDEF(R18, 18, GPRMASK(18), "r18" ) +REGDEF(R19, 19, GPRMASK(19), "r19" ) +REGDEF(R20, 20, GPRMASK(20), "r20" ) +REGDEF(R21, 21, GPRMASK(21), "r21" ) +REGDEF(R22, 22, GPRMASK(22), "r22" ) +REGDEF(R23, 23, GPRMASK(23), "r23" ) +REGDEF(R24, 24, GPRMASK(24), "r24" ) +REGDEF(R25, 25, GPRMASK(25), "r25" ) +REGDEF(R26, 26, GPRMASK(26), "r26" ) +REGDEF(R27, 27, GPRMASK(27), "r27" ) +REGDEF(R28, 28, GPRMASK(28), "r28" ) +REGDEF(R29, 29, GPRMASK(29), "r29" ) +REGDEF(R30, 30, GPRMASK(30), "r30" ) +REGDEF(R31, 31, GPRMASK(31), "r31" ) REGALIAS(EAX, RAX) REGALIAS(ECX, RCX) @@ -75,11 +84,11 @@ REGALIAS(EDI, RDI) #endif // !defined(TARGET_X86) #ifdef TARGET_AMD64 -#define XMMBASE 24 +#define XMMBASE 32 #define XMMMASK(x) (1ULL << ((x)+XMMBASE)) -#define KBASE 56 -#define KMASK(x) (1ULL << ((x)+KBASE)) +#define KBASE 64 +#define KMASK(x) (1ULL << ((x))) #elif defined(TARGET_WASM) #define XMMBASE 16 @@ -228,6 +237,22 @@ REGDEF(STK, 8+KBASE, 0x0000, "STK" ) #define REG_R22 JITREG_R22 #undef REG_R23 #define REG_R23 JITREG_R23 +#undef REG_R24 +#define REG_R24 JITREG_R24 +#undef REG_R25 +#define REG_R25 JITREG_R25 +#undef REG_R26 +#define REG_R26 JITREG_R26 +#undef REG_R27 +#define REG_R27 JITREG_R27 +#undef REG_R28 +#define REG_R28 JITREG_R28 +#undef REG_R29 +#define REG_R29 JITREG_R29 +#undef REG_R30 +#define REG_R30 JITREG_R30 +#undef REG_R31 +#define REG_R31 JITREG_R31 #undef REG_EAX #define REG_EAX JITREG_EAX #undef REG_ECX diff --git a/src/coreclr/jit/registerargconvention.cpp b/src/coreclr/jit/registerargconvention.cpp deleted file mode 100644 index f58388a39672..000000000000 --- a/src/coreclr/jit/registerargconvention.cpp +++ /dev/null @@ -1,120 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "jitpch.h" -#ifdef _MSC_VER -#pragma hdrstop -#endif - -#include "registerargconvention.h" - -unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) -{ - assert(numRegs > 0); - - unsigned resultArgNum = regArgNum(type); - bool isBackFilled = false; - -#ifdef TARGET_ARM - // Check for back-filling - if (varTypeIsFloating(type) && // We only back-fill the float registers - !anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet) - (numRegs == 1) && // Is there a possibility we could back-fill? - (fltArgSkippedRegMask != RBM_NONE)) // Is there an available back-fill slot? - { - // We will never back-fill something greater than a single register - // (TYP_FLOAT, or TYP_STRUCT HFA with a single float). This is because - // we don't have any types that require > 2 register alignment, so we - // can't create a > 1 register alignment hole to back-fill. - - // Back-fill the register - regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); - fltArgSkippedRegMask &= ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask - resultArgNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask)); - assert(resultArgNum < MAX_FLOAT_REG_ARG); - isBackFilled = true; - } -#endif // TARGET_ARM - - if (!isBackFilled) - { -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - // For System V the reg type counters should be independent. - nextReg(TYP_INT, numRegs); - nextReg(TYP_FLOAT, numRegs); -#else - // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated. 
- nextReg(type, numRegs); -#endif - } - - return resultArgNum; -} - -bool InitVarDscInfo::enoughAvailRegs(var_types type, unsigned numRegs /* = 1 */) -{ - assert(numRegs > 0); - - unsigned backFillCount = 0; - -#ifdef TARGET_ARM - // Check for back-filling - if (varTypeIsFloating(type) && // We only back-fill the float registers - !anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet) - (numRegs == 1) && // Is there a possibility we could back-fill? - (fltArgSkippedRegMask != RBM_NONE)) // Is there an available back-fill slot? - { - backFillCount = 1; - } -#endif // TARGET_ARM - - return regArgNum(type) + numRegs - backFillCount <= maxRegArgNum(type); -} - -#ifdef TARGET_ARM -unsigned InitVarDscInfo::alignReg(var_types type, unsigned requiredRegAlignment) -{ - assert(requiredRegAlignment > 0); - if (requiredRegAlignment == 1) - { - return 0; // Everything is always "1" aligned - } - - assert(requiredRegAlignment == 2); // we don't expect anything else right now - - int alignMask = regArgNum(type) & (requiredRegAlignment - 1); - if (alignMask == 0) - { - return 0; // We're already aligned - } - - unsigned cAlignSkipped = requiredRegAlignment - alignMask; - assert(cAlignSkipped == 1); // Alignment is currently only 1 or 2, so misalignment can only be 1. - - if (varTypeIsFloating(type)) - { - fltArgSkippedRegMask |= genMapFloatRegArgNumToRegMask(floatRegArgNum); - } - - assert(regArgNum(type) + cAlignSkipped <= maxRegArgNum(type)); // if equal, then we aligned the last slot, and the - // arg can't be enregistered - regArgNum(type) += cAlignSkipped; - - return cAlignSkipped; -} -#endif // TARGET_ARM - -bool InitVarDscInfo::canEnreg(var_types type, unsigned numRegs /* = 1 */) -{ - if (!isRegParamType(type)) - { - return false; - } - - if (!enoughAvailRegs(type, numRegs)) - { - return false; - } - - return true; -} diff --git a/src/coreclr/jit/registerargconvention.h b/src/coreclr/jit/registerargconvention.h deleted file mode 100644 index 840f7adc4fce..000000000000 --- a/src/coreclr/jit/registerargconvention.h +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#ifndef __register_arg_convention__ -#define __register_arg_convention__ - -class LclVarDsc; - -struct InitVarDscInfo -{ - LclVarDsc* varDsc; - unsigned varNum; - - unsigned intRegArgNum; - unsigned floatRegArgNum; - unsigned maxIntRegArgNum; - unsigned maxFloatRegArgNum; - - bool hasRetBufArg; - -#ifdef TARGET_ARM - // Support back-filling of FP parameters. This is similar to code in gtMorphArgs() that - // handles arguments. 
- regMaskTP fltArgSkippedRegMask; - bool anyFloatStackArgs; -#endif // TARGET_ARM - -#if defined(TARGET_ARM) || defined(TARGET_RISCV64) - bool hasSplitParam; -#endif // TARGET_ARM || TARGET_RISCV64 - - // Bytes passed on the stack (including things like padding after structs) - unsigned stackArgSize; - -public: - // set to initial values - void Init(LclVarDsc* lvaTable, bool _hasRetBufArg, unsigned _maxIntRegArgNum, unsigned _maxFloatRegArgNum) - { - hasRetBufArg = _hasRetBufArg; - varDsc = lvaTable; // the first argument LclVar 0 - varNum = 0; // the first argument varNum 0 - intRegArgNum = 0; - floatRegArgNum = 0; - maxIntRegArgNum = _maxIntRegArgNum; - maxFloatRegArgNum = _maxFloatRegArgNum; - -#ifdef TARGET_ARM - fltArgSkippedRegMask = RBM_NONE; - anyFloatStackArgs = false; -#endif // TARGET_ARM - -#if defined(TARGET_ARM) || defined(TARGET_RISCV64) - hasSplitParam = false; -#endif // TARGET_ARM || TARGET_RISCV64 - - stackArgSize = 0; - } - - // return ref to current register arg for this type - unsigned& regArgNum(var_types type) - { - return varTypeUsesFloatArgReg(type) ? floatRegArgNum : intRegArgNum; - } - - // Allocate a set of contiguous argument registers. "type" is either an integer - // type, indicating to use the integer registers, or a floating-point type, indicating - // to use the floating-point registers. The actual type (TYP_FLOAT vs. TYP_DOUBLE) is - // ignored. "numRegs" is the number of registers to allocate. Thus, on ARM, to allocate - // a double-precision floating-point register, you need to pass numRegs=2. For an HFA, - // pass the number of slots/registers needed. - // This routine handles floating-point register back-filling on ARM. - // Returns the first argument register of the allocated set. - unsigned allocRegArg(var_types type, unsigned numRegs = 1); - -#ifdef TARGET_ARM - // We are aligning the register to an ABI-required boundary, such as putting - // double-precision floats in even-numbered registers, by skipping one register. - // "requiredRegAlignment" is the amount to align to: 1 for no alignment (everything - // is 1-aligned), 2 for "double" alignment. - // Returns the number of registers skipped. - unsigned alignReg(var_types type, unsigned requiredRegAlignment); -#endif // TARGET_ARM - - // Return true if it is an enregisterable type and there is room. - // Note that for "type", we only care if it is float or not. In particular, - // "numRegs" must be "2" to allocate an ARM double-precision floating-point register. - bool canEnreg(var_types type, unsigned numRegs = 1); - - // Set the fact that we have used up all remaining registers of 'type' - // - void setAllRegArgUsed(var_types type) - { - regArgNum(type) = maxRegArgNum(type); - } - -#ifdef TARGET_ARM - - void setAnyFloatStackArgs() - { - anyFloatStackArgs = true; - } - - bool existAnyFloatStackArgs() - { - return anyFloatStackArgs; - } - -#endif // TARGET_ARM - - void nextParam() - { - varDsc++; - varNum++; - } - -private: - // return max register arg for this type - unsigned maxRegArgNum(var_types type) - { - return varTypeUsesFloatArgReg(type) ? 
maxFloatRegArgNum : maxIntRegArgNum; - } - - bool enoughAvailRegs(var_types type, unsigned numRegs = 1); - - void nextReg(var_types type, unsigned numRegs = 1) - { - regArgNum(type) = min(regArgNum(type) + numRegs, maxRegArgNum(type)); - } -}; - -#endif // __register_arg_convention__ diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index b4bf5d8e28ce..e6124926c74a 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -605,7 +605,7 @@ var_types RegSet::tmpNormalizeType(var_types type) // We always spill SIMD12 to a 16-byte SIMD16 temp. // This is because we don't have a single instruction to store 12 bytes, so we want // to ensure that we always have the full 16 bytes for loading & storing the value. - // We also allocate non-argument locals as 16 bytes; see lvSize(). + // We also allocate non-argument locals as 16 bytes; see lvaLclStackHomeSize(). if (type == TYP_SIMD12) { type = TYP_SIMD16; @@ -950,27 +950,6 @@ regNumber genRegArgNext(regNumber argReg) } } -/***************************************************************************** - * - * The following table determines the order in which callee registers - * are encoded in GC information at call sites. - */ - -const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER}; - -regMaskTP genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask) -{ - regMaskTP res = 0; - for (int i = 0; i < CNT_CALL_GC_REGS; i++) - { - if ((calleeSaveMask & (1 << i)) != 0) - { - res |= raRbmCalleeSaveOrder[i]; - } - } - return res; -} - /***************************************************************************** * * Initializes the spill code. Should be called once per function compiled. diff --git a/src/coreclr/jit/scev.cpp b/src/coreclr/jit/scev.cpp index e1140e48660c..5a11e81e85a5 100644 --- a/src/coreclr/jit/scev.cpp +++ b/src/coreclr/jit/scev.cpp @@ -1,12 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// // This file contains code to analyze how the value of induction variables // evolve (scalar evolution analysis), and to turn them into the SCEV IR // defined in scev.h. The analysis is inspired by "Michael Wolfe. 1992. Beyond // induction variables." and also by LLVM's scalar evolution analysis. // -// The main idea of scalar evolution nalysis is to give a closed form +// The main idea of scalar evolution analysis is to give a closed form // describing the value of tree nodes inside loops even when taking into // account that they are changing on each loop iteration. This is useful for // optimizations that want to reason about values of IR nodes inside loops, @@ -28,34 +29,19 @@ // describes its value (possibly taking its evolution into account). Note that // SCEV nodes are immutable and the values they represent are _not_ // flow-dependent; that is, they don't exist at a specific location inside the -// loop, even though some particular tree node gave rise to that SCEV node. The -// analysis itself _is_ flow-dependent and guarantees that the Scev* returned -// describes the value that corresponds to what the tree node computes at its -// specific location. However, it would be perfectly legal for two trees at -// different locations in the loop to analyze to the same SCEV node (even -// potentially returning the same pointer). 
For example, in theory "i" and "j" -// in the following loop would both be represented by the same add recurrence -// , and the analysis could even return the same Scev* for both of -// them, even if it does not today: -// -// int i = 0; -// while (true) -// { -// i++; -// ... -// int j = i - 1; -// } -// -// Actually materializing the value of a SCEV node back into tree IR is not -// implemented yet, but generally would depend on the availability of tree -// nodes that compute the dependent values at the point where the IR is to be -// materialized. -// -// Besides the add recurrences the analysis itself is generally a -// straightforward translation from JIT IR into the SCEV IR. Creating the add -// recurrences requires paying attention to the structure of PHIs, and -// disambiguating the values coming from outside the loop and the values coming -// from the backedges. +// loop, even though some particular tree node gave rise to that SCEV node. +// +// The SCEV analysis is capable of: +// +// 1. Identifying both direct and indirect induction variables +// 2. Simplifying complex expressions involving induction variables +// 3. Determining when recurrences won't overflow during loop execution +// 4. Computing exact trip counts for countable loops +// 5. Converting SCEV expressions back to JIT IR and value numbers +// +// Understanding the relationship between values across iterations enables +// many loop optimizations, including strength reduction, loop reversal, +// and IV widening, which are implemented in inductionvariableopts.cpp. // #include "jitpch.h" diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 94a844aabb59..7822334e1d4e 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -1486,7 +1486,7 @@ void CodeGen::siBeginBlock(BasicBlock* block) return; } - if (block->HasFlag(BBF_FUNCLET_BEG)) + if (block == compiler->fgFirstFuncletBB) { // For now, don't report any scopes in funclets. JIT64 doesn't. siInFuncletRegion = true; @@ -1703,86 +1703,40 @@ void CodeGen::psiBegProlog() } siVarLoc varLocation; - if (lclVarDsc->lvIsRegArg) + regNumber reg1 = REG_NA; + regNumber reg2 = REG_NA; + + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(varScope->vsdVarNum); + for (const ABIPassingSegment& segment : abiInfo.Segments()) { - bool isStructHandled = false; -#if defined(UNIX_AMD64_ABI) - SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; - if (varTypeIsStruct(lclVarDsc)) + if (segment.IsPassedInRegister()) { - CORINFO_CLASS_HANDLE typeHnd = lclVarDsc->GetLayout()->GetClassHandle(); - assert(typeHnd != nullptr); - compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); - if (structDesc.passedInRegisters) + if (reg1 == REG_NA) { - regNumber regNum = REG_NA; - regNumber otherRegNum = REG_NA; - for (unsigned nCnt = 0; nCnt < structDesc.eightByteCount; nCnt++) - { - if (nCnt == 0) - { - regNum = lclVarDsc->GetArgReg(); - } - else if (nCnt == 1) - { - otherRegNum = lclVarDsc->GetOtherArgReg(); - } - else - { - assert(false && "Invalid eightbyte number."); - } - } - - varLocation.storeVariableInRegisters(regNum, otherRegNum); + reg1 = segment.GetRegister(); } else { - // Stack passed argument. Get the offset from the caller's frame. 
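As a worked example for the scalar-evolution overview added to scev.cpp above (illustrative only; the <start, +, step> notation for add recurrences follows that comment's own convention):

```cpp
// For the loop below, the analysis would conceptually describe:
//   i          as the add recurrence <0, +, 1>  (starts at 0, steps by 1 per iteration)
//   j = i * 4  as <0, +, 4>                     (a derived induction variable)
//   the trip count as n for non-negative n      (a "countable" loop)
int SumScaled(int n)
{
    int sum = 0;
    for (int i = 0; i < n; i++)
    {
        int j = i * 4;
        sum += j;
    }
    return sum;
}
```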
- varLocation.storeVariableOnStack(REG_SPBASE, psiGetVarStackOffset(lclVarDsc)); + reg2 = segment.GetRegister(); + break; } - - isStructHandled = true; } -#endif // !defined(UNIX_AMD64_ABI) - if (!isStructHandled) + else { -#ifdef DEBUG -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - var_types regType; - if (varTypeIsStruct(lclVarDsc)) - { - // Must be <= 16 bytes or else it wouldn't be passed in registers, - // which can be bigger (and is handled above). - noway_assert(EA_SIZE_IN_BYTES(lclVarDsc->lvSize()) <= 16); - if (emitter::isFloatReg(lclVarDsc->GetArgReg())) - { - regType = TYP_DOUBLE; - } - else - { - regType = lclVarDsc->GetLayout()->GetGCPtrType(0); - } - } - else - { - regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); - if (emitter::isGeneralRegisterOrR0(lclVarDsc->GetArgReg()) && isFloatRegType(regType)) - { - // For LoongArch64 and RISCV64's ABI, the float args may be passed by integer register. - regType = TYP_LONG; - } - } -#else - var_types regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); - if (lclVarDsc->lvIsHfaRegArg()) - { - regType = lclVarDsc->GetHfaType(); - } -#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) -#endif // DEBUG - varLocation.storeVariableInRegisters(lclVarDsc->GetArgReg(), REG_NA); + break; } } + + // We only report multiple registers on SysV ABI. On other ABIs we + // report only the first register. +#ifndef UNIX_AMD64_ABI + reg2 = REG_NA; +#endif + + if (reg1 != REG_NA) + { + varLocation.storeVariableInRegisters(reg1, reg2); + } else { varLocation.storeVariableOnStack(REG_SPBASE, psiGetVarStackOffset(lclVarDsc)); @@ -1996,7 +1950,7 @@ void CodeGen::genSetScopeInfo(unsigned which, // accessed via the varargs cookie. Discard generated info, // and just find its position relative to the varargs handle - PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount); + assert(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount); if (!compiler->lvaGetDesc(compiler->lvaVarargsHandleArg)->lvOnFrame) { noway_assert(!compiler->opts.compDbgCode); @@ -2011,7 +1965,7 @@ void CodeGen::genSetScopeInfo(unsigned which, noway_assert(cookieOffset < varOffset); unsigned offset = varOffset - cookieOffset; - unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES; + unsigned stkArgSize = compiler->lvaParameterStackSize; noway_assert(offset < stkArgSize); offset = stkArgSize - offset; diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 6256ee0c3779..771889fd9c6c 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -169,7 +169,7 @@ unsigned Compiler::getFFRegisterVarNum() // to determine if this api needs to be called. // // The type handle passed here can only be used in a subset of JIT-EE calls -// since it may be called by promotion during prejit of a method that does +// since it may be called by promotion during AOT of a method that does // not version with SPC. See CORINFO_TYPE_LAYOUT_NODE for the contract on // the supported JIT-EE calls. 
// @@ -437,9 +437,9 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH return CORINFO_TYPE_UNDEF; } - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - // We must treat as a regular struct if AVX512F isn't supported + // We must treat as a regular struct if AVX512 isn't supported return CORINFO_TYPE_UNDEF; } @@ -627,15 +627,12 @@ bool Compiler::areArrayElementsContiguous(GenTree* op1, GenTree* op2) GenTreeIndexAddr* op1IndexAddr = op1->AsIndir()->Addr()->AsIndexAddr(); GenTreeIndexAddr* op2IndexAddr = op2->AsIndir()->Addr()->AsIndexAddr(); + GenTree* op1ArrayRef = op1IndexAddr->Arr(); + GenTree* op2ArrayRef = op2IndexAddr->Arr(); + GenTree* op1IndexNode = op1IndexAddr->Index(); + GenTree* op2IndexNode = op2IndexAddr->Index(); - GenTree* op1ArrayRef = op1IndexAddr->Arr(); - GenTree* op2ArrayRef = op2IndexAddr->Arr(); - assert(op1ArrayRef->TypeGet() == TYP_REF); - assert(op2ArrayRef->TypeGet() == TYP_REF); - - GenTree* op1IndexNode = op1IndexAddr->Index(); - GenTree* op2IndexNode = op2IndexAddr->Index(); - if ((op1IndexNode->OperGet() == GT_CNS_INT && op2IndexNode->OperGet() == GT_CNS_INT) && + if ((op1IndexNode->OperIs(GT_CNS_INT) && op2IndexNode->OperIs(GT_CNS_INT)) && (op1IndexNode->AsIntCon()->gtIconVal + 1 == op2IndexNode->AsIntCon()->gtIconVal)) { if (op1ArrayRef->OperIs(GT_IND) && op2ArrayRef->OperIs(GT_IND)) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 2f7610b7e614..d0450fa91caf 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -330,18 +330,26 @@ struct simdmask_t return !(*this == other); } - static simdmask_t AllBitsSet() + static simdmask_t AllBitsSet(unsigned elementCount) { + assert((elementCount >= 1) && (elementCount <= 64)); simdmask_t result; - result.u64[0] = 0xFFFFFFFFFFFFFFFF; + if (elementCount == 64) + { + result.u64[0] = 0xFFFFFFFFFFFFFFFF; + } + else + { + result.u64[0] = (1ULL << elementCount) - 1; + } return result; } bool IsAllBitsSet() const { - return *this == AllBitsSet(); + return *this == AllBitsSet(64); } bool IsZero() const @@ -1412,7 +1420,7 @@ void EvaluateWithElementFloating(var_types simdBaseType, TSimd* result, const TS case TYP_DOUBLE: { - result->f64[arg1] = static_cast(arg2); + result->f64[arg1] = arg2; break; } diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index 57fcb5d46880..0ffca39bb312 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -430,8 +430,8 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node) if (tgtReg != REG_NA) { // We should never save to register for zmm. - assert(op1->TypeGet() == TYP_SIMD32); - GetEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tgtReg, op1Reg, 0x01); + assert(op1->TypeIs(TYP_SIMD32)); + GetEmitter()->emitIns_R_R_I(INS_vextractf32x4, EA_32BYTE, tgtReg, op1Reg, 0x01); genProduceReg(node); } else @@ -442,16 +442,16 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node) LclVarDsc* varDsc = compiler->lvaGetDesc(varNum); assert(varDsc->lvOnFrame); - if (op1->TypeGet() == TYP_SIMD32) + if (op1->TypeIs(TYP_SIMD32)) { // We want to store this to the upper 16 bytes of this localVar's home. 
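The simdmask_t::AllBitsSet change above takes the element count so that unused mask bits stay clear; the 64-element case is special-cased, presumably because shifting a 64-bit value by 64 is undefined behavior in C++. A standalone sketch of the same computation:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

uint64_t AllBitsSetMask(unsigned elementCount)
{
    assert((elementCount >= 1) && (elementCount <= 64));
    if (elementCount == 64)
    {
        // 1ULL << 64 would be undefined behavior, so the full mask is special-cased.
        return 0xFFFFFFFFFFFFFFFFULL;
    }
    return (1ULL << elementCount) - 1;
}

int main()
{
    printf("%016llx\n", (unsigned long long)AllBitsSetMask(8));  // 00000000000000ff
    printf("%016llx\n", (unsigned long long)AllBitsSetMask(16)); // 000000000000ffff
    printf("%016llx\n", (unsigned long long)AllBitsSetMask(64)); // ffffffffffffffff
    return 0;
}
```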
int offs = 16; - GetEmitter()->emitIns_S_R_I(INS_vextractf128, EA_32BYTE, varNum, offs, op1Reg, 0x01); + GetEmitter()->emitIns_S_R_I(INS_vextractf32x4, EA_32BYTE, varNum, offs, op1Reg, 0x01); } else { - assert(op1->TypeGet() == TYP_SIMD64); + assert(op1->TypeIs(TYP_SIMD64)); // We will save the whole 64 bytes for zmm. GetEmitter()->emitIns_S_R(INS_movups, EA_64BYTE, op1Reg, varNum, 0); } @@ -487,8 +487,8 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node) if (srcReg != REG_NA) { // We should never save to register for zmm. - assert(op1->TypeGet() == TYP_SIMD32); - GetEmitter()->emitIns_R_R_R_I(INS_vinsertf128, EA_32BYTE, lclVarReg, lclVarReg, srcReg, 0x01); + assert(op1->TypeIs(TYP_SIMD32)); + GetEmitter()->emitIns_R_R_R_I(INS_vinsertf32x4, EA_32BYTE, lclVarReg, lclVarReg, srcReg, 0x01); } else { @@ -496,15 +496,15 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node) unsigned varNum = op1->AsLclVarCommon()->GetLclNum(); LclVarDsc* varDsc = compiler->lvaGetDesc(varNum); assert(varDsc->lvOnFrame); - if (op1->TypeGet() == TYP_SIMD32) + if (op1->TypeIs(TYP_SIMD32)) { // We will load this from the upper 16 bytes of this localVar's home. int offs = 16; - GetEmitter()->emitIns_R_R_S_I(INS_vinsertf128, EA_32BYTE, lclVarReg, lclVarReg, varNum, offs, 0x01); + GetEmitter()->emitIns_R_R_S_I(INS_vinsertf32x4, EA_32BYTE, lclVarReg, lclVarReg, varNum, offs, 0x01); } else { - assert(op1->TypeGet() == TYP_SIMD64); + assert(op1->TypeIs(TYP_SIMD64)); // We will restore the whole 64 bytes for zmm. GetEmitter()->emitIns_R_S(INS_movups, EA_64BYTE, lclVarReg, varNum, 0); } diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index d4704d6995ad..52464eb998d9 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1544,7 +1544,7 @@ void Compiler::JitTestCheckSSA() assert(nodeExists); if (tlAndN.m_tl == TL_SsaName) { - if (node->OperGet() != GT_LCL_VAR) + if (!node->OperIs(GT_LCL_VAR)) { printf("SSAName constraint put on non-lcl-var expression "); printTreeID(node); diff --git a/src/coreclr/jit/stacklevelsetter.cpp b/src/coreclr/jit/stacklevelsetter.cpp index eab295b9bc0b..cfff07017a2c 100644 --- a/src/coreclr/jit/stacklevelsetter.cpp +++ b/src/coreclr/jit/stacklevelsetter.cpp @@ -24,28 +24,15 @@ StackLevelSetter::StackLevelSetter(Compiler* compiler) } //------------------------------------------------------------------------ -// DoPhase: Calculate stack slots numbers for outgoing args. +// DoPhase: Calculate stack slots numbers for outgoing args and compute +// requirements of throw helper blocks. // // Returns: // PhaseStatus indicating what, if anything, was changed. // -// Notes: -// For non-x86 platforms it calculates the max number of slots -// that calls inside this method can push on the stack. -// This value is used for sanity checks in the emitter. -// -// Stack slots are pointer-sized: 4 bytes for 32-bit platforms, 8 bytes for 64-bit platforms. -// -// For x86 it also sets throw-helper blocks incoming stack depth and set -// framePointerRequired when it is necessary. These values are used to pop -// pushed args when an exception occurs. 
-// PhaseStatus StackLevelSetter::DoPhase() { - for (BasicBlock* const block : comp->Blocks()) - { - ProcessBlock(block); - } + ProcessBlocks(); #if !FEATURE_FIXED_OUT_ARGS if (framePointerRequired) @@ -56,7 +43,6 @@ PhaseStatus StackLevelSetter::DoPhase() CheckAdditionalArgs(); - comp->fgSetPtrArgCntMax(maxStackLevel); CheckArgCnt(); // When optimizing, check if there are any unused throw helper blocks, @@ -109,12 +95,33 @@ PhaseStatus StackLevelSetter::DoPhase() } //------------------------------------------------------------------------ -// ProcessBlock: Do stack level calculations for one block. +// ProcessBlocks: Process all the blocks if necessary. +// +void StackLevelSetter::ProcessBlocks() +{ +#ifndef TARGET_X86 + // Outside x86 we do not need to compute pushed/popped stack slots. + // However, we do optimize throw-helpers and need to process the blocks for + // that, but only when optimizing. + if (!throwHelperBlocksUsed || comp->opts.OptimizationDisabled()) + { + return; + } +#endif + + for (BasicBlock* const block : comp->Blocks()) + { + ProcessBlock(block); + } +} + +//------------------------------------------------------------------------ +// ProcessBlock: Do stack level and throw helper determinations for one block. // // Notes: // Block starts and ends with an empty outgoing stack. // Nodes in blocks are iterated in the reverse order to memorize GT_PUTARG_STK -// and GT_PUTARG_SPLIT stack sizes. +// stack sizes. // // Also note which (if any) throw helper blocks might end up being used by // codegen. @@ -125,11 +132,14 @@ PhaseStatus StackLevelSetter::DoPhase() void StackLevelSetter::ProcessBlock(BasicBlock* block) { assert(currentStackLevel == 0); + LIR::ReadOnlyRange& range = LIR::AsRange(block); for (auto i = range.rbegin(); i != range.rend(); ++i) { GenTree* node = *i; - if (node->OperIsPutArgStkOrSplit()) + +#ifdef TARGET_X86 + if (node->OperIsPutArgStk()) { GenTreePutArgStk* putArg = node->AsPutArgStk(); unsigned numSlots = putArgNumSlots[putArg]; @@ -145,6 +155,7 @@ void StackLevelSetter::ProcessBlock(BasicBlock* block) call->gtArgs.SetStkSizeBytes(usedStackSlotsCount * TARGET_POINTER_SIZE); #endif // UNIX_X86_ABI } +#endif if (!throwHelperBlocksUsed) { @@ -206,6 +217,20 @@ void StackLevelSetter::SetThrowHelperBlocks(GenTree* node, BasicBlock* block) } break; +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + case GT_HWINTRINSIC: + { + + NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->GetHWIntrinsicId(); + if (intrinsicId == NI_Vector128_op_Division || intrinsicId == NI_Vector256_op_Division) + { + SetThrowHelperBlock(SCK_DIV_BY_ZERO, block); + SetThrowHelperBlock(SCK_OVERFLOW, block); + } + } + break; +#endif // defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + case GT_INDEX_ADDR: case GT_ARR_ELEM: SetThrowHelperBlock(SCK_RNGCHK_FAIL, block); @@ -345,11 +370,12 @@ unsigned StackLevelSetter::PopArgumentsFromCall(GenTreeCall* call) { for (CallArg& arg : call->gtArgs.Args()) { - const unsigned slotCount = arg.AbiInfo.GetStackSlotsNumber(); + unsigned slotCount = (arg.AbiInfo.StackBytesConsumed() + (TARGET_POINTER_SIZE - 1)) / TARGET_POINTER_SIZE; + if (slotCount != 0) { GenTree* node = arg.GetNode(); - assert(node->OperIsPutArgStkOrSplit()); + assert(node->OperIsPutArgStk()); GenTreePutArgStk* putArg = node->AsPutArgStk(); @@ -410,7 +436,12 @@ void StackLevelSetter::SubStackLevel(unsigned value) // void StackLevelSetter::CheckArgCnt() { - if (!comp->compCanEncodePtrArgCntMax()) +#ifdef JIT32_GCENCODER + // The GC encoding for fully interruptible 
methods does not + // support more than 1023 pushed arguments, so we have to + // use a partially interruptible GC info/encoding. + // + if (maxStackLevel >= MAX_PTRARG_OFS) { #ifdef DEBUG if (comp->verbose) @@ -421,6 +452,7 @@ void StackLevelSetter::CheckArgCnt() #endif comp->SetInterruptible(false); } + if (maxStackLevel >= sizeof(unsigned)) { #ifdef DEBUG @@ -431,6 +463,7 @@ void StackLevelSetter::CheckArgCnt() #endif comp->codeGen->setFramePointerRequired(true); } +#endif } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/stacklevelsetter.h b/src/coreclr/jit/stacklevelsetter.h index 45b7d13775af..2cb4ea612112 100644 --- a/src/coreclr/jit/stacklevelsetter.h +++ b/src/coreclr/jit/stacklevelsetter.h @@ -16,6 +16,7 @@ class StackLevelSetter final : public Phase virtual PhaseStatus DoPhase() override; private: + void ProcessBlocks(); void ProcessBlock(BasicBlock* block); void SetThrowHelperBlocks(GenTree* node, BasicBlock* block); diff --git a/src/coreclr/jit/switchrecognition.cpp b/src/coreclr/jit/switchrecognition.cpp index 7329194cb10c..6818a6b15b89 100644 --- a/src/coreclr/jit/switchrecognition.cpp +++ b/src/coreclr/jit/switchrecognition.cpp @@ -12,36 +12,55 @@ #define SWITCH_MIN_TESTS 3 //----------------------------------------------------------------------------- -// optSwitchRecognition: Optimize range check for `x == cns1 || x == cns2 || x == cns3 ...` -// pattern and convert it to Switch block (jump table) which is then *might* be converted +// optRecognizeAndOptimizeSwitchJumps: Optimize range check for `x == cns1 || x == cns2 || x == cns3 ...` +// pattern and convert it to a BBJ_SWITCH block (jump table), which then *might* be converted // to a bitmap test via TryLowerSwitchToBitTest. +// If we have PGO data, try peeling switches with dominant cases. // TODO: recognize general jump table patterns. // // Return Value: -// MODIFIED_EVERYTHING if the optimization was applied. +// MODIFIED_EVERYTHING if any switches were newly identified and/or optimized, false otherwise // -PhaseStatus Compiler::optSwitchRecognition() +PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps() { -// Limit to XARCH, ARM is already doing a great job with such comparisons using -// a series of ccmp instruction (see ifConvert phase). -#ifdef TARGET_XARCH bool modified = false; + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next()) { + if (block->isRunRarely()) + { + continue; + } + +// Limit to XARCH, ARM is already doing a great job with such comparisons using +// a series of ccmp instruction (see ifConvert phase). +#ifdef TARGET_XARCH // block->KindIs(BBJ_COND) check is for better throughput. - if (block->KindIs(BBJ_COND) && !block->isRunRarely() && optSwitchDetectAndConvert(block)) + if (block->KindIs(BBJ_COND) && optSwitchDetectAndConvert(block)) { JITDUMP("Converted block " FMT_BB " to switch\n", block->bbNum) modified = true; + + // Converted switches won't have dominant cases, so we can skip the switch peeling check. + assert(!block->GetSwitchTargets()->bbsHasDominantCase); } - } + else +#endif - if (modified) - { - return PhaseStatus::MODIFIED_EVERYTHING; + if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->bbsHasDominantCase) + { + fgPeelSwitch(block); + modified = true; + + // Switch peeling will convert this block into a check for the dominant case, + // and insert the updated switch block after, which doesn't have a dominant case. + // Skip over the switch block in the loop iteration. 
+ assert(block->Next()->KindIs(BBJ_SWITCH)); + block = block->Next(); + } } -#endif - return PhaseStatus::MODIFIED_NOTHING; + + return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } //------------------------------------------------------------------------------ @@ -138,11 +157,14 @@ bool IsConstantTestCondBlock(const BasicBlock* block, // // Arguments: // firstBlock - A block to start the search from +// testingForConversion - Test if its likely a switch conversion will happen. +// Used to prevent a pessimization when optimizing for conditional chaining. +// Done in this function to prevent maintaining the check in two places. // // Return Value: // True if the conversion was successful, false otherwise // -bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock) +bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingForConversion) { assert(firstBlock->KindIs(BBJ_COND)); @@ -187,7 +209,8 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock) { // Only the first conditional block can have multiple statements. // Stop searching and process what we already have. - return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); + return !testingForConversion && + optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); } // Inspect secondary blocks @@ -197,25 +220,29 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock) if (currTrueTarget != trueTarget) { // This blocks jumps to a different target, stop searching and process what we already have. - return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); + return !testingForConversion && + optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); } if (!GenTree::Compare(currVariableNode, variableNode->gtEffectiveVal())) { // A different variable node is used, stop searching and process what we already have. - return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); + return !testingForConversion && + optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); } if (currBb->GetUniquePred(this) != prevBlock) { // Multiple preds in a secondary block, stop searching and process what we already have. - return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); + return !testingForConversion && + optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); } if (!BasicBlock::sameEHRegion(prevBlock, currBb)) { // Current block is in a different EH region, stop searching and process what we already have. - return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); + return !testingForConversion && + optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); } // Ok we can work with that, add the test value to the list @@ -225,21 +252,27 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock) if (testValueIndex == SWITCH_MAX_DISTANCE) { // Too many suitable tests found - stop and process what we already have. - return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); + return !testingForConversion && + optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); } if (isReversed) { // We only support reversed test (GT_NE) for the last block. 
- return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); + return !testingForConversion && + optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); } + if (testingForConversion) + return true; + prevBlock = currBb; } else { // Current block is not a suitable test, stop searching and process what we already have. - return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); + return !testingForConversion && + optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); } } } diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index e07fc96d9379..f64c207e4990 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -243,10 +243,14 @@ typedef uint64_t regMaskSmall; #define REG_MASK_ALL_FMT "%016llX" #endif -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) #define HAS_MORE_THAN_64_REGISTERS 1 -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_AMD64 +#define REG_LOW_BASE 0 +#ifdef HAS_MORE_THAN_64_REGISTERS +#define REG_HIGH_BASE 64 +#endif // TODO: Rename regMaskSmall as RegSet64 (at least for 64-bit) typedef regMaskSmall SingleTypeRegSet; inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg); @@ -387,6 +391,11 @@ struct regMaskTP #endif } + static regMaskTP FromIntRegSet(SingleTypeRegSet intRegs) + { + return regMaskTP(intRegs); + } + void operator|=(const regMaskTP& second) { low |= second.getLow(); @@ -1082,16 +1091,6 @@ inline SingleTypeRegSet getSingleTypeRegMask(regNumber reg, var_types regType) return regMask; } -/***************************************************************************** - * - * These arrays list the callee-saved register numbers (and bitmaps, respectively) for - * the current architecture. - */ -extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALL_GC_REGS]; - -// This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask. -regMaskTP genRegMaskFromCalleeSavedMask(unsigned short); - /***************************************************************************** * * Assumes that "reg" is of the given "type". Return the next unused reg number after "reg" diff --git a/src/coreclr/jit/targetamd64.cpp b/src/coreclr/jit/targetamd64.cpp index 43c200fdd359..a9b6b5c0e5e2 100644 --- a/src/coreclr/jit/targetamd64.cpp +++ b/src/coreclr/jit/targetamd64.cpp @@ -117,14 +117,15 @@ ABIPassingInformation SysVX64Classifier::Classify(Compiler* comp, else { regNumber reg = varTypeUsesFloatArgReg(type) ? m_floatRegs.Dequeue() : m_intRegs.Dequeue(); - info = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(reg, 0, genTypeSize(type))); + info = ABIPassingInformation::FromSegmentByValue(comp, + ABIPassingSegment::InRegister(reg, 0, genTypeSize(type))); } } else { assert((m_stackArgSize % TARGET_POINTER_SIZE) == 0); unsigned size = type == TYP_STRUCT ? structLayout->GetSize() : genTypeSize(type); - info = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0, size)); + info = ABIPassingInformation::FromSegmentByValue(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0, size)); m_stackArgSize += roundUp(size, TARGET_POINTER_SIZE); } @@ -171,10 +172,12 @@ ABIPassingInformation WinX64Classifier::Classify(Compiler* comp, // vice versa. assert(m_intRegs.Count() == m_floatRegs.Count()); - unsigned typeSize = type == TYP_STRUCT ? 
structLayout->GetSize() : genTypeSize(type); + bool passedByRef = false; + unsigned typeSize = type == TYP_STRUCT ? structLayout->GetSize() : genTypeSize(type); if ((typeSize > TARGET_POINTER_SIZE) || !isPow2(typeSize)) { - typeSize = TARGET_POINTER_SIZE; // Passed by implicit byref + passedByRef = true; + typeSize = TARGET_POINTER_SIZE; } ABIPassingSegment segment; @@ -191,7 +194,7 @@ ABIPassingInformation WinX64Classifier::Classify(Compiler* comp, m_stackArgSize += TARGET_POINTER_SIZE; } - return ABIPassingInformation::FromSegment(comp, segment); + return ABIPassingInformation::FromSegment(comp, passedByRef, segment); } //----------------------------------------------------------------------------- diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 869bf1944ce5..3a78b7477bb5 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -102,7 +102,7 @@ #define LAST_FP_ARGREG REG_XMM3 #endif // !UNIX_AMD64_ABI - #define REGNUM_BITS 6 // number of bits in a REG_* + #define REGNUM_BITS 7 // number of bits in a REG_* #define REGSIZE_BYTES 8 // number of bytes in one register #define XMM_REGSIZE_BYTES 16 // XMM register size in bytes #define YMM_REGSIZE_BYTES 32 // YMM register size in bytes @@ -168,7 +168,7 @@ #define REG_FLT_CALLEE_SAVED_LAST REG_XMM15 #define RBM_LOWINT RBM_ALLINT_INIT - #define RBM_HIGHINT (RBM_R16|RBM_R17|RBM_R18|RBM_R19|RBM_R20|RBM_R21|RBM_R22|RBM_R23) + #define RBM_HIGHINT (RBM_R16|RBM_R17|RBM_R18|RBM_R19|RBM_R20|RBM_R21|RBM_R22|RBM_R23|RBM_R24|RBM_R25|RBM_R26|RBM_R27|RBM_R28|RBM_R29|RBM_R30|RBM_R31) #define RBM_ALLINT_INIT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH_INIT) #define RBM_ALLINT get_RBM_ALLINT() @@ -265,7 +265,7 @@ // when the hardware supports it. There are no additional hidden costs for these. 
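Putting the register.h and targetamd64.h changes above together: integer register masks now occupy bits 0-31 (the new R24-R31 presumably being the APX extended GPRs, which is an assumption here), XMM registers start at number 32, and the K mask registers start at number 64, which is why REGNUM_BITS grows from 6 to 7 and AMD64 now defines HAS_MORE_THAN_64_REGISTERS. A quick sanity-check snippet:

```cpp
#include <cstdint>
#include <cstdio>

#define GPRMASK(x) (1ULL << (x)) // as in register.h
#define XMMBASE 32               // XMM0 follows the 32 GPRs
#define KBASE   64               // K0 follows the 32 XMM registers

int main()
{
    // RBM_HIGHINT can now include R24..R31 because GPR masks span bits 0..31.
    printf("RBM_R24 = 0x%016llx\n", (unsigned long long)GPRMASK(24));
    printf("RBM_R31 = 0x%016llx\n", (unsigned long long)GPRMASK(31));

    // Register numbers now reach KBASE (64) and beyond, so 6 bits (max 63) no longer
    // suffice for REGNUM_BITS, and a single 64-bit word can no longer hold one bit
    // per register - hence HAS_MORE_THAN_64_REGISTERS on AMD64.
    printf("first XMM register number = %d, first K register number = %d\n", XMMBASE, KBASE);
    return 0;
}
```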
#ifdef UNIX_AMD64_ABI - #define REG_VAR_ORDER_CALLEE_TRASH REG_EAX,REG_ECX,REG_EDX,REG_EDI,REG_ESI,REG_R8,REG_R9,REG_R10,REG_R11,REG_R16,REG_R17,REG_R18,REG_R19,REG_R20,REG_R21,REG_R22,REG_R23 + #define REG_VAR_ORDER_CALLEE_TRASH REG_EAX,REG_ECX,REG_EDX,REG_EDI,REG_ESI,REG_R8,REG_R9,REG_R10,REG_R11,REG_R16,REG_R17,REG_R18,REG_R19,REG_R20,REG_R21,REG_R22,REG_R23,REG_R24,REG_R25,REG_R26,REG_R27,REG_R28,REG_R29,REG_R30,REG_R31 #define REG_VAR_ORDER_CALLEE_SAVED REG_EBX,REG_ETW_FRAMED_EBP_LIST REG_R15,REG_R14,REG_R13,REG_R12 #define REG_VAR_ORDER_FLT_CALLEE_TRASH REG_XMM0,REG_XMM1,REG_XMM2,REG_XMM3,REG_XMM4,REG_XMM5,REG_XMM6,REG_XMM7, \ @@ -279,7 +279,7 @@ REG_XMM27,REG_XMM28,REG_XMM29,REG_XMM30,REG_XMM31 #define REG_VAR_ORDER_FLT_EVEX_CALLEE_SAVED REG_VAR_ORDER_FLT_CALLEE_SAVED #else // !UNIX_AMD64_ABI - #define REG_VAR_ORDER_CALLEE_TRASH REG_EAX,REG_ECX,REG_EDX,REG_R8,REG_R10,REG_R9,REG_R11,REG_R16,REG_R17,REG_R18,REG_R19,REG_R20,REG_R21,REG_R22,REG_R23 + #define REG_VAR_ORDER_CALLEE_TRASH REG_EAX,REG_ECX,REG_EDX,REG_R8,REG_R10,REG_R9,REG_R11,REG_R16,REG_R17,REG_R18,REG_R19,REG_R20,REG_R21,REG_R22,REG_R23,REG_R24,REG_R25,REG_R26,REG_R27,REG_R28,REG_R29,REG_R30,REG_R31 #define REG_VAR_ORDER_CALLEE_SAVED REG_EBX,REG_ESI,REG_EDI,REG_ETW_FRAMED_EBP_LIST REG_R14,REG_R15,REG_R13,REG_R12 #define REG_VAR_ORDER_FLT_CALLEE_TRASH REG_XMM0,REG_XMM1,REG_XMM2,REG_XMM3,REG_XMM4,REG_XMM5 @@ -300,35 +300,27 @@ #ifdef UNIX_AMD64_ABI #define CNT_CALLEE_SAVED (5 + REG_ETW_FRAMED_EBP_COUNT) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED + 2) #define CNT_CALLEE_TRASH_INT_INIT (9) - #define CNT_CALLEE_TRASH_HIGHINT (8) + #define CNT_CALLEE_TRASH_HIGHINT (16) #define CNT_CALLEE_SAVED_FLOAT (0) #define CNT_CALLEE_TRASH_FLOAT_INIT (16) #define CNT_CALLEE_TRASH_HIGHFLOAT (16) - /* NOTE: Sync with variable name defined in compiler.h */ - #define RBM_CALL_GC_REGS_ORDER RBM_EBX,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15,RBM_INTRET,RBM_INTRET_1 - #define RBM_CALL_GC_REGS (RBM_EBX|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_INTRET|RBM_INTRET_1) // For SysV we have more volatile registers so we do not save any callee saves for EnC. #define RBM_ENC_CALLEE_SAVED 0 #else // !UNIX_AMD64_ABI #define CNT_CALLEE_SAVED (7 + REG_ETW_FRAMED_EBP_COUNT) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED + 1) #define CNT_CALLEE_TRASH_INT_INIT (7) - #define CNT_CALLEE_TRASH_HIGHINT (8) + #define CNT_CALLEE_TRASH_HIGHINT (16) #define CNT_CALLEE_SAVED_FLOAT (10) #define CNT_CALLEE_TRASH_FLOAT_INIT (6) #define CNT_CALLEE_TRASH_HIGHFLOAT (16) - /* NOTE: Sync with variable name defined in compiler.h */ - #define RBM_CALL_GC_REGS_ORDER RBM_EBX,RBM_ESI,RBM_EDI,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15,RBM_INTRET - #define RBM_CALL_GC_REGS (RBM_EBX|RBM_ESI|RBM_EDI|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_INTRET) // Callee-preserved registers we always save and allow use of for EnC code, since there are quite few volatile registers. #define RBM_ENC_CALLEE_SAVED (RBM_RSI | RBM_RDI) @@ -355,11 +347,11 @@ // Where is the exception object on entry to the handler block? 
#ifdef UNIX_AMD64_ABI - #define REG_EXCEPTION_OBJECT REG_ESI - #define RBM_EXCEPTION_OBJECT RBM_ESI + #define REG_EXCEPTION_OBJECT REG_EDI + #define RBM_EXCEPTION_OBJECT RBM_EDI #else // !UNIX_AMD64_ABI - #define REG_EXCEPTION_OBJECT REG_EDX - #define RBM_EXCEPTION_OBJECT RBM_EDX + #define REG_EXCEPTION_OBJECT REG_ECX + #define RBM_EXCEPTION_OBJECT RBM_ECX #endif // !UNIX_AMD64_ABI #define REG_JUMP_THUNK_PARAM REG_EAX @@ -398,7 +390,7 @@ // The following defines are useful for iterating a regNumber #define REG_FIRST REG_EAX #define REG_INT_FIRST REG_EAX - #define REG_INT_LAST REG_R23 + #define REG_INT_LAST REG_R31 #define REG_INT_COUNT (get_REG_INT_LAST() - REG_INT_FIRST + 1) #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1)) #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1)) @@ -548,6 +540,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_RAX + #define REG_ASYNC_CONTINUATION_RET REG_RCX + #define RBM_ASYNC_CONTINUATION_RET RBM_RCX + // What sort of reloc do we use for [disp32] address mode #define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_REL32 diff --git a/src/coreclr/jit/targetarm.cpp b/src/coreclr/jit/targetarm.cpp index 3118621a240e..00071fe98041 100644 --- a/src/coreclr/jit/targetarm.cpp +++ b/src/coreclr/jit/targetarm.cpp @@ -194,10 +194,9 @@ ABIPassingInformation Arm32Classifier::ClassifyFloat(Compiler* comp, var_types t // As soon as any float arg goes on stack no other float arg can go in a register. m_floatRegs = 0; - m_stackArgSize = roundUp(m_stackArgSize, genTypeSize(type)); - ABIPassingInformation info = - ABIPassingInformation::FromSegment(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0, - numElems * genTypeSize(type))); + m_stackArgSize = roundUp(m_stackArgSize, genTypeSize(type)); + ABIPassingSegment segment = ABIPassingSegment::OnStack(m_stackArgSize, 0, numElems * genTypeSize(type)); + ABIPassingInformation info = ABIPassingInformation::FromSegmentByValue(comp, segment); m_stackArgSize += numElems * genTypeSize(type); return info; diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h index 710187e70b06..0cb5ff9aa27f 100644 --- a/src/coreclr/jit/targetarm.h +++ b/src/coreclr/jit/targetarm.h @@ -89,13 +89,9 @@ #define RBM_LOW_REGS (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7) #define RBM_HIGH_REGS (RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_SP|RBM_LR|RBM_PC) - #define RBM_CALL_GC_REGS_ORDER RBM_R4,RBM_R5,RBM_R6,RBM_R7,RBM_R8,RBM_R9,RBM_R10,RBM_R11,RBM_INTRET - #define RBM_CALL_GC_REGS (RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_INTRET) - #define CNT_CALLEE_SAVED (8) #define CNT_CALLEE_TRASH (6) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED+1) #define CNT_CALLEE_SAVED_FLOAT (16) #define CNT_CALLEE_TRASH_FLOAT (16) @@ -142,7 +138,8 @@ // On exit: // r0: trashed // r3: trashed - // CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): + // r12: trashed +// CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): // On entry: // r0: the destination address (object reference written here) // r1: the source address (points to object reference to write) @@ -151,6 +148,7 @@ // r1: incremented by 4 // r2: trashed // r3: trashed + // r12: trashed #define REG_WRITE_BARRIER_DST REG_ARG_0 #define RBM_WRITE_BARRIER_DST RBM_ARG_0 @@ -251,6 +249,9 @@ #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R0 + #define REG_ASYNC_CONTINUATION_RET REG_R2 + #define RBM_ASYNC_CONTINUATION_RET RBM_R2 + 
#define REG_FPBASE REG_R11 #define RBM_FPBASE RBM_R11 #define STR_FPBASE "r11" diff --git a/src/coreclr/jit/targetarm64.cpp b/src/coreclr/jit/targetarm64.cpp index 5473db7296ec..244ffddaaff2 100644 --- a/src/coreclr/jit/targetarm64.cpp +++ b/src/coreclr/jit/targetarm64.cpp @@ -59,8 +59,8 @@ ABIPassingInformation Arm64Classifier::Classify(Compiler* comp, { if ((wellKnownParam == WellKnownArg::RetBuffer) && hasFixedRetBuffReg(m_info.CallConv)) { - return ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_ARG_RET_BUFF, 0, - TARGET_POINTER_SIZE)); + return ABIPassingInformation::FromSegmentByValue(comp, ABIPassingSegment::InRegister(REG_ARG_RET_BUFF, 0, + TARGET_POINTER_SIZE)); } // First handle HFA/HVAs. These are allowed to be passed in more registers @@ -88,8 +88,11 @@ ABIPassingInformation Arm64Classifier::Classify(Compiler* comp, unsigned alignment = compAppleArm64Abi() ? min(elemSize, (unsigned)TARGET_POINTER_SIZE) : TARGET_POINTER_SIZE; m_stackArgSize = roundUp(m_stackArgSize, alignment); - info = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0, - structLayout->GetSize())); + ABIPassingSegment segment = + alignment < TARGET_POINTER_SIZE + ? ABIPassingSegment::OnStackWithoutConsumingFullSlot(m_stackArgSize, 0, structLayout->GetSize()) + : ABIPassingSegment::OnStack(m_stackArgSize, 0, structLayout->GetSize()); + info = ABIPassingInformation::FromSegmentByValue(comp, segment); m_stackArgSize += roundUp(structLayout->GetSize(), alignment); // After passing any float value on the stack, we should not enregister more float values. m_floatRegs.Clear(); @@ -101,13 +104,15 @@ ABIPassingInformation Arm64Classifier::Classify(Compiler* comp, unsigned slots; unsigned passedSize; + bool passedByRef = false; if (varTypeIsStruct(type)) { unsigned size = structLayout->GetSize(); if (size > 16) { - slots = 1; // Passed by implicit byref - passedSize = TARGET_POINTER_SIZE; + passedByRef = true; + slots = 1; + passedSize = TARGET_POINTER_SIZE; } else { @@ -153,39 +158,44 @@ ABIPassingInformation Arm64Classifier::Classify(Compiler* comp, if (regs->Count() >= slots) { - info = ABIPassingInformation(comp, slots); - unsigned slotSize = min(passedSize, (unsigned)TARGET_POINTER_SIZE); - info.Segment(0) = ABIPassingSegment::InRegister(regs->Dequeue(), 0, slotSize); - if (slots == 2) + unsigned slotSize = min(passedSize, (unsigned)TARGET_POINTER_SIZE); + ABIPassingSegment firstSegment = ABIPassingSegment::InRegister(regs->Dequeue(), 0, slotSize); + if (slots == 1) { - assert(varTypeIsStruct(type)); - unsigned tailSize = structLayout->GetSize() - slotSize; - info.Segment(1) = ABIPassingSegment::InRegister(regs->Dequeue(), slotSize, tailSize); + info = ABIPassingInformation::FromSegment(comp, passedByRef, firstSegment); + } + else + { + info = ABIPassingInformation(comp, slots); + info.Segment(0) = firstSegment; + if (slots == 2) + { + assert(varTypeIsStruct(type)); + unsigned tailSize = structLayout->GetSize() - slotSize; + info.Segment(1) = ABIPassingSegment::InRegister(regs->Dequeue(), slotSize, tailSize); + } } } else { - unsigned alignment; + ABIPassingSegment segment; + unsigned alignment; if (compAppleArm64Abi()) { - if (varTypeIsStruct(type)) - { - alignment = TARGET_POINTER_SIZE; - } - else - { - alignment = genTypeSize(type); - } - + alignment = varTypeIsStruct(type) ? TARGET_POINTER_SIZE : genTypeSize(type); m_stackArgSize = roundUp(m_stackArgSize, alignment); + segment = alignment < TARGET_POINTER_SIZE + ? 
ABIPassingSegment::OnStackWithoutConsumingFullSlot(m_stackArgSize, 0, passedSize) + : ABIPassingSegment::OnStack(m_stackArgSize, 0, passedSize); } else { alignment = TARGET_POINTER_SIZE; assert((m_stackArgSize % TARGET_POINTER_SIZE) == 0); + segment = ABIPassingSegment::OnStack(m_stackArgSize, 0, passedSize); } - info = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0, passedSize)); + info = ABIPassingInformation::FromSegment(comp, passedByRef, segment); m_stackArgSize += roundUp(passedSize, alignment); @@ -195,6 +205,7 @@ ABIPassingInformation Arm64Classifier::Classify(Compiler* comp, } } + assert(info.IsPassedByReference() == passedByRef); return info; } diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index f5b96a2bc610..678a05e181e4 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -108,13 +108,9 @@ REG_V12, REG_V13, REG_V14, REG_V15, \ REG_V3, REG_V2, REG_V1, REG_V0 - #define RBM_CALL_GC_REGS_ORDER RBM_R19,RBM_R20,RBM_R21,RBM_R22,RBM_R23,RBM_R24,RBM_R25,RBM_R26,RBM_R27,RBM_R28,RBM_INTRET,RBM_INTRET_1 - #define RBM_CALL_GC_REGS (RBM_R19|RBM_R20|RBM_R21|RBM_R22|RBM_R23|RBM_R24|RBM_R25|RBM_R26|RBM_R27|RBM_R28|RBM_INTRET|RBM_INTRET_1) - #define CNT_CALLEE_SAVED (11) #define CNT_CALLEE_TRASH (17) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED+2) #define CNT_CALLEE_SAVED_FLOAT (8) #define CNT_CALLEE_TRASH_FLOAT (24) @@ -267,6 +263,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_R9 + #define REG_ASYNC_CONTINUATION_RET REG_R2 + #define RBM_ASYNC_CONTINUATION_RET RBM_R2 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetloongarch64.cpp b/src/coreclr/jit/targetloongarch64.cpp index 1e876d7eb5f6..1b3fde2e74f2 100644 --- a/src/coreclr/jit/targetloongarch64.cpp +++ b/src/coreclr/jit/targetloongarch64.cpp @@ -67,12 +67,14 @@ ABIPassingInformation LoongArch64Classifier::Classify(Compiler* comp, unsigned argRegOffset2 = 0; bool canPassArgInRegisters = false; + bool passedByRef = false; if (varTypeIsStruct(type)) { passedSize = structLayout->GetSize(); if (passedSize > MAX_PASS_MULTIREG_BYTES) { - slots = 1; // Passed by implicit byref + passedByRef = true; + slots = 1; passedSize = TARGET_POINTER_SIZE; canPassArgInRegisters = m_intRegs.Count() > 0; } @@ -165,9 +167,9 @@ ABIPassingInformation LoongArch64Classifier::Classify(Compiler* comp, ABIPassingInformation info; if (canPassArgInRegisters) { - info = ABIPassingInformation(comp, slots); if (argRegTypeInStruct1 != TYP_UNKNOWN) { + info = ABIPassingInformation(comp, slots); RegisterQueue* regs = varTypeIsFloating(argRegTypeInStruct1) ? &m_floatRegs : &m_intRegs; assert(regs->Count() > 0); @@ -186,13 +188,21 @@ ABIPassingInformation LoongArch64Classifier::Classify(Compiler* comp, } else { - RegisterQueue* regs = varTypeIsFloating(type) ? &m_floatRegs : &m_intRegs; - unsigned slotSize = min(passedSize, (unsigned)TARGET_POINTER_SIZE); - info.Segment(0) = ABIPassingSegment::InRegister(regs->Dequeue(), 0, slotSize); - if (slots == 2) + RegisterQueue* regs = varTypeIsFloating(type) ? 
&m_floatRegs : &m_intRegs; + unsigned slotSize = min(passedSize, (unsigned)TARGET_POINTER_SIZE); + ABIPassingSegment firstSegment = ABIPassingSegment::InRegister(regs->Dequeue(), 0, slotSize); + if (slots == 1) { + info = ABIPassingInformation::FromSegment(comp, passedByRef, firstSegment); + } + else + { + assert(slots == 2); assert(varTypeIsStruct(type)); assert(passedSize > TARGET_POINTER_SIZE); + + info = ABIPassingInformation(comp, slots); + info.Segment(0) = firstSegment; unsigned tailSize = passedSize - slotSize; if (m_intRegs.Count() > 0) { @@ -212,7 +222,8 @@ ABIPassingInformation LoongArch64Classifier::Classify(Compiler* comp, { assert((m_stackArgSize % TARGET_POINTER_SIZE) == 0); - info = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0, passedSize)); + info = ABIPassingInformation::FromSegment(comp, passedByRef, + ABIPassingSegment::OnStack(m_stackArgSize, 0, passedSize)); m_stackArgSize += roundUp(passedSize, TARGET_POINTER_SIZE); diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 452778c31963..d691f4c8fd1e 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -246,6 +246,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + #define REG_ASYNC_CONTINUATION_RET REG_A2 + #define RBM_ASYNC_CONTINUATION_RET RBM_A2 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetriscv64.cpp b/src/coreclr/jit/targetriscv64.cpp index b91420136fb7..4290ad00bd61 100644 --- a/src/coreclr/jit/targetriscv64.cpp +++ b/src/coreclr/jit/targetriscv64.cpp @@ -62,13 +62,15 @@ ABIPassingInformation RiscV64Classifier::Classify(Compiler* comp, unsigned intFields = 0, floatFields = 0; unsigned passedSize; + bool passedByRef = false; if (varTypeIsStruct(type)) { passedSize = structLayout->GetSize(); if (passedSize > MAX_PASS_MULTIREG_BYTES) { - passedSize = TARGET_POINTER_SIZE; // pass by reference + passedByRef = true; + passedSize = TARGET_POINTER_SIZE; } else if (!structLayout->IsBlockLayout()) { @@ -113,7 +115,7 @@ ABIPassingInformation RiscV64Classifier::Classify(Compiler* comp, assert(varTypeIsFloating(type)); ABIPassingSegment seg = ABIPassingSegment::InRegister(m_floatRegs.Dequeue(), offset, passedSize); - return ABIPassingInformation::FromSegment(comp, seg); + return ABIPassingInformation::FromSegmentByValue(comp, seg); } else { @@ -151,7 +153,7 @@ ABIPassingInformation RiscV64Classifier::Classify(Compiler* comp, if (passedSize <= TARGET_POINTER_SIZE) { ABIPassingSegment seg = ABIPassingSegment::InRegister(m_intRegs.Dequeue(), 0, passedSize); - return ABIPassingInformation::FromSegment(comp, seg); + return ABIPassingInformation::FromSegment(comp, passedByRef, seg); } else { @@ -168,7 +170,7 @@ ABIPassingInformation RiscV64Classifier::Classify(Compiler* comp, } else { - return ABIPassingInformation::FromSegment(comp, passOnStack(0, passedSize)); + return ABIPassingInformation::FromSegment(comp, passedByRef, passOnStack(0, passedSize)); } } } diff --git a/src/coreclr/jit/targetriscv64.h b/src/coreclr/jit/targetriscv64.h index e5dcded3d878..ee6c6d22260c 100644 --- a/src/coreclr/jit/targetriscv64.h +++ b/src/coreclr/jit/targetriscv64.h @@ -222,6 +222,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + #define REG_ASYNC_CONTINUATION_RET REG_A2 + #define RBM_ASYNC_CONTINUATION_RET RBM_A2 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP 
#define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetx86.cpp b/src/coreclr/jit/targetx86.cpp index e00bba78a3a5..b51ec62d8b9b 100644 --- a/src/coreclr/jit/targetx86.cpp +++ b/src/coreclr/jit/targetx86.cpp @@ -136,7 +136,7 @@ ABIPassingInformation X86Classifier::Classify(Compiler* comp, segment = ABIPassingSegment::OnStack(offset, 0, size); } - return ABIPassingInformation::FromSegment(comp, segment); + return ABIPassingInformation::FromSegmentByValue(comp, segment); } #endif // TARGET_X86 diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index 2e46478690e5..ac0130f5d274 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -52,9 +52,6 @@ // target #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, // filter-handler, fault) and directly execute 'finally' clauses. -#if !defined(UNIX_X86_ABI) - #define FEATURE_EH_WINDOWS_X86 1 // Enable support for SEH regions -#endif #define ETW_EBP_FRAMED 1 // if 1 we cannot use EBP as a scratch register and must create EBP based // frames for most methods #define CSE_CONSTS 1 // Enable if we want to CSE constants @@ -143,16 +140,9 @@ #define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX #define MAX_VAR_ORDER_SIZE 6 - // The order here is fixed: it must agree with an order assumed in eetwain... - // NB: x86 GC decoder does not report return registers at call sites. - #define RBM_CALL_GC_REGS_ORDER RBM_EDI,RBM_ESI,RBM_EBX,RBM_EBP - #define RBM_CALL_GC_REGS (RBM_EDI|RBM_ESI|RBM_EBX|RBM_EBP) - #define CNT_CALLEE_SAVED (4) #define CNT_CALLEE_TRASH (3) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) - // NB: x86 GC decoder does not report return registers at call sites. - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED) #define CNT_CALLEE_SAVED_FLOAT (0) #define CNT_CALLEE_TRASH_FLOAT (6) @@ -301,6 +291,9 @@ #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~RBM_ECX) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_ECX + #define REG_ASYNC_CONTINUATION_RET REG_ECX + #define RBM_ASYNC_CONTINUATION_RET RBM_ECX + #define REG_FPBASE REG_EBP #define RBM_FPBASE RBM_EBP #define STR_FPBASE "ebp" diff --git a/src/coreclr/jit/treelifeupdater.cpp b/src/coreclr/jit/treelifeupdater.cpp index 4fc5a283f4a7..b16bb6bc9689 100644 --- a/src/coreclr/jit/treelifeupdater.cpp +++ b/src/coreclr/jit/treelifeupdater.cpp @@ -305,7 +305,6 @@ void TreeLifeUpdater::UpdateLife(GenTree* tree) } // Note that after lowering, we can see indirect uses and definitions of tracked variables. - // TODO-Bug: we're not handling calls with return buffers here properly. 
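+ // Calls that write a tracked local through a return buffer are picked up below via
+ // gtCallGetDefinedRetBufLclAddr, so such definitions update liveness like any other store.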
GenTreeLclVarCommon* lclVarTree = nullptr; if (tree->OperIsNonPhiLocal()) { @@ -315,6 +314,10 @@ void TreeLifeUpdater::UpdateLife(GenTree* tree) { lclVarTree = tree->AsIndir()->Addr()->AsLclVarCommon(); } + else if (tree->IsCall()) + { + lclVarTree = compiler->gtCallGetDefinedRetBufLclAddr(tree->AsCall()); + } if (lclVarTree != nullptr) { diff --git a/src/coreclr/jit/unwindamd64.cpp b/src/coreclr/jit/unwindamd64.cpp index e42a4368581f..768b82f91258 100644 --- a/src/coreclr/jit/unwindamd64.cpp +++ b/src/coreclr/jit/unwindamd64.cpp @@ -71,6 +71,54 @@ short Compiler::mapRegNumToDwarfReg(regNumber reg) case REG_R15: dwarfReg = 15; break; + case REG_R16: + dwarfReg = 16; + break; + case REG_R17: + dwarfReg = 17; + break; + case REG_R18: + dwarfReg = 18; + break; + case REG_R19: + dwarfReg = 19; + break; + case REG_R20: + dwarfReg = 20; + break; + case REG_R21: + dwarfReg = 21; + break; + case REG_R22: + dwarfReg = 22; + break; + case REG_R23: + dwarfReg = 23; + break; + case REG_R24: + dwarfReg = 24; + break; + case REG_R25: + dwarfReg = 25; + break; + case REG_R26: + dwarfReg = 26; + break; + case REG_R27: + dwarfReg = 27; + break; + case REG_R28: + dwarfReg = 28; + break; + case REG_R29: + dwarfReg = 29; + break; + case REG_R30: + dwarfReg = 30; + break; + case REG_R31: + dwarfReg = 31; + break; default: noway_assert(!"unexpected REG_NUM"); } @@ -399,7 +447,22 @@ void Compiler::unwindSaveRegWindows(regNumber reg, unsigned offset) code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)]; code->UnwindOp = (genIsValidFloatReg(reg)) ? UWOP_SAVE_XMM128_FAR : UWOP_SAVE_NONVOL_FAR; } - code->OpInfo = (BYTE)reg; + unsigned unwindRegNum; + if (genIsValidFloatReg(reg)) + { + unwindRegNum = reg - XMMBASE; + } + else + { + assert(genIsValidIntReg(reg)); + unwindRegNum = reg; + } + // We only add unwind codes for non-volatile registers and for x86/x64, + // the max registers index for a non-volatile register is 15. + assert(unwindRegNum <= 15); + code->OpInfo = (UCHAR)unwindRegNum; + assert((unsigned)code->OpInfo == unwindRegNum); + unsigned int cbProlog = unwindGetCurrentOffset(func); noway_assert((BYTE)cbProlog == cbProlog); code->CodeOffset = (BYTE)cbProlog; diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 3b16eec3c7cc..26e3b60f8dfb 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -23,6 +23,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "opcode.h" #include "jitstd/algorithm.h" +#include "minipal/time.h" /*****************************************************************************/ @@ -1086,7 +1087,7 @@ void ConfigDoubleArray::Dump() #endif // defined(DEBUG) -#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC +#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC void Counter::dump(FILE* output) { @@ -1252,7 +1253,7 @@ void DumpOnShutdown::DumpAll() } } -#endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE +#endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || EMITTER_STATS || MEASURE_NODE_SIZE /***************************************************************************** * Fixed bit vector class @@ -1518,7 +1519,7 @@ void HelperCallProperties::init() bool mutatesHeap = false; // true if any previous heap objects [are|can be] modified bool mayRunCctor = false; // true if the helper call may cause a static constructor to be run. 
bool isNoEscape = false; // true if none of the GC ref arguments can escape - bool isNoGC = false; // true is the helper cannot trigger GC + bool isNoGC = false; // true if the helper cannot trigger GC switch (helper) { @@ -1529,7 +1530,9 @@ void HelperCallProperties::init() isNoGC = true; FALLTHROUGH; case CORINFO_HELP_LMUL: + case CORINFO_HELP_LNG2FLT: case CORINFO_HELP_LNG2DBL: + case CORINFO_HELP_ULNG2FLT: case CORINFO_HELP_ULNG2DBL: case CORINFO_HELP_DBL2INT: case CORINFO_HELP_DBL2LNG: @@ -1595,7 +1598,7 @@ void HelperCallProperties::init() case CORINFO_HELP_NEW_MDARR_RARE: case CORINFO_HELP_NEWARR_1_DIRECT: case CORINFO_HELP_NEWARR_1_MAYBEFROZEN: - case CORINFO_HELP_NEWARR_1_OBJ: + case CORINFO_HELP_NEWARR_1_PTR: case CORINFO_HELP_READYTORUN_NEWARR_1: isAllocator = true; @@ -1674,6 +1677,13 @@ void HelperCallProperties::init() isPure = true; break; + case CORINFO_HELP_MEMCPY: + case CORINFO_HELP_MEMZERO: + case CORINFO_HELP_MEMSET: + case CORINFO_HELP_NATIVE_MEMSET: + isNoEscape = true; + break; + case CORINFO_HELP_LDELEMA_REF: isPure = true; break; @@ -2215,22 +2225,15 @@ double CycleCount::ElapsedTime() bool PerfCounter::Start() { - bool result = QueryPerformanceFrequency(&beg) != 0; - if (!result) - { - return result; - } - freq = (double)beg.QuadPart / 1000.0; - (void)QueryPerformanceCounter(&beg); - return result; + freq = (double)minipal_hires_tick_frequency() / 1000.0; + beg = minipal_hires_ticks(); + return true; } // Return elapsed time from Start() in millis. double PerfCounter::ElapsedTime() { - LARGE_INTEGER li; - (void)QueryPerformanceCounter(&li); - return (double)(li.QuadPart - beg.QuadPart) / freq; + return (double)(minipal_hires_ticks() - beg) / freq; } #endif diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 3d3ef423b44b..75edcb4d8028 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -785,11 +785,11 @@ class CycleCount bool GetCycles(uint64_t* time); }; -// Uses win API QueryPerformanceCounter/QueryPerformanceFrequency. +// Uses minipal/time.h class PerfCounter { - LARGE_INTEGER beg; - double freq; + int64_t beg; + double freq; public: // If the method returns false, any other query yield unpredictable results. diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index d07655cde902..5eebd83962d9 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -2181,7 +2181,7 @@ ValueNum ValueNumStore::VNOneForType(var_types typ) } } -ValueNum ValueNumStore::VNAllBitsForType(var_types typ) +ValueNum ValueNumStore::VNAllBitsForType(var_types typ, unsigned elementCount) { switch (typ) { @@ -2228,7 +2228,7 @@ ValueNum ValueNumStore::VNAllBitsForType(var_types typ) #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: { - return VNForSimdMaskCon(simdmask_t::AllBitsSet()); + return VNForSimdMaskCon(simdmask_t::AllBitsSet(elementCount)); } #endif // FEATURE_MASKED_HW_INTRINSICS #endif // FEATURE_SIMD @@ -2614,6 +2614,37 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN) { *resultVN = VNForIntCon(knownSize); } + + // Case 4: ARR_LENGTH(new T[(long)size]) -> size + VNFuncApp newArrFuncApp; + if (GetVNFunc(arg0VN, &newArrFuncApp) && (newArrFuncApp.m_func == VNF_JitNewArr)) + { + ValueNum actualSizeVN = newArrFuncApp.m_args[1]; + var_types actualSizeVNType = TypeOfVN(actualSizeVN); + + // JitNewArr's size argument (args[1]) is typically upcasted to TYP_LONG via VNF_Cast. 
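+ // e.g. for `arr = new T[size]` with an `int` size, the length VN is ARR_LENGTH(JitNewArr(.., (long)size));
+ // peeling that widening cast below lets `arr.Length` fold back to the VN of `size` itself.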
+ if (actualSizeVNType == TYP_LONG) + { + VNFuncApp castFuncApp; + if (GetVNFunc(actualSizeVN, &castFuncApp) && (castFuncApp.m_func == VNF_Cast)) + { + var_types castToType; + bool srcIsUnsigned; + GetCastOperFromVN(castFuncApp.m_args[1], &castToType, &srcIsUnsigned); + + // Make sure we have exactly (TYP_LONG)myInt32 cast: + if (!srcIsUnsigned && (castToType == TYP_LONG) && TypeOfVN(castFuncApp.m_args[0]) == TYP_INT) + { + // If that is the case, return the original size argument + *resultVN = castFuncApp.m_args[0]; + } + } + } + else if (actualSizeVNType == TYP_INT) + { + *resultVN = actualSizeVN; + } + } } // Try to perform constant-folding. @@ -2681,9 +2712,9 @@ ValueNum ValueNumStore::VNForCast(VNFunc func, ValueNum castToVN, ValueNum objVN bool isExact; bool isNonNull; CORINFO_CLASS_HANDLE castFrom = GetObjectType(objVN, &isExact, &isNonNull); - CORINFO_CLASS_HANDLE castTo; + CORINFO_CLASS_HANDLE castTo = NO_CLASS_HANDLE; if ((castFrom != NO_CLASS_HANDLE) && - EmbeddedHandleMapLookup(ConstantValue(castToVN), (ssize_t*)&castTo)) + EmbeddedHandleMapLookup(ConstantValue(castToVN), (ssize_t*)&castTo) && (castTo != NO_CLASS_HANDLE)) { TypeCompareState castResult = m_pComp->info.compCompHnd->compareTypesForCast(castFrom, castTo); if (castResult == TypeCompareState::Must) @@ -3679,7 +3710,12 @@ ValueNum ValueNumStore::VNForMapSelectWork(ValueNumKind vnk, entry.Result = sameSelResult; entry.SetMemoryDependencies(m_pComp, recMemoryDependencies); - GetMapSelectWorkCache()->Set(fstruct, entry); + // If we ran out of budget we could have already cached the + // result in the leaf. In that case it is ok to overwrite + // it here. + MapSelectWorkCache* cache = GetMapSelectWorkCache(); + assert(!cache->Lookup(fstruct) || ((*cache)[fstruct].Result == sameSelResult)); + cache->Set(fstruct, entry, MapSelectWorkCache::Overwrite); } recMemoryDependencies.ForEach([this, &memoryDependencies](ValueNum vn) { @@ -4637,7 +4673,7 @@ ValueNum ValueNumStore::VNEvalFoldTypeCompare(var_types type, VNFunc func, Value // // Note that VN actually tracks the value of embedded handle; // we need to pass the VM the associated the compile time handles, - // in case they differ (say for prejitting or AOT). + // in case they differ (say for AOT). // ValueNum handle0 = arg0Func.m_args[0]; if (!IsVNHandle(handle0)) @@ -5323,7 +5359,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN } // Handle `x | AllBitsSet == AllBitsSet` and `AllBitsSet | x == AllBitsSet` - if (cnsVN == VNAllBitsForType(typ)) + if (cnsVN == VNAllBitsForType(typ, 1)) { resultVN = cnsVN; break; @@ -5368,7 +5404,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN } // Handle `x & AllBitsSet == x` and `AllBitsSet & x == x` - if (cnsVN == VNAllBitsForType(typ)) + if (cnsVN == VNAllBitsForType(typ, 1)) { resultVN = opVN; break; @@ -6047,7 +6083,7 @@ FieldSeq* ValueNumStore::FieldSeqVNToFieldSeq(ValueNum vn) ValueNum ValueNumStore::ExtendPtrVN(GenTree* opA, GenTree* opB) { - if (opB->OperGet() == GT_CNS_INT) + if (opB->OperIs(GT_CNS_INT)) { return ExtendPtrVN(opA, opB->AsIntCon()->gtFieldSeq, opB->AsIntCon()->IconValue()); } @@ -6569,6 +6605,48 @@ bool ValueNumStore::IsVNInt32Constant(ValueNum vn) return TypeOfVN(vn) == TYP_INT; } +//------------------------------------------------------------------------ +// IsVNLog2: Determine if the value number is a log2 pattern, which is +// "XOR(LZCNT32(OR(X, 1), 31)" or "XOR(LZCNT64(OR(X, 1), 63))". 
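+// This is the shape produced for BitOperations.Log2, i.e. `lzcnt(x | 1) ^ 31` (or `^ 63` for the
+// 64-bit variant), so the result always falls in [0..31] (respectively [0..63]).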
+// +// Arguments: +// vn - the value number to analyze +// upperBound - if not null, will be set to the upper bound of the log2 pattern (31 or 63) +// +// Return value: +// true if the value number is a log2 pattern, false otherwise. +// +bool ValueNumStore::IsVNLog2(ValueNum vn, int* upperBound) +{ +#if defined(FEATURE_HW_INTRINSICS) && (defined(TARGET_XARCH) || defined(TARGET_ARM64)) + int xorBy; + ValueNum op; + // First, see if it's "X ^ 31" or "X ^ 63". + if (IsVNBinFuncWithConst(vn, VNF_XOR, &op, &xorBy) && ((xorBy == 31) || (xorBy == 63))) + { + // Drop any integer cast if any, we're dealing with [0..63] range, any integer cast is redundant. + IsVNBinFunc(op, VNF_Cast, &op); + +#ifdef TARGET_XARCH + VNFunc lzcntFunc = (xorBy == 31) ? VNF_HWI_LZCNT_LeadingZeroCount : VNF_HWI_LZCNT_X64_LeadingZeroCount; +#else + VNFunc lzcntFunc = (xorBy == 31) ? VNF_HWI_ArmBase_LeadingZeroCount : VNF_HWI_ArmBase_Arm64_LeadingZeroCount; +#endif + // Next, see if it's "LZCNT32(X | 1)" or "LZCNT64(X | 1)". + int orBy; + if (IsVNBinFunc(op, lzcntFunc, &op) && IsVNBinFuncWithConst(op, VNF_OR, &op, &orBy) && (orBy == 1)) + { + if (upperBound != nullptr) + { + *upperBound = xorBy; + } + return true; + } + } +#endif + return false; +} + //------------------------------------------------------------------------ // IsVNNeverNegative: Determines if the given value number can never take on a negative value // in a signed context (i.e. when the most-significant bit represents signedness). @@ -6644,6 +6722,13 @@ bool ValueNumStore::IsVNNeverNegative(ValueNum vn) case VNF_HWI_ArmBase_Arm64_LeadingSignCount: return VNVisit::Continue; #endif + case VNF_XOR: + if (IsVNLog2(vn)) + { + return VNVisit::Continue; + } + break; + #endif // FEATURE_HW_INTRINSICS default: @@ -6882,120 +6967,6 @@ const char* ValueNumStore::VNRelationString(VN_RELATION_KIND vrk) } #endif -//------------------------------------------------------------------------ -// AreVNsEquivalent: returns true iff VNs represent the same value -// -// Arguments: -// vn1 - first value number to consider -// vn2 - second value number ot consider -// -// Notes: -// Normally if vn1 != vn2 then we cannot say if the two values -// are equivalent or different. PhiDef VNs don't represent the -// phi def values in this way, and can be proven equivalent even -// for different value numbers. -// -bool ValueNumStore::AreVNsEquivalent(ValueNum vn1, ValueNum vn2) -{ - if (vn1 == vn2) - { - return true; - } - - VNPhiDef def1; - if (!GetPhiDef(vn1, &def1)) - { - return false; - } - - VNPhiDef def2; - if (!GetPhiDef(vn2, &def2)) - { - return false; - } - - // We have two PhiDefs. They may be equivalent, if - // they come from Phis in the same block. 
- // - const unsigned lclNum1 = def1.LclNum; - const unsigned ssaDefNum1 = def1.SsaDef; - - LclVarDsc* const varDsc1 = m_pComp->lvaGetDesc(lclNum1); - LclSsaVarDsc* const varSsaDsc1 = varDsc1->GetPerSsaData(ssaDefNum1); - GenTree* const varDefTree1 = varSsaDsc1->GetDefNode(); - BasicBlock* const varDefBlock1 = varSsaDsc1->GetBlock(); - - const unsigned lclNum2 = def2.LclNum; - const unsigned ssaDefNum2 = def2.SsaDef; - - LclVarDsc* const varDsc2 = m_pComp->lvaGetDesc(lclNum2); - LclSsaVarDsc* const varSsaDsc2 = varDsc2->GetPerSsaData(ssaDefNum2); - GenTree* const varDefTree2 = varSsaDsc2->GetDefNode(); - BasicBlock* const varDefBlock2 = varSsaDsc2->GetBlock(); - - if (varDefBlock1 != varDefBlock2) - { - return false; - } - - if ((varDefTree1 == nullptr) || (varDefTree2 == nullptr)) - { - return false; - } - - // PhiDefs are from same block. Walk the phi args - // - GenTreePhi* const treePhi1 = varDefTree1->AsLclVar()->Data()->AsPhi(); - GenTreePhi* const treePhi2 = varDefTree2->AsLclVar()->Data()->AsPhi(); - GenTreePhi::UseIterator treeIter1 = treePhi1->Uses().begin(); - GenTreePhi::UseIterator treeEnd1 = treePhi1->Uses().end(); - GenTreePhi::UseIterator treeIter2 = treePhi2->Uses().begin(); - GenTreePhi::UseIterator treeEnd2 = treePhi2->Uses().end(); - - bool phiArgsAreEquivalent = true; - - // TODO-CQ: This logic could walk the SSA nums in the VNPhiDef, which - // accounts for unreachable predecessors. - for (; (treeIter1 != treeEnd1) && (treeIter2 != treeEnd2); ++treeIter1, ++treeIter2) - { - GenTreePhiArg* const treePhiArg1 = treeIter1->GetNode()->AsPhiArg(); - GenTreePhiArg* const treePhiArg2 = treeIter2->GetNode()->AsPhiArg(); - - assert(treePhiArg1->gtPredBB == treePhiArg2->gtPredBB); - - ValueNum treePhiArgVN1 = treePhiArg1->gtVNPair.GetConservative(); - ValueNum treePhiArgVN2 = treePhiArg2->gtVNPair.GetConservative(); - - // If the PhiArg VNs differ, the phis are not equivalent. - // (Note we don't recurse into AreVNsEquivalent as we can't - // handle possible cycles in the SSA graph). - // - if (treePhiArgVN1 != treePhiArgVN2) - { - phiArgsAreEquivalent = false; - break; - } - - // If we failed to find meaningful VNs, the phis are not equivalent - // - if (treePhiArgVN1 == ValueNumStore::NoVN) - { - phiArgsAreEquivalent = false; - break; - } - } - - // If we didn't verify all phi args we have failed to prove equivalence - // - if (phiArgsAreEquivalent) - { - phiArgsAreEquivalent &= (treeIter1 == treeEnd1); - phiArgsAreEquivalent &= (treeIter2 == treeEnd2); - } - - return phiArgsAreEquivalent; -} - bool ValueNumStore::IsVNRelop(ValueNum vn) { VNFuncApp funcAttr; @@ -7041,6 +7012,9 @@ bool ValueNumStore::IsVNConstantBound(ValueNum vn) { const bool op1IsConst = IsVNInt32Constant(funcApp.m_args[0]); const bool op2IsConst = IsVNInt32Constant(funcApp.m_args[1]); + + // Technically, we can allow both to be constants, + // but such relops are expected to be constant folded anyway. return op1IsConst != op2IsConst; } } @@ -7058,6 +7032,8 @@ bool ValueNumStore::IsVNConstantBoundUnsigned(ValueNum vn) case VNF_LE_UN: case VNF_GE_UN: case VNF_GT_UN: + // Technically, we can allow both to be constants, + // but such relops are expected to be constant folded anyway. 
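+ // i.e. accept shapes like `x <u cns` or `cns <u x` where exactly one side is a positive int32 constant.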
return IsVNPositiveInt32Constant(funcApp.m_args[0]) != IsVNPositiveInt32Constant(funcApp.m_args[1]); default: break; @@ -8014,10 +7990,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree, #endif // TARGET_ARM64 #if defined(TARGET_XARCH) - case NI_AVX512CD_LeadingZeroCount: - case NI_AVX512CD_VL_LeadingZeroCount: - case NI_AVX10v1_V512_LeadingZeroCount: - case NI_AVX10v1_LeadingZeroCount: + case NI_AVX512_LeadingZeroCount: { return EvaluateUnarySimd(this, GT_LZCNT, /* scalar */ false, type, baseType, arg0VN); } @@ -8122,12 +8095,6 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree, return VNForLongCon(static_cast(result)); } - case NI_Vector128_AsVector2: - { - simd8_t result = GetConstantSimd16(arg0VN).v64[0]; - return VNForSimd8Con(result); - } - case NI_Vector128_ToVector256: case NI_Vector128_ToVector256Unsafe: { @@ -8182,6 +8149,12 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree, } #endif // TARGET_XARCH + case NI_Vector128_AsVector2: + { + simd8_t result = GetConstantSimd16(arg0VN).v64[0]; + return VNForSimd8Con(result); + } + case NI_Vector128_AsVector3: { simd12_t result = {}; @@ -8475,7 +8448,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( } // Handle `x & AllBitsSet == x` and `AllBitsSet & x == x` - ValueNum allBitsVN = VNAllBitsForType(type); + ValueNum allBitsVN = VNAllBitsForType(type, simdSize); if (cnsVN == allBitsVN) { @@ -8564,7 +8537,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( // Handle `x >= 0 == true` for unsigned types. if ((cnsVN == arg1VN) && (cnsVN == VNZeroForType(simdType))) { - return VNAllBitsForType(type); + unsigned elementCount = simdSize / genTypeSize(baseType); + return VNAllBitsForType(type, elementCount); } } else if (varTypeIsFloating(baseType)) @@ -8610,7 +8584,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( // Handle `0 <= x == true` for unsigned types. if ((cnsVN == arg0VN) && (cnsVN == VNZeroForType(simdType))) { - return VNAllBitsForType(type); + unsigned elementCount = simdSize / genTypeSize(baseType); + return VNAllBitsForType(type, elementCount); } } else if (varTypeIsFloating(baseType)) @@ -8680,7 +8655,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( // Handle `(x != NaN) == true` and `(NaN != x) == true` for floating-point types if (VNIsVectorNaN(simdType, baseType, cnsVN)) { - return VNAllBitsForType(type); + unsigned elementCount = simdSize / genTypeSize(baseType); + return VNAllBitsForType(type, elementCount); } } break; @@ -8697,7 +8673,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( } // Handle `x | ~0 == ~0` and `~0 | x== ~0` - ValueNum allBitsVN = VNAllBitsForType(type); + ValueNum allBitsVN = VNAllBitsForType(type, simdSize); if (cnsVN == allBitsVN) { @@ -8931,7 +8907,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( if (varTypeIsIntegral(baseType)) { - return VNAllBitsForType(type); + unsigned elementCount = simdSize / genTypeSize(baseType); + return VNAllBitsForType(type, elementCount); } break; } @@ -9136,6 +9113,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary( { var_types type = tree->TypeGet(); var_types baseType = tree->GetSimdBaseType(); + unsigned simdSize = tree->GetSimdSize(); NamedIntrinsic ni = tree->GetHWIntrinsicId(); switch (ni) @@ -9160,7 +9138,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary( } // Handle `AllBitsSet ? 
x : y` - ValueNum allBitsVN = VNAllBitsForType(type); + ValueNum allBitsVN = VNAllBitsForType(type, simdSize); if (arg0VN == allBitsVN) { @@ -9251,12 +9229,12 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary( ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN, ValueNum arg0VN) { assert(arg0VN == VNNormalValue(arg0VN)); - assert(m_pComp->IsMathIntrinsic(gtMathFN)); + assert(m_pComp->IsMathIntrinsic(gtMathFN) RISCV64_ONLY(|| m_pComp->IsBitCountingIntrinsic(gtMathFN))); // If the math intrinsic is not implemented by target-specific instructions, such as implemented // by user calls, then don't do constant folding on it during ReadyToRun. This minimizes precision loss. - if (IsVNConstant(arg0VN) && (!m_pComp->opts.IsReadyToRun() || m_pComp->IsTargetIntrinsic(gtMathFN))) + if (IsVNConstant(arg0VN) && (!m_pComp->IsAot() || m_pComp->IsTargetIntrinsic(gtMathFN))) { assert(varTypeIsFloating(TypeOfVN(arg0VN))); @@ -9503,10 +9481,8 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN unreached(); } } - else + else if (gtMathFN == NI_System_Math_Round) { - assert(gtMathFN == NI_System_Math_Round); - switch (TypeOfVN(arg0VN)) { case TYP_DOUBLE: @@ -9527,6 +9503,58 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN unreached(); } } + else if (gtMathFN == NI_PRIMITIVE_LeadingZeroCount) + { + switch (TypeOfVN(arg0VN)) + { + case TYP_LONG: + res = BitOperations::LeadingZeroCount((uint64_t)GetConstantInt64(arg0VN)); + break; + + case TYP_INT: + res = BitOperations::LeadingZeroCount((uint32_t)GetConstantInt32(arg0VN)); + break; + + default: + unreached(); + } + } + else if (gtMathFN == NI_PRIMITIVE_TrailingZeroCount) + { + switch (TypeOfVN(arg0VN)) + { + case TYP_LONG: + res = BitOperations::TrailingZeroCount((uint64_t)GetConstantInt64(arg0VN)); + break; + + case TYP_INT: + res = BitOperations::TrailingZeroCount((uint32_t)GetConstantInt32(arg0VN)); + break; + + default: + unreached(); + } + } + else if (gtMathFN == NI_PRIMITIVE_PopCount) + { + switch (TypeOfVN(arg0VN)) + { + case TYP_LONG: + res = BitOperations::PopCount((uint64_t)GetConstantInt64(arg0VN)); + break; + + case TYP_INT: + res = BitOperations::PopCount((uint32_t)GetConstantInt32(arg0VN)); + break; + + default: + unreached(); + } + } + else + { + unreached(); + } return VNForIntCon(res); } @@ -9534,7 +9562,10 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN else { assert((typ == TYP_DOUBLE) || (typ == TYP_FLOAT) || - ((typ == TYP_INT) && ((gtMathFN == NI_System_Math_ILogB) || (gtMathFN == NI_System_Math_Round)))); + ((typ == TYP_INT) && ((gtMathFN == NI_System_Math_ILogB) || (gtMathFN == NI_System_Math_Round))) || + (((typ == TYP_INT) || (typ == TYP_LONG)) && + ((gtMathFN == NI_PRIMITIVE_LeadingZeroCount) || (gtMathFN == NI_PRIMITIVE_TrailingZeroCount) || + (gtMathFN == NI_PRIMITIVE_PopCount)))); VNFunc vnf = VNF_Boundary; switch (gtMathFN) @@ -9626,6 +9657,15 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN case NI_System_Math_Truncate: vnf = VNF_Truncate; break; + case NI_PRIMITIVE_LeadingZeroCount: + vnf = VNF_LeadingZeroCount; + break; + case NI_PRIMITIVE_TrailingZeroCount: + vnf = VNF_TrailingZeroCount; + break; + case NI_PRIMITIVE_PopCount: + vnf = VNF_PopCount; + break; default: unreached(); // the above are the only math intrinsics at the time of this writing. 
} @@ -9636,7 +9676,7 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathFN, ValueNum arg0VN, ValueNum arg1VN) { - assert(varTypeIsFloating(typ)); + assert(varTypeIsArithmetic(typ)); assert(arg0VN == VNNormalValue(arg0VN)); assert(arg1VN == VNNormalValue(arg1VN)); assert(m_pComp->IsMathIntrinsic(gtMathFN)); @@ -9644,8 +9684,7 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF // If the math intrinsic is not implemented by target-specific instructions, such as implemented // by user calls, then don't do constant folding on it during ReadyToRun. This minimizes precision loss. - if (IsVNConstant(arg0VN) && IsVNConstant(arg1VN) && - (!m_pComp->opts.IsReadyToRun() || m_pComp->IsTargetIntrinsic(gtMathFN))) + if (IsVNConstant(arg0VN) && IsVNConstant(arg1VN) && (!m_pComp->IsAot() || m_pComp->IsTargetIntrinsic(gtMathFN))) { if (typ == TYP_DOUBLE) { @@ -9743,10 +9782,9 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF return VNForDoubleCon(res); } - else + else if (typ == TYP_FLOAT) { // Both operand and its result must be of the same floating point type. - assert(typ == TYP_FLOAT); assert(typ == TypeOfVN(arg0VN)); float arg0Val = GetConstantSingle(arg0VN); @@ -9840,6 +9878,45 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF return VNForFloatCon(res); } + else + { +#ifdef TARGET_RISCV64 + // Both operands and its result must be of the same integer type. + assert(typ == TypeOfVN(arg0VN)); + assert(typ == TypeOfVN(arg1VN)); + // Note: GetConstantInt64 sign-extends 'uint' but for comparison purposes that's ok + INT64 arg0Val = GetConstantInt64(arg0VN); + INT64 arg1Val = GetConstantInt64(arg1VN); + INT64 result = 0; + + switch (gtMathFN) + { + case NI_System_Math_Min: + result = std::min(arg0Val, arg1Val); + break; + + case NI_System_Math_MinUnsigned: + result = std::min(arg0Val, arg1Val); + break; + + case NI_System_Math_Max: + result = std::max(arg0Val, arg1Val); + break; + + case NI_System_Math_MaxUnsigned: + result = std::max(arg0Val, arg1Val); + break; + + default: + // the above are the only binary math intrinsics at the time of this writing. + unreached(); + } + return (typ == TYP_LONG) || (typ == TYP_ULONG) ? VNForLongCon(result) + : VNForIntCon(static_cast(result)); +#else // !TARGET_RISCV64 + unreached(); +#endif // !TARGET_RISCV64 + } } else { @@ -9883,6 +9960,16 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF vnf = VNF_MinNumber; break; +#ifdef TARGET_RISCV64 + case NI_System_Math_MaxUnsigned: + vnf = VNF_Max_UN; + break; + + case NI_System_Math_MinUnsigned: + vnf = VNF_Min_UN; + break; +#endif // TARGET_RISCV64 + case NI_System_Math_Pow: vnf = VNF_Pow; break; @@ -9941,6 +10028,37 @@ bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp) return false; } +//---------------------------------------------------------------------------------- +// IsVNBinFunc: A specialized version of GetVNFunc that checks if the given ValueNum +// is the given VNFunc with arity 2. If so, it returns the two operands. +// +// Arguments: +// vn - The ValueNum to check. +// func - The VNFunc to check for. +// op1 - The first operand (if not null). +// op2 - The second operand (if not null). +// +// Return Value: +// true if the given vn is the given VNFunc with two operands. 
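+//
+// Notes:
+//    For example, if vn is ADD(x, y), IsVNBinFunc(vn, VNF_ADD, &op1, &op2) returns true and sets
+//    op1 and op2 to the value numbers of x and y; on failure the out-parameters are left untouched.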
+// +bool ValueNumStore::IsVNBinFunc(ValueNum vn, VNFunc func, ValueNum* op1, ValueNum* op2) +{ + VNFuncApp funcApp; + if (GetVNFunc(vn, &funcApp) && (funcApp.m_func == func) && (funcApp.m_arity == 2)) + { + if (op1 != nullptr) + { + *op1 = funcApp.m_args[0]; + } + if (op2 != nullptr) + { + *op2 = funcApp.m_args[1]; + } + return true; + } + return false; +} + bool ValueNumStore::VNIsValid(ValueNum vn) { ChunkNum cn = GetChunkNum(vn); @@ -9974,7 +10092,7 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) ssize_t val = ConstantValue(vn); const GenTreeFlags handleFlags = GetHandleFlags(vn); printf("Hnd const: 0x%p %s", dspPtr(val), GenTree::gtGetHandleKindString(handleFlags)); - if (!comp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && !comp->opts.IsReadyToRun()) + if (!comp->IsAot()) { switch (handleFlags & GTF_ICON_HDL_MASK) { @@ -10488,8 +10606,8 @@ static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memo GT_NOP, // These control-flow operations need no values. - GT_JTRUE, GT_RETURN, GT_SWITCH, GT_RETFILT, GT_CKFINITE, - GT_SWIFT_ERROR_RET}; + GT_JTRUE, GT_RETURN, GT_RETURN_SUSPEND, GT_SWITCH, GT_RETFILT, + GT_CKFINITE, GT_SWIFT_ERROR_RET}; void ValueNumStore::ValidateValueNumStoreStatics() { @@ -10838,11 +10956,7 @@ PhaseStatus Compiler::fgValueNumber() } assert(m_dfsTree != nullptr); - - if (m_loops == nullptr) - { - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - } + assert(m_loops != nullptr); m_blockToLoop = BlockToNaturalLoopMap::Build(m_loops); // Compute the side effects of loops. @@ -11741,7 +11855,6 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) } else { - assert(doesMethodHaveFrozenObjects()); tree->gtVNPair.SetBoth( vnStore->VNForHandle(ssize_t(tree->AsIntConCommon()->IconValue()), tree->GetIconHandleFlag())); @@ -11840,7 +11953,7 @@ void Compiler::fgValueNumberStore(GenTree* store) valueVNPair.SetBoth(initObjVN); } - else if (value->TypeGet() == TYP_REF) + else if (value->TypeIs(TYP_REF)) { // If we have an unsafe IL store of a TYP_REF to a non-ref (typically a TYP_BYREF) // then don't propagate this ValueNumber to the lhs, instead create a new unique VN. @@ -11959,13 +12072,13 @@ void Compiler::fgValueNumberSsaVarDef(GenTreeLclVarCommon* lcl) { if (genTypeSize(varDsc) != genTypeSize(lcl)) { - assert((varDsc->TypeGet() == TYP_LONG) && lcl->TypeIs(TYP_INT)); + assert(varDsc->TypeIs(TYP_LONG) && lcl->TypeIs(TYP_INT)); lcl->gtVNPair = vnStore->VNPairForCast(wholeLclVarVNP, lcl->TypeGet(), varDsc->TypeGet()); } else { - assert(((varDsc->TypeGet() == TYP_I_IMPL) && lcl->TypeIs(TYP_BYREF)) || - ((varDsc->TypeGet() == TYP_BYREF) && lcl->TypeIs(TYP_I_IMPL))); + assert((varDsc->TypeIs(TYP_I_IMPL) && lcl->TypeIs(TYP_BYREF)) || + (varDsc->TypeIs(TYP_BYREF) && lcl->TypeIs(TYP_I_IMPL))); lcl->gtVNPair = wholeLclVarVNP; } } @@ -12184,10 +12297,6 @@ bool Compiler::fgValueNumberConstLoad(GenTreeIndir* tree) if (info.compCompHnd->getStaticFieldContent(fieldHandle, buffer, size, (int)byteOffset)) { ValueNum vn = vnStore->VNForGenericCon(tree->TypeGet(), buffer); - if (vnStore->IsVNObjHandle(vn)) - { - setMethodHasFrozenObjects(); - } tree->gtVNPair.SetBoth(vn); return true; } @@ -12379,9 +12488,9 @@ void Compiler::fgValueNumberTree(GenTree* tree) break; case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_SWIFT_ERROR: - // We know nothing about the value of a caught expression. - // We also know nothing about the error register's value post-Swift call. + // We know nothing about the value of these. 
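+ // That covers the caught exception object, the async continuation, and the Swift error register;
+ // each simply gets a fresh, opaque VN from VNForExpr.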
tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); break; @@ -12394,6 +12503,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) // These do not represent values. case GT_NO_OP: case GT_NOP: + case GT_GCPOLL: case GT_JMP: // Control flow case GT_LABEL: // Control flow #if defined(FEATURE_EH_WINDOWS_X86) @@ -12575,11 +12685,11 @@ void Compiler::fgValueNumberTree(GenTree* tree) tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp); } } - else if (tree->OperGet() == GT_CAST) + else if (tree->OperIs(GT_CAST)) { fgValueNumberCastTree(tree); } - else if (tree->OperGet() == GT_INTRINSIC) + else if (tree->OperIs(GT_INTRINSIC)) { fgValueNumberIntrinsic(tree); } @@ -12750,6 +12860,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) case GT_SWITCH: case GT_RETURN: case GT_RETFILT: + case GT_RETURN_SUSPEND: case GT_NULLCHECK: if (tree->gtGetOp1() != nullptr) { @@ -12939,7 +13050,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) void Compiler::fgValueNumberIntrinsic(GenTree* tree) { - assert(tree->OperGet() == GT_INTRINSIC); + assert(tree->OperIs(GT_INTRINSIC)); GenTreeIntrinsic* intrinsic = tree->AsIntrinsic(); ValueNumPair arg0VNP, arg1VNP; ValueNumPair arg0VNPx = ValueNumStore::VNPForEmptyExcSet(); @@ -12952,7 +13063,7 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) vnStore->VNPUnpackExc(intrinsic->AsOp()->gtOp2->gtVNPair, &arg1VNP, &arg1VNPx); } - if (IsMathIntrinsic(intrinsic->gtIntrinsicName)) + if (IsMathIntrinsic(intrinsic->gtIntrinsicName) || IsBitCountingIntrinsic(intrinsic->gtIntrinsicName)) { // GT_INTRINSIC is a currently a subtype of binary operators. But most of // the math intrinsics are actually unary operations. @@ -12984,7 +13095,6 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) CORINFO_OBJECT_HANDLE typeObj = info.compCompHnd->getRuntimeTypePointer(cls); if (typeObj != nullptr) { - setMethodHasFrozenObjects(); ValueNum handleVN = vnStore->VNForHandle((ssize_t)typeObj, GTF_ICON_OBJ_HDL); intrinsic->gtVNPair = vnStore->VNPWithExc(ValueNumPair(handleVN, handleVN), arg0VNPx); return; @@ -13197,7 +13307,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) switch (intrinsicId) { #ifdef TARGET_XARCH - case NI_SSE2_MaskMove: + case NI_X86Base_MaskMove: case NI_AVX_MaskStore: case NI_AVX2_MaskStore: case NI_AVX_MaskLoad: @@ -13226,7 +13336,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) void Compiler::fgValueNumberCastTree(GenTree* tree) { - assert(tree->OperGet() == GT_CAST); + assert(tree->OperIs(GT_CAST)); ValueNumPair srcVNPair = tree->AsOp()->gtOp1->gtVNPair; var_types castToType = tree->CastToType(); @@ -13552,7 +13662,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN #ifdef DEBUG for (CallArg& arg : call->gtArgs.Args()) { - assert(!arg.AbiInfo.PassedByRef && + assert(!arg.AbiInfo.IsPassedByReference() && "Helpers taking implicit byref arguments should not be marked as pure"); } #endif @@ -13714,9 +13824,12 @@ bool Compiler::fgValueNumberSpecialIntrinsic(GenTreeCall* call) break; } - ValueNum clsVN = typeHandleFuncApp.m_args[0]; - ssize_t clsHandle; - if (!vnStore->EmbeddedHandleMapLookup(vnStore->ConstantValue(clsVN), &clsHandle)) + ValueNum clsVN = typeHandleFuncApp.m_args[0]; + ssize_t clsHandle = 0; + + // NOTE: EmbeddedHandleMapLookup may return 0 for non-0 embedded handle + if (!vnStore->EmbeddedHandleMapLookup(vnStore->ConstantValue(clsVN), &clsHandle) && + (clsHandle != 0)) { break; } @@ -13724,7 +13837,6 @@ bool 
Compiler::fgValueNumberSpecialIntrinsic(GenTreeCall* call) CORINFO_OBJECT_HANDLE obj = info.compCompHnd->getRuntimeTypePointer((CORINFO_CLASS_HANDLE)clsHandle); if (obj != nullptr) { - setMethodHasFrozenObjects(); call->gtVNPair.SetBoth(vnStore->VNForHandle((ssize_t)obj, GTF_ICON_OBJ_HDL)); return true; } @@ -13791,11 +13903,22 @@ void Compiler::fgValueNumberCastHelper(GenTreeCall* call) switch (helpFunc) { + case CORINFO_HELP_LNG2FLT: + castToType = TYP_FLOAT; + castFromType = TYP_LONG; + break; + case CORINFO_HELP_LNG2DBL: castToType = TYP_DOUBLE; castFromType = TYP_LONG; break; + case CORINFO_HELP_ULNG2FLT: + castToType = TYP_FLOAT; + castFromType = TYP_LONG; + srcIsUnsigned = true; + break; + case CORINFO_HELP_ULNG2DBL: castToType = TYP_DOUBLE; castFromType = TYP_LONG; @@ -13929,7 +14052,7 @@ VNFunc Compiler::fgValueNumberJitHelperMethodVNFunc(CorInfoHelpFunc helpFunc) break; case CORINFO_HELP_NEWARR_1_DIRECT: - case CORINFO_HELP_NEWARR_1_OBJ: + case CORINFO_HELP_NEWARR_1_PTR: case CORINFO_HELP_NEWARR_1_VC: case CORINFO_HELP_NEWARR_1_ALIGN8: vnf = VNF_JitNewArr; @@ -13945,11 +14068,11 @@ VNFunc Compiler::fgValueNumberJitHelperMethodVNFunc(CorInfoHelpFunc helpFunc) break; case CORINFO_HELP_NEWFAST_MAYBEFROZEN: - vnf = opts.IsReadyToRun() ? VNF_JitReadyToRunNew : VNF_JitNew; + vnf = IsAot() ? VNF_JitReadyToRunNew : VNF_JitNew; break; case CORINFO_HELP_NEWARR_1_MAYBEFROZEN: - vnf = opts.IsReadyToRun() ? VNF_JitReadyToRunNewArr : VNF_JitNewArr; + vnf = IsAot() ? VNF_JitReadyToRunNewArr : VNF_JitNewArr; break; case CORINFO_HELP_GET_GCSTATIC_BASE: @@ -14143,7 +14266,9 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) switch (helpFunc) { + case CORINFO_HELP_LNG2FLT: case CORINFO_HELP_LNG2DBL: + case CORINFO_HELP_ULNG2FLT: case CORINFO_HELP_ULNG2DBL: case CORINFO_HELP_DBL2INT: case CORINFO_HELP_DBL2INT_OVF: @@ -14167,7 +14292,6 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) CORINFO_OBJECT_HANDLE typeObj = info.compCompHnd->getRuntimeTypePointer(cls); if (typeObj != nullptr) { - setMethodHasFrozenObjects(); ValueNum typeObjVN = vnStore->VNForHandle((ssize_t)typeObj, GTF_ICON_OBJ_HDL); call->gtVNPair.SetBoth(vnStore->VNForBitCast(typeObjVN, TYP_STRUCT, genTypeSize(TYP_REF))); return false; @@ -14314,7 +14438,7 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call) ValueNumPair vnpNorm; - if (call->TypeGet() == TYP_VOID) + if (call->TypeIs(TYP_VOID)) { vnpNorm = ValueNumStore::VNPForVoid(); } @@ -14762,7 +14886,7 @@ void Compiler::fgValueNumberAddExceptionSetForBoundsCheck(GenTree* tree) void Compiler::fgValueNumberAddExceptionSetForCkFinite(GenTree* tree) { // We should only be dealing with an check finite operation. - assert(tree->OperGet() == GT_CKFINITE); + assert(tree->OperIs(GT_CKFINITE)); // Unpack, Norm,Exc for the tree's VN // @@ -15078,8 +15202,8 @@ void Compiler::vnPrint(ValueNum vn, unsigned level) // Methods of ValueNumPair. 
ValueNumPair::ValueNumPair() - : m_liberal(ValueNumStore::NoVN) - , m_conservative(ValueNumStore::NoVN) + : m_conservative(ValueNumStore::NoVN) + , m_liberal(ValueNumStore::NoVN) { } @@ -15130,9 +15254,12 @@ CORINFO_CLASS_HANDLE ValueNumStore::GetObjectType(ValueNum vn, bool* pIsExact, b const VNFunc func = funcApp.m_func; if ((func == VNF_CastClass) || (func == VNF_IsInstanceOf) || (func == VNF_JitNew)) { - ssize_t clsHandle; - ValueNum clsVN = funcApp.m_args[0]; - if (IsVNTypeHandle(clsVN) && EmbeddedHandleMapLookup(ConstantValue(clsVN), &clsHandle)) + ssize_t clsHandle = 0; + ValueNum clsVN = funcApp.m_args[0]; + + // NOTE: EmbeddedHandleMapLookup may return 0 for non-0 embedded handle + if (IsVNTypeHandle(clsVN) && EmbeddedHandleMapLookup(ConstantValue(clsVN), &clsHandle) && + (clsHandle != 0)) { // JitNew returns an exact and non-null obj, castclass and isinst do not have this guarantee. *pIsNonNull = func == VNF_JitNew; @@ -15190,3 +15317,42 @@ void ValueNumStore::PeelOffsets(ValueNum* vn, target_ssize_t* offset) } } } + +//-------------------------------------------------------------------------------- +// PeelOffsetsI32: Peel all additions with a TYP_INT constant offset away from the +// specified VN. +// +// Arguments: +// vn - [in, out] The VN. Will be modified to the base VN that the offsets are added to. +// offset - [out] The offsets peeled out of the VNF_ADD funcs. +// +void ValueNumStore::PeelOffsetsI32(ValueNum* vn, int* offset) +{ + *offset = 0; + VNFuncApp app; + while (GetVNFunc(*vn, &app) && (app.m_func == VNF_ADD)) + { + ValueNum op1 = app.m_args[0]; + ValueNum op2 = app.m_args[1]; + + if ((TypeOfVN(op1) != TYP_INT) || (TypeOfVN(op2) != TYP_INT)) + { + break; + } + + if (IsVNInt32Constant(op1) && !IsVNHandle(op1)) + { + *offset += ConstantValue(op1); + *vn = op2; + } + else if (IsVNInt32Constant(op2) && !IsVNHandle(op2)) + { + *offset += ConstantValue(op2); + *vn = op1; + } + else + { + break; + } + } +} diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index f2ae7be7a4ca..9c96241809b3 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -529,6 +529,7 @@ class ValueNumStore CORINFO_CLASS_HANDLE GetObjectType(ValueNum vn, bool* pIsExact, bool* pIsNonNull); void PeelOffsets(ValueNum* vn, target_ssize_t* offset); + void PeelOffsetsI32(ValueNum* vn, int* offset); typedef JitHashTable, bool> ValueNumSet; @@ -689,7 +690,8 @@ class ValueNumStore // Returns the value number for AllBitsSet of the given "typ". // It has an unreached() for a "typ" that has no all bits set value, such as TYP_VOID. - ValueNum VNAllBitsForType(var_types typ); + // elementCount is used for TYP_MASK and indicates how many bits should be set + ValueNum VNAllBitsForType(var_types typ, unsigned elementCount); #ifdef FEATURE_SIMD // Returns the value number broadcast of the given "simdType" and "simdBaseType". @@ -1054,6 +1056,9 @@ class ValueNumStore // Returns true if the VN represents a node that is never negative. bool IsVNNeverNegative(ValueNum vn); + // Returns true if the VN represents BitOperations.Log2 pattern + bool IsVNLog2(ValueNum vn, int* upperBound = nullptr); + typedef SmallHashTable CheckedBoundVNSet; // Returns true if the VN is known or likely to appear as the conservative value number @@ -1210,10 +1215,6 @@ class ValueNumStore // Returns true iff the VN represents a relop bool IsVNRelop(ValueNum vn); - // Returns true if the two VNs represent the same value - // despite being different VNs. Useful for phi def VNs. 
- bool AreVNsEquivalent(ValueNum vn1, ValueNum vn2); - enum class VN_RELATION_KIND { VRK_Inferred, // (x ? y) @@ -1411,6 +1412,38 @@ class ValueNumStore // the function application it represents; otherwise, return "false." bool GetVNFunc(ValueNum vn, VNFuncApp* funcApp); + // Returns "true" iff "vn" is a function application of the form "func(op1, op2)". + bool IsVNBinFunc(ValueNum vn, VNFunc func, ValueNum* op1 = nullptr, ValueNum* op2 = nullptr); + + // Returns "true" iff "vn" is a function application of the form "func(op, cns)" + // the cns can be on the left side if the function is commutative. + template + bool IsVNBinFuncWithConst(ValueNum vn, VNFunc func, ValueNum* op, T* cns) + { + T opCns; + ValueNum op1, op2; + if (IsVNBinFunc(vn, func, &op1, &op2)) + { + if (IsVNIntegralConstant(op2, &opCns)) + { + if (op != nullptr) + *op = op1; + if (cns != nullptr) + *cns = opCns; + return true; + } + else if (VNFuncIsCommutative(func) && IsVNIntegralConstant(op1, &opCns)) + { + if (op != nullptr) + *op = op2; + if (cns != nullptr) + *cns = opCns; + return true; + } + } + return false; + } + // Returns "true" iff "vn" is a valid value number -- one that has been previously returned. bool VNIsValid(ValueNum vn); diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index 802fbc69defb..6d2305c315a8 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -90,14 +90,14 @@ ValueNumFuncDef(ILogB, 1, false, false, false) ValueNumFuncDef(Log, 1, false, false, false) ValueNumFuncDef(Log2, 1, false, false, false) ValueNumFuncDef(Log10, 1, false, false, false) -ValueNumFuncDef(Max, 2, false, false, false) -ValueNumFuncDef(MaxMagnitude, 2, false, false, false) -ValueNumFuncDef(MaxMagnitudeNumber, 2, false, false, false) -ValueNumFuncDef(MaxNumber, 2, false, false, false) -ValueNumFuncDef(Min, 2, false, false, false) -ValueNumFuncDef(MinMagnitude, 2, false, false, false) -ValueNumFuncDef(MinMagnitudeNumber, 2, false, false, false) -ValueNumFuncDef(MinNumber, 2, false, false, false) +ValueNumFuncDef(Max, 2, true, false, false) +ValueNumFuncDef(MaxMagnitude, 2, true, false, false) +ValueNumFuncDef(MaxMagnitudeNumber, 2, true, false, false) +ValueNumFuncDef(MaxNumber, 2, true, false, false) +ValueNumFuncDef(Min, 2, true, false, false) +ValueNumFuncDef(MinMagnitude, 2, true, false, false) +ValueNumFuncDef(MinMagnitudeNumber, 2, true, false, false) +ValueNumFuncDef(MinNumber, 2, true, false, false) ValueNumFuncDef(Pow, 2, false, false, false) ValueNumFuncDef(RoundDouble, 1, false, false, false) ValueNumFuncDef(RoundInt32, 1, false, false, false) @@ -109,6 +109,10 @@ ValueNumFuncDef(Tan, 1, false, false, false) ValueNumFuncDef(Tanh, 1, false, false, false) ValueNumFuncDef(Truncate, 1, false, false, false) +ValueNumFuncDef(LeadingZeroCount, 1, false, false, false) +ValueNumFuncDef(TrailingZeroCount, 1, false, false, false) +ValueNumFuncDef(PopCount, 1, false, false, false) + ValueNumFuncDef(ManagedThreadId, 0, false, false, false) ValueNumFuncDef(ObjGetType, 1, false, true, false) @@ -204,8 +208,8 @@ ValueNumFuncDef(HWI_##isa##_##name, ((argCount == -1) ? -1 : (argCount + 1)), (( //TODO-LOONGARCH64-CQ: add LoongArch64's Hardware Intrinsics Instructions if supported. #elif defined (TARGET_RISCV64) - //TODO-RISCV64-CQ: add RISCV64's Hardware Intrinsics Instructions if supported. 
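The valuenum.h hunk above adds `IsVNBinFuncWithConst`, which matches `func(op, cns)` and, when the function is commutative (as the Min/Max funcs are now marked), also `func(cns, op)`. A small standalone sketch of that matching rule over an invented expression type, not the `ValueNumStore` API:

```cpp
// Sketch: match "op(x, constant)", accepting the constant on either side only
// when the operator is commutative. Types here are illustrative stand-ins.
#include <cstdio>

enum class Op { Add, Sub, Min, Max };

static bool IsCommutative(Op op)
{
    return (op == Op::Add) || (op == Op::Min) || (op == Op::Max); // Sub is not
}

struct Expr
{
    bool        isConst;
    int         value; // valid when isConst
    Op          op;    // valid when !isConst
    const Expr* lhs;   // valid when !isConst
    const Expr* rhs;   // valid when !isConst
};

static bool MatchBinWithConst(const Expr& e, Op op, const Expr** nonConst, int* cns)
{
    if (e.isConst || (e.op != op))
        return false;

    if (e.rhs->isConst)
    {
        *nonConst = e.lhs;
        *cns      = e.rhs->value;
        return true;
    }
    if (IsCommutative(op) && e.lhs->isConst)
    {
        *nonConst = e.rhs;
        *cns      = e.lhs->value;
        return true;
    }
    return false;
}

int main()
{
    Expr x    = { false, 0, Op::Add, nullptr, nullptr }; // some non-constant operand
    Expr c5   = { true,  5, Op::Add, nullptr, nullptr };
    Expr minE = { false, 0, Op::Min, &c5, &x };          // Min(5, x)
    Expr subE = { false, 0, Op::Sub, &c5, &x };          // 5 - x

    const Expr* op  = nullptr;
    int         cns = 0;
    std::printf("Min(5, x) matches 'Min(x, cns)'? %d\n", MatchBinWithConst(minE, Op::Min, &op, &cns)); // 1
    std::printf("5 - x     matches 'x - cns'?     %d\n", MatchBinWithConst(subE, Op::Sub, &op, &cns)); // 0
    return 0;
}
```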
- + ValueNumFuncDef(Min_UN, 2, true, false, false) // unsigned min/max intrinsics + ValueNumFuncDef(Max_UN, 2, true, false, false) #else #error Unsupported platform #endif diff --git a/src/coreclr/jit/valuenumtype.h b/src/coreclr/jit/valuenumtype.h index e41db9726754..b34b90e44cf3 100644 --- a/src/coreclr/jit/valuenumtype.h +++ b/src/coreclr/jit/valuenumtype.h @@ -34,8 +34,8 @@ enum ValueNumKind struct ValueNumPair { private: - ValueNum m_liberal; ValueNum m_conservative; + ValueNum m_liberal; public: ValueNum GetLiberal() const @@ -116,8 +116,8 @@ struct ValueNumPair ValueNumPair(); ValueNumPair(ValueNum lib, ValueNum cons) - : m_liberal(lib) - , m_conservative(cons) + : m_conservative(cons) + , m_liberal(lib) { } diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index c0cfa87775da..e214d1f8a346 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -41,13 +41,6 @@ enum var_types_register #else #define TYP_I_IMPL TYP_INT #define TYP_U_IMPL TYP_UINT -#ifdef _PREFAST_ -// We silence this in the 32-bit build because for portability, we like to have asserts like this: -// assert(op2->gtType == TYP_INT || op2->gtType == TYP_I_IMPL); -// This is obviously redundant for 32-bit builds, but we don't want to have ifdefs and different -// asserts just for 64-bit builds, so for now just silence the assert -#pragma warning(disable : 6287) // warning 6287: the left and right sub-expressions are identical -#endif //_PREFAST_ #endif /*****************************************************************************/ diff --git a/src/coreclr/md/CMakeLists.txt b/src/coreclr/md/CMakeLists.txt index c820cb19674d..ef60850b5a10 100644 --- a/src/coreclr/md/CMakeLists.txt +++ b/src/coreclr/md/CMakeLists.txt @@ -17,4 +17,3 @@ add_subdirectory(runtime) add_subdirectory(enc) add_subdirectory(ceefilegen) add_subdirectory(datasource) -add_subdirectory(staticmd) diff --git a/src/coreclr/md/ceefilegen/pesectionman.cpp b/src/coreclr/md/ceefilegen/pesectionman.cpp index 2e3df18e85b4..10d9b2b74016 100644 --- a/src/coreclr/md/ceefilegen/pesectionman.cpp +++ b/src/coreclr/md/ceefilegen/pesectionman.cpp @@ -282,7 +282,7 @@ HRESULT PESection::applyRelocs(CeeGenTokenMapper *pTokenMapper) unsigned * pos = (unsigned*) m_blobFetcher.ComputePointer(pCurReloc->offset); mdToken newToken; - PREFIX_ASSUME(pos != NULL); + _ASSERTE(pos != NULL); if (pTokenMapper->HasTokenMoved(*pos, newToken)) { // we have a mapped token *pos = newToken; diff --git a/src/coreclr/md/compiler/CMakeLists.txt b/src/coreclr/md/compiler/CMakeLists.txt index 2adf73ebc46a..56a08ec4ac5b 100644 --- a/src/coreclr/md/compiler/CMakeLists.txt +++ b/src/coreclr/md/compiler/CMakeLists.txt @@ -1,7 +1,6 @@ set(MDCOMPILER_SOURCES assemblymd.cpp assemblymd_emit.cpp - classfactory.cpp custattr_import.cpp custattr_emit.cpp disp.cpp @@ -32,7 +31,6 @@ set(MDCOMPILER_HEADERS ../inc/metamodelrw.h ../inc/rwutil.h ../inc/stgio.h - classfactory.h custattr.h disp.h filtermanager.h diff --git a/src/coreclr/md/compiler/assemblymd.cpp b/src/coreclr/md/compiler/assemblymd.cpp index f81e835bf926..90e08944f37b 100644 --- a/src/coreclr/md/compiler/assemblymd.cpp +++ b/src/coreclr/md/compiler/assemblymd.cpp @@ -15,10 +15,6 @@ #include "mdlog.h" #include "importhelper.h" -#ifdef _MSC_VER -#pragma warning(disable: 4102) -#endif - //******************************************************************************* // Get the properties for the given Assembly token. 
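The `ValueNumPair` hunks above swap the declaration order of `m_conservative` and `m_liberal` and reorder the constructor initializer lists to match, presumably to keep initializer-list order in sync with the new declaration order. A short cautionary sketch, with illustrative names, of why that matters in C++: members are always initialized in declaration order, not in the order written in the initializer list.

```cpp
// Sketch: the initializer list below is written as if 'first' runs before
// 'second', but members initialize in declaration order, so 'second' reads
// 'first' before it has a value (undefined behavior, and a -Wreorder warning).
#include <cstdio>

struct Pair
{
    int second; // declared first, so initialized first
    int first;

    Pair(int f)
        : first(f)
        , second(first + 1)
    {
    }
};

int main()
{
    Pair p(10);
    std::printf("second = %d\n", p.second); // not guaranteed to be 11
    return 0;
}
```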
//******************************************************************************* @@ -198,7 +194,7 @@ STDMETHODIMP RegMeta::GetExportedTypeProps( // S_OK or error. LPCSTR szTypeName; IfFailGo(pMiniMd->getTypeNamespaceOfExportedType(pRecord, &szTypeNamespace)); - PREFIX_ASSUME(szTypeNamespace != NULL); + _ASSERTE(szTypeNamespace != NULL); MAKE_WIDEPTR_FROMUTF8_NOTHROW(wzTypeNamespace, szTypeNamespace); IfNullGo(wzTypeNamespace); @@ -572,7 +568,7 @@ STDMETHODIMP RegMeta::FindManifestResourceByName( // S_OK or error } // RegMeta::FindManifestResourceByName //******************************************************************************* -// Used to find assemblies either in Fusion cache or on disk at build time. +// Used to find assemblies either on disk at build time. //******************************************************************************* STDMETHODIMP RegMeta::FindAssembliesByName( // S_OK or error LPCWSTR szAppBase, // [IN] optional - can be NULL @@ -582,10 +578,5 @@ STDMETHODIMP RegMeta::FindAssembliesByName( // S_OK or error ULONG cMax, // [IN] The max number to put ULONG *pcAssemblies) // [OUT] The number of assemblies returned. { -#ifdef FEATURE_METADATA_IN_VM - return COR_E_NOTSUPPORTED; -#else //!FEATURE_METADATA_IN_VM - // Calls to fusion are not supported outside VM return E_NOTIMPL; -#endif //!FEATURE_METADATA_IN_VM } // RegMeta::FindAssembliesByName diff --git a/src/coreclr/md/compiler/assemblymd_emit.cpp b/src/coreclr/md/compiler/assemblymd_emit.cpp index 476e11366b56..f78d33cda815 100644 --- a/src/coreclr/md/compiler/assemblymd_emit.cpp +++ b/src/coreclr/md/compiler/assemblymd_emit.cpp @@ -15,10 +15,6 @@ #include "mdlog.h" #include "importhelper.h" -#ifdef _MSC_VER -#pragma warning(disable: 4102) -#endif - #ifdef FEATURE_METADATA_EMIT //******************************************************************************* diff --git a/src/coreclr/md/compiler/classfactory.cpp b/src/coreclr/md/compiler/classfactory.cpp deleted file mode 100644 index f47029cb5fd7..000000000000 --- a/src/coreclr/md/compiler/classfactory.cpp +++ /dev/null @@ -1,147 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -//***************************************************************************** -// ClassFactory.cpp -// - -// -// Dll* routines for entry points, and support for COM framework. The class -// factory and other routines live in this module. -// -// This file is not included in the standalone metadata version, because standalone can't use COM, -// let alone COM-activation. So this file gets linked into mscorwks.dll, and then the mscorwks -// class factory delegates to this co-creation routine. -// -//***************************************************************************** -#include "stdafx.h" - -#ifdef FEATURE_METADATA_IN_VM - -#include "classfactory.h" -#include "disp.h" -#include "regmeta.h" -#include "mscoree.h" -#include "corhost.h" - -// This map contains the list of coclasses which are exported from this module. -// NOTE: CLSID_CorMetaDataDispenser must be the first entry in this table! -const COCLASS_REGISTER g_CoClasses[] = -{ -// pClsid szProgID pfnCreateObject - { &CLSID_CorMetaDataDispenser, W("CorMetaDataDispenser"), Disp::CreateObject }, - { NULL, NULL, NULL } -}; - - -//***************************************************************************** -// Called by COM to get a class factory for a given CLSID. 
If it is one we -// support, instantiate a class factory object and prepare for create instance. -// -// Notes: -// This gets invoked from mscorwks's DllGetClassObject. -//***************************************************************************** -STDAPI MetaDataDllGetClassObject( // Return code. - REFCLSID rclsid, // The class to desired. - REFIID riid, // Interface wanted on class factory. - LPVOID FAR *ppv) // Return interface pointer here. -{ - MDClassFactory *pClassFactory; // To create class factory object. - const COCLASS_REGISTER *pCoClass; // Loop control. - HRESULT hr = CLASS_E_CLASSNOTAVAILABLE; - - // Scan for the right one. - for (pCoClass=g_CoClasses; pCoClass->pClsid; pCoClass++) - { - if (*pCoClass->pClsid == rclsid) - { - // Allocate the new factory object. - pClassFactory = new (nothrow) MDClassFactory(pCoClass); - if (!pClassFactory) - return (E_OUTOFMEMORY); - - // Pick the v-table based on the caller's request. - hr = pClassFactory->QueryInterface(riid, ppv); - - // Always release the local reference, if QI failed it will be - // the only one and the object gets freed. - pClassFactory->Release(); - break; - } - } - return hr; -} - - -//***************************************************************************** -// -//********** Class factory code. -// -//***************************************************************************** - - -//***************************************************************************** -// QueryInterface is called to pick a v-table on the co-class. -//***************************************************************************** -HRESULT STDMETHODCALLTYPE MDClassFactory::QueryInterface( - REFIID riid, - void **ppvObject) -{ - HRESULT hr; - - // Avoid confusion. - *ppvObject = NULL; - - // Pick the right v-table based on the IID passed in. - if (riid == IID_IUnknown) - *ppvObject = (IUnknown *) this; - else if (riid == IID_IClassFactory) - *ppvObject = (IClassFactory *) this; - - // If successful, add a reference for out pointer and return. - if (*ppvObject) - { - hr = S_OK; - AddRef(); - } - else - hr = E_NOINTERFACE; - return hr; -} - - -//***************************************************************************** -// CreateInstance is called to create a new instance of the coclass for which -// this class was created in the first place. The returned pointer is the -// v-table matching the IID if there. -//***************************************************************************** -HRESULT STDMETHODCALLTYPE MDClassFactory::CreateInstance( - IUnknown *pUnkOuter, - REFIID riid, - void **ppvObject) -{ - HRESULT hr; - - // Avoid confusion. - *ppvObject = NULL; - _ASSERTE(m_pCoClass); - - // Aggregation is not supported by these objects. - if (pUnkOuter) - IfFailGo(CLASS_E_NOAGGREGATION); - - // Ask the object to create an instance of itself, and check the iid. - hr = (*m_pCoClass->pfnCreateObject)(riid, ppvObject); - -ErrExit: - return hr; -} - -HRESULT STDMETHODCALLTYPE -MDClassFactory::LockServer( - BOOL fLock) -{ - // @FUTURE: Should we return E_NOTIMPL instead of S_OK? - return S_OK; -} - -#endif //FEATURE_METADATA_IN_VM diff --git a/src/coreclr/md/compiler/classfactory.h b/src/coreclr/md/compiler/classfactory.h deleted file mode 100644 index 86417708fcf9..000000000000 --- a/src/coreclr/md/compiler/classfactory.h +++ /dev/null @@ -1,94 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
-//***************************************************************************** -// ClassFactory.h -// - -// -// Class factories are used by the pluming in COM to activate new objects. -// This module contains the class factory code to instantiate the debugger -// objects described in . -// -//***************************************************************************** -#ifndef __ClassFactory__h__ -#define __ClassFactory__h__ - -#include "disp.h" - - -// This typedef is for a function which will create a new instance of an object. -typedef HRESULT (* PFN_CREATE_OBJ)(REFIID riid, void **ppvObject); - -//***************************************************************************** -// This structure is used to declare a global list of coclasses. The class -// factory object is created with a pointer to the correct one of these, so -// that when create instance is called, it can be created. -//***************************************************************************** -struct COCLASS_REGISTER -{ - const GUID *pClsid; // Class ID of the coclass. - LPCWSTR szProgID; // Prog ID of the class. - PFN_CREATE_OBJ pfnCreateObject; // Creation function for an instance. -}; - - - -//***************************************************************************** -// One class factory object satifies all of our clsid's, to reduce overall -// code bloat. -//***************************************************************************** -class MDClassFactory : - public IClassFactory -{ - MDClassFactory() { } // Can't use without data. - -public: - MDClassFactory(const COCLASS_REGISTER *pCoClass) - : m_cRef(1), m_pCoClass(pCoClass) - { } - - virtual ~MDClassFactory() {} - - // - // IUnknown methods. - // - - virtual HRESULT STDMETHODCALLTYPE QueryInterface( - REFIID riid, - void **ppvObject); - - virtual ULONG STDMETHODCALLTYPE AddRef() - { - return InterlockedIncrement(&m_cRef); - } - - virtual ULONG STDMETHODCALLTYPE Release() - { - LONG cRef = InterlockedDecrement(&m_cRef); - if (cRef <= 0) - delete this; - return (cRef); - } - - - // - // IClassFactory methods. - // - - virtual HRESULT STDMETHODCALLTYPE CreateInstance( - IUnknown *pUnkOuter, - REFIID riid, - void **ppvObject); - - virtual HRESULT STDMETHODCALLTYPE LockServer( - BOOL fLock); - - -private: - LONG m_cRef; // Reference count. - const COCLASS_REGISTER *m_pCoClass; // The class we belong to. -}; - - - -#endif // __ClassFactory__h__ diff --git a/src/coreclr/md/compiler/custattr_emit.cpp b/src/coreclr/md/compiler/custattr_emit.cpp index ba6b47c62284..491a85ef7f57 100644 --- a/src/coreclr/md/compiler/custattr_emit.cpp +++ b/src/coreclr/md/compiler/custattr_emit.cpp @@ -1001,10 +1001,6 @@ HRESULT RegMeta::_IsKnownCustomAttribute( // S_OK, S_FALSE, or error. //***************************************************************************** //***************************************************************************** -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif HRESULT RegMeta::_HandleKnownCustomAttribute( // S_OK or error. mdToken tkObj, // [IN] Object being attributed. const void *pData, // [IN] Custom Attribute data blob. @@ -1025,7 +1021,7 @@ HRESULT RegMeta::_HandleKnownCustomAttribute( // S_OK or error. CQuickArray qNativeType;// Native type string. _ASSERTE(ixCa > 0 && ixCa < CA_COUNT); - *bKeep = props->bKeepCa || m_bKeepKnownCa; + *bKeep = props->bKeepCa; // Validate that target is valid for attribute. 
tkObjType = TypeFromToken(tkObj); @@ -1448,16 +1444,9 @@ HRESULT RegMeta::_HandleKnownCustomAttribute( // S_OK or error. ErrExit: return hr; } // RegMeta::_HandleKnownCustomAttribute -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //***************************************************************************** //***************************************************************************** -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif HRESULT RegMeta::_HandleNativeTypeCustomAttribute(// S_OK or error. mdToken tkObj, // The token this CA is applied on. CaArg *pArgs, // Pointer to args. @@ -1983,9 +1972,6 @@ HRESULT RegMeta::_HandleNativeTypeCustomAttribute(// S_OK or error. CloseEnum(phEnum); return hr; } // RegMeta::_HandleNativeTypeCustomAttribute -#ifdef _PREFAST_ -#pragma warning(pop) -#endif #endif // !FEATURE_METADATA_EMIT_IN_DEBUGGER diff --git a/src/coreclr/md/compiler/disp.cpp b/src/coreclr/md/compiler/disp.cpp index 5d0393536252..c2a72bfac2fa 100644 --- a/src/coreclr/md/compiler/disp.cpp +++ b/src/coreclr/md/compiler/disp.cpp @@ -182,33 +182,10 @@ Disp::OpenRawScope( _ASSERTE(!IsOfReserved(dwOpenFlags)); #endif //!FEATURE_METADATA_LOAD_TRUSTED_IMAGES - { - } - if (IsOfReadOnly(dwOpenFlags) && IsOfReadWrite(dwOpenFlags)) { // Invalid combination of flags - ofReadOnly & ofWrite IfFailGo(E_INVALIDARG); } - // If open-for-read, and there is already an open-for-read copy, return it. - if (IsOfReadOnly(dwOpenFlags)) - { - RegMeta::FindCachedReadOnlyEntry(szFileName, dwOpenFlags, &pMeta); - if (pMeta != NULL) - { - // Return the requested interface. - hr = pMeta->QueryInterface(riid, (void **) ppIUnk); - if (FAILED(hr)) - { - pMeta = NULL; // Don't delete cached RegMeta! - } - else - { - pMeta->Release(); // Give back refcount from QI - } - - goto ErrExit; - } - } // Create a new coclass for this guy. pMeta = new (nothrow) RegMeta(); IfNullGo(pMeta); @@ -228,13 +205,7 @@ Disp::OpenRawScope( // Obtain the requested interface. IfFailGo(pMeta->QueryInterface(riid, (void **)ppIUnk) ); - // Add the new RegMeta to the cache. If this is read-only, any future opens will - // find this entry. If, due to another thread concurrently opening the same file, - // there is already another copy in the cache, well, then there will be two - // read-only copies in the cache. This is considered to be somewhat of a corner - // case, and the only harm is temporary memory usage. All requests will be - // satisfied by one or the other (depending on search algorithm), and eventually, - // the "other" copy will be released. + // Add the new RegMeta to the cache. IfFailGo(pMeta->AddToCache()); #if defined(_DEBUG) @@ -308,7 +279,7 @@ HRESULT Disp::OpenRawScopeOnMemory( // Return code. IfFailGo(pMeta->SetOption(&m_OptionValue)); - PREFIX_ASSUME(pMeta != NULL); + _ASSERTE(pMeta != NULL); // Always initialize the RegMeta's stgdb. IfFailGo(pMeta->OpenExistingMD(0 /* szFileName */, const_cast(pData), cbData, dwOpenFlags)); @@ -518,7 +489,7 @@ HRESULT Disp::OpenRawScopeOnCustomDataSource( // Return code. IfFailGo(pMeta->SetOption(&m_OptionValue)); - PREFIX_ASSUME(pMeta != NULL); + _ASSERTE(pMeta != NULL); // Always initialize the RegMeta's stgdb. // TODO IfFailGo(pMeta->OpenExistingMD(pDataSource, dwOpenFlags)); @@ -847,27 +818,13 @@ HRESULT Disp::GetOption( // Return code. 
return hr; } // Disp::GetOption -#if defined(FEATURE_METADATA_IN_VM) - -//--------------------------------------------------------------------------------------- -// -// Process detach destruction. -// Called from DllMain of clr.dll/RoMetadata.dll/MidlrtMd.dll. -// -void DeleteMetaData() -{ - LOADEDMODULES::DeleteStatics(); -} - -#endif //FEATURE_METADATA_IN_VM - // // This is the entrypoint for usages of MetaData that need to start with the dispenser (e.g. // mscordbi.dll and profiling API). // // Notes: // This could be merged with the class factory support. -HRESULT InternalCreateMetaDataDispenser(REFIID riid, void ** pMetaDataDispenserOut) +HRESULT CreateMetaDataDispenser(REFIID riid, void ** pMetaDataDispenserOut) { _ASSERTE(pMetaDataDispenserOut != NULL); return Disp::CreateObject(riid, pMetaDataDispenserOut); diff --git a/src/coreclr/md/compiler/emit.cpp b/src/coreclr/md/compiler/emit.cpp index 2c3b5ed56735..4477d55c215e 100644 --- a/src/coreclr/md/compiler/emit.cpp +++ b/src/coreclr/md/compiler/emit.cpp @@ -17,10 +17,6 @@ #ifdef FEATURE_METADATA_EMIT -#ifdef _MSC_VER -#pragma warning(disable: 4102) -#endif - //***************************************************************************** // Create and set a new MethodDef record. //***************************************************************************** @@ -2933,6 +2929,22 @@ HRESULT RegMeta::SetParamProps( // Return code. #endif //!FEATURE_METADATA_EMIT_IN_DEBUGGER } // RegMeta::SetParamProps +//***************************************************************************** +// Persist a set of security custom attributes into a set of permission set +// blobs on the same class or method. +// +// Notes: +// Only in the full version because this is an emit operation. +//***************************************************************************** +HRESULT RegMeta::DefineSecurityAttributeSet(// Return code. + mdToken tkObj, // [IN] Class or method requiring security attributes. + COR_SECATTR rSecAttrs[], // [IN] Array of security attribute descriptions. + ULONG cSecAttrs, // [IN] Count of elements in above array. + ULONG *pulErrorAttr) // [OUT] On error, index of attribute causing problem. +{ + return E_NOTIMPL; +} // RegMeta::DefineSecurityAttributeSet + //***************************************************************************** // Apply edit and continue changes to this metadata. //***************************************************************************** diff --git a/src/coreclr/md/compiler/filtermanager.cpp b/src/coreclr/md/compiler/filtermanager.cpp index 2e3f3003353d..a641f348bec1 100644 --- a/src/coreclr/md/compiler/filtermanager.cpp +++ b/src/coreclr/md/compiler/filtermanager.cpp @@ -28,7 +28,7 @@ HRESULT FilterManager::Mark(mdToken tk) goto ErrExit; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); switch ( TypeFromToken(tk) ) { @@ -171,7 +171,7 @@ HRESULT FilterManager::MarkCustomAttribute(mdCustomAttribute cv) CustomAttributeRec *pRec; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); IfFailGo( m_pMiniMd->GetFilterTable()->MarkCustomAttribute( cv ) ); @@ -192,7 +192,7 @@ HRESULT FilterManager::MarkDeclSecurity(mdPermission pe) HRESULT hr = NOERROR; // We know that the filter table is not null here. Tell PREFIX that we know it. 
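The disp.cpp hunk above renames `InternalCreateMetaDataDispenser` to `CreateMetaDataDispenser`; per the surrounding comment it stays the entry point for consumers such as mscordbi.dll and the profiling API that need a dispenser without COM activation. A hypothetical caller-side sketch follows; the `IMetaDataDispenserEx`/`IMetaDataImport` interfaces, their IIDs, and `ofReadOnly` are the standard cor.h definitions and are assumed here rather than shown by this diff.

```cpp
// Hypothetical usage sketch of the renamed entry point; error handling abbreviated.
#include <cor.h>

// Signature as introduced in disp.cpp, repeated here for the sketch.
HRESULT CreateMetaDataDispenser(REFIID riid, void** ppDispenser);

HRESULT OpenScopeForRead(LPCWSTR path, IMetaDataImport** ppImport)
{
    IMetaDataDispenserEx* pDisp = nullptr;
    HRESULT hr = CreateMetaDataDispenser(IID_IMetaDataDispenserEx, (void**)&pDisp);
    if (FAILED(hr))
        return hr;

    hr = pDisp->OpenScope(path, ofReadOnly, IID_IMetaDataImport, (IUnknown**)ppImport);
    pDisp->Release();
    return hr;
}
```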
- PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); IfFailGo( m_pMiniMd->GetFilterTable()->MarkDeclSecurity( pe ) ); ErrExit: @@ -214,7 +214,7 @@ HRESULT FilterManager::MarkStandAloneSig(mdSignature sig) IHostFilter *pFilter = m_pMiniMd->GetHostFilter(); // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if TypeRef is already marked, just return if (m_pMiniMd->GetFilterTable()->IsSignatureMarked(sig)) @@ -253,7 +253,7 @@ HRESULT FilterManager::MarkTypeSpec(mdTypeSpec ts) IHostFilter *pFilter = m_pMiniMd->GetHostFilter(); // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if TypeRef is already marked, just return if (m_pMiniMd->GetFilterTable()->IsTypeSpecMarked(ts)) @@ -294,7 +294,7 @@ HRESULT FilterManager::MarkTypeRef(mdTypeRef tr) mdToken parentTk; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if TypeRef is already marked, just return if (m_pMiniMd->GetFilterTable()->IsTypeRefMarked(tr)) @@ -313,7 +313,7 @@ HRESULT FilterManager::MarkTypeRef(mdTypeRef tr) } tkMap = m_pMiniMd->GetTypeRefToTypeDefMap(); - PREFIX_ASSUME(tkMap != NULL); + _ASSERTE(tkMap != NULL); td = *(tkMap->Get(RidFromToken(tr))); if ( td != mdTokenNil ) { @@ -346,7 +346,7 @@ HRESULT FilterManager::MarkMemberRef(mdMemberRef mr) mdToken tkParent; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if MemberRef is already marked, just return if (m_pMiniMd->GetFilterTable()->IsMemberRefMarked(mr)) @@ -375,7 +375,7 @@ HRESULT FilterManager::MarkMemberRef(mdMemberRef mr) IfFailGo( MarkSignature(pbSig, cbSize, &cbUsed) ); tkMap = m_pMiniMd->GetMemberRefToMemberDefMap(); - PREFIX_ASSUME(tkMap != NULL); + _ASSERTE(tkMap != NULL); md = *(tkMap->Get(RidFromToken(mr))); // can be fielddef or methoddef if ( RidFromToken(md) != mdTokenNil ) { @@ -402,7 +402,7 @@ HRESULT FilterManager::MarkUserString(mdString str) HRESULT hr = NOERROR; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if UserString is already marked, just return if (m_pMiniMd->GetFilterTable()->IsUserStringMarked(str)) @@ -423,7 +423,7 @@ HRESULT FilterManager::MarkNewUserString(mdString str) HRESULT hr = NOERROR; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); IfFailGo( m_pMiniMd->GetFilterTable()->MarkNewUserString( str ) ); @@ -444,7 +444,7 @@ HRESULT FilterManager::MarkMethodSpec(mdMethodSpec ms) PCCOR_SIGNATURE pbSig; // We know that the filter table is not null here. Tell PREFIX that we know it. 
- PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if MethodSpec is already marked, just return if (m_pMiniMd->GetFilterTable()->IsMethodSpecMarked(ms)) @@ -474,7 +474,7 @@ HRESULT FilterManager::MarkModuleRef(mdModuleRef mr) HRESULT hr = NOERROR; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if ModuleRef is already marked, just return if (m_pMiniMd->GetFilterTable()->IsModuleRefMarked(mr)) @@ -496,7 +496,7 @@ HRESULT FilterManager::MarkAssemblyRef(mdAssemblyRef ar) HRESULT hr = NOERROR; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if ModuleREf is already marked, just return if (m_pMiniMd->GetFilterTable()->IsAssemblyRefMarked(ar)) @@ -563,7 +563,7 @@ HRESULT FilterManager::MarkDeclSecuritiesWithParentToken(mdToken tkParent) DeclSecurityRec *pRec; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); if ( m_pMiniMd->IsSorted( TBL_DeclSecurity ) ) { @@ -633,7 +633,7 @@ HRESULT FilterManager::MarkParam(mdParamDef pd) HRESULT hr; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); IfFailGo( m_pMiniMd->GetFilterTable()->MarkParam( pd ) ); @@ -663,7 +663,7 @@ HRESULT FilterManager::MarkMethod(mdMethodDef md) IHostFilter *pFilter = m_pMiniMd->GetHostFilter(); // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if MethodDef is already marked, just return if (m_pMiniMd->GetFilterTable()->IsMethodMarked(md)) @@ -731,7 +731,7 @@ HRESULT FilterManager::MarkField(mdFieldDef fd) IHostFilter *pFilter = m_pMiniMd->GetHostFilter(); // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if FieldDef is already marked, just return if (m_pMiniMd->GetFilterTable()->IsFieldMarked(fd)) @@ -767,7 +767,7 @@ HRESULT FilterManager::MarkEvent(mdEvent ev) EventRec *pRec; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if Event is already marked, just return if (m_pMiniMd->GetFilterTable()->IsEventMarked(ev)) @@ -804,7 +804,7 @@ HRESULT FilterManager::MarkProperty(mdProperty pr) PCCOR_SIGNATURE pbSig; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if Property is already marked, just return if (m_pMiniMd->GetFilterTable()->IsPropertyMarked(pr)) @@ -897,7 +897,7 @@ HRESULT FilterManager::MarkMethodImplsWithParentToken(mdTypeDef td) HENUMInternal hEnum; // We know that the filter table is not null here. Tell PREFIX that we know it. 
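The hunks running through these metadata files replace `PREFIX_ASSUME(...)` with `_ASSERTE(...)`: the former was a hint for the PREfix/PREfast static analyzers, while the latter is a debug-build runtime check, which is all that remains once the PREfast-specific machinery is dropped. A sketch of the usual split between the two kinds of macro, using illustrative definitions rather than the actual CoreCLR ones (the metamodelrw.cpp hunk later in this diff also shows the `&& "message"` idiom for keeping a readable string in the failure output):

```cpp
// Sketch: analyzer hint vs. debug-only runtime assert. Illustrative macros only.
#include <cassert>
#include <cstring>
#include <cstdio>

#if defined(_PREFAST_)
  // Static-analysis build: tell the analyzer the condition holds.
  #define ANALYSIS_ASSUME(expr) __analysis_assume(!!(expr))
#else
  #define ANALYSIS_ASSUME(expr) ((void)0)
#endif

#ifdef _DEBUG
  // Debug build: check at run time.
  #define DEBUG_ASSERT(expr) assert(expr)
#else
  #define DEBUG_ASSERT(expr) ((void)0)
#endif

size_t BufferLength(const char* p)
{
    DEBUG_ASSERT((p != nullptr) && "caller must pass a valid buffer");
    ANALYSIS_ASSUME(p != nullptr);
    return std::strlen(p);
}

int main()
{
    std::printf("%zu\n", BufferLength("hello"));
    return 0;
}
```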
- PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); HENUMInternal::ZeroEnum(&hEnum); IfFailGo( m_pMiniMd->FindMethodImplHelper(td, &hEnum) ); @@ -1068,7 +1068,7 @@ HRESULT FilterManager::MarkInterfaceImpls( InterfaceImplRec *pRec; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); if ( m_pMiniMd->IsSorted(TBL_InterfaceImpl) ) { @@ -1110,7 +1110,7 @@ HRESULT FilterManager::MarkTypeDef( RID iNester; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if TypeDef is already marked, just return if (m_pMiniMd->GetFilterTable()->IsTypeDefMarked(td)) @@ -1380,7 +1380,7 @@ HRESULT FilterManager::UnmarkTypeDef( CustomAttributeRec *pCARec; // We know that the filter table is not null here. Tell PREFIX that we know it. - PREFIX_ASSUME(m_pMiniMd->GetFilterTable() != NULL); + _ASSERTE(m_pMiniMd->GetFilterTable() != NULL); // if TypeDef is already unmarked, just return if (m_pMiniMd->GetFilterTable()->IsTypeDefMarked(td) == false) diff --git a/src/coreclr/md/compiler/import.cpp b/src/coreclr/md/compiler/import.cpp index 910f4d0b9feb..098223ebef40 100644 --- a/src/coreclr/md/compiler/import.cpp +++ b/src/coreclr/md/compiler/import.cpp @@ -931,7 +931,7 @@ STDMETHODIMP RegMeta::FindMethod( if (szName == NULL) IfFailGo(E_INVALIDARG); - PREFIX_ASSUME(szName != NULL); + _ASSERTE(szName != NULL); // If this is a global method, then use the typedef as parent. IsGlobalMethodParent(&td); diff --git a/src/coreclr/md/compiler/mdutil.cpp b/src/coreclr/md/compiler/mdutil.cpp index 05b56a25875b..03f4d1a42956 100644 --- a/src/coreclr/md/compiler/mdutil.cpp +++ b/src/coreclr/md/compiler/mdutil.cpp @@ -23,16 +23,6 @@ LOADEDMODULES * LOADEDMODULES::s_pLoadedModules = NULL; UTSemReadWrite * LOADEDMODULES::m_pSemReadWrite = NULL; -RegMeta * LOADEDMODULES::m_HashedModules[LOADEDMODULES_HASH_SIZE] = { NULL }; - -//***************************************************************************** -// Hash a file name. -//***************************************************************************** -ULONG LOADEDMODULES::HashFileName( - LPCWSTR szName) -{ - return HashString(szName) % LOADEDMODULES_HASH_SIZE; -} // LOADEDMODULES::HashFileName //--------------------------------------------------------------------------------------- // @@ -77,27 +67,6 @@ LOADEDMODULES::InitializeStatics() return hr; } // LOADEDMODULES::InitializeStatics -//--------------------------------------------------------------------------------------- -// -// Destroy the static instance and lock. -// -void -LOADEDMODULES::DeleteStatics() -{ - HRESULT hr = S_OK; - - if (s_pLoadedModules != NULL) - { - delete s_pLoadedModules; - s_pLoadedModules = NULL; - } - if (m_pSemReadWrite != NULL) - { - delete m_pSemReadWrite; - m_pSemReadWrite = NULL; - } -} // LOADEDMODULES::DeleteStatics - //***************************************************************************** // Add a RegMeta pointer to the loaded module list //***************************************************************************** @@ -118,13 +87,6 @@ HRESULT LOADEDMODULES::AddModuleToLoadedList(RegMeta * pRegMeta) // point to the ref-count, because it just changes comparisons against 0 // to comparisons against 1. *ppRegMeta = pRegMeta; - - // If the module is read-only, hash it. 
- if (pRegMeta->IsReadOnly()) - { - ULONG ixHash = HashFileName(pRegMeta->GetNameOfDBFile()); - m_HashedModules[ixHash] = pRegMeta; - } } ErrExit: @@ -192,18 +154,6 @@ BOOL LOADEDMODULES::RemoveModuleFromLoadedList(RegMeta * pRegMeta) // that we're done with it. (Caller will delete.) s_pLoadedModules->Delete(iFound); bRemoved = TRUE; - - // If the module is read-only, remove from hash. - if (pRegMeta->IsReadOnly()) - { - // There may have been multiple capitalizations pointing to the same entry. - // Find and remove all of them. - for (ULONG ixHash = 0; ixHash < LOADEDMODULES_HASH_SIZE; ++ixHash) - { - if (m_HashedModules[ixHash] == pRegMeta) - m_HashedModules[ixHash] = NULL; - } - } } } @@ -211,117 +161,6 @@ BOOL LOADEDMODULES::RemoveModuleFromLoadedList(RegMeta * pRegMeta) return bRemoved; } // LOADEDMODULES::RemoveModuleFromLoadedList - -//***************************************************************************** -// Search the cached RegMetas for a given scope. -//***************************************************************************** -HRESULT LOADEDMODULES::FindCachedReadOnlyEntry( - LPCWSTR szName, // Name of the desired file. - DWORD dwOpenFlags, // Flags the new file is opened with. - RegMeta ** ppMeta) // Put found RegMeta here. -{ - RegMeta * pRegMeta = 0; - BOOL bWillBeCopyMemory; // Will the opened file be copied to memory? - DWORD dwLowFileSize; // Low bytes of this file's size - DWORD dwLowFileTime; // Low butes of this file's last write time - HRESULT hr; - ULONG ixHash = 0; - - IfFailGo(InitializeStatics()); - - { - LOCKREAD(); - - hr = S_FALSE; // We haven't found a match yet. - - // Avoid confusion. - *ppMeta = NULL; - - bWillBeCopyMemory = IsOfCopyMemory(dwOpenFlags); - - // The cache is locked for read, so the list will not change. - - // Figure out the size and timestamp of this file - WIN32_FILE_ATTRIBUTE_DATA faData; - if (!WszGetFileAttributesEx(szName, GetFileExInfoStandard, &faData)) - return E_FAIL; - dwLowFileSize = faData.nFileSizeLow; - dwLowFileTime = faData.ftLastWriteTime.dwLowDateTime; - - // Check the hash first. - ixHash = HashFileName(szName); - if ((pRegMeta = m_HashedModules[ixHash]) != NULL) - { - _ASSERTE(pRegMeta->IsReadOnly()); - - // Only match if the IsOfCopyMemory() bit is the same in both. This is because - // when ofCopyMemory is set, the file is not locked on disk, and may become stale - // in memory. - // - // Also, only match if the date and size are the same - if (pRegMeta->IsCopyMemory() == bWillBeCopyMemory && - pRegMeta->GetLowFileTimeOfDBFile() == dwLowFileTime && - pRegMeta->GetLowFileSizeOfDBFile() == dwLowFileSize) - { - // If the name matches... - LPCWSTR pszName = pRegMeta->GetNameOfDBFile(); - if (SString::_wcsicmp(szName, pszName) == 0) - { - ULONG cRefs; - - // Found it. Add a reference, and return it. - *ppMeta = pRegMeta; - cRefs = pRegMeta->AddRef(); - - LOG((LF_METADATA, LL_INFO10, "Disp::OpenScope found cached RegMeta in hash: %#8x, crefs: %d\n", pRegMeta, cRefs)); - - return S_OK; - } - } - } - - // Not found in hash; loop through each loaded modules - int count = s_pLoadedModules->Count(); - for (int index = 0; index < count; index++) - { - pRegMeta = (*s_pLoadedModules)[index]; - - // If the module is read-only, and the CopyMemory bit matches, and the date - // and size are the same.... - if (pRegMeta->IsReadOnly() && - pRegMeta->IsCopyMemory() == bWillBeCopyMemory && - pRegMeta->GetLowFileTimeOfDBFile() == dwLowFileTime && - pRegMeta->GetLowFileSizeOfDBFile() == dwLowFileSize) - { - // If the name matches... 
- LPCWSTR pszName = pRegMeta->GetNameOfDBFile(); - if (SString::_wcsicmp(szName, pszName) == 0) - { - ULONG cRefs; - - // Found it. Add a reference, and return it. - *ppMeta = pRegMeta; - cRefs = pRegMeta->AddRef(); - - // Update the hash. - m_HashedModules[ixHash] = pRegMeta; - - LOG((LF_METADATA, LL_INFO10, "Disp::OpenScope found cached RegMeta by search: %#8x, crefs: %d\n", pRegMeta, cRefs)); - - return S_OK; - } - } - } - } - -ErrExit: - // Didn't find it. - LOG((LF_METADATA, LL_INFO10, "Disp::OpenScope did not find cached RegMeta\n")); - - _ASSERTE(hr != S_OK); - return hr; -} // LOADEDMODULES::FindCachedReadOnlyEntry - #ifdef _DEBUG //***************************************************************************** diff --git a/src/coreclr/md/compiler/mdutil.h b/src/coreclr/md/compiler/mdutil.h index ecfd85650297..a66a2125c440 100644 --- a/src/coreclr/md/compiler/mdutil.h +++ b/src/coreclr/md/compiler/mdutil.h @@ -39,7 +39,6 @@ class RegMeta; // //********************************************************************* class UTSemReadWrite; -#define LOADEDMODULES_HASH_SIZE 47 class LOADEDMODULES : public CDynArray { @@ -50,20 +49,12 @@ class LOADEDMODULES : public CDynArray static LOADEDMODULES * s_pLoadedModules; public: - static void DeleteStatics(); - // Named for locking macros - see code:LOCKREAD static UTSemReadWrite * m_pSemReadWrite; - static RegMeta *m_HashedModules[LOADEDMODULES_HASH_SIZE]; - - static ULONG HashFileName(LPCWSTR szName); static HRESULT AddModuleToLoadedList(RegMeta *pRegMeta); static BOOL RemoveModuleFromLoadedList(RegMeta *pRegMeta); // true if found and removed. - static HRESULT FindCachedReadOnlyEntry(LPCWSTR szName, DWORD dwOpenFlags, RegMeta **ppMeta); - -#ifdef FEATURE_METADATA_IN_VM static HRESULT ResolveTypeRefWithLoadedModules( mdTypeRef tkTypeRef, // [IN] TypeRef to be resolved. RegMeta * pTypeRefRegMeta, // [IN] Scope in which the TypeRef is defined. @@ -71,7 +62,6 @@ class LOADEDMODULES : public CDynArray REFIID riid, // [IN] iid for the return interface. IUnknown ** ppIScope, // [OUT] Return interface. mdTypeDef * ptd); // [OUT] TypeDef corresponding the TypeRef. -#endif //FEATURE_METADATA_IN_VM #ifdef _DEBUG static BOOL IsEntryInList(RegMeta *pRegMeta); diff --git a/src/coreclr/md/compiler/regmeta.cpp b/src/coreclr/md/compiler/regmeta.cpp index 31f447952462..b635d43b2124 100644 --- a/src/coreclr/md/compiler/regmeta.cpp +++ b/src/coreclr/md/compiler/regmeta.cpp @@ -25,22 +25,13 @@ #include "mdinternalrw.h" - #include -#define DEFINE_CUSTOM_NODUPCHECK 1 -#define DEFINE_CUSTOM_DUPCHECK 2 -#define SET_CUSTOM 3 - #if defined(_DEBUG) #define LOGGING #endif #include -#ifdef _MSC_VER -#pragma warning(disable: 4102) -#endif - RegMeta::RegMeta() : m_pStgdb(0), m_pStgdbFreeList(NULL), @@ -63,10 +54,8 @@ RegMeta::RegMeta() : m_cRef(0), m_pFreeThreadedMarshaler(NULL), m_bCached(false), - m_trLanguageType(0), m_SetAPICaller(EXTERNAL_CALLER), m_ModuleType(ValidatorModuleTypeInvalid), - m_bKeepKnownCa(false), m_ReorderingOptions(NoReordering) #ifdef FEATURE_METADATA_RELEASE_MEMORY_ON_REOPEN , m_safeToDeleteStgdb(true) @@ -79,8 +68,6 @@ RegMeta::RegMeta() : { _ASSERTE(!"RegMeta()"); } - if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_MD_KeepKnownCA)) - m_bKeepKnownCa = true; #endif // _DEBUG } // RegMeta::RegMeta() @@ -1418,9 +1405,9 @@ HRESULT RegMeta::ReOpenWithMemory( //***************************************************************************** // This function returns the requested public interface based on the given // internal import interface. 
-// A common path to call this is updating the matedata for dynamic modules. +// A common path to call this is updating the metadata for dynamic modules. //***************************************************************************** -STDAPI MDReOpenMetaDataWithMemoryEx( +STDAPI MDReOpenMetaDataWithMemory( void *pImport, // [IN] Given scope. public interfaces LPCVOID pData, // [in] Location of scope data. ULONG cbData, // [in] Size of the data pointed to by pData. @@ -1443,53 +1430,7 @@ STDAPI MDReOpenMetaDataWithMemoryEx( pMDImport->Release(); return hr; -} // MDReOpenMetaDataWithMemoryEx - -STDAPI MDReOpenMetaDataWithMemory( - void *pImport, // [IN] Given scope. public interfaces - LPCVOID pData, // [in] Location of scope data. - ULONG cbData) // [in] Size of the data pointed to by pData. -{ - return MDReOpenMetaDataWithMemoryEx(pImport, pData, cbData, 0); -} - -// -------------------------------------------------------------------------------------- -// -// Zeros used by public APIs as return value (or pointer to this memory) for invalid input. -// It is used by methods: -// * code:RegMeta::GetPublicApiCompatibilityZeros, and -// * code:RegMeta::GetPublicApiCompatibilityZerosOfSize. -// -const BYTE -RegMeta::s_rgMetaDataPublicApiCompatibilityZeros[64] = -{ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// -------------------------------------------------------------------------------------- -// -// Returns pointer to zeros of size (cbSize). -// Used by public APIs to return compatible values with previous releases. -// -const BYTE * -RegMeta::GetPublicApiCompatibilityZerosOfSize(UINT32 cbSize) -{ - if (cbSize <= sizeof(s_rgMetaDataPublicApiCompatibilityZeros)) - { - return s_rgMetaDataPublicApiCompatibilityZeros; - } - _ASSERTE(!"Dangerous call to this method! Reconsider fixing the caller."); - return NULL; -} // RegMeta::GetPublicApiCompatibilityZerosOfSize - - +} // MDReOpenMetaDataWithMemory // diff --git a/src/coreclr/md/compiler/regmeta.h b/src/coreclr/md/compiler/regmeta.h index e575cdf992ea..4f19d5d1ea91 100644 --- a/src/coreclr/md/compiler/regmeta.h +++ b/src/coreclr/md/compiler/regmeta.h @@ -33,19 +33,10 @@ struct CORDBG_SYMBOL_URL GUID FormatID; // ID of the format type. WCHAR rcName[2]; // Variable sized name of the item. -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:6305) // "Potential mismatch between sizeof and countof quantities" -#endif - ULONG Size() const { return (ULONG)(sizeof(GUID) + ((u16_strlen(rcName) + 1) * 2)); } - -#ifdef _PREFAST_ -#pragma warning(pop) -#endif }; @@ -737,7 +728,6 @@ class RegMeta : STDMETHODIMP GetAssemblyFromScope( // S_OK or error mdAssembly *ptkAssembly); // [OUT] Put token here. - // This uses Fusion to lookup, so it's E_NOTIMPL in the standalone versions. 
STDMETHODIMP FindAssembliesByName( // S_OK or error LPCWSTR szAppBase, // [IN] optional - can be NULL LPCWSTR szPrivateBin, // [IN] optional - can be NULL @@ -1569,16 +1559,12 @@ class RegMeta : HRESULT SetOption(OptionValue *pOptionValue); - // HRESULT Init(); - // void Cleanup(); - HRESULT InitWithStgdb( IUnknown *pUnk, // The IUnknown that owns the life time for the existing stgdb CLiteWeightStgdbRW *pStgdb); // existing light weight stgdb ULONG GetRefCount() { return m_cRef; } HRESULT AddToCache(); - static HRESULT FindCachedReadOnlyEntry(LPCWSTR szName, DWORD dwOpenFlags, RegMeta **ppMeta); BOOL IsReadOnly() { return IsOfReadOnly(m_OpenFlags); } BOOL IsCopyMemory() { return IsOfCopyMemory(m_OpenFlags); } @@ -1623,9 +1609,6 @@ class RegMeta : return (m_OptionValue.m_ThreadSafetyOptions & MDThreadSafetyOn) == MDThreadSafetyOn; } - LPCWSTR GetNameOfDBFile() { return (m_pStgdb->m_wszFileName == NULL) ? W("") : m_pStgdb->m_wszFileName; } - DWORD GetLowFileTimeOfDBFile() { return m_pStgdb->m_dwDatabaseLFT; } - DWORD GetLowFileSizeOfDBFile() { return m_pStgdb->m_dwDatabaseLFS; } protected: // Helper functions used for implementation of MetaData APIs. HRESULT RefToDefOptimization(); @@ -1637,9 +1620,6 @@ class RegMeta : HRESULT PreSave(); - // Initialize the EE - HRESULT StartupEE(); - // Define a TypeRef given the name. enum eCheckDups {eCheckDefault=0, eCheckNo=1, eCheckYes=2}; @@ -2039,14 +2019,11 @@ class RegMeta : LONG m_cRef; // Ref count. IUnknown *m_pFreeThreadedMarshaler; // FreeThreadedMarshaler - // If true, cached in list of global scopes. This is very dangerous because it may allow - // unpredictable state sharing between seemingly unrelated dispensers. + // If true, cached in list of global scopes. bool m_bCached; OptionValue m_OptionValue; - mdTypeRef m_trLanguageType; - // Specifies whether the caller of the Set API is one of the Define functions // or an external API. This allows for performance optimization in the Set APIs // by not checking for Duplicates in certain cases. @@ -2058,8 +2035,6 @@ class RegMeta : SHash m_caHash; // Hashed list of custom attribute types seen. #endif - bool m_bKeepKnownCa; // Should all known CA's be kept? - MetaDataReorderingOptions m_ReorderingOptions; #ifdef FEATURE_METADATA_RELEASE_MEMORY_ON_REOPEN @@ -2069,23 +2044,6 @@ class RegMeta : // TRUE in order to delete safely. #endif -private: - // Returns pointer to zeros of size (cbSize). - // Used by public APIs to return compatible values with previous releases. - static const BYTE *GetPublicApiCompatibilityZerosOfSize(UINT32 cbSize); - // Returns pointer to zeros typed as type T. - // Used by public APIs to return compatible values with previous releases. - template - T *GetPublicApiCompatibilityZeros() - { - static_assert_no_msg(sizeof(T) <= sizeof(s_rgMetaDataPublicApiCompatibilityZeros)); - return reinterpret_cast(s_rgMetaDataPublicApiCompatibilityZeros); - } - // Zeros used by public APIs as return value (or pointer to this memory) for invalid input. - // It is used by methods: - // * code:RegMeta::GetPublicApiCompatibilityZeros, and - // * code:RegMeta::GetPublicApiCompatibilityZerosOfSize. 
- static const BYTE s_rgMetaDataPublicApiCompatibilityZeros[64]; }; // class RegMeta diff --git a/src/coreclr/md/compiler/regmeta_compilersupport.cpp b/src/coreclr/md/compiler/regmeta_compilersupport.cpp index 60bedca6a70b..4dfb1c4e767b 100644 --- a/src/coreclr/md/compiler/regmeta_compilersupport.cpp +++ b/src/coreclr/md/compiler/regmeta_compilersupport.cpp @@ -24,19 +24,11 @@ #include -#define DEFINE_CUSTOM_NODUPCHECK 1 -#define DEFINE_CUSTOM_DUPCHECK 2 -#define SET_CUSTOM 3 - #if defined(_DEBUG) #define LOGGING #endif #include -#ifdef _MSC_VER -#pragma warning(disable: 4102) -#endif - #ifdef FEATURE_METADATA_EMIT //***************************************************************************** diff --git a/src/coreclr/md/compiler/regmeta_emit.cpp b/src/coreclr/md/compiler/regmeta_emit.cpp index eba625b297ef..34942c408b59 100644 --- a/src/coreclr/md/compiler/regmeta_emit.cpp +++ b/src/coreclr/md/compiler/regmeta_emit.cpp @@ -27,19 +27,11 @@ #include -#define DEFINE_CUSTOM_NODUPCHECK 1 -#define DEFINE_CUSTOM_DUPCHECK 2 -#define SET_CUSTOM 3 - #if defined(_DEBUG) #define LOGGING #endif #include -#ifdef _MSC_VER -#pragma warning(disable: 4102) -#endif - #ifdef FEATURE_METADATA_EMIT //***************************************************************************** @@ -908,7 +900,7 @@ HRESULT RegMeta::_DefineTypeRef( { szUTF8FullQualName = (LPUTF8)szName; } - PREFIX_ASSUME(szUTF8FullQualName != NULL); + _ASSERTE(szUTF8FullQualName != NULL); ulStringLen = (ULONG)(strlen(szUTF8FullQualName) + 1); IfFailGo(qbNamespace.ReSizeNoThrow(ulStringLen)); @@ -1276,7 +1268,7 @@ HRESULT RegMeta::_DefineEvent( // Return hresult. mdEvent mdEv; LPUTF8 szUTF8Event; UTF8STR(szEvent, szUTF8Event); - PREFIX_ASSUME(szUTF8Event != NULL); + _ASSERTE(szUTF8Event != NULL); @@ -1835,7 +1827,7 @@ HRESULT RegMeta::_DefineTypeDef( // S_OK or error. _ASSERTE(IsNilToken(tdEncloser) || IsTdNested(dwTypeDefFlags)); UTF8STR(szTypeDef, szTypeDefUTF8); - PREFIX_ASSUME(szTypeDefUTF8 != NULL); + _ASSERTE(szTypeDefUTF8 != NULL); ulStringLen = (ULONG)(strlen(szTypeDefUTF8) + 1); IfFailGo(qbNamespace.ReSizeNoThrow(ulStringLen)); diff --git a/src/coreclr/md/compiler/regmeta_imetadatatables.cpp b/src/coreclr/md/compiler/regmeta_imetadatatables.cpp index 25b64d3261cc..10bbabe473fd 100644 --- a/src/coreclr/md/compiler/regmeta_imetadatatables.cpp +++ b/src/coreclr/md/compiler/regmeta_imetadatatables.cpp @@ -174,7 +174,7 @@ HRESULT RegMeta::GetGuid( if (ixGuid == 0) { // Return zeros - *ppGuid = GetPublicApiCompatibilityZeros(); + *ppGuid = &GUID_NULL; hr = S_OK; } else diff --git a/src/coreclr/md/compiler/regmeta_import.cpp b/src/coreclr/md/compiler/regmeta_import.cpp index dd260d2f2ff8..97c011790aba 100644 --- a/src/coreclr/md/compiler/regmeta_import.cpp +++ b/src/coreclr/md/compiler/regmeta_import.cpp @@ -27,19 +27,11 @@ #include -#define DEFINE_CUSTOM_NODUPCHECK 1 -#define DEFINE_CUSTOM_DUPCHECK 2 -#define SET_CUSTOM 3 - #if defined(_DEBUG) #define LOGGING #endif #include -#ifdef _MSC_VER -#pragma warning(disable: 4102) -#endif - //***************************************************************************** // determine if a token is valid or not //***************************************************************************** @@ -577,7 +569,7 @@ STDMETHODIMP RegMeta::FindTypeDefByName(// S_OK or error. 
if (wzTypeDef == NULL) IfFailGo(E_INVALIDARG); - PREFIX_ASSUME(wzTypeDef != NULL); + _ASSERTE(wzTypeDef != NULL); LPSTR szTypeDef; UTF8STR(wzTypeDef, szTypeDef); LPCSTR szNamespace; @@ -937,7 +929,7 @@ STDMETHODIMP RegMeta::FindTypeRef( // S_OK or error. LOCKREAD(); // Convert the name to UTF8. - PREFIX_ASSUME(wzTypeName != NULL); // caller might pass NULL, but they'll AV. + _ASSERTE(wzTypeName != NULL); // caller might pass NULL, but they'll AV. UTF8STR(wzTypeName, szFullName); ns::SplitInline(szFullName, szNamespace, szName); diff --git a/src/coreclr/md/compiler/regmeta_vm.cpp b/src/coreclr/md/compiler/regmeta_vm.cpp index 0a1ea82e46c9..7d4840f1de4b 100644 --- a/src/coreclr/md/compiler/regmeta_vm.cpp +++ b/src/coreclr/md/compiler/regmeta_vm.cpp @@ -26,19 +26,11 @@ #include -#define DEFINE_CUSTOM_NODUPCHECK 1 -#define DEFINE_CUSTOM_DUPCHECK 2 -#define SET_CUSTOM 3 - #if defined(_DEBUG) #define LOGGING #endif #include -#ifdef _MSC_VER -#pragma warning(disable: 4102) -#endif - //***************************************************************************** // Call this after initialization is complete. //***************************************************************************** @@ -67,61 +59,6 @@ HRESULT RegMeta::AddToCache() } // RegMeta::AddToCache -//***************************************************************************** -// Search the cached RegMetas for a given scope. -//***************************************************************************** -HRESULT RegMeta::FindCachedReadOnlyEntry( - LPCWSTR szName, // Name of the desired file. - DWORD dwOpenFlags, // Flags the new file is opened with. - RegMeta **ppMeta) // Put found RegMeta here. -{ -#if defined(FEATURE_METADATA_IN_VM) - return LOADEDMODULES::FindCachedReadOnlyEntry(szName, dwOpenFlags, ppMeta); -#else // FEATURE_METADATA_IN_VM - // No cache support in standalone version. - *ppMeta = NULL; - return S_FALSE; -#endif // FEATURE_METADATA_IN_VM -} // RegMeta::FindCachedReadOnlyEntry - - -#ifdef FEATURE_METADATA_EMIT_ALL - -//***************************************************************************** -// Helper function to startup the EE -// -// Notes: -// This is called by code:RegMeta.DefineSecurityAttributeSet. -//***************************************************************************** -HRESULT RegMeta::StartupEE() -{ - UNREACHABLE_MSG_RET("About to CoCreateInstance! This code should not be " - "reachable or needs to be reimplemented for CoreCLR!"); -} - -#endif //FEATURE_METADATA_EMIT_ALL - -#ifdef FEATURE_METADATA_EMIT - -//***************************************************************************** -// Persist a set of security custom attributes into a set of permission set -// blobs on the same class or method. -// -// Notes: -// Only in the full version because this is an emit operation. -//***************************************************************************** -HRESULT RegMeta::DefineSecurityAttributeSet(// Return code. - mdToken tkObj, // [IN] Class or method requiring security attributes. - COR_SECATTR rSecAttrs[], // [IN] Array of security attribute descriptions. - ULONG cSecAttrs, // [IN] Count of elements in above array. - ULONG *pulErrorAttr) // [OUT] On error, index of attribute causing problem. -{ - return E_NOTIMPL; -} // RegMeta::DefineSecurityAttributeSet - -#endif //FEATURE_METADATA_EMIT - - //***************************************************************************** // Implementation of IMetaDataImport::ResolveTypeRef to resolve a typeref across scopes. 
// @@ -139,11 +76,11 @@ HRESULT RegMeta::DefineSecurityAttributeSet(// Return code. // This resolve (type-ref, this cope) --> (type-def=*ptd, other scope=*ppIScope) // // However, this resolution requires knowing what modules have been loaded, which is not decided -// until runtime via loader / fusion policy. Thus this interface can't possibly be correct since -// it doesn't have that knowledge. Furthermore, when inspecting metadata from another process -// (such as a debugger inspecting the debuggee's metadata), this API can be truly misleading. +// until runtime via loader. Thus this interface can't possibly be correct since it doesn't have +// that knowledge. Furthermore, when inspecting metadata from another process (such as a debugger +// inspecting the debuggee's metadata), this API can be truly misleading. // -// This API usage should be avoided. +// This API usage should be avoided. It is kept to avoid breaking profilers. // //***************************************************************************** STDMETHODIMP @@ -157,7 +94,7 @@ RegMeta::ResolveTypeRef( HRESULT hr; TypeRefRec * pTypeRefRec; - WCHAR wzNameSpace[_MAX_PATH]; + WCHAR wzNameSpace[MAX_PATH]; CMiniMdRW * pMiniMd = NULL; LOCKREAD(); @@ -206,9 +143,6 @@ RegMeta::ResolveTypeRef( wzNameSpace[STRING_LENGTH(wzNameSpace)] = 0; } - //*********************** - // before we go off to CORPATH, check the loaded modules! - //*********************** if (LOADEDMODULES::ResolveTypeRefWithLoadedModules( tr, this, diff --git a/src/coreclr/md/datasource/targettypes.cpp b/src/coreclr/md/datasource/targettypes.cpp index cdf19b1e5bca..3cdcc6ddb2ec 100644 --- a/src/coreclr/md/datasource/targettypes.cpp +++ b/src/coreclr/md/datasource/targettypes.cpp @@ -460,9 +460,7 @@ m_dwMachine(0), m_pStreamList(0), m_pNextStgdb(0), m_eFileType(0), -m_wszFileName(0), -m_dwDatabaseLFT(0), -m_dwDatabaseLFS(0) +m_wszFileName(0) {} HRESULT Target_CLiteWeightStgdbRW::ReadFrom(DataTargetReader & reader) @@ -480,8 +478,6 @@ HRESULT Target_CLiteWeightStgdbRW::ReadFrom(DataTargetReader & reader) IfFailRet(reader.ReadPointer(&m_pNextStgdb)); IfFailRet(reader.Read32(&m_eFileType)); IfFailRet(reader.ReadPointer(&m_wszFileName)); - IfFailRet(reader.Read32(&m_dwDatabaseLFT)); - IfFailRet(reader.Read32(&m_dwDatabaseLFS)); IfFailRet(reader.ReadPointer(&m_pStgIO)); return S_OK; } diff --git a/src/coreclr/md/datasource/targettypes.h b/src/coreclr/md/datasource/targettypes.h index 70eae213c2c5..2a5ae29ef87e 100644 --- a/src/coreclr/md/datasource/targettypes.h +++ b/src/coreclr/md/datasource/targettypes.h @@ -322,8 +322,6 @@ class Target_CLiteWeightStgdbRW : public Target_CLiteWeightStgdb_CMiniMdRW CORDB_ADDRESS m_pNextStgdb; ULONG32 m_eFileType; CORDB_ADDRESS m_wszFileName; - ULONG32 m_dwDatabaseLFT; - ULONG32 m_dwDatabaseLFS; CORDB_ADDRESS m_pStgIO; }; diff --git a/src/coreclr/md/enc/liteweightstgdbrw.cpp b/src/coreclr/md/enc/liteweightstgdbrw.cpp index 3b2e2db60429..df43ef1bcfdc 100644 --- a/src/coreclr/md/enc/liteweightstgdbrw.cpp +++ b/src/coreclr/md/enc/liteweightstgdbrw.cpp @@ -305,12 +305,6 @@ HRESULT CLiteWeightStgdbRW::OpenForRead( if (!szDatabase) szDatabase = pNoFile; - // Sanity check the name lentgh. - if (!IsValidFileNameLength(szDatabase)) - { - IfFailGo(E_INVALIDARG); - } - // If we have storage to work with, init it and get type. if (*szDatabase || pbData) { @@ -409,16 +403,6 @@ HRESULT CLiteWeightStgdbRW::OpenForRead( // Save off everything. IfFailGo(SetFileName(szDatabase)); - // If this was a file... 
- if (pbData == NULL) - { - WIN32_FILE_ATTRIBUTE_DATA faData; - if (!WszGetFileAttributesEx(szDatabase, GetFileExInfoStandard, &faData)) - IfFailGo(E_FAIL); - m_dwDatabaseLFS = faData.nFileSizeLow; - m_dwDatabaseLFT = faData.ftLastWriteTime.dwLowDateTime; - } - ErrExit: if (SUCCEEDED(hr)) { @@ -922,12 +906,6 @@ HRESULT CLiteWeightStgdbRW::Save( IfFailGo(SetFileName(szDatabase)); } - // Sanity check the name. - if (!IsValidFileNameLength(m_wszFileName)) - { - IfFailGo(E_INVALIDARG); - } - m_eFileType = FILETYPE_CLB; // Allocate a new storage object. @@ -1177,15 +1155,3 @@ CLiteWeightStgdbRW::SetFileName( ErrExit: return hr; } // CLiteWeightStgdbRW::SetFileName - -//======================================================================================= -// -// Returns TRUE if wszFileName has valid path length (MAX_PATH or 32767 if prefixed with \\?\). -// -//static -BOOL -CLiteWeightStgdbRW::IsValidFileNameLength( - const WCHAR * wszFileName) -{ - return TRUE; -} // CLiteWeightStgdbRW::IsValidFileNameLength diff --git a/src/coreclr/md/enc/mdinternalrw.cpp b/src/coreclr/md/enc/mdinternalrw.cpp index e9735004e846..01fdc0e4b78d 100644 --- a/src/coreclr/md/enc/mdinternalrw.cpp +++ b/src/coreclr/md/enc/mdinternalrw.cpp @@ -942,11 +942,6 @@ HRESULT MDInternalRW::EnumGlobalFieldsInit( // return hresult return EnumInit(mdtFieldDef, m_tdModule, phEnum); } // MDInternalRW::EnumGlobalFieldsInit - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif //***************************************** // Enumerator initializer //***************************************** @@ -1363,9 +1358,6 @@ HRESULT MDInternalRW::EnumInit( // return S_FALSE if record not found return hr; } // MDInternalRW::EnumInit -#ifdef _PREFAST_ -#pragma warning(pop) -#endif //***************************************** // Enumerator initializer diff --git a/src/coreclr/md/enc/metamodelrw.cpp b/src/coreclr/md/enc/metamodelrw.cpp index 45b75bb09ba1..c033ae063c57 100644 --- a/src/coreclr/md/enc/metamodelrw.cpp +++ b/src/coreclr/md/enc/metamodelrw.cpp @@ -598,7 +598,7 @@ class CQuickSortMiniMdRW return S_OK; } - PREFAST_ASSUME_MSG(m_iElemSize <= (int) sizeof(m_buf), "The MetaData table row has to fit into buffer for swapping."); + _ASSERTE((m_iElemSize <= (int) sizeof(m_buf)) && "The MetaData table row has to fit into buffer for swapping."); IfFailRet(getRow(iFirst, &pFirst)); IfFailRet(getRow(iSecond, &pSecond)); @@ -1040,7 +1040,7 @@ CMiniMdRW::CalculateTypeRefToTypeDefMap() mdToken td; mdToken tkResScope; - PREFIX_ASSUME(GetTypeRefToTypeDefMap() != NULL); + _ASSERTE(GetTypeRefToTypeDefMap() != NULL); for (index = 1; index <= m_Schema.m_cRecs[TBL_TypeRef]; index++) { @@ -2389,10 +2389,6 @@ bool CMiniMdRW::CanHaveCustomAttribute( // Can a given table have a custom attri } // CMiniMdRW::CanHaveCustomAttribute #endif //_DEBUG -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif //--------------------------------------------------------------------------------------- // // Perform any available pre-save optimizations. @@ -2866,7 +2862,7 @@ CMiniMdRW::PreSaveFull() if (m_pHandler != NULL) { TOKENMAP * ptkmap = GetMemberRefToMemberDefMap(); - PREFIX_ASSUME(ptkmap != NULL); // RegMeta always inits this. + _ASSERTE(ptkmap != NULL); // RegMeta always inits this. 
MDTOKENMAP * ptkRemap = GetTokenMovementMap(); int iCount = m_Schema.m_cRecs[TBL_MemberRef]; mdToken tkTo; @@ -2917,10 +2913,6 @@ CMiniMdRW::PreSaveFull() return hr; } // CMiniMdRW::PreSaveFull -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - //--------------------------------------------------------------------------------------- // // ENC-specific pre-safe work. @@ -6698,7 +6690,7 @@ CMiniMdRW::FindParentOfMethodHelper( for (indexMd = ridStart; indexMd < ridEnd; indexMd++) { IfFailGo(GetMethodPtrRecord(indexMd, &pMethodPtrRec)); - PREFIX_ASSUME(pMethodMap->Get(getMethodOfMethodPtr(pMethodPtrRec)) != NULL); + _ASSERTE(pMethodMap->Get(getMethodOfMethodPtr(pMethodPtrRec)) != NULL); *(pMethodMap->Get(getMethodOfMethodPtr(pMethodPtrRec))) = indexTd; } } @@ -6761,7 +6753,7 @@ CMiniMdRW::FindParentOfFieldHelper( for (indexFd = ridStart; indexFd < ridEnd; indexFd++) { IfFailGo(GetFieldPtrRecord(indexFd, &pFieldPtrRec)); - PREFIX_ASSUME(pFieldMap->Get(getFieldOfFieldPtr(pFieldPtrRec)) != NULL); + _ASSERTE(pFieldMap->Get(getFieldOfFieldPtr(pFieldPtrRec)) != NULL); *(pFieldMap->Get(getFieldOfFieldPtr(pFieldPtrRec))) = indexTd; } } @@ -6825,7 +6817,7 @@ CMiniMdRW::FindParentOfPropertyHelper( { IfFailGo(GetPropertyPtrRecord(indexPr, &pPropertyPtrRec)); mdToken *tok = pPropertyMap->Get(getPropertyOfPropertyPtr(pPropertyPtrRec)); - PREFIX_ASSUME(tok != NULL); + _ASSERTE(tok != NULL); *tok = getParentOfPropertyMap(pPropertyMapRec); } } @@ -6894,7 +6886,7 @@ CMiniMdRW::FindParentOfEventHelper( { IfFailGo(GetEventPtrRecord(indexEv, &pEventPtrRec)); mdToken* tok = pEventMap->Get(getEventOfEventPtr(pEventPtrRec)); - PREFIX_ASSUME(tok != NULL); + _ASSERTE(tok != NULL); *tok = getParentOfEventMap(pEventMapRec); } } @@ -6962,7 +6954,7 @@ CMiniMdRW::FindParentOfParamHelper( for (indexPd = ridStart; indexPd < ridEnd; indexPd++) { IfFailGo(GetParamPtrRecord(indexPd, &pParamPtrRec)); - PREFIX_ASSUME(pParamMap->Get(getParamOfParamPtr(pParamPtrRec)) != NULL); + _ASSERTE(pParamMap->Get(getParamOfParamPtr(pParamPtrRec)) != NULL); *(pParamMap->Get(getParamOfParamPtr(pParamPtrRec))) = indexMd; } } diff --git a/src/coreclr/md/inc/VerifyLayouts.inc b/src/coreclr/md/inc/VerifyLayouts.inc index 673a6b738468..d62ecc163364 100644 --- a/src/coreclr/md/inc/VerifyLayouts.inc +++ b/src/coreclr/md/inc/VerifyLayouts.inc @@ -153,8 +153,6 @@ FIELD(CLiteWeightStgdbRW, m_pStreamList, sizeof(void*)) FIELD(CLiteWeightStgdbRW, m_pNextStgdb, sizeof(void*)) FIELD(CLiteWeightStgdbRW, m_eFileType, 4) FIELD(CLiteWeightStgdbRW, m_wszFileName, sizeof(void*)) -FIELD(CLiteWeightStgdbRW, m_dwDatabaseLFT, 4) -FIELD(CLiteWeightStgdbRW, m_dwDatabaseLFS, 4) FIELD(CLiteWeightStgdbRW, m_pStgIO, sizeof(void*)) #ifdef FEATURE_METADATA_EMIT_PORTABLE_PDB FIELD(CLiteWeightStgdbRW, m_pPdbHeap, sizeof(void*)) diff --git a/src/coreclr/md/inc/liteweightstgdb.h b/src/coreclr/md/inc/liteweightstgdb.h index af782217f59d..9b9536e5cb04 100644 --- a/src/coreclr/md/inc/liteweightstgdb.h +++ b/src/coreclr/md/inc/liteweightstgdb.h @@ -25,14 +25,7 @@ class StgIO; #include "pdbheap.h" #endif -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:28718) // public header missing SAL annotations -#endif // _PREFAST_ class TiggerStorage; -#ifdef _PREFAST_ -#pragma warning(pop) -#endif // _PREFAST_ //***************************************************************************** // This class provides common definitions for heap segments. 
It is both the @@ -106,8 +99,6 @@ class CLiteWeightStgdbRW : public CLiteWeightStgdb m_pImage = NULL; m_dwImageSize = 0; m_dwPEKind = (DWORD)(-1); - m_dwDatabaseLFS = 0; - m_dwDatabaseLFT = 0; #ifdef FEATURE_METADATA_EMIT_PORTABLE_PDB m_pPdbHeap = NULL; #endif @@ -238,8 +229,6 @@ class CLiteWeightStgdbRW : public CLiteWeightStgdb private: FILETYPE m_eFileType; WCHAR * m_wszFileName; // Database file name (NULL or non-empty string) - DWORD m_dwDatabaseLFT; // Low bytes of the database file's last write time - DWORD m_dwDatabaseLFS; // Low bytes of the database file's size StgIO * m_pStgIO; // For file i/o. #ifdef FEATURE_METADATA_EMIT_PORTABLE_PDB PdbHeap *m_pPdbHeap; diff --git a/src/coreclr/md/inc/mdinternalrw.h b/src/coreclr/md/inc/mdinternalrw.h index 9d3ea31d75a2..dd55ac20abb4 100644 --- a/src/coreclr/md/inc/mdinternalrw.h +++ b/src/coreclr/md/inc/mdinternalrw.h @@ -73,16 +73,6 @@ class MDInternalRW : public IMDInternalImportENC, public IMDCommon return static_cast(&m_pStgdb->m_MiniMd); } - __checkReturn - STDMETHODIMP SetOptimizeAccessForSpeed(// return hresult - BOOL fOptSpeed) - { - // If there is any optional work we can avoid (for example, because we have - // traded space for speed) this is the place to turn it off or on. - - return S_OK; - } - //***************************************************************************** // return the count of entries of a given kind in a scope // For example, pass in mdtMethodDef will tell you how many MethodDef @@ -782,32 +772,8 @@ class MDInternalRW : public IMDInternalImportENC, public IMDCommon { return (DWORD)m_pStgdb->m_MiniMd.m_Schema.m_minor | ((DWORD)m_pStgdb->m_MiniMd.m_Schema.m_major << 16); - }; - - __checkReturn - STDMETHODIMP SetVerifiedByTrustedSource(// return hresult - BOOL fVerified) - { - m_pStgdb->m_MiniMd.SetVerifiedByTrustedSource(fVerified); - return S_OK; } - STDMETHODIMP GetRvaOffsetData(// S_OK or error - DWORD *pFirstMethodRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in MethodDef table. - DWORD *pMethodDefRecordSize, // [OUT] Size of each record in MethodDef table. - DWORD *pMethodDefCount, // [OUT] Number of records in MethodDef table. - DWORD *pFirstFieldRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in FieldRVA table. - DWORD *pFieldRvaRecordSize, // [OUT] Size of each record in FieldRVA table. - DWORD *pFieldRvaCount) // [OUT] Number of records in FieldRVA table. 
- { - return m_pStgdb->m_MiniMd.GetRvaOffsetData( - pFirstMethodRvaOffset, - pMethodDefRecordSize, - pMethodDefCount, - pFirstFieldRvaOffset, - pFieldRvaRecordSize, - pFieldRvaCount); - } }; // class MDInternalRW #endif //FEATURE_METADATA_INTERNAL_APIS diff --git a/src/coreclr/md/inc/metamodel.h b/src/coreclr/md/inc/metamodel.h index 00314646ee76..942a48412791 100644 --- a/src/coreclr/md/inc/metamodel.h +++ b/src/coreclr/md/inc/metamodel.h @@ -40,11 +40,6 @@ #define METAMODEL_MAJOR_VER 2 #define METAMODEL_MINOR_VER 0 -// Metadata version number up through Whidbey Beta2 -#define METAMODEL_MAJOR_VER_B1 1 -#define METAMODEL_MINOR_VER_B1 1 - - typedef enum MetadataVersion { MDVersion1 = 0x00000001, @@ -530,23 +525,6 @@ class CMiniMdBase : public IMetaModelCommonRO return m_fVerifiedByTrustedSource && CommonIsRo(); } - void SetVerifiedByTrustedSource(BOOL fVerifiedByTrustedSource) - { - m_fVerifiedByTrustedSource = fVerifiedByTrustedSource; - } - - STDMETHODIMP GetRvaOffsetData(// S_OK or error - DWORD *pFirstMethodRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in MethodDef table. - DWORD *pMethodDefRecordSize, // [OUT] Size of each record in MethodDef table. - DWORD *pMethodDefCount, // [OUT] Number of records in MethodDef table. - DWORD *pFirstFieldRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in FieldRVA table. - DWORD *pFieldRvaRecordSize, // [OUT] Size of each record in FieldRVA table. - DWORD *pFieldRvaCount) // [OUT] Number of records in FieldRVA table. - { - _ASSERTE("Not implemented"); - return E_NOTIMPL; - } - //***************************************************************************** // Some of the tables need coded tokens, not just rids (ie, the column can // refer to more than one other table). Code the tokens into as few bits @@ -1998,9 +1976,8 @@ template class CMiniMdTemplate : public CMiniMdBase BOOL SupportsGenerics() { - // Only 2.0 of the metadata (and 1.1) support generics - return (m_Schema.m_major >= METAMODEL_MAJOR_VER_V2_0 || - (m_Schema.m_major == METAMODEL_MAJOR_VER_B1 && m_Schema.m_minor == METAMODEL_MINOR_VER_B1)); + // Only 2.0 of the metadata support generics + return (m_Schema.m_major >= METAMODEL_MAJOR_VER_V2_0); }// SupportGenerics protected: diff --git a/src/coreclr/md/runtime/mdfileformat.cpp b/src/coreclr/md/runtime/mdfileformat.cpp index 9b1c5f6e64bb..9e9a25330277 100644 --- a/src/coreclr/md/runtime/mdfileformat.cpp +++ b/src/coreclr/md/runtime/mdfileformat.cpp @@ -18,7 +18,6 @@ //***************************************************************************** // Verify the signature at the front of the file to see what type it is. //***************************************************************************** -#define STORAGE_MAGIC_OLD_SIG 0x2B4D4F43 // +MOC (old version of BSJB signature code:STORAGE_MAGIC_SIG) HRESULT MDFormat::VerifySignature( PSTORAGESIGNATURE pSig, // The signature to check. @@ -28,11 +27,6 @@ MDFormat::VerifySignature( // If signature didn't match, you shouldn't be here. 
ULONG dwSignature = pSig->GetSignature(); - if (dwSignature == STORAGE_MAGIC_OLD_SIG) - { - Debug_ReportError("Invalid MetaData storage signature - old magic signature +MOC."); - return PostError(CLDB_E_FILE_OLDVER, 1, 0); - } if (dwSignature != STORAGE_MAGIC_SIG) { Debug_ReportError("Invalid MetaData storage signature - unrecognized magic signature, should be BSJB."); @@ -76,17 +70,6 @@ MDFormat::VerifySignature( } } - // Only a specific version of the 0.x format is supported by this code - // in order to support the NT 5 beta clients which used this format. - if (pSig->GetMajorVer() == FILE_VER_MAJOR_v0) - { - if (pSig->GetMinorVer() < FILE_VER_MINOR_v0) - { - Debug_ReportError("Invalid MetaData storage signature - unrecognized version, should be 1.1."); - hr = CLDB_E_FILE_OLDVER; - } - } - else // There is currently no code to migrate an old format of the 1.x. This // would be added only under special circumstances. if ((pSig->GetMajorVer() != FILE_VER_MAJOR) || (pSig->GetMinorVer() != FILE_VER_MINOR)) diff --git a/src/coreclr/md/runtime/mdinternalro.cpp b/src/coreclr/md/runtime/mdinternalro.cpp index 5e2d52eeaa95..b4ce60510cd7 100644 --- a/src/coreclr/md/runtime/mdinternalro.cpp +++ b/src/coreclr/md/runtime/mdinternalro.cpp @@ -3321,63 +3321,4 @@ HRESULT MDInternalRO::ApplyEditAndContinue( return hr; } -HRESULT MDInternalRO::GetRvaOffsetData( - DWORD *pFirstMethodRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in MethodDef table. - DWORD *pMethodDefRecordSize, // [OUT] Size of each record in MethodDef table. - DWORD *pMethodDefCount, // [OUT] Number of records in MethodDef table. - DWORD *pFirstFieldRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in FieldRVA table. - DWORD *pFieldRvaRecordSize, // [OUT] Size of each record in FieldRVA table. - DWORD *pFieldRvaCount) // [OUT] Number of records in FieldRVA table. -{ - HRESULT hr = S_OK; - DWORD methodDefCount = *pMethodDefCount = m_LiteWeightStgdb.m_MiniMd.getCountMethods(); - if (methodDefCount == 0) - *pFirstMethodRvaOffset = *pMethodDefRecordSize = 0; - else - { - MethodRec *pMethodRec; - IfFailGo(m_LiteWeightStgdb.m_MiniMd.GetMethodRecord(1, &pMethodRec)); - - // RVA is the first column of the MethodDef table, so the address of MethodRec is also address of RVA column. - if ((const BYTE *)m_LiteWeightStgdb.m_pvMd > (const BYTE *)pMethodRec) - { - Debug_ReportError("Stream header is not within MetaData block."); - IfFailGo(CLDB_E_FILE_CORRUPT); - } - *pFirstMethodRvaOffset = (DWORD)((const BYTE *)pMethodRec - (const BYTE *)m_LiteWeightStgdb.m_pvMd); - *pMethodDefRecordSize = m_LiteWeightStgdb.m_MiniMd._CBREC(Method); - } - - { - DWORD fieldRvaCount = *pFieldRvaCount = m_LiteWeightStgdb.m_MiniMd.getCountFieldRVAs(); - if (fieldRvaCount == 0) - *pFirstFieldRvaOffset = *pFieldRvaRecordSize = 0; - else - { - - // orig - // FieldRVARec *pFieldRVARec = m_LiteWeightStgdb.m_MiniMd.getFieldRVA(1); - FieldRVARec *pFieldRVARec; - IfFailGo(m_LiteWeightStgdb.m_MiniMd.GetFieldRVARecord(1, &pFieldRVARec)); - -//FieldRVARec *pFieldRVARec; -//mdToken fakeTok = 1; -//RidToToken(&fakeTok, mdtFieldDef); -//GetFieldRVA(fakeTok, &pFieldRVARec); - // RVA is the first column of the FieldRVA table, so the address of FieldRVARec is also address of RVA column. 
- if ((const BYTE *)m_LiteWeightStgdb.m_pvMd > (const BYTE *)pFieldRVARec) - { - Debug_ReportError("Stream header is not within MetaData block."); - IfFailGo(CLDB_E_FILE_CORRUPT); - } - *pFirstFieldRvaOffset = (DWORD)((const BYTE *)pFieldRVARec - (const BYTE *)m_LiteWeightStgdb.m_pvMd); - *pFieldRvaRecordSize = m_LiteWeightStgdb.m_MiniMd._CBREC(FieldRVA); - } - } - hr = S_OK; - -ErrExit: - return hr; -} - #endif //FEATURE_METADATA_INTERNAL_APIS diff --git a/src/coreclr/md/runtime/mdinternalro.h b/src/coreclr/md/runtime/mdinternalro.h index 5f46a34b0fff..8ea00e1b9081 100644 --- a/src/coreclr/md/runtime/mdinternalro.h +++ b/src/coreclr/md/runtime/mdinternalro.h @@ -66,13 +66,6 @@ class MDInternalRO : public IMDInternalImport, IMDCommon return static_cast(&m_LiteWeightStgdb.m_MiniMd); } - __checkReturn - STDMETHODIMP SetOptimizeAccessForSpeed( - BOOL fOptSpeed) - { - return S_OK; - } - //***************************************************************************** // return the count of entries of a given kind in a scope // For example, pass in mdtMethodDef will tell you how many MethodDef @@ -734,15 +727,6 @@ class MDInternalRO : public IMDInternalImport, IMDCommon ULONG cbData, // [IN] length of pData IMDInternalImport **ppv); // [OUT] the resulting metadata interface - STDMETHODIMP GetRvaOffsetData( - DWORD *pFirstMethodRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in MethodDef table. - DWORD *pMethodDefRecordSize, // [OUT] Size of each record in MethodDef table. - DWORD *pMethodDefCount, // [OUT] Number of records in MethodDef table. - DWORD *pFirstFieldRvaOffset, // [OUT] Offset (from start of metadata) to the first RVA field in FieldRVA table. - DWORD *pFieldRvaRecordSize, // [OUT] Size of each record in FieldRVA table. - DWORD *pFieldRvaCount // [OUT] Number of records in FieldRVA table. - ); - CLiteWeightStgdb m_LiteWeightStgdb; private: @@ -784,12 +768,6 @@ class MDInternalRO : public IMDInternalImport, IMDCommon ((DWORD)m_LiteWeightStgdb.m_MiniMd.m_Schema.m_major << 16); }; - STDMETHODIMP SetVerifiedByTrustedSource(// return hresult - BOOL fVerified) - { - m_LiteWeightStgdb.m_MiniMd.SetVerifiedByTrustedSource(fVerified); - return S_OK; - } }; // class MDInternalRO #endif //FEATURE_METADATA_INTERNAL_APIS diff --git a/src/coreclr/md/runtime/metamodel.cpp b/src/coreclr/md/runtime/metamodel.cpp index aca6fefbcdcf..e7f55f217152 100644 --- a/src/coreclr/md/runtime/metamodel.cpp +++ b/src/coreclr/md/runtime/metamodel.cpp @@ -106,10 +106,6 @@ const CMiniTableDefEx g_Tables[TBL_COUNT] = { #endif }; -// Define a table descriptor for the obsolete v1.0 GenericParam table definition. -const CMiniTableDefEx g_Table_GenericParamV1_1 = { { rGenericParamV1_1Cols, ARRAY_SIZE(rGenericParamV1_1Cols), GenericParamV1_1Rec::COL_KEY, 0 }, rGenericParamV1_1ColNames, "GenericParamV1_"}; - - // Define the array of Ptr Tables. This is initialized to TBL_COUNT here. // The correct values will be set in the constructor for MiniMdRW. @@ -187,10 +183,7 @@ CMiniMdSchema::SaveTo( // Minor version is preset when we instantiate the MiniMd. - // Make sure we're saving out a version that Beta1 version can read - _ASSERTE((m_major == METAMODEL_MAJOR_VER && m_minor == METAMODEL_MINOR_VER) || - (m_major == METAMODEL_MAJOR_VER_B1 && m_minor == METAMODEL_MINOR_VER_B1) || - (m_major == METAMODEL_MAJOR_VER_V1_0 && m_minor == METAMODEL_MINOR_VER_V1_0)); + _ASSERTE((m_major == METAMODEL_MAJOR_VER) && (m_minor == METAMODEL_MINOR_VER)); // Transfer the fixed fields. 
*static_cast(pDest) = *static_cast(this); @@ -555,13 +548,6 @@ CMiniMdBase::SchemaPopulate( // Older version has fewer tables. m_TblCount = TBL_COUNT_V1; } - else if ((m_Schema.m_major == METAMODEL_MAJOR_VER_B1) && - (m_Schema.m_minor == METAMODEL_MINOR_VER_B1)) - { - // 1.1 had a different type of GenericParam table - m_TableDefs[TBL_GenericParam] = g_Table_GenericParamV1_1.m_Def; - m_TableDefs[TBL_GenericParam].m_pColDefs = BYTEARRAY_TO_COLDES(s_GenericParamCol); - } else { // We don't support this version of the metadata Debug_ReportError("Unsupported version of MetaData."); @@ -602,12 +588,6 @@ CMiniMdBase::SchemaPopulate( m_TblCount = that.m_TblCount; _ASSERTE(m_TblCount == TBL_COUNT_V1); } - else if (m_Schema.m_major == METAMODEL_MAJOR_VER_B1 && m_Schema.m_minor == METAMODEL_MINOR_VER_B1) - { - // 1.1 had a different type of GenericParam table - m_TableDefs[TBL_GenericParam] = g_Table_GenericParamV1_1.m_Def; - m_TableDefs[TBL_GenericParam].m_pColDefs = BYTEARRAY_TO_COLDES(s_GenericParamCol); - } // Is it a supported old version? This should never fail! else { @@ -689,19 +669,7 @@ const CMiniTableDef * CMiniMdBase::GetTableDefTemplate( int ixTbl) { - const CMiniTableDef *pTemplate; // the return value. - - // Return the table definition for the given table. Account for version of schema. - if ((m_Schema.m_major == METAMODEL_MAJOR_VER_B1) && (m_Schema.m_minor == METAMODEL_MINOR_VER_B1) && (ixTbl == TBL_GenericParam)) - { - pTemplate = &g_Table_GenericParamV1_1.m_Def; - } - else - { - pTemplate = &g_Tables[ixTbl].m_Def; - } - - return pTemplate; + return &g_Tables[ixTbl].m_Def; } // CMiniMdBase::GetTableDefTemplate //***************************************************************************** @@ -735,7 +703,7 @@ CMiniMdBase::InitColsForTable( pTemplate = GetTableDefTemplate(ixTbl); - PREFIX_ASSUME(pTemplate->m_pColDefs != NULL); + _ASSERTE(pTemplate->m_pColDefs != NULL); // For each column in the table... for (ULONG ixCol = 0; ixCol < pTable->m_cCols; ++ixCol) diff --git a/src/coreclr/md/runtime/metamodelro.cpp b/src/coreclr/md/runtime/metamodelro.cpp index 4850db9833e6..2f3638c33195 100644 --- a/src/coreclr/md/runtime/metamodelro.cpp +++ b/src/coreclr/md/runtime/metamodelro.cpp @@ -65,7 +65,7 @@ CMiniMd::InitOnMem( // Uncompress the schema from the buffer into our structures. IfFailGo(SchemaPopulate(pvBuf, ulBufLen, &cbData)); - PREFAST_ASSUME(cbData <= ulBufLen); + _ASSERTE(cbData <= ulBufLen); // There shouldn't be any pointer tables. 
if ((m_Schema.m_cRecs[TBL_MethodPtr] != 0) || (m_Schema.m_cRecs[TBL_FieldPtr] != 0)) diff --git a/src/coreclr/md/staticmd/CMakeLists.txt b/src/coreclr/md/staticmd/CMakeLists.txt deleted file mode 100644 index 99612f824aba..000000000000 --- a/src/coreclr/md/staticmd/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -add_definitions(-DNO_COR) - -set(STATICMD_SOURCES - apis.cpp -) - -convert_to_absolute_path(STATICMD_SOURCES ${STATICMD_SOURCES}) - -add_definitions(-DFEATURE_METADATA_EMIT_ALL) -add_definitions(-DFEATURE_METADATA_EMIT) -add_definitions(-DFEATURE_METADATA_INTERNAL_APIS) - -add_library_clr(mdstaticapi ${STATICMD_SOURCES}) - -add_library_clr(mdstaticapi_ppdb ${STATICMD_SOURCES}) -target_compile_definitions(mdstaticapi_ppdb PRIVATE FEATURE_METADATA_EMIT_PORTABLE_PDB) \ No newline at end of file diff --git a/src/coreclr/md/staticmd/apis.cpp b/src/coreclr/md/staticmd/apis.cpp deleted file mode 100644 index 02459f54bb74..000000000000 --- a/src/coreclr/md/staticmd/apis.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "stdafx.h" - -#include // Utility helpers. -#include // Error handlers -#define INIT_GUIDS -#include -#include -#include -#include "shimload.h" -#include "metadataexports.h" -#include "ex.h" - -// --------------------------------------------------------------------------- -// %%Function: MetaDataGetDispenser -// This function gets the Dispenser interface given the CLSID and REFIID. -// --------------------------------------------------------------------------- -STDAPI DLLEXPORT MetaDataGetDispenser( // Return HRESULT - REFCLSID rclsid, // The class to desired. - REFIID riid, // Interface wanted on class factory. - LPVOID FAR *ppv) // Return interface pointer here. -{ - - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - ENTRY_POINT; - PRECONDITION(CheckPointer(ppv)); - } CONTRACTL_END; - - NonVMComHolder pcf(NULL); - HRESULT hr; - - IfFailGo(MetaDataDllGetClassObject(rclsid, IID_IClassFactory, (void **) &pcf)); - hr = pcf->CreateInstance(NULL, riid, ppv); - -ErrExit: - return (hr); -} - -// --------------------------------------------------------------------------- -// %%Function: GetMetaDataInternalInterface -// This function gets the IMDInternalImport given the metadata on memory. -// --------------------------------------------------------------------------- -STDAPI DLLEXPORT GetMetaDataInternalInterface( - LPVOID pData, // [IN] in memory metadata section - ULONG cbData, // [IN] size of the metadata section - DWORD flags, // [IN] MDInternal_OpenForRead or MDInternal_OpenForENC - REFIID riid, // [IN] desired interface - void **ppv) // [OUT] returned interface -{ - CONTRACTL{ - NOTHROW; - GC_NOTRIGGER; - ENTRY_POINT; - PRECONDITION(CheckPointer(pData)); - PRECONDITION(CheckPointer(ppv)); - } CONTRACTL_END; - - return GetMDInternalInterface(pData, cbData, flags, riid, ppv); -} - -// --------------------------------------------------------------------------- -// %%Function: GetMetaDataInternalInterfaceFromPublic -// This function gets the internal scopeless interface given the public -// scopeless interface. -// --------------------------------------------------------------------------- -STDAPI DLLEXPORT GetMetaDataInternalInterfaceFromPublic( - IUnknown *pv, // [IN] Given interface. 
- REFIID riid, // [IN] desired interface - void **ppv) // [OUT] returned interface -{ - CONTRACTL{ - NOTHROW; - GC_NOTRIGGER; - ENTRY_POINT; - PRECONDITION(CheckPointer(pv)); - PRECONDITION(CheckPointer(ppv)); - } CONTRACTL_END; - - return GetMDInternalInterfaceFromPublic(pv, riid, ppv); -} - -// --------------------------------------------------------------------------- -// %%Function: GetMetaDataPublicInterfaceFromInternal -// This function gets the public scopeless interface given the internal -// scopeless interface. -// --------------------------------------------------------------------------- -STDAPI DLLEXPORT GetMetaDataPublicInterfaceFromInternal( - void *pv, // [IN] Given interface. - REFIID riid, // [IN] desired interface. - void **ppv) // [OUT] returned interface -{ - CONTRACTL{ - NOTHROW; - GC_NOTRIGGER; - PRECONDITION(CheckPointer(pv)); - PRECONDITION(CheckPointer(ppv)); - ENTRY_POINT; - } CONTRACTL_END; - - return GetMDPublicInterfaceFromInternal(pv, riid, ppv); -} diff --git a/src/coreclr/md/staticmd/stdafx.h b/src/coreclr/md/staticmd/stdafx.h deleted file mode 100644 index 65812f46ffa6..000000000000 --- a/src/coreclr/md/staticmd/stdafx.h +++ /dev/null @@ -1,10 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include // Windows wrappers. - -#include // OLE definitions - - -#include "intrinsic.h" // Functions to make intrinsic. diff --git a/src/coreclr/minipal/Unix/CMakeLists.txt b/src/coreclr/minipal/Unix/CMakeLists.txt index aa4ea90d5e27..baa9c4116181 100644 --- a/src/coreclr/minipal/Unix/CMakeLists.txt +++ b/src/coreclr/minipal/Unix/CMakeLists.txt @@ -3,9 +3,14 @@ set(SOURCES dn-u16.cpp ) +add_library(coreclrminipal_objects + OBJECT + ${SOURCES} +) + add_library(coreclrminipal STATIC - ${SOURCES} + $ ) target_link_libraries(coreclrminipal PRIVATE minipal) diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp index b866da9f93e6..4a2516bea584 100644 --- a/src/coreclr/minipal/Unix/doublemapping.cpp +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,11 @@ #include "minipal.h" #include "minipal/cpufeatures.h" +#ifndef TARGET_APPLE +#include +#include +#endif // TARGET_APPLE + #ifdef TARGET_APPLE #include @@ -253,3 +259,320 @@ bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) { return munmap(pStart, size) != -1; } + +#ifndef TARGET_APPLE +#define MAX_TEMPLATE_THUNK_TYPES 3 // Maximum number of times the CreateTemplate api can be called +struct TemplateThunkMappingData +{ + int fdImage; + off_t offsetInFileOfStartOfSection; + void* addrOfStartOfSection; // Always NULL if the template mapping data could not be initialized + void* addrOfEndOfSection; + bool imageTemplates; + int templatesCreated; + off_t nonImageTemplateCurrent; +}; + +struct InitializeTemplateThunkLocals +{ + void* pTemplate; + Dl_info info; + TemplateThunkMappingData data; +}; + +static TemplateThunkMappingData *s_pThunkData = NULL; + +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE + +static Elf32_Word Elf32_WordMin(Elf32_Word left, Elf32_Word right) +{ + return left < right ? 
left : right; +} + +static int InitializeTemplateThunkMappingDataPhdrCallback(struct dl_phdr_info *info, size_t size, void *dataPtr) +{ + InitializeTemplateThunkLocals *locals = (InitializeTemplateThunkLocals*)dataPtr; + + if ((void*)info->dlpi_addr == locals->info.dli_fbase) + { + for (size_t j = 0; j < info->dlpi_phnum; j++) + { + uint8_t* baseSectionAddr = (uint8_t*)locals->info.dli_fbase + info->dlpi_phdr[j].p_vaddr; + if (locals->pTemplate < baseSectionAddr) + { + // Address is before the virtual address of this section begins + continue; + } + + // Since this is all in support of mapping code from the file, we need to ensure that the region we find + // is actually present in the file. + Elf32_Word sizeOfSectionWhichCanBeMapped = Elf32_WordMin(info->dlpi_phdr[j].p_filesz, info->dlpi_phdr[j].p_memsz); + + uint8_t* endAddressAllowedForTemplate = baseSectionAddr + sizeOfSectionWhichCanBeMapped; + if (locals->pTemplate >= endAddressAllowedForTemplate) + { + // Template is after the virtual address of this section ends (or the mappable region of the file) + continue; + } + + // At this point, we have found the template section. Attempt to open the file, and record the various offsets for future use + + if (strlen(info->dlpi_name) == 0) + { + // This image cannot be directly referenced without capturing the argv[0] parameter + return -1; + } + + int fdImage = open(info->dlpi_name, O_RDONLY); + if (fdImage == -1) + { + return -1; // Opening the image didn't work + } + + locals->data.fdImage = fdImage; + locals->data.offsetInFileOfStartOfSection = info->dlpi_phdr[j].p_offset; + locals->data.addrOfStartOfSection = baseSectionAddr; + locals->data.addrOfEndOfSection = baseSectionAddr + sizeOfSectionWhichCanBeMapped; + locals->data.imageTemplates = true; + return 1; // We have found the result. Abort further processing. + } + } + + // This isn't the interesting .so + return 0; +} +#endif // FEATURE_MAP_THUNKS_FROM_IMAGE + +TemplateThunkMappingData *InitializeTemplateThunkMappingData(void* pTemplate) +{ + InitializeTemplateThunkLocals locals; + locals.pTemplate = pTemplate; + locals.data.fdImage = 0; + locals.data.offsetInFileOfStartOfSection = 0; + locals.data.addrOfStartOfSection = NULL; + locals.data.addrOfEndOfSection = NULL; + locals.data.imageTemplates = false; + locals.data.nonImageTemplateCurrent = 0; + locals.data.templatesCreated = 0; + +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE + if (dladdr(pTemplate, &locals.info) != 0) + { + dl_iterate_phdr(InitializeTemplateThunkMappingDataPhdrCallback, &locals); + } +#endif // FEATURE_MAP_THUNKS_FROM_IMAGE + + if (locals.data.addrOfStartOfSection == NULL) + { + // This is the detail of thunk data which indicates if we were able to compute the template mapping data from the image. + +#ifdef TARGET_FREEBSD + int fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, S_IRWXU); +#elif defined(TARGET_LINUX) || defined(TARGET_ANDROID) + int fd = memfd_create("doublemapper-template", MFD_CLOEXEC); +#else + int fd = -1; + +#ifndef TARGET_ANDROID + // Bionic doesn't have shm_{open,unlink} + // POSIX fallback + if (fd == -1) + { + char name[24]; + sprintf(name, "/shm-dotnet-template-%d", getpid()); + name[sizeof(name) - 1] = '\0'; + shm_unlink(name); + fd = shm_open(name, O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW, 0600); + shm_unlink(name); + } +#endif // !TARGET_ANDROID +#endif + if (fd != -1) + { + off_t maxFileSize = MAX_TEMPLATE_THUNK_TYPES * 0x10000; // The largest page size we support currently is 64KB. 
+ if (ftruncate(fd, maxFileSize) == -1) // Reserve a decent size chunk of logical memory for these things. + { + close(fd); + } + else + { + locals.data.fdImage = fd; + locals.data.offsetInFileOfStartOfSection = 0; + // We simulate the template thunk mapping data existing in mapped ram, by declaring that it exists at at + // an address which is not NULL, and which is naturally aligned on the largest page size supported by any + // architecture we support (0x10000). We do this, as the generalized logic here is designed around remapping + // already mapped memory, and by doing this we are able to share that logic. + locals.data.addrOfStartOfSection = (void*)0x10000; + locals.data.addrOfEndOfSection = ((uint8_t*)locals.data.addrOfStartOfSection) + maxFileSize; + locals.data.imageTemplates = false; + } + } + } + + + TemplateThunkMappingData *pAllocatedData = (TemplateThunkMappingData*)malloc(sizeof(TemplateThunkMappingData)); + *pAllocatedData = locals.data; + TemplateThunkMappingData *pExpectedNull = NULL; + if (__atomic_compare_exchange_n (&s_pThunkData, &pExpectedNull, pAllocatedData, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED)) + { + return pAllocatedData; + } + else + { + free(pAllocatedData); + return __atomic_load_n(&s_pThunkData, __ATOMIC_ACQUIRE); + } +} +#endif + +bool VMToOSInterface::AllocateThunksFromTemplateRespectsStartAddress() +{ +#ifdef TARGET_APPLE + return false; +#else + return true; +#endif +} + +void* VMToOSInterface::CreateTemplate(void* pImageTemplate, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)) +{ +#ifdef TARGET_APPLE + return pImageTemplate; +#elif defined(TARGET_X86) + return NULL; // X86 doesn't support high performance relative addressing, which makes the template system not work +#else + if (pImageTemplate == NULL) + return NULL; + + TemplateThunkMappingData* pThunkData = __atomic_load_n(&s_pThunkData, __ATOMIC_ACQUIRE); + if (s_pThunkData == NULL) + { + pThunkData = InitializeTemplateThunkMappingData(pImageTemplate); + } + + // Unable to create template mapping region + if (pThunkData->addrOfStartOfSection == NULL) + { + return NULL; + } + + int templatesCreated = __atomic_add_fetch(&pThunkData->templatesCreated, 1, __ATOMIC_SEQ_CST); + assert(templatesCreated <= MAX_TEMPLATE_THUNK_TYPES); + + if (!pThunkData->imageTemplates) + { + // Need to allocate a memory mapped region to fill in the data + off_t locationInFileToStoreGeneratedCode = __atomic_fetch_add((off_t*)&pThunkData->nonImageTemplateCurrent, (off_t)templateSize, __ATOMIC_SEQ_CST); + void* mappedMemory = mmap(NULL, templateSize, PROT_READ | PROT_WRITE, MAP_SHARED, pThunkData->fdImage, locationInFileToStoreGeneratedCode); + if (mappedMemory != MAP_FAILED) + { + codePageGenerator((uint8_t*)mappedMemory, (uint8_t*)mappedMemory, templateSize); + munmap(mappedMemory, templateSize); + return ((uint8_t*)pThunkData->addrOfStartOfSection) + locationInFileToStoreGeneratedCode; + } + else + { + return NULL; + } + } + else + { + return pImageTemplate; + } +#endif +} + +void* VMToOSInterface::AllocateThunksFromTemplate(void* pTemplate, size_t templateSize, void* pStartSpecification) +{ +#ifdef TARGET_APPLE + vm_address_t addr, taddr; + vm_prot_t prot, max_prot; + kern_return_t ret; + + // Allocate two contiguous ranges of memory: the first range will contain the stubs + // and the second range will contain their data. 
+ do + { + ret = vm_allocate(mach_task_self(), &addr, templateSize * 2, VM_FLAGS_ANYWHERE); + } while (ret == KERN_ABORTED); + + if (ret != KERN_SUCCESS) + { + return NULL; + } + + do + { + ret = vm_remap( + mach_task_self(), &addr, templateSize, 0, VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, + mach_task_self(), (vm_address_t)pTemplate, FALSE, &prot, &max_prot, VM_INHERIT_SHARE); + } while (ret == KERN_ABORTED); + + if (ret != KERN_SUCCESS) + { + do + { + ret = vm_deallocate(mach_task_self(), addr, templateSize * 2); + } while (ret == KERN_ABORTED); + + return NULL; + } + return (void*)addr; +#else + TemplateThunkMappingData* pThunkData = __atomic_load_n(&s_pThunkData, __ATOMIC_ACQUIRE); + if (s_pThunkData == NULL) + { + pThunkData = InitializeTemplateThunkMappingData(pTemplate); + } + + if (pThunkData->addrOfStartOfSection == NULL) + { + // This is the detail of thunk data which indicates if we were able to compute the template mapping data + return NULL; + } + + if (pTemplate < pThunkData->addrOfStartOfSection) + { + return NULL; + } + + uint8_t* endOfTemplate = ((uint8_t*)pTemplate + templateSize); + if (endOfTemplate > pThunkData->addrOfEndOfSection) + return NULL; + + size_t sectionOffset = (uint8_t*)pTemplate - (uint8_t*)pThunkData->addrOfStartOfSection; + off_t fileOffset = pThunkData->offsetInFileOfStartOfSection + sectionOffset; + + void *pStart = mmap(pStartSpecification, templateSize * 2, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | (pStartSpecification != NULL ? MAP_FIXED : 0), -1, 0); + if (pStart == MAP_FAILED) + { + return NULL; + } + + void *pStartCode = mmap(pStart, templateSize, PROT_READ | PROT_EXEC, MAP_PRIVATE | MAP_FIXED, pThunkData->fdImage, fileOffset); + if (pStart != pStartCode) + { + munmap(pStart, templateSize * 2); + return NULL; + } + + return pStart; +#endif +} + +bool VMToOSInterface::FreeThunksFromTemplate(void* thunks, size_t templateSize) +{ +#ifdef TARGET_APPLE + kern_return_t ret; + + do + { + ret = vm_deallocate(mach_task_self(), (vm_address_t)thunks, templateSize * 2); + } while (ret == KERN_ABORTED); + + return ret == KERN_SUCCESS ? 
true : false; +#else + munmap(thunks, templateSize * 2); + return true; +#endif +} diff --git a/src/coreclr/minipal/Windows/doublemapping.cpp b/src/coreclr/minipal/Windows/doublemapping.cpp index 9e8ddfed8e96..f5f25f2bec92 100644 --- a/src/coreclr/minipal/Windows/doublemapping.cpp +++ b/src/coreclr/minipal/Windows/doublemapping.cpp @@ -210,3 +210,23 @@ bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) { return UnmapViewOfFile(pStart); } + +void* VMToOSInterface::CreateTemplate(void* pImageTemplate, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)) +{ + return NULL; +} + +bool VMToOSInterface::AllocateThunksFromTemplateRespectsStartAddress() +{ + return false; +} + +void* VMToOSInterface::AllocateThunksFromTemplate(void* pTemplate, size_t templateSize, void* pStart) +{ + return NULL; +} + +bool VMToOSInterface::FreeThunksFromTemplate(void* thunks, size_t templateSize) +{ + return false; +} diff --git a/src/coreclr/minipal/minipal.h b/src/coreclr/minipal/minipal.h index 38ab07ec63c5..01f497e60e6d 100644 --- a/src/coreclr/minipal/minipal.h +++ b/src/coreclr/minipal/minipal.h @@ -75,4 +75,52 @@ class VMToOSInterface // Return: // true if it succeeded, false if it failed static bool ReleaseRWMapping(void* pStart, size_t size); + + // Create a template for use by AllocateThunksFromTemplate + // Parameters: + // pImageTemplate - Address of start of template in the image for coreclr. (All addresses passed to the api in a process must be from the same module, if any call uses a pImageTemplate, all calls MUST) + // templateSize - Size of the template + // codePageGenerator - If the system is unable to use pImageTemplate, use this parameter to generate the code page instead + // + // Return: + // NULL if creating the template fails + // Non-NULL, a pointer to the template + static void* CreateTemplate(void* pImageTemplate, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)); + + // Indicate if the AllocateThunksFromTemplate function respects the pStart address passed to AllocateThunksFromTemplate on this platform + // Return: + // true if the parameter is respected, false if not + static bool AllocateThunksFromTemplateRespectsStartAddress(); + + // Allocate thunks from template + // Parameters: + // pTemplate - Value returned from CreateTemplate + // templateSize - Size of the templates block in the image + // pStart - Where to allocate (Specify NULL if no particular address is required). If non-null, this must be an address returned by ReserveDoubleMappedMemory + // + // Return: + // NULL if the allocation fails + // Non-NULL, a pointer to the allocated region. 
+ static void* AllocateThunksFromTemplate(void* pTemplate, size_t templateSize, void* pStart); + + // Free thunks allocated from template + // Parameters: + // pThunks - Address previously returned by AllocateThunksFromTemplate + // templateSize - Size of the templates block in the image + // Return: + // true if it succeeded, false if it failed + static bool FreeThunksFromTemplate(void* thunks, size_t templateSize); }; + +#if defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) +EXTERN_C uint8_t _InterlockedCompareExchange128(int64_t volatile *, int64_t, int64_t, int64_t *); + +#if defined(HOST_WINDOWS) +#pragma intrinsic(_InterlockedCompareExchange128) +#endif + +FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult); +} +#endif // defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) \ No newline at end of file diff --git a/src/coreclr/nativeaot/Bootstrap/main.cpp b/src/coreclr/nativeaot/Bootstrap/main.cpp index db92ee1c5388..32cb98eb6e84 100644 --- a/src/coreclr/nativeaot/Bootstrap/main.cpp +++ b/src/coreclr/nativeaot/Bootstrap/main.cpp @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. #include +#include // // This is the mechanism whereby multiple linked modules contribute their global data for initialization at @@ -101,7 +102,7 @@ extern "C" bool RhRegisterOSModule(void * pModule, void * pvUnboxingStubsStartRange, uint32_t cbUnboxingStubsRange, void ** pClasslibFunctions, uint32_t nClasslibFunctions); -extern "C" void* PalGetModuleHandleFromPointer(void* pointer); +void* PalGetModuleHandleFromPointer(void* pointer); #if defined(HOST_X86) && defined(HOST_WINDOWS) #define STRINGIFY(s) #s @@ -256,7 +257,12 @@ int main(int argc, char* argv[]) if (initval != 0) return initval; +#if defined(DEBUG) && defined(_WIN32) + // quick_exit works around Debug UCRT shutdown issues: https://github.com/dotnet/runtime/issues/108640 + quick_exit(__managed__Main(argc, argv)); +#else return __managed__Main(argc, argv); +#endif } #ifdef HAS_ADDRESS_SANITIZER diff --git a/src/coreclr/nativeaot/BuildIntegration/BuildIntegration.proj b/src/coreclr/nativeaot/BuildIntegration/BuildIntegration.proj index 7ecbeb33addd..8d9375a9e164 100644 --- a/src/coreclr/nativeaot/BuildIntegration/BuildIntegration.proj +++ b/src/coreclr/nativeaot/BuildIntegration/BuildIntegration.proj @@ -1,16 +1,21 @@ +<<<<<<< HEAD false +======= + +>>>>>>> upstream-jun $(RuntimeBinDir)/build/ - + +<<<<<<< HEAD @@ -44,4 +49,6 @@ +======= +>>>>>>> upstream-jun diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets index a11ba22a977c..4e5b53a938b8 100644 --- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets +++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets @@ -54,7 +54,7 @@ The .NET Foundation licenses this file to you under the MIT license. - + @@ -64,9 +64,6 @@ The .NET Foundation licenses this file to you under the MIT license. $(CrossCompileArch)-alpine-linux-$(CrossCompileAbi) $(CrossCompileArch)-unknown-freebsd12 - $ORIGIN - @executable_path - @rpath/$(TargetName)$(NativeBinaryExt) libeventpipe-disabled @@ -222,7 +219,7 @@ The .NET Foundation licenses this file to you under the MIT license. 
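The minipal.h additions above document a new VMToOSInterface surface for mapping pre-built thunk templates. A hedged sketch of how a caller might drive it end to end, assuming minipal.h (as amended above) is included; the generator function and the image symbol name here are illustrative placeholders, not names taken from this change:

#include <cstdint>
#include <cstddef>
#include "minipal.h"   // declares VMToOSInterface (amended above)

// Hypothetical code-page generator, matching the callback signature that
// CreateTemplate documents for the case where the image template is unusable.
static void GenerateThunkPage(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)
{
    (void)pageBaseRX;                 // RX alias of the page being generated
    for (size_t i = 0; i < size; i++)
        pageBase[i] = 0xCC;           // placeholder bytes only
}

// Illustrative name for a thunk template emitted into the image.
extern "C" uint8_t s_thunkTemplateInImage[];

void* AllocateOneThunkBlock(size_t templateSize)
{
    void* pTemplate = VMToOSInterface::CreateTemplate(s_thunkTemplateInImage, templateSize, GenerateThunkPage);
    if (pTemplate == NULL)
        return NULL;

    // Pass NULL for pStart: per the comments above, a non-NULL start address
    // must come from ReserveDoubleMappedMemory and is only honored when
    // AllocateThunksFromTemplateRespectsStartAddress() returns true.
    void* pThunks = VMToOSInterface::AllocateThunksFromTemplate(pTemplate, templateSize, NULL);

    // The returned region covers templateSize of mapped thunk code followed by
    // templateSize of data; release it later with
    // VMToOSInterface::FreeThunksFromTemplate(pThunks, templateSize).
    return pThunks;
}

On non-Apple Unix, the implementation above falls back to an anonymous memfd/shm-backed file when the template section cannot be re-mapped from the image: the generated page is written into the file once through a read-write mapping, and each AllocateThunksFromTemplate call then maps that file read-execute next to a private data page.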
- + @@ -234,8 +231,6 @@ The .NET Foundation licenses this file to you under the MIT license. - - @@ -262,9 +257,9 @@ The .NET Foundation licenses this file to you under the MIT license. see https://github.com/bminor/glibc/commit/99468ed45f5a58f584bab60364af937eb6f8afda --> - + - + @@ -356,7 +351,7 @@ The .NET Foundation licenses this file to you under the MIT license. - false true false + true @@ -196,7 +197,10 @@ The .NET Foundation licenses this file to you under the MIT license. - + + + + @@ -258,16 +262,17 @@ The .NET Foundation licenses this file to you under the MIT license. - <_IlcRootedAssemblies Include="@(_IlcRootedAssembliesRaw->'%(Filename)')" Condition="Exists('%(Identity)')" /> - <_IlcRootedAssemblies Include="@(_IlcRootedAssembliesRaw)" Condition="!Exists('%(Identity)')" /> - <_IlcConditionallyRootedAssemblies Include="@(_IlcConditionallyRootedAssembliesRaw->'%(Filename)')" Condition="Exists('%(Identity)')" /> - <_IlcConditionallyRootedAssemblies Include="@(_IlcConditionallyRootedAssembliesRaw)" Condition="!Exists('%(Identity)')" /> - <_IlcTrimmedAssemblies Include="@(_IlcTrimmedAssembliesRaw->'%(Filename)')" Condition="Exists('%(Identity)')" /> - <_IlcTrimmedAssemblies Include="@(_IlcTrimmedAssembliesRaw)" Condition="!Exists('%(Identity)')" /> - <_IlcSingleWarnAssemblies Include="@(_IlcSingleWarnAssembliesRaw->'%(Filename)')" Condition="Exists('%(Identity)')" /> - <_IlcSingleWarnAssemblies Include="@(_IlcSingleWarnAssembliesRaw)" Condition="!Exists('%(Identity)')" /> - <_IlcNoSingleWarnAssemblies Include="@(_IlcNoSingleWarnAssembliesRaw->'%(Filename)')" Condition="Exists('%(Identity)')" /> - <_IlcNoSingleWarnAssemblies Include="@(_IlcNoSingleWarnAssembliesRaw)" Condition="!Exists('%(Identity)')" /> + + <_IlcRootedAssemblies Include="@(_IlcRootedAssembliesRaw->'%(Filename)')" Condition="$([System.IO.File]::Exists('%(Identity)'))" /> + <_IlcRootedAssemblies Include="@(_IlcRootedAssembliesRaw)" Condition="!$([System.IO.File]::Exists('%(Identity)'))" /> + <_IlcConditionallyRootedAssemblies Include="@(_IlcConditionallyRootedAssembliesRaw->'%(Filename)')" Condition="$([System.IO.File]::Exists('%(Identity)'))" /> + <_IlcConditionallyRootedAssemblies Include="@(_IlcConditionallyRootedAssembliesRaw)" Condition="!$([System.IO.File]::Exists('%(Identity)'))" /> + <_IlcTrimmedAssemblies Include="@(_IlcTrimmedAssembliesRaw->'%(Filename)')" Condition="$([System.IO.File]::Exists('%(Identity)'))" /> + <_IlcTrimmedAssemblies Include="@(_IlcTrimmedAssembliesRaw)" Condition="!$([System.IO.File]::Exists('%(Identity)'))" /> + <_IlcSingleWarnAssemblies Include="@(_IlcSingleWarnAssembliesRaw->'%(Filename)')" Condition="$([System.IO.File]::Exists('%(Identity)'))" /> + <_IlcSingleWarnAssemblies Include="@(_IlcSingleWarnAssembliesRaw)" Condition="!$([System.IO.File]::Exists('%(Identity)'))" /> + <_IlcNoSingleWarnAssemblies Include="@(_IlcNoSingleWarnAssembliesRaw->'%(Filename)')" Condition="$([System.IO.File]::Exists('%(Identity)'))" /> + <_IlcNoSingleWarnAssemblies Include="@(_IlcNoSingleWarnAssembliesRaw)" Condition="!$([System.IO.File]::Exists('%(Identity)'))" /> @@ -467,7 +472,7 @@ The .NET Foundation licenses this file to you under the MIT license. - + @@ -492,7 +497,7 @@ The .NET Foundation licenses this file to you under the MIT license. 
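The PalInterlockedCompareExchange128 helper added to minipal.h above (behind HOST_64BIT and FEATURE_CACHED_INTERFACE_DISPATCH) wraps the compiler's 128-bit compare-exchange intrinsic; the -mcx16 compiler-flag comment in the native AOT CMake change later in this diff exists for the same reason. A hedged sketch of the call pattern for atomically publishing a 16-byte pair; the forward declaration and the cache-entry layout are illustrative assumptions, not code from this change:

#include <cstdint>

// Forward declaration for this sketch; in the runtime the inline helper comes
// from the minipal.h addition above. Returns nonzero when the exchange
// succeeds, and pComparandAndResult is overwritten with the value observed at pDst.
uint8_t PalInterlockedCompareExchange128(int64_t volatile* pDst, int64_t iValueHigh, int64_t iValueLow, int64_t* pComparandAndResult);

struct alignas(16) DispatchCacheEntry
{
    int64_t low;    // e.g. an interface MethodTable pointer
    int64_t high;   // e.g. the matching target code pointer
};

// Try to publish (newLow, newHigh); on failure, 'expected' is refreshed with
// the observed contents so the caller can decide whether to retry.
bool TryPublish(DispatchCacheEntry* slot, DispatchCacheEntry& expected, int64_t newLow, int64_t newHigh)
{
    int64_t comparandAndResult[2] = { expected.low, expected.high };
    uint8_t ok = PalInterlockedCompareExchange128(&slot->low, newHigh, newLow, comparandAndResult);
    expected.low  = comparandAndResult[0];
    expected.high = comparandAndResult[1];
    return ok != 0;
}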
- + - RhpNewFast|RhpNewFinalizable|RhpNewFastAlign8|RhpNewFastMisalign|RhpNewFinalizableAlign8|RhpNewArray|RhpNewArrayAlign8 + RhpNewFast|RhpNewFinalizable|RhpNewFastAlign8|RhpNewFastMisalign|RhpNewFinalizableAlign8|RhpNewArrayFast|RhpNewArrayFastAlign8 NoStepInto diff --git a/src/coreclr/nativeaot/BuildIntegration/findvcvarsall.bat b/src/coreclr/nativeaot/BuildIntegration/findvcvarsall.bat index 70294486de45..9524c42b7b13 100644 --- a/src/coreclr/nativeaot/BuildIntegration/findvcvarsall.bat +++ b/src/coreclr/nativeaot/BuildIntegration/findvcvarsall.bat @@ -15,7 +15,6 @@ IF /I "%~1"=="arm64" SET toolsSuffix=ARM64 FOR /F "tokens=*" %%i IN ( '"%vswherePath%" -latest -prerelease -products * ^ -requires Microsoft.VisualStudio.Component.VC.Tools.%toolsSuffix% ^ - -version [16^,18^) ^ -property installationPath' ) DO SET vsBase=%%i diff --git a/src/coreclr/nativeaot/CMakeLists.txt b/src/coreclr/nativeaot/CMakeLists.txt index c26e0d1ae2bf..036f5a43af8d 100644 --- a/src/coreclr/nativeaot/CMakeLists.txt +++ b/src/coreclr/nativeaot/CMakeLists.txt @@ -23,7 +23,7 @@ if(CLR_CMAKE_HOST_UNIX) endif(CLR_CMAKE_TARGET_APPLE) if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) - # Allow 16 byte compare-exchange + # Allow 16 byte compare-exchange (cmpxchg16b) add_compile_options(-mcx16) endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) endif (CLR_CMAKE_HOST_UNIX) diff --git a/src/coreclr/nativeaot/Common/src/Internal/Runtime/MethodTable.cs b/src/coreclr/nativeaot/Common/src/Internal/Runtime/MethodTable.cs index 2bc4c7661384..6aabd800f291 100644 --- a/src/coreclr/nativeaot/Common/src/Internal/Runtime/MethodTable.cs +++ b/src/coreclr/nativeaot/Common/src/Internal/Runtime/MethodTable.cs @@ -1327,7 +1327,7 @@ internal readonly struct RelativePointer { private readonly int _value; - public unsafe IntPtr Value => (IntPtr)((byte*)Unsafe.AsPointer(ref Unsafe.AsRef(in _value)) + _value); + public unsafe IntPtr Value => (IntPtr)((byte*)Unsafe.AsPointer(in _value) + _value); } // Wrapper around relative pointers @@ -1336,7 +1336,7 @@ internal readonly struct RelativePointer { private readonly int _value; - public T* Value => (T*)((byte*)Unsafe.AsPointer(ref Unsafe.AsRef(in _value)) + _value); + public T* Value => (T*)((byte*)Unsafe.AsPointer(in _value) + _value); } // Abstracts a list of MethodTable pointers that could either be relative diff --git a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs index 7932ba9300dc..a071f8f3af77 100644 --- a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs +++ b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs @@ -312,8 +312,6 @@ internal struct ReturnBlock { private IntPtr returnValue; private IntPtr returnValue2; - private IntPtr returnValue3; - private IntPtr returnValue4; } [StructLayout(LayoutKind.Sequential)] @@ -353,8 +351,8 @@ internal struct ArchitectureConstants public const int NUM_ARGUMENT_REGISTERS = 8; public const int ARGUMENTREGISTERS_SIZE = NUM_ARGUMENT_REGISTERS * 8; - public const int ENREGISTERED_RETURNTYPE_MAXSIZE = 32; // bytes (four FP registers: d0,d1,d2 and d3) - public const int ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE = 16; // bytes (two int registers: x0 and x1) + public const int ENREGISTERED_RETURNTYPE_MAXSIZE = 16; // bytes (two FP registers: f0, f1) + public const int ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE = 16; // bytes (two int registers: a0, a1) public const int 
ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE_PRIMITIVE = 8; public const int ENREGISTERED_PARAMTYPE_MAXSIZE = 16; // bytes (max value type size that can be passed by value) public const int STACK_ELEM_SIZE = 8; @@ -366,8 +364,6 @@ internal struct ReturnBlock { private IntPtr returnValue; private IntPtr returnValue2; - private IntPtr returnValue3; - private IntPtr returnValue4; } [StructLayout(LayoutKind.Sequential)] diff --git a/src/coreclr/nativeaot/Runtime.Base/src/RhBaseName.cs b/src/coreclr/nativeaot/Runtime.Base/src/RhBaseName.cs deleted file mode 100644 index 2c2189c7d8bf..000000000000 --- a/src/coreclr/nativeaot/Runtime.Base/src/RhBaseName.cs +++ /dev/null @@ -1,4 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -internal static class Redhawk { public const string BaseName = "*"; } diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CompilerServices/Unsafe.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CompilerServices/Unsafe.cs index 7f95b9bd401f..8b941d4feedb 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CompilerServices/Unsafe.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CompilerServices/Unsafe.cs @@ -36,7 +36,7 @@ public static IntPtr ByteOffset(ref readonly T origin, ref readonly T target) ///
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void* AsPointer(ref T value) + public static void* AsPointer(ref readonly T value) { throw new PlatformNotSupportedException(); diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/ExceptionHandling.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/ExceptionHandling.cs index 2fa56bb80ea2..75e8b469d6f6 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/ExceptionHandling.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/ExceptionHandling.cs @@ -444,9 +444,9 @@ public static Exception GetRuntimeException(ExceptionIDs id) private enum HwExceptionCode : uint { - STATUS_REDHAWK_NULL_REFERENCE = 0x00000000u, - STATUS_REDHAWK_UNMANAGED_HELPER_NULL_REFERENCE = 0x00000042u, - STATUS_REDHAWK_THREAD_ABORT = 0x00000043u, + STATUS_NATIVEAOT_NULL_REFERENCE = 0x00000000u, + STATUS_NATIVEAOT_UNMANAGED_HELPER_NULL_REFERENCE = 0x00000042u, + STATUS_NATIVEAOT_THREAD_ABORT = 0x00000043u, STATUS_DATATYPE_MISALIGNMENT = 0x80000002u, STATUS_ACCESS_VIOLATION = 0xC0000005u, @@ -574,11 +574,11 @@ public static void RhThrowHwEx(uint exceptionCode, ref ExInfo exInfo) switch (exceptionCode) { - case (uint)HwExceptionCode.STATUS_REDHAWK_NULL_REFERENCE: + case (uint)HwExceptionCode.STATUS_NATIVEAOT_NULL_REFERENCE: exceptionId = ExceptionIDs.NullReference; break; - case (uint)HwExceptionCode.STATUS_REDHAWK_UNMANAGED_HELPER_NULL_REFERENCE: + case (uint)HwExceptionCode.STATUS_NATIVEAOT_UNMANAGED_HELPER_NULL_REFERENCE: // The write barrier where the actual fault happened has been unwound already. // The IP of this fault needs to be treated as return address, not as IP of // faulting instruction. @@ -587,7 +587,7 @@ public static void RhThrowHwEx(uint exceptionCode, ref ExInfo exInfo) break; #if NATIVEAOT - case (uint)HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT: + case (uint)HwExceptionCode.STATUS_NATIVEAOT_THREAD_ABORT: exceptionToThrow = InternalCalls.RhpGetThreadAbortException(); break; #endif @@ -597,7 +597,7 @@ public static void RhThrowHwEx(uint exceptionCode, ref ExInfo exInfo) break; // N.B. -- AVs that have a read/write address lower than 64k are already transformed to - // HwExceptionCode.REDHAWK_NULL_REFERENCE prior to calling this routine. + // HwExceptionCode.STATUS_NATIVEAOT_NULL_REFERENCE prior to calling this routine. case (uint)HwExceptionCode.STATUS_ACCESS_VIOLATION: exceptionId = ExceptionIDs.AccessViolation; break; @@ -649,11 +649,18 @@ public static void RhThrowHwEx(uint exceptionCode, ref ExInfo exInfo) public static void RhThrowEx(object exceptionObj, ref ExInfo exInfo) { #if NATIVEAOT + +#if TARGET_WINDOWS + // Alert the debugger that we threw an exception. + InternalCalls.RhpFirstChanceExceptionNotification(); +#endif // TARGET_WINDOWS + // trigger a GC (only if gcstress) to ensure we can stackwalk at this point GCStress.TriggerGC(); InternalCalls.RhpValidateExInfoStack(); -#endif +#endif // NATIVEAOT + // Transform attempted throws of null to a throw of NullReferenceException. 
if (exceptionObj == null) { @@ -665,6 +672,7 @@ public static void RhThrowEx(object exceptionObj, ref ExInfo exInfo) DispatchEx(ref exInfo._frameIter, ref exInfo); FallbackFailFast(RhFailFastReason.InternalError, null); } + #if !NATIVEAOT public static void RhUnwindAndIntercept(ref ExInfo exInfo, UIntPtr interceptStackFrameSP) { @@ -731,11 +739,18 @@ public static void RhUnwindAndIntercept(ref ExInfo exInfo, UIntPtr interceptStac public static void RhRethrow(ref ExInfo activeExInfo, ref ExInfo exInfo) { #if NATIVEAOT + +#if TARGET_WINDOWS + // Alert the debugger that we threw an exception. + InternalCalls.RhpFirstChanceExceptionNotification(); +#endif // TARGET_WINDOWS + // trigger a GC (only if gcstress) to ensure we can stackwalk at this point GCStress.TriggerGC(); InternalCalls.RhpValidateExInfoStack(); -#endif +#endif // NATIVEAOT + // We need to copy the exception object to this stack location because collided unwinds // will cause the original stack location to go dead. object rethrownException = activeExInfo.ThrownException; diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/GCStress.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/GCStress.cs index 3478cf335330..70ddce2a3548 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/GCStress.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/GCStress.cs @@ -32,7 +32,7 @@ public static void Initialize() // drop the first element Head = Head.Next; - // notify redhawku.dll + // notify the runtime InternalCalls.RhpInitializeGcStress(); #endif // FEATURE_GC_STRESS } diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs index fb65014ce385..e406f0424f67 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs @@ -58,6 +58,8 @@ internal static partial class InternalCalls // internalcalls for System.GC. // + private const string RuntimeLibrary = "*"; + // Force a garbage collection. [RuntimeExport("RhCollect")] internal static void RhCollect(int generation, InternalGCCollectionMode mode, bool lowMemoryP = false) @@ -65,7 +67,7 @@ internal static void RhCollect(int generation, InternalGCCollectionMode mode, bo RhpCollect(generation, mode, lowMemoryP ? Interop.BOOL.TRUE : Interop.BOOL.FALSE); } - [DllImport(Redhawk.BaseName)] + [DllImport(RuntimeLibrary)] private static extern void RhpCollect(int generation, InternalGCCollectionMode mode, Interop.BOOL lowMemoryP); [RuntimeExport("RhGetGcTotalMemory")] @@ -74,7 +76,7 @@ internal static long RhGetGcTotalMemory() return RhpGetGcTotalMemory(); } - [DllImport(Redhawk.BaseName)] + [DllImport(RuntimeLibrary)] private static extern long RhpGetGcTotalMemory(); [RuntimeExport("RhStartNoGCRegion")] @@ -94,7 +96,7 @@ internal static int RhEndNoGCRegion() // // Fetch next object which needs finalization or return null if we've reached the end of the list. - [RuntimeImport(Redhawk.BaseName, "RhpGetNextFinalizableObject")] + [RuntimeImport(RuntimeLibrary, "RhpGetNextFinalizableObject")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern object RhpGetNextFinalizableObject(); @@ -103,99 +105,103 @@ internal static int RhEndNoGCRegion() // // Allocate handle. 
- [RuntimeImport(Redhawk.BaseName, "RhpHandleAlloc")] + [RuntimeImport(RuntimeLibrary, "RhpHandleAlloc")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern IntPtr RhpHandleAlloc(object value, GCHandleType type); - [RuntimeImport(Redhawk.BaseName, "RhHandleGet")] + [RuntimeImport(RuntimeLibrary, "RhHandleGet")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern object RhHandleGet(IntPtr handle); - [RuntimeImport(Redhawk.BaseName, "RhHandleSet")] + [RuntimeImport(RuntimeLibrary, "RhHandleSet")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern IntPtr RhHandleSet(IntPtr handle, object value); // // internal calls for allocation // - [RuntimeImport(Redhawk.BaseName, "RhpNewFast")] + [RuntimeImport(RuntimeLibrary, "RhpNewFast")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe object RhpNewFast(MethodTable* pEEType); // BEWARE: not for finalizable objects! - [RuntimeImport(Redhawk.BaseName, "RhpNewFinalizable")] + [RuntimeImport(RuntimeLibrary, "RhpNewFinalizable")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe object RhpNewFinalizable(MethodTable* pEEType); - [RuntimeImport(Redhawk.BaseName, "RhpNewArray")] + [RuntimeImport(RuntimeLibrary, "RhpNewArrayFast")] [MethodImpl(MethodImplOptions.InternalCall)] - internal static extern unsafe object RhpNewArray(MethodTable* pEEType, int length); + internal static extern unsafe object RhpNewArrayFast(MethodTable* pEEType, int length); #if FEATURE_64BIT_ALIGNMENT - [RuntimeImport(Redhawk.BaseName, "RhpNewFastAlign8")] + [RuntimeImport(RuntimeLibrary, "RhpNewFastAlign8")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe object RhpNewFastAlign8(MethodTable * pEEType); // BEWARE: not for finalizable objects! - [RuntimeImport(Redhawk.BaseName, "RhpNewFinalizableAlign8")] + [RuntimeImport(RuntimeLibrary, "RhpNewFinalizableAlign8")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe object RhpNewFinalizableAlign8(MethodTable* pEEType); - [RuntimeImport(Redhawk.BaseName, "RhpNewArrayAlign8")] + [RuntimeImport(RuntimeLibrary, "RhpNewArrayFastAlign8")] [MethodImpl(MethodImplOptions.InternalCall)] - internal static extern unsafe object RhpNewArrayAlign8(MethodTable* pEEType, int length); + internal static extern unsafe object RhpNewArrayFastAlign8(MethodTable* pEEType, int length); - [RuntimeImport(Redhawk.BaseName, "RhpNewFastMisalign")] + [RuntimeImport(RuntimeLibrary, "RhpNewFastMisalign")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe object RhpNewFastMisalign(MethodTable * pEEType); #endif // FEATURE_64BIT_ALIGNMENT #if !TARGET_WASM - [RuntimeImport(Redhawk.BaseName, "RhpAssignRef")] + [RuntimeImport(RuntimeLibrary, "RhpAssignRef")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe void RhpAssignRef(ref object? address, object?
obj); #endif [MethodImplAttribute(MethodImplOptions.InternalCall)] - [RuntimeImport(Redhawk.BaseName, "RhpGcSafeZeroMemory")] + [RuntimeImport(RuntimeLibrary, "RhpGcSafeZeroMemory")] internal static extern unsafe ref byte RhpGcSafeZeroMemory(ref byte dmem, nuint size); [MethodImplAttribute(MethodImplOptions.InternalCall)] - [RuntimeImport(Redhawk.BaseName, "RhBulkMoveWithWriteBarrier")] + [RuntimeImport(RuntimeLibrary, "RhBulkMoveWithWriteBarrier")] internal static extern unsafe void RhBulkMoveWithWriteBarrier(ref byte dmem, ref byte smem, nuint size); #if FEATURE_GC_STRESS // // internal calls for GC stress // - [RuntimeImport(Redhawk.BaseName, "RhpInitializeGcStress")] + [RuntimeImport(RuntimeLibrary, "RhpInitializeGcStress")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe void RhpInitializeGcStress(); #endif // FEATURE_GC_STRESS - [RuntimeImport(Redhawk.BaseName, "RhpEHEnumInitFromStackFrameIterator")] + [RuntimeImport(RuntimeLibrary, "RhpEHEnumInitFromStackFrameIterator")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe bool RhpEHEnumInitFromStackFrameIterator(ref StackFrameIterator pFrameIter, out EH.MethodRegionInfo pMethodRegionInfo, void* pEHEnum); - [RuntimeImport(Redhawk.BaseName, "RhpEHEnumNext")] + [RuntimeImport(RuntimeLibrary, "RhpEHEnumNext")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe bool RhpEHEnumNext(void* pEHEnum, void* pEHClause); - [RuntimeImport(Redhawk.BaseName, "RhpGetDispatchCellInfo")] + [RuntimeImport(RuntimeLibrary, "RhpGetDispatchCellInfo")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe void RhpGetDispatchCellInfo(IntPtr pCell, out DispatchCellInfo newCellInfo); - [RuntimeImport(Redhawk.BaseName, "RhpSearchDispatchCellCache")] + [RuntimeImport(RuntimeLibrary, "RhpSearchDispatchCellCache")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe IntPtr RhpSearchDispatchCellCache(IntPtr pCell, MethodTable* pInstanceType); - [RuntimeImport(Redhawk.BaseName, "RhpUpdateDispatchCellCache")] + [RuntimeImport(RuntimeLibrary, "RhpUpdateDispatchCellCache")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe IntPtr RhpUpdateDispatchCellCache(IntPtr pCell, IntPtr pTargetCode, MethodTable* pInstanceType, ref DispatchCellInfo newCellInfo); - [RuntimeImport(Redhawk.BaseName, "RhpGetClasslibFunctionFromCodeAddress")] + [RuntimeImport(RuntimeLibrary, "RhpGetClasslibFunctionFromCodeAddress")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe void* RhpGetClasslibFunctionFromCodeAddress(IntPtr address, ClassLibFunctionId id); - [RuntimeImport(Redhawk.BaseName, "RhpGetClasslibFunctionFromEEType")] + [RuntimeImport(RuntimeLibrary, "RhpGetClasslibFunctionFromEEType")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe void* RhpGetClasslibFunctionFromEEType(MethodTable* pEEType, ClassLibFunctionId id); @@ -203,11 +209,11 @@ internal static int RhEndNoGCRegion() // StackFrameIterator // - [RuntimeImport(Redhawk.BaseName, "RhpSfiInit")] + [RuntimeImport(RuntimeLibrary, "RhpSfiInit")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe bool RhpSfiInit(ref StackFrameIterator pThis, void* pStackwalkCtx, bool instructionFault, bool* fIsExceptionIntercepted); - [RuntimeImport(Redhawk.BaseName, "RhpSfiNext")] + [RuntimeImport(RuntimeLibrary, "RhpSfiNext")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe bool RhpSfiNext(ref 
StackFrameIterator pThis, uint* uExCollideClauseIdx, bool* fUnwoundReversePInvoke, bool* fIsExceptionIntercepted); @@ -215,55 +221,65 @@ internal static int RhEndNoGCRegion() // Miscellaneous helpers. // - [RuntimeImport(Redhawk.BaseName, "RhpCallCatchFunclet")] + [RuntimeImport(RuntimeLibrary, "RhpCallCatchFunclet")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe IntPtr RhpCallCatchFunclet( object exceptionObj, byte* pHandlerIP, void* pvRegDisplay, ref EH.ExInfo exInfo); - [RuntimeImport(Redhawk.BaseName, "RhpCallFinallyFunclet")] + [RuntimeImport(RuntimeLibrary, "RhpCallFinallyFunclet")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe void RhpCallFinallyFunclet(byte* pHandlerIP, void* pvRegDisplay); - [RuntimeImport(Redhawk.BaseName, "RhpCallFilterFunclet")] + [RuntimeImport(RuntimeLibrary, "RhpCallFilterFunclet")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe bool RhpCallFilterFunclet( object exceptionObj, byte* pFilterIP, void* pvRegDisplay); #if FEATURE_OBJCMARSHAL - [RuntimeImport(Redhawk.BaseName, "RhpCallPropagateExceptionCallback")] + [RuntimeImport(RuntimeLibrary, "RhpCallPropagateExceptionCallback")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe IntPtr RhpCallPropagateExceptionCallback( IntPtr callbackContext, IntPtr callback, void* pvRegDisplay, ref EH.ExInfo exInfo, IntPtr pPreviousTransitionFrame); #endif // FEATURE_OBJCMARSHAL - [RuntimeImport(Redhawk.BaseName, "RhpFallbackFailFast")] + [RuntimeImport(RuntimeLibrary, "RhpFallbackFailFast")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe void RhpFallbackFailFast(); - [RuntimeImport(Redhawk.BaseName, "RhpClearThreadDoNotTriggerGC")] + [RuntimeImport(RuntimeLibrary, "RhpClearThreadDoNotTriggerGC")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern void RhpClearThreadDoNotTriggerGC(); - [RuntimeImport(Redhawk.BaseName, "RhpSetThreadDoNotTriggerGC")] + [RuntimeImport(RuntimeLibrary, "RhpSetThreadDoNotTriggerGC")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern void RhpSetThreadDoNotTriggerGC(); [System.Diagnostics.Conditional("DEBUG")] - [RuntimeImport(Redhawk.BaseName, "RhpValidateExInfoStack")] + [RuntimeImport(RuntimeLibrary, "RhpValidateExInfoStack")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern void RhpValidateExInfoStack(); #if TARGET_WINDOWS - [RuntimeImport(Redhawk.BaseName, "RhpCopyContextFromExInfo")] + [RuntimeImport(RuntimeLibrary, "RhpFirstChanceExceptionNotification")] + [MethodImpl(MethodImplOptions.InternalCall)] + internal static extern void RhpFirstChanceExceptionNotification(); +#endif + +#if TARGET_WINDOWS + [RuntimeImport(RuntimeLibrary, "RhpCopyContextFromExInfo")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe void RhpCopyContextFromExInfo(void* pOSContext, int cbOSContext, EH.PAL_LIMITED_CONTEXT* pPalContext); #endif - [RuntimeImport(Redhawk.BaseName, "RhpGetThreadAbortException")] + [RuntimeImport(RuntimeLibrary, "RhpGetThreadAbortException")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern Exception RhpGetThreadAbortException(); - [RuntimeImport(Redhawk.BaseName, "RhCurrentNativeThreadId")] + [RuntimeImport(RuntimeLibrary, "RhCurrentNativeThreadId")] [MethodImpl(MethodImplOptions.InternalCall)] internal static extern unsafe IntPtr RhCurrentNativeThreadId(); @@ -279,24 +295,21 @@ internal static extern unsafe IntPtr RhpCallPropagateExceptionCallback( // Block the current thread until
at least one object needs to be finalized (returns true) or // memory is low (returns false and the finalizer thread should initiate a garbage collection). - [DllImport(Redhawk.BaseName)] + [DllImport(RuntimeLibrary)] internal static extern uint RhpWaitForFinalizerRequest(); // Indicate that the current round of finalizations is complete. - [DllImport(Redhawk.BaseName)] + [DllImport(RuntimeLibrary)] internal static extern void RhpSignalFinalizationComplete(uint fCount, int observedFullGcCount); - [DllImport(Redhawk.BaseName)] - internal static extern ulong RhpGetTickCount64(); - // Enters a no GC region, possibly doing a blocking GC if there is not enough // memory available to satisfy the caller's request. - [DllImport(Redhawk.BaseName)] + [DllImport(RuntimeLibrary)] internal static extern int RhpStartNoGCRegion(long totalSize, Interop.BOOL hasLohSize, long lohSize, Interop.BOOL disallowFullBlockingGC); // Exits a no GC region, possibly doing a GC to clean up the garbage that // the caller allocated. - [DllImport(Redhawk.BaseName)] + [DllImport(RuntimeLibrary)] internal static extern int RhpEndNoGCRegion(); } } diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InteropServices/OutAttribute.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InteropServices/OutAttribute.cs index 97838bea3476..5b89da9ac109 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InteropServices/OutAttribute.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InteropServices/OutAttribute.cs @@ -3,7 +3,7 @@ namespace System.Runtime.InteropServices { - // Not used in Redhawk. Only here as C# compiler requires it + // Only here as C# compiler requires it [AttributeUsage(AttributeTargets.Parameter, Inherited = false)] internal sealed class OutAttribute : Attribute { diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs index d6e058b9c302..949b9b15ac01 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs @@ -57,20 +57,44 @@ public static unsafe object RhNewObject(MethodTable* pEEType) [RuntimeExport("RhNewArray")] public static unsafe object RhNewArray(MethodTable* pEEType, int length) + { + Debug.Assert(pEEType->IsSzArray); + +#if FEATURE_64BIT_ALIGNMENT + MethodTable* pEEElementType = pEEType->RelatedParameterType; + if (pEEElementType->IsValueType && pEEElementType->RequiresAlign8) + { + return InternalCalls.RhpNewArrayFastAlign8(pEEType, length); + } + else +#endif // FEATURE_64BIT_ALIGNMENT + { + return InternalCalls.RhpNewArrayFast(pEEType, length); + } + } + + [RuntimeExport("RhNewVariableSizeObject")] + public static unsafe object RhNewVariableSizeObject(MethodTable* pEEType, int length) { Debug.Assert(pEEType->IsArray || pEEType->IsString); + object array; #if FEATURE_64BIT_ALIGNMENT MethodTable* pEEElementType = pEEType->RelatedParameterType; if (pEEElementType->IsValueType && pEEElementType->RequiresAlign8) { - return InternalCalls.RhpNewArrayAlign8(pEEType, length); + RuntimeImports.RhAllocateNewArray(pEEType, (uint)length, (uint)GC_ALLOC_FLAGS.GC_ALLOC_ALIGN8, &array); } else #endif // FEATURE_64BIT_ALIGNMENT { - return InternalCalls.RhpNewArray(pEEType, length); + RuntimeImports.RhAllocateNewArray(pEEType, (uint)length, (uint)GC_ALLOC_FLAGS.GC_ALLOC_NO_FLAGS, &array); } + + if (array == null) + throw new OutOfMemoryException(); + + return array; 
} public static unsafe object RhBox(MethodTable* pEEType, ref byte data) @@ -294,7 +318,7 @@ public static unsafe int RhGetCurrentThreadStackTrace(IntPtr[] outputBuffer) } #pragma warning disable SYSLIB1054 // Use DllImport here instead of LibraryImport because this file is used by Test.CoreLib. - [DllImport(Redhawk.BaseName)] + [DllImport("*")] private static extern unsafe int RhpGetCurrentThreadStackTrace(IntPtr* pOutputBuffer, uint outputBufferLength, UIntPtr addressInCurrentFrame); #pragma warning restore SYSLIB1054 @@ -382,10 +406,10 @@ internal static unsafe IntPtr RhGetRuntimeHelperForType(MethodTable* pEEType, Ru #if FEATURE_64BIT_ALIGNMENT MethodTable* pEEElementType = pEEType->RelatedParameterType; if (pEEElementType->IsValueType && pEEElementType->RequiresAlign8) - return (IntPtr)(delegate*)&InternalCalls.RhpNewArrayAlign8; + return (IntPtr)(delegate*)&InternalCalls.RhpNewArrayFastAlign8; #endif // FEATURE_64BIT_ALIGNMENT - return (IntPtr)(delegate*)&InternalCalls.RhpNewArray; + return (IntPtr)(delegate*)&InternalCalls.RhpNewArrayFast; default: Debug.Fail("Unknown RuntimeHelperKind"); diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/StackFrameIterator.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/StackFrameIterator.cs index f758b16b5d22..e89eb88a3f0b 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/StackFrameIterator.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/StackFrameIterator.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics; using System.Runtime.InteropServices; #if !NATIVEAOT using System.Runtime.ExceptionServices; @@ -60,6 +61,7 @@ internal unsafe struct StackFrameIterator #pragma warning restore CA1822 #endif // NATIVEAOT + [StackTraceHidden] internal bool Init(EH.PAL_LIMITED_CONTEXT* pStackwalkCtx, bool instructionFault = false, bool* fIsExceptionIntercepted = null) { return InternalCalls.RhpSfiInit(ref this, pStackwalkCtx, instructionFault, fIsExceptionIntercepted); diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs index 8425f31f16cc..bc5abe5dc63d 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs @@ -789,27 +789,12 @@ public static unsafe void StelemRef(object?[] array, nint index, object? obj) // This is supported only on arrays Debug.Assert(array is null || array.GetMethodTable()->IsArray, "first argument must be an array"); -#if INPLACE_RUNTIME // This will throw NullReferenceException if obj is null. if ((nuint)index >= (uint)array.Length) ThrowIndexOutOfRangeException(array); Debug.Assert(index >= 0); ref object? element = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index); -#else - if (array is null) - { - // TODO: If both array and obj are null, we're likely going to throw Redhawk's NullReferenceException. - // This should blame the caller. 
- throw obj.GetMethodTable()->GetClasslibException(ExceptionIDs.NullReference); - } - if ((uint)index >= (uint)array.Length) - { - throw array.GetMethodTable()->GetClasslibException(ExceptionIDs.IndexOutOfRange); - } - ref object rawData = ref Unsafe.As(ref Unsafe.As(array).Data); - ref object element = ref Unsafe.Add(ref rawData, index); -#endif MethodTable* elementType = array.GetMethodTable()->RelatedParameterType; diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/__Finalizer.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/__Finalizer.cs index dce232600ff1..2053b5b34620 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/__Finalizer.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/__Finalizer.cs @@ -7,7 +7,7 @@ using System.Runtime.InteropServices; // -// Implements the single finalizer thread for a Redhawk instance. Essentially waits for an event to fire +// Implements the single finalizer thread for a NativeAOT instance. Essentially waits for an event to fire // indicating finalization is necessary then drains the queue of pending finalizable objects, calling the // finalize method for each one. // @@ -20,10 +20,6 @@ internal static class __Finalizer [UnmanagedCallersOnly(EntryPoint = "ProcessFinalizers")] public static void ProcessFinalizers() { -#if INPLACE_RUNTIME - System.Runtime.FinalizerInitRunner.DoInitialize(); -#endif - while (true) { // Wait until there's some work to be done. If true is returned we should finalize objects, diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/RuntimeTypeHandle.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/RuntimeTypeHandle.cs index 8ecb99bc49e5..f216c6c3330c 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/RuntimeTypeHandle.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/RuntimeTypeHandle.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // System.Type and System.RuntimeTypeHandle are defined here as the C# compiler requires them -// In the redhawk runtime these are not used. 
In the class library there is an implementation that support typeof using System; using System.Runtime.CompilerServices; diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/nativeaot/Runtime/AsmOffsets.h index 0284daba94d5..616c4847235e 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsets.h +++ b/src/coreclr/nativeaot/Runtime/AsmOffsets.h @@ -32,13 +32,15 @@ ASM_OFFSET( 4, 8, String, m_Length) ASM_OFFSET( 8, C, String, m_FirstChar) ASM_CONST( 2, 2, STRING_COMPONENT_SIZE) ASM_CONST( E, 16, STRING_BASE_SIZE) +ASM_CONST( C, 18, SZARRAY_BASE_SIZE) +ASM_CONST( C, 18, MIN_OBJECT_SIZE) ASM_CONST(3FFFFFDF,3FFFFFDF,MAX_STRING_LENGTH) #if defined(HOST_ARM64) // Bit position for the ARM64IntrinsicConstants_Atomics flags, to be used with tbz / tbnz instructions -// ARM64IntrinsicConstants_Atomics = 0x0080 -ASM_CONST( 7, 7, ARM64_ATOMICS_FEATURE_FLAG_BIT) +// ARM64IntrinsicConstants_Atomics = 0x0040 +ASM_CONST( 6, 6, ARM64_ATOMICS_FEATURE_FLAG_BIT) #endif ASM_OFFSET( 0, 0, MethodTable, m_usComponentSize) diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp index a0ca5a4c0081..a0e4356830fb 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp +++ b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -4,9 +4,7 @@ #include "gcenv.h" #include "gcheaputilities.h" #include "rhassert.h" -#include "RedhawkWarnings.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index f9b16ef516ac..db9f042c06f8 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -1,9 +1,11 @@ set(GC_DIR ../../gc) +set(RUNTIME_DIR ../../runtime) set(COMMON_RUNTIME_SOURCES allocheap.cpp rhassert.cpp - CachedInterfaceDispatch.cpp + ${RUNTIME_DIR}/CachedInterfaceDispatch.cpp + CachedInterfaceDispatch_Aot.cpp Crst.cpp DebugHeader.cpp MethodTable.cpp @@ -76,6 +78,7 @@ include_directories(.) 
include_directories(${GC_DIR}) include_directories(${GC_DIR}/env) include_directories(${CMAKE_CURRENT_BINARY_DIR}/eventpipe/inc) +include_directories(${RUNTIME_DIR}) if (WIN32) set(GC_HEADERS @@ -117,8 +120,8 @@ if (WIN32) include_directories(windows) list(APPEND COMMON_RUNTIME_SOURCES - windows/PalRedhawkCommon.cpp - windows/PalRedhawkMinWin.cpp + windows/PalCommon.cpp + windows/PalMinWin.cpp ${GC_DIR}/windows/gcenv.windows.cpp ) @@ -148,7 +151,7 @@ else() endif() list(APPEND COMMON_RUNTIME_SOURCES - unix/PalRedhawkUnix.cpp + unix/PalUnix.cpp unix/PalCreateDump.cpp ${GC_DIR}/unix/gcenv.unix.cpp ${GC_DIR}/unix/numasupport.cpp @@ -230,17 +233,23 @@ if (CLR_CMAKE_TARGET_ARCH_WASM) endif (CLR_CMAKE_TARGET_ARCH_WASM) list(APPEND RUNTIME_SOURCES_ARCH_ASM - ${ARCH_SOURCES_DIR}/AllocFast.${ASM_SUFFIX} + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/ExceptionHandling.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/GcProbe.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/MiscStubs.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/PInvoke.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/InteropThunksHelpers.${ASM_SUFFIX} - ${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/UniversalTransition.${ASM_SUFFIX} - ${ARCH_SOURCES_DIR}/WriteBarriers.${ASM_SUFFIX} + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.${ASM_SUFFIX} ) +if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + list(APPEND RUNTIME_SOURCES_ARCH_ASM + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchAot.${ASM_SUFFIX} + ) +endif () + # Add architecture specific folder for looking up headers. convert_to_absolute_path(ARCH_SOURCES_DIR ${ARCH_SOURCES_DIR}) include_directories(${ARCH_SOURCES_DIR}) @@ -264,7 +273,7 @@ endif() add_definitions(-DFEATURE_BASICFREEZE) add_definitions(-DFEATURE_CONSERVATIVE_GC) -if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) +if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_RISCV64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES) endif() @@ -317,7 +326,7 @@ if (CLR_CMAKE_TARGET_UNIX) endif(CLR_CMAKE_TARGET_UNIX) -set(RUNTIME_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(NATIVEAOT_RUNTIME_DIR ${CMAKE_CURRENT_SOURCE_DIR}) list(APPEND COMMON_RUNTIME_SOURCES ${GC_HEADERS}) diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.h b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.h deleted file mode 100644 index ea0f7841164b..000000000000 --- a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.h +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// ==--== -// -// Shared (non-architecture specific) portions of a mechanism to perform interface dispatch using an alternate -// mechanism to VSD that does not require runtime generation of code. -// -// ============================================================================ - -#ifdef FEATURE_CACHED_INTERFACE_DISPATCH - -bool InitializeInterfaceDispatch(); -void ReclaimUnusedInterfaceDispatchCaches(); - -// Interface dispatch caches contain an array of these entries. An instance of a cache is paired with a stub -// that implicitly knows how many entries are contained. These entries must be aligned to twice the alignment -// of a pointer due to the synchonization mechanism used to update them at runtime. 
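The comment above, from the header this change deletes in favor of the copy shared with src/coreclr/runtime, captures the key layout constraint of cached interface dispatch: each (instance type, target code) pair must be publishable as a single atomic unit, so entries are aligned to twice the pointer size, which is the alignment a double-width compare-exchange requires. A minimal sketch of that constraint follows; the names are hypothetical, not the runtime's actual declarations.

```cpp
// Illustrative only: hypothetical names, not the runtime's real types.
struct alignas(2 * sizeof(void*)) ExampleDispatchCacheEntry
{
    void* m_pInstanceType; // key: type of the receiver seen at this call site
    void* m_pTargetCode;   // value: code to dispatch to when the key matches
};

// A double-width compare-and-swap over the whole entry (e.g. cmpxchg16b on x64)
// is only valid when the entry sits on a 2 * sizeof(void*) boundary.
static_assert(alignof(ExampleDispatchCacheEntry) == 2 * sizeof(void*),
              "paired entries must be double-pointer aligned for atomic updates");
```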
-struct InterfaceDispatchCacheEntry -{ - MethodTable * m_pInstanceType; // Potential type of the object instance being dispatched on - PCODE m_pTargetCode; // Method to dispatch to if the actual instance type matches the above -}; - -// The interface dispatch cache itself. As well as the entries we include the cache size (since logic such as -// cache miss processing needs to determine this value in a synchronized manner, so it can't be contained in -// the owning interface dispatch indirection cell) and a list entry used to link the caches in one of a couple -// of lists related to cache reclamation. - -#pragma warning(push) -#pragma warning(disable:4200) // nonstandard extension used: zero-sized array in struct/union -struct InterfaceDispatchCell; -struct InterfaceDispatchCache -{ - InterfaceDispatchCacheHeader m_cacheHeader; - union - { - InterfaceDispatchCache * m_pNextFree; // next in free list -#ifdef INTERFACE_DISPATCH_CACHE_HAS_CELL_BACKPOINTER - // On ARM and x86 the slow path in the stubs needs to reload the cell pointer from the cache due to the lack - // of available (volatile non-argument) registers. - InterfaceDispatchCell * m_pCell; // pointer back to interface dispatch cell -#endif - }; - uint32_t m_cEntries; - InterfaceDispatchCacheEntry m_rgEntries[]; -}; -#pragma warning(pop) - -#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h new file mode 100644 index 000000000000..08665bca02f7 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __CACHEDINTERFACEDISPATCHPAL_H__ +#define __CACHEDINTERFACEDISPATCHPAL_H__ + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "DebugMacrosExt.h" +#include "PalLimitedContext.h" +#include "Pal.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "TargetPtrs.h" +#include "MethodTable.h" +#include "Range.h" +#include "allocheap.h" +#include "rhbinder.h" +#include "ObjectLayout.h" +#include "shash.h" +#include "TypeManager.h" +#include "RuntimeInstance.h" +#include "MethodTable.inl" +#include "CommonMacros.inl" + +bool InterfaceDispatch_InitializePal(); + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size); +// Allocate memory aligned at sizeof(void*) boundaries + +void *InterfaceDispatch_AllocPointerAligned(size_t size); + +#endif // __CACHEDINTERFACEDISPATCHPAL_H__ \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch_Aot.cpp b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch_Aot.cpp new file mode 100644 index 000000000000..9a8be1bfd69e --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch_Aot.cpp @@ -0,0 +1,56 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include +#include "CachedInterfaceDispatchPal.h" +#include "CachedInterfaceDispatch.h" + +// The base memory allocator. 
+static AllocHeap * g_pAllocHeap = NULL; + +bool InterfaceDispatch_InitializePal() +{ + g_pAllocHeap = new (nothrow) AllocHeap(); + if (g_pAllocHeap == NULL) + return false; + + if (!g_pAllocHeap->Init()) + return false; + + return true; +} + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size) +{ + return g_pAllocHeap->AllocAligned(size, sizeof(void*) * 2); +} + +// Allocate memory aligned at sizeof(void*) boundaries + +void *InterfaceDispatch_AllocPointerAligned(size_t size) +{ + return g_pAllocHeap->AllocAligned(size, sizeof(void*)); +} + +FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo) +{ + return InterfaceDispatch_UpdateDispatchCellCache(pCell, pTargetCode, pInstanceType, pNewCellInfo); +} +FCIMPLEND + +FCIMPL2(PCODE, RhpSearchDispatchCellCache, InterfaceDispatchCell * pCell, MethodTable* pInstanceType) +{ + return InterfaceDispatch_SearchDispatchCellCache(pCell, pInstanceType); +} +FCIMPLEND + +// Given a dispatch cell, get the type and slot associated with it. This function MUST be implemented +// in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed +// code due to its use of the GC state as a lock, and as lifetime control +FCIMPL2(void, RhpGetDispatchCellInfo, InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo) +{ + *pDispatchCellInfo = pCell->GetDispatchCellInfo(); +} +FCIMPLEND diff --git a/src/coreclr/nativeaot/Runtime/CommonMacros.h b/src/coreclr/nativeaot/Runtime/CommonMacros.h index f0424ee94cb9..8231aa4b6c2f 100644 --- a/src/coreclr/nativeaot/Runtime/CommonMacros.h +++ b/src/coreclr/nativeaot/Runtime/CommonMacros.h @@ -6,6 +6,9 @@ #include "rhassert.h" #include +#ifdef PROFILE_STARTUP +#include +#endif #define EXTERN_C extern "C" @@ -207,6 +210,7 @@ typedef uint8_t CODE_LOCATION; FCIMPL_RENAME_ARGSIZE(_rettype, _method, 16) \ EXTERN_C _rettype F_CALL_CONV _method##_FCall (b, a) \ { +#define FCIMPL2_LL FCIMPL2_DD #define FCIMPL2_FI(_rettype, _method, a, b) \ FCIMPL_RENAME_ARGSIZE(_rettype, _method, 8) \ EXTERN_C _rettype F_CALL_CONV _method##_FCall (a, b) \ @@ -258,6 +262,7 @@ typedef uint8_t CODE_LOCATION; #define FCIMPL2_DD(_rettype, _method, a, b) \ EXTERN_C _rettype F_CALL_CONV _method FCALL_METHOD_ARGS_NO_METHOD_NAME(a, b) \ { +#define FCIMPL2_LL FCIMPL2_DD #define FCIMPL2_FI(_rettype, _method, a, b) \ EXTERN_C _rettype F_CALL_CONV _method FCALL_METHOD_ARGS_NO_METHOD_NAME(a, b) \ { @@ -330,7 +335,7 @@ enum STARTUP_TIMELINE_EVENT_ID #ifdef PROFILE_STARTUP extern uint64_t g_startupTimelineEvents[NUM_STARTUP_TIMELINE_EVENTS]; -#define STARTUP_TIMELINE_EVENT(eventid) g_startupTimelineEvents[eventid] = PalQueryPerformanceCounter(); +#define STARTUP_TIMELINE_EVENT(eventid) g_startupTimelineEvents[eventid] = (uint64_t)minipal_hires_ticks(); #else // PROFILE_STARTUP #define STARTUP_TIMELINE_EVENT(eventid) #endif // PROFILE_STARTUP @@ -345,22 +350,6 @@ extern uint64_t g_startupTimelineEvents[NUM_STARTUP_TIMELINE_EVENTS]; #define DECLSPEC_THREAD __thread #endif // !_MSC_VER -#ifndef __GCENV_BASE_INCLUDED__ -#if !defined(_INC_WINDOWS) -#ifdef _WIN32 -// this must exactly match the typedef used by windows.h -typedef long HRESULT; -#else -typedef int32_t HRESULT; -#endif - -#define S_OK 0x0 -#define E_FAIL 0x80004005 - -#define UNREFERENCED_PARAMETER(P) (void)(P) -#endif // !defined(_INC_WINDOWS) -#endif // __GCENV_BASE_INCLUDED__ - // PAL Numbers // Used 
to ensure cross-compiler compatibility when declaring large // integer constants. 64-bit integer constants should be wrapped in the diff --git a/src/coreclr/nativeaot/Runtime/Crst.cpp b/src/coreclr/nativeaot/Runtime/Crst.cpp index 48a3ee7fde03..36beaf28ed56 100644 --- a/src/coreclr/nativeaot/Runtime/Crst.cpp +++ b/src/coreclr/nativeaot/Runtime/Crst.cpp @@ -3,8 +3,8 @@ #include "common.h" #include "CommonTypes.h" #include "CommonMacros.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "holder.h" #include "Crst.h" @@ -16,14 +16,14 @@ void CrstStatic::Init(CrstType eType, CrstFlags eFlags) #if defined(_DEBUG) m_uiOwnerId.Clear(); #endif // _DEBUG - PalInitializeCriticalSectionEx(&m_sCritSec, 0, 0); + minipal_mutex_init(&m_Lock); #endif // !DACCESS_COMPILE } void CrstStatic::Destroy() { #ifndef DACCESS_COMPILE - PalDeleteCriticalSection(&m_sCritSec); + minipal_mutex_destroy(&m_Lock); #endif // !DACCESS_COMPILE } @@ -31,7 +31,7 @@ void CrstStatic::Destroy() void CrstStatic::Enter(CrstStatic *pCrst) { #ifndef DACCESS_COMPILE - PalEnterCriticalSection(&pCrst->m_sCritSec); + minipal_mutex_enter(&pCrst->m_Lock); #if defined(_DEBUG) pCrst->m_uiOwnerId.SetToCurrentThread(); #endif // _DEBUG @@ -47,7 +47,7 @@ void CrstStatic::Leave(CrstStatic *pCrst) #if defined(_DEBUG) pCrst->m_uiOwnerId.Clear(); #endif // _DEBUG - PalLeaveCriticalSection(&pCrst->m_sCritSec); + minipal_mutex_leave(&pCrst->m_Lock); #else UNREFERENCED_PARAMETER(pCrst); #endif // !DACCESS_COMPILE diff --git a/src/coreclr/nativeaot/Runtime/Crst.h b/src/coreclr/nativeaot/Runtime/Crst.h index 4ab9db08e0f5..13e2177afb77 100644 --- a/src/coreclr/nativeaot/Runtime/Crst.h +++ b/src/coreclr/nativeaot/Runtime/Crst.h @@ -4,7 +4,7 @@ // // ----------------------------------------------------------------------------------------------------------- // -// Minimal Crst implementation based on CRITICAL_SECTION. Doesn't support much except for the basic locking +// Minimal Crst implementation. Doesn't support much except for the basic locking // functionality (in particular there is no rank violation checking). // @@ -51,7 +51,7 @@ class CrstStatic #endif // _DEBUG private: - CRITICAL_SECTION m_sCritSec; + minipal_mutex m_Lock; #if defined(_DEBUG) EEThreadId m_uiOwnerId; #endif // _DEBUG diff --git a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp index 051b9b0d8f7a..9c2a06892c62 100644 --- a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp +++ b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp @@ -7,9 +7,8 @@ #include "gcinterface.dac.h" #include "rhassert.h" #include "TargetPtrs.h" -#include "varint.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "holder.h" #include "RuntimeInstance.h" #include "regdisplay.h" @@ -81,7 +80,7 @@ struct DotNetRuntimeDebugHeader // v1-v4 were never doc'ed but history is source control if you need it // v5 - Thread now has an m_eeAllocContext field and the previous m_rgbAllocContextBuffer // field is nested inside of it. 
- // + // const uint16_t MajorVersion = 5; // This counter can be incremented to indicate back-compatible changes diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index a3643e32f5ea..8c956352c2d4 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -1,9 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. #include "common.h" -#ifdef HOST_WINDOWS -#include -#endif #ifndef DACCESS_COMPILE #include "CommonTypes.h" #include "CommonMacros.h" @@ -13,9 +10,8 @@ #include "GcEnum.h" #include "shash.h" #include "TypeManager.h" -#include "varint.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "holder.h" #include "Crst.h" #include "RuntimeInstance.h" @@ -32,6 +28,8 @@ #include "MethodTable.inl" #include "CommonMacros.inl" #include "NativeContext.h" +#include +#include "corexcep.h" struct MethodRegionInfo { @@ -86,6 +84,26 @@ FCIMPL0(void, RhpValidateExInfoStack) } FCIMPLEND +#ifdef TARGET_WINDOWS +FCIMPL0(void, RhpFirstChanceExceptionNotification) +{ + // Throw an SEH exception and immediately catch it. This is used to notify debuggers and other tools + // that an exception has been thrown. + if (minipal_is_native_debugger_present()) + { + __try + { + RaiseException(EXCEPTION_COMPLUS, 0, 0, NULL); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + // Do nothing, we just want to notify the debugger. + } + } +} +FCIMPLEND +#endif // TARGET_WINDOWS + FCIMPL0(void, RhpClearThreadDoNotTriggerGC) { Thread * pThisThread = ThreadStore::GetCurrentThread(); @@ -329,7 +347,7 @@ static uintptr_t UnwindSimpleHelperToCaller( #ifdef TARGET_UNIX -int32_t __stdcall RhpHardwareExceptionHandler(uintptr_t faultCode, uintptr_t faultAddress, +int32_t RhpHardwareExceptionHandler(uintptr_t faultCode, uintptr_t faultAddress, PAL_LIMITED_CONTEXT* palContext, uintptr_t* arg0Reg, uintptr_t* arg1Reg) { uintptr_t faultingIP = palContext->GetIp(); @@ -339,13 +357,13 @@ int32_t __stdcall RhpHardwareExceptionHandler(uintptr_t faultCode, uintptr_t fau if (pCodeManager != NULL) { // Make sure that the OS does not use our internal fault codes - ASSERT(faultCode != STATUS_REDHAWK_NULL_REFERENCE && faultCode != STATUS_REDHAWK_UNMANAGED_HELPER_NULL_REFERENCE); + ASSERT(faultCode != STATUS_NATIVEAOT_NULL_REFERENCE && faultCode != STATUS_NATIVEAOT_UNMANAGED_HELPER_NULL_REFERENCE); if (faultCode == STATUS_ACCESS_VIOLATION) { if (faultAddress < NULL_AREA_SIZE) { - faultCode = STATUS_REDHAWK_NULL_REFERENCE; + faultCode = STATUS_NATIVEAOT_NULL_REFERENCE; } } else if (faultCode == STATUS_STACK_OVERFLOW) @@ -369,7 +387,7 @@ int32_t __stdcall RhpHardwareExceptionHandler(uintptr_t faultCode, uintptr_t fau { if (faultAddress < NULL_AREA_SIZE) { - faultCode = STATUS_REDHAWK_UNMANAGED_HELPER_NULL_REFERENCE; + faultCode = STATUS_NATIVEAOT_UNMANAGED_HELPER_NULL_REFERENCE; } // we were AV-ing in a helper - unwind our way to our caller @@ -472,13 +490,13 @@ LONG WINAPI RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs) if (pCodeManager != NULL) { // Make sure that the OS does not use our internal fault codes - ASSERT(faultCode != STATUS_REDHAWK_NULL_REFERENCE && faultCode != STATUS_REDHAWK_UNMANAGED_HELPER_NULL_REFERENCE); + ASSERT(faultCode != STATUS_NATIVEAOT_NULL_REFERENCE && faultCode != STATUS_NATIVEAOT_UNMANAGED_HELPER_NULL_REFERENCE); if (faultCode == STATUS_ACCESS_VIOLATION) { 
if (pExPtrs->ExceptionRecord->ExceptionInformation[1] < NULL_AREA_SIZE) { - faultCode = STATUS_REDHAWK_NULL_REFERENCE; + faultCode = STATUS_NATIVEAOT_NULL_REFERENCE; } } else if (faultCode == STATUS_STACK_OVERFLOW) @@ -507,7 +525,7 @@ LONG WINAPI RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs) { if (pExPtrs->ExceptionRecord->ExceptionInformation[1] < NULL_AREA_SIZE) { - faultCode = STATUS_REDHAWK_UNMANAGED_HELPER_NULL_REFERENCE; + faultCode = STATUS_NATIVEAOT_UNMANAGED_HELPER_NULL_REFERENCE; } // we were AV-ing in a helper - unwind our way to our caller diff --git a/src/coreclr/nativeaot/Runtime/FinalizerHelpers.cpp b/src/coreclr/nativeaot/Runtime/FinalizerHelpers.cpp index 7f49f0830c24..730edbc87a3a 100644 --- a/src/coreclr/nativeaot/Runtime/FinalizerHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/FinalizerHelpers.cpp @@ -29,14 +29,30 @@ CLREventStatic g_FinalizerDoneEvent; static HANDLE g_lowMemoryNotification = NULL; +#ifdef TARGET_WINDOWS +static bool g_ComAndFlsInitSucceeded = false; +#endif + EXTERN_C void QCALLTYPE ProcessFinalizers(); -// Unmanaged front-end to the finalizer thread. We require this because at the point the GC creates the -// finalizer thread we can't run managed code. Instead this method waits +// Unmanaged front-end to the finalizer thread. We require this because at the point when this thread is +// created we can't run managed code. Instead this method waits // for the first finalization request (by which time everything must be up and running) and kicks off the // managed portion of the thread at that point uint32_t WINAPI FinalizerStart(void* pContext) { +#ifdef TARGET_WINDOWS + g_ComAndFlsInitSucceeded = PalInitComAndFlsSlot(); + // handshake with EE initialization, as now we can attach Thread objects to native threads. + UInt32_BOOL res = PalSetEvent(g_FinalizerDoneEvent.GetOSEvent()); + ASSERT(res); + + // if FLS initialization failed do not attach the current thread and just exit instead. + // we are going to fail the runtime initialization. 
+ if (!g_ComAndFlsInitSucceeded) + return 0; +#endif // TARGET_WINDOWS + HANDLE hFinalizerEvent = (HANDLE)pContext; PalSetCurrentThreadName(".NET Finalizer"); @@ -86,12 +102,16 @@ bool RhInitializeFinalization() return true; } -void RhEnableFinalization() +#ifdef TARGET_WINDOWS +bool RhWaitForFinalizerThreadStart() { - g_FinalizerEvent.Set(); + g_FinalizerDoneEvent.Wait(INFINITE, FALSE); + g_FinalizerDoneEvent.Reset(); + return g_ComAndFlsInitSucceeded; } +#endif -EXTERN_C void QCALLTYPE RhInitializeFinalizerThread() +void RhEnableFinalization() { g_FinalizerEvent.Set(); } diff --git a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt index 11618fd78edc..821b4fe8ca9e 100644 --- a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt @@ -59,16 +59,16 @@ endif (CLR_CMAKE_TARGET_WIN32) # Get the current list of definitions get_compile_definitions(DEFINITIONS) -set(ASM_OFFSETS_CSPP ${RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) +set(ASM_OFFSETS_CSPP ${NATIVEAOT_RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) if(WIN32) set(COMPILER_LANGUAGE "") set(PREPROCESSOR_FLAGS -EP -nologo) - set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/windows/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/windows/AsmOffsets.cpp) else() set(COMPILER_LANGUAGE -x c++) set(PREPROCESSOR_FLAGS -E -P) - set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/unix/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/unix/AsmOffsets.cpp) endif() add_custom_command( @@ -80,9 +80,9 @@ add_custom_command( ) add_custom_command( - COMMAND ${CMAKE_CXX_COMPILER} ${DEFINITIONS} ${PREPROCESSOR_FLAGS} -I"${RUNTIME_DIR}" -I"${ARCH_SOURCES_DIR}" "${ASM_OFFSETS_CPP}" >"${CMAKE_CURRENT_BINARY_DIR}/AsmOffsets.inc" + COMMAND ${CMAKE_CXX_COMPILER} ${DEFINITIONS} ${PREPROCESSOR_FLAGS} -I"${NATIVEAOT_RUNTIME_DIR}" -I"${ARCH_SOURCES_DIR}" "${ASM_OFFSETS_CPP}" >"${CMAKE_CURRENT_BINARY_DIR}/AsmOffsets.inc" OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/AsmOffsets.inc" - DEPENDS "${ASM_OFFSETS_CPP}" "${RUNTIME_DIR}/AsmOffsets.h" + DEPENDS "${ASM_OFFSETS_CPP}" "${NATIVEAOT_RUNTIME_DIR}/AsmOffsets.h" COMMENT "Generating AsmOffsets.inc" ) diff --git a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp index c50d2c854c24..8bcf4513d3ef 100644 --- a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp @@ -15,9 +15,8 @@ #include "forward_declarations.h" #include "RhConfig.h" -#include "PalRedhawkCommon.h" +#include "PalLimitedContext.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "interoplibinterface.h" @@ -47,11 +46,18 @@ void FinalizeFinalizableObjects(); #endif bool RhInitializeFinalization(); +#ifdef TARGET_WINDOWS +bool RhWaitForFinalizerThreadStart(); +#endif // Perform any runtime-startup initialization needed by the GC, HandleTable or environmental code in gcenv.ee. // Returns true on success or false if a subsystem failed to initialize. bool InitializeGC() { + // Give the finalizer thread a head start by launching it early. + if (!RhInitializeFinalization()) + return false; + // Initialize the special MethodTable used to mark free list entries in the GC heap. g_FreeObjectEEType.InitializeAsGcFreeType(); g_pFreeObjectEEType = &g_FreeObjectEEType; @@ -82,13 +88,17 @@ bool InitializeGC() if (FAILED(hr)) return false; - if (!RhInitializeFinalization()) - return false; - // Initialize HandleTable.
if (!GCHandleUtilities::GetGCHandleManager()->Initialize()) return false; +#ifdef TARGET_WINDOWS + // By now finalizer thread should have initialized FLS slot for thread cleanup notifications. + // And ensured that COM is initialized (must happen before allocating FLS slot). + // Make sure that this was done. + if (!RhWaitForFinalizerThreadStart()) + return false; +#endif return true; } @@ -653,7 +663,7 @@ static Object* GcAllocInternal(MethodTable* pEEType, uint32_t uFlags, uintptr_t // numElements - number of array elements // pTransitionFrame- transition frame to make stack crawlable // Returns a pointer to the object allocated or NULL on failure. -EXTERN_C void* F_CALL_CONV RhpGcAlloc(MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements, PInvokeTransitionFrame* pTransitionFrame) +EXTERN_C void* RhpGcAlloc(MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements, PInvokeTransitionFrame* pTransitionFrame) { Thread* pThread = ThreadStore::GetCurrentThread(); diff --git a/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp index b34c41c37bb3..644015607518 100644 --- a/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp @@ -7,7 +7,7 @@ #include "common.h" #include "gcenv.h" -#include "PalRedhawkCommon.h" +#include "PalLimitedContext.h" #include "CommonMacros.inl" #include "GCMemoryHelpers.inl" @@ -30,14 +30,14 @@ FCIMPL2(void *, RhpGcSafeZeroMemory, void * mem, size_t size) } FCIMPLEND -#if defined(TARGET_X86) || defined(TARGET_AMD64) - // +#if defined(TARGET_X86) || defined(TARGET_AMD64) + // // Memory writes are already ordered - // - #define GCHeapMemoryBarrier() + // + #define GCHeapMemoryBarrier() #else - #define GCHeapMemoryBarrier() MemoryBarrier() -#endif + #define GCHeapMemoryBarrier() MemoryBarrier() +#endif // Move memory, in a way that is compatible with a move onto the heap, but // does not require the destination pointer to be on the heap. diff --git a/src/coreclr/nativeaot/Runtime/GcStressControl.cpp b/src/coreclr/nativeaot/Runtime/GcStressControl.cpp index 3d0174855758..dc9506acebb0 100644 --- a/src/coreclr/nativeaot/Runtime/GcStressControl.cpp +++ b/src/coreclr/nativeaot/Runtime/GcStressControl.cpp @@ -8,14 +8,13 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "holder.h" #include "Crst.h" #include "RhConfig.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "forward_declarations.h" #include "StackFrameIterator.h" @@ -26,6 +25,7 @@ #include "shash.h" #include "shash.inl" #include "GcStressControl.h" +#include "minipal/time.h" class GcStressControl @@ -71,7 +71,7 @@ class GcStressControl if (g_pRhConfig->GetGcStressSeed()) s_lGcStressRNGSeed = (uint32_t)g_pRhConfig->GetGcStressSeed(); else - s_lGcStressRNGSeed = (uint32_t)PalGetTickCount64(); + s_lGcStressRNGSeed = (uint32_t)minipal_lowres_ticks(); if (g_pRhConfig->GetGcStressFreqDenom()) s_lGcStressFreqDenom = (uint32_t)g_pRhConfig->GetGcStressFreqDenom(); diff --git a/src/coreclr/nativeaot/Runtime/HandleTableHelpers.cpp b/src/coreclr/nativeaot/Runtime/HandleTableHelpers.cpp index 8eddc1d3440d..91750c078e45 100644 --- a/src/coreclr/nativeaot/Runtime/HandleTableHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/HandleTableHelpers.cpp @@ -1,13 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. 
// The .NET Foundation licenses this file to you under the MIT license. -// -// Helper functions that are p/invoked from redhawkm in order to expose handle table functionality to managed -// code. These p/invokes are special in that the handle table code requires we remain in co-operative mode -// (since these routines mutate the handle tables which are also accessed during garbage collections). The -// binder has special knowledge of these methods and doesn't generate the normal code to transition out of the -// runtime prior to the call. -// #include "common.h" #include "gcenv.h" #include "objecthandle.h" @@ -96,16 +89,32 @@ struct ManagedObjectWrapper } }; +template +struct Span +{ + T* _pointer; + int _length; +}; + // This structure mirrors the managed type System.Runtime.InteropServices.ComWrappers.InternalComInterfaceDispatch. struct InternalComInterfaceDispatch { - void* Vtable; ManagedObjectWrapper* _thisPtr; + Span Vtables; }; +#ifdef TARGET_64BIT +constexpr uintptr_t DispatchAlignment = 64; +#else +constexpr uintptr_t DispatchAlignment = 16; +#endif + +constexpr uintptr_t DispatchAlignmentMask = ~(DispatchAlignment - 1); + static ManagedObjectWrapper* ToManagedObjectWrapper(void* dispatchPtr) { - return ((InternalComInterfaceDispatch*)dispatchPtr)->_thisPtr; + uintptr_t dispatch = reinterpret_cast(dispatchPtr) & DispatchAlignmentMask; + return ((InternalComInterfaceDispatch*)dispatch)->_thisPtr; } // @@ -113,14 +122,17 @@ static ManagedObjectWrapper* ToManagedObjectWrapper(void* dispatchPtr) // invokes AddRef while holding a lock that it *also* holds while a GC is in progress. If AddRef was managed, we would have // to synchronize with the GC before entering AddRef, which would deadlock with the other thread holding Xaml's lock. // -static uint32_t __stdcall IUnknown_AddRef(void* pComThis) +EXTERN_C uint32_t __stdcall RhIUnknown_AddRef(void* pComThis) { ManagedObjectWrapper* wrapper = ToManagedObjectWrapper(pComThis); return wrapper->AddRef(); } -FCIMPL0(void*, RhGetIUnknownAddRef) +// +// Release is implemented in native code so that it does not need to synchronize with the GC. This is important because Xaml +// can invoke this Release during shutdown, and we don't want to synchronize with the GC at that time. 
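The reworked ToManagedObjectWrapper above relies on InternalComInterfaceDispatch blocks being allocated on a DispatchAlignment boundary (64 bytes on 64-bit targets, 16 on 32-bit): any COM interface pointer handed out to native code points somewhere inside such a block, so masking off the low bits of that pointer recovers the block's header and its _thisPtr. A rough, self-contained sketch of the same arithmetic follows, with a simplified layout and hypothetical names rather than the runtime's own.

```cpp
#include <cstdint>

// Simplified stand-in for InternalComInterfaceDispatch; hypothetical layout.
struct ExampleDispatchHeader
{
    void* thisPtr; // the managed object wrapper the block belongs to
};

constexpr uintptr_t kDispatchAlignment = 64; // assumed 64-bit value; 16 on 32-bit targets
constexpr uintptr_t kDispatchMask = ~(kDispatchAlignment - 1);

ExampleDispatchHeader* HeaderFromInterfacePointer(void* pComThis)
{
    // Every vtable slot handed to COM lives inside one aligned dispatch block, so
    // clearing the low bits of the pointer lands on the header at the block's start.
    return reinterpret_cast<ExampleDispatchHeader*>(
        reinterpret_cast<uintptr_t>(pComThis) & kDispatchMask);
}
```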
+// +EXTERN_C uint32_t __stdcall RhUntracked_AddRefRelease(void*) { - return (void*)&IUnknown_AddRef; + return 1; } -FCIMPLEND diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp index 6e3f4d73de8a..033edb3de28d 100644 --- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp @@ -56,29 +56,33 @@ FCIMPL1_D(uint32_t, RhpDbl2UInt, double val) FCIMPLEND #ifndef HOST_64BIT -EXTERN_C int64_t QCALLTYPE RhpLDiv(int64_t i, int64_t j) +FCIMPL2_LL(int64_t, DivInt64Internal, int64_t i, int64_t j) { ASSERT(j && "Divide by zero!"); return i / j; } +FCIMPLEND -EXTERN_C uint64_t QCALLTYPE RhpULDiv(uint64_t i, uint64_t j) +FCIMPL2_LL(uint64_t, DivUInt64Internal, uint64_t i, uint64_t j) { ASSERT(j && "Divide by zero!"); return i / j; } +FCIMPLEND -EXTERN_C int64_t QCALLTYPE RhpLMod(int64_t i, int64_t j) +FCIMPL2_LL(int64_t, ModInt64Internal, int64_t i, int64_t j) { ASSERT(j && "Divide by zero!"); return i % j; } +FCIMPLEND -EXTERN_C uint64_t QCALLTYPE RhpULMod(uint64_t i, uint64_t j) +FCIMPL2_LL(uint64_t, ModUInt64Internal, uint64_t i, uint64_t j) { ASSERT(j && "Divide by zero!"); return i % j; } +FCIMPLEND FCIMPL1_L(double, RhpLng2Dbl, int64_t val) { @@ -92,33 +96,51 @@ FCIMPL1_L(double, RhpULng2Dbl, uint64_t val) } FCIMPLEND +FCIMPL1_L(float, RhpLng2Flt, int64_t val) +{ + return (float)val; +} +FCIMPLEND + +FCIMPL1_L(float, RhpULng2Flt, uint64_t val) +{ + return (float)val; +} +FCIMPLEND + #endif -#ifdef HOST_ARM -EXTERN_C int32_t F_CALL_CONV RhpIDiv(int32_t i, int32_t j) +#ifndef HOST_64BIT +FCIMPL2(int32_t, DivInt32Internal, int32_t i, int32_t j) { ASSERT(j && "Divide by zero!"); return i / j; } +FCIMPLEND -EXTERN_C uint32_t F_CALL_CONV RhpUDiv(uint32_t i, uint32_t j) +FCIMPL2(uint32_t, DivUInt32Internal, uint32_t i, uint32_t j) { ASSERT(j && "Divide by zero!"); return i / j; } +FCIMPLEND -EXTERN_C int32_t F_CALL_CONV RhpIMod(int32_t i, int32_t j) +FCIMPL2(int32_t, ModInt32Internal, int32_t i, int32_t j) { ASSERT(j && "Divide by zero!"); return i % j; } +FCIMPLEND -EXTERN_C uint32_t F_CALL_CONV RhpUMod(uint32_t i, uint32_t j) +FCIMPL2(uint32_t, ModUInt32Internal, uint32_t i, uint32_t j) { ASSERT(j && "Divide by zero!"); return i % j; } +FCIMPLEND +#endif +#ifdef HOST_ARM EXTERN_C int64_t F_CALL_CONV RhpLMul(int64_t i, int64_t j) { return i * j; diff --git a/src/coreclr/nativeaot/Runtime/MethodTable.cpp b/src/coreclr/nativeaot/Runtime/MethodTable.cpp index e91b6b947deb..024222c96519 100644 --- a/src/coreclr/nativeaot/Runtime/MethodTable.cpp +++ b/src/coreclr/nativeaot/Runtime/MethodTable.cpp @@ -7,8 +7,8 @@ #include "rhassert.h" #include "rhbinder.h" #include "MethodTable.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "CommonMacros.inl" #include "MethodTable.inl" @@ -95,7 +95,7 @@ bool MethodTable::Validate(bool assertOnFail /* default: true */) //----------------------------------------------------------------------------------------------------------- MethodTable::Kinds MethodTable::GetKind() { - return (Kinds)(m_uFlags & (uint16_t)EETypeKindMask); + return (Kinds)(m_uFlags & EETypeKindMask); } //----------------------------------------------------------------------------------------------------------- diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp index cd5f37e51999..138c3917198e 100644 --- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp +++ 
b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp @@ -9,8 +9,8 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" #include "holder.h" @@ -18,7 +18,6 @@ #include "rhbinder.h" #include "RuntimeInstance.h" #include "regdisplay.h" -#include "varint.h" #include "StackFrameIterator.h" #include "thread.h" #include "event.h" @@ -37,6 +36,7 @@ #include "RhConfig.h" #include #include +#include FCIMPL0(void, RhDebugBreak) { @@ -76,7 +76,7 @@ EXTERN_C void QCALLTYPE RhFlushProcessWriteBuffers() PalFlushProcessWriteBuffers(); } -// Get the list of currently loaded Redhawk modules (as OS HMODULE handles). The caller provides a reference +// Get the list of currently loaded NativeAOT modules (as OS HMODULE handles). The caller provides a reference // to an array of pointer-sized elements and we return the total number of modules currently loaded (whether // that is less than, equal to or greater than the number of elements in the array). If there are more modules // loaded than the array will hold then the array is filled to capacity and the caller can tell further @@ -346,7 +346,7 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) pCode++; } // is this an indirect jump? - // pcalau12i $t7, imm20; ld.d $t7, $t7, imm12; jirl $r0, $t7, 0 + // pcalau12i $rd, imm20; ld.d $rd, $rj, imm12; jirl $rd, $rj, 0 if ((pCode[0] & 0xfe000000) == 0x1a000000 && (pCode[1] & 0xffc00000) == 0x28c00000 && (pCode[2] & 0xfc000000) == 0x4c000000) @@ -419,11 +419,6 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) } FCIMPLEND -EXTERN_C uint64_t QCALLTYPE RhpGetTickCount64() -{ - return PalGetTickCount64(); -} - EXTERN_C int32_t QCALLTYPE RhpCalculateStackTraceWorker(void* pOutputBuffer, uint32_t outputBufferLength, void* pAddressInCurrentFrame); EXTERN_C int32_t QCALLTYPE RhpGetCurrentThreadStackTrace(void* pOutputBuffer, uint32_t outputBufferLength, void* pAddressInCurrentFrame) diff --git a/src/coreclr/nativeaot/Runtime/NativePrimitiveDecoder.h b/src/coreclr/nativeaot/Runtime/NativePrimitiveDecoder.h new file mode 100644 index 000000000000..bbfd20b1ac4a --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/NativePrimitiveDecoder.h @@ -0,0 +1,62 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// C/C++ clone of NativePrimitiveDecoder.cs subset + +class NativePrimitiveDecoder +{ +public: + static uint32_t ReadUnsigned(uint8_t* & p) + { + uint32_t value = 0; + + uint32_t val = *p; + if ((val & 1) == 0) + { + value = (val >> 1); + p += 1; + } + else if ((val & 2) == 0) + { + value = (val >> 2) | + (*(p + 1) << 6); + p += 2; + } + else if ((val & 4) == 0) + { + value = (val >> 3) | + (*(p + 1) << 5) | + (*(p + 2) << 13); + p += 3; + } + else if ((val & 8) == 0) + { + value = (val >> 4) | + (*(p + 1) << 4) | + (*(p + 2) << 12) | + (*(p + 3) << 20); + p += 4; + } + else + { + value = *(p+1) | (*(p+2) << 8) | (*(p+3) << 16) | (*(p+4) << 24); + p += 5; + } + + return value; + } + + static int32_t ReadInt32(uint8_t* & p) + { + int32_t value = *p | (*(p+1) << 8) | (*(p+2) << 16) | (*(p+3) << 24); + p += 4; + return value; + } + + static uint32_t ReadUInt32(uint8_t* & p) + { + uint32_t value = *p | (*(p+1) << 8) | (*(p+2) << 16) | (*(p+3) << 24); + p += 4; + return value; + } +}; diff --git a/src/coreclr/nativeaot/Runtime/ObjectLayout.cpp b/src/coreclr/nativeaot/Runtime/ObjectLayout.cpp index 707d134fb5e1..eff0b754c18f 100644 --- a/src/coreclr/nativeaot/Runtime/ObjectLayout.cpp +++ b/src/coreclr/nativeaot/Runtime/ObjectLayout.cpp @@ -9,9 +9,8 @@ #include "CommonMacros.h" #include "daccess.h" #include "rhassert.h" -#include "RedhawkWarnings.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "TargetPtrs.h" #include "MethodTable.h" #include "ObjectLayout.h" diff --git a/src/coreclr/nativeaot/Runtime/ObjectLayout.h b/src/coreclr/nativeaot/Runtime/ObjectLayout.h index 146c986938f7..658696175089 100644 --- a/src/coreclr/nativeaot/Runtime/ObjectLayout.h +++ b/src/coreclr/nativeaot/Runtime/ObjectLayout.h @@ -6,8 +6,7 @@ // // Bits stolen from the sync block index that the GC/HandleTable knows about (currently these are at the same -// positions as the mainline runtime but we can change this below when it becomes apparent how Redhawk will -// handle sync blocks). +// positions as the mainline runtime). #define BIT_SBLK_GC_RESERVE 0x20000000 #define BIT_SBLK_FINALIZER_RUN 0x40000000 @@ -125,6 +124,9 @@ static uintptr_t const STRING_COMPONENT_SIZE = StringConstants::ComponentSize; //------------------------------------------------------------------------------------------------- static uintptr_t const STRING_BASE_SIZE = StringConstants::BaseSize; +//------------------------------------------------------------------------------------------------- +static uintptr_t const SZARRAY_BASE_SIZE = MIN_OBJECT_SIZE; + //------------------------------------------------------------------------------------------------- static uintptr_t const MAX_STRING_LENGTH = 0x3FFFFFDF; diff --git a/src/coreclr/nativeaot/Runtime/Pal.h b/src/coreclr/nativeaot/Runtime/Pal.h new file mode 100644 index 000000000000..c96f28effa6a --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/Pal.h @@ -0,0 +1,312 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Provides declarations for external resources consumed by NativeAOT. This comprises functionality +// normally exported from Win32 libraries such as KERNEL32 and MSVCRT. When hosted on Win32 calls to these +// functions become simple pass throughs to the native implementation via export forwarding entries in a PAL +// (Platform Abstraction Layer) library. 
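Stepping back to the NativePrimitiveDecoder helper added above: ReadUnsigned decodes a variable-length format in which the run of low one-bits in the first byte says how many extra bytes follow, and the remaining bits of that byte carry the low bits of the value. A small worked example of the two-byte case, inlining the same arithmetic so it stands alone rather than calling the header:

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    // 300 encodes as { 0xB1, 0x04 }: low bits "01" in the first byte mean one extra
    // byte follows, and the payload is ((0xB1 >> 2) | (0x04 << 6)) = 44 | 256 = 300.
    const uint8_t p[] = { 0xB1, 0x04 };
    assert((p[0] & 1) != 0 && (p[0] & 2) == 0);      // selects ReadUnsigned's second branch
    uint32_t value = (uint32_t)((p[0] >> 2) | (p[1] << 6));
    assert(value == 300);
    return 0;
}
```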
On other platforms the PAL library has actual code to emulate the +// functionality of these same APIs. +// +// In order to make it both obvious and intentional where NativeAOT consumes an external API, such functions are +// decorated with an 'Pal' prefix. Ideally the associated supporting types, constants etc. would be +// similarly isolated from their concrete Win32 definitions, making the extent of platform dependence within +// the core explicit. For now that is too big a work item and we'll settle for manually restricting the use of +// external header files to within this header. +// + +#include +#include +#ifdef HOST_WINDOWS +#include +#else +#include +#endif + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalLimitedContext.h" +#include "gcenv.structs.h" // EEThreadId + +#ifndef PAL_INCLUDED +#define PAL_INCLUDED + +/* Adapted from intrin.h - For compatibility with , some intrinsics are __cdecl except on x64 */ +#if defined (_M_X64) +#define __PN__MACHINECALL_CDECL_OR_DEFAULT +#else +#define __PN__MACHINECALL_CDECL_OR_DEFAULT __cdecl +#endif + +#ifndef _MSC_VER + +// Note: Win32-hosted GCC predefines __stdcall and __cdecl, but Unix- +// hosted GCC does not. + +#ifdef __i386__ + +#if !defined(__cdecl) +#define __cdecl __attribute__((cdecl)) +#endif + +#else // !defined(__i386__) + +#define __cdecl + +#endif // !defined(__i386__) + +#endif // !_MSC_VER + +#ifdef TARGET_UNIX +#define DIRECTORY_SEPARATOR_CHAR '/' +#else // TARGET_UNIX +#define DIRECTORY_SEPARATOR_CHAR '\\' +#endif // TARGET_UNIX + +#ifdef TARGET_UNIX +typedef int32_t HRESULT; + +#define S_OK 0x0 +#define E_FAIL 0x80004005 +#define E_OUTOFMEMORY 0x8007000E + +typedef WCHAR * LPWSTR; +typedef const WCHAR * LPCWSTR; +typedef char * LPSTR; +typedef const char * LPCSTR; +typedef void * HINSTANCE; + +typedef void * LPSECURITY_ATTRIBUTES; +typedef void * LPOVERLAPPED; + +#define UNREFERENCED_PARAMETER(P) (void)(P) + +struct FILETIME +{ + uint32_t dwLowDateTime; + uint32_t dwHighDateTime; +}; + +typedef struct _CONTEXT CONTEXT, *PCONTEXT; + +typedef struct _EXCEPTION_RECORD EXCEPTION_RECORD, *PEXCEPTION_RECORD; + +#define EXCEPTION_CONTINUE_EXECUTION (-1) +#define EXCEPTION_CONTINUE_SEARCH (0) +#define EXCEPTION_EXECUTE_HANDLER (1) + +#define STATUS_ACCESS_VIOLATION ((uint32_t )0xC0000005L) +#define STATUS_STACK_OVERFLOW ((uint32_t )0xC00000FDL) + +#endif // TARGET_UNIX + +#define STATUS_NATIVEAOT_NULL_REFERENCE ((uint32_t )0x00000000L) +#define STATUS_NATIVEAOT_UNMANAGED_HELPER_NULL_REFERENCE ((uint32_t )0x00000042L) + +#ifdef TARGET_UNIX +#define NULL_AREA_SIZE (4*1024) +#else +#define NULL_AREA_SIZE (64*1024) +#endif + +#ifdef TARGET_UNIX +#define _T(s) s +typedef char TCHAR; +#else +// Avoid including tchar.h on Windows. 
+#define _T(s) L ## s +#endif // TARGET_UNIX + +#ifndef DACCESS_COMPILE +#ifdef TARGET_UNIX + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +#define INVALID_HANDLE_VALUE ((HANDLE)(intptr_t)-1) + +#define INFINITE 0xFFFFFFFF + +#define PAGE_NOACCESS 0x01 +#define PAGE_READONLY 0x02 +#define PAGE_READWRITE 0x04 +#define PAGE_WRITECOPY 0x08 +#define PAGE_EXECUTE 0x10 +#define PAGE_EXECUTE_READ 0x20 +#define PAGE_EXECUTE_READWRITE 0x40 +#define PAGE_EXECUTE_WRITECOPY 0x80 +#define PAGE_GUARD 0x100 +#define PAGE_NOCACHE 0x200 +#define PAGE_WRITECOMBINE 0x400 + +#define WAIT_OBJECT_0 0 +#define WAIT_TIMEOUT 258 +#define WAIT_FAILED 0xFFFFFFFF + +#endif // TARGET_UNIX +#endif // !DACCESS_COMPILE + +extern uint32_t g_RhNumberOfProcessors; + +// The NativeAOT PAL must be initialized before any of its exports can be called. Returns true for a successful +// initialization and false on failure. +bool PalInit(); + +// Given the OS handle of a loaded module, compute the upper and lower virtual address bounds (inclusive). +void PalGetModuleBounds(HANDLE hOsHandle, _Out_ uint8_t ** ppLowerBound, _Out_ uint8_t ** ppUpperBound); + +struct NATIVE_CONTEXT; + +#if _WIN32 +NATIVE_CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextBuffer); +bool PalGetCompleteThreadContext(HANDLE hThread, _Out_ NATIVE_CONTEXT * pCtx); +bool PalSetThreadContext(HANDLE hThread, _Out_ NATIVE_CONTEXT * pCtx); +void PalRestoreContext(NATIVE_CONTEXT * pCtx); + +// For platforms that have segment registers in the CONTEXT_CONTROL set that +// are not saved in PAL_LIMITED_CONTEXT, this captures them from the current +// thread and saves them in `pContext`. +void PopulateControlSegmentRegisters(CONTEXT * pContext); +#endif + +int32_t PalGetProcessCpuCount(); + +// Retrieves the entire range of memory dedicated to the calling thread's stack. This does +// not get the current dynamic bounds of the stack, which can be significantly smaller than +// the maximum bounds. +bool PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut); + +// Return value: number of characters in name string +int32_t PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase); + +#if _WIN32 + +// Various intrinsic declarations needed for the PalGetCurrentTEB implementation below. +#if defined(HOST_X86) +EXTERN_C unsigned long __readfsdword(unsigned long Offset); +#pragma intrinsic(__readfsdword) +#elif defined(HOST_AMD64) +EXTERN_C unsigned __int64 __readgsqword(unsigned long Offset); +#pragma intrinsic(__readgsqword) +#elif defined(HOST_ARM64) +EXTERN_C unsigned __int64 __getReg(int); +#pragma intrinsic(__getReg) +#else +#error Unsupported architecture +#endif + +// Retrieves the OS TEB for the current thread. +inline uint8_t * PalNtCurrentTeb() +{ +#if defined(HOST_X86) + return (uint8_t*)__readfsdword(0x18); +#elif defined(HOST_AMD64) + return (uint8_t*)__readgsqword(0x30); +#elif defined(HOST_ARM64) + return (uint8_t*)__getReg(18); +#else +#error Unsupported architecture +#endif +} + +// Offsets of ThreadLocalStoragePointer in the TEB. 
+#if defined(HOST_64BIT) +#define OFFSETOF__TEB__ThreadLocalStoragePointer 0x58 +#else +#define OFFSETOF__TEB__ThreadLocalStoragePointer 0x2c +#endif + +#endif // _WIN32 + +_Ret_maybenull_ _Post_writable_byte_size_(size) void* PalVirtualAlloc(uintptr_t size, uint32_t protect); +void PalVirtualFree(_In_ void* pAddress, uintptr_t size); +UInt32_BOOL PalVirtualProtect(_In_ void* pAddress, uintptr_t size, uint32_t protect); +void PalFlushInstructionCache(_In_ void* pAddress, size_t size); +void PalSleep(uint32_t milliseconds); +UInt32_BOOL PalSwitchToThread(); +UInt32_BOOL PalAreShadowStacksEnabled(); +HANDLE PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName); +HANDLE PalGetModuleHandleFromPointer(_In_ void* pointer); + +#ifdef TARGET_UNIX +uint32_t PalGetOsPageSize(); +typedef int32_t (*PHARDWARE_EXCEPTION_HANDLER)(uintptr_t faultCode, uintptr_t faultAddress, PAL_LIMITED_CONTEXT* palContext, uintptr_t* arg0Reg, uintptr_t* arg1Reg); +void PalSetHardwareExceptionHandler(PHARDWARE_EXCEPTION_HANDLER handler); +#endif + +typedef uint32_t (*BackgroundCallback)(_In_opt_ void* pCallbackContext); +bool PalSetCurrentThreadName(const char* name); +#ifdef HOST_WINDOWS +bool PalSetCurrentThreadNameW(const WCHAR* name); +bool PalInitComAndFlsSlot(); +#endif +bool PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); +bool PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); +bool PalStartEventPipeHelperThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); + +#ifdef FEATURE_HIJACK +class Thread; +void PalHijack(Thread* pThreadToHijack); +HijackFunc* PalGetHijackTarget(_In_ HijackFunc* defaultHijackTarget); +#endif + +UInt32_BOOL PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut); +UInt32_BOOL PalFreeThunksFromTemplate(_In_ void *pBaseAddress, size_t templateSize); + +UInt32_BOOL PalMarkThunksAsValidCallTargets( + void *virtualAddress, + int thunkSize, + int thunksPerBlock, + int thunkBlockSize, + int thunkBlocksPerMapping); + +uint32_t PalCompatibleWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t count, HANDLE* pHandles, UInt32_BOOL allowReentrantWait); + +HANDLE PalCreateLowMemoryResourceNotification(); + +void PalAttachThread(void* thread); + +uint64_t PalGetCurrentOSThreadId(); + +void PalPrintFatalError(const char* message); + +char* PalCopyTCharAsChar(const TCHAR* toCopy); + +HANDLE PalLoadLibrary(const char* moduleName); + +void* PalGetProcAddress(HANDLE module, const char* functionName); + +#ifdef TARGET_UNIX +int32_t _stricmp(const char *string1, const char *string2); +#endif // TARGET_UNIX + +uint16_t PalCaptureStackBackTrace(uint32_t arg1, uint32_t arg2, void* arg3, uint32_t* arg4); +UInt32_BOOL PalCloseHandle(HANDLE arg1); +void PalFlushProcessWriteBuffers(); +uint32_t PalGetCurrentProcessId(); + +#ifdef UNICODE +uint32_t PalGetEnvironmentVariable(_In_opt_ LPCWSTR lpName, _Out_writes_to_opt_(nSize, return + 1) LPWSTR lpBuffer, _In_ uint32_t nSize); +#else +uint32_t PalGetEnvironmentVariable(_In_opt_ LPCSTR lpName, _Out_writes_to_opt_(nSize, return + 1) LPSTR lpBuffer, _In_ uint32_t nSize); +#endif + +UInt32_BOOL PalResetEvent(HANDLE arg1); +UInt32_BOOL PalSetEvent(HANDLE arg1); +uint32_t PalWaitForSingleObjectEx(HANDLE arg1, uint32_t arg2, UInt32_BOOL arg3); + +void 
PalGetSystemTimeAsFileTime(FILETIME * arg1); + +void RuntimeThreadShutdown(void* thread); + +typedef void (*ThreadExitCallback)(); + +extern ThreadExitCallback g_threadExitCallback; + +#include "PalInline.h" + +#endif // !PAL_INCLUDED diff --git a/src/coreclr/nativeaot/Runtime/PalLimitedContext.h b/src/coreclr/nativeaot/Runtime/PalLimitedContext.h new file mode 100644 index 000000000000..c5e36453687a --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/PalLimitedContext.h @@ -0,0 +1,212 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + + +#ifndef PAL_LIMITED_CONTEXT_INCLUDED +#define PAL_LIMITED_CONTEXT_INCLUDED + +#include "rhassert.h" + +#ifndef DECLSPEC_ALIGN +#ifdef _MSC_VER +#define DECLSPEC_ALIGN(x) __declspec(align(x)) +#else +#define DECLSPEC_ALIGN(x) __attribute__((aligned(x))) +#endif +#endif // DECLSPEC_ALIGN + +#ifdef HOST_AMD64 +#define AMD64_ALIGN_16 DECLSPEC_ALIGN(16) +#else // HOST_AMD64 +#define AMD64_ALIGN_16 +#endif // HOST_AMD64 + +struct AMD64_ALIGN_16 Fp128 { + uint64_t Low; + int64_t High; +}; + + +struct PAL_LIMITED_CONTEXT +{ + // Includes special registers, callee saved registers and general purpose registers used to return values from functions (not floating point return registers) +#ifdef TARGET_ARM + uintptr_t R0; + uintptr_t R4; + uintptr_t R5; + uintptr_t R6; + uintptr_t R7; + uintptr_t R8; + uintptr_t R9; + uintptr_t R10; + uintptr_t R11; + + uintptr_t IP; + uintptr_t SP; + uintptr_t LR; + + uint64_t D[16-8]; // D8 .. D15 registers (D16 .. D31 are volatile according to the ABI spec) + + uintptr_t GetIp() const { return IP; } + uintptr_t GetSp() const { return SP; } + uintptr_t GetFp() const { return R7; } + uintptr_t GetLr() const { return LR; } + void SetIp(uintptr_t ip) { IP = ip; } + void SetSp(uintptr_t sp) { SP = sp; } + +#elif defined(TARGET_ARM64) + uintptr_t FP; + uintptr_t LR; + + uintptr_t X0; + uintptr_t X1; + uintptr_t X19; + uintptr_t X20; + uintptr_t X21; + uintptr_t X22; + uintptr_t X23; + uintptr_t X24; + uintptr_t X25; + uintptr_t X26; + uintptr_t X27; + uintptr_t X28; + + uintptr_t SP; + uintptr_t IP; + + uint64_t D[16 - 8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved + // (V0-V7 and V16-V31 are not preserved according to the ABI spec). + + + uintptr_t GetIp() const { return IP; } + uintptr_t GetSp() const { return SP; } + uintptr_t GetFp() const { return FP; } + uintptr_t GetLr() const { return LR; } + void SetIp(uintptr_t ip) { IP = ip; } + void SetSp(uintptr_t sp) { SP = sp; } + +#elif defined(TARGET_LOONGARCH64) + uintptr_t FP; + uintptr_t RA; + + uintptr_t R4; + uintptr_t R5; + uintptr_t R23; + uintptr_t R24; + uintptr_t R25; + uintptr_t R26; + uintptr_t R27; + uintptr_t R28; + uintptr_t R29; + uintptr_t R30; + uintptr_t R31; + + uintptr_t SP; + uintptr_t IP; + + uint64_t F[32 - 24]; // Only the F registers F24..F31 need to be preserved + // (F0-F23 are not preserved according to the ABI spec). 
+ + uintptr_t GetIp() const { return IP; } + uintptr_t GetSp() const { return SP; } + uintptr_t GetFp() const { return FP; } + uintptr_t GetRa() const { return RA; } + void SetIp(uintptr_t ip) { IP = ip; } + void SetSp(uintptr_t sp) { SP = sp; } + +#elif defined(TARGET_RISCV64) + + uintptr_t FP; + uintptr_t RA; + + uintptr_t A0; + uintptr_t A1; + uintptr_t S1; + uintptr_t S2; + uintptr_t S3; + uintptr_t S4; + uintptr_t S5; + uintptr_t S6; + uintptr_t S7; + uintptr_t S8; + uintptr_t S9; + uintptr_t S10; + uintptr_t S11; + + uintptr_t SP; + uintptr_t IP; + + uint64_t F[12]; + + uintptr_t GetIp() const { return IP; } + uintptr_t GetSp() const { return SP; } + uintptr_t GetFp() const { return FP; } + uintptr_t GetRa() const { return RA; } + void SetIp(uintptr_t ip) { IP = ip; } + void SetSp(uintptr_t sp) { SP = sp; } + +#elif defined(UNIX_AMD64_ABI) + // Param regs: rdi, rsi, rdx, rcx, r8, r9, scratch: rax, rdx (both return val), preserved: rbp, rbx, r12-r15 + uintptr_t IP; + uintptr_t Rsp; + uintptr_t Rbp; + uintptr_t Rax; + uintptr_t Rbx; + uintptr_t Rdx; + uintptr_t R12; + uintptr_t R13; + uintptr_t R14; + uintptr_t R15; + + uintptr_t GetIp() const { return IP; } + uintptr_t GetSp() const { return Rsp; } + void SetIp(uintptr_t ip) { IP = ip; } + void SetSp(uintptr_t sp) { Rsp = sp; } + uintptr_t GetFp() const { return Rbp; } +#elif defined(TARGET_X86) || defined(TARGET_AMD64) + uintptr_t IP; + uintptr_t Rsp; + uintptr_t Rbp; + uintptr_t Rdi; + uintptr_t Rsi; + uintptr_t Rax; + uintptr_t Rbx; +#ifdef TARGET_AMD64 + uintptr_t R12; + uintptr_t R13; + uintptr_t R14; + uintptr_t R15; +#if defined(TARGET_WINDOWS) + uintptr_t SSP; +#else + uintptr_t __explicit_padding__; +#endif // TARGET_WINDOWS + Fp128 Xmm6; + Fp128 Xmm7; + Fp128 Xmm8; + Fp128 Xmm9; + Fp128 Xmm10; + Fp128 Xmm11; + Fp128 Xmm12; + Fp128 Xmm13; + Fp128 Xmm14; + Fp128 Xmm15; +#endif // TARGET_AMD64 + + uintptr_t GetIp() const { return IP; } + uintptr_t GetSp() const { return Rsp; } + uintptr_t GetFp() const { return Rbp; } + void SetIp(uintptr_t ip) { IP = ip; } + void SetSp(uintptr_t sp) { Rsp = sp; } +#else // TARGET_ARM + uintptr_t IP; + + uintptr_t GetIp() const { PORTABILITY_ASSERT("GetIp"); return 0; } + uintptr_t GetSp() const { PORTABILITY_ASSERT("GetSp"); return 0; } + uintptr_t GetFp() const { PORTABILITY_ASSERT("GetFp"); return 0; } + void SetIp(uintptr_t ip) { PORTABILITY_ASSERT("SetIp"); } + void SetSp(uintptr_t sp) { PORTABILITY_ASSERT("GetSp"); } +#endif // TARGET_ARM +}; + +#endif // PAL_LIMITED_CONTEXT_INCLUDED diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h deleted file mode 100644 index 6b93fc11a323..000000000000 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ /dev/null @@ -1,345 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// -// Provides declarations for external resources consumed by Redhawk. This comprises functionality -// normally exported from Win32 libraries such as KERNEL32 and MSVCRT. When hosted on Win32 calls to these -// functions become simple pass throughs to the native implementation via export forwarding entries in a PAL -// (Platform Abstraction Layer) library. On other platforms the PAL library has actual code to emulate the -// functionality of these same APIs. -// -// In order to make it both obvious and intentional where Redhawk consumes an external API, such functions are -// decorated with an 'Pal' prefix. 
Ideally the associated supporting types, constants etc. would be -// similarly isolated from their concrete Win32 definitions, making the extent of platform dependence within -// the core explicit. For now that is too big a work item and we'll settle for manually restricting the use of -// external header files to within this header. -// - -#include -#include -#ifdef TARGET_UNIX -#include -#endif - -#include "CommonTypes.h" -#include "CommonMacros.h" -#include "gcenv.structs.h" // CRITICAL_SECTION -#include "PalRedhawkCommon.h" - -#ifndef PAL_REDHAWK_INCLUDED -#define PAL_REDHAWK_INCLUDED - -/* Adapted from intrin.h - For compatibility with , some intrinsics are __cdecl except on x64 */ -#if defined (_M_X64) -#define __PN__MACHINECALL_CDECL_OR_DEFAULT -#else -#define __PN__MACHINECALL_CDECL_OR_DEFAULT __cdecl -#endif - -#ifndef _MSC_VER - -// Note: Win32-hosted GCC predefines __stdcall and __cdecl, but Unix- -// hosted GCC does not. - -#ifdef __i386__ - -#if !defined(__cdecl) -#define __cdecl __attribute__((cdecl)) -#endif - -#else // !defined(__i386__) - -#define __cdecl - -#endif // !defined(__i386__) - -#endif // !_MSC_VER - -#ifdef TARGET_UNIX -#define DIRECTORY_SEPARATOR_CHAR '/' -#else // TARGET_UNIX -#define DIRECTORY_SEPARATOR_CHAR '\\' -#endif // TARGET_UNIX - -#ifndef _INC_WINDOWS - -// There are some fairly primitive type definitions below but don't pull them into the rest of Redhawk unless -// we have to (in which case these definitions will move to CommonTypes.h). -typedef WCHAR * LPWSTR; -typedef const WCHAR * LPCWSTR; -typedef char * LPSTR; -typedef const char * LPCSTR; -typedef void * HINSTANCE; - -typedef void * LPSECURITY_ATTRIBUTES; -typedef void * LPOVERLAPPED; - -#ifdef TARGET_UNIX -#define __stdcall -typedef char TCHAR; -#define _T(s) s -#else -typedef wchar_t TCHAR; -#define _T(s) L##s -#endif - -typedef union _LARGE_INTEGER { - struct { -#if BIGENDIAN - int32_t HighPart; - uint32_t LowPart; -#else - uint32_t LowPart; - int32_t HighPart; -#endif - } u; - int64_t QuadPart; -} LARGE_INTEGER, *PLARGE_INTEGER; - -#define DECLARE_HANDLE(_name) typedef HANDLE _name - -struct FILETIME -{ - uint32_t dwLowDateTime; - uint32_t dwHighDateTime; -}; - -typedef struct _CONTEXT CONTEXT, *PCONTEXT; - -#define EXCEPTION_MAXIMUM_PARAMETERS 15 // maximum number of exception parameters - -typedef struct _EXCEPTION_RECORD32 { - uint32_t ExceptionCode; - uint32_t ExceptionFlags; - uintptr_t ExceptionRecord; - uintptr_t ExceptionAddress; - uint32_t NumberParameters; - uintptr_t ExceptionInformation[EXCEPTION_MAXIMUM_PARAMETERS]; -} EXCEPTION_RECORD, *PEXCEPTION_RECORD; - -#define EXCEPTION_CONTINUE_EXECUTION (-1) -#define EXCEPTION_CONTINUE_SEARCH (0) -#define EXCEPTION_EXECUTE_HANDLER (1) - -typedef enum _EXCEPTION_DISPOSITION { - ExceptionContinueExecution, - ExceptionContinueSearch, - ExceptionNestedException, - ExceptionCollidedUnwind -} EXCEPTION_DISPOSITION; - -#define STATUS_BREAKPOINT ((uint32_t )0x80000003L) -#define STATUS_SINGLE_STEP ((uint32_t )0x80000004L) -#define STATUS_ACCESS_VIOLATION ((uint32_t )0xC0000005L) -#define STATUS_STACK_OVERFLOW ((uint32_t )0xC00000FDL) - -#endif // !_INC_WINDOWS - -#define STATUS_REDHAWK_NULL_REFERENCE ((uint32_t )0x00000000L) -#define STATUS_REDHAWK_UNMANAGED_HELPER_NULL_REFERENCE ((uint32_t )0x00000042L) - -#ifdef TARGET_UNIX -#define NULL_AREA_SIZE (4*1024) -#else -#define NULL_AREA_SIZE (64*1024) -#endif - - -#ifndef DACCESS_COMPILE -#ifndef _INC_WINDOWS - -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef FALSE -#define FALSE 0 -#endif - 
-#define INVALID_HANDLE_VALUE ((HANDLE)(intptr_t)-1) - -#define INFINITE 0xFFFFFFFF - -#define DUPLICATE_CLOSE_SOURCE 0x00000001 -#define DUPLICATE_SAME_ACCESS 0x00000002 - -#define PAGE_NOACCESS 0x01 -#define PAGE_READONLY 0x02 -#define PAGE_READWRITE 0x04 -#define PAGE_WRITECOPY 0x08 -#define PAGE_EXECUTE 0x10 -#define PAGE_EXECUTE_READ 0x20 -#define PAGE_EXECUTE_READWRITE 0x40 -#define PAGE_EXECUTE_WRITECOPY 0x80 -#define PAGE_GUARD 0x100 -#define PAGE_NOCACHE 0x200 -#define PAGE_WRITECOMBINE 0x400 - -#define WAIT_OBJECT_0 0 -#define WAIT_TIMEOUT 258 -#define WAIT_FAILED 0xFFFFFFFF - -#endif // !_INC_WINDOWS -#endif // !DACCESS_COMPILE - -extern uint32_t g_RhNumberOfProcessors; - -#ifdef TARGET_UNIX -#define REDHAWK_PALIMPORT extern "C" -#define REDHAWK_PALEXPORT extern "C" -#define REDHAWK_PALAPI -#else -#define REDHAWK_PALIMPORT EXTERN_C -#define REDHAWK_PALAPI __stdcall -#endif // TARGET_UNIX - -#ifndef DACCESS_COMPILE - -#ifdef _DEBUG -#define CaptureStackBackTrace RtlCaptureStackBackTrace -#endif - -#ifndef _INC_WINDOWS -// Include the list of external functions we wish to access. If we do our job 100% then it will be -// possible to link without any direct reference to any Win32 library. -#include "PalRedhawkFunctions.h" -#endif // !_INC_WINDOWS -#endif // !DACCESS_COMPILE - -// The Redhawk PAL must be initialized before any of its exports can be called. Returns true for a successful -// initialization and false on failure. -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalInit(); - -// Given the OS handle of a loaded module, compute the upper and lower virtual address bounds (inclusive). -REDHAWK_PALIMPORT void REDHAWK_PALAPI PalGetModuleBounds(HANDLE hOsHandle, _Out_ uint8_t ** ppLowerBound, _Out_ uint8_t ** ppUpperBound); - -struct NATIVE_CONTEXT; - -#if _WIN32 -REDHAWK_PALIMPORT NATIVE_CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextBuffer); -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalGetCompleteThreadContext(HANDLE hThread, _Out_ NATIVE_CONTEXT * pCtx); -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalSetThreadContext(HANDLE hThread, _Out_ NATIVE_CONTEXT * pCtx); -REDHAWK_PALIMPORT void REDHAWK_PALAPI PalRestoreContext(NATIVE_CONTEXT * pCtx); - -// For platforms that have segment registers in the CONTEXT_CONTROL set that -// are not saved in PAL_LIMITED_CONTEXT, this captures them from the current -// thread and saves them in `pContext`. -REDHAWK_PALIMPORT void REDHAWK_PALAPI PopulateControlSegmentRegisters(CONTEXT * pContext); -#endif - -REDHAWK_PALIMPORT int32_t REDHAWK_PALAPI PalGetProcessCpuCount(); - -// Retrieves the entire range of memory dedicated to the calling thread's stack. This does -// not get the current dynamic bounds of the stack, which can be significantly smaller than -// the maximum bounds. -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut); - -// Return value: number of characters in name string -REDHAWK_PALIMPORT int32_t PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase); - -#if _WIN32 - -// Various intrinsic declarations needed for the PalGetCurrentTEB implementation below. 
-#if defined(HOST_X86) -EXTERN_C unsigned long __readfsdword(unsigned long Offset); -#pragma intrinsic(__readfsdword) -#elif defined(HOST_AMD64) -EXTERN_C unsigned __int64 __readgsqword(unsigned long Offset); -#pragma intrinsic(__readgsqword) -#elif defined(HOST_ARM64) -EXTERN_C unsigned __int64 __getReg(int); -#pragma intrinsic(__getReg) -#else -#error Unsupported architecture -#endif - -// Retrieves the OS TEB for the current thread. -inline uint8_t * PalNtCurrentTeb() -{ -#if defined(HOST_X86) - return (uint8_t*)__readfsdword(0x18); -#elif defined(HOST_AMD64) - return (uint8_t*)__readgsqword(0x30); -#elif defined(HOST_ARM64) - return (uint8_t*)__getReg(18); -#else -#error Unsupported architecture -#endif -} - -// Offsets of ThreadLocalStoragePointer in the TEB. -#if defined(HOST_64BIT) -#define OFFSETOF__TEB__ThreadLocalStoragePointer 0x58 -#else -#define OFFSETOF__TEB__ThreadLocalStoragePointer 0x2c -#endif - -#endif // _WIN32 - -REDHAWK_PALIMPORT _Ret_maybenull_ _Post_writable_byte_size_(size) void* REDHAWK_PALAPI PalVirtualAlloc(uintptr_t size, uint32_t protect); -REDHAWK_PALIMPORT void REDHAWK_PALAPI PalVirtualFree(_In_ void* pAddress, uintptr_t size); -REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddress, uintptr_t size, uint32_t protect); -REDHAWK_PALIMPORT void PalFlushInstructionCache(_In_ void* pAddress, size_t size); -REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSleep(uint32_t milliseconds); -REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalSwitchToThread(); -REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled(); -REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName); -REDHAWK_PALIMPORT uint64_t REDHAWK_PALAPI PalGetTickCount64(); -REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer); - -#ifdef TARGET_UNIX -REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI PalGetOsPageSize(); -REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSetHardwareExceptionHandler(PHARDWARE_EXCEPTION_HANDLER handler); -#endif - -typedef uint32_t (__stdcall *BackgroundCallback)(_In_opt_ void* pCallbackContext); -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalSetCurrentThreadName(const char* name); -#ifdef TARGET_WINDOWS -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalSetCurrentThreadNameW(const WCHAR* name); -#endif -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartEventPipeHelperThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); - -#ifdef FEATURE_HIJACK -class Thread; -REDHAWK_PALIMPORT void REDHAWK_PALAPI PalHijack(Thread* pThreadToHijack); -REDHAWK_PALIMPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(_In_ HijackFunc* defaultHijackTarget); -#endif - -REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut); -REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalFreeThunksFromTemplate(_In_ void *pBaseAddress, size_t templateSize); - -REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalMarkThunksAsValidCallTargets( - void *virtualAddress, - int thunkSize, - int thunksPerBlock, - int thunkBlockSize, - int 
thunkBlocksPerMapping); - -REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI PalCompatibleWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t count, HANDLE* pHandles, UInt32_BOOL allowReentrantWait); - -REDHAWK_PALIMPORT HANDLE PalCreateLowMemoryResourceNotification(); - -REDHAWK_PALIMPORT void REDHAWK_PALAPI PalAttachThread(void* thread); -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalDetachThread(void* thread); - -REDHAWK_PALIMPORT uint64_t PalGetCurrentOSThreadId(); - -REDHAWK_PALIMPORT uint64_t PalQueryPerformanceCounter(); -REDHAWK_PALIMPORT uint64_t PalQueryPerformanceFrequency(); - -REDHAWK_PALIMPORT void PalPrintFatalError(const char* message); - -REDHAWK_PALIMPORT char* PalCopyTCharAsChar(const TCHAR* toCopy); - -REDHAWK_PALIMPORT HANDLE PalLoadLibrary(const char* moduleName); - -REDHAWK_PALIMPORT void* PalGetProcAddress(HANDLE module, const char* functionName); - -#ifdef TARGET_UNIX -REDHAWK_PALIMPORT int32_t __cdecl _stricmp(const char *string1, const char *string2); -#endif // TARGET_UNIX - -#include "PalRedhawkInline.h" - -#endif // !PAL_REDHAWK_INCLUDED diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h deleted file mode 100644 index 0c117737818d..000000000000 --- a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h +++ /dev/null @@ -1,231 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// -// Provide common definitions between the Redhawk and the Redhawk PAL implementation. This header file is used -// (rather than PalRedhawk.h) since the PAL implementation is built in a different environment than Redhawk -// code. For instance both environments may provide a definition of various common macros such as NULL. -// -// This header contains only environment neutral definitions (i.e. using only base C++ types and compositions -// of those types) and can thus be included from either environment without issue. -// - -#ifndef __PAL_REDHAWK_COMMON_INCLUDED -#define __PAL_REDHAWK_COMMON_INCLUDED - -#include "rhassert.h" - -#ifndef DECLSPEC_ALIGN -#ifdef _MSC_VER -#define DECLSPEC_ALIGN(x) __declspec(align(x)) -#else -#define DECLSPEC_ALIGN(x) __attribute__((aligned(x))) -#endif -#endif // DECLSPEC_ALIGN - -#ifdef HOST_AMD64 -#define AMD64_ALIGN_16 DECLSPEC_ALIGN(16) -#else // HOST_AMD64 -#define AMD64_ALIGN_16 -#endif // HOST_AMD64 - -struct AMD64_ALIGN_16 Fp128 { - uint64_t Low; - int64_t High; -}; - - -struct PAL_LIMITED_CONTEXT -{ - // Includes special registers, callee saved registers and general purpose registers used to return values from functions (not floating point return registers) -#ifdef TARGET_ARM - uintptr_t R0; - uintptr_t R4; - uintptr_t R5; - uintptr_t R6; - uintptr_t R7; - uintptr_t R8; - uintptr_t R9; - uintptr_t R10; - uintptr_t R11; - - uintptr_t IP; - uintptr_t SP; - uintptr_t LR; - - uint64_t D[16-8]; // D8 .. D15 registers (D16 .. 
D31 are volatile according to the ABI spec) - - uintptr_t GetIp() const { return IP; } - uintptr_t GetSp() const { return SP; } - uintptr_t GetFp() const { return R7; } - uintptr_t GetLr() const { return LR; } - void SetIp(uintptr_t ip) { IP = ip; } - void SetSp(uintptr_t sp) { SP = sp; } - -#elif defined(TARGET_ARM64) - uintptr_t FP; - uintptr_t LR; - - uintptr_t X0; - uintptr_t X1; - uintptr_t X19; - uintptr_t X20; - uintptr_t X21; - uintptr_t X22; - uintptr_t X23; - uintptr_t X24; - uintptr_t X25; - uintptr_t X26; - uintptr_t X27; - uintptr_t X28; - - uintptr_t SP; - uintptr_t IP; - - uint64_t D[16 - 8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved - // (V0-V7 and V16-V31 are not preserved according to the ABI spec). - - - uintptr_t GetIp() const { return IP; } - uintptr_t GetSp() const { return SP; } - uintptr_t GetFp() const { return FP; } - uintptr_t GetLr() const { return LR; } - void SetIp(uintptr_t ip) { IP = ip; } - void SetSp(uintptr_t sp) { SP = sp; } - -#elif defined(TARGET_LOONGARCH64) - uintptr_t FP; - uintptr_t RA; - - uintptr_t R4; - uintptr_t R5; - uintptr_t R23; - uintptr_t R24; - uintptr_t R25; - uintptr_t R26; - uintptr_t R27; - uintptr_t R28; - uintptr_t R29; - uintptr_t R30; - uintptr_t R31; - - uintptr_t SP; - uintptr_t IP; - - uint64_t F[32 - 24]; // Only the F registers F24..F31 need to be preserved - // (F0-F23 are not preserved according to the ABI spec). - - - uintptr_t GetIp() const { return IP; } - uintptr_t GetSp() const { return SP; } - uintptr_t GetFp() const { return FP; } - uintptr_t GetRa() const { return RA; } - void SetIp(uintptr_t ip) { IP = ip; } - void SetSp(uintptr_t sp) { SP = sp; } - -#elif defined(TARGET_RISCV64) - - uintptr_t FP; - uintptr_t RA; - - uintptr_t A0; - uintptr_t A1; - uintptr_t S1; - uintptr_t S2; - uintptr_t S3; - uintptr_t S4; - uintptr_t S5; - uintptr_t S6; - uintptr_t S7; - uintptr_t S8; - uintptr_t S9; - uintptr_t S10; - uintptr_t S11; - - uintptr_t SP; - uintptr_t IP; - - uint64_t F[12]; - - uintptr_t GetIp() const { return IP; } - uintptr_t GetSp() const { return SP; } - uintptr_t GetFp() const { return FP; } - uintptr_t GetRa() const { return RA; } - void SetIp(uintptr_t ip) { IP = ip; } - void SetSp(uintptr_t sp) { SP = sp; } - -#elif defined(UNIX_AMD64_ABI) - // Param regs: rdi, rsi, rdx, rcx, r8, r9, scratch: rax, rdx (both return val), preserved: rbp, rbx, r12-r15 - uintptr_t IP; - uintptr_t Rsp; - uintptr_t Rbp; - uintptr_t Rax; - uintptr_t Rbx; - uintptr_t Rdx; - uintptr_t R12; - uintptr_t R13; - uintptr_t R14; - uintptr_t R15; - - uintptr_t GetIp() const { return IP; } - uintptr_t GetSp() const { return Rsp; } - void SetIp(uintptr_t ip) { IP = ip; } - void SetSp(uintptr_t sp) { Rsp = sp; } - uintptr_t GetFp() const { return Rbp; } -#elif defined(TARGET_X86) || defined(TARGET_AMD64) - uintptr_t IP; - uintptr_t Rsp; - uintptr_t Rbp; - uintptr_t Rdi; - uintptr_t Rsi; - uintptr_t Rax; - uintptr_t Rbx; -#ifdef TARGET_AMD64 - uintptr_t R12; - uintptr_t R13; - uintptr_t R14; - uintptr_t R15; -#if defined(TARGET_WINDOWS) - uintptr_t SSP; -#else - uintptr_t __explicit_padding__; -#endif // TARGET_WINDOWS - Fp128 Xmm6; - Fp128 Xmm7; - Fp128 Xmm8; - Fp128 Xmm9; - Fp128 Xmm10; - Fp128 Xmm11; - Fp128 Xmm12; - Fp128 Xmm13; - Fp128 Xmm14; - Fp128 Xmm15; -#endif // TARGET_AMD64 - - uintptr_t GetIp() const { return IP; } - uintptr_t GetSp() const { return Rsp; } - uintptr_t GetFp() const { return Rbp; } - void SetIp(uintptr_t ip) { IP = ip; } - void SetSp(uintptr_t sp) { Rsp = sp; } -#else // 
TARGET_ARM - uintptr_t IP; - - uintptr_t GetIp() const { PORTABILITY_ASSERT("GetIp"); return 0; } - uintptr_t GetSp() const { PORTABILITY_ASSERT("GetSp"); return 0; } - uintptr_t GetFp() const { PORTABILITY_ASSERT("GetFp"); return 0; } - void SetIp(uintptr_t ip) { PORTABILITY_ASSERT("SetIp"); } - void SetSp(uintptr_t sp) { PORTABILITY_ASSERT("GetSp"); } -#endif // TARGET_ARM -}; - -void RuntimeThreadShutdown(void* thread); - -typedef void (*ThreadExitCallback)(); - -extern ThreadExitCallback g_threadExitCallback; - -#ifdef TARGET_UNIX -typedef int32_t (*PHARDWARE_EXCEPTION_HANDLER)(uintptr_t faultCode, uintptr_t faultAddress, PAL_LIMITED_CONTEXT* palContext, uintptr_t* arg0Reg, uintptr_t* arg1Reg); -#endif - -#endif // __PAL_REDHAWK_COMMON_INCLUDED diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkFunctions.h b/src/coreclr/nativeaot/Runtime/PalRedhawkFunctions.h deleted file mode 100644 index 05d59f10d817..000000000000 --- a/src/coreclr/nativeaot/Runtime/PalRedhawkFunctions.h +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -extern "C" uint16_t __stdcall CaptureStackBackTrace(uint32_t, uint32_t, void*, uint32_t*); -inline uint16_t PalCaptureStackBackTrace(uint32_t arg1, uint32_t arg2, void* arg3, uint32_t* arg4) -{ - return CaptureStackBackTrace(arg1, arg2, arg3, arg4); -} - -extern "C" UInt32_BOOL __stdcall CloseHandle(HANDLE); -inline UInt32_BOOL PalCloseHandle(HANDLE arg1) -{ - return CloseHandle(arg1); -} - -extern "C" void __stdcall DeleteCriticalSection(CRITICAL_SECTION *); -inline void PalDeleteCriticalSection(CRITICAL_SECTION * arg1) -{ - DeleteCriticalSection(arg1); -} - -extern "C" void __stdcall EnterCriticalSection(CRITICAL_SECTION *); -inline void PalEnterCriticalSection(CRITICAL_SECTION * arg1) -{ - EnterCriticalSection(arg1); -} - -extern "C" void __stdcall FlushProcessWriteBuffers(); -inline void PalFlushProcessWriteBuffers() -{ - FlushProcessWriteBuffers(); -} - -extern "C" uint32_t __stdcall GetCurrentProcessId(); -inline uint32_t PalGetCurrentProcessId() -{ - return GetCurrentProcessId(); -} - -#ifdef UNICODE -_Success_(return != 0 && return < nSize) -extern "C" uint32_t __stdcall GetEnvironmentVariableW(_In_opt_ LPCWSTR lpName, _Out_writes_to_opt_(nSize, return + 1) LPWSTR lpBuffer, _In_ uint32_t nSize); -inline uint32_t PalGetEnvironmentVariable(_In_opt_ LPCWSTR lpName, _Out_writes_to_opt_(nSize, return + 1) LPWSTR lpBuffer, _In_ uint32_t nSize) -{ - return GetEnvironmentVariableW(lpName, lpBuffer, nSize); -} -#else -_Success_(return != 0 && return < nSize) -extern "C" uint32_t __stdcall GetEnvironmentVariableA(_In_opt_ LPCSTR lpName, _Out_writes_to_opt_(nSize, return + 1) LPSTR lpBuffer, _In_ uint32_t nSize); -inline uint32_t PalGetEnvironmentVariable(_In_opt_ LPCSTR lpName, _Out_writes_to_opt_(nSize, return + 1) LPSTR lpBuffer, _In_ uint32_t nSize) -{ - return GetEnvironmentVariableA(lpName, lpBuffer, nSize); -} -#endif - -extern "C" UInt32_BOOL __stdcall InitializeCriticalSectionEx(CRITICAL_SECTION *, uint32_t, uint32_t); -inline UInt32_BOOL PalInitializeCriticalSectionEx(CRITICAL_SECTION * arg1, uint32_t arg2, uint32_t arg3) -{ - return InitializeCriticalSectionEx(arg1, arg2, arg3); -} - -extern "C" void __stdcall LeaveCriticalSection(CRITICAL_SECTION *); -inline void PalLeaveCriticalSection(CRITICAL_SECTION * arg1) -{ - LeaveCriticalSection(arg1); -} - -extern "C" UInt32_BOOL __stdcall ResetEvent(HANDLE); -inline UInt32_BOOL 
PalResetEvent(HANDLE arg1)
-{
-    return ResetEvent(arg1);
-}
-
-extern "C" UInt32_BOOL __stdcall SetEvent(HANDLE);
-inline UInt32_BOOL PalSetEvent(HANDLE arg1)
-{
-    return SetEvent(arg1);
-}
-
-extern "C" uint32_t __stdcall WaitForSingleObjectEx(HANDLE, uint32_t, UInt32_BOOL);
-inline uint32_t PalWaitForSingleObjectEx(HANDLE arg1, uint32_t arg2, UInt32_BOOL arg3)
-{
-    return WaitForSingleObjectEx(arg1, arg2, arg3);
-}
-
-#ifdef PAL_REDHAWK_INCLUDED
-extern "C" void __stdcall GetSystemTimeAsFileTime(FILETIME *);
-inline void PalGetSystemTimeAsFileTime(FILETIME * arg1)
-{
-    GetSystemTimeAsFileTime(arg1);
-}
-
-extern "C" void __stdcall RaiseFailFastException(PEXCEPTION_RECORD, PCONTEXT, uint32_t);
-inline void PalRaiseFailFastException(PEXCEPTION_RECORD arg1, PCONTEXT arg2, uint32_t arg3)
-{
-    RaiseFailFastException(arg1, arg2, arg3);
-}
-#endif
diff --git a/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt
index 523d1fe230d0..345a8ba1d005 100644
--- a/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt
+++ b/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt
@@ -16,27 +16,27 @@ endif()
 # Get the current list of definitions
 get_compile_definitions(DEFINITIONS)
 
-set(ASM_OFFSETS_CSPP ${RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp)
+set(ASM_OFFSETS_CSPP ${NATIVEAOT_RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp)
 
 if(WIN32)
     set(COMPILER_LANGUAGE "")
     set(PREPROCESSOR_FLAGS -EP -nologo)
-    set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/windows/AsmOffsets.cpp)
+    set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/windows/AsmOffsets.cpp)
     set_target_properties(aotminipal PROPERTIES
                           COMPILE_PDB_NAME "aotminipal"
                           COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/$")
 else()
     set(COMPILER_LANGUAGE -x c++)
-    set(PREPROCESSOR_FLAGS -E -P -C) # include license for code cop
-    set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/unix/AsmOffsets.cpp)
+    set(PREPROCESSOR_FLAGS -E -P)
+    set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/unix/AsmOffsets.cpp)
 endif()
 
 add_custom_command(
     # The AsmOffsetsPortable.cs is consumed later by the managed build
     TARGET PortableRuntime
     COMMAND ${CMAKE_CXX_COMPILER} ${COMPILER_LANGUAGE} ${DEFINITIONS} ${PREPROCESSOR_FLAGS} -I"${ARCH_SOURCES_DIR}" "${ASM_OFFSETS_CSPP}" >"${CMAKE_CURRENT_BINARY_DIR}/AsmOffsetsPortable.cs"
-    DEPENDS "${RUNTIME_DIR}/AsmOffsets.cpp" "${RUNTIME_DIR}/AsmOffsets.h"
+    DEPENDS "${NATIVEAOT_RUNTIME_DIR}/AsmOffsets.cpp" "${NATIVEAOT_RUNTIME_DIR}/AsmOffsets.h"
 )
 
 install_static_library(PortableRuntime aotsdk nativeaot)
diff --git a/src/coreclr/nativeaot/Runtime/RedhawkWarnings.h b/src/coreclr/nativeaot/Runtime/RedhawkWarnings.h
deleted file mode 100644
index e3cc1118b5d8..000000000000
--- a/src/coreclr/nativeaot/Runtime/RedhawkWarnings.h
+++ /dev/null
@@ -1,8 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
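An aside on the new NativePrimitiveDecoder.h added earlier in this diff: ReadUnsigned parses a variable-length unsigned format (the header calls itself a clone of a NativePrimitiveDecoder.cs subset) in which the number of trailing one-bits in the first byte selects a 1-, 2-, 3-, 4- or 5-byte encoding carrying 7, 14, 21, 28 or a full 32 bits of payload. A minimal sketch of a matching writer, assuming a std::vector-backed buffer and intended purely as illustration (it is not part of this change), makes the layout easier to see:

```cpp
// Sketch only: a writer for the format NativePrimitiveDecoder::ReadUnsigned parses.
// Not part of this change; the buffer type and helper name are illustrative.
#include <cstdint>
#include <vector>

static void WriteUnsignedSketch(std::vector<uint8_t>& buffer, uint32_t value)
{
    if (value < (1u << 7))
    {
        buffer.push_back((uint8_t)(value << 1));         // xxxxxxx0 : 1 byte, 7 bits
    }
    else if (value < (1u << 14))
    {
        buffer.push_back((uint8_t)((value << 2) | 1));   // xxxxxx01 : 2 bytes, 14 bits
        buffer.push_back((uint8_t)(value >> 6));
    }
    else if (value < (1u << 21))
    {
        buffer.push_back((uint8_t)((value << 3) | 3));   // xxxxx011 : 3 bytes, 21 bits
        buffer.push_back((uint8_t)(value >> 5));
        buffer.push_back((uint8_t)(value >> 13));
    }
    else if (value < (1u << 28))
    {
        buffer.push_back((uint8_t)((value << 4) | 7));   // xxxx0111 : 4 bytes, 28 bits
        buffer.push_back((uint8_t)(value >> 4));
        buffer.push_back((uint8_t)(value >> 12));
        buffer.push_back((uint8_t)(value >> 20));
    }
    else
    {
        buffer.push_back(0x0F);                          // escape: full 32-bit value follows
        buffer.push_back((uint8_t)value);
        buffer.push_back((uint8_t)(value >> 8));
        buffer.push_back((uint8_t)(value >> 16));
        buffer.push_back((uint8_t)(value >> 24));
    }
}
```

Round-tripping through the decoder recovers the value: for example 300 encodes as 0xB1 0x04, and ReadUnsigned computes (0xB1 >> 2) | (0x04 << 6) = 44 + 256 = 300.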
- -// -// Disable some commonly ignored warnings -// - -MSVC_DISABLE_WARNING(4200) // nonstandard extension used : zero-sized array in struct/union diff --git a/src/coreclr/nativeaot/Runtime/RestrictedCallouts.cpp b/src/coreclr/nativeaot/Runtime/RestrictedCallouts.cpp index 56e673217967..782d7f5f9dc8 100644 --- a/src/coreclr/nativeaot/Runtime/RestrictedCallouts.cpp +++ b/src/coreclr/nativeaot/Runtime/RestrictedCallouts.cpp @@ -10,8 +10,8 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" #include "holder.h" @@ -22,7 +22,6 @@ #include "MethodTable.h" #include "ObjectLayout.h" #include "event.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" diff --git a/src/coreclr/nativeaot/Runtime/RhConfig.cpp b/src/coreclr/nativeaot/Runtime/RhConfig.cpp index 506cf750672b..23d405822b51 100644 --- a/src/coreclr/nativeaot/Runtime/RhConfig.cpp +++ b/src/coreclr/nativeaot/Runtime/RhConfig.cpp @@ -4,8 +4,8 @@ #ifndef DACCESS_COMPILE #include "CommonTypes.h" #include "CommonMacros.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "holder.h" #include "RhConfig.h" diff --git a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp index 294cae6eaa31..de70f1966fdb 100644 --- a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp +++ b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp @@ -1,11 +1,12 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. + #include "common.h" #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" #include "holder.h" @@ -21,7 +22,6 @@ #include "shash.h" #include "TypeManager.h" #include "MethodTable.h" -#include "varint.h" #include "CommonMacros.inl" #include "slist.inl" @@ -218,7 +218,7 @@ void RuntimeInstance::RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID m_cbManagedCodeRange = cbRange; } -extern "C" void __stdcall RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, uint32_t cbRange) +extern "C" void RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, uint32_t cbRange) { GetRuntimeInstance()->RegisterCodeManager(pCodeManager, pvStartRange, cbRange); } @@ -258,7 +258,7 @@ bool RuntimeInstance::IsUnboxingStub(uint8_t* pCode) return false; } -extern "C" bool __stdcall RegisterUnboxingStubs(PTR_VOID pvStartRange, uint32_t cbRange) +extern "C" bool RegisterUnboxingStubs(PTR_VOID pvStartRange, uint32_t cbRange) { return GetRuntimeInstance()->RegisterUnboxingStubs(pvStartRange, cbRange); } diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 762cde5c9bfb..a10beb5cf67d 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -2,19 +2,14 @@ // The .NET Foundation licenses this file to you under the MIT license. 
#include "common.h" -#ifdef HOST_WINDOWS -#include -#endif #include "gcenv.h" #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" -#include "RedhawkWarnings.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" @@ -496,7 +491,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO m_RegDisplay.pRA = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, RA); // - // preserved vfp regs + // preserved fp regs // for (int32_t i = 0; i < 16 - 8; i++) { @@ -530,7 +525,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO // preserved floating-point registers // int32_t preservedFpIndices[] = {8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}; - for (int i = 0; i < sizeof(preservedFpIndices) / sizeof(preservedFpIndices[0]); i++) + for (int i = 0; i < ARRAY_SIZE(preservedFpIndices); i++) { m_RegDisplay.F[preservedFpIndices[i]] = pCtx->F[preservedFpIndices[i]]; } @@ -810,6 +805,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC m_RegDisplay.pS9 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S9); m_RegDisplay.pS10 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S10); m_RegDisplay.pS11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S11); + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_REG(pCtx, Fp); + m_RegDisplay.pRA = (PTR_uintptr_t)PTR_TO_REG(pCtx, Ra); // // scratch regs @@ -1283,13 +1280,15 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() m_RegDisplay.pR29 = SP++; m_RegDisplay.pR30 = SP++; m_RegDisplay.pR31 = SP++; + SP++; // for alignment padding #elif defined(TARGET_RISCV64) PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP); - for (int i = 0; i < 32; i++) + int32_t preservedFpIndices[] = {8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}; + for (int i = 0; i < ARRAY_SIZE(preservedFpIndices); i++) { - m_RegDisplay.F[i] = *f++; + m_RegDisplay.F[preservedFpIndices[i]] = *f++; } SP = (PTR_uintptr_t)f; @@ -1476,12 +1475,12 @@ struct UniversalTransitionStackFrame // Conservative GC reporting must be applied to everything between the base of the // ReturnBlock and the top of the StackPassedArgs. private: - uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (fp) - uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (ra) - Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) - uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) - uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) - uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) + uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0A0 (0x08 bytes) (fp) + uintptr_t m_pushedRA; // ChildSP+008 CallerSP-098 (0x08 bytes) (ra) + uint64_t m_fpArgRegs[8]; // ChildSP+010 CallerSP-090 (0x40 bytes) (fa0-fa7) + uintptr_t m_returnBlock[2]; // ChildSP+050 CallerSP-050 (0x10 bytes) + uintptr_t m_intArgRegs[8]; // ChildSP+060 CallerSP-040 (0x40 bytes) (a0-a7) + uintptr_t m_stackPassedArgs[1]; // ChildSP+0A0 CallerSP+000 (unknown size) public: PTR_uintptr_t get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } @@ -1498,12 +1497,12 @@ struct UniversalTransitionStackFrame // Conservative GC reporting must be applied to everything between the base of the // ReturnBlock and the top of the StackPassedArgs. 
private: - uintptr_t m_pushedRA; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (ra) - uintptr_t m_pushedFP; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (fp) - Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) - uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) - uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) - uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) + uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0A0 (0x08 bytes) (fp) + uintptr_t m_pushedRA; // ChildSP+008 CallerSP-098 (0x08 bytes) (ra) + uint64_t m_fpArgRegs[8]; // ChildSP+010 CallerSP-090 (0x40 bytes) (fa0-fa7) + uintptr_t m_returnBlock[2]; // ChildSP+050 CallerSP-050 (0x10 bytes) + uintptr_t m_intArgRegs[8]; // ChildSP+060 CallerSP-040 (0x40 bytes) (a0-a7) + uintptr_t m_stackPassedArgs[1]; // ChildSP+0A0 CallerSP+000 (unknown size) public: PTR_uintptr_t get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h index 9c1739c3951f..90a722634e21 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h @@ -6,7 +6,7 @@ #include "CommonMacros.h" #include "ICodeManager.h" -#include "PalRedhawk.h" // NATIVE_CONTEXT +#include "Pal.h" // NATIVE_CONTEXT #include "regdisplay.h" #include "forward_declarations.h" @@ -55,6 +55,7 @@ class StackFrameIterator bool GetHijackedReturnValueLocation(PTR_OBJECTREF * pLocation, GCRefKind * pKind); #endif void SetControlPC(PTR_VOID controlPC); + PTR_VOID GetControlPC() { return m_ControlPC; } static bool IsValidReturnAddress(PTR_VOID pvAddress); diff --git a/src/coreclr/nativeaot/Runtime/SyncClean.cpp b/src/coreclr/nativeaot/Runtime/SyncClean.cpp index 8204193f50f1..57ca29fce9c7 100644 --- a/src/coreclr/nativeaot/Runtime/SyncClean.cpp +++ b/src/coreclr/nativeaot/Runtime/SyncClean.cpp @@ -4,8 +4,8 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" #include "holder.h" @@ -23,6 +23,6 @@ void SyncClean::CleanUp () { #ifdef FEATURE_CACHED_INTERFACE_DISPATCH // Update any interface dispatch caches that were unsafe to modify outside of this GC. 
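Looking back at the UniversalTransitionStackFrame rework just above: shrinking m_fpArgRegs to plain 64-bit slots and m_returnBlock to two slots takes the RISC-V and LoongArch frame from 0xF0 down to 0xA0 bytes, which is what the new ChildSP/CallerSP annotations encode. A standalone sketch (not code from this change; it simply mirrors the field list and assumes a 64-bit target) checks the arithmetic:

```cpp
// Sketch that mirrors the reworked frame layout above to sanity-check the ChildSP offsets.
#include <cstddef>
#include <cstdint>

struct FrameSketch
{
    uintptr_t m_pushedFP;           // ChildSP+0x00
    uintptr_t m_pushedRA;           // ChildSP+0x08
    uint64_t  m_fpArgRegs[8];       // ChildSP+0x10 (fa0-fa7, 0x40 bytes)
    uintptr_t m_returnBlock[2];     // ChildSP+0x50 (0x10 bytes)
    uintptr_t m_intArgRegs[8];      // ChildSP+0x60 (a0-a7, 0x40 bytes)
    uintptr_t m_stackPassedArgs[1]; // ChildSP+0xA0 == CallerSP
};

static_assert(offsetof(FrameSketch, m_fpArgRegs)       == 0x10, "fp arg regs");
static_assert(offsetof(FrameSketch, m_returnBlock)     == 0x50, "return block");
static_assert(offsetof(FrameSketch, m_intArgRegs)      == 0x60, "int arg regs");
static_assert(offsetof(FrameSketch, m_stackPassedArgs) == 0xA0, "caller SP");
```

In other words 8 + 8 + 0x40 + 0x10 + 0x40 = 0xA0, so m_stackPassedArgs starts at the caller's SP exactly as the comments state.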
- ReclaimUnusedInterfaceDispatchCaches(); + InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches(); #endif } diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp index 9f7211a2ee76..9e087f6e9c18 100644 --- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp +++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp @@ -5,10 +5,10 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" +#include "PalLimitedContext.h" #include "CommonMacros.inl" #include "volatile.h" -#include "PalRedhawk.h" +#include "Pal.h" #include "rhassert.h" @@ -25,7 +25,7 @@ #elif TARGET_LOONGARCH64 #define THUNK_SIZE 16 #elif TARGET_RISCV64 -#define THUNK_SIZE 12 +#define THUNK_SIZE 20 #else #define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0 #endif @@ -100,13 +100,15 @@ FCIMPL0(int, RhpGetThunkBlockSize) } FCIMPLEND -EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() +EXTERN_C HRESULT QCALLTYPE RhAllocateThunksMapping(void** ppThunksSection) { #ifdef WIN32 void * pNewMapping = PalVirtualAlloc(THUNKS_MAP_SIZE * 2, PAGE_READWRITE); if (pNewMapping == NULL) - return NULL; + { + return E_OUTOFMEMORY; + } void * pThunksSection = pNewMapping; void * pDataSection = (uint8_t*)pNewMapping + THUNKS_MAP_SIZE; @@ -120,7 +122,9 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() // changed anymore. void * pNewMapping = PalVirtualAlloc(THUNKS_MAP_SIZE * 2, PAGE_EXECUTE_READ); if (pNewMapping == NULL) - return NULL; + { + return E_OUTOFMEMORY; + } void * pThunksSection = pNewMapping; void * pDataSection = (uint8_t*)pNewMapping + THUNKS_MAP_SIZE; @@ -129,7 +133,7 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() !PalVirtualProtect(pThunksSection, THUNKS_MAP_SIZE, PAGE_EXECUTE_READWRITE)) { PalVirtualFree(pNewMapping, THUNKS_MAP_SIZE * 2); - return NULL; + return E_FAIL; } #if defined(HOST_APPLE) && defined(HOST_ARM64) @@ -244,10 +248,14 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() //jirl $r0, $t8, 0 int delta = (int)(pCurrentDataAddress - pCurrentThunkAddress); + ASSERT((-0x200000 <= delta) && (delta < 0x200000)); + *((uint32_t*)pCurrentThunkAddress) = 0x18000013 | (((delta & 0x3FFFFC) >> 2) << 5); pCurrentThunkAddress += 4; delta += OS_PAGE_SIZE - POINTER_SIZE - (i * POINTER_SIZE * 2) - 4; + ASSERT((-0x200000 <= delta) && (delta < 0x200000)); + *((uint32_t*)pCurrentThunkAddress) = 0x18000014 | (((delta & 0x3FFFFC) >> 2) << 5); pCurrentThunkAddress += 4; @@ -259,21 +267,27 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() #elif defined(TARGET_RISCV64) - // auipc t0, %hi(delta) // Load upper immediate with address high bits - // ld t1, %lo(delta)(t0) // Load data from address in (t0 + lower immediate) - // jr t1 // Jump and don't link register + //auipc t1, hi() + //addi t1, t1, lo() + //auipc t0, hi() + //ld t0, (t0) + //jalr zero, t0, 0 int delta = (int)(pCurrentDataAddress - pCurrentThunkAddress); - uint32_t deltaHi = (delta + 0x800) & 0xfffff000; - uint32_t deltaLo = delta << (32 - 12); - - *((uint32_t*)pCurrentThunkAddress) = 0x00000297 | deltaHi; // auipc + *((uint32_t*)pCurrentThunkAddress) = 0x00000317 | ((((delta + 0x800) & 0xFFFFF000) >> 12) << 12); // auipc t1, delta[31:12] + pCurrentThunkAddress += 4; + + *((uint32_t*)pCurrentThunkAddress) = 0x00030313 | ((delta & 0xFFF) << 20); // addi t1, t1, delta[11:0] + pCurrentThunkAddress += 4; + + delta += OS_PAGE_SIZE - POINTER_SIZE - (i * POINTER_SIZE * 2) - 8; + *((uint32_t*)pCurrentThunkAddress) = 0x00000297 | 
((((delta + 0x800) & 0xFFFFF000) >> 12) << 12); // auipc t0, delta[31:12] pCurrentThunkAddress += 4; - *((uint32_t*)pCurrentThunkAddress) = 0x0002B303 | deltaLo; // addi + *((uint32_t*)pCurrentThunkAddress) = 0x0002b283 | ((delta & 0xFFF) << 20); // ld t0, (delta[11:0])(t0) pCurrentThunkAddress += 4; - *((uint32_t*)pCurrentThunkAddress) = 0x00030067; // jr + *((uint32_t*)pCurrentThunkAddress) = 0x00008282; // jalr zero, t0, 0 pCurrentThunkAddress += 4; #else @@ -296,13 +310,14 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() if (!PalVirtualProtect(pThunksSection, THUNKS_MAP_SIZE, PAGE_EXECUTE_READ)) { PalVirtualFree(pNewMapping, THUNKS_MAP_SIZE * 2); - return NULL; + return E_FAIL; } #endif PalFlushInstructionCache(pThunksSection, THUNKS_MAP_SIZE); - return pThunksSection; + *ppThunksSection = pThunksSection; + return S_OK; } // FEATURE_RX_THUNKS @@ -317,7 +332,7 @@ FCDECL0(int, RhpGetThunkBlockSize); FCDECL1(void*, RhpGetThunkDataBlockAddress, void* addr); FCDECL1(void*, RhpGetThunkStubsBlockAddress, void* addr); -EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() +EXTERN_C HRESULT QCALLTYPE RhAllocateThunksMapping(void** ppThunksSection) { static int nextThunkDataMapping = 0; @@ -332,7 +347,7 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() if (nextThunkDataMapping == thunkDataMappingCount) { - return NULL; + return E_FAIL; } if (g_pThunkStubData == NULL) @@ -343,7 +358,7 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() if (g_pThunkStubData == NULL) { - return NULL; + return E_OUTOFMEMORY; } } @@ -351,7 +366,7 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() if (VirtualAlloc(pThunkDataBlock, thunkDataMappingSize, MEM_COMMIT, PAGE_READWRITE) == NULL) { - return NULL; + return E_OUTOFMEMORY; } nextThunkDataMapping++; @@ -359,7 +374,8 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() void* pThunks = RhpGetThunkStubsBlockAddress(pThunkDataBlock); ASSERT(RhpGetThunkDataBlockAddress(pThunks) == pThunkDataBlock); - return pThunks; + *ppThunksSection = pThunks; + return S_OK; } #else // FEATURE_FIXED_POOL_THUNKS @@ -370,7 +386,7 @@ FCDECL0(int, RhpGetNumThunksPerBlock); FCDECL0(int, RhpGetThunkSize); FCDECL0(int, RhpGetThunkBlockSize); -EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() +EXTERN_C HRESULT QCALLTYPE RhAllocateThunksMapping(void** ppThunksSection) { static void* pThunksTemplateAddress = NULL; @@ -399,7 +415,7 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() int templateRva = (int)((uint8_t*)RhpGetThunksBase() - pModuleBase); if (!PalAllocateThunksFromTemplate((HANDLE)pModuleBase, templateRva, templateSize, &pThunkMap)) - return NULL; + return E_OUTOFMEMORY; } if (!PalMarkThunksAsValidCallTargets( @@ -412,10 +428,11 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() if (pThunkMap != pThunksTemplateAddress) PalFreeThunksFromTemplate(pThunkMap, templateSize); - return NULL; + return E_FAIL; } - return pThunkMap; + *ppThunksSection = pThunkMap; + return S_OK; } #endif // FEATURE_RX_THUNKS diff --git a/src/coreclr/nativeaot/Runtime/TypeManager.cpp b/src/coreclr/nativeaot/Runtime/TypeManager.cpp index 96dc357136d9..edba0b2ed207 100644 --- a/src/coreclr/nativeaot/Runtime/TypeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/TypeManager.cpp @@ -4,13 +4,12 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "holder.h" #include "rhassert.h" #include "slist.h" #include "shash.h" -#include "varint.h" #include "rhbinder.h" 
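Returning to the ThunksMapping.cpp change above: the rewritten RISC-V thunk grows THUNK_SIZE from 12 to 20 bytes and materializes two pc-relative addresses before the indirect jump. The magic constants are ordinary RISC-V U-type and I-type encodings; the helpers below are illustrative only (not code from this change) and spell out where the bits land and why 0x800 is added before taking the upper 20 bits:

```cpp
// Sketch only: reproduces the RISC-V instruction encodings emitted per thunk above.
// Register numbers: t0 = x5, t1 = x6.
#include <cstdint>

// auipc rd, hi20: rd = pc + (hi20 << 12). Adding 0x800 rounds the upper part up by one
// 0x1000 step when bit 11 of the offset is set, because addi/ld sign-extend the low 12 bits.
static uint32_t EncodeAuipc(uint32_t rd, int32_t offset)
{
    uint32_t hi = ((uint32_t)(offset + 0x800)) & 0xFFFFF000;
    return 0x00000017u | (rd << 7) | hi;
}

// addi rd, rs1, imm12 (12-bit immediate in bits 31:20)
static uint32_t EncodeAddi(uint32_t rd, uint32_t rs1, int32_t offset)
{
    return 0x00000013u | (rd << 7) | (rs1 << 15) | (((uint32_t)offset & 0xFFF) << 20);
}

// ld rd, imm12(rs1)
static uint32_t EncodeLd(uint32_t rd, uint32_t rs1, int32_t offset)
{
    return 0x00003003u | (rd << 7) | (rs1 << 15) | (((uint32_t)offset & 0xFFF) << 20);
}

// With these helpers the five words per thunk correspond to:
//   EncodeAuipc(6, delta)      == 0x00000317 | ...  // auipc t1, hi(delta)
//   EncodeAddi (6, 6, delta)   == 0x00030313 | ...  // addi  t1, t1, lo(delta)
//   EncodeAuipc(5, delta2)     == 0x00000297 | ...  // auipc t0, hi(delta2)
//   EncodeLd   (5, 5, delta2)  == 0x0002b283 | ...  // ld    t0, lo(delta2)(t0)
//   0x00008282                                      // c.jr t0 in the low halfword, zero padding above
// where delta2 is delta re-aimed at the thunk's data slot (the OS_PAGE_SIZE adjustment above).
```

The final word's low halfword, 0x8282, is the compressed c.jr t0, the equivalent of jalr zero, t0, 0 named in the source comment; the upper halfword is simply left as padding since control has already transferred.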
#include "regdisplay.h" #include "StackFrameIterator.h" diff --git a/src/coreclr/nativeaot/Runtime/UniversalTransitionHelpers.cpp b/src/coreclr/nativeaot/Runtime/UniversalTransitionHelpers.cpp index 649aac21ac8d..aabf45392313 100644 --- a/src/coreclr/nativeaot/Runtime/UniversalTransitionHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/UniversalTransitionHelpers.cpp @@ -1,10 +1,11 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. + #include "common.h" #include "CommonTypes.h" #include "CommonMacros.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #ifdef _DEBUG #define TRASH_SAVED_ARGUMENT_REGISTERS diff --git a/src/coreclr/nativeaot/Runtime/allocheap.cpp b/src/coreclr/nativeaot/Runtime/allocheap.cpp index b50ded0af56c..dd8425cc0ef5 100644 --- a/src/coreclr/nativeaot/Runtime/allocheap.cpp +++ b/src/coreclr/nativeaot/Runtime/allocheap.cpp @@ -5,8 +5,8 @@ #include "CommonMacros.h" #include "daccess.h" #include "DebugMacrosExt.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" #include "holder.h" diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S deleted file mode 100644 index 8923a7a4fbb6..000000000000 --- a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S +++ /dev/null @@ -1,312 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -.intel_syntax noprefix -#include // generated by the build from AsmOffsets.cpp -#include - -// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -// allocation context then automatically fallback to the slow allocation path. -// RDI == MethodTable -NESTED_ENTRY RhpNewFast, _TEXT, NoHandler - push_nonvol_reg rbx - mov rbx, rdi - - // rax = GetThread() - INLINE_GETTHREAD - - // - // rbx contains MethodTable pointer - // - mov edx, [rbx + OFFSETOF__MethodTable__m_uBaseSize] - - // - // rax: Thread pointer - // rbx: MethodTable pointer - // rdx: base size - // - - mov rsi, [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - add rdx, rsi - cmp rdx, [rax + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja LOCAL_LABEL(RhpNewFast_RarePath) - - // set the new alloc pointer - mov [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rdx - - mov rax, rsi - - // set the new object's MethodTable pointer - mov [rsi], rbx - - .cfi_remember_state - pop_nonvol_reg rbx - ret - - .cfi_restore_state - .cfi_def_cfa_offset 16 // workaround cfi_restore_state bug -LOCAL_LABEL(RhpNewFast_RarePath): - mov rdi, rbx // restore MethodTable - xor esi, esi - pop_nonvol_reg rbx - jmp C_FUNC(RhpNewObject) - -NESTED_END RhpNewFast, _TEXT - - - -// Allocate non-array object with finalizer -// RDI == MethodTable -LEAF_ENTRY RhpNewFinalizable, _TEXT - mov esi, GC_ALLOC_FINALIZE - jmp C_FUNC(RhpNewObject) -LEAF_END RhpNewFinalizable, _TEXT - - - -// Allocate non-array object -// RDI == MethodTable -// ESI == alloc flags -NESTED_ENTRY RhpNewObject, _TEXT, NoHandler - - PUSH_COOP_PINVOKE_FRAME rcx - END_PROLOGUE - - // RCX: transition frame - - // Preserve the MethodTable in RBX - mov rbx, rdi - - xor edx, edx // numElements - - // Call the rest of the allocation helper. 
- // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call C_FUNC(RhpGcAlloc) - - test rax, rax - jz LOCAL_LABEL(NewOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - ret - - .cfi_restore_state - .cfi_def_cfa_offset 96 // workaround cfi_restore_state bug -LOCAL_LABEL(NewOutOfMemory): - // This is the OOM failure path. We're going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mov rdi, rbx // MethodTable pointer - xor esi, esi // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - - jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) -NESTED_END RhpNewObject, _TEXT - - -// Allocate a string. -// RDI == MethodTable -// ESI == character/element count -NESTED_ENTRY RhNewString, _TEXT, NoHandler - // we want to limit the element count to the non-negative 32-bit int range - cmp rsi, MAX_STRING_LENGTH - ja LOCAL_LABEL(StringSizeOverflow) - - push_nonvol_reg rbx - push_nonvol_reg r12 - push_register rcx // padding - - mov rbx, rdi // save MethodTable - mov r12, rsi // save element count - - // rax = GetThread() - INLINE_GETTHREAD - - mov rcx, rax // rcx = Thread* - - // Compute overall allocation size (align(base size + (element size * elements), 8)). - lea rax, [r12 * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7] - and rax, -8 - - // rax == string size - // rbx == MethodTable - // rcx == Thread* - // r12 == element count - - mov rdx, rax - add rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc LOCAL_LABEL(RhNewString_RarePath) - - // rax == new alloc ptr - // rbx == MethodTable - // rcx == Thread* - // rdx == string size - // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja LOCAL_LABEL(RhNewString_RarePath) - - mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax - - // calc the new object pointer - sub rax, rdx - - mov [rax + OFFSETOF__Object__m_pEEType], rbx - mov [rax + OFFSETOF__String__m_Length], r12d - - .cfi_remember_state - pop_register rcx // padding - pop_nonvol_reg r12 - pop_nonvol_reg rbx - ret - - .cfi_restore_state - .cfi_def_cfa_offset 32 // workaround cfi_restore_state bug -LOCAL_LABEL(RhNewString_RarePath): - mov rdi, rbx // restore MethodTable - mov rsi, r12 // restore element count - // passing string size in rdx - - pop_register rcx // padding - pop_nonvol_reg r12 - pop_nonvol_reg rbx - jmp C_FUNC(RhpNewArrayRare) - -LOCAL_LABEL(StringSizeOverflow): - // We get here if the size of the final string object can't be represented as an unsigned - // 32-bit value. We're going to tail-call to a managed helper that will throw - // an OOM exception that the caller of this allocator understands. - - // rdi holds MethodTable pointer already - xor esi, esi // Indicate that we should throw OOM. - jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) - -NESTED_END RhNewString, _TEXT - - -// Allocate one dimensional, zero based array (SZARRAY). 
-// RDI == MethodTable -// ESI == element count -NESTED_ENTRY RhpNewArray, _TEXT, NoHandler - // we want to limit the element count to the non-negative 32-bit int range - cmp rsi, 0x07fffffff - ja LOCAL_LABEL(ArraySizeOverflow) - - push_nonvol_reg rbx - push_nonvol_reg r12 - push_register rcx // padding - - mov rbx, rdi // save MethodTable - mov r12, rsi // save element count - - // rax = GetThread() - INLINE_GETTHREAD - - mov rcx, rax // rcx = Thread* - - // Compute overall allocation size (align(base size + (element size * elements), 8)). - movzx eax, word ptr [rbx + OFFSETOF__MethodTable__m_usComponentSize] - mul r12 - mov edx, [rbx + OFFSETOF__MethodTable__m_uBaseSize] - add rax, rdx - add rax, 7 - and rax, -8 - - // rax == array size - // rbx == MethodTable - // rcx == Thread* - // r12 == element count - - mov rdx, rax - add rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc LOCAL_LABEL(RhpNewArray_RarePath) - - // rax == new alloc ptr - // rbx == MethodTable - // rcx == Thread* - // rdx == array size - // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja LOCAL_LABEL(RhpNewArray_RarePath) - - mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax - - // calc the new object pointer - sub rax, rdx - - mov [rax + OFFSETOF__Object__m_pEEType], rbx - mov [rax + OFFSETOF__Array__m_Length], r12d - - .cfi_remember_state - pop_register rcx // padding - pop_nonvol_reg r12 - pop_nonvol_reg rbx - ret - - .cfi_restore_state - .cfi_def_cfa_offset 32 // workaround cfi_restore_state bug -LOCAL_LABEL(RhpNewArray_RarePath): - mov rdi, rbx // restore MethodTable - mov rsi, r12 // restore element count - // passing array size in rdx - - pop_register rcx // padding - pop_nonvol_reg r12 - pop_nonvol_reg rbx - jmp C_FUNC(RhpNewArrayRare) - -LOCAL_LABEL(ArraySizeOverflow): - // We get here if the size of the final array object can't be represented as an unsigned - // 32-bit value. We're going to tail-call to a managed helper that will throw - // an overflow exception that the caller of this allocator understands. - - // rdi holds MethodTable pointer already - mov esi, 1 // Indicate that we should throw OverflowException - jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) - -NESTED_END RhpNewArray, _TEXT - -NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler - - // rdi == MethodTable - // rsi == element count - - PUSH_COOP_PINVOKE_FRAME rcx - END_PROLOGUE - - // rcx: transition frame - - // Preserve the MethodTable in RBX - mov rbx, rdi - - mov rdx, rsi // numElements - - // passing MethodTable in rdi - xor rsi, rsi // uFlags - // passing pTransitionFrame in rcx - - // Call the rest of the allocation helper. - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call C_FUNC(RhpGcAlloc) - - test rax, rax - jz LOCAL_LABEL(ArrayOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - ret - - .cfi_restore_state - .cfi_def_cfa_offset 96 // workaround cfi_restore_state bug -LOCAL_LABEL(ArrayOutOfMemory): - // This is the OOM failure path. We're going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mov rdi, rbx // MethodTable pointer - xor esi, esi // Indicate that we should throw OOM. 
- - POP_COOP_PINVOKE_FRAME - - jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) - -NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm deleted file mode 100644 index 6ba69c0c1412..000000000000 --- a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm +++ /dev/null @@ -1,247 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - -include asmmacros.inc - - -;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -;; allocation context then automatically fallback to the slow allocation path. -;; RCX == MethodTable -LEAF_ENTRY RhpNewFast, _TEXT - - ;; rdx = GetThread(), TRASHES rax - INLINE_GETTHREAD rdx, rax - - ;; - ;; rcx contains MethodTable pointer - ;; - mov r8d, [rcx + OFFSETOF__MethodTable__m_uBaseSize] - - ;; - ;; eax: base size - ;; rcx: MethodTable pointer - ;; rdx: Thread pointer - ;; - - mov rax, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - add r8, rax - cmp r8, [rdx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja RhpNewFast_RarePath - - ;; set the new alloc pointer - mov [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], r8 - - ;; set the new object's MethodTable pointer - mov [rax], rcx - ret - -RhpNewFast_RarePath: - xor edx, edx - jmp RhpNewObject - -LEAF_END RhpNewFast, _TEXT - - - -;; Allocate non-array object with finalizer -;; RCX == MethodTable -LEAF_ENTRY RhpNewFinalizable, _TEXT - mov edx, GC_ALLOC_FINALIZE - jmp RhpNewObject -LEAF_END RhpNewFinalizable, _TEXT - - - -;; Allocate non-array object -;; RCX == MethodTable -;; EDX == alloc flags -NESTED_ENTRY RhpNewObject, _TEXT - - PUSH_COOP_PINVOKE_FRAME r9 - END_PROLOGUE - - ; R9: transition frame - - ;; Preserve the MethodTable in RSI - mov rsi, rcx - - xor r8d, r8d ; numElements - - ;; Call the rest of the allocation helper. - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test rax, rax - jz NewOutOfMemory - - POP_COOP_PINVOKE_FRAME - ret - -NewOutOfMemory: - ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw - ;; an out of memory exception that the caller of this allocator understands. - - mov rcx, rsi ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - - jmp RhExceptionHandling_FailedAllocation -NESTED_END RhpNewObject, _TEXT - - -;; Allocate a string. -;; RCX == MethodTable -;; EDX == character/element count -LEAF_ENTRY RhNewString, _TEXT - - ; we want to limit the element count to the non-negative 32-bit int range - cmp rdx, MAX_STRING_LENGTH - ja StringSizeOverflow - - ; Compute overall allocation size (align(base size + (element size * elements), 8)). 
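The size computation spelled out in the comment above (and implemented by the lea/and pair that follows) is align(base size + element size * count, 8). A small C++ illustration; the example constants are illustrative only, not values taken from this patch:

    #include <cstddef>

    // align(base + componentSize * count, 8), e.g. with base 22 and componentSize 2:
    //   count 5 -> 22 + 10 = 32 -> stays 32;  count 6 -> 22 + 12 = 34 -> rounds up to 40.
    static size_t AlignedAllocSize(size_t base, size_t componentSize, size_t count)
    {
        return (base + componentSize * count + 7) & ~(size_t)7;
    }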
- lea rax, [(rdx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)] - and rax, -8 - - ; rax == string size - ; rcx == MethodTable - ; rdx == element count - - INLINE_GETTHREAD r10, r8 - - mov r8, rax - add rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc RhpNewArrayRare - - ; rax == new alloc ptr - ; rcx == MethodTable - ; rdx == element count - ; r8 == array size - ; r10 == thread - cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja RhpNewArrayRare - - mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax - - ; calc the new object pointer - sub rax, r8 - - mov [rax + OFFSETOF__Object__m_pEEType], rcx - mov [rax + OFFSETOF__String__m_Length], edx - - ret - -StringSizeOverflow: - ; We get here if the size of the final string object can't be represented as an unsigned - ; 32-bit value. We're going to tail-call to a managed helper that will throw - ; an OOM exception that the caller of this allocator understands. - - ; rcx holds MethodTable pointer already - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation -LEAF_END RhNewString, _TEXT - - -;; Allocate one dimensional, zero based array (SZARRAY). -;; RCX == MethodTable -;; EDX == element count -LEAF_ENTRY RhpNewArray, _TEXT - - ; we want to limit the element count to the non-negative 32-bit int range - cmp rdx, 07fffffffh - ja ArraySizeOverflow - - ; save element count - mov r8, rdx - - ; Compute overall allocation size (align(base size + (element size * elements), 8)). - movzx eax, word ptr [rcx + OFFSETOF__MethodTable__m_usComponentSize] - mul rdx - mov edx, [rcx + OFFSETOF__MethodTable__m_uBaseSize] - add rax, rdx - add rax, 7 - and rax, -8 - - mov rdx, r8 - - ; rax == array size - ; rcx == MethodTable - ; rdx == element count - - INLINE_GETTHREAD r10, r8 - - mov r8, rax - add rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc RhpNewArrayRare - - ; rax == new alloc ptr - ; rcx == MethodTable - ; rdx == element count - ; r8 == array size - ; r10 == thread - cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja RhpNewArrayRare - - mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax - - ; calc the new object pointer - sub rax, r8 - - mov [rax + OFFSETOF__Object__m_pEEType], rcx - mov [rax + OFFSETOF__Array__m_Length], edx - - ret - -ArraySizeOverflow: - ; We get here if the size of the final array object can't be represented as an unsigned - ; 32-bit value. We're going to tail-call to a managed helper that will throw - ; an overflow exception that the caller of this allocator understands. - - ; rcx holds MethodTable pointer already - mov edx, 1 ; Indicate that we should throw OverflowException - jmp RhExceptionHandling_FailedAllocation -LEAF_END RhpNewArray, _TEXT - -NESTED_ENTRY RhpNewArrayRare, _TEXT - - ; rcx == MethodTable - ; rdx == element count - - PUSH_COOP_PINVOKE_FRAME r9 - END_PROLOGUE - - ; r9: transition frame - - ; Preserve the MethodTable in RSI - mov rsi, rcx - - ; passing MethodTable in rcx - mov r8, rdx ; numElements - xor rdx, rdx ; uFlags - ; passing pTransitionFrame in r9 - - ; Call the rest of the allocation helper. - ; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test rax, rax - jz ArrayOutOfMemory - - POP_COOP_PINVOKE_FRAME - ret - -ArrayOutOfMemory: - ;; This is the OOM failure path. 
We're going to tail-call to a managed helper that will throw - ;; an out of memory exception that the caller of this allocator understands. - - mov rcx, rsi ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - - jmp RhExceptionHandling_FailedAllocation - -NESTED_END RhpNewArrayRare, _TEXT - - - END diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc index 4a3437f2f061..a81cfc9679a4 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc @@ -302,6 +302,8 @@ PUSH_COOP_PINVOKE_FRAME macro trashReg ;; allocate scratch space and any required alignment alloc_stack 28h + + END_PROLOGUE endm ;; @@ -322,6 +324,10 @@ POP_COOP_PINVOKE_FRAME macro pop r10 ; discard caller RSP endm +INLINE_GET_ALLOC_CONTEXT_BASE macro destReg, trashReg + INLINE_GET_TLS_VAR destReg, trashReg, tls_CurrentThread +endm + ; - TAILCALL_RAX: ("jmp rax") should be used for tailcalls, this emits an instruction ; sequence which is recognized by the unwinder as a valid epilogue terminator TAILJMP_RAX TEXTEQU @@ -329,17 +335,14 @@ TAILJMP_RAX TEXTEQU ;; ;; CONSTANTS -- INTEGER ;; -TSF_Attached equ 01h TSF_SuppressGcStress equ 08h TSF_DoNotTriggerGc equ 10h ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit - - +OFFSETOF__ee_alloc_context__alloc_ptr equ OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__ee_alloc_context equ OFFSETOF__Thread__m_eeAllocContext ;; GC type flags GC_ALLOC_FINALIZE equ 1 @@ -364,8 +367,8 @@ TrapThreadsFlags_None equ 0 TrapThreadsFlags_AbortInProgress equ 1 TrapThreadsFlags_TrapThreads equ 2 -;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT -STATUS_REDHAWK_THREAD_ABORT equ 43h +;; This must match HwExceptionCode.STATUS_NATIVEAOT_THREAD_ABORT +STATUS_NATIVEAOT_THREAD_ABORT equ 43h ;; ;; CONSTANTS -- SYMBOLS diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h new file mode 100644 index 000000000000..eb9905ffca63 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h @@ -0,0 +1,7 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include // generated by the build from AsmOffsets.cpp +#include diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc new file mode 100644 index 000000000000..956d4d22e383 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc @@ -0,0 +1,6 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. 
+ +; This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +include AsmMacros.inc \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S new file mode 100644 index 000000000000..378abf0f8ca9 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S @@ -0,0 +1,19 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +// trick to avoid PLT relocation at runtime which corrupts registers +#define REL_C_FUNC(name) C_FUNC(name)@gotpcrel + +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // r11 contains indirection cell address already, so it will naturally be passed to RhpCidResolve + // the universal transition thunk as an argument to RhpCidResolve + mov r10, [rip + REL_C_FUNC(RhpCidResolve)] + jmp qword ptr [rip + REL_C_FUNC(RhpUniversalTransition_DebugStepTailCall)] + +LEAF_END RhpInterfaceDispatchSlow, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm new file mode 100644 index 000000000000..b2f58cb4f31e --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm @@ -0,0 +1,21 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransition_DebugStepTailCall : PROC + +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransition_DebugStepTailCall : PROC + +;; Cache miss case, call the runtime to resolve the target and update the cache. 
+;; Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + ;; r11 contains indirection cell address + lea r10, RhpCidResolve + jmp RhpUniversalTransition_DebugStepTailCall + +LEAF_END RhpInterfaceDispatchSlow, _TEXT + +end diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S index c5ce852e46fd..d2bce874ceca 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S @@ -313,25 +313,7 @@ NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] mov r15, [rax] -#if 0 // _DEBUG // @TODO: temporarily removed because trashing RBP breaks the debugger - // trash the values at the old homes to make sure nobody uses them - mov rcx, 0xbaaddeed - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] - mov [rax], rcx -#endif - - mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame - mov rsi, [rsp + locArg0] // rsi <- exception object + mov rdi, [rsp + locArg0] // rsi <- exception object call qword ptr [rsp + locArg1] // call handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 @@ -453,43 +435,10 @@ NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] mov r15, [rax] -#if 0 // _DEBUG // @TODO: temporarily removed because trashing RBP breaks the debugger - // trash the values at the old homes to make sure nobody uses them - mov rcx, 0xbaaddeed - mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] - mov [rax], rcx -#endif - - mov rdi, [rsi + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame call qword ptr [rsp + locArg0] // handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 - mov rsi, [rsp + locArg1] // rsi <- regdisplay - - mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] - mov [rax] , rbx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] - mov [rax] , rbp - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] - mov [rax] , r12 - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] - mov [rax] , r13 - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] - mov [rax] , r14 - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] - mov [rax] , r15 - mov rax, [rsp + locThread] // rax <- Thread* lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc @@ -518,10 +467,7 @@ NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] mov rbp, [rax] - mov rax, rsi // rax <- handler funclet address - mov rsi, rdi // rsi <- exception object - mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame - call rax + call rsi ALTERNATE_ENTRY RhpCallFilterFunclet2 @@ -590,23 +536,6 @@ NESTED_ENTRY RhpCallPropagateExceptionCallback, _TEXT, NoHandler mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] mov r15, [rax] -#if 0 // _DEBUG // @TODO: temporarily removed because trashing 
RBP breaks the debugger - // trash the values at the old homes to make sure nobody uses them - mov rcx, 0xbaaddeed - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] - mov [rax], rcx -#endif - #ifdef _DEBUG // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we // have to spill all the preserved registers and then refill them after the call. diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm index 741b916f00b9..928a3a9bce78 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm @@ -401,27 +401,6 @@ NESTED_ENTRY RhpCallCatchFunclet, _TEXT mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] mov r15, [rax] -if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the debugger - ;; trash the values at the old homes to make sure nobody uses them - mov r9, 0baaddeedh - mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbx] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pRsi] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pRdi] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pR12] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pR13] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pR14] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] - mov [rax], r9 -endif - movdqa xmm6, [r8 + OFFSETOF__REGDISPLAY__Xmm + 0*10h] movdqa xmm7, [r8 + OFFSETOF__REGDISPLAY__Xmm + 1*10h] movdqa xmm8, [r8 + OFFSETOF__REGDISPLAY__Xmm + 2*10h] @@ -434,8 +413,7 @@ endif movdqa xmm14,[r8 + OFFSETOF__REGDISPLAY__Xmm + 8*10h] movdqa xmm15,[r8 + OFFSETOF__REGDISPLAY__Xmm + 9*10h] - mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame - mov rdx, [rsp + rsp_offsetof_arguments + 0h] ;; rdx <- exception object + mov rcx, [rsp + rsp_offsetof_arguments + 0h] ;; rcx <- exception object call qword ptr [rsp + rsp_offsetof_arguments + 8h] ;; call handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 @@ -530,7 +508,7 @@ endif je @f ;; It was the ThreadAbortException, so rethrow it - mov rcx, STATUS_REDHAWK_THREAD_ABORT + mov rcx, STATUS_NATIVEAOT_THREAD_ABORT mov rdx, rax ;; rdx <- continuation address as exception RIP lea rax, [RhpThrowHwEx] ;; Throw the ThreadAbortException as a special kind of hardware exception @@ -618,63 +596,10 @@ NESTED_ENTRY RhpCallFinallyFunclet, _TEXT movdqa xmm14,[rdx + OFFSETOF__REGDISPLAY__Xmm + 8*10h] movdqa xmm15,[rdx + OFFSETOF__REGDISPLAY__Xmm + 9*10h] -if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the debugger - ;; trash the values at the old homes to make sure nobody uses them - mov r9, 0baaddeedh - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] - mov [rax], r9 - mov rax, [rdx + 
OFFSETOF__REGDISPLAY__pR15] - mov [rax], r9 -endif - - mov rcx, [rdx + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame call qword ptr [rsp + rsp_offsetof_arguments + 0h] ;; handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 - mov rdx, [rsp + rsp_offsetof_arguments + 8h] ;; rdx <- regdisplay - - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] - mov [rax] , rbx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] - mov [rax] , rbp - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] - mov [rax] , rsi - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] - mov [rax] , rdi - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] - mov [rax] , r12 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] - mov [rax] , r13 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] - mov [rax] , r14 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] - mov [rax] , r15 - - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 0*10h], xmm6 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 1*10h], xmm7 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 2*10h], xmm8 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 3*10h], xmm9 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 4*10h], xmm10 - - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 5*10h], xmm11 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 6*10h], xmm12 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 7*10h], xmm13 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 8*10h], xmm14 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 9*10h], xmm15 - mov rax, [rsp + rsp_offsetof_thread] ;; rax <- Thread* lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc @@ -702,10 +627,7 @@ NESTED_ENTRY RhpCallFilterFunclet, _TEXT mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] mov rbp, [rax] - mov rax, rdx ;; rax <- handler funclet address - mov rdx, rcx ;; rdx <- exception object - mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame - call rax + call rdx ALTERNATE_ENTRY RhpCallFilterFunclet2 diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S index ef4709055f01..0d6f48c60b97 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S @@ -131,7 +131,7 @@ NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler ret LOCAL_LABEL(Abort): POP_PROBE_FRAME - mov rcx, STATUS_REDHAWK_THREAD_ABORT + mov rcx, STATUS_NATIVEAOT_THREAD_ABORT pop rdx // return address as exception RIP jmp C_FUNC(RhpThrowHwEx) // Throw the ThreadAbortException as a special kind of hardware exception @@ -148,7 +148,6 @@ LEAF_END RhpGcPoll, _TEXT NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler PUSH_COOP_PINVOKE_FRAME rdi - END_PROLOGUE call C_FUNC(RhpGcPoll2) POP_COOP_PINVOKE_FRAME ret diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index ae06ae891886..67e139beb92e 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -50,7 +50,7 @@ endm ;; registers and return value to their values from before the probe was called (while also updating any ;; object refs or byrefs). 
;; -POP_PROBE_FRAME macro +POP_PROBE_FRAME macro movdqa xmm0, [rsp + 20h] add rsp, 20h + 10h + 8 ; deallocate stack and discard saved m_RIP pop rbp @@ -124,7 +124,7 @@ NESTED_ENTRY RhpWaitForGC, _TEXT ret Abort: POP_PROBE_FRAME - mov rcx, STATUS_REDHAWK_THREAD_ABORT + mov rcx, STATUS_NATIVEAOT_THREAD_ABORT pop rdx ;; return address as exception RIP jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception @@ -140,7 +140,6 @@ LEAF_END RhpGcPoll, _TEXT NESTED_ENTRY RhpGcPollRare, _TEXT PUSH_COOP_PINVOKE_FRAME rcx - END_PROLOGUE call RhpGcPoll2 POP_COOP_PINVOKE_FRAME ret diff --git a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.S deleted file mode 100644 index 9e1239d1de06..000000000000 --- a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.S +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -.intel_syntax noprefix -#include // generated by the build from AsmOffsets.cpp -#include - -// trick to avoid PLT relocation at runtime which corrupts registers -#define REL_C_FUNC(name) C_FUNC(name)@gotpcrel - - -// Macro that generates a stub consuming a cache with the given number of entries. -.macro DEFINE_INTERFACE_DISPATCH_STUB entries - -LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT - - // r10 currently contains the indirection cell address. - // load r11 to point to the cache block. - mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] - - // Load the MethodTable from the object instance in rdi. - ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries - mov rax, [rdi] - - CurrentOffset = OFFSETOF__InterfaceDispatchCache__m_rgEntries - - // For each entry in the cache, see if its MethodTable type matches the MethodTable in rax. - // If so, call the second cache entry. If not, skip the InterfaceDispatchCacheEntry. - .rept \entries - cmp rax, [r11 + CurrentOffset] - jne 0f - jmp [r11 + CurrentOffset + 8] - 0: - CurrentOffset = CurrentOffset + 16 - .endr - - // r10 still contains the indirection cell address. - - jmp C_FUNC(RhpInterfaceDispatchSlow) -LEAF_END RhpInterfaceDispatch\entries, _TEXT - -.endm // DEFINE_INTERFACE_DISPATCH_STUB - - - -// Define all the stub routines we currently need. -// -// The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed. -// If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo -// -// If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the -// *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens -// during the interface dispatch. -// -DEFINE_INTERFACE_DISPATCH_STUB 1 -DEFINE_INTERFACE_DISPATCH_STUB 2 -DEFINE_INTERFACE_DISPATCH_STUB 4 -DEFINE_INTERFACE_DISPATCH_STUB 8 -DEFINE_INTERFACE_DISPATCH_STUB 16 -DEFINE_INTERFACE_DISPATCH_STUB 32 -DEFINE_INTERFACE_DISPATCH_STUB 64 - -// Stub dispatch routine for dispatch to a vtable slot -LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // UNIXTODO: Implement this function - int 3 -LEAF_END RhpVTableOffsetDispatch, _TEXT - -// Initial dispatch on an interface when we don't have a cache yet. -LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT -ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch - // Trigger an AV if we're dispatching on a null this. 
- // The exception handling infrastructure is aware of the fact that this is the first - // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here - // to a NullReferenceException at the callsite. - cmp byte ptr [rdi], 0 - - // Just tail call to the cache miss helper. - jmp C_FUNC(RhpInterfaceDispatchSlow) - -LEAF_END RhpInitialInterfaceDispatch, _TEXT - -// Cache miss case, call the runtime to resolve the target and update the cache. -// Use universal transition helper to allow an exception to flow out of resolution -LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // r10 contains indirection cell address, move to r11 where it will be passed by - // the universal transition thunk as an argument to RhpCidResolve - mov r11, r10 - mov r10, [rip + REL_C_FUNC(RhpCidResolve)] - jmp qword ptr [rip + REL_C_FUNC(RhpUniversalTransition_DebugStepTailCall)] - -LEAF_END RhpInterfaceDispatchSlow, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.asm b/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.asm deleted file mode 100644 index b93d948ad5d1..000000000000 --- a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.asm +++ /dev/null @@ -1,119 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - -include AsmMacros.inc - - -ifdef FEATURE_CACHED_INTERFACE_DISPATCH - - -EXTERN RhpCidResolve : PROC -EXTERN RhpUniversalTransition_DebugStepTailCall : PROC - -;; Macro that generates code to check a single cache entry. -CHECK_CACHE_ENTRY macro entry -NextLabel textequ @CatStr( Attempt, %entry+1 ) - cmp rax, [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16)] - jne NextLabel - jmp qword ptr [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16) + 8] -NextLabel: -endm - - -;; Macro that generates a stub consuming a cache with the given number of entries. -DEFINE_INTERFACE_DISPATCH_STUB macro entries - -StubName textequ @CatStr( RhpInterfaceDispatch, entries ) -StubAVLocation textequ @CatStr( RhpInterfaceDispatchAVLocation, entries ) - -LEAF_ENTRY StubName, _TEXT - -;EXTERN CID_g_cInterfaceDispatches : DWORD - ;inc [CID_g_cInterfaceDispatches] - - ;; r10 currently contains the indirection cell address. - ;; load r11 to point to the cache block. - mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Load the MethodTable from the object instance in rcx. - ALTERNATE_ENTRY StubAVLocation - mov rax, [rcx] - -CurrentEntry = 0 - while CurrentEntry lt entries - CHECK_CACHE_ENTRY %CurrentEntry -CurrentEntry = CurrentEntry + 1 - endm - - ;; r10 still contains the indirection cell address. - - jmp RhpInterfaceDispatchSlow - -LEAF_END StubName, _TEXT - - endm ;; DEFINE_INTERFACE_DISPATCH_STUB - - -;; Define all the stub routines we currently need. -;; -;; The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed. -;; If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo -;; -;; If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the -;; *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens -;; during the interface dispatch. 
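The unrolled compare/jump sequence generated by DEFINE_INTERFACE_DISPATCH_STUB amounts to a linear probe of the dispatch cell's cache. A hedged C++ sketch of that probe; the structure layout is a simplified stand-in for the real InterfaceDispatchCache, and the miss case corresponds to the jump to RhpInterfaceDispatchSlow:

    #include <cstddef>

    struct InterfaceDispatchCacheEntry { void* pMethodTable; void* pTarget; };

    // Compare the object's MethodTable against each cached entry; on a hit the stub
    // tail-calls the cached target, on a miss it falls through to the slow-path resolver.
    static void* ProbeDispatchCache(void* objectMethodTable,
                                    const InterfaceDispatchCacheEntry* entries,
                                    size_t count)
    {
        for (size_t i = 0; i < count; i++)
        {
            if (entries[i].pMethodTable == objectMethodTable)
                return entries[i].pTarget;
        }
        return nullptr;   // miss: resolve and update the cache via RhpCidResolve
    }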
-;; -DEFINE_INTERFACE_DISPATCH_STUB 1 -DEFINE_INTERFACE_DISPATCH_STUB 2 -DEFINE_INTERFACE_DISPATCH_STUB 4 -DEFINE_INTERFACE_DISPATCH_STUB 8 -DEFINE_INTERFACE_DISPATCH_STUB 16 -DEFINE_INTERFACE_DISPATCH_STUB 32 -DEFINE_INTERFACE_DISPATCH_STUB 64 - -;; Stub dispatch routine for dispatch to a vtable slot -LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - ;; r10 currently contains the indirection cell address. - ;; load rax to point to the vtable offset (which is stored in the m_pCache field). - mov rax, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset - ;; to get the address in the vtable of what we want to dereference - add rax, [rcx] - - ;; Load the target address of the vtable into rax - mov rax, [rax] - - TAILJMP_RAX -LEAF_END RhpVTableOffsetDispatch, _TEXT - - -;; Initial dispatch on an interface when we don't have a cache yet. -LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT -ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch - ;; Trigger an AV if we're dispatching on a null this. - ;; The exception handling infrastructure is aware of the fact that this is the first - ;; instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here - ;; to a NullReferenceException at the callsite. - cmp byte ptr [rcx], 0 - - ;; Just tail call to the cache miss helper. - jmp RhpInterfaceDispatchSlow - -LEAF_END RhpInitialInterfaceDispatch, _TEXT - -;; Cache miss case, call the runtime to resolve the target and update the cache. -;; Use universal transition helper to allow an exception to flow out of resolution -LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - ;; r10 contains indirection cell address, move to r11 where it will be passed by - ;; the universal transition thunk as an argument to RhpCidResolve - mov r11, r10 - lea r10, RhpCidResolve - jmp RhpUniversalTransition_DebugStepTailCall - -LEAF_END RhpInterfaceDispatchSlow, _TEXT - - -endif ;; FEATURE_CACHED_INTERFACE_DISPATCH - -end diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S deleted file mode 100644 index e55e682653b5..000000000000 --- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S +++ /dev/null @@ -1,328 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -.intel_syntax noprefix -#include - -#ifdef WRITE_BARRIER_CHECK - -.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG - - // If g_GCShadow is 0, don't perform the check. - cmp qword ptr [C_VAR(g_GCShadow)], 0 - je LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) - - // Save DESTREG since we're about to modify it (and we need the original value both within the macro and - // once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of - // the prolog inside a method without a frame. But given that this is only debug code and generally we - // shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier - // variants to set up frames. The compiler knows exactly which registers are trashed in the simple write - // barrier case, so we don't have any more scratch registers to play with (and doing so would only make - // things harder if at a later stage we want to allow multiple barrier versions based on the input - // registers). - push \DESTREG - - // Transform DESTREG into the equivalent address in the shadow heap. 
- sub \DESTREG, [C_VAR(g_lowest_address)] - jb LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) - add \DESTREG, [C_VAR(g_GCShadow)] - cmp \DESTREG, [C_VAR(g_GCShadowEnd)] - jae LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) - - // Update the shadow heap. - mov [\DESTREG], \REFREG - - // Now check that the real heap location still contains the value we just wrote into the shadow heap. This - // read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to - // recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock - // prefix). - xchg [rsp], \DESTREG - cmp [\DESTREG], \REFREG - jne LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG) - - // The original DESTREG value is now restored but the stack has a value (the shadow version of the - // location) pushed. Need to discard this push before we are done. - add rsp, 8 - jmp LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) - -LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG): - // Someone went and updated the real heap. We need to invalidate the shadow location since we can't - // guarantee whose shadow update won. - - // Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an - // additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg - // variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 64-bit - // immediate and therefore must be moved into a register before it can be written to the shadow - // location. - xchg [rsp], \DESTREG - push \REFREG - movabs \REFREG, INVALIDGCVALUE - mov qword ptr [\DESTREG], \REFREG - pop \REFREG - -LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG): - // Restore original DESTREG value from the stack. - pop \DESTREG - -LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG): -.endm - -#else // WRITE_BARRIER_CHECK - -.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG -.endm - -#endif // WRITE_BARRIER_CHECK - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the -// name of the register that points to the location to be updated and the name of the register that holds the -// object reference (this should be in upper case as it's used in the definition of the name of the helper). -.macro DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG - - // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - // we're in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW \BASENAME, \REFREG, rdi - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - mov r11, [C_VAR(g_write_watch_table)] - cmp r11, 0x0 - je LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG) - - mov r10, rdi - shr r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift - add r10, r11 - cmp byte ptr [r10], 0x0 - jne LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG) - mov byte ptr [r10], 0xFF -#endif - -LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG): - - // If the reference is to an object that's not in an ephemeral generation we have no need to track it - // (since the object won't be collected or moved by an ephemeral collection). 
- cmp \REFREG, [C_VAR(g_ephemeral_low)] - jb LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) - cmp \REFREG, [C_VAR(g_ephemeral_high)] - jae LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) - - // We have a location on the GC heap being updated with a reference to an ephemeral object so we must - // track this write. The location address is translated into an offset in the card table bitmap. We set - // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write - // the byte if it hasn't already been done since writes are expensive and impact scaling. - shr rdi, 0x0B - mov r10, [C_VAR(g_card_table)] - cmp byte ptr [rdi + r10], 0x0FF - je LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) - -// We get here if it's necessary to update the card table. - mov byte ptr [rdi + r10], 0xFF - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Shift rdi by 0x0A more to get the card bundle byte (we shifted by 0x0B already) - shr rdi, 0x0A - add rdi, [C_VAR(g_card_bundle_table)] - cmp byte ptr [rdi], 0xFF - je LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) - - mov byte ptr [rdi], 0xFF -#endif - -LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG): - ret - -.endm - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. One argument is taken, the -// name of the register that will hold the object reference (this should be in upper case as it's used in the -// definition of the name of the helper). -.macro DEFINE_UNCHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME - -// Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard -// decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that -// location is in one of the other general registers determined by the value of REFREG. - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction -// - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address -LEAF_ENTRY RhpAssignRef\EXPORT_REG_NAME, _TEXT - - // Export the canonical write barrier under unqualified name as well - .ifc \REFREG, RSI - ALTERNATE_ENTRY RhpAssignRef - ALTERNATE_ENTRY RhpAssignRefAVLocation - .endif - - // Write the reference into the location. Note that we rely on the fact that no GC can occur between here - // and the card table update we may perform below. - mov qword ptr [rdi], \REFREG - - DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, \REFREG - -LEAF_END RhpAssignRef\EXPORT_REG_NAME, _TEXT -.endm - -// One day we might have write barriers for all the possible argument registers but for now we have -// just one write barrier that assumes the input register is RSI. -DEFINE_UNCHECKED_WRITE_BARRIER RSI, ESI - -// -// Define the helpers used to implement the write barrier required when writing an object reference into a -// location residing on the GC heap. Such write barriers allow the GC to optimize which objects in -// non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral -// collection. 
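For readers following the checked barrier macros that come next, the overall flow the deleted assembly implements is: store the reference, skip destinations outside the GC heap, skip non-ephemeral targets, then dirty one card-table byte for the destination. A hedged C++ sketch under those assumptions; the extern declarations are simplified stand-ins for the runtime globals referenced above:

    #include <cstddef>
    #include <cstdint>

    extern uint8_t* g_lowest_address;
    extern uint8_t* g_highest_address;
    extern uint8_t* g_ephemeral_low;
    extern uint8_t* g_ephemeral_high;
    extern uint8_t* g_card_table;

    static void CheckedWriteBarrier(void** dst, void* ref)
    {
        *dst = ref;                                          // store first; the card update follows without an intervening GC

        uint8_t* d = (uint8_t*)dst;
        if (d < g_lowest_address || d >= g_highest_address)  // not a GC heap location (handle, stack slot, ...)
            return;

        uint8_t* r = (uint8_t*)ref;
        if (r < g_ephemeral_low || r >= g_ephemeral_high)    // only ephemeral references need tracking
            return;

        uint8_t* card = g_card_table + ((size_t)d >> 0x0B);  // one card byte per 2 KB of heap
        if (*card != 0xFF)                                   // skip redundant writes; they hurt scaling
            *card = 0xFF;
    }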
-// - -.macro DEFINE_CHECKED_WRITE_BARRIER_CORE BASENAME, REFREG - - // The location being updated might not even lie in the GC heap (a handle or stack location for instance), - // in which case no write barrier is required. - cmp rdi, [C_VAR(g_lowest_address)] - jb LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) - cmp rdi, [C_VAR(g_highest_address)] - jae LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) - - DEFINE_UNCHECKED_WRITE_BARRIER_CORE \BASENAME, \REFREG - -.endm - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. One argument is taken, the -// name of the register that will hold the object reference (this should be in upper case as it's used in the -// definition of the name of the helper). -.macro DEFINE_CHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME - -// Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard -// decoration). The location to be updated is always in RDI. The object reference that will be assigned into -// that location is in one of the other general registers determined by the value of REFREG. - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction -// - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address -LEAF_ENTRY RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT - - // Export the canonical write barrier under unqualified name as well - .ifc \REFREG, RSI - ALTERNATE_ENTRY RhpCheckedAssignRef - ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation - .endif - - // Write the reference into the location. Note that we rely on the fact that no GC can occur between here - // and the card table update we may perform below. - mov qword ptr [rdi], \REFREG - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, \REFREG - -LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT -.endm - -// One day we might have write barriers for all the possible argument registers but for now we have -// just one write barrier that assumes the input register is RSI. -DEFINE_CHECKED_WRITE_BARRIER RSI, ESI - -LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT - mov rax, rdx - lock cmpxchg [rdi], rsi - jne LOCAL_LABEL(RhpCheckedLockCmpXchg_NoBarrierRequired_RSI) - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, RSI - -LEAF_END RhpCheckedLockCmpXchg, _TEXT - -LEAF_ENTRY RhpCheckedXchg, _TEXT - - // Setup rax with the new object for the exchange, that way it will automatically hold the correct result - // afterwards and we can leave rdx unaltered ready for the GC write barrier below. - mov rax, rsi - xchg [rdi], rax - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RSI - -LEAF_END RhpCheckedXchg, _TEXT - -// -// RhpByRefAssignRef simulates movs instruction for object references. -// -// On entry: -// rdi: address of ref-field (assigned to) -// rsi: address of the data (source) -// -// On exit: -// rdi, rsi are incremented by 8, -// rcx, rax: trashed -// -// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF -// if you add more trashed registers. 
-// -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1/2 -// - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address -LEAF_ENTRY RhpByRefAssignRef, _TEXT -ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 - mov rcx, [rsi] -ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2 - mov [rdi], rcx - - // Check whether the writes were even into the heap. If not there's no card update required. - cmp rdi, [C_VAR(g_lowest_address)] - jb LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired) - cmp rdi, [C_VAR(g_highest_address)] - jae LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired) - - // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - // we're in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW BASENAME, rcx, rdi - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - cmp qword ptr [C_VAR(g_write_watch_table)], 0x0 - je LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable) - - mov rax, rdi - shr rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift - add rax, [C_VAR(g_write_watch_table)] - cmp byte ptr [rax], 0x0 - jne LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable) - mov byte ptr [rax], 0xFF -#endif - -LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable): - - // If the reference is to an object that's not in an ephemeral generation we have no need to track it - // (since the object won't be collected or moved by an ephemeral collection). - cmp rcx, [C_VAR(g_ephemeral_low)] - jb LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired) - cmp rcx, [C_VAR(g_ephemeral_high)] - jae LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired) - - // move current rdi value into rcx, we need to keep rdi and eventually increment by 8 - mov rcx, rdi - - // We have a location on the GC heap being updated with a reference to an ephemeral object so we must - // track this write. The location address is translated into an offset in the card table bitmap. We set - // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write - // the byte if it hasn't already been done since writes are expensive and impact scaling. - shr rcx, 0x0B - mov rax, [C_VAR(g_card_table)] - cmp byte ptr [rcx + rax], 0x0FF - je LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired) - -// We get here if it's necessary to update the card table. - mov byte ptr [rcx + rax], 0xFF - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already) - shr rcx, 0x0A - add rcx, [C_VAR(g_card_bundle_table)] - cmp byte ptr [rcx], 0xFF - je LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired) - - mov byte ptr [rcx], 0xFF -#endif - -LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired): - // Increment the pointers before leaving - add rdi, 0x8 - add rsi, 0x8 - ret -LEAF_END RhpByRefAssignRef, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm deleted file mode 100644 index 302b9e0a8b1f..000000000000 --- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm +++ /dev/null @@ -1,346 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. 
- -include AsmMacros.inc - -;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used -;; during garbage collections to verify that object references where never written to the heap without using a -;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing -;; new references to the real heap. Since this can't be solved perfectly without critical sections around the -;; entire update process, we instead update the shadow location and then re-check the real location (as two -;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value -;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC -;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the -;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. -ifdef WRITE_BARRIER_CHECK - -g_GCShadow TEXTEQU -g_GCShadowEnd TEXTEQU -INVALIDGCVALUE EQU 0CCCCCCCDh - -EXTERN g_GCShadow : QWORD -EXTERN g_GCShadowEnd : QWORD - -UPDATE_GC_SHADOW macro BASENAME, REFREG, DESTREG - - ;; If g_GCShadow is 0, don't perform the check. - cmp g_GCShadow, 0 - je &BASENAME&_UpdateShadowHeap_Done_&REFREG& - - ;; Save DESTREG since we're about to modify it (and we need the original value both within the macro and - ;; once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of - ;; the prolog inside a method without a frame. But given that this is only debug code and generally we - ;; shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier - ;; variants to set up frames. Unlike RhpBulkWriteBarrier below which is treated as a helper call using the - ;; usual calling convention, the compiler knows exactly which registers are trashed in the simple write - ;; barrier case, so we don't have any more scratch registers to play with (and doing so would only make - ;; things harder if at a later stage we want to allow multiple barrier versions based on the input - ;; registers). - push DESTREG - - ;; Transform DESTREG into the equivalent address in the shadow heap. - sub DESTREG, g_lowest_address - jb &BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG& - add DESTREG, [g_GCShadow] - cmp DESTREG, [g_GCShadowEnd] - jae &BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG& - - ;; Update the shadow heap. - mov [DESTREG], REFREG - - ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. This - ;; read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to - ;; recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock - ;; prefix). - xchg [rsp], DESTREG - cmp [DESTREG], REFREG - jne &BASENAME&_UpdateShadowHeap_Invalidate_&REFREG& - - ;; The original DESTREG value is now restored but the stack has a value (the shadow version of the - ;; location) pushed. Need to discard this push before we are done. - add rsp, 8 - jmp &BASENAME&_UpdateShadowHeap_Done_&REFREG& - -&BASENAME&_UpdateShadowHeap_Invalidate_&REFREG&: - ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't - ;; guarantee whose shadow update won. - - ;; Retrieve shadow location from the stack and restore original DESTREG to the stack. 
This is an - ;; additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg - ;; variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 64-bit - ;; immediate and therefore must be moved into a register before it can be written to the shadow - ;; location. - xchg [rsp], DESTREG - push REFREG - mov REFREG, INVALIDGCVALUE - mov qword ptr [DESTREG], REFREG - pop REFREG - -&BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG&: - ;; Restore original DESTREG value from the stack. - pop DESTREG - -&BASENAME&_UpdateShadowHeap_Done_&REFREG&: -endm - -else ; WRITE_BARRIER_CHECK - -UPDATE_GC_SHADOW macro BASENAME, REFREG, DESTREG -endm - -endif ; WRITE_BARRIER_CHECK - -;; There are several different helpers used depending on which register holds the object reference. Since all -;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the -;; name of the register that points to the location to be updated and the name of the register that holds the -;; object reference (this should be in upper case as it's used in the definition of the name of the helper). -DEFINE_UNCHECKED_WRITE_BARRIER_CORE macro BASENAME, REFREG - - ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - ;; we're in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW BASENAME, REFREG, rcx - -ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - mov r11, [g_write_watch_table] - cmp r11, 0 - je &BASENAME&_CheckCardTable_&REFREG& - - mov r10, rcx - shr r10, 0Ch ;; SoftwareWriteWatch::AddressToTableByteIndexShift - add r10, r11 - cmp byte ptr [r10], 0 - jne &BASENAME&_CheckCardTable_&REFREG& - mov byte ptr [r10], 0FFh -endif - -&BASENAME&_CheckCardTable_&REFREG&: - - ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it - ;; (since the object won't be collected or moved by an ephemeral collection). - cmp REFREG, [g_ephemeral_low] - jb &BASENAME&_NoBarrierRequired_&REFREG& - cmp REFREG, [g_ephemeral_high] - jae &BASENAME&_NoBarrierRequired_&REFREG& - - ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must - ;; track this write. The location address is translated into an offset in the card table bitmap. We set - ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write - ;; the byte if it hasn't already been done since writes are expensive and impact scaling. - shr rcx, 0Bh - mov r10, [g_card_table] - cmp byte ptr [rcx + r10], 0FFh - je &BASENAME&_NoBarrierRequired_&REFREG& - - ;; We get here if it's necessary to update the card table. - mov byte ptr [rcx + r10], 0FFh - -ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - ;; Shift rcx by 0Ah more to get the card bundle byte (we shifted by 0x0B already) - shr rcx, 0Ah - add rcx, [g_card_bundle_table] - cmp byte ptr [rcx], 0FFh - je &BASENAME&_NoBarrierRequired_&REFREG& - - mov byte ptr [rcx], 0FFh -endif - -&BASENAME&_NoBarrierRequired_&REFREG&: - ret - -endm - -;; There are several different helpers used depending on which register holds the object reference. Since all -;; the helpers have identical structure we use a macro to define this structure. One argument is taken, the -;; name of the register that will hold the object reference (this should be in upper case as it's used in the -;; definition of the name of the helper). 
-DEFINE_UNCHECKED_WRITE_BARRIER macro REFREG, EXPORT_REG_NAME - -;; Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard -;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that -;; location is in one of the other general registers determined by the value of REFREG. - -;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction -;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address -LEAF_ENTRY RhpAssignRef&EXPORT_REG_NAME&, _TEXT - - ;; Export the canonical write barrier under unqualified name as well - ifidni , - ALTERNATE_ENTRY RhpAssignRef - ALTERNATE_ENTRY RhpAssignRefAVLocation - endif - - ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here - ;; and the card table update we may perform below. - mov qword ptr [rcx], REFREG - - DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, REFREG - -LEAF_END RhpAssignRef&EXPORT_REG_NAME&, _TEXT -endm - -;; One day we might have write barriers for all the possible argument registers but for now we have -;; just one write barrier that assumes the input register is RDX. -DEFINE_UNCHECKED_WRITE_BARRIER RDX, EDX - -;; -;; Define the helpers used to implement the write barrier required when writing an object reference into a -;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in -;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral -;; collection. -;; - -DEFINE_CHECKED_WRITE_BARRIER_CORE macro BASENAME, REFREG - - ;; The location being updated might not even lie in the GC heap (a handle or stack location for instance), - ;; in which case no write barrier is required. - cmp rcx, [g_lowest_address] - jb &BASENAME&_NoBarrierRequired_&REFREG& - cmp rcx, [g_highest_address] - jae &BASENAME&_NoBarrierRequired_&REFREG& - - DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG - -endm - -;; There are several different helpers used depending on which register holds the object reference. Since all -;; the helpers have identical structure we use a macro to define this structure. One argument is taken, the -;; name of the register that will hold the object reference (this should be in upper case as it's used in the -;; definition of the name of the helper). -DEFINE_CHECKED_WRITE_BARRIER macro REFREG, EXPORT_REG_NAME - -;; Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard -;; decoration). The location to be updated is always in RCX. The object reference that will be assigned into -;; that location is in one of the other general registers determined by the value of REFREG. - -;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction -;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address -LEAF_ENTRY RhpCheckedAssignRef&EXPORT_REG_NAME&, _TEXT - - ;; Export the canonical write barrier under unqualified name as well - ifidni , - ALTERNATE_ENTRY RhpCheckedAssignRef - ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation - endif - - ;; Write the reference into the location. 
Note that we rely on the fact that no GC can occur between here - ;; and the card table update we may perform below. - mov qword ptr [rcx], REFREG - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, REFREG - -LEAF_END RhpCheckedAssignRef&EXPORT_REG_NAME&, _TEXT -endm - -;; One day we might have write barriers for all the possible argument registers but for now we have -;; just one write barrier that assumes the input register is RDX. -DEFINE_CHECKED_WRITE_BARRIER RDX, EDX - -LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT - mov rax, r8 - lock cmpxchg [rcx], rdx - jne RhpCheckedLockCmpXchg_NoBarrierRequired_RDX - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, RDX - -LEAF_END RhpCheckedLockCmpXchg, _TEXT - -LEAF_ENTRY RhpCheckedXchg, _TEXT - - ;; Setup rax with the new object for the exchange, that way it will automatically hold the correct result - ;; afterwards and we can leave rdx unaltered ready for the GC write barrier below. - mov rax, rdx - xchg [rcx], rax - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RDX - -LEAF_END RhpCheckedXchg, _TEXT - -;; -;; RhpByRefAssignRef simulates movs instruction for object references. -;; -;; On entry: -;; rdi: address of ref-field (assigned to) -;; rsi: address of the data (source) -;; -;; On exit: -;; rdi, rsi are incremented by 8, -;; rcx, rax: trashed -;; -;; NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF -;; if you add more trashed registers. -;; -;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1/2 -;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address -LEAF_ENTRY RhpByRefAssignRef, _TEXT -ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 - mov rcx, [rsi] -ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2 - mov [rdi], rcx - - ;; Check whether the writes were even into the heap. If not there's no card update required. - cmp rdi, [g_lowest_address] - jb RhpByRefAssignRef_NoBarrierRequired - cmp rdi, [g_highest_address] - jae RhpByRefAssignRef_NoBarrierRequired - - ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - ;; we're in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW BASENAME, rcx, rdi - -ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - cmp [g_write_watch_table], 0 - je RhpByRefAssignRef_CheckCardTable - - mov rax, rdi - shr rax, 0Ch ;; SoftwareWriteWatch::AddressToTableByteIndexShift - add rax, [g_write_watch_table] - cmp byte ptr [rax], 0 - jne RhpByRefAssignRef_CheckCardTable - mov byte ptr [rax], 0FFh -endif - -RhpByRefAssignRef_CheckCardTable: - - ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it - ;; (since the object won't be collected or moved by an ephemeral collection). - cmp rcx, [g_ephemeral_low] - jb RhpByRefAssignRef_NoBarrierRequired - cmp rcx, [g_ephemeral_high] - jae RhpByRefAssignRef_NoBarrierRequired - - ;; move current rdi value into rcx, we need to keep rdi and eventually increment by 8 - mov rcx, rdi - - ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must - ;; track this write. The location address is translated into an offset in the card table bitmap. 
We set - ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write - ;; the byte if it hasn't already been done since writes are expensive and impact scaling. - shr rcx, 0Bh - mov rax, [g_card_table] - cmp byte ptr [rcx + rax], 0FFh - je RhpByRefAssignRef_NoBarrierRequired - -;; We get here if it's necessary to update the card table. - mov byte ptr [rcx + rax], 0FFh - -ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - ;; Shift rcx by 0Ah more to get the card bundle byte (we shifted by 0Bh already) - shr rcx, 0Ah - add rcx, [g_card_bundle_table] - cmp byte ptr [rcx], 0FFh - je RhpByRefAssignRef_NoBarrierRequired - - mov byte ptr [rcx], 0FFh -endif - -RhpByRefAssignRef_NoBarrierRequired: - ;; Increment the pointers before leaving - add rdi, 8h - add rsi, 8h - ret -LEAF_END RhpByRefAssignRef, _TEXT - - end diff --git a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S b/src/coreclr/nativeaot/Runtime/arm/AllocFast.S deleted file mode 100644 index 760913036965..000000000000 --- a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S +++ /dev/null @@ -1,502 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -.syntax unified -.thumb - -#include // generated by the build from AsmOffsets.cpp -#include - -// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -// allocation context then automatically fallback to the slow allocation path. -// r0 == MethodTable -LEAF_ENTRY RhpNewFast, _TEXT - PROLOG_PUSH "{r4,lr}" - mov r4, r0 // save MethodTable - - // r0 = GetThread() - INLINE_GETTHREAD - - // r4 contains MethodTable pointer - ldr r2, [r4, #OFFSETOF__MethodTable__m_uBaseSize] - - // r0: Thread pointer - // r4: MethodTable pointer - // r2: base size - - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - add r2, r3 - ldr r1, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp r2, r1 - bhi LOCAL_LABEL(RhpNewFast_RarePath) - - // set the new alloc pointer - str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new object's MethodTable pointer - str r4, [r3, #OFFSETOF__Object__m_pEEType] - - mov r0, r3 - - EPILOG_POP "{r4,pc}" - -LOCAL_LABEL(RhpNewFast_RarePath): - mov r0, r4 // restore MethodTable - mov r1, #0 - EPILOG_POP "{r4,lr}" - b C_FUNC(RhpNewObject) - -LEAF_END RhpNewFast, _TEXT - -// Allocate non-array object with finalizer. -// r0 == MethodTable -// -LEAF_ENTRY RhpNewFinalizable, _TEXT - mov r1, #GC_ALLOC_FINALIZE - b C_FUNC(RhpNewObject) -LEAF_END RhpNewFinalizable, _TEXT - - -// Allocate non-array object. -// r0 == MethodTable -// r1 == alloc flags -NESTED_ENTRY RhpNewObject, _TEXT, NoHandler - - PUSH_COOP_PINVOKE_FRAME r3 - - // r0: MethodTable - // r1: alloc flags - // r3: transition frame - - // Preserve the MethodTable in r5. - mov r5, r0 - - mov r2, #0 // numElements - - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - blx C_FUNC(RhpGcAlloc) - - cbz r0, LOCAL_LABEL(NewOutOfMemory) - - POP_COOP_PINVOKE_FRAME - bx lr - -LOCAL_LABEL(NewOutOfMemory): - // This is the OOM failure path. We're going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mov r0, r5 // MethodTable pointer - mov r1, #0 // Indicate that we should throw OOM. 
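The allocation fast paths in this file (`RhpNewFast` above, `RhNewString` and `RhpNewArray` below) all follow the same bump-pointer pattern; a rough C sketch, with invented type and field names standing in for the thread's allocation context and a placeholder for the GC-aware slow path:

```c
#include <stdint.h>
#include <stddef.h>

/* Placeholder shapes; the real Thread/MethodTable layouts come from AsmOffsets. */
typedef struct { uint32_t base_size; } MethodTableSketch;
typedef struct { uint8_t *alloc_ptr; uint8_t *combined_limit; } AllocContextSketch;

extern void *slow_path_alloc(MethodTableSketch *mt, uint32_t flags); /* stands in for RhpNewObject */

static void *new_fast(MethodTableSketch *mt, AllocContextSketch *ctx)
{
    uint8_t *obj = ctx->alloc_ptr;
    uint8_t *end = obj + mt->base_size;
    if (end > ctx->combined_limit)
        return slow_path_alloc(mt, 0);       /* doesn't fit: fall back to the GC-aware path */
    ctx->alloc_ptr = end;                    /* bump the pointer */
    *(MethodTableSketch **)obj = mt;         /* first field of every object is its MethodTable */
    return obj;
}
```

When the bump would cross `combined_limit`, the assembly restores its arguments and tail-calls the slow helper instead.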
- - POP_COOP_PINVOKE_FRAME - - b C_FUNC(RhExceptionHandling_FailedAllocation) - -NESTED_END RhpNewObject, _TEXT - - -// Allocate a string. -// r0 == MethodTable -// r1 == element/character count -LEAF_ENTRY RhNewString, _TEXT - PROLOG_PUSH "{r4-r6,lr}" - // Make sure computing the overall allocation size won't overflow - MOV32 r12, MAX_STRING_LENGTH - cmp r1, r12 - bhi LOCAL_LABEL(StringSizeOverflow) - - // Compute overall allocation size (align(base size + (element size * elements), 4)). - mov r2, #(STRING_BASE_SIZE + 3) -#if STRING_COMPONENT_SIZE == 2 - add r2, r2, r1, lsl #1 // r2 += characters * 2 -#else - NotImplementedComponentSize -#endif - bic r2, r2, #3 - - mov r4, r0 // Save MethodTable - mov r5, r1 // Save element count - mov r6, r2 // Save string size - // r0 = GetThread() - INLINE_GETTHREAD - // r4 == MethodTable - // r5 == element count - // r6 == string size - // r0 == Thread* - - // Load potential new object address into r12. - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - adds r6, r12 - bcs LOCAL_LABEL(RhNewString_RarePath) // if we get a carry here, the string is too large to fit below 4 GB - - ldr r12, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp r6, r12 - bhi LOCAL_LABEL(RhNewString_RarePath) - - // Reload new object address into r12. - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Update the alloc pointer to account for the allocation. - str r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new object's MethodTable pointer and element count. - str r4, [r12, #OFFSETOF__Object__m_pEEType] - str r5, [r12, #OFFSETOF__String__m_Length] - - // Return the object allocated in r0. - mov r0, r12 - EPILOG_POP "{r4-r6,pc}" - -LOCAL_LABEL(StringSizeOverflow): - // We get here if the size of the final string object can't be represented as an unsigned - // 32-bit value. We're going to tail-call to a managed helper that will throw - // an OOM exception that the caller of this allocator understands. - - // MethodTable is in r0 already - mov r1, 0 // Indicate that we should throw OOM - EPILOG_POP "{r4-r6,lr}" - b C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhNewString_RarePath): - mov r3, r0 - mov r0, r4 - mov r1, r5 - mov r2, r6 - // r0 == MethodTable - // r1 == element count - // r2 == string size + Thread::m_alloc_context::alloc_ptr - // r3 == Thread - EPILOG_POP "{r4-r6,lr}" - b C_FUNC(RhpNewArrayRare) - -LEAF_END RhNewString, _TEXT - - -// Allocate one dimensional, zero based array (SZARRAY). -// r0 == MethodTable -// r1 == element count -LEAF_ENTRY RhpNewArray, _TEXT - PROLOG_PUSH "{r4-r6,lr}" - - // Compute overall allocation size (align(base size + (element size * elements), 4)). - // if the element count is <= 0x10000, no overflow is possible because the component - // size is <= 0xffff (it's an unsigned 16-bit value) and thus the product is <= 0xffff0000 - // and the base size for the worst case (32 dimensional MdArray) is less than 0xffff. 
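As a sketch of the size computation that the instructions below perform for the small-count case (field names are placeholders; the no-overflow argument is the one given in the comment above):

```c
#include <stdint.h>

typedef struct { uint16_t component_size; uint32_t base_size; } MethodTableSketch;

/* Valid only for num_elements <= 0x10000: per the comment above, the product is
 * at most 0xffff * 0x10000 = 0xffff0000 and the worst-case base size is below
 * 0xffff, so the sum stays within 32 bits and no carry checks are needed. */
static uint32_t array_alloc_size_small(const MethodTableSketch *mt, uint32_t num_elements)
{
    uint32_t size = (uint32_t)mt->component_size * num_elements + mt->base_size + 3;
    return size & ~3u;     /* align up to 4 bytes */
}
```

Larger counts take the `ArraySizeBig` path further below, which redoes the computation with explicit carry checks and reports overflow or out-of-memory accordingly.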
- ldrh r2, [r0, #OFFSETOF__MethodTable__m_usComponentSize] - cmp r1, #0x10000 - bhi LOCAL_LABEL(ArraySizeBig) - umull r2, r3, r2, r1 - ldr r3, [r0, #OFFSETOF__MethodTable__m_uBaseSize] - adds r2, r3 - adds r2, #3 -LOCAL_LABEL(ArrayAlignSize): - bic r2, r2, #3 - - mov r4, r0 // Save MethodTable - mov r5, r1 // Save element count - mov r6, r2 // Save array size - // r0 = GetThread() - INLINE_GETTHREAD - // r4 == MethodTable - // r5 == element count - // r6 == array size - // r0 == Thread* - - // Load potential new object address into r12. - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - adds r6, r12 - bcs LOCAL_LABEL(RhpNewArray_RarePath) // if we get a carry here, the array is too large to fit below 4 GB - - ldr r12, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp r6, r12 - bhi LOCAL_LABEL(RhpNewArray_RarePath) - - // Reload new object address into r12. - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Update the alloc pointer to account for the allocation. - str r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new object's MethodTable pointer and element count. - str r4, [r12, #OFFSETOF__Object__m_pEEType] - str r5, [r12, #OFFSETOF__Array__m_Length] - - // Return the object allocated in r0. - mov r0, r12 - EPILOG_POP "{r4-r6,pc}" - -LOCAL_LABEL(ArraySizeBig): - // if the element count is negative, it's an overflow error - cmp r1, #0 - blt LOCAL_LABEL(ArraySizeOverflow) - - // now we know the element count is in the signed int range [0..0x7fffffff] - // overflow in computing the total size of the array size gives an out of memory exception, - // NOT an overflow exception - // we already have the component size in r2 - umull r2, r3, r2, r1 - cbnz r3, LOCAL_LABEL(ArrayOutOfMemoryFinal) - ldr r3, [r0, #OFFSETOF__MethodTable__m_uBaseSize] - adds r2, r3 - bcs LOCAL_LABEL(ArrayOutOfMemoryFinal) - adds r2, #3 - bcs LOCAL_LABEL(ArrayOutOfMemoryFinal) - b LOCAL_LABEL(ArrayAlignSize) - -LOCAL_LABEL(ArrayOutOfMemoryFinal): - - // MethodTable is in r0 already - mov r1, #0 // Indicate that we should throw OOM. - EPILOG_POP "{r4-r6,lr}" - b C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(ArraySizeOverflow): - // We get here if the size of the final array object can't be represented as an unsigned - // 32-bit value. We're going to tail-call to a managed helper that will throw - // an overflow exception that the caller of this allocator understands. - - // MethodTable is in r0 already - mov r1, #1 // Indicate that we should throw OverflowException - EPILOG_POP "{r4-r6,lr}" - b C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhpNewArray_RarePath): - mov r3, r0 - mov r0, r4 - mov r1, r5 - mov r2, r6 - // r0 == MethodTable - // r1 == element count - // r2 == array size + Thread::m_alloc_context::alloc_ptr - // r3 == Thread - EPILOG_POP "{r4-r6,lr}" - b C_FUNC(RhpNewArrayRare) - -LEAF_END RhpNewArray, _TEXT - - -// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. -// r0 == MethodTable -// r1 == element count -// r2 == array size + Thread::m_alloc_context::alloc_ptr -// r3 == Thread -NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler - - // Recover array size by subtracting the alloc_ptr from r2. 
- ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - sub r2, r12 - - PUSH_COOP_PINVOKE_FRAME r3 - - // Preserve the MethodTable in r5. - mov r5, r0 - - mov r2, r1 // numElements - mov r1, #0 // uFlags - - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - blx C_FUNC(RhpGcAlloc) - - // Test for failure (NULL return). - cbz r0, LOCAL_LABEL(ArrayOutOfMemory) - - POP_COOP_PINVOKE_FRAME - bx lr - -LOCAL_LABEL(ArrayOutOfMemory): - - mov r0, r5 // MethodTable - mov r1, #0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - - b C_FUNC(RhExceptionHandling_FailedAllocation) - -NESTED_END RhpNewArrayRare, _TEXT - -// Allocate simple object (not finalizable, array or value type) on an 8 byte boundary. -// r0 == MethodTable -LEAF_ENTRY RhpNewFastAlign8, _TEXT - PROLOG_PUSH "{r4,lr}" - - mov r4, r0 // save MethodTable - - // r0 = GetThread() - INLINE_GETTHREAD - - // Fetch object size into r2. - ldr r2, [r4, #OFFSETOF__MethodTable__m_uBaseSize] - - // r4: MethodTable pointer - // r0: Thread pointer - // r2: base size - - // Load potential new object address into r3. Cache this result in r12 as well for the common case - // where the allocation succeeds (r3 will be overwritten in the following bounds check). - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - mov r12, r3 - - // Check whether the current allocation context is already aligned for us. - tst r3, #0x7 - bne LOCAL_LABEL(Alloc8Failed) - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp r2, r3 - bhi LOCAL_LABEL(Alloc8Failed) - - // Update the alloc pointer to account for the allocation. - str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new object's MethodTable pointer. - str r4, [r12, #OFFSETOF__Object__m_pEEType] - - // Return the object allocated in r0. - mov r0, r12 - - EPILOG_POP "{r4,pc}" - -LOCAL_LABEL(Alloc8Failed): - // Fast allocation failed. Call slow helper with flags set to indicate an 8-byte alignment and no - // finalization. - mov r0, r4 // restore MethodTable - mov r1, #GC_ALLOC_ALIGN8 - EPILOG_POP "{r4,lr}" - b C_FUNC(RhpNewObject) - -LEAF_END RhpNewFastAlign8, _TEXT - -// Allocate a finalizable object (by definition not an array or value type) on an 8 byte boundary. -// r0 == MethodTable -LEAF_ENTRY RhpNewFinalizableAlign8, _TEXT - mov r1, #(GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8) - b C_FUNC(RhpNewObject) -LEAF_END RhpNewFinalizableAlign8, _TEXT - -// Allocate a value type object (i.e. box it) on an 8 byte boundary + 4 (so that the value type payload -// itself is 8 byte aligned). -// r0 == MethodTable -LEAF_ENTRY RhpNewFastMisalign, _TEXT - PROLOG_PUSH "{r4,lr}" - - mov r4, r0 // save MethodTable - - // r0 = GetThread() - INLINE_GETTHREAD - - // Fetch object size into r2. - ldr r2, [r4, #OFFSETOF__MethodTable__m_uBaseSize] - - // r4: MethodTable pointer - // r0: Thread pointer - // r2: base size - - // Load potential new object address into r3. Cache this result in r12 as well for the common case - // where the allocation succeeds (r3 will be overwritten in the following bounds check). 
- ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - mov r12, r3 - - // Check whether the current allocation context is already aligned for us (for boxing that means the - // address % 8 == 4, so the value type payload following the MethodTable* is actually 8-byte aligned). - tst r3, #0x7 - beq LOCAL_LABEL(BoxAlloc8Failed) - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp r2, r3 - bhi LOCAL_LABEL(BoxAlloc8Failed) - - // Update the alloc pointer to account for the allocation. - str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new object's MethodTable pointer. - str r4, [r12, #OFFSETOF__Object__m_pEEType] - - // Return the object allocated in r0. - mov r0, r12 - - EPILOG_POP "{r4,pc}" - -LOCAL_LABEL(BoxAlloc8Failed): - // Fast allocation failed. Call slow helper with flags set to indicate an 8+4 byte alignment and no - // finalization. - mov r0, r4 // restore MethodTable - mov r1, #(GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS) - EPILOG_POP "{r4,lr}" - b C_FUNC(RhpNewObject) - -LEAF_END RhpNewFastMisalign, _TEXT - -// Allocate an array on an 8 byte boundary. -// r0 == MethodTable -// r1 == element count -NESTED_ENTRY RhpNewArrayAlign8, _TEXT, NoHandler - - PUSH_COOP_PINVOKE_FRAME r3 - - // Compute overall allocation size (base size + align((element size * elements), 4)). - ldrh r2, [r0, #OFFSETOF__MethodTable__m_usComponentSize] - umull r2, r4, r2, r1 - cbnz r4, LOCAL_LABEL(Array8SizeOverflow) - adds r2, #3 - bcs LOCAL_LABEL(Array8SizeOverflow) - bic r2, r2, #3 - ldr r4, [r0, #OFFSETOF__MethodTable__m_uBaseSize] - adds r2, r4 - bcs LOCAL_LABEL(Array8SizeOverflow) - - // Preserve the MethodTable in r5. - mov r5, r0 - - mov r2, r1 // numElements - mov r1, #GC_ALLOC_ALIGN8 // uFlags - - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - blx C_FUNC(RhpGcAlloc) - - // Test for failure (NULL return). - cbz r0, LOCAL_LABEL(Array8OutOfMemory) - - POP_COOP_PINVOKE_FRAME - - bx lr - -LOCAL_LABEL(Array8SizeOverflow): - // We get here if the size of the final array object can't be represented as an unsigned - // 32-bit value. We're going to tail-call to a managed helper that will throw - // an OOM or overflow exception that the caller of this allocator understands. - - // if the element count is non-negative, it's an OOM error - cmp r1, #0 - bge LOCAL_LABEL(Array8OutOfMemory1) - - // r0 holds MethodTable pointer already - mov r1, #1 // Indicate that we should throw OverflowException - - POP_COOP_PINVOKE_FRAME - b C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(Array8OutOfMemory): - // This is the OOM failure path. We're going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mov r0, r5 // MethodTable pointer - -LOCAL_LABEL(Array8OutOfMemory1): - - mov r1, #0 // Indicate that we should throw OOM. 
- - POP_COOP_PINVOKE_FRAME - b C_FUNC(RhExceptionHandling_FailedAllocation) - -NESTED_END RhpNewArrayAlign8, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/arm/AsmMacros_Shared.h new file mode 100644 index 000000000000..1a10c1d2c72f --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm/AsmMacros_Shared.h @@ -0,0 +1,7 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include "AsmOffsets.inc" +#include diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S index 0092be687bce..ad6dbe8ecac6 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S @@ -85,7 +85,7 @@ NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler PUSH_PROBE_FRAME r2, r3, r12 - ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] + ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] bl RhpWaitForGC2 ldr r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] @@ -97,7 +97,7 @@ NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler LOCAL_LABEL(ThrowThreadAbort): POP_PROBE_FRAME - mov r0, #STATUS_REDHAWK_THREAD_ABORT + mov r0, #STATUS_NATIVEAOT_THREAD_ABORT mov r1, lr // return address as exception PC b C_FUNC(RhpThrowHwEx) NESTED_END RhpWaitForGC diff --git a/src/coreclr/nativeaot/Runtime/arm/StubDispatch.S b/src/coreclr/nativeaot/Runtime/arm/StubDispatch.S deleted file mode 100644 index 7c2f0bef20af..000000000000 --- a/src/coreclr/nativeaot/Runtime/arm/StubDispatch.S +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -.syntax unified -.thumb - -#include // generated by the build from AsmOffsets.cpp -#include - -#ifdef FEATURE_CACHED_INTERFACE_DISPATCH - -// Macro that generates a stub consuming a cache with the given number of entries. -.macro DEFINE_INTERFACE_DISPATCH_STUB entries - -NESTED_ENTRY RhpInterfaceDispatch\entries, _TEXT, NoHandler - // r12 currently contains the indirection cell address. But we need more scratch registers and - // we may A/V on a null this. Store r1 and r2 in red zone. - str r1, [sp, #-8] - str r2, [sp, #-4] - - // r12 currently holds the indirection cell address. We need to get the cache structure instead. - ldr r2, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - // Load the MethodTable from the object instance in r0. - GLOBAL_LABEL RhpInterfaceDispatchAVLocation\entries - ldr r1, [r0] - - CurrentOffset = OFFSETOF__InterfaceDispatchCache__m_rgEntries - // For each entry in the cache, see if its MethodTable type matches the MethodTable in r1. - // If so, call the second cache entry. If not, skip the InterfaceDispatchCacheEntry. - // R1 : Instance MethodTable* - // R2: Cache data structure - // R12 : Trashed. On successful check, set to the target address to jump to. 
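The `.rept` block that follows unrolls, once per cache entry, the compare-and-branch that this illustrative C sketch spells out; the struct and function names are placeholders for the real `InterfaceDispatchCache` layout:

```c
#include <stdint.h>

typedef struct {
    void *method_table;   /* MethodTable* this entry was resolved for */
    void *target;         /* code address to jump to on a hit */
} CacheEntrySketch;

/* Linear probe of the dispatch cache: return the cached target on a type match,
 * or NULL to indicate the stub should tail-call the slow resolution path. */
static void *probe_dispatch_cache(void *instance_method_table,
                                  const CacheEntrySketch *entries, int count)
{
    for (int i = 0; i < count; i++)
        if (entries[i].method_table == instance_method_table)
            return entries[i].target;
    return 0;   /* miss: RhpInterfaceDispatchSlow resolves and repopulates the cache */
}
```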
- .rept \entries - ldr r12, [r2, #CurrentOffset] - cmp r1, r12 - bne 0f - ldr r12, [r2, #(CurrentOffset + 4)] - b LOCAL_LABEL(99_\entries) - 0: - CurrentOffset = CurrentOffset + 8 - .endr - - // Point r12 to the indirection cell using the back pointer in the cache block - ldr r12, [r2, #OFFSETOF__InterfaceDispatchCache__m_pCell] - - ldr r1, [sp, #-8] - ldr r2, [sp, #-4] - b C_FUNC(RhpInterfaceDispatchSlow) - - // Common epilog for cache hits. Have to out of line it here due to limitation on the number of - // epilogs imposed by the unwind code macros. -LOCAL_LABEL(99_\entries): - // R2 contains address of the cache block. We store it in the red zone in case the target we jump - // to needs it. - // R12 contains the target address to jump to - ldr r1, [sp, #-8] - // We have to store R2 with address of the cache block into red zone before restoring original r2. - str r2, [sp, #-8] - ldr r2, [sp, #-4] - EPILOG_BRANCH_REG r12 - -NESTED_END RhpInterfaceDispatch\entries, _TEXT - -.endm // DEFINE_INTERFACE_DISPATCH_STUB - -// Define all the stub routines we currently need. -// -// The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed. -// If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo -// -DEFINE_INTERFACE_DISPATCH_STUB 1 -DEFINE_INTERFACE_DISPATCH_STUB 2 -DEFINE_INTERFACE_DISPATCH_STUB 4 -DEFINE_INTERFACE_DISPATCH_STUB 8 -DEFINE_INTERFACE_DISPATCH_STUB 16 -DEFINE_INTERFACE_DISPATCH_STUB 32 -DEFINE_INTERFACE_DISPATCH_STUB 64 - -// Stub dispatch routine for dispatch to a vtable slot -LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // On input we have the indirection cell data structure in r12. But we need more scratch registers and - // we may A/V on a null this. Both of these suggest we need a real prolog and epilog. - PROLOG_PUSH {r1} - - // r12 currently holds the indirection cell address. We need to update it to point to the vtable - // offset instead. - ldr r12, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - // Load the MethodTable from the object instance in r0. - ldr r1, [r0] - - // add the vtable offset to the MethodTable pointer - add r12, r1, r12 - - // Load the target address of the vtable into r12 - ldr r12, [r12] - - EPILOG_POP {r1} - EPILOG_BRANCH_REG r12 -LEAF_END RhpVTableOffsetDispatch, _TEXT - -// Initial dispatch on an interface when we don't have a cache yet. -LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT - // Just tail call to the cache miss helper. - b C_FUNC(RhpInterfaceDispatchSlow) -LEAF_END RhpInitialInterfaceDispatch, _TEXT - -// No as alternate entry due to missed thumb bit in this case -// See https://github.com/dotnet/runtime/issues/8608 -LEAF_ENTRY RhpInitialDynamicInterfaceDispatch, _TEXT - // Just tail call to the cache miss helper. - b C_FUNC(RhpInterfaceDispatchSlow) -LEAF_END RhpInitialDynamicInterfaceDispatch, _TEXT - -// Cache miss case, call the runtime to resolve the target and update the cache. -// Use universal transition helper to allow an exception to flow out of resolution -LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // r12 has the interface dispatch cell address in it. 
- // The calling convention of the universal thunk is that the parameter - // for the universal thunk target is to be placed in sp-8 - // and the universal thunk target address is to be placed in sp-4 - str r12, [sp, #-8] - PREPARE_EXTERNAL_VAR RhpCidResolve, r12 - str r12, [sp, #-4] - - // jump to universal transition thunk - b C_FUNC(RhpUniversalTransition_DebugStepTailCall) -LEAF_END RhpInterfaceDispatchSlow, _TEXT - -#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/arm/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/arm/WriteBarriers.S deleted file mode 100644 index 3bb862231a34..000000000000 --- a/src/coreclr/nativeaot/Runtime/arm/WriteBarriers.S +++ /dev/null @@ -1,368 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -.syntax unified -.thumb - -#include // generated by the build from AsmOffsets.cpp -#include - -#ifdef WRITE_BARRIER_CHECK - -.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG - - // If g_GCShadow is 0, don't perform the check. - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, r12 - cbz r12, LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) - - // Save DESTREG since we're about to modify it (and we need the original value both within the macro and - // once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of - // the prolog inside a method without a frame. But given that this is only debug code and generally we - // shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier - // variants to set up frames. The compiler knows exactly which registers are trashed in the simple write - // barrier case, so we don't have any more scratch registers to play with (and doing so would only make - // things harder if at a later stage we want to allow multiple barrier versions based on the input - // registers). - push \DESTREG - - // Transform DESTREG into the equivalent address in the shadow heap. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, r12 - sub \DESTREG, r12 - cmp \DESTREG, #0 - blo LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, r12 - add \DESTREG, r12 - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, r12 - cmp \DESTREG, r12 - bhs LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) - - // Update the shadow heap. - str \REFREG, [\DESTREG] - - // The following read must be strongly ordered wrt to the write we've just performed in order to - // prevent race conditions. - dmb - - // Now check that the real heap location still contains the value we just wrote into the shadow heap. - mov r12, \DESTREG - ldr \DESTREG, [sp] - str r12, [sp] - ldr r12, [\DESTREG] - cmp r12, \REFREG - bne LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG) - - // The original DESTREG value is now restored but the stack has a value (the shadow version of the - // location) pushed. Need to discard this push before we are done. - add sp, #4 - b LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) - -LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG): - // Someone went and updated the real heap. We need to invalidate the shadow location since we can't - // guarantee whose shadow update won. - - // Retrieve shadow location from the stack and restore original DESTREG to the stack. 
This is an - // additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg - // variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 32-bit - // immediate and therefore must be moved into a register before it can be written to the shadow - // location. - mov r12, \DESTREG - ldr \DESTREG, [sp] - str r12, [sp] - push \REFREG - movw \REFREG, #(INVALIDGCVALUE & 0xFFFF) - movt \REFREG, #(INVALIDGCVALUE >> 16) - str \REFREG, [\DESTREG] - pop \REFREG - -LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG): - // Restore original DESTREG value from the stack. - pop \DESTREG - -LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG): - -.endm - -#else // WRITE_BARRIER_CHECK - -.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG -.endm - -#endif // WRITE_BARRIER_CHECK - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the -// name of the register that points to the location to be updated and the name of the register that holds the -// object reference (this should be in upper case as it's used in the definition of the name of the helper). -.macro DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG - - // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - // we're in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW \BASENAME, \REFREG, r0 - - // If the reference is to an object that's not in an ephemeral generation we have no need to track it - // (since the object won't be collected or moved by an ephemeral collection). - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, r12 - cmp \REFREG, r12 - blo LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG) - - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, r12 - cmp \REFREG, r12 - bhs LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG) - - // We have a location on the GC heap being updated with a reference to an ephemeral object so we must - // track this write. The location address is translated into an offset in the card table bitmap. We set - // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write - // the byte if it hasn't already been done since writes are expensive and impact scaling. - PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, r12 - add r0, r12, r0, lsr #LOG2_CLUMP_SIZE - ldrb r12, [r0] - cmp r12, #0x0FF - bne LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG) - -LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG): - b LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG) - -// We get here if it's necessary to update the card table. -LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG): - mov r12, #0x0FF - strb r12, [r0] - -LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG): - -.endm - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. One argument is taken, the -// name of the register that will hold the object reference (this should be in upper case as it's used in the -// definition of the name of the helper). -.macro DEFINE_UNCHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME - -// Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard -// decoration). The location to be updated is in DESTREG. 
The object reference that will be assigned into that -// location is in one of the other general registers determined by the value of REFREG. - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at WriteBarrierFunctionAvLOC -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address -LEAF_ENTRY RhpAssignRef\EXPORT_REG_NAME, _TEXT - -// Export the canonical write barrier under unqualified name as well -.ifc \REFREG, r1 -ALTERNATE_ENTRY RhpAssignRef -.endif - - // Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The - // intent is that writes to the target object ($REFREG) will be visible across all CPUs before the - // write to the destination ($DESTREG). This covers most of the common scenarios where the programmer - // might assume strongly ordered accessess, namely where the preceding writes are used to initialize - // the object and the final write, made by this barrier in the instruction following the DMB, - // publishes that object for other threads/cpus to see. - // - // Note that none of this is relevant for single cpu machines. We may choose to implement a - // uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. - dmb - - // Write the reference into the location. Note that we rely on the fact that no GC can occur between here - // and the card table update we may perform below. -GLOBAL_LABEL "RhpAssignRefAvLocation"\EXPORT_REG_NAME // WriteBarrierFunctionAvLocation -.ifc \REFREG, r1 -GLOBAL_LABEL RhpAssignRefAVLocation -.endif - str \REFREG, [r0] - - DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, \REFREG - - bx lr -LEAF_END RhpAssignRef\EXPORT_REG_NAME, _TEXT -.endm - -// One day we might have write barriers for all the possible argument registers but for now we have -// just one write barrier that assumes the input register is RSI. -DEFINE_UNCHECKED_WRITE_BARRIER r1, r1 - -// -// Define the helpers used to implement the write barrier required when writing an object reference into a -// location residing on the GC heap. Such write barriers allow the GC to optimize which objects in -// non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral -// collection. -// - -.macro DEFINE_CHECKED_WRITE_BARRIER_CORE BASENAME, REFREG - - // The location being updated might not even lie in the GC heap (a handle or stack location for instance), - // in which case no write barrier is required. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, r12 - cmp r0, r12 - blo LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, r12 - cmp r0, r12 - bhs LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) - - DEFINE_UNCHECKED_WRITE_BARRIER_CORE \BASENAME, \REFREG - -.endm - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. One argument is taken, the -// name of the register that will hold the object reference (this should be in upper case as it's used in the -// definition of the name of the helper). -.macro DEFINE_CHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME - -// Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard -// decoration). 
The location to be updated is always in R0. The object reference that will be assigned into -// that location is in one of the other general registers determined by the value of REFREG. - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address -LEAF_ENTRY RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT - -// Export the canonical write barrier under unqualified name as well -.ifc \REFREG, r1 -ALTERNATE_ENTRY RhpCheckedAssignRef -.endif - - // Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The - // intent is that writes to the target object ($REFREG) will be visible across all CPUs before the - // write to the destination ($DESTREG). This covers most of the common scenarios where the programmer - // might assume strongly ordered accessess, namely where the preceding writes are used to initialize - // the object and the final write, made by this barrier in the instruction following the DMB, - // publishes that object for other threads/cpus to see. - // - // Note that none of this is relevant for single cpu machines. We may choose to implement a - // uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. - dmb - // Write the reference into the location. Note that we rely on the fact that no GC can occur between here - // and the card table update we may perform below. -GLOBAL_LABEL "RhpCheckedAssignRefAvLocation"\EXPORT_REG_NAME // WriteBarrierFunctionAvLocation -.ifc \REFREG, r1 -GLOBAL_LABEL RhpCheckedAssignRefAVLocation -.endif - str \REFREG, [r0] - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, \REFREG - - bx lr -LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT -.endm - -// One day we might have write barriers for all the possible argument registers but for now we have -// just one write barrier that assumes the input register is RSI. -DEFINE_CHECKED_WRITE_BARRIER r1, r1 - -// r0 = destination address -// r1 = value -// r2 = comparand -LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT - // To implement our chosen memory model for ARM we insert a memory barrier at GC write brriers. This - // barrier must occur before the object reference update, so we have to do it unconditionally even - // though the update may fail below. - dmb -LOCAL_LABEL(RhpCheckedLockCmpXchgRetry): - ldrex r3, [r0] - cmp r2, r3 - bne LOCAL_LABEL(RhpCheckedLockCmpXchg_NoBarrierRequired_r1) - strex r3, r1, [r0] - cmp r3, #0 - bne LOCAL_LABEL(RhpCheckedLockCmpXchgRetry) - mov r3, r2 - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, r1 - - mov r0, r3 - bx lr -LEAF_END RhpCheckedLockCmpXchg, _TEXT - -// r0 = destination address -// r1 = value -LEAF_ENTRY RhpCheckedXchg, _TEXT - // To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This - // barrier must occur before the object reference update. - dmb -LOCAL_LABEL(RhpCheckedXchgRetry): - ldrex r2, [r0] - strex r3, r1, [r0] - cmp r3, #0 - bne LOCAL_LABEL(RhpCheckedXchgRetry) - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, r1 - - // The original value is currently in r2. We need to return it in r0. - mov r0, r2 - - bx lr -LEAF_END RhpCheckedXchg, _TEXT - -// -// RhpByRefAssignRef simulates movs instruction for object references. 
-// -// On entry: -// r0: address of ref-field (assigned to) -// r1: address of the data (source) -// r2, r3: be trashed -// -// On exit: -// r0, r1 are incremented by 4, -// r2, r3: trashed -// -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1/2 -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address -LEAF_ENTRY RhpByRefAssignRef, _TEXT - // See comment in RhpAssignRef - dmb - -GLOBAL_LABEL RhpByRefAssignRefAVLocation1 - ldr r2, [r1] -GLOBAL_LABEL RhpByRefAssignRefAVLocation2 - str r2, [r0] - - // Check whether the writes were even into the heap. If not there's no card update required. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, r3 - cmp r0, r3 - blo LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, r3 - cmp r0, r3 - bhs LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) - - // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - // we're in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW BASENAME, r2, r0 - - // If the reference is to an object that's not in an ephemeral generation we have no need to track it - // (since the object won't be collected or moved by an ephemeral collection). - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, r3 - cmp r2, r3 - blo LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, r3 - cmp r2, r3 - bhs LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) - - // move current r0 value into r2 and then increment the pointers - mov r2, r0 - add r1, #4 - add r0, #4 - - // We have a location on the GC heap being updated with a reference to an ephemeral object so we must - // track this write. The location address is translated into an offset in the card table bitmap. We set - // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write - // the byte if it hasn't already been done since writes are expensive and impact scaling. - PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, r3 - add r2, r3, r2, lsr #LOG2_CLUMP_SIZE - ldrb r3, [r2] - cmp r3, #0x0FF - bne LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable) - bx lr - -// We get here if it's necessary to update the card table. -LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable): - mov r3, #0x0FF - strb r3, [r2] - bx lr - -LOCAL_LABEL(RhpByRefAssignRef_NotInHeap): - // Increment the pointers before leaving - add r0, #4 - add r1, #4 - bx lr -LEAF_END RhpByRefAssignRef, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S deleted file mode 100644 index 6c61b2de3563..000000000000 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S +++ /dev/null @@ -1,282 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
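One point worth noting from the ARM write barriers that end just above: the `dmb` issued before the reference store makes the writes that initialized the object visible before the reference that publishes it, which in C11 terms is roughly a release store. A sketch, not the runtime's actual code:

```c
#include <stdatomic.h>
#include <stdint.h>

/* Publish an object reference with release ordering, the property the
 * dmb-before-str sequence above provides on ARM. */
static void publish_reference(_Atomic(uintptr_t) *location, uintptr_t obj)
{
    atomic_store_explicit(location, obj, memory_order_release);
}
```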
- -#include -#include "AsmOffsets.inc" - -// GC type flags -#define GC_ALLOC_FINALIZE 1 - -// -// Rename fields of nested structs -// -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) - - - -// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -// allocation context then automatically fallback to the slow allocation path. -// x0 == MethodTable - LEAF_ENTRY RhpNewFast, _TEXT - - // x1 = GetThread() -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_1 -#else - INLINE_GETTHREAD x1 -#endif - - // - // x0 contains MethodTable pointer - // - ldr w2, [x0, #OFFSETOF__MethodTable__m_uBaseSize] - - // - // x0: MethodTable pointer - // x1: Thread pointer - // x2: base size - // - - // Load potential new object address into x12. - ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x13 - bhi LOCAL_LABEL(RhpNewFast_RarePath) - - // Update the alloc pointer to account for the allocation. - str x2, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new objects MethodTable pointer - str x0, [x12, #OFFSETOF__Object__m_pEEType] - - mov x0, x12 - ret - -LOCAL_LABEL(RhpNewFast_RarePath): - mov x1, #0 - b C_FUNC(RhpNewObject) - LEAF_END RhpNewFast, _TEXT - -// Allocate non-array object with finalizer. -// x0 == MethodTable - LEAF_ENTRY RhpNewFinalizable, _TEXT - mov x1, #GC_ALLOC_FINALIZE - b C_FUNC(RhpNewObject) - LEAF_END RhpNewFinalizable, _TEXT - -// Allocate non-array object. -// x0 == MethodTable -// x1 == alloc flags - NESTED_ENTRY RhpNewObject, _TEXT, NoHandler - - PUSH_COOP_PINVOKE_FRAME x3 - - // x3: transition frame - - // Preserve the MethodTable in x19 - mov x19, x0 - - mov w2, 0 // numElements - - // Call the rest of the allocation helper. - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - bl C_FUNC(RhpGcAlloc) - - // Set the new objects MethodTable pointer on success. - cbz x0, LOCAL_LABEL(NewOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state -LOCAL_LABEL(NewOutOfMemory): - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mov x0, x19 // MethodTable pointer - mov x1, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - b C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewObject, _TEXT - -// Allocate a string. -// x0 == MethodTable -// x1 == element/character count - LEAF_ENTRY RhNewString, _TEXT - // Make sure computing the overall allocation size wont overflow - movz x2, MAX_STRING_LENGTH & 0xFFFF - movk x2, MAX_STRING_LENGTH >> 16, lsl 16 - cmp x1, x2 - bhi LOCAL_LABEL(StringSizeOverflow) - - // Compute overall allocation size (align(base size + (element size * elements), 8)). 
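For the string allocation below, the `MAX_STRING_LENGTH` check above bounds the element count and the 32x32-to-64-bit multiply-accumulate (`umaddl`) avoids any intermediate overflow; a sketch with the `STRING_*` constants passed as parameters:

```c
#include <stdint.h>

/* align(base_size + char_count * component_size, 8), computed in 64 bits the
 * way the umaddl below does; component_size and base_size stand in for the
 * STRING_COMPONENT_SIZE and STRING_BASE_SIZE constants. */
static uint64_t string_alloc_size(uint32_t char_count, uint32_t component_size,
                                  uint32_t base_size)
{
    return ((uint64_t)char_count * component_size + base_size + 7) & ~(uint64_t)7;
}
```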
- mov w2, #STRING_COMPONENT_SIZE - mov x3, #(STRING_BASE_SIZE + 7) - umaddl x2, w1, w2, x3 // x2 = w1 * w2 + x3 - and x2, x2, #-8 - - // x0 == MethodTable - // x1 == element count - // x2 == string size - -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_3 -#else - INLINE_GETTHREAD x3 -#endif - - // Load potential new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x12 - bhi LOCAL_LABEL(RhNewString_Rare) - - // Reload new object address into r12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Update the alloc pointer to account for the allocation. - str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new objects MethodTable pointer and element count. - str x0, [x12, #OFFSETOF__Object__m_pEEType] - str x1, [x12, #OFFSETOF__Array__m_Length] - - // Return the object allocated in x0. - mov x0, x12 - - ret - -LOCAL_LABEL(StringSizeOverflow): - // We get here if the length of the final string object can not be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an OOM exception that the caller of this allocator understands. - - // x0 holds MethodTable pointer already - mov x1, #1 // Indicate that we should throw OverflowException - b C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhNewString_Rare): - b C_FUNC(RhpNewArrayRare) - LEAF_END RhNewString, _Text - -// Allocate one dimensional, zero based array (SZARRAY). -// x0 == MethodTable -// x1 == element count - LEAF_ENTRY RhpNewArray, _Text - - // We want to limit the element count to the non-negative 32-bit int range. - // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component - // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst - // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. - mov x2, #0x7FFFFFFF - cmp x1, x2 - bhi LOCAL_LABEL(ArraySizeOverflow) - - ldrh w2, [x0, #OFFSETOF__MethodTable__m_usComponentSize] - umull x2, w1, w2 - ldr w3, [x0, #OFFSETOF__MethodTable__m_uBaseSize] - add x2, x2, x3 - add x2, x2, #7 - and x2, x2, #-8 - - // x0 == MethodTable - // x1 == element count - // x2 == array size - -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_3 -#else - INLINE_GETTHREAD x3 -#endif - - // Load potential new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x12 - bhi LOCAL_LABEL(RhpNewArray_Rare) - - // Reload new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Update the alloc pointer to account for the allocation. - str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new objects MethodTable pointer and element count. - str x0, [x12, #OFFSETOF__Object__m_pEEType] - str x1, [x12, #OFFSETOF__Array__m_Length] - - // Return the object allocated in r0. 
- mov x0, x12 - - ret - -LOCAL_LABEL(ArraySizeOverflow): - // We get here if the size of the final array object can not be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an overflow exception that the caller of this allocator understands. - - // x0 holds MethodTable pointer already - mov x1, #1 // Indicate that we should throw OverflowException - b C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhpNewArray_Rare): - b C_FUNC(RhpNewArrayRare) - LEAF_END RhpNewArray, _TEXT - -// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. -// x0 == MethodTable -// x1 == element count -// x2 == array size + Thread::m_alloc_context::alloc_ptr -// x3 == Thread - NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler - - // Recover array size by subtracting the alloc_ptr from x2. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - sub x2, x2, x12 - - PUSH_COOP_PINVOKE_FRAME x3 - - // Preserve data we will need later into the callee saved registers - mov x19, x0 // Preserve MethodTable - - mov x2, x1 // numElements - mov x1, #0 // uFlags - - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - bl C_FUNC(RhpGcAlloc) - - // Set the new objects MethodTable pointer and length on success. - cbz x0, LOCAL_LABEL(ArrayOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state -LOCAL_LABEL(ArrayOutOfMemory): - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mov x0, x19 // MethodTable Pointer - mov x1, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - b C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm deleted file mode 100644 index d8e506335d77..000000000000 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm +++ /dev/null @@ -1,250 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - -#include "AsmMacros.h" - - TEXTAREA - -;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -;; allocation context then automatically fallback to the slow allocation path. -;; x0 == MethodTable - LEAF_ENTRY RhpNewFast - - ;; x1 = GetThread(), TRASHES x2 - INLINE_GETTHREAD x1, x2 - - ;; - ;; x0 contains MethodTable pointer - ;; - ldr w2, [x0, #OFFSETOF__MethodTable__m_uBaseSize] - - ;; - ;; x0: MethodTable pointer - ;; x1: Thread pointer - ;; x2: base size - ;; - - ;; Load potential new object address into x12. - ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - ;; Determine whether the end of the object would lie outside of the current allocation context. If so, - ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x13 - bhi RhpNewFast_RarePath - - ;; Update the alloc pointer to account for the allocation. 
- str x2, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - ;; Set the new object's MethodTable pointer - str x0, [x12, #OFFSETOF__Object__m_pEEType] - - mov x0, x12 - ret - -RhpNewFast_RarePath - mov x1, #0 - b RhpNewObject - LEAF_END RhpNewFast - -;; Allocate non-array object with finalizer. -;; x0 == MethodTable - LEAF_ENTRY RhpNewFinalizable - mov x1, #GC_ALLOC_FINALIZE - b RhpNewObject - LEAF_END RhpNewFinalizable - -;; Allocate non-array object. -;; x0 == MethodTable -;; x1 == alloc flags - NESTED_ENTRY RhpNewObject - - PUSH_COOP_PINVOKE_FRAME x3 - - ;; x3: transition frame - - ;; Preserve the MethodTable in x19 - mov x19, x0 - - mov w2, #0 ; numElements - - ;; Call the rest of the allocation helper. - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - bl RhpGcAlloc - - cbz x0, NewOutOfMemory - - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - -NewOutOfMemory - ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw - ;; an out of memory exception that the caller of this allocator understands. - - mov x0, x19 ; MethodTable pointer - mov x1, #0 ; Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - EPILOG_NOP b RhExceptionHandling_FailedAllocation - - NESTED_END RhpNewObject - -;; Allocate a string. -;; x0 == MethodTable -;; x1 == element/character count - LEAF_ENTRY RhNewString - ;; Make sure computing the overall allocation size won't overflow - movz x2, #(MAX_STRING_LENGTH & 0xFFFF) - movk x2, #(MAX_STRING_LENGTH >> 16), lsl #16 - cmp x1, x2 - bhi StringSizeOverflow - - ;; Compute overall allocation size (align(base size + (element size * elements), 8)). - mov w2, #STRING_COMPONENT_SIZE - mov x3, #(STRING_BASE_SIZE + 7) - umaddl x2, w1, w2, x3 ; x2 = w1 * w2 + x3 - and x2, x2, #-8 - - ; x0 == MethodTable - ; x1 == element count - ; x2 == string size - - INLINE_GETTHREAD x3, x5 - - ;; Load potential new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - ;; Determine whether the end of the object would lie outside of the current allocation context. If so, - ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x12 - bhi RhpNewArrayRare - - ;; Reload new object address into r12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - ;; Update the alloc pointer to account for the allocation. - str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - ;; Set the new object's MethodTable pointer and element count. - str x0, [x12, #OFFSETOF__Object__m_pEEType] - str x1, [x12, #OFFSETOF__Array__m_Length] - - ;; Return the object allocated in x0. - mov x0, x12 - - ret - -StringSizeOverflow - ; We get here if the length of the final string object can't be represented as an unsigned - ; 32-bit value. We're going to tail-call to a managed helper that will throw - ; an OOM exception that the caller of this allocator understands. - - ; x0 holds MethodTable pointer already - mov x1, #1 ; Indicate that we should throw OverflowException - b RhExceptionHandling_FailedAllocation - LEAF_END RhNewString - -;; Allocate one dimensional, zero based array (SZARRAY). -;; x0 == MethodTable -;; x1 == element count - LEAF_ENTRY RhpNewArray - - ;; We want to limit the element count to the non-negative 32-bit int range. 
- ;; If the element count is <= 0x7FFFFFFF, no overflow is possible because the component - ;; size is <= 0xffff (it's an unsigned 16-bit value), and the base size for the worst - ;; case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. - mov x2, #0x7FFFFFFF - cmp x1, x2 - bhi ArraySizeOverflow - - ldrh w2, [x0, #OFFSETOF__MethodTable__m_usComponentSize] - umull x2, w1, w2 - ldr w3, [x0, #OFFSETOF__MethodTable__m_uBaseSize] - add x2, x2, x3 - add x2, x2, #7 - and x2, x2, #-8 - - ; x0 == MethodTable - ; x1 == element count - ; x2 == array size - - INLINE_GETTHREAD x3, x5 - - ;; Load potential new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - ;; Determine whether the end of the object would lie outside of the current allocation context. If so, - ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x12 - bhi RhpNewArrayRare - - ;; Reload new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - ;; Update the alloc pointer to account for the allocation. - str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - ;; Set the new object's MethodTable pointer and element count. - str x0, [x12, #OFFSETOF__Object__m_pEEType] - str x1, [x12, #OFFSETOF__Array__m_Length] - - ;; Return the object allocated in r0. - mov x0, x12 - - ret - -ArraySizeOverflow - ; We get here if the size of the final array object can't be represented as an unsigned - ; 32-bit value. We're going to tail-call to a managed helper that will throw - ; an overflow exception that the caller of this allocator understands. - - ; x0 holds MethodTable pointer already - mov x1, #1 ; Indicate that we should throw OverflowException - b RhExceptionHandling_FailedAllocation - LEAF_END RhpNewArray - -;; Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. -;; x0 == MethodTable -;; x1 == element count -;; x2 == array size + Thread::m_alloc_context::alloc_ptr -;; x3 == Thread - NESTED_ENTRY RhpNewArrayRare - - ; Recover array size by subtracting the alloc_ptr from x2. - PROLOG_NOP ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - PROLOG_NOP sub x2, x2, x12 - - PUSH_COOP_PINVOKE_FRAME x3 - - ; Preserve data we'll need later into the callee saved registers - mov x19, x0 ; Preserve MethodTable - - mov x2, x1 ; numElements - mov x1, #0 ; uFlags - - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - bl RhpGcAlloc - - cbz x0, ArrayOutOfMemory - - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - -ArrayOutOfMemory - ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw - ;; an out of memory exception that the caller of this allocator understands. - - mov x0, x19 ; MethodTable Pointer - mov x1, #0 ; Indicate that we should throw OOM. 
- - POP_COOP_PINVOKE_FRAME - EPILOG_NOP b RhExceptionHandling_FailedAllocation - - NESTED_END RhpNewArrayRare - - END diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h index 90e1b5d77799..09e5493c7fb4 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h @@ -9,13 +9,11 @@ ;; ;; CONSTANTS -- INTEGER ;; -TSF_Attached equ 0x01 TSF_SuppressGcStress equ 0x08 TSF_DoNotTriggerGc equ 0x10 TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC equ 0x18 ;; Bit position for the flags above, to be used with tbz/tbnz instructions -TSF_Attached_Bit equ 0 TSF_SuppressGcStress_Bit equ 3 TSF_DoNotTriggerGc_Bit equ 4 @@ -74,14 +72,14 @@ TrapThreadsFlags_TrapThreads equ 2 TrapThreadsFlags_AbortInProgress_Bit equ 0 TrapThreadsFlags_TrapThreads_Bit equ 1 -;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT -STATUS_REDHAWK_THREAD_ABORT equ 0x43 +;; This must match HwExceptionCode.STATUS_NATIVEAOT_THREAD_ABORT +STATUS_NATIVEAOT_THREAD_ABORT equ 0x43 ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit +OFFSETOF__ee_alloc_context__alloc_ptr equ OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__ee_alloc_context equ OFFSETOF__Thread__m_eeAllocContext ;; ;; IMPORTS @@ -222,7 +220,6 @@ TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister INLINE_GET_TLS_VAR $destReg, $trashReg, tls_CurrentThread MEND - MACRO INLINE_THREAD_UNHIJACK $threadReg, $trashReg1, $trashReg2 ;; @@ -238,6 +235,12 @@ TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister 0 MEND + MACRO + INLINE_GET_ALLOC_CONTEXT_BASE $destReg, $trashReg + + INLINE_GET_TLS_VAR $destReg, $trashReg, tls_CurrentThread + MEND + ;; ---------------------------------------------------------------------------- - ;; ;; Macro to add a memory barrier. Equal to __sync_synchronize(). diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h new file mode 100644 index 000000000000..f67496574352 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#ifdef TARGET_WINDOWS +#include "AsmMacros.h" +#else +#include +#include "AsmOffsets.inc" +#endif diff --git a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S new file mode 100644 index 000000000000..f759ac029aff --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include +#include "AsmOffsets.inc" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + .extern RhpCidResolve + .extern RhpUniversalTransition_DebugStepTailCall + +// +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution. +// + LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // x11 contains the interface dispatch cell address. + // Calling convention of the universal thunk is: + // xip0: target address for the thunk to call + // xip1: parameter of the thunk's target + PREPARE_EXTERNAL_VAR RhpCidResolve, xip0 + mov xip1, x11 + b C_FUNC(RhpUniversalTransition_DebugStepTailCall) + LEAF_END RhpInterfaceDispatchSlow, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm new file mode 100644 index 000000000000..7d6fb39a7c9c --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm @@ -0,0 +1,29 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + EXTERN RhpCidResolve + EXTERN RhpUniversalTransition_DebugStepTailCall + +;; +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; Use universal transition helper to allow an exception to flow out of resolution. +;; + LEAF_ENTRY RhpInterfaceDispatchSlow + ;; x11 contains the interface dispatch cell address. + ;; Calling convention of the universal thunk is: + ;; xip0: target address for the thunk to call + ;; xip1: parameter of the thunk's target + ldr xip0, =RhpCidResolve + mov xip1, x11 + b RhpUniversalTransition_DebugStepTailCall + LEAF_END RhpInterfaceDispatchSlow + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + END diff --git a/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.S index 0f7e1b7e31ae..40c0b37b9917 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.S @@ -506,7 +506,7 @@ LOCAL_LABEL(DonePopping): // It was the ThreadAbortException, so rethrow it // reset SP mov x1, x0 // x1 <- continuation address as exception PC - mov w0, #STATUS_REDHAWK_THREAD_ABORT + mov w0, #STATUS_NATIVEAOT_THREAD_ABORT mov sp, x2 b C_FUNC(RhpThrowHwEx) diff --git a/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.asm index 3c2437fb9135..5918745625fd 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.asm @@ -482,7 +482,7 @@ DonePopping ;; It was the ThreadAbortException, so rethrow it ;; reset SP mov x1, x0 ;; x1 <- continuation address as exception PC - mov w0, #STATUS_REDHAWK_THREAD_ABORT + mov w0, #STATUS_NATIVEAOT_THREAD_ABORT mov sp, x2 b RhpThrowHwEx diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S index b8ae78630a6e..42ef0e3b510c 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S @@ -149,7 +149,7 @@ NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler .cfi_restore_state LOCAL_LABEL(ThrowThreadAbort): POP_PROBE_FRAME - mov w0, #STATUS_REDHAWK_THREAD_ABORT + mov w0, 
#STATUS_NATIVEAOT_THREAD_ABORT mov x1, lr // return address as exception PC b C_FUNC(RhpThrowHwEx) NESTED_END RhpWaitForGC diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 4de07d188969..bd6e6d37652f 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -174,7 +174,7 @@ WaitForGC EPILOG_RETURN ThrowThreadAbort POP_PROBE_FRAME - EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT + EPILOG_NOP mov w0, #STATUS_NATIVEAOT_THREAD_ABORT EPILOG_NOP mov x1, lr ;; return address as exception PC EPILOG_NOP b RhpThrowHwEx NESTED_END RhpWaitForGC diff --git a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.S deleted file mode 100644 index 5d3d11cf4108..000000000000 --- a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.S +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include "AsmOffsets.inc" - -#ifdef FEATURE_CACHED_INTERFACE_DISPATCH - - .extern RhpCidResolve - .extern RhpUniversalTransition_DebugStepTailCall - - // Macro that generates code to check a single cache entry. - .macro CHECK_CACHE_ENTRY entry - // Check a single entry in the cache. - // x9 : Cache data structure. Also used for target address jump. - // x10 : Instance MethodTable* - // x11 : Indirection cell address, preserved - // x12 : Trashed - ldr x12, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))] - cmp x10, x12 - bne 0f - ldr x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)] - br x9 -0: - .endm - -// -// Macro that generates a stub consuming a cache with the given number of entries. -// - .macro DEFINE_INTERFACE_DISPATCH_STUB entries - - NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler - - // x11 holds the indirection cell address. Load the cache pointer. - ldr x9, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - // Load the MethodTable from the object instance in x0. - ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries - ldr x10, [x0] - - .global CurrentEntry - .set CurrentEntry, 0 - - .rept \entries - CHECK_CACHE_ENTRY CurrentEntry - .set CurrentEntry, CurrentEntry + 1 - .endr - - // x11 still contains the indirection cell address. - b C_FUNC(RhpInterfaceDispatchSlow) - - NESTED_END "RhpInterfaceDispatch\entries", _TEXT - - .endm - -// -// Define all the stub routines we currently need. -// -// If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the -// *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens -// during the interface dispatch. -// - DEFINE_INTERFACE_DISPATCH_STUB 1 - DEFINE_INTERFACE_DISPATCH_STUB 2 - DEFINE_INTERFACE_DISPATCH_STUB 4 - DEFINE_INTERFACE_DISPATCH_STUB 8 - DEFINE_INTERFACE_DISPATCH_STUB 16 - DEFINE_INTERFACE_DISPATCH_STUB 32 - DEFINE_INTERFACE_DISPATCH_STUB 64 - -// -// Initial dispatch on an interface when we don't have a cache yet. -// - LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT - ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch - // Trigger an AV if we're dispatching on a null this. 
- // The exception handling infrastructure is aware of the fact that this is the first - // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here - // to a NullReferenceException at the callsite. - ldr xzr, [x0] - - // Just tail call to the cache miss helper. - b C_FUNC(RhpInterfaceDispatchSlow) - LEAF_END RhpInitialInterfaceDispatch, _TEXT - -// -// Stub dispatch routine for dispatch to a vtable slot -// - LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // x11 contains the interface dispatch cell address. - // load x12 to point to the vtable offset (which is stored in the m_pCache field). - ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - // Load the MethodTable from the object instance in x0, and add it to the vtable offset - // to get the address in the vtable of what we want to dereference - ldr x13, [x0] - add x12, x12, x13 - - // Load the target address of the vtable into x12 - ldr x12, [x12] - - br x12 - LEAF_END RhpVTableOffsetDispatch, _TEXT - -// -// Cache miss case, call the runtime to resolve the target and update the cache. -// Use universal transition helper to allow an exception to flow out of resolution. -// - LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // x11 contains the interface dispatch cell address. - // Calling convention of the universal thunk is: - // xip0: target address for the thunk to call - // xip1: parameter of the thunk's target - PREPARE_EXTERNAL_VAR RhpCidResolve, xip0 - mov xip1, x11 - b C_FUNC(RhpUniversalTransition_DebugStepTailCall) - LEAF_END RhpInterfaceDispatchSlow, _TEXT - -#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.asm b/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.asm deleted file mode 100644 index 93e6038f1047..000000000000 --- a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.asm +++ /dev/null @@ -1,126 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - -#include "AsmMacros.h" - - TEXTAREA - -#ifdef FEATURE_CACHED_INTERFACE_DISPATCH - - EXTERN RhpCidResolve - EXTERN RhpUniversalTransition_DebugStepTailCall - - ;; Macro that generates code to check a single cache entry. - MACRO - CHECK_CACHE_ENTRY $entry - ;; Check a single entry in the cache. - ;; x9 : Cache data structure. Also used for target address jump. - ;; x10 : Instance MethodTable* - ;; x11 : Indirection cell address, preserved - ;; x12 : Trashed - ldr x12, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16))] - cmp x10, x12 - bne %ft0 - ldr x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16) + 8)] - br x9 -0 - MEND - - -;; -;; Macro that generates a stub consuming a cache with the given number of entries. -;; - MACRO - DEFINE_INTERFACE_DISPATCH_STUB $entries - - NESTED_ENTRY RhpInterfaceDispatch$entries - - ;; x11 holds the indirection cell address. Load the cache pointer. - ldr x9, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Load the MethodTable from the object instance in x0. - ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation$entries - ldr x10, [x0] - - GBLA CurrentEntry -CurrentEntry SETA 0 - - WHILE CurrentEntry < $entries - CHECK_CACHE_ENTRY CurrentEntry -CurrentEntry SETA CurrentEntry + 1 - WEND - - ;; x11 still contains the indirection cell address. - b RhpInterfaceDispatchSlow - - NESTED_END RhpInterfaceDispatch$entries - - MEND - -;; -;; Define all the stub routines we currently need. 
-;; -;; If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the -;; *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens -;; during the interface dispatch. -;; - DEFINE_INTERFACE_DISPATCH_STUB 1 - DEFINE_INTERFACE_DISPATCH_STUB 2 - DEFINE_INTERFACE_DISPATCH_STUB 4 - DEFINE_INTERFACE_DISPATCH_STUB 8 - DEFINE_INTERFACE_DISPATCH_STUB 16 - DEFINE_INTERFACE_DISPATCH_STUB 32 - DEFINE_INTERFACE_DISPATCH_STUB 64 - - -;; -;; Initial dispatch on an interface when we don't have a cache yet. -;; - LEAF_ENTRY RhpInitialInterfaceDispatch - ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch - ;; Trigger an AV if we're dispatching on a null this. - ;; The exception handling infrastructure is aware of the fact that this is the first - ;; instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here - ;; to a NullReferenceException at the callsite. - ldr xzr, [x0] - - ;; Just tail call to the cache miss helper. - b RhpInterfaceDispatchSlow - LEAF_END RhpInitialInterfaceDispatch - -;; -;; Stub dispatch routine for dispatch to a vtable slot -;; - LEAF_ENTRY RhpVTableOffsetDispatch - ;; x11 contains the interface dispatch cell address. - ;; load x12 to point to the vtable offset (which is stored in the m_pCache field). - ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Load the MethodTable from the object instance in x0, and add it to the vtable offset - ;; to get the address in the vtable of what we want to dereference - ldr x13, [x0] - add x12, x12, x13 - - ;; Load the target address of the vtable into x12 - ldr x12, [x12] - - br x12 - LEAF_END RhpVTableOffsetDispatch - -;; -;; Cache miss case, call the runtime to resolve the target and update the cache. -;; Use universal transition helper to allow an exception to flow out of resolution. -;; - LEAF_ENTRY RhpInterfaceDispatchSlow - ;; x11 contains the interface dispatch cell address. - ;; Calling convention of the universal thunk is: - ;; xip0: target address for the thunk to call - ;; xip1: parameter of the thunk's target - ldr xip0, =RhpCidResolve - mov xip1, x11 - b RhpUniversalTransition_DebugStepTailCall - LEAF_END RhpInterfaceDispatchSlow - -#endif // FEATURE_CACHED_INTERFACE_DISPATCH - - END diff --git a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S deleted file mode 100644 index 6948e0fa94a0..000000000000 --- a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S +++ /dev/null @@ -1,397 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include - -// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used -// during garbage collections to verify that object references where never written to the heap without using a -// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing -// new references to the real heap. Since this can not be solved perfectly without critical sections around the -// entire update process, we instead update the shadow location and then re-check the real location (as two -// ordered operations) and if there is a disparity we will re-write the shadow location with a special value -// (INVALIDGCVALUE) which disables the check for that location. 
Since the shadow heap is only validated at GC -// time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the -// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. -#ifdef WRITE_BARRIER_CHECK - - .global $g_GCShadow - .global $g_GCShadowEnd - - // On entry: - // $destReg: location to be updated - // $refReg: objectref to be stored - // - // On exit: - // x12,x17: trashed - // other registers are preserved - // - .macro UPDATE_GC_SHADOW destReg, refReg - - // If g_GCShadow is 0, don't perform the check. - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, X12 - cbz x12, 1f - - // Save destReg since we're about to modify it (and we need the original value both within the macro and - // once we exit the macro). - mov x17, \destReg - - // Transform destReg into the equivalent address in the shadow heap. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, X12 - subs \destReg, \destReg, x12 - blo 0f - - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, X12 - add \destReg, \destReg, x12 - - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, X12 - cmp \destReg, x12 - bhs 0f - - // Update the shadow heap. - str \refReg, [\destReg] - - // The following read must be strongly ordered wrt to the write we have just performed in order to - // prevent race conditions. - dmb ish - - // Now check that the real heap location still contains the value we just wrote into the shadow heap. - mov x12, x17 - ldr x12, [x12] - cmp x12, \refReg - beq 0f - - // Someone went and updated the real heap. We need to invalidate INVALIDGCVALUE the shadow location since we can not - // guarantee whose shadow update won. - movz x12, (INVALIDGCVALUE & 0xFFFF) // #0xcccd - movk x12, ((INVALIDGCVALUE >> 16) & 0xFFFF), LSL #16 - str x12, [\destReg] - -0: - // Restore original destReg value - mov \destReg, x17 - -1: - .endm - -#else // WRITE_BARRIER_CHECK - - .macro UPDATE_GC_SHADOW destReg, refReg - .endm - -#endif // WRITE_BARRIER_CHECK - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the -// name of the register that points to the location to be updated and the name of the register that holds the -// object reference (this should be in upper case as it is used in the definition of the name of the helper). - -// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for -// some interlocked helpers that need an inline barrier. - - // On entry: - // destReg: location to be updated (cannot be x12,x17) - // refReg: objectref to be stored (cannot be x12,x17) - // - // On exit: - // x12,x17: trashed - // - .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg - - // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - // we are in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW \destReg, \refReg - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - // Update the write watch table if necessary - PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 - - cbz x12, 2f - add x12, x12, \destReg, lsr #0xc // SoftwareWriteWatch::AddressToTableByteIndexShift - ldrb w17, [x12] - cbnz x17, 2f - mov w17, #0xFF - strb w17, [x12] -#endif - -2: - // We can skip the card table write if the reference is to - // an object not on the epehemeral segment. 
- PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, x12 - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, x17 - cmp \refReg, x12 - ccmp \refReg, x17, #0x2, hs - bhs 0f - - // Set this objects card, if it has not already been set. - PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 - add x17, x12, \destReg, lsr #11 - - // Check that this card has not already been written. Avoiding useless writes is a big win on - // multi-proc systems since it avoids cache thrashing. - ldrb w12, [x17] - cmp x12, 0xFF - beq 0f - - mov x12, 0xFF - strb w12, [x17] - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Check if we need to update the card bundle table - PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12 - add x17, x12, \destReg, lsr #21 - ldrb w12, [x17] - cmp x12, 0xFF - beq 0f - - mov x12, 0xFF - strb w12, [x17] -#endif - -0: - // Exit label - .endm - - // On entry: - // destReg: location to be updated - // refReg: objectref to be stored - // - // On exit: - // x12, x17: trashed - // - .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg - - // The "check" of this checked write barrier - is destReg - // within the heap? if no, early out. - - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12 - cmp \destReg, x12 - - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x12 - - // If \destReg >= g_lowest_address, compare \destReg to g_highest_address. - // Otherwise, set the C flag (0x2) to take the next branch. - ccmp \destReg, x12, #0x2, hs - bhs 0f - - INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg - -0: - // Exit label - .endm - -// void JIT_ByRefWriteBarrier -// On entry: -// x13 : the source address (points to object reference to write) -// x14 : the destination address (object reference written here) -// -// On exit: -// x13 : incremented by 8 -// x14 : incremented by 8 -// x15 : trashed -// x12, x17 : trashed -// -// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF -// if you add more trashed registers. -// -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1 -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address -LEAF_ENTRY RhpByRefAssignRefArm64, _TEXT - - ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 - ldr x15, [x13], 8 - b C_FUNC(RhpCheckedAssignRefArm64) - -LEAF_END RhpByRefAssignRefArm64, _TEXT - -// JIT_CheckedWriteBarrier(Object** dst, Object* src) -// -// Write barrier for writes to objects that may reside -// on the managed heap. -// -// On entry: -// x14 : the destination address (LHS of the assignment). -// May not be a heap location (hence the checked). -// x15 : the object reference (RHS of the assignment). -// -// On exit: -// x12, x17 : trashed -// x14 : incremented by 8 - LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT - - // is destReg within the heap? - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12 - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x17 - cmp x14, x12 - ccmp x14, x17, #0x2, hs - bhs LOCAL_LABEL(NotInHeap) - - b C_FUNC(RhpAssignRefArm64) - -LOCAL_LABEL(NotInHeap): - ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation - str x15, [x14], 8 - ret - -LEAF_END RhpCheckedAssignRefArm64, _TEXT - -// JIT_WriteBarrier(Object** dst, Object* src) -// -// Write barrier for writes to objects that are known to -// reside on the managed heap. 
-// -// On entry: -// x14 : the destination address (LHS of the assignment). -// x15 : the object reference (RHS of the assignment). -// -// On exit: -// x12, x17 : trashed -// x14 : incremented by 8 -LEAF_ENTRY RhpAssignRefArm64, _TEXT - - ALTERNATE_ENTRY RhpAssignRefAVLocation - stlr x15, [x14] - - INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15 - - add x14, x14, 8 - ret - -LEAF_END RhpAssignRefArm64, _TEXT - -// Same as RhpAssignRefArm64, but with standard ABI. -LEAF_ENTRY RhpAssignRef, _TEXT - mov x14, x0 ; x14 = dst - mov x15, x1 ; x15 = val - b C_FUNC(RhpAssignRefArm64) -LEAF_END RhpAssignRef, _TEXT - - -// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon -// successful updates. - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT -.arch_extension lse -#endif - -// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) -// -// Interlocked compare exchange on objectref. -// -// On entry: -// x0: pointer to objectref -// x1: exchange value -// x2: comparand -// -// On exit: -// x0: original value of objectref -// x10, x12, x16, x17: trashed -// - LEAF_ENTRY RhpCheckedLockCmpXchg - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16 - tbz w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(CmpXchgRetry) -#endif - - mov x10, x2 - casal x10, x1, [x0] // exchange - cmp x2, x10 - bne LOCAL_LABEL(CmpXchgNoUpdate) - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - b LOCAL_LABEL(DoCardsCmpXchg) -LOCAL_LABEL(CmpXchgRetry): - // Check location value is what we expect. - ldaxr x10, [x0] - cmp x10, x2 - bne LOCAL_LABEL(CmpXchgNoUpdate) - - // Current value matches comparand, attempt to update with the new value. - stlxr w12, x1, [x0] - cbnz w12, LOCAL_LABEL(CmpXchgRetry) -#endif - -LOCAL_LABEL(DoCardsCmpXchg): - // We have successfully updated the value of the objectref so now we need a GC write barrier. - // The following barrier code takes the destination in x0 and the value in x1 so the arguments are - // already correctly set up. - - INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 - -LOCAL_LABEL(CmpXchgNoUpdate): - // x10 still contains the original value. - mov x0, x10 - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - tbnz w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(NoBarrierCmpXchg) - InterlockedOperationBarrier -LOCAL_LABEL(NoBarrierCmpXchg): -#endif - ret lr - - LEAF_END RhpCheckedLockCmpXchg, _TEXT - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address - -// RhpCheckedXchg(Object** destination, Object* value) -// -// Interlocked exchange on objectref. 
-// -// On entry: -// x0: pointer to objectref -// x1: exchange value -// -// On exit: -// x0: original value of objectref -// x10: trashed -// x12, x16, x17: trashed -// - LEAF_ENTRY RhpCheckedXchg, _TEXT - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16 - tbz w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(ExchangeRetry) -#endif - - swpal x1, x10, [x0] // exchange - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - b LOCAL_LABEL(DoCardsXchg) -LOCAL_LABEL(ExchangeRetry): - // Read the existing memory location. - ldaxr x10, [x0] - - // Attempt to update with the new value. - stlxr w12, x1, [x0] - cbnz w12, LOCAL_LABEL(ExchangeRetry) -#endif - -LOCAL_LABEL(DoCardsXchg): - // We have successfully updated the value of the objectref so now we need a GC write barrier. - // The following barrier code takes the destination in x0 and the value in x1 so the arguments are - // already correctly set up. - - INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 - - // x10 still contains the original value. - mov x0, x10 - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - tbnz w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(NoBarrierXchg) - InterlockedOperationBarrier -LOCAL_LABEL(NoBarrierXchg): -#endif - ret - - LEAF_END RhpCheckedXchg, _TEXT - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT -.arch_extension nolse -#endif diff --git a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.asm b/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.asm deleted file mode 100644 index 05a26044dddd..000000000000 --- a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.asm +++ /dev/null @@ -1,392 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - -;; -;; Define the helpers used to implement the write barrier required when writing an object reference into a -;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in -;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral -;; collection. -;; - -#include "AsmMacros.h" - - TEXTAREA - -;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used -;; during garbage collections to verify that object references where never written to the heap without using a -;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing -;; new references to the real heap. Since this can't be solved perfectly without critical sections around the -;; entire update process, we instead update the shadow location and then re-check the real location (as two -;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value -;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC -;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the -;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. 
-#ifdef WRITE_BARRIER_CHECK - - SETALIAS g_GCShadow, ?g_GCShadow@@3PEAEEA - SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PEAEEA - EXTERN $g_GCShadow - EXTERN $g_GCShadowEnd - -INVALIDGCVALUE EQU 0xCCCCCCCD - - MACRO - ;; On entry: - ;; $destReg: location to be updated (cannot be x12,x17) - ;; $refReg: objectref to be stored (cannot be x12,x17) - ;; - ;; On exit: - ;; x12,x17: trashed - ;; other registers are preserved - ;; - UPDATE_GC_SHADOW $destReg, $refReg - - ;; If g_GCShadow is 0, don't perform the check. - PREPARE_EXTERNAL_VAR_INDIRECT $g_GCShadow, x12 - cbz x12, %ft1 - - ;; Save $destReg since we're about to modify it (and we need the original value both within the macro and - ;; once we exit the macro). - mov x17, $destReg - - ;; Transform $destReg into the equivalent address in the shadow heap. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12 - subs $destReg, $destReg, x12 - blo %ft0 - - PREPARE_EXTERNAL_VAR_INDIRECT $g_GCShadow, x12 - add $destReg, $destReg, x12 - - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, x12 - cmp $destReg, x12 - bhs %ft0 - - ;; Update the shadow heap. - str $refReg, [$destReg] - - ;; The following read must be strongly ordered wrt to the write we've just performed in order to - ;; prevent race conditions. - dmb ish - - ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. - mov x12, x17 - ldr x12, [x12] - cmp x12, $refReg - beq %ft0 - - ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't - ;; guarantee whose shadow update won. - MOVL64 x12, INVALIDGCVALUE, 0 - str x12, [$destReg] - -0 - ;; Restore original $destReg value - mov $destReg, x17 - -1 - MEND - -#else // WRITE_BARRIER_CHECK - - MACRO - UPDATE_GC_SHADOW $destReg, $refReg - MEND - -#endif // WRITE_BARRIER_CHECK - -;; There are several different helpers used depending on which register holds the object reference. Since all -;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the -;; name of the register that points to the location to be updated and the name of the register that holds the -;; object reference (this should be in upper case as it's used in the definition of the name of the helper). - -;; Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for -;; some interlocked helpers that need an inline barrier. - MACRO - ;; On entry: - ;; $destReg: location to be updated (cannot be x12,x17) - ;; $refReg: objectref to be stored (cannot be x12,x17) - ;; - ;; On exit: - ;; x12,x17: trashed - ;; - INSERT_UNCHECKED_WRITE_BARRIER_CORE $destReg, $refReg - - ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - ;; we're in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW $destReg, $refReg - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - // Update the write watch table if necessary - PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 - - cbz x12, %ft2 - add x12, x12, $destReg, lsr #0xc // SoftwareWriteWatch::AddressToTableByteIndexShift - ldrb w17, [x12] - cbnz x17, %ft2 - mov w17, #0xFF - strb w17, [x12] -#endif - -2 - ;; We can skip the card table write if the reference is to - ;; an object not on the epehemeral segment. 
- PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, x12 - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, x17 - cmp $refReg, x12 - ccmp $refReg, x17, #0x2, hs - bhs %ft0 - - ;; Set this object's card, if it hasn't already been set. - PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 - add x17, x12, $destReg lsr #11 - - ;; Check that this card hasn't already been written. Avoiding useless writes is a big win on - ;; multi-proc systems since it avoids cache trashing. - ldrb w12, [x17] - cmp x12, 0xFF - beq %ft0 - - mov x12, 0xFF - strb w12, [x17] - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Check if we need to update the card bundle table - PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12 - add x17, x12, $destReg, lsr #21 - ldrb w12, [x17] - cmp x12, 0xFF - beq %ft0 - - mov x12, 0xFF - strb w12, [x17] -#endif - -0 - ;; Exit label - MEND - - MACRO - ;; On entry: - ;; $destReg: location to be updated (cannot be x12,x17) - ;; $refReg: objectref to be stored (cannot be x12,x17) - ;; - ;; On exit: - ;; x12, x17: trashed - ;; - INSERT_CHECKED_WRITE_BARRIER_CORE $destReg, $refReg - - ;; The "check" of this checked write barrier - is $destReg - ;; within the heap? if no, early out. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12 - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x17 - cmp $destReg, x12 - ccmp $destReg, x17, #0x2, hs - bhs %ft0 - - INSERT_UNCHECKED_WRITE_BARRIER_CORE $destReg, $refReg - -0 - ;; Exit label - MEND - -;; void JIT_ByRefWriteBarrier -;; On entry: -;; x13 : the source address (points to object reference to write) -;; x14 : the destination address (object reference written here) -;; -;; On exit: -;; x13 : incremented by 8 -;; x14 : incremented by 8 -;; x15 : trashed -;; x12, x17 : trashed -;; -;; NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF -;; if you add more trashed registers. -;; -;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1 -;; - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address - LEAF_ENTRY RhpByRefAssignRefArm64, _TEXT - - ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 - ldr x15, [x13], 8 - b RhpCheckedAssignRefArm64 - - LEAF_END RhpByRefAssignRefArm64 - - -;; JIT_CheckedWriteBarrier(Object** dst, Object* src) -;; -;; Write barrier for writes to objects that may reside -;; on the managed heap. -;; -;; On entry: -;; x14 : the destination address (LHS of the assignment). -;; May not be a heap location (hence the checked). -;; x15 : the object reference (RHS of the assignment) -;; -;; On exit: -;; x12, x17 : trashed -;; x14 : incremented by 8 - LEAF_ENTRY RhpCheckedAssignRefArm64 - - ;; is destReg within the heap? - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12 - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x17 - cmp x14, x12 - ccmp x14, x17, #0x2, hs - blo RhpAssignRefArm64 - -NotInHeap - ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation - str x15, [x14], 8 - ret - - LEAF_END RhpCheckedAssignRefArm64 - -;; JIT_WriteBarrier(Object** dst, Object* src) -;; -;; Write barrier for writes to objects that are known to -;; reside on the managed heap. 
-;; -;; On entry: -;; x14 : the destination address (LHS of the assignment) -;; x15 : the object reference (RHS of the assignment) -;; -;; On exit: -;; x12, x17 : trashed -;; x14 : incremented by 8 - LEAF_ENTRY RhpAssignRefArm64 - - ALTERNATE_ENTRY RhpAssignRefAVLocation - stlr x15, [x14] - - INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15 - - add x14, x14, 8 - ret - - LEAF_END RhpAssignRefArm64 - -;; same as RhpAssignRefArm64, but with standard ABI. - LEAF_ENTRY RhpAssignRef - mov x14, x0 ; x14 = dst - mov x15, x1 ; x15 = val - b RhpAssignRefArm64 - LEAF_END RhpAssignRef - - -;; Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon -;; successful updates. - -;; RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) -;; -;; Interlocked compare exchange on objectref. -;; -;; On entry: -;; x0 : pointer to objectref -;; x1 : exchange value -;; x2 : comparand -;; -;; On exit: -;; x0: original value of objectref -;; x10, x12, x16, x17: trashed -;; - LEAF_ENTRY RhpCheckedLockCmpXchg - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16 - tbz x16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, CmpXchgRetry -#endif - - mov x10, x2 - casal x10, x1, [x0] ;; exchange - cmp x2, x10 - bne CmpXchgNoUpdate - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - b DoCardsCmpXchg -CmpXchgRetry - ;; Check location value is what we expect. - ldaxr x10, [x0] - cmp x10, x2 - bne CmpXchgNoUpdate - - ;; Current value matches comparand, attempt to update with the new value. - stlxr w12, x1, [x0] - cbnz w12, CmpXchgRetry -#endif - -DoCardsCmpXchg - ;; We have successfully updated the value of the objectref so now we need a GC write barrier. - ;; The following barrier code takes the destination in x0 and the value in x1 so the arguments are - ;; already correctly set up. - - INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 - -CmpXchgNoUpdate - ;; x10 still contains the original value. - mov x0, x10 - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - tbnz x16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, NoBarrierCmpXchg - InterlockedOperationBarrier -NoBarrierCmpXchg -#endif - ret lr - - LEAF_END RhpCheckedLockCmpXchg - -;; RhpCheckedXchg(Object** destination, Object* value) -;; -;; Interlocked exchange on objectref. -;; -;; On entry: -;; x0 : pointer to objectref -;; x1 : exchange value -;; -;; On exit: -;; x0: original value of objectref -;; x10: trashed -;; x12, x16, x17: trashed -;; - LEAF_ENTRY RhpCheckedXchg - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16 - tbz x16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, ExchangeRetry -#endif - - swpal x1, x10, [x0] ;; exchange - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - b DoCardsXchg -ExchangeRetry - ;; Read the existing memory location. - ldaxr x10, [x0] - - ;; Attempt to update with the new value. - stlxr w12, x1, [x0] - cbnz w12, ExchangeRetry -#endif - -DoCardsXchg - ;; We have successfully updated the value of the objectref so now we need a GC write barrier. - ;; The following barrier code takes the destination in x0 and the value in x1 so the arguments are - ;; already correctly set up. - - INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 - - ;; x10 still contains the original value. 
- mov x0, x10 - -#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT - tbnz x16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, NoBarrierXchg - InterlockedOperationBarrier -NoBarrierXchg -#endif - ret - - LEAF_END RhpCheckedXchg - - end diff --git a/src/coreclr/nativeaot/Runtime/corexcep.h b/src/coreclr/nativeaot/Runtime/corexcep.h new file mode 100644 index 000000000000..f5786836a984 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/corexcep.h @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "../../inc/corexcep.h" diff --git a/src/coreclr/nativeaot/Runtime/disabledeventpipeinternal.cpp b/src/coreclr/nativeaot/Runtime/disabledeventpipeinternal.cpp index 6c48533b05c1..ecd0227c57e7 100644 --- a/src/coreclr/nativeaot/Runtime/disabledeventpipeinternal.cpp +++ b/src/coreclr/nativeaot/Runtime/disabledeventpipeinternal.cpp @@ -3,7 +3,7 @@ #include "CommonTypes.h" #include "CommonMacros.h" -#include "PalRedhawk.h" +#include "Pal.h" #include diff --git a/src/coreclr/nativeaot/Runtime/disabledruntimeeventinternal.cpp b/src/coreclr/nativeaot/Runtime/disabledruntimeeventinternal.cpp index f590021e6ff9..728e572e9ff1 100644 --- a/src/coreclr/nativeaot/Runtime/disabledruntimeeventinternal.cpp +++ b/src/coreclr/nativeaot/Runtime/disabledruntimeeventinternal.cpp @@ -3,7 +3,7 @@ #include "CommonTypes.h" #include "CommonMacros.h" -#include "PalRedhawk.h" +#include "Pal.h" #ifdef FEATURE_PERFTRACING diff --git a/src/coreclr/nativeaot/Runtime/event.cpp b/src/coreclr/nativeaot/Runtime/event.cpp index 05a511510e38..1d97dd30780d 100644 --- a/src/coreclr/nativeaot/Runtime/event.cpp +++ b/src/coreclr/nativeaot/Runtime/event.cpp @@ -1,15 +1,15 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+ #include "common.h" #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" #include "event.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" diff --git a/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt index 4368bb84bb30..95ea2693344e 100644 --- a/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt @@ -32,9 +32,14 @@ set (EventingHeaders ${GENERATED_INCLUDE_DIR}/clretwallmain.cpp ) +if(CMAKE_CROSSCOMPILING) + set(GEN_EVENTING_TARGETOS --targetos) + set(GEN_EVENTING_TARGETOS_ARG ${CLR_CMAKE_TARGET_OS}) +endif() + add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/aot_eventing_headers.timestamp - COMMAND ${Python_EXECUTABLE} ${GENERATE_PLAT_AGNOSTIC_SCRIPT} --man ${EVENT_MANIFEST} --incdir ${GENERATED_INCLUDE_DIR} --inc ${EVENT_INCLUSION_FILE} --dummy ${GENERATED_INCLUDE_DIR}/etmdummy.h --runtimeflavor nativeaot ${NONEXTERN_ARG} ${NOXPLATHEADER_ARG} + COMMAND ${Python_EXECUTABLE} ${GENERATE_PLAT_AGNOSTIC_SCRIPT} --man ${EVENT_MANIFEST} --incdir ${GENERATED_INCLUDE_DIR} --inc ${EVENT_INCLUSION_FILE} ${GEN_EVENTING_TARGETOS} ${GEN_EVENTING_TARGETOS_ARG} --dummy ${GENERATED_INCLUDE_DIR}/etmdummy.h --runtimeflavor nativeaot ${NONEXTERN_ARG} ${NOXPLATHEADER_ARG} COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/aot_eventing_headers.timestamp DEPENDS ${EVENT_MANIFEST} ${GENERATE_PLAT_AGNOSTIC_SCRIPT} VERBATIM @@ -128,15 +133,15 @@ list(APPEND AOT_EVENTPIPE_SHIM_HEADERS list(APPEND AOT_EVENTPIPE_MANAGED_TO_NATIVE_SOURCES - ${RUNTIME_DIR}/eventpipeinternal.cpp - ${RUNTIME_DIR}/EnabledEventPipeInterface.cpp - ${RUNTIME_DIR}/runtimeeventinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventpipeinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/EnabledEventPipeInterface.cpp + ${NATIVEAOT_RUNTIME_DIR}/runtimeeventinternal.cpp ) if (FEATURE_EVENT_TRACE) list(APPEND AOT_EVENTTRACE_SOURCES - ${RUNTIME_DIR}/eventtrace.cpp - ${RUNTIME_DIR}/profheapwalkhelper.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventtrace.cpp + ${NATIVEAOT_RUNTIME_DIR}/profheapwalkhelper.cpp ) # These are carry-overs from .NET Native and only included for ETW currently @@ -144,15 +149,15 @@ if (FEATURE_EVENT_TRACE) # gcheap : GCHeapDump, GCHeapSurvivalAndMovement - not prioritizing for nativeaot yet if (FEATURE_ETW) list(APPEND AOT_EVENTTRACE_SOURCES - ${RUNTIME_DIR}/eventtrace_bulktype.cpp - ${RUNTIME_DIR}/eventtrace_gcheap.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventtrace_bulktype.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventtrace_gcheap.cpp ) endif() if(CLR_CMAKE_TARGET_WIN32) - set_source_files_properties(${GEN_EVENTPIPE_PROVIDER_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") - set_source_files_properties(${GEN_EVENTPIPE_PLAT_AGNOSTIC_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") - set_source_files_properties(${AOT_EVENTTRACE_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") + set_source_files_properties(${GEN_EVENTPIPE_PROVIDER_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") + set_source_files_properties(${GEN_EVENTPIPE_PLAT_AGNOSTIC_SOURCES} PROPERTIES COMPILE_FLAGS 
"/FI\"${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") + set_source_files_properties(${AOT_EVENTTRACE_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") endif() endif() @@ -174,7 +179,7 @@ set_target_properties(eventpipe-shared-objects PROPERTIES ) if (CLR_CMAKE_TARGET_WIN32) target_compile_options(eventpipe-shared-objects PRIVATE - "/FI${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h") + "/FI${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h") # Install the compile PDB for the eventpipe unity builds. install(FILES "${CMAKE_CURRENT_BINARY_DIR}/$/eventpipe-shared-objects.pdb" DESTINATION aotsdk COMPONENT nativeaot) @@ -194,10 +199,10 @@ list(APPEND EVENTPIPE_SOURCES ) list(APPEND AOT_EVENTPIPE_DISABLED_SOURCES - ${RUNTIME_DIR}/DisabledEventPipeInterface.cpp - ${RUNTIME_DIR}/disabledeventpipeinternal.cpp - ${RUNTIME_DIR}/disabledeventtrace.cpp - ${RUNTIME_DIR}/disabledruntimeeventinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/DisabledEventPipeInterface.cpp + ${NATIVEAOT_RUNTIME_DIR}/disabledeventpipeinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/disabledeventtrace.cpp + ${NATIVEAOT_RUNTIME_DIR}/disabledruntimeeventinternal.cpp ${GEN_EVENTPIPE_PLAT_AGNOSTIC_DISABLED_SOURCES} ) diff --git a/src/coreclr/nativeaot/Runtime/eventpipe/NativeaotEventPipeSupport.h b/src/coreclr/nativeaot/Runtime/eventpipe/NativeaotEventPipeSupport.h index 253f147e32d9..1248e68fa12b 100644 --- a/src/coreclr/nativeaot/Runtime/eventpipe/NativeaotEventPipeSupport.h +++ b/src/coreclr/nativeaot/Runtime/eventpipe/NativeaotEventPipeSupport.h @@ -8,8 +8,8 @@ // definitions which are needed by these source files but are not available in NativeAOT // runtime source files. -// As mentioned PalRedhawk*.cpp, in general we don't want to assume that Windows and -// Redhawk global definitions can co-exist, meaning NativeAOT runtime source files +// As mentioned Pal*.cpp, in general we don't want to assume that Windows and +// NativeAOT global definitions can co-exist, meaning NativeAOT runtime source files // generally do not have access to windows.h; that said, the HOST_WIN32 parts of the shared // EventPipe code are designed to rely on windows.h, so windows.h must be included when // compiling shared EventPipe source files, and a marker is set to indicate that windows.h diff --git a/src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.cpp b/src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.cpp index 9b2bb8522396..8d2c4b0e7c68 100644 --- a/src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.cpp +++ b/src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.cpp @@ -9,7 +9,7 @@ #include #include -#ifdef TARGET_WINDOWS +#ifdef HOST_WINDOWS #include #else #include @@ -18,6 +18,7 @@ #endif #include +#include #include "gcenv.h" #include "thread.h" @@ -45,8 +46,29 @@ ep_rt_aot_walk_managed_stack_for_thread ( ep_rt_thread_handle_t thread, EventPipeStackContents *stack_contents) { - // NativeAOT does not support getting the call stack - return false; + STATIC_CONTRACT_NOTHROW; + EP_ASSERT (thread != NULL); + EP_ASSERT (stack_contents != NULL); + + StackFrameIterator frameIterator(thread, thread->GetTransitionFrameForSampling()); + + while (frameIterator.IsValid()) + { + frameIterator.CalculateCurrentMethodState(); + + // Get the IP. + uintptr_t control_pc = (uintptr_t)frameIterator.GetControlPC(); + + if (control_pc != 0) + { + // Add the IP to the captured stack. 
+ ep_stack_contents_append (stack_contents, control_pc, NULL); + } + + frameIterator.Next(); + } + + return true; } bool @@ -61,6 +83,53 @@ ep_rt_aot_sample_profiler_write_sampling_event_for_threads ( ep_rt_thread_handle_t sampling_thread, EventPipeEvent *sampling_event) { + STATIC_CONTRACT_NOTHROW; + EP_ASSERT (sampling_thread != NULL); + + ThreadStore *thread_store = GetThreadStore (); + + // Check to see if we can suspend managed execution. + if (thread_store->GetSuspendingThread () != NULL) + return; + + // Actually suspend managed execution. + thread_store->LockThreadStore (); + thread_store->SuspendAllThreads (false); + + EventPipeStackContents stack_contents; + EventPipeStackContents *current_stack_contents; + current_stack_contents = ep_stack_contents_init (&stack_contents); + + EP_ASSERT (current_stack_contents != NULL); + + // Walk all managed threads and capture stacks. + FOREACH_THREAD (target_thread) + { + ep_stack_contents_reset (current_stack_contents); + + // Walk the stack and write it out as an event. + if (ep_rt_aot_walk_managed_stack_for_thread (target_thread, current_stack_contents) && !ep_stack_contents_is_empty (current_stack_contents)) { + // Set the payload. + // TODO: We can actually detect whether we are in managed or external code but does it matter?! + uint32_t payload_data = EP_SAMPLE_PROFILER_SAMPLE_TYPE_EXTERNAL; + + // Write the sample. + ep_write_sample_profile_event ( + sampling_thread, + sampling_event, + target_thread, + current_stack_contents, + (uint8_t *)&payload_data, + sizeof (payload_data)); + } + } + END_FOREACH_THREAD + + ep_stack_contents_fini (current_stack_contents); + + // Resume managed execution. + thread_store->ResumeAllThreads(false); + thread_store->UnlockThreadStore(); } const ep_char8_t * @@ -397,7 +466,7 @@ uint32_t ep_rt_aot_current_process_get_id (void) { STATIC_CONTRACT_NOTHROW; - return static_cast(GetCurrentProcessId ()); + return PalGetCurrentProcessId (); } ep_rt_thread_id_t @@ -415,14 +484,14 @@ int64_t ep_rt_aot_perf_counter_query (void) { STATIC_CONTRACT_NOTHROW; - return (int64_t)PalQueryPerformanceCounter(); + return minipal_hires_ticks(); } int64_t ep_rt_aot_perf_frequency_query (void) { STATIC_CONTRACT_NOTHROW; - return (int64_t)PalQueryPerformanceFrequency(); + return minipal_hires_tick_frequency(); } int64_t @@ -431,7 +500,7 @@ ep_rt_aot_system_timestamp_get (void) STATIC_CONTRACT_NOTHROW; FILETIME value; - GetSystemTimeAsFileTime (&value); + PalGetSystemTimeAsFileTime (&value); return static_cast(((static_cast(value.dwHighDateTime)) << 32) | static_cast(value.dwLowDateTime)); } diff --git a/src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.h b/src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.h index 9c0a12053100..d1206ab16455 100644 --- a/src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.h +++ b/src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.h @@ -512,6 +512,33 @@ ep_rt_sample_profiler_write_sampling_event_for_threads ( ep_rt_aot_sample_profiler_write_sampling_event_for_threads (sampling_thread, sampling_event); } +static +inline +void +ep_rt_sample_profiler_enabled (EventPipeEvent *sampling_event) +{ + STATIC_CONTRACT_NOTHROW; + // no-op +} + +static +inline +void +ep_rt_sample_profiler_session_enabled (void) +{ + STATIC_CONTRACT_NOTHROW; + // no-op +} + +static +inline +void +ep_rt_sample_profiler_disabled (void) +{ + STATIC_CONTRACT_NOTHROW; + // no-op +} + static inline void @@ -744,6 +771,15 @@ ep_rt_thread_create ( return ep_rt_aot_thread_create(thread_func, params, thread_type, id); } +static +bool 
+ep_rt_queue_job ( + void *job_func, + void *params) +{ + EP_UNREACHABLE ("Not implemented in NativeAOT"); +} + static inline void @@ -794,7 +830,7 @@ uint32_t ep_rt_processors_get_count (void) { STATIC_CONTRACT_NOTHROW; -#ifdef _INC_WINDOWS +#ifdef HOST_WINDOWS SYSTEM_INFO sys_info = {}; GetSystemInfo (&sys_info); return static_cast(sys_info.dwNumberOfProcessors); @@ -846,7 +882,7 @@ ep_rt_system_time_get (EventPipeSystemTime *system_time) { STATIC_CONTRACT_NOTHROW; -#ifdef _INC_WINDOWS +#ifdef HOST_WINDOWS SYSTEMTIME value; GetSystemTime (&value); @@ -861,7 +897,7 @@ ep_rt_system_time_get (EventPipeSystemTime *system_time) value.wMinute, value.wSecond, value.wMilliseconds); -#elif TARGET_UNIX +#else time_t tt; struct tm *ut_ptr; struct timeval time_val; diff --git a/src/coreclr/nativeaot/Runtime/eventpipeinternal.cpp b/src/coreclr/nativeaot/Runtime/eventpipeinternal.cpp index 01d251c46fc0..6716c7bbd18a 100644 --- a/src/coreclr/nativeaot/Runtime/eventpipeinternal.cpp +++ b/src/coreclr/nativeaot/Runtime/eventpipeinternal.cpp @@ -2,7 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. #include "common.h" -#include "PalRedhawk.h" +#include "Pal.h" #include #include #include @@ -13,6 +13,7 @@ #include #include #include +#include "minipal/time.h" #ifdef FEATURE_PERFTRACING @@ -246,7 +247,7 @@ EXTERN_C UInt32_BOOL QCALLTYPE EventPipeInternal_GetSessionInfo(uint64_t session { pSessionInfo->StartTimeAsUTCFileTime = ep_session_get_session_start_time (pSession); pSessionInfo->StartTimeStamp = ep_session_get_session_start_timestamp(pSession); - pSessionInfo->TimeStampFrequency = PalQueryPerformanceFrequency(); + pSessionInfo->TimeStampFrequency = minipal_hires_tick_frequency(); retVal = true; } } diff --git a/src/coreclr/nativeaot/Runtime/eventtrace.cpp b/src/coreclr/nativeaot/Runtime/eventtrace.cpp index b4ae11574269..a9ed42ed400d 100644 --- a/src/coreclr/nativeaot/Runtime/eventtrace.cpp +++ b/src/coreclr/nativeaot/Runtime/eventtrace.cpp @@ -67,7 +67,7 @@ void ETW::GCLog::FireGcStart(ETW_GC_INFO* pGcInfo) (pGcInfo->GCStart.Depth == GCHeapUtilities::GetGCHeap()->GetMaxGeneration()) && (pGcInfo->GCStart.Reason == ETW_GC_INFO::GC_INDUCED)) { - // No InterlockedExchange64 on Redhawk (presumably b/c there is no compiler + // No InterlockedExchange64 on NativeAOT (presumably b/c there is no compiler // intrinsic for this on x86, even though there is one for InterlockedCompareExchange64) l64ClientSequenceNumberToLog = PalInterlockedCompareExchange64(&s_l64LastClientSequenceNumber, 0, s_l64LastClientSequenceNumber); } diff --git a/src/coreclr/nativeaot/Runtime/eventtrace_bulktype.cpp b/src/coreclr/nativeaot/Runtime/eventtrace_bulktype.cpp index a86c56577005..70a3d7a7ad1d 100644 --- a/src/coreclr/nativeaot/Runtime/eventtrace_bulktype.cpp +++ b/src/coreclr/nativeaot/Runtime/eventtrace_bulktype.cpp @@ -354,7 +354,7 @@ int BulkTypeEventLogger::LogSingleType(MethodTable * pEEType) // // Arguments: // * thAsAddr - MethodTable to log -// * typeLogBehavior - Ignored in Redhawk builds +// * typeLogBehavior - Ignored in NativeAOT builds // void BulkTypeEventLogger::LogTypeAndParameters(uint64_t thAsAddr) @@ -384,7 +384,7 @@ void BulkTypeEventLogger::LogTypeAndParameters(uint64_t thAsAddr) } else if (cTypeParams > 1) { - + ASSERT_UNCONDITIONALLY("unexpected value of cTypeParams greater than 1"); } } diff --git a/src/coreclr/nativeaot/Runtime/eventtrace_etw.h b/src/coreclr/nativeaot/Runtime/eventtrace_etw.h index d4994f68d928..e9862ec074e2 100644 --- 
a/src/coreclr/nativeaot/Runtime/eventtrace_etw.h +++ b/src/coreclr/nativeaot/Runtime/eventtrace_etw.h @@ -2,8 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // -// This header provides Redhawk-specific ETW code and macros, to allow sharing of common -// ETW code between Redhawk and desktop CLR. +// This header provides NativeAOT-specific ETW code and macros, to allow sharing of common +// ETW code. // #ifndef EVENTTRACE_ETW_H #define EVENTTRACE_ETW_H diff --git a/src/coreclr/nativeaot/Runtime/eventtrace_gcheap.cpp b/src/coreclr/nativeaot/Runtime/eventtrace_gcheap.cpp index 2acf81392c2b..11e46dae8111 100644 --- a/src/coreclr/nativeaot/Runtime/eventtrace_gcheap.cpp +++ b/src/coreclr/nativeaot/Runtime/eventtrace_gcheap.cpp @@ -13,7 +13,6 @@ #include "daccess.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" @@ -38,14 +37,14 @@ BOOL ETW::GCLog::ShouldWalkHeapObjectsForEtw() { return RUNTIME_PROVIDER_CATEGORY_ENABLED( TRACE_LEVEL_INFORMATION, - CLR_GCHEAPDUMP_KEYWORD); + CLR_GCHEAPDUMP_KEYWORD); } BOOL ETW::GCLog::ShouldWalkHeapRootsForEtw() { return RUNTIME_PROVIDER_CATEGORY_ENABLED( TRACE_LEVEL_INFORMATION, - CLR_GCHEAPDUMP_KEYWORD); + CLR_GCHEAPDUMP_KEYWORD); } BOOL ETW::GCLog::ShouldTrackMovementForEtw() @@ -416,13 +415,7 @@ void ETW::GCLog::ForceGC(LONGLONG l64ClientSequenceNumber) if (!GCHeapUtilities::IsGCHeapInitialized()) return; - // No InterlockedExchange64 on Redhawk, even though there is one for - // InterlockedCompareExchange64. Technically, there's a race here by using - // InterlockedCompareExchange64, but it's not worth addressing. The race would be - // between two ETW controllers trying to trigger GCs simultaneously, in which case - // one will win and get its sequence number to appear in the GCStart event, while the - // other will lose. Rare, uninteresting, and low-impact. - PalInterlockedCompareExchange64(&s_l64LastClientSequenceNumber, l64ClientSequenceNumber, s_l64LastClientSequenceNumber); + InterlockedExchange64(&s_l64LastClientSequenceNumber, l64ClientSequenceNumber); ForceGCForDiagnostics(); } diff --git a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp index 0f9aa696d233..9285e6763544 100644 --- a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp +++ b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp @@ -63,7 +63,7 @@ void GCToEEInterface::RestartEE(bool /*bFinishedGC*/) // This is needed to synchronize threads that were running in preemptive mode while // the runtime was suspended and that will return to cooperative mode after the runtime // is restarted. - ::FlushProcessWriteBuffers(); + PalFlushProcessWriteBuffers(); #endif // !defined(TARGET_X86) && !defined(TARGET_AMD64) SyncClean::CleanUp(); @@ -399,23 +399,23 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) // On architectures with strong ordering, we only need to prevent compiler reordering. // Otherwise we put a process-wide fence here (so that we could use an ordinary read in the barrier) -#if defined(HOST_ARM64) || defined(HOST_ARM) +#if defined(HOST_ARM64) || defined(HOST_ARM) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) if (!is_runtime_suspended) { // If runtime is not suspended, force all threads to see the changed table before seeing updated heap boundaries. 
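A condensed view of the publication order this code enforces (a sketch, not the actual function): on weakly ordered CPUs each stage must be globally visible before the next, which is what the PalFlushProcessWriteBuffers calls guarantee while the runtime is running; on x86/x64 stores already become visible in program order, so only compiler reordering has to be prevented there.

    // Sketch of the update sequence; names follow this function's args/globals, and
    // FlushIfWeaklyOrdered is a hypothetical wrapper over the conditional flush above.
    void StompWriteBarrierOrderSketch(WriteBarrierParameters* args, bool is_runtime_suspended)
    {
        g_card_table      = args->card_table;         // 1) publish the new card table
        FlushIfWeaklyOrdered(is_runtime_suspended);   //    make it visible everywhere first
        g_lowest_address  = args->lowest_address;     // 2) then publish the new heap bounds
        g_highest_address = args->highest_address;
        FlushIfWeaklyOrdered(is_runtime_suspended);   //    and make those visible before new allocations
    }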
// See: http://vstfdevdiv:8080/DevDiv2/DevDiv/_workitems/edit/346765 - FlushProcessWriteBuffers(); + PalFlushProcessWriteBuffers(); } #endif g_lowest_address = args->lowest_address; g_highest_address = args->highest_address; -#if defined(HOST_ARM64) || defined(HOST_ARM) +#if defined(HOST_ARM64) || defined(HOST_ARM) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) if (!is_runtime_suspended) { // If runtime is not suspended, force all threads to see the changed state before observing future allocations. - FlushProcessWriteBuffers(); + PalFlushProcessWriteBuffers(); } #endif return; @@ -574,7 +574,7 @@ static bool CreateNonSuspendableThread(void (*threadStart)(void*), void* arg, co // Helper used to wrap the start routine of GC threads so we can do things like initialize the // thread state which requires running in the new thread's context. - auto threadStub = [](void* argument) -> DWORD + auto threadStub = [](void* argument) -> uint32_t { ThreadStore::RawGetCurrentThread()->SetGCSpecial(); @@ -618,7 +618,7 @@ bool GCToEEInterface::CreateThread(void (*threadStart)(void*), void* arg, bool i // Helper used to wrap the start routine of background GC threads so we can do things like initialize the // thread state which requires running in the new thread's context. - auto threadStub = [](void* argument) -> DWORD + auto threadStub = [](void* argument) -> uint32_t { ThreadStubArguments* pStartContext = (ThreadStubArguments*)argument; diff --git a/src/coreclr/nativeaot/Runtime/gcenv.h b/src/coreclr/nativeaot/Runtime/gcenv.h index 06e65edb7180..ad9589fca4b0 100644 --- a/src/coreclr/nativeaot/Runtime/gcenv.h +++ b/src/coreclr/nativeaot/Runtime/gcenv.h @@ -14,6 +14,8 @@ #include #include +#include + #ifdef TARGET_UNIX #include #endif @@ -33,8 +35,8 @@ #include "TargetPtrs.h" #include "MethodTable.h" #include "ObjectLayout.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "gcenv.interlocked.inl" #include "slist.h" diff --git a/src/coreclr/nativeaot/Runtime/holder.h b/src/coreclr/nativeaot/Runtime/holder.h index 22bd3dbfe3f5..ed7395d2cf58 100644 --- a/src/coreclr/nativeaot/Runtime/holder.h +++ b/src/coreclr/nativeaot/Runtime/holder.h @@ -3,7 +3,7 @@ // ----------------------------------------------------------------------------------------------------------- // Cut down versions of the Holder and Wrapper template classes used in the CLR. If this coding pattern is -// also common in the Redhawk code then it might be worth investigating pulling the whole holder.h header file +// also common in the NativeAOT code then it might be worth investigating pulling the whole holder.h header file // over (a quick look indicates it might not drag in too many extra dependencies). // diff --git a/src/coreclr/nativeaot/Runtime/i386/AllocFast.S b/src/coreclr/nativeaot/Runtime/i386/AllocFast.S deleted file mode 100644 index 876f2dfbcb80..000000000000 --- a/src/coreclr/nativeaot/Runtime/i386/AllocFast.S +++ /dev/null @@ -1,4 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// TODO: Implement diff --git a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm b/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm deleted file mode 100644 index d557f5ec7507..000000000000 --- a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm +++ /dev/null @@ -1,387 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. 
-;; The .NET Foundation licenses this file to you under the MIT license. - - .586 - .model flat - option casemap:none - .code - - -include AsmMacros.inc - -;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -;; allocation context then automatically fallback to the slow allocation path. -;; ECX == MethodTable -FASTCALL_FUNC RhpNewFast, 4 - - ;; edx = GetThread(), TRASHES eax - INLINE_GETTHREAD edx, eax - - ;; - ;; ecx contains MethodTable pointer - ;; - mov eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] - - ;; - ;; eax: base size - ;; ecx: MethodTable pointer - ;; edx: Thread pointer - ;; - - add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja AllocFailed - - ;; set the new alloc pointer - mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax - - ;; calc the new object pointer - sub eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] - - ;; set the new object's MethodTable pointer - mov [eax], ecx - ret - -AllocFailed: - - ;; - ;; ecx: MethodTable pointer - ;; - push ebp - mov ebp, esp - - PUSH_COOP_PINVOKE_FRAME edx - - ;; Preserve MethodTable in ESI. - mov esi, ecx - - ;; Push alloc helper arguments - push edx ; transition frame - push 0 ; numElements - xor edx, edx ; Flags - ;; Passing MethodTable in ecx - - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test eax, eax - jz NewFast_OOM - - POP_COOP_PINVOKE_FRAME - - pop ebp - ret - -NewFast_OOM: - ;; This is the failure path. We're going to tail-call to a managed helper that will throw - ;; an out of memory exception that the caller of this allocator understands. - - mov eax, esi ; Preserve MethodTable pointer over POP_COOP_PINVOKE_FRAME - - POP_COOP_PINVOKE_FRAME - - ;; Cleanup our ebp frame - pop ebp - - mov ecx, eax ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -FASTCALL_ENDFUNC - -;; Allocate non-array object with finalizer. -;; ECX == MethodTable -FASTCALL_FUNC RhpNewFinalizable, 4 - ;; Create EBP frame. - push ebp - mov ebp, esp - - PUSH_COOP_PINVOKE_FRAME edx - - ;; Preserve MethodTable in ESI - mov esi, ecx - - ;; Push alloc helper arguments - push edx ; transition frame - push 0 ; numElements - mov edx, GC_ALLOC_FINALIZE ; Flags - ;; Passing MethodTable in ecx - - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test eax, eax - jz NewFinalizable_OOM - - POP_COOP_PINVOKE_FRAME - - ;; Collapse EBP frame and return - pop ebp - ret - -NewFinalizable_OOM: - ;; This is the failure path. We're going to tail-call to a managed helper that will throw - ;; an out of memory exception that the caller of this allocator understands. - - mov eax, esi ; Preserve MethodTable pointer over POP_COOP_PINVOKE_FRAME - - POP_COOP_PINVOKE_FRAME - - ;; Cleanup our ebp frame - pop ebp - - mov ecx, eax ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -FASTCALL_ENDFUNC - -;; Allocate a new string. -;; ECX == MethodTable -;; EDX == element count -FASTCALL_FUNC RhNewString, 8 - - push ecx - push edx - - ;; Make sure computing the aligned overall allocation size won't overflow - cmp edx, MAX_STRING_LENGTH - ja StringSizeOverflow - - ; Compute overall allocation size (align(base size + (element size * elements), 4)). 
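For readers who do not follow MASM: every fast path in this deleted file (RhpNewFast above, RhNewString here, RhpNewArray below) reduces to the same thread-local bump allocation. A rough C rendering, with field names approximating the OFFSETOF__ symbols used in the assembly and RhpNewObjectOutOfLine standing in for the transition-frame + RhpGcAlloc slow path:

    // Sketch only - not the runtime's code.
    Object* RhpNewFastSketch(MethodTable* pMT)
    {
        Thread*  pThread = GetThread();
        uint8_t* ptr     = pThread->m_alloc_context.alloc_ptr;
        uint8_t* limit   = pThread->m_eeAllocContext.combined_limit;
        uint32_t size    = pMT->m_uBaseSize;

        if (ptr + size > limit)
            return RhpNewObjectOutOfLine(pMT);       // PUSH_COOP_PINVOKE_FRAME + RhpGcAlloc(pMT, 0, 0, frame)

        pThread->m_alloc_context.alloc_ptr = ptr + size;
        Object* obj = (Object*)ptr;
        obj->m_pEEType = pMT;                        // the fast path publishes nothing else
        return obj;
    }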
- lea eax, [(edx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 3)] - and eax, -4 - - ; ECX == MethodTable - ; EAX == allocation size - ; EDX == scratch - - INLINE_GETTHREAD edx, ecx ; edx = GetThread(), TRASHES ecx - - ; ECX == scratch - ; EAX == allocation size - ; EDX == thread - - mov ecx, eax - add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc StringAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja StringAllocContextOverflow - - ; ECX == allocation size - ; EAX == new alloc ptr - ; EDX == thread - - ; set the new alloc pointer - mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax - - ; calc the new object pointer - sub eax, ecx - - pop edx - pop ecx - - ; set the new object's MethodTable pointer and element count - mov [eax + OFFSETOF__Object__m_pEEType], ecx - mov [eax + OFFSETOF__String__m_Length], edx - ret - -StringAllocContextOverflow: - ; ECX == string size - ; original ECX pushed - ; original EDX pushed - - ; Re-push original ECX - push [esp + 4] - - ; Create EBP frame. - mov [esp + 8], ebp - lea ebp, [esp + 8] - - PUSH_COOP_PINVOKE_FRAME edx - - ; Get the MethodTable and put it in ecx. - mov ecx, dword ptr [ebp - 8] - - ; Push alloc helper arguments (thread, size, flags, MethodTable). - push edx ; transition frame - push [ebp - 4] ; numElements - xor edx, edx ; Flags - ;; Passing MethodTable in ecx - - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test eax, eax - jz StringOutOfMemoryWithFrame - - POP_COOP_PINVOKE_FRAME - add esp, 8 ; pop ecx / edx - pop ebp - ret - -StringOutOfMemoryWithFrame: - ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw - ; an out of memory exception that the caller of this allocator understands. - - mov eax, [ebp - 8] ; Preserve MethodTable pointer over POP_COOP_PINVOKE_FRAME - - POP_COOP_PINVOKE_FRAME - add esp, 8 ; pop ecx / edx - pop ebp ; restore ebp - - mov ecx, eax ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -StringSizeOverflow: - ;; We get here if the size of the final string object can't be represented as an unsigned - ;; 32-bit value. We're going to tail-call to a managed helper that will throw - ;; an OOM exception that the caller of this allocator understands. - - add esp, 8 ; pop ecx / edx - - ;; ecx holds MethodTable pointer already - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -FASTCALL_ENDFUNC - - -;; Allocate one dimensional, zero based array (SZARRAY). -;; ECX == MethodTable -;; EDX == element count -FASTCALL_FUNC RhpNewArray, 8 - - push ecx - push edx - - ; Compute overall allocation size (align(base size + (element size * elements), 4)). - ; if the element count is <= 0x10000, no overflow is possible because the component size is - ; <= 0xffff, and thus the product is <= 0xffff0000, and the base size for the worst case - ; (32 dimensional MdArray) is less than 0xffff. 
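A quick worked bound for the reasoning above: on this small-count path the product is at most 0x10000 * 0xFFFF = 0xFFFF0000, and the base size of any array type that can take this path is comfortably below 0x1000, so the 32-bit multiply/add sequence that follows cannot wrap:

    static_assert(0x10000ull * 0xFFFFull + 0x1000ull + 3ull <= 0xFFFFFFFFull,
                  "small-count array size computation stays within 32 bits");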
- movzx eax, word ptr [ecx + OFFSETOF__MethodTable__m_usComponentSize] - cmp edx,010000h - ja ArraySizeBig - mul edx - add eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] - add eax, 3 -ArrayAlignSize: - and eax, -4 - - ; ECX == MethodTable - ; EAX == array size - ; EDX == scratch - - INLINE_GETTHREAD edx, ecx ; edx = GetThread(), TRASHES ecx - - ; ECX == scratch - ; EAX == array size - ; EDX == thread - - mov ecx, eax - add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc ArrayAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja ArrayAllocContextOverflow - - ; ECX == array size - ; EAX == new alloc ptr - ; EDX == thread - - ; set the new alloc pointer - mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax - - ; calc the new object pointer - sub eax, ecx - - pop edx - pop ecx - - ; set the new object's MethodTable pointer and element count - mov [eax + OFFSETOF__Object__m_pEEType], ecx - mov [eax + OFFSETOF__Array__m_Length], edx - ret - -ArraySizeBig: - ; Compute overall allocation size (align(base size + (element size * elements), 4)). - ; if the element count is negative, it's an overflow, otherwise it's out of memory - cmp edx, 0 - jl ArraySizeOverflow - mul edx - jc ArrayOutOfMemoryNoFrame - add eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] - jc ArrayOutOfMemoryNoFrame - add eax, 3 - jc ArrayOutOfMemoryNoFrame - jmp ArrayAlignSize - -ArrayAllocContextOverflow: - ; ECX == array size - ; original ECX pushed - ; original EDX pushed - - ; Re-push original ECX - push [esp + 4] - - ; Create EBP frame. - mov [esp + 8], ebp - lea ebp, [esp + 8] - - PUSH_COOP_PINVOKE_FRAME edx - - ; Get the MethodTable and put it in ecx. - mov ecx, dword ptr [ebp - 8] - - ; Push alloc helper arguments (thread, size, flags, MethodTable). - push edx ; transition frame - push [ebp - 4] ; numElements - xor edx, edx ; Flags - ;; Passing MethodTable in ecx - - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test eax, eax - jz ArrayOutOfMemoryWithFrame - - POP_COOP_PINVOKE_FRAME - add esp, 8 ; pop ecx / edx - pop ebp - ret - -ArrayOutOfMemoryWithFrame: - ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw - ; an out of memory exception that the caller of this allocator understands. - - mov eax, [ebp - 8] ; Preserve MethodTable pointer over POP_COOP_PINVOKE_FRAME - - POP_COOP_PINVOKE_FRAME - add esp, 8 ; pop ecx / edx - pop ebp ; restore ebp - - mov ecx, eax ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -ArrayOutOfMemoryNoFrame: - add esp, 8 ; pop ecx / edx - - ; ecx holds MethodTable pointer already - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -ArraySizeOverflow: - ; We get here if the size of the final array object can't be represented as an unsigned - ; 32-bit value. We're going to tail-call to a managed helper that will throw - ; an overflow exception that the caller of this allocator understands. 
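All of the failure paths in this file funnel into the same managed helper via a tail call; expressed as a hypothetical C prototype, the contract they set up in ecx/edx is roughly:

    // pEEType     - MethodTable of the type whose allocation failed (ecx)
    // fIsOverflow - 0 => throw OutOfMemoryException, 1 => throw OverflowException (edx)
    extern "C" [[noreturn]] void RhExceptionHandling_FailedAllocation(MethodTable* pEEType, int fIsOverflow);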
- - add esp, 8 ; pop ecx / edx - - ; ecx holds MethodTable pointer already - mov edx, 1 ; Indicate that we should throw OverflowException - jmp RhExceptionHandling_FailedAllocation - -FASTCALL_ENDFUNC - - end diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc index 8ee2e79f44fd..698df51ea775 100644 --- a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc @@ -70,13 +70,15 @@ endm ;; ;; This macro builds a frame describing the current state of managed code. ;; -;; The macro assumes it is called from a helper that has already set up an EBP frame and that the values of -;; EBX, ESI and EDI remain unchanged from their values in managed code. It pushes the frame at the top of the -;; stack. +;; The macro assumes it is called from a helper that the values of EBX, ESI and EDI remain unchanged from their +;; values in managed code. It pushes the frame at the top of the stack. ;; ;; EAX is trashed by this macro. ;; PUSH_COOP_PINVOKE_FRAME macro transitionFrameReg + push ebp + mov ebp, esp + lea eax, [ebp + 8] ; get the ESP of the caller push eax ; save ESP push edi @@ -94,28 +96,32 @@ endm ;; ;; Remove the frame from a previous call to PUSH_COOP_PINVOKE_FRAME from the top of the stack and restore EBX, -;; ESI and EDI to their previous values. -;; -;; TRASHES ECX +;; ESI, and EDI to their previous values. Tears down the EBP frame. ;; POP_COOP_PINVOKE_FRAME macro add esp, 4*4 pop ebx pop esi pop edi - pop ecx + add esp, 4 + + pop ebp endm +INLINE_GET_ALLOC_CONTEXT_BASE macro destReg, trashReg + INLINE_GETTHREAD destReg, trashReg +endm ;; ;; CONSTANTS -- INTEGER ;; -TSF_Attached equ 01h TSF_SuppressGcStress equ 08h TSF_DoNotTriggerGc equ 10h ;; GC type flags GC_ALLOC_FINALIZE equ 1 +GC_ALLOC_ALIGN8_BIAS equ 4 +GC_ALLOC_ALIGN8 equ 8 ;; Note: these must match the defs in PInvokeTransitionFrameFlags PTFF_SAVE_RBX equ 00000001h @@ -132,21 +138,21 @@ TrapThreadsFlags_None equ 0 TrapThreadsFlags_AbortInProgress equ 1 TrapThreadsFlags_TrapThreads equ 2 -;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT -STATUS_REDHAWK_THREAD_ABORT equ 43h +;; This must match HwExceptionCode.STATUS_NATIVEAOT_THREAD_ABORT +STATUS_NATIVEAOT_THREAD_ABORT equ 43h ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit +OFFSETOF__ee_alloc_context__alloc_ptr equ OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__ee_alloc_context equ OFFSETOF__Thread__m_eeAllocContext ;; ;; CONSTANTS -- SYMBOLS ;; RhDebugBreak equ @RhDebugBreak@0 -RhpGcAlloc equ @RhpGcAlloc@16 +RhpGcAlloc equ _RhpGcAlloc@16 G_LOWEST_ADDRESS equ _g_lowest_address G_HIGHEST_ADDRESS equ _g_highest_address G_EPHEMERAL_LOW equ _g_ephemeral_low @@ -157,6 +163,7 @@ RhpTrapThreads equ _RhpTrapThreads RhpStressGc equ @RhpStressGc@0 RhpGcPoll2 equ @RhpGcPoll2@4 RhHandleGet equ @RhHandleGet@4 +DivInt64Internal equ @DivInt64Internal_FCall@16 RhpGcSafeZeroMemory equ @RhpGcSafeZeroMemory@8 RhpGetNumThunkBlocksPerMapping equ @RhpGetNumThunkBlocksPerMapping@0 @@ -185,6 +192,7 @@ EXTERN RhpGcPoll2 : PROC EXTERN RhHandleGet : PROC EXTERN RhpGcSafeZeroMemory : PROC EXTERN RhpGetNumThunkBlocksPerMapping : PROC 
+EXTERN DivInt64Internal : PROC ifdef FEATURE_GC_STRESS EXTERN THREAD__HIJACKFORGCSTRESS : PROC diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/i386/AsmMacros_Shared.h new file mode 100644 index 000000000000..51951864f299 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros_Shared.h @@ -0,0 +1,6 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +// TODO: Implement \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros_Shared.inc b/src/coreclr/nativeaot/Runtime/i386/AsmMacros_Shared.inc new file mode 100644 index 000000000000..956d4d22e383 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros_Shared.inc @@ -0,0 +1,6 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +; This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +include AsmMacros.inc \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/i386/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/i386/ExceptionHandling.asm index 4e823bbbd6ad..facdc3abd304 100644 --- a/src/coreclr/nativeaot/Runtime/i386/ExceptionHandling.asm +++ b/src/coreclr/nativeaot/Runtime/i386/ExceptionHandling.asm @@ -357,7 +357,7 @@ FASTCALL_FUNC RhpCallCatchFunclet, 16 mov ebp, [ecx] ;; It was the ThreadAbortException, so rethrow it - mov ecx, STATUS_REDHAWK_THREAD_ABORT + mov ecx, STATUS_NATIVEAOT_THREAD_ABORT mov edx, [esp + esp_offsetof_ResumeIP] mov esp, eax ;; reset the SP to resume SP value jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception diff --git a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm index fe09d2a73022..42f62c27fe40 100644 --- a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm @@ -172,7 +172,7 @@ RhpWaitForGC proc HijackFixupEpilog Abort: - mov ecx, STATUS_REDHAWK_THREAD_ABORT + mov ecx, STATUS_NATIVEAOT_THREAD_ABORT pop edx pop eax ;; ecx was pushed here, but we don't care for its value pop ebp @@ -191,12 +191,9 @@ RhpGcPoll proc RhpGcPoll endp RhpGcPollRare proc - push ebp - mov ebp, esp PUSH_COOP_PINVOKE_FRAME ecx call RhpGcPoll2 POP_COOP_PINVOKE_FRAME - pop ebp ret RhpGcPollRare endp diff --git a/src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm b/src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm deleted file mode 100644 index 86dd2807fbc8..000000000000 --- a/src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm +++ /dev/null @@ -1,128 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - - .586 - .model flat - option casemap:none - .code - - -include AsmMacros.inc - - -ifdef FEATURE_CACHED_INTERFACE_DISPATCH - -EXTERN RhpCidResolve : PROC -EXTERN _RhpUniversalTransition_DebugStepTailCall@0 : PROC - - -;; Macro that generates code to check a single cache entry. 
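In C terms, the deleted stubs below probe a small per-call-site cache before falling back to the resolver. A sketch (struct shapes inferred from the OFFSETOF__ symbols in the asm; ResolveSlow is a hypothetical stand-in for the RhpCidResolve / universal-transition path):

    struct InterfaceDispatchEntry { MethodTable* pInstanceType; void* pTargetCode; };

    void* InterfaceDispatchSketch(Object* pThis, InterfaceDispatchCell* pCell, int cacheEntries)
    {
        MethodTable* pInstanceType = pThis->m_pEEType;   // doubles as the null check: this load faults for a null 'this'
        InterfaceDispatchEntry* entries = pCell->m_pCache->m_rgEntries;

        for (int i = 0; i < cacheEntries; i++)           // fully unrolled in the stubs (1 to 64 entries)
        {
            if (entries[i].pInstanceType == pInstanceType)
                return entries[i].pTargetCode;           // the stub tail-jumps to this address
        }

        return ResolveSlow(pCell, pThis);                // may grow or replace the cache
    }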
-CHECK_CACHE_ENTRY macro entry -NextLabel textequ @CatStr( Attempt, %entry+1 ) - cmp ebx, [eax + (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 8))] - jne @F - pop ebx - jmp dword ptr [eax + (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 8) + 4)] -@@: -endm - - -;; Macro that generates a stub consuming a cache with the given number of entries. -DEFINE_INTERFACE_DISPATCH_STUB macro entries - -StubName textequ @CatStr( _RhpInterfaceDispatch, entries, <@0> ) -StubAVLocation textequ @CatStr( _RhpInterfaceDispatchAVLocation, entries ) - - StubName proc public - - ;; Check the instance here to catch null references. We're going to touch it again below (to cache - ;; the MethodTable pointer), but that's after we've pushed ebx below, and taking an A/V there will - ;; mess up the stack trace. We also don't have a spare scratch register (eax holds the cache pointer - ;; and the push of ebx below is precisely so we can access a second register to hold the MethodTable - ;; pointer). - ALTERNATE_ENTRY StubAVLocation - cmp dword ptr [ecx], ecx - - ;; eax currently contains the indirection cell address. We need to update it to point to the cache - ;; block instead. - mov eax, [eax + OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Cache pointer is already loaded in the only scratch register we have so far, eax. We need - ;; another scratch register to hold the instance type so save the value of ebx and use that. - push ebx - - ;; Load the MethodTable from the object instance in ebx. - mov ebx, [ecx] - -CurrentEntry = 0 - while CurrentEntry lt entries - CHECK_CACHE_ENTRY %CurrentEntry -CurrentEntry = CurrentEntry + 1 - endm - - ;; eax currently contains the cache block. We need to point it back to the - ;; indirection cell using the back pointer in the cache block - mov eax, [eax + OFFSETOF__InterfaceDispatchCache__m_pCell] - pop ebx - jmp RhpInterfaceDispatchSlow - - StubName endp - - endm ;; DEFINE_INTERFACE_DISPATCH_STUB - - -;; Define all the stub routines we currently need. -DEFINE_INTERFACE_DISPATCH_STUB 1 -DEFINE_INTERFACE_DISPATCH_STUB 2 -DEFINE_INTERFACE_DISPATCH_STUB 4 -DEFINE_INTERFACE_DISPATCH_STUB 8 -DEFINE_INTERFACE_DISPATCH_STUB 16 -DEFINE_INTERFACE_DISPATCH_STUB 32 -DEFINE_INTERFACE_DISPATCH_STUB 64 - -;; Shared out of line helper used on cache misses. -RhpInterfaceDispatchSlow proc -;; eax points at InterfaceDispatchCell - - ;; Setup call to Universal Transition thunk - push ebp - mov ebp, esp - push eax ; First argument (Interface Dispatch Cell) - lea eax, [RhpCidResolve] - push eax ; Second argument (RhpCidResolve) - - ;; Jump to Universal Transition - jmp _RhpUniversalTransition_DebugStepTailCall@0 -RhpInterfaceDispatchSlow endp - -;; Stub dispatch routine for dispatch to a vtable slot -_RhpVTableOffsetDispatch@0 proc public - ;; eax currently contains the indirection cell address. We need to update it to point to the vtable offset (which is in the m_pCache field) - mov eax, [eax + OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; add the vtable offset to the MethodTable pointer - add eax, [ecx] - - ;; Load the target address of the vtable into eax - mov eax, [eax] - - ;; tail-jump to the target - jmp eax -_RhpVTableOffsetDispatch@0 endp - - -;; Initial dispatch on an interface when we don't have a cache yet. -FASTCALL_FUNC RhpInitialDynamicInterfaceDispatch, 0 -ALTERNATE_ENTRY _RhpInitialInterfaceDispatch - ;; Trigger an AV if we're dispatching on a null this. 
- ;; The exception handling infrastructure is aware of the fact that this is the first - ;; instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here - ;; to a NullReferenceException at the callsite. - cmp dword ptr [ecx], ecx - - jmp RhpInterfaceDispatchSlow -FASTCALL_ENDFUNC - -endif ;; FEATURE_CACHED_INTERFACE_DISPATCH - -end diff --git a/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.S deleted file mode 100644 index 876f2dfbcb80..000000000000 --- a/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.S +++ /dev/null @@ -1,4 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// TODO: Implement diff --git a/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.asm b/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.asm deleted file mode 100644 index 133081bee831..000000000000 --- a/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.asm +++ /dev/null @@ -1,307 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - -;; -;; Define the helpers used to implement the write barrier required when writing an object reference into a -;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in -;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral -;; collection. -;; - - .xmm - .model flat - option casemap:none - .code - -include AsmMacros.inc - -;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used -;; during garbage collections to verify that object references where never written to the heap without using a -;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing -;; new references to the real heap. Since this can't be solved perfectly without critical sections around the -;; entire update process, we instead update the shadow location and then re-check the real location (as two -;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value -;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC -;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the -;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. -ifdef WRITE_BARRIER_CHECK - -g_GCShadow TEXTEQU -g_GCShadowEnd TEXTEQU -INVALIDGCVALUE EQU 0CCCCCCCDh - -EXTERN g_GCShadow : DWORD -EXTERN g_GCShadowEnd : DWORD - -UPDATE_GC_SHADOW macro BASENAME, DESTREG, REFREG - - ;; If g_GCShadow is 0, don't perform the check. - cmp g_GCShadow, 0 - je &BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG& - - ;; Save DESTREG since we're about to modify it (and we need the original value both within the macro and - ;; once we exit the macro). - push DESTREG - - ;; Transform DESTREG into the equivalent address in the shadow heap. - sub DESTREG, G_LOWEST_ADDRESS - jb &BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG& - add DESTREG, [g_GCShadow] - cmp DESTREG, [g_GCShadowEnd] - jae &BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG& - - ;; Update the shadow heap. - mov [DESTREG], REFREG - - ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. 
This - ;; read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to - ;; recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock - ;; prefix). - xchg [esp], DESTREG - cmp [DESTREG], REFREG - jne &BASENAME&_UpdateShadowHeap_Invalidate_&DESTREG&_&REFREG& - - ;; The original DESTREG value is now restored but the stack has a value (the shadow version of the - ;; location) pushed. Need to discard this push before we are done. - add esp, 4 - jmp &BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG& - -&BASENAME&_UpdateShadowHeap_Invalidate_&DESTREG&_&REFREG&: - ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't - ;; guarantee whose shadow update won. - - ;; Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an - ;; additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg - ;; variant that doesn't implicitly specify the lock prefix. - xchg [esp], DESTREG - mov dword ptr [DESTREG], INVALIDGCVALUE - -&BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG&: - ;; Restore original DESTREG value from the stack. - pop DESTREG - -&BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG&: -endm - -else ; WRITE_BARRIER_CHECK - -UPDATE_GC_SHADOW macro BASENAME, DESTREG, REFREG -endm - -endif ; WRITE_BARRIER_CHECK - -;; There are several different helpers used depending on which register holds the object reference. Since all -;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the -;; name of the register that points to the location to be updated and the name of the register that holds the -;; object reference (this should be in upper case as it's used in the definition of the name of the helper). -DEFINE_WRITE_BARRIER macro DESTREG, REFREG - -;; Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard -;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that -;; location is in one of the other general registers determined by the value of REFREG. -FASTCALL_FUNC RhpAssignRef&REFREG&, 8 - - ;; Export the canonical write barrier under unqualified name as well - ifidni , - ALTERNATE_ENTRY RhpAssignRef - ALTERNATE_ENTRY _RhpAssignRefAVLocation - endif - - ALTERNATE_ENTRY _RhpAssignRef&REFREG&AVLocation - - ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here - ;; and the card table update we may perform below. - mov dword ptr [DESTREG], REFREG - - ;; Update the shadow copy of the heap with the same value (if enabled). - UPDATE_GC_SHADOW RhpAssignRef, DESTREG, REFREG - - ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it - ;; (since the object won't be collected or moved by an ephemeral collection). - cmp REFREG, [G_EPHEMERAL_LOW] - jb WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG& - cmp REFREG, [G_EPHEMERAL_HIGH] - jae WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG& - - ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must - ;; track this write. The location address is translated into an offset in the card table bitmap. 
We set - ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write - ;; the byte if it hasn't already been done since writes are expensive and impact scaling. - shr DESTREG, 10 - add DESTREG, [G_CARD_TABLE] - cmp byte ptr [DESTREG], 0FFh - jne WriteBarrier_UpdateCardTable_&DESTREG&_&REFREG& - -WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG&: - ret - -;; We get here if it's necessary to update the card table. -WriteBarrier_UpdateCardTable_&DESTREG&_&REFREG&: - mov byte ptr [DESTREG], 0FFh - ret -FASTCALL_ENDFUNC -endm - -RET4 macro - ret 4 -endm - -DEFINE_CHECKED_WRITE_BARRIER_CORE macro BASENAME, DESTREG, REFREG, RETINST - - ;; The location being updated might not even lie in the GC heap (a handle or stack location for instance), - ;; in which case no write barrier is required. - cmp DESTREG, [G_LOWEST_ADDRESS] - jb &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& - cmp DESTREG, [G_HIGHEST_ADDRESS] - jae &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& - - ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - ;; we're in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW BASENAME, DESTREG, REFREG - - ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it - ;; (since the object won't be collected or moved by an ephemeral collection). - cmp REFREG, [G_EPHEMERAL_LOW] - jb &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& - cmp REFREG, [G_EPHEMERAL_HIGH] - jae &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& - - ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must - ;; track this write. The location address is translated into an offset in the card table bitmap. We set - ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write - ;; the byte if it hasn't already been done since writes are expensive and impact scaling. - shr DESTREG, 10 - add DESTREG, [G_CARD_TABLE] - cmp byte ptr [DESTREG], 0FFh - jne &BASENAME&_UpdateCardTable_&DESTREG&_&REFREG& - -&BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG&: - RETINST - -;; We get here if it's necessary to update the card table. -&BASENAME&_UpdateCardTable_&DESTREG&_&REFREG&: - mov byte ptr [DESTREG], 0FFh - RETINST - -endm - - -;; This macro is very much like the one above except that it generates a variant of the function which also -;; checks whether the destination is actually somewhere within the GC heap. -DEFINE_CHECKED_WRITE_BARRIER macro DESTREG, REFREG - -;; Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard -;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into -;; that location is in one of the other general registers determined by the value of REFREG. 
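Stripped of the register plumbing, every generated RhpCheckedAssignRef variant performs the checks below (C sketch; global names follow the symbols used in this file, and the shadow-heap mirroring only exists in WRITE_BARRIER_CHECK builds, as described for UPDATE_GC_SHADOW above):

    void CheckedAssignRefSketch(void** dst, void* ref)
    {
        *dst = ref;                                    // the store itself is unconditional

        if ((uint8_t*)dst < g_lowest_address || (uint8_t*)dst >= g_highest_address)
            return;                                    // not a GC heap location (stack slot, handle, ...)

        // WRITE_BARRIER_CHECK builds mirror the store into the shadow heap here.

        if ((uint8_t*)ref < g_ephemeral_low || (uint8_t*)ref >= g_ephemeral_high)
            return;                                    // referent cannot be moved by an ephemeral GC

        uint8_t* pCard = (uint8_t*)g_card_table + ((size_t)dst >> 10);   // one card byte per 0x400 bytes
        if (*pCard != 0xFF)
            *pCard = 0xFF;                             // write only when needed to limit cache contention
    }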
- -;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction -;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address -FASTCALL_FUNC RhpCheckedAssignRef&REFREG&, 8 - - ;; Export the canonical write barrier under unqualified name as well - ifidni , - ALTERNATE_ENTRY RhpCheckedAssignRef - ALTERNATE_ENTRY _RhpCheckedAssignRefAVLocation - endif - - ALTERNATE_ENTRY _RhpCheckedAssignRef&REFREG&AVLocation - - ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here - ;; and the card table update we may perform below. - mov dword ptr [DESTREG], REFREG - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, DESTREG, REFREG, ret - -FASTCALL_ENDFUNC - -endm - -;; One day we might have write barriers for all the possible argument registers but for now we have -;; just one write barrier that assumes the input register is EDX. -DEFINE_CHECKED_WRITE_BARRIER ECX, EDX -DEFINE_WRITE_BARRIER ECX, EDX - -;; Need some more write barriers to run CLR compiled MDIL on Redhawk - commented out for now -DEFINE_WRITE_BARRIER EDX, EAX -DEFINE_WRITE_BARRIER EDX, ECX -DEFINE_WRITE_BARRIER EDX, EBX -DEFINE_WRITE_BARRIER EDX, ESI -DEFINE_WRITE_BARRIER EDX, EDI -DEFINE_WRITE_BARRIER EDX, EBP - -DEFINE_CHECKED_WRITE_BARRIER EDX, EAX -DEFINE_CHECKED_WRITE_BARRIER EDX, ECX -DEFINE_CHECKED_WRITE_BARRIER EDX, EBX -DEFINE_CHECKED_WRITE_BARRIER EDX, ESI -DEFINE_CHECKED_WRITE_BARRIER EDX, EDI -DEFINE_CHECKED_WRITE_BARRIER EDX, EBP - -FASTCALL_FUNC RhpCheckedLockCmpXchg, 12 - mov eax, [esp+4] - lock cmpxchg [ecx], edx - jne RhpCheckedLockCmpXchg_NoBarrierRequired_ECX_EDX - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, ECX, EDX, ret 4 - -FASTCALL_ENDFUNC - -FASTCALL_FUNC RhpCheckedXchg, 8 - - ;; Setup eax with the new object for the exchange, that way it will automatically hold the correct result - ;; afterwards and we can leave edx unaltered ready for the GC write barrier below. - mov eax, edx - xchg [ecx], eax - - DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, ECX, EDX, ret - -FASTCALL_ENDFUNC - -;; -;; RhpByRefAssignRef simulates movs instruction for object references. -;; -;; On entry: -;; edi: address of ref-field (assigned to) -;; esi: address of the data (source) -;; -;; On exit: -;; edi, esi are incremented by 4, -;; ecx: trashed -;; -FASTCALL_FUNC RhpByRefAssignRef, 8 -ALTERNATE_ENTRY _RhpByRefAssignRefAVLocation1 - mov ecx, [esi] -ALTERNATE_ENTRY _RhpByRefAssignRefAVLocation2 - mov [edi], ecx - - ;; Check whether the writes were even into the heap. If not there's no card update required. - cmp edi, [G_LOWEST_ADDRESS] - jb RhpByRefAssignRef_NoBarrierRequired - cmp edi, [G_HIGHEST_ADDRESS] - jae RhpByRefAssignRef_NoBarrierRequired - - UPDATE_GC_SHADOW BASENAME, ecx, edi - - ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it - ;; (since the object won't be collected or moved by an ephemeral collection). 
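RhpByRefAssignRef stands in for a single movs-style copy in managed byref copy loops: it moves one reference from [esi] to [edi], applies the same checks sketched above, and leaves both pointers advanced by 4 so consecutive calls can stream through an array. In C terms (sketch):

    void ByRefAssignRefSketch(void**& dst, void**& src)
    {
        CheckedAssignRefSketch(dst, *src);   // store + bounds/ephemeral/card checks as above
        ++src;
        ++dst;
    }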
- cmp ecx, [G_EPHEMERAL_LOW] - jb RhpByRefAssignRef_NoBarrierRequired - cmp ecx, [G_EPHEMERAL_HIGH] - jae RhpByRefAssignRef_NoBarrierRequired - - mov ecx, edi - shr ecx, 10 - add ecx, [G_CARD_TABLE] - cmp byte ptr [ecx], 0FFh - je RhpByRefAssignRef_NoBarrierRequired - - mov byte ptr [ecx], 0FFh - -RhpByRefAssignRef_NoBarrierRequired: - ;; Increment the pointers before leaving - add esi,4 - add edi,4 - ret -FASTCALL_ENDFUNC - - end diff --git a/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h b/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h index 86c7e10e9211..6fc5bafb956b 100644 --- a/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h +++ b/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h @@ -10,6 +10,12 @@ #include #include +#ifdef HOST_WINDOWS +#include +#endif // HOST_WINDOWS + +#include + // Implement pure virtual for Unix (for -p:LinkStandardCPlusPlusLibrary=false the default), // to avoid linker requiring __cxa_pure_virtual. #ifdef TARGET_WINDOWS @@ -29,7 +35,7 @@ using std::intptr_t; typedef wchar_t WCHAR; #define W(str) L##str #else -typedef char16_t WCHAR; +typedef char16_t WCHAR; #define W(str) u##str #endif typedef void * HANDLE; @@ -38,28 +44,22 @@ typedef uint32_t UInt32_BOOL; // windows 4-byte BOOL, 0 -> false, #define UInt32_FALSE 0 #define UInt32_TRUE 1 +<<<<<<< HEAD #if (defined(FEATURE_EVENT_TRACE) && !defined(_INC_WINDOWS)) || defined(TARGET_WASM) +======= +#if defined(FEATURE_EVENT_TRACE) && defined(TARGET_UNIX) +>>>>>>> upstream-jun typedef int BOOL; typedef void* LPVOID; typedef uint32_t UINT; typedef void* PVOID; typedef uint64_t ULONGLONG; typedef uintptr_t ULONG_PTR; -#ifdef _MSC_VER -typedef unsigned long ULONG; -#else typedef uint32_t ULONG; -#endif typedef int64_t LONGLONG; typedef uint8_t BYTE; typedef uint16_t UINT16; -typedef struct _GUID { - uint32_t Data1; - uint16_t Data2; - uint16_t Data3; - uint8_t Data4[8]; -} GUID; -#endif // FEATURE_EVENT_TRACE && !_INC_WINDOWS +#endif // FEATURE_EVENT_TRACE && TARGET_UNIX // Hijack funcs are not called, they are "returned to". And when done, they return to the actual caller. // Thus they cannot have any parameters or return anything. diff --git a/src/coreclr/nativeaot/Runtime/inc/MethodTable.h b/src/coreclr/nativeaot/Runtime/inc/MethodTable.h index ffc32334dc0d..1bd480dc0527 100644 --- a/src/coreclr/nativeaot/Runtime/inc/MethodTable.h +++ b/src/coreclr/nativeaot/Runtime/inc/MethodTable.h @@ -13,7 +13,7 @@ class TypeManager; struct TypeManagerHandle; //------------------------------------------------------------------------------------------------- -// The subset of TypeFlags that Redhawk knows about at runtime +// The subset of TypeFlags that NativeAOT knows about at runtime // This should match the TypeFlags enum in the managed type system. 
enum EETypeElementType : uint8_t { diff --git a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h index 305901d7626a..2b7464aa4ea9 100644 --- a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h +++ b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h @@ -11,7 +11,7 @@ struct ReadyToRunHeaderConstants { static const uint32_t Signature = 0x00525452; // 'RTR' - static const uint32_t CurrentMajorVersion = 12; + static const uint32_t CurrentMajorVersion = 14; static const uint32_t CurrentMinorVersion = 0; }; diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 4451b9225d5f..09bd873adf58 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -28,6 +28,11 @@ struct DispatchCellInfo uint8_t HasCache = 0; uint32_t MetadataToken = 0; uint32_t VTableOffset = 0; + + uint32_t GetVTableOffset() const + { + return VTableOffset; + } }; struct InterfaceDispatchCacheHeader @@ -336,8 +341,6 @@ enum PInvokeTransitionFrameFlags : uint64_t #elif defined(TARGET_LOONGARCH64) enum PInvokeTransitionFrameFlags : uint64_t { - // NOTE: Keep in sync with src\coreclr\nativeaot\Runtime\loongarch64\AsmMacros.h - // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp diff --git a/src/coreclr/nativeaot/Runtime/inc/stressLog.h b/src/coreclr/nativeaot/Runtime/inc/stressLog.h index fc031897f492..952789228023 100644 --- a/src/coreclr/nativeaot/Runtime/inc/stressLog.h +++ b/src/coreclr/nativeaot/Runtime/inc/stressLog.h @@ -289,7 +289,7 @@ class StressLog { UNREFERENCED_PARAMETER(level); return FALSE; #else - // In Redhawk we have rationalized facility codes and have much + // In NativeAOT, we have rationalized facility codes and have much // fewer compared to desktop, as such we'll log all facilities and // limit the filtering to the log level... return @@ -443,8 +443,7 @@ struct StressMsg static const size_t maxArgCnt = 63; static const int64_t maxOffset = (int64_t)1 << (formatOffsetLowBits + formatOffsetHighBits); - static size_t maxMsgSize () - { return sizeof(StressMsg) + maxArgCnt*sizeof(void*); } + static constexpr size_t maxMsgSize = sizeof(uint64_t) * 2 + maxArgCnt * sizeof(void*); friend void PopulateDebugHeaders(); }; @@ -623,7 +622,7 @@ inline StressMsg* ThreadStressLog::AdvReadPastBoundary() { } curReadChunk = curReadChunk->next; void** p = (void**)curReadChunk->StartPtr(); - while (*p == NULL && (size_t)(p-(void**)curReadChunk->StartPtr ()) < (StressMsg::maxMsgSize()/sizeof(void*))) + while (*p == NULL && (size_t)(p-(void**)curReadChunk->StartPtr ()) < (StressMsg::maxMsgSize/sizeof(void*))) { ++p; } diff --git a/src/coreclr/nativeaot/Runtime/inc/varint.h b/src/coreclr/nativeaot/Runtime/inc/varint.h deleted file mode 100644 index e5d0853bf48f..000000000000 --- a/src/coreclr/nativeaot/Runtime/inc/varint.h +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
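The decoder being deleted below is table-driven and branch-free; judging from the tables, the encoding stores a unary length prefix in the low bits of the first byte (0 -> 1 byte, 01 -> 2, 011 -> 3, 0111 -> 4, 1111 -> 5) with the payload in the remaining bits. A readable equivalent (sketch) of what ReadUnsigned computes:

    uint32_t ReadUnsignedSketch(const uint8_t*& p)
    {
        int len = 1;                                   // 1 + number of trailing 1 bits, capped at 5
        while (len < 5 && (p[0] & (1u << (len - 1))))
            len++;

        uint64_t raw = 0;
        for (int i = 0; i < len; i++)
            raw |= (uint64_t)p[i] << (8 * i);          // little-endian

        p += len;
        return len == 5 ? (uint32_t)(raw >> 8)         // 5-byte form: payload is the last 4 bytes
                        : (uint32_t)(raw >> len);      // otherwise drop the length-prefix bits
    }

The deleted version reaches the same result without a loop by loading the four bytes that end at the encoded value and shifting by a table-selected amount, which is why it indexes backwards from pbEncoding.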
-class VarInt -{ -public: - static uint32_t ReadUnsigned(PTR_uint8_t & pbEncoding) - { - uintptr_t lengthBits = *pbEncoding & 0x0F; - size_t negLength = s_negLengthTab[lengthBits]; - uintptr_t shift = s_shiftTab[lengthBits]; - uint32_t result = *(PTR_uint32_t)(pbEncoding - negLength - 4); - - result >>= shift; - pbEncoding -= negLength; - - return result; - } - -private: - static int8_t s_negLengthTab[16]; - static uint8_t s_shiftTab[16]; -}; - -#ifndef __GNUC__ -__declspec(selectany) -#endif -int8_t -#ifdef __GNUC__ -__attribute__((weak)) -#endif -VarInt::s_negLengthTab[16] = -{ - -1, // 0 - -2, // 1 - -1, // 2 - -3, // 3 - - -1, // 4 - -2, // 5 - -1, // 6 - -4, // 7 - - -1, // 8 - -2, // 9 - -1, // 10 - -3, // 11 - - -1, // 12 - -2, // 13 - -1, // 14 - -5, // 15 -}; - -#ifndef __GNUC__ -__declspec(selectany) -#endif -uint8_t -#ifdef __GNUC__ -__attribute__((weak)) -#endif -VarInt::s_shiftTab[16] = -{ - 32-7*1, // 0 - 32-7*2, // 1 - 32-7*1, // 2 - 32-7*3, // 3 - - 32-7*1, // 4 - 32-7*2, // 5 - 32-7*1, // 6 - 32-7*4, // 7 - - 32-7*1, // 8 - 32-7*2, // 9 - 32-7*1, // 10 - 32-7*3, // 11 - - 32-7*1, // 12 - 32-7*2, // 13 - 32-7*1, // 14 - 0, // 15 -}; diff --git a/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp b/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp index 5a1dfe10f96b..954e46cd3e18 100644 --- a/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp +++ b/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp @@ -5,8 +5,8 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" #include "holder.h" @@ -18,7 +18,6 @@ #include "MethodTable.h" #include "ObjectLayout.h" #include "event.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S deleted file mode 100644 index 5883ef8384e1..000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S +++ /dev/null @@ -1,265 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include "AsmOffsets.inc" - -// GC type flags -#define GC_ALLOC_FINALIZE 1 - -// -// Rename fields of nested structs -// -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) - - - -// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -// allocation context then automatically fallback to the slow allocation path. -// $a0 == MethodTable - LEAF_ENTRY RhpNewFast, _TEXT - - // a1 = GetThread() - INLINE_GETTHREAD $a1 - - // - // a0 contains MethodTable pointer - // - ld.w $a2, $a0, OFFSETOF__MethodTable__m_uBaseSize - - // - // a0: MethodTable pointer - // a1: Thread pointer - // a2: base size - // - - // Load potential new object address into t3. - ld.d $t3, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Determine whether the end of the object would lie outside of the current allocation context. 
If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add.d $a2, $a2, $t3 - ld.d $t4, $a1, OFFSETOF__Thread__m_eeAllocContext__combined_limit - bltu $t4, $a2, LOCAL_LABEL(RhpNewFast_RarePath) - - // Update the alloc pointer to account for the allocation. - st.d $a2, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Set the new objects MethodTable pointer - st.d $a0, $t3, OFFSETOF__Object__m_pEEType - - ori $a0, $t3, 0 - jirl $r0, $ra, 0 - -LOCAL_LABEL(RhpNewFast_RarePath): - ori $a1, $zero, 0 - b RhpNewObject - LEAF_END RhpNewFast, _TEXT - -// Allocate non-array object with finalizer. -// a0 == MethodTable - LEAF_ENTRY RhpNewFinalizable, _TEXT - ori $a1, $zero, GC_ALLOC_FINALIZE - b RhpNewObject - LEAF_END RhpNewFinalizable, _TEXT - -// Allocate non-array object. -// a0 == MethodTable -// a1 == alloc flags - NESTED_ENTRY RhpNewObject, _TEXT, NoHandler - - PUSH_COOP_PINVOKE_FRAME $a3 - - // a3: transition frame - - // Preserve the MethodTable in s0 - ori $s0, $a0, 0 - - ori $a2, $zero, 0 // numElements - - // Call the rest of the allocation helper. - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - bl C_FUNC(RhpGcAlloc) - - // Set the new objects MethodTable pointer on success. - beq $a0, $zero, LOCAL_LABEL(NewOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state -LOCAL_LABEL(NewOutOfMemory): - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - ori $a0, $s0, 0 // MethodTable pointer - ori $a1, $zero, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - b C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewObject, _TEXT - -// Allocate a string. -// a0 == MethodTable -// a1 == element/character count - LEAF_ENTRY RhNewString, _TEXT - // Make sure computing the overall allocation size wont overflow - lu12i.w $a2, ((MAX_STRING_LENGTH >> 12) & 0xFFFFF) - ori $a2, $a2, (MAX_STRING_LENGTH & 0xFFF) - bltu $a2, $a1, LOCAL_LABEL(StringSizeOverflow) - - // Compute overall allocation size (align(base size + (element size * elements), 8)). - ori $a2, $zero, STRING_COMPONENT_SIZE - mulw.d.w $a2, $a1, $a2 // a2 = (a1[31:0] * a2[31:0])[64:0] - addi.d $a2, $a2, STRING_BASE_SIZE + 7 // a2 = a2 + STRING_BASE_SIZE + 7 - bstrins.d $a2, $r0, 2, 0 // clear the bits[2:0] of $a2 - - // a0 == MethodTable - // a1 == element count - // a2 == string size - - INLINE_GETTHREAD $a3 - - // Load potential new object address into t3. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit - bltu $t3, $a2, LOCAL_LABEL(RhNewString_Rare) - - // Reload new object address into r12. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Update the alloc pointer to account for the allocation. - st.d $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Set the new objects MethodTable pointer and element count. - st.d $a0, $t3, OFFSETOF__Object__m_pEEType - st.d $a1, $t3, OFFSETOF__Array__m_Length - - // Return the object allocated in a0. 
- ori $a0, $t3, 0 - - jirl $r0, $ra, 0 - -LOCAL_LABEL(StringSizeOverflow): - // We get here if the length of the final string object can not be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an OOM exception that the caller of this allocator understands. - - // a0 holds MethodTable pointer already - ori $a1, $zero, 1 // Indicate that we should throw OverflowException - b C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhNewString_Rare): - b C_FUNC(RhpNewArrayRare) - LEAF_END RhNewString, _Text - -// Allocate one dimensional, zero based array (SZARRAY). -// $a0 == MethodTable -// $a1 == element count - LEAF_ENTRY RhpNewArray, _Text - - // We want to limit the element count to the non-negative 32-bit int range. - // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component - // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst - // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. - lu12i.w $a2, 0x7ffff - ori $a2, $a2, 0xfff - bltu $a2, $a1, LOCAL_LABEL(ArraySizeOverflow) - - ld.h $a2, $a0, OFFSETOF__MethodTable__m_usComponentSize - mulw.d.w $a2, $a1, $a2 - ld.w $a3, $a0, OFFSETOF__MethodTable__m_uBaseSize - add.d $a2, $a2, $a3 - addi.d $a2, $a2, 7 - bstrins.d $a2, $r0, 2, 0 - // a0 == MethodTable - // a1 == element count - // a2 == array size - - INLINE_GETTHREAD $a3 - - // Load potential new object address into t3. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit - bltu $t3, $a2, LOCAL_LABEL(RhpNewArray_Rare) - - // Reload new object address into t3. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Update the alloc pointer to account for the allocation. - st.d $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Set the new objects MethodTable pointer and element count. - st.d $a0, $t3, OFFSETOF__Object__m_pEEType - st.d $a1, $t3, OFFSETOF__Array__m_Length - - // Return the object allocated in r0. - ori $a0, $t3, 0 - - jirl $r0, $ra, 0 - -LOCAL_LABEL(ArraySizeOverflow): - // We get here if the size of the final array object can not be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an overflow exception that the caller of this allocator understands. - - // $a0 holds MethodTable pointer already - ori $a1, $zero, 1 // Indicate that we should throw OverflowException - b C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhpNewArray_Rare): - b C_FUNC(RhpNewArrayRare) - LEAF_END RhpNewArray, _TEXT - -// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. -// a0 == MethodTable -// a1 == element count -// a2 == array size + Thread::m_alloc_context::alloc_ptr -// a3 == Thread - NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler - - // Recover array size by subtracting the alloc_ptr from a2. 
- ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - sub.d $a2, $a2, $t3 - - PUSH_COOP_PINVOKE_FRAME $a3 - - // Preserve data we will need later into the callee saved registers - ori $s0, $a0, 0 // Preserve MethodTable - - ori $a2, $a1, 0 // numElements - ori $a1, $zero, 0 // uFlags - - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - bl C_FUNC(RhpGcAlloc) - - // Set the new objects MethodTable pointer and length on success. - beq $a0, $zero, LOCAL_LABEL(ArrayOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state -LOCAL_LABEL(ArrayOutOfMemory): - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - ori $a0, $s0, 0 // MethodTable Pointer - ori $a1, $zero, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - b C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/loongarch64/AsmMacros_Shared.h new file mode 100644 index 000000000000..f7df01464273 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/loongarch64/AsmMacros_Shared.h @@ -0,0 +1,7 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include +#include "AsmOffsets.inc" diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h index 0f48b5f227f8..3f896ced7124 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h +++ b/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h @@ -13,7 +13,7 @@ PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) PLAT_ASM_OFFSET(10, ExInfo, m_exception) PLAT_ASM_OFFSET(18, ExInfo, m_kind) PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) -PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(1C, ExInfo, m_idxCurClause) PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) PLAT_ASM_OFFSET(268, ExInfo, m_notifyDebuggerSP) @@ -51,16 +51,16 @@ PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, IP) PLAT_ASM_SIZEOF(148, REGDISPLAY) PLAT_ASM_OFFSET(18, REGDISPLAY, SP) -PLAT_ASM_OFFSET(b8, REGDISPLAY, pR23) -PLAT_ASM_OFFSET(c0, REGDISPLAY, pR24) -PLAT_ASM_OFFSET(c8, REGDISPLAY, pR25) -PLAT_ASM_OFFSET(d0, REGDISPLAY, pR26) -PLAT_ASM_OFFSET(d8, REGDISPLAY, pR27) -PLAT_ASM_OFFSET(e0, REGDISPLAY, pR28) -PLAT_ASM_OFFSET(e8, REGDISPLAY, pR29) -PLAT_ASM_OFFSET(f0, REGDISPLAY, pR30) -PLAT_ASM_OFFSET(f8, REGDISPLAY, pR31) +PLAT_ASM_OFFSET(B8, REGDISPLAY, pR23) +PLAT_ASM_OFFSET(C0, REGDISPLAY, pR24) +PLAT_ASM_OFFSET(C8, REGDISPLAY, pR25) +PLAT_ASM_OFFSET(D0, REGDISPLAY, pR26) +PLAT_ASM_OFFSET(D8, REGDISPLAY, pR27) +PLAT_ASM_OFFSET(E0, REGDISPLAY, pR28) +PLAT_ASM_OFFSET(E8, REGDISPLAY, pR29) +PLAT_ASM_OFFSET(F0, REGDISPLAY, pR30) +PLAT_ASM_OFFSET(F8, REGDISPLAY, pR31) PLAT_ASM_OFFSET(10, REGDISPLAY, pR2) -PLAT_ASM_OFFSET(b0, REGDISPLAY, pFP) +PLAT_ASM_OFFSET(B0, REGDISPLAY, pFP) PLAT_ASM_OFFSET(8, REGDISPLAY, pRA) PLAT_ASM_OFFSET(108, REGDISPLAY, F) diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S index e6a226d8bf53..a285ca1f208c 
100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S @@ -18,14 +18,15 @@ ori $a3, $sp, 0 // Setup a PAL_LIMITED_CONTEXT on the stack { + // Total stack: 0xC0 (0x68+0x58) .if \exceptionType == HARDWARE_EXCEPTION - addi.d $sp, $sp, -0x50 - .cfi_adjust_cfa_offset 0x50 + addi.d $sp, $sp, -0x58 + .cfi_adjust_cfa_offset 0x58 st.d $a3, $sp, 0 // a3 is the SP and a1 is the IP of the fault site st.d $a1, $sp, 8 .else - PROLOG_STACK_ALLOC 0x50 - .cfi_adjust_cfa_offset 0x50 + PROLOG_STACK_ALLOC 0x58 + .cfi_adjust_cfa_offset 0x58 st.d $a3, $sp, 0 // a3 is the SP and ra is the IP of the fault site st.d $ra, $sp, 8 .endif @@ -37,6 +38,8 @@ fst.d $f29, $sp, 0x38 fst.d $f30, $sp, 0x40 fst.d $f31, $sp, 0x48 + // Slot at $sp+0x50 is alignment padding + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 0x68 st.d $zero, $sp, 0x10 // locations reserved for return value, not used for exception handling st.d $zero, $sp, 0x18 @@ -61,7 +64,7 @@ // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body // of method. However, this method needs to be able to change fp before calling funclet. // This is required to access locals in funclet. - PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, 0x58 + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, 0x60 PROLOG_SAVE_REG_PAIR 23, 24, 0x10 PROLOG_SAVE_REG_PAIR 25, 26, 0x20 PROLOG_SAVE_REG_PAIR 27, 28, 0x30 @@ -92,7 +95,7 @@ EPILOG_RESTORE_REG_PAIR 27, 28, 0x30 EPILOG_RESTORE_REG_PAIR 29, 30, 0x40 EPILOG_RESTORE_REG 31, 0x50 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x58 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x60 .endm @@ -160,7 +163,7 @@ ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP st.d $fp, $t3, 0 // - // store vfp preserved regs + // store fp preserved regs // addi.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F fst.d $f24, $t3, 0x00 @@ -288,7 +291,7 @@ // where the tail-calling thread had saved RA, which may not match where we have saved RA. 
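// For readers following the hijack handling below, here is a rough, self-contained C++ sketch
// (struct, field layout and function name are invented for illustration) of the general unhijack
// step that the Thread fields referenced by these OFFSETOF__Thread__* constants support; the
// actual routine below has additional control flow around tail calls and resume addresses.
//
//     struct ThreadSketch
//     {
//         void*  m_pvHijackedReturnAddress;          // original return address saved when the thread was hijacked
//         void** m_ppvHijackedReturnAddressLocation; // stack slot that was patched with the hijack stub
//     };
//
//     // Put the real return address back so the stack can be walked and returned through normally.
//     static void UnhijackSketch(ThreadSketch* t)
//     {
//         if (t->m_pvHijackedReturnAddress == nullptr)
//             return;                                             // not hijacked (the beqz -> NotHijacked case)
//         *t->m_ppvHijackedReturnAddressLocation = t->m_pvHijackedReturnAddress;
//         t->m_pvHijackedReturnAddress = nullptr;
//         t->m_ppvHijackedReturnAddressLocation = nullptr;
//     }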
ld.d $a1, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress - beq $a1, $zero, LOCAL_LABEL(NotHijacked) + beqz $a1, LOCAL_LABEL(NotHijacked) ld.d $a3, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation @@ -504,7 +507,7 @@ LOCAL_LABEL(NotHijacked): LOCAL_LABEL(PopExInfoLoop): ld.d $a3, $a3, OFFSETOF__ExInfo__m_pPrevExInfo // a3 <- next ExInfo - beq $a3, $zero, LOCAL_LABEL(DonePopping) // if (pExInfo == null) { we're done } + beqz $a3, LOCAL_LABEL(DonePopping) // if (pExInfo == null) { we're done } blt $a3, $a2, LOCAL_LABEL(PopExInfoLoop) // if (pExInfo < resume SP} { keep going } LOCAL_LABEL(DonePopping): @@ -513,15 +516,15 @@ LOCAL_LABEL(DonePopping): PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a3 bstrpick.d $t7, $a3, TrapThreadsFlags_AbortInProgress_Bit, TrapThreadsFlags_AbortInProgress_Bit - beq $t7, $zero, LOCAL_LABEL(NoAbort) + beqz $t7, LOCAL_LABEL(NoAbort) ld.d $a3, $sp, rsp_offset_is_not_handling_thread_abort - bne $a3, $zero, LOCAL_LABEL(NoAbort) + bnez $a3, LOCAL_LABEL(NoAbort) // It was the ThreadAbortException, so rethrow it // reset SP ori $a1, $a0, 0 // a1 <- continuation address as exception PC - addi.w $a0, $zero, STATUS_REDHAWK_THREAD_ABORT + addi.w $a0, $zero, STATUS_NATIVEAOT_THREAD_ABORT ori $sp, $a2, 0 b C_FUNC(RhpThrowHwEx) @@ -773,7 +776,7 @@ LOCAL_LABEL(NoAbort): LOCAL_LABEL(Propagate_PopExInfoLoop): ld.d $a3, $a3, OFFSETOF__ExInfo__m_pPrevExInfo // a3 <- next ExInfo - beq $a3, $zero, LOCAL_LABEL(Propagate_DonePopping) // if (pExInfo == null) { we're done } + beqz $a3, LOCAL_LABEL(Propagate_DonePopping) // if (pExInfo == null) { we're done } blt $a3, $a2, LOCAL_LABEL(Propagate_PopExInfoLoop) // if (pExInfo < resume SP} { keep going } LOCAL_LABEL(Propagate_DonePopping): diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S b/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S index 1dac3afa2904..74e71fab8a36 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S @@ -4,12 +4,11 @@ #include #include "AsmOffsets.inc" -#define PROBE_FRAME_SIZE 0xC8 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + - // 9 * 8 for callee saved registers + +#define PROBE_FRAME_SIZE 0x90 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + + // 9 * 8 for callee saved registers + // 1 * 8 for caller SP + // 2 * 8 for int returns + - // 1 * 8 for alignment padding + - // 4 * 16 for FP returns + // 2 * 8 for FP returns // See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers // and accepts the register bitmask @@ -42,13 +41,9 @@ st.d $a0, $sp, 0x70 st.d $a1, $sp, 0x78 - // Slot at $sp+0x80 is alignment padding - // Save the FP return registers - fst.d $f0, $sp, 0x88 - fst.d $f1, $sp, 0x90 - fst.d $f2, $sp, 0x98 - fst.d $f3, $sp, 0xA0 + fst.d $f0, $sp, 0x80 + fst.d $f1, $sp, 0x88 // Perform the rest of the PInvokeTransitionFrame initialization. 
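// Cross-checking the arithmetic in the PROBE_FRAME_SIZE comment above: 4*8 + 9*8 + 1*8 + 2*8 + 2*8
// = 0x20 + 0x48 + 0x08 + 0x10 + 0x10 = 0x90, so the reduced frame stays 16-byte aligned and the two
// FP return slots land at sp+0x80 and sp+0x88, matching the fst.d stores above.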
st.d \threadReg, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread // Thread * (unused by stackwalker) @@ -58,8 +53,7 @@ st.d \trashReg, $sp, 0x68 // save caller's SP // link the frame into the Thread - ori \trashReg, $sp, 0 - st.d \trashReg, \threadReg, OFFSETOF__Thread__m_pDeferredTransitionFrame + st.d $sp, \threadReg, OFFSETOF__Thread__m_pDeferredTransitionFrame .endm // @@ -74,10 +68,8 @@ ld.d $a1, $sp, 0x78 // Restore the FP return registers - fld.d $f0, $sp, 0x88 - fld.d $f1, $sp, 0x90 - fld.d $f2, $sp, 0x98 - fld.d $f3, $sp, 0xA0 + fld.d $f0, $sp, 0x80 + fld.d $f1, $sp, 0x88 // Restore callee saved registers EPILOG_RESTORE_REG_PAIR 23, 24, 0x20 @@ -126,12 +118,11 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a3 bstrpick.d $t8, $a3, TrapThreadsFlags_TrapThreads_Bit, TrapThreadsFlags_TrapThreads_Bit - bne $t8, $zero, LOCAL_LABEL(WaitForGC) + bnez $t8, LOCAL_LABEL(WaitForGC) jirl $r0, $ra, 0 LOCAL_LABEL(WaitForGC): - lu12i.w $t3, ((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5 + PTFF_THREAD_HIJACK_HI) >> 12) & 0xfffff - ori $t3, $t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5 + PTFF_THREAD_HIJACK_HI) & 0xfff + li.d $t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5 + (PTFF_THREAD_HIJACK_HI << 32)) b C_FUNC(RhpWaitForGC) NESTED_END RhpGcProbeHijack @@ -145,7 +136,7 @@ NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler ld.d $a2,$sp, OFFSETOF__PInvokeTransitionFrame__m_Flags bstrpick.d $t8, $a2, PTFF_THREAD_ABORT_BIT, PTFF_THREAD_ABORT_BIT - bne $t8, $zero, LOCAL_LABEL(ThrowThreadAbort) + bnez $t8, LOCAL_LABEL(ThrowThreadAbort) .cfi_remember_state POP_PROBE_FRAME @@ -154,7 +145,7 @@ NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler .cfi_restore_state LOCAL_LABEL(ThrowThreadAbort): POP_PROBE_FRAME - addi.w $a0, $zero, STATUS_REDHAWK_THREAD_ABORT + addi.w $a0, $zero, STATUS_NATIVEAOT_THREAD_ABORT ori $a1, $ra, 0 // return address as exception PC b RhpThrowHwEx NESTED_END RhpWaitForGC @@ -163,7 +154,7 @@ NESTED_END RhpWaitForGC LEAF_ENTRY RhpGcPoll PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a0 - bne $a0, $zero, C_FUNC(RhpGcPollRare) + bnez $a0, C_FUNC(RhpGcPollRare) jirl $r0, $ra, 0 LEAF_END RhpGcPoll diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S index b35352f341f3..5d791e7bb4a6 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S @@ -12,19 +12,19 @@ // // RhCommonStub // - // INPUT: tp: thunk's data block + // INPUT: t7: thunk's data block // - // TRASHES: t0, t1, tp + // TRASHES: t0, t1, t7 // LEAF_ENTRY RhCommonStub, _TEXT // There are arbitrary callers passing arguments with arbitrary signatures. 
// Custom calling convention: - // tp pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + // t7 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) INLINE_GET_TLS_VAR $t0, C_FUNC(tls_thunkData) // t0 = base address of TLS data - // tp = address of context cell in thunk's data + // t7 = address of context cell in thunk's data // store thunk address in thread static ld.d $t1, $t7, 0 diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S b/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S index 0ad422b0a486..e9db850efc48 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S @@ -38,9 +38,9 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT ld.d $t0, $a0, OFFSETOF__PInvokeTransitionFrame__m_pThread st.d $zero, $t0, OFFSETOF__Thread__m_pTransitionFrame - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a5 + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $t0 - bne $t0, $zero, 0f // TrapThreadsFlags_None = 0 + bnez $t0, 0f // TrapThreadsFlags_None = 0 jirl $r0, $ra, 0 0: // passing transition frame pointer in x0 diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S deleted file mode 100644 index 138992ef1a32..000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include "AsmOffsets.inc" - -#ifdef FEATURE_CACHED_INTERFACE_DISPATCH - - .extern RhpCidResolve - .extern RhpUniversalTransition_DebugStepTailCall - - // Macro that generates code to check a single cache entry. - .macro CHECK_CACHE_ENTRY entry - // Check a single entry in the cache. - // t0 : Cache data structure. Also used for target address jump. - // t1 : Instance MethodTable* - // t2 : Indirection cell address, preserved - // t3 : Trashed - ld.d $t3, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16)) - bne $t1, $t3, 0f - ld.d $t0, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8) - jirl $r0, $t0, 0 -0: - .endm - -// -// Macro that generates a stub consuming a cache with the given number of entries. -// - .macro DEFINE_INTERFACE_DISPATCH_STUB entries - - NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler - - // t2 holds the indirection cell address. Load the cache pointer. - ld.d $t0, $t8, OFFSETOF__InterfaceDispatchCell__m_pCache - - // Load the MethodTable from the object instance in a0. - ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries - ld.d $t1, $a0, 0 - - .global CurrentEntry - .set CurrentEntry, 0 - - .rept \entries - CHECK_CACHE_ENTRY CurrentEntry - .set CurrentEntry, CurrentEntry + 1 - .endr - - // t2 still contains the indirection cell address. - b C_FUNC(RhpInterfaceDispatchSlow) - - NESTED_END "RhpInterfaceDispatch\entries", _TEXT - - .endm - -// -// Define all the stub routines we currently need. -// -// If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the -// *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens -// during the interface dispatch. 
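// For illustration, each generated RhpInterfaceDispatch<N> stub performs a lookup equivalent to the
// C++ sketch below (struct and field names are invented for the sketch; the real stubs unroll the
// loop with .rept and keep the indirection cell address live so the slow path can update the cache).
//
//     struct DispatchCacheEntrySketch
//     {
//         void* pInstanceType;   // MethodTable* key (first 8 bytes of the 16-byte entry)
//         void* pTargetCode;     // cached dispatch target (second 8 bytes)
//     };
//
//     static void* DispatchViaCacheSketch(void* pObjMethodTable, const DispatchCacheEntrySketch* entries, int count)
//     {
//         for (int i = 0; i < count; ++i)
//         {
//             if (entries[i].pInstanceType == pObjMethodTable)
//                 return entries[i].pTargetCode;   // hit: jump straight to the cached target
//         }
//         return nullptr;                          // miss: the stub tail-calls RhpInterfaceDispatchSlow instead
//     }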
-// - DEFINE_INTERFACE_DISPATCH_STUB 1 - DEFINE_INTERFACE_DISPATCH_STUB 2 - DEFINE_INTERFACE_DISPATCH_STUB 4 - DEFINE_INTERFACE_DISPATCH_STUB 8 - DEFINE_INTERFACE_DISPATCH_STUB 16 - DEFINE_INTERFACE_DISPATCH_STUB 32 - DEFINE_INTERFACE_DISPATCH_STUB 64 - -// -// Initial dispatch on an interface when we don't have a cache yet. -// - LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT - ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch - // Trigger an AV if we're dispatching on a null this. - // The exception handling infrastructure is aware of the fact that this is the first - // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here - // to a NullReferenceException at the callsite. - ld.d $zero, $a0, 0 - - // Just tail call to the cache miss helper. - b C_FUNC(RhpInterfaceDispatchSlow) - LEAF_END RhpInitialInterfaceDispatch, _TEXT - -// -// Stub dispatch routine for dispatch to a vtable slot -// - LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // t2 contains the interface dispatch cell address. - // load t3 to point to the vtable offset (which is stored in the m_pCache field). - ld.d $t3, $t2, OFFSETOF__InterfaceDispatchCell__m_pCache - - // Load the MethodTable from the object instance in a0, and add it to the vtable offset - // to get the address in the vtable of what we want to dereference - ld.d $t4, $a0, 0 - add.d $t3, $t3, $t4 - - // Load the target address of the vtable into t3 - ld.d $t3, $t3, 0 - - jirl $r0, $t3, 0 - LEAF_END RhpVTableOffsetDispatch, _TEXT - -// -// Cache miss case, call the runtime to resolve the target and update the cache. -// Use universal transition helper to allow an exception to flow out of resolution. -// - LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // t2 contains the interface dispatch cell address. - // Calling convention of the universal thunk is: - // t7: target address for the thunk to call - // t8: parameter of the thunk's target - PREPARE_EXTERNAL_VAR RhpCidResolve, $t7 - b C_FUNC(RhpUniversalTransition_DebugStepTailCall) - LEAF_END RhpInterfaceDispatchSlow, _TEXT - -#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S index 13a48cb256b4..c9e408648525 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S @@ -16,11 +16,11 @@ #define INTEGER_REGISTER_SIZE (8) #define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) -// Largest return block is 4 doubles -#define RETURN_BLOCK_SIZE (32) +// Largest return block is 2 doubles +#define RETURN_BLOCK_SIZE (16) #define COUNT_FLOAT_ARG_REGISTERS (8) -#define FLOAT_REGISTER_SIZE (16) +#define FLOAT_REGISTER_SIZE (8) #define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) #define PUSHED_RA_SIZE (8) @@ -59,15 +59,15 @@ // // Frame layout is: // -// {StackPassedArgs} ChildSP+0F0 CallerSP+000 -// {IntArgRegs (a0-a7) (0x40 bytes)} ChildSP+0B0 CallerSP-040 -// {ReturnBlock (0x20 bytes)} ChildSP+090 CallerSP-060 +// {StackPassedArgs} ChildSP+0A0 CallerSP+000 +// {IntArgRegs (a0-a7) (0x40 bytes)} ChildSP+060 CallerSP-040 +// {ReturnBlock (0x10 bytes)} ChildSP+050 CallerSP-050 // -- The base address of the Return block is the TransitionBlock pointer, the floating point args are // in the neg space of the TransitionBlock pointer. 
Note that the callee has knowledge of the exact // layout of all pieces of the frame that lie at or above the pushed floating point registers. -// {FpArgRegs (f0-f7) (0x80 bytes)} ChildSP+010 CallerSP-0E0 -// {PushedRA} ChildSP+008 CallerSP-0E8 -// {PushedFP} ChildSP+000 CallerSP-0F0 +// {FpArgRegs (f0-f7) (0x40 bytes)} ChildSP+010 CallerSP-090 +// {PushedRA} ChildSP+008 CallerSP-098 +// {PushedFP} ChildSP+000 CallerSP-0A0 // // NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure // must be updated as well. diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S deleted file mode 100644 index b438746e81cb..000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S +++ /dev/null @@ -1,355 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include - -// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used -// during garbage collections to verify that object references where never written to the heap without using a -// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing -// new references to the real heap. Since this can not be solved perfectly without critical sections around the -// entire update process, we instead update the shadow location and then re-check the real location (as two -// ordered operations) and if there is a disparity we will re-write the shadow location with a special value -// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC -// time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the -// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. -#ifdef WRITE_BARRIER_CHECK - - .global $g_GCShadow - .global $g_GCShadowEnd - - // On entry: - // $destReg: location to be updated - // $refReg: objectref to be stored - // - // On exit: - // t3,t4: trashed - // other registers are preserved - // - .macro UPDATE_GC_SHADOW destReg, refReg - - // If g_GCShadow is 0, don't perform the check. - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3 - beq $t3, $zero, 1f - ori $t4, $t3, 0 - - // Save destReg since we're about to modify it (and we need the original value both within the macro and - // once we exit the macro). - ori $t4, \destReg, 0 - - // Transform destReg into the equivalent address in the shadow heap. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 - sub.d \destReg, \destReg, $t3 - bltu $t4, $zero, 0f - - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3 - add.d \destReg, \destReg, $t3 - - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, $t3 - bgeu \destReg, $t3, 0f - - // Update the shadow heap. - st.d \refReg, \destReg, 0 - - // The following read must be strongly ordered wrt to the write we have just performed in order to - // prevent race conditions. - dbar 0 - - // Now check that the real heap location still contains the value we just wrote into the shadow heap. - ori $t3, $t4, 0 - ld.d $t3, $t3, 0 - beq $t3, \refReg, 0f - - // Someone went and updated the real heap. We need to invalidate INVALIDGCVALUE the shadow location since we can not - // guarantee whose shadow update won. 
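// The UPDATE_GC_SHADOW macro above boils down to the following C++ sketch, parameterized here
// instead of reading the g_GCShadow / g_GCShadowEnd / g_lowest_address globals, and using a
// seq_cst fence where the assembly uses dbar 0:
//
//     #include <atomic>
//     #include <cstdint>
//
//     static void UpdateGcShadowSketch(void** dest, void* ref,
//                                      uint8_t* shadowBase, uint8_t* shadowEnd,
//                                      uint8_t* heapLowestAddress, uintptr_t invalidGcValue)
//     {
//         if (shadowBase == nullptr)
//             return;                                            // shadow heap not enabled
//         uint8_t* shadow = shadowBase + ((uint8_t*)dest - heapLowestAddress);
//         if (shadow >= shadowEnd)
//             return;                                            // destination not mirrored
//         *(void**)shadow = ref;                                 // mirror the store
//         std::atomic_thread_fence(std::memory_order_seq_cst);   // order the re-read after the shadow write
//         if (*dest != ref)                                      // another thread won the race on the real heap
//             *(uintptr_t*)shadow = invalidGcValue;              // mark the slot as "do not verify"
//     }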
- lu12i.w $t3, ((INVALIDGCVALUE >> 12) & 0xFFFFF) - ori $t3, $t3, (INVALIDGCVALUE & 0xFFF) - st.d $t3, \destReg, 0 - -0: - // Restore original destReg value - ori \destReg, $t4, 0 - -1: - .endm - -#else // WRITE_BARRIER_CHECK - - .macro UPDATE_GC_SHADOW destReg, refReg - .endm - -#endif // WRITE_BARRIER_CHECK - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the -// name of the register that points to the location to be updated and the name of the register that holds the -// object reference (this should be in upper case as it is used in the definition of the name of the helper). - -// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for -// some interlocked helpers that need an inline barrier. - - // On entry: - // destReg: location to be updated (cannot be t3,t4) - // refReg: objectref to be stored (cannot be t3,t4) - // - // On exit: - // t3,t4: trashed - // - .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg - - // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - // we are in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW \destReg, \refReg - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - // Update the write watch table if necessary - PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, $t3 - - beq $t3, $zero, 2f - srli.d $t5, \destReg, 12 - add.d $t3, $t3, $t5 // SoftwareWriteWatch::AddressToTableByteIndexShift - ld.b $t4, $t3, 0 - bne $t4, $zero, 2f - ori $t4, $zero, 0xFF - st.b $t4, $t3, 0 -#endif - -2: - // We can skip the card table write if the reference is to - // an object not on the epehemeral segment. - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, $t3 - bltu \refReg, $t3, 0f - - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, $t3 - bgeu \refReg, $t3, 0f - - // Set this objects card, if it has not already been set. - PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, $t3 - srli.d $t5, \destReg, 11 - add.d $t4, $t3, $t5 - - // Check that this card has not already been written. Avoiding useless writes is a big win on - // multi-proc systems since it avoids cache thrashing. - ld.b $t3, $t4, 0 - ori $t5, $zero, 0xFF - beq $t3, $t5, 0f - - ori $t3, $zero, 0xFF - st.b $t3, $t4, 0 - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Check if we need to update the card bundle table - PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, $t3 - srli.d $t5, \destReg, 21 - add.d $t4, $t3, $t5 - ld.b $t3, $t4, 0 - ori $t5, $zero, 0xFF - beq $t3, $t5, 0f - - ori $t3, $zero, 0xFF - st.b $t3, $t4, 0 -#endif - -0: - // Exit label - .endm - - // On entry: - // destReg: location to be updated - // refReg: objectref to be stored - // - // On exit: - // t3, t4: trashed - // - .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg - - // The "check" of this checked write barrier - is destReg - // within the heap? if no, early out. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 - sltu $t4, \destReg, $t3 - - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3 - - // If \destReg >= g_lowest_address, compare \destReg to g_highest_address. - // Otherwise, set the C flag (0x2) to take the next branch. 
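// Per the comments above, the intent of the checked barrier reduces to the sketch below: do nothing
// extra when the destination is outside the GC heap range, otherwise (shadow-heap, write-watch and
// card-bundle bookkeeping omitted) set the card for destinations that now hold ephemeral references.
// The >>11 card shift and the 0xFF check mirror the assembly; the globals are passed as parameters
// here to keep the sketch self-contained.
//
//     #include <cstdint>
//
//     static void CheckedWriteBarrierSketch(void** dst, void* ref,
//                                           uint8_t* heapLow, uint8_t* heapHigh,
//                                           uint8_t* ephemeralLow, uint8_t* ephemeralHigh,
//                                           uint8_t* cardTable)
//     {
//         if ((uint8_t*)dst < heapLow || (uint8_t*)dst >= heapHigh)
//             return;                                   // not a GC heap location: the plain store suffices
//         if ((uint8_t*)ref < ephemeralLow || (uint8_t*)ref >= ephemeralHigh)
//             return;                                   // only references into the ephemeral range need a card
//         uint8_t* card = cardTable + ((uintptr_t)dst >> 11);
//         if (*card != 0xFF)                            // avoid redundant writes; cheaper on multi-proc systems
//             *card = 0xFF;
//     }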
- bnez $t4, 1f - bgeu \destReg, $t3, 0f - -1: - INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg - -0: - // Exit label - .endm - -// void JIT_ByRefWriteBarrier -// On entry: -// t8 : the source address (points to object reference to write) -// t6 : the destination address (object reference written here) -// -// On exit: -// t8 : incremented by 8 -// t6 : incremented by 8 -// t7 : trashed -// t3, t4 : trashed -// -// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF -// if you add more trashed registers. -// -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1 -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address -LEAF_ENTRY RhpByRefAssignRef, _TEXT - - ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 - ld.d $t7, $t8, 0 - addi.d $t8, $t8, 8 - b C_FUNC(RhpCheckedAssignRef) - -LEAF_END RhpByRefAssignRef, _TEXT - -// JIT_CheckedWriteBarrier(Object** dst, Object* src) -// -// Write barrier for writes to objects that may reside -// on the managed heap. -// -// On entry: -// t6 : the destination address (LHS of the assignment). -// May not be a heap location (hence the checked). -// t7 : the object reference (RHS of the assignment). -// -// On exit: -// t3, t4 : trashed -// t6 : incremented by 8 - LEAF_ENTRY RhpCheckedAssignRef, _TEXT - - // is destReg within the heap? - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 - sltu $t4, $t6, $t3 - - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3 - sltu $t0, $t3, $t6 - or $t4, $t0, $t4 - beq $t4, $zero, C_FUNC(RhpAssignRefLoongArch64) - -LOCAL_LABEL(NotInHeap): - ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation - st.d $t7, $t6, 0 - addi.d $t6, $t6, 8 - jirl $r0, $ra, 0 - -LEAF_END RhpCheckedAssignRef, _TEXT - -// JIT_WriteBarrier(Object** dst, Object* src) -// -// Write barrier for writes to objects that are known to -// reside on the managed heap. -// -// On entry: -// t6 : the destination address (LHS of the assignment). -// t7 : the object reference (RHS of the assignment). -// -// On exit: -// t3, t4 : trashed -// t6 : incremented by 8 -LEAF_ENTRY RhpAssignRefLoongArch64, _TEXT - - ALTERNATE_ENTRY RhpAssignRefAVLocation - st.d $t7, $t6, 0 - - INSERT_UNCHECKED_WRITE_BARRIER_CORE $t6, $t7 - - addi.d $t6, $t6, 8 - jirl $r0, $ra, 0 - -LEAF_END RhpAssignRefLoongArch64, _TEXT - -// Same as RhpAssignRefLoongArch64, but with standard ABI. -LEAF_ENTRY RhpAssignRef, _TEXT - ori $t6, $a0, 0 ; t6 = dst - ori $t7, $a1, 0 ; t7 = val - b C_FUNC(RhpAssignRefLoongArch64) -LEAF_END RhpAssignRef, _TEXT - - -// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon -// successful updates. - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address - -// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) -// -// Interlocked compare exchange on objectref. 
-// -// On entry: -// a0: pointer to objectref -// a1: exchange value -// a2: comparand -// -// On exit: -// a0: original value of objectref -// t1, t3, t6, t4: trashed -// - LEAF_ENTRY RhpCheckedLockCmpXchg - -LOCAL_LABEL(RetryLoop): - ll.d $t3, $a0, 0 - ori $t1, $a1, 0 - bne $t3, $a2, LOCAL_LABEL(EndOfExchange) - sc.d $t1, $a0, 0 - beqz $t1, LOCAL_LABEL(RetryLoop) - b LOCAL_LABEL(DoCardsCmpXchg) - -LOCAL_LABEL(EndOfExchange): - dbar 0x700 - b LOCAL_LABEL(CmpXchgNoUpdate) - -LOCAL_LABEL(DoCardsCmpXchg): - // We have successfully updated the value of the objectref so now we need a GC write barrier. - // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are - // already correctly set up. - - INSERT_CHECKED_WRITE_BARRIER_CORE $a0, $a1 - -LOCAL_LABEL(CmpXchgNoUpdate): - // a2 still contains the original value. - ori $a0, $a2, 0 - - jirl $r0, $ra, 0 - - LEAF_END RhpCheckedLockCmpXchg, _TEXT - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address - -// RhpCheckedXchg(Object** destination, Object* value) -// -// Interlocked exchange on objectref. -// -// On entry: -// a0: pointer to objectref -// a1: exchange value -// -// On exit: -// a0: original value of objectref -// t1: trashed -// t3, t6, t4: trashed -// - LEAF_ENTRY RhpCheckedXchg, _TEXT - - amswap_db.d $t1, $a1, $a0 // exchange - - // We have successfully updated the value of the objectref so now we need a GC write barrier. - // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are - // already correctly set up. - - INSERT_CHECKED_WRITE_BARRIER_CORE $a0, $a1 - - // $t1 still contains the original value. 
- ori $a0, $t1, 0 - - jirl $r0, $ra, 0 - - LEAF_END RhpCheckedXchg, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp index 51ca2c73575a..cad97631f2f6 100644 --- a/src/coreclr/nativeaot/Runtime/portable.cpp +++ b/src/coreclr/nativeaot/Runtime/portable.cpp @@ -5,15 +5,14 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" +#include "PalLimitedContext.h" #include "CommonMacros.inl" #include "volatile.h" -#include "PalRedhawk.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" #include "shash.h" -#include "varint.h" #include "holder.h" #include "rhbinder.h" #include "Crst.h" @@ -32,8 +31,8 @@ #include "GCMemoryHelpers.inl" -#if defined(USE_PORTABLE_HELPERS) && !defined(HOST_WASM) -EXTERN_C void* F_CALL_CONV RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame); +#if defined(USE_PORTABLE_HELPERS) +EXTERN_C void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame); static Object* AllocateObject(MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements) { @@ -89,7 +93,7 @@ FCIMPL1(Object *, RhpNewFinalizable, MethodTable* pEEType) } FCIMPLEND -FCIMPL2(Array *, RhpNewArray, MethodTable * pArrayEEType, int numElements) +FCIMPL2(Array *, RhpNewArrayFast, MethodTable * pArrayEEType, int numElements) { Thread * pCurThread = ThreadStore::GetCurrentThread(); gc_alloc_context * acontext = pCurThread->GetAllocContext(); @@ -130,9 +134,9 @@ FCIMPLEND FCIMPL2(String *, RhNewString, MethodTable * pArrayEEType, int numElements) { - // TODO: Implement. We tail call to RhpNewArray for now since there's a bunch of TODOs in the places + // TODO: Implement. We tail call to RhpNewArrayFast for now since there's a bunch of TODOs in the places // that matter anyway. - return (String*)RhpNewArray(pArrayEEType, numElements); + return (String*)RhpNewArrayFast(pArrayEEType, numElements); } FCIMPLEND @@ -219,7 +223,7 @@ FCIMPL1(Object*, RhpNewFastMisalign, MethodTable* pEEType) } FCIMPLEND -FCIMPL2(Array*, RhpNewArrayAlign8, MethodTable* pArrayEEType, int numElements) +FCIMPL2(Array*, RhpNewArrayFastAlign8, MethodTable* pArrayEEType, int numElements) { Thread* pCurThread = ThreadStore::GetCurrentThread(); gc_alloc_context* acontext = pCurThread->GetAllocContext(); @@ -320,12 +324,6 @@ FCIMPL0(void, RhpInterfaceDispatch64) } FCIMPLEND -FCIMPL0(void, RhpVTableOffsetDispatch) -{ - ASSERT_UNCONDITIONALLY("NYI"); -} -FCIMPLEND - // @TODO Implement UniversalTransition EXTERN_C void * ReturnFromUniversalTransition; void * ReturnFromUniversalTransition; @@ -375,9 +373,9 @@ FCIMPL2(Object *, RhpCheckedXchg, Object ** location, Object * value) } FCIMPLEND -FCIMPL0(void*, RhAllocateThunksMapping) +FCIMPL1(HRESULT, RhAllocateThunksMapping, void ** ppThunksSection) { - return NULL; + return E_FAIL; } FCIMPLEND diff --git a/src/coreclr/nativeaot/Runtime/profheapwalkhelper.cpp b/src/coreclr/nativeaot/Runtime/profheapwalkhelper.cpp index 6d9d7edc6fea..d239ab93c3ab 100644 --- a/src/coreclr/nativeaot/Runtime/profheapwalkhelper.cpp +++ b/src/coreclr/nativeaot/Runtime/profheapwalkhelper.cpp @@ -5,7 +5,7 @@ // On desktop CLR, GC ETW event firing borrows heavily from code in the profiling API, // as the GC already called hooks in the profapi to notify it of roots & references.
// This file shims up that profapi code the GC expects, though only for the purpose of -// firing ETW events (not for getting a full profapi up on redhawk). +// firing ETW events (not for getting a full profapi up on NativeAOT). // #include "common.h" diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index 661fdff4e6c9..e398f7c710f8 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -6,7 +6,7 @@ #if defined(TARGET_X86) || defined(TARGET_AMD64) -#include "PalRedhawkCommon.h" // Fp128 +#include "PalLimitedContext.h" // Fp128 struct REGDISPLAY { diff --git a/src/coreclr/nativeaot/Runtime/rhassert.cpp b/src/coreclr/nativeaot/Runtime/rhassert.cpp index 970a0b025700..91166a1e74a4 100644 --- a/src/coreclr/nativeaot/Runtime/rhassert.cpp +++ b/src/coreclr/nativeaot/Runtime/rhassert.cpp @@ -3,8 +3,8 @@ #include "common.h" #include "CommonTypes.h" #include "CommonMacros.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include @@ -31,7 +31,7 @@ void Assert(const char * expr, const char * file, uint32_t line_num, const char // If there's no debugger attached, we just FailFast if (!minipal_is_native_debugger_present()) - PalRaiseFailFastException(NULL, NULL, FAIL_FAST_GENERATE_EXCEPTION_ADDRESS); + RhFailFast(); // If there is a debugger attached, we break and then allow continuation. PalDebugBreak(); diff --git a/src/coreclr/nativeaot/Runtime/rhassert.h b/src/coreclr/nativeaot/Runtime/rhassert.h index 34403e216f5b..de725ae26bda 100644 --- a/src/coreclr/nativeaot/Runtime/rhassert.h +++ b/src/coreclr/nativeaot/Runtime/rhassert.h @@ -60,8 +60,10 @@ void Assert(const char * expr, const char * file, unsigned int line_num, const c ASSERT_UNCONDITIONALLY(message); \ ASSUME(0); \ -#define FAIL_FAST_GENERATE_EXCEPTION_ADDRESS 0x1 - -#define RhFailFast() RaiseFailFastException(NULL, NULL, FAIL_FAST_GENERATE_EXCEPTION_ADDRESS) +#ifdef HOST_WINDOWS +#define RhFailFast() ::RaiseFailFastException(NULL, NULL, FAIL_FAST_GENERATE_EXCEPTION_ADDRESS) +#else +void RhFailFast(); +#endif // HOST_WINDOWS #endif // __RHASSERT_H__ diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S deleted file mode 100644 index f09e04520427..000000000000 --- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S +++ /dev/null @@ -1,273 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include "AsmOffsets.inc" - -// GC type flags -#define GC_ALLOC_FINALIZE 1 - -// -// Rename fields of nested structs -// -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) - -// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -// allocation context then automatically fallback to the slow allocation path. -// a0 == MethodTable - LEAF_ENTRY RhpNewFast, _TEXT - - // a1 = GetThread() - // Save MethodTable pointer. INLINE_GETTHREAD will trash a0. 
- mv t2, a0 - INLINE_GETTHREAD a1 - - // - // t2 contains MethodTable pointer - // - lw a2, OFFSETOF__MethodTable__m_uBaseSize(t2) - - // - // t2: MethodTable pointer - // a1: Thread pointer - // a2: base size - // - - // Load potential new object address into t3. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1) - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add a2, a2, t3 - ld t4, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a1) - bltu t4, a2, LOCAL_LABEL(RhpNewFast_RarePath) - - // Update the alloc pointer to account for the allocation. - sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1) - - // Set the new objects MethodTable pointer - sd t2, OFFSETOF__Object__m_pEEType(t3) - - mv a0, t3 - ret - -LOCAL_LABEL(RhpNewFast_RarePath): - mv a1, zero - mv a0, t2 - tail RhpNewObject - LEAF_END RhpNewFast, _TEXT - -// Allocate non-array object with finalizer. -// a0 == MethodTable - LEAF_ENTRY RhpNewFinalizable, _TEXT - li a1, GC_ALLOC_FINALIZE - tail RhpNewObject - LEAF_END RhpNewFinalizable, _TEXT - -// Allocate non-array object. -// a0 == MethodTable -// a1 == alloc flags - NESTED_ENTRY RhpNewObject, _TEXT, NoHandler - - PUSH_COOP_PINVOKE_FRAME a3 - - // a3: transition frame - - // Preserve the MethodTable in s0 - mv s0, a0 - - li a2, 0 // numElements - - // Call the rest of the allocation helper. - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call C_FUNC(RhpGcAlloc) - - // Set the new object's MethodTable pointer on success. - beq a0, zero, LOCAL_LABEL(NewOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state - -LOCAL_LABEL(NewOutOfMemory): - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mv a0, s0 // MethodTable pointer - li a1, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - tail C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewObject, _TEXT - -// Allocate a string. -// a0 == MethodTable -// a1 == element/character count - LEAF_ENTRY RhNewString, _TEXT - // Make sure computing the overall allocation size won't overflow - li a2, MAX_STRING_LENGTH - bltu a2, a1, LOCAL_LABEL(StringSizeOverflow) // Branch if a2 < a1 (overflow) - - // Compute overall allocation size (align(base size + (element size * elements), 8)). - li a3, STRING_COMPONENT_SIZE // Load STRING_COMPONENT_SIZE into a3 - slli a2, a1, 1 // a2 = a1 * STRING_COMPONENT_SIZE, where STRING_COMPONENT_SIZE == 2 - addi a2, a2, STRING_BASE_SIZE + 7 // a2 = a2 + STRING_BASE_SIZE + 7 - andi a2, a2, ~0x7 // Clear the bits[2:0] of a2 (align to 8 bytes) - - // a0 == MethodTable - // a1 == element count - // a2 == string size - - // Save MethodTable pointer. INLINE_GETTHREAD will trash a0. - mv t2, a0 - INLINE_GETTHREAD a3 - - // Load potential new object address into t3. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add a2, a2, t3 - ld t3, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a3) - bltu t3, a2, LOCAL_LABEL(RhNewString_Rare) - - // Reload new object address into t3. 
- ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Update the alloc pointer to account for the allocation. - sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Set the new object's MethodTable pointer and element count. - sd t2, OFFSETOF__Object__m_pEEType(t3) - sd a1, OFFSETOF__Array__m_Length(t3) - - // Return the object allocated in a0. - mv a0, t3 - - ret - -LOCAL_LABEL(StringSizeOverflow): - // We get here if the length of the final string object cannot be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an OOM exception that the caller of this allocator understands. - - mv a0, t2 - li a1, 1 // Indicate that we should throw OverflowException - tail C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhNewString_Rare): - mv a0, t2 - tail C_FUNC(RhpNewArrayRare) - LEAF_END RhNewString, _TEXT - -// Allocate one-dimensional, zero-based array (SZARRAY). -// a0 == MethodTable -// a1 == element count - LEAF_ENTRY RhpNewArray, _TEXT - - // We want to limit the element count to the non-negative 32-bit int range. - // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component - // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst - // case (32-dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. - li a2, 0x7fffffff - bltu a2, a1, LOCAL_LABEL(ArraySizeOverflow) // Branch if a2 < a1 (check for overflow) - - lhu a2, OFFSETOF__MethodTable__m_usComponentSize(a0) // Load component size - mul a2, a1, a2 // a2 = a1 * component size - lw a3, OFFSETOF__MethodTable__m_uBaseSize(a0) // Load base size - add a2, a2, a3 // a2 = a2 + base size - addi a2, a2, 7 // a2 = a2 + 7 - andi a2, a2, ~0x7 // Clear the bits[2:0] of a2 (align to 8 bytes) - - // a0 == MethodTable - // a1 == element count - // a2 == array size - - // Save MethodTable pointer. INLINE_GETTHREAD will trash a0. - mv t2, a0 - INLINE_GETTHREAD a3 - - // Load potential new object address into t3. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add a2, a2, t3 - ld t3, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a3) - bltu t3, a2, LOCAL_LABEL(RhpNewArray_Rare) - - // Reload new object address into t3. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Update the alloc pointer to account for the allocation. - sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Set the new object's MethodTable pointer and element count. - sd t2, OFFSETOF__Object__m_pEEType(t3) - sd a1, OFFSETOF__Array__m_Length(t3) - - // Return the object allocated in a0. - mv a0, t3 - - ret - -LOCAL_LABEL(ArraySizeOverflow): - // We get here if the size of the final array object cannot be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an overflow exception that the caller of this allocator understands. - - mv a0, t2 - li a1, 1 // Indicate that we should throw OverflowException - tail C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhpNewArray_Rare): - mv a0, t2 - tail C_FUNC(RhpNewArrayRare) - LEAF_END RhpNewArray, _TEXT - -// Allocate one-dimensional, zero-based array (SZARRAY) using the slow path that calls a runtime helper. 
-// a0 == MethodTable -// a1 == element count -// a2 == array size + Thread::m_alloc_context::alloc_ptr -// a3 == Thread - NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler - - // Recover array size by subtracting the alloc_ptr from a2. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - sub a2, a2, t3 - - PUSH_COOP_PINVOKE_FRAME a3 - - // Preserve data we will need later into the callee saved registers - mv s0, a0 // Preserve MethodTable - - mv a2, a1 // numElements - li a1, 0 // uFlags - - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call C_FUNC(RhpGcAlloc) - - // Set the new object's MethodTable pointer and length on success. - beq a0, zero, LOCAL_LABEL(ArrayOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state - -LOCAL_LABEL(ArrayOutOfMemory): - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mv a0, s0 // MethodTable Pointer - li a1, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - tail C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/riscv64/AsmMacros_Shared.h new file mode 100644 index 000000000000..f7df01464273 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/AsmMacros_Shared.h @@ -0,0 +1,7 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include +#include "AsmOffsets.inc" diff --git a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S index ff20aeb736cc..09dc50e5ad62 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S @@ -4,7 +4,7 @@ #include #include "AsmOffsets.inc" -#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 7) & ~7) #define HARDWARE_EXCEPTION 1 #define SOFTWARE_EXCEPTION 0 @@ -560,7 +560,7 @@ LOCAL_LABEL(DonePopping): // It was the ThreadAbortException, so rethrow it // Reset SP mv a1, a0 // a1 <- continuation address as exception PC - li a0, STATUS_REDHAWK_THREAD_ABORT + li a0, STATUS_NATIVEAOT_THREAD_ABORT mv sp, a2 tail C_FUNC(RhpThrowHwEx) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index f5f41c44c78f..d7989f2f6694 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -44,14 +44,13 @@ # Perform the rest of the PInvokeTransitionFrame initialization. 
sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) # Thread * (unused by stackwalker) - sd \BITMASK, (OFFSETOF__PInvokeTransitionFrame__m_pThread + 8)(sp) # Save the register bitmask passed in by caller + sd \BITMASK, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) # Save the register bitmask passed in by caller addi \trashReg, sp, PROBE_FRAME_SIZE # Recover value of caller's SP sd \trashReg, 0x78(sp) # Save caller's SP # Link the frame into the Thread - mv \trashReg, sp - sd \trashReg, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) + sd sp, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) .endm @@ -84,7 +83,9 @@ .macro FixupHijackedCallstack // a2 <- GetThread() + mv t1, a0 INLINE_GETTHREAD a2 + mv a0, t1 // Fix the stack by restoring the original return address ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) @@ -100,14 +101,13 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler FixupHijackedCallstack - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 - andi t3, a3, 1 << TrapThreadsFlags_TrapThreads_Bit + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, t3 + andi t3, t3, 1 << TrapThreadsFlags_TrapThreads_Bit bnez t3, LOCAL_LABEL(WaitForGC) jr ra LOCAL_LABEL(WaitForGC): - li t6, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1 + PTFF_THREAD_HIJACK_HI) - or t3, t3, t6 + li t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1 + (PTFF_THREAD_HIJACK_HI << 32)) tail C_FUNC(RhpWaitForGC) NESTED_END RhpGcProbeHijack @@ -134,7 +134,7 @@ NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler .cfi_restore_state LOCAL_LABEL(ThrowThreadAbort): POP_PROBE_FRAME - li a0, STATUS_REDHAWK_THREAD_ABORT + li a0, STATUS_NATIVEAOT_THREAD_ABORT mv a1, ra # Set return address as exception PC call C_FUNC(RhpThrowHwEx) NESTED_END RhpWaitForGC diff --git a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S index 04f28699dd29..a19cf4c00102 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S @@ -12,27 +12,29 @@ // // RhCommonStub // - // INPUT: tp: thunk's data block + // INPUT: t1: thunk's data block // - // TRASHES: t0, t1, tp + // TRASHES: t0, t1, t2 // LEAF_ENTRY RhCommonStub, _TEXT // There are arbitrary callers passing arguments with arbitrary signatures. 
// Custom calling convention: // tp pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + mv t2, a0 INLINE_GET_TLS_VAR t0, C_FUNC(tls_thunkData) + mv a0, t2 // t0 = base address of TLS data - // tp = address of context cell in thunk's data + // t1 = address of context cell in thunk's data // Load the thunk address from the data block and store it in the thread's static storage - ld t1, 0(t0) // Load thunk address into t1 from the TLS base address - sd t1, 0(t0) // Store the thunk address in thread static storage + ld t2, 0(t1) // Load thunk data into t2 + sd t2, 0(t0) // Store the thunk address in thread static storage // Load the target address from the data block and jump to it - ld t1, POINTER_SIZE(t0) // Load target address into t1 from the data block - jalr t1 // Jump to the target address in t1 + ld t1, POINTER_SIZE(t1) // Load target address into t1 from the data block + jr t1 // Jump to the target address in t1 LEAF_END RhCommonStub, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S index d1264271cc79..93b360ebda1c 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S @@ -19,7 +19,7 @@ NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler sd fp, OFFSETOF__PInvokeTransitionFrame__m_FramePointer(a0) sd ra, OFFSETOF__PInvokeTransitionFrame__m_RIP(a0) - sd t0, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs(a0) + sd sp, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs(a0) li t0, PTFF_SAVE_SP sd t0, OFFSETOF__PInvokeTransitionFrame__m_Flags(a0) @@ -40,7 +40,7 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, t0 - bnez t1, 0f // If TrapThreadsFlags_None is non-zero, branch + bnez t0, 0f // If TrapThreadsFlags_None is non-zero, branch ret 0: diff --git a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S deleted file mode 100644 index 3823c189e0e8..000000000000 --- a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include "AsmOffsets.inc" - -#ifdef FEATURE_CACHED_INTERFACE_DISPATCH - - .extern RhpCidResolve - .extern RhpUniversalTransition_DebugStepTailCall - - // Macro that generates code to check a single cache entry. - .macro CHECK_CACHE_ENTRY entry - // Load cache entry data into a temporary register - ld t6, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))(t0) - - // Compare with MethodTable* in t1 - bne t1, t6, 0f - - // Load the target address from the cache entry - ld t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)(t0) - - // Jump to the address in t0 - jr t0 - - 0: - .endm - - // - // Macro that generates a stub consuming a cache with the given number of entries. - // - .macro DEFINE_INTERFACE_DISPATCH_STUB entries - - NESTED_ENTRY RhpInterfaceDispatch\entries, _TEXT, NoHandler - - // t5 holds the indirection cell address. Load the cache pointer. - ld t0, OFFSETOF__InterfaceDispatchCell__m_pCache(t5) // Using a1 as an alternative base register - - // Load the MethodTable from the object instance in a0. 
- ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries - ld t1, 0(a0) - - .global CurrentEntry - .set CurrentEntry, 0 - - .rept \entries - CHECK_CACHE_ENTRY CurrentEntry - .set CurrentEntry, CurrentEntry + 1 - .endr - - // t0 still contains the indirection cell address. - tail C_FUNC(RhpInterfaceDispatchSlow) - - NESTED_END RhpInterfaceDispatch\entries, _TEXT - - .endm - - // - // Define all the stub routines we currently need. - // - DEFINE_INTERFACE_DISPATCH_STUB 1 - DEFINE_INTERFACE_DISPATCH_STUB 2 - DEFINE_INTERFACE_DISPATCH_STUB 4 - DEFINE_INTERFACE_DISPATCH_STUB 8 - DEFINE_INTERFACE_DISPATCH_STUB 16 - DEFINE_INTERFACE_DISPATCH_STUB 32 - DEFINE_INTERFACE_DISPATCH_STUB 64 - - // - // Initial dispatch on an interface when we don't have a cache yet. - // - LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT - ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch - // Trigger an AV if we're dispatching on a null this. - // The exception handling infrastructure is aware of the fact that this is the first - // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here - // to a NullReferenceException at the callsite. - lw zero, 0(a0) - - // Just tail call to the cache miss helper. - tail C_FUNC(RhpInterfaceDispatchSlow) - LEAF_END RhpInitialInterfaceDispatch, _TEXT - - // - // Stub dispatch routine for dispatch to a vtable slot - // - LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // t2 contains the interface dispatch cell address. - // Load t3 to point to the vtable offset (which is stored in the m_pCache field). - ld t3, OFFSETOF__InterfaceDispatchCell__m_pCache(t2) - - // Load the MethodTable from the object instance in a0, and add it to the vtable offset - // to get the address in the vtable of what we want to dereference - ld t4, 0(a0) - add t3, t3, t4 - - // Load the target address of the vtable into t3 - ld t3, 0(t3) - - jr t3 - LEAF_END RhpVTableOffsetDispatch, _TEXT - - // - // Cache miss case, call the runtime to resolve the target and update the cache. - // Use universal transition helper to allow an exception to flow out of resolution. - // - LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // t5 contains the interface dispatch cell address. 
- // Calling convention of the universal thunk is: - // t0: target address for the thunk to call - // t1: parameter of the thunk's target - PREPARE_EXTERNAL_VAR RhpCidResolve, t0 - mv t1, t5 - tail C_FUNC(RhpUniversalTransition_DebugStepTailCall) - LEAF_END RhpInterfaceDispatchSlow, _TEXT - -#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S index 89691462c123..234e6b46357d 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -12,15 +12,12 @@ .global RhpFpTrashValues #endif // TRASH_SAVED_ARGUMENT_REGISTERS -// Padding to account for the odd number of saved integer registers -#define ALIGNMENT_PADDING_SIZE (8) - #define COUNT_ARG_REGISTERS (8) #define INTEGER_REGISTER_SIZE (8) #define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) // Largest return block is 4 doubles -#define RETURN_BLOCK_SIZE (32) +#define RETURN_BLOCK_SIZE 16 #define COUNT_FLOAT_ARG_REGISTERS (8) #define FLOAT_REGISTER_SIZE (8) @@ -31,7 +28,6 @@ // From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: // -// ALIGNMENT_PADDING_SIZE // ARGUMENT_REGISTERS_SIZE // RETURN_BLOCK_SIZE // FLOAT_ARG_REGISTERS_SIZE @@ -41,7 +37,7 @@ #define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE) -#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE) +#define STACK_SIZE (ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE) #define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_RA_SIZE) #define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE) @@ -63,9 +59,8 @@ // Frame layout is: // // {StackPassedArgs} ChildSP+100 CallerSP+000 -// {AlignmentPad (0x8 bytes)} ChildSP+0F8 CallerSP-008 // {IntArgRegs (a0-a7) (0x40 bytes)} ChildSP+0B8 CallerSP-048 -// {ReturnBlock (0x20 bytes)} ChildSP+098 CallerSP-068 +// {ReturnBlock (0x10 bytes)} ChildSP+098 CallerSP-068 // -- The base address of the Return block is the TransitionBlock pointer, the floating point args are // in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact // layout of all pieces of the frame that lie at or above the pushed floating point registers. @@ -91,9 +86,7 @@ NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler # FP and RA registers - addi sp, sp, -STACK_SIZE - sd s0, 0x0(sp) # Save frame pointer - sd ra, 0x08(sp) # Save return address + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, STACK_SIZE # Floating point registers fsd fa0, FLOAT_ARG_OFFSET(sp) @@ -105,7 +98,7 @@ fsd fa6, FLOAT_ARG_OFFSET + 0x30(sp) fsd fa7, FLOAT_ARG_OFFSET + 0x38(sp) - # Space for return buffer data (0x40 bytes) + # Space for return block data (0x10 bytes) # Save argument registers sd a0, ARGUMENT_REGISTERS_OFFSET(sp) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S deleted file mode 100644 index 1e9fedaa9f21..000000000000 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ /dev/null @@ -1,354 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include - -// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used -// during garbage collections to verify that object references were never written to the heap without using a -// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing -// new references to the real heap. Since this cannot be solved perfectly without critical sections around the -// entire update process, we instead update the shadow location and then re-check the real location (as two -// ordered operations) and if there is a disparity we will re-write the shadow location with a special value -// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC -// time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the -// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. -#ifdef WRITE_BARRIER_CHECK - - .global g_GCShadow - .global g_GCShadowEnd - - // On entry: - // destReg: location to be updated - // refReg: objectref to be stored - // - // On exit: - // t3,t4: trashed - // other registers are preserved - // - .macro UPDATE_GC_SHADOW destReg, refReg - - // If g_GCShadow is 0, don't perform the check. - la t3, g_GCShadow - ld t3, 0(t3) - beq t3, zero, 1f - li t4, 0 - - // Save destReg since we're about to modify it (and we need the original value both within the macro and - // once we exit the macro). - mv t4, \destReg - - // Transform destReg into the equivalent address in the shadow heap. - la t3, g_lowest_address - ld t3, 0(t3) - sub \destReg, \destReg, t3 - bltz \destReg, 0f - - la t3, g_GCShadow - ld t3, 0(t3) - add \destReg, \destReg, t3 - - la t3, g_GCShadowEnd - ld t3, 0(t3) - bgeu \destReg, t3, 0f - - // Update the shadow heap. - sd \refReg, 0(\destReg) - - // The following read must be strongly ordered with respect to the write we have just performed in order to - // prevent race conditions. - fence rw, rw - - // Now check that the real heap location still contains the value we just wrote into the shadow heap. - mv t3, t4 - ld t3, 0(t3) - beq t3, \refReg, 0f - - // Someone went and updated the real heap. We need to invalidate INVALIDGCVALUE the shadow location since we cannot - // guarantee whose shadow update won. - li t3, INVALIDGCVALUE - sd t3, 0(\destReg) - -0: - // Restore original destReg value - mv \destReg, t4 - -1: - .endm - -#else // WRITE_BARRIER_CHECK - - .macro UPDATE_GC_SHADOW destReg, refReg - .endm - -#endif // WRITE_BARRIER_CHECK - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the -// name of the register that points to the location to be updated and the name of the register that holds the -// object reference (this should be in upper case as it is used in the definition of the name of the helper). - -// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for -// some interlocked helpers that need an inline barrier. - - // On entry: - // destReg: location to be updated (cannot be t2,t6) - // refReg: objectref to be stored (cannot be t2,t6) - // - // On exit: - // t2, t6: trashed - // - .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg - - // Update the shadow copy of the heap with the same value just written to the same heap. 
- // (A no-op unless we are in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW \destReg, \refReg - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - // Update the write watch table if necessary - la t2, g_write_watch_table - - beqz t2, 2f - srli t6, \destReg, 12 // SoftwareWriteWatch::AddressToTableByteIndexShift - add t2, t2, t6 - lb t6, 0(t2) - bnez t6, 2f - li t6, 0xFF - sb t6, 0(t2) -#endif - -2: - // We can skip the card table write if the reference is to - // an object not on the ephemeral segment. - la t2, g_ephemeral_low - la t6, g_ephemeral_high - bgeu \refReg, t2, 0f - bltu \refReg, t6, 0f - - // Set this object's card, if it has not already been set. - la t2, g_card_table - srli t6, \destReg, 11 - add t6, t2, t6 - - // Check that this card has not already been written. Avoiding useless writes - // is a big win on multi-proc systems since it avoids cache thrashing. - lb t2, 0(t6) - li t6, 0xFF - beq t2, t6, 0f - - sb t6, 0(t6) - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Check if we need to update the card bundle table - la t2, g_card_bundle_table - srli t6, \destReg, 21 - add t6, t2, t6 - lb t2, 0(t6) - li t6, 0xFF - beq t2, t6, 0f - - sb t6, 0(t6) -#endif - -0: - // Exit label - .endm - - // On entry: - // destReg: location to be updated - // refReg: objectref to be stored - // - // On exit: - // t2, t6: trashed - // - .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg - - // The "check" of this checked write barrier - is destReg within the heap? - // If no, early out. - - la t2, g_lowest_address - bgeu \destReg, t2, 0f - - la t2, g_highest_address - bltu \destReg, t2, 0f - -1: - INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg - -0: - // Exit label - .endm - -// void JIT_ByRefWriteBarrier -// On entry: -// t5 : the source address (points to object reference to write) -// t3 : the destination address (object reference written here) -// -// On exit: -// t5 : incremented by 8 -// t3 : incremented by 8 -// t4 : trashed -// t2, t3 : trashed -// -// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF -// if you add more trashed registers. -// -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1 -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address -LEAF_ENTRY RhpByRefAssignRef, _TEXT - - ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 - ld t4, 0(t5) - addi t5, t5, 8 - j C_FUNC(RhpCheckedAssignRef) - -LEAF_END RhpByRefAssignRef, _TEXT - -// JIT_CheckedWriteBarrier(Object** dst, Object* src) -// -// Write barrier for writes to objects that may reside -// on the managed heap. -// -// On entry: -// t3 : the destination address (LHS of the assignment). -// May not be a heap location (hence the checked). -// t4 : the object reference (RHS of the assignment). 
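The barrier macros above combine two ideas: a debug-only shadow heap (UPDATE_GC_SHADOW) that mirrors each reference store and invalidates the shadow slot if the real heap changed underneath it, and a card-marking fast path that only records stores whose target reference lies in the ephemeral generation range. The following C++ sketch shows the intended card-marking step, using stand-ins for the runtime globals; the shift amounts mirror the assembly (one card byte covers 2^11 bytes of heap, one card bundle byte covers 2^21), and this is an illustration rather than the runtime's actual barrier.

```cpp
#include <cstdint>

// Stand-ins for the globals the assembly references (g_card_table, g_ephemeral_low, ...).
extern uint8_t*  g_card_table;
extern uint8_t*  g_card_bundle_table;
extern uintptr_t g_ephemeral_low;
extern uintptr_t g_ephemeral_high;

void WriteBarrierCore(uintptr_t dest, uintptr_t ref)
{
    // Only references into the ephemeral segment can create old-to-young pointers,
    // so anything else needs no card table update.
    if (ref < g_ephemeral_low || ref >= g_ephemeral_high)
        return;

    // One card byte covers 2^11 = 2048 bytes of heap.
    uint8_t* card = g_card_table + (dest >> 11);
    if (*card != 0xFF)
        *card = 0xFF;                 // skip the store if the card is already marked

    // One card bundle byte covers 2^21 bytes of heap (FEATURE_MANUALLY_MANAGED_CARD_BUNDLES).
    uint8_t* bundle = g_card_bundle_table + (dest >> 21);
    if (*bundle != 0xFF)
        *bundle = 0xFF;
}
```

Checking before writing, rather than writing unconditionally, is the point the assembly comment makes: avoiding redundant card writes prevents cache-line thrashing on multi-processor systems.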
-// -// On exit: -// t2, t6 : trashed -// t3 : incremented by 8 -LEAF_ENTRY RhpCheckedAssignRef, _TEXT - - # Check if the destination is within the heap bounds - la t2, C_FUNC(g_lowest_address) - la t6, C_FUNC(g_highest_address) - - bltu t3, t2, LOCAL_LABEL(NotInHeap) - bgeu t3, t6, LOCAL_LABEL(NotInHeap) - - j C_FUNC(RhpAssignRefRiscV64) - -LOCAL_LABEL(NotInHeap): - ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation - sd t4, 0(t3) - addi t3, t3, 8 - - ret - -LEAF_END RhpCheckedAssignRef, _TEXT - -// JIT_WriteBarrier(Object** dst, Object* src) -// -// Write barrier for writes to objects that are known to -// reside on the managed heap. -// -// On entry: -// t3 : the destination address (LHS of the assignment). -// t4 : the object reference (RHS of the assignment). -// -// On exit: -// t2, t6 : trashed -// t3 : incremented by 8 -LEAF_ENTRY RhpAssignRefRiscV64, _TEXT - - ALTERNATE_ENTRY RhpAssignRefAVLocation - sd t4, 0(t3) - - INSERT_UNCHECKED_WRITE_BARRIER_CORE t3, t4 - - addi t3, t3, 8 - - ret - -LEAF_END RhpAssignRefRiscV64, _TEXT - -// Same as RhpAssignRefRiscV64, but with standard ABI. -LEAF_ENTRY RhpAssignRef, _TEXT - mv t3, a0 ; t3 = dst - mv t4, a1 ; t4 = val - mv a1, ra - j C_FUNC(RhpAssignRefRiscV64) -LEAF_END RhpAssignRef, _TEXT - - -// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon -// successful updates. - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedLockCmpXchgAVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address - -// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) -// -// Interlocked compare exchange on objectref. -// -// On entry: -// a0: pointer to objectref -// a1: exchange value -// a2: comparand -// -// On exit: -// a0: original value of objectref -// t0, t1, t2, t6: trashed -// -LEAF_ENTRY RhpCheckedLockCmpXchg - -LOCAL_LABEL(CmpXchgRetry): - // Load the current value at the destination address. - lr.d t0, (a0) // t0 = *dest - // Compare the loaded value with the comparand. - bne t0, a2, LOCAL_LABEL(CmpXchgNoUpdate) // if (*dest != comparand) goto CmpXchgNoUpdate - - // Attempt to store the exchange value at the destination address. - sc.d t1, a1, (a0) // t1 = (store conditional result: 0 if successful) - bnez t1, LOCAL_LABEL(CmpXchgRetry) // if store conditional failed, retry - -LOCAL_LABEL(DoCardsCmpXchg): - // We have successfully updated the value of the objectref so now we need a GC write barrier. - // The following barrier code takes the destination in a0 and the value in a1 so the arguments are - // already correctly set up. - INSERT_CHECKED_WRITE_BARRIER_CORE a0, a1 - -LOCAL_LABEL(CmpXchgNoUpdate): - // t0 still contains the original value. - mv a0, t0 - - ret - -LEAF_END RhpCheckedLockCmpXchg - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address - -// RhpCheckedXchg(Object** destination, Object* value) -// -// Interlocked exchange on objectref. 
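RhpCheckedLockCmpXchg above implements a compare-exchange over an object reference with a load-reserved/store-conditional (lr.d/sc.d) retry loop, and raises the GC write barrier only when the store actually happened. As a rough C++ equivalent of the same contract, here is a sketch using std::atomic in place of the explicit retry loop; CheckedWriteBarrier is a stand-in for INSERT_CHECKED_WRITE_BARRIER_CORE and the surrounding names are illustrative.

```cpp
#include <atomic>

struct Object;

// Stand-in for the checked write barrier: only touches card tables when dest is in the GC heap.
void CheckedWriteBarrier(Object** dest, Object* value);

// Equivalent of RhpCheckedLockCmpXchg: returns the value that was in *dest before the operation.
Object* CheckedCompareExchange(std::atomic<Object*>* dest, Object* value, Object* comparand)
{
    Object* observed = comparand;
    if (dest->compare_exchange_strong(observed, value))
    {
        // The store happened, so the heap now holds a new reference: run the write barrier.
        CheckedWriteBarrier(reinterpret_cast<Object**>(dest), value);
        return comparand;   // on success the old value equals the comparand
    }
    return observed;        // on failure 'observed' holds the value actually found
}
```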
-// -// On entry: -// a0: pointer to objectref -// a1: exchange value -// -// On exit: -// a0: original value of objectref -// t1: trashed -// t3, t6, t4: trashed -// -LEAF_ENTRY RhpCheckedXchg - - ld t1, 0(a0) - sd a1, 0(a0) - -DoCardsXchg: - // We have successfully updated the value of the objectref so now we need a GC write barrier. - // The following barrier code takes the destination in a0 and the value in a1 so the arguments are - // already correctly set up. - - INSERT_CHECKED_WRITE_BARRIER_CORE a0, a1 - - // t1 still contains the original value. - mv a0, t1 - - jalr ra - -LEAF_END RhpCheckedXchg, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index 0c97a5e31243..486e285cf37c 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -1,17 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. #include "common.h" -#ifdef HOST_WINDOWS -#include -#endif #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" @@ -28,6 +24,7 @@ #include "RestrictedCallouts.h" #include "yieldprocessornormalized.h" #include +#include #ifdef FEATURE_PERFTRACING #include "EventPipeInterface.h" @@ -95,7 +92,7 @@ static bool InitDLL(HANDLE hPalInstance) // // Initialize interface dispatch. // - if (!InitializeInterfaceDispatch()) + if (!InterfaceDispatch_Initialize()) return false; #endif @@ -209,7 +206,7 @@ bool InitGSCookie() #endif // REVIEW: Need something better for PAL... 
- GSCookie val = (GSCookie)PalGetTickCount64(); + GSCookie val = (GSCookie)minipal_lowres_ticks(); #ifdef _DEBUG // In _DEBUG, always use the same value to make it easier to search for the cookie @@ -227,19 +224,6 @@ bool InitGSCookie() #endif // TARGET_UNIX #ifdef PROFILE_STARTUP -#define STD_OUTPUT_HANDLE ((uint32_t)-11) - -struct RegisterModuleTrace -{ - LARGE_INTEGER Begin; - LARGE_INTEGER End; -}; - -const int NUM_REGISTER_MODULE_TRACES = 16; -int g_registerModuleCount = 0; - -RegisterModuleTrace g_registerModuleTraces[NUM_REGISTER_MODULE_TRACES] = { 0 }; - static void AppendInt64(char * pBuffer, uint32_t* pLen, uint64_t value) { char localBuffer[20]; @@ -273,12 +257,6 @@ static void UninitDLL() AppendInt64(buffer, &len, g_startupTimelineEvents[GC_INIT_COMPLETE]); AppendInt64(buffer, &len, g_startupTimelineEvents[PROCESS_ATTACH_COMPLETE]); - for (int i = 0; i < g_registerModuleCount; i++) - { - AppendInt64(buffer, &len, g_registerModuleTraces[i].Begin.QuadPart); - AppendInt64(buffer, &len, g_registerModuleTraces[i].End.QuadPart); - } - buffer[len++] = '\n'; fwrite(buffer, len, 1, stdout); @@ -372,6 +350,10 @@ extern "C" bool RhInitialize(bool isDll) #endif #if defined(HOST_WINDOWS) || defined(FEATURE_PERFTRACING) +#if defined(DEBUG) && defined(HOST_WINDOWS) + // quick_exit works around Debug UCRT shutdown issues: https://github.com/dotnet/runtime/issues/108640 + at_quick_exit(&OnProcessExit); +#endif atexit(&OnProcessExit); #endif diff --git a/src/coreclr/nativeaot/Runtime/stressLog.cpp b/src/coreclr/nativeaot/Runtime/stressLog.cpp index 5b165a24332c..9253a9eb0e87 100644 --- a/src/coreclr/nativeaot/Runtime/stressLog.cpp +++ b/src/coreclr/nativeaot/Runtime/stressLog.cpp @@ -13,15 +13,14 @@ #endif // DACCESS_COMPILE #include "CommonTypes.h" #include "CommonMacros.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "daccess.h" #include "stressLog.h" #include "holder.h" #include "Crst.h" #include "rhassert.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" @@ -30,6 +29,7 @@ #include "threadstore.inl" #include "thread.inl" #include "volatile.h" +#include "minipal/time.h" #ifdef STRESS_LOG @@ -64,7 +64,7 @@ uint64_t getTimeStamp() #else // HOST_X86 uint64_t getTimeStamp() { - return PalQueryPerformanceCounter(); + return (uint64_t)minipal_hires_ticks(); } #endif // HOST_X86 else @@ -75,7 +75,7 @@ uint64_t getTimeStamp() */ uint64_t getTickFrequency() { - return PalQueryPerformanceFrequency(); + return (uint64_t)minipal_hires_tick_frequency(); } #endif // DACCESS_COMPILE @@ -472,7 +472,7 @@ inline void ThreadStressLog::Activate (Thread * /*pThread*/) // a previous record. Update curPtr to reflect the last safe beginning of a record, // but curPtr shouldn't wrap around, otherwise it'll break our assumptions about stress // log - curPtr = (StressMsg*)((char*)curPtr - StressMsg::maxMsgSize()); + curPtr = (StressMsg*)((char*)curPtr - StressMsg::maxMsgSize); if (curPtr < (StressMsg*)curWriteChunk->StartPtr()) { curPtr = (StressMsg *)curWriteChunk->StartPtr(); diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 3c471751f244..e924489b70d7 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -2,20 +2,16 @@ // The .NET Foundation licenses this file to you under the MIT license. 
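The stress log hunks above replace PalQueryPerformanceCounter/PalQueryPerformanceFrequency with minipal_hires_ticks and minipal_hires_tick_frequency for timestamps. To make the relationship between the two values concrete, here is a small conversion helper; treating the frequency as ticks per second (matching the old QueryPerformance contract) is an assumption of this sketch.

```cpp
#include <cstdint>

// ticks:     a delta between two values sampled via getTimeStamp()
// frequency: the value returned by getTickFrequency()
// Assumes the frequency is expressed in ticks per second.
double TicksToMilliseconds(uint64_t ticks, uint64_t frequency)
{
    return (static_cast<double>(ticks) / static_cast<double>(frequency)) * 1000.0;
}
```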
#include "common.h" -#ifdef HOST_WINDOWS -#include -#endif #include "gcenv.h" #include "gcheaputilities.h" #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" @@ -31,6 +27,7 @@ #include "RhConfig.h" #include "GcEnum.h" #include "NativeContext.h" +#include "minipal/time.h" #ifndef DACCESS_COMPILE @@ -41,7 +38,7 @@ static Thread* g_RuntimeInitializingThread; ee_alloc_context::PerThreadRandom::PerThreadRandom() { - minipal_xoshiro128pp_init(&random_state, (uint32_t)PalGetTickCount64()); + minipal_xoshiro128pp_init(&random_state, (uint32_t)minipal_lowres_ticks()); } thread_local ee_alloc_context::PerThreadRandom ee_alloc_context::t_random = PerThreadRandom(); @@ -71,6 +68,12 @@ PInvokeTransitionFrame* Thread::GetTransitionFrameForStackTrace() return m_pDeferredTransitionFrame; } +PInvokeTransitionFrame* Thread::GetTransitionFrameForSampling() +{ + CrossThreadUnhijack(); + return GetTransitionFrame(); +} + void Thread::WaitForGC(PInvokeTransitionFrame* pTransitionFrame) { ASSERT(!IsDoNotTriggerGcSet()); @@ -917,19 +920,19 @@ void Thread::Unhijack() } // This unhijack routine is called to undo a hijack, that is potentially on a different thread. -// +// // Although there are many code sequences (here and in asm) to // perform an unhijack operation, they will never execute concurrently: -// +// // - A thread may unhijack itself at any time so long as it does that from unmanaged code while in coop mode. // This ensures that coop thread can access its stack synchronously. // Unhijacking from unmanaged code ensures that another thread will not attempt to hijack it, // since we only hijack threads that are executing managed code. -// +// // - A GC thread may access a thread asynchronously, including unhijacking it. // Asynchronously accessed thread must be in preemptive mode and should not // access the managed portion of its stack. -// +// // - A thread that owns the suspension can access another thread as long as the other thread is // in preemptive mode or suspended in managed code. // Either way the other thread cannot be accessing its hijack. @@ -1107,7 +1110,10 @@ bool Thread::IsDetached() void Thread::SetDetached() { ASSERT(!IsStateSet(TSF_Detached)); + ASSERT(IsStateSet(TSF_Attached)); + SetState(TSF_Detached); + ClearState(TSF_Attached); } bool Thread::IsActivationPending() @@ -1243,7 +1249,10 @@ void Thread::EnsureRuntimeInitialized() if (g_RuntimeInitializationCallback != NULL) { if (g_RuntimeInitializationCallback() != 0) + { + PalPrintFatalError("\nFatal error. .NET runtime failed to initialize.\n"); RhFailFast(); + } g_RuntimeInitializationCallback = NULL; } diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 47e28c6321ac..9818f958ebdf 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -193,7 +193,9 @@ class Thread : private RuntimeThreadLocals { TSF_Unknown = 0x00000000, // Threads are created in this state TSF_Attached = 0x00000001, // Thread was inited by first U->M transition on this thread - TSF_Detached = 0x00000002, // Thread was detached by DllMain + // ...Prior to setting this bit the state is TSF_Unknown. + TSF_Detached = 0x00000002, // Thread was detached and no longer can run managed code. 
+ // ...TSF_Attached is cleared when TSF_Detached is set. TSF_SuppressGcStress = 0x00000008, // Do not allow gc stress on this thread, used in DllMain // ...and on the Finalizer thread TSF_DoNotTriggerGc = 0x00000010, // Do not allow hijacking of this thread, also intended to @@ -326,6 +328,7 @@ class Thread : private RuntimeThreadLocals bool IsCurrentThreadInCooperativeMode(); PInvokeTransitionFrame* GetTransitionFrameForStackTrace(); + PInvokeTransitionFrame* GetTransitionFrameForSampling(); void * GetCurrentThreadPInvokeReturnAddress(); // diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 33d882f340fe..2dc0c1a64d97 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -6,11 +6,10 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" -#include "varint.h" #include "regdisplay.h" #include "StackFrameIterator.h" #include "thread.h" @@ -110,9 +109,16 @@ void ThreadStore::AttachCurrentThread(bool fAcquireThreadStoreLock) // we want to avoid at construction time because the loader lock is held then. Thread * pAttachingThread = RawGetCurrentThread(); - // The thread was already initialized, so it is already attached + if (pAttachingThread->IsDetached()) + { + ASSERT_UNCONDITIONALLY("Attempt to execute managed code after the .NET runtime thread state has been destroyed."); + RhFailFast(); + } + + // The thread was already initialized, so it is already attached. if (pAttachingThread->IsInitialized()) { + ASSERT((pAttachingThread->m_ThreadStateFlags & Thread::TSF_Attached) != 0); return; } @@ -156,12 +162,8 @@ void ThreadStore::DetachCurrentThread() return; } - // Unregister from OS notifications - // This can return false if detach notification is spurious and does not belong to this thread. - if (!PalDetachThread(pDetachingThread)) - { - return; - } + // detach callback should not call us twice + ASSERT(!pDetachingThread->IsDetached()); // Run pre-mortem callbacks while we still can run managed code and not holding locks. // NOTE: background GC threads are attached/suspendable threads, but should not run ordinary @@ -301,7 +303,7 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent) } } -#if defined(TARGET_ARM) || defined(TARGET_ARM64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Flush the store buffers on all CPUs, to ensure that all changes made so far are seen // by the GC threads. This only matters on weak memory ordered processors as // the strong memory ordered processors wouldn't have reordered the relevant writes. @@ -309,7 +311,7 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent) // left alone by suspension to flush their writes that they made before they switched to // preemptive mode. PalFlushProcessWriteBuffers(); -#endif //TARGET_ARM || TARGET_ARM64 +#endif //TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 } void ThreadStore::ResumeAllThreads(bool waitForGCEvent) @@ -320,7 +322,7 @@ void ThreadStore::ResumeAllThreads(bool waitForGCEvent) } END_FOREACH_THREAD -#if defined(TARGET_ARM) || defined(TARGET_ARM64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Flush the store buffers on all CPUs, to ensure that they all see changes made // by the GC threads. 
This only matters on weak memory ordered processors as // the strong memory ordered processors wouldn't have reordered the relevant reads. @@ -328,7 +330,7 @@ void ThreadStore::ResumeAllThreads(bool waitForGCEvent) // the runtime was suspended and that will return to cooperative mode after the runtime // is restarted. PalFlushProcessWriteBuffers(); -#endif //TARGET_ARM || TARGET_ARM64 +#endif //TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 RhpTrapThreads &= ~(uint32_t)TrapThreadsFlags::TrapThreads; diff --git a/src/coreclr/nativeaot/Runtime/unix/HardwareExceptions.cpp b/src/coreclr/nativeaot/Runtime/unix/HardwareExceptions.cpp index a9385c408288..44a0b068a828 100644 --- a/src/coreclr/nativeaot/Runtime/unix/HardwareExceptions.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/HardwareExceptions.cpp @@ -2,7 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. #include "CommonTypes.h" -#include "PalRedhawkCommon.h" +#include "Pal.h" +#include "PalLimitedContext.h" #include "CommonMacros.h" #include "config.h" #include "daccess.h" @@ -23,9 +24,6 @@ #error Cannot handle hardware exceptions on this platform #endif -#define REDHAWK_PALEXPORT extern "C" -#define REDHAWK_PALAPI - #define EXCEPTION_ACCESS_VIOLATION 0xC0000005u #define EXCEPTION_DATATYPE_MISALIGNMENT 0x80000002u #define EXCEPTION_BREAKPOINT 0x80000003u @@ -626,7 +624,7 @@ bool InitializeHardwareExceptionHandling() } // Set hardware exception handler -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSetHardwareExceptionHandler(PHARDWARE_EXCEPTION_HANDLER handler) +void PalSetHardwareExceptionHandler(PHARDWARE_EXCEPTION_HANDLER handler) { ASSERT_MSG(g_hardwareExceptionHandler == NULL, "Hardware exception handler already set") g_hardwareExceptionHandler = handler; diff --git a/src/coreclr/nativeaot/Runtime/unix/NativeContext.cpp b/src/coreclr/nativeaot/Runtime/unix/NativeContext.cpp index c7512e018f57..d1a6b3000546 100644 --- a/src/coreclr/nativeaot/Runtime/unix/NativeContext.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/NativeContext.cpp @@ -6,7 +6,7 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" +#include "PalLimitedContext.h" #include "regdisplay.h" #include "config.h" @@ -816,7 +816,9 @@ uint64_t GetPC(void* context) #elif TARGET_LOONGARCH64 uint64_t& NATIVE_CONTEXT::R0() { return (uint64_t&)MCREG_R0(ctx.uc_mcontext); } + uint64_t& NATIVE_CONTEXT::Ra() { return (uint64_t&)MCREG_Ra(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R2() { return (uint64_t&)MCREG_Tp(ctx.uc_mcontext); } + uint64_t& NATIVE_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R4() { return (uint64_t&)MCREG_A0(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R5() { return (uint64_t&)MCREG_A1(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R6() { return (uint64_t&)MCREG_A2(ctx.uc_mcontext); } @@ -835,6 +837,7 @@ uint64_t GetPC(void* context) uint64_t& NATIVE_CONTEXT::R19() { return (uint64_t&)MCREG_T7(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R20() { return (uint64_t&)MCREG_T8(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R21() { return (uint64_t&)MCREG_X0(ctx.uc_mcontext); } + uint64_t& NATIVE_CONTEXT::Fp() { return (uint64_t&)MCREG_Fp(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R23() { return (uint64_t&)MCREG_S0(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R24() { return (uint64_t&)MCREG_S1(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R25() { return (uint64_t&)MCREG_S2(ctx.uc_mcontext); } @@ -844,9 +847,6 @@ uint64_t GetPC(void* context) uint64_t& 
NATIVE_CONTEXT::R29() { return (uint64_t&)MCREG_S6(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R30() { return (uint64_t&)MCREG_S7(ctx.uc_mcontext); } uint64_t& NATIVE_CONTEXT::R31() { return (uint64_t&)MCREG_S8(ctx.uc_mcontext); } - uint64_t& NATIVE_CONTEXT::Fp() { return (uint64_t&)MCREG_Fp(ctx.uc_mcontext); } // R22 - uint64_t& NATIVE_CONTEXT::Ra() { return (uint64_t&)MCREG_Ra(ctx.uc_mcontext); } // R1 - uint64_t& NATIVE_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } // R3 uint64_t& NATIVE_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); } #elif TARGET_RISCV64 diff --git a/src/coreclr/nativeaot/Runtime/unix/NativeContext.h b/src/coreclr/nativeaot/Runtime/unix/NativeContext.h index 0c16ea872e96..a670d9d30702 100644 --- a/src/coreclr/nativeaot/Runtime/unix/NativeContext.h +++ b/src/coreclr/nativeaot/Runtime/unix/NativeContext.h @@ -175,7 +175,9 @@ struct NATIVE_CONTEXT #elif defined(TARGET_LOONGARCH64) uint64_t& R0(); + uint64_t& Ra(); uint64_t& R2(); + uint64_t& Sp(); uint64_t& R4(); uint64_t& R5(); uint64_t& R6(); @@ -194,6 +196,7 @@ struct NATIVE_CONTEXT uint64_t& R19(); uint64_t& R20(); uint64_t& R21(); + uint64_t& Fp(); uint64_t& R23(); uint64_t& R24(); uint64_t& R25(); @@ -203,9 +206,6 @@ struct NATIVE_CONTEXT uint64_t& R29(); uint64_t& R30(); uint64_t& R31(); - uint64_t& Fp(); // R22 - uint64_t& Ra(); // R1 - uint64_t& Sp(); // R3 uint64_t& Pc(); uintptr_t GetIp() { return (uintptr_t)Pc(); } @@ -219,7 +219,7 @@ struct NATIVE_CONTEXT ASSERT(&R4() + 1 == &R5()); ASSERT(&R4() + 10 == &R14()); - for (uint64_t* pReg = &R0(); pReg <= &R31(); pReg++) + for (uint64_t* pReg = &Ra(); pReg <= &R31(); pReg++) lambda((size_t*)pReg); // Ra can be used as a scratch register diff --git a/src/coreclr/nativeaot/Runtime/unix/PalInline.h b/src/coreclr/nativeaot/Runtime/unix/PalInline.h new file mode 100644 index 000000000000..7d64c1e1cd56 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/unix/PalInline.h @@ -0,0 +1,183 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Implementation of NativeAOT PAL inline functions + +#include + +FORCEINLINE void PalInterlockedOperationBarrier() +{ +#if (defined(HOST_ARM64) && !defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT) && !defined(__clang__)) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) + // On arm64, most of the __sync* functions generate a code sequence like: + // loop: + // ldaxr (load acquire exclusive) + // ... + // stlxr (store release exclusive) + // cbnz loop + // + // It is possible for a load following the code sequence above to be reordered to occur prior to the store above due to the + // release barrier, this is substantiated by https://github.com/dotnet/coreclr/pull/17508. Interlocked operations in the PAL + // require the load to occur after the store. This memory barrier should be used following a call to a __sync* function to + // prevent that reordering. Code generated for arm32 includes a 'dmb' after 'cbnz', so no issue there at the moment. 
+ __sync_synchronize(); +#endif +} + +FORCEINLINE int32_t PalInterlockedIncrement(_Inout_ int32_t volatile *pDst) +{ + int32_t result = __sync_add_and_fetch(pDst, 1); + PalInterlockedOperationBarrier(); + return result; +} + +FORCEINLINE int64_t PalInterlockedIncrement64(_Inout_ int64_t volatile *pDst) +{ + int64_t result = __sync_add_and_fetch(pDst, 1); + PalInterlockedOperationBarrier(); + return result; +} + +FORCEINLINE int32_t PalInterlockedDecrement(_Inout_ int32_t volatile *pDst) +{ + int32_t result = __sync_sub_and_fetch(pDst, 1); + PalInterlockedOperationBarrier(); + return result; +} + +FORCEINLINE uint32_t PalInterlockedOr(_Inout_ uint32_t volatile *pDst, uint32_t iValue) +{ + int32_t result = __sync_or_and_fetch(pDst, iValue); + PalInterlockedOperationBarrier(); + return result; +} + +FORCEINLINE uint32_t PalInterlockedAnd(_Inout_ uint32_t volatile *pDst, uint32_t iValue) +{ + int32_t result = __sync_and_and_fetch(pDst, iValue); + PalInterlockedOperationBarrier(); + return result; +} + +FORCEINLINE int32_t PalInterlockedExchange(_Inout_ int32_t volatile *pDst, int32_t iValue) +{ +#ifdef __clang__ + int32_t result =__sync_swap(pDst, iValue); +#else + int32_t result =__atomic_exchange_n(pDst, iValue, __ATOMIC_ACQ_REL); +#endif + PalInterlockedOperationBarrier(); + return result; +} + +FORCEINLINE int64_t PalInterlockedExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue) +{ +#ifdef __clang__ + int32_t result =__sync_swap(pDst, iValue); +#else + int32_t result =__atomic_exchange_n(pDst, iValue, __ATOMIC_ACQ_REL); +#endif + PalInterlockedOperationBarrier(); + return result; +} + +FORCEINLINE int32_t PalInterlockedCompareExchange(_Inout_ int32_t volatile *pDst, int32_t iValue, int32_t iComparand) +{ + int32_t result = __sync_val_compare_and_swap(pDst, iComparand, iValue); + PalInterlockedOperationBarrier(); + return result; +} + +FORCEINLINE int64_t PalInterlockedCompareExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue, int64_t iComparand) +{ + int64_t result = __sync_val_compare_and_swap(pDst, iComparand, iValue); + PalInterlockedOperationBarrier(); + return result; +} + +#if defined(HOST_64BIT) +FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; + + // TODO-LOONGARCH64: the 128-bit CAS is supported starting from the 3A6000 CPU (ISA1.1). + // When running on older hardware that doesn't support native CAS-128, the system falls back + // to a mutex-based approach via libatomic, which is not suitable for runtime requirements. + // + // TODO-RISCV64: double-check if libatomic's emulated CAS-128 works as expected once AOT applications are + // functional on linux-riscv64: https://github.com/dotnet/runtime/issues/106223. + // CAS-128 is natively supported starting with the Zacas extension in Linux 6.8; however, hardware support + // for RVA23 profile is not available at the time of writing. + // + // See https://github.com/dotnet/runtime/issues/109276. 
+ + __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); + PalInterlockedOperationBarrier(); + pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); + return iComparand == iResult; +} +#endif // HOST_64BIT + +#ifdef HOST_64BIT + +#define PalInterlockedExchangePointer(_pDst, _pValue) \ + ((void *)PalInterlockedExchange64((int64_t volatile *)(_pDst), (int64_t)(size_t)(_pValue))) + +#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ + ((void *)PalInterlockedCompareExchange64((int64_t volatile *)(_pDst), (int64_t)(size_t)(_pValue), (int64_t)(size_t)(_pComparand))) + +#else // HOST_64BIT + +#define PalInterlockedExchangePointer(_pDst, _pValue) \ + ((void *)PalInterlockedExchange((int32_t volatile *)(_pDst), (int32_t)(size_t)(_pValue))) + +#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ + ((void *)PalInterlockedCompareExchange((int32_t volatile *)(_pDst), (int32_t)(size_t)(_pValue), (int32_t)(size_t)(_pComparand))) + +#endif // HOST_64BIT + + +FORCEINLINE void PalYieldProcessor() +{ +#if defined(HOST_X86) || defined(HOST_AMD64) + __asm__ __volatile__( + "rep\n" + "nop" + ); +#elif defined(HOST_ARM64) + __asm__ __volatile__( + "dmb ishst\n" + "yield" + ); +#endif +} + +FORCEINLINE void PalMemoryBarrier() +{ + __sync_synchronize(); +} + +#define PalDebugBreak() abort() + +FORCEINLINE int32_t PalGetLastError() +{ + return errno; +} + +FORCEINLINE void PalSetLastError(int32_t error) +{ + errno = error; +} + +FORCEINLINE int32_t PalOsPageSize() +{ +#if defined(HOST_AMD64) + // all supported platforms use 4K pages on x64, including emulated environments + return 0x1000; +#elif defined(HOST_APPLE) + // OSX and related OS expose 16-kilobyte pages to the 64-bit userspace + // https://developer.apple.com/library/archive/documentation/Performance/Conceptual/ManagingMemory/Articles/AboutMemory.html + return 0x4000; +#else + return PalGetOsPageSize(); +#endif +} diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h deleted file mode 100644 index 2bea01616f01..000000000000 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h +++ /dev/null @@ -1,183 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// Implementation of Redhawk PAL inline functions - -#include - -FORCEINLINE void PalInterlockedOperationBarrier() -{ -#if (defined(HOST_ARM64) && !defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT) && !defined(__clang__)) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) - // On arm64, most of the __sync* functions generate a code sequence like: - // loop: - // ldaxr (load acquire exclusive) - // ... - // stlxr (store release exclusive) - // cbnz loop - // - // It is possible for a load following the code sequence above to be reordered to occur prior to the store above due to the - // release barrier, this is substantiated by https://github.com/dotnet/coreclr/pull/17508. Interlocked operations in the PAL - // require the load to occur after the store. This memory barrier should be used following a call to a __sync* function to - // prevent that reordering. Code generated for arm32 includes a 'dmb' after 'cbnz', so no issue there at the moment. 
- __sync_synchronize(); -#endif -} - -FORCEINLINE int32_t PalInterlockedIncrement(_Inout_ int32_t volatile *pDst) -{ - int32_t result = __sync_add_and_fetch(pDst, 1); - PalInterlockedOperationBarrier(); - return result; -} - -FORCEINLINE int64_t PalInterlockedIncrement64(_Inout_ int64_t volatile *pDst) -{ - int64_t result = __sync_add_and_fetch(pDst, 1); - PalInterlockedOperationBarrier(); - return result; -} - -FORCEINLINE int32_t PalInterlockedDecrement(_Inout_ int32_t volatile *pDst) -{ - int32_t result = __sync_sub_and_fetch(pDst, 1); - PalInterlockedOperationBarrier(); - return result; -} - -FORCEINLINE uint32_t PalInterlockedOr(_Inout_ uint32_t volatile *pDst, uint32_t iValue) -{ - int32_t result = __sync_or_and_fetch(pDst, iValue); - PalInterlockedOperationBarrier(); - return result; -} - -FORCEINLINE uint32_t PalInterlockedAnd(_Inout_ uint32_t volatile *pDst, uint32_t iValue) -{ - int32_t result = __sync_and_and_fetch(pDst, iValue); - PalInterlockedOperationBarrier(); - return result; -} - -FORCEINLINE int32_t PalInterlockedExchange(_Inout_ int32_t volatile *pDst, int32_t iValue) -{ -#ifdef __clang__ - int32_t result =__sync_swap(pDst, iValue); -#else - int32_t result =__atomic_exchange_n(pDst, iValue, __ATOMIC_ACQ_REL); -#endif - PalInterlockedOperationBarrier(); - return result; -} - -FORCEINLINE int64_t PalInterlockedExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue) -{ -#ifdef __clang__ - int32_t result =__sync_swap(pDst, iValue); -#else - int32_t result =__atomic_exchange_n(pDst, iValue, __ATOMIC_ACQ_REL); -#endif - PalInterlockedOperationBarrier(); - return result; -} - -FORCEINLINE int32_t PalInterlockedCompareExchange(_Inout_ int32_t volatile *pDst, int32_t iValue, int32_t iComparand) -{ - int32_t result = __sync_val_compare_and_swap(pDst, iComparand, iValue); - PalInterlockedOperationBarrier(); - return result; -} - -FORCEINLINE int64_t PalInterlockedCompareExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue, int64_t iComparand) -{ - int64_t result = __sync_val_compare_and_swap(pDst, iComparand, iValue); - PalInterlockedOperationBarrier(); - return result; -} - -#if defined(HOST_64BIT) -FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) -{ - __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; - - // TODO-LOONGARCH64: the 128-bit CAS is supported starting from the 3A6000 CPU (ISA1.1). - // When running on older hardware that doesn't support native CAS-128, the system falls back - // to a mutex-based approach via libatomic, which is not suitable for runtime requirements. - // - // TODO-RISCV64: double-check if libatomic's emulated CAS-128 works as expected once AOT applications are - // functional on linux-riscv64: https://github.com/dotnet/runtime/issues/106223. - // CAS-128 is natively supported starting with the Zacas extension in Linux 6.8; however, hardware support - // for RVA23 profile is not available at the time of writing. - // - // See https://github.com/dotnet/runtime/issues/109276. 
- - __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); - PalInterlockedOperationBarrier(); - pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); - return iComparand == iResult; -} -#endif // HOST_64BIT - -#ifdef HOST_64BIT - -#define PalInterlockedExchangePointer(_pDst, _pValue) \ - ((void *)PalInterlockedExchange64((int64_t volatile *)(_pDst), (int64_t)(size_t)(_pValue))) - -#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ - ((void *)PalInterlockedCompareExchange64((int64_t volatile *)(_pDst), (int64_t)(size_t)(_pValue), (int64_t)(size_t)(_pComparand))) - -#else // HOST_64BIT - -#define PalInterlockedExchangePointer(_pDst, _pValue) \ - ((void *)PalInterlockedExchange((int32_t volatile *)(_pDst), (int32_t)(size_t)(_pValue))) - -#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ - ((void *)PalInterlockedCompareExchange((int32_t volatile *)(_pDst), (int32_t)(size_t)(_pValue), (int32_t)(size_t)(_pComparand))) - -#endif // HOST_64BIT - - -FORCEINLINE void PalYieldProcessor() -{ -#if defined(HOST_X86) || defined(HOST_AMD64) - __asm__ __volatile__( - "rep\n" - "nop" - ); -#elif defined(HOST_ARM64) - __asm__ __volatile__( - "dmb ishst\n" - "yield" - ); -#endif -} - -FORCEINLINE void PalMemoryBarrier() -{ - __sync_synchronize(); -} - -#define PalDebugBreak() abort() - -FORCEINLINE int32_t PalGetLastError() -{ - return errno; -} - -FORCEINLINE void PalSetLastError(int32_t error) -{ - errno = error; -} - -FORCEINLINE int32_t PalOsPageSize() -{ -#if defined(HOST_AMD64) - // all supported platforms use 4K pages on x64, including emulated environments - return 0x1000; -#elif defined(HOST_APPLE) - // OSX and related OS expose 16-kilobyte pages to the 64-bit userspace - // https://developer.apple.com/library/archive/documentation/Performance/Conceptual/ManagingMemory/Articles/AboutMemory.html - return 0x4000; -#else - return PalGetOsPageSize(); -#endif -} diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp deleted file mode 100644 index 5456ed029459..000000000000 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp +++ /dev/null @@ -1,1251 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// -// Implementation of the Redhawk Platform Abstraction Layer (PAL) library when Unix is the platform. 
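Referring back to PalInterlockedCompareExchange128 in the new PalInline.h above: the 128-bit comparand is passed as a two-element int64_t array (low word first), the same array receives the observed value regardless of outcome, and the return value is non-zero only when the swap happened. A hypothetical caller, to make that contract concrete (DoubleWord and TryExchange are illustrative names, not runtime APIs):

```cpp
#include <cstdint>

// Signature as declared in PalInline.h above.
uint8_t PalInterlockedCompareExchange128(int64_t volatile* pDst, int64_t iValueHigh,
                                         int64_t iValueLow, int64_t* pComparandAndResult);

// A 16-byte, 16-byte-aligned slot we want to update atomically.
struct alignas(16) DoubleWord
{
    int64_t low;
    int64_t high;
};

// Try to replace 'expected' with 'desired' in *slot; on failure 'expected' is updated
// with the value actually observed, mirroring the pComparandAndResult contract.
bool TryExchange(DoubleWord* slot, DoubleWord& expected, const DoubleWord& desired)
{
    int64_t comparandAndResult[2] = { expected.low, expected.high };
    uint8_t succeeded = PalInterlockedCompareExchange128(
        reinterpret_cast<int64_t volatile*>(slot),
        desired.high, desired.low, comparandAndResult);
    expected.low  = comparandAndResult[0];
    expected.high = comparandAndResult[1];
    return succeeded != 0;
}
```

On failure a caller would typically recompute the desired value from the refreshed 'expected' and retry, the usual pattern for cmpxchg16b-style primitives.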
-// - -#include -#include -#include -#include -#include "config.h" -#include -#include "gcenv.h" -#include "gcenv.ee.h" -#include "gcconfig.h" -#include "holder.h" -#include "UnixSignals.h" -#include "NativeContext.h" -#include "HardwareExceptions.h" -#include "PalCreateDump.h" -#include "cgroupcpu.h" -#include "threadstore.h" -#include "thread.h" -#include "threadstore.inl" - -#define _T(s) s -#include "RhConfig.h" - -#include -#include -#include -#ifndef TARGET_WASI // no dynamic linking in Wasi -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef TARGET_LINUX -#include -#endif - -#if HAVE_PTHREAD_GETTHREADID_NP -#include -#endif - -#if HAVE_LWP_SELF -#include -#endif - -#if HAVE_CLOCK_GETTIME_NSEC_NP -#include -#endif - -#ifdef TARGET_APPLE -#include -#endif - -#ifndef HOST_WASM -#include -#endif - -#ifdef HOST_WASM -#include "wasm/PalRedhawkWasm.h" -#endif - -#ifdef TARGET_HAIKU -#include -#endif - -using std::nullptr_t; - -#define PalRaiseFailFastException RaiseFailFastException - -#define INVALID_HANDLE_VALUE ((HANDLE)(intptr_t)-1) - -#define PAGE_NOACCESS 0x01 -#define PAGE_READWRITE 0x04 -#define PAGE_EXECUTE_READ 0x20 -#define PAGE_EXECUTE_READWRITE 0x40 - -#define WAIT_OBJECT_0 0 -#define WAIT_TIMEOUT 258 -#define WAIT_FAILED 0xFFFFFFFF - -static const int tccSecondsToMilliSeconds = 1000; -static const int tccSecondsToMicroSeconds = 1000000; -static const int tccSecondsToNanoSeconds = 1000000000; -static const int tccMilliSecondsToMicroSeconds = 1000; -static const int tccMilliSecondsToNanoSeconds = 1000000; -static const int tccMicroSecondsToNanoSeconds = 1000; - -extern "C" void RaiseFailFastException(PEXCEPTION_RECORD arg1, PCONTEXT arg2, uint32_t arg3) -{ - // Causes creation of a crash dump if enabled - PalCreateCrashDumpIfEnabled(); - - // Aborts the process - abort(); -} - -static void UnmaskActivationSignal() -{ -#ifndef HOST_WASM - sigset_t signal_set; - sigemptyset(&signal_set); - sigaddset(&signal_set, INJECT_ACTIVATION_SIGNAL); - - int sigmaskRet = pthread_sigmask(SIG_UNBLOCK, &signal_set, NULL); - _ASSERTE(sigmaskRet == 0); -#endif -} - -static void TimeSpecAdd(timespec* time, uint32_t milliseconds) -{ - uint64_t nsec = time->tv_nsec + (uint64_t)milliseconds * tccMilliSecondsToNanoSeconds; - if (nsec >= tccSecondsToNanoSeconds) - { - time->tv_sec += nsec / tccSecondsToNanoSeconds; - nsec %= tccSecondsToNanoSeconds; - } - - time->tv_nsec = nsec; -} - -// Convert nanoseconds to the timespec structure -// Parameters: -// nanoseconds - time in nanoseconds to convert -// t - the target timespec structure -static void NanosecondsToTimeSpec(uint64_t nanoseconds, timespec* t) -{ - t->tv_sec = nanoseconds / tccSecondsToNanoSeconds; - t->tv_nsec = nanoseconds % tccSecondsToNanoSeconds; -} - -void ReleaseCondAttr(pthread_condattr_t* condAttr) -{ - int st = pthread_condattr_destroy(condAttr); - ASSERT_MSG(st == 0, "Failed to destroy pthread_condattr_t object"); -} - -class PthreadCondAttrHolder : public Wrapper -{ -public: - PthreadCondAttrHolder(pthread_condattr_t* attrs) - : Wrapper(attrs) - { - } -}; - -class UnixEvent -{ - pthread_cond_t m_condition; - pthread_mutex_t m_mutex; - bool m_manualReset; - bool m_state; - bool m_isValid; - -public: - - UnixEvent(bool manualReset, bool initialState) - : m_manualReset(manualReset), - m_state(initialState), - m_isValid(false) - { - } - - bool Initialize() - { - pthread_condattr_t attrs; - int st = pthread_condattr_init(&attrs); - if (st != 0) - { - 
ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent condition attribute"); - return false; - } - - PthreadCondAttrHolder attrsHolder(&attrs); - -#if HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_CLOCK_GETTIME_NSEC_NP - // Ensure that the pthread_cond_timedwait will use CLOCK_MONOTONIC - st = pthread_condattr_setclock(&attrs, CLOCK_MONOTONIC); - if (st != 0) - { - ASSERT_UNCONDITIONALLY("Failed to set UnixEvent condition variable wait clock"); - return false; - } -#endif // HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_CLOCK_GETTIME_NSEC_NP - - st = pthread_mutex_init(&m_mutex, NULL); - if (st != 0) - { - ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent mutex"); - return false; - } - - st = pthread_cond_init(&m_condition, &attrs); - if (st != 0) - { - ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent condition variable"); - - st = pthread_mutex_destroy(&m_mutex); - ASSERT_MSG(st == 0, "Failed to destroy UnixEvent mutex"); - return false; - } - - m_isValid = true; - - return true; - } - - bool Destroy() - { - bool success = true; - - if (m_isValid) - { - int st = pthread_mutex_destroy(&m_mutex); - ASSERT_MSG(st == 0, "Failed to destroy UnixEvent mutex"); - success = success && (st == 0); - - st = pthread_cond_destroy(&m_condition); - ASSERT_MSG(st == 0, "Failed to destroy UnixEvent condition variable"); - success = success && (st == 0); - } - - return success; - } - - uint32_t Wait(uint32_t milliseconds) - { - timespec endTime; -#if HAVE_CLOCK_GETTIME_NSEC_NP - uint64_t endNanoseconds; - if (milliseconds != INFINITE) - { - uint64_t nanoseconds = (uint64_t)milliseconds * tccMilliSecondsToNanoSeconds; - NanosecondsToTimeSpec(nanoseconds, &endTime); - endNanoseconds = clock_gettime_nsec_np(CLOCK_UPTIME_RAW) + nanoseconds; - } -#elif HAVE_PTHREAD_CONDATTR_SETCLOCK || _WASI_EMULATED_PROCESS_CLOCKS - if (milliseconds != INFINITE) - { - clock_gettime(CLOCK_MONOTONIC, &endTime); - TimeSpecAdd(&endTime, milliseconds); - } -#else -#error "Don't know how to perform timed wait on this platform" -#endif - - int st = 0; - - pthread_mutex_lock(&m_mutex); - while (!m_state) - { - if (milliseconds == INFINITE) - { - st = pthread_cond_wait(&m_condition, &m_mutex); - } - else - { -#if HAVE_CLOCK_GETTIME_NSEC_NP - // Since OSX doesn't support CLOCK_MONOTONIC, we use relative variant of the - // timed wait and we need to handle spurious wakeups properly. - st = pthread_cond_timedwait_relative_np(&m_condition, &m_mutex, &endTime); - if ((st == 0) && !m_state) - { - uint64_t currentNanoseconds = clock_gettime_nsec_np(CLOCK_UPTIME_RAW); - if (currentNanoseconds < endNanoseconds) - { - // The wake up was spurious, recalculate the relative endTime - uint64_t remainingNanoseconds = (endNanoseconds - currentNanoseconds); - NanosecondsToTimeSpec(remainingNanoseconds, &endTime); - } - else - { - // Although the timed wait didn't report a timeout, time calculated from the - // mach time shows we have already reached the end time. It can happen if - // the wait was spuriously woken up right before the timeout. 
- st = ETIMEDOUT; - } - } -#else // HAVE_CLOCK_GETTIME_NSEC_NP - st = pthread_cond_timedwait(&m_condition, &m_mutex, &endTime); -#endif // HAVE_CLOCK_GETTIME_NSEC_NP - } - - if (st != 0) - { - // wait failed or timed out - break; - } - } - - if ((st == 0) && !m_manualReset) - { - // Clear the state for auto-reset events so that only one waiter gets released - m_state = false; - } - - pthread_mutex_unlock(&m_mutex); - - uint32_t waitStatus; - - if (st == 0) - { - waitStatus = WAIT_OBJECT_0; - } - else if (st == ETIMEDOUT) - { - waitStatus = WAIT_TIMEOUT; - } - else - { - waitStatus = WAIT_FAILED; - } - - return waitStatus; - } - - void Set() - { - pthread_mutex_lock(&m_mutex); - m_state = true; - // Unblock all threads waiting for the condition variable - pthread_cond_broadcast(&m_condition); - pthread_mutex_unlock(&m_mutex); - } - - void Reset() - { - pthread_mutex_lock(&m_mutex); - m_state = false; - pthread_mutex_unlock(&m_mutex); - } -}; - -// This functions configures behavior of the signals that are not -// related to hardware exception handling. -void ConfigureSignals() -{ - // The default action for SIGPIPE is process termination. - // Since SIGPIPE can be signaled when trying to write on a socket for which - // the connection has been dropped, we need to tell the system we want - // to ignore this signal. - // Instead of terminating the process, the system call which would had - // issued a SIGPIPE will, instead, report an error and set errno to EPIPE. - signal(SIGPIPE, SIG_IGN); -} - -void InitializeCurrentProcessCpuCount() -{ - uint32_t count; - - // If the configuration value has been set, it takes precedence. Otherwise, take into account - // process affinity and CPU quota limit. - - const unsigned int MAX_PROCESSOR_COUNT = 0xffff; - uint64_t configValue; - - if (g_pRhConfig->ReadConfigValue("PROCESSOR_COUNT", &configValue, true /* decimal */) && - 0 < configValue && configValue <= MAX_PROCESSOR_COUNT) - { - count = configValue; - } - else - { -#if HAVE_SCHED_GETAFFINITY - - cpu_set_t cpuSet; - int st = sched_getaffinity(getpid(), sizeof(cpu_set_t), &cpuSet); - if (st != 0) - { - _ASSERTE(!"sched_getaffinity failed"); - } - - count = CPU_COUNT(&cpuSet); -#else // HAVE_SCHED_GETAFFINITY - count = GCToOSInterface::GetTotalProcessorCount(); -#endif // HAVE_SCHED_GETAFFINITY - - uint32_t cpuLimit; - if (GetCpuLimit(&cpuLimit) && cpuLimit < count) - count = cpuLimit; - } - - _ASSERTE(count > 0); - g_RhNumberOfProcessors = count; -} - -static uint32_t g_RhPageSize; - -void InitializeOsPageSize() -{ - g_RhPageSize = (uint32_t)sysconf(_SC_PAGE_SIZE); - -#if defined(HOST_AMD64) - ASSERT(g_RhPageSize == 0x1000); -#elif defined(HOST_APPLE) - ASSERT(g_RhPageSize == 0x4000); -#endif -} - -REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI PalGetOsPageSize() -{ - return g_RhPageSize; -} - -#if defined(TARGET_LINUX) || defined(TARGET_ANDROID) -static pthread_key_t key; -#endif - -#ifdef FEATURE_HIJACK -bool InitializeSignalHandling(); -#endif - -// The Redhawk PAL must be initialized before any of its exports can be called. Returns true for a successful -// initialization and false on failure. 
-REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalInit() -{ -#ifndef USE_PORTABLE_HELPERS - if (!InitializeHardwareExceptionHandling()) - { - return false; - } -#endif // !USE_PORTABLE_HELPERS - - ConfigureSignals(); - - if (!PalCreateDumpInitialize()) - { - return false; - } - - GCConfig::Initialize(); - - if (!GCToOSInterface::Initialize()) - { - return false; - } - - InitializeCpuCGroup(); - - InitializeCurrentProcessCpuCount(); - - InitializeOsPageSize(); - -#ifdef FEATURE_HIJACK - if (!InitializeSignalHandling()) - { - return false; - } -#endif - -#if defined(TARGET_LINUX) || defined(TARGET_ANDROID) - if (pthread_key_create(&key, RuntimeThreadShutdown) != 0) - { - return false; - } -#endif - - return true; -} - -#if !defined(TARGET_LINUX) && !defined(TARGET_ANDROID) -struct TlsDestructionMonitor -{ - void* m_thread = nullptr; - - void SetThread(void* thread) - { - m_thread = thread; - } - - ~TlsDestructionMonitor() - { - if (m_thread != nullptr) - { - RuntimeThreadShutdown(m_thread); - } - } -}; - -// This thread local object is used to detect thread shutdown. Its destructor -// is called when a thread is being shut down. -thread_local TlsDestructionMonitor tls_destructionMonitor; -#endif - -// This thread local variable is used for delegate marshalling -DECLSPEC_THREAD intptr_t tls_thunkData; - -#ifdef FEATURE_EMULATED_TLS -EXTERN_C intptr_t* RhpGetThunkData() -{ - return &tls_thunkData; -} -#endif //FEATURE_EMULATED_TLS - -FCIMPL0(intptr_t, RhGetCurrentThunkContext) -{ - return tls_thunkData; -} -FCIMPLEND - -// Register the thread with OS to be notified when thread is about to be destroyed -// It fails fast if a different thread was already registered. -// Parameters: -// thread - thread to attach -extern "C" void PalAttachThread(void* thread) -{ -#if defined(TARGET_LINUX) || defined(TARGET_ANDROID) - if (pthread_setspecific(key, thread) != 0) - { - _ASSERTE(!"pthread_setspecific failed"); - RhFailFast(); - } -#else - tls_destructionMonitor.SetThread(thread); -#endif - - UnmaskActivationSignal(); -} - -// Detach thread from OS notifications. -// Parameters: -// thread - thread to detach -// Return: -// true if the thread was detached, false if there was no attached thread -extern "C" bool PalDetachThread(void* thread) -{ - UNREFERENCED_PARAMETER(thread); - return true; -} - -#if !defined(USE_PORTABLE_HELPERS) && !defined(FEATURE_RX_THUNKS) - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, void** newThunksOut) -{ -#ifdef TARGET_APPLE - vm_address_t addr, taddr; - vm_prot_t prot, max_prot; - kern_return_t ret; - - // Allocate two contiguous ranges of memory: the first range will contain the trampolines - // and the second range will contain their data. 
- do - { - ret = vm_allocate(mach_task_self(), &addr, templateSize * 2, VM_FLAGS_ANYWHERE); - } while (ret == KERN_ABORTED); - - if (ret != KERN_SUCCESS) - { - return UInt32_FALSE; - } - - do - { - ret = vm_remap( - mach_task_self(), &addr, templateSize, 0, VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, - mach_task_self(), ((vm_address_t)hTemplateModule + templateRva), FALSE, &prot, &max_prot, VM_INHERIT_SHARE); - } while (ret == KERN_ABORTED); - - if (ret != KERN_SUCCESS) - { - do - { - ret = vm_deallocate(mach_task_self(), addr, templateSize * 2); - } while (ret == KERN_ABORTED); - - return UInt32_FALSE; - } - - *newThunksOut = (void*)addr; - - return UInt32_TRUE; -#else - PORTABILITY_ASSERT("UNIXTODO: Implement this function"); -#endif -} - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalFreeThunksFromTemplate(void *pBaseAddress, size_t templateSize) -{ -#ifdef TARGET_APPLE - kern_return_t ret; - - do - { - ret = vm_deallocate(mach_task_self(), (vm_address_t)pBaseAddress, templateSize * 2); - } while (ret == KERN_ABORTED); - - return ret == KERN_SUCCESS ? UInt32_TRUE : UInt32_FALSE; -#else - PORTABILITY_ASSERT("UNIXTODO: Implement this function"); -#endif -} - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalMarkThunksAsValidCallTargets( - void *virtualAddress, - int thunkSize, - int thunksPerBlock, - int thunkBlockSize, - int thunkBlocksPerMapping) -{ - int ret = mprotect( - (void*)((uintptr_t)virtualAddress + (thunkBlocksPerMapping * OS_PAGE_SIZE)), - thunkBlocksPerMapping * OS_PAGE_SIZE, - PROT_READ | PROT_WRITE); - return ret == 0 ? UInt32_TRUE : UInt32_FALSE; -} -#endif // !USE_PORTABLE_HELPERS && !FEATURE_RX_THUNKS - -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSleep(uint32_t milliseconds) -{ -#if HAVE_CLOCK_NANOSLEEP - timespec endTime; - clock_gettime(CLOCK_MONOTONIC, &endTime); - TimeSpecAdd(&endTime, milliseconds); - while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &endTime, NULL) == EINTR) - { - } -#else // HAVE_CLOCK_NANOSLEEP - timespec requested; - requested.tv_sec = milliseconds / tccSecondsToMilliSeconds; - requested.tv_nsec = (milliseconds - requested.tv_sec * tccSecondsToMilliSeconds) * tccMilliSecondsToNanoSeconds; - - timespec remaining; - while (nanosleep(&requested, &remaining) == EINTR) - { - requested = remaining; - } -#endif // HAVE_CLOCK_NANOSLEEP -} - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI __stdcall PalSwitchToThread() -{ - // sched_yield yields to another thread in the current process. - sched_yield(); - - // The return value of sched_yield indicates the success of the call and does not tell whether a context switch happened. - // On Linux sched_yield is documented as never failing. - // Since we do not know if there was a context switch, we will just return `false`. - return false; -} - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled() -{ - return false; -} - -extern "C" UInt32_BOOL CloseHandle(HANDLE handle) -{ - if ((handle == NULL) || (handle == INVALID_HANDLE_VALUE)) - { - return UInt32_FALSE; - } - - UnixEvent* event = (UnixEvent*)handle; - bool success = event->Destroy(); - delete event; - - return success ? 
UInt32_TRUE : UInt32_FALSE; -} - -REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ const WCHAR* pName) -{ - UnixEvent* event = new (nothrow) UnixEvent(manualReset, initialState); - if (event == NULL) - { - return INVALID_HANDLE_VALUE; - } - if (!event->Initialize()) - { - delete event; - return INVALID_HANDLE_VALUE; - } - return (HANDLE)event; -} - -typedef uint32_t(__stdcall *BackgroundCallback)(_In_opt_ void* pCallbackContext); - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundWork(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext, UInt32_BOOL highPriority) -{ -#ifdef HOST_WASM - // No threads, so we can't start one - RhFailFast(); - return false; -#else // !HOST_WASM - pthread_attr_t attrs; - - int st = pthread_attr_init(&attrs); - ASSERT(st == 0); - - static const int NormalPriority = 0; - static const int HighestPriority = -20; - - // TODO: Figure out which scheduler to use, the default one doesn't seem to - // support per thread priorities. -#if 0 - sched_param params; - memset(¶ms, 0, sizeof(params)); - - params.sched_priority = highPriority ? HighestPriority : NormalPriority; - - // Set the priority of the thread - st = pthread_attr_setschedparam(&attrs, ¶ms); - ASSERT(st == 0); -#endif - // Create the thread as detached, that means not joinable - st = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); - ASSERT(st == 0); - - pthread_t threadId; - st = pthread_create(&threadId, &attrs, (void *(*)(void*))callback, pCallbackContext); - - int st2 = pthread_attr_destroy(&attrs); - ASSERT(st2 == 0); - - return st == 0; -#endif // !HOST_WASM -} - -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalSetCurrentThreadName(const char* name) -{ - // Ignore requests to set the main thread name because - // it causes the value returned by Process.ProcessName to change. - if ((pid_t)PalGetCurrentOSThreadId() != getpid()) - { - int setNameResult = minipal_set_thread_name(pthread_self(), name); - (void)setNameResult; // used - assert(setNameResult == 0); - } - return true; -} - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) -{ - return PalStartBackgroundWork(callback, pCallbackContext, UInt32_FALSE); -} - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) -{ - return PalStartBackgroundWork(callback, pCallbackContext, UInt32_TRUE); -} - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartEventPipeHelperThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) -{ - return PalStartBackgroundWork(callback, pCallbackContext, UInt32_FALSE); -} - -// Returns a 64-bit tick count with a millisecond resolution. It tries its best -// to return monotonically increasing counts and avoid being affected by changes -// to the system clock (either due to drift or due to explicit changes to system -// time). 
-REDHAWK_PALEXPORT uint64_t REDHAWK_PALAPI PalGetTickCount64() -{ - return GCToOSInterface::GetLowPrecisionTimeStamp(); -} - -REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer) -{ - HANDLE moduleHandle = NULL; - - // Emscripten's implementation of dladdr corrupts memory, - // but always returns 0 for the module handle, so just skip the call -#if !defined(HOST_WASM) - Dl_info info; - int st = dladdr(pointer, &info); - if (st != 0) - { - moduleHandle = info.dli_fbase; - } -#endif //!defined(HOST_WASM) - - return moduleHandle; -} - -REDHAWK_PALEXPORT void PalPrintFatalError(const char* message) -{ - // Write the message using lowest-level OS API available. This is used to print the stack overflow - // message, so there is not much that can be done here. - // write() has __attribute__((warn_unused_result)) in glibc, for which gcc 11+ issue `-Wunused-result` even with `(void)write(..)`, - // so we use additional NOT(!) operator to force unused-result suppression. - (void)!write(STDERR_FILENO, message, strlen(message)); -} - -REDHAWK_PALEXPORT char* PalCopyTCharAsChar(const TCHAR* toCopy) -{ - NewArrayHolder copy {new (nothrow) char[strlen(toCopy) + 1]}; - strcpy(copy, toCopy); - return copy.Extract(); -} - -#ifndef HOST_WASM -REDHAWK_PALEXPORT HANDLE PalLoadLibrary(const char* moduleName) -{ - return dlopen(moduleName, RTLD_LAZY); -} - -REDHAWK_PALEXPORT void* PalGetProcAddress(HANDLE module, const char* functionName) -{ - return dlsym(module, functionName); -} - -static int W32toUnixAccessControl(uint32_t flProtect) -{ - int prot = 0; - - switch (flProtect & 0xff) - { - case PAGE_NOACCESS: - prot = PROT_NONE; - break; - case PAGE_READWRITE: - prot = PROT_READ | PROT_WRITE; - break; - case PAGE_EXECUTE_READ: - prot = PROT_READ | PROT_EXEC; - break; - case PAGE_EXECUTE_READWRITE: - prot = PROT_READ | PROT_WRITE | PROT_EXEC; - break; - case PAGE_READONLY: - prot = PROT_READ; - break; - default: - ASSERT(false); - break; - } - return prot; -} - -REDHAWK_PALEXPORT _Ret_maybenull_ _Post_writable_byte_size_(size) void* REDHAWK_PALAPI PalVirtualAlloc(size_t size, uint32_t protect) -{ - int unixProtect = W32toUnixAccessControl(protect); - - int flags = MAP_ANON | MAP_PRIVATE; - -#if defined(HOST_APPLE) && defined(HOST_ARM64) - if (unixProtect & PROT_EXEC) - { - flags |= MAP_JIT; - } -#endif - - return mmap(NULL, size, unixProtect, flags, -1, 0); -} - -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalVirtualFree(_In_ void* pAddress, size_t size) -{ - munmap(pAddress, size); -} - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddress, size_t size, uint32_t protect) -{ - int unixProtect = W32toUnixAccessControl(protect); - - // mprotect expects the address to be page-aligned - uint8_t* pPageStart = ALIGN_DOWN((uint8_t*)pAddress, OS_PAGE_SIZE); - size_t memSize = ALIGN_UP((uint8_t*)pAddress + size, OS_PAGE_SIZE) - pPageStart; - - return mprotect(pPageStart, memSize, unixProtect) == 0; -} -#endif // !HOST_WASM - -#if (defined(HOST_MACCATALYST) || defined(HOST_IOS) || defined(HOST_TVOS)) && defined(HOST_ARM64) -extern "C" void sys_icache_invalidate(const void* start, size_t len); -#endif - -REDHAWK_PALEXPORT void PalFlushInstructionCache(_In_ void* pAddress, size_t size) -{ -#if defined(__linux__) && defined(HOST_ARM) - // On Linux/arm (at least on 3.10) we found that there is a problem with __do_cache_op (arch/arm/kernel/traps.c) - // implementing cacheflush syscall. 
cacheflush flushes only the first page in range [pAddress, pAddress + size) - // and leaves other pages in undefined state which causes random tests failures (often due to SIGSEGV) with no particular pattern. - // - // As a workaround, we call __builtin___clear_cache on each page separately. - - uint8_t* begin = (uint8_t*)pAddress; - uint8_t* end = begin + size; - - while (begin < end) - { - uint8_t* endOrNextPageBegin = ALIGN_UP(begin + 1, OS_PAGE_SIZE); - if (endOrNextPageBegin > end) - endOrNextPageBegin = end; - - __builtin___clear_cache((char *)begin, (char *)endOrNextPageBegin); - begin = endOrNextPageBegin; - } -#elif (defined(HOST_MACCATALYST) || defined(HOST_IOS) || defined(HOST_TVOS)) && defined(HOST_ARM64) - sys_icache_invalidate (pAddress, size); -#else -#if !defined(HOST_WASM) - __builtin___clear_cache((char *)pAddress, (char *)pAddress + size); -#endif -#endif -} - -extern "C" uint32_t GetCurrentProcessId() -{ - return getpid(); -} - -extern "C" UInt32_BOOL InitializeCriticalSection(CRITICAL_SECTION * lpCriticalSection) -{ - pthread_mutexattr_t mutexAttributes; - int st = pthread_mutexattr_init(&mutexAttributes); - if (st != 0) - { - return false; - } - - st = pthread_mutexattr_settype(&mutexAttributes, PTHREAD_MUTEX_RECURSIVE); - if (st == 0) - { - st = pthread_mutex_init(&lpCriticalSection->mutex, &mutexAttributes); - } - - pthread_mutexattr_destroy(&mutexAttributes); - - return (st == 0); -} - -extern "C" UInt32_BOOL InitializeCriticalSectionEx(CRITICAL_SECTION * lpCriticalSection, uint32_t arg2, uint32_t arg3) -{ - return InitializeCriticalSection(lpCriticalSection); -} - - -extern "C" void DeleteCriticalSection(CRITICAL_SECTION * lpCriticalSection) -{ - pthread_mutex_destroy(&lpCriticalSection->mutex); -} - -extern "C" void EnterCriticalSection(CRITICAL_SECTION * lpCriticalSection) -{ - pthread_mutex_lock(&lpCriticalSection->mutex);; -} - -extern "C" void LeaveCriticalSection(CRITICAL_SECTION * lpCriticalSection) -{ - pthread_mutex_unlock(&lpCriticalSection->mutex); -} - -extern "C" UInt32_BOOL SetEvent(HANDLE event) -{ - UnixEvent* unixEvent = (UnixEvent*)event; - unixEvent->Set(); - return UInt32_TRUE; -} - -extern "C" UInt32_BOOL ResetEvent(HANDLE event) -{ - UnixEvent* unixEvent = (UnixEvent*)event; - unixEvent->Reset(); - return UInt32_TRUE; -} - -extern "C" uint32_t GetEnvironmentVariableA(const char * name, char * buffer, uint32_t size) -{ - const char* value = getenv(name); - if (value == NULL) - { - return 0; - } - - size_t valueLen = strlen(value); - if (valueLen < size) - { - strcpy(buffer, value); - return valueLen; - } - - // return required size including the null character or 0 if the size doesn't fit into uint32_t - return (valueLen < UINT32_MAX) ? (valueLen + 1) : 0; -} - -extern "C" uint16_t RtlCaptureStackBackTrace(uint32_t arg1, uint32_t arg2, void* arg3, uint32_t* arg4) -{ - // UNIXTODO: Implement this function - return 0; -} - -#ifdef FEATURE_HIJACK -static struct sigaction g_previousActivationHandler; - -static void ActivationHandler(int code, siginfo_t* siginfo, void* context) -{ - // Only accept activations from the current process - if (siginfo->si_pid == getpid() -#ifdef HOST_APPLE - // On Apple platforms si_pid is sometimes 0. It was confirmed by Apple to be expected, as the si_pid is tracked at the process level. So when multiple - // signals are in flight in the same process at the same time, it may be overwritten / zeroed. 
- || siginfo->si_pid == 0 -#endif - ) - { - // Make sure that errno is not modified - int savedErrNo = errno; - Thread::HijackCallback((NATIVE_CONTEXT*)context, NULL); - errno = savedErrNo; - } - - Thread* pThread = ThreadStore::GetCurrentThreadIfAvailable(); - if (pThread) - { - pThread->SetActivationPending(false); - } - - // Call the original handler when it is not ignored or default (terminate). - if (g_previousActivationHandler.sa_flags & SA_SIGINFO) - { - _ASSERTE(g_previousActivationHandler.sa_sigaction != NULL); - g_previousActivationHandler.sa_sigaction(code, siginfo, context); - } - else - { - if (g_previousActivationHandler.sa_handler != SIG_IGN && - g_previousActivationHandler.sa_handler != SIG_DFL) - { - _ASSERTE(g_previousActivationHandler.sa_handler != NULL); - g_previousActivationHandler.sa_handler(code); - } - } -} - -bool InitializeSignalHandling() -{ -#ifdef __APPLE__ - void *libSystem = dlopen("/usr/lib/libSystem.dylib", RTLD_LAZY); - if (libSystem != NULL) - { - int (*dispatch_allow_send_signals_ptr)(int) = (int (*)(int))dlsym(libSystem, "dispatch_allow_send_signals"); - if (dispatch_allow_send_signals_ptr != NULL) - { - int status = dispatch_allow_send_signals_ptr(INJECT_ACTIVATION_SIGNAL); - _ASSERTE(status == 0); - } - } - - // TODO: Once our CI tools can get upgraded to xcode >= 15.3, replace the code above by this: - // if (__builtin_available(macOS 14.4, iOS 17.4, tvOS 17.4, *)) - // { - // // Allow sending the activation signal to dispatch queue threads - // int status = dispatch_allow_send_signals(INJECT_ACTIVATION_SIGNAL); - // _ASSERTE(status == 0); - // } -#endif // __APPLE__ - - return AddSignalHandler(INJECT_ACTIVATION_SIGNAL, ActivationHandler, &g_previousActivationHandler); -} - -REDHAWK_PALIMPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(HijackFunc* defaultHijackTarget) -{ - return defaultHijackTarget; -} - -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(Thread* pThreadToHijack) -{ - pThreadToHijack->SetActivationPending(true); - - int status = pthread_kill(pThreadToHijack->GetOSThreadHandle(), INJECT_ACTIVATION_SIGNAL); - - // We can get EAGAIN when printing stack overflow stack trace and when other threads hit - // stack overflow too. Those are held in the sigsegv_handler with blocked signals until - // the process exits. - // ESRCH may happen on some OSes when the thread is exiting. - if ((status == EAGAIN) - || (status == ESRCH) -#ifdef __APPLE__ - // On Apple, pthread_kill is not allowed to be sent to dispatch queue threads on macOS older than 14.4 or iOS/tvOS older than 17.4 - || (status == ENOTSUP) -#endif - ) - { - pThreadToHijack->SetActivationPending(false); - return; - } - - if (status != 0) - { - // Causes creation of a crash dump if enabled - PalCreateCrashDumpIfEnabled(); - - // Failure to send the signal is fatal. There are only two cases when sending - // the signal can fail. First, if the signal ID is invalid and second, - // if the thread doesn't exist anymore. 
- abort(); - } -} -#endif // FEATURE_HIJACK - -extern "C" uint32_t WaitForSingleObjectEx(HANDLE handle, uint32_t milliseconds, UInt32_BOOL alertable) -{ - UnixEvent* unixEvent = (UnixEvent*)handle; - return unixEvent->Wait(milliseconds); -} - -REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI PalCompatibleWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t handleCount, HANDLE* pHandles, UInt32_BOOL allowReentrantWait) -{ - // Only a single handle wait for event is supported - ASSERT(handleCount == 1); - - return WaitForSingleObjectEx(pHandles[0], timeout, alertable); -} - -REDHAWK_PALEXPORT HANDLE PalCreateLowMemoryResourceNotification() -{ - return NULL; -} - -#if !__has_builtin(_mm_pause) -extern "C" void _mm_pause() -// Defined for implementing PalYieldProcessor in PalRedhawk.h -{ -#if defined(HOST_AMD64) || defined(HOST_X86) - __asm__ volatile ("pause"); -#endif -} -#endif - -extern "C" int32_t _stricmp(const char *string1, const char *string2) -{ - return strcasecmp(string1, string2); -} - -uint32_t g_RhNumberOfProcessors; - -REDHAWK_PALEXPORT int32_t PalGetProcessCpuCount() -{ - ASSERT(g_RhNumberOfProcessors > 0); - return g_RhNumberOfProcessors; -} - -__thread void* pStackHighOut = NULL; -__thread void* pStackLowOut = NULL; - -// Retrieves the entire range of memory dedicated to the calling thread's stack. This does -// not get the current dynamic bounds of the stack, which can be significantly smaller than -// the maximum bounds. -REDHAWK_PALEXPORT bool PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut) -{ - if (pStackHighOut == NULL) - { -#if defined(HOST_WASM) && !defined(FEATURE_WASM_MANAGED_THREADS) - PalGetMaximumStackBounds_SingleThreadedWasm(&pStackLowOut, &pStackHighOut); -#elif defined(__APPLE__) - // This is a Mac specific method - pStackHighOut = pthread_get_stackaddr_np(pthread_self()); - pStackLowOut = ((uint8_t *)pStackHighOut - pthread_get_stacksize_np(pthread_self())); -#else // __APPLE__ - pthread_attr_t attr; - size_t stackSize; - int status; - - pthread_t thread = pthread_self(); - - status = pthread_attr_init(&attr); - ASSERT_MSG(status == 0, "pthread_attr_init call failed"); - -#if HAVE_PTHREAD_ATTR_GET_NP - status = pthread_attr_get_np(thread, &attr); -#elif HAVE_PTHREAD_GETATTR_NP - status = pthread_getattr_np(thread, &attr); -#else -#error Dont know how to get thread attributes on this platform! -#endif - ASSERT_MSG(status == 0, "pthread_getattr_np call failed"); - - status = pthread_attr_getstack(&attr, &pStackLowOut, &stackSize); - ASSERT_MSG(status == 0, "pthread_attr_getstack call failed"); - - status = pthread_attr_destroy(&attr); - ASSERT_MSG(status == 0, "pthread_attr_destroy call failed"); - - pStackHighOut = (uint8_t*)pStackLowOut + stackSize; -#endif // __APPLE__ - } - - *ppStackLowOut = pStackLowOut; - *ppStackHighOut = pStackHighOut; - - return true; -} - -// retrieves the full path to the specified module, if moduleBase is NULL retreieves the full path to the -// executable module of the current process. 
-// -// Return value: number of characters in name string -// -REDHAWK_PALEXPORT int32_t PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase) -{ -#if defined(HOST_WASM) - // Emscripten's implementation of dladdr corrupts memory and doesn't have the real name, so make up a name instead - const TCHAR* wasmModuleName = "WebAssemblyModule"; - *pModuleNameOut = wasmModuleName; - return strlen(wasmModuleName); -#else // HOST_WASM - Dl_info dl; - if (dladdr(moduleBase, &dl) == 0) - { - *pModuleNameOut = NULL; - return 0; - } - - *pModuleNameOut = dl.dli_fname; - return strlen(dl.dli_fname); -#endif // defined(HOST_WASM) -} - -extern "C" void FlushProcessWriteBuffers() -{ - GCToOSInterface::FlushProcessWriteBuffers(); -} - -static const int64_t SECS_BETWEEN_1601_AND_1970_EPOCHS = 11644473600LL; -static const int64_t SECS_TO_100NS = 10000000; /* 10^7 */ - -extern "C" void GetSystemTimeAsFileTime(FILETIME *lpSystemTimeAsFileTime) -{ - struct timeval time = { 0 }; - gettimeofday(&time, NULL); - - int64_t result = ((int64_t)time.tv_sec + SECS_BETWEEN_1601_AND_1970_EPOCHS) * SECS_TO_100NS + - (time.tv_usec * 10); - - lpSystemTimeAsFileTime->dwLowDateTime = (uint32_t)result; - lpSystemTimeAsFileTime->dwHighDateTime = (uint32_t)(result >> 32); -} - -extern "C" uint64_t PalQueryPerformanceCounter() -{ - return GCToOSInterface::QueryPerformanceCounter(); -} - -extern "C" uint64_t PalQueryPerformanceFrequency() -{ - return GCToOSInterface::QueryPerformanceFrequency(); -} - -extern "C" uint64_t PalGetCurrentOSThreadId() -{ - return (uint64_t)minipal_get_current_thread_id(); -} diff --git a/src/coreclr/nativeaot/Runtime/unix/PalUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalUnix.cpp new file mode 100644 index 000000000000..b346282bb919 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/unix/PalUnix.cpp @@ -0,0 +1,1184 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementation of the NativeAOT Platform Abstraction Layer (PAL) library when Unix is the platform. 
+// + +#include +#include +#include +#include +#include "config.h" +#include +#include "gcenv.h" +#include "gcenv.ee.h" +#include "gcconfig.h" +#include "holder.h" +#include "UnixSignals.h" +#include "NativeContext.h" +#include "HardwareExceptions.h" +#include "PalCreateDump.h" +#include "cgroupcpu.h" +#include "threadstore.h" +#include "thread.h" +#include "threadstore.inl" + +#define _T(s) s +#include "RhConfig.h" + +#include +#include +#include +#ifndef TARGET_WASI // no dynamic linking in Wasi +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef TARGET_LINUX +#include +#endif + +#if HAVE_PTHREAD_GETTHREADID_NP +#include +#endif + +#if HAVE_LWP_SELF +#include +#endif + +#if HAVE_CLOCK_GETTIME_NSEC_NP +#include +#endif + +#ifdef TARGET_APPLE +#include +#endif + +#ifndef HOST_WASM +#include +#endif + +#ifdef HOST_WASM +#include "wasm/PalRedhawkWasm.h" +#endif + +#ifdef TARGET_HAIKU +#include +#endif + +using std::nullptr_t; + +#define INVALID_HANDLE_VALUE ((HANDLE)(intptr_t)-1) + +#define PAGE_NOACCESS 0x01 +#define PAGE_READWRITE 0x04 +#define PAGE_EXECUTE_READ 0x20 +#define PAGE_EXECUTE_READWRITE 0x40 + +#define WAIT_OBJECT_0 0 +#define WAIT_TIMEOUT 258 +#define WAIT_FAILED 0xFFFFFFFF + +static const int tccSecondsToMilliSeconds = 1000; +static const int tccSecondsToMicroSeconds = 1000000; +static const int tccSecondsToNanoSeconds = 1000000000; +static const int tccMilliSecondsToMicroSeconds = 1000; +static const int tccMilliSecondsToNanoSeconds = 1000000; +static const int tccMicroSecondsToNanoSeconds = 1000; + +void RhFailFast() +{ + // Causes creation of a crash dump if enabled + PalCreateCrashDumpIfEnabled(); + + // Aborts the process + abort(); +} + +static void UnmaskActivationSignal() +{ +#ifndef HOST_WASM + sigset_t signal_set; + sigemptyset(&signal_set); + sigaddset(&signal_set, INJECT_ACTIVATION_SIGNAL); + + int sigmaskRet = pthread_sigmask(SIG_UNBLOCK, &signal_set, NULL); + _ASSERTE(sigmaskRet == 0); +#endif +} + +static void TimeSpecAdd(timespec* time, uint32_t milliseconds) +{ + uint64_t nsec = time->tv_nsec + (uint64_t)milliseconds * tccMilliSecondsToNanoSeconds; + if (nsec >= tccSecondsToNanoSeconds) + { + time->tv_sec += nsec / tccSecondsToNanoSeconds; + nsec %= tccSecondsToNanoSeconds; + } + + time->tv_nsec = nsec; +} + +// Convert nanoseconds to the timespec structure +// Parameters: +// nanoseconds - time in nanoseconds to convert +// t - the target timespec structure +static void NanosecondsToTimeSpec(uint64_t nanoseconds, timespec* t) +{ + t->tv_sec = nanoseconds / tccSecondsToNanoSeconds; + t->tv_nsec = nanoseconds % tccSecondsToNanoSeconds; +} + +void ReleaseCondAttr(pthread_condattr_t* condAttr) +{ + int st = pthread_condattr_destroy(condAttr); + ASSERT_MSG(st == 0, "Failed to destroy pthread_condattr_t object"); +} + +class PthreadCondAttrHolder : public Wrapper +{ +public: + PthreadCondAttrHolder(pthread_condattr_t* attrs) + : Wrapper(attrs) + { + } +}; + +class UnixEvent +{ + pthread_cond_t m_condition; + pthread_mutex_t m_mutex; + bool m_manualReset; + bool m_state; + bool m_isValid; + +public: + + UnixEvent(bool manualReset, bool initialState) + : m_manualReset(manualReset), + m_state(initialState), + m_isValid(false) + { + } + + bool Initialize() + { + pthread_condattr_t attrs; + int st = pthread_condattr_init(&attrs); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent condition attribute"); + return false; + } + + PthreadCondAttrHolder attrsHolder(&attrs); + +#if 
HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_CLOCK_GETTIME_NSEC_NP + // Ensure that the pthread_cond_timedwait will use CLOCK_MONOTONIC + st = pthread_condattr_setclock(&attrs, CLOCK_MONOTONIC); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to set UnixEvent condition variable wait clock"); + return false; + } +#endif // HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_CLOCK_GETTIME_NSEC_NP + + st = pthread_mutex_init(&m_mutex, NULL); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent mutex"); + return false; + } + + st = pthread_cond_init(&m_condition, &attrs); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent condition variable"); + + st = pthread_mutex_destroy(&m_mutex); + ASSERT_MSG(st == 0, "Failed to destroy UnixEvent mutex"); + return false; + } + + m_isValid = true; + + return true; + } + + bool Destroy() + { + bool success = true; + + if (m_isValid) + { + int st = pthread_mutex_destroy(&m_mutex); + ASSERT_MSG(st == 0, "Failed to destroy UnixEvent mutex"); + success = success && (st == 0); + + st = pthread_cond_destroy(&m_condition); + ASSERT_MSG(st == 0, "Failed to destroy UnixEvent condition variable"); + success = success && (st == 0); + } + + return success; + } + + uint32_t Wait(uint32_t milliseconds) + { + timespec endTime; +#if HAVE_CLOCK_GETTIME_NSEC_NP + uint64_t endNanoseconds; + if (milliseconds != INFINITE) + { + uint64_t nanoseconds = (uint64_t)milliseconds * tccMilliSecondsToNanoSeconds; + NanosecondsToTimeSpec(nanoseconds, &endTime); + endNanoseconds = clock_gettime_nsec_np(CLOCK_UPTIME_RAW) + nanoseconds; + } +#elif HAVE_PTHREAD_CONDATTR_SETCLOCK || _WASI_EMULATED_PROCESS_CLOCKS + if (milliseconds != INFINITE) + { + clock_gettime(CLOCK_MONOTONIC, &endTime); + TimeSpecAdd(&endTime, milliseconds); + } +#else +#error "Don't know how to perform timed wait on this platform" +#endif + + int st = 0; + + pthread_mutex_lock(&m_mutex); + while (!m_state) + { + if (milliseconds == INFINITE) + { + st = pthread_cond_wait(&m_condition, &m_mutex); + } + else + { +#if HAVE_CLOCK_GETTIME_NSEC_NP + // Since OSX doesn't support CLOCK_MONOTONIC, we use relative variant of the + // timed wait and we need to handle spurious wakeups properly. + st = pthread_cond_timedwait_relative_np(&m_condition, &m_mutex, &endTime); + if ((st == 0) && !m_state) + { + uint64_t currentNanoseconds = clock_gettime_nsec_np(CLOCK_UPTIME_RAW); + if (currentNanoseconds < endNanoseconds) + { + // The wake up was spurious, recalculate the relative endTime + uint64_t remainingNanoseconds = (endNanoseconds - currentNanoseconds); + NanosecondsToTimeSpec(remainingNanoseconds, &endTime); + } + else + { + // Although the timed wait didn't report a timeout, time calculated from the + // mach time shows we have already reached the end time. It can happen if + // the wait was spuriously woken up right before the timeout. 
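+                        // Illustrative timing (assumed numbers, not taken from the source): for a 100 ms wait
+                        // that started at 5'000'000'000 ns, endNanoseconds is 5'100'000'000. A spurious wakeup
+                        // observed at 5'099'999'900 ns recomputes a 100 ns relative wait and loops; one observed
+                        // at 5'100'000'050 ns lands here and is reported to the caller as WAIT_TIMEOUT.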
+ st = ETIMEDOUT; + } + } +#else // HAVE_CLOCK_GETTIME_NSEC_NP + st = pthread_cond_timedwait(&m_condition, &m_mutex, &endTime); +#endif // HAVE_CLOCK_GETTIME_NSEC_NP + } + + if (st != 0) + { + // wait failed or timed out + break; + } + } + + if ((st == 0) && !m_manualReset) + { + // Clear the state for auto-reset events so that only one waiter gets released + m_state = false; + } + + pthread_mutex_unlock(&m_mutex); + + uint32_t waitStatus; + + if (st == 0) + { + waitStatus = WAIT_OBJECT_0; + } + else if (st == ETIMEDOUT) + { + waitStatus = WAIT_TIMEOUT; + } + else + { + waitStatus = WAIT_FAILED; + } + + return waitStatus; + } + + void Set() + { + pthread_mutex_lock(&m_mutex); + m_state = true; + // Unblock all threads waiting for the condition variable + pthread_cond_broadcast(&m_condition); + pthread_mutex_unlock(&m_mutex); + } + + void Reset() + { + pthread_mutex_lock(&m_mutex); + m_state = false; + pthread_mutex_unlock(&m_mutex); + } +}; + +// This functions configures behavior of the signals that are not +// related to hardware exception handling. +void ConfigureSignals() +{ + // The default action for SIGPIPE is process termination. + // Since SIGPIPE can be signaled when trying to write on a socket for which + // the connection has been dropped, we need to tell the system we want + // to ignore this signal. + // Instead of terminating the process, the system call which would had + // issued a SIGPIPE will, instead, report an error and set errno to EPIPE. + signal(SIGPIPE, SIG_IGN); +} + +void InitializeCurrentProcessCpuCount() +{ + uint32_t count; + + // If the configuration value has been set, it takes precedence. Otherwise, take into account + // process affinity and CPU quota limit. + + const unsigned int MAX_PROCESSOR_COUNT = 0xffff; + uint64_t configValue; + + if (g_pRhConfig->ReadConfigValue("PROCESSOR_COUNT", &configValue, true /* decimal */) && + 0 < configValue && configValue <= MAX_PROCESSOR_COUNT) + { + count = configValue; + } + else + { +#if HAVE_SCHED_GETAFFINITY + + cpu_set_t cpuSet; + int st = sched_getaffinity(getpid(), sizeof(cpu_set_t), &cpuSet); + if (st != 0) + { + _ASSERTE(!"sched_getaffinity failed"); + } + + count = CPU_COUNT(&cpuSet); +#else // HAVE_SCHED_GETAFFINITY + count = GCToOSInterface::GetTotalProcessorCount(); +#endif // HAVE_SCHED_GETAFFINITY + + uint32_t cpuLimit; + if (GetCpuLimit(&cpuLimit) && cpuLimit < count) + count = cpuLimit; + } + + _ASSERTE(count > 0); + g_RhNumberOfProcessors = count; +} + +static uint32_t g_RhPageSize; + +void InitializeOsPageSize() +{ + g_RhPageSize = (uint32_t)sysconf(_SC_PAGE_SIZE); + +#if defined(HOST_AMD64) + ASSERT(g_RhPageSize == 0x1000); +#elif defined(HOST_APPLE) + ASSERT(g_RhPageSize == 0x4000); +#endif +} + +uint32_t PalGetOsPageSize() +{ + return g_RhPageSize; +} + +#if defined(TARGET_LINUX) || defined(TARGET_ANDROID) +static pthread_key_t key; +#endif + +#ifdef FEATURE_HIJACK +bool InitializeSignalHandling(); +#endif + +// The NativeAOT PAL must be initialized before any of its exports can be called. Returns true for a successful +// initialization and false on failure. 
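+//
+// Illustrative startup sequence (a sketch of the expected call order, not lifted from an actual caller):
+//
+//     if (!PalInit())              // one-time process-wide initialization
+//         return false;            // hypothetical caller-side failure handling
+//     PalAttachThread(pThread);    // later, once per thread entering the runtime (see PalAttachThread below)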
+bool PalInit() +{ +#ifndef USE_PORTABLE_HELPERS + if (!InitializeHardwareExceptionHandling()) + { + return false; + } +#endif // !USE_PORTABLE_HELPERS + + ConfigureSignals(); + + if (!PalCreateDumpInitialize()) + { + return false; + } + + GCConfig::Initialize(); + + if (!GCToOSInterface::Initialize()) + { + return false; + } + + InitializeCpuCGroup(); + + InitializeCurrentProcessCpuCount(); + + InitializeOsPageSize(); + +#ifdef FEATURE_HIJACK + if (!InitializeSignalHandling()) + { + return false; + } +#endif + +#if defined(TARGET_LINUX) || defined(TARGET_ANDROID) + if (pthread_key_create(&key, RuntimeThreadShutdown) != 0) + { + return false; + } +#endif + + return true; +} + +#if !defined(TARGET_LINUX) && !defined(TARGET_ANDROID) +struct TlsDestructionMonitor +{ + void* m_thread = nullptr; + + void SetThread(void* thread) + { + m_thread = thread; + } + + ~TlsDestructionMonitor() + { + if (m_thread != nullptr) + { + RuntimeThreadShutdown(m_thread); + } + } +}; + +// This thread local object is used to detect thread shutdown. Its destructor +// is called when a thread is being shut down. +thread_local TlsDestructionMonitor tls_destructionMonitor; +#endif + +// This thread local variable is used for delegate marshalling +DECLSPEC_THREAD intptr_t tls_thunkData; + +#ifdef FEATURE_EMULATED_TLS +EXTERN_C intptr_t* RhpGetThunkData() +{ + return &tls_thunkData; +} +#endif //FEATURE_EMULATED_TLS + +FCIMPL0(intptr_t, RhGetCurrentThunkContext) +{ + return tls_thunkData; +} +FCIMPLEND + +// Register the thread with OS to be notified when thread is about to be destroyed +// It fails fast if a different thread was already registered. +// Parameters: +// thread - thread to attach +void PalAttachThread(void* thread) +{ +#if defined(TARGET_LINUX) || defined(TARGET_ANDROID) + if (pthread_setspecific(key, thread) != 0) + { + _ASSERTE(!"pthread_setspecific failed"); + RhFailFast(); + } +#else + tls_destructionMonitor.SetThread(thread); +#endif + + UnmaskActivationSignal(); +} + +#if !defined(USE_PORTABLE_HELPERS) && !defined(FEATURE_RX_THUNKS) + +UInt32_BOOL PalAllocateThunksFromTemplate(HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, void** newThunksOut) +{ +#ifdef TARGET_APPLE + vm_address_t addr, taddr; + vm_prot_t prot, max_prot; + kern_return_t ret; + + // Allocate two contiguous ranges of memory: the first range will contain the stubs + // and the second range will contain their data. + do + { + ret = vm_allocate(mach_task_self(), &addr, templateSize * 2, VM_FLAGS_ANYWHERE); + } while (ret == KERN_ABORTED); + + if (ret != KERN_SUCCESS) + { + return UInt32_FALSE; + } + + do + { + ret = vm_remap( + mach_task_self(), &addr, templateSize, 0, VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, + mach_task_self(), ((vm_address_t)hTemplateModule + templateRva), FALSE, &prot, &max_prot, VM_INHERIT_SHARE); + } while (ret == KERN_ABORTED); + + if (ret != KERN_SUCCESS) + { + do + { + ret = vm_deallocate(mach_task_self(), addr, templateSize * 2); + } while (ret == KERN_ABORTED); + + return UInt32_FALSE; + } + + *newThunksOut = (void*)addr; + + return UInt32_TRUE; +#else + PORTABILITY_ASSERT("UNIXTODO: Implement this function"); +#endif +} + +UInt32_BOOL PalFreeThunksFromTemplate(void *pBaseAddress, size_t templateSize) +{ +#ifdef TARGET_APPLE + kern_return_t ret; + + do + { + ret = vm_deallocate(mach_task_self(), (vm_address_t)pBaseAddress, templateSize * 2); + } while (ret == KERN_ABORTED); + + return ret == KERN_SUCCESS ? 
UInt32_TRUE : UInt32_FALSE;
+#else
+    PORTABILITY_ASSERT("UNIXTODO: Implement this function");
+#endif
+}
+
+UInt32_BOOL PalMarkThunksAsValidCallTargets(
+    void *virtualAddress,
+    int thunkSize,
+    int thunksPerBlock,
+    int thunkBlockSize,
+    int thunkBlocksPerMapping)
+{
+    int ret = mprotect(
+        (void*)((uintptr_t)virtualAddress + (thunkBlocksPerMapping * OS_PAGE_SIZE)),
+        thunkBlocksPerMapping * OS_PAGE_SIZE,
+        PROT_READ | PROT_WRITE);
+    return ret == 0 ? UInt32_TRUE : UInt32_FALSE;
+}
+#endif // !USE_PORTABLE_HELPERS && !FEATURE_RX_THUNKS
+
+void PalSleep(uint32_t milliseconds)
+{
+#if HAVE_CLOCK_NANOSLEEP
+    timespec endTime;
+    clock_gettime(CLOCK_MONOTONIC, &endTime);
+    TimeSpecAdd(&endTime, milliseconds);
+    while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &endTime, NULL) == EINTR)
+    {
+    }
+#else // HAVE_CLOCK_NANOSLEEP
+    timespec requested;
+    requested.tv_sec = milliseconds / tccSecondsToMilliSeconds;
+    requested.tv_nsec = (milliseconds - requested.tv_sec * tccSecondsToMilliSeconds) * tccMilliSecondsToNanoSeconds;
+
+    timespec remaining;
+    while (nanosleep(&requested, &remaining) == EINTR)
+    {
+        requested = remaining;
+    }
+#endif // HAVE_CLOCK_NANOSLEEP
+}
+
+UInt32_BOOL __stdcall PalSwitchToThread()
+{
+    // sched_yield yields to another thread in the current process.
+    sched_yield();
+
+    // The return value of sched_yield indicates the success of the call and does not tell whether a context switch happened.
+    // On Linux sched_yield is documented as never failing.
+    // Since we do not know if there was a context switch, we will just return `false`.
+    return false;
+}
+
+UInt32_BOOL PalAreShadowStacksEnabled()
+{
+    return false;
+}
+
+UInt32_BOOL PalCloseHandle(HANDLE handle)
+{
+    if ((handle == NULL) || (handle == INVALID_HANDLE_VALUE))
+    {
+        return UInt32_FALSE;
+    }
+
+    UnixEvent* event = (UnixEvent*)handle;
+    bool success = event->Destroy();
+    delete event;
+
+    return success ? UInt32_TRUE : UInt32_FALSE;
+}
+
+HANDLE PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ const WCHAR* pName)
+{
+    UnixEvent* event = new (nothrow) UnixEvent(manualReset, initialState);
+    if (event == NULL)
+    {
+        return INVALID_HANDLE_VALUE;
+    }
+    if (!event->Initialize())
+    {
+        delete event;
+        return INVALID_HANDLE_VALUE;
+    }
+    return (HANDLE)event;
+}
+
+typedef uint32_t(__stdcall *BackgroundCallback)(_In_opt_ void* pCallbackContext);
+
+bool PalStartBackgroundWork(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext, UInt32_BOOL highPriority)
+{
+#ifdef HOST_WASM
+    // No threads, so we can't start one
+    RhFailFast();
+    return false;
+#else // !HOST_WASM
+    pthread_attr_t attrs;
+
+    int st = pthread_attr_init(&attrs);
+    ASSERT(st == 0);
+
+    static const int NormalPriority = 0;
+    static const int HighestPriority = -20;
+
+    // TODO: Figure out which scheduler to use, the default one doesn't seem to
+    // support per thread priorities.
+#if 0
+    sched_param params;
+    memset(&params, 0, sizeof(params));
+
+    params.sched_priority = highPriority ? HighestPriority : NormalPriority;
+
+    // Set the priority of the thread
+    st = pthread_attr_setschedparam(&attrs, &params);
+    ASSERT(st == 0);
+#endif
+    // Create the thread as detached, that means not joinable
+    st = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+    ASSERT(st == 0);
+
+    pthread_t threadId;
+    st = pthread_create(&threadId, &attrs, (void *(*)(void*))callback, pCallbackContext);
+
+    int st2 = pthread_attr_destroy(&attrs);
+    ASSERT(st2 == 0);
+
+    return st == 0;
+#endif // !HOST_WASM
+}
+
+bool PalSetCurrentThreadName(const char* name)
+{
+    // Ignore requests to set the main thread name because
+    // it causes the value returned by Process.ProcessName to change.
+    if ((pid_t)PalGetCurrentOSThreadId() != getpid())
+    {
+        int setNameResult = minipal_set_thread_name(pthread_self(), name);
+        (void)setNameResult; // used
+        assert(setNameResult == 0);
+    }
+    return true;
+}
+
+bool PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext)
+{
+    return PalStartBackgroundWork(callback, pCallbackContext, UInt32_FALSE);
+}
+
+bool PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext)
+{
+    return PalStartBackgroundWork(callback, pCallbackContext, UInt32_TRUE);
+}
+
+bool PalStartEventPipeHelperThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext)
+{
+    return PalStartBackgroundWork(callback, pCallbackContext, UInt32_FALSE);
+}
+
+HANDLE PalGetModuleHandleFromPointer(_In_ void* pointer)
+{
+    HANDLE moduleHandle = NULL;
+
+    // Emscripten's implementation of dladdr corrupts memory,
+    // but always returns 0 for the module handle, so just skip the call
+#if !defined(HOST_WASM)
+    Dl_info info;
+    int st = dladdr(pointer, &info);
+    if (st != 0)
+    {
+        moduleHandle = info.dli_fbase;
+    }
+#endif //!defined(HOST_WASM)
+
+    return moduleHandle;
+}
+
+void PalPrintFatalError(const char* message)
+{
+    // Write the message using lowest-level OS API available. This is used to print the stack overflow
+    // message, so there is not much that can be done here.
+    // write() has __attribute__((warn_unused_result)) in glibc, for which gcc 11+ issue `-Wunused-result` even with `(void)write(..)`,
+    // so we use additional NOT(!) operator to force unused-result suppression.
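+    //
+    // For illustration (hypothetical fd/buffer names), the difference between the two spellings is:
+    //     (void)write(fd, buf, len);    // may still warn under -Wunused-result with glibc's warn_unused_result
+    //     (void)!write(fd, buf, len);   // the '!' consumes the result, so no unused-result warning is emitted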
+    (void)!write(STDERR_FILENO, message, strlen(message));
+}
+
+char* PalCopyTCharAsChar(const TCHAR* toCopy)
+{
+    NewArrayHolder<char> copy {new (nothrow) char[strlen(toCopy) + 1]};
+    strcpy(copy, toCopy);
+    return copy.Extract();
+}
+
+#ifndef HOST_WASM
+HANDLE PalLoadLibrary(const char* moduleName)
+{
+    return dlopen(moduleName, RTLD_LAZY);
+}
+
+void* PalGetProcAddress(HANDLE module, const char* functionName)
+{
+    return dlsym(module, functionName);
+}
+
+static int W32toUnixAccessControl(uint32_t flProtect)
+{
+    int prot = 0;
+
+    switch (flProtect & 0xff)
+    {
+    case PAGE_NOACCESS:
+        prot = PROT_NONE;
+        break;
+    case PAGE_READWRITE:
+        prot = PROT_READ | PROT_WRITE;
+        break;
+    case PAGE_EXECUTE_READ:
+        prot = PROT_READ | PROT_EXEC;
+        break;
+    case PAGE_EXECUTE_READWRITE:
+        prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+        break;
+    case PAGE_READONLY:
+        prot = PROT_READ;
+        break;
+    default:
+        ASSERT(false);
+        break;
+    }
+    return prot;
+}
+
+_Ret_maybenull_ _Post_writable_byte_size_(size) void* PalVirtualAlloc(size_t size, uint32_t protect)
+{
+    int unixProtect = W32toUnixAccessControl(protect);
+
+    int flags = MAP_ANON | MAP_PRIVATE;
+
+#if defined(HOST_APPLE) && defined(HOST_ARM64)
+    if (unixProtect & PROT_EXEC)
+    {
+        flags |= MAP_JIT;
+    }
+#endif
+    void* pMappedMemory = mmap(NULL, size, unixProtect, flags, -1, 0);
+    if (pMappedMemory == MAP_FAILED)
+        return NULL;
+    return pMappedMemory;
+}
+
+void PalVirtualFree(_In_ void* pAddress, size_t size)
+{
+    munmap(pAddress, size);
+}
+
+UInt32_BOOL PalVirtualProtect(_In_ void* pAddress, size_t size, uint32_t protect)
+{
+    int unixProtect = W32toUnixAccessControl(protect);
+
+    // mprotect expects the address to be page-aligned
+    uint8_t* pPageStart = ALIGN_DOWN((uint8_t*)pAddress, OS_PAGE_SIZE);
+    size_t memSize = ALIGN_UP((uint8_t*)pAddress + size, OS_PAGE_SIZE) - pPageStart;
+
+    return mprotect(pPageStart, memSize, unixProtect) == 0;
+}
+#endif // !HOST_WASM
+
+#if (defined(HOST_MACCATALYST) || defined(HOST_IOS) || defined(HOST_TVOS)) && defined(HOST_ARM64)
+extern "C" void sys_icache_invalidate(const void* start, size_t len);
+#endif
+
+void PalFlushInstructionCache(_In_ void* pAddress, size_t size)
+{
+#if defined(__linux__) && defined(HOST_ARM)
+    // On Linux/arm (at least on 3.10) we found that there is a problem with __do_cache_op (arch/arm/kernel/traps.c)
+    // implementing cacheflush syscall. cacheflush flushes only the first page in range [pAddress, pAddress + size)
+    // and leaves other pages in undefined state which causes random tests failures (often due to SIGSEGV) with no particular pattern.
+    //
+    // As a workaround, we call __builtin___clear_cache on each page separately.
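+    //
+    // Worked example (OS_PAGE_SIZE assumed to be 0x1000 here): flushing 0x200 bytes starting at 0x10F80 is
+    // split into [0x10F80, 0x11000) and [0x11000, 0x11180), i.e. one __builtin___clear_cache call per page
+    // touched, because ALIGN_UP(begin + 1, OS_PAGE_SIZE) advances to the next page boundary each iteration.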
+ + uint8_t* begin = (uint8_t*)pAddress; + uint8_t* end = begin + size; + + while (begin < end) + { + uint8_t* endOrNextPageBegin = ALIGN_UP(begin + 1, OS_PAGE_SIZE); + if (endOrNextPageBegin > end) + endOrNextPageBegin = end; + + __builtin___clear_cache((char *)begin, (char *)endOrNextPageBegin); + begin = endOrNextPageBegin; + } +#elif (defined(HOST_MACCATALYST) || defined(HOST_IOS) || defined(HOST_TVOS)) && defined(HOST_ARM64) + sys_icache_invalidate (pAddress, size); +#else +#if !defined(HOST_WASM) + __builtin___clear_cache((char *)pAddress, (char *)pAddress + size); +#endif +#endif +} + +uint32_t PalGetCurrentProcessId() +{ + return getpid(); +} + +UInt32_BOOL PalSetEvent(HANDLE event) +{ + UnixEvent* unixEvent = (UnixEvent*)event; + unixEvent->Set(); + return UInt32_TRUE; +} + +UInt32_BOOL PalResetEvent(HANDLE event) +{ + UnixEvent* unixEvent = (UnixEvent*)event; + unixEvent->Reset(); + return UInt32_TRUE; +} + +uint32_t PalGetEnvironmentVariable(const char * name, char * buffer, uint32_t size) +{ + const char* value = getenv(name); + if (value == NULL) + { + return 0; + } + + size_t valueLen = strlen(value); + if (valueLen < size) + { + strcpy(buffer, value); + return valueLen; + } + + // return required size including the null character or 0 if the size doesn't fit into uint32_t + return (valueLen < UINT32_MAX) ? (valueLen + 1) : 0; +} + +uint16_t PalCaptureStackBackTrace(uint32_t arg1, uint32_t arg2, void* arg3, uint32_t* arg4) +{ + // UNIXTODO: Implement this function + return 0; +} + +#ifdef FEATURE_HIJACK +static struct sigaction g_previousActivationHandler; + +static void ActivationHandler(int code, siginfo_t* siginfo, void* context) +{ + // Only accept activations from the current process + if (siginfo->si_pid == getpid() +#ifdef HOST_APPLE + // On Apple platforms si_pid is sometimes 0. It was confirmed by Apple to be expected, as the si_pid is tracked at the process level. So when multiple + // signals are in flight in the same process at the same time, it may be overwritten / zeroed. + || siginfo->si_pid == 0 +#endif + ) + { + // Make sure that errno is not modified + int savedErrNo = errno; + Thread::HijackCallback((NATIVE_CONTEXT*)context, NULL); + errno = savedErrNo; + } + + Thread* pThread = ThreadStore::GetCurrentThreadIfAvailable(); + if (pThread) + { + pThread->SetActivationPending(false); + } + + // Call the original handler when it is not ignored or default (terminate). 
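+    // The previously installed handler can have one of two shapes (standard sigaction semantics): when
+    // SA_SIGINFO is set it is the three-argument sa_sigaction(int, siginfo_t*, void*); otherwise it is the
+    // one-argument sa_handler(int), where SIG_IGN and SIG_DFL are dispositions rather than callable
+    // functions and therefore must not be invoked directly.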
+ if (g_previousActivationHandler.sa_flags & SA_SIGINFO) + { + _ASSERTE(g_previousActivationHandler.sa_sigaction != NULL); + g_previousActivationHandler.sa_sigaction(code, siginfo, context); + } + else + { + if (g_previousActivationHandler.sa_handler != SIG_IGN && + g_previousActivationHandler.sa_handler != SIG_DFL) + { + _ASSERTE(g_previousActivationHandler.sa_handler != NULL); + g_previousActivationHandler.sa_handler(code); + } + } +} + +bool InitializeSignalHandling() +{ +#ifdef __APPLE__ + void *libSystem = dlopen("/usr/lib/libSystem.dylib", RTLD_LAZY); + if (libSystem != NULL) + { + int (*dispatch_allow_send_signals_ptr)(int) = (int (*)(int))dlsym(libSystem, "dispatch_allow_send_signals"); + if (dispatch_allow_send_signals_ptr != NULL) + { + int status = dispatch_allow_send_signals_ptr(INJECT_ACTIVATION_SIGNAL); + _ASSERTE(status == 0); + } + } + + // TODO: Once our CI tools can get upgraded to xcode >= 15.3, replace the code above by this: + // if (__builtin_available(macOS 14.4, iOS 17.4, tvOS 17.4, *)) + // { + // // Allow sending the activation signal to dispatch queue threads + // int status = dispatch_allow_send_signals(INJECT_ACTIVATION_SIGNAL); + // _ASSERTE(status == 0); + // } +#endif // __APPLE__ + + return AddSignalHandler(INJECT_ACTIVATION_SIGNAL, ActivationHandler, &g_previousActivationHandler); +} + +HijackFunc* PalGetHijackTarget(HijackFunc* defaultHijackTarget) +{ + return defaultHijackTarget; +} + +void PalHijack(Thread* pThreadToHijack) +{ + pThreadToHijack->SetActivationPending(true); + + int status = pthread_kill(pThreadToHijack->GetOSThreadHandle(), INJECT_ACTIVATION_SIGNAL); + + // We can get EAGAIN when printing stack overflow stack trace and when other threads hit + // stack overflow too. Those are held in the sigsegv_handler with blocked signals until + // the process exits. + // ESRCH may happen on some OSes when the thread is exiting. + if ((status == EAGAIN) + || (status == ESRCH) +#ifdef __APPLE__ + // On Apple, pthread_kill is not allowed to be sent to dispatch queue threads on macOS older than 14.4 or iOS/tvOS older than 17.4 + || (status == ENOTSUP) +#endif + ) + { + pThreadToHijack->SetActivationPending(false); + return; + } + + if (status != 0) + { + // Causes creation of a crash dump if enabled + PalCreateCrashDumpIfEnabled(); + + // Failure to send the signal is fatal. There are only two cases when sending + // the signal can fail. First, if the signal ID is invalid and second, + // if the thread doesn't exist anymore. 
+ abort(); + } +} +#endif // FEATURE_HIJACK + +uint32_t PalWaitForSingleObjectEx(HANDLE handle, uint32_t milliseconds, UInt32_BOOL alertable) +{ + UnixEvent* unixEvent = (UnixEvent*)handle; + return unixEvent->Wait(milliseconds); +} + +uint32_t PalCompatibleWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t handleCount, HANDLE* pHandles, UInt32_BOOL allowReentrantWait) +{ + // Only a single handle wait for event is supported + ASSERT(handleCount == 1); + + return PalWaitForSingleObjectEx(pHandles[0], timeout, alertable); +} + +HANDLE PalCreateLowMemoryResourceNotification() +{ + return NULL; +} + +#if !__has_builtin(_mm_pause) +extern "C" void _mm_pause() +// Defined for implementing PalYieldProcessor in Pal.h +{ +#if defined(HOST_AMD64) || defined(HOST_X86) + __asm__ volatile ("pause"); +#endif +} +#endif + +int32_t _stricmp(const char *string1, const char *string2) +{ + return strcasecmp(string1, string2); +} + +uint32_t g_RhNumberOfProcessors; + +int32_t PalGetProcessCpuCount() +{ + ASSERT(g_RhNumberOfProcessors > 0); + return g_RhNumberOfProcessors; +} + +__thread void* pStackHighOut = NULL; +__thread void* pStackLowOut = NULL; + +// Retrieves the entire range of memory dedicated to the calling thread's stack. This does +// not get the current dynamic bounds of the stack, which can be significantly smaller than +// the maximum bounds. +bool PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut) +{ + if (pStackHighOut == NULL) + { +#if defined(HOST_WASM) && !defined(FEATURE_WASM_MANAGED_THREADS) + PalGetMaximumStackBounds_SingleThreadedWasm(&pStackLowOut, &pStackHighOut); +#elif defined(__APPLE__) + // This is a Mac specific method + pStackHighOut = pthread_get_stackaddr_np(pthread_self()); + pStackLowOut = ((uint8_t *)pStackHighOut - pthread_get_stacksize_np(pthread_self())); +#else // __APPLE__ + pthread_attr_t attr; + size_t stackSize; + int status; + + pthread_t thread = pthread_self(); + + status = pthread_attr_init(&attr); + ASSERT_MSG(status == 0, "pthread_attr_init call failed"); + +#if HAVE_PTHREAD_ATTR_GET_NP + status = pthread_attr_get_np(thread, &attr); +#elif HAVE_PTHREAD_GETATTR_NP + status = pthread_getattr_np(thread, &attr); +#else +#error Dont know how to get thread attributes on this platform! +#endif + ASSERT_MSG(status == 0, "pthread_getattr_np call failed"); + + status = pthread_attr_getstack(&attr, &pStackLowOut, &stackSize); + ASSERT_MSG(status == 0, "pthread_attr_getstack call failed"); + + status = pthread_attr_destroy(&attr); + ASSERT_MSG(status == 0, "pthread_attr_destroy call failed"); + + pStackHighOut = (uint8_t*)pStackLowOut + stackSize; +#endif // __APPLE__ + } + + *ppStackLowOut = pStackLowOut; + *ppStackHighOut = pStackHighOut; + + return true; +} + +// retrieves the full path to the specified module, if moduleBase is NULL retreieves the full path to the +// executable module of the current process. 
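+// Illustrative use (hypothetical caller, names assumed):
+//     const TCHAR* pModuleName = NULL;
+//     int32_t cchName = PalGetModuleFileName(&pModuleName, PalGetModuleHandleFromPointer(pSomeCodeAddress));
+//     // pModuleName is not owned by the caller and must not be freed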
+// +// Return value: number of characters in name string +// +int32_t PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase) +{ +#if defined(HOST_WASM) + // Emscripten's implementation of dladdr corrupts memory and doesn't have the real name, so make up a name instead + const TCHAR* wasmModuleName = "WebAssemblyModule"; + *pModuleNameOut = wasmModuleName; + return strlen(wasmModuleName); +#else // HOST_WASM + Dl_info dl; + if (dladdr(moduleBase, &dl) == 0) + { + *pModuleNameOut = NULL; + return 0; + } + + *pModuleNameOut = dl.dli_fname; + return strlen(dl.dli_fname); +#endif // defined(HOST_WASM) +} + +void PalFlushProcessWriteBuffers() +{ + GCToOSInterface::FlushProcessWriteBuffers(); +} + +static const int64_t SECS_BETWEEN_1601_AND_1970_EPOCHS = 11644473600LL; +static const int64_t SECS_TO_100NS = 10000000; /* 10^7 */ + +void PalGetSystemTimeAsFileTime(FILETIME *lpSystemTimeAsFileTime) +{ + struct timeval time = { 0 }; + gettimeofday(&time, NULL); + + int64_t result = ((int64_t)time.tv_sec + SECS_BETWEEN_1601_AND_1970_EPOCHS) * SECS_TO_100NS + + (time.tv_usec * 10); + + lpSystemTimeAsFileTime->dwLowDateTime = (uint32_t)result; + lpSystemTimeAsFileTime->dwHighDateTime = (uint32_t)(result >> 32); +} + +uint64_t PalGetCurrentOSThreadId() +{ + return (uint64_t)minipal_get_current_thread_id(); +} diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index a1e2e507df81..0321e2101817 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -5,11 +5,11 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" +#include "PalLimitedContext.h" #include "regdisplay.h" #include "ICodeManager.h" #include "UnixNativeCodeManager.h" -#include "varint.h" +#include "NativePrimitiveDecoder.h" #include "holder.h" #include "CommonMacros.inl" @@ -417,7 +417,7 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) // ADD{S}.W FP, SP, # // 1111 0x01 000x 1101 0xxx 1011 xxxx xxxx -#define ADD_W_FP_SP_BITS 0xF10D0B00 +#define ADD_W_FP_SP_BITS 0xF10D0B00 #define ADD_W_FP_SP_MASK 0xFBEF8F00 // PUSH @@ -499,10 +499,10 @@ static bool IsArmPrologInstruction(uint16_t* pInstr) #endif +#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) // checks for known prolog instructions generated by ILC and returns // 1 - in prolog -// 0 - not in prolog, -// -1 - unknown. 
+// 0 - not in prolog int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddress) { #if defined(TARGET_ARM64) @@ -641,124 +641,11 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre return 0; -#elif defined(TARGET_LOONGARCH64) - -// 0010 1001 11xx xxxx xxxx xxxx xxxx xxxx -#define ST_BITS 0x29C00000 -#define ST_MASK 0xFFC00000 - -// addi.d $fp, $sp, x -// ori $fp, $sp, 0 -// 0000 0010 11xx xxxx xxxx xx00 0111 0110 -#define ADDI_FP_SP_BITS 0x02C00076 -#define ADDI_FP_SP_MASK 0xFFC003FF - -#define ST_RJ_MASK 0x3E0 -#define ST_RJ_FP 0x2C0 -#define ST_RJ_RA 0x20 -#define ST_RD_MASK 0x1F -#define ST_RD_SP 0x3 -#define ST_RD_FP 0x16 - - UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; - ASSERT(pNativeMethodInfo != NULL); - - uint32_t* start = (uint32_t*)pNativeMethodInfo->pMethodStartAddress; - bool savedFp = false; - bool savedRa = false; - bool establishedFp = false; - - for (uint32_t* pInstr = (uint32_t*)start; pInstr < pvAddress && !(savedFp && savedRa && establishedFp); pInstr++) - { - uint32_t instr = *pInstr; - - if (((instr & ST_MASK) == ST_BITS) && - ((instr & ST_RD_MASK) == ST_RD_SP || (instr & ST_RD_MASK) == ST_RD_FP)) - { - // SP/FP-relative store of pair of registers - savedFp |= (instr & ST_RJ_MASK) == ST_RJ_FP; - savedRa |= (instr & ST_RJ_MASK) == ST_RJ_RA; - } - else if ((instr & ADDI_FP_SP_MASK) == ADDI_FP_SP_BITS) - { - establishedFp = true; - } - else - { - // JIT generates other patterns into the prolog that we currently don't - // recognize (saving unpaired register, stack pointer adjustments). We - // don't need to recognize these patterns unless a compact unwinding code - // is generated for them in ILC. - // https://github.com/dotnet/runtime/issues/76371 - return -1; - } - } - - return savedFp && savedRa && establishedFp ? 0 : 1; - -#elif defined(TARGET_RISCV64) - -// store pair with signed offset -// 0100 00xx xxxxxxxx xxxx xxxx xxxx xxxx -#define STW_PAIR_BITS 0x04000000 -#define STW_PAIR_MASK 0xFC000000 - -// add fp, sp, x -// addi fp, sp, x -// 0000 0001 100x xxxx xxxx xxxx 0000 0000 -#define ADD_FP_SP_BITS 0x01C00000 -#define ADD_FP_SP_MASK 0xFFFFE000 - -#define STW_PAIR_RS1_MASK 0xF80 -#define STW_PAIR_RS1_SP 0xF80 -#define STW_PAIR_RS1_FP 0xF00 -#define STW_PAIR_RS2_MASK 0xF00 -#define STW_PAIR_RS2_FP 0xF00 -#define STW_PAIR_RS2_RA 0xF40 - - UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; - ASSERT(pNativeMethodInfo != NULL); - - uint32_t* start = (uint32_t*)pNativeMethodInfo->pMethodStartAddress; - bool savedFp = false; - bool savedRa = false; - bool establishedFp = false; - - for (uint32_t* pInstr = (uint32_t*)start; pInstr < pvAddress && !(savedFp && savedRa && establishedFp); pInstr++) - { - uint32_t instr = *pInstr; - - if (((instr & STW_PAIR_MASK) == STW_PAIR_BITS) && - ((instr & STW_PAIR_RS1_MASK) == STW_PAIR_RS1_SP || (instr & STW_PAIR_RS1_MASK) == STW_PAIR_RS1_FP) && - ((instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_FP || (instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_RA)) - { - // SP/FP-relative store of pair of registers - savedFp |= (instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_FP; - savedRa |= (instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_RA; - } - else if ((instr & ADD_FP_SP_MASK) == ADD_FP_SP_BITS) - { - establishedFp = true; - } - else - { - // JIT generates other patterns into the prolog that we currently don't - // recognize (saving unpaired register, stack pointer adjustments). 
We - // don't need to recognize these patterns unless a compact unwinding code - // is generated for them in ILC. - // https://github.com/dotnet/runtime/issues/76371 - return -1; - } - } - - return savedFp && savedRa && establishedFp ? 0 : 1; - #else - - return -1; - +#error Unsupported architecture #endif } +#endif // when stopped in an epilogue, returns the count of remaining stack-consuming instructions // otherwise returns @@ -789,7 +676,7 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho // For details see similar code in OOPStackUnwinderAMD64::UnwindEpilogue // // - // + // // A canonical epilogue sequence consists of the following operations: // // 1. Optional cleanup of fixed and dynamic stack allocations, which is @@ -993,7 +880,7 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho // Since we stop on branches, the search is roughly limited by the containing basic block. // We typically examine just 1-5 instructions and in rare cases up to 30. - // + // // TODO: we can also limit the search by the longest possible epilogue length, but // we must be sure the longest length considers all possibilities, // which is somewhat nontrivial to derive/prove. @@ -1001,7 +888,7 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho for (uint32_t* pInstr = (uint32_t*)pvAddress - 1; pInstr > start; pInstr--) { uint32_t instr = *pInstr; - + // check for Branches, Exception Generating and System instruction group. // If we see such instruction before seeing FP or LR restored, we are not in an epilog. // Note: this includes RET, BRK, branches, calls, tailcalls, fences, etc... @@ -1131,10 +1018,15 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho #elif defined(TARGET_LOONGARCH64) +// addi.d $sp, $sp, x +// 0000 0010 11xx xxxx xxxx xx00 0110 0011 +#define ADDI_D_SP_SP_BITS 0x02C00063 +#define ADDI_D_SP_SP_MASK 0xFFC003FF + // ld.d // 0010 1000 11xx xxxx xxxx xxxx xxxx xxxx -#define LD_BITS 0xB9400000 -#define LD_MASK 0xBF400000 +#define LD_BITS 0x28C00000 +#define LD_MASK 0xFFC00000 // ldx.d with register offset // 0011 1000 0000 1100 0xxx xxxx xxxx xxxx @@ -1171,35 +1063,36 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho break; } - // check for restoring FP or RA with ld.d or ldx.d - int operand = (instr >> 5) & 0x1f; - if (operand == 22 || operand == 1) + // check for restoring FP or RA with ld.d + if ((instr & LD_MASK) == LD_BITS) { - if ((instr & LD_MASK) == LD_BITS || - (instr & LDX_MASK) == LDX_BITS) + int rd = (instr & 0x1f); + if (rd == 22 || rd == 1) { return -1; } } + + // Check for adjusting stack pointer + if ((instr & ADDI_D_SP_SP_MASK) == ADDI_D_SP_SP_BITS) + { + return -1; + } } #elif defined(TARGET_RISCV64) -// Load with immediate -// LUI, LD, etc. -// 0000 0000 0000 0000 1111 1111 1111 1111 -#define LUI_BITS 0x00000037 -#define LUI_MASK 0x0000007F +// addi sp, sp, x +#define ADD_SP_SP_BITS 0x00010113 +#define ADD_SP_SP_MASK 0x000FFFFF // Load with register offset // LD with register offset -// 0000 0000 0000 0000 0111 0000 0000 0000 #define LD_BITS 0x00000003 #define LD_MASK 0x0000007F -// Branches, Jumps, System calls -// BEQ, BNE, JAL, etc. -// 1100 0000 0000 0000 0000 0000 0000 0000 +// Branches +// BEQ, BNE, etc. 
#define BEGS_BITS 0x00000063 #define BEGS_MASK 0x0000007F @@ -1229,14 +1122,20 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho } // Check for restoring registers (FP or RA) with `ld` - int rd = (instr >> 7) & 0x1F; // Extract the destination register - if (rd == 8 || rd == 1) // Check for FP (x8) or RA (x1) + if ((instr & LD_MASK) == LD_BITS) // Match `ld` instruction { - if ((instr & LD_MASK) == LD_BITS) // Match `ld` instruction + int rd = (instr >> 7) & 0x1F; // Extract the destination register + if (rd == 8 || rd == 1) // Check for FP (x8) or RA (x1) { return -1; } } + + // Check for adjusting stack pointer + if ((instr & ADD_SP_SP_MASK) == ADD_SP_SP_BITS) + { + return -1; + } } #endif @@ -1244,15 +1143,6 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho return 0; } -// Convert the return kind that was encoded by RyuJIT to the -// enum used by the runtime. -GCRefKind GetGcRefKind(ReturnKind returnKind) -{ - ASSERT((returnKind >= RT_Scalar) && (returnKind <= RT_ByRef_ByRef)); - - return (GCRefKind)returnKind; -} - bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation) // out @@ -1422,7 +1312,7 @@ bool UnixNativeCodeManager::EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMet pEnumState->pMethodStartAddress = dac_cast(pNativeMethodInfo->pMethodStartAddress); pEnumState->pEHInfo = dac_cast(p + *dac_cast(p)); pEnumState->uClause = 0; - pEnumState->nClauses = VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEnumState->nClauses = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); return true; } @@ -1440,9 +1330,9 @@ bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE pEnumState->uClause++; - pEHClauseOut->m_tryStartOffset = VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_tryStartOffset = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); - uint32_t tryEndDeltaAndClauseKind = VarInt::ReadUnsigned(pEnumState->pEHInfo); + uint32_t tryEndDeltaAndClauseKind = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); pEHClauseOut->m_clauseKind = (EHClauseKind)(tryEndDeltaAndClauseKind & 0x3); pEHClauseOut->m_tryEndOffset = pEHClauseOut->m_tryStartOffset + (tryEndDeltaAndClauseKind >> 2); @@ -1458,23 +1348,23 @@ bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE switch (pEHClauseOut->m_clauseKind) { case EH_CLAUSE_TYPED: - pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); // Read target type { // @TODO: Compress EHInfo using type table index scheme // https://github.com/dotnet/corert/issues/972 - int32_t typeRelAddr = *((PTR_int32_t&)pEnumState->pEHInfo); - pEHClauseOut->m_pTargetType = dac_cast(pEnumState->pEHInfo + typeRelAddr); - pEnumState->pEHInfo += 4; + uint8_t* pBase = pEnumState->pEHInfo; + int32_t typeRelAddr = NativePrimitiveDecoder::ReadInt32(pEnumState->pEHInfo); + pEHClauseOut->m_pTargetType = dac_cast(pBase + typeRelAddr); } break; case EH_CLAUSE_FAULT: - pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_handlerAddress = 
dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); break; case EH_CLAUSE_FILTER: - pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); - pEHClauseOut->m_filterAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_filterAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); break; default: UNREACHABLE_MSG("unexpected EHClauseKind"); diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h index ad9dd8cfed24..ca3f3f2272bd 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h @@ -63,7 +63,9 @@ class UnixNativeCodeManager : public ICodeManager bool IsUnwindable(PTR_VOID pvAddress); +#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) int IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddress); +#endif int TrailingEpilogueInstructionsCount(MethodInfo * pMethodInfo, PTR_VOID pvAddress); diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixSignals.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixSignals.cpp index 33852920653f..518b4bbf82f3 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixSignals.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixSignals.cpp @@ -2,7 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
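As an aside on the typed-clause change in `EHEnumNext` a little further up: switching from a raw pointer read to `NativePrimitiveDecoder::ReadInt32` means the cursor advances as part of the read, so the code now snapshots it into `pBase` first and resolves the relative address against that snapshot. A minimal sketch of the same pattern, with `ReadInt32` modeled as a cursor-advancing helper and the other names invented for illustration:

```cpp
// Illustrative sketch only. ReadInt32 models a decoder that advances the
// cursor passed to it by reference; DecodeTargetType is an invented name.
#include <cstdint>
#include <cstring>

static int32_t ReadInt32(uint8_t*& cursor)
{
    int32_t value;
    std::memcpy(&value, cursor, sizeof(value));
    cursor += sizeof(value);          // the read moves the cursor past the field
    return value;
}

static const void* DecodeTargetType(uint8_t*& ehInfo)
{
    uint8_t* pBase = ehInfo;          // snapshot before the cursor advances
    int32_t rel = ReadInt32(ehInfo);  // ehInfo now points after the int32
    return pBase + rel;               // the stored value is relative to the field itself
}
```

The same decoder swap appears again in CoffNativeCodeManager.cpp further down, where the type RVA is read with `ReadUInt32`.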
#include "CommonTypes.h" -#include "PalRedhawkCommon.h" +#include "PalLimitedContext.h" #include "CommonMacros.h" #include "config.h" diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index e387f3440e32..0d5f1cdd5212 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -175,7 +175,7 @@ struct Registers_REGDISPLAY : REGDISPLAY inline bool validFloatRegister(int) { return false; } inline bool validVectorRegister(int) { return false; } - inline static int lastDwarfRegNum() { return 16; } + static constexpr int lastDwarfRegNum() { return 16; } inline bool validRegister(int regNum) const { @@ -297,7 +297,7 @@ struct Registers_REGDISPLAY : REGDISPLAY inline bool validFloatRegister(int) { return false; } inline bool validVectorRegister(int) { return false; } - inline static int lastDwarfRegNum() { return 16; } + static constexpr int lastDwarfRegNum() { return 16; } inline bool validRegister(int regNum) const { @@ -336,7 +336,7 @@ struct Registers_REGDISPLAY : REGDISPLAY struct Registers_REGDISPLAY : REGDISPLAY { inline static int getArch() { return libunwind::REGISTERS_ARM; } - inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM; } + static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM; } bool validRegister(int num) const; bool validFloatRegister(int num) const; @@ -348,15 +348,15 @@ struct Registers_REGDISPLAY : REGDISPLAY double getFloatRegister(int num) const; void setFloatRegister(int num, double value); - libunwind::v128 getVectorRegister(int num) const {abort();} - void setVectorRegister(int num, libunwind::v128 value) {abort();} + libunwind::v128 getVectorRegister(int num) const { abort(); } + void setVectorRegister(int num, libunwind::v128 value) { abort(); } - uint32_t getSP() const { return SP;} - void setSP(uint32_t value, uint32_t location) { SP = value;} - uint32_t getIP() const { return IP;} + uint32_t getSP() const { return SP; } + void setSP(uint32_t value, uint32_t location) { SP = value; } + uint32_t getIP() const { return IP; } void setIP(uint32_t value, uint32_t location) { IP = value; } - uint32_t getFP() const { return *pR11;} - void setFP(uint32_t value, uint32_t location) { pR11 = (PTR_uintptr_t)location;} + uint32_t getFP() const { return *pR11; } + void setFP(uint32_t value, uint32_t location) { pR11 = (PTR_uintptr_t)location; } }; struct ArmUnwindCursor : public libunwind::AbstractUnwindCursor @@ -367,18 +367,18 @@ struct ArmUnwindCursor : public libunwind::AbstractUnwindCursor virtual bool validReg(int num) { return _registers->validRegister(num); } virtual unw_word_t getReg(int num) { return _registers->getRegister(num); } virtual void setReg(int num, unw_word_t value, unw_word_t location) { _registers->setRegister(num, value, location); } - virtual unw_word_t getRegLocation(int num) {abort();} + virtual unw_word_t getRegLocation(int num) { abort(); } virtual bool validFloatReg(int num) { return _registers->validFloatRegister(num); } virtual unw_fpreg_t getFloatReg(int num) { return _registers->getFloatRegister(num); } virtual void setFloatReg(int num, unw_fpreg_t value) { _registers->setFloatRegister(num, value); } - virtual int step(bool stage2 = false) {abort();} - virtual void getInfo(unw_proc_info_t *) {abort();} - virtual void jumpto() {abort();} + virtual int step(bool stage2 = false) { abort(); } + virtual void getInfo(unw_proc_info_t *) { abort(); } + 
virtual void jumpto() { abort(); } virtual bool isSignalFrame() { return false; } - virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off) {abort();} - virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false) {abort();} - virtual const char *getRegisterName(int num) {abort();} - virtual void saveVFPAsX() {abort();} + virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off) { abort(); } + virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false) { abort(); } + virtual const char *getRegisterName(int num) { abort(); } + virtual void saveVFPAsX() { abort(); } }; inline bool Registers_REGDISPLAY::validRegister(int num) const { @@ -533,7 +533,7 @@ void Registers_REGDISPLAY::setFloatRegister(int num, double value) struct Registers_REGDISPLAY : REGDISPLAY { inline static int getArch() { return libunwind::REGISTERS_ARM64; } - inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } + static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } bool validRegister(int num) const; bool validFloatRegister(int num) { return false; }; @@ -542,18 +542,18 @@ struct Registers_REGDISPLAY : REGDISPLAY uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t location); - double getFloatRegister(int num) const {abort();} - void setFloatRegister(int num, double value) {abort();} + double getFloatRegister(int num) const { abort(); } + void setFloatRegister(int num, double value) { abort(); } libunwind::v128 getVectorRegister(int num) const; void setVectorRegister(int num, libunwind::v128 value); - uint64_t getSP() const { return SP;} - void setSP(uint64_t value, uint64_t location) { SP = value;} - uint64_t getIP() const { return IP;} + uint64_t getSP() const { return SP; } + void setSP(uint64_t value, uint64_t location) { SP = value; } + uint64_t getIP() const { return IP; } void setIP(uint64_t value, uint64_t location) { IP = value; } - uint64_t getFP() const { return *pFP;} - void setFP(uint64_t value, uint64_t location) { pFP = (PTR_uintptr_t)location;} + uint64_t getFP() const { return *pFP; } + void setFP(uint64_t value, uint64_t location) { pFP = (PTR_uintptr_t)location; } }; inline bool Registers_REGDISPLAY::validRegister(int num) const { @@ -816,27 +816,27 @@ void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) struct Registers_REGDISPLAY : REGDISPLAY { inline static int getArch() { return libunwind::REGISTERS_LOONGARCH; } - inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH; } + static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH; } bool validRegister(int num) const; - bool validFloatRegister(int num) { return false; }; - bool validVectorRegister(int num) const; + bool validFloatRegister(int num) const; + bool validVectorRegister(int num) const { return false; }; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t location); - double getFloatRegister(int num) const {abort();} - void setFloatRegister(int num, double value) {abort();} + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); - libunwind::v128 getVectorRegister(int num) const; - void setVectorRegister(int num, libunwind::v128 value); + libunwind::v128 getVectorRegister(int num) const { abort(); }; + void setVectorRegister(int num, libunwind::v128 value) { abort(); }; - uint64_t getSP() const { return SP;} - void setSP(uint64_t value, 
uint64_t location) { SP = value;} - uint64_t getIP() const { return IP;} + uint64_t getSP() const { return SP; } + void setSP(uint64_t value, uint64_t location) { SP = value; } + uint64_t getIP() const { return IP; } void setIP(uint64_t value, uint64_t location) { IP = value; } - uint64_t getFP() const { return *pFP;} - void setFP(uint64_t value, uint64_t location) { pFP = (PTR_uintptr_t)location;} + uint64_t getFP() const { return *pFP; } + void setFP(uint64_t value, uint64_t location) { pFP = (PTR_uintptr_t)location; } }; inline bool Registers_REGDISPLAY::validRegister(int num) const { @@ -852,10 +852,13 @@ inline bool Registers_REGDISPLAY::validRegister(int num) const { if (num >= UNW_LOONGARCH_R0 && num <= UNW_LOONGARCH_R31) return true; + if (num >= UNW_LOONGARCH_F24 && num <= UNW_LOONGARCH_F31) + return true; + return false; } -bool Registers_REGDISPLAY::validVectorRegister(int num) const +bool Registers_REGDISPLAY::validFloatRegister(int num) const { if (num >= UNW_LOONGARCH_F24 && num <= UNW_LOONGARCH_F31) return true; @@ -1057,35 +1060,22 @@ void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t locatio } } -libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const +double Registers_REGDISPLAY::getFloatRegister(int num) const { - num -= UNW_LOONGARCH_F24; - - if (num < 0 || num >= sizeof(F) / sizeof(uint64_t)) + if (num >= UNW_LOONGARCH_F24 && num <= UNW_LOONGARCH_F31) { - PORTABILITY_ASSERT("unsupported loongarch64 vector register"); + return F[num - UNW_LOONGARCH_F24]; } - libunwind::v128 result; - - result.vec[0] = 0; - result.vec[1] = 0; - result.vec[2] = F[num] >> 32; - result.vec[3] = F[num] & 0xFFFFFFFF; - - return result; + PORTABILITY_ASSERT("unsupported LA freg"); } -void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) +void Registers_REGDISPLAY::setFloatRegister(int num, double value) { - num -= UNW_LOONGARCH_F24; - - if (num < 0 || num >= sizeof(F) / sizeof(uint64_t)) + if (num >= UNW_LOONGARCH_F24 && num <= UNW_LOONGARCH_F31) { - PORTABILITY_ASSERT("unsupported loongarch64 vector register"); + F[num - UNW_LOONGARCH_F24] = value; } - - F[num] = (uint64_t)value.vec[2] << 32 | (uint64_t)value.vec[3]; } #endif // TARGET_LOONGARCH64 @@ -1096,7 +1086,7 @@ void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) struct Registers_REGDISPLAY : REGDISPLAY { inline static int getArch() { return libunwind::REGISTERS_RISCV; } - inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV; } + static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV; } bool validRegister(int num) const; bool validFloatRegister(int num) { return false; }; @@ -1129,6 +1119,12 @@ inline bool Registers_REGDISPLAY::validRegister(int num) const { if (num >= UNW_RISCV_X0 && num <= UNW_RISCV_X31) return true; + if (num == UNW_RISCV_F8 || num == UNW_RISCV_F9) + return true; + + if (num >= UNW_RISCV_F18 && num <= UNW_RISCV_F27) + return true; + return false; } @@ -1140,8 +1136,13 @@ bool Registers_REGDISPLAY::validVectorRegister(int num) const inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { switch (regNum) { + case UNW_REG_IP: + return IP; case UNW_RISCV_X1: return *pRA; + case UNW_REG_SP: + case UNW_RISCV_X2: + return SP; case UNW_RISCV_X3: return *pGP; case UNW_RISCV_X4: @@ -1255,60 +1256,79 @@ inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { void Registers_REGDISPLAY::setRegister(int regNum, uint64_t value, uint64_t location) { switch 
(regNum) { + case UNW_REG_IP: + IP = (uintptr_t)value; + break; case UNW_RISCV_X1: - *pRA = value; + pRA = (PTR_uintptr_t)location; + break; + case UNW_REG_SP: + case UNW_RISCV_X2: + SP = (uintptr_t)value; break; case UNW_RISCV_X3: - *pGP = value; + pGP = (PTR_uintptr_t)location; break; case UNW_RISCV_X4: - *pTP = value; + pTP = (PTR_uintptr_t)location; break; case UNW_RISCV_X5: - *pT0 = value; + pT0 = (PTR_uintptr_t)location; break; case UNW_RISCV_X6: - *pT1 = value; + pT1 = (PTR_uintptr_t)location; break; case UNW_RISCV_X7: - *pT2 = value; + pT2 = (PTR_uintptr_t)location; break; case UNW_RISCV_X28: - *pT3 = value; + pT3 = (PTR_uintptr_t)location; break; case UNW_RISCV_X29: - *pT4 = value; + pT4 = (PTR_uintptr_t)location; break; case UNW_RISCV_X30: - *pT5 = value; + pT5 = (PTR_uintptr_t)location; break; case UNW_RISCV_X31: - *pT6 = value; + pT6 = (PTR_uintptr_t)location; break; case UNW_RISCV_X8: - *pFP = value; + pFP = (PTR_uintptr_t)location; break; case UNW_RISCV_X9: - *pS1 = value; + pS1 = (PTR_uintptr_t)location; break; case UNW_RISCV_X18: - *pS2 = value; + pS2 = (PTR_uintptr_t)location; break; case UNW_RISCV_X19: - *pS3 = value; + pS3 = (PTR_uintptr_t)location; break; case UNW_RISCV_X20: - *pS4 = value; + pS4 = (PTR_uintptr_t)location; break; case UNW_RISCV_X21: - *pS5 = value; + pS5 = (PTR_uintptr_t)location; break; case UNW_RISCV_X22: - *pS6 = value; + pS6 = (PTR_uintptr_t)location; break; case UNW_RISCV_X23: - *pS7 = value; + pS7 = (PTR_uintptr_t)location; + break; + case UNW_RISCV_X24: + pS8 = (PTR_uintptr_t)location; + break; + case UNW_RISCV_X25: + pS9 = (PTR_uintptr_t)location; + break; + case UNW_RISCV_X26: + pS10 = (PTR_uintptr_t)location; + break; + case UNW_RISCV_X27: + pS11 = (PTR_uintptr_t)location; break; // Add other general-purpose registers if needed diff --git a/src/coreclr/nativeaot/Runtime/unix/config.h.in b/src/coreclr/nativeaot/Runtime/unix/config.h.in index d5505b760c1f..3a5e1ef87b5d 100644 --- a/src/coreclr/nativeaot/Runtime/unix/config.h.in +++ b/src/coreclr/nativeaot/Runtime/unix/config.h.in @@ -20,8 +20,6 @@ #cmakedefine01 HAVE_SIGINFO_T #cmakedefine01 HAVE_LWP_SELF -#cmakedefine01 HAVE_CLOCK_MONOTONIC -#cmakedefine01 HAVE_CLOCK_MONOTONIC_COARSE #cmakedefine01 HAVE_CLOCK_GETTIME_NSEC_NP #cmakedefine01 HAVE_SCHED_GETAFFINITY diff --git a/src/coreclr/nativeaot/Runtime/unix/configure.cmake b/src/coreclr/nativeaot/Runtime/unix/configure.cmake index a2af5031b66a..474e23aa98d3 100644 --- a/src/coreclr/nativeaot/Runtime/unix/configure.cmake +++ b/src/coreclr/nativeaot/Runtime/unix/configure.cmake @@ -61,34 +61,6 @@ int main(int argc, char **argv) return (int)_lwp_self(); }" HAVE_LWP_SELF) -check_cxx_source_runs(" -#include -#include -#include - -int main() -{ - int ret; - struct timespec ts; - ret = clock_gettime(CLOCK_MONOTONIC, &ts); - - exit(ret); -}" HAVE_CLOCK_MONOTONIC) - -check_cxx_source_runs(" -#include -#include -#include - -int main() -{ - int ret; - struct timespec ts; - ret = clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); - - exit(ret); -}" HAVE_CLOCK_MONOTONIC_COARSE) - check_cxx_source_compiles(" #include diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc index 4cf213cab49a..7e9c70354e43 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc @@ -3,8 +3,8 @@ #define INVALIDGCVALUE 0xCCCCCCCD -// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT -#define STATUS_REDHAWK_THREAD_ABORT 0x43 +// This must match 
HwExceptionCode.STATUS_NATIVEAOT_THREAD_ABORT +#define STATUS_NATIVEAOT_THREAD_ABORT 0x43 // Enforce subsections via symbols to workaround bugs in Xcode 15 linker. #if defined(__APPLE__) @@ -34,6 +34,17 @@ .equiv \New, \Old .endm +// Rename offsets of nested structures +#define OFFSETOF__ee_alloc_context__alloc_ptr (OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__ee_alloc_context OFFSETOF__Thread__m_eeAllocContext + +// GC type flags +#define GC_ALLOC_FINALIZE 1 +#define GC_ALLOC_ALIGN8_BIAS 4 +#define GC_ALLOC_ALIGN8 8 + +#define G_FREE_OBJECT_METHOD_TABLE g_pFreeObjectEEType + #if defined(HOST_AMD64) #include "unixasmmacrosamd64.inc" #elif defined(HOST_ARM) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index 735c17a90496..5e491a49770d 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -41,6 +41,7 @@ C_FUNC(\Name): .global C_FUNC(_\Name) .type \Name, %function #endif + .p2align 4 C_FUNC(\Name): .cfi_startproc .endm @@ -233,15 +234,6 @@ C_FUNC(\Name): #define TSF_SuppressGcStress 0x08 #define TSF_DoNotTriggerGc 0x10 -// -// Rename fields of nested structs -// -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -#define OFFSETOF__Thread__m_eeAllocContext__combined_limit OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit - -// GC type flags -#define GC_ALLOC_FINALIZE 1 - // Note: these must match the defs in PInvokeTransitionFrameFlags #define PTFF_SAVE_RBX 0x00000001 #define PTFF_SAVE_R12 0x00000010 @@ -287,6 +279,10 @@ C_FUNC(\Name): #endif .endm +.macro INLINE_GET_ALLOC_CONTEXT_BASE + INLINE_GETTHREAD +.endm + .macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 // // Thread::Unhijack() diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc index 8aeb084f8e3c..bb2ff4b7362a 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc @@ -5,11 +5,6 @@ // CONSTANTS -- INTEGER // -// GC type flags -#define GC_ALLOC_FINALIZE 1 -#define GC_ALLOC_ALIGN8_BIAS 4 -#define GC_ALLOC_ALIGN8 8 - #define TSF_Attached 0x01 #define TSF_SuppressGcStress 0x08 #define TSF_DoNotTriggerGc 0x10 @@ -28,12 +23,8 @@ #define TrapThreadsFlags_AbortInProgress 1 #define TrapThreadsFlags_TrapThreads 2 -// Rename fields of nested structs -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) - // GC minimal sized object. We use this to switch between 4 and 8 byte alignment in the GC heap (see AllocFast.asm). 
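The `OFFSETOF__ee_alloc_context__alloc_ptr` definition introduced above composes two offsets by plain addition. A minimal sketch of why that composition is valid, using simplified stand-in layouts rather than the runtime's real `Thread` / `ee_alloc_context` / `gc_alloc_context` definitions:

```cpp
// Illustrative sketch only; the layouts are simplified stand-ins, not the
// runtime's real Thread / ee_alloc_context / gc_alloc_context definitions.
#include <cassert>
#include <cstddef>
#include <cstdint>

struct gc_alloc_context { uint8_t* alloc_ptr; uint8_t* alloc_limit; };
struct ee_alloc_context { uint8_t* combined_limit; uint8_t m_rgbAllocContextBuffer[sizeof(gc_alloc_context)]; };
struct Thread           { void* other_state; ee_alloc_context m_eeAllocContext; };

int main()
{
    Thread t{};
    uint8_t* base = reinterpret_cast<uint8_t*>(&t);

    // OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer
    //   + OFFSETOF__gc_alloc_context__alloc_ptr
    std::size_t composed = offsetof(Thread, m_eeAllocContext)
                         + offsetof(ee_alloc_context, m_rgbAllocContextBuffer)
                         + offsetof(gc_alloc_context, alloc_ptr);

    // The buffer holds a gc_alloc_context, so alloc_ptr lives at this address.
    uint8_t* direct = t.m_eeAllocContext.m_rgbAllocContextBuffer
                    + offsetof(gc_alloc_context, alloc_ptr);

    assert(base + composed == direct);
    return 0;
}
```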
-#define SIZEOF__MinObject 12 +#define ASM_MIN_OBJECT_SIZE 12 .macro NESTED_ENTRY Name, Section, Handler LEAF_ENTRY \Name, \Section @@ -285,6 +276,10 @@ C_FUNC(\Name): #endif .endm +.macro INLINE_GET_ALLOC_CONTEXT_BASE + INLINE_GETTHREAD +.endm + .macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 // // Thread::Unhijack() diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc index 36698fece505..74e5c0b7e0b4 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc @@ -226,6 +226,31 @@ C_FUNC(\Name): INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) .endm +// Target cannot be x0. +.macro INLINE_GET_ALLOC_CONTEXT_BASE target + .ifc \target, x0 + .error "target cannot be x0" + .endif + +#ifdef FEATURE_EMULATED_TLS + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0x20 + PROLOG_SAVE_REG_PAIR x0, x1, 0x10 + + bl C_FUNC(RhpGetThread) + mov \target, x0 + + .ifc \target, x1 + EPILOG_RESTORE_REG_PAIR x0, xzr, 0x10 + .else + EPILOG_RESTORE_REG_PAIR x0, x1, 0x10 + .endif + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x20 +#else + INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) +#endif +.endm + + // Do not use these ETLS macros in functions that already create a stack frame. // Creating two stack frames in one function can confuse the unwinder/debugger diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc index b78210c8f853..85eedf18b488 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc @@ -6,7 +6,7 @@ .macro NESTED_ENTRY Name, Section, Handler LEAF_ENTRY \Name, \Section .ifnc \Handler, NoHandler - .cfi_personality 0x1b, C_FUNC(\Handler) // 0x1b == DW_EH_PE_pcrel | DW_EH_PE_sdata4 + .cfi_personality 0x1c, C_FUNC(\Handler) // 0x1c == DW_EH_PE_pcrel | DW_EH_PE_sdata8 .endif .endm @@ -83,30 +83,26 @@ C_FUNC(\Name): .endif .endm -.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ssize, __def_cfa_save=1 +.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ssize addi.d $sp, $sp, -\ssize - //.cfi_adjust_cfa_offset \ssize - .cfi_def_cfa 3, \ssize + .cfi_adjust_cfa_offset \ssize st.d $r\reg1, $sp, 0 st.d $r\reg2, $sp, 8 .cfi_rel_offset \reg1, 0 .cfi_rel_offset \reg2, 8 - .if (\__def_cfa_save == 1) - ori $fp, $sp, 0 - .cfi_def_cfa_register 22 - .endif + + ori $fp, $sp, 0 + .cfi_def_cfa_register 22 .endm .macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ssize addi.d $sp, $sp, -\ssize - //.cfi_adjust_cfa_offset \ssize - .cfi_def_cfa 3, \ssize + .cfi_adjust_cfa_offset \ssize st.d $r\reg1, $sp, 0 st.d $r\reg2, $sp, 8 - .cfi_rel_offset \reg1, 0 .cfi_rel_offset \reg2, 8 .endm @@ -154,9 +150,9 @@ C_FUNC(\Name): .error "target cannot be a0" .endif - st.d $a0, $sp, -0x10 - st.d $ra, $sp, -0x8 addi.d $sp, $sp, -16 + st.d $a0, $sp, 0 + st.d $ra, $sp, 8 // This instruction is recognized and potentially patched // by the linker (GD->IE/LE relaxation). @@ -175,6 +171,11 @@ C_FUNC(\Name): INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) .endm +// Target cannot be x0. 
+.macro INLINE_GET_ALLOC_CONTEXT_BASE target + INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) +.endm + .macro InterlockedOperationBarrier dbar 0 .endm @@ -184,7 +185,7 @@ C_FUNC(\Name): // Thread::Unhijack() // ld.d \trashReg1, \threadReg, OFFSETOF__Thread__m_pvHijackedReturnAddress - beq \trashReg1, $zero, 0f + beqz \trashReg1, 0f ld.d \trashReg2, \threadReg, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation st.d \trashReg1, \trashReg2, 0 @@ -198,13 +199,13 @@ C_FUNC(\Name): #define PTFF_SAVE_R4 0x00000800 #define PTFF_SAVE_R5 0x00001000 #define PTFF_SAVE_ALL_PRESERVED 0x000001FF // NOTE: r23-r31 -#define PTFF_THREAD_HIJACK_HI 0x00000002 // upper 32 bits of the PTFF_THREAD_HIJACK +#define PTFF_THREAD_HIJACK_HI 0x00000001 // upper 32 bits of the PTFF_THREAD_HIJACK #define DEFAULT_FRAME_SAVE_FLAGS (PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP) .macro PUSH_COOP_PINVOKE_FRAME trashReg - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 0x78 // Push down stack pointer and store FP and RA + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 0x70 // Push down stack pointer and store FP and RA // 0x10 bytes reserved for Thread* and flags @@ -215,8 +216,8 @@ C_FUNC(\Name): PROLOG_SAVE_REG_PAIR 29, 30, 0x50 PROLOG_SAVE_REG 31, 0x60 - // Save the value of SP before stack allocation to the last slot in the frame (slot #15) - addi.d \trashReg, $sp, 0x78 + // Save the value of SP before stack allocation to the last slot in the frame (slot #13) + addi.d \trashReg, $sp, 0x70 st.d \trashReg, $sp, 0x68 // Record the bitmask of saved registers in the frame (slot #3) @@ -228,14 +229,13 @@ C_FUNC(\Name): // Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME .macro POP_COOP_PINVOKE_FRAME - - // $s0,$s1 + // Restore callee saved registers EPILOG_RESTORE_REG_PAIR 23, 24, 0x20 EPILOG_RESTORE_REG_PAIR 25, 26, 0x30 EPILOG_RESTORE_REG_PAIR 27, 28, 0x40 EPILOG_RESTORE_REG_PAIR 29, 30, 0x50 EPILOG_RESTORE_REG 31, 0x60 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x78 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x70 .endm // Bit position for the flags above, to be used with bstrpick.d+beq/bne instructions diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 787d80a4000d..9aca4d7c3116 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -96,9 +96,8 @@ C_FUNC(\Name): .endm .macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ssize, __def_cfa_save=1 - addi sp, sp, \ssize - .cfi_adjust_cfa_offset -\ssize - .cfi_def_cfa sp, \ssize + addi sp, sp, -\ssize + .cfi_adjust_cfa_offset \ssize sd \reg1, 0(sp) sd \reg2, 8(sp) @@ -113,8 +112,7 @@ C_FUNC(\Name): .macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ssize addi sp, sp, -\ssize - //.cfi_adjust_cfa_offset \ssize - .cfi_def_cfa sp, \ssize + .cfi_adjust_cfa_offset \ssize sd \reg1, 0(sp) sd \reg2, 8(sp) @@ -158,36 +156,42 @@ C_FUNC(\Name): // Loads the address of a thread-local variable into the target register. // The target register cannot be a0. 
-.macro INLINE_GET_TLS_VAR target, var +.macro INLINE_GET_TLS_VAR target, var, ofs = 0 .ifc \target, a0 .error "target cannot be a0" .endif - addi sp, sp, -48 - sd ra, 40(sp) - sd t1, 32(sp) - sd a1, 24(sp) - sd a2, 16(sp) - sd a3, 8(sp) - sd a4, 0(sp) + addi sp, sp, -72 + sd ra, 64(sp) + sd t1, 56(sp) + sd a1, 48(sp) + sd a2, 40(sp) + sd a3, 32(sp) + sd a4, 24(sp) + sd a5, 16(sp) + sd a6, 8(sp) + sd a7, 0(sp) // global dynamic TLS, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#global-dynamic la.tls.gd a0, \var call C_FUNC(__tls_get_addr) - ld ra, 40(sp) - ld t1, 32(sp) - ld a1, 24(sp) - ld a2, 16(sp) - ld a3, 8(sp) - ld a4, 0(sp) - addi sp, sp, 48 + ld ra, 64(sp) + ld t1, 56(sp) + ld a1, 48(sp) + ld a2, 40(sp) + ld a3, 32(sp) + ld a4, 24(sp) + ld a5, 16(sp) + ld a6, 8(sp) + ld a7, 0(sp) + addi sp, sp, 72 - mv \target, a0 + add \target, a0, \ofs /* - In the future we should switch to TLS descriptors. The support was added in 2024 in glibc, musl, llvm, gcc and binutils, - so its support is currently unavailable on majority devices. See https://maskray.me/blog/2024-01-23-riscv-tlsdesc-works + In the future we should switch to TLS descriptors. Its support was added in 2024 in glibc, musl, llvm, gcc and binutils, + which is currently unavailable on majority devices. See https://maskray.me/blog/2024-01-23-riscv-tlsdesc-works When the support for TLS descriptors is available in NativeAOT baseline, actions to perform: * Apply this patch: @@ -204,6 +208,7 @@ C_FUNC(\Name): add_subdirectory(Bootstrap) ``` + * Remove global dynamic code including prolog and epilog. * Uncomment the following code and remove these comments. // TLS descriptor, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#tls-descriptors @@ -212,6 +217,9 @@ C_FUNC(\Name): addi a0, a0, %tlsdesc_add_lo(\var) jalr t0, 0(t0), %tlsdesc_call(\var) add \target, tp, a0 + .ifnc \ofs, 0 + add \target, \target, \ofs + .endif */ .endm @@ -221,6 +229,13 @@ C_FUNC(\Name): INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) .endm +// Caller must have an established frame, trashes volatile registers +.macro INLINE_GET_ALLOC_CONTEXT_BASE + // global dynamic TLS, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#global-dynamic + la.tls.gd a0, C_FUNC(tls_CurrentThread) + call C_FUNC(__tls_get_addr) +.endm + .macro InterlockedOperationBarrier fence rw, rw .endm @@ -244,12 +259,12 @@ C_FUNC(\Name): #define PTFF_SAVE_A0 0x00004000 #define PTFF_SAVE_A1 0x00008000 #define PTFF_SAVE_ALL_PRESERVED 0x000007FF // NOTE: S1-S11 -#define PTFF_THREAD_HIJACK_HI 0x00000002 // upper 32 bits of the PTFF_THREAD_HIJACK +#define PTFF_THREAD_HIJACK_HI 0x00000001 // upper 32 bits of the PTFF_THREAD_HIJACK #define DEFAULT_FRAME_SAVE_FLAGS PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP .macro PUSH_COOP_PINVOKE_FRAME trashReg - PROLOG_SAVE_REG_PAIR_INDEXED s0, ra, -128 // Push down stack pointer and store s0 (fp) and RA + PROLOG_SAVE_REG_PAIR_INDEXED s0, ra, 128 // Push down stack pointer and store s0 (fp) and RA // 16 bytes reserved for Thread* and flags @@ -259,10 +274,11 @@ C_FUNC(\Name): PROLOG_SAVE_REG_PAIR s5, s6, 64 PROLOG_SAVE_REG_PAIR s7, s8, 80 PROLOG_SAVE_REG_PAIR s9, s10, 96 + PROLOG_SAVE_REG s11, 112 // Save the value of SP before stack allocation to the last slot in the frame (slot #15) add \trashReg, sp, 128 - sd \trashReg, 112(sp) + sd \trashReg, 120(sp) // Record the bitmask of saved registers in the frame (slot #3) li \trashReg, DEFAULT_FRAME_SAVE_FLAGS 
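The frame-size changes in the two coop-pinvoke prologs above are pure slot arithmetic; a couple of compile-time checks (illustrative only, not from the patch) make the slot numbering in the comments explicit:

```cpp
// Illustrative compile-time checks only, not code from the patch.

// LoongArch64: the frame shrinks from 0x78 to 0x70 bytes; the caller's SP is
// stored at offset 0x68, i.e. the last 8-byte slot (#13) of the frame.
static_assert(0x68 / 8 == 13, "caller-SP slot is slot #13");
static_assert(0x68 + 8 == 0x70, "slot #13 is the last slot of the 0x70-byte frame");

// RISC-V: 128-byte frame; s11 is saved at 112 and the caller's SP at 120,
// again the last 8-byte slot (#15).
static_assert(120 / 8 == 15, "caller-SP slot is slot #15");
static_assert(120 + 8 == 128, "slot #15 is the last slot of the 128-byte frame");
```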
@@ -278,6 +294,7 @@ C_FUNC(\Name): EPILOG_RESTORE_REG_PAIR s5, s6, 64 EPILOG_RESTORE_REG_PAIR s7, s8, 80 EPILOG_RESTORE_REG_PAIR s9, s10, 96 + EPILOG_RESTORE_REG s11, 112 EPILOG_RESTORE_REG_PAIR_INDEXED s0, ra, 128 .endm diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index a6437b56ac7b..f709bc465036 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -1,5 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. + #include "common.h" #include @@ -7,11 +8,11 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "daccess.h" -#include "PalRedhawkCommon.h" +#include "PalLimitedContext.h" #include "regdisplay.h" #include "ICodeManager.h" #include "CoffNativeCodeManager.h" -#include "varint.h" +#include "NativePrimitiveDecoder.h" #include "holder.h" #include "CommonMacros.inl" @@ -425,9 +426,16 @@ bool CoffNativeCodeManager::IsSafePoint(PTR_VOID pvAddress) #else // Extract the necessary information from the info block header hdrInfo info; - DecodeGCHdrInfo(GCInfoToken(gcInfo), codeOffset, &info); + size_t infoSize = DecodeGCHdrInfo(GCInfoToken(gcInfo), codeOffset, &info); + PTR_CBYTE table = gcInfo + infoSize; + + if (info.prologOffs != hdrInfo::NOT_IN_PROLOG || info.epilogOffs != hdrInfo::NOT_IN_EPILOG) + return false; - return info.interruptible && info.prologOffs == hdrInfo::NOT_IN_PROLOG && info.epilogOffs == hdrInfo::NOT_IN_EPILOG; + if (!info.interruptible) + return false; + + return !IsInNoGCRegion(&info, table, codeOffset); #endif } @@ -821,19 +829,6 @@ bool CoffNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) return true; } -// Convert the return kind that was encoded by RyuJIT to the -// enum used by the runtime. 
-GCRefKind GetGcRefKind(ReturnKind returnKind) -{ -#ifdef TARGET_ARM64 - ASSERT((returnKind >= RT_Scalar) && (returnKind <= RT_ByRef_ByRef)); -#else - ASSERT((returnKind >= RT_Scalar) && (returnKind <= RT_ByRef)); -#endif - - return (GCRefKind)returnKind; -} - bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation) // out @@ -972,7 +967,7 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)registerSet.PCTAddr; return true; -#endif +#endif } #ifdef TARGET_X86 @@ -983,7 +978,8 @@ GCRefKind CoffNativeCodeManager::GetReturnValueKind(MethodInfo * pMethodInfo, hdrInfo infoBuf; size_t infoSize = DecodeGCHdrInfo(GCInfoToken(gcInfo), codeOffset, &infoBuf); - return GetGcRefKind(infoBuf.returnKind); + ASSERT(infoBuf.returnKind != RT_Float); // See TODO above + return (GCRefKind)infoBuf.returnKind; } #endif @@ -1044,7 +1040,7 @@ bool CoffNativeCodeManager::EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMet pEnumState->pMethodStartAddress = dac_cast(*pMethodStartAddress); pEnumState->pEHInfo = dac_cast(m_moduleBase + *dac_cast(p)); pEnumState->uClause = 0; - pEnumState->nClauses = VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEnumState->nClauses = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); return true; } @@ -1059,9 +1055,9 @@ bool CoffNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE return false; pEnumState->uClause++; - pEHClauseOut->m_tryStartOffset = VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_tryStartOffset = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); - uint32_t tryEndDeltaAndClauseKind = VarInt::ReadUnsigned(pEnumState->pEHInfo); + uint32_t tryEndDeltaAndClauseKind = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); pEHClauseOut->m_clauseKind = (EHClauseKind)(tryEndDeltaAndClauseKind & 0x3); pEHClauseOut->m_tryEndOffset = pEHClauseOut->m_tryStartOffset + (tryEndDeltaAndClauseKind >> 2); @@ -1077,22 +1073,22 @@ bool CoffNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE switch (pEHClauseOut->m_clauseKind) { case EH_CLAUSE_TYPED: - pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); // Read target type { // @TODO: Compress EHInfo using type table index scheme // https://github.com/dotnet/corert/issues/972 - uint32_t typeRVA = *((PTR_uint32_t&)pEnumState->pEHInfo)++; + uint32_t typeRVA = NativePrimitiveDecoder::ReadUInt32(pEnumState->pEHInfo); pEHClauseOut->m_pTargetType = dac_cast(m_moduleBase + typeRVA); } break; case EH_CLAUSE_FAULT: - pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); break; case EH_CLAUSE_FILTER: - pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); - pEHClauseOut->m_filterAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_filterAddress = pEnumState->pMethodStartAddress + 
NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo); break; default: UNREACHABLE_MSG("unexpected EHClauseKind"); @@ -1151,8 +1147,8 @@ PTR_VOID CoffNativeCodeManager::GetAssociatedData(PTR_VOID ControlPC) return dac_cast(m_moduleBase + dataRVA); } -extern "C" void __stdcall RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, uint32_t cbRange); -extern "C" bool __stdcall RegisterUnboxingStubs(PTR_VOID pvStartRange, uint32_t cbRange); +extern "C" void RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, uint32_t cbRange); +extern "C" bool RegisterUnboxingStubs(PTR_VOID pvStartRange, uint32_t cbRange); extern "C" bool RhRegisterOSModule(void * pModule, diff --git a/src/coreclr/nativeaot/Runtime/windows/PalCommon.cpp b/src/coreclr/nativeaot/Runtime/windows/PalCommon.cpp new file mode 100644 index 000000000000..c10587751eb1 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/windows/PalCommon.cpp @@ -0,0 +1,95 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementation of the portions of the Platform Abstraction Layer (PAL) library that are common among +// multiple PAL variants. +// + +#include +#include +#include +#include +#include "CommonTypes.h" +#include "daccess.h" +#include "PalLimitedContext.h" +#include "Pal.h" +#include +#include "CommonMacros.h" +#include "rhassert.h" + +// Given the OS handle of a loaded module, compute the upper and lower virtual address bounds (inclusive). +void PalGetModuleBounds(HANDLE hOsHandle, _Out_ uint8_t ** ppLowerBound, _Out_ uint8_t ** ppUpperBound) +{ + BYTE *pbModule = (BYTE*)hOsHandle; + DWORD cbModule; + + IMAGE_NT_HEADERS *pNtHeaders = (IMAGE_NT_HEADERS*)(pbModule + ((IMAGE_DOS_HEADER*)hOsHandle)->e_lfanew); + if (pNtHeaders->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) + cbModule = ((IMAGE_OPTIONAL_HEADER32*)&pNtHeaders->OptionalHeader)->SizeOfImage; + else + cbModule = ((IMAGE_OPTIONAL_HEADER64*)&pNtHeaders->OptionalHeader)->SizeOfImage; + + *ppLowerBound = pbModule; + *ppUpperBound = pbModule + cbModule - 1; +} + +uint32_t g_RhNumberOfProcessors; + +int32_t PalGetProcessCpuCount() +{ + ASSERT(g_RhNumberOfProcessors > 0); + return g_RhNumberOfProcessors; +} + +// Retrieves the entire range of memory dedicated to the calling thread's stack. This does +// not get the current dynamic bounds of the stack, which can be significantly smaller than +// the maximum bounds. +bool PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut) +{ + // VirtualQuery on the address of a local variable to get the allocation + // base of the stack. Then use the StackBase field in the TEB to give + // the highest address of the stack region. + MEMORY_BASIC_INFORMATION mbi = { 0 }; + SIZE_T cb = VirtualQuery(&mbi, &mbi, sizeof(mbi)); + if (cb != sizeof(mbi)) + return false; + + NT_TIB* pTib = (NT_TIB*)NtCurrentTeb(); + *ppStackHighOut = pTib->StackBase; // stack base is the highest address + *ppStackLowOut = mbi.AllocationBase; // allocation base is the lowest address + return true; +} + +// retrieves the full path to the specified module, if moduleBase is NULL retrieves the full path to the +// executable module of the current process. +// +// Return value: number of characters in name string +// +//NOTE: This implementation exists because calling GetModuleFileName is not wack compliant. 
if we later decide +// that the framework package containing mrt100_app no longer needs to be wack compliant, this should be +// removed and the windows implementation of GetModuleFileName should be substitued on windows. +int32_t PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase) +{ + TEB* pTEB = NtCurrentTeb(); + LIST_ENTRY* pStartLink = &(pTEB->ProcessEnvironmentBlock->Ldr->InMemoryOrderModuleList); + LIST_ENTRY* pCurLink = pStartLink->Flink; + + do + { + LDR_DATA_TABLE_ENTRY* pEntry = CONTAINING_RECORD(pCurLink, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks); + + //null moduleBase will result in the first module being returned + //since the module list is ordered this is the executable module of the current process + if ((pEntry->DllBase == moduleBase) || (moduleBase == NULL)) + { + *pModuleNameOut = pEntry->FullDllName.Buffer; + return pEntry->FullDllName.Length / 2; + } + pCurLink = pCurLink->Flink; + } + while (pCurLink != pStartLink); + + *pModuleNameOut = NULL; + return 0; +} diff --git a/src/coreclr/nativeaot/Runtime/windows/PalInline.h b/src/coreclr/nativeaot/Runtime/windows/PalInline.h new file mode 100644 index 000000000000..d03d9399d015 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/windows/PalInline.h @@ -0,0 +1,189 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#if defined(HOST_ARM64) +#include +#endif + +// Implementation of NativeAOT PAL inline functions + +EXTERN_C long __cdecl _InterlockedIncrement(long volatile *); +#pragma intrinsic(_InterlockedIncrement) +FORCEINLINE int32_t PalInterlockedIncrement(_Inout_ int32_t volatile *pDst) +{ + return _InterlockedIncrement((long volatile *)pDst); +} + +EXTERN_C long __cdecl _InterlockedDecrement(long volatile *); +#pragma intrinsic(_InterlockedDecrement) +FORCEINLINE int32_t PalInterlockedDecrement(_Inout_ int32_t volatile *pDst) +{ + return _InterlockedDecrement((long volatile *)pDst); +} + +EXTERN_C long _InterlockedOr(long volatile *, long); +#pragma intrinsic(_InterlockedOr) +FORCEINLINE uint32_t PalInterlockedOr(_Inout_ uint32_t volatile *pDst, uint32_t iValue) +{ + return _InterlockedOr((long volatile *)pDst, iValue); +} + +EXTERN_C long _InterlockedAnd(long volatile *, long); +#pragma intrinsic(_InterlockedAnd) +FORCEINLINE uint32_t PalInterlockedAnd(_Inout_ uint32_t volatile *pDst, uint32_t iValue) +{ + return _InterlockedAnd((long volatile *)pDst, iValue); +} + +EXTERN_C long __PN__MACHINECALL_CDECL_OR_DEFAULT _InterlockedExchange(long volatile *, long); +#pragma intrinsic(_InterlockedExchange) +FORCEINLINE int32_t PalInterlockedExchange(_Inout_ int32_t volatile *pDst, int32_t iValue) +{ + return _InterlockedExchange((long volatile *)pDst, iValue); +} + +EXTERN_C long __PN__MACHINECALL_CDECL_OR_DEFAULT _InterlockedCompareExchange(long volatile *, long, long); +#pragma intrinsic(_InterlockedCompareExchange) +FORCEINLINE int32_t PalInterlockedCompareExchange(_Inout_ int32_t volatile *pDst, int32_t iValue, int32_t iComparand) +{ + return _InterlockedCompareExchange((long volatile *)pDst, iValue, iComparand); +} + +EXTERN_C int64_t _InterlockedCompareExchange64(int64_t volatile *, int64_t, int64_t); +#pragma intrinsic(_InterlockedCompareExchange64) +FORCEINLINE int64_t PalInterlockedCompareExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue, int64_t iComparand) +{ + return _InterlockedCompareExchange64(pDst, iValue, iComparand); +} + +#ifdef HOST_X86 +FORCEINLINE int64_t 
PalInterlockedExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue) +{ + int64_t iOldValue; + do { + iOldValue = *pDst; + } while (PalInterlockedCompareExchange64(pDst, + iValue, + iOldValue) != iOldValue); + return iOldValue; +} + +FORCEINLINE int64_t PalInterlockedIncrement64(_Inout_ int64_t volatile *Addend) +{ + int64_t Old; + do { + Old = *Addend; + } while (PalInterlockedCompareExchange64(Addend, + Old + 1, + Old) != Old); + return Old + 1; +} +#else // HOST_X86 +EXTERN_C int64_t _InterlockedExchange64(int64_t volatile *, int64_t); +#pragma intrinsic(_InterlockedExchange64) +FORCEINLINE int64_t PalInterlockedExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue) +{ + return _InterlockedExchange64(pDst, iValue); +} + +EXTERN_C int64_t _InterlockedIncrement64(int64_t volatile *); +#pragma intrinsic(_InterlockedIncrement64) +FORCEINLINE int64_t PalInterlockedIncrement64(_Inout_ int64_t volatile *pDst) +{ + return _InterlockedIncrement64(pDst); +} +#endif // HOST_X86 + +#if defined(HOST_AMD64) || defined(HOST_ARM64) +EXTERN_C uint8_t _InterlockedCompareExchange128(int64_t volatile *, int64_t, int64_t, int64_t *); +#pragma intrinsic(_InterlockedCompareExchange128) +FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult); +} +#endif // HOST_AMD64 + +#ifdef HOST_64BIT + +EXTERN_C void * _InterlockedExchangePointer(void * volatile *, void *); +#pragma intrinsic(_InterlockedExchangePointer) +FORCEINLINE void * PalInterlockedExchangePointer(_Inout_ void * volatile *pDst, _In_ void *pValue) +{ + return _InterlockedExchangePointer((void * volatile *)pDst, pValue); +} + +EXTERN_C void * _InterlockedCompareExchangePointer(void * volatile *, void *, void *); +#pragma intrinsic(_InterlockedCompareExchangePointer) +FORCEINLINE void * PalInterlockedCompareExchangePointer(_Inout_ void * volatile *pDst, _In_ void *pValue, _In_ void *pComparand) +{ + return _InterlockedCompareExchangePointer((void * volatile *)pDst, pValue, pComparand); +} + +#else // HOST_64BIT + +#define PalInterlockedExchangePointer(_pDst, _pValue) \ + ((void *)_InterlockedExchange((long volatile *)(_pDst), (long)(size_t)(_pValue))) + +#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ + ((void *)_InterlockedCompareExchange((long volatile *)(_pDst), (long)(size_t)(_pValue), (long)(size_t)(_pComparand))) + +#endif // HOST_64BIT + +EXTERN_C __declspec(dllimport) unsigned long __stdcall GetLastError(); +FORCEINLINE int PalGetLastError() +{ + return (int)GetLastError(); +} + +EXTERN_C __declspec(dllimport) void __stdcall SetLastError(unsigned long error); +FORCEINLINE void PalSetLastError(int error) +{ + SetLastError((unsigned long)error); +} + +#if defined(HOST_X86) + +EXTERN_C void _mm_pause(); +#pragma intrinsic(_mm_pause) +#define PalYieldProcessor() _mm_pause() + +FORCEINLINE void PalMemoryBarrier() +{ + long Barrier; + _InterlockedOr(&Barrier, 0); +} + +#elif defined(HOST_AMD64) + +EXTERN_C void _mm_pause(); +#pragma intrinsic(_mm_pause) +#define PalYieldProcessor() _mm_pause() + +EXTERN_C void __faststorefence(); +#pragma intrinsic(__faststorefence) +#define PalMemoryBarrier() __faststorefence() + +#elif defined(HOST_ARM64) + +EXTERN_C void __yield(void); +#pragma intrinsic(__yield) +EXTERN_C void __dmb(unsigned int _Type); +#pragma intrinsic(__dmb) +FORCEINLINE void PalYieldProcessor() +{ + 
__dmb(_ARM64_BARRIER_ISHST); + __yield(); +} + +#define PalMemoryBarrier() __dmb(_ARM64_BARRIER_ISH) + +#else +#error Unsupported architecture +#endif + +#define PalDebugBreak() __debugbreak() + +FORCEINLINE int32_t PalOsPageSize() +{ + return 0x1000; +} diff --git a/src/coreclr/nativeaot/Runtime/windows/PalMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalMinWin.cpp new file mode 100644 index 000000000000..2927e3585353 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/windows/PalMinWin.cpp @@ -0,0 +1,1073 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include +#include +#include + +#include "holder.h" + +#define _T(s) L##s +#include "RhConfig.h" + +#include "gcenv.h" +#include "gcenv.ee.h" +#include "gcconfig.h" + +#include "thread.h" +#include "threadstore.h" + +#include "nativecontext.h" + +#ifdef FEATURE_SPECIAL_USER_MODE_APC +#include +#endif + +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (0x80000) +#endif // XSTATE_MASK_APX + +// Index for the fiber local storage of the attached thread pointer +static uint32_t g_flsIndex = FLS_OUT_OF_INDEXES; + +// This is called when each *fiber* is destroyed. When the home fiber of a thread is destroyed, +// it means that the thread itself is destroyed. +// Since we receive that notification outside of the Loader Lock, it allows us to safely acquire +// the ThreadStore lock in the RuntimeThreadShutdown. +void __stdcall FiberDetachCallback(void* lpFlsData) +{ + ASSERT(g_flsIndex != FLS_OUT_OF_INDEXES); + ASSERT(g_flsIndex != NULL); + ASSERT(lpFlsData == FlsGetValue(g_flsIndex)); + + // The current fiber is the home fiber of a thread, so the thread is shutting down + RuntimeThreadShutdown(lpFlsData); +} + +bool PalInitComAndFlsSlot() +{ + ASSERT(g_flsIndex == FLS_OUT_OF_INDEXES); + + // Making finalizer thread MTA early ensures that COM is initialized before we initialize our thread + // termination callback. + HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED); + if (FAILED(hr)) + return false; + + // We use fiber detach callbacks to run our thread shutdown code because the fiber detach + // callback is made without the OS loader lock + g_flsIndex = FlsAlloc(FiberDetachCallback); + return g_flsIndex != FLS_OUT_OF_INDEXES; +} + +// Register the thread with OS to be notified when thread is about to be destroyed +// It fails fast if a different thread was already registered with the current fiber. +// Parameters: +// thread - thread to attach +void PalAttachThread(void* thread) +{ + void* threadFromCurrentFiber = FlsGetValue(g_flsIndex); + + if (threadFromCurrentFiber != NULL) + { + ASSERT_UNCONDITIONALLY("Multiple threads encountered from a single fiber"); + RhFailFast(); + } + + // Associate the current fiber with the current thread. This makes the current fiber the thread's "home" + // fiber. This fiber is the only fiber allowed to execute managed code on this thread. When this fiber + // is destroyed, we consider the thread to be destroyed. + FlsSetValue(g_flsIndex, thread); +} + +static HMODULE LoadKernel32dll() +{ + return LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); +} + +static HMODULE LoadNtdlldll() +{ + return LoadLibraryExW(L"ntdll.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); +} + +void InitializeCurrentProcessCpuCount() +{ + DWORD count; + + // If the configuration value has been set, it takes precedence. Otherwise, take into account + // process affinity and CPU quota limit. 
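The new PalMinWin.cpp above registers per-thread shutdown through a fiber-local-storage callback precisely because that callback runs outside the loader lock, which lets the shutdown path take the ThreadStore lock safely. Reduced to a self-contained sketch (all names here are illustrative stand-ins, not the runtime's):

```cpp
// Illustrative sketch only; RunThreadShutdown, InitShutdownHook and
// AttachCurrentThread are stand-in names, not the runtime's.
#include <windows.h>

static DWORD s_flsIndex = FLS_OUT_OF_INDEXES;

static void RunThreadShutdown(void* /*threadState*/) { /* per-thread cleanup */ }

// Called by the OS when a thread's home fiber is destroyed, i.e. when the
// thread exits, and notably without the loader lock held.
static void WINAPI OnFiberDetach(void* flsData)
{
    if (flsData != nullptr)
        RunThreadShutdown(flsData);
}

bool InitShutdownHook()
{
    s_flsIndex = FlsAlloc(OnFiberDetach);
    return s_flsIndex != FLS_OUT_OF_INDEXES;
}

void AttachCurrentThread(void* threadState)
{
    // Associate the per-thread state with the current fiber so the detach
    // callback can find it later.
    FlsSetValue(s_flsIndex, threadState);
}
```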
+ + const unsigned int MAX_PROCESSOR_COUNT = 0xffff; + uint64_t configValue; + + if (g_pRhConfig->ReadConfigValue("PROCESSOR_COUNT", &configValue, true /* decimal */) && + 0 < configValue && configValue <= MAX_PROCESSOR_COUNT) + { + count = (DWORD)configValue; + } + else + { + if (GCToOSInterface::CanEnableGCCPUGroups()) + { + count = GCToOSInterface::GetTotalProcessorCount(); + } + else + { + DWORD_PTR pmask, smask; + + if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask)) + { + count = 1; + } + else + { + count = 0; + + while (pmask) + { + pmask &= (pmask - 1); + count++; + } + + // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more + // than 64 processors, which would leave us with a count of 0. Since the GC + // expects there to be at least one processor to run on (and thus at least one + // heap), we'll return 64 here if count is 0, since there are likely a ton of + // processors available in that case. + if (count == 0) + count = 64; + } + } + + JOBOBJECT_CPU_RATE_CONTROL_INFORMATION cpuRateControl; + + if (QueryInformationJobObject(NULL, JobObjectCpuRateControlInformation, &cpuRateControl, + sizeof(cpuRateControl), NULL)) + { + const DWORD HardCapEnabled = JOB_OBJECT_CPU_RATE_CONTROL_ENABLE | JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP; + const DWORD MinMaxRateEnabled = JOB_OBJECT_CPU_RATE_CONTROL_ENABLE | JOB_OBJECT_CPU_RATE_CONTROL_MIN_MAX_RATE; + DWORD maxRate = 0; + + if ((cpuRateControl.ControlFlags & HardCapEnabled) == HardCapEnabled) + { + maxRate = cpuRateControl.CpuRate; + } + else if ((cpuRateControl.ControlFlags & MinMaxRateEnabled) == MinMaxRateEnabled) + { + maxRate = cpuRateControl.MaxRate; + } + + // The rate is the percentage times 100 + const DWORD MAXIMUM_CPU_RATE = 10000; + + if (0 < maxRate && maxRate < MAXIMUM_CPU_RATE) + { + DWORD cpuLimit = (maxRate * GCToOSInterface::GetTotalProcessorCount() + MAXIMUM_CPU_RATE - 1) / MAXIMUM_CPU_RATE; + if (cpuLimit < count) + count = cpuLimit; + } + } + } + + _ASSERTE(count > 0); + g_RhNumberOfProcessors = count; +} + +// The NativeAOT PAL must be initialized before any of its exports can be called. Returns true for a successful +// initialization and false on failure. 
+bool PalInit() +{ + GCConfig::Initialize(); + + if (!GCToOSInterface::Initialize()) + { + return false; + } + + InitializeCurrentProcessCpuCount(); + + return true; +} + +uint64_t PalGetCurrentOSThreadId() +{ + return GetCurrentThreadId(); +} + +#if !defined(USE_PORTABLE_HELPERS) && !defined(FEATURE_RX_THUNKS) +UInt32_BOOL PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut) +{ +#ifdef XBOX_ONE + return E_NOTIMPL; +#else + BOOL success = FALSE; + HANDLE hMap = NULL, hFile = INVALID_HANDLE_VALUE; + + const WCHAR * wszModuleFileName = NULL; + if (PalGetModuleFileName(&wszModuleFileName, hTemplateModule) == 0 || wszModuleFileName == NULL) + return FALSE; + + hFile = CreateFileW(wszModuleFileName, GENERIC_READ | GENERIC_EXECUTE, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) + goto cleanup; + + hMap = CreateFileMapping(hFile, NULL, SEC_IMAGE | PAGE_READONLY, 0, 0, NULL); + if (hMap == NULL) + goto cleanup; + + *newThunksOut = MapViewOfFile(hMap, 0, 0, templateRva, templateSize); + success = ((*newThunksOut) != NULL); + +cleanup: + CloseHandle(hMap); + CloseHandle(hFile); + + return success; +#endif +} + +UInt32_BOOL PalFreeThunksFromTemplate(_In_ void *pBaseAddress, size_t templateSize) +{ +#ifdef XBOX_ONE + return TRUE; +#else + return UnmapViewOfFile(pBaseAddress); +#endif +} +#endif // !USE_PORTABLE_HELPERS && !FEATURE_RX_THUNKS + +UInt32_BOOL PalMarkThunksAsValidCallTargets( + void *virtualAddress, + int thunkSize, + int thunksPerBlock, + int thunkBlockSize, + int thunkBlocksPerMapping) +{ + // We are using RWX pages so there is no need for this API for now. + // Once we have a scenario for non-RWX pages we should be able to put the implementation here + return TRUE; +} + +uint32_t PalCompatibleWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t handleCount, HANDLE* pHandles, UInt32_BOOL allowReentrantWait) +{ + if (!allowReentrantWait) + { + return WaitForMultipleObjectsEx(handleCount, pHandles, FALSE, timeout, alertable); + } + else + { + DWORD index; + SetLastError(ERROR_SUCCESS); // recommended by MSDN. + HRESULT hr = CoWaitForMultipleHandles(alertable ? COWAIT_ALERTABLE : 0, timeout, handleCount, pHandles, &index); + + switch (hr) + { + case S_OK: + return index; + + case RPC_S_CALLPENDING: + return WAIT_TIMEOUT; + + default: + SetLastError(HRESULT_CODE(hr)); + return WAIT_FAILED; + } + } +} + +HANDLE PalCreateLowMemoryResourceNotification() +{ + return CreateMemoryResourceNotification(LowMemoryResourceNotification); +} + +void PalSleep(uint32_t milliseconds) +{ + return Sleep(milliseconds); +} + +UInt32_BOOL PalSwitchToThread() +{ + return SwitchToThread(); +} + +HANDLE PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName) +{ + return CreateEventW(pEventAttributes, manualReset, initialState, pName); +} + +UInt32_BOOL PalAreShadowStacksEnabled() +{ +#if defined(TARGET_AMD64) + // The SSP is null when CET shadow stacks are not enabled. On processors that don't support shadow stacks, this is a + // no-op and the intrinsic returns 0. CET shadow stacks are enabled or disabled for all threads, so the result is the + // same from any thread. + return _rdsspq() != 0; +#else + // When implementing AreShadowStacksEnabled() on other architectures, review all the places where this is used. 
+ return false; +#endif +} + + +#ifdef TARGET_X86 + +#define EXCEPTION_HIJACK 0xe0434f4e // 0xe0000000 | 'COM'+1 + +PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord() +{ + return (PEXCEPTION_REGISTRATION_RECORD)__readfsdword(0); +} + +VOID SetCurrentSEHRecord(EXCEPTION_REGISTRATION_RECORD *pSEH) +{ + __writefsdword(0, (DWORD)pSEH); +} + +VOID PopSEHRecords(LPVOID pTargetSP) +{ + PEXCEPTION_REGISTRATION_RECORD currentContext = GetCurrentSEHRecord(); + // The last record in the chain is EXCEPTION_CHAIN_END which is defined as maxiumum + // pointer value so it cannot satisfy the loop condition. + while (currentContext < pTargetSP) + { + currentContext = currentContext->Next; + } + SetCurrentSEHRecord(currentContext); +} + +// This will check who caused the exception. If it was caused by the redirect function, +// the reason is to resume the thread back at the point it was redirected in the first +// place. If the exception was not caused by the function, then it was caused by the call +// out to the I[GC|Debugger]ThreadControl client and we need to determine if it's an +// exception that we can just eat and let the runtime resume the thread, or if it's an +// uncatchable exception that we need to pass on to the runtime. +int RtlRestoreContextFallbackExceptionFilter(PEXCEPTION_POINTERS pExcepPtrs, CONTEXT *pCtx, Thread *pThread) +{ + if (pExcepPtrs->ExceptionRecord->ExceptionCode == STATUS_STACK_OVERFLOW) + { + return EXCEPTION_CONTINUE_SEARCH; + } + + // Get the thread handle + _ASSERTE(pExcepPtrs->ExceptionRecord->ExceptionCode == EXCEPTION_HIJACK); + + // Copy everything in the saved context record into the EH context. + // Historically the EH context has enough space for every enabled context feature. + // That may not hold for the future features beyond AVX, but this codepath is + // supposed to be used only on OSes that do not have RtlRestoreContext. + CONTEXT* pTarget = pExcepPtrs->ContextRecord; + if (!CopyContext(pTarget, pCtx->ContextFlags, pCtx)) + { + PalPrintFatalError("Could not set context record.\n"); + RhFailFast(); + } + + DWORD espValue = pCtx->Esp; + + // NOTE: Ugly, ugly workaround. + // We need to resume the thread into the managed code where it was redirected, + // and the corresponding ESP is below the current one. But C++ expects that + // on an EXCEPTION_CONTINUE_EXECUTION that the ESP will be above where it has + // installed the SEH handler. To solve this, we need to remove all handlers + // that reside above the resumed ESP, but we must leave the OS-installed + // handler at the top, so we grab the top SEH handler, call + // PopSEHRecords which will remove all SEH handlers above the target ESP and + // then link the OS handler back in with SetCurrentSEHRecord. 
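The workaround described in the comment above is, at its core, linked-list surgery on the SEH chain: drop every registration that lives in the stack region being discarded, then splice the saved OS handler back on top. A simplified model of the net effect (not the patch's code; `Record` stands in for EXCEPTION_REGISTRATION_RECORD):

```cpp
// Simplified model only; Record stands in for EXCEPTION_REGISTRATION_RECORD
// and PruneAndRelink is an invented name. Registrations live on the stack, so
// "below targetSp" is the region discarded when execution resumes higher up.
#include <cstdint>

struct Record { Record* Next; };

static Record* PruneAndRelink(Record* osHandler, uintptr_t targetSp)
{
    Record* cur = osHandler->Next;
    while (cur != nullptr && reinterpret_cast<uintptr_t>(cur) < targetSp)
        cur = cur->Next;              // unlink registrations in the discarded region
    osHandler->Next = cur;            // splice the saved OS handler back on top
    return osHandler;
}
```

The real chain ends with EXCEPTION_CHAIN_END, a maximum pointer value, which is why the original loop needs no explicit end-of-list check.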
+ + // Get the special OS handler and save it until PopSEHRecords is done + EXCEPTION_REGISTRATION_RECORD *pCurSEH = GetCurrentSEHRecord(); + + // Unlink all records above the target resume ESP + PopSEHRecords((LPVOID)(size_t)espValue); + + // Link the special OS handler back in to the top + pCurSEH->Next = GetCurrentSEHRecord(); + + // Register the special OS handler as the top handler with the OS + SetCurrentSEHRecord(pCurSEH); + + // Resume execution at point where thread was originally redirected + return EXCEPTION_CONTINUE_EXECUTION; +} + +EXTERN_C VOID __cdecl RtlRestoreContextFallback(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord) +{ + Thread *pThread = ThreadStore::GetCurrentThread(); + + // A counter to avoid a nasty case where an + // up-stack filter throws another exception + // causing our filter to be run again for + // some unrelated exception. + int filter_count = 0; + + __try + { + // Save the instruction pointer where we redirected last. This does not race with the check + // against this variable because the GC will not attempt to redirect the thread until the + // instruction pointer of this thread is back in managed code. + pThread->SetPendingRedirect(ContextRecord->Eip); + RaiseException(EXCEPTION_HIJACK, 0, 0, NULL); + } + __except (++filter_count == 1 + ? RtlRestoreContextFallbackExceptionFilter(GetExceptionInformation(), ContextRecord, pThread) + : EXCEPTION_CONTINUE_SEARCH) + { + _ASSERTE(!"Reached body of __except in RtlRestoreContextFallback"); + } +} + +#endif // TARGET_X86 + +typedef BOOL(WINAPI* PINITIALIZECONTEXT2)(PVOID Buffer, DWORD ContextFlags, PCONTEXT* Context, PDWORD ContextLength, ULONG64 XStateCompactionMask); +PINITIALIZECONTEXT2 pfnInitializeContext2 = NULL; + +#ifdef TARGET_ARM64 +// Mirror the XSTATE_ARM64_SVE flags from winnt.h + +#ifndef XSTATE_ARM64_SVE +#define XSTATE_ARM64_SVE (2) +#endif // XSTATE_ARM64_SVE + +#ifndef XSTATE_MASK_ARM64_SVE +#define XSTATE_MASK_ARM64_SVE (1ui64 << (XSTATE_ARM64_SVE)) +#endif // XSTATE_MASK_ARM64_SVE + +#ifndef CONTEXT_ARM64_XSTATE +#define CONTEXT_ARM64_XSTATE (CONTEXT_ARM64 | 0x20L) +#endif // CONTEXT_ARM64_XSTATE + +#ifndef CONTEXT_XSTATE +#define CONTEXT_XSTATE CONTEXT_ARM64_XSTATE +#endif // CONTEXT_XSTATE + +typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); +PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; + +typedef BOOL(WINAPI* PSETXSTATEFEATURESMASK)(PCONTEXT Context, DWORD64 FeatureMask); +PSETXSTATEFEATURESMASK pfnSetXStateFeaturesMask = NULL; +#endif // TARGET_ARM64 + +#ifdef TARGET_X86 +EXTERN_C VOID __cdecl RtlRestoreContextFallback(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord); +typedef VOID(__cdecl* PRTLRESTORECONTEXT)(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord); +PRTLRESTORECONTEXT pfnRtlRestoreContext = NULL; + +#define CONTEXT_COMPLETE (CONTEXT_FULL | CONTEXT_FLOATING_POINT | \ + CONTEXT_DEBUG_REGISTERS | CONTEXT_EXTENDED_REGISTERS) +#else +#define CONTEXT_COMPLETE (CONTEXT_FULL | CONTEXT_DEBUG_REGISTERS) +#endif + +NATIVE_CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextBuffer) +{ + CONTEXT* pOSContext = NULL; + +#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM64) + DWORD context = CONTEXT_COMPLETE; + + if (pfnInitializeContext2 == NULL) + { + HMODULE hm = GetModuleHandleW(_T("kernel32.dll")); + if (hm != NULL) + { + pfnInitializeContext2 = (PINITIALIZECONTEXT2)GetProcAddress(hm, "InitializeContext2"); + } + } + +#if defined(TARGET_ARM64) + if (pfnGetEnabledXStateFeatures == 
NULL) + { + HMODULE hm = GetModuleHandleW(_T("kernel32.dll")); + if (hm != NULL) + { + pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hm, "GetEnabledXStateFeatures"); + } + } +#endif // TARGET_ARM64 + +#ifdef TARGET_X86 + if (pfnRtlRestoreContext == NULL) + { + HMODULE hm = GetModuleHandleW(_T("ntdll.dll")); + pfnRtlRestoreContext = (PRTLRESTORECONTEXT)GetProcAddress(hm, "RtlRestoreContext"); + if (pfnRtlRestoreContext == NULL) + { + // Fallback to the internal implementation if OS doesn't provide one. + pfnRtlRestoreContext = RtlRestoreContextFallback; + } + } +#endif //TARGET_X86 + +#if defined(TARGET_X86) || defined(TARGET_AMD64) + const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; + const ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_MPX | xStateFeatureMask; +#elif defined(TARGET_ARM64) + const DWORD64 xStateFeatureMask = XSTATE_MASK_ARM64_SVE; + const ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | xStateFeatureMask; +#endif + + // Determine if the processor supports extended features so we could retrieve those registers + DWORD64 FeatureMask = 0; + +#if defined(TARGET_X86) || defined(TARGET_AMD64) + FeatureMask = GetEnabledXStateFeatures(); +#elif defined(TARGET_ARM64) + if (pfnGetEnabledXStateFeatures != NULL) + { + FeatureMask = pfnGetEnabledXStateFeatures(); + } +#endif + + if ((FeatureMask & xStateFeatureMask) != 0) + { + context = context | CONTEXT_XSTATE; + } + + // the context does not need XSTATE_MASK_CET_U because we should not be using + // redirection when CET is enabled and should not be here. + _ASSERTE(!PalAreShadowStacksEnabled()); + + // Retrieve contextSize by passing NULL for Buffer + DWORD contextSize = 0; + // The initialize call should fail but return contextSize + BOOL success = pfnInitializeContext2 ? + pfnInitializeContext2(NULL, context, NULL, &contextSize, xStateCompactionMask) : + InitializeContext(NULL, context, NULL, &contextSize); + + // Spec mentions that we may get a different error (it was observed on Windows7). + // In such case the contextSize is undefined. + if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) + { + return NULL; + } + + // So now allocate a buffer of that size and call InitializeContext again + uint8_t* buffer = new (nothrow)uint8_t[contextSize]; + if (buffer != NULL) + { + success = pfnInitializeContext2 ? + pfnInitializeContext2(buffer, context, &pOSContext, &contextSize, xStateCompactionMask): + InitializeContext(buffer, context, &pOSContext, &contextSize); + + if (!success) + { + delete[] buffer; + buffer = NULL; + } + } + + if (!success) + { + pOSContext = NULL; + } + + *contextBuffer = buffer; + +#else + pOSContext = new (nothrow) CONTEXT; + pOSContext->ContextFlags = CONTEXT_COMPLETE; + *contextBuffer = NULL; +#endif + + return (NATIVE_CONTEXT*)pOSContext; +} + +_Success_(return) bool PalGetCompleteThreadContext(HANDLE hThread, _Out_ NATIVE_CONTEXT * pCtx) +{ + CONTEXT* pOSContext = &pCtx->ctx; + + _ASSERTE((pOSContext->ContextFlags & CONTEXT_COMPLETE) == CONTEXT_COMPLETE); + +#if defined(TARGET_ARM64) + if (pfnSetXStateFeaturesMask == NULL) + { + HMODULE hm = GetModuleHandleW(_T("kernel32.dll")); + if (hm != NULL) + { + pfnSetXStateFeaturesMask = (PSETXSTATEFEATURESMASK)GetProcAddress(hm, "SetXStateFeaturesMask"); + } + } +#endif // TARGET_ARM64 + + // This should not normally fail. + // The system silently ignores any feature specified in the FeatureMask which is not enabled on the processor. 
+#if defined(TARGET_X86) || defined(TARGET_AMD64) + if (!SetXStateFeaturesMask(pOSContext, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) + { + _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX"); + return FALSE; + } +#elif defined(TARGET_ARM64) + if ((pfnSetXStateFeaturesMask != NULL) && !pfnSetXStateFeaturesMask(pOSContext, XSTATE_MASK_ARM64_SVE)) + { + _ASSERTE(!"Could not apply XSTATE_MASK_ARM64_SVE"); + return FALSE; + } +#endif + + return GetThreadContext(hThread, pOSContext); +} + +_Success_(return) bool PalSetThreadContext(HANDLE hThread, _Out_ NATIVE_CONTEXT * pCtx) +{ + return SetThreadContext(hThread, &pCtx->ctx); +} + +void PalRestoreContext(NATIVE_CONTEXT * pCtx) +{ + CONTEXT* pOSContext = &pCtx->ctx; + + __asan_handle_no_return(); +#ifdef TARGET_X86 + _ASSERTE(pfnRtlRestoreContext != NULL); + pfnRtlRestoreContext(pOSContext, NULL); +#else + RtlRestoreContext(pOSContext, NULL); +#endif //TARGET_X86 +} + +#if defined(TARGET_X86) || defined(TARGET_AMD64) +void PopulateControlSegmentRegisters(CONTEXT* pContext) +{ + CONTEXT ctx; + + RtlCaptureContext(&ctx); + + pContext->SegCs = ctx.SegCs; + pContext->SegSs = ctx.SegSs; +} +#endif //defined(TARGET_X86) || defined(TARGET_AMD64) + +// These declarations are for a new special user-mode APC feature introduced in Windows. These are not yet available in Windows +// SDK headers, so some names below are prefixed with "CLONE_" to avoid conflicts in the future. Once the prefixed declarations +// become available in the Windows SDK headers, the prefixed declarations below can be removed in favor of the SDK ones. + +enum CLONE_QUEUE_USER_APC_FLAGS +{ + CLONE_QUEUE_USER_APC_FLAGS_NONE = 0x0, + CLONE_QUEUE_USER_APC_FLAGS_SPECIAL_USER_APC = 0x1, + CLONE_QUEUE_USER_APC_CALLBACK_DATA_CONTEXT = 0x10000 +}; + +struct CLONE_APC_CALLBACK_DATA +{ + ULONG_PTR Parameter; + PCONTEXT ContextRecord; + ULONG_PTR Reserved0; + ULONG_PTR Reserved1; +}; +typedef CLONE_APC_CALLBACK_DATA* CLONE_PAPC_CALLBACK_DATA; + +typedef BOOL (WINAPI* QueueUserAPC2Proc)(PAPCFUNC ApcRoutine, HANDLE Thread, ULONG_PTR Data, CLONE_QUEUE_USER_APC_FLAGS Flags); + +#define QUEUE_USER_APC2_UNINITIALIZED (QueueUserAPC2Proc)-1 +static QueueUserAPC2Proc g_pfnQueueUserAPC2Proc = QUEUE_USER_APC2_UNINITIALIZED; + +static const CLONE_QUEUE_USER_APC_FLAGS SpecialUserModeApcWithContextFlags = (CLONE_QUEUE_USER_APC_FLAGS) + (CLONE_QUEUE_USER_APC_FLAGS_SPECIAL_USER_APC | + CLONE_QUEUE_USER_APC_CALLBACK_DATA_CONTEXT); + +static void* g_returnAddressHijackTarget = NULL; + +static void NTAPI ActivationHandler(ULONG_PTR parameter) +{ + CLONE_APC_CALLBACK_DATA* data = (CLONE_APC_CALLBACK_DATA*)parameter; + Thread::HijackCallback((NATIVE_CONTEXT*)data->ContextRecord, NULL); + + Thread* pThread = (Thread*)data->Parameter; + pThread->SetActivationPending(false); +} + +void InitHijackingAPIs() +{ + HMODULE hKernel32 = LoadKernel32dll(); + +#ifdef HOST_AMD64 + typedef BOOL (WINAPI *IsWow64Process2Proc)(HANDLE hProcess, USHORT *pProcessMachine, USHORT *pNativeMachine); + + IsWow64Process2Proc pfnIsWow64Process2Proc = (IsWow64Process2Proc)GetProcAddress(hKernel32, "IsWow64Process2"); + USHORT processMachine, hostMachine; + if (pfnIsWow64Process2Proc != nullptr && + (*pfnIsWow64Process2Proc)(GetCurrentProcess(), &processMachine, &hostMachine) && + (hostMachine == IMAGE_FILE_MACHINE_ARM64) && + !IsWindowsVersionOrGreater(10, 0, 26100)) + { + // Special user-mode APCs are broken on WOW64 processes (x64 running on Arm64 machine) with Windows older than 11.0.26100 
(24H2) + g_pfnQueueUserAPC2Proc = NULL; + } + else +#endif // HOST_AMD64 + { + g_pfnQueueUserAPC2Proc = (QueueUserAPC2Proc)GetProcAddress(hKernel32, "QueueUserAPC2"); + } + + if (PalAreShadowStacksEnabled()) + { + // When shadow stacks are enabled, support for special user-mode APCs is required + _ASSERTE(g_pfnQueueUserAPC2Proc != NULL); + + HMODULE hModNtdll = LoadNtdlldll(); + typedef void* (*PFN_RtlGetReturnAddressHijackTarget)(void); + + void* rtlGetReturnAddressHijackTarget = GetProcAddress(hModNtdll, "RtlGetReturnAddressHijackTarget"); + if (rtlGetReturnAddressHijackTarget != NULL) + { + g_returnAddressHijackTarget = ((PFN_RtlGetReturnAddressHijackTarget)rtlGetReturnAddressHijackTarget)(); + } + + if (g_returnAddressHijackTarget == NULL) + { + _ASSERTE(!"RtlGetReturnAddressHijackTarget must provide a target when shadow stacks are enabled"); + } + } +} + +HijackFunc* PalGetHijackTarget(HijackFunc* defaultHijackTarget) +{ + return g_returnAddressHijackTarget ? (HijackFunc*)g_returnAddressHijackTarget : defaultHijackTarget; +} + +void PalHijack(Thread* pThreadToHijack) +{ + HANDLE hThread = pThreadToHijack->GetOSThreadHandle(); + + if (hThread == INVALID_HANDLE_VALUE) + { + // cannot proceed + return; + } + +#ifdef FEATURE_SPECIAL_USER_MODE_APC + + // initialize g_pfnQueueUserAPC2Proc on demand. + // Note that only one thread at a time may perform suspension (guaranteed by the thread store lock) + // so simple condition check is ok. + if (g_pfnQueueUserAPC2Proc == QUEUE_USER_APC2_UNINITIALIZED) + { + InitHijackingAPIs(); + } + + if (g_pfnQueueUserAPC2Proc) + { + // An APC can be interrupted by another one, do not queue more if one is pending. + if (pThreadToHijack->IsActivationPending()) + { + return; + } + + pThreadToHijack->SetActivationPending(true); + BOOL success = g_pfnQueueUserAPC2Proc( + &ActivationHandler, + hThread, + (ULONG_PTR)pThreadToHijack, + SpecialUserModeApcWithContextFlags); + + if (success) + { + return; + } + + // queuing an APC failed + pThreadToHijack->SetActivationPending(false); + + DWORD lastError = GetLastError(); + if (lastError != ERROR_INVALID_PARAMETER && lastError != ERROR_NOT_SUPPORTED) + { + // An unexpected failure has happened. It is a concern. + ASSERT_UNCONDITIONALLY("Failed to queue an APC for unusual reason."); + + // maybe it will work next time. + return; + } + + // the flags that we passed are not supported. + // we will not try again + g_pfnQueueUserAPC2Proc = NULL; + } +#endif + + if (SuspendThread(hThread) == (DWORD)-1) + { + return; + } + + CONTEXT win32ctx; + win32ctx.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_EXCEPTION_REQUEST; + + if (GetThreadContext(hThread, &win32ctx)) + { + bool isSafeToRedirect = true; + +#ifdef TARGET_X86 + // Workaround around WOW64 problems. Only do this workaround if a) this is x86, and b) the OS does + // not support trap frame reporting. + if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) == 0) + { + // This code fixes a race between GetThreadContext and NtContinue. If we redirect managed code + // at the same place twice in a row, we run the risk of reading a bogus CONTEXT when we redirect + // the second time. This leads to access violations on x86 machines. To fix the problem, we + // never redirect at the same instruction pointer that we redirected at on the previous GC. 
+ if (pThreadToHijack->CheckPendingRedirect(win32ctx.Eip)) + { + isSafeToRedirect = false; + } + } +#else + // In some cases Windows will not set the CONTEXT_EXCEPTION_REPORTING flag if the thread is executing + // in kernel mode (i.e. in the middle of a syscall or exception handling). Therefore, we should treat + // the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that it is not safe to + // manipulate with the current state of the thread context. + isSafeToRedirect = (win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0; +#endif + + // The CONTEXT_SERVICE_ACTIVE and CONTEXT_EXCEPTION_ACTIVE output flags indicate we suspended the thread + // at a point where the kernel cannot guarantee a completely accurate context. We'll fail the request in + // this case (which should force our caller to resume the thread and try again -- since this is a fairly + // narrow window we're highly likely to succeed next time). + if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0 && + ((win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE)) != 0)) + { + isSafeToRedirect = false; + } + + if (isSafeToRedirect) + { + Thread::HijackCallback((NATIVE_CONTEXT*)&win32ctx, pThreadToHijack); + } + } + + ResumeThread(hThread); +} + +#define SET_THREAD_DESCRIPTION_UNINITIALIZED (pfnSetThreadDescription)-1 +typedef HRESULT(WINAPI *pfnSetThreadDescription)(HANDLE hThread, PCWSTR lpThreadDescription); +static pfnSetThreadDescription g_pfnSetThreadDescription = SET_THREAD_DESCRIPTION_UNINITIALIZED; + +bool PalStartBackgroundWork(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext, BOOL highPriority) +{ + HANDLE hThread = CreateThread( + NULL, + 0, + (LPTHREAD_START_ROUTINE)callback, + pCallbackContext, + highPriority ? CREATE_SUSPENDED : 0, + NULL); + + if (hThread == NULL) + return false; + + if (highPriority) + { + SetThreadPriority(hThread, THREAD_PRIORITY_HIGHEST); + ResumeThread(hThread); + } + + CloseHandle(hThread); + return true; +} + +bool PalSetCurrentThreadNameW(const WCHAR* name) +{ + if (g_pfnSetThreadDescription == SET_THREAD_DESCRIPTION_UNINITIALIZED) + { + HMODULE hKernel32 = LoadKernel32dll(); + g_pfnSetThreadDescription = (pfnSetThreadDescription)GetProcAddress(hKernel32, "SetThreadDescription"); + } + if (!g_pfnSetThreadDescription) + { + return false; + } + HANDLE hThread = GetCurrentThread(); + g_pfnSetThreadDescription(hThread, name); + return true; +} + +bool PalSetCurrentThreadName(const char* name) +{ + size_t len = strlen(name); + wchar_t* threadNameWide = new (nothrow) wchar_t[len + 1]; + if (threadNameWide == nullptr) + { + return false; + } + if (MultiByteToWideChar(CP_UTF8, 0, name, -1, threadNameWide, (int)(len + 1)) == 0) + { + delete[] threadNameWide; + return false; + } + bool ret = PalSetCurrentThreadNameW(threadNameWide); + delete[] threadNameWide; + return ret; +} + +bool PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ + return PalStartBackgroundWork(callback, pCallbackContext, FALSE); +} + +bool PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ + return PalStartBackgroundWork(callback, pCallbackContext, TRUE); +} + +bool PalStartEventPipeHelperThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ + return PalStartBackgroundWork(callback, pCallbackContext, FALSE); +} + +HANDLE PalGetModuleHandleFromPointer(_In_ void* pointer) +{ + // The runtime is not designed to be unloadable today. 
Use GET_MODULE_HANDLE_EX_FLAG_PIN to prevent
+    // the module from ever unloading.
+
+    HMODULE module;
+    if (!GetModuleHandleExW(
+        GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_PIN,
+        (LPCWSTR)pointer,
+        &module))
+    {
+        return NULL;
+    }
+
+    return (HANDLE)module;
+}
+
+void PalPrintFatalError(const char* message)
+{
+    // Write the message using lowest-level OS API available. This is used to print the stack overflow
+    // message, so there is not much that can be done here.
+    DWORD dwBytesWritten;
+    WriteFile(GetStdHandle(STD_ERROR_HANDLE), message, (DWORD)strlen(message), &dwBytesWritten, NULL);
+}
+
+char* PalCopyTCharAsChar(const TCHAR* toCopy)
+{
+    int len = ::WideCharToMultiByte(CP_UTF8, 0, toCopy, -1, nullptr, 0, nullptr, nullptr);
+    if (len == 0)
+        return nullptr;
+
+    char* converted = new (nothrow) char[len];
+    int written = ::WideCharToMultiByte(CP_UTF8, 0, toCopy, -1, converted, len, nullptr, nullptr);
+    assert(len == written);
+    return converted;
+}
+
+HANDLE PalLoadLibrary(const char* moduleName)
+{
+    assert(moduleName);
+    size_t len = strlen(moduleName);
+    wchar_t* moduleNameWide = new (nothrow)wchar_t[len + 1];
+    if (moduleNameWide == nullptr)
+    {
+        return 0;
+    }
+    if (MultiByteToWideChar(CP_UTF8, 0, moduleName, -1, moduleNameWide, (int)(len + 1)) == 0)
+    {
+        return 0;
+    }
+    moduleNameWide[len] = '\0';
+
+    HANDLE result = LoadLibraryExW(moduleNameWide, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
+    delete[] moduleNameWide;
+    return result;
+}
+
+void* PalGetProcAddress(HANDLE module, const char* functionName)
+{
+    assert(module);
+    assert(functionName);
+    return GetProcAddress((HMODULE)module, functionName);
+}
+
+_Ret_maybenull_ _Post_writable_byte_size_(size) void* PalVirtualAlloc(uintptr_t size, uint32_t protect)
+{
+    return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, protect);
+}
+
+void PalVirtualFree(_In_ void* pAddress, uintptr_t size)
+{
+    VirtualFree(pAddress, 0, MEM_RELEASE);
+}
+
+UInt32_BOOL PalVirtualProtect(_In_ void* pAddress, uintptr_t size, uint32_t protect)
+{
+    DWORD oldProtect;
+    return VirtualProtect(pAddress, size, protect, &oldProtect);
+}
+
+void PalFlushInstructionCache(_In_ void* pAddress, size_t size)
+{
+    FlushInstructionCache(GetCurrentProcess(), pAddress, size);
+}
+
+#ifdef TARGET_AMD64
+uintptr_t GetSSP(CONTEXT *pContext)
+{
+    XSAVE_CET_U_FORMAT* pCET = (XSAVE_CET_U_FORMAT*)LocateXStateFeature(pContext, XSTATE_CET_U, NULL);
+    if ((pCET != NULL) && (pCET->Ia32CetUMsr != 0))
+    {
+        return pCET->Ia32Pl3SspMsr;
+    }
+
+    return 0;
+}
+
+void SetSSP(CONTEXT *pContext, uintptr_t ssp)
+{
+    XSAVE_CET_U_FORMAT* pCET = (XSAVE_CET_U_FORMAT*)LocateXStateFeature(pContext, XSTATE_CET_U, NULL);
+    if (pCET != NULL)
+    {
+        pCET->Ia32Pl3SspMsr = ssp;
+        pCET->Ia32CetUMsr = 1;
+    }
+}
+#endif // TARGET_AMD64
+
+uint16_t PalCaptureStackBackTrace(uint32_t arg1, uint32_t arg2, void* arg3, uint32_t* arg4)
+{
+    DWORD backTraceHash;
+    WORD res = ::RtlCaptureStackBackTrace(arg1, arg2, (PVOID*)arg3, &backTraceHash);
+    *arg4 = backTraceHash;
+    return res;
+}
+
+UInt32_BOOL PalCloseHandle(HANDLE arg1)
+{
+    return ::CloseHandle(arg1);
+}
+
+void PalFlushProcessWriteBuffers()
+{
+    ::FlushProcessWriteBuffers();
+}
+
+uint32_t PalGetCurrentProcessId()
+{
+    return static_cast<uint32_t>(::GetCurrentProcessId());
+}
+
+uint32_t PalGetEnvironmentVariable(_In_opt_ LPCWSTR lpName, _Out_writes_to_opt_(nSize, return + 1) LPWSTR lpBuffer, _In_ uint32_t nSize)
+{
+    return ::GetEnvironmentVariableW(lpName, lpBuffer, nSize);
+}
+
+UInt32_BOOL PalResetEvent(HANDLE arg1)
+{
+    return
::ResetEvent(arg1); +} + +UInt32_BOOL PalSetEvent(HANDLE arg1) +{ + return ::SetEvent(arg1); +} + +uint32_t PalWaitForSingleObjectEx(HANDLE arg1, uint32_t arg2, UInt32_BOOL arg3) +{ + return ::WaitForSingleObjectEx(arg1, arg2, arg3); +} + +void PalGetSystemTimeAsFileTime(FILETIME * arg1) +{ + ::GetSystemTimeAsFileTime(arg1); +} diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkCommon.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkCommon.cpp deleted file mode 100644 index a805762b4621..000000000000 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkCommon.cpp +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// -// Implementation of the portions of the Redhawk Platform Abstraction Layer (PAL) library that are common among -// multiple PAL variants. -// -// Note that in general we don't want to assume that Windows and Redhawk global definitions can co-exist. -// Since this code must include Windows headers to do its job we can't therefore safely include general -// Redhawk header files. -// - -#include -#include -#include -#include -#include "CommonTypes.h" -#include "daccess.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" -#include -#include "CommonMacros.h" -#include "rhassert.h" - - -#define REDHAWK_PALEXPORT extern "C" -#define REDHAWK_PALAPI __stdcall - - -// Given the OS handle of a loaded module, compute the upper and lower virtual address bounds (inclusive). -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalGetModuleBounds(HANDLE hOsHandle, _Out_ uint8_t ** ppLowerBound, _Out_ uint8_t ** ppUpperBound) -{ - BYTE *pbModule = (BYTE*)hOsHandle; - DWORD cbModule; - - IMAGE_NT_HEADERS *pNtHeaders = (IMAGE_NT_HEADERS*)(pbModule + ((IMAGE_DOS_HEADER*)hOsHandle)->e_lfanew); - if (pNtHeaders->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) - cbModule = ((IMAGE_OPTIONAL_HEADER32*)&pNtHeaders->OptionalHeader)->SizeOfImage; - else - cbModule = ((IMAGE_OPTIONAL_HEADER64*)&pNtHeaders->OptionalHeader)->SizeOfImage; - - *ppLowerBound = pbModule; - *ppUpperBound = pbModule + cbModule - 1; -} - -uint32_t g_RhNumberOfProcessors; - -REDHAWK_PALEXPORT int32_t REDHAWK_PALAPI PalGetProcessCpuCount() -{ - ASSERT(g_RhNumberOfProcessors > 0); - return g_RhNumberOfProcessors; -} - -// Retrieves the entire range of memory dedicated to the calling thread's stack. This does -// not get the current dynamic bounds of the stack, which can be significantly smaller than -// the maximum bounds. -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut) -{ - // VirtualQuery on the address of a local variable to get the allocation - // base of the stack. Then use the StackBase field in the TEB to give - // the highest address of the stack region. - MEMORY_BASIC_INFORMATION mbi = { 0 }; - SIZE_T cb = VirtualQuery(&mbi, &mbi, sizeof(mbi)); - if (cb != sizeof(mbi)) - return false; - - NT_TIB* pTib = (NT_TIB*)NtCurrentTeb(); - *ppStackHighOut = pTib->StackBase; // stack base is the highest address - *ppStackLowOut = mbi.AllocationBase; // allocation base is the lowest address - return true; -} - -// retrieves the full path to the specified module, if moduleBase is NULL retrieves the full path to the -// executable module of the current process. -// -// Return value: number of characters in name string -// -//NOTE: This implementation exists because calling GetModuleFileName is not wack compliant. 
if we later decide -// that the framework package containing mrt100_app no longer needs to be wack compliant, this should be -// removed and the windows implementation of GetModuleFileName should be substitued on windows. -REDHAWK_PALEXPORT int32_t PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase) -{ - TEB* pTEB = NtCurrentTeb(); - LIST_ENTRY* pStartLink = &(pTEB->ProcessEnvironmentBlock->Ldr->InMemoryOrderModuleList); - LIST_ENTRY* pCurLink = pStartLink->Flink; - - do - { - LDR_DATA_TABLE_ENTRY* pEntry = CONTAINING_RECORD(pCurLink, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks); - - //null moduleBase will result in the first module being returned - //since the module list is ordered this is the executable module of the current process - if ((pEntry->DllBase == moduleBase) || (moduleBase == NULL)) - { - *pModuleNameOut = pEntry->FullDllName.Buffer; - return pEntry->FullDllName.Length / 2; - } - pCurLink = pCurLink->Flink; - } - while (pCurLink != pStartLink); - - *pModuleNameOut = NULL; - return 0; -} - -REDHAWK_PALEXPORT uint64_t REDHAWK_PALAPI PalGetTickCount64() -{ - return GetTickCount64(); -} diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkInline.h b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkInline.h deleted file mode 100644 index 595c7f663b9d..000000000000 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkInline.h +++ /dev/null @@ -1,189 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#if defined(HOST_ARM64) -#include -#endif - -// Implementation of Redhawk PAL inline functions - -EXTERN_C long __cdecl _InterlockedIncrement(long volatile *); -#pragma intrinsic(_InterlockedIncrement) -FORCEINLINE int32_t PalInterlockedIncrement(_Inout_ int32_t volatile *pDst) -{ - return _InterlockedIncrement((long volatile *)pDst); -} - -EXTERN_C long __cdecl _InterlockedDecrement(long volatile *); -#pragma intrinsic(_InterlockedDecrement) -FORCEINLINE int32_t PalInterlockedDecrement(_Inout_ int32_t volatile *pDst) -{ - return _InterlockedDecrement((long volatile *)pDst); -} - -EXTERN_C long _InterlockedOr(long volatile *, long); -#pragma intrinsic(_InterlockedOr) -FORCEINLINE uint32_t PalInterlockedOr(_Inout_ uint32_t volatile *pDst, uint32_t iValue) -{ - return _InterlockedOr((long volatile *)pDst, iValue); -} - -EXTERN_C long _InterlockedAnd(long volatile *, long); -#pragma intrinsic(_InterlockedAnd) -FORCEINLINE uint32_t PalInterlockedAnd(_Inout_ uint32_t volatile *pDst, uint32_t iValue) -{ - return _InterlockedAnd((long volatile *)pDst, iValue); -} - -EXTERN_C long __PN__MACHINECALL_CDECL_OR_DEFAULT _InterlockedExchange(long volatile *, long); -#pragma intrinsic(_InterlockedExchange) -FORCEINLINE int32_t PalInterlockedExchange(_Inout_ int32_t volatile *pDst, int32_t iValue) -{ - return _InterlockedExchange((long volatile *)pDst, iValue); -} - -EXTERN_C long __PN__MACHINECALL_CDECL_OR_DEFAULT _InterlockedCompareExchange(long volatile *, long, long); -#pragma intrinsic(_InterlockedCompareExchange) -FORCEINLINE int32_t PalInterlockedCompareExchange(_Inout_ int32_t volatile *pDst, int32_t iValue, int32_t iComparand) -{ - return _InterlockedCompareExchange((long volatile *)pDst, iValue, iComparand); -} - -EXTERN_C int64_t _InterlockedCompareExchange64(int64_t volatile *, int64_t, int64_t); -#pragma intrinsic(_InterlockedCompareExchange64) -FORCEINLINE int64_t PalInterlockedCompareExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue, int64_t iComparand) 
-{ - return _InterlockedCompareExchange64(pDst, iValue, iComparand); -} - -#ifdef HOST_X86 -FORCEINLINE int64_t PalInterlockedExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue) -{ - int64_t iOldValue; - do { - iOldValue = *pDst; - } while (PalInterlockedCompareExchange64(pDst, - iValue, - iOldValue) != iOldValue); - return iOldValue; -} - -FORCEINLINE int64_t PalInterlockedIncrement64(_Inout_ int64_t volatile *Addend) -{ - int64_t Old; - do { - Old = *Addend; - } while (PalInterlockedCompareExchange64(Addend, - Old + 1, - Old) != Old); - return Old + 1; -} -#else // HOST_X86 -EXTERN_C int64_t _InterlockedExchange64(int64_t volatile *, int64_t); -#pragma intrinsic(_InterlockedExchange64) -FORCEINLINE int64_t PalInterlockedExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue) -{ - return _InterlockedExchange64(pDst, iValue); -} - -EXTERN_C int64_t _InterlockedIncrement64(int64_t volatile *); -#pragma intrinsic(_InterlockedIncrement64) -FORCEINLINE int64_t PalInterlockedIncrement64(_Inout_ int64_t volatile *pDst) -{ - return _InterlockedIncrement64(pDst); -} -#endif // HOST_X86 - -#if defined(HOST_AMD64) || defined(HOST_ARM64) -EXTERN_C uint8_t _InterlockedCompareExchange128(int64_t volatile *, int64_t, int64_t, int64_t *); -#pragma intrinsic(_InterlockedCompareExchange128) -FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) -{ - return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult); -} -#endif // HOST_AMD64 - -#ifdef HOST_64BIT - -EXTERN_C void * _InterlockedExchangePointer(void * volatile *, void *); -#pragma intrinsic(_InterlockedExchangePointer) -FORCEINLINE void * PalInterlockedExchangePointer(_Inout_ void * volatile *pDst, _In_ void *pValue) -{ - return _InterlockedExchangePointer((void * volatile *)pDst, pValue); -} - -EXTERN_C void * _InterlockedCompareExchangePointer(void * volatile *, void *, void *); -#pragma intrinsic(_InterlockedCompareExchangePointer) -FORCEINLINE void * PalInterlockedCompareExchangePointer(_Inout_ void * volatile *pDst, _In_ void *pValue, _In_ void *pComparand) -{ - return _InterlockedCompareExchangePointer((void * volatile *)pDst, pValue, pComparand); -} - -#else // HOST_64BIT - -#define PalInterlockedExchangePointer(_pDst, _pValue) \ - ((void *)_InterlockedExchange((long volatile *)(_pDst), (long)(size_t)(_pValue))) - -#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ - ((void *)_InterlockedCompareExchange((long volatile *)(_pDst), (long)(size_t)(_pValue), (long)(size_t)(_pComparand))) - -#endif // HOST_64BIT - -EXTERN_C __declspec(dllimport) unsigned long __stdcall GetLastError(); -FORCEINLINE int PalGetLastError() -{ - return (int)GetLastError(); -} - -EXTERN_C __declspec(dllimport) void __stdcall SetLastError(unsigned long error); -FORCEINLINE void PalSetLastError(int error) -{ - SetLastError((unsigned long)error); -} - -#if defined(HOST_X86) - -EXTERN_C void _mm_pause(); -#pragma intrinsic(_mm_pause) -#define PalYieldProcessor() _mm_pause() - -FORCEINLINE void PalMemoryBarrier() -{ - long Barrier; - _InterlockedOr(&Barrier, 0); -} - -#elif defined(HOST_AMD64) - -EXTERN_C void _mm_pause(); -#pragma intrinsic(_mm_pause) -#define PalYieldProcessor() _mm_pause() - -EXTERN_C void __faststorefence(); -#pragma intrinsic(__faststorefence) -#define PalMemoryBarrier() __faststorefence() - -#elif defined(HOST_ARM64) - -EXTERN_C void __yield(void); -#pragma intrinsic(__yield) -EXTERN_C void 
__dmb(unsigned int _Type); -#pragma intrinsic(__dmb) -FORCEINLINE void PalYieldProcessor() -{ - __dmb(_ARM64_BARRIER_ISHST); - __yield(); -} - -#define PalMemoryBarrier() __dmb(_ARM64_BARRIER_ISH) - -#else -#error Unsupported architecture -#endif - -#define PalDebugBreak() __debugbreak() - -FORCEINLINE int32_t PalOsPageSize() -{ - return 0x1000; -} diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp deleted file mode 100644 index 844a1080e2de..000000000000 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ /dev/null @@ -1,1072 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// -// Implementation of the Redhawk Platform Abstraction Layer (PAL) library when MinWin is the platform. In this -// case most or all of the import requirements which Redhawk has can be satisfied via a forwarding export to -// some native MinWin library. Therefore most of the work is done in the .def file and there is very little -// code here. -// -// Note that in general we don't want to assume that Windows and Redhawk global definitions can co-exist. -// Since this code must include Windows headers to do its job we can't therefore safely include general -// Redhawk header files. -// -#include "common.h" -#include -#include -#include - -#include "holder.h" - -#define _T(s) L##s -#include "RhConfig.h" - -#define PalRaiseFailFastException RaiseFailFastException - -#include "gcenv.h" -#include "gcenv.ee.h" -#include "gcconfig.h" - -#include "thread.h" -#include "threadstore.h" - -#include "nativecontext.h" - -#ifdef FEATURE_SPECIAL_USER_MODE_APC -#include -#endif - -#define REDHAWK_PALEXPORT extern "C" -#define REDHAWK_PALAPI __stdcall - -#ifndef XSTATE_MASK_APX -#define XSTATE_MASK_APX (0x80000) -#endif // XSTATE_MASK_APX - -// Index for the fiber local storage of the attached thread pointer -static uint32_t g_flsIndex = FLS_OUT_OF_INDEXES; - -// This is called when each *fiber* is destroyed. When the home fiber of a thread is destroyed, -// it means that the thread itself is destroyed. -// Since we receive that notification outside of the Loader Lock, it allows us to safely acquire -// the ThreadStore lock in the RuntimeThreadShutdown. -void __stdcall FiberDetachCallback(void* lpFlsData) -{ - ASSERT(g_flsIndex != FLS_OUT_OF_INDEXES); - ASSERT(lpFlsData == FlsGetValue(g_flsIndex)); - - if (lpFlsData != NULL) - { - // The current fiber is the home fiber of a thread, so the thread is shutting down - RuntimeThreadShutdown(lpFlsData); - } -} - -static HMODULE LoadKernel32dll() -{ - return LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); -} - -static HMODULE LoadNtdlldll() -{ - return LoadLibraryExW(L"ntdll.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); -} - -void InitializeCurrentProcessCpuCount() -{ - DWORD count; - - // If the configuration value has been set, it takes precedence. Otherwise, take into account - // process affinity and CPU quota limit. 
- - const unsigned int MAX_PROCESSOR_COUNT = 0xffff; - uint64_t configValue; - - if (g_pRhConfig->ReadConfigValue("PROCESSOR_COUNT", &configValue, true /* decimal */) && - 0 < configValue && configValue <= MAX_PROCESSOR_COUNT) - { - count = (DWORD)configValue; - } - else - { - if (GCToOSInterface::CanEnableGCCPUGroups()) - { - count = GCToOSInterface::GetTotalProcessorCount(); - } - else - { - DWORD_PTR pmask, smask; - - if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask)) - { - count = 1; - } - else - { - count = 0; - - while (pmask) - { - pmask &= (pmask - 1); - count++; - } - - // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more - // than 64 processors, which would leave us with a count of 0. Since the GC - // expects there to be at least one processor to run on (and thus at least one - // heap), we'll return 64 here if count is 0, since there are likely a ton of - // processors available in that case. - if (count == 0) - count = 64; - } - } - - JOBOBJECT_CPU_RATE_CONTROL_INFORMATION cpuRateControl; - - if (QueryInformationJobObject(NULL, JobObjectCpuRateControlInformation, &cpuRateControl, - sizeof(cpuRateControl), NULL)) - { - const DWORD HardCapEnabled = JOB_OBJECT_CPU_RATE_CONTROL_ENABLE | JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP; - const DWORD MinMaxRateEnabled = JOB_OBJECT_CPU_RATE_CONTROL_ENABLE | JOB_OBJECT_CPU_RATE_CONTROL_MIN_MAX_RATE; - DWORD maxRate = 0; - - if ((cpuRateControl.ControlFlags & HardCapEnabled) == HardCapEnabled) - { - maxRate = cpuRateControl.CpuRate; - } - else if ((cpuRateControl.ControlFlags & MinMaxRateEnabled) == MinMaxRateEnabled) - { - maxRate = cpuRateControl.MaxRate; - } - - // The rate is the percentage times 100 - const DWORD MAXIMUM_CPU_RATE = 10000; - - if (0 < maxRate && maxRate < MAXIMUM_CPU_RATE) - { - DWORD cpuLimit = (maxRate * GCToOSInterface::GetTotalProcessorCount() + MAXIMUM_CPU_RATE - 1) / MAXIMUM_CPU_RATE; - if (cpuLimit < count) - count = cpuLimit; - } - } - } - - _ASSERTE(count > 0); - g_RhNumberOfProcessors = count; -} - -// The Redhawk PAL must be initialized before any of its exports can be called. Returns true for a successful -// initialization and false on failure. -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalInit() -{ - // We use fiber detach callbacks to run our thread shutdown code because the fiber detach - // callback is made without the OS loader lock - g_flsIndex = FlsAlloc(FiberDetachCallback); - if (g_flsIndex == FLS_OUT_OF_INDEXES) - { - return false; - } - - GCConfig::Initialize(); - - if (!GCToOSInterface::Initialize()) - { - return false; - } - - InitializeCurrentProcessCpuCount(); - - return true; -} - -// Register the thread with OS to be notified when thread is about to be destroyed -// It fails fast if a different thread was already registered with the current fiber. -// Parameters: -// thread - thread to attach -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalAttachThread(void* thread) -{ - void* threadFromCurrentFiber = FlsGetValue(g_flsIndex); - - if (threadFromCurrentFiber != NULL) - { - ASSERT_UNCONDITIONALLY("Multiple threads encountered from a single fiber"); - RhFailFast(); - } - - // Associate the current fiber with the current thread. This makes the current fiber the thread's "home" - // fiber. This fiber is the only fiber allowed to execute managed code on this thread. When this fiber - // is destroyed, we consider the thread to be destroyed. - FlsSetValue(g_flsIndex, thread); -} - -// Detach thread from OS notifications. 
-// It fails fast if some other thread value was attached to the current fiber. -// Parameters: -// thread - thread to detach -// Return: -// true if the thread was detached, false if there was no attached thread -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalDetachThread(void* thread) -{ - ASSERT(g_flsIndex != FLS_OUT_OF_INDEXES); - void* threadFromCurrentFiber = FlsGetValue(g_flsIndex); - - if (threadFromCurrentFiber == NULL) - { - // we've seen this thread, but not this fiber. It must be a "foreign" fiber that was - // borrowing this thread. - return false; - } - - if (threadFromCurrentFiber != thread) - { - ASSERT_UNCONDITIONALLY("Detaching a thread from the wrong fiber"); - RhFailFast(); - } - - FlsSetValue(g_flsIndex, NULL); - return true; -} - -extern "C" uint64_t PalQueryPerformanceCounter() -{ - return GCToOSInterface::QueryPerformanceCounter(); -} - -extern "C" uint64_t PalQueryPerformanceFrequency() -{ - return GCToOSInterface::QueryPerformanceFrequency(); -} - -extern "C" uint64_t PalGetCurrentOSThreadId() -{ - return GetCurrentThreadId(); -} - -#if !defined(USE_PORTABLE_HELPERS) && !defined(FEATURE_RX_THUNKS) -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut) -{ -#ifdef XBOX_ONE - return E_NOTIMPL; -#else - BOOL success = FALSE; - HANDLE hMap = NULL, hFile = INVALID_HANDLE_VALUE; - - const WCHAR * wszModuleFileName = NULL; - if (PalGetModuleFileName(&wszModuleFileName, hTemplateModule) == 0 || wszModuleFileName == NULL) - return FALSE; - - hFile = CreateFileW(wszModuleFileName, GENERIC_READ | GENERIC_EXECUTE, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - if (hFile == INVALID_HANDLE_VALUE) - goto cleanup; - - hMap = CreateFileMapping(hFile, NULL, SEC_IMAGE | PAGE_READONLY, 0, 0, NULL); - if (hMap == NULL) - goto cleanup; - - *newThunksOut = MapViewOfFile(hMap, 0, 0, templateRva, templateSize); - success = ((*newThunksOut) != NULL); - -cleanup: - CloseHandle(hMap); - CloseHandle(hFile); - - return success; -#endif -} - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalFreeThunksFromTemplate(_In_ void *pBaseAddress, size_t templateSize) -{ -#ifdef XBOX_ONE - return TRUE; -#else - return UnmapViewOfFile(pBaseAddress); -#endif -} -#endif // !USE_PORTABLE_HELPERS && !FEATURE_RX_THUNKS - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalMarkThunksAsValidCallTargets( - void *virtualAddress, - int thunkSize, - int thunksPerBlock, - int thunkBlockSize, - int thunkBlocksPerMapping) -{ - // We are using RWX pages so there is no need for this API for now. - // Once we have a scenario for non-RWX pages we should be able to put the implementation here - return TRUE; -} - -REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI PalCompatibleWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t handleCount, HANDLE* pHandles, UInt32_BOOL allowReentrantWait) -{ - if (!allowReentrantWait) - { - return WaitForMultipleObjectsEx(handleCount, pHandles, FALSE, timeout, alertable); - } - else - { - DWORD index; - SetLastError(ERROR_SUCCESS); // recommended by MSDN. - HRESULT hr = CoWaitForMultipleHandles(alertable ? 
COWAIT_ALERTABLE : 0, timeout, handleCount, pHandles, &index); - - switch (hr) - { - case S_OK: - return index; - - case RPC_S_CALLPENDING: - return WAIT_TIMEOUT; - - default: - SetLastError(HRESULT_CODE(hr)); - return WAIT_FAILED; - } - } -} - -REDHAWK_PALEXPORT HANDLE PalCreateLowMemoryResourceNotification() -{ - return CreateMemoryResourceNotification(LowMemoryResourceNotification); -} - -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSleep(uint32_t milliseconds) -{ - return Sleep(milliseconds); -} - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalSwitchToThread() -{ - return SwitchToThread(); -} - -REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName) -{ - return CreateEventW(pEventAttributes, manualReset, initialState, pName); -} - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled() -{ -#if defined(TARGET_AMD64) - // The SSP is null when CET shadow stacks are not enabled. On processors that don't support shadow stacks, this is a - // no-op and the intrinsic returns 0. CET shadow stacks are enabled or disabled for all threads, so the result is the - // same from any thread. - return _rdsspq() != 0; -#else - // When implementing AreShadowStacksEnabled() on other architectures, review all the places where this is used. - return false; -#endif -} - - -#ifdef TARGET_X86 - -#define EXCEPTION_HIJACK 0xe0434f4e // 0xe0000000 | 'COM'+1 - -PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord() -{ - return (PEXCEPTION_REGISTRATION_RECORD)__readfsdword(0); -} - -VOID SetCurrentSEHRecord(EXCEPTION_REGISTRATION_RECORD *pSEH) -{ - __writefsdword(0, (DWORD)pSEH); -} - -VOID PopSEHRecords(LPVOID pTargetSP) -{ - PEXCEPTION_REGISTRATION_RECORD currentContext = GetCurrentSEHRecord(); - // The last record in the chain is EXCEPTION_CHAIN_END which is defined as maxiumum - // pointer value so it cannot satisfy the loop condition. - while (currentContext < pTargetSP) - { - currentContext = currentContext->Next; - } - SetCurrentSEHRecord(currentContext); -} - -// This will check who caused the exception. If it was caused by the redirect function, -// the reason is to resume the thread back at the point it was redirected in the first -// place. If the exception was not caused by the function, then it was caused by the call -// out to the I[GC|Debugger]ThreadControl client and we need to determine if it's an -// exception that we can just eat and let the runtime resume the thread, or if it's an -// uncatchable exception that we need to pass on to the runtime. -int RtlRestoreContextFallbackExceptionFilter(PEXCEPTION_POINTERS pExcepPtrs, CONTEXT *pCtx, Thread *pThread) -{ - if (pExcepPtrs->ExceptionRecord->ExceptionCode == STATUS_STACK_OVERFLOW) - { - return EXCEPTION_CONTINUE_SEARCH; - } - - // Get the thread handle - _ASSERTE(pExcepPtrs->ExceptionRecord->ExceptionCode == EXCEPTION_HIJACK); - - // Copy everything in the saved context record into the EH context. - // Historically the EH context has enough space for every enabled context feature. - // That may not hold for the future features beyond AVX, but this codepath is - // supposed to be used only on OSes that do not have RtlRestoreContext. - CONTEXT* pTarget = pExcepPtrs->ContextRecord; - if (!CopyContext(pTarget, pCtx->ContextFlags, pCtx)) - { - PalPrintFatalError("Could not set context record.\n"); - RhFailFast(); - } - - DWORD espValue = pCtx->Esp; - - // NOTE: Ugly, ugly workaround. 
- // We need to resume the thread into the managed code where it was redirected, - // and the corresponding ESP is below the current one. But C++ expects that - // on an EXCEPTION_CONTINUE_EXECUTION that the ESP will be above where it has - // installed the SEH handler. To solve this, we need to remove all handlers - // that reside above the resumed ESP, but we must leave the OS-installed - // handler at the top, so we grab the top SEH handler, call - // PopSEHRecords which will remove all SEH handlers above the target ESP and - // then link the OS handler back in with SetCurrentSEHRecord. - - // Get the special OS handler and save it until PopSEHRecords is done - EXCEPTION_REGISTRATION_RECORD *pCurSEH = GetCurrentSEHRecord(); - - // Unlink all records above the target resume ESP - PopSEHRecords((LPVOID)(size_t)espValue); - - // Link the special OS handler back in to the top - pCurSEH->Next = GetCurrentSEHRecord(); - - // Register the special OS handler as the top handler with the OS - SetCurrentSEHRecord(pCurSEH); - - // Resume execution at point where thread was originally redirected - return EXCEPTION_CONTINUE_EXECUTION; -} - -EXTERN_C VOID __cdecl RtlRestoreContextFallback(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord) -{ - Thread *pThread = ThreadStore::GetCurrentThread(); - - // A counter to avoid a nasty case where an - // up-stack filter throws another exception - // causing our filter to be run again for - // some unrelated exception. - int filter_count = 0; - - __try - { - // Save the instruction pointer where we redirected last. This does not race with the check - // against this variable because the GC will not attempt to redirect the thread until the - // instruction pointer of this thread is back in managed code. - pThread->SetPendingRedirect(ContextRecord->Eip); - RaiseException(EXCEPTION_HIJACK, 0, 0, NULL); - } - __except (++filter_count == 1 - ? 
RtlRestoreContextFallbackExceptionFilter(GetExceptionInformation(), ContextRecord, pThread) - : EXCEPTION_CONTINUE_SEARCH) - { - _ASSERTE(!"Reached body of __except in RtlRestoreContextFallback"); - } -} - -#endif // TARGET_X86 - -typedef BOOL(WINAPI* PINITIALIZECONTEXT2)(PVOID Buffer, DWORD ContextFlags, PCONTEXT* Context, PDWORD ContextLength, ULONG64 XStateCompactionMask); -PINITIALIZECONTEXT2 pfnInitializeContext2 = NULL; - -#ifdef TARGET_ARM64 -// Mirror the XSTATE_ARM64_SVE flags from winnt.h - -#ifndef XSTATE_ARM64_SVE -#define XSTATE_ARM64_SVE (2) -#endif // XSTATE_ARM64_SVE - -#ifndef XSTATE_MASK_ARM64_SVE -#define XSTATE_MASK_ARM64_SVE (1ui64 << (XSTATE_ARM64_SVE)) -#endif // XSTATE_MASK_ARM64_SVE - -#ifndef CONTEXT_ARM64_XSTATE -#define CONTEXT_ARM64_XSTATE (CONTEXT_ARM64 | 0x20L) -#endif // CONTEXT_ARM64_XSTATE - -#ifndef CONTEXT_XSTATE -#define CONTEXT_XSTATE CONTEXT_ARM64_XSTATE -#endif // CONTEXT_XSTATE - -typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); -PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; - -typedef BOOL(WINAPI* PSETXSTATEFEATURESMASK)(PCONTEXT Context, DWORD64 FeatureMask); -PSETXSTATEFEATURESMASK pfnSetXStateFeaturesMask = NULL; -#endif // TARGET_ARM64 - -#ifdef TARGET_X86 -EXTERN_C VOID __cdecl RtlRestoreContextFallback(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord); -typedef VOID(__cdecl* PRTLRESTORECONTEXT)(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord); -PRTLRESTORECONTEXT pfnRtlRestoreContext = NULL; - -#define CONTEXT_COMPLETE (CONTEXT_FULL | CONTEXT_FLOATING_POINT | \ - CONTEXT_DEBUG_REGISTERS | CONTEXT_EXTENDED_REGISTERS) -#else -#define CONTEXT_COMPLETE (CONTEXT_FULL | CONTEXT_DEBUG_REGISTERS) -#endif - -REDHAWK_PALEXPORT NATIVE_CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextBuffer) -{ - CONTEXT* pOSContext = NULL; - -#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM64) - DWORD context = CONTEXT_COMPLETE; - - if (pfnInitializeContext2 == NULL) - { - HMODULE hm = GetModuleHandleW(_T("kernel32.dll")); - if (hm != NULL) - { - pfnInitializeContext2 = (PINITIALIZECONTEXT2)GetProcAddress(hm, "InitializeContext2"); - } - } - -#if defined(TARGET_ARM64) - if (pfnGetEnabledXStateFeatures == NULL) - { - HMODULE hm = GetModuleHandleW(_T("kernel32.dll")); - if (hm != NULL) - { - pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hm, "GetEnabledXStateFeatures"); - } - } -#endif // TARGET_ARM64 - -#ifdef TARGET_X86 - if (pfnRtlRestoreContext == NULL) - { - HMODULE hm = GetModuleHandleW(_T("ntdll.dll")); - pfnRtlRestoreContext = (PRTLRESTORECONTEXT)GetProcAddress(hm, "RtlRestoreContext"); - if (pfnRtlRestoreContext == NULL) - { - // Fallback to the internal implementation if OS doesn't provide one. 
- pfnRtlRestoreContext = RtlRestoreContextFallback; - } - } -#endif //TARGET_X86 - -#if defined(TARGET_X86) || defined(TARGET_AMD64) - const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; - const ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_MPX | xStateFeatureMask; -#elif defined(TARGET_ARM64) - const DWORD64 xStateFeatureMask = XSTATE_MASK_ARM64_SVE; - const ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | xStateFeatureMask; -#endif - - // Determine if the processor supports extended features so we could retrieve those registers - DWORD64 FeatureMask = 0; - -#if defined(TARGET_X86) || defined(TARGET_AMD64) - FeatureMask = GetEnabledXStateFeatures(); -#elif defined(TARGET_ARM64) - if (pfnGetEnabledXStateFeatures != NULL) - { - FeatureMask = pfnGetEnabledXStateFeatures(); - } -#endif - - if ((FeatureMask & xStateFeatureMask) != 0) - { - context = context | CONTEXT_XSTATE; - } - - // the context does not need XSTATE_MASK_CET_U because we should not be using - // redirection when CET is enabled and should not be here. - _ASSERTE(!PalAreShadowStacksEnabled()); - - // Retrieve contextSize by passing NULL for Buffer - DWORD contextSize = 0; - // The initialize call should fail but return contextSize - BOOL success = pfnInitializeContext2 ? - pfnInitializeContext2(NULL, context, NULL, &contextSize, xStateCompactionMask) : - InitializeContext(NULL, context, NULL, &contextSize); - - // Spec mentions that we may get a different error (it was observed on Windows7). - // In such case the contextSize is undefined. - if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) - { - return NULL; - } - - // So now allocate a buffer of that size and call InitializeContext again - uint8_t* buffer = new (nothrow)uint8_t[contextSize]; - if (buffer != NULL) - { - success = pfnInitializeContext2 ? - pfnInitializeContext2(buffer, context, &pOSContext, &contextSize, xStateCompactionMask): - InitializeContext(buffer, context, &pOSContext, &contextSize); - - if (!success) - { - delete[] buffer; - buffer = NULL; - } - } - - if (!success) - { - pOSContext = NULL; - } - - *contextBuffer = buffer; - -#else - pOSContext = new (nothrow) CONTEXT; - pOSContext->ContextFlags = CONTEXT_COMPLETE; - *contextBuffer = NULL; -#endif - - return (NATIVE_CONTEXT*)pOSContext; -} - -REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetCompleteThreadContext(HANDLE hThread, _Out_ NATIVE_CONTEXT * pCtx) -{ - CONTEXT* pOSContext = &pCtx->ctx; - - _ASSERTE((pOSContext->ContextFlags & CONTEXT_COMPLETE) == CONTEXT_COMPLETE); - -#if defined(TARGET_ARM64) - if (pfnSetXStateFeaturesMask == NULL) - { - HMODULE hm = GetModuleHandleW(_T("kernel32.dll")); - if (hm != NULL) - { - pfnSetXStateFeaturesMask = (PSETXSTATEFEATURESMASK)GetProcAddress(hm, "SetXStateFeaturesMask"); - } - } -#endif // TARGET_ARM64 - - // This should not normally fail. - // The system silently ignores any feature specified in the FeatureMask which is not enabled on the processor. 
-#if defined(TARGET_X86) || defined(TARGET_AMD64) - if (!SetXStateFeaturesMask(pOSContext, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) - { - _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX"); - return FALSE; - } -#elif defined(TARGET_ARM64) - if ((pfnSetXStateFeaturesMask != NULL) && !pfnSetXStateFeaturesMask(pOSContext, XSTATE_MASK_ARM64_SVE)) - { - _ASSERTE(!"Could not apply XSTATE_MASK_ARM64_SVE"); - return FALSE; - } -#endif - - return GetThreadContext(hThread, pOSContext); -} - -REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalSetThreadContext(HANDLE hThread, _Out_ NATIVE_CONTEXT * pCtx) -{ - return SetThreadContext(hThread, &pCtx->ctx); -} - -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalRestoreContext(NATIVE_CONTEXT * pCtx) -{ - CONTEXT* pOSContext = &pCtx->ctx; - - __asan_handle_no_return(); -#ifdef TARGET_X86 - _ASSERTE(pfnRtlRestoreContext != NULL); - pfnRtlRestoreContext(pOSContext, NULL); -#else - RtlRestoreContext(pOSContext, NULL); -#endif //TARGET_X86 -} - -#if defined(TARGET_X86) || defined(TARGET_AMD64) -REDHAWK_PALIMPORT void REDHAWK_PALAPI PopulateControlSegmentRegisters(CONTEXT* pContext) -{ - CONTEXT ctx; - - RtlCaptureContext(&ctx); - - pContext->SegCs = ctx.SegCs; - pContext->SegSs = ctx.SegSs; -} -#endif //defined(TARGET_X86) || defined(TARGET_AMD64) - -// These declarations are for a new special user-mode APC feature introduced in Windows. These are not yet available in Windows -// SDK headers, so some names below are prefixed with "CLONE_" to avoid conflicts in the future. Once the prefixed declarations -// become available in the Windows SDK headers, the prefixed declarations below can be removed in favor of the SDK ones. - -enum CLONE_QUEUE_USER_APC_FLAGS -{ - CLONE_QUEUE_USER_APC_FLAGS_NONE = 0x0, - CLONE_QUEUE_USER_APC_FLAGS_SPECIAL_USER_APC = 0x1, - CLONE_QUEUE_USER_APC_CALLBACK_DATA_CONTEXT = 0x10000 -}; - -struct CLONE_APC_CALLBACK_DATA -{ - ULONG_PTR Parameter; - PCONTEXT ContextRecord; - ULONG_PTR Reserved0; - ULONG_PTR Reserved1; -}; -typedef CLONE_APC_CALLBACK_DATA* CLONE_PAPC_CALLBACK_DATA; - -typedef BOOL (WINAPI* QueueUserAPC2Proc)(PAPCFUNC ApcRoutine, HANDLE Thread, ULONG_PTR Data, CLONE_QUEUE_USER_APC_FLAGS Flags); - -#define QUEUE_USER_APC2_UNINITIALIZED (QueueUserAPC2Proc)-1 -static QueueUserAPC2Proc g_pfnQueueUserAPC2Proc = QUEUE_USER_APC2_UNINITIALIZED; - -static const CLONE_QUEUE_USER_APC_FLAGS SpecialUserModeApcWithContextFlags = (CLONE_QUEUE_USER_APC_FLAGS) - (CLONE_QUEUE_USER_APC_FLAGS_SPECIAL_USER_APC | - CLONE_QUEUE_USER_APC_CALLBACK_DATA_CONTEXT); - -static void* g_returnAddressHijackTarget = NULL; - -static void NTAPI ActivationHandler(ULONG_PTR parameter) -{ - CLONE_APC_CALLBACK_DATA* data = (CLONE_APC_CALLBACK_DATA*)parameter; - Thread::HijackCallback((NATIVE_CONTEXT*)data->ContextRecord, NULL); - - Thread* pThread = (Thread*)data->Parameter; - pThread->SetActivationPending(false); -} - -void InitHijackingAPIs() -{ - HMODULE hKernel32 = LoadKernel32dll(); - -#ifdef HOST_AMD64 - typedef BOOL (WINAPI *IsWow64Process2Proc)(HANDLE hProcess, USHORT *pProcessMachine, USHORT *pNativeMachine); - - IsWow64Process2Proc pfnIsWow64Process2Proc = (IsWow64Process2Proc)GetProcAddress(hKernel32, "IsWow64Process2"); - USHORT processMachine, hostMachine; - if (pfnIsWow64Process2Proc != nullptr && - (*pfnIsWow64Process2Proc)(GetCurrentProcess(), &processMachine, &hostMachine) && - (hostMachine == IMAGE_FILE_MACHINE_ARM64) && - !IsWindowsVersionOrGreater(10, 0, 26100)) - { - // Special user-mode APCs 
are broken on WOW64 processes (x64 running on Arm64 machine) with Windows older than 11.0.26100 (24H2) - g_pfnQueueUserAPC2Proc = NULL; - } - else -#endif // HOST_AMD64 - { - g_pfnQueueUserAPC2Proc = (QueueUserAPC2Proc)GetProcAddress(hKernel32, "QueueUserAPC2"); - } - - if (PalAreShadowStacksEnabled()) - { - // When shadow stacks are enabled, support for special user-mode APCs is required - _ASSERTE(g_pfnQueueUserAPC2Proc != NULL); - - HMODULE hModNtdll = LoadNtdlldll(); - typedef void* (*PFN_RtlGetReturnAddressHijackTarget)(void); - - void* rtlGetReturnAddressHijackTarget = GetProcAddress(hModNtdll, "RtlGetReturnAddressHijackTarget"); - if (rtlGetReturnAddressHijackTarget != NULL) - { - g_returnAddressHijackTarget = ((PFN_RtlGetReturnAddressHijackTarget)rtlGetReturnAddressHijackTarget)(); - } - - if (g_returnAddressHijackTarget == NULL) - { - _ASSERTE(!"RtlGetReturnAddressHijackTarget must provide a target when shadow stacks are enabled"); - } - } -} - -REDHAWK_PALIMPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(HijackFunc* defaultHijackTarget) -{ - return g_returnAddressHijackTarget ? (HijackFunc*)g_returnAddressHijackTarget : defaultHijackTarget; -} - -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(Thread* pThreadToHijack) -{ - HANDLE hThread = pThreadToHijack->GetOSThreadHandle(); - - if (hThread == INVALID_HANDLE_VALUE) - { - // cannot proceed - return; - } - -#ifdef FEATURE_SPECIAL_USER_MODE_APC - - // initialize g_pfnQueueUserAPC2Proc on demand. - // Note that only one thread at a time may perform suspension (guaranteed by the thread store lock) - // so simple condition check is ok. - if (g_pfnQueueUserAPC2Proc == QUEUE_USER_APC2_UNINITIALIZED) - { - InitHijackingAPIs(); - } - - if (g_pfnQueueUserAPC2Proc) - { - // An APC can be interrupted by another one, do not queue more if one is pending. - if (pThreadToHijack->IsActivationPending()) - { - return; - } - - pThreadToHijack->SetActivationPending(true); - BOOL success = g_pfnQueueUserAPC2Proc( - &ActivationHandler, - hThread, - (ULONG_PTR)pThreadToHijack, - SpecialUserModeApcWithContextFlags); - - if (success) - { - return; - } - - // queuing an APC failed - pThreadToHijack->SetActivationPending(false); - - DWORD lastError = GetLastError(); - if (lastError != ERROR_INVALID_PARAMETER && lastError != ERROR_NOT_SUPPORTED) - { - // An unexpected failure has happened. It is a concern. - ASSERT_UNCONDITIONALLY("Failed to queue an APC for unusual reason."); - - // maybe it will work next time. - return; - } - - // the flags that we passed are not supported. - // we will not try again - g_pfnQueueUserAPC2Proc = NULL; - } -#endif - - if (SuspendThread(hThread) == (DWORD)-1) - { - return; - } - - CONTEXT win32ctx; - win32ctx.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_EXCEPTION_REQUEST; - - if (GetThreadContext(hThread, &win32ctx)) - { - bool isSafeToRedirect = true; - -#ifdef TARGET_X86 - // Workaround around WOW64 problems. Only do this workaround if a) this is x86, and b) the OS does - // not support trap frame reporting. - if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) == 0) - { - // This code fixes a race between GetThreadContext and NtContinue. If we redirect managed code - // at the same place twice in a row, we run the risk of reading a bogus CONTEXT when we redirect - // the second time. This leads to access violations on x86 machines. To fix the problem, we - // never redirect at the same instruction pointer that we redirected at on the previous GC. 
- if (pThreadToHijack->CheckPendingRedirect(win32ctx.Eip)) - { - isSafeToRedirect = false; - } - } -#else - // In some cases Windows will not set the CONTEXT_EXCEPTION_REPORTING flag if the thread is executing - // in kernel mode (i.e. in the middle of a syscall or exception handling). Therefore, we should treat - // the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that it is not safe to - // manipulate with the current state of the thread context. - isSafeToRedirect = (win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0; -#endif - - // The CONTEXT_SERVICE_ACTIVE and CONTEXT_EXCEPTION_ACTIVE output flags indicate we suspended the thread - // at a point where the kernel cannot guarantee a completely accurate context. We'll fail the request in - // this case (which should force our caller to resume the thread and try again -- since this is a fairly - // narrow window we're highly likely to succeed next time). - if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0 && - ((win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE)) != 0)) - { - isSafeToRedirect = false; - } - - if (isSafeToRedirect) - { - Thread::HijackCallback((NATIVE_CONTEXT*)&win32ctx, pThreadToHijack); - } - } - - ResumeThread(hThread); -} - -#define SET_THREAD_DESCRIPTION_UNINITIALIZED (pfnSetThreadDescription)-1 -typedef HRESULT(WINAPI *pfnSetThreadDescription)(HANDLE hThread, PCWSTR lpThreadDescription); -static pfnSetThreadDescription g_pfnSetThreadDescription = SET_THREAD_DESCRIPTION_UNINITIALIZED; - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundWork(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext, BOOL highPriority) -{ - HANDLE hThread = CreateThread( - NULL, - 0, - (LPTHREAD_START_ROUTINE)callback, - pCallbackContext, - highPriority ? 
CREATE_SUSPENDED : 0, - NULL); - - if (hThread == NULL) - return false; - - if (highPriority) - { - SetThreadPriority(hThread, THREAD_PRIORITY_HIGHEST); - ResumeThread(hThread); - } - - CloseHandle(hThread); - return true; -} - -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalSetCurrentThreadNameW(const WCHAR* name) -{ - if (g_pfnSetThreadDescription == SET_THREAD_DESCRIPTION_UNINITIALIZED) - { - HMODULE hKernel32 = LoadKernel32dll(); - g_pfnSetThreadDescription = (pfnSetThreadDescription)GetProcAddress(hKernel32, "SetThreadDescription"); - } - if (!g_pfnSetThreadDescription) - { - return false; - } - HANDLE hThread = GetCurrentThread(); - g_pfnSetThreadDescription(hThread, name); - return true; -} - -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalSetCurrentThreadName(const char* name) -{ - size_t len = strlen(name); - wchar_t* threadNameWide = new (nothrow) wchar_t[len + 1]; - if (threadNameWide == nullptr) - { - return false; - } - if (MultiByteToWideChar(CP_UTF8, 0, name, -1, threadNameWide, (int)(len + 1)) == 0) - { - delete[] threadNameWide; - return false; - } - bool ret = PalSetCurrentThreadNameW(threadNameWide); - delete[] threadNameWide; - return ret; -} - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) -{ - return PalStartBackgroundWork(callback, pCallbackContext, FALSE); -} - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) -{ - return PalStartBackgroundWork(callback, pCallbackContext, TRUE); -} - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartEventPipeHelperThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) -{ - return PalStartBackgroundWork(callback, pCallbackContext, FALSE); -} - -REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer) -{ - // The runtime is not designed to be unloadable today. Use GET_MODULE_HANDLE_EX_FLAG_PIN to prevent - // the module from ever unloading. - - HMODULE module; - if (!GetModuleHandleExW( - GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_PIN, - (LPCWSTR)pointer, - &module)) - { - return NULL; - } - - return (HANDLE)module; -} - -REDHAWK_PALEXPORT void PalPrintFatalError(const char* message) -{ - // Write the message using lowest-level OS API available. This is used to print the stack overflow - // message, so there is not much that can be done here. 
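`PalSetCurrentThreadNameW` above resolves `SetThreadDescription` lazily because the export only exists on newer Windows versions. A minimal managed sketch of the same call via P/Invoke is shown below; the wrapper name is made up, and in ordinary managed code simply assigning `Thread.CurrentThread.Name` achieves the same effect.

```csharp
using System;
using System.Runtime.InteropServices;

static class ThreadNaming
{
    // SetThreadDescription takes the thread handle and a UTF-16 string and returns an HRESULT.
    [DllImport("kernel32.dll", CharSet = CharSet.Unicode, ExactSpelling = true)]
    private static extern int SetThreadDescription(IntPtr hThread, string lpThreadDescription);

    [DllImport("kernel32.dll", ExactSpelling = true)]
    private static extern IntPtr GetCurrentThread();

    public static bool TrySetCurrentThreadName(string name)
    {
        // HRESULT >= 0 means success. On an OS without the export this call throws
        // EntryPointNotFoundException, which is why the PAL above probes with
        // GetProcAddress instead of importing the function directly.
        return SetThreadDescription(GetCurrentThread(), name) >= 0;
    }
}
```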
- DWORD dwBytesWritten; - WriteFile(GetStdHandle(STD_ERROR_HANDLE), message, (DWORD)strlen(message), &dwBytesWritten, NULL); -} - -REDHAWK_PALEXPORT char* PalCopyTCharAsChar(const TCHAR* toCopy) -{ - int len = ::WideCharToMultiByte(CP_UTF8, 0, toCopy, -1, nullptr, 0, nullptr, nullptr); - if (len == 0) - return nullptr; - - char* converted = new (nothrow) char[len]; - int written = ::WideCharToMultiByte(CP_UTF8, 0, toCopy, -1, converted, len, nullptr, nullptr); - assert(len == written); - return converted; -} - -REDHAWK_PALEXPORT HANDLE PalLoadLibrary(const char* moduleName) -{ - assert(moduleName); - size_t len = strlen(moduleName); - wchar_t* moduleNameWide = new (nothrow)wchar_t[len + 1]; - if (moduleNameWide == nullptr) - { - return 0; - } - if (MultiByteToWideChar(CP_UTF8, 0, moduleName, -1, moduleNameWide, (int)(len + 1)) == 0) - { - return 0; - } - moduleNameWide[len] = '\0'; - - HANDLE result = LoadLibraryExW(moduleNameWide, NULL, LOAD_WITH_ALTERED_SEARCH_PATH); - delete[] moduleNameWide; - return result; -} - -REDHAWK_PALEXPORT void* PalGetProcAddress(HANDLE module, const char* functionName) -{ - assert(module); - assert(functionName); - return GetProcAddress((HMODULE)module, functionName); -} - -REDHAWK_PALEXPORT _Ret_maybenull_ _Post_writable_byte_size_(size) void* REDHAWK_PALAPI PalVirtualAlloc(uintptr_t size, uint32_t protect) -{ - return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, protect); -} - -REDHAWK_PALEXPORT void REDHAWK_PALAPI PalVirtualFree(_In_ void* pAddress, uintptr_t size) -{ - VirtualFree(pAddress, 0, MEM_RELEASE); -} - -REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddress, uintptr_t size, uint32_t protect) -{ - DWORD oldProtect; - return VirtualProtect(pAddress, size, protect, &oldProtect); -} - -REDHAWK_PALEXPORT void PalFlushInstructionCache(_In_ void* pAddress, size_t size) -{ - FlushInstructionCache(GetCurrentProcess(), pAddress, size); -} - -#ifdef TARGET_AMD64 -uintptr_t GetSSP(CONTEXT *pContext) -{ - XSAVE_CET_U_FORMAT* pCET = (XSAVE_CET_U_FORMAT*)LocateXStateFeature(pContext, XSTATE_CET_U, NULL); - if ((pCET != NULL) && (pCET->Ia32CetUMsr != 0)) - { - return pCET->Ia32Pl3SspMsr; - } - - return 0; -} - -void SetSSP(CONTEXT *pContext, uintptr_t ssp) -{ - XSAVE_CET_U_FORMAT* pCET = (XSAVE_CET_U_FORMAT*)LocateXStateFeature(pContext, XSTATE_CET_U, NULL); - if (pCET != NULL) - { - pCET->Ia32Pl3SspMsr = ssp; - pCET->Ia32CetUMsr = 1; - } -} -#endif // TARGET_AMD64 diff --git a/src/coreclr/nativeaot/Runtime/yieldprocessornormalized.cpp b/src/coreclr/nativeaot/Runtime/yieldprocessornormalized.cpp index efaf4e8bb207..06d068f7d268 100644 --- a/src/coreclr/nativeaot/Runtime/yieldprocessornormalized.cpp +++ b/src/coreclr/nativeaot/Runtime/yieldprocessornormalized.cpp @@ -8,12 +8,13 @@ #include "CommonMacros.h" #include "daccess.h" #include "DebugMacrosExt.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" +#include "PalLimitedContext.h" +#include "Pal.h" #include "rhassert.h" #include "slist.h" #include "volatile.h" #include "yieldprocessornormalized.h" +#include "minipal/time.h" #include "../../utilcode/yieldprocessornormalized.cpp" diff --git a/src/coreclr/nativeaot/Runtime/yieldprocessornormalized.h b/src/coreclr/nativeaot/Runtime/yieldprocessornormalized.h index 5539ebf90561..8a600e2b8498 100644 --- a/src/coreclr/nativeaot/Runtime/yieldprocessornormalized.h +++ b/src/coreclr/nativeaot/Runtime/yieldprocessornormalized.h @@ -1,5 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. 
// The .NET Foundation licenses this file to you under the MIT license. -#include "PalRedhawk.h" +#include "Pal.h" #include "../../inc/yieldprocessornormalized.h" diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/CompatibilitySuppressions.xml b/src/coreclr/nativeaot/System.Private.CoreLib/src/CompatibilitySuppressions.xml index 0abedd830a6b..2de459555d11 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/CompatibilitySuppressions.xml +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/CompatibilitySuppressions.xml @@ -697,10 +697,6 @@ CP0001 T:Internal.Runtime.Augments.TypeLoaderCallbacks - - CP0001 - T:Internal.Runtime.CanonTypeKind - CP0001 T:Internal.Runtime.CompilerServices.FunctionPointerOps @@ -717,18 +713,6 @@ CP0001 T:Internal.Runtime.CompilerServices.OpenMethodResolver - - CP0001 - T:Internal.Runtime.CompilerServices.RuntimeFieldHandleInfo - - - CP0001 - T:Internal.Runtime.CompilerServices.RuntimeMethodHandleInfo - - - CP0001 - T:Internal.Runtime.CompilerServices.RuntimeSignature - CP0001 T:Internal.Runtime.TypeManagerHandle @@ -753,6 +737,14 @@ CP0001 T:System.MDArray + + CP0001 + T:System.FieldHandleInfo + + + CP0001 + T:System.MethodHandleInfo + CP0001 T:System.Reflection.BinderBundle diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Augments/ReflectionAugments.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Augments/ReflectionAugments.cs index 4a5f4a11908c..057c6ecf994f 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Augments/ReflectionAugments.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Augments/ReflectionAugments.cs @@ -103,21 +103,6 @@ public static Assembly Load(AssemblyName assemblyRef, bool throwOnFileNotFound) return RuntimeAssemblyInfo.GetRuntimeAssemblyIfExists(assemblyRef.ToRuntimeAssemblyName()); } - public static Assembly Load(ReadOnlySpan rawAssembly, ReadOnlySpan pdbSymbolStore) - { - if (rawAssembly.IsEmpty) - throw new ArgumentNullException(nameof(rawAssembly)); - - return RuntimeAssemblyInfo.GetRuntimeAssemblyFromByteArray(rawAssembly, pdbSymbolStore); - } - - public static Assembly Load(string assemblyPath) - { - ArgumentNullException.ThrowIfNull(assemblyPath); - - return RuntimeAssemblyInfo.GetRuntimeAssemblyFromPath(assemblyPath); - } - // // This overload of GetMethodForHandle only accepts handles for methods declared on non-generic types (the method, however, // can be an instance of a generic method.) To resolve handles for methods declared on generic types, you must pass @@ -136,7 +121,7 @@ public static MethodBase GetMethodFromHandle(RuntimeMethodHandle runtimeMethodHa MethodBase methodBase = ExecutionDomain.GetMethod(declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles); if (methodBase.DeclaringType.IsConstructedGenericType) // For compat with desktop, insist that the caller pass us the declaring type to resolve members of generic types. 
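The compat check above mirrors the public `MethodBase.GetMethodFromHandle` contract: a handle for a method declared on a generic type can only be resolved together with the declaring type's handle, because the handle alone does not identify the instantiation. A small illustration against the public API; `Container<T>` is a hypothetical type used only for this example.

```csharp
using System;
using System.Reflection;

class Container<T>
{
    public void DoWork() { }
}

static class HandleResolution
{
    static void Main()
    {
        MethodInfo method = typeof(Container<int>).GetMethod(nameof(Container<int>.DoWork))!;

        // Supplying the declaring type handle disambiguates Container<int> from Container<string>.
        MethodBase? resolved = MethodBase.GetMethodFromHandle(
            method.MethodHandle, typeof(Container<int>).TypeHandle);
        Console.WriteLine(resolved);

        // The single-argument overload throws ArgumentException for methods declared
        // on generic types, which is the behavior the check above preserves:
        // MethodBase.GetMethodFromHandle(method.MethodHandle);
    }
}
```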
- throw new ArgumentException(SR.Format(SR.Argument_MethodDeclaringTypeGeneric, methodBase)); + throw new ArgumentException(SR.Format(SR.Argument_MethodDeclaringTypeGeneric, methodBase, methodBase.DeclaringType.GetGenericTypeDefinition())); return methodBase; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/AssemblyBinder.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/AssemblyBinder.cs index a59701eb8c85..cee8abb23ab3 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/AssemblyBinder.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/AssemblyBinder.cs @@ -19,7 +19,6 @@ public partial struct AssemblyBindResult { public MetadataReader Reader; public ScopeDefinitionHandle ScopeDefinitionHandle; - public IEnumerable OverflowScopes; } // @@ -33,10 +32,6 @@ public abstract class AssemblyBinder { public abstract bool Bind(RuntimeAssemblyName refName, bool cacheMissedLookups, out AssemblyBindResult result, out Exception exception); - public abstract bool Bind(ReadOnlySpan rawAssembly, ReadOnlySpan rawSymbolStore, out AssemblyBindResult result, out Exception exception); - - public abstract bool Bind(string assemblyPath, out AssemblyBindResult bindResult, out Exception exception); - public abstract IList GetLoadedAssemblies(); } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/ExecutionEnvironment.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/ExecutionEnvironment.cs index d33d969bcdfb..9054b909bccd 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/ExecutionEnvironment.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/ExecutionEnvironment.cs @@ -19,7 +19,7 @@ namespace Internal.Reflection.Core.Execution { // - // This class abstracts the underlying Redhawk (or whatever execution engine) runtime and exposes the services + // This class abstracts the underlying NativeAOT runtime (or whatever execution engine) runtime and exposes the services // that I.R.Core.Execution needs. // [CLSCompliant(false)] @@ -53,7 +53,8 @@ public abstract class ExecutionEnvironment //============================================================================================== // Invoke and field access support. 
//============================================================================================== - public abstract MethodBaseInvoker TryGetMethodInvoker(RuntimeTypeHandle declaringTypeHandle, QMethodDefinition methodHandle, RuntimeTypeHandle[] genericMethodTypeArgumentHandles); + public abstract void ValidateGenericMethodConstraints(MethodInfo method); + public abstract MethodBaseInvoker TryGetMethodInvokerNoConstraintCheck(RuntimeTypeHandle declaringTypeHandle, QMethodDefinition methodHandle, RuntimeTypeHandle[] genericMethodTypeArgumentHandles); public abstract FieldAccessor TryGetFieldAccessor(MetadataReader reader, RuntimeTypeHandle declaringTypeHandle, RuntimeTypeHandle fieldTypeHandle, FieldHandle fieldHandle); //============================================================================================== @@ -109,7 +110,7 @@ internal MethodBaseInvoker GetMethodInvoker(RuntimeTypeInfo declaringType, QMeth { genericMethodTypeArgumentHandles[i] = genericMethodTypeArguments[i].TypeHandle; } - MethodBaseInvoker methodInvoker = TryGetMethodInvoker(typeDefinitionHandle, methodHandle, genericMethodTypeArgumentHandles); + MethodBaseInvoker methodInvoker = TryGetMethodInvokerNoConstraintCheck(typeDefinitionHandle, methodHandle, genericMethodTypeArgumentHandles); if (methodInvoker == null) exception = ReflectionCoreExecution.ExecutionEnvironment.CreateNonInvokabilityException(exceptionPertainant); return methodInvoker; diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/FieldAccessor.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/FieldAccessor.cs index 3a931657e2d4..7b2a5eb5b704 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/FieldAccessor.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/FieldAccessor.cs @@ -10,7 +10,7 @@ namespace Internal.Reflection.Core.Execution { // - // This class abstracts the underlying Redhawk (or whatever execution engine) runtime that sets and gets fields. + // This class abstracts the underlying NativeAOT runtime (or whatever execution engine) runtime that sets and gets fields. // [CLSCompliant(false)] public abstract class FieldAccessor diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Extensions/NonPortable/CustomAttributeSearcher.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Extensions/NonPortable/CustomAttributeSearcher.cs index 89ede0e6d92f..8249b4c58a32 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Extensions/NonPortable/CustomAttributeSearcher.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Extensions/NonPortable/CustomAttributeSearcher.cs @@ -108,18 +108,8 @@ public virtual E GetParent(E e) // // Main iterator. // - private IEnumerable GetMatchingCustomAttributesIterator(E element, Func rawPassesFilter, bool inherit) + private IEnumerable GetMatchingCustomAttributesIterator(E element, Func passesFilter, bool inherit) { - Func passesFilter = - delegate (Type attributeType) - { - // Windows prohibits instantiating WinRT custom attributes. Filter them from the search as the desktop CLR does. 
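The iterator being simplified here drives the inherited custom-attribute search that the public reflection surface exposes directly. A short, hedged example of that behavior, using a made-up attribute and type pair:

```csharp
using System;

[AttributeUsage(AttributeTargets.Class, Inherited = true)]
class MarkerAttribute : Attribute { }

[Marker]
class BaseWidget { }

class DerivedWidget : BaseWidget { }

static class AttributeWalk
{
    static void Main()
    {
        // inherit: true walks the base-class chain, so the attribute declared on
        // BaseWidget is also reported for DerivedWidget.
        object[] attrs = typeof(DerivedWidget).GetCustomAttributes(typeof(MarkerAttribute), inherit: true);
        Console.WriteLine(attrs.Length); // 1
    }
}
```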
- TypeAttributes typeAttributes = attributeType.Attributes; - if (0 != (typeAttributes & TypeAttributes.WindowsRuntime)) - return false; - return rawPassesFilter(attributeType); - }; - LowLevelList immediateResults = new LowLevelList(); foreach (CustomAttributeData cad in GetDeclaredCustomAttributes(element)) { diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs index bc776ec8342c..8917f0b2f26b 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs @@ -26,6 +26,7 @@ using System.Runtime.InteropServices; using System.Threading; +using Internal.Reflection.Core.Execution; using Internal.Runtime.CompilerHelpers; using Internal.Runtime.CompilerServices; @@ -70,6 +71,20 @@ public static object RawNewObject(RuntimeTypeHandle typeHandle) return RuntimeImports.RhNewObject(typeHandle.ToMethodTable()); } + internal static void EnsureMethodTableSafeToAllocate(MethodTable* mt) + { + // We might be dealing with a "necessary" MethodTable (in the ILCompiler terms). + // This MethodTable is okay for casting, but must not be allocated on the GC heap. + Debug.Assert(MethodTable.Of()->NumVtableSlots > 0); + if (mt->NumVtableSlots == 0) + { + // This is a type without a vtable or GCDesc. We must not allow creating an instance of it + throw ReflectionCoreExecution.ExecutionEnvironment.CreateMissingMetadataException(Type.GetTypeFromMethodTable(mt)); + } + // Paranoid check: not-meant-for-GC-heap types should be reliably identifiable by empty vtable. + Debug.Assert(!mt->ContainsGCPointers || RuntimeImports.RhGetGCDescSize(mt) != 0); + } + // // Perform the equivalent of a "newarr" The resulting array is zero-initialized. // @@ -77,7 +92,12 @@ public static Array NewArray(RuntimeTypeHandle typeHandleForArrayType, int count { // Don't make the easy mistake of passing in the element MethodTable rather than the "array of element" MethodTable. Debug.Assert(typeHandleForArrayType.ToMethodTable()->IsSzArray); - return RuntimeImports.RhNewArray(typeHandleForArrayType.ToMethodTable(), count); + + MethodTable* mt = typeHandleForArrayType.ToMethodTable(); + + EnsureMethodTableSafeToAllocate(mt); + + return RuntimeImports.RhNewArray(mt, count); } // @@ -109,7 +129,7 @@ public static unsafe Array NewMultiDimArray(RuntimeTypeHandle typeHandleForArray // We just checked above that all lower bounds are zero. In that case, we should actually allocate // a new SzArray instead. Type elementType = Type.GetTypeFromHandle(new RuntimeTypeHandle(typeHandleForArrayType.ToMethodTable()->RelatedParameterType))!; - return RuntimeImports.RhNewArray(elementType.MakeArrayType().TypeHandle.ToMethodTable(), lengths[0]); + return NewArray(elementType.MakeArrayType().TypeHandle, lengths[0]); } // Create a local copy of the lengths that cannot be modified by the caller @@ -463,7 +483,7 @@ public static IntPtr NewInterfaceDispatchCell(RuntimeTypeHandle interfaceTypeHan } [Intrinsic] - public static RuntimeTypeHandle GetCanonType(CanonTypeKind kind) + public static RuntimeTypeHandle GetCanonType() { // Compiler needs to expand this. This is not expressible in IL. 
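`EnsureMethodTableSafeToAllocate` above guards reflection-driven allocation against "necessary" MethodTables that exist only for casting and must never be instantiated. At the public API level the guarded `NewArray` path corresponds roughly to `Array.CreateInstance`; a tiny sketch follows (on Native AOT this succeeds only when the requested array type was generated into the image, otherwise the missing-metadata exception above surfaces).

```csharp
using System;

static class ReflectionArrayDemo
{
    static void Main()
    {
        // Reflection-driven, zero-initialized array allocation; int[] is always
        // available, so this is safe even in a trimmed/AOT-compiled application.
        Array values = Array.CreateInstance(typeof(int), 16);
        values.SetValue(42, 0);
        Console.WriteLine(values.GetValue(0)); // 42
    }
}
```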
throw new NotSupportedException(); @@ -700,18 +720,12 @@ public static string TryGetMethodDisplayStringFromIp(IntPtr ip) public static object CreateThunksHeap(IntPtr commonStubAddress) { - object? newHeap = ThunksHeap.CreateThunksHeap(commonStubAddress); - if (newHeap == null) - throw new OutOfMemoryException(); - return newHeap; + return ThunksHeap.CreateThunksHeap(commonStubAddress); } public static IntPtr AllocateThunk(object thunksHeap) { - IntPtr newThunk = ((ThunksHeap)thunksHeap).AllocateThunk(); - if (newThunk == IntPtr.Zero) - throw new OutOfMemoryException(); - return newThunk; + return ((ThunksHeap)thunksHeap).AllocateThunk(); } public static void FreeThunk(object thunksHeap, IntPtr thunkAddress) diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/TypeLoaderCallbacks.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/TypeLoaderCallbacks.cs index 7dc7641435e5..c7b5203ffebd 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/TypeLoaderCallbacks.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/TypeLoaderCallbacks.cs @@ -19,13 +19,10 @@ public abstract class TypeLoaderCallbacks public abstract bool TryGetConstructedGenericTypeForComponents(RuntimeTypeHandle genericTypeDefinitionHandle, RuntimeTypeHandle[] genericTypeArgumentHandles, out RuntimeTypeHandle runtimeTypeHandle); public abstract IntPtr GetThreadStaticGCDescForDynamicType(TypeManagerHandle handle, int index); public abstract IntPtr GenericLookupFromContextAndSignature(IntPtr context, IntPtr signature, out IntPtr auxResult); - public abstract bool GetRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out MethodNameAndSignature nameAndSignature, out RuntimeTypeHandle[] genericMethodArgs); - public abstract RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, string methodName, RuntimeSignature methodSignature, RuntimeTypeHandle[] genericMethodArgs); - public abstract bool CompareMethodSignatures(RuntimeSignature signature1, RuntimeSignature signature2); + public abstract RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, MethodHandle handle, RuntimeTypeHandle[] genericMethodArgs); public abstract IntPtr TryGetDefaultConstructorForType(RuntimeTypeHandle runtimeTypeHandle); public abstract IntPtr ResolveGenericVirtualMethodTarget(RuntimeTypeHandle targetTypeHandle, RuntimeMethodHandle declMethod); - public abstract bool GetRuntimeFieldHandleComponents(RuntimeFieldHandle runtimeFieldHandle, out RuntimeTypeHandle declaringTypeHandle, out string fieldName); - public abstract RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, string fieldName); + public abstract RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, FieldHandle handle); public abstract bool TryGetPointerTypeForTargetType(RuntimeTypeHandle pointeeTypeHandle, out RuntimeTypeHandle pointerTypeHandle); public abstract bool TryGetArrayTypeForElementType(RuntimeTypeHandle elementTypeHandle, bool isMdArray, int rank, out RuntimeTypeHandle arrayTypeHandle); /// diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/InteropHelpers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/InteropHelpers.cs index 429b85c997f6..7bccdd7b07f3 100644 
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/InteropHelpers.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/InteropHelpers.cs @@ -293,7 +293,7 @@ internal static unsafe void FixupModuleCell(ModuleFixupCell* pCell) { string moduleName = GetModuleName(pCell); - uint dllImportSearchPath = 0; + uint dllImportSearchPath = (uint)DllImportSearchPath.AssemblyDirectory; bool hasDllImportSearchPath = (pCell->DllImportSearchPathAndCookie & InteropDataConstants.HasDllImportSearchPath) != 0; if (hasDllImportSearchPath) { @@ -311,6 +311,7 @@ internal static unsafe void FixupModuleCell(ModuleFixupCell* pCell) hModule = NativeLibrary.LoadBySearch( callingAssembly, + hasDllImportSearchPath, searchAssemblyDirectory: (dllImportSearchPath & (uint)DllImportSearchPath.AssemblyDirectory) != 0, dllImportSearchPathFlags: (int)(dllImportSearchPath & ~(uint)DllImportSearchPath.AssemblyDirectory), ref loadLibErrorTracker, @@ -454,7 +455,7 @@ public static IntPtr GetCurrentCalleeOpenStaticDelegateFunctionPointer() /// /// Retrieves the current delegate that is being called /// - public static T GetCurrentCalleeDelegate() where T : class // constraint can't be System.Delegate + public static T GetCurrentCalleeDelegate() where T : Delegate { return PInvokeMarshal.GetCurrentCalleeDelegate(); } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/MethodNameAndSignature.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/MethodNameAndSignature.cs new file mode 100644 index 000000000000..04884e5ac545 --- /dev/null +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/MethodNameAndSignature.cs @@ -0,0 +1,57 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics; + +using Internal.Metadata.NativeFormat; + +namespace Internal.Runtime.CompilerServices +{ + [CLSCompliant(false)] + public class MethodNameAndSignature + { + public MetadataReader Reader { get; } + public MethodHandle Handle { get; } + + public MethodNameAndSignature(MetadataReader reader, MethodHandle handle) + { + Reader = reader; + Handle = handle; + } + + public string GetName() + { + Method method = Reader.GetMethod(Handle); + return Reader.GetString(method.Name); + } + + public override bool Equals(object? compare) + { + if (compare == null) + return false; + + MethodNameAndSignature? other = compare as MethodNameAndSignature; + if (other == null) + return false; + + // Comparing handles is enough if there's only one metadata blob + // (Same assumption in GetHashCode below!) 
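The changed default in `FixupModuleCell` lines up with `DllImportSearchPath.AssemblyDirectory` being assumed when a P/Invoke carries no explicit search-path attribute. Below is a hedged example of the attribute whose flags end up in the search-path cookie consumed above; the library and entry-point names are hypothetical.

```csharp
using System.Runtime.InteropServices;

static class NativeMethods
{
    // Restrict probing to the application directory and System32. For Native AOT,
    // these flags are captured at compile time and honored when the module is loaded.
    [DefaultDllImportSearchPaths(DllImportSearchPath.AssemblyDirectory | DllImportSearchPath.System32)]
    [DllImport("mynative", EntryPoint = "compute_sum")]
    internal static extern int ComputeSum(int a, int b);
}
```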
+ Debug.Assert(Reader == other.Reader); + + Method thisMethod = Reader.GetMethod(Handle); + Method otherMethod = other.Reader.GetMethod(other.Handle); + + return thisMethod.Signature.Equals(otherMethod.Signature) + && thisMethod.Name.Equals(otherMethod.Name); + } + + public override int GetHashCode() + { + Method method = Reader.GetMethod(Handle); + + // Assumes we only have one metadata blob + return method.Signature.GetHashCode() ^ method.Name.GetHashCode(); + } + } +} diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeFieldHandleInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeFieldHandleInfo.cs deleted file mode 100644 index 53f4d7e9ac25..000000000000 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeFieldHandleInfo.cs +++ /dev/null @@ -1,15 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System; -using System.Runtime.InteropServices; - -namespace Internal.Runtime.CompilerServices -{ - [StructLayout(LayoutKind.Sequential)] - [CLSCompliant(false)] - public unsafe struct RuntimeFieldHandleInfo - { - public IntPtr NativeLayoutInfoSignature; - } -} diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeMethodHandleInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeMethodHandleInfo.cs deleted file mode 100644 index 41aa17cdcbfd..000000000000 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeMethodHandleInfo.cs +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System; -using System.Diagnostics; -using System.Reflection; -using System.Runtime.InteropServices; - -using Internal.Runtime.Augments; - -namespace Internal.Runtime.CompilerServices -{ - public class MethodNameAndSignature - { - public string Name { get; } - public RuntimeSignature Signature { get; } - - public MethodNameAndSignature(string name, RuntimeSignature signature) - { - Name = name; - Signature = signature; - } - - public override bool Equals(object? compare) - { - if (compare == null) - return false; - - MethodNameAndSignature? other = compare as MethodNameAndSignature; - if (other == null) - return false; - - if (Name != other.Name) - return false; - - return Signature.Equals(other.Signature); - } - - public override int GetHashCode() - { - int hash = Name.GetHashCode(); - - return hash; - } - } - - [StructLayout(LayoutKind.Sequential)] - [CLSCompliant(false)] - public unsafe struct RuntimeMethodHandleInfo - { - public IntPtr NativeLayoutInfoSignature; - } -} diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeSignature.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeSignature.cs deleted file mode 100644 index 28fc165de7df..000000000000 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeSignature.cs +++ /dev/null @@ -1,127 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
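The new `MethodNameAndSignature` derives `Equals` and `GetHashCode` from the same underlying data (name and signature), which is what lets it serve as a hash-table key inside the type loader. A generic reminder of that contract, using an illustrative struct rather than the runtime type:

```csharp
using System;

readonly struct NameAndToken : IEquatable<NameAndToken>
{
    public NameAndToken(string name, int token) { Name = name; Token = token; }

    public string Name { get; }
    public int Token { get; }

    // Equals and GetHashCode must agree: values that compare equal must hash equally,
    // otherwise hash-based lookups silently miss.
    public bool Equals(NameAndToken other) => Name == other.Name && Token == other.Token;
    public override bool Equals(object? obj) => obj is NameAndToken other && Equals(other);
    public override int GetHashCode() => HashCode.Combine(Name, Token);
}
```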
- -using System; -using System.Diagnostics; -using System.Runtime; - -using Internal.Runtime.Augments; - -namespace Internal.Runtime.CompilerServices -{ - public struct RuntimeSignature - { - private IntPtr _moduleHandle; - private int _tokenOrOffset; - private bool _isNativeLayoutSignature; - - [CLSCompliant(false)] - public static RuntimeSignature CreateFromNativeLayoutSignature(TypeManagerHandle moduleHandle, uint nativeLayoutOffset) - { - return new RuntimeSignature - { - _moduleHandle = moduleHandle.GetIntPtrUNSAFE(), - _tokenOrOffset = (int)nativeLayoutOffset, - _isNativeLayoutSignature = true, - }; - } - - [CLSCompliant(false)] - public static RuntimeSignature CreateFromNativeLayoutSignature(RuntimeSignature oldSignature, uint newNativeLayoutOffset) - { - return new RuntimeSignature - { - _moduleHandle = oldSignature._moduleHandle, - _tokenOrOffset = (int)newNativeLayoutOffset, - _isNativeLayoutSignature = true, - }; - } - - public static RuntimeSignature CreateFromMethodHandle(TypeManagerHandle moduleHandle, int token) - { - return new RuntimeSignature - { - _moduleHandle = moduleHandle.GetIntPtrUNSAFE(), - _tokenOrOffset = token, - _isNativeLayoutSignature = false, - }; - } - - public bool IsNativeLayoutSignature - { - get - { - return _isNativeLayoutSignature; - } - } - - public int Token - { - get - { - if (_isNativeLayoutSignature) - { - Debug.Assert(false); - return -1; - } - return _tokenOrOffset; - } - } - - [CLSCompliant(false)] - public uint NativeLayoutOffset - { - get - { - if (!_isNativeLayoutSignature) - { - Debug.Assert(false); - return unchecked((uint)-1); - } - return (uint)_tokenOrOffset; - } - } - - public IntPtr ModuleHandle - { - get - { - return _moduleHandle; - } - } - - public bool Equals(RuntimeSignature other) - { - if (IsNativeLayoutSignature && other.IsNativeLayoutSignature) - { - if ((ModuleHandle == other.ModuleHandle) && (NativeLayoutOffset == other.NativeLayoutOffset)) - return true; - } - else if (!IsNativeLayoutSignature && !other.IsNativeLayoutSignature) - { - if ((ModuleHandle == other.ModuleHandle) && (Token == other.Token)) - return true; - } - - // Walk both signatures to check for equality the slow way - return RuntimeAugments.TypeLoaderCallbacks.CompareMethodSignatures(this, other); - } - - /// - /// Fast equality check - /// - public bool StructuralEquals(RuntimeSignature other) - { - if (_moduleHandle != other._moduleHandle) - return false; - - if (_tokenOrOffset != other._tokenOrOffset) - return false; - - if (_isNativeLayoutSignature != other._isNativeLayoutSignature) - return false; - - return true; - } - } -} diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj b/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj index 0189753a5277..43f4183fdb09 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj @@ -125,12 +125,9 @@ - - - - + @@ -173,6 +170,7 @@ + @@ -183,7 +181,6 @@ - @@ -212,9 +209,13 @@ +<<<<<<< HEAD +======= + +>>>>>>> upstream-jun @@ -226,7 +227,6 @@ - @@ -296,16 +296,10 @@ Interop\Windows\Ole32\Interop.CoGetApartmentType.cs - - Interop\Windows\Ole32\Interop.CoGetContextToken.cs - Interop\Windows\OleAut32\Interop.VariantClear.cs - - Interop\Windows\Kernel32\Interop.GetTickCount64.cs - Interop\Windows\Kernel32\Interop.DynamicLoad.cs @@ -408,9 +402,6 @@ Internal\LowLevelLinq\LowLevelEnumerable.ToArray.cs - - Internal\Runtime\CanonTypeKind.cs - 
System\Runtime\RhFailFastReason.cs @@ -615,9 +606,6 @@ Runtime.Base\src\System\Runtime\TypeCast.cs - - Runtime.Base\src\RhBaseName.cs - Common\TransitionBlock.cs @@ -635,34 +623,8 @@ - - - - - - src\System\Diagnostics\Eventing\NativeRuntimeEventSource.Generated.cs - - + + + - - - - - - <_PythonWarningParameter>-Wall - <_PythonWarningParameter Condition="'$(MSBuildTreatWarningsAsErrors)' == 'true'">$(_PythonWarningParameter) -Werror - <_EventingSourceFileDirectory>%(EventingSourceFile.RootDir)%(EventingSourceFile.Directory) - <_EventingSourceFileDirectory Condition="HasTrailingSlash('$(_EventingSourceFileDirectory)')">$(_EventingSourceFileDirectory.TrimEnd('\')) - - - - - - - - diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/AppContext.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/AppContext.NativeAot.cs index 535eb123ddf0..429544fb83ba 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/AppContext.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/AppContext.NativeAot.cs @@ -30,11 +30,5 @@ internal static void OnFirstChanceException(object e) { FirstChanceException?.Invoke(/* AppDomain */ null, new FirstChanceExceptionEventArgs((Exception)e)); } - - [RuntimeExport("OnUnhandledException")] - internal static void OnUnhandledException(object e) - { - UnhandledException?.Invoke(/* AppDomain */ null, new UnhandledExceptionEventArgs(e, true)); - } } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs index 09439509355e..d83afdccd168 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs @@ -78,7 +78,7 @@ private static unsafe Array InternalCreate(RuntimeType elementType, int rank, in if (rank == 1) { - return RuntimeImports.RhNewArray(elementType.MakeArrayType().TypeHandle.ToMethodTable(), pLengths[0]); + return RuntimeAugments.NewArray(elementType.MakeArrayType().TypeHandle, pLengths[0]); } else { @@ -112,15 +112,15 @@ private static unsafe Array InternalCreateFromArrayType(RuntimeType arrayType, i } } - MethodTable* eeType = arrayType.TypeHandle.ToMethodTable(); if (rank == 1) { // Multidimensional array of rank 1 with 0 lower bounds gets actually allocated // as an SzArray. SzArray is castable to MdArray rank 1. - if (!eeType->IsSzArray) - eeType = arrayType.GetElementType().MakeArrayType().TypeHandle.ToMethodTable(); + RuntimeTypeHandle arrayTypeHandle = arrayType.IsSZArray + ? arrayType.TypeHandle + : arrayType.GetElementType().MakeArrayType().TypeHandle; - return RuntimeImports.RhNewArray(eeType, pLengths[0]); + return RuntimeAugments.NewArray(arrayTypeHandle, pLengths[0]); } else { @@ -129,6 +129,7 @@ private static unsafe Array InternalCreateFromArrayType(RuntimeType arrayType, i for (int i = 0; i < rank; i++) pImmutableLengths[i] = pLengths[i]; + MethodTable* eeType = arrayType.TypeHandle.ToMethodTable(); return NewMultiDimArray(eeType, pImmutableLengths, rank); } } @@ -662,7 +663,8 @@ internal static unsafe Array NewMultiDimArray(MethodTable* eeType, int* pLengths if (maxArrayDimensionLengthOverflow) throw new OutOfMemoryException(); // "Array dimensions exceeded supported range." 
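The `maxArrayDimensionLengthOverflow` test above rejects flattened element counts that no longer fit in a 32-bit length before any allocation is attempted. A standalone sketch of the same computation with an explicit per-step check; the helper name is illustrative.

```csharp
using System;

static class ArrayLengthMath
{
    static int GetTotalLength(ReadOnlySpan<int> lengths)
    {
        long total = 1;
        foreach (int length in lengths)
        {
            ArgumentOutOfRangeException.ThrowIfNegative(length);
            total *= length;

            // The flattened element count must fit in a 32-bit length; reject it up
            // front rather than letting the allocation fail in a less obvious way.
            if (total > int.MaxValue)
                throw new OutOfMemoryException("Array dimensions exceeded supported range.");
        }

        return (int)total;
    }

    static void Main() => Console.WriteLine(GetTotalLength(new[] { 100, 200, 3 })); // 60000
}
```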
- Array ret = RuntimeImports.RhNewArray(eeType, (int)totalLength); + Debug.Assert(eeType->NumVtableSlots != 0, "Compiler enforces we never have unconstructed MTs for multi-dim arrays since those can be template-constructed anytime"); + Array ret = RuntimeImports.RhNewVariableSizeObject(eeType, (int)totalLength); ref int bounds = ref ret.GetRawMultiDimArrayBounds(); for (int i = 0; i < rank; i++) diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Buffer.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Buffer.NativeAot.cs index caf6d9b7089a..4ceecdf70f66 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Buffer.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Buffer.NativeAot.cs @@ -12,15 +12,15 @@ namespace System public static partial class Buffer { [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe void __ZeroMemory(void* b, nuint byteLength) => + private static unsafe void ZeroMemoryInternal(void* b, nuint byteLength) => RuntimeImports.memset((byte*)b, 0, byteLength); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe void __Memmove(byte* dest, byte* src, nuint len) => + private static unsafe void MemmoveInternal(byte* dest, byte* src, nuint len) => RuntimeImports.memmove(dest, src, len); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void __BulkMoveWithWriteBarrier(ref byte destination, ref byte source, nuint byteCount) => + private static void BulkMoveWithWriteBarrierInternal(ref byte destination, ref byte source, nuint byteCount) => RuntimeImports.RhBulkMoveWithWriteBarrier(ref destination, ref source, byteCount); } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/ComAwareWeakReference.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/ComAwareWeakReference.NativeAot.cs deleted file mode 100644 index d95eab971603..000000000000 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/ComAwareWeakReference.NativeAot.cs +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System; -using System.Runtime.CompilerServices; - -#if FEATURE_COMINTEROP || FEATURE_COMWRAPPERS - -namespace System -{ - internal sealed partial class ComAwareWeakReference - { - // We don't want to consult ComWrappers if no RCW objects have been created. - // In addition we don't want a direct reference to ComWrappers to allow for better - // trimming. So we instead make use of delegates that ComWrappers registers when - // it is used. - private static unsafe delegate* s_comWeakRefToObjectCallback; - private static unsafe delegate* s_possiblyComObjectCallback; - private static unsafe delegate* s_objectToComWeakRefCallback; - - internal static unsafe void InitializeCallbacks( - delegate* comWeakRefToObject, - delegate* possiblyComObject, - delegate* objectToComWeakRef) - { - s_comWeakRefToObjectCallback = comWeakRefToObject; - s_objectToComWeakRefCallback = objectToComWeakRef; - s_possiblyComObjectCallback = possiblyComObject; - } - - internal static unsafe object? ComWeakRefToObject(IntPtr pComWeakRef, long wrapperId) - { - return s_comWeakRefToObjectCallback != null ? s_comWeakRefToObjectCallback(pComWeakRef, wrapperId) : null; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe bool PossiblyComObject(object target) - { - return s_possiblyComObjectCallback != null ? 
s_possiblyComObjectCallback(target) : false; - } - - internal static unsafe IntPtr ObjectToComWeakRef(object target, out long wrapperId) - { - wrapperId = 0; - return s_objectToComWeakRefCallback != null ? s_objectToComWeakRefCallback(target, out wrapperId) : IntPtr.Zero; - } - } -} -#endif diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs index 2c23ff3a06e8..5308b888c608 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs @@ -41,17 +41,6 @@ public static void NotifyOfCrossThreadDependency() // nothing to do...yet } - /// - /// Constants representing the importance level of messages to be logged. - /// - /// An attached debugger can enable or disable which messages will - /// actually be reported to the user through the CLR debugger - /// services API. This info is communicated to the runtime so only - /// desired events are actually reported to the debugger. - /// Constant representing the default category - /// - public static readonly string DefaultCategory; - /// - /// Posts a message for the attached debugger. If there is no - /// debugger attached, has no effect. The debugger may or may not diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.Unix.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.Unix.cs index 98cb72251711..f437f3a17cf0 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.Unix.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.Unix.cs @@ -9,8 +9,6 @@ namespace System { public static partial class Environment { - public static long TickCount64 => (long)RuntimeImports.RhpGetTickCount64(); - [DoesNotReturn] private static void ExitRaw() => Interop.Sys.Exit(s_latchedExitCode); } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.Windows.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.Windows.cs index 9aaa5980fc68..0c47251b931e 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.Windows.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.Windows.cs @@ -7,8 +7,6 @@ namespace System { public static partial class Environment { - public static long TickCount64 => (long)Interop.Kernel32.GetTickCount64(); - [DoesNotReturn] private static void ExitRaw() => Interop.Kernel32.ExitProcess(s_latchedExitCode); } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.cs index 19b2c0be298b..ecee6cd5b6e1 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Environment.NativeAot.cs @@ -55,6 +55,7 @@ internal static void ShutdownCore() AppContext.OnProcessExit(); #endif } public static int TickCount => (int)TickCount64; @@ -64,5 +65,7 @@ private static unsafe string[] GetCommandLineArgsNative() { return Array.Empty(); } #endif } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/GC.NativeAot.cs index
079bd82cf0bf..6f6aec5df683 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/GC.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/GC.NativeAot.cs @@ -141,6 +141,11 @@ public static void Collect(int generation, GCCollectionMode mode, bool blocking) } public static void Collect(int generation, GCCollectionMode mode, bool blocking, bool compacting) + { + Collect(generation, mode, blocking, compacting, lowMemoryPressure: false); + } + + internal static void Collect(int generation, GCCollectionMode mode, bool blocking, bool compacting, bool lowMemoryPressure) { ArgumentOutOfRangeException.ThrowIfNegative(generation); @@ -186,7 +191,7 @@ public static void Collect(int generation, GCCollectionMode mode, bool blocking, iInternalModes |= (int)InternalGCCollectionMode.NonBlocking; } - RuntimeImports.RhCollect(generation, (InternalGCCollectionMode)iInternalModes); + RuntimeImports.RhCollect(generation, (InternalGCCollectionMode)iInternalModes, lowMemoryPressure); } /// @@ -305,7 +310,7 @@ private unsafe struct NoGCRegionCallbackFinalizerWorkItem public bool scheduled; public bool abandoned; - public GCHandle action; + public GCHandle action; } public static unsafe void RegisterNoGCRegionCallback(long totalSize, Action callback) @@ -317,7 +322,7 @@ public static unsafe void RegisterNoGCRegionCallback(long totalSize, Action call try { pWorkItem = (NoGCRegionCallbackFinalizerWorkItem*)NativeMemory.AllocZeroed((nuint)sizeof(NoGCRegionCallbackFinalizerWorkItem)); - pWorkItem->action = GCHandle.Alloc(callback); + pWorkItem->action = new GCHandle(callback); pWorkItem->callback = &Callback; EnableNoGCRegionCallbackStatus status = (EnableNoGCRegionCallbackStatus)RuntimeImports.RhEnableNoGCRegionCallback(pWorkItem, totalSize); @@ -347,14 +352,13 @@ static void Callback(NoGCRegionCallbackFinalizerWorkItem* pWorkItem) { Debug.Assert(pWorkItem->scheduled); if (!pWorkItem->abandoned) - ((Action)(pWorkItem->action.Target!))(); + pWorkItem->action.Target(); Free(pWorkItem); } static void Free(NoGCRegionCallbackFinalizerWorkItem* pWorkItem) { - if (pWorkItem->action.IsAllocated) - pWorkItem->action.Free(); + pWorkItem->action.Dispose(); NativeMemory.Free(pWorkItem); } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/InvokeUtils.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/InvokeUtils.cs index 61a2d5367c7c..9cae9eb91983 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/InvokeUtils.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/InvokeUtils.cs @@ -409,11 +409,8 @@ private static Exception CreateChangeTypeException(MethodTable* srcEEType, Metho } internal static ArgumentException CreateChangeTypeArgumentException(MethodTable* srcEEType, MethodTable* dstEEType, bool destinationIsByRef = false) - => CreateChangeTypeArgumentException(srcEEType, Type.GetTypeFromHandle(new RuntimeTypeHandle(dstEEType)), destinationIsByRef); - - internal static ArgumentException CreateChangeTypeArgumentException(MethodTable* srcEEType, Type dstType, bool destinationIsByRef = false) { - object? destinationTypeName = dstType; + object? 
destinationTypeName = Type.GetTypeFromHandle(new RuntimeTypeHandle(dstEEType)); if (destinationIsByRef) destinationTypeName += "&"; return new ArgumentException(SR.Format(SR.Arg_ObjObjEx, Type.GetTypeFromHandle(new RuntimeTypeHandle(srcEEType)), destinationTypeName)); diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs index 9f8dbe11a212..d241102d5070 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs @@ -29,7 +29,7 @@ public Type GetType() protected internal unsafe object MemberwiseClone() { object clone = this.GetMethodTable()->IsArray ? - RuntimeImports.RhNewArray(this.GetMethodTable(), Unsafe.As(this).Length) : + RuntimeImports.RhNewVariableSizeObject(this.GetMethodTable(), Unsafe.As(this).Length) : RuntimeImports.RhNewObject(this.GetMethodTable()); // copy contents of "this" to the clone diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/DynamicInvokeInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/DynamicInvokeInfo.cs index d7837bdc649e..00fed3ec6c6f 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/DynamicInvokeInfo.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/DynamicInvokeInfo.cs @@ -74,21 +74,20 @@ public unsafe DynamicInvokeInfo(MethodBase method, IntPtr invokeThunk) { Transform transform = default; - var argumentType = (RuntimeType)parameters[i].ParameterType; + Type argumentType = parameters[i].ParameterType; if (argumentType.IsByRef) { _needsCopyBack = true; transform |= Transform.ByRef; - argumentType = (RuntimeType)argumentType.GetElementType()!; + argumentType = argumentType.GetElementType()!; } Debug.Assert(!argumentType.IsByRef); - // This can return a null MethodTable for reference types. - // The compiler makes sure it returns a non-null MT for everything else. 
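The transform selection in `DynamicInvokeInfo` keys off a handful of `System.Type` predicates after unwrapping by-refs. A condensed, hedged sketch of that classification is below; it is purely illustrative, does not reproduce the runtime's actual `Transform` flags, and assumes .NET 8+ for `Type.IsFunctionPointer`.

```csharp
using System;

static class ParameterClassifier
{
    static string Classify(Type parameterType)
    {
        // By-ref parameters are classified by their element type, as in the constructor above.
        if (parameterType.IsByRef)
            parameterType = parameterType.GetElementType()!;

        if (parameterType.IsPointer)
            return "pointer";
        if (parameterType.IsFunctionPointer)
            return "function pointer";
        if (parameterType.IsValueType)
            return Nullable.GetUnderlyingType(parameterType) is null ? "value type" : "nullable value type";

        return "reference type";
    }

    static void Main()
    {
        Console.WriteLine(Classify(typeof(int).MakeByRefType()));   // value type
        Console.WriteLine(Classify(typeof(int).MakePointerType())); // pointer
        Console.WriteLine(Classify(typeof(int?)));                  // nullable value type
        Console.WriteLine(Classify(typeof(Action)));                // reference type
    }
}
```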
- MethodTable* eeArgumentType = argumentType.ToMethodTableMayBeNull(); - if (argumentType.IsValueType) + MethodTable* eeArgumentType = argumentType.TypeHandle.ToMethodTable(); + + if (eeArgumentType->IsValueType) { - Debug.Assert(eeArgumentType->IsValueType); + Debug.Assert(argumentType.IsValueType); if (eeArgumentType->IsByRefLike) _argumentCount = ArgumentCount_NotSupported_ByRefLike; @@ -96,15 +95,15 @@ public unsafe DynamicInvokeInfo(MethodBase method, IntPtr invokeThunk) if (eeArgumentType->IsNullable) transform |= Transform.Nullable; } - else if (argumentType.IsPointer) + else if (eeArgumentType->IsPointer) { - Debug.Assert(eeArgumentType->IsPointer); + Debug.Assert(argumentType.IsPointer); transform |= Transform.Pointer; } - else if (argumentType.IsFunctionPointer) + else if (eeArgumentType->IsFunctionPointer) { - Debug.Assert(eeArgumentType->IsFunctionPointer); + Debug.Assert(argumentType.IsFunctionPointer); transform |= Transform.FunctionPointer; } @@ -122,18 +121,19 @@ public unsafe DynamicInvokeInfo(MethodBase method, IntPtr invokeThunk) { Transform transform = default; - var returnType = (RuntimeType)methodInfo.ReturnType; + Type returnType = methodInfo.ReturnType; if (returnType.IsByRef) { transform |= Transform.ByRef; - returnType = (RuntimeType)returnType.GetElementType()!; + returnType = returnType.GetElementType()!; } Debug.Assert(!returnType.IsByRef); - MethodTable* eeReturnType = returnType.ToMethodTableMayBeNull(); - if (returnType.IsValueType) + MethodTable* eeReturnType = returnType.TypeHandle.ToMethodTable(); + + if (eeReturnType->IsValueType) { - Debug.Assert(eeReturnType->IsValueType); + Debug.Assert(returnType.IsValueType); if (returnType != typeof(void)) { @@ -152,17 +152,17 @@ public unsafe DynamicInvokeInfo(MethodBase method, IntPtr invokeThunk) _argumentCount = ArgumentCount_NotSupported; // ByRef to void return } } - else if (returnType.IsPointer) + else if (eeReturnType->IsPointer) { - Debug.Assert(eeReturnType->IsPointer); + Debug.Assert(returnType.IsPointer); transform |= Transform.Pointer; if ((transform & Transform.ByRef) == 0) transform |= Transform.AllocateReturnBox; } - else if (returnType.IsFunctionPointer) + else if (eeReturnType->IsFunctionPointer) { - Debug.Assert(eeReturnType->IsFunctionPointer); + Debug.Assert(returnType.IsFunctionPointer); transform |= Transform.FunctionPointer; if ((transform & Transform.ByRef) == 0) @@ -585,12 +585,6 @@ private unsafe ref byte InvokeDirectWithFewArguments( return defaultValue; } - private void ThrowForNeverValidNonNullArgument(MethodTable* srcEEType, int index) - { - Debug.Assert(index != 0 || _isStatic); - throw InvokeUtils.CreateChangeTypeArgumentException(srcEEType, Method.GetParametersAsSpan()[index - (_isStatic ? 0 : 1)].ParameterType, destinationIsByRef: false); - } - private unsafe void CheckArguments( Span copyOfParameters, void* byrefParameters, @@ -630,25 +624,16 @@ private unsafe void CheckArguments( MethodTable* srcEEType = arg.GetMethodTable(); MethodTable* dstEEType = argumentInfo.Type; - if (srcEEType != dstEEType) - { - // Destination type can be null if we don't have a MethodTable for this type. This means one cannot - // possibly pass a valid non-null object instance here. 
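The fast path in `CheckArguments` is essentially an instance-of test, with a conversion fallback when it fails. Expressed against the public API it looks roughly like the simplification below, which ignores the `IDynamicInterfaceCastable` and by-ref special cases and substitutes `Convert.ChangeType` for the runtime's own coercion helper.

```csharp
using System;

static class ArgumentCheck
{
    static object CoerceArgument(object value, Type parameterType)
    {
        // Exact or assignable matches pass through untouched (the common case).
        if (parameterType.IsInstanceOfType(value))
            return value;

        // Otherwise attempt a conversion, e.g. widening an int to a long; incompatible
        // combinations throw, analogous to CreateChangeTypeArgumentException above.
        return Convert.ChangeType(value, parameterType);
    }

    static void Main()
    {
        Console.WriteLine(CoerceArgument("hello", typeof(object))); // hello
        Console.WriteLine(CoerceArgument(42, typeof(long)));        // 42
    }
}
```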
- if (dstEEType == null) - { - ThrowForNeverValidNonNullArgument(srcEEType, i); - } - - if (!(RuntimeImports.AreTypesAssignable(srcEEType, dstEEType) || - (dstEEType->IsInterface && arg is System.Runtime.InteropServices.IDynamicInterfaceCastable castable + if (!(srcEEType == dstEEType || + RuntimeImports.AreTypesAssignable(srcEEType, dstEEType) || + (dstEEType->IsInterface && arg is System.Runtime.InteropServices.IDynamicInterfaceCastable castable && castable.IsInterfaceImplemented(new RuntimeTypeHandle(dstEEType), throwIfNotImplemented: false)))) - { - // ByRefs have to be exact match - if ((argumentInfo.Transform & Transform.ByRef) != 0) - throw InvokeUtils.CreateChangeTypeArgumentException(srcEEType, argumentInfo.Type, destinationIsByRef: true); + { + // ByRefs have to be exact match + if ((argumentInfo.Transform & Transform.ByRef) != 0) + throw InvokeUtils.CreateChangeTypeArgumentException(srcEEType, argumentInfo.Type, destinationIsByRef: true); - arg = InvokeUtils.CheckArgumentConversions(arg, argumentInfo.Type, InvokeUtils.CheckArgumentSemantics.DynamicInvoke, binderBundle); - } + arg = InvokeUtils.CheckArgumentConversions(arg, argumentInfo.Type, InvokeUtils.CheckArgumentSemantics.DynamicInvoke, binderBundle); } if ((argumentInfo.Transform & Transform.Reference) == 0) @@ -707,25 +692,16 @@ private unsafe void CheckArguments( MethodTable* srcEEType = arg.GetMethodTable(); MethodTable* dstEEType = argumentInfo.Type; - if (srcEEType != dstEEType) - { - // Destination type can be null if we don't have a MethodTable for this type. This means one cannot - // possibly pass a valid non-null object instance here. - if (dstEEType == null) - { - ThrowForNeverValidNonNullArgument(srcEEType, i); - } - - if (!(RuntimeImports.AreTypesAssignable(srcEEType, dstEEType) || - (dstEEType->IsInterface && arg is System.Runtime.InteropServices.IDynamicInterfaceCastable castable + if (!(srcEEType == dstEEType || + RuntimeImports.AreTypesAssignable(srcEEType, dstEEType) || + (dstEEType->IsInterface && arg is System.Runtime.InteropServices.IDynamicInterfaceCastable castable && castable.IsInterfaceImplemented(new RuntimeTypeHandle(dstEEType), throwIfNotImplemented: false)))) - { - // ByRefs have to be exact match - if ((argumentInfo.Transform & Transform.ByRef) != 0) - throw InvokeUtils.CreateChangeTypeArgumentException(srcEEType, argumentInfo.Type, destinationIsByRef: true); + { + // ByRefs have to be exact match + if ((argumentInfo.Transform & Transform.ByRef) != 0) + throw InvokeUtils.CreateChangeTypeArgumentException(srcEEType, argumentInfo.Type, destinationIsByRef: true); - arg = InvokeUtils.CheckArgumentConversions(arg, argumentInfo.Type, InvokeUtils.CheckArgumentSemantics.DynamicInvoke, binderBundle: null); - } + arg = InvokeUtils.CheckArgumentConversions(arg, argumentInfo.Type, InvokeUtils.CheckArgumentSemantics.DynamicInvoke, binderBundle: null); } if ((argumentInfo.Transform & Transform.Reference) == 0) diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs index 661f7f285352..bf8331776d7a 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs @@ -29,7 +29,7 @@ internal MethodInvoker(RuntimeConstructorInfo constructor) public static MethodInvoker Create(MethodBase method) { - ArgumentNullException.ThrowIfNull(method, nameof(method)); + 
ArgumentNullException.ThrowIfNull(method); if (method is RuntimeMethodInfo rmi) { diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseInsensitive.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseInsensitive.cs index a6a183fa73cd..20dbd1206ef7 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseInsensitive.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseInsensitive.cs @@ -72,36 +72,33 @@ private LowLevelDictionary CreateCaseInsensitiveTypeDictionary( LowLevelDictionary dict = new LowLevelDictionary(); - foreach (QScopeDefinition scope in AllScopes) + MetadataReader reader = Scope.Reader; + ScopeDefinition scopeDefinition = Scope.ScopeDefinition; + IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition }; + IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles); + foreach (NamespaceDefinitionHandle namespaceHandle in allNamespaceHandles) { - MetadataReader reader = scope.Reader; - ScopeDefinition scopeDefinition = scope.ScopeDefinition; - IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition }; - IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles); - foreach (NamespaceDefinitionHandle namespaceHandle in allNamespaceHandles) - { - string ns = namespaceHandle.ToNamespaceName(reader); - if (ns.Length != 0) - ns += "."; - ns = ns.ToLowerInvariant(); + string ns = namespaceHandle.ToNamespaceName(reader); + if (ns.Length != 0) + ns += "."; + ns = ns.ToLowerInvariant(); - NamespaceDefinition namespaceDefinition = namespaceHandle.GetNamespaceDefinition(reader); - foreach (TypeDefinitionHandle typeDefinitionHandle in namespaceDefinition.TypeDefinitions) + NamespaceDefinition namespaceDefinition = namespaceHandle.GetNamespaceDefinition(reader); + foreach (TypeDefinitionHandle typeDefinitionHandle in namespaceDefinition.TypeDefinitions) + { + string fullName = ns + typeDefinitionHandle.GetTypeDefinition(reader).Name.GetString(reader).ToLowerInvariant(); + if (!dict.TryGetValue(fullName, out _)) { - string fullName = ns + typeDefinitionHandle.GetTypeDefinition(reader).Name.GetString(reader).ToLowerInvariant(); - if (!dict.TryGetValue(fullName, out _)) - { - dict.Add(fullName, new QHandle(reader, typeDefinitionHandle)); - } + dict.Add(fullName, new QHandle(reader, typeDefinitionHandle)); } + } - foreach (TypeForwarderHandle typeForwarderHandle in namespaceDefinition.TypeForwarders) + foreach (TypeForwarderHandle typeForwarderHandle in namespaceDefinition.TypeForwarders) + { + string fullName = ns + typeForwarderHandle.GetTypeForwarder(reader).Name.GetString(reader).ToLowerInvariant(); + if (!dict.TryGetValue(fullName, out _)) { - string fullName = ns + typeForwarderHandle.GetTypeForwarder(reader).Name.GetString(reader).ToLowerInvariant(); - if (!dict.TryGetValue(fullName, out _)) - { - dict.Add(fullName, new QHandle(reader, typeForwarderHandle)); - } + dict.Add(fullName, new QHandle(reader, typeForwarderHandle)); } } } diff --git 
a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseSensitive.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseSensitive.cs index 34a045281fd5..e52605d1d45c 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseSensitive.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseSensitive.cs @@ -21,15 +21,12 @@ internal sealed override RuntimeTypeInfo UncachedGetTypeCoreCaseSensitive(string namespaceParts[numNamespaceParts - i - 1] = parts[i]; string name = parts[numNamespaceParts]; - foreach (QScopeDefinition scopeDefinition in AllScopes) - { - MetadataReader reader = scopeDefinition.Reader; - ScopeDefinitionHandle scopeDefinitionHandle = scopeDefinition.Handle; - - NamespaceDefinition namespaceDefinition; - if (!TryResolveNamespaceDefinitionCaseSensitive(reader, namespaceParts, scopeDefinitionHandle, out namespaceDefinition)) - continue; + MetadataReader reader = Scope.Reader; + ScopeDefinitionHandle scopeDefinitionHandle = Scope.Handle; + NamespaceDefinition namespaceDefinition; + if (TryResolveNamespaceDefinitionCaseSensitive(reader, namespaceParts, scopeDefinitionHandle, out namespaceDefinition)) + { // We've successfully drilled down the namespace chain. Now look for a top-level type matching the type name. TypeDefinitionHandleCollection candidateTypes = namespaceDefinition.TypeDefinitions; foreach (TypeDefinitionHandle candidateType in candidateTypes) diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.cs index 3e26daa46725..2ca7967bcffc 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.cs @@ -20,21 +20,17 @@ namespace System.Reflection.Runtime.Assemblies.NativeFormat { internal sealed partial class NativeFormatRuntimeAssembly : RuntimeAssemblyInfo { - private NativeFormatRuntimeAssembly(MetadataReader reader, ScopeDefinitionHandle scope, IEnumerable overflowScopes) + private NativeFormatRuntimeAssembly(MetadataReader reader, ScopeDefinitionHandle scope) { Scope = new QScopeDefinition(reader, scope); - OverflowScopes = overflowScopes; } public sealed override IEnumerable CustomAttributes { get { - foreach (QScopeDefinition scope in AllScopes) - { - foreach (CustomAttributeData cad in RuntimeCustomAttributeData.GetCustomAttributes(scope.Reader, scope.ScopeDefinition.CustomAttributes)) - yield return cad; - } + foreach (CustomAttributeData cad in RuntimeCustomAttributeData.GetCustomAttributes(Scope.Reader, Scope.ScopeDefinition.CustomAttributes)) + yield return cad; } } @@ -43,17 +39,14 @@ public sealed override IEnumerable DefinedTypes [RequiresUnreferencedCode("Types might be removed")] get { - foreach (QScopeDefinition scope in AllScopes) - { - MetadataReader reader = scope.Reader; - ScopeDefinition scopeDefinition = scope.ScopeDefinition; - IEnumerable 
topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition }; - IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles); - IEnumerable allTopLevelTypes = reader.GetTopLevelTypes(allNamespaceHandles); - IEnumerable allTypes = reader.GetTransitiveTypes(allTopLevelTypes, publicOnly: false); - foreach (TypeDefinitionHandle typeDefinitionHandle in allTypes) - yield return (TypeInfo)typeDefinitionHandle.GetNamedType(reader).ToType(); - } + MetadataReader reader = Scope.Reader; + ScopeDefinition scopeDefinition = Scope.ScopeDefinition; + IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition }; + IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles); + IEnumerable allTopLevelTypes = reader.GetTopLevelTypes(allNamespaceHandles); + IEnumerable allTypes = reader.GetTransitiveTypes(allTopLevelTypes, publicOnly: false); + foreach (TypeDefinitionHandle typeDefinitionHandle in allTypes) + yield return (TypeInfo)typeDefinitionHandle.GetNamedType(reader).ToType(); } } @@ -62,17 +55,14 @@ public sealed override IEnumerable ExportedTypes [RequiresUnreferencedCode("Types might be removed")] get { - foreach (QScopeDefinition scope in AllScopes) - { - MetadataReader reader = scope.Reader; - ScopeDefinition scopeDefinition = scope.ScopeDefinition; - IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition }; - IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles); - IEnumerable allTopLevelTypes = reader.GetTopLevelTypes(allNamespaceHandles); - IEnumerable allTypes = reader.GetTransitiveTypes(allTopLevelTypes, publicOnly: true); - foreach (TypeDefinitionHandle typeDefinitionHandle in allTypes) - yield return typeDefinitionHandle.ResolveTypeDefinition(reader).ToType(); - } + MetadataReader reader = Scope.Reader; + ScopeDefinition scopeDefinition = Scope.ScopeDefinition; + IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition }; + IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles); + IEnumerable allTopLevelTypes = reader.GetTopLevelTypes(allNamespaceHandles); + IEnumerable allTypes = reader.GetTransitiveTypes(allTopLevelTypes, publicOnly: true); + foreach (TypeDefinitionHandle typeDefinitionHandle in allTypes) + yield return typeDefinitionHandle.ResolveTypeDefinition(reader).ToType(); } } @@ -80,22 +70,16 @@ public sealed override MethodInfo EntryPoint { get { - // The scope that defines metadata for the owning type of the entrypoint will be the one - // to carry the entrypoint token information. Find it by iterating over all scopes. 
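Reviewer note, not part of the patch: the case-sensitive lookup earlier in this file splits a namespace-qualified name into reversed namespace segments plus the simple type name before drilling down the namespace chain. A minimal, self-contained sketch of that splitting (the type name is made up for illustration):

```csharp
// Hypothetical illustration of the name splitting used by UncachedGetTypeCoreCaseSensitive:
// namespace segments are stored innermost-first, followed by the simple type name.
string fullName = "System.Collections.Generic.List";
string[] parts = fullName.Split('.');
int numNamespaceParts = parts.Length - 1;

string[] namespaceParts = new string[numNamespaceParts];
for (int i = 0; i < numNamespaceParts; i++)
    namespaceParts[numNamespaceParts - i - 1] = parts[i];   // -> "Generic", "Collections", "System"

string name = parts[numNamespaceParts];                      // -> "List"
```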
+ MetadataReader reader = Scope.Reader; - foreach (QScopeDefinition scope in AllScopes) + QualifiedMethodHandle entrypointHandle = Scope.ScopeDefinition.EntryPoint; + if (!entrypointHandle.IsNil) { - MetadataReader reader = scope.Reader; - - QualifiedMethodHandle entrypointHandle = scope.ScopeDefinition.EntryPoint; - if (!entrypointHandle.IsNil) - { - QualifiedMethod entrypointMethod = entrypointHandle.GetQualifiedMethod(reader); - TypeDefinitionHandle declaringTypeHandle = entrypointMethod.EnclosingType; - MethodHandle methodHandle = entrypointMethod.Method; - NativeFormatRuntimeNamedTypeInfo containingType = NativeFormatRuntimeNamedTypeInfo.GetRuntimeNamedTypeInfo(reader, declaringTypeHandle, default(RuntimeTypeHandle)); - return RuntimeNamedMethodInfo.GetRuntimeNamedMethodInfo(new NativeFormatMethodCommon(methodHandle, containingType, containingType), containingType); - } + QualifiedMethod entrypointMethod = entrypointHandle.GetQualifiedMethod(reader); + TypeDefinitionHandle declaringTypeHandle = entrypointMethod.EnclosingType; + MethodHandle methodHandle = entrypointMethod.Method; + NativeFormatRuntimeNamedTypeInfo containingType = NativeFormatRuntimeNamedTypeInfo.GetRuntimeNamedTypeInfo(reader, declaringTypeHandle, default(RuntimeTypeHandle)); + return RuntimeNamedMethodInfo.GetRuntimeNamedMethodInfo(new NativeFormatMethodCommon(methodHandle, containingType, containingType), containingType); } return null; @@ -106,25 +90,22 @@ protected sealed override IEnumerable TypeForwardInfos { get { - foreach (QScopeDefinition scope in AllScopes) + MetadataReader reader = Scope.Reader; + ScopeDefinition scopeDefinition = Scope.ScopeDefinition; + IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition }; + IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles); + foreach (NamespaceDefinitionHandle namespaceHandle in allNamespaceHandles) { - MetadataReader reader = scope.Reader; - ScopeDefinition scopeDefinition = scope.ScopeDefinition; - IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition }; - IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles); - foreach (NamespaceDefinitionHandle namespaceHandle in allNamespaceHandles) + string? namespaceName = null; + foreach (TypeForwarderHandle typeForwarderHandle in namespaceHandle.GetNamespaceDefinition(reader).TypeForwarders) { - string? 
namespaceName = null; - foreach (TypeForwarderHandle typeForwarderHandle in namespaceHandle.GetNamespaceDefinition(reader).TypeForwarders) - { - namespaceName ??= namespaceHandle.ToNamespaceName(reader); + namespaceName ??= namespaceHandle.ToNamespaceName(reader); - TypeForwarder typeForwarder = typeForwarderHandle.GetTypeForwarder(reader); - string typeName = typeForwarder.Name.GetString(reader); - RuntimeAssemblyName redirectedAssemblyName = typeForwarder.Scope.ToRuntimeAssemblyName(reader); + TypeForwarder typeForwarder = typeForwarderHandle.GetTypeForwarder(reader); + string typeName = typeForwarder.Name.GetString(reader); + RuntimeAssemblyName redirectedAssemblyName = typeForwarder.Scope.ToRuntimeAssemblyName(reader); - yield return new TypeForwardInfo(redirectedAssemblyName, namespaceName, typeName); - } + yield return new TypeForwardInfo(redirectedAssemblyName, namespaceName, typeName); } } } @@ -194,20 +175,5 @@ public sealed override int GetHashCode() } internal QScopeDefinition Scope { get; } - - internal IEnumerable OverflowScopes { get; } - - internal IEnumerable AllScopes - { - get - { - yield return Scope; - - foreach (QScopeDefinition overflowScope in OverflowScopes) - { - yield return overflowScope; - } - } - } } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/BindingFlagSupport/PropertyPolicies.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/BindingFlagSupport/PropertyPolicies.cs index 28eb0627c2fe..230c24b65c3b 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/BindingFlagSupport/PropertyPolicies.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/BindingFlagSupport/PropertyPolicies.cs @@ -33,7 +33,7 @@ public sealed override IEnumerable CoreGetDeclaredMembers(RuntimeT public sealed override void GetMemberAttributes(PropertyInfo member, out MethodAttributes visibility, out bool isStatic, out bool isVirtual, out bool isNewSlot) { - MethodInfo? accessorMethod = GetAccessorMethod(member); + MethodInfo? accessorMethod = GetMostAccessibleAccessor(member); if (accessorMethod == null) { // If we got here, this is a inherited PropertyInfo that only had private accessors and is now refusing to give them out @@ -99,5 +99,20 @@ public sealed override bool OkToIgnoreAmbiguity(PropertyInfo m1, PropertyInfo m2 return accessor; } + + private static MethodInfo? GetMostAccessibleAccessor(PropertyInfo property) + { + MethodInfo? getter = property.GetMethod; + MethodInfo? setter = property.SetMethod; + + if (getter == null) + return setter; + if (setter == null) + return getter; + + // Return the setter if it's more accessible, otherwise return the getter. + // MethodAttributes acessibility values are higher for more accessible methods: private (1) --> public (6). + return (setter.Attributes & MethodAttributes.MemberAccessMask) > (getter.Attributes & MethodAttributes.MemberAccessMask) ? 
setter : getter; + } } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/FieldInfos/NativeFormat/NativeFormatRuntimeFieldInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/FieldInfos/NativeFormat/NativeFormatRuntimeFieldInfo.cs index 1674cc9b18cf..fde55471130b 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/FieldInfos/NativeFormat/NativeFormatRuntimeFieldInfo.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/FieldInfos/NativeFormat/NativeFormatRuntimeFieldInfo.cs @@ -130,7 +130,7 @@ public sealed override RuntimeFieldHandle FieldHandle { return RuntimeAugments.TypeLoaderCallbacks.GetRuntimeFieldHandleForComponents( DeclaringType.TypeHandle, - Name); + _fieldHandle); } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/Dispensers.NativeFormat.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/Dispensers.NativeFormat.cs index 84e1c1c547d6..7c2486b4d88e 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/Dispensers.NativeFormat.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/Dispensers.NativeFormat.cs @@ -34,7 +34,7 @@ internal partial class RuntimeAssemblyInfo static partial void GetNativeFormatRuntimeAssembly(AssemblyBindResult bindResult, ref RuntimeAssembly? runtimeAssembly) { if (bindResult.Reader != null) - runtimeAssembly = NativeFormatRuntimeAssembly.GetRuntimeAssembly(bindResult.Reader, bindResult.ScopeDefinitionHandle, bindResult.OverflowScopes); + runtimeAssembly = NativeFormatRuntimeAssembly.GetRuntimeAssembly(bindResult.Reader, bindResult.ScopeDefinitionHandle); } } } @@ -43,9 +43,9 @@ namespace System.Reflection.Runtime.Assemblies.NativeFormat { internal sealed partial class NativeFormatRuntimeAssembly { - internal static RuntimeAssembly GetRuntimeAssembly(MetadataReader reader, ScopeDefinitionHandle scope, IEnumerable overflowScopes) + internal static RuntimeAssembly GetRuntimeAssembly(MetadataReader reader, ScopeDefinitionHandle scope) { - return s_scopeToAssemblyDispenser.GetOrAdd(new RuntimeAssemblyKey(reader, scope, overflowScopes)); + return s_scopeToAssemblyDispenser.GetOrAdd(new RuntimeAssemblyKey(reader, scope)); } private static readonly Dispenser s_scopeToAssemblyDispenser = @@ -53,26 +53,23 @@ internal static RuntimeAssembly GetRuntimeAssembly(MetadataReader reader, ScopeD DispenserScenario.Scope_Assembly, delegate (RuntimeAssemblyKey qScopeDefinition) { - return (RuntimeAssembly)new NativeFormat.NativeFormatRuntimeAssembly(qScopeDefinition.Reader, qScopeDefinition.Handle, qScopeDefinition.Overflows); + return (RuntimeAssembly)new NativeFormat.NativeFormatRuntimeAssembly(qScopeDefinition.Reader, qScopeDefinition.Handle); } ); //----------------------------------------------------------------------------------------------------------- - // Captures a qualified scope (a reader plus a handle) representing the canonical definition of an assembly, - // plus a set of "overflow" scopes representing additional pieces of the assembly. 
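Reviewer note, not part of the patch: the new GetMostAccessibleAccessor helper above compares the accessibility bits of the two accessors, relying on the fact that the MethodAttributes accessibility values are ordered from least to most accessible (Private = 1 up to Public = 6). A small, self-contained sketch of that comparison using an illustrative property:

```csharp
// Illustrative only: picking the more accessible of a property's two accessors by
// masking with MethodAttributes.MemberAccessMask and comparing the resulting values.
using System;
using System.Reflection;

public class Sample
{
    public string Name { get; private set; } = "";
}

public static class Program
{
    public static void Main()
    {
        PropertyInfo prop = typeof(Sample).GetProperty(nameof(Sample.Name))!;
        MethodInfo getter = prop.GetMethod!;   // public
        MethodInfo setter = prop.SetMethod!;   // private

        MethodAttributes getterAccess = getter.Attributes & MethodAttributes.MemberAccessMask;
        MethodAttributes setterAccess = setter.Attributes & MethodAttributes.MemberAccessMask;

        // Picks the getter here, because Public (6) > Private (1).
        MethodInfo mostAccessible = setterAccess > getterAccess ? setter : getter;
        Console.WriteLine(mostAccessible.Name); // get_Name
    }
}
```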
+ // Captures a qualified scope (a reader plus a handle) representing the canonical definition of an assembly //----------------------------------------------------------------------------------------------------------- private struct RuntimeAssemblyKey : IEquatable { - public RuntimeAssemblyKey(MetadataReader reader, ScopeDefinitionHandle handle, IEnumerable overflows) + public RuntimeAssemblyKey(MetadataReader reader, ScopeDefinitionHandle handle) { _reader = reader; _handle = handle; - _overflows = overflows; } public MetadataReader Reader { get { return _reader; } } public ScopeDefinitionHandle Handle { get { return _handle; } } - public IEnumerable Overflows { get { return _overflows; } } public override bool Equals(object obj) { @@ -84,8 +81,6 @@ public override bool Equals(object obj) public bool Equals(RuntimeAssemblyKey other) { - // Equality depends only on the canonical definition of an assembly, not - // the overflows. if (!(_reader == other._reader)) return false; if (!(_handle.Equals(other._handle))) @@ -100,7 +95,6 @@ public override int GetHashCode() private readonly MetadataReader _reader; private readonly ScopeDefinitionHandle _handle; - private readonly IEnumerable _overflows; } } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/Dispensers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/Dispensers.cs index 060a59536ca8..bca23c27f721 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/Dispensers.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/Dispensers.cs @@ -38,42 +38,6 @@ internal static RuntimeAssembly GetRuntimeAssembly(RuntimeAssemblyName assemblyR return result; } - /// - /// Returns non-null or throws. - /// - internal static RuntimeAssembly GetRuntimeAssemblyFromByteArray(ReadOnlySpan rawAssembly, ReadOnlySpan pdbSymbolStore) - { - AssemblyBinder binder = ReflectionCoreExecution.ExecutionEnvironment.AssemblyBinder; - if (!binder.Bind(rawAssembly, pdbSymbolStore, out AssemblyBindResult bindResult, out Exception exception)) - { - if (exception != null) - throw exception; - else - throw new BadImageFormatException(); - } - - RuntimeAssembly result = GetRuntimeAssembly(bindResult); - return result; - } - - /// - /// Returns non-null or throws. - /// - internal static RuntimeAssembly GetRuntimeAssemblyFromPath(string assemblyPath) - { - AssemblyBinder binder = ReflectionCoreExecution.ExecutionEnvironment.AssemblyBinder; - if (!binder.Bind(assemblyPath, out AssemblyBindResult bindResult, out Exception exception)) - { - if (exception != null) - throw exception; - else - throw new BadImageFormatException(); - } - - RuntimeAssembly result = GetRuntimeAssembly(bindResult, assemblyPath); - return result; - } - /// /// Returns null if no assembly matches the assemblyRefName. Throws for other error cases. 
/// diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/ThunkedApis.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/ThunkedApis.cs index f547ee47a9f3..2389a9bc0d6a 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/ThunkedApis.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/ThunkedApis.cs @@ -95,6 +95,8 @@ internal abstract partial class RuntimeConstructorInfo public sealed override bool IsSecurityCritical => true; public sealed override bool IsSecuritySafeCritical => false; public sealed override bool IsSecurityTransparent => false; + + public sealed override bool IsCollectible => false; } } @@ -105,6 +107,8 @@ internal abstract partial class RuntimeEventInfo public sealed override MethodInfo GetAddMethod(bool nonPublic) => AddMethod.FilterAccessor(nonPublic); public sealed override MethodInfo GetRemoveMethod(bool nonPublic) => RemoveMethod.FilterAccessor(nonPublic); public sealed override MethodInfo GetRaiseMethod(bool nonPublic) => RaiseMethod?.FilterAccessor(nonPublic); + + public sealed override bool IsCollectible => false; } } @@ -119,6 +123,16 @@ internal abstract partial class RuntimeMethodInfo public sealed override bool IsSecurityCritical => true; public sealed override bool IsSecuritySafeCritical => false; public sealed override bool IsSecurityTransparent => false; + + public sealed override bool IsCollectible => false; + } +} + +namespace System.Reflection.Runtime.FieldInfos +{ + internal abstract partial class RuntimeFieldInfo + { + public sealed override bool IsCollectible => false; } } @@ -145,6 +159,8 @@ public sealed override MethodInfo[] GetAccessors(bool nonPublic) accessors[index++] = setter; return accessors; } + + public sealed override bool IsCollectible => false; } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/TypeUnifier.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/TypeUnifier.cs index cc1855f53408..7cd2e9588f51 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/TypeUnifier.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/General/TypeUnifier.cs @@ -37,42 +37,16 @@ namespace System.Reflection.Runtime.General { internal static partial class TypeUnifier { - [FeatureSwitchDefinition("System.Reflection.IsTypeConstructionEagerlyValidated")] - // This can be replaced at native compile time using a feature switch. 
- internal static bool IsTypeConstructionEagerlyValidated => true; - public static RuntimeTypeInfo GetArrayType(this RuntimeTypeInfo elementType) { return RuntimeArrayTypeInfo.GetArrayTypeInfo(elementType, multiDim: false, rank: 1); } - public static RuntimeTypeInfo GetArrayTypeWithTypeHandle(this RuntimeTypeInfo elementType) - { - return RuntimeArrayTypeInfo.GetArrayTypeInfo(elementType, multiDim: false, rank: 1).WithVerifiedTypeHandle(elementType); - } - public static RuntimeTypeInfo GetMultiDimArrayType(this RuntimeTypeInfo elementType, int rank) { return RuntimeArrayTypeInfo.GetArrayTypeInfo(elementType, multiDim: true, rank: rank); } - public static RuntimeTypeInfo GetMultiDimArrayTypeWithTypeHandle(this RuntimeTypeInfo elementType, int rank) - { - return RuntimeArrayTypeInfo.GetArrayTypeInfo(elementType, multiDim: true, rank: rank).WithVerifiedTypeHandle(elementType); - } - - private static RuntimeArrayTypeInfo WithVerifiedTypeHandle(this RuntimeArrayTypeInfo arrayType, RuntimeTypeInfo elementType) - { - // We only permit creating parameterized types if the pay-for-play policy specifically allows them *or* if the result - // type would be an open type. - RuntimeTypeHandle typeHandle = arrayType.InternalTypeHandleIfAvailable; - if (IsTypeConstructionEagerlyValidated - && typeHandle.IsNull() && !elementType.ContainsGenericParameters) - throw ReflectionCoreExecution.ExecutionEnvironment.CreateMissingMetadataException(arrayType.ToType()); - - return arrayType; - } - public static RuntimeTypeInfo GetByRefType(this RuntimeTypeInfo targetType) { return RuntimeByRefTypeInfo.GetByRefTypeInfo(targetType); @@ -88,29 +62,9 @@ public static RuntimeTypeInfo GetConstructedGenericTypeNoConstraintCheck(this Ru return RuntimeConstructedGenericTypeInfo.GetRuntimeConstructedGenericTypeInfoNoConstraintCheck(genericTypeDefinition, genericTypeArguments); } - public static RuntimeTypeInfo GetConstructedGenericTypeWithTypeHandle(this RuntimeTypeInfo genericTypeDefinition, RuntimeTypeInfo[] genericTypeArguments) + public static RuntimeTypeInfo GetConstructedGenericType(this RuntimeTypeInfo genericTypeDefinition, RuntimeTypeInfo[] genericTypeArguments) { - return RuntimeConstructedGenericTypeInfo.GetRuntimeConstructedGenericTypeInfo(genericTypeDefinition, genericTypeArguments).WithVerifiedTypeHandle(genericTypeArguments); - } - - private static RuntimeConstructedGenericTypeInfo WithVerifiedTypeHandle(this RuntimeConstructedGenericTypeInfo genericType, RuntimeTypeInfo[] genericTypeArguments) - { - // We only permit creating parameterized types if the pay-for-play policy specifically allows them *or* if the result - // type would be an open type. 
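Reviewer note, not part of the patch: the removed IsTypeConstructionEagerlyValidated property was a feature-switch-backed constant. For readers unfamiliar with the pattern, a typical shape of such a property is sketched below; the switch name "Contoso.Library.EagerValidation", the class, and the AppContext fallback are all made up for illustration, and the sketch assumes .NET 9+'s FeatureSwitchDefinitionAttribute.

```csharp
// Hypothetical feature-switch-backed property: the attribute lets the trimmer/AOT
// compiler substitute a compile-time constant for the named switch, while at runtime
// the property falls back to an AppContext switch (defaulting to true here).
using System;
using System.Diagnostics.CodeAnalysis;

internal static class FeatureFlags
{
    [FeatureSwitchDefinition("Contoso.Library.EagerValidation")]
    internal static bool EagerValidation =>
        AppContext.TryGetSwitch("Contoso.Library.EagerValidation", out bool enabled) ? enabled : true;
}
```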
- RuntimeTypeHandle typeHandle = genericType.InternalTypeHandleIfAvailable; - if (IsTypeConstructionEagerlyValidated && typeHandle.IsNull()) - { - bool atLeastOneOpenType = false; - foreach (RuntimeTypeInfo genericTypeArgument in genericTypeArguments) - { - if (genericTypeArgument.ContainsGenericParameters) - atLeastOneOpenType = true; - } - if (!atLeastOneOpenType) - throw ReflectionCoreExecution.ExecutionEnvironment.CreateMissingMetadataException(genericType.ToType()); - } - - return genericType; + return RuntimeConstructedGenericTypeInfo.GetRuntimeConstructedGenericTypeInfo(genericTypeDefinition, genericTypeArguments); } public static RuntimeTypeInfo GetRuntimeTypeInfoForRuntimeTypeHandle(this RuntimeTypeHandle typeHandle) diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/NativeFormat/NativeFormatMethodCommon.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/NativeFormat/NativeFormatMethodCommon.cs index e07f23d3fcc2..ad37847fc02c 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/NativeFormat/NativeFormatMethodCommon.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/NativeFormat/NativeFormatMethodCommon.cs @@ -201,12 +201,9 @@ public RuntimeMethodHandle GetRuntimeMethodHandle(Type[] genericArgs) genericArgHandles = null; } - TypeManagerHandle typeManager = RuntimeAugments.TypeLoaderCallbacks.GetModuleForMetadataReader(Reader); - return RuntimeAugments.TypeLoaderCallbacks.GetRuntimeMethodHandleForComponents( DeclaringType.TypeHandle, - Name, - RuntimeSignature.CreateFromMethodHandle(typeManager, MethodHandle.AsInt()), + _methodHandle, genericArgHandles); } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/RuntimeNamedMethodInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/RuntimeNamedMethodInfo.cs index a0a726a4607e..b8ae37e69816 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/RuntimeNamedMethodInfo.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/RuntimeNamedMethodInfo.cs @@ -144,7 +144,9 @@ public sealed override MethodInfo MakeGenericMethod(params Type[] typeArguments) if (typeArguments.Length != GenericTypeParameters.Length) throw new ArgumentException(SR.Format(SR.Argument_NotEnoughGenArguments, typeArguments.Length, GenericTypeParameters.Length)); RuntimeMethodInfo methodInfo = (RuntimeMethodInfo)RuntimeConstructedGenericMethodInfo.GetRuntimeConstructedGenericMethodInfo(this, genericTypeArguments); - MethodBaseInvoker _ = methodInfo.MethodInvoker; // For compatibility with other Make* apis, trigger any missing metadata exceptions now rather than later. 
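Reviewer note, not part of the patch: with this change, MakeGenericMethod validates generic constraints eagerly via ValidateGenericMethodConstraints rather than indirectly through the invoker. A small stand-alone demonstration of the observable behavior (all names are made up; on current runtimes a violated constraint surfaces as an ArgumentException):

```csharp
// Illustrative only: constraint validation is triggered by MakeGenericMethod itself.
using System;
using System.Reflection;

internal static class Demo
{
    private static void RequiresReference<T>() where T : class { }

    private static void Main()
    {
        MethodInfo open = typeof(Demo).GetMethod(nameof(RequiresReference),
            BindingFlags.NonPublic | BindingFlags.Static)!;

        // string satisfies the 'class' constraint.
        Console.WriteLine(open.MakeGenericMethod(typeof(string)));

        try
        {
            // int violates the constraint, so the call is rejected up front.
            open.MakeGenericMethod(typeof(int));
        }
        catch (ArgumentException e)
        {
            Console.WriteLine($"Rejected: {e.Message}");
        }
    }
}
```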
+ + ReflectionCoreExecution.ExecutionEnvironment.ValidateGenericMethodConstraints(methodInfo); + return methodInfo; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/TypeInfos/RuntimeTypeInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/TypeInfos/RuntimeTypeInfo.cs index 3a1c3718a009..da86ebb4499c 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/TypeInfos/RuntimeTypeInfo.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/TypeInfos/RuntimeTypeInfo.cs @@ -404,14 +404,14 @@ public Type MakeArrayType() // Do not implement this as a call to MakeArrayType(1) - they are not interchangeable. MakeArrayType() returns a // vector type ("SZArray") while MakeArrayType(1) returns a multidim array of rank 1. These are distinct types // in the ECMA model and in CLR Reflection. - return this.GetArrayTypeWithTypeHandle().ToType(); + return this.GetArrayType().ToType(); } public Type MakeArrayType(int rank) { if (rank <= 0) throw new IndexOutOfRangeException(); - return this.GetMultiDimArrayTypeWithTypeHandle(rank).ToType(); + return this.GetMultiDimArrayType(rank).ToType(); } public Type MakePointerType() @@ -475,7 +475,7 @@ public Type MakeGenericType(Type[] typeArguments) throw new TypeLoadException(SR.CannotUseByRefLikeTypeInInstantiation); } - return this.GetConstructedGenericTypeWithTypeHandle(runtimeTypeArguments!).ToType(); + return this.GetConstructedGenericType(runtimeTypeArguments!).ToType(); } public Type DeclaringType diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/TypeNameResolver.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/TypeNameResolver.NativeAot.cs index 4c9356e0ad53..f9ac7699a905 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/TypeNameResolver.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/TypeNameResolver.NativeAot.cs @@ -75,13 +75,13 @@ internal partial struct TypeNameResolver bool ignoreCase, Assembly topLevelAssembly) { - TypeName? parsed = TypeNameParser.Parse(typeName, throwOnError); + TypeName? parsed = TypeNameParser.Parse(typeName, throwOnError, new() { IsAssemblyGetType = true }); if (parsed is null) { return null; } - else if (topLevelAssembly is not null && parsed.AssemblyName is not null) + else if (parsed.AssemblyName is not null) { return throwOnError ? throw new ArgumentException(SR.Argument_AssemblyGetTypeCannotSpecifyAssembly) : null; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.NativeAot.cs index b30bfd88ef08..be875ba1731c 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.NativeAot.cs @@ -322,14 +322,7 @@ public static unsafe object GetUninitializedObject( throw new NotSupportedException(SR.NotSupported_ByRefLike); } - Debug.Assert(MethodTable.Of()->NumVtableSlots > 0); - if (mt->NumVtableSlots == 0) - { - // This is a type without a vtable or GCDesc. 
We must not allow creating an instance of it - throw ReflectionCoreExecution.ExecutionEnvironment.CreateMissingMetadataException(type); - } - // Paranoid check: not-meant-for-GC-heap types should be reliably identifiable by empty vtable. - Debug.Assert(!mt->ContainsGCPointers || RuntimeImports.RhGetGCDescSize(mt) != 0); + RuntimeAugments.EnsureMethodTableSafeToAllocate(mt); if (mt->IsNullable) { @@ -364,13 +357,7 @@ public static unsafe object GetUninitializedObject( if (mt->ElementType == EETypeElementType.Void || mt->IsGenericTypeDefinition || mt->IsByRef || mt->IsPointer || mt->IsFunctionPointer) throw new ArgumentException(SR.Arg_TypeNotSupported); - if (mt->NumVtableSlots == 0) - { - // This is a type without a vtable or GCDesc. We must not allow creating an instance of it - throw ReflectionCoreExecution.ExecutionEnvironment.CreateMissingMetadataException(Type.GetTypeFromHandle(type)); - } - // Paranoid check: not-meant-for-GC-heap types should be reliably identifiable by empty vtable. - Debug.Assert(!mt->ContainsGCPointers || RuntimeImports.RhGetGCDescSize(mt) != 0); + RuntimeAugments.EnsureMethodTableSafeToAllocate(mt); if (!mt->IsValueType) { diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InitializeFinalizerThread.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InitializeFinalizerThread.cs deleted file mode 100644 index d0021229b752..000000000000 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InitializeFinalizerThread.cs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Threading; - -namespace System.Runtime -{ - internal static class FinalizerInitRunner - { - // Here, we are subscribing to a callback from the runtime. This callback is made from the finalizer - // thread before any objects are finalized. - [RuntimeExport("InitializeFinalizerThread")] - public static void DoInitialize() - { - // Make sure that the finalizer thread is CoInitialized before any objects are finalized. If this - // fails, it will throw an exception and that will go unhandled, triggering a FailFast. - Thread.InitializeComForFinalizerThread(); - } - } -} diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComAwareWeakReference.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComAwareWeakReference.NativeAot.cs new file mode 100644 index 000000000000..6e630e096fd7 --- /dev/null +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComAwareWeakReference.NativeAot.cs @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; + +namespace System +{ + internal sealed partial class ComAwareWeakReference + { + internal static unsafe object? ComWeakRefToObject(IntPtr pComWeakRef, object? context) + { + return ComWeakRefToComWrappersObject(pComWeakRef, context); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static unsafe bool PossiblyComObject(object target) + { + return PossiblyComWrappersObject(target); + } + + internal static unsafe IntPtr ObjectToComWeakRef(object target, out object? 
context) + { + return ComWrappersObjectToComWeakRef(target, out context); + } + } +} diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.NativeAot.cs index 38105ec0984f..08fd22bac275 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.NativeAot.cs @@ -18,1220 +18,28 @@ namespace System.Runtime.InteropServices /// public abstract partial class ComWrappers { - private const int TrackerRefShift = 32; - private const ulong TrackerRefCounter = 1UL << TrackerRefShift; - private const ulong DestroySentinel = 0x0000000080000000UL; - private const ulong TrackerRefCountMask = 0xffffffff00000000UL; - private const ulong ComRefCountMask = 0x000000007fffffffUL; - private const int COR_E_ACCESSING_CCW = unchecked((int)0x80131544); - - internal static IntPtr DefaultIUnknownVftblPtr { get; } = CreateDefaultIUnknownVftbl(); - internal static IntPtr TaggedImplVftblPtr { get; } = CreateTaggedImplVftbl(); - internal static IntPtr DefaultIReferenceTrackerTargetVftblPtr { get; } = CreateDefaultIReferenceTrackerTargetVftbl(); - - internal static readonly Guid IID_IUnknown = new Guid(0x00000000, 0x0000, 0x0000, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46); - internal static readonly Guid IID_IReferenceTrackerTarget = new Guid(0x64bd43f8, 0xbfee, 0x4ec4, 0xb7, 0xeb, 0x29, 0x35, 0x15, 0x8d, 0xae, 0x21); - internal static readonly Guid IID_TaggedImpl = new Guid(0x5c13e51c, 0x4f32, 0x4726, 0xa3, 0xfd, 0xf3, 0xed, 0xd6, 0x3d, 0xa3, 0xa0); - internal static readonly Guid IID_IReferenceTracker = new Guid(0x11D3B13A, 0x180E, 0x4789, 0xA8, 0xBE, 0x77, 0x12, 0x88, 0x28, 0x93, 0xE6); - internal static readonly Guid IID_IReferenceTrackerHost = new Guid(0x29a71c6a, 0x3c42, 0x4416, 0xa3, 0x9d, 0xe2, 0x82, 0x5a, 0x7, 0xa7, 0x73); - internal static readonly Guid IID_IReferenceTrackerManager = new Guid(0x3cf184b4, 0x7ccb, 0x4dda, 0x84, 0x55, 0x7e, 0x6c, 0xe9, 0x9a, 0x32, 0x98); - internal static readonly Guid IID_IFindReferenceTargetsCallback = new Guid(0x04b3486c, 0x4687, 0x4229, 0x8d, 0x14, 0x50, 0x5a, 0xb5, 0x84, 0xdd, 0x88); - - private static readonly Guid IID_IInspectable = new Guid(0xAF86E2E0, 0xB12D, 0x4c6a, 0x9C, 0x5A, 0xD7, 0xAA, 0x65, 0x10, 0x1E, 0x90); - private static readonly Guid IID_IWeakReferenceSource = new Guid(0x00000038, 0, 0, 0xC0, 0, 0, 0, 0, 0, 0, 0x46); - - private static readonly ConditionalWeakTable s_nativeObjectWrapperTable = new ConditionalWeakTable(); - private static readonly GCHandleSet s_referenceTrackerNativeObjectWrapperCache = new GCHandleSet(); - - private readonly ConditionalWeakTable _managedObjectWrapperTable = new ConditionalWeakTable(); - private readonly RcwCache _rcwCache = new(); - - internal static bool TryGetComInstanceForIID(object obj, Guid iid, out IntPtr unknown, out long wrapperId) - { - if (obj == null - || !s_nativeObjectWrapperTable.TryGetValue(obj, out NativeObjectWrapper? 
wrapper)) - { - unknown = IntPtr.Zero; - wrapperId = 0; - return false; - } - - wrapperId = wrapper.ComWrappers.id; - return Marshal.QueryInterface(wrapper.ExternalComObject, iid, out unknown) == HResults.S_OK; - } - - public static unsafe bool TryGetComInstance(object obj, out IntPtr unknown) - { - unknown = IntPtr.Zero; - if (obj == null - || !s_nativeObjectWrapperTable.TryGetValue(obj, out NativeObjectWrapper? wrapper)) - { - return false; - } - - return Marshal.QueryInterface(wrapper.ExternalComObject, IID_IUnknown, out unknown) == HResults.S_OK; - } - - public static unsafe bool TryGetObject(IntPtr unknown, [NotNullWhen(true)] out object? obj) - { - obj = null; - if (unknown == IntPtr.Zero) - { - return false; - } - - ComInterfaceDispatch* comInterfaceDispatch = TryGetComInterfaceDispatch(unknown); - if (comInterfaceDispatch == null || - ComInterfaceDispatch.ToManagedObjectWrapper(comInterfaceDispatch)->MarkedToDestroy) - { - return false; - } - - obj = ComInterfaceDispatch.GetInstance(comInterfaceDispatch); - return true; - } - - /// - /// ABI for function dispatch of a COM interface. - /// - public unsafe partial struct ComInterfaceDispatch - { - /// - /// Given a from a generated Vtable, convert to the target type. - /// - /// Desired type. - /// Pointer supplied to Vtable function entry. - /// Instance of type associated with dispatched function call. - public static unsafe T GetInstance(ComInterfaceDispatch* dispatchPtr) where T : class - { - ManagedObjectWrapper* comInstance = ToManagedObjectWrapper(dispatchPtr); - return Unsafe.As(comInstance->Holder.WrappedObject); - } - - internal static unsafe ManagedObjectWrapper* ToManagedObjectWrapper(ComInterfaceDispatch* dispatchPtr) - { - return ((InternalComInterfaceDispatch*)dispatchPtr)->_thisPtr; - } - } - - internal unsafe struct InternalComInterfaceDispatch - { - public IntPtr Vtable; - internal ManagedObjectWrapper* _thisPtr; - } - - internal enum CreateComInterfaceFlagsEx - { - None = 0, - - /// - /// The caller will provide an IUnknown Vtable. - /// - /// - /// This is useful in scenarios when the caller has no need to rely on an IUnknown instance - /// that is used when running managed code is not possible (i.e. during a GC). In traditional - /// COM scenarios this is common, but scenarios involving Reference Tracker hosting - /// calling of the IUnknown API during a GC is possible. - /// - CallerDefinedIUnknown = 1, - - /// - /// Flag used to indicate the COM interface should implement IReferenceTrackerTarget. - /// When this flag is passed, the resulting COM interface will have an internal implementation of IUnknown - /// and as such none should be supplied by the caller. 
- /// - TrackerSupport = 2, - - LacksICustomQueryInterface = 1 << 29, - IsComActivated = 1 << 30, - IsPegged = 1 << 31, - - InternalMask = IsPegged | IsComActivated | LacksICustomQueryInterface, - } - - internal unsafe struct ManagedObjectWrapper - { - public volatile IntPtr HolderHandle; // This is GC Handle - public ulong RefCount; - - public int UserDefinedCount; - public ComInterfaceEntry* UserDefined; - internal InternalComInterfaceDispatch* Dispatches; - - internal CreateComInterfaceFlagsEx Flags; - - public bool IsRooted - { - get - { - ulong refCount = Interlocked.Read(ref RefCount); - bool rooted = GetComCount(refCount) > 0; - if (!rooted) - { - rooted = GetTrackerCount(refCount) > 0 && - ((Flags & CreateComInterfaceFlagsEx.IsPegged) != 0 || TrackerObjectManager.s_isGlobalPeggingOn); - } - return rooted; - } - } - - public ManagedObjectWrapperHolder? Holder - { - get - { - IntPtr handle = HolderHandle; - if (handle == IntPtr.Zero) - return null; - else - return Unsafe.As(GCHandle.FromIntPtr(handle).Target); - } - } - - public readonly bool MarkedToDestroy => IsMarkedToDestroy(RefCount); - - public uint AddRef() - { - return GetComCount(Interlocked.Increment(ref RefCount)); - } - - public uint Release() - { - Debug.Assert(GetComCount(RefCount) != 0); - return GetComCount(Interlocked.Decrement(ref RefCount)); - } - - public uint AddRefFromReferenceTracker() - { - ulong prev; - ulong curr; - do - { - prev = RefCount; - curr = prev + TrackerRefCounter; - } while (Interlocked.CompareExchange(ref RefCount, curr, prev) != prev); - - return GetTrackerCount(curr); - } - - public uint ReleaseFromReferenceTracker() - { - Debug.Assert(GetTrackerCount(RefCount) != 0); - ulong prev; - ulong curr; - do - { - prev = RefCount; - curr = prev - TrackerRefCounter; - } - while (Interlocked.CompareExchange(ref RefCount, curr, prev) != prev); - - // If we observe the destroy sentinel, then this release - // must destroy the wrapper. - if (curr == DestroySentinel) - Destroy(); - - return GetTrackerCount(curr); - } - - public uint Peg() - { - SetFlag(CreateComInterfaceFlagsEx.IsPegged); - return HResults.S_OK; - } - - public uint Unpeg() - { - ResetFlag(CreateComInterfaceFlagsEx.IsPegged); - return HResults.S_OK; - } - - - public unsafe int QueryInterfaceForTracker(in Guid riid, out IntPtr ppvObject) - { - if (IsMarkedToDestroy(RefCount) || Holder is null) - { - ppvObject = IntPtr.Zero; - return COR_E_ACCESSING_CCW; - } - - return QueryInterface(in riid, out ppvObject); - } - - public unsafe int QueryInterface(in Guid riid, out IntPtr ppvObject) - { - ppvObject = AsRuntimeDefined(in riid); - if (ppvObject == IntPtr.Zero) - { - if ((Flags & CreateComInterfaceFlagsEx.LacksICustomQueryInterface) == 0) - { - var customQueryInterface = Holder.WrappedObject as ICustomQueryInterface; - if (customQueryInterface is null) - { - SetFlag(CreateComInterfaceFlagsEx.LacksICustomQueryInterface); - } - else - { - Guid riidLocal = riid; - switch (customQueryInterface.GetInterface(ref riidLocal, out ppvObject)) - { - case CustomQueryInterfaceResult.Handled: - return HResults.S_OK; - case CustomQueryInterfaceResult.NotHandled: - break; - case CustomQueryInterfaceResult.Failed: - return HResults.COR_E_INVALIDCAST; - } - } - } - - ppvObject = AsUserDefined(in riid); - if (ppvObject == IntPtr.Zero) - return HResults.COR_E_INVALIDCAST; - } - - AddRef(); - return HResults.S_OK; - } - - public IntPtr As(in Guid riid) - { - // Find target interface and return dispatcher or null if not found. 
- IntPtr typeMaybe = AsRuntimeDefined(in riid); - if (typeMaybe == IntPtr.Zero) - typeMaybe = AsUserDefined(in riid); - - return typeMaybe; - } - - /// true if actually destroyed - public unsafe bool Destroy() - { - Debug.Assert(GetComCount(RefCount) == 0 || HolderHandle == IntPtr.Zero); - - if (HolderHandle == IntPtr.Zero) - { - // We either were previously destroyed or multiple ManagedObjectWrapperHolder - // were created by the ConditionalWeakTable for the same object and we lost the race. - return true; - } - - ulong prev, refCount; - do - { - prev = RefCount; - refCount = prev | DestroySentinel; - } while (Interlocked.CompareExchange(ref RefCount, refCount, prev) != prev); - - if (refCount == DestroySentinel) - { - IntPtr handle = Interlocked.Exchange(ref HolderHandle, IntPtr.Zero); - if (handle != IntPtr.Zero) - { - RuntimeImports.RhHandleFree(handle); - } - return true; - } - else - { - return false; - } - } - - private unsafe IntPtr AsRuntimeDefined(in Guid riid) - { - // The order of interface lookup here is important. - // See CreateCCW() for the expected order. - int i = UserDefinedCount; - if ((Flags & CreateComInterfaceFlagsEx.CallerDefinedIUnknown) == 0) - { - if (riid == IID_IUnknown) - { - return (IntPtr)(Dispatches + i); - } - - i++; - } - - if ((Flags & CreateComInterfaceFlagsEx.TrackerSupport) != 0) - { - if (riid == IID_IReferenceTrackerTarget) - { - return (IntPtr)(Dispatches + i); - } - - i++; - } - - { - if (riid == IID_TaggedImpl) - { - return (IntPtr)(Dispatches + i); - } - } - - return IntPtr.Zero; - } - - private unsafe IntPtr AsUserDefined(in Guid riid) - { - for (int i = 0; i < UserDefinedCount; ++i) - { - if (UserDefined[i].IID == riid) - { - return (IntPtr)(Dispatches + i); - } - } - - return IntPtr.Zero; - } - - private void SetFlag(CreateComInterfaceFlagsEx flag) - { - int setMask = (int)flag; - Interlocked.Or(ref Unsafe.As(ref Flags), setMask); - } - - private void ResetFlag(CreateComInterfaceFlagsEx flag) - { - int resetMask = ~(int)flag; - Interlocked.And(ref Unsafe.As(ref Flags), resetMask); - } - - private static uint GetTrackerCount(ulong c) - { - return (uint)((c & TrackerRefCountMask) >> TrackerRefShift); - } - - private static uint GetComCount(ulong c) - { - return (uint)(c & ComRefCountMask); - } - - private static bool IsMarkedToDestroy(ulong c) - { - return (c & DestroySentinel) != 0; - } - } - - internal sealed unsafe class ManagedObjectWrapperHolder - { - static ManagedObjectWrapperHolder() - { - delegate* unmanaged callback = &IsRootedCallback; - if (!RuntimeImports.RhRegisterRefCountedHandleCallback((nint)callback, MethodTable.Of())) - { - throw new OutOfMemoryException(); - } - } - - [UnmanagedCallersOnly] - private static bool IsRootedCallback(IntPtr pObj) - { - // We are paused in the GC, so this is safe. 
- ManagedObjectWrapperHolder* holder = (ManagedObjectWrapperHolder*)&pObj; - return holder->_wrapper->IsRooted; - } - - private readonly ManagedObjectWrapper* _wrapper; - private readonly ManagedObjectWrapperReleaser _releaser; - private readonly object _wrappedObject; - - public ManagedObjectWrapperHolder(ManagedObjectWrapper* wrapper, object wrappedObject) - { - _wrapper = wrapper; - _wrappedObject = wrappedObject; - _releaser = new ManagedObjectWrapperReleaser(wrapper); - _wrapper->HolderHandle = RuntimeImports.RhHandleAllocRefCounted(this); - } - - public unsafe IntPtr ComIp => _wrapper->As(in ComWrappers.IID_IUnknown); - - public object WrappedObject => _wrappedObject; - - public uint AddRef() => _wrapper->AddRef(); - } - - internal sealed unsafe class ManagedObjectWrapperReleaser - { - private ManagedObjectWrapper* _wrapper; - - public ManagedObjectWrapperReleaser(ManagedObjectWrapper* wrapper) - { - _wrapper = wrapper; - } - - ~ManagedObjectWrapperReleaser() - { - IntPtr refCountedHandle = _wrapper->HolderHandle; - if (refCountedHandle != IntPtr.Zero && RuntimeImports.RhHandleGet(refCountedHandle) != null) - { - // The ManagedObjectWrapperHolder has not been fully collected, so it is still - // potentially reachable via the Conditional Weak Table. - // Keep ourselves alive in case the wrapped object is resurrected. - GC.ReRegisterForFinalize(this); - return; - } - - // Release GC handle created when MOW was built. - if (_wrapper->Destroy()) - { - NativeMemory.Free(_wrapper); - _wrapper = null; - } - else - { - // There are still outstanding references on the COM side. - // This case should only be hit when an outstanding - // tracker refcount exists from AddRefFromReferenceTracker. - GC.ReRegisterForFinalize(this); - } - } - } - - internal unsafe class NativeObjectWrapper - { - private IntPtr _externalComObject; - private IntPtr _inner; - private ComWrappers _comWrappers; - private GCHandle _proxyHandle; - private GCHandle _proxyHandleTrackingResurrection; - private readonly bool _aggregatedManagedObjectWrapper; - private readonly bool _uniqueInstance; - - static NativeObjectWrapper() - { - // Registering the weak reference support callbacks to enable - // consulting ComWrappers when weak references are created - // for RCWs. - ComAwareWeakReference.InitializeCallbacks(&ComWeakRefToObject, &PossiblyComObject, &ObjectToComWeakRef); - } - - public static NativeObjectWrapper Create(IntPtr externalComObject, IntPtr inner, ComWrappers comWrappers, object comProxy, CreateObjectFlags flags) - { - if (flags.HasFlag(CreateObjectFlags.TrackerObject) && - Marshal.QueryInterface(externalComObject, IID_IReferenceTracker, out IntPtr trackerObject) == HResults.S_OK) - { - return new ReferenceTrackerNativeObjectWrapper(externalComObject, inner, comWrappers, comProxy, flags, trackerObject); - } - else - { - return new NativeObjectWrapper(externalComObject, inner, comWrappers, comProxy, flags); - } - } - - protected NativeObjectWrapper(IntPtr externalComObject, IntPtr inner, ComWrappers comWrappers, object comProxy, CreateObjectFlags flags) - { - _externalComObject = externalComObject; - _inner = inner; - _comWrappers = comWrappers; - _uniqueInstance = flags.HasFlag(CreateObjectFlags.UniqueInstance); - _proxyHandle = GCHandle.Alloc(comProxy, GCHandleType.Weak); - - // We have a separate handle tracking resurrection as we want to make sure - // we clean up the NativeObjectWrapper only after the RCW has been finalized - // due to it can access the native object in the finalizer. 
At the same time, - // we want other callers which are using ProxyHandle such as the reference tracker runtime - // to see the object as not alive once it is eligible for finalization. - _proxyHandleTrackingResurrection = GCHandle.Alloc(comProxy, GCHandleType.WeakTrackResurrection); - - // If this is an aggregation scenario and the identity object - // is a managed object wrapper, we need to call Release() to - // indicate this external object isn't rooted. In the event the - // object is passed out to native code an AddRef() must be called - // based on COM convention and will "fix" the count. - _aggregatedManagedObjectWrapper = flags.HasFlag(CreateObjectFlags.Aggregation) && TryGetComInterfaceDispatch(_externalComObject) != null; - if (_aggregatedManagedObjectWrapper) - { - Marshal.Release(externalComObject); - } - } - - internal IntPtr ExternalComObject => _externalComObject; - internal ComWrappers ComWrappers => _comWrappers; - internal GCHandle ProxyHandle => _proxyHandle; - internal bool IsUniqueInstance => _uniqueInstance; - internal bool IsAggregatedWithManagedObjectWrapper => _aggregatedManagedObjectWrapper; - - public virtual void Release() - { - if (!_uniqueInstance && _comWrappers is not null) - { - _comWrappers._rcwCache.Remove(_externalComObject, this); - _comWrappers = null; - } - - if (_proxyHandle.IsAllocated) - { - _proxyHandle.Free(); - } - - if (_proxyHandleTrackingResurrection.IsAllocated) - { - _proxyHandleTrackingResurrection.Free(); - } - - // If the inner was supplied, we need to release our reference. - if (_inner != IntPtr.Zero) - { - Marshal.Release(_inner); - _inner = IntPtr.Zero; - } - - _externalComObject = IntPtr.Zero; - } - - ~NativeObjectWrapper() - { - if (_proxyHandleTrackingResurrection.IsAllocated && _proxyHandleTrackingResurrection.Target != null) - { - // The RCW object has not been fully collected, so it still - // can make calls on the native object in its finalizer. - // Keep ourselves alive until it is finalized. - GC.ReRegisterForFinalize(this); - return; - } - - Release(); - } - } - - internal sealed class ReferenceTrackerNativeObjectWrapper : NativeObjectWrapper - { - private IntPtr _trackerObject; - private readonly bool _releaseTrackerObject; - private int _trackerObjectDisconnected; // Atomic boolean, so using int. - internal readonly IntPtr _contextToken; - internal readonly GCHandle _nativeObjectWrapperWeakHandle; - - public IntPtr TrackerObject => (_trackerObject == IntPtr.Zero || _trackerObjectDisconnected == 1) ? IntPtr.Zero : _trackerObject; - - public ReferenceTrackerNativeObjectWrapper( - nint externalComObject, - nint inner, - ComWrappers comWrappers, - object comProxy, - CreateObjectFlags flags, - IntPtr trackerObject) - : base(externalComObject, inner, comWrappers, comProxy, flags) - { - Debug.Assert(flags.HasFlag(CreateObjectFlags.TrackerObject)); - Debug.Assert(trackerObject != IntPtr.Zero); - - _trackerObject = trackerObject; - _releaseTrackerObject = true; - - TrackerObjectManager.OnIReferenceTrackerFound(_trackerObject); - TrackerObjectManager.AfterWrapperCreated(_trackerObject); - - if (flags.HasFlag(CreateObjectFlags.Aggregation)) - { - // Aggregation with an IReferenceTracker instance creates an extra AddRef() - // on the outer (e.g. MOW) so we clean up that issue here. 
- _releaseTrackerObject = false; - IReferenceTracker.ReleaseFromTrackerSource(_trackerObject); // IReferenceTracker - Marshal.Release(_trackerObject); - } - - _contextToken = GetContextToken(); - _nativeObjectWrapperWeakHandle = GCHandle.Alloc(this, GCHandleType.Weak); - } - - public override void Release() - { - // Remove the entry from the cache that keeps track of the active NativeObjectWrappers. - if (_nativeObjectWrapperWeakHandle.IsAllocated) - { - s_referenceTrackerNativeObjectWrapperCache.Remove(_nativeObjectWrapperWeakHandle); - _nativeObjectWrapperWeakHandle.Free(); - } - - DisconnectTracker(); - - base.Release(); - } - - public void DisconnectTracker() - { - // Return if already disconnected or the tracker isn't set. - if (_trackerObject == IntPtr.Zero || Interlocked.CompareExchange(ref _trackerObjectDisconnected, 1, 0) != 0) - { - return; - } - - // Always release the tracker source during a disconnect. - // This to account for the implied IUnknown ownership by the runtime. - IReferenceTracker.ReleaseFromTrackerSource(_trackerObject); // IUnknown - - // Disconnect from the tracker. - if (_releaseTrackerObject) - { - IReferenceTracker.ReleaseFromTrackerSource(_trackerObject); // IReferenceTracker - Marshal.Release(_trackerObject); - _trackerObject = IntPtr.Zero; - } - } - } - - /// - /// Globally registered instance of the ComWrappers class for reference tracker support. - /// - private static ComWrappers? s_globalInstanceForTrackerSupport; - - /// - /// Globally registered instance of the ComWrappers class for marshalling. - /// - private static ComWrappers? s_globalInstanceForMarshalling; - - private static long s_instanceCounter; - private readonly long id = Interlocked.Increment(ref s_instanceCounter); - - internal static object? GetOrCreateObjectFromWrapper(long wrapperId, IntPtr externalComObject) - { - if (s_globalInstanceForTrackerSupport != null && s_globalInstanceForTrackerSupport.id == wrapperId) - { - return s_globalInstanceForTrackerSupport.GetOrCreateObjectForComInstance(externalComObject, CreateObjectFlags.TrackerObject); - } - else if (s_globalInstanceForMarshalling != null && s_globalInstanceForMarshalling.id == wrapperId) - { - return ComObjectForInterface(externalComObject); - } - else - { - return null; - } - } - - // Custom type instead of a value tuple to avoid rooting 'ITuple' and other value tuple stuff - private struct GetOrCreateComInterfaceForObjectParameters - { - public ComWrappers? This; - public CreateComInterfaceFlags Flags; - } - - /// - /// Create a COM representation of the supplied object that can be passed to a non-managed environment. - /// - /// The managed object to expose outside the .NET runtime. - /// Flags used to configure the generated interface. - /// The generated COM interface that can be passed outside the .NET runtime. - /// - /// If a COM representation was previously created for the specified using - /// this instance, the previously created COM interface will be returned. - /// If not, a new one will be created. 
- /// - public unsafe IntPtr GetOrCreateComInterfaceForObject(object instance, CreateComInterfaceFlags flags) - { - ArgumentNullException.ThrowIfNull(instance); - - ManagedObjectWrapperHolder managedObjectWrapper = _managedObjectWrapperTable.GetOrAdd(instance, static (c, items) => - { - ManagedObjectWrapper* value = items.This!.CreateManagedObjectWrapper(c, items.Flags); - return new ManagedObjectWrapperHolder(value, c); - }, new GetOrCreateComInterfaceForObjectParameters { This = this, Flags = flags }); - - managedObjectWrapper.AddRef(); - return managedObjectWrapper.ComIp; - } - - private unsafe ManagedObjectWrapper* CreateManagedObjectWrapper(object instance, CreateComInterfaceFlags flags) - { - ComInterfaceEntry* userDefined = ComputeVtables(instance, flags, out int userDefinedCount); - if ((userDefined == null && userDefinedCount != 0) || userDefinedCount < 0) - { - throw new ArgumentException(); - } - - // Maximum number of runtime supplied vtables. - Span runtimeDefinedVtable = stackalloc IntPtr[3]; - int runtimeDefinedCount = 0; - - // Check if the caller will provide the IUnknown table. - if ((flags & CreateComInterfaceFlags.CallerDefinedIUnknown) == CreateComInterfaceFlags.None) - { - runtimeDefinedVtable[runtimeDefinedCount++] = DefaultIUnknownVftblPtr; - } - - if ((flags & CreateComInterfaceFlags.TrackerSupport) != 0) - { - runtimeDefinedVtable[runtimeDefinedCount++] = DefaultIReferenceTrackerTargetVftblPtr; - } - - { - runtimeDefinedVtable[runtimeDefinedCount++] = TaggedImplVftblPtr; - } - - // Compute size for ManagedObjectWrapper instance. - int totalDefinedCount = runtimeDefinedCount + userDefinedCount; - - // Allocate memory for the ManagedObjectWrapper. - IntPtr wrapperMem = (IntPtr)NativeMemory.Alloc( - (nuint)sizeof(ManagedObjectWrapper) + (nuint)totalDefinedCount * (nuint)sizeof(InternalComInterfaceDispatch)); - - // Compute the dispatch section offset and ensure it is aligned. - ManagedObjectWrapper* mow = (ManagedObjectWrapper*)wrapperMem; - - // Dispatches follow immediately after ManagedObjectWrapper - InternalComInterfaceDispatch* pDispatches = (InternalComInterfaceDispatch*)(wrapperMem + sizeof(ManagedObjectWrapper)); - for (int i = 0; i < totalDefinedCount; i++) - { - pDispatches[i].Vtable = (i < userDefinedCount) ? userDefined[i].Vtable : runtimeDefinedVtable[i - userDefinedCount]; - pDispatches[i]._thisPtr = mow; - } - - mow->HolderHandle = IntPtr.Zero; - mow->RefCount = 0; - mow->UserDefinedCount = userDefinedCount; - mow->UserDefined = userDefined; - mow->Flags = (CreateComInterfaceFlagsEx)flags; - mow->Dispatches = pDispatches; - return mow; - } - - /// - /// Get the currently registered managed object or creates a new managed object and registers it. - /// - /// Object to import for usage into the .NET runtime. - /// Flags used to describe the external object. - /// Returns a managed object associated with the supplied external COM object. - /// - /// If a managed object was previously created for the specified - /// using this instance, the previously created object will be returned. - /// If not, a new one will be created. - /// - public object GetOrCreateObjectForComInstance(IntPtr externalComObject, CreateObjectFlags flags) - { - object? obj; - if (!TryGetOrCreateObjectForComInstanceInternal(externalComObject, IntPtr.Zero, flags, null, out obj)) - throw new ArgumentNullException(nameof(externalComObject)); - - return obj; - } - - /// - /// Get the currently registered managed object or uses the supplied managed object and registers it. 
- /// - /// Object to import for usage into the .NET runtime. - /// Flags used to describe the external object. - /// The to be used as the wrapper for the external object - /// Returns a managed object associated with the supplied external COM object. - /// - /// If the instance already has an associated external object a will be thrown. - /// - public object GetOrRegisterObjectForComInstance(IntPtr externalComObject, CreateObjectFlags flags, object wrapper) - { - return GetOrRegisterObjectForComInstance(externalComObject, flags, wrapper, IntPtr.Zero); - } - - /// - /// Get the currently registered managed object or uses the supplied managed object and registers it. - /// - /// Object to import for usage into the .NET runtime. - /// Flags used to describe the external object. - /// The to be used as the wrapper for the external object - /// Inner for COM aggregation scenarios - /// Returns a managed object associated with the supplied external COM object. - /// - /// This method override is for registering an aggregated COM instance with its associated inner. The inner - /// will be released when the associated wrapper is eventually freed. Note that it will be released on a thread - /// in an unknown apartment state. If the supplied inner is not known to be a free-threaded instance then - /// it is advised to not supply the inner. - /// - /// If the instance already has an associated external object a will be thrown. - /// - public object GetOrRegisterObjectForComInstance(IntPtr externalComObject, CreateObjectFlags flags, object wrapper, IntPtr inner) - { - ArgumentNullException.ThrowIfNull(wrapper); - - object? obj; - if (!TryGetOrCreateObjectForComInstanceInternal(externalComObject, inner, flags, wrapper, out obj)) - throw new ArgumentNullException(nameof(externalComObject)); - - return obj; - } - - private static unsafe ComInterfaceDispatch* TryGetComInterfaceDispatch(IntPtr comObject) - { - // If the first Vtable entry is part of a ManagedObjectWrapper impl, - // we know how to interpret the IUnknown. - IntPtr knownQI = ((IntPtr*)((IntPtr*)comObject)[0])[0]; - if (knownQI != ((IntPtr*)DefaultIUnknownVftblPtr)[0] - || knownQI != ((IntPtr*)DefaultIReferenceTrackerTargetVftblPtr)[0]) - { - // It is possible the user has defined their own IUnknown impl so - // we fallback to the tagged interface approach to be sure. - if (0 != Marshal.QueryInterface(comObject, IID_TaggedImpl, out nint implMaybe)) - { - return null; - } - - IntPtr currentVersion = (IntPtr)(delegate* unmanaged)&ITaggedImpl_IsCurrentVersion; - int hr = ((delegate* unmanaged)(*(*(void***)implMaybe + 3 /* ITaggedImpl.IsCurrentVersion slot */)))(implMaybe, currentVersion); - Marshal.Release(implMaybe); - if (hr != 0) - { - return null; - } - } - - return (ComInterfaceDispatch*)comObject; - } - - private static void DetermineIdentityAndInner( - IntPtr externalComObject, - IntPtr innerMaybe, - CreateObjectFlags flags, - out IntPtr identity, - out IntPtr inner) - { - inner = innerMaybe; - - IntPtr checkForIdentity = externalComObject; - - // Check if the flags indicate we are creating - // an object for an external IReferenceTracker instance - // that we are aggregating with. 
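`DetermineIdentityAndInner` (below) relies on the COM rule that querying for `IID_IUnknown` yields a stable identity pointer, so two interface pointers refer to the same object exactly when their identities compare equal. A small illustrative sketch of that comparison, assuming both inputs are live COM interface pointers; `ComIdentity` is a hypothetical helper, not part of this change:

```csharp
using System;
using System.Runtime.InteropServices;

internal static class ComIdentity
{
    // The canonical IUnknown IID.
    private static readonly Guid IID_IUnknown = new Guid("00000000-0000-0000-C000-000000000046");

    // Returns true if the two COM interface pointers belong to the same underlying object.
    internal static bool IsSameObject(IntPtr punk1, IntPtr punk2)
    {
        Guid iid = IID_IUnknown;
        Marshal.ThrowExceptionForHR(Marshal.QueryInterface(punk1, ref iid, out IntPtr identity1));
        try
        {
            Marshal.ThrowExceptionForHR(Marshal.QueryInterface(punk2, ref iid, out IntPtr identity2));
            Marshal.Release(identity2);
            return identity1 == identity2; // pointer value comparison only
        }
        finally
        {
            Marshal.Release(identity1);
        }
    }
}
```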
- bool refTrackerInnerScenario = flags.HasFlag(CreateObjectFlags.TrackerObject) - && flags.HasFlag(CreateObjectFlags.Aggregation); - if (refTrackerInnerScenario && - Marshal.QueryInterface(externalComObject, IID_IReferenceTracker, out IntPtr referenceTrackerPtr) == HResults.S_OK) - { - // We are checking the supplied external value - // for IReferenceTracker since in .NET 5 API usage scenarios - // this could actually be the inner and we want the true identity - // not the inner . This is a trick since the only way - // to get identity from an inner is through a non-IUnknown - // interface QI. Once we have the IReferenceTracker - // instance we can be sure the QI for IUnknown will really - // be the true identity. - using ComHolder referenceTracker = new ComHolder(referenceTrackerPtr); - checkForIdentity = referenceTrackerPtr; - Marshal.ThrowExceptionForHR(Marshal.QueryInterface(checkForIdentity, IID_IUnknown, out identity)); - } - else - { - Marshal.ThrowExceptionForHR(Marshal.QueryInterface(externalComObject, IID_IUnknown, out identity)); - } - - // Set the inner if scenario dictates an update. - if (innerMaybe == IntPtr.Zero && // User didn't supply inner - .NET 5 API scenario sanity check. - checkForIdentity != externalComObject && // Target of check was changed - .NET 5 API scenario sanity check. - externalComObject != identity && // The supplied object doesn't match the computed identity. - refTrackerInnerScenario) // The appropriate flags were set. - { - inner = externalComObject; - } - } - - /// - /// Get the currently registered managed object or creates a new managed object and registers it. - /// - /// Object to import for usage into the .NET runtime. - /// The inner instance if aggregation is involved - /// Flags used to describe the external object. - /// The to be used as the wrapper for the external object. - /// The managed object associated with the supplied external COM object or null if it could not be created. - /// Returns true if a managed object could be retrieved/created, false otherwise - private unsafe bool TryGetOrCreateObjectForComInstanceInternal( - IntPtr externalComObject, - IntPtr innerMaybe, - CreateObjectFlags flags, - object? wrapperMaybe, - [NotNullWhen(true)] out object? retValue) - { - if (externalComObject == IntPtr.Zero) - throw new ArgumentNullException(nameof(externalComObject)); - - if (innerMaybe != IntPtr.Zero && !flags.HasFlag(CreateObjectFlags.Aggregation)) - throw new InvalidOperationException(SR.InvalidOperation_SuppliedInnerMustBeMarkedAggregation); - - DetermineIdentityAndInner( - externalComObject, - innerMaybe, - flags, - out IntPtr identity, - out IntPtr inner); - - using ComHolder releaseIdentity = new ComHolder(identity); - - // If the user has requested a unique instance, - // we will immediately create the object, register it, - // and return. - if (flags.HasFlag(CreateObjectFlags.UniqueInstance)) - { - retValue = CreateAndRegisterObjectForComInstance(identity, inner, flags); - return retValue is not null; - } - - // If we have a live cached wrapper currently, - // return that. - if (_rcwCache.FindProxyForComInstance(identity) is object liveCachedWrapper) - { - retValue = liveCachedWrapper; - return true; - } - - // If the user tried to provide a pre-created managed wrapper, try to register - // that object as the wrapper. 
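Seen from the caller's side, the branches above mean that one COM identity normally maps to one cached RCW, while `CreateObjectFlags.UniqueInstance` bypasses the cache entirely. A hedged usage sketch; `RcwCachingSketch` is a hypothetical helper, and it assumes a `ComWrappers` whose `CreateObject` returns a fresh wrapper for each request:

```csharp
using System;
using System.Runtime.InteropServices;

internal static class RcwCachingSketch
{
    // 'wrappers' is any ComWrappers whose CreateObject returns a wrapper object,
    // and 'externalComObject' is a live COM interface pointer supplied by the caller.
    internal static void Demonstrate(ComWrappers wrappers, IntPtr externalComObject)
    {
        object a = wrappers.GetOrCreateObjectForComInstance(externalComObject, CreateObjectFlags.None);
        object b = wrappers.GetOrCreateObjectForComInstance(externalComObject, CreateObjectFlags.None);
        Console.WriteLine(ReferenceEquals(a, b)); // True: the same identity maps to the cached RCW

        object c = wrappers.GetOrCreateObjectForComInstance(externalComObject, CreateObjectFlags.UniqueInstance);
        Console.WriteLine(ReferenceEquals(a, c)); // False: UniqueInstance skips the cache
    }
}
```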
- if (wrapperMaybe is not null) - { - retValue = RegisterObjectForComInstance(identity, inner, wrapperMaybe, flags); - return retValue is not null; - } - - // Check if the provided COM instance is actually a managed object wrapper from this - // ComWrappers instance, and use it if it is. - if (flags.HasFlag(CreateObjectFlags.Unwrap)) - { - ComInterfaceDispatch* comInterfaceDispatch = TryGetComInterfaceDispatch(identity); - if (comInterfaceDispatch != null) - { - // If we found a managed object wrapper in this ComWrappers instance - // and it has the same identity pointer as the one we're creating a NativeObjectWrapper for, - // unwrap it. We don't AddRef the wrapper as we don't take a reference to it. - // - // A managed object can have multiple managed object wrappers, with a max of one per context. - // Let's say we have a managed object A and ComWrappers instances C1 and C2. Let B1 and B2 be the - // managed object wrappers for A created with C1 and C2 respectively. - // If we are asked to create an EOC for B1 with the unwrap flag on the C2 ComWrappers instance, - // we will create a new wrapper. In this scenario, we'll only unwrap B2. - object unwrapped = ComInterfaceDispatch.GetInstance(comInterfaceDispatch); - if (_managedObjectWrapperTable.TryGetValue(unwrapped, out ManagedObjectWrapperHolder? unwrappedWrapperInThisContext)) - { - // The unwrapped object has a CCW in this context. Compare with identity - // so we can see if it's the CCW for the unwrapped object in this context. - if (unwrappedWrapperInThisContext.ComIp == identity) - { - retValue = unwrapped; - return true; - } - } - } - } - - // If the user didn't provide a wrapper and couldn't unwrap a managed object wrapper, - // create a new wrapper. - retValue = CreateAndRegisterObjectForComInstance(identity, inner, flags); - return retValue is not null; - } - - private object? CreateAndRegisterObjectForComInstance(IntPtr identity, IntPtr inner, CreateObjectFlags flags) - { - object? retValue = CreateObject(identity, flags); - if (retValue is null) - { - // If ComWrappers instance cannot create wrapper, we can do nothing here. - return null; - } - - return RegisterObjectForComInstance(identity, inner, retValue, flags); - } - - private object RegisterObjectForComInstance(IntPtr identity, IntPtr inner, object comProxy, CreateObjectFlags flags) - { - NativeObjectWrapper nativeObjectWrapper = NativeObjectWrapper.Create( - identity, - inner, - this, - comProxy, - flags); - - object actualProxy = comProxy; - NativeObjectWrapper actualWrapper = nativeObjectWrapper; - if (!nativeObjectWrapper.IsUniqueInstance) - { - // Add our entry to the cache here, using an already existing entry if someone else beat us to it. - (actualWrapper, actualProxy) = _rcwCache.GetOrAddProxyForComInstance(identity, nativeObjectWrapper, comProxy); - if (actualWrapper != nativeObjectWrapper) - { - // We raced with another thread to map identity to nativeObjectWrapper - // and lost the race. We will use the other thread's nativeObjectWrapper, so we can release ours. - nativeObjectWrapper.Release(); - } - } - - // At this point, actualProxy is the RCW object for the identity - // and actualWrapper is the NativeObjectWrapper that is in the RCW cache (if not unique) that associates the identity with actualProxy. - // Register the NativeObjectWrapper to handle lifetime tracking of the references to the COM object. 
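`RegisterObjectForComInstance` above creates its `NativeObjectWrapper` speculatively and releases it when another thread has already published one for the same identity. The same publish-or-discard pattern in isolation, with hypothetical `Candidate` and `PublishOrDiscard` types standing in for the wrapper and the cache:

```csharp
using System;
using System.Threading;

internal sealed class Candidate : IDisposable
{
    public void Dispose() { /* release whatever the losing candidate holds */ }
}

internal sealed class PublishOrDiscard
{
    private Candidate? _published;

    public Candidate GetOrPublish()
    {
        Candidate candidate = new Candidate();
        Candidate? winner = Interlocked.CompareExchange(ref _published, candidate, null);
        if (winner is not null)
        {
            // Lost the race: discard our candidate and use the one already published.
            candidate.Dispose();
            return winner;
        }

        return candidate;
    }
}
```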
- RegisterWrapperForObject(actualWrapper, actualProxy); - - return actualProxy; - } - - private void RegisterWrapperForObject(NativeObjectWrapper wrapper, object comProxy) - { - // When we call into RegisterWrapperForObject, there is only one valid non-"unique instance" wrapper for a given - // COM instance, which is already registered in the RCW cache. - // If we find a wrapper in the table that is a different NativeObjectWrapper instance - // then it must be for a different COM instance. - // It's possible that we could race here with another thread that is trying to register the same comProxy - // for the same COM instance, but in that case we'll be passed the same NativeObjectWrapper instance - // for both threads. In that case, it doesn't matter which thread adds the entry to the NativeObjectWrapper table - // as the entry is always the same pair. - Debug.Assert(wrapper.ProxyHandle.Target == comProxy); - Debug.Assert(wrapper.IsUniqueInstance || _rcwCache.FindProxyForComInstance(wrapper.ExternalComObject) == comProxy); - - // Add the input wrapper bound to the COM proxy, if there isn't one already. If another thread raced - // against this one and this lost, we'd get the wrapper added from that thread instead. - NativeObjectWrapper registeredWrapper = s_nativeObjectWrapperTable.GetOrAdd(comProxy, wrapper); - - // We lost the race, so we cannot register the incoming wrapper with the target object - if (registeredWrapper != wrapper) - { - Debug.Assert(registeredWrapper.ExternalComObject != wrapper.ExternalComObject); - wrapper.Release(); - throw new NotSupportedException(); - } - - // Always register our wrapper to the reference tracker handle cache here. - // We may not be the thread that registered the handle, but we need to ensure that the wrapper - // is registered before we return to user code. Otherwise the wrapper won't be walked by the - // TrackerObjectManager and we could end up missing a section of the object graph. - // This cache deduplicates, so it is okay that the wrapper will be registered multiple times. - AddWrapperToReferenceTrackerHandleCache(registeredWrapper); - } - - private static void AddWrapperToReferenceTrackerHandleCache(NativeObjectWrapper wrapper) - { - if (wrapper is ReferenceTrackerNativeObjectWrapper referenceTrackerNativeObjectWrapper) - { - s_referenceTrackerNativeObjectWrapperCache.Add(referenceTrackerNativeObjectWrapper._nativeObjectWrapperWeakHandle); - } - } - - private sealed class RcwCache - { - private readonly Lock _lock = new Lock(useTrivialWaits: true); - private readonly Dictionary _cache = []; - - /// - /// Gets the current RCW proxy object for if it exists in the cache or inserts a new entry with . - /// - /// The com instance we want to get or record an RCW for. - /// The for . - /// The proxy object that is associated with . - /// The proxy object currently in the cache for or the proxy object owned by if no entry exists and the corresponding native wrapper. - public (NativeObjectWrapper actualWrapper, object actualProxy) GetOrAddProxyForComInstance(IntPtr comPointer, NativeObjectWrapper wrapper, object comProxy) - { - lock (_lock) - { - Debug.Assert(wrapper.ProxyHandle.Target == comProxy); - ref GCHandle rcwEntry = ref CollectionsMarshal.GetValueRefOrAddDefault(_cache, comPointer, out bool exists); - if (!exists) - { - // Someone else didn't beat us to adding the entry to the cache. - // Add our entry here. 
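`RcwCache` (below) keys weak `GCHandle`s by COM pointer so the cache never extends an RCW's lifetime on its own. A much-simplified sketch of that shape, using a plain `lock` and `Dictionary` instead of the runtime's `Lock`/`CollectionsMarshal` usage; `WeakPointerCache` is hypothetical:

```csharp
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;

internal sealed class WeakPointerCache
{
    private readonly object _lock = new object();
    private readonly Dictionary<IntPtr, GCHandle> _cache = new();

    public object GetOrAdd(IntPtr key, object candidate)
    {
        lock (_lock)
        {
            if (_cache.TryGetValue(key, out GCHandle handle))
            {
                if (handle.Target is object live)
                {
                    return live; // existing entry is still alive; use it
                }

                handle.Target = candidate; // the old target was collected; reuse the handle
                return candidate;
            }

            _cache.Add(key, GCHandle.Alloc(candidate, GCHandleType.Weak));
            return candidate;
        }
    }

    public void Remove(IntPtr key)
    {
        lock (_lock)
        {
            if (_cache.Remove(key, out GCHandle handle))
            {
                handle.Free();
            }
        }
    }
}
```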
- rcwEntry = GCHandle.Alloc(wrapper, GCHandleType.Weak); - } - else if (rcwEntry.Target is not (NativeObjectWrapper cachedWrapper)) - { - Debug.Assert(rcwEntry.IsAllocated); - // The target was collected, so we need to update the cache entry. - rcwEntry.Target = wrapper; - } - else - { - object? existingProxy = cachedWrapper.ProxyHandle.Target; - // The target NativeObjectWrapper was not collected, but we need to make sure - // that the proxy object is still alive. - if (existingProxy is not null) - { - // The existing proxy object is still alive, we will use that. - return (cachedWrapper, existingProxy); - } - - // The proxy object was collected, so we need to update the cache entry. - rcwEntry.Target = wrapper; - } - - // We either added an entry to the cache or updated an existing entry that was dead. - // Return our target object. - return (wrapper, comProxy); - } - } - - public object? FindProxyForComInstance(IntPtr comPointer) - { - lock (_lock) - { - if (_cache.TryGetValue(comPointer, out GCHandle existingHandle)) - { - if (existingHandle.Target is NativeObjectWrapper { ProxyHandle.Target: object cachedProxy }) - { - // The target exists and is still alive. Return it. - return cachedProxy; - } - - // The target was collected, so we need to remove the entry from the cache. - _cache.Remove(comPointer); - existingHandle.Free(); - } - - return null; - } - } - - public void Remove(IntPtr comPointer, NativeObjectWrapper wrapper) + internal sealed unsafe partial class ManagedObjectWrapperHolder + { + private static void RegisterIsRootedCallback() { - lock (_lock) + delegate* unmanaged callback = &IsRootedCallback; + if (!RuntimeImports.RhRegisterRefCountedHandleCallback((nint)callback, MethodTable.Of())) { - // TryGetOrCreateObjectForComInstanceInternal may have put a new entry into the cache - // in the time between the GC cleared the contents of the GC handle but before the - // NativeObjectWrapper finalizer ran. - // Only remove the entry if the target of the GC handle is the NativeObjectWrapper - // or is null (indicating that the corresponding NativeObjectWrapper has been scheduled for finalization). - if (_cache.TryGetValue(comPointer, out GCHandle cachedRef) - && (wrapper == cachedRef.Target - || cachedRef.Target is null)) - { - _cache.Remove(comPointer); - cachedRef.Free(); - } + throw new OutOfMemoryException(); } } - } - - /// - /// Register a instance to be used as the global instance for reference tracker support. - /// - /// Instance to register - /// - /// This function can only be called a single time. Subsequent calls to this function will result - /// in a being thrown. - /// - /// Scenarios where this global instance may be used are: - /// * Object tracking via the and flags. - /// - public static void RegisterForTrackerSupport(ComWrappers instance) - { - ArgumentNullException.ThrowIfNull(instance); - if (null != Interlocked.CompareExchange(ref s_globalInstanceForTrackerSupport, instance, null)) + [UnmanagedCallersOnly] + private static bool IsRootedCallback(IntPtr pObj) { - throw new InvalidOperationException(SR.InvalidOperation_ResetGlobalComWrappersInstance); + // We are paused in the GC, so this is safe. + ManagedObjectWrapperHolder* holder = (ManagedObjectWrapperHolder*)&pObj; + return holder->_wrapper->IsRooted; } - } - - /// - /// Register a instance to be used as the global instance for marshalling in the runtime. - /// - /// Instance to register - /// - /// This function can only be called a single time. 
Subsequent calls to this function will result - /// in a being thrown. - /// - /// Scenarios where this global instance may be used are: - /// * Usage of COM-related Marshal APIs - /// * P/Invokes with COM-related types - /// * COM activation - /// - [SupportedOSPlatformAttribute("windows")] - public static void RegisterForMarshalling(ComWrappers instance) - { - ArgumentNullException.ThrowIfNull(instance); - if (null != Interlocked.CompareExchange(ref s_globalInstanceForMarshalling, instance, null)) + private static IntPtr AllocateRefCountedHandle(ManagedObjectWrapperHolder holder) { - throw new InvalidOperationException(SR.InvalidOperation_ResetGlobalComWrappersInstance); + return RuntimeImports.RhHandleAllocRefCounted(holder); } } @@ -1243,153 +51,26 @@ public static void RegisterForMarshalling(ComWrappers instance) /// Function pointer to Release. public static unsafe void GetIUnknownImpl(out IntPtr fpQueryInterface, out IntPtr fpAddRef, out IntPtr fpRelease) { - fpQueryInterface = (IntPtr)(delegate* unmanaged)&ComWrappers.IUnknown_QueryInterface; - fpAddRef = RuntimeImports.RhGetIUnknownAddRef(); // Implemented in C/C++ to avoid GC transitions - fpRelease = (IntPtr)(delegate* unmanaged)&ComWrappers.IUnknown_Release; - } - - internal static IntPtr ComInterfaceForObject(object instance) - { - if (s_globalInstanceForMarshalling == null) - { - throw new NotSupportedException(SR.InvalidOperation_ComInteropRequireComWrapperInstance); - } - - return s_globalInstanceForMarshalling.GetOrCreateComInterfaceForObject(instance, CreateComInterfaceFlags.None); - } - - internal static unsafe IntPtr ComInterfaceForObject(object instance, Guid targetIID) - { - IntPtr unknownPtr = ComInterfaceForObject(instance); - IntPtr comObjectInterface; - ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)unknownPtr); - int resultCode = wrapper->QueryInterface(in targetIID, out comObjectInterface); - // We no longer need IUnknownPtr, release reference - Marshal.Release(unknownPtr); - if (resultCode != 0) - { - throw new InvalidCastException(); - } - - return comObjectInterface; - } - - internal static object ComObjectForInterface(IntPtr externalComObject) - { - if (s_globalInstanceForMarshalling == null) - { - throw new NotSupportedException(SR.InvalidOperation_ComInteropRequireComWrapperInstance); - } - - // TrackerObject support and unwrapping matches the built-in semantics that the global marshalling scenario mimics. 
- return s_globalInstanceForMarshalling.GetOrCreateObjectForComInstance(externalComObject, CreateObjectFlags.TrackerObject | CreateObjectFlags.Unwrap); - } - - internal static IntPtr GetOrCreateTrackerTarget(IntPtr externalComObject) - { - if (s_globalInstanceForTrackerSupport == null) - { - throw new NotSupportedException(SR.InvalidOperation_ComInteropRequireComWrapperTrackerInstance); - } - - object obj = s_globalInstanceForTrackerSupport.GetOrCreateObjectForComInstance(externalComObject, CreateObjectFlags.TrackerObject); - return s_globalInstanceForTrackerSupport.GetOrCreateComInterfaceForObject(obj, CreateComInterfaceFlags.TrackerSupport); - } - - internal static void ReleaseExternalObjectsFromCurrentThread() - { - if (s_globalInstanceForTrackerSupport == null) - { - throw new NotSupportedException(SR.InvalidOperation_ComInteropRequireComWrapperTrackerInstance); - } - - IntPtr contextToken = GetContextToken(); - - List objects = new List(); - - // Here we aren't part of a GC callback, so other threads can still be running - // who are adding and removing from the collection. This means we can possibly race - // with a handle being removed and freed and we can end up accessing a freed handle. - // To avoid this, we take a lock on modifications to the collection while we gather - // the objects. - using (s_referenceTrackerNativeObjectWrapperCache.ModificationLock.EnterScope()) - { - foreach (GCHandle weakNativeObjectWrapperHandle in s_referenceTrackerNativeObjectWrapperCache) - { - ReferenceTrackerNativeObjectWrapper? nativeObjectWrapper = Unsafe.As(weakNativeObjectWrapperHandle.Target); - if (nativeObjectWrapper != null && - nativeObjectWrapper._contextToken == contextToken) - { - object? target = nativeObjectWrapper.ProxyHandle.Target; - if (target != null) - { - objects.Add(target); - } - - // Separate the wrapper from the tracker runtime prior to - // passing them. - nativeObjectWrapper.DisconnectTracker(); - } - } - } - - s_globalInstanceForTrackerSupport.ReleaseObjects(objects); - } - - // Used during GC callback - internal static unsafe void WalkExternalTrackerObjects() - { - bool walkFailed = false; - - foreach (GCHandle weakNativeObjectWrapperHandle in s_referenceTrackerNativeObjectWrapperCache) - { - ReferenceTrackerNativeObjectWrapper? nativeObjectWrapper = Unsafe.As(weakNativeObjectWrapperHandle.Target); - if (nativeObjectWrapper != null && - nativeObjectWrapper.TrackerObject != IntPtr.Zero) - { - FindReferenceTargetsCallback.s_currentRootObjectHandle = nativeObjectWrapper.ProxyHandle; - if (IReferenceTracker.FindTrackerTargets(nativeObjectWrapper.TrackerObject, TrackerObjectManager.s_findReferencesTargetCallback) != HResults.S_OK) - { - walkFailed = true; - FindReferenceTargetsCallback.s_currentRootObjectHandle = default; - break; - } - FindReferenceTargetsCallback.s_currentRootObjectHandle = default; - } - } - - // Report whether walking failed or not. 
- if (walkFailed) - { - TrackerObjectManager.s_isGlobalPeggingOn = true; - } - IReferenceTrackerManager.FindTrackerTargetsCompleted(TrackerObjectManager.s_trackerManager, walkFailed); + fpQueryInterface = (IntPtr)(delegate* unmanaged[MemberFunction])&ComWrappers.IUnknown_QueryInterface; + fpAddRef = (IntPtr)(delegate*)&RuntimeImports.RhIUnknown_AddRef; // Implemented in C/C++ to avoid GC transitions + fpRelease = (IntPtr)(delegate* unmanaged[MemberFunction])&ComWrappers.IUnknown_Release; } - // Used during GC callback - internal static void DetachNonPromotedObjects() + internal static unsafe void GetUntrackedIUnknownImpl(out delegate* unmanaged[MemberFunction] fpAddRef, out delegate* unmanaged[MemberFunction] fpRelease) { - foreach (GCHandle weakNativeObjectWrapperHandle in s_referenceTrackerNativeObjectWrapperCache) - { - ReferenceTrackerNativeObjectWrapper? nativeObjectWrapper = Unsafe.As(weakNativeObjectWrapperHandle.Target); - if (nativeObjectWrapper != null && - nativeObjectWrapper.TrackerObject != IntPtr.Zero && - !RuntimeImports.RhIsPromoted(nativeObjectWrapper.ProxyHandle.Target)) - { - // Notify the wrapper it was not promoted and is being collected. - TrackerObjectManager.BeforeWrapperFinalized(nativeObjectWrapper.TrackerObject); - } - } + // Implemented in C/C++ to avoid GC transitions during shutdown + fpAddRef = (delegate* unmanaged[MemberFunction])(void*)(delegate*)&RuntimeImports.RhUntracked_AddRefRelease; + fpRelease = (delegate* unmanaged[MemberFunction])(void*)(delegate*)&RuntimeImports.RhUntracked_AddRefRelease; } - [UnmanagedCallersOnly] + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] internal static unsafe int IUnknown_QueryInterface(IntPtr pThis, Guid* guid, IntPtr* ppObject) { ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); return wrapper->QueryInterface(in *guid, out *ppObject); } - [UnmanagedCallersOnly] + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] internal static unsafe uint IUnknown_Release(IntPtr pThis) { ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); @@ -1397,530 +78,129 @@ internal static unsafe uint IUnknown_Release(IntPtr pThis) return refcount; } - [UnmanagedCallersOnly] - internal static unsafe int IReferenceTrackerTarget_QueryInterface(IntPtr pThis, Guid* guid, IntPtr* ppObject) - { - ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); - return wrapper->QueryInterfaceForTracker(in *guid, out *ppObject); - } - - [UnmanagedCallersOnly] - internal static unsafe uint IReferenceTrackerTarget_AddRefFromReferenceTracker(IntPtr pThis) - { - ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); - return wrapper->AddRefFromReferenceTracker(); - } - - [UnmanagedCallersOnly] - internal static unsafe uint IReferenceTrackerTarget_ReleaseFromReferenceTracker(IntPtr pThis) - { - ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); - return wrapper->ReleaseFromReferenceTracker(); - } - - [UnmanagedCallersOnly] - internal static unsafe uint IReferenceTrackerTarget_Peg(IntPtr pThis) - { - ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); - return wrapper->Peg(); - } - - [UnmanagedCallersOnly] - internal static unsafe uint IReferenceTrackerTarget_Unpeg(IntPtr pThis) - { - ManagedObjectWrapper* 
wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); - return wrapper->Unpeg(); - } - - [UnmanagedCallersOnly] - internal static unsafe int ITaggedImpl_IsCurrentVersion(IntPtr pThis, IntPtr version) - { - return version == (IntPtr)(delegate* unmanaged)&ITaggedImpl_IsCurrentVersion - ? HResults.S_OK - : HResults.E_FAIL; - } - - private static unsafe IntPtr CreateDefaultIUnknownVftbl() - { - IntPtr* vftbl = (IntPtr*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(ComWrappers), 3 * sizeof(IntPtr)); - GetIUnknownImpl(out vftbl[0], out vftbl[1], out vftbl[2]); - return (IntPtr)vftbl; - } - - // This IID represents an internal interface we define to tag any ManagedObjectWrappers we create. - // This interface type and GUID do not correspond to any public interface; it is an internal implementation detail. - private static unsafe IntPtr CreateTaggedImplVftbl() - { - IntPtr* vftbl = (IntPtr*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(ComWrappers), 4 * sizeof(IntPtr)); - GetIUnknownImpl(out vftbl[0], out vftbl[1], out vftbl[2]); - vftbl[3] = (IntPtr)(delegate* unmanaged)&ITaggedImpl_IsCurrentVersion; - return (IntPtr)vftbl; - } - - private static unsafe IntPtr CreateDefaultIReferenceTrackerTargetVftbl() - { - IntPtr* vftbl = (IntPtr*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(ComWrappers), 7 * sizeof(IntPtr)); - vftbl[0] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerTarget_QueryInterface; - GetIUnknownImpl(out _, out vftbl[1], out vftbl[2]); - vftbl[3] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerTarget_AddRefFromReferenceTracker; - vftbl[4] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerTarget_ReleaseFromReferenceTracker; - vftbl[5] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerTarget_Peg; - vftbl[6] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerTarget_Unpeg; - return (IntPtr)vftbl; - } - - [UnmanagedCallersOnly] - internal static unsafe int IReferenceTrackerHost_DisconnectUnusedReferenceSources(IntPtr pThis, uint flags) - { - try - { - // Defined in windows.ui.xaml.hosting.referencetracker.h. - const uint XAML_REFERENCETRACKER_DISCONNECT_SUSPEND = 0x00000001; - - if ((flags & XAML_REFERENCETRACKER_DISCONNECT_SUSPEND) != 0) - { - RuntimeImports.RhCollect(2, InternalGCCollectionMode.Blocking | InternalGCCollectionMode.Optimized, true); - } - else - { - GC.Collect(); - } - return HResults.S_OK; - } - catch (Exception e) - { - return Marshal.GetHRForException(e); - } - - } - - [UnmanagedCallersOnly] - internal static unsafe int IReferenceTrackerHost_ReleaseDisconnectedReferenceSources(IntPtr pThis) - { - // We'd like to call GC.WaitForPendingFinalizers() here, but this could lead to deadlock - // if the finalizer thread is trying to get back to this thread, because we are not pumping - // anymore. Disable this for now. See: https://github.com/dotnet/runtime/issues/109538. - return HResults.S_OK; - } - - [UnmanagedCallersOnly] - internal static unsafe int IReferenceTrackerHost_NotifyEndOfReferenceTrackingOnThread(IntPtr pThis) - { - try - { - ReleaseExternalObjectsFromCurrentThread(); - return HResults.S_OK; - } - catch (Exception e) - { - return Marshal.GetHRForException(e); - } - - } - - // Creates a proxy object (managed object wrapper) that points to the given IUnknown. - // The proxy represents the following: - // 1. Has a managed reference pointing to the external object - // and therefore forms a cycle that can be resolved by GC. - // 2. Forwards data binding requests. 
- // - // For example: - // NoCW = Native Object Com Wrapper also known as RCW - // - // Grid <---- NoCW Grid <-------- NoCW - // | ^ | ^ - // | | Becomes | | - // v | v | - // Rectangle Rectangle ----->Proxy - // - // Arguments - // obj - An IUnknown* where a NoCW points to (Grid, in this case) - // Notes: - // 1. We can either create a new NoCW or get back an old one from the cache. - // 2. This obj could be a regular tracker runtime object for data binding. - // ppNewReference - The IReferenceTrackerTarget* for the proxy created - // The tracker runtime will call IReferenceTrackerTarget to establish a reference. - // - [UnmanagedCallersOnly] - internal static unsafe int IReferenceTrackerHost_GetTrackerTarget(IntPtr pThis, IntPtr punk, IntPtr* ppNewReference) - { - if (punk == IntPtr.Zero) - { - return HResults.E_INVALIDARG; - } - - if (Marshal.QueryInterface(punk, IID_IUnknown, out IntPtr ppv) != HResults.S_OK) - { - return HResults.COR_E_INVALIDCAST; - } - - try - { - using ComHolder identity = new ComHolder(ppv); - using ComHolder trackerTarget = new ComHolder(GetOrCreateTrackerTarget(identity.Ptr)); - return Marshal.QueryInterface(trackerTarget.Ptr, IID_IReferenceTrackerTarget, out *ppNewReference); - } - catch (Exception e) - { - return Marshal.GetHRForException(e); - } - } - - [UnmanagedCallersOnly] - internal static unsafe int IReferenceTrackerHost_AddMemoryPressure(IntPtr pThis, long bytesAllocated) - { - try - { - GC.AddMemoryPressure(bytesAllocated); - return HResults.S_OK; - } - catch (Exception e) - { - return Marshal.GetHRForException(e); - } - } - - [UnmanagedCallersOnly] - internal static unsafe int IReferenceTrackerHost_RemoveMemoryPressure(IntPtr pThis, long bytesAllocated) - { - try - { - GC.RemoveMemoryPressure(bytesAllocated); - return HResults.S_OK; - } - catch (Exception e) - { - return Marshal.GetHRForException(e); - } - } - - // Lifetime maintained by stack - we don't care about ref counts - [UnmanagedCallersOnly] - internal static unsafe uint Untracked_AddRef(IntPtr pThis) - { - return 1; - } - - [UnmanagedCallersOnly] - internal static unsafe uint Untracked_Release(IntPtr pThis) - { - return 1; - } - - [UnmanagedCallersOnly] - internal static unsafe int IReferenceTrackerHost_QueryInterface(IntPtr pThis, Guid* guid, IntPtr* ppObject) - { - if (*guid == IID_IReferenceTrackerHost || *guid == IID_IUnknown) - { - *ppObject = pThis; - return 0; - } - else - { - return HResults.COR_E_INVALIDCAST; - } - } - - internal static unsafe IntPtr CreateDefaultIReferenceTrackerHostVftbl() - { - IntPtr* vftbl = (IntPtr*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(ComWrappers), 9 * sizeof(IntPtr)); - vftbl[0] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerHost_QueryInterface; - vftbl[1] = (IntPtr)(delegate* unmanaged)&ComWrappers.Untracked_AddRef; - vftbl[2] = (IntPtr)(delegate* unmanaged)&ComWrappers.Untracked_Release; - vftbl[3] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerHost_DisconnectUnusedReferenceSources; - vftbl[4] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerHost_ReleaseDisconnectedReferenceSources; - vftbl[5] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerHost_NotifyEndOfReferenceTrackingOnThread; - vftbl[6] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerHost_GetTrackerTarget; - vftbl[7] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerHost_AddMemoryPressure; - vftbl[8] = (IntPtr)(delegate* unmanaged)&ComWrappers.IReferenceTrackerHost_RemoveMemoryPressure; - return 
(IntPtr)vftbl; - } - - private static IntPtr GetContextToken() - { -#if TARGET_WINDOWS - Interop.Ole32.CoGetContextToken(out IntPtr contextToken); - return contextToken; -#else - return IntPtr.Zero; -#endif - } - - // Wrapper for IWeakReference - private static unsafe class IWeakReference + private static IntPtr GetTaggedImplCurrentVersion() { - public static int Resolve(IntPtr pThis, Guid guid, out IntPtr inspectable) + unsafe { - fixed (IntPtr* inspectablePtr = &inspectable) - return (*(delegate* unmanaged**)pThis)[3](pThis, &guid, inspectablePtr); + return (IntPtr)(delegate* unmanaged[MemberFunction])&VtableImplementations.ITaggedImpl_IsCurrentVersion; } } - // Wrapper for IWeakReferenceSource - private static unsafe class IWeakReferenceSource - { - public static int GetWeakReference(IntPtr pThis, out IntPtr weakReference) - { - fixed (IntPtr* weakReferencePtr = &weakReference) - return (*(delegate* unmanaged**)pThis)[3](pThis, weakReferencePtr); - } - } + internal static unsafe IntPtr DefaultIUnknownVftblPtr => (IntPtr)Unsafe.AsPointer(in VtableImplementations.IUnknown); + internal static unsafe IntPtr TaggedImplVftblPtr => (IntPtr)Unsafe.AsPointer(in VtableImplementations.ITaggedImpl); + internal static unsafe IntPtr DefaultIReferenceTrackerTargetVftblPtr => (IntPtr)Unsafe.AsPointer(in VtableImplementations.IReferenceTrackerTarget); - private static object? ComWeakRefToObject(IntPtr pComWeakRef, long wrapperId) + /// + /// Define the vtable layout for the COM interfaces we provide. + /// + /// + /// This is defined as a nested class to ensure that the vtable types are the only things initialized in the class's static constructor. + /// As long as that's the case, we can easily guarantee that they are pre-initialized and that we don't end up having startup code + /// needed to set up the vtable layouts. + /// + private static class VtableImplementations { - if (wrapperId == 0) + public unsafe struct IUnknownVftbl { - return null; + public delegate* unmanaged[MemberFunction] QueryInterface; + public delegate* unmanaged[MemberFunction] AddRef; + public delegate* unmanaged[MemberFunction] Release; } - // Using the IWeakReference*, get ahold of the target native COM object's IInspectable*. If this resolve fails or - // returns null, then we assume that the underlying native COM object is no longer alive, and thus we cannot create a - // new RCW for it. - if (IWeakReference.Resolve(pComWeakRef, IID_IInspectable, out IntPtr targetPtr) == HResults.S_OK && - targetPtr != IntPtr.Zero) + public unsafe struct IReferenceTrackerTargetVftbl { - using ComHolder target = new ComHolder(targetPtr); - if (Marshal.QueryInterface(target.Ptr, IID_IUnknown, out IntPtr targetIdentityPtr) == HResults.S_OK) - { - using ComHolder targetIdentity = new ComHolder(targetIdentityPtr); - return GetOrCreateObjectFromWrapper(wrapperId, targetIdentity.Ptr); - } + public delegate* unmanaged[MemberFunction] QueryInterface; + public delegate* unmanaged[MemberFunction] AddRef; + public delegate* unmanaged[MemberFunction] Release; + public delegate* unmanaged[MemberFunction] AddRefFromReferenceTracker; + public delegate* unmanaged[MemberFunction] ReleaseFromReferenceTracker; + public delegate* unmanaged[MemberFunction] Peg; + public delegate* unmanaged[MemberFunction] Unpeg; } - return null; - } - - private static unsafe bool PossiblyComObject(object target) - { - // If the RCW is an aggregated RCW, then the managed object cannot be recreated from the IUnknown - // as the outer IUnknown wraps the managed object. 
In this case, don't create a weak reference backed - // by a COM weak reference. - return s_nativeObjectWrapperTable.TryGetValue(target, out NativeObjectWrapper? wrapper) && !wrapper.IsAggregatedWithManagedObjectWrapper; - } - - private static unsafe IntPtr ObjectToComWeakRef(object target, out long wrapperId) - { - if (TryGetComInstanceForIID( - target, - IID_IWeakReferenceSource, - out IntPtr weakReferenceSourcePtr, - out wrapperId)) + public unsafe struct ITaggedImplVftbl { - using ComHolder weakReferenceSource = new ComHolder(weakReferenceSourcePtr); - if (IWeakReferenceSource.GetWeakReference(weakReferenceSource.Ptr, out IntPtr weakReference) == HResults.S_OK) - { - return weakReference; - } + public delegate* unmanaged[MemberFunction] QueryInterface; + public delegate* unmanaged[MemberFunction] AddRef; + public delegate* unmanaged[MemberFunction] Release; + public delegate* unmanaged[MemberFunction] IsCurrentVersion; } - return IntPtr.Zero; - } - } - - // This is a GCHandle HashSet implementation based on LowLevelDictionary. - // It uses no locking for readers. While for writers (add / remove), - // it handles the locking itself. - // This implementation specifically makes sure that any readers of this - // collection during GC aren't impacted by other threads being - // frozen while in the middle of an write. It makes no guarantees on - // whether you will observe the element being added / removed, but does - // make sure the collection is in a good state and doesn't run into issues - // while iterating. - internal sealed class GCHandleSet : IEnumerable - { - private const int DefaultSize = 7; + [FixedAddressValueType] + public static readonly IUnknownVftbl IUnknown; - private Entry?[] _buckets = new Entry[DefaultSize]; - private int _numEntries; - private readonly Lock _lock = new Lock(useTrivialWaits: true); + [FixedAddressValueType] + public static readonly IReferenceTrackerTargetVftbl IReferenceTrackerTarget; - public Lock ModificationLock => _lock; + [FixedAddressValueType] + public static readonly ITaggedImplVftbl ITaggedImpl; - public void Add(GCHandle handle) - { - using (_lock.EnterScope()) + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] + internal static unsafe int IReferenceTrackerTarget_QueryInterface(IntPtr pThis, Guid* guid, IntPtr* ppObject) { - int bucket = GetBucket(handle, _buckets.Length); - Entry? prev = null; - Entry? entry = _buckets[bucket]; - while (entry != null) - { - // Handle already exists, nothing to add. - if (handle.Equals(entry.m_value)) - { - return; - } - - prev = entry; - entry = entry.m_next; - } - - Entry newEntry = new Entry() - { - m_value = handle - }; - - if (prev == null) - { - _buckets[bucket] = newEntry; - } - else - { - prev.m_next = newEntry; - } - - // _numEntries is only maintained for the purposes of deciding whether to - // expand the bucket and is not used during iteration to handle the - // scenario where element is in bucket but _numEntries hasn't been incremented - // yet. 
- _numEntries++; - if (_numEntries > (_buckets.Length * 2)) - { - ExpandBuckets(); - } + ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); + return wrapper->QueryInterfaceForTracker(in *guid, out *ppObject); } - } - private void ExpandBuckets() - { - int newNumBuckets = _buckets.Length * 2 + 1; - Entry?[] newBuckets = new Entry[newNumBuckets]; - for (int i = 0; i < _buckets.Length; i++) + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] + internal static unsafe uint IReferenceTrackerTarget_AddRefFromReferenceTracker(IntPtr pThis) { - Entry? entry = _buckets[i]; - while (entry != null) - { - Entry? nextEntry = entry.m_next; - - int bucket = GetBucket(entry.m_value, newNumBuckets); - - // We are allocating new entries for the bucket to ensure that - // if there is an enumeration already in progress, we don't - // modify what it observes by changing next in existing instances. - Entry newEntry = new Entry() - { - m_value = entry.m_value, - m_next = newBuckets[bucket], - }; - newBuckets[bucket] = newEntry; - - entry = nextEntry; - } + ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); + return wrapper->AddRefFromReferenceTracker(); } - _buckets = newBuckets; - } - public void Remove(GCHandle handle) - { - using (_lock.EnterScope()) + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] + internal static unsafe uint IReferenceTrackerTarget_ReleaseFromReferenceTracker(IntPtr pThis) { - int bucket = GetBucket(handle, _buckets.Length); - Entry? prev = null; - Entry? entry = _buckets[bucket]; - while (entry != null) - { - if (handle.Equals(entry.m_value)) - { - if (prev == null) - { - _buckets[bucket] = entry.m_next; - } - else - { - prev.m_next = entry.m_next; - } - _numEntries--; - return; - } - - prev = entry; - entry = entry.m_next; - } + ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); + return wrapper->ReleaseFromReferenceTracker(); } - } - - private static int GetBucket(GCHandle handle, int numBuckets) - { - int h = handle.GetHashCode(); - return (int)((uint)h % (uint)numBuckets); - } - - public Enumerator GetEnumerator() => new Enumerator(this); - - IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); - - IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); - - private sealed class Entry - { - public GCHandle m_value; - public Entry? m_next; - } - - public struct Enumerator : IEnumerator - { - private readonly Entry?[] _buckets; - private int _currentIdx; - private Entry? _currentEntry; - public Enumerator(GCHandleSet set) + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] + internal static unsafe uint IReferenceTrackerTarget_Peg(IntPtr pThis) { - // We hold onto the buckets of the set rather than the set itself - // so that if it is ever expanded, we are not impacted by that during - // enumeration. 
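The `GCHandleSet` code above, together with the enumerator continued below, leans on a snapshot discipline: resizes build entirely new bucket arrays and entry nodes, and an enumerator only ever walks the array reference it captured. A deliberately simplified illustration of that idea, not the actual bucket/chain algorithm; `SnapshotList` is hypothetical:

```csharp
using System;
using System.Collections.Generic;

internal sealed class SnapshotList<T>
{
    private readonly object _writeLock = new object();
    private T[] _items = Array.Empty<T>();

    public void Add(T item)
    {
        lock (_writeLock)
        {
            // Writers never mutate the published array; they install a new one.
            T[] next = new T[_items.Length + 1];
            Array.Copy(_items, next, _items.Length);
            next[_items.Length] = item;
            _items = next;
        }
    }

    public IEnumerable<T> Enumerate()
    {
        T[] snapshot = _items; // the only read of the shared reference
        foreach (T item in snapshot)
        {
            yield return item;
        }
    }
}
```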
- _buckets = set._buckets; - Reset(); + ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); + return wrapper->Peg(); } - public GCHandle Current + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] + internal static unsafe uint IReferenceTrackerTarget_Unpeg(IntPtr pThis) { - get - { - if (_currentEntry == null) - { - throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen"); - } - - return _currentEntry.m_value; - } + ManagedObjectWrapper* wrapper = ComInterfaceDispatch.ToManagedObjectWrapper((ComInterfaceDispatch*)pThis); + return wrapper->Unpeg(); } - object IEnumerator.Current => Current; - - public void Dispose() + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] + internal static unsafe int ITaggedImpl_IsCurrentVersion(IntPtr pThis, IntPtr version) { + return version == (IntPtr)(delegate* unmanaged[MemberFunction])&ITaggedImpl_IsCurrentVersion + ? HResults.S_OK + : HResults.E_FAIL; } - public bool MoveNext() + static unsafe VtableImplementations() { - if (_currentEntry != null) - { - _currentEntry = _currentEntry.m_next; - } - - if (_currentEntry == null) - { - // Certain buckets might be empty, so loop until we find - // one with an entry. - while (++_currentIdx != _buckets.Length) - { - _currentEntry = _buckets[_currentIdx]; - if (_currentEntry != null) - { - return true; - } - } + // Use the "pre-inited vtable" pattern to ensure that ILC can pre-compile these vtables. + GetIUnknownImpl( + fpQueryInterface: out *(nint*)&((IUnknownVftbl*)Unsafe.AsPointer(ref IUnknown))->QueryInterface, + fpAddRef: out *(nint*)&((IUnknownVftbl*)Unsafe.AsPointer(ref IUnknown))->AddRef, + fpRelease: out *(nint*)&((IUnknownVftbl*)Unsafe.AsPointer(ref IUnknown))->Release); - return false; - } - - return true; - } + IReferenceTrackerTarget.QueryInterface = (delegate* unmanaged[MemberFunction])&IReferenceTrackerTarget_QueryInterface; + GetIUnknownImpl( + fpQueryInterface: out _, + fpAddRef: out *(nint*)&((IReferenceTrackerTargetVftbl*)Unsafe.AsPointer(ref IReferenceTrackerTarget))->AddRef, + fpRelease: out *(nint*)&((IReferenceTrackerTargetVftbl*)Unsafe.AsPointer(ref IReferenceTrackerTarget))->Release); + IReferenceTrackerTarget.AddRefFromReferenceTracker = (delegate* unmanaged[MemberFunction])&IReferenceTrackerTarget_AddRefFromReferenceTracker; + IReferenceTrackerTarget.ReleaseFromReferenceTracker = (delegate* unmanaged[MemberFunction])&IReferenceTrackerTarget_ReleaseFromReferenceTracker; + IReferenceTrackerTarget.Peg = (delegate* unmanaged[MemberFunction])&IReferenceTrackerTarget_Peg; + IReferenceTrackerTarget.Unpeg = (delegate* unmanaged[MemberFunction])&IReferenceTrackerTarget_Unpeg; - public void Reset() - { - _currentIdx = -1; - _currentEntry = null; + GetIUnknownImpl( + fpQueryInterface: out *(nint*)&((ITaggedImplVftbl*)Unsafe.AsPointer(ref ITaggedImpl))->QueryInterface, + fpAddRef: out *(nint*)&((ITaggedImplVftbl*)Unsafe.AsPointer(ref ITaggedImpl))->AddRef, + fpRelease: out *(nint*)&((ITaggedImplVftbl*)Unsafe.AsPointer(ref ITaggedImpl))->Release); + ITaggedImpl.IsCurrentVersion = (delegate* unmanaged[MemberFunction])&ITaggedImpl_IsCurrentVersion; } } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs index ff98d77ee098..18e7c9a9596d 100644 --- 
a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs @@ -20,7 +20,7 @@ internal static int SizeOfHelper(RuntimeType t, bool throwIfNotMarshalable) { Debug.Assert(throwIfNotMarshalable); - if (t.IsPointer /* or IsFunctionPointer */) + if (t.IsPointer || t.IsFunctionPointer) return IntPtr.Size; if (t.IsByRef || t.IsArray || t.ContainsGenericParameters) @@ -90,7 +90,7 @@ internal static unsafe void PtrToStructureImpl(IntPtr ptr, object structure) public static unsafe void DestroyStructure(IntPtr ptr, Type structuretype) { ArgumentNullException.ThrowIfNull(ptr); - ArgumentNullException.ThrowIfNull(structuretype, nameof(structuretype)); + ArgumentNullException.ThrowIfNull(structuretype); RuntimeTypeHandle structureTypeHandle = structuretype.TypeHandle; diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeLibrary.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeLibrary.NativeAot.cs index 3abe04a60385..db2174deef1d 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeLibrary.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeLibrary.NativeAot.cs @@ -16,6 +16,7 @@ internal static bool TryLoad(string libraryName, Assembly assembly, DllImportSea { handle = LoadLibraryByName(libraryName, assembly, + userSpecifiedSearchFlags: true, searchPath, throwOnError: false); return handle != IntPtr.Zero; @@ -26,21 +27,21 @@ internal static IntPtr LoadLibraryByName(string libraryName, Assembly assembly, // First checks if a default dllImportSearchPathFlags was passed in, if so, use that value. // Otherwise checks if the assembly has the DefaultDllImportSearchPathsAttribute attribute. // If so, use that value. 
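The attribute lookup described in the comment above, and implemented by `GetDllImportSearchPath` further down in this hunk, can be sketched on its own as follows; `SearchPathSketch` is a hypothetical helper, while the reflection and interop APIs it calls are the real ones:

```csharp
using System;
using System.Reflection;
using System.Runtime.InteropServices;

internal static class SearchPathSketch
{
    // Reads an assembly-level [DefaultDllImportSearchPaths(...)] value via CustomAttributeData,
    // falling back to AssemblyDirectory when the attribute is absent.
    internal static DllImportSearchPath GetDefaultSearchPath(Assembly assembly, out bool specified)
    {
        foreach (CustomAttributeData cad in assembly.CustomAttributes)
        {
            if (cad.AttributeType == typeof(DefaultDllImportSearchPathsAttribute))
            {
                specified = true;
                return (DllImportSearchPath)cad.ConstructorArguments[0].Value!;
            }
        }

        specified = false;
        return DllImportSearchPath.AssemblyDirectory;
    }
}
```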
- - if (!searchPath.HasValue) + bool userSpecifiedSearchFlags = searchPath.HasValue; + if (!userSpecifiedSearchFlags) { - searchPath = GetDllImportSearchPath(assembly); + searchPath = GetDllImportSearchPath(assembly, out userSpecifiedSearchFlags); } - return LoadLibraryByName(libraryName, assembly, searchPath.Value, throwOnError); + return LoadLibraryByName(libraryName, assembly, userSpecifiedSearchFlags, searchPath!.Value, throwOnError); } - internal static IntPtr LoadLibraryByName(string libraryName, Assembly assembly, DllImportSearchPath searchPath, bool throwOnError) + private static IntPtr LoadLibraryByName(string libraryName, Assembly assembly, bool userSpecifiedSearchFlags, DllImportSearchPath searchPath, bool throwOnError) { int searchPathFlags = (int)(searchPath & ~DllImportSearchPath.AssemblyDirectory); bool searchAssemblyDirectory = (searchPath & DllImportSearchPath.AssemblyDirectory) != 0; LoadLibErrorTracker errorTracker = default; - IntPtr ret = LoadBySearch(assembly, searchAssemblyDirectory, searchPathFlags, ref errorTracker, libraryName); + IntPtr ret = LoadBySearch(assembly, userSpecifiedSearchFlags, searchAssemblyDirectory, searchPathFlags, ref errorTracker, libraryName); if (throwOnError && ret == IntPtr.Zero) { errorTracker.Throw(libraryName); @@ -49,24 +50,31 @@ internal static IntPtr LoadLibraryByName(string libraryName, Assembly assembly, return ret; } - internal static DllImportSearchPath GetDllImportSearchPath(Assembly callingAssembly) + private static DllImportSearchPath GetDllImportSearchPath(Assembly callingAssembly, out bool userSpecifiedSearchFlags) { foreach (CustomAttributeData cad in callingAssembly.CustomAttributes) { if (cad.AttributeType == typeof(DefaultDllImportSearchPathsAttribute)) { + userSpecifiedSearchFlags = true; return (DllImportSearchPath)cad.ConstructorArguments[0].Value!; } } + userSpecifiedSearchFlags = false; return DllImportSearchPath.AssemblyDirectory; } - internal static IntPtr LoadBySearch(Assembly callingAssembly, bool searchAssemblyDirectory, int dllImportSearchPathFlags, ref LoadLibErrorTracker errorTracker, string libraryName) + internal static IntPtr LoadBySearch(Assembly callingAssembly, bool userSpecifiedSearchFlags, bool searchAssemblyDirectory, int dllImportSearchPathFlags, ref LoadLibErrorTracker errorTracker, string libraryName) { IntPtr ret; int loadWithAlteredPathFlags = LoadWithAlteredSearchPathFlag; + const int loadLibrarySearchFlags = (int)DllImportSearchPath.UseDllDirectoryForDependencies + | (int)DllImportSearchPath.ApplicationDirectory + | (int)DllImportSearchPath.UserDirectories + | (int)DllImportSearchPath.System32 + | (int)DllImportSearchPath.SafeDirectories; bool libNameIsRelativePath = !Path.IsPathFullyQualified(libraryName); // P/Invokes are often declared with variations on the actual library name. @@ -80,14 +88,8 @@ internal static IntPtr LoadBySearch(Assembly callingAssembly, bool searchAssembl if (!libNameIsRelativePath) { - int flags = loadWithAlteredPathFlags; - if ((dllImportSearchPathFlags & (int)DllImportSearchPath.UseDllDirectoryForDependencies) != 0) - { - // LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR is the only flag affecting absolute path. Don't OR the flags - // unconditionally as all absolute path P/Invokes could then lose LOAD_WITH_ALTERED_SEARCH_PATH. - flags |= dllImportSearchPathFlags; - } - + // LOAD_WITH_ALTERED_SEARCH_PATH is incompatible with LOAD_LIBRARY_SEARCH flags. Remove those flags if they are set. 
+ int flags = loadWithAlteredPathFlags | (dllImportSearchPathFlags & ~loadLibrarySearchFlags); ret = LoadLibraryHelper(currLibNameVariation, flags, ref errorTracker); if (ret != IntPtr.Zero) { @@ -96,19 +98,32 @@ internal static IntPtr LoadBySearch(Assembly callingAssembly, bool searchAssembl } else if ((callingAssembly != null) && searchAssemblyDirectory) { + // LOAD_WITH_ALTERED_SEARCH_PATH is incompatible with LOAD_LIBRARY_SEARCH flags. Remove those flags if they are set. + int flags = loadWithAlteredPathFlags | (dllImportSearchPathFlags & ~loadLibrarySearchFlags); + // Try to load the module alongside the assembly where the PInvoke was declared. // For PInvokes where the DllImportSearchPath.AssemblyDirectory is specified, look next to the application. - ret = LoadLibraryHelper(Path.Combine(AppContext.BaseDirectory, currLibNameVariation), loadWithAlteredPathFlags | dllImportSearchPathFlags, ref errorTracker); + ret = LoadLibraryHelper(Path.Combine(AppContext.BaseDirectory, currLibNameVariation), flags, ref errorTracker); if (ret != IntPtr.Zero) { return ret; } } - ret = LoadLibraryHelper(currLibNameVariation, dllImportSearchPathFlags, ref errorTracker); - if (ret != IntPtr.Zero) + // Internally, search path flags and whether or not to search the assembly directory are + // tracked separately. However, on the API level, DllImportSearchPath represents them both. + // When unspecified, the default is to search the assembly directory and all OS defaults, + // which maps to searchAssemblyDirectory being true and dllImportSearchPathFlags being 0. + // When a user specifies DllImportSearchPath.AssemblyDirectory, searchAssemblyDirectory is + // true, dllImportSearchPathFlags is 0, and the desired logic is to only search the assembly + // directory (handled above), so we avoid doing any additional load search in that case. 
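The flag arithmetic above combines `LOAD_WITH_ALTERED_SEARCH_PATH` with the caller's flags while stripping the `LOAD_LIBRARY_SEARCH_*` bits, because Windows rejects that combination. A stand-alone sketch of the same computation, assuming the usual Win32 flag values; `Win32LoadFlagsSketch` is hypothetical:

```csharp
using System.Runtime.InteropServices;

internal static class Win32LoadFlagsSketch
{
    // Win32 constant; this sketch assumes Windows LoadLibraryEx semantics.
    private const int LOAD_WITH_ALTERED_SEARCH_PATH = 0x00000008;

    // DllImportSearchPath values that map onto LOAD_LIBRARY_SEARCH_* flags and are
    // therefore incompatible with LOAD_WITH_ALTERED_SEARCH_PATH.
    private const int LoadLibrarySearchFlags =
        (int)DllImportSearchPath.UseDllDirectoryForDependencies
        | (int)DllImportSearchPath.ApplicationDirectory
        | (int)DllImportSearchPath.UserDirectories
        | (int)DllImportSearchPath.System32
        | (int)DllImportSearchPath.SafeDirectories;

    internal static int ForAbsolutePath(int dllImportSearchPathFlags)
    {
        // Keep any non-search flags the caller asked for, but drop the
        // LOAD_LIBRARY_SEARCH_* bits so LOAD_WITH_ALTERED_SEARCH_PATH stays valid.
        return LOAD_WITH_ALTERED_SEARCH_PATH | (dllImportSearchPathFlags & ~LoadLibrarySearchFlags);
    }
}
```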
+ if (!userSpecifiedSearchFlags || !searchAssemblyDirectory || dllImportSearchPathFlags != 0) { - return ret; + ret = LoadLibraryHelper(currLibNameVariation, dllImportSearchPathFlags, ref errorTracker); + if (ret != IntPtr.Zero) + { + return ret; + } } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs index 5ae293874808..323544388d81 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs @@ -100,7 +100,7 @@ private static ConditionalWeakTable GetPInvokeDe [StructLayout(LayoutKind.Sequential, Pack = 1)] internal unsafe struct ThunkContextData { - public GCHandle Handle; // A weak GCHandle to the delegate + public WeakGCHandle Handle; // A weak GCHandle to the delegate public IntPtr FunctionPtr; // Function pointer for open static delegates } @@ -133,7 +133,7 @@ public PInvokeDelegateThunk(Delegate del) ThunkContextData* thunkData = (ThunkContextData*)ContextData; // allocate a weak GChandle for the delegate - thunkData->Handle = GCHandle.Alloc(del, GCHandleType.WeakTrackResurrection); + thunkData->Handle = new WeakGCHandle(del, trackResurrection: true); thunkData->FunctionPtr = openStaticFunctionPointer; } @@ -148,17 +148,17 @@ public PInvokeDelegateThunk(Delegate del) if (ContextData != IntPtr.Zero) { // free the GCHandle - GCHandle handle = ((ThunkContextData*)ContextData)->Handle; + WeakGCHandle handle = ((ThunkContextData*)ContextData)->Handle; if (handle.IsAllocated) { // If the delegate is still alive, defer finalization. - if (handle.Target != null) + if (handle.TryGetTarget(out _)) { GC.ReRegisterForFinalize(this); return; } - handle.Free(); + handle.Dispose(); } // Free the allocated context data memory @@ -205,21 +205,20 @@ private static unsafe PInvokeDelegateThunk AllocateThunk(Delegate del) IntPtr pTarget; if (s_thunkPoolHeap != null && RuntimeAugments.TryGetThunkData(s_thunkPoolHeap, ptr, out pContext, out pTarget)) { - GCHandle handle; + WeakGCHandle handle; unsafe { // Pull out Handle from context handle = ((ThunkContextData*)pContext)->Handle; } - Delegate target = Unsafe.As(handle.Target); - // - // The delegate might already been garbage collected - // User should use GC.KeepAlive or whatever ways necessary to keep the delegate alive - // until they are done with the native function pointer - // - if (target == null) + if (!handle.TryGetTarget(out Delegate? 
target)) { + // + // The delegate might already been garbage collected + // User should use GC.KeepAlive or whatever ways necessary to keep the delegate alive + // until they are done with the native function pointer + // Environment.FailFast(SR.Delegate_GarbageCollected); } @@ -269,7 +268,7 @@ public static IntPtr GetCurrentCalleeOpenStaticDelegateFunctionPointer() /// /// Retrieves the current delegate that is being called /// - public static T GetCurrentCalleeDelegate() where T : class // constraint can't be System.Delegate + public static T GetCurrentCalleeDelegate() where T : Delegate { // // RH keeps track of the current thunk that is being called through a secret argument / thread @@ -280,7 +279,7 @@ public static T GetCurrentCalleeDelegate() where T : class // constraint can' Debug.Assert(pContext != IntPtr.Zero); - GCHandle handle; + WeakGCHandle handle; unsafe { // Pull out Handle from context @@ -288,18 +287,17 @@ public static T GetCurrentCalleeDelegate() where T : class // constraint can' } - T target = Unsafe.As(handle.Target); - - // - // The delegate might already been garbage collected - // User should use GC.KeepAlive or whatever ways necessary to keep the delegate alive - // until they are done with the native function pointer - // - if (target == null) + if (!handle.TryGetTarget(out Delegate? target)) { + // + // The delegate might already been garbage collected + // User should use GC.KeepAlive or whatever ways necessary to keep the delegate alive + // until they are done with the native function pointer + // Environment.FailFast(SR.Delegate_GarbageCollected); } - return target; + + return Unsafe.As(target); } #endregion diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/TrackerObjectManager.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/TrackerObjectManager.NativeAot.cs index 2e2674fb0e1f..f2175749d7ef 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/TrackerObjectManager.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/TrackerObjectManager.NativeAot.cs @@ -10,11 +10,8 @@ namespace System.Runtime.InteropServices { - internal static class TrackerObjectManager + internal static partial class TrackerObjectManager { - internal static readonly IntPtr s_findReferencesTargetCallback = FindReferenceTargetsCallback.CreateFindReferenceTargetsCallback(); - internal static readonly IntPtr s_globalHostServices = CreateHostServices(); - internal static volatile IntPtr s_trackerManager; internal static volatile bool s_hasTrackingStarted; internal static volatile bool s_isGlobalPeggingOn = true; @@ -29,49 +26,6 @@ public static bool ShouldWalkExternalObjects() return s_trackerManager != IntPtr.Zero; } - // Called when an IReferenceTracker instance is found. - public static void OnIReferenceTrackerFound(IntPtr referenceTracker) - { - Debug.Assert(referenceTracker != IntPtr.Zero); - if (s_trackerManager != IntPtr.Zero) - { - return; - } - - IReferenceTracker.GetReferenceTrackerManager(referenceTracker, out IntPtr referenceTrackerManager); - - // Attempt to set the tracker instance. 
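The `PInvokeMarshal` hunk above swaps raw weak `GCHandle`s for the typed `WeakGCHandle` read via `TryGetTarget`. A small usage sketch of that handle shape; the generic argument and the `WeakHandleSketch` helper are assumptions, while the member names mirror the ones used in the hunk:

```csharp
using System;
using System.Runtime.InteropServices;

internal static class WeakHandleSketch
{
    internal static void Demonstrate(Action callback)
    {
        // A typed weak handle; trackResurrection: true matches the hunk above.
        WeakGCHandle<Action> handle = new WeakGCHandle<Action>(callback, trackResurrection: true);
        try
        {
            if (handle.TryGetTarget(out Action? target))
            {
                target(); // the delegate is still alive
            }
            else
            {
                // The delegate was collected; callers must keep it alive (e.g. GC.KeepAlive)
                // for as long as the native side may still invoke it.
            }
        }
        finally
        {
            handle.Dispose();
        }
    }
}
```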
- // If set, the ownership of referenceTrackerManager has been transferred - if (Interlocked.CompareExchange(ref s_trackerManager, referenceTrackerManager, IntPtr.Zero) == IntPtr.Zero) - { - IReferenceTrackerManager.SetReferenceTrackerHost(s_trackerManager, s_globalHostServices); - - // Our GC callbacks are used only for reference walk of tracker objects, so register it here - // when we find our first tracker object. - RegisterGCCallbacks(); - } - else - { - Marshal.Release(referenceTrackerManager); - } - } - - // Called after wrapper has been created. - public static void AfterWrapperCreated(IntPtr referenceTracker) - { - Debug.Assert(referenceTracker != IntPtr.Zero); - - // Notify tracker runtime that we've created a new wrapper for this object. - // To avoid surprises, we should notify them before we fire the first AddRefFromTrackerSource. - IReferenceTracker.ConnectFromTrackerSource(referenceTracker); - - // Send out AddRefFromTrackerSource callbacks to notify tracker runtime we've done AddRef() - // for certain interfaces. We should do this *after* we made a AddRef() because we should never - // be in a state where report refs > actual refs - IReferenceTracker.AddRefFromTrackerSource(referenceTracker); // IUnknown - IReferenceTracker.AddRefFromTrackerSource(referenceTracker); // IReferenceTracker - } - // Used during GC callback // Called before wrapper is about to be finalized (the same lifetime as short weak handle). public static void BeforeWrapperFinalized(IntPtr referenceTracker) @@ -134,24 +88,36 @@ public static void EndReferenceTracking() s_isGlobalPeggingOn = true; s_hasTrackingStarted = false; } - - public static unsafe void RegisterGCCallbacks() + public static bool AddReferencePath(object target, object foundReference) { - delegate* unmanaged gcStartCallback = &GCStartCollection; - delegate* unmanaged gcStopCallback = &GCStopCollection; - delegate* unmanaged gcAfterMarkCallback = &GCAfterMarkPhase; + return s_referenceCache.AddDependentHandle(target, foundReference); + } - if (!RuntimeImports.RhRegisterGcCallout(RuntimeImports.GcRestrictedCalloutKind.StartCollection, (IntPtr)gcStartCallback) || - !RuntimeImports.RhRegisterGcCallout(RuntimeImports.GcRestrictedCalloutKind.EndCollection, (IntPtr)gcStopCallback) || - !RuntimeImports.RhRegisterGcCallout(RuntimeImports.GcRestrictedCalloutKind.AfterMarkPhase, (IntPtr)gcAfterMarkCallback)) - { - throw new OutOfMemoryException(); - } + private static bool HasReferenceTrackerManager + => s_trackerManager != IntPtr.Zero; + + private static bool TryRegisterReferenceTrackerManager(IntPtr referenceTrackerManager) + { + return Interlocked.CompareExchange(ref s_trackerManager, referenceTrackerManager, IntPtr.Zero) == IntPtr.Zero; } - public static bool AddReferencePath(object target, object foundReference) + internal static bool IsGlobalPeggingEnabled => s_isGlobalPeggingOn; + + private static void RegisterGCCallbacks() { - return s_referenceCache.AddDependentHandle(target, foundReference); + unsafe + { + delegate* unmanaged gcStartCallback = &GCStartCollection; + delegate* unmanaged gcStopCallback = &GCStopCollection; + delegate* unmanaged gcAfterMarkCallback = &GCAfterMarkPhase; + + if (!RuntimeImports.RhRegisterGcCallout(RuntimeImports.GcRestrictedCalloutKind.StartCollection, (IntPtr)gcStartCallback) || + !RuntimeImports.RhRegisterGcCallout(RuntimeImports.GcRestrictedCalloutKind.EndCollection, (IntPtr)gcStopCallback) || + !RuntimeImports.RhRegisterGcCallout(RuntimeImports.GcRestrictedCalloutKind.AfterMarkPhase, 
(IntPtr)gcAfterMarkCallback)) + { + throw new OutOfMemoryException(); + } + } } // Used during GC callback @@ -183,89 +149,73 @@ private static void GCAfterMarkPhase(int condemnedGeneration) DetachNonPromotedObjects(); } - private static unsafe IntPtr CreateHostServices() - { - IntPtr* wrapperMem = (IntPtr*)NativeMemory.Alloc((nuint)sizeof(IntPtr)); - wrapperMem[0] = CreateDefaultIReferenceTrackerHostVftbl(); - return (IntPtr)wrapperMem; - } - } - - // Wrapper for IReferenceTrackerManager - internal static unsafe class IReferenceTrackerManager - { - // Used during GC callback - public static int ReferenceTrackingStarted(IntPtr pThis) - { - return (*(delegate* unmanaged**)pThis)[3](pThis); - } - // Used during GC callback - public static int FindTrackerTargetsCompleted(IntPtr pThis, bool walkFailed) + internal static unsafe void WalkExternalTrackerObjects() { - return (*(delegate* unmanaged**)pThis)[4](pThis, walkFailed); - } + bool walkFailed = false; - // Used during GC callback - public static int ReferenceTrackingCompleted(IntPtr pThis) - { - return (*(delegate* unmanaged**)pThis)[5](pThis); - } - - public static void SetReferenceTrackerHost(IntPtr pThis, IntPtr referenceTrackerHost) - { - Marshal.ThrowExceptionForHR((*(delegate* unmanaged**)pThis)[6](pThis, referenceTrackerHost)); - } - } - - // Wrapper for IReferenceTracker - internal static unsafe class IReferenceTracker - { - public static void ConnectFromTrackerSource(IntPtr pThis) - { - Marshal.ThrowExceptionForHR((*(delegate* unmanaged**)pThis)[3](pThis)); - } + foreach (GCHandle weakNativeObjectWrapperHandle in s_referenceTrackerNativeObjectWrapperCache) + { + ReferenceTrackerNativeObjectWrapper? nativeObjectWrapper = Unsafe.As(weakNativeObjectWrapperHandle.Target); + if (nativeObjectWrapper != null && + nativeObjectWrapper.TrackerObject != IntPtr.Zero) + { + FindReferenceTargetsCallback.Instance callback = new(nativeObjectWrapper.ProxyHandle); + int hr = IReferenceTracker.FindTrackerTargets(nativeObjectWrapper.TrackerObject, (IntPtr)(void*)&callback); + if (hr < 0) + { + walkFailed = true; + break; + } + } + } - // Used during GC callback - public static int DisconnectFromTrackerSource(IntPtr pThis) - { - return (*(delegate* unmanaged**)pThis)[4](pThis); + // Report whether walking failed or not. + if (walkFailed) + { + s_isGlobalPeggingOn = true; + } + IReferenceTrackerManager.FindTrackerTargetsCompleted(s_trackerManager, walkFailed); } // Used during GC callback - public static int FindTrackerTargets(IntPtr pThis, IntPtr findReferenceTargetsCallback) - { - return (*(delegate* unmanaged**)pThis)[5](pThis, findReferenceTargetsCallback); - } - - public static void GetReferenceTrackerManager(IntPtr pThis, out IntPtr referenceTrackerManager) - { - fixed (IntPtr* ptr = &referenceTrackerManager) - Marshal.ThrowExceptionForHR((*(delegate* unmanaged**)pThis)[6](pThis, ptr)); - } - - public static void AddRefFromTrackerSource(IntPtr pThis) - { - Marshal.ThrowExceptionForHR((*(delegate* unmanaged**)pThis)[7](pThis)); - } - - public static void ReleaseFromTrackerSource(IntPtr pThis) + internal static void DetachNonPromotedObjects() { - Marshal.ThrowExceptionForHR((*(delegate* unmanaged**)pThis)[8](pThis)); - } - - public static void PegFromTrackerSource(IntPtr pThis) - { - Marshal.ThrowExceptionForHR((*(delegate* unmanaged**)pThis)[9](pThis)); + foreach (GCHandle weakNativeObjectWrapperHandle in s_referenceTrackerNativeObjectWrapperCache) + { + ReferenceTrackerNativeObjectWrapper? 
nativeObjectWrapper = Unsafe.As(weakNativeObjectWrapperHandle.Target); + if (nativeObjectWrapper != null && + nativeObjectWrapper.TrackerObject != IntPtr.Zero && + !RuntimeImports.RhIsPromoted(nativeObjectWrapper.ProxyHandle.Target)) + { + // Notify the wrapper it was not promoted and is being collected. + BeforeWrapperFinalized(nativeObjectWrapper.TrackerObject); + } + } } } // Callback implementation of IFindReferenceTargetsCallback internal static unsafe class FindReferenceTargetsCallback { - internal static GCHandle s_currentRootObjectHandle; + // Define an on-stack compatible COM instance to avoid allocating + // a temporary instance. + [StructLayout(LayoutKind.Sequential)] + internal ref struct Instance + { + private readonly IntPtr _vtable; // First field is IUnknown based vtable. + public GCHandle RootObject; - [UnmanagedCallersOnly] + public Instance(GCHandle handle) + { + _vtable = (IntPtr)Unsafe.AsPointer(in FindReferenceTargetsCallback.Vftbl); + RootObject = handle; + } + } + +#pragma warning disable CS3016 + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] +#pragma warning restore CS3016 private static unsafe int IFindReferenceTargetsCallback_QueryInterface(IntPtr pThis, Guid* guid, IntPtr* ppObject) { if (*guid == IID_IFindReferenceTargetsCallback || *guid == IID_IUnknown) @@ -279,55 +229,51 @@ private static unsafe int IFindReferenceTargetsCallback_QueryInterface(IntPtr pT } } - [UnmanagedCallersOnly] +#pragma warning disable CS3016 + [UnmanagedCallersOnly(CallConvs = [typeof(CallConvMemberFunction)])] +#pragma warning restore CS3016 private static unsafe int IFindReferenceTargetsCallback_FoundTrackerTarget(IntPtr pThis, IntPtr referenceTrackerTarget) { if (referenceTrackerTarget == IntPtr.Zero) { - return HResults.E_INVALIDARG; + return HResults.E_POINTER; } - if (TryGetObject(referenceTrackerTarget, out object? foundObject)) + object sourceObject = ((FindReferenceTargetsCallback.Instance*)pThis)->RootObject.Target!; + + if (!TryGetObject(referenceTrackerTarget, out object? targetObject)) { - // Notify the runtime a reference path was found. - return TrackerObjectManager.AddReferencePath(s_currentRootObjectHandle.Target, foundObject) ? HResults.S_OK : HResults.S_FALSE; + return HResults.S_FALSE; } - return HResults.S_OK; - } + if (sourceObject == targetObject) + { + return HResults.S_FALSE; + } - private static unsafe IntPtr CreateDefaultIFindReferenceTargetsCallbackVftbl() - { - IntPtr* vftbl = (IntPtr*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(FindReferenceTargetsCallback), 4 * sizeof(IntPtr)); - vftbl[0] = (IntPtr)(delegate* unmanaged)&IFindReferenceTargetsCallback_QueryInterface; - vftbl[1] = (IntPtr)(delegate* unmanaged)&ComWrappers.Untracked_AddRef; - vftbl[2] = (IntPtr)(delegate* unmanaged)&ComWrappers.Untracked_Release; - vftbl[3] = (IntPtr)(delegate* unmanaged)&IFindReferenceTargetsCallback_FoundTrackerTarget; - return (IntPtr)vftbl; + // Notify the runtime a reference path was found. + return TrackerObjectManager.AddReferencePath(sourceObject, targetObject) ? 
HResults.S_OK : HResults.S_FALSE; } - internal static unsafe IntPtr CreateFindReferenceTargetsCallback() + internal struct ReferenceTargetsVftbl { - IntPtr* wrapperMem = (IntPtr*)NativeMemory.Alloc((nuint)sizeof(IntPtr)); - wrapperMem[0] = CreateDefaultIFindReferenceTargetsCallbackVftbl(); - return (IntPtr)wrapperMem; + public delegate* unmanaged[MemberFunction] QueryInterface; + public delegate* unmanaged[MemberFunction] AddRef; + public delegate* unmanaged[MemberFunction] Release; + public delegate* unmanaged[MemberFunction] FoundTrackerTarget; } - } - - internal readonly struct ComHolder : IDisposable - { - private readonly IntPtr _ptr; - internal readonly IntPtr Ptr => _ptr; - - public ComHolder(IntPtr ptr) - { - _ptr = ptr; - } + [FixedAddressValueType] + internal static readonly ReferenceTargetsVftbl Vftbl; - public readonly void Dispose() +#pragma warning disable CA1810 // Initialize reference type static fields inline + // We want this to be explicitly written out to ensure we match the "pre-inited vtable" pattern. + static FindReferenceTargetsCallback() +#pragma warning restore CA1810 // Initialize reference type static fields inline { - Marshal.Release(_ptr); + ComWrappers.GetUntrackedIUnknownImpl(out Vftbl.AddRef, out Vftbl.Release); + Vftbl.QueryInterface = &IFindReferenceTargetsCallback_QueryInterface; + Vftbl.FoundTrackerTarget = &IFindReferenceTargetsCallback_FoundTrackerTarget; } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/UnsafeGCHandle.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/UnsafeGCHandle.cs deleted file mode 100644 index 7e40696a5dbb..000000000000 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/UnsafeGCHandle.cs +++ /dev/null @@ -1,89 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Threading; - -using Internal.Runtime.CompilerServices; - -namespace System.Runtime.InteropServices -{ - /// - /// The unsafe version of the GCHandle structure. - /// - /// - /// Differences from the GCHandle structure: - /// - /// The constructor assumes the handle type is valid; no range check is performed. - /// The pinned flag is not stored in the _handle field. - /// The Target getter and setter assume the UnsafeGCHandle has been allocated. - /// No blittable check is performed when allocating a pinned UnsafeGCHandle or setting its target. - /// The GetRawTargetAddress method returns the raw address of the target (the pointer to - /// its m_pEEType field). - /// The Free method is not thread-safe and does not throw if the UnsafeGCHandle - /// has not been allocated or has been already freed. - /// - /// - [StructLayout(LayoutKind.Sequential)] - internal struct UnsafeGCHandle - { - // IMPORTANT: This must be kept in sync with the GCHandleType enum. - private const GCHandleType MaxHandleType = GCHandleType.Pinned; - - // The actual integer handle value that the EE uses internally. - private IntPtr _handle; - - // Allocate a handle storing the object and the type. 
- private UnsafeGCHandle(object value, GCHandleType type) - { - Debug.Assert((uint)type <= (uint)MaxHandleType, "Unexpected handle type"); - _handle = RuntimeImports.RhHandleAlloc(value, type); - } - - public static UnsafeGCHandle Alloc(object value, GCHandleType type = GCHandleType.Normal) - { - return new UnsafeGCHandle(value, type); - } - - // Target property - allows getting / updating of the handle's referent. - public object Target - { - get - { - Debug.Assert(IsAllocated, "Handle is not initialized"); - return RuntimeImports.RhHandleGet(_handle); - } - - set - { - Debug.Assert(IsAllocated, "Handle is not initialized"); - RuntimeImports.RhHandleSet(_handle, value); - } - } - - // Frees a GC handle. This method is not thread-safe! - public void Free() - { - if (_handle != default(IntPtr)) - { - RuntimeImports.RhHandleFree(_handle); - } - } - - // Returns the raw address of the target assuming it is pinned. - public unsafe IntPtr GetRawTargetAddress() - { - return *(IntPtr*)_handle; - } - - // Determine whether this handle has been allocated or not. - public bool IsAllocated - { - get - { - return _handle != default(IntPtr); - } - } - } -} diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/Loader/AssemblyLoadContext.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/Loader/AssemblyLoadContext.NativeAot.cs index bd40eed1212b..9c2206e87a86 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/Loader/AssemblyLoadContext.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/Loader/AssemblyLoadContext.NativeAot.cs @@ -43,18 +43,21 @@ public void StartProfileOptimization(string profile) private static Assembly InternalLoadFromPath(string? assemblyPath, string? nativeImagePath) { - // TODO: This is not passing down the AssemblyLoadContext, - // so it won't actually work properly when multiple assemblies with the same identity get loaded. 
- return ReflectionAugments.Load(assemblyPath);
+ ArgumentNullException.ThrowIfNull(assemblyPath);
+
+ throw new PlatformNotSupportedException();
 }
 #pragma warning restore IDE0060
-#pragma warning disable CA1822
- internal Assembly InternalLoad(ReadOnlySpan<byte> arrAssembly, ReadOnlySpan<byte> arrSymbols)
+#pragma warning disable CA1822, IDE0060
+ internal Assembly InternalLoad(ReadOnlySpan<byte> rawAssembly, ReadOnlySpan<byte> rawSymbols)
 {
- return ReflectionAugments.Load(arrAssembly, arrSymbols);
+ if (rawAssembly.IsEmpty)
+ throw new ArgumentNullException(nameof(rawAssembly));
+
+ throw new PlatformNotSupportedException();
 }
-#pragma warning restore CA1822
+#pragma warning restore CA1822, IDE0060
 private void ReferenceUnreferencedEvents()
 {
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
index 7a7588d6fac3..1be52b14cb11 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
@@ -15,7 +15,7 @@ namespace System.Runtime
 {
 // CONTRACT with Runtime
- // This class lists all the static methods that the redhawk runtime exports to a class library
+ // This class lists all the static methods that the NativeAOT runtime exports to a class library
 // These are not expected to change much but are needed by the class library to implement its functionality
 //
 // The contents of this file can be modified if needed by the class library
@@ -40,10 +40,6 @@ internal static partial class RuntimeImports
 [RuntimeImport(RuntimeLibrary, "RhGetRuntimeVersion")]
 internal static extern unsafe byte* RhGetRuntimeVersion(out int cbLength);
- [LibraryImport(RuntimeLibrary)]
- [SuppressGCTransition]
- internal static partial ulong RhpGetTickCount64();
-
 [LibraryImport(RuntimeLibrary)]
 internal static partial IntPtr RhpGetCurrentThread();
@@ -94,9 +90,6 @@ internal static void RhWaitForPendingFinalizers(bool allowReentrantWait)
 RhWaitForPendingFinalizers(allowReentrantWait ? 1 : 0);
 }
- [LibraryImport(RuntimeLibrary)]
- internal static partial void RhInitializeFinalizerThread();
-
 // Get maximum GC generation number.
[MethodImplAttribute(MethodImplOptions.InternalCall)] [RuntimeImport(RuntimeLibrary, "RhGetMaxGcGeneration")] @@ -365,7 +358,7 @@ internal static IntPtr RhHandleAllocDependent(object primary, object secondary) internal static extern int RhpGetThunkBlockSize(); [LibraryImport(RuntimeLibrary, EntryPoint = "RhAllocateThunksMapping")] - internal static partial IntPtr RhAllocateThunksMapping(); + internal static unsafe partial int RhAllocateThunksMapping(IntPtr* ppMapping); // // calls to runtime for type equality checks @@ -397,6 +390,10 @@ internal static IntPtr RhHandleAllocDependent(object primary, object secondary) [RuntimeImport(RuntimeLibrary, "RhNewArray")] internal static extern unsafe Array RhNewArray(MethodTable* pEEType, int length); + [MethodImpl(MethodImplOptions.InternalCall)] + [RuntimeImport(RuntimeLibrary, "RhNewVariableSizeObject")] + internal static extern unsafe Array RhNewVariableSizeObject(MethodTable* pEEType, int length); + [MethodImpl(MethodImplOptions.InternalCall)] [RuntimeImport(RuntimeLibrary, "RhNewString")] internal static extern unsafe string RhNewString(MethodTable* pEEType, int length); @@ -504,8 +501,24 @@ internal enum GcRestrictedCalloutKind internal static extern unsafe void RhUnregisterRefCountedHandleCallback(IntPtr pCalloutMethod, MethodTable* pTypeFilter); [MethodImplAttribute(MethodImplOptions.InternalCall)] - [RuntimeImport(RuntimeLibrary, "RhGetIUnknownAddRef")] - internal static extern IntPtr RhGetIUnknownAddRef(); + [RuntimeImport(RuntimeLibrary, +#if TARGET_WINDOWS && TARGET_X86 + "_RhIUnknown_AddRef@4" +#else + "RhIUnknown_AddRef" +#endif + )] + internal static extern uint RhIUnknown_AddRef(nint pThis); + + [MethodImplAttribute(MethodImplOptions.InternalCall)] + [RuntimeImport(RuntimeLibrary, +#if TARGET_WINDOWS && TARGET_X86 + "_RhUntracked_AddRefRelease@4" +#else + "RhUntracked_AddRefRelease" +#endif + )] + internal static extern uint RhUntracked_AddRefRelease(nint pThis); #if FEATURE_OBJCMARSHAL [MethodImplAttribute(MethodImplOptions.InternalCall)] diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/ThunkPool.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/ThunkPool.cs index fcfee9b11342..429513459ce2 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/ThunkPool.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/ThunkPool.cs @@ -87,43 +87,26 @@ private unsafe ThunksHeap(IntPtr commonStubAddress) _allocatedBlocks = new AllocatedBlock(); - IntPtr thunkStubsBlock; - lock (this) - { - thunkStubsBlock = ThunkBlocks.GetNewThunksBlock(); - } - - if (thunkStubsBlock != IntPtr.Zero) - { - IntPtr thunkDataBlock = RuntimeImports.RhpGetThunkDataBlockAddress(thunkStubsBlock); + IntPtr thunkStubsBlock = ThunkBlocks.GetNewThunksBlock(); + IntPtr thunkDataBlock = RuntimeImports.RhpGetThunkDataBlockAddress(thunkStubsBlock); - // Address of the first thunk data cell should be at the beginning of the thunks data block (page-aligned) - Debug.Assert(((nuint)(nint)thunkDataBlock % Constants.PageSize) == 0); + // Address of the first thunk data cell should be at the beginning of the thunks data block (page-aligned) + Debug.Assert(((nuint)(nint)thunkDataBlock % Constants.PageSize) == 0); - // Update the last pointer value in the thunks data section with the value of the common stub address - *(IntPtr*)(thunkDataBlock + (int)(Constants.PageSize - IntPtr.Size)) = commonStubAddress; - Debug.Assert(*(IntPtr*)(thunkDataBlock + (int)(Constants.PageSize - IntPtr.Size)) == 
commonStubAddress); + // Update the last pointer value in the thunks data section with the value of the common stub address + *(IntPtr*)(thunkDataBlock + (int)(Constants.PageSize - IntPtr.Size)) = commonStubAddress; + Debug.Assert(*(IntPtr*)(thunkDataBlock + (int)(Constants.PageSize - IntPtr.Size)) == commonStubAddress); - // Set the head and end of the linked list - _nextAvailableThunkPtr = thunkDataBlock; - _lastThunkPtr = _nextAvailableThunkPtr + Constants.ThunkDataSize * (Constants.NumThunksPerBlock - 1); + // Set the head and end of the linked list + _nextAvailableThunkPtr = thunkDataBlock; + _lastThunkPtr = _nextAvailableThunkPtr + Constants.ThunkDataSize * (Constants.NumThunksPerBlock - 1); - _allocatedBlocks._blockBaseAddress = thunkStubsBlock; - } + _allocatedBlocks._blockBaseAddress = thunkStubsBlock; } - public static unsafe ThunksHeap? CreateThunksHeap(IntPtr commonStubAddress) + public static unsafe ThunksHeap CreateThunksHeap(IntPtr commonStubAddress) { - try - { - ThunksHeap newHeap = new ThunksHeap(commonStubAddress); - - if (newHeap._nextAvailableThunkPtr != IntPtr.Zero) - return newHeap; - } - catch (Exception) { } - - return null; + return new ThunksHeap(commonStubAddress); } // TODO: Feature @@ -134,47 +117,30 @@ private unsafe ThunksHeap(IntPtr commonStubAddress) // // Note: Expected to be called under lock // - private unsafe bool ExpandHeap() + private unsafe void ExpandHeap() { - AllocatedBlock newBlockInfo; - - try - { - newBlockInfo = new AllocatedBlock(); - } - catch (Exception) - { - return false; - } + AllocatedBlock newBlockInfo = new AllocatedBlock(); IntPtr thunkStubsBlock = ThunkBlocks.GetNewThunksBlock(); + IntPtr thunkDataBlock = RuntimeImports.RhpGetThunkDataBlockAddress(thunkStubsBlock); - if (thunkStubsBlock != IntPtr.Zero) - { - IntPtr thunkDataBlock = RuntimeImports.RhpGetThunkDataBlockAddress(thunkStubsBlock); - - // Address of the first thunk data cell should be at the beginning of the thunks data block (page-aligned) - Debug.Assert(((nuint)(nint)thunkDataBlock % Constants.PageSize) == 0); - - // Update the last pointer value in the thunks data section with the value of the common stub address - *(IntPtr*)(thunkDataBlock + (int)(Constants.PageSize - IntPtr.Size)) = _commonStubAddress; - Debug.Assert(*(IntPtr*)(thunkDataBlock + (int)(Constants.PageSize - IntPtr.Size)) == _commonStubAddress); - - // Link the last entry in the old list to the first entry in the new list - *((IntPtr*)_lastThunkPtr) = thunkDataBlock; + // Address of the first thunk data cell should be at the beginning of the thunks data block (page-aligned) + Debug.Assert(((nuint)(nint)thunkDataBlock % Constants.PageSize) == 0); - // Update the pointer to the last entry in the list - _lastThunkPtr = *((IntPtr*)_lastThunkPtr) + Constants.ThunkDataSize * (Constants.NumThunksPerBlock - 1); + // Update the last pointer value in the thunks data section with the value of the common stub address + *(IntPtr*)(thunkDataBlock + (int)(Constants.PageSize - IntPtr.Size)) = _commonStubAddress; + Debug.Assert(*(IntPtr*)(thunkDataBlock + (int)(Constants.PageSize - IntPtr.Size)) == _commonStubAddress); - newBlockInfo._blockBaseAddress = thunkStubsBlock; - newBlockInfo._nextBlock = _allocatedBlocks; + // Link the last entry in the old list to the first entry in the new list + *((IntPtr*)_lastThunkPtr) = thunkDataBlock; - _allocatedBlocks = newBlockInfo; + // Update the pointer to the last entry in the list + _lastThunkPtr = *((IntPtr*)_lastThunkPtr) + Constants.ThunkDataSize * 
(Constants.NumThunksPerBlock - 1); - return true; - } + newBlockInfo._blockBaseAddress = thunkStubsBlock; + newBlockInfo._nextBlock = _allocatedBlocks; - return false; + _allocatedBlocks = newBlockInfo; } public unsafe IntPtr AllocateThunk() @@ -192,10 +158,7 @@ public unsafe IntPtr AllocateThunk() if (nextNextAvailableThunkPtr == IntPtr.Zero) { - if (!ExpandHeap()) - { - return IntPtr.Zero; - } + ExpandHeap(); nextAvailableThunkPtr = _nextAvailableThunkPtr; nextNextAvailableThunkPtr = *((IntPtr*)(nextAvailableThunkPtr)); @@ -347,19 +310,12 @@ public static unsafe IntPtr GetNewThunksBlock() } else { - nextThunksBlock = RuntimeImports.RhAllocateThunksMapping(); - - if (nextThunksBlock == IntPtr.Zero) - { - // We either ran out of memory and can't do anymore mappings of the thunks templates sections, - // or we are using the managed runtime services fallback, which doesn't provide the - // file mapping feature (ex: older version of mrt100.dll, or no mrt100.dll at all). - - // The only option is for the caller to attempt and recycle unused thunks to be able to - // find some free entries. - - return IntPtr.Zero; - } + nextThunksBlock = IntPtr.Zero; + int result = RuntimeImports.RhAllocateThunksMapping(&nextThunksBlock); + if (result == HResults.E_OUTOFMEMORY) + throw new OutOfMemoryException(); + else if (result != HResults.S_OK) + throw new PlatformNotSupportedException(SR.PlatformNotSupported_DynamicEntrypoint); // Each mapping consists of multiple blocks of thunk stubs/data pairs. Keep track of those // so that we do not create a new mapping until all blocks in the sections we just mapped are consumed diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeFieldHandle.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeFieldHandle.cs index 01624ba5469d..766d7b4bf794 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeFieldHandle.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeFieldHandle.cs @@ -2,9 +2,11 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.ComponentModel; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Serialization; +using Internal.Metadata.NativeFormat; using Internal.Runtime.Augments; namespace System @@ -29,7 +31,7 @@ public override bool Equals(object? 
obj) return Equals((RuntimeFieldHandle)obj); } - public bool Equals(RuntimeFieldHandle handle) + public unsafe bool Equals(RuntimeFieldHandle handle) { if (_value == handle._value) return true; @@ -37,26 +39,21 @@ public bool Equals(RuntimeFieldHandle handle) if (_value == IntPtr.Zero || handle._value == IntPtr.Zero) return false; - string fieldName1, fieldName2; - RuntimeTypeHandle declaringType1, declaringType2; + FieldHandleInfo* thisInfo = ToFieldHandleInfo(); + FieldHandleInfo* thatInfo = handle.ToFieldHandleInfo(); - RuntimeAugments.TypeLoaderCallbacks.GetRuntimeFieldHandleComponents(this, out declaringType1, out fieldName1); - RuntimeAugments.TypeLoaderCallbacks.GetRuntimeFieldHandleComponents(handle, out declaringType2, out fieldName2); - - return declaringType1.Equals(declaringType2) && fieldName1 == fieldName2; + return thisInfo->DeclaringType.Equals(thatInfo->DeclaringType) && thisInfo->Handle.Equals(thatInfo->Handle); } - public override int GetHashCode() + public override unsafe int GetHashCode() { if (_value == IntPtr.Zero) return 0; - string fieldName; - RuntimeTypeHandle declaringType; - RuntimeAugments.TypeLoaderCallbacks.GetRuntimeFieldHandleComponents(this, out declaringType, out fieldName); + FieldHandleInfo* info = ToFieldHandleInfo(); - int hashcode = declaringType.GetHashCode(); - return (hashcode + int.RotateLeft(hashcode, 13)) ^ fieldName.GetHashCode(); + int hashcode = info->DeclaringType.GetHashCode(); + return (hashcode + int.RotateLeft(hashcode, 13)) ^ info->Handle.GetHashCode(); } public static RuntimeFieldHandle FromIntPtr(IntPtr value) => new RuntimeFieldHandle(value); @@ -79,5 +76,19 @@ public void GetObjectData(SerializationInfo info, StreamingContext context) { throw new PlatformNotSupportedException(); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal readonly unsafe FieldHandleInfo* ToFieldHandleInfo() + { + return (FieldHandleInfo*)_value; + } + } + + [CLSCompliant(false)] + [StructLayout(LayoutKind.Sequential)] + public struct FieldHandleInfo + { + public RuntimeTypeHandle DeclaringType; + public FieldHandle Handle; } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeMethodHandle.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeMethodHandle.cs index 3de517db8f37..13b10d64e93c 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeMethodHandle.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeMethodHandle.cs @@ -2,10 +2,13 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.ComponentModel; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Serialization; +using Internal.Metadata.NativeFormat; using Internal.Reflection.Augments; +using Internal.Runtime; using Internal.Runtime.Augments; using Internal.Runtime.CompilerServices; @@ -31,7 +34,7 @@ public override bool Equals(object? 
obj) return Equals((RuntimeMethodHandle)obj); } - public bool Equals(RuntimeMethodHandle handle) + public unsafe bool Equals(RuntimeMethodHandle handle) { if (_value == handle._value) return true; @@ -39,52 +42,40 @@ public bool Equals(RuntimeMethodHandle handle) if (_value == IntPtr.Zero || handle._value == IntPtr.Zero) return false; - RuntimeTypeHandle declaringType1, declaringType2; - MethodNameAndSignature nameAndSignature1, nameAndSignature2; - RuntimeTypeHandle[] genericArgs1, genericArgs2; + MethodHandleInfo* thisInfo = ToMethodHandleInfo(); + MethodHandleInfo* thatInfo = handle.ToMethodHandleInfo(); - RuntimeAugments.TypeLoaderCallbacks.GetRuntimeMethodHandleComponents(this, out declaringType1, out nameAndSignature1, out genericArgs1); - RuntimeAugments.TypeLoaderCallbacks.GetRuntimeMethodHandleComponents(handle, out declaringType2, out nameAndSignature2, out genericArgs2); - - if (!declaringType1.Equals(declaringType2)) + if (!thisInfo->DeclaringType.Equals(thatInfo->DeclaringType)) return false; - if (!nameAndSignature1.Equals(nameAndSignature2)) + if (!thisInfo->Handle.Equals(thatInfo->Handle)) return false; - if ((genericArgs1 == null && genericArgs2 != null) || (genericArgs1 != null && genericArgs2 == null)) + if (thisInfo->NumGenericArgs != thatInfo->NumGenericArgs) return false; - if (genericArgs1 != null) + + RuntimeTypeHandle* thisFirstArg = &thisInfo->FirstArgument; + RuntimeTypeHandle* thatFirstArg = &thatInfo->FirstArgument; + for (int i = 0; i < thisInfo->NumGenericArgs; i++) { - if (genericArgs1.Length != genericArgs2!.Length) + if (!thisFirstArg[i].Equals(thatFirstArg[i])) return false; - for (int i = 0; i < genericArgs1.Length; i++) - { - if (!genericArgs1[i].Equals(genericArgs2![i])) - return false; - } } return true; } - public override int GetHashCode() + public override unsafe int GetHashCode() { if (_value == IntPtr.Zero) return 0; - RuntimeTypeHandle declaringType; - MethodNameAndSignature nameAndSignature; - RuntimeTypeHandle[] genericArgs; - RuntimeAugments.TypeLoaderCallbacks.GetRuntimeMethodHandleComponents(this, out declaringType, out nameAndSignature, out genericArgs); + MethodHandleInfo* info = ToMethodHandleInfo(); - int hashcode = declaringType.GetHashCode(); - hashcode = (hashcode + int.RotateLeft(hashcode, 13)) ^ nameAndSignature.Name.GetHashCode(); - if (genericArgs != null) + int hashcode = info->DeclaringType.GetHashCode(); + hashcode = (hashcode + int.RotateLeft(hashcode, 13)) ^ info->Handle.GetHashCode(); + for (int i = 0; i < info->NumGenericArgs; i++) { - for (int i = 0; i < genericArgs.Length; i++) - { - int argumentHashCode = genericArgs[i].GetHashCode(); - hashcode = (hashcode + int.RotateLeft(hashcode, 13)) ^ argumentHashCode; - } + int argumentHashCode = (&info->FirstArgument)[i].GetHashCode(); + hashcode = (hashcode + int.RotateLeft(hashcode, 13)) ^ argumentHashCode; } return hashcode; @@ -104,12 +95,12 @@ public override int GetHashCode() return !left.Equals(right); } - public IntPtr GetFunctionPointer() + public unsafe IntPtr GetFunctionPointer() { - RuntimeTypeHandle declaringType; - RuntimeAugments.TypeLoaderCallbacks.GetRuntimeMethodHandleComponents(this, out declaringType, out _, out _); + if (_value == IntPtr.Zero) + throw new ArgumentNullException(null, SR.Arg_InvalidHandle); - return ReflectionAugments.GetFunctionPointer(this, declaringType); + return ReflectionAugments.GetFunctionPointer(this, ToMethodHandleInfo()->DeclaringType); } [Obsolete(Obsoletions.LegacyFormatterImplMessage, DiagnosticId = 
Obsoletions.LegacyFormatterImplDiagId, UrlFormat = Obsoletions.SharedUrlFormat)] @@ -118,5 +109,21 @@ public void GetObjectData(SerializationInfo info, StreamingContext context) { throw new PlatformNotSupportedException(); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal readonly unsafe MethodHandleInfo* ToMethodHandleInfo() + { + return (MethodHandleInfo*)_value; + } + } + + [CLSCompliant(false)] + [StructLayout(LayoutKind.Sequential)] + public struct MethodHandleInfo + { + public RuntimeTypeHandle DeclaringType; + public MethodHandle Handle; + public int NumGenericArgs; + public RuntimeTypeHandle FirstArgument; } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.Unix.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.Unix.cs index 06fb9c4ffc9e..f9e49db53230 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.Unix.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.Unix.cs @@ -87,14 +87,14 @@ private bool JoinInternal(int millisecondsTimeout) } } - private unsafe bool CreateThread(GCHandle thisThreadHandle) + private unsafe bool CreateThread(GCHandle thisThreadHandle) { // Create the Stop event before starting the thread to make sure // it is ready to be signaled at thread shutdown time. // This also avoids OOM after creating the thread. _stopped = new ManualResetEvent(false); - if (!Interop.Sys.CreateThread((IntPtr)_startHelper!._maxStackSize, &ThreadEntryPoint, (IntPtr)thisThreadHandle)) + if (!Interop.Sys.CreateThread((IntPtr)_startHelper!._maxStackSize, &ThreadEntryPoint, GCHandle.ToIntPtr(thisThreadHandle))) { return false; } @@ -137,10 +137,6 @@ private static bool SetApartmentStateUnchecked(ApartmentState state, bool throwO partial void InitializeComOnNewThread(); - internal static void InitializeComForFinalizerThread() - { - } - public void DisableComObjectEagerCleanup() { } public void Interrupt() => WaitSubsystem.Interrupt(this); diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.Windows.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.Windows.cs index d708a313b077..c579f4b3a00e 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.Windows.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.Windows.cs @@ -24,8 +24,6 @@ public sealed partial class Thread private ApartmentState _initialApartmentState = ApartmentState.Unknown; - private static volatile bool s_comInitializedOnFinalizerThread; - partial void PlatformSpecificInitialize(); // Platform-specific initialization of foreign threads, i.e. 
threads not created by Thread.Start @@ -175,7 +173,7 @@ private bool JoinInternal(int millisecondsTimeout) } } - private unsafe bool CreateThread(GCHandle thisThreadHandle) + private unsafe bool CreateThread(GCHandle thisThreadHandle) { const int AllocationGranularity = 0x10000; // 64 KiB @@ -197,7 +195,7 @@ private unsafe bool CreateThread(GCHandle thisThreadHandle) } _osHandle = Interop.Kernel32.CreateThread(IntPtr.Zero, (IntPtr)stackSize, - &ThreadEntryPoint, (IntPtr)thisThreadHandle, + &ThreadEntryPoint, GCHandle.ToIntPtr(thisThreadHandle), Interop.Kernel32.CREATE_SUSPENDED | Interop.Kernel32.STACK_SIZE_PARAM_IS_A_RESERVATION, out _); @@ -301,27 +299,11 @@ private void InitializeComOnNewThread() InitializeCom(_initialApartmentState); } - internal static void InitializeComForFinalizerThread() - { - InitializeCom(); - - // Prevent re-initialization of COM model on finalizer thread - t_comState |= ComState.Locked; - - s_comInitializedOnFinalizerThread = true; - } - private static void InitializeComForThreadPoolThread() { - // Initialized COM - take advantage of implicit MTA initialized by the finalizer thread - SpinWait sw = default(SpinWait); - while (!s_comInitializedOnFinalizerThread) - { - RuntimeImports.RhInitializeFinalizerThread(); - sw.SpinOnce(0); - } - - // Prevent re-initialization of COM model on threadpool threads + // Process-wide COM is initialized very early before any managed code can run. + // Assume it is done. + // Prevent re-initialization of COM model on threadpool threads from the default one. t_comState |= ComState.Locked; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.cs index dbe64d4381a3..3758fca9e8a0 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Thread.NativeAot.cs @@ -379,7 +379,7 @@ private void StartCore() } bool waitingForThreadStart = false; - GCHandle threadHandle = GCHandle.Alloc(this); + GCHandle threadHandle = new GCHandle(this); try { @@ -404,7 +404,7 @@ private void StartCore() Debug.Assert(!waitingForThreadStart, "Leaked threadHandle"); if (!waitingForThreadStart) { - threadHandle.Free(); + threadHandle.Dispose(); } } @@ -422,8 +422,7 @@ private void StartCore() private static void StartThread(IntPtr parameter) { - GCHandle threadHandle = (GCHandle)parameter; - Thread thread = (Thread)threadHandle.Target!; + Thread thread = GCHandle.FromIntPtr(parameter).Target; try { diff --git a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ExecutionEnvironmentImplementation.MappingTables.cs b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ExecutionEnvironmentImplementation.MappingTables.cs index 46dadfda18b7..2e1724d727ab 100644 --- a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ExecutionEnvironmentImplementation.MappingTables.cs +++ b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ExecutionEnvironmentImplementation.MappingTables.cs @@ -219,39 +219,17 @@ public sealed override unsafe bool TryGetConstructedGenericTypeForComponentsNoCo return TypeLoaderEnvironment.Instance.TryGetConstructedGenericTypeForComponents(genericTypeDefinitionHandle, genericTypeArgumentHandles, out runtimeTypeHandle); } - public sealed override 
MethodBaseInvoker TryGetMethodInvoker(RuntimeTypeHandle declaringTypeHandle, QMethodDefinition methodHandle, RuntimeTypeHandle[] genericMethodTypeArgumentHandles) + public sealed override void ValidateGenericMethodConstraints(MethodInfo method) { - MethodBase methodInfo = ExecutionDomain.GetMethod(declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles); - - // Validate constraints first. This is potentially useless work if the method already exists, but it prevents bad - // inputs to reach the type loader (we don't have support to e.g. represent pointer types within the type loader) - if (genericMethodTypeArgumentHandles != null && genericMethodTypeArgumentHandles.Length > 0) - ConstraintValidator.EnsureSatisfiesClassConstraints((MethodInfo)methodInfo); - - MethodSignatureComparer methodSignatureComparer = new MethodSignatureComparer(methodHandle); + ConstraintValidator.EnsureSatisfiesClassConstraints(method); + } - MethodInvokeInfo methodInvokeInfo; -#if GENERICS_FORCE_USG - // Stress mode to force the usage of universal canonical method targets for reflection invokes. - // It is recommended to use "/SharedGenericsMode GenerateAllUniversalGenerics" NUTC command line argument when - // compiling the application in order to effectively use the GENERICS_FORCE_USG mode. + public sealed override MethodBaseInvoker TryGetMethodInvokerNoConstraintCheck(RuntimeTypeHandle declaringTypeHandle, QMethodDefinition methodHandle, RuntimeTypeHandle[] genericMethodTypeArgumentHandles) + { + MethodBase methodInfo = ExecutionDomain.GetMethod(declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles); - // If we are just trying to invoke a non-generic method on a non-generic type, we won't force the universal lookup - if (!RuntimeAugments.IsGenericType(declaringTypeHandle) && (genericMethodTypeArgumentHandles == null || genericMethodTypeArgumentHandles.Length == 0)) - methodInvokeInfo = TryGetMethodInvokeInfo(declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles, - methodInfo, ref methodSignatureComparer, CanonicalFormKind.Specific); - else - methodInvokeInfo = TryGetMethodInvokeInfo(declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles, - methodInfo, ref methodSignatureComparer, CanonicalFormKind.Universal); -#else - methodInvokeInfo = TryGetMethodInvokeInfo(declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles, - methodInfo, ref methodSignatureComparer, CanonicalFormKind.Specific); - - // If we failed to get a MethodInvokeInfo for an exact method, or a canonically equivalent method, check if there is a universal canonically - // equivalent entry that could be used (it will be much slower, and require a calling convention converter) - methodInvokeInfo ??= TryGetMethodInvokeInfo(declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles, - methodInfo, ref methodSignatureComparer, CanonicalFormKind.Universal); -#endif + MethodInvokeInfo methodInvokeInfo = TryGetMethodInvokeInfo(declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles, + methodInfo); if (methodInvokeInfo == null) return null; @@ -281,12 +259,10 @@ private static RuntimeTypeHandle[] GetTypeSequence(ref ExternalReferencesTable e return result; } - private static IntPtr TryGetVirtualResolveData(NativeFormatModuleInfo module, - RuntimeTypeHandle methodHandleDeclaringType, QMethodDefinition methodHandle, RuntimeTypeHandle[] genericArgs, - ref MethodSignatureComparer methodSignatureComparer) + private static IntPtr TryGetVirtualResolveData(RuntimeTypeHandle 
methodHandleDeclaringType, QMethodDefinition methodHandle, RuntimeTypeHandle[] genericArgs) { TypeLoaderEnvironment.VirtualResolveDataResult lookupResult; - bool success = TypeLoaderEnvironment.TryGetVirtualResolveData(module, methodHandleDeclaringType, genericArgs, ref methodSignatureComparer, out lookupResult); + bool success = TypeLoaderEnvironment.TryGetVirtualResolveData(methodHandleDeclaringType, methodHandle, genericArgs, out lookupResult); if (!success) return IntPtr.Zero; else @@ -312,16 +288,12 @@ private static IntPtr TryGetVirtualResolveData(NativeFormatModuleInfo module, /// Handle of method to look up /// Runtime handles of generic method arguments /// MethodInfo of method to look up - /// Helper structure used for comparing signatures - /// Requested canon form /// Constructed method invoke info, null on failure private static unsafe MethodInvokeInfo TryGetMethodInvokeInfo( RuntimeTypeHandle declaringTypeHandle, QMethodDefinition methodHandle, RuntimeTypeHandle[] genericMethodTypeArgumentHandles, - MethodBase methodInfo, - ref MethodSignatureComparer methodSignatureComparer, - CanonicalFormKind canonFormKind) + MethodBase methodInfo) { MethodInvokeMetadata methodInvokeMetadata; @@ -329,8 +301,6 @@ private static unsafe MethodInvokeInfo TryGetMethodInvokeInfo( declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles, - ref methodSignatureComparer, - canonFormKind, out methodInvokeMetadata)) { // Method invoke info not found @@ -352,9 +322,7 @@ private static unsafe MethodInvokeInfo TryGetMethodInvokeInfo( IntPtr resolver = IntPtr.Zero; if ((methodInvokeMetadata.InvokeTableFlags & InvokeTableFlags.HasVirtualInvoke) != 0) { - resolver = TryGetVirtualResolveData(ModuleList.Instance.GetModuleInfoForMetadataReader(methodHandle.NativeFormatReader), - declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles, - ref methodSignatureComparer); + resolver = TryGetVirtualResolveData(declaringTypeHandle, methodHandle, genericMethodTypeArgumentHandles); // Unable to find virtual resolution information, cannot return valid MethodInvokeInfo if (resolver == IntPtr.Zero) @@ -718,36 +686,13 @@ private unsafe bool TryGetMethodForOriginalLdFtnResult_InvokeMap_Inner(NativeFor declaringTypeHandle = GetExactDeclaringType(entryType, declaringTypeHandle); } - if ((entryFlags & InvokeTableFlags.HasMetadataHandle) != 0) - { - RuntimeTypeHandle declaringTypeHandleDefinition = GetTypeDefinition(declaringTypeHandle); - QTypeDefinition qTypeDefinition = GetMetadataForNamedType(declaringTypeHandleDefinition); + RuntimeTypeHandle declaringTypeHandleDefinition = GetTypeDefinition(declaringTypeHandle); + QTypeDefinition qTypeDefinition = GetMetadataForNamedType(declaringTypeHandleDefinition); - MethodHandle nativeFormatMethodHandle = - (((int)HandleType.Method << 25) | (int)entryMethodHandleOrNameAndSigRaw).AsMethodHandle(); + MethodHandle nativeFormatMethodHandle = + (((int)HandleType.Method << 25) | (int)entryMethodHandleOrNameAndSigRaw).AsMethodHandle(); - methodHandle = new QMethodDefinition(qTypeDefinition.NativeFormatReader, nativeFormatMethodHandle); - } - else - { -#if FEATURE_SHARED_LIBRARY - uint nameAndSigOffset = entryMethodHandleOrNameAndSigRaw; - MethodNameAndSignature nameAndSig; - if (!TypeLoaderEnvironment.Instance.TryGetMethodNameAndSignatureFromNativeLayoutOffset(mappingTableModule.Handle, nameAndSigOffset, out nameAndSig)) - { - Debug.Assert(false); - return false; - } - - if 
(!TypeLoaderEnvironment.Instance.TryGetMetadataForTypeMethodNameAndSignature(declaringTypeHandle, nameAndSig, out methodHandle)) - { - Debug.Assert(false); - return false; - } -#else - throw NotImplemented.ByDesign; -#endif - } + methodHandle = new QMethodDefinition(qTypeDefinition.NativeFormatReader, nativeFormatMethodHandle); return true; } @@ -777,8 +722,7 @@ public sealed override FieldAccessor TryGetFieldAccessor( { FieldAccessMetadata fieldAccessMetadata; - if (!TypeLoaderEnvironment.TryGetFieldAccessMetadata( - metadataReader, + if (!TypeLoaderEnvironment.TryGetFieldAccessMetadataFromFieldAccessMap( declaringTypeHandle, fieldHandle, out fieldAccessMetadata)) @@ -825,8 +769,6 @@ public sealed override FieldAccessor TryGetFieldAccessor( } else { - Debug.Assert((fieldAccessMetadata.Flags & FieldTableFlags.IsUniversalCanonicalEntry) == 0); - if (fieldBase != FieldTableFlags.NonGCStatic) { fieldOffset = fieldAccessMetadata.Offset; @@ -859,12 +801,7 @@ public sealed override FieldAccessor TryGetFieldAccessor( // public sealed override unsafe bool TryGetMethodFromHandle(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out QMethodDefinition methodHandle, out RuntimeTypeHandle[] genericMethodTypeArgumentHandles) { - MethodNameAndSignature nameAndSignature; - methodHandle = default(QMethodDefinition); - if (!TypeLoaderEnvironment.Instance.TryGetRuntimeMethodHandleComponents(runtimeMethodHandle, out declaringTypeHandle, out nameAndSignature, out genericMethodTypeArgumentHandles)) - return false; - - return TypeLoaderEnvironment.Instance.TryGetMetadataForTypeMethodNameAndSignature(declaringTypeHandle, nameAndSignature, out methodHandle); + return TypeLoaderEnvironment.Instance.TryGetRuntimeMethodHandleComponents(runtimeMethodHandle, out declaringTypeHandle, out methodHandle, out genericMethodTypeArgumentHandles); } // @@ -880,32 +817,7 @@ public sealed override bool TryGetMethodFromHandleAndType(RuntimeMethodHandle ru // public sealed override unsafe bool TryGetFieldFromHandle(RuntimeFieldHandle runtimeFieldHandle, out RuntimeTypeHandle declaringTypeHandle, out FieldHandle fieldHandle) { - fieldHandle = default(FieldHandle); - - string fieldName; - if (!TypeLoaderEnvironment.Instance.TryGetRuntimeFieldHandleComponents(runtimeFieldHandle, out declaringTypeHandle, out fieldName)) - return false; - - RuntimeTypeHandle metadataLookupTypeHandle = GetTypeDefinition(declaringTypeHandle); - - QTypeDefinition qTypeDefinition = GetMetadataForNamedType(metadataLookupTypeHandle); - - // TODO! 
Handle ecma style types - MetadataReader reader = qTypeDefinition.NativeFormatReader; - TypeDefinitionHandle typeDefinitionHandle = qTypeDefinition.NativeFormatHandle; - - TypeDefinition typeDefinition = typeDefinitionHandle.GetTypeDefinition(reader); - foreach (FieldHandle fh in typeDefinition.Fields) - { - Field field = fh.GetField(reader); - if (field.Name.StringEquals(fieldName, reader)) - { - fieldHandle = fh; - return true; - } - } - - return false; + return TypeLoaderEnvironment.Instance.TryGetRuntimeFieldHandleComponents(runtimeFieldHandle, out declaringTypeHandle, out fieldHandle); } // diff --git a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ExecutionEnvironmentImplementation.Runtime.cs b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ExecutionEnvironmentImplementation.Runtime.cs index f3e07c4f049f..9a96d8205d45 100644 --- a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ExecutionEnvironmentImplementation.Runtime.cs +++ b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ExecutionEnvironmentImplementation.Runtime.cs @@ -38,7 +38,7 @@ public sealed override void GetInterfaceMap(Type instanceType, [DynamicallyAcces goto notFound; } - MethodBase methodBase = ReflectionExecution.GetMethodBaseFromStartAddressIfAvailable(classRtMethodHandle); + MethodBase methodBase = ReflectionExecution.GetMethodBaseFromOriginalLdftnResult(classRtMethodHandle, instanceType.TypeHandle); if (methodBase == null) { goto notFound; @@ -90,12 +90,6 @@ public sealed override void GetEnumInfo(RuntimeTypeHandle typeHandle, out string out isFlags); return; } -#if ECMA_METADATA_SUPPORT - if (qTypeDefinition.IsEcmaFormatMetadataBased) - { - return EcmaFormatEnumInfo.Create(typeHandle, qTypeDefinition.EcmaFormatReader, qTypeDefinition.EcmaFormatHandle); - } -#endif names = Array.Empty(); values = Array.Empty(); isFlags = false; diff --git a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/MethodInvokers/MethodInvokerWithMethodInvokeInfo.cs b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/MethodInvokers/MethodInvokerWithMethodInvokeInfo.cs index 6bff16c340a9..ae02c0b4612f 100644 --- a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/MethodInvokers/MethodInvokerWithMethodInvokeInfo.cs +++ b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/MethodInvokers/MethodInvokerWithMethodInvokeInfo.cs @@ -42,17 +42,6 @@ internal static MethodBaseInvoker CreateMethodInvoker(RuntimeTypeHandle declarin if (0 != (methodAttributes & MethodAttributes.Static)) isStatic = true; } -#if ECMA_METADATA_SUPPORT - if (methodHandle.IsEcmaFormatMetadataBased) - { - var reader = methodHandle.EcmaFormatReader; - var method = reader.GetMethodDefinition(methodHandle.EcmaFormatHandle); - var blobReader = reader.GetBlobReader(method.Signature); - byte sigByte = blobReader.ReadByte(); - if ((sigByte & (byte)System.Reflection.Metadata.SignatureAttributes.Instance) == 0) - isStatic = true; - } -#endif if (isStatic) return new StaticMethodInvoker(methodInvokeInfo); diff --git a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ReflectionExecution.cs b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ReflectionExecution.cs 
index 71e470dd3836..f782e0f36dfb 100644 --- a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ReflectionExecution.cs +++ b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/ReflectionExecution.cs @@ -10,7 +10,7 @@ // // Internal.Reflection.Core.Execution has an abstract model // for an "execution engine" - this contract provides the -// concrete implementation of this model for Redhawk. +// concrete implementation of this model for NativeAOT. // // // Implemented by: @@ -18,7 +18,7 @@ // N/A on desktop: // // Consumed by: -// Redhawk app's directly via an under-the-hood ILTransform. +// NativeAOT app's directly via an under-the-hood ILTransform. // System.Private.CoreLib.dll, via a callback (see Internal.System.Runtime.Augment) // @@ -101,6 +101,17 @@ public static MethodBase GetMethodBaseFromStartAddressIfAvailable(IntPtr methodS return ExecutionDomain.GetMethod(declaringTypeHandle, qMethodDefinition, genericMethodTypeArgumentHandles: null); } + public static MethodBase GetMethodBaseFromOriginalLdftnResult(IntPtr methodStartAddress, RuntimeTypeHandle declaringTypeHandle) + { + if (!ExecutionEnvironment.TryGetMethodForOriginalLdFtnResult(methodStartAddress, + ref declaringTypeHandle, out QMethodDefinition qMethodDefinition, out RuntimeTypeHandle[] genericMethodTypeArgumentHandles)) + { + return null; + } + + return ExecutionDomain.GetMethod(declaringTypeHandle, qMethodDefinition, genericMethodTypeArgumentHandles); + } + internal static ExecutionEnvironmentImplementation ExecutionEnvironment { get; private set; } } } diff --git a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Extensions/NonPortable/DelegateMethodInfoRetriever.cs b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Extensions/NonPortable/DelegateMethodInfoRetriever.cs index 5669203a6f39..9bb61c5cfb90 100644 --- a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Extensions/NonPortable/DelegateMethodInfoRetriever.cs +++ b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Extensions/NonPortable/DelegateMethodInfoRetriever.cs @@ -47,7 +47,7 @@ public static MethodInfo GetDelegateMethodInfo(Delegate del) callTryGetMethod = false; methodHandle = QMethodDefinition.FromObjectAndInt(resolver->Reader, resolver->Handle); - if (!TypeLoaderEnvironment.Instance.TryGetRuntimeMethodHandleComponents(resolver->GVMMethodHandle, out _, out _, out genericMethodTypeArgumentHandles)) + if (!TypeLoaderEnvironment.Instance.TryGetRuntimeMethodHandleComponents(resolver->GVMMethodHandle, out _, out QMethodDefinition dummy, out genericMethodTypeArgumentHandles)) throw new NotSupportedException(SR.DelegateGetMethodInfo_NoInstantiation); } } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Reflection/Execution/AssemblyBinderImplementation.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Reflection/Execution/AssemblyBinderImplementation.cs index 5700f56e812d..fba014a24159 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Reflection/Execution/AssemblyBinderImplementation.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Reflection/Execution/AssemblyBinderImplementation.cs @@ -25,48 +25,23 @@ public sealed partial class AssemblyBinderImplementation : AssemblyBinder { private AssemblyBinderImplementation() { - _scopeGroups = Array.Empty>(); - + ArrayBuilder> 
scopes = default; foreach (NativeFormatModuleInfo module in ModuleList.EnumerateModules()) - RegisterModule(module); - } - - public static AssemblyBinderImplementation Instance { get; } = new AssemblyBinderImplementation(); - - partial void BindEcmaFilePath(string assemblyPath, ref AssemblyBindResult bindResult, ref Exception exception, ref bool? result); - partial void BindEcmaBytes(ReadOnlySpan rawAssembly, ReadOnlySpan rawSymbolStore, ref AssemblyBindResult bindResult, ref Exception exception, ref bool? result); - partial void BindEcmaAssemblyName(RuntimeAssemblyName refName, bool cacheMissedLookups, ref AssemblyBindResult result, ref Exception exception, ref Exception preferredException, ref bool resultBoolean); - partial void InsertEcmaLoadedAssemblies(List loadedAssemblies); - - public sealed override bool Bind(string assemblyPath, out AssemblyBindResult bindResult, out Exception exception) - { - bool? result = null; - exception = null; - bindResult = default(AssemblyBindResult); - - BindEcmaFilePath(assemblyPath, ref bindResult, ref exception, ref result); + { + MetadataReader reader = module.MetadataReader; + foreach (ScopeDefinitionHandle scopeDefinitionHandle in reader.ScopeDefinitions) + { + scopes.Add(new KeyValuePair( + scopeDefinitionHandle.ToRuntimeAssemblyName(reader), + new QScopeDefinition(reader, scopeDefinitionHandle) + )); + } + } - // If the Ecma assembly binder isn't linked in, simply throw PlatformNotSupportedException - if (!result.HasValue) - throw new PlatformNotSupportedException(); - else - return result.Value; + ScopeGroups = scopes.ToArray(); } - public sealed override bool Bind(ReadOnlySpan rawAssembly, ReadOnlySpan rawSymbolStore, out AssemblyBindResult bindResult, out Exception exception) - { - bool? result = null; - exception = null; - bindResult = default(AssemblyBindResult); - - BindEcmaBytes(rawAssembly, rawSymbolStore, ref bindResult, ref exception, ref result); - - // If the Ecma assembly binder isn't linked in, simply throw PlatformNotSupportedException - if (!result.HasValue) - throw new PlatformNotSupportedException(); - else - return result.Value; - } + public static AssemblyBinderImplementation Instance { get; } = new AssemblyBinderImplementation(); public sealed override bool Bind(RuntimeAssemblyName refName, bool cacheMissedLookups, out AssemblyBindResult result, out Exception exception) { @@ -76,7 +51,7 @@ public sealed override bool Bind(RuntimeAssemblyName refName, bool cacheMissedLo Exception preferredException = null; - foreach (KeyValuePair group in ScopeGroups) + foreach (KeyValuePair group in ScopeGroups) { if (AssemblyNameMatches(refName, group.Key, ref preferredException)) { @@ -87,18 +62,13 @@ public sealed override bool Bind(RuntimeAssemblyName refName, bool cacheMissedLo } foundMatch = true; - ScopeDefinitionGroup scopeDefinitionGroup = group.Value; + QScopeDefinition scopeDefinitionGroup = group.Value; - result.Reader = scopeDefinitionGroup.CanonicalScope.Reader; - result.ScopeDefinitionHandle = scopeDefinitionGroup.CanonicalScope.Handle; - result.OverflowScopes = scopeDefinitionGroup.OverflowScopes; + result.Reader = scopeDefinitionGroup.Reader; + result.ScopeDefinitionHandle = scopeDefinitionGroup.Handle; } } - BindEcmaAssemblyName(refName, cacheMissedLookups, ref result, ref exception, ref preferredException, ref foundMatch); - if (exception != null) - return false; - if (!foundMatch) { exception = preferredException ?? 
new FileNotFoundException(SR.Format(SR.FileNotFound_AssemblyNotFound, refName.FullName)); @@ -111,19 +81,16 @@ public sealed override bool Bind(RuntimeAssemblyName refName, bool cacheMissedLo public sealed override IList GetLoadedAssemblies() { List loadedAssemblies = new List(ScopeGroups.Length); - foreach (KeyValuePair group in ScopeGroups) + foreach (KeyValuePair group in ScopeGroups) { - ScopeDefinitionGroup scopeDefinitionGroup = group.Value; + QScopeDefinition scopeDefinitionGroup = group.Value; AssemblyBindResult result = default(AssemblyBindResult); - result.Reader = scopeDefinitionGroup.CanonicalScope.Reader; - result.ScopeDefinitionHandle = scopeDefinitionGroup.CanonicalScope.Handle; - result.OverflowScopes = scopeDefinitionGroup.OverflowScopes; + result.Reader = scopeDefinitionGroup.Reader; + result.ScopeDefinitionHandle = scopeDefinitionGroup.Handle; loadedAssemblies.Add(result); } - InsertEcmaLoadedAssemblies(loadedAssemblies); - return loadedAssemblies; } @@ -188,86 +155,6 @@ private static bool AssemblyVersionMatches(Version refVersion, Version defVersio return true; } - /// - /// This callback gets called whenever a module gets registered. It adds the metadata reader - /// for the new module to the available scopes. The lock in ExecutionEnvironmentImplementation ensures - /// that this function may never be called concurrently so that we can assume that two threads - /// never update the reader and scope list at the same time. - /// - /// Module to register - private void RegisterModule(NativeFormatModuleInfo nativeFormatModuleInfo) - { - LowLevelDictionaryWithIEnumerable scopeGroups = new LowLevelDictionaryWithIEnumerable(); - foreach (KeyValuePair oldGroup in _scopeGroups) - { - scopeGroups.Add(oldGroup.Key, oldGroup.Value); - } - AddScopesFromReaderToGroups(scopeGroups, nativeFormatModuleInfo.MetadataReader); - - // Update reader and scope list - KeyValuePair[] scopeGroupsArray = new KeyValuePair[scopeGroups.Count]; - int i = 0; - foreach (KeyValuePair data in scopeGroups) - { - scopeGroupsArray[i] = data; - i++; - } - - _scopeGroups = scopeGroupsArray; - } - - private KeyValuePair[] ScopeGroups - { - get - { - return _scopeGroups; - } - } - - private static void AddScopesFromReaderToGroups(LowLevelDictionaryWithIEnumerable groups, MetadataReader reader) - { - foreach (ScopeDefinitionHandle scopeDefinitionHandle in reader.ScopeDefinitions) - { - RuntimeAssemblyName defName = scopeDefinitionHandle.ToRuntimeAssemblyName(reader); - ScopeDefinitionGroup scopeDefinitionGroup; - if (groups.TryGetValue(defName, out scopeDefinitionGroup)) - { - scopeDefinitionGroup.AddOverflowScope(new QScopeDefinition(reader, scopeDefinitionHandle)); - } - else - { - scopeDefinitionGroup = new ScopeDefinitionGroup(new QScopeDefinition(reader, scopeDefinitionHandle)); - groups.Add(defName, scopeDefinitionGroup); - } - } - } - - private volatile KeyValuePair[] _scopeGroups; - - private class ScopeDefinitionGroup - { - public ScopeDefinitionGroup(QScopeDefinition canonicalScope) - { - _canonicalScope = canonicalScope; - } - - public QScopeDefinition CanonicalScope { get { return _canonicalScope; } } - - public IEnumerable OverflowScopes - { - get - { - return _overflowScopes.ToArray(); - } - } - - public void AddOverflowScope(QScopeDefinition overflowScope) - { - _overflowScopes.Add(overflowScope); - } - - private readonly QScopeDefinition _canonicalScope; - private ArrayBuilder _overflowScopes; - } + private KeyValuePair[] ScopeGroups { get; } } } diff --git 
a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/CanonicallyEquivalentEntryLocator.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/CanonicallyEquivalentEntryLocator.cs index 1b1082f3132f..7efdc51725fb 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/CanonicallyEquivalentEntryLocator.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/CanonicallyEquivalentEntryLocator.cs @@ -17,9 +17,8 @@ public struct CanonicallyEquivalentEntryLocator private RuntimeTypeHandle _genericDefinition; private RuntimeTypeHandle[] _genericArgs; private DefType _defType; - private CanonicalFormKind _canonKind; - public CanonicallyEquivalentEntryLocator(RuntimeTypeHandle typeToFind, CanonicalFormKind kind) + public CanonicallyEquivalentEntryLocator(RuntimeTypeHandle typeToFind) { if (RuntimeAugments.IsGenericType(typeToFind)) { @@ -32,16 +31,14 @@ public CanonicallyEquivalentEntryLocator(RuntimeTypeHandle typeToFind, Canonical } _typeToFind = typeToFind; - _canonKind = kind; _defType = null; } - internal CanonicallyEquivalentEntryLocator(DefType typeToFind, CanonicalFormKind kind) + internal CanonicallyEquivalentEntryLocator(DefType typeToFind) { _genericArgs = null; _genericDefinition = default(RuntimeTypeHandle); _typeToFind = default(RuntimeTypeHandle); - _canonKind = kind; _defType = typeToFind; } @@ -50,10 +47,10 @@ public int LookupHashCode get { if (_defType != null) - return _defType.ConvertToCanonForm(_canonKind).GetHashCode(); + return _defType.ConvertToCanonForm(CanonicalFormKind.Specific).GetHashCode(); if (!_genericDefinition.IsNull()) - return TypeLoaderEnvironment.Instance.GetCanonicalHashCode(_typeToFind, _canonKind); + return TypeLoaderEnvironment.Instance.GetCanonicalHashCode(_typeToFind, CanonicalFormKind.Specific); else return _typeToFind.GetHashCode(); } @@ -63,9 +60,9 @@ public bool IsCanonicallyEquivalent(RuntimeTypeHandle other) { if (_defType != null) { - TypeDesc typeToFindAsCanon = _defType.ConvertToCanonForm(_canonKind); + TypeDesc typeToFindAsCanon = _defType.ConvertToCanonForm(CanonicalFormKind.Specific); TypeDesc otherTypeAsTypeDesc = _defType.Context.ResolveRuntimeTypeHandle(other); - TypeDesc otherTypeAsCanon = otherTypeAsTypeDesc.ConvertToCanonForm(_canonKind); + TypeDesc otherTypeAsCanon = otherTypeAsTypeDesc.ConvertToCanonForm(CanonicalFormKind.Specific); return typeToFindAsCanon == otherTypeAsCanon; } @@ -77,7 +74,7 @@ public bool IsCanonicallyEquivalent(RuntimeTypeHandle other) RuntimeTypeHandle[] otherGenericArgs; otherGenericDefinition = RuntimeAugments.GetGenericInstantiation(other, out otherGenericArgs); - return _genericDefinition.Equals(otherGenericDefinition) && TypeLoaderEnvironment.Instance.CanInstantiationsShareCode(_genericArgs, otherGenericArgs, _canonKind); + return _genericDefinition.Equals(otherGenericDefinition) && TypeLoaderEnvironment.Instance.CanInstantiationsShareCode(_genericArgs, otherGenericArgs, CanonicalFormKind.Specific); } else return false; diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/EETypeCreator.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/EETypeCreator.cs index b49eec372a1f..cccb25742a8a 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/EETypeCreator.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/EETypeCreator.cs @@ -216,8 
+216,6 @@ private static void CreateEETypeWorker(MethodTable* pTemplateEEType, uint hashCo if (state.ThreadDataSize != 0) dynamicTypeFlags |= DynamicTypeFlags.HasThreadStatics; - // Note: The number of vtable slots on the MethodTable to create is not necessary equal to the number of - // vtable slots on the template type for universal generics (see ComputeVTableLayout) ushort numVtableSlots = state.NumVTableSlots; // Compute the MethodTable size and allocate it @@ -401,27 +399,20 @@ private static void CreateInstanceGCDesc(TypeBuilderState state, MethodTable* pT pEEType->ContainsGCPointers = false; } } - else if (gcBitfield != null) + else { - if (cbGCDesc != 0) + Debug.Assert(gcBitfield == null); + + if (pTemplateEEType != null) { - pEEType->ContainsGCPointers = true; - CreateGCDesc(gcBitfield, baseSize, isValueType, false, ((void**)pEEType) - 1); + Buffer.MemoryCopy((byte*)pTemplateEEType - cbGCDesc, (byte*)pEEType - cbGCDesc, cbGCDesc, cbGCDesc); + pEEType->ContainsGCPointers = pTemplateEEType->ContainsGCPointers; } else { pEEType->ContainsGCPointers = false; } } - else if (pTemplateEEType != null) - { - Buffer.MemoryCopy((byte*)pTemplateEEType - cbGCDesc, (byte*)pEEType - cbGCDesc, cbGCDesc, cbGCDesc); - pEEType->ContainsGCPointers = pTemplateEEType->ContainsGCPointers; - } - else - { - pEEType->ContainsGCPointers = false; - } } private static unsafe int GetInstanceGCDescSize(TypeBuilderState state, MethodTable* pTemplateEEType, bool isValueType, bool isArray) @@ -444,24 +435,24 @@ private static unsafe int GetInstanceGCDescSize(TypeBuilderState state, MethodTa return series > 0 ? (series + 2) * IntPtr.Size : 0; } } - else if (gcBitfield != null) - { - int series = CreateGCDesc(gcBitfield, 0, isValueType, false, null); - return series > 0 ? (series * 2 + 1) * IntPtr.Size : 0; - } - else if (pTemplateEEType != null) - { - return RuntimeAugments.GetGCDescSize(pTemplateEEType->ToRuntimeTypeHandle()); - } else { - return 0; + Debug.Assert(gcBitfield == null); + + if (pTemplateEEType != null) + { + return RuntimeAugments.GetGCDescSize(pTemplateEEType->ToRuntimeTypeHandle()); + } + else + { + return 0; + } } } - private static bool IsAllGCPointers(LowLevelList bitfield) + private static bool IsAllGCPointers(bool[] bitfield) { - int count = bitfield.Count; + int count = bitfield.Length; Debug.Assert(count > 0); for (int i = 0; i < count; i++) @@ -473,7 +464,7 @@ private static bool IsAllGCPointers(LowLevelList bitfield) return true; } - private static unsafe int CreateArrayGCDesc(LowLevelList bitfield, int rank, bool isSzArray, void* gcdesc) + private static unsafe int CreateArrayGCDesc(bool[] bitfield, int rank, bool isSzArray, void* gcdesc) { if (bitfield == null) return 0; @@ -497,7 +488,7 @@ private static unsafe int CreateArrayGCDesc(LowLevelList bitfield, int ran int first = -1; int last = 0; short numPtrs = 0; - while (i < bitfield.Count) + while (i < bitfield.Length) { if (bitfield[i]) { @@ -515,7 +506,7 @@ private static unsafe int CreateArrayGCDesc(LowLevelList bitfield, int ran numSeries++; numPtrs = 0; - while ((i < bitfield.Count) && (bitfield[i])) + while ((i < bitfield.Length) && (bitfield[i])) { numPtrs++; i++; @@ -533,7 +524,7 @@ private static unsafe int CreateArrayGCDesc(LowLevelList bitfield, int ran { if (numSeries > 0) { - *ptr-- = (short)((first + bitfield.Count - last) * IntPtr.Size); + *ptr-- = (short)((first + bitfield.Length - last) * IntPtr.Size); *ptr-- = numPtrs; *(void**)gcdesc = (void*)-numSeries; @@ -544,69 +535,6 @@ private static unsafe int 
CreateArrayGCDesc(LowLevelList bitfield, int ran return numSeries; } - private static unsafe int CreateGCDesc(LowLevelList bitfield, int size, bool isValueType, bool isStatic, void* gcdesc) - { - int offs = 0; - // if this type is a class we have to account for the gcdesc. - if (isValueType) - offs = IntPtr.Size; - - if (bitfield == null) - return 0; - - void** ptr = (void**)gcdesc - 1; - - int* staticPtr = isStatic ? ((int*)gcdesc + 1) : null; - - int numSeries = 0; - int i = 0; - while (i < bitfield.Count) - { - if (bitfield[i]) - { - numSeries++; - int seriesOffset = i * IntPtr.Size + offs; - int seriesSize = 0; - - while ((i < bitfield.Count) && (bitfield[i])) - { - seriesSize += IntPtr.Size; - i++; - } - - - if (gcdesc != null) - { - if (staticPtr != null) - { - *staticPtr++ = seriesSize; - *staticPtr++ = seriesOffset; - } - else - { - seriesSize -= size; - *ptr-- = (void*)seriesOffset; - *ptr-- = (void*)seriesSize; - } - } - } - else - { - i++; - } - } - - if (gcdesc != null) - { - if (staticPtr != null) - *(int*)gcdesc = numSeries; - else - *(void**)gcdesc = (void*)numSeries; - } - - return numSeries; - } - public static RuntimeTypeHandle CreateFunctionPointerEEType(uint hashCodeOfNewType, RuntimeTypeHandle returnTypeHandle, RuntimeTypeHandle[] parameterHandles, FunctionPointerType functionPointerType) { TypeBuilderState state = new TypeBuilderState(functionPointerType); diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/GenericDictionaryCell.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/GenericDictionaryCell.cs index 215a0f513252..eb911d4037af 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/GenericDictionaryCell.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/GenericDictionaryCell.cs @@ -242,9 +242,6 @@ internal override unsafe void Prepare(TypeBuilder builder) internal override IntPtr Create(TypeBuilder builder) { - // TODO (USG): What if this method's instantiation is a non-shareable one (from a normal canonical - // perspective) and there's an exact method pointer for the method in question, do we still - // construct a method dictionary to be used with the universal canonical method implementation? 
Debug.Assert(GenericMethod.RuntimeMethodDictionary != IntPtr.Zero); return GenericMethod.RuntimeMethodDictionary; } @@ -253,7 +250,7 @@ internal override IntPtr Create(TypeBuilder builder) private class FieldLdTokenCell : GenericDictionaryCell { internal TypeDesc ContainingType; - internal IntPtr FieldName; + internal int FieldHandle; internal override unsafe void Prepare(TypeBuilder builder) { @@ -267,7 +264,7 @@ internal override unsafe IntPtr Create(TypeBuilder builder) { RuntimeFieldHandle handle = TypeLoaderEnvironment.Instance.GetRuntimeFieldHandleForComponents( builder.GetRuntimeTypeHandle(ContainingType), - FieldName); + FieldHandle); return *(IntPtr*)&handle; } @@ -276,8 +273,6 @@ internal override unsafe IntPtr Create(TypeBuilder builder) private class MethodLdTokenCell : GenericDictionaryCell { internal MethodDesc Method; - internal IntPtr MethodName; - internal RuntimeSignature MethodSignature; internal override unsafe void Prepare(TypeBuilder builder) { @@ -301,8 +296,7 @@ internal override unsafe IntPtr Create(TypeBuilder builder) RuntimeMethodHandle handle = TypeLoaderEnvironment.Instance.GetRuntimeMethodHandleForComponents( builder.GetRuntimeTypeHandle(Method.OwningType), - MethodName, - MethodSignature, + Method.NameAndSignature.Handle, genericArgHandles); return *(IntPtr*)&handle; @@ -467,30 +461,22 @@ internal static GenericDictionaryCell ParseAndCreateCell(NativeLayoutInfoLoadCon case FixupSignatureKind.FieldLdToken: { - NativeParser ldtokenSigParser = parser.GetParserFromRelativeOffset(); - - var type = nativeLayoutInfoLoadContext.GetType(ref ldtokenSigParser); - IntPtr fieldNameSig = ldtokenSigParser.Reader.OffsetToAddress(ldtokenSigParser.Offset); - TypeLoaderLogger.WriteLine("LdToken on: " + type.ToString() + "." + ldtokenSigParser.GetString()); + var type = nativeLayoutInfoLoadContext.GetType(ref parser); + int handle = (int)parser.GetUnsigned(); + TypeLoaderLogger.WriteLine("LdToken on: " + type.ToString() + "." 
+ handle.LowLevelToString()); - cell = new FieldLdTokenCell() { FieldName = fieldNameSig, ContainingType = type }; + cell = new FieldLdTokenCell() { FieldHandle = handle, ContainingType = type }; } break; case FixupSignatureKind.MethodLdToken: { - NativeParser ldtokenSigParser = parser.GetParserFromRelativeOffset(); - - RuntimeSignature methodNameSig; - RuntimeSignature methodSig; - var method = nativeLayoutInfoLoadContext.GetMethod(ref ldtokenSigParser, out methodNameSig, out methodSig); - TypeLoaderLogger.WriteLine("LdToken on: " + method.OwningType.ToString() + "::" + method.NameAndSignature.Name); + var method = nativeLayoutInfoLoadContext.GetMethod(ref parser); + TypeLoaderLogger.WriteLine("LdToken on: " + method.OwningType.ToString() + "::" + method.NameAndSignature.GetName()); cell = new MethodLdTokenCell { Method = method, - MethodName = methodNameSig.NativeLayoutSignature(), - MethodSignature = methodSig }; } break; @@ -515,7 +501,7 @@ internal static GenericDictionaryCell ParseAndCreateCell(NativeLayoutInfoLoadCon case FixupSignatureKind.Method: { - var method = nativeLayoutInfoLoadContext.GetMethod(ref parser, out _, out _); + var method = nativeLayoutInfoLoadContext.GetMethod(ref parser); TypeLoaderLogger.WriteLine("Method: " + method.ToString()); cell = new MethodCell @@ -544,9 +530,7 @@ internal static GenericDictionaryCell ParseAndCreateCell(NativeLayoutInfoLoadCon case FixupSignatureKind.GenericStaticConstrainedMethod: { TypeDesc constraintType = nativeLayoutInfoLoadContext.GetType(ref parser); - - NativeParser ldtokenSigParser = parser.GetParserFromRelativeOffset(); - MethodDesc constrainedMethod = nativeLayoutInfoLoadContext.GetMethod(ref ldtokenSigParser); + MethodDesc constrainedMethod = nativeLayoutInfoLoadContext.GetMethod(ref parser); TypeLoaderLogger.WriteLine("GenericStaticConstrainedMethod: " + constraintType.ToString() + " Method " + constrainedMethod.ToString()); diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/MethodTable.Runtime.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/MethodTable.Runtime.cs index 49b75a220bf0..5826be0d3e6f 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/MethodTable.Runtime.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/MethodTable.Runtime.cs @@ -18,9 +18,9 @@ internal partial struct MethodTable return MethodTable.Of(); } - internal unsafe RuntimeTypeHandle ToRuntimeTypeHandle() + internal readonly unsafe RuntimeTypeHandle ToRuntimeTypeHandle() { - IntPtr result = (IntPtr)Unsafe.AsPointer(ref this); + IntPtr result = (IntPtr)Unsafe.AsPointer(in this); return *(RuntimeTypeHandle*)&result; } } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/ModuleList.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/ModuleList.cs index c484eb6e5afd..f4353f9db8f5 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/ModuleList.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/ModuleList.cs @@ -395,22 +395,4 @@ public static NativeFormatModuleInfoEnumerable EnumerateModules(TypeManagerHandl return new NativeFormatModuleInfoEnumerable(Instance._loadedModuleMap, preferredModule); } } - - public static partial class RuntimeSignatureHelper - { - public static ModuleInfo GetModuleInfo(this Internal.Runtime.CompilerServices.RuntimeSignature 
methodSignature) - { - if (methodSignature.IsNativeLayoutSignature) - { - return ModuleList.Instance.GetModuleInfoByHandle(new TypeManagerHandle(methodSignature.ModuleHandle)); - } - else - { - ModuleInfo moduleInfo; - bool success = ModuleList.Instance.TryGetModuleInfoByHandle(new TypeManagerHandle(methodSignature.ModuleHandle), out moduleInfo); - Debug.Assert(success); - return moduleInfo; - } - } - } } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/NativeLayoutInfoLoadContext.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/NativeLayoutInfoLoadContext.cs index 90a2c9857c6d..e7e798a7e672 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/NativeLayoutInfoLoadContext.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/NativeLayoutInfoLoadContext.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Reflection; +using System.Reflection.Runtime.General; using Internal.NativeFormat; using Internal.Runtime; @@ -175,7 +176,7 @@ internal TypeDesc GetType(ref NativeParser parser) } } - internal MethodDesc GetMethod(ref NativeParser parser, out RuntimeSignature methodNameSig, out RuntimeSignature methodSig) + internal MethodDesc GetMethod(ref NativeParser parser) { MethodFlags flags = (MethodFlags)parser.GetUnsigned(); @@ -184,7 +185,8 @@ internal MethodDesc GetMethod(ref NativeParser parser, out RuntimeSignature meth functionPointer = GetExternalReferencePointer(parser.GetUnsigned()); DefType containingType = (DefType)GetType(ref parser); - MethodNameAndSignature nameAndSignature = TypeLoaderEnvironment.GetMethodNameAndSignature(ref parser, _module.Handle, out methodNameSig, out methodSig); + int token = (int)parser.GetUnsigned(); + MethodNameAndSignature nameAndSignature = new MethodNameAndSignature(_module.MetadataReader, token.AsHandle().ToMethodHandle(_module.MetadataReader)); bool unboxingStub = (flags & MethodFlags.IsUnboxingStub) != 0; @@ -208,11 +210,6 @@ internal MethodDesc GetMethod(ref NativeParser parser, out RuntimeSignature meth return retVal; } - internal MethodDesc GetMethod(ref NativeParser parser) - { - return GetMethod(ref parser, out _, out _); - } - internal TypeDesc[] GetTypeSequence(ref NativeParser parser) { uint count = parser.GetSequenceCount(); diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TemplateLocator.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TemplateLocator.cs index d3fd2dd8b24a..07c322552dc8 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TemplateLocator.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TemplateLocator.cs @@ -56,10 +56,6 @@ private static TypeDesc TryGetTypeTemplate_Internal(TypeDesc concreteType, Canon continue; } - Debug.Assert( - (kind != CanonicalFormKind.Universal) || - (kind == CanonicalFormKind.Universal && candidateTemplate == candidateTemplate.ConvertToCanonForm(kind))); - nativeLayoutInfoModule = moduleInfo; return candidateTemplate; } @@ -126,10 +122,6 @@ private static InstantiatedMethod TryGetGenericMethodTemplate_Internal(Instantia continue; } - Debug.Assert( - (kind != CanonicalFormKind.Universal) || - (kind == CanonicalFormKind.Universal && candidateTemplate == candidateTemplate.GetCanonMethodTarget(kind))); - return candidateTemplate; } } diff --git 
a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilder.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilder.cs index 5d817bff72ed..6f8b02d93ead 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilder.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilder.cs @@ -13,18 +13,6 @@ namespace Internal.Runtime.TypeLoader { - internal static class LowLevelListExtensions - { - public static void Expand(this LowLevelList list, int count) - { - if (list.Capacity < count) - list.Capacity = count; - - while (list.Count < count) - list.Add(default(T)); - } - } - internal class TypeBuilder { public TypeBuilder() @@ -322,10 +310,6 @@ internal void ParseNativeLayoutInfo(InstantiatedMethod method) } } - // Ensure that if this method is non-shareable from a normal canonical perspective, then - // its template MUST be a universal canonical template method - Debug.Assert(!method.IsNonSharableMethod || (method.IsNonSharableMethod && templateMethod.IsCanonicalMethod(CanonicalFormKind.Universal))); - NativeReader nativeLayoutInfoReader = TypeLoaderEnvironment.GetNativeLayoutInfoReader(nativeLayoutModule.Handle); var methodInfoParser = new NativeParser(nativeLayoutInfoReader, nativeLayoutInfoToken); @@ -348,7 +332,7 @@ internal void ParseNativeLayoutInfo(InstantiatedMethod method) break; default: - Debug.Fail("Unexpected BagElementKind for generic method with name " + method.NameAndSignature.Name + "! Only BagElementKind.DictionaryLayout should appear."); + Debug.Fail("Unexpected BagElementKind for generic method with name " + method.NameAndSignature.GetName() + "! Only BagElementKind.DictionaryLayout should appear."); throw new BadImageFormatException(); } } @@ -361,12 +345,6 @@ internal void ParseNativeLayoutInfo(TypeBuilderState state, TypeDesc type) { TypeLoaderLogger.WriteLine("Parsing NativeLayoutInfo for type " + type.ToString() + " ..."); - bool isTemplateUniversalCanon = false; - if (state.TemplateType != null) - { - isTemplateUniversalCanon = state.TemplateType.IsCanonicalSubtype(CanonicalFormKind.Universal); - } - if (state.TemplateType == null) { throw new MissingTemplateException(); @@ -427,15 +405,8 @@ internal void ParseNativeLayoutInfo(TypeBuilderState state, TypeDesc type) state.ThreadStaticDesc = context.GetGCStaticInfo(typeInfoParser.GetUnsigned()); break; - case BagElementKind.FieldLayout: - TypeLoaderLogger.WriteLine("Found BagElementKind.FieldLayout"); - typeInfoParser.SkipInteger(); // Handled in type layout algorithm - break; - case BagElementKind.DictionaryLayout: TypeLoaderLogger.WriteLine("Found BagElementKind.DictionaryLayout"); - Debug.Assert(!isTemplateUniversalCanon, "Universal template nativelayout do not have DictionaryLayout"); - Debug.Assert(state.Dictionary == null); if (!state.TemplateType.RetrieveRuntimeTypeHandleIfPossible()) { @@ -464,24 +435,22 @@ internal void ParseNativeLayoutInfo(TypeBuilderState state, TypeDesc type) /// internal unsafe struct GCLayout { - private LowLevelList _bitfield; + private bool[] _bitfield; private unsafe void* _gcdesc; private int _size; - private bool _isReferenceTypeGCLayout; public static GCLayout None { get { return default(GCLayout); } } - public static GCLayout SingleReference { get; } = new GCLayout(new LowLevelList(new bool[1] { true }), false); + public static GCLayout SingleReference { get; } = new GCLayout([true]); public bool IsNone { get { return _bitfield 
== null && _gcdesc == null; } } - public GCLayout(LowLevelList bitfield, bool isReferenceTypeGCLayout) + public GCLayout(bool[] bitfield) { Debug.Assert(bitfield != null); _bitfield = bitfield; _gcdesc = null; _size = 0; - _isReferenceTypeGCLayout = isReferenceTypeGCLayout; } public GCLayout(RuntimeTypeHandle rtth) @@ -490,37 +459,27 @@ public GCLayout(RuntimeTypeHandle rtth) Debug.Assert(MethodTable != null); _bitfield = null; - _isReferenceTypeGCLayout = false; // This field is only used for the LowLevelList path _gcdesc = MethodTable->ContainsGCPointers ? (void**)MethodTable - 1 : null; _size = (int)MethodTable->BaseSize; } /// - /// Writes this layout to the given bitfield. + /// Gets this layout in bitfield array. /// - /// The bitfield to write a layout to (may be null, at which - /// point it will be created and assigned). - /// The offset at which we need to write the bitfield. - public void WriteToBitfield(LowLevelList bitfield, int offset) + /// The layout in bitfield. + public bool[] AsBitfield() { - ArgumentNullException.ThrowIfNull(bitfield); - - if (IsNone) - return; + // This method should only be called when not none. + Debug.Assert(!IsNone); // Ensure exactly one of these two are set. Debug.Assert(_gcdesc != null ^ _bitfield != null); - if (_bitfield != null) - MergeBitfields(bitfield, offset); - else - WriteGCDescToBitfield(bitfield, offset); + return _bitfield ?? WriteGCDescToBitfield(); } - private unsafe void WriteGCDescToBitfield(LowLevelList bitfield, int offset) + private unsafe bool[] WriteGCDescToBitfield() { - int startIndex = offset / IntPtr.Size; - void** ptr = (void**)_gcdesc; Debug.Assert(_gcdesc != null); @@ -529,8 +488,8 @@ private unsafe void WriteGCDescToBitfield(LowLevelList bitfield, int offse Debug.Assert(count >= 0); // Ensure capacity for the values we are about to write - int capacity = startIndex + _size / IntPtr.Size - 2; - bitfield.Expand(capacity); + int capacity = _size / IntPtr.Size - 2; + bool[] bitfield = new bool[capacity]; while (count-- >= 0) { @@ -541,35 +500,10 @@ private unsafe void WriteGCDescToBitfield(LowLevelList bitfield, int offse Debug.Assert(offs >= 0); for (int i = 0; i < len; i++) - bitfield[startIndex + offs + i] = true; + bitfield[offs + i] = true; } - } - - private void MergeBitfields(LowLevelList outputBitfield, int offset) - { - int startIndex = offset / IntPtr.Size; - - // These routines represent the GC layout after the MethodTable pointer - // in an object, but the LowLevelList bitfield logically contains - // the EETypepointer if it is describing a reference type. So, skip the - // first value. - int itemsToSkip = _isReferenceTypeGCLayout ? 1 : 0; - - // Assert that we only skip a non-reported pointer. - Debug.Assert(itemsToSkip == 0 || _bitfield[0] == false); - // Ensure capacity for the values we are about to write - int capacity = startIndex + _bitfield.Count - itemsToSkip; - outputBitfield.Expand(capacity); - - - for (int i = itemsToSkip; i < _bitfield.Count; i++) - { - // We should never overwrite a TRUE value in the table. 
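// Illustrative sketch, not part of the patch: the GCLayout changes in this hunk all revolve
// around one idea -- a bool[] bitfield in which each entry marks a pointer-sized slot holding a
// GC reference, and a GCDesc that records the same information as run-length (offset, count)
// series. The standalone helper below shows only that run-length step; the real
// CreateArrayGCDesc/WriteGCDescToBitfield code above additionally handles base size, the object
// header, and writing the series backwards before the MethodTable, which is omitted here.
using System.Collections.Generic;

static class GcBitfieldSketch
{
    // Collects each contiguous run of 'true' entries as an (offset, count) pair,
    // e.g. { false, true, true, false, true } => (1, 2), (4, 1).
    public static List<(int Offset, int Count)> FindSeries(bool[] bitfield)
    {
        var series = new List<(int Offset, int Count)>();
        int i = 0;
        while (i < bitfield.Length)
        {
            if (!bitfield[i]) { i++; continue; }

            int start = i;
            while (i < bitfield.Length && bitfield[i])
                i++;

            series.Add((start, i - start));
        }
        return series;
    }
}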
- Debug.Assert(!outputBitfield[startIndex + i - itemsToSkip] || _bitfield[i]); - - outputBitfield[startIndex + i - itemsToSkip] = _bitfield[i]; - } + return bitfield; } } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilderState.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilderState.cs index b184395c2a51..f351a1285724 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilderState.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilderState.cs @@ -85,7 +85,7 @@ public TypeDesc TemplateType _templateTypeLoaderNativeLayout = true; _templateComputed = true; - if ((_templateType != null) && !_templateType.IsCanonicalSubtype(CanonicalFormKind.Universal)) + if (_templateType != null) _nativeLayoutTokenComputed = true; } @@ -267,7 +267,6 @@ private ushort ComputeNumVTableSlots() } else { - // This should only happen for non-universal templates Debug.Assert(TypeBeingBuilt.IsTemplateCanonical()); TypeDesc templateType = TypeBeingBuilt.ComputeTemplate(false); @@ -307,9 +306,9 @@ public ushort NumVTableSlots // Sentinel static to allow us to initialize _instanceLayout to something // and then detect that InstanceGCLayout should return null - private static LowLevelList s_emptyLayout = new LowLevelList(); + private static readonly bool[] s_emptyLayout = []; - private LowLevelList _instanceGCLayout; + private bool[] _instanceGCLayout; /// /// The instance gc layout of a dynamically laid out type. @@ -325,14 +324,12 @@ public ushort NumVTableSlots /// If the type is a valuetype array, this is the layout of the valuetype held in the array if the type has GC reference fields /// Otherwise, it is the layout of the fields in the type. 
/// - public LowLevelList InstanceGCLayout + public bool[] InstanceGCLayout { get { if (_instanceGCLayout == null) { - LowLevelList instanceGCLayout; - if (TypeBeingBuilt is ArrayType) { if (!IsArrayOfReferenceTypes) @@ -341,9 +338,7 @@ public LowLevelList InstanceGCLayout TypeBuilder.GCLayout elementGcLayout = GetFieldGCLayout(arrayType.ElementType); if (!elementGcLayout.IsNone) { - instanceGCLayout = new LowLevelList(); - elementGcLayout.WriteToBitfield(instanceGCLayout, 0); - _instanceGCLayout = instanceGCLayout; + _instanceGCLayout = elementGcLayout.AsBitfield(); } } else diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.ConstructedGenericMethodsLookup.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.ConstructedGenericMethodsLookup.cs index 634770228e4d..4330ad08aea7 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.ConstructedGenericMethodsLookup.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.ConstructedGenericMethodsLookup.cs @@ -4,6 +4,7 @@ using System; using System.Diagnostics; +using System.Reflection.Runtime.General; using System.Threading; using Internal.NativeFormat; @@ -27,7 +28,7 @@ public override int GetHashCode() { if (!_hashCode.HasValue) { - _hashCode = _declaringTypeHandle.GetHashCode() ^ TypeHashingAlgorithms.ComputeGenericInstanceHashCode(TypeHashingAlgorithms.ComputeNameHashCode(_methodNameAndSignature.Name), _genericMethodArgumentHandles); + _hashCode = _declaringTypeHandle.GetHashCode() ^ TypeHashingAlgorithms.ComputeGenericInstanceHashCode(TypeHashingAlgorithms.ComputeNameHashCode(_methodNameAndSignature.GetName()), _genericMethodArgumentHandles); } return _hashCode.Value; } @@ -147,7 +148,7 @@ internal override bool MatchParsedEntry(ref NativeParser entryParser, ref Extern RuntimeTypeHandle parsedDeclaringTypeHandle = externalReferencesLookup.GetRuntimeTypeHandleFromIndex(entryParser.GetUnsigned()); // Hash table names / sigs are indirected through to the native layout info - MethodNameAndSignature nameAndSignature = TypeLoaderEnvironment.Instance.GetMethodNameAndSignatureFromNativeLayoutOffset(moduleHandle, entryParser.GetUnsigned()); + MethodNameAndSignature nameAndSignature = TypeLoaderEnvironment.GetMethodNameAndSignatureFromToken(moduleHandle, entryParser.GetUnsigned()); RuntimeTypeHandle[] parsedArgsHandles = GetTypeSequence(ref externalReferencesLookup, ref entryParser); @@ -186,10 +187,8 @@ public bool TryGetGenericMethodComponents(IntPtr methodDictionary, out RuntimeTy { if (!TryGetDynamicGenericMethodComponents(methodDictionary, out declaringType, out nameAndSignature, out genericMethodArgumentHandles)) { - if (!TryGetStaticGenericMethodComponents(methodDictionary, out declaringType, out TypeManagerHandle typeManager, out uint nameAndSigOffset, out genericMethodArgumentHandles)) + if (!TryGetStaticGenericMethodComponents(methodDictionary, out declaringType, out nameAndSignature, out genericMethodArgumentHandles)) return false; - - nameAndSignature = TypeLoaderEnvironment.Instance.GetMethodNameAndSignatureFromNativeLayoutOffset(typeManager, nameAndSigOffset); } return true; @@ -199,7 +198,7 @@ public static bool TryGetGenericMethodComponents(IntPtr methodDictionary, out Ru { TypeLoaderEnvironment instance = TypeLoaderEnvironment.InstanceOrNull; if (instance == null || 
!instance.TryGetDynamicGenericMethodComponents(methodDictionary, out declaringType, out _, out genericMethodArgumentHandles)) - if (!TryGetStaticGenericMethodComponents(methodDictionary, out declaringType, out _, out _, out genericMethodArgumentHandles)) + if (!TryGetStaticGenericMethodComponents(methodDictionary, out declaringType, out _, out genericMethodArgumentHandles)) return false; return true; @@ -248,8 +247,6 @@ public bool TryGetGenericVirtualMethodPointer(InstantiatedMethod method, out Int { if (!method.CanShareNormalGenericCode()) { - // First see if we can find an exact method implementation for the GVM (avoid using USG implementations if we can, - // because USG code is much slower). if (TryLookupExactMethodPointer(method, out methodPointer)) { Debug.Assert(methodPointer != IntPtr.Zero); @@ -370,7 +367,7 @@ private bool TryGetDynamicGenericMethodComponents(IntPtr methodDictionary, out R return true; } } - private static unsafe bool TryGetStaticGenericMethodComponents(IntPtr methodDictionary, out RuntimeTypeHandle declaringType, out TypeManagerHandle typeManager, out uint nameAndSigOffset, out RuntimeTypeHandle[] genericMethodArgumentHandles) + private static unsafe bool TryGetStaticGenericMethodComponents(IntPtr methodDictionary, out RuntimeTypeHandle declaringType, out MethodNameAndSignature nameAndSignature, out RuntimeTypeHandle[] genericMethodArgumentHandles) { // Generic method dictionaries have a header that has the hash code in it. Locate the header IntPtr dictionaryHeader = IntPtr.Subtract(methodDictionary, IntPtr.Size); @@ -398,8 +395,8 @@ private static unsafe bool TryGetStaticGenericMethodComponents(IntPtr methodDict // We have a match - fill in the results declaringType = externalReferencesLookup.GetRuntimeTypeHandleFromIndex(entryParser.GetUnsigned()); - typeManager = module.Handle; - nameAndSigOffset = entryParser.GetUnsigned(); + int token = (int)entryParser.GetUnsigned(); + nameAndSignature = new MethodNameAndSignature(module.MetadataReader, token.AsHandle().ToMethodHandle(module.MetadataReader)); uint arity = entryParser.GetSequenceCount(); genericMethodArgumentHandles = new RuntimeTypeHandle[arity]; @@ -414,8 +411,7 @@ private static unsafe bool TryGetStaticGenericMethodComponents(IntPtr methodDict } declaringType = default(RuntimeTypeHandle); - typeManager = default; - nameAndSigOffset = 0; + nameAndSignature = null; genericMethodArgumentHandles = null; return false; } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.FieldAccess.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.FieldAccess.cs index efe5e92830c6..06573d1beb03 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.FieldAccess.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.FieldAccess.cs @@ -41,58 +41,21 @@ public struct FieldAccessMetadata public sealed partial class TypeLoaderEnvironment { - /// - /// Try to look up field access info for given canon in metadata blobs for all available modules. 
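// Illustrative sketch, not part of the patch: the field-access lookup being reworked here
// matches table entries either exactly or via CanonicallyEquivalentEntryLocator, i.e. when two
// instantiations reduce to the same canonical form. The toy model below expresses that rule with
// System.Type for readability -- reference-type generic arguments collapse to a shared
// placeholder while value-type arguments keep their identity -- and is only a conceptual stand-in
// for the TypeDesc/CanonicalFormKind.Specific machinery used by the real code.
using System;
using System.Linq;

static class CanonicalFormSketch
{
    public static string ToCanonForm(Type type)
    {
        if (!type.IsGenericType)
            return type.Name;

        // Reference-type arguments share code, so they all map to "__Canon";
        // value-type arguments get distinct instantiations and are kept as-is.
        var args = type.GetGenericArguments()
                       .Select(a => a.IsValueType ? ToCanonForm(a) : "__Canon");
        return type.GetGenericTypeDefinition().Name + "<" + string.Join(", ", args) + ">";
    }

    // List<string> and List<object> => equivalent; List<string> and List<int> => not.
    public static bool AreCanonicallyEquivalent(Type a, Type b)
        => ToCanonForm(a) == ToCanonForm(b);
}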
- /// - /// Metadata reader for the declaring type - /// Declaring type for the method - /// Field handle - /// Output - metadata information for field accessor construction - /// true when found, false otherwise - public static bool TryGetFieldAccessMetadata( - MetadataReader metadataReader, - RuntimeTypeHandle runtimeTypeHandle, - FieldHandle fieldHandle, - out FieldAccessMetadata fieldAccessMetadata) - { - fieldAccessMetadata = default(FieldAccessMetadata); - - if (TryGetFieldAccessMetadataFromFieldAccessMap( - runtimeTypeHandle, - fieldHandle, - CanonicalFormKind.Specific, - ref fieldAccessMetadata)) - { - return true; - } - - if (TryGetFieldAccessMetadataFromFieldAccessMap( - runtimeTypeHandle, - fieldHandle, - CanonicalFormKind.Universal, - ref fieldAccessMetadata)) - { - return true; - } - - return false; - } - /// /// Try to look up field access info for given canon in metadata blobs for all available modules. /// /// Declaring type for the method /// Field handle - /// Canonical form to use /// Output - metadata information for field accessor construction /// true when found, false otherwise - private static unsafe bool TryGetFieldAccessMetadataFromFieldAccessMap( + public static unsafe bool TryGetFieldAccessMetadataFromFieldAccessMap( RuntimeTypeHandle declaringTypeHandle, FieldHandle fieldHandle, - CanonicalFormKind canonFormKind, - ref FieldAccessMetadata fieldAccessMetadata) + out FieldAccessMetadata fieldAccessMetadata) { - CanonicallyEquivalentEntryLocator canonWrapper = new CanonicallyEquivalentEntryLocator(declaringTypeHandle, canonFormKind); + fieldAccessMetadata = default; + + CanonicallyEquivalentEntryLocator canonWrapper = new CanonicallyEquivalentEntryLocator(declaringTypeHandle); foreach (NativeFormatModuleInfo mappingTableModule in ModuleList.EnumerateModules(RuntimeAugments.GetModuleFromTypeHandle(declaringTypeHandle))) { @@ -119,29 +82,18 @@ private static unsafe bool TryGetFieldAccessMetadataFromFieldAccessMap( FieldTableFlags entryFlags = (FieldTableFlags)entryParser.GetUnsigned(); - if ((canonFormKind == CanonicalFormKind.Universal) != ((entryFlags & FieldTableFlags.IsUniversalCanonicalEntry) != 0)) - continue; - RuntimeTypeHandle entryDeclaringTypeHandle = externalReferences.GetRuntimeTypeHandleFromIndex(entryParser.GetUnsigned()); if (!entryDeclaringTypeHandle.Equals(declaringTypeHandle) && !canonWrapper.IsCanonicallyEquivalent(entryDeclaringTypeHandle)) continue; - if ((entryFlags & FieldTableFlags.HasMetadataHandle) != 0) - { - Handle entryFieldHandle = (((int)HandleType.Field << 25) | (int)entryParser.GetUnsigned()).AsHandle(); - if (!fieldHandle.Equals(entryFieldHandle)) - continue; - } - else - { - Debug.Fail("Multifile path"); - } + Handle entryFieldHandle = (((int)HandleType.Field << 25) | (int)entryParser.GetUnsigned()).AsHandle(); + if (!fieldHandle.Equals(entryFieldHandle)) + continue; int fieldOffset; IntPtr fieldAddressCookie = IntPtr.Zero; - Debug.Assert(canonFormKind != CanonicalFormKind.Universal); if ((entryFlags & FieldTableFlags.FieldOffsetEncodedDirectly) != 0) { fieldOffset = (int)entryParser.GetUnsigned(); diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.GVMResolution.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.GVMResolution.cs index c73df4af62b5..568a8a444c39 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.GVMResolution.cs +++ 
b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.GVMResolution.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Reflection.Runtime.General; using System.Runtime; using System.Runtime.InteropServices; using System.Threading; @@ -105,7 +106,7 @@ internal static InstantiatedMethod GVMLookupForSlotWorker(DefType targetType, In sb.AppendLine(); sb.AppendLine("Declaring type: " + GetTypeNameDebug(slotMethod.OwningType)); sb.AppendLine("Target type: " + GetTypeNameDebug(targetType)); - sb.AppendLine("Method name: " + slotMethod.NameAndSignature.Name); + sb.AppendLine("Method name: " + slotMethod.Name); sb.AppendLine("Instantiation:"); for (int i = 0; i < slotMethod.Instantiation.Length; i++) { @@ -133,7 +134,7 @@ internal unsafe IntPtr ResolveGenericVirtualMethodTarget(RuntimeTypeHandle type, sb.AppendLine("Failed to create generic virtual method implementation"); sb.AppendLine(); sb.AppendLine("Declaring type: " + GetTypeNameDebug(result.OwningType)); - sb.AppendLine("Method name: " + result.NameAndSignature.Name); + sb.AppendLine("Method name: " + result.Name); sb.AppendLine("Instantiation:"); for (int i = 0; i < result.Instantiation.Length; i++) { @@ -146,19 +147,9 @@ internal unsafe IntPtr ResolveGenericVirtualMethodTarget(RuntimeTypeHandle type, return FunctionPointerOps.GetGenericMethodFunctionPointer(methodPointer, dictionaryPointer); } - private static MethodNameAndSignature GetMethodNameAndSignatureFromNativeReader(NativeReader nativeLayoutReader, TypeManagerHandle moduleHandle, uint nativeLayoutOffset) + public static MethodNameAndSignature GetMethodNameAndSignatureFromToken(TypeManagerHandle moduleHandle, uint token) { - NativeParser parser = new NativeParser(nativeLayoutReader, nativeLayoutOffset); - - string methodName = parser.GetString(); - - // Signatures are indirected to through a relative offset so that we don't have to parse them - // when not comparing signatures (parsing them requires resolving types and is tremendously - // expensive). 
- NativeParser sigParser = parser.GetParserFromRelativeOffset(); - RuntimeSignature methodSig = RuntimeSignature.CreateFromNativeLayoutSignature(moduleHandle, sigParser.Offset); - - return new MethodNameAndSignature(methodName, methodSig); + return new MethodNameAndSignature(ModuleList.Instance.GetMetadataReaderForModule(moduleHandle), token.AsHandle().ToMethodHandle(null)); } private static RuntimeTypeHandle GetTypeDefinition(RuntimeTypeHandle typeHandle) @@ -191,7 +182,7 @@ private static InstantiatedMethod FindMatchingInterfaceSlot(NativeFormatModuleIn if (nameAndSigToken != SpecialGVMInterfaceEntry.Diamond && nameAndSigToken != SpecialGVMInterfaceEntry.Reabstraction) { - targetMethodNameAndSignature = GetMethodNameAndSignatureFromNativeReader(nativeLayoutReader, module.Handle, nameAndSigToken); + targetMethodNameAndSignature = GetMethodNameAndSignatureFromToken(module.Handle, nameAndSigToken); targetTypeHandle = extRefs.GetRuntimeTypeHandleFromIndex(entryParser.GetUnsigned()); isDefaultInterfaceMethodImplementation = RuntimeAugments.IsInterface(targetTypeHandle); #if GVM_RESOLUTION_TRACE @@ -263,12 +254,23 @@ private static InstantiatedMethod FindMatchingInterfaceSlot(NativeFormatModuleIn DefType interfaceImplType; // We found the GVM slot target for the input interface GVM call, so let's update the interface GVM slot and return success to the caller - if (!RuntimeAugments.IsInterface(targetTypeHandle) || !RuntimeAugments.IsGenericTypeDefinition(targetTypeHandle)) + if (!RuntimeAugments.IsGenericTypeDefinition(targetTypeHandle)) { - // Not a default interface method or default interface method on a non-generic type. - // We have a usable type handle. + // No genericness involved, we can use the type as-is. interfaceImplType = (DefType)context.ResolveRuntimeTypeHandle(targetTypeHandle); } + else if (!isDefaultInterfaceMethodImplementation) + { + // Target type is in open form. We know the concrete form is somewhere in the inheritance hierarchy of targetType. + // This covers cases like: + // interface IFoo { void Frob(); } + // class Base { public void Frob() { } } + // class Derived : Base>, IFoo { } + // In the above case, targetTypeHandle is Base, targetType is Derived and we want Base>. + interfaceImplType = targetType; + while (!interfaceImplType.GetTypeDefinition().RuntimeTypeHandle.Equals(targetTypeHandle)) + interfaceImplType = (DefType)interfaceImplType.BaseType; + } else if (currentIfaceType.HasInstantiation && currentIfaceType.GetTypeDefinition().RuntimeTypeHandle.Equals(targetTypeHandle)) { // Default interface method implemented on the same type that declared the slot. 
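// Illustrative sketch, not part of the patch: the new non-default-interface-method branch above
// records the slot-defining base type in its open (generic definition) form and recovers the
// concrete instantiation by walking up from the target type until the definitions match. The
// helper below shows the same walk with System.Type instead of DefType/RuntimeTypeHandle; the
// Derived<T>/Base<List<T>> example in the comments is a hypothetical illustration of the pattern.
using System;

static class BaseWalkSketch
{
    // Given a concrete type and the open definition of one of its base classes, return the
    // instantiated form of that base. For a hypothetical Derived<T> : Base<List<T>>,
    // FindInstantiatedBase(typeof(Derived<int>), typeof(Base<>)) returns typeof(Base<List<int>>).
    public static Type FindInstantiatedBase(Type concrete, Type openBaseDefinition)
    {
        for (Type current = concrete; current != null; current = current.BaseType)
        {
            Type definition = current.IsGenericType ? current.GetGenericTypeDefinition() : current;
            if (definition == openBaseDefinition)
                return current;
        }
        return null;
    }
}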
@@ -357,7 +359,7 @@ private static InstantiatedMethod ResolveInterfaceGenericVirtualMethodSlot(DefTy continue; uint nameAndSigToken = entryParser.GetUnsigned(); - MethodNameAndSignature interfaceMethodNameAndSignature = GetMethodNameAndSignatureFromNativeReader(nativeLayoutReader, module.Handle, nameAndSigToken); + MethodNameAndSignature interfaceMethodNameAndSignature = GetMethodNameAndSignatureFromToken(module.Handle, nameAndSigToken); if (!interfaceMethodNameAndSignature.Equals(slotMethod.NameAndSignature)) continue; @@ -489,13 +491,13 @@ private static InstantiatedMethod ResolveGenericVirtualMethodTarget(DefType targ continue; uint parsedCallingNameAndSigToken = entryParser.GetUnsigned(); - MethodNameAndSignature parsedCallingNameAndSignature = GetMethodNameAndSignatureFromNativeReader(nativeLayoutReader, module.Handle, parsedCallingNameAndSigToken); + MethodNameAndSignature parsedCallingNameAndSignature = GetMethodNameAndSignatureFromToken(module.Handle, parsedCallingNameAndSigToken); if (!parsedCallingNameAndSignature.Equals(slotMethod.NameAndSignature)) continue; uint parsedTargetMethodNameAndSigToken = entryParser.GetUnsigned(); - MethodNameAndSignature targetMethodNameAndSignature = GetMethodNameAndSignatureFromNativeReader(nativeLayoutReader, module.Handle, parsedTargetMethodNameAndSigToken); + MethodNameAndSignature targetMethodNameAndSignature = GetMethodNameAndSignatureFromToken(module.Handle, parsedTargetMethodNameAndSigToken); Debug.Assert(targetMethodNameAndSignature != null); diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.LdTokenResultLookup.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.LdTokenResultLookup.cs index dd2bc75e3d61..f28b55e5b0d6 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.LdTokenResultLookup.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.LdTokenResultLookup.cs @@ -11,6 +11,7 @@ using System.Text; using System.Threading; +using Internal.Metadata.NativeFormat; using Internal.NativeFormat; using Internal.Runtime.Augments; using Internal.Runtime.CompilerServices; @@ -22,79 +23,16 @@ namespace Internal.Runtime.TypeLoader { public sealed partial class TypeLoaderEnvironment { - [StructLayout(LayoutKind.Sequential)] - private struct DynamicFieldHandleInfo - { - public IntPtr DeclaringType; - public IntPtr FieldName; - } - - [StructLayout(LayoutKind.Sequential)] - internal struct DynamicMethodHandleInfo - { - public IntPtr DeclaringType; - public IntPtr MethodName; - public RuntimeSignature MethodSignature; - public int NumGenericArgs; - public IntPtr GenericArgsArray; - } - - - #region String conversions - private static unsafe string GetStringFromMemoryInNativeFormat(IntPtr pointerToDataStream) - { - byte* dataStream = (byte*)pointerToDataStream; - uint stringLen = NativePrimitiveDecoder.DecodeUnsigned(ref dataStream); - return Encoding.UTF8.GetString(dataStream, checked((int)stringLen)); - } - - /// - /// From a string, get a pointer to an allocated memory location that holds a NativeFormat encoded string. - /// This is used for the creation of RuntimeFieldHandles from metadata. 
- /// - /// - /// - public unsafe IntPtr GetNativeFormatStringForString(string str) - { - using (_typeLoaderLock.EnterScope()) - { - IntPtr result; - if (_nativeFormatStrings.TryGetValue(str, out result)) - return result; - - NativePrimitiveEncoder stringEncoder = default; - stringEncoder.Init(); - byte[] utf8Bytes = Encoding.UTF8.GetBytes(str); - stringEncoder.WriteUnsigned(checked((uint)utf8Bytes.Length)); - foreach (byte b in utf8Bytes) - stringEncoder.WriteByte(b); - - void* allocatedNativeFormatString = MemoryHelpers.AllocateMemory(stringEncoder.Size); - unsafe - { - stringEncoder.Save((byte*)allocatedNativeFormatString, stringEncoder.Size); - } - _nativeFormatStrings.Add(str, (IntPtr)allocatedNativeFormatString); - return (IntPtr)allocatedNativeFormatString; - } - } - - private LowLevelDictionary _nativeFormatStrings = new LowLevelDictionary(); - #endregion - - #region Ldtoken Hashtables private struct RuntimeFieldHandleKey : IEquatable { private RuntimeTypeHandle _declaringType; - private string _fieldName; - private int _hashcode; + private FieldHandle _handle; - public RuntimeFieldHandleKey(RuntimeTypeHandle declaringType, string fieldName) + public RuntimeFieldHandleKey(RuntimeTypeHandle declaringType, FieldHandle fieldHandle) { _declaringType = declaringType; - _fieldName = fieldName; - _hashcode = declaringType.GetHashCode() ^ fieldName.GetHashCode(); + _handle = fieldHandle; } public override bool Equals(object obj) @@ -108,37 +46,27 @@ public override bool Equals(object obj) public bool Equals(RuntimeFieldHandleKey other) { - return other._declaringType.Equals(_declaringType) && other._fieldName == _fieldName; + return other._declaringType.Equals(_declaringType) && other._handle.Equals(_handle); } - public override int GetHashCode() { return _hashcode; } + public override int GetHashCode() => _declaringType.GetHashCode() ^ _handle.GetHashCode(); } private struct RuntimeMethodHandleKey : IEquatable { private RuntimeTypeHandle _declaringType; - private string _methodName; - private RuntimeSignature _signature; + private MethodHandle _handle; private RuntimeTypeHandle[] _genericArgs; - private int _hashcode; - public RuntimeMethodHandleKey(RuntimeTypeHandle declaringType, string methodName, RuntimeSignature signature, RuntimeTypeHandle[] genericArgs) + public RuntimeMethodHandleKey(RuntimeTypeHandle declaringType, MethodHandle handle, RuntimeTypeHandle[] genericArgs) { // genericArgs will be null if this is a (typical or not) method definition // genericArgs are non-null only for instantiated generic methods. Debug.Assert(genericArgs == null || genericArgs.Length > 0); _declaringType = declaringType; - _methodName = methodName; - _signature = signature; + _handle = handle; _genericArgs = genericArgs; - int methodNameHashCode = methodName == null ? 
0 : methodName.GetHashCode(); - _hashcode = methodNameHashCode ^ signature.GetHashCode(); - - if (genericArgs != null) - _hashcode ^= TypeHashingAlgorithms.ComputeGenericInstanceHashCode(declaringType.GetHashCode(), genericArgs); - else - _hashcode ^= declaringType.GetHashCode(); } public override bool Equals(object obj) @@ -152,7 +80,7 @@ public override bool Equals(object obj) public bool Equals(RuntimeMethodHandleKey other) { - if (!_declaringType.Equals(other._declaringType) || _methodName != other._methodName || !_signature.Equals(other._signature)) + if (!_declaringType.Equals(other._declaringType) || !_handle.Equals(other._handle)) return false; if ((_genericArgs == null) != (other._genericArgs == null)) @@ -171,7 +99,10 @@ public bool Equals(RuntimeMethodHandleKey other) return true; } - public override int GetHashCode() { return _hashcode; } + public override int GetHashCode() + => _handle.GetHashCode() ^ (_genericArgs == null + ? _declaringType.GetHashCode() + : TypeHashingAlgorithms.ComputeGenericInstanceHashCode(_declaringType.GetHashCode(), _genericArgs)); } private LowLevelDictionary _runtimeFieldHandles = new LowLevelDictionary(); @@ -180,30 +111,23 @@ public bool Equals(RuntimeMethodHandleKey other) #region Field Ldtoken Functions - public RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, string fieldName) + public unsafe RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, int handle) { - IntPtr nameAsIntPtr = GetNativeFormatStringForString(fieldName); - return GetRuntimeFieldHandleForComponents(declaringTypeHandle, nameAsIntPtr); + return GetRuntimeFieldHandleForComponents(declaringTypeHandle, handle.AsHandle().ToFieldHandle(null)); } - public unsafe RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, IntPtr fieldName) + public unsafe RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, FieldHandle handle) { - string fieldNameStr = GetStringFromMemoryInNativeFormat(fieldName); - - RuntimeFieldHandleKey key = new RuntimeFieldHandleKey(declaringTypeHandle, fieldNameStr); - RuntimeFieldHandle runtimeFieldHandle = default(RuntimeFieldHandle); + RuntimeFieldHandleKey key = new RuntimeFieldHandleKey(declaringTypeHandle, handle); lock (_runtimeFieldHandles) { - if (!_runtimeFieldHandles.TryGetValue(key, out runtimeFieldHandle)) + if (!_runtimeFieldHandles.TryGetValue(key, out RuntimeFieldHandle runtimeFieldHandle)) { - DynamicFieldHandleInfo* fieldData = (DynamicFieldHandleInfo*)MemoryHelpers.AllocateMemory(sizeof(DynamicFieldHandleInfo)); - fieldData->DeclaringType = *(IntPtr*)&declaringTypeHandle; - fieldData->FieldName = fieldName; - - // Special flag (lowest bit set) in the handle value to indicate it was dynamically allocated - IntPtr runtimeFieldHandleValue = (IntPtr)fieldData + 1; - runtimeFieldHandle = *(RuntimeFieldHandle*)&runtimeFieldHandleValue; + FieldHandleInfo* fieldData = (FieldHandleInfo*)MemoryHelpers.AllocateMemory(sizeof(FieldHandleInfo)); + fieldData->DeclaringType = declaringTypeHandle; + fieldData->Handle = handle; + runtimeFieldHandle = RuntimeFieldHandle.FromIntPtr((nint)fieldData); _runtimeFieldHandles.Add(key, runtimeFieldHandle); } @@ -212,102 +136,50 @@ public unsafe RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeH } } - public bool TryGetRuntimeFieldHandleComponents(RuntimeFieldHandle runtimeFieldHandle, out RuntimeTypeHandle declaringTypeHandle, out string fieldName) + 
public unsafe bool TryGetRuntimeFieldHandleComponents(RuntimeFieldHandle runtimeFieldHandle, out RuntimeTypeHandle declaringTypeHandle, out FieldHandle fieldHandle) { - return runtimeFieldHandle.IsDynamic() ? - TryGetDynamicRuntimeFieldHandleComponents(runtimeFieldHandle, out declaringTypeHandle, out fieldName) : - TryGetStaticRuntimeFieldHandleComponents(runtimeFieldHandle, out declaringTypeHandle, out fieldName); - } - - private unsafe bool TryGetDynamicRuntimeFieldHandleComponents(RuntimeFieldHandle runtimeFieldHandle, out RuntimeTypeHandle declaringTypeHandle, out string fieldName) - { - IntPtr runtimeFieldHandleValue = *(IntPtr*)&runtimeFieldHandle; - - // Special flag in the handle value to indicate it was dynamically allocated - Debug.Assert((runtimeFieldHandleValue & 0x1) == 0x1); - - DynamicFieldHandleInfo* fieldData = (DynamicFieldHandleInfo*)(runtimeFieldHandleValue - 1); - declaringTypeHandle = *(RuntimeTypeHandle*)&(fieldData->DeclaringType); - - // FieldName points to the field name in NativeLayout format, so we parse it using a NativeParser - IntPtr fieldNamePtr = fieldData->FieldName; - fieldName = GetStringFromMemoryInNativeFormat(fieldNamePtr); - - return true; - } - - private unsafe bool TryGetStaticRuntimeFieldHandleComponents(RuntimeFieldHandle runtimeFieldHandle, out RuntimeTypeHandle declaringTypeHandle, out string fieldName) - { - fieldName = null; - declaringTypeHandle = default(RuntimeTypeHandle); - - // Make sure it's not a dynamically allocated RuntimeFieldHandle before we attempt to use it to parse native layout data - Debug.Assert(((*(IntPtr*)&runtimeFieldHandle).ToInt64() & 0x1) == 0); - - RuntimeFieldHandleInfo* fieldData = *(RuntimeFieldHandleInfo**)&runtimeFieldHandle; - RuntimeSignature signature; - - // The native layout info signature is a pair. - // The first is a pointer that points to the TypeManager indirection cell. - // The second is the offset into the native layout info blob in that TypeManager, where the native signature is encoded. - IntPtr* nativeLayoutInfoSignatureData = (IntPtr*)fieldData->NativeLayoutInfoSignature; - - signature = RuntimeSignature.CreateFromNativeLayoutSignature( - new TypeManagerHandle(*(IntPtr*)nativeLayoutInfoSignatureData[0]), - (uint)nativeLayoutInfoSignatureData[1].ToInt32()); - - RuntimeSignature remainingSignature; - if (!GetTypeFromSignatureAndContext(signature, null, null, out declaringTypeHandle, out remainingSignature)) - return false; - - // GetTypeFromSignatureAndContext parses the type from the signature and returns a pointer to the next - // part of the native layout signature to read which we get the field name from - var reader = GetNativeLayoutInfoReader(remainingSignature); - var parser = new NativeParser(reader, remainingSignature.NativeLayoutOffset); - fieldName = parser.GetString(); - + FieldHandleInfo* fieldData = (FieldHandleInfo*)runtimeFieldHandle.Value; + declaringTypeHandle = fieldData->DeclaringType; + fieldHandle = fieldData->Handle; return true; } #endregion #region Method Ldtoken Functions + public unsafe RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, int handle, RuntimeTypeHandle[] genericMethodArgs) + => GetRuntimeMethodHandleForComponents(declaringTypeHandle, handle.AsHandle().ToMethodHandle(null), genericMethodArgs); + /// /// Create a runtime method handle from name, signature and generic arguments. 
If the methodSignature /// is constructed from a metadata token, the methodName should be IntPtr.Zero, as it already encodes the method /// name. /// - public unsafe RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, IntPtr methodName, RuntimeSignature methodSignature, RuntimeTypeHandle[] genericMethodArgs) + public unsafe RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, MethodHandle handle, RuntimeTypeHandle[] genericMethodArgs) { - string methodNameStr = methodName == IntPtr.Zero ? null : GetStringFromMemoryInNativeFormat(methodName); - - RuntimeMethodHandleKey key = new RuntimeMethodHandleKey(declaringTypeHandle, methodNameStr, methodSignature, genericMethodArgs); - RuntimeMethodHandle runtimeMethodHandle = default(RuntimeMethodHandle); + RuntimeMethodHandleKey key = new RuntimeMethodHandleKey(declaringTypeHandle, handle, genericMethodArgs); lock (_runtimeMethodHandles) { - if (!_runtimeMethodHandles.TryGetValue(key, out runtimeMethodHandle)) + if (!_runtimeMethodHandles.TryGetValue(key, out RuntimeMethodHandle runtimeMethodHandle)) { - int sizeToAllocate = sizeof(DynamicMethodHandleInfo); + int sizeToAllocate = sizeof(MethodHandleInfo); int numGenericMethodArgs = genericMethodArgs == null ? 0 : genericMethodArgs.Length; // Use checked arithmetics to ensure there aren't any overflows/truncations sizeToAllocate = checked(sizeToAllocate + (numGenericMethodArgs > 0 ? sizeof(IntPtr) * (numGenericMethodArgs - 1) : 0)); - DynamicMethodHandleInfo* methodData = (DynamicMethodHandleInfo*)MemoryHelpers.AllocateMemory(sizeToAllocate); - methodData->DeclaringType = *(IntPtr*)&declaringTypeHandle; - methodData->MethodName = methodName; - methodData->MethodSignature = methodSignature; + MethodHandleInfo* methodData = (MethodHandleInfo*)MemoryHelpers.AllocateMemory(sizeToAllocate); + methodData->DeclaringType = declaringTypeHandle; + methodData->Handle = handle; methodData->NumGenericArgs = numGenericMethodArgs; - IntPtr* genericArgPtr = &(methodData->GenericArgsArray); + RuntimeTypeHandle* genericArgPtr = &methodData->FirstArgument; for (int i = 0; i < numGenericMethodArgs; i++) { RuntimeTypeHandle currentArg = genericMethodArgs[i]; - genericArgPtr[i] = *(IntPtr*)¤tArg; + genericArgPtr[i] = currentArg; } - // Special flag in the handle value to indicate it was dynamically allocated, and doesn't point into the InvokeMap blob - IntPtr runtimeMethodHandleValue = (IntPtr)methodData + 1; - runtimeMethodHandle = *(RuntimeMethodHandle*)&runtimeMethodHandleValue; + runtimeMethodHandle = RuntimeMethodHandle.FromIntPtr((nint)methodData); _runtimeMethodHandles.Add(key, runtimeMethodHandle); } @@ -315,74 +187,16 @@ public unsafe RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTyp return runtimeMethodHandle; } } - public RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, string methodName, RuntimeSignature methodSignature, RuntimeTypeHandle[] genericMethodArgs) - { - IntPtr nameAsIntPtr = GetNativeFormatStringForString(methodName); - return GetRuntimeMethodHandleForComponents(declaringTypeHandle, nameAsIntPtr, methodSignature, genericMethodArgs); - } public MethodDesc GetMethodDescForRuntimeMethodHandle(TypeSystemContext context, RuntimeMethodHandle runtimeMethodHandle) { - return runtimeMethodHandle.IsDynamic() ? 
- GetMethodDescForDynamicRuntimeMethodHandle(context, runtimeMethodHandle) : - GetMethodDescForStaticRuntimeMethodHandle(context, runtimeMethodHandle); - } - - public bool TryGetRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out MethodNameAndSignature nameAndSignature, out RuntimeTypeHandle[] genericMethodArgs) - { - return runtimeMethodHandle.IsDynamic() ? - TryGetDynamicRuntimeMethodHandleComponents(runtimeMethodHandle, out declaringTypeHandle, out nameAndSignature, out genericMethodArgs) : - TryGetStaticRuntimeMethodHandleComponents(runtimeMethodHandle, out declaringTypeHandle, out nameAndSignature, out genericMethodArgs); - } - - private unsafe bool TryGetDynamicRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out MethodNameAndSignature nameAndSignature, out RuntimeTypeHandle[] genericMethodArgs) - { - IntPtr runtimeMethodHandleValue = *(IntPtr*)&runtimeMethodHandle; - - // Special flag in the handle value to indicate it was dynamically allocated, and doesn't point into the InvokeMap blob - Debug.Assert((runtimeMethodHandleValue & 0x1) == 0x1); - - DynamicMethodHandleInfo* methodData = (DynamicMethodHandleInfo*)(runtimeMethodHandleValue - 1); - - declaringTypeHandle = *(RuntimeTypeHandle*)&(methodData->DeclaringType); - genericMethodArgs = null; - - if (methodData->NumGenericArgs > 0) - { - IntPtr* genericArgPtr = &(methodData->GenericArgsArray); - genericMethodArgs = new RuntimeTypeHandle[methodData->NumGenericArgs]; - for (int i = 0; i < methodData->NumGenericArgs; i++) - { - genericMethodArgs[i] = *(RuntimeTypeHandle*)&(genericArgPtr[i]); - } - } - - if (methodData->MethodSignature.IsNativeLayoutSignature) - { - // MethodName points to the method name in NativeLayout format, so we parse it using a NativeParser - IntPtr methodNamePtr = methodData->MethodName; - string name = GetStringFromMemoryInNativeFormat(methodNamePtr); - - nameAndSignature = new MethodNameAndSignature(name, methodData->MethodSignature); - } - else - { - ModuleInfo moduleInfo = methodData->MethodSignature.GetModuleInfo(); - var metadataReader = ((NativeFormatModuleInfo)moduleInfo).MetadataReader; - var methodHandle = methodData->MethodSignature.Token.AsHandle().ToMethodHandle(metadataReader); - var method = methodHandle.GetMethod(metadataReader); - var name = metadataReader.GetConstantStringValue(method.Name).Value; - nameAndSignature = new MethodNameAndSignature(name, methodData->MethodSignature); - } - - return true; - } - public MethodDesc GetMethodDescForDynamicRuntimeMethodHandle(TypeSystemContext context, RuntimeMethodHandle runtimeMethodHandle) - { - bool success = TryGetDynamicRuntimeMethodHandleComponents(runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, - out MethodNameAndSignature nameAndSignature, out RuntimeTypeHandle[] genericMethodArgs); + bool success = TryGetRuntimeMethodHandleComponents(runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, + out MethodHandle handle, out RuntimeTypeHandle[] genericMethodArgs); Debug.Assert(success); + MetadataReader reader = ModuleList.Instance.GetMetadataReaderForModule(RuntimeAugments.GetModuleFromTypeHandle(declaringTypeHandle)); + MethodNameAndSignature nameAndSignature = new MethodNameAndSignature(reader, handle); + DefType type = (DefType)context.ResolveRuntimeTypeHandle(declaringTypeHandle); if (genericMethodArgs != null) @@ -394,57 +208,40 @@ public MethodDesc 
GetMethodDescForDynamicRuntimeMethodHandle(TypeSystemContext c return context.ResolveRuntimeMethod(unboxingStub: false, type, nameAndSignature); } - private unsafe bool TryGetStaticRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out MethodNameAndSignature nameAndSignature, out RuntimeTypeHandle[] genericMethodArgs) + public unsafe bool TryGetRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out QMethodDefinition handle, out RuntimeTypeHandle[] genericMethodArgs) { - declaringTypeHandle = default(RuntimeTypeHandle); - nameAndSignature = null; - genericMethodArgs = null; - - TypeSystemContext context = TypeSystemContextFactory.Create(); + if (TryGetRuntimeMethodHandleComponents(runtimeMethodHandle, out declaringTypeHandle, out MethodHandle methodHandle, out genericMethodArgs)) + { + MetadataReader reader = ModuleList.Instance.GetMetadataReaderForModule(RuntimeAugments.GetModuleFromTypeHandle(declaringTypeHandle)); + handle = new QMethodDefinition(reader, methodHandle); + return true; + } + handle = default; + return false; + } - MethodDesc parsedMethod = GetMethodDescForStaticRuntimeMethodHandle(context, runtimeMethodHandle); + public unsafe bool TryGetRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out MethodHandle handle, out RuntimeTypeHandle[] genericMethodArgs) + { + MethodHandleInfo* methodData = (MethodHandleInfo*)runtimeMethodHandle.Value; - if (!EnsureTypeHandleForType(parsedMethod.OwningType)) - return false; + declaringTypeHandle = methodData->DeclaringType; + handle = methodData->Handle; - declaringTypeHandle = parsedMethod.OwningType.RuntimeTypeHandle; - nameAndSignature = parsedMethod.NameAndSignature; - if (!parsedMethod.IsMethodDefinition && parsedMethod.Instantiation.Length > 0) + if (methodData->NumGenericArgs > 0) { - genericMethodArgs = new RuntimeTypeHandle[parsedMethod.Instantiation.Length]; - for (int i = 0; i < parsedMethod.Instantiation.Length; ++i) + RuntimeTypeHandle* genericArgPtr = (RuntimeTypeHandle*)&methodData->FirstArgument; + genericMethodArgs = new RuntimeTypeHandle[methodData->NumGenericArgs]; + for (int i = 0; i < methodData->NumGenericArgs; i++) { - if (!EnsureTypeHandleForType(parsedMethod.Instantiation[i])) - return false; - - genericMethodArgs[i] = parsedMethod.Instantiation[i].RuntimeTypeHandle; + genericMethodArgs[i] = genericArgPtr[i]; } } - - TypeSystemContextFactory.Recycle(context); + else + { + genericMethodArgs = null; + } return true; } - - public unsafe MethodDesc GetMethodDescForStaticRuntimeMethodHandle(TypeSystemContext context, RuntimeMethodHandle runtimeMethodHandle) - { - // Make sure it's not a dynamically allocated RuntimeMethodHandle before we attempt to use it to parse native layout data - Debug.Assert(((*(IntPtr*)&runtimeMethodHandle).ToInt64() & 0x1) == 0); - - RuntimeMethodHandleInfo* methodData = *(RuntimeMethodHandleInfo**)&runtimeMethodHandle; - RuntimeSignature signature; - - // The native layout info signature is a pair. - // The first is a pointer that points to the TypeManager indirection cell. - // The second is the offset into the native layout info blob in that TypeManager, where the native signature is encoded. 
- IntPtr* nativeLayoutInfoSignatureData = (IntPtr*)methodData->NativeLayoutInfoSignature; - - signature = RuntimeSignature.CreateFromNativeLayoutSignature( - new TypeManagerHandle(*(IntPtr*)nativeLayoutInfoSignatureData[0]), - (uint)nativeLayoutInfoSignatureData[1].ToInt32()); - - RuntimeSignature remainingSignature; - return GetMethodFromSignatureAndContext(context, signature, null, null, out remainingSignature); - } #endregion } } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.Metadata.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.Metadata.cs index 6c94d937a873..0545b31ada27 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.Metadata.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.Metadata.cs @@ -414,7 +414,7 @@ internal static IntPtr TryGetDefaultConstructorForType(TypeDesc type) { if (type is DefType defType) { - CanonicallyEquivalentEntryLocator canonHelperSpecific = new CanonicallyEquivalentEntryLocator(defType, CanonicalFormKind.Specific); + CanonicallyEquivalentEntryLocator canonHelperSpecific = new CanonicallyEquivalentEntryLocator(defType); foreach (NativeFormatModuleInfo module in ModuleList.EnumerateModules()) { @@ -436,7 +436,7 @@ internal static IntPtr TryGetDefaultConstructorForType(TypeDesc type) /// Function pointer representing the constructor, IntPtr.Zero when not found public IntPtr TryGetDefaultConstructorForType(RuntimeTypeHandle runtimeTypeHandle) { - CanonicallyEquivalentEntryLocator canonHelperSpecific = new CanonicallyEquivalentEntryLocator(runtimeTypeHandle, CanonicalFormKind.Specific); + CanonicallyEquivalentEntryLocator canonHelperSpecific = new CanonicallyEquivalentEntryLocator(runtimeTypeHandle); foreach (NativeFormatModuleInfo module in ModuleList.EnumerateModules(RuntimeAugments.GetModuleFromTypeHandle(runtimeTypeHandle))) { @@ -495,11 +495,12 @@ public struct VirtualResolveDataResult public bool IsGVM; } - public static bool TryGetVirtualResolveData(NativeFormatModuleInfo module, - RuntimeTypeHandle methodHandleDeclaringType, RuntimeTypeHandle[] genericArgs, - ref MethodSignatureComparer methodSignatureComparer, + public static bool TryGetVirtualResolveData( + RuntimeTypeHandle methodHandleDeclaringType, QMethodDefinition method, RuntimeTypeHandle[] genericArgs, out VirtualResolveDataResult lookupResult) { + NativeFormatModuleInfo module = ModuleList.Instance.GetModuleInfoForMetadataReader(method.NativeFormatReader); + lookupResult = default(VirtualResolveDataResult); NativeReader invokeMapReader = GetNativeReaderForBlob(module, ReflectionMapBlob.VirtualInvokeMap); NativeParser invokeMapParser = new NativeParser(invokeMapReader, 0); @@ -520,9 +521,7 @@ public static bool TryGetVirtualResolveData(NativeFormatModuleInfo module, // of the method's containing type instead of the open type definition. 
// - CanonicallyEquivalentEntryLocator canonHelper = new CanonicallyEquivalentEntryLocator( - methodHandleDeclaringType, - CanonicalFormKind.Specific); + CanonicallyEquivalentEntryLocator canonHelper = new CanonicallyEquivalentEntryLocator(methodHandleDeclaringType); var lookup = invokeHashtable.Lookup(canonHelper.LookupHashCode); @@ -531,19 +530,18 @@ public static bool TryGetVirtualResolveData(NativeFormatModuleInfo module, { // Grammar of an entry in the hash table: // Virtual Method uses a normal slot - // TypeKey + NameAndSig metadata offset into the native layout metadata + (NumberOfStepsUpParentHierarchyToType << 1) + slot + // TypeKey + Handle + (NumberOfStepsUpParentHierarchyToType << 1) + slot // OR // Generic Virtual Method - // TypeKey + NameAndSig metadata offset into the native layout metadata + (NumberOfStepsUpParentHierarchyToType << 1 + 1) + // TypeKey + Handle + (NumberOfStepsUpParentHierarchyToType << 1 + 1) RuntimeTypeHandle entryType = externalReferences.GetRuntimeTypeHandleFromIndex(entryParser.GetUnsigned()); if (!canonHelper.IsCanonicallyEquivalent(entryType)) continue; - uint nameAndSigPointerToken = entryParser.GetUnsigned(); + int token = (int)entryParser.GetUnsigned(); - MethodNameAndSignature nameAndSig = TypeLoaderEnvironment.Instance.GetMethodNameAndSignatureFromNativeLayoutOffset(module.Handle, nameAndSigPointerToken); - if (!methodSignatureComparer.IsMatchingNativeLayoutMethodNameAndSignature(nameAndSig.Name, nameAndSig.Signature)) + if (!token.AsHandle().ToMethodHandle(module.MetadataReader).Equals(method.NativeFormatHandle)) { continue; } @@ -567,16 +565,7 @@ public static bool TryGetVirtualResolveData(NativeFormatModuleInfo module, if (isGenericVirtualMethod) { - RuntimeSignature methodName; - RuntimeSignature methodSignature; - - if (!TypeLoaderEnvironment.Instance.TryGetMethodNameAndSignaturePointersFromNativeLayoutSignature(module.Handle, nameAndSigPointerToken, out methodName, out methodSignature)) - { - Debug.Assert(false); - return false; - } - - RuntimeMethodHandle gvmSlot = TypeLoaderEnvironment.Instance.GetRuntimeMethodHandleForComponents(declaringTypeOfVirtualInvoke, methodName.NativeLayoutSignature(), methodSignature, genericArgs); + RuntimeMethodHandle gvmSlot = TypeLoaderEnvironment.Instance.GetRuntimeMethodHandleForComponents(declaringTypeOfVirtualInvoke, token, genericArgs); lookupResult = new VirtualResolveDataResult { @@ -610,16 +599,12 @@ public static bool TryGetVirtualResolveData(NativeFormatModuleInfo module, /// Declaring type for the method /// Method handle /// Handles of generic argument types - /// Helper class used to compare method signatures - /// Canonical form to use /// Output - metadata information for method invoker construction /// true when found, false otherwise public static bool TryGetMethodInvokeMetadata( RuntimeTypeHandle declaringTypeHandle, QMethodDefinition methodHandle, RuntimeTypeHandle[] genericMethodTypeArgumentHandles, - ref MethodSignatureComparer methodSignatureComparer, - CanonicalFormKind canonFormKind, out MethodInvokeMetadata methodInvokeMetadata) { if (methodHandle.IsNativeFormatMetadataBased) @@ -629,8 +614,6 @@ public static bool TryGetMethodInvokeMetadata( declaringTypeHandle, methodHandle.NativeFormatHandle, genericMethodTypeArgumentHandles, - ref methodSignatureComparer, - canonFormKind, out methodInvokeMetadata)) { return true; @@ -648,8 +631,6 @@ public static bool TryGetMethodInvokeMetadata( /// Declaring type for the method /// Method handle /// Handles of generic argument types - /// 
Helper class used to compare method signatures - /// Canonical form to use /// Output - metadata information for method invoker construction /// true when found, false otherwise private static bool TryGetMethodInvokeMetadataFromInvokeMap( @@ -657,11 +638,9 @@ private static bool TryGetMethodInvokeMetadataFromInvokeMap( RuntimeTypeHandle declaringTypeHandle, MethodHandle methodHandle, RuntimeTypeHandle[] genericMethodTypeArgumentHandles, - ref MethodSignatureComparer methodSignatureComparer, - CanonicalFormKind canonFormKind, out MethodInvokeMetadata methodInvokeMetadata) { - CanonicallyEquivalentEntryLocator canonHelper = new CanonicallyEquivalentEntryLocator(declaringTypeHandle, canonFormKind); + CanonicallyEquivalentEntryLocator canonHelper = new CanonicallyEquivalentEntryLocator(declaringTypeHandle); TypeManagerHandle methodHandleModule = ModuleList.Instance.GetModuleForMetadataReader(metadataReader); foreach (NativeFormatModuleInfo module in ModuleList.EnumerateModules(RuntimeAugments.GetModuleFromTypeHandle(declaringTypeHandle))) @@ -681,7 +660,6 @@ private static bool TryGetMethodInvokeMetadataFromInvokeMap( var lookup = invokeHashtable.Lookup(canonHelper.LookupHashCode); var entryData = new InvokeMapEntryDataEnumerator( new PreloadedTypeComparator(declaringTypeHandle, genericMethodTypeArgumentHandles), - canonFormKind, module.Handle, methodHandle, methodHandleModule); @@ -689,7 +667,7 @@ private static bool TryGetMethodInvokeMetadataFromInvokeMap( NativeParser entryParser; while (!(entryParser = lookup.GetNext()).IsNull) { - entryData.GetNext(ref entryParser, ref externalReferences, ref methodSignatureComparer, canonHelper); + entryData.GetNext(ref entryParser, ref externalReferences, canonHelper); if (!entryData.IsMatchingOrCompatibleEntry()) continue; @@ -778,7 +756,6 @@ private struct InvokeMapEntryDataEnumerator - /// Metadata reader corresponding to the method declaring type - /// - private readonly MetadataReader _metadataReader; - - /// - /// Method handle - /// - private readonly MethodHandle _methodHandle; - - /// - /// Method instance obtained from the method handle - /// - private readonly Method _method; - - /// - /// Method signature - /// - private readonly MethodSignature _methodSignature; - - /// - /// true = this is a static method - /// - private readonly bool _isStatic; - - /// - /// true = this is a generic method - /// - private readonly bool _isGeneric; - - public MethodSignatureComparer( - QMethodDefinition methodHandle) - { - if (methodHandle.IsNativeFormatMetadataBased) - { - _metadataReader = methodHandle.NativeFormatReader; - _methodHandle = methodHandle.NativeFormatHandle; - - _method = _methodHandle.GetMethod(_metadataReader); - - _methodSignature = _method.Signature.GetMethodSignature(_metadataReader); - _isGeneric = (_methodSignature.GenericParameterCount != 0); - - // Precalculate initial method attributes used in signature queries - _isStatic = (_method.Flags & MethodAttributes.Static) != 0; - } - else - { - _metadataReader = null; - _methodHandle = default(MethodHandle); - _method = default(Method); - _methodSignature = default(MethodSignature); - _isGeneric = false; - _isStatic = false; - } - } - - /// - /// Construct a comparer between NativeFormat metadata methods and native layouts - /// - /// Metadata reader for the method declaring type - /// Handle of method to compare - public MethodSignatureComparer( - MetadataReader metadataReader, - MethodHandle methodHandle) - { - _metadataReader = metadataReader; - _methodHandle = methodHandle; - - 
_method = methodHandle.GetMethod(metadataReader); - - _methodSignature = _method.Signature.GetMethodSignature(_metadataReader); - _isGeneric = (_methodSignature.GenericParameterCount != 0); - - // Precalculate initial method attributes used in signature queries - _isStatic = (_method.Flags & MethodAttributes.Static) != 0; - } - - public bool IsMatchingNativeLayoutMethodNameAndSignature(string name, RuntimeSignature signature) - { - return _method.Name.StringEquals(name, _metadataReader) && - IsMatchingNativeLayoutMethodSignature(signature); - } - - public bool IsMatchingNativeLayoutMethodSignature(RuntimeSignature signature) - { - NativeParser parser = GetNativeParserForSignature(signature); - - if (!CompareCallingConventions((MethodCallingConvention)parser.GetUnsigned())) - return false; - - if (_isGeneric) - { - uint genericParamCount1 = parser.GetUnsigned(); - int genericParamCount2 = _methodSignature.GenericParameterCount; - - if (genericParamCount1 != genericParamCount2) - return false; - } - - uint parameterCount = parser.GetUnsigned(); - - if (!CompareTypeSigWithType(ref parser, new TypeManagerHandle(signature.ModuleHandle), _methodSignature.ReturnType)) - { - return false; - } - - uint parameterIndexToMatch = 0; - foreach (Handle parameterSignature in _methodSignature.Parameters) - { - if (parameterIndexToMatch >= parameterCount) - { - // The metadata-defined _method has more parameters than the native layout - return false; - } - if (!CompareTypeSigWithType(ref parser, new TypeManagerHandle(signature.ModuleHandle), parameterSignature)) - return false; - parameterIndexToMatch++; - } - - // Make sure that all native layout parameters have been matched - return parameterIndexToMatch == parameterCount; - } - - /// - /// Look up module containing given nativesignature and return the appropriate native parser. - /// - /// Signature to look up - /// Native parser for the signature - internal static NativeParser GetNativeParserForSignature(RuntimeSignature signature) - { - Debug.Assert(signature.IsNativeLayoutSignature); - NativeFormatModuleInfo module = ModuleList.Instance.GetModuleInfoByHandle(new TypeManagerHandle(signature.ModuleHandle)); - - NativeReader reader = TypeLoaderEnvironment.GetNativeReaderForBlob(module, ReflectionMapBlob.NativeLayoutInfo); - return new NativeParser(reader, signature.NativeLayoutOffset); - } - - private bool CompareTypeSigWithType(ref NativeParser parser, TypeManagerHandle moduleHandle, Handle typeHandle) - { - while (typeHandle.HandleType == HandleType.TypeSpecification - || typeHandle.HandleType == HandleType.ModifiedType) - { - if (typeHandle.HandleType == HandleType.TypeSpecification) - { - typeHandle = typeHandle - .ToTypeSpecificationHandle(_metadataReader) - .GetTypeSpecification(_metadataReader) - .Signature; - } - else - { - typeHandle = typeHandle - .ToModifiedTypeHandle(_metadataReader) - .GetModifiedType(_metadataReader) - .Type; - } - } - - // startOffset lets us backtrack to the TypeSignatureKind for external types since the TypeLoader - // expects to read it in. 
- uint startOffset = parser.Offset; - - uint data; - var typeSignatureKind = parser.GetTypeSignatureKind(out data); - - switch (typeSignatureKind) - { - case TypeSignatureKind.Lookback: - { - NativeParser lookbackParser = parser.GetLookbackParser(data); - return CompareTypeSigWithType(ref lookbackParser, moduleHandle, typeHandle); - } - - case TypeSignatureKind.Modifier: - { - // Ensure the modifier kind (vector, pointer, byref) is the same - TypeModifierKind modifierKind = (TypeModifierKind)data; - switch (modifierKind) - { - case TypeModifierKind.Array: - if (typeHandle.HandleType == HandleType.SZArraySignature) - { - return CompareTypeSigWithType(ref parser, moduleHandle, typeHandle - .ToSZArraySignatureHandle(_metadataReader) - .GetSZArraySignature(_metadataReader) - .ElementType); - } - return false; - - case TypeModifierKind.ByRef: - if (typeHandle.HandleType == HandleType.ByReferenceSignature) - { - return CompareTypeSigWithType(ref parser, moduleHandle, typeHandle - .ToByReferenceSignatureHandle(_metadataReader) - .GetByReferenceSignature(_metadataReader) - .Type); - } - return false; - - case TypeModifierKind.Pointer: - if (typeHandle.HandleType == HandleType.PointerSignature) - { - return CompareTypeSigWithType(ref parser, moduleHandle, typeHandle - .ToPointerSignatureHandle(_metadataReader) - .GetPointerSignature(_metadataReader) - .Type); - } - return false; - - default: - Debug.Assert(null == "invalid type modifier kind"); - return false; - } - } - - case TypeSignatureKind.Variable: - { - bool isMethodVar = (data & 0x1) == 1; - uint index = data >> 1; - - if (isMethodVar) - { - if (typeHandle.HandleType == HandleType.MethodTypeVariableSignature) - { - return index == typeHandle - .ToMethodTypeVariableSignatureHandle(_metadataReader) - .GetMethodTypeVariableSignature(_metadataReader) - .Number; - } - } - else - { - if (typeHandle.HandleType == HandleType.TypeVariableSignature) - { - return index == typeHandle - .ToTypeVariableSignatureHandle(_metadataReader) - .GetTypeVariableSignature(_metadataReader) - .Number; - } - } - - return false; - } - - case TypeSignatureKind.MultiDimArray: - { - if (typeHandle.HandleType != HandleType.ArraySignature) - { - return false; - } - - ArraySignature sig = typeHandle - .ToArraySignatureHandle(_metadataReader) - .GetArraySignature(_metadataReader); - - if (data != sig.Rank) - return false; - - if (!CompareTypeSigWithType(ref parser, moduleHandle, sig.ElementType)) - return false; - - uint boundCount1 = parser.GetUnsigned(); - for (uint i = 0; i < boundCount1; i++) - { - parser.GetUnsigned(); - } - - uint lowerBoundCount1 = parser.GetUnsigned(); - - for (uint i = 0; i < lowerBoundCount1; i++) - { - parser.GetUnsigned(); - } - break; - } - - case TypeSignatureKind.FunctionPointer: - { - // callingConvention is in data - uint argCount1 = parser.GetUnsigned(); - - for (uint i = 0; i < argCount1; i++) - { - if (!CompareTypeSigWithType(ref parser, moduleHandle, typeHandle)) - return false; - } - return false; - } - - case TypeSignatureKind.Instantiation: - { - if (typeHandle.HandleType != HandleType.TypeInstantiationSignature) - { - return false; - } - - TypeInstantiationSignature sig = typeHandle - .ToTypeInstantiationSignatureHandle(_metadataReader) - .GetTypeInstantiationSignature(_metadataReader); - - if (!CompareTypeSigWithType(ref parser, moduleHandle, sig.GenericType)) - { - return false; - } - - uint genericArgIndex = 0; - foreach (Handle genericArgumentTypeHandle in sig.GenericTypeArguments) - { - if (genericArgIndex >= data) - { - // The 
metadata generic has more parameters than the native layour - return false; - } - if (!CompareTypeSigWithType(ref parser, moduleHandle, genericArgumentTypeHandle)) - { - return false; - } - genericArgIndex++; - } - // Make sure all generic parameters have been matched - return genericArgIndex == data; - } - - case TypeSignatureKind.BuiltIn: - case TypeSignatureKind.External: - { - RuntimeTypeHandle type2; - switch (typeHandle.HandleType) - { - case HandleType.TypeDefinition: - if (!TypeLoaderEnvironment.TryGetNamedTypeForMetadata( - new QTypeDefinition(_metadataReader, typeHandle.ToTypeDefinitionHandle(_metadataReader)), out type2)) - { - return false; - } - break; - - default: - return false; - } - - RuntimeTypeHandle type1; - if (typeSignatureKind == TypeSignatureKind.External) - { - type1 = SigParsing.GetTypeFromNativeLayoutSignature(ref parser, moduleHandle, startOffset); - } - else - { - type1 = ((Internal.TypeSystem.WellKnownType)data).GetRuntimeTypeHandle(); - } - - return type1.Equals(type2); - } - - default: - return false; - } - return true; - } - - private bool CompareCallingConventions(MethodCallingConvention callingConvention) - { - return (callingConvention.HasFlag(MethodCallingConvention.Static) == _isStatic) && - (callingConvention.HasFlag(MethodCallingConvention.Generic) == _isGeneric); - } - } -} diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.SignatureParsing.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.SignatureParsing.cs index bc9c208f472f..eef4c2e793f8 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.SignatureParsing.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.SignatureParsing.cs @@ -22,340 +22,18 @@ namespace Internal.Runtime.TypeLoader { public sealed partial class TypeLoaderEnvironment { - public bool CompareMethodSignatures(RuntimeSignature signature1, RuntimeSignature signature2) - { - if (signature1.IsNativeLayoutSignature && signature2.IsNativeLayoutSignature) - { - if (signature1.StructuralEquals(signature2)) - return true; - - NativeFormatModuleInfo module1 = ModuleList.Instance.GetModuleInfoByHandle(new TypeManagerHandle(signature1.ModuleHandle)); - NativeReader reader1 = GetNativeLayoutInfoReader(signature1); - NativeParser parser1 = new NativeParser(reader1, signature1.NativeLayoutOffset); - - NativeFormatModuleInfo module2 = ModuleList.Instance.GetModuleInfoByHandle(new TypeManagerHandle(signature2.ModuleHandle)); - NativeReader reader2 = GetNativeLayoutInfoReader(signature2); - NativeParser parser2 = new NativeParser(reader2, signature2.NativeLayoutOffset); - - return CompareMethodSigs(parser1, module1, parser2, module2); - } - else if (signature1.IsNativeLayoutSignature) - { - int token = signature2.Token; - MetadataReader metadataReader = ModuleList.Instance.GetMetadataReaderForModule(new TypeManagerHandle(signature2.ModuleHandle)); - - MethodSignatureComparer comparer = new MethodSignatureComparer(metadataReader, token.AsHandle().ToMethodHandle(metadataReader)); - return comparer.IsMatchingNativeLayoutMethodSignature(signature1); - } - else if (signature2.IsNativeLayoutSignature) - { - int token = signature1.Token; - MetadataReader metadataReader = ModuleList.Instance.GetMetadataReaderForModule(new TypeManagerHandle(signature1.ModuleHandle)); - - MethodSignatureComparer comparer = new 
MethodSignatureComparer(metadataReader, token.AsHandle().ToMethodHandle(metadataReader)); - return comparer.IsMatchingNativeLayoutMethodSignature(signature2); - } - else - { - // For now, RuntimeSignatures are only used to compare for method signature equality (along with their Name) - // So we can implement this with the simple equals check - if (signature1.Token != signature2.Token) - return false; - - if (signature1.ModuleHandle != signature2.ModuleHandle) - return false; - - return true; - } - } - public static bool IsStaticMethodSignature(MethodNameAndSignature signature) { - Debug.Assert(signature.Signature.IsNativeLayoutSignature); - NativeReader reader = GetNativeLayoutInfoReader(signature.Signature); - NativeParser parser = new NativeParser(reader, signature.Signature.NativeLayoutOffset); - - MethodCallingConvention callingConvention = (MethodCallingConvention)parser.GetUnsigned(); - return (callingConvention & MethodCallingConvention.Static) != 0; + var method = signature.Handle.GetMethod(signature.Reader); + return (method.Flags & MethodAttributes.Static) != 0; } public uint GetGenericArgumentCountFromMethodNameAndSignature(MethodNameAndSignature signature) { - if (signature.Signature.IsNativeLayoutSignature) - { - NativeReader reader = GetNativeLayoutInfoReader(signature.Signature); - NativeParser parser = new NativeParser(reader, signature.Signature.NativeLayoutOffset); - - return GetGenericArgCountFromSig(parser); - } - else - { - ModuleInfo module = signature.Signature.GetModuleInfo(); - NativeFormatModuleInfo nativeFormatModule = (NativeFormatModuleInfo)module; - var metadataReader = nativeFormatModule.MetadataReader; - var methodHandle = signature.Signature.Token.AsHandle().ToMethodHandle(metadataReader); - - var method = methodHandle.GetMethod(metadataReader); - var methodSignature = method.Signature.GetMethodSignature(metadataReader); - return checked((uint)methodSignature.GenericParameterCount); - } - } - - public bool TryGetMethodNameAndSignaturePointersFromNativeLayoutSignature(TypeManagerHandle module, uint methodNameAndSigToken, out RuntimeSignature methodNameSig, out RuntimeSignature methodSig) - { - methodNameSig = default(RuntimeSignature); - methodSig = default(RuntimeSignature); - - NativeReader reader = GetNativeLayoutInfoReader(module); - NativeParser parser = new NativeParser(reader, methodNameAndSigToken); - if (parser.IsNull) - return false; - - methodNameSig = RuntimeSignature.CreateFromNativeLayoutSignature(module, parser.Offset); - parser.SkipString(); // methodName - - // Signatures are indirected to through a relative offset so that we don't have to parse them - // when not comparing signatures (parsing them requires resolving types and is tremendously - // expensive). 
- NativeParser sigParser = parser.GetParserFromRelativeOffset(); - methodSig = RuntimeSignature.CreateFromNativeLayoutSignature(module, sigParser.Offset); - - return true; - } - - public MethodNameAndSignature GetMethodNameAndSignatureFromNativeLayoutOffset(TypeManagerHandle moduleHandle, uint nativeLayoutOffset) - { - NativeReader reader = GetNativeLayoutInfoReader(moduleHandle); - NativeParser parser = new NativeParser(reader, nativeLayoutOffset); - return GetMethodNameAndSignature(ref parser, moduleHandle, out _, out _); - } - - internal static MethodNameAndSignature GetMethodNameAndSignature(ref NativeParser parser, TypeManagerHandle moduleHandle, out RuntimeSignature methodNameSig, out RuntimeSignature methodSig) - { - methodNameSig = RuntimeSignature.CreateFromNativeLayoutSignature(moduleHandle, parser.Offset); - string methodName = parser.GetString(); - - // Signatures are indirected to through a relative offset so that we don't have to parse them - // when not comparing signatures (parsing them requires resolving types and is tremendously - // expensive). - NativeParser sigParser = parser.GetParserFromRelativeOffset(); - methodSig = RuntimeSignature.CreateFromNativeLayoutSignature(moduleHandle, sigParser.Offset); - - return new MethodNameAndSignature(methodName, methodSig); - } - - #region Private Helpers - - private static RuntimeTypeHandle GetExternalTypeHandle(NativeFormatModuleInfo moduleHandle, uint typeIndex) - { - Debug.Assert(moduleHandle != null); - - RuntimeTypeHandle result; - - TypeSystemContext context = TypeSystemContextFactory.Create(); - { - NativeLayoutInfoLoadContext nativeLayoutContext = new NativeLayoutInfoLoadContext(); - nativeLayoutContext._module = moduleHandle; - nativeLayoutContext._typeSystemContext = context; - - TypeDesc type = nativeLayoutContext.GetExternalType(typeIndex); - result = type.RuntimeTypeHandle; - } - TypeSystemContextFactory.Recycle(context); - - Debug.Assert(!result.IsNull()); - return result; - } - - private static uint GetGenericArgCountFromSig(NativeParser parser) - { - MethodCallingConvention callingConvention = (MethodCallingConvention)parser.GetUnsigned(); - - if ((callingConvention & MethodCallingConvention.Generic) == MethodCallingConvention.Generic) - { - return parser.GetUnsigned(); - } - else - { - return 0; - } - } - - private static bool CompareMethodSigs(NativeParser parser1, NativeFormatModuleInfo moduleHandle1, NativeParser parser2, NativeFormatModuleInfo moduleHandle2) - { - MethodCallingConvention callingConvention1 = (MethodCallingConvention)parser1.GetUnsigned(); - MethodCallingConvention callingConvention2 = (MethodCallingConvention)parser2.GetUnsigned(); - - if (callingConvention1 != callingConvention2) - return false; - - if ((callingConvention1 & MethodCallingConvention.Generic) == MethodCallingConvention.Generic) - { - if (parser1.GetUnsigned() != parser2.GetUnsigned()) - return false; - } - - uint parameterCount1 = parser1.GetUnsigned(); - uint parameterCount2 = parser2.GetUnsigned(); - if (parameterCount1 != parameterCount2) - return false; - - // Compare one extra parameter to account for the return type - for (uint i = 0; i <= parameterCount1; i++) - { - if (!CompareTypeSigs(ref parser1, moduleHandle1, ref parser2, moduleHandle2)) - return false; - } - - return true; - } - - private static bool CompareTypeSigs(ref NativeParser parser1, NativeFormatModuleInfo moduleHandle1, ref NativeParser parser2, NativeFormatModuleInfo moduleHandle2) - { - // startOffset lets us backtrack to the TypeSignatureKind for 
external types since the TypeLoader - // expects to read it in. - uint data1; - uint startOffset1 = parser1.Offset; - var typeSignatureKind1 = parser1.GetTypeSignatureKind(out data1); - - // If the parser is at a lookback type, get a new parser for it and recurse. - // Since we haven't read the element type of parser2 yet, we just pass it in unchanged - if (typeSignatureKind1 == TypeSignatureKind.Lookback) - { - NativeParser lookbackParser1 = parser1.GetLookbackParser(data1); - return CompareTypeSigs(ref lookbackParser1, moduleHandle1, ref parser2, moduleHandle2); - } - - uint data2; - var typeSignatureKind2 = parser2.GetTypeSignatureKind(out data2); - - // If parser2 is a lookback type, we need to rewind parser1 to its startOffset1 - // before recursing. - if (typeSignatureKind2 == TypeSignatureKind.Lookback) - { - NativeParser lookbackParser2 = parser2.GetLookbackParser(data2); - parser1 = new NativeParser(parser1.Reader, startOffset1); - return CompareTypeSigs(ref parser1, moduleHandle1, ref lookbackParser2, moduleHandle2); - } - - if (typeSignatureKind1 != typeSignatureKind2) - return false; - - switch (typeSignatureKind1) - { - case TypeSignatureKind.Lookback: - { - // Recursion above better have removed all lookbacks - Debug.Fail("Unexpected lookback type"); - return false; - } - - case TypeSignatureKind.Modifier: - { - // Ensure the modifier kind (vector, pointer, byref) is the same - if (data1 != data2) - return false; - return CompareTypeSigs(ref parser1, moduleHandle1, ref parser2, moduleHandle2); - } - - case TypeSignatureKind.Variable: - { - // variable index is in data - if (data1 != data2) - return false; - break; - } - - case TypeSignatureKind.MultiDimArray: - { - // rank is in data - if (data1 != data2) - return false; - - if (!CompareTypeSigs(ref parser1, moduleHandle1, ref parser2, moduleHandle2)) - return false; - - uint boundCount1 = parser1.GetUnsigned(); - uint boundCount2 = parser2.GetUnsigned(); - if (boundCount1 != boundCount2) - return false; - - for (uint i = 0; i < boundCount1; i++) - { - if (parser1.GetUnsigned() != parser2.GetUnsigned()) - return false; - } - - uint lowerBoundCount1 = parser1.GetUnsigned(); - uint lowerBoundCount2 = parser2.GetUnsigned(); - if (lowerBoundCount1 != lowerBoundCount2) - return false; - - for (uint i = 0; i < lowerBoundCount1; i++) - { - if (parser1.GetUnsigned() != parser2.GetUnsigned()) - return false; - } - break; - } - - case TypeSignatureKind.FunctionPointer: - { - // callingConvention is in data - if (data1 != data2) - return false; - uint argCount1 = parser1.GetUnsigned(); - uint argCount2 = parser2.GetUnsigned(); - if (argCount1 != argCount2) - return false; - for (uint i = 0; i < argCount1; i++) - { - if (!CompareTypeSigs(ref parser1, moduleHandle1, ref parser2, moduleHandle2)) - return false; - } - break; - } - - case TypeSignatureKind.Instantiation: - { - // Type parameter count is in data - if (data1 != data2) - return false; - - if (!CompareTypeSigs(ref parser1, moduleHandle1, ref parser2, moduleHandle2)) - return false; - - for (uint i = 0; i < data1; i++) - { - if (!CompareTypeSigs(ref parser1, moduleHandle1, ref parser2, moduleHandle2)) - return false; - } - break; - } - - case TypeSignatureKind.BuiltIn: - RuntimeTypeHandle typeHandle3 = ((WellKnownType)data1).GetRuntimeTypeHandle(); - RuntimeTypeHandle typeHandle4 = ((WellKnownType)data2).GetRuntimeTypeHandle(); - if (!typeHandle3.Equals(typeHandle4)) - return false; - - break; - - case TypeSignatureKind.External: - { - RuntimeTypeHandle typeHandle1 = 
GetExternalTypeHandle(moduleHandle1, data1); - RuntimeTypeHandle typeHandle2 = GetExternalTypeHandle(moduleHandle2, data2); - if (!typeHandle1.Equals(typeHandle2)) - return false; - - break; - } - - default: - return false; - } - return true; + var metadataReader = signature.Reader; + var method = signature.Handle.GetMethod(metadataReader); + var methodSignature = method.Signature.GetMethodSignature(metadataReader); + return checked((uint)methodSignature.GenericParameterCount); } - #endregion } } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.cs index 22c07b2e837b..cb1b6462ab3f 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.cs @@ -48,19 +48,9 @@ public override IntPtr GenericLookupFromContextAndSignature(IntPtr context, IntP return TypeLoaderEnvironment.Instance.GenericLookupFromContextAndSignature(context, signature, out auxResult); } - public override bool GetRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out MethodNameAndSignature nameAndSignature, out RuntimeTypeHandle[] genericMethodArgs) + public override RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, MethodHandle handle, RuntimeTypeHandle[] genericMethodArgs) { - return TypeLoaderEnvironment.Instance.TryGetRuntimeMethodHandleComponents(runtimeMethodHandle, out declaringTypeHandle, out nameAndSignature, out genericMethodArgs); - } - - public override RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, string methodName, RuntimeSignature methodSignature, RuntimeTypeHandle[] genericMethodArgs) - { - return TypeLoaderEnvironment.Instance.GetRuntimeMethodHandleForComponents(declaringTypeHandle, methodName, methodSignature, genericMethodArgs); - } - - public override bool CompareMethodSignatures(RuntimeSignature signature1, RuntimeSignature signature2) - { - return TypeLoaderEnvironment.Instance.CompareMethodSignatures(signature1, signature2); + return TypeLoaderEnvironment.Instance.GetRuntimeMethodHandleForComponents(declaringTypeHandle, handle, genericMethodArgs); } public override IntPtr TryGetDefaultConstructorForType(RuntimeTypeHandle runtimeTypeHandle) @@ -73,14 +63,9 @@ public override IntPtr ResolveGenericVirtualMethodTarget(RuntimeTypeHandle targe return TypeLoaderEnvironment.Instance.ResolveGenericVirtualMethodTarget(targetTypeHandle, declMethod); } - public override bool GetRuntimeFieldHandleComponents(RuntimeFieldHandle runtimeFieldHandle, out RuntimeTypeHandle declaringTypeHandle, out string fieldName) - { - return TypeLoaderEnvironment.Instance.TryGetRuntimeFieldHandleComponents(runtimeFieldHandle, out declaringTypeHandle, out fieldName); - } - - public override RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, string fieldName) + public override RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, FieldHandle handle) { - return TypeLoaderEnvironment.Instance.GetRuntimeFieldHandleForComponents(declaringTypeHandle, fieldName); + return TypeLoaderEnvironment.Instance.GetRuntimeFieldHandleForComponents(declaringTypeHandle, handle); } public 
override IntPtr ConvertUnboxingFunctionPointerToUnderlyingNonUnboxingPointer(IntPtr unboxingFunctionPointer, RuntimeTypeHandle declaringType) @@ -99,18 +84,6 @@ public override bool TryGetArrayTypeForElementType(RuntimeTypeHandle elementType } } - public static class RuntimeSignatureExtensions - { - public static IntPtr NativeLayoutSignature(this RuntimeSignature signature) - { - if (!signature.IsNativeLayoutSignature) - Environment.FailFast("Not a valid native layout signature"); - - NativeReader reader = TypeLoaderEnvironment.GetNativeLayoutInfoReader(signature); - return reader.OffsetToAddress(signature.NativeLayoutOffset); - } - } - public sealed partial class TypeLoaderEnvironment { [ThreadStatic] @@ -210,67 +183,6 @@ private bool EnsureTypeHandleForType(TypeDesc type) return !type.RuntimeTypeHandle.IsNull(); } - // - // Parse a native layout signature pointed to by "signature" in the executable image, optionally using - // "typeArgs" and "methodArgs" for generic type parameter substitution. The first field in "signature" - // must be an encoded type but any data beyond that is user-defined and returned in "remainingSignature" - // - internal bool GetTypeFromSignatureAndContext(RuntimeSignature signature, RuntimeTypeHandle[] typeArgs, RuntimeTypeHandle[] methodArgs, out RuntimeTypeHandle createdType, out RuntimeSignature remainingSignature) - { - NativeReader reader = GetNativeLayoutInfoReader(signature); - NativeParser parser = new NativeParser(reader, signature.NativeLayoutOffset); - - bool result = GetTypeFromSignatureAndContext(ref parser, new TypeManagerHandle(signature.ModuleHandle), typeArgs, methodArgs, out createdType); - - remainingSignature = RuntimeSignature.CreateFromNativeLayoutSignature(signature, parser.Offset); - - return result; - } - - internal bool GetTypeFromSignatureAndContext(ref NativeParser parser, TypeManagerHandle moduleHandle, RuntimeTypeHandle[] typeArgs, RuntimeTypeHandle[] methodArgs, out RuntimeTypeHandle createdType) - { - createdType = default(RuntimeTypeHandle); - TypeSystemContext context = TypeSystemContextFactory.Create(); - - TypeDesc parsedType = TryParseNativeSignatureWorker(context, moduleHandle, ref parser, typeArgs, methodArgs, false) as TypeDesc; - if (parsedType == null) - return false; - - if (!EnsureTypeHandleForType(parsedType)) - return false; - - createdType = parsedType.RuntimeTypeHandle; - - TypeSystemContextFactory.Recycle(context); - return true; - } - - // - // Parse a native layout signature pointed to by "signature" in the executable image, optionally using - // "typeArgs" and "methodArgs" for generic type parameter substitution. 
The first field in "signature" - // must be an encoded method but any data beyond that is user-defined and returned in "remainingSignature" - // - public MethodDesc GetMethodFromSignatureAndContext(TypeSystemContext context, RuntimeSignature signature, RuntimeTypeHandle[] typeArgs, RuntimeTypeHandle[] methodArgs, out RuntimeSignature remainingSignature) - { - NativeReader reader = GetNativeLayoutInfoReader(signature); - NativeParser parser = new NativeParser(reader, signature.NativeLayoutOffset); - - MethodDesc result = TryParseNativeSignatureWorker(context, new TypeManagerHandle(signature.ModuleHandle), ref parser, typeArgs, methodArgs, true) as MethodDesc; - - remainingSignature = RuntimeSignature.CreateFromNativeLayoutSignature(signature, parser.Offset); - - return result; - } - - // - // Returns the native layout info reader - // - internal static unsafe NativeReader GetNativeLayoutInfoReader(RuntimeSignature signature) - { - Debug.Assert(signature.IsNativeLayoutSignature); - return GetNativeLayoutInfoReader(new TypeManagerHandle(signature.ModuleHandle)); - } - // // Returns the native layout info reader // @@ -463,23 +375,6 @@ public int GetCanonicalHashCode(RuntimeTypeHandle typeHandle, CanonicalFormKind return hashCode; } - private static object TryParseNativeSignatureWorker(TypeSystemContext typeSystemContext, TypeManagerHandle moduleHandle, ref NativeParser parser, RuntimeTypeHandle[] typeGenericArgumentHandles, RuntimeTypeHandle[] methodGenericArgumentHandles, bool isMethodSignature) - { - Instantiation typeGenericArguments = typeSystemContext.ResolveRuntimeTypeHandles(typeGenericArgumentHandles ?? Array.Empty()); - Instantiation methodGenericArguments = typeSystemContext.ResolveRuntimeTypeHandles(methodGenericArgumentHandles ?? Array.Empty()); - - NativeLayoutInfoLoadContext nativeLayoutContext = new NativeLayoutInfoLoadContext(); - nativeLayoutContext._module = ModuleList.Instance.GetModuleInfoByHandle(moduleHandle); - nativeLayoutContext._typeSystemContext = typeSystemContext; - nativeLayoutContext._typeArgumentHandles = typeGenericArguments; - nativeLayoutContext._methodArgumentHandles = methodGenericArguments; - - if (isMethodSignature) - return nativeLayoutContext.GetMethod(ref parser); - else - return nativeLayoutContext.GetType(ref parser); - } - public bool TryGetGenericMethodDictionaryForComponents(RuntimeTypeHandle declaringTypeHandle, RuntimeTypeHandle[] genericMethodArgHandles, MethodNameAndSignature nameAndSignature, out IntPtr methodDictionary) { TypeSystemContext context = TypeSystemContextFactory.Create(); diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderTypeSystemContext.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderTypeSystemContext.cs index fb45055e11db..00736f8178e9 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderTypeSystemContext.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderTypeSystemContext.cs @@ -10,7 +10,7 @@ namespace Internal.Runtime.TypeLoader { /// /// TypeSystemContext that can interfact with the - /// Redhawk runtime type system and native metadata + /// NativeAOT runtime type system and native metadata /// public partial class TypeLoaderTypeSystemContext : TypeSystemContext { diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs 
b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs index ab3583c38d14..16eda3167983 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs @@ -16,7 +16,7 @@ public static class TypeSystemContextFactory { // Cache the most recent instance of TypeSystemContext in a weak handle, and reuse it if possible // This allows us to avoid recreating the type resolution context again and again, but still allows it to go away once the types are no longer being built - private static GCHandle s_cachedContext = GCHandle.Alloc(null, GCHandleType.Weak); + private static WeakGCHandle s_cachedContext = new WeakGCHandle(null); private static readonly Lock s_lock = new Lock(useTrivialWaits: true); @@ -24,10 +24,9 @@ public static TypeSystemContext Create() { using (s_lock.EnterScope()) { - TypeSystemContext context = (TypeSystemContext)s_cachedContext.Target; - if (context != null) + if (s_cachedContext.TryGetTarget(out TypeSystemContext? context)) { - s_cachedContext.Target = null; + s_cachedContext.SetTarget(null); return context; } } @@ -63,7 +62,7 @@ public static void Recycle(TypeSystemContext context) context.FlushTypeBuilderStates(); // No lock needed here - the reference assignment is atomic - s_cachedContext.Target = context; + s_cachedContext.SetTarget(context); } } } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/CanonTypes.Runtime.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/CanonTypes.Runtime.cs index 05f4d6829bc5..b8c4d8b3e9ba 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/CanonTypes.Runtime.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/CanonTypes.Runtime.cs @@ -15,15 +15,7 @@ internal partial class CanonType { partial void Initialize() { - SetRuntimeTypeHandleUnsafe(RuntimeAugments.GetCanonType(CanonTypeKind.NormalCanon)); - } - } - - internal partial class UniversalCanonType - { - partial void Initialize() - { - SetRuntimeTypeHandleUnsafe(RuntimeAugments.GetCanonType(CanonTypeKind.UniversalCanon)); + SetRuntimeTypeHandleUnsafe(RuntimeAugments.GetCanonType()); } } } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/RuntimeMethodDesc.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/RuntimeMethodDesc.cs index 042b1957e22b..e07a286b1bb2 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/RuntimeMethodDesc.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/RuntimeMethodDesc.cs @@ -10,7 +10,7 @@ namespace Internal.TypeSystem.NoMetadata { /// - /// Represents a method within the Redhawk runtime + /// Represents a method within the NativeAOT runtime /// internal sealed partial class RuntimeMethodDesc : NoMetadataMethodDesc { @@ -93,7 +93,7 @@ public override string Name { get { - return _nameAndSignature.Name; + return NameAndSignature.GetName(); } } @@ -163,7 +163,7 @@ public override bool HasCustomAttribute(string attributeNamespace, string attrib public override string ToString() { - string result = OwningType.ToString() + ".Method(" + NameAndSignature.Name + ")"; + string result = OwningType.ToString() + ".Method(" + Name + ")"; return result; } #endif diff --git 
a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeDesc.Runtime.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeDesc.Runtime.cs index 96c8c5ea2ac2..295d5b9a4801 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeDesc.Runtime.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeDesc.Runtime.cs @@ -219,22 +219,10 @@ internal static TypeDesc ComputeTemplate(TypeBuilderState state, bool templateRe return templateType; } - internal bool IsTemplateUniversal() - { - TypeDesc templateType = ComputeTemplate(false); - if (templateType == null) - return false; - else - return templateType.IsCanonicalSubtype(CanonicalFormKind.Universal); - } - internal bool IsTemplateCanonical() { TypeDesc templateType = ComputeTemplate(false); - if (templateType == null) - return false; - else - return !templateType.IsCanonicalSubtype(CanonicalFormKind.Universal); + return templateType != null; } } } diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeSystemContext.Runtime.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeSystemContext.Runtime.cs index 1e4b88855222..6391cf78abaa 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeSystemContext.Runtime.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeSystemContext.Runtime.cs @@ -188,10 +188,6 @@ public TypeDesc ResolveRuntimeTypeHandle(RuntimeTypeHandle rtth) { returnedType = CanonType; } - else if (rtth.Equals(UniversalCanonType.RuntimeTypeHandle)) - { - returnedType = UniversalCanonType; - } else if (RuntimeAugments.IsGenericTypeDefinition(rtth)) { unsafe @@ -335,7 +331,7 @@ public RuntimeMethodKey(bool unboxingStub, DefType owningType, MethodNameAndSign _owningType = owningType; _methodNameAndSignature = nameAndSignature; - _hashCode = TypeHashingAlgorithms.ComputeMethodHashCode(owningType.GetHashCode(), TypeHashingAlgorithms.ComputeNameHashCode(nameAndSignature.Name)); + _hashCode = TypeHashingAlgorithms.ComputeMethodHashCode(owningType.GetHashCode(), TypeHashingAlgorithms.ComputeNameHashCode(nameAndSignature.GetName())); } public class RuntimeMethodKeyHashtable : LockFreeReaderHashtable diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/System.Private.TypeLoader.csproj b/src/coreclr/nativeaot/System.Private.TypeLoader/src/System.Private.TypeLoader.csproj index 09881516e295..4ed2c0fec251 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/System.Private.TypeLoader.csproj +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/System.Private.TypeLoader.csproj @@ -4,8 +4,6 @@ TYPE_LOADER_IMPLEMENTATION;$(DefineConstants) TYPE_LOADER_TRACE;$(DefineConstants) GVM_RESOLUTION_TRACE;$(DefineConstants) - CCCONVERTER_TRACE;$(DefineConstants) - GENERICS_FORCE_USG;$(DefineConstants) false @@ -241,7 +239,6 @@ - diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/InitializeFinalizerThread.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/InitializeFinalizerThread.cs deleted file mode 100644 index 08ca09dd867b..000000000000 --- a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/InitializeFinalizerThread.cs +++ /dev/null @@ -1,16 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Threading; - -namespace System.Runtime -{ - internal static class FinalizerInitRunner - { - // Here, we are subscribing to a callback from the runtime. This callback is made from the finalizer - // thread before any objects are finalized. - public static void DoInitialize() - { - } - } -} diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs index 52d1277f0bd0..026f0b8d109c 100644 --- a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs +++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs @@ -10,7 +10,7 @@ namespace System.Runtime { // CONTRACT with Runtime - // This class lists all the static methods that the redhawk runtime exports to a class library + // This class lists all the static methods that the NativeAOT runtime exports to a class library // These are not expected to change much but are needed by the class library to implement its functionality // // The contents of this file can be modified if needed by the class library @@ -80,6 +80,13 @@ internal static IntPtr RhGetModuleSection(TypeManagerHandle module, ReadyToRunSe [RuntimeImport(RuntimeLibrary, "RhNewArray")] private static extern unsafe Array RhNewArray(MethodTable* pEEType, int length); + [MethodImpl(MethodImplOptions.InternalCall)] + [RuntimeImport(RuntimeLibrary, "RhNewString")] + internal static extern unsafe string RhNewString(MethodTable* pEEType, int length); + + [DllImport(RuntimeLibrary)] + internal static extern unsafe void RhAllocateNewArray(MethodTable* pArrayEEType, uint numElements, uint flags, void* pResult); + [DllImport(RuntimeLibrary)] internal static extern unsafe void RhAllocateNewObject(IntPtr pEEType, uint flags, void* pResult); diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj b/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj index 9560f535e729..de37e12fe8ce 100644 --- a/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj +++ b/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj @@ -22,8 +22,6 @@ INPLACE_RUNTIME;$(DefineConstants) - FEATURE_64BIT_ALIGNMENT;$(DefineConstants) - FEATURE_64BIT_ALIGNMENT;$(DefineConstants) $(ArtifactsObjDir)\coreclr\$(TargetOS).$(TargetArchitecture).$(CoreCLRConfiguration) $(IntermediatesDir)\ide @@ -68,9 +66,6 @@ Runtime.Base\src\System\Runtime\TypeCast.cs - - Runtime.Base\src\RhBaseName.cs - Common\TransitionBlock.cs @@ -236,7 +231,6 @@ - diff --git a/src/coreclr/nativeaot/docs/README.md b/src/coreclr/nativeaot/docs/README.md index 2030f2256caf..74002dda6065 100644 --- a/src/coreclr/nativeaot/docs/README.md +++ b/src/coreclr/nativeaot/docs/README.md @@ -5,7 +5,6 @@ - [Building native AOT apps in containers](containers.md) - [Debugging applications](debugging.md) - [Optimizing applications](optimizing.md) -- [Reflection In AOT](reflection-in-aot-mode.md) - [Troubleshooting](troubleshooting.md) - [RD.xml Documentation](rd-xml-format.md) - [Using Native AOT on Android-Bionic](android-bionic.md) diff --git a/src/coreclr/nativeaot/docs/compiling.md b/src/coreclr/nativeaot/docs/compiling.md index f10238fc8bb8..818d2875f71a 100644 --- a/src/coreclr/nativeaot/docs/compiling.md +++ b/src/coreclr/nativeaot/docs/compiling.md @@ -74,9 +74,29 @@ You can use this feature by adding the `StaticICULinking` property to your proje ```xml true + + + /usr/share/icu/74.2/icudt74l.dat ``` +> [!NOTE] +> Some distros, such as Alpine and Gentoo, currently package ICU data as a 
`icudt*.dat` archive, +> while others, like Ubuntu, do not. +> To use `EmbedIcuDataPath` on a distro that does not provide the `.dat` file, +> you may need to build ICU with `--with-data-packaging=archive` to generate it. +> See https://unicode-org.github.io/icu/userguide/icu_data#building-and-linking-against-icu-data. +> ```sh +> # e.g. to obtain icudt*.dat on Ubuntu +> $ curl -sSL https://github.com/unicode-org/icu/releases/download/release-74-2/icu4c-74_2-src.tgz | tar xzf - +> $ cd icu/source +> $ ./configure --with-data-packaging=archive --enable-static --disable-shared --disable-samples +> $ make -j +> $ find . -path *out/* -name icudt*.dat -exec echo $(pwd)/{} \; +> ``` + This feature is only supported on Linux. This feature is not supported when crosscompiling. License (Unicode): https://github.com/unicode-org/icu/blob/main/icu4c/LICENSE diff --git a/src/coreclr/nativeaot/docs/optimizing.md b/src/coreclr/nativeaot/docs/optimizing.md index 7d790e99cc32..755fcc2710ca 100644 --- a/src/coreclr/nativeaot/docs/optimizing.md +++ b/src/coreclr/nativeaot/docs/optimizing.md @@ -6,4 +6,4 @@ The rest of the document talks about options that exist, but their names and pur ## Options related to code generation * `<IlcInstructionSet>`: By default, the compiler targets the minimum instruction set supported by the target OS and architecture. This option allows targeting newer instruction sets for better performance. The native binary will require the instruction sets to be supported by the hardware in order to run. For example, `avx2,bmi2,fma,pclmul,popcnt,aes` will produce binary that takes advantage of instruction sets that are typically present on current Intel and AMD processors. `native` will produce a binary that uses instructions that currently running CPU supports (no cross-compilation support). Run `ilc --help` for the full list of available instruction sets. `ilc` can be executed from the NativeAOT package in your local nuget cache e.g. `%USERPROFILE%\.nuget\packages\runtime.win-x64.microsoft.dotnet.ilcompiler\8.0.0-...\tools\ilc.exe` on Windows or `~/.nuget/packages/runtime.linux-arm64.microsoft.dotnet.ilcompiler/8.0.0-.../tools/ilc` on Linux. -* `<IlcMaxVectorTBitWidth>`: By default, the compiler targets the a `Vector<T>` size of `16` or `32` bytes, depending on the underlying instruction sets supported. This option allows specifying a different maximum bit width. For example, if by default on x64 hardware `Vector<T>` will be 16-bytes. However, if `AVX2` is targeted then `Vector<T>` will automatically grow to be 32-bytes instead, setting `128` would keep the size as 16-bytes. Alternatively, even if `AVX512F` is targeted then by default `Vector<T>` will not grow larger than 32-bytes, setting `512` would allow it to grow to 64-bytes. +* `<IlcMaxVectorTBitWidth>`: By default, the compiler targets a `Vector<T>` size of `16` or `32` bytes, depending on the underlying instruction sets supported. This option allows specifying a different maximum bit width. For example, by default on x64 hardware `Vector<T>` will be 16-bytes; however, if `AVX2` is targeted then `Vector<T>` will automatically grow to be 32-bytes instead. Setting `128` would keep the size as 16-bytes. Alternatively, even if `AVX512F` is targeted then by default `Vector<T>` will not grow larger than 32-bytes, setting `512` would allow it to grow to 64-bytes.
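For readers skimming the two documentation changes above, here is a minimal project-file sketch (not part of this diff) showing how the properties they describe might be combined. It assumes the `StaticICULinking`, `EmbedIcuDataPath`, `IlcInstructionSet`, and `IlcMaxVectorTBitWidth` MSBuild properties as documented in compiling.md and optimizing.md; the target framework, runtime identifier, and ICU data path are illustrative placeholders.

```xml
<!-- Illustrative sketch only: a console app published with Native AOT,
     statically linking ICU and tuning code generation.
     Property names follow the docs above; paths and versions are examples. -->
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net10.0</TargetFramework>
    <PublishAot>true</PublishAot>

    <!-- Statically link ICU; Linux only, not supported when cross-compiling. -->
    <StaticICULinking>true</StaticICULinking>
    <!-- Embed an ICU data archive; on distros without a .dat file, build one as shown above. -->
    <EmbedIcuDataPath>/usr/share/icu/74.2/icudt74l.dat</EmbedIcuDataPath>

    <!-- Target newer instruction sets; the binary then requires hardware that supports them. -->
    <IlcInstructionSet>avx2,bmi2,fma,pclmul,popcnt,aes</IlcInstructionSet>
    <!-- Keep Vector<T> at 16 bytes even though AVX2 is targeted. -->
    <IlcMaxVectorTBitWidth>128</IlcMaxVectorTBitWidth>
  </PropertyGroup>
</Project>
```

Publishing such a project with `dotnet publish -r linux-x64` would exercise both features; this is a sketch based on the documentation text, not a configuration added by the change.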
diff --git a/src/coreclr/nativeaot/nativeaot.sln b/src/coreclr/nativeaot/nativeaot.sln deleted file mode 100644 index 30c0b1ec2c84..000000000000 --- a/src/coreclr/nativeaot/nativeaot.sln +++ /dev/null @@ -1,559 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.11.35017.193 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib", "System.Private.CoreLib\src\System.Private.CoreLib.csproj", "{E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.Reflection.Execution", "System.Private.Reflection.Execution\src\System.Private.Reflection.Execution.csproj", "{7498DD7C-76C1-4912-AF72-DA84E05B568F}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.StackTraceMetadata", "System.Private.StackTraceMetadata\src\System.Private.StackTraceMetadata.csproj", "{33CAE331-16EE-443C-A0CC-4337B94A02AD}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.TypeLoader", "System.Private.TypeLoader\src\System.Private.TypeLoader.csproj", "{3E43ACA2-073E-4A66-BA9C-417C5F83D430}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Test.CoreLib", "Test.CoreLib\src\Test.CoreLib.csproj", "{C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}" -EndProject -Project("{D954291E-2A0B-460D-934E-DC6B0785DB48}") = "System.Private.CoreLib.Shared", "..\..\libraries\System.Private.CoreLib\src\System.Private.CoreLib.Shared.shproj", "{977524B8-92D8-4DFC-91E4-11A0582B81BF}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{2E156392-D514-42DE-9532-42D2A7F0B9FB}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{EC8F472E-5375-4962-9963-8B11F2924C2B}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{DF7F14A3-3EB8-4B2C-AE04-678876997BA4}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{B10E7FF9-AA48-4AF0-8242-EF5ED68846AD}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{1BE6A0E4-9096-44DF-8C8C-33956BD0050F}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{5FB0FB7F-BB7A-4E16-B834-82413FFA7C9D}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{361ABA16-E054-4160-9998-A06BF377A82F}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib", "..\..\libraries\System.Private.CoreLib\ref\System.Private.CoreLib.csproj", "{EEB35F24-4878-4417-BB54-86AB1206E8D3}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Interop.SourceGeneration", "..\..\libraries\System.Runtime.InteropServices\gen\Microsoft.Interop.SourceGeneration\Microsoft.Interop.SourceGeneration.csproj", "{73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LibraryImportGenerator", "..\..\libraries\System.Runtime.InteropServices\gen\LibraryImportGenerator\LibraryImportGenerator.csproj", "{795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib.Generators", "..\..\libraries\System.Private.CoreLib\gen\System.Private.CoreLib.Generators.csproj", "{7B1980B2-F6AB-423A-A75F-41190570DE4D}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILLink.CodeFixProvider", 
"..\..\tools\illink\src\ILLink.CodeFix\ILLink.CodeFixProvider.csproj", "{735FEA0D-852A-47BD-B1FE-6B4D9C83F217}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILLink.RoslynAnalyzer", "..\..\tools\illink\src\ILLink.RoslynAnalyzer\ILLink.RoslynAnalyzer.csproj", "{D20FDE50-2841-46E9-B31A-6B145BFCB09E}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mono.Linker", "..\..\tools\illink\src\linker\ref\Mono.Linker.csproj", "{5B302881-1226-4A80-BDC1-EE70FB283949}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mono.Linker", "..\..\tools\illink\src\linker\Mono.Linker.csproj", "{3E67A258-6F80-435C-B83C-61BE17B93155}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILLink.Tasks", "..\..\tools\illink\src\ILLink.Tasks\ILLink.Tasks.csproj", "{8C7DAB59-A791-42BC-BBA4-194BF1293841}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.DependencyAnalysisFramework", "..\tools\aot\ILCompiler.DependencyAnalysisFramework\ILCompiler.DependencyAnalysisFramework.csproj", "{431A4C42-805F-40CD-9EEE-F794B6AF9B59}" -EndProject -Project("{D954291E-2A0B-460D-934E-DC6B0785DB48}") = "ILLink.Shared", "..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.shproj", "{FF598E93-8E9E-4091-9F50-61A7572663AE}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Checked|Any CPU = Checked|Any CPU - Checked|arm = Checked|arm - Checked|arm64 = Checked|arm64 - Checked|x64 = Checked|x64 - Checked|x86 = Checked|x86 - Debug|Any CPU = Debug|Any CPU - Debug|arm = Debug|arm - Debug|arm64 = Debug|arm64 - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|Any CPU = Release|Any CPU - Release|arm = Release|arm - Release|arm64 = Release|arm64 - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|Any CPU.ActiveCfg = Checked|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|Any CPU.Build.0 = Checked|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|arm.ActiveCfg = Checked|arm - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|arm.Build.0 = Checked|arm - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|arm64.ActiveCfg = Checked|arm64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|arm64.Build.0 = Checked|arm64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|x64.ActiveCfg = Checked|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|x64.Build.0 = Checked|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|x86.ActiveCfg = Checked|x86 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Checked|x86.Build.0 = Checked|x86 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|Any CPU.ActiveCfg = Debug|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|Any CPU.Build.0 = Debug|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|arm.ActiveCfg = Debug|arm - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|arm.Build.0 = Debug|arm - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|arm64.ActiveCfg = Debug|arm64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|arm64.Build.0 = Debug|arm64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|x64.ActiveCfg = Debug|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|x64.Build.0 = Debug|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|x86.ActiveCfg = Debug|x86 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Debug|x86.Build.0 = Debug|x86 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|Any CPU.ActiveCfg = Release|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|Any CPU.Build.0 = Release|x64 
- {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|arm.ActiveCfg = Release|arm - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|arm.Build.0 = Release|arm - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|arm64.ActiveCfg = Release|arm64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|arm64.Build.0 = Release|arm64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|x64.ActiveCfg = Release|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|x64.Build.0 = Release|x64 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|x86.ActiveCfg = Release|x86 - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6}.Release|x86.Build.0 = Release|x86 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|Any CPU.ActiveCfg = Checked|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|Any CPU.Build.0 = Checked|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|arm.ActiveCfg = Checked|arm - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|arm.Build.0 = Checked|arm - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|arm64.ActiveCfg = Checked|arm64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|arm64.Build.0 = Checked|arm64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|x64.ActiveCfg = Checked|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|x64.Build.0 = Checked|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|x86.ActiveCfg = Checked|x86 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Checked|x86.Build.0 = Checked|x86 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|Any CPU.ActiveCfg = Debug|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|Any CPU.Build.0 = Debug|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|arm.ActiveCfg = Debug|arm - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|arm.Build.0 = Debug|arm - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|arm64.ActiveCfg = Debug|arm64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|arm64.Build.0 = Debug|arm64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|x64.ActiveCfg = Debug|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|x64.Build.0 = Debug|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|x86.ActiveCfg = Debug|x86 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Debug|x86.Build.0 = Debug|x86 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|Any CPU.ActiveCfg = Release|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|Any CPU.Build.0 = Release|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|arm.ActiveCfg = Release|arm - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|arm.Build.0 = Release|arm - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|arm64.ActiveCfg = Release|arm64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|arm64.Build.0 = Release|arm64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|x64.ActiveCfg = Release|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|x64.Build.0 = Release|x64 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|x86.ActiveCfg = Release|x86 - {7498DD7C-76C1-4912-AF72-DA84E05B568F}.Release|x86.Build.0 = Release|x86 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|Any CPU.ActiveCfg = Checked|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|Any CPU.Build.0 = Checked|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|arm.ActiveCfg = Checked|arm - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|arm.Build.0 = Checked|arm - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|arm64.ActiveCfg = Checked|arm64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|arm64.Build.0 = Checked|arm64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|x64.ActiveCfg = Checked|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|x64.Build.0 = Checked|x64 - 
{33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|x86.ActiveCfg = Checked|x86 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Checked|x86.Build.0 = Checked|x86 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|Any CPU.ActiveCfg = Debug|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|Any CPU.Build.0 = Debug|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|arm.ActiveCfg = Debug|arm - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|arm.Build.0 = Debug|arm - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|arm64.ActiveCfg = Debug|arm64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|arm64.Build.0 = Debug|arm64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|x64.ActiveCfg = Debug|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|x64.Build.0 = Debug|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|x86.ActiveCfg = Debug|x86 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Debug|x86.Build.0 = Debug|x86 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|Any CPU.ActiveCfg = Release|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|Any CPU.Build.0 = Release|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|arm.ActiveCfg = Release|arm - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|arm.Build.0 = Release|arm - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|arm64.ActiveCfg = Release|arm64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|arm64.Build.0 = Release|arm64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|x64.ActiveCfg = Release|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|x64.Build.0 = Release|x64 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|x86.ActiveCfg = Release|x86 - {33CAE331-16EE-443C-A0CC-4337B94A02AD}.Release|x86.Build.0 = Release|x86 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|Any CPU.ActiveCfg = Checked|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|Any CPU.Build.0 = Checked|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|arm.ActiveCfg = Checked|arm - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|arm.Build.0 = Checked|arm - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|arm64.ActiveCfg = Checked|arm64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|arm64.Build.0 = Checked|arm64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|x64.ActiveCfg = Checked|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|x64.Build.0 = Checked|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|x86.ActiveCfg = Checked|x86 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Checked|x86.Build.0 = Checked|x86 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|Any CPU.ActiveCfg = Debug|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|Any CPU.Build.0 = Debug|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|arm.ActiveCfg = Debug|arm - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|arm.Build.0 = Debug|arm - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|arm64.ActiveCfg = Debug|arm64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|arm64.Build.0 = Debug|arm64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|x64.ActiveCfg = Debug|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|x64.Build.0 = Debug|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|x86.ActiveCfg = Debug|x86 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Debug|x86.Build.0 = Debug|x86 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|Any CPU.ActiveCfg = Release|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|Any CPU.Build.0 = Release|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|arm.ActiveCfg = Release|arm - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|arm.Build.0 = Release|arm - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|arm64.ActiveCfg = 
Release|arm64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|arm64.Build.0 = Release|arm64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|x64.ActiveCfg = Release|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|x64.Build.0 = Release|x64 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|x86.ActiveCfg = Release|x86 - {3E43ACA2-073E-4A66-BA9C-417C5F83D430}.Release|x86.Build.0 = Release|x86 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|Any CPU.ActiveCfg = Checked|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|Any CPU.Build.0 = Checked|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|arm.ActiveCfg = Checked|arm - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|arm.Build.0 = Checked|arm - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|arm64.ActiveCfg = Checked|arm64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|arm64.Build.0 = Checked|arm64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|x64.ActiveCfg = Checked|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|x64.Build.0 = Checked|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|x86.ActiveCfg = Checked|x86 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Checked|x86.Build.0 = Checked|x86 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|Any CPU.ActiveCfg = Debug|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|Any CPU.Build.0 = Debug|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|arm.ActiveCfg = Debug|arm - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|arm.Build.0 = Debug|arm - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|arm64.ActiveCfg = Debug|arm64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|arm64.Build.0 = Debug|arm64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|x64.ActiveCfg = Debug|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|x64.Build.0 = Debug|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|x86.ActiveCfg = Debug|x86 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Debug|x86.Build.0 = Debug|x86 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|Any CPU.ActiveCfg = Release|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|Any CPU.Build.0 = Release|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|arm.ActiveCfg = Release|arm - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|arm.Build.0 = Release|arm - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|arm64.ActiveCfg = Release|arm64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|arm64.Build.0 = Release|arm64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|x64.ActiveCfg = Release|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|x64.Build.0 = Release|x64 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|x86.ActiveCfg = Release|x86 - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51}.Release|x86.Build.0 = Release|x86 - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|Any CPU.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|arm.ActiveCfg = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|arm.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|arm64.ActiveCfg = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|arm64.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|x64.ActiveCfg = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|x64.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|x86.ActiveCfg = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Checked|x86.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|Any CPU.ActiveCfg = Debug|Any 
CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|Any CPU.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|arm.ActiveCfg = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|arm.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|arm64.ActiveCfg = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|arm64.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|x64.ActiveCfg = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|x64.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|x86.ActiveCfg = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Debug|x86.Build.0 = Debug|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|Any CPU.ActiveCfg = Release|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|Any CPU.Build.0 = Release|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|arm.ActiveCfg = Release|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|arm.Build.0 = Release|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|arm64.ActiveCfg = Release|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|arm64.Build.0 = Release|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|x64.ActiveCfg = Release|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|x64.Build.0 = Release|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|x86.ActiveCfg = Release|Any CPU - {EEB35F24-4878-4417-BB54-86AB1206E8D3}.Release|x86.Build.0 = Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|Any CPU.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|arm.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|arm.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|arm64.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|arm64.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|x64.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|x64.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|x86.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Checked|x86.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|Any CPU.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|arm.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|arm.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|arm64.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|arm64.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|x64.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|x64.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|x86.ActiveCfg = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Debug|x86.Build.0 = Debug|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|Any CPU.ActiveCfg = Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|Any CPU.Build.0 = Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|arm.ActiveCfg = Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|arm.Build.0 = Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|arm64.ActiveCfg = Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|arm64.Build.0 = 
Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|x64.ActiveCfg = Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|x64.Build.0 = Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|x86.ActiveCfg = Release|Any CPU - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A}.Release|x86.Build.0 = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|Any CPU.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|arm.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|arm.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|arm64.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|arm64.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|x64.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|x64.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|x86.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Checked|x86.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|Any CPU.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|arm.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|arm.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|arm64.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|arm64.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|x64.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|x64.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|x86.ActiveCfg = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Debug|x86.Build.0 = Debug|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|Any CPU.ActiveCfg = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|Any CPU.Build.0 = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|arm.ActiveCfg = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|arm.Build.0 = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|arm64.ActiveCfg = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|arm64.Build.0 = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|x64.ActiveCfg = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|x64.Build.0 = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|x86.ActiveCfg = Release|Any CPU - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4}.Release|x86.Build.0 = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|Any CPU.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|arm.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|arm.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|arm64.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|arm64.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|x64.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|x64.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|x86.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Checked|x86.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|Any 
CPU.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|Any CPU.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|arm.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|arm.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|arm64.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|arm64.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|x64.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|x64.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|x86.ActiveCfg = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Debug|x86.Build.0 = Debug|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|Any CPU.ActiveCfg = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|Any CPU.Build.0 = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|arm.ActiveCfg = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|arm.Build.0 = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|arm64.ActiveCfg = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|arm64.Build.0 = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|x64.ActiveCfg = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|x64.Build.0 = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|x86.ActiveCfg = Release|Any CPU - {7B1980B2-F6AB-423A-A75F-41190570DE4D}.Release|x86.Build.0 = Release|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|Any CPU.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|arm.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|arm.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|arm64.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|arm64.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|x64.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|x64.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|x86.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Checked|x86.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|Any CPU.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|arm.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|arm.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|arm64.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|arm64.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|x64.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|x64.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|x86.ActiveCfg = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Debug|x86.Build.0 = Debug|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|Any CPU.ActiveCfg = Release|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|Any CPU.Build.0 = Release|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|arm.ActiveCfg = Release|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|arm.Build.0 = Release|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|arm64.ActiveCfg = Release|Any CPU - 
{735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|arm64.Build.0 = Release|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|x64.ActiveCfg = Release|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|x64.Build.0 = Release|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|x86.ActiveCfg = Release|Any CPU - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217}.Release|x86.Build.0 = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|Any CPU.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|arm.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|arm.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|arm64.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|arm64.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|x64.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|x64.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|x86.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Checked|x86.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|Any CPU.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|arm.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|arm.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|arm64.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|arm64.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|x64.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|x64.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|x86.ActiveCfg = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Debug|x86.Build.0 = Debug|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|Any CPU.ActiveCfg = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|Any CPU.Build.0 = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|arm.ActiveCfg = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|arm.Build.0 = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|arm64.ActiveCfg = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|arm64.Build.0 = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|x64.ActiveCfg = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|x64.Build.0 = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|x86.ActiveCfg = Release|Any CPU - {D20FDE50-2841-46E9-B31A-6B145BFCB09E}.Release|x86.Build.0 = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|Any CPU.Build.0 = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|arm.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|arm.Build.0 = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|arm64.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|arm64.Build.0 = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|x64.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|x64.Build.0 = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|x86.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Checked|x86.Build.0 = 
Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|Any CPU.Build.0 = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|arm.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|arm.Build.0 = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|arm64.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|arm64.Build.0 = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|x64.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|x64.Build.0 = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|x86.ActiveCfg = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Debug|x86.Build.0 = Debug|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|Any CPU.ActiveCfg = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|Any CPU.Build.0 = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|arm.ActiveCfg = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|arm.Build.0 = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|arm64.ActiveCfg = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|arm64.Build.0 = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|x64.ActiveCfg = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|x64.Build.0 = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|x86.ActiveCfg = Release|Any CPU - {5B302881-1226-4A80-BDC1-EE70FB283949}.Release|x86.Build.0 = Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|Any CPU.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|arm.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|arm.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|arm64.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|arm64.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|x64.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|x64.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|x86.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Checked|x86.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|Any CPU.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|arm.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|arm.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|arm64.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|arm64.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|x64.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|x64.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|x86.ActiveCfg = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Debug|x86.Build.0 = Debug|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|Any CPU.ActiveCfg = Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|Any CPU.Build.0 = Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|arm.ActiveCfg = Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|arm.Build.0 = Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|arm64.ActiveCfg 
= Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|arm64.Build.0 = Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|x64.ActiveCfg = Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|x64.Build.0 = Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|x86.ActiveCfg = Release|Any CPU - {3E67A258-6F80-435C-B83C-61BE17B93155}.Release|x86.Build.0 = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|Any CPU.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|arm.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|arm.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|arm64.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|arm64.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|x64.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|x64.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|x86.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Checked|x86.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|Any CPU.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|arm.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|arm.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|arm64.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|arm64.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|x64.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|x64.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|x86.ActiveCfg = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Debug|x86.Build.0 = Debug|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|Any CPU.ActiveCfg = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|Any CPU.Build.0 = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|arm.ActiveCfg = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|arm.Build.0 = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|arm64.ActiveCfg = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|arm64.Build.0 = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|x64.ActiveCfg = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|x64.Build.0 = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|x86.ActiveCfg = Release|Any CPU - {8C7DAB59-A791-42BC-BBA4-194BF1293841}.Release|x86.Build.0 = Release|Any CPU - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|Any CPU.ActiveCfg = Checked|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|Any CPU.Build.0 = Checked|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|arm.ActiveCfg = Checked|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|arm.Build.0 = Checked|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|arm64.ActiveCfg = Checked|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|arm64.Build.0 = Checked|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|x64.ActiveCfg = Checked|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|x64.Build.0 = Checked|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|x86.ActiveCfg = Checked|x86 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Checked|x86.Build.0 = 
Checked|x86 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|Any CPU.ActiveCfg = Debug|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|Any CPU.Build.0 = Debug|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|arm.ActiveCfg = Debug|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|arm.Build.0 = Debug|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|arm64.ActiveCfg = Debug|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|arm64.Build.0 = Debug|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|x64.ActiveCfg = Debug|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|x64.Build.0 = Debug|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|x86.ActiveCfg = Debug|x86 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Debug|x86.Build.0 = Debug|x86 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|Any CPU.ActiveCfg = Release|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|Any CPU.Build.0 = Release|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|arm.ActiveCfg = Release|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|arm.Build.0 = Release|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|arm64.ActiveCfg = Release|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|arm64.Build.0 = Release|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|x64.ActiveCfg = Release|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|x64.Build.0 = Release|x64 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|x86.ActiveCfg = Release|x86 - {431A4C42-805F-40CD-9EEE-F794B6AF9B59}.Release|x86.Build.0 = Release|x86 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(NestedProjects) = preSolution - {E4BC768B-F97D-4A8F-9391-B65DF3EB47C6} = {EC8F472E-5375-4962-9963-8B11F2924C2B} - {7498DD7C-76C1-4912-AF72-DA84E05B568F} = {EC8F472E-5375-4962-9963-8B11F2924C2B} - {33CAE331-16EE-443C-A0CC-4337B94A02AD} = {EC8F472E-5375-4962-9963-8B11F2924C2B} - {3E43ACA2-073E-4A66-BA9C-417C5F83D430} = {EC8F472E-5375-4962-9963-8B11F2924C2B} - {C3371E09-E8A6-4F9E-B4CB-B1CE3F4FCC51} = {EC8F472E-5375-4962-9963-8B11F2924C2B} - {977524B8-92D8-4DFC-91E4-11A0582B81BF} = {EC8F472E-5375-4962-9963-8B11F2924C2B} - {1BE6A0E4-9096-44DF-8C8C-33956BD0050F} = {B10E7FF9-AA48-4AF0-8242-EF5ED68846AD} - {5FB0FB7F-BB7A-4E16-B834-82413FFA7C9D} = {B10E7FF9-AA48-4AF0-8242-EF5ED68846AD} - {361ABA16-E054-4160-9998-A06BF377A82F} = {B10E7FF9-AA48-4AF0-8242-EF5ED68846AD} - {EEB35F24-4878-4417-BB54-86AB1206E8D3} = {DF7F14A3-3EB8-4B2C-AE04-678876997BA4} - {73A3DE01-9045-45CC-9BB5-21B0D1ABB46A} = {2E156392-D514-42DE-9532-42D2A7F0B9FB} - {795E1D25-FEA8-4556-B23C-1EE6A5A1CBC4} = {2E156392-D514-42DE-9532-42D2A7F0B9FB} - {7B1980B2-F6AB-423A-A75F-41190570DE4D} = {2E156392-D514-42DE-9532-42D2A7F0B9FB} - {735FEA0D-852A-47BD-B1FE-6B4D9C83F217} = {1BE6A0E4-9096-44DF-8C8C-33956BD0050F} - {D20FDE50-2841-46E9-B31A-6B145BFCB09E} = {1BE6A0E4-9096-44DF-8C8C-33956BD0050F} - {5B302881-1226-4A80-BDC1-EE70FB283949} = {5FB0FB7F-BB7A-4E16-B834-82413FFA7C9D} - {3E67A258-6F80-435C-B83C-61BE17B93155} = {361ABA16-E054-4160-9998-A06BF377A82F} - {8C7DAB59-A791-42BC-BBA4-194BF1293841} = {361ABA16-E054-4160-9998-A06BF377A82F} - {431A4C42-805F-40CD-9EEE-F794B6AF9B59} = {361ABA16-E054-4160-9998-A06BF377A82F} - {FF598E93-8E9E-4091-9F50-61A7572663AE} = {361ABA16-E054-4160-9998-A06BF377A82F} - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {81B16D59-928B-49C1-839D-10E4747B0DC0} - EndGlobalSection - GlobalSection(SharedMSBuildProjectFiles) = preSolution - 
..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{3e67a258-6f80-435c-b83c-61be17b93155}*SharedItemsImports = 5 - ..\..\libraries\System.Private.CoreLib\src\System.Private.CoreLib.Shared.projitems*{977524b8-92d8-4dfc-91e4-11a0582b81bf}*SharedItemsImports = 13 - ..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{d20fde50-2841-46e9-b31a-6b145bfcb09e}*SharedItemsImports = 5 - ..\..\libraries\System.Private.CoreLib\src\System.Private.CoreLib.Shared.projitems*{e4bc768b-f97d-4a8f-9391-b65df3eb47c6}*SharedItemsImports = 5 - ..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{ff598e93-8e9e-4091-9f50-61a7572663ae}*SharedItemsImports = 13 - EndGlobalSection -EndGlobal diff --git a/src/coreclr/nativeaot/nativeaot.slnx b/src/coreclr/nativeaot/nativeaot.slnx new file mode 100644 index 000000000000..6ae1ddfe776c --- /dev/null +++ b/src/coreclr/nativeaot/nativeaot.slnx @@ -0,0 +1,96 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/coreclr/nativeresources/CMakeLists.txt b/src/coreclr/nativeresources/CMakeLists.txt index 6959ca2497ad..7dba113d91ea 100644 --- a/src/coreclr/nativeresources/CMakeLists.txt +++ b/src/coreclr/nativeresources/CMakeLists.txt @@ -1,8 +1,13 @@ project(nativeresourcestring) +add_library_clr(nativeresourcestring_objects + OBJECT + resourcestring.cpp +) + add_library_clr(nativeresourcestring STATIC - resourcestring.cpp + $ ) install_clr (TARGETS nativeresourcestring DESTINATIONS lib) diff --git a/src/coreclr/nativeresources/processrc.ps1 b/src/coreclr/nativeresources/processrc.ps1 new file mode 100644 index 000000000000..8ff7559e85ea --- /dev/null +++ b/src/coreclr/nativeresources/processrc.ps1 @@ -0,0 +1,98 @@ +param ( + [string]$Filename, + [string]$Targetname +) + +function Evaluate-ComplexExpression { + param ( + [string]$expression + ) + + # Extract the components of the expression + if ($expression -match '\(0x([0-9A-Fa-f]+)\s*\+\s*\(\(\(0x([0-9A-Fa-f]+)\)\s*&\s*0x([0-9A-Fa-f]+)\)\)\)') { + $baseValue = [convert]::ToInt64($matches[1], 16) + $hexValue = [convert]::ToInt64($matches[2], 16) + $mask = [convert]::ToInt64($matches[3], 16) + + # Perform the bitwise AND operation + $maskedValue = $hexValue -band $mask + + # Add the base value to the masked value + $result = $baseValue + $maskedValue + + return $result + } else { + throw "$expression - Input string was not in the correct format" + } +} + +# Function to determine if the input is a simple hex value or a complex expression +function Evaluate-Input { + param ( + [string]$expr + ) + + # Regular expression for a simple hex value + $hexRegex = '^0x[0-9A-Fa-f]+$' + + # Regular expression for a complex expression + $complexRegex = '^\(0x[0-9A-Fa-f]+\s*\+\s*\(\(\(0x[0-9A-Fa-f]+\)\s*&\s*0x[0-9A-Fa-f]+\)\)\)$' + + if ($expr -match $hexRegex) { + # Input is a simple hex value + return [convert]::ToInt64($expr, 16) + } elseif ($expr -match $complexRegex) { + # Input is a complex expression + return Evaluate-ComplexExpression($expr) + } else { + Write-Host "Input string was not in the correct format" + return 0 + } +} + +$ArrayName = "nativeStringResourceArray_$Targetname" +$TableName = "nativeStringResourceTable_$Targetname" + +$InStringTable = $false +$InBeginEnd = $false +$ResourceArray = @{} + +Get-Content $Filename | ForEach-Object { + $line = $_.Trim() + if ($line -match "^STRINGTABLE\s*DISCARDABLE") { + $InStringTable = $true + } elseif 
($line -eq "BEGIN") { + $InBeginEnd = $InStringTable + } elseif ($InBeginEnd -and $line -eq "END") { + $InBeginEnd = $false + $InStringTable = $false + } elseif ($InBeginEnd -and $line -notmatch "^\s*$") { + $id = $line -replace '\".*', '' -replace '\(HRESULT\)', '' -replace 'L', '' + $id = $id.Trim() + $id = Evaluate-Input($id) + + if ($line -match '"([^"]+)"') { + $content = $matches[1] + $ResourceArray[$id.ToString("x8")] = $content + } + } +} + +Write-Output "// Licensed to the .NET Foundation under one or more agreements." +Write-Output "// The .NET Foundation licenses this file to you under the MIT license." +Write-Output "//" +Write-Output "// This code was generated by processrc.ps1 and is not meant to be modified manually." +Write-Output "" +Write-Output "#include " +Write-Output "" +Write-Output "extern NativeStringResourceTable $TableName;" +Write-Output "const NativeStringResource $ArrayName[] = {" + +foreach ($id in $ResourceArray.Keys) { + $hexId = "{0:x8}" -f $id + Write-Output " {0x$hexId,`"$($ResourceArray[$id])`"}," +} + +Write-Output "};" +Write-Output "" +Write-Output "NativeStringResourceTable $TableName __attribute__((visibility(`"default`"))) = { $($ResourceArray.Count), $ArrayName };" diff --git a/src/coreclr/pal/CMakeLists.txt b/src/coreclr/pal/CMakeLists.txt index 9213941ba6da..01ddee53d35d 100644 --- a/src/coreclr/pal/CMakeLists.txt +++ b/src/coreclr/pal/CMakeLists.txt @@ -6,7 +6,12 @@ include_directories(${COREPAL_SOURCE_DIR}/inc) include_directories(${COREPAL_SOURCE_DIR}/src) include_directories(${COREPAL_SOURCE_DIR}/../inc) -add_compile_options(-fexceptions) +if (NOT CLR_CMAKE_TARGET_BROWSER) + add_compile_options(-fexceptions) +else() + add_compile_options(-fwasm-exceptions) + add_link_options(-fwasm-exceptions -sEXIT_RUNTIME=1) +endif() add_subdirectory(src) add_subdirectory(tests) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 86459bc0a6c0..8da66ba36248 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -100,7 +100,6 @@ extern bool g_arm64_atomics_present; /******************* ABI-specific glue *******************************/ #define MAX_PATH 260 -#define _MAX_PATH 260 #define _MAX_DRIVE 3 /* max. length of drive component */ #define _MAX_DIR 256 /* max. length of path component */ #define _MAX_FNAME 256 /* max. length of file name component */ @@ -111,9 +110,7 @@ extern bool g_arm64_atomics_present; #define MAX_PATH_FNAME MAX_PATH #define MAX_LONGPATH 1024 /* max. length of full pathname */ -#define MAXSHORT 0x7fff #define MAXLONG 0x7fffffff -#define MAXCHAR 0x7f #define MAXDWORD 0xffffffff // Sorting IDs. 
@@ -132,12 +129,6 @@ extern bool g_arm64_atomics_present; #define DECLSPEC_NORETURN PAL_NORETURN -#ifdef __clang_analyzer__ -#define ANALYZER_NORETURN __attribute((analyzer_noreturn)) -#else -#define ANALYZER_NORETURN -#endif - #define EMPTY_BASES_DECL #if !defined(_MSC_VER) || defined(SOURCE_FORMATTING) @@ -425,7 +416,6 @@ PAL_PerfJitDump_Finish(); #define MB_OKCANCEL 0x00000001L #define MB_ABORTRETRYIGNORE 0x00000002L -#define MB_ICONQUESTION 0x00000020L #define MB_ICONEXCLAMATION 0x00000030L #define MB_TASKMODAL 0x00002000L @@ -527,70 +517,6 @@ SearchPathW( #define SearchPath SearchPathW -PALIMPORT -BOOL -PALAPI -CopyFileW( - IN LPCWSTR lpExistingFileName, - IN LPCWSTR lpNewFileName, - IN BOOL bFailIfExists); - -#ifdef UNICODE -#define CopyFile CopyFileW -#else -#define CopyFile CopyFileA -#endif - -PALIMPORT -DWORD -PALAPI -GetFileAttributesW( - IN LPCWSTR lpFileName); - -#ifdef UNICODE -#define GetFileAttributes GetFileAttributesW -#else -#define GetFileAttributes GetFileAttributesA -#endif - -typedef enum _GET_FILEEX_INFO_LEVELS { - GetFileExInfoStandard -} GET_FILEEX_INFO_LEVELS; - -typedef enum _FINDEX_INFO_LEVELS { - FindExInfoStandard, - FindExInfoBasic, - FindExInfoMaxInfoLevel -} FINDEX_INFO_LEVELS; - -typedef enum _FINDEX_SEARCH_OPS { - FindExSearchNameMatch, - FindExSearchLimitToDirectories, - FindExSearchLimitToDevices, - FindExSearchMaxSearchOp -} FINDEX_SEARCH_OPS; - -typedef struct _WIN32_FILE_ATTRIBUTE_DATA { - DWORD dwFileAttributes; - FILETIME ftCreationTime; - FILETIME ftLastAccessTime; - FILETIME ftLastWriteTime; - DWORD nFileSizeHigh; - DWORD nFileSizeLow; -} WIN32_FILE_ATTRIBUTE_DATA, *LPWIN32_FILE_ATTRIBUTE_DATA; - -PALIMPORT -BOOL -PALAPI -GetFileAttributesExW( - IN LPCWSTR lpFileName, - IN GET_FILEEX_INFO_LEVELS fInfoLevelId, - OUT LPVOID lpFileInformation); - -#ifdef UNICODE -#define GetFileAttributesEx GetFileAttributesExW -#endif - typedef struct _OVERLAPPED { ULONG_PTR Internal; ULONG_PTR InternalHigh; @@ -718,21 +644,6 @@ GetFullPathNameW( #define GetFullPathName GetFullPathNameA #endif -PALIMPORT -UINT -PALAPI -GetTempFileNameW( - IN LPCWSTR lpPathName, - IN LPCWSTR lpPrefixString, - IN UINT uUnique, - OUT LPWSTR lpTempFileName); - -#ifdef UNICODE -#define GetTempFileName GetTempFileNameW -#else -#define GetTempFileName GetTempFileNameA -#endif - PALIMPORT DWORD PALAPI @@ -854,6 +765,7 @@ PALAPI PAL_CreateMutexW( IN BOOL bInitialOwner, IN LPCWSTR lpName, + IN BOOL bCurrentUserOnly, IN LPSTR lpSystemCallErrors, IN DWORD dwSystemCallErrorsBufferSize); @@ -875,6 +787,7 @@ HANDLE PALAPI PAL_OpenMutexW( IN LPCWSTR lpName, + IN BOOL bCurrentUserOnly, IN LPSTR lpSystemCallErrors, IN DWORD dwSystemCallErrorsBufferSize); @@ -1550,27 +1463,27 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { M512 Zmm30; M512 Zmm31; }; - + struct { - DWORD64 Egpr16; - DWORD64 Egpr17; - DWORD64 Egpr18; - DWORD64 Egpr19; - DWORD64 Egpr20; - DWORD64 Egpr21; - DWORD64 Egpr22; - DWORD64 Egpr23; - DWORD64 Egpr24; - DWORD64 Egpr25; - DWORD64 Egpr26; - DWORD64 Egpr27; - DWORD64 Egpr28; - DWORD64 Egpr29; - DWORD64 Egpr30; - DWORD64 Egpr31; + DWORD64 R16; + DWORD64 R17; + DWORD64 R18; + DWORD64 R19; + DWORD64 R20; + DWORD64 R21; + DWORD64 R22; + DWORD64 R23; + DWORD64 R24; + DWORD64 R25; + DWORD64 R26; + DWORD64 R27; + DWORD64 R28; + DWORD64 R29; + DWORD64 R30; + DWORD64 R31; }; - + } CONTEXT, *PCONTEXT, *LPCONTEXT; // @@ -2469,6 +2382,28 @@ typedef struct _KNONVOLATILE_CONTEXT_POINTERS { // } KNONVOLATILE_CONTEXT_POINTERS, *PKNONVOLATILE_CONTEXT_POINTERS; +#elif defined(HOST_WASM) +#define 
CONTEXT_CONTROL 0 +#define CONTEXT_INTEGER 0 +#define CONTEXT_FLOATING_POINT 0 +#define CONTEXT_DEBUG_REGISTERS 0 +#define CONTEXT_FULL 0 +#define CONTEXT_ALL 0 + +#define CONTEXT_XSTATE 0 + +#define CONTEXT_EXCEPTION_ACTIVE 0x8000000L +#define CONTEXT_SERVICE_ACTIVE 0x10000000L +#define CONTEXT_EXCEPTION_REQUEST 0x40000000L +#define CONTEXT_EXCEPTION_REPORTING 0x80000000L + +typedef struct _CONTEXT { + ULONG ContextFlags; +} CONTEXT, *PCONTEXT, *LPCONTEXT; + +typedef struct _KNONVOLATILE_CONTEXT_POINTERS { + DWORD none; +} KNONVOLATILE_CONTEXT_POINTERS, *PKNONVOLATILE_CONTEXT_POINTERS; #else #error Unknown architecture for defining CONTEXT. @@ -2556,79 +2491,10 @@ typedef BOOL(*UnwindReadMemoryCallback)(PVOID address, PVOID buffer, SIZE_T size PALIMPORT BOOL PALAPI PAL_VirtualUnwind(CONTEXT *context, KNONVOLATILE_CONTEXT_POINTERS *contextPointers); -PALIMPORT BOOL PALAPI PAL_VirtualUnwindOutOfProc(CONTEXT *context, KNONVOLATILE_CONTEXT_POINTERS *contextPointers, PULONG64 functionStart, SIZE_T baseAddress, UnwindReadMemoryCallback readMemoryCallback); +PALIMPORT BOOL PALAPI PAL_VirtualUnwindOutOfProc(CONTEXT *context, PULONG64 functionStart, SIZE_T baseAddress, UnwindReadMemoryCallback readMemoryCallback); PALIMPORT BOOL PALAPI PAL_GetUnwindInfoSize(SIZE_T baseAddress, ULONG64 ehFrameHdrAddr, UnwindReadMemoryCallback readMemoryCallback, PULONG64 ehFrameStart, PULONG64 ehFrameSize); -/* PAL_CS_NATIVE_DATA_SIZE is defined as sizeof(PAL_CRITICAL_SECTION_NATIVE_DATA) */ - -#if defined(__APPLE__) && defined(__i386__) -#define PAL_CS_NATIVE_DATA_SIZE 76 -#elif defined(__APPLE__) && defined(HOST_AMD64) -#define PAL_CS_NATIVE_DATA_SIZE 120 -#elif defined(__APPLE__) && defined(HOST_ARM64) -#define PAL_CS_NATIVE_DATA_SIZE 120 -#elif defined(__FreeBSD__) && defined(HOST_X86) -#define PAL_CS_NATIVE_DATA_SIZE 12 -#elif defined(__FreeBSD__) && defined(__x86_64__) -#define PAL_CS_NATIVE_DATA_SIZE 24 -#elif defined(__FreeBSD__) && defined(HOST_ARM64) -#define PAL_CS_NATIVE_DATA_SIZE 24 -#elif defined(__linux__) && defined(HOST_ARM) -#define PAL_CS_NATIVE_DATA_SIZE 80 -#elif defined(__linux__) && defined(HOST_ARM64) -#define PAL_CS_NATIVE_DATA_SIZE 104 -#elif defined(__linux__) && defined(__i386__) -#define PAL_CS_NATIVE_DATA_SIZE 76 -#elif defined(__linux__) && defined(__x86_64__) -#define PAL_CS_NATIVE_DATA_SIZE 96 -#elif defined(__linux__) && defined(HOST_S390X) -#define PAL_CS_NATIVE_DATA_SIZE 96 -#elif defined(__linux__) && defined(HOST_POWERPC64) -#define PAL_CS_NATIVE_DATA_SIZE 96 -#elif defined(__NetBSD__) && defined(__amd64__) -#define PAL_CS_NATIVE_DATA_SIZE 96 -#elif defined(__NetBSD__) && defined(__earm__) -#define PAL_CS_NATIVE_DATA_SIZE 56 -#elif defined(__NetBSD__) && defined(__i386__) -#define PAL_CS_NATIVE_DATA_SIZE 56 -#elif defined(__sun) && defined(__x86_64__) -#define PAL_CS_NATIVE_DATA_SIZE 48 -#elif defined(__linux__) && defined(__loongarch64) -#define PAL_CS_NATIVE_DATA_SIZE 96 -#elif defined(__linux__) && defined(__riscv) && __riscv_xlen == 64 -#define PAL_CS_NATIVE_DATA_SIZE 96 -#elif defined(__HAIKU__) && defined(__x86_64__) -#define PAL_CS_NATIVE_DATA_SIZE 56 -#else -#error PAL_CS_NATIVE_DATA_SIZE is not defined for this architecture -#endif - -// -typedef struct _CRITICAL_SECTION { - PVOID DebugInfo; - LONG LockCount; - LONG RecursionCount; - HANDLE OwningThread; - ULONG_PTR SpinCount; - -#ifdef PAL_TRACK_CRITICAL_SECTIONS_DATA - BOOL bInternal; -#endif // PAL_TRACK_CRITICAL_SECTIONS_DATA - volatile DWORD dwInitState; - - union CSNativeDataStorage - { - BYTE 
rgNativeDataStorage[PAL_CS_NATIVE_DATA_SIZE]; - PVOID pvAlign; // make sure the storage is machine-pointer-size aligned - } csnds; -} CRITICAL_SECTION, *PCRITICAL_SECTION, *LPCRITICAL_SECTION; - -PALIMPORT VOID PALAPI EnterCriticalSection(IN OUT LPCRITICAL_SECTION lpCriticalSection); -PALIMPORT VOID PALAPI LeaveCriticalSection(IN OUT LPCRITICAL_SECTION lpCriticalSection); -PALIMPORT VOID PALAPI InitializeCriticalSection(OUT LPCRITICAL_SECTION lpCriticalSection); -PALIMPORT VOID PALAPI DeleteCriticalSection(IN OUT LPCRITICAL_SECTION lpCriticalSection); - #define PAGE_NOACCESS 0x01 #define PAGE_READONLY 0x02 #define PAGE_READWRITE 0x04 @@ -2923,11 +2789,6 @@ FlushInstructionCache( #define MAX_LEADBYTES 12 #define MAX_DEFAULTCHAR 2 -PALIMPORT -UINT -PALAPI -GetACP(void); - typedef struct _cpinfo { UINT MaxCharSize; BYTE DefaultChar[MAX_DEFAULTCHAR]; @@ -3154,30 +3015,6 @@ RaiseFailFastException( IN PCONTEXT pContextRecord, IN DWORD dwFlags); -PALIMPORT -DWORD -PALAPI -GetTickCount(); - -PALIMPORT -ULONGLONG -PALAPI -GetTickCount64(); - -PALIMPORT -BOOL -PALAPI -QueryPerformanceCounter( - OUT LARGE_INTEGER *lpPerformanceCount - ); - -PALIMPORT -BOOL -PALAPI -QueryPerformanceFrequency( - OUT LARGE_INTEGER *lpFrequency - ); - PALIMPORT BOOL PALAPI @@ -3617,6 +3454,27 @@ Define_InterlockMethod( ((PVOID)(UINT_PTR)InterlockedCompareExchange((PLONG)(UINT_PTR)(Destination), (LONG)(UINT_PTR)(ExChange), (LONG)(UINT_PTR)(Comperand))) #endif +#if defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) +FORCEINLINE uint8_t _InterlockedCompareExchange128(int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; + // TODO-LOONGARCH64: the 128-bit CAS is supported starting from the 3A6000 CPU (ISA1.1). + // When running on older hardware that doesn't support native CAS-128, the system falls back + // to a mutex-based approach via libatomic, which is not suitable for runtime requirements. + // + // TODO-RISCV64: double-check if libatomic's emulated CAS-128 works as expected once AOT applications are + // functional on linux-riscv64: https://github.com/dotnet/runtime/issues/106223. + // CAS-128 is natively supported starting with the Zacas extension in Linux 6.8; however, hardware support + // for RVA23 profile is not available at the time of writing. + // + // See https://github.com/dotnet/runtime/issues/109276. 
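    // Usage sketch (illustrative only): callers follow the Win32 _InterlockedCompareExchange128
    // convention, where the comparand is passed as a two-element {low, high} array that also
    // receives the value observed at pDst, and the return value is non-zero only when the
    // exchange actually happened.
    //
    //   int64_t destination[2] = { 0, 0 };          // must be 16-byte aligned in real use
    //   int64_t comparandAndResult[2] = { 0, 0 };   // expected {low, high}; updated to the observed value
    //   uint8_t swapped = _InterlockedCompareExchange128(destination, /*high*/ 2, /*low*/ 1, comparandAndResult);
    //   // swapped != 0 => destination now holds {1, 2}; otherwise comparandAndResult holds
    //   // whatever was observed at destination.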
+ __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); + PAL_InterlockedOperationBarrier(); + pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); + return iComparand == iResult; +} +#endif + /*++ Function: MemoryBarrier @@ -3912,29 +3770,6 @@ PALIMPORT DLLEXPORT int __cdecl _putenv(const char *); #define ERANGE 34 #endif -/****************PAL Perf functions for PInvoke*********************/ -#if PAL_PERF -PALIMPORT -VOID -PALAPI -PAL_EnableProcessProfile(); - -PALIMPORT -VOID -PALAPI -PAL_DisableProcessProfile(); - -PALIMPORT -BOOL -PALAPI -PAL_IsProcessProfileEnabled(); - -PALIMPORT -INT64 -PALAPI -PAL_GetCpuTickCount(); -#endif // PAL_PERF - /******************* PAL functions for exceptions *******/ #ifdef __cplusplus @@ -4081,7 +3916,6 @@ struct PAL_SEHException typedef BOOL (*PHARDWARE_EXCEPTION_HANDLER)(PAL_SEHException* ex); typedef BOOL (*PHARDWARE_EXCEPTION_SAFETY_CHECK_FUNCTION)(PCONTEXT contextRecord, PEXCEPTION_RECORD exceptionRecord); -typedef VOID (*PTERMINATION_REQUEST_HANDLER)(int terminationExitCode); typedef DWORD (*PGET_GCMARKER_EXCEPTION_CODE)(LPVOID ip); PALIMPORT @@ -4104,12 +3938,6 @@ PAL_ThrowExceptionFromContext( IN CONTEXT* context, IN PAL_SEHException* ex); -PALIMPORT -VOID -PALAPI -PAL_SetTerminationRequestHandler( - IN PTERMINATION_REQUEST_HANDLER terminationRequestHandler); - PALIMPORT VOID PALAPI diff --git a/src/coreclr/pal/inc/pal_error.h b/src/coreclr/pal/inc/pal_error.h index b387e6854006..b03f9cefa129 100644 --- a/src/coreclr/pal/inc/pal_error.h +++ b/src/coreclr/pal/inc/pal_error.h @@ -147,7 +147,6 @@ #define ERROR_PALINIT_TLS 65295L #define ERROR_PALINIT_ENV 65296L #define ERROR_PALINIT_DBG_CHANNELS 65297L -#define ERROR_PALINIT_SHARED_MEMORY_MANAGER 65298L #define ERROR_PALINIT_SHM 65299L #define ERROR_PALINIT_MODULE_MANAGER 65300L diff --git a/src/coreclr/pal/inc/pal_mstypes.h b/src/coreclr/pal/inc/pal_mstypes.h index e3423c41f898..a0762726ba18 100644 --- a/src/coreclr/pal/inc/pal_mstypes.h +++ b/src/coreclr/pal/inc/pal_mstypes.h @@ -9,6 +9,7 @@ #define __PAL_MSTYPES_H__ #include +#include #ifdef __cplusplus extern "C" { @@ -316,17 +317,6 @@ typedef union _LARGE_INTEGER { LONGLONG QuadPart; } LARGE_INTEGER, *PLARGE_INTEGER; -#ifndef GUID_DEFINED -typedef struct _GUID { - ULONG Data1; // NOTE: diff from Win32, for LP64 - USHORT Data2; - USHORT Data3; - UCHAR Data4[ 8 ]; -} GUID; -typedef const GUID *LPCGUID; -#define GUID_DEFINED -#endif // !GUID_DEFINED - typedef struct _FILETIME { DWORD dwLowDateTime; DWORD dwHighDateTime; diff --git a/src/coreclr/pal/inc/palprivate.h b/src/coreclr/pal/inc/palprivate.h index eff682055859..17c5a045eff6 100644 --- a/src/coreclr/pal/inc/palprivate.h +++ b/src/coreclr/pal/inc/palprivate.h @@ -20,15 +20,6 @@ CreateFileA( IN DWORD dwFlagsAndAttributes, IN HANDLE hTemplateFile); -PALIMPORT -BOOL -PALAPI -CopyFileA( - IN LPCSTR lpExistingFileName, - IN LPCSTR lpNewFileName, - IN BOOL bFailIfExists); - - PALIMPORT BOOL PALAPI @@ -49,26 +40,6 @@ CreateDirectoryW( IN LPCWSTR lpPathName, IN LPSECURITY_ATTRIBUTES lpSecurityAttributes); -PALIMPORT -DWORD -PALAPI -GetFileAttributesA( - IN LPCSTR lpFileName); - -PALIMPORT -BOOL -PALAPI -SetFileAttributesA( - IN LPCSTR lpFileName, - IN DWORD dwFileAttributes); - -PALIMPORT -BOOL -PALAPI -SetFileAttributesW( - IN LPCWSTR lpFileName, - IN DWORD dwFileAttributes); - PALIMPORT DWORD PALAPI @@ -78,15 +49,6 @@ GetFullPathNameA( OUT LPSTR lpBuffer, OUT LPSTR 
*lpFilePart); -PALIMPORT -UINT -PALAPI -GetTempFileNameA( - IN LPCSTR lpPathName, - IN LPCSTR lpPrefixString, - IN UINT uUnique, - OUT LPSTR lpTempFileName); - PALIMPORT DWORD PALAPI diff --git a/src/coreclr/pal/inc/rt/palrt.h b/src/coreclr/pal/inc/rt/palrt.h index 9317654958b6..215290ce0934 100644 --- a/src/coreclr/pal/inc/rt/palrt.h +++ b/src/coreclr/pal/inc/rt/palrt.h @@ -2,27 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. // -// -// =========================================================================== -// File: palrt.h -// -// =========================================================================== - /*++ - - Abstract: PAL runtime functions. These are functions which are ordinarily implemented as part of the Win32 API set, but when compiling CoreCLR for Unix-like systems, are implemented as a runtime library on top of the PAL. - -Author: - - - -Revision History: - --*/ #ifndef __PALRT_H__ @@ -213,19 +198,7 @@ EXTERN_C const GUID GUID_NULL; typedef GUID *LPGUID; typedef const GUID FAR *LPCGUID; -#ifdef __cplusplus -extern "C++" { -#if !defined _SYS_GUID_OPERATOR_EQ_ && !defined _NO_SYS_GUID_OPERATOR_EQ_ -#define _SYS_GUID_OPERATOR_EQ_ -inline int IsEqualGUID(REFGUID rguid1, REFGUID rguid2) - { return !memcmp(&rguid1, &rguid2, sizeof(GUID)); } -inline int operator==(REFGUID guidOne, REFGUID guidOther) - { return IsEqualGUID(guidOne,guidOther); } -inline int operator!=(REFGUID guidOne, REFGUID guidOther) - { return !IsEqualGUID(guidOne,guidOther); } -#endif -}; -#endif // __cplusplus +#define IsEqualGUID(guid1, guid2) guid1 == guid2 #define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ EXTERN_C const GUID FAR name @@ -1030,6 +1003,14 @@ typedef struct _DISPATCHER_CONTEXT { DWORD Reserved; } DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT; +#elif defined(HOST_WASM) + +typedef struct _DISPATCHER_CONTEXT { + // WASM does not build the VM or JIT at this point, + // so we only provide a dummy definition. + DWORD Reserved; +} DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT; + #else #error Unknown architecture for defining DISPATCHER_CONTEXT. diff --git a/src/coreclr/pal/inc/rt/specstrings.h b/src/coreclr/pal/inc/rt/specstrings.h index 1cccb42e1554..f9b278037b66 100644 --- a/src/coreclr/pal/inc/rt/specstrings.h +++ b/src/coreclr/pal/inc/rt/specstrings.h @@ -465,27 +465,6 @@ __inner_analysis_assume_nullterminated_dec } #endif -#ifdef _PREFIX_ -/************************************************************************** -* Definition of __pfx_assume and __pfx_assert. Thse should be the only -* definitions of these functions. -***************************************************************************/ -#if __cplusplus -extern "C" void __pfx_assert(bool, const char *); -extern "C" void __pfx_assume(bool, const char *); -#else -void __pfx_assert(int, const char *); -void __pfx_assume(int, const char *); -#endif -/************************************************************************** -* Redefinition of __analysis_assume and __analysis_assert for PREFIX build -**************************************************************************/ -#undef __analysis_assume -#undef __analysis_assert -#define __analysis_assume(e) (__pfx_assume(e,"pfx_assume"),__assume(e)); -#define __analysis_assert(e) (__pfx_assert(e,"pfx_assert"),__assume(e)); -#endif /* ifdef _PREFIX_ */ - /************************************************************************** * This include should always be the last thing in this file. 
* Must avoid redfinitions of macros to workaround rc.exe issues. diff --git a/src/coreclr/pal/inc/unixasmmacros.inc b/src/coreclr/pal/inc/unixasmmacros.inc index 120b26543e3f..83b5ce1523a5 100644 --- a/src/coreclr/pal/inc/unixasmmacros.inc +++ b/src/coreclr/pal/inc/unixasmmacros.inc @@ -38,6 +38,23 @@ .equiv \New, \Old .endm +// GC type flags +#define GC_ALLOC_FINALIZE 1 +#define GC_ALLOC_ALIGN8_BIAS 4 +#define GC_ALLOC_ALIGN8 8 + +#define G_FREE_OBJECT_METHOD_TABLE g_pFreeObjectMethodTable + +// Offset of ee_alloc_context relative to INLINE_GET_ALLOC_CONTEXT_BASE. +// +// Since we have a disparity on how thread locals are accessed on various platforms and +// the current value of OFFSETOF__RuntimeThreadLocals__ee_alloc_context is zero we expect +// the helpers to add the OFFSETOF__RuntimeThreadLocals__ee_alloc_context constant to the +// base and set the additional offset to zero. +// In other words, we treat INLINE_GET_ALLOC_CONTEXT_BASE as returning the same value as +// GetThreadEEAllocContext. +#define OFFSETOF__ee_alloc_context 0 + #if defined(HOST_X86) #include "unixasmmacrosx86.inc" #elif defined(HOST_AMD64) diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc index 31093a4073d2..d84f70e8ad57 100644 --- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc @@ -23,6 +23,16 @@ C_FUNC(\Name) = . .endm +.macro ALTERNATE_ENTRY Name +#if defined(__APPLE__) + .alt_entry C_FUNC(\Name) + .private_extern C_FUNC(\Name) +#else + .global C_FUNC(\Name) +#endif +C_FUNC(\Name): +.endm + .macro LEAF_ENTRY Name, Section .global C_FUNC(\Name) #if defined(__APPLE__) @@ -30,6 +40,7 @@ #else .type \Name, %function #endif + .p2align 4 C_FUNC(\Name): .cfi_startproc .endm @@ -356,3 +367,41 @@ C_FUNC(\Name\()_End): .cfi_same_value rbp .endm + +// Inlined version of GetThreadEEAllocContext. Trashes volatile registers. +.macro INLINE_GET_ALLOC_CONTEXT_BASE +#if defined(FEATURE_EMULATED_TLS) || defined(__APPLE__) + call C_FUNC(GetThreadEEAllocContext) +#else + .att_syntax + .byte 0x66 // data16 prefix - padding to have space for linker relaxations + leaq t_runtime_thread_locals@TLSGD(%rip), %rdi + .byte 0x66 // + .byte 0x66 // + .byte 0x48 // rex.W prefix, also for padding + callq __tls_get_addr@PLT + .intel_syntax noprefix + + .ifnc OFFSETOF__RuntimeThreadLocals__ee_alloc_context, 0 + lea rax, [rax + OFFSETOF__RuntimeThreadLocals__ee_alloc_context] + .endif +#endif +.endm + +// Pushes a TransitionBlock on the stack without saving the argument registers. +// See the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout. 
+.macro PUSH_COOP_PINVOKE_FRAME target + set_cfa_register rsp, 8 + + PUSH_CALLEE_SAVED_REGISTERS + // 6 * 8 for argument register space in TransitionBlock + alignment of the stack to 16b + alloc_stack 56 + END_PROLOGUE + + lea \target, [rsp + 8] +.endm + +.macro POP_COOP_PINVOKE_FRAME + free_stack 56 + POP_CALLEE_SAVED_REGISTERS +.endm diff --git a/src/coreclr/pal/inc/unixasmmacrosarm.inc b/src/coreclr/pal/inc/unixasmmacrosarm.inc index f5eb32656cd3..54a6f7d4dc3b 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm.inc @@ -44,10 +44,28 @@ C_FUNC(\Name\()_End): nop .endm +.macro GLOBAL_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): +.endm + .macro PREPARE_EXTERNAL_VAR Name, HelperReg ldr \HelperReg, [pc, #C_FUNC(\Name)@GOTPCREL] .endm +.macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg + movw \HelperReg, #:lower16:C_FUNC(\Name) - (. + 12) + movt \HelperReg, #:upper16:C_FUNC(\Name) - (. + 8) + add \HelperReg, pc + ldr \HelperReg, [\HelperReg] +.endm + .macro push_nonvol_reg Register push \Register .save \Register @@ -253,6 +271,29 @@ C_FUNC(\Name\()_End): vpop_nonvol_reg "\RegList" .endm +.macro INLINE_GET_ALLOC_CONTEXT_BASE + bl C_FUNC(GetThreadEEAllocContext) +.endm + +.macro PUSH_COOP_PINVOKE_FRAME target + // Reserve space for argument registers + alloc_stack 16 + PUSH_CALLEE_SAVED_REGISTERS + PROLOG_STACK_SAVE_OFFSET r7, #12 + // let r7 point the saved r7 in the stack (clang FP style) + // align the stack + alloc_stack 4 + CHECK_STACK_ALIGNMENT + END_PROLOGUE + add \target, sp, 4 +.endm + +.macro POP_COOP_PINVOKE_FRAME + free_stack 4 + POP_CALLEE_SAVED_REGISTERS + free_stack 16 +.endm + //----------------------------------------------------------------------------- // Macro used to check (in debug builds only) whether the stack is 64-bit aligned (a requirement before calling // out into C++/OS code). Invoke this directly after your prolog (if the stack frame size is fixed) or directly @@ -271,3 +312,9 @@ C_FUNC(\Name\()_End): 0: #endif .endm + +// Loads a 32bit constant into destination register +.macro MOV32 DestReg, Constant + movw \DestReg, #((\Constant) & 0xFFFF) + movt \DestReg, #((\Constant) >> 16) +.endm diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc index 9e86779d4511..4e8b9e7c2571 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc @@ -26,6 +26,24 @@ C_FUNC(\Name): .endm +.macro ALTERNATE_ENTRY Name +#if defined(__APPLE__) + .alt_entry C_FUNC(\Name) + .private_extern C_FUNC(\Name) +#else + .global C_FUNC(\Name) + .hidden C_FUNC(\Name) +#endif +C_FUNC(\Name): +.endm + +// On MacOS, local labels cannot be used in arithmetic expressions. +#if defined(__APPLE__) +#define FIXUP_LABEL(name) name +#else +#define FIXUP_LABEL(name) .L##name +#endif + .macro LEAF_ENTRY Name, Section .global C_FUNC(\Name) #if defined(__APPLE__) @@ -103,6 +121,13 @@ C_FUNC(\Name\()_End): .endif .endm +.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs]! + .cfi_adjust_cfa_offset -\ofs + .cfi_rel_offset \reg1, 0 + .cfi_rel_offset \reg2, 8 +.endm + .macro EPILOG_RESTORE_REG reg, ofs ldr \reg, [sp, \ofs] .cfi_restore \reg @@ -300,6 +325,70 @@ C_FUNC(\Name\()_End): .endm +// Inlined version of GetThreadEEAllocContext. Target cannot be x0 or x1. 
+.macro INLINE_GET_ALLOC_CONTEXT_BASE target + .ifc \target, x0 + .error "target cannot be x0" + .endif + .ifc \target, x1 + .error "target cannot be x1" + .endif + +#if defined(FEATURE_EMULATED_TLS) || defined(__APPLE__) + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0x20 + PROLOG_SAVE_REG_PAIR x0, x1, 0x10 + + bl C_FUNC(GetThreadEEAllocContext) + mov \target, x0 + + EPILOG_RESTORE_REG_PAIR x0, x1, 0x10 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x20 +#else + PROLOG_SAVE_REG_PAIR_INDEXED x0, lr, -0x10 + + // This sequence of instructions is recognized and potentially patched + // by the linker (GD->IE/LE relaxation). + adrp x0, :tlsdesc:t_runtime_thread_locals + ldr \target, [x0, :tlsdesc_lo12:t_runtime_thread_locals] + add x0, x0, :tlsdesc_lo12:t_runtime_thread_locals + blr \target + // End of the sequence + + mrs \target, TPIDR_EL0 + add \target, \target, x0 + + .ifnc OFFSETOF__RuntimeThreadLocals__ee_alloc_context, 0 + add \target, x0, OFFSETOF__RuntimeThreadLocals__ee_alloc_context + .endif + + EPILOG_RESTORE_REG_PAIR_INDEXED x0, lr, 0x10 +#endif +.endm + +// Pushes a TransitionBlock on the stack without saving the argument registers. +// See the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout. +.macro PUSH_COOP_PINVOKE_FRAME target + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -176 + + // Spill callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, 16 + PROLOG_SAVE_REG_PAIR x21, x22, 32 + PROLOG_SAVE_REG_PAIR x23, x24, 48 + PROLOG_SAVE_REG_PAIR x25, x26, 64 + PROLOG_SAVE_REG_PAIR x27, x28, 80 + + mov \target, sp +.endm + +.macro POP_COOP_PINVOKE_FRAME + EPILOG_RESTORE_REG_PAIR x19, x20, 16 + EPILOG_RESTORE_REG_PAIR x21, x22, 32 + EPILOG_RESTORE_REG_PAIR x23, x24, 48 + EPILOG_RESTORE_REG_PAIR x25, x26, 64 + EPILOG_RESTORE_REG_PAIR x27, x28, 80 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 176 +.endm + // ------------------------------------------------------------------ // Macro to generate Redirection Stubs // diff --git a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc index 85cf42121326..e44f07ddf4fd 100644 --- a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc @@ -17,6 +17,12 @@ C_FUNC(\Name): .endm +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) + .hidden C_FUNC(\Name) +C_FUNC(\Name): +.endm + .macro LEAF_ENTRY Name, Section .global C_FUNC(\Name) .type \Name, %function @@ -41,6 +47,16 @@ C_FUNC(\Name\()_End): la.local \HelperReg, \Name .endm +.macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg + la.local \HelperReg, \Name + ld.d \HelperReg, \HelperReg, 0 +.endm + +.macro PREPARE_EXTERNAL_VAR_INDIRECT_W Name, HelperReg + la.local \HelperReg, \Name + ld.w \HelperReg, \HelperReg, 0 +.endm + .macro PROLOG_STACK_ALLOC Size addi.d $sp, $sp, -\Size //.cfi_adjust_cfa_offset \Size @@ -416,6 +432,44 @@ C_FUNC(\Name\()_End): .endm +// Inlined version of GetThreadEEAllocContext. Target cannot be a0. +.macro INLINE_GET_ALLOC_CONTEXT_BASE target + .ifc \target, $a0 + .error "target cannot be a0" + .endif + + // Save $a0, $ra + PROLOG_SAVE_REG_PAIR_INDEXED 4, 1, 16, 0 + + // This instruction is recognized and potentially patched + // by the linker (GD->IE/LE relaxation). + la.tls.desc $a0, t_runtime_thread_locals + + addi.d \target, $tp, OFFSETOF__RuntimeThreadLocals__ee_alloc_context + add.d \target, \target, $a0 + + // Restore $a0, $ra + EPILOG_RESTORE_REG_PAIR_INDEXED 4, 1, 16 +.endm + +// Pushes a TransitionBlock on the stack without saving the argument registers. 
+// See the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout. +.macro PUSH_COOP_PINVOKE_FRAME target + // Including fp, ra, s0-s8, padding, and (a0-a7)arguments. (1+1+9+1)*8 + 8*8 == 96. + PROLOG_STACK_ALLOC 160 + // $fp,$ra + PROLOG_SAVE_REG_PAIR 22, 1, 0, 1 + // Spill callee saved registers. $sp=$r3. + SAVE_CALLEESAVED_REGISTERS 3, 0 + move \target, $sp +.endm + +.macro POP_COOP_PINVOKE_FRAME + RESTORE_CALLEESAVED_REGISTERS 3, 0 + EPILOG_RESTORE_REG_PAIR 22, 1, 0 + EPILOG_STACK_FREE 160 +.endm + // ------------------------------------------------------------------ // Macro to generate Redirection Stubs // diff --git a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc index a86acf5b897d..8201a03817f9 100644 --- a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc @@ -37,6 +37,12 @@ C_FUNC(\Name\()_End): nop .endm +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) + .hidden C_FUNC(\Name) +C_FUNC(\Name): +.endm + .macro PREPARE_EXTERNAL_VAR Name, HelperReg lla \HelperReg, \Name .endm @@ -293,7 +299,6 @@ C_FUNC(\Name\()_End): .endm .macro EPILOG_WITH_TRANSITION_BLOCK_RETURN - RESTORE_CALLEESAVED_REGISTERS sp, __PWTB_CalleeSavedRegisters EPILOG_RESTORE_REG_PAIR fp, ra, __PWTB_CalleeSavedRegisters @@ -322,6 +327,28 @@ C_FUNC(\Name\()_End): EPILOG_STACK_FREE __PWTB_StackAlloc .endm +// Inlined version of GetThreadEEAllocContext +.macro INLINE_GET_ALLOC_CONTEXT_BASE + call C_FUNC(GetThreadEEAllocContext) +.endm + +// Pushes a TransitionBlock on the stack without saving the argument registers. +// See the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout. +.macro PUSH_COOP_PINVOKE_FRAME target + // Including fp, ra, s1-s11, tp, gp, padding and (a0-a7)arguments. (1+1+11+1+1)*8 + 8 + 8*8. + PROLOG_STACK_ALLOC 192 + PROLOG_SAVE_REG_PAIR fp, ra, 0, 1 + // Spill callee saved registers. sp=r2. 
+ SAVE_CALLEESAVED_REGISTERS sp, 0 + mv \target, sp +.endm + +.macro POP_COOP_PINVOKE_FRAME + RESTORE_CALLEESAVED_REGISTERS sp, 0 + EPILOG_RESTORE_REG_PAIR fp, ra, 0 + EPILOG_STACK_FREE 192 +.endm + // ------------------------------------------------------------------ // Macro to generate Redirection Stubs // diff --git a/src/coreclr/pal/inc/unixasmmacrosx86.inc b/src/coreclr/pal/inc/unixasmmacrosx86.inc index 7bc994a779c3..c28b2b170bec 100644 --- a/src/coreclr/pal/inc/unixasmmacrosx86.inc +++ b/src/coreclr/pal/inc/unixasmmacrosx86.inc @@ -24,6 +24,12 @@ C_FUNC(\Name): C_FUNC(\Name): .endm +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): +.endm + .macro LEAF_END Name, Section .size \Name, .-\Name .cfi_endproc @@ -50,6 +56,11 @@ C_FUNC(\Name\()_End): .cfi_rel_offset \Reg, 0 .endm +.macro PROLOG_ALLOC Size + sub esp, \Size + .cfi_adjust_cfa_offset \Size +.endm + .macro PROLOG_END .cfi_def_cfa_register ebp .cfi_def_cfa_offset 8 @@ -63,6 +74,11 @@ C_FUNC(\Name\()_End): .cfi_restore \Reg .endm +.macro EPILOG_FREE Size + add esp, \Size + .cfi_adjust_cfa_offset -\Size +.endm + .macro EPILOG_END pop ebp .endm @@ -75,8 +91,7 @@ C_FUNC(\Name\()_End): .endm .macro ESP_PROLOG_ALLOC Size - sub esp, \Size - .cfi_adjust_cfa_offset \Size + PROLOG_ALLOC \Size .endm .macro ESP_PROLOG_END @@ -91,8 +106,7 @@ C_FUNC(\Name\()_End): .endm .macro ESP_EPILOG_FREE Size - add esp, \Size - .cfi_adjust_cfa_offset -\Size + EPILOG_FREE \Size .endm .macro ESP_EPILOG_END @@ -109,6 +123,49 @@ C_FUNC(\Name\()_End): .intel_syntax noprefix .endm +.macro PUSH_COOP_PINVOKE_FRAME Target + // push ebp-frame + PROLOG_BEG + + // save CalleeSavedRegisters + PROLOG_PUSH ebx + PROLOG_PUSH esi + PROLOG_PUSH edi + + // make space for ArgumentRegisters (8) + alignment (4) + PROLOG_ALLOC 12 + + // set frame pointer + PROLOG_END + + lea \Target, [esp + 4] +.endm + +// Inlined version of GetThreadEEAllocContext. 
+.macro INLINE_GET_ALLOC_CONTEXT_BASE + push ecx + push eax + call C_FUNC(GetThreadEEAllocContext) + pop eax + pop ecx +.endm + +.macro POP_COOP_PINVOKE_FRAME + // restore stack pointer + EPILOG_BEG + + // skip over alignment (4) + ArgumentRegisters (8) + EPILOG_FREE 12 + + // pop CalleeSavedRegisters + EPILOG_POP edi + EPILOG_POP esi + EPILOG_POP ebx + + // pop ebp-frame + EPILOG_END +.endm + .macro CHECK_STACK_ALIGNMENT #ifdef _DEBUG test esp, 0x0F diff --git a/src/coreclr/pal/prebuilt/inc/corprof.h b/src/coreclr/pal/prebuilt/inc/corprof.h index ad54c4e6b818..b45d1c8f7d65 100644 --- a/src/coreclr/pal/prebuilt/inc/corprof.h +++ b/src/coreclr/pal/prebuilt/inc/corprof.h @@ -583,9 +583,9 @@ enum __MIDL___MIDL_itf_corprof_0000_0000_0005 COR_PRF_DISABLE_ALL_NGEN_IMAGES = 0x80000000, COR_PRF_ALL = 0x8fffffff, COR_PRF_REQUIRE_PROFILE_IMAGE = ( ( COR_PRF_USE_PROFILE_IMAGES | COR_PRF_MONITOR_CODE_TRANSITIONS ) | COR_PRF_MONITOR_ENTERLEAVE ) , - COR_PRF_ALLOWABLE_AFTER_ATTACH = ( ( ( ( ( ( ( ( ( ( COR_PRF_MONITOR_THREADS | COR_PRF_MONITOR_MODULE_LOADS ) | COR_PRF_MONITOR_ASSEMBLY_LOADS ) | COR_PRF_MONITOR_APPDOMAIN_LOADS ) | COR_PRF_ENABLE_STACK_SNAPSHOT ) | COR_PRF_MONITOR_GC ) | COR_PRF_MONITOR_SUSPENDS ) | COR_PRF_MONITOR_CLASS_LOADS ) | COR_PRF_MONITOR_EXCEPTIONS ) | COR_PRF_MONITOR_JIT_COMPILATION ) | COR_PRF_ENABLE_REJIT ) , + COR_PRF_ALLOWABLE_AFTER_ATTACH = ( ( ( ( ( ( ( ( ( ( ( ( COR_PRF_MONITOR_THREADS | COR_PRF_MONITOR_MODULE_LOADS ) | COR_PRF_MONITOR_ASSEMBLY_LOADS ) | COR_PRF_MONITOR_APPDOMAIN_LOADS ) | COR_PRF_ENABLE_STACK_SNAPSHOT ) | COR_PRF_MONITOR_GC ) | COR_PRF_MONITOR_SUSPENDS ) | COR_PRF_MONITOR_CLASS_LOADS ) | COR_PRF_MONITOR_EXCEPTIONS ) | COR_PRF_MONITOR_JIT_COMPILATION ) | COR_PRF_DISABLE_INLINING ) | COR_PRF_DISABLE_OPTIMIZATIONS ) | COR_PRF_ENABLE_REJIT ) , COR_PRF_ALLOWABLE_NOTIFICATION_PROFILER = ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( COR_PRF_MONITOR_FUNCTION_UNLOADS | COR_PRF_MONITOR_CLASS_LOADS ) | COR_PRF_MONITOR_MODULE_LOADS ) | COR_PRF_MONITOR_ASSEMBLY_LOADS ) | COR_PRF_MONITOR_APPDOMAIN_LOADS ) | COR_PRF_MONITOR_JIT_COMPILATION ) | COR_PRF_MONITOR_EXCEPTIONS ) | COR_PRF_MONITOR_OBJECT_ALLOCATED ) | COR_PRF_MONITOR_THREADS ) | COR_PRF_MONITOR_CODE_TRANSITIONS ) | COR_PRF_MONITOR_CCW ) | COR_PRF_MONITOR_SUSPENDS ) | COR_PRF_MONITOR_CACHE_SEARCHES ) | COR_PRF_DISABLE_INLINING ) | COR_PRF_DISABLE_OPTIMIZATIONS ) | COR_PRF_ENABLE_OBJECT_ALLOCATED ) | COR_PRF_MONITOR_CLR_EXCEPTIONS ) | COR_PRF_ENABLE_STACK_SNAPSHOT ) | COR_PRF_USE_PROFILE_IMAGES ) | COR_PRF_DISABLE_ALL_NGEN_IMAGES ) , - COR_PRF_MONITOR_IMMUTABLE = ( ( ( ( ( ( ( ( ( ( ( ( ( ( COR_PRF_MONITOR_CODE_TRANSITIONS | COR_PRF_MONITOR_REMOTING ) | COR_PRF_MONITOR_REMOTING_COOKIE ) | COR_PRF_MONITOR_REMOTING_ASYNC ) | COR_PRF_ENABLE_INPROC_DEBUGGING ) | COR_PRF_ENABLE_JIT_MAPS ) | COR_PRF_DISABLE_OPTIMIZATIONS ) | COR_PRF_DISABLE_INLINING ) | COR_PRF_ENABLE_OBJECT_ALLOCATED ) | COR_PRF_ENABLE_FUNCTION_ARGS ) | COR_PRF_ENABLE_FUNCTION_RETVAL ) | COR_PRF_ENABLE_FRAME_INFO ) | COR_PRF_USE_PROFILE_IMAGES ) | COR_PRF_DISABLE_TRANSPARENCY_CHECKS_UNDER_FULL_TRUST ) | COR_PRF_DISABLE_ALL_NGEN_IMAGES ) + COR_PRF_MONITOR_IMMUTABLE = ( ( ( ( ( ( ( ( ( ( ( ( COR_PRF_MONITOR_CODE_TRANSITIONS | COR_PRF_MONITOR_REMOTING ) | COR_PRF_MONITOR_REMOTING_COOKIE ) | COR_PRF_MONITOR_REMOTING_ASYNC ) | COR_PRF_ENABLE_INPROC_DEBUGGING ) | COR_PRF_ENABLE_JIT_MAPS ) | COR_PRF_ENABLE_OBJECT_ALLOCATED ) | COR_PRF_ENABLE_FUNCTION_ARGS ) | COR_PRF_ENABLE_FUNCTION_RETVAL ) | COR_PRF_ENABLE_FRAME_INFO ) | COR_PRF_USE_PROFILE_IMAGES ) | 
COR_PRF_DISABLE_TRANSPARENCY_CHECKS_UNDER_FULL_TRUST ) | COR_PRF_DISABLE_ALL_NGEN_IMAGES ) } COR_PRF_MONITOR; typedef /* [public] */ diff --git a/src/coreclr/pal/prebuilt/inc/mscoree.h b/src/coreclr/pal/prebuilt/inc/mscoree.h index 928a7fad88be..5f2f3248a6d0 100644 --- a/src/coreclr/pal/prebuilt/inc/mscoree.h +++ b/src/coreclr/pal/prebuilt/inc/mscoree.h @@ -86,48 +86,16 @@ typedef /* [public][public] */ enum __MIDL___MIDL_itf_mscoree_0000_0000_0001 { STARTUP_CONCURRENT_GC = 0x1, - STARTUP_LOADER_OPTIMIZATION_MASK = ( 0x3 << 1 ) , - STARTUP_LOADER_OPTIMIZATION_SINGLE_DOMAIN = ( 0x1 << 1 ) , - STARTUP_LOADER_OPTIMIZATION_MULTI_DOMAIN = ( 0x2 << 1 ) , - STARTUP_LOADER_OPTIMIZATION_MULTI_DOMAIN_HOST = ( 0x3 << 1 ) , - STARTUP_LOADER_SAFEMODE = 0x10, - STARTUP_LOADER_SETPREFERENCE = 0x100, STARTUP_SERVER_GC = 0x1000, STARTUP_HOARD_GC_VM = 0x2000, - STARTUP_SINGLE_VERSION_HOSTING_INTERFACE = 0x4000, - STARTUP_LEGACY_IMPERSONATION = 0x10000, - STARTUP_DISABLE_COMMITTHREADSTACK = 0x20000, - STARTUP_ALWAYSFLOW_IMPERSONATION = 0x40000, - STARTUP_TRIM_GC_COMMIT = 0x80000, - STARTUP_ETW = 0x100000, - STARTUP_ARM = 0x400000, - STARTUP_SINGLE_APPDOMAIN = 0x800000, - STARTUP_APPX_APP_MODEL = 0x1000000, - STARTUP_DISABLE_RANDOMIZED_STRING_HASHING = 0x2000000 } STARTUP_FLAGS; typedef /* [public] */ enum __MIDL___MIDL_itf_mscoree_0000_0000_0002 { - APPDOMAIN_SECURITY_DEFAULT = 0, - APPDOMAIN_SECURITY_SANDBOXED = 0x1, - APPDOMAIN_SECURITY_FORBID_CROSSAD_REVERSE_PINVOKE = 0x2, - APPDOMAIN_IGNORE_UNHANDLED_EXCEPTIONS = 0x4, APPDOMAIN_FORCE_TRIVIAL_WAIT_OPERATIONS = 0x8, - APPDOMAIN_ENABLE_PINVOKE_AND_CLASSIC_COMINTEROP = 0x10, - APPDOMAIN_ENABLE_PLATFORM_SPECIFIC_APPS = 0x40, - APPDOMAIN_ENABLE_ASSEMBLY_LOADFILE = 0x80, - APPDOMAIN_DISABLE_TRANSPARENCY_ENFORCEMENT = 0x100 } APPDOMAIN_SECURITY_FLAGS; -typedef /* [public] */ -enum __MIDL___MIDL_itf_mscoree_0000_0000_0003 - { - WAIT_MSGPUMP = 0x1, - WAIT_ALERTABLE = 0x2, - WAIT_NOTINDEADLOCK = 0x4 - } WAIT_OPTION; - typedef /* [public] */ enum __MIDL___MIDL_itf_mscoree_0000_0000_0004 { diff --git a/src/coreclr/pal/src/CMakeLists.txt b/src/coreclr/pal/src/CMakeLists.txt index a46316b56413..a850607a20c3 100644 --- a/src/coreclr/pal/src/CMakeLists.txt +++ b/src/coreclr/pal/src/CMakeLists.txt @@ -10,7 +10,7 @@ elseif (CLR_CMAKE_TARGET_FREEBSD) include_directories(SYSTEM $ENV{ROOTFS_DIR}/usr/local/include) endif() -if(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND) +if(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND AND NOT CLR_CMAKE_TARGET_ARCH_WASM) include_directories(${CLR_SRC_NATIVE_DIR}/external/libunwind/include) include_directories(${CLR_SRC_NATIVE_DIR}/external/libunwind/include/tdep) include_directories(${CLR_ARTIFACTS_OBJ_DIR}/external/libunwind/include) @@ -21,7 +21,7 @@ elseif(NOT CLR_CMAKE_TARGET_APPLE) find_unwind_libs(UNWIND_LIBS) else() add_subdirectory(${CLR_SRC_NATIVE_DIR}/external/libunwind_extras ${CLR_ARTIFACTS_OBJ_DIR}/external/libunwind) -endif(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND) +endif(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND AND NOT CLR_CMAKE_TARGET_ARCH_WASM) include(configure.cmake) @@ -59,6 +59,8 @@ elseif(CLR_CMAKE_HOST_ARCH_S390X) set(PAL_ARCH_SOURCES_DIR s390x) elseif(CLR_CMAKE_HOST_ARCH_POWERPC64) set(PAL_ARCH_SOURCES_DIR ppc64le) +elseif(CLR_CMAKE_HOST_ARCH_WASM) + set(PAL_ARCH_SOURCES_DIR wasm) endif() if(CLR_CMAKE_USE_SYSTEM_LIBUNWIND) @@ -113,18 +115,26 @@ endif(CLR_CMAKE_TARGET_HAIKU) # turn off capability to remove unused functions (which was enabled in debug build with sanitizers) set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} 
-Wl,--no-gc-sections") -set(ARCH_SOURCES - arch/${PAL_ARCH_SOURCES_DIR}/context2.S - arch/${PAL_ARCH_SOURCES_DIR}/debugbreak.S - arch/${PAL_ARCH_SOURCES_DIR}/exceptionhelper.S -) +if (NOT CLR_CMAKE_TARGET_ARCH_WASM) + set(ARCH_SOURCES + arch/${PAL_ARCH_SOURCES_DIR}/context2.S + arch/${PAL_ARCH_SOURCES_DIR}/debugbreak.S + arch/${PAL_ARCH_SOURCES_DIR}/exceptionhelper.S + ) +endif() -if(NOT CLR_CMAKE_TARGET_APPLE) +if (CLR_CMAKE_TARGET_ARCH_WASM) + set(PLATFORM_SOURCES + arch/${PAL_ARCH_SOURCES_DIR}/stubs.cpp + ) +endif() + +if(NOT CLR_CMAKE_TARGET_APPLE AND NOT CLR_CMAKE_TARGET_ARCH_WASM) list(APPEND PLATFORM_SOURCES arch/${PAL_ARCH_SOURCES_DIR}/callsignalhandlerwrapper.S arch/${PAL_ARCH_SOURCES_DIR}/signalhandlerhelper.cpp ) -endif(NOT CLR_CMAKE_TARGET_APPLE) +endif(NOT CLR_CMAKE_TARGET_APPLE AND NOT CLR_CMAKE_TARGET_ARCH_WASM) if(CLR_CMAKE_HOST_ARCH_ARM) if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") @@ -191,7 +201,6 @@ set(SOURCES safecrt/wcsncpy_s.cpp safecrt/wmakepath_s.cpp sharedmemory/sharedmemory.cpp - sync/cs.cpp synchobj/event.cpp synchobj/semaphore.cpp synchobj/mutex.cpp @@ -210,15 +219,19 @@ set_source_files_properties( INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/../inc/rt ) -if(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND) +if(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND AND NOT CLR_CMAKE_TARGET_ARCH_WASM) set(LIBUNWIND_OBJECTS $) -endif(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND) +endif(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND AND NOT CLR_CMAKE_TARGET_ARCH_WASM) -add_library(coreclrpal - STATIC +add_library(coreclrpal_objects + OBJECT ${SOURCES} ${ARCH_SOURCES} ${PLATFORM_SOURCES} +) + +add_library(coreclrpal STATIC + $ ${LIBUNWIND_OBJECTS} ) @@ -300,8 +313,7 @@ if(CLR_CMAKE_TARGET_LINUX) else(NOT CLR_CMAKE_TARGET_ANDROID) target_link_libraries(coreclrpal PUBLIC - ${ANDROID_GLOB} - ${LZMA}) + ${ANDROID_GLOB}) endif(NOT CLR_CMAKE_TARGET_ANDROID) target_link_libraries(coreclrpal diff --git a/src/coreclr/pal/src/arch/ppc64le/context2.S b/src/coreclr/pal/src/arch/ppc64le/context2.S index f48c7414ae6e..2134bfe5b3c9 100644 --- a/src/coreclr/pal/src/arch/ppc64le/context2.S +++ b/src/coreclr/pal/src/arch/ppc64le/context2.S @@ -137,7 +137,7 @@ LEAF_ENTRY RtlRestoreContext, _TEXT lfd %f31, CONTEXT_F31(%r3) // Restore all general purpose registers - ld %r0, CONTEXT_R0(%R3) + ld %r0, CONTEXT_R0(%r3) ld %r1, CONTEXT_R1(%r3) ld %r2, CONTEXT_R2(%r3) ld %r4, CONTEXT_R4(%r3) diff --git a/src/coreclr/pal/src/arch/wasm/stubs.cpp b/src/coreclr/pal/src/arch/wasm/stubs.cpp new file mode 100644 index 000000000000..66b73f8badf1 --- /dev/null +++ b/src/coreclr/pal/src/arch/wasm/stubs.cpp @@ -0,0 +1,73 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "pal/dbgmsg.h" +#include "pal/signal.hpp" + +SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); // some headers have code with asserts, so do this first + +/* debugbreak */ + +extern "C" void +DBG_DebugBreak() +{ + asm volatile ("unreachable"); +} + +/* context */ + +extern "C" void +RtlCaptureContext(OUT PCONTEXT ContextRecord) +{ + _ASSERT("RtlCaptureContext not implemented on wasm"); +} + +extern "C" void +CONTEXT_CaptureContext(LPCONTEXT lpContext) +{ + _ASSERT("CONTEXT_CaptureContext not implemented on wasm"); +} + +extern "C" void ThrowExceptionFromContextInternal(CONTEXT* context, PAL_SEHException* ex) +{ + _ASSERT("ThrowExceptionFromContextInternal not implemented on wasm"); +} + +/* unwind */ + +void ExecuteHandlerOnCustomStack(int code, siginfo_t *siginfo, void *context, size_t sp, SignalHandlerWorkerReturnPoint* returnPoint) +{ + _ASSERT("ExecuteHandlerOnCustomStack not implemented on wasm"); +} + +extern "C" int unw_getcontext(int) +{ + _ASSERT("unw_getcontext not implemented on wasm"); + return 0; +} + +extern "C" int unw_init_local(int, int) +{ + _ASSERT("unw_init_local not implemented on wasm"); + return 0; +} + +extern "C" int unw_step(int) +{ + _ASSERT("unw_step not implemented on wasm"); + return 0; +} + +extern "C" int unw_is_signal_frame(int) +{ + _ASSERT("unw_is_signal_frame not implemented on wasm"); + return 0; +} + +/* threading */ + +extern "C" int pthread_setschedparam(pthread_t, int, const struct sched_param *) +{ + _ASSERT("pthread_setschedparam not implemented on wasm"); + return 0; +} diff --git a/src/coreclr/pal/src/config.h.in b/src/coreclr/pal/src/config.h.in index a47712a855fe..941e60e71ee6 100644 --- a/src/coreclr/pal/src/config.h.in +++ b/src/coreclr/pal/src/config.h.in @@ -45,6 +45,7 @@ #cmakedefine01 HAVE__LWP_SELF #cmakedefine01 HAVE_MACH_THREADS #cmakedefine01 HAVE_MACH_EXCEPTIONS +#cmakedefine01 HAVE_SIGALTSTACK #cmakedefine01 HAVE_VM_ALLOCATE #cmakedefine01 HAVE_VM_READ #cmakedefine01 HAVE_DIRECTIO @@ -62,10 +63,6 @@ #cmakedefine01 HAVE__FPX_SW_BYTES_WITH_XSTATE_BV #cmakedefine01 HAVE_PR_SET_PTRACER -#cmakedefine01 HAVE_STAT_TIMESPEC -#cmakedefine01 HAVE_STAT_TIM -#cmakedefine01 HAVE_STAT_NSEC - #cmakedefine01 HAVE_BSD_REGS_T #cmakedefine01 HAVE_PT_REGS #cmakedefine01 HAVE_GREGSET_T @@ -89,9 +86,6 @@ #cmakedefine01 HAVE_SCHED_GET_PRIORITY #cmakedefine01 HAVE_WORKING_GETTIMEOFDAY #cmakedefine01 HAVE_WORKING_CLOCK_GETTIME -#cmakedefine01 HAVE_CLOCK_MONOTONIC -#cmakedefine01 HAVE_CLOCK_MONOTONIC_COARSE -#cmakedefine01 HAVE_CLOCK_GETTIME_NSEC_NP #cmakedefine01 HAVE_CLOCK_THREAD_CPUTIME #cmakedefine01 HAVE_PTHREAD_CONDATTR_SETCLOCK #cmakedefine01 MMAP_ANON_IGNORES_PROTECTION @@ -117,8 +111,6 @@ #cmakedefine01 SET_SCHEDPARAM_NEEDS_PRIVS #define CHECK_TRACE_SPECIFIERS 0 -#define HAVE_GETHRTIME 0 -#define HAVE_READ_REAL_TIME 0 #define OPEN64_IS_USED_INSTEAD_OF_OPEN 0 #define PAL_IGNORE_NORMAL_THREAD_PRIORITY 0 #define SELF_SUSPEND_FAILS_WITH_NATIVE_SUSPENSION 0 diff --git a/src/coreclr/pal/src/configure.cmake b/src/coreclr/pal/src/configure.cmake index 4ba3ab6f53ce..3d89ba2f593a 100644 --- a/src/coreclr/pal/src/configure.cmake +++ b/src/coreclr/pal/src/configure.cmake @@ -120,7 +120,20 @@ check_function_exists(statvfs HAVE_STATVFS) check_function_exists(thread_self HAVE_THREAD_SELF) check_function_exists(_lwp_self HAVE__LWP_SELF) check_function_exists(pthread_mach_thread_np HAVE_MACH_THREADS) -check_function_exists(thread_set_exception_ports HAVE_MACH_EXCEPTIONS) +check_cxx_source_compiles(" +#include +int main(int argc, char **argv) { + static 
mach_port_name_t port; + thread_set_exception_ports(mach_thread_self(), EXC_MASK_BAD_ACCESS, port, EXCEPTION_DEFAULT, MACHINE_THREAD_STATE); + return 0; +}" HAVE_MACH_EXCEPTIONS) +check_cxx_source_compiles(" +#include +#include +int main(int argc, char **argv) { + sigaltstack(NULL, NULL); + return 0; +}" HAVE_SIGALTSTACK) check_function_exists(vm_allocate HAVE_VM_ALLOCATE) check_function_exists(vm_read HAVE_VM_READ) check_function_exists(directio HAVE_DIRECTIO) @@ -138,9 +151,6 @@ int main(int argc, char **argv) { return 0; }" HAVE_CPUSET_T) -check_struct_has_member ("struct stat" st_atimespec "sys/types.h;sys/stat.h" HAVE_STAT_TIMESPEC) -check_struct_has_member ("struct stat" st_atim "sys/types.h;sys/stat.h" HAVE_STAT_TIM) -check_struct_has_member ("struct stat" st_atimensec "sys/types.h;sys/stat.h" HAVE_STAT_NSEC) check_struct_has_member ("ucontext_t" uc_mcontext.gregs[0] ucontext.h HAVE_GREGSET_T) check_struct_has_member ("ucontext_t" uc_mcontext.__gregs[0] ucontext.h HAVE___GREGSET_T) check_struct_has_member ("ucontext_t" uc_mcontext.fpregs->__glibc_reserved1[0] ucontext.h HAVE_FPSTATE_GLIBC_RESERVED1) @@ -381,33 +391,6 @@ check_cxx_source_runs(" #include #include -int main() -{ - int ret; - struct timespec ts; - ret = clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); - - exit(ret); -}" HAVE_CLOCK_MONOTONIC_COARSE) -set(CMAKE_REQUIRED_LIBRARIES) - -check_cxx_source_runs(" -#include -#include - -int main() -{ - int ret; - ret = clock_gettime_nsec_np(CLOCK_UPTIME_RAW); - exit((ret == 0) ? 1 : 0); -}" HAVE_CLOCK_GETTIME_NSEC_NP) - -set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_RT_LIBS}) -check_cxx_source_runs(" -#include -#include -#include - int main() { int ret; @@ -513,6 +496,7 @@ int main(void) exit(ret != 1); }" ONE_SHARED_MAPPING_PER_FILEREGION_PER_PROCESS) + set(CMAKE_REQUIRED_LIBRARIES pthread) check_cxx_source_runs(" #include @@ -936,9 +920,12 @@ elseif(CLR_CMAKE_TARGET_HAIKU) # Haiku does not have ptrace. set(DEADLOCK_WHEN_THREAD_IS_SUSPENDED_WHILE_BLOCKED_ON_MUTEX 0) set(HAVE_SCHED_OTHER_ASSIGNABLE 1) +elseif(CLR_CMAKE_TARGET_BROWSER) + set(DEADLOCK_WHEN_THREAD_IS_SUSPENDED_WHILE_BLOCKED_ON_MUTEX 0) + set(HAVE_SCHED_OTHER_ASSIGNABLE 0) else() # Anything else is Linux # LTTNG is not available on Android, so don't error out - if(NOT HAVE_LTTNG_TRACEPOINT_H AND NOT CLR_CMAKE_TARGET_ANDROID AND FEATURE_EVENT_TRACE) + if(FEATURE_EVENTSOURCE_XPLAT AND NOT HAVE_LTTNG_TRACEPOINT_H) unset(HAVE_LTTNG_TRACEPOINT_H CACHE) message(FATAL_ERROR "Cannot find liblttng-ust-dev. 
Try installing liblttng-ust-dev (or the appropriate packages for your platform)") endif() diff --git a/src/coreclr/pal/src/cruntime/wchar.cpp b/src/coreclr/pal/src/cruntime/wchar.cpp index fb24f6e35c78..06e2e9940c40 100644 --- a/src/coreclr/pal/src/cruntime/wchar.cpp +++ b/src/coreclr/pal/src/cruntime/wchar.cpp @@ -958,7 +958,7 @@ _wfopen( const wchar_16 *fileName, const wchar_16 *mode) { - CHAR mbFileName[ _MAX_PATH ]; + CHAR mbFileName[ MAX_PATH ]; CHAR mbMode[ 10 ]; FILE * filePtr = NULL; diff --git a/src/coreclr/pal/src/debug/debug.cpp b/src/coreclr/pal/src/debug/debug.cpp index 5f598a65494b..b248de3533d7 100644 --- a/src/coreclr/pal/src/debug/debug.cpp +++ b/src/coreclr/pal/src/debug/debug.cpp @@ -88,7 +88,7 @@ const BOOL DBG_DETACH = FALSE; #endif static const char PAL_OUTPUTDEBUGSTRING[] = "PAL_OUTPUTDEBUGSTRING"; -#ifdef _DEBUG +#if defined(_DEBUG) && !defined(TARGET_IOS) && !defined(TARGET_TVOS) #define ENABLE_RUN_ON_DEBUG_BREAK 1 #endif // _DEBUG @@ -111,6 +111,9 @@ This is a no-op for x86 architectures where the instruction and data caches are coherent in hardware. For non-X86 architectures, this call usually maps to a kernel API to flush the D-caches on all processors. +It is also no-op on wasm. We don't have a way to flush the instruction +cache and it is also not needed. + --*/ BOOL PALAPI @@ -422,7 +425,12 @@ DebugBreak( BOOL IsInDebugBreak(void *addr) { +#if defined (__wasm__) + _ASSERT("IsInDebugBreak not implemented on wasm"); + return false; +#else return (addr >= (void *)DBG_DebugBreak) && (addr <= (void *)DBG_DebugBreak_End); +#endif } /*++ diff --git a/src/coreclr/pal/src/eventprovider/CMakeLists.txt b/src/coreclr/pal/src/eventprovider/CMakeLists.txt index 76bbf56e8e11..ae7b58bb9fd0 100644 --- a/src/coreclr/pal/src/eventprovider/CMakeLists.txt +++ b/src/coreclr/pal/src/eventprovider/CMakeLists.txt @@ -1,6 +1,6 @@ set(EVENT_MANIFEST ${VM_DIR}/ClrEtwAll.man) -if(CLR_CMAKE_HOST_LINUX AND NOT CLR_CMAKE_HOST_ANDROID) +if(FEATURE_EVENTSOURCE_XPLAT) add_subdirectory(lttngprovider) else() add_subdirectory(dummyprovider) diff --git a/src/coreclr/pal/src/eventprovider/dummyprovider/CMakeLists.txt b/src/coreclr/pal/src/eventprovider/dummyprovider/CMakeLists.txt index 09986597b7c1..9f9c0f6b91c3 100644 --- a/src/coreclr/pal/src/eventprovider/dummyprovider/CMakeLists.txt +++ b/src/coreclr/pal/src/eventprovider/dummyprovider/CMakeLists.txt @@ -31,9 +31,14 @@ add_custom_command(OUTPUT ${DUMMY_PROVIDER_SOURCES} COMMAND ${GENERATE_COMMAND} DEPENDS ${EVENT_MANIFEST} ${GENERATE_SCRIPT}) +add_library(eventprovider_objects + OBJECT + ${DUMMY_PROVIDER_SOURCES} +) + add_library(eventprovider STATIC - ${DUMMY_PROVIDER_SOURCES} + $ ) set_target_properties(eventprovider PROPERTIES LINKER_LANGUAGE CXX) diff --git a/src/coreclr/pal/src/exception/machexception.cpp b/src/coreclr/pal/src/exception/machexception.cpp index cfa8269cc86a..f52519959a87 100644 --- a/src/coreclr/pal/src/exception/machexception.cpp +++ b/src/coreclr/pal/src/exception/machexception.cpp @@ -21,7 +21,6 @@ SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); // some headers have code with asserts, so do #include "pal/palinternal.h" #if HAVE_MACH_EXCEPTIONS #include "machexception.h" -#include "pal/critsect.h" #include "pal/debug.h" #include "pal/init.h" #include "pal/utils.h" diff --git a/src/coreclr/pal/src/exception/remote-unwind.cpp b/src/coreclr/pal/src/exception/remote-unwind.cpp index 92c07660b380..1208ed112b89 100644 --- a/src/coreclr/pal/src/exception/remote-unwind.cpp +++ b/src/coreclr/pal/src/exception/remote-unwind.cpp @@ -43,7 +43,6 
@@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "config.h" #include "pal/palinternal.h" #include "pal/dbgmsg.h" -#include "pal/critsect.h" #include "pal/debug.h" #include "pal_endian.h" #include "pal.h" @@ -114,7 +113,7 @@ typedef BOOL(*UnwindReadMemoryCallback)(PVOID address, PVOID buffer, SIZE_T size #define PRId PRId32 #define PRIA "08" #define PRIxA PRIA PRIx -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_S390X) || defined(TARGET_LOONGARCH64) || defined(TARGET_POWERPC64) || defined(TARGET_RISCV64) +#elif defined(TARGET_64BIT) #define PRIx PRIx64 #define PRIu PRIu64 #define PRId PRId64 @@ -1789,122 +1788,6 @@ StepWithCompactEncoding(const libunwindInfo* info, compact_unwind_encoding_t com #endif // defined(__APPLE__) || defined(FEATURE_USE_SYSTEM_LIBUNWIND) -static void GetContextPointer(unw_cursor_t *cursor, unw_context_t *unwContext, int reg, SIZE_T **contextPointer) -{ -#if defined(HAVE_UNW_GET_SAVE_LOC) - unw_save_loc_t saveLoc; - unw_get_save_loc(cursor, reg, &saveLoc); - if (saveLoc.type == UNW_SLT_MEMORY) - { - SIZE_T *pLoc = (SIZE_T *)saveLoc.u.addr; - // Filter out fake save locations that point to unwContext - if (unwContext == NULL || (pLoc < (SIZE_T *)unwContext) || ((SIZE_T *)(unwContext + 1) <= pLoc)) - *contextPointer = (SIZE_T *)saveLoc.u.addr; - } -#else - // Returning NULL indicates that we don't have context pointers available - *contextPointer = NULL; -#endif -} - -static void GetContextPointers(unw_cursor_t *cursor, unw_context_t *unwContext, KNONVOLATILE_CONTEXT_POINTERS *contextPointers) -{ -#if defined(TARGET_AMD64) - GetContextPointer(cursor, unwContext, UNW_X86_64_RBP, (SIZE_T**)&contextPointers->Rbp); - GetContextPointer(cursor, unwContext, UNW_X86_64_RBX, (SIZE_T**)&contextPointers->Rbx); - GetContextPointer(cursor, unwContext, UNW_X86_64_R12, (SIZE_T**)&contextPointers->R12); - GetContextPointer(cursor, unwContext, UNW_X86_64_R13, (SIZE_T**)&contextPointers->R13); - GetContextPointer(cursor, unwContext, UNW_X86_64_R14, (SIZE_T**)&contextPointers->R14); - GetContextPointer(cursor, unwContext, UNW_X86_64_R15, (SIZE_T**)&contextPointers->R15); -#elif defined(TARGET_X86) - GetContextPointer(cursor, unwContext, UNW_X86_EBX, &contextPointers->Ebx); - GetContextPointer(cursor, unwContext, UNW_X86_EBP, &contextPointers->Ebp); - GetContextPointer(cursor, unwContext, UNW_X86_ESI, &contextPointers->Esi); - GetContextPointer(cursor, unwContext, UNW_X86_EDI, &contextPointers->Edi); -#elif defined(TARGET_ARM) - GetContextPointer(cursor, unwContext, UNW_ARM_R4, &contextPointers->R4); - GetContextPointer(cursor, unwContext, UNW_ARM_R5, &contextPointers->R5); - GetContextPointer(cursor, unwContext, UNW_ARM_R6, &contextPointers->R6); - GetContextPointer(cursor, unwContext, UNW_ARM_R7, &contextPointers->R7); - GetContextPointer(cursor, unwContext, UNW_ARM_R8, &contextPointers->R8); - GetContextPointer(cursor, unwContext, UNW_ARM_R9, &contextPointers->R9); - GetContextPointer(cursor, unwContext, UNW_ARM_R10, &contextPointers->R10); - GetContextPointer(cursor, unwContext, UNW_ARM_R11, &contextPointers->R11); -#elif defined(TARGET_ARM64) - GetContextPointer(cursor, unwContext, UNW_AARCH64_X19, (SIZE_T**)&contextPointers->X19); - GetContextPointer(cursor, unwContext, UNW_AARCH64_X20, (SIZE_T**)&contextPointers->X20); - GetContextPointer(cursor, unwContext, UNW_AARCH64_X21, (SIZE_T**)&contextPointers->X21); - GetContextPointer(cursor, unwContext, UNW_AARCH64_X22, (SIZE_T**)&contextPointers->X22); - GetContextPointer(cursor, 
unwContext, UNW_AARCH64_X23, (SIZE_T**)&contextPointers->X23); - GetContextPointer(cursor, unwContext, UNW_AARCH64_X24, (SIZE_T**)&contextPointers->X24); - GetContextPointer(cursor, unwContext, UNW_AARCH64_X25, (SIZE_T**)&contextPointers->X25); - GetContextPointer(cursor, unwContext, UNW_AARCH64_X26, (SIZE_T**)&contextPointers->X26); - GetContextPointer(cursor, unwContext, UNW_AARCH64_X27, (SIZE_T**)&contextPointers->X27); - GetContextPointer(cursor, unwContext, UNW_AARCH64_X28, (SIZE_T**)&contextPointers->X28); - GetContextPointer(cursor, unwContext, UNW_AARCH64_X29, (SIZE_T**)&contextPointers->Fp); -#elif defined(TARGET_LOONGARCH64) - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R1, (SIZE_T **)&contextPointers->Ra); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R22, (SIZE_T **)&contextPointers->Fp); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R23, (SIZE_T **)&contextPointers->S0); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R24, (SIZE_T **)&contextPointers->S1); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R25, (SIZE_T **)&contextPointers->S2); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R26, (SIZE_T **)&contextPointers->S3); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R27, (SIZE_T **)&contextPointers->S4); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R28, (SIZE_T **)&contextPointers->S5); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R29, (SIZE_T **)&contextPointers->S6); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R30, (SIZE_T **)&contextPointers->S7); - GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R31, (SIZE_T **)&contextPointers->S8); -#elif defined(TARGET_S390X) - GetContextPointer(cursor, unwContext, UNW_S390X_R6, (SIZE_T **)&contextPointers->R6); - GetContextPointer(cursor, unwContext, UNW_S390X_R7, (SIZE_T **)&contextPointers->R7); - GetContextPointer(cursor, unwContext, UNW_S390X_R8, (SIZE_T **)&contextPointers->R8); - GetContextPointer(cursor, unwContext, UNW_S390X_R9, (SIZE_T **)&contextPointers->R9); - GetContextPointer(cursor, unwContext, UNW_S390X_R10, (SIZE_T **)&contextPointers->R10); - GetContextPointer(cursor, unwContext, UNW_S390X_R11, (SIZE_T **)&contextPointers->R11); - GetContextPointer(cursor, unwContext, UNW_S390X_R12, (SIZE_T **)&contextPointers->R12); - GetContextPointer(cursor, unwContext, UNW_S390X_R13, (SIZE_T **)&contextPointers->R13); - GetContextPointer(cursor, unwContext, UNW_S390X_R14, (SIZE_T **)&contextPointers->R14); - GetContextPointer(cursor, unwContext, UNW_S390X_R15, (SIZE_T **)&contextPointers->R15); -#elif defined(TARGET_POWERPC64) - GetContextPointer(cursor, unwContext, UNW_PPC64_R14, (SIZE_T **)&contextPointers->R14); - GetContextPointer(cursor, unwContext, UNW_PPC64_R15, (SIZE_T **)&contextPointers->R15); - GetContextPointer(cursor, unwContext, UNW_PPC64_R16, (SIZE_T **)&contextPointers->R16); - GetContextPointer(cursor, unwContext, UNW_PPC64_R17, (SIZE_T **)&contextPointers->R17); - GetContextPointer(cursor, unwContext, UNW_PPC64_R18, (SIZE_T **)&contextPointers->R18); - GetContextPointer(cursor, unwContext, UNW_PPC64_R19, (SIZE_T **)&contextPointers->R19); - GetContextPointer(cursor, unwContext, UNW_PPC64_R20, (SIZE_T **)&contextPointers->R20); - GetContextPointer(cursor, unwContext, UNW_PPC64_R21, (SIZE_T **)&contextPointers->R21); - GetContextPointer(cursor, unwContext, UNW_PPC64_R22, (SIZE_T **)&contextPointers->R22); - GetContextPointer(cursor, unwContext, UNW_PPC64_R23, (SIZE_T **)&contextPointers->R23); - 
GetContextPointer(cursor, unwContext, UNW_PPC64_R24, (SIZE_T **)&contextPointers->R24); - GetContextPointer(cursor, unwContext, UNW_PPC64_R25, (SIZE_T **)&contextPointers->R25); - GetContextPointer(cursor, unwContext, UNW_PPC64_R26, (SIZE_T **)&contextPointers->R26); - GetContextPointer(cursor, unwContext, UNW_PPC64_R27, (SIZE_T **)&contextPointers->R27); - GetContextPointer(cursor, unwContext, UNW_PPC64_R28, (SIZE_T **)&contextPointers->R28); - GetContextPointer(cursor, unwContext, UNW_PPC64_R29, (SIZE_T **)&contextPointers->R29); - GetContextPointer(cursor, unwContext, UNW_PPC64_R30, (SIZE_T **)&contextPointers->R30); - GetContextPointer(cursor, unwContext, UNW_PPC64_R31, (SIZE_T **)&contextPointers->R31); -#elif defined(TARGET_RISCV64) - GetContextPointer(cursor, unwContext, UNW_RISCV_X1, (SIZE_T **)&contextPointers->Ra); - GetContextPointer(cursor, unwContext, UNW_RISCV_X3, (SIZE_T **)&contextPointers->Gp); - GetContextPointer(cursor, unwContext, UNW_RISCV_X4, (SIZE_T **)&contextPointers->Tp); - GetContextPointer(cursor, unwContext, UNW_RISCV_X8, (SIZE_T **)&contextPointers->Fp); - GetContextPointer(cursor, unwContext, UNW_RISCV_X9, (SIZE_T **)&contextPointers->S1); - GetContextPointer(cursor, unwContext, UNW_RISCV_X18, (SIZE_T **)&contextPointers->S2); - GetContextPointer(cursor, unwContext, UNW_RISCV_X19, (SIZE_T **)&contextPointers->S3); - GetContextPointer(cursor, unwContext, UNW_RISCV_X20, (SIZE_T **)&contextPointers->S4); - GetContextPointer(cursor, unwContext, UNW_RISCV_X21, (SIZE_T **)&contextPointers->S5); - GetContextPointer(cursor, unwContext, UNW_RISCV_X22, (SIZE_T **)&contextPointers->S6); - GetContextPointer(cursor, unwContext, UNW_RISCV_X23, (SIZE_T **)&contextPointers->S7); - GetContextPointer(cursor, unwContext, UNW_RISCV_X24, (SIZE_T **)&contextPointers->S8); - GetContextPointer(cursor, unwContext, UNW_RISCV_X25, (SIZE_T **)&contextPointers->S9); - GetContextPointer(cursor, unwContext, UNW_RISCV_X26, (SIZE_T **)&contextPointers->S10); - GetContextPointer(cursor, unwContext, UNW_RISCV_X27, (SIZE_T **)&contextPointers->S11); -#else -#error unsupported architecture -#endif -} - static void UnwindContextToContext(unw_cursor_t *cursor, CONTEXT *winContext) { #if defined(TARGET_AMD64) @@ -2329,7 +2212,7 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pip, int nee return unw_get_proc_info_in_range(start_ip, end_ip, ehFrameHdrAddr, ehFrameHdrLen, exidxFrameHdrAddr, exidxFrameHdrLen, as, ip, pip, need_unwind_info, arg); #else // HAVE_GET_PROC_INFO_IN_RANGE || !defined(HOST_UNIX) - // This branch is executed when using llvm-libunwind (macOS and similar platforms) + // This branch is executed when using llvm-libunwind (macOS and similar platforms) // or HP-libunwind version 1.6 and earlier. 
if (ehFrameHdrAddr == 0) { @@ -2448,14 +2331,13 @@ static unw_accessors_t unwind_accessors = init_unwind_accessors(); Parameters: context - the start context in the target - contextPointers - the context of the next frame functionStart - the pointer to return the starting address of the function or nullptr baseAddress - base address of the module to find the unwind info readMemoryCallback - reads memory from the target --*/ BOOL PALAPI -PAL_VirtualUnwindOutOfProc(CONTEXT *context, KNONVOLATILE_CONTEXT_POINTERS *contextPointers, PULONG64 functionStart, SIZE_T baseAddress, UnwindReadMemoryCallback readMemoryCallback) +PAL_VirtualUnwindOutOfProc(CONTEXT *context, PULONG64 functionStart, SIZE_T baseAddress, UnwindReadMemoryCallback readMemoryCallback) { unw_addr_space_t addrSpace = 0; unw_cursor_t cursor; @@ -2536,10 +2418,6 @@ PAL_VirtualUnwindOutOfProc(CONTEXT *context, KNONVOLATILE_CONTEXT_POINTERS *cont UnwindContextToContext(&cursor, context); - if (contextPointers != NULL) - { - GetContextPointers(&cursor, NULL, contextPointers); - } result = TRUE; exit: @@ -2700,7 +2578,7 @@ PAL_GetUnwindInfoSize(SIZE_T baseAddress, ULONG64 ehFrameHdrAddr, UnwindReadMemo BOOL PALAPI -PAL_VirtualUnwindOutOfProc(CONTEXT *context, KNONVOLATILE_CONTEXT_POINTERS *contextPointers, PULONG64 functionStart, SIZE_T baseAddress, UnwindReadMemoryCallback readMemoryCallback) +PAL_VirtualUnwindOutOfProc(CONTEXT *context, PULONG64 functionStart, SIZE_T baseAddress, UnwindReadMemoryCallback readMemoryCallback) { return FALSE; } diff --git a/src/coreclr/pal/src/exception/seh-unwind.cpp b/src/coreclr/pal/src/exception/seh-unwind.cpp index 7f29df974b18..8b6631a527d1 100644 --- a/src/coreclr/pal/src/exception/seh-unwind.cpp +++ b/src/coreclr/pal/src/exception/seh-unwind.cpp @@ -497,6 +497,8 @@ void UnwindContextToWinContext(unw_cursor_t *cursor, CONTEXT *winContext) unw_get_reg(cursor, UNW_PPC64_R28, (unw_word_t *) &winContext->R28); unw_get_reg(cursor, UNW_PPC64_R29, (unw_word_t *) &winContext->R29); unw_get_reg(cursor, UNW_PPC64_R30, (unw_word_t *) &winContext->R30); +#elif defined(HOST_WASM) + ASSERT("UnwindContextToWinContext not implemented for WASM"); #else #error unsupported architecture #endif @@ -631,6 +633,8 @@ void GetContextPointers(unw_cursor_t *cursor, unw_context_t *unwContext, KNONVOL GetContextPointer(cursor, unwContext, UNW_PPC64_R29, (SIZE_T **)&contextPointers->R29); GetContextPointer(cursor, unwContext, UNW_PPC64_R30, (SIZE_T **)&contextPointers->R30); GetContextPointer(cursor, unwContext, UNW_PPC64_R31, (SIZE_T **)&contextPointers->R31); +#elif defined(HOST_WASM) + ASSERT("GetContextPointers not implemented for WASM"); #else #error unsupported architecture #endif diff --git a/src/coreclr/pal/src/exception/seh.cpp b/src/coreclr/pal/src/exception/seh.cpp index 02a540734001..c2f28cff936c 100644 --- a/src/coreclr/pal/src/exception/seh.cpp +++ b/src/coreclr/pal/src/exception/seh.cpp @@ -21,7 +21,6 @@ Module Name: #include "pal/handleapi.hpp" #include "pal/seh.hpp" #include "pal/dbgmsg.h" -#include "pal/critsect.h" #include "pal/debug.h" #include "pal/init.h" #include "pal/process.h" @@ -175,7 +174,7 @@ PAL_ThrowExceptionFromContext(CONTEXT* context, PAL_SEHException* ex) // We need to make a copy of the exception off stack, since the "ex" is located in one of the stack // frames that will become obsolete by the ThrowExceptionFromContextInternal and the ThrowExceptionHelper // could overwrite the "ex" object by stack e.g. when allocating the low level exception object for "throw". 
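The pattern described in the comment above boils down to moving the exception into per-thread static storage with placement new before control leaves the frame that owns it. The sketch below is a generic illustration of that pattern under placeholder names (Exception, ResumeWithException); it is not the PAL_SEHException machinery itself.

```cpp
#include <cstdio>
#include <new>
#include <utility>

struct Exception { int code; };

// Per-thread raw storage that outlives any particular stack frame.
static thread_local alignas(Exception) unsigned char t_exceptionStorage[sizeof(Exception)];

[[noreturn]] void ResumeWithException(Exception&& ex)
{
    // Move the exception out of the caller's frame before that frame is abandoned;
    // the throw stands in for handing the copy to a low-level resume routine.
    Exception* copy = new (t_exceptionStorage) Exception(std::move(ex));
    throw *copy;
}

int main()
{
    try { ResumeWithException(Exception{ 42 }); }
    catch (const Exception& e) { std::printf("caught %d\n", e.code); }
    return 0;
}
```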
- static __thread BYTE threadLocalExceptionStorage[sizeof(PAL_SEHException)]; + static thread_local BYTE threadLocalExceptionStorage[sizeof(PAL_SEHException)]; ThrowExceptionFromContextInternal(context, new (threadLocalExceptionStorage) PAL_SEHException(std::move(*ex))); } @@ -304,9 +303,9 @@ PAL_ERROR SEHEnable(CPalThread *pthrCurrent) { #if HAVE_MACH_EXCEPTIONS return pthrCurrent->EnableMachExceptions(); -#elif defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__sun) || defined(__HAIKU__) +#elif defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__sun) || defined(__HAIKU__) || defined(__APPLE__) || defined(__wasm__) return NO_ERROR; -#else// HAVE_MACH_EXCEPTIONS +#else // HAVE_MACH_EXCEPTIONS #error not yet implemented #endif // HAVE_MACH_EXCEPTIONS } @@ -329,7 +328,7 @@ PAL_ERROR SEHDisable(CPalThread *pthrCurrent) { #if HAVE_MACH_EXCEPTIONS return pthrCurrent->DisableMachExceptions(); -#elif defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__sun) || defined(__HAIKU__) +#elif defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__sun) || defined(__HAIKU__) || defined(__APPLE__) || defined(__wasm__) return NO_ERROR; #else // HAVE_MACH_EXCEPTIONS #error not yet implemented @@ -372,12 +371,7 @@ bool CatchHardwareExceptionHolder::IsEnabled() --*/ -#if defined(__GNUC__) -static __thread -#else // __GNUC__ -__declspec(thread) static -#endif // !__GNUC__ -NativeExceptionHolderBase *t_nativeExceptionHolderHead = nullptr; +static thread_local NativeExceptionHolderBase *t_nativeExceptionHolderHead = nullptr; extern "C" NativeExceptionHolderBase ** diff --git a/src/coreclr/pal/src/exception/signal.cpp b/src/coreclr/pal/src/exception/signal.cpp index aa727a6f07b3..444bad7e5c07 100644 --- a/src/coreclr/pal/src/exception/signal.cpp +++ b/src/coreclr/pal/src/exception/signal.cpp @@ -188,7 +188,7 @@ BOOL SEHInitializeSignals(CorUnix::CPalThread *pthrCurrent, DWORD flags) handle_signal(SIGINT, sigint_handler, &g_previous_sigint, 0 /* additionalFlags */, true /* skipIgnored */); handle_signal(SIGQUIT, sigquit_handler, &g_previous_sigquit, 0 /* additionalFlags */, true /* skipIgnored */); -#if HAVE_MACH_EXCEPTIONS +#if HAVE_MACH_EXCEPTIONS || !HAVE_SIGALTSTACK handle_signal(SIGSEGV, sigsegv_handler, &g_previous_sigsegv); #else handle_signal(SIGTRAP, sigtrap_handler, &g_previous_sigtrap); @@ -206,7 +206,7 @@ BOOL SEHInitializeSignals(CorUnix::CPalThread *pthrCurrent, DWORD flags) } // Allocate the minimal stack necessary for handling stack overflow - int stackOverflowStackSize = ALIGN_UP(sizeof(SignalHandlerWorkerReturnPoint), 16) + 8 * 4096; + int stackOverflowStackSize = ALIGN_UP(sizeof(SignalHandlerWorkerReturnPoint), 16) + 9 * 4096; // Align the size to virtual page size and add one virtual page as a stack guard stackOverflowStackSize = ALIGN_UP(stackOverflowStackSize, GetVirtualPageSize()) + GetVirtualPageSize(); int flags = MAP_ANONYMOUS | MAP_PRIVATE; @@ -558,8 +558,13 @@ extern "C" void signal_handler_worker(int code, siginfo_t *siginfo, void *contex // fault. We must disassemble the instruction at record.ExceptionAddress // to correctly fill in this value. 
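The stack-overflow handler stack sizing changed above raises the fixed headroom from 8 to 9 pages, then (per the surrounding code) rounds the total up to whole virtual pages and adds one extra page as a guard. A small stand-alone sketch of that arithmetic; it illustrates the computation in the hunk, not the PAL function itself, and assumes power-of-two alignments.

```cpp
// Sketch of the sizing logic: working area + fixed headroom, rounded up to
// whole pages, plus one page reserved as a guard.
#include <cstddef>

static size_t AlignUp(size_t value, size_t alignment)
{
    // alignment must be a power of two
    return (value + alignment - 1) & ~(alignment - 1);
}

static size_t StackOverflowStackBytes(size_t returnPointBytes, size_t pageSize)
{
    size_t size = AlignUp(returnPointBytes, 16) + 9 * 4096; // headroom for the handler
    return AlignUp(size, pageSize) + pageSize;              // whole pages + guard page
}
```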
- // Unmask the activation signal now that we are running on the original stack of the thread - UnmaskActivationSignal(); + if (code != (SIGSEGV | StackOverflowFlag)) + { + // Unmask the activation signal now that we are running on the original stack of the thread + // except for the stack overflow case when we are actually running on a special stack overflow + // stack. + UnmaskActivationSignal(); + } returnPoint->returnFromHandler = common_signal_handler(code, siginfo, context, 2, (size_t)0, (size_t)siginfo->si_addr); @@ -569,6 +574,7 @@ extern "C" void signal_handler_worker(int code, siginfo_t *siginfo, void *contex RtlRestoreContext(&returnPoint->context, NULL); } +#if HAVE_SIGALTSTACK /*++ Function : SwitchStackAndExecuteHandler @@ -609,6 +615,7 @@ static bool SwitchStackAndExecuteHandler(int code, siginfo_t *siginfo, void *con return pReturnPoint->returnFromHandler; } +#endif #endif // !HAVE_MACH_EXCEPTIONS @@ -644,6 +651,10 @@ static void sigsegv_handler(int code, siginfo_t *siginfo, void *context) { if (GetCurrentPalThread()) { +#if defined(TARGET_TVOS) + (void)!write(STDERR_FILENO, StackOverflowMessage, sizeof(StackOverflowMessage) - 1); + PROCAbort(SIGSEGV, siginfo); +#else // TARGET_TVOS size_t handlerStackTop = __sync_val_compare_and_swap((size_t*)&g_stackOverflowHandlerStack, (size_t)g_stackOverflowHandlerStack, 0); if (handlerStackTop == 0) { @@ -672,6 +683,7 @@ static void sigsegv_handler(int code, siginfo_t *siginfo, void *context) PROCAbort(SIGSEGV, siginfo); } (void)!write(STDERR_FILENO, StackOverflowHandlerReturnedMessage, sizeof(StackOverflowHandlerReturnedMessage) - 1); +#endif // TARGET_TVOS } else { @@ -686,6 +698,7 @@ static void sigsegv_handler(int code, siginfo_t *siginfo, void *context) // Now that we know the SIGSEGV didn't happen due to a stack overflow, execute the common // hardware signal handler on the original stack. +#if HAVE_SIGALTSTACK if (GetCurrentPalThread() && IsRunningOnAlternateStack(context)) { if (SwitchStackAndExecuteHandler(code, siginfo, context, 0 /* sp */)) // sp == 0 indicates execution on the original stack @@ -694,6 +707,7 @@ static void sigsegv_handler(int code, siginfo_t *siginfo, void *context) } } else +#endif { // The code flow gets here when the signal handler is not running on an alternate stack or when it wasn't created // by coreclr. In both cases, we execute the common_signal_handler directly. @@ -837,15 +851,9 @@ static void sigterm_handler(int code, siginfo_t *siginfo, void *context) { PROCCreateCrashDumpIfEnabled(code, siginfo, false); } - // g_pSynchronizationManager shouldn't be null if PAL is initialized. - _ASSERTE(g_pSynchronizationManager != nullptr); - - g_pSynchronizationManager->SendTerminationRequestToWorkerThread(); - } - else - { - restore_signal_and_resend(SIGTERM, &g_previous_sigterm); } + + restore_signal_and_resend(SIGTERM, &g_previous_sigterm); } #ifdef INJECT_ACTIVATION_SIGNAL diff --git a/src/coreclr/pal/src/file/file.cpp b/src/coreclr/pal/src/file/file.cpp index 8eafaab2476b..8d0cfa99f789 100644 --- a/src/coreclr/pal/src/file/file.cpp +++ b/src/coreclr/pal/src/file/file.cpp @@ -323,6 +323,15 @@ CorUnix::InternalCanonicalizeRealPath(LPCSTR lpUnixPath, PathCharString& lpBuffe } lpFilename = lpExistingPath; } + else if (pchSeparator == lpExistingPath) + { + // This is a path in the root i.e. 
'/tmp' + // This scenario will probably only come up in WASM where it is normal to + // have a cwd of '/' and store files in the root of the virtual filesystem + lpBuffer.Clear(); + lpBuffer.Append(lpExistingPath, strlen(lpExistingPath)); + return NO_ERROR; + } else { bool fSetFilename = true; @@ -895,100 +904,6 @@ CreateFileW( } -/*++ -Function: - CopyFileW - -See MSDN doc. - -Notes: - There are several (most) error paths here that do not call SetLastError(). -This is because we know that CreateFile, ReadFile, and WriteFile will do so, -and will have a much better idea of the specific error. ---*/ -BOOL -PALAPI -CopyFileW( - IN LPCWSTR lpExistingFileName, - IN LPCWSTR lpNewFileName, - IN BOOL bFailIfExists) -{ - CPalThread *pThread; - PathCharString sourcePathString; - PathCharString destPathString; - char * source; - char * dest; - int src_size, dest_size, length = 0; - BOOL bRet = FALSE; - - PERF_ENTRY(CopyFileW); - ENTRY("CopyFileW(lpExistingFileName=%p (%S), lpNewFileName=%p (%S), bFailIfExists=%d)\n", - lpExistingFileName?lpExistingFileName:W16_NULLSTRING, - lpExistingFileName?lpExistingFileName:W16_NULLSTRING, - lpNewFileName?lpNewFileName:W16_NULLSTRING, - lpNewFileName?lpNewFileName:W16_NULLSTRING, bFailIfExists); - - pThread = InternalGetCurrentThread(); - if (lpExistingFileName != NULL) - { - length = (PAL_wcslen(lpExistingFileName)+1) * MaxWCharToAcpLengthFactor; - } - - source = sourcePathString.OpenStringBuffer(length); - if (NULL == source) - { - pThread->SetLastError(ERROR_NOT_ENOUGH_MEMORY); - goto done; - } - - src_size = WideCharToMultiByte( CP_ACP, 0, lpExistingFileName, -1, source, length, - NULL, NULL ); - - if( src_size == 0 ) - { - sourcePathString.CloseBuffer(0); - DWORD dwLastError = GetLastError(); - ASSERT("WideCharToMultiByte failure! error is %d\n", dwLastError); - pThread->SetLastError(ERROR_INTERNAL_ERROR); - goto done; - } - - sourcePathString.CloseBuffer(src_size - 1); - length = 0; - - if (lpNewFileName != NULL) - { - length = (PAL_wcslen(lpNewFileName)+1) * MaxWCharToAcpLengthFactor; - } - - dest = destPathString.OpenStringBuffer(length); - if (NULL == dest) - { - pThread->SetLastError(ERROR_NOT_ENOUGH_MEMORY); - goto done; - } - dest_size = WideCharToMultiByte( CP_ACP, 0, lpNewFileName, -1, dest, length, - NULL, NULL ); - - if( dest_size == 0 ) - { - destPathString.CloseBuffer(0); - DWORD dwLastError = GetLastError(); - ASSERT("WideCharToMultiByte failure! error is %d\n", dwLastError); - pThread->SetLastError(ERROR_INTERNAL_ERROR); - goto done; - } - - destPathString.CloseBuffer(dest_size - 1); - bRet = CopyFileA(source,dest,bFailIfExists); - -done: - LOGEXIT("CopyFileW returns BOOL %d\n", bRet); - PERF_EXIT(CopyFileW); - return bRet; -} - - /*++ Function: DeleteFileA @@ -1054,471 +969,6 @@ DeleteFileA( return bRet; } - -/*++ -Function: - GetFileAttributesA - -Note: - Checking for directory and read-only file. - -Caveats: - There are some important things to note about this implementation, which -are due to the differences between the FAT filesystem and Unix filesystems: - -- fifo's, sockets, and symlinks will return -1, and GetLastError() will - return ERROR_ACCESS_DENIED - -- if a file is write-only, or has no permissions at all, it is treated - the same as if it had mode 'rw'. This is consistent with behaviour on - NTFS files with the same permissions. 
- -- the following flags will never be returned: - -FILE_ATTRIBUTE_SYSTEM -FILE_ATTRIBUTE_ARCHIVE -FILE_ATTRIBUTE_HIDDEN - ---*/ -DWORD -PALAPI -GetFileAttributesA( - IN LPCSTR lpFileName) -{ - CPalThread *pThread; - struct stat stat_data; - DWORD dwAttr = 0; - DWORD dwLastError = 0; - - PERF_ENTRY(GetFileAttributesA); - ENTRY("GetFileAttributesA(lpFileName=%p (%s))\n", lpFileName?lpFileName:"NULL", lpFileName?lpFileName:"NULL"); - - pThread = InternalGetCurrentThread(); - if (lpFileName == NULL) - { - dwLastError = ERROR_PATH_NOT_FOUND; - goto done; - } - - if ( stat(lpFileName, &stat_data) != 0 ) - { - dwLastError = FILEGetLastErrorFromErrnoAndFilename(lpFileName); - goto done; - } - - if ( (stat_data.st_mode & S_IFMT) == S_IFDIR ) - { - dwAttr |= FILE_ATTRIBUTE_DIRECTORY; - } - else if ( (stat_data.st_mode & S_IFMT) != S_IFREG ) - { - ERROR("Not a regular file or directory, S_IFMT is %#x\n", - stat_data.st_mode & S_IFMT); - dwLastError = ERROR_ACCESS_DENIED; - goto done; - } - - if ( UTIL_IsReadOnlyBitsSet( &stat_data ) ) - { - dwAttr |= FILE_ATTRIBUTE_READONLY; - } - - /* finally, if nothing is set... */ - if ( dwAttr == 0 ) - { - dwAttr = FILE_ATTRIBUTE_NORMAL; - } - -done: - if (dwLastError) - { - pThread->SetLastError(dwLastError); - dwAttr = INVALID_FILE_ATTRIBUTES; - } - - LOGEXIT("GetFileAttributesA returns DWORD %#x\n", dwAttr); - PERF_EXIT(GetFileAttributesA); - return dwAttr; -} - - - - -/*++ -Function: - GetFileAttributesW - -Note: - Checking for directory and read-only file - -See MSDN doc. ---*/ -DWORD -PALAPI -GetFileAttributesW( - IN LPCWSTR lpFileName) -{ - CPalThread *pThread; - int size; - PathCharString filenamePS; - int length = 0; - char * filename; - DWORD dwRet = (DWORD) -1; - - PERF_ENTRY(GetFileAttributesW); - ENTRY("GetFileAttributesW(lpFileName=%p (%S))\n", - lpFileName?lpFileName:W16_NULLSTRING, - lpFileName?lpFileName:W16_NULLSTRING); - - pThread = InternalGetCurrentThread(); - if (lpFileName == NULL) - { - pThread->SetLastError(ERROR_PATH_NOT_FOUND); - goto done; - } - - length = (PAL_wcslen(lpFileName)+1) * MaxWCharToAcpLengthFactor; - filename = filenamePS.OpenStringBuffer(length); - if (NULL == filename) - { - pThread->SetLastError(ERROR_NOT_ENOUGH_MEMORY); - goto done; - } - size = WideCharToMultiByte( CP_ACP, 0, lpFileName, -1, filename, length, - NULL, NULL ); - - if( size == 0 ) - { - filenamePS.CloseBuffer(0); - DWORD dwLastError = GetLastError(); - ASSERT("WideCharToMultiByte failure! error is %d\n", dwLastError); - pThread->SetLastError(ERROR_INTERNAL_ERROR); - } - else - { - filenamePS.CloseBuffer(size - 1); - dwRet = GetFileAttributesA( filename ); - } - -done: - LOGEXIT("GetFileAttributesW returns DWORD %#x\n", dwRet); - PERF_EXIT(GetFileAttributesW); - return dwRet; -} - - -/*++ -Function: - GetFileAttributesExW - -See MSDN doc, and notes for GetFileAttributesW. 
---*/ -BOOL -PALAPI -GetFileAttributesExW( - IN LPCWSTR lpFileName, - IN GET_FILEEX_INFO_LEVELS fInfoLevelId, - OUT LPVOID lpFileInformation) -{ - CPalThread *pThread; - BOOL bRet = FALSE; - DWORD dwLastError = 0; - LPWIN32_FILE_ATTRIBUTE_DATA attr_data; - - struct stat stat_data; - - char * name; - PathCharString namePS; - int length = 0; - int size; - - PERF_ENTRY(GetFileAttributesExW); - ENTRY("GetFileAttributesExW(lpFileName=%p (%S), fInfoLevelId=%d, " - "lpFileInformation=%p)\n", lpFileName?lpFileName:W16_NULLSTRING, lpFileName?lpFileName:W16_NULLSTRING, - fInfoLevelId, lpFileInformation); - - pThread = InternalGetCurrentThread(); - if ( fInfoLevelId != GetFileExInfoStandard ) - { - ASSERT("Unrecognized value for fInfoLevelId=%d\n", fInfoLevelId); - dwLastError = ERROR_INVALID_PARAMETER; - goto done; - } - - if ( !lpFileInformation ) - { - ASSERT("lpFileInformation is NULL\n"); - dwLastError = ERROR_INVALID_PARAMETER; - goto done; - } - - if (lpFileName == NULL) - { - dwLastError = ERROR_PATH_NOT_FOUND; - goto done; - } - - length = (PAL_wcslen(lpFileName)+1) * MaxWCharToAcpLengthFactor; - name = namePS.OpenStringBuffer(length); - if (NULL == name) - { - dwLastError = ERROR_NOT_ENOUGH_MEMORY; - goto done; - } - size = WideCharToMultiByte( CP_ACP, 0, lpFileName, -1, name, length, - NULL, NULL ); - - if( size == 0 ) - { - namePS.CloseBuffer(0); - dwLastError = GetLastError(); - ASSERT("WideCharToMultiByte failure! error is %d\n", dwLastError); - dwLastError = ERROR_INTERNAL_ERROR; - goto done; - } - - namePS.CloseBuffer(size - 1); - attr_data = (LPWIN32_FILE_ATTRIBUTE_DATA)lpFileInformation; - - attr_data->dwFileAttributes = GetFileAttributesW(lpFileName); - /* assume that GetFileAttributesW will call SetLastError appropriately */ - if ( attr_data->dwFileAttributes == (DWORD)-1 ) - { - goto done; - } - - /* do the stat */ - if ( stat(name, &stat_data) != 0 ) - { - ERROR("stat failed on %S\n", lpFileName); - dwLastError = FILEGetLastErrorFromErrnoAndFilename(name); - goto done; - } - - /* get the file times */ - attr_data->ftCreationTime = - FILEUnixTimeToFileTime( stat_data.st_ctime, - ST_CTIME_NSEC(&stat_data) ); - attr_data->ftLastAccessTime = - FILEUnixTimeToFileTime( stat_data.st_atime, - ST_ATIME_NSEC(&stat_data) ); - attr_data->ftLastWriteTime = - FILEUnixTimeToFileTime( stat_data.st_mtime, - ST_MTIME_NSEC(&stat_data) ); - - /* if Unix mtime is greater than atime, return mtime - as the last access time */ - if (CompareFileTime(&attr_data->ftLastAccessTime, - &attr_data->ftLastWriteTime) < 0) - { - attr_data->ftLastAccessTime = attr_data->ftLastWriteTime; - } - - /* if Unix ctime is greater than mtime, return mtime - as the create time */ - if (CompareFileTime(&attr_data->ftLastWriteTime, - &attr_data->ftCreationTime) < 0) - { - attr_data->ftCreationTime = attr_data->ftLastWriteTime; - } - - /* Get the file size. GetFileSize is not used because it gets the - size of an already-open file */ - attr_data->nFileSizeLow = (DWORD) stat_data.st_size; -#if SIZEOF_OFF_T > 4 - attr_data->nFileSizeHigh = (DWORD)(stat_data.st_size >> 32); -#else - attr_data->nFileSizeHigh = 0; -#endif - - bRet = TRUE; - -done: - if (dwLastError) pThread->SetLastError(dwLastError); - - LOGEXIT("GetFileAttributesExW returns BOOL %d\n", bRet); - PERF_EXIT(GetFileAttributesExW); - return bRet; -} - -/*++ -Function: - SetFileAttributesA - -Notes: - Used for setting read-only attribute on file only. 
- ---*/ -BOOL -PALAPI -SetFileAttributesA( - IN LPCSTR lpFileName, - IN DWORD dwFileAttributes) -{ - CPalThread *pThread; - struct stat stat_data; - mode_t new_mode; - - DWORD dwLastError = 0; - BOOL bRet = FALSE; - - PERF_ENTRY(SetFileAttributesA); - ENTRY("SetFileAttributesA(lpFileName=%p (%s), dwFileAttributes=%#x)\n", - lpFileName?lpFileName:"NULL", - lpFileName?lpFileName:"NULL", dwFileAttributes); - - pThread = InternalGetCurrentThread(); - - /* Windows behavior for SetFileAttributes is that any valid attributes - are set on a file and any invalid attributes are ignored. SetFileAttributes - returns success and does not set an error even if some or all of the - attributes are invalid. If all the attributes are invalid, SetFileAttributes - sets a file's attribute to NORMAL. */ - - /* If dwFileAttributes does not contain READONLY or NORMAL, set it to NORMAL - and print a warning message. */ - if ( !(dwFileAttributes & (FILE_ATTRIBUTE_READONLY |FILE_ATTRIBUTE_NORMAL)) ) - { - dwFileAttributes = FILE_ATTRIBUTE_NORMAL; - WARN("dwFileAttributes(%#x) contains attributes that are either not supported " - "or cannot be set via SetFileAttributes.\n"); - } - - if ( (dwFileAttributes & FILE_ATTRIBUTE_NORMAL) && - (dwFileAttributes != FILE_ATTRIBUTE_NORMAL) ) - { - WARN("Ignoring FILE_ATTRIBUTE_NORMAL -- it must be used alone\n"); - } - - if (lpFileName == NULL) - { - dwLastError = ERROR_FILE_NOT_FOUND; - goto done; - } - - if ( stat(lpFileName, &stat_data) != 0 ) - { - TRACE("stat failed on %s; errno is %d (%s)\n", - lpFileName, errno, strerror(errno)); - dwLastError = FILEGetLastErrorFromErrnoAndFilename(lpFileName); - goto done; - } - - new_mode = stat_data.st_mode; - TRACE("st_mode is %#x\n", new_mode); - - /* if we can't do GetFileAttributesA on it, don't do SetFileAttributesA */ - if ( !(new_mode & S_IFREG) && !(new_mode & S_IFDIR) ) - { - ERROR("Not a regular file or directory, S_IFMT is %#x\n", - new_mode & S_IFMT); - dwLastError = ERROR_ACCESS_DENIED; - goto done; - } - - /* set or unset the "read-only" attribute */ - if (dwFileAttributes & FILE_ATTRIBUTE_READONLY) - { - /* remove the write bit from everybody */ - new_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - } - else - { - /* give write permission to the owner if the owner - * already has read permission */ - if ( new_mode & S_IRUSR ) - { - new_mode |= S_IWUSR; - } - } - TRACE("new mode is %#x\n", new_mode); - - bRet = TRUE; - if ( new_mode != stat_data.st_mode ) - { - if ( chmod(lpFileName, new_mode) != 0 ) - { - ERROR("chmod(%s, %#x) failed\n", lpFileName, new_mode); - dwLastError = FILEGetLastErrorFromErrnoAndFilename(lpFileName); - bRet = FALSE; - } - } - -done: - if (dwLastError) - { - pThread->SetLastError(dwLastError); - } - - LOGEXIT("SetFileAttributesA returns BOOL %d\n", bRet); - PERF_EXIT(SetFileAttributesA); - return bRet; -} - -/*++ -Function: - SetFileAttributesW - -Notes: - Used for setting read-only attribute on file only. 
- ---*/ -BOOL -PALAPI -SetFileAttributesW( - IN LPCWSTR lpFileName, - IN DWORD dwFileAttributes) -{ - CPalThread *pThread; - char * name; - PathCharString namePS; - int length = 0; - int size; - - DWORD dwLastError = 0; - BOOL bRet = FALSE; - - PERF_ENTRY(SetFileAttributesW); - ENTRY("SetFileAttributesW(lpFileName=%p (%S), dwFileAttributes=%#x)\n", - lpFileName?lpFileName:W16_NULLSTRING, - lpFileName?lpFileName:W16_NULLSTRING, dwFileAttributes); - - pThread = InternalGetCurrentThread(); - if (lpFileName == NULL) - { - dwLastError = ERROR_PATH_NOT_FOUND; - goto done; - } - - length = (PAL_wcslen(lpFileName)+1) * MaxWCharToAcpLengthFactor; - name = namePS.OpenStringBuffer(length); - if (NULL == name) - { - dwLastError = ERROR_NOT_ENOUGH_MEMORY; - goto done; - } - size = WideCharToMultiByte( CP_ACP, 0, lpFileName, -1, name, length, - NULL, NULL ); - - if( size == 0 ) - { - namePS.CloseBuffer(0); - dwLastError = GetLastError(); - ASSERT("WideCharToMultiByte failure! error is %d\n", dwLastError); - dwLastError = ERROR_INVALID_PARAMETER; - goto done; - } - namePS.CloseBuffer(size - 1); - bRet = SetFileAttributesA(name,dwFileAttributes); - -done: - if (dwLastError) pThread->SetLastError(dwLastError); - - LOGEXIT("SetFileAttributes returns BOOL %d\n", bRet); - PERF_EXIT(SetFileAttributesW); - return bRet; -} - /*++ InternalOpen @@ -2584,335 +2034,6 @@ FlushFileBuffers( uUniqueSeed++;\ } -/*++ - Function: - GetTempFileNameA - -uUnique is always 0. - --*/ -const int MAX_PREFIX = 3; -const int MAX_SEEDSIZE = 8; /* length of "unique portion of - the string, plus extension(FFFF.TMP). */ -static USHORT uUniqueSeed = 0; -static BOOL IsInitialized = FALSE; - -UINT -PALAPI -GetTempFileNameA( - IN LPCSTR lpPathName, - IN LPCSTR lpPrefixString, - IN UINT uUnique, - OUT LPSTR lpTempFileName) -{ - CPalThread *pThread; - CHAR * full_name; - PathCharString full_namePS; - int length; - CHAR * file_template; - PathCharString file_templatePS; - CHAR chLastPathNameChar; - - HANDLE hTempFile; - UINT uRet = 0; - DWORD dwError; - USHORT uLoopCounter = 0; - - PERF_ENTRY(GetTempFileNameA); - ENTRY("GetTempFileNameA(lpPathName=%p (%s), lpPrefixString=%p (%s), uUnique=%u, " - "lpTempFileName=%p)\n", lpPathName?lpPathName:"NULL", lpPathName?lpPathName:"NULL", - lpPrefixString?lpPrefixString:"NULL", - lpPrefixString?lpPrefixString:"NULL", uUnique, - lpTempFileName?lpTempFileName:"NULL"); - - pThread = InternalGetCurrentThread(); - if ( !IsInitialized ) - { - uUniqueSeed = (USHORT)( time( NULL ) ); - - /* On the off chance 0 is returned. - 0 being the error return code. 
*/ - ENSURE_UNIQUE_NOT_ZERO - IsInitialized = TRUE; - } - - if ( !lpPathName || *lpPathName == '\0' ) - { - pThread->SetLastError( ERROR_DIRECTORY ); - goto done; - } - - if ( NULL == lpTempFileName ) - { - ERROR( "lpTempFileName cannot be NULL\n" ); - pThread->SetLastError( ERROR_INVALID_PARAMETER ); - goto done; - } - - if ( strlen( lpPathName ) + MAX_SEEDSIZE + MAX_PREFIX >= MAX_LONGPATH ) - { - WARN( "File names larger than MAX_LONGPATH (%d)!\n", MAX_LONGPATH ); - pThread->SetLastError( ERROR_FILENAME_EXCED_RANGE ); - goto done; - } - - length = strlen(lpPathName) + MAX_SEEDSIZE + MAX_PREFIX + 10; - file_template = file_templatePS.OpenStringBuffer(length); - if (NULL == file_template) - { - pThread->SetLastError(ERROR_NOT_ENOUGH_MEMORY); - goto done; - } - *file_template = '\0'; - strcat_s( file_template, file_templatePS.GetSizeOf(), lpPathName ); - file_templatePS.CloseBuffer(length); - - chLastPathNameChar = file_template[strlen(file_template)-1]; - if (chLastPathNameChar != '/') - { - strcat_s( file_template, file_templatePS.GetSizeOf(), "/" ); - } - - if ( lpPrefixString ) - { - strncat_s( file_template, file_templatePS.GetSizeOf(), lpPrefixString, MAX_PREFIX ); - } - strncat_s( file_template, file_templatePS.GetSizeOf(), "%.4x.TMP", MAX_SEEDSIZE ); - - /* Create the file. */ - dwError = GetLastError(); - pThread->SetLastError( NOERROR ); - - length = strlen(file_template) + MAX_SEEDSIZE + MAX_PREFIX; - full_name = full_namePS.OpenStringBuffer(length); - if (NULL == full_name) - { - pThread->SetLastError(ERROR_NOT_ENOUGH_MEMORY); - goto done; - } - sprintf_s( full_name, full_namePS.GetSizeOf(), file_template, (0 == uUnique) ? uUniqueSeed : uUnique); - full_namePS.CloseBuffer(length); - - hTempFile = CreateFileA( full_name, GENERIC_WRITE, - FILE_SHARE_READ, NULL, CREATE_NEW, 0, NULL ); - - if (uUnique == 0) - { - /* The USHORT will overflow back to 0 if we go past - 65536 files, so break the loop after 65536 iterations. - If the CreateFile call was not successful within that - number of iterations, then there are no temp file names - left for that directory. */ - while ( ERROR_PATH_NOT_FOUND != GetLastError() && - INVALID_HANDLE_VALUE == hTempFile && uLoopCounter < 0xFFFF ) - { - uUniqueSeed++; - ENSURE_UNIQUE_NOT_ZERO; - - pThread->SetLastError( NOERROR ); - sprintf_s( full_name, full_namePS.GetSizeOf(), file_template, uUniqueSeed ); - hTempFile = CreateFileA( full_name, GENERIC_WRITE, - FILE_SHARE_READ, NULL, CREATE_NEW, 0, NULL ); - uLoopCounter++; - - } - } - - /* Reset the error code.*/ - if ( NOERROR == GetLastError() ) - { - pThread->SetLastError( dwError ); - } - - /* Windows sets ERROR_FILE_EXISTS,if there - are no available temp files. */ - if ( INVALID_HANDLE_VALUE != hTempFile ) - { - if (0 == uUnique) - { - uRet = uUniqueSeed; - uUniqueSeed++; - ENSURE_UNIQUE_NOT_ZERO; - } - else - { - uRet = uUnique; - } - - if ( CloseHandle( hTempFile ) ) - { - if (strcpy_s( lpTempFileName, MAX_LONGPATH, full_name ) != SAFECRT_SUCCESS) - { - ERROR( "strcpy_s failed!\n"); - pThread->SetLastError( ERROR_FILENAME_EXCED_RANGE ); - *lpTempFileName = '\0'; - uRet = 0; - } - } - else - { - ASSERT( "Unable to close the handle %p\n", hTempFile ); - pThread->SetLastError( ERROR_INTERNAL_ERROR ); - *lpTempFileName = '\0'; - uRet = 0; - } - } - else if ( INVALID_HANDLE_VALUE == hTempFile && uLoopCounter < 0xFFFF ) - { - ERROR( "Unable to create temp file. \n" ); - uRet = 0; - - if ( ERROR_PATH_NOT_FOUND == GetLastError() ) - { - /* CreateFile failed because it could not - find the path. 
*/ - pThread->SetLastError( ERROR_DIRECTORY ); - } /* else use the lasterror value from CreateFileA */ - } - else - { - TRACE( "65535 files already exist in the directory. " - "No temp files available for creation.\n" ); - pThread->SetLastError( ERROR_FILE_EXISTS ); - } - -done: - LOGEXIT("GetTempFileNameA returns UINT %u\n", uRet); - PERF_EXIT(GetTempFileNameA); - return uRet; - -} - -/*++ -Function: - GetTempFileNameW - -uUnique is always 0. ---*/ -UINT -PALAPI -GetTempFileNameW( - IN LPCWSTR lpPathName, - IN LPCWSTR lpPrefixString, - IN UINT uUnique, - OUT LPWSTR lpTempFileName) -{ - CPalThread *pThread; - INT path_size = 0; - INT prefix_size = 0; - CHAR * full_name; - CHAR * prefix_string; - CHAR * tempfile_name = NULL; - PathCharString full_namePS, prefix_stringPS; - INT length = 0; - UINT uRet; - - PERF_ENTRY(GetTempFileNameW); - ENTRY("GetTempFileNameW(lpPathName=%p (%S), lpPrefixString=%p (%S), uUnique=%u, " - "lpTempFileName=%p)\n", lpPathName?lpPathName:W16_NULLSTRING, lpPathName?lpPathName:W16_NULLSTRING, - lpPrefixString?lpPrefixString:W16_NULLSTRING, - lpPrefixString?lpPrefixString:W16_NULLSTRING,uUnique, lpTempFileName); - - pThread = InternalGetCurrentThread(); - /* Sanity checks. */ - if ( !lpPathName || *lpPathName == '\0' ) - { - pThread->SetLastError( ERROR_DIRECTORY ); - uRet = 0; - goto done; - } - - length = (PAL_wcslen(lpPathName)+1) * MaxWCharToAcpLengthFactor; - full_name = full_namePS.OpenStringBuffer(length); - if (NULL == full_name) - { - pThread->SetLastError(ERROR_NOT_ENOUGH_MEMORY); - uRet = 0; - goto done; - } - path_size = WideCharToMultiByte( CP_ACP, 0, lpPathName, -1, full_name, - length, NULL, NULL ); - - if( path_size == 0 ) - { - full_namePS.CloseBuffer(0); - DWORD dwLastError = GetLastError(); - ASSERT("WideCharToMultiByte failure! error is %d\n", dwLastError); - pThread->SetLastError(ERROR_INTERNAL_ERROR); - uRet = 0; - goto done; - } - - full_namePS.CloseBuffer(path_size - 1); - - if (lpPrefixString != NULL) - { - length = (PAL_wcslen(lpPrefixString)+1) * MaxWCharToAcpLengthFactor; - prefix_string = prefix_stringPS.OpenStringBuffer(length); - if (NULL == prefix_string) - { - pThread->SetLastError(ERROR_NOT_ENOUGH_MEMORY); - uRet = 0; - goto done; - } - prefix_size = WideCharToMultiByte( CP_ACP, 0, lpPrefixString, -1, - prefix_string, - MAX_LONGPATH - path_size - MAX_SEEDSIZE, - NULL, NULL ); - - if( prefix_size == 0 ) - { - prefix_stringPS.CloseBuffer(0); - DWORD dwLastError = GetLastError(); - ASSERT("WideCharToMultiByte failure! error is %d\n", dwLastError); - pThread->SetLastError(ERROR_INTERNAL_ERROR); - uRet = 0; - goto done; - } - prefix_stringPS.CloseBuffer(prefix_size - 1); - } - - tempfile_name = (char*)malloc(MAX_LONGPATH); - if (tempfile_name == NULL) - { - pThread->SetLastError(ERROR_NOT_ENOUGH_MEMORY); - uRet = 0; - goto done; - } - - uRet = GetTempFileNameA(full_name, - (lpPrefixString == NULL) ? NULL : prefix_string, - 0, tempfile_name); - if (uRet) - { - path_size = MultiByteToWideChar( CP_ACP, 0, tempfile_name, -1, - lpTempFileName, MAX_LONGPATH ); - - if (!path_size) - { - DWORD dwLastError = GetLastError(); - if (dwLastError == ERROR_INSUFFICIENT_BUFFER) - { - WARN("File names larger than MAX_PATH_FNAME (%d)! \n", MAX_LONGPATH); - dwLastError = ERROR_FILENAME_EXCED_RANGE; - } - else - { - ASSERT("MultiByteToWideChar failure! 
error is %d", dwLastError); - dwLastError = ERROR_INTERNAL_ERROR; - } - pThread->SetLastError(dwLastError); - uRet = 0; - } - } - -done: - free(tempfile_name); - - LOGEXIT("GetTempFileNameW returns UINT %u\n", uRet); - PERF_EXIT(GetTempFileNameW); - return uRet; -} - /*++ Function: FILEGetLastErrorFromErrno @@ -3000,156 +2121,6 @@ DWORD DIRGetLastErrorFromErrno( void ) } -/*++ -Function: - CopyFileA - -See MSDN doc. - -Notes: - There are several (most) error paths here that do not call SetLastError(). -This is because we know that CreateFile, ReadFile, and WriteFile will do so, -and will have a much better idea of the specific error. ---*/ -BOOL -PALAPI -CopyFileA( - IN LPCSTR lpExistingFileName, - IN LPCSTR lpNewFileName, - IN BOOL bFailIfExists) -{ - CPalThread *pThread; - HANDLE hSource = INVALID_HANDLE_VALUE; - HANDLE hDest = INVALID_HANDLE_VALUE; - DWORD dwDestCreationMode; - BOOL bGood = FALSE; - DWORD dwSrcFileAttributes; - struct stat SrcFileStats; - - const int buffer_size = 16*1024; - char *buffer = (char*)alloca(buffer_size); - DWORD bytes_read; - DWORD bytes_written; - int permissions; - - - PERF_ENTRY(CopyFileA); - ENTRY("CopyFileA(lpExistingFileName=%p (%s), lpNewFileName=%p (%s), bFailIfExists=%d)\n", - lpExistingFileName?lpExistingFileName:"NULL", - lpExistingFileName?lpExistingFileName:"NULL", - lpNewFileName?lpNewFileName:"NULL", - lpNewFileName?lpNewFileName:"NULL", bFailIfExists); - - pThread = InternalGetCurrentThread(); - if ( bFailIfExists ) - { - dwDestCreationMode = CREATE_NEW; - } - else - { - dwDestCreationMode = CREATE_ALWAYS; - } - - hSource = CreateFileA( lpExistingFileName, - GENERIC_READ, - FILE_SHARE_READ, - NULL, - OPEN_EXISTING, - 0, - NULL ); - - if ( hSource == INVALID_HANDLE_VALUE ) - { - ERROR("CreateFileA failed for %s\n", lpExistingFileName); - goto done; - } - - /* Need to preserve the file attributes */ - dwSrcFileAttributes = GetFileAttributesA(lpExistingFileName); - if (dwSrcFileAttributes == 0xffffffff) - { - ERROR("GetFileAttributesA failed for %s\n", lpExistingFileName); - goto done; - } - - /* Need to preserve the owner/group and chmod() flags */ - if (stat (lpExistingFileName, &SrcFileStats) == -1) - { - ERROR("stat() failed for %s\n", lpExistingFileName); - pThread->SetLastError(FILEGetLastErrorFromErrnoAndFilename(lpExistingFileName)); - goto done; - } - - hDest = CreateFileA( lpNewFileName, - GENERIC_WRITE, - FILE_SHARE_READ, - NULL, - dwDestCreationMode, - 0, - NULL ); - - if ( hDest == INVALID_HANDLE_VALUE ) - { - ERROR("CreateFileA failed for %s\n", lpNewFileName); - goto done; - } - - // We don't set file attributes in CreateFile. The only attribute - // that is reflected on disk in Unix is read-only, and we set that - // here. - permissions = (S_IRWXU | S_IRWXG | S_IRWXO); - if ((dwSrcFileAttributes & FILE_ATTRIBUTE_READONLY) != 0) - { - permissions &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - } - - /* Make sure the new file has the same chmod() flags. 
*/ - if (chmod(lpNewFileName, SrcFileStats.st_mode & permissions) == -1) - { - WARN ("chmod() failed to set mode 0x%x on new file\n", - SrcFileStats.st_mode & permissions); - pThread->SetLastError(FILEGetLastErrorFromErrnoAndFilename(lpNewFileName)); - goto done; - } - - while( (bGood = ReadFile( hSource, buffer, buffer_size, &bytes_read, NULL )) - && bytes_read > 0 ) - { - bGood = ( WriteFile( hDest, buffer, bytes_read, &bytes_written, NULL ) - && bytes_written == bytes_read); - if (!bGood) break; - } - - if (!bGood) - { - ERROR("Copy failed\n"); - - if ( !CloseHandle(hDest) || - !DeleteFileA(lpNewFileName) ) - { - ERROR("Unable to clean up partial copy\n"); - } - hDest = INVALID_HANDLE_VALUE; - - goto done; - } - -done: - - if ( hSource != INVALID_HANDLE_VALUE ) - { - CloseHandle( hSource ); - } - if ( hDest != INVALID_HANDLE_VALUE ) - { - CloseHandle( hDest ); - } - - LOGEXIT("CopyFileA returns BOOL %d\n", bGood); - PERF_EXIT(CopyFileA); - return bGood; -} - PAL_ERROR CorUnix::InternalCreatePipe( CPalThread *pThread, diff --git a/src/coreclr/pal/src/handlemgr/handlemgr.cpp b/src/coreclr/pal/src/handlemgr/handlemgr.cpp index 7a3b5c20912a..da7d72036ba1 100644 --- a/src/coreclr/pal/src/handlemgr/handlemgr.cpp +++ b/src/coreclr/pal/src/handlemgr/handlemgr.cpp @@ -19,7 +19,6 @@ Module Name: #include "pal/thread.hpp" #include "pal/handlemgr.hpp" -#include "pal/cs.hpp" #include "pal/dbgmsg.h" using namespace CorUnix; @@ -41,7 +40,7 @@ CSimpleHandleManager::Initialize( { PAL_ERROR palError = NO_ERROR; - InternalInitializeCriticalSection(&m_csLock); + minipal_mutex_init(&m_mtxLock); m_fLockInitialized = TRUE; m_dwTableGrowthRate = c_BasicGrowthRate; diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 40f0e9b88990..2f5310c4d661 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -209,6 +209,11 @@ struct sve_context { #define MCREG_Xer(mc) ((mc).gp_regs[37]) #define MCREG_Ccr(mc) ((mc).gp_regs[38]) +#elif defined(HOST_WASM) + +#define MCREG_Sp(mc) 0 +#define MCREG_Pc(mc) 0 + #elif HAVE___GREGSET_T #ifdef HOST_64BIT @@ -994,7 +999,7 @@ inline void *FPREG_Xstate_Hi16Zmm(const ucontext_t *uc, uint32_t *featureSize) #define FPREG_StatusWord(uc) FPSTATE(uc).fp_fxsave.status #define FPREG_TagWord(uc) FPSTATE(uc).fp_fxsave.tag #define FPREG_MxCsr(uc) FPSTATE(uc).fp_fxsave.mxcsr -#define FPREG_MxCsr_Mask(uc) FPSTATE(uc).fp_fxsave.mscsr_mask +#define FPREG_MxCsr_Mask(uc) FPSTATE(uc).fp_fxsave.mxcsr_mask #define FPREG_ErrorOffset(uc) *(DWORD*) &(FPSTATE(uc).fp_fxsave.rip) #define FPREG_ErrorSelector(uc) *((WORD*) &(FPSTATE(uc).fp_fxsave.rip) + 2) #define FPREG_DataOffset(uc) *(DWORD*) &(FPSTATE(uc).fp_fxsave.rdp) @@ -1389,6 +1394,9 @@ inline static DWORD64 CONTEXTGetPC(LPCONTEXT pContext) return pContext->PSWAddr; #elif defined(HOST_POWERPC64) return pContext->Nip; +#elif defined(HOST_WASM) // wasm has no PC + _ASSERT(false); + return 0; #else return pContext->Pc; #endif @@ -1404,6 +1412,8 @@ inline static void CONTEXTSetPC(LPCONTEXT pContext, DWORD64 pc) pContext->PSWAddr = pc; #elif defined(HOST_POWERPC64) pContext->Nip = pc; +#elif defined(HOST_WASM) // wasm has no PC + _ASSERT(false); #else pContext->Pc = pc; #endif @@ -1421,6 +1431,9 @@ inline static DWORD64 CONTEXTGetFP(LPCONTEXT pContext) return pContext->R11; #elif defined(HOST_POWERPC64) return pContext->R31; +#elif defined(HOST_WASM) // wasm has no PC + _ASSERT(false); + return 0; #else return pContext->Fp; #endif diff --git 
a/src/coreclr/pal/src/include/pal/corunix.hpp b/src/coreclr/pal/src/include/pal/corunix.hpp index 4ebed32d5765..7e606980dd34 100644 --- a/src/coreclr/pal/src/include/pal/corunix.hpp +++ b/src/coreclr/pal/src/include/pal/corunix.hpp @@ -976,10 +976,6 @@ namespace CorUnix CPalThread *pThread ) = 0; - virtual - PAL_ERROR - SendTerminationRequestToWorkerThread() = 0; - // // This routine is primarily meant for use by WaitForMultipleObjects[Ex]. // The caller must individually release each of the returned controller diff --git a/src/coreclr/pal/src/include/pal/critsect.h b/src/coreclr/pal/src/include/pal/critsect.h deleted file mode 100644 index c14baf20a1e4..000000000000 --- a/src/coreclr/pal/src/include/pal/critsect.h +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*++ - - - -Module Name: - - include/pal/critsect.h - -Abstract: - - Header file for the critical sections functions. - - - ---*/ - -#ifndef _PAL_CRITSECT_H_ -#define _PAL_CRITSECT_H_ - -#ifdef __cplusplus -extern "C" -{ -#endif // __cplusplus - -VOID InternalInitializeCriticalSection(CRITICAL_SECTION *pcs); -VOID InternalDeleteCriticalSection(CRITICAL_SECTION *pcs); - -/* The following PALCEnterCriticalSection and PALCLeaveCriticalSection - functions are intended to provide CorUnix's InternalEnterCriticalSection - and InternalLeaveCriticalSection functionalities to legacy C code, - which has no knowledge of CPalThread, classes and namespaces. -*/ -VOID PALCEnterCriticalSection(CRITICAL_SECTION *pcs); -VOID PALCLeaveCriticalSection(CRITICAL_SECTION *pcs); - -#ifdef __cplusplus -} -#endif // __cplusplus - -#endif /* _PAL_CRITSECT_H_ */ - diff --git a/src/coreclr/pal/src/include/pal/cs.hpp b/src/coreclr/pal/src/include/pal/cs.hpp deleted file mode 100644 index cb374ffa1ec0..000000000000 --- a/src/coreclr/pal/src/include/pal/cs.hpp +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -/////////////////////////////////////////////////////////////////////////////// -// -// File: -// cs.cpp -// -// Purpose: -// Header file for critical sections implementation -// - -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef _PAL_CS_HPP -#define _PAL_CS_HPP - -#include "corunix.hpp" -#include "critsect.h" - -namespace CorUnix -{ - void CriticalSectionSubSysInitialize(void); - - void InternalInitializeCriticalSectionAndSpinCount( - PCRITICAL_SECTION pCriticalSection, - DWORD dwSpinCount, - bool fInternal); - - void InternalEnterCriticalSection( - CPalThread *pThread, - CRITICAL_SECTION *pcs - ); - - void InternalLeaveCriticalSection( - CPalThread *pThread, - CRITICAL_SECTION *pcs - ); - -#ifdef _DEBUG - void PALCS_ReportStatisticalData(void); - void PALCS_DumpCSList(); -#endif // _DEBUG - -} - -#endif // _PAL_CS_HPP - diff --git a/src/coreclr/pal/src/include/pal/environ.h b/src/coreclr/pal/src/include/pal/environ.h index 226279c0425b..14262470c447 100644 --- a/src/coreclr/pal/src/include/pal/environ.h +++ b/src/coreclr/pal/src/include/pal/environ.h @@ -18,6 +18,8 @@ Module Name: #ifndef __ENVIRON_H_ #define __ENVIRON_H_ +#include + #ifdef __cplusplus extern "C" { @@ -32,7 +34,7 @@ Variables : gcsEnvironment: critical section to synchronize access to palEnvironment --*/ extern char **palEnvironment; -extern CRITICAL_SECTION gcsEnvironment; +extern minipal_mutex gcsEnvironment; /*++ diff --git a/src/coreclr/pal/src/include/pal/filetime.h b/src/coreclr/pal/src/include/pal/filetime.h index cf177058be47..efbe99828d23 100644 --- a/src/coreclr/pal/src/include/pal/filetime.h +++ b/src/coreclr/pal/src/include/pal/filetime.h @@ -27,40 +27,6 @@ extern "C" { #endif // __cplusplus -/* Provide consistent access to nanosecond fields, if they exist. 
*/ - -#if HAVE_STAT_TIMESPEC - -#define ST_ATIME_NSEC(statstruct) ((statstruct)->st_atimespec.tv_nsec) -#define ST_MTIME_NSEC(statstruct) ((statstruct)->st_mtimespec.tv_nsec) -#define ST_CTIME_NSEC(statstruct) ((statstruct)->st_ctimespec.tv_nsec) - -#else /* HAVE_STAT_TIMESPEC */ - -#if HAVE_STAT_TIM - -#define ST_ATIME_NSEC(statstruct) ((statstruct)->st_atim.tv_nsec) -#define ST_MTIME_NSEC(statstruct) ((statstruct)->st_mtim.tv_nsec) -#define ST_CTIME_NSEC(statstruct) ((statstruct)->st_ctim.tv_nsec) - -#else /* HAVE_STAT_TIM */ - -#if HAVE_STAT_NSEC - -#define ST_ATIME_NSEC(statstruct) ((statstruct)->st_atimensec) -#define ST_MTIME_NSEC(statstruct) ((statstruct)->st_mtimensec) -#define ST_CTIME_NSEC(statstruct) ((statstruct)->st_ctimensec) - -#else /* HAVE_STAT_NSEC */ - -#define ST_ATIME_NSEC(statstruct) 0 -#define ST_MTIME_NSEC(statstruct) 0 -#define ST_CTIME_NSEC(statstruct) 0 - -#endif /* HAVE_STAT_NSEC */ -#endif /* HAVE_STAT_TIM */ -#endif /* HAVE_STAT_TIMESPEC */ - FILETIME FILEUnixTimeToFileTime( time_t sec, long nsec ); #ifdef __cplusplus diff --git a/src/coreclr/pal/src/include/pal/handlemgr.hpp b/src/coreclr/pal/src/include/pal/handlemgr.hpp index f6a3b5d16374..1e00a6473573 100644 --- a/src/coreclr/pal/src/include/pal/handlemgr.hpp +++ b/src/coreclr/pal/src/include/pal/handlemgr.hpp @@ -22,9 +22,8 @@ Module Name: #include "corunix.hpp" -#include "cs.hpp" #include "pal/thread.hpp" - +#include /* Pseudo handles constant for current thread and process */ extern const HANDLE hPseudoCurrentProcess; @@ -72,7 +71,7 @@ namespace CorUnix DWORD m_dwTableGrowthRate; HANDLE_TABLE_ENTRY* m_rghteHandleTable; - CRITICAL_SECTION m_csLock; + minipal_mutex m_mtxLock; bool m_fLockInitialized; bool ValidateHandle(HANDLE h); @@ -95,7 +94,7 @@ namespace CorUnix { if (m_fLockInitialized) { - DeleteCriticalSection(&m_csLock); + minipal_mutex_destroy(&m_mtxLock); } if (NULL != m_rghteHandleTable) @@ -138,7 +137,7 @@ namespace CorUnix CPalThread *pThread ) { - InternalEnterCriticalSection(pThread, &m_csLock); + minipal_mutex_enter(&m_mtxLock); }; void @@ -146,7 +145,7 @@ namespace CorUnix CPalThread *pThread ) { - InternalLeaveCriticalSection(pThread, &m_csLock); + minipal_mutex_leave(&m_mtxLock); }; }; diff --git a/src/coreclr/pal/src/include/pal/mutex.hpp b/src/coreclr/pal/src/include/pal/mutex.hpp index 016668dafb16..9ff179ca43d1 100644 --- a/src/coreclr/pal/src/include/pal/mutex.hpp +++ b/src/coreclr/pal/src/include/pal/mutex.hpp @@ -37,6 +37,7 @@ namespace CorUnix LPSECURITY_ATTRIBUTES lpMutexAttributes, BOOL bInitialOwner, LPCSTR lpName, + BOOL bCurrentUserOnly, HANDLE *phMutex ); @@ -51,6 +52,7 @@ namespace CorUnix SharedMemorySystemCallErrors *errors, CPalThread *pThread, LPCSTR lpName, + BOOL bCurrentUserOnly, HANDLE *phMutex ); @@ -216,10 +218,10 @@ class NamedMutexProcessData : public SharedMemoryProcessDataBase bool m_hasRefFromLockOwnerThread; public: - static SharedMemoryProcessDataHeader *CreateOrOpen(SharedMemorySystemCallErrors *errors, LPCSTR name, bool acquireLockIfCreated, bool *createdRef); - static SharedMemoryProcessDataHeader *Open(SharedMemorySystemCallErrors *errors, LPCSTR name); + static SharedMemoryProcessDataHeader *CreateOrOpen(SharedMemorySystemCallErrors *errors, LPCSTR name, bool isUserScope, bool acquireLockIfCreated, bool *createdRef); + static SharedMemoryProcessDataHeader *Open(SharedMemorySystemCallErrors *errors, LPCSTR name, bool isUserScope); private: - static SharedMemoryProcessDataHeader *CreateOrOpen(SharedMemorySystemCallErrors *errors, LPCSTR name, bool 
createIfNotExist, bool acquireLockIfCreated, bool *createdRef); + static SharedMemoryProcessDataHeader *CreateOrOpen(SharedMemorySystemCallErrors *errors, LPCSTR name, bool isUserScope, bool createIfNotExist, bool acquireLockIfCreated, bool *createdRef); public: NamedMutexProcessData( diff --git a/src/coreclr/pal/src/include/pal/palinternal.h b/src/coreclr/pal/src/include/pal/palinternal.h index 30c10150bda3..603ea73b0082 100644 --- a/src/coreclr/pal/src/include/pal/palinternal.h +++ b/src/coreclr/pal/src/include/pal/palinternal.h @@ -157,10 +157,6 @@ function_name() to call the system's implementation the header */ #include -#ifdef PAL_PERF -#include "pal_perf.h" -#endif - #ifdef __record_type_class #undef __record_type_class #endif @@ -168,6 +164,7 @@ function_name() to call the system's implementation #undef __real_type_class #endif +#include #include "pal.h" #include "palprivate.h" @@ -254,11 +251,11 @@ extern "C" typedef enum _TimeConversionConstants { - tccSecondsToMillieSeconds = 1000, // 10^3 + tccSecondsToMilliSeconds = 1000, // 10^3 tccSecondsToMicroSeconds = 1000000, // 10^6 tccSecondsToNanoSeconds = 1000000000, // 10^9 - tccMillieSecondsToMicroSeconds = 1000, // 10^3 - tccMillieSecondsToNanoSeconds = 1000000, // 10^6 + tccMilliSecondsToMicroSeconds = 1000, // 10^3 + tccMilliSecondsToNanoSeconds = 1000000, // 10^6 tccMicroSecondsToNanoSeconds = 1000, // 10^3 tccSecondsTo100NanoSeconds = 10000000, // 10^7 tccMicroSecondsTo100NanoSeconds = 10 // 10^1 diff --git a/src/coreclr/pal/src/include/pal/perftrace.h b/src/coreclr/pal/src/include/pal/perftrace.h index 363709aa1b5f..bf0c361de403 100644 --- a/src/coreclr/pal/src/include/pal/perftrace.h +++ b/src/coreclr/pal/src/include/pal/perftrace.h @@ -24,45 +24,10 @@ Overview of PAL Performance utilities #ifndef _PAL_PERFTRACE_H_ #define _PAL_PERFTRACE_H_ -#ifdef __cplusplus -extern "C" -{ -#endif // __cplusplus - -#if PAL_PERF -#define PERF_ENTRY(x) \ - ULONGLONG pal_perf_start_tick = 0;\ - PERFLogFunctionEntry( PAL_PERF_##x, &pal_perf_start_tick ) -#define PERF_EXIT(x) \ - PERFLogFunctionExit( PAL_PERF_##x, &pal_perf_start_tick ) -#define PERF_ENTRY_ONLY(x) \ - PERFNoLatencyProfileEntry( PAL_PERF_##x ) - -BOOL PERFInitialize(LPWSTR command_line, LPWSTR exe_path) ; -void PERFTerminate( ); -BOOL PERFAllocThreadInfo( ); -void PERFLogFunctionExit(unsigned int pal_api_id, ULONGLONG *pal_perf_start_tick); -void PERFLogFunctionEntry(unsigned int pal_api_id, ULONGLONG *pal_perf_start_tick); -void PERFEnableThreadProfile(BOOL isInternal); -void PERFDisableThreadProfile(BOOL isInternal); -void PERFEnableProcessProfile( ); -void PERFDisableProcessProfile( ); -BOOL PERFIsProcessProfileEnabled( ); -void PERFNoLatencyProfileEntry(unsigned int pal_api_id ); -void PERFCalibrate(const char* msg); - -#else /* PAL_PERF */ - #define PERF_ENTRY(x) #define PERF_ENTRY_ONLY(x) #define PERF_EXIT(x) -#endif /* PAL_PERF */ - -#ifdef __cplusplus -} -#endif // __cplusplus - #endif /* _PAL_PERFTRACE_H_ */ diff --git a/src/coreclr/pal/src/include/pal/sharedmemory.h b/src/coreclr/pal/src/include/pal/sharedmemory.h index 63e5ddd1540f..84a35d2b237d 100644 --- a/src/coreclr/pal/src/include/pal/sharedmemory.h +++ b/src/coreclr/pal/src/include/pal/sharedmemory.h @@ -27,27 +27,26 @@ #define SHARED_MEMORY_MAX_FILE_NAME_CHAR_COUNT (_MAX_FNAME - 1) #define SHARED_MEMORY_MAX_NAME_CHAR_COUNT (STRING_LENGTH("Global\\") + SHARED_MEMORY_MAX_FILE_NAME_CHAR_COUNT) -#define SHARED_MEMORY_RUNTIME_TEMP_DIRECTORY_NAME ".dotnet" -#define SHARED_MEMORY_SHARED_MEMORY_DIRECTORY_NAME 
".dotnet/shm" -#define SHARED_MEMORY_LOCK_FILES_DIRECTORY_NAME ".dotnet/lockfiles" -static_assert_no_msg(ARRAY_SIZE(SHARED_MEMORY_LOCK_FILES_DIRECTORY_NAME) >= ARRAY_SIZE(SHARED_MEMORY_SHARED_MEMORY_DIRECTORY_NAME)); +#define SHARED_MEMORY_USER_UNSCOPED_RUNTIME_TEMP_DIRECTORY_NAME ".dotnet" +#define SHARED_MEMORY_USER_SCOPED_RUNTIME_TEMP_DIRECTORY_NAME_PREFIX ".dotnet-uid" +#define SHARED_MEMORY_SHARED_MEMORY_DIRECTORY_NAME "shm" +#define SHARED_MEMORY_LOCK_FILES_DIRECTORY_NAME "lockfiles" +static_assert_no_msg(STRING_LENGTH(SHARED_MEMORY_LOCK_FILES_DIRECTORY_NAME) >= STRING_LENGTH(SHARED_MEMORY_SHARED_MEMORY_DIRECTORY_NAME)); #define SHARED_MEMORY_GLOBAL_DIRECTORY_NAME "global" #define SHARED_MEMORY_SESSION_DIRECTORY_NAME_PREFIX "session" -static_assert_no_msg(ARRAY_SIZE(SHARED_MEMORY_SESSION_DIRECTORY_NAME_PREFIX) >= ARRAY_SIZE(SHARED_MEMORY_GLOBAL_DIRECTORY_NAME)); -#define SHARED_MEMORY_UNIQUE_TEMP_NAME_TEMPLATE ".coreclr.XXXXXX" - -#define SHARED_MEMORY_MAX_SESSION_ID_CHAR_COUNT (10) +#define SHARED_MEMORY_UNIQUE_TEMP_NAME_TEMPLATE ".dotnet.XXXXXX" // Note that this Max size does not include the prefix folder path size which is unknown (in the case of sandbox) until runtime #define SHARED_MEMORY_MAX_FILE_PATH_CHAR_COUNT \ ( \ + STRING_LENGTH(SHARED_MEMORY_USER_SCOPED_RUNTIME_TEMP_DIRECTORY_NAME_PREFIX) + \ + 11 /* user ID, path separator */ + \ STRING_LENGTH(SHARED_MEMORY_LOCK_FILES_DIRECTORY_NAME) + \ 1 /* path separator */ + \ STRING_LENGTH(SHARED_MEMORY_SESSION_DIRECTORY_NAME_PREFIX) + \ - SHARED_MEMORY_MAX_SESSION_ID_CHAR_COUNT + \ - 1 /* path separator */ + \ + 11 /* session ID, path separator */ + \ SHARED_MEMORY_MAX_FILE_NAME_CHAR_COUNT \ ) @@ -98,12 +97,17 @@ class SharedMemorySystemCallErrors void Append(LPCSTR format, ...); }; +class SharedMemoryId; + class SharedMemoryHelpers { private: - static const mode_t PermissionsMask_CurrentUser_ReadWriteExecute; + static const mode_t PermissionsMask_OwnerUser_ReadWrite; + static const mode_t PermissionsMask_OwnerUser_ReadWriteExecute; + static const mode_t PermissionsMask_NonOwnerUsers_Write; static const mode_t PermissionsMask_AllUsers_ReadWrite; static const mode_t PermissionsMask_AllUsers_ReadWriteExecute; + static const mode_t PermissionsMask_Sticky; public: static const UINT32 InvalidProcessId; static const SIZE_T InvalidThreadId; @@ -114,17 +118,14 @@ class SharedMemoryHelpers static SIZE_T AlignUp(SIZE_T value, SIZE_T alignment); static void *Alloc(SIZE_T byteCount); - - template static void BuildSharedFilesPath(PathCharString& destination, const char (&suffix)[SuffixByteCount]); - static void BuildSharedFilesPath(PathCharString& destination, const char *suffix, int suffixByteCount); static bool AppendUInt32String(PathCharString& destination, UINT32 value); - static bool EnsureDirectoryExists(SharedMemorySystemCallErrors *errors, const char *path, bool isGlobalLockAcquired, bool createIfNotExist = true, bool isSystemDirectory = false); + static bool EnsureDirectoryExists(SharedMemorySystemCallErrors *errors, const char *path, const SharedMemoryId *id, bool isGlobalLockAcquired, bool createIfNotExist = true, bool isSystemDirectory = false); private: static int Open(SharedMemorySystemCallErrors *errors, LPCSTR path, int flags, mode_t mode = static_cast(0)); public: static int OpenDirectory(SharedMemorySystemCallErrors *errors, LPCSTR path); - static int CreateOrOpenFile(SharedMemorySystemCallErrors *errors, LPCSTR path, bool createIfNotExist = true, bool *createdRef = nullptr); + static int 
CreateOrOpenFile(SharedMemorySystemCallErrors *errors, LPCSTR path, const SharedMemoryId *id, bool createIfNotExist = true, bool *createdRef = nullptr); static void CloseFile(int fileDescriptor); static int ChangeMode(LPCSTR path, mode_t mode); @@ -150,19 +151,24 @@ class SharedMemoryId LPCSTR m_name; SIZE_T m_nameCharCount; bool m_isSessionScope; // false indicates global scope + bool m_isUserScope; + uid_t m_userScopeUid; public: SharedMemoryId(); - SharedMemoryId(LPCSTR name, SIZE_T nameCharCount, bool isSessionScope); - SharedMemoryId(LPCSTR name); + SharedMemoryId(LPCSTR name, bool isUserScope); public: LPCSTR GetName() const; SIZE_T GetNameCharCount() const; + void ReplaceNamePtr(LPCSTR name); bool IsSessionScope() const; - bool Equals(SharedMemoryId *other) const; + bool IsUserScope() const; + uid_t GetUserScopeUid() const; + bool Equals(const SharedMemoryId *other) const; public: + bool AppendRuntimeTempDirectoryName(PathCharString& path) const; bool AppendSessionDirectoryName(PathCharString& path) const; }; @@ -222,7 +228,7 @@ class SharedMemoryProcessDataHeader SharedMemoryProcessDataHeader *m_nextInProcessDataHeaderList; public: - static SharedMemoryProcessDataHeader *CreateOrOpen(SharedMemorySystemCallErrors *errors, LPCSTR name, SharedMemorySharedDataHeader requiredSharedDataHeader, SIZE_T sharedDataByteCount, bool createIfNotExist, bool *createdRef); + static SharedMemoryProcessDataHeader *CreateOrOpen(SharedMemorySystemCallErrors *errors, LPCSTR name, bool isUserScope, SharedMemorySharedDataHeader requiredSharedDataHeader, SIZE_T sharedDataByteCount, bool createIfNotExist, bool *createdRef); public: static SharedMemoryProcessDataHeader *PalObject_GetProcessDataHeader(CorUnix::IPalObject *object); @@ -230,14 +236,14 @@ class SharedMemoryProcessDataHeader static void PalObject_Close(CorUnix::CPalThread *thread, CorUnix::IPalObject *object, bool isShuttingDown); private: - SharedMemoryProcessDataHeader(SharedMemoryId *id, int fileDescriptor, SharedMemorySharedDataHeader *sharedDataHeader, SIZE_T sharedDataTotalByteCount); + SharedMemoryProcessDataHeader(const SharedMemoryId *id, int fileDescriptor, SharedMemorySharedDataHeader *sharedDataHeader, SIZE_T sharedDataTotalByteCount); public: - static SharedMemoryProcessDataHeader *New(SharedMemoryId *id, int fileDescriptor, SharedMemorySharedDataHeader *sharedDataHeader, SIZE_T sharedDataTotalByteCount); + static SharedMemoryProcessDataHeader *New(const SharedMemoryId *id, int fileDescriptor, SharedMemorySharedDataHeader *sharedDataHeader, SIZE_T sharedDataTotalByteCount); ~SharedMemoryProcessDataHeader(); void Close(); public: - SharedMemoryId *GetId(); + const SharedMemoryId *GetId() const; SharedMemoryProcessDataBase *GetData() const; void SetData(SharedMemoryProcessDataBase *data); SharedMemorySharedDataHeader *GetSharedDataHeader() const; @@ -253,11 +259,27 @@ class SharedMemoryProcessDataHeader class SharedMemoryManager { private: - static CRITICAL_SECTION s_creationDeletionProcessLock; + static minipal_mutex s_creationDeletionProcessLock; static int s_creationDeletionLockFileDescriptor; - static PathCharString* s_runtimeTempDirectoryPath; - static PathCharString* s_sharedMemoryDirectoryPath; + struct UserScopeUidAndFileDescriptor + { + uid_t userScopeUid; + int fileDescriptor; + + UserScopeUidAndFileDescriptor() : userScopeUid((uid_t)0), fileDescriptor(-1) + { + } + + UserScopeUidAndFileDescriptor(uid_t userScopeUid, int fileDescriptor) + : userScopeUid(userScopeUid), fileDescriptor(fileDescriptor) + { + } + }; + + static 
UserScopeUidAndFileDescriptor *s_userScopeUidToCreationDeletionLockFDs; + static int s_userScopeUidToCreationDeletionLockFDsCount; + static int s_userScopeUidToCreationDeletionLockFDsCapacity; private: static SharedMemoryProcessDataHeader *s_processDataHeaderListHead; @@ -269,17 +291,16 @@ class SharedMemoryManager #endif // _DEBUG public: - static bool StaticInitialize(); + static void StaticInitialize(); static void StaticClose(); public: static void AcquireCreationDeletionProcessLock(); static void ReleaseCreationDeletionProcessLock(); - static void AcquireCreationDeletionFileLock(SharedMemorySystemCallErrors *errors); - static void ReleaseCreationDeletionFileLock(); - -public: - static bool CopySharedMemoryBasePath(PathCharString& destination); + static void AcquireCreationDeletionFileLock(SharedMemorySystemCallErrors *errors, const SharedMemoryId *id); + static void ReleaseCreationDeletionFileLock(const SharedMemoryId *id); + static void AddUserScopeUidCreationDeletionLockFD(uid_t userScopeUid, int creationDeletionLockFD); + static int FindUserScopeCreationDeletionLockFD(uid_t userScopeUid); #ifdef _DEBUG public: @@ -290,7 +311,7 @@ class SharedMemoryManager public: static void AddProcessDataHeader(SharedMemoryProcessDataHeader *processDataHeader); static void RemoveProcessDataHeader(SharedMemoryProcessDataHeader *processDataHeader); - static SharedMemoryProcessDataHeader *FindProcessDataHeader(SharedMemoryId *id); + static SharedMemoryProcessDataHeader *FindProcessDataHeader(const SharedMemoryId *id); }; #endif // !_PAL_SHARED_MEMORY_H_ diff --git a/src/coreclr/pal/src/include/pal/sharedmemory.inl b/src/coreclr/pal/src/include/pal/sharedmemory.inl deleted file mode 100644 index 598172c2bbc7..000000000000 --- a/src/coreclr/pal/src/include/pal/sharedmemory.inl +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
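The new `s_userScopeUidToCreationDeletionLockFDs` bookkeeping above pairs each user ID with the descriptor of that user's creation/deletion lock file, accessed through `AddUserScopeUidCreationDeletionLockFD` and `FindUserScopeCreationDeletionLockFD`. A simplified, self-contained sketch of that mapping; the growth policy and error handling here are illustrative only, not the PAL's.

```cpp
// Sketch only: a small growable array mapping uid -> lock-file descriptor.
#include <sys/types.h>
#include <cstdlib>

struct UidFdEntry { uid_t uid; int fd; };

static UidFdEntry* g_entries = nullptr;
static int g_count = 0;
static int g_capacity = 0;

static bool AddUserScopeLockFd(uid_t uid, int fd)
{
    if (g_count == g_capacity)
    {
        int newCapacity = (g_capacity == 0) ? 4 : g_capacity * 2;
        void* grown = realloc(g_entries, newCapacity * sizeof(UidFdEntry));
        if (grown == nullptr)
            return false; // out of memory; caller decides how to fail
        g_entries = static_cast<UidFdEntry*>(grown);
        g_capacity = newCapacity;
    }
    g_entries[g_count++] = { uid, fd };
    return true;
}

static int FindUserScopeLockFd(uid_t uid)
{
    for (int i = 0; i < g_count; i++)
    {
        if (g_entries[i].uid == uid)
            return g_entries[i].fd;
    }
    return -1; // no lock file opened yet for this user
}
```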
- -#ifndef _PAL_SHARED_MEMORY_INL_ -#define _PAL_SHARED_MEMORY_INL_ - -#include "sharedmemory.h" - -#include "dbgmsg.h" - -#include - -template -void SharedMemoryHelpers::BuildSharedFilesPath( - PathCharString& destination, - const char (&suffix)[SuffixByteCount]) -{ - BuildSharedFilesPath(destination, suffix, SuffixByteCount - 1); -} - -#endif // !_PAL_SHARED_MEMORY_INL_ diff --git a/src/coreclr/pal/src/include/pal/synchcache.hpp b/src/coreclr/pal/src/include/pal/synchcache.hpp index b2020d4ad263..81ecec769a31 100644 --- a/src/coreclr/pal/src/include/pal/synchcache.hpp +++ b/src/coreclr/pal/src/include/pal/synchcache.hpp @@ -30,7 +30,7 @@ namespace CorUnix static const int MaxDepth = 256; Volatile m_pHead; - CRITICAL_SECTION m_cs; + minipal_mutex m_cs; Volatile m_iDepth; int m_iMaxDepth; #ifdef _DEBUG @@ -38,9 +38,9 @@ namespace CorUnix #endif void Lock(CPalThread * pthrCurrent) - { InternalEnterCriticalSection(pthrCurrent, &m_cs); } + { minipal_mutex_enter(&m_cs); } void Unlock(CPalThread * pthrCurrent) - { InternalLeaveCriticalSection(pthrCurrent, &m_cs); } + { minipal_mutex_leave(&m_cs); } public: CSynchCache(int iMaxDepth = MaxDepth) : @@ -51,7 +51,7 @@ namespace CorUnix ,m_iMaxTrackedDepth(0) #endif { - InternalInitializeCriticalSection(&m_cs); + minipal_mutex_init(&m_cs); if (m_iMaxDepth < 0) { m_iMaxDepth = 0; @@ -61,7 +61,7 @@ namespace CorUnix ~CSynchCache() { Flush(NULL, true); - InternalDeleteCriticalSection(&m_cs); + minipal_mutex_destroy(&m_cs); } #ifdef _DEBUG @@ -205,7 +205,7 @@ namespace CorUnix // cache before continuing Volatile m_pHead; - CRITICAL_SECTION m_cs; + minipal_mutex m_cs; Volatile m_iDepth; int m_iMaxDepth; #ifdef _DEBUG @@ -213,9 +213,9 @@ namespace CorUnix #endif void Lock(CPalThread * pthrCurrent) - { InternalEnterCriticalSection(pthrCurrent, &m_cs); } + { minipal_mutex_enter(&m_cs); } void Unlock(CPalThread * pthrCurrent) - { InternalLeaveCriticalSection(pthrCurrent, &m_cs); } + { minipal_mutex_leave(&m_cs); } public: CSHRSynchCache(int iMaxDepth = MaxDepth) : @@ -226,7 +226,7 @@ namespace CorUnix ,m_iMaxTrackedDepth(0) #endif { - InternalInitializeCriticalSection(&m_cs); + minipal_mutex_init(&m_cs); if (m_iMaxDepth < 0) { m_iMaxDepth = 0; @@ -236,7 +236,7 @@ namespace CorUnix ~CSHRSynchCache() { Flush(NULL, true); - InternalDeleteCriticalSection(&m_cs); + minipal_mutex_destroy(&m_cs); } #ifdef _DEBUG diff --git a/src/coreclr/pal/src/include/pal/thread.hpp b/src/coreclr/pal/src/include/pal/thread.hpp index 0fafc5a716ba..236bb6d49d9d 100644 --- a/src/coreclr/pal/src/include/pal/thread.hpp +++ b/src/coreclr/pal/src/include/pal/thread.hpp @@ -20,7 +20,6 @@ Module Name: #define _PAL_THREAD_HPP_ #include "corunix.hpp" -#include "cs.hpp" #include #if HAVE_MACH_EXCEPTIONS @@ -32,6 +31,7 @@ Module Name: #include "synchobjects.hpp" #include #include +#include namespace CorUnix { @@ -203,7 +203,7 @@ namespace CorUnix CPalThread *m_pNext; DWORD m_dwExitCode; BOOL m_fExitCodeSet; - CRITICAL_SECTION m_csLock; + minipal_mutex m_mtxLock; bool m_fLockInitialized; bool m_fIsDummy; @@ -372,7 +372,7 @@ namespace CorUnix CPalThread *pThread ) { - InternalEnterCriticalSection(pThread, &m_csLock); + minipal_mutex_enter(&m_mtxLock); }; void @@ -380,7 +380,7 @@ namespace CorUnix CPalThread *pThread ) { - InternalLeaveCriticalSection(pThread, &m_csLock); + minipal_mutex_leave(&m_mtxLock); }; // @@ -572,7 +572,7 @@ namespace CorUnix m_pNext = pNext; }; -#if !HAVE_MACH_EXCEPTIONS +#if !HAVE_MACH_EXCEPTIONS && HAVE_SIGALTSTACK BOOL EnsureSignalAlternateStack( void diff --git 
a/src/coreclr/pal/src/init/pal.cpp b/src/coreclr/pal/src/init/pal.cpp index 8dcf3a1aee48..6811690132da 100644 --- a/src/coreclr/pal/src/init/pal.cpp +++ b/src/coreclr/pal/src/init/pal.cpp @@ -19,7 +19,6 @@ SET_DEFAULT_DEBUG_CHANNEL(PAL); // some headers have code with asserts, so do th #include "pal/thread.hpp" #include "pal/synchobjects.hpp" #include "pal/procobj.hpp" -#include "pal/cs.hpp" #include "pal/file.hpp" #include "pal/map.hpp" #include "../objmgr/listedobjectmanager.hpp" @@ -111,7 +110,7 @@ BOOL g_useDefaultBaseAddr = FALSE; /* critical section to protect access to init_count. This is allocated on the very first PAL_Initialize call, and is freed afterward. */ -static PCRITICAL_SECTION init_critsec = NULL; +static minipal_mutex* init_critsec = NULL; static DWORD g_initializeDLLFlags = PAL_INITIALIZE_DLL; @@ -311,31 +310,29 @@ Initialize( /*Firstly initiate a lastError */ SetLastError(ERROR_GEN_FAILURE); - CriticalSectionSubSysInitialize(); - if(nullptr == init_critsec) { pthread_mutex_lock(&init_critsec_mutex); // prevents race condition of two threads // initializing the critical section. if(nullptr == init_critsec) { - static CRITICAL_SECTION temp_critsec; + static minipal_mutex temp_critsec; // Want this critical section to NOT be internal to avoid the use of unsafe region markers. - InternalInitializeCriticalSectionAndSpinCount(&temp_critsec, 0, false); + minipal_mutex_init(&temp_critsec); if(nullptr != InterlockedCompareExchangePointer(&init_critsec, &temp_critsec, nullptr)) { // Another thread got in before us! shouldn't happen, if the PAL // isn't initialized there shouldn't be any other threads WARN("Another thread initialized the critical section\n"); - InternalDeleteCriticalSection(&temp_critsec); + minipal_mutex_destroy(&temp_critsec); } } pthread_mutex_unlock(&init_critsec_mutex); } - InternalEnterCriticalSection(pThread, init_critsec); // here pThread is always nullptr + minipal_mutex_enter(init_critsec); if (init_count == 0) { @@ -410,12 +407,7 @@ Initialize( // we use large numbers of threads or have many open files. } - if (!SharedMemoryManager::StaticInitialize()) - { - ERROR("Shared memory static initialization failed!\n"); - palError = ERROR_PALINIT_SHARED_MEMORY_MANAGER; - goto CLEANUP1; - } + SharedMemoryManager::StaticInitialize(); // // Initialize global process data @@ -543,17 +535,6 @@ Initialize( // InitializeProcessCommandLine took ownership of this memory. 
command_line = nullptr; -#ifdef PAL_PERF - // Initialize the Profiling structure - if(FALSE == PERFInitialize(command_line, exe_path)) - { - ERROR("Performance profiling initial failed\n"); - palError = ERROR_PALINIT_PERF; - goto CLEANUP2; - } - PERFAllocThreadInfo(); -#endif - if (!LOADSetExeName(exe_path)) { ERROR("Unable to set exe name\n"); @@ -607,6 +588,7 @@ Initialize( } } +#ifndef __wasm__ if (flags & PAL_INITIALIZE_SYNC_THREAD) { // @@ -619,7 +601,7 @@ Initialize( goto CLEANUP13; } } - +#endif /* initialize structured exception handling stuff (signals, etc) */ if (FALSE == SEHInitialize(pThread, flags)) { @@ -685,16 +667,7 @@ Initialize( ERROR("PAL_Initialize failed\n"); SetLastError(palError); done: -#ifdef PAL_PERF - if( retval == 0) - { - PERFEnableProcessProfile(); - PERFEnableThreadProfile(FALSE); - PERFCalibrate("Overhead of PERF entry/exit"); - } -#endif - - InternalLeaveCriticalSection(pThread, init_critsec); + minipal_mutex_leave(init_critsec); if (fFirstTimeInit && 0 == retval) { @@ -906,10 +879,7 @@ BOOL PALInitLock(void) return FALSE; } - CPalThread * pThread = - (PALIsThreadDataInitialized() ? InternalGetCurrentThread() : nullptr); - - InternalEnterCriticalSection(pThread, init_critsec); + minipal_mutex_enter(init_critsec); return TRUE; } @@ -928,10 +898,7 @@ void PALInitUnlock(void) return; } - CPalThread * pThread = - (PALIsThreadDataInitialized() ? InternalGetCurrentThread() : nullptr); - - InternalLeaveCriticalSection(pThread, init_critsec); + minipal_mutex_leave(init_critsec); } /* Internal functions *********************************************************/ @@ -949,6 +916,10 @@ Return value: --*/ static BOOL INIT_IncreaseDescriptorLimit(void) { +#ifdef __wasm__ + // WebAssembly cannot set limits + return TRUE; +#endif #ifndef DONT_SET_RLIMIT_NOFILE struct rlimit rlp; int result; diff --git a/src/coreclr/pal/src/init/sxs.cpp b/src/coreclr/pal/src/init/sxs.cpp index 4c1772896a99..8c55aa99c223 100644 --- a/src/coreclr/pal/src/init/sxs.cpp +++ b/src/coreclr/pal/src/init/sxs.cpp @@ -64,7 +64,7 @@ AllocatePalThread(CPalThread **ppThread) goto exit; } -#if !HAVE_MACH_EXCEPTIONS +#if !HAVE_MACH_EXCEPTIONS && HAVE_SIGALTSTACK // Ensure alternate stack for SIGSEGV handling. Our SIGSEGV handler is set to // run on an alternate stack and the stack needs to be allocated per thread. 
if (!pThread->EnsureSignalAlternateStack()) diff --git a/src/coreclr/pal/src/loader/module.cpp b/src/coreclr/pal/src/loader/module.cpp index c8b07c670175..d7cff970a0b0 100644 --- a/src/coreclr/pal/src/loader/module.cpp +++ b/src/coreclr/pal/src/loader/module.cpp @@ -24,7 +24,6 @@ SET_DEFAULT_DEBUG_CHANNEL(LOADER); // some headers have code with asserts, so do #include "pal/file.hpp" #include "pal/palinternal.h" #include "pal/module.h" -#include "pal/cs.hpp" #include "pal/process.h" #include "pal/file.h" #include "pal/utils.h" @@ -74,7 +73,7 @@ using namespace CorUnix; /* static variables ***********************************************************/ /* critical section that regulates access to the module list */ -CRITICAL_SECTION module_critsec; +minipal_mutex module_critsec; /* always the first, in the in-load-order list */ MODSTRUCT exe_module; @@ -1010,18 +1009,20 @@ BOOL LOADInitializeModules() { _ASSERTE(exe_module.prev == nullptr); - InternalInitializeCriticalSection(&module_critsec); + minipal_mutex_init(&module_critsec); // Initialize module for main executable TRACE("Initializing module for main executable\n"); exe_module.self = (HMODULE)&exe_module; exe_module.dl_handle = dlopen(nullptr, RTLD_LAZY); +#if not defined(__wasm__) // wasm does not support shared libraries if (exe_module.dl_handle == nullptr) { ERROR("Executable module will be broken : dlopen(nullptr) failed\n"); return FALSE; } +#endif exe_module.lib_name = nullptr; exe_module.refcount = -1; exe_module.next = &exe_module; @@ -1862,7 +1863,7 @@ void LockModuleList() CPalThread * pThread = (PALIsThreadDataInitialized() ? InternalGetCurrentThread() : nullptr); - InternalEnterCriticalSection(pThread, &module_critsec); + minipal_mutex_enter(&module_critsec); } /*++ @@ -1884,5 +1885,5 @@ void UnlockModuleList() CPalThread * pThread = (PALIsThreadDataInitialized() ? InternalGetCurrentThread() : nullptr); - InternalLeaveCriticalSection(pThread, &module_critsec); + minipal_mutex_leave(&module_critsec); } diff --git a/src/coreclr/pal/src/locale/unicode.cpp b/src/coreclr/pal/src/locale/unicode.cpp index a14b414c3234..a66a0edc14d4 100644 --- a/src/coreclr/pal/src/locale/unicode.cpp +++ b/src/coreclr/pal/src/locale/unicode.cpp @@ -40,31 +40,6 @@ PALAPI GetConsoleOutputCP( VOID) { - UINT nRet = 0; - PERF_ENTRY(GetConsoleOutputCP); - ENTRY("GetConsoleOutputCP()\n"); - nRet = GetACP(); - LOGEXIT("GetConsoleOutputCP returns UINT %d \n", nRet ); - PERF_EXIT(GetConsoleOutputCP); - return nRet; -} - -/*++ -Function: -GetACP - -See MSDN doc. ---*/ -UINT -PALAPI -GetACP(VOID) -{ - PERF_ENTRY(GetACP); - ENTRY("GetACP(VOID)\n"); - - LOGEXIT("GetACP returning UINT %d\n", CP_UTF8); - PERF_EXIT(GetACP); - return CP_UTF8; } diff --git a/src/coreclr/pal/src/map/map.cpp b/src/coreclr/pal/src/map/map.cpp index 35909bd54f83..8900ccd1058c 100644 --- a/src/coreclr/pal/src/map/map.cpp +++ b/src/coreclr/pal/src/map/map.cpp @@ -21,7 +21,6 @@ Module Name: #include "pal/palinternal.h" #include "pal/dbgmsg.h" #include "pal/init.h" -#include "pal/critsect.h" #include "pal/virtual.h" #include "pal/environ.h" #include "common.h" @@ -55,7 +54,7 @@ SET_DEFAULT_DEBUG_CHANNEL(VIRTUAL); // this critical section. 
// -CRITICAL_SECTION mapping_critsec; +minipal_mutex mapping_critsec; LIST_ENTRY MappedViewList; #ifndef CORECLR @@ -920,7 +919,7 @@ CorUnix::InternalMapViewOfFile( goto InternalMapViewOfFileExit; } - InternalEnterCriticalSection(pThread, &mapping_critsec); + minipal_mutex_enter(&mapping_critsec); if (FILE_MAP_COPY == dwDesiredAccess) { @@ -1116,7 +1115,7 @@ CorUnix::InternalMapViewOfFile( InternalMapViewOfFileLeaveCriticalSection: - InternalLeaveCriticalSection(pThread, &mapping_critsec); + minipal_mutex_leave(&mapping_critsec); InternalMapViewOfFileExit: @@ -1144,7 +1143,7 @@ CorUnix::InternalUnmapViewOfFile( PMAPPED_VIEW_LIST pView = NULL; IPalObject *pMappingObject = NULL; - InternalEnterCriticalSection(pThread, &mapping_critsec); + minipal_mutex_enter(&mapping_critsec); pView = MAPGetViewForAddress(lpBaseAddress); if (NULL == pView) @@ -1177,7 +1176,7 @@ CorUnix::InternalUnmapViewOfFile( InternalUnmapViewOfFileExit: - InternalLeaveCriticalSection(pThread, &mapping_critsec); + minipal_mutex_leave(&mapping_critsec); // // We can't dereference the file mapping object until after @@ -1209,7 +1208,7 @@ MAPInitialize( void ) { TRACE( "Initialising the critical section.\n" ); - InternalInitializeCriticalSection(&mapping_critsec); + minipal_mutex_init(&mapping_critsec); InitializeListHead(&MappedViewList); @@ -1231,7 +1230,7 @@ Function : void MAPCleanup( void ) { TRACE( "Deleting the critical section.\n" ); - InternalDeleteCriticalSection(&mapping_critsec); + minipal_mutex_destroy(&mapping_critsec); } /*++ @@ -1665,9 +1664,8 @@ BOOL MAPGetRegionInfo(LPVOID lpAddress, PMEMORY_BASIC_INFORMATION lpBuffer) { BOOL fFound = FALSE; - CPalThread * pThread = InternalGetCurrentThread(); - InternalEnterCriticalSection(pThread, &mapping_critsec); + minipal_mutex_enter(&mapping_critsec); for(LIST_ENTRY *pLink = MappedViewList.Flink; pLink != &MappedViewList; @@ -1708,7 +1706,7 @@ BOOL MAPGetRegionInfo(LPVOID lpAddress, } } - InternalLeaveCriticalSection(pThread, &mapping_critsec); + minipal_mutex_leave(&mapping_critsec); return fFound; } @@ -2166,7 +2164,7 @@ void * MAPMapPEFile(HANDLE hFile, off_t offset) // and each of the sections, as well as all the space between them that we give PROT_NONE protections. 
// We're going to start adding mappings to the mapping list, so take the critical section - InternalEnterCriticalSection(pThread, &mapping_critsec); + minipal_mutex_enter(&mapping_critsec); reserveSize = RoundToPage(virtualSize, offset); if ((ntHeader.OptionalHeader.SectionAlignment) > GetVirtualPageSize()) @@ -2416,7 +2414,7 @@ void * MAPMapPEFile(HANDLE hFile, off_t offset) doneReleaseMappingCriticalSection: - InternalLeaveCriticalSection(pThread, &mapping_critsec); + minipal_mutex_leave(&mapping_critsec); done: @@ -2468,7 +2466,7 @@ BOOL MAPUnmapPEFile(LPCVOID lpAddress) BOOL retval = TRUE; CPalThread * pThread = InternalGetCurrentThread(); - InternalEnterCriticalSection(pThread, &mapping_critsec); + minipal_mutex_enter(&mapping_critsec); PLIST_ENTRY pLink, pLinkNext, pLinkLocal = NULL; unsigned nPESections = 0; @@ -2506,7 +2504,7 @@ BOOL MAPUnmapPEFile(LPCVOID lpAddress) } #endif // _DEBUG - InternalLeaveCriticalSection(pThread, &mapping_critsec); + minipal_mutex_leave(&mapping_critsec); // Now, outside the critical section, do the actual unmapping work @@ -2555,8 +2553,7 @@ BOOL MAPMarkSectionAsNotNeeded(LPCVOID lpAddress) BOOL retval = TRUE; #ifndef TARGET_ANDROID - CPalThread * pThread = InternalGetCurrentThread(); - InternalEnterCriticalSection(pThread, &mapping_critsec); + minipal_mutex_enter(&mapping_critsec); PLIST_ENTRY pLink, pLinkNext = NULL; // Look through the entire MappedViewList for all mappings associated with the @@ -2584,7 +2581,7 @@ BOOL MAPMarkSectionAsNotNeeded(LPCVOID lpAddress) } } - InternalLeaveCriticalSection(pThread, &mapping_critsec); + minipal_mutex_leave(&mapping_critsec); #endif // TARGET_ANDROID TRACE_(LOADER)("MAPMarkSectionAsNotNeeded returning %d\n", retval); diff --git a/src/coreclr/pal/src/map/virtual.cpp b/src/coreclr/pal/src/map/virtual.cpp index aa02c81a1514..dcf3fd5c51c8 100644 --- a/src/coreclr/pal/src/map/virtual.cpp +++ b/src/coreclr/pal/src/map/virtual.cpp @@ -22,7 +22,6 @@ Module Name: SET_DEFAULT_DEBUG_CHANNEL(VIRTUAL); // some headers have code with asserts, so do this first #include "pal/thread.hpp" -#include "pal/cs.hpp" #include "pal/file.hpp" #include "pal/seh.hpp" #include "pal/virtual.h" @@ -48,7 +47,7 @@ SET_DEFAULT_DEBUG_CHANNEL(VIRTUAL); // some headers have code with asserts, so d using namespace CorUnix; -CRITICAL_SECTION virtual_critsec; +minipal_mutex virtual_critsec; // The first node in our list of allocated blocks. static PCMI pVirtualMemory; @@ -175,7 +174,7 @@ VIRTUALInitialize(bool initializeExecutableMemoryAllocator) TRACE("Initializing the Virtual Critical Sections. \n"); - InternalInitializeCriticalSection(&virtual_critsec); + minipal_mutex_init(&virtual_critsec); pVirtualMemory = NULL; @@ -207,9 +206,7 @@ void VIRTUALCleanup() { PCMI pEntry; PCMI pTempEntry; - CPalThread * pthrCurrent = InternalGetCurrentThread(); - - InternalEnterCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_enter(&virtual_critsec); // Clean up the allocated memory. pEntry = pVirtualMemory; @@ -223,10 +220,10 @@ void VIRTUALCleanup() } pVirtualMemory = NULL; - InternalLeaveCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_leave(&virtual_critsec); TRACE( "Deleting the Virtual Critical Sections. 
\n" ); - DeleteCriticalSection( &virtual_critsec ); + minipal_mutex_destroy( &virtual_critsec ); } /*** @@ -344,9 +341,7 @@ static void VIRTUALDisplayList( void ) PCMI p; SIZE_T count; SIZE_T index; - CPalThread * pthrCurrent = InternalGetCurrentThread(); - - InternalEnterCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_enter(&virtual_critsec); p = pVirtualMemory; count = 0; @@ -365,7 +360,7 @@ static void VIRTUALDisplayList( void ) p = p->pNext; } - InternalLeaveCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_leave(&virtual_critsec); } #endif @@ -817,8 +812,7 @@ PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange( // ExecutableMemoryAllocator::AllocateMemory() for the reason why it is done SIZE_T reservationSize = ALIGN_UP(dwSize, VIRTUAL_64KB); - CPalThread *currentThread = InternalGetCurrentThread(); - InternalEnterCriticalSection(currentThread, &virtual_critsec); + minipal_mutex_enter(&virtual_critsec); void *address = g_executableMemoryAllocator.AllocateMemoryWithinRange(lpBeginAddress, lpEndAddress, reservationSize); if (address != nullptr) @@ -841,7 +835,7 @@ PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange( address, TRUE); - InternalLeaveCriticalSection(currentThread, &virtual_critsec); + minipal_mutex_leave(&virtual_critsec); LOGEXIT("PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange returning %p\n", address); PERF_EXIT(PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange); @@ -939,9 +933,9 @@ VirtualAlloc( if ( flAllocationType & MEM_RESERVE ) { - InternalEnterCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_enter(&virtual_critsec); pRetVal = VIRTUALReserveMemory( pthrCurrent, lpAddress, dwSize, flAllocationType, flProtect ); - InternalLeaveCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_leave(&virtual_critsec); if ( !pRetVal ) { @@ -952,7 +946,7 @@ VirtualAlloc( if ( flAllocationType & MEM_COMMIT ) { - InternalEnterCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_enter(&virtual_critsec); if ( pRetVal != NULL ) { /* We are reserving and committing. */ @@ -965,7 +959,7 @@ VirtualAlloc( pRetVal = VIRTUALCommitMemory( pthrCurrent, lpAddress, dwSize, flAllocationType, flProtect ); } - InternalLeaveCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_leave(&virtual_critsec); } done: @@ -998,7 +992,7 @@ VirtualFree( lpAddress, dwSize, dwFreeType); pthrCurrent = InternalGetCurrentThread(); - InternalEnterCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_enter(&virtual_critsec); /* Sanity Checks. */ if ( !lpAddress ) @@ -1056,9 +1050,12 @@ VirtualFree( goto VirtualFreeExit; } - TRACE( "Un-committing the following page(s) %d to %d.\n", - StartBoundary, MemSize ); + TRACE( "Un-committing the following page(s) %p to %p.\n", + StartBoundary, StartBoundary + MemSize ); + // mmap support on emscripten/wasm is very limited and doesn't support location hints + // (when address is not null) +#ifndef __wasm__ // Explicitly calling mmap instead of mprotect here makes it // that much more clear to the operating system that we no // longer need these pages. 
@@ -1089,6 +1086,13 @@ VirtualFree( pthrCurrent->SetLastError( ERROR_INTERNAL_ERROR ); goto VirtualFreeExit; } +#else // __wasm__ + // We can't decommit the mapping (MAP_FIXED doesn't work in emscripten), and we can't + // MADV_DONTNEED it (madvise doesn't work in emscripten), but we can at least zero + // the memory so that if an attempt is made to reuse it later, the memory will be + // empty as PAL tests expect it to be. + ZeroMemory((LPVOID) StartBoundary, MemSize); +#endif // __wasm__ } if ( dwFreeType & MEM_RELEASE ) @@ -1147,7 +1151,7 @@ VirtualFree( NULL, bRetVal); - InternalLeaveCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_leave(&virtual_critsec); LOGEXIT( "VirtualFree returning %s.\n", bRetVal == TRUE ? "TRUE" : "FALSE" ); PERF_EXIT(VirtualFree); return bRetVal; @@ -1175,15 +1179,13 @@ VirtualProtect( SIZE_T Index = 0; SIZE_T NumberOfPagesToChange = 0; SIZE_T OffSet = 0; - CPalThread * pthrCurrent; PERF_ENTRY(VirtualProtect); ENTRY("VirtualProtect(lpAddress=%p, dwSize=%u, flNewProtect=%#x, " "flOldProtect=%p)\n", lpAddress, dwSize, flNewProtect, lpflOldProtect); - pthrCurrent = InternalGetCurrentThread(); - InternalEnterCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_enter(&virtual_critsec); StartBoundary = (UINT_PTR) ALIGN_DOWN(lpAddress, GetVirtualPageSize()); MemSize = ALIGN_UP((UINT_PTR)lpAddress + dwSize, GetVirtualPageSize()) - StartBoundary; @@ -1239,7 +1241,7 @@ VirtualProtect( } } ExitVirtualProtect: - InternalLeaveCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_leave(&virtual_critsec); #if defined _DEBUG VIRTUALDisplayList(); @@ -1434,7 +1436,7 @@ VirtualQuery( lpAddress, lpBuffer, dwLength); pthrCurrent = InternalGetCurrentThread(); - InternalEnterCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_enter(&virtual_critsec); if ( !lpBuffer) { @@ -1515,7 +1517,7 @@ VirtualQuery( ExitVirtualQuery: - InternalLeaveCriticalSection(pthrCurrent, &virtual_critsec); + minipal_mutex_leave(&virtual_critsec); LOGEXIT( "VirtualQuery returning %d.\n", sizeof( *lpBuffer ) ); PERF_EXIT(VirtualQuery); @@ -1539,9 +1541,9 @@ Function : void* ReserveMemoryFromExecutableAllocator(CPalThread* pThread, SIZE_T allocationSize) { #ifdef HOST_64BIT - InternalEnterCriticalSection(pThread, &virtual_critsec); + minipal_mutex_enter(&virtual_critsec); void* mem = g_executableMemoryAllocator.AllocateMemory(allocationSize); - InternalLeaveCriticalSection(pThread, &virtual_critsec); + minipal_mutex_leave(&virtual_critsec); return mem; #else // !HOST_64BIT diff --git a/src/coreclr/pal/src/misc/dbgmsg.cpp b/src/coreclr/pal/src/misc/dbgmsg.cpp index 3a1da44c9b79..cc1213e8e73a 100644 --- a/src/coreclr/pal/src/misc/dbgmsg.cpp +++ b/src/coreclr/pal/src/misc/dbgmsg.cpp @@ -21,7 +21,6 @@ Module Name: #include "config.h" #include "pal/dbgmsg.h" #include "pal/cruntime.h" -#include "pal/critsect.h" #include "pal/file.h" #include "pal/environ.h" @@ -126,7 +125,7 @@ static const char INDENT_CHAR = '.'; static BOOL DBG_get_indent(DBG_LEVEL_ID level, const char *format, char *indent_string); -static CRITICAL_SECTION fprintf_crit_section; +static minipal_mutex fprintf_crit_section; /* Function definitions */ @@ -361,7 +360,7 @@ BOOL DBG_init_channels(void) } } - InternalInitializeCriticalSection(&fprintf_crit_section); + minipal_mutex_init(&fprintf_crit_section); return TRUE; } @@ -387,7 +386,7 @@ void DBG_close_channels() output_file = NULL; - DeleteCriticalSection(&fprintf_crit_section); + minipal_mutex_destroy(&fprintf_crit_section); /* if necessary, 
release TLS key for entry nesting level */ if(0 != max_entry_level) @@ -539,9 +538,9 @@ int DBG_printf(DBG_CHANNEL_ID channel, DBG_LEVEL_ID level, BOOL bHeader, avoid holding a libc lock while another thread is calling SuspendThread on this one. */ - InternalEnterCriticalSection(NULL, &fprintf_crit_section); + minipal_mutex_enter(&fprintf_crit_section); fprintf( output_file, "%s%s", indent, buffer ); - InternalLeaveCriticalSection(NULL, &fprintf_crit_section); + minipal_mutex_leave(&fprintf_crit_section); /* flush the output to file */ if ( fflush(output_file) != 0 ) diff --git a/src/coreclr/pal/src/misc/environ.cpp b/src/coreclr/pal/src/misc/environ.cpp index 53729118a892..4a9eb640e2e6 100644 --- a/src/coreclr/pal/src/misc/environ.cpp +++ b/src/coreclr/pal/src/misc/environ.cpp @@ -20,7 +20,6 @@ Revision History: --*/ #include "pal/palinternal.h" -#include "pal/critsect.h" #include "pal/dbgmsg.h" #include "pal/environ.h" @@ -38,7 +37,7 @@ char **palEnvironment = nullptr; int palEnvironmentCount = 0; int palEnvironmentCapacity = 0; -CRITICAL_SECTION gcsEnvironment; +minipal_mutex gcsEnvironment; /*++ Function: @@ -114,7 +113,7 @@ GetEnvironmentVariableA( // the environment variable value without EnvironGetenv making an // intermediate copy. We will just copy the string to the output // buffer anyway, so just stay in the critical section until then. - InternalEnterCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_enter(&gcsEnvironment); value = EnvironGetenv(lpName, /* copyValue */ FALSE); @@ -134,7 +133,7 @@ GetEnvironmentVariableA( SetLastError(ERROR_SUCCESS); } - InternalLeaveCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_leave(&gcsEnvironment); } if (value == nullptr) @@ -401,7 +400,7 @@ GetEnvironmentStringsW( ENTRY("GetEnvironmentStringsW()\n"); CPalThread * pthrCurrent = InternalGetCurrentThread(); - InternalEnterCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_enter(&gcsEnvironment); envNum = 0; len = 0; @@ -433,7 +432,7 @@ GetEnvironmentStringsW( *tempEnviron = 0; /* Put an extra null at the end */ EXIT: - InternalLeaveCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_leave(&gcsEnvironment); LOGEXIT("GetEnvironmentStringsW returning %p\n", wenviron); PERF_EXIT(GetEnvironmentStringsW); @@ -610,7 +609,7 @@ Return Values BOOL ResizeEnvironment(int newSize) { CPalThread * pthrCurrent = InternalGetCurrentThread(); - InternalEnterCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_enter(&gcsEnvironment); BOOL ret = FALSE; if (newSize >= palEnvironmentCount) @@ -630,7 +629,7 @@ BOOL ResizeEnvironment(int newSize) ASSERT("ResizeEnvironment: newSize < current palEnvironmentCount!\n"); } - InternalLeaveCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_leave(&gcsEnvironment); return ret; } @@ -652,7 +651,7 @@ void EnvironUnsetenv(const char *name) int nameLength = strlen(name); CPalThread * pthrCurrent = InternalGetCurrentThread(); - InternalEnterCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_enter(&gcsEnvironment); for (int i = 0; palEnvironment[i] != nullptr; ++i) { @@ -680,7 +679,7 @@ void EnvironUnsetenv(const char *name) } } - InternalLeaveCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_leave(&gcsEnvironment); } /*++ @@ -746,7 +745,7 @@ BOOL EnvironPutenv(const char* entry, BOOL deleteIfEmpty) { // See if we are replacing an item or adding one. 
- InternalEnterCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_enter(&gcsEnvironment); fOwningCS = true; int i; @@ -801,7 +800,7 @@ BOOL EnvironPutenv(const char* entry, BOOL deleteIfEmpty) if (fOwningCS) { - InternalLeaveCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_leave(&gcsEnvironment); } return result; @@ -883,7 +882,7 @@ Return Value char* EnvironGetenv(const char* name, BOOL copyValue) { CPalThread * pthrCurrent = InternalGetCurrentThread(); - InternalEnterCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_enter(&gcsEnvironment); char* retValue = FindEnvVarValue(name); @@ -892,7 +891,7 @@ char* EnvironGetenv(const char* name, BOOL copyValue) retValue = strdup(retValue); } - InternalLeaveCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_leave(&gcsEnvironment); return retValue; } @@ -939,10 +938,10 @@ EnvironInitialize(void) { BOOL ret = FALSE; - InternalInitializeCriticalSection(&gcsEnvironment); + minipal_mutex_init(&gcsEnvironment); CPalThread * pthrCurrent = InternalGetCurrentThread(); - InternalEnterCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_enter(&gcsEnvironment); char** sourceEnviron = EnvironGetSystemEnvironment(); @@ -974,7 +973,7 @@ EnvironInitialize(void) palEnvironment[variableCount] = nullptr; } - InternalLeaveCriticalSection(pthrCurrent, &gcsEnvironment); + minipal_mutex_leave(&gcsEnvironment); return ret; } diff --git a/src/coreclr/pal/src/misc/fmtmessage.cpp b/src/coreclr/pal/src/misc/fmtmessage.cpp index cfedd815da3c..f096156f2384 100644 --- a/src/coreclr/pal/src/misc/fmtmessage.cpp +++ b/src/coreclr/pal/src/misc/fmtmessage.cpp @@ -21,7 +21,6 @@ Revision History: #include "pal/palinternal.h" #include "pal/dbgmsg.h" -#include "pal/critsect.h" #include "pal/module.h" #include "errorstrings.h" diff --git a/src/coreclr/pal/src/misc/perfjitdump.cpp b/src/coreclr/pal/src/misc/perfjitdump.cpp index 9040e13dadfd..b9afb95a5740 100644 --- a/src/coreclr/pal/src/misc/perfjitdump.cpp +++ b/src/coreclr/pal/src/misc/perfjitdump.cpp @@ -26,6 +26,7 @@ #include #include #include +#include "minipal/time.h" #include "../inc/llvm/ELF.h" @@ -90,9 +91,7 @@ namespace return static_cast(__rdtsc()); } #endif - LARGE_INTEGER result; - QueryPerformanceCounter(&result); - return result.QuadPart; + return (uint64_t)minipal_hires_ticks(); } @@ -185,16 +184,13 @@ struct PerfJitDumpState { int result = 0; - // On platforms where JITDUMP is used, the PAL QueryPerformanceFrequency - // returns tccSecondsToNanoSeconds, meaning QueryPerformanceCounter - // will return a direct nanosecond value. If this isn't true, + // On platforms where JITDUMP is used, minipal_hires_tick_frequency() + // returns tccSecondsToNanoSeconds. If this isn't true, // then some other method will need to be used to implement GetTimeStampNS. // Validate this is true once in Start here. - LARGE_INTEGER freq; - QueryPerformanceFrequency(&freq); - if (freq.QuadPart != tccSecondsToNanoSeconds) + if (minipal_hires_tick_frequency() != tccSecondsToNanoSeconds) { - _ASSERTE(!"QueryPerformanceFrequency does not return tccSecondsToNanoSeconds. Implement JITDUMP GetTimeStampNS directly for this platform.\n"); + _ASSERTE(!"minipal_hires_tick_frequency() does not return tccSecondsToNanoSeconds. 
Implement JITDUMP GetTimeStampNS directly for this platform.\n"); FatalError(); } diff --git a/src/coreclr/pal/src/misc/perftrace.cpp b/src/coreclr/pal/src/misc/perftrace.cpp deleted file mode 100644 index 2f5072a8da23..000000000000 --- a/src/coreclr/pal/src/misc/perftrace.cpp +++ /dev/null @@ -1,1340 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*++ - - - -Module Name: - - misc/perftrace.c - -Abstract: - Implementation of PAL Performance trace utilities. - - - ---*/ - -/* PAL headers */ - - - -#ifdef PAL_PERF - -/* PAL Headers */ -#include "pal/palinternal.h" -#include "pal/perftrace.h" -#include "pal/dbgmsg.h" -#include "pal/cruntime.h" - -/* Standard headers */ -#include -#include -#include -#include -#include /* for pthread_self */ -#include -#include - -SET_DEFAULT_DEBUG_CHANNEL(MISC); - - -#define PAL_PERF_MAX_LOGLINE 0x400 /* 1K */ -#define PAL_PERF_MAX_INPUT 0x1000 /* 4k for single line of input file */ -#define PAL_PERF_MAX_FUNCTION_NAME 128 /* any one want a function name longer than 127 bytes? */ -#define PAL_PERF_PROFILE_BUFFER_SIZE 0x400000 /* 4M */ -#define PAL_PERF_BUFFER_FULL (PAL_PERF_PROFILE_BUFFER_SIZE - PAL_PERF_MAX_LOGLINE ) /* (Buffer size - 1K) */ - -typedef struct _pal_perf_api_info -{ - ULONGLONG entries; /* number of PERF_ENTRY calls for an API function */ - ULONGLONG counter; /* number of PERF_EXIT calls for an API function */ - ULONGLONG min_duration; /* Minimum duration in CPU clock ticks in an API function */ - ULONGLONG max_duration; /* Maximum duration in CPU clock ticks in an API function */ - ULONGLONG sum_duration; /* Sum of duration*/ - double sum_of_square_duration; /* Sum of square of durations */ - DWORD *histograms; /* An array to store the histogram of an API execution cpu ticks. 
*/ -} pal_perf_api_info; - - -typedef struct _pal_perf_thread_info -{ - DWORD threadId; - pal_perf_api_info * api_table; - char * pal_write_buf; - DWORD buf_offset; - BOOL profile_enabled; - ULONGLONG start_ticks; - ULONGLONG total_duration; -} pal_perf_thread_info; - -typedef struct _pal_thread_list_node -{ - pal_perf_thread_info * thread_info; - struct _pal_thread_list_node * next; - -} pal_thread_list_node; - -typedef struct _pal_perf_program_info -{ - char command_line[PAL_PERF_MAX_LOGLINE]; - char exe_path[PAL_PERF_MAX_LOGLINE]; - char hostname[PAL_PERF_MAX_FUNCTION_NAME]; - double cpu_clock_frequency; - ULONGLONG start_ticks; - ULONGLONG elapsed_time; /* Duration in CPU clock ticks of the program */ - ULONGLONG total_duration; /* Total CPU clock ticks of all the threads */ - ULONGLONG pal_duration; /* Total CPU clock ticks spent inside PAL */ - - pid_t process_id; - char start_time[32]; /* must be at least 26 characters */ -} pal_perf_program_info; - -static ULONGLONG PERFGetTicks(); -static double PERFComputeStandardDeviation(pal_perf_api_info *api); -static void PERFPrintProgramHeaderInfo(FILE * hFile, BOOL completedExecution); -static BOOL PERFInitProgramInfo(LPWSTR command_line, LPWSTR exe_path); -static BOOL PERFReadSetting( ); -static void PERFLogFileName(PathCharString * destFileString, const char *fileName, const char *suffix, int max_length); -static void PERFlushAllLogs(); -static int PERFWriteCounters(pal_perf_api_info * table); -static BOOL PERFFlushLog(pal_perf_thread_info * local_buffer, BOOL output_header); -static void PERFUpdateApiInfo(pal_perf_api_info *api, ULONGLONG duration); -static char * PERFIsValidPath( const char * path ); -static char * PERFIsValidFile( const char * path, const char * file); - -typedef char PAL_API_NAME[PAL_PERF_MAX_FUNCTION_NAME]; - -static PAL_API_NAME API_list[PAL_API_NUMBER] ; -static pal_perf_program_info program_info; - -static pthread_key_t PERF_tlsTableKey=0 ; - -static pal_thread_list_node * process_pal_thread_list=NULL; -static BOOL pal_profile_on=FALSE; -static BOOL pal_perf_enabled=FALSE; -static char * pal_function_map=NULL; -static char * perf_default_path=NULL; -static char * traced_apis_file=NULL; -static char * enabledapis_path=NULL; -static char * profile_log_path=NULL; -static char * profile_summary_log_name=NULL; -static char * profile_time_log_name=NULL; -static BOOL summary_only=FALSE; -static BOOL nested_tracing=FALSE; -static BOOL calibrate=FALSE; - -/* If report_only_called_apis is TRUE, - those PAL APIs with no function entry or exit - will not be shown in the PAL perf summary file. */ -static BOOL report_only_called_apis=FALSE; - -/* If the wait_for_startup is TRUE, process profiling - will not start until the application - has called PAL_EnableProcessProfile(). 
*/ -static BOOL wait_for_startup=FALSE; - -/* The size of a PAL API execution CPU ticks histogram, i.e., - Number of categories of frequency distrubution of PAL API - execution CPU ticks.*/ -static DWORD pal_perf_histogram_size = 0; - -/* The step size in CPU ticks of each category of the - PAL API execution CPU ticks histogram.*/ -static DWORD pal_perf_histogram_step = 100; - -static const char PAL_PERF_TRACING[]="PAL_PERF_TRACING"; -static const char PAL_DEFAULT_PATH[]="PAL_PERF_DEFAULT_PATH"; -static const char PAL_PERF_TRACEDAPIS_PATH[]="PAL_PERF_TRACEDAPIS_FILE"; -static const char PAL_PERF_LOG_PATH[]="PAL_PERF_LOG_PATH"; -static const char PAL_PERF_SUMMARY_LOG_NAME[]="PAL_PERF_SUMMARY_LOG_NAME"; -static const char PAL_PERF_TIME_LOG_NAME[]="PAL_PERF_TIME_LOG_NAME"; -static const char PAL_PERF_ENABLED_APIS_PATH[]="PAL_PERF_ENABLEDAPIS_FILE"; -static const char PAL_SUMMARY_FLAG[]="PAL_PERF_SUMMARY_ONLY"; -static const char PAL_PERF_NESTED_TRACING[]="PAL_PERF_NESTED_TRACING"; -static const char PAL_PERF_CALIBRATE[]="PAL_PERF_CALIBRATE"; -static const char PAL_PERF_REPORT_ONLY_CALLED_APIS[]="PAL_PERF_REPORT_ONLY_CALLED_APIS"; -static const char PAL_PERF_WAIT_FOR_STARTUP[]="PAL_PERF_WAIT_FOR_STARTUP"; -static const char PAL_PERF_HISTOGRAM_SIZE[]="PAL_PERF_HISTOGRAM_SIZE"; -static const char PAL_PERF_HISTOGRAM_STEP[]="PAL_PERF_HISTOGRAM_STEP"; -static const char traced_apis_filename[]="PerfTracedAPIs.txt"; -static const char perf_enabled_filename[]="AllPerfEnabledAPIs.txt"; -static const char PATH_SEPARATOR[] = "/"; - - - -#define LLFORMAT "%llu" - -static -ULONGLONG -PERFGetTicks(){ -#ifdef HOST_X86 // for BSD and Windows. - unsigned long a, d; - #ifdef _MSC_VER - __asm{ - rdtsc - mov a, eax - mov d, edx - } - #else - #undef volatile - asm volatile("rdtsc":"=a" (a), "=d" (d)); - #define volatile DoNotUseVolatileKeyword - #endif - return ((ULONGLONG)((unsigned int)(d)) << 32) | (unsigned int)(a); -#else - return 0; // on non-BSD and non-Windows, we'll return 0 for now. -#endif // HOST_X86 -} - -static -double -PERFComputeStandardDeviation(pal_perf_api_info *api) -{ - double n; - double sum_of_variance; - if (api->counter <= 1) - return 0.0; - n = (double) api->counter; - // Calculates standard deviation based on the entire population given as arguments. - // Same as stdevp in Excel. 
- sum_of_variance = (n*api->sum_of_square_duration) - (api->sum_duration*api->sum_duration); - if (sum_of_variance <= 0.0) - return 0.0; - return sqrt(sum_of_variance/(n*n)); -} - - -static -void -PERFPrintProgramHeaderInfo(FILE * hFile, BOOL completedExecution) -{ - ULONGLONG etime = 0; - ULONGLONG ttime = 0; - ULONGLONG ptime = 0; - if (completedExecution) { - etime = program_info.elapsed_time; - ttime = program_info.total_duration; - ptime = program_info.pal_duration; - } - fprintf(hFile,"#LOG\tversion=1.00\n"); - - fprintf(hFile, "#MACHINE\thostname=%s\tcpu_clock_frequency=%g\n", program_info.hostname, - program_info.cpu_clock_frequency); - fprintf(hFile, "#PROCESS\tprocess_id=%d\ttotal_latency=" LLFORMAT "\tthread_times=" LLFORMAT "\tpal_time=" LLFORMAT "\texe_path=%s\tcommand_line=%s\tstart_time=%s", - program_info.process_id, etime, ttime, ptime, - program_info.exe_path,program_info.command_line,program_info.start_time); -} - -static -BOOL -PERFInitProgramInfo(LPWSTR command_line, LPWSTR exe_path) -{ - ULONGLONG start_tick; - struct timeval tv; - - if (WideCharToMultiByte(CP_ACP, 0, command_line, -1, - program_info.command_line, PAL_PERF_MAX_LOGLINE-1, NULL, NULL) == 0) - return FALSE; - if (WideCharToMultiByte(CP_ACP, 0, exe_path, -1, - program_info.exe_path, PAL_PERF_MAX_LOGLINE-1, NULL, NULL) == 0) - return FALSE; - - gethostname(program_info.hostname, PAL_PERF_MAX_FUNCTION_NAME); - program_info.process_id = getpid(); - - gettimeofday(&tv, NULL); - ctime_r(&tv.tv_sec, program_info.start_time); - - // estimate the cpu clock cycles - start_tick = PERFGetTicks(); - if (start_tick != 0) - { - sleep(1); - program_info.cpu_clock_frequency = (double) (PERFGetTicks() - start_tick); - } - else - { - program_info.cpu_clock_frequency = 0.0; - } - - program_info.start_ticks = 0; - program_info.elapsed_time = 0; - program_info.total_duration = 0; - program_info.pal_duration = 0; - - return TRUE; -} - -static -void -PERFCalibrationFunction() -{ - PERF_ENTRY(CalibrationFunction); - PERF_EXIT(CalibrationFunction); -} - -void -PERFCalibrate(const char* msg) -{ - ULONGLONG start_tick, cal_ticks; - int i=0; - int cal_length=100000; - - if (calibrate) { - start_tick = PERFGetTicks(); - for(i=0; ithread_info = local_info; - local_info->api_table=apiTable; - local_info->threadId = THREADSilentGetCurrentThreadId(); - - for (i = 0; i < PAL_API_NUMBER; i++) - { - apiTable[i].entries = 0; - apiTable[i].counter = 0; - apiTable[i].min_duration = _UI64_MAX; - apiTable[i].max_duration = 0; - apiTable[i].sum_duration = 0; - apiTable[i].sum_of_square_duration = 0.0; - if (pal_perf_histogram_size > 0) - { - apiTable[i].histograms = (DWORD *)malloc(pal_perf_histogram_size*sizeof(DWORD)); - if (apiTable[i].histograms == NULL) - { - ret = FALSE; - goto PERFAllocThreadInfoExit; - } - memset(apiTable[i].histograms, 0, pal_perf_histogram_size*sizeof(DWORD)); - } - else - { - apiTable[i].histograms = NULL; - } - } - - log_buf = (char * )malloc( PAL_PERF_PROFILE_BUFFER_SIZE ); - - if(log_buf == NULL) - { - ret = FALSE; - goto PERFAllocThreadInfoExit; - } - - local_info->pal_write_buf=log_buf; - local_info->buf_offset = 0; - local_info->profile_enabled = FALSE; - local_info->total_duration = 0; - local_info->start_ticks = 0; - memset(log_buf, 0, PAL_PERF_PROFILE_BUFFER_SIZE); - - if (pthread_setspecific(PERF_tlsTableKey, local_info) != 0) - ret = FALSE; - -PERFAllocThreadInfoExit: - if (ret == TRUE) - { - node->next = process_pal_thread_list; - process_pal_thread_list = node; - PERFFlushLog(local_info, TRUE); - } - else 
- { - if (node != NULL) - { - free(node); - } - if (local_info != NULL) - { - free(local_info); - } - if (apiTable != NULL) - { - for (i = 0; i < PAL_API_NUMBER; i++) - { - if (apiTable[i].histograms != NULL) - { - free(apiTable[i].histograms); - } - } - free(apiTable); - } - if (log_buf != NULL) - { - free(log_buf); - } - } - return ret; -} - -static -void -PERFUpdateProgramInfo(pal_perf_thread_info* local_info) -{ - int i; - - if (!local_info) return; - - // add the elapsed time to the program's total - if (local_info->total_duration == 0) - { - // this thread did not go through PERFDisableThreadProfile code - // so compute the total elapsed time for the thread here - local_info->total_duration = PERFGetTicks() - local_info->start_ticks; - } - program_info.total_duration += local_info->total_duration; - - // Add up all the time spent in PAL - if (local_info->api_table) { - for(i=0; iapi_table[i].sum_duration; - } - } -} - - -static -void -PERFlushAllLogs( ) -{ - pal_thread_list_node * current, * node; - pal_perf_api_info * table1, *table0; - int i; - node = process_pal_thread_list; - if(node == NULL || node->thread_info == NULL || node->thread_info->api_table == NULL ) // should not come here - { - return ; - } - process_pal_thread_list = process_pal_thread_list->next; - table0 = node->thread_info->api_table; - - PERFUpdateProgramInfo(node->thread_info); - - while(process_pal_thread_list) - { - current=process_pal_thread_list; - process_pal_thread_list = process_pal_thread_list->next; - if (current->thread_info) - { - if (current->thread_info->api_table) - { - table1 = current->thread_info->api_table; - for(i=0;i table1[i].min_duration) - table0[i].min_duration = table1[i].min_duration; - if (table0[i].max_duration < table1[i].max_duration) - table0[i].max_duration = table1[i].max_duration; - table0[i].sum_duration += table1[i].sum_duration; - table0[i].sum_of_square_duration += table1[i].sum_of_square_duration; - } - PERFUpdateProgramInfo(current->thread_info); - if (table1->histograms != NULL) - { - free(table1->histograms); - } - free(table1); - } - PERFFlushLog(current->thread_info, FALSE); - free(current->thread_info->pal_write_buf); - free(current->thread_info); - } - free(current); - } - PERFWriteCounters(table0); - if (table0->histograms != NULL) - { - free(table0->histograms); - } - free(table0); - PERFFlushLog(node->thread_info, FALSE); - free(node->thread_info->pal_write_buf); - free(node->thread_info); - free(node); -} - -static -void -PERFLogFileName(PathCharString& destFileString, const char *fileName, const char *suffix) -{ - const char *dir_path; - CPalThread* pThread = InternalGetCurrentThread(); - dir_path = (profile_log_path == NULL) ? "." 
: profile_log_path; - - destFileString.Append(dir_path, strlen(dir_path)); - destFileString.Append(PATH_SEPARATOR, strlen(PATH_SEPARATOR)); - if (fileName != NULL) - { - destFileString.Append(fileName, strlen(fileName)); - } - else - { - char buffer[33]; - char* process_id = itoa(program_info.process_id, buffer, 10); - destFileString.Append(process_id, strlen(process_id)); - destFileString.Append("_", 1); - - char* current_thread = itoa(THREADSilentGetCurrentThreadId(),buffer, 10); - destFileString.Append(current_thread, strlen( current_thread)); - destFileString.Append(suffix, strlen(suffix)); - } - -} - -static -int -PERFWriteCounters( pal_perf_api_info * table ) -{ - PathCharString fileName; - pal_perf_api_info * off; - PERF_FILE * hFile; - int i; - - off = table; - - PERFLogFileName(fileName, profile_summary_log_name, "_perf_summary.log"); - hFile = fopen(fileName, "a+"); - if(hFile != NULL) - { - PERFPrintProgramHeaderInfo(hFile, TRUE); - fprintf(hFile,"#api_name\tapi_id\tperf_entries\tperf_exits\tsum_of_latency\tmin_latency\tmax_latency\tstd_dev_latency\tsum_of_square_latency\n"); - for(i=0;imin_duration == _UI64_MAX) ? 0 : off->min_duration; - if (off->counter >= 1) - { - dev = PERFComputeStandardDeviation(off); - } - else - { - dev = 0.0; - } - - if (off->counter > 0 || !report_only_called_apis) - { - fprintf(hFile,"%s\t%d\t" LLFORMAT "\t" LLFORMAT "\t" LLFORMAT "\t" LLFORMAT "\t" LLFORMAT "\t%g\t%g\n", - API_list[i], i, off->entries, off->counter,off->sum_duration, - min_duration, off->max_duration, dev, off->sum_of_square_duration); - } - - off++; - } - } - else - { - return -1; - } - fclose(hFile); - - if (pal_perf_histogram_size > 0) - { - off = table; - PERFLogFileName(fileName, profile_summary_log_name, "_perf_summary.hist"); - hFile = fopen(fileName, "a+"); - - if (hFile != NULL) - { - DWORD j; - fprintf(hFile,"#api_name\tapi_id"); - for (j = 0; j < pal_perf_histogram_size; j++) - { - fprintf(hFile, "\t%d", j*pal_perf_histogram_step); - } - fprintf(hFile, "\n"); - - for(i = 0; i < PAL_API_NUMBER; i++) - { - if (off->counter > 0) - { - fprintf(hFile,"%s\t%d", API_list[i], i); - - for (j = 0; j < pal_perf_histogram_size; j++) - { - fprintf(hFile, "\t%d", off->histograms[j]); - } - - fprintf(hFile, "\n"); - } - - off++; - } - } - else - { - return -1; - } - fclose(hFile); - } - - return 0; -} - -static -BOOL -PERFReadSetting( ) -{ - // this function is not safe right now. - //more code is required to deal with corrupted input file. - BOOL ret; - unsigned int index; - char line[PAL_PERF_MAX_INPUT]; - char * ptr; - char function_name[PAL_PERF_MAX_FUNCTION_NAME]; //no function can be longer than 127 bytes. - - char * file_name_buf; - PathCharString file_name_bufPS; - char * input_file_name; - char * summary_flag_env; - char * nested_tracing_env; - char * calibrate_env; - char * report_only_called_apis_env; - char * wait_for_startup_env; - char * pal_perf_histogram_size_env; - char * pal_perf_histogram_step_env; - - PAL_FILE * hFile; - - if((pal_function_map == NULL) || (PAL_API_NUMBER < 0) ) - { - // should not be here. 
- } - - /* do some env setting here */ - summary_flag_env = MiscGetenv(PAL_SUMMARY_FLAG); - if (summary_flag_env == NULL || strlen(summary_flag_env) == 0) - { - summary_only = FALSE; - } - else - { - summary_only = TRUE; - } - nested_tracing_env = MiscGetenv(PAL_PERF_NESTED_TRACING); - if (nested_tracing_env == NULL || strlen(nested_tracing_env) == 0) - { - nested_tracing = FALSE; - } - else - { - nested_tracing = TRUE; - } - - calibrate_env = MiscGetenv(PAL_PERF_CALIBRATE); - if (calibrate_env == NULL || strlen(calibrate_env) == 0) - { - calibrate = FALSE; - } - else - { - calibrate = TRUE; - } - - report_only_called_apis_env = MiscGetenv(PAL_PERF_REPORT_ONLY_CALLED_APIS); - if (report_only_called_apis_env == NULL || strlen(report_only_called_apis_env) == 0) - { - report_only_called_apis = FALSE; - } - else - { - report_only_called_apis = TRUE; - } - - wait_for_startup_env = MiscGetenv(PAL_PERF_WAIT_FOR_STARTUP); - if (wait_for_startup_env == NULL || strlen(wait_for_startup_env) == 0) - { - wait_for_startup = FALSE; - } - else - { - wait_for_startup = TRUE; - } - - pal_perf_histogram_size_env = MiscGetenv(PAL_PERF_HISTOGRAM_SIZE); - if (pal_perf_histogram_size_env != NULL && strlen(pal_perf_histogram_size_env) > 0) - { - long value; - char *endptr; - value = strtol(pal_perf_histogram_size_env, &endptr, 10); - if (value > 0) - { - pal_perf_histogram_size = (DWORD) value; - } - } - - pal_perf_histogram_step_env = MiscGetenv(PAL_PERF_HISTOGRAM_STEP); - if (pal_perf_histogram_step_env != NULL && strlen(pal_perf_histogram_step_env) > 0) - { - long value; - char *endptr; - value = strtol(pal_perf_histogram_step_env, &endptr, 10); - if (value > 0) - { - pal_perf_histogram_step = (DWORD) value; - } - } - - traced_apis_file = PERFIsValidFile("", MiscGetenv(PAL_PERF_TRACEDAPIS_PATH)); - enabledapis_path = PERFIsValidFile("", MiscGetenv(PAL_PERF_ENABLED_APIS_PATH)); - profile_log_path = PERFIsValidPath(MiscGetenv(PAL_PERF_LOG_PATH)); - perf_default_path = PERFIsValidPath( MiscGetenv(PAL_DEFAULT_PATH)); - profile_summary_log_name = MiscGetenv(PAL_PERF_SUMMARY_LOG_NAME); - if (profile_summary_log_name != NULL && strlen(profile_summary_log_name) == 0) - profile_summary_log_name = NULL; - profile_time_log_name = MiscGetenv(PAL_PERF_TIME_LOG_NAME); - if (profile_time_log_name != NULL && strlen(profile_time_log_name) == 0) - profile_time_log_name = NULL; - - if( traced_apis_file == NULL) - { - if(perf_default_path==NULL) - { - ret=FALSE; - input_file_name = NULL; - } - else - { - if( PERFIsValidFile(perf_default_path,traced_apis_filename)) - { - int length = strlen(perf_default_path) + strlen(PATH_SEPARATOR) + strlen(traced_apis_filename); - file_name_buf = file_name_bufPS.OpenStringBuffer(length); - if ((strcpy_s(file_name_buf, file_name_bufPS.GetSizeOf(), perf_default_path) != SAFECRT_SUCCESS) || - (strcat_s(file_name_buf, file_name_bufPS.GetSizeOf(), PATH_SEPARATOR) != SAFECRT_SUCCESS) || - (strcat_s(file_name_buf, file_name_bufPS.GetSizeOf(), traced_apis_filename) != SAFECRT_SUCCESS)) - { - file_name_bufPS.CloseBuffer(0); - ret = FALSE; - input_file_name = NULL; - } - else - { - file_name_bufPS.CloseBuffer(length); - input_file_name = file_name_buf; - } - } - else - { - ret = FALSE; - input_file_name=NULL; - } - } - } - else - { - input_file_name=traced_apis_file; - } - - if(input_file_name) - { - hFile = PAL_fopen(input_file_name, "r+"); - if ( hFile == NULL ) - { - memset(pal_function_map, 1, PAL_API_NUMBER); - ret = FALSE; - } - else - { - memset(pal_function_map, 0, PAL_API_NUMBER); - - 
PAL_fseek(hFile, 0L, SEEK_SET); - - /* Read a line of data from file: */ - while ( PAL_fgets(line, PAL_PERF_MAX_INPUT, hFile) != NULL ) - { - if(strlen(line)==0) - continue; - ptr = strchr( line, '#'); - if( ptr ) - continue; - sscanf_s(line, "%s %u", function_name,&index); - - if( index >= PAL_API_NUMBER) - { - // some code here to deal with incorrect index. - // use function name to cover it. - } - else if(pal_function_map[index]==1) - { - // some code here to deal with conflict index. - // use function name to cover it. - } - else - { - pal_function_map[index]=1; - } - - } - - PAL_fclose(hFile); - ret = TRUE; - } - } - else - { - memset(pal_function_map, 1, PAL_API_NUMBER); - ret = FALSE; - } - - if( enabledapis_path == NULL) - { - if(perf_default_path==NULL) - { - input_file_name = NULL; - } - else - { - if( PERFIsValidFile(perf_default_path,perf_enabled_filename)) - { - if ((strcpy_s(file_name_buf, sizeof(file_name_buf), perf_default_path) != SAFECRT_SUCCESS) || - (strcat_s(file_name_buf, sizeof(file_name_buf), PATH_SEPARATOR) != SAFECRT_SUCCESS) || - (strcat_s(file_name_buf, sizeof(file_name_buf), perf_enabled_filename) != SAFECRT_SUCCESS)) - { - ret = FALSE; - input_file_name = NULL; - } - else - { - input_file_name = file_name_buf; - } - } - else - { - input_file_name=NULL; - } - } - } - else - { - input_file_name=enabledapis_path; - } - - if(input_file_name == NULL) - { - return ret; - } - - hFile = PAL_fopen(input_file_name, "r+"); - - if ( hFile != NULL ) - { - PAL_fseek(hFile, 0L, SEEK_SET); - - /* Read a line of data from file: */ - while (PAL_fgets(line, PAL_PERF_MAX_INPUT, hFile) != NULL) - { - if(strlen(line)==0) - continue; - ptr = strchr( line, '#'); - if( ptr ) - continue; - sscanf_s(line, "%s %u", function_name,&index); - - if( index >= PAL_API_NUMBER) - { - // some code here to deal with incorrect index. - // use function name to cover it. - continue; - } - - if (strcpy_s(API_list[index], sizeof(API_list[index]), function_name) != SAFECRT_SUCCESS) - { - ret = FALSE; - break; - } - } - - PAL_fclose(hFile); - } - - return ret; - -} - - -static -BOOL -PERFFlushLog(pal_perf_thread_info * local_info, BOOL output_header) -{ - BOOL ret = FALSE; - PathCharString fileName; - int nWrittenBytes = 0; - PERF_FILE * hFile; - - if (summary_only) - return TRUE; - - PERFLogFileName(fileName, profile_time_log_name, "_perf_time.log"); - - hFile = fopen(fileName, "a+"); - - if(hFile) - { - if (output_header) - { - PERFPrintProgramHeaderInfo(hFile, FALSE); - } - if (local_info->buf_offset > 0) - { - nWrittenBytes = fwrite(local_info->pal_write_buf, local_info->buf_offset, 1, hFile); - if (nWrittenBytes < 1) - { - ERROR("fwrite() failed with errno == %d\n", errno); - return ret; - } - local_info->buf_offset = 0; - } - fclose(hFile); - ret = TRUE; - } - - return ret; -} - -void -PERFLogFunctionEntry(unsigned int pal_api_id, ULONGLONG *pal_perf_start_tick ) -{ - pal_perf_thread_info * local_info=NULL; - pal_perf_api_info * table; - char * write_buf; - int32_t buf_off; - short bufused = 0; - - - struct timeval tv; - - - if(!pal_perf_enabled || pal_function_map==NULL || !pal_profile_on ) // haven't initialize, just quit. - return; - - if( pal_function_map[pal_api_id] ) - { - local_info= (pal_perf_thread_info * )pthread_getspecific(PERF_tlsTableKey); - - if (local_info==NULL ) - { - return; - } - if ( !local_info->profile_enabled ) /* prevent recursion. 
*/ - { - return; - } - // turn on this flag before call any other functions - local_info->profile_enabled = FALSE; - table = local_info->api_table; - table[pal_api_id].entries++; - - if(!summary_only) - { - write_buf = (local_info->pal_write_buf); - if(local_info->buf_offset >= PAL_PERF_BUFFER_FULL) - { - PERFFlushLog(local_info, FALSE); - } - - gettimeofday(&tv, NULL); - - buf_off = local_info->buf_offset; - - bufused = snprintf(&write_buf[buf_off], PAL_PERF_MAX_LOGLINE, "----> %d %lu %06u entry.\n", pal_api_id, tv.tv_sec, tv.tv_usec ); - local_info->buf_offset += bufused; - } - if(nested_tracing) - local_info->profile_enabled = TRUE; - *pal_perf_start_tick = PERFGetTicks(); - } - return; -} - -static -void -PERFUpdateApiInfo(pal_perf_api_info *api, ULONGLONG duration) -{ - DWORD iBucket; - - api->counter++; - if (api->min_duration > duration) - api->min_duration = duration; - if (api->max_duration < duration) - api->max_duration = duration; - api->sum_duration += duration; - api->sum_of_square_duration += (double) duration * (double)duration; - - if (pal_perf_histogram_size > 0) - { - iBucket = (DWORD)(duration / pal_perf_histogram_step); - if (iBucket >= pal_perf_histogram_size) - { - iBucket = pal_perf_histogram_size - 1; - } - api->histograms[iBucket]++; - } - -} - -void -PERFLogFunctionExit(unsigned int pal_api_id, ULONGLONG *pal_perf_start_tick ) -{ - - pal_perf_thread_info * local_info; - char * buf; - short bufused = 0; - DWORD off; - ULONGLONG duration = 0; - struct timeval timev; - - - if(!pal_perf_enabled || (pal_function_map == NULL) || !pal_profile_on ) // haven't initiallize yet, just quit. - return; - - if (*pal_perf_start_tick != 0) - { - duration = PERFGetTicks() - *pal_perf_start_tick; - } - else - { - return; // pal_perf_start_tick == 0 indicates that we exited PERFLogFunctionEntry before getting the ticks. - } - - if( pal_function_map[pal_api_id] ) - { - local_info = (pal_perf_thread_info*)pthread_getspecific(PERF_tlsTableKey); - - if (NULL == local_info ){ - return; - } - PERFUpdateApiInfo(&local_info->api_table[pal_api_id], duration); - *pal_perf_start_tick = 0; - - if(summary_only) - { - local_info->profile_enabled = TRUE; - return; - } - - gettimeofday(&timev, NULL); - - buf = local_info->pal_write_buf; - if(local_info->buf_offset >= PAL_PERF_BUFFER_FULL) - { - PERFFlushLog(local_info, FALSE); - } - off = local_info->buf_offset; - - bufused = snprintf(&buf[off], PAL_PERF_MAX_LOGLINE, "<---- %d %lu %06u exit. \n", pal_api_id, timev.tv_sec, timev.tv_usec ); - local_info->buf_offset += bufused; - local_info->profile_enabled = TRUE; - } - return; -} - -void -PERFNoLatencyProfileEntry(unsigned int pal_api_id ) -{ - pal_perf_thread_info * local_info=NULL; - pal_perf_api_info * table; - - if(!pal_perf_enabled || pal_function_map==NULL || !pal_profile_on ) // haven't initialize, just quit. 
- return; - if( pal_function_map[pal_api_id] ) - { - local_info= (pal_perf_thread_info * )pthread_getspecific(PERF_tlsTableKey); - if (local_info==NULL ) - { - return; - } - else{ - table = local_info->api_table; - table[pal_api_id].entries++; - } - } - return; -} - - -void -PERFEnableThreadProfile(BOOL isInternal) -{ - pal_perf_thread_info * local_info; - if (!pal_perf_enabled) - return; - if (NULL != (local_info = (pal_perf_thread_info*)pthread_getspecific(PERF_tlsTableKey))) - { - if (!isInternal || nested_tracing) { - local_info->profile_enabled = TRUE; - local_info->start_ticks = PERFGetTicks(); - } - } -} - - -void -PERFDisableThreadProfile(BOOL isInternal) -{ - pal_perf_thread_info * local_info; - if (!pal_perf_enabled) - return; - if (NULL != (local_info = (pal_perf_thread_info*)pthread_getspecific(PERF_tlsTableKey))) - { - if (!isInternal || nested_tracing) { - local_info->profile_enabled = FALSE; - local_info->total_duration = PERFGetTicks() - local_info->start_ticks; - } - } -} - - -void -PERFEnableProcessProfile( ) -{ - if (!pal_perf_enabled || wait_for_startup) - return; - pal_profile_on = TRUE; - PERFCalibrate("Overhead when profiling is disabled temporarily for a thread"); - // record the cpu clock ticks at the beginning of the profiling. - program_info.start_ticks = PERFGetTicks(); -} - - -void -PERFDisableProcessProfile( ) -{ - if (!pal_perf_enabled) - return; - pal_profile_on = FALSE; - // compute the total program duration in cpu clock ticks. - if (program_info.start_ticks != 0) - { - program_info.elapsed_time += (PERFGetTicks() - program_info.start_ticks); - program_info.start_ticks = 0; - } -} - -BOOL -PERFIsProcessProfileEnabled( ) -{ - return pal_profile_on; -} - -static -char * -PERFIsValidPath( const char * path ) -{ - DIR * dir; - - if(( path==NULL) || (strlen(path)==0)) - return NULL; - - dir = opendir(path); - if( dir!=NULL) - { - closedir(dir); - return ((char *)path); - } - return NULL; -} - -static -char * -PERFIsValidFile( const char * path, const char * file) -{ - FILE * hFile; - char * temp; - PathCharString tempPS; - - if(file==NULL || strlen(file)==0) - return NULL; - - if ( strcmp(path, "") ) - { - int length = strlen(path) + strlen(PATH_SEPARATOR) + strlen(file); - temp = tempPS.OpenStringBuffer(length); - if ((strcpy_s(temp, sizeof(temp), path) != SAFECRT_SUCCESS) || - (strcat_s(temp, sizeof(temp), PATH_SEPARATOR) != SAFECRT_SUCCESS) || - (strcat_s(temp, sizeof(temp), file) != SAFECRT_SUCCESS)) - { - tempPS.CloseBuffer(0); - return NULL; - } - - tempPS.CloseBuffer(length); - hFile = fopen(temp, "r"); - } - else - { - hFile = fopen(file, "r"); - } - - if(hFile) - { - fclose(hFile); - return ((char *) file); - } - else - return NULL; - -} - -PALIMPORT -VOID -PALAPI -PAL_EnableProcessProfile(VOID) -{ - wait_for_startup = FALSE; - pal_profile_on = TRUE; - PERFEnableProcessProfile(); -} - -PALIMPORT -VOID -PALAPI -PAL_DisableProcessProfile(VOID) -{ - pal_profile_on = FALSE; - PERFDisableProcessProfile(); -} - -PALIMPORT -BOOL -PALAPI -PAL_IsProcessProfileEnabled(VOID) -{ - return PERFIsProcessProfileEnabled(); -} - -PALIMPORT -INT64 -PALAPI -PAL_GetCpuTickCount(VOID) -{ - return PERFGetTicks(); -} - -#endif /* PAL_PERF */ - - - - diff --git a/src/coreclr/pal/src/misc/sysinfo.cpp b/src/coreclr/pal/src/misc/sysinfo.cpp index f4fef6977f3d..2d6f9edb620a 100644 --- a/src/coreclr/pal/src/misc/sysinfo.cpp +++ b/src/coreclr/pal/src/misc/sysinfo.cpp @@ -221,6 +221,8 @@ GetSystemInfo( lpSystemInfo->lpMaximumApplicationAddress = (PVOID) VM_MAX_PAGE_ADDRESS; #elif 
defined(__HAIKU__) lpSystemInfo->lpMaximumApplicationAddress = (PVOID) 0x7fffffe00000ul; +#elif defined(__wasm__) + lpSystemInfo->lpMaximumApplicationAddress = (PVOID) (1ul << 31); #elif defined(USERLIMIT) lpSystemInfo->lpMaximumApplicationAddress = (PVOID) USERLIMIT; #elif defined(HOST_64BIT) diff --git a/src/coreclr/pal/src/misc/time.cpp b/src/coreclr/pal/src/misc/time.cpp index 2774cb7f125b..22f0c83871eb 100644 --- a/src/coreclr/pal/src/misc/time.cpp +++ b/src/coreclr/pal/src/misc/time.cpp @@ -100,7 +100,7 @@ GetSystemTime( int old_seconds; int new_seconds; - lpSystemTime->wMilliseconds = (WORD)(timeval.tv_usec/tccMillieSecondsToMicroSeconds); + lpSystemTime->wMilliseconds = (WORD)(timeval.tv_usec/tccMilliSecondsToMicroSeconds); old_seconds = utPtr->tm_sec; new_seconds = timeval.tv_sec%60; @@ -118,109 +118,6 @@ GetSystemTime( PERF_EXIT(GetSystemTime); } -/*++ -Function: - GetTickCount - -The GetTickCount function retrieves the number of milliseconds that -have elapsed since the system was started. It is limited to the -resolution of the system timer. To obtain the system timer resolution, -use the GetSystemTimeAdjustment function. - -Parameters - -This function has no parameters. - -Return Values - -The return value is the number of milliseconds that have elapsed since -the system was started. - -In the PAL implementation the return value is the elapsed time since -the start of the epoch. - ---*/ -DWORD -PALAPI -GetTickCount( - VOID) -{ - DWORD retval = 0; - PERF_ENTRY(GetTickCount); - ENTRY("GetTickCount ()\n"); - - // Get the 64-bit count from GetTickCount64 and truncate the results. - retval = (DWORD) GetTickCount64(); - - LOGEXIT("GetTickCount returns DWORD %u\n", retval); - PERF_EXIT(GetTickCount); - return retval; -} - -BOOL -PALAPI -QueryPerformanceCounter( - OUT LARGE_INTEGER *lpPerformanceCount - ) -{ - BOOL retval = TRUE; - PERF_ENTRY(QueryPerformanceCounter); - ENTRY("QueryPerformanceCounter()\n"); - -#if HAVE_CLOCK_GETTIME_NSEC_NP - lpPerformanceCount->QuadPart = (LONGLONG)clock_gettime_nsec_np(CLOCK_UPTIME_RAW); -#elif HAVE_CLOCK_MONOTONIC - struct timespec ts; - int result = clock_gettime(CLOCK_MONOTONIC, &ts); - - if (result != 0) - { - ASSERT("clock_gettime(CLOCK_MONOTONIC) failed: %d\n", result); - retval = FALSE; - } - else - { - lpPerformanceCount->QuadPart = - ((LONGLONG)(ts.tv_sec) * (LONGLONG)(tccSecondsToNanoSeconds)) + (LONGLONG)(ts.tv_nsec); - } -#else - #error "The PAL requires either mach_absolute_time() or clock_gettime(CLOCK_MONOTONIC) to be supported." -#endif - - LOGEXIT("QueryPerformanceCounter\n"); - PERF_EXIT(QueryPerformanceCounter); - return retval; -} - -BOOL -PALAPI -QueryPerformanceFrequency( - OUT LARGE_INTEGER *lpFrequency - ) -{ - BOOL retval = TRUE; - PERF_ENTRY(QueryPerformanceFrequency); - ENTRY("QueryPerformanceFrequency()\n"); - -#if HAVE_CLOCK_GETTIME_NSEC_NP - lpFrequency->QuadPart = (LONGLONG)(tccSecondsToNanoSeconds); -#elif HAVE_CLOCK_MONOTONIC - // clock_gettime() returns a result in terms of nanoseconds rather than a count. This - // means that we need to either always scale the result by the actual resolution (to - // get a count) or we need to say the resolution is in terms of nanoseconds. We prefer - // the latter since it allows the highest throughput and should minimize error propagated - // to the user. - - lpFrequency->QuadPart = (LONGLONG)(tccSecondsToNanoSeconds); -#else - #error "The PAL requires either mach_absolute_time() or clock_gettime(CLOCK_MONOTONIC) to be supported." 
-#endif - - LOGEXIT("QueryPerformanceFrequency\n"); - PERF_EXIT(QueryPerformanceFrequency); - return retval; -} - /*++ Function: QueryThreadCycleTime @@ -259,58 +156,6 @@ QueryThreadCycleTime( return retval; } -/*++ -Function: - GetTickCount64 - -Returns a 64-bit tick count with a millisecond resolution. It tries its best -to return monotonically increasing counts and avoid being affected by changes -to the system clock (either due to drift or due to explicit changes to system -time). ---*/ -PALAPI -ULONGLONG -GetTickCount64() -{ - LONGLONG retval = 0; - -#if HAVE_CLOCK_GETTIME_NSEC_NP - return (LONGLONG)clock_gettime_nsec_np(CLOCK_UPTIME_RAW) / (LONGLONG)(tccMillieSecondsToNanoSeconds); -#elif HAVE_CLOCK_MONOTONIC || HAVE_CLOCK_MONOTONIC_COARSE - struct timespec ts; - -#if HAVE_CLOCK_MONOTONIC_COARSE - // CLOCK_MONOTONIC_COARSE has enough precision for GetTickCount but - // doesn't have the same overhead as CLOCK_MONOTONIC. This allows - // overall higher throughput. See dotnet/coreclr#2257 for more details. - - const clockid_t clockType = CLOCK_MONOTONIC_COARSE; -#else - const clockid_t clockType = CLOCK_MONOTONIC; -#endif - - int result = clock_gettime(clockType, &ts); - - if (result != 0) - { -#if HAVE_CLOCK_MONOTONIC_COARSE - ASSERT("clock_gettime(CLOCK_MONOTONIC_COARSE) failed: %d\n", result); -#else - ASSERT("clock_gettime(CLOCK_MONOTONIC) failed: %d\n", result); -#endif - retval = FALSE; - } - else - { - retval = ((LONGLONG)(ts.tv_sec) * (LONGLONG)(tccSecondsToMillieSeconds)) + ((LONGLONG)(ts.tv_nsec) / (LONGLONG)(tccMillieSecondsToNanoSeconds)); - } -#else - #error "The PAL requires either mach_absolute_time() or clock_gettime(CLOCK_MONOTONIC) to be supported." -#endif - - return (ULONGLONG)(retval); -} - /*++ Function: PAL_nanosleep diff --git a/src/coreclr/pal/src/objmgr/listedobject.cpp b/src/coreclr/pal/src/objmgr/listedobject.cpp index 1f15fba20529..349c76cdf625 100644 --- a/src/coreclr/pal/src/objmgr/listedobject.cpp +++ b/src/coreclr/pal/src/objmgr/listedobject.cpp @@ -17,7 +17,6 @@ Module Name: --*/ #include "listedobject.hpp" -#include "pal/cs.hpp" #include "pal/dbgmsg.h" #include @@ -158,7 +157,7 @@ CListedObject::AcquireObjectDestructionLock( pthr ); - InternalEnterCriticalSection(pthr, m_pcsObjListLock); + minipal_mutex_enter(m_pcsObjListLock); LOGEXIT("CListedObject::AcquireObjectDestructionLock\n"); } @@ -196,7 +195,7 @@ CListedObject::ReleaseObjectDestructionLock( RemoveEntryList(&m_le); } - InternalLeaveCriticalSection(pthr, m_pcsObjListLock); + minipal_mutex_leave(m_pcsObjListLock); } /*++ diff --git a/src/coreclr/pal/src/objmgr/listedobject.hpp b/src/coreclr/pal/src/objmgr/listedobject.hpp index a75bb54af28b..11e346e4c397 100644 --- a/src/coreclr/pal/src/objmgr/listedobject.hpp +++ b/src/coreclr/pal/src/objmgr/listedobject.hpp @@ -42,7 +42,7 @@ namespace CorUnix // The lock that guards access to that list // - CRITICAL_SECTION *m_pcsObjListLock; + minipal_mutex *m_pcsObjListLock; virtual void @@ -67,7 +67,7 @@ namespace CorUnix CListedObject( CObjectType *pot, - CRITICAL_SECTION *pcsObjListLock + minipal_mutex *pcsObjListLock ) : CPalObjectBase(pot), @@ -144,7 +144,7 @@ namespace CorUnix CSharedMemoryWaitableObject( CObjectType *pot, - CRITICAL_SECTION *pcsObjListLock + minipal_mutex *pcsObjListLock ) : CListedObject(pot, pcsObjListLock) diff --git a/src/coreclr/pal/src/objmgr/listedobjectmanager.cpp b/src/coreclr/pal/src/objmgr/listedobjectmanager.cpp index e058c60211f9..5e00282c0a4a 100644 --- a/src/coreclr/pal/src/objmgr/listedobjectmanager.cpp +++ 
b/src/coreclr/pal/src/objmgr/listedobjectmanager.cpp @@ -18,7 +18,6 @@ Module Name: #include "listedobjectmanager.hpp" #include "listedobject.hpp" -#include "pal/cs.hpp" #include "pal/thread.hpp" #include "pal/procobj.hpp" #include "pal/dbgmsg.h" @@ -60,7 +59,7 @@ CListedObjectManager::Initialize( InitializeListHead(&m_leNamedObjects); InitializeListHead(&m_leAnonymousObjects); - InternalInitializeCriticalSection(&m_csListLock); + minipal_mutex_init(&m_csListLock); m_fListLockInitialized = TRUE; palError = m_HandleManager.Initialize(); @@ -97,7 +96,7 @@ CListedObjectManager::Shutdown( pthr ); - InternalEnterCriticalSection(pthr, &m_csListLock); + minipal_mutex_enter(&m_csListLock); while (!IsListEmpty(&m_leAnonymousObjects)) { @@ -113,7 +112,7 @@ CListedObjectManager::Shutdown( pshmobj->CleanupForProcessShutdown(pthr); } - InternalLeaveCriticalSection(pthr, &m_csListLock); + minipal_mutex_leave(&m_csListLock); LOGEXIT("CListedObjectManager::Shutdown returns %d\n", NO_ERROR); @@ -246,7 +245,7 @@ CListedObjectManager::RegisterObject( potObj = pobjToRegister->GetObjectType(); - InternalEnterCriticalSection(pthr, &m_csListLock); + minipal_mutex_enter(&m_csListLock); if (0 != poa->sObjectName.GetStringLength()) { @@ -336,7 +335,7 @@ CListedObjectManager::RegisterObject( RegisterObjectExit: - InternalLeaveCriticalSection(pthr, &m_csListLock); + minipal_mutex_leave(&m_csListLock); if (NULL != pobjToRegister) { @@ -397,7 +396,7 @@ CListedObjectManager::LocateObject( TRACE("Searching for object name %S\n", psObjectToLocate->GetString()); - InternalEnterCriticalSection(pthr, &m_csListLock); + minipal_mutex_enter(&m_csListLock); // // Search the local named object list for this object @@ -462,7 +461,7 @@ CListedObjectManager::LocateObject( LocateObjectExit: - InternalLeaveCriticalSection(pthr, &m_csListLock); + minipal_mutex_leave(&m_csListLock); LOGEXIT("CListedObjectManager::LocateObject returns %d\n", palError); diff --git a/src/coreclr/pal/src/objmgr/listedobjectmanager.hpp b/src/coreclr/pal/src/objmgr/listedobjectmanager.hpp index e893d2e6a15b..ffe98cb04d4c 100644 --- a/src/coreclr/pal/src/objmgr/listedobjectmanager.hpp +++ b/src/coreclr/pal/src/objmgr/listedobjectmanager.hpp @@ -30,7 +30,7 @@ namespace CorUnix { protected: - CRITICAL_SECTION m_csListLock; + minipal_mutex m_csListLock; bool m_fListLockInitialized; LIST_ENTRY m_leNamedObjects; LIST_ENTRY m_leAnonymousObjects; diff --git a/src/coreclr/pal/src/objmgr/palobjbase.hpp b/src/coreclr/pal/src/objmgr/palobjbase.hpp index ceb49ca79b41..866c26747122 100644 --- a/src/coreclr/pal/src/objmgr/palobjbase.hpp +++ b/src/coreclr/pal/src/objmgr/palobjbase.hpp @@ -20,7 +20,6 @@ Module Name: #define _PALOBJBASE_HPP_ #include "pal/corunix.hpp" -#include "pal/cs.hpp" #include "pal/thread.hpp" namespace CorUnix @@ -29,7 +28,7 @@ namespace CorUnix { private: - CRITICAL_SECTION m_cs; + minipal_mutex m_cs; bool m_fInitialized; public: @@ -44,7 +43,7 @@ namespace CorUnix { if (m_fInitialized) { - InternalDeleteCriticalSection(&m_cs); + minipal_mutex_destroy(&m_cs); } }; @@ -55,7 +54,7 @@ namespace CorUnix { PAL_ERROR palError = NO_ERROR; - InternalInitializeCriticalSection(&m_cs); + minipal_mutex_init(&m_cs); m_fInitialized = TRUE; return palError; @@ -67,7 +66,7 @@ namespace CorUnix IDataLock **pDataLock ) { - InternalEnterCriticalSection(pthr, &m_cs); + minipal_mutex_enter(&m_cs); *pDataLock = static_cast(this); }; @@ -78,7 +77,7 @@ namespace CorUnix bool fDataChanged ) { - InternalLeaveCriticalSection(pthr, &m_cs); + minipal_mutex_leave(&m_cs); }; }; diff 
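The locking changes in the object manager files above replace the PAL's CRITICAL_SECTION wrappers (InternalInitialize/Enter/Leave/DeleteCriticalSection) with the minipal mutex API; note that enter/leave no longer take a CPalThread pointer. A minimal sketch of the resulting lifecycle, assuming the include path, since the diff only shows the call sites:

```cpp
// Hedged sketch of the lock lifecycle after the conversion, mirroring the
// minipal_mutex_init/enter/leave/destroy calls used in the diff.
#include "minipal/mutex.h"   // assumed header location

struct ProtectedList
{
    minipal_mutex m_lock;
    int m_count = 0;

    ProtectedList()  { minipal_mutex_init(&m_lock); }     // was InternalInitializeCriticalSection
    ~ProtectedList() { minipal_mutex_destroy(&m_lock); }  // was InternalDeleteCriticalSection

    void Add()
    {
        minipal_mutex_enter(&m_lock);   // was InternalEnterCriticalSection(pthr, ...)
        m_count++;                      // no CPalThread* is needed any more
        minipal_mutex_leave(&m_lock);   // was InternalLeaveCriticalSection(pthr, ...)
    }
};
```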
--git a/src/coreclr/pal/src/safecrt/internal.h b/src/coreclr/pal/src/safecrt/internal.h deleted file mode 100644 index 8c1a6e572436..000000000000 --- a/src/coreclr/pal/src/safecrt/internal.h +++ /dev/null @@ -1,1066 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*** -*internal.h - contains declarations of internal routines and variables -* - -* -*Purpose: -* Declares routines and variables used internally by the C run-time. -* -* [Internal] -* -****/ - -#if _MSC_VER > 1000 -#pragma once -#endif /* _MSC_VER > 1000 */ - -#ifndef _INC_INTERNAL -#define _INC_INTERNAL - -#include - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#include -#include - -/* - * Conditionally include windows.h to pick up the definition of - * CRITICAL_SECTION. - */ -#include - -#ifdef _MSC_VER -#pragma pack(push,_CRT_PACKING) -#endif /* _MSC_VER */ - -/* Define function types used in several startup sources */ - -typedef void (__cdecl *_PVFV)(void); -typedef int (__cdecl *_PIFV)(void); -typedef void (__cdecl *_PVFI)(int); - -#if _MSC_VER >= 1400 && defined(_M_CEE) -typedef const void* (__clrcall *_PVFVM)(void); -typedef int (__clrcall *_PIFVM)(void); -typedef void (__clrcall *_CPVFV)(void); -#endif /* _MSC_VER >= 1400 && defined(_M_CEE) */ - -#if defined (_M_CEE_PURE) || (defined (_DLL) && defined (_M_IX86)) -/* Retained for compatibility with VC++ 5.0 and earlier versions */ -_CRTIMP int * __cdecl __p__commode(void); -#endif /* defined (_M_CEE_PURE) || (defined (_DLL) && defined (_M_IX86)) */ -#if defined (SPECIAL_CRTEXE) && defined (_DLL) - extern int _commode; -#else /* defined (SPECIAL_CRTEXE) && defined (_DLL) */ -#ifndef _M_CEE_PURE -_CRTIMP extern int _commode; -#else /* _M_CEE_PURE */ -#define _commode (*__p___commode()) -#endif /* _M_CEE_PURE */ -#endif /* defined (SPECIAL_CRTEXE) && defined (_DLL) */ - -#define __IOINFO_TM_ANSI 0 /* Regular Text */ -#define __IOINFO_TM_UTF8 1 /* UTF8 Encoded */ -#define __IOINFO_TM_UTF16LE 2 /* UTF16 Little Endian Encoded */ - -/* - * Control structure for lowio file handles - */ -typedef struct { - intptr_t osfhnd; /* underlying OS file HANDLE */ - char osfile; /* attributes of file (e.g., open in text mode?) */ - char pipech; /* one char buffer for handles opened on pipes */ - int lockinitflag; - CRITICAL_SECTION lock; -#ifndef _SAFECRT_IMPL - /* Not used in the safecrt downlevel. We do not define them, so we cannot use them accidentally */ - char textmode : 7; /* __IOINFO_TM_ANSI or __IOINFO_TM_UTF8 or __IOINFO_TM_UTF16LE */ - char unicode : 1; /* Was the file opened as unicode? */ - char pipech2[2]; /* 2 more peak ahead chars for UNICODE mode */ -#endif /* _SAFECRT_IMPL */ - } ioinfo; - -/* - * Definition of IOINFO_L2E, the log base 2 of the number of elements in each - * array of ioinfo structs. - */ -#define IOINFO_L2E 5 - -/* - * Definition of IOINFO_ARRAY_ELTS, the number of elements in ioinfo array - */ -#define IOINFO_ARRAY_ELTS (1 << IOINFO_L2E) - -/* - * Definition of IOINFO_ARRAYS, maximum number of supported ioinfo arrays. 
- */ -#define IOINFO_ARRAYS 64 - -#define _NHANDLE_ (IOINFO_ARRAYS * IOINFO_ARRAY_ELTS) - -#define _TZ_STRINGS_SIZE 64 - -/* - * Access macros for getting at an ioinfo struct and its fields from a - * file handle - */ -#define _pioinfo(i) ( __pioinfo[(i) >> IOINFO_L2E] + ((i) & (IOINFO_ARRAY_ELTS - \ - 1)) ) -#define _osfhnd(i) ( _pioinfo(i)->osfhnd ) - -#define _osfile(i) ( _pioinfo(i)->osfile ) - -#define _pipech(i) ( _pioinfo(i)->pipech ) - -#define _pipech2(i) ( _pioinfo(i)->pipech2 ) - -#define _textmode(i) ( _pioinfo(i)->textmode ) - -#define _tm_unicode(i) ( _pioinfo(i)->unicode ) - -/* - * Safer versions of the above macros. Currently, only _osfile_safe is - * used. - */ -#define _pioinfo_safe(i) ( (((i) != -1) && ((i) != -2)) ? _pioinfo(i) : &__badioinfo ) - -#define _osfhnd_safe(i) ( _pioinfo_safe(i)->osfhnd ) - -#define _osfile_safe(i) ( _pioinfo_safe(i)->osfile ) - -#define _pipech_safe(i) ( _pioinfo_safe(i)->pipech ) - -#define _pipech2_safe(i) ( _pioinfo_safe(i)->pipech2 ) - -#ifdef _SAFECRT_IMPL -/* safecrt does not have support for textmode, so we always return __IOINFO_TM_ANSI */ -#define _textmode_safe(i) __IOINFO_TM_ANSI -#define _tm_unicode_safe(i) 0 -#else /* _SAFECRT_IMPL */ -#define _textmode_safe(i) ( _pioinfo_safe(i)->textmode ) -#define _tm_unicode_safe(i) ( _pioinfo_safe(i)->unicode ) -#endif /* _SAFECRT_IMPL */ - -#ifndef _M_CEE_PURE -#ifdef _SAFECRT_IMPL -/* We need to get this from the downlevel DLL, even when we build safecrt.lib */ -extern __declspec(dllimport) ioinfo __badioinfo; -extern __declspec(dllimport) ioinfo * __pioinfo[]; -#else /* _SAFECRT_IMPL */ -/* - * Special, static ioinfo structure used only for more graceful handling - * of a C file handle value of -1 (results from common errors at the stdio - * level). - */ -extern _CRTIMP ioinfo __badioinfo; - -/* - * Array of arrays of control structures for lowio files. - */ -extern _CRTIMP ioinfo * __pioinfo[]; -#endif /* _SAFECRT_IMPL */ -#endif /* _M_CEE_PURE */ - -/* - * Current number of allocated ioinfo structures (_NHANDLE_ is the upper - * limit). 
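The access macros above split a CRT file descriptor into a two-level lookup: the high bits select one of the lazily allocated `__pioinfo` arrays and the low bits select the slot inside it. A small worked example of the index arithmetic, using the constants from the deleted header:

```cpp
// Worked example of the deleted header's two-level handle table.
#include <cstdio>

constexpr int IOINFO_L2E        = 5;                     // log2 of elements per array
constexpr int IOINFO_ARRAY_ELTS = 1 << IOINFO_L2E;       // 32 entries per array
constexpr int IOINFO_ARRAYS     = 64;                    // up to 64 arrays
constexpr int NHANDLE           = IOINFO_ARRAYS * IOINFO_ARRAY_ELTS; // 2048 handles

int main()
{
    int fd = 70;
    int arrayIndex   = fd >> IOINFO_L2E;                 // 70 / 32 == 2
    int elementIndex = fd & (IOINFO_ARRAY_ELTS - 1);     // 70 % 32 == 6
    printf("fd %d -> __pioinfo[%d][%d], max handles %d\n",
           fd, arrayIndex, elementIndex, NHANDLE);
    return 0;
}
```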
- */ -extern int _nhandle; - -int __cdecl _alloc_osfhnd(void); -int __cdecl _free_osfhnd(int); -int __cdecl _set_osfhnd(int, intptr_t); - -/* - fileno for stdout, stdin & stderr when there is no console -*/ -#define _NO_CONSOLE_FILENO (intptr_t)-2 - - -extern const char __dnames[]; -extern const char __mnames[]; - -extern int _days[]; -extern int _lpdays[]; - -extern __time32_t __cdecl __loctotime32_t(int, int, int, int, int, int, int); -extern __time64_t __cdecl __loctotime64_t(int, int, int, int, int, int, int); - -extern void __cdecl __tzset(void); - -extern int __cdecl _validdrive(unsigned); - -/* - * If we are only interested in years between 1901 and 2099, we could use this: - * - * #define IS_LEAP_YEAR(y) (y % 4 == 0) - */ - -#define IS_LEAP_YEAR(y) (((y) % 4 == 0 && (y) % 100 != 0) || (y) % 400 == 0) - -/* - * This variable is in the C start-up; the length must be kept synchronized - * It is used by the *cenvarg.c modules - */ - -extern char _acfinfo[]; /* "_C_FILE_INFO=" */ - -#define CFI_LENGTH 12 /* "_C_FILE_INFO" is 12 bytes long */ - - -/* - * stdio internals - */ -#ifndef _FILE_DEFINED -struct _iobuf { - char *_ptr; - int _cnt; - char *_base; - int _flag; - int _file; - int _charbuf; - int _bufsiz; - char *_tmpfname; - }; -typedef struct _iobuf FILE; -#define _FILE_DEFINED -#endif /* _FILE_DEFINED */ - -#if !defined (_FILEX_DEFINED) && defined (_WINDOWS_) - -/* - * Variation of FILE type used for the dynamically allocated portion of - * __piob[]. For single thread, _FILEX is the same as FILE. For multithread - * models, _FILEX has two fields: the FILE struct and the CRITICAL_SECTION - * struct used to serialize access to the FILE. - */ - -typedef struct { - FILE f; - CRITICAL_SECTION lock; - } _FILEX; - - -#define _FILEX_DEFINED -#endif /* !defined (_FILEX_DEFINED) && defined (_WINDOWS_) */ - -/* - * Number of entries supported in the array pointed to by __piob[]. That is, - * the number of stdio-level files which may be open simultaneously. This - * is normally set to _NSTREAM_ by the stdio initialization code. - */ -extern int _nstream; - -/* - * Pointer to the array of pointers to FILE/_FILEX structures that are used - * to manage stdio-level files. 
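The IS_LEAP_YEAR macro above implements the full Gregorian rule because, as its comment notes, the simple `y % 4 == 0` shortcut only holds between 1901 and 2099. A quick check of a few boundary years:

```cpp
// Small check of the IS_LEAP_YEAR rule from the deleted header: divisible by 4,
// except century years, which must also be divisible by 400.
#include <cstdio>

#define IS_LEAP_YEAR(y) (((y) % 4 == 0 && (y) % 100 != 0) || (y) % 400 == 0)

int main()
{
    printf("1900: %d\n", IS_LEAP_YEAR(1900)); // 0 - century year not divisible by 400
    printf("2000: %d\n", IS_LEAP_YEAR(2000)); // 1 - divisible by 400
    printf("2024: %d\n", IS_LEAP_YEAR(2024)); // 1 - divisible by 4, not a century year
    printf("2025: %d\n", IS_LEAP_YEAR(2025)); // 0
    return 0;
}
```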
- */ -extern void **__piob; - -FILE * __cdecl _getstream(void); -FILE * __cdecl _openfile(_In_z_ const char * _Filename, _In_z_ const char * _Mode, _In_ int _ShFlag, _Out_ FILE * _File); -FILE * __cdecl _wopenfile(_In_z_ const char16_t * _Filename, _In_z_ const char16_t * _Mode, _In_ int _ShFlag, _Out_ FILE * _File); -void __cdecl _getbuf(_Out_ FILE * _File); -int __cdecl _filwbuf (__inout FILE * _File); -int __cdecl _flswbuf(_In_ int _Ch, __inout FILE * _File); -void __cdecl _freebuf(__inout FILE * _File); -int __cdecl _stbuf(__inout FILE * _File); -void __cdecl _ftbuf(int _Flag, __inout FILE * _File); - -#ifdef _SAFECRT_IMPL - -int __cdecl _output(__inout FILE * _File, _In_z_ __format_string const char *_Format, va_list _ArgList); -int __cdecl _woutput(__inout FILE * _File, _In_z_ __format_string const char16_t *_Format, va_list _ArgList); -int __cdecl _output_s(__inout FILE * _File, _In_z_ __format_string const char *_Format, va_list _ArgList); -int __cdecl _output_p(__inout FILE * _File, _In_z_ __format_string const char *_Format, va_list _ArgList); -typedef int (*OUTPUTFN)(FILE *, const char *, va_list); - -#else /* _SAFECRT_IMPL */ - -int __cdecl _output_l(__inout FILE * _File, _In_z_ __format_string const char *_Format, _In_opt_ _locale_t _Locale, va_list _ArgList); -int __cdecl _woutput_l(__inout FILE * _File, _In_z_ __format_string const char16_t *_Format, _In_opt_ _locale_t _Locale, va_list _ArgList); -int __cdecl _output_s_l(__inout FILE * _File, _In_z_ __format_string const char *_Format, _In_opt_ _locale_t _Locale, va_list _ArgList); -int __cdecl _output_p_l(__inout FILE * _File, _In_z_ __format_string const char *_Format, _In_opt_ _locale_t _Locale, va_list _ArgList); -typedef int (*OUTPUTFN)(__inout FILE * _File, const char *, _locale_t, va_list); - -#endif /* _SAFECRT_IMPL */ - -#ifdef _SAFECRT_IMPL - -int __cdecl _input(_In_ FILE * _File, _In_z_ __format_string const unsigned char * _Format, va_list _ArgList); -int __cdecl _winput(_In_ FILE * _File, _In_z_ __format_string const char16_t * _Format, va_list _ArgList); -int __cdecl _input_s(_In_ FILE * _File, _In_z_ __format_string const unsigned char * _Format, va_list _ArgList); -typedef int (*INPUTFN)(FILE *, const unsigned char *, va_list); -typedef int (*WINPUTFN)(FILE *, const char16_t *, va_list); - -#else /* _SAFECRT_IMPL */ - -int __cdecl _input_l(__inout FILE * _File, _In_z_ __format_string const unsigned char *, _In_opt_ _locale_t _Locale, va_list _ArgList); -int __cdecl _winput_l(__inout FILE * _File, _In_z_ __format_string const char16_t *, _In_opt_ _locale_t _Locale, va_list _ArgList); -int __cdecl _input_s_l(__inout FILE * _File, _In_z_ __format_string const unsigned char *, _In_opt_ _locale_t _Locale, va_list _ArgList); -int __cdecl _winput_s_l(__inout FILE * _File, _In_z_ __format_string const char16_t *, _In_opt_ _locale_t _Locale, va_list _ArgList); -typedef int (*INPUTFN)(FILE *, const unsigned char *, _locale_t, va_list); -typedef int (*WINPUTFN)(FILE *, const char16_t *, _locale_t, va_list); - -#ifdef _UNICODE -#define TINPUTFN WINPUTFN -#else /* _UNICODE */ -#define TINPUTFN INPUTFN -#endif /* _UNICODE */ - -#endif /* _SAFECRT_IMPL */ - -int __cdecl _flush(__inout FILE * _File); -void __cdecl _endstdio(void); - -errno_t __cdecl _sopen_helper(_In_z_ const char * _Filename, - _In_ int _OFlag, _In_ int _ShFlag, _In_ int _PMode, - _Out_ int * _PFileHandle, int _BSecure); -errno_t __cdecl _wsopen_helper(_In_z_ const char16_t * _Filename, - _In_ int _OFlag, _In_ int _ShFlag, _In_ int _PMode, - _Out_ int * 
_PFileHandle, int _BSecure); - -#ifndef CRTDLL -extern int _cflush; -#endif /* CRTDLL */ - -extern unsigned int _tempoff; - -extern unsigned int _old_pfxlen; - -extern int _umaskval; /* the umask value */ - -extern char _pipech[]; /* pipe lookahead */ - -extern char _exitflag; /* callable termination flag */ - -extern int _C_Termination_Done; /* termination done flag */ - -char * __cdecl _getpath(_In_z_ const char * _Src, _Out_writes_z_(_SizeInChars) char * _Dst, _In_ size_t _SizeInChars); -char16_t * __cdecl _wgetpath(_In_z_ const char16_t * _Src, _Out_writes_z_(_SizeInWords) char16_t * _Dst, _In_ size_t _SizeInWords); - -extern int _dowildcard; /* flag to enable argv[] wildcard expansion */ - -#ifndef _PNH_DEFINED -typedef int (__cdecl * _PNH)( size_t ); -#define _PNH_DEFINED -#endif /* _PNH_DEFINED */ - -#if _MSC_VER >= 1400 && defined(_M_CEE) -#ifndef __MPNH_DEFINED -typedef int (__clrcall * __MPNH)( size_t ); -#define __MPNH_DEFINED -#endif /* __MPNH_DEFINED */ -#endif /* _MSC_VER >= 1400 && defined(_M_CEE) */ - - -/* calls the currently installed new handler */ -int __cdecl _callnewh(_In_ size_t _Size); - -extern int _newmode; /* malloc new() handler mode */ - -/* pointer to initial environment block that is passed to [w]main */ -#ifndef _M_CEE_PURE -extern _CRTIMP char16_t **__winitenv; -extern _CRTIMP char **__initenv; -#endif /* _M_CEE_PURE */ - -/* _calloca helper */ -#define _calloca(count, size) ((count<=0 || size<=0 || ((((size_t)_HEAP_MAXREQ) / ((size_t)count)) < ((size_t)size)))? NULL : _malloca(count * size)) - -/* startup set values */ -extern char *_aenvptr; /* environment ptr */ -extern char16_t *_wenvptr; /* wide environment ptr */ - -/* command line */ - -#if defined (_DLL) -_CRTIMP char ** __cdecl __p__acmdln(void); -_CRTIMP char16_t ** __cdecl __p__wcmdln(void); -#endif /* defined (_DLL) */ -#ifndef _M_CEE_PURE -_CRTIMP extern char *_acmdln; -_CRTIMP extern char16_t *_wcmdln; -#else /* _M_CEE_PURE */ -#define _acmdln (*__p__acmdln()) -#define _wcmdln (*__p__wcmdln()) -#endif /* _M_CEE_PURE */ - -/* - * prototypes for internal startup functions - */ -int __cdecl _cwild(void); /* wild.c */ -int __cdecl _wcwild(void); /* wwild.c */ -int __cdecl _mtinit(void); /* tidtable.c */ -void __cdecl _mtterm(void); /* tidtable.c */ -int __cdecl _mtinitlocks(void); /* mlock.c */ -void __cdecl _mtdeletelocks(void); /* mlock.c */ -int __cdecl _mtinitlocknum(int); /* mlock.c */ - -/* Wrapper for InitializeCriticalSection API, with default spin count */ -int __cdecl __crtInitCritSecAndSpinCount(PCRITICAL_SECTION, DWORD); -#define _CRT_SPINCOUNT 4000 - -/* - * C source build only!!!! 
- * - * more prototypes for internal startup functions - */ -void __cdecl _amsg_exit(int); /* crt0.c */ -void __cdecl __crtExitProcess(int); /* crt0dat.c */ -void __cdecl __crtCorExitProcess(int); /* crt0dat.c */ -void __cdecl __crtdll_callstaticterminators(void); /* crt0dat.c */ - -/* -_cinit now allows the caller to suppress floating point precision init -This allows the DLLs that use the CRT to not initialise FP precision, -allowing the EXE's setting to persist even when a DLL is loaded -*/ -int __cdecl _cinit(int /* initFloatingPrecision */); /* crt0dat.c */ -void __cdecl __doinits(void); /* astart.asm */ -void __cdecl __doterms(void); /* astart.asm */ -void __cdecl __dopreterms(void); /* astart.asm */ -void __cdecl _FF_MSGBANNER(void); -void __cdecl _fpmath(int /*initPrecision*/); -void __cdecl _fpclear(void); -void __cdecl _fptrap(void); /* crt0fp.c */ -int __cdecl _heap_init(int); -void __cdecl _heap_term(void); -void __cdecl _heap_abort(void); -void __cdecl __initconin(void); /* initcon.c */ -void __cdecl __initconout(void); /* initcon.c */ -int __cdecl _ioinit(void); /* crt0.c, crtlib.c */ -void __cdecl _ioterm(void); /* crt0.c, crtlib.c */ -char * __cdecl _GET_RTERRMSG(int); -void __cdecl _NMSG_WRITE(int); -int __CRTDECL _setargv(void); /* setargv.c, stdargv.c */ -int __CRTDECL __setargv(void); /* stdargv.c */ -int __CRTDECL _wsetargv(void); /* wsetargv.c, wstdargv.c */ -int __CRTDECL __wsetargv(void); /* wstdargv.c */ -int __cdecl _setenvp(void); /* stdenvp.c */ -int __cdecl _wsetenvp(void); /* wstdenvp.c */ -void __cdecl __setmbctable(unsigned int); /* mbctype.c */ - -#ifdef MRTDLL -_MRTIMP int __cdecl _onexit_process(_CPVFV); -_MRTIMP int __cdecl _onexit_app_domain(_CPVFV); -#endif /* MRTDLL */ - -#ifndef _MANAGED_MAIN -int __CRTDECL main(_In_ int _Argc, _In_reads_z_(_Argc) char ** _Argv, _In_z_ char ** _Env); -int __CRTDECL wmain(_In_ int _Argc, _In_reads_z_(_Argc) char16_t ** _Argv, _In_z_ char16_t ** _Env); -#endif /* _MANAGED_MAIN */ - -/* helper functions for wide/multibyte environment conversion */ -int __cdecl __mbtow_environ (void); -int __cdecl __wtomb_environ (void); - -/* These two functions take a char ** for the environment option - At some point during their execution, they take ownership of the - memory block passed in using option. At this point, they - NULL out the incoming char * / char16_t * to ensure there is no - double-free -*/ -int __cdecl __crtsetenv(_Outptr_opt_ char ** _POption, _In_ const int _Primary); -int __cdecl __crtwsetenv(_Outptr_opt_ char16_t ** _POption, _In_ const int _Primary); - -#ifndef _M_CEE_PURE -_CRTIMP extern void (__cdecl * _aexit_rtn)(int); -#endif /* _M_CEE_PURE */ - -#if defined (_DLL) || defined (CRTDLL) - -#ifndef _STARTUP_INFO_DEFINED -typedef struct -{ - int newmode; -} _startupinfo; -#define _STARTUP_INFO_DEFINED -#endif /* _STARTUP_INFO_DEFINED */ - -_CRTIMP int __cdecl __getmainargs(_Out_ int * _Argc, _Outptr_result_buffer_(*_Argc) char *** _Argv, - _Outptr_opt_ char *** _Env, _In_ int _DoWildCard, - _In_ _startupinfo * _StartInfo); - -_CRTIMP int __cdecl __wgetmainargs(_Out_ int * _Argc, _Outptr_result_buffer_(*_Argc)char16_t *** _Argv, - _Outptr_opt_ char16_t *** _Env, _In_ int _DoWildCard, - _In_ _startupinfo * _StartInfo); - -#endif /* defined (_DLL) || defined (CRTDLL) */ - -/* - * Prototype, variables and constants which determine how error messages are - * written out. 
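The _calloca helper defined earlier in this deleted header guards the `count * size` multiplication against overflow by dividing the maximum request size by `count` before comparing with `size`. The stand-alone sketch below shows the same guard; `_HEAP_MAXREQ` and `_malloca` are MSVC CRT names, so a placeholder limit and plain `malloc` are used instead.

```cpp
// Overflow-guarded allocation in the style of the deleted _calloca macro.
#include <cstdlib>
#include <cstdio>

constexpr size_t kMaxRequest = (size_t)-1 / 2; // stand-in for _HEAP_MAXREQ

void* checked_calloc_like(size_t count, size_t size)
{
    if (count == 0 || size == 0 || (kMaxRequest / count) < size)
        return nullptr;            // nothing to allocate, or count * size would overflow
    return malloc(count * size);   // safe: count * size <= kMaxRequest
}

int main()
{
    void* ok  = checked_calloc_like(16, 32);
    void* bad = checked_calloc_like((size_t)-1, 8); // rejected, no wraparound
    printf("ok=%p bad=%p\n", ok, bad);
    free(ok);
    return 0;
}
```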
- */ -#define _UNKNOWN_APP 0 -#define _CONSOLE_APP 1 -#define _GUI_APP 2 - -extern int __app_type; - -#if !defined (_M_CEE_PURE) - -extern Volatile __native_startup_lock; - -#define __NO_REASON UINT_MAX -extern Volatile __native_dllmain_reason; -extern Volatile __native_vcclrit_reason; - -#if defined (__cplusplus) - -#pragma warning(push) -#pragma warning(disable: 4483) -#if _MSC_FULL_VER >= 140050415 -#define _NATIVE_STARTUP_NAMESPACE __identifier("") -#else /* _MSC_FULL_VER >= 140050415 */ -#define _NATIVE_STARTUP_NAMESPACE __CrtImplementationDetails -#endif /* _MSC_FULL_VER >= 140050415 */ - -namespace _NATIVE_STARTUP_NAMESPACE -{ - class NativeDll - { - private: - static const unsigned int ProcessDetach = 0; - static const unsigned int ProcessAttach = 1; - static const unsigned int ThreadAttach = 2; - static const unsigned int ThreadDetach = 3; - static const unsigned int ProcessVerifier = 4; - - public: - - inline static bool IsInDllMain() - { - return (__native_dllmain_reason != __NO_REASON); - } - - inline static bool IsInProcessAttach() - { - return (__native_dllmain_reason == ProcessAttach); - } - - inline static bool IsInProcessDetach() - { - return (__native_dllmain_reason == ProcessDetach); - } - - inline static bool IsInVcclrit() - { - return (__native_vcclrit_reason != __NO_REASON); - } - - inline static bool IsSafeForManagedCode() - { - if (!IsInDllMain()) - { - return true; - } - - if (IsInVcclrit()) - { - return true; - } - - return !IsInProcessAttach() && !IsInProcessDetach(); - } - }; -} -#pragma warning(pop) - -#endif /* defined (__cplusplus) */ - -#endif /* !defined (_M_CEE_PURE) */ - -extern int __error_mode; - -_CRTIMP void __cdecl __set_app_type(int); -#if defined (CRTDLL) && !defined (_SYSCRT) -/* - * All these function pointer are used for creating global state of CRT - * functions. Either all of them will be set or all of them will be NULL - */ -typedef void (__cdecl *_set_app_type_function)(int); -typedef int (__cdecl *_get_app_type_function)(); -extern _set_app_type_function __set_app_type_server; -extern _get_app_type_function __get_app_type_server; -#endif /* defined (CRTDLL) && !defined (_SYSCRT) */ - -/* - * C source build only!!!! 
- * - * map Win32 errors into Xenix errno values -- for modules written in C - */ -_CRTIMP void __cdecl _dosmaperr(unsigned long); -extern int __cdecl _get_errno_from_oserr(unsigned long); - -/* - * internal routines used by the exec/spawn functions - */ - -extern intptr_t __cdecl _dospawn(_In_ int _Mode, _In_opt_z_ const char * _Name, __inout_z char * _Cmd, _In_opt_z_ char * _Env); -extern intptr_t __cdecl _wdospawn(_In_ int _Mode, _In_opt_z_ const char16_t * _Name, __inout_z char16_t * _Cmd, _In_opt_z_ char16_t * _Env); -extern int __cdecl _cenvarg(_In_z_ const char * const * _Argv, _In_opt_z_ const char * const * _Env, - _Outptr_opt_ char ** _ArgBlk, _Outptr_opt_ char ** _EnvBlk, _In_z_ const char *_Name); -extern int __cdecl _wcenvarg(_In_z_ const char16_t * const * _Argv, _In_opt_z_ const char16_t * const * _Env, - _Outptr_opt_ char16_t ** _ArgBlk, _Outptr_opt_ char16_t ** _EnvBlk, _In_z_ const char16_t * _Name); -#ifndef _M_IX86 -extern char ** _capture_argv(_In_ va_list *, _In_z_ const char * _FirstArg, _Out_writes_z_(_MaxCount) char ** _Static_argv, _In_ size_t _MaxCount); -extern char16_t ** _wcapture_argv(_In_ va_list *, _In_z_ const char16_t * _FirstArg, _Out_writes_z_(_MaxCount) char16_t ** _Static_argv, _In_ size_t _MaxCount); -#endif /* _M_IX86 */ - -/* - * internal routine used by the abort - */ - -extern _PHNDLR __cdecl __get_sigabrt(void); - -/* - * Type from ntdef.h - */ - -typedef LONG NTSTATUS; - -/* - * Exception code used in _invalid_parameter - */ - -#ifndef STATUS_INVALID_PARAMETER -#define STATUS_INVALID_PARAMETER ((NTSTATUS)0xC000000DL) -#endif /* STATUS_INVALID_PARAMETER */ - -/* - * Exception code used for abort and _CALL_REPORTFAULT - */ - -#ifndef STATUS_FATAL_APP_EXIT -#define STATUS_FATAL_APP_EXIT ((NTSTATUS)0x40000015L) -#endif /* STATUS_FATAL_APP_EXIT */ - -/* - * Validate functions - */ -#include /* _ASSERTE */ -#include - -#define __STR2WSTR(str) L##str - -#define _STR2WSTR(str) __STR2WSTR(str) - -#define __FILEW__ _STR2WSTR(__FILE__) -#define __FUNCTIONW__ _STR2WSTR(__FUNCTION__) - -/* We completely fill the buffer only in debug (see _SECURECRT__FILL_STRING - * and _SECURECRT__FILL_BYTE macros). 
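The __STR2WSTR/_STR2WSTR pair above uses two macro levels so that an argument such as `__FILE__` is expanded to its string literal before the `L` prefix is pasted on; pasting directly would produce the invalid token `L__FILE__`. A compilable illustration:

```cpp
// Two-level macro indirection for building wide string literals from __FILE__.
#include <cstdio>
#include <cwchar>

#define __STR2WSTR(str) L##str
#define _STR2WSTR(str)  __STR2WSTR(str)
#define __FILEW__       _STR2WSTR(__FILE__)

int main()
{
    const wchar_t* wideFile = __FILEW__;  // wide-character version of __FILE__
    wprintf(L"compiled from: %ls\n", wideFile);
    return 0;
}
```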
- */ -#if !defined (_SECURECRT_FILL_BUFFER) -#ifdef _DEBUG -#define _SECURECRT_FILL_BUFFER 1 -#else /* _DEBUG */ -#define _SECURECRT_FILL_BUFFER 0 -#endif /* _DEBUG */ -#endif /* !defined (_SECURECRT_FILL_BUFFER) */ - -#ifndef _SAFECRT_IMPL -/* _invalid_parameter is already defined in safecrt.h and safecrt.lib */ -#if !defined (_NATIVE_char16_t_DEFINED) && defined (_M_CEE_PURE) -extern "C++" -#endif /* !defined (_NATIVE_char16_t_DEFINED) && defined (_M_CEE_PURE) */ -_CRTIMP -#endif /* _SAFECRT_IMPL */ -void __cdecl _invalid_parameter(_In_opt_z_ const char16_t *, _In_opt_z_ const char16_t *, _In_opt_z_ const char16_t *, unsigned int, uintptr_t); - -#if !defined (_NATIVE_char16_t_DEFINED) && defined (_M_CEE_PURE) -extern "C++" -#endif /* !defined (_NATIVE_char16_t_DEFINED) && defined (_M_CEE_PURE) */ -_CRTIMP -void __cdecl _invoke_watson(_In_opt_z_ const char16_t *, _In_opt_z_ const char16_t *, _In_opt_z_ const char16_t *, unsigned int, uintptr_t); - -#ifndef _DEBUG -#if !defined (_NATIVE_char16_t_DEFINED) && defined (_M_CEE_PURE) -extern "C++" -#endif /* !defined (_NATIVE_char16_t_DEFINED) && defined (_M_CEE_PURE) */ -_CRTIMP -void __cdecl _invalid_parameter_noinfo(void); -#endif /* _DEBUG */ - -/* Invoke Watson if _ExpressionError is not 0; otherwise simply return _ExpressionError */ -__forceinline -void _invoke_watson_if_error( - errno_t _ExpressionError, - const char16_t *_Expression, - const char16_t *_Function, - const char16_t *_File, - unsigned int _Line, - uintptr_t _Reserved - ) -{ - if (_ExpressionError == 0) - { - return; - } - _invoke_watson(_Expression, _Function, _File, _Line, _Reserved); -} - -/* Invoke Watson if _ExpressionError is not 0 and equal to _ErrorValue1 or _ErrorValue2; otherwise simply return _ExpressionError */ -__forceinline -errno_t _invoke_watson_if_oneof( - errno_t _ExpressionError, - errno_t _ErrorValue1, - errno_t _ErrorValue2, - const char16_t *_Expression, - const char16_t *_Function, - const char16_t *_File, - unsigned int _Line, - uintptr_t _Reserved - ) -{ - if (_ExpressionError == 0 || (_ExpressionError != _ErrorValue1 && _ExpressionError != _ErrorValue2)) - { - return _ExpressionError; - } - _invoke_watson(_Expression, _Function, _File, _Line, _Reserved); - return _ExpressionError; -} - -/* - * Assert in debug builds. - * set errno and return - * - */ -#ifdef _DEBUG -#define _CALL_INVALID_PARAMETER_FUNC(funcname, expr) funcname(expr, __FUNCTIONW__, __FILEW__, __LINE__, 0) -#define _INVOKE_WATSON_IF_ERROR(expr) _invoke_watson_if_error((expr), __STR2WSTR(#expr), __FUNCTIONW__, __FILEW__, __LINE__, 0) -#define _INVOKE_WATSON_IF_ONEOF(expr, errvalue1, errvalue2) _invoke_watson_if_oneof(expr, (errvalue1), (errvalue2), __STR2WSTR(#expr), __FUNCTIONW__, __FILEW__, __LINE__, 0) -#else /* _DEBUG */ -#define _CALL_INVALID_PARAMETER_FUNC(funcname, expr) funcname(NULL, NULL, NULL, 0, 0) -#define _INVOKE_WATSON_IF_ERROR(expr) _invoke_watson_if_error(expr, NULL, NULL, NULL, 0, 0) -#define _INVOKE_WATSON_IF_ONEOF(expr, errvalue1, errvalue2) _invoke_watson_if_oneof((expr), (errvalue1), (errvalue2), NULL, NULL, NULL, 0, 0) -#endif /* _DEBUG */ - -#define _INVALID_PARAMETER(expr) _CALL_INVALID_PARAMETER_FUNC(_invalid_parameter, expr) - -#define _VALIDATE_RETURN_VOID( expr, errorcode ) \ - { \ - int _Expr_val=!!(expr); \ - _ASSERT_EXPR( ( _Expr_val ), _CRT_WIDE(#expr) ); \ - if ( !( _Expr_val ) ) \ - { \ - errno = errorcode; \ - _INVALID_PARAMETER(_CRT_WIDE(#expr)); \ - return; \ - } \ - } - -/* - * Assert in debug builds. 
- * set errno and return value - */ - -#ifndef _VALIDATE_RETURN -#define _VALIDATE_RETURN( expr, errorcode, retexpr ) \ - { \ - int _Expr_val=!!(expr); \ - _ASSERT_EXPR( ( _Expr_val ), _CRT_WIDE(#expr) ); \ - if ( !( _Expr_val ) ) \ - { \ - errno = errorcode; \ - _INVALID_PARAMETER(_CRT_WIDE(#expr) ); \ - return ( retexpr ); \ - } \ - } -#endif /* _VALIDATE_RETURN */ - -#ifndef _VALIDATE_RETURN_NOEXC -#define _VALIDATE_RETURN_NOEXC( expr, errorcode, retexpr ) \ - { \ - if ( !(expr) ) \ - { \ - errno = errorcode; \ - return ( retexpr ); \ - } \ - } -#endif /* _VALIDATE_RETURN_NOEXC */ - -/* - * Assert in debug builds. - * set errno and set retval for later usage - */ - -#define _VALIDATE_SETRET( expr, errorcode, retval, retexpr ) \ - { \ - int _Expr_val=!!(expr); \ - _ASSERT_EXPR( ( _Expr_val ), _CRT_WIDE(#expr) ); \ - if ( !( _Expr_val ) ) \ - { \ - errno = errorcode; \ - _INVALID_PARAMETER(_CRT_WIDE(#expr)); \ - retval=( retexpr ); \ - } \ - } - -#define _CHECK_FH_RETURN( handle, errorcode, retexpr ) \ - { \ - if(handle == _NO_CONSOLE_FILENO) \ - { \ - errno = errorcode; \ - return ( retexpr ); \ - } \ - } - -/* - We use _VALIDATE_STREAM_ANSI_RETURN to ensure that ANSI file operations( - fprintf etc) aren't called on files opened as UNICODE. We do this check - only if it's an actual FILE pointer & not a string -*/ - -#define _VALIDATE_STREAM_ANSI_RETURN( stream, errorcode, retexpr ) \ - { \ - FILE *_Stream=stream; \ - _VALIDATE_RETURN(( (_Stream->_flag & _IOSTRG) || \ - ( (_textmode_safe(_fileno(_Stream)) == __IOINFO_TM_ANSI) && \ - !_tm_unicode_safe(_fileno(_Stream)))), \ - errorcode, retexpr) \ - } - -/* - We use _VALIDATE_STREAM_ANSI_SETRET to ensure that ANSI file operations( - fprintf etc) aren't called on files opened as UNICODE. We do this check - only if it's an actual FILE pointer & not a string. It doesn't actually return - immediately -*/ - -#define _VALIDATE_STREAM_ANSI_SETRET( stream, errorcode, retval, retexpr) \ - { \ - FILE *_Stream=stream; \ - _VALIDATE_SETRET(( (_Stream->_flag & _IOSTRG) || \ - ( (_textmode_safe(_fileno(_Stream)) == __IOINFO_TM_ANSI) && \ - !_tm_unicode_safe(_fileno(_Stream)))), \ - errorcode, retval, retexpr) \ - } - -/* - * Assert in debug builds. - * Return value (do not set errno) - */ - -#define _VALIDATE_RETURN_NOERRNO( expr, retexpr ) \ - { \ - int _Expr_val=!!(expr); \ - _ASSERT_EXPR( ( _Expr_val ), _CRT_WIDE(#expr) ); \ - if ( !( _Expr_val ) ) \ - { \ - _INVALID_PARAMETER(_CRT_WIDE(#expr)); \ - return ( retexpr ); \ - } \ - } - -/* - * Assert in debug builds. 
- * set errno and return errorcode - */ - -#define _VALIDATE_RETURN_ERRCODE( expr, errorcode ) \ - { \ - int _Expr_val=!!(expr); \ - _ASSERT_EXPR( ( _Expr_val ), _CRT_WIDE(#expr) ); \ - if ( !( _Expr_val ) ) \ - { \ - errno = errorcode; \ - _INVALID_PARAMETER(_CRT_WIDE(#expr)); \ - return ( errorcode ); \ - } \ - } - -#define _VALIDATE_RETURN_ERRCODE_NOEXC( expr, errorcode ) \ - { \ - if (!(expr)) \ - { \ - errno = errorcode; \ - return ( errorcode ); \ - } \ - } - -#define _VALIDATE_CLEAR_OSSERR_RETURN( expr, errorcode, retexpr ) \ - { \ - int _Expr_val=!!(expr); \ - _ASSERT_EXPR( ( _Expr_val ), _CRT_WIDE(#expr) ); \ - if ( !( _Expr_val ) ) \ - { \ - _doserrno = 0L; \ - errno = errorcode; \ - _INVALID_PARAMETER(_CRT_WIDE(#expr) ); \ - return ( retexpr ); \ - } \ - } - -#define _CHECK_FH_CLEAR_OSSERR_RETURN( handle, errorcode, retexpr ) \ - { \ - if(handle == _NO_CONSOLE_FILENO) \ - { \ - _doserrno = 0L; \ - errno = errorcode; \ - return ( retexpr ); \ - } \ - } - -#define _VALIDATE_CLEAR_OSSERR_RETURN_ERRCODE( expr, errorcode ) \ - { \ - int _Expr_val=!!(expr); \ - _ASSERT_EXPR( ( _Expr_val ), _CRT_WIDE(#expr) ); \ - if ( !( _Expr_val ) ) \ - { \ - _doserrno = 0L; \ - errno = errorcode; \ - _INVALID_PARAMETER(_CRT_WIDE(#expr)); \ - return ( errorcode ); \ - } \ - } - -#define _CHECK_FH_CLEAR_OSSERR_RETURN_ERRCODE( handle, retexpr ) \ - { \ - if(handle == _NO_CONSOLE_FILENO) \ - { \ - _doserrno = 0L; \ - return ( retexpr ); \ - } \ - } - -#ifdef _DEBUG -extern size_t __crtDebugFillThreshold; -#endif /* _DEBUG */ - -#if !defined (_SECURECRT_FILL_BUFFER_THRESHOLD) -#ifdef _DEBUG -#define _SECURECRT_FILL_BUFFER_THRESHOLD __crtDebugFillThreshold -#else /* _DEBUG */ -#define _SECURECRT_FILL_BUFFER_THRESHOLD ((size_t)0) -#endif /* _DEBUG */ -#endif /* !defined (_SECURECRT_FILL_BUFFER_THRESHOLD) */ - -#if _SECURECRT_FILL_BUFFER -#define _SECURECRT__FILL_STRING(_String, _Size, _Offset) \ - if ((_Size) != ((size_t)-1) && (_Size) != INT_MAX && \ - ((size_t)(_Offset)) < (_Size)) \ - { \ - memset((_String) + (_Offset), \ - _SECURECRT_FILL_BUFFER_PATTERN, \ - (_SECURECRT_FILL_BUFFER_THRESHOLD < ((size_t)((_Size) - (_Offset))) ? 
\ - _SECURECRT_FILL_BUFFER_THRESHOLD : \ - ((_Size) - (_Offset))) * sizeof(*(_String))); \ - } -#else /* _SECURECRT_FILL_BUFFER */ -#define _SECURECRT__FILL_STRING(_String, _Size, _Offset) -#endif /* _SECURECRT_FILL_BUFFER */ - -#if _SECURECRT_FILL_BUFFER -#define _SECURECRT__FILL_BYTE(_Position) \ - if (_SECURECRT_FILL_BUFFER_THRESHOLD > 0) \ - { \ - (_Position) = _SECURECRT_FILL_BUFFER_PATTERN; \ - } -#else /* _SECURECRT_FILL_BUFFER */ -#define _SECURECRT__FILL_BYTE(_Position) -#endif /* _SECURECRT_FILL_BUFFER */ - -#ifdef __cplusplus -#define _REDIRECT_TO_L_VERSION_FUNC_PROLOGUE extern "C" -#else /* __cplusplus */ -#define _REDIRECT_TO_L_VERSION_FUNC_PROLOGUE -#endif /* __cplusplus */ - -/* helper macros to redirect an mbs function to the corresponding _l version */ -#define _REDIRECT_TO_L_VERSION_1(_ReturnType, _FunctionName, _Type1) \ - _REDIRECT_TO_L_VERSION_FUNC_PROLOGUE \ - _ReturnType __cdecl _FunctionName(_Type1 _Arg1) \ - { \ - return _FunctionName##_l(_Arg1, NULL); \ - } - -#define _REDIRECT_TO_L_VERSION_2(_ReturnType, _FunctionName, _Type1, _Type2) \ - _REDIRECT_TO_L_VERSION_FUNC_PROLOGUE \ - _ReturnType __cdecl _FunctionName(_Type1 _Arg1, _Type2 _Arg2) \ - { \ - return _FunctionName##_l(_Arg1, _Arg2, NULL); \ - } - -#define _REDIRECT_TO_L_VERSION_3(_ReturnType, _FunctionName, _Type1, _Type2, _Type3) \ - _REDIRECT_TO_L_VERSION_FUNC_PROLOGUE \ - _ReturnType __cdecl _FunctionName(_Type1 _Arg1, _Type2 _Arg2, _Type3 _Arg3) \ - { \ - return _FunctionName##_l(_Arg1, _Arg2, _Arg3, NULL); \ - } - -#define _REDIRECT_TO_L_VERSION_4(_ReturnType, _FunctionName, _Type1, _Type2, _Type3, _Type4) \ - _REDIRECT_TO_L_VERSION_FUNC_PROLOGUE \ - _ReturnType __cdecl _FunctionName(_Type1 _Arg1, _Type2 _Arg2, _Type3 _Arg3, _Type4 _Arg4) \ - { \ - return _FunctionName##_l(_Arg1, _Arg2, _Arg3, _Arg4, NULL); \ - } - -#define _REDIRECT_TO_L_VERSION_5(_ReturnType, _FunctionName, _Type1, _Type2, _Type3, _Type4, _Type5) \ - _REDIRECT_TO_L_VERSION_FUNC_PROLOGUE \ - _ReturnType __cdecl _FunctionName(_Type1 _Arg1, _Type2 _Arg2, _Type3 _Arg3, _Type4 _Arg4, _Type5 _Arg5) \ - { \ - return _FunctionName##_l(_Arg1, _Arg2, _Arg3, _Arg4, _Arg5, NULL); \ - } - -#define _REDIRECT_TO_L_VERSION_6(_ReturnType, _FunctionName, _Type1, _Type2, _Type3, _Type4, _Type5, _Type6) \ - _REDIRECT_TO_L_VERSION_FUNC_PROLOGUE \ - _ReturnType __cdecl _FunctionName(_Type1 _Arg1, _Type2 _Arg2, _Type3 _Arg3, _Type4 _Arg4, _Type5 _Arg5, _Type6 _Arg6) \ - { \ - return _FunctionName##_l(_Arg1, _Arg2, _Arg3, _Arg4, _Arg5, _Arg6, NULL); \ - } - -/* internal helper functions for encoding and decoding pointers */ -void __cdecl _init_pointers(); -_CRTIMP void * __cdecl _encode_pointer(void *); -_CRTIMP void * __cdecl _encoded_null(); -_CRTIMP void * __cdecl _decode_pointer(void *); - -/* internal helper function for communicating with the debugger */ -BOOL DebuggerKnownHandle(); - -#define _ERRCHECK(e) \ - _INVOKE_WATSON_IF_ERROR(e) - -#define _ERRCHECK_EINVAL(e) \ - _INVOKE_WATSON_IF_ONEOF(e, EINVAL, EINVAL) - -#define _ERRCHECK_EINVAL_ERANGE(e) \ - _INVOKE_WATSON_IF_ONEOF(e, EINVAL, ERANGE) - -#define _ERRCHECK_SPRINTF(_PrintfCall) \ - { \ - errno_t _SaveErrno = errno; \ - errno = 0; \ - if ( ( _PrintfCall ) < 0) \ - { \ - _ERRCHECK_EINVAL_ERANGE(errno); \ - } \ - errno = _SaveErrno; \ - } - -/* internal helper function to access environment variable in read-only mode */ -const char16_t * __cdecl _wgetenv_helper_nolock(const char16_t *); -const char * __cdecl _getenv_helper_nolock(const char *); - -/* internal helper routines used to query a PE 
image header. */ -BOOL __cdecl _ValidateImageBase(PBYTE pImageBase); -PIMAGE_SECTION_HEADER __cdecl _FindPESection(PBYTE pImageBase, DWORD_PTR rva); -BOOL __cdecl _IsNonwritableInCurrentImage(PBYTE pTarget); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#ifdef _MSC_VER -#pragma pack(pop) -#endif /* _MSC_VER */ - -#endif /* _INC_INTERNAL */ diff --git a/src/coreclr/pal/src/safecrt/internal_securecrt.h b/src/coreclr/pal/src/safecrt/internal_securecrt.h index f5117457c5d8..1c323efe0084 100644 --- a/src/coreclr/pal/src/safecrt/internal_securecrt.h +++ b/src/coreclr/pal/src/safecrt/internal_securecrt.h @@ -71,8 +71,6 @@ #define _TRUNCATE ((size_t)-1) #endif /* !defined (_TRUNCATE) */ -/* #include */ - #define _VALIDATE_RETURN_VOID( expr, errorcode ) \ { \ int _Expr_val=!!(expr); \ diff --git a/src/coreclr/pal/src/sharedmemory/sharedmemory.cpp b/src/coreclr/pal/src/sharedmemory/sharedmemory.cpp index a6074852b08a..5d12b850f9b9 100644 --- a/src/coreclr/pal/src/sharedmemory/sharedmemory.cpp +++ b/src/coreclr/pal/src/sharedmemory/sharedmemory.cpp @@ -25,8 +25,6 @@ SET_DEFAULT_DEBUG_CHANNEL(SHMEM); // some headers have code with asserts, so do using namespace CorUnix; -#include "pal/sharedmemory.inl" - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // AutoFreeBuffer @@ -127,11 +125,14 @@ void SharedMemorySystemCallErrors::Append(LPCSTR format, ...) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // SharedMemoryHelpers -const mode_t SharedMemoryHelpers::PermissionsMask_CurrentUser_ReadWriteExecute = S_IRUSR | S_IWUSR | S_IXUSR; +const mode_t SharedMemoryHelpers::PermissionsMask_OwnerUser_ReadWrite = S_IRUSR | S_IWUSR; +const mode_t SharedMemoryHelpers::PermissionsMask_OwnerUser_ReadWriteExecute = S_IRUSR | S_IWUSR | S_IXUSR; +const mode_t SharedMemoryHelpers::PermissionsMask_NonOwnerUsers_Write = S_IWGRP | S_IWOTH; const mode_t SharedMemoryHelpers::PermissionsMask_AllUsers_ReadWrite = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; const mode_t SharedMemoryHelpers::PermissionsMask_AllUsers_ReadWriteExecute = PermissionsMask_AllUsers_ReadWrite | (S_IXUSR | S_IXGRP | S_IXOTH); +const mode_t SharedMemoryHelpers::PermissionsMask_Sticky = S_ISVTX; const UINT32 SharedMemoryHelpers::InvalidProcessId = static_cast(-1); const SIZE_T SharedMemoryHelpers::InvalidThreadId = static_cast(-1); const UINT64 SharedMemoryHelpers::InvalidSharedThreadId = static_cast(-1); @@ -161,15 +162,20 @@ SIZE_T SharedMemoryHelpers::AlignUp(SIZE_T value, SIZE_T alignment) bool SharedMemoryHelpers::EnsureDirectoryExists( SharedMemorySystemCallErrors *errors, const char *path, + const SharedMemoryId *id, bool isGlobalLockAcquired, bool createIfNotExist, bool isSystemDirectory) { _ASSERTE(path != nullptr); + _ASSERTE(id != nullptr); _ASSERTE(!(isSystemDirectory && createIfNotExist)); // should not create or change permissions on system directories _ASSERTE(SharedMemoryManager::IsCreationDeletionProcessLockAcquired()); _ASSERTE(!isGlobalLockAcquired || SharedMemoryManager::IsCreationDeletionFileLockAcquired()); + mode_t permissionsMask = + id->IsUserScope() ? 
PermissionsMask_OwnerUser_ReadWriteExecute : PermissionsMask_AllUsers_ReadWriteExecute; + // Check if the path already exists struct stat statInfo; int statResult = stat(path, &statInfo); @@ -189,15 +195,16 @@ bool SharedMemoryHelpers::EnsureDirectoryExists( if (isGlobalLockAcquired) { - int operationResult = mkdir(path, PermissionsMask_AllUsers_ReadWriteExecute); + int operationResult = mkdir(path, permissionsMask); if (operationResult != 0) { if (errors != nullptr) { int errorCode = errno; errors->Append( - "mkdir(\"%s\", AllUsers_ReadWriteExecute) == %d; errno == %s;", + "mkdir(\"%s\", %s_ReadWriteExecute) == %d; errno == %s;", path, + id->IsUserScope() ? "OwnerUser" : "AllUsers", operationResult, GetFriendlyErrorCodeString(errorCode)); } @@ -205,15 +212,16 @@ bool SharedMemoryHelpers::EnsureDirectoryExists( throw SharedMemoryException(static_cast(SharedMemoryError::IO)); } - operationResult = ChangeMode(path, PermissionsMask_AllUsers_ReadWriteExecute); + operationResult = ChangeMode(path, permissionsMask); if (operationResult != 0) { if (errors != nullptr) { int errorCode = errno; errors->Append( - "chmod(\"%s\", AllUsers_ReadWriteExecute) == %d; errno == %s;", + "chmod(\"%s\", %s_ReadWriteExecute) == %d; errno == %s;", path, + id->IsUserScope() ? "OwnerUser" : "AllUsers", operationResult, GetFriendlyErrorCodeString(errorCode)); } @@ -226,7 +234,7 @@ bool SharedMemoryHelpers::EnsureDirectoryExists( } PathCharString tempPath; - BuildSharedFilesPath(tempPath, SHARED_MEMORY_UNIQUE_TEMP_NAME_TEMPLATE); + VerifyStringOperation(tempPath.Set(*gSharedFilesPath) && tempPath.Append(SHARED_MEMORY_UNIQUE_TEMP_NAME_TEMPLATE)); if (mkdtemp(tempPath.OpenStringBuffer()) == nullptr) { @@ -242,15 +250,16 @@ bool SharedMemoryHelpers::EnsureDirectoryExists( throw SharedMemoryException(static_cast(SharedMemoryError::IO)); } - int operationResult = ChangeMode(tempPath, PermissionsMask_AllUsers_ReadWriteExecute); + int operationResult = ChangeMode(tempPath, permissionsMask); if (operationResult != 0) { if (errors != nullptr) { int errorCode = errno; errors->Append( - "chmod(\"%s\", AllUsers_ReadWriteExecute) == %d; errno == %s;", + "chmod(\"%s\", %s_ReadWriteExecute) == %d; errno == %s;", (const char *)tempPath, + id->IsUserScope() ? "OwnerUser" : "AllUsers", operationResult, GetFriendlyErrorCodeString(errorCode)); } @@ -300,12 +309,21 @@ bool SharedMemoryHelpers::EnsureDirectoryExists( if (isSystemDirectory) { // For system directories (such as TEMP_DIRECTORY_PATH), require sufficient permissions only for the - // current user. For instance, "docker run --mount ..." to mount /tmp to some directory on the host mounts the + // owner user. For instance, "docker run --mount ..." to mount /tmp to some directory on the host mounts the // destination directory with the same permissions as the source directory, which may not include some permissions for // other users. In the docker container, other user permissions are typically not relevant and relaxing the permissions // requirement allows for that scenario to work without having to work around it by first giving sufficient permissions // for all users. - if ((statInfo.st_mode & PermissionsMask_CurrentUser_ReadWriteExecute) == PermissionsMask_CurrentUser_ReadWriteExecute) + // + // If the directory is being used for user-scoped shared memory data, also ensure that either it has the sticky bit or + // it's owned by the current user and without write access for other users. 
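The comment above describes when a system directory such as /tmp may hold user-scoped shared memory data: the owner needs full access, and either the sticky bit must be set or the directory must be owned by the current effective user with no group/other write access. A minimal stand-alone sketch of that check, using the same permission masks as this file:

```cpp
// Sticky-bit / ownership check for a shared system temp directory.
#include <sys/stat.h>
#include <unistd.h>
#include <cstdio>

static bool IsSafeForUserScopedData(const char* path)
{
    struct stat info;
    if (stat(path, &info) != 0)
        return false;

    const mode_t ownerRwx = S_IRUSR | S_IWUSR | S_IXUSR;
    if ((info.st_mode & ownerRwx) != ownerRwx)
        return false;                                  // owner lacks full access

    if (info.st_mode & S_ISVTX)
        return true;                                   // sticky bit set (e.g. /tmp)

    const mode_t othersWrite = S_IWGRP | S_IWOTH;
    return info.st_uid == geteuid() && (info.st_mode & othersWrite) == 0;
}

int main()
{
    printf("/tmp usable: %d\n", IsSafeForUserScopedData("/tmp"));
    return 0;
}
```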
+ permissionsMask = PermissionsMask_OwnerUser_ReadWriteExecute; + if ((statInfo.st_mode & permissionsMask) == permissionsMask && + ( + !id->IsUserScope() || + statInfo.st_mode & PermissionsMask_Sticky || + (statInfo.st_uid == id->GetUserScopeUid() && !(statInfo.st_mode & PermissionsMask_NonOwnerUsers_Write)) + )) { return true; } @@ -313,38 +331,71 @@ bool SharedMemoryHelpers::EnsureDirectoryExists( if (errors != nullptr) { errors->Append( - "stat(\"%s\", &info) == 0; info.st_mode == 0x%x; (info.st_mode & CurrentUser_ReadWriteExecute) != CurrentUser_ReadWriteExecute;", + "stat(\"%s\", &info) == 0; info.st_mode == 0x%x; info.st_uid == %u; info.st_mode || info.st_uid;", path, - (int)statInfo.st_mode); + (int)statInfo.st_mode, + (int)statInfo.st_uid); + } + + throw SharedMemoryException(static_cast(SharedMemoryError::IO)); + } + + // For non-system directories (such as gSharedFilesPath/SHARED_MEMORY_USER_UNSCOPED_RUNTIME_TEMP_DIRECTORY_NAME), + // require the sufficient permissions and try to update them if requested to create the directory, so that + // shared memory files may be shared according to its scope. + + // For user-scoped directories, verify the owner UID + if (id->IsUserScope() && statInfo.st_uid != id->GetUserScopeUid()) + { + if (errors != nullptr) + { + errors->Append( + "stat(\"%s\", &info) == 0; info.st_uid == %u; info.st_uid != %u;", + path, + (int)statInfo.st_uid, + (int)id->GetUserScopeUid()); } throw SharedMemoryException(static_cast(SharedMemoryError::IO)); } - // For non-system directories (such as gSharedFilesPath/SHARED_MEMORY_RUNTIME_TEMP_DIRECTORY_NAME), - // require sufficient permissions for all users and try to update them if requested to create the directory, so that - // shared memory files may be shared by all processes on the system. - if ((statInfo.st_mode & PermissionsMask_AllUsers_ReadWriteExecute) == PermissionsMask_AllUsers_ReadWriteExecute) + // Verify the permissions, or try to change them if possible + if ((statInfo.st_mode & PermissionsMask_AllUsers_ReadWriteExecute) == permissionsMask || + (createIfNotExist && ChangeMode(path, permissionsMask) == 0)) { return true; } - if (!createIfNotExist || ChangeMode(path, PermissionsMask_AllUsers_ReadWriteExecute) != 0) + + // We were not able to verify or set the necessary permissions. For user-scoped directories, this is treated as a failure + // since other users aren't sufficiently restricted in permissions. + if (id->IsUserScope()) { - // We were not asked to create the path or we weren't able to set the new permissions. - // As a last resort, check that at least the current user has full access. - if ((statInfo.st_mode & PermissionsMask_CurrentUser_ReadWriteExecute) != PermissionsMask_CurrentUser_ReadWriteExecute) + if (errors != nullptr) { - if (errors != nullptr) - { - errors->Append( - "stat(\"%s\", &info) == 0; info.st_mode == 0x%x; (info.st_mode & CurrentUser_ReadWriteExecute) != CurrentUser_ReadWriteExecute;", - path, - (int)statInfo.st_mode); - } + errors->Append( + "stat(\"%s\", &info) == 0; info.st_mode == 0x%x; (info.st_mode & AllUsers_ReadWriteExecute) != OwnerUser_ReadWriteExecute;", + path, + (int)statInfo.st_mode); + } - throw SharedMemoryException(static_cast(SharedMemoryError::IO)); + throw SharedMemoryException(static_cast(SharedMemoryError::IO)); + } + + // For user-unscoped directories, as a last resort, check that at least the owner user has full access. 
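The directory and file creation paths in this file create the object and then chmod() it to the exact mask, because the mode passed to mkdir()/open() is filtered by the process umask. A reduced sketch of that pattern with the scope-dependent mask selection; the path used in main() is only a placeholder:

```cpp
// Create a directory and force the intended permissions despite the umask.
#include <sys/stat.h>
#include <cerrno>
#include <cstdio>

static bool CreateSharedDirectory(const char* path, bool userScope)
{
    mode_t mask = userScope
        ? (S_IRUSR | S_IWUSR | S_IXUSR)    // OwnerUser_ReadWriteExecute
        : (S_IRWXU | S_IRWXG | S_IRWXO);   // AllUsers_ReadWriteExecute

    if (mkdir(path, mask) != 0 && errno != EEXIST)
        return false;

    // Re-apply the mask explicitly; mkdir() may have dropped bits via the umask.
    return chmod(path, mask) == 0;
}

int main()
{
    bool ok = CreateSharedDirectory("/tmp/example-shm-dir", /* userScope */ true);
    printf("created: %d\n", ok);
    return 0;
}
```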
+ permissionsMask = PermissionsMask_OwnerUser_ReadWriteExecute; + if ((statInfo.st_mode & permissionsMask) != permissionsMask) + { + if (errors != nullptr) + { + errors->Append( + "stat(\"%s\", &info) == 0; info.st_mode == 0x%x; (info.st_mode & OwnerUser_ReadWriteExecute) != OwnerUser_ReadWriteExecute;", + path, + (int)statInfo.st_mode); } + + throw SharedMemoryException(static_cast(SharedMemoryError::IO)); } + return true; } @@ -412,6 +463,7 @@ int SharedMemoryHelpers::OpenDirectory(SharedMemorySystemCallErrors *errors, LPC int SharedMemoryHelpers::CreateOrOpenFile( SharedMemorySystemCallErrors *errors, LPCSTR path, + const SharedMemoryId *id, bool createIfNotExist, bool *createdRef) { @@ -425,12 +477,64 @@ int SharedMemoryHelpers::CreateOrOpenFile( int fileDescriptor = Open(errors, path, openFlags); if (fileDescriptor != -1) { + // For user-scoped files, verify the owner UID and permissions + if (id->IsUserScope()) + { + struct stat statInfo; + int statResult = fstat(fileDescriptor, &statInfo); + if (statResult != 0) + { + if (errors != nullptr) + { + int errorCode = errno; + errors->Append( + "fstat(\"%s\", ...) == %d; errno == %s;", + path, + statResult, + GetFriendlyErrorCodeString(errorCode)); + } + + CloseFile(fileDescriptor); + throw SharedMemoryException((DWORD)SharedMemoryError::IO); + } + + if (statInfo.st_uid != id->GetUserScopeUid()) + { + if (errors != nullptr) + { + errors->Append( + "fstat(\"%s\", &info) == 0; info.st_uid == %u; info.st_uid != %u;", + path, + (int)statInfo.st_uid, + (int)id->GetUserScopeUid()); + } + + CloseFile(fileDescriptor); + throw SharedMemoryException((DWORD)SharedMemoryError::IO); + } + + if ((statInfo.st_mode & PermissionsMask_AllUsers_ReadWriteExecute) != PermissionsMask_OwnerUser_ReadWrite) + { + if (errors != nullptr) + { + errors->Append( + "fstat(\"%s\", &info) == 0; info.st_mode == 0x%x; (info.st_mode & AllUsers_ReadWriteExecute) != OwnerUser_ReadWrite;", + path, + (int)statInfo.st_mode); + } + + CloseFile(fileDescriptor); + throw SharedMemoryException((DWORD)SharedMemoryError::IO); + } + } + if (createdRef != nullptr) { *createdRef = false; } return fileDescriptor; } + _ASSERTE(errno == ENOENT); if (!createIfNotExist) { @@ -443,20 +547,22 @@ int SharedMemoryHelpers::CreateOrOpenFile( // File does not exist, create the file openFlags |= O_CREAT | O_EXCL; - fileDescriptor = Open(errors, path, openFlags, PermissionsMask_AllUsers_ReadWrite); + mode_t permissionsMask = id->IsUserScope() ? PermissionsMask_OwnerUser_ReadWrite : PermissionsMask_AllUsers_ReadWrite; + fileDescriptor = Open(errors, path, openFlags, permissionsMask); _ASSERTE(fileDescriptor != -1); // The permissions mask passed to open() is filtered by the process' permissions umask, so open() may not set all of // the requested permissions. Use chmod() to set the proper permissions. - int operationResult = ChangeMode(path, PermissionsMask_AllUsers_ReadWrite); + int operationResult = ChangeMode(path, permissionsMask); if (operationResult != 0) { if (errors != nullptr) { int errorCode = errno; errors->Append( - "chmod(\"%s\", AllUsers_ReadWrite) == %d; errno == %s;", + "chmod(\"%s\", %s_ReadWrite) == %d; errno == %s;", path, + id->IsUserScope() ? 
"OwnerUser" : "AllUsers", operationResult, GetFriendlyErrorCodeString(errorCode)); } @@ -663,14 +769,6 @@ void SharedMemoryHelpers::ReleaseFileLock(int fileDescriptor) } while (flockResult != 0 && errno == EINTR); } -void SharedMemoryHelpers::BuildSharedFilesPath(PathCharString& destination, const char *suffix, int suffixCharCount) -{ - _ASSERTE((int)strlen(suffix) == suffixCharCount); - - VerifyStringOperation(destination.Set(*gSharedFilesPath)); - VerifyStringOperation(destination.Append(suffix, suffixCharCount)); -} - bool SharedMemoryHelpers::AppendUInt32String( PathCharString& destination, UINT32 value) @@ -694,20 +792,12 @@ void SharedMemoryHelpers::VerifyStringOperation(bool success) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // SharedMemoryId -SharedMemoryId::SharedMemoryId() : m_name(nullptr) -{ -} - -SharedMemoryId::SharedMemoryId(LPCSTR name, SIZE_T nameCharCount, bool isSessionScope) - : m_name(name), m_nameCharCount(nameCharCount), m_isSessionScope(isSessionScope) +SharedMemoryId::SharedMemoryId() + : m_name(nullptr), m_nameCharCount(0), m_isSessionScope(false), m_isUserScope(false), m_userScopeUid((uid_t)0) { - _ASSERTE(name != nullptr); - _ASSERTE(nameCharCount != 0); - _ASSERTE(nameCharCount <= SHARED_MEMORY_MAX_FILE_NAME_CHAR_COUNT); - _ASSERTE(strlen(name) == nameCharCount); } -SharedMemoryId::SharedMemoryId(LPCSTR name) +SharedMemoryId::SharedMemoryId(LPCSTR name, bool isUserScope) { _ASSERTE(name != nullptr); @@ -746,6 +836,16 @@ SharedMemoryId::SharedMemoryId(LPCSTR name) throw SharedMemoryException(static_cast(SharedMemoryError::NameInvalid)); } } + + m_isUserScope = isUserScope; + m_userScopeUid = isUserScope ? geteuid() : (uid_t)0; + + // The uid_t is converted to UINT32 to create a directory name, verify that it's valid + static_assert_no_msg(sizeof(uid_t) <= sizeof(UINT32)); + if ((uid_t)(UINT32)m_userScopeUid != m_userScopeUid) + { + throw SharedMemoryException(static_cast(SharedMemoryError::IO)); + } } LPCSTR SharedMemoryId::GetName() const @@ -760,20 +860,56 @@ SIZE_T SharedMemoryId::GetNameCharCount() const return m_nameCharCount; } +void SharedMemoryId::ReplaceNamePtr(LPCSTR name) +{ + _ASSERTE(name != nullptr); + _ASSERTE(m_nameCharCount != 0); + _ASSERTE(strlen(name) == m_nameCharCount); + + m_name = name; +} + bool SharedMemoryId::IsSessionScope() const { _ASSERTE(m_name != nullptr); return m_isSessionScope; } -bool SharedMemoryId::Equals(SharedMemoryId *other) const +bool SharedMemoryId::IsUserScope() const +{ + _ASSERTE(m_name != nullptr); + return m_isUserScope; +} + +uid_t SharedMemoryId::GetUserScopeUid() const +{ + _ASSERTE(m_name != nullptr); + _ASSERTE(m_isUserScope); + return m_userScopeUid; +} + +bool SharedMemoryId::Equals(const SharedMemoryId *other) const { return GetNameCharCount() == other->GetNameCharCount() && IsSessionScope() == other->IsSessionScope() && + IsUserScope() == other->IsUserScope() && + (!IsUserScope() || GetUserScopeUid() == other->GetUserScopeUid()) && strcmp(GetName(), other->GetName()) == 0; } +bool SharedMemoryId::AppendRuntimeTempDirectoryName(PathCharString& path) const +{ + if (IsUserScope()) + { + return + path.Append(SHARED_MEMORY_USER_SCOPED_RUNTIME_TEMP_DIRECTORY_NAME_PREFIX) && + SharedMemoryHelpers::AppendUInt32String(path, (UINT32)GetUserScopeUid()); + } + + return path.Append(SHARED_MEMORY_USER_UNSCOPED_RUNTIME_TEMP_DIRECTORY_NAME); +} + bool SharedMemoryId::AppendSessionDirectoryName(PathCharString& path) const { if 
(IsSessionScope()) @@ -826,6 +962,7 @@ void *SharedMemorySharedDataHeader::GetData() SharedMemoryProcessDataHeader *SharedMemoryProcessDataHeader::CreateOrOpen( SharedMemorySystemCallErrors *errors, LPCSTR name, + bool isUserScope, SharedMemorySharedDataHeader requiredSharedDataHeader, SIZE_T sharedDataByteCount, bool createIfNotExist, @@ -843,11 +980,11 @@ SharedMemoryProcessDataHeader *SharedMemoryProcessDataHeader::CreateOrOpen( } PathCharString filePath; - SharedMemoryId id(name); + SharedMemoryId id(name, isUserScope); struct AutoCleanup { - bool m_acquiredCreationDeletionFileLock; + const SharedMemoryId *m_acquiredCreationDeletionFileLockForId; PathCharString *m_filePath; SIZE_T m_sessionDirectoryPathCharCount; bool m_createdFile; @@ -858,7 +995,7 @@ SharedMemoryProcessDataHeader *SharedMemoryProcessDataHeader::CreateOrOpen( bool m_cancel; AutoCleanup() - : m_acquiredCreationDeletionFileLock(false), + : m_acquiredCreationDeletionFileLockForId(nullptr), m_filePath(nullptr), m_sessionDirectoryPathCharCount(0), m_createdFile(false), @@ -907,9 +1044,9 @@ SharedMemoryProcessDataHeader *SharedMemoryProcessDataHeader::CreateOrOpen( rmdir(*m_filePath); } - if (m_acquiredCreationDeletionFileLock) + if (m_acquiredCreationDeletionFileLockForId != nullptr) { - SharedMemoryManager::ReleaseCreationDeletionFileLock(); + SharedMemoryManager::ReleaseCreationDeletionFileLock(m_acquiredCreationDeletionFileLockForId); } } } autoCleanup; @@ -924,14 +1061,16 @@ SharedMemoryProcessDataHeader *SharedMemoryProcessDataHeader::CreateOrOpen( return processDataHeader; } - SharedMemoryManager::AcquireCreationDeletionFileLock(errors); - autoCleanup.m_acquiredCreationDeletionFileLock = true; + SharedMemoryManager::AcquireCreationDeletionFileLock(errors, &id); + autoCleanup.m_acquiredCreationDeletionFileLockForId = &id; // Create the session directory - SharedMemoryHelpers::VerifyStringOperation(SharedMemoryManager::CopySharedMemoryBasePath(filePath)); - SharedMemoryHelpers::VerifyStringOperation(filePath.Append('/')); - SharedMemoryHelpers::VerifyStringOperation(id.AppendSessionDirectoryName(filePath)); - if (!SharedMemoryHelpers::EnsureDirectoryExists(errors, filePath, true /* isGlobalLockAcquired */, createIfNotExist)) + SharedMemoryHelpers::VerifyStringOperation( + filePath.Set(*gSharedFilesPath) && + id.AppendRuntimeTempDirectoryName(filePath) && + filePath.Append('/') && filePath.Append(SHARED_MEMORY_SHARED_MEMORY_DIRECTORY_NAME) && + filePath.Append('/') && id.AppendSessionDirectoryName(filePath)); + if (!SharedMemoryHelpers::EnsureDirectoryExists(errors, filePath, &id, true /* isGlobalLockAcquired */, createIfNotExist)) { _ASSERTE(!createIfNotExist); return nullptr; @@ -940,11 +1079,9 @@ SharedMemoryProcessDataHeader *SharedMemoryProcessDataHeader::CreateOrOpen( autoCleanup.m_sessionDirectoryPathCharCount = filePath.GetCount(); // Create or open the shared memory file - SharedMemoryHelpers::VerifyStringOperation(filePath.Append('/')); - SharedMemoryHelpers::VerifyStringOperation(filePath.Append(id.GetName(), id.GetNameCharCount())); - + SharedMemoryHelpers::VerifyStringOperation(filePath.Append('/') && filePath.Append(id.GetName(), id.GetNameCharCount())); bool createdFile; - int fileDescriptor = SharedMemoryHelpers::CreateOrOpenFile(errors, filePath, createIfNotExist, &createdFile); + int fileDescriptor = SharedMemoryHelpers::CreateOrOpenFile(errors, filePath, &id, createIfNotExist, &createdFile); if (fileDescriptor == -1) { _ASSERTE(!createIfNotExist); @@ -1039,8 +1176,8 @@ SharedMemoryProcessDataHeader 
*SharedMemoryProcessDataHeader::CreateOrOpen( // the shared data. The caller must release the file lock afterwards. if (!createdFile) { - autoCleanup.m_acquiredCreationDeletionFileLock = false; - SharedMemoryManager::ReleaseCreationDeletionFileLock(); + autoCleanup.m_acquiredCreationDeletionFileLockForId = nullptr; + SharedMemoryManager::ReleaseCreationDeletionFileLock(&id); } processDataHeader = SharedMemoryProcessDataHeader::New(&id, fileDescriptor, sharedDataHeader, sharedDataTotalByteCount); @@ -1108,12 +1245,13 @@ void SharedMemoryProcessDataHeader::PalObject_Close( } SharedMemoryProcessDataHeader::SharedMemoryProcessDataHeader( - SharedMemoryId *id, + const SharedMemoryId *id, int fileDescriptor, SharedMemorySharedDataHeader *sharedDataHeader, SIZE_T sharedDataTotalByteCount) : m_refCount(1), + m_id(*id), m_data(nullptr), m_fileDescriptor(fileDescriptor), m_sharedDataHeader(sharedDataHeader), @@ -1131,13 +1269,13 @@ SharedMemoryProcessDataHeader::SharedMemoryProcessDataHeader( char *nameCopy = reinterpret_cast(this + 1); SIZE_T nameByteCount = id->GetNameCharCount() + 1; memcpy_s(nameCopy, nameByteCount, id->GetName(), nameByteCount); - m_id = SharedMemoryId(nameCopy, id->GetNameCharCount(), id->IsSessionScope()); + m_id.ReplaceNamePtr(nameCopy); SharedMemoryManager::AddProcessDataHeader(this); } SharedMemoryProcessDataHeader *SharedMemoryProcessDataHeader::New( - SharedMemoryId *id, + const SharedMemoryId *id, int fileDescriptor, SharedMemorySharedDataHeader *sharedDataHeader, SIZE_T sharedDataTotalByteCount) @@ -1177,17 +1315,17 @@ void SharedMemoryProcessDataHeader::Close() struct AutoReleaseCreationDeletionFileLock { - bool m_acquired; + const SharedMemoryId *m_acquiredForId; - AutoReleaseCreationDeletionFileLock() : m_acquired(false) + AutoReleaseCreationDeletionFileLock() : m_acquiredForId(nullptr) { } ~AutoReleaseCreationDeletionFileLock() { - if (m_acquired) + if (m_acquiredForId != nullptr) { - SharedMemoryManager::ReleaseCreationDeletionFileLock(); + SharedMemoryManager::ReleaseCreationDeletionFileLock(m_acquiredForId); } } } autoReleaseCreationDeletionFileLock; @@ -1201,8 +1339,8 @@ void SharedMemoryProcessDataHeader::Close() bool releaseSharedData = false; try { - SharedMemoryManager::AcquireCreationDeletionFileLock(nullptr); - autoReleaseCreationDeletionFileLock.m_acquired = true; + SharedMemoryManager::AcquireCreationDeletionFileLock(nullptr, GetId()); + autoReleaseCreationDeletionFileLock.m_acquiredForId = GetId(); SharedMemoryHelpers::ReleaseFileLock(m_fileDescriptor); if (SharedMemoryHelpers::TryAcquireFileLock(nullptr, m_fileDescriptor, LOCK_EX | LOCK_NB)) @@ -1246,11 +1384,12 @@ void SharedMemoryProcessDataHeader::Close() { // Delete the shared memory file, and the session directory if it's not empty PathCharString path; - SharedMemoryHelpers::VerifyStringOperation(SharedMemoryManager::CopySharedMemoryBasePath(path)); - SharedMemoryHelpers::VerifyStringOperation(path.Append('/')); - SharedMemoryHelpers::VerifyStringOperation(m_id.AppendSessionDirectoryName(path)); - SharedMemoryHelpers::VerifyStringOperation(path.Append('/')); - + SharedMemoryHelpers::VerifyStringOperation( + path.Set(*gSharedFilesPath) && + m_id.AppendRuntimeTempDirectoryName(path) && + path.Append('/') && path.Append(SHARED_MEMORY_SHARED_MEMORY_DIRECTORY_NAME) && + path.Append('/') && m_id.AppendSessionDirectoryName(path) && + path.Append('/')); SIZE_T sessionDirectoryPathCharCount = path.GetCount(); SharedMemoryHelpers::VerifyStringOperation(path.Append(m_id.GetName(), m_id.GetNameCharCount())); 
unlink(path); @@ -1263,7 +1402,7 @@ void SharedMemoryProcessDataHeader::Close() } } -SharedMemoryId *SharedMemoryProcessDataHeader::GetId() +const SharedMemoryId *SharedMemoryProcessDataHeader::GetId() const { return &m_id; } @@ -1342,40 +1481,23 @@ void SharedMemoryProcessDataHeader::DecRefCount() //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // SharedMemoryManager -CRITICAL_SECTION SharedMemoryManager::s_creationDeletionProcessLock; +minipal_mutex SharedMemoryManager::s_creationDeletionProcessLock; int SharedMemoryManager::s_creationDeletionLockFileDescriptor = -1; +SharedMemoryManager::UserScopeUidAndFileDescriptor *SharedMemoryManager::s_userScopeUidToCreationDeletionLockFDs; +int SharedMemoryManager::s_userScopeUidToCreationDeletionLockFDsCount; +int SharedMemoryManager::s_userScopeUidToCreationDeletionLockFDsCapacity; + SharedMemoryProcessDataHeader *SharedMemoryManager::s_processDataHeaderListHead = nullptr; -PathCharString* SharedMemoryManager::s_runtimeTempDirectoryPath; -PathCharString* SharedMemoryManager::s_sharedMemoryDirectoryPath; #ifdef _DEBUG SIZE_T SharedMemoryManager::s_creationDeletionProcessLockOwnerThreadId = SharedMemoryHelpers::InvalidThreadId; SIZE_T SharedMemoryManager::s_creationDeletionFileLockOwnerThreadId = SharedMemoryHelpers::InvalidThreadId; #endif // _DEBUG -bool SharedMemoryManager::StaticInitialize() +void SharedMemoryManager::StaticInitialize() { - InitializeCriticalSection(&s_creationDeletionProcessLock); - - s_runtimeTempDirectoryPath = new(std::nothrow) PathCharString(); - s_sharedMemoryDirectoryPath = new(std::nothrow) PathCharString(); - - if (s_runtimeTempDirectoryPath && s_sharedMemoryDirectoryPath) - { - try - { - SharedMemoryHelpers::BuildSharedFilesPath(*s_runtimeTempDirectoryPath, SHARED_MEMORY_RUNTIME_TEMP_DIRECTORY_NAME); - SharedMemoryHelpers::BuildSharedFilesPath(*s_sharedMemoryDirectoryPath, SHARED_MEMORY_SHARED_MEMORY_DIRECTORY_NAME); - - return true; - } - catch (SharedMemoryException) - { - // Ignore and let function fail - } - } - return false; + minipal_mutex_init(&s_creationDeletionProcessLock); } void SharedMemoryManager::StaticClose() @@ -1400,7 +1522,7 @@ void SharedMemoryManager::AcquireCreationDeletionProcessLock() _ASSERTE(!IsCreationDeletionProcessLockAcquired()); _ASSERTE(!IsCreationDeletionFileLockAcquired()); - EnterCriticalSection(&s_creationDeletionProcessLock); + minipal_mutex_enter(&s_creationDeletionProcessLock); #ifdef _DEBUG s_creationDeletionProcessLockOwnerThreadId = THREADSilentGetCurrentThreadId(); #endif // _DEBUG @@ -1414,19 +1536,26 @@ void SharedMemoryManager::ReleaseCreationDeletionProcessLock() #ifdef _DEBUG s_creationDeletionProcessLockOwnerThreadId = SharedMemoryHelpers::InvalidThreadId; #endif // _DEBUG - LeaveCriticalSection(&s_creationDeletionProcessLock); + minipal_mutex_leave(&s_creationDeletionProcessLock); } -void SharedMemoryManager::AcquireCreationDeletionFileLock(SharedMemorySystemCallErrors *errors) +void SharedMemoryManager::AcquireCreationDeletionFileLock(SharedMemorySystemCallErrors *errors, const SharedMemoryId *id) { + _ASSERTE(id != nullptr); _ASSERTE(IsCreationDeletionProcessLockAcquired()); _ASSERTE(!IsCreationDeletionFileLockAcquired()); - if (s_creationDeletionLockFileDescriptor == -1) + int creationDeletionLockFD = + id->IsUserScope() ? 
FindUserScopeCreationDeletionLockFD(id->GetUserScopeUid()) : s_creationDeletionLockFileDescriptor;
+    if (creationDeletionLockFD == -1)
     {
+        // Create the shared files directory
+        PathCharString dirPath;
+        SharedMemoryHelpers::VerifyStringOperation(dirPath.Set(*gSharedFilesPath));
         if (!SharedMemoryHelpers::EnsureDirectoryExists(
                 errors,
-                *gSharedFilesPath,
+                dirPath,
+                id,
                 false /* isGlobalLockAcquired */,
                 false /* createIfNotExist */,
                 true /* isSystemDirectory */))
@@ -1440,49 +1569,117 @@ void SharedMemoryManager::AcquireCreationDeletionFileLock(SharedMemorySystemCall
             throw SharedMemoryException(static_cast<DWORD>(SharedMemoryError::IO));
         }
-        SharedMemoryHelpers::EnsureDirectoryExists(
-            errors,
-            *s_runtimeTempDirectoryPath,
-            false /* isGlobalLockAcquired */);
+        // Create the runtime temp directory
+        SharedMemoryHelpers::VerifyStringOperation(id->AppendRuntimeTempDirectoryName(dirPath));
+        SharedMemoryHelpers::EnsureDirectoryExists(errors, dirPath, id, false /* isGlobalLockAcquired */);
-        SharedMemoryHelpers::EnsureDirectoryExists(
-            errors,
-            *s_sharedMemoryDirectoryPath,
-            false /* isGlobalLockAcquired */);
+        // Create the shared memory directory
+        SharedMemoryHelpers::VerifyStringOperation(
+            dirPath.Append('/') && dirPath.Append(SHARED_MEMORY_SHARED_MEMORY_DIRECTORY_NAME));
+        SharedMemoryHelpers::EnsureDirectoryExists(errors, dirPath, id, false /* isGlobalLockAcquired */);
-        s_creationDeletionLockFileDescriptor = SharedMemoryHelpers::OpenDirectory(errors, *s_sharedMemoryDirectoryPath);
-        if (s_creationDeletionLockFileDescriptor == -1)
+        // Open the shared memory directory
+        creationDeletionLockFD = SharedMemoryHelpers::OpenDirectory(errors, dirPath);
+        if (creationDeletionLockFD == -1)
         {
             if (errors != nullptr)
             {
                 int errorCode = errno;
                 errors->Append(
                     "open(\"%s\", O_RDONLY | O_CLOEXEC, 0) == -1; errno == %s;",
-                    (const char *)*s_sharedMemoryDirectoryPath,
+                    (const char *)dirPath,
                     GetFriendlyErrorCodeString(errorCode));
             }
             throw SharedMemoryException(static_cast<DWORD>(SharedMemoryError::IO));
         }
+
+        if (id->IsUserScope())
+        {
+            AddUserScopeUidCreationDeletionLockFD(id->GetUserScopeUid(), creationDeletionLockFD);
+        }
+        else
+        {
+            s_creationDeletionLockFileDescriptor = creationDeletionLockFD;
+        }
     }
-    bool acquiredFileLock = SharedMemoryHelpers::TryAcquireFileLock(errors, s_creationDeletionLockFileDescriptor, LOCK_EX);
+    bool acquiredFileLock = SharedMemoryHelpers::TryAcquireFileLock(errors, creationDeletionLockFD, LOCK_EX);
     _ASSERTE(acquiredFileLock);
 #ifdef _DEBUG
     s_creationDeletionFileLockOwnerThreadId = THREADSilentGetCurrentThreadId();
 #endif // _DEBUG
 }
-void SharedMemoryManager::ReleaseCreationDeletionFileLock()
+void SharedMemoryManager::ReleaseCreationDeletionFileLock(const SharedMemoryId *id)
 {
+    _ASSERTE(id != nullptr);
     _ASSERTE(IsCreationDeletionProcessLockAcquired());
     _ASSERTE(IsCreationDeletionFileLockAcquired());
-    _ASSERTE(s_creationDeletionLockFileDescriptor != -1);
+
+    int creationDeletionLockFD =
+        id->IsUserScope() ?
FindUserScopeCreationDeletionLockFD(id->GetUserScopeUid()) : s_creationDeletionLockFileDescriptor; + _ASSERTE(creationDeletionLockFD != -1); #ifdef _DEBUG s_creationDeletionFileLockOwnerThreadId = SharedMemoryHelpers::InvalidThreadId; #endif // _DEBUG - SharedMemoryHelpers::ReleaseFileLock(s_creationDeletionLockFileDescriptor); + SharedMemoryHelpers::ReleaseFileLock(creationDeletionLockFD); +} + +void SharedMemoryManager::AddUserScopeUidCreationDeletionLockFD(uid_t userScopeUid, int creationDeletionLockFD) +{ + _ASSERTE(IsCreationDeletionProcessLockAcquired()); + _ASSERTE(creationDeletionLockFD != -1); + _ASSERTE(FindUserScopeCreationDeletionLockFD(userScopeUid) == -1); + + int count = s_userScopeUidToCreationDeletionLockFDsCount; + int capacity = s_userScopeUidToCreationDeletionLockFDsCapacity; + if (count >= capacity) + { + int newCapacity = capacity == 0 ? 1 : capacity * 2; + if (newCapacity <= capacity || + newCapacity * sizeof(UserScopeUidAndFileDescriptor) / sizeof(UserScopeUidAndFileDescriptor) != (SIZE_T)newCapacity) + { + throw SharedMemoryException(static_cast(SharedMemoryError::OutOfMemory)); + } + + UserScopeUidAndFileDescriptor *newArray = new(std::nothrow) UserScopeUidAndFileDescriptor[newCapacity]; + if (newArray == nullptr) + { + throw SharedMemoryException(static_cast(SharedMemoryError::OutOfMemory)); + } + + if (count != 0) + { + UserScopeUidAndFileDescriptor *oldArray = s_userScopeUidToCreationDeletionLockFDs; + CopyMemory(newArray, oldArray, count * sizeof(newArray[0])); + delete[] oldArray; + } + + s_userScopeUidToCreationDeletionLockFDs = newArray; + s_userScopeUidToCreationDeletionLockFDsCapacity = newCapacity; + } + + s_userScopeUidToCreationDeletionLockFDs[count] = UserScopeUidAndFileDescriptor(userScopeUid, creationDeletionLockFD); + s_userScopeUidToCreationDeletionLockFDsCount = count + 1; +} + +int SharedMemoryManager::FindUserScopeCreationDeletionLockFD(uid_t userScopeUid) +{ + _ASSERTE(IsCreationDeletionProcessLockAcquired()); + + UserScopeUidAndFileDescriptor *arr = s_userScopeUidToCreationDeletionLockFDs; + for (int i = 0; i < s_userScopeUidToCreationDeletionLockFDsCount; i++) + { + _ASSERTE(arr[i].fileDescriptor != -1); + if (arr[i].userScopeUid == userScopeUid) + { + return arr[i].fileDescriptor; + } + } + + return -1; } #ifdef _DEBUG @@ -1535,7 +1732,7 @@ void SharedMemoryManager::RemoveProcessDataHeader(SharedMemoryProcessDataHeader _ASSERTE(false); } -SharedMemoryProcessDataHeader *SharedMemoryManager::FindProcessDataHeader(SharedMemoryId *id) +SharedMemoryProcessDataHeader *SharedMemoryManager::FindProcessDataHeader(const SharedMemoryId *id) { _ASSERTE(IsCreationDeletionProcessLockAcquired()); @@ -1551,8 +1748,3 @@ SharedMemoryProcessDataHeader *SharedMemoryManager::FindProcessDataHeader(Shared } return nullptr; } - -bool SharedMemoryManager::CopySharedMemoryBasePath(PathCharString& destination) -{ - return destination.Set(*s_sharedMemoryDirectoryPath) != FALSE; -} diff --git a/src/coreclr/pal/src/sync/cs.cpp b/src/coreclr/pal/src/sync/cs.cpp deleted file mode 100644 index 032ff1c189f3..000000000000 --- a/src/coreclr/pal/src/sync/cs.cpp +++ /dev/null @@ -1,1457 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -/////////////////////////////////////////////////////////////////////////////// -// -// File: -// cs.cpp -// -// Purpose: -// Implementation of critical sections -// -/////////////////////////////////////////////////////////////////////////////// - -#include "pal/thread.hpp" -#include "pal/cs.hpp" -#include "pal/list.h" -#include "pal/dbgmsg.h" -#include "pal/init.h" -#include "pal/process.h" - -#include -#include - -using namespace CorUnix; - -// -// Uncomment the following line to turn CS behavior from -// unfair to fair lock -// -// #define PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - -// -// Uncomment the following line to enable simple mutex based CSs -// Note: when MUTEX_BASED_CSS is defined, PALCS_TRANSFER_OWNERSHIP_ON_RELEASE -// has no effect -// -// #define MUTEX_BASED_CSS - -// -// Important notes on critical sections layout/semantics on Unix -// -// 1) The PAL_CRITICAL_SECTION structure below must match the size of the -// CRITICAL_SECTION defined in pal.h. Besides the "windows part" -// of both the structures must be identical. -// 2) Both PAL_CRITICAL_SECTION and CRITICAL_SECTION currently do not match -// the size of the Windows' CRITICAL_SECTION. -// - From unmanaged code point of view, one should never make assumptions -// on the size and layout of the CRITICAL_SECTION structure, and anyway -// on Unix PAL's CRITICAL_SECTION extends the Windows one, so that some -// assumptions may still work. -// - From managed code point of view, one could try to interop directly -// to unmanaged critical sections APIs (though that would be quite -// meaningless). In order to do that, one would need to define a copy -// of the CRITICAL_SECTION structure in one's code, and that may lead -// to access random data beyond the structure limit, if that managed -// code is compiled on Unix. -// In case such scenario should be supported, the current implementation -// will have to be modified in a way to go back to the original Windows -// CRITICAL_SECTION layout. That would require to dynamically allocate -// the native data and use LockSemaphore as a pointer to it. The current -// solution intentionally avoids that since an effort has been made to -// make CSs objects completely independent from any other PAL subsystem, -// so that they can be used during initialization and shutdown. -// In case the "dynamically allocate native data" solution should be -// implemented, CSs would acquire a dependency on memory allocation and -// thread suspension subsystems, since the first contention on a specific -// CS would trigger the native data allocation. -// 3) The semantics of the LockCount field has not been kept compatible with -// the Windows implementation. -// Both on Windows and Unix the lower bit of LockCount indicates -// whether or not the CS is locked (for both fair and unfair lock -// solution), the second bit indicates whether or not currently there is a -// waiter that has been awakened and that is trying to acquire the CS -// (only unfair lock solution, unused in the fair one); starting from the -// third bit, LockCount represents the number of waiter threads currently -// waiting on the CS. -// Windows, anyway, implements this semantics in negative logic, so that -// an unlocked CS is represented by a LockCount == -1 (i.e. 0xFFFFFFFF, -// all the bits set), while on Unix an unlocked CS has LockCount == 0. -// Windows needs to use negative logic to support legacy code bad enough -// to directly access CS's fields making the assumption that -// LockCount == -1 means CS unlocked. 
Unix will not support that, and -// it uses positive logic. -// 4) The CRITICAL_SECTION_DEBUG_INFO layout on Unix is intentionally not -// compatible with the Windows layout. -// 5) For legacy code dependencies issues similar to those just described for -// the LockCount field, Windows CS code maintains a per-process list of -// debug info for all the CSs, both on debug and free/retail builds. On -// Unix such a list is maintained only on debug builds, and no debug -// info structure is allocated on free/retail builds -// - -SET_DEFAULT_DEBUG_CHANNEL(CRITSEC); - -#ifdef TRACE_CS_LOGIC -#define CS_TRACE TRACE -#else -#ifdef __GNUC__ -#define CS_TRACE(args...) -#else -#define CS_TRACE(...) -#endif -#endif // TRACE_CS_LOGIC - -// -// Note: PALCS_LOCK_WAITER_INC must be 2 * PALCS_LOCK_AWAKENED_WAITER -// -#define PALCS_LOCK_INIT 0 -#define PALCS_LOCK_BIT 1 -#define PALCS_LOCK_AWAKENED_WAITER 2 -#define PALCS_LOCK_WAITER_INC 4 - -#define PALCS_GETLBIT(val) ((int)(0!=(PALCS_LOCK_BIT&val))) -#define PALCS_GETAWBIT(val) ((int)(0!=(PALCS_LOCK_AWAKENED_WAITER&val))) -#define PALCS_GETWCOUNT(val) (val/PALCS_LOCK_WAITER_INC) - -enum PalCsInitState -{ - PalCsNotInitialized, // Critical section not initialized (InitializedCriticalSection - // has not yet been called, or DeleteCriticalsection has been - // called). - PalCsUserInitialized, // Critical section initialized from the user point of view, - // i.e. InitializedCriticalSection has been called. - PalCsFullyInitializing, // A thread found the CS locked, this is the first contention on - // this CS, and the thread is initializing the CS's native data. - PalCsFullyInitialized // Internal CS's native data has been fully initialized. -}; - -enum PalCsWaiterReturnState -{ - PalCsReturnWaiterAwakened, - PalCsWaiterDidntWait -}; - -struct _PAL_CRITICAL_SECTION; // fwd declaration - -typedef struct _CRITICAL_SECTION_DEBUG_INFO -{ - LIST_ENTRY Link; - struct _PAL_CRITICAL_SECTION * pOwnerCS; - Volatile lAcquireCount; - Volatile lEnterCount; - Volatile lContentionCount; -} CRITICAL_SECTION_DEBUG_INFO, *PCRITICAL_SECTION_DEBUG_INFO; - -typedef struct _PAL_CRITICAL_SECTION_NATIVE_DATA -{ - pthread_mutex_t mutex; - pthread_cond_t condition; - int iPredicate; -} PAL_CRITICAL_SECTION_NATIVE_DATA, *PPAL_CRITICAL_SECTION_NATIVE_DATA; - -typedef struct _PAL_CRITICAL_SECTION { - // Windows part - PCRITICAL_SECTION_DEBUG_INFO DebugInfo; - Volatile LockCount; - LONG RecursionCount; - SIZE_T OwningThread; - ULONG_PTR SpinCount; - // Private Unix part -#ifdef PAL_TRACK_CRITICAL_SECTIONS_DATA - BOOL fInternal; -#endif // PAL_TRACK_CRITICAL_SECTIONS_DATA - Volatile cisInitState; - PAL_CRITICAL_SECTION_NATIVE_DATA csndNativeData; -} PAL_CRITICAL_SECTION, *PPAL_CRITICAL_SECTION, *LPPAL_CRITICAL_SECTION; - -#ifdef _DEBUG -namespace CorUnix -{ - PAL_CRITICAL_SECTION g_csPALCSsListLock; - LIST_ENTRY g_PALCSList = { &g_PALCSList, &g_PALCSList}; -} -#endif // _DEBUG - -#define ObtainCurrentThreadId(thread) ObtainCurrentThreadIdImpl(thread, __func__) -static SIZE_T ObtainCurrentThreadIdImpl(CPalThread *pCurrentThread, const char *callingFuncName) -{ - SIZE_T threadId; - if(pCurrentThread) - { - threadId = pCurrentThread->GetThreadId(); - _ASSERTE(threadId == THREADSilentGetCurrentThreadId()); - } - else - { - threadId = THREADSilentGetCurrentThreadId(); - CS_TRACE("Early %s, no pthread data, getting TID internally\n", callingFuncName); - } - _ASSERTE(0 != threadId); - - return threadId; -} - - -/*++ -Function: - InitializeCriticalSection - -See MSDN doc. 
---*/ -void InitializeCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - PERF_ENTRY(InitializeCriticalSection); - ENTRY("InitializeCriticalSection(lpCriticalSection=%p)\n", - lpCriticalSection); - - InternalInitializeCriticalSectionAndSpinCount(lpCriticalSection, - 0, false); - - LOGEXIT("InitializeCriticalSection returns void\n"); - PERF_EXIT(InitializeCriticalSection); -} - -/*++ -Function: - InitializeCriticalSectionAndSpinCount - -See MSDN doc. ---*/ -BOOL InitializeCriticalSectionAndSpinCount(LPCRITICAL_SECTION lpCriticalSection, - DWORD dwSpinCount) -{ - BOOL bRet = TRUE; - PERF_ENTRY(InitializeCriticalSectionAndSpinCount); - ENTRY("InitializeCriticalSectionAndSpinCount(lpCriticalSection=%p, " - "dwSpinCount=%u)\n", lpCriticalSection, dwSpinCount); - - InternalInitializeCriticalSectionAndSpinCount(lpCriticalSection, - dwSpinCount, false); - - LOGEXIT("InitializeCriticalSectionAndSpinCount returns BOOL %d\n", - bRet); - PERF_EXIT(InitializeCriticalSectionAndSpinCount); - return bRet; -} - -/*++ -Function: - DeleteCriticalSection - -See MSDN doc. ---*/ -void DeleteCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - PERF_ENTRY(DeleteCriticalSection); - ENTRY("DeleteCriticalSection(lpCriticalSection=%p)\n", lpCriticalSection); - - InternalDeleteCriticalSection(lpCriticalSection); - - LOGEXIT("DeleteCriticalSection returns void\n"); - PERF_EXIT(DeleteCriticalSection); -} - -/*++ -Function: - EnterCriticalSection - -See MSDN doc. ---*/ -void EnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - PERF_ENTRY(EnterCriticalSection); - ENTRY("EnterCriticalSection(lpCriticalSection=%p)\n", lpCriticalSection); - - CPalThread * pThread = InternalGetCurrentThread(); - - InternalEnterCriticalSection(pThread, lpCriticalSection); - - LOGEXIT("EnterCriticalSection returns void\n"); - PERF_EXIT(EnterCriticalSection); -} - -/*++ -Function: - LeaveCriticalSection - -See MSDN doc. ---*/ -VOID LeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - PERF_ENTRY(LeaveCriticalSection); - ENTRY("LeaveCriticalSection(lpCriticalSection=%p)\n", lpCriticalSection); - - CPalThread * pThread = InternalGetCurrentThread(); - - InternalLeaveCriticalSection(pThread, lpCriticalSection); - - LOGEXIT("LeaveCriticalSection returns void\n"); - PERF_EXIT(LeaveCriticalSection); -} - -/*++ -Function: - InternalInitializeCriticalSection - -Initializes a critical section. It assumes the CS is an internal one, -i.e. thread entering it will be marked unsafe for suspension ---*/ -VOID InternalInitializeCriticalSection(CRITICAL_SECTION *pcs) -{ - InternalInitializeCriticalSectionAndSpinCount(pcs, 0, true); -} - -/*++ -Function: - InternalDeleteCriticalSection - -Deletes a critical section ---*/ -VOID InternalDeleteCriticalSection( - PCRITICAL_SECTION pCriticalSection) -{ - PAL_CRITICAL_SECTION * pPalCriticalSection = - reinterpret_cast(pCriticalSection); - - _ASSERT_MSG(PalCsUserInitialized == pPalCriticalSection->cisInitState || - PalCsFullyInitialized == pPalCriticalSection->cisInitState, - "CS %p is not initialized", pPalCriticalSection); - -#ifdef _DEBUG - CPalThread * pThread = - (PALIsThreadDataInitialized() ? 
GetCurrentPalThread() : NULL); - - if (0 != pPalCriticalSection->LockCount) - { - SIZE_T tid; - tid = ObtainCurrentThreadId(pThread); - int iWaiterCount = (int)PALCS_GETWCOUNT(pPalCriticalSection->LockCount); - - if (0 != (PALCS_LOCK_BIT & pPalCriticalSection->LockCount)) - { - // CS is locked - if (tid != pPalCriticalSection->OwningThread) - { - // not owner - ASSERT("Thread tid=%u deleting a CS owned by thread tid=%u\n", - tid, pPalCriticalSection->OwningThread); - } - else - { - // owner - if (0 != iWaiterCount) - { - ERROR("Thread tid=%u is deleting a CS with %d threads waiting on it\n", - tid, iWaiterCount); - } - else - { - WARN("Thread tid=%u is deleting a critical section it still owns\n", - tid); - } - } - } - else - { - // CS is not locked - if (0 != iWaiterCount) - { - ERROR("Deleting a CS with %d threads waiting on it\n", - iWaiterCount); - } - else - { - ERROR("Thread tid=%u is deleting a critical section currently not " - "owned, but with one waiter awakened\n", tid); - } - } - } - - if (NULL != pPalCriticalSection->DebugInfo) - { - if (pPalCriticalSection != &CorUnix::g_csPALCSsListLock) - { - InternalEnterCriticalSection(pThread, - reinterpret_cast(&g_csPALCSsListLock)); - RemoveEntryList(&pPalCriticalSection->DebugInfo->Link); - InternalLeaveCriticalSection(pThread, - reinterpret_cast(&g_csPALCSsListLock)); - } - else - { - RemoveEntryList(&pPalCriticalSection->DebugInfo->Link); - } - -#ifdef PAL_TRACK_CRITICAL_SECTIONS_DATA - LONG lVal, lNewVal; - Volatile * plDest; - - // Update delete count - InterlockedIncrement(pPalCriticalSection->fInternal ? - &g_lPALCSInternalDeleteCount : &g_lPALCSDeleteCount); - - // Update acquire count - plDest = pPalCriticalSection->fInternal ? - &g_lPALCSInternalAcquireCount : &g_lPALCSAcquireCount; - do { - lVal = *plDest; - lNewVal = lVal + pPalCriticalSection->DebugInfo->lAcquireCount; - lNewVal = InterlockedCompareExchange(plDest, lNewVal, lVal); - } while (lVal != lNewVal); - - // Update enter count - plDest = pPalCriticalSection->fInternal ? - &g_lPALCSInternalEnterCount : &g_lPALCSEnterCount; - do { - lVal = *plDest; - lNewVal = lVal + pPalCriticalSection->DebugInfo->lEnterCount; - lNewVal = InterlockedCompareExchange(plDest, lNewVal, lVal); - } while (lVal != lNewVal); - - // Update contention count - plDest = pPalCriticalSection->fInternal ? 
- &g_lPALCSInternalContentionCount : &g_lPALCSContentionCount; - do { - lVal = *plDest; - lNewVal = lVal + pPalCriticalSection->DebugInfo->lContentionCount; - lNewVal = InterlockedCompareExchange(plDest, lNewVal, lVal); - } while (lVal != lNewVal); - -#endif // PAL_TRACK_CRITICAL_SECTIONS_DATA - - delete pPalCriticalSection->DebugInfo; - pPalCriticalSection->DebugInfo = NULL; - } -#endif // _DEBUG - - if (PalCsFullyInitialized == pPalCriticalSection->cisInitState) - { - int iRet; - - // destroy condition - iRet = pthread_cond_destroy(&pPalCriticalSection->csndNativeData.condition); - _ASSERT_MSG(0 == iRet, "Failed destroying condition in CS @ %p " - "[err=%d]\n", pPalCriticalSection, iRet); - - // destroy mutex - iRet = pthread_mutex_destroy(&pPalCriticalSection->csndNativeData.mutex); - _ASSERT_MSG(0 == iRet, "Failed destroying mutex in CS @ %p " - "[err=%d]\n", pPalCriticalSection, iRet); - } - - // Reset critical section state - pPalCriticalSection->cisInitState = PalCsNotInitialized; -} - -// The following PALCEnterCriticalSection and PALCLeaveCriticalSection -// functions are intended to provide CorUnix's InternalEnterCriticalSection -// and InternalLeaveCriticalSection functionalities to legacy C code, -// which has no knowledge of CPalThread, classes and namespaces. - -/*++ -Function: - PALCEnterCriticalSection - -Provides CorUnix's InternalEnterCriticalSection functionality to legacy C code, -which has no knowledge of CPalThread, classes and namespaces. ---*/ -VOID PALCEnterCriticalSection(CRITICAL_SECTION * pcs) -{ - CPalThread * pThread = - (PALIsThreadDataInitialized() ? GetCurrentPalThread() : NULL); - CorUnix::InternalEnterCriticalSection(pThread, pcs); -} - -/*++ -Function: - PALCLeaveCriticalSection - -Provides CorUnix's InternalLeaveCriticalSection functionality to legacy C code, -which has no knowledge of CPalThread, classes and namespaces. ---*/ -VOID PALCLeaveCriticalSection(CRITICAL_SECTION * pcs) -{ - CPalThread * pThread = - (PALIsThreadDataInitialized() ? 
GetCurrentPalThread() : NULL); - CorUnix::InternalLeaveCriticalSection(pThread, pcs); -} - -namespace CorUnix -{ - static PalCsWaiterReturnState PALCS_WaitOnCS( - PAL_CRITICAL_SECTION * pPalCriticalSection, - LONG lInc); - static PAL_ERROR PALCS_DoActualWait(PAL_CRITICAL_SECTION * pPalCriticalSection); - static PAL_ERROR PALCS_WakeUpWaiter(PAL_CRITICAL_SECTION * pPalCriticalSection); - static bool PALCS_FullyInitialize(PAL_CRITICAL_SECTION * pPalCriticalSection); - -#ifdef _DEBUG - enum CSSubSysInitState - { - CSSubSysNotInitialized, - CSSubSysInitializing, - CSSubSysInitialized - }; - static Volatile csssInitState = CSSubSysNotInitialized; - -#ifdef PAL_TRACK_CRITICAL_SECTIONS_DATA - static Volatile g_lPALCSInitializeCount = 0; - static Volatile g_lPALCSDeleteCount = 0; - static Volatile g_lPALCSAcquireCount = 0; - static Volatile g_lPALCSEnterCount = 0; - static Volatile g_lPALCSContentionCount = 0; - static Volatile g_lPALCSInternalInitializeCount = 0; - static Volatile g_lPALCSInternalDeleteCount = 0; - static Volatile g_lPALCSInternalAcquireCount = 0; - static Volatile g_lPALCSInternalEnterCount = 0; - static Volatile g_lPALCSInternalContentionCount = 0; -#endif // PAL_TRACK_CRITICAL_SECTIONS_DATA -#endif // _DEBUG - - - /*++ - Function: - CorUnix::CriticalSectionSubSysInitialize - - Initializes CS subsystem - --*/ - void CriticalSectionSubSysInitialize() - { - static_assert(sizeof(CRITICAL_SECTION) >= sizeof(PAL_CRITICAL_SECTION), - "PAL fatal internal error: sizeof(CRITICAL_SECTION) is " - "smaller than sizeof(PAL_CRITICAL_SECTION)"); - -#ifdef _DEBUG - LONG lRet = InterlockedCompareExchange((LONG *)&csssInitState, - (LONG)CSSubSysInitializing, - (LONG)CSSubSysNotInitialized); - if ((LONG)CSSubSysNotInitialized == lRet) - { - InitializeListHead(&g_PALCSList); - - InternalInitializeCriticalSectionAndSpinCount( - reinterpret_cast(&g_csPALCSsListLock), - 0, true); - InterlockedExchange((LONG *)&csssInitState, - (LONG)CSSubSysInitialized); - } - else - { - while (csssInitState != CSSubSysInitialized) - { - sched_yield(); - } - } -#endif // _DEBUG - } - - /*++ - Function: - CorUnix::InternalInitializeCriticalSectionAndSpinCount - - Initializes a CS with the given spin count. If 'fInternal' is true - the CS will be treatead as an internal one for its whole lifetime, - i.e. 
any thread that will enter it will be marked as unsafe for - suspension as long as it holds the CS - --*/ - void InternalInitializeCriticalSectionAndSpinCount( - PCRITICAL_SECTION pCriticalSection, - DWORD dwSpinCount, - bool fInternal) - { - PAL_CRITICAL_SECTION * pPalCriticalSection = - reinterpret_cast(pCriticalSection); - -#ifndef PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - // Make sure bits are defined in a usable way - _ASSERTE(PALCS_LOCK_AWAKENED_WAITER * 2 == PALCS_LOCK_WAITER_INC); -#endif // !PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - - // Make sure structure sizes are compatible - _ASSERTE(sizeof(CRITICAL_SECTION) >= sizeof(PAL_CRITICAL_SECTION)); - -#ifdef _DEBUG - if (sizeof(CRITICAL_SECTION) > sizeof(PAL_CRITICAL_SECTION)) - { - WARN("PAL_CS_NATIVE_DATA_SIZE appears to be defined to a value (%d) " - "larger than needed on this platform (%d).\n", - sizeof(CRITICAL_SECTION), sizeof(PAL_CRITICAL_SECTION)); - } -#endif // _DEBUG - - // Init CS data - pPalCriticalSection->DebugInfo = NULL; - pPalCriticalSection->LockCount = 0; - pPalCriticalSection->RecursionCount = 0; - pPalCriticalSection->SpinCount = dwSpinCount; - pPalCriticalSection->OwningThread = 0; - -#ifdef _DEBUG - CPalThread * pThread = - (PALIsThreadDataInitialized() ? GetCurrentPalThread() : NULL); - - pPalCriticalSection->DebugInfo = new(std::nothrow) CRITICAL_SECTION_DEBUG_INFO(); - _ASSERT_MSG(NULL != pPalCriticalSection->DebugInfo, - "Failed to allocate debug info for new CS\n"); - - // Init debug info data - pPalCriticalSection->DebugInfo->lAcquireCount = 0; - pPalCriticalSection->DebugInfo->lEnterCount = 0; - pPalCriticalSection->DebugInfo->lContentionCount = 0; - pPalCriticalSection->DebugInfo->pOwnerCS = pPalCriticalSection; - - // Insert debug info struct in global list - if (pPalCriticalSection != &g_csPALCSsListLock) - { - InternalEnterCriticalSection(pThread, - reinterpret_cast(&g_csPALCSsListLock)); - InsertTailList(&g_PALCSList, &pPalCriticalSection->DebugInfo->Link); - InternalLeaveCriticalSection(pThread, - reinterpret_cast(&g_csPALCSsListLock)); - } - else - { - InsertTailList(&g_PALCSList, &pPalCriticalSection->DebugInfo->Link); - } - -#ifdef PAL_TRACK_CRITICAL_SECTIONS_DATA - pPalCriticalSection->fInternal = fInternal; - InterlockedIncrement(fInternal ? - &g_lPALCSInternalInitializeCount : &g_lPALCSInitializeCount); -#endif // PAL_TRACK_CRITICAL_SECTIONS_DATA -#endif // _DEBUG - - // Set initializazion state - pPalCriticalSection->cisInitState = PalCsUserInitialized; - -#ifdef MUTEX_BASED_CSS - bool fInit; - do - { - fInit = PALCS_FullyInitialize(pPalCriticalSection); - _ASSERTE(fInit); - } while (!fInit && 0 == sched_yield()); - - if (fInit) - { - // Set initializazion state - pPalCriticalSection->cisInitState = PalCsFullyInitialized; - } -#endif // MUTEX_BASED_CSS - } - -#ifndef MUTEX_BASED_CSS - /*++ - Function: - CorUnix::InternalEnterCriticalSection - - Enters a CS, causing the thread to block if the CS is owned by - another thread - --*/ - void InternalEnterCriticalSection( - CPalThread * pThread, - PCRITICAL_SECTION pCriticalSection) - { - PAL_CRITICAL_SECTION * pPalCriticalSection = - reinterpret_cast(pCriticalSection); - - LONG lSpinCount; - LONG lVal, lNewVal; - LONG lBitsToChange, lWaitInc; - PalCsWaiterReturnState cwrs; - SIZE_T threadId; - - _ASSERTE(PalCsNotInitialized != pPalCriticalSection->cisInitState); - - threadId = ObtainCurrentThreadId(pThread); - - - // Check if the current thread already owns the CS - // - // Note: there is no need for this double check to be atomic. 
In fact - // if the first check fails, the second doesn't count (and it's not - // even executed). If the first one succeeds and the second one - // doesn't, it doesn't matter if LockCount has already changed by the - // time OwningThread is tested. Instead, if the first one succeeded, - // and the second also succeeds, LockCount cannot have changed in the - // meanwhile, since this is the owning thread and only the owning - // thread can change the lock bit when the CS is owned. - if ((pPalCriticalSection->LockCount & PALCS_LOCK_BIT) && - (pPalCriticalSection->OwningThread == threadId)) - { - pPalCriticalSection->RecursionCount += 1; -#ifdef _DEBUG - if (NULL != pPalCriticalSection->DebugInfo) - { - pPalCriticalSection->DebugInfo->lEnterCount += 1; - } -#endif // _DEBUG - goto IECS_exit; - } - - // Set bits to change and waiter increment for an incoming thread - lBitsToChange = PALCS_LOCK_BIT; - lWaitInc = PALCS_LOCK_WAITER_INC; - lSpinCount = pPalCriticalSection->SpinCount; - - while (TRUE) - { - // Either this is an incoming thread, and therefore lBitsToChange - // is just PALCS_LOCK_BIT, or this is an awakened waiter - _ASSERTE(PALCS_LOCK_BIT == lBitsToChange || - (PALCS_LOCK_BIT | PALCS_LOCK_AWAKENED_WAITER) == lBitsToChange); - - // Make sure the waiter increment is in a valid range - _ASSERTE(PALCS_LOCK_WAITER_INC == lWaitInc || - PALCS_LOCK_AWAKENED_WAITER == lWaitInc); - - do { - lVal = pPalCriticalSection->LockCount; - - while (0 == (lVal & PALCS_LOCK_BIT)) - { - // CS is not locked: try lo lock it - - // Make sure that whether we are an incoming thread - // or the PALCS_LOCK_AWAKENED_WAITER bit is set - _ASSERTE((PALCS_LOCK_BIT == lBitsToChange) || - (PALCS_LOCK_AWAKENED_WAITER & lVal)); - - lNewVal = lVal ^ lBitsToChange; - - // Make sure we are actually trying to lock - _ASSERTE(lNewVal & PALCS_LOCK_BIT); - - CS_TRACE("[ECS %p] Switching from {%d, %d, %d} to " - "{%d, %d, %d} ==>\n", pPalCriticalSection, - PALCS_GETWCOUNT(lVal), PALCS_GETAWBIT(lVal), PALCS_GETLBIT(lVal), - PALCS_GETWCOUNT(lNewVal), PALCS_GETAWBIT(lNewVal), PALCS_GETLBIT(lNewVal)); - - // Try to switch the value - lNewVal = InterlockedCompareExchange (&pPalCriticalSection->LockCount, - lNewVal, lVal); - - CS_TRACE("[ECS %p] ==> %s LockCount={%d, %d, %d} " - "lVal={%d, %d, %d}\n", pPalCriticalSection, - (lNewVal == lVal) ? "OK" : "NO", - PALCS_GETWCOUNT(pPalCriticalSection->LockCount), - PALCS_GETAWBIT(pPalCriticalSection->LockCount), - PALCS_GETLBIT(pPalCriticalSection->LockCount), - PALCS_GETWCOUNT(lVal), PALCS_GETAWBIT(lVal), PALCS_GETLBIT(lVal)); - - if (lNewVal == lVal) - { - // CS successfully acquired - goto IECS_set_ownership; - } - - // Acquisition failed, some thread raced with us; - // update value for next loop - lVal = lNewVal; - } - - if (0 < lSpinCount) - { - sched_yield(); - } - } while (0 <= --lSpinCount); - - cwrs = PALCS_WaitOnCS(pPalCriticalSection, lWaitInc); - - if (PalCsReturnWaiterAwakened == cwrs) - { -#ifdef PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - // - // Fair Critical Sections - // - // In the fair lock case, when a waiter wakes up the CS - // must be locked (i.e. 
ownership passed on to the waiter) - _ASSERTE(0 != (PALCS_LOCK_BIT & pPalCriticalSection->LockCount)); - - // CS successfully acquired - goto IECS_set_ownership; - -#else // PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - // - // Unfair Critical Sections - // - _ASSERTE(PALCS_LOCK_AWAKENED_WAITER & pPalCriticalSection->LockCount); - - lBitsToChange = PALCS_LOCK_BIT | PALCS_LOCK_AWAKENED_WAITER; - lWaitInc = PALCS_LOCK_AWAKENED_WAITER; -#endif // PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - } - } - - IECS_set_ownership: - // Critical section acquired: set ownership data - pPalCriticalSection->OwningThread = threadId; - pPalCriticalSection->RecursionCount = 1; -#ifdef _DEBUG - if (NULL != pPalCriticalSection->DebugInfo) - { - pPalCriticalSection->DebugInfo->lAcquireCount += 1; - pPalCriticalSection->DebugInfo->lEnterCount += 1; - } -#endif // _DEBUG - - IECS_exit: - return; - } - - /*++ - Function: - CorUnix::InternalLeaveCriticalSection - - Leaves a currently owned CS - --*/ - void InternalLeaveCriticalSection(CPalThread * pThread, - PCRITICAL_SECTION pCriticalSection) - { - PAL_CRITICAL_SECTION * pPalCriticalSection = - reinterpret_cast(pCriticalSection); - LONG lVal, lNewVal; - -#ifdef _DEBUG - SIZE_T threadId; - - _ASSERTE(PalCsNotInitialized != pPalCriticalSection->cisInitState); - - threadId = ObtainCurrentThreadId(pThread); - _ASSERTE(threadId == pPalCriticalSection->OwningThread); -#endif // _DEBUG - - _ASSERT_MSG(PALCS_LOCK_BIT & pPalCriticalSection->LockCount, - "Trying to release an unlocked CS\n"); - _ASSERT_MSG(0 < pPalCriticalSection->RecursionCount, - "Trying to release an unlocked CS\n"); - - if (--pPalCriticalSection->RecursionCount > 0) - { - // Recursion was > 1, still owning the CS - goto ILCS_cs_exit; - } - - // Reset CS ownership - pPalCriticalSection->OwningThread = 0; - - // Load the current LockCount value - lVal = pPalCriticalSection->LockCount; - - while (true) - { - _ASSERT_MSG(0 != (PALCS_LOCK_BIT & lVal), - "Trying to release an unlocked CS\n"); - - // NB: In the fair lock case (PALCS_TRANSFER_OWNERSHIP_ON_RELEASE) the - // PALCS_LOCK_AWAKENED_WAITER bit is not used - if ( (PALCS_LOCK_BIT == lVal) -#ifndef PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - || (PALCS_LOCK_AWAKENED_WAITER & lVal) -#endif // !PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - ) - { - // Whether there are no waiters (PALCS_LOCK_BIT == lVal) - // or a waiter has already been awakened, therefore we - // just need to reset the lock bit and return - lNewVal = lVal & ~PALCS_LOCK_BIT; - CS_TRACE("[LCS-UN %p] Switching from {%d, %d, %d} to " - "{%d, %d, %d} ==>\n", pPalCriticalSection, - PALCS_GETWCOUNT(lVal), PALCS_GETAWBIT(lVal), PALCS_GETLBIT(lVal), - PALCS_GETWCOUNT(lNewVal), PALCS_GETAWBIT(lNewVal), PALCS_GETLBIT(lNewVal)); - - lNewVal = InterlockedCompareExchange(&pPalCriticalSection->LockCount, - lNewVal, lVal); - - CS_TRACE("[LCS-UN %p] ==> %s\n", pPalCriticalSection, - (lNewVal == lVal) ? "OK" : "NO"); - - if (lNewVal == lVal) - { - goto ILCS_cs_exit; - } - } - else - { - // There is at least one waiter, we need to wake it up - -#ifdef PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - // Fair lock case: passing ownership on to the first waiter. - // Here we need only to decrement the waiters count. 
CS will - // remain locked and ownership will be passed to the waiter, - // which will take care of setting ownership data as soon as - // it wakes up - lNewVal = lVal - PALCS_LOCK_WAITER_INC; -#else // PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - // Unfair lock case: we need to atomically decrement the waiters - // count (we are about ot wake up one of them), set the - // "waiter awakened" bit and to reset the "CS locked" bit. - // Note that, since we know that at this time PALCS_LOCK_BIT - // is set and PALCS_LOCK_AWAKENED_WAITER is not set, none of - // the addenda will affect bits other than its target bit(s), - // i.e. PALCS_LOCK_BIT will not affect PALCS_LOCK_AWAKENED_WAITER, - // PALCS_LOCK_AWAKENED_WAITER will not affect the actual - // count of waiters, and the latter will not change the two - // former ones - lNewVal = lVal - PALCS_LOCK_WAITER_INC + - PALCS_LOCK_AWAKENED_WAITER - PALCS_LOCK_BIT; -#endif // PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - CS_TRACE("[LCS-CN %p] Switching from {%d, %d, %d} to {%d, %d, %d} ==>\n", - pPalCriticalSection, - PALCS_GETWCOUNT(lVal), PALCS_GETAWBIT(lVal), PALCS_GETLBIT(lVal), - PALCS_GETWCOUNT(lNewVal), PALCS_GETAWBIT(lNewVal), PALCS_GETLBIT(lNewVal)); - - lNewVal = InterlockedCompareExchange(&pPalCriticalSection->LockCount, - lNewVal, lVal); - - CS_TRACE("[LCS-CN %p] ==> %s\n", pPalCriticalSection, - (lNewVal == lVal) ? "OK" : "NO"); - - if (lNewVal == lVal) - { - // Wake up the waiter - PALCS_WakeUpWaiter (pPalCriticalSection); - -#ifdef PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - // In the fair lock case, we need to yield here to defeat - // the inherently unfair nature of the condition/predicate - // construct - sched_yield(); -#endif // PALCS_TRANSFER_OWNERSHIP_ON_RELEASE - - goto ILCS_cs_exit; - } - } - - // CS unlock failed due to race with another thread trying to - // register as waiter on it. We need to keep on looping. We - // intentionally do not yield here in order to reserve higher - // priority for the releasing thread. - // - // At this point lNewVal contains the latest LockCount value - // retrieved by one of the two InterlockedCompareExchange above; - // we can use this value as expected LockCount for the next loop, - // without the need to fetch it again. - lVal = lNewVal; - } - - ILCS_cs_exit: - return; - } - -#endif // MUTEX_BASED_CSS - - /*++ - Function: - CorUnix::PALCS_FullyInitialize - - Fully initializes a CS which was previously initialized in InitializeCriticalSection. 
- This method is called at the first contention on the target CS - --*/ - bool PALCS_FullyInitialize(PAL_CRITICAL_SECTION * pPalCriticalSection) - { - LONG lVal, lNewVal; - bool fRet = true; - - lVal = pPalCriticalSection->cisInitState; - if (PalCsFullyInitialized == lVal) - { - goto PCDI_exit; - } - if (PalCsUserInitialized == lVal) - { - int iRet; - lNewVal = (LONG)PalCsFullyInitializing; - lNewVal = InterlockedCompareExchange( - (LONG *)&pPalCriticalSection->cisInitState, lNewVal, lVal); - if (lNewVal != lVal) - { - if (PalCsFullyInitialized == lNewVal) - { - // Another thread did initialize this CS: we can - // safely return 'true' - goto PCDI_exit; - } - - // Another thread is still initializing this CS: yield and - // spin by returning 'false' - sched_yield(); - fRet = false; - goto PCDI_exit; - } - - // - // Actual native initialization - // - // Mutex - iRet = pthread_mutex_init(&pPalCriticalSection->csndNativeData.mutex, NULL); - if (0 != iRet) - { - ASSERT("Failed initializing mutex in CS @ %p [err=%d]\n", - pPalCriticalSection, iRet); - pPalCriticalSection->cisInitState = PalCsUserInitialized; - fRet = false; - goto PCDI_exit; - } -#ifndef MUTEX_BASED_CSS - // Condition - iRet = pthread_cond_init(&pPalCriticalSection->csndNativeData.condition, NULL); - if (0 != iRet) - { - ASSERT("Failed initializing condition in CS @ %p [err=%d]\n", - pPalCriticalSection, iRet); - pthread_mutex_destroy(&pPalCriticalSection->csndNativeData.mutex); - pPalCriticalSection->cisInitState = PalCsUserInitialized; - fRet = false; - goto PCDI_exit; - } - // Predicate - pPalCriticalSection->csndNativeData.iPredicate = 0; -#endif - - pPalCriticalSection->cisInitState = PalCsFullyInitialized; - } - else if (PalCsFullyInitializing == lVal) - { - // Another thread is still initializing this CS: yield and - // spin by returning 'false' - sched_yield(); - fRet = false; - goto PCDI_exit; - } - else - { - ASSERT("CS %p is not initialized", pPalCriticalSection); - fRet = false; - goto PCDI_exit; - } - - PCDI_exit: - return fRet; - } - - - /*++ - Function: - CorUnix::PALCS_WaitOnCS - - Waits on a CS owned by another thread. It returns PalCsReturnWaiterAwakened - if the thread actually waited on the CS and it has been awakened on CS - release. It returns PalCsWaiterDidntWait if another thread is currently - fully-initializing the CS and therefore the current thread couldn't wait - on it - --*/ - PalCsWaiterReturnState PALCS_WaitOnCS(PAL_CRITICAL_SECTION * pPalCriticalSection, - LONG lInc) - { - DWORD lVal, lNewVal; - PAL_ERROR palErr = NO_ERROR; - - if (PalCsFullyInitialized != pPalCriticalSection->cisInitState) - { - // First contention, the CS native wait support need to be - // initialized at this time - if (!PALCS_FullyInitialize(pPalCriticalSection)) - { - // The current thread failed the full initialization of the CS, - // whether because another thread is race-initializing it, or - // there are no enough memory/resources at this time, or - // InitializeCriticalSection has never been called. 
By - // returning we will cause the thread to spin on CS trying - // again until the CS is initialized - return PalCsWaiterDidntWait; - } - } - - // Make sure we have a valid waiter increment - _ASSERTE(PALCS_LOCK_WAITER_INC == lInc || - PALCS_LOCK_AWAKENED_WAITER == lInc); - - do { - lVal = pPalCriticalSection->LockCount; - - // Make sure the waiter increment is compatible with the - // awakened waiter bit value - _ASSERTE(PALCS_LOCK_WAITER_INC == lInc || - PALCS_LOCK_AWAKENED_WAITER & lVal); - - if (0 == (lVal & PALCS_LOCK_BIT)) - { - // the CS is no longer locked, let's bail out - return PalCsWaiterDidntWait; - } - - lNewVal = lVal + lInc; - - // Make sure that this thread was whether an incoming one or it - // was an awakened waiter and, in this case, we are now going to - // turn off the awakened waiter bit - _ASSERT_MSG(PALCS_LOCK_WAITER_INC == lInc || - 0 == (PALCS_LOCK_AWAKENED_WAITER & lNewVal)); - - CS_TRACE("[WCS %p] Switching from {%d, %d, %d} to " - "{%d, %d, %d} ==> ", pPalCriticalSection, - PALCS_GETWCOUNT(lVal), PALCS_GETAWBIT(lVal), PALCS_GETLBIT(lVal), - PALCS_GETWCOUNT(lNewVal), PALCS_GETAWBIT(lNewVal), PALCS_GETLBIT(lNewVal)); - - lNewVal = InterlockedCompareExchange (&pPalCriticalSection->LockCount, - lNewVal, lVal); - - CS_TRACE("[WCS %p] ==> %s\n", pPalCriticalSection, - (lNewVal == lVal) ? "OK" : "NO"); - - } while (lNewVal != lVal); - -#ifdef _DEBUG - if (NULL != pPalCriticalSection->DebugInfo) - { - pPalCriticalSection->DebugInfo->lContentionCount += 1; - } -#endif // _DEBUG - - // Do the actual native wait - palErr = PALCS_DoActualWait(pPalCriticalSection); - _ASSERT_MSG(NO_ERROR == palErr, "Native CS wait failed\n"); - - return PalCsReturnWaiterAwakened; - } - - /*++ - Function: - CorUnix::PALCS_DoActualWait - - Performs the actual native wait on the CS - --*/ - PAL_ERROR PALCS_DoActualWait(PAL_CRITICAL_SECTION * pPalCriticalSection) - { - int iRet; - PAL_ERROR palErr = NO_ERROR; - - CS_TRACE("Trying to go to sleep [CS=%p]\n", pPalCriticalSection); - - // Lock the mutex - iRet = pthread_mutex_lock(&pPalCriticalSection->csndNativeData.mutex); - if (0 != iRet) - { - palErr = ERROR_INTERNAL_ERROR; - goto PCDAW_exit; - } - - CS_TRACE("Actually Going to sleep [CS=%p]\n", pPalCriticalSection); - - while (0 == pPalCriticalSection->csndNativeData.iPredicate) - { - // Wait on the condition - iRet = pthread_cond_wait(&pPalCriticalSection->csndNativeData.condition, - &pPalCriticalSection->csndNativeData.mutex); - - CS_TRACE("Got a signal on condition [pred=%d]!\n", - pPalCriticalSection->csndNativeData.iPredicate); - if (0 != iRet) - { - // Failed: unlock the mutex and bail out - ASSERT("Failed waiting on condition in CS %p [err=%d]\n", - pPalCriticalSection, iRet); - pthread_mutex_unlock(&pPalCriticalSection->csndNativeData.mutex); - palErr = ERROR_INTERNAL_ERROR; - goto PCDAW_exit; - } - } - - // Reset the predicate - pPalCriticalSection->csndNativeData.iPredicate = 0; - - // Unlock the mutex - iRet = pthread_mutex_unlock(&pPalCriticalSection->csndNativeData.mutex); - if (0 != iRet) - { - palErr = ERROR_INTERNAL_ERROR; - goto PCDAW_exit; - } - - PCDAW_exit: - - CS_TRACE("Just woken up [CS=%p]\n", pPalCriticalSection); - - return palErr; - } - - /*++ - Function: - CorUnix::PALCS_WakeUpWaiter - - Wakes up the first thread waiting on the CS - --*/ - PAL_ERROR PALCS_WakeUpWaiter(PAL_CRITICAL_SECTION * pPalCriticalSection) - { - int iRet; - PAL_ERROR palErr = NO_ERROR; - - _ASSERT_MSG(PalCsFullyInitialized == pPalCriticalSection->cisInitState, - "Trying to wake up a waiter on 
CS not fully initialized\n"); - - // Lock the mutex - iRet = pthread_mutex_lock(&pPalCriticalSection->csndNativeData.mutex); - if (0 != iRet) - { - palErr = ERROR_INTERNAL_ERROR; - goto PCWUW_exit; - } - - // Set the predicate - pPalCriticalSection->csndNativeData.iPredicate = 1; - - CS_TRACE("Signaling condition/predicate [pred=%d]!\n", - pPalCriticalSection->csndNativeData.iPredicate); - - // Signal the condition - iRet = pthread_cond_signal(&pPalCriticalSection->csndNativeData.condition); - if (0 != iRet) - { - // Failed: set palErr, but continue in order to unlock - // the mutex anyway - ASSERT("Failed setting condition in CS %p [ret=%d]\n", - pPalCriticalSection, iRet); - palErr = ERROR_INTERNAL_ERROR; - } - - // Unlock the mutex - iRet = pthread_mutex_unlock(&pPalCriticalSection->csndNativeData.mutex); - if (0 != iRet) - { - palErr = ERROR_INTERNAL_ERROR; - goto PCWUW_exit; - } - - PCWUW_exit: - return palErr; - } - -#ifdef _DEBUG - /*++ - Function: - CorUnix::PALCS_ReportStatisticalData - - Report creation/acquisition/contention statistical data for the all the - CSs so far existed and no longer existing in the current process - --*/ - void PALCS_ReportStatisticalData() - { -#ifdef PAL_TRACK_CRITICAL_SECTIONS_DATA - CPalThread * pThread = InternalGetCurrentThread(); - - if (NULL == pThread) DebugBreak(); - - // Take the lock for the global list of CS debug infos - InternalEnterCriticalSection(pThread, (CRITICAL_SECTION*)&g_csPALCSsListLock); - - LONG lPALCSInitializeCount = g_lPALCSInitializeCount; - LONG lPALCSDeleteCount = g_lPALCSDeleteCount; - LONG lPALCSAcquireCount = g_lPALCSAcquireCount; - LONG lPALCSEnterCount = g_lPALCSEnterCount; - LONG lPALCSContentionCount = g_lPALCSContentionCount; - LONG lPALCSInternalInitializeCount = g_lPALCSInternalInitializeCount; - LONG lPALCSInternalDeleteCount = g_lPALCSInternalDeleteCount; - LONG lPALCSInternalAcquireCount = g_lPALCSInternalAcquireCount; - LONG lPALCSInternalEnterCount = g_lPALCSInternalEnterCount; - LONG lPALCSInternalContentionCount = g_lPALCSInternalContentionCount; - - PLIST_ENTRY pItem = g_PALCSList.Flink; - while (&g_PALCSList != pItem) - { - PCRITICAL_SECTION_DEBUG_INFO pDebugInfo = - (PCRITICAL_SECTION_DEBUG_INFO)pItem; - - if (pDebugInfo->pOwnerCS->fInternal) - { - lPALCSInternalAcquireCount += pDebugInfo->lAcquireCount; - lPALCSInternalEnterCount += pDebugInfo->lEnterCount; - lPALCSInternalContentionCount += pDebugInfo->lContentionCount; - } - else - { - lPALCSAcquireCount += pDebugInfo->lAcquireCount; - lPALCSEnterCount += pDebugInfo->lEnterCount; - lPALCSContentionCount += pDebugInfo->lContentionCount; - } - - pItem = pItem->Flink; - } - - // Release the lock for the global list of CS debug infos - InternalLeaveCriticalSection(pThread, (CRITICAL_SECTION*)&g_csPALCSsListLock); - - TRACE("Critical Sections Statistical Data:\n"); - TRACE("{\n"); - TRACE(" Client code CSs:\n"); - TRACE(" {\n"); - TRACE(" Initialize Count: %d\n", lPALCSInitializeCount); - TRACE(" Delete Count: %d\n", lPALCSDeleteCount); - TRACE(" Acquire Count: %d\n", lPALCSAcquireCount); - TRACE(" Enter Count: %d\n", lPALCSEnterCount); - TRACE(" Contention Count: %d\n", lPALCSContentionCount); - TRACE(" }\n"); - TRACE(" Internal PAL CSs:\n"); - TRACE(" {\n"); - TRACE(" Initialize Count: %d\n", lPALCSInternalInitializeCount); - TRACE(" Delete Count: %d\n", lPALCSInternalDeleteCount); - TRACE(" Acquire Count: %d\n", lPALCSInternalAcquireCount); - TRACE(" Enter Count: %d\n", lPALCSInternalEnterCount); - TRACE(" Contention Count: %d\n", 
lPALCSInternalContentionCount); - TRACE(" }\n"); - TRACE("}\n"); -#endif // PAL_TRACK_CRITICAL_SECTIONS_DATA - } - - /*++ - Function: - CorUnix::PALCS_DumpCSList - - Dumps the list of all the CS currently existing in this process. - --*/ - void PALCS_DumpCSList() - { - CPalThread * pThread = InternalGetCurrentThread(); - - // Take the lock for the global list of CS debug infos - InternalEnterCriticalSection(pThread, (CRITICAL_SECTION*)&g_csPALCSsListLock); - - PLIST_ENTRY pItem = g_PALCSList.Flink; - while (&g_PALCSList != pItem) - { - PCRITICAL_SECTION_DEBUG_INFO pDebugInfo = - (PCRITICAL_SECTION_DEBUG_INFO)pItem; - PPAL_CRITICAL_SECTION pCS = pDebugInfo->pOwnerCS; - - printf("CS @ %p \n" - "{\tDebugInfo = %p -> \n", - pCS, pDebugInfo); - - printf("\t{\n\t\t[Link]\n\t\tpOwnerCS = %p\n" - "\t\tAcquireCount \t= %d\n" - "\t\tEnterCount \t= %d\n" - "\t\tContentionCount = %d\n", - pDebugInfo->pOwnerCS, pDebugInfo->lAcquireCount.Load(), - pDebugInfo->lEnterCount.Load(), pDebugInfo->lContentionCount.Load()); - printf("\t}\n"); - - printf("\tLockCount \t= %#x\n" - "\tRecursionCount \t= %d\n" - "\tOwningThread \t= %p\n" - "\tSpinCount \t= %u\n" - "\tfInternal \t= %d\n" - "\teInitState \t= %u\n" - "\tpNativeData \t= %p ->\n", - pCS->LockCount.Load(), pCS->RecursionCount, (void *)pCS->OwningThread, - (unsigned)pCS->SpinCount, -#ifdef PAL_TRACK_CRITICAL_SECTIONS_DATA - (int)pCS->fInternal, -#else - (int)0, -#endif // PAL_TRACK_CRITICAL_SECTIONS_DATA - pCS->cisInitState.Load(), &pCS->csndNativeData); - - printf("\t{\n\t\t[mutex]\n\t\t[condition]\n" - "\t\tPredicate \t= %d\n" - "\t}\n}\n",pCS->csndNativeData.iPredicate); - - printf("}\n"); - - pItem = pItem->Flink; - } - - // Release the lock for the global list of CS debug infos - InternalLeaveCriticalSection(pThread, (CRITICAL_SECTION*)&g_csPALCSsListLock); - } -#endif // _DEBUG - - -#if defined(MUTEX_BASED_CSS) || defined(_DEBUG) - /*++ - Function: - CorUnix::InternalEnterCriticalSection - - Enters a CS, causing the thread to block if the CS is owned by - another thread - --*/ -#ifdef MUTEX_BASED_CSS - void InternalEnterCriticalSection( - CPalThread * pThread, - PCRITICAL_SECTION pCriticalSection) -#else // MUTEX_BASED_CSS - void MTX_InternalEnterCriticalSection( - CPalThread * pThread, - PCRITICAL_SECTION pCriticalSection) -#endif // MUTEX_BASED_CSS - - { - PAL_CRITICAL_SECTION * pPalCriticalSection = - reinterpret_cast(pCriticalSection); - int iRet; - SIZE_T threadId; - - _ASSERTE(PalCsNotInitialized != pPalCriticalSection->cisInitState); - - threadId = ObtainCurrentThreadId(pThread); - - /* check if the current thread already owns the criticalSection */ - if (pPalCriticalSection->OwningThread == threadId) - { - _ASSERTE(0 < pPalCriticalSection->RecursionCount); - pPalCriticalSection->RecursionCount += 1; - return; - } - - iRet = pthread_mutex_lock(&pPalCriticalSection->csndNativeData.mutex); - _ASSERTE(0 == iRet); - - pPalCriticalSection->OwningThread = threadId; - pPalCriticalSection->RecursionCount = 1; - } - - - /*++ - Function: - CorUnix::InternalLeaveCriticalSection - - Leaves a currently owned CS - --*/ -#ifdef MUTEX_BASED_CSS - void InternalLeaveCriticalSection( - CPalThread * pThread, - PCRITICAL_SECTION pCriticalSection) -#else // MUTEX_BASED_CSS - void MTX_InternalLeaveCriticalSection( - CPalThread * pThread, - PCRITICAL_SECTION pCriticalSection) -#endif // MUTEX_BASED_CSS - { - PAL_CRITICAL_SECTION * pPalCriticalSection = - reinterpret_cast(pCriticalSection); - int iRet; -#ifdef _DEBUG - SIZE_T threadId; - - 
_ASSERTE(PalCsNotInitialized != pPalCriticalSection->cisInitState); - - threadId = ObtainCurrentThreadId(pThread); - _ASSERTE(threadId == pPalCriticalSection->OwningThread); - - if (0 >= pPalCriticalSection->RecursionCount) - DebugBreak(); - - _ASSERTE(0 < pPalCriticalSection->RecursionCount); -#endif // _DEBUG - - if (0 < --pPalCriticalSection->RecursionCount) - return; - - pPalCriticalSection->OwningThread = 0; - - iRet = pthread_mutex_unlock(&pPalCriticalSection->csndNativeData.mutex); - _ASSERTE(0 == iRet); - } - -#endif // MUTEX_BASED_CSS || _DEBUG -} diff --git a/src/coreclr/pal/src/synchmgr/synchmanager.cpp b/src/coreclr/pal/src/synchmgr/synchmanager.cpp index eaccd19e209b..0665df179caa 100644 --- a/src/coreclr/pal/src/synchmgr/synchmanager.cpp +++ b/src/coreclr/pal/src/synchmgr/synchmanager.cpp @@ -44,18 +44,6 @@ SET_DEFAULT_DEBUG_CHANNEL(SYNC); // some headers have code with asserts, so do t const int CorUnix::CThreadSynchronizationInfo::PendingSignalingsArraySize; -// We use the synchronization manager's worker thread to handle -// process termination requests. It does so by calling the -// registered handler function. -PTERMINATION_REQUEST_HANDLER g_terminationRequestHandler = NULL; - -// Set the handler for process termination requests. -VOID PALAPI PAL_SetTerminationRequestHandler( - IN PTERMINATION_REQUEST_HANDLER terminationHandler) -{ - g_terminationRequestHandler = terminationHandler; -} - namespace CorUnix { ///////////////////////////////// @@ -153,8 +141,8 @@ namespace CorUnix CPalSynchronizationManager * CPalSynchronizationManager::s_pObjSynchMgr = NULL; Volatile CPalSynchronizationManager::s_lInitStatus = SynchMgrStatusIdle; - CRITICAL_SECTION CPalSynchronizationManager::s_csSynchProcessLock; - CRITICAL_SECTION CPalSynchronizationManager::s_csMonitoredProcessesLock; + minipal_mutex CPalSynchronizationManager::s_csSynchProcessLock; + minipal_mutex CPalSynchronizationManager::s_csMonitoredProcessesLock; CPalSynchronizationManager::CPalSynchronizationManager() : m_dwWorkerThreadTid(0), @@ -1160,25 +1148,6 @@ namespace CorUnix return palErr; } - /*++ - Method: - CPalSynchronizationManager::SendTerminationRequestToWorkerThread - - Send a request to the worker thread to initiate process termination. - --*/ - PAL_ERROR CPalSynchronizationManager::SendTerminationRequestToWorkerThread() - { - PAL_ERROR palErr = GetInstance()->WakeUpLocalWorkerThread(SynchWorkerCmdTerminationRequest); - if (palErr != NO_ERROR) - { - ERROR("Failed to wake up worker thread [errno=%d {%s%}]\n", - errno, strerror(errno)); - palErr = ERROR_INTERNAL_ERROR; - } - - return palErr; - } - /*++ Method: CPalSynchronizationManager::AreAPCsPending @@ -1329,8 +1298,8 @@ namespace CorUnix goto I_exit; } - InternalInitializeCriticalSection(&s_csSynchProcessLock); - InternalInitializeCriticalSection(&s_csMonitoredProcessesLock); + minipal_mutex_init(&s_csSynchProcessLock); + minipal_mutex_init(&s_csMonitoredProcessesLock); pSynchManager = new(std::nothrow) CPalSynchronizationManager(); if (NULL == pSynchManager) @@ -1340,13 +1309,14 @@ namespace CorUnix goto I_exit; } +#ifndef __wasm__ if (!pSynchManager->CreateProcessPipe()) { ERROR("Unable to create process pipe \n"); palErr = ERROR_OPEN_FAILED; goto I_exit; } - +#endif s_pObjSynchMgr = pSynchManager; // Initialization was successful @@ -1557,22 +1527,6 @@ namespace CorUnix return palErr; } - // Entry point routine for the thread that initiates process termination. 
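Editorial aside (illustrative sketch): the deleted mutex-based enter/leave pair above layers recursion on top of a plain mutex by remembering the owning thread and a recursion count. The stripped-down equivalent below uses standard C++ types; all names are hypothetical and this is not the PAL implementation.

#include <atomic>
#include <mutex>
#include <thread>

struct RecursiveCS
{
    std::mutex                   mutex;
    std::atomic<std::thread::id> owner{};      // thread currently inside, if any
    int                          recursionCount = 0;
};

void EnterCS(RecursiveCS* cs)
{
    if (cs->owner.load(std::memory_order_relaxed) == std::this_thread::get_id())
    {
        ++cs->recursionCount;                  // re-entry by the owner: no lock needed
        return;
    }
    cs->mutex.lock();
    cs->owner.store(std::this_thread::get_id(), std::memory_order_relaxed);
    cs->recursionCount = 1;
}

void LeaveCS(RecursiveCS* cs)
{
    if (--cs->recursionCount > 0)
    {
        return;                                // still held recursively
    }
    cs->owner.store(std::thread::id(), std::memory_order_relaxed);
    cs->mutex.unlock();
}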
- DWORD PALAPI TerminationRequestHandlingRoutine(LPVOID pArg) - { - // Call the termination request handler if one is registered. - if (g_terminationRequestHandler != NULL) - { - // The process will terminate normally by calling exit. - // We use an exit code of '128 + signo'. This is a convention used in popular - // shells to calculate an exit code when the process was terminated by a signal. - // This is also used by the Process.ExitCode implementation. - g_terminationRequestHandler(128 + SIGTERM); - } - - return 0; - } - /*++ Method: CPalSynchronizationManager::WorkerThread @@ -1611,31 +1565,6 @@ namespace CorUnix } switch (swcCmd) { - case SynchWorkerCmdTerminationRequest: - // This worker thread is being asked to initiate process termination - - HANDLE hTerminationRequestHandlingThread; - palErr = InternalCreateThread(pthrWorker, - NULL, - 0, - &TerminationRequestHandlingRoutine, - NULL, - 0, - PalWorkerThread, - NULL, - &hTerminationRequestHandlingThread); - - if (NO_ERROR != palErr) - { - ERROR("Unable to create worker thread\n"); - } - - if (hTerminationRequestHandlingThread != NULL) - { - CloseHandle(hTerminationRequestHandlingThread); - } - - break; case SynchWorkerCmdNop: TRACE("Synch Worker: received SynchWorkerCmdNop\n"); if (fShuttingDown) @@ -1775,8 +1704,7 @@ namespace CorUnix } _ASSERT_MSG(SynchWorkerCmdNop == swcWorkerCmd || - SynchWorkerCmdShutdown == swcWorkerCmd || - SynchWorkerCmdTerminationRequest == swcWorkerCmd, + SynchWorkerCmdShutdown == swcWorkerCmd, "Unknown worker command code %u\n", swcWorkerCmd); TRACE("Got cmd %u from process pipe\n", swcWorkerCmd); @@ -1834,9 +1762,9 @@ namespace CorUnix } else { - tv.tv_usec = (iTimeout % tccSecondsToMillieSeconds) * - tccMillieSecondsToMicroSeconds; - tv.tv_sec = iTimeout / tccSecondsToMillieSeconds; + tv.tv_usec = (iTimeout % tccSecondsToMilliSeconds) * + tccMilliSecondsToMicroSeconds; + tv.tv_sec = iTimeout / tccSecondsToMilliSeconds; ptv = &tv; } @@ -1865,9 +1793,9 @@ namespace CorUnix } else { - ts.tv_nsec = (iTimeout % tccSecondsToMillieSeconds) * - tccMillieSecondsToNanoSeconds; - ts.tv_sec = iTimeout / tccSecondsToMillieSeconds; + ts.tv_nsec = (iTimeout % tccSecondsToMilliSeconds) * + tccMilliSecondsToNanoSeconds; + ts.tv_sec = iTimeout / tccSecondsToMilliSeconds; pts = &ts; } @@ -2215,9 +2143,8 @@ namespace CorUnix "Value too big for swcWorkerCmd\n"); _ASSERT_MSG((SynchWorkerCmdNop == swcWorkerCmd) || - (SynchWorkerCmdShutdown == swcWorkerCmd) || - (SynchWorkerCmdTerminationRequest == swcWorkerCmd), - "WakeUpLocalWorkerThread supports only SynchWorkerCmdNop, SynchWorkerCmdShutdown, and SynchWorkerCmdTerminationRequest." + (SynchWorkerCmdShutdown == swcWorkerCmd), + "WakeUpLocalWorkerThread supports only SynchWorkerCmdNop and SynchWorkerCmdShutdown." 
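Editorial aside (illustrative sketch): the hunks above only correct the spelling of the tcc* conversion constants (Millie -> Milli); the millisecond-to-timeval/timespec arithmetic is unchanged. The helper below shows that arithmetic in isolation; the function name is hypothetical and the constant values are the usual unit factors.

#include <time.h>

constexpr long long tccSecondsToMilliSeconds     = 1000;
constexpr long long tccMilliSecondsToNanoSeconds = 1000 * 1000;
constexpr long long tccSecondsToNanoSeconds      = 1000 * 1000 * 1000;

// Turn a relative timeout in milliseconds into an absolute timespec deadline.
struct timespec AbsoluteTimeoutFromNow(unsigned int timeoutMilliseconds)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);

    ts.tv_sec  += timeoutMilliseconds / tccSecondsToMilliSeconds;
    ts.tv_nsec += (timeoutMilliseconds % tccSecondsToMilliSeconds) * tccMilliSecondsToNanoSeconds;

    // Normalize: carry any whole seconds out of the nanosecond field.
    while (ts.tv_nsec >= tccSecondsToNanoSeconds)
    {
        ts.tv_sec  += 1;
        ts.tv_nsec -= tccSecondsToNanoSeconds;
    }
    return ts;
}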
"[received cmd=%d]\n", swcWorkerCmd); BYTE byCmd = (BYTE)(swcWorkerCmd & 0xFF); @@ -2416,7 +2343,7 @@ namespace CorUnix VALIDATEOBJECT(psdSynchData); - InternalEnterCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_enter(&s_csMonitoredProcessesLock); fMonitoredProcessesLock = true; @@ -2465,7 +2392,7 @@ namespace CorUnix } // Unlock - InternalLeaveCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_leave(&s_csMonitoredProcessesLock); fMonitoredProcessesLock = false; if (fWakeUpWorker) @@ -2485,8 +2412,7 @@ namespace CorUnix RPFM_exit: if (fMonitoredProcessesLock) { - InternalLeaveCriticalSection(pthrCurrent, - &s_csMonitoredProcessesLock); + minipal_mutex_leave(&s_csMonitoredProcessesLock); } return palErr; @@ -2511,7 +2437,7 @@ namespace CorUnix VALIDATEOBJECT(psdSynchData); - InternalEnterCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_enter(&s_csMonitoredProcessesLock); pmpln = m_pmplnMonitoredProcesses; while (pmpln) @@ -2550,7 +2476,7 @@ namespace CorUnix palErr = ERROR_NOT_FOUND; } - InternalLeaveCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_leave(&s_csMonitoredProcessesLock); return palErr; } @@ -2609,7 +2535,7 @@ namespace CorUnix // lock is needed in order to support object promotion. // Grab the monitored processes lock - InternalEnterCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_enter(&s_csMonitoredProcessesLock); fMonitoredProcessesLock = true; lInitialNodeCount = m_lMonitoredProcessesCount; @@ -2654,7 +2580,7 @@ namespace CorUnix } // Release the monitored processes lock - InternalLeaveCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_leave(&s_csMonitoredProcessesLock); fMonitoredProcessesLock = false; if (lRemovingCount > 0) @@ -2664,7 +2590,7 @@ namespace CorUnix fLocalSynchLock = true; // Acquire the monitored processes lock - InternalEnterCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_enter(&s_csMonitoredProcessesLock); fMonitoredProcessesLock = true; // Start from the beginning of the exited processes list @@ -2724,7 +2650,7 @@ namespace CorUnix if (fMonitoredProcessesLock) { - InternalLeaveCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_leave(&s_csMonitoredProcessesLock); } if (fLocalSynchLock) @@ -2750,7 +2676,7 @@ namespace CorUnix MonitoredProcessesListNode * pNode; // Grab the monitored processes lock - InternalEnterCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_enter(&s_csMonitoredProcessesLock); while (m_pmplnMonitoredProcesses) { @@ -2762,7 +2688,7 @@ namespace CorUnix } // Release the monitored processes lock - InternalLeaveCriticalSection(pthrCurrent, &s_csMonitoredProcessesLock); + minipal_mutex_leave(&s_csMonitoredProcessesLock); } /*++ @@ -3652,8 +3578,8 @@ namespace CorUnix #endif if (0 == iRet) { - ptsAbsTmo->tv_sec += dwTimeout / tccSecondsToMillieSeconds; - ptsAbsTmo->tv_nsec += (dwTimeout % tccSecondsToMillieSeconds) * tccMillieSecondsToNanoSeconds; + ptsAbsTmo->tv_sec += dwTimeout / tccSecondsToMilliSeconds; + ptsAbsTmo->tv_nsec += (dwTimeout % tccSecondsToMilliSeconds) * tccMilliSecondsToNanoSeconds; while (ptsAbsTmo->tv_nsec >= tccSecondsToNanoSeconds) { ptsAbsTmo->tv_sec += 1; diff --git a/src/coreclr/pal/src/synchmgr/synchmanager.hpp b/src/coreclr/pal/src/synchmgr/synchmanager.hpp index 2dd0600d4d9c..7d86ab6fb12d 100644 --- a/src/coreclr/pal/src/synchmgr/synchmanager.hpp +++ 
b/src/coreclr/pal/src/synchmgr/synchmanager.hpp @@ -21,7 +21,6 @@ Module Name: #include "pal/synchobjects.hpp" #include "pal/synchcache.hpp" -#include "pal/cs.hpp" #include "pal/corunix.hpp" #include "pal/thread.hpp" #include "pal/procobj.hpp" @@ -488,7 +487,6 @@ namespace CorUnix { SynchWorkerCmdNop, SynchWorkerCmdShutdown, - SynchWorkerCmdTerminationRequest, SynchWorkerCmdLast }; @@ -524,8 +522,8 @@ namespace CorUnix // static members static CPalSynchronizationManager * s_pObjSynchMgr; static Volatile s_lInitStatus; - static CRITICAL_SECTION s_csSynchProcessLock; - static CRITICAL_SECTION s_csMonitoredProcessesLock; + static minipal_mutex s_csSynchProcessLock; + static minipal_mutex s_csMonitoredProcessesLock; // members DWORD m_dwWorkerThreadTid; @@ -591,7 +589,7 @@ namespace CorUnix if (1 == ++pthrCurrent->synchronizationInfo.m_lLocalSynchLockCount) { - InternalEnterCriticalSection(pthrCurrent, &s_csSynchProcessLock); + minipal_mutex_enter(&s_csSynchProcessLock); } } static void ReleaseLocalSynchLock(CPalThread * pthrCurrent) @@ -599,7 +597,7 @@ namespace CorUnix _ASSERTE(0 < pthrCurrent->synchronizationInfo.m_lLocalSynchLockCount); if (0 == --pthrCurrent->synchronizationInfo.m_lLocalSynchLockCount) { - InternalLeaveCriticalSection(pthrCurrent, &s_csSynchProcessLock); + minipal_mutex_leave(&s_csSynchProcessLock); #if SYNCHMGR_SUSPENSION_SAFE_CONDITION_SIGNALING pthrCurrent->synchronizationInfo.RunDeferredThreadConditionSignalings(); @@ -614,7 +612,7 @@ namespace CorUnix if (0 < lRet) { pthrCurrent->synchronizationInfo.m_lLocalSynchLockCount = 0; - InternalLeaveCriticalSection(pthrCurrent, &s_csSynchProcessLock); + minipal_mutex_leave(&s_csSynchProcessLock); #if SYNCHMGR_SUSPENSION_SAFE_CONDITION_SIGNALING pthrCurrent->synchronizationInfo.RunDeferredThreadConditionSignalings(); @@ -783,8 +781,6 @@ namespace CorUnix PAPCFUNC pfnAPC, ULONG_PTR uptrData); - virtual PAL_ERROR SendTerminationRequestToWorkerThread(); - virtual bool AreAPCsPending(CPalThread * pthrTarget); virtual PAL_ERROR DispatchPendingAPCs(CPalThread * pthrCurrent); diff --git a/src/coreclr/pal/src/synchobj/mutex.cpp b/src/coreclr/pal/src/synchobj/mutex.cpp index f720fbcfb5fe..f23254612c55 100644 --- a/src/coreclr/pal/src/synchobj/mutex.cpp +++ b/src/coreclr/pal/src/synchobj/mutex.cpp @@ -33,8 +33,7 @@ SET_DEFAULT_DEBUG_CHANNEL(SYNC); // some headers have code with asserts, so do t #include #include #include - -#include "pal/sharedmemory.inl" +#include "minipal/time.h" using namespace CorUnix; @@ -89,7 +88,7 @@ CreateMutexW( IN BOOL bInitialOwner, IN LPCWSTR lpName) { - return PAL_CreateMutexW(bInitialOwner, lpName, nullptr, 0); + return PAL_CreateMutexW(bInitialOwner, lpName, false /* bCurrentUserOnly */, nullptr, 0); } /*++ @@ -113,6 +112,7 @@ PALAPI PAL_CreateMutexW( IN BOOL bInitialOwner, IN LPCWSTR lpName, + IN BOOL bCurrentUserOnly, IN LPSTR lpSystemCallErrors, IN DWORD dwSystemCallErrorsBufferSize) { @@ -171,6 +171,7 @@ PAL_CreateMutexW( nullptr, bInitialOwner, lpName == nullptr ? 
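Editorial aside (illustrative sketch): AcquireLocalSynchLock/ReleaseLocalSynchLock above keep a per-thread counter and only touch the underlying lock when the count transitions 0 -> 1 or back to 0, which makes the lock re-entrant for its owner. The self-contained version below uses std::mutex and a thread_local counter; all names are hypothetical.

#include <mutex>

static std::mutex g_synchLock;
static thread_local int t_localLockCount = 0;

void AcquireLocalSynchLock()
{
    if (++t_localLockCount == 1)
    {
        g_synchLock.lock();     // first acquisition on this thread takes the real lock
    }
}

void ReleaseLocalSynchLock()
{
    if (--t_localLockCount == 0)
    {
        g_synchLock.unlock();   // last release on this thread drops the real lock
    }
}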
nullptr : utf8Name, + bCurrentUserOnly, &hMutex ); } @@ -238,6 +239,7 @@ CorUnix::InternalCreateMutex( LPSECURITY_ATTRIBUTES lpMutexAttributes, BOOL bInitialOwner, LPCSTR lpName, + BOOL bCurrentUserOnly, HANDLE *phMutex ) { @@ -317,7 +319,8 @@ CorUnix::InternalCreateMutex( SharedMemoryProcessDataHeader *processDataHeader; try { - processDataHeader = NamedMutexProcessData::CreateOrOpen(errors, lpName, !!bInitialOwner, &createdNamedMutex); + processDataHeader = + NamedMutexProcessData::CreateOrOpen(errors, lpName, !!bCurrentUserOnly, !!bInitialOwner, &createdNamedMutex); } catch (SharedMemoryException ex) { @@ -543,7 +546,7 @@ OpenMutexA ( goto OpenMutexAExit; } - palError = InternalOpenMutex(nullptr, pthr, lpName, &hMutex); + palError = InternalOpenMutex(nullptr, pthr, lpName, false /* bCurrentUserOnly */, &hMutex); OpenMutexAExit: if (NO_ERROR != palError) @@ -571,7 +574,7 @@ OpenMutexW( IN BOOL bInheritHandle, IN LPCWSTR lpName) { - return PAL_OpenMutexW(lpName, nullptr, 0); + return PAL_OpenMutexW(lpName, false /* bCurrentUserOnly */, nullptr, 0); } /*++ @@ -593,6 +596,7 @@ HANDLE PALAPI PAL_OpenMutexW( IN LPCWSTR lpName, + IN BOOL bCurrentUserOnly, IN LPSTR lpSystemCallErrors, IN DWORD dwSystemCallErrorsBufferSize) { @@ -612,10 +616,11 @@ PAL_OpenMutexW( /* validate parameters */ if (lpName == nullptr || + lpName[0] == W('\0') || (int)dwSystemCallErrorsBufferSize < 0 || (lpSystemCallErrors == nullptr) != (dwSystemCallErrorsBufferSize == 0)) { - ERROR("name is NULL or other parameters are invalid\n"); + ERROR("One or more parameters are invalid\n"); palError = ERROR_INVALID_PARAMETER; goto OpenMutexWExit; } @@ -643,7 +648,7 @@ PAL_OpenMutexW( } SharedMemorySystemCallErrors errors(lpSystemCallErrors, (int)dwSystemCallErrorsBufferSize); - palError = InternalOpenMutex(&errors, pthr, lpName == nullptr ? nullptr : utf8Name, &hMutex); + palError = InternalOpenMutex(&errors, pthr, lpName == nullptr ? 
nullptr : utf8Name, bCurrentUserOnly, &hMutex); } OpenMutexWExit: @@ -675,6 +680,7 @@ CorUnix::InternalOpenMutex( SharedMemorySystemCallErrors *errors, CPalThread *pthr, LPCSTR lpName, + BOOL bCurrentUserOnly, HANDLE *phMutex ) { @@ -711,7 +717,7 @@ CorUnix::InternalOpenMutex( SharedMemoryProcessDataHeader *processDataHeader; try { - processDataHeader = NamedMutexProcessData::Open(errors, lpName); + processDataHeader = NamedMutexProcessData::Open(errors, lpName, bCurrentUserOnly); } catch (SharedMemoryException ex) { @@ -1074,20 +1080,29 @@ const DWORD NamedMutexProcessData::PollLoopMaximumSleepMilliseconds = 100; SharedMemoryProcessDataHeader *NamedMutexProcessData::CreateOrOpen( SharedMemorySystemCallErrors *errors, LPCSTR name, + bool isUserScope, bool acquireLockIfCreated, bool *createdRef) { - return CreateOrOpen(errors, name, true /* createIfNotExist */, acquireLockIfCreated, createdRef); + return CreateOrOpen(errors, name, isUserScope, true /* createIfNotExist */, acquireLockIfCreated, createdRef); } -SharedMemoryProcessDataHeader *NamedMutexProcessData::Open(SharedMemorySystemCallErrors *errors, LPCSTR name) +SharedMemoryProcessDataHeader *NamedMutexProcessData::Open(SharedMemorySystemCallErrors *errors, LPCSTR name, bool isUserScope) { - return CreateOrOpen(errors, name, false /* createIfNotExist */, false /* acquireLockIfCreated */, nullptr /* createdRef */); + return + CreateOrOpen( + errors, + name, + isUserScope, + false /* createIfNotExist */, + false /* acquireLockIfCreated */, + nullptr /* createdRef */); } SharedMemoryProcessDataHeader *NamedMutexProcessData::CreateOrOpen( SharedMemorySystemCallErrors *errors, LPCSTR name, + bool isUserScope, bool createIfNotExist, bool acquireLockIfCreated, bool *createdRef) @@ -1153,7 +1168,8 @@ SharedMemoryProcessDataHeader *NamedMutexProcessData::CreateOrOpen( if (m_acquiredCreationDeletionFileLock) { - SharedMemoryManager::ReleaseCreationDeletionFileLock(); + _ASSERTE(m_processDataHeader != nullptr); + SharedMemoryManager::ReleaseCreationDeletionFileLock(m_processDataHeader->GetId()); } if (!m_cancel && m_processDataHeader != nullptr) @@ -1178,6 +1194,7 @@ SharedMemoryProcessDataHeader *NamedMutexProcessData::CreateOrOpen( SharedMemoryProcessDataHeader::CreateOrOpen( errors, name, + isUserScope, SharedMemorySharedDataHeader(SharedMemoryType::Mutex, SyncSystemVersion), sizeof(NamedMutexSharedData), createIfNotExist, @@ -1186,6 +1203,12 @@ SharedMemoryProcessDataHeader *NamedMutexProcessData::CreateOrOpen( { *createdRef = created; } + if (processDataHeader == nullptr) + { + _ASSERTE(!created); + _ASSERTE(!createIfNotExist); + return nullptr; + } if (created) { // If the shared memory file was created, the creation/deletion file lock would have been acquired so that we can @@ -1193,11 +1216,6 @@ SharedMemoryProcessDataHeader *NamedMutexProcessData::CreateOrOpen( _ASSERTE(SharedMemoryManager::IsCreationDeletionFileLockAcquired()); autoCleanup.m_acquiredCreationDeletionFileLock = true; } - if (processDataHeader == nullptr) - { - _ASSERTE(!createIfNotExist); - return nullptr; - } autoCleanup.m_processDataHeader = processDataHeader; if (created) @@ -1210,27 +1228,29 @@ SharedMemoryProcessDataHeader *NamedMutexProcessData::CreateOrOpen( { #if !NAMED_MUTEX_USE_PTHREAD_MUTEX // Create the lock files directory - SharedMemoryHelpers::BuildSharedFilesPath(lockFilePath, SHARED_MEMORY_LOCK_FILES_DIRECTORY_NAME); + const SharedMemoryId *id = processDataHeader->GetId(); + SharedMemoryHelpers::VerifyStringOperation( + 
lockFilePath.Set(*gSharedFilesPath) && + id->AppendRuntimeTempDirectoryName(lockFilePath) && + lockFilePath.Append('/') && lockFilePath.Append(SHARED_MEMORY_LOCK_FILES_DIRECTORY_NAME)); if (created) { - SharedMemoryHelpers::EnsureDirectoryExists(errors, lockFilePath, true /* isGlobalLockAcquired */); + SharedMemoryHelpers::EnsureDirectoryExists(errors, lockFilePath, id, true /* isGlobalLockAcquired */); } // Create the session directory - SharedMemoryId *id = processDataHeader->GetId(); - SharedMemoryHelpers::VerifyStringOperation(lockFilePath.Append('/')); - SharedMemoryHelpers::VerifyStringOperation(id->AppendSessionDirectoryName(lockFilePath)); + SharedMemoryHelpers::VerifyStringOperation(lockFilePath.Append('/') && id->AppendSessionDirectoryName(lockFilePath)); if (created) { - SharedMemoryHelpers::EnsureDirectoryExists(errors, lockFilePath, true /* isGlobalLockAcquired */); + SharedMemoryHelpers::EnsureDirectoryExists(errors, lockFilePath, id, true /* isGlobalLockAcquired */); autoCleanup.m_lockFilePath = &lockFilePath; autoCleanup.m_sessionDirectoryPathCharCount = lockFilePath.GetCount(); } // Create or open the lock file - SharedMemoryHelpers::VerifyStringOperation(lockFilePath.Append('/')); - SharedMemoryHelpers::VerifyStringOperation(lockFilePath.Append(id->GetName(), id->GetNameCharCount())); - int lockFileDescriptor = SharedMemoryHelpers::CreateOrOpenFile(errors, lockFilePath, created); + SharedMemoryHelpers::VerifyStringOperation( + lockFilePath.Append('/') && lockFilePath.Append(id->GetName(), id->GetNameCharCount())); + int lockFileDescriptor = SharedMemoryHelpers::CreateOrOpenFile(errors, lockFilePath, id, created); if (lockFileDescriptor == -1) { _ASSERTE(!created); @@ -1394,11 +1414,13 @@ void NamedMutexProcessData::Close(bool isAbruptShutdown, bool releaseSharedData) { // Delete the lock file, and the session directory if it's not empty PathCharString path; - SharedMemoryHelpers::BuildSharedFilesPath(path, SHARED_MEMORY_LOCK_FILES_DIRECTORY_NAME); - SharedMemoryId *id = m_processDataHeader->GetId(); - SharedMemoryHelpers::VerifyStringOperation(path.Append('/')); - SharedMemoryHelpers::VerifyStringOperation(id->AppendSessionDirectoryName(path)); - SharedMemoryHelpers::VerifyStringOperation(path.Append('/')); + const SharedMemoryId *id = m_processDataHeader->GetId(); + SharedMemoryHelpers::VerifyStringOperation( + path.Set(*gSharedFilesPath) && + id->AppendRuntimeTempDirectoryName(path) && + path.Append('/') && path.Append(SHARED_MEMORY_LOCK_FILES_DIRECTORY_NAME) && + path.Append('/') && id->AppendSessionDirectoryName(path) && + path.Append('/')); SIZE_T sessionDirectoryPathCharCount = path.GetCount(); SharedMemoryHelpers::VerifyStringOperation(path.Append(id->GetName(), id->GetNameCharCount())); unlink(path); @@ -1476,7 +1498,7 @@ MutexTryAcquireLockResult NamedMutexProcessData::TryAcquireLock(SharedMemorySyst DWORD startTime = 0; if (timeoutMilliseconds != static_cast(-1) && timeoutMilliseconds != 0) { - startTime = GetTickCount(); + startTime = (DWORD)minipal_lowres_ticks(); } // Acquire the process lock. 
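Editorial aside (illustrative sketch): the hunks above fold several path-building steps into a single VerifyStringOperation(a && b && ...) call. Because each operation reports failure through its return value and && short-circuits, one check at the end is equivalent to checking every step. The tiny string type below is hypothetical and exists only to demonstrate that pattern.

#include <cassert>
#include <string>

class TinyPath
{
public:
    bool Set(const std::string& s)    { m_value = s; return true; }
    bool Append(char c)               { m_value.push_back(c); return true; }
    bool Append(const std::string& s) { m_value += s; return true; }
    const std::string& Value() const  { return m_value; }

private:
    std::string m_value;
};

static void VerifyStringOperation(bool ok)
{
    assert(ok && "string operation failed");
}

int main()
{
    TinyPath path;
    // One verification covers the whole chain; the first failing step stops the rest.
    VerifyStringOperation(
        path.Set("/tmp/shared-files") &&
        path.Append('/') &&
        path.Append("lockfiles"));
    return 0;
}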
A file lock can only be acquired once per file descriptor, so to synchronize the threads of @@ -1605,7 +1627,7 @@ MutexTryAcquireLockResult NamedMutexProcessData::TryAcquireLock(SharedMemorySyst // Poll for the file lock do { - DWORD elapsedMilliseconds = GetTickCount() - startTime; + DWORD elapsedMilliseconds = (DWORD)minipal_lowres_ticks() - startTime; if (elapsedMilliseconds >= timeoutMilliseconds) { return MutexTryAcquireLockResult::TimedOut; diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 82be2fb000d6..de8d063ea39e 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -316,6 +316,11 @@ typedef int __ptrace_request; ASSIGN_REG(R29) \ ASSIGN_REG(R30) +#elif defined(HOST_WASM) +#define ASSIGN_CONTROL_REGS \ + ASSERT("WASM does not have registers"); +#define ASSIGN_INTEGER_REGS \ + ASSERT("WASM does not have registers"); #else #error "Don't know how to assign registers on this architecture" #endif @@ -882,7 +887,7 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) dest = FPREG_Xstate_Egpr(native, &size); _ASSERT(size == (sizeof(DWORD64) * 16)); - memcpy_s(dest, sizeof(DWORD64) * 16, &lpContext->Egpr16, sizeof(DWORD64) * 16); + memcpy_s(dest, sizeof(DWORD64) * 16, &lpContext->R16, sizeof(DWORD64) * 16); } #endif // !TARGET_OSX } @@ -1238,7 +1243,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex { src = FPREG_Xstate_Egpr(native, &size); _ASSERT(size == (sizeof(DWORD64) * 16)); - memcpy_s(&lpContext->Egpr16, sizeof(DWORD64) * 16, src, sizeof(DWORD64) * 16); + memcpy_s(&lpContext->R16, sizeof(DWORD64) * 16, src, sizeof(DWORD64) * 16); lpContext->XStateFeaturesMask |= XSTATE_MASK_APX; } @@ -2183,6 +2188,8 @@ DBG_FlushInstructionCache( #endif syscall(__NR_riscv_flush_icache, (char *)lpBaseAddress, (char *)((INT_PTR)lpBaseAddress + dwSize), 0 /* all harts */); +#elif defined(HOST_WASM) + // do nothing, no instruction cache to flush #elif defined(HOST_APPLE) && !defined(HOST_OSX) sys_icache_invalidate((void *)lpBaseAddress, dwSize); #else @@ -2197,18 +2204,20 @@ CONTEXT& CONTEXT::operator=(const CONTEXT& ctx) size_t copySize; if (ctx.ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK) { - if ((ctx.XStateFeaturesMask & XSTATE_MASK_APX) == XSTATE_MASK_APX) - { - copySize = sizeof(CONTEXT); - } - else if ((ctx.XStateFeaturesMask & XSTATE_MASK_AVX512) == XSTATE_MASK_AVX512) + if ((ctx.XStateFeaturesMask & XSTATE_MASK_AVX512) == XSTATE_MASK_AVX512) { - copySize = offsetof(CONTEXT, Egpr16); + copySize = offsetof(CONTEXT, R16); } else { copySize = offsetof(CONTEXT, KMask0); } + + if ((ctx.XStateFeaturesMask & XSTATE_MASK_APX) == XSTATE_MASK_APX) + { + // Copy APX EGPRs separately. + memcpy(&(this->R16), &(ctx.R16), sizeof(DWORD64) * 16); + } } else { diff --git a/src/coreclr/pal/src/thread/process.cpp b/src/coreclr/pal/src/thread/process.cpp index 20c09eb357dd..c56cc0637dcd 100644 --- a/src/coreclr/pal/src/thread/process.cpp +++ b/src/coreclr/pal/src/thread/process.cpp @@ -25,7 +25,6 @@ SET_DEFAULT_DEBUG_CHANNEL(PROCESS); // some headers have code with asserts, so d #include "pal/palinternal.h" #include "pal/process.h" #include "pal/init.h" -#include "pal/critsect.h" #include "pal/debug.h" #include "pal/utils.h" #include "pal/environ.h" @@ -69,6 +68,9 @@ SET_DEFAULT_DEBUG_CHANNEL(PROCESS); // some headers have code with asserts, so d #define membarrier(...) 
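Editorial aside (illustrative sketch): the TryAcquireLock changes above swap GetTickCount() for minipal_lowres_ticks() when measuring how long the poll loop has waited, but the elapsed-time bookkeeping stays the same. The fragment below shows that shape with a standard steady clock; the function names are hypothetical and minipal_lowres_ticks itself is only referenced by the patch, not redefined here.

#include <chrono>
#include <thread>

// Returns true if the (hypothetical) acquire succeeds before the timeout elapses.
bool PollWithTimeout(unsigned int timeoutMilliseconds, bool (*tryAcquire)())
{
    using clock = std::chrono::steady_clock;
    const auto start = clock::now();

    while (!tryAcquire())
    {
        auto elapsedMs =
            std::chrono::duration_cast<std::chrono::milliseconds>(clock::now() - start).count();
        if (elapsedMs >= timeoutMilliseconds)
        {
            return false;                                  // timed out
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(10));  // back off before retrying
    }
    return true;
}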
syscall(__NR_membarrier, __VA_ARGS__) #elif HAVE_SYS_MEMBARRIER_H #include +#ifdef TARGET_BROWSER +#define membarrier(cmd, flags, cpu_id) 0 // browser/wasm is currently single threaded +#endif #endif #ifdef __APPLE__ @@ -155,7 +157,7 @@ IPalObject* CorUnix::g_pobjProcess; // Critical section that protects process data (e.g., the // list of active threads)/ // -CRITICAL_SECTION g_csProcess; +minipal_mutex g_csProcess; // // List and count of active threads @@ -510,6 +512,9 @@ CorUnix::InternalCreateProcess( LPPROCESS_INFORMATION lpProcessInformation ) { +#ifdef TARGET_TVOS + return ERROR_NOT_SUPPORTED; +#else PAL_ERROR palError = NO_ERROR; IPalObject *pobjProcess = NULL; IPalObject *pobjProcessRegistered = NULL; @@ -1046,6 +1051,7 @@ CorUnix::InternalCreateProcess( } return palError; +#endif // !TARGET_TVOS } @@ -2177,7 +2183,7 @@ PROCCreateCrashDump( INT cbErrorMessageBuffer, bool serialize) { -#if defined(TARGET_IOS) +#if defined(TARGET_IOS) || defined(TARGET_TVOS) return FALSE; #else _ASSERTE(argv.size() > 0); @@ -2307,7 +2313,7 @@ PROCCreateCrashDump( } } return true; -#endif // !TARGET_IOS +#endif // !TARGET_IOS && !TARGET_TVOS } /*++ @@ -2449,6 +2455,16 @@ PAL_GenerateCoreDump( (no return value) --*/ +#ifdef HOST_ANDROID +#include +VOID +PROCCreateCrashDumpIfEnabled(int signal, siginfo_t* siginfo, bool serialize) +{ + // TODO: Dump all managed threads callstacks into logcat and/or file? + // TODO: Dump stress log into logcat and/or file when enabled? + minipal_log_write_fatal("Aborting process.\n"); +} +#else VOID PROCCreateCrashDumpIfEnabled(int signal, siginfo_t* siginfo, bool serialize) { @@ -2517,6 +2533,7 @@ PROCCreateCrashDumpIfEnabled(int signal, siginfo_t* siginfo, bool serialize) free(signalAddressArg); } } +#endif /*++ Function: @@ -2769,14 +2786,14 @@ CorUnix::InitializeProcessData( pGThreadList = NULL; g_dwThreadCount = 0; - InternalInitializeCriticalSection(&g_csProcess); + minipal_mutex_init(&g_csProcess); fLockInitialized = TRUE; if (NO_ERROR != palError) { if (fLockInitialized) { - InternalDeleteCriticalSection(&g_csProcess); + minipal_mutex_destroy(&g_csProcess); } } @@ -2822,7 +2839,7 @@ CorUnix::InitializeProcessCommandLine( ERROR("Invalid full path\n"); palError = ERROR_INTERNAL_ERROR; goto exit; - } + } lpwstr[0] = '\0'; size_t n = PAL_wcslen(lpwstrFullPath) + 1; @@ -2993,7 +3010,7 @@ PROCCleanupInitialProcess(VOID) { CPalThread *pThread = InternalGetCurrentThread(); - InternalEnterCriticalSection(pThread, &g_csProcess); + minipal_mutex_enter(&g_csProcess); /* Free the application directory */ free(g_lpwstrAppDir); @@ -3001,7 +3018,7 @@ PROCCleanupInitialProcess(VOID) /* Free the stored command line */ free(g_lpwstrCmdLine); - InternalLeaveCriticalSection(pThread, &g_csProcess); + minipal_mutex_leave(&g_csProcess); // // Object manager shutdown will handle freeing the underlying @@ -3029,7 +3046,7 @@ CorUnix::PROCAddThread( { /* protect the access of the thread list with critical section for mutithreading access */ - InternalEnterCriticalSection(pCurrentThread, &g_csProcess); + minipal_mutex_enter(&g_csProcess); pTargetThread->SetNext(pGThreadList); pGThreadList = pTargetThread; @@ -3038,7 +3055,7 @@ CorUnix::PROCAddThread( TRACE("Thread 0x%p (id %#x) added to the process thread list\n", pTargetThread, pTargetThread->GetThreadId()); - InternalLeaveCriticalSection(pCurrentThread, &g_csProcess); + minipal_mutex_leave(&g_csProcess); } @@ -3064,7 +3081,7 @@ CorUnix::PROCRemoveThread( /* protect the access of the thread list with critical section for mutithreading 
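Editorial aside (illustrative sketch): PROCAddThread above (and PROCRemoveThread just below) guard a simple singly-linked list of threads with the process-wide g_csProcess lock, now a minipal_mutex. The fragment below shows the same add/remove-under-lock shape with std::mutex; the node type and function names are hypothetical.

#include <mutex>

struct ThreadNode
{
    ThreadNode* next = nullptr;
    int         id = 0;
};

static std::mutex  g_processLock;
static ThreadNode* g_threadList = nullptr;

void AddThread(ThreadNode* node)
{
    std::lock_guard<std::mutex> guard(g_processLock);
    node->next = g_threadList;       // push onto the head of the list
    g_threadList = node;
}

bool RemoveThread(ThreadNode* node)
{
    std::lock_guard<std::mutex> guard(g_processLock);
    for (ThreadNode** link = &g_threadList; *link != nullptr; link = &(*link)->next)
    {
        if (*link == node)
        {
            *link = node->next;      // unlink without touching the rest of the list
            return true;
        }
    }
    return false;                    // not found
}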
access */ - InternalEnterCriticalSection(pCurrentThread, &g_csProcess); + minipal_mutex_enter(&g_csProcess); curThread = pGThreadList; @@ -3105,7 +3122,7 @@ CorUnix::PROCRemoveThread( WARN("Thread %p not removed (it wasn't found in the list)\n", pTargetThread); EXIT: - InternalLeaveCriticalSection(pCurrentThread, &g_csProcess); + minipal_mutex_leave(&g_csProcess); } @@ -3150,7 +3167,7 @@ PROCProcessLock( CPalThread * pThread = (PALIsThreadDataInitialized() ? InternalGetCurrentThread() : NULL); - InternalEnterCriticalSection(pThread, &g_csProcess); + minipal_mutex_enter(&g_csProcess); } @@ -3174,7 +3191,7 @@ PROCProcessUnlock( CPalThread * pThread = (PALIsThreadDataInitialized() ? InternalGetCurrentThread() : NULL); - InternalLeaveCriticalSection(pThread, &g_csProcess); + minipal_mutex_leave(&g_csProcess); } #if USE_SYSV_SEMAPHORES diff --git a/src/coreclr/pal/src/thread/thread.cpp b/src/coreclr/pal/src/thread/thread.cpp index 73203bfa3eff..fe48f04dfe63 100644 --- a/src/coreclr/pal/src/thread/thread.cpp +++ b/src/coreclr/pal/src/thread/thread.cpp @@ -17,7 +17,6 @@ SET_DEFAULT_DEBUG_CHANNEL(THREAD); // some headers have code with asserts, so do #include "pal/thread.hpp" #include "pal/mutex.hpp" #include "pal/handlemgr.hpp" -#include "pal/cs.hpp" #include "pal/seh.hpp" #include "pal/signal.hpp" @@ -134,7 +133,7 @@ static void InternalEndCurrentThreadWrapper(void *arg) will lock its own critical section */ LOADCallDllMain(DLL_THREAD_DETACH, NULL); -#if !HAVE_MACH_EXCEPTIONS +#if !HAVE_MACH_EXCEPTIONS && HAVE_SIGALTSTACK pThread->FreeSignalAlternateStack(); #endif // !HAVE_MACH_EXCEPTIONS @@ -794,10 +793,6 @@ CorUnix::InternalEndCurrentThread( PAL_ERROR palError = NO_ERROR; ISynchStateController *pSynchStateController = NULL; -#ifdef PAL_PERF - PERFDisableThreadProfile(UserCreatedThread != pThread->GetThreadType()); -#endif - // // Abandon any objects owned by this thread // @@ -1552,7 +1547,7 @@ CPalThread::ThreadEntry( } #endif // HAVE_SCHED_GETAFFINITY && HAVE_SCHED_SETAFFINITY -#if !HAVE_MACH_EXCEPTIONS +#if !HAVE_MACH_EXCEPTIONS && HAVE_SIGALTSTACK if (!pThread->EnsureSignalAlternateStack()) { ASSERT("Cannot allocate alternate stack for SIGSEGV!\n"); @@ -1617,11 +1612,6 @@ CPalThread::ThreadEntry( LOADCallDllMain(DLL_THREAD_ATTACH, NULL); } -#ifdef PAL_PERF - PERFAllocThreadInfo(); - PERFEnableThreadProfile(UserCreatedThread != pThread->GetThreadType()); -#endif - /* call the startup routine */ pfnStartRoutine = pThread->GetStartAddress(); pvPar = pThread->GetStartParameter(); @@ -2049,7 +2039,7 @@ CPalThread::RunPreCreateInitializers( // First, perform initialization of CPalThread private members // - InternalInitializeCriticalSection(&m_csLock); + minipal_mutex_init(&m_mtxLock); m_fLockInitialized = TRUE; iError = pthread_mutex_init(&m_startMutex, NULL); @@ -2108,7 +2098,7 @@ CPalThread::~CPalThread() if (m_fLockInitialized) { - InternalDeleteCriticalSection(&m_csLock); + minipal_mutex_destroy(&m_mtxLock); } if (m_fStartItemsInitialized) @@ -2275,7 +2265,7 @@ CPalThread::WaitForStartStatus( return m_fStartStatus; } -#if !HAVE_MACH_EXCEPTIONS +#if !HAVE_MACH_EXCEPTIONS && HAVE_SIGALTSTACK /*++ Function : EnsureSignalAlternateStack diff --git a/src/coreclr/pal/tests/palsuite/CMakeLists.txt b/src/coreclr/pal/tests/palsuite/CMakeLists.txt index cc0fec6d2aa7..a63387917112 100644 --- a/src/coreclr/pal/tests/palsuite/CMakeLists.txt +++ b/src/coreclr/pal/tests/palsuite/CMakeLists.txt @@ -48,15 +48,6 @@ add_executable_clr(paltests #composite/object_management/semaphore/nonshared/semaphore.cpp 
#composite/object_management/semaphore/shared/main.cpp #composite/object_management/semaphore/shared/semaphore.cpp - #composite/synchronization/criticalsection/criticalsection.cpp - #composite/synchronization/criticalsection/mainWrapper.cpp - #composite/synchronization/nativecriticalsection/mtx_critsect.cpp - #composite/synchronization/nativecriticalsection/pal_composite_native_cs.cpp - #composite/synchronization/nativecriticalsection/resultbuffer.cpp - #composite/synchronization/nativecs_interlocked/interlocked.cpp - #composite/synchronization/nativecs_interlocked/mtx_critsect.cpp - #composite/synchronization/nativecs_interlocked/pal_composite_native_cs.cpp - #composite/synchronization/nativecs_interlocked/resultbuffer.cpp #composite/wfmo/main.cpp #composite/wfmo/mutex.cpp c_runtime/atof/test1/test1.cpp @@ -257,24 +248,11 @@ add_executable_clr(paltests filemapping_memmgt/VirtualProtect/test6/VirtualProtect.cpp filemapping_memmgt/VirtualProtect/test7/VirtualProtect.cpp filemapping_memmgt/VirtualQuery/test1/VirtualQuery.cpp - file_io/CopyFileA/test1/CopyFileA.cpp - file_io/CopyFileA/test2/test2.cpp - file_io/CopyFileA/test3/test3.cpp - file_io/CopyFileA/test4/test4.cpp - file_io/CopyFileW/test1/CopyFileW.cpp - file_io/CopyFileW/test2/test2.cpp - file_io/CopyFileW/test3/test3.cpp file_io/CreateFileA/test1/CreateFileA.cpp file_io/CreateFileW/test1/CreateFileW.cpp file_io/errorpathnotfound/test1/test1.cpp - file_io/errorpathnotfound/test2/test2.cpp file_io/FILECanonicalizePath/FILECanonicalizePath.cpp file_io/FlushFileBuffers/test1/FlushFileBuffers.cpp - file_io/GetConsoleOutputCP/test1/GetConsoleOutputCP.cpp - file_io/GetFileAttributesA/test1/GetFileAttributesA.cpp - file_io/GetFileAttributesExW/test1/test1.cpp - file_io/GetFileAttributesExW/test2/test2.cpp - file_io/GetFileAttributesW/test1/GetFileAttributesW.cpp file_io/GetFileSize/test1/GetFileSize.cpp file_io/GetFileSizeEx/test1/GetFileSizeEx.cpp file_io/GetFullPathNameA/test1/GetFullPathNameA.cpp @@ -289,12 +267,6 @@ add_executable_clr(paltests file_io/GetStdHandle/test2/GetStdHandle.cpp file_io/GetSystemTime/test1/test.cpp file_io/GetSystemTimeAsFileTime/test1/GetSystemTimeAsFileTime.cpp - file_io/GetTempFileNameA/test1/GetTempFileNameA.cpp - file_io/GetTempFileNameA/test2/GetTempFileNameA.cpp - file_io/GetTempFileNameA/test3/gettempfilenamea.cpp - file_io/GetTempFileNameW/test1/GetTempFileNameW.cpp - file_io/GetTempFileNameW/test2/GetTempFileNameW.cpp - file_io/GetTempFileNameW/test3/gettempfilenamew.cpp file_io/gettemppatha/test1/gettemppatha.cpp file_io/GetTempPathW/test1/GetTempPathW.cpp file_io/ReadFile/test1/ReadFile.cpp @@ -329,7 +301,6 @@ add_executable_clr(paltests loader/LoadLibraryW/test5/loadlibraryw.cpp #locale_info/CompareStringA/test1/test1.cpp #locale_info/CompareStringW/test1/test1.cpp - locale_info/GetACP/test1/test1.cpp #locale_info/GetLocaleInfoW/test1/test1.cpp #locale_info/GetLocaleInfoW/test2/test2.cpp locale_info/MultiByteToWideChar/test1/test1.cpp @@ -368,7 +339,6 @@ add_executable_clr(paltests miscellaneous/GetEnvironmentVariableW/test6/test6.cpp miscellaneous/GetLastError/test1/test.cpp miscellaneous/GetSystemInfo/test1/test.cpp - miscellaneous/GetTickCount/test1/test.cpp miscellaneous/InterlockedCompareExchange/test1/test.cpp miscellaneous/InterlockedCompareExchange/test2/test.cpp miscellaneous/InterlockedCompareExchange64/test1/test.cpp @@ -391,8 +361,6 @@ add_executable_clr(paltests #miscellaneous/IsBadWritePtr/test1/test.cpp #miscellaneous/IsBadWritePtr/test2/test2.cpp 
#miscellaneous/IsBadWritePtr/test3/test3.cpp - miscellaneous/queryperformancecounter/test1/test1.cpp - miscellaneous/queryperformancefrequency/test1/test1.cpp miscellaneous/SetEnvironmentVariableA/test1/test1.cpp miscellaneous/SetEnvironmentVariableA/test2/test2.cpp miscellaneous/SetEnvironmentVariableA/test3/test3.cpp @@ -402,7 +370,6 @@ add_executable_clr(paltests miscellaneous/SetEnvironmentVariableW/test3/test3.cpp miscellaneous/SetEnvironmentVariableW/test4/test4.cpp miscellaneous/SetLastError/test1/test.cpp -# pal_specific/PAL_GetUserTempDirectoryW/test1/PAL_GetUserTempDirectoryW.cpp pal_specific/PAL_Initialize_Terminate/test1/PAL_Initialize_Terminate.cpp pal_specific/PAL_Initialize_Terminate/test2/pal_initialize_twice.cpp # pal_specific/PAL_RegisterLibraryW_UnregisterLibraryW/test1/PAL_RegisterLibraryW_UnregisterLibraryW.cpp @@ -424,13 +391,6 @@ add_executable_clr(paltests threading/CreateThread/test1/test1.cpp threading/CreateThread/test2/test2.cpp threading/CreateThread/test3/test3.cpp - threading/CriticalSectionFunctions/test1/InitializeCriticalSection.cpp - threading/CriticalSectionFunctions/test2/test2.cpp - threading/CriticalSectionFunctions/test4/test4.cpp - threading/CriticalSectionFunctions/test5/test5.cpp - threading/CriticalSectionFunctions/test6/test6.cpp - threading/CriticalSectionFunctions/test7/test7.cpp - threading/CriticalSectionFunctions/test8/test8.cpp threading/DuplicateHandle/test1/test1.cpp threading/DuplicateHandle/test10/test10.cpp threading/DuplicateHandle/test11/childprocess.cpp @@ -523,3 +483,7 @@ target_link_libraries(paltests install (TARGETS paltests DESTINATION paltests COMPONENT paltests EXCLUDE_FROM_ALL) add_dependencies(paltests_install paltests) install (PROGRAMS runpaltests.sh runpaltestshelix.sh DESTINATION paltests COMPONENT paltests EXCLUDE_FROM_ALL) + +if(CLR_CMAKE_HOST_BROWSER) + install(FILES wasm/index.html paltestlist.txt DESTINATION ${CMAKE_CURRENT_BINARY_DIR} COMPONENT paltests EXCLUDE_FROM_ALL) +endif(CLR_CMAKE_HOST_BROWSER) diff --git a/src/coreclr/pal/tests/palsuite/DisabledTests.txt b/src/coreclr/pal/tests/palsuite/DisabledTests.txt index 0f7094c4a8e8..83b9bcc843ca 100644 --- a/src/coreclr/pal/tests/palsuite/DisabledTests.txt +++ b/src/coreclr/pal/tests/palsuite/DisabledTests.txt @@ -15,21 +15,6 @@ The above testcases were disabled in the palsuite, because they depend heavily o WaitForDebugEvent,DebugActiveProcess and ContinueDebugEvent, where these api's have been removed from the PAL. - -file_io/gettempfilenamea/test2 : -======================================= -This test takes longer than 60 seconds to run. The test creates -about 65000 files and then deletes them. The test that takes longer -than 60 seconds will be flagged as an error and so in such a case -the test will have to be run manually. - -file_io/gettempfilenamew/test2 : -======================================= -This test takes longer than 60 seconds to run. The test creates -about 65000 files and then deletes them. The test that takes longer -than 60 seconds will be flagged as an error and so in such a case -the test will have to be run manually. - locale_info/getcpinfo/test2: ======================================= This test will be useful in future versions for testing various diff --git a/src/coreclr/pal/tests/palsuite/README.txt b/src/coreclr/pal/tests/palsuite/README.txt index 84a297538f7a..a8bfb707f3a5 100644 --- a/src/coreclr/pal/tests/palsuite/README.txt +++ b/src/coreclr/pal/tests/palsuite/README.txt @@ -9,14 +9,9 @@ 2. RUNNING THE SUITES 3. 
ENVIRONMENT VARIABLES AND AUTOMATED TESTING SPECIFICS -3.1 PAL_DISABLE_MESSAGEBOX -3.2 Other Notes 4. ADDITIONAL NOTES ON TESTING/SPECIFIC TEST CASE ISSUES File_IO: getfilesize/test1, setfilepointer/test(5,6,7) - File_IO: gettempfilename(a,w)/test2 - File_IO: setfileattributesa/test(1,4), setfileattributesw/test(1,4) - Miscellaneous: messageboxw/test(1,2) Pal_specific:: pal_get_stdin/test1, pal_get_stdout/test1, pal_get_stderr/test1 Threading: setconsolectrlhandler/test(3,4) @@ -45,7 +40,6 @@ The results are logged to %ROTOR_DIR%\tests\pvtResults.log. See notes in section 4 on the following test cases if running automated tests: - Miscellaneous: messageboxw/test(1,2) Threading: setconsolectrlhandler/test(3,4) @@ -59,20 +53,6 @@ ample disk space. On systems with less than 6Gb free disk space expect these test cases to fail. -File_IO: gettempfilename(a,w)/test2 - -These test cases take longer than 60 seconds to run. Currently, the Test -Harness will timeout any test case that exceeds 60 seconds. - - -Miscellaneous: messageboxw/test(1,2) - -Setting PAL_MESSAGEBOX_DISABLE=1 for these test cases prevents message box pop -ups that occur during the tests' execution on Windows. For automated testing -where user interaction is not desired/possible, setting this environment -variable will prevent a pause in the automated test run. - - ic: pal_get_stdin/test1, pal_get_stdout/test1, pal_get_stderr/test1 These test cases should be manually inspected to ensure the information being returned diff --git a/src/coreclr/pal/tests/palsuite/common/palsuite.cpp b/src/coreclr/pal/tests/palsuite/common/palsuite.cpp index 34972558853c..6d05884b5c28 100644 --- a/src/coreclr/pal/tests/palsuite/common/palsuite.cpp +++ b/src/coreclr/pal/tests/palsuite/common/palsuite.cpp @@ -12,13 +12,13 @@ #include "palsuite.h" +#include "minipal/time.h" const char* szTextFile = "text.txt"; HANDLE hToken[NUM_TOKENS]; -CRITICAL_SECTION CriticalSection; -WCHAR* convert(const char * aString) +WCHAR* convert(const char * aString) { WCHAR* wideBuffer = nullptr; @@ -36,7 +36,7 @@ WCHAR* convert(const char * aString) return wideBuffer; } -char* convertC(const WCHAR * wString) +char* convertC(const WCHAR * wString) { int size; char * MultiBuffer = NULL; @@ -51,17 +51,6 @@ char* convertC(const WCHAR * wString) return MultiBuffer; } -UINT64 GetHighPrecisionTimeStamp(LARGE_INTEGER performanceFrequency) -{ - LARGE_INTEGER ts; - if (!QueryPerformanceCounter(&ts)) - { - Fail("ERROR: Unable to query performance counter!\n"); - } - - return ts.QuadPart / (performanceFrequency.QuadPart / 1000); -} - static const char* rgchPathDelim = "/"; @@ -78,8 +67,8 @@ mkAbsoluteFilename( LPSTR dirName, sizeFN = strlen( fileName ); sizeAPN = (sizeDN + 1 + sizeFN + 1); - /* ensure ((dirName + DELIM + fileName + \0) =< _MAX_PATH ) */ - if( sizeAPN > _MAX_PATH ) + /* ensure ((dirName + DELIM + fileName + \0) =< MAX_PATH ) */ + if( sizeAPN > MAX_PATH ) { return ( 0 ); } @@ -115,17 +104,17 @@ BOOL Cleanup(HANDLE *hArray, DWORD dwIndex) while (--dwIndex > 0) { - bCHRet = CleanupHelper(&hArray[0], dwIndex); + bCHRet = CleanupHelper(&hArray[0], dwIndex); } - + bCRet = CloseHandle(hArray[0]); if (!bCRet) { Trace("PALSUITE ERROR: Unable to execute CloseHandle(%p) during " "clean up.\nGetLastError returned '%u'.\n", hArray[dwIndex], - GetLastError()); + GetLastError()); } - + return (bCRet&&bCHRet); } @@ -138,11 +127,11 @@ BOOL Cleanup(HANDLE *hArray, DWORD dwIndex) * Returns: The number of wide characters in the resulting string. * 0 is returned on Error. 
*/ -int -mkAbsoluteFilenameW ( - LPWSTR dirName, - DWORD dwDirLength, - LPCWSTR fileName, +int +mkAbsoluteFilenameW ( + LPWSTR dirName, + DWORD dwDirLength, + LPCWSTR fileName, DWORD dwFileLength, LPWSTR absPathName ) { @@ -154,19 +143,19 @@ mkAbsoluteFilenameW ( sizeFN = wcslen( fileName ); sizeAPN = (sizeDN + 1 + sizeFN + 1); - /* ensure ((dirName + DELIM + fileName + \0) =< _MAX_PATH ) */ - if ( sizeAPN > _MAX_PATH ) + /* ensure ((dirName + DELIM + fileName + \0) =< MAX_PATH ) */ + if ( sizeAPN > MAX_PATH ) { return ( 0 ); } - + wcsncpy(absPathName, dirName, dwDirLength +1); wcsncpy(absPathName, szPathDelimW, 2); wcsncpy(absPathName, fileName, dwFileLength +1); return (sizeAPN); -} +} /* * Take two wide strings representing file and directory names @@ -177,11 +166,11 @@ mkAbsoluteFilenameW ( * Returns: The number of wide characters in the resulting string. * 0 is returned on Error. */ -int -mkAbsoluteFilenameA ( - LPSTR dirName, - DWORD dwDirLength, - LPCSTR fileName, +int +mkAbsoluteFilenameA ( + LPSTR dirName, + DWORD dwDirLength, + LPCSTR fileName, DWORD dwFileLength, LPSTR absPathName ) { @@ -190,24 +179,24 @@ mkAbsoluteFilenameA ( DWORD sizeDN; DWORD sizeFN; DWORD sizeAPN; - + sizeDN = strlen( dirName ); sizeFN = strlen( fileName ); sizeAPN = (sizeDN + 1 + sizeFN + 1); - - /* ensure ((dirName + DELIM + fileName + \0) =< _MAX_PATH ) */ - if ( sizeAPN > _MAX_PATH ) + + /* ensure ((dirName + DELIM + fileName + \0) =< MAX_PATH ) */ + if ( sizeAPN > MAX_PATH ) { return ( 0 ); } - + strncpy(absPathName, dirName, dwDirLength +1); strcat(absPathName, szPathDelimA); strcat(absPathName, fileName); - + return (sizeAPN); - -} + +} BOOL DeleteFileW( @@ -215,7 +204,7 @@ DeleteFileW( { _ASSERTE(lpFileName != NULL); - CHAR mbFileName[ _MAX_PATH ]; + CHAR mbFileName[ MAX_PATH ]; if (WideCharToMultiByte( CP_ACP, 0, lpFileName, -1, mbFileName, sizeof(mbFileName), NULL, NULL ) != 0 ) { @@ -223,4 +212,4 @@ DeleteFileW( } return FALSE; -} \ No newline at end of file +} diff --git a/src/coreclr/pal/tests/palsuite/common/palsuite.h b/src/coreclr/pal/tests/palsuite/common/palsuite.h index 9494daed71be..8e7336ca8e59 100644 --- a/src/coreclr/pal/tests/palsuite/common/palsuite.h +++ b/src/coreclr/pal/tests/palsuite/common/palsuite.h @@ -25,6 +25,7 @@ typedef unsigned short char16_t; #include #include #include +#include #include #define PALTEST(testfunc, testname) \ @@ -81,71 +82,6 @@ struct PALTest } }; -#ifdef PAL_PERF - -int __cdecl Test_Main(int argc, char **argv); -int PAL_InitializeResult = 0; -static const char PALTEST_LOOP_ENV[]="PALTEST_LOOP_COUNT"; - -int __cdecl main(int argc, char **argv) -{ - int lastMainResult=0; - - int loopCount=1; // default: run the test's main once - int loopIndex=0; - char *szPerfLoopEnv = NULL; - - // Run PAL_Initialize once, save off the result. Any failures here - // will be detected later by calls to PAL_Initialize in the test's main. - PAL_InitializeResult = PAL_Initialize(argc, argv); - - // Check the environment to see if we need to run the test's main - // multiple times. Ideally, we want to do this before PAL_Initialize so - // that the overhead of checking the environment is not included in the - // time between PAL_Initialize and PAL_Terminate. However, getenv in PAL - // can be run only after PAL_Initialize. 
- szPerfLoopEnv = getenv(PALTEST_LOOP_ENV); - if (szPerfLoopEnv != NULL) - { - loopCount = atoi(szPerfLoopEnv); - if (loopCount <= 0) loopCount = 1; - } - - // call the test's actual main in a loop - for(loopIndex=0; loopIndex -#include "resultbuffer.h" - -//Global Variables -DWORD dwThreadId; -long long GLOBAL_COUNTER ; -HANDLE g_hEvent; - -/* Test Input Variables */ -unsigned int USE_PROCESS_COUNT = 0; -unsigned int THREAD_COUNT = 0; -unsigned int REPEAT_COUNT = 0; -unsigned int SLEEP_LENGTH = 0; -unsigned int RELATION_ID = 0; - - -CRITICAL_SECTION CriticalSectionM; /* Critical Section Object (used as mutex) */ - - -/* Capture statistics for each worker thread */ -struct statistics{ - unsigned int processId; - unsigned int operationsFailed; - unsigned int operationsPassed; - unsigned int operationsTotal; - DWORD operationTime; //Milliseconds - unsigned int relationId; -}; - - -/*Capture Statistics at a Process level*/ -struct processStatistics{ - unsigned int processId; - DWORD operationTime; //Milliseconds - unsigned int relationId; -}; - - -ResultBuffer *resultBuffer; - -//function declarations -int GetParameters( int , char **); -void setup (void); -void cleanup(void); -void incrementCounter(void); -DWORD PALAPI enterandleavecs( LPVOID ); - - -/* -*Setup for the test case -*/ - -VOID -setup(VOID) -{ - -g_hEvent = CreateEvent(NULL,TRUE,FALSE, NULL); -if(g_hEvent == NULL) -{ - Fail("Create Event Failed\n" - "GetLastError returned %d\n", GetLastError()); -} - -GLOBAL_COUNTER=0; -/* -* Create mutual exclusion mechanisms -*/ -InitializeCriticalSection ( &CriticalSectionM ); - -} - - -/* -* Cleanup for the test case -*/ -VOID -cleanup(VOID) -{ - /* - * Clean up Critical Section object - */ - DeleteCriticalSection(&CriticalSectionM); - PAL_Terminate(); -} - - -/*function that increments a counter*/ -VOID -incrementCounter(VOID) -{ - - if (INT_MAX==GLOBAL_COUNTER) - GLOBAL_COUNTER=0; - - GLOBAL_COUNTER++; - -} - -/* - * Enter and Leave Critical Section - */ -DWORD -PALAPI -enterandleavecs( LPVOID lpParam ) -{ - - struct statistics stats; - int loopcount = REPEAT_COUNT; - int i; - DWORD dwStart =0; - - int Id=(int)lpParam; - - //initialize structure to hold thread level statistics - stats.relationId = RELATION_ID; - stats.processId = USE_PROCESS_COUNT; - stats.operationsFailed = 0; - stats.operationsPassed = 0; - stats.operationsTotal = 0; - stats.operationTime = 0; - - //Wait for main thread to signal event - if (WAIT_OBJECT_0 != WaitForSingleObject(g_hEvent,INFINITE)) - { - Fail ("readfile: Wait for Single Object (g_hEvent) failed. 
Failing test.\n" - "GetLastError returned %d\n", GetLastError()); - } - - //Collect operation start time - dwStart = GetTickCount(); - - //Operation starts loopcount times - for(i = 0; i < loopcount; i++) - { - - EnterCriticalSection(&CriticalSectionM); - /* - *Do Some Thing once you enter critical section - */ - incrementCounter(); - LeaveCriticalSection(&CriticalSectionM); - - stats.operationsPassed++; - stats.operationsTotal++; - } - //collect operation end time - stats.operationTime = GetTickCount() - dwStart; - - /*Trace("\n\n\n\nOperation Time %d\n", stats.operationTime); - Trace("Operation Passed %d\n", stats.operationsPassed); - Trace("Operation Total %d\n", stats.operationsTotal); - Trace("Operation Failed %d\n", stats.operationsFailed); */ - - if(resultBuffer->LogResult(Id, (char *)&stats)) - { - Fail("Error while writing to shared memory, Thread Id is[%d] and Process id is [%d]\n", Id, USE_PROCESS_COUNT); - } - - - return 0; -} - - -PALTEST(composite_synchronization_criticalsection_paltest_synchronization_criticalsection, "composite/synchronization/criticalsection/paltest_synchronization_criticalsection") -{ - -/* -* Parameter to the threads that will be created -*/ -DWORD dwThrdParam = 0; -HANDLE hThread[64]; -unsigned int i = 0; -DWORD dwStart; - -/* Variables to capture the file name and the file pointer*/ -char fileName[MAX_PATH_FNAME]; -char processFileName[MAX_PATH_FNAME]; -FILE *hFile,*hProcessFile; -struct processStatistics processStats; - -struct statistics* buffer; -int statisticsSize = 0; - -/* -* PAL Initialize -*/ -if(0 != (PAL_Initialize(argc, argv))) - { - return FAIL; - } - -if(GetParameters(argc, argv)) - { - Fail("Error in obtaining the parameters\n"); - } - - -/*setup file for process result collection */ -_snprintf(processFileName, MAX_PATH_FNAME, "%d_process_criticalsection_%d_.txt", USE_PROCESS_COUNT, RELATION_ID); -hProcessFile = fopen(processFileName, "w+"); -if(hProcessFile == NULL) - { - Fail("Error in opening file to write process results for process [%d]\n", USE_PROCESS_COUNT); - } - -//Initialize Process Stats Variables -processStats.operationTime = 0; -processStats.processId = USE_PROCESS_COUNT; -processStats.relationId = RELATION_ID; //Will change later - -//Start Process Time Capture -dwStart = GetTickCount(); - -//setup file for thread result collection -statisticsSize = sizeof(struct statistics); -_snprintf(fileName, MAX_PATH_FNAME, "%d_thread_criticalsection_%d_.txt", USE_PROCESS_COUNT, RELATION_ID); -hFile = fopen(fileName, "w+"); -if(hFile == NULL) -{ - Fail("Error in opening file for write for process [%d]\n", USE_PROCESS_COUNT); -} - -// For each thread we will log operations failed (int), passed (int), total (int) -// and number of ticks (DWORD) for the operations -resultBuffer = new ResultBuffer( THREAD_COUNT, statisticsSize); - -/* -* Call the Setup Routine -*/ -setup(); - -//Create Thread Count Worker Threads - -while (i< THREAD_COUNT) -{ - dwThrdParam = i; - - hThread[i] = CreateThread( - NULL, - 0, - enterandleavecs, - (LPVOID)dwThrdParam, - 0, - &dwThreadId); - - if ( NULL == hThread[i] ) - { - Fail ( "CreateThread() returned NULL. Failing test.\n" - "GetLastError returned %d\n", GetLastError()); - } - i++; -} - -/* -* Set Event to signal all threads to start using the CS -*/ - -if (0==SetEvent(g_hEvent)) -{ - Fail ( "SetEvent returned Zero. 
Failing test.\n" - "GetLastError returned %d\n", GetLastError()); -} - -/* - * Wait for worker threads to complete - * - */ -if ( WAIT_OBJECT_0 != WaitForMultipleObjects (THREAD_COUNT,hThread,TRUE, INFINITE)) -{ - Fail ( "WaitForMultipleObject Failed. Failing test.\n" - "GetLastError returned %d\n", GetLastError()); -} - - -//Get the end time of the process -processStats.operationTime = GetTickCount() - dwStart; - -//Write Process Result Contents to File -if(hProcessFile!= NULL) - { - fprintf(hProcessFile, "%d,%lu,%d\n", processStats.processId, processStats.operationTime, processStats.relationId ); - } - -if (0!=fclose(hProcessFile)) -{ - Fail("Unable to write process results to file" - "GetLastError returned %d\n", GetLastError()); -} - - -/*Write Threads Results to a file*/ -if(hFile!= NULL) -{ - for( i = 0; i < THREAD_COUNT; i++ ) - { - buffer = (struct statistics *)resultBuffer->getResultBuffer(i); - fprintf(hFile, "%d,%d,%d,%d,%lu,%d\n", buffer->processId, buffer->operationsFailed, buffer->operationsPassed, buffer->operationsTotal, buffer->operationTime, buffer->relationId ); - //Trace("Iteration %d over\n", i); - } -} - -if (0!=fclose(hFile)) -{ - Fail("Unable to write thread results to file" - "GetLastError returned %d\n", GetLastError()); -} - - /* Logging for the test case over, clean up the handles */ - //Trace("Contents of the buffer are [%s]\n", resultBuffer->getResultBuffer()); - - -//Call Cleanup for Test Case -cleanup(); - -//Trace("Value of GLOBAL COUNTER %d \n", GLOBAL_COUNTER); -return (PASS); - -} - - -int GetParameters( int argc, char **argv) -{ - - if( (argc != 5) || ((argc == 1) && !strcmp(argv[1],"/?")) - || !strcmp(argv[1],"/h") || !strcmp(argv[1],"/H")) - { - printf("PAL -Composite Critical Section Test\n"); - printf("Usage:\n"); - printf("\t[PROCESS_COUNT] Greater than or Equal to 1 \n"); - printf("\t[WORKER_THREAD_MULTIPLIER_COUNT] Greater than or Equal to 1 and Less than or Equal to 64 \n"); - printf("\t[REPEAT_COUNT] Greater than or Equal to 1\n"); - printf("\t[RELATION_ID [Greater than or Equal to 1]\n"); - return -1; - } - -// Trace("Args 1 is [%s], Arg 2 is [%s], Arg 3 is [%s]\n", argv[1], argv[2], argv[3]); - - USE_PROCESS_COUNT = atoi(argv[1]); - if( USE_PROCESS_COUNT < 0) - { - printf("\nPROCESS_COUNT to greater than or equal to 1\n"); - return -1; - } - - THREAD_COUNT = atoi(argv[2]); - if( THREAD_COUNT < 1 || THREAD_COUNT > 64) - { - printf("\nTHREAD_COUNT to be greater than or equal to 1 or less than or equal to 64\n"); - return -1; - } - - REPEAT_COUNT = atoi(argv[3]); - if( REPEAT_COUNT < 1) - { - printf("\nREPEAT_COUNT to greater than or equal to 1\n"); - return -1; - } - - RELATION_ID = atoi(argv[4]); - if( RELATION_ID < 1) - { - printf("\nMain Process:Invalid RELATION_ID number, Pass greater than 1\n"); - return -1; - } - - return 0; -} - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/criticalsection/mainWrapper.cpp b/src/coreclr/pal/tests/palsuite/composite/synchronization/criticalsection/mainWrapper.cpp deleted file mode 100644 index 529dddb3065e..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/criticalsection/mainWrapper.cpp +++ /dev/null @@ -1,254 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/* -Source Code: mainWrapper.c - -mainWrapper.c creates Composite Test Case Processes and waits for all processes to get over - -Algorithm -o Create PROCESS_COUNT processes. 
- -Author: RameshG -*/ - -#include -#include "resulttime.h" - -/* Test Input Variables */ -unsigned int USE_PROCESS_COUNT = 0; //default -unsigned int THREAD_COUNT = 0; //default -unsigned int REPEAT_COUNT = 0; //default -unsigned int SLEEP_LENGTH = 0; //default -unsigned int RELATION_ID = 1001; - - -//Structure to capture application wide statistics -struct applicationStatistics{ - DWORD operationTime; - unsigned int relationId; - unsigned int processCount; - unsigned int threadCount; - unsigned int repeatCount; - char* buildNumber; - -}; - - -//Get parameters from the commandline -int GetParameters( int argc, char **argv) -{ - - if( (argc != 5) || ((argc == 1) && !strcmp(argv[1],"/?")) - || !strcmp(argv[1],"/h") || !strcmp(argv[1],"/H")) - { - printf("Main Wrapper PAL -Composite Critical Section Test\n"); - printf("Usage:\n"); - printf("\t[PROCESS_COUNT] Greater than or Equal to 1 \n"); - printf("\t[THREAD_COUNT] Greater than or Equal to 1 and Less than or Equal to 64 \n"); - printf("\t[REPEAT_COUNT] Greater than or Equal to 1\n"); - printf("\t[RELATION_ID [Greater than or Equal to 1]\n"); - - return -1; - } - - USE_PROCESS_COUNT = atoi(argv[1]); - if( USE_PROCESS_COUNT < 0) - { - printf("\nPROCESS_COUNT to greater than or equal to 1\n"); - return -1; - } - - THREAD_COUNT = atoi(argv[2]); - if( THREAD_COUNT < 1 || THREAD_COUNT > 64) - { - printf("\nTHREAD_COUNT to be greater than or equal to 1 or less than or equal to 64\n"); - return -1; - } - - REPEAT_COUNT = atoi(argv[3]); - if( REPEAT_COUNT < 1) - { - printf("\nREPEAT_COUNT to greater than or equal to 1\n"); - return -1; - } - - RELATION_ID = atoi(argv[4]); - if( RELATION_ID < 1) - { - printf("\nMain Process:Invalid RELATION_ID number, Pass greater than 1\n"); - return -1; - } - - - - - return 0; -} - -//Main entry point for the application -PALTEST(composite_synchronization_criticalsection_paltest_synchronization_criticalsection, "composite/synchronization/criticalsection/paltest_synchronization_criticalsection") -{ - unsigned int i = 0; - HANDLE hProcess[MAXIMUM_WAIT_OBJECTS]; //Array to hold Process handles - DWORD processReturnCode = 0; - int testReturnCode = PASS; - STARTUPINFO si[MAXIMUM_WAIT_OBJECTS]; - PROCESS_INFORMATION pi[MAXIMUM_WAIT_OBJECTS]; - FILE *hFile; //handle to application results file - char fileName[MAX_PATH]; //file name of the application results file - struct applicationStatistics appStats; - DWORD dwStart=0; //to store the tick count - char lpCommandLine[MAX_PATH] = ""; - int returnCode = 0; - - if(0 != (PAL_Initialize(argc, argv))) - { - return ( FAIL ); - } - - - - - if(GetParameters(argc, argv)) - { - Fail("Error in obtaining the parameters\n"); - } - - //Initialize Application Statistics Structure - appStats.operationTime=0; - appStats.relationId = RELATION_ID; - appStats.processCount = USE_PROCESS_COUNT; - appStats.threadCount = THREAD_COUNT; - appStats.repeatCount = REPEAT_COUNT; - appStats.buildNumber = getBuildNumber(); - - -_snprintf(fileName, MAX_PATH, "main_criticalsection_%d_.txt", RELATION_ID); - -hFile = fopen(fileName, "w+"); - -if(hFile == NULL) - { - Fail("Error in opening file to write application results for Critical Section Test, and error code is %d\n", GetLastError()); - } - -//Start Process Time Capture -dwStart = GetTickCount(); - -for( i = 0; i < USE_PROCESS_COUNT; i++ ) - { - - ZeroMemory( lpCommandLine, MAX_PATH ); - if ( _snprintf( lpCommandLine, MAX_PATH-1, "criticalsection %d %d %d %d", i, THREAD_COUNT, REPEAT_COUNT, RELATION_ID) < 0 ) - { - Trace ("Error: Insufficient 
commandline string length for iteration [%d]\n", i); - } - - /* Zero the data structure space */ - ZeroMemory ( &pi[i], sizeof(pi[i]) ); - ZeroMemory ( &si[i], sizeof(si[i]) ); - - /* Set the process flags and standard io handles */ - si[i].cb = sizeof(si[i]); - - //Printing the Command Line - //Trace("Command Line \t %s \n", lpCommandLine); - - //Create Process - if(!CreateProcess( NULL, /* lpApplicationName*/ - lpCommandLine, /* lpCommandLine */ - NULL, /* lpProcessAttributes */ - NULL, /* lpThreadAttributes */ - TRUE, /* bInheritHandles */ - 0, /* dwCreationFlags, */ - NULL, /* lpEnvironment */ - NULL, /* pCurrentDirectory */ - &si[i], /* lpStartupInfo */ - &pi[i] /* lpProcessInformation */ - )) - { - Fail("Process Not created for [%d] and failed with error code %d\n", i, GetLastError()); - } - else - { - hProcess[i] = pi[i].hProcess; - //Trace("Process created for [%d]\n", i); - } - - } - - returnCode = WaitForMultipleObjects( USE_PROCESS_COUNT, hProcess, TRUE, INFINITE); - if( WAIT_OBJECT_0 != returnCode ) - { - Trace("Wait for Object(s) @ Main thread for %d processes returned %d, and GetLastError value is %d\n", USE_PROCESS_COUNT, returnCode, GetLastError()); - testReturnCode = FAIL; - } - - for( i = 0; i < USE_PROCESS_COUNT; i++ ) - { - /* check the exit code from the process */ - if( ! GetExitCodeProcess( pi[i].hProcess, &processReturnCode ) ) - { - Trace( "GetExitCodeProcess call failed for iteration %d with error code %u\n", - i, GetLastError() ); - - testReturnCode = FAIL; - } - - if(processReturnCode == FAIL) - { - Trace( "Process [%d] failed and returned FAIL\n", i); - testReturnCode = FAIL; - } - - if(!CloseHandle(pi[i].hThread)) - { - Trace("Error:%d: CloseHandle failed for Process [%d] hThread\n", GetLastError(), i); - testReturnCode = FAIL; - } - - if(!CloseHandle(pi[i].hProcess) ) - { - Trace("Error:%d: CloseHandle failed for Process [%d] hProcess\n", GetLastError(), i); - testReturnCode = FAIL; - } - } - -//Get the end time of the process -appStats.operationTime = GetTickCount() - dwStart; - -if( testReturnCode == PASS) - { - Trace("Test Passed\n"); - - } - else - { - Fail("Test Failed\n"); - - } - -//Write Process Result Contents to File -if(hFile!= NULL) - { - fprintf(hFile, "%lu,%d,%d,%d,%d,%s\n", appStats.operationTime, appStats.relationId,appStats.processCount, appStats.threadCount, appStats.repeatCount, appStats.buildNumber); - } - -if (0!=fclose(hFile)) -{ - Trace("Error:%d: fclose failed for file %s\n", GetLastError(), fileName); -} - - PAL_Terminate(); - -if( testReturnCode == PASS) -{ - return PASS; -} -else -{ - return FAIL; -} - -} diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/criticalsection/readme.txt b/src/coreclr/pal/tests/palsuite/composite/synchronization/criticalsection/readme.txt deleted file mode 100644 index 974497cfaff9..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/criticalsection/readme.txt +++ /dev/null @@ -1,11 +0,0 @@ -To compile: - -1) create a dat file (say criticalsection.dat) with contents: -PAL,Composite,palsuite\composite\synchronization\criticalsection,criticalsection=mainWrapper.c,criticalsection.c,,, - -2) perl rrunmod.pl -r criticalsection.dat - - -To execute: -mainwrapper [PROCESS_COUNT] [WORKER_THREAD_MULTIPLIER_COUNT] [REPEAT_COUNT] - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/mtx_critsect.cpp b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/mtx_critsect.cpp deleted file mode 100644 index 
6f5a032a62cd..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/mtx_critsect.cpp +++ /dev/null @@ -1,110 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -//#include -#include -#include "mtx_critsect.h" - -CsWaiterReturnState MTXWaitOnCS(LPCRITICAL_SECTION lpCriticalSection); -void MTXDoActualWait(LPCRITICAL_SECTION lpCriticalSection); -void MTXWakeUpWaiter(LPCRITICAL_SECTION lpCriticalSection); - -/*extern "C" { - LONG InterlockedCompareExchange( - LONG volatile *Destination, - LONG Exchange, - LONG Comperand); -} -*/ -int MTXInitializeCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - int retcode = 0; - - lpCriticalSection->DebugInfo = NULL; - lpCriticalSection->LockCount = 0; - lpCriticalSection->RecursionCount = 0; - lpCriticalSection->SpinCount = 0; - lpCriticalSection->OwningThread = NULL; - - lpCriticalSection->LockSemaphore = (HANDLE)&lpCriticalSection->NativeData; - - if (0!= pthread_mutex_init(&lpCriticalSection->NativeData.Mutex, NULL)) - { - printf("Error Initializing Critical Section\n"); - retcode = -1; - } - - - lpCriticalSection->InitCount = CS_INITIALIZED; - return retcode; -} - -int MTXDeleteCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - int retcode = 0; - - if (lpCriticalSection->InitCount == CS_INITIALIZED) - { - - if (0!=pthread_mutex_destroy(&lpCriticalSection->NativeData.Mutex)) - { - printf("Error Deleting Critical Section\n"); - retcode = -1; - } - } - - lpCriticalSection->InitCount = CS_NOT_INIZIALIZED; - return retcode; -} - -int MTXEnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - - DWORD thread_id; - int retcode = 0; - - thread_id = (DWORD)THREADSilentGetCurrentThreadId(); - - /* check if the current thread already owns the criticalSection */ - if (lpCriticalSection->OwningThread == (HANDLE)thread_id) - { - lpCriticalSection->RecursionCount++; - //Check if this is a failure condition - return 0; - } - - if (0!= pthread_mutex_lock(&lpCriticalSection->NativeData.Mutex)) - { - //Error Condition - printf("Error Entering Critical Section\n"); - retcode = -1; - } - else - { - lpCriticalSection->OwningThread = (HANDLE)thread_id; - lpCriticalSection->RecursionCount = 1; - } - - return retcode; -} - -int MTXLeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - int retcode = 0; - - if (--lpCriticalSection->RecursionCount > 0) - //*****check this ***** - return 0; - - lpCriticalSection->OwningThread = 0; - - if (0!= pthread_mutex_unlock(&lpCriticalSection->NativeData.Mutex)) - { - //Error Condition - printf("Error Leaving Critical Section\n"); - retcode = -1; - } - - return retcode; -} - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/mtx_critsect.h b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/mtx_critsect.h deleted file mode 100644 index 90c36cc61cdc..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/mtx_critsect.h +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include - -typedef void VOID; -typedef unsigned long DWORD; -typedef long LONG; -typedef unsigned long ULONG; -typedef void* HANDLE; -typedef unsigned long ULONG_PTR; - -#define FALSE 0 -#define TRUE 1 - -#define CSBIT_CS_IS_LOCKED 1 -#define CSBIT_NEW_WAITER 2 - -typedef enum CsInitState { CS_NOT_INIZIALIZED, CS_INITIALIZED, CS_FULLY_INITIALIZED } CsInitState; -typedef enum _CsWaiterReturnState { CS_WAITER_WOKEN_UP, CS_WAITER_DIDNT_WAIT } CsWaiterReturnState; - -typedef struct _CRITICAL_SECTION_DEBUG_INFO { - LONG volatile ContentionCount; - LONG volatile InternalContentionCount; - ULONG volatile AcquireCount; - ULONG volatile EnterCount; -} CRITICAL_SECTION_DEBUG_INFO, *PCRITICAL_SECTION_DEBUG_INFO; - -typedef struct _CRITICAL_SECTION_NATIVE_DATA { - pthread_mutex_t Mutex; -} CRITICAL_SECTION_NATIVE_DATA, *PCRITICAL_SECTION_NATIVE_DATA; - -typedef struct _CRITICAL_SECTION { - - CsInitState InitCount; - PCRITICAL_SECTION_DEBUG_INFO DebugInfo; - LONG LockCount; - LONG RecursionCount; - HANDLE OwningThread; - HANDLE LockSemaphore; - ULONG_PTR SpinCount; - CRITICAL_SECTION_NATIVE_DATA NativeData; - -} CRITICAL_SECTION, *PCRITICAL_SECTION, *LPCRITICAL_SECTION; - -int MTXInitializeCriticalSection(LPCRITICAL_SECTION lpCriticalSection); -int MTXDeleteCriticalSection(LPCRITICAL_SECTION lpCriticalSection); -int MTXEnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection); -int MTXLeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection); - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/pal_composite_native_cs.cpp b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/pal_composite_native_cs.cpp deleted file mode 100644 index 5cee88142c46..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/pal_composite_native_cs.cpp +++ /dev/null @@ -1,466 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include -#include -#include -#include -#include -//#include -#include "mtx_critsect.cpp" -//#include "mtx_critsect.h" -#include "resultbuffer.h" - - - -#define LONGLONG long long -#define ULONGLONG unsigned LONGLONG -/*Defining Global Variables*/ - -int THREAD_COUNT=0; -int REPEAT_COUNT=0; -int GLOBAL_COUNTER=0; -int USE_PROCESS_COUNT = 0; -int RELATION_ID =0; -int g_counter = 0; -int MAX_PATH = 256; -LONGLONG calibrationValue = 0; - -pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t g_cv = PTHREAD_COND_INITIALIZER; -pthread_cond_t g_cv2 = PTHREAD_COND_INITIALIZER; -CRITICAL_SECTION g_cs; - -/* Capture statistics for each worker thread */ -struct statistics{ - unsigned int processId; - unsigned int operationsFailed; - unsigned int operationsPassed; - unsigned int operationsTotal; - DWORD operationTime; - unsigned int relationId; -}; - - -struct applicationStatistics{ - DWORD operationTime; - unsigned int relationId; - unsigned int processCount; - unsigned int threadCount; - unsigned int repeatCount; - char* buildNumber; - -}; - -ResultBuffer *resultBuffer; - - -void* waitforworkerthreads(void*); -void starttests(int); -int setuptest(void); -int cleanuptest(void); -int GetParameters( int , char **); -void incrementCounter(void); -ULONGLONG GetTicks(void); -ULONGLONG getPerfCalibrationValue(void); - - - -PALTEST(composite_synchronization_nativecriticalsection_paltest_synchronization_nativecriticalsection, "composite/synchronization/nativecriticalsection/paltest_synchronization_nativecriticalsection") - { - //Variable Declaration - pthread_t pthreads[640]; - int threadID[640]; - int i=0; - int j=0; - int rtn=0; - ULONGLONG startTicks = 0; - - /* Variables to capture the file name and the file pointer*/ - char fileName[MAX_PATH]; - FILE *hFile; - struct statistics* buffer; - int statisticsSize = 0; - - /*Variable to Captutre Information at the Application Level*/ - struct applicationStatistics appStats; - char mainFileName[MAX_PATH]; - FILE *hMainFile; - - //Get perfCalibrationValue - - calibrationValue = getPerfCalibrationValue(); - printf("Calibration Value for this Platform %llu \n", calibrationValue); - - - //Get Parameters - if(GetParameters(argc, argv)) - { - printf("Error in obtaining the parameters\n"); - exit(-1); - } - - //Assign Values to Application Statistics Members - appStats.relationId=RELATION_ID; - appStats.operationTime=0; - appStats.buildNumber = "999.99"; - appStats.processCount = USE_PROCESS_COUNT; - appStats.threadCount = THREAD_COUNT; - appStats.repeatCount = REPEAT_COUNT; - - printf("RELATION ID : %d\n", appStats.relationId); - printf("Process Count : %d\n", appStats.processCount); - printf("Thread Count : %d\n", appStats.threadCount); - printf("Repeat Count : %d\n", appStats.repeatCount); - - - //Open file for Application Statistics Collection - snprintf(mainFileName, MAX_PATH, "main_nativecriticalsection_%d_.txt",appStats.relationId); - hMainFile = fopen(mainFileName, "w+"); - - if(hMainFile == NULL) - { - printf("Error in opening main file for write\n"); - } - - - for (i=0;igetResultBuffer(i); - fprintf(hFile, "%d,%d,%d,%d,%lu,%d\n", buffer->processId, buffer->operationsFailed, buffer->operationsPassed, buffer->operationsTotal, buffer->operationTime, buffer->relationId ); - //printf("Iteration %d over\n", i); - } - } - fclose(hFile); - - - - //Call Test Case Cleanup Routine - if (0!=cleanuptest()) - { - //Error Condition - printf("Error Cleaning up Test Case"); - exit(-1); - } - - - if(hMainFile!= NULL) - { - printf("Writing to Main File \n"); 
- fprintf(hMainFile, "%lu,%d,%d,%d,%d,%s\n", appStats.operationTime, appStats.relationId, appStats.processCount, appStats.threadCount, appStats.repeatCount, appStats.buildNumber); - - } - fclose(hMainFile); - return 0; - } - -void * waitforworkerthreads(void * threadId) -{ - - int *threadParam = (int*) threadId; - -// printf("Thread ID : %d \n", *threadParam); - - //Acquire Lock - if (0!=pthread_mutex_lock(&g_mutex)) - { - //Error Condition - printf("Error Acquiring Mutex Lock in Wait for Worker Thread\n"); - exit(-1); - } - - //Increment Global Counter - GLOBAL_COUNTER++; - - - //If global counter is equal to thread count then signal main thread - if (GLOBAL_COUNTER == THREAD_COUNT) - { - if (0!=pthread_cond_signal(&g_cv2)) - { - //Error Condition - printf("Error in setting conditional variable\n"); - exit(-1); - } - } - - //Wait for main thread to signal - if (0!=pthread_cond_wait(&g_cv,&g_mutex)) - { - //Error Condition - printf("Error waiting on conditional variable in Worker Thread\n"); - exit(-1); - } - - //Release the mutex lock - if (0!=pthread_mutex_unlock(&g_mutex)) - { - //Error Condition - printf("Error Releasing Mutex Lock in Worker Thread\n"); - exit(-1); - } - - //Start the test - starttests(*threadParam); - -} - -void starttests(int threadID) -{ - /*All threads beign executing tests cases*/ - int i = 0; - int Id = threadID; - struct statistics stats; - ULONGLONG startTime = 0; - ULONGLONG endTime = 0; - - - stats.relationId = RELATION_ID; - stats.processId = USE_PROCESS_COUNT; - stats.operationsFailed = 0; - stats.operationsPassed = 0; - stats.operationsTotal = 0; - stats.operationTime = 0; - - //Enter and Leave Critical Section in a loop REPEAT_COUNT Times - - startTime = GetTicks(); - - for (i=0;iLogResult(Id, (char *)&stats)) - { - printf("Error while writing to shared memory, Thread Id is[??] 
and Process id is [%d]\n", USE_PROCESS_COUNT); - } - -} - -int setuptest(void) -{ - - //Initialize Critical Section - if (0!=MTXInitializeCriticalSection( &g_cs)) - { - return -1; - } - return 0; -} - -int cleanuptest(void) -{ - - //Delete Critical Section - if (0!=MTXDeleteCriticalSection(&g_cs)) - { - return -1; - } - return 0; -} - -int GetParameters( int argc, char **argv) -{ - - if( (argc != 5) || ((argc == 1) && !strcmp(argv[1],"/?")) - || !strcmp(argv[1],"/h") || !strcmp(argv[1],"/H")) - { - printf("PAL -Composite Native Critical Section Test\n"); - printf("Usage:\n"); - printf("\t[PROCESS_ID ( greater than 1] \n"); - printf("\t[THREAD_COUNT ( greater than 1] \n"); - printf("\t[REPEAT_COUNT ( greater than 1]\n"); - printf("\t[RELATION_ID [greater than or Equal to 1]\n"); - return -1; - } - - - USE_PROCESS_COUNT = atoi(argv[1]); - if( USE_PROCESS_COUNT < 0) - { - printf("\nInvalid THREAD_COUNT number, Pass greater than 1\n"); - return -1; - } - - THREAD_COUNT = atoi(argv[2]); - if( THREAD_COUNT < 1) - { - printf("\nInvalid THREAD_COUNT number, Pass greater than 1\n"); - return -1; - } - - REPEAT_COUNT = atoi(argv[3]); - if( REPEAT_COUNT < 1) - { - printf("\nInvalid REPEAT_COUNT number, Pass greater than 1\n"); - return -1; - } - - RELATION_ID = atoi(argv[4]); - if( RELATION_ID < 1) - { - printf("\nInvalid RELATION_ID number, Pass greater than 1\n"); - return -1; - } - - - return 0; -} - -void incrementCounter(void) -{ - g_counter ++; -} - - -//Implementation borrowed from pertrace.c -ULONGLONG GetTicks(void) -{ -#ifdef i386 - unsigned long a, d; - asm volatile("rdtsc":"=a" (a), "=d" (d)); - return ((ULONGLONG)((unsigned int)(d)) << 32) | (unsigned int)(a); -#else - // #error Don''t know how to get ticks on this platform - return (ULONGLONG)gethrtime(); -#endif // i386 -} - - -/**/ -ULONGLONG getPerfCalibrationValue(void) -{ - ULONGLONG startTicks; - ULONGLONG endTicks; - - startTicks = GetTicks(); - sleep(1); - endTicks = GetTicks(); - - return ((endTicks-startTicks)/1000); //Return number of Ticks in One Milliseconds - -} - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/readme.txt b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/readme.txt deleted file mode 100644 index 8d83bf794cdb..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/readme.txt +++ /dev/null @@ -1,19 +0,0 @@ -To compile: - -For FReeBSD Platform use the following to compile: -gcc -pthread -lm -lgcc -lstdc++ -xc++ -Di386 pal_composite_native_cs.c - -For Solaris Platform use the following to compile: -gcc -lpthread -lm -lgcc -lstdc++ -xc++ -D__sparc__ pal_composite_native_cs.c - -For HPUX Platform use the following to compile: -gcc -lpthread -mlp64 -lm -lgcc -lstdc++ -xc++ -D_HPUX_ -D__ia64__ pal_composite_native_cs.c - -To execute: -./a.out [PROCESS_COUNT] [THREAD_COUNT] [REPEAT_COUNT] - - - ./a.out 1 32 1000000 4102406 - - - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.cpp b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.cpp deleted file mode 100644 index 9988a49f9c50..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -//#include "stdafx.h" -#include "resultbuffer.h" - -ResultBuffer:: ResultBuffer(int ThreadCount, int ThreadLogSize) - { - // Declare an internal status variable - int Status=0; - - // Update the maximum thread count - MaxThreadCount = ThreadCount; - - // Allocate the memory buffer based on the passed in thread and process counts - // and the specified size of the thread specific buffer - buffer = NULL; - buffer = (char*)malloc(ThreadCount*ThreadLogSize); - // Check to see if the buffer memory was allocated - if (buffer == NULL) - Status = -1; - // Initialize the buffer to 0 to prevent bogus data - memset(buffer,0,ThreadCount*ThreadLogSize); - - // The ThreadOffset is equal to the total number of bytes that will be stored per thread - ThreadOffset = ThreadLogSize; - - } - - - int ResultBuffer::LogResult(int Thread, char* Data) - { - // Declare an internal status flad - int status = 0; - - // Declare an object to store the offset address into the buffer - int Offset; - - // Check to make sure the Thread index is not out of range - if(Thread > MaxThreadCount) - { - printf("Thread index is out of range, Value of Thread[%d], Value of MaxThreadCount[%d]\n", Thread, MaxThreadCount); - status = -1; - return(status); - } - - // Calculate the offset into the shared buffer based on the process and thread indices - Offset = (Thread)*ThreadOffset; - - // Write the passed in data to the reserved buffer - memcpy(buffer+Offset,Data,ThreadOffset); - - return(status); - } - - - char* ResultBuffer::getResultBuffer(int threadId) - { - - return (buffer + threadId*ThreadOffset); - - } - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.h b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.h deleted file mode 100644 index c3d9a27fdb78..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.h +++ /dev/null @@ -1,42 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include -//#include -#ifndef _RESULT_BUFFER_H_ -#define _RESULT_BUFFER_H_ - -//#include - -struct ResultData -{ - int value; - int size; -// ResultData* NextResult; -}; - - class ResultBuffer -{ - // Declare a pointer to a memory buffer to store the logged results - char* buffer; - // Declare an object to store the maximum Thread count - int MaxThreadCount; - // Declare and internal data object to store the calculated offset between adjacent threads data sets - int ThreadOffset; - - // Declare a linked list object to store the parameter values -public: - - // Declare a constructor for the single process case - ResultBuffer(int ThreadCount, int ThreadLogSize); - // Declare a method to log data for the single process instance - int LogResult(int Thread, char* Data); - - char* getResultBuffer(int threadId); -}; - -#include "resultbuffer.cpp" -#endif // _RESULT_BUFFER_H_ - - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/interlocked.cpp b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/interlocked.cpp deleted file mode 100644 index a87b6c4a2842..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/interlocked.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- - -typedef long LONG; - -extern "C" { -LONG InterlockedCompareExchange( - LONG volatile *Destination, - LONG Exchange, - LONG Comperand) -{ -#ifdef i386 - LONG result; - - __asm__ __volatile__( - "lock; cmpxchgl %2,(%1)" - : "=a" (result) - : "r" (Destination), "r" (Exchange), "0" (Comperand) - : "memory" - ); - - return result; -#endif -} -} diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/mtx_critsect.cpp b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/mtx_critsect.cpp deleted file mode 100644 index cd62a7840a1c..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/mtx_critsect.cpp +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include - -#include -#include "mtx_critsect.h" - -CsWaiterReturnState MTXWaitOnCS(LPCRITICAL_SECTION lpCriticalSection); -void MTXDoActualWait(LPCRITICAL_SECTION lpCriticalSection); -void MTXWakeUpWaiter(LPCRITICAL_SECTION lpCriticalSection); - -/*extern "C" { - LONG InterlockedCompareExchange( - LONG volatile *Destination, - LONG Exchange, - LONG Comperand); -} -*/ -int MTXInitializeCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - int retcode = 0; - - lpCriticalSection->DebugInfo = NULL; - lpCriticalSection->LockCount = 0; - lpCriticalSection->RecursionCount = 0; - lpCriticalSection->SpinCount = 0; - lpCriticalSection->OwningThread = NULL; - - lpCriticalSection->LockSemaphore = (HANDLE)&lpCriticalSection->NativeData; - - if (0!= pthread_mutex_init(&lpCriticalSection->NativeData.Mutex, NULL)) - { - printf("Error Initializing Critical Section\n"); - retcode = -1; - } - - - lpCriticalSection->InitCount = CS_INITIALIZED; - return retcode; -} - -int MTXDeleteCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - int retcode = 0; - - if (lpCriticalSection->InitCount == CS_INITIALIZED) - { - - if (0!=pthread_mutex_destroy(&lpCriticalSection->NativeData.Mutex)) - { - printf("Error Deleting Critical Section\n"); - retcode = -1; - } - } - - lpCriticalSection->InitCount = CS_NOT_INIZIALIZED; - return retcode; -} - -int MTXEnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - - DWORD thread_id; - int retcode = 0; - - thread_id = (DWORD)THREADSilentGetCurrentThreadId(); - - /* check if the current thread already owns the criticalSection */ - if (lpCriticalSection->OwningThread == (HANDLE)thread_id) - { - lpCriticalSection->RecursionCount++; - //Check if this is a failure condition - return 0; - } - - if (0!= pthread_mutex_lock(&lpCriticalSection->NativeData.Mutex)) - { - //Error Condition - printf("Error Entering Critical Section\n"); - retcode = -1; - } - else - { - lpCriticalSection->OwningThread = (HANDLE)thread_id; - lpCriticalSection->RecursionCount = 1; - } - - return retcode; -} - -int MTXLeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection) -{ - int retcode = 0; - - if (--lpCriticalSection->RecursionCount > 0) - //*****check this ***** - return 0; - - lpCriticalSection->OwningThread = 0; - - if (0!= pthread_mutex_unlock(&lpCriticalSection->NativeData.Mutex)) - { - //Error Condition - printf("Error Leaving Critical Section\n"); - retcode = -1; - } - - return retcode; -} - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/mtx_critsect.h b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/mtx_critsect.h deleted file mode 100644 
index 16e9eb9cbb39..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/mtx_critsect.h +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include - -typedef void VOID; -typedef void* HANDLE; -typedef unsigned long ULONG_PTR; - -#ifdef HPUX - typedef unsigned int DWORD; - typedef int LONG; - typedef unsigned int ULONG; -#else - typedef unsigned long DWORD; - typedef long LONG; - typedef unsigned long ULONG; -#endif - - - - -#define FALSE 0 -#define TRUE 1 - -#define CSBIT_CS_IS_LOCKED 1 -#define CSBIT_NEW_WAITER 2 - -typedef enum CsInitState { CS_NOT_INIZIALIZED, CS_INITIALIZED, CS_FULLY_INITIALIZED } CsInitState; -typedef enum _CsWaiterReturnState { CS_WAITER_WOKEN_UP, CS_WAITER_DIDNT_WAIT } CsWaiterReturnState; - -typedef struct _CRITICAL_SECTION_DEBUG_INFO { - LONG volatile ContentionCount; - LONG volatile InternalContentionCount; - ULONG volatile AcquireCount; - ULONG volatile EnterCount; -} CRITICAL_SECTION_DEBUG_INFO, *PCRITICAL_SECTION_DEBUG_INFO; - -typedef struct _CRITICAL_SECTION_NATIVE_DATA { - pthread_mutex_t Mutex; -} CRITICAL_SECTION_NATIVE_DATA, *PCRITICAL_SECTION_NATIVE_DATA; - -typedef struct _CRITICAL_SECTION { - - CsInitState InitCount; - PCRITICAL_SECTION_DEBUG_INFO DebugInfo; - LONG LockCount; - LONG RecursionCount; - HANDLE OwningThread; - HANDLE LockSemaphore; - ULONG_PTR SpinCount; - CRITICAL_SECTION_NATIVE_DATA NativeData; - -} CRITICAL_SECTION, *PCRITICAL_SECTION, *LPCRITICAL_SECTION; - -int MTXInitializeCriticalSection(LPCRITICAL_SECTION lpCriticalSection); -int MTXDeleteCriticalSection(LPCRITICAL_SECTION lpCriticalSection); -int MTXEnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection); -int MTXLeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection); - -extern "C" { - LONG InterlockedCompareExchange( - LONG volatile *Destination, - LONG Exchange, - LONG Comperand); -} diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/pal_composite_native_cs.cpp b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/pal_composite_native_cs.cpp deleted file mode 100644 index d98669c2b283..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/pal_composite_native_cs.cpp +++ /dev/null @@ -1,470 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include -#include -#include -#include -#include -//#include -//#include "mtx_critsect.cpp" -#include "mtx_critsect.h" -#include "resultbuffer.h" - - - -#define LONGLONG long long -#define ULONGLONG unsigned LONGLONG -/*Defining Global Variables*/ - -int THREAD_COUNT=0; -int REPEAT_COUNT=0; -int GLOBAL_COUNTER=0; -int USE_PROCESS_COUNT = 0; -int RELATION_ID =0; -int g_counter = 0; -int MAX_PATH = 256; -LONGLONG calibrationValue = 0; - -pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t g_cv = PTHREAD_COND_INITIALIZER; -pthread_cond_t g_cv2 = PTHREAD_COND_INITIALIZER; -CRITICAL_SECTION g_cs; - -/* Capture statistics for each worker thread */ -struct statistics{ - unsigned int processId; - unsigned int operationsFailed; - unsigned int operationsPassed; - unsigned int operationsTotal; - DWORD operationTime; - unsigned int relationId; -}; - - -struct applicationStatistics{ - DWORD operationTime; - unsigned int relationId; - unsigned int processCount; - unsigned int threadCount; - unsigned int repeatCount; - char* buildNumber; - -}; - -ResultBuffer *resultBuffer; - - -void* waitforworkerthreads(void*); -void starttests(int); -int setuptest(void); -int cleanuptest(void); -int GetParameters( int , char **); -void incrementCounter(void); -ULONGLONG GetTicks(void); -ULONGLONG getPerfCalibrationValue(void); - - - -PALTEST(composite_synchronization_nativecs_interlocked_paltest_synchronization_nativecs_interlocked, "composite/synchronization/nativecs_interlocked/paltest_synchronization_nativecs_interlocked") - { - //Variable Declaration - pthread_t pthreads[640]; - int threadID[640]; - int i=0; - int j=0; - int rtn=0; - ULONGLONG startTicks = 0; - - /* Variables to capture the file name and the file pointer*/ - char fileName[MAX_PATH]; - FILE *hFile; - struct statistics* buffer; - int statisticsSize = 0; - - /*Variable to Captutre Information at the Application Level*/ - struct applicationStatistics appStats; - char mainFileName[MAX_PATH]; - FILE *hMainFile; - - //Get perfCalibrationValue - - calibrationValue = getPerfCalibrationValue(); - printf("Calibration Value for this Platform %llu \n", calibrationValue); - - - //Get Parameters - if(GetParameters(argc, argv)) - { - printf("Error in obtaining the parameters\n"); - exit(-1); - } - - //Assign Values to Application Statistics Members - appStats.relationId=RELATION_ID; - appStats.operationTime=0; - appStats.buildNumber = "999.99"; - appStats.processCount = USE_PROCESS_COUNT; - appStats.threadCount = THREAD_COUNT; - appStats.repeatCount = REPEAT_COUNT; - - printf("RELATION ID : %d\n", appStats.relationId); - printf("Process Count : %d\n", appStats.processCount); - printf("Thread Count : %d\n", appStats.threadCount); - printf("Repeat Count : %d\n", appStats.repeatCount); - - - //Open file for Application Statistics Collection - snprintf(mainFileName, MAX_PATH, "main_nativecriticalsection_%d_.txt",appStats.relationId); - hMainFile = fopen(mainFileName, "w+"); - - if(hMainFile == NULL) - { - printf("Error in opening main file for write\n"); - } - - - for (i=0;igetResultBuffer(i); - fprintf(hFile, "%d,%d,%d,%d,%lu,%d\n", buffer->processId, buffer->operationsFailed, buffer->operationsPassed, buffer->operationsTotal, buffer->operationTime, buffer->relationId ); - //printf("Iteration %d over\n", i); - } - } - fclose(hFile); - - - - //Call Test Case Cleanup Routine - if (0!=cleanuptest()) - { - //Error Condition - printf("Error Cleaning up Test Case"); - exit(-1); - } - - - if(hMainFile!= NULL) - { - printf("Writing to Main File \n"); - 
fprintf(hMainFile, "%lu,%d,%d,%d,%d,%s\n", appStats.operationTime, appStats.relationId, appStats.processCount, appStats.threadCount, appStats.repeatCount, appStats.buildNumber); - - } - fclose(hMainFile); - return 0; - } - -void * waitforworkerthreads(void * threadId) -{ - - int *threadParam = (int*) threadId; - -// printf("Thread ID : %d \n", *threadParam); - - //Acquire Lock - if (0!=pthread_mutex_lock(&g_mutex)) - { - //Error Condition - printf("Error Acquiring Mutex Lock in Wait for Worker Thread\n"); - exit(-1); - } - - //Increment Global Counter - GLOBAL_COUNTER++; - - - //If global counter is equal to thread count then signal main thread - if (GLOBAL_COUNTER == THREAD_COUNT) - { - if (0!=pthread_cond_signal(&g_cv2)) - { - //Error Condition - printf("Error in setting conditional variable\n"); - exit(-1); - } - } - - //Wait for main thread to signal - if (0!=pthread_cond_wait(&g_cv,&g_mutex)) - { - //Error Condition - printf("Error waiting on conditional variable in Worker Thread\n"); - exit(-1); - } - - //Release the mutex lock - if (0!=pthread_mutex_unlock(&g_mutex)) - { - //Error Condition - printf("Error Releasing Mutex Lock in Worker Thread\n"); - exit(-1); - } - - //Start the test - starttests(*threadParam); - -} - -void starttests(int threadID) -{ - /*All threads beign executing tests cases*/ - int i = 0; - int Id = threadID; - struct statistics stats; - ULONGLONG startTime = 0; - ULONGLONG endTime = 0; - - LONG volatile Destination; - LONG Exchange; - LONG Comperand; - LONG result; - - stats.relationId = RELATION_ID; - stats.processId = USE_PROCESS_COUNT; - stats.operationsFailed = 0; - stats.operationsPassed = 0; - stats.operationsTotal = 0; - stats.operationTime = 0; - - //Enter and Leave Critical Section in a loop REPEAT_COUNT Times - - - startTime = GetTicks(); - - for (i=0;iLogResult(Id, (char *)&stats)) - { - printf("Error while writing to shared memory, Thread Id is[??] 
and Process id is [%d]\n", USE_PROCESS_COUNT); - } - -} - -int setuptest(void) -{ - - //Initialize Critical Section - /* - if (0!=MTXInitializeCriticalSection( &g_cs)) - { - return -1; - } - */ - return 0; -} - -int cleanuptest(void) -{ - - //Delete Critical Section - /* - if (0!=MTXDeleteCriticalSection(&g_cs)) - { - return -1; - } - */ - return 0; -} - -int GetParameters( int argc, char **argv) -{ - - if( (argc != 5) || ((argc == 1) && !strcmp(argv[1],"/?")) - || !strcmp(argv[1],"/h") || !strcmp(argv[1],"/H")) - { - printf("PAL -Composite Native Critical Section Test\n"); - printf("Usage:\n"); - printf("\t[PROCESS_ID ( greater than 1] \n"); - printf("\t[THREAD_COUNT ( greater than 1] \n"); - printf("\t[REPEAT_COUNT ( greater than 1]\n"); - printf("\t[RELATION_ID [greater than or Equal to 1]\n"); - return -1; - } - - - USE_PROCESS_COUNT = atoi(argv[1]); - if( USE_PROCESS_COUNT < 0) - { - printf("\nInvalid THREAD_COUNT number, Pass greater than 1\n"); - return -1; - } - - THREAD_COUNT = atoi(argv[2]); - if( THREAD_COUNT < 1) - { - printf("\nInvalid THREAD_COUNT number, Pass greater than 1\n"); - return -1; - } - - REPEAT_COUNT = atoi(argv[3]); - if( REPEAT_COUNT < 1) - { - printf("\nInvalid REPEAT_COUNT number, Pass greater than 1\n"); - return -1; - } - - RELATION_ID = atoi(argv[4]); - if( RELATION_ID < 1) - { - printf("\nInvalid RELATION_ID number, Pass greater than 1\n"); - return -1; - } - - - return 0; -} - -void incrementCounter(void) -{ - g_counter ++; -} - - -//Implementation borrowed from pertrace.c -ULONGLONG GetTicks(void) -{ -#ifdef i386 - unsigned long a, d; - asm volatile("rdtsc":"=a" (a), "=d" (d)); - return ((ULONGLONG)((unsigned int)(d)) << 32) | (unsigned int)(a); -#else - // #error Don''t know how to get ticks on this platform - return (ULONGLONG)gethrtime(); -#endif // i386 -} - - -/**/ -ULONGLONG getPerfCalibrationValue(void) -{ - ULONGLONG startTicks; - ULONGLONG endTicks; - - startTicks = GetTicks(); - sleep(1); - endTicks = GetTicks(); - - return ((endTicks-startTicks)/1000); //Return number of Ticks in One Milliseconds - -} - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.cpp b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.cpp deleted file mode 100644 index 9988a49f9c50..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -//#include "stdafx.h" -#include "resultbuffer.h" - -ResultBuffer:: ResultBuffer(int ThreadCount, int ThreadLogSize) - { - // Declare an internal status variable - int Status=0; - - // Update the maximum thread count - MaxThreadCount = ThreadCount; - - // Allocate the memory buffer based on the passed in thread and process counts - // and the specified size of the thread specific buffer - buffer = NULL; - buffer = (char*)malloc(ThreadCount*ThreadLogSize); - // Check to see if the buffer memory was allocated - if (buffer == NULL) - Status = -1; - // Initialize the buffer to 0 to prevent bogus data - memset(buffer,0,ThreadCount*ThreadLogSize); - - // The ThreadOffset is equal to the total number of bytes that will be stored per thread - ThreadOffset = ThreadLogSize; - - } - - - int ResultBuffer::LogResult(int Thread, char* Data) - { - // Declare an internal status flad - int status = 0; - - // Declare an object to store the offset address into the buffer - int Offset; - - // Check to make sure the Thread index is not out of range - if(Thread > MaxThreadCount) - { - printf("Thread index is out of range, Value of Thread[%d], Value of MaxThreadCount[%d]\n", Thread, MaxThreadCount); - status = -1; - return(status); - } - - // Calculate the offset into the shared buffer based on the process and thread indices - Offset = (Thread)*ThreadOffset; - - // Write the passed in data to the reserved buffer - memcpy(buffer+Offset,Data,ThreadOffset); - - return(status); - } - - - char* ResultBuffer::getResultBuffer(int threadId) - { - - return (buffer + threadId*ThreadOffset); - - } - diff --git a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.h b/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.h deleted file mode 100644 index c3d9a27fdb78..000000000000 --- a/src/coreclr/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.h +++ /dev/null @@ -1,42 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include -#include -//#include -#ifndef _RESULT_BUFFER_H_ -#define _RESULT_BUFFER_H_ - -//#include - -struct ResultData -{ - int value; - int size; -// ResultData* NextResult; -}; - - class ResultBuffer -{ - // Declare a pointer to a memory buffer to store the logged results - char* buffer; - // Declare an object to store the maximum Thread count - int MaxThreadCount; - // Declare and internal data object to store the calculated offset between adjacent threads data sets - int ThreadOffset; - - // Declare a linked list object to store the parameter values -public: - - // Declare a constructor for the single process case - ResultBuffer(int ThreadCount, int ThreadLogSize); - // Declare a method to log data for the single process instance - int LogResult(int Thread, char* Data); - - char* getResultBuffer(int threadId); -}; - -#include "resultbuffer.cpp" -#endif // _RESULT_BUFFER_H_ - - diff --git a/src/coreclr/pal/tests/palsuite/composite/wfmo/main.cpp b/src/coreclr/pal/tests/palsuite/composite/wfmo/main.cpp index 0e0684e5f4f0..e12cb5d596b8 100644 --- a/src/coreclr/pal/tests/palsuite/composite/wfmo/main.cpp +++ b/src/coreclr/pal/tests/palsuite/composite/wfmo/main.cpp @@ -127,7 +127,7 @@ PALTEST(composite_wfmo_paltest_composite_wfmo, "composite/wfmo/paltest_composite } /* Register the start time */ - dwStartTime = GetTickCount(); + dwStartTime = (DWORD)minipal_lowres_ticks(); testStats.relationId = 0; testStats.relationId = RELATION_ID; testStats.processCount = PROCESS_COUNT; diff --git a/src/coreclr/pal/tests/palsuite/composite/wfmo/mutex.cpp b/src/coreclr/pal/tests/palsuite/composite/wfmo/mutex.cpp index 82f59880c404..ed5de0fafd16 100644 --- a/src/coreclr/pal/tests/palsuite/composite/wfmo/mutex.cpp +++ b/src/coreclr/pal/tests/palsuite/composite/wfmo/mutex.cpp @@ -146,7 +146,7 @@ PALTEST(composite_wfmo_paltest_composite_wfmo, "composite/wfmo/paltest_composite } /* Register the start time */ - dwStartTime = GetTickCount(); + dwStartTime = (DWORD)minipal_lowres_ticks(); processStats.relationId = RELATION_ID; processStats.processId = USE_PROCESS_COUNT; @@ -306,7 +306,7 @@ void PALAPI Run_Thread_composite_wfmo (LPVOID lpParam) } /* Register the start time */ - dwStartTime = GetTickCount(); + dwStartTime = (DWORD)minipal_lowres_ticks(); /* Run the tests repeat count times */ for( i = 0; i < REPEAT_COUNT; i++ ) diff --git a/src/coreclr/pal/tests/palsuite/exception_handling/pal_sxs/test1/CMakeLists.txt b/src/coreclr/pal/tests/palsuite/exception_handling/pal_sxs/test1/CMakeLists.txt index ca5e4383d57f..cda9df41a65e 100644 --- a/src/coreclr/pal/tests/palsuite/exception_handling/pal_sxs/test1/CMakeLists.txt +++ b/src/coreclr/pal/tests/palsuite/exception_handling/pal_sxs/test1/CMakeLists.txt @@ -1,3 +1,5 @@ +if(NOT CLR_CMAKE_HOST_BROWSER) + if(CLR_CMAKE_HOST_UNIX) add_definitions(-DFEATURE_ENABLE_HARDWARE_EXCEPTIONS) endif(CLR_CMAKE_HOST_UNIX) @@ -95,3 +97,5 @@ install (TARGETS paltest_pal_sxs_test1 DESTINATION paltests/exception_handling/p install (TARGETS paltest_pal_sxs_test1_dll1 DESTINATION paltests/exception_handling/pal_sxs/test1 COMPONENT paltests EXCLUDE_FROM_ALL) install (TARGETS paltest_pal_sxs_test1_dll2 DESTINATION paltests/exception_handling/pal_sxs/test1 COMPONENT paltests EXCLUDE_FROM_ALL) add_dependencies(paltests_install paltest_pal_sxs_test1 paltest_pal_sxs_test1_dll1 paltest_pal_sxs_test1_dll2) + +endif(NOT CLR_CMAKE_HOST_BROWSER) \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test1/CopyFileA.cpp 
b/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test1/CopyFileA.cpp deleted file mode 100644 index 486c4dcbd6c6..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test1/CopyFileA.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: CopyFileA.c -** -** Purpose: Tests the PAL implementation of the CopyFileA function -** -** -**===================================================================*/ - -/* - 1. copy an existing file to existing with overwrite true - 2. copy an existing file to existing with overwrite false - 3. copy an existing file to non-existent with overwrite true - 4. copy an existing file to non-existent with overwrite false - 5. copy non-existent file to existing with overwrite true - 6. copy non-existent file to existing with overwrite false - 7. copy non-existent file to non-existent with overwrite true - 8. copy non-existent file to non-existent with overwrite false -*/ - -#include - -struct TESTS{ - char* lpSource; - char* lpDestination; - BOOL bFailIfExists; - int nResult; - }; - - -PALTEST(file_io_CopyFileA_test1_paltest_copyfilea_test1, "file_io/CopyFileA/test1/paltest_copyfilea_test1") -{ - char szSrcExisting[] = {"src_existing.tmp"}; - char szSrcNonExistent[] = {"src_non-existent.tmp"}; - char szDstExisting[] = {"dst_existing.tmp"}; - char szDstNonExistent[] = {"dst_non-existent.tmp"}; - BOOL bRc = TRUE; - BOOL bSuccess = TRUE; - FILE* tempFile = NULL; - int i; - struct TESTS testCase[] = - { - {szSrcExisting, szDstExisting, FALSE, 1}, - {szSrcExisting, szDstExisting, TRUE, 0}, - {szSrcExisting, szDstNonExistent, FALSE, 1}, - {szSrcExisting, szDstNonExistent, TRUE, 1}, - {szSrcNonExistent, szDstExisting, FALSE, 0}, - {szSrcNonExistent, szDstExisting, TRUE, 0}, - {szSrcNonExistent, szDstNonExistent, FALSE, 0}, - {szSrcNonExistent, szDstNonExistent, TRUE, 0} - }; - - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - /* create the src_existing file */ - tempFile = fopen(szSrcExisting, "w"); - if (tempFile != NULL) - { - fprintf(tempFile, "CopyFileA test file: src_existing.tmp\n"); - fclose(tempFile); - } - else - { - Fail("CopyFileA: ERROR-> Couldn't create \"src_existing.tmp\" with " - "error %ld\n", - GetLastError()); - } - - /* create the dst_existing file */ - tempFile = fopen(szDstExisting, "w"); - if (tempFile != NULL) - { - fprintf(tempFile, "CopyFileA test file: dst_existing.tmp\n"); - fclose(tempFile); - } - else - { - Fail("CopyFileA: ERROR-> Couldn't create \"dst_existing.tmp\" with " - "error %ld\n", - GetLastError()); - } - - - - for (i = 0; i < (sizeof(testCase) / sizeof(struct TESTS)); i++) - { - bRc = CopyFileA(testCase[i].lpSource, - testCase[i].lpDestination, - testCase[i].bFailIfExists); - if (!bRc) - { - if (testCase[i].nResult == 1) - { - Trace("CopyFileA: FAILED: %s -> %s with bFailIfExists = %d " - "with error %ld\n", - testCase[i].lpSource, - testCase[i].lpDestination, - testCase[i].bFailIfExists, - GetLastError()); - bSuccess = FALSE; - } - } - else - { - if (testCase[i].nResult == 0) - { - Trace("CopyFileA: FAILED: %s -> %s with bFailIfExists = %d\n", - testCase[i].lpSource, - testCase[i].lpDestination, - testCase[i].bFailIfExists); - bSuccess = FALSE; - } - else - { - /* verify the file was moved */ - if (GetFileAttributesA(testCase[i].lpDestination) == -1) - { - Trace("CopyFileA: 
GetFileAttributes of destination file " - "failed with error code %ld. \n", - GetLastError()); - bSuccess = FALSE; - } - else if (GetFileAttributesA(testCase[i].lpSource) == -1) - { - Trace("CopyFileA: GetFileAttributes of source file " - "failed with error code %ld. \n", - GetLastError()); - bSuccess = FALSE; - } - else - { - /* verify attributes of destination file to source file*/ - if(GetFileAttributes(testCase[i].lpSource) != - GetFileAttributes(testCase[i].lpDestination)) - { - Trace("CopyFileA : The file attributes of the " - "destination file do not match the file " - "attributes of the source file.\n"); - bSuccess = FALSE; - } - } - } - } - /* delete file file but don't worry if it fails */ - remove(szDstNonExistent); - } - - int exitCode = bSuccess ? PASS : FAIL; - PAL_TerminateEx(exitCode); - return exitCode; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test2/test2.cpp deleted file mode 100644 index cfc5237a34d3..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test2/test2.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: test2.c -** -** Purpose: Tests the PAL implementation of the CopyFileA function -** to see if a file can be copied to itself -** -** -**===================================================================*/ - - -#include - -PALTEST(file_io_CopyFileA_test2_paltest_copyfilea_test2, "file_io/CopyFileA/test2/paltest_copyfilea_test2") -{ - - BOOL bRc = TRUE; - char* szSrcExisting = "src_existing.tmp"; - FILE* tempFile = NULL; - DWORD temp; - int retCode; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - /* create the src_existing file */ - tempFile = fopen(szSrcExisting, "w"); - if (tempFile != NULL) - { - retCode = fputs("CopyFileA test file: src_existing.tmp ", tempFile); - if(retCode < 0) - { - Fail("CopyFileA: ERROR-> Couldn't write to %s with error " - "%u.\n", szSrcExisting, GetLastError()); - } - retCode = fclose(tempFile); - if(retCode != 0) - { - Fail("CopyFileA: ERROR-> Couldn't close file: %s with error " - "%u.\n", szSrcExisting, GetLastError()); - } - - } - else - { - Fail("CopyFileA: ERROR-> Couldn't create %s with " - "error %ld\n",szSrcExisting,GetLastError()); - } - - /* Get file attributes of source */ - temp = GetFileAttributes(szSrcExisting); - if (temp == -1) - { - Fail("CopyFileA: GetFileAttributes of source file " - "failed with error code %ld. \n", - GetLastError()); - } - - /* make sure a file can't copy to itself - first testing with IfFileExists flag set to true */ - bRc = CopyFileA(szSrcExisting,szSrcExisting,TRUE); - if(bRc) - { - Fail("ERROR: Cannot copy a file to itself, %u",GetLastError()); - } - - /* try to get file attributes of destination */ - if (GetFileAttributesA(szSrcExisting) == -1) - { - Fail("CopyFileA: GetFileAttributes of destination file " - "failed with error code %ld. 
\n", - GetLastError()); - } - else - { - /* verify attributes of destination file to source file*/ - - if(temp != GetFileAttributes(szSrcExisting)) - { - Fail("CopyFileA : The file attributes of the " - "destination file do not match the file " - "attributes of the source file.\n"); - } - } - - /* testing with IfFileExists flags set to false - should fail in Windows and pass in UNIX */ - bRc = CopyFileA(szSrcExisting,szSrcExisting,FALSE); - if(bRc && (GetLastError() != ERROR_ALREADY_EXISTS)) - { - Fail("ERROR: Cannot copy a file to itself, %u",GetLastError()); - } - - if (GetFileAttributesA(szSrcExisting) == -1) - { - Fail("CopyFileA: GetFileAttributes of destination file " - "failed with error code %ld. \n", - GetLastError()); - } - else - { - /* verify attributes of destination file to source file*/ - - if(temp != GetFileAttributes(szSrcExisting)) - { - Fail("CopyFileA : The file attributes of the " - "destination file do not match the file " - "attributes of the source file.\n"); - } - } - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test3/test3.cpp b/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test3/test3.cpp deleted file mode 100644 index 33d3c541c5e9..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test3/test3.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: test3.c -** -** Purpose: Tests the PAL implementation of the CopyFileA function -** to see if a file can be copied to itself -** -** -**===================================================================*/ - -#include - -PALTEST(file_io_CopyFileA_test3_paltest_copyfilea_test3, "file_io/CopyFileA/test3/paltest_copyfilea_test3") -{ - - BOOL bRc = TRUE; - char* szSrcExisting = "src_existing.tmp"; - char* szDest = "src_dest.tmp"; - FILE* tempFile = NULL; - int retCode; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - /* create the src_existing file */ - tempFile = fopen(szSrcExisting, "w"); - if (tempFile != NULL) - { - retCode = fputs("CopyFileA test file: src_existing.tmp ", tempFile); - if(retCode < 0) - { - retCode = fclose(tempFile); - if(retCode != 0) - { - Trace("CopyFileA: ERROR-> Couldn't close file: %s with error " - "%u.\n", szSrcExisting, GetLastError()); - } - - Fail("CopyFileA: ERROR-> Couldn't write to %s with error " - "%u.\n", szSrcExisting, GetLastError()); - } - retCode = fclose(tempFile); - if(retCode != 0) - { - Fail("CopyFileA: ERROR-> Couldn't close file: %s with error " - "%u.\n", szSrcExisting, GetLastError()); - } - - } - else - { - Fail("CopyFileA: ERROR-> Couldn't create %s with " - "error %ld\n",szSrcExisting,GetLastError()); - } - - /* set the file attributes of the source file to readonly */ - bRc = SetFileAttributesA(szSrcExisting, FILE_ATTRIBUTE_READONLY); - if(!bRc) - { - Fail("CopyFileA: ERROR-> Couldn't set file attributes for " - "file %s with error %u\n", szSrcExisting, GetLastError()); - } - - // Check the file attributes to make sure SetFileAttributes() above actually succeeded - DWORD fileAttributes = GetFileAttributesA(szSrcExisting); - if (fileAttributes == INVALID_FILE_ATTRIBUTES) - { - Fail("CopyFileA: Failed to get file attributes for source file, %u\n", GetLastError()); - } - if ((fileAttributes & FILE_ATTRIBUTE_READONLY) == 0) - { - Fail("CopyFileA: SetFileAttributes(read-only) 
on source file returned success but did not make it read-only.\n"); - } - - /* copy the file */ - bRc = CopyFileA(szSrcExisting,szDest,TRUE); - if(!bRc) - { - Fail("CopyFileA: Cannot copy a file with error, %u",GetLastError()); - } - - - /* try to get file attributes of destination file */ - fileAttributes = GetFileAttributesA(szDest); - if (fileAttributes == INVALID_FILE_ATTRIBUTES) - { - Fail("CopyFileA: GetFileAttributes of destination file " - "failed with error code %ld. \n", - GetLastError()); - } - - /* verify attributes of destination file to source file*/ - if((fileAttributes & FILE_ATTRIBUTE_READONLY) != FILE_ATTRIBUTE_READONLY) - { - Fail("CopyFileA : The file attributes of the " - "destination file do not match the file " - "attributes of the source file.\n"); - } - - /* set the attributes of the destination file to normal again */ - bRc = SetFileAttributesA(szDest, FILE_ATTRIBUTE_NORMAL); - if(!bRc) - { - Fail("CopyFileA: ERROR-> Couldn't set file attributes for " - "file %s with error %u\n", szDest, GetLastError()); - } - - /* delete the newly copied file */ - int st = remove(szDest); - if(st != 0) - { - Fail("CopyFileA: remove failed to delete the" - "file correctly with error,%u.\n",errno); - } - - /* set the attributes of the source file to normal again */ - bRc = SetFileAttributesA(szSrcExisting, FILE_ATTRIBUTE_NORMAL); - if(!bRc) - { - Fail("CopyFileA: ERROR-> Couldn't set file attributes for " - "file %s with error %u\n", szSrcExisting, GetLastError()); - } - - /* delete the original file */ - st = remove(szSrcExisting); - if(st != 0) - { - Fail("CopyFileA: remove failed to delete the" - "file correctly with error,%u.\n",errno); - } - - PAL_Terminate(); - return PASS; - -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test4/test4.cpp b/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test4/test4.cpp deleted file mode 100644 index 9eaecf2702c0..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/CopyFileA/test4/test4.cpp +++ /dev/null @@ -1,179 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: test4.c -** -** Purpose: Tests the PAL implementation of the CopyFileA function -** to see if a file can through different users belonging to -** different groups. -** - -=====================================================================*/ - -/* USECASE - Copy a file from a different user, belonging to a different group to - the current user, who is a member of the current group. Then check - to see that the current user has the basic access rights to the copied - file. - - Thie original file used is the passwd file in the etc directory of a - BSD machine. This file should exist on all machines. 
-*/ - -#include - -PALTEST(file_io_CopyFileA_test4_paltest_copyfilea_test4, "file_io/CopyFileA/test4/paltest_copyfilea_test4") -{ - -#if WIN32 - return PASS; - -#else - - BOOL bRc = TRUE; - char* szSrcExisting = "/etc/passwd"; - char* szDest = "temp.tmp"; - char* szStringTest = "Marry had a little lamb"; - char szStringRead[30]; /* large enough for string szStringTest */ - - HANDLE hFile = NULL; - DWORD dwBytesWritten=0; - DWORD dwBytesRead=0; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - /* copy the file */ - bRc = CopyFileA(szSrcExisting,szDest,TRUE); - if(!bRc) - { - Fail("CopyFileA: Cannot copy a file with error, %u",GetLastError()); - } - - /* try to get file attributes of destination file */ - if (GetFileAttributesA(szDest) == -1) - { - Fail("CopyFileA: GetFileAttributes of destination file " - "failed with error code %u. \n", - GetLastError()); - } - - /* set the attributes of the destination file to normal again */ - bRc = SetFileAttributesA(szDest, FILE_ATTRIBUTE_NORMAL); - if(!bRc) - { - Fail("CopyFileA: ERROR-> Couldn't set file attributes for " - "file %s with error %u\n", szDest, GetLastError()); - } - - /* open the file for write purposes */ - hFile = CreateFile(szDest, - GENERIC_WRITE, - 0, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL); - - if(hFile == INVALID_HANDLE_VALUE) - { - Fail("CopyFileA: ERROR -> Unable to create file \"%s\".\n", - szDest); - } - - /* Attempt to write to the file */ - bRc = WriteFile(hFile, szStringTest, strlen(szStringTest), &dwBytesWritten, NULL); - if (!bRc) - { - Trace("CopyFileA: ERROR -> Unable to write to copied file with error " - "%u.\n", GetLastError()); - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileA: ERROR -> Unable to close file \"%s\" with " - "error %u.\n",szDest, GetLastError()); - } - Fail(""); - - } - - /* Close the file handle */ - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileA: ERROR -> Unable to close file \"%s\" with error %u " - ".\n",szDest,GetLastError()); - } - - - /* open the file for read purposes */ - hFile = CreateFile(szDest, - GENERIC_READ, - 0, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL); - - if(hFile == INVALID_HANDLE_VALUE) - { - Fail("CopyFileA: ERROR -> Unable to create file \"%s\".\n", - szDest); - } - - /* Attempt to read from the file */ - bRc = ReadFile(hFile, szStringRead, strlen(szStringTest), &dwBytesRead, NULL); - if (!bRc) - { - Trace("CopyFileA: ERROR -> Unable to read from copied file with " - "error %u.\n",GetLastError()); - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileA: ERROR -> Unable to close file \"%s\" with " - "error %u.\n",szDest, GetLastError()); - } - Fail(""); - - } - - if(strncmp(szStringTest,szStringRead, strlen(szStringTest)) != 0) - { - Trace("CopyFileA: ERROR -> The string which was written '%s' does not " - "match the string '%s' which was read from the copied file.\n", - szStringTest,szStringRead); - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileA: ERROR -> Unable to close file \"%s\" with " - "error %u.\n",szDest, GetLastError()); - } - Fail(""); - } - - /* Close the file handle */ - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileA: ERROR -> Unable to close file \"%s\" with error %u " - ".\n",szDest,GetLastError()); - } - - /* Remove the temporary file */ - int st = remove(szDest); - if(st != 0) - { - Fail("CopyFileA: Could not remove copied file with error %u\n", - errno); - } - - PAL_Terminate(); - return PASS; - -#endif - -} diff --git 
a/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test1/CopyFileW.cpp b/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test1/CopyFileW.cpp deleted file mode 100644 index 27dc32921368..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test1/CopyFileW.cpp +++ /dev/null @@ -1,154 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: CopyFileW.c -** -** Purpose: Tests the PAL implementation of the CopyFileW function -** -** -**===================================================================*/ - -/* -1. copy an existing file to non-existent with overwrite true -2. copy an existing file to non-existent with overwrite false -3. copy an existing file to existing with overwrite true -4. copy an existing file to existing with overwrite false -5. copy non-existent file to non-existent with overwrite true -6. copy non-existent file to non-existent with overwrite false -7. copy non-existent file to existing with overwrite true -8. copy non-existent file to existing with overwrite false -*/ - -#include - -PALTEST(file_io_CopyFileW_test1_paltest_copyfilew_test1, "file_io/CopyFileW/test1/paltest_copyfilew_test1") -{ - LPSTR lpSource[2] = {"src_existing.tmp", "src_non-existent.tmp"}; - LPSTR lpDestination[2] = {"dst_existing.tmp", "dst_non-existent.tmp"}; - WCHAR* wcSource; - WCHAR* wcDest; - BOOL bFailIfExists[3] = {FALSE, TRUE}; - BOOL bRc = TRUE; - BOOL bSuccess = TRUE; - char results[20]; - FILE* resultsFile = NULL; - FILE* tempFile = NULL; - int nCounter = 0; - int i, j, k; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - /* load the expected results */ - resultsFile = fopen("expectedresults.txt", "r"); - memset (results, 0, 20); - fgets(results, 20, resultsFile); - fclose(resultsFile); - - nCounter = 0; - - /* create the src_existing file */ - tempFile = fopen(lpSource[0], "w"); - if (tempFile != NULL) - { - fprintf(tempFile, "CopyFileW test file: src_existing.tmp\n"); - fclose(tempFile); - } - else - { - Fail("CopyFileW: ERROR-> Couldn't create \"src_existing.tmp\"\n"); - } - - /* create the dst_existing file */ - tempFile = fopen(lpDestination[0], "w"); - if (tempFile != NULL) - { - fprintf(tempFile, "CopyFileW test file: dst_existing.tmp\n"); - fclose(tempFile); - } - else - { - Fail("CopyFileW: ERROR-> Couldn't create \"dst_existing.tmp\"\n"); - } - - - /* lpSource loop */ - for (i = 0; i < 2; i++) - { - /* lpDestination loop */ - for (j = 0; j < 2; j++) - { - /* bFailIfExists loop */ - for (k = 0; k < 2; k++) - { - wcSource = convert(lpSource[i]); - wcDest = convert(lpDestination[j]); - bRc = CopyFileW(wcSource, - wcDest, - bFailIfExists[k]); - free(wcSource); - free(wcDest); - if (!bRc) - { - if (results[nCounter] == '1') - { - Trace("CopyFileW: FAILED: test[%d][%d][%d]\n", i, j, k); - bSuccess = FALSE; - } - } - else - { - if (results[nCounter] == '0') - { - Trace("CopyFileW: FAILED: test[%d][%d][%d]\n", i, j, k); - bSuccess = FALSE; - } - else - { - /* verify the file was moved */ - if (GetFileAttributesA(lpDestination[j]) == -1) - { - Trace("CopyFileW: GetFileAttributes of destination" - "file failed on test[%d][%d][%d] with error " - "code %ld. \n",i,j,k,GetLastError()); - bSuccess = FALSE; - } - else if (GetFileAttributesA(lpSource[i]) == -1) - { - Trace("CopyFileW: GetFileAttributes of source file " - "file failed on test[%d][%d][%d] with error " - "code %ld. 
\n",i,j,k,GetLastError()); - bSuccess = FALSE; - } - else - { - /* verify attributes of destination file to - source file*/ - if(GetFileAttributes(lpSource[i]) != - GetFileAttributes(lpDestination[j])) - { - Trace("CopyFileW : The file attributes of the " - "destination file do not match the file " - "attributes of the source file on test " - "[%d][%d][%d].\n",i,j,k); - bSuccess = FALSE; - } - } - } - - } - nCounter++; - /* delete file file but don't worry if it fails */ - DeleteFileA(lpDestination[1]); - } - } - } - - int exitCode = bSuccess ? PASS : FAIL; - PAL_TerminateEx(exitCode); - return exitCode; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test1/ExpectedResults.txt b/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test1/ExpectedResults.txt deleted file mode 100644 index 535a89fe5074..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test1/ExpectedResults.txt +++ /dev/null @@ -1 +0,0 @@ -10110000 \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test2/test2.cpp deleted file mode 100644 index 96877d95fcb6..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test2/test2.cpp +++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: test2.c -** -** Purpose: Tests the PAL implementation of the CopyFileW function -** Attempt to copy a file to itself -** -** -**===================================================================*/ - -#include - -PALTEST(file_io_CopyFileW_test2_paltest_copyfilew_test2, "file_io/CopyFileW/test2/paltest_copyfilew_test2") -{ - LPSTR szSrcExisting = "src_existing.tmp"; - WCHAR* wcSource; - BOOL bRc = TRUE; - FILE* tempFile = NULL; - DWORD temp; - int retCode; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - /* create the src_existing file */ - tempFile = fopen(szSrcExisting, "w"); - if (tempFile != NULL) - { - retCode = fputs("CopyFileA test file: src_existing.tmp ", tempFile); - if(retCode < 0) - { - Fail("CopyFileW: ERROR-> Couldn't write to %s with error " - "%u.\n", szSrcExisting, GetLastError()); - } - retCode = fclose(tempFile); - if(retCode != 0) - { - Fail("CopyFileW: ERROR-> Couldn't close file: %s with error " - "%u.\n", szSrcExisting, GetLastError()); - } - } - else - { - Fail("CopyFileW: ERROR-> Couldn't create %s.\n", szSrcExisting); - } - - /* convert source string to wide character */ - wcSource = convert(szSrcExisting); - - /* Get file attributes of source */ - temp = GetFileAttributes(szSrcExisting); - if (temp == -1) - { - free(wcSource); - Fail("CopyFileW: GetFileAttributes of source file " - "failed with error code %ld. \n", - GetLastError()); - } - - /* make sure a file can't copy to itself - first testing with IfFileExists flag set to true */ - bRc = CopyFileW(wcSource,wcSource,TRUE); - if(bRc) - { - free(wcSource); - Fail("ERROR: Cannot copy a file to itself, %u",GetLastError()); - } - - /* try to get file attributes of destination */ - if (GetFileAttributesA(szSrcExisting) == -1) - { - free(wcSource); - Fail("CopyFileW: GetFileAttributes of destination file " - "failed with error code %ld. 
\n", - GetLastError()); - } - else - { - /* verify attributes of destination file to source file*/ - if(temp != GetFileAttributes(szSrcExisting)) - { - free(wcSource); - Fail("CopyFileW : The file attributes of the " - "destination file do not match the file " - "attributes of the source file.\n"); - } - } - - /* testing with IfFileExists flags set to false - should fail in Windows and pass in UNIX */ - bRc = CopyFileW(wcSource,wcSource,FALSE); - free(wcSource); - if(bRc && (GetLastError() != ERROR_ALREADY_EXISTS)) - { - Fail("ERROR: Cannot copy a file to itself, %u",GetLastError()); - } - - if (GetFileAttributesA(szSrcExisting) == -1) - { - Fail("CopyFileW: GetFileAttributes of destination file " - "failed with error code %ld. \n", - GetLastError()); - } - else - { - /* verify attributes of destination file to source file*/ - - if(temp != GetFileAttributes(szSrcExisting)) - { - Fail("CopyFileW : The file attributes of the " - "destination file do not match the file " - "attributes of the source file.\n"); - } - } - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test3/test3.cpp b/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test3/test3.cpp deleted file mode 100644 index 733e7a95ae2b..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/CopyFileW/test3/test3.cpp +++ /dev/null @@ -1,195 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: test4.c -** -** Purpose: Tests the PAL implementation of the CopyFileW function -** to see if a file can through different users belonging to -** different groups. -** - -=====================================================================*/ - -/* USECASE - Copy a file from a different user, belonging to a different group to - the current user, who is a member of the current group. Then check - to see that the current user has the basic access rights to the copied - file. - - Thie original file used is the passwd file in the etc directory of a - BSD machine. This file should exist on all machines. -*/ - -#include - -PALTEST(file_io_CopyFileW_test3_paltest_copyfilew_test3, "file_io/CopyFileW/test3/paltest_copyfilew_test3") -{ - -#if WIN32 - return PASS; - -#else - - BOOL bRc = TRUE; - WCHAR szSrcExisting[] = {'/','e','t','c','/','p','a','s','s','w','d','\0'}; - WCHAR szDest[] = {'t','e','m','p','.','t','m','p','\0'}; - WCHAR szStringTest[] = {'M','a','r','r','y',' ','h','a','d',' ','a',' ', - 'l','i','t','t','l','e',' ','l','a','m','b','\0'}; - WCHAR szStringRead[30]; /* large enough for string szStringTest */ - - HANDLE hFile = NULL; - DWORD dwBytesWritten=0; - DWORD dwBytesRead=0; - int size=0; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - /* copy the file */ - bRc = CopyFileW(szSrcExisting,szDest,TRUE); - if(!bRc) - { - Fail("CopyFileW: Cannot copy a file with error, %u",GetLastError()); - } - - /* try to get file attributes of destination file */ - if (GetFileAttributesW(szDest) == -1) - { - Fail("CopyFileW: GetFileAttributes of destination file " - "failed with error code %u. 
\n", - GetLastError()); - } - - /* set the attributes of the destination file to normal again */ - bRc = SetFileAttributesW(szDest, FILE_ATTRIBUTE_NORMAL); - if(!bRc) - { - Fail("CopyFileW: ERROR-> Couldn't set file attributes for " - "file %S with error %u\n", szDest, GetLastError()); - } - - /* open the file for write purposes */ - hFile = CreateFileW((WCHAR *)szDest, - GENERIC_WRITE, - 0, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL); - - if(hFile == INVALID_HANDLE_VALUE) - { - Fail("CopyFileW: ERROR -> Unable to create file \"%S\".\n", - szDest); - } - - /* To account for the size of a WCHAR is twice that of a char */ - size = wcslen(szStringTest); - size = size*sizeof(WCHAR); - - /* Attempt to write to the file */ - bRc = WriteFile(hFile, - szStringTest, - size, - &dwBytesWritten, - NULL); - - if (!bRc) - { - Trace("CopyFileW: ERROR -> Unable to write to copied file with error " - "%u.\n", GetLastError()); - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileW: ERROR -> Unable to close file \"%S\" with " - "error %u.\n",szDest, GetLastError()); - } - Fail(""); - - } - - /* Close the file handle */ - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileW: ERROR -> Unable to close file \"%S\" with error %u " - ".\n",szDest,GetLastError()); - } - - - /* open the file for read purposes */ - hFile = CreateFileW((WCHAR *)szDest, - GENERIC_READ, - 0, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL); - - if(hFile == INVALID_HANDLE_VALUE) - { - Fail("CopyFileW: ERROR -> Unable to create file \"%S\".\n", - szDest); - } - - /* Attempt to read from the file */ - bRc = ReadFile(hFile, - szStringRead, - size, - &dwBytesRead, - NULL); - - if (!bRc) - { - Trace("CopyFileW: ERROR -> Unable to read from copied file with " - "error %u.\n",GetLastError()); - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileW: ERROR -> Unable to close file \"%S\" with " - "error %u.\n",szDest, GetLastError()); - } - Fail(""); - - } - - if(wcsncmp(szStringTest,szStringRead, wcslen(szStringTest)) != 0) - { - Trace("CopyFileW: ERROR -> The string which was written '%S' does not " - "match the string '%S' which was read from the copied file.\n", - szStringTest,szStringRead); - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileW: ERROR -> Unable to close file \"%S\" with " - "error %u.\n",szDest, GetLastError()); - } - Fail(""); - } - - /* Close the file handle */ - bRc = CloseHandle(hFile); - if (!bRc) - { - Fail("CopyFileW: ERROR -> Unable to close file \"%S\" with error %u " - ".\n",szDest,GetLastError()); - } - - /* Remove the temporary file */ - bRc = DeleteFileW(szDest); - if(!bRc) - { - Fail("CopyFileW: Could not remove copied file with error %u.\n", - GetLastError()); - } - - PAL_Terminate(); - return PASS; - -#endif - -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetConsoleOutputCP/test1/GetConsoleOutputCP.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetConsoleOutputCP/test1/GetConsoleOutputCP.cpp deleted file mode 100644 index 7c391e3f1765..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetConsoleOutputCP/test1/GetConsoleOutputCP.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: GetConsoleOutputCP.c (test 1) -** -** Purpose: Tests the PAL implementation of the GetConsoleOutputCP function. 
-** -** -**===================================================================*/ - -#include - - -PALTEST(file_io_GetConsoleOutputCP_test1_paltest_getconsoleoutputcp_test1, "file_io/GetConsoleOutputCP/test1/paltest_getconsoleoutputcp_test1") -{ - UINT uiCP = 0; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - uiCP = GetConsoleOutputCP(); - if ((uiCP != CP_ACP) && (uiCP != GetACP()) && (uiCP != 437)) /*437 for MSDOS*/ - { - Fail("GetConsoleOutputCP: ERROR -> The invalid code page %d was returned.\n", - uiCP); - } - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_file deleted file mode 100644 index 0d1ac31cfa7f..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_file +++ /dev/null @@ -1 +0,0 @@ -Hidden file \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_ro_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_ro_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_ro_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_ro_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_ro_file deleted file mode 100644 index 8f78fcb436f6..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/.hidden_ro_file +++ /dev/null @@ -1 +0,0 @@ -.hidden_ro_file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/GetFileAttributesA.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/GetFileAttributesA.cpp deleted file mode 100644 index 309be929e113..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/GetFileAttributesA.cpp +++ /dev/null @@ -1,341 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -/*============================================================================= -** -** Source: GetFileAttributesA.c -** -** Purpose: Tests the PAL implementation of the GetFileAttributesA function by -** checking the attributes of: -** - a normal directory and file -** - a read only directory and file -** - a read write directory and file -** - a hidden directory and file -** - a read only hidden directory and file -** - a directory and a file with no attributes -** - an invalid file name -** -** -**===========================================================================*/ -#include - -const int TYPE_DIR = 0; -const int TYPE_FILE = 1; -/* Structure defining a test case */ -typedef struct -{ - char *name; /* name of the file/directory */ - DWORD expectedAttribs; /* expected attributes */ - HANDLE hFile; /* Handle to the file */ - int isFile; /* is file (1) or dir (0) */ -}TestCaseFile; - -typedef struct -{ - char *name; /* name of the file/directory */ - DWORD expectedAttribs; /* expected attributes */ - HANDLE hFile; /* Handle to the file */ - int isFile; /* is file (1) or dir (0) */ -}TestCaseDir; - -DWORD desiredAccessFile_GetFileAttributesA_test1 = GENERIC_READ | GENERIC_WRITE; -DWORD shareModeFile_GetFileAttributesA_test1 = FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE; -LPSECURITY_ATTRIBUTES lpAttrFile_GetFileAttributesA_test1 = NULL; -DWORD dwCreationDispFile_GetFileAttributesA_test1 = CREATE_NEW; -DWORD dwFlagsAttribFile_GetFileAttributesA_test1 = FILE_ATTRIBUTE_NORMAL; -HANDLE hTemplateFile_GetFileAttributesA_test1 = NULL; - -int numFileTests_A = 6; -TestCaseFile gfaTestsFile_A[6]; /* GetFileAttributes tests list */ - -int numDirTests_A = 6; -TestCaseDir gfaTestsDir_A[6]; /* GetFileAttributes tests list */ - -BOOL CleanUpFiles_GetFileAttributesA_test1() -{ - DWORD dwAtt; - int i; - BOOL result = TRUE; - for (i = 0; i < numFileTests_A -1 ; i++ ) - { - dwAtt = GetFileAttributesA(gfaTestsFile_A[i].name); - - if( dwAtt != INVALID_FILE_ATTRIBUTES ) - { - //Trace("Files iteration %d\n", i); - if(!SetFileAttributesA (gfaTestsFile_A[i].name, FILE_ATTRIBUTE_NORMAL)) - { - result = FALSE; - Trace("ERROR:%d: Error setting attributes [%s][%d]\n", GetLastError(), gfaTestsFile_A[i].name, FILE_ATTRIBUTE_NORMAL); - } - - if(!DeleteFileA (gfaTestsFile_A[i].name)) - { - result = FALSE; - Trace("ERROR:%d: Error deleting file [%s][%d]\n", GetLastError(), gfaTestsFile_A[i].name, dwAtt); - } - - } - } -// Trace("Value of result is %d\n", result); - return result; -} -BOOL SetUpFiles_GetFileAttributesA_test1() -{ - int i = 0; - BOOL result = TRUE; - for (i = 0; i < numFileTests_A -1; i++ ) - { - gfaTestsFile_A[i].hFile = CreateFile(gfaTestsFile_A[i].name, - desiredAccessFile_GetFileAttributesA_test1, - shareModeFile_GetFileAttributesA_test1, - lpAttrFile_GetFileAttributesA_test1, - dwCreationDispFile_GetFileAttributesA_test1, - dwFlagsAttribFile_GetFileAttributesA_test1, - hTemplateFile_GetFileAttributesA_test1); - - if( gfaTestsFile_A[i].hFile == NULL ) - { - Fail("Error while creating files for iteration %d\n", i); - } - - if(!SetFileAttributesA (gfaTestsFile_A[i].name, gfaTestsFile_A[i].expectedAttribs)) - { - result = FALSE; - Trace("ERROR:%d: Error setting attributes [%s][%d]\n", GetLastError(), gfaTestsFile_A[i].name, gfaTestsFile_A[i].expectedAttribs); - } - } - - return result; -} - -BOOL CleanUpDirs_GetFileAttributesA_test1() -{ - DWORD dwAtt; - int i; - BOOL result = TRUE; - for (i = 0; i < numDirTests_A -1 ; i++ ) - { - dwAtt = GetFileAttributesA(gfaTestsDir_A[i].name); - - if( 
dwAtt != INVALID_FILE_ATTRIBUTES ) - { - - if(!SetFileAttributesA (gfaTestsDir_A[i].name, FILE_ATTRIBUTE_DIRECTORY)) - { - result = FALSE; - Trace("ERROR:%d: Error setting attributes [%s][%d]\n", GetLastError(), gfaTestsDir_A[i].name, (FILE_ATTRIBUTE_NORMAL | FILE_ATTRIBUTE_DIRECTORY)); - } - - LPWSTR nameW = convert(gfaTestsDir_A[i].name); - if(!RemoveDirectoryW (nameW)) - { - result = FALSE; - Trace("ERROR:%d: Error deleting file [%s][%d]\n", GetLastError(), gfaTestsDir_A[i].name, dwAtt); - } - - free(nameW); - } - } - - return result; -} - -BOOL SetUpDirs_GetFileAttributesA_test1() -{ - int i = 0; - BOOL result = TRUE; - DWORD ret = 0; - for (i = 0; i < numDirTests_A - 1 ; i++ ) - { - result = CreateDirectoryA(gfaTestsDir_A[i].name, - NULL); - - if(!result ) - { - result = FALSE; - Fail("Error while creating directory for iteration %d\n", i); - } - - if(!SetFileAttributesA (gfaTestsDir_A[i].name, gfaTestsDir_A[i].expectedAttribs)) - { - result = FALSE; - Trace("ERROR:%d: Error setting attributes [%s][%d]\n", GetLastError(), gfaTestsDir_A[i].name, gfaTestsDir_A[i].expectedAttribs); - } - - ret = GetFileAttributesA (gfaTestsDir_A[i].name); - if(ret != gfaTestsDir_A[i].expectedAttribs) - { - result = FALSE; - Trace("ERROR: Error setting attributes [%s][%d]\n", gfaTestsDir_A[i].name, gfaTestsDir_A[i].expectedAttribs); - } - //Trace("Setup Dir setting attr [%d], returned [%d]\n", gfaTestsDir_A[i].expectedAttribs, ret); - - } - //Trace("Setup dirs returning %d\n", result); - return result; -} -PALTEST(file_io_GetFileAttributesA_test1_paltest_getfileattributesa_test1, "file_io/GetFileAttributesA/test1/paltest_getfileattributesa_test1") -{ - int i; - BOOL bFailed = FALSE; - DWORD result; - - char * NormalDirectoryName = "normal_test_directory"; - char * ReadOnlyDirectoryName = "ro_test_directory"; - char * ReadWriteDirectoryName = "rw_directory"; - char * HiddenDirectoryName = ".hidden_directory"; - char * HiddenReadOnlyDirectoryName = ".hidden_ro_directory"; - char * NoDirectoryName = "no_directory"; - - char * NormalFileName = "normal_test_file"; - char * ReadOnlyFileName = "ro_test_file"; - char * ReadWriteFileName = "rw_file"; - char * HiddenFileName = ".hidden_file"; - char * HiddenReadOnlyFileName = ".hidden_ro_file"; - char * NotReallyAFileName = "not_really_a_file"; - - /* Tests on directory */ - gfaTestsDir_A[0].name = NormalDirectoryName; - gfaTestsDir_A[0].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY; - gfaTestsDir_A[0].isFile = TYPE_DIR; - - gfaTestsDir_A[1].name = ReadOnlyDirectoryName; - gfaTestsDir_A[1].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY | - FILE_ATTRIBUTE_READONLY; - gfaTestsDir_A[1].isFile = TYPE_DIR; - - gfaTestsDir_A[2].name = ReadWriteDirectoryName; - gfaTestsDir_A[2].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY; - gfaTestsDir_A[2].isFile = TYPE_DIR; - - gfaTestsDir_A[3].name = HiddenDirectoryName; - gfaTestsDir_A[3].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY; //| - //FILE_ATTRIBUTE_HIDDEN; - gfaTestsDir_A[3].isFile = TYPE_DIR; - - gfaTestsDir_A[4].name = HiddenReadOnlyDirectoryName; - gfaTestsDir_A[4].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY | - FILE_ATTRIBUTE_READONLY; //| - //FILE_ATTRIBUTE_HIDDEN; - gfaTestsDir_A[4].isFile = TYPE_DIR; - - gfaTestsDir_A[5].name = NoDirectoryName; - gfaTestsDir_A[5].expectedAttribs = INVALID_FILE_ATTRIBUTES; - gfaTestsDir_A[5].isFile = TYPE_DIR; - - /* Tests on file */ - gfaTestsFile_A[0].name = NormalFileName; - gfaTestsFile_A[0].expectedAttribs = FILE_ATTRIBUTE_NORMAL; - gfaTestsFile_A[0].isFile = TYPE_FILE; - - - 
gfaTestsFile_A[1].name = ReadOnlyFileName; - gfaTestsFile_A[1].expectedAttribs = FILE_ATTRIBUTE_READONLY; - gfaTestsFile_A[1].isFile = TYPE_FILE; - - gfaTestsFile_A[2].name = ReadWriteFileName; - gfaTestsFile_A[2].expectedAttribs = FILE_ATTRIBUTE_NORMAL; - gfaTestsFile_A[2].isFile = TYPE_FILE; - - gfaTestsFile_A[3].name = HiddenFileName; - gfaTestsFile_A[3].expectedAttribs = FILE_ATTRIBUTE_NORMAL; //FILE_ATTRIBUTE_HIDDEN; - gfaTestsFile_A[3].isFile = TYPE_FILE; - - gfaTestsFile_A[4].name = HiddenReadOnlyFileName; - gfaTestsFile_A[4].expectedAttribs = FILE_ATTRIBUTE_READONLY; //| - //FILE_ATTRIBUTE_HIDDEN; - gfaTestsFile_A[4].isFile = TYPE_FILE; - - - gfaTestsFile_A[5].name = NotReallyAFileName; - gfaTestsFile_A[5].expectedAttribs = INVALID_FILE_ATTRIBUTES; - gfaTestsFile_A[5].isFile = TYPE_FILE; - - /* Initialize PAL environment */ - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - if(!CleanUpFiles_GetFileAttributesA_test1()) - { - Fail("GetFileAttributesA: Pre-Clean Up Files Failed\n"); - } - - if(0 == SetUpFiles_GetFileAttributesA_test1()) - { - Fail("GetFileAttributesA: SetUp Files Failed\n"); - } - - if(!CleanUpDirs_GetFileAttributesA_test1()) - { - Fail("GetFileAttributesA: Pre-Clean Up Directories Failed\n"); - } - - if(!SetUpDirs_GetFileAttributesA_test1()) - { - Fail("GetFileAttributesA: SetUp Directories Failed\n"); - } - - /* - * Go through all the test cases above, - * call GetFileAttributesA on the name and - * make sure the return value is the one expected - */ - for( i = 0; i < numFileTests_A; i++ ) - { - result = GetFileAttributesA(gfaTestsFile_A[i].name); - - if( result != gfaTestsFile_A[i].expectedAttribs ) - { - bFailed = TRUE; - - Trace("ERROR: GetFileAttributesA Test#%u on %s " - "returned %u instead of %u. \n", - i, - gfaTestsFile_A[i].name, - result, - gfaTestsFile_A[i].expectedAttribs); - - } - } - - - for( i = 0; i < numDirTests_A; i++ ) - { - result = GetFileAttributesA(gfaTestsDir_A[i].name); - - if( result != gfaTestsDir_A[i].expectedAttribs ) - { - bFailed = TRUE; - - Trace("ERROR: GetFileAttributesA on Directories Test#%u on %s " - "returned %u instead of %u. \n", - i, - gfaTestsDir_A[i].name, - result, - gfaTestsDir_A[i].expectedAttribs); - - } - } - - if(!CleanUpFiles_GetFileAttributesA_test1()) - { - Fail("GetFileAttributesA: Post-Clean Up Files Failed\n"); - } - - if(!CleanUpDirs_GetFileAttributesA_test1()) - { - Fail("GetFileAttributesA: Post-Clean Up Directories Failed\n"); - } - - /* If any errors, just call Fail() */ - if( bFailed ) - { - Fail(""); - } - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/no_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/no_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/no_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. 
\ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/no_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/no_file deleted file mode 100644 index 3d631e8103f0..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/no_file +++ /dev/null @@ -1 +0,0 @@ -No attribs file \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/normal_test_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/normal_test_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/normal_test_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/normal_test_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/normal_test_file deleted file mode 100644 index a6e1e627a887..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/normal_test_file +++ /dev/null @@ -1,6 +0,0 @@ -file_io -CopyFileW -Positive Test for CopyFileW -test the CopyFileW function -DEFAULT -CopyFileW \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/ro_test_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/ro_test_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/ro_test_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/ro_test_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/ro_test_file deleted file mode 100644 index a6e1e627a887..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/ro_test_file +++ /dev/null @@ -1,6 +0,0 @@ -file_io -CopyFileW -Positive Test for CopyFileW -test the CopyFileW function -DEFAULT -CopyFileW \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/rw_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/rw_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/rw_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/rw_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/rw_file deleted file mode 100644 index 39d66f0365ba..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesA/test1/rw_file +++ /dev/null @@ -1 +0,0 @@ -Read Write file \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/.hidden_directory/anchor.txt b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/.hidden_directory/anchor.txt deleted file mode 100644 index 9a277fa04ed8..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/.hidden_directory/anchor.txt +++ /dev/null @@ -1,2 +0,0 @@ -This file is here so this directory gets checked out even with the -P -option. 
\ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/.hidden_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/.hidden_file deleted file mode 100644 index 0d1ac31cfa7f..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/.hidden_file +++ /dev/null @@ -1 +0,0 @@ -Hidden file \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/normal_test_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/normal_test_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/normal_test_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/normal_test_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/normal_test_file deleted file mode 100644 index ab7622ffb19c..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/normal_test_file +++ /dev/null @@ -1,6 +0,0 @@ -file_io -CopyFileW -Positive Test for CopyFileW -test the CopyFileW function -DEFAULT -CopyFileW \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/ro_test_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/ro_test_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/ro_test_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/ro_test_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/ro_test_file deleted file mode 100644 index ab7622ffb19c..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/ro_test_file +++ /dev/null @@ -1,6 +0,0 @@ -file_io -CopyFileW -Positive Test for CopyFileW -test the CopyFileW function -DEFAULT -CopyFileW \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/test1.cpp deleted file mode 100644 index c31529979627..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test1/test1.cpp +++ /dev/null @@ -1,175 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: test1.c -** -** Purpose: Tests the PAL implementation of the GetFileAttributesExW function. -** Call the function on a normal directory and file and a read-only directory -** and file and a hidden file and directory. -** Ensure that the returned attributes and file sizes are correct. -** -** -**===================================================================*/ - -#define UNICODE -#include - -typedef enum Item -{ - IS_DIR, - IS_FILE -}ItemType; - -/* This function takes a structure and checks that the information - within the structure is correct. 
The 'Attribs' are the expected - file attributes, 'TheType' is IS_DIR or IS_FILE and the 'Name' is the - name of the file/directory in question. -*/ -void VerifyInfo(WIN32_FILE_ATTRIBUTE_DATA InfoStruct, - DWORD Attribs, ItemType TheType, WCHAR* Name) -{ - HANDLE hFile; - FILETIME CorrectCreation, CorrectAccess, CorrectModify; - WCHAR CopyName[64]; - - wcscpy(CopyName,Name); - free(Name); - - /* Check to see that the file attributes were recorded */ - if(InfoStruct.dwFileAttributes != Attribs) - { - Fail("ERROR: The file attributes on the file/directory were " - "recorded as being %d instead of %d.\n", - InfoStruct.dwFileAttributes, - Attribs); - } - - /* Note: We can't open a handle to a directory in windows. This - block of tests will only be run on files. - */ - if(TheType == IS_FILE) - { - - /* Get a handle to the file */ - hFile = CreateFile(CopyName, - 0, - 0, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL); - - if (hFile == INVALID_HANDLE_VALUE) - { - Fail("ERROR: Could not open a handle to the file " - "'%S'. GetLastError() returned %d.",CopyName, - GetLastError()); - } - - - if(InfoStruct.nFileSizeLow != GetFileSize(hFile,NULL)) - { - Fail("ERROR: The file size reported by GetFileAttributesEx " - "did not match the file size given by GetFileSize.\n"); - } - - if(CloseHandle(hFile) == 0) - { - Fail("ERROR: Failed to properly close the handle to the " - "file we're testing. GetLastError() returned %d.\n", - GetLastError()); - - } - - } - - -} - -/* Given a file/directory name, the expected attribs and whether or not it - is a file or directory, call GetFileAttributesEx and verify the - results are correct. -*/ - -void RunTest_GetFileAttributesExW_test1(char* Name, DWORD Attribs, ItemType TheType ) -{ - WCHAR* TheName; - WIN32_FILE_ATTRIBUTE_DATA InfoStruct; - DWORD TheResult; - - TheName = convert(Name); - - TheResult = GetFileAttributesEx(TheName, - GetFileExInfoStandard, - &InfoStruct); - if(TheResult == 0) - { - free(TheName); - Fail("ERROR: GetFileAttributesEx returned 0, indicating failure. 
" - "GetLastError returned %d.\n",GetLastError()); - } - - VerifyInfo(InfoStruct, Attribs, TheType, TheName); - -} - -PALTEST(file_io_GetFileAttributesExW_test1_paltest_getfileattributesexw_test1, "file_io/GetFileAttributesExW/test1/paltest_getfileattributesexw_test1") -{ - DWORD TheResult; - WCHAR* FileName; - WIN32_FILE_ATTRIBUTE_DATA InfoStruct; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - /* Test a Directory */ - RunTest_GetFileAttributesExW_test1("normal_test_directory", FILE_ATTRIBUTE_DIRECTORY, IS_DIR); - - - /* Test a Normal File */ - - RunTest_GetFileAttributesExW_test1("normal_test_file", FILE_ATTRIBUTE_NORMAL, IS_FILE); - - /* Test a Read-Only Directory */ - - RunTest_GetFileAttributesExW_test1("ro_test_directory", - FILE_ATTRIBUTE_READONLY|FILE_ATTRIBUTE_DIRECTORY, IS_DIR); - - /* Test a Read-Only File */ - - RunTest_GetFileAttributesExW_test1("ro_test_file", FILE_ATTRIBUTE_READONLY, IS_FILE); - - /* Test a Hidden File */ - - RunTest_GetFileAttributesExW_test1(".hidden_file", FILE_ATTRIBUTE_HIDDEN, IS_FILE); - - /* Test a Hidden Directory */ - - RunTest_GetFileAttributesExW_test1(".hidden_directory", - FILE_ATTRIBUTE_HIDDEN|FILE_ATTRIBUTE_DIRECTORY, IS_DIR); - - /* Test a Non-Existent File */ - - FileName = convert("nonexistent_test_file"); - - TheResult = GetFileAttributesEx(FileName, - GetFileExInfoStandard, - &InfoStruct); - - if(TheResult != 0) - { - free(FileName); - Fail("ERROR: GetFileAttributesEx returned non-zero, indicating " - "success when it should have failed. It was called on a " - "non-existent file."); - } - - free(FileName); - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test2/test2.cpp deleted file mode 100644 index 31fe689214e0..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesExW/test2/test2.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: getfileattributesexw.c (getfileattributesexw\test2) -** -** Purpose: Tests the PAL implementation of GetFileAttributesExW. -** First get a file's attributes, modify the file, -** re-get its attributes -** and compare the two sets of attributes. -** -** -**===================================================================*/ -#include - -/** - * This is a helper function which takes two FILETIME structures and - * checks that the second time isn't before the first. 
- */ -static int IsFileTimeOk(FILETIME FirstTime, FILETIME SecondTime) -{ - - ULONG64 TimeOne, TimeTwo; - - TimeOne = ((((ULONG64)FirstTime.dwHighDateTime)<<32) | - ((ULONG64)FirstTime.dwLowDateTime)); - - TimeTwo = ((((ULONG64)SecondTime.dwHighDateTime)<<32) | - ((ULONG64)SecondTime.dwLowDateTime)); - - return(TimeOne <= TimeTwo); -} - -PALTEST(file_io_GetFileAttributesExW_test2_paltest_getfileattributesexw_test2, "file_io/GetFileAttributesExW/test2/paltest_getfileattributesexw_test2") -{ - DWORD res; - char fileName[MAX_PATH] = "test_file"; - WCHAR *wFileName; - WIN32_FILE_ATTRIBUTE_DATA beforeAttribs; - WIN32_FILE_ATTRIBUTE_DATA afterAttribs; - FILE *testFile; - ULONG64 beforeFileSize; - ULONG64 afterFileSize; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - /* Create the file */ - testFile = fopen(fileName, "w+"); - if( NULL == testFile ) - { - Fail("Unexpected error: Unable to open file %S " - "with fopen. \n", - fileName); - } - - if( EOF == fputs( "testing", testFile ) ) - { - Fail("Unexpected error: Unable to write to file %S " - "with fputs. \n", - fileName); - } - - if( 0 != fclose(testFile) ) - { - Fail("Unexpected error: Unable to close file %S " - "with fclose. \n", - fileName); - } - - /* Test the Values returned by GetFileAttributesExW - * before and after manipulating a file shouldn't be the same. - */ - - wFileName = convert(fileName); - - res = GetFileAttributesExW(wFileName, - GetFileExInfoStandard, - &beforeAttribs); - - if(res == 0) - { - Fail("ERROR: unable to get initial file attributes with " - "GetFileAttributesEx that returned 0 with error %d.\n", - GetLastError()); - } - - /* Make sure the time are different */ - Sleep(500); - - testFile = fopen(fileName, "w+"); - if( NULL == testFile ) - { - Fail("Unexpected error: Unable to open file %S " - "with fopen. \n", - fileName); - } - - if( EOF == fputs( "testing GetFileAttributesExW", testFile ) ) - { - Fail("Unexpected error: Unable to write to file %S " - "with fputs. \n", - fileName); - } - - if( 0 != fclose(testFile) ) - { - Fail("Unexpected error: Unable to close file %S " - "with fclose. 
\n", - fileName); - } - - res = GetFileAttributesExW(wFileName, - GetFileExInfoStandard, - &afterAttribs); - - if(res == 0) - { - Fail("ERROR: unable to get file attributes after operations with " - "GetFileAttributesEx that returned 0 with error %d.\n", - GetLastError()); - } - - /* Check the creation time */ - if(!IsFileTimeOk(beforeAttribs.ftCreationTime, - afterAttribs.ftCreationTime)) - { - Fail("ERROR: Creation time after the fputs operation " - "is earlier than the creation time before the fputs.\n"); - } - - /* Check the last access time */ - if(!IsFileTimeOk(beforeAttribs.ftLastAccessTime, - afterAttribs.ftLastAccessTime)) - { - Fail("ERROR: Last access time after the fputs operation " - "is earlier than the last access time before the fputs.\n"); - } - - /* Check the last write time */ - if(!IsFileTimeOk(beforeAttribs.ftLastWriteTime, - afterAttribs.ftLastWriteTime)) - { - Fail("ERROR: Last write time after the fputs operation " - "is earlier than the last write time before the fputs.\n"); - } - - beforeFileSize = ((ULONG64)beforeAttribs.nFileSizeHigh)<< 32 | - ((ULONG64)beforeAttribs.nFileSizeLow); - - afterFileSize = ((ULONG64)afterAttribs.nFileSizeHigh)<< 32 | - ((ULONG64)afterAttribs.nFileSizeLow); - - /* Check the file size */ - if( afterFileSize <= beforeFileSize ) - { - Fail("ERROR: the file should have had a bigger size " - "after(%d) the operations than before(%d)\n", - afterAttribs.nFileSizeLow, - beforeAttribs.nFileSizeLow); - } - - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_file deleted file mode 100644 index 0d1ac31cfa7f..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_file +++ /dev/null @@ -1 +0,0 @@ -Hidden file \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_ro_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_ro_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_ro_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. 
\ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_ro_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_ro_file deleted file mode 100644 index 8f78fcb436f6..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/.hidden_ro_file +++ /dev/null @@ -1 +0,0 @@ -.hidden_ro_file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/GetFileAttributesW.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/GetFileAttributesW.cpp deleted file mode 100644 index 7efc9f880534..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/GetFileAttributesW.cpp +++ /dev/null @@ -1,346 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================================= -** -** Source: GetFileAttributesW.c -** -** Purpose: Tests the PAL implementation of the GetFileAttributesW function by -** checking the attributes of: -** - a normal directory and file -** - a read only directory and file -** - a read write directory and file -** - a hidden directory and file -** - a read only hidden directory and file -** - a directory and a file with no attributes -** - an invalid file name -** -** -**===========================================================================*/ -#include - -const int TYPE_DIR = 0; -const int TYPE_FILE = 1; -/* Structure defining a test case */ -typedef struct -{ - char *name; /* name of the file/directory */ - DWORD expectedAttribs; /* expected attributes */ - HANDLE hFile; /* Handle to the file */ - int isFile; /* is file (1) or dir (0) */ -}TestCaseFile; - -typedef struct -{ - char *name; /* name of the file/directory */ - DWORD expectedAttribs; /* expected attributes */ - HANDLE hFile; /* Handle to the file */ - int isFile; /* is file (1) or dir (0) */ -}TestCaseDir; - -DWORD desiredAccessFile_GetFileAttributesW_test1 = GENERIC_READ | GENERIC_WRITE; -DWORD shareModeFile_GetFileAttributesW_test1 = FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE; -LPSECURITY_ATTRIBUTES lpAttrFile_GetFileAttributesW_test1 = NULL; -DWORD dwCreationDispFile_GetFileAttributesW_test1 = CREATE_NEW; -DWORD dwFlagsAttribFile_GetFileAttributesW_test1 = FILE_ATTRIBUTE_NORMAL; -HANDLE hTemplateFile_GetFileAttributesW_test1 = NULL; - -int numFileTests_GetFileAttributesW_test1 = 6; -TestCaseFile gfaTestsFile_GetFileAttributesW_test1[6]; /* GetFileAttributes tests list */ - -int numDirTests_GetFileAttributesW_test1 = 6; -TestCaseDir gfaTestsDir_GetFileAttributesW_test1[6]; /* GetFileAttributes tests list */ - -BOOL CleanUpFiles_GetFileAttributesW_test1() -{ - DWORD dwAtt; - int i; - BOOL result = TRUE; - for (i = 0; i < numFileTests_GetFileAttributesW_test1 - 1 ; i++ ) - { - dwAtt = GetFileAttributesA(gfaTestsFile_GetFileAttributesW_test1[i].name); - - if( dwAtt != INVALID_FILE_ATTRIBUTES ) - { - //Trace("Files iteration %d\n", i); - if(!SetFileAttributesA (gfaTestsFile_GetFileAttributesW_test1[i].name, FILE_ATTRIBUTE_NORMAL)) - { - result = FALSE; - Trace("ERROR:%d: Error setting attributes [%s][%d]\n", GetLastError(), gfaTestsFile_GetFileAttributesW_test1[i].name, FILE_ATTRIBUTE_NORMAL); - } - - if(!DeleteFileA (gfaTestsFile_GetFileAttributesW_test1[i].name)) - { - result = FALSE; - Trace("ERROR:%d: Error deleting file [%s][%d]\n", GetLastError(), gfaTestsFile_GetFileAttributesW_test1[i].name, 
dwAtt); - } - - } - } -// Trace("Value of result is %d\n", result); - return result; -} -BOOL SetUpFiles_GetFileAttributesW_test1() -{ - int i = 0; - BOOL result = TRUE; - for (i = 0; i < numFileTests_GetFileAttributesW_test1 - 1 ; i++ ) - { - gfaTestsFile_GetFileAttributesW_test1[i].hFile = CreateFile(gfaTestsFile_GetFileAttributesW_test1[i].name, - desiredAccessFile_GetFileAttributesW_test1, - shareModeFile_GetFileAttributesW_test1, - lpAttrFile_GetFileAttributesW_test1, - dwCreationDispFile_GetFileAttributesW_test1, - dwFlagsAttribFile_GetFileAttributesW_test1, - hTemplateFile_GetFileAttributesW_test1); - - if( gfaTestsFile_GetFileAttributesW_test1[i].hFile == NULL ) - { - Fail("Error while creating files for iteration %d\n", i); - } - - if(!SetFileAttributesA (gfaTestsFile_GetFileAttributesW_test1[i].name, gfaTestsFile_GetFileAttributesW_test1[i].expectedAttribs)) - { - result = FALSE; - Trace("ERROR:%d: Error setting attributes [%s][%d]\n", GetLastError(), gfaTestsFile_GetFileAttributesW_test1[i].name, gfaTestsFile_GetFileAttributesW_test1[i].expectedAttribs); - } - } - - return result; -} - -BOOL CleanUpDirs_GetFileAttributesW_test1() -{ - DWORD dwAtt; - int i; - BOOL result = TRUE; - for (i = 0; i < numDirTests_GetFileAttributesW_test1 - 1; i++ ) - { - dwAtt = GetFileAttributesA(gfaTestsDir_GetFileAttributesW_test1[i].name); - - if( dwAtt != INVALID_FILE_ATTRIBUTES ) - { - - if(!SetFileAttributesA (gfaTestsDir_GetFileAttributesW_test1[i].name, FILE_ATTRIBUTE_DIRECTORY)) - { - result = FALSE; - Trace("ERROR:%d: Error setting attributes [%s][%d]\n", GetLastError(), gfaTestsDir_GetFileAttributesW_test1[i].name, (FILE_ATTRIBUTE_NORMAL | FILE_ATTRIBUTE_DIRECTORY)); - } - - LPWSTR nameW = convert(gfaTestsDir_GetFileAttributesW_test1[i].name); - if(!RemoveDirectoryW (nameW)) - { - result = FALSE; - Trace("ERROR:%d: Error deleting file [%s][%d]\n", GetLastError(), gfaTestsDir_GetFileAttributesW_test1[i].name, dwAtt); - } - - free(nameW); - } - } - - return result; -} - -BOOL SetUpDirs_GetFileAttributesW_test1() -{ - int i = 0; - BOOL result = TRUE; - DWORD ret = 0; - for (i = 0; i < numDirTests_GetFileAttributesW_test1 - 1; i++ ) - { - result = CreateDirectoryA(gfaTestsDir_GetFileAttributesW_test1[i].name, - NULL); - - if(!result ) - { - result = FALSE; - Fail("Error while creating directory for iteration %d\n", i); - } - - if(!SetFileAttributesA (gfaTestsDir_GetFileAttributesW_test1[i].name, gfaTestsDir_GetFileAttributesW_test1[i].expectedAttribs)) - { - result = FALSE; - Trace("ERROR:%d: Error setting attributes [%s][%d]\n", GetLastError(), gfaTestsDir_GetFileAttributesW_test1[i].name, gfaTestsDir_GetFileAttributesW_test1[i].expectedAttribs); - } - - ret = GetFileAttributesA (gfaTestsDir_GetFileAttributesW_test1[i].name); - if(ret != gfaTestsDir_GetFileAttributesW_test1[i].expectedAttribs) - { - result = FALSE; - Trace("ERROR: Error setting attributes [%s][%d]\n", gfaTestsDir_GetFileAttributesW_test1[i].name, gfaTestsDir_GetFileAttributesW_test1[i].expectedAttribs); - } - // Trace("Setup Dir setting attr [%d], returned [%d]\n", gfaTestsDir_GetFileAttributesW_test1[i].expectedAttribs, ret); - - } -// Trace("Setup dirs returning %d\n", result); - return result; -} -PALTEST(file_io_GetFileAttributesW_test1_paltest_getfileattributesw_test1, "file_io/GetFileAttributesW/test1/paltest_getfileattributesw_test1") -{ - int i; - BOOL bFailed = FALSE; - DWORD result; - - char * NormalDirectoryName = "normal_test_directory"; - char * ReadOnlyDirectoryName = "ro_test_directory"; - char * 
ReadWriteDirectoryName = "rw_directory"; - char * HiddenDirectoryName = ".hidden_directory"; - char * HiddenReadOnlyDirectoryName = ".hidden_ro_directory"; - char * NoDirectoryName = "no_directory"; - - char * NormalFileName = "normal_test_file"; - char * ReadOnlyFileName = "ro_test_file"; - char * ReadWriteFileName = "rw_file"; - char * HiddenFileName = ".hidden_file"; - char * HiddenReadOnlyFileName = ".hidden_ro_file"; - char * NotReallyAFileName = "not_really_a_file"; - - WCHAR *WStr; - /* Tests on directory */ - gfaTestsDir_GetFileAttributesW_test1[0].name = NormalDirectoryName; - gfaTestsDir_GetFileAttributesW_test1[0].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY; - gfaTestsDir_GetFileAttributesW_test1[0].isFile = TYPE_DIR; - - gfaTestsDir_GetFileAttributesW_test1[1].name = ReadOnlyDirectoryName; - gfaTestsDir_GetFileAttributesW_test1[1].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY | - FILE_ATTRIBUTE_READONLY; - gfaTestsDir_GetFileAttributesW_test1[1].isFile = TYPE_DIR; - - gfaTestsDir_GetFileAttributesW_test1[2].name = ReadWriteDirectoryName; - gfaTestsDir_GetFileAttributesW_test1[2].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY; - gfaTestsDir_GetFileAttributesW_test1[2].isFile = TYPE_DIR; - - gfaTestsDir_GetFileAttributesW_test1[3].name = HiddenDirectoryName; - gfaTestsDir_GetFileAttributesW_test1[3].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY; //| - //FILE_ATTRIBUTE_HIDDEN; - gfaTestsDir_GetFileAttributesW_test1[3].isFile = TYPE_DIR; - - gfaTestsDir_GetFileAttributesW_test1[4].name = HiddenReadOnlyDirectoryName; - gfaTestsDir_GetFileAttributesW_test1[4].expectedAttribs = FILE_ATTRIBUTE_DIRECTORY | - FILE_ATTRIBUTE_READONLY; //| - //FILE_ATTRIBUTE_HIDDEN; - gfaTestsDir_GetFileAttributesW_test1[4].isFile = TYPE_DIR; - - gfaTestsDir_GetFileAttributesW_test1[5].name = NoDirectoryName; - gfaTestsDir_GetFileAttributesW_test1[5].expectedAttribs = INVALID_FILE_ATTRIBUTES; - gfaTestsDir_GetFileAttributesW_test1[5].isFile = TYPE_DIR; - - /* Tests on file */ - gfaTestsFile_GetFileAttributesW_test1[0].name = NormalFileName; - gfaTestsFile_GetFileAttributesW_test1[0].expectedAttribs = FILE_ATTRIBUTE_NORMAL; - gfaTestsFile_GetFileAttributesW_test1[0].isFile = TYPE_FILE; - - - gfaTestsFile_GetFileAttributesW_test1[1].name = ReadOnlyFileName; - gfaTestsFile_GetFileAttributesW_test1[1].expectedAttribs = FILE_ATTRIBUTE_READONLY; - gfaTestsFile_GetFileAttributesW_test1[1].isFile = TYPE_FILE; - - gfaTestsFile_GetFileAttributesW_test1[2].name = ReadWriteFileName; - gfaTestsFile_GetFileAttributesW_test1[2].expectedAttribs = FILE_ATTRIBUTE_NORMAL; - gfaTestsFile_GetFileAttributesW_test1[2].isFile = TYPE_FILE; - - gfaTestsFile_GetFileAttributesW_test1[3].name = HiddenFileName; - gfaTestsFile_GetFileAttributesW_test1[3].expectedAttribs = FILE_ATTRIBUTE_NORMAL; //FILE_ATTRIBUTE_HIDDEN; - gfaTestsFile_GetFileAttributesW_test1[3].isFile = TYPE_FILE; - - gfaTestsFile_GetFileAttributesW_test1[4].name = HiddenReadOnlyFileName; - gfaTestsFile_GetFileAttributesW_test1[4].expectedAttribs = FILE_ATTRIBUTE_READONLY; //| - //FILE_ATTRIBUTE_HIDDEN; - gfaTestsFile_GetFileAttributesW_test1[4].isFile = TYPE_FILE; - - - gfaTestsFile_GetFileAttributesW_test1[5].name = NotReallyAFileName; - gfaTestsFile_GetFileAttributesW_test1[5].expectedAttribs = INVALID_FILE_ATTRIBUTES; - gfaTestsFile_GetFileAttributesW_test1[5].isFile = TYPE_FILE; - - /* Initialize PAL environment */ - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - if(!CleanUpFiles_GetFileAttributesW_test1()) - { - Fail("GetFileAttributesW: Pre-Clean Up Files 
Failed\n"); - } - - if(0 == SetUpFiles_GetFileAttributesW_test1()) - { - Fail("GetFileAttributesW: SetUp Files Failed\n"); - } - - if(!CleanUpDirs_GetFileAttributesW_test1()) - { - Fail("GetFileAttributesW: Pre-Clean Up Directories Failed\n"); - } - - if(!SetUpDirs_GetFileAttributesW_test1()) - { - Fail("GetFileAttributesW: SetUp Directories Failed\n"); - } - - /* - * Go through all the test cases above, - * call GetFileAttributesW on the name and - * make sure the return value is the one expected - */ - for( i = 0; i < numFileTests_GetFileAttributesW_test1; i++ ) - { - WStr = convert(gfaTestsFile_GetFileAttributesW_test1[i].name); - result = GetFileAttributesW(WStr); - - if( result != gfaTestsFile_GetFileAttributesW_test1[i].expectedAttribs ) - { - bFailed = TRUE; - - Trace("ERROR: GetFileAttributesW Test#%u on %s " - "returned %u instead of %u. \n", - i, - gfaTestsFile_GetFileAttributesW_test1[i].name, - result, - gfaTestsFile_GetFileAttributesW_test1[i].expectedAttribs); - - } - free(WStr); - } - - - for( i = 0; i < numDirTests_GetFileAttributesW_test1; i++ ) - { - WStr = convert(gfaTestsDir_GetFileAttributesW_test1[i].name); - result = GetFileAttributesW(WStr); - - if( result != gfaTestsDir_GetFileAttributesW_test1[i].expectedAttribs ) - { - bFailed = TRUE; - - Trace("ERROR: GetFileAttributesW on Directories Test#%u on %s " - "returned %u instead of %u. \n", - i, - gfaTestsDir_GetFileAttributesW_test1[i].name, - result, - gfaTestsDir_GetFileAttributesW_test1[i].expectedAttribs); - - } - free(WStr); - } - - if(!CleanUpFiles_GetFileAttributesW_test1()) - { - Fail("GetFileAttributesW: Post-Clean Up Files Failed\n"); - } - - if(!CleanUpDirs_GetFileAttributesW_test1()) - { - Fail("GetFileAttributesW: Post-Clean Up Directories Failed\n"); - } - - /* If any errors, just call Fail() */ - if( bFailed ) - { - Fail(""); - } - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/no_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/no_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/no_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/no_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/no_file deleted file mode 100644 index 3d631e8103f0..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/no_file +++ /dev/null @@ -1 +0,0 @@ -No attribs file \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/normal_test_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/normal_test_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/normal_test_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. 
\ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/normal_test_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/normal_test_file deleted file mode 100644 index a6e1e627a887..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/normal_test_file +++ /dev/null @@ -1,6 +0,0 @@ -file_io -CopyFileW -Positive Test for CopyFileW -test the CopyFileW function -DEFAULT -CopyFileW \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/ro_test_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/ro_test_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/ro_test_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/ro_test_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/ro_test_file deleted file mode 100644 index a6e1e627a887..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/ro_test_file +++ /dev/null @@ -1,6 +0,0 @@ -file_io -CopyFileW -Positive Test for CopyFileW -test the CopyFileW function -DEFAULT -CopyFileW \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/rw_file b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/rw_file deleted file mode 100644 index 39d66f0365ba..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/rw_file +++ /dev/null @@ -1 +0,0 @@ -Read Write file \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/rw_test_directory/keepme b/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/rw_test_directory/keepme deleted file mode 100644 index 31eade7217eb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetFileAttributesW/test1/rw_test_directory/keepme +++ /dev/null @@ -1 +0,0 @@ -Make CVS checkout this directory even with -p option. \ No newline at end of file diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameA/test1/GetTempFileNameA.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameA/test1/GetTempFileNameA.cpp deleted file mode 100644 index 96d45cd90ccf..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameA/test1/GetTempFileNameA.cpp +++ /dev/null @@ -1,124 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: GetTempFileNameA.c (test 1) -** -** Purpose: Tests the PAL implementation of the GetTempFileNameA function. 
-** -** Depends on: -** GetFileAttributesA -** DeleteFileA -** -** -**===================================================================*/ - -#include - - - -PALTEST(file_io_GetTempFileNameA_test1_paltest_gettempfilenamea_test1, "file_io/GetTempFileNameA/test1/paltest_gettempfilenamea_test1") -{ - UINT uiError = 0; - const UINT uUnique = 0; - const char* szDot = {"."}; - const char* szValidPrefix = {"cfr"}; - const char* szLongValidPrefix = {"cfrwxyz"}; - char szReturnedName[256]; - char szTempString[256]; - - if (0 != PAL_Initialize(argc, argv)) - { - return FAIL; - } - - /* valid path with null prefix */ - uiError = GetTempFileNameA(szDot, NULL, uUnique, szReturnedName); - if (uiError == 0) - { - Fail("GetTempFileNameA: ERROR -> Call failed with a valid path " - "with the error code: %ld\n", GetLastError()); - } - else - { - /* verify temp file was created */ - if (GetFileAttributesA(szReturnedName) == -1) - { - Fail("GetTempFileNameA: ERROR -> GetFileAttributes failed on the " - "returned temp file \"%s\" with error code: %ld.\n", - szReturnedName, - GetLastError()); - } - if (DeleteFileA(szReturnedName) != TRUE) - { - Fail("GetTempFileNameA: ERROR -> DeleteFileW failed to delete" - "the created temp file with error code: %ld.\n", GetLastError()); - } - } - - - /* valid path with valid prefix */ - uiError = GetTempFileNameA(szDot, szValidPrefix, uUnique, szReturnedName); - if (uiError == 0) - { - Fail("GetTempFileNameA: ERROR -> Call failed with a valid path and " - "prefix with the error code: %ld\n", GetLastError()); - } - else - { - /* verify temp file was created */ - if (GetFileAttributesA(szReturnedName) == -1) - { - Fail("GetTempFileNameA: ERROR -> GetFileAttributes failed on the " - "returned temp file \"%s\" with error code: %ld.\n", - szReturnedName, - GetLastError()); - } - if (DeleteFileA(szReturnedName) != TRUE) - { - Fail("GetTempFileNameA: ERROR -> DeleteFileW failed to delete" - "the created temp \"%s\" file with error code: %ld.\n", - szReturnedName, - GetLastError()); - } - } - - /* valid path with long prefix */ - uiError = GetTempFileNameA(szDot, szLongValidPrefix, uUnique, szReturnedName); - if (uiError == 0) - { - Fail("GetTempFileNameA: ERROR -> Call failed with a valid path and " - "prefix with the error code: %ld\n", GetLastError()); - } - else - { - /* verify temp file was created */ - if (GetFileAttributesA(szReturnedName) == -1) - { - Fail("GetTempFileNameA: ERROR -> GetFileAttributes failed on the " - "returned temp file \"%s\" with error code: %ld.\n", - szReturnedName, - GetLastError()); - } - - /* now verify that it only used the first 3 characters of the prefix */ - sprintf_s(szTempString, ARRAY_SIZE(szTempString), "%s\\%s", szDot, szLongValidPrefix); - if (strncmp(szTempString, szReturnedName, 6) == 0) - { - Fail("GetTempFileNameA: ERROR -> It appears that an improper prefix " - "was used.\n"); - } - - if (DeleteFileA(szReturnedName) != TRUE) - { - Fail("GetTempFileNameA: ERROR -> DeleteFileW failed to delete" - "the created temp file \"%s\" with error code: %ld.\n", - szReturnedName, - GetLastError()); - } - } - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameA/test2/GetTempFileNameA.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameA/test2/GetTempFileNameA.cpp deleted file mode 100644 index 9edaf483985a..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameA/test2/GetTempFileNameA.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the .NET Foundation under one or 
more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: GetTempFileNameA.c (test 2) -** -** Purpose: Tests the number of files GetTempFileNameA can create. -** -** Depends on: -** GetFileAttributesA -** oodles of free disk space (>4.07GB) -** -** -**===================================================================*/ - -#include - - - -PALTEST(file_io_GetTempFileNameA_test2_paltest_gettempfilenamea_test2, "file_io/GetTempFileNameA/test2/paltest_gettempfilenamea_test2") -{ - UINT uiError = 0; - DWORD dwError = 0; - const UINT uUnique = 0; - const char* szDot = {"."}; - const char* szValidPrefix = {"cfr"}; - char szReturnedName[256]; - DWORD i; - - if (0 != PAL_Initialize(argc, argv)) - { - return FAIL; - } - - - /* test the number of temp files that can be created */ - for (i = 0; i < 0x10005; i++) - { - uiError = GetTempFileNameA(szDot, szValidPrefix, uUnique, szReturnedName); - if (uiError == 0) - { - dwError = GetLastError(); - if (dwError == ERROR_FILE_EXISTS) - { - /* file already existes so break out of the loop */ - i--; /* decrement the count because it wasn't successful */ - break; - } - else - { - /* it was something other than the file already existing? */ - Fail("GetTempFileNameA: ERROR -> Call failed with a valid " - "path and prefix with the error code: %ld\n", GetLastError()); - } - } - else - { - /* verify temp file was created */ - if (GetFileAttributesA(szReturnedName) == -1) - { - Fail("GetTempFileNameA: ERROR -> GetFileAttributes failed " - "on the returned temp file \"%s\" with error code: %ld.\n", - szReturnedName, - GetLastError()); - } - } - } - - /* did it create more than 0xffff files */ - if (i > 0xffff) - { - Fail("GetTempFileNameA: ERROR -> Was able to create more than 0xffff" - " temp files.\n"); - } - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameA/test3/gettempfilenamea.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameA/test3/gettempfilenamea.cpp deleted file mode 100644 index fa9112c49f1c..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameA/test3/gettempfilenamea.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: GetTempFileNameA.c (test 3) -** -** Purpose: Tests the PAL implementation of the GetTempFileNameA function. -** Checks the file attributes and ensures that getting a file name, -** deleting the file and getting another doesn't produce the same -** as the just deleted file. Also checks the file size is 0. 
-** -** Depends on: -** GetFileAttributesA -** CloseHandle -** DeleteFileA -** CreateFileA -** GetFileSize -** -** -**===================================================================*/ - -#include - - - -PALTEST(file_io_GetTempFileNameA_test3_paltest_gettempfilenamea_test3, "file_io/GetTempFileNameA/test3/paltest_gettempfilenamea_test3") -{ - const UINT uUnique = 0; - UINT uiError; - const char* szDot = {"."}; - char szReturnedName[MAX_LONGPATH]; - char szReturnedName_02[MAX_LONGPATH]; - DWORD dwFileSize = 0; - HANDLE hFile; - - if (0 != PAL_Initialize(argc, argv)) - { - return FAIL; - } - - - /* valid path with null prefix */ - uiError = GetTempFileNameA(szDot, NULL, uUnique, szReturnedName); - if (uiError == 0) - { - Fail("GetTempFileNameA: ERROR -> Call failed with a valid path " - "with the error code: %u.\n", - GetLastError()); - } - - /* verify temp file was created */ - if (GetFileAttributesA(szReturnedName) == -1) - { - Fail("GetTempFileNameA: ERROR -> GetFileAttributes failed on the " - "returned temp file \"%s\" with error code: %u.\n", - szReturnedName, - GetLastError()); - } - - /* - ** verify that the file size is 0 bytes - */ - - hFile = CreateFileA(szReturnedName, - GENERIC_READ, - FILE_SHARE_READ, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL); - if (hFile == INVALID_HANDLE_VALUE) - { - Trace("GetTempFileNameA: ERROR -> CreateFileA failed to open" - " the created temp file with error code: %u.\n", - GetLastError()); - if (!DeleteFileA(szReturnedName)) - { - Trace("GetTempFileNameA: ERROR -> DeleteFileA failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - Fail(""); - } - - if ((dwFileSize = GetFileSize(hFile, NULL)) != (DWORD)0) - { - Trace("GetTempFileNameA: ERROR -> GetFileSize returned %u whereas" - "it should have returned 0.\n", - dwFileSize); - if (!CloseHandle(hFile)) - { - Trace("GetTempFileNameA: ERROR -> CloseHandle failed. " - "GetLastError returned: %u.\n", - GetLastError()); - } - if (!DeleteFileA(szReturnedName)) - { - Trace("GetTempFileNameA: ERROR -> DeleteFileA failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - Fail(""); - } - - - if (!CloseHandle(hFile)) - { - Fail("GetTempFileNameA: ERROR -> CloseHandle failed. " - "GetLastError returned: %u.\n", - GetLastError()); - } - - if (DeleteFileA(szReturnedName) != TRUE) - { - Fail("GetTempFileNameA: ERROR -> DeleteFileA failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - - /* get another and make sure it's not the same as the last */ - uiError = GetTempFileNameA(szDot, NULL, uUnique, szReturnedName_02); - if (uiError == 0) - { - Fail("GetTempFileNameA: ERROR -> Call failed with a valid path " - "with the error code: %u.\n", - GetLastError()); - } - - /* did we get different names? */ - if (strcmp(szReturnedName, szReturnedName_02) == 0) - { - Trace("GetTempFileNameA: ERROR -> The first call returned \"%s\". 
" - "The second call returned \"%s\" and the two should not be" - " the same.\n", - szReturnedName, - szReturnedName_02); - if (!DeleteFileA(szReturnedName_02)) - { - Trace("GetTempFileNameA: ERROR -> DeleteFileA failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - Fail(""); - } - - /* clean up */ - if (!DeleteFileA(szReturnedName_02)) - { - Fail("GetTempFileNameA: ERROR -> DeleteFileA failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameW/test1/GetTempFileNameW.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameW/test1/GetTempFileNameW.cpp deleted file mode 100644 index 02a01a4fe753..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameW/test1/GetTempFileNameW.cpp +++ /dev/null @@ -1,133 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: GetTempFileNameW.c (test 1) -** -** Purpose: Tests the PAL implementation of the GetTempFileNameW function. -** -** -**===================================================================*/ - -#include - - - -PALTEST(file_io_GetTempFileNameW_test1_paltest_gettempfilenamew_test1, "file_io/GetTempFileNameW/test1/paltest_gettempfilenamew_test1") -{ - UINT uiError = 0; - const UINT uUnique = 0; - WCHAR* wPrefix = NULL; - WCHAR* wPath = NULL; - WCHAR wReturnedName[256]; - WCHAR wTempString[256]; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - - // valid path with null ext - wPath = convert("."); - uiError = GetTempFileNameW(wPath, wPrefix, uUnique, wReturnedName); - free (wPath); - if (uiError == 0) - { - Fail("GetTempFileNameW: ERROR -> Call failed with a valid path " - "with the error code: %ld\n", GetLastError()); - } - else - { - // verify temp file was created - if (GetFileAttributesW(wReturnedName) == -1) - { - Fail("GetTempFileNameW: ERROR -> GetFileAttributes failed on the " - "returned temp file with error code: %ld.\n", GetLastError()); - } - if (DeleteFileW(wReturnedName) != TRUE) - { - Fail("GetTempFileNameW: ERROR -> DeleteFileW failed to delete" - "the created temp file with error code: %lld.\n", GetLastError()); - } - } - - - // valid path with valid prefix - wPath = convert("."); - wPrefix = convert("cfr"); - uiError = GetTempFileNameW(wPath, wPrefix, uUnique, wReturnedName); - free (wPath); - free (wPrefix); - if (uiError == 0) - { - Fail("GetTempFileNameW: ERROR -> Call failed with a valid path and " - "prefix with the error code: %ld\n", GetLastError()); - } - else - { - // verify temp file was created - if (GetFileAttributesW(wReturnedName) == -1) - { - Fail("GetTempFileNameW: ERROR -> GetFileAttributes failed on the " - "returned temp file with error code: %ld.\n", GetLastError()); - } - if (DeleteFileW(wReturnedName) != TRUE) - { - Fail("GetTempFileNameW: ERROR -> DeleteFileW failed to delete" - "the created temp file with error code: %lld.\n", GetLastError()); - } - } - - // valid path with long prefix - wPath = convert("."); - wPrefix = convert("cfrwxyz"); - uiError = GetTempFileNameW(wPath, wPrefix, uUnique, wReturnedName); - if (uiError == 0) - { - free (wPath); - free (wPrefix); - Fail("GetTempFileNameW: ERROR -> Call failed with a valid path and " - "prefix with the error code: %ld\n", GetLastError()); - } - else - { - 
// verify temp file was created - if (GetFileAttributesW(wReturnedName) == -1) - { - free (wPath); - free (wPrefix); - Fail("GetTempFileNameW: ERROR -> GetFileAttributes failed on the " - "returned temp file with error code: %ld.\n", GetLastError()); - } - - // now verify that it only used the first 3 characters of the prefix - WCHAR* wCurr = wTempString; - memcpy(wCurr, wPath, wcslen(wPath) * sizeof(WCHAR)); - wCurr += wcslen(wPath); - wcscat(wCurr, W("/")); - wCurr += wcslen(W("/")); - wcscat(wCurr, wPrefix); - if (memcmp(wTempString, wReturnedName, wcslen(wTempString)*sizeof(WCHAR)) == 0) - { - free (wPath); - free (wPrefix); - Fail("GetTempFileNameW: ERROR -> It appears that an improper prefix " - "was used.\n"); - } - - if (DeleteFileW(wReturnedName) != TRUE) - { - free (wPath); - free (wPrefix); - Fail("GetTempFileNameW: ERROR -> DeleteFileW failed to delete" - "the created temp file with error code: %lld.\n", GetLastError()); - } - } - - free (wPath); - free (wPrefix); - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameW/test2/GetTempFileNameW.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameW/test2/GetTempFileNameW.cpp deleted file mode 100644 index d79e4cad67fb..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameW/test2/GetTempFileNameW.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: GetTempFileNameW.c (test 2) -** -** Purpose: Tests the PAL implementation of the GetTempFileNameW function. -** -** -**===================================================================*/ - -#include - - - -PALTEST(file_io_GetTempFileNameW_test2_paltest_gettempfilenamew_test2, "file_io/GetTempFileNameW/test2/paltest_gettempfilenamew_test2") -{ - UINT uiError = 0; - DWORD dwError = 0; - const UINT uUnique = 0; - WCHAR* wPrefix = NULL; - WCHAR* wPath = NULL; - WCHAR wReturnedName[256]; - DWORD i; - - if (0 != PAL_Initialize(argc,argv)) - { - return FAIL; - } - - - // test the number of temp files that can be created - wPrefix = convert("cfr"); - wPath = convert("."); - for (i = 0; i < 0x10005; i++) - { - uiError = GetTempFileNameW(wPath, wPrefix, uUnique, wReturnedName); - if (uiError == 0) - { - dwError = GetLastError(); - if (dwError == ERROR_FILE_EXISTS) - { - // file already existes so break out of the loop - i--; // decrement the count because it wasn't successful - break; - } - else - { - // it was something other than the file already existing? 
- free (wPath); - free (wPrefix); - Fail("GetTempFileNameW: ERROR -> Call failed with a valid " - "path and prefix with the error code: %ld\n", GetLastError()); - } - } - else - { - // verify temp file was created - if (GetFileAttributesW(wReturnedName) == -1) - { - free (wPath); - free (wPrefix); - Fail("GetTempFileNameW: ERROR -> GetFileAttributes failed " - "on the returned temp file with error code: %ld.\n", - GetLastError()); - } - } - } - - free (wPath); - free (wPrefix); - - // did it create more than 0xffff files - if (i > 0xffff) - { - Fail("GetTempFileNameW: ERROR -> Was able to create more than 0xffff" - " temp files.\n"); - } - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameW/test3/gettempfilenamew.cpp b/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameW/test3/gettempfilenamew.cpp deleted file mode 100644 index 312138b08e49..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/GetTempFileNameW/test3/gettempfilenamew.cpp +++ /dev/null @@ -1,160 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: GetTempFileNameW.c (test 3) -** -** Purpose: Tests the PAL implementation of the GetTempFileNameW function. -** Checks the file attributes and ensures that getting a file name, -** deleting the file and getting another doesn't produce the same -** as the just deleted file. Also checks the file size is 0. -** -** Depends on: -** GetFileAttributesW -** DeleteFileW -** CreateFileW -** GetFileSize -** CloseHandle -** -** -**===================================================================*/ - -#include - - - -PALTEST(file_io_GetTempFileNameW_test3_paltest_gettempfilenamew_test3, "file_io/GetTempFileNameW/test3/paltest_gettempfilenamew_test3") -{ - const UINT uUnique = 0; - UINT uiError; - WCHAR szwReturnedName[MAX_LONGPATH]; - WCHAR szwReturnedName_02[MAX_LONGPATH]; - DWORD dwFileSize = 0; - HANDLE hFile; - const WCHAR szwDot[] = {'.','\0'}; - const WCHAR szwPre[] = {'c','\0'}; - - if (0 != PAL_Initialize(argc, argv)) - { - return FAIL; - } - - - /* valid path with null prefix */ - uiError = GetTempFileNameW(szwDot, szwPre, uUnique, szwReturnedName); - if (uiError == 0) - { - Fail("GetTempFileNameW: ERROR -> Call failed with a valid path " - "with the error code: %u.\n", - GetLastError()); - } - - /* verify temp file was created */ - if (GetFileAttributesW(szwReturnedName) == -1) - { - Fail("GetTempFileNameW: ERROR -> GetFileAttributes failed on the " - "returned temp file \"%S\" with error code: %u.\n", - szwReturnedName, - GetLastError()); - } - - /* - ** verify that the file size is 0 bytes - */ - - hFile = CreateFileW(szwReturnedName, - GENERIC_READ, - FILE_SHARE_READ, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL); - if (hFile == INVALID_HANDLE_VALUE) - { - Trace("GetTempFileNameW: ERROR -> CreateFileW failed to open" - " the created temp file with error code: %u.\n", - GetLastError()); - if (!DeleteFileW(szwReturnedName)) - { - Trace("GetTempFileNameW: ERROR -> DeleteFileW failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - Fail(""); - } - - if ((dwFileSize = GetFileSize(hFile, NULL)) != (DWORD)0) - { - Trace("GetTempFileNameW: ERROR -> GetFileSize returned %u whereas" - "it should have returned 0.\n", - dwFileSize); - if (!CloseHandle(hFile)) - { - Trace("GetTempFileNameW: ERROR -> 
CloseHandle was unable to close the " - "opened file. GetLastError returned %u.\n", - GetLastError()); - } - if (!DeleteFileW(szwReturnedName)) - { - Trace("GetTempFileNameW: ERROR -> DeleteFileW failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - Fail(""); - } - - if (!CloseHandle(hFile)) - { - Fail("GetTempFileNameW: ERROR -> CloseHandle was unable to close the " - "opened file. GetLastError returned %u.\n", - GetLastError()); - } - - - /* delete the file to see if we get the same name next time around */ - if (DeleteFileW(szwReturnedName) != TRUE) - { - Fail("GetTempFileNameW: ERROR -> DeleteFileW failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - - /* get another and make sure it's not the same as the last */ - uiError = GetTempFileNameW(szwDot, szwPre, uUnique, szwReturnedName_02); - if (uiError == 0) - { - Fail("GetTempFileNameW: ERROR -> Call failed with a valid path " - "with the error code: %u.\n", - GetLastError()); - } - - /* did we get different names? */ - if (wcsncmp(szwReturnedName, szwReturnedName_02, wcslen(szwReturnedName)) == 0) - { - Fail("GetTempFileNameW: ERROR -> The first call returned \"%S\". " - "The second call returned \"%S\" and the two should not be" - " the same.\n", - szwReturnedName, - szwReturnedName_02); - if (!DeleteFileW(szwReturnedName_02)) - { - Trace("GetTempFileNameW: ERROR -> DeleteFileW failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - Fail(""); - } - - /* clean up */ - if (!DeleteFileW(szwReturnedName_02)) - { - Fail("GetTempFileNameW: ERROR -> DeleteFileW failed to delete" - " the created temp file with error code: %u.\n", - GetLastError()); - } - - - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/file_io/SearchPathW/test1/SearchPathW.cpp b/src/coreclr/pal/tests/palsuite/file_io/SearchPathW/test1/SearchPathW.cpp index 7b61e753e949..4b851a463859 100644 --- a/src/coreclr/pal/tests/palsuite/file_io/SearchPathW/test1/SearchPathW.cpp +++ b/src/coreclr/pal/tests/palsuite/file_io/SearchPathW/test1/SearchPathW.cpp @@ -67,7 +67,7 @@ #define szFileNameExistsWithExt "searchpathw.c" -char fileloc_SearchPathW_test1[_MAX_PATH]; +char fileloc_SearchPathW_test1[MAX_PATH]; void removeFileHelper_SearchPathW_test1(LPSTR pfile, int location) { @@ -105,7 +105,7 @@ PALTEST(file_io_SearchPathW_test1_paltest_searchpathw_test1, "file_io/SearchPath WCHAR* lpFileName = NULL; WCHAR* lpExtension = NULL; DWORD nBufferLength = 0; - WCHAR lpBuffer[_MAX_PATH]; + WCHAR lpBuffer[MAX_PATH]; WCHAR** lpFilePart = NULL; DWORD error = 0; DWORD result = 0; @@ -132,7 +132,7 @@ PALTEST(file_io_SearchPathW_test1_paltest_searchpathw_test1, "file_io/SearchPath Fail("ERROR: GetTempPathA failed to get a path\n"); } - memset(fileloc_SearchPathW_test1, 0, _MAX_PATH); + memset(fileloc_SearchPathW_test1, 0, MAX_PATH); sprintf_s(fileloc_SearchPathW_test1, ARRAY_SIZE(fileloc_SearchPathW_test1), "%s%s", fullPath, szFileNameExistsWithExt); RemoveAll_SearchPathW_test1(); diff --git a/src/coreclr/pal/tests/palsuite/file_io/WriteFile/test1/WriteFile.cpp b/src/coreclr/pal/tests/palsuite/file_io/WriteFile/test1/WriteFile.cpp index 8664c9e0c987..be2017c6d523 100644 --- a/src/coreclr/pal/tests/palsuite/file_io/WriteFile/test1/WriteFile.cpp +++ b/src/coreclr/pal/tests/palsuite/file_io/WriteFile/test1/WriteFile.cpp @@ -75,9 +75,8 @@ PALTEST(file_io_WriteFile_test1_paltest_writefile_test1, "file_io/WriteFile/test last_error, szReadOnlyFile); } - if 
(!SetFileAttributesA(szReadOnlyFile, FILE_ATTRIBUTE_READONLY)) + if ((last_error = chmod(szReadOnlyFile, S_IRUSR | S_IRGRP | S_IROTH)) != 0) { - last_error = GetLastError(); Trace("WriteFile: ERROR[%ld] -> Unable to make the file read-only.\n", last_error); do_cleanup_WriteFile_test1(); Fail("WriteFile: ERROR[%ld] -> Unable to make the file read-only.\n", last_error); @@ -102,9 +101,8 @@ PALTEST(file_io_WriteFile_test1_paltest_writefile_test1, "file_io/WriteFile/test } //To delete file need to make it normal - if(!SetFileAttributesA(szReadOnlyFile,FILE_ATTRIBUTE_NORMAL)) + if ((last_error = chmod(szReadOnlyFile, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) != 0) { - last_error = GetLastError(); Fail("WriteFile: ERROR[%ld] -> Unable to make the file attribute NORMAL.\n", last_error); } diff --git a/src/coreclr/pal/tests/palsuite/file_io/errorpathnotfound/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/file_io/errorpathnotfound/test1/test1.cpp index 1b805708ee75..d3551a773a5a 100644 --- a/src/coreclr/pal/tests/palsuite/file_io/errorpathnotfound/test1/test1.cpp +++ b/src/coreclr/pal/tests/palsuite/file_io/errorpathnotfound/test1/test1.cpp @@ -17,7 +17,7 @@ ** Functions covered by this test are: -** CopyFileA, CopyFileW, CreateFileA,CreateFileW, +** CreateFileA,CreateFileW, ** DeleteFileA and DeleteFileW. @@ -84,162 +84,6 @@ PALTEST(file_io_errorpathnotfound_test1_paltest_errorpathnotfound_test1, "file_i - /*...................Test CopyFileW.............................*/ - - - - /* test with an invalid path */ - - bRc = CopyFileW(wBadFilePath,wDest,TRUE); - - if(!bRc) - - { - - if(GetLastError()!= ERROR_PATH_NOT_FOUND) - - { - - Trace("CopyFileW: calling GetLastError() after copying a file" - - " with wrong path returned [%u] while it should return [%u]\n" - - ,GetLastError(), ERROR_PATH_NOT_FOUND); - - testPass = FALSE; - - } - - } - - else - - { - - testPass = FALSE; - - } - - - - /* test with invalid file name */ - - bRc = CopyFileW(wBadFileName,wDest,TRUE); - - if(!bRc) - - { - - if(GetLastError()!= ERROR_FILE_NOT_FOUND) - - { - - Trace("CopyFileW: calling GetLastError() after copying a file" - - " with wrong name returned [%u] while it should return [%u]\n" - - ,GetLastError(), ERROR_FILE_NOT_FOUND); - - testPass = FALSE; - - } - - - - } - - else - - { - - Trace("CopyFileW: managed to copy a file with wrong name\n"); - - testPass = FALSE; - - } - - - - - - - - /*..................CopyFileA...................................*/ - - - - /* test with an invalid path */ - - bRc = CopyFileA(sBadFilePath,sDest,TRUE); - - if(! bRc) - - { - - if(GetLastError()!= ERROR_PATH_NOT_FOUND) - - { - - Trace("CopyFileA: calling GetLastError() after copying a file" - - " with wrong path returned [%u] while it should return [%u]\n" - - ,GetLastError(), ERROR_PATH_NOT_FOUND); - - testPass = FALSE; - - } - - } - - else - - { - - Trace("CopyFileA: managed to copy a file with wrong path\n"); - - testPass = FALSE; - - } - - - - /* test with an invalid file name */ - - bRc = CopyFileA(sBadFileName,sDest,TRUE); - - if(! 
bRc) - - { - - if(GetLastError()!= ERROR_FILE_NOT_FOUND) - - { - - Trace("CopyFileA: calling GetLastError() after copying a file" - - " with wrong name returned [%u] while it should return [%u]\n" - - ,GetLastError(), ERROR_FILE_NOT_FOUND); - - testPass = FALSE; - - } - - } - - else - - { - - Trace("CopyFileA: managed to copy a file with wrong name\n"); - - testPass = FALSE; - - } - - - diff --git a/src/coreclr/pal/tests/palsuite/file_io/errorpathnotfound/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/file_io/errorpathnotfound/test2/test2.cpp deleted file mode 100644 index 034fdfdccaf7..000000000000 --- a/src/coreclr/pal/tests/palsuite/file_io/errorpathnotfound/test2/test2.cpp +++ /dev/null @@ -1,273 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== - -** - -** Source: test2.c - -** - -** Purpose: Test the return value of GetLastError() after calling - -** some file_io functions with an invalid path. - -** - -** Functions covered by this test are: - -** GetFileAttributesA, GetFileAttributesW, - -** -** - -** - - - -** - -**===================================================================*/ - - - -#include - - - -PALTEST(file_io_errorpathnotfound_test2_paltest_errorpathnotfound_test2, "file_io/errorpathnotfound/test2/paltest_errorpathnotfound_test2") - -{ - - - - BOOL testPass = TRUE; - - BOOL bRc = TRUE; - - HANDLE hFile; - - DWORD fileAttrib; - - - - const char* sBadFilePath = "bad/badPath.tmp"; - - const char* sBadFileName = "badName.tmp"; - - - - const WCHAR wBadFilePath[] = - - {'w','b','a','d','/','b','a', - - 'd','.','t','m','p','\0'}; - - const WCHAR wBadFileName[] = - - {'w','B','a','d','.','t','m','p','\0'}; - - const WCHAR wDest[] = - - {'w','d','e','s','t','.','t','m','p','\0'}; - - - - - - if (0 != PAL_Initialize(argc,argv)) - - { - - return FAIL; - - } - - - - /*...................Test GetFileAttributesW.............................*/ - - - - /* test with an invalid path */ - - fileAttrib = GetFileAttributesW(wBadFilePath); - - if(fileAttrib == -1) - - { - - if(GetLastError()!= ERROR_PATH_NOT_FOUND) - - { - - Trace("GetFileAttributesW: calling GetLastError() after getting" - - " the attributes of a file with wrong path returned [%u]" - - " while it should return [%u]\n", - - GetLastError(), ERROR_PATH_NOT_FOUND); - - testPass = FALSE; - - } - - } - - else - - { - - Trace("GetFileAttributesW: managed to get the attrib of a file" - - " with wrong path\n"); - - testPass = FALSE; - - } - - - - /* test with invalid file name */ - - fileAttrib = GetFileAttributesW(wBadFileName); - - if(fileAttrib == -1) - - { - - if(GetLastError()!= ERROR_FILE_NOT_FOUND) - - { - - Trace("GetFileAttributesW: calling GetLastError() after getting" - - " the attributes of a file with wrong name returned [%u] " - - "while it should return [%u]\n" - - ,GetLastError(), ERROR_FILE_NOT_FOUND); - - testPass = FALSE; - - } - - } - - else - - { - - Trace("GetFileAttributesW: managed to get the attrib of a file" - - " with wrong name\n"); - - testPass = FALSE; - - } - - - - /*...................Test GetFileAttributesA.............................*/ - - - - /* test with an invalid path */ - - fileAttrib = GetFileAttributesA(sBadFilePath); - - if(fileAttrib == -1) - - { - - if(GetLastError()!= ERROR_PATH_NOT_FOUND) - - { - - Trace("GetFileAttributesA: calling GetLastError() after getting" - - " the attributes of a file with wrong path 
returned [%u] while" - - " it should return [%u]\n", - - GetLastError(), ERROR_PATH_NOT_FOUND); - - testPass = FALSE; - - } - - } - - else - - { - - Trace("GetFileAttributesA: managed to get the attrib of a file" - - " with wrong path\n"); - - testPass = FALSE; - - } - - - - /* test with invalid file name */ - - fileAttrib = GetFileAttributesA(sBadFileName); - - if(fileAttrib == -1) - - { - - if(GetLastError()!= ERROR_FILE_NOT_FOUND) - - { - - Trace("GetFileAttributesA: calling GetLastError() after getting " - - "the attributes of a file with wrong name returned [%u] " - - "while it should return [%u]\n" - - ,GetLastError(), ERROR_FILE_NOT_FOUND); - - testPass = FALSE; - - } - - - - } - - else - - { - - Trace("GetFileAttributesA: managed to get the attrib of a file with" - - " wrong name\n"); - - testPass = FALSE; - - } - - - if(! testPass) - - { - - Fail(""); - - } - - PAL_Terminate(); - - return PASS; - -} - - - diff --git a/src/coreclr/pal/tests/palsuite/filemapping_memmgt/MapViewOfFile/test1/MapViewOfFile.cpp b/src/coreclr/pal/tests/palsuite/filemapping_memmgt/MapViewOfFile/test1/MapViewOfFile.cpp index e718387a84a6..f6b5989ee6d2 100644 --- a/src/coreclr/pal/tests/palsuite/filemapping_memmgt/MapViewOfFile/test1/MapViewOfFile.cpp +++ b/src/coreclr/pal/tests/palsuite/filemapping_memmgt/MapViewOfFile/test1/MapViewOfFile.cpp @@ -35,7 +35,7 @@ PALTEST(filemapping_memmgt_MapViewOfFile_test1_paltest_mapviewoffile_test1, "fil HANDLE hFile = INVALID_HANDLE_VALUE; LPSTR buf = NULL; CHAR ch[MAPPINGSIZE]; - CHAR lpFilePath[MAX_PATH]; + CHAR* lpFilePath = TEMP_DIRECTORY_PATH"tst"; DWORD dwBytesWritten = 0; DWORD dwInitialSize = 0; DWORD dwFinalSize = 0; @@ -51,8 +51,6 @@ PALTEST(filemapping_memmgt_MapViewOfFile_test1_paltest_mapviewoffile_test1, "fil return FAIL; } - GetTempFileName(TEMP_DIRECTORY_PATH, "tst", 0, lpFilePath); - /* Create a file handle with CreateFile. */ hFile = CreateFile( lpFilePath, diff --git a/src/coreclr/pal/tests/palsuite/issues.targets b/src/coreclr/pal/tests/palsuite/issues.targets index 67b5df72eeb3..a4381220bb96 100644 --- a/src/coreclr/pal/tests/palsuite/issues.targets +++ b/src/coreclr/pal/tests/palsuite/issues.targets @@ -18,10 +18,4 @@ - - - https://github.com/dotnet/runtime/issues/7639 - - - diff --git a/src/coreclr/pal/tests/palsuite/locale_info/GetACP/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/locale_info/GetACP/test1/test1.cpp deleted file mode 100644 index 0f315f2ffe61..000000000000 --- a/src/coreclr/pal/tests/palsuite/locale_info/GetACP/test1/test1.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================================ -** -** Source: test1.c -** -** Purpose: Tests that GetACP returns the expected default code page. -** -** -**==========================================================================*/ - - -#include - -/* - * NOTE: We only support code page 65001 (UTF-8). 
- */ - -#define EXPECTED_CP 65001 - -PALTEST(locale_info_GetACP_test1_paltest_getacp_test1, "locale_info/GetACP/test1/paltest_getacp_test1") -{ - int ret; - - if (PAL_Initialize(argc, argv)) - { - return FAIL; - } - - ret = GetACP(); - if (ret != EXPECTED_CP) - { - Fail("ERROR: got incorrect result for current ANSI code page!\n" - "Expected %d, got %d\n", EXPECTED_CP, ret); - } - - PAL_Terminate(); - return PASS; -} - diff --git a/src/coreclr/pal/tests/palsuite/manual-inspect.dat b/src/coreclr/pal/tests/palsuite/manual-inspect.dat index 22f9ebc1f8ff..fc01f1d05ec3 100644 --- a/src/coreclr/pal/tests/palsuite/manual-inspect.dat +++ b/src/coreclr/pal/tests/palsuite/manual-inspect.dat @@ -1,11 +1,6 @@ # Licensed to the .NET Foundation under one or more agreements. # The .NET Foundation licenses this file to you under the MIT license. -# Automatable to detect gross errors; also manually inspect for proper behaviour -miscellaneous/messageboxw/test1,1 -# Automatable to detect gross errors; also manually inspect for proper behaviour -# Env var PAL_DISABLE_MESSAGEBOX=1 disables msg boxes for automation on Windows -miscellaneous/messageboxw/test2,1 # Automatable to detect gross errors; also manually inspect for proper behaviour pal_specific/pal_get_stderr/test1,1 pal_specific/pal_get_stdout/test1,1 diff --git a/src/coreclr/pal/tests/palsuite/manual-unautomatable.dat b/src/coreclr/pal/tests/palsuite/manual-unautomatable.dat index 78e1831fbc6a..24575263b2e9 100644 --- a/src/coreclr/pal/tests/palsuite/manual-unautomatable.dat +++ b/src/coreclr/pal/tests/palsuite/manual-unautomatable.dat @@ -13,9 +13,6 @@ debug_api/outputdebugstringa/test1,1 pal_specific/pal_get_stdin/test1,1 threading/setconsolectrlhandler/test1,1 threading/setconsolectrlhandler/test4,1 -# These tests take several minutes to run and time out when run with the harness -file_io/gettempfilenamea/test2,1 -file_io/gettempfilenamew/test2,1 # getstdhandle fails under Windows if the output is redirected so # it must be run from the command line file_io/getstdhandle/test1,1 diff --git a/src/coreclr/pal/tests/palsuite/miscellaneous/GetTickCount/test1/test.cpp b/src/coreclr/pal/tests/palsuite/miscellaneous/GetTickCount/test1/test.cpp deleted file mode 100644 index f783ff878ceb..000000000000 --- a/src/coreclr/pal/tests/palsuite/miscellaneous/GetTickCount/test1/test.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================ -** -** Source: -** -** Source : test1.c -** -** Purpose: Test for GetTickCount() function -** -** -**=========================================================*/ - -#include - -PALTEST(miscellaneous_GetTickCount_test1_paltest_gettickcount_test1, "miscellaneous/GetTickCount/test1/paltest_gettickcount_test1") -{ - - DWORD FirstCount = 0; - DWORD SecondCount = 0; - - /* - * Initialize the PAL and return FAILURE if this fails - */ - - if(0 != (PAL_Initialize(argc, argv))) - { - return FAIL; - } - - /* Grab a FirstCount, then loop for a bit to make the clock increase */ - FirstCount = GetTickCount(); - - /* Make sure some time passes */ - Sleep(60); //Since the get tick count is accurate within 55 milliseconds. - - /* Get a second count */ - SecondCount = GetTickCount(); - - /* Make sure the second one is bigger than the first. - This isn't the best test, but it at least shows that it's returning a - DWORD which is increasing. 
- */ - - if(FirstCount >= SecondCount) - { - Fail("ERROR: The first time (%d) was greater/equal than the second time " - " (%d). The tick count should have increased.\n", - FirstCount,SecondCount); - } - - PAL_Terminate(); - return PASS; -} - - - diff --git a/src/coreclr/pal/tests/palsuite/miscellaneous/queryperformancecounter/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/miscellaneous/queryperformancecounter/test1/test1.cpp deleted file mode 100644 index d14fa2fa2bb2..000000000000 --- a/src/coreclr/pal/tests/palsuite/miscellaneous/queryperformancecounter/test1/test1.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================ -** -** Source: test1.c -** -** Purpose: Test for QueryPerformanceCounter function -** -** -**=========================================================*/ - -/* Depends on: QueryPerformanceFrequency. */ - -#include - - -PALTEST(miscellaneous_queryperformancecounter_test1_paltest_queryperformancecounter_test1, "miscellaneous/queryperformancecounter/test1/paltest_queryperformancecounter_test1") -{ - /* Milliseconds of error which are acceptable Function execution time, etc. - FreeBSD has a "standard" resolution of 50ms for waiting operations, so we - must take that into account as well */ - DWORD AcceptableTimeError = 15; - - int i; - int NumIterations = 100; - DWORD AvgTimeDiff; - DWORD TimeDiff[100]; - DWORD TotalTimeDiff = 0; - DWORD SleepInterval = 50; - LARGE_INTEGER StartTime; - LARGE_INTEGER EndTime; - LARGE_INTEGER Freq; - - /* Initialize the PAL. - */ - - if(0 != (PAL_Initialize(argc, argv))) - { - return FAIL; - } - - /* Get the frequency of the High-Performance Counter, - * in order to convert counter time to milliseconds. - */ - if (!QueryPerformanceFrequency(&Freq)) - { - Fail("ERROR:%u:Unable to retrieve the frequency of the " - "high-resolution performance counter.\n", - GetLastError()); - } - - /* Perform this set of sleep timings a number of times. - */ - for(i=0; i < NumIterations; i++) - { - - /* Get the current counter value. - */ - if (!QueryPerformanceCounter(&StartTime)) - { - Fail("ERROR:%u:Unable to retrieve the current value of the " - "high-resolution performance counter.\n", - GetLastError()); - } - - /* Sleep a predetermined interval. - */ - Sleep(SleepInterval); - - /* Get the new current counter value. - */ - if (!QueryPerformanceCounter(&EndTime)) - { - Fail("ERROR:%u:Unable to retrieve the current value of the " - "high-resolution performance counter.\n", - GetLastError()); - } - - /* Determine elapsed time, in milliseconds. Compare the elapsed time - * with the sleep interval, and add to counter. - */ - TimeDiff[i] = (DWORD)(((EndTime.QuadPart - StartTime.QuadPart)*1000)/ - (Freq.QuadPart)); - TotalTimeDiff += TimeDiff[i] - SleepInterval; - - } - - /* Verify that the average of the difference between the performance - * counter and the sleep interval is within our acceptable range. - */ - AvgTimeDiff = TotalTimeDiff / NumIterations; - if (AvgTimeDiff > AcceptableTimeError) - { - Fail("ERROR: average diff %u acceptable %u.\n", - AvgTimeDiff, - AcceptableTimeError); - } - - /* Terminate the PAL. 
- */ - PAL_Terminate(); - return PASS; -} - - diff --git a/src/coreclr/pal/tests/palsuite/miscellaneous/queryperformancefrequency/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/miscellaneous/queryperformancefrequency/test1/test1.cpp deleted file mode 100644 index 3e370ddafb10..000000000000 --- a/src/coreclr/pal/tests/palsuite/miscellaneous/queryperformancefrequency/test1/test1.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================ -** -** Source: test1.c -** -** Purpose: Test for QueryPerformanceFrequency function -** -** -**=========================================================*/ - -#include - -PALTEST(miscellaneous_queryperformancefrequency_test1_paltest_queryperformancefrequency_test1, "miscellaneous/queryperformancefrequency/test1/paltest_queryperformancefrequency_test1") -{ - - LARGE_INTEGER Freq; - - /* Initialize the PAL. - */ - - if(0 != (PAL_Initialize(argc, argv))) - { - return FAIL; - } - - /* Check the return value of the performance - * frequency, a value of zero indicates that - * either the call has failed or the - * high-resolution performance counter is not - * installed. - */ - if (!QueryPerformanceFrequency(&Freq)) - { - - Fail("ERROR:%u:Unable to retrieve the frequency of the " - "high-resolution performance counter.\n", - GetLastError()); - } - - - /* Check the return value the frequency the - * value should be non-zero. - */ - if (Freq.QuadPart == 0) - { - - Fail("ERROR: The frequency has been determined to be 0 " - "the frequency should be non-zero.\n"); - - } - - /* Terminate the PAL. - */ - PAL_Terminate(); - return PASS; -} diff --git a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_GetUserTempDirectoryW/test1/PAL_GetUserTempDirectoryW.cpp b/src/coreclr/pal/tests/palsuite/pal_specific/PAL_GetUserTempDirectoryW/test1/PAL_GetUserTempDirectoryW.cpp deleted file mode 100644 index b09d3f626319..000000000000 --- a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_GetUserTempDirectoryW/test1/PAL_GetUserTempDirectoryW.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================= -** -** Source: pal_getusertempdirectoryw.c -** -** Purpose: Positive test the PAL_GetUserTempDirectoryW API. -** Call PAL_GetUserTempDirectoryW to retrieve the user -** temp directory. 
-** -** -**============================================================*/ -#define UNICODE -#include - -#define DIRECTORYLENGTH 1024 - -PALTEST(pal_specific_PAL_GetUserTempDirectoryW_test1_paltest_pal_getusertempdirectoryw_test1, "pal_specific/PAL_GetUserTempDirectoryW/test1/paltest_pal_getusertempdirectoryw_test1") -{ - int err; - DWORD dwFileAttribute; - DWORD cch = DIRECTORYLENGTH; - WCHAR wDirectoryName[DIRECTORYLENGTH]; - - //Initialize the PAL environment - err = PAL_Initialize(argc, argv); - if(0 != err) - { - ExitProcess(FAIL); - } - - //retrieve the user temp directory - err = PAL_GetUserTempDirectory(ddtInstallationDependentDirectory, wDirectoryName, &cch); - - if(0 == err || 0 == strlen(convertC(wDirectoryName))) - { - Fail("Failed to call PAL_GetUserTempDirectoryW API!\n"); - } - - - //retrieve the attributes of a file or directory - dwFileAttribute = GetFileAttributesW(wDirectoryName); - - - //check if the retrieved attribute indicates a directory - if( FILE_ATTRIBUTE_DIRECTORY != (FILE_ATTRIBUTE_DIRECTORY & dwFileAttribute)) - { - Fail("PAL_GetUserTempDirectoryW API returned a non-directory name!\n"); - } - - printf ("PAL_GetUserTempDirectoryW returns %S\n", wDirectoryName); - - PAL_Terminate(); - return PASS; - -} diff --git a/src/coreclr/pal/tests/palsuite/paltestlist.txt b/src/coreclr/pal/tests/palsuite/paltestlist.txt index d8fd3a875739..b55a93735f16 100644 --- a/src/coreclr/pal/tests/palsuite/paltestlist.txt +++ b/src/coreclr/pal/tests/palsuite/paltestlist.txt @@ -152,19 +152,8 @@ filemapping_memmgt/VirtualProtect/test4/paltest_virtualprotect_test4 filemapping_memmgt/VirtualProtect/test6/paltest_virtualprotect_test6 filemapping_memmgt/VirtualProtect/test7/paltest_virtualprotect_test7 filemapping_memmgt/VirtualQuery/test1/paltest_virtualquery_test1 -file_io/CopyFileA/test1/paltest_copyfilea_test1 -file_io/CopyFileA/test2/paltest_copyfilea_test2 -file_io/CopyFileA/test3/paltest_copyfilea_test3 -file_io/CopyFileA/test4/paltest_copyfilea_test4 -file_io/CopyFileW/test2/paltest_copyfilew_test2 -file_io/CopyFileW/test3/paltest_copyfilew_test3 -file_io/errorpathnotfound/test2/paltest_errorpathnotfound_test2 file_io/FILECanonicalizePath/paltest_filecanonicalizepath_test1 file_io/FlushFileBuffers/test1/paltest_flushfilebuffers_test1 -file_io/GetConsoleOutputCP/test1/paltest_getconsoleoutputcp_test1 -file_io/GetFileAttributesA/test1/paltest_getfileattributesa_test1 -file_io/GetFileAttributesExW/test2/paltest_getfileattributesexw_test2 -file_io/GetFileAttributesW/test1/paltest_getfileattributesw_test1 file_io/GetFileSize/test1/paltest_getfilesize_test1 file_io/GetFileSizeEx/test1/paltest_getfilesizeex_test1 file_io/GetFullPathNameA/test1/paltest_getfullpathnamea_test1 @@ -176,10 +165,6 @@ file_io/GetFullPathNameW/test4/paltest_getfullpathnamew_test4 file_io/GetStdHandle/test2/paltest_getstdhandle_test2 file_io/GetSystemTime/test1/paltest_getsystemtime_test1 file_io/GetSystemTimeAsFileTime/test1/paltest_getsystemtimeasfiletime_test1 -file_io/GetTempFileNameA/test1/paltest_gettempfilenamea_test1 -file_io/GetTempFileNameA/test2/paltest_gettempfilenamea_test2 -file_io/GetTempFileNameA/test3/paltest_gettempfilenamea_test3 -file_io/GetTempFileNameW/test3/paltest_gettempfilenamew_test3 file_io/gettemppatha/test1/paltest_gettemppatha_test1 file_io/GetTempPathW/test1/paltest_gettemppathw_test1 file_io/ReadFile/test2/paltest_readfile_test2 @@ -203,7 +188,6 @@ loader/LoadLibraryA/test5/paltest_loadlibrarya_test5 loader/LoadLibraryW/test2/paltest_loadlibraryw_test2 
loader/LoadLibraryW/test3/paltest_loadlibraryw_test3 loader/LoadLibraryW/test5/paltest_loadlibraryw_test5 -locale_info/GetACP/test1/paltest_getacp_test1 locale_info/MultiByteToWideChar/test1/paltest_multibytetowidechar_test1 locale_info/MultiByteToWideChar/test2/paltest_multibytetowidechar_test2 locale_info/MultiByteToWideChar/test3/paltest_multibytetowidechar_test3 @@ -236,7 +220,6 @@ miscellaneous/GetEnvironmentVariableW/test5/paltest_getenvironmentvariablew_test miscellaneous/GetEnvironmentVariableW/test6/paltest_getenvironmentvariablew_test6 miscellaneous/GetLastError/test1/paltest_getlasterror_test1 miscellaneous/GetSystemInfo/test1/paltest_getsysteminfo_test1 -miscellaneous/GetTickCount/test1/paltest_gettickcount_test1 miscellaneous/InterlockedCompareExchange/test1/paltest_interlockedcompareexchange_test1 miscellaneous/InterlockedCompareExchange/test2/paltest_interlockedcompareexchange_test2 miscellaneous/InterlockedCompareExchange64/test1/paltest_interlockedcompareexchange64_test1 @@ -253,8 +236,6 @@ miscellaneous/InterlockedIncrement/test1/paltest_interlockedincrement_test1 miscellaneous/InterlockedIncrement/test2/paltest_interlockedincrement_test2 miscellaneous/InterlockedIncrement64/test1/paltest_interlockedincrement64_test1 miscellaneous/InterlockedIncrement64/test2/paltest_interlockedincrement64_test2 -miscellaneous/queryperformancecounter/test1/paltest_queryperformancecounter_test1 -miscellaneous/queryperformancefrequency/test1/paltest_queryperformancefrequency_test1 miscellaneous/SetEnvironmentVariableA/test1/paltest_setenvironmentvariablea_test1 miscellaneous/SetEnvironmentVariableA/test2/paltest_setenvironmentvariablea_test2 miscellaneous/SetEnvironmentVariableA/test3/paltest_setenvironmentvariablea_test3 @@ -276,11 +257,6 @@ threading/CreateSemaphoreW_ReleaseSemaphore/test1/paltest_createsemaphorew_relea threading/CreateSemaphoreW_ReleaseSemaphore/test2/paltest_createsemaphorew_releasesemaphore_test2 threading/CreateThread/test1/paltest_createthread_test1 threading/CreateThread/test3/paltest_createthread_test3 -threading/CriticalSectionFunctions/test1/paltest_criticalsectionfunctions_test1 -threading/CriticalSectionFunctions/test2/paltest_criticalsectionfunctions_test2 -threading/CriticalSectionFunctions/test4/paltest_criticalsectionfunctions_test4 -threading/CriticalSectionFunctions/test7/paltest_criticalsectionfunctions_test7 -threading/CriticalSectionFunctions/test8/paltest_criticalsectionfunctions_test8 threading/DuplicateHandle/test10/paltest_duplicatehandle_test10 threading/DuplicateHandle/test2/paltest_duplicatehandle_test2 threading/DuplicateHandle/test4/paltest_duplicatehandle_test4 @@ -327,4 +303,3 @@ threading/WaitForSingleObject/WFSOMutexTest/paltest_waitforsingleobject_wfsomute threading/WaitForSingleObject/WFSOSemaphoreTest/paltest_waitforsingleobject_wfsosemaphoretest threading/WaitForSingleObject/WFSOThreadTest/paltest_waitforsingleobject_wfsothreadtest threading/YieldProcessor/test1/paltest_yieldprocessor_test1 -eventprovider/eventprovidertest diff --git a/src/coreclr/pal/tests/palsuite/paltestlist_to_be_reviewed.txt b/src/coreclr/pal/tests/palsuite/paltestlist_to_be_reviewed.txt index 964540f7dc90..d861a469b370 100644 --- a/src/coreclr/pal/tests/palsuite/paltestlist_to_be_reviewed.txt +++ b/src/coreclr/pal/tests/palsuite/paltestlist_to_be_reviewed.txt @@ -1,16 +1,7 @@ This is a list of failing PAL tests that need to be reviewed because. They should either be fixed or deleted if they are no longer applicable. 
-c_runtime/ferror/test1/paltest_ferror_test1 -c_runtime/ferror/test2/paltest_ferror_test2 -c_runtime/fputs/test2/paltest_fputs_test2 -c_runtime/fread/test1/paltest_fread_test1 -c_runtime/fread/test2/paltest_fread_test2 -c_runtime/fread/test3/paltest_fread_test3 -c_runtime/ftell/test1/paltest_ftell_test1 c_runtime/iswprint/test1/paltest_iswprint_test1 -c_runtime/vprintf/test1/paltest_vprintf_test1 -c_runtime/_getw/test1/paltest_getw_test1 debug_api/DebugBreak/test1/paltest_debugbreak_test1 debug_api/OutputDebugStringA/test1/paltest_outputdebugstringa_test1 debug_api/WriteProcessMemory/test1/paltest_writeprocessmemory_test1 @@ -30,8 +21,6 @@ exception_handling/PAL_EXCEPT_FILTER_EX/test1/paltest_pal_except_filter_ex_test1 exception_handling/PAL_EXCEPT_FILTER_EX/test2/paltest_pal_except_filter_ex_test2 exception_handling/PAL_EXCEPT_FILTER_EX/test3/paltest_pal_except_filter_ex_test3 exception_handling/pal_finally/test1/paltest_pal_finally_test1 -exception_handling/PAL_GetBottommostRegistration/test1/paltest_pal_getbottommostregistration_test1 -exception_handling/PAL_GetBottommostRegistration/test2/paltest_pal_getbottommostregistration_test2 exception_handling/PAL_TRY_EXCEPT/test1/paltest_pal_try_except_test1 exception_handling/PAL_TRY_EXCEPT/test2/paltest_pal_try_except_test2 exception_handling/PAL_TRY_EXCEPT_EX/test1/paltest_pal_try_except_ex_test1 @@ -48,19 +37,12 @@ filemapping_memmgt/GetModuleFileNameA/test1/paltest_getmodulefilenamea_test1 filemapping_memmgt/GetModuleFileNameW/test1/paltest_getmodulefilenamew_test1 filemapping_memmgt/GetProcAddress/test1/paltest_getprocaddress_test1 filemapping_memmgt/GetProcAddress/test2/paltest_getprocaddress_test2 -filemapping_memmgt/ReadProcessMemory/ReadProcessMemory_neg1/paltest_readprocessmemory_readprocessmemory_neg1 -filemapping_memmgt/ReadProcessMemory/test1/paltest_readprocessmemory_test1 -filemapping_memmgt/ReadProcessMemory/test2/paltest_readprocessmemory_test2 -file_io/CopyFileW/test1/paltest_copyfilew_test1 file_io/CreateFileA/test1/paltest_createfilea_test1 file_io/CreateFileW/test1/paltest_createfilew_test1 file_io/errorpathnotfound/test1/paltest_errorpathnotfound_test1 -file_io/GetFileAttributesExW/test1/paltest_getfileattributesexw_test1 file_io/GetFullPathNameA/test2/paltest_getfullpathnamea_test2 file_io/GetFullPathNameW/test2/paltest_getfullpathnamew_test2 file_io/GetStdHandle/test1/paltest_getstdhandle_test1 -file_io/GetTempFileNameW/test1/paltest_gettempfilenamew_test1 -file_io/GetTempFileNameW/test2/paltest_gettempfilenamew_test2 file_io/gettemppatha/test1/paltest_gettemppatha_test1 file_io/GetTempPathW/test1/paltest_gettemppathw_test1 file_io/ReadFile/test1/paltest_readfile_test1 @@ -74,8 +56,6 @@ locale_info/CompareStringA/test1/paltest_comparestringa_test1 locale_info/CompareStringW/test1/paltest_comparestringw_test1 locale_info/GetLocaleInfoW/test1/paltest_getlocaleinfow_test1 locale_info/GetLocaleInfoW/test2/paltest_getlocaleinfow_test2 -locale_info/GetStringTypeExW/test1/paltest_getstringtypeexw_test1 -locale_info/GetStringTypeExW/test2/paltest_getstringtypeexw_test2 locale_info/WideCharToMultiByte/test4/paltest_widechartomultibyte_test4 miscellaneous/FormatMessageW/test4/paltest_formatmessagew_test4 miscellaneous/FormatMessageW/test5/paltest_formatmessagew_test5 @@ -86,11 +66,6 @@ miscellaneous/IsBadReadPtr/test1/paltest_isbadreadptr_test1 miscellaneous/IsBadWritePtr/test1/paltest_isbadwriteptr_test1 miscellaneous/IsBadWritePtr/test2/paltest_isbadwriteptr_test2 miscellaneous/IsBadWritePtr/test3/paltest_isbadwriteptr_test3 
-miscellaneous/wsprintfW/test2/paltest_wsprintfw_test2 -miscellaneous/wsprintfW/test7/paltest_wsprintfw_test7 -pal_specific/PAL_GetUserTempDirectoryW/test1/paltest_pal_getusertempdirectoryw_test1 -pal_specific/PAL_get_stderr/test1/paltest_pal_get_stderr_test1 -pal_specific/PAL_get_stdin/test1/paltest_pal_get_stdin_test1 pal_specific/PAL_get_stdout/test1/paltest_pal_get_stdout_test1 pal_specific/PAL_RegisterLibraryW_UnregisterLibraryW/test1/paltest_pal_registerlibraryw_unregisterlibraryw_test1 samples/test2/paltest_samples_test2 @@ -98,8 +73,6 @@ threading/CreateEventW/test3/paltest_createeventw_test3 threading/CreateMutexW_ReleaseMutex/test2/paltest_createmutexw_releasemutex_test2 threading/CreateSemaphoreW_ReleaseSemaphore/test3/paltest_createsemaphorew_releasesemaphore_test3 threading/CreateThread/test2/paltest_createthread_test2 -threading/CriticalSectionFunctions/test5/paltest_criticalsectionfunctions_test5 -threading/CriticalSectionFunctions/test6/paltest_criticalsectionfunctions_test6 threading/DuplicateHandle/test1/paltest_duplicatehandle_test1 threading/DuplicateHandle/test11/paltest_duplicatehandle_test11 threading/DuplicateHandle/test12/paltest_duplicatehandle_test12 @@ -116,12 +89,10 @@ threading/OpenEventW/test4/paltest_openeventw_test4 threading/OpenEventW/test5/paltest_openeventw_test5 threading/OpenProcess/test1/paltest_openprocess_test1 threading/QueueUserAPC/test1/paltest_queueuserapc_test1 -threading/setthreadcontext/test1/paltest_setthreadcontext_test1 threading/Sleep/test1/paltest_sleep_test1 threading/SleepEx/test1/paltest_sleepex_test1 threading/SleepEx/test2/paltest_sleepex_test2 threading/TerminateProcess/test1/paltest_terminateprocess_test1 -threading/TLS/test6_optimizedtls/paltest_tls_test6_optimizedtls threading/WaitForMultipleObjectsEx/test5/paltest_waitformultipleobjectsex_test5 threading/WaitForMultipleObjectsEx/test6/paltest_waitformultipleobjectsex_test6 threading/WaitForSingleObject/WFSOProcessTest/paltest_waitforsingleobject_wfsoprocesstest diff --git a/src/coreclr/pal/tests/palsuite/paltests.cpp b/src/coreclr/pal/tests/palsuite/paltests.cpp index 4ec0a3982e3c..07e8ff6ece73 100644 --- a/src/coreclr/pal/tests/palsuite/paltests.cpp +++ b/src/coreclr/pal/tests/palsuite/paltests.cpp @@ -54,7 +54,7 @@ int __cdecl main(int argc, char *argv[]) { return PrintTests(argc, argv); } - + PALTest *testCur = PALTest::s_tests; for (;testCur != 0; testCur = testCur->_next) { diff --git a/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test1/childProcess.cpp b/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test1/childProcess.cpp index dc059cf7e69f..dd0e35f5311f 100644 --- a/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test1/childProcess.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test1/childProcess.cpp @@ -5,7 +5,7 @@ ** ** Source: CreateProcessW/test1/childprocess.c ** -** Purpose: Test to ensure CreateProcessW starts a new process. This test +** Purpose: Test to ensure CreateProcessW starts a new process. This test ** launches a child process, and examines a file written by the child. ** This code is the child code. 
** @@ -17,7 +17,7 @@ ** fopen ** fclose ** fprintf -** +** ** **=========================================================*/ @@ -25,7 +25,7 @@ #define UNICODE #include -const WCHAR szCommonFileW[] = +const WCHAR szCommonFileW[] = {'c','h','i','l','d','d','a','t','a','.','t','m','p','\0'}; @@ -39,19 +39,19 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1_child, "thre DWORD dwFileLength; DWORD dwDirLength; DWORD dwSize; - + char *szAbsPathNameA; WCHAR szDirNameW[_MAX_DIR]; - WCHAR szAbsPathNameW[_MAX_PATH]; + WCHAR szAbsPathNameW[MAX_PATH]; if(0 != (PAL_Initialize(argc, argv))) { return ( FAIL ); } - dwDirLength = GetTempPath(_MAX_PATH, szDirNameW); + dwDirLength = GetTempPath(MAX_PATH, szDirNameW); - if (0 == dwDirLength) + if (0 == dwDirLength) { Fail ("GetTempPath call failed. Could not get " "temp directory\n. Exiting.\n"); @@ -59,7 +59,7 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1_child, "thre dwFileLength = wcslen( szCommonFileW ); - dwSize = mkAbsoluteFilenameW( szDirNameW, dwDirLength, szCommonFileW, + dwSize = mkAbsoluteFilenameW( szDirNameW, dwDirLength, szCommonFileW, dwFileLength, szAbsPathNameW ); if (0 == dwSize) @@ -67,23 +67,23 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1_child, "thre Fail ("Palsuite Code: mkAbsoluteFilename() call failed. Could " "not build absolute path name to file\n. Exiting.\n"); } - + /* set the string length for the open call */ - szAbsPathNameA = (char*)malloc(dwSize +1); + szAbsPathNameA = (char*)malloc(dwSize +1); if (NULL == szAbsPathNameA) { Fail ("Unable to malloc (%d) bytes. Exiting\n", (dwSize +1) ); } - WideCharToMultiByte (CP_ACP, 0, szAbsPathNameW, -1, szAbsPathNameA, - (dwSize + 1), NULL, NULL); + WideCharToMultiByte (CP_ACP, 0, szAbsPathNameW, -1, szAbsPathNameA, + (dwSize + 1), NULL, NULL); - if ( NULL == ( fp = fopen ( szAbsPathNameA , "w+" ) ) ) + if ( NULL == ( fp = fopen ( szAbsPathNameA , "w+" ) ) ) { - /* + /* * A return value of NULL indicates an error condition or an - * EOF condition + * EOF condition */ Fail ("%s unable to open %s for writing. Exiting.\n", argv[0] , szAbsPathNameA ); @@ -96,14 +96,14 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1_child, "thre Fail("%s unable to write to %s. Exiting.\n", argv[0] , szAbsPathNameA ); } - - if (0 != (fclose ( fp ))) + + if (0 != (fclose ( fp ))) { Fail ("%s unable to close file %s. Pid may not be " "written to file. Exiting.\n", argv[0], szAbsPathNameA ); } PAL_Terminate(); - return ( PASS ); - + return ( PASS ); + } diff --git a/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test1/parentProcess.cpp b/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test1/parentProcess.cpp index 293b2c167356..00e6f2c30147 100644 --- a/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test1/parentProcess.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test1/parentProcess.cpp @@ -5,9 +5,9 @@ ** ** Source: CreateProcessW/test1/parentprocess.c ** -** Purpose: Test to ensure CreateProcessW starts a new process. This test +** Purpose: Test to ensure CreateProcessW starts a new process. This test ** launches a child process, and examines a file written by the child. -** This process (the parent process) reads the file created by the child and +** This process (the parent process) reads the file created by the child and ** compares the value the child wrote to the file. 
(a const char *) ** ** Dependencies: GetTempPath @@ -19,7 +19,7 @@ ** fopen ** fclose ** Fail -** +** ** **=========================================================*/ @@ -27,7 +27,7 @@ #define UNICODE #include -const WCHAR szCommonFileW[] = +const WCHAR szCommonFileW[] = {'c','h','i','l','d','d','a','t','a','.','t','m','p','\0'}; const WCHAR szChildFileW[] = u"threading/CreateProcessW/test1/paltest_createprocessw_test1_child"; @@ -46,14 +46,14 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1, "threading/ DWORD dwFileLength; DWORD dwDirLength; DWORD dwSize; - + size_t cslen; - + char szReadStringA[256]; - char szAbsPathNameA[_MAX_PATH]; - WCHAR szDirNameW[_MAX_DIR]; - WCHAR absPathBuf[_MAX_PATH]; + char szAbsPathNameA[MAX_PATH]; + WCHAR szDirNameW[_MAX_DIR]; + WCHAR absPathBuf[MAX_PATH]; WCHAR *szAbsPathNameW; @@ -61,16 +61,16 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1, "threading/ { return ( FAIL ); } - + ZeroMemory ( &si, sizeof(si) ); si.cb = sizeof(si); ZeroMemory ( &pi, sizeof(pi) ); - + szAbsPathNameW=&absPathBuf[0]; - dwDirLength = GetTempPath(_MAX_PATH, szDirNameW); + dwDirLength = GetTempPath(MAX_PATH, szDirNameW); - if (0 == dwDirLength) + if (0 == dwDirLength) { Fail ("GetTempPath call failed. Could not get " "temp directory\n. Exiting.\n"); @@ -85,32 +85,32 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1, "threading/ wcscat(szAbsPathNameW, u" "); wcscat(szAbsPathNameW, szChildFileW); - - if ( !CreateProcessW ( NULL, + + if ( !CreateProcessW ( NULL, szAbsPathNameW, - NULL, - NULL, - FALSE, + NULL, + NULL, + FALSE, CREATE_NEW_CONSOLE, - NULL, - NULL, - &si, - &pi ) + NULL, + NULL, + &si, + &pi ) ) { - Fail ( "CreateProcess call failed. GetLastError returned %d\n", + Fail ( "CreateProcess call failed. GetLastError returned %d\n", GetLastError() ); } - + WaitForSingleObject ( pi.hProcess, INFINITE ); - + szAbsPathNameW=&absPathBuf[0]; dwFileLength = wcslen( szCommonFileW ); - dwSize = mkAbsoluteFilenameW( szDirNameW, dwDirLength, szCommonFileW, + dwSize = mkAbsoluteFilenameW( szDirNameW, dwDirLength, szCommonFileW, dwFileLength, szAbsPathNameW ); - + /* set the string length for the open call*/ if (0 == dwSize) @@ -118,13 +118,13 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1, "threading/ Fail ("Palsuite Code: mkAbsoluteFilename() call failed. Could " "not build absolute path name to file\n. Exiting.\n"); } - - WideCharToMultiByte (CP_ACP, 0, szAbsPathNameW, -1, szAbsPathNameA, + + WideCharToMultiByte (CP_ACP, 0, szAbsPathNameW, -1, szAbsPathNameA, (dwSize + 1), NULL, NULL); if ( NULL == ( fp = fopen ( szAbsPathNameA , "r" ) ) ) { - Fail ("%s\nunable to open %s\nfor reading. Exiting.\n", argv[0], + Fail ("%s\nunable to open %s\nfor reading. Exiting.\n", argv[0], szAbsPathNameA ); } @@ -132,12 +132,12 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1, "threading/ if ( NULL == fgets( szReadStringA, (cslen + 1), fp )) { - /* + /* * A return value of NULL indicates an error condition or an - * EOF condition + * EOF condition */ Fail ("%s\nunable to read file\n%s\nszReadStringA is %s\n" - "Exiting.\n", argv[0], szAbsPathNameA, + "Exiting.\n", argv[0], szAbsPathNameA, szReadStringA ); } @@ -151,8 +151,8 @@ PALTEST(threading_CreateProcessW_test1_paltest_createprocessw_test1, "threading/ { Trace ("string comparison passed.\n"); } - - if (0 != (fclose ( fp ))) + + if (0 != (fclose ( fp ))) { Trace ("%s unable to close file %s. This may cause a file pointer " "leak. 
Continuing.\n", argv[0], szAbsPathNameA ); diff --git a/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test2/parentprocess.cpp b/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test2/parentprocess.cpp index 19bfc74ba6af..7c580a9ea9bd 100644 --- a/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test2/parentprocess.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/CreateProcessW/test2/parentprocess.cpp @@ -55,7 +55,7 @@ PALTEST(threading_CreateProcessW_test2_paltest_createprocessw_test2, "threading/ char szStdOutBuf[BUF_LEN]; char szStdErrBuf[BUF_LEN]; - WCHAR szFullPathNameW[_MAX_PATH]; + WCHAR szFullPathNameW[MAX_PATH]; /******************************************* diff --git a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test1/InitializeCriticalSection.cpp b/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test1/InitializeCriticalSection.cpp deleted file mode 100644 index 750e42d0672b..000000000000 --- a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test1/InitializeCriticalSection.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================ -** -** Source: criticalsectionfunctions/test1/initializecriticalsection.c -** -** Purpose: Test Semaphore operation using classic IPC problem: -** "Producer-Consumer Problem". -** -** Dependencies: CreateThread -** InitializeCriticalSection -** EnterCriticalSection -** LeaveCriticalSection -** DeleteCriticalSection -** WaitForSingleObject -** Sleep -** - -** -**=========================================================*/ - -#include - -#define PRODUCTION_TOTAL 26 - -#define _BUF_SIZE 10 - -DWORD dwThreadId_CriticalSectionFunctions_test1; /* consumer thread identifier */ - -HANDLE hThread_CriticalSectionFunctions_test1; /* handle to consumer thread */ - -CRITICAL_SECTION CriticalSectionM_CriticalSectionFunctions_test1; /* Critical Section Object (used as mutex) */ - -typedef struct Buffer -{ - short readIndex; - short writeIndex; - CHAR message[_BUF_SIZE]; - -} BufferStructure; - -CHAR producerItems_CriticalSectionFunctions_test1[PRODUCTION_TOTAL + 1]; - -CHAR consumerItems_CriticalSectionFunctions_test1[PRODUCTION_TOTAL + 1]; - -/* - * Read next message from the Buffer into provided pointer. - * Returns: 0 on failure, 1 on success. - */ -int -readBuf_CriticalSectionFunctions_test1(BufferStructure *Buffer, char *c) -{ - if( Buffer -> writeIndex == Buffer -> readIndex ) - { - return 0; - } - *c = Buffer -> message[Buffer -> readIndex++]; - Buffer -> readIndex %= _BUF_SIZE; - return 1; -} - -/* - * Write message generated by the producer to Buffer. - * Returns: 0 on failure, 1 on success. - */ -int -writeBuf_CriticalSectionFunctions_test1(BufferStructure *Buffer, CHAR c) -{ - if( ( ((Buffer -> writeIndex) + 1) % _BUF_SIZE) == - (Buffer -> readIndex) ) - { - return 0; - } - Buffer -> message[Buffer -> writeIndex++] = c; - Buffer -> writeIndex %= _BUF_SIZE; - return 1; -} - -/* - * Sleep 500 milleseconds. - */ -VOID -consumerSleep_CriticalSectionFunctions_test1(VOID) -{ - Sleep(500); -} - -/* - * Sleep between 10 milleseconds. - */ -VOID -producerSleep_CriticalSectionFunctions_test1(VOID) -{ - Sleep(10); -} - -/* - * Produce a message and write the message to Buffer. 
- */ -VOID -producer_CriticalSectionFunctions_test1(BufferStructure *Buffer) -{ - - int n = 0; - char c; - - while (n < PRODUCTION_TOTAL) - { - c = 'A' + n ; /* Produce Item */ - - EnterCriticalSection(&CriticalSectionM_CriticalSectionFunctions_test1); - - if (writeBuf_CriticalSectionFunctions_test1(Buffer, c)) - { - printf("Producer produces %c.\n", c); - producerItems_CriticalSectionFunctions_test1[n++] = c; - } - - LeaveCriticalSection(&CriticalSectionM_CriticalSectionFunctions_test1); - - producerSleep_CriticalSectionFunctions_test1(); - } - - return; -} - -/* - * Read and "Consume" the messages in Buffer. - */ -DWORD -PALAPI -consumer_CriticalSectionFunctions_test1( LPVOID lpParam ) -{ - int n = 0; - char c; - - consumerSleep_CriticalSectionFunctions_test1(); - - while (n < PRODUCTION_TOTAL) - { - - EnterCriticalSection(&CriticalSectionM_CriticalSectionFunctions_test1); - - if (readBuf_CriticalSectionFunctions_test1((BufferStructure*)lpParam, &c)) - { - printf("\tConsumer consumes %c.\n", c); - consumerItems_CriticalSectionFunctions_test1[n++] = c; - } - - LeaveCriticalSection(&CriticalSectionM_CriticalSectionFunctions_test1); - - consumerSleep_CriticalSectionFunctions_test1(); - } - - return 0; -} - -PALTEST(threading_CriticalSectionFunctions_test1_paltest_criticalsectionfunctions_test1, "threading/CriticalSectionFunctions/test1/paltest_criticalsectionfunctions_test1") -{ - - BufferStructure Buffer, *pBuffer; - - pBuffer = &Buffer; - - if(0 != (PAL_Initialize(argc, argv))) - { - return FAIL; - } - - /* - * Create mutual exclusion mechanisms - */ - - InitializeCriticalSection ( &CriticalSectionM_CriticalSectionFunctions_test1 ); - - /* - * Initialize Buffer - */ - pBuffer->writeIndex = pBuffer->readIndex = 0; - - - - /* - * Create Consumer - */ - hThread_CriticalSectionFunctions_test1 = CreateThread( - NULL, - 0, - consumer_CriticalSectionFunctions_test1, - &Buffer, - 0, - &dwThreadId_CriticalSectionFunctions_test1); - - if ( NULL == hThread_CriticalSectionFunctions_test1 ) - { - Fail ( "CreateThread() returned NULL. Failing test.\n" - "GetLastError returned %d\n", GetLastError()); - } - - /* - * Start producing - */ - producer_CriticalSectionFunctions_test1(pBuffer); - - /* - * Wait for consumer to complete - */ - WaitForSingleObject (hThread_CriticalSectionFunctions_test1, INFINITE); - - /* - * Compare items produced vs. items consumed - */ - if ( 0 != strncmp (producerItems_CriticalSectionFunctions_test1, consumerItems_CriticalSectionFunctions_test1, PRODUCTION_TOTAL) ) - { - Fail("The producerItems_CriticalSectionFunctions_test1 string %s\n and the consumerItems_CriticalSectionFunctions_test1 string " - "%s\ndo not match. This could be a problem with the strncmp()" - " function\n FailingTest\nGetLastError() returned %d\n", - producerItems_CriticalSectionFunctions_test1, consumerItems_CriticalSectionFunctions_test1, GetLastError()); - } - - /* - * Clean up Critical Section object - */ - DeleteCriticalSection(&CriticalSectionM_CriticalSectionFunctions_test1); - - Trace("producerItems_CriticalSectionFunctions_test1 and consumerItems_CriticalSectionFunctions_test1 arrays match. 
All %d\nitems " - "were produced and consumed in order.\nTest passed.\n", - PRODUCTION_TOTAL); - - PAL_Terminate(); - return (PASS); - -} diff --git a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test2/test2.cpp deleted file mode 100644 index 4bb75dfcf95f..000000000000 --- a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test2/test2.cpp +++ /dev/null @@ -1,223 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================ -** -** Source: CriticalSectionFunctions/test2/test2.c -** -** Purpose: Test that we are able to nest critical section calls. -** The initial thread makes a call to EnterCriticalSection once, -** blocking on a CRITICAL_SECTION object and creates a new thread. -** The newly created thread blocks on the same CRITICAL_SECTION object. -** The first thread now makes a call to LeaveCriticalSection. -** Test to see that the new thread doesn't get unblocked. -** -** Dependencies: CreateThread -** InitializeCriticalSection -** EnterCriticalSection -** LeaveCriticalSection -** DeleteCriticalSection -** WaitForSingleObject -** - -** -**=========================================================*/ - -#include - -volatile BOOL t0_tflag = FAIL; /* thread 0 timeout flag */ -volatile BOOL t1_aflag = FAIL; /* thread 1 access flag */ -volatile BOOL t1_cflag = FAIL; /* thread 1 critical section flag */ -volatile BOOL bTestResult = FAIL; - -DWORD PALAPI Thread_CriticalSectionFunctions_test2(LPVOID lpParam) -{ - t1_aflag = PASS; - EnterCriticalSection(&CriticalSection); - t1_cflag = PASS; - LeaveCriticalSection(&CriticalSection); - return 0; -} - -PALTEST(threading_CriticalSectionFunctions_test2_paltest_criticalsectionfunctions_test2, "threading/CriticalSectionFunctions/test2/paltest_criticalsectionfunctions_test2") -{ - HANDLE hThread; - DWORD dwThreadId; - DWORD dwRet; - - if(0 != (PAL_Initialize(argc, argv))) - { - return (bTestResult); - } - - /* - * Create critical section object and enter it - */ - InitializeCriticalSection ( &CriticalSection ); - EnterCriticalSection(&CriticalSection); - - /* - * Create a suspended thread - */ - hThread = CreateThread(NULL, - 0, - &Thread_CriticalSectionFunctions_test2, - (LPVOID) NULL, - CREATE_SUSPENDED, - &dwThreadId); - - if (hThread == NULL) - { - Trace("PALSUITE ERROR: CreateThread call failed. GetLastError " - "returned %d.\n", GetLastError()); - LeaveCriticalSection(&CriticalSection); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - EnterCriticalSection(&CriticalSection); - /* - * Set priority of the thread to greater than that of the currently - * running thread so it is guaranteed to run. 
- */ - dwRet = (DWORD) SetThreadPriority(hThread, THREAD_PRIORITY_ABOVE_NORMAL); - - if (0 == dwRet) - { - Trace("PALSUITE ERROR: SetThreadPriority (%p, %d) call failed.\n" - "GetLastError returned %d.\n", hThread, - THREAD_PRIORITY_NORMAL, GetLastError()); - LeaveCriticalSection(&CriticalSection); - CloseHandle(hThread); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - dwRet = ResumeThread(hThread); - - if (-1 == dwRet) - { - Trace("PALSUITE ERROR: ResumeThread(%p) call failed.\nGetLastError " - "returned %d.\n", hThread, GetLastError()); - LeaveCriticalSection(&CriticalSection); - CloseHandle(hThread); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - /* - * Sleep until we know the thread has been invoked. This sleep in - * combination with the higher priority of the other thread should - * guarantee both threads block on the critical section. - */ - while (t1_aflag == FAIL) - { - Sleep(1); - } - - LeaveCriticalSection(&CriticalSection); - - switch ((WaitForSingleObject( - hThread, - 10000))) /* Wait 10 seconds */ - { - case WAIT_OBJECT_0: - /* Object (thread) is signaled */ - LeaveCriticalSection(&CriticalSection); - CloseHandle(hThread); - DeleteCriticalSection(&CriticalSection); - Fail("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned\nWAIT_TIMEOUT ('%d'), instead it returned " - "WAIT_OBJECT_0 ('%d').\nA nested LeaveCriticalSection(%p) " - "call released both threads that were waiting on it!\n", - hThread, 10000, WAIT_TIMEOUT, WAIT_OBJECT_0, &CriticalSection); - break; - case WAIT_ABANDONED: - /* - * Object was mutex object whose owning - * thread has terminated. Shouldn't occur. - */ - Trace("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned\nWAIT_TIMEOUT ('%d'), instead it returned " - "WAIT_ABANDONED ('%d').\nGetLastError returned '%d'\n", - hThread, 10000, WAIT_TIMEOUT, WAIT_ABANDONED, GetLastError()); - LeaveCriticalSection(&CriticalSection); - CloseHandle(hThread); - DeleteCriticalSection(&CriticalSection); - Fail(""); - break; - case WAIT_FAILED: /* WaitForSingleObject function failed */ - Trace("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned\nWAIT_TIMEOUT ('%d'), instead it returned " - "WAIT_FAILED ('%d').\nGetLastError returned '%d'\n", - hThread, 10000, WAIT_TIMEOUT, WAIT_FAILED, GetLastError()); - LeaveCriticalSection(&CriticalSection); - CloseHandle(hThread); - DeleteCriticalSection(&CriticalSection); - Fail(""); - break; - case WAIT_TIMEOUT: - /* - * We expect this thread to timeout waiting for the - * critical section object to become available. - */ - t0_tflag = PASS; - break; - } - - LeaveCriticalSection(&CriticalSection); - - if (WAIT_OBJECT_0 != WaitForSingleObject (hThread, 10000)) - { - if (0 == CloseHandle(hThread)) - { - Trace("PALSUITE ERROR: CloseHandle(%p) call failed.\n" - "WaitForSingleObject(%p,%d) should have returned " - "WAIT_OBJECT_0 ('%d').\nBoth calls failed. " - "Deleted CRITICAL_SECTION object which likely means\n" - "thread %p is now in an undefined state. 
GetLastError " - "returned '%d'.\n", hThread, hThread, 10000, WAIT_OBJECT_0, - hThread, GetLastError()); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - else - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned WAIT_OBJECT_0 ('%d').\n GetLastError returned " - "'%d'.\n", hThread, hThread, 10000, WAIT_OBJECT_0, - hThread, GetLastError()); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - } - - if (0 == CloseHandle(hThread)) - { - Trace("PALSUITE ERROR: CloseHandle(%p) call failed.\n" - "Deleted CRITICAL_SECTION object which likely means\n" - "thread %p is now in an undefined state. GetLastError " - "returned '%d'.\n", hThread, hThread, GetLastError()); - DeleteCriticalSection(&CriticalSection); - Fail(""); - - } - DeleteCriticalSection(&CriticalSection); - /* - * Ensure both thread 0 experienced a wait timeout and thread 1 - * accessed the critical section or fail the test, otherwise pass it. - */ - if ((t0_tflag == FAIL) || (t1_cflag == FAIL)) - { - Trace("PALSUITE ERROR: Thread 0 returned %d when %d was expected.\n" - "Thread 1 returned %d when %d was expected.\n", t0_tflag, - PASS, t1_cflag, PASS); - bTestResult=FAIL; - } - else - { - bTestResult=PASS; - } - - PAL_TerminateEx(bTestResult); - return (bTestResult); -} diff --git a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test4/test4.cpp b/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test4/test4.cpp deleted file mode 100644 index 14a737abd3a9..000000000000 --- a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test4/test4.cpp +++ /dev/null @@ -1,240 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*============================================================ -** -** Source: criticalsectionfunctions/test4/test4.c -** -** Purpose: Test to see if threads blocked on a CRITICAL_SECTION object will -** be released in an orderly manner. This case looks at the following -** scenario. If one thread owns a CRITICAL_SECTION object and two threads -** block in EnterCriticalSection, trying to hold the already owned -** CRITICAL_SECTION object, when the first thread releases the CRITICAL_SECTION -** object, will one and only one of the waiters get unblocked? 
-** -** Dependencies: CreateThread -** InitializeCriticalSection -** EnterCriticalSection -** LeaveCriticalSection -** DeleteCriticalSection -** Sleep -** WaitForSingleObject -** - -** -**=========================================================*/ - -#include - -#define NUM_BLOCKING_THREADS 2 - -BOOL bTestResult_CriticalSectionFunctions_test4; -CRITICAL_SECTION CriticalSection_CriticalSectionFunctions_test4; -HANDLE hThread_CriticalSectionFunctions_test4[NUM_BLOCKING_THREADS]; -HANDLE hEvent_CriticalSectionFunctions_test4; -DWORD dwThreadId_CriticalSectionFunctions_test4[NUM_BLOCKING_THREADS]; -volatile int flags_CriticalSectionFunctions_test4[NUM_BLOCKING_THREADS] = {0,0}; - -DWORD PALAPI ThreadTest1_CriticalSectionFunctions_test4(LPVOID lpParam) -{ - - EnterCriticalSection ( &CriticalSection_CriticalSectionFunctions_test4 ); - - flags_CriticalSectionFunctions_test4[0] = 1; - - return 0; - -} - -DWORD PALAPI ThreadTest2_CriticalSectionFunctions_test4(LPVOID lpParam) -{ - - EnterCriticalSection ( &CriticalSection_CriticalSectionFunctions_test4 ); - - flags_CriticalSectionFunctions_test4[1] = 1; - - return 0; - -} - -PALTEST(threading_CriticalSectionFunctions_test4_paltest_criticalsectionfunctions_test4, "threading/CriticalSectionFunctions/test4/paltest_criticalsectionfunctions_test4") -{ - - DWORD dwRet; - DWORD dwRet1; - bTestResult_CriticalSectionFunctions_test4 = FAIL; - - if ((PAL_Initialize(argc,argv)) != 0) - { - return(bTestResult_CriticalSectionFunctions_test4); - } - - /* - * Create Critical Section Object - */ - InitializeCriticalSection ( &CriticalSection_CriticalSectionFunctions_test4 ); - - EnterCriticalSection ( &CriticalSection_CriticalSectionFunctions_test4 ); - - hThread_CriticalSectionFunctions_test4[0] = CreateThread(NULL, - 0, - &ThreadTest1_CriticalSectionFunctions_test4, - (LPVOID) 0, - CREATE_SUSPENDED, - &dwThreadId_CriticalSectionFunctions_test4[0]); - if (hThread_CriticalSectionFunctions_test4[0] == NULL) - { - Trace("PALSUITE ERROR: CreateThread(%p, %d, %p, %p, %d, %p) call " - "failed.\nGetLastError returned %d.\n", NULL, 0, &ThreadTest1_CriticalSectionFunctions_test4, - (LPVOID) 0, CREATE_SUSPENDED, &dwThreadId_CriticalSectionFunctions_test4[0], GetLastError()); - LeaveCriticalSection(&CriticalSection_CriticalSectionFunctions_test4); - DeleteCriticalSection ( &CriticalSection_CriticalSectionFunctions_test4 ); - Fail(""); - } - - hThread_CriticalSectionFunctions_test4[1] = CreateThread(NULL, - 0, - &ThreadTest2_CriticalSectionFunctions_test4, - (LPVOID) 0, - CREATE_SUSPENDED, - &dwThreadId_CriticalSectionFunctions_test4[1]); - if (hThread_CriticalSectionFunctions_test4[1] == NULL) - { - Trace("PALSUITE ERROR: CreateThread(%p, %d, %p, %p, %d, %p) call " - "failed.\nGetLastError returned %d.\n", NULL, 0, &ThreadTest2_CriticalSectionFunctions_test4, - (LPVOID) 0, CREATE_SUSPENDED, &dwThreadId_CriticalSectionFunctions_test4[1], GetLastError()); - LeaveCriticalSection(&CriticalSection_CriticalSectionFunctions_test4); - - dwRet = ResumeThread(hThread_CriticalSectionFunctions_test4[0]); - if (-1 == dwRet) - { - Trace("PALSUITE ERROR: ResumeThread(%p) call failed.\n" - "GetLastError returned '%d'.\n", hThread_CriticalSectionFunctions_test4[0], - GetLastError()); - } - - dwRet = WaitForSingleObject(hThread_CriticalSectionFunctions_test4[0], 10000); - if (WAIT_OBJECT_0 == dwRet) - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p, %d) call " - "failed. 
'%d' was returned instead of the expected '%d'.\n" - "GetLastError returned '%d'.\n", hThread_CriticalSectionFunctions_test4[0], 10000, dwRet, - WAIT_OBJECT_0, GetLastError()); - } - - if (0 == CloseHandle(hThread_CriticalSectionFunctions_test4[0])) - { - Trace("PALSUITE NOTIFICATION: CloseHandle(%p) call failed.\n" - "GetLastError returned %d. Not failing tests.\n", - hThread_CriticalSectionFunctions_test4[0], GetLastError()); - } - - DeleteCriticalSection(&CriticalSection_CriticalSectionFunctions_test4); - Fail(""); - } - - /* - * Set other thread priorities to be higher than ours & Sleep to ensure - * we give up the processor. - */ - dwRet = (DWORD) SetThreadPriority(hThread_CriticalSectionFunctions_test4[0], - THREAD_PRIORITY_ABOVE_NORMAL); - if (0 == dwRet) - { - Trace("PALSUITE ERROR: SetThreadPriority(%p, %d) call failed.\n" - "GetLastError returned %d", hThread_CriticalSectionFunctions_test4[0], - THREAD_PRIORITY_ABOVE_NORMAL, GetLastError()); - } - - dwRet = (DWORD) SetThreadPriority(hThread_CriticalSectionFunctions_test4[1], - THREAD_PRIORITY_ABOVE_NORMAL); - if (0 == dwRet) - { - Trace("PALSUITE ERROR: SetThreadPriority(%p, %d) call failed.\n" - "GetLastError returned %d", hThread_CriticalSectionFunctions_test4[1], - THREAD_PRIORITY_ABOVE_NORMAL, GetLastError()); - } - - dwRet = ResumeThread(hThread_CriticalSectionFunctions_test4[0]); - if (-1 == dwRet) - { - Trace("PALSUITE ERROR: ResumeThread(%p, %d) call failed.\n" - "GetLastError returned %d", hThread_CriticalSectionFunctions_test4[0], - GetLastError() ); - } - - dwRet = ResumeThread(hThread_CriticalSectionFunctions_test4[1]); - if (-1 == dwRet) - { - Trace("PALSUITE ERROR: ResumeThread(%p, %d) call failed.\n" - "GetLastError returned %d", hThread_CriticalSectionFunctions_test4[0], - GetLastError()); - } - - Sleep (0); - - LeaveCriticalSection (&CriticalSection_CriticalSectionFunctions_test4); - - dwRet = WaitForSingleObject(hThread_CriticalSectionFunctions_test4[0], 10000); - dwRet1 = WaitForSingleObject(hThread_CriticalSectionFunctions_test4[1], 10000); - - if ((WAIT_OBJECT_0 == dwRet) || - (WAIT_OBJECT_0 == dwRet1)) - { - if ((1 == flags_CriticalSectionFunctions_test4[0] && 0 == flags_CriticalSectionFunctions_test4[1]) || - (0 == flags_CriticalSectionFunctions_test4[0] && 1 == flags_CriticalSectionFunctions_test4[1])) - { - bTestResult_CriticalSectionFunctions_test4 = PASS; - } - else - { - bTestResult_CriticalSectionFunctions_test4 = FAIL; - Trace ("PALSUITE ERROR: flags[%d] = {%d,%d}. 
These values are" - "inconsistent.\nCriticalSection test failed.\n", - NUM_BLOCKING_THREADS, flags_CriticalSectionFunctions_test4[0], flags_CriticalSectionFunctions_test4[1]); - } - - /* Fail the test if both threads returned WAIT_OBJECT_0 */ - if ((WAIT_OBJECT_0 == dwRet) && (WAIT_OBJECT_0 == dwRet1)) - { - bTestResult_CriticalSectionFunctions_test4 = FAIL; - Trace ("PALSUITE ERROR: WaitForSingleObject(%p, %d) and " - "WaitForSingleObject(%p, %d)\nboth returned dwRet = '%d'\n" - "One should have returned WAIT_TIMEOUT ('%d').\n", - hThread_CriticalSectionFunctions_test4[0], 10000, hThread_CriticalSectionFunctions_test4[1], 10000, dwRet, WAIT_TIMEOUT); - } - } - else - { - bTestResult_CriticalSectionFunctions_test4 = FAIL; - Trace ("PALSUITE ERROR: WaitForSingleObject(%p, %d) and " - "WaitForSingleObject(%p, %d)\nReturned dwRet = '%d' and\n" - "dwRet1 = '%d' respectively.\n", hThread_CriticalSectionFunctions_test4[0], 10000, hThread_CriticalSectionFunctions_test4[1], - 10000, dwRet, dwRet1); - } - - if (WAIT_OBJECT_0 == dwRet) - { - if (0 == CloseHandle(hThread_CriticalSectionFunctions_test4[0])) - { - Trace("PALSUITE NOTIFICATION: CloseHandle(%p) call failed.\n" - "GetLastError returned %d. Not failing tests.\n", - hThread_CriticalSectionFunctions_test4[0], GetLastError()); - } - } - if (WAIT_OBJECT_0 == dwRet1) - { - if (0 == CloseHandle(hThread_CriticalSectionFunctions_test4[1])) - { - Trace("PALSUITE NOTIFICATION: CloseHandle(%p) call failed.\n" - "GetLastError returned %d. Not failing tests.\n", - hThread_CriticalSectionFunctions_test4[1], GetLastError()); - } - } - - /* Leaking the CS on purpose, since there is still a thread - waiting on it */ - - PAL_TerminateEx(bTestResult_CriticalSectionFunctions_test4); - return (bTestResult_CriticalSectionFunctions_test4); -} diff --git a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test5/test5.cpp b/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test5/test5.cpp deleted file mode 100644 index 4556c082f67e..000000000000 --- a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test5/test5.cpp +++ /dev/null @@ -1,142 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: CriticalSectionFunctions/test5/test5.c -** -** Purpose: Attempt to delete a critical section owned by another -** thread. 
-** -** -**===================================================================*/ -#include - -DWORD PALAPI Thread_CriticalSectionFunctions_test5(LPVOID lpParam) -{ - DWORD dwTRet; - - EnterCriticalSection(&CriticalSection); - - /* signal thread 0 */ - if (0 == SetEvent(hToken[0])) - { - Trace("PALSUITE ERROR: Unable to execute SetEvent(%p) during " - "clean up.\nGetLastError returned '%u'.\n", hToken[0], - GetLastError()); - LeaveCriticalSection(&CriticalSection); - Cleanup (&hToken[0], NUM_TOKENS); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - /* wait to be signaled */ - dwTRet = WaitForSingleObject(hToken[1], 10000); - if (WAIT_OBJECT_0 != dwTRet) - - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned\nWAIT_OBJECT_0 ('%d'), instead it returned " - "('%d').\nGetLastError returned '%u'.\n", - hToken[1], 10000, WAIT_OBJECT_0, dwTRet, GetLastError()); - LeaveCriticalSection(&CriticalSection); - Cleanup (&hToken[0], NUM_TOKENS); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - LeaveCriticalSection(&CriticalSection); - return 0; -} - -PALTEST(threading_CriticalSectionFunctions_test5_paltest_criticalsectionfunctions_test5, "threading/CriticalSectionFunctions/test5/paltest_criticalsectionfunctions_test5") -{ - DWORD dwThreadId; - DWORD dwMRet; - - if ((PAL_Initialize(argc,argv)) != 0) - { - return(FAIL); - } - - /* thread 0 event */ - hToken[0] = CreateEvent(NULL, TRUE, FALSE, NULL); - if (NULL == hToken[0]) - { - Fail("PALSUITE ERROR: CreateEvent call #0 failed. GetLastError " - "returned %u.\n", GetLastError()); - } - - /* thread 1 event */ - hToken[1] = CreateEvent(NULL, TRUE, FALSE, NULL); - if (NULL == hToken[1]) - { - Trace("PALSUITE ERROR: CreateEvent call #1 failed. GetLastError " - "returned %u.\n", GetLastError()); - Cleanup(&hToken[0], (NUM_TOKENS - 2)); - Fail(""); - } - - InitializeCriticalSection(&CriticalSection); - - hToken[2] = CreateThread(NULL, - 0, - &Thread_CriticalSectionFunctions_test5, - (LPVOID) NULL, - 0, - &dwThreadId); - if (hToken[2] == NULL) - { - Trace("PALSUITE ERROR: CreateThread call #0 failed. 
GetLastError " - "returned %u.\n", GetLastError()); - Cleanup(&hToken[0], (NUM_TOKENS - 1)); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - /* wait for thread 0 to be signaled */ - dwMRet = WaitForSingleObject(hToken[0], 10000); - if (WAIT_OBJECT_0 != dwMRet) - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned\nWAIT_OBJECT_0 ('%d'), instead it returned " - "('%d').\nGetLastError returned '%u'.\n", hToken[0], 10000, - WAIT_OBJECT_0, dwMRet, GetLastError()); - Cleanup(&hToken[0], NUM_TOKENS); - Fail(""); - } - - /* - * Attempt to do delete CriticalSection object owned by other thread - */ - DeleteCriticalSection(&CriticalSection); - - /* signal thread 1 */ - if (0 == SetEvent(hToken[1])) - { - Trace("PALSUITE ERROR: Unable to execute SetEvent(%p) call.\n" - "GetLastError returned '%u'.\n", hToken[1], - GetLastError()); - Cleanup(&hToken[0], NUM_TOKENS); - Fail(""); - } - - dwMRet = WaitForSingleObject(hToken[2], 10000); - if (WAIT_OBJECT_0 != dwMRet) - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p, %d) call " - "returned an unexpected value '%d'.\nGetLastError returned " - "%u.\n", hToken[2], 10000, dwMRet, GetLastError()); - Cleanup(&hToken[0], NUM_TOKENS); - Fail(""); - } - - if (!Cleanup(&hToken[0], NUM_TOKENS)) - { - Fail(""); - } - - PAL_Terminate(); - - return (PASS); -} diff --git a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test6/test6.cpp b/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test6/test6.cpp deleted file mode 100644 index 672637159c3d..000000000000 --- a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test6/test6.cpp +++ /dev/null @@ -1,146 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: CriticalSectionFunctions/test6/test6.c -** -** Purpose: Attempt to leave a critical section which is owned by -** another thread. 
-** -** -**===================================================================*/ -#include - - -DWORD PALAPI Thread_CriticalSectionFunctions_test6(LPVOID lpParam) -{ - DWORD dwTRet; - - EnterCriticalSection(&CriticalSection); - - /* signal thread 0 */ - if (0 == SetEvent(hToken[0])) - { - Trace("PALSUITE ERROR: Unable to execute SetEvent(%p) during " - "clean up.\nGetLastError returned '%u'.\n", hToken[0], - GetLastError()); - LeaveCriticalSection(&CriticalSection); - Cleanup (&hToken[0], NUM_TOKENS); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - /* wait to be signaled */ - dwTRet = WaitForSingleObject(hToken[1], 10000); - if (WAIT_OBJECT_0 != dwTRet) - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned\nWAIT_OBJECT_0 ('%d'), instead it returned " - "('%d').\nGetLastError returned '%u'.\n", - hToken[1], 10000, WAIT_OBJECT_0, dwTRet, GetLastError()); - LeaveCriticalSection(&CriticalSection); - Cleanup (&hToken[0], NUM_TOKENS); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - LeaveCriticalSection(&CriticalSection); - - return 0; -} - -PALTEST(threading_CriticalSectionFunctions_test6_paltest_criticalsectionfunctions_test6, "threading/CriticalSectionFunctions/test6/paltest_criticalsectionfunctions_test6") -{ - DWORD dwThreadId; - DWORD dwMRet; - - if ((PAL_Initialize(argc,argv)) != 0) - { - return(FAIL); - } - - /* thread 0 event */ - hToken[0] = CreateEvent(NULL, TRUE, FALSE, NULL); - - if (hToken[0] == NULL) - { - Fail("PALSUITE ERROR: CreateEvent call #0 failed. GetLastError " - "returned %u.\n", GetLastError()); - } - - /* thread 1 event */ - hToken[1] = CreateEvent(NULL, TRUE, FALSE, NULL); - - if (hToken[1] == NULL) - { - Trace("PALSUITE ERROR: CreateEvent call #1 failed. GetLastError " - "returned %u.\n", GetLastError()); - Cleanup(&hToken[0], (NUM_TOKENS - 2)); - Fail(""); - } - - InitializeCriticalSection(&CriticalSection); - - hToken[2] = CreateThread(NULL, - 0, - &Thread_CriticalSectionFunctions_test6, - (LPVOID) NULL, - 0, - &dwThreadId); - - if (hToken[2] == NULL) - { - Trace("PALSUITE ERROR: CreateThread call #0 failed. GetLastError " - "returned %u.\n", GetLastError()); - Cleanup(&hToken[0], (NUM_TOKENS - 1)); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - /* wait for thread 0 to be signaled */ - dwMRet = WaitForSingleObject(hToken[0], 10000); - if (WAIT_OBJECT_0 != dwMRet) - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned\nWAIT_OBJECT_0 ('%d'), instead it returned " - "('%d').\nGetLastError returned '%u'.\n", hToken[0], 10000, - WAIT_OBJECT_0, dwMRet, GetLastError()); - Cleanup(&hToken[0], NUM_TOKENS); - Fail(""); - } - - /* - * Attempt to leave critical section which is owned by the other thread. 
- */ - LeaveCriticalSection(&CriticalSection); - - /* signal thread 1 */ - if (0 == SetEvent(hToken[1])) - { - Trace("PALSUITE ERROR: Unable to execute SetEvent(%p) call.\n" - "GetLastError returned '%u'.\n", hToken[1], - GetLastError()); - Cleanup(&hToken[0], NUM_TOKENS); - Fail(""); - } - - dwMRet = WaitForSingleObject(hToken[2], 10000); - if (WAIT_OBJECT_0 != dwMRet) - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p, %d) call " - "returned an unexpected value '%d'.\nGetLastError returned " - "%u.\n", hToken[2], 10000, dwMRet, GetLastError()); - Cleanup(&hToken[0], NUM_TOKENS); - Fail(""); - } - - if (!Cleanup(&hToken[0], NUM_TOKENS)) - { - Fail(""); - } - - PAL_Terminate(); - - return(PASS); -} diff --git a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test7/test7.cpp b/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test7/test7.cpp deleted file mode 100644 index e4ad81364c4a..000000000000 --- a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test7/test7.cpp +++ /dev/null @@ -1,143 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: CriticalSectionFunctions/test7/test7.c -** -** Purpose: Attempt to delete a critical section owned by the current -** thread. -** -** -**===================================================================*/ -#include - -DWORD PALAPI Thread_CriticalSectionFunctions_test7(LPVOID lpParam) -{ - DWORD dwTRet; - - EnterCriticalSection(&CriticalSection); - - /* signal thread 0 */ - if (0 == SetEvent(hToken[0])) - { - Trace("PALSUITE ERROR: Unable to execute SetEvent(%p) during " - "clean up.\nGetLastError returned '%u'.\n", hToken[0], - GetLastError()); - LeaveCriticalSection(&CriticalSection); - Cleanup (&hToken[0], NUM_TOKENS); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - /* wait to be signaled */ - dwTRet = WaitForSingleObject(hToken[1], 10000); - if (WAIT_OBJECT_0 != dwTRet) - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned\nWAIT_OBJECT_0 ('%d'), instead it returned " - "('%d').\nGetLastError returned '%u'.\n", - hToken[0], 10000, WAIT_OBJECT_0, dwTRet, GetLastError()); - LeaveCriticalSection(&CriticalSection); - Cleanup (&hToken[0], NUM_TOKENS); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - DeleteCriticalSection(&CriticalSection); - - return 0; -} - -PALTEST(threading_CriticalSectionFunctions_test7_paltest_criticalsectionfunctions_test7, "threading/CriticalSectionFunctions/test7/paltest_criticalsectionfunctions_test7") -{ - DWORD dwThreadId; - DWORD dwMRet; - - if ((PAL_Initialize(argc,argv)) != 0) - { - return(FAIL); - } - - /* thread 0 event */ - hToken[0] = CreateEvent(NULL, TRUE, FALSE, NULL); - - if (hToken[0] == NULL) - { - Fail("PALSUITE ERROR: CreateEvent call #0 failed. GetLastError " - "returned %u.\n", GetLastError()); - } - - /* thread 1 event */ - hToken[1] = CreateEvent(NULL, TRUE, FALSE, NULL); - - if (hToken[1] == NULL) - { - Trace("PALSUITE ERROR: CreateEvent call #1 failed. GetLastError " - "returned %u.\n", GetLastError()); - Cleanup (&hToken[0], (NUM_TOKENS - 2)); - Fail(""); - } - - InitializeCriticalSection(&CriticalSection); - - hToken[2] = CreateThread(NULL, - 0, - &Thread_CriticalSectionFunctions_test7, - (LPVOID) NULL, - 0, - &dwThreadId); - - if (hToken[2] == NULL) - { - Trace("PALSUITE ERROR: CreateThread call #0 failed. 
GetLastError " - "returned %u.\n", GetLastError()); - Cleanup (&hToken[0], (NUM_TOKENS - 1)); - DeleteCriticalSection(&CriticalSection); - Fail(""); - } - - /* wait for thread 0 to be signaled */ - dwMRet = WaitForSingleObject(hToken[0], 10000); - if (WAIT_OBJECT_0 != dwMRet) - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p,%d) should have " - "returned\nWAIT_OBJECT_0 ('%d'), instead it returned " - "('%d').\nGetLastError returned '%u'.\n", hToken[0], 10000, - WAIT_OBJECT_0, dwMRet, GetLastError()); - Cleanup (&hToken[0], NUM_TOKENS); - Fail(""); - } - - /* signal thread 1 */ - if (0 == SetEvent(hToken[1])) - { - Trace("PALSUITE ERROR: Unable to execute SetEvent(%p) call.\n" - "GetLastError returned '%u'.\n", hToken[1], - GetLastError()); - Cleanup (&hToken[0], NUM_TOKENS); - Fail(""); - } - - dwMRet = WaitForSingleObject(hToken[2], 10000); - if (WAIT_OBJECT_0 != dwMRet) - { - Trace("PALSUITE ERROR: WaitForSingleObject(%p, %d) call " - "returned an unexpected value '%d'.\nGetLastError returned " - "%u.\n", hToken[2], 10000, dwMRet, GetLastError()); - Cleanup (&hToken[0], NUM_TOKENS); - Fail(""); - } - - if (!Cleanup(&hToken[0], NUM_TOKENS)) - { - Fail(""); - } - - PAL_Terminate(); - - return (PASS); -} - - - diff --git a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test8/test8.cpp b/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test8/test8.cpp deleted file mode 100644 index 8081b69109a9..000000000000 --- a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test8/test8.cpp +++ /dev/null @@ -1,217 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*===================================================================== -** -** Source: CriticalSectionFunctions/test8/test8.c -** -** Pyrpose: Ensure critical section functionality is working by -** having multiple threads racing on a CS under different -** scenarios -** -** -**===================================================================*/ -#include -#include - -#define MAX_THREAD_COUNT 128 -#define DEFAULT_THREAD_COUNT 10 -#define DEFAULT_LOOP_COUNT 1000 - -#ifndef MIN -#define MIN(a,b) (((a)<(b)) ? (a) : (b)) -#endif - -int g_iThreadCount = DEFAULT_THREAD_COUNT; -int g_iLoopCount = DEFAULT_LOOP_COUNT; -volatile LONG g_lCriticalCount = 0; -HANDLE g_hEvStart = NULL; - -CRITICAL_SECTION g_cs; -DWORD PALAPI Thread_CriticalSectionFunctions_test8(LPVOID lpParam) -{ - int i, j, iLpCnt; - DWORD dwRet = 0; - DWORD dwTid = GetCurrentThreadId(); - LONG lRet; - BOOL bSleepInside; - BOOL bSleepOutside; - - Trace("[tid=%u] Thread starting\n", dwTid); - - dwRet = WaitForSingleObject(g_hEvStart, INFINITE); - if (WAIT_OBJECT_0 != dwRet) - { - Fail("WaitForSingleObject returned unexpected %u [GetLastError()=%u]\n", - dwRet, GetLastError()); - } - - for (j=0;j<8;j++) - { - bSleepInside = 2 & j; - bSleepOutside = 4 & j; - - iLpCnt = g_iLoopCount; - if (bSleepInside || bSleepOutside) - { - iLpCnt /= 10; - } - - for (i=0;i= iVal) - { - g_iThreadCount = iVal; - } - } - break; - default: - break; - } - } - } - - Trace ("Iterations:\t%d\n", g_iLoopCount); - Trace ("Threads:\t%d\n", g_iThreadCount); - - g_hEvStart = CreateEvent(NULL, TRUE, FALSE, NULL); - - if (g_hEvStart == NULL) - { - Fail("CreateEvent call failed. GetLastError " - "returned %u.\n", GetLastError()); - } - - InitializeCriticalSection(&g_cs); - - for (i=0;i iThreadCount) - { - Fail("Failed to create minimum number if threads, i.e. 
2\n"); - } - - if (!SetEvent(g_hEvStart)) - { - Fail("SetEvent failed [GetLastError()=%u]\n", GetLastError()); - } - - for (i=0; i -const char *const SessionPrefix = "Local\\"; -const char *const GlobalPrefix = "Global\\"; - -const char *const NamePrefix = "paltest_namedmutex_test1_"; -const char *const TempNamePrefix = "paltest_namedmutex_test1_temp_"; -const char *const InvalidNamePrefix0 = "paltest\\namedmutex_"; -const char *const InvalidNamePrefix1 = "paltest/namedmutex_"; -const char *const ParentEventNamePrefix0 = "paltest_namedmutex_test1_pe0_"; -const char *const ParentEventNamePrefix1 = "paltest_namedmutex_test1_pe1_"; -const char *const ChildEventNamePrefix0 = "paltest_namedmutex_test1_ce0_"; -const char *const ChildEventNamePrefix1 = "paltest_namedmutex_test1_ce1_"; -const char *const ChildRunningEventNamePrefix = "paltest_namedmutex_test1_cr_"; - -const char *const GlobalShmFilePathPrefix = "/tmp/.dotnet/shm/global/"; +const char CurrentSessionOnlyPrefix[] = "Local\\"; +const char AllSessionsPrefix[] = "Global\\"; + +const char NamePrefix[] = "paltest_namedmutex_test1_"; +const char TempNamePrefix[] = "paltest_namedmutex_test1_temp_"; +const char HeaderMismatchTestsNamePrefix[] = "paltest_namedmutex_test1_headermismatchtests_"; +const char InvalidNamePrefix0[] = "paltest\\namedmutex_"; +const char InvalidNamePrefix1[] = "paltest/namedmutex_"; +const char ParentEventNamePrefix0[] = "paltest_namedmutex_test1_pe0_"; +const char ParentEventNamePrefix1[] = "paltest_namedmutex_test1_pe1_"; +const char ChildEventNamePrefix0[] = "paltest_namedmutex_test1_ce0_"; +const char ChildEventNamePrefix1[] = "paltest_namedmutex_test1_ce1_"; +const char ChildRunningEventNamePrefix[] = "paltest_namedmutex_test1_cr_"; #define MaxPathSize 200 const DWORD PollLoopSleepMilliseconds = 100; @@ -28,6 +27,8 @@ const DWORD FailTimeoutMilliseconds = 30000; DWORD g_expectedTimeoutMilliseconds = 500; bool g_isParent = true; +bool g_currentUserOnly = true; +bool g_currentSessionOnly = true; bool g_isStress = false; #define MaxProcessPathSize 4096 char g_processPath[MaxProcessPathSize], g_processCommandLinePath[MaxProcessPathSize]; @@ -41,13 +42,15 @@ extern int (*test_sscanf)(const char *str, const char *format, ...); extern int(*test_close)(int fd); extern int (*test_unlink)(const char *pathname); extern unsigned int test_getpid(); +extern unsigned int test_getsid(); +extern unsigned int test_geteuid(); extern int test_kill(unsigned int pid); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Test helpers extern bool TestFileExists(const char *path); -extern bool WriteHeaderInfo(const char *path, char sharedMemoryType, char version, int *fdRef); +extern bool WriteHeaderInfo(const char *path, bool currentUserOnly, char sharedMemoryType, char version, int *fdRef); #define TestAssert(expression) \ do \ @@ -56,56 +59,76 @@ extern bool WriteHeaderInfo(const char *path, char sharedMemoryType, char versio { \ if (!g_isParent) \ { \ - Trace("'paltest_namedmutex_test1' child process failed at line %u. Expression: " #expression "\n", __LINE__); \ + Trace( \ + "'paltest_namedmutex_test1' child process failed at line %u. CurrentUserOnly: %d, CurrentSessionOnly: %d. Expression: " #expression "\n", \ + __LINE__, \ + (int)g_currentUserOnly, \ + (int)g_currentSessionOnly); \ } \ else \ { \ - Trace("'paltest_namedmutex_test1' failed at line %u. 
Expression: " #expression "\n", __LINE__); \ + Trace( \ + "'paltest_namedmutex_test1' failed at line %u. CurrentUserOnly: %d, CurrentSessionOnly: %d. Expression: " #expression "\n", \ + __LINE__, \ + (int)g_currentUserOnly, \ + (int)g_currentSessionOnly); \ } \ fflush(stdout); \ return false; \ } \ } while(false) -char *BuildName(const char *testName, char *buffer, const char *prefix0, const char *prefix1 = nullptr) +char *BuildName(const char *testName, char *buffer, const char *namePrefix = nullptr) { size_t nameLength = 0; - const char *prefixes[] = {prefix0, prefix1}; - for (int i = 0; i < 2; ++i) + if (!g_currentSessionOnly) { - const char *prefix = prefixes[i]; - if (prefix == nullptr) - { - break; - } - test_strcpy(&buffer[nameLength], prefix); - nameLength += test_strlen(prefix); + test_strcpy(&buffer[nameLength], AllSessionsPrefix); + nameLength += STRING_LENGTH(AllSessionsPrefix); + } + + if (namePrefix != nullptr) + { + nameLength += test_snprintf(&buffer[nameLength], MaxPathSize - nameLength, "%s", namePrefix); } if (g_isStress) { // Append the test name so that tests can run in parallel - nameLength += test_snprintf(&buffer[nameLength], MaxPathSize - nameLength, "%s", testName); - buffer[nameLength++] = '_'; + nameLength += test_snprintf(&buffer[nameLength], MaxPathSize - nameLength, "%s_", testName); } nameLength += test_snprintf(&buffer[nameLength], MaxPathSize - nameLength, "%u", g_parentPid); return buffer; } -char *BuildGlobalShmFilePath(const char *testName, char *buffer, const char *namePrefix) +char *BuildShmFilePath(const char *testName, char *buffer, const char *namePrefix) { size_t pathLength = 0; - test_strcpy(&buffer[pathLength], GlobalShmFilePathPrefix); - pathLength += test_strlen(GlobalShmFilePathPrefix); - test_strcpy(&buffer[pathLength], namePrefix); - pathLength += test_strlen(namePrefix); + if (g_currentUserOnly) + { + pathLength += test_snprintf(&buffer[pathLength], MaxPathSize - pathLength, "/tmp/.dotnet-uid%u/shm/", test_geteuid()); + } + else + { + pathLength += test_snprintf(&buffer[pathLength], MaxPathSize - pathLength, "%s", "/tmp/.dotnet/shm/"); + } + + if (g_currentSessionOnly) + { + pathLength += test_snprintf(&buffer[pathLength], MaxPathSize - pathLength, "session%u/", test_getsid()); + } + else + { + pathLength += test_snprintf(&buffer[pathLength], MaxPathSize - pathLength, "%s", "global/"); + } + + pathLength += test_snprintf(&buffer[pathLength], MaxPathSize - pathLength, "%s", namePrefix); if (g_isStress) { // Append the test name so that tests can run in parallel - pathLength += test_snprintf(&buffer[pathLength], MaxPathSize - pathLength, "%s", testName); - buffer[pathLength++] = '_'; + pathLength += test_snprintf(&buffer[pathLength], MaxPathSize - pathLength, "%s_", testName); } pathLength += test_snprintf(&buffer[pathLength], MaxPathSize - pathLength, "%u", g_parentPid); @@ -175,37 +198,33 @@ void TestCreateMutex(AutoCloseMutexHandle &m, const char *name, bool initiallyOw { m.Close(); LPWSTR nameW = convert(name); - m = CreateMutex(nullptr, initiallyOwned, nameW); + m = PAL_CreateMutexW(initiallyOwned, nameW, g_currentUserOnly, nullptr, 0); free(nameW); } HANDLE TestOpenMutex(const char *name) { - return OpenMutexA(SYNCHRONIZE, false, name); + LPWSTR nameW = convert(name); + HANDLE h = PAL_OpenMutexW(nameW, g_currentUserOnly, nullptr, 0); + free(nameW); + return h; } bool StartProcess(const char *funcName) { - // Command line format: [stress] - - size_t processCommandLinePathLength = 0; - 
g_processCommandLinePath[processCommandLinePathLength++] = '\"'; - test_strcpy(&g_processCommandLinePath[processCommandLinePathLength], g_processPath); - processCommandLinePathLength += test_strlen(g_processPath); - g_processCommandLinePath[processCommandLinePathLength++] = '\"'; - g_processCommandLinePath[processCommandLinePathLength++] = ' '; - const char* testname = "threading/NamedMutex/test1/paltest_namedmutex_test1"; - processCommandLinePathLength += test_snprintf(&g_processCommandLinePath[processCommandLinePathLength], MaxProcessPathSize - processCommandLinePathLength, "%s ", testname); - processCommandLinePathLength += test_snprintf(&g_processCommandLinePath[processCommandLinePathLength], MaxProcessPathSize - processCommandLinePathLength, "%u", g_parentPid); - g_processCommandLinePath[processCommandLinePathLength++] = ' '; - test_strcpy(&g_processCommandLinePath[processCommandLinePathLength], funcName); - processCommandLinePathLength += test_strlen(funcName); - - if (g_isStress) - { - test_strcpy(&g_processCommandLinePath[processCommandLinePathLength], " stress"); - processCommandLinePathLength += STRING_LENGTH("stress"); - } + // Command line format: + // <0|1> /* currentUserOnly */ <0|1> /* currentSessionOnly */ [stress] + test_snprintf( + g_processCommandLinePath, + MaxProcessPathSize, + "\"%s\" %s %u %s %u %u%s", + g_processPath, + "threading/NamedMutex/test1/paltest_namedmutex_test1", + g_parentPid, + funcName, + g_currentUserOnly ? 1 : 0, + g_currentSessionOnly ? 1 : 0, + g_isStress ? " stress" : ""); STARTUPINFO si; memset(&si, 0, sizeof(si)); @@ -247,8 +266,8 @@ bool StartThread(LPTHREAD_START_ROUTINE func, void *arg = nullptr, HANDLE *threa bool WaitForMutexToBeCreated(const char *testName, AutoCloseMutexHandle &m, const char *eventNamePrefix) { char eventName[MaxPathSize]; - BuildName(testName, eventName, GlobalPrefix, eventNamePrefix); - DWORD startTime = GetTickCount(); + BuildName(testName, eventName, eventNamePrefix); + DWORD startTime = (DWORD)minipal_lowres_ticks(); while (true) { m = TestOpenMutex(eventName); @@ -256,7 +275,7 @@ bool WaitForMutexToBeCreated(const char *testName, AutoCloseMutexHandle &m, cons { return true; } - if (GetTickCount() - startTime >= FailTimeoutMilliseconds) + if ((DWORD)minipal_lowres_ticks() - startTime >= FailTimeoutMilliseconds) { return false; } @@ -276,7 +295,7 @@ bool WaitForMutexToBeCreated(const char *testName, AutoCloseMutexHandle &m, cons bool AcquireChildRunningEvent(const char *testName, AutoCloseMutexHandle &childRunningEvent) { char name[MaxPathSize]; - TestCreateMutex(childRunningEvent, BuildName(testName, name, GlobalPrefix, ChildRunningEventNamePrefix)); + TestCreateMutex(childRunningEvent, BuildName(testName, name, ChildRunningEventNamePrefix)); TestAssert(WaitForSingleObject(childRunningEvent, FailTimeoutMilliseconds) == WAIT_OBJECT_0); return true; } @@ -289,7 +308,7 @@ bool InitializeParent(const char *testName, AutoCloseMutexHandle parentEvents[2] { TestCreateMutex( parentEvents[i], - BuildName(testName, name, GlobalPrefix, i == 0 ? ParentEventNamePrefix0 : ParentEventNamePrefix1), + BuildName(testName, name, i == 0 ? ParentEventNamePrefix0 : ParentEventNamePrefix1), true); TestAssert(parentEvents[i] != nullptr); TestAssert(GetLastError() != ERROR_ALREADY_EXISTS); @@ -332,7 +351,7 @@ bool InitializeChild( { TestCreateMutex( childEvents[i], - BuildName(testName, name, GlobalPrefix, i == 0 ? ChildEventNamePrefix0 : ChildEventNamePrefix1), + BuildName(testName, name, i == 0 ? 
ChildEventNamePrefix0 : ChildEventNamePrefix1), true); TestAssert(childEvents[i] != nullptr); TestAssert(GetLastError() != ERROR_ALREADY_EXISTS); @@ -400,17 +419,24 @@ bool NameTests() TestAssert(m != nullptr); // Normal name - TestCreateMutex(m, BuildName(testName, name, NamePrefix)); + BuildName(testName, name, NamePrefix); + TestCreateMutex(m, name); TestAssert(m != nullptr); - TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, NamePrefix))) != nullptr); - TestCreateMutex(m, BuildName(testName, name, SessionPrefix, NamePrefix)); - TestAssert(m != nullptr); - TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, SessionPrefix, NamePrefix))) != nullptr); - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); - TestAssert(m != nullptr); - TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, GlobalPrefix, NamePrefix))) != nullptr); + TestAssert(AutoCloseMutexHandle(TestOpenMutex(name)) != nullptr); + if (g_currentSessionOnly) + { + // When creating or opening a mutex scoped to the current session, the prefix ("Local\") is optional + char nameWithExplicitPrefix[MaxPathSize]; + test_strcpy(nameWithExplicitPrefix, CurrentSessionOnlyPrefix); + BuildName(testName, &nameWithExplicitPrefix[STRING_LENGTH(CurrentSessionOnlyPrefix)], NamePrefix); + TestAssert(AutoCloseMutexHandle(TestOpenMutex(nameWithExplicitPrefix)) != nullptr); + TestCreateMutex(m, nameWithExplicitPrefix); + TestAssert(m != nullptr); + TestAssert(AutoCloseMutexHandle(TestOpenMutex(name)) != nullptr); + } - // Name too long. The maximum allowed length depends on the file system, so we're not checking for that. + // Name too long. The maximum allowed path length depends on the file system, so we're not checking for that. 
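/*
 * A minimal sketch of the name composition performed by BuildName above
 * (stress-mode test-name infix omitted). The helper name and the sample pid in
 * the usage comment are illustrative stand-ins, not part of the test; the rule
 * it mirrors is the one exercised here: cross-session names carry an explicit
 * "Global\" prefix, while session-scoped names may optionally be spelled with
 * "Local\".
 */
#include <stdio.h>

static void sketch_build_name(char *buf, size_t cap, int currentSessionOnly, unsigned parentPid)
{
    size_t len = 0;
    if (!currentSessionOnly)
    {
        /* AllSessionsPrefix in the test */
        len += (size_t)snprintf(&buf[len], cap - len, "Global\\");
    }
    /* NamePrefix in the test, followed by the parent pid */
    snprintf(&buf[len], cap - len, "paltest_namedmutex_test1_%u", parentPid);
}
/* e.g. sketch_build_name(buf, sizeof(buf), 0, 12345) -> "Global\paltest_namedmutex_test1_12345" */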
+ if(g_currentSessionOnly) { char name[257]; memset(name, 'a', STRING_LENGTH(name)); @@ -420,36 +446,49 @@ bool NameTests() TestAssert(GetLastError() == ERROR_FILENAME_EXCED_RANGE); TestAssert(AutoCloseMutexHandle(TestOpenMutex(name)) == nullptr); TestAssert(GetLastError() == ERROR_FILENAME_EXCED_RANGE); + + name[STRING_LENGTH(name) - 1] = '\0'; + TestCreateMutex(m, name); + TestAssert(m != nullptr); + } + else + { + char name[STRING_LENGTH(AllSessionsPrefix) + 257]; + test_strcpy(name, AllSessionsPrefix); + memset(&name[STRING_LENGTH(AllSessionsPrefix)], 'a', STRING_LENGTH(name) - STRING_LENGTH(AllSessionsPrefix)); + name[STRING_LENGTH(name)] = '\0'; + TestCreateMutex(m, name); + TestAssert(m == nullptr); + TestAssert(GetLastError() == ERROR_FILENAME_EXCED_RANGE); + TestAssert(AutoCloseMutexHandle(TestOpenMutex(name)) == nullptr); + TestAssert(GetLastError() == ERROR_FILENAME_EXCED_RANGE); + + name[STRING_LENGTH(name) - 1] = '\0'; + TestCreateMutex(m, name); + TestAssert(m != nullptr); } // Invalid characters in name - TestCreateMutex(m, BuildName(testName, name, InvalidNamePrefix0)); - TestAssert(m == nullptr); - TestAssert(GetLastError() == ERROR_INVALID_NAME); - TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, InvalidNamePrefix0))) == nullptr); - TestAssert(GetLastError() == ERROR_INVALID_NAME); - TestCreateMutex(m, BuildName(testName, name, InvalidNamePrefix1)); - TestAssert(m == nullptr); - TestAssert(GetLastError() == ERROR_INVALID_NAME); - TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, InvalidNamePrefix1))) == nullptr); - TestAssert(GetLastError() == ERROR_INVALID_NAME); - TestCreateMutex(m, BuildName(testName, name, SessionPrefix, InvalidNamePrefix0)); + BuildName(testName, name, InvalidNamePrefix0); + TestCreateMutex(m, name); TestAssert(m == nullptr); TestAssert(GetLastError() == ERROR_INVALID_NAME); - TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, SessionPrefix, InvalidNamePrefix0))) == nullptr); + TestAssert(AutoCloseMutexHandle(TestOpenMutex(name)) == nullptr); TestAssert(GetLastError() == ERROR_INVALID_NAME); - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, InvalidNamePrefix1)); + BuildName(testName, name, InvalidNamePrefix1); + TestCreateMutex(m, name); TestAssert(m == nullptr); TestAssert(GetLastError() == ERROR_INVALID_NAME); - TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, GlobalPrefix, InvalidNamePrefix1))) == nullptr); + TestAssert(AutoCloseMutexHandle(TestOpenMutex(name)) == nullptr); TestAssert(GetLastError() == ERROR_INVALID_NAME); // Creating a second reference to the same named mutex yields an error indicating that it was opened, not created { - TestCreateMutex(m, BuildName(testName, name, NamePrefix)); + BuildName(testName, name, NamePrefix); + TestCreateMutex(m, name); TestAssert(m != nullptr); AutoCloseMutexHandle m2; - TestCreateMutex(m2, BuildName(testName, name, NamePrefix)); + TestCreateMutex(m2, name); TestAssert(m2 != nullptr); TestAssert(GetLastError() == ERROR_ALREADY_EXISTS); } @@ -462,28 +501,32 @@ bool HeaderMismatchTests() const char *testName = "HeaderMismatchTests"; AutoCloseMutexHandle m, m2; - char name[MaxPathSize]; + char name[MaxPathSize], path[MaxPathSize]; int fd; // Create and hold onto a mutex during this test to create the shared memory directory - TestCreateMutex(m2, BuildName(testName, name, GlobalPrefix, TempNamePrefix)); + TestCreateMutex(m2, BuildName(testName, name, TempNamePrefix)); TestAssert(m2 != nullptr); + // Init 
name and path for the remaining tests + BuildName(testName, name, HeaderMismatchTestsNamePrefix); + BuildShmFilePath(testName, path, HeaderMismatchTestsNamePrefix); + // Unknown shared memory type - TestAssert(WriteHeaderInfo(BuildGlobalShmFilePath(testName, name, NamePrefix), -1, 1, &fd)); - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestAssert(WriteHeaderInfo(path, g_currentUserOnly, -1, 1, &fd)); + TestCreateMutex(m, name); TestAssert(m == nullptr); TestAssert(GetLastError() == ERROR_INVALID_HANDLE); TestAssert(test_close(fd) == 0); - TestAssert(test_unlink(BuildGlobalShmFilePath(testName, name, NamePrefix)) == 0); + TestAssert(test_unlink(path) == 0); // Mismatched version - TestAssert(WriteHeaderInfo(BuildGlobalShmFilePath(testName, name, NamePrefix), 0, -1, &fd)); - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestAssert(WriteHeaderInfo(path, g_currentUserOnly, 0, -1, &fd)); + TestCreateMutex(m, name); TestAssert(m == nullptr); TestAssert(GetLastError() == ERROR_INVALID_HANDLE); TestAssert(test_close(fd) == 0); - TestAssert(test_unlink(BuildGlobalShmFilePath(testName, name, NamePrefix)) == 0); + TestAssert(test_unlink(path) == 0); return true; } @@ -498,7 +541,7 @@ bool MutualExclusionTests_Parent() char name[MaxPathSize]; AutoCloseMutexHandle m; - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); // Recursive locking with various timeouts @@ -539,7 +582,7 @@ DWORD PALAPI MutualExclusionTests_Child(void *arg = nullptr) char name[MaxPathSize]; AutoCloseMutexHandle m; - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(WaitForSingleObject(m, 0) == WAIT_OBJECT_0); // lock the mutex YieldToParent(parentEvents, childEvents, ei); // parent attempts to lock/release, and fails @@ -611,23 +654,23 @@ bool LifetimeTests_Parent() char name[MaxPathSize]; AutoCloseMutexHandle m; - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); // create first reference to mutex + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); // create first reference to mutex TestAssert(m != nullptr); - TestAssert(TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child creates second reference to mutex using CreateMutex m.Close(); // close first reference - TestAssert(TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child closes second reference - TestAssert(!TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(!TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); // create first reference to mutex + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); // create first reference to mutex TestAssert(m != nullptr); - TestAssert(TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child creates second reference to mutex using OpenMutex 
m.Close(); // close first reference - TestAssert(TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child closes second reference - TestAssert(!TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(!TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); TestAssert(UninitializeParent(testName, parentEvents)); return true; @@ -646,13 +689,13 @@ DWORD PALAPI LifetimeTests_Child(void *arg = nullptr) AutoCloseMutexHandle m; // ... parent creates first reference to mutex - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); // create second reference to mutex using CreateMutex + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); // create second reference to mutex using CreateMutex TestAssert(m != nullptr); TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent closes first reference m.Close(); // close second reference TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent verifies, and creates first reference to mutex again - m = TestOpenMutex(BuildName(testName, name, GlobalPrefix, NamePrefix)); // create second reference to mutex using OpenMutex + m = TestOpenMutex(BuildName(testName, name, NamePrefix)); // create second reference to mutex using OpenMutex TestAssert(m != nullptr); TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent closes first reference m.Close(); // close second reference @@ -673,11 +716,11 @@ bool LifetimeTests() char name[MaxPathSize]; // Shm file should be created and deleted - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); - TestAssert(TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); m.Close(); - TestAssert(!TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(!TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); } // Shm file should not be deleted until last reference is released @@ -702,7 +745,7 @@ bool AbandonTests_Parent() TestAssert(InitializeParent(testName, parentEvents, childEvents)); int ei = 0; - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child locks mutex TestAssert(parentEvents[0].Release()); @@ -742,17 +785,17 @@ bool AbandonTests_Parent() // Since the child abandons the mutex, and a child process may not release the file lock on the shared memory file before // indicating completion to the parent, make sure to delete the shared memory file by repeatedly opening/closing the mutex // until the parent process becomes the last process to reference the mutex and closing it deletes the file. 
- DWORD startTime = GetTickCount(); + DWORD startTime = (DWORD)minipal_lowres_ticks(); while (true) { m.Close(); - if (!TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))) + if (!TestFileExists(BuildShmFilePath(testName, name, NamePrefix))) { break; } - TestAssert(GetTickCount() - startTime < FailTimeoutMilliseconds); - m = TestOpenMutex(BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestAssert((DWORD)minipal_lowres_ticks() - startTime < FailTimeoutMilliseconds); + m = TestOpenMutex(BuildName(testName, name, NamePrefix)); } return true; @@ -771,7 +814,7 @@ DWORD PALAPI AbandonTests_Child_GracefulExit_Close(void *arg = nullptr) AutoCloseMutexHandle m; // ... parent waits for child to lock mutex - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(WaitForSingleObject(m, 0) == WAIT_OBJECT_0); TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent waits on mutex @@ -800,7 +843,7 @@ DWORD AbandonTests_Child_GracefulExit_NoClose(void *arg = nullptr) AutoCloseMutexHandle m; // ... parent waits for child to lock mutex - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(WaitForSingleObject(m, 0) == WAIT_OBJECT_0); TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent waits on mutex @@ -829,7 +872,7 @@ DWORD AbandonTests_Child_AbruptExit(void *arg = nullptr) AutoCloseMutexHandle m; // ... parent waits for child to lock mutex - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(WaitForSingleObject(m, 0) == WAIT_OBJECT_0); TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent waits on mutex @@ -859,7 +902,7 @@ DWORD AbandonTests_Child_FileLocksNotInherited_Parent_AbruptExit(void *arg = nul AutoCloseMutexHandle m; // ... root parent waits for child to lock mutex - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(WaitForSingleObject(m, 0) == WAIT_OBJECT_0); @@ -893,7 +936,7 @@ DWORD AbandonTests_Child_FileLocksNotInherited_Child_AbruptExit(void *arg = null AutoCloseMutexHandle m; // ... immediate parent expects child to wait on mutex - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(WaitForSingleObject(m, FailTimeoutMilliseconds) == WAIT_ABANDONED_0); // attempt to lock and see abandoned mutex TestAssert(YieldToParent(parentEvents, childEvents, ei)); // root parent waits on mutex @@ -918,7 +961,7 @@ DWORD PALAPI AbandonTests_Child_TryLock(void *arg) AutoCloseMutexHandle m; // ... 
parent waits for child to lock mutex - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(WaitForSingleObject(m, 0) == WAIT_TIMEOUT); // try to lock the mutex while the parent holds the lock TestAssert(WaitForSingleObject(m, g_expectedTimeoutMilliseconds) == WAIT_TIMEOUT); @@ -960,7 +1003,7 @@ bool LockAndCloseWithoutThreadExitTests_Parent_CloseOnSameThread() char name[MaxPathSize]; AutoCloseMutexHandle m; - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child locks mutex and closes second reference to mutex on lock-owner thread @@ -971,9 +1014,9 @@ bool LockAndCloseWithoutThreadExitTests_Parent_CloseOnSameThread() TestAssert(m.Release()); TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child exits - TestAssert(TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); m.Close(); - TestAssert(!TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(!TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); TestAssert(UninitializeParent(testName, parentEvents)); return true; @@ -991,10 +1034,10 @@ DWORD PALAPI LockAndCloseWithoutThreadExitTests_Child_CloseOnSameThread(void *ar char name[MaxPathSize]; // ... parent waits for child to lock and close second reference to mutex - AutoCloseMutexHandle m(TestOpenMutex(BuildName(testName, name, GlobalPrefix, NamePrefix))); + AutoCloseMutexHandle m(TestOpenMutex(BuildName(testName, name, NamePrefix))); TestAssert(m != nullptr); TestAssert(WaitForSingleObject(m, 0) == WAIT_OBJECT_0); - TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, GlobalPrefix, NamePrefix))) != nullptr); + TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, NamePrefix))) != nullptr); TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent waits for child to close last reference to mutex m.Close(); // close mutex on lock-owner thread without releasing lock @@ -1016,7 +1059,7 @@ bool LockAndCloseWithoutThreadExitTests_Parent_CloseOnDifferentThread() char name[MaxPathSize]; AutoCloseMutexHandle m; - TestCreateMutex(m, BuildName(testName, name, GlobalPrefix, NamePrefix)); + TestCreateMutex(m, BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child locks mutex and closes second reference to mutex on lock-owner thread @@ -1025,7 +1068,7 @@ bool LockAndCloseWithoutThreadExitTests_Parent_CloseOnDifferentThread() TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child closes last reference to mutex on non-lock-owner thread TestAssert(WaitForSingleObject(m, 0) == WAIT_TIMEOUT); // attempt to lock and fail m.Close(); - m = TestOpenMutex(BuildName(testName, name, GlobalPrefix, NamePrefix)); + m = TestOpenMutex(BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); // child has implicit reference to mutex TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child closes new reference to mutex on lock-owner thread @@ -1033,9 +1076,9 @@ bool LockAndCloseWithoutThreadExitTests_Parent_CloseOnDifferentThread() TestAssert(m.Release()); TestAssert(YieldToChild(parentEvents, childEvents, ei)); // child exits - 
TestAssert(TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); m.Close(); - TestAssert(!TestFileExists(BuildGlobalShmFilePath(testName, name, NamePrefix))); + TestAssert(!TestFileExists(BuildShmFilePath(testName, name, NamePrefix))); TestAssert(UninitializeParent(testName, parentEvents)); return true; @@ -1053,10 +1096,10 @@ DWORD PALAPI LockAndCloseWithoutThreadExitTests_Child_CloseOnDifferentThread(voi char name[MaxPathSize]; // ... parent waits for child to lock and close second reference to mutex - AutoCloseMutexHandle m(TestOpenMutex(BuildName(testName, name, GlobalPrefix, NamePrefix))); + AutoCloseMutexHandle m(TestOpenMutex(BuildName(testName, name, NamePrefix))); TestAssert(m != nullptr); TestAssert(WaitForSingleObject(m, 0) == WAIT_OBJECT_0); - TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, GlobalPrefix, NamePrefix))) != nullptr); + TestAssert(AutoCloseMutexHandle(TestOpenMutex(BuildName(testName, name, NamePrefix))) != nullptr); TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent waits for child to close last reference to mutex // Close the mutex on a thread that is not the lock-owner thread, without releasing the lock @@ -1068,7 +1111,7 @@ DWORD PALAPI LockAndCloseWithoutThreadExitTests_Child_CloseOnDifferentThread(voi m.Abandon(); // mutex is already closed, don't close it again TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent verifies while this lock-owner thread is still active - m = TestOpenMutex(BuildName(testName, name, GlobalPrefix, NamePrefix)); + m = TestOpenMutex(BuildName(testName, name, NamePrefix)); TestAssert(m != nullptr); m.Close(); // close mutex on lock-owner thread without releasing lock TestAssert(YieldToParent(parentEvents, childEvents, ei)); // parent verifies while this thread is still active @@ -1110,14 +1153,24 @@ bool (*const TestList[])() = bool RunTests() { + const bool Bools[] = {false, true}; bool allPassed = true; - for (SIZE_T i = 0; i < ARRAY_SIZE(TestList); ++i) + for (int i = 0; i < ARRAY_SIZE(TestList); i++) { - if (!TestList[i]()) + for (int j = 0; j < ARRAY_SIZE(Bools); j++) { - allPassed = false; + g_currentUserOnly = Bools[j]; + for (int k = 0; k < ARRAY_SIZE(Bools); k++) + { + g_currentSessionOnly = Bools[k]; + if (!TestList[i]()) + { + allPassed = false; + } + } } } + return allPassed; } @@ -1129,7 +1182,7 @@ DWORD PALAPI StressTest(void *arg) { // Run the specified test continuously for the stress duration SIZE_T testIndex = reinterpret_cast(arg); - DWORD startTime = GetTickCount(); + DWORD startTime = (DWORD)minipal_lowres_ticks(); do { ++g_stressTestCounts[testIndex]; @@ -1140,7 +1193,7 @@ DWORD PALAPI StressTest(void *arg) } } while ( InterlockedCompareExchange(&g_stressResult, false, false) == true && - GetTickCount() - startTime < g_stressDurationMilliseconds); + (DWORD)minipal_lowres_ticks() - startTime < g_stressDurationMilliseconds); return 0; } @@ -1189,7 +1242,7 @@ bool StressTests(DWORD durationMinutes) PALTEST(threading_NamedMutex_test1_paltest_namedmutex_test1, "threading/NamedMutex/test1/paltest_namedmutex_test1") { - if (argc < 1 || argc > 4) + if (argc < 1 || argc > 6) { return FAIL; } @@ -1228,10 +1281,17 @@ PALTEST(threading_NamedMutex_test1_paltest_namedmutex_test1, "threading/NamedMut return result; } - // Child test process arguments: [stress] + // Child test process arguments: + // <0|1> /* currentUserOnly */ <0|1> /* currentSessionOnly */ [stress] g_isParent = 
false; + if (argc < 5) + { + ExitProcess(FAIL); + return FAIL; + } + // Get parent process' ID from argument if (test_sscanf(argv[1], "%u", &g_parentPid) != 1) { @@ -1239,7 +1299,19 @@ PALTEST(threading_NamedMutex_test1_paltest_namedmutex_test1, "threading/NamedMut return FAIL; } - if (argc >= 4 && test_strcmp(argv[3], "stress") == 0) + // Get the current-user-only and current-session-only args + if ((argv[3][0] != '0' && argv[3][0] != '1') || + argv[3][1] != '\0' || + (argv[4][0] != '0' && argv[4][0] != '1') || + argv[4][1] != '\0') + { + ExitProcess(FAIL); + return FAIL; + } + g_currentUserOnly = argv[3][0] != '0'; + g_currentSessionOnly = argv[4][0] != '0'; + + if (argc >= 6 && test_strcmp(argv[5], "stress") == 0) { g_isStress = true; } diff --git a/src/coreclr/pal/tests/palsuite/threading/NamedMutex/test1/nopal.cpp b/src/coreclr/pal/tests/palsuite/threading/NamedMutex/test1/nopal.cpp index 77665fe69c40..435f53108b93 100644 --- a/src/coreclr/pal/tests/palsuite/threading/NamedMutex/test1/nopal.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/NamedMutex/test1/nopal.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -27,6 +28,16 @@ unsigned int test_getpid() return getpid(); } +unsigned int test_getsid() +{ + return getsid(0); +} + +unsigned int test_geteuid() +{ + return geteuid(); +} + int test_kill(unsigned int pid) { return kill(pid, SIGKILL); @@ -41,11 +52,24 @@ bool TestFileExists(const char *path) return true; } -bool WriteHeaderInfo(const char *path, char sharedMemoryType, char version, int *fdRef) +bool WriteHeaderInfo(const char *path, bool currentUserOnly, char sharedMemoryType, char version, int *fdRef) { int fd = open(path, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (fd == -1) return false; + + if (currentUserOnly) + { + int chmodResult; + do + { + chmodResult = chmod(path, S_IRUSR | S_IWUSR); + } while (chmodResult != 0 && errno == EINTR); + + if (chmodResult != 0) + return false; + } + *fdRef = fd; if (ftruncate(fd, getpagesize()) != 0) return false; diff --git a/src/coreclr/pal/tests/palsuite/threading/OpenProcess/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/threading/OpenProcess/test1/test1.cpp index 38cd61014a01..9bcdcb129eab 100644 --- a/src/coreclr/pal/tests/palsuite/threading/OpenProcess/test1/test1.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/OpenProcess/test1/test1.cpp @@ -44,7 +44,7 @@ PALTEST(threading_OpenProcess_test1_paltest_openprocess_test1, "threading/OpenPr HANDLE hChildProcess; char rgchDirName[_MAX_DIR]; - char absPathBuf[_MAX_PATH]; + char absPathBuf[MAX_PATH]; char* rgchAbsPathName; BOOL ret = FAIL; diff --git a/src/coreclr/pal/tests/palsuite/threading/QueryThreadCycleTime/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/threading/QueryThreadCycleTime/test1/test1.cpp index 2244d54e65e4..7f4f37e3e1c8 100644 --- a/src/coreclr/pal/tests/palsuite/threading/QueryThreadCycleTime/test1/test1.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/QueryThreadCycleTime/test1/test1.cpp @@ -56,7 +56,7 @@ PALTEST(threading_QueryThreadCycleTime_test1_paltest_querythreadcycletime_test1, LONG64 x; /* Init is in milliseconds, so we will convert later */ - Init = (LONG64)GetTickCount(); + Init = minipal_lowres_ticks(); x = Init + 3; volatile int counter; do { @@ -65,8 +65,8 @@ PALTEST(threading_QueryThreadCycleTime_test1_paltest_querythreadcycletime_test1, // spin to consume CPU time } - } while (x > GetTickCount()); - Expected += (GetTickCount() - Init) * MSEC_TO_NSEC; + } while (x > 
minipal_lowres_ticks()); + Expected += (minipal_lowres_ticks() - Init) * MSEC_TO_NSEC; /* Get a second count */ if (!QueryThreadCycleTime(cThread, (PULONG64)&SecondCount)) { diff --git a/src/coreclr/pal/tests/palsuite/threading/Sleep/test1/Sleep.cpp b/src/coreclr/pal/tests/palsuite/threading/Sleep/test1/Sleep.cpp index 3cc3d9244828..e48d73c3d59c 100644 --- a/src/coreclr/pal/tests/palsuite/threading/Sleep/test1/Sleep.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/Sleep/test1/Sleep.cpp @@ -5,13 +5,13 @@ ** ** Source: Sleep.c ** -** Purpose: Test to establish whether the Sleep function stops the thread from +** Purpose: Test to establish whether the Sleep function stops the thread from ** executing for the specified times. ** ** Dependencies: GetSystemTime -** Fail +** Fail ** Trace -** +** ** **=========================================================*/ @@ -32,8 +32,8 @@ PALTEST(threading_Sleep_test1_paltest_sleep_test1, "threading/Sleep/test1/paltes /* Milliseconds of error which are acceptable Function execution time, etc. */ DWORD AcceptableTimeError = 150; - UINT64 OldTimeStamp; - UINT64 NewTimeStamp; + int64_t OldTimeStamp; + int64_t NewTimeStamp; DWORD MaxDelta; DWORD TimeDelta; DWORD i; @@ -43,19 +43,13 @@ PALTEST(threading_Sleep_test1_paltest_sleep_test1, "threading/Sleep/test1/paltes return ( FAIL ); } - LARGE_INTEGER performanceFrequency; - if (!QueryPerformanceFrequency(&performanceFrequency)) - { - return FAIL; - } - for( i = 0; i < sizeof(SleepTimes) / sizeof(DWORD); i++) { - OldTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + OldTimeStamp = minipal_hires_ticks(); Sleep(SleepTimes[i]); - NewTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + NewTimeStamp = minipal_hires_ticks(); - TimeDelta = NewTimeStamp - OldTimeStamp; + TimeDelta = (NewTimeStamp - OldTimeStamp) / (minipal_hires_tick_frequency() / 1000); /* For longer intervals use a 10 percent tolerance */ if ((SleepTimes[i] * 0.1) > AcceptableTimeError) diff --git a/src/coreclr/pal/tests/palsuite/threading/Sleep/test2/sleep.cpp b/src/coreclr/pal/tests/palsuite/threading/Sleep/test2/sleep.cpp index a4e1b465af2c..f6a06e1b5273 100644 --- a/src/coreclr/pal/tests/palsuite/threading/Sleep/test2/sleep.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/Sleep/test2/sleep.cpp @@ -5,11 +5,9 @@ ** ** Source: Sleep.c ** -** Purpose: Test to establish whether the Sleep function stops the thread from +** Purpose: Test to establish whether the Sleep function stops the thread from ** executing for the specified times. ** -** Dependencies: GetTickCount -** ** **=========================================================*/ @@ -18,7 +16,7 @@ PALTEST(threading_Sleep_test2_paltest_sleep_test2, "threading/Sleep/test2/paltest_sleep_test2") { - /* + /* * times in 10^(-3) seconds */ @@ -33,8 +31,8 @@ PALTEST(threading_Sleep_test2_paltest_sleep_test2, "threading/Sleep/test2/paltes /* Milliseconds of error which are acceptable Function execution time, etc. 
*/ DWORD AcceptableTimeError = 150; - UINT64 OldTimeStamp; - UINT64 NewTimeStamp; + int64_t OldTimeStamp; + int64_t NewTimeStamp; DWORD MaxDelta; DWORD TimeDelta; DWORD i; @@ -44,19 +42,13 @@ PALTEST(threading_Sleep_test2_paltest_sleep_test2, "threading/Sleep/test2/paltes return ( FAIL ); } - LARGE_INTEGER performanceFrequency; - if (!QueryPerformanceFrequency(&performanceFrequency)) - { - return FAIL; - } - for( i = 0; i < sizeof(SleepTimes) / sizeof(DWORD); i++) { - OldTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + OldTimeStamp = minipal_hires_ticks(); Sleep(SleepTimes[i]); - NewTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + NewTimeStamp = minipal_hires_ticks(); - TimeDelta = NewTimeStamp - OldTimeStamp; + TimeDelta = (NewTimeStamp - OldTimeStamp) / (minipal_hires_tick_frequency() / 1000); MaxDelta = SleepTimes[i] + AcceptableTimeError; diff --git a/src/coreclr/pal/tests/palsuite/threading/SleepEx/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/threading/SleepEx/test1/test1.cpp index 61bdf136b8f8..eeeceecbddf3 100644 --- a/src/coreclr/pal/tests/palsuite/threading/SleepEx/test1/test1.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/SleepEx/test1/test1.cpp @@ -39,8 +39,8 @@ PALTEST(threading_SleepEx_test1_paltest_sleepex_test1, "threading/SleepEx/test1/ {2000, TRUE}, }; - UINT64 OldTimeStamp; - UINT64 NewTimeStamp; + int64_t OldTimeStamp; + int64_t NewTimeStamp; DWORD MaxDelta; DWORD TimeDelta; DWORD i; @@ -50,21 +50,15 @@ PALTEST(threading_SleepEx_test1_paltest_sleepex_test1, "threading/SleepEx/test1/ return FAIL; } - LARGE_INTEGER performanceFrequency; - if (!QueryPerformanceFrequency(&performanceFrequency)) - { - return FAIL; - } - for (i = 0; i AcceptableTimeError) diff --git a/src/coreclr/pal/tests/palsuite/threading/SleepEx/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/threading/SleepEx/test2/test2.cpp index b139e2d85bc7..15e11174b65a 100644 --- a/src/coreclr/pal/tests/palsuite/threading/SleepEx/test2/test2.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/SleepEx/test2/test2.cpp @@ -14,12 +14,12 @@ #include const int ChildThreadSleepTime = 2000; -const int InterruptTime = 1000; +const int InterruptTime = 1000; /* We need to keep in mind that BSD has a timer resolution of 10ms, so - we need to adjust our delta to keep that in mind. Besides we need some - tolerance to account for different scheduling strategies, heavy load + we need to adjust our delta to keep that in mind. Besides we need some + tolerance to account for different scheduling strategies, heavy load scenarios, etc. - + Real-world data also tells us we can expect a big difference between values when run on real iron vs run in a hypervisor. @@ -50,15 +50,15 @@ PALTEST(threading_SleepEx_test2_paltest_sleepex_test2, "threading/SleepEx/test2/ } /* - On some platforms (e.g. FreeBSD 4.9) the first call to some synch objects - (such as conditions) involves some pthread internal initialization that + On some platforms (e.g. FreeBSD 4.9) the first call to some synch objects + (such as conditions) involves some pthread internal initialization that can make the first wait slighty longer, potentially going above the acceptable delta for this test. Let's add a dummy wait to preinitialize internal structures */ Sleep(100); - - /* + + /* * Check that Queueing an APC in the middle of a sleep does interrupt * it, if it's in an alertable state. 
*/ @@ -73,12 +73,12 @@ PALTEST(threading_SleepEx_test2_paltest_sleepex_test2, "threading/SleepEx/test2/ if (dwAvgDelta > AcceptableDelta) { Fail("Expected thread to sleep for %d ms (and get interrupted).\n" - "Average delta: %u ms, acceptable delta: %u\n", + "Average delta: %u ms, acceptable delta: %u\n", InterruptTime, dwAvgDelta, AcceptableDelta); } - /* - * Check that Queueing an APC in the middle of a sleep does NOT interrupt + /* + * Check that Queueing an APC in the middle of a sleep does NOT interrupt * it, if it is not in an alertable state. */ dwAvgDelta = 0; @@ -92,7 +92,7 @@ PALTEST(threading_SleepEx_test2_paltest_sleepex_test2, "threading/SleepEx/test2/ if (dwAvgDelta > AcceptableDelta) { Fail("Expected thread to sleep for %d ms (and not be interrupted).\n" - "Average delta: %u ms, acceptable delta: %u\n", + "Average delta: %u ms, acceptable delta: %u\n", ChildThreadSleepTime, dwAvgDelta, AcceptableDelta); } @@ -108,7 +108,7 @@ void RunTest_SleepEx_test2(BOOL AlertThread) s_preWaitTimestampRecorded = false; hThread = CreateThread( NULL, - 0, + 0, (LPTHREAD_START_ROUTINE)SleeperProc_SleepEx_test2, (LPVOID) AlertThread, 0, @@ -141,7 +141,7 @@ void RunTest_SleepEx_test2(BOOL AlertThread) ret = WaitForSingleObject(hThread, INFINITE); if (ret == WAIT_FAILED) { - Fail("Unable to wait on child thread!\nGetLastError returned %d.", + Fail("Unable to wait on child thread!\nGetLastError returned %d.", GetLastError()); } } @@ -155,25 +155,19 @@ VOID PALAPI APCFunc_SleepEx_test2(ULONG_PTR dwParam) /* Entry Point for child thread. */ DWORD PALAPI SleeperProc_SleepEx_test2(LPVOID lpParameter) { - UINT64 OldTimeStamp; - UINT64 NewTimeStamp; + int64_t OldTimeStamp; + int64_t NewTimeStamp; BOOL Alertable; DWORD ret; Alertable = (BOOL)(SIZE_T) lpParameter; - LARGE_INTEGER performanceFrequency; - if (!QueryPerformanceFrequency(&performanceFrequency)) - { - return FAIL; - } - - OldTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + OldTimeStamp = minipal_hires_ticks(); s_preWaitTimestampRecorded = true; ret = SleepEx(ChildThreadSleepTime, Alertable); - - NewTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + + NewTimeStamp = minipal_hires_ticks(); if (Alertable && ret != WAIT_IO_COMPLETION) { @@ -186,7 +180,7 @@ DWORD PALAPI SleeperProc_SleepEx_test2(LPVOID lpParameter) } - ThreadSleepDelta = NewTimeStamp - OldTimeStamp; + ThreadSleepDelta = (NewTimeStamp - OldTimeStamp) / (minipal_hires_tick_frequency() / 1000); return 0; } diff --git a/src/coreclr/pal/tests/palsuite/threading/WaitForMultipleObjectsEx/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/threading/WaitForMultipleObjectsEx/test2/test2.cpp index 96124434e768..776cd621d423 100644 --- a/src/coreclr/pal/tests/palsuite/threading/WaitForMultipleObjectsEx/test2/test2.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/WaitForMultipleObjectsEx/test2/test2.cpp @@ -55,7 +55,7 @@ PALTEST(threading_WaitForMultipleObjectsEx_test2_paltest_waitformultipleobjectse RunTest_WFMO_test2(TRUE); // Make sure that the wait returns in time greater than interrupt and less than // wait timeout - if ( + if ( ((ThreadWaitDelta_WFMO_test2 >= ChildThreadWaitTime) && (ThreadWaitDelta_WFMO_test2 - ChildThreadWaitTime) > TOLERANCE) || (( ThreadWaitDelta_WFMO_test2 < InterruptTime) && (InterruptTime - ThreadWaitDelta_WFMO_test2) > TOLERANCE) ) @@ -140,8 +140,8 @@ VOID PALAPI APCFunc_WFMO_test2(ULONG_PTR dwParam) DWORD PALAPI WaiterProc_WFMO_test2(LPVOID lpParameter) { HANDLE Semaphore; - UINT64 OldTimeStamp; - UINT64 NewTimeStamp; + int64_t 
OldTimeStamp; + int64_t NewTimeStamp; BOOL Alertable; DWORD ret; @@ -156,19 +156,13 @@ DWORD PALAPI WaiterProc_WFMO_test2(LPVOID lpParameter) Alertable = (BOOL)(SIZE_T) lpParameter; - LARGE_INTEGER performanceFrequency; - if (!QueryPerformanceFrequency(&performanceFrequency)) - { - Fail("Failed to query performance frequency!"); - } - - OldTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + OldTimeStamp = minipal_hires_ticks(); s_preWaitTimestampRecorded = true; ret = WaitForMultipleObjectsEx(1, &Semaphore, FALSE, ChildThreadWaitTime, Alertable); - NewTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + NewTimeStamp = minipal_hires_ticks(); if (Alertable && ret != WAIT_IO_COMPLETION) @@ -182,7 +176,7 @@ DWORD PALAPI WaiterProc_WFMO_test2(LPVOID lpParameter) "Expected return of WAIT_TIMEOUT, got %d.\n", ret); } - ThreadWaitDelta_WFMO_test2 = NewTimeStamp - OldTimeStamp; + ThreadWaitDelta_WFMO_test2 = (NewTimeStamp - OldTimeStamp) / (minipal_hires_tick_frequency() / 1000); ret = CloseHandle(Semaphore); if (!ret) diff --git a/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExMutexTest/WFSOExMutexTest.cpp b/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExMutexTest/WFSOExMutexTest.cpp index c98659db838e..66ae05bf230f 100644 --- a/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExMutexTest/WFSOExMutexTest.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExMutexTest/WFSOExMutexTest.cpp @@ -5,7 +5,7 @@ ** ** Source: WFSOExMutex.c ** -** Purpose: Tests a child thread in the middle of a +** Purpose: Tests a child thread in the middle of a ** WaitForSingleObjectEx call will be interrupted by QueueUserAPC ** if the alert flag was set. ** @@ -31,31 +31,31 @@ static volatile bool s_preWaitTimestampRecorded = false; PALTEST(threading_WaitForSingleObject_WFSOExMutexTest_paltest_waitforsingleobject_wfsoexmutextest, "threading/WaitForSingleObject/WFSOExMutexTest/paltest_waitforsingleobject_wfsoexmutextest") { int ret=0; - + if (0 != (PAL_Initialize(argc, argv))) { return FAIL; } /* - On some platforms (e.g. FreeBSD 4.9) the first call to some synch objects - (such as conditions) involves some pthread internal initialization that + On some platforms (e.g. FreeBSD 4.9) the first call to some synch objects + (such as conditions) involves some pthread internal initialization that can make the first wait slighty longer, potentially going above the acceptable delta for this test. Let's add a dummy wait to preinitialize internal structures */ Sleep(100); - + /* - The state of a mutex object is signaled when it is not owned by any thread. - The creating thread can use the bInitialOwner flag to request immediate ownership - of the mutex. Otherwise, a thread must use one of the wait functions to request - ownership. When the mutex's state is signaled, one waiting thread is granted - ownership, the mutex's state changes to nonsignaled, and the wait function returns. - Only one thread can own a mutex at any given time. The owning thread uses the + The state of a mutex object is signaled when it is not owned by any thread. + The creating thread can use the bInitialOwner flag to request immediate ownership + of the mutex. Otherwise, a thread must use one of the wait functions to request + ownership. When the mutex's state is signaled, one waiting thread is granted + ownership, the mutex's state changes to nonsignaled, and the wait function returns. + Only one thread can own a mutex at any given time. 
The owning thread uses the ReleaseMutex function to release its ownership. */ - + /* Create a mutex that is not in the signalled state */ hMutex_WFSOExMutexTest = CreateMutex(NULL, //No security attributes TRUE, //Iniitally owned @@ -66,7 +66,7 @@ PALTEST(threading_WaitForSingleObject_WFSOExMutexTest_paltest_waitforsingleobjec Fail("Failed to create mutex! GetLastError returned %d.\n", GetLastError()); } - /* + /* * Check that Queueing an APC in the middle of a wait does interrupt * it, if it's in an alertable state. */ @@ -75,25 +75,25 @@ PALTEST(threading_WaitForSingleObject_WFSOExMutexTest_paltest_waitforsingleobjec if ((ThreadWaitDelta_WFSOExMutexTest - InterruptTime) > AcceptableDelta) { Fail("Expected thread to wait for %d ms (and get interrupted).\n" - "Thread waited for %d ms! (Acceptable delta: %d)\n", + "Thread waited for %d ms! (Acceptable delta: %d)\n", InterruptTime, ThreadWaitDelta_WFSOExMutexTest, AcceptableDelta); } - /* - * Check that Queueing an APC in the middle of a wait does NOT interrupt + /* + * Check that Queueing an APC in the middle of a wait does NOT interrupt * it, if it is not in an alertable state. */ RunTest_WFSOExMutexTest(FALSE); if ((ThreadWaitDelta_WFSOExMutexTest - ChildThreadWaitTime) > AcceptableDelta) { Fail("Expected thread to wait for %d ms (and not be interrupted).\n" - "Thread waited for %d ms! (Acceptable delta: %d)\n", + "Thread waited for %d ms! (Acceptable delta: %d)\n", ChildThreadWaitTime, ThreadWaitDelta_WFSOExMutexTest, AcceptableDelta); } - + //Release Mutex ret = ReleaseMutex(hMutex_WFSOExMutexTest); if (0==ret) @@ -109,14 +109,14 @@ PALTEST(threading_WaitForSingleObject_WFSOExMutexTest_paltest_waitforsingleobjec Fail("Unable to close handle to Mutex!\n" "GetLastError returned %d\n", GetLastError()); } - + PAL_Terminate(); return PASS; } void RunTest_WFSOExMutexTest(BOOL AlertThread) { - + HANDLE hThread = 0; DWORD dwThreadId = 0; @@ -124,7 +124,7 @@ void RunTest_WFSOExMutexTest(BOOL AlertThread) s_preWaitTimestampRecorded = false; hThread = CreateThread( NULL, - 0, + 0, (LPTHREAD_START_ROUTINE)WaiterProc_WFSOExMutexTest, (LPVOID) AlertThread, 0, @@ -146,58 +146,52 @@ void RunTest_WFSOExMutexTest(BOOL AlertThread) Sleep(InterruptTime); ret = QueueUserAPC(APCFunc_WFSOExMutexTest, hThread, 0); - + if (ret == 0) { - Fail("QueueUserAPC failed! GetLastError returned %d\n", + Fail("QueueUserAPC failed! GetLastError returned %d\n", GetLastError()); } - + ret = WaitForSingleObject(hThread, INFINITE); - + if (ret == WAIT_FAILED) { - Fail("Unable to wait on child thread!\nGetLastError returned %d.\n", + Fail("Unable to wait on child thread!\nGetLastError returned %d.\n", GetLastError()); } - + if (0==CloseHandle(hThread)) { - Trace("Could not close Thread handle\n"); - Fail ( "GetLastError returned %d\n", GetLastError()); - } + Trace("Could not close Thread handle\n"); + Fail ( "GetLastError returned %d\n", GetLastError()); + } } /* Function doesn't do anything, just needed to interrupt the wait*/ VOID PALAPI APCFunc_WFSOExMutexTest(ULONG_PTR dwParam) -{ +{ } /* Entry Point for child thread. 
*/ DWORD PALAPI WaiterProc_WFSOExMutexTest(LPVOID lpParameter) { - UINT64 OldTimeStamp; - UINT64 NewTimeStamp; + int64_t OldTimeStamp; + int64_t NewTimeStamp; BOOL Alertable; DWORD ret; Alertable = (BOOL)(SIZE_T) lpParameter; - LARGE_INTEGER performanceFrequency; - if (!QueryPerformanceFrequency(&performanceFrequency)) - { - Fail("Failed to query performance frequency!"); - } - - OldTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + OldTimeStamp = minipal_hires_ticks(); s_preWaitTimestampRecorded = true; - ret = WaitForSingleObjectEx( hMutex_WFSOExMutexTest, - ChildThreadWaitTime, + ret = WaitForSingleObjectEx( hMutex_WFSOExMutexTest, + ChildThreadWaitTime, Alertable); - - NewTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + + NewTimeStamp = minipal_hires_ticks(); if (Alertable && ret != WAIT_IO_COMPLETION) { @@ -210,8 +204,8 @@ DWORD PALAPI WaiterProc_WFSOExMutexTest(LPVOID lpParameter) "Expected return of WAIT_TIMEOUT, got %d.\n", ret); } - ThreadWaitDelta_WFSOExMutexTest = NewTimeStamp - OldTimeStamp; - + ThreadWaitDelta_WFSOExMutexTest = (NewTimeStamp - OldTimeStamp) / (minipal_hires_tick_frequency() / 1000); + return 0; } diff --git a/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExSemaphoreTest/WFSOExSemaphoreTest.cpp b/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExSemaphoreTest/WFSOExSemaphoreTest.cpp index 3859e4abd071..b67776597fd9 100644 --- a/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExSemaphoreTest/WFSOExSemaphoreTest.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExSemaphoreTest/WFSOExSemaphoreTest.cpp @@ -133,17 +133,11 @@ VOID PALAPI APCFunc_WFSOExSemaphoreTest(ULONG_PTR dwParam) DWORD PALAPI WaiterProc_WFSOExSemaphoreTest(LPVOID lpParameter) { HANDLE hSemaphore; - UINT64 OldTimeStamp; - UINT64 NewTimeStamp; + int64_t OldTimeStamp; + int64_t NewTimeStamp; BOOL Alertable; DWORD ret; - LARGE_INTEGER performanceFrequency; - if (!QueryPerformanceFrequency(&performanceFrequency)) - { - Fail("Failed to query performance frequency!"); - } - /* Create a semaphore that is not in the signalled state */ hSemaphore = CreateSemaphoreExW(NULL, 0, 1, NULL, 0, 0); @@ -155,14 +149,14 @@ DWORD PALAPI WaiterProc_WFSOExSemaphoreTest(LPVOID lpParameter) Alertable = (BOOL)(SIZE_T) lpParameter; - OldTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + OldTimeStamp = minipal_hires_ticks(); s_preWaitTimestampRecorded = true; ret = WaitForSingleObjectEx( hSemaphore, ChildThreadWaitTime, Alertable); - NewTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + NewTimeStamp = minipal_hires_ticks(); if (Alertable && ret != WAIT_IO_COMPLETION) @@ -177,7 +171,7 @@ DWORD PALAPI WaiterProc_WFSOExSemaphoreTest(LPVOID lpParameter) } - ThreadWaitDelta_WFSOExSemaphoreTest = NewTimeStamp - OldTimeStamp; + ThreadWaitDelta_WFSOExSemaphoreTest = (NewTimeStamp - OldTimeStamp) / (minipal_hires_tick_frequency() / 1000); ret = CloseHandle(hSemaphore); if (!ret) diff --git a/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExThreadTest/WFSOExThreadTest.cpp b/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExThreadTest/WFSOExThreadTest.cpp index 078ae531353a..eaf0433cc5c6 100644 --- a/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExThreadTest/WFSOExThreadTest.cpp +++ b/src/coreclr/pal/tests/palsuite/threading/WaitForSingleObject/WFSOExThreadTest/WFSOExThreadTest.cpp @@ -5,7 +5,7 @@ ** ** Source: WFSOExThreadTest.c ** -** Purpose: Tests a 
child thread in the middle of a +** Purpose: Tests a child thread in the middle of a ** WaitForSingleObjectEx call will be interrupted by QueueUserAPC ** if the alert flag was set. ** @@ -17,7 +17,7 @@ /*Based on SleepEx/test2 */ const int ChildThreadWaitTime = 4000; -const int InterruptTime = 2000; +const int InterruptTime = 2000; const DWORD AcceptableDelta = 300; void RunTest_WFSOExThreadTest(BOOL AlertThread); @@ -36,15 +36,15 @@ PALTEST(threading_WaitForSingleObject_WFSOExThreadTest_paltest_waitforsingleobje } /* - On some platforms (e.g. FreeBSD 4.9) the first call to some synch objects - (such as conditions) involves some pthread internal initialization that + On some platforms (e.g. FreeBSD 4.9) the first call to some synch objects + (such as conditions) involves some pthread internal initialization that can make the first wait slighty longer, potentially going above the acceptable delta for this test. Let's add a dummy wait to preinitialize internal structures */ Sleep(100); - /* + /* * Check that Queueing an APC in the middle of a wait does interrupt * it, if it's in an alertable state. */ @@ -53,20 +53,20 @@ PALTEST(threading_WaitForSingleObject_WFSOExThreadTest_paltest_waitforsingleobje if (abs(ThreadWaitDelta_WFSOExThreadTest - InterruptTime) > AcceptableDelta) { Fail("Expected thread to wait for %d ms (and get interrupted).\n" - "Thread waited for %d ms! (Acceptable delta: %d)\n", + "Thread waited for %d ms! (Acceptable delta: %d)\n", InterruptTime, ThreadWaitDelta_WFSOExThreadTest, AcceptableDelta); } - /* - * Check that Queueing an APC in the middle of a wait does NOT interrupt + /* + * Check that Queueing an APC in the middle of a wait does NOT interrupt * it, if it is not in an alertable state. */ RunTest_WFSOExThreadTest(FALSE); if (abs(ThreadWaitDelta_WFSOExThreadTest - ChildThreadWaitTime) > AcceptableDelta) { Fail("Expected thread to wait for %d ms (and not be interrupted).\n" - "Thread waited for %d ms! (Acceptable delta: %d)\n", + "Thread waited for %d ms! (Acceptable delta: %d)\n", ChildThreadWaitTime, ThreadWaitDelta_WFSOExThreadTest, AcceptableDelta); } @@ -81,10 +81,10 @@ void RunTest_WFSOExThreadTest(BOOL AlertThread) DWORD dwThreadId = 0; int ret; - //Create thread + //Create thread s_preWaitTimestampRecorded = false; hThread = CreateThread( NULL, - 0, + 0, (LPTHREAD_START_ROUTINE)WaiterProc_WFSOExThreadTest, (LPVOID) AlertThread, 0, @@ -108,48 +108,48 @@ void RunTest_WFSOExThreadTest(BOOL AlertThread) ret = QueueUserAPC(APCFunc_WFSOExThreadTest, hThread, 0); if (ret == 0) { - Fail("QueueUserAPC failed! GetLastError returned %d\n", + Fail("QueueUserAPC failed! GetLastError returned %d\n", GetLastError()); } - + ret = WaitForSingleObject(hThread, INFINITE); if (ret == WAIT_FAILED) { - Fail("Unable to wait on child thread!\nGetLastError returned %d.\n", + Fail("Unable to wait on child thread!\nGetLastError returned %d.\n", GetLastError()); } if (0==CloseHandle(hThread)) { - Trace("Could not close Thread handle\n"); - Fail ( "GetLastError returned %d\n", GetLastError()); - } + Trace("Could not close Thread handle\n"); + Fail ( "GetLastError returned %d\n", GetLastError()); + } } /* Function doesn't do anything, just needed to interrupt the wait*/ VOID PALAPI APCFunc_WFSOExThreadTest(ULONG_PTR dwParam) -{ +{ } /* Entry Point for child thread. 
*/ DWORD PALAPI WaiterProc_WFSOExThreadTest(LPVOID lpParameter) { HANDLE hWaitThread; - UINT64 OldTimeStamp; - UINT64 NewTimeStamp; + int64_t OldTimeStamp; + int64_t NewTimeStamp; BOOL Alertable; DWORD ret; DWORD dwThreadId = 0; /* -When a thread terminates, the thread object attains a signaled state, +When a thread terminates, the thread object attains a signaled state, satisfying any threads that were waiting on the object. */ /* Create a thread that does not return immediately to maintain a non signaled test*/ - hWaitThread = CreateThread( NULL, - 0, + hWaitThread = CreateThread( NULL, + 0, (LPTHREAD_START_ROUTINE)WorkerThread_WFSOExThreadTest, NULL, 0, @@ -163,20 +163,14 @@ satisfying any threads that were waiting on the object. Alertable = (BOOL)(SIZE_T) lpParameter; - LARGE_INTEGER performanceFrequency; - if (!QueryPerformanceFrequency(&performanceFrequency)) - { - Fail("Failed to query performance frequency!"); - } - - OldTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + OldTimeStamp = minipal_hires_ticks(); s_preWaitTimestampRecorded = true; - ret = WaitForSingleObjectEx( hWaitThread, - ChildThreadWaitTime, + ret = WaitForSingleObjectEx( hWaitThread, + ChildThreadWaitTime, Alertable); - - NewTimeStamp = GetHighPrecisionTimeStamp(performanceFrequency); + + NewTimeStamp = minipal_hires_ticks(); if (Alertable && ret != WAIT_IO_COMPLETION) @@ -190,7 +184,7 @@ satisfying any threads that were waiting on the object. "Expected return of WAIT_TIMEOUT, got %d.\n", ret); } - ThreadWaitDelta_WFSOExThreadTest = NewTimeStamp - OldTimeStamp; + ThreadWaitDelta_WFSOExThreadTest = (NewTimeStamp - OldTimeStamp) / (minipal_hires_tick_frequency() / 1000); ret = CloseHandle(hWaitThread); if (!ret) @@ -205,7 +199,7 @@ satisfying any threads that were waiting on the object. void WorkerThread_WFSOExThreadTest(void) { - + //Make the worker thread sleep to test WFSOEx Functionality Sleep(2*ChildThreadWaitTime); diff --git a/src/coreclr/pal/tests/palsuite/wasm/index.html b/src/coreclr/pal/tests/palsuite/wasm/index.html new file mode 100644 index 000000000000..a50c1ff3473f --- /dev/null +++ b/src/coreclr/pal/tests/palsuite/wasm/index.html @@ -0,0 +1,184 @@ + + + + + PAL Tests WASM + + +

PAL Tests WASM

+

+    
+    
+
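The timing-sensitive tests above (QueryThreadCycleTime, Sleep, SleepEx, WaitForMultipleObjectsEx, WaitForSingleObject) all switch from GetTickCount/QueryPerformanceFrequency to the minipal tick helpers. A minimal sketch of the conversion pattern they now share follows; it is illustrative only and not part of the patch, and the <minipal/time.h> include path is an assumption.

// Illustrative sketch of the timing pattern used by the updated tests; not part of the patch.
// Assumption: minipal_hires_ticks, minipal_hires_tick_frequency and
// minipal_lowres_ticks are declared in <minipal/time.h>.
#include <minipal/time.h>
#include <stdint.h>

// minipal_hires_ticks() counts at minipal_hires_tick_frequency() ticks per second,
// so dividing the frequency by 1000 yields ticks per millisecond.
static int64_t ElapsedHiResMilliseconds(int64_t oldTimeStamp, int64_t newTimeStamp)
{
    return (newTimeStamp - oldTimeStamp) / (minipal_hires_tick_frequency() / 1000);
}

// minipal_lowres_ticks() is the GetTickCount() replacement: a low-resolution
// millisecond counter (the tests truncate it to DWORD where the old code expected one).
static bool HasElapsed(int64_t startMilliseconds, int64_t timeoutMilliseconds)
{
    return minipal_lowres_ticks() - startMilliseconds >= timeoutMilliseconds;
}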
diff --git a/src/coreclr/pgosupport.cmake b/src/coreclr/pgosupport.cmake
index 99a7b358188a..ace1fda4e9c4 100644
--- a/src/coreclr/pgosupport.cmake
+++ b/src/coreclr/pgosupport.cmake
@@ -4,12 +4,11 @@ include(CheckCXXCompilerFlag)
 # VC++ guarantees support for LTCG (LTO's equivalent)
 if(NOT WIN32)
   # Function required to give CMAKE_REQUIRED_* local scope
-  function(check_have_lto)
-    set(CMAKE_REQUIRED_FLAGS -flto)
+  function(check_have_lto_and_pgodata_supported profile_path)
+    set(CMAKE_REQUIRED_FLAGS "-flto -fprofile-instr-use=${profile_path} -Wno-profile-instr-out-of-date -Wno-profile-instr-unprofiled")
     set(CMAKE_REQUIRED_LIBRARIES -flto)
-    check_cxx_source_compiles("int main() { return 0; }" HAVE_LTO)
-  endfunction(check_have_lto)
-  check_have_lto()
+    check_cxx_source_compiles("int main() { return 0; }" HAVE_LTO_AND_PGO_DATA_SUPPORTED)
+  endfunction(check_have_lto_and_pgodata_supported)
 
   check_cxx_compiler_flag(-faligned-new COMPILER_SUPPORTS_F_ALIGNED_NEW)
 endif(NOT WIN32)
@@ -52,17 +51,17 @@ function(add_pgo TargetName)
                 add_compile_definitions(WITH_NATIVE_PGO)
             else(CLR_CMAKE_HOST_WIN32)
                 if(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL RELEASE OR UPPERCASE_CMAKE_BUILD_TYPE STREQUAL RELWITHDEBINFO)
-                    if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16))
-                        if(HAVE_LTO)
+                    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+                        check_have_lto_and_pgodata_supported(${ProfilePath})
+                        if(HAVE_LTO_AND_PGO_DATA_SUPPORTED)
+                            message(STATUS "Enabling profile guided optimizations for ${TargetName}")
                             target_compile_options(${TargetName} PRIVATE -flto -fprofile-instr-use=${ProfilePath} -Wno-profile-instr-out-of-date -Wno-profile-instr-unprofiled)
                             set_property(TARGET ${TargetName} APPEND_STRING PROPERTY LINK_FLAGS " -flto -fprofile-instr-use=${ProfilePath}")
                             add_compile_definitions(WITH_NATIVE_PGO)
-                        else(HAVE_LTO)
-                            message(WARNING "LTO is not supported, skipping profile guided optimizations")
-                        endif(HAVE_LTO)
-                    else((CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16))
-                        message(WARNING "PGO is not supported; Clang 16 or later is required for profile guided optimizations")
-                    endif((CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16))
+                        else(HAVE_LTO_AND_PGO_DATA_SUPPORTED)
+                            message(WARNING "LTO is not supported or PGO optimization data not compatible, skipping profile guided optimizations for ${TargetName}")
+                        endif(HAVE_LTO_AND_PGO_DATA_SUPPORTED)
+                    endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
                 endif(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL RELEASE OR UPPERCASE_CMAKE_BUILD_TYPE STREQUAL RELWITHDEBINFO)
             endif(CLR_CMAKE_HOST_WIN32)
         endif(NOT EXISTS ${ProfilePath})
diff --git a/src/coreclr/runtime-prereqs.proj b/src/coreclr/runtime-prereqs.proj
index b1d1cf8b041d..6eb71f172ad4 100644
--- a/src/coreclr/runtime-prereqs.proj
+++ b/src/coreclr/runtime-prereqs.proj
@@ -2,7 +2,7 @@
   
   
     $(ArtifactsObjDir)_version.h
-    $(ArtifactsObjDir)_version.c
+    $(ArtifactsObjDir)_version.c
     $(ArtifactsObjDir)runtime_version.h
     $(ArtifactsObjDir)native.sourcelink.json
     false
@@ -13,7 +13,23 @@
   
   
 
-  
+  
+  
+  
+  
+    
+      $(ArtifactsObjDir)_version.h
+    
+  
+  
+    
+      $(ArtifactsObjDir)_version.c
+    
+  
   
   
+      <_CoreClrBuildArg Condition="'$(BaseRid)' == ''" Include="-cmakeargs "-DCLR_DOTNET_RID=$(TargetRid)"" />
+      <_CoreClrBuildArg Condition="'$(BaseRid)' != ''" Include="-cmakeargs "-DCLR_DOTNET_RID=$(BaseRid)"" />
       <_CoreClrBuildArg Condition="'$(BuildSubdirectory)' != ''" Include="-subdir $(BuildSubdirectory)" />
       <_CoreClrBuildArg Include="-cmakeargs "-DCLR_DOTNET_HOST_PATH=$(DOTNET_HOST_PATH)"" />
       <_CoreClrBuildArg Condition="'$(HasCdacBuildTool)' == 'true'" Include="-cmakeargs "-DCDAC_BUILD_TOOL_BINARY_PATH=$(RuntimeBinDir)cdac-build-tool\cdac-build-tool.dll"" />
+      <_CoreClrBuildArg Condition="'$(_IcuDir)' != ''" Include="-cmakeargs "-DCLR_CMAKE_ICU_DIR=$(_IcuDir)"" />
+      <_CoreClrBuildArg Condition="'$(FeatureXplatEventSource)' == 'false'" Include="-cmakeargs "-DFEATURE_EVENTSOURCE_XPLAT=0"" />
     
 
     
@@ -75,39 +82,69 @@
       <_CoreClrBuildArg Condition="'$(ClrSpmiSubset)' == 'true'" Include="-component spmi" />
       <_CoreClrBuildArg Condition="'$(ClrCrossComponentsSubset)' == 'true'" Include="-component crosscomponents" />
       <_CoreClrBuildArg Condition="'$(ClrDebugSubset)' == 'true'" Include="-component debug" />
+      <_CoreClrBuildArg Condition="'$(ClrCdacSubset)' == 'true'" Include= "-component cdac" />
     
 
-    
-      <_CoreClrBuildArg Include="-DCMAKE_TOOLCHAIN_FILE=$(ANDROID_NDK_ROOT)/build/cmake/android.toolchain.cmake"/>
-      <_CoreClrBuildArg Include="-DANDROID_NDK=$(ANDROID_NDK_ROOT)"/>
-      <_CoreClrBuildArg Include="-DANDROID_STL=c++_static"/>
-      <_CoreClrBuildArg Include="-DANDROID_CPP_FEATURES="no-rtti exceptions""/>
-      <_CoreClrBuildArg Include="-DANDROID_PLATFORM=android-$(AndroidApiLevelMin)"/>
-      <_CoreClrBuildArg Include="-DANDROID_NATIVE_API_LEVEL=$(AndroidApiLevelMin)"/>
-      <_CoreClrBuildArg Condition="'$(Platform)' == 'arm64'" Include="-DANDROID_ABI=arm64-v8a" />
-      <_CoreClrBuildArg Condition="'$(Platform)' == 'arm'" Include="-DANDROID_ABI=armeabi-v7a" />
-      <_CoreClrBuildArg Condition="'$(Platform)' == 'x86'" Include="-DANDROID_ABI=x86" />
-      <_CoreClrBuildArg Condition="'$(Platform)' == 'x64'" Include="-DANDROID_ABI=x86_64" />
+    
+      <_AndroidToolChainPath>$([MSBuild]::NormalizePath('$(ANDROID_NDK_ROOT)', 'build', 'cmake', 'android.toolchain.cmake'))
+    
+
+    
+      <_CoreClrBuildArg Include="-cmakeargs "-DANDROID_STL=c++_static""/>
+
+      
+      <_CoreClrBuildArg Condition="!$([MSBuild]::IsOsPlatform(Windows))"
+                        Include="-cmakeargs -DANDROID_CPP_FEATURES="no-rtti exceptions""/>
     
 
     
       <_CoreClrBuildArg Include="-cmakeargs "-DCLR_CMAKE_ESRP_CLIENT=$(DotNetEsrpToolPath)"" />
     
 
+<<<<<<< HEAD
     
       <_CoreClrBuildArg Include="-cmakeargs "-DCMAKE_USE_PTHREADS=1"" />
+=======
+    
+      <_CoreClrBuildArg Include="-keepnativesymbols" />
+>>>>>>> upstream-jun
     
 
     
       <_CoreClrBuildScript Condition="$([MSBuild]::IsOsPlatform(Windows))">build-runtime.cmd
       <_CoreClrBuildScript Condition="!$([MSBuild]::IsOsPlatform(Windows))">build-runtime.sh
+      <_CoreClrBuildPreSource Condition="'$(TargetsBrowser)' == 'true' and $([MSBuild]::IsOsPlatform(Windows))">"$([MSBuild]::NormalizePath('$(RepoRoot)src/mono/browser/emsdk', 'emsdk_env.cmd'))" && 
+      <_CoreClrBuildPreSource Condition="'$(TargetsBrowser)' == 'true' and !$([MSBuild]::IsOsPlatform(Windows))">source "$(RepoRoot)src/mono/browser/emsdk/emsdk_env.sh" && 
     
 
-    
-    
-    
+    
+    
   
 
+  
+
+    
+    
+
+    
+    
+    
+  
+
   
 
diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp b/src/coreclr/runtime/CachedInterfaceDispatch.cpp
similarity index 87%
rename from src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp
rename to src/coreclr/runtime/CachedInterfaceDispatch.cpp
index 2938ee709740..3f0c479f24df 100644
--- a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp
+++ b/src/coreclr/runtime/CachedInterfaceDispatch.cpp
@@ -7,31 +7,10 @@
 //
 // ============================================================================
 #include "common.h"
-#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
-
-#include "CommonTypes.h"
-#include "CommonMacros.h"
-#include "daccess.h"
-#include "DebugMacrosExt.h"
-#include "PalRedhawkCommon.h"
-#include "PalRedhawk.h"
-#include "rhassert.h"
-#include "slist.h"
-#include "holder.h"
-#include "Crst.h"
-#include "RedhawkWarnings.h"
-#include "TargetPtrs.h"
-#include "MethodTable.h"
-#include "Range.h"
-#include "allocheap.h"
-#include "rhbinder.h"
-#include "ObjectLayout.h"
-#include "shash.h"
-#include "TypeManager.h"
-#include "RuntimeInstance.h"
-#include "MethodTable.inl"
-#include "CommonMacros.inl"
+#include 
 
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+#include "CachedInterfaceDispatchPal.h"
 #include "CachedInterfaceDispatch.h"
 
 // We always allocate cache sizes with a power of 2 number of entries. We have a maximum size we support,
@@ -212,9 +191,6 @@ static InterfaceDispatchCache * g_rgFreeLists[CID_MAX_CACHE_SIZE_LOG2 + 1];
 // it imposes too much space overhead on list entries on 64-bit (each is actually 16 bytes).
 static CrstStatic g_sListLock;
 
-// The base memory allocator.
-static AllocHeap * g_pAllocHeap = NULL;
-
 // Each cache size has an associated stub used to perform lookup over that cache.
 extern "C" void RhpInterfaceDispatch1();
 extern "C" void RhpInterfaceDispatch2();
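For context on the hunks below: dispatch caches always hold a power-of-two number of entries, and each size has its own bucket in g_rgFreeLists (CID_MAX_CACHE_SIZE_LOG2 + 1 of them), which CacheSizeToIndex selects. A minimal illustrative mapping, assuming log2 bucketing and not taken from the runtime's actual implementation, would be:

// Illustrative only -- maps a power-of-2 cache entry count to a free-list bucket (log2).
#include <assert.h>
#include <stdint.h>

static uint32_t CacheSizeToIndexSketch(uint32_t cCacheEntries)
{
    // The callers guarantee a power-of-two count no larger than CID_MAX_CACHE_SIZE.
    assert(cCacheEntries != 0 && (cCacheEntries & (cCacheEntries - 1)) == 0);
    uint32_t index = 0;
    while ((1u << (index + 1)) <= cCacheEntries)
        index++;
    return index; // 1 -> 0, 2 -> 1, 4 -> 2, ...
}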
@@ -267,13 +243,14 @@ static uint32_t CacheSizeToIndex(uint32_t cCacheEntries)
 // address of the interface dispatch stub associated with this size of cache is returned.
 static uintptr_t AllocateCache(uint32_t cCacheEntries, InterfaceDispatchCache * pExistingCache, const DispatchCellInfo *pNewCellInfo, void ** ppStub)
 {
+#ifndef FEATURE_NATIVEAOT
     if (pNewCellInfo->CellType == DispatchCellType::VTableOffset)
     {
-        ASSERT(pNewCellInfo->VTableOffset < InterfaceDispatchCell::IDC_MaxVTableOffsetPlusOne);
         *ppStub = (void *)&RhpVTableOffsetDispatch;
-        ASSERT(!InterfaceDispatchCell::IsCache(pNewCellInfo->VTableOffset));
-        return pNewCellInfo->VTableOffset;
+        ASSERT(!InterfaceDispatchCell::IsCache(pNewCellInfo->GetVTableOffset()));
+        return pNewCellInfo->GetVTableOffset();
     }
+#endif
 
     ASSERT((cCacheEntries >= 1) && (cCacheEntries <= CID_MAX_CACHE_SIZE));
     ASSERT((pExistingCache == NULL) || (pExistingCache->m_cEntries < cCacheEntries));
@@ -299,9 +276,8 @@ static uintptr_t AllocateCache(uint32_t cCacheEntries, InterfaceDispatchCache *
     if (pCache == NULL)
     {
         // No luck with the free list, allocate the cache from via the AllocHeap.
-        pCache = (InterfaceDispatchCache*)g_pAllocHeap->AllocAligned(sizeof(InterfaceDispatchCache) +
-                                                                     (sizeof(InterfaceDispatchCacheEntry) * cCacheEntries),
-                                                                     sizeof(void*) * 2);
+        pCache = (InterfaceDispatchCache*)InterfaceDispatch_AllocDoublePointerAligned(sizeof(InterfaceDispatchCache) +
+                                                                     (sizeof(InterfaceDispatchCacheEntry) * cCacheEntries));
         if (pCache == NULL)
             return (uintptr_t)NULL;
 
@@ -342,7 +318,7 @@ static uintptr_t AllocateCache(uint32_t cCacheEntries, InterfaceDispatchCache *
 
 // Discards a cache by adding it to a list of caches that may still be in use but will be made available for
 // re-allocation at the next GC.
-static void DiscardCache(InterfaceDispatchCache * pCache)
+void InterfaceDispatch_DiscardCache(InterfaceDispatchCache * pCache)
 {
     CID_COUNTER_INC(CacheDiscards);
 
@@ -365,7 +341,7 @@ static void DiscardCache(InterfaceDispatchCache * pCache)
     if (pDiscardedCacheBlock != NULL)
         g_pDiscardedCacheFree = pDiscardedCacheBlock->m_pNext;
     else
-        pDiscardedCacheBlock = (DiscardedCacheBlock *)g_pAllocHeap->Alloc(sizeof(DiscardedCacheBlock));
+        pDiscardedCacheBlock = (DiscardedCacheBlock *)InterfaceDispatch_AllocPointerAligned(sizeof(DiscardedCacheBlock));
 
     if (pDiscardedCacheBlock != NULL) // if we did NOT get the memory, we leak the discarded block
     {
@@ -379,7 +355,7 @@ static void DiscardCache(InterfaceDispatchCache * pCache)
 
 // Called during a GC to empty the list of discarded caches (which we can now guarantee aren't being accessed)
 // and sort the results into the free lists we maintain for each cache size.
-void ReclaimUnusedInterfaceDispatchCaches()
+void InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches()
 {
     // No need for any locks, we're not racing with any other threads any more.
 
@@ -431,13 +407,9 @@ void ReclaimUnusedInterfaceDispatchCaches()
 }
 
 // One time initialization of interface dispatch.
-bool InitializeInterfaceDispatch()
+bool InterfaceDispatch_Initialize()
 {
-    g_pAllocHeap = new (nothrow) AllocHeap();
-    if (g_pAllocHeap == NULL)
-        return false;
-
-    if (!g_pAllocHeap->Init())
+    if (!InterfaceDispatch_InitializePal())
         return false;
 
     g_sListLock.Init(CrstInterfaceDispatchGlobalLists, CRST_DEFAULT);
@@ -445,7 +417,7 @@ bool InitializeInterfaceDispatch()
     return true;
 }
 
-FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo)
+PCODE InterfaceDispatch_UpdateDispatchCellCache(InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo)
 {
     // Attempt to update the cache with this new mapping (if we have any cache at all, the initial state
     // is none).
@@ -511,35 +483,9 @@ FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE
     // value or the cache we just allocated (another thread performed an update first).
     InterfaceDispatchCache * pDiscardedCache = UpdateCellStubAndCache(pCell, pStub, newCacheValue);
     if (pDiscardedCache)
-        DiscardCache(pDiscardedCache);
+        InterfaceDispatch_DiscardCache(pDiscardedCache);
 
     return (PCODE)pTargetCode;
 }
-FCIMPLEND
-
-FCIMPL2(PCODE, RhpSearchDispatchCellCache, InterfaceDispatchCell * pCell, MethodTable* pInstanceType)
-{
-    // This function must be implemented in native code so that we do not take a GC while walking the cache
-    InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache();
-    if (pCache != NULL)
-    {
-        InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries;
-        for (uint32_t i = 0; i < pCache->m_cEntries; i++, pCacheEntry++)
-            if (pCacheEntry->m_pInstanceType == pInstanceType)
-                return pCacheEntry->m_pTargetCode;
-    }
-
-    return (PCODE)nullptr;
-}
-FCIMPLEND
-
-// Given a dispatch cell, get the type and slot associated with it. This function MUST be implemented
-// in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed
-// code due to its use of the GC state as a lock, and as lifetime control
-FCIMPL2(void, RhpGetDispatchCellInfo, InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo)
-{
-    *pDispatchCellInfo = pCell->GetDispatchCellInfo();
-}
-FCIMPLEND
 
 #endif // FEATURE_CACHED_INTERFACE_DISPATCH
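
For orientation, the free-list bookkeeping in this file relies on cache sizes always being powers of two up to a fixed maximum, so a discarded cache can be filed into a per-size free list indexed by log2 of its entry count. The snippet below is a minimal standalone sketch of that bucketing, assuming the same power-of-two invariant; the constants and names are illustrative rather than the runtime's.

```cpp
#include <cassert>
#include <cstdint>

// Illustrative maximum: caches of 1, 2, 4, ..., 64 entries.
constexpr uint32_t kMaxCacheSizeLog2 = 6;
constexpr uint32_t kMaxCacheSize     = 1u << kMaxCacheSizeLog2;

// Map a power-of-two entry count to a free-list index (0 for 1 entry,
// 1 for 2 entries, ..., kMaxCacheSizeLog2 for kMaxCacheSize entries).
uint32_t CacheSizeToIndex(uint32_t cEntries)
{
    assert(cEntries >= 1 && cEntries <= kMaxCacheSize);
    assert((cEntries & (cEntries - 1)) == 0); // power of two

    uint32_t index = 0;
    while ((1u << index) < cEntries)
        index++;
    return index;
}
```
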
diff --git a/src/coreclr/runtime/CachedInterfaceDispatch.h b/src/coreclr/runtime/CachedInterfaceDispatch.h
new file mode 100644
index 000000000000..690b1ebaf86b
--- /dev/null
+++ b/src/coreclr/runtime/CachedInterfaceDispatch.h
@@ -0,0 +1,75 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// ==--==
+//
+// Shared (non-architecture-specific) portions of a mechanism to perform interface dispatch using an
+// alternative to VSD that does not require runtime generation of code.
+//
+// ============================================================================
+
+#ifndef __CACHEDINTERFACEDISPATCH_H__
+#define __CACHEDINTERFACEDISPATCH_H__
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+// Interface dispatch caches contain an array of these entries. An instance of a cache is paired with a stub
+// that implicitly knows how many entries are contained. These entries must be aligned to twice the alignment
+// of a pointer due to the synchronization mechanism used to update them at runtime.
+struct InterfaceDispatchCacheEntry
+{
+    MethodTable *    m_pInstanceType;    // Potential type of the object instance being dispatched on
+    PCODE            m_pTargetCode;      // Method to dispatch to if the actual instance type matches the above
+};
+
+// The interface dispatch cache itself. As well as the entries we include the cache size (since logic such as
+// cache miss processing needs to determine this value in a synchronized manner, so it can't be contained in
+// the owning interface dispatch indirection cell) and a list entry used to link the caches in one of a couple
+// of lists related to cache reclamation.
+
+#pragma warning(push)
+#pragma warning(disable:4200) // nonstandard extension used: zero-sized array in struct/union
+struct InterfaceDispatchCell;
+struct InterfaceDispatchCache
+{
+    InterfaceDispatchCacheHeader m_cacheHeader;
+    union
+    {
+        InterfaceDispatchCache *    m_pNextFree;    // next in free list
+#ifdef INTERFACE_DISPATCH_CACHE_HAS_CELL_BACKPOINTER
+        // On ARM and x86 the slow path in the stubs needs to reload the cell pointer from the cache due to the lack
+        // of available (volatile non-argument) registers.
+        InterfaceDispatchCell  *    m_pCell;        // pointer back to interface dispatch cell
+#endif
+    };
+    uint32_t                      m_cEntries;
+    InterfaceDispatchCacheEntry m_rgEntries[];
+};
+#pragma warning(pop)
+
+bool InterfaceDispatch_Initialize();
+PCODE InterfaceDispatch_UpdateDispatchCellCache(InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo);
+void InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches();
+void InterfaceDispatch_DiscardCache(InterfaceDispatchCache * pCache);
+inline void InterfaceDispatch_DiscardCacheHeader(InterfaceDispatchCacheHeader * pCache)
+{
+    return InterfaceDispatch_DiscardCache((InterfaceDispatchCache*)pCache);
+}
+
+inline PCODE InterfaceDispatch_SearchDispatchCellCache(InterfaceDispatchCell * pCell, MethodTable* pInstanceType)
+{
+    // This function must be implemented in native code so that we do not take a GC while walking the cache
+    InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache();
+    if (pCache != NULL)
+    {
+        InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries;
+        for (uint32_t i = 0; i < pCache->m_cEntries; i++, pCacheEntry++)
+            if (pCacheEntry->m_pInstanceType == pInstanceType)
+                return pCacheEntry->m_pTargetCode;
+    }
+
+    return (PCODE)nullptr;
+}
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
+
+#endif // __CACHEDINTERFACEDISPATCH_H__
\ No newline at end of file
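
The inline `InterfaceDispatch_SearchDispatchCellCache` above is just a linear scan over (MethodTable, target) pairs. As a rough, self-contained sketch of that lookup, with simplified stand-in types rather than the runtime's actual structures:

```cpp
#include <cstdint>

// Simplified stand-ins for the runtime's MethodTable and PCODE types.
using MethodTable = const void;
using PCODE = uintptr_t;

struct CacheEntry
{
    MethodTable* instanceType; // type previously seen at this call site
    PCODE        targetCode;   // implementation to dispatch to for that type
};

// Return the cached target for an exact instance-type match, or 0 to signal a
// miss; on a miss the slow path resolves the method and updates the cache.
PCODE SearchCache(const CacheEntry* entries, uint32_t count, MethodTable* instanceType)
{
    for (uint32_t i = 0; i < count; i++)
    {
        if (entries[i].instanceType == instanceType)
            return entries[i].targetCode;
    }
    return 0;
}
```
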
diff --git a/src/coreclr/runtime/amd64/AllocFast.S b/src/coreclr/runtime/amd64/AllocFast.S
new file mode 100644
index 000000000000..d5b366b876da
--- /dev/null
+++ b/src/coreclr/runtime/amd64/AllocFast.S
@@ -0,0 +1,296 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.intel_syntax noprefix
+#include "AsmMacros_Shared.h"
+
+// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+// allocation context then automatically fallback to the slow allocation path.
+//  RDI == MethodTable
+LEAF_ENTRY RhpNewFast, _TEXT
+
+        push_nonvol_reg rbx
+        mov         rbx, rdi
+
+        // rax = ee_alloc_context pointer; trashes volatile registers
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        //
+        // rbx contains MethodTable pointer
+        //
+        mov         edx, [rbx + OFFSETOF__MethodTable__m_uBaseSize]
+
+        //
+        // rax: ee_alloc_context pointer
+        // rbx: MethodTable pointer
+        // rdx: base size
+        //
+
+        // Load potential new object address into rsi.
+        mov         rsi, [rax + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+
+        // Load and calculate the maximum size of object we can fit.
+        mov         rdi, [rax + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        sub         rdi, rsi
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        cmp         rdx, rdi
+        ja          LOCAL_LABEL(RhpNewFast_RarePath)
+
+        // Calculate the new alloc pointer to account for the allocation.
+        add         rdx, rsi
+
+        // Set the new object's MethodTable pointer.
+        mov         [rsi + OFFSETOF__Object__m_pEEType], rbx
+
+        // Update the alloc pointer to the newly calculated one.
+        mov         [rax + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], rdx
+
+        mov         rax, rsi
+
+        .cfi_remember_state
+        pop_nonvol_reg rbx
+        ret
+
+        .cfi_restore_state
+        .cfi_def_cfa_offset 16          // workaround cfi_restore_state bug
+LOCAL_LABEL(RhpNewFast_RarePath):
+        mov         rdi, rbx            // restore MethodTable
+        xor         esi, esi
+        pop_nonvol_reg rbx
+        jmp         C_FUNC(RhpNewObject)
+
+LEAF_END RhpNewFast, _TEXT
+
+
+// Allocate non-array object with finalizer
+//  RDI == MethodTable
+LEAF_ENTRY RhpNewFinalizable, _TEXT
+        mov         esi, GC_ALLOC_FINALIZE
+        jmp         C_FUNC(RhpNewObject)
+LEAF_END RhpNewFinalizable, _TEXT
+
+
+// Allocate non-array object
+//  RDI == MethodTable
+//  ESI == alloc flags
+NESTED_ENTRY RhpNewObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME rcx
+
+        // RCX: transition frame
+
+        // Preserve the MethodTable in RBX
+        mov         rbx, rdi
+
+        xor         edx, edx    // numElements
+
+        // Call the rest of the allocation helper.
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call        C_FUNC(RhpGcAlloc)
+
+        test        rax, rax
+        jz          LOCAL_LABEL(NewOutOfMemory)
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        ret
+
+        .cfi_restore_state
+        .cfi_def_cfa_offset 96          // workaround cfi_restore_state bug
+LOCAL_LABEL(NewOutOfMemory):
+        // This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov         rdi, rbx            // MethodTable pointer
+        xor         esi, esi            // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        jmp         EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation)
+
+NESTED_END RhpNewObject, _TEXT
+
+
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewPtrArrayFast
+//  RAX == string/array size
+//  RDI == MethodTable
+//  ESI == character/element count
+.macro NEW_ARRAY_FAST
+
+        push_nonvol_reg rbx
+        push_nonvol_reg r12
+        push_nonvol_reg r13
+
+        mov         rbx, rdi    // save MethodTable
+        mov         r12, rsi    // save element count
+        mov         r13, rax    // save size
+
+        // rax = ee_alloc_context pointer; trashes volatile registers
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        mov         rcx, rax
+
+        // rcx == ee_alloc_context*
+        // rbx == MethodTable
+        // r12 == element count
+        // r13 == string/array size
+
+        mov         rax, [rcx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        mov         rdi, [rcx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        sub         rdi, rax
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        cmp         r13, rdi
+        ja          1f
+
+        add         r13, rax
+        mov         [rax + OFFSETOF__Object__m_pEEType], rbx
+        mov         [rax + OFFSETOF__Array__m_Length], r12d
+        mov         [rcx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r13
+
+        .cfi_remember_state
+        pop_nonvol_reg r13
+        pop_nonvol_reg r12
+        pop_nonvol_reg rbx
+        ret
+
+        .cfi_restore_state
+        .cfi_def_cfa_offset 32  // workaround cfi_restore_state bug
+1:
+        mov         rdi, rbx    // restore MethodTable
+        mov         rsi, r12    // restore element count
+
+        pop_nonvol_reg r13
+        pop_nonvol_reg r12
+        pop_nonvol_reg rbx
+        jmp C_FUNC(RhpNewVariableSizeObject)
+
+.endm // NEW_ARRAY_FAST
+
+
+// Allocate a string.
+//  RDI == MethodTable
+//  ESI == character/element count
+LEAF_ENTRY RhNewString, _TEXT
+
+        // we want to limit the element count to the non-negative 32-bit int range
+        cmp         rsi, MAX_STRING_LENGTH
+        ja          LOCAL_LABEL(StringSizeOverflow)
+
+        // Compute overall allocation size (align(base size + (element size * elements), 8)).
+        lea         rax, [rsi * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7]
+        and         rax, -8
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(StringSizeOverflow):
+        // We get here if the size of the final string object can't be represented as an unsigned
+        // 32-bit value. We're going to tail-call to a managed helper that will throw
+        // an OOM exception that the caller of this allocator understands.
+
+        // rdi holds MethodTable pointer already
+        xor         esi, esi            // Indicate that we should throw OOM.
+        jmp         EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation)
+
+LEAF_END RhNewString, _TEXT
+
+
+// Allocate one dimensional, zero based array (SZARRAY).
+//  RDI == MethodTable
+//  ESI == element count
+LEAF_ENTRY RhpNewArrayFast, _TEXT
+
+        // we want to limit the element count to the non-negative 32-bit int range
+        cmp         rsi, 0x07fffffff
+        ja          LOCAL_LABEL(ArraySizeOverflow)
+
+        // Compute overall allocation size (align(base size + (element size * elements), 8)).
+        movzx       eax, word ptr [rdi + OFFSETOF__MethodTable__m_usComponentSize]
+        imul        rax, rsi
+        lea         rax, [rax + SZARRAY_BASE_SIZE + 7]
+        and         rax, -8
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(ArraySizeOverflow):
+        // We get here if the size of the final array object can't be represented as an unsigned
+        // 32-bit value. We're going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        // rdi holds MethodTable pointer already
+        mov         esi, 1              // Indicate that we should throw OverflowException
+        jmp         EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation)
+
+LEAF_END RhpNewArrayFast, _TEXT
+
+
+// Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements.
+//  RDI == MethodTable
+//  ESI == element count
+LEAF_ENTRY RhpNewPtrArrayFast, _TEXT
+
+        // Delegate overflow handling to the generic helper conservatively
+
+        cmp         rsi, (0x40000000 / 8) // sizeof(void*)
+        jae         C_FUNC(RhpNewArrayFast)
+
+        // In this case we know the element size is sizeof(void *), or 8 for x64
+        // This helps us in two ways - we can shift instead of multiplying, and
+        // there's no need to align the size either
+
+        lea         eax, [esi * 8 + SZARRAY_BASE_SIZE]
+
+        // No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
+        // to be a multiple of 8.
+
+        NEW_ARRAY_FAST
+
+LEAF_END RhpNewPtrArrayFast, _TEXT
+
+
+NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler
+
+        // rdi == MethodTable
+        // rsi == element count
+
+        PUSH_COOP_PINVOKE_FRAME rcx
+
+        // rcx: transition frame
+
+        // Preserve the MethodTable in RBX
+        mov         rbx, rdi
+
+        mov         rdx, rsi        // numElements
+
+        // passing MethodTable in rdi
+        xor         rsi, rsi        // uFlags
+        // passing pTransitionFrame in rcx
+
+        // Call the rest of the allocation helper.
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call        C_FUNC(RhpGcAlloc)
+
+        test        rax, rax
+        jz          LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory)
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        ret
+
+        .cfi_restore_state
+        .cfi_def_cfa_offset 96          // workaround cfi_restore_state bug
+LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory):
+        // This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov         rdi, rbx            // MethodTable pointer
+        xor         esi, esi            // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        jmp         EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation)
+
+NESTED_END RhpNewVariableSizeObject, _TEXT
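
In C-like terms, the fast path above is a bump-pointer allocation against the thread's `ee_alloc_context`: compare the requested size against the space left before `combined_limit`, then either bump `alloc_ptr` or fall back to the GC-assisted helper. A hedged sketch with illustrative names and a stubbed slow path:

```cpp
#include <cstddef>
#include <cstdint>

// Simplified per-thread allocation context; field names are illustrative.
struct AllocContext
{
    uint8_t* alloc_ptr;      // next free byte in the current allocation region
    uint8_t* combined_limit; // end of the region usable without taking the slow path
};

// Placeholder for the GC-assisted slow path (RhpGcAlloc in the code above).
static void* SlowAllocate(AllocContext&, size_t) { return nullptr; }

// Fast path: bump the pointer if the object fits, otherwise defer to the slow
// helper (the equivalent of the RarePath branch in the assembly).
void* FastAllocate(AllocContext& ctx, size_t size)
{
    size_t remaining = static_cast<size_t>(ctx.combined_limit - ctx.alloc_ptr);
    if (size > remaining)
        return SlowAllocate(ctx, size);

    void* obj = ctx.alloc_ptr;
    ctx.alloc_ptr += size;
    // The caller then stores the MethodTable pointer into the new object's header.
    return obj;
}
```
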
diff --git a/src/coreclr/runtime/amd64/AllocFast.asm b/src/coreclr/runtime/amd64/AllocFast.asm
new file mode 100644
index 000000000000..c099cb829b2c
--- /dev/null
+++ b/src/coreclr/runtime/amd64/AllocFast.asm
@@ -0,0 +1,249 @@
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+
+include AsmMacros_Shared.inc
+
+
+;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+;; allocation context then automatically fallback to the slow allocation path.
+;;  RCX == MethodTable
+LEAF_ENTRY RhpNewFast, _TEXT
+
+        ;; rdx = ee_alloc_context pointer, TRASHES rax
+        INLINE_GET_ALLOC_CONTEXT_BASE rdx, rax
+
+        ;;
+        ;; rcx contains MethodTable pointer
+        ;;
+        mov         r8d, [rcx + OFFSETOF__MethodTable__m_uBaseSize]
+
+        ;;
+        ;; eax: base size
+        ;; rcx: MethodTable pointer
+        ;; rdx: ee_alloc_context pointer
+        ;;
+
+        mov         rax, [rdx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        mov         r9, [rdx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        sub         r9, rax
+        cmp         r8, r9
+        ja          RhpNewFast_RarePath
+
+        ;; Calculate the new alloc pointer to account for the allocation.
+        add         r8, rax
+
+        ;; Set the new object's MethodTable pointer
+        mov         [rax + OFFSETOF__Object__m_pEEType], rcx
+
+        ;; Set the new alloc pointer
+        mov         [rdx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8
+
+        ret
+
+RhpNewFast_RarePath:
+        xor         edx, edx
+        jmp         RhpNewObject
+
+LEAF_END RhpNewFast, _TEXT
+
+
+;; Allocate non-array object with finalizer
+;;  RCX == MethodTable
+LEAF_ENTRY RhpNewFinalizable, _TEXT
+
+        mov         edx, GC_ALLOC_FINALIZE
+        jmp         RhpNewObject
+
+LEAF_END RhpNewFinalizable, _TEXT
+
+
+;; Allocate non-array object
+;;  RCX == MethodTable
+;;  EDX == alloc flags
+NESTED_ENTRY RhpNewObject, _TEXT
+
+        PUSH_COOP_PINVOKE_FRAME r9
+
+        ; R9: transition frame
+
+        ;; Preserve the MethodTable in RSI
+        mov         rsi, rcx
+
+        xor         r8d, r8d        ; numElements
+
+        ;; Call the rest of the allocation helper.
+        ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call        RhpGcAlloc
+
+        test        rax, rax
+        jz          NewOutOfMemory
+
+        POP_COOP_PINVOKE_FRAME
+        ret
+
+NewOutOfMemory:
+        ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        ;; an out of memory exception that the caller of this allocator understands.
+
+        mov         rcx, rsi            ; MethodTable pointer
+        xor         edx, edx            ; Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        jmp         RhExceptionHandling_FailedAllocation
+
+NESTED_END RhpNewObject, _TEXT
+
+
+;; Shared code for RhNewString, RhpNewArrayFast and RhpNewPtrArrayFast
+;;  RAX == string/array size
+;;  RCX == MethodTable
+;;  RDX == character/element count
+NEW_ARRAY_FAST MACRO
+
+        ; r10 = ee_alloc_context pointer, TRASHES r8
+        INLINE_GET_ALLOC_CONTEXT_BASE r10, r8
+
+        mov         r8, rax
+        mov         rax, [r10 + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        mov         r9, [r10 + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        sub         r9, rax
+
+        ; rax == new object ptr
+        ; rcx == MethodTable
+        ; rdx == element count
+        ; r8 == array size
+        ; r10 == ee_alloc_context pointer
+        cmp         r8, r9
+        ja          RhpNewVariableSizeObject
+
+        add         r8, rax
+        mov         [rax + OFFSETOF__Object__m_pEEType], rcx
+        mov         [rax + OFFSETOF__Array__m_Length], edx
+        mov         [r10 + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8
+        ret
+
+ENDM ; NEW_ARRAY_FAST
+
+
+;; Allocate a string.
+;;  RCX == MethodTable
+;;  EDX == character/element count
+LEAF_ENTRY RhNewString, _TEXT
+
+        ; we want to limit the element count to the non-negative 32-bit int range
+        cmp         rdx, MAX_STRING_LENGTH
+        ja          StringSizeOverflow
+
+        ; Compute overall allocation size (align(base size + (element size * elements), 8)).
+        lea         rax, [(rdx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)]
+        and         rax, -8
+
+        NEW_ARRAY_FAST
+
+StringSizeOverflow:
+        ; We get here if the size of the final string object can't be represented as an unsigned
+        ; 32-bit value. We're going to tail-call to a managed helper that will throw
+        ; an OOM exception that the caller of this allocator understands.
+
+        ; rcx holds MethodTable pointer already
+        xor         edx, edx            ; Indicate that we should throw OOM.
+        jmp         RhExceptionHandling_FailedAllocation
+
+LEAF_END RhNewString, _TEXT
+
+
+;; Allocate one dimensional, zero based array (SZARRAY).
+;;  RCX == MethodTable
+;;  EDX == element count
+LEAF_ENTRY RhpNewArrayFast, _TEXT
+
+        ; we want to limit the element count to the non-negative 32-bit int range
+        cmp         rdx, 07fffffffh
+        ja          ArraySizeOverflow
+
+        ; Compute overall allocation size (align(base size + (element size * elements), 8)).
+        movzx       eax, word ptr [rcx + OFFSETOF__MethodTable__m_usComponentSize]
+        imul        rax, rdx
+        lea         rax, [rax + SZARRAY_BASE_SIZE + 7]
+        and         rax, -8
+
+        NEW_ARRAY_FAST
+
+ArraySizeOverflow:
+        ; We get here if the size of the final array object can't be represented as an unsigned
+        ; 32-bit value. We're going to tail-call to a managed helper that will throw
+        ; an overflow exception that the caller of this allocator understands.
+
+        ; rcx holds MethodTable pointer already
+        mov         edx, 1              ; Indicate that we should throw OverflowException
+        jmp         RhExceptionHandling_FailedAllocation
+
+LEAF_END RhpNewArrayFast, _TEXT
+
+
+;; Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements.
+;;  RCX == MethodTable
+;;  EDX == element count
+LEAF_ENTRY RhpNewPtrArrayFast, _TEXT
+
+        ; Delegate overflow handling to the generic helper conservatively
+
+        cmp         rdx, (40000000h / 8) ; sizeof(void*)
+        jae         RhpNewArrayFast
+
+        ; In this case we know the element size is sizeof(void *), or 8 for x64
+        ; This helps us in two ways - we can shift instead of multiplying, and
+        ; there's no need to align the size either
+
+        lea         eax, [edx * 8 + SZARRAY_BASE_SIZE]
+
+        ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
+        ; to be a multiple of 8.
+
+        NEW_ARRAY_FAST
+
+LEAF_END RhpNewPtrArrayFast, _TEXT
+
+
+NESTED_ENTRY RhpNewVariableSizeObject, _TEXT
+
+        ; rcx == MethodTable
+        ; rdx == element count
+
+        PUSH_COOP_PINVOKE_FRAME r9
+
+        ; r9: transition frame
+
+        ; Preserve the MethodTable in RSI
+        mov         rsi, rcx
+
+        ; passing MethodTable in rcx
+        mov         r8, rdx         ; numElements
+        xor         rdx, rdx        ; uFlags
+        ; passing pTransitionFrame in r9
+
+        ; Call the rest of the allocation helper.
+        ; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call        RhpGcAlloc
+
+        test        rax, rax
+        jz          RhpNewVariableSizeObject_OutOfMemory
+
+        POP_COOP_PINVOKE_FRAME
+        ret
+
+RhpNewVariableSizeObject_OutOfMemory:
+        ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        ;; an out of memory exception that the caller of this allocator understands.
+
+        mov         rcx, rsi            ; MethodTable pointer
+        xor         edx, edx            ; Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        jmp         RhExceptionHandling_FailedAllocation
+
+NESTED_END RhpNewVariableSizeObject, _TEXT
+
+        END
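
The string and array helpers all compute `align(base size + element size * count, 8)` and bounce to a throwing helper when the element count is out of range. A minimal sketch of that computation for strings follows, assuming illustrative values for the base size and length limit (the real `STRING_BASE_SIZE` and `MAX_STRING_LENGTH` come from the runtime's object layout and are imported by the assembly):

```cpp
#include <cstdint>
#include <optional>

// Assumed values for illustration only; the real constants are defined by the
// runtime's object layout.
constexpr uint64_t kStringBaseSize      = 24;         // header + length field + padding (assumption)
constexpr uint64_t kStringComponentSize = 2;          // UTF-16 code unit
constexpr uint64_t kMaxStringLength     = 0x3FFFFFDF; // assumption

// Compute align(base + component * count, 8); std::nullopt mirrors the
// StringSizeOverflow branch, where the caller tail-calls the throw helper.
std::optional<uint64_t> StringAllocationSize(uint64_t charCount)
{
    if (charCount > kMaxStringLength)
        return std::nullopt;

    uint64_t size = kStringBaseSize + charCount * kStringComponentSize;
    return (size + 7) & ~UINT64_C(7); // round up to an 8-byte boundary
}
```
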
diff --git a/src/coreclr/runtime/amd64/StubDispatch.S b/src/coreclr/runtime/amd64/StubDispatch.S
new file mode 100644
index 000000000000..0ae32980c5a9
--- /dev/null
+++ b/src/coreclr/runtime/amd64/StubDispatch.S
@@ -0,0 +1,96 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.intel_syntax noprefix
+#include "AsmMacros_Shared.h"
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+#if defined(__APPLE__)
+    // Currently the build is failing without this due to an issue if the first method in the assembly file has an alternate entry at the start of the file.
+    // Fix, but adding an empty, unused method
+    LEAF_ENTRY RhpStubDispatchDoNotFailToBuild, _TEXT
+       ret
+    LEAF_END RhpStubDispatchDoNotFailToBuild, _TEXT
+#endif
+
+
+// trick to avoid PLT relocation at runtime which corrupts registers
+#define REL_C_FUNC(name) C_FUNC(name)@gotpcrel
+
+
+// Macro that generates a stub consuming a cache with the given number of entries.
+.macro DEFINE_INTERFACE_DISPATCH_STUB entries
+
+LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT
+
+        // r11 currently contains the indirection cell address.
+        // load r10 to point to the cache block.
+        mov     r10, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+        // Load the MethodTable from the object instance in rdi.
+#ifdef TARGET_APPLE
+// Apple's linker has issues which break unwind info if
+// an ALTERNATE_ENTRY is present in the middle of a function see https://github.com/dotnet/runtime/pull/114982#discussion_r2083272768
+.cfi_endproc
+#endif
+        ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries
+#ifdef TARGET_APPLE
+.cfi_startproc
+#endif
+        mov     rax, [rdi]
+
+        CurrentOffset = OFFSETOF__InterfaceDispatchCache__m_rgEntries
+
+        // For each entry in the cache, see if its MethodTable type matches the MethodTable in rax.
+        // If so, tail-jump to the target code in the entry's second slot.  If not, skip to the next InterfaceDispatchCacheEntry.
+        .rept \entries
+            cmp     rax, [r10 + CurrentOffset]
+            jne     0f
+            jmp     [r10 + CurrentOffset + 8]
+        0:
+            CurrentOffset = CurrentOffset + 16
+        .endr
+
+        // r11 still contains the indirection cell address.
+
+        jmp     C_FUNC(RhpInterfaceDispatchSlow)
+LEAF_END RhpInterfaceDispatch\entries, _TEXT
+
+.endm // DEFINE_INTERFACE_DISPATCH_STUB
+
+
+
+// Define all the stub routines we currently need.
+//
+// The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed.
+// If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo
+//
+// If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the
+// *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens
+// during the interface dispatch.
+//
+DEFINE_INTERFACE_DISPATCH_STUB 1
+DEFINE_INTERFACE_DISPATCH_STUB 2
+DEFINE_INTERFACE_DISPATCH_STUB 4
+DEFINE_INTERFACE_DISPATCH_STUB 8
+DEFINE_INTERFACE_DISPATCH_STUB 16
+DEFINE_INTERFACE_DISPATCH_STUB 32
+DEFINE_INTERFACE_DISPATCH_STUB 64
+
+// Initial dispatch on an interface when we don't have a cache yet.
+LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT
+ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+        // Trigger an AV if we're dispatching on a null this.
+        // The exception handling infrastructure is aware of the fact that this is the first
+        // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        // to a NullReferenceException at the callsite.
+        cmp     byte ptr [rdi], 0
+
+        // Just tail call to the cache miss helper.
+        jmp     C_FUNC(RhpInterfaceDispatchSlow)
+
+LEAF_END RhpInitialInterfaceDispatch, _TEXT
+
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
\ No newline at end of file
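
The `*AVLocation` labels exist so the exception-handling code can recognize that an access violation on the first dereference in a dispatch stub is really a null `this` at the call site. A hedged sketch of that check, with the label addresses passed in explicitly since the real lookup lives in the runtime's EH helpers:

```cpp
#include <cstddef>

// Return true when a hardware AV at 'faultAddress' landed on the null-'this'
// dereference inside one of the dispatch stubs; the runtime then reports a
// NullReferenceException at the managed call site instead of a fatal fault.
// 'avLocations' stands in for the addresses of the *AVLocation labels.
bool IsDispatchStubNullCheck(const void* faultAddress,
                             const void* const* avLocations, size_t count)
{
    for (size_t i = 0; i < count; i++)
    {
        if (avLocations[i] == faultAddress)
            return true;
    }
    return false;
}
```
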
diff --git a/src/coreclr/runtime/amd64/StubDispatch.asm b/src/coreclr/runtime/amd64/StubDispatch.asm
new file mode 100644
index 000000000000..1863a43c1472
--- /dev/null
+++ b/src/coreclr/runtime/amd64/StubDispatch.asm
@@ -0,0 +1,88 @@
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+
+include AsmMacros_Shared.inc
+
+
+ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+EXTERN RhpInterfaceDispatchSlow : PROC
+
+;; Macro that generates code to check a single cache entry.
+CHECK_CACHE_ENTRY macro entry
+NextLabel textequ @CatStr( Attempt, %entry+1 )
+        cmp     rax, [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16)]
+        jne     NextLabel
+        jmp     qword ptr [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16) + 8]
+NextLabel:
+endm
+
+
+;; Macro that generates a stub consuming a cache with the given number of entries.
+DEFINE_INTERFACE_DISPATCH_STUB macro entries
+
+StubName textequ @CatStr( RhpInterfaceDispatch, entries )
+StubAVLocation textequ @CatStr( RhpInterfaceDispatchAVLocation, entries )
+
+LEAF_ENTRY StubName, _TEXT
+
+;EXTERN CID_g_cInterfaceDispatches : DWORD
+        ;inc     [CID_g_cInterfaceDispatches]
+
+        ;; r11 currently contains the indirection cell address.
+        ;; load r10 to point to the cache block.
+        mov     r10, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+        ;; Load the MethodTable from the object instance in rcx.
+        ALTERNATE_ENTRY StubAVLocation
+        mov     rax, [rcx]
+
+CurrentEntry = 0
+    while CurrentEntry lt entries
+        CHECK_CACHE_ENTRY %CurrentEntry
+CurrentEntry = CurrentEntry + 1
+    endm
+
+        ;; r11 still contains the indirection cell address.
+
+        jmp RhpInterfaceDispatchSlow
+
+LEAF_END StubName, _TEXT
+
+    endm ;; DEFINE_INTERFACE_DISPATCH_STUB
+
+
+;; Define all the stub routines we currently need.
+;;
+;; The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed.
+;; If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo
+;;
+;; If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the
+;; *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens
+;; during the interface dispatch.
+;;
+DEFINE_INTERFACE_DISPATCH_STUB 1
+DEFINE_INTERFACE_DISPATCH_STUB 2
+DEFINE_INTERFACE_DISPATCH_STUB 4
+DEFINE_INTERFACE_DISPATCH_STUB 8
+DEFINE_INTERFACE_DISPATCH_STUB 16
+DEFINE_INTERFACE_DISPATCH_STUB 32
+DEFINE_INTERFACE_DISPATCH_STUB 64
+
+;; Initial dispatch on an interface when we don't have a cache yet.
+LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT
+ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+        ;; Trigger an AV if we're dispatching on a null this.
+        ;; The exception handling infrastructure is aware of the fact that this is the first
+        ;; instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        ;; to a NullReferenceException at the callsite.
+        cmp     byte ptr [rcx], 0
+
+        ;; Just tail call to the cache miss helper.
+        jmp RhpInterfaceDispatchSlow
+
+LEAF_END RhpInitialInterfaceDispatch, _TEXT
+
+endif ;; FEATURE_CACHED_INTERFACE_DISPATCH
+
+end
diff --git a/src/coreclr/runtime/amd64/WriteBarriers.S b/src/coreclr/runtime/amd64/WriteBarriers.S
new file mode 100644
index 000000000000..c15ee30e5dff
--- /dev/null
+++ b/src/coreclr/runtime/amd64/WriteBarriers.S
@@ -0,0 +1,344 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.intel_syntax noprefix
+#include "AsmMacros_Shared.h"
+
+#if defined(__APPLE__)
+    // Currently the build is failing without this due to an issue if the first method in the assembly file has an alternate entry at the start of the file.
+    // Fix by adding an empty, unused method
+    LEAF_ENTRY RhpWriteBarriersDoNotFailToBuild, _TEXT
+       ret
+    LEAF_END RhpWriteBarriersDoNotFailToBuild, _TEXT
+#endif
+
+#ifdef WRITE_BARRIER_CHECK
+
+.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG
+
+    // If g_GCShadow is 0, don't perform the check.
+    cmp     qword ptr [C_VAR(g_GCShadow)], 0
+    je      LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG)
+
+    // Save DESTREG since we're about to modify it (and we need the original value both within the macro and
+    // once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of
+    // the prolog inside a method without a frame. But given that this is only debug code and generally we
+    // shouldn't be walking the stack at this point, it seems preferable to recoding all the barrier
+    // variants to set up frames. The compiler knows exactly which registers are trashed in the simple write
+    // barrier case, so we don't have any more scratch registers to play with (and doing so would only make
+    // things harder if at a later stage we want to allow multiple barrier versions based on the input
+    // registers).
+    push    \DESTREG
+
+    // Transform DESTREG into the equivalent address in the shadow heap.
+    sub     \DESTREG, [C_VAR(g_lowest_address)]
+    jb      LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG)
+    add     \DESTREG, [C_VAR(g_GCShadow)]
+    cmp     \DESTREG, [C_VAR(g_GCShadowEnd)]
+    jae     LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG)
+
+    // Update the shadow heap.
+    mov     [\DESTREG], \REFREG
+
+    // Now check that the real heap location still contains the value we just wrote into the shadow heap. This
+    // read must be strongly ordered wrt the previous write to prevent race conditions. We also need to
+    // recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock
+    // prefix).
+    xchg    [rsp], \DESTREG
+    cmp     [\DESTREG], \REFREG
+    jne     LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG)
+
+    // The original DESTREG value is now restored but the stack has a value (the shadow version of the
+    // location) pushed. Need to discard this push before we are done.
+    add     rsp, 8
+    jmp     LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG)
+
+LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG):
+    // Someone went and updated the real heap. We need to invalidate the shadow location since we can't
+    // guarantee whose shadow update won.
+
+    // Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an
+    // additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg
+    // variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 64-bit
+    // immediate and therefore must be moved into a register before it can be written to the shadow
+    // location.
+    xchg    [rsp], \DESTREG
+    push    \REFREG
+    movabs  \REFREG, INVALIDGCVALUE
+    mov     qword ptr [\DESTREG], \REFREG
+    pop     \REFREG
+
+LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG):
+    // Restore original DESTREG value from the stack.
+    pop     \DESTREG
+
+LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG):
+.endm
+
+#else // WRITE_BARRIER_CHECK
+
+.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG
+.endm
+
+#endif // WRITE_BARRIER_CHECK
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+// name of the register that points to the location to be updated and the name of the register that holds the
+// object reference (this should be in upper case as it's used in the definition of the name of the helper).
+.macro DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG
+
+    // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+    // we're in a debug build and write barrier checking has been enabled).
+    UPDATE_GC_SHADOW \BASENAME, \REFREG, rdi
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+    mov     r11, [C_VAR(g_write_watch_table)]
+    cmp     r11, 0x0
+    je      LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG)
+
+    mov     r10, rdi
+    shr     r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
+    add     r10, r11
+    cmp     byte ptr [r10], 0x0
+    jne     LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG)
+    mov     byte ptr [r10], 0xFF
+#endif
+
+LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG):
+
+    // If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    // (since the object won't be collected or moved by an ephemeral collection).
+    cmp     \REFREG, [C_VAR(g_ephemeral_low)]
+    jb      LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
+    cmp     \REFREG, [C_VAR(g_ephemeral_high)]
+    jae     LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
+
+    // We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+    // track this write. The location address is translated into an offset in the card table bitmap. We set
+    // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+    // the byte if it hasn't already been done since writes are expensive and impact scaling.
+    shr     rdi, 0x0B
+    mov     r10, [C_VAR(g_card_table)]
+    cmp     byte ptr [rdi + r10], 0x0FF
+    je      LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
+
+// We get here if it's necessary to update the card table.
+    mov     byte ptr [rdi + r10], 0xFF
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    // Shift rdi by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
+    shr     rdi, 0x0A
+    add     rdi, [C_VAR(g_card_bundle_table)]
+    cmp     byte ptr [rdi], 0xFF
+    je      LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
+
+    mov     byte ptr [rdi], 0xFF
+#endif
+
+LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG):
+    ret
+
+.endm
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. One argument is taken, the
+// name of the register that will hold the object reference (this should be in upper case as it's used in the
+// definition of the name of the helper).
+.macro DEFINE_UNCHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME
+
+// Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard
+// decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that
+// location is in one of the other general registers determined by the value of REFREG.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction
+// - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
+LEAF_ENTRY RhpAssignRef\EXPORT_REG_NAME, _TEXT
+
+    // Export the canonical write barrier under unqualified name as well
+    .ifc \REFREG, RSI
+    ALTERNATE_ENTRY RhpAssignRef
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+    .endif
+
+    // Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+    // and the card table update we may perform below.
+    mov     qword ptr [rdi], \REFREG
+
+    DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, \REFREG
+
+LEAF_END RhpAssignRef\EXPORT_REG_NAME, _TEXT
+.endm
+
+// One day we might have write barriers for all the possible argument registers but for now we have
+// just one write barrier that assumes the input register is RSI.
+DEFINE_UNCHECKED_WRITE_BARRIER RSI, ESI
+
+//
+// Define the helpers used to implement the write barrier required when writing an object reference into a
+// location residing on the GC heap. Such write barriers allow the GC to optimize which objects in
+// non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral
+// collection.
+//
+
+.macro DEFINE_CHECKED_WRITE_BARRIER_CORE BASENAME, REFREG
+
+    // The location being updated might not even lie in the GC heap (a handle or stack location for instance),
+    // in which case no write barrier is required.
+    cmp     rdi, [C_VAR(g_lowest_address)]
+    jb      LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
+    cmp     rdi, [C_VAR(g_highest_address)]
+    jae     LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
+
+    DEFINE_UNCHECKED_WRITE_BARRIER_CORE \BASENAME, \REFREG
+
+.endm
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. One argument is taken, the
+// name of the register that will hold the object reference (this should be in upper case as it's used in the
+// definition of the name of the helper).
+.macro DEFINE_CHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME
+
+// Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard
+// decoration). The location to be updated is always in RDI. The object reference that will be assigned into
+// that location is in one of the other general registers determined by the value of REFREG.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction
+// - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
+LEAF_ENTRY RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT
+
+    // Export the canonical write barrier under unqualified name as well
+    .ifc \REFREG, RSI
+    ALTERNATE_ENTRY RhpCheckedAssignRef
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+    .endif
+
+    // Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+    // and the card table update we may perform below.
+    mov     qword ptr [rdi], \REFREG
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, \REFREG
+
+LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT
+.endm
+
+// One day we might have write barriers for all the possible argument registers but for now we have
+// just one write barrier that assumes the input register is RSI.
+DEFINE_CHECKED_WRITE_BARRIER RSI, ESI
+
+LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
+    mov             rax, rdx
+    lock cmpxchg    [rdi], rsi
+    jne             LOCAL_LABEL(RhpCheckedLockCmpXchg_NoBarrierRequired_RSI)
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, RSI
+
+LEAF_END RhpCheckedLockCmpXchg, _TEXT
+
+LEAF_ENTRY RhpCheckedXchg, _TEXT
+
+    // Setup rax with the new object for the exchange, that way it will automatically hold the correct result
+    // afterwards and we can leave rdx unaltered ready for the GC write barrier below.
+    mov             rax, rsi
+    xchg            [rdi], rax
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RSI
+
+LEAF_END RhpCheckedXchg, _TEXT
+
+//
+// RhpByRefAssignRef simulates movs instruction for object references.
+//
+// On entry:
+//      rdi: address of ref-field (assigned to)
+//      rsi: address of the data (source)
+//
+// On exit:
+//      rdi, rsi are incremented by 8,
+//      rcx, rax: trashed
+//
+// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+//       if you add more trashed registers.
+//
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1/2
+// - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
+LEAF_ENTRY RhpByRefAssignRef, _TEXT
+ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
+    mov     rcx, [rsi]
+#ifdef TARGET_APPLE
+// Apple's linker has issues which break unwind info if
+// an ALTERNATE_ENTRY is present in the middle of a function see https://github.com/dotnet/runtime/pull/114982#discussion_r2083272768
+.cfi_endproc
+#endif
+ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2
+#ifdef TARGET_APPLE
+.cfi_startproc
+#endif
+    mov     [rdi], rcx
+
+    // Check whether the writes were even into the heap. If not there's no card update required.
+    cmp     rdi, [C_VAR(g_lowest_address)]
+    jb      LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+    cmp     rdi, [C_VAR(g_highest_address)]
+    jae     LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+
+    // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+    // we're in a debug build and write barrier checking has been enabled).
+    UPDATE_GC_SHADOW BASENAME, rcx, rdi
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+    cmp     qword ptr [C_VAR(g_write_watch_table)], 0x0
+    je      LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable)
+
+    mov     rax, rdi
+    shr     rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
+    add     rax, [C_VAR(g_write_watch_table)]
+    cmp     byte ptr [rax], 0x0
+    jne     LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable)
+    mov     byte ptr [rax], 0xFF
+#endif
+
+LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable):
+
+    // If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    // (since the object won't be collected or moved by an ephemeral collection).
+    cmp     rcx, [C_VAR(g_ephemeral_low)]
+    jb      LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+    cmp     rcx, [C_VAR(g_ephemeral_high)]
+    jae     LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+
+    // move current rdi value into rcx, we need to keep rdi and eventually increment by 8
+    mov     rcx, rdi
+
+    // We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+    // track this write. The location address is translated into an offset in the card table bitmap. We set
+    // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+    // the byte if it hasn't already been done since writes are expensive and impact scaling.
+    shr     rcx, 0x0B
+    mov     rax, [C_VAR(g_card_table)]
+    cmp     byte ptr [rcx + rax], 0x0FF
+    je      LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+
+// We get here if it's necessary to update the card table.
+    mov     byte ptr [rcx + rax], 0xFF
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
+    shr     rcx, 0x0A
+    add     rcx, [C_VAR(g_card_bundle_table)]
+    cmp     byte ptr [rcx], 0xFF
+    je      LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+
+    mov     byte ptr [rcx], 0xFF
+#endif
+
+LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired):
+    // Increment the pointers before leaving
+    add     rdi, 0x8
+    add     rsi, 0x8
+    ret
+LEAF_END RhpByRefAssignRef, _TEXT
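
Stripped of the shadow-heap and write-watch instrumentation, the checked barrier above does three things: store the reference, bail out if the destination is not in the GC heap or the reference is not ephemeral, and otherwise dirty the card covering the destination. A rough sketch under those assumptions, with placeholder globals standing in for the exported GC tables:

```cpp
#include <cstddef>
#include <cstdint>

// Placeholders for the GC-exported bounds and tables (g_lowest_address,
// g_ephemeral_low/high, g_card_table in the assembly above).
static uint8_t* g_heapLow;
static uint8_t* g_heapHigh;
static uint8_t* g_ephemeralLow;
static uint8_t* g_ephemeralHigh;
static uint8_t* g_cardTable;

// Checked write barrier: store, then mark the card only when the destination
// is in the GC heap and the reference points into an ephemeral generation.
void CheckedWriteBarrier(void** location, void* reference)
{
    *location = reference;

    uint8_t* dst = reinterpret_cast<uint8_t*>(location);
    if (dst < g_heapLow || dst >= g_heapHigh)
        return; // handle or stack location: no card tracking needed

    uint8_t* ref = static_cast<uint8_t*>(reference);
    if (ref < g_ephemeralLow || ref >= g_ephemeralHigh)
        return; // target cannot be moved by an ephemeral collection

    // One card covers 2^11 bytes; skip the store if the card is already dirty,
    // since redundant writes to the card table hurt scaling.
    size_t card = reinterpret_cast<uintptr_t>(dst) >> 11;
    if (g_cardTable[card] != 0xFF)
        g_cardTable[card] = 0xFF;
}
```
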
diff --git a/src/coreclr/runtime/amd64/WriteBarriers.asm b/src/coreclr/runtime/amd64/WriteBarriers.asm
new file mode 100644
index 000000000000..c08109d65ea6
--- /dev/null
+++ b/src/coreclr/runtime/amd64/WriteBarriers.asm
@@ -0,0 +1,346 @@
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+
+include AsmMacros_Shared.inc
+
+;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+;; during garbage collections to verify that object references where never written to the heap without using a
+;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing
+;; new references to the real heap. Since this can't be solved perfectly without critical sections around the
+;; entire update process, we instead update the shadow location and then re-check the real location (as two
+;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value
+;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+;; time and these write barrier operations are atomic wrt GCs, this is sufficient to guarantee that the
+;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
+ifdef WRITE_BARRIER_CHECK
+
+g_GCShadow      TEXTEQU 
+g_GCShadowEnd   TEXTEQU 
+INVALIDGCVALUE  EQU 0CCCCCCCDh
+
+EXTERN  g_GCShadow : QWORD
+EXTERN  g_GCShadowEnd : QWORD
+
+UPDATE_GC_SHADOW macro BASENAME, REFREG, DESTREG
+
+    ;; If g_GCShadow is 0, don't perform the check.
+    cmp     g_GCShadow, 0
+    je      &BASENAME&_UpdateShadowHeap_Done_&REFREG&
+
+    ;; Save DESTREG since we're about to modify it (and we need the original value both within the macro and
+    ;; once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of
+    ;; the prolog inside a method without a frame. But given that this is only debug code and generally we
+    ;; shouldn't be walking the stack at this point, it seems preferable to recoding all the barrier
+    ;; variants to set up frames. Unlike RhpBulkWriteBarrier below which is treated as a helper call using the
+    ;; usual calling convention, the compiler knows exactly which registers are trashed in the simple write
+    ;; barrier case, so we don't have any more scratch registers to play with (and doing so would only make
+    ;; things harder if at a later stage we want to allow multiple barrier versions based on the input
+    ;; registers).
+    push    DESTREG
+
+    ;; Transform DESTREG into the equivalent address in the shadow heap.
+    sub     DESTREG, g_lowest_address
+    jb      &BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG&
+    add     DESTREG, [g_GCShadow]
+    cmp     DESTREG, [g_GCShadowEnd]
+    jae     &BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG&
+
+    ;; Update the shadow heap.
+    mov     [DESTREG], REFREG
+
+    ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. This
+    ;; read must be strongly ordered wrt the previous write to prevent race conditions. We also need to
+    ;; recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock
+    ;; prefix).
+    xchg    [rsp], DESTREG
+    cmp     [DESTREG], REFREG
+    jne     &BASENAME&_UpdateShadowHeap_Invalidate_&REFREG&
+
+    ;; The original DESTREG value is now restored but the stack has a value (the shadow version of the
+    ;; location) pushed. Need to discard this push before we are done.
+    add     rsp, 8
+    jmp     &BASENAME&_UpdateShadowHeap_Done_&REFREG&
+
+&BASENAME&_UpdateShadowHeap_Invalidate_&REFREG&:
+    ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't
+    ;; guarantee whose shadow update won.
+
+    ;; Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an
+    ;; additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg
+    ;; variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 64-bit
+    ;; immediate and therefore must be moved into a register before it can be written to the shadow
+    ;; location.
+    xchg    [rsp], DESTREG
+    push    REFREG
+    mov     REFREG, INVALIDGCVALUE
+    mov     qword ptr [DESTREG], REFREG
+    pop     REFREG
+
+&BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG&:
+    ;; Restore original DESTREG value from the stack.
+    pop     DESTREG
+
+&BASENAME&_UpdateShadowHeap_Done_&REFREG&:
+endm
+
+else ; WRITE_BARRIER_CHECK
+
+UPDATE_GC_SHADOW macro BASENAME, REFREG, DESTREG
+endm
+
+endif ; WRITE_BARRIER_CHECK
+
+;; There are several different helpers used depending on which register holds the object reference. Since all
+;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+;; name of the register that points to the location to be updated and the name of the register that holds the
+;; object reference (this should be in upper case as it's used in the definition of the name of the helper).
+DEFINE_UNCHECKED_WRITE_BARRIER_CORE macro BASENAME, REFREG
+
+    ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+    ;; we're in a debug build and write barrier checking has been enabled).
+    UPDATE_GC_SHADOW BASENAME, REFREG, rcx
+
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+    mov     r11, [g_write_watch_table]
+    cmp     r11, 0
+    je      &BASENAME&_CheckCardTable_&REFREG&
+
+    mov     r10, rcx
+    shr     r10, 0Ch ;; SoftwareWriteWatch::AddressToTableByteIndexShift
+    add     r10, r11
+    cmp     byte ptr [r10], 0
+    jne     &BASENAME&_CheckCardTable_&REFREG&
+    mov     byte ptr [r10], 0FFh
+endif
+
+&BASENAME&_CheckCardTable_&REFREG&:
+
+    ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    ;; (since the object won't be collected or moved by an ephemeral collection).
+    cmp     REFREG, [g_ephemeral_low]
+    jb      &BASENAME&_NoBarrierRequired_&REFREG&
+    cmp     REFREG, [g_ephemeral_high]
+    jae     &BASENAME&_NoBarrierRequired_&REFREG&
+
+    ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+    ;; track this write. The location address is translated into an offset in the card table bitmap. We set
+    ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+    ;; the byte if it hasn't already been done since writes are expensive and impact scaling.
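+    ;; (The 0Bh shift below divides the address by 800h, so each card table byte covers a 2KB range of the heap.)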
+    shr     rcx, 0Bh
+    mov     r10, [g_card_table]
+    cmp     byte ptr [rcx + r10], 0FFh
+    je      &BASENAME&_NoBarrierRequired_&REFREG&
+
+    ;; We get here if it's necessary to update the card table.
+    mov     byte ptr [rcx + r10], 0FFh
+
+ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    ;; Shift rcx by 0Ah more to get the card bundle byte (we shifted by 0Bh already)
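+    ;; (Together with the earlier 0Bh shift the address has now been shifted by 15h bits in total, so each
+    ;; card bundle byte covers a 2MB range of the heap.)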
+    shr     rcx, 0Ah
+    add     rcx, [g_card_bundle_table]
+    cmp     byte ptr [rcx], 0FFh
+    je      &BASENAME&_NoBarrierRequired_&REFREG&
+
+    mov     byte ptr [rcx], 0FFh
+endif
+
+&BASENAME&_NoBarrierRequired_&REFREG&:
+    ret
+
+endm
+
+;; There are several different helpers used depending on which register holds the object reference. Since all
+;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken: the
+;; name of the register that will hold the object reference and the register name to embed in the exported
+;; helper name (this should be in upper case as it's used in the definition of the name of the helper).
+DEFINE_UNCHECKED_WRITE_BARRIER macro REFREG, EXPORT_REG_NAME
+
+;; Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard
+;; decoration). The location to be updated is always in RCX. The object reference that will be assigned into
+;; that location is in one of the other general registers determined by the value of REFREG.
+
+;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction
+;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
+LEAF_ENTRY RhpAssignRef&EXPORT_REG_NAME&, _TEXT
+
+    ;; Export the canonical write barrier under unqualified name as well
+    ifidni <REFREG>, <RDX>
+    ALTERNATE_ENTRY RhpAssignRef
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+    endif
+
+    ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+    ;; and the card table update we may perform below.
+    mov     qword ptr [rcx], REFREG
+
+    DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, REFREG
+
+LEAF_END RhpAssignRef&EXPORT_REG_NAME&, _TEXT
+endm
+
+;; One day we might have write barriers for all the possible argument registers but for now we have
+;; just one write barrier that assumes the input register is RDX.
+DEFINE_UNCHECKED_WRITE_BARRIER RDX, EDX
+
+;;
+;; Define the helpers used to implement the write barrier required when writing an object reference into a
+;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in
+;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral
+;; collection.
+;;
+
+DEFINE_CHECKED_WRITE_BARRIER_CORE macro BASENAME, REFREG
+
+    ;; The location being updated might not even lie in the GC heap (a handle or stack location for instance),
+    ;; in which case no write barrier is required.
+    cmp     rcx, [g_lowest_address]
+    jb      &BASENAME&_NoBarrierRequired_&REFREG&
+    cmp     rcx, [g_highest_address]
+    jae     &BASENAME&_NoBarrierRequired_&REFREG&
+
+    DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG
+
+endm
+
+;; There are several different helpers used depending on which register holds the object reference. Since all
+;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken: the
+;; name of the register that will hold the object reference and the register name to embed in the exported
+;; helper name (this should be in upper case as it's used in the definition of the name of the helper).
+DEFINE_CHECKED_WRITE_BARRIER macro REFREG, EXPORT_REG_NAME
+
+;; Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard
+;; decoration). The location to be updated is always in RCX. The object reference that will be assigned into
+;; that location is in one of the other general registers determined by the value of REFREG.
+
+;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction
+;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
+LEAF_ENTRY RhpCheckedAssignRef&EXPORT_REG_NAME&, _TEXT
+
+    ;; Export the canonical write barrier under unqualified name as well
+    ifidni <REFREG>, <RDX>
+    ALTERNATE_ENTRY RhpCheckedAssignRef
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+    endif
+
+    ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+    ;; and the card table update we may perform below.
+    mov     qword ptr [rcx], REFREG
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, REFREG
+
+LEAF_END RhpCheckedAssignRef&EXPORT_REG_NAME&, _TEXT
+endm
+
+;; One day we might have write barriers for all the possible argument registers but for now we have
+;; just one write barrier that assumes the input register is RDX.
+DEFINE_CHECKED_WRITE_BARRIER RDX, EDX
+
+LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
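+    ;; rcx == destination address, rdx == new value, r8 == comparand.
+    ;; cmpxchg compares [rcx] against rax, so the comparand is moved into rax first.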
+    mov             rax, r8
+    lock cmpxchg    [rcx], rdx
+    jne             RhpCheckedLockCmpXchg_NoBarrierRequired_RDX
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, RDX
+
+LEAF_END RhpCheckedLockCmpXchg, _TEXT
+
+LEAF_ENTRY RhpCheckedXchg, _TEXT
+
+    ;; Setup rax with the new object for the exchange, that way it will automatically hold the correct result
+    ;; afterwards and we can leave rdx unaltered ready for the GC write barrier below.
+    mov             rax, rdx
+    xchg            [rcx], rax
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RDX
+
+LEAF_END RhpCheckedXchg, _TEXT
+
+;;
+;; RhpByRefAssignRef simulates movs instruction for object references.
+;;
+;; On entry:
+;;      rdi: address of ref-field (assigned to)
+;;      rsi: address of the data (source)
+;;
+;; On exit:
+;;      rdi, rsi are incremented by 8,
+;;      rcx, rax: trashed
+;;
+;; NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+;;       if you add more trashed registers.
+;;
+;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1/2
+;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
+LEAF_ENTRY RhpByRefAssignRef, _TEXT
+ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
+    mov     rcx, [rsi]
+ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2
+    mov     [rdi], rcx
+
+    ;; Check whether the writes were even into the heap. If not there's no card update required.
+    cmp     rdi, [g_lowest_address]
+    jb      RhpByRefAssignRef_NoBarrierRequired
+    cmp     rdi, [g_highest_address]
+    jae     RhpByRefAssignRef_NoBarrierRequired
+
+    ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+    ;; we're in a debug build and write barrier checking has been enabled).
+    UPDATE_GC_SHADOW BASENAME, rcx, rdi
+
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+    cmp     [g_write_watch_table], 0
+    je      RhpByRefAssignRef_CheckCardTable
+
+    mov     rax, rdi
+    shr     rax, 0Ch ;; SoftwareWriteWatch::AddressToTableByteIndexShift
+    add     rax, [g_write_watch_table]
+    cmp     byte ptr [rax], 0
+    jne     RhpByRefAssignRef_CheckCardTable
+    mov     byte ptr [rax], 0FFh
+endif
+
+RhpByRefAssignRef_CheckCardTable:
+
+    ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    ;; (since the object won't be collected or moved by an ephemeral collection).
+    cmp     rcx, [g_ephemeral_low]
+    jb      RhpByRefAssignRef_NoBarrierRequired
+    cmp     rcx, [g_ephemeral_high]
+    jae     RhpByRefAssignRef_NoBarrierRequired
+
+    ;; Move the current rdi value into rcx; rdi must be preserved so it can be incremented by 8 on exit.
+    mov     rcx, rdi
+
+    ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+    ;; track this write. The location address is translated into an offset in the card table bitmap. We set
+    ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+    ;; the byte if it hasn't already been done since writes are expensive and impact scaling.
+    shr     rcx, 0Bh
+    mov     rax, [g_card_table]
+    cmp     byte ptr [rcx + rax], 0FFh
+    je      RhpByRefAssignRef_NoBarrierRequired
+
+    ;; We get here if it's necessary to update the card table.
+    mov     byte ptr [rcx + rax], 0FFh
+
+ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    ;; Shift rcx by 0Ah more to get the card bundle byte (we shifted by 0Bh already)
+    shr     rcx, 0Ah
+    add     rcx, [g_card_bundle_table]
+    cmp     byte ptr [rcx], 0FFh
+    je      RhpByRefAssignRef_NoBarrierRequired
+
+    mov     byte ptr [rcx], 0FFh
+endif
+
+RhpByRefAssignRef_NoBarrierRequired:
+    ;; Increment the pointers before leaving
+    add     rdi, 8h
+    add     rsi, 8h
+    ret
+LEAF_END RhpByRefAssignRef, _TEXT
+
+    end
diff --git a/src/coreclr/runtime/arm/AllocFast.S b/src/coreclr/runtime/arm/AllocFast.S
new file mode 100644
index 000000000000..e57679edf72f
--- /dev/null
+++ b/src/coreclr/runtime/arm/AllocFast.S
@@ -0,0 +1,455 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.syntax unified
+.thumb
+
+#include "AsmMacros_Shared.h"
+
+// Shared code for RhpNewFast, RhpNewFastAlign8 and RhpNewFastMisalign
+//  r0 == MethodTable
+.macro NEW_FAST Variation
+        PROLOG_PUSH "{r4,lr}"
+        mov	    r4, r0 // save MethodTable
+
+        // r0 = ee_alloc_context pointer; trashes volatile registers, expects saved lr
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        ldr         r2, [r4, #OFFSETOF__MethodTable__m_uBaseSize]
+
+        // Load potential new object address into r3.
+        ldr         r3, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        // Load and calculate the maximum size of object we can fit.
+        ldr         r1, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)]
+        sub         r1, r3
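+        // r1 = combined_limit - alloc_ptr, the number of bytes still available in the allocation context.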
+
+        // When doing aligned or misaligned allocation we first check
+        // the alignment and skip to the regular path if it already
+        // matches the expectation.
+        // Otherwise, we try to allocate size + ASM_MIN_OBJECT_SIZE and
+        // then prepend a dummy free object at the beginning of the
+        // allocation.
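+        // (This relies on ASM_MIN_OBJECT_SIZE being 4 modulo 8: prepending the dummy free object
+        // flips the 8-byte phase of the allocation pointer, producing the requested alignment.)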
+.ifnc \Variation,
+        tst         r3, #0x7
+.ifc \Variation,Align8
+        beq         1f // AlreadyAligned
+.else // Variation == "Misalign"
+        bne         1f // AlreadyAligned
+.endif
+
+        add         r2, ASM_MIN_OBJECT_SIZE
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        cmp         r2, r1
+        bhi         2f // AllocFailed
+
+        // Update the alloc pointer to account for the allocation.
+        add         r2, r3
+        str         r2, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        // Initialize the padding object preceding the new object.
+        PREPARE_EXTERNAL_VAR_INDIRECT G_FREE_OBJECT_METHOD_TABLE, r2
+        str         r2, [r3, #OFFSETOF__Object__m_pEEType]
+        mov         r2, #0
+        str         r2, [r3, #OFFSETOF__Array__m_Length]
+
+        // Calculate the new object pointer and initialize it.
+        add         r3, ASM_MIN_OBJECT_SIZE
+        str         r4, [r3, #OFFSETOF__Object__m_pEEType]
+
+        // Return the object allocated in r0.
+        mov         r0, r3
+
+        EPILOG_POP  "{r4,pc}"
+.endif // Variation != ""
+
+1: // AlreadyAligned
+
+        // r0: ee_alloc_context pointer
+        // r1: ee_alloc_context.combined_limit
+        // r2: base size
+        // r3: ee_alloc_context.alloc_ptr
+        // r4: MethodTable pointer
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        cmp         r2, r1
+        bhi         2f // AllocFailed
+
+        // Calculate the new alloc pointer to account for the allocation.
+        add         r2, r3
+
+        // Set the new object's MethodTable pointer.
+        str         r4, [r3, #OFFSETOF__Object__m_pEEType]
+
+        // Update the alloc pointer to the newly calculated one.
+        str         r2, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        // Return the object allocated in r0.
+        mov         r0, r3
+
+        EPILOG_POP  "{r4,pc}"
+
+2: // AllocFailed
+        mov         r0, r4            // restore MethodTable
+.ifc \Variation,
+        mov         r1, #0
+.else
+.ifc \Variation,Align8
+        mov         r1, #GC_ALLOC_ALIGN8
+.else
+        mov         r1, #(GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS)
+.endif
+.endif
+        EPILOG_POP  "{r4,lr}"
+        b           C_FUNC(RhpNewObject)
+.endm
+
+
+// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+// allocation context then automatically fall back to the slow allocation path.
+//  r0 == MethodTable
+LEAF_ENTRY RhpNewFast, _TEXT
+        NEW_FAST
+LEAF_END RhpNewFast, _TEXT
+
+
+// Allocate simple object (not finalizable, array or value type) on an 8 byte boundary.
+//  r0 == MethodTable
+LEAF_ENTRY RhpNewFastAlign8, _TEXT
+        NEW_FAST Align8
+LEAF_END RhpNewFastAlign8, _TEXT
+
+
+// Allocate a value type object (i.e. box it) on an 8 byte boundary + 4 (so that the value type payload
+// itself is 8 byte aligned).
+//  r0 == MethodTable
+LEAF_ENTRY RhpNewFastMisalign, _TEXT
+        NEW_FAST Misalign
+LEAF_END RhpNewFastMisalign, _TEXT
+
+
+// Allocate non-array object with finalizer.
+//  r0 == MethodTable
+LEAF_ENTRY RhpNewFinalizable, _TEXT
+        mov         r1, #GC_ALLOC_FINALIZE
+        b           C_FUNC(RhpNewObject)
+LEAF_END RhpNewFinalizable, _TEXT
+
+
+// Allocate a finalizable object (by definition not an array or value type) on an 8 byte boundary.
+//  r0 == MethodTable
+LEAF_ENTRY RhpNewFinalizableAlign8, _TEXT
+        mov         r1, #(GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8)
+        b           C_FUNC(RhpNewObject)
+LEAF_END RhpNewFinalizableAlign8, _TEXT
+
+
+// Allocate non-array object.
+//  r0 == MethodTable
+//  r1 == alloc flags
+NESTED_ENTRY RhpNewObject, _TEXT, NoHandler
+        PUSH_COOP_PINVOKE_FRAME r3
+
+        // r0: MethodTable
+        // r1: alloc flags
+        // r3: transition frame
+
+        // Preserve the MethodTable in r5.
+        mov         r5, r0
+
+        mov         r2, #0              // numElements
+
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        blx         C_FUNC(RhpGcAlloc)
+
+        cbz         r0, LOCAL_LABEL(NewOutOfMemory)
+
+        POP_COOP_PINVOKE_FRAME
+        bx          lr
+
+LOCAL_LABEL(NewOutOfMemory):
+        // This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov         r0, r5            // MethodTable pointer
+        mov         r1, #0            // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        b           C_FUNC(RhExceptionHandling_FailedAllocation)
+NESTED_END RhpNewObject, _TEXT
+
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewPtrArrayFast
+//  r0 == MethodTable
+//  r1 == character/element count
+//  r2 == string/array size
+.macro NEW_ARRAY_FAST_PROLOG
+        PROLOG_PUSH "{r4-r6,lr}"
+.endm
+
+.macro NEW_ARRAY_FAST_TAIL_EPILOG
+        EPILOG_POP  "{r4-r6,lr}"
+.endm
+
+.macro NEW_ARRAY_FAST
+        mov         r4, r0 // Save MethodTable
+        mov         r5, r1 // Save element count
+        mov         r6, r2 // Save string/array size
+
+        // r0 = ee_alloc_context pointer; trashes volatile registers, expects saved lr
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        // r4 == MethodTable
+        // r5 == element count
+        // r6 == string/array size
+        // r0 == ee_alloc_context*
+
+        // Load potential new object address into r3.
+        ldr         r3, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        // Load and calculate the maximum size of object we can fit
+        ldr         r1, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)]
+        sub         r1, r3
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        cmp         r6, r1
+        bhi         1f
+
+        // Calculate the alloc pointer to account for the allocation.
+        add         r6, r3
+
+        // Set the new object's MethodTable pointer and element count.
+        str         r4, [r3, #OFFSETOF__Object__m_pEEType]
+        str         r5, [r3, #OFFSETOF__Array__m_Length]
+
+        // Update the alloc pointer to the newly calculated one.
+        str         r6, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        // Return the object allocated in r0.
+        mov         r0, r3
+        EPILOG_POP  "{r4-r6,pc}"
+
+1:
+        mov         r0, r4
+        mov         r1, r5
+        // r0 == MethodTable
+        // r1 == element count
+        NEW_ARRAY_FAST_TAIL_EPILOG
+        b           C_FUNC(RhpNewVariableSizeObject)
+.endm
+
+
+// Allocate a string.
+//  r0 == MethodTable
+//  r1 == element/character count
+LEAF_ENTRY RhNewString, _TEXT
+        NEW_ARRAY_FAST_PROLOG
+
+        // Make sure computing the overall allocation size won't overflow
+        MOV32       r12, MAX_STRING_LENGTH
+        cmp         r1, r12
+        bhi         LOCAL_LABEL(StringSizeOverflow)
+
+        // Compute overall allocation size (align(base size + (element size * elements), 4)).
+        mov         r2, #(STRING_BASE_SIZE + 3)
+#if STRING_COMPONENT_SIZE == 2
+        add         r2, r2, r1, lsl #1                  // r2 += characters * 2
+#else
+        NotImplementedComponentSize
+#endif
+        bic         r2, r2, #3
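+        // r2 = align(STRING_BASE_SIZE + 2 * character count, 4), the total allocation size.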
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(StringSizeOverflow):
+        // We get here if the size of the final string object can't be represented as an unsigned
+        // 32-bit value. We're going to tail-call to a managed helper that will throw
+        // an OOM exception that the caller of this allocator understands.
+
+        // MethodTable is in r0 already
+        mov         r1, 0                  // Indicate that we should throw OOM
+        NEW_ARRAY_FAST_TAIL_EPILOG
+        b           C_FUNC(RhExceptionHandling_FailedAllocation)
+LEAF_END RhNewString, _TEXT
+
+
+// Allocate one dimensional, zero based array (SZARRAY).
+//  r0 == MethodTable
+//  r1 == element count
+LEAF_ENTRY RhpNewArrayFast, _TEXT
+        NEW_ARRAY_FAST_PROLOG
+
+        // Compute overall allocation size (align(base size + (element size * elements), 4)).
+        // if the element count is <= 0x10000, no overflow is possible because the component
+        // size is <= 0xffff (it's an unsigned 16-bit value) and thus the product is <= 0xffff0000
+        // and the base size for the worst case (32 dimensional MdArray) is less than 0xffff.
+        ldrh        r2, [r0, #OFFSETOF__MethodTable__m_usComponentSize]
+        cmp         r1, #0x10000
+        bhi         LOCAL_LABEL(ArraySizeBig)
+        umull       r2, r3, r2, r1
+        adds        r2, #(SZARRAY_BASE_SIZE + 3)
+LOCAL_LABEL(ArrayAlignSize):
+        bic         r2, r2, #3
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(ArraySizeBig):
+        // if the element count is negative, it's an overflow error
+        cmp         r1, #0
+        blt         LOCAL_LABEL(ArraySizeOverflow)
+
+        // now we know the element count is in the signed int range [0..0x7fffffff]
+        // overflow in computing the total size of the array size gives an out of memory exception,
+        // NOT an overflow exception
+        // we already have the component size in r2
+        umull       r2, r3, r2, r1
+        cbnz        r3, LOCAL_LABEL(ArrayOutOfMemoryFinal)
+        ldr         r3, [r0, #OFFSETOF__MethodTable__m_uBaseSize]
+        adds        r2, r3
+        bcs         LOCAL_LABEL(ArrayOutOfMemoryFinal)
+        adds        r2, #3
+        bcs         LOCAL_LABEL(ArrayOutOfMemoryFinal)
+        b           LOCAL_LABEL(ArrayAlignSize)
+
+LOCAL_LABEL(ArrayOutOfMemoryFinal):
+
+        // MethodTable is in r0 already
+        mov         r1, #0                  // Indicate that we should throw OOM.
+        NEW_ARRAY_FAST_TAIL_EPILOG
+        b           C_FUNC(RhExceptionHandling_FailedAllocation)
+
+LOCAL_LABEL(ArraySizeOverflow):
+        // We get here if the size of the final array object can't be represented as an unsigned
+        // 32-bit value. We're going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        // MethodTable is in r0 already
+        mov         r1, #1                  // Indicate that we should throw OverflowException
+        NEW_ARRAY_FAST_TAIL_EPILOG
+        b           C_FUNC(RhExceptionHandling_FailedAllocation)
+LEAF_END RhpNewArrayFast, _TEXT
+
+
+// Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements.
+//  r0 == MethodTable
+//  r1 == element count
+LEAF_ENTRY RhpNewPtrArrayFast, _TEXT
+        NEW_ARRAY_FAST_PROLOG
+
+        // Delegate overflow handling to the generic helper conservatively
+
+        mov         r2, #1 << 28 // 0x40000000 / sizeof(void*)
+        cmp         r1, r2
+        bhs         LOCAL_LABEL(RhpNewPtrArrayFast_RarePath)
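+        // Below this limit r1 * 4 + SZARRAY_BASE_SIZE cannot overflow 32 bits, so no further checks are needed.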
+
+        mov         r2, #SZARRAY_BASE_SIZE
+        add         r2, r2, r1, lsl #2
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(RhpNewPtrArrayFast_RarePath):
+        NEW_ARRAY_FAST_TAIL_EPILOG
+        b           C_FUNC(RhpNewArrayFast)
+LEAF_END RhpNewPtrArrayFast, _TEXT
+
+
+// Allocate variable sized object (eg. array, string) using the slow path that calls a runtime helper.
+//  r0 == MethodTable
+//  r1 == element count
+NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler
+        PUSH_COOP_PINVOKE_FRAME r3
+
+        // Preserve the MethodTable in r5.
+        mov         r5, r0
+
+        mov         r2, r1          // numElements
+        mov         r1, #0          // uFlags
+
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        blx         C_FUNC(RhpGcAlloc)
+
+        // Test for failure (NULL return).
+        cbz         r0, LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory)
+
+        POP_COOP_PINVOKE_FRAME
+        bx          lr
+
+LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory):
+
+        mov         r0, r5       // MethodTable
+        mov         r1, #0       // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        b           C_FUNC(RhExceptionHandling_FailedAllocation)
+
+NESTED_END RhpNewVariableSizeObject, _TEXT
+
+
+// Allocate an array on an 8 byte boundary.
+//  r0 == MethodTable
+//  r1 == element count
+NESTED_ENTRY RhpNewArrayFastAlign8, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME r3
+
+        // Compute overall allocation size (base size + align((element size * elements), 4)).
+        ldrh        r2, [r0, #OFFSETOF__MethodTable__m_usComponentSize]
+        umull       r2, r4, r2, r1
+        cbnz        r4, LOCAL_LABEL(Array8SizeOverflow)
+        adds        r2, #3
+        bcs         LOCAL_LABEL(Array8SizeOverflow)
+        bic         r2, r2, #3
+        ldr         r4, [r0, #OFFSETOF__MethodTable__m_uBaseSize]
+        adds        r2, r4
+        bcs         LOCAL_LABEL(Array8SizeOverflow)
+
+        // Preserve the MethodTable in r5.
+        mov         r5, r0
+
+        mov         r2, r1                  // numElements
+        mov         r1, #GC_ALLOC_ALIGN8    // uFlags
+
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        blx         C_FUNC(RhpGcAlloc)
+
+        // Test for failure (NULL return).
+        cbz         r0, LOCAL_LABEL(Array8OutOfMemory)
+
+        POP_COOP_PINVOKE_FRAME
+
+        bx          lr
+
+LOCAL_LABEL(Array8SizeOverflow):
+        // We get here if the size of the final array object can't be represented as an unsigned
+        // 32-bit value. We're going to tail-call to a managed helper that will throw
+        // an OOM or overflow exception that the caller of this allocator understands.
+
+        // if the element count is non-negative, it's an OOM error
+        cmp         r1, #0
+        bge         LOCAL_LABEL(Array8OutOfMemory1)
+
+        // r0 holds MethodTable pointer already
+        mov         r1, #1              // Indicate that we should throw OverflowException
+
+        POP_COOP_PINVOKE_FRAME
+        b           C_FUNC(RhExceptionHandling_FailedAllocation)
+
+LOCAL_LABEL(Array8OutOfMemory):
+        // This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov         r0, r5              // MethodTable pointer
+
+LOCAL_LABEL(Array8OutOfMemory1):
+
+        mov         r1, #0              // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        b           C_FUNC(RhExceptionHandling_FailedAllocation)
+
+NESTED_END RhpNewArrayFastAlign8, _TEXT
diff --git a/src/coreclr/runtime/arm/StubDispatch.S b/src/coreclr/runtime/arm/StubDispatch.S
new file mode 100644
index 000000000000..3001b67453dd
--- /dev/null
+++ b/src/coreclr/runtime/arm/StubDispatch.S
@@ -0,0 +1,107 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.syntax unified
+.thumb
+
+#include "AsmMacros_Shared.h"
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+// Macro that generates a stub consuming a cache with the given number of entries.
+.macro DEFINE_INTERFACE_DISPATCH_STUB entries
+
+NESTED_ENTRY RhpInterfaceDispatch\entries, _TEXT, NoHandler
+        // r12 currently contains the indirection cell address. But we need more scratch registers and
+        // we may A/V on a null this. Store r1 and r2 in red zone.
+        str         r1, [sp, #-8]
+        str         r2, [sp, #-4]
+
+        // r12 currently holds the indirection cell address. We need to get the cache structure instead.
+        ldr         r2, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+        // Load the MethodTable from the object instance in r0.
+        GLOBAL_LABEL RhpInterfaceDispatchAVLocation\entries
+        ldr         r1, [r0]
+
+        CurrentOffset = OFFSETOF__InterfaceDispatchCache__m_rgEntries
+        // For each entry in the cache, see if its MethodTable type matches the MethodTable in r1.
+        // If so, load the target from the second word of the entry and jump to it. If not, move on to the
+        // next InterfaceDispatchCacheEntry.
+        //  R1 : Instance MethodTable*
+        //  R2: Cache data structure
+        //  R12 : Trashed. On successful check, set to the target address to jump to.
+        .rept \entries
+              ldr        r12, [r2, #CurrentOffset]
+              cmp        r1, r12
+              bne        0f
+              ldr        r12, [r2, #(CurrentOffset + 4)]
+              b          LOCAL_LABEL(99_\entries)
+        0:
+              CurrentOffset = CurrentOffset + 8
+        .endr
+
+        // Point r12 to the indirection cell using the back pointer in the cache block
+        ldr         r12, [r2, #OFFSETOF__InterfaceDispatchCache__m_pCell]
+
+        ldr         r1, [sp, #-8]
+        ldr         r2, [sp, #-4]
+        b           C_FUNC(RhpInterfaceDispatchSlow)
+
+        // Common epilog for cache hits. It has to be out-of-lined here due to the limit on the number of
+        // epilogs imposed by the unwind code macros.
+LOCAL_LABEL(99_\entries):
+        // R2 contains address of the cache block. We store it in the red zone in case the target we jump
+        // to needs it.
+        // R12 contains the target address to jump to
+        ldr         r1, [sp, #-8]
+        // We have to store R2 with address of the cache block into red zone before restoring original r2.
+        str         r2, [sp, #-8]
+        ldr         r2, [sp, #-4]
+        EPILOG_BRANCH_REG r12
+
+NESTED_END RhpInterfaceDispatch\entries, _TEXT
+
+.endm // DEFINE_INTERFACE_DISPATCH_STUB
+
+// Define all the stub routines we currently need.
+//
+// The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed.
+// If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo
+//
+DEFINE_INTERFACE_DISPATCH_STUB 1
+DEFINE_INTERFACE_DISPATCH_STUB 2
+DEFINE_INTERFACE_DISPATCH_STUB 4
+DEFINE_INTERFACE_DISPATCH_STUB 8
+DEFINE_INTERFACE_DISPATCH_STUB 16
+DEFINE_INTERFACE_DISPATCH_STUB 32
+DEFINE_INTERFACE_DISPATCH_STUB 64
+
+// Initial dispatch on an interface when we don't have a cache yet.
+LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT
+        // Just tail call to the cache miss helper.
+        b           C_FUNC(RhpInterfaceDispatchSlow)
+LEAF_END RhpInitialInterfaceDispatch, _TEXT
+
+// Not defined as an alternate entry due to the missed thumb bit in that case
+// See https://github.com/dotnet/runtime/issues/8608
+LEAF_ENTRY RhpInitialDynamicInterfaceDispatch, _TEXT
+        // Just tail call to the cache miss helper.
+        b           C_FUNC(RhpInterfaceDispatchSlow)
+LEAF_END RhpInitialDynamicInterfaceDispatch, _TEXT
+
+// Cache miss case, call the runtime to resolve the target and update the cache.
+// Use universal transition helper to allow an exception to flow out of resolution
+LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT
+        // r12 has the interface dispatch cell address in it.
+        // The calling convention of the universal thunk is that the parameter
+        // for the universal thunk target is to be placed in sp-8
+        // and the universal thunk target address is to be placed in sp-4
+        str         r12, [sp, #-8]
+        PREPARE_EXTERNAL_VAR RhpCidResolve, r12
+        str         r12, [sp, #-4]
+
+        // jump to universal transition thunk
+        b           C_FUNC(RhpUniversalTransition_DebugStepTailCall)
+LEAF_END RhpInterfaceDispatchSlow, _TEXT
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/coreclr/runtime/arm/WriteBarriers.S b/src/coreclr/runtime/arm/WriteBarriers.S
new file mode 100644
index 000000000000..30afebc760d0
--- /dev/null
+++ b/src/coreclr/runtime/arm/WriteBarriers.S
@@ -0,0 +1,329 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.syntax unified
+.thumb
+
+#include "AsmMacros_Shared.h"
+
+#ifdef WRITE_BARRIER_CHECK
+
+.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG
+        // Todo: implement, debugging helper
+
+LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG):
+
+.endm
+
+#else  // WRITE_BARRIER_CHECK
+
+.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG
+.endm
+
+#endif // WRITE_BARRIER_CHECK
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+.macro UPDATE_WRITE_WATCH_TABLE ptrReg, tmpReg, __wbScratch
+
+        PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, \__wbScratch
+        cbz \__wbScratch, 2f
+        add \__wbScratch, \__wbScratch, \ptrReg, lsr #0xc  // SoftwareWriteWatch::AddressToTableByteIndexShift
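+        // (One write watch table byte covers a 0x1000-byte / 4KB page of the heap.)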
+
+        ldrb \tmpReg, [\__wbScratch]
+        cmp \tmpReg, #0xff
+        itt ne
+        movne \tmpReg, 0xff
+        strbne \tmpReg, [\__wbScratch]
+
+2:
+.endm
+#else
+.macro UPDATE_WRITE_WATCH_TABLE ptrReg, tmpReg, __wbScratch
+.endm
+#endif
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Three arguments are taken:
+// the base name used to construct the helper's labels, the name of the register that holds the object
+// reference, and a scratch register used for the software write watch update.
+.macro DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG, TMPREG
+
+          // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+          // we're in a debug build and write barrier checking has been enabled).
+          UPDATE_GC_SHADOW \BASENAME, \REFREG, r0
+
+          UPDATE_WRITE_WATCH_TABLE r0, r12, \TMPREG
+
+          // If the reference is to an object that's not in an ephemeral generation we have no need to track it
+          // (since the object won't be collected or moved by an ephemeral collection).
+          PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, r12
+          cmp          \REFREG, r12
+          blo          LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG)
+
+          PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, r12
+          cmp          \REFREG, r12
+          bhs          LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG)
+
+
+          // We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+          // track this write. The location address is translated into an offset in the card table bitmap. We set
+          // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+          // the byte if it hasn't already been done since writes are expensive and impact scaling.
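+          // (The lsr #10 below means one card table byte covers a 1KB range of the heap on this 32-bit target.)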
+          PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, r12
+          add          r0, r12, r0, lsr #10
+          ldrb         r12, [r0]
+          cmp          r12, #0x0FF
+          bne          LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG)
+
+LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG):
+          b            LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG)
+
+// We get here if it's necessary to update the card table.
+LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG):
+          mov          r12, #0x0FF
+          strb         r12, [r0]
+
+LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG):
+
+.endm
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken: the
+// name of the register that will hold the object reference and the register name to embed in the name of the
+// exported helper.
+.macro DEFINE_UNCHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME
+
+// Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard
+// decoration). The location to be updated is always in r0. The object reference that will be assigned into
+// that location is in one of the other general registers determined by the value of REFREG.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at WriteBarrierFunctionAvLOC
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
+LEAF_ENTRY RhpAssignRef\EXPORT_REG_NAME, _TEXT
+
+// Export the canonical write barrier under unqualified name as well
+.ifc \REFREG, r1
+ALTERNATE_ENTRY RhpAssignRef
+.endif
+
+          // Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The
+          // intent is that writes to the target object ($REFREG) will be visible across all CPUs before the
+          // write to the destination ($DESTREG). This covers most of the common scenarios where the programmer
+          // might assume strongly ordered accesses, namely where the preceding writes are used to initialize
+          // the object and the final write, made by this barrier in the instruction following the DMB,
+          // publishes that object for other threads/cpus to see.
+          //
+          // Note that none of this is relevant for single cpu machines. We may choose to implement a
+          // uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again.
+          dmb
+
+          // Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+          // and the card table update we may perform below.
+GLOBAL_LABEL "RhpAssignRefAvLocation"\EXPORT_REG_NAME  // WriteBarrierFunctionAvLocation
+.ifc \REFREG, r1
+GLOBAL_LABEL RhpAssignRefAVLocation
+.endif
+          str          \REFREG, [r0]
+
+          DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, \REFREG, r3
+
+          bx           lr
+LEAF_END RhpAssignRef\EXPORT_REG_NAME, _TEXT
+.endm
+
+// One day we might have write barriers for all the possible argument registers but for now we have
+// just one write barrier that assumes the input register is r1.
+DEFINE_UNCHECKED_WRITE_BARRIER r1, r1
+
+//
+// Define the helpers used to implement the write barrier required when writing an object reference into a
+// location residing on the GC heap. Such write barriers allow the GC to optimize which objects in
+// non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral
+// collection.
+//
+
+.macro DEFINE_CHECKED_WRITE_BARRIER_CORE BASENAME, REFREG, TMPREG
+
+          // The location being updated might not even lie in the GC heap (a handle or stack location for instance),
+          // in which case no write barrier is required.
+          PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, r12
+          cmp          r0, r12
+          blo          LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
+          PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, r12
+          cmp          r0, r12
+          bhs          LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
+
+          DEFINE_UNCHECKED_WRITE_BARRIER_CORE \BASENAME, \REFREG, \TMPREG
+
+.endm
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken: the
+// name of the register that will hold the object reference and the register name to embed in the name of the
+// exported helper.
+.macro DEFINE_CHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME
+
+// Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard
+// decoration). The location to be updated is always in R0. The object reference that will be assigned into
+// that location is in one of the other general registers determined by the value of REFREG.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
+LEAF_ENTRY RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT
+
+// Export the canonical write barrier under unqualified name as well
+.ifc \REFREG, r1
+ALTERNATE_ENTRY RhpCheckedAssignRef
+.endif
+
+          // Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The
+          // intent is that writes to the target object ($REFREG) will be visible across all CPUs before the
+          // write to the destination ($DESTREG). This covers most of the common scenarios where the programmer
+          // might assume strongly ordered accesses, namely where the preceding writes are used to initialize
+          // the object and the final write, made by this barrier in the instruction following the DMB,
+          // publishes that object for other threads/cpus to see.
+          //
+          // Note that none of this is relevant for single cpu machines. We may choose to implement a
+          // uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again.
+          dmb
+          // Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+          // and the card table update we may perform below.
+GLOBAL_LABEL "RhpCheckedAssignRefAvLocation"\EXPORT_REG_NAME // WriteBarrierFunctionAvLocation
+.ifc \REFREG, r1
+GLOBAL_LABEL RhpCheckedAssignRefAVLocation
+.endif
+          str          \REFREG, [r0]
+
+          DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, \REFREG, r3
+
+          bx           lr
+LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT
+.endm
+
+// One day we might have write barriers for all the possible argument registers but for now we have
+// just one write barrier that assumes the input register is r1.
+DEFINE_CHECKED_WRITE_BARRIER r1, r1
+
+#ifdef FEATURE_NATIVEAOT
+
+// r0 = destination address
+// r1 = value
+// r2 = comparand
+LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
+          // To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This
+          // barrier must occur before the object reference update, so we have to do it unconditionally even
+          // though the update may fail below.
+          dmb
+LOCAL_LABEL(RhpCheckedLockCmpXchgRetry):
+          ldrex        r3, [r0]
+          cmp          r2, r3
+          bne          LOCAL_LABEL(RhpCheckedLockCmpXchg_NoBarrierRequired_r1)
+          strex        r3, r1, [r0]
+          cmp          r3, #0
+          bne          LOCAL_LABEL(RhpCheckedLockCmpXchgRetry)
+          mov          r3, r2
+
+          DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, r1, r2
+
+          mov          r0, r3
+          bx           lr
+LEAF_END RhpCheckedLockCmpXchg, _TEXT
+
+// r0 = destination address
+// r1 = value
+LEAF_ENTRY RhpCheckedXchg, _TEXT
+          // To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This
+          // barrier must occur before the object reference update.
+          dmb
+LOCAL_LABEL(RhpCheckedXchgRetry):
+          ldrex        r2, [r0]
+          strex        r3, r1, [r0]
+          cmp          r3, #0
+          bne          LOCAL_LABEL(RhpCheckedXchgRetry)
+
+          DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, r1, r3
+
+          // The original value is currently in r2. We need to return it in r0.
+          mov          r0, r2
+
+          bx           lr
+LEAF_END RhpCheckedXchg, _TEXT
+#endif // FEATURE_NATIVEAOT
+
+//
+// RhpByRefAssignRef simulates movs instruction for object references.
+//
+// On entry:
+//      r0: address of ref-field (assigned to)
+//      r1: address of the data (source)
+//      r2, r3: will be trashed
+//
+// On exit:
+//      r0, r1 are incremented by 4,
+//      r2, r3: trashed
+//
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1/2
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
+LEAF_ENTRY RhpByRefAssignRef, _TEXT
+          // See comment in RhpAssignRef
+          dmb
+
+GLOBAL_LABEL RhpByRefAssignRefAVLocation1
+          ldr          r2, [r1]
+GLOBAL_LABEL RhpByRefAssignRefAVLocation2
+          str          r2, [r0]
+
+          // Check whether the writes were even into the heap. If not there's no card update required.
+          PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, r3
+          cmp          r0, r3
+          blo          LOCAL_LABEL(RhpByRefAssignRef_NotInHeap)
+          PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, r3
+          cmp          r0, r3
+          bhs          LOCAL_LABEL(RhpByRefAssignRef_NotInHeap)
+
+          // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+          // we're in a debug build and write barrier checking has been enabled).
+          UPDATE_GC_SHADOW BASENAME, r2, r0
+
+          UPDATE_WRITE_WATCH_TABLE r0, r12, r3
+
+          // If the reference is to an object that's not in an ephemeral generation we have no need to track it
+          // (since the object won't be collected or moved by an ephemeral collection).
+          PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, r3
+          cmp          r2, r3
+          blo          LOCAL_LABEL(RhpByRefAssignRef_NotInHeap)
+          PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, r3
+          cmp          r2, r3
+          bhs          LOCAL_LABEL(RhpByRefAssignRef_NotInHeap)
+
+          // move current r0 value into r2 and then increment the pointers
+          mov          r2, r0
+          add          r1, #4
+          add          r0, #4
+
+          // We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+          // track this write. The location address is translated into an offset in the card table bitmap. We set
+          // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+          // the byte if it hasn't already been done since writes are expensive and impact scaling.
+          PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, r3
+          add           r2, r3, r2, lsr #10
+          ldrb          r3, [r2]
+          cmp           r3, #0x0FF
+          bne           LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable)
+          bx            lr
+
+// We get here if it's necessary to update the card table.
+LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable):
+          mov           r3, #0x0FF
+          strb          r3, [r2]
+          bx            lr
+
+LOCAL_LABEL(RhpByRefAssignRef_NotInHeap):
+          // Increment the pointers before leaving
+          add           r0, #4
+          add           r1, #4
+          bx            lr
+LEAF_END RhpByRefAssignRef, _TEXT
diff --git a/src/coreclr/runtime/arm64/AllocFast.S b/src/coreclr/runtime/arm64/AllocFast.S
new file mode 100644
index 000000000000..33e8c83ae3ef
--- /dev/null
+++ b/src/coreclr/runtime/arm64/AllocFast.S
@@ -0,0 +1,282 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+// allocation context then automatically fall back to the slow allocation path.
+//  x0 == MethodTable
+    LEAF_ENTRY RhpNewFast, _TEXT
+
+        // x3 = ee_alloc_context pointer
+        INLINE_GET_ALLOC_CONTEXT_BASE x3
+
+        //
+        // x0 contains MethodTable pointer
+        //
+        ldr         w2, [x0, #OFFSETOF__MethodTable__m_uBaseSize]
+
+        //
+        // x0: MethodTable pointer
+        // x2: base size
+        // x3: ee_alloc_context pointer
+        //
+
+        // Load potential new object address into x12.
+        ldr         x12, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        // Load and calculate the maximum size of object we can fit.
+        ldr         x13, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)]
+        sub         x13, x13, x12
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        cmp         x2, x13
+        bhi         LOCAL_LABEL(RhpNewFast_RarePath)
+
+        // Calculate the new alloc pointer to account for the allocation.
+        add         x2, x2, x12
+
+        // Set the new object's MethodTable pointer.
+        str         x0, [x12, #OFFSETOF__Object__m_pEEType]
+
+        // Update the alloc pointer to the newly calculated one.
+        str         x2, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        mov         x0, x12
+        ret
+
+LOCAL_LABEL(RhpNewFast_RarePath):
+        mov         x1, #0
+        b           C_FUNC(RhpNewObject)
+
+    LEAF_END RhpNewFast, _TEXT
+
+
+// Allocate non-array object with finalizer.
+//  x0 == MethodTable
+    LEAF_ENTRY RhpNewFinalizable, _TEXT
+        mov         x1, #GC_ALLOC_FINALIZE
+        b           C_FUNC(RhpNewObject)
+    LEAF_END RhpNewFinalizable, _TEXT
+
+
+// Allocate non-array object.
+//  x0 == MethodTable
+//  x1 == alloc flags
+    NESTED_ENTRY RhpNewObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME x3
+
+        // x3: transition frame
+
+        // Preserve the MethodTable in x19
+        mov         x19, x0
+
+        mov         w2, 0               // numElements
+
+        // Call the rest of the allocation helper.
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        bl          C_FUNC(RhpGcAlloc)
+
+        // Set the new object's MethodTable pointer on success.
+        cbz         x0, LOCAL_LABEL(NewOutOfMemory)
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+LOCAL_LABEL(NewOutOfMemory):
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov         x0, x19            // MethodTable pointer
+        mov         x1, 0              // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        b C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    NESTED_END RhpNewObject, _TEXT
+
+
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewPtrArrayFast
+//  x0 == MethodTable
+//  x1 == character/element count
+//  x2 == string/array size
+    .macro NEW_ARRAY_FAST
+
+        INLINE_GET_ALLOC_CONTEXT_BASE x3
+
+        // Load potential new object address into x12.
+        ldr         x12, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        // Load and calculate the maximum size of object we can fit.
+        ldr         x13, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)]
+        sub         x13, x13, x12
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        cmp         x2, x13
+#if defined(__APPLE__)
+        bhi         1f
+#else
+        bhi         C_FUNC(RhpNewVariableSizeObject)
+#endif
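+        // (On Apple platforms the branch above targets a local label that then tail-calls, since a
+        // conditional branch directly to an external symbol may not be representable for the Mach-O toolchain.)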
+
+        // Calculate the new alloc pointer to account for the allocation.
+        add         x2, x2, x12
+
+        // Set the new object's MethodTable pointer and element count.
+        str         x0, [x12, #OFFSETOF__Object__m_pEEType]
+        str         x1, [x12, #OFFSETOF__Array__m_Length]
+
+        // Update the alloc pointer to the newly calculated one.
+        str         x2, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        // Return the object allocated in x0.
+        mov         x0, x12
+
+        ret
+
+#if defined(__APPLE__)
+1:
+        b           C_FUNC(RhpNewVariableSizeObject)
+#endif
+
+    .endm
+
+
+// Allocate a string.
+//  x0 == MethodTable
+//  x1 == element/character count
+    LEAF_ENTRY RhNewString, _TEXT
+
+        // Make sure computing the overall allocation size won't overflow
+        movz        x2, MAX_STRING_LENGTH & 0xFFFF
+        movk        x2, MAX_STRING_LENGTH >> 16, lsl 16
+        cmp         x1, x2
+        bhi         LOCAL_LABEL(StringSizeOverflow)
+
+        // Compute overall allocation size (align(base size + (element size * elements), 8)).
+        mov         w2, #STRING_COMPONENT_SIZE
+        mov         x3, #(STRING_BASE_SIZE + 7)
+        umaddl      x2, w1, w2, x3          // x2 = w1 * w2 + x3
+        and         x2, x2, #-8
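+        // x2 = align(STRING_BASE_SIZE + STRING_COMPONENT_SIZE * character count, 8), the total allocation size.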
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(StringSizeOverflow):
+        // We get here if the length of the final string object cannot be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        // x0 holds MethodTable pointer already
+        mov         x1, #1                  // Indicate that we should throw OverflowException
+        b           C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    LEAF_END    RhNewString, _TEXT
+
+
+// Allocate one dimensional, zero based array (SZARRAY).
+//  x0 == MethodTable
+//  x1 == element count
+    LEAF_ENTRY RhpNewArrayFast, _TEXT
+
+        // We want to limit the element count to the non-negative 32-bit int range.
+        // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
+        // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst
+        // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
+        mov         x2, #0x7FFFFFFF
+        cmp         x1, x2
+        bhi         LOCAL_LABEL(ArraySizeOverflow)
+
+        ldrh        w2, [x0, #OFFSETOF__MethodTable__m_usComponentSize]
+        umull       x2, w1, w2
+        add         x2, x2, #(SZARRAY_BASE_SIZE + 7)
+        and         x2, x2, #-8
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(ArraySizeOverflow):
+        // We get here if the size of the final array object can not be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        // x0 holds MethodTable pointer already
+        mov         x1, #1                  // Indicate that we should throw OverflowException
+        b           C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    LEAF_END    RhpNewArrayFast, _TEXT
+
+
+// Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements.
+//  x0 == MethodTable
+//  x1 == element count
+    LEAF_ENTRY RhpNewPtrArrayFast, _TEXT
+
+        // Delegate overflow handling to the generic helper conservatively
+
+        mov         x2, #(0x40000000 / 8) // sizeof(void*)
+        cmp         x1, x2
+#if defined(__APPLE__)
+        bhs         1f
+#else
+        bhs         C_FUNC(RhpNewArrayFast)
+#endif
+
+        // In this case we know the element size is sizeof(void *), or 8 for arm64
+        // This helps us in two ways - we can shift instead of multiplying, and
+        // there's no need to align the size either
+
+        lsl         x2, x1, #3
+        add         x2, x2, #SZARRAY_BASE_SIZE
+
+        // No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
+        // to be a multiple of 8.
+
+        NEW_ARRAY_FAST
+
+#if defined(__APPLE__)
+1:
+        b           C_FUNC(RhpNewVariableSizeObject)
+#endif
+
+    LEAF_END    RhpNewPtrArrayFast, _TEXT
+
+
+// Allocate variable sized object (eg. array, string) using the slow path that calls a runtime helper.
+//  x0 == MethodTable
+//  x1 == element count
+    NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME x3
+
+        // Preserve data we will need later into the callee saved registers
+        mov         x19, x0             // Preserve MethodTable
+
+        mov         x2, x1              // numElements
+        mov         x1, #0              // uFlags
+
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        bl          C_FUNC(RhpGcAlloc)
+
+        // Set the new object's MethodTable pointer and length on success.
+        cbz         x0, LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory)
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory):
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov         x0, x19             // MethodTable Pointer
+        mov         x1, 0               // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        b C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    NESTED_END RhpNewVariableSizeObject, _TEXT
diff --git a/src/coreclr/runtime/arm64/AllocFast.asm b/src/coreclr/runtime/arm64/AllocFast.asm
new file mode 100644
index 000000000000..ad67eb2107e7
--- /dev/null
+++ b/src/coreclr/runtime/arm64/AllocFast.asm
@@ -0,0 +1,252 @@
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+    TEXTAREA
+
+;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+;; allocation context then automatically fallback to the slow allocation path.
+;;  x0 == MethodTable
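+;;
+;; The fast path below is the usual bump-pointer allocation, roughly (a sketch in C terms;
+;; field names follow the OFFSETOF__ constants used in the code):
+;;   uint8_t* ptr = ctx->alloc_ptr;
+;;   if (pMT->m_uBaseSize <= (size_t)(ctx->combined_limit - ptr)) {
+;;       ((Object*)ptr)->m_pEEType = pMT;
+;;       ctx->alloc_ptr = ptr + pMT->m_uBaseSize;
+;;       return (Object*)ptr;
+;;   }
+;;   return RhpNewObject(pMT, /* uFlags */ 0);      // slow path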
+    LEAF_ENTRY RhpNewFast
+
+        ;; x3 = ee_alloc_context pointer, TRASHES x2
+        INLINE_GET_ALLOC_CONTEXT_BASE x3, x2
+
+        ;;
+        ;; x0 contains MethodTable pointer
+        ;;
+        ldr         w2, [x0, #OFFSETOF__MethodTable__m_uBaseSize]
+
+        ;;
+        ;; x0: MethodTable pointer
+        ;; x2: base size
+        ;; x3: ee_alloc_context pointer
+        ;;
+
+        ;; Load potential new object address into x12.
+        ldr         x12, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        ;; Load and calculate the maximum size of object we can fit.
+        ldr         x13, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)]
+        sub         x13, x13, x12
+
+        ;; Determine whether the end of the object is too big for the current allocation context. If so,
+        ;; we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        cmp         x2, x13
+        bhi         RhpNewFast_RarePath
+
+        ;; Calculate the new alloc pointer to account for the allocation.
+        add         x2, x2, x12
+
+        ;; Set the new object's MethodTable pointer.
+        str         x0, [x12, #OFFSETOF__Object__m_pEEType]
+
+        ;; Update the alloc pointer to the newly calculated one.
+        str         x2, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        mov         x0, x12
+        ret
+
+RhpNewFast_RarePath
+        mov         x1, #0
+        b           RhpNewObject
+    LEAF_END RhpNewFast
+
+;; Allocate non-array object with finalizer.
+;;  x0 == MethodTable
+    LEAF_ENTRY RhpNewFinalizable
+        mov         x1, #GC_ALLOC_FINALIZE
+        b           RhpNewObject
+    LEAF_END RhpNewFinalizable
+
+;; Allocate non-array object.
+;;  x0 == MethodTable
+;;  x1 == alloc flags
+    NESTED_ENTRY RhpNewObject
+
+        PUSH_COOP_PINVOKE_FRAME x3
+
+        ;; x3: transition frame
+
+        ;; Preserve the MethodTable in x19
+        mov         x19, x0
+
+        mov         w2, #0              ; numElements
+
+        ;; Call the rest of the allocation helper.
+        ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        bl          RhpGcAlloc
+
+        cbz         x0, NewOutOfMemory
+
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+NewOutOfMemory
+        ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        ;; an out of memory exception that the caller of this allocator understands.
+
+        mov         x0, x19             ; MethodTable pointer
+        mov         x1, #0              ; Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_NOP b RhExceptionHandling_FailedAllocation
+
+    NESTED_END RhpNewObject
+
+;; Shared code for RhNewString, RhpNewArrayFast and RhpNewPtrArrayFast
+;;  x0 == MethodTable
+;;  x1 == character/element count
+;;  x2 == string/array size
+    MACRO
+        NEW_ARRAY_FAST
+
+        INLINE_GET_ALLOC_CONTEXT_BASE x3, x5
+
+        ;; Load potential new object address into x12.
+        ldr         x12, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        ;; Load and calculate the maximum size of object we can fit.
+        ldr         x13, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)]
+        sub         x13, x13, x12
+
+        ;; Determine whether the end of the object is too big for the current allocation context. If so,
+        ;; we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        cmp         x2, x13
+        bhi         RhpNewVariableSizeObject
+
+        ;; Calculate the new alloc pointer to account for the allocation.
+        add         x2, x2, x12
+
+        ;; Set the new object's MethodTable pointer and element count.
+        str         x0, [x12, #OFFSETOF__Object__m_pEEType]
+        str         x1, [x12, #OFFSETOF__Array__m_Length]
+
+        ;; Update the alloc pointer to the newly calculated one.
+        str         x2, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+        ;; Return the object allocated in x0.
+        mov         x0, x12
+
+        ret
+
+    MEND
+
+;; Allocate a string.
+;;  x0 == MethodTable
+;;  x1 == element/character count
+    LEAF_ENTRY RhNewString
+        ;; Make sure computing the overall allocation size won't overflow
+        movz        x2, #(MAX_STRING_LENGTH & 0xFFFF)
+        movk        x2, #(MAX_STRING_LENGTH >> 16), lsl #16
+        cmp         x1, x2
+        bhi         StringSizeOverflow
+
+        ;; Compute overall allocation size (align(base size + (element size * elements), 8)).
+        mov         w2, #STRING_COMPONENT_SIZE
+        mov         x3, #(STRING_BASE_SIZE + 7)
+        umaddl      x2, w1, w2, x3          ; x2 = w1 * w2 + x3
+        and         x2, x2, #-8
+
+        NEW_ARRAY_FAST
+
+StringSizeOverflow
+        ; We get here if the length of the final string object can't be represented as an unsigned
+        ; 32-bit value. We're going to tail-call to a managed helper that will throw
+        ; an OOM exception that the caller of this allocator understands.
+
+        ; x0 holds MethodTable pointer already
+        mov         x1, #1                  ; Indicate that we should throw OverflowException
+        b           RhExceptionHandling_FailedAllocation
+    LEAF_END    RhNewString
+
+;; Allocate one dimensional, zero based array (SZARRAY).
+;;  x0 == MethodTable
+;;  x1 == element count
+    LEAF_ENTRY RhpNewArrayFast
+
+        ;; We want to limit the element count to the non-negative 32-bit int range.
+        ;; If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
+        ;; size is <= 0xffff (it's an unsigned 16-bit value), and the base size for the worst
+        ;; case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
+        mov         x2, #0x7FFFFFFF
+        cmp         x1, x2
+        bhi         ArraySizeOverflow
+
+        ldrh        w2, [x0, #OFFSETOF__MethodTable__m_usComponentSize]
+        umull       x2, w1, w2
+        add         x2, x2, #(SZARRAY_BASE_SIZE + 7)
+        and         x2, x2, #-8
+
+        NEW_ARRAY_FAST
+
+ArraySizeOverflow
+        ; We get here if the size of the final array object can't be represented as an unsigned
+        ; 32-bit value. We're going to tail-call to a managed helper that will throw
+        ; an overflow exception that the caller of this allocator understands.
+
+        ; x0 holds MethodTable pointer already
+        mov         x1, #1                  ; Indicate that we should throw OverflowException
+        b           RhExceptionHandling_FailedAllocation
+    LEAF_END    RhpNewArrayFast
+
+;; Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements.
+;;  x0 == MethodTable
+;;  x1 == element count
+    LEAF_ENTRY RhpNewPtrArrayFast
+
+        ; Delegate overflow handling to the generic helper conservatively
+
+        mov         x2, #(0x40000000 / 8) ; sizeof(void*)
+        cmp         x1, x2
+        bhs         RhpNewArrayFast
+
+        ; In this case we know the element size is sizeof(void *), or 8 for arm64
+        ; This helps us in two ways - we can shift instead of multiplying, and
+        ; there's no need to align the size either
+
+        lsl         x2, x1, #3
+        add         x2, x2, #SZARRAY_BASE_SIZE
+
+        ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
+        ; to be a multiple of 8.
+
+        NEW_ARRAY_FAST
+
+    LEAF_END    RhpNewPtrArrayFast
+
+;; Allocate variable sized object (eg. array, string) using the slow path that calls a runtime helper.
+;;  x0 == MethodTable
+;;  x1 == element count
+    NESTED_ENTRY RhpNewVariableSizeObject
+
+        PUSH_COOP_PINVOKE_FRAME x3
+
+        ; Preserve data we'll need later into the callee saved registers
+        mov         x19, x0             ; Preserve MethodTable
+
+        mov         x2, x1              ; numElements
+        mov         x1, #0              ; uFlags
+
+        ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        bl          RhpGcAlloc
+
+        cbz         x0, RhpNewVariableSizeObject_OutOfMemory
+
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+RhpNewVariableSizeObject_OutOfMemory
+        ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        ;; an out of memory exception that the caller of this allocator understands.
+
+        mov         x0, x19             ; MethodTable Pointer
+        mov         x1, #0              ; Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_NOP b RhExceptionHandling_FailedAllocation
+
+    NESTED_END RhpNewVariableSizeObject
+
+    END
diff --git a/src/coreclr/runtime/arm64/StubDispatch.S b/src/coreclr/runtime/arm64/StubDispatch.S
new file mode 100644
index 000000000000..750a99db4263
--- /dev/null
+++ b/src/coreclr/runtime/arm64/StubDispatch.S
@@ -0,0 +1,98 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+#if defined(__APPLE__)
+    // Currently the build fails without this due to an issue when the first method in the assembly file has an alternate entry at the start of the file.
+    // Fix by adding an empty, unused method.
+    LEAF_ENTRY RhpStubDispatchDoNotFailToBuild, _TEXT
+       ret
+    LEAF_END RhpStubDispatchDoNotFailToBuild, _TEXT
+#endif
+
+    // Macro that generates code to check a single cache entry.
+    .macro CHECK_CACHE_ENTRY entry
+        // Check a single entry in the cache.
+        //  x9   : Cache data structure. Also used for target address jump.
+        //  x10  : Instance MethodTable*
+        //  x11  : Indirection cell address, preserved
+        //  x12  : Trashed
+        ldr     x12, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))]
+        cmp     x10, x12
+        bne     0f
+        ldr     x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)]
+        br      x9
+0:
+    .endm
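+
+    // Conceptually each cache entry is a (MethodTable*, target) pair, so the generated check is
+    // roughly (a sketch in C terms):
+    //   if (pInstanceMethodTable == cache->m_rgEntries[entry].pMT)
+    //       goto *cache->m_rgEntries[entry].target;   // jump to the cached implementation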
+
+//
+// Macro that generates a stub consuming a cache with the given number of entries.
+//
+    .macro DEFINE_INTERFACE_DISPATCH_STUB entries
+
+    NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler
+
+        // x11 holds the indirection cell address. Load the cache pointer.
+        ldr     x9, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+        // Load the MethodTable from the object instance in x0.
+#ifdef TARGET_APPLE
+// Apple's linker has issues which break unwind info if
+// an ALTERNATE_ENTRY is present in the middle of a function; see https://github.com/dotnet/runtime/pull/114982#discussion_r2083272768
+.cfi_endproc
+#endif
+        ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries
+#ifdef TARGET_APPLE
+.cfi_startproc
+#endif
+        ldr     x10, [x0]
+
+    .global CurrentEntry
+    .set CurrentEntry, 0
+
+    .rept \entries
+        CHECK_CACHE_ENTRY CurrentEntry
+        .set CurrentEntry, CurrentEntry + 1
+    .endr
+
+        // x11 still contains the indirection cell address.
+        b C_FUNC(RhpInterfaceDispatchSlow)
+
+    NESTED_END "RhpInterfaceDispatch\entries", _TEXT
+
+    .endm
+
+//
+// Define all the stub routines we currently need.
+//
+// If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the
+// *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens
+// during the interface dispatch.
+//
+    DEFINE_INTERFACE_DISPATCH_STUB 1
+    DEFINE_INTERFACE_DISPATCH_STUB 2
+    DEFINE_INTERFACE_DISPATCH_STUB 4
+    DEFINE_INTERFACE_DISPATCH_STUB 8
+    DEFINE_INTERFACE_DISPATCH_STUB 16
+    DEFINE_INTERFACE_DISPATCH_STUB 32
+    DEFINE_INTERFACE_DISPATCH_STUB 64
+
+//
+// Initial dispatch on an interface when we don't have a cache yet.
+//
+    LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT
+    ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+        // Trigger an AV if we're dispatching on a null this.
+        // The exception handling infrastructure is aware of the fact that this is the first
+        // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        // to a NullReferenceException at the callsite.
+        ldr     xzr, [x0]
+
+        // Just tail call to the cache miss helper.
+        b C_FUNC(RhpInterfaceDispatchSlow)
+    LEAF_END RhpInitialInterfaceDispatch, _TEXT
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/coreclr/runtime/arm64/StubDispatch.asm b/src/coreclr/runtime/arm64/StubDispatch.asm
new file mode 100644
index 000000000000..697d3a10f52e
--- /dev/null
+++ b/src/coreclr/runtime/arm64/StubDispatch.asm
@@ -0,0 +1,92 @@
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+    TEXTAREA
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+    EXTERN RhpInterfaceDispatchSlow
+
+    ;; Macro that generates code to check a single cache entry.
+    MACRO
+        CHECK_CACHE_ENTRY $entry
+        ;; Check a single entry in the cache.
+        ;;  x9   : Cache data structure. Also used for target address jump.
+        ;;  x10  : Instance MethodTable*
+        ;;  x11  : Indirection cell address, preserved
+        ;;  x12  : Trashed
+        ldr     x12, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16))]
+        cmp     x10, x12
+        bne     %ft0
+        ldr     x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16) + 8)]
+        br      x9
+0
+    MEND
+
+
+;;
+;; Macro that generates a stub consuming a cache with the given number of entries.
+;;
+    MACRO
+        DEFINE_INTERFACE_DISPATCH_STUB $entries
+
+    NESTED_ENTRY RhpInterfaceDispatch$entries
+
+        ;; x11 holds the indirection cell address. Load the cache pointer.
+        ldr     x9, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+        ;; Load the MethodTable from the object instance in x0.
+        ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation$entries
+        ldr     x10, [x0]
+
+    GBLA CurrentEntry
+CurrentEntry SETA 0
+
+    WHILE CurrentEntry < $entries
+        CHECK_CACHE_ENTRY CurrentEntry
+CurrentEntry SETA CurrentEntry + 1
+    WEND
+
+        ;; x11 still contains the indirection cell address.
+        b RhpInterfaceDispatchSlow
+
+    NESTED_END RhpInterfaceDispatch$entries
+
+    MEND
+
+;;
+;; Define all the stub routines we currently need.
+;;
+;; If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the
+;; *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens
+;; during the interface dispatch.
+;;
+    DEFINE_INTERFACE_DISPATCH_STUB 1
+    DEFINE_INTERFACE_DISPATCH_STUB 2
+    DEFINE_INTERFACE_DISPATCH_STUB 4
+    DEFINE_INTERFACE_DISPATCH_STUB 8
+    DEFINE_INTERFACE_DISPATCH_STUB 16
+    DEFINE_INTERFACE_DISPATCH_STUB 32
+    DEFINE_INTERFACE_DISPATCH_STUB 64
+
+
+;;
+;; Initial dispatch on an interface when we don't have a cache yet.
+;;
+    LEAF_ENTRY RhpInitialInterfaceDispatch
+    ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+        ;; Trigger an AV if we're dispatching on a null this.
+        ;; The exception handling infrastructure is aware of the fact that this is the first
+        ;; instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        ;; to a NullReferenceException at the callsite.
+        ldr     xzr, [x0]
+
+        ;; Just tail call to the cache miss helper.
+        b RhpInterfaceDispatchSlow
+    LEAF_END RhpInitialInterfaceDispatch
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
+
+    END
diff --git a/src/coreclr/runtime/arm64/WriteBarriers.S b/src/coreclr/runtime/arm64/WriteBarriers.S
new file mode 100644
index 000000000000..8087b290f002
--- /dev/null
+++ b/src/coreclr/runtime/arm64/WriteBarriers.S
@@ -0,0 +1,414 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+#if defined(__APPLE__)
+    // Currently the build fails without this due to an issue when the first method in the assembly file has an alternate entry at the start of the file.
+    // Fix by adding an empty, unused method.
+    LEAF_ENTRY RhpWriteBarriersDoNotFailToBuild, _TEXT
+       ret
+    LEAF_END RhpWriteBarriersDoNotFailToBuild, _TEXT
+#endif
+
+// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+// during garbage collections to verify that object references where never written to the heap without using a
+// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing
+// new references to the real heap. Since this can not be solved perfectly without critical sections around the
+// entire update process, we instead update the shadow location and then re-check the real location (as two
+// ordered operations) and if there is a disparity we will re-write the shadow location with a special value
+// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+// time and these write barrier operations are atomic wrt GCs, this is sufficient to guarantee that the
+// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
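+//
+// In C terms the shadow update is roughly (a sketch; the g_* globals are the ones the asm references):
+//   if (g_GCShadow != 0 && dest >= g_lowest_address) {
+//       uint8_t* shadow = g_GCShadow + (dest - g_lowest_address);
+//       if (shadow < g_GCShadowEnd) {
+//           *(uintptr_t*)shadow = ref;
+//           __dmb();                                    // order the write before the re-read
+//           if (*(uintptr_t*)dest != ref) *(uintptr_t*)shadow = INVALIDGCVALUE;
+//       }
+//   }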
+#ifdef WRITE_BARRIER_CHECK
+
+    .global     $g_GCShadow
+    .global     $g_GCShadowEnd
+
+        // On entry:
+        //  $destReg: location to be updated
+        //  $refReg: objectref to be stored
+        //
+        // On exit:
+        //  x12,x17: trashed
+        //  other registers are preserved
+        //
+        .macro UPDATE_GC_SHADOW destReg, refReg
+
+        // If g_GCShadow is 0, don't perform the check.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, X12
+        cbz     x12, 1f
+
+        // Save destReg since we're about to modify it (and we need the original value both within the macro and
+        // once we exit the macro).
+        mov     x17, \destReg
+
+        // Transform destReg into the equivalent address in the shadow heap.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, X12
+        subs    \destReg, \destReg, x12
+        blo     0f
+
+        PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, X12
+        add     \destReg, \destReg, x12
+
+        PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, X12
+        cmp     \destReg, x12
+        bhs     0f
+
+        // Update the shadow heap.
+        str     \refReg, [\destReg]
+
+        // The following read must be strongly ordered wrt to the write we have just performed in order to
+        // prevent race conditions.
+        dmb     ish
+
+        // Now check that the real heap location still contains the value we just wrote into the shadow heap.
+        mov     x12, x17
+        ldr     x12, [x12]
+        cmp     x12, \refReg
+        beq     0f
+
+        // Someone went and updated the real heap. We need to invalidate the shadow location with
+        // INVALIDGCVALUE since we cannot guarantee whose shadow update won.
+        movz x12, (INVALIDGCVALUE & 0xFFFF) // #0xcccd
+        movk x12, ((INVALIDGCVALUE >> 16) & 0xFFFF), LSL #16
+        str     x12, [\destReg]
+
+0:
+        // Restore original destReg value
+        mov     \destReg, x17
+
+1:
+    .endm
+
+#else // WRITE_BARRIER_CHECK
+
+    .macro UPDATE_GC_SHADOW destReg, refReg
+    .endm
+
+#endif // WRITE_BARRIER_CHECK
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+// name of the register that points to the location to be updated and the name of the register that holds the
+// object reference (this should be in upper case as it is used in the definition of the name of the helper).
+
+// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for
+// some interlocked helpers that need an inline barrier.
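+//
+// The core of the barrier is roughly (a sketch in C terms; the shift amounts match the asm below):
+//   if (ref >= g_ephemeral_low && ref < g_ephemeral_high) {
+//       uint8_t* card = g_card_table + ((size_t)dest >> 11);
+//       if (*card != 0xFF) *card = 0xFF;        // plus the optional card-bundle update at >> 21
+//   }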
+
+        // On entry:
+        //   destReg: location to be updated (cannot be x12,x17)
+        //   refReg:  objectref to be stored (cannot be x12,x17)
+        //
+        // On exit:
+        //   x12,x17: trashed
+        //
+        .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+        // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+        // we are in a debug build and write barrier checking has been enabled).
+        UPDATE_GC_SHADOW \destReg, \refReg
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+        // Update the write watch table if necessary
+        PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12
+
+        cbz     x12, 2f
+        add     x12, x12, \destReg, lsr #0xc  // SoftwareWriteWatch::AddressToTableByteIndexShift
+        ldrb    w17, [x12]
+        cbnz    x17, 2f
+        mov     w17, #0xFF
+        strb    w17, [x12]
+#endif
+
+2:
+        // We can skip the card table write if the reference is to
+        // an object not on the ephemeral segment.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low,  x12
+        PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, x17
+        cmp     \refReg, x12
+        ccmp    \refReg, x17, #0x2, hs
+        bhs     0f
+
+        // Set this object's card, if it has not already been set.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12
+        add     x17, x12, \destReg, lsr #11
+
+        // Check that this card has not already been written. Avoiding useless writes is a big win on
+        // multi-proc systems since it avoids cache thrashing.
+        ldrb    w12, [x17]
+        cmp     x12, 0xFF
+        beq     0f
+
+        mov     x12, 0xFF
+        strb    w12, [x17]
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        // Check if we need to update the card bundle table
+        PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
+        add     x17, x12, \destReg, lsr #21
+        ldrb    w12, [x17]
+        cmp     x12, 0xFF
+        beq     0f
+
+        mov     x12, 0xFF
+        strb    w12, [x17]
+#endif
+
+0:
+        // Exit label
+    .endm
+
+        // On entry:
+        //   destReg: location to be updated
+        //   refReg:  objectref to be stored
+        //
+        // On exit:
+        //   x12, x17:   trashed
+        //
+        .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+        // The "check" of this checked write barrier - is destReg
+        // within the heap? if no, early out.
+
+        PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12
+        cmp     \destReg, x12
+
+        PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x12
+
+        // If \destReg >= g_lowest_address, compare \destReg to g_highest_address.
+        // Otherwise, set the C flag (0x2) to take the next branch.
+        ccmp    \destReg, x12, #0x2, hs
+        bhs     0f
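+
+        // In C terms the cmp/ccmp/bhs sequence above is roughly:
+        //   if (dest < g_lowest_address || dest >= g_highest_address) goto exit;   // not a heap location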
+
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg
+
+0:
+        // Exit label
+    .endm
+
+// void JIT_ByRefWriteBarrier
+// On entry:
+//   x13  : the source address (points to object reference to write)
+//   x14  : the destination address (object reference written here)
+//
+// On exit:
+//   x13  : incremented by 8
+//   x14  : incremented by 8
+//   x15  : trashed
+//   x12, x17  : trashed
+//
+//   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+//         if you add more trashed registers.
+//
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
+LEAF_ENTRY RhpByRefAssignRefArm64, _TEXT
+
+    ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
+        ldr     x15, [x13], 8
+        b       C_FUNC(RhpCheckedAssignRefArm64)
+
+LEAF_END RhpByRefAssignRefArm64, _TEXT
+
+// JIT_CheckedWriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that may reside
+// on the managed heap.
+//
+// On entry:
+//   x14 : the destination address (LHS of the assignment).
+//         May not be a heap location (hence the checked).
+//   x15 : the object reference (RHS of the assignment).
+//
+// On exit:
+//   x12, x17 : trashed
+//   x14      : incremented by 8
+ LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT
+
+        // is destReg within the heap?
+        PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address,  x12
+        PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x17
+        cmp     x14, x12
+        ccmp    x14, x17, #0x2, hs
+        bhs     LOCAL_LABEL(NotInHeap)
+
+        b       C_FUNC(RhpAssignRefArm64)
+
+LOCAL_LABEL(NotInHeap):
+#ifdef TARGET_APPLE
+// Apple's linker has issues which break unwind info if
+// an ALTERNATE_ENTRY is present in the middle of a function; see https://github.com/dotnet/runtime/pull/114982#discussion_r2083272768
+.cfi_endproc
+#endif
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+#ifdef TARGET_APPLE
+.cfi_startproc
+#endif
+        str     x15, [x14], 8
+        ret
+LEAF_END RhpCheckedAssignRefArm64, _TEXT
+
+// JIT_WriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that are known to
+// reside on the managed heap.
+//
+// On entry:
+//  x14 : the destination address (LHS of the assignment).
+//  x15 : the object reference (RHS of the assignment).
+//
+// On exit:
+//  x12, x17 : trashed
+//  x14 : incremented by 8
+LEAF_ENTRY RhpAssignRefArm64, _TEXT
+
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+        stlr    x15, [x14]
+
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15
+
+        add     x14, x14, 8
+        ret
+
+LEAF_END RhpAssignRefArm64, _TEXT
+
+// Same as RhpAssignRefArm64, but with standard ABI.
+LEAF_ENTRY RhpAssignRef, _TEXT
+        mov     x14, x0                     // x14 = dst
+        mov     x15, x1                     // x15 = val
+        b       C_FUNC(RhpAssignRefArm64)
+LEAF_END RhpAssignRef, _TEXT
+
+#ifdef FEATURE_NATIVEAOT
+
+// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
+// successful updates.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+.arch_extension lse
+#endif
+
+// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand)
+//
+// Interlocked compare exchange on objectref.
+//
+// On entry:
+//  x0: pointer to objectref
+//  x1: exchange value
+//  x2: comparand
+//
+// On exit:
+//  x0: original value of objectref
+//  x10, x12, x16, x17: trashed
+//
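+// In C terms the helper behaves roughly like this (a sketch, not the exact instruction flow;
+// the two helper names here are purely descriptive):
+//   Object* old = atomic_compare_exchange(dest, comparand, value);   // sequentially-consistent CAS
+//   if (old == comparand)
+//       checked_write_barrier(dest, value);    // card / shadow / write-watch bookkeeping
+//   return old;
+//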
+    LEAF_ENTRY RhpCheckedLockCmpXchg
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16
+        tbz    w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(CmpXchgRetry)
+#endif
+
+        mov    x10, x2
+        casal  x10, x1, [x0]                  // exchange
+        cmp    x2, x10
+        bne    LOCAL_LABEL(CmpXchgNoUpdate)
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        b      LOCAL_LABEL(DoCardsCmpXchg)
+LOCAL_LABEL(CmpXchgRetry):
+        // Check location value is what we expect.
+        ldaxr   x10, [x0]
+        cmp     x10, x2
+        bne     LOCAL_LABEL(CmpXchgNoUpdate)
+
+        // Current value matches comparand, attempt to update with the new value.
+        stlxr   w12, x1, [x0]
+        cbnz    w12, LOCAL_LABEL(CmpXchgRetry)
+#endif
+
+LOCAL_LABEL(DoCardsCmpXchg):
+        // We have successfully updated the value of the objectref so now we need a GC write barrier.
+        // The following barrier code takes the destination in x0 and the value in x1 so the arguments are
+        // already correctly set up.
+
+        INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1
+
+LOCAL_LABEL(CmpXchgNoUpdate):
+        // x10 still contains the original value.
+        mov     x0, x10
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        tbnz    w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(NoBarrierCmpXchg)
+        InterlockedOperationBarrier
+LOCAL_LABEL(NoBarrierCmpXchg):
+#endif
+        ret     lr
+
+    LEAF_END RhpCheckedLockCmpXchg, _TEXT
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
+
+// RhpCheckedXchg(Object** destination, Object* value)
+//
+// Interlocked exchange on objectref.
+//
+// On entry:
+//  x0: pointer to objectref
+//  x1: exchange value
+//
+// On exit:
+//  x0: original value of objectref
+//  x10: trashed
+//  x12, x16, x17: trashed
+//
+    LEAF_ENTRY RhpCheckedXchg, _TEXT
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16
+        tbz    w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(ExchangeRetry)
+#endif
+
+        swpal  x1, x10, [x0]                   // exchange
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        b      LOCAL_LABEL(DoCardsXchg)
+LOCAL_LABEL(ExchangeRetry):
+        // Read the existing memory location.
+        ldaxr   x10,  [x0]
+
+        // Attempt to update with the new value.
+        stlxr   w12, x1, [x0]
+        cbnz    w12, LOCAL_LABEL(ExchangeRetry)
+#endif
+
+LOCAL_LABEL(DoCardsXchg):
+        // We have successfully updated the value of the objectref so now we need a GC write barrier.
+        // The following barrier code takes the destination in x0 and the value in x1 so the arguments are
+        // already correctly set up.
+
+        INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1
+
+        // x10 still contains the original value.
+        mov     x0, x10
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        tbnz    w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(NoBarrierXchg)
+        InterlockedOperationBarrier
+LOCAL_LABEL(NoBarrierXchg):
+#endif
+        ret
+
+    LEAF_END RhpCheckedXchg, _TEXT
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+.arch_extension nolse
+#endif
+#endif // FEATURE_NATIVEAOT
diff --git a/src/coreclr/runtime/arm64/WriteBarriers.asm b/src/coreclr/runtime/arm64/WriteBarriers.asm
new file mode 100644
index 000000000000..10fb789fa37f
--- /dev/null
+++ b/src/coreclr/runtime/arm64/WriteBarriers.asm
@@ -0,0 +1,394 @@
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+
+;;
+;; Define the helpers used to implement the write barrier required when writing an object reference into a
+;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in
+;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral
+;; collection.
+;;
+
+#include "AsmMacros_Shared.h"
+
+    TEXTAREA
+
+;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+;; during garbage collections to verify that object references were never written to the heap without using a
+;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing
+;; new references to the real heap. Since this can't be solved perfectly without critical sections around the
+;; entire update process, we instead update the shadow location and then re-check the real location (as two
+;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value
+;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+;; time and these write barrier operations are atomic wrt GCs, this is sufficient to guarantee that the
+;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
+#ifdef WRITE_BARRIER_CHECK
+
+    SETALIAS    g_GCShadow, ?g_GCShadow@@3PEAEEA
+    SETALIAS    g_GCShadowEnd, ?g_GCShadowEnd@@3PEAEEA
+    EXTERN      $g_GCShadow
+    EXTERN      $g_GCShadowEnd
+
+INVALIDGCVALUE  EQU 0xCCCCCCCD
+
+    MACRO
+        ;; On entry:
+        ;;  $destReg: location to be updated (cannot be x12,x17)
+        ;;  $refReg: objectref to be stored (cannot be x12,x17)
+        ;;
+        ;; On exit:
+        ;;  x12,x17: trashed
+        ;;  other registers are preserved
+        ;;
+        UPDATE_GC_SHADOW $destReg, $refReg
+
+        ;; If g_GCShadow is 0, don't perform the check.
+        PREPARE_EXTERNAL_VAR_INDIRECT $g_GCShadow, x12
+        cbz     x12, %ft1
+
+        ;; Save $destReg since we're about to modify it (and we need the original value both within the macro and
+        ;; once we exit the macro).
+        mov     x17, $destReg
+
+        ;; Transform $destReg into the equivalent address in the shadow heap.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12
+        subs    $destReg, $destReg, x12
+        blo     %ft0
+
+        PREPARE_EXTERNAL_VAR_INDIRECT $g_GCShadow, x12
+        add     $destReg, $destReg, x12
+
+        PREPARE_EXTERNAL_VAR_INDIRECT $g_GCShadowEnd, x12
+        cmp     $destReg, x12
+        bhs     %ft0
+
+        ;; Update the shadow heap.
+        str     $refReg, [$destReg]
+
+        ;; The following read must be strongly ordered wrt to the write we've just performed in order to
+        ;; prevent race conditions.
+        dmb     ish
+
+        ;; Now check that the real heap location still contains the value we just wrote into the shadow heap.
+        mov     x12, x17
+        ldr     x12, [x12]
+        cmp     x12, $refReg
+        beq     %ft0
+
+        ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't
+        ;; guarantee whose shadow update won.
+        MOVL64  x12, INVALIDGCVALUE, 0
+        str     x12, [$destReg]
+
+0
+        ;; Restore original $destReg value
+        mov     $destReg, x17
+
+1
+    MEND
+
+#else // WRITE_BARRIER_CHECK
+
+    MACRO
+        UPDATE_GC_SHADOW $destReg, $refReg
+    MEND
+
+#endif // WRITE_BARRIER_CHECK
+
+;; There are several different helpers used depending on which register holds the object reference. Since all
+;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+;; name of the register that points to the location to be updated and the name of the register that holds the
+;; object reference (this should be in upper case as it's used in the definition of the name of the helper).
+
+;; Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for
+;; some interlocked helpers that need an inline barrier.
+    MACRO
+        ;; On entry:
+        ;;   $destReg:  location to be updated (cannot be x12,x17)
+        ;;   $refReg:   objectref to be stored (cannot be x12,x17)
+        ;;
+        ;; On exit:
+        ;;   x12,x17: trashed
+        ;;
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE $destReg, $refReg
+
+        ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+        ;; we're in a debug build and write barrier checking has been enabled).
+        UPDATE_GC_SHADOW $destReg, $refReg
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+        // Update the write watch table if necessary
+        PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12
+
+        cbz     x12, %ft2
+        add     x12, x12, $destReg, lsr #0xc  // SoftwareWriteWatch::AddressToTableByteIndexShift
+        ldrb    w17, [x12]
+        cbnz    x17, %ft2
+        mov     w17, #0xFF
+        strb    w17, [x12]
+#endif
+
+2
+        ;; We can skip the card table write if the reference is to
+        ;; an object not on the ephemeral segment.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low,  x12
+        PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, x17
+        cmp     $refReg, x12
+        ccmp    $refReg, x17, #0x2, hs
+        bhs     %ft0
+
+        ;; Set this object's card, if it hasn't already been set.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12
+        add     x17, x12, $destReg, lsr #11
+
+        ;; Check that this card hasn't already been written. Avoiding useless writes is a big win on
+        ;; multi-proc systems since it avoids cache thrashing.
+        ldrb    w12, [x17]
+        cmp     x12, 0xFF
+        beq     %ft0
+
+        mov     x12, 0xFF
+        strb    w12, [x17]
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        // Check if we need to update the card bundle table
+        PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
+        add     x17, x12, $destReg, lsr #21
+        ldrb    w12, [x17]
+        cmp     x12, 0xFF
+        beq     %ft0
+
+        mov     x12, 0xFF
+        strb    w12, [x17]
+#endif
+
+0
+        ;; Exit label
+    MEND
+
+    MACRO
+        ;; On entry:
+        ;;   $destReg:  location to be updated (cannot be x12,x17)
+        ;;   $refReg:   objectref to be stored (cannot be x12,x17)
+        ;;
+        ;; On exit:
+        ;;   x12, x17:       trashed
+        ;;
+        INSERT_CHECKED_WRITE_BARRIER_CORE $destReg, $refReg
+
+        ;; The "check" of this checked write barrier - is $destReg
+        ;; within the heap? if no, early out.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address,  x12
+        PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x17
+        cmp     $destReg, x12
+        ccmp    $destReg, x17, #0x2, hs
+        bhs     %ft0
+
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE $destReg, $refReg
+
+0
+        ;; Exit label
+    MEND
+
+;; void JIT_ByRefWriteBarrier
+;; On entry:
+;;   x13 : the source address (points to object reference to write)
+;;   x14 : the destination address (object reference written here)
+;;
+;; On exit:
+;;   x13 : incremented by 8
+;;   x14 : incremented by 8
+;;   x15  : trashed
+;;   x12, x17  : trashed
+;;
+;;   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+;;         if you add more trashed registers.
+;;
+;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1
+;; - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
+    LEAF_ENTRY RhpByRefAssignRefArm64
+
+    ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
+        ldr     x15, [x13], 8
+        b       RhpCheckedAssignRefArm64
+
+    LEAF_END RhpByRefAssignRefArm64
+
+
+;; JIT_CheckedWriteBarrier(Object** dst, Object* src)
+;;
+;; Write barrier for writes to objects that may reside
+;; on the managed heap.
+;;
+;; On entry:
+;;   x14  : the destination address (LHS of the assignment).
+;;          May not be a heap location (hence the checked).
+;;   x15  : the object reference (RHS of the assignment)
+;;
+;; On exit:
+;;   x12, x17 : trashed
+;;   x14      : incremented by 8
+    LEAF_ENTRY RhpCheckedAssignRefArm64
+
+        ;; is destReg within the heap?
+        PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address,  x12
+        PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x17
+        cmp     x14, x12
+        ccmp    x14, x17, #0x2, hs
+        blo     RhpAssignRefArm64
+
+NotInHeap
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+        str     x15, [x14], 8
+        ret
+
+    LEAF_END RhpCheckedAssignRefArm64
+
+;; JIT_WriteBarrier(Object** dst, Object* src)
+;;
+;; Write barrier for writes to objects that are known to
+;; reside on the managed heap.
+;;
+;; On entry:
+;;   x14  : the destination address (LHS of the assignment)
+;;   x15  : the object reference (RHS of the assignment)
+;;
+;; On exit:
+;;   x12, x17 : trashed
+;;   x14 : incremented by 8
+    LEAF_ENTRY RhpAssignRefArm64
+
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+        stlr    x15, [x14]
+
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15
+
+        add     x14, x14, 8
+        ret
+
+    LEAF_END RhpAssignRefArm64
+
+;; same as RhpAssignRefArm64, but with standard ABI.
+    LEAF_ENTRY RhpAssignRef
+        mov     x14, x0             ; x14 = dst
+        mov     x15, x1             ; x15 = val
+        b       RhpAssignRefArm64
+    LEAF_END RhpAssignRef
+
+#ifdef FEATURE_NATIVEAOT
+
+;; Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
+;; successful updates.
+
+;; RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand)
+;;
+;; Interlocked compare exchange on objectref.
+;;
+;; On entry:
+;;   x0  : pointer to objectref
+;;   x1  : exchange value
+;;   x2  : comparand
+;;
+;; On exit:
+;;  x0: original value of objectref
+;;  x10, x12, x16, x17: trashed
+;;
+    LEAF_ENTRY RhpCheckedLockCmpXchg
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16
+        tbz    x16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, CmpXchgRetry
+#endif
+
+        mov    x10, x2
+        casal  x10, x1, [x0]                  ;; exchange
+        cmp    x2, x10
+        bne    CmpXchgNoUpdate
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        b      DoCardsCmpXchg
+CmpXchgRetry
+        ;; Check location value is what we expect.
+        ldaxr   x10, [x0]
+        cmp     x10, x2
+        bne     CmpXchgNoUpdate
+
+        ;; Current value matches comparand, attempt to update with the new value.
+        stlxr   w12, x1, [x0]
+        cbnz    w12, CmpXchgRetry
+#endif
+
+DoCardsCmpXchg
+        ;; We have successfully updated the value of the objectref so now we need a GC write barrier.
+        ;; The following barrier code takes the destination in x0 and the value in x1 so the arguments are
+        ;; already correctly set up.
+
+        INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1
+
+CmpXchgNoUpdate
+        ;; x10 still contains the original value.
+        mov     x0, x10
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        tbnz    x16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, NoBarrierCmpXchg
+        InterlockedOperationBarrier
+NoBarrierCmpXchg
+#endif
+        ret     lr
+
+    LEAF_END RhpCheckedLockCmpXchg
+
+;; RhpCheckedXchg(Object** destination, Object* value)
+;;
+;; Interlocked exchange on objectref.
+;;
+;; On entry:
+;;   x0  : pointer to objectref
+;;   x1  : exchange value
+;;
+;; On exit:
+;;  x0: original value of objectref
+;;  x10: trashed
+;;  x12, x16, x17: trashed
+;;
+    LEAF_ENTRY RhpCheckedXchg
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16
+        tbz    x16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, ExchangeRetry
+#endif
+
+        swpal  x1, x10, [x0]                   ;; exchange
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        b      DoCardsXchg
+ExchangeRetry
+        ;; Read the existing memory location.
+        ldaxr   x10,  [x0]
+
+        ;; Attempt to update with the new value.
+        stlxr   w12, x1, [x0]
+        cbnz    w12, ExchangeRetry
+#endif
+
+DoCardsXchg
+        ;; We have successfully updated the value of the objectref so now we need a GC write barrier.
+        ;; The following barrier code takes the destination in x0 and the value in x1 so the arguments are
+        ;; already correctly set up.
+
+        INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1
+
+        ;; x10 still contains the original value.
+        mov     x0, x10
+
+#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
+        tbnz    x16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, NoBarrierXchg
+        InterlockedOperationBarrier
+NoBarrierXchg
+#endif
+        ret
+
+    LEAF_END RhpCheckedXchg
+#endif // FEATURE_NATIVEAOT
+
+    end
diff --git a/src/coreclr/runtime/i386/AllocFast.S b/src/coreclr/runtime/i386/AllocFast.S
new file mode 100644
index 000000000000..529a5142bcf2
--- /dev/null
+++ b/src/coreclr/runtime/i386/AllocFast.S
@@ -0,0 +1,274 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.intel_syntax noprefix
+#include "AsmMacros_Shared.h"
+
+// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+// allocation context then automatically fallback to the slow allocation path.
+//  ECX == MethodTable
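+//
+// The body below keeps only the prospective new alloc_ptr in EAX, so the check is roughly
+// (a sketch in C terms; the jc guards the 32-bit addition itself):
+//   uint32_t newPtr = ctx->alloc_ptr + pMT->m_uBaseSize;
+//   if (newPtr < ctx->alloc_ptr /* wrapped */ || newPtr > ctx->combined_limit)
+//       return RhpNewObject(pMT, /* uFlags */ 0);
+//   ctx->alloc_ptr = newPtr;
+//   Object* obj = (Object*)(newPtr - pMT->m_uBaseSize);
+//   obj->m_pEEType = pMT;
+//   return obj;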
+LEAF_ENTRY RhpNewFast, _TEXT
+        // edx = ee_alloc_context pointer, TRASHES eax
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        mov         eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize]
+        add         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        jc          LOCAL_LABEL(RhpNewFast_AllocFailed)
+        cmp         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        ja          LOCAL_LABEL(RhpNewFast_AllocFailed)
+        mov         [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax
+
+        // calc the new object pointer and initialize it
+        sub         eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize]
+        mov         [eax + OFFSETOF__Object__m_pEEType], ecx
+
+        ret
+
+LOCAL_LABEL(RhpNewFast_AllocFailed):
+        xor         edx, edx
+        jmp         RhpNewObject
+LEAF_END RhpNewFast, _TEXT
+
+// Allocate non-array object with finalizer.
+//  ECX == MethodTable
+LEAF_ENTRY RhpNewFinalizable, _TEXT
+        mov         edx, GC_ALLOC_FINALIZE                          // Flags
+        jmp         RhpNewObject
+LEAF_END RhpNewFinalizable, _TEXT
+
+// Allocate non-array object
+//  ECX == MethodTable
+//  EDX == alloc flags
+LEAF_ENTRY RhpNewObject, _TEXT
+        PUSH_COOP_PINVOKE_FRAME eax
+
+        // Preserve MethodTable in ESI.
+        mov         esi, ecx
+
+        push        eax                                             // transition frame
+        push        0                                               // numElements
+        push        edx
+        push        ecx
+
+        // Call the rest of the allocation helper.
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call        RhpGcAlloc
+
+        add         esp, 16
+
+        test        eax, eax
+        jz          LOCAL_LABEL(NewOutOfMemory)
+
+        POP_COOP_PINVOKE_FRAME
+
+        ret
+
+LOCAL_LABEL(NewOutOfMemory):
+        // This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov         ecx, esi        // Restore MethodTable pointer
+        xor         edx, edx        // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        jmp         RhExceptionHandling_FailedAllocation
+LEAF_END RhpNewObject, _TEXT
+
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewPtrArrayFast
+//  EAX == string/array size
+//  ECX == MethodTable
+//  EDX == character/element count
+.macro NEW_ARRAY_FAST_PROLOG
+        ESP_PROLOG_BEG
+        ESP_PROLOG_PUSH ecx
+        ESP_PROLOG_PUSH edx
+        ESP_EPILOG_END
+.endm
+
+.macro NEW_ARRAY_FAST
+        // edx = ee_alloc_context pointer, TRASHES ecx
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        // ECX == scratch
+        // EAX == allocation size
+        // EDX == ee_alloc_context pointer
+
+        mov         ecx, eax
+        add         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        jc          1f
+        cmp         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        ja          1f
+
+        // ECX == allocation size
+        // EAX == new alloc ptr
+        // EDX == ee_alloc_context pointer
+
+        // set the new alloc pointer
+        mov         [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax
+
+        // calc the new object pointer
+        sub         eax, ecx
+
+        ESP_EPILOG_BEG
+        // Restore the element count and put it in edx
+        ESP_EPILOG_POP edx
+        // Restore the MethodTable and put it in ecx
+        ESP_EPILOG_POP ecx
+        ESP_EPILOG_END
+
+        // set the new object's MethodTable pointer and element count
+        mov         [eax + OFFSETOF__Object__m_pEEType], ecx
+        mov         [eax + OFFSETOF__Array__m_Length], edx
+        ret
+
+1:
+        ESP_EPILOG_BEG
+        // Restore the element count and put it in edx
+        ESP_EPILOG_POP edx
+        // Restore the MethodTable and put it in ecx
+        ESP_EPILOG_POP ecx
+        ESP_EPILOG_END
+
+        jmp         RhpNewVariableSizeObject
+.endm
+
+// Allocate a new string.
+//  ECX == MethodTable
+//  EDX == element count
+LEAF_ENTRY RhNewString, _TEXT
+        // Make sure computing the aligned overall allocation size won't overflow
+        cmp         edx, MAX_STRING_LENGTH
+        ja          LOCAL_LABEL(RhNewString_StringSizeOverflow)
+
+        // Compute overall allocation size (align(base size + (element size * elements), 4)).
+        lea         eax, [(edx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 3)]
+        and         eax, -4
+
+        NEW_ARRAY_FAST_PROLOG
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(RhNewString_StringSizeOverflow):
+        // We get here if the size of the final string object can't be represented as an unsigned
+        // 32-bit value. We're going to tail-call to a managed helper that will throw
+        // an OOM exception that the caller of this allocator understands.
+
+        // ecx holds MethodTable pointer already
+        xor         edx, edx            // Indicate that we should throw OOM.
+        jmp         RhExceptionHandling_FailedAllocation
+LEAF_END RhNewString, _TEXT
+
+
+// Allocate one dimensional, zero based array (SZARRAY).
+//  ECX == MethodTable
+//  EDX == element count
+LEAF_ENTRY RhpNewArrayFast, _TEXT
+        NEW_ARRAY_FAST_PROLOG
+
+        // Compute overall allocation size (align(base size + (element size * elements), 4)).
+        // if the element count is <= 0x10000, no overflow is possible because the component size is
+        // <= 0xffff, and thus the product is <= 0xffff0000, and the base size for the worst case
+        // (32 dimensional MdArray) is less than 0xffff.
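+        // As a worked bound: 0x10000 * 0xffff = 0xffff0000, and adding SZARRAY_BASE_SIZE + 3 for
+        // alignment still fits in 32 bits, so no carry checks are needed on this path.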
+        movzx       eax, word ptr [ecx + OFFSETOF__MethodTable__m_usComponentSize]
+        cmp         edx, 0x010000
+        ja          LOCAL_LABEL(RhpNewArrayFast_ArraySizeBig)
+        mul         edx
+        lea         eax, [eax + SZARRAY_BASE_SIZE + 3]
+LOCAL_LABEL(RhpNewArrayFast_ArrayAlignSize):
+        and         eax, -4
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(RhpNewArrayFast_ArraySizeBig):
+        // Compute overall allocation size (align(base size + (element size * elements), 4)).
+        // if the element count is negative, it's an overflow, otherwise it's out of memory
+        cmp         edx, 0
+        jl          LOCAL_LABEL(RhpNewArrayFast_ArraySizeOverflow)
+        mul         edx
+        jc          LOCAL_LABEL(RhpNewArrayFast_ArrayOutOfMemoryNoFrame)
+        add         eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize]
+        jc          LOCAL_LABEL(RhpNewArrayFast_ArrayOutOfMemoryNoFrame)
+        add         eax, 3
+        jc          LOCAL_LABEL(RhpNewArrayFast_ArrayOutOfMemoryNoFrame)
+        jmp         LOCAL_LABEL(RhpNewArrayFast_ArrayAlignSize)
+
+LOCAL_LABEL(RhpNewArrayFast_ArrayOutOfMemoryNoFrame):
+        ESP_EPILOG_FREE 8
+
+        // ecx holds MethodTable pointer already
+        xor         edx, edx        // Indicate that we should throw OOM.
+        jmp         RhExceptionHandling_FailedAllocation
+
+LOCAL_LABEL(RhpNewArrayFast_ArraySizeOverflow):
+        ESP_EPILOG_FREE 8
+
+        // We get here if the size of the final array object can't be represented as an unsigned
+        // 32-bit value. We're going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        // ecx holds MethodTable pointer already
+        mov         edx, 1          // Indicate that we should throw OverflowException
+        jmp         RhExceptionHandling_FailedAllocation
+LEAF_END RhpNewArrayFast, _TEXT
+
+
+// Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements.
+//  ECX == MethodTable
+//  EDX == element count
+LEAF_ENTRY RhpNewPtrArrayFast, _TEXT
+        // Delegate overflow handling to the generic helper conservatively
+
+        cmp         edx, (0x40000000 / 4) // sizeof(void*)
+        jae         RhpNewArrayFast
+
+        // In this case we know the element size is sizeof(void *), or 4 for x86
+        // This helps us in two ways - we can shift instead of multiplying, and
+        // there's no need to align the size either
+
+        lea         eax, [edx * 4 + SZARRAY_BASE_SIZE]
+
+        NEW_ARRAY_FAST_PROLOG
+        NEW_ARRAY_FAST
+LEAF_END RhpNewPtrArrayFast, _TEXT
+
+//
+// Object* RhpNewVariableSizeObject(MethodTable *pMT, INT_PTR size)
+//
+// ecx == MethodTable
+// edx == element count
+//
+NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler
+        PUSH_COOP_PINVOKE_FRAME eax
+
+        // Preserve MethodTable in ESI.
+        mov         esi, ecx
+
+        // Push alloc helper arguments (transition frame, size, flags, MethodTable).
+        push        eax                                             // transition frame
+        push        edx                                             // numElements
+        push        0                                               // Flags
+        push        ecx                                             // MethodTable
+
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call        RhpGcAlloc
+
+        add         esp, 16
+
+        test        eax, eax
+        jz          LOCAL_LABEL(RhpNewVariableSizeObject_AllocFailed)
+
+        POP_COOP_PINVOKE_FRAME
+
+        ret
+
+LOCAL_LABEL(RhpNewVariableSizeObject_AllocFailed):
+        // This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov         ecx, esi        // Restore MethodTable pointer
+        xor         edx, edx        // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        jmp         RhExceptionHandling_FailedAllocation
+NESTED_END RhpNewVariableSizeObject, _TEXT
diff --git a/src/coreclr/runtime/i386/AllocFast.asm b/src/coreclr/runtime/i386/AllocFast.asm
new file mode 100644
index 000000000000..7bb4d6ee6544
--- /dev/null
+++ b/src/coreclr/runtime/i386/AllocFast.asm
@@ -0,0 +1,266 @@
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+
+        .586
+        .model  flat
+        option  casemap:none
+        .code
+
+include AsmMacros_Shared.inc
+
+; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+; allocation context then automatically fallback to the slow allocation path.
+;  ECX == MethodTable
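+;
+; Roughly equivalent C sketch of the fast path below (illustrative only; field and parameter names mirror
+; the offsets used in this file and are not a definitive API):
+;   uint8_t* ptr = ctx->alloc_ptr;
+;   if (pMT->m_uBaseSize <= (uintptr_t)(ctx->combined_limit - ptr)) {
+;       ctx->alloc_ptr = ptr + pMT->m_uBaseSize;
+;       ((Object*)ptr)->m_pEEType = pMT;
+;       return (Object*)ptr;
+;   }
+;   return RhpNewObject(pMT, 0);    ; fall back to the slow path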
+FASTCALL_FUNC   RhpNewFast, 4
+        ; edx = ee_alloc_context pointer, TRASHES eax
+        INLINE_GET_ALLOC_CONTEXT_BASE edx, eax
+
+        mov         eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize]
+        add         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        jc          AllocFailed
+        cmp         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        ja          AllocFailed
+        mov         [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax
+
+        ; calc the new object pointer and initialize it
+        sub         eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize]
+        mov         [eax + OFFSETOF__Object__m_pEEType], ecx
+
+        ret
+
+AllocFailed:
+        xor         edx, edx
+        jmp         @RhpNewObject@8
+FASTCALL_ENDFUNC
+
+; Allocate non-array object with finalizer.
+;  ECX == MethodTable
+FASTCALL_FUNC   RhpNewFinalizable, 4
+        mov         edx, GC_ALLOC_FINALIZE                          ; Flags
+        jmp         @RhpNewObject@8
+FASTCALL_ENDFUNC
+
+; Allocate non-array object
+;  ECX == MethodTable
+;  EDX == alloc flags
+FASTCALL_FUNC   RhpNewObject, 8
+        PUSH_COOP_PINVOKE_FRAME eax
+
+        ; Preserve MethodTable in ESI.
+        mov         esi, ecx
+
+        push        eax                                             ; transition frame
+        push        0                                               ; numElements
+        push        edx
+        push        ecx
+
+        ;; Call the rest of the allocation helper.
+        ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call        RhpGcAlloc
+
+        test        eax, eax
+        jz          NewOutOfMemory
+
+        POP_COOP_PINVOKE_FRAME
+
+        ret
+
+NewOutOfMemory:
+        ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        ;; an out of memory exception that the caller of this allocator understands.
+
+        mov         ecx, esi        ; Restore MethodTable pointer
+        xor         edx, edx        ; Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        jmp         RhExceptionHandling_FailedAllocation
+FASTCALL_ENDFUNC
+
+; Shared code for RhNewString, RhpNewArrayFast and RhpNewPtrArrayFast
+;  EAX == string/array size
+;  ECX == MethodTable
+;  EDX == character/element count
+NEW_ARRAY_FAST_PROLOG MACRO
+        push        ecx
+        push        edx
+ENDM
+
+NEW_ARRAY_FAST MACRO
+        LOCAL AllocContextOverflow
+
+        ; EDX = ee_alloc_context pointer, trashes ECX 
+        INLINE_GET_ALLOC_CONTEXT_BASE    edx, ecx
+
+        mov         ecx, eax
+        add         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        jc          AllocContextOverflow
+        cmp         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        ja          AllocContextOverflow
+
+        ; ECX == allocation size
+        ; EAX == new alloc ptr
+        ; EDX == ee_alloc_context pointer
+
+        ; set the new alloc pointer
+        mov         [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax
+
+        ; calc the new object pointer
+        sub         eax, ecx
+
+        ; Restore the element count and put it in edx
+        pop         edx
+        ; Restore the MethodTable and put it in ecx
+        pop         ecx
+
+        ; set the new object's MethodTable pointer and element count
+        mov         [eax + OFFSETOF__Object__m_pEEType], ecx
+        mov         [eax + OFFSETOF__Array__m_Length], edx
+        ret
+
+AllocContextOverflow:
+        ; Restore the element count and put it in edx
+        pop         edx
+        ; Restore the MethodTable and put it in ecx
+        pop         ecx
+
+        jmp         @RhpNewVariableSizeObject@8
+ENDM
+
+; Allocate a new string.
+;  ECX == MethodTable
+;  EDX == element count
+FASTCALL_FUNC   RhNewString, 8
+        ; Make sure computing the aligned overall allocation size won't overflow
+        cmp         edx, MAX_STRING_LENGTH
+        ja          StringSizeOverflow
+
+        ; Compute overall allocation size (align(base size + (element size * elements), 4)).
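+        ; i.e. (illustrative) eax = (STRING_BASE_SIZE + edx * STRING_COMPONENT_SIZE + 3) & ~3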
+        lea         eax, [(edx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 3)]
+        and         eax, -4
+
+        NEW_ARRAY_FAST_PROLOG
+        NEW_ARRAY_FAST
+        
+StringSizeOverflow:
+        ; We get here if the size of the final string object can't be represented as an unsigned
+        ; 32-bit value. We're going to tail-call to a managed helper that will throw
+        ; an OOM exception that the caller of this allocator understands.
+
+        ; ecx holds MethodTable pointer already
+        xor         edx, edx            ; Indicate that we should throw OOM.
+        jmp         RhExceptionHandling_FailedAllocation
+FASTCALL_ENDFUNC
+
+; Allocate one dimensional, zero based array (SZARRAY).
+;  ECX == MethodTable
+;  EDX == element count
+FASTCALL_FUNC   RhpNewArrayFast, 8
+        NEW_ARRAY_FAST_PROLOG
+
+        ; Compute overall allocation size (align(base size + (element size * elements), 4)).
+        ; if the element count is <= 0x10000, no overflow is possible because the component size is
+        ; <= 0xffff, and thus the product is <= 0xffff0000, and the base size for the worst case
+        ; (32 dimensional MdArray) is less than 0xffff.
+        movzx       eax, word ptr [ecx + OFFSETOF__MethodTable__m_usComponentSize]
+        cmp         edx, 010000h
+        ja          ArraySizeBig
+        mul         edx
+        lea         eax, [eax + SZARRAY_BASE_SIZE + 3]
+ArrayAlignSize:
+        and         eax, -4
+
+        NEW_ARRAY_FAST
+
+ArraySizeBig:
+        ; Compute overall allocation size (align(base size + (element size * elements), 4)).
+        ; if the element count is negative, it's an overflow, otherwise it's out of memory
+        cmp         edx, 0
+        jl          ArraySizeOverflow
+        mul         edx
+        jc          ArrayOutOfMemoryNoFrame
+        add         eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize]
+        jc          ArrayOutOfMemoryNoFrame
+        add         eax, 3
+        jc          ArrayOutOfMemoryNoFrame
+        jmp         ArrayAlignSize
+
+ArrayOutOfMemoryNoFrame:
+        add         esp, 8
+
+        ; ecx holds MethodTable pointer already
+        xor         edx, edx        ; Indicate that we should throw OOM.
+        jmp         RhExceptionHandling_FailedAllocation
+
+ArraySizeOverflow:
+        add         esp, 8
+
+        ; We get here if the size of the final array object can't be represented as an unsigned
+        ; 32-bit value. We're going to tail-call to a managed helper that will throw
+        ; an overflow exception that the caller of this allocator understands.
+
+        ; ecx holds MethodTable pointer already
+        mov         edx, 1          ; Indicate that we should throw OverflowException
+        jmp         RhExceptionHandling_FailedAllocation
+FASTCALL_ENDFUNC
+
+; Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements.
+;  ECX == MethodTable
+;  EDX == element count
+FASTCALL_FUNC   RhpNewPtrArrayFast, 8
+        ; Delegate overflow handling to the generic helper conservatively
+
+        cmp         edx, (40000000h / 4) ; sizeof(void*)
+        jae         @RhpNewArrayFast@8
+
+        ; In this case we know the element size is sizeof(void *), or 4 for x86
+        ; This helps us in two ways - we can shift instead of multiplying, and
+        ; there's no need to align the size either
+
+        lea         eax, [edx * 4 + SZARRAY_BASE_SIZE]
+
+        NEW_ARRAY_FAST_PROLOG
+        NEW_ARRAY_FAST
+FASTCALL_ENDFUNC
+
+;
+; Object* RhpNewVariableSizeObject(MethodTable *pMT, INT_PTR size)
+;
+; ecx == MethodTable
+; edx == element count
+;
+FASTCALL_FUNC RhpNewVariableSizeObject, 8
+        PUSH_COOP_PINVOKE_FRAME eax
+
+        ; Preserve MethodTable in ESI.
+        mov         esi, ecx
+
+        ; Push alloc helper arguments (transition frame, size, flags, MethodTable).
+        push        eax                                             ; transition frame
+        push        edx                                             ; numElements
+        push        0                                               ; Flags
+        push        ecx                                             ; MethodTable
+
+        ; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call        RhpGcAlloc
+
+        test        eax, eax
+        jz          RhpNewVariableSizeObject_OutOfMemory
+
+        POP_COOP_PINVOKE_FRAME
+
+        ret
+
+RhpNewVariableSizeObject_OutOfMemory:
+        ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        ; an out of memory exception that the caller of this allocator understands.
+
+        mov         ecx, esi        ; Restore MethodTable pointer
+        xor         edx, edx        ; Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        jmp         RhExceptionHandling_FailedAllocation
+FASTCALL_ENDFUNC
+
+        end
diff --git a/src/coreclr/nativeaot/Runtime/i386/StubDispatch.S b/src/coreclr/runtime/i386/StubDispatch.S
similarity index 100%
rename from src/coreclr/nativeaot/Runtime/i386/StubDispatch.S
rename to src/coreclr/runtime/i386/StubDispatch.S
diff --git a/src/coreclr/runtime/i386/StubDispatch.asm b/src/coreclr/runtime/i386/StubDispatch.asm
new file mode 100644
index 000000000000..b3672d4e1eb0
--- /dev/null
+++ b/src/coreclr/runtime/i386/StubDispatch.asm
@@ -0,0 +1,112 @@
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+
+        .586
+        .model  flat
+        option  casemap:none
+        .code
+
+
+include AsmMacros_Shared.inc
+
+
+ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+EXTERN RhpCidResolve : PROC
+EXTERN _RhpUniversalTransition_DebugStepTailCall@0 : PROC
+
+
+;; Macro that generates code to check a single cache entry.
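+;; Each cache entry is an 8-byte pair: a 4-byte MethodTable* key followed by a 4-byte target code pointer.
+;; The probe compares the instance's MethodTable (in ebx) against the key and, on a hit, restores ebx and
+;; jumps straight to the cached target.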
+CHECK_CACHE_ENTRY macro entry
+NextLabel textequ @CatStr( Attempt, %entry+1 )
+        cmp     ebx, [eax + (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 8))]
+        jne     @F
+        pop     ebx
+        jmp     dword ptr [eax + (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 8) + 4)]
+@@:
+endm
+
+
+;; Macro that generates a stub consuming a cache with the given number of entries.
+DEFINE_INTERFACE_DISPATCH_STUB macro entries
+
+StubName textequ @CatStr( _RhpInterfaceDispatch, entries, <@0> )
+StubAVLocation textequ @CatStr( _RhpInterfaceDispatchAVLocation, entries )
+
+    StubName proc public
+
+        ;; Check the instance here to catch null references. We're going to touch it again below (to cache
+        ;; the MethodTable pointer), but that's after we've pushed ebx below, and taking an A/V there will
+        ;; mess up the stack trace. We also don't have a spare scratch register (eax holds the cache pointer
+        ;; and the push of ebx below is precisely so we can access a second register to hold the MethodTable
+        ;; pointer).
+    ALTERNATE_ENTRY StubAVLocation
+        cmp     dword ptr [ecx], ecx
+
+        ;; eax currently contains the indirection cell address. We need to update it to point to the cache
+        ;; block instead.
+        mov     eax, [eax + OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+        ;; Cache pointer is already loaded in the only scratch register we have so far, eax. We need
+        ;; another scratch register to hold the instance type so save the value of ebx and use that.
+        push    ebx
+
+        ;; Load the MethodTable from the object instance in ebx.
+        mov     ebx, [ecx]
+
+CurrentEntry = 0
+    while CurrentEntry lt entries
+        CHECK_CACHE_ENTRY %CurrentEntry
+CurrentEntry = CurrentEntry + 1
+    endm
+
+        ;; eax currently contains the cache block. We need to point it back to the
+        ;; indirection cell using the back pointer in the cache block
+        mov     eax, [eax + OFFSETOF__InterfaceDispatchCache__m_pCell]
+        pop     ebx
+        jmp     RhpInterfaceDispatchSlow
+
+    StubName endp
+
+    endm ;; DEFINE_INTERFACE_DISPATCH_STUB
+
+
+;; Define all the stub routines we currently need.
+DEFINE_INTERFACE_DISPATCH_STUB 1
+DEFINE_INTERFACE_DISPATCH_STUB 2
+DEFINE_INTERFACE_DISPATCH_STUB 4
+DEFINE_INTERFACE_DISPATCH_STUB 8
+DEFINE_INTERFACE_DISPATCH_STUB 16
+DEFINE_INTERFACE_DISPATCH_STUB 32
+DEFINE_INTERFACE_DISPATCH_STUB 64
+
+;; Shared out of line helper used on cache misses.
+RhpInterfaceDispatchSlow proc
+;; eax points at InterfaceDispatchCell
+
+        ;; Setup call to Universal Transition thunk
+        push        ebp
+        mov         ebp, esp
+        push        eax   ; First argument (Interface Dispatch Cell)
+        lea         eax, [RhpCidResolve]
+        push        eax ; Second argument (RhpCidResolve)
+
+        ;; Jump to Universal Transition
+        jmp         _RhpUniversalTransition_DebugStepTailCall@0
+RhpInterfaceDispatchSlow endp
+
+;; Initial dispatch on an interface when we don't have a cache yet.
+FASTCALL_FUNC RhpInitialDynamicInterfaceDispatch, 0
+ALTERNATE_ENTRY _RhpInitialInterfaceDispatch
+        ;; Trigger an AV if we're dispatching on a null this.
+        ;; The exception handling infrastructure is aware of the fact that this is the first
+        ;; instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        ;; to a NullReferenceException at the callsite.
+        cmp     dword ptr [ecx], ecx
+
+        jmp RhpInterfaceDispatchSlow
+FASTCALL_ENDFUNC
+
+endif ;; FEATURE_CACHED_INTERFACE_DISPATCH
+
+end
diff --git a/src/coreclr/runtime/i386/WriteBarriers.S b/src/coreclr/runtime/i386/WriteBarriers.S
new file mode 100644
index 000000000000..e9bad9c715db
--- /dev/null
+++ b/src/coreclr/runtime/i386/WriteBarriers.S
@@ -0,0 +1,302 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.intel_syntax noprefix
+#include "AsmMacros_Shared.h"
+
+// TODO! This is implemented, but not tested.
+
+#ifdef WRITE_BARRIER_CHECK
+
+.macro UPDATE_GC_SHADOW BASENAME, DESTREG, REFREG, TEMPREG
+
+    // If g_GCShadow is 0, don't perform the check.
+    PREPARE_EXTERNAL_VAR g_GCShadow, \TEMPREG
+    cmp     dword ptr [\TEMPREG], 0
+    je      LOCAL_LABEL(\BASENAME\()UpdateShadowHeap_Done_\DESTREG\()_\REFREG)
+
+    // Save DESTREG since we're about to modify it (and we need the original value both within the macro and
+    // once we exit the macro).
+    push    \DESTREG
+
+    // Transform DESTREG into the equivalent address in the shadow heap.
+    PREPARE_EXTERNAL_VAR g_lowest_address, \TEMPREG
+    sub     \DESTREG, [\TEMPREG]
+    jb      LOCAL_LABEL(\BASENAME\()UpdateShadowHeap_PopThenDone_\DESTREG\()_\REFREG)
+    PREPARE_EXTERNAL_VAR g_GCShadow, \TEMPREG
+    add     \DESTREG, [\TEMPREG]
+    PREPARE_EXTERNAL_VAR g_GCShadowEnd, \TEMPREG
+    cmp     \DESTREG, [\TEMPREG]
+    jae     LOCAL_LABEL(\BASENAME\()UpdateShadowHeap_PopThenDone_\DESTREG\()_\REFREG)
+
+    // Update the shadow heap.
+    mov     [\DESTREG], \REFREG
+
+    // Now check that the real heap location still contains the value we just wrote into the shadow heap. This
+    // read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to
+    // recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock
+    // prefix).
+    xchg    [esp], \DESTREG
+    cmp     [\DESTREG], \REFREG
+    jne     LOCAL_LABEL(\BASENAME\()UpdateShadowHeap_Invalidate_\DESTREG\()_\REFREG)
+
+    // The original DESTREG value is now restored but the stack has a value (the shadow version of the
+    // location) pushed. Need to discard this push before we are done.
+    add     esp, 4
+    jmp     LOCAL_LABEL(\BASENAME\()UpdateShadowHeap_Done_\DESTREG\()_\REFREG)
+
+LOCAL_LABEL(\BASENAME\()UpdateShadowHeap_Invalidate_\DESTREG\()_\REFREG):
+    // Someone went and updated the real heap. We need to invalidate the shadow location since we can't
+    // guarantee whose shadow update won.
+
+    // Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an
+    // additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg
+    // variant that doesn't implicitly specify the lock prefix.
+    xchg    [esp], \DESTREG
+    mov     dword ptr [\DESTREG], INVALIDGCVALUE
+
+LOCAL_LABEL(\BASENAME\()UpdateShadowHeap_PopThenDone_\DESTREG\()_\REFREG):
+    // Restore original DESTREG value from the stack.
+    pop     \DESTREG
+
+LOCAL_LABEL(\BASENAME\()UpdateShadowHeap_Done_\DESTREG\()_\REFREG):
+.endm
+
+#else // WRITE_BARRIER_CHECK
+
+.macro UPDATE_GC_SHADOW BASENAME, DESTREG, REFREG, TEMPREG
+.endm
+#endif
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+// name of the register that points to the location to be updated and the name of the register that holds the
+// object reference (this should be in upper case as it's used in the definition of the name of the helper).
+.macro DEFINE_WRITE_BARRIER DESTREG, REFREG, TEMPREG
+
+// Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard
+// decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that
+// location is in one of the other general registers determined by the value of REFREG.
+LEAF_ENTRY RhpAssignRef\REFREG, _TEXT
+
+    // Export the canonical write barrier under unqualified name as well
+    .ifc \REFREG, EDX
+    ALTERNATE_ENTRY RhpAssignRef
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+    .endif
+
+    ALTERNATE_ENTRY RhpAssignRef\REFREG\()AVLocation
+
+    // Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+    // and the card table update we may perform below.
+    mov     dword ptr [\DESTREG], \REFREG
+
+    // Save a register so that we have an available register as a temporary for PREPARE_EXTERNAL_VAR
+    push \TEMPREG
+
+    // Update the shadow copy of the heap with the same value (if enabled).
+    UPDATE_GC_SHADOW RhpAssignRef, \DESTREG, \REFREG, \TEMPREG
+
+    // If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    // (since the object won't be collected or moved by an ephemeral collection).
+    PREPARE_EXTERNAL_VAR g_ephemeral_low, \TEMPREG
+    cmp     \REFREG, [\TEMPREG]
+    jb      LOCAL_LABEL(WriteBarrier_NoBarrierRequired_\DESTREG\()_\REFREG)
+    PREPARE_EXTERNAL_VAR g_ephemeral_high, \TEMPREG
+    cmp     \REFREG, [\TEMPREG]
+    jae     LOCAL_LABEL(WriteBarrier_NoBarrierRequired_\DESTREG\()_\REFREG)
+
+    // We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+    // track this write. The location address is translated into an offset in the card table bitmap. We set
+    // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+    // the byte if it hasn't already been done since writes are expensive and impact scaling.
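+    // In C terms (illustrative): if (g_card_table[(uintptr_t)dest >> 10] != 0xFF) g_card_table[(uintptr_t)dest >> 10] = 0xFF;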
+    shr     \DESTREG, 10
+    PREPARE_EXTERNAL_VAR g_card_table, \TEMPREG
+    add     \DESTREG, [\TEMPREG]
+    cmp     byte ptr [\DESTREG], 0xFF
+    jne     LOCAL_LABEL(WriteBarrier_UpdateCardTable_\DESTREG\()_\REFREG)
+
+LOCAL_LABEL(WriteBarrier_NoBarrierRequired_\DESTREG\()_\REFREG):
+    pop \TEMPREG
+    ret
+
+// We get here if it's necessary to update the card table.
+LOCAL_LABEL(WriteBarrier_UpdateCardTable_\DESTREG\()_\REFREG):
+    pop \TEMPREG
+    mov     byte ptr [\DESTREG], 0xFF
+    ret
+LEAF_END RhpAssignRef\REFREG, _TEXT
+.endm
+
+.macro DEFINE_CHECKED_WRITE_BARRIER_CORE BASENAME, DESTREG, REFREG, TEMPREG
+
+    // The location being updated might not even lie in the GC heap (a handle or stack location for instance),
+    // in which case no write barrier is required.
+    PREPARE_EXTERNAL_VAR g_lowest_address, \TEMPREG
+    cmp     \DESTREG, [\TEMPREG]
+    jb      LOCAL_LABEL(\BASENAME\()CheckedWriteBarrier_NoBarrierRequired_\DESTREG\()_\REFREG)
+    PREPARE_EXTERNAL_VAR g_highest_address, \TEMPREG
+    cmp     \DESTREG, [\TEMPREG]
+    jae     LOCAL_LABEL(\BASENAME\()CheckedWriteBarrier_NoBarrierRequired_\DESTREG\()_\REFREG)
+
+    // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+    // we're in a debug build and write barrier checking has been enabled).
+    UPDATE_GC_SHADOW \BASENAME, \DESTREG, \REFREG, \TEMPREG
+
+    // If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    // (since the object won't be collected or moved by an ephemeral collection).
+    PREPARE_EXTERNAL_VAR g_ephemeral_low, \TEMPREG
+    cmp     \REFREG, [\TEMPREG]
+    jb      LOCAL_LABEL(\BASENAME\()CheckedWriteBarrier_NoBarrierRequired_\DESTREG\()_\REFREG)
+    PREPARE_EXTERNAL_VAR g_ephemeral_high, \TEMPREG
+    cmp     \REFREG, [\TEMPREG]
+    jae     LOCAL_LABEL(\BASENAME\()CheckedWriteBarrier_NoBarrierRequired_\DESTREG\()_\REFREG)
+
+    // We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+    // track this write. The location address is translated into an offset in the card table bitmap. We set
+    // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+    // the byte if it hasn't already been done since writes are expensive and impact scaling.
+    shr     \DESTREG, 10
+    PREPARE_EXTERNAL_VAR g_card_table, \TEMPREG
+    add     \DESTREG, [\TEMPREG]
+    cmp     byte ptr [\DESTREG], 0xFF
+    je      LOCAL_LABEL(\BASENAME\()CheckedWriteBarrier_NoBarrierRequired_\DESTREG\()_\REFREG)
+    // We get here if it's necessary to update the card table.
+    mov     byte ptr [\DESTREG], 0xFF
+LOCAL_LABEL(\BASENAME\()CheckedWriteBarrier_NoBarrierRequired_\DESTREG\()_\REFREG):
+.endm
+
+// This macro is very much like the one above except that it generates a variant of the function which also
+// checks whether the destination is actually somewhere within the GC heap.
+.macro DEFINE_CHECKED_WRITE_BARRIER DESTREG, REFREG, TEMPREG
+
+// Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard
+// decoration). The location to be updated is in DESTREG. The object reference that will be assigned into
+// that location is in one of the other general registers determined by the value of REFREG.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction
+// - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
+LEAF_ENTRY RhpCheckedAssignRef\REFREG, _TEXT
+
+    // Export the canonical write barrier under unqualified name as well
+    .ifc \REFREG, EDX
+    ALTERNATE_ENTRY RhpCheckedAssignRef
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+    .endif
+
+    ALTERNATE_ENTRY RhpCheckedAssignRef\REFREG\()AVLocation
+
+    // Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+    // and the card table update we may perform below.
+    mov     dword ptr [\DESTREG], \REFREG
+
+    // Save a register so that we have an available register as a temporary for PREPARE_EXTERNAL_VAR
+    push \TEMPREG
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, \DESTREG, \REFREG, \TEMPREG
+    pop \TEMPREG
+    ret
+
+LEAF_END RhpCheckedAssignRef\REFREG, _TEXT
+
+.endm
+
+// One day we might have write barriers for all the possible argument registers but for now we have
+// just one write barrier that assumes the input register is EDX.
+DEFINE_CHECKED_WRITE_BARRIER ECX, EDX, EAX
+DEFINE_WRITE_BARRIER ECX, EDX, EAX
+
+DEFINE_WRITE_BARRIER EDX, EAX, ECX
+DEFINE_WRITE_BARRIER EDX, ECX, EAX
+DEFINE_WRITE_BARRIER EDX, EBX, EAX
+DEFINE_WRITE_BARRIER EDX, ESI, EAX
+DEFINE_WRITE_BARRIER EDX, EDI, EAX
+DEFINE_WRITE_BARRIER EDX, EBP, EAX
+
+DEFINE_CHECKED_WRITE_BARRIER EDX, EAX, ECX
+DEFINE_CHECKED_WRITE_BARRIER EDX, ECX, EAX
+DEFINE_CHECKED_WRITE_BARRIER EDX, EBX, EAX
+DEFINE_CHECKED_WRITE_BARRIER EDX, ESI, EAX
+DEFINE_CHECKED_WRITE_BARRIER EDX, EDI, EAX
+DEFINE_CHECKED_WRITE_BARRIER EDX, EBP, EAX
+
+LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
+    mov             eax, [esp+4]
+    lock cmpxchg    [ecx], edx
+    jne             LOCAL_LABEL(RhpCheckedLockCmpXchg_NoWrite)
+    push eax
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, ECX, EDX, EAX
+    pop eax
+LOCAL_LABEL(RhpCheckedLockCmpXchg_NoWrite):
+    ret 4
+LEAF_END RhpCheckedLockCmpXchg, _TEXT
+
+LEAF_ENTRY RhpCheckedXchg, _TEXT
+
+    // Setup eax with the new object for the exchange, that way it will automatically hold the correct result
+    // afterwards and we can leave edx unaltered ready for the GC write barrier below.
+    mov             eax, edx
+    xchg            [ecx], eax
+    push eax
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, ECX, EDX, EAX
+    pop eax
+    ret
+LEAF_END RhpCheckedXchg, _TEXT
+
+//
+// RhpByRefAssignRef simulates movs instruction for object references.
+//
+// On entry:
+//      edi: address of ref-field (assigned to)
+//      esi: address of the data (source)
+//
+// On exit:
+//      edi, esi are incremented by 4,
+//      ecx: trashed
+//
+LEAF_ENTRY RhpByRefAssignRef, _TEXT
+ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
+    mov     ecx, [esi]
+ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2
+    mov     [edi], ecx
+
+    push eax
+
+    // Check whether the writes were even into the heap. If not there's no card update required.
+    PREPARE_EXTERNAL_VAR g_lowest_address, eax
+    cmp     edi, [eax]
+    jb      LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+    PREPARE_EXTERNAL_VAR g_highest_address, eax
+    cmp     edi, [eax]
+    jae     LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+
+    UPDATE_GC_SHADOW RhpByRefAssignRef, edi, ecx, eax
+
+    // If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    // (since the object won't be collected or moved by an ephemeral collection).
+    PREPARE_EXTERNAL_VAR g_ephemeral_low, eax
+    cmp     ecx, [eax]
+    jb      LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+    PREPARE_EXTERNAL_VAR g_ephemeral_high, eax
+    cmp     ecx, [eax]
+    jae     LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+
+    mov     ecx, edi
+    shr     ecx, 10
+    PREPARE_EXTERNAL_VAR g_card_table, eax
+    add     ecx, [eax]
+    cmp     byte ptr [ecx], 0xFF
+    je      LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
+
+    mov     byte ptr [ecx], 0xFF
+
+LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired):
+    // Increment the pointers before leaving
+    add     esi,4
+    add     edi,4
+    pop     eax
+    ret
+LEAF_END RhpByRefAssignRef, _TEXT
diff --git a/src/coreclr/runtime/i386/WriteBarriers.asm b/src/coreclr/runtime/i386/WriteBarriers.asm
new file mode 100644
index 000000000000..643776216162
--- /dev/null
+++ b/src/coreclr/runtime/i386/WriteBarriers.asm
@@ -0,0 +1,308 @@
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+
+;;
+;; Define the helpers used to implement the write barrier required when writing an object reference into a
+;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in
+;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral
+;; collection.
+;;
+
+    .xmm
+    .model  flat
+    option  casemap:none
+    .code
+
+include AsmMacros_Shared.inc
+
+;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+;; during garbage collections to verify that object references where never written to the heap without using a
+;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing
+;; new references to the real heap. Since this can't be solved perfectly without critical sections around the
+;; entire update process, we instead update the shadow location and then re-check the real location (as two
+;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value
+;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the
+;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
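+;;
+;; Illustrative pseudocode for the UPDATE_GC_SHADOW macro below (not assembled; omits the early-out checks):
+;;   uint8_t* shadow = (dest - g_lowest_address) + g_GCShadow;
+;;   if (shadow < g_GCShadowEnd) { *(void**)shadow = ref; if (*(void**)dest != ref) *(uint32_t*)shadow = INVALIDGCVALUE; }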
+ifdef WRITE_BARRIER_CHECK
+
+g_GCShadow      TEXTEQU <?g_GCShadow@@3PAEA>
+g_GCShadowEnd   TEXTEQU <?g_GCShadowEnd@@3PAEA>
+INVALIDGCVALUE  EQU 0CCCCCCCDh
+
+EXTERN  g_GCShadow : DWORD
+EXTERN  g_GCShadowEnd : DWORD
+
+UPDATE_GC_SHADOW macro BASENAME, DESTREG, REFREG
+
+    ;; If g_GCShadow is 0, don't perform the check.
+    cmp     g_GCShadow, 0
+    je      &BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG&
+
+    ;; Save DESTREG since we're about to modify it (and we need the original value both within the macro and
+    ;; once we exit the macro).
+    push    DESTREG
+
+    ;; Transform DESTREG into the equivalent address in the shadow heap.
+    sub     DESTREG, G_LOWEST_ADDRESS
+    jb      &BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG&
+    add     DESTREG, [g_GCShadow]
+    cmp     DESTREG, [g_GCShadowEnd]
+    jae     &BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG&
+
+    ;; Update the shadow heap.
+    mov     [DESTREG], REFREG
+
+    ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. This
+    ;; read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to
+    ;; recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock
+    ;; prefix).
+    xchg    [esp], DESTREG
+    cmp     [DESTREG], REFREG
+    jne     &BASENAME&_UpdateShadowHeap_Invalidate_&DESTREG&_&REFREG&
+
+    ;; The original DESTREG value is now restored but the stack has a value (the shadow version of the
+    ;; location) pushed. Need to discard this push before we are done.
+    add     esp, 4
+    jmp     &BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG&
+
+&BASENAME&_UpdateShadowHeap_Invalidate_&DESTREG&_&REFREG&:
+    ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't
+    ;; guarantee whose shadow update won.
+
+    ;; Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an
+    ;; additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg
+    ;; variant that doesn't implicitly specify the lock prefix.
+    xchg    [esp], DESTREG
+    mov     dword ptr [DESTREG], INVALIDGCVALUE
+
+&BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG&:
+    ;; Restore original DESTREG value from the stack.
+    pop     DESTREG
+
+&BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG&:
+endm
+
+else ; WRITE_BARRIER_CHECK
+
+UPDATE_GC_SHADOW macro BASENAME, DESTREG, REFREG
+endm
+
+endif ; WRITE_BARRIER_CHECK
+
+;; There are several different helpers used depending on which register holds the object reference. Since all
+;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+;; name of the register that points to the location to be updated and the name of the register that holds the
+;; object reference (this should be in upper case as it's used in the definition of the name of the helper).
+DEFINE_WRITE_BARRIER macro DESTREG, REFREG
+
+;; Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard
+;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that
+;; location is in one of the other general registers determined by the value of REFREG.
+FASTCALL_FUNC RhpAssignRef&REFREG&, 8
+
+    ;; Export the canonical write barrier under unqualified name as well
+    ifidni <REFREG>, <EDX>
+    ALTERNATE_ENTRY RhpAssignRef
+    ALTERNATE_ENTRY @RhpAssignRef@8
+    ALTERNATE_ENTRY _RhpAssignRefAVLocation
+    endif
+
+    ALTERNATE_ENTRY _RhpAssignRef&REFREG&AVLocation
+
+    ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+    ;; and the card table update we may perform below.
+    mov     dword ptr [DESTREG], REFREG
+
+    ;; Update the shadow copy of the heap with the same value (if enabled).
+    UPDATE_GC_SHADOW RhpAssignRef, DESTREG, REFREG
+
+    ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    ;; (since the object won't be collected or moved by an ephemeral collection).
+    cmp     REFREG, [G_EPHEMERAL_LOW]
+    jb      WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG&
+    cmp     REFREG, [G_EPHEMERAL_HIGH]
+    jae     WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG&
+
+    ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+    ;; track this write. The location address is translated into an offset in the card table bitmap. We set
+    ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+    ;; the byte if it hasn't already been done since writes are expensive and impact scaling.
+    shr     DESTREG, 10
+    add     DESTREG, [G_CARD_TABLE]
+    cmp     byte ptr [DESTREG], 0FFh
+    jne     WriteBarrier_UpdateCardTable_&DESTREG&_&REFREG&
+
+WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG&:
+    ret
+
+;; We get here if it's necessary to update the card table.
+WriteBarrier_UpdateCardTable_&DESTREG&_&REFREG&:
+    mov     byte ptr [DESTREG], 0FFh
+    ret
+FASTCALL_ENDFUNC
+endm
+
+;; Helper so the two-token return sequence "ret 4" can be passed as a single macro argument below.
+RET4    macro
+    ret     4
+endm
+
+DEFINE_CHECKED_WRITE_BARRIER_CORE macro BASENAME, DESTREG, REFREG, RETINST
+
+    ;; The location being updated might not even lie in the GC heap (a handle or stack location for instance),
+    ;; in which case no write barrier is required.
+    cmp     DESTREG, [G_LOWEST_ADDRESS]
+    jb      &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG&
+    cmp     DESTREG, [G_HIGHEST_ADDRESS]
+    jae     &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG&
+
+    ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+    ;; we're in a debug build and write barrier checking has been enabled).
+    UPDATE_GC_SHADOW BASENAME, DESTREG, REFREG
+
+    ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    ;; (since the object won't be collected or moved by an ephemeral collection).
+    cmp     REFREG, [G_EPHEMERAL_LOW]
+    jb      &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG&
+    cmp     REFREG, [G_EPHEMERAL_HIGH]
+    jae     &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG&
+
+    ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must
+    ;; track this write. The location address is translated into an offset in the card table bitmap. We set
+    ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
+    ;; the byte if it hasn't already been done since writes are expensive and impact scaling.
+    shr     DESTREG, 10
+    add     DESTREG, [G_CARD_TABLE]
+    cmp     byte ptr [DESTREG], 0FFh
+    jne     &BASENAME&_UpdateCardTable_&DESTREG&_&REFREG&
+
+&BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG&:
+    RETINST
+
+;; We get here if it's necessary to update the card table.
+&BASENAME&_UpdateCardTable_&DESTREG&_&REFREG&:
+    mov     byte ptr [DESTREG], 0FFh
+    RETINST
+
+endm
+
+
+;; This macro is very much like the one above except that it generates a variant of the function which also
+;; checks whether the destination is actually somewhere within the GC heap.
+DEFINE_CHECKED_WRITE_BARRIER macro DESTREG, REFREG
+
+;; Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard
+;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into
+;; that location is in one of the other general registers determined by the value of REFREG.
+
+;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction
+;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
+FASTCALL_FUNC RhpCheckedAssignRef&REFREG&, 8
+
+    ;; Export the canonical write barrier under unqualified name as well
+    ifidni <REFREG>, <EDX>
+    ALTERNATE_ENTRY RhpCheckedAssignRef
+    ALTERNATE_ENTRY @RhpCheckedAssignRef@8
+    ALTERNATE_ENTRY _RhpCheckedAssignRefAVLocation
+    endif
+
+    ALTERNATE_ENTRY _RhpCheckedAssignRef&REFREG&AVLocation
+
+    ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here
+    ;; and the card table update we may perform below.
+    mov     dword ptr [DESTREG], REFREG
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, DESTREG, REFREG, ret
+
+FASTCALL_ENDFUNC
+
+endm
+
+;; One day we might have write barriers for all the possible argument registers but for now we have
+;; just one write barrier that assumes the input register is EDX.
+DEFINE_CHECKED_WRITE_BARRIER ECX, EDX
+DEFINE_WRITE_BARRIER ECX, EDX
+
+DEFINE_WRITE_BARRIER EDX, EAX
+DEFINE_WRITE_BARRIER EDX, ECX
+DEFINE_WRITE_BARRIER EDX, EBX
+DEFINE_WRITE_BARRIER EDX, ESI
+DEFINE_WRITE_BARRIER EDX, EDI
+DEFINE_WRITE_BARRIER EDX, EBP
+
+DEFINE_CHECKED_WRITE_BARRIER EDX, EAX
+DEFINE_CHECKED_WRITE_BARRIER EDX, ECX
+DEFINE_CHECKED_WRITE_BARRIER EDX, EBX
+DEFINE_CHECKED_WRITE_BARRIER EDX, ESI
+DEFINE_CHECKED_WRITE_BARRIER EDX, EDI
+DEFINE_CHECKED_WRITE_BARRIER EDX, EBP
+
+FASTCALL_FUNC RhpCheckedLockCmpXchg, 12
+    mov             eax, [esp+4]        ;; eax = comparand (third argument, passed on the stack)
+    lock cmpxchg    [ecx], edx
+    jne             RhpCheckedLockCmpXchg_NoBarrierRequired_ECX_EDX
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, ECX, EDX, RET4
+
+FASTCALL_ENDFUNC
+
+FASTCALL_FUNC RhpCheckedXchg, 8
+
+    ;; Setup eax with the new object for the exchange, that way it will automatically hold the correct result
+    ;; afterwards and we can leave edx unaltered ready for the GC write barrier below.
+    mov             eax, edx
+    xchg            [ecx], eax
+
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, ECX, EDX, ret
+
+FASTCALL_ENDFUNC
+
+;;
+;; RhpByRefAssignRef simulates movs instruction for object references.
+;;
+;; On entry:
+;;      edi: address of ref-field (assigned to)
+;;      esi: address of the data (source)
+;;
+;; On exit:
+;;      edi, esi are incremented by 4,
+;;      ecx: trashed
+;;
+FASTCALL_FUNC RhpByRefAssignRef, 8
+ALTERNATE_ENTRY _RhpByRefAssignRefAVLocation1
+    mov     ecx, [esi]
+ALTERNATE_ENTRY _RhpByRefAssignRefAVLocation2
+    mov     [edi], ecx
+
+    ;; Check whether the writes were even into the heap. If not there's no card update required.
+    cmp     edi, [G_LOWEST_ADDRESS]
+    jb      RhpByRefAssignRef_NoBarrierRequired
+    cmp     edi, [G_HIGHEST_ADDRESS]
+    jae     RhpByRefAssignRef_NoBarrierRequired
+
+    UPDATE_GC_SHADOW BASENAME, edi, ecx
+
+    ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    ;; (since the object won't be collected or moved by an ephemeral collection).
+    cmp     ecx, [G_EPHEMERAL_LOW]
+    jb      RhpByRefAssignRef_NoBarrierRequired
+    cmp     ecx, [G_EPHEMERAL_HIGH]
+    jae     RhpByRefAssignRef_NoBarrierRequired
+
+    mov     ecx, edi
+    shr     ecx, 10
+    add     ecx, [G_CARD_TABLE]
+    cmp     byte ptr [ecx], 0FFh
+    je      RhpByRefAssignRef_NoBarrierRequired
+
+    mov     byte ptr [ecx], 0FFh
+
+RhpByRefAssignRef_NoBarrierRequired:
+    ;; Increment the pointers before leaving
+    add     esi,4
+    add     edi,4
+    ret
+FASTCALL_ENDFUNC
+
+    end
diff --git a/src/coreclr/runtime/loongarch64/AllocFast.S b/src/coreclr/runtime/loongarch64/AllocFast.S
new file mode 100644
index 000000000000..1a55fa7da9e5
--- /dev/null
+++ b/src/coreclr/runtime/loongarch64/AllocFast.S
@@ -0,0 +1,252 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+// allocation context then automatically fallback to the slow allocation path.
+//  $a0 == MethodTable
+    LEAF_ENTRY RhpNewFast, _TEXT
+
+        // a1 = ee_alloc_context pointer
+        INLINE_GET_ALLOC_CONTEXT_BASE  $a1
+
+        //
+        // a0 contains MethodTable pointer
+        //
+        ld.w  $a2, $a0, OFFSETOF__MethodTable__m_uBaseSize
+
+        //
+        // a0: MethodTable pointer
+        // a1: ee_alloc_context pointer
+        // a2: base size
+        //
+
+        // Load potential new object address into t3.
+        ld.d  $t3, $a1, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr
+
+        // Load and calculate the maximum size of object we can fit.
+        ld.d  $t2, $a1, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit
+        sub.d  $t2, $t2, $t3
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        bltu  $t2, $a2, LOCAL_LABEL(RhpNewFast_RarePath)
+
+        // Calculate the new alloc pointer to account for the allocation.
+        add.d  $a2, $a2, $t3
+
+        // Set the new object's MethodTable pointer.
+        st.d  $a0, $t3, OFFSETOF__Object__m_pEEType
+
+        // Update the alloc pointer to the newly calculated one.
+        st.d  $a2, $a1, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr
+
+        ori  $a0, $t3, 0
+        jirl  $r0, $ra, 0
+
+LOCAL_LABEL(RhpNewFast_RarePath):
+        ori  $a1, $zero, 0
+        b  RhpNewObject
+    LEAF_END RhpNewFast, _TEXT
+
+// Allocate non-array object with finalizer.
+//  a0 == MethodTable
+    LEAF_ENTRY RhpNewFinalizable, _TEXT
+        ori  $a1, $zero, GC_ALLOC_FINALIZE
+        b  RhpNewObject
+    LEAF_END RhpNewFinalizable, _TEXT
+
+// Allocate non-array object.
+//  a0 == MethodTable
+//  a1 == alloc flags
+    NESTED_ENTRY RhpNewObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME $a3
+
+        // a3: transition frame
+
+        // Preserve the MethodTable in s0
+        ori  $s0, $a0, 0
+
+        ori  $a2, $zero, 0 // numElements
+
+        // Call the rest of the allocation helper.
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        bl  C_FUNC(RhpGcAlloc)
+
+        // Set the new object's MethodTable pointer on success.
+        beqz  $a0, LOCAL_LABEL(NewOutOfMemory)
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+LOCAL_LABEL(NewOutOfMemory):
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        ori  $a0, $s0, 0                // MethodTable pointer
+        ori  $a1, $zero, 0              // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        b C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    NESTED_END RhpNewObject, _TEXT
+
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewPtrArrayFast
+//  a0 == MethodTable
+//  a1 == character/element count
+//  a2 == string/array size
+    .macro NEW_ARRAY_FAST
+
+        INLINE_GET_ALLOC_CONTEXT_BASE $a3
+
+        // Load potential new object address into t3.
+        ld.d  $t3, $a3, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr
+
+        // Load and calculate the maximum size of object we can fit.
+        ld.d  $t2, $a3, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit
+        sub.d  $t2, $t2, $t3
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        bltu  $t2, $a2, C_FUNC(RhpNewVariableSizeObject)
+
+        // Calculate the new alloc pointer to account for the allocation.
+        add.d  $a2, $a2, $t3
+
+        // Set the new object's MethodTable pointer and element count.
+        st.d  $a0, $t3, OFFSETOF__Object__m_pEEType
+        st.d  $a1, $t3, OFFSETOF__Array__m_Length
+
+        // Update the alloc pointer to the newly calculated one.
+        st.d  $a2, $a3, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr
+
+        // Return the object allocated in a0.
+        ori  $a0, $t3, 0
+
+        jirl  $r0, $ra, 0
+
+    .endm
+
+// Allocate a string.
+//  a0 == MethodTable
+//  a1 == element/character count
+    LEAF_ENTRY RhNewString, _TEXT
+
+        // Make sure computing the overall allocation size won't overflow
+        lu12i.w  $a2, ((MAX_STRING_LENGTH >> 12) & 0xFFFFF)
+        ori  $a2, $a2, (MAX_STRING_LENGTH & 0xFFF)
+        bltu  $a2, $a1, LOCAL_LABEL(StringSizeOverflow)
+
+        // Compute overall allocation size (align(base size + (element size * elements), 8)).
+        ori  $a2, $zero, STRING_COMPONENT_SIZE
+        mulw.d.w  $a2, $a1, $a2                 // a2 = (a1[31:0] * a2[31:0])[64:0]
+        addi.d  $a2, $a2, STRING_BASE_SIZE + 7  // a2 = a2 + STRING_BASE_SIZE + 7
+        bstrins.d  $a2, $r0, 2, 0               // clear the bits[2:0] of $a2
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(StringSizeOverflow):
+        // We get here if the length of the final string object can not be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        // a0 holds MethodTable pointer already
+        ori  $a1, $zero, 1                  // Indicate that we should throw OverflowException
+        b  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    LEAF_END    RhNewString, _TEXT
+
+// Allocate one dimensional, zero based array (SZARRAY).
+//  $a0 == MethodTable
+//  $a1 == element count
+    LEAF_ENTRY RhpNewArrayFast, _TEXT
+
+        // We want to limit the element count to the non-negative 32-bit int range.
+        // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
+        // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst
+        // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
+        lu12i.w  $a2, 0x7ffff
+        ori  $a2, $a2, 0xfff
+        bltu  $a2, $a1, LOCAL_LABEL(ArraySizeOverflow)
+
+        ld.h  $a2, $a0, OFFSETOF__MethodTable__m_usComponentSize
+        mulw.d.w  $a2, $a1, $a2
+        addi.d  $a2, $a2, SZARRAY_BASE_SIZE + 7
+        bstrins.d  $a2, $r0, 2, 0
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(ArraySizeOverflow):
+        // We get here if the size of the final array object can not be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        // $a0 holds MethodTable pointer already
+        ori  $a1, $zero, 1 // Indicate that we should throw OverflowException
+        b  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    LEAF_END    RhpNewArrayFast, _TEXT
+
+// Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements.
+//  $a0 == MethodTable
+//  $a1 == element count
+    LEAF_ENTRY RhpNewPtrArrayFast, _TEXT
+
+        // Delegate overflow handling to the generic helper conservatively
+
+        li.w  $a2, (0x40000000 / 8) // sizeof(void*)
+        bgeu  $a1, $a2, C_FUNC(RhpNewArrayFast)
+
+        // In this case we know the element size is sizeof(void *), or 8 for loongarch64
+        // This helps us in two ways - we can shift instead of multiplying, and
+        // there's no need to align the size either
+
+        slli.d  $a2, $a1, 3
+        addi.d  $a2, $a2, SZARRAY_BASE_SIZE
+
+        // No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
+        // to be a multiple of 8.
+
+        NEW_ARRAY_FAST
+
+    LEAF_END    RhpNewPtrArrayFast, _TEXT
+
+// Allocate variable sized object (e.g. array, string) using the slow path that calls a runtime helper.
+//  a0 == MethodTable
+//  a1 == element count
+    NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME $a3
+
+        // Preserve data we will need later into the callee saved registers
+        ori  $s0, $a0, 0              // Preserve MethodTable
+
+        ori  $a2, $a1, 0              // numElements
+        ori  $a1, $zero, 0            // uFlags
+
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        bl  C_FUNC(RhpGcAlloc)
+
+        // Set the new object's MethodTable pointer and length on success.
+        beqz  $a0, LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory)
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory):
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        ori  $a0, $s0, 0             // MethodTable Pointer
+        ori  $a1, $zero, 0           // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        b  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    NESTED_END RhpNewVariableSizeObject, _TEXT
diff --git a/src/coreclr/runtime/loongarch64/StubDispatch.S b/src/coreclr/runtime/loongarch64/StubDispatch.S
new file mode 100644
index 000000000000..af0ef71273ab
--- /dev/null
+++ b/src/coreclr/runtime/loongarch64/StubDispatch.S
@@ -0,0 +1,97 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+    .extern RhpCidResolve
+    .extern RhpUniversalTransition_DebugStepTailCall
+
+    // Macro that generates code to check a single cache entry.
+    .macro CHECK_CACHE_ENTRY entry
+        // Check a single entry in the cache.
+        //  t0 : Cache data structure. Also used for target address jump.
+        //  t1 : Instance MethodTable*
+        //  t8 : Indirection cell address, preserved
+        //  t3 : Trashed
+        ld.d  $t3, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))
+        bne  $t1, $t3, 0f
+        ld.d  $t0, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)
+        jirl  $r0, $t0, 0
+0:
+    .endm
+
+//
+// Macro that generates a stub consuming a cache with the given number of entries.
+//
+    .macro DEFINE_INTERFACE_DISPATCH_STUB entries
+
+    NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler
+
+        // t8 holds the indirection cell address. Load the cache pointer.
+        ld.d  $t0, $t8, OFFSETOF__InterfaceDispatchCell__m_pCache
+
+        // Load the MethodTable from the object instance in a0.
+        ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries
+        ld.d  $t1, $a0, 0
+
+    .global CurrentEntry
+    .set CurrentEntry, 0
+
+    .rept \entries
+        CHECK_CACHE_ENTRY CurrentEntry
+        .set CurrentEntry, CurrentEntry + 1
+    .endr
+
+        // t8 still contains the indirection cell address.
+        b  C_FUNC(RhpInterfaceDispatchSlow)
+
+    NESTED_END "RhpInterfaceDispatch\entries", _TEXT
+
+    .endm
+
+//
+// Define all the stub routines we currently need.
+//
+// If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the
+// *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens
+// during the interface dispatch.
+//
+    DEFINE_INTERFACE_DISPATCH_STUB 1
+    DEFINE_INTERFACE_DISPATCH_STUB 2
+    DEFINE_INTERFACE_DISPATCH_STUB 4
+    DEFINE_INTERFACE_DISPATCH_STUB 8
+    DEFINE_INTERFACE_DISPATCH_STUB 16
+    DEFINE_INTERFACE_DISPATCH_STUB 32
+    DEFINE_INTERFACE_DISPATCH_STUB 64
+
+//
+// Initial dispatch on an interface when we don't have a cache yet.
+//
+    LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT
+    ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+        // Trigger an AV if we're dispatching on a null this.
+        // The exception handling infrastructure is aware of the fact that this is the first
+        // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        // to a NullReferenceException at the callsite.
+        ld.d  $zero, $a0, 0
+
+        // Just tail call to the cache miss helper.
+        b  C_FUNC(RhpInterfaceDispatchSlow)
+    LEAF_END RhpInitialInterfaceDispatch, _TEXT
+
+//
+// Cache miss case, call the runtime to resolve the target and update the cache.
+// Use universal transition helper to allow an exception to flow out of resolution.
+//
+    LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT
+        // t8 contains the interface dispatch cell address.
+        // Calling convention of the universal thunk is:
+        // t7: target address for the thunk to call
+        // t8: parameter of the thunk's target
+        PREPARE_EXTERNAL_VAR RhpCidResolve, $t7
+        b  C_FUNC(RhpUniversalTransition_DebugStepTailCall)
+    LEAF_END RhpInterfaceDispatchSlow, _TEXT
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/coreclr/runtime/loongarch64/WriteBarriers.S b/src/coreclr/runtime/loongarch64/WriteBarriers.S
new file mode 100644
index 000000000000..20888056b04e
--- /dev/null
+++ b/src/coreclr/runtime/loongarch64/WriteBarriers.S
@@ -0,0 +1,351 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+// during garbage collections to verify that object references where never written to the heap without using a
+// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing
+// new references to the real heap. Since this can not be solved perfectly without critical sections around the
+// entire update process, we instead update the shadow location and then re-check the real location (as two
+// ordered operations) and if there is a disparity we will re-write the shadow location with a special value
+// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+// time and these write barrier operations are atomic with respect to GCs, this is sufficient to guarantee that the
+// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
+#ifdef WRITE_BARRIER_CHECK
+
+    .global     $g_GCShadow
+    .global     $g_GCShadowEnd
+
+        // On entry:
+        //  $destReg: location to be updated
+        //  $refReg: objectref to be stored
+        //
+        // On exit:
+        //  t3,t4: trashed
+        //  other registers are preserved
+        //
+        .macro UPDATE_GC_SHADOW destReg, refReg
+
+        // If g_GCShadow is 0, don't perform the check.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3
+        beqz  $t3, 1f
+
+        // Save destReg since we're about to modify it (and we need the original value both within the macro and
+        // once we exit the macro).
+        ori  $t4, \destReg, 0
+
+        // Transform destReg into the equivalent address in the shadow heap.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3
+        sub.d  \destReg, \destReg, $t3
+        bltu   \destReg, $zero, 0f
+
+        PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3
+        add.d  \destReg, \destReg, $t3
+
+        PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, $t3
+        bgeu  \destReg, $t3, 0f
+
+        // Update the shadow heap.
+        st.d  \refReg, \destReg, 0
+
+        // The following read must be strongly ordered with respect to the write we have just performed in order to
+        // prevent race conditions.
+        dbar  0
+
+        // Now check that the real heap location still contains the value we just wrote into the shadow heap.
+        ori  $t3, $t4, 0
+        ld.d  $t3, $t3, 0
+        beq  $t3, \refReg, 0f
+
+        // Someone went and updated the real heap. We need to write INVALIDGCVALUE to the shadow location since we cannot
+        // guarantee whose shadow update won.
+        li.d  $t3, INVALIDGCVALUE
+        st.d  $t3, \destReg, 0
+
+0:
+        // Restore original destReg value
+        ori  \destReg, $t4, 0
+
+1:
+    .endm
+
+#else // WRITE_BARRIER_CHECK
+
+    .macro UPDATE_GC_SHADOW destReg, refReg
+    .endm
+
+#endif // WRITE_BARRIER_CHECK
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+// name of the register that points to the location to be updated and the name of the register that holds the
+// object reference (this should be in upper case as it is used in the definition of the name of the helper).
+
+// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for
+// some interlocked helpers that need an inline barrier.
+
+        // On entry:
+        //   destReg: location to be updated (cannot be t3,t4)
+        //   refReg:  objectref to be stored (cannot be t3,t4)
+        //
+        // On exit:
+        //   t3,t4: trashed
+        //
+        .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+        // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+        // we are in a debug build and write barrier checking has been enabled).
+        UPDATE_GC_SHADOW \destReg, \refReg
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+        // Update the write watch table if necessary
+        PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, $t3
+
+        beqz  $t3, 2f
+        srli.d  $t4, \destReg, 12
+        add.d  $t3, $t3, $t4      // SoftwareWriteWatch::AddressToTableByteIndexShift
+        ld.b  $t4, $t3, 0
+        bnez  $t4, 2f
+        ori  $t4, $zero, 0xFF
+        st.b  $t4, $t3, 0
+#endif
+
+2:
+        // We can skip the card table write if the reference is to
+        // an object not on the ephemeral segment.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, $t3
+        bltu  \refReg, $t3, 0f
+
+        PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, $t3
+        bgeu  \refReg, $t3, 0f
+
+        // Set this object's card, if it has not already been set.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, $t3
+        srli.d  $t4, \destReg, 11
+        add.d  $t4, $t3, $t4
+
+        // Check that this card has not already been written. Avoiding useless writes is a big win on
+        // multi-proc systems since it avoids cache thrashing.
+        ld.bu  $t3, $t4, 0
+        xori  $t3, $t3, 0xFF
+        beqz  $t3, 0f
+
+        ori  $t3, $zero, 0xFF
+        st.b  $t3, $t4, 0
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        // Check if we need to update the card bundle table
+        PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, $t3
+        srli.d  $t4, \destReg, 21
+        add.d  $t4, $t3, $t4
+        ld.bu  $t3, $t4, 0
+        xori  $t3, $t3, 0xFF
+        beqz  $t3, 0f
+
+        ori  $t3, $zero, 0xFF
+        st.b  $t3, $t4, 0
+#endif
+
+0:
+        // Exit label
+    .endm
+
+        // On entry:
+        //   destReg: location to be updated
+        //   refReg:  objectref to be stored
+        //
+        // On exit:
+        //   t3, t4:   trashed
+        //
+        .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+        // The "check" of this checked write barrier - is destReg
+        // within the heap? if no, early out.
+        PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3
+        bltu    \destReg, $t3, 0f
+
+        PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3
+
+        // If destReg is at or above g_highest_address, the location is outside the GC heap; skip the barrier.
+        bgeu  \destReg, $t3, 0f
+
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg
+
+0:
+        // Exit label
+    .endm
+
+// void JIT_ByRefWriteBarrier
+// On entry:
+//   t8  : the source address (points to object reference to write)
+//   t6  : the destination address (object reference written here)
+//
+// On exit:
+//   t8  : incremented by 8
+//   t6  : incremented by 8
+//   t7  : trashed
+//   t3, t4  : trashed
+//
+//   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+//         if you add more trashed registers.
+//
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+LEAF_ENTRY RhpByRefAssignRef, _TEXT
+
+    ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
+        ld.d  $t7, $t8, 0
+        addi.d  $t8, $t8, 8
+        b  C_FUNC(RhpCheckedAssignRef)
+
+LEAF_END RhpByRefAssignRef, _TEXT
+
+// JIT_CheckedWriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that may reside
+// on the managed heap.
+//
+// On entry:
+//   t6 : the destination address (LHS of the assignment).
+//         May not be a heap location (hence the checked).
+//   t7 : the object reference (RHS of the assignment).
+//
+// On exit:
+//   $t3 : trashed
+//   $t6 : incremented by 8 to implement JIT_ByRefWriteBarrier contract
+LEAF_ENTRY RhpCheckedAssignRef, _TEXT
+
+        // is destReg within the heap?
+        PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3
+        bltu    $t6, $t3, LOCAL_LABEL(NotInHeap)
+        PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3
+        bgeu    $t6, $t3, LOCAL_LABEL(NotInHeap)
+        b  C_FUNC(RhpAssignRefLoongArch64)
+
+LOCAL_LABEL(NotInHeap):
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+        st.d  $t7, $t6, 0
+        addi.d  $t6, $t6, 8
+        jirl  $r0, $ra, 0
+
+LEAF_END RhpCheckedAssignRef, _TEXT
+
+// JIT_WriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that are known to
+// reside on the managed heap.
+//
+// On entry:
+//  t6 : the destination address (LHS of the assignment).
+//  t7 : the object reference (RHS of the assignment).
+//
+// On exit:
+//  t3, t4 : trashed
+//  t6 : incremented by 8
+LEAF_ENTRY RhpAssignRefLoongArch64, _TEXT
+        dbar 0
+
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+        st.d  $t7, $t6, 0
+
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE  $t6, $t7
+
+        addi.d  $t6, $t6, 8
+        jirl  $r0, $ra, 0
+
+LEAF_END RhpAssignRefLoongArch64, _TEXT
+
+// Same as RhpAssignRefLoongArch64, but with standard ABI.
+LEAF_ENTRY RhpAssignRef, _TEXT
+        ori  $t6, $a0, 0                    // t6 = dst
+        ori  $t7, $a1, 0                    // t7 = val
+        b  C_FUNC(RhpAssignRefLoongArch64)
+LEAF_END RhpAssignRef, _TEXT
+
+
+// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
+// successful updates.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+
+// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand)
+//
+// Interlocked compare exchange on objectref.
+//
+// On entry:
+//  a0: pointer to objectref
+//  a1: exchange value
+//  a2: comparand
+//
+// On exit:
+//  a0: original value of objectref
+//  t0, t1, t3, t4: trashed
+//
+LEAF_ENTRY RhpCheckedLockCmpXchg
+
+LOCAL_LABEL(RetryLoop):
+        // Load the current value at the destination address.
+        ll.d  $t0, $a0, 0       // t0 = *dest (load with atomic ordering)
+        // Compare the loaded value with the comparand.
+        bne  $t0, $a2, LOCAL_LABEL(EndOfExchange) // if (*dest != comparand) goto EndOfExchange
+
+        ori  $t1, $a1, 0
+        // Attempt to store the exchange value at the destination address.
+        sc.d  $t1, $a0, 0  // t1 = (store conditional result with atomic, 0 if failed)
+        beqz  $t1, LOCAL_LABEL(RetryLoop) // if store conditional failed, retry
+        b  LOCAL_LABEL(DoCardsCmpXchg)
+
+LOCAL_LABEL(EndOfExchange):
+        dbar  0x700
+        b  LOCAL_LABEL(CmpXchgNoUpdate)
+
+LOCAL_LABEL(DoCardsCmpXchg):
+        // We have successfully updated the value of the objectref so now we need a GC write barrier.
+        // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are
+        // already correctly set up.
+
+        INSERT_CHECKED_WRITE_BARRIER_CORE  $a0, $a1
+
+LOCAL_LABEL(CmpXchgNoUpdate):
+        ori   $a0, $t0, 0   // t0 still contains the original value.
+        jirl  $r0, $ra, 0
+
+LEAF_END RhpCheckedLockCmpXchg, _TEXT
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+
+// RhpCheckedXchg(Object** destination, Object* value)
+//
+// Interlocked exchange on objectref.
+//
+// On entry:
+//  a0: pointer to objectref
+//  a1: exchange value
+//
+// On exit:
+//  a0: original value of objectref
+//  t1: trashed
+//  t3, t4: trashed
+//
+LEAF_ENTRY RhpCheckedXchg, _TEXT
+        amswap_db.d  $t1, $a1, $a0      // exchange
+
+        // We have successfully updated the value of the objectref so now we need a GC write barrier.
+        // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are
+        // already correctly set up.
+
+        INSERT_CHECKED_WRITE_BARRIER_CORE  $a0, $a1
+
+        // $t1 still contains the original value.
+        ori  $a0, $t1, 0
+
+        jirl  $r0, $ra, 0
+
+LEAF_END RhpCheckedXchg, _TEXT
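The barrier macros above perform, in order: an optional shadow-heap cross-check (debug builds), an optional software write-watch table update, a filter that skips references outside the ephemeral generation, and finally card and card-bundle marking that avoids redundant stores. A hedged C sketch of that sequence follows; the `g_*` globals mirror the symbols the assembly loads, while the types and the shift constants (12/11/21) are taken straight from the instructions above rather than from the runtime headers.

```c
#include <stdint.h>

// Globals named after the symbols the assembly loads; declared here only to
// make the sketch self-contained.
extern uint8_t *g_card_table;
extern uint8_t *g_card_bundle_table;
extern uint8_t *g_write_watch_table;
extern uint8_t *g_ephemeral_low;
extern uint8_t *g_ephemeral_high;
extern uint8_t *g_lowest_address;
extern uint8_t *g_highest_address;

// Equivalent of INSERT_UNCHECKED_WRITE_BARRIER_CORE.
static void UncheckedWriteBarrier(void **dst, void *ref)
{
    // (debug builds) UPDATE_GC_SHADOW would mirror the store into the shadow
    // heap here, writing INVALIDGCVALUE back if it lost a race with another writer.

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    if (g_write_watch_table != 0) {
        uint8_t *ww = g_write_watch_table + ((uintptr_t)dst >> 12);
        if (*ww == 0)
            *ww = 0xFF;                           // mark the page as written
    }
#endif

    // Only stores of ephemeral references need card marking.
    if ((uint8_t *)ref < g_ephemeral_low || (uint8_t *)ref >= g_ephemeral_high)
        return;

    uint8_t *card = g_card_table + ((uintptr_t)dst >> 11);
    if (*card != 0xFF) {                          // skip the store if already marked
        *card = 0xFF;
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
        uint8_t *bundle = g_card_bundle_table + ((uintptr_t)dst >> 21);
        if (*bundle != 0xFF)
            *bundle = 0xFF;
#endif
    }
}

// Equivalent of INSERT_CHECKED_WRITE_BARRIER_CORE: only run the barrier when
// the destination lies inside the GC heap range.
static void CheckedWriteBarrier(void **dst, void *ref)
{
    if ((uint8_t *)dst >= g_lowest_address && (uint8_t *)dst < g_highest_address)
        UncheckedWriteBarrier(dst, ref);
}
```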
diff --git a/src/coreclr/runtime/riscv64/AllocFast.S b/src/coreclr/runtime/riscv64/AllocFast.S
new file mode 100644
index 000000000000..5fc3d69987e4
--- /dev/null
+++ b/src/coreclr/runtime/riscv64/AllocFast.S
@@ -0,0 +1,287 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+// allocation context then automatically fallback to the slow allocation path.
+//  a0 == MethodTable
+    LEAF_ENTRY RhpNewFast, _TEXT
+        PROLOG_SAVE_REG_PAIR_INDEXED  fp, ra, 0x20
+        PROLOG_SAVE_REG               s1,     0x10
+
+        // Save MethodTable pointer
+        mv    s1, a0
+
+        // a0 = ee_alloc_context pointer; trashes volatile registers
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        //
+        // s1 contains MethodTable pointer
+        //
+        lw    t0, OFFSETOF__MethodTable__m_uBaseSize(s1)
+
+        //
+        // s1: MethodTable pointer
+        // a0: ee_alloc_context pointer
+        // t0: base size
+        //
+
+        // Load potential new object address into t1.
+        ld    t1, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)(a0)
+
+        // Load and calculate the maximum size of object we can fit.
+        ld    t2, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)(a0)
+        sub   t2, t2, t1
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        bltu  t2, t0, LOCAL_LABEL(RhpNewFast_RarePath)
+
+        // Calculate the new alloc pointer to account for the allocation.
+        add   t0, t0, t1
+
+        // Set the new object's MethodTable pointer.
+        sd    s1, OFFSETOF__Object__m_pEEType(t1)
+
+        // Update the alloc pointer to the newly calculated one.
+        sd    t0, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)(a0)
+
+        mv    a0, t1
+
+        EPILOG_RESTORE_REG               s1,     0x10
+        EPILOG_RESTORE_REG_PAIR_INDEXED  fp, ra, 0x20
+
+        ret
+
+LOCAL_LABEL(RhpNewFast_RarePath):
+        mv    a1, zero
+        mv    a0, s1
+
+        EPILOG_RESTORE_REG               s1,     0x10
+        EPILOG_RESTORE_REG_PAIR_INDEXED  fp, ra, 0x20
+
+        tail  RhpNewObject
+    LEAF_END RhpNewFast, _TEXT
+
+// Allocate non-array object with finalizer.
+//  a0 == MethodTable
+    LEAF_ENTRY RhpNewFinalizable, _TEXT
+        li    a1, GC_ALLOC_FINALIZE
+        tail  RhpNewObject
+    LEAF_END RhpNewFinalizable, _TEXT
+
+// Allocate non-array object.
+//  a0 == MethodTable
+//  a1 == alloc flags
+    NESTED_ENTRY RhpNewObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME a3
+
+        // a3: transition frame
+
+        // Preserve the MethodTable in s2
+        mv  s2, a0
+
+        li  a2, 0 // numElements
+
+        // Call the rest of the allocation helper.
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call  C_FUNC(RhpGcAlloc)
+
+        // Set the new object's MethodTable pointer on success.
+        beq  a0, zero, LOCAL_LABEL(NewOutOfMemory)
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+
+LOCAL_LABEL(NewOutOfMemory):
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mv  a0, s2                // MethodTable pointer
+        li  a1, 0                 // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        tail  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    NESTED_END RhpNewObject, _TEXT
+
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewPtrArrayFast
+//  a0 == MethodTable
+//  a1 == character/element count
+//  t0 == string/array size
+    .macro NEW_ARRAY_FAST
+
+        PROLOG_SAVE_REG_PAIR_INDEXED  fp, ra, 0x20
+        PROLOG_SAVE_REG_PAIR          s1, s2, 0x10
+
+        // Save MethodTable pointer and string length
+        mv    s1, a0
+        mv    s2, a1
+
+        // a0 = ee_alloc_context pointer; trashes volatile registers
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        // s1 == MethodTable
+        // s2 == element count
+        // t0 == string/array size
+
+        // Load potential new object address into t1.
+        ld    t1, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)(a0)
+
+        // Load and calculate the maximum size of object we can fit.
+        ld    t2, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)(a0)
+        sub   t2, t2, t1
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        bltu  t2, t0, 1f
+
+        // Calculate the new alloc pointer to account for the allocation.
+        add   t0, t0, t1
+
+        // Set the new object's MethodTable pointer.
+        sd    s1, OFFSETOF__Object__m_pEEType(t1)
+        sd    s2, OFFSETOF__Array__m_Length(t1)
+
+        // Update the alloc pointer to the newly calculated one.
+        sd    t0, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)(a0)
+
+        // Return the object allocated in a0.
+        mv    a0, t1
+
+        EPILOG_RESTORE_REG_PAIR          s1, s2, 0x10
+        EPILOG_RESTORE_REG_PAIR_INDEXED  fp, ra, 0x20
+
+        ret
+
+1:
+        mv    a0, s1
+        mv    a1, s2
+
+        EPILOG_RESTORE_REG_PAIR          s1, s2, 0x10
+        EPILOG_RESTORE_REG_PAIR_INDEXED  fp, ra, 0x20
+
+        tail  C_FUNC(RhpNewVariableSizeObject)
+
+    .endm
+
+// Allocate a string.
+//  a0 == MethodTable
+//  a1 == element/character count
+    LEAF_ENTRY RhNewString, _TEXT
+
+        // Make sure computing the overall allocation size won't overflow
+        li    a2, MAX_STRING_LENGTH
+        bltu  a2, a1, LOCAL_LABEL(StringSizeOverflow)   // Branch if a2 < a1 (overflow)
+
+        // Compute overall allocation size (align(base size + (element size * elements), 8)).
+        slli  t0, a1, 1                                 // t0 = a1 * STRING_COMPONENT_SIZE, where STRING_COMPONENT_SIZE == 2
+        addi  t0, t0, STRING_BASE_SIZE + 7              // t0 = t0 + STRING_BASE_SIZE + 7
+        andi  t0, t0, ~0x7                              // Clear the bits[2:0] of t0 (align to 8 bytes)
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(StringSizeOverflow):
+        // We get here if the length of the final string object cannot be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        li    a1, 1                  // Indicate that we should throw OverflowException
+        tail  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    LEAF_END RhNewString, _TEXT
+
+// Allocate one-dimensional, zero-based array (SZARRAY).
+//  a0 == MethodTable
+//  a1 == element count
+    LEAF_ENTRY RhpNewArrayFast, _TEXT
+
+        // We want to limit the element count to the non-negative 32-bit int range.
+        // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
+        // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst
+        // case (32-dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
+        li    a2, 0x7fffffff
+        bltu  a2, a1, LOCAL_LABEL(ArraySizeOverflow)  // Branch if a2 < a1 (check for overflow)
+
+        lhu   t0, OFFSETOF__MethodTable__m_usComponentSize(a0) // Load component size
+        mul   t0, a1, t0                                       // t0 = a1 * component size
+        addi  t0, t0, SZARRAY_BASE_SIZE + 7                    // t0 = t0 + SZARRAY_BASE_SIZE + 7
+        andi  t0, t0, ~0x7                                     // Clear the bits[2:0] of t0 (align to 8 bytes)
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(ArraySizeOverflow):
+        // We get here if the size of the final array object cannot be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        li   a1, 1 // Indicate that we should throw OverflowException
+        tail  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    LEAF_END RhpNewArrayFast, _TEXT
+
+// Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements.
+//  a0 == MethodTable
+//  a1 == element count
+    LEAF_ENTRY RhpNewPtrArrayFast, _TEXT
+
+        // Delegate overflow handling to the generic helper conservatively
+
+        li          t0, (0x40000000 / 8) // sizeof(void*)
+        bgeu        a1, t0, C_FUNC(RhpNewArrayFast)
+
+        // In this case we know the element size is sizeof(void *), or 8 for riscv64
+        // This helps us in two ways - we can shift instead of multiplying, and
+        // there's no need to align the size either
+
+        sll         t0, a1, 3
+        addi        t0, t0, SZARRAY_BASE_SIZE
+
+        // No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
+        // to be a multiple of 8.
+
+        NEW_ARRAY_FAST
+
+    LEAF_END RhpNewPtrArrayFast, _TEXT
+
+// Allocate variable sized object (eg. array, string) using the slow path that calls a runtime helper.
+//  a0 == MethodTable
+//  a1 == element count
+    NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME a3
+
+        // Preserve data we will need later into the callee saved registers
+        mv   s2, a0              // Preserve MethodTable
+
+        mv   a2, a1              // numElements
+        li   a1, 0                // uFlags
+
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call  C_FUNC(RhpGcAlloc)
+
+        // Set the new object's MethodTable pointer and length on success.
+        beq  a0, zero, LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory)
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+
+LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory):
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mv   a0, s2             // MethodTable Pointer
+        li   a1, 0              // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        tail  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    NESTED_END RhpNewVariableSizeObject, _TEXT
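RhpNewFast and the NEW_ARRAY_FAST macro above are bump-pointer allocators: they check whether the object fits in the space remaining in the thread's ee_alloc_context, stamp the MethodTable pointer (and, for strings and arrays, the length), advance alloc_ptr, and otherwise tail-call a slow helper that may trigger a GC or throw. Below is a minimal C sketch of the non-array fast path, assuming illustrative struct layouts and a hypothetical GetAllocContextBase helper in place of INLINE_GET_ALLOC_CONTEXT_BASE.

```c
#include <stddef.h>
#include <stdint.h>

// Illustrative layouts only; the real offsets come from the OFFSETOF__* constants.
typedef struct MethodTable { uint32_t m_uBaseSize; } MethodTable;
typedef struct Object      { MethodTable *m_pEEType; } Object;

typedef struct {
    uint8_t *alloc_ptr;        // next free byte in the thread's allocation context
    uint8_t *combined_limit;   // end of the space usable without calling the GC
} ee_alloc_context;

extern ee_alloc_context *GetAllocContextBase(void);            // hypothetical helper
extern Object *RhpNewObject(MethodTable *mt, uint32_t uFlags); // slow path

// Sketch of RhpNewFast: bump-pointer allocate or fall back to the slow helper.
static Object *NewFast(MethodTable *mt)
{
    ee_alloc_context *acx = GetAllocContextBase();
    size_t size = mt->m_uBaseSize;

    if ((size_t)(acx->combined_limit - acx->alloc_ptr) >= size) {
        Object *obj = (Object *)acx->alloc_ptr;
        obj->m_pEEType = mt;         // stamp the MethodTable pointer
        acx->alloc_ptr += size;      // publish the new allocation pointer
        return obj;
    }
    return RhpNewObject(mt, 0);      // may GC, or throw OutOfMemoryException
}
```

The string and array variants differ only in computing the size from the element count (with an overflow check first) and additionally storing the length field.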
diff --git a/src/coreclr/runtime/riscv64/StubDispatch.S b/src/coreclr/runtime/riscv64/StubDispatch.S
new file mode 100644
index 000000000000..cc4101a4a174
--- /dev/null
+++ b/src/coreclr/runtime/riscv64/StubDispatch.S
@@ -0,0 +1,97 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+    .extern RhpCidResolve
+    .extern RhpUniversalTransition_DebugStepTailCall
+
+    // Macro that generates code to check a single cache entry.
+    .macro CHECK_CACHE_ENTRY entry
+        // Load cache entry data into a temporary register
+        ld    t6, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))(t0)
+
+        // Compare with MethodTable* in t1
+        bne   t1, t6, 0f
+
+        // Load the target address from the cache entry
+        ld    t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)(t0)
+
+        // Jump to the address in t0
+        jr  t0
+
+    0:
+    .endm
+
+    //
+    // Macro that generates a stub consuming a cache with the given number of entries.
+    //
+    .macro DEFINE_INTERFACE_DISPATCH_STUB entries
+
+        NESTED_ENTRY RhpInterfaceDispatch\entries, _TEXT, NoHandler
+
+            // t5 holds the indirection cell address. Load the cache pointer.
+            ld  t0, OFFSETOF__InterfaceDispatchCell__m_pCache(t5)
+
+            // Load the MethodTable from the object instance in a0.
+            ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries
+            ld  t1, 0(a0)
+
+        .global CurrentEntry
+        .set CurrentEntry, 0
+
+        .rept \entries
+            CHECK_CACHE_ENTRY CurrentEntry
+            .set CurrentEntry, CurrentEntry + 1
+        .endr
+
+            // t5 still contains the indirection cell address.
+            tail  C_FUNC(RhpInterfaceDispatchSlow)
+
+        NESTED_END RhpInterfaceDispatch\entries, _TEXT
+
+    .endm
+
+    //
+    // Define all the stub routines we currently need.
+    //
+    DEFINE_INTERFACE_DISPATCH_STUB 1
+    DEFINE_INTERFACE_DISPATCH_STUB 2
+    DEFINE_INTERFACE_DISPATCH_STUB 4
+    DEFINE_INTERFACE_DISPATCH_STUB 8
+    DEFINE_INTERFACE_DISPATCH_STUB 16
+    DEFINE_INTERFACE_DISPATCH_STUB 32
+    DEFINE_INTERFACE_DISPATCH_STUB 64
+
+    //
+    // Initial dispatch on an interface when we don't have a cache yet.
+    //
+    LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT
+    ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+        // Trigger an AV if we're dispatching on a null this.
+        // The exception handling infrastructure is aware of the fact that this is the first
+        // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        // to a NullReferenceException at the callsite.
+        lw zero, 0(a0)
+
+        // Just tail call to the cache miss helper.
+        tail       C_FUNC(RhpInterfaceDispatchSlow)
+    LEAF_END RhpInitialInterfaceDispatch, _TEXT
+
+    //
+    // Cache miss case, call the runtime to resolve the target and update the cache.
+    // Use universal transition helper to allow an exception to flow out of resolution.
+    //
+    LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT
+        // t5 contains the interface dispatch cell address.
+        // Calling convention of the universal thunk is:
+        //  t0: target address for the thunk to call
+        //  t1: parameter of the thunk's target
+        PREPARE_EXTERNAL_VAR RhpCidResolve, t0
+        mv t1, t5
+        tail       C_FUNC(RhpUniversalTransition_DebugStepTailCall)
+    LEAF_END RhpInterfaceDispatchSlow, _TEXT
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/coreclr/runtime/riscv64/WriteBarriers.S b/src/coreclr/runtime/riscv64/WriteBarriers.S
new file mode 100644
index 000000000000..469908665b35
--- /dev/null
+++ b/src/coreclr/runtime/riscv64/WriteBarriers.S
@@ -0,0 +1,371 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmMacros_Shared.h"
+
+// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+// during garbage collections to verify that object references were never written to the heap without using a
+// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing
+// new references to the real heap. Since this cannot be solved perfectly without critical sections around the
+// entire update process, we instead update the shadow location and then re-check the real location (as two
+// ordered operations) and if there is a disparity we will re-write the shadow location with a special value
+// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+// time and these write barrier operations are atomic with respect to GCs, this is sufficient to guarantee that the
+// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
+#ifdef WRITE_BARRIER_CHECK
+
+    .global     g_GCShadow
+    .global     g_GCShadowEnd
+
+        // On entry:
+        //  destReg: location to be updated
+        //  refReg: objectref to be stored
+        //
+        // On exit:
+        //  t3,t4: trashed
+        //  other registers are preserved
+        //
+        .macro UPDATE_GC_SHADOW destReg, refReg
+
+        // If g_GCShadow is 0, don't perform the check.
+        la    t3, g_GCShadow
+        ld    t3, 0(t3)
+        beq  t3, zero, 1f
+
+        // Save destReg since we're about to modify it (and we need the original value both within the macro and
+        // once we exit the macro).
+        mv   t4, \destReg
+
+        // Transform destReg into the equivalent address in the shadow heap.
+        la    t3, g_lowest_address
+        ld    t3, 0(t3)
+        sub  \destReg, \destReg, t3
+        bltz \destReg, 0f
+
+        la    t3, g_GCShadow
+        ld    t3, 0(t3)
+        add  \destReg, \destReg, t3
+
+        la    t3, g_GCShadowEnd
+        ld    t3, 0(t3)
+        bgeu \destReg, t3, 0f
+
+        // Update the shadow heap.
+        sd   \refReg, 0(\destReg)
+
+        // The following read must be strongly ordered with respect to the write we have just performed in order to
+        // prevent race conditions.
+        fence rw, rw
+
+        // Now check that the real heap location still contains the value we just wrote into the shadow heap.
+        mv   t3, t4
+        ld   t3, 0(t3)
+        beq  t3, \refReg, 0f
+
+        // Someone went and updated the real heap. We need to write INVALIDGCVALUE to the shadow location since we cannot
+        // guarantee whose shadow update won.
+        li   t3, INVALIDGCVALUE
+        sd   t3, 0(\destReg)
+
+0:
+        // Restore original destReg value
+        mv   \destReg, t4
+
+1:
+    .endm
+
+#else // WRITE_BARRIER_CHECK
+
+    .macro UPDATE_GC_SHADOW destReg, refReg
+    .endm
+
+#endif // WRITE_BARRIER_CHECK
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+// name of the register that points to the location to be updated and the name of the register that holds the
+// object reference (this should be in upper case as it is used in the definition of the name of the helper).
+
+// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for
+// some interlocked helpers that need an inline barrier.
+
+        // On entry:
+        //   destReg: location to be updated (cannot be t2,t6)
+        //   refReg:  objectref to be stored (cannot be t2,t6)
+        //
+        // On exit:
+        //   t2, t6: trashed
+        //
+        .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+        // Update the shadow copy of the heap with the same value just written to the same heap.
+        // (A no-op unless we are in a debug build and write barrier checking has been enabled).
+        UPDATE_GC_SHADOW \destReg, \refReg
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+        // Update the write watch table if necessary
+        la      t2, g_write_watch_table
+        ld      t2, (t2)
+
+        beqz    t2, 2f
+        srli    t6, \destReg, 12   // SoftwareWriteWatch::AddressToTableByteIndexShift
+        add     t2, t2, t6
+        lb      t6, 0(t2)
+        bnez    t6, 2f
+        li      t6, 0xFF
+        sb      t6, 0(t2)
+#endif
+
+2:
+        // We can skip the card table write if the reference is to
+        // an object not on the ephemeral segment.
+        la      t2, g_ephemeral_low
+        ld      t2, (t2)
+        la      t6, g_ephemeral_high
+        ld      t6, (t6)
+        bltu    \refReg, t2, 0f
+        bgeu    \refReg, t6, 0f
+
+        // Set this object's card, if it has not already been set.
+        la      t2, g_card_table
+        ld      t2, (t2)
+        srli    t6, \destReg, 11
+        add     t6, t2, t6
+
+        // Check that this card has not already been written. Avoiding useless writes
+        // is a big win on multi-proc systems since it avoids cache thrashing.
+        lbu     t2, 0(t6)
+        addi    t2, t2, -0xFF
+        beqz    t2, 0f
+
+        li      t2, 0xFF
+        sb      t2, 0(t6)
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        // Check if we need to update the card bundle table
+        la      t2, g_card_bundle_table
+        ld      t2, (t2)
+
+        srli    t6, \destReg, 21
+        add     t6, t2, t6
+        lbu     t2, 0(t6)
+        addi    t2, t2, -0xFF
+        beqz    t2, 0f
+
+        li      t2, 0xFF
+        sb      t2, 0(t6)
+#endif
+
+0:
+        // Exit label
+    .endm
+
+        // On entry:
+        //   destReg: location to be updated
+        //   refReg:  objectref to be stored
+        //
+        // On exit:
+        //   t2, t6: trashed
+        //
+        .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+        // The "check" of this checked write barrier - is destReg within the heap?
+        // If no, early out.
+
+        la      t2, g_lowest_address
+        ld      t2, (t2)
+        bltu    \destReg, t2, 0f
+
+        la      t2, g_highest_address
+        ld      t2, (t2)
+        bgeu    \destReg, t2, 0f
+
+1:
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg
+
+0:
+        // Exit label
+    .endm
+
+// void JIT_ByRefWriteBarrier
+// On entry:
+//   t5  : the source address (points to object reference to write)
+//   t3  : the destination address (object reference written here)
+//
+// On exit:
+//   t5  : incremented by 8
+//   t3  : incremented by 8
+//   t4  : trashed
+//   t2, t6  : trashed
+//
+//   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+//         if you add more trashed registers.
+//
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+LEAF_ENTRY RhpByRefAssignRef, _TEXT
+
+    ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
+        ld    t4, 0(t5)
+        addi  t5, t5, 8
+        j C_FUNC(RhpCheckedAssignRef)
+
+LEAF_END RhpByRefAssignRef, _TEXT
+
+// JIT_CheckedWriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that may reside
+// on the managed heap.
+//
+// On entry:
+//   t3 : the destination address (LHS of the assignment).
+//         May not be a heap location (hence the checked).
+//   t4 : the object reference (RHS of the assignment).
+//
+// On exit:
+//   t2, t6 : trashed
+//   t3      : incremented by 8
+LEAF_ENTRY RhpCheckedAssignRef, _TEXT
+
+        // Check if the destination is within the heap bounds
+        la      t2, C_FUNC(g_lowest_address)
+        ld      t2, (t2)
+        la      t6, C_FUNC(g_highest_address)
+        ld      t6, (t6)
+
+        bltu    t3, t2, LOCAL_LABEL(NotInHeap)
+        bgeu    t3, t6, LOCAL_LABEL(NotInHeap)
+
+        j       C_FUNC(RhpAssignRefRiscV64)
+
+LOCAL_LABEL(NotInHeap):
+        ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+        sd      t4, 0(t3)
+        addi    t3, t3, 8
+
+        ret
+
+LEAF_END RhpCheckedAssignRef, _TEXT
+
+// JIT_WriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that are known to
+// reside on the managed heap.
+//
+// On entry:
+//  t3 : the destination address (LHS of the assignment).
+//  t4 : the object reference (RHS of the assignment).
+//
+// On exit:
+//  t2, t6 : trashed
+//  t3 : incremented by 8
+LEAF_ENTRY RhpAssignRefRiscV64, _TEXT
+        fence rw, rw
+
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+        sd    t4, 0(t3)
+
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE t3, t4
+
+        addi  t3, t3, 8
+
+        ret
+
+LEAF_END RhpAssignRefRiscV64, _TEXT
+
+// Same as RhpAssignRefRiscV64, but with standard ABI.
+LEAF_ENTRY RhpAssignRef, _TEXT
+        mv   t3, a0                    // t3 = dst
+        mv   t4, a1                    // t4 = val
+        mv   a1, ra
+        j    C_FUNC(RhpAssignRefRiscV64)
+LEAF_END RhpAssignRef, _TEXT
+
+
+// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
+// successful updates.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedLockCmpXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+
+// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand)
+//
+// Interlocked compare exchange on objectref.
+//
+// On entry:
+//  a0: pointer to objectref
+//  a1: exchange value
+//  a2: comparand
+//
+// On exit:
+//  a0: original value of objectref
+//  t0, t1, t2, t6: trashed
+//
+LEAF_ENTRY RhpCheckedLockCmpXchg
+
+LOCAL_LABEL(CmpXchgRetry):
+        // Load the current value at the destination address.
+        lr.d.aqrl    t0, (a0)       // t0 = *dest (load with acquire-release ordering)
+        // Compare the loaded value with the comparand.
+        bne     t0, a2, LOCAL_LABEL(CmpXchgNoUpdate) // if (*dest != comparand) goto CmpXchgNoUpdate
+
+        // Attempt to store the exchange value at the destination address.
+        sc.d.rl    t1, a1, (a0)  // t1 = (store conditional result: 0 if successful, with release ordering)
+        bnez    t1, LOCAL_LABEL(CmpXchgRetry) // if store conditional failed, retry
+
+        // See comment at the top of PalInterlockedOperationBarrier method for explanation why this memory
+        // barrier is necessary.
+        fence   rw, rw
+
+LOCAL_LABEL(DoCardsCmpXchg):
+        // We have successfully updated the value of the objectref so now we need a GC write barrier.
+        // The following barrier code takes the destination in a0 and the value in a1 so the arguments are
+        // already correctly set up.
+        INSERT_CHECKED_WRITE_BARRIER_CORE a0, a1
+
+LOCAL_LABEL(CmpXchgNoUpdate):
+        // t0 still contains the original value.
+        mv      a0, t0
+
+        ret
+
+LEAF_END RhpCheckedLockCmpXchg
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+
+// RhpCheckedXchg(Object** destination, Object* value)
+//
+// Interlocked exchange on objectref.
+//
+// On entry:
+//  a0: pointer to objectref
+//  a1: exchange value
+//
+// On exit:
+//  a0: original value of objectref
+//  t1, t6: trashed
+//
+LEAF_ENTRY RhpCheckedXchg
+        amoswap.d.aqrl t1, a1, (a0)
+
+        // See comment at the top of PalInterlockedOperationBarrier method for explanation why this memory
+        // barrier is necessary.
+        fence rw, rw
+
+DoCardsXchg:
+        // We have successfully updated the value of the objectref so now we need a GC write barrier.
+        // The following barrier code takes the destination in a0 and the value in a1 so the arguments are
+        // already correctly set up.
+
+        INSERT_CHECKED_WRITE_BARRIER_CORE a0, a1
+
+        // t1 still contains the original value.
+        mv   a0, t1
+
+        jalr ra
+
+LEAF_END RhpCheckedXchg, _TEXT
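RhpCheckedLockCmpXchg above combines an lr/sc retry loop with the checked write barrier, and the barrier runs only when the store actually happened. A short C11 sketch of those semantics follows, assuming a hypothetical CheckedWriteBarrier function standing in for INSERT_CHECKED_WRITE_BARRIER_CORE.

```c
#include <stdatomic.h>

// Hypothetical stand-in for INSERT_CHECKED_WRITE_BARRIER_CORE (see the sketch
// after the loongarch64 WriteBarriers.S diff above).
extern void CheckedWriteBarrier(void **dst, void *ref);

// Sketch of RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand):
// returns the original value of *dest, applying the GC write barrier only if
// the exchange succeeded.
static void *CheckedLockCmpXchg(_Atomic(void *) *dest, void *value, void *comparand)
{
    void *observed = comparand;
    // seq_cst compare-exchange, mirroring the lr/sc loop plus trailing fence.
    if (atomic_compare_exchange_strong(dest, &observed, value)) {
        CheckedWriteBarrier((void **)dest, value);   // objectref was updated
        return comparand;                            // original value == comparand
    }
    return observed;                                 // no update; return what was seen
}
```

RhpCheckedXchg is the unconditional analogue: an atomic swap followed by the same checked barrier.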
diff --git a/src/coreclr/scripts/fuzzlyn_run.py b/src/coreclr/scripts/fuzzlyn_run.py
index 686abe65f10e..2b2f80cbcab3 100644
--- a/src/coreclr/scripts/fuzzlyn_run.py
+++ b/src/coreclr/scripts/fuzzlyn_run.py
@@ -119,7 +119,7 @@ def run(self):
 
             if new_line:
                 evt = json.loads(new_line)
-                # Only reduce BadResult examples since crashes take very long to reduce.
+                # Do not reduce crash examples since those take a very long time to reduce.
                 # We will still report crashes, just not with a reduced example.
                 if evt["Kind"] == "ExampleFound":
                     ex = evt["Example"]
@@ -147,7 +147,7 @@ def run(self):
                             "--seed", str(ex['Seed']),
                             "--collect-spmi-to", spmi_collections_path,
                             "--output", output_path]
-                        run_command(cmd)
+                        run_command(cmd, _timeout=60*25)
                         if path.exists(output_path):
                             num_reduced += 1
                             if num_reduced >= 5:
@@ -156,6 +156,8 @@ def run(self):
 
                             if ex_assert_err is not None:
                                 self.reduced_jit_asserts.add(ex_assert_err)
+                        else:
+                            print("  Reduction failed, output file not present")
 
 
 def main(main_args):
diff --git a/src/coreclr/scripts/genEventing.py b/src/coreclr/scripts/genEventing.py
index e1ca570166e7..ffa616469fd9 100644
--- a/src/coreclr/scripts/genEventing.py
+++ b/src/coreclr/scripts/genEventing.py
@@ -467,7 +467,7 @@ def parseTemplateNodes(templateNodes):
 
     return allTemplates
 
-def generateClrallEvents(eventNodes, allTemplates, target_cpp, runtimeFlavor, write_xplatheader, providerName, inclusionList, generatedFileType, user_events):
+def generateClrallEvents(eventNodes, allTemplates, target_cpp, runtimeFlavor, is_host_windows, write_xplatheader, providerName, inclusionList, generatedFileType, user_events):
     clrallEvents = []
     for eventNode in eventNodes:
         eventName = eventNode.getAttribute('symbol')
@@ -495,7 +495,7 @@ def generateClrallEvents(eventNodes, allTemplates, target_cpp, runtimeFlavor, wr
                 clrallEvents.append("EventPipeEventEnabled" + eventName + "()")
 
                 if runtimeFlavor.coreclr or write_xplatheader or runtimeFlavor.nativeaot:
-                    if os.name == 'posix':
+                    if not is_host_windows:
                         if user_events and runtimeFlavor.coreclr:
                             clrallEvents.append(" || UserEventsEventEnabled" + eventName + "()")
                         # native AOT does not support non-windows eventing other than via event pipe
@@ -872,10 +872,10 @@ def getKeywordsMaskCombined(keywords, keywordsToMask):
 
     return mask
 
-def updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, clrallevents, inclusion_list, generatedFileType, user_events):
-    is_windows = os.name == 'nt'
+def updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, is_host_windows, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, clrallevents, inclusion_list, generatedFileType, user_events):
     with open_for_update(clrallevents) as Clrallevents:
         Clrallevents.write(stdprolog)
+        Clrallevents.write('#include \n\n')
         if generatedFileType=="header-impl":
             if runtimeFlavor.mono:
                 Clrallevents.write(getCoreCLRMonoNativeAotTypeAdaptionDefines() + "\n")
@@ -889,7 +889,7 @@ def updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, eventpipe_
             Clrallevents.write('#define CLR_ETW_ALL_MAIN_H\n\n')
         elif generatedFileType == "source-impl":
             Clrallevents.write('#include \n')
-            Clrallevents.write('#include \n')
+            Clrallevents.write('#include \n')
             Clrallevents.write('#include "clretwallmain.h"\n')
             Clrallevents.write('#include "clreventpipewriteevents.h"\n')
             if user_events and runtimeFlavor.coreclr:
@@ -901,23 +901,23 @@ def updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, eventpipe_
         elif generatedFileType == "source-impl-noop":
             Clrallevents.write('#include \n')
             Clrallevents.write('#include \n\n')
-            Clrallevents.write('#include \n\n')
+            Clrallevents.write('#include \n\n')
             Clrallevents.write('#ifndef ERROR_SUCCESS\n')
             Clrallevents.write('#define ERROR_SUCCESS 0L\n')
             Clrallevents.write('#endif\n\n')
 
         # define DOTNET_TRACE_CONTEXT depending on the platform
-        if is_windows and not runtimeFlavor.nativeaot:
+        if is_host_windows and not runtimeFlavor.nativeaot:
             Clrallevents.write(eventpipe_trace_context_typedef)  # define EVENTPIPE_TRACE_CONTEXT
             if runtimeFlavor.coreclr or write_xplatheader:
                 Clrallevents.write(dotnet_trace_context_typedef_windows + "\n")
             else:
                 Clrallevents.write("\n")
-        
-        if not is_windows and runtimeFlavor.coreclr:
+
+        if not is_host_windows and runtimeFlavor.coreclr:
             Clrallevents.write(user_events_trace_context_typedef)
 
-        if not is_windows and not write_xplatheader and not runtimeFlavor.nativeaot:
+        if not is_host_windows and not write_xplatheader and not runtimeFlavor.nativeaot:
             Clrallevents.write(eventpipe_trace_context_typedef)  # define EVENTPIPE_TRACE_CONTEXT
             Clrallevents.write("\n")
 
@@ -929,22 +929,22 @@ def updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, eventpipe_
             eventNodes = providerNode.getElementsByTagName('event')
 
             #vm header:
-            Clrallevents.write(generateClrallEvents(eventNodes, allTemplates, target_cpp, runtimeFlavor, write_xplatheader, providerName, inclusion_list, generatedFileType, user_events))
+            Clrallevents.write(generateClrallEvents(eventNodes, allTemplates, target_cpp, runtimeFlavor, is_host_windows, write_xplatheader, providerName, inclusion_list, generatedFileType, user_events))
 
             providerName = providerNode.getAttribute('name')
             providerSymbol = providerNode.getAttribute('symbol')
 
             eventpipeProviderCtxName = providerSymbol + "_EVENTPIPE_Context"
-            if is_windows and not (write_xplatheader or runtimeFlavor.nativeaot):
+            if is_host_windows and not (write_xplatheader or runtimeFlavor.nativeaot):
                 Clrallevents.write(('constexpr ' if target_cpp else 'static const ') + 'EVENTPIPE_TRACE_CONTEXT ' + eventpipeProviderCtxName + ' = { W("' + providerName + '"), 0, false, 0 };\n')
 
-            if not is_windows and not write_xplatheader and not runtimeFlavor.nativeaot:
+            if not is_host_windows and not write_xplatheader and not runtimeFlavor.nativeaot:
                 Clrallevents.write('__attribute__((weak)) EVENTPIPE_TRACE_CONTEXT ' + eventpipeProviderCtxName + ' = { W("' + providerName + '"), 0, false, 0 };\n')
 
         if generatedFileType == "header":
             Clrallevents.write("#endif // __CLR_ETW_ALL_MAIN_H__\n")
 
-def generatePlatformIndependentFiles(sClrEtwAllMan, incDir, etmDummyFile, extern, write_xplatheader, target_cpp, runtimeFlavor, inclusion_list, user_events):
+def generatePlatformIndependentFiles(sClrEtwAllMan, incDir, etmDummyFile, extern, write_xplatheader, target_cpp, runtimeFlavor, is_host_windows, inclusion_list, user_events):
 
     generateEtmDummyHeader(sClrEtwAllMan,etmDummyFile)
     tree           = DOM.parse(sClrEtwAllMan)
@@ -1014,16 +1014,14 @@ def generatePlatformIndependentFiles(sClrEtwAllMan, incDir, etmDummyFile, extern
 #endif // DOTNET_TRACE_CONTEXT_DEF
 """
 
-    is_windows = os.name == 'nt'
-
     # Write the main source(s) for FireETW* functions
     # nativeaot requires header and source file to be separated as well as a noop implementation
     if runtimeFlavor.nativeaot:
-        updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, os.path.join(incDir, "clretwallmain.cpp"), inclusion_list, "source-impl", user_events)
-        updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, os.path.join(incDir, "clretwallmain.h"), inclusion_list, "header", user_events)
-        updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, os.path.join(incDir, "disabledclretwallmain.cpp"), inclusion_list, "source-impl-noop", user_events)
+        updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, is_host_windows, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, os.path.join(incDir, "clretwallmain.cpp"), inclusion_list, "source-impl", user_events)
+        updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, is_host_windows, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, os.path.join(incDir, "clretwallmain.h"), inclusion_list, "header", user_events)
+        updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, is_host_windows, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, os.path.join(incDir, "disabledclretwallmain.cpp"), inclusion_list, "source-impl-noop", user_events)
     else:
-        updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, os.path.join(incDir, "clretwallmain.h"), inclusion_list, "header-impl", user_events)
+        updateclreventsfile(write_xplatheader, target_cpp, runtimeFlavor, is_host_windows, eventpipe_trace_context_typedef, dotnet_trace_context_typedef_windows, user_events_trace_context_typedef, tree, os.path.join(incDir, "clretwallmain.h"), inclusion_list, "header-impl", user_events)
 
     if write_xplatheader:
         clrproviders = os.path.join(incDir, "clrproviders.h")
@@ -1035,7 +1033,7 @@ def generatePlatformIndependentFiles(sClrEtwAllMan, incDir, etmDummyFile, extern
     ULONGLONG const Keyword;
 } EVENT_DESCRIPTOR;
 """)
-            if not is_windows and not runtimeFlavor.nativeaot:
+            if not is_host_windows and not runtimeFlavor.nativeaot:
                 Clrproviders.write(eventpipe_trace_context_typedef)  # define EVENTPIPE_TRACE_CONTEXT
                 Clrproviders.write(lttng_trace_context_typedef)  # define LTTNG_TRACE_CONTEXT
                 Clrproviders.write(user_events_trace_context_typedef)
@@ -1050,7 +1048,7 @@ def generatePlatformIndependentFiles(sClrEtwAllMan, incDir, etmDummyFile, extern
                 providerSymbol = str(providerNode.getAttribute('symbol'))
                 nbProviders += 1
                 nbKeywords = 0
-                if not is_windows and not runtimeFlavor.nativeaot:
+                if not is_host_windows and not runtimeFlavor.nativeaot:
                     eventpipeProviderCtxName = providerSymbol + "_EVENTPIPE_Context"
                     Clrproviders.write('__attribute__((weak)) EVENTPIPE_TRACE_CONTEXT ' + eventpipeProviderCtxName + ' = { W("' + providerName + '"), 0, false, 0 };\n')
                     lttngProviderCtxName = providerSymbol + "_LTTNG_Context"
@@ -1077,7 +1075,7 @@ def generatePlatformIndependentFiles(sClrEtwAllMan, incDir, etmDummyFile, extern
                 allProviders.append("&" + providerSymbol + "_LTTNG_Context")
 
             # define and initialize runtime providers' DOTNET_TRACE_CONTEXT depending on the platform
-            if not is_windows and not runtimeFlavor.nativeaot:
+            if not is_host_windows and not runtimeFlavor.nativeaot:
                 Clrproviders.write('#define NB_PROVIDERS ' + str(nbProviders) + '\n')
                 Clrproviders.write(('constexpr ' if target_cpp else 'static const ') + 'LTTNG_TRACE_CONTEXT * ALL_LTTNG_PROVIDERS_CONTEXT[NB_PROVIDERS] = { ')
                 Clrproviders.write(', '.join(allProviders))
@@ -1143,7 +1141,8 @@ def main(argv):
     required.add_argument('--dummy',  type=str,default=None,
                                     help='full path to file that will have dummy definitions of FireEtw functions')
     required.add_argument('--runtimeflavor', type=str,default="CoreCLR",
-                                    help='runtime flavor')
+                                    help='runtime flavor'),
+    required.add_argument('--targetos', type=str,default=None),
     required.add_argument('--nonextern', action='store_true',
                                     help='if specified, will not generated extern function stub headers' )
     required.add_argument('--noxplatheader', action='store_true',
@@ -1163,7 +1162,13 @@ def main(argv):
     extern            = not args.nonextern
     write_xplatheader = not args.noxplatheader
     user_events       = args.userevents
+    targetOS          = args.targetos
+
+    if targetOS is None:
+        if os.name == "nt":
+            targetOS = "windows"
 
+    is_host_windows = targetOS == "windows"
     target_cpp = True
     if runtimeFlavor.mono:
         extern = False
@@ -1172,7 +1177,7 @@ def main(argv):
 
     inclusion_list = parseInclusionList(inclusion_filename)
 
-    generatePlatformIndependentFiles(sClrEtwAllMan, incdir, etmDummyFile, extern, write_xplatheader, target_cpp, runtimeFlavor, inclusion_list, user_events)
+    generatePlatformIndependentFiles(sClrEtwAllMan, incdir, etmDummyFile, extern, write_xplatheader, target_cpp, runtimeFlavor, is_host_windows, inclusion_list, user_events)
 
 if __name__ == '__main__':
     return_code = main(sys.argv[1:])
diff --git a/src/coreclr/scripts/genRuntimeEventSources.py b/src/coreclr/scripts/genRuntimeEventSources.py
deleted file mode 100644
index ed13eeec7238..000000000000
--- a/src/coreclr/scripts/genRuntimeEventSources.py
+++ /dev/null
@@ -1,484 +0,0 @@
-#
-## Licensed to the .NET Foundation under one or more agreements.
-## The .NET Foundation licenses this file to you under the MIT license.
-#
-
-import os
-import xml.dom.minidom as DOM
-from utilities import open_for_update, parseInclusionList
-import argparse
-import sys
-
-generatedCodeFileHeader="""// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/**********************************************************************
-
-DO NOT MODIFY. AUTOGENERATED FILE.
-This file is generated by /src/coreclr/scripts/genRuntimeEventSources.py
-
-**********************************************************************/
-"""
-
-########################################################################
-# START CONFIGURATION
-########################################################################
-manifestsToGenerate = {
-    "Microsoft-Windows-DotNETRuntime" : "NativeRuntimeEventSource.Generated.cs"
-}
-
-providerNameToClassNameMap = {
-    "Microsoft-Windows-DotNETRuntime" : "NativeRuntimeEventSource"
-}
-
-manifestTypeToCSharpTypeMap = {
-    "win:UInt8" : "byte",
-    "win:UInt16" : "ushort",
-    "win:UInt32" : "uint",
-    "win:UInt64" : "ulong",
-    "win:Int32" : "int",
-    "win:Int64" : "long",
-    "win:Pointer" : "IntPtr",
-    "win:UnicodeString" : "string",
-    "win:Binary" : "byte[]",
-    "win:Double" : "double",
-    "win:Boolean" : "bool",
-    "win:GUID" : "Guid",
-}
-
-overrideEnumBackingTypes = {
-    "Microsoft-Windows-DotNETRuntime" : {
-        "GCSuspendEEReasonMap" : "win:UInt32",
-        "GCRootKindMap" : "win:UInt32"
-    }
-}
-########################################################################
-# END CONFIGURATION
-########################################################################
-
-tabText = ""
-
-def increaseTabLevel():
-    global tabText
-    tabText += "    "
-
-def decreaseTabLevel():
-    global tabText
-    tabText = tabText[:-4]
-
-def writeOutput(outputFile, str):
-    outputFile.write(tabText + str)
-
-def getCSharpTypeFromManifestType(manifestType):
-    return manifestTypeToCSharpTypeMap[manifestType]
-
-def getManifestsToGenerate():
-    return manifestsToGenerate
-
-def includeEvent(inclusionList, providerName, eventName):
-    if len(inclusionList) == 0:
-        return True
-    if providerName in inclusionList and eventName in inclusionList[providerName]:
-        return True
-    elif providerName in inclusionList and "*" in inclusionList[providerName]:
-        return True
-    elif "*" in inclusionList and eventName in inclusionList["*"]:
-        return True
-    elif "*" in inclusionList and "*" in inclusionList["*"]:
-        return True
-    else:
-        return False
-
-def generateEvent(eventNode, providerNode, outputFile, stringTable):
-
-    # Some threading events are defined manually in NativeRuntimeEventSource.Threading.cs
-    symbol = eventNode.getAttribute("symbol")
-    if any(s in symbol for s in ["ThreadPool", "Contention", "WaitHandle"]):
-        return
-
-    evtLevel = eventNode.getAttribute("level")[4:]
-    evtKeywords = ""
-    # Write the event attribute.
-    writeOutput(outputFile, "[Event("+ eventNode.getAttribute("value") + ", Version = " + eventNode.getAttribute("version") + ", Level = EventLevel." + evtLevel)
-
-    # Not all events have keywords specified, and some have multiple keywords specified.
-    keywords = eventNode.getAttribute("keywords")
-    if keywords:
-        if " " not in keywords:
-            outputFile.write(", Keywords = Keywords." + keywords)
-            evtKeywords = "Keywords." + keywords
-        else:
-            keywords = keywords.split()
-            outputFile.write(", Keywords = ")
-            for keywordIndex in range(len(keywords)):
-                evtKeywords += "Keywords." + keywords[keywordIndex]
-                if keywordIndex < (len(keywords) - 1):
-                    evtKeywords += " | "
-            outputFile.write(evtKeywords)
-    outputFile.write(")]\n")
-
-    # Get the template for the event.
-    templateNode = None
-    templateKey = eventNode.getAttribute("template")
-    if templateKey is not None:
-        for node in providerNode.getElementsByTagName("templates"):
-            templatesNode = node
-            break
-        for node in templatesNode.getElementsByTagName("template"):
-            if node.getAttribute("tid") == templateKey:
-                templateNode = node
-                break
-
-    # Write the beginning of the method signature.
-    writeOutput(outputFile, "private void " + eventNode.getAttribute("symbol") + "(")
-
-    # Write the function signature.
-    argumentCount = 0
-    if templateNode is not None:
-        argumentNodes = templateNode.childNodes
-
-        # Calculate the number of arguments.
-        for argumentNode in argumentNodes:
-            if argumentNode.nodeName == "data":
-                if argumentNode.getAttribute("inType") != "win:Binary" and argumentNode.getAttribute("inType") != "win:AnsiString" and argumentNode.getAttribute("count") == "":
-                    argumentCount += 1
-                else:
-                    break
-            elif argumentNode.nodeName == "struct":
-                break
-
-        argumentsProcessed = 0
-        for argumentIndex in range(len(argumentNodes)):
-            argumentNode = argumentNodes[argumentIndex]
-            if argumentNode.nodeName == "data":
-                argumentName = argumentNode.getAttribute("name")
-                argumentInType = argumentNode.getAttribute("inType")
-
-                #### Disable enums until they are needed ####
-                # argumentMap = argumentNode.getAttribute("map")
-                # if not argumentMap:
-                #     argumentCSharpType = getCSharpTypeFromManifestType(argumentInType)
-                # else:
-                #     argumentCSharpType = argumentMap[:-3]
-                #### Disable enums until they are needed ####
-
-                argumentCSharpType = getCSharpTypeFromManifestType(argumentInType)
-                outputFile.write(argumentCSharpType + " " + argumentName)
-                argumentsProcessed += 1
-                if argumentsProcessed < argumentCount:
-                    outputFile.write(", ")
-            if argumentsProcessed == argumentCount:
-                break
-
-    outputFile.write(")\n")
-    writeOutput(outputFile, "{\n")
-
-    increaseTabLevel()
-    writeOutput(outputFile, "// To have this event be emitted from managed side, refer to NativeRuntimeEventSource.cs\n")
-    writeOutput(outputFile, "throw new NotImplementedException();\n")
-    decreaseTabLevel()
-    writeOutput(outputFile, "}\n\n")
-
-
-def generateEvents(providerNode, outputFile, stringTable, inclusion_list):
-
-    providerName = providerNode.getAttribute("name")
-
-    # Get the events element.
-    for node in providerNode.getElementsByTagName("events"):
-        eventsNode = node
-        break
-
-    # Get the list of event nodes.
-    eventNodes = eventsNode.getElementsByTagName("event")
-
-    # Build a list of events to be emitted.  This is where old versions of events are stripped.
-    # key = eventID, value = version
-    eventList = dict()
-    for eventNode in eventNodes:
-        eventName    = eventNode.getAttribute('symbol')
-        if not includeEvent(inclusion_list, providerName, eventName):
-            continue
-
-        eventID = eventNode.getAttribute("value")
-        eventVersion = eventNode.getAttribute("version")
-        eventList[eventID] = eventVersion
-
-    # Iterate over each event node and process it.
-    # Only emit events for the latest version of the event, otherwise EventSource initialization will fail.
-    for eventNode in eventNodes:
-        eventName    = eventNode.getAttribute('symbol')
-        if not includeEvent(inclusion_list, providerName, eventName):
-            continue
-
-        eventID = eventNode.getAttribute("value")
-        eventVersion = eventNode.getAttribute("version")
-        if eventID in eventList and eventList[eventID] == eventVersion:
-            generateEvent(eventNode, providerNode, outputFile, stringTable)
-        elif eventID not in eventList:
-            raise ValueError("eventID could not be found in the list of events to emit.", eventID)
-
-def generateValueMapEnums(providerNode, outputFile, stringTable, enumTypeMap):
-
-    # Get the maps element.
-    for node in providerNode.getElementsByTagName("maps"):
-        mapsNode = node
-        break
-
-    # Iterate over each map and create an enum out of it.
-    for valueMapNode in mapsNode.getElementsByTagName("valueMap"):
-
-        # Get the backing type of the enum.
-        typeName = enumTypeMap[valueMapNode.getAttribute("name")]
-        if typeName is None:
-            raise ValueError("No mapping from mapName to enum backing type.", valueMapNode.getAttribute("name"))
-
-        enumType = getCSharpTypeFromManifestType(typeName)
-        writeOutput(outputFile, "public enum " + valueMapNode.getAttribute("name")[:-3] + " : " + enumType + "\n")
-        writeOutput(outputFile, "{\n")
-        increaseTabLevel()
-        for mapNode in valueMapNode.getElementsByTagName("map"):
-            # Each map value has a message, which we should use as the enum value.
-            messageKey = mapNode.getAttribute("message")[9:-1]
-            writeOutput(outputFile, stringTable[messageKey] + " = " + mapNode.getAttribute("value") + ",\n")
-        decreaseTabLevel()
-        writeOutput(outputFile, "}\n\n")
-
-def generateBitMapEnums(providerNode, outputFile, stringTable, enumTypeMap):
-
-    # Get the maps element.
-    for node in providerNode.getElementsByTagName("maps"):
-        mapsNode = node
-        break
-
-    # Iterate over each map and create an enum out of it.
-    for valueMapNode in mapsNode.getElementsByTagName("bitMap"):
-
-        # Get the backing type of the enum.
-        typeName = enumTypeMap[valueMapNode.getAttribute("name")]
-        if typeName is None:
-            raise ValueError("No mapping from mapName to enum backing type.", valueMapNode.getAttribute("name"))
-
-        enumType = getCSharpTypeFromManifestType(typeName)
-        writeOutput(outputFile, "[Flags]\n")
-        writeOutput(outputFile, "public enum " + valueMapNode.getAttribute("name")[:-3] + " : " + enumType + "\n")
-        writeOutput(outputFile, "{\n")
-        increaseTabLevel()
-        for mapNode in valueMapNode.getElementsByTagName("map"):
-            # Each map value has a message, which we should use as the enum value.
-            messageKey = mapNode.getAttribute("message")[9:-1]
-            writeOutput(outputFile, stringTable[messageKey] + " = " + mapNode.getAttribute("value") + ",\n")
-        decreaseTabLevel()
-        writeOutput(outputFile, "}\n\n")
-
-def generateEnumTypeMap(providerNode):
-
-    providerName = providerNode.getAttribute("name")
-    templatesNodes = providerNode.getElementsByTagName("templates")
-    templatesNode = templatesNodes[0]
-    mapsNodes = providerNode.getElementsByTagName("maps")
-
-    # Keep a list of mapName -> inType.
-    # This map contains the first inType seen for the specified mapName.
-    typeMap = dict()
-
-    # There are a couple of maps that are used by multiple events but have different backing types.
-    # Because only one of the uses will be consumed by EventSource/EventListener we can hack the backing type here
-    # and suppress the warning that we'd otherwise get.
-    overrideTypeMap = dict()
-    if providerName in overrideEnumBackingTypes:
-        overrideTypeMap = overrideEnumBackingTypes[providerName]
-
-    for mapsNode in mapsNodes:
-        for valueMapNode in mapsNode.getElementsByTagName("valueMap"):
-            mapName = valueMapNode.getAttribute("name")
-            dataNodes = templatesNode.getElementsByTagName("data")
-
-            # If we've never seen the map used, save its usage with the inType.
-            # If we have seen the map used, make sure that the inType saved previously matches the current inType.
-            for dataNode in dataNodes:
-                if dataNode.getAttribute("map") == mapName:
-                    if mapName in overrideTypeMap:
-                        typeMap[mapName] = overrideTypeMap[mapName]
-                    elif mapName in typeMap and typeMap[mapName] != dataNode.getAttribute("inType"):
-                        print("WARNING: Map " + mapName + " is used multiple times with different types.  This may cause functional bugs in tracing.")
-                    elif not mapName in typeMap:
-                        typeMap[mapName] = dataNode.getAttribute("inType")
-        for bitMapNode in mapsNode.getElementsByTagName("bitMap"):
-            mapName = bitMapNode.getAttribute("name")
-            dataNodes = templatesNode.getElementsByTagName("data")
-
-            # If we've never seen the map used, save its usage with the inType.
-            # If we have seen the map used, make sure that the inType saved previously matches the current inType.
-            for dataNode in dataNodes:
-                if dataNode.getAttribute("map") == mapName:
-                    if mapName in overrideTypeMap:
-                        typeMap[mapName] = overrideTypeMap[mapName]
-                    elif mapName in typeMap and typeMap[mapName] != dataNode.getAttribute("inType"):
-                        print("Map " + mapName + " is used multiple times with different types.")
-                    elif not mapName in typeMap:
-                        typeMap[mapName] = dataNode.getAttribute("inType")
-
-    return typeMap
-
-def generateKeywordsClass(providerNode, outputFile, inclusion_list):
-
-    providerName = providerNode.getAttribute("name")
-
-    # Get the events element.
-    for node in providerNode.getElementsByTagName("events"):
-        eventsNode = node
-        break
-
-    # Get the list of event nodes.
-    eventNodes = eventsNode.getElementsByTagName("event")
-
-    # Build the list of used keywords
-    keywordSet = set()
-    for eventNode in eventNodes:
-        eventName    = eventNode.getAttribute('symbol')
-        if not includeEvent(inclusion_list, providerName, eventName):
-            continue
-
-        # Not all events have keywords specified, and some have multiple keywords specified.
-        keywords = eventNode.getAttribute("keywords")
-        if keywords:
-            keywordSet = keywordSet.union(keywords.split())
-
-    # Find the keywords element.
-    for node in providerNode.getElementsByTagName("keywords"):
-        keywordsNode = node
-        break;
-
-    writeOutput(outputFile, "public static class Keywords\n")
-    writeOutput(outputFile, "{\n")
-    increaseTabLevel()
-
-    for keywordNode in keywordsNode.getElementsByTagName("keyword"):
-        keywordName = keywordNode.getAttribute("name")
-        if keywordName not in keywordSet:
-            continue;
-
-        writeOutput(outputFile, "public const EventKeywords " + keywordName + " = (EventKeywords)" + keywordNode.getAttribute("mask") + ";\n")
-
-    decreaseTabLevel()
-    writeOutput(outputFile, "}\n\n")
-
-def loadStringTable(manifest):
-
-    # Create the string table dictionary.
-    stringTable = dict()
-
-    # Get the string table element.
-    for node in manifest.getElementsByTagName("stringTable"):
-        stringTableNode = node
-        break
-
-    # Iterate through each string and save it.
-    for stringElem in stringTableNode.getElementsByTagName("string"):
-        stringTable[stringElem.getAttribute("id")] = stringElem.getAttribute("value")
-
-    return stringTable
-
-def generateEventSources(manifestFullPath, intermediatesDirFullPath, inclusion_list):
-
-    # Open the manifest for reading.
-    manifest = DOM.parse(manifestFullPath)
-
-    # Load the string table.
-    stringTable = loadStringTable(manifest)
-
-    # Iterate over each provider that we want to generate an EventSource for.
-    for providerName, outputFileName in getManifestsToGenerate().items():
-        for node in manifest.getElementsByTagName("provider"):
-            if node.getAttribute("name") == providerName:
-                providerNode = node
-                break
-
-        if providerNode is None:
-            raise ValueError("Unable to find provider node.", providerName)
-
-        # Generate a full path to the output file and open the file for open_for_update.
-        outputFilePath = os.path.join(intermediatesDirFullPath, outputFileName)
-        with open_for_update(outputFilePath) as outputFile:
-
-            # Write the license header.
-            writeOutput(outputFile, generatedCodeFileHeader)
-
-            # Write the class header.
-            header = """
-using System;
-
-namespace System.Diagnostics.Tracing
-{
-"""
-            writeOutput(outputFile, header)
-            increaseTabLevel()
-
-            className = providerNameToClassNameMap[providerName]
-            writeOutput(outputFile, "internal sealed partial class " + className + " : EventSource\n")
-            writeOutput(outputFile, "{\n")
-            increaseTabLevel()
-
-            # Write the keywords class.
-            generateKeywordsClass(providerNode, outputFile, inclusion_list)
-
-            #### Disable enums until they are needed ####
-            # Generate the enum type map.
-            # This determines what the backing type for each enum should be.
-            # enumTypeMap = generateEnumTypeMap(providerNode)
-
-            # Generate enums for value maps.
-            # generateValueMapEnums(providerNode, outputFile, stringTable, enumTypeMap)
-
-            # Generate enums for bit maps.
-            # generateBitMapEnums(providerNode, outputFile, stringTable, enumTypeMap)
-            #### Disable enums until they are needed ####
-
-            # Generate events.
-            generateEvents(providerNode, outputFile, stringTable, inclusion_list)
-
-            # Write the class footer.
-            decreaseTabLevel()
-            writeOutput(outputFile, "}\n")
-            decreaseTabLevel()
-            writeOutput(outputFile, "}\n")
-
-def main(argv):
-
-    # Parse command line arguments.
-    parser = argparse.ArgumentParser(
-        description="Generates C# EventSource classes that represent the runtime's native event providers.")
-
-    required = parser.add_argument_group('required arguments')
-    required.add_argument('--man', type=str, required=True,
-                          help='full path to manifest containing the description of events')
-    required.add_argument('--intermediate', type=str, required=True,
-                          help='full path to eventprovider intermediate directory')
-    required.add_argument('--inc',  type=str,default="",
-                          help='full path to inclusion list')
-    args, unknown = parser.parse_known_args(argv)
-    if unknown:
-        print('Unknown argument(s): ', ', '.join(unknown))
-        return 1
-
-    manifestFullPath = args.man
-    intermediatesDirFullPath = args.intermediate
-    inclusion_filename = args.inc
-
-    # Ensure the intermediates directory exists.
-    try:
-        os.makedirs(intermediatesDirFullPath)
-    except OSError:
-        if not os.path.isdir(intermediatesDirFullPath):
-            raise
-
-    inclusion_list = parseInclusionList(inclusion_filename)
-
-    # Generate event sources.
-    generateEventSources(manifestFullPath, intermediatesDirFullPath, inclusion_list)
-    return 0
-
-if __name__ == '__main__':
-    return_code = main(sys.argv[1:])
-    sys.exit(return_code)
diff --git a/src/coreclr/scripts/jitrollingbuild.py b/src/coreclr/scripts/jitrollingbuild.py
index 795030178e50..8c328354336f 100644
--- a/src/coreclr/scripts/jitrollingbuild.py
+++ b/src/coreclr/scripts/jitrollingbuild.py
@@ -208,8 +208,8 @@ def process_git_hash_arg(coreclr_args, return_first_hash=False):
         baseline_hash = stdout_git_merge_base.decode('utf-8').strip()
         logging.info("Baseline hash: %s", baseline_hash)
 
-        # Enumerate the last 20 changes, starting with the baseline, that included JIT changes.
-        command = [ "git", "log", "--pretty=format:%H", baseline_hash, "-20", "--", "src/coreclr/jit/*" ]
+        # Enumerate the last 20 changes, starting with the baseline, that included JIT and JIT-EE GUID changes.
+        command = [ "git", "log", "--pretty=format:%H", baseline_hash, "-20", "--", "src/coreclr/jit/*", "src/coreclr/inc/jiteeversionguid.h" ]
         logging.debug("Invoking: {}".format(" ".join(command)))
         proc = subprocess.Popen(command, stdout=subprocess.PIPE)
         stdout_change_list, _ = proc.communicate()
@@ -435,8 +435,8 @@ def upload_blob(file, blob_name):
         # from the root of the runtime repo.
 
         with ChangeDir(coreclr_args.runtime_repo_location):
-            # Enumerate the last change, starting with the jit_git_hash, that included JIT changes.
-            command = [ "git", "log", "--pretty=format:%H", jit_git_hash, "-1", "--", "src/coreclr/jit/*" ]
+            # Enumerate the last change, starting with the jit_git_hash, that included JIT and JIT-EE GUID changes.
+            command = [ "git", "log", "--pretty=format:%H", jit_git_hash, "-1", "--", "src/coreclr/jit/*", "src/coreclr/inc/jiteeversionguid.h" ]
             logging.info("Invoking: {}".format(" ".join(command)))
             proc = subprocess.Popen(command, stdout=subprocess.PIPE)
             stdout_change_list, _ = proc.communicate()
@@ -457,7 +457,7 @@ def upload_blob(file, blob_name):
 
     try:
         from azure.storage.blob import BlobServiceClient
-        from azure.identity import DefaultAzureCredential
+        from azure.identity import AzureCliCredential
 
     except:
         logging.warning("Please install:")
@@ -466,7 +466,7 @@ def upload_blob(file, blob_name):
         logging.warning("See also https://learn.microsoft.com/azure/storage/blobs/storage-quickstart-blobs-python")
         raise RuntimeError("Missing azure storage or identity packages.")
 
-    default_credential = DefaultAzureCredential()
+    default_credential = AzureCliCredential()
 
     blob_service_client = BlobServiceClient(account_url=az_blob_storage_account_uri, credential=default_credential)
     blob_folder_name = "{}/{}/{}/{}/{}".format(az_builds_root_folder, jit_git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type)
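The widened pathspec above makes a JIT-EE GUID bump count as a JIT change when locating baseline builds. A hedged sketch of the same enumeration as a standalone helper (assumed to run from the repository root):

```python
import subprocess

def recent_jit_changes(baseline_hash, count=20):
    # Sketch only: list the last `count` commits, starting at the baseline,
    # that touched the JIT sources or the JIT-EE version GUID header.
    command = ["git", "log", "--pretty=format:%H", baseline_hash, "-" + str(count),
               "--", "src/coreclr/jit/*", "src/coreclr/inc/jiteeversionguid.h"]
    result = subprocess.run(command, capture_output=True, text=True, check=True)
    return result.stdout.splitlines()
```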
diff --git a/src/coreclr/scripts/jitutil.py b/src/coreclr/scripts/jitutil.py
index b397cecdc53b..0f917e2958f5 100644
--- a/src/coreclr/scripts/jitutil.py
+++ b/src/coreclr/scripts/jitutil.py
@@ -20,6 +20,7 @@
 import logging
 import time
 import tarfile
+import threading
 import urllib
 import urllib.request
 import zipfile
@@ -129,7 +130,7 @@ def decode_and_print(str_to_decode):
         return output
 
 
-def run_command(command_to_run, _cwd=None, _exit_on_fail=False, _output_file=None, _env=None):
+def run_command(command_to_run, _cwd=None, _exit_on_fail=False, _output_file=None, _env=None, _timeout=None):
     """ Runs the command.
 
     Args:
@@ -138,6 +139,7 @@ def run_command(command_to_run, _cwd=None, _exit_on_fail=False, _output_file=Non
         _exit_on_fail (bool): If it should exit on failure.
         _output_file ():
         _env: environment for sub-process, passed to subprocess.Popen()
+        _timeout: timeout in seconds, or None for no timeout
     Returns:
         (string, string, int): Returns a tuple of stdout, stderr, and command return code if _output_file= None
         Otherwise stdout, stderr are empty.
@@ -156,27 +158,44 @@ def run_command(command_to_run, _cwd=None, _exit_on_fail=False, _output_file=Non
     output_type = subprocess.STDOUT if _output_file else subprocess.PIPE
     with subprocess.Popen(command_to_run, env=_env, stdout=subprocess.PIPE, stderr=output_type, cwd=_cwd) as proc:
 
-        # For long running command, continuously print the output
-        if _output_file:
-            while True:
-                with open(_output_file, 'a') as of:
-                    output = proc.stdout.readline()
-                    if proc.poll() is not None:
-                        break
-                    if output:
-                        output_str = decode_and_print(output.strip())
-                        of.write(output_str + "\n")
-        else:
-            command_stdout, command_stderr = proc.communicate()
-            if len(command_stdout) > 0:
-                decode_and_print(command_stdout)
-            if len(command_stderr) > 0:
-                decode_and_print(command_stderr)
+        timer = None
+        if _timeout is not None:
+            def try_kill():
+                try:
+                    print("  Timeout reached; killing process")
+                    proc.kill()
+                except:
+                    pass
+
+            timer = threading.Timer(_timeout, try_kill)
+            timer.start()
+
+        try:
+            # For long running command, continuously print the output
+            if _output_file:
+                while True:
+                    with open(_output_file, 'a') as of:
+                        output = proc.stdout.readline()
+                        if proc.poll() is not None:
+                            break
+                        if output:
+                            output_str = decode_and_print(output.strip())
+                            of.write(output_str + "\n")
+            else:
+                command_stdout, command_stderr = proc.communicate()
+                if len(command_stdout) > 0:
+                    decode_and_print(command_stdout)
+                if len(command_stderr) > 0:
+                    decode_and_print(command_stderr)
+        finally:
+            if timer:
+                timer.cancel()
 
         return_code = proc.returncode
         if _exit_on_fail and return_code != 0:
             print("Command failed. Exiting.")
             sys.exit(1)
+
     return command_stdout, command_stderr, return_code
 
 
@@ -537,13 +556,13 @@ def require_azure_storage_libraries(need_azure_storage_blob=True, need_azure_ide
         Once we've done it once, we don't do it again.
 
         For this to work for cross-module usage, after you call this function, you need to add a line like:
-            from jitutil import BlobClient, DefaultAzureCredential
+            from jitutil import BlobClient, AzureCliCredential
         naming all the types you want to use.
 
         The full set of types this function loads:
-            BlobServiceClient, BlobClient, ContainerClient, DefaultAzureCredential
+            BlobServiceClient, BlobClient, ContainerClient, AzureCliCredential
     """
-    global azure_storage_libraries_check, BlobServiceClient, BlobClient, ContainerClient, DefaultAzureCredential
+    global azure_storage_libraries_check, BlobServiceClient, BlobClient, ContainerClient, AzureCliCredential
 
     if azure_storage_libraries_check:
         return
@@ -560,7 +579,7 @@ def require_azure_storage_libraries(need_azure_storage_blob=True, need_azure_ide
     azure_identity_import_ok = True
     if need_azure_identity:
         try:
-            from azure.identity import DefaultAzureCredential
+            from azure.identity import AzureCliCredential
         except:
             azure_identity_import_ok = False
 
@@ -608,7 +627,7 @@ def download_with_azure(uri, target_location, fail_if_not_found=True):
     logging.info("Download: %s -> %s", uri, target_location)
 
     ok = True
-    az_credential = DefaultAzureCredential()
+    az_credential = AzureCliCredential()
     blob = BlobClient.from_blob_url(uri, credential=az_credential)
     with open(target_location, "wb") as my_blob:
         try:
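The `_timeout` parameter added to `run_command` above relies on a `threading.Timer` that kills the child process if it is still alive when the timer fires, while the `finally` block cancels the timer on the normal path so a fast-finishing process is never killed later. A stripped-down sketch of that pattern on its own:

```python
import subprocess
import threading

def run_with_timeout(command, timeout_seconds):
    # Sketch only: kill the process if it outlives the timeout; always cancel
    # the timer once the process has finished.
    with subprocess.Popen(command, stdout=subprocess.PIPE) as proc:
        timer = threading.Timer(timeout_seconds, proc.kill)
        timer.start()
        try:
            stdout, _ = proc.communicate()
        finally:
            timer.cancel()
        return stdout, proc.returncode
```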
diff --git a/src/coreclr/scripts/superpmi-collect.proj b/src/coreclr/scripts/superpmi-collect.proj
index a9a1f1069822..fba11793645e 100644
--- a/src/coreclr/scripts/superpmi-collect.proj
+++ b/src/coreclr/scripts/superpmi-collect.proj
@@ -120,6 +120,9 @@
   
     --tiered_pgo
   
+  
+    --tiered_pgo --jitoptrepeat_all
+  
   
     
   
@@ -245,20 +248,20 @@
     
   
   
-    
-    
-    
-    
-    
-    
+    
+    
+    
+    
+    
+    
     
   
-    
-    
-    
-    
-    
-    
+    
+    
+    
+    
+    
+    
     
 
   
diff --git a/src/coreclr/scripts/superpmi-replay.proj b/src/coreclr/scripts/superpmi-replay.proj
index bd4d99c660e7..c19831f43fb2 100644
--- a/src/coreclr/scripts/superpmi-replay.proj
+++ b/src/coreclr/scripts/superpmi-replay.proj
@@ -24,11 +24,17 @@
      timeout: %(HelixWorkItem.Timeout)  '"/>
    -->
 
+  
+    
+    standard
+    $(_SuperPmiReplayType)
+  
+
   
     %HELIX_PYTHONPATH%
     %HELIX_CORRELATION_PAYLOAD%
     %HELIX_WORKITEM_UPLOAD_ROOT%
-    $(Python) $(ProductDirectory)\superpmi_replay.py -jit_directory $(ProductDirectory)
+    $(Python) $(ProductDirectory)\superpmi_replay.py -type $(SuperPmiReplayType) -jit_directory $(ProductDirectory)
     3:15
   
 
@@ -49,38 +55,95 @@
     
   
 
-  
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
+  
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+  
+
+  
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
   
 
-  
-    
-    
-    
-    
-    
-    
+  
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
   
 
   
     
       $(WorkItemCommand) -arch %(HelixWorkItem.Architecture) -platform %(HelixWorkItem.Platform) -partition %(HelixWorkItem.Partition) -partition_count %(HelixWorkItem.PartitionCount) -log_directory $(SuperpmiLogsLocation)
       $(WorkItemTimeout)
-      superpmi_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture)_%(HelixWorkItem.Partition).log
+      superpmi_final_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture)_%(HelixWorkItem.Partition).log
     
   
   
diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py
index 229dd3bdfec8..8c48fc55ec5d 100644
--- a/src/coreclr/scripts/superpmi.py
+++ b/src/coreclr/scripts/superpmi.py
@@ -24,26 +24,27 @@
 import locale
 import logging
 import math
-import os
 import multiprocessing
+import os
 import platform
+import queue
+import re
 import shutil
 import subprocess
 import sys
+import tarfile
 import tempfile
-import queue
-import re
+import time
 import urllib
 import urllib.request
 import zipfile
-import time
 
 from coreclr_arguments import *
 from jitutil import TempDir, ChangeDir, remove_prefix, is_zero_length_file, is_nonzero_length_file, \
     make_safe_filename, find_file, download_one_url, download_files, report_azure_error, \
     require_azure_storage_libraries, authenticate_using_azure, \
     create_unique_directory_name, create_unique_file_name, get_files_from_path, determine_jit_name, \
-    get_deepest_existing_directory
+    get_deepest_existing_directory, run_command
 
 locale.setlocale(locale.LC_ALL, '')  # Use '' for auto, or force e.g. to 'en_US.UTF-8'
 
@@ -145,13 +146,13 @@
 
 host_os_help = "OS (windows, osx, linux). Default: current OS."
 
-arch_help = "Architecture (x64, x86, arm, arm64). Default: current architecture."
+arch_help = "Architecture (x64, x86, arm, arm64, loongarch64, riscv64). Default: current architecture."
 
 target_os_help = "Target OS, for use with cross-compilation JIT (windows, osx, linux). Default: current OS."
 
-target_arch_help = "Target architecture, for use with cross-compilation JIT (x64, x86, arm, arm64). Passed as asm diffs target to SuperPMI. Default: current architecture."
+target_arch_help = "Target architecture, for use with cross-compilation JIT (x64, x86, arm, arm64, loongarch64, riscv64). Passed as asm diffs target to SuperPMI. Default: current architecture."
 
-mch_arch_help = "Architecture of MCH files to download, used for cross-compilation altjit (x64, x86, arm, arm64). Default: target architecture."
+mch_arch_help = "Architecture of MCH files to download, used for cross-compilation altjit (x64, x86, arm, arm64, loongarch64, riscv64). Default: target architecture."
 
 build_type_help = "Build type (Debug, Checked, Release). Default: Checked."
 
@@ -314,6 +315,7 @@ def add_core_root_arguments(parser, build_type_default, build_type_help):
 collect_parser.add_argument("--disable_r2r", action="store_true", help="Sets DOTNET_ReadyToRun=0 when doing collection to cause ReadyToRun images to not be used, and thus causes JIT compilation and SuperPMI collection of these methods.")
 collect_parser.add_argument("--tiered_compilation", action="store_true", help="Sets DOTNET_TieredCompilation=1 when doing collections.")
 collect_parser.add_argument("--tiered_pgo", action="store_true", help="Sets DOTNET_TieredCompilation=1 and DOTNET_TieredPGO=1 when doing collections.")
+collect_parser.add_argument("--jitoptrepeat_all", action="store_true", help="Sets DOTNET_JitOptRepeat=* when doing collections.")
 collect_parser.add_argument("--ci", action="store_true", help="Special collection mode for handling zero-sized files in Azure DevOps + Helix pipelines collections.")
 
 # Allow for continuing a collection in progress
@@ -864,6 +866,9 @@ def __collect_mc_files__(self):
             else:
                 dotnet_env["TieredCompilation"] = "0"
 
+            if self.coreclr_args.jitoptrepeat_all:
+                dotnet_env["JitOptRepeat"] = "*"
+
             if self.coreclr_args.disable_r2r:
                 dotnet_env["ReadyToRun"] = "0"
 
@@ -1614,15 +1619,27 @@ def report_replay_asserts(asserts, output_mch_file):
 
     if asserts:
         logging.info("============================== Assertions:")
+        assertion_num = 0
+        assertion_count = len(asserts.items())
         for assertion_key, assertion_value in asserts.items():
-            logging.info("%s", assertion_key)
+            assertion_num += 1
+            assertion_instance_count = len(assertion_value)
+            logging.info("=== Assertion #%s/%s (count: %s): %s", assertion_num, assertion_count, assertion_instance_count, assertion_key)
             # Sort the values by increasing il size
             sorted_instances = sorted(assertion_value, key=lambda d: d['il'])
+            instance_num = 0
+            # Arbitrary maximum number of instances to show to avoid too much output. Note that all the assertions are in the log file,
+            # just not summarized.
+            instance_max = 25
             for instance in sorted_instances:
                 if output_mch_file:
                     logging.info("  %s # %s : IL size %s", instance['mch_file'], instance['mc_num'], instance['il'])
                 else:
                     logging.info("  # %s : IL size %s", instance['mc_num'], instance['il'])
+                instance_num += 1
+                if instance_num >= instance_max:
+                    logging.info("  ... omitting %s instances", assertion_instance_count - instance_num)
+                    break
 
 
 ################################################################################
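The replay-assert report above now numbers each distinct assertion and caps the per-assertion listing at 25 instances (smallest IL first), while the full detail remains in the log file. A small sketch of just the truncation step, over a hypothetical `instances` list of dicts:

```python
def summarize_instances(instances, instance_max=25):
    # Sketch only: print the smallest-IL instances first, stop after
    # instance_max, and note how many were omitted.
    shown = 0
    for instance in sorted(instances, key=lambda d: d["il"]):
        print("  # {} : IL size {}".format(instance["mc_num"], instance["il"]))
        shown += 1
        if shown >= instance_max:
            print("  ... omitting {} instances".format(len(instances) - shown))
            break
```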
@@ -3685,14 +3702,14 @@ def list_superpmi_collections_container_via_azure_api(path_filter=lambda unused:
     """
 
     require_azure_storage_libraries()
-    from jitutil import ContainerClient, DefaultAzureCredential
+    from jitutil import ContainerClient, AzureCliCredential
 
     superpmi_container_url = az_blob_storage_superpmi_container_uri
 
     paths = []
     ok = True
     try:
-        az_credential = DefaultAzureCredential()
+        az_credential = AzureCliCredential()
         container = ContainerClient.from_container_url(superpmi_container_url, credential=az_credential)
         blob_name_prefix = az_collections_root_folder + "/"
         blob_list = container.list_blobs(name_starts_with=blob_name_prefix, retry_total=0)
@@ -3881,28 +3898,75 @@ def download_mch_from_azure(coreclr_args, target_dir):
         list containing the local path of files downloaded
     """
 
+    blob_url_prefix = "{}/{}/".format(az_blob_storage_superpmi_container_uri, az_collections_root_folder)
     blob_filter_string =  "{}/{}/{}/".format(coreclr_args.jit_ee_version, coreclr_args.target_os, coreclr_args.mch_arch).lower()
 
-    # Determine if a URL in Azure Storage should be allowed. The path looks like:
-    #   jit-ee-guid/Linux/x64/Linux.x64.Checked.frameworks.mch.zip
-    # Filter to just the current jit-ee-guid, OS, and architecture.
-    # Include both MCH and MCT files as well as the CLR JIT dll (processed below).
-    # If there are filters, only download those matching files.
-    def filter_superpmi_collections(path):
-        path = path.lower()
-        return path.startswith(blob_filter_string) and ((coreclr_args.filter is None) or any((filter_item.lower() in path) for filter_item in coreclr_args.filter))
-
-    paths = list_superpmi_collections_container(filter_superpmi_collections)
-    if paths is None or len(paths) == 0:
-        print("No Azure Storage MCH files to download from {}".format(blob_filter_string))
-        return []
+    path_var = os.environ.get("PATH")
+    azcopy_exe = "azcopy.exe" if platform.system() == "Windows" else "azcopy"
+    azcopy_path = find_file(azcopy_exe, path_var.split(os.pathsep)) if path_var is not None else None
+
+    if azcopy_path is None or authenticate_using_azure:
+        # Determine if a URL in Azure Storage should be allowed. The path looks like:
+        #   jit-ee-guid/Linux/x64/Linux.x64.Checked.frameworks.mch.zip
+        # Filter to just the current jit-ee-guid, OS, and architecture.
+        # Include both MCH and MCT files as well as the CLR JIT dll (processed below).
+        # If there are filters, only download those matching files.
+        def filter_superpmi_collections(path):
+            path = path.lower()
+            return path.startswith(blob_filter_string) and ((coreclr_args.filter is None) or any((filter_item.lower() in path) for filter_item in coreclr_args.filter))
+
+        paths = list_superpmi_collections_container(filter_superpmi_collections)
+        if paths is None or len(paths) == 0:
+            print("No Azure Storage MCH files to download from {}".format(blob_filter_string))
+            return []
+
+        urls = [blob_url_prefix + path for path in paths]
+
+        skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress
+        return download_files(urls, target_dir, is_azure_storage=True, display_progress=not skip_progress)
+    else:
+        logging.info("azcopy was found in PATH; will use azcopy for download")
+        local_paths = []
+        with TempDir() as temp_location:
+            source_url = "{}{}*".format(blob_url_prefix, blob_filter_string)
+            cli = [azcopy_path, "cp", source_url, temp_location]
+            if coreclr_args.filter is not None:
+                cli.append("--include-pattern")
+                cli.append(";".join("*" + filter_name + "*" for filter_name in coreclr_args.filter))
+    
+            # Log to a file to get "tee-like" behavior (streaming output in the console)
+            azcopy_log_path = os.path.join(temp_location, "azcopy.log")
+            run_command(cli, _output_file=azcopy_log_path)
+            os.remove(azcopy_log_path)
+
+            for file in os.listdir(temp_location):
+                download_path = os.path.join(temp_location, file)
+                if file.lower().endswith(".zip") or file.lower().endswith(".tar.gz"):
+                    logging.info("Uncompress %s => %s", download_path, target_dir)
+
+                    if file.lower().endswith(".zip"):
+                        with zipfile.ZipFile(download_path, "r") as zip:
+                            zip.extractall(target_dir)
+                            archive_names = zip.namelist()
+                    else:
+                        with tarfile.open(download_path, "r") as tar:
+                            tar.extractall(target_dir)
+                            archive_names = tar.getnames()
 
-    blob_url_prefix = "{}/{}/".format(az_blob_storage_superpmi_container_uri, az_collections_root_folder)
-    urls = [blob_url_prefix + path for path in paths]
+                    for archive_name in archive_names:
+                        if archive_name.endswith("/"):
+                            # Directory
+                            continue
 
-    skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress
-    return download_files(urls, target_dir, is_azure_storage=True, display_progress=not skip_progress)
+                        target_path = os.path.join(target_dir, archive_name.replace("/", os.path.sep))
+                        local_paths.append(target_path)
+                else:
+                    logging.info("Copy %s => %s", download_path, target_dir)
+                    target_path = os.path.join(target_dir, file)
+                    shutil.copy2(download_path, target_path)
+                    local_paths.append(target_path)
 
+        return local_paths
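When `azcopy` is on `PATH` and Azure authentication is not being forced, the branch above shells out to `azcopy cp` with a wildcard source URL, narrowing the copy with `--include-pattern` when `-filter` values were given, and then unpacks any downloaded `.zip`/`.tar.gz` archives. A hedged sketch of just the command construction:

```python
def build_azcopy_command(azcopy_path, blob_url_prefix, blob_filter_string, destination, filters=None):
    # Sketch only: copy everything under the jit-ee-guid/os/arch prefix,
    # optionally narrowed with azcopy's semicolon-separated --include-pattern globs.
    source_url = "{}{}*".format(blob_url_prefix, blob_filter_string)
    cli = [azcopy_path, "cp", source_url, destination]
    if filters:
        cli += ["--include-pattern", ";".join("*" + f + "*" for f in filters)]
    return cli
```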
 
 def upload_mch(coreclr_args):
     """ Upload a set of MCH files. Each MCH file is first ZIP compressed to save data space and upload/download time.
@@ -3912,7 +3976,7 @@ def upload_mch(coreclr_args):
     """
 
     require_azure_storage_libraries(need_azure_identity=True)
-    from jitutil import BlobServiceClient, DefaultAzureCredential
+    from jitutil import BlobServiceClient, AzureCliCredential
 
     def upload_blob(file, blob_name):
         blob_client = blob_service_client.get_blob_client(container=az_superpmi_container_name, blob=blob_name)
@@ -3948,7 +4012,7 @@ def upload_blob(file, blob_name):
     for item in files_to_upload:
         logging.info("  %s", item)
 
-    default_credential = DefaultAzureCredential()
+    default_credential = AzureCliCredential()
 
     blob_service_client = BlobServiceClient(account_url=az_blob_storage_account_uri, credential=default_credential)
     blob_folder_name = "{}/{}/{}/{}".format(az_collections_root_folder, coreclr_args.jit_ee_version, coreclr_args.target_os, coreclr_args.mch_arch)
@@ -4357,8 +4421,8 @@ def process_base_jit_path_arg(coreclr_args):
             baseline_hash = coreclr_args.base_git_hash
 
         if coreclr_args.base_git_hash is None:
-            # Enumerate the last 20 changes, starting with the baseline, that included JIT changes.
-            command = [ "git", "log", "--pretty=format:%H", baseline_hash, "-20", "--", "src/coreclr/jit/*" ]
+            # Enumerate the last 20 changes, starting with the baseline, that included JIT and JIT-EE GUID changes.
+            command = [ "git", "log", "--pretty=format:%H", baseline_hash, "-20", "--", "src/coreclr/jit/*", "src/coreclr/inc/jiteeversionguid.h" ]
             logging.debug("Invoking: %s", " ".join(command))
             proc = subprocess.Popen(command, stdout=subprocess.PIPE)
             stdout_change_list, _ = proc.communicate()
@@ -4562,6 +4626,9 @@ def setup_spmi_location_arg(spmi_location):
         logger.addHandler(file_handler)
         logging.critical("================ Logging to %s", log_file)
 
+    # Log the original command-line
+    logging.debug("Command line: %s", " ".join(sys.argv))
+
     # Finish verifying the arguments
 
     def setup_jit_ee_version_arg(jit_ee_version):
@@ -4610,6 +4677,15 @@ def verify_target_args():
                             lambda mch_arch: "Unknown mch_arch {}\nSupported architectures: {}".format(mch_arch, (", ".join(coreclr_args.valid_arches))),
                             modify_arg=lambda mch_arch: mch_arch if mch_arch is not None else coreclr_args.target_arch) # Default to `target_arch`
 
+        # For LoongArch64, RiscV64, assume we are doing altjit cross-compilation and set mch_arch to 'arch', and target_os to Linux.
+        if coreclr_args.target_arch == "loongarch64" or coreclr_args.target_arch == "riscv64":
+            if coreclr_args.target_os == coreclr_args.host_os and coreclr_args.target_os != "linux":
+                logging.warning("Overriding 'target_os' to 'linux'")
+                coreclr_args.target_os = "linux"
+            if coreclr_args.mch_arch == coreclr_args.target_arch and coreclr_args.mch_arch != coreclr_args.arch:
+                logging.warning("Overriding 'mch_arch' to '%s'", coreclr_args.arch)
+                coreclr_args.mch_arch = coreclr_args.arch
+
     def verify_superpmi_common_args():
 
         coreclr_args.verify(args,
@@ -4911,6 +4987,11 @@ def verify_base_diff_args():
                             lambda unused: True,
                             "Unable to set tiered_pgo")
 
+        coreclr_args.verify(args,
+                            "jitoptrepeat_all",
+                            lambda unused: True,
+                            "Unable to set jitoptrepeat_all")
+
         coreclr_args.verify(args,
                             "pmi_path",
                             lambda unused: True,
@@ -5518,7 +5599,6 @@ def main(args):
 # __main__
 ################################################################################
 
-
 if __name__ == "__main__":
     args = parser.parse_args()
     sys.exit(main(args))
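For `loongarch64` and `riscv64` targets, the verification above assumes an altjit cross-compilation setup: a target OS that merely defaulted to the host OS is forced to `linux`, and an `mch_arch` that defaulted to the target architecture is forced back to the host architecture. A minimal sketch of those overrides in isolation:

```python
def apply_altjit_defaults(arch, host_os, target_arch, target_os, mch_arch):
    # Sketch only: mirrors the loongarch64/riscv64 special case added above.
    if target_arch in ("loongarch64", "riscv64"):
        if target_os == host_os and target_os != "linux":
            target_os = "linux"
        if mch_arch == target_arch and mch_arch != arch:
            mch_arch = arch
    return target_os, mch_arch
```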
diff --git a/src/coreclr/scripts/superpmi_benchmarks.py b/src/coreclr/scripts/superpmi_benchmarks.py
index 57c7eeb383ef..bc1319a2b8a8 100644
--- a/src/coreclr/scripts/superpmi_benchmarks.py
+++ b/src/coreclr/scripts/superpmi_benchmarks.py
@@ -39,6 +39,7 @@
 parser.add_argument("-benchmark_binary", help="Benchmark binary to execute")
 parser.add_argument("--tiered_compilation", action="store_true", help="Sets DOTNET_TieredCompilation=1 when doing collections.")
 parser.add_argument("--tiered_pgo", action="store_true", help="Sets DOTNET_TieredCompilation=1 and DOTNET_TieredPGO=1 when doing collections.")
+parser.add_argument("--jitoptrepeat_all", action="store_true", help="Sets DOTNET_JitOptRepeat=*.")
 
 def setup_args(args):
     """ Setup the args for SuperPMI to use.
@@ -113,6 +114,11 @@ def setup_args(args):
                         lambda unused: True,
                         "Unable to set tiered_pgo")
 
+    coreclr_args.verify(args,
+                        "jitoptrepeat_all",
+                        lambda unused: True,
+                        "Unable to set jitoptrepeat_all")
+
     return coreclr_args
 
 
@@ -288,6 +294,9 @@ def build_and_run(coreclr_args, output_mch_name):
     else:
         collection_command += "DOTNET_TieredCompilation:0"
 
+    if coreclr_args.jitoptrepeat_all:
+        collection_command += " DOTNET_JitOptRepeat:\"*\""
+
     # Generate the execution script in Temp location
     with TempDir() as temp_location:
         script_name = os.path.join(temp_location, script_name)
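For the benchmark collection path, `--jitoptrepeat_all` simply appends one more `NAME:VALUE` pair to the environment list handed to the collection command, alongside the tiering settings. A sketch of that assembly, assuming the same colon-separated format used above:

```python
def build_collection_env_vars(tiered_pgo=False, jitoptrepeat_all=False):
    # Sketch only: the NAME:VALUE pairs appended to the collection command.
    env_vars = []
    if tiered_pgo:
        env_vars += ["DOTNET_TieredCompilation:1", "DOTNET_TieredPGO:1"]
    else:
        env_vars += ["DOTNET_TieredCompilation:0"]
    if jitoptrepeat_all:
        env_vars += ['DOTNET_JitOptRepeat:"*"']
    return " ".join(env_vars)
```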
diff --git a/src/coreclr/scripts/superpmi_collect_setup.py b/src/coreclr/scripts/superpmi_collect_setup.py
index 59217f27bb95..c3a4d6e7348e 100644
--- a/src/coreclr/scripts/superpmi_collect_setup.py
+++ b/src/coreclr/scripts/superpmi_collect_setup.py
@@ -43,7 +43,7 @@
 
 parser = argparse.ArgumentParser(description="description")
 
-parser.add_argument("-collection_type", required=True, help="Type of the SPMI collection to be done (nativeaot, crossgen2, pmi, run, run_tiered, run_pgo)")
+parser.add_argument("-collection_type", required=True, help="Type of the SPMI collection to be done (nativeaot, crossgen2, pmi, run, run_tiered, run_pgo, run_pgo_optrepeat)")
 parser.add_argument("-collection_name", required=True, help="Name of the SPMI collection to be done (e.g., libraries, libraries_tests, coreclr_tests, benchmarks)")
 parser.add_argument("-payload_directory", required=True, help="Path to payload directory to create: subdirectories are created for the correlation payload as well as the per-partition work items")
 parser.add_argument("-source_directory", required=True, help="Path to source directory")
@@ -57,7 +57,7 @@
 
 is_windows = platform.system() == "Windows"
 
-legal_collection_types = [ "nativeaot", "crossgen2", "pmi", "run", "run_tiered", "run_pgo" ]
+legal_collection_types = [ "nativeaot", "crossgen2", "pmi", "run", "run_tiered", "run_pgo", "run_pgo_optrepeat" ]
 
 directories_to_ignore = [
     "runtimes", # This appears to be the result of a nuget package that includes a bunch of native code
@@ -457,28 +457,40 @@ def main(main_args):
     # Determine the Helix queue name to use when running jobs.
     if coreclr_args.public_queues:
         if platform_name == "windows":
-            helix_queue = "Windows.11.Arm64.Open" if arch == "arm64" else "Windows.10.Amd64.Open"
+            if arch == "arm64": # public windows_arm64
+                helix_queue = "Windows.11.Arm64.Open"
+            else: # public windows_x64
+                helix_queue = "Windows.10.Amd64.Open"
         elif platform_name == "linux":
-            if arch == "arm":
-                helix_queue = "(Debian.12.Arm32.Open)Ubuntu.2004.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-helix-arm32v7"
-            elif arch == "arm64":
-                helix_queue = "(Ubuntu.2004.Arm64.Open)Ubuntu.2004.Armarch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-20.04-helix-arm64v8"
-            else:
-                helix_queue = "Ubuntu.2204.Amd64.Open"
+            if arch == "arm": # public linux_arm
+                helix_queue = "(Debian.12.Arm32.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-helix-arm32v7"
+            elif arch == "arm64": # public linux_arm64
+                helix_queue = "(Ubuntu.2404.Arm64.Open)Ubuntu.2204.Armarch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-24.04-helix-arm64v8"
+            else: # public linux_x64
+                helix_queue = "azurelinux.3.amd64.open"
         elif platform_name == "osx":
-            helix_queue = "OSX.1200.ARM64.Open" if arch == "arm64" else "OSX.1200.Amd64.Open"
+            if arch == "arm64": # public osx_arm64
+                helix_queue = "osx.13.arm64.open"
+            else: # public osx_x64
+                helix_queue = "OSX.1200.Amd64.Open"
     else:
         if platform_name == "windows":
-            helix_queue = "Windows.11.Arm64" if arch == "arm64" else "Windows.10.Amd64.X86.Rt"
+            if arch == "arm64": # internal windows_arm64
+                helix_queue = "Windows.11.Arm64"
+            else: # internal superpmi windows_x64
+                helix_queue = "Windows.10.Amd64.X86.Rt"
         elif platform_name == "linux":
-            if arch == "arm":
-                helix_queue = "(Debian.12.Arm32)Ubuntu.2004.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-helix-arm32v7"
-            elif arch == "arm64":
-                helix_queue = "(Ubuntu.1804.Arm64)Ubuntu.2004.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-18.04-helix-arm64v8"
-            else:
-                helix_queue = "Ubuntu.2204.Amd64"
+            if arch == "arm": # internal linux_arm
+                helix_queue = "(Debian.12.Arm32)Ubuntu.2204.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-helix-arm32v7"
+            elif arch == "arm64": # internal linux_arm64
+                helix_queue = "(Ubuntu.2404.Arm64)Ubuntu.2204.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-24.04-helix-arm64v8"
+            else: # internal linux_x64
+                helix_queue = "azurelinux.3.amd64"
         elif platform_name == "osx":
-            helix_queue = "OSX.1200.ARM64" if arch == "arm64" else "OSX.1200.Amd64"
+            if arch == "arm64": # internal osx_arm64
+                helix_queue = "OSX.1200.ARM64"
+            else: # internal osx_x64
+                helix_queue = "OSX.1200.Amd64"
 
     # Copy the superpmi scripts
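The queue choice above is now written out per OS/architecture instead of nested conditionals; a table-driven sketch of part of the public-queue mapping (names copied from the added lines, Linux arm/arm64 docker queues omitted for brevity) could look like this:

```python
# Sketch only: a dictionary-driven view of a subset of the public Helix queues.
PUBLIC_QUEUES = {
    ("windows", "arm64"): "Windows.11.Arm64.Open",
    ("windows", "x64"): "Windows.10.Amd64.Open",
    ("linux", "x64"): "azurelinux.3.amd64.open",
    ("osx", "arm64"): "osx.13.arm64.open",
    ("osx", "x64"): "OSX.1200.Amd64.Open",
}

def public_queue(platform_name, arch):
    # Anything not listed falls back to the x64 queue for that OS, matching
    # the script's else branches.
    return PUBLIC_QUEUES.get((platform_name, arch), PUBLIC_QUEUES[(platform_name, "x64")])
```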
 
diff --git a/src/coreclr/scripts/superpmi_replay.py b/src/coreclr/scripts/superpmi_replay.py
index d2cf1eec37fb..6af44151cb59 100644
--- a/src/coreclr/scripts/superpmi_replay.py
+++ b/src/coreclr/scripts/superpmi_replay.py
@@ -19,25 +19,36 @@
 
 parser = argparse.ArgumentParser(description="description")
 
+parser.add_argument("-type", required=True, help="Type of replay (standard, apx)")
 parser.add_argument("-arch", help="Architecture")
 parser.add_argument("-platform", help="OS platform")
 parser.add_argument("-jit_directory", help="path to the directory containing clrjit binaries")
 parser.add_argument("-log_directory", help="path to the directory containing superpmi log files")
 parser.add_argument("-partition", help="Partition number specifying which set of flags to use: between 1 and the `-partition_count` value")
-parser.add_argument("-partition_count", help="Count of the total number of partitions we are using: should be <= 9 (number of jit_flags_all elements)")
-
-jit_flags_all = [
-    "JitStressRegs=0",
-    "JitStressRegs=1",
-    "JitStressRegs=2",
-    "JitStressRegs=3",
-    "JitStressRegs=4",
-    "JitStressRegs=8",
-    "JitStressRegs=0x10",
-    "JitStressRegs=0x80",
-    "JitStressRegs=0x1000",
+parser.add_argument("-partition_count", help="Count of the total number of partitions we are using: should be <= the total number of flags combinations for the type")
+
+configuration_standard = [
+    [ "JitStressRegs=0" ],
+    [ "JitStressRegs=1" ],
+    [ "JitStressRegs=2" ],
+    [ "JitStressRegs=3" ],
+    [ "JitStressRegs=4" ],
+    [ "JitStressRegs=8" ],
+    [ "JitStressRegs=0x10" ],
+    [ "JitStressRegs=0x80" ],
+    [ "JitStressRegs=0x1000" ]
 ]
 
+configuration_apx = [
+    [ "RunAltJitCode=0", "EnableAPX=1" ],
+    [ "RunAltJitCode=0", "EnableAPX=1", "EnableApxNDD=1" ],
+    [ "RunAltJitCode=0", "EnableAPX=1", "JitStressRex2Encoding=1" ],
+    [ "RunAltJitCode=0", "EnableAPX=1", "JitStressPromotedEvexEncoding=1" ],
+    [ "RunAltJitCode=0", "EnableAPX=1", "JitStressRegs=4000" ],
+    [ "RunAltJitCode=0", "EnableAPX=1", "EnableApxNDD=1", "JitStressRex2Encoding=1", "JitStressPromotedEvexEncoding=1", "JitStressRegs=4000" ]
+]
+
+
 def split(a, n):
     """ Splits array `a` in `n` partitions.
         Slightly modified from https://stackoverflow.com/a/2135920.
@@ -66,6 +77,11 @@ def setup_args(args):
     coreclr_args = CoreclrArguments(args, require_built_core_root=False, require_built_product_dir=False,
                                     require_built_test_dir=False, default_build_type="Checked")
 
+    coreclr_args.verify(args,
+                        "type",
+                        lambda type: type in ["standard", "apx"],
+                        "Invalid type \"{}\"".format)
+
     coreclr_args.verify(args,
                         "arch",
                         lambda unused: True,
@@ -137,8 +153,14 @@ def main(main_args):
     os_name = "universal" if arch_name.startswith("arm") else os_name
     jit_path = os.path.join(coreclr_args.jit_directory, 'clrjit_{}_{}_{}.dll'.format(os_name, arch_name, host_arch_name))
 
-    jit_flags_partitioned = split(jit_flags_all, coreclr_args.partition_count)
-    jit_flags = jit_flags_partitioned[coreclr_args.partition - 1] # partition number is 1-based
+    type_configuration_settings = None
+    if coreclr_args.type == 'standard':
+        type_configuration_settings = configuration_standard
+    elif coreclr_args.type == 'apx':
+        type_configuration_settings = configuration_apx
+
+    configuration_settings_partitioned = split(type_configuration_settings, coreclr_args.partition_count)
+    partition_configuration_settings = configuration_settings_partitioned[coreclr_args.partition - 1] # partition number is 1-based
 
     print("Running superpmi.py download")
     run_command([python_path,
@@ -152,33 +174,47 @@ def main(main_args):
             "-log_level", "debug"], _exit_on_fail=True)
 
     failed_runs = []
-    for jit_flag in jit_flags:
-        log_file = os.path.join(log_directory, 'superpmi_{}.log'.format(jit_flag.replace("=", "_")))
-        print("Running superpmi.py replay for {}".format(jit_flag))
+    for configuration_settings in partition_configuration_settings:
+        # Construct the command-line options and log file based on the configuration settings
+        log_file_tag = "_".join(configuration_settings).replace("=", "_")
+        log_file = os.path.join(log_directory, 'superpmi_config_{}.log'.format(log_file_tag))
+
+        config_arguments = []
+        config_display = ""
+        for flag in configuration_settings:
+            config_arguments += "-jitoption", flag
+            config_display += " " + flag
+
+        # Special case: setting altjit requires passing `--altjit` to superpmi.py.
+        if coreclr_args.type == 'apx':
+            config_arguments += [ "--altjit" ]
+            config_display += " --altjit"
+
+        print("Running superpmi.py replay for{}".format(config_display))
 
-        _, _, return_code = run_command([
+        command_line = [
             python_path,
             os.path.join(cwd, "superpmi.py"),
             "replay",
             "-core_root", cwd,
-            "-jitoption", jit_flag,
             "-target_os", platform_name,
             "-target_arch", arch_name,
             "-arch", host_arch_name,
             "-jit_path", jit_path,
             "-spmi_location", spmi_location,
             "-log_level", "debug",
-            "-log_file", log_file])
+            "-log_file", log_file] + config_arguments
 
+        _, _, return_code = run_command(command_line)
         if return_code != 0:
             failed_runs.append("Failure in {}".format(log_file))
 
     # Consolidate all superpmi_*.logs in superpmi_platform_architecture.log
-    final_log_name = os.path.join(log_directory, "superpmi_{}_{}_{}.log".format(platform_name, arch_name, coreclr_args.partition))
+    final_log_name = os.path.join(log_directory, "superpmi_final_{}_{}_{}.log".format(platform_name, arch_name, coreclr_args.partition))
     print("Consolidating final {}".format(final_log_name))
     with open(final_log_name, "a") as final_superpmi_log:
         for superpmi_log in os.listdir(log_directory):
-            if not superpmi_log.startswith("superpmi_Jit") or not superpmi_log.endswith(".log"):
+            if not superpmi_log.startswith("superpmi_config_") or not superpmi_log.endswith(".log"):
                 continue
 
             print("Appending {}".format(superpmi_log))
@@ -193,7 +229,7 @@ def main(main_args):
         if len(failed_runs) > 0:
             final_superpmi_log.write(os.linesep)
             final_superpmi_log.write(os.linesep)
-            final_superpmi_log.write("========Failed runs summary========".format(os.linesep))
+            final_superpmi_log.write("========Failed runs summary========{}".format(os.linesep))
             final_superpmi_log.write(os.linesep.join(failed_runs))
 
     return 0 if len(failed_runs) == 0 else 1
diff --git a/src/coreclr/scripts/superpmi_replay_setup.py b/src/coreclr/scripts/superpmi_replay_setup.py
index b7717da8efaf..d3c62f561cab 100644
--- a/src/coreclr/scripts/superpmi_replay_setup.py
+++ b/src/coreclr/scripts/superpmi_replay_setup.py
@@ -22,8 +22,9 @@
 parser = argparse.ArgumentParser(description="description")
 
 parser.add_argument("-arch", help="Architecture")
-parser.add_argument("-source_directory", help="path to the directory containing binaries")
-parser.add_argument("-product_directory", help="path to the directory containing binaries")
+parser.add_argument("-type", required=True, help="Type of diff (standard, apx)")
+parser.add_argument("-source_directory", required=True, help="Path to the root directory of the dotnet/runtime source tree")
+parser.add_argument("-product_directory", required=True, help="path to the directory containing binaries")
 
 
 def setup_args(args):
@@ -54,6 +55,11 @@ def setup_args(args):
                         lambda product_directory: os.path.isdir(product_directory),
                         "product_directory doesn't exist")
 
+    coreclr_args.verify(args,
+                        "type",
+                        lambda type: type in ["standard", "apx"],
+                        "Invalid type \"{}\"".format)
+
     return coreclr_args
 
 
diff --git a/src/coreclr/tools/CMakeLists.txt b/src/coreclr/tools/CMakeLists.txt
index 48e1466b197a..23eba1bc3a87 100644
--- a/src/coreclr/tools/CMakeLists.txt
+++ b/src/coreclr/tools/CMakeLists.txt
@@ -1,6 +1,5 @@
 add_subdirectory(SOS)
-add_subdirectory(superpmi)
 
-if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)
-  add_subdirectory(StressLogAnalyzer)
+if (NOT CLR_CMAKE_TARGET_BROWSER)
+  add_subdirectory(superpmi)
 endif()
diff --git a/src/coreclr/tools/Common/CommandLineHelpers.cs b/src/coreclr/tools/Common/CommandLineHelpers.cs
index 99c7894817d1..f02b0d417751 100644
--- a/src/coreclr/tools/Common/CommandLineHelpers.cs
+++ b/src/coreclr/tools/Common/CommandLineHelpers.cs
@@ -1,8 +1,9 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.CommandLine.Help;
 using System.Collections.Generic;
+using System.CommandLine.Help;
+using System.CommandLine.Invocation;
 using System.CommandLine.Parsing;
 using System.IO;
 using System.IO.Compression;
@@ -24,11 +25,11 @@ internal static partial class Helpers
     {
         public const string DefaultSystemModule = "System.Private.CoreLib";
 
-        public static Dictionary BuildPathDictionary(IReadOnlyList tokens, bool strict)
+        public static Dictionary BuildPathDictionary(IReadOnlyList tokens, bool strict)
         {
             Dictionary dictionary = new(StringComparer.OrdinalIgnoreCase);
 
-            foreach (CliToken token in tokens)
+            foreach (Token token in tokens)
             {
                 AppendExpandedPaths(dictionary, token.Value, strict);
             }
@@ -36,11 +37,11 @@ public static Dictionary BuildPathDictionary(IReadOnlyList BuildPathList(IReadOnlyList tokens)
+        public static List BuildPathList(IReadOnlyList tokens)
         {
             List paths = new();
             Dictionary dictionary = new(StringComparer.OrdinalIgnoreCase);
-            foreach (CliToken token in tokens)
+            foreach (Token token in tokens)
             {
                 AppendExpandedPaths(dictionary, token.Value, false);
                 foreach (string file in dictionary.Values)
@@ -121,7 +122,7 @@ public static TargetArchitecture GetTargetArchitecture(string token)
             }
         }
 
-        public static CliRootCommand UseVersion(this CliRootCommand command)
+        public static RootCommand UseVersion(this RootCommand command)
         {
             for (int i = 0; i < command.Options.Count; i++)
             {
@@ -135,15 +136,13 @@ public static CliRootCommand UseVersion(this CliRootCommand command)
             return command;
         }
 
-        public static CliRootCommand UseExtendedHelp(this CliRootCommand command, Func>> customizer)
+        public static RootCommand UseExtendedHelp(this RootCommand command, Action customizer)
         {
-            foreach (CliOption option in command.Options)
+            foreach (Option option in command.Options)
             {
                 if (option is HelpOption helpOption)
                 {
-                    HelpBuilder builder = new();
-                    builder.CustomizeLayout(customizer);
-                    helpOption.Action = new HelpAction { Builder = builder };
+                    helpOption.Action = new CustomizedHelpAction(helpOption, customizer);
                     break;
                 }
             }
@@ -215,7 +214,7 @@ public static void MakeReproPackage(string makeReproPath, string outputFilePath,
                 Dictionary outputToReproPackageFileName = new();
 
                 List rspFile = new List();
-                foreach (CliOption option in res.CommandResult.Command.Options)
+                foreach (Option option in res.CommandResult.Command.Options)
                 {
                     OptionResult optionResult = res.GetResult(option);
                     if (optionResult is null || option.Name == "--make-repro-path")
@@ -272,7 +271,7 @@ public static void MakeReproPackage(string makeReproPath, string outputFilePath,
                     }
                 }
 
-                foreach (CliArgument argument in res.CommandResult.Command.Arguments)
+                foreach (Argument argument in res.CommandResult.Command.Arguments)
                 {
                     ArgumentResult argumentResult = res.GetResult(argument);
                     if (argumentResult is null)
@@ -433,5 +432,26 @@ public static bool TryReadResponseFile(string filePath, out IReadOnlyList _customizer;
+
+            public CustomizedHelpAction(HelpOption helpOption, Action customizer)
+            {
+                _helpAction = (HelpAction)helpOption.Action;
+                _customizer = customizer;
+            }
+
+            public override int Invoke(ParseResult parseResult)
+            {
+                int result = _helpAction.Invoke(parseResult);
+
+                _customizer(parseResult);
+
+                return result;
+            }
+        }
     }
 }
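
The `UseExtendedHelp` rework above replaces the `HelpBuilder.CustomizeLayout` hook with a small wrapper action that runs after the default `HelpAction` has produced its output. A minimal usage sketch follows; it assumes the customizer parameter is an `Action<ParseResult>` (the generic argument is not visible in the hunk), that the helpers are called the way the ILCompiler front ends use them, and that the entry point uses the current `Parse`/`Invoke` pattern of System.CommandLine:

```csharp
using System;
using System.CommandLine;

RootCommand command = new RootCommand("example tool")
    .UseVersion()
    .UseExtendedHelp(parseResult =>
    {
        // Runs after the wrapped HelpAction has printed the standard help text.
        Console.WriteLine();
        Console.WriteLine("See the tool documentation for extended examples.");
    });

return command.Parse(args).Invoke();
```

The design keeps whatever `HelpAction` the parser installed by default and simply decorates it, so version-specific help-layout APIs no longer need to be referenced directly.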
diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs
index cecffda8bfe5..9444cdb58aad 100644
--- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs
+++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs
@@ -32,17 +32,6 @@ public ObjectNodeSection(string name, SectionType type, string comdatName)
         public ObjectNodeSection(string name, SectionType type) : this(name, type, null)
         { }
 
-        /// <summary>
-        /// Returns true if the section is a standard one (defined as text, data, or rdata currently)
-        /// </summary>
-        public bool IsStandardSection
-        {
-            get
-            {
-                return this == DataSection || this == ReadOnlyDataSection || this == FoldableReadOnlyDataSection || this == TextSection || this == XDataSection || this == BssSection;
-            }
-        }
-
         public static readonly ObjectNodeSection XDataSection = new ObjectNodeSection("xdata", SectionType.ReadOnly);
         public static readonly ObjectNodeSection DataSection = new ObjectNodeSection("data", SectionType.Writeable);
         public static readonly ObjectNodeSection ReadOnlyDataSection = new ObjectNodeSection("rdata", SectionType.ReadOnly);
diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs
index 52515ac03ff5..a20b7316eb67 100644
--- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs
+++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs
@@ -478,7 +478,7 @@ private static unsafe void PutLoongArch64JIR(uint* pCode, long imm38)
 
             uint pcInstr = *pCode;
 
-            Debug.Assert(pcInstr == 0x1e00000e);  // Must be pcaddu18i R14, 0
+            Debug.Assert(pcInstr == 0x1e000010);  // Must be pcaddu18i t4, 0
 
             long relOff = imm38 & 0x20000;
             long imm = imm38 + relOff;
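
The updated assert changes the destination register of the expected `pcaddu18i` instruction from R14 to t4. A quick arithmetic check of the asserted patterns, assuming LoongArch64 `pcaddu18i` encodes the destination register number in bits [4:0] and that t4 is `$r16`:

```csharp
using System;

const uint PcAddU18iOpcode = 0x1e000000; // pcaddu18i rd, 0
const uint R14 = 14;                     // register named in the old comment
const uint T4 = 16;                      // $r16, the register named in the new comment

Console.WriteLine($"{PcAddU18iOpcode | R14:x8}"); // 1e00000e, the old asserted value
Console.WriteLine($"{PcAddU18iOpcode | T4:x8}");  // 1e000010, the new asserted value
```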
diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/SortableDependencyNode.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/SortableDependencyNode.cs
index fb40d95fc56a..d9858b093e11 100644
--- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/SortableDependencyNode.cs
+++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/SortableDependencyNode.cs
@@ -13,6 +13,9 @@ namespace ILCompiler.DependencyAnalysis
     public abstract partial class SortableDependencyNode : DependencyNodeCore, ISortableNode
     {
 #if !SUPPORT_JIT
+        // Custom sort order. Used to override the default sorting mechanics.
+        public int CustomSort = int.MaxValue;
+
         /// 
         /// Allows grouping of  instances such that all nodes in a lower phase
         /// will be ordered before nodes in a later phase.
@@ -151,8 +154,6 @@ public int Compare(DependencyNodeCore x1, DependencyNodeCore= -2048) && (offset <= 2047));
diff --git a/src/coreclr/tools/Common/Compiler/DevirtualizationManager.cs b/src/coreclr/tools/Common/Compiler/DevirtualizationManager.cs
index 5574a9f8aa10..2728cb113d3a 100644
--- a/src/coreclr/tools/Common/Compiler/DevirtualizationManager.cs
+++ b/src/coreclr/tools/Common/Compiler/DevirtualizationManager.cs
@@ -202,6 +202,8 @@ protected virtual MethodDesc ResolveVirtualMethod(MethodDesc declMethod, DefType
         }
 
 #if !READYTORUN
+        public virtual bool IsGenericDefinitionMethodTableReflectionVisible(TypeDesc type) => true;
+
         /// 
         /// Gets a value indicating whether it might be possible to obtain a constructed type data structure for the given type
         /// in this compilation (i.e. is it possible to reference a constructed MethodTable symbol for this).
diff --git a/src/coreclr/tools/Common/Compiler/GenericCycleDetection/GraphBuilder.cs b/src/coreclr/tools/Common/Compiler/GenericCycleDetection/GraphBuilder.cs
index dda635a8b8c2..8ac16e3bb3c4 100644
--- a/src/coreclr/tools/Common/Compiler/GenericCycleDetection/GraphBuilder.cs
+++ b/src/coreclr/tools/Common/Compiler/GenericCycleDetection/GraphBuilder.cs
@@ -9,6 +9,8 @@
 using Internal.TypeSystem;
 using Internal.TypeSystem.Ecma;
 
+using Debug = System.Diagnostics.Debug;
+
 namespace ILCompiler
 {
     internal static partial class LazyGenericsSupport
@@ -207,6 +209,30 @@ public GraphBuilder(EcmaModule assembly)
                             }
                         }
                     }
+
+                    if (isGenericType)
+                    {
+                        Instantiation typeContext = default;
+
+                        foreach (FieldDefinitionHandle fieldHandle in typeDefinition.GetFields())
+                        {
+                            try
+                            {
+                                var ecmaField = (EcmaField)assembly.GetObject(fieldHandle);
+
+                                if (typeContext.IsNull)
+                                {
+                                    typeContext = ecmaField.OwningType.Instantiation;
+                                    Debug.Assert(!typeContext.IsNull);
+                                }
+
+                                ProcessTypeReference(ecmaField.FieldType, typeContext, default);
+                            }
+                            catch (TypeSystemException)
+                            {
+                            }
+                        }
+                    }
                 }
                 return;
             }
diff --git a/src/coreclr/tools/Common/Compiler/GenericCycleDetection/ModuleCycleInfo.cs b/src/coreclr/tools/Common/Compiler/GenericCycleDetection/ModuleCycleInfo.cs
index 5d0a4f36cc1d..cc64a7517b96 100644
--- a/src/coreclr/tools/Common/Compiler/GenericCycleDetection/ModuleCycleInfo.cs
+++ b/src/coreclr/tools/Common/Compiler/GenericCycleDetection/ModuleCycleInfo.cs
@@ -142,7 +142,20 @@ private bool IsDeepPossiblyCyclicInstantiation(TypeDesc type, ref int breadthCou
                 {
                     case TypeFlags.Array:
                     case TypeFlags.SzArray:
-                        return IsDeepPossiblyCyclicInstantiation(((ParameterizedType)type).ParameterType, ref breadthCounter, seenTypes);
+                        TypeDesc parameterType = type;
+                        int arrayNesting = 0;
+                        do
+                        {
+                            parameterType = ((ParameterizedType)parameterType).ParameterType;
+                            arrayNesting++;
+                        } while (parameterType.IsArray);
+
+                        if (arrayNesting > _depthCutoff)
+                        {
+                            return true;
+                        }
+
+                        return IsDeepPossiblyCyclicInstantiation(parameterType, ref breadthCounter, seenTypes);
                     default:
                         TypeDesc typeDef = type.GetTypeDefinition();
                         if (type != typeDef)
@@ -223,10 +236,10 @@ public void DetectCycle(TypeSystemEntity owner, TypeSystemEntity referent)
                 if (_depthCutoff < 0)
                     return;
 
-                // Not clear if generic recursion through fields is a thing
-                if (referent is FieldDesc)
+                // Fields don't introduce more genericness than their owning type, so treat as their owning type
+                if (referent is FieldDesc referentField)
                 {
-                    return;
+                    referent = referentField.OwningType;
                 }
 
                 var ownerType = owner as TypeDesc;
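
Two behavioral points in the hunks above: nested (jagged) array wrapping now counts toward the depth cutoff, and a field referent is folded into its owning type instead of being ignored. A tiny illustration of the nesting count, using `System.Type` purely as a stand-in for the compiler's `TypeDesc` (illustrative only, not the compiler code path):

```csharp
using System;

int CountArrayNesting(Type type)
{
    int nesting = 0;
    while (type.IsArray)
    {
        type = type.GetElementType()!;
        nesting++;
    }
    return nesting;
}

Console.WriteLine(CountArrayNesting(typeof(int[][][]))); // 3, so a depth cutoff of 2 would flag it
```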
diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
index dd3f4ca8cf14..09a4902060ba 100644
--- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
+++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
@@ -48,6 +48,9 @@ public static void AddRuntimeRequiredIsaFlagsToBuilder(InstructionSetSupportBuil
                 case TargetArchitecture.ARM64:
                     Arm64IntrinsicConstants.AddToBuilder(builder, flags);
                     break;
+                case TargetArchitecture.RiscV64:
+                    RiscV64IntrinsicConstants.AddToBuilder(builder, flags);
+                    break;
                 default:
                     Debug.Fail("Probably unimplemented");
                     break;
@@ -58,99 +61,73 @@ public static void AddRuntimeRequiredIsaFlagsToBuilder(InstructionSetSupportBuil
         private static class XArchIntrinsicConstants
         {
             // SSE and SSE2 are baseline ISAs - they're always available
-            public const int Aes = 0x0001;
-            public const int Pclmulqdq = 0x0002;
-            public const int Sse3 = 0x0004;
-            public const int Ssse3 = 0x0008;
-            public const int Sse41 = 0x0010;
-            public const int Sse42 = 0x0020;
-            public const int Popcnt = 0x0040;
-            public const int Avx = 0x0080;
-            public const int Fma = 0x0100;
-            public const int Avx2 = 0x0200;
-            public const int Bmi1 = 0x0400;
-            public const int Bmi2 = 0x0800;
-            public const int Lzcnt = 0x1000;
-            public const int AvxVnni = 0x2000;
-            public const int Movbe = 0x4000;
-            public const int Avx512 = 0x8000;
-            public const int Avx512Vbmi = 0x10000;
-            public const int Serialize = 0x20000;
-            public const int Avx10v1 = 0x40000;
-            public const int Evex = 0x80000;
-            public const int Apx = 0x100000;
-            public const int Vpclmulqdq = 0x200000;
-            public const int Avx10v2 = 0x400000;
-            public const int Gfni = 0x800000;
+            public const int Sse42 = (1 << 0);
+            public const int Avx = (1 << 1);
+            public const int Avx2 = (1 << 2);
+            public const int Avx512 = (1 << 3);
+
+            public const int Avx512v2 = (1 << 4);
+            public const int Avx512v3 = (1 << 5);
+            public const int Avx10v1 = (1 << 6);
+            public const int Avx10v2 = (1 << 7);
+            public const int Apx = (1 << 8);
+
+            public const int Aes = (1 << 9);
+            public const int Avx512Vp2intersect = (1 << 10);
+            public const int AvxIfma = (1 << 11);
+            public const int AvxVnni = (1 << 12);
+            public const int Gfni = (1 << 13);
+            public const int Sha = (1 << 14);
+            public const int Vaes = (1 << 15);
+            public const int WaitPkg = (1 << 16);
+            public const int X86Serialize = (1 << 17);
 
             public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
             {
-                if ((flags & Aes) != 0)
-                    builder.AddSupportedInstructionSet("aes");
-                if ((flags & Pclmulqdq) != 0)
-                    builder.AddSupportedInstructionSet("pclmul");
-                if ((flags & Sse3) != 0)
+                if ((flags & Sse42) != 0)
+                {
                     builder.AddSupportedInstructionSet("sse3");
-                if ((flags & Ssse3) != 0)
                     builder.AddSupportedInstructionSet("ssse3");
-                if ((flags & Sse41) != 0)
                     builder.AddSupportedInstructionSet("sse4.1");
-                if ((flags & Sse42) != 0)
                     builder.AddSupportedInstructionSet("sse4.2");
-                if ((flags & Popcnt) != 0)
                     builder.AddSupportedInstructionSet("popcnt");
+                }
                 if ((flags & Avx) != 0)
                     builder.AddSupportedInstructionSet("avx");
-                if ((flags & Fma) != 0)
-                    builder.AddSupportedInstructionSet("fma");
                 if ((flags & Avx2) != 0)
+                {
                     builder.AddSupportedInstructionSet("avx2");
-                if ((flags & Bmi1) != 0)
                     builder.AddSupportedInstructionSet("bmi");
-                if ((flags & Bmi2) != 0)
                     builder.AddSupportedInstructionSet("bmi2");
-                if ((flags & Lzcnt) != 0)
-                    builder.AddSupportedInstructionSet("lzcnt");
-                if ((flags & AvxVnni) != 0)
-                    builder.AddSupportedInstructionSet("avxvnni");
-                if ((flags & Movbe) != 0)
+                    builder.AddSupportedInstructionSet("fma");
+                    builder.AddSupportedInstructionSet("lzcnt");
                     builder.AddSupportedInstructionSet("movbe");
-                if ((flags & Avx512) != 0)
-                {
-                    builder.AddSupportedInstructionSet("avx512f");
-                    builder.AddSupportedInstructionSet("avx512f_vl");
-                    builder.AddSupportedInstructionSet("avx512bw");
-                    builder.AddSupportedInstructionSet("avx512bw_vl");
-                    builder.AddSupportedInstructionSet("avx512cd");
-                    builder.AddSupportedInstructionSet("avx512cd_vl");
-                    builder.AddSupportedInstructionSet("avx512dq");
-                    builder.AddSupportedInstructionSet("avx512dq_vl");
                 }
-                if ((flags & Avx512Vbmi) != 0)
-                {
-                    builder.AddSupportedInstructionSet("avx512vbmi");
-                    builder.AddSupportedInstructionSet("avx512vbmi_vl");
-                }
-                if ((flags & Serialize) != 0)
-                    builder.AddSupportedInstructionSet("serialize");
+                if ((flags & Avx512) != 0)
+                    builder.AddSupportedInstructionSet("avx512");
+
+                if ((flags & Avx512v2) != 0)
+                    builder.AddSupportedInstructionSet("avx512v2");
+                if ((flags & Avx512v3) != 0)
+                    builder.AddSupportedInstructionSet("avx512v3");
                 if ((flags & Avx10v1) != 0)
                     builder.AddSupportedInstructionSet("avx10v1");
-                if (((flags & Avx10v1) != 0) && ((flags & Avx512) != 0))
-                    builder.AddSupportedInstructionSet("avx10v1_v512");
-                if ((flags & Evex) != 0)
-                    builder.AddSupportedInstructionSet("evex");
+                if ((flags & Avx10v2) != 0)
+                    builder.AddSupportedInstructionSet("avx10v2");
                 if ((flags & Apx) != 0)
                     builder.AddSupportedInstructionSet("apx");
-                if ((flags & Vpclmulqdq) != 0)
+
+                if ((flags & Aes) != 0)
                 {
-                    builder.AddSupportedInstructionSet("vpclmul");
-                    if ((flags & Avx512) != 0)
-                        builder.AddSupportedInstructionSet("vpclmul_v512");
+                    builder.AddSupportedInstructionSet("aes");
+                    builder.AddSupportedInstructionSet("pclmul");
                 }
-                if ((flags & Avx10v2) != 0)
-                    builder.AddSupportedInstructionSet("avx10v2");
-                if (((flags & Avx10v2) != 0) && ((flags & Avx512) != 0))
-                    builder.AddSupportedInstructionSet("avx10v2_v512");
+                if ((flags & Avx512Vp2intersect) != 0)
+                    builder.AddSupportedInstructionSet("avx512vp2intersect");
+                if ((flags & AvxIfma) != 0)
+                    builder.AddSupportedInstructionSet("avxifma");
+                if ((flags & AvxVnni) != 0)
+                    builder.AddSupportedInstructionSet("avxvnni");
                 if ((flags & Gfni) != 0)
                 {
                     builder.AddSupportedInstructionSet("gfni");
@@ -159,6 +136,22 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
                     if ((flags & Avx512) != 0)
                         builder.AddSupportedInstructionSet("gfni_v512");
                 }
+                if ((flags & Sha) != 0)
+                    builder.AddSupportedInstructionSet("sha");
+                if ((flags & Vaes) != 0)
+                {
+                    builder.AddSupportedInstructionSet("vaes");
+                    builder.AddSupportedInstructionSet("vpclmul");
+                    if ((flags & Avx512) != 0)
+                    {
+                        builder.AddSupportedInstructionSet("vaes_v512");
+                        builder.AddSupportedInstructionSet("vpclmul_v512");
+                    }
+                }
+                if ((flags & WaitPkg) != 0)
+                    builder.AddSupportedInstructionSet("waitpkg");
+                if ((flags & X86Serialize) != 0)
+                    builder.AddSupportedInstructionSet("x86serialize");
             }
 
             public static int FromInstructionSet(InstructionSet instructionSet)
@@ -170,75 +163,83 @@ public static int FromInstructionSet(InstructionSet instructionSet)
                 return instructionSet switch
                 {
                     // Optional ISAs - only available via opt-in or opportunistic light-up
-                    InstructionSet.X64_AES => Aes,
-                    InstructionSet.X64_AES_X64 => Aes,
-                    InstructionSet.X64_PCLMULQDQ => Pclmulqdq,
-                    InstructionSet.X64_PCLMULQDQ_X64 => Pclmulqdq,
-                    InstructionSet.X64_SSE3 => Sse3,
-                    InstructionSet.X64_SSE3_X64 => Sse3,
-                    InstructionSet.X64_SSSE3 => Ssse3,
-                    InstructionSet.X64_SSSE3_X64 => Ssse3,
-                    InstructionSet.X64_SSE41 => Sse41,
-                    InstructionSet.X64_SSE41_X64 => Sse41,
+                    InstructionSet.X64_SSE3 => Sse42,
+                    InstructionSet.X64_SSE3_X64 => Sse42,
+                    InstructionSet.X64_SSSE3 => Sse42,
+                    InstructionSet.X64_SSSE3_X64 => Sse42,
+                    InstructionSet.X64_SSE41 => Sse42,
+                    InstructionSet.X64_SSE41_X64 => Sse42,
                     InstructionSet.X64_SSE42 => Sse42,
                     InstructionSet.X64_SSE42_X64 => Sse42,
-                    InstructionSet.X64_POPCNT => Popcnt,
-                    InstructionSet.X64_POPCNT_X64 => Popcnt,
+                    InstructionSet.X64_POPCNT => Sse42,
+                    InstructionSet.X64_POPCNT_X64 => Sse42,
+
                     InstructionSet.X64_AVX => Avx,
                     InstructionSet.X64_AVX_X64 => Avx,
-                    InstructionSet.X64_FMA => Fma,
-                    InstructionSet.X64_FMA_X64 => Fma,
+
                     InstructionSet.X64_AVX2 => Avx2,
                     InstructionSet.X64_AVX2_X64 => Avx2,
-                    InstructionSet.X64_BMI1 => Bmi1,
-                    InstructionSet.X64_BMI1_X64 => Bmi1,
-                    InstructionSet.X64_BMI2 => Bmi2,
-                    InstructionSet.X64_BMI2_X64 => Bmi2,
-                    InstructionSet.X64_LZCNT => Lzcnt,
-                    InstructionSet.X64_LZCNT_X64 => Lzcnt,
-                    InstructionSet.X64_AVXVNNI => AvxVnni,
-                    InstructionSet.X64_AVXVNNI_X64 => AvxVnni,
-                    InstructionSet.X64_MOVBE => Movbe,
-                    InstructionSet.X64_AVX512F => Avx512,
-                    InstructionSet.X64_AVX512F_X64 => Avx512,
-                    InstructionSet.X64_AVX512F_VL => Avx512,
-                    InstructionSet.X64_AVX512BW => Avx512,
-                    InstructionSet.X64_AVX512BW_X64 => Avx512,
-                    InstructionSet.X64_AVX512BW_VL => Avx512,
-                    InstructionSet.X64_AVX512CD => Avx512,
-                    InstructionSet.X64_AVX512CD_X64 => Avx512,
-                    InstructionSet.X64_AVX512CD_VL => Avx512,
-                    InstructionSet.X64_AVX512DQ => Avx512,
-                    InstructionSet.X64_AVX512DQ_X64 => Avx512,
-                    InstructionSet.X64_AVX512DQ_VL => Avx512,
-                    InstructionSet.X64_AVX512VBMI => Avx512Vbmi,
-                    InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi,
-                    InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi,
-                    InstructionSet.X64_X86Serialize => Serialize,
-                    InstructionSet.X64_X86Serialize_X64 => Serialize,
+                    InstructionSet.X64_BMI1 => Avx2,
+                    InstructionSet.X64_BMI1_X64 => Avx2,
+                    InstructionSet.X64_BMI2 => Avx2,
+                    InstructionSet.X64_BMI2_X64 => Avx2,
+                    InstructionSet.X64_FMA => Avx2,
+                    InstructionSet.X64_FMA_X64 => Avx2,
+                    InstructionSet.X64_LZCNT => Avx2,
+                    InstructionSet.X64_LZCNT_X64 => Avx2,
+                    InstructionSet.X64_MOVBE => Avx2,
+
+                    InstructionSet.X64_AVX512 => Avx512,
+                    InstructionSet.X64_AVX512_X64 => Avx512,
+
+                    InstructionSet.X64_AVX512VBMI => Avx512v2,
+                    InstructionSet.X64_AVX512VBMI_X64 => Avx512v2,
+
+                    InstructionSet.X64_AVX512v3 => Avx512v3,
+                    InstructionSet.X64_AVX512v3_X64 => Avx512v3,
+
                     InstructionSet.X64_AVX10v1 => Avx10v1,
                     InstructionSet.X64_AVX10v1_X64 => Avx10v1,
-                    InstructionSet.X64_AVX10v1_V512 => (Avx10v1 | Avx512),
-                    InstructionSet.X64_AVX10v1_V512_X64 => (Avx10v1 | Avx512),
-                    InstructionSet.X64_EVEX => Evex,
-                    InstructionSet.X64_APX => Apx,
-                    InstructionSet.X64_PCLMULQDQ_V256 => Vpclmulqdq,
-                    InstructionSet.X64_PCLMULQDQ_V512 => (Vpclmulqdq | Avx512),
+
                     InstructionSet.X64_AVX10v2 => Avx10v2,
                     InstructionSet.X64_AVX10v2_X64 => Avx10v2,
-                    InstructionSet.X64_AVX10v2_V512 => (Avx10v2 | Avx512),
-                    InstructionSet.X64_AVX10v2_V512_X64 => (Avx10v2 | Avx512),
+
+                    InstructionSet.X64_APX => Apx,
+
+                    InstructionSet.X64_AES => Aes,
+                    InstructionSet.X64_AES_X64 => Aes,
+                    InstructionSet.X64_AES_V256 => (Vaes | Avx),
+                    InstructionSet.X64_AES_V512 => (Vaes | Avx512),
+
+                    InstructionSet.X64_PCLMULQDQ => Aes,
+                    InstructionSet.X64_PCLMULQDQ_X64 => Aes,
+                    InstructionSet.X64_PCLMULQDQ_V256 => (Vaes | Avx),
+                    InstructionSet.X64_PCLMULQDQ_V512 => (Vaes | Avx512),
+
+                    InstructionSet.X64_AVX512VP2INTERSECT => Avx512Vp2intersect,
+                    InstructionSet.X64_AVX512VP2INTERSECT_X64 => Avx512Vp2intersect,
+
+                    InstructionSet.X64_AVXIFMA => AvxIfma,
+                    InstructionSet.X64_AVXIFMA_X64 => AvxIfma,
+
+                    InstructionSet.X64_AVXVNNI => AvxVnni,
+                    InstructionSet.X64_AVXVNNI_X64 => AvxVnni,
+
                     InstructionSet.X64_GFNI => Gfni,
                     InstructionSet.X64_GFNI_X64 => Gfni,
                     InstructionSet.X64_GFNI_V256 => (Gfni | Avx),
                     InstructionSet.X64_GFNI_V512 => (Gfni | Avx512),
 
-                    // Baseline ISAs - they're always available
-                    InstructionSet.X64_SSE => 0,
-                    InstructionSet.X64_SSE_X64 => 0,
-                    InstructionSet.X64_SSE2 => 0,
-                    InstructionSet.X64_SSE2_X64 => 0,
+                    InstructionSet.X64_SHA => Sha,
+                    InstructionSet.X64_SHA_X64 => Sha,
+
+                    InstructionSet.X64_WAITPKG => WaitPkg,
+                    InstructionSet.X64_WAITPKG_X64 => WaitPkg,
 
+                    InstructionSet.X64_X86Serialize => X86Serialize,
+                    InstructionSet.X64_X86Serialize_X64 => X86Serialize,
+
+                    // Baseline ISAs - they're always available
                     InstructionSet.X64_X86Base => 0,
                     InstructionSet.X64_X86Base_X64 => 0,
 
@@ -255,22 +256,20 @@ public static int FromInstructionSet(InstructionSet instructionSet)
         // Keep these enumerations in sync with cpufeatures.h in the minipal.
         private static class Arm64IntrinsicConstants
         {
-            public const int AdvSimd = 0x0001;
-            public const int Aes = 0x0002;
-            public const int Crc32 = 0x0004;
-            public const int Dp = 0x0008;
-            public const int Rdm = 0x0010;
-            public const int Sha1 = 0x0020;
-            public const int Sha256 = 0x0040;
-            public const int Atomics = 0x0080;
-            public const int Rcpc = 0x0100;
-            public const int Rcpc2 = 0x0200;
-            public const int Sve = 0x0400;
+            public const int Aes = (1 << 0);
+            public const int Crc32 = (1 << 1);
+            public const int Dp = (1 << 2);
+            public const int Rdm = (1 << 3);
+            public const int Sha1 = (1 << 4);
+            public const int Sha256 = (1 << 5);
+            public const int Atomics = (1 << 6);
+            public const int Rcpc = (1 << 7);
+            public const int Rcpc2 = (1 << 8);
+            public const int Sve = (1 << 9);
+            public const int Sve2 = (1 << 10);
 
             public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
             {
-                if ((flags & AdvSimd) != 0)
-                    builder.AddSupportedInstructionSet("neon");
                 if ((flags & Aes) != 0)
                     builder.AddSupportedInstructionSet("aes");
                 if ((flags & Crc32) != 0)
@@ -291,6 +290,8 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
                     builder.AddSupportedInstructionSet("rcpc2");
                 if ((flags & Sve) != 0)
                     builder.AddSupportedInstructionSet("sve");
+                if ((flags & Sve2) != 0)
+                    builder.AddSupportedInstructionSet("sve2");
             }
 
             public static int FromInstructionSet(InstructionSet instructionSet)
@@ -301,8 +302,8 @@ public static int FromInstructionSet(InstructionSet instructionSet)
                     // Baseline ISAs - they're always available
                     InstructionSet.ARM64_ArmBase => 0,
                     InstructionSet.ARM64_ArmBase_Arm64 => 0,
-                    InstructionSet.ARM64_AdvSimd => AdvSimd,
-                    InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd,
+                    InstructionSet.ARM64_AdvSimd => 0,
+                    InstructionSet.ARM64_AdvSimd_Arm64 => 0,
 
                     // Optional ISAs - only available via opt-in or opportunistic light-up
                     InstructionSet.ARM64_Aes => Aes,
@@ -322,13 +323,45 @@ public static int FromInstructionSet(InstructionSet instructionSet)
                     InstructionSet.ARM64_Rcpc2 => Rcpc2,
                     InstructionSet.ARM64_Sve => Sve,
                     InstructionSet.ARM64_Sve_Arm64 => Sve,
+                    InstructionSet.ARM64_Sve2 => Sve2,
+                    InstructionSet.ARM64_Sve2_Arm64 => Sve2,
 
                     // Vector Sizes
-                    InstructionSet.ARM64_VectorT128 => AdvSimd,
+                    InstructionSet.ARM64_VectorT128 => 0,
 
                     _ => throw new NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString())
                 };
             }
         }
+
+        // Keep these enumerations in sync with cpufeatures.h in the minipal.
+        private static class RiscV64IntrinsicConstants
+        {
+            public const int Zba = (1 << 0);
+            public const int Zbb = (1 << 1);
+
+            public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
+            {
+                if ((flags & Zba) != 0)
+                    builder.AddSupportedInstructionSet("zba");
+                if ((flags & Zbb) != 0)
+                    builder.AddSupportedInstructionSet("zbb");
+            }
+
+            public static int FromInstructionSet(InstructionSet instructionSet)
+            {
+                return instructionSet switch
+                {
+                    // Baseline ISAs - they're always available
+                    InstructionSet.RiscV64_RiscV64Base => 0,
+
+                    // Optional ISAs - only available via opt-in or opportunistic light-up
+                    InstructionSet.RiscV64_Zba => Zba,
+                    InstructionSet.RiscV64_Zbb => Zbb,
+
+                    _ => throw new NotSupportedException(((InstructionSet_RiscV64)instructionSet).ToString())
+                };
+            }
+        }
     }
 }
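
The regrouped constants above make a single runtime feature bit stand for a whole ISA family (mirroring the reorganized `cpufeatures.h` groups), so the managed side now expands one bit into several instruction-set names. A reduced sketch of that expansion, with constants copied from the new values above (the grouping shown is illustrative, not the full table):

```csharp
using System;
using System.Collections.Generic;

const int Sse42 = 1 << 0; // matches XArchIntrinsicConstants.Sse42 above
const int Avx2 = 1 << 2;  // matches XArchIntrinsicConstants.Avx2 above

IEnumerable<string> Expand(int flags)
{
    if ((flags & Sse42) != 0)
        foreach (string isa in new[] { "sse3", "ssse3", "sse4.1", "sse4.2", "popcnt" })
            yield return isa;

    if ((flags & Avx2) != 0)
        foreach (string isa in new[] { "avx2", "bmi", "bmi2", "fma", "lzcnt", "movbe" })
            yield return isa;
}

Console.WriteLine(string.Join(", ", Expand(Sse42 | Avx2)));
```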
diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
index 57c62c612b3e..6c7b1fd7d628 100644
--- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
+++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
@@ -11,12 +11,6 @@
 
 namespace ILCompiler
 {
-    [Flags]
-    public enum InstructionSetSupportFlags
-    {
-        Vector512Throttling = 0x1,
-    }
-
     public class InstructionSetSupport
     {
         private readonly TargetArchitecture _targetArchitecture;
@@ -24,21 +18,19 @@ public class InstructionSetSupport
         private readonly InstructionSetFlags _supportedInstructionSets;
         private readonly InstructionSetFlags _unsupportedInstructionSets;
         private readonly InstructionSetFlags _nonSpecifiableInstructionSets;
-        private readonly InstructionSetSupportFlags _flags;
 
         public InstructionSetSupport(InstructionSetFlags supportedInstructionSets, InstructionSetFlags unsupportedInstructionSets, TargetArchitecture architecture) :
             this(supportedInstructionSets, unsupportedInstructionSets, supportedInstructionSets, default(InstructionSetFlags), architecture)
         {
         }
 
-        public InstructionSetSupport(InstructionSetFlags supportedInstructionSets, InstructionSetFlags unsupportedInstructionSets, InstructionSetFlags optimisticInstructionSets, InstructionSetFlags nonSpecifiableInstructionSets, TargetArchitecture architecture, InstructionSetSupportFlags flags = 0)
+        public InstructionSetSupport(InstructionSetFlags supportedInstructionSets, InstructionSetFlags unsupportedInstructionSets, InstructionSetFlags optimisticInstructionSets, InstructionSetFlags nonSpecifiableInstructionSets, TargetArchitecture architecture)
         {
             _supportedInstructionSets = supportedInstructionSets;
             _unsupportedInstructionSets = unsupportedInstructionSets;
             _optimisticInstructionSets = optimisticInstructionSets;
             _targetArchitecture = architecture;
             _nonSpecifiableInstructionSets = nonSpecifiableInstructionSets;
-            _flags = flags;
         }
 
         public bool IsInstructionSetSupported(InstructionSet instructionSet)
@@ -63,8 +55,6 @@ public bool IsInstructionSetExplicitlyUnsupported(InstructionSet instructionSet)
 
         public TargetArchitecture Architecture => _targetArchitecture;
 
-        public InstructionSetSupportFlags Flags => _flags;
-
         public static string GetHardwareIntrinsicId(TargetArchitecture architecture, TypeDesc potentialTypeDesc)
         {
             if (!potentialTypeDesc.IsIntrinsic || !(potentialTypeDesc is MetadataType potentialType))
@@ -125,10 +115,13 @@ public SimdVectorLength GetVectorTSimdVector()
                 Debug.Assert(InstructionSet.X64_VectorT256 == InstructionSet.X86_VectorT256);
                 Debug.Assert(InstructionSet.X64_VectorT512 == InstructionSet.X86_VectorT512);
 
-                // TODO-XArch: Add support for 512-bit Vector
-                Debug.Assert(!IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT512));
-
-                if (IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT256))
+                if (IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT512))
+                {
+                    Debug.Assert(!IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT128));
+                    Debug.Assert(!IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT256));
+                    return SimdVectorLength.Vector512Bit;
+                }
+                else if (IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT256))
                 {
                     Debug.Assert(!IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT128));
                     return SimdVectorLength.Vector256Bit;
@@ -211,7 +204,10 @@ private static Dictionary ComputeInstructSetSupportForAr
             {
                 // Only instruction sets with associated R2R enum values are specifiable
                 if (instructionSet.Specifiable)
-                    support.Add(instructionSet.Name, instructionSet.InstructionSet);
+                {
+                    _ = support.TryAdd(instructionSet.Name, instructionSet.InstructionSet);
+                    Debug.Assert(support[instructionSet.Name] == instructionSet.InstructionSet);
+                }
             }
 
             return support;
@@ -336,44 +332,11 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth,
             if ((_architecture == TargetArchitecture.X86) || (_architecture == TargetArchitecture.ARM))
                 unsupportedInstructionSets.Set64BitInstructionSetVariantsUnconditionally(_architecture);
 
-            // While it's possible to enable individual AVX-512 ISA's, it is not
-            // optimal to do so, since they aren't totally functional this way,
-            // plus it is extremely rare to encounter hardware that doesn't support
-            // all of them. So, here we ensure that we are enabling all the ISA's
-            // if one is specified in the Crossgen2 or ILC command-lines.
-            //
-            // For more information, check this Github comment:
-            // https://github.com/dotnet/runtime/issues/106450#issuecomment-2299504035
-
             if (_supportedInstructionSets.Any(iSet => iSet.Contains("avx512")))
             {
-                // We can simply try adding all of the AVX-512 ISA's here,
-                // since SortedSet just ignores the value if it is already present.
-
-                _supportedInstructionSets.Add("avx512f");
-                _supportedInstructionSets.Add("avx512f_vl");
-                _supportedInstructionSets.Add("avx512bw");
-                _supportedInstructionSets.Add("avx512bw_vl");
-                _supportedInstructionSets.Add("avx512cd");
-                _supportedInstructionSets.Add("avx512cd_vl");
-                _supportedInstructionSets.Add("avx512dq");
-                _supportedInstructionSets.Add("avx512dq_vl");
-
-                // If AVX-512VBMI is specified, then we have to include its VL
-                // counterpart as well.
-
-                if (_supportedInstructionSets.Contains("avx512vbmi"))
-                    _supportedInstructionSets.Add("avx512vbmi_vl");
-
                 // These ISAs should automatically extend to 512-bit if
                 // AVX-512 is enabled.
 
-                if (_supportedInstructionSets.Contains("avx10v1"))
-                    _supportedInstructionSets.Add("avx10v1_v512");
-
-                if (_supportedInstructionSets.Contains("avx10v2"))
-                    _supportedInstructionSets.Add("avx10v2_v512");
-
                 if (_supportedInstructionSets.Contains("gfni"))
                     _supportedInstructionSets.Add("gfni_v512");
 
@@ -430,9 +393,9 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth,
                 case TargetArchitecture.X64:
                 case TargetArchitecture.X86:
                 {
-                    Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2);
+                    Debug.Assert(InstructionSet.X86_X86Base == InstructionSet.X64_X86Base);
                     Debug.Assert(InstructionSet.X86_AVX2 == InstructionSet.X64_AVX2);
-                    Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F);
+                    Debug.Assert(InstructionSet.X86_AVX512 == InstructionSet.X64_AVX512);
 
                     Debug.Assert(InstructionSet.X86_VectorT128 == InstructionSet.X64_VectorT128);
                     Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256);
@@ -441,19 +404,19 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth,
                     // We only want one size supported for Vector and we want the other sizes explicitly
                     // unsupported to ensure we throw away the given methods if runtime picks a larger size
 
-                    Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.X86_SSE2));
+                    Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.X86_X86Base));
                     Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128));
                     supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128);
 
-                    if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX2))
+                    if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX512) && (maxVectorTBitWidth >= 512))
                     {
-                        if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))
-                        {
-                            supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128);
-                            supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256);
-                        }
-
-                        // TODO-XArch: Add support for 512-bit Vector
+                        supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128);
+                        supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512);
+                    }
+                    else if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX2) && (maxVectorTBitWidth is 0 or >= 256))
+                    {
+                        supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128);
+                        supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256);
                     }
                     break;
                 }
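
With the two hunks above, `Vector<T>` can now map to 512 bits: `GetVectorTSimdVector` reports `Vector512Bit` when `VectorT512` is in the optimistic set, and the flag computation only selects `VectorT512` when AVX-512 is supported and the requested width allows it. A compact decision sketch of that selection, using plain booleans in place of the `InstructionSetFlags` queries:

```csharp
using System;

int SelectVectorTBitWidth(bool hasAvx2, bool hasAvx512, int maxVectorTBitWidth)
{
    if (hasAvx512 && maxVectorTBitWidth >= 512)
        return 512; // Vector<T> == 512 bits, only when explicitly requested

    if (hasAvx2 && (maxVectorTBitWidth == 0 || maxVectorTBitWidth >= 256))
        return 256; // Vector<T> == 256 bits, the default on AVX2-capable targets

    return 128;     // baseline width
}

Console.WriteLine(SelectVectorTBitWidth(hasAvx2: true, hasAvx512: true, maxVectorTBitWidth: 0)); // 256
```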
diff --git a/src/coreclr/tools/Common/Compiler/TypeExtensions.cs b/src/coreclr/tools/Common/Compiler/TypeExtensions.cs
index f16b643e0cbb..ac69f6872a6b 100644
--- a/src/coreclr/tools/Common/Compiler/TypeExtensions.cs
+++ b/src/coreclr/tools/Common/Compiler/TypeExtensions.cs
@@ -104,32 +104,6 @@ public static bool IsCanonicalDefinitionType(this TypeDesc type, CanonicalFormKi
             return type.Context.IsCanonicalDefinitionType(type, kind);
         }
 
-        /// <summary>
-        /// Gets the value of the field ordinal. Ordinals are computed by also including static fields, but excluding
-        /// literal fields and fields with RVAs.
-        /// </summary>
-        public static int GetFieldOrdinal(this FieldDesc inputField)
-        {
-            // Make sure we are asking the question for a valid instance or static field
-            Debug.Assert(!inputField.HasRva && !inputField.IsLiteral);
-
-            int fieldOrdinal = 0;
-            foreach (FieldDesc field in inputField.OwningType.GetFields())
-            {
-                // If this field does not contribute to layout, skip
-                if (field.HasRva || field.IsLiteral)
-                    continue;
-
-                if (field == inputField)
-                    return fieldOrdinal;
-
-                fieldOrdinal++;
-            }
-
-            Debug.Assert(false);
-            return -1;
-        }
-
         /// <summary>
         /// Determine if a type has a generic depth greater than a given value
         /// </summary>
diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs
index 14940fc74db9..4e55eaab2afb 100644
--- a/src/coreclr/tools/Common/InstructionSetHelpers.cs
+++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs
@@ -20,12 +20,11 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
             string mustNotBeMessage, string invalidImplicationMessage, Logger logger, bool optimizingForSize = false)
         {
             InstructionSetSupportBuilder instructionSetSupportBuilder = new(targetArchitecture);
-            InstructionSetSupportFlags flags = 0;
 
             // Ready to run images are built with certain instruction set baselines
             if ((targetArchitecture == TargetArchitecture.X86) || (targetArchitecture == TargetArchitecture.X64))
             {
-                instructionSetSupportBuilder.AddSupportedInstructionSet("sse2"); // Lower baselines included by implication
+                instructionSetSupportBuilder.AddSupportedInstructionSet("base");
             }
             else if (targetArchitecture == TargetArchitecture.ARM64)
             {
@@ -36,7 +35,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
                 }
                 else
                 {
-                    instructionSetSupportBuilder.AddSupportedInstructionSet("neon"); // Lower baselines included by implication
+                    instructionSetSupportBuilder.AddSupportedInstructionSet("neon");
                 }
             }
 
@@ -45,6 +44,8 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
             // compile both branches of IsSupported checks.
             bool allowOptimistic = !optimizingForSize;
 
+            bool throttleAvx512 = false;
+
             if (instructionSet == "native")
             {
                 // We're compiling for a specific chip
@@ -92,7 +93,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
                                     // * Cascade Lake
                                     // * Cooper Lake
 
-                                    flags |= InstructionSetSupportFlags.Vector512Throttling;
+                                    throttleAvx512 = true;
                                 }
                             }
                             else if (extendedModel == 0x06)
@@ -101,13 +102,13 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
                                 {
                                     // * Cannon Lake
 
-                                    flags |= InstructionSetSupportFlags.Vector512Throttling;
+                                    throttleAvx512 = true;
                                 }
                             }
                         }
                     }
 
-                    if ((flags & InstructionSetSupportFlags.Vector512Throttling) != 0 && logger.IsVerbose)
+                    if (throttleAvx512 && logger.IsVerbose)
                         logger.LogMessage("Vector512 is throttled");
                 }
 
@@ -180,62 +181,63 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
             InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(instructionSetSupportBuilder);
 
             // Optimistically assume some instruction sets are present.
-            if (allowOptimistic && (targetArchitecture == TargetArchitecture.X86 || targetArchitecture == TargetArchitecture.X64))
+            if (allowOptimistic && targetArchitecture is TargetArchitecture.X86 or TargetArchitecture.X64)
             {
                 // We set these hardware features as opportunistically enabled as most of hardware in the wild supports them.
                 // Note that we do not indicate support for AVX, or any other instruction set which uses the VEX encodings as
                 // the presence of those makes otherwise acceptable code be unusable on hardware which does not support VEX encodings.
                 //
                 optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sse4.2"); // Lower SSE versions included by implication
+                optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("popcnt");
                 optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes");
                 optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("pclmul");
-                optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("movbe");
-                optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("popcnt");
-                optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lzcnt");
-                optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("serialize");
                 optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni");
+                optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha");
+                optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("waitpkg");
+                optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("x86serialize");
 
                 // If AVX was enabled, we can opportunistically enable instruction sets which use the VEX encodings
                 Debug.Assert(InstructionSet.X64_AVX == InstructionSet.X86_AVX);
+                Debug.Assert(InstructionSet.X64_AVX2 == InstructionSet.X86_AVX2);
+
                 if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX))
                 {
-                    // TODO: Enable optimistic usage of AVX2 once we validate it doesn't break Vector usage
-                    // optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx2");
-
-                    if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX2))
-                    {
-                        optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni");
-                    }
-
-                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("fma");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx2");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2");
-                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("fma");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lzcnt");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("movbe");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxifma");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v256");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vaes");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul");
+
+                    // If AVX2 is not in the supported set, we need to restrict the optimistic Vector<T> size, because
+                    // 256-bit Vector<T> cannot be fully accelerated based on AVX2 being in the optimistic set only.
+
+                    if (!supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX2))
+                    {
+                        maxVectorTBitWidth = 128;
+                    }
                 }
 
-                Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F);
-                if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F))
+                Debug.Assert(InstructionSet.X64_AVX512 == InstructionSet.X86_AVX512);
+                if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512))
                 {
-                    Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F_VL));
-                    Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW));
-                    Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW_VL));
-                    Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD));
-                    Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD_VL));
-                    Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ));
-                    Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL));
-
-                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi");
-                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi_vl");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512v2");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512v3");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1");
-                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1_v512");
-                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul_v512");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2");
-                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2_v512");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vp2intersect");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vaes_v512");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul_v512");
                 }
             }
-            else if (targetArchitecture == TargetArchitecture.ARM64)
+            else if (allowOptimistic && targetArchitecture is TargetArchitecture.ARM64)
             {
                 optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes");
                 optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("crc");
@@ -252,12 +254,42 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
             optimisticInstructionSet.Remove(unsupportedInstructionSet);
             optimisticInstructionSet.Add(supportedInstructionSet);
 
+            if (throttleAvx512)
+            {
+                Debug.Assert(InstructionSet.X86_AVX512 == InstructionSet.X64_AVX512);
+                if (supportedInstructionSet.HasInstructionSet(InstructionSet.X86_AVX512))
+                {
+                    Debug.Assert(InstructionSet.X86_Vector256 == InstructionSet.X64_Vector256);
+                    Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256);
+                    Debug.Assert(InstructionSet.X86_VectorT512 == InstructionSet.X64_VectorT512);
+
+                    // AVX-512 is supported, but we are compiling specifically for hardware that has a performance penalty for
+                    // using 512-bit ops. We want to tell the JIT not to consider Vector512 hardware accelerated, which we do
+                    // by passing a PreferredVectorBitWidth value, in the form of a virtual vector ISA of the appropriate size.
+                    //
+                    // If we are downgrading the max accelerated vector size, we also need to downgrade the Vector<T> size.
+
+                    supportedInstructionSet.AddInstructionSet(InstructionSet.X86_Vector256);
+
+                    if (supportedInstructionSet.HasInstructionSet(InstructionSet.X86_VectorT512))
+                    {
+                        supportedInstructionSet.RemoveInstructionSet(InstructionSet.X86_VectorT512);
+                        supportedInstructionSet.AddInstructionSet(InstructionSet.X86_VectorT256);
+                    }
+
+                    if (optimisticInstructionSet.HasInstructionSet(InstructionSet.X86_VectorT512))
+                    {
+                        optimisticInstructionSet.RemoveInstructionSet(InstructionSet.X86_VectorT512);
+                        optimisticInstructionSet.AddInstructionSet(InstructionSet.X86_VectorT256);
+                    }
+                }
+            }
+
             return new InstructionSetSupport(supportedInstructionSet,
                 unsupportedInstructionSet,
                 optimisticInstructionSet,
                 InstructionSetSupportBuilder.GetNonSpecifiableInstructionSetsForArch(targetArchitecture),
-                targetArchitecture,
-                flags);
+                targetArchitecture);
         }
     }
 }
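
To make the effect of the width throttling above concrete, here is a minimal probe of the public vector APIs (an illustrative sketch, not part of this change; the expected outputs are an assumption based on the PreferredVectorBitWidth comment above):

```csharp
using System;
using System.Numerics;
using System.Runtime.Intrinsics;

// Assumption: compiled for a target where AVX-512 is supported but throttled to a
// 256-bit preferred vector width, per the comment in ConfigureInstructionSetSupport.
Console.WriteLine($"Vector512 accelerated: {Vector512.IsHardwareAccelerated}"); // expected False
Console.WriteLine($"Vector256 accelerated: {Vector256.IsHardwareAccelerated}"); // expected True
Console.WriteLine($"Vector<float>.Count:   {Vector<float>.Count}");             // expected 8 (256-bit Vector<T>)
```
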
diff --git a/src/coreclr/tools/Common/Internal/NativeFormat/NativeFormat.cs b/src/coreclr/tools/Common/Internal/NativeFormat/NativeFormat.cs
index 19a75854b9bb..dbe629602172 100644
--- a/src/coreclr/tools/Common/Internal/NativeFormat/NativeFormat.cs
+++ b/src/coreclr/tools/Common/Internal/NativeFormat/NativeFormat.cs
@@ -50,7 +50,7 @@ enum BagElementKind : uint
         ThreadStaticDesc            = 0x48,
         ThreadStaticIndex           = 0x49,
         ThreadStaticOffset          = 0x4a,
-        FieldLayout                 = 0x4b,
+        // unused                   = 0x4b,
         // unused                   = 0x4c,
         // unused                   = 0x4d,
         ClassConstructorPointer     = 0x4e,
@@ -167,18 +167,6 @@ enum GenericContextKind : uint
         NeedsUSGContext             = 0x08,
     };
 
-#if NATIVEFORMAT_PUBLICWRITER
-    public
-#else
-    internal
-#endif
-    enum CallingConventionConverterKind : uint
-    {
-        NoInstantiatingParam        = 0x00,   // The calling convention interpreter can assume that the calling convention of the target method has no instantiating parameter
-        HasInstantiatingParam       = 0x01,   // The calling convention interpreter can assume that the calling convention of the target method has an instantiating parameter
-        MaybeInstantiatingParam     = 0x02,   // The calling convention interpreter can assume that the calling convention of the target method may be a fat function pointer
-    }
-
     [Flags]
 #if NATIVEFORMAT_PUBLICWRITER
     public
@@ -190,7 +178,6 @@ enum MethodFlags : uint
         HasInstantiation            = 0x1,
         IsUnboxingStub              = 0x2,
         HasFunctionPointer          = 0x4,
-        FunctionPointerIsUSG        = 0x8,
     };
 
     [Flags]
@@ -205,17 +192,4 @@ enum MethodCallingConvention : uint
         Static                      = 0x2,
         Unmanaged                   = 0x4,
     };
-
-#if NATIVEFORMAT_PUBLICWRITER
-    public
-#else
-    internal
-#endif
-    enum FieldStorage : uint
-    {
-        Instance                    = 0x0,
-        NonGCStatic                 = 0x1,
-        GCStatic                    = 0x2,
-        TLSStatic                   = 0x3,
-    }
 }
diff --git a/src/coreclr/tools/Common/Internal/NativeFormat/NativeFormatWriter.cs b/src/coreclr/tools/Common/Internal/NativeFormat/NativeFormatWriter.cs
index 0a347d7ab3c5..f2f0f33a8c6f 100644
--- a/src/coreclr/tools/Common/Internal/NativeFormat/NativeFormatWriter.cs
+++ b/src/coreclr/tools/Common/Internal/NativeFormat/NativeFormatWriter.cs
@@ -428,14 +428,6 @@ public Vertex GetTuple(Vertex item1, Vertex item2, Vertex item3)
             return Unify(vertex);
         }
 
-        public Vertex GetMethodNameAndSigSignature(string name, Vertex signature)
-        {
-            MethodNameAndSigSignature sig = new MethodNameAndSigSignature(
-                GetStringConstant(name),
-                GetRelativeOffsetSignature(signature));
-            return Unify(sig);
-        }
-
         public Vertex GetStringConstant(string value)
         {
             StringConstant vertex = new StringConstant(value);
@@ -460,15 +452,9 @@ public Vertex GetExternalTypeSignature(uint externalTypeId)
             return Unify(sig);
         }
 
-        public Vertex GetMethodSignature(uint flags, uint fptrReferenceId, Vertex containingType, Vertex methodNameAndSig, Vertex[] args)
+        public Vertex GetMethodSignature(uint flags, uint fptrReferenceId, Vertex containingType, int token, Vertex[] args)
         {
-            MethodSignature sig = new MethodSignature(flags, fptrReferenceId, containingType, methodNameAndSig, args);
-            return Unify(sig);
-        }
-
-        public Vertex GetFieldSignature(Vertex containingType, string name)
-        {
-            FieldSignature sig = new FieldSignature(containingType, name);
+            MethodSignature sig = new MethodSignature(flags, fptrReferenceId, containingType, token, args);
             return Unify(sig);
         }
 
@@ -765,43 +751,6 @@ public override int GetHashCode()
         }
     }
 
-#if NATIVEFORMAT_PUBLICWRITER
-    public
-#else
-    internal
-#endif
-    class MethodNameAndSigSignature : Vertex
-    {
-        private Vertex _methodName;
-        private Vertex _signature;
-
-        public MethodNameAndSigSignature(Vertex methodName, Vertex signature)
-        {
-            _methodName = methodName;
-            _signature = signature;
-        }
-
-        internal override void Save(NativeWriter writer)
-        {
-            _methodName.Save(writer);
-            _signature.Save(writer);
-        }
-
-        public override int GetHashCode()
-        {
-            return 509 * 197 + _methodName.GetHashCode() + 647 * _signature.GetHashCode();
-        }
-
-        public override bool Equals(object obj)
-        {
-            MethodNameAndSigSignature other = obj as MethodNameAndSigSignature;
-            if (other == null)
-                return false;
-
-            return Equals(_methodName, other._methodName) && Equals(_signature, other._signature);
-        }
-    }
-
 #if NATIVEFORMAT_PUBLICWRITER
     public
 #else
@@ -958,15 +907,15 @@ class MethodSignature : Vertex
         private uint _flags;
         private uint _fptrReferenceId;
         private Vertex _containingType;
-        private Vertex _methodNameAndSig;
+        private int _token;
         private Vertex[] _args;
 
-        public MethodSignature(uint flags, uint fptrReferenceId, Vertex containingType, Vertex methodNameAndSig, Vertex[] args)
+        public MethodSignature(uint flags, uint fptrReferenceId, Vertex containingType, int token, Vertex[] args)
         {
             _flags = flags;
             _fptrReferenceId = fptrReferenceId;
             _containingType = containingType;
-            _methodNameAndSig = methodNameAndSig;
+            _token = token;
             _args = args;
 
             if ((flags & (uint)MethodFlags.HasInstantiation) != 0)
@@ -981,7 +930,7 @@ internal override void Save(NativeWriter writer)
             if ((_flags & (uint)MethodFlags.HasFunctionPointer) != 0)
                 writer.WriteUnsigned(_fptrReferenceId);
             _containingType.Save(writer);
-            _methodNameAndSig.Save(writer);
+            writer.WriteUnsigned((uint)_token);
             if ((_flags & (uint)MethodFlags.HasInstantiation) != 0)
             {
                 writer.WriteUnsigned((uint)_args.Length);
@@ -998,7 +947,7 @@ public override int GetHashCode()
             hash += (hash << 5) + _containingType.GetHashCode();
             for (uint iArg = 0; _args != null && iArg < _args.Length; iArg++)
                 hash += (hash << 5) + _args[iArg].GetHashCode();
-            hash += (hash << 5) + _methodNameAndSig.GetHashCode();
+            hash += (hash << 5) + _token.GetHashCode();
             return hash;
         }
 
@@ -1011,8 +960,8 @@ public override bool Equals(object obj)
             if (!(
                 _flags == other._flags &&
                 _fptrReferenceId == other._fptrReferenceId &&
-                Equals(_containingType, other._containingType) &&
-                Equals(_methodNameAndSig, other._methodNameAndSig)))
+                _token == other._token &&
+                Equals(_containingType, other._containingType)))
             {
                 return false;
             }
@@ -1032,53 +981,6 @@ public override bool Equals(object obj)
         }
     }
 
-#if NATIVEFORMAT_PUBLICWRITER
-    public
-#else
-    internal
-#endif
-    class FieldSignature : Vertex
-    {
-        private Vertex _containingType;
-        private string _name;
-
-        public FieldSignature(Vertex containingType, string name)
-        {
-            _containingType = containingType;
-            _name = name;
-        }
-
-        internal override void Save(NativeWriter writer)
-        {
-            _containingType.Save(writer);
-            writer.WriteString(_name);
-        }
-
-        public override int GetHashCode()
-        {
-            int hash = 113 + 97 * _containingType.GetHashCode();
-            foreach (char c in _name)
-                hash += (hash << 5) + c * 19;
-
-            return hash;
-        }
-
-        public override bool Equals(object obj)
-        {
-            var other = obj as FieldSignature;
-            if (other == null)
-                return false;
-
-            if (!Equals(other._containingType, _containingType))
-                return false;
-
-            if (!Equals(other._name, _name))
-                return false;
-
-            return true;
-        }
-    }
-
 #if NATIVEFORMAT_PUBLICWRITER
     public
 #else
diff --git a/src/coreclr/tools/Common/Internal/Runtime/CanonTypeKind.cs b/src/coreclr/tools/Common/Internal/Runtime/CanonTypeKind.cs
deleted file mode 100644
index 661725af6939..000000000000
--- a/src/coreclr/tools/Common/Internal/Runtime/CanonTypeKind.cs
+++ /dev/null
@@ -1,11 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-namespace Internal.Runtime
-{
-    public enum CanonTypeKind
-    {
-        NormalCanon,
-        UniversalCanon,
-    }
-}
diff --git a/src/coreclr/tools/Common/Internal/Runtime/MappingTableFlags.cs b/src/coreclr/tools/Common/Internal/Runtime/MappingTableFlags.cs
index 189be63698bc..b85d3c48d0cd 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/MappingTableFlags.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/MappingTableFlags.cs
@@ -22,11 +22,11 @@ public enum InvokeTableFlags : uint
     {
         HasVirtualInvoke = 0x00000001,
         IsGenericMethod = 0x00000002,
-        HasMetadataHandle = 0x00000004,
+        // = 0x00000004,
         IsDefaultConstructor = 0x00000008,
         RequiresInstArg = 0x00000010,
         HasEntrypoint = 0x00000020,
-        IsUniversalCanonicalEntry = 0x00000040,
+        // = 0x00000040,
         NeedsParameterInterpretation = 0x00000080,
         CallingConventionDefault = 0x00000000,
         Cdecl = 0x00001000,
@@ -47,8 +47,8 @@ public enum FieldTableFlags : uint
 
         StorageClass = 0x03,
 
-        IsUniversalCanonicalEntry = 0x04,
-        HasMetadataHandle = 0x08,
+        // = 0x04,
+        // = 0x08,
         FieldOffsetEncodedDirectly = 0x20,
         IsAnyCanonicalEntry = 0x40,
         IsInitOnly = 0x80
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
index 10ed724001d5..e8fdf0308934 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
@@ -15,7 +15,7 @@ internal struct ReadyToRunHeaderConstants
     {
         public const uint Signature = 0x00525452; // 'RTR'
 
-        public const ushort CurrentMajorVersion = 12;
+        public const ushort CurrentMajorVersion = 14;
         public const ushort CurrentMinorVersion = 0;
     }
 #if READYTORUN
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
index 8bcbe0667812..0c697c610241 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
@@ -140,7 +140,7 @@ public enum ReadyToRunFixupKind
         VirtualEntry = 0x16,                // For invoking a virtual method
         VirtualEntry_DefToken = 0x17,       // Smaller version of VirtualEntry - method is def token
         VirtualEntry_RefToken = 0x18,       // Smaller version of VirtualEntry - method is ref token
-        VirtualEntry_Slot = 0x19,           // Smaller version of VirtualEntry - type & slot
+        VirtualEntry_Slot = 0x19,           // Smaller version of VirtualEntry - type & slot - OBSOLETE: not currently used, never emitted by R2R codegen since crossgen2 was introduced, and possibly never used at all.
 
         Helper = 0x1A,                      // Helper
         StringHandle = 0x1B,                // String handle
@@ -312,6 +312,8 @@ public enum ReadyToRunHelper
         Dbl2UIntOvf                 = 0xD5,
         Dbl2ULng                    = 0xD6,
         Dbl2ULngOvf                 = 0xD7,
+        Lng2Flt                     = 0xD8,
+        ULng2Flt                    = 0xD9,
 
         // Floating point ops
         DblRem                      = 0xE0,
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
index d486d9338bf7..47837ff594ce 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
@@ -57,7 +57,7 @@ public enum ReadyToRunInstructionSet
         Sve=43,
         Avx10v1=44,
         Avx10v1_V512=46,
-        EVEX=47,
+        Evex=47,
         Apx=48,
         Pclmulqdq_V256=49,
         Pclmulqdq_V512=50,
@@ -66,6 +66,30 @@ public enum ReadyToRunInstructionSet
         Gfni=53,
         Gfni_V256=54,
         Gfni_V512=55,
+        RiscV64Base=56,
+        Zba=57,
+        Zbb=58,
+        Sve2=59,
+        Aes_V256=64,
+        Aes_V512=65,
+        AvxIfma=66,
+        F16C=67,
+        Sha=68,
+        WaitPkg=69,
+        Avx512Bitalg=70,
+        Avx512Bitalg_VL=71,
+        Avx512Bf16=72,
+        Avx512Bf16_VL=73,
+        Avx512Fp16=74,
+        Avx512Fp16_VL=75,
+        Avx512Ifma=76,
+        Avx512Vbmi2=77,
+        Avx512Vbmi2_VL=78,
+        Avx512Vnni=79,
+        Avx512Vp2intersect=80,
+        Avx512Vp2intersect_VL=81,
+        Avx512Vpopcntdq=82,
+        Avx512Vpopcntdq_VL=83,
 
     }
 }
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
index 5e50f7cc00b7..8e3abccd0118 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
@@ -48,6 +48,20 @@ public static class ReadyToRunInstructionSetHelper
                             case InstructionSet.ARM64_Rcpc2: return ReadyToRunInstructionSet.Rcpc2;
                             case InstructionSet.ARM64_Sve: return ReadyToRunInstructionSet.Sve;
                             case InstructionSet.ARM64_Sve_Arm64: return ReadyToRunInstructionSet.Sve;
+                            case InstructionSet.ARM64_Sve2: return ReadyToRunInstructionSet.Sve2;
+                            case InstructionSet.ARM64_Sve2_Arm64: return ReadyToRunInstructionSet.Sve2;
+
+                            default: throw new Exception("Unknown instruction set");
+                        }
+                    }
+
+                case TargetArchitecture.RiscV64:
+                    {
+                        switch (instructionSet)
+                        {
+                            case InstructionSet.RiscV64_RiscV64Base: return ReadyToRunInstructionSet.RiscV64Base;
+                            case InstructionSet.RiscV64_Zba: return ReadyToRunInstructionSet.Zba;
+                            case InstructionSet.RiscV64_Zbb: return ReadyToRunInstructionSet.Zbb;
 
                             default: throw new Exception("Unknown instruction set");
                         }
@@ -59,10 +73,6 @@ public static class ReadyToRunInstructionSetHelper
                         {
                             case InstructionSet.X64_X86Base: return ReadyToRunInstructionSet.X86Base;
                             case InstructionSet.X64_X86Base_X64: return ReadyToRunInstructionSet.X86Base;
-                            case InstructionSet.X64_SSE: return ReadyToRunInstructionSet.Sse;
-                            case InstructionSet.X64_SSE_X64: return ReadyToRunInstructionSet.Sse;
-                            case InstructionSet.X64_SSE2: return ReadyToRunInstructionSet.Sse2;
-                            case InstructionSet.X64_SSE2_X64: return ReadyToRunInstructionSet.Sse2;
                             case InstructionSet.X64_SSE3: return ReadyToRunInstructionSet.Sse3;
                             case InstructionSet.X64_SSE3_X64: return ReadyToRunInstructionSet.Sse3;
                             case InstructionSet.X64_SSSE3: return ReadyToRunInstructionSet.Ssse3;
@@ -71,12 +81,12 @@ public static class ReadyToRunInstructionSetHelper
                             case InstructionSet.X64_SSE41_X64: return ReadyToRunInstructionSet.Sse41;
                             case InstructionSet.X64_SSE42: return ReadyToRunInstructionSet.Sse42;
                             case InstructionSet.X64_SSE42_X64: return ReadyToRunInstructionSet.Sse42;
+                            case InstructionSet.X64_POPCNT: return ReadyToRunInstructionSet.Popcnt;
+                            case InstructionSet.X64_POPCNT_X64: return ReadyToRunInstructionSet.Popcnt;
                             case InstructionSet.X64_AVX: return ReadyToRunInstructionSet.Avx;
                             case InstructionSet.X64_AVX_X64: return ReadyToRunInstructionSet.Avx;
                             case InstructionSet.X64_AVX2: return ReadyToRunInstructionSet.Avx2;
                             case InstructionSet.X64_AVX2_X64: return ReadyToRunInstructionSet.Avx2;
-                            case InstructionSet.X64_AES: return ReadyToRunInstructionSet.Aes;
-                            case InstructionSet.X64_AES_X64: return ReadyToRunInstructionSet.Aes;
                             case InstructionSet.X64_BMI1: return ReadyToRunInstructionSet.Bmi1;
                             case InstructionSet.X64_BMI1_X64: return ReadyToRunInstructionSet.Bmi1;
                             case InstructionSet.X64_BMI2: return ReadyToRunInstructionSet.Bmi2;
@@ -85,52 +95,48 @@ public static class ReadyToRunInstructionSetHelper
                             case InstructionSet.X64_FMA_X64: return ReadyToRunInstructionSet.Fma;
                             case InstructionSet.X64_LZCNT: return ReadyToRunInstructionSet.Lzcnt;
                             case InstructionSet.X64_LZCNT_X64: return ReadyToRunInstructionSet.Lzcnt;
+                            case InstructionSet.X64_MOVBE: return ReadyToRunInstructionSet.Movbe;
+                            case InstructionSet.X64_AVX512: return ReadyToRunInstructionSet.Evex;
+                            case InstructionSet.X64_AVX512_X64: return ReadyToRunInstructionSet.Evex;
+                            case InstructionSet.X64_AVX512VBMI: return ReadyToRunInstructionSet.Avx512Ifma;
+                            case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Ifma;
+                            case InstructionSet.X64_AVX512v3: return ReadyToRunInstructionSet.Avx512Bitalg;
+                            case InstructionSet.X64_AVX512v3_X64: return ReadyToRunInstructionSet.Avx512Bitalg;
+                            case InstructionSet.X64_AVX10v1: return ReadyToRunInstructionSet.Avx512Bf16;
+                            case InstructionSet.X64_AVX10v1_X64: return ReadyToRunInstructionSet.Avx512Bf16;
+                            case InstructionSet.X64_AVX10v2: return ReadyToRunInstructionSet.Avx10v2;
+                            case InstructionSet.X64_AVX10v2_X64: return ReadyToRunInstructionSet.Avx10v2;
+                            case InstructionSet.X64_APX: return ReadyToRunInstructionSet.Apx;
+                            case InstructionSet.X64_AES: return ReadyToRunInstructionSet.Aes;
+                            case InstructionSet.X64_AES_X64: return ReadyToRunInstructionSet.Aes;
                             case InstructionSet.X64_PCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq;
                             case InstructionSet.X64_PCLMULQDQ_X64: return ReadyToRunInstructionSet.Pclmulqdq;
+                            case InstructionSet.X64_AVX512VP2INTERSECT: return ReadyToRunInstructionSet.Avx512Vp2intersect;
+                            case InstructionSet.X64_AVX512VP2INTERSECT_X64: return ReadyToRunInstructionSet.Avx512Vp2intersect;
+                            case InstructionSet.X64_AVXIFMA: return ReadyToRunInstructionSet.AvxIfma;
+                            case InstructionSet.X64_AVXIFMA_X64: return ReadyToRunInstructionSet.AvxIfma;
+                            case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni;
+                            case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni;
+                            case InstructionSet.X64_GFNI: return ReadyToRunInstructionSet.Gfni;
+                            case InstructionSet.X64_GFNI_X64: return ReadyToRunInstructionSet.Gfni;
+                            case InstructionSet.X64_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256;
+                            case InstructionSet.X64_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512;
+                            case InstructionSet.X64_SHA: return ReadyToRunInstructionSet.Sha;
+                            case InstructionSet.X64_SHA_X64: return ReadyToRunInstructionSet.Sha;
+                            case InstructionSet.X64_AES_V256: return ReadyToRunInstructionSet.Aes_V256;
+                            case InstructionSet.X64_AES_V512: return ReadyToRunInstructionSet.Aes_V512;
                             case InstructionSet.X64_PCLMULQDQ_V256: return ReadyToRunInstructionSet.Pclmulqdq_V256;
                             case InstructionSet.X64_PCLMULQDQ_V512: return ReadyToRunInstructionSet.Pclmulqdq_V512;
-                            case InstructionSet.X64_POPCNT: return ReadyToRunInstructionSet.Popcnt;
-                            case InstructionSet.X64_POPCNT_X64: return ReadyToRunInstructionSet.Popcnt;
+                            case InstructionSet.X64_WAITPKG: return ReadyToRunInstructionSet.WaitPkg;
+                            case InstructionSet.X64_WAITPKG_X64: return ReadyToRunInstructionSet.WaitPkg;
+                            case InstructionSet.X64_X86Serialize: return ReadyToRunInstructionSet.X86Serialize;
+                            case InstructionSet.X64_X86Serialize_X64: return ReadyToRunInstructionSet.X86Serialize;
                             case InstructionSet.X64_Vector128: return null;
                             case InstructionSet.X64_Vector256: return null;
                             case InstructionSet.X64_Vector512: return null;
-                            case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni;
-                            case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni;
-                            case InstructionSet.X64_MOVBE: return ReadyToRunInstructionSet.Movbe;
-                            case InstructionSet.X64_X86Serialize: return ReadyToRunInstructionSet.X86Serialize;
-                            case InstructionSet.X64_X86Serialize_X64: return ReadyToRunInstructionSet.X86Serialize;
-                            case InstructionSet.X64_EVEX: return ReadyToRunInstructionSet.EVEX;
-                            case InstructionSet.X64_AVX512F: return ReadyToRunInstructionSet.Avx512F;
-                            case InstructionSet.X64_AVX512F_X64: return ReadyToRunInstructionSet.Avx512F;
-                            case InstructionSet.X64_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL;
-                            case InstructionSet.X64_AVX512BW: return ReadyToRunInstructionSet.Avx512BW;
-                            case InstructionSet.X64_AVX512BW_X64: return ReadyToRunInstructionSet.Avx512BW;
-                            case InstructionSet.X64_AVX512BW_VL: return ReadyToRunInstructionSet.Avx512BW_VL;
-                            case InstructionSet.X64_AVX512CD: return ReadyToRunInstructionSet.Avx512CD;
-                            case InstructionSet.X64_AVX512CD_X64: return ReadyToRunInstructionSet.Avx512CD;
-                            case InstructionSet.X64_AVX512CD_VL: return ReadyToRunInstructionSet.Avx512CD_VL;
-                            case InstructionSet.X64_AVX512DQ: return ReadyToRunInstructionSet.Avx512DQ;
-                            case InstructionSet.X64_AVX512DQ_X64: return ReadyToRunInstructionSet.Avx512DQ;
-                            case InstructionSet.X64_AVX512DQ_VL: return ReadyToRunInstructionSet.Avx512DQ_VL;
-                            case InstructionSet.X64_AVX512VBMI: return ReadyToRunInstructionSet.Avx512Vbmi;
-                            case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi;
-                            case InstructionSet.X64_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
-                            case InstructionSet.X64_AVX10v1: return ReadyToRunInstructionSet.Avx10v1;
-                            case InstructionSet.X64_AVX10v1_X64: return ReadyToRunInstructionSet.Avx10v1;
-                            case InstructionSet.X64_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512;
-                            case InstructionSet.X64_AVX10v1_V512_X64: return ReadyToRunInstructionSet.Avx10v1_V512;
                             case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128;
                             case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256;
                             case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512;
-                            case InstructionSet.X64_APX: return ReadyToRunInstructionSet.Apx;
-                            case InstructionSet.X64_AVX10v2: return ReadyToRunInstructionSet.Avx10v2;
-                            case InstructionSet.X64_AVX10v2_X64: return ReadyToRunInstructionSet.Avx10v2;
-                            case InstructionSet.X64_AVX10v2_V512: return ReadyToRunInstructionSet.Avx10v2_V512;
-                            case InstructionSet.X64_AVX10v2_V512_X64: return ReadyToRunInstructionSet.Avx10v2_V512;
-                            case InstructionSet.X64_GFNI: return ReadyToRunInstructionSet.Gfni;
-                            case InstructionSet.X64_GFNI_X64: return ReadyToRunInstructionSet.Gfni;
-                            case InstructionSet.X64_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256;
-                            case InstructionSet.X64_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512;
 
                             default: throw new Exception("Unknown instruction set");
                         }
@@ -142,10 +148,6 @@ public static class ReadyToRunInstructionSetHelper
                         {
                             case InstructionSet.X86_X86Base: return ReadyToRunInstructionSet.X86Base;
                             case InstructionSet.X86_X86Base_X64: return null;
-                            case InstructionSet.X86_SSE: return ReadyToRunInstructionSet.Sse;
-                            case InstructionSet.X86_SSE_X64: return null;
-                            case InstructionSet.X86_SSE2: return ReadyToRunInstructionSet.Sse2;
-                            case InstructionSet.X86_SSE2_X64: return null;
                             case InstructionSet.X86_SSE3: return ReadyToRunInstructionSet.Sse3;
                             case InstructionSet.X86_SSE3_X64: return null;
                             case InstructionSet.X86_SSSE3: return ReadyToRunInstructionSet.Ssse3;
@@ -154,12 +156,12 @@ public static class ReadyToRunInstructionSetHelper
                             case InstructionSet.X86_SSE41_X64: return null;
                             case InstructionSet.X86_SSE42: return ReadyToRunInstructionSet.Sse42;
                             case InstructionSet.X86_SSE42_X64: return null;
+                            case InstructionSet.X86_POPCNT: return ReadyToRunInstructionSet.Popcnt;
+                            case InstructionSet.X86_POPCNT_X64: return null;
                             case InstructionSet.X86_AVX: return ReadyToRunInstructionSet.Avx;
                             case InstructionSet.X86_AVX_X64: return null;
                             case InstructionSet.X86_AVX2: return ReadyToRunInstructionSet.Avx2;
                             case InstructionSet.X86_AVX2_X64: return null;
-                            case InstructionSet.X86_AES: return ReadyToRunInstructionSet.Aes;
-                            case InstructionSet.X86_AES_X64: return null;
                             case InstructionSet.X86_BMI1: return ReadyToRunInstructionSet.Bmi1;
                             case InstructionSet.X86_BMI1_X64: return null;
                             case InstructionSet.X86_BMI2: return ReadyToRunInstructionSet.Bmi2;
@@ -168,52 +170,48 @@ public static class ReadyToRunInstructionSetHelper
                             case InstructionSet.X86_FMA_X64: return null;
                             case InstructionSet.X86_LZCNT: return ReadyToRunInstructionSet.Lzcnt;
                             case InstructionSet.X86_LZCNT_X64: return null;
+                            case InstructionSet.X86_MOVBE: return ReadyToRunInstructionSet.Movbe;
+                            case InstructionSet.X86_AVX512: return ReadyToRunInstructionSet.Evex;
+                            case InstructionSet.X86_AVX512_X64: return null;
+                            case InstructionSet.X86_AVX512VBMI: return ReadyToRunInstructionSet.Avx512Ifma;
+                            case InstructionSet.X86_AVX512VBMI_X64: return null;
+                            case InstructionSet.X86_AVX512v3: return ReadyToRunInstructionSet.Avx512Bitalg;
+                            case InstructionSet.X86_AVX512v3_X64: return null;
+                            case InstructionSet.X86_AVX10v1: return ReadyToRunInstructionSet.Avx512Bf16;
+                            case InstructionSet.X86_AVX10v1_X64: return null;
+                            case InstructionSet.X86_AVX10v2: return ReadyToRunInstructionSet.Avx10v2;
+                            case InstructionSet.X86_AVX10v2_X64: return null;
+                            case InstructionSet.X86_APX: return ReadyToRunInstructionSet.Apx;
+                            case InstructionSet.X86_AES: return ReadyToRunInstructionSet.Aes;
+                            case InstructionSet.X86_AES_X64: return null;
                             case InstructionSet.X86_PCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq;
                             case InstructionSet.X86_PCLMULQDQ_X64: return null;
+                            case InstructionSet.X86_AVX512VP2INTERSECT: return ReadyToRunInstructionSet.Avx512Vp2intersect;
+                            case InstructionSet.X86_AVX512VP2INTERSECT_X64: return null;
+                            case InstructionSet.X86_AVXIFMA: return ReadyToRunInstructionSet.AvxIfma;
+                            case InstructionSet.X86_AVXIFMA_X64: return null;
+                            case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni;
+                            case InstructionSet.X86_AVXVNNI_X64: return null;
+                            case InstructionSet.X86_GFNI: return ReadyToRunInstructionSet.Gfni;
+                            case InstructionSet.X86_GFNI_X64: return null;
+                            case InstructionSet.X86_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256;
+                            case InstructionSet.X86_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512;
+                            case InstructionSet.X86_SHA: return ReadyToRunInstructionSet.Sha;
+                            case InstructionSet.X86_SHA_X64: return null;
+                            case InstructionSet.X86_AES_V256: return ReadyToRunInstructionSet.Aes_V256;
+                            case InstructionSet.X86_AES_V512: return ReadyToRunInstructionSet.Aes_V512;
                             case InstructionSet.X86_PCLMULQDQ_V256: return ReadyToRunInstructionSet.Pclmulqdq_V256;
                             case InstructionSet.X86_PCLMULQDQ_V512: return ReadyToRunInstructionSet.Pclmulqdq_V512;
-                            case InstructionSet.X86_POPCNT: return ReadyToRunInstructionSet.Popcnt;
-                            case InstructionSet.X86_POPCNT_X64: return null;
+                            case InstructionSet.X86_WAITPKG: return ReadyToRunInstructionSet.WaitPkg;
+                            case InstructionSet.X86_WAITPKG_X64: return null;
+                            case InstructionSet.X86_X86Serialize: return ReadyToRunInstructionSet.X86Serialize;
+                            case InstructionSet.X86_X86Serialize_X64: return null;
                             case InstructionSet.X86_Vector128: return null;
                             case InstructionSet.X86_Vector256: return null;
                             case InstructionSet.X86_Vector512: return null;
-                            case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni;
-                            case InstructionSet.X86_AVXVNNI_X64: return null;
-                            case InstructionSet.X86_MOVBE: return ReadyToRunInstructionSet.Movbe;
-                            case InstructionSet.X86_X86Serialize: return ReadyToRunInstructionSet.X86Serialize;
-                            case InstructionSet.X86_X86Serialize_X64: return null;
-                            case InstructionSet.X86_EVEX: return ReadyToRunInstructionSet.EVEX;
-                            case InstructionSet.X86_AVX512F: return ReadyToRunInstructionSet.Avx512F;
-                            case InstructionSet.X86_AVX512F_X64: return null;
-                            case InstructionSet.X86_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL;
-                            case InstructionSet.X86_AVX512BW: return ReadyToRunInstructionSet.Avx512BW;
-                            case InstructionSet.X86_AVX512BW_X64: return null;
-                            case InstructionSet.X86_AVX512BW_VL: return ReadyToRunInstructionSet.Avx512BW_VL;
-                            case InstructionSet.X86_AVX512CD: return ReadyToRunInstructionSet.Avx512CD;
-                            case InstructionSet.X86_AVX512CD_X64: return null;
-                            case InstructionSet.X86_AVX512CD_VL: return ReadyToRunInstructionSet.Avx512CD_VL;
-                            case InstructionSet.X86_AVX512DQ: return ReadyToRunInstructionSet.Avx512DQ;
-                            case InstructionSet.X86_AVX512DQ_X64: return null;
-                            case InstructionSet.X86_AVX512DQ_VL: return ReadyToRunInstructionSet.Avx512DQ_VL;
-                            case InstructionSet.X86_AVX512VBMI: return ReadyToRunInstructionSet.Avx512Vbmi;
-                            case InstructionSet.X86_AVX512VBMI_X64: return null;
-                            case InstructionSet.X86_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
-                            case InstructionSet.X86_AVX10v1: return ReadyToRunInstructionSet.Avx10v1;
-                            case InstructionSet.X86_AVX10v1_X64: return null;
-                            case InstructionSet.X86_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512;
-                            case InstructionSet.X86_AVX10v1_V512_X64: return null;
                             case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128;
                             case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256;
                             case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512;
-                            case InstructionSet.X86_APX: return ReadyToRunInstructionSet.Apx;
-                            case InstructionSet.X86_AVX10v2: return ReadyToRunInstructionSet.Avx10v2;
-                            case InstructionSet.X86_AVX10v2_X64: return null;
-                            case InstructionSet.X86_AVX10v2_V512: return ReadyToRunInstructionSet.Avx10v2_V512;
-                            case InstructionSet.X86_AVX10v2_V512_X64: return null;
-                            case InstructionSet.X86_GFNI: return ReadyToRunInstructionSet.Gfni;
-                            case InstructionSet.X86_GFNI_X64: return null;
-                            case InstructionSet.X86_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256;
-                            case InstructionSet.X86_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512;
 
                             default: throw new Exception("Unknown instruction set");
                         }
diff --git a/src/coreclr/tools/Common/Internal/Runtime/RiscVLoongArch64FpStruct.cs b/src/coreclr/tools/Common/Internal/Runtime/RiscVLoongArch64FpStruct.cs
index ad066f1d9ec6..577f60deb6c9 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/RiscVLoongArch64FpStruct.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/RiscVLoongArch64FpStruct.cs
@@ -78,7 +78,8 @@ private static void SetFpStructInRegistersInfoField(ref FpStructInRegistersInfo
             (index == 0 ? ref info.offset1st : ref info.offset2nd) = offset;
         }
 
-        private static bool HandleInlineArray(int elementTypeIndex, int nElements, ref FpStructInRegistersInfo info, ref int typeIndex)
+        private static bool HandleInlineArray(int elementTypeIndex, int nElements,
+            ref FpStructInRegistersInfo info, ref int typeIndex, ref uint occupiedBytesMap)
         {
             int nFlattenedFieldsPerElement = typeIndex - elementTypeIndex;
             if (nFlattenedFieldsPerElement == 0)
@@ -110,6 +111,16 @@ private static bool HandleInlineArray(int elementTypeIndex, int nElements, ref F
                 int sizeShiftMask = (int)(info.flags & FpStruct.SizeShift1stMask) << 2;
                 info.flags |= (FpStruct)(floatFlag | sizeShiftMask); // merge with 1st field
                 info.offset2nd = info.offset1st + info.Size1st(); // bump up the field offset
+
+                Debug.Assert(info.Size1st() == info.Size2nd());
+                uint startOffset = info.offset2nd;
+                uint endOffset = startOffset + info.Size2nd();
+
+                uint fieldOccupation = (~0u << (int)startOffset) ^ (~0u << (int)endOffset);
+                if ((occupiedBytesMap & fieldOccupation) != 0)
+                    return false; // duplicated array element overlaps with other fields
+
+                occupiedBytesMap |= fieldOccupation;
             }
             return true;
         }
@@ -119,23 +130,30 @@ private static bool FlattenFields(TypeDesc td, uint offset, ref FpStructInRegist
             IEnumerable<FieldDesc> fields = td.GetFields();
             int nFields = 0;
             int elementTypeIndex = typeIndex;
-            FieldDesc prevField = null;
+            FieldDesc lastField = null;
+            uint occupiedBytesMap = 0;
             foreach (FieldDesc field in fields)
             {
                 if (field.IsStatic)
                     continue;
                 nFields++;
 
-                if (prevField != null && prevField.Offset.AsInt + prevField.FieldType.GetElementSize().AsInt > field.Offset.AsInt)
+                uint startOffset = offset + (uint)field.Offset.AsInt;
+                uint endOffset = startOffset + (uint)field.FieldType.GetElementSize().AsInt;
+
+                uint fieldOccupation = (~0u << (int)startOffset) ^ (~0u << (int)endOffset);
+                if ((occupiedBytesMap & fieldOccupation) != 0)
                     return false; // fields overlap, treat as union
 
-                prevField = field;
+                occupiedBytesMap |= fieldOccupation;
+
+                lastField = field;
 
                 TypeFlags category = field.FieldType.Category;
                 if (category == TypeFlags.ValueType)
                 {
                     TypeDesc nested = field.FieldType;
-                    if (!FlattenFields(nested, offset + (uint)field.Offset.AsInt, ref info, ref typeIndex))
+                    if (!FlattenFields(nested, startOffset, ref info, ref typeIndex))
                         return false;
                 }
                 else if (field.FieldType.GetElementSize().AsInt <= TARGET_POINTER_SIZE)
@@ -145,7 +163,7 @@ private static bool FlattenFields(TypeDesc td, uint offset, ref FpStructInRegist
 
                     bool isFloating = category is TypeFlags.Single or TypeFlags.Double;
                     SetFpStructInRegistersInfoField(ref info, typeIndex++,
-                        isFloating, (uint)field.FieldType.GetElementSize().AsInt, offset + (uint)field.Offset.AsInt);
+                        isFloating, (uint)field.FieldType.GetElementSize().AsInt, startOffset);
                 }
                 else
                 {
@@ -156,7 +174,7 @@ private static bool FlattenFields(TypeDesc td, uint offset, ref FpStructInRegist
             if ((td as MetadataType).HasImpliedRepeatedFields())
             {
                 Debug.Assert(nFields == 1);
-                int nElements = td.GetElementSize().AsInt / prevField.FieldType.GetElementSize().AsInt;
+                int nElements = td.GetElementSize().AsInt / lastField.FieldType.GetElementSize().AsInt;
 
                 // Only InlineArrays can have element type of empty struct, fixed-size buffers take only primitives
                 if ((typeIndex - elementTypeIndex) == 0 && (td as MetadataType).IsInlineArray)
@@ -165,7 +183,7 @@ private static bool FlattenFields(TypeDesc td, uint offset, ref FpStructInRegist
                     return false; // struct containing an array of empty structs is passed by integer calling convention
                 }
 
-                if (!HandleInlineArray(elementTypeIndex, nElements, ref info, ref typeIndex))
+                if (!HandleInlineArray(elementTypeIndex, nElements, ref info, ref typeIndex, ref occupiedBytesMap))
                     return false;
             }
             return true;
@@ -189,6 +207,20 @@ public static FpStructInRegistersInfo GetFpStructInRegistersInfo(TypeDesc td, Ta
                 return new FpStructInRegistersInfo{}; // struct has no floating fields
 
             Debug.Assert(nFields == 1 || nFields == 2);
+            if (nFields == 2 && info.offset1st > info.offset2nd)
+            {
+                // swap fields to match memory order
+                info.flags = (FpStruct)(
+                    ((uint)(info.flags & FloatInt) << (PosIntFloat - PosFloatInt)) |
+                    ((uint)(info.flags & IntFloat) >> (PosIntFloat - PosFloatInt)) |
+                    ((uint)(info.flags & SizeShift1stMask) << (PosSizeShift2nd - PosSizeShift1st)) |
+                    ((uint)(info.flags & SizeShift2ndMask) >> (PosSizeShift2nd - PosSizeShift1st))
+                );
+                (info.offset2nd, info.offset1st) = (info.offset1st, info.offset2nd);
+            }
+            Debug.Assert((info.flags & (OnlyOne | BothFloat)) == 0);
+            Debug.Assert((info.flags & FloatInt) == 0 || info.Size1st() == sizeof(float) || info.Size1st() == sizeof(double));
+            Debug.Assert((info.flags & IntFloat) == 0 || info.Size2nd() == sizeof(float) || info.Size2nd() == sizeof(double));
 
             if ((info.flags & (FloatInt | IntFloat)) == (FloatInt | IntFloat))
             {
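
The occupied-bytes map used in FlattenFields and HandleInlineArray above encodes each struct byte as one bit, so overlap detection is a single AND. Below is a standalone re-derivation of that test (a sketch, assuming flattened struct offsets stay below 32, as they do for FP-register passing):

```csharp
using System;

static class OccupiedBytesDemo
{
    // (~0u << start) ^ (~0u << end) sets exactly the bits [start, end).
    static bool TryOccupy(ref uint occupied, uint start, uint end)
    {
        uint fieldBits = (~0u << (int)start) ^ (~0u << (int)end);
        if ((occupied & fieldBits) != 0)
            return false; // overlaps an earlier field: treat the struct as a union
        occupied |= fieldBits;
        return true;
    }

    static void Main()
    {
        uint map = 0;
        Console.WriteLine(TryOccupy(ref map, 0, 4));  // True  (bytes 0-3)
        Console.WriteLine(TryOccupy(ref map, 4, 8));  // True  (bytes 4-7)
        Console.WriteLine(TryOccupy(ref map, 6, 10)); // False (bytes 6-7 already occupied)
    }
}
```
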
diff --git a/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs
index 9d356cb06681..944adfd79f5a 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs
@@ -33,6 +33,9 @@ internal static class ArrayTypesConstants
     internal enum GC_ALLOC_FLAGS
     {
         GC_ALLOC_NO_FLAGS = 0,
+        GC_ALLOC_FINALIZE = 1,
+        GC_ALLOC_ALIGN8_BIAS = 4,
+        GC_ALLOC_ALIGN8 = 8,
         GC_ALLOC_ZEROING_OPTIONAL = 16,
         GC_ALLOC_PINNED_OBJECT_HEAP = 64,
     }
diff --git a/src/coreclr/tools/Common/Internal/Runtime/UniversalGenericParameterLayout.cs b/src/coreclr/tools/Common/Internal/Runtime/UniversalGenericParameterLayout.cs
deleted file mode 100644
index 47d9234f1fbf..000000000000
--- a/src/coreclr/tools/Common/Internal/Runtime/UniversalGenericParameterLayout.cs
+++ /dev/null
@@ -1,106 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using Internal.TypeSystem;
-
-namespace Internal.Runtime
-{
-    internal static class UniversalGenericParameterLayout
-    {
-        private enum HasVarsInvestigationLevel
-        {
-            Parameter,
-            NotParameter
-        }
-
-        /// 
-        /// IF THESE SEMANTICS EVER CHANGE UPDATE THE LOGIC WHICH DEFINES THIS BEHAVIOR IN
-        /// THE DYNAMIC TYPE LOADER AS WELL AS THE COMPILER.
-        /// (There is a version of this in TypeLoaderEnvironment.SignatureParsing.cs that must be kept in sync with this.)
-        ///
-        /// Parameter's are considered to have type layout dependent on their generic instantiation
-        /// if the type of the parameter in its signature is a type variable, or if the type is a generic
-        /// structure which meets 2 characteristics:
-        /// 1. Structure size/layout is affected by the size/layout of one or more of its generic parameters
-        /// 2. One or more of the generic parameters is a type variable, or a generic structure which also recursively
-        ///    would satisfy constraint 2. (Note, that in the recursion case, whether or not the structure is affected
-        ///    by the size/layout of its generic parameters is not investigated.)
-        ///
-        /// Examples parameter types, and behavior.
-        ///
-        /// T = true
-        /// List[T] = false
-        /// StructNotDependentOnArgsForSize[T] = false
-        /// GenStructDependencyOnArgsForSize[T] = true
-        /// StructNotDependentOnArgsForSize[GenStructDependencyOnArgsForSize[T]] = true
-        /// StructNotDependentOnArgsForSize[GenStructDependencyOnArgsForSize[List[T]]]] = false
-        ///
-        /// Example non-parameter type behavior
-        /// T = true
-        /// List[T] = false
-        /// StructNotDependentOnArgsForSize[T] = *true*
-        /// GenStructDependencyOnArgsForSize[T] = true
-        /// StructNotDependentOnArgsForSize[GenStructDependencyOnArgsForSize[T]] = true
-        /// StructNotDependentOnArgsForSize[GenStructDependencyOnArgsForSize[List[T]]]] = false
-        /// 
-        public static bool IsLayoutDependentOnGenericInstantiation(TypeDesc type)
-        {
-            return IsLayoutDependentOnGenericInstantiation(type, HasVarsInvestigationLevel.Parameter);
-        }
-
-        private static bool IsLayoutDependentOnGenericInstantiation(TypeDesc type, HasVarsInvestigationLevel investigationLevel)
-        {
-            if (type.IsSignatureVariable)
-            {
-                return true;
-            }
-            else if (type.HasInstantiation && type.IsValueType)
-            {
-                foreach (TypeDesc valueTypeInstantiationParam in type.Instantiation)
-                {
-                    if (IsLayoutDependentOnGenericInstantiation(valueTypeInstantiationParam, HasVarsInvestigationLevel.NotParameter))
-                    {
-                        if (investigationLevel == HasVarsInvestigationLevel.Parameter)
-                        {
-                            DefType universalCanonForm = (DefType)type.ConvertToCanonForm(CanonicalFormKind.Universal);
-                            return universalCanonForm.InstanceFieldSize.IsIndeterminate;
-                        }
-                        else
-                        {
-                            return true;
-                        }
-                    }
-                }
-                return false;
-            }
-            else
-            {
-                // All other forms of type do not change their shape dependent on signature variables.
-                return false;
-            }
-        }
-
-        public static bool MethodSignatureHasVarsNeedingCallingConventionConverter(TypeSystem.MethodSignature methodSignature)
-        {
-            if (IsLayoutDependentOnGenericInstantiation(methodSignature.ReturnType, HasVarsInvestigationLevel.Parameter))
-                return true;
-
-            for (int i = 0; i < methodSignature.Length; i++)
-            {
-                if (IsLayoutDependentOnGenericInstantiation(methodSignature[i], HasVarsInvestigationLevel.Parameter))
-                    return true;
-            }
-
-            return false;
-        }
-
-        public static bool VTableMethodRequiresCallingConventionConverter(MethodDesc method)
-        {
-            if (!MethodSignatureHasVarsNeedingCallingConventionConverter(method.GetTypicalMethodDefinition().Signature))
-                return false;
-
-            MethodDesc slotDecl = MetadataVirtualMethodAlgorithm.FindSlotDefiningMethodForVirtualMethod(method).GetCanonMethodTarget(CanonicalFormKind.Specific);
-            return slotDecl.IsCanonicalMethod(CanonicalFormKind.Universal);
-        }
-    }
-}
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
index d11a739ae0d1..006fc44be2e6 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
@@ -28,7 +28,9 @@ public enum CorInfoHelpFunc
         CORINFO_HELP_LMOD,
         CORINFO_HELP_ULDIV,
         CORINFO_HELP_ULMOD,
+        CORINFO_HELP_LNG2FLT,               // Convert a signed int64 to a float
         CORINFO_HELP_LNG2DBL,               // Convert a signed int64 to a double
+        CORINFO_HELP_ULNG2FLT,              // Convert an unsigned int64 to a float
         CORINFO_HELP_ULNG2DBL,              // Convert a unsigned int64 to a double
         CORINFO_HELP_DBL2INT,
         CORINFO_HELP_DBL2INT_OVF,
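
For orientation, these two new helpers back the ordinary C# conversions sketched below; whether the JIT emits a helper call or a native instruction depends on the target ISA (the lowering noted in the comments is an assumption):

```csharp
using System;

long signedValue = long.MaxValue;
ulong unsignedValue = ulong.MaxValue;
float fromSigned = signedValue;     // may lower to CORINFO_HELP_LNG2FLT on targets without a direct conversion
float fromUnsigned = unsignedValue; // may lower to CORINFO_HELP_ULNG2FLT likewise
Console.WriteLine($"{fromSigned} {fromUnsigned}");
```
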
@@ -55,7 +57,7 @@ which is the right helper to use to allocate an object of a given type. */
         CORINFO_HELP_NEW_MDARR_RARE, // rare multi-dim array helper (Rank == 1)
         CORINFO_HELP_NEWARR_1_DIRECT,   // helper for any one dimensional array creation
         CORINFO_HELP_NEWARR_1_MAYBEFROZEN, // allocator for arrays that *might* allocate them on a frozen segment
-        CORINFO_HELP_NEWARR_1_OBJ,      // optimized 1-D object arrays
+        CORINFO_HELP_NEWARR_1_PTR,      // optimized arrays of pointer sized elements
         CORINFO_HELP_NEWARR_1_VC,       // optimized 1-D value class arrays
         CORINFO_HELP_NEWARR_1_ALIGN8,   // like VC, but aligns the array start
 
@@ -96,6 +98,7 @@ which is the right helper to use to allocate an object of a given type. */
 
         CORINFO_HELP_THROW,             // Throw an exception object
         CORINFO_HELP_RETHROW,           // Rethrow the currently active exception
+        CORINFO_HELP_THROWEXACT,        // Throw an exception object, preserving stack trace
         CORINFO_HELP_USER_BREAKPOINT,   // For a user program to break to the debugger
         CORINFO_HELP_RNGCHKFAIL,        // array bounds check failed
         CORINFO_HELP_OVERFLOW,          // throw an overflow exception
@@ -269,7 +272,7 @@ which is the right helper to use to allocate an object of a given type. */
         CORINFO_HELP_STACK_PROBE,               // Probes each page of the allocated stack frame
 
         CORINFO_HELP_PATCHPOINT,                // Notify runtime that code has reached a patchpoint
-        CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT,  // Notify runtime that code has reached a part of the method that wasn't originally jitted.
+        CORINFO_HELP_PATCHPOINT_FORCED,         // Notify runtime that code has reached a part of the method that needs to transition
 
         CORINFO_HELP_CLASSPROFILE32,            // Update 32-bit class profile for a call site
         CORINFO_HELP_CLASSPROFILE64,            // Update 64-bit class profile for a call site
@@ -285,6 +288,7 @@ which is the right helper to use to allocate an object of a given type. */
         CORINFO_HELP_VALIDATE_INDIRECT_CALL,    // CFG: Validate function pointer
         CORINFO_HELP_DISPATCH_INDIRECT_CALL,    // CFG: Validate and dispatch to pointer
 
         CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP,
         CORINFO_HELP_LLVM_EH_CATCH,
         CORINFO_HELP_LLVM_EH_POP_UNWOUND_VIRTUAL_FRAMES,
@@ -294,6 +298,11 @@ which is the right helper to use to allocate an object of a given type. */
         CORINFO_HELP_LLVM_RESOLVE_INTERFACE_CALL_TARGET,
         CORINFO_HELP_LLVM_GET_EXTERNAL_CALL_TARGET,
         CORINFO_HELP_LLVM_STRESS_GC,
+        CORINFO_HELP_ALLOC_CONTINUATION,
+        CORINFO_HELP_ALLOC_CONTINUATION_METHOD,
+        CORINFO_HELP_ALLOC_CONTINUATION_CLASS,
 
         CORINFO_HELP_COUNT,
     }
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
index 3150dc29f664..b85e35fd1749 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
@@ -1264,6 +1264,23 @@ private CorInfoInline canInline(CORINFO_METHOD_STRUCT_* callerHnd, CORINFO_METHO
             MethodDesc callerMethod = HandleToObject(callerHnd);
             MethodDesc calleeMethod = HandleToObject(calleeHnd);
 
+            EcmaModule rootModule = (MethodBeingCompiled.OwningType as MetadataType)?.Module as EcmaModule;
+            EcmaModule calleeModule = (calleeMethod.OwningType as MetadataType)?.Module as EcmaModule;
+
+            // If this inline crosses module boundaries, ensure the modules agree on exception wrapping behavior.
+            if ((rootModule != calleeModule) && (rootModule != null) && (calleeModule != null))
+            {
+                if (rootModule.IsWrapNonExceptionThrows != calleeModule.IsWrapNonExceptionThrows)
+                {
+                    var calleeIL = _compilation.GetMethodIL(calleeMethod);
+                    if (calleeIL.GetExceptionRegions().Length != 0)
+                    {
+                        // Fail inlining if root method and callee have different exception wrapping behavior
+                        return CorInfoInline.INLINE_FAIL;
+                    }
+                }
+            }
+
             if (_compilation.CanInline(callerMethod, calleeMethod))
             {
                 // No restrictions on inlining
@@ -1272,6 +1289,10 @@ private CorInfoInline canInline(CORINFO_METHOD_STRUCT_* callerHnd, CORINFO_METHO
             else
             {
                 // Call may not be inlined
+                //
+                // Note despite returning INLINE_NEVER here, in compilations where jitting is possible
+                // the jit may still be able to inline this method. So we rely on reportInliningDecision
+                // to not propagate this into metadata to short-circuit future inlining attempts.
                 return CorInfoInline.INLINE_NEVER;
             }
         }
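
The cross-module check added in this hunk hinges on the per-assembly RuntimeCompatibilityAttribute(WrapNonExceptionThrows = ...) setting: whether a thrown non-Exception object reaches a handler wrapped in RuntimeWrappedException is decided by the module containing the handler, so inlining a callee's EH regions into a caller from a module with a different setting could change observable behavior. A small, hedged illustration of the setting itself; the demo class and scenario are hypothetical and not part of this change.

    using System;
    using System.Runtime.CompilerServices;

    // The C# compiler emits this attribute with WrapNonExceptionThrows = true by default;
    // IL or other languages can set it to false.
    [assembly: RuntimeCompatibility(WrapNonExceptionThrows = true)]

    static class WrappingDemo
    {
        public static void Run(Action thrower)
        {
            try
            {
                thrower();
            }
            catch (RuntimeWrappedException rwe)
            {
                // Only reached when this module wraps non-Exception throws.
                // If this handler were inlined into a module compiled with
                // WrapNonExceptionThrows = false, the raw object would escape
                // instead - which is what the canInline check above guards against.
                Console.WriteLine($"Wrapped object: {rwe.WrappedException}");
            }
        }
    }
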
@@ -2637,30 +2658,6 @@ private CorInfoHelpFunc getSharedCCtorHelper(CORINFO_CLASS_STRUCT_* clsHnd)
             return ObjectToHandle(typeForBox);
         }
 
-        private CORINFO_CLASS_STRUCT_* getTypeForBoxOnStack(CORINFO_CLASS_STRUCT_* cls)
-        {
-            TypeDesc clsTypeDesc = HandleToObject(cls);
-            if (clsTypeDesc.IsNullable)
-            {
-                clsTypeDesc = clsTypeDesc.Instantiation[0];
-            }
-
-            if (clsTypeDesc.RequiresAlign8())
-            {
-                // Conservatively give up on such types (32bit)
-                return null;
-            }
-
-            // Instantiate StackAllocatedBox helper type with the type we're boxing
-            MetadataType placeholderType = _compilation.TypeSystemContext.SystemModule.GetType("System.Runtime.CompilerServices", "StackAllocatedBox`1", throwIfNotFound: false);
-            if (placeholderType == null)
-            {
-                // Give up if corelib does not have support for stackallocation
-                return null;
-            }
-            return ObjectToHandle(placeholderType.MakeInstantiatedType(clsTypeDesc));
-        }
-
         private CorInfoHelpFunc getBoxHelper(CORINFO_CLASS_STRUCT_* cls)
         {
             var type = HandleToObject(cls);
@@ -3373,6 +3370,11 @@ private void getEEInfo(ref CORINFO_EE_INFO pEEInfoOut)
             pEEInfoOut.osType = TargetToOs(_compilation.NodeFactory.Target);
         }
 
+        private void getAsyncInfo(ref CORINFO_ASYNC_INFO pAsyncInfoOut)
+        {
+            throw new NotImplementedException();
+        }
+
         private mdToken getMethodDefFromMethod(CORINFO_METHOD_STRUCT_* hMethod)
         {
             MethodDesc method = HandleToObject(hMethod);
@@ -3700,6 +3702,13 @@ private bool getTailCallHelpers(ref CORINFO_RESOLVED_TOKEN callToken, CORINFO_SI
 #endif
         }
 
+#pragma warning disable CA1822 // Mark members as static
+        private CORINFO_METHOD_STRUCT_* getAsyncResumptionStub()
+#pragma warning restore CA1822 // Mark members as static
+        {
+            return null;
+        }
+
         private byte[] _code;
         private byte[] _coldCode;
         private int _codeAlignment;
@@ -4221,9 +4230,8 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes)
             flags.InstructionSetFlags.Add(_compilation.InstructionSetSupport.OptimisticFlags);
 
             // Set the rest of the flags that don't make sense to expose publicly.
-            flags.Set(CorJitFlag.CORJIT_FLAG_READYTORUN);
+            flags.Set(CorJitFlag.CORJIT_FLAG_AOT);
             flags.Set(CorJitFlag.CORJIT_FLAG_RELOC);
-            flags.Set(CorJitFlag.CORJIT_FLAG_PREJIT);
             flags.Set(CorJitFlag.CORJIT_FLAG_USE_PINVOKE_HELPERS);
 
             TargetArchitecture targetArchitecture = _compilation.TypeSystemContext.Target.Architecture;
@@ -4232,11 +4240,8 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes)
             {
                 case TargetArchitecture.X64:
                 case TargetArchitecture.X86:
-                    Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2);
-                    Debug.Assert(_compilation.InstructionSetSupport.IsInstructionSetSupported(InstructionSet.X86_SSE2));
-
-                    if ((_compilation.InstructionSetSupport.Flags & InstructionSetSupportFlags.Vector512Throttling) != 0)
-                        flags.Set(CorJitFlag.CORJIT_FLAG_VECTOR512_THROTTLING);
+                    Debug.Assert(InstructionSet.X86_X86Base == InstructionSet.X64_X86Base);
+                    Debug.Assert(_compilation.InstructionSetSupport.IsInstructionSetSupported(InstructionSet.X86_X86Base));
                     break;
 
                 case TargetArchitecture.ARM64:
@@ -4438,29 +4443,38 @@ private bool notifyInstructionSetUsage(InstructionSet instructionSet, bool suppo
                 }
                 else
                 {
+                    // We want explicitly implemented ISimdVector<TSelf, T> APIs to still be expanded where possible
+                    // but, they all prefix the qualified name of the interface first, so we'll check for that and
+                    // skip the prefix before trying to resolve the method.
                     ReadOnlySpan<char> methodName = MethodBeingCompiled.Name.AsSpan();
 
-                    if (methodName.StartsWith("System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector"))
+                    if (methodName.StartsWith("System.Runtime.Intrinsics.ISimdVector<System."))
                     {
-                        // We want explicitly implemented ISimdVector<TSelf, T> APIs to still be expanded where possible
-                        // but, they all prefix the qualified name of the interface first, so we'll check for that and
-                        // skip the prefix before trying to resolve the method.
-
-                        ReadOnlySpan<char> partialMethodName = methodName.Slice(70);
+                        ReadOnlySpan<char> partialMethodName = methodName.Slice(45);
 
-                        if (partialMethodName.StartsWith("<T>,T>."))
+                        if (partialMethodName.StartsWith("Numerics.Vector"))
                         {
-                            methodName = partialMethodName.Slice(7);
-                        }
-                        else if (partialMethodName.StartsWith("64<T>,T>."))
-                        {
-                            methodName = partialMethodName.Slice(9);
+                            partialMethodName = partialMethodName.Slice(15);
+
+                            if (partialMethodName.StartsWith("<T>,T>."))
+                            {
+                                methodName = partialMethodName.Slice(7);
+                            }
                         }
-                        else if (partialMethodName.StartsWith("128<T>,T>.") ||
-                                 partialMethodName.StartsWith("256<T>,T>.") ||
-                                 partialMethodName.StartsWith("512<T>,T>."))
+                        if (partialMethodName.StartsWith("Runtime.Intrinsics.Vector"))
                         {
-                            methodName = partialMethodName.Slice(10);
+                            partialMethodName = partialMethodName.Slice(25);
+
+                            if (partialMethodName.StartsWith("64<T>,T>."))
+                            {
+                                methodName = partialMethodName.Slice(9);
+                            }
+                            else if (partialMethodName.StartsWith("128<T>,T>.") ||
+                                    partialMethodName.StartsWith("256<T>,T>.") ||
+                                    partialMethodName.StartsWith("512<T>,T>."))
+                            {
+                                methodName = partialMethodName.Slice(10);
+                            }
                         }
                     }
 
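To make the new prefix handling above easier to follow: explicitly implemented ISimdVector members carry the fully qualified interface name in their metadata name, and the code peels that prefix off with StartsWith/Slice before resolving the intrinsic. A standalone, hedged sketch of the same idea; this is not the JIT interface code itself, and the helper name is made up.

    using System;

    class PrefixDemo
    {
        // Reduces names like
        // "System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector128<T>,T>.get_Count"
        // to "get_Count", mirroring the StartsWith/Slice pattern in the hunk above.
        static ReadOnlySpan<char> StripISimdVectorPrefix(ReadOnlySpan<char> methodName)
        {
            if (methodName.StartsWith("System.Runtime.Intrinsics.ISimdVector<System."))
            {
                ReadOnlySpan<char> partial = methodName.Slice(45);

                if (partial.StartsWith("Numerics.Vector"))
                {
                    partial = partial.Slice(15);
                    if (partial.StartsWith("<T>,T>."))
                        return partial.Slice(7);
                }
                else if (partial.StartsWith("Runtime.Intrinsics.Vector"))
                {
                    partial = partial.Slice(25);
                    if (partial.StartsWith("64<T>,T>."))
                        return partial.Slice(9);
                    if (partial.StartsWith("128<T>,T>.") ||
                        partial.StartsWith("256<T>,T>.") ||
                        partial.StartsWith("512<T>,T>."))
                        return partial.Slice(10);
                }
            }

            return methodName; // not an explicitly implemented ISimdVector member
        }

        static void Main()
        {
            ReadOnlySpan<char> name =
                "System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector128<T>,T>.get_Count";
            Console.WriteLine(StripISimdVectorPrefix(name).ToString()); // prints: get_Count
        }
    }
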
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs
index 91df884c5827..8b1a6f5d8d04 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs
@@ -972,21 +972,6 @@ private static CorInfoHelpFunc _getSharedCCtorHelper(IntPtr thisHandle, IntPtr*
             }
         }
 
-        [UnmanagedCallersOnly]
-        private static CORINFO_CLASS_STRUCT_* _getTypeForBoxOnStack(IntPtr thisHandle, IntPtr* ppException, CORINFO_CLASS_STRUCT_* cls)
-        {
-            var _this = GetThis(thisHandle);
-            try
-            {
-                return _this.getTypeForBoxOnStack(cls);
-            }
-            catch (Exception ex)
-            {
-                *ppException = _this.AllocException(ex);
-                return default;
-            }
-        }
-
         [UnmanagedCallersOnly]
         private static CorInfoHelpFunc _getBoxHelper(IntPtr thisHandle, IntPtr* ppException, CORINFO_CLASS_STRUCT_* cls)
         {
@@ -1784,6 +1769,20 @@ private static void _getEEInfo(IntPtr thisHandle, IntPtr* ppException, CORINFO_E
             }
         }
 
+        [UnmanagedCallersOnly]
+        private static void _getAsyncInfo(IntPtr thisHandle, IntPtr* ppException, CORINFO_ASYNC_INFO* pAsyncInfoOut)
+        {
+            var _this = GetThis(thisHandle);
+            try
+            {
+                _this.getAsyncInfo(ref *pAsyncInfoOut);
+            }
+            catch (Exception ex)
+            {
+                *ppException = _this.AllocException(ex);
+            }
+        }
+
         [UnmanagedCallersOnly]
         private static mdToken _getMethodDefFromMethod(IntPtr thisHandle, IntPtr* ppException, CORINFO_METHOD_STRUCT_* hMethod)
         {
@@ -2329,6 +2328,21 @@ private static byte _getTailCallHelpers(IntPtr thisHandle, IntPtr* ppException,
             }
         }
 
+        [UnmanagedCallersOnly]
+        private static CORINFO_METHOD_STRUCT_* _getAsyncResumptionStub(IntPtr thisHandle, IntPtr* ppException)
+        {
+            var _this = GetThis(thisHandle);
+            try
+            {
+                return _this.getAsyncResumptionStub();
+            }
+            catch (Exception ex)
+            {
+                *ppException = _this.AllocException(ex);
+                return default;
+            }
+        }
+
         [UnmanagedCallersOnly]
         private static byte _convertPInvokeCalliToCall(IntPtr thisHandle, IntPtr* ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, byte mustConvert)
         {
@@ -2623,7 +2637,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_
 
         private static IntPtr GetUnmanagedCallbacks()
         {
-            void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 177);
+            void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 178);
 
             callbacks[0] = (delegate* unmanaged)&_isIntrinsic;
             callbacks[1] = (delegate* unmanaged)&_notifyMethodInfoUsage;
@@ -2690,61 +2704,61 @@ private static IntPtr GetUnmanagedCallbacks()
             callbacks[62] = (delegate* unmanaged)&_getCastingHelper;
             callbacks[63] = (delegate* unmanaged)&_getSharedCCtorHelper;
             callbacks[64] = (delegate* unmanaged)&_getTypeForBox;
-            callbacks[65] = (delegate* unmanaged)&_getTypeForBoxOnStack;
-            callbacks[66] = (delegate* unmanaged)&_getBoxHelper;
-            callbacks[67] = (delegate* unmanaged)&_getUnBoxHelper;
-            callbacks[68] = (delegate* unmanaged)&_getRuntimeTypePointer;
-            callbacks[69] = (delegate* unmanaged)&_isObjectImmutable;
-            callbacks[70] = (delegate* unmanaged)&_getStringChar;
-            callbacks[71] = (delegate* unmanaged)&_getObjectType;
-            callbacks[72] = (delegate* unmanaged)&_getReadyToRunHelper;
-            callbacks[73] = (delegate* unmanaged)&_getReadyToRunDelegateCtorHelper;
-            callbacks[74] = (delegate* unmanaged)&_initClass;
-            callbacks[75] = (delegate* unmanaged)&_classMustBeLoadedBeforeCodeIsRun;
-            callbacks[76] = (delegate* unmanaged)&_getBuiltinClass;
-            callbacks[77] = (delegate* unmanaged)&_getTypeForPrimitiveValueClass;
-            callbacks[78] = (delegate* unmanaged)&_getTypeForPrimitiveNumericClass;
-            callbacks[79] = (delegate* unmanaged)&_canCast;
-            callbacks[80] = (delegate* unmanaged)&_compareTypesForCast;
-            callbacks[81] = (delegate* unmanaged)&_compareTypesForEquality;
-            callbacks[82] = (delegate* unmanaged)&_isMoreSpecificType;
-            callbacks[83] = (delegate* unmanaged)&_isExactType;
-            callbacks[84] = (delegate* unmanaged)&_isGenericType;
-            callbacks[85] = (delegate* unmanaged)&_isNullableType;
-            callbacks[86] = (delegate* unmanaged)&_isEnum;
-            callbacks[87] = (delegate* unmanaged)&_getParentType;
-            callbacks[88] = (delegate* unmanaged)&_getChildType;
-            callbacks[89] = (delegate* unmanaged)&_isSDArray;
-            callbacks[90] = (delegate* unmanaged)&_getArrayRank;
-            callbacks[91] = (delegate* unmanaged)&_getArrayIntrinsicID;
-            callbacks[92] = (delegate* unmanaged)&_getArrayInitializationData;
-            callbacks[93] = (delegate* unmanaged)&_canAccessClass;
-            callbacks[94] = (delegate* unmanaged)&_printFieldName;
-            callbacks[95] = (delegate* unmanaged)&_getFieldClass;
-            callbacks[96] = (delegate* unmanaged)&_getFieldType;
-            callbacks[97] = (delegate* unmanaged)&_getFieldOffset;
-            callbacks[98] = (delegate* unmanaged)&_getFieldInfo;
-            callbacks[99] = (delegate* unmanaged)&_getThreadLocalFieldInfo;
-            callbacks[100] = (delegate* unmanaged)&_getThreadLocalStaticBlocksInfo;
-            callbacks[101] = (delegate* unmanaged)&_getThreadLocalStaticInfo_NativeAOT;
-            callbacks[102] = (delegate* unmanaged)&_isFieldStatic;
-            callbacks[103] = (delegate* unmanaged)&_getArrayOrStringLength;
-            callbacks[104] = (delegate* unmanaged)&_getBoundaries;
-            callbacks[105] = (delegate* unmanaged)&_setBoundaries;
-            callbacks[106] = (delegate* unmanaged)&_getVars;
-            callbacks[107] = (delegate* unmanaged)&_setVars;
-            callbacks[108] = (delegate* unmanaged)&_reportRichMappings;
-            callbacks[109] = (delegate* unmanaged)&_reportMetadata;
-            callbacks[110] = (delegate* unmanaged)&_allocateArray;
-            callbacks[111] = (delegate* unmanaged)&_freeArray;
-            callbacks[112] = (delegate* unmanaged)&_getArgNext;
-            callbacks[113] = (delegate* unmanaged)&_getArgType;
-            callbacks[114] = (delegate* unmanaged)&_getExactClasses;
-            callbacks[115] = (delegate* unmanaged)&_getArgClass;
-            callbacks[116] = (delegate* unmanaged)&_getHFAType;
-            callbacks[117] = (delegate* unmanaged)&_runWithErrorTrap;
-            callbacks[118] = (delegate* unmanaged)&_runWithSPMIErrorTrap;
-            callbacks[119] = (delegate* unmanaged)&_getEEInfo;
+            callbacks[65] = (delegate* unmanaged)&_getBoxHelper;
+            callbacks[66] = (delegate* unmanaged)&_getUnBoxHelper;
+            callbacks[67] = (delegate* unmanaged)&_getRuntimeTypePointer;
+            callbacks[68] = (delegate* unmanaged)&_isObjectImmutable;
+            callbacks[69] = (delegate* unmanaged)&_getStringChar;
+            callbacks[70] = (delegate* unmanaged)&_getObjectType;
+            callbacks[71] = (delegate* unmanaged)&_getReadyToRunHelper;
+            callbacks[72] = (delegate* unmanaged)&_getReadyToRunDelegateCtorHelper;
+            callbacks[73] = (delegate* unmanaged)&_initClass;
+            callbacks[74] = (delegate* unmanaged)&_classMustBeLoadedBeforeCodeIsRun;
+            callbacks[75] = (delegate* unmanaged)&_getBuiltinClass;
+            callbacks[76] = (delegate* unmanaged)&_getTypeForPrimitiveValueClass;
+            callbacks[77] = (delegate* unmanaged)&_getTypeForPrimitiveNumericClass;
+            callbacks[78] = (delegate* unmanaged)&_canCast;
+            callbacks[79] = (delegate* unmanaged)&_compareTypesForCast;
+            callbacks[80] = (delegate* unmanaged)&_compareTypesForEquality;
+            callbacks[81] = (delegate* unmanaged)&_isMoreSpecificType;
+            callbacks[82] = (delegate* unmanaged)&_isExactType;
+            callbacks[83] = (delegate* unmanaged)&_isGenericType;
+            callbacks[84] = (delegate* unmanaged)&_isNullableType;
+            callbacks[85] = (delegate* unmanaged)&_isEnum;
+            callbacks[86] = (delegate* unmanaged)&_getParentType;
+            callbacks[87] = (delegate* unmanaged)&_getChildType;
+            callbacks[88] = (delegate* unmanaged)&_isSDArray;
+            callbacks[89] = (delegate* unmanaged)&_getArrayRank;
+            callbacks[90] = (delegate* unmanaged)&_getArrayIntrinsicID;
+            callbacks[91] = (delegate* unmanaged)&_getArrayInitializationData;
+            callbacks[92] = (delegate* unmanaged)&_canAccessClass;
+            callbacks[93] = (delegate* unmanaged)&_printFieldName;
+            callbacks[94] = (delegate* unmanaged)&_getFieldClass;
+            callbacks[95] = (delegate* unmanaged)&_getFieldType;
+            callbacks[96] = (delegate* unmanaged)&_getFieldOffset;
+            callbacks[97] = (delegate* unmanaged)&_getFieldInfo;
+            callbacks[98] = (delegate* unmanaged)&_getThreadLocalFieldInfo;
+            callbacks[99] = (delegate* unmanaged)&_getThreadLocalStaticBlocksInfo;
+            callbacks[100] = (delegate* unmanaged)&_getThreadLocalStaticInfo_NativeAOT;
+            callbacks[101] = (delegate* unmanaged)&_isFieldStatic;
+            callbacks[102] = (delegate* unmanaged)&_getArrayOrStringLength;
+            callbacks[103] = (delegate* unmanaged)&_getBoundaries;
+            callbacks[104] = (delegate* unmanaged)&_setBoundaries;
+            callbacks[105] = (delegate* unmanaged)&_getVars;
+            callbacks[106] = (delegate* unmanaged)&_setVars;
+            callbacks[107] = (delegate* unmanaged)&_reportRichMappings;
+            callbacks[108] = (delegate* unmanaged)&_reportMetadata;
+            callbacks[109] = (delegate* unmanaged)&_allocateArray;
+            callbacks[110] = (delegate* unmanaged)&_freeArray;
+            callbacks[111] = (delegate* unmanaged)&_getArgNext;
+            callbacks[112] = (delegate* unmanaged)&_getArgType;
+            callbacks[113] = (delegate* unmanaged)&_getExactClasses;
+            callbacks[114] = (delegate* unmanaged)&_getArgClass;
+            callbacks[115] = (delegate* unmanaged)&_getHFAType;
+            callbacks[116] = (delegate* unmanaged)&_runWithErrorTrap;
+            callbacks[117] = (delegate* unmanaged)&_runWithSPMIErrorTrap;
+            callbacks[118] = (delegate* unmanaged)&_getEEInfo;
+            callbacks[119] = (delegate* unmanaged)&_getAsyncInfo;
             callbacks[120] = (delegate* unmanaged)&_getMethodDefFromMethod;
             callbacks[121] = (delegate* unmanaged)&_printMethodName;
             callbacks[122] = (delegate* unmanaged)&_getMethodNameFromMetadata;
@@ -2782,26 +2796,27 @@ private static IntPtr GetUnmanagedCallbacks()
             callbacks[154] = (delegate* unmanaged)&_GetDelegateCtor;
             callbacks[155] = (delegate* unmanaged)&_MethodCompileComplete;
             callbacks[156] = (delegate* unmanaged)&_getTailCallHelpers;
-            callbacks[157] = (delegate* unmanaged)&_convertPInvokeCalliToCall;
-            callbacks[158] = (delegate* unmanaged)&_notifyInstructionSetUsage;
-            callbacks[159] = (delegate* unmanaged)&_updateEntryPointForTailCall;
-            callbacks[160] = (delegate* unmanaged)&_allocMem;
-            callbacks[161] = (delegate* unmanaged)&_reserveUnwindInfo;
-            callbacks[162] = (delegate* unmanaged)&_allocUnwindInfo;
-            callbacks[163] = (delegate* unmanaged)&_allocGCInfo;
-            callbacks[164] = (delegate* unmanaged)&_setEHcount;
-            callbacks[165] = (delegate* unmanaged)&_setEHinfo;
-            callbacks[166] = (delegate* unmanaged)&_logMsg;
-            callbacks[167] = (delegate* unmanaged)&_doAssert;
-            callbacks[168] = (delegate* unmanaged)&_reportFatalError;
-            callbacks[169] = (delegate* unmanaged)&_getPgoInstrumentationResults;
-            callbacks[170] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema;
-            callbacks[171] = (delegate* unmanaged)&_recordCallSite;
-            callbacks[172] = (delegate* unmanaged)&_recordRelocation;
-            callbacks[173] = (delegate* unmanaged)&_getRelocTypeHint;
-            callbacks[174] = (delegate* unmanaged)&_getExpectedTargetArchitecture;
-            callbacks[175] = (delegate* unmanaged)&_getJitFlags;
-            callbacks[176] = (delegate* unmanaged)&_getSpecialCopyHelper;
+            callbacks[157] = (delegate* unmanaged)&_getAsyncResumptionStub;
+            callbacks[158] = (delegate* unmanaged)&_convertPInvokeCalliToCall;
+            callbacks[159] = (delegate* unmanaged)&_notifyInstructionSetUsage;
+            callbacks[160] = (delegate* unmanaged)&_updateEntryPointForTailCall;
+            callbacks[161] = (delegate* unmanaged)&_allocMem;
+            callbacks[162] = (delegate* unmanaged)&_reserveUnwindInfo;
+            callbacks[163] = (delegate* unmanaged)&_allocUnwindInfo;
+            callbacks[164] = (delegate* unmanaged)&_allocGCInfo;
+            callbacks[165] = (delegate* unmanaged)&_setEHcount;
+            callbacks[166] = (delegate* unmanaged)&_setEHinfo;
+            callbacks[167] = (delegate* unmanaged)&_logMsg;
+            callbacks[168] = (delegate* unmanaged)&_doAssert;
+            callbacks[169] = (delegate* unmanaged)&_reportFatalError;
+            callbacks[170] = (delegate* unmanaged)&_getPgoInstrumentationResults;
+            callbacks[171] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema;
+            callbacks[172] = (delegate* unmanaged)&_recordCallSite;
+            callbacks[173] = (delegate* unmanaged)&_recordRelocation;
+            callbacks[174] = (delegate* unmanaged)&_getRelocTypeHint;
+            callbacks[175] = (delegate* unmanaged)&_getExpectedTargetArchitecture;
+            callbacks[176] = (delegate* unmanaged)&_getJitFlags;
+            callbacks[177] = (delegate* unmanaged)&_getSpecialCopyHelper;
 
             return (IntPtr)callbacks;
         }
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
index 7752d65befc8..7947bfd1db61 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
@@ -32,6 +32,7 @@ public enum InstructionSet
         ARM64_VectorT128 = InstructionSet_ARM64.VectorT128,
         ARM64_Rcpc2 = InstructionSet_ARM64.Rcpc2,
         ARM64_Sve = InstructionSet_ARM64.Sve,
+        ARM64_Sve2 = InstructionSet_ARM64.Sve2,
         ARM64_ArmBase_Arm64 = InstructionSet_ARM64.ArmBase_Arm64,
         ARM64_AdvSimd_Arm64 = InstructionSet_ARM64.AdvSimd_Arm64,
         ARM64_Aes_Arm64 = InstructionSet_ARM64.Aes_Arm64,
@@ -41,154 +42,142 @@ public enum InstructionSet
         ARM64_Sha1_Arm64 = InstructionSet_ARM64.Sha1_Arm64,
         ARM64_Sha256_Arm64 = InstructionSet_ARM64.Sha256_Arm64,
         ARM64_Sve_Arm64 = InstructionSet_ARM64.Sve_Arm64,
+        ARM64_Sve2_Arm64 = InstructionSet_ARM64.Sve2_Arm64,
+        RiscV64_RiscV64Base = InstructionSet_RiscV64.RiscV64Base,
+        RiscV64_Zba = InstructionSet_RiscV64.Zba,
+        RiscV64_Zbb = InstructionSet_RiscV64.Zbb,
         X64_X86Base = InstructionSet_X64.X86Base,
-        X64_SSE = InstructionSet_X64.SSE,
-        X64_SSE2 = InstructionSet_X64.SSE2,
         X64_SSE3 = InstructionSet_X64.SSE3,
         X64_SSSE3 = InstructionSet_X64.SSSE3,
         X64_SSE41 = InstructionSet_X64.SSE41,
         X64_SSE42 = InstructionSet_X64.SSE42,
+        X64_POPCNT = InstructionSet_X64.POPCNT,
         X64_AVX = InstructionSet_X64.AVX,
         X64_AVX2 = InstructionSet_X64.AVX2,
-        X64_AES = InstructionSet_X64.AES,
         X64_BMI1 = InstructionSet_X64.BMI1,
         X64_BMI2 = InstructionSet_X64.BMI2,
         X64_FMA = InstructionSet_X64.FMA,
         X64_LZCNT = InstructionSet_X64.LZCNT,
+        X64_MOVBE = InstructionSet_X64.MOVBE,
+        X64_AVX512 = InstructionSet_X64.AVX512,
+        X64_AVX512VBMI = InstructionSet_X64.AVX512VBMI,
+        X64_AVX512v3 = InstructionSet_X64.AVX512v3,
+        X64_AVX10v1 = InstructionSet_X64.AVX10v1,
+        X64_AVX10v2 = InstructionSet_X64.AVX10v2,
+        X64_APX = InstructionSet_X64.APX,
+        X64_AES = InstructionSet_X64.AES,
         X64_PCLMULQDQ = InstructionSet_X64.PCLMULQDQ,
+        X64_AVX512VP2INTERSECT = InstructionSet_X64.AVX512VP2INTERSECT,
+        X64_AVXIFMA = InstructionSet_X64.AVXIFMA,
+        X64_AVXVNNI = InstructionSet_X64.AVXVNNI,
+        X64_GFNI = InstructionSet_X64.GFNI,
+        X64_GFNI_V256 = InstructionSet_X64.GFNI_V256,
+        X64_GFNI_V512 = InstructionSet_X64.GFNI_V512,
+        X64_SHA = InstructionSet_X64.SHA,
+        X64_AES_V256 = InstructionSet_X64.AES_V256,
+        X64_AES_V512 = InstructionSet_X64.AES_V512,
         X64_PCLMULQDQ_V256 = InstructionSet_X64.PCLMULQDQ_V256,
         X64_PCLMULQDQ_V512 = InstructionSet_X64.PCLMULQDQ_V512,
-        X64_POPCNT = InstructionSet_X64.POPCNT,
+        X64_WAITPKG = InstructionSet_X64.WAITPKG,
+        X64_X86Serialize = InstructionSet_X64.X86Serialize,
         X64_Vector128 = InstructionSet_X64.Vector128,
         X64_Vector256 = InstructionSet_X64.Vector256,
         X64_Vector512 = InstructionSet_X64.Vector512,
-        X64_AVXVNNI = InstructionSet_X64.AVXVNNI,
-        X64_MOVBE = InstructionSet_X64.MOVBE,
-        X64_X86Serialize = InstructionSet_X64.X86Serialize,
-        X64_EVEX = InstructionSet_X64.EVEX,
-        X64_AVX512F = InstructionSet_X64.AVX512F,
-        X64_AVX512F_VL = InstructionSet_X64.AVX512F_VL,
-        X64_AVX512BW = InstructionSet_X64.AVX512BW,
-        X64_AVX512BW_VL = InstructionSet_X64.AVX512BW_VL,
-        X64_AVX512CD = InstructionSet_X64.AVX512CD,
-        X64_AVX512CD_VL = InstructionSet_X64.AVX512CD_VL,
-        X64_AVX512DQ = InstructionSet_X64.AVX512DQ,
-        X64_AVX512DQ_VL = InstructionSet_X64.AVX512DQ_VL,
-        X64_AVX512VBMI = InstructionSet_X64.AVX512VBMI,
-        X64_AVX512VBMI_VL = InstructionSet_X64.AVX512VBMI_VL,
-        X64_AVX10v1 = InstructionSet_X64.AVX10v1,
-        X64_AVX10v1_V512 = InstructionSet_X64.AVX10v1_V512,
         X64_VectorT128 = InstructionSet_X64.VectorT128,
         X64_VectorT256 = InstructionSet_X64.VectorT256,
         X64_VectorT512 = InstructionSet_X64.VectorT512,
-        X64_APX = InstructionSet_X64.APX,
-        X64_AVX10v2 = InstructionSet_X64.AVX10v2,
-        X64_AVX10v2_V512 = InstructionSet_X64.AVX10v2_V512,
-        X64_GFNI = InstructionSet_X64.GFNI,
-        X64_GFNI_V256 = InstructionSet_X64.GFNI_V256,
-        X64_GFNI_V512 = InstructionSet_X64.GFNI_V512,
         X64_X86Base_X64 = InstructionSet_X64.X86Base_X64,
-        X64_SSE_X64 = InstructionSet_X64.SSE_X64,
-        X64_SSE2_X64 = InstructionSet_X64.SSE2_X64,
         X64_SSE3_X64 = InstructionSet_X64.SSE3_X64,
         X64_SSSE3_X64 = InstructionSet_X64.SSSE3_X64,
         X64_SSE41_X64 = InstructionSet_X64.SSE41_X64,
         X64_SSE42_X64 = InstructionSet_X64.SSE42_X64,
+        X64_POPCNT_X64 = InstructionSet_X64.POPCNT_X64,
         X64_AVX_X64 = InstructionSet_X64.AVX_X64,
         X64_AVX2_X64 = InstructionSet_X64.AVX2_X64,
-        X64_AES_X64 = InstructionSet_X64.AES_X64,
         X64_BMI1_X64 = InstructionSet_X64.BMI1_X64,
         X64_BMI2_X64 = InstructionSet_X64.BMI2_X64,
         X64_FMA_X64 = InstructionSet_X64.FMA_X64,
         X64_LZCNT_X64 = InstructionSet_X64.LZCNT_X64,
-        X64_PCLMULQDQ_X64 = InstructionSet_X64.PCLMULQDQ_X64,
-        X64_POPCNT_X64 = InstructionSet_X64.POPCNT_X64,
-        X64_AVXVNNI_X64 = InstructionSet_X64.AVXVNNI_X64,
-        X64_X86Serialize_X64 = InstructionSet_X64.X86Serialize_X64,
-        X64_AVX512F_X64 = InstructionSet_X64.AVX512F_X64,
-        X64_AVX512BW_X64 = InstructionSet_X64.AVX512BW_X64,
-        X64_AVX512CD_X64 = InstructionSet_X64.AVX512CD_X64,
-        X64_AVX512DQ_X64 = InstructionSet_X64.AVX512DQ_X64,
+        X64_AVX512_X64 = InstructionSet_X64.AVX512_X64,
         X64_AVX512VBMI_X64 = InstructionSet_X64.AVX512VBMI_X64,
+        X64_AVX512v3_X64 = InstructionSet_X64.AVX512v3_X64,
         X64_AVX10v1_X64 = InstructionSet_X64.AVX10v1_X64,
-        X64_AVX10v1_V512_X64 = InstructionSet_X64.AVX10v1_V512_X64,
         X64_AVX10v2_X64 = InstructionSet_X64.AVX10v2_X64,
-        X64_AVX10v2_V512_X64 = InstructionSet_X64.AVX10v2_V512_X64,
+        X64_AES_X64 = InstructionSet_X64.AES_X64,
+        X64_PCLMULQDQ_X64 = InstructionSet_X64.PCLMULQDQ_X64,
+        X64_AVX512VP2INTERSECT_X64 = InstructionSet_X64.AVX512VP2INTERSECT_X64,
+        X64_AVXIFMA_X64 = InstructionSet_X64.AVXIFMA_X64,
+        X64_AVXVNNI_X64 = InstructionSet_X64.AVXVNNI_X64,
         X64_GFNI_X64 = InstructionSet_X64.GFNI_X64,
+        X64_SHA_X64 = InstructionSet_X64.SHA_X64,
+        X64_WAITPKG_X64 = InstructionSet_X64.WAITPKG_X64,
+        X64_X86Serialize_X64 = InstructionSet_X64.X86Serialize_X64,
         X86_X86Base = InstructionSet_X86.X86Base,
-        X86_SSE = InstructionSet_X86.SSE,
-        X86_SSE2 = InstructionSet_X86.SSE2,
         X86_SSE3 = InstructionSet_X86.SSE3,
         X86_SSSE3 = InstructionSet_X86.SSSE3,
         X86_SSE41 = InstructionSet_X86.SSE41,
         X86_SSE42 = InstructionSet_X86.SSE42,
+        X86_POPCNT = InstructionSet_X86.POPCNT,
         X86_AVX = InstructionSet_X86.AVX,
         X86_AVX2 = InstructionSet_X86.AVX2,
-        X86_AES = InstructionSet_X86.AES,
         X86_BMI1 = InstructionSet_X86.BMI1,
         X86_BMI2 = InstructionSet_X86.BMI2,
         X86_FMA = InstructionSet_X86.FMA,
         X86_LZCNT = InstructionSet_X86.LZCNT,
+        X86_MOVBE = InstructionSet_X86.MOVBE,
+        X86_AVX512 = InstructionSet_X86.AVX512,
+        X86_AVX512VBMI = InstructionSet_X86.AVX512VBMI,
+        X86_AVX512v3 = InstructionSet_X86.AVX512v3,
+        X86_AVX10v1 = InstructionSet_X86.AVX10v1,
+        X86_AVX10v2 = InstructionSet_X86.AVX10v2,
+        X86_APX = InstructionSet_X86.APX,
+        X86_AES = InstructionSet_X86.AES,
         X86_PCLMULQDQ = InstructionSet_X86.PCLMULQDQ,
+        X86_AVX512VP2INTERSECT = InstructionSet_X86.AVX512VP2INTERSECT,
+        X86_AVXIFMA = InstructionSet_X86.AVXIFMA,
+        X86_AVXVNNI = InstructionSet_X86.AVXVNNI,
+        X86_GFNI = InstructionSet_X86.GFNI,
+        X86_GFNI_V256 = InstructionSet_X86.GFNI_V256,
+        X86_GFNI_V512 = InstructionSet_X86.GFNI_V512,
+        X86_SHA = InstructionSet_X86.SHA,
+        X86_AES_V256 = InstructionSet_X86.AES_V256,
+        X86_AES_V512 = InstructionSet_X86.AES_V512,
         X86_PCLMULQDQ_V256 = InstructionSet_X86.PCLMULQDQ_V256,
         X86_PCLMULQDQ_V512 = InstructionSet_X86.PCLMULQDQ_V512,
-        X86_POPCNT = InstructionSet_X86.POPCNT,
+        X86_WAITPKG = InstructionSet_X86.WAITPKG,
+        X86_X86Serialize = InstructionSet_X86.X86Serialize,
         X86_Vector128 = InstructionSet_X86.Vector128,
         X86_Vector256 = InstructionSet_X86.Vector256,
         X86_Vector512 = InstructionSet_X86.Vector512,
-        X86_AVXVNNI = InstructionSet_X86.AVXVNNI,
-        X86_MOVBE = InstructionSet_X86.MOVBE,
-        X86_X86Serialize = InstructionSet_X86.X86Serialize,
-        X86_EVEX = InstructionSet_X86.EVEX,
-        X86_AVX512F = InstructionSet_X86.AVX512F,
-        X86_AVX512F_VL = InstructionSet_X86.AVX512F_VL,
-        X86_AVX512BW = InstructionSet_X86.AVX512BW,
-        X86_AVX512BW_VL = InstructionSet_X86.AVX512BW_VL,
-        X86_AVX512CD = InstructionSet_X86.AVX512CD,
-        X86_AVX512CD_VL = InstructionSet_X86.AVX512CD_VL,
-        X86_AVX512DQ = InstructionSet_X86.AVX512DQ,
-        X86_AVX512DQ_VL = InstructionSet_X86.AVX512DQ_VL,
-        X86_AVX512VBMI = InstructionSet_X86.AVX512VBMI,
-        X86_AVX512VBMI_VL = InstructionSet_X86.AVX512VBMI_VL,
-        X86_AVX10v1 = InstructionSet_X86.AVX10v1,
-        X86_AVX10v1_V512 = InstructionSet_X86.AVX10v1_V512,
         X86_VectorT128 = InstructionSet_X86.VectorT128,
         X86_VectorT256 = InstructionSet_X86.VectorT256,
         X86_VectorT512 = InstructionSet_X86.VectorT512,
-        X86_APX = InstructionSet_X86.APX,
-        X86_AVX10v2 = InstructionSet_X86.AVX10v2,
-        X86_AVX10v2_V512 = InstructionSet_X86.AVX10v2_V512,
-        X86_GFNI = InstructionSet_X86.GFNI,
-        X86_GFNI_V256 = InstructionSet_X86.GFNI_V256,
-        X86_GFNI_V512 = InstructionSet_X86.GFNI_V512,
         X86_X86Base_X64 = InstructionSet_X86.X86Base_X64,
-        X86_SSE_X64 = InstructionSet_X86.SSE_X64,
-        X86_SSE2_X64 = InstructionSet_X86.SSE2_X64,
         X86_SSE3_X64 = InstructionSet_X86.SSE3_X64,
         X86_SSSE3_X64 = InstructionSet_X86.SSSE3_X64,
         X86_SSE41_X64 = InstructionSet_X86.SSE41_X64,
         X86_SSE42_X64 = InstructionSet_X86.SSE42_X64,
+        X86_POPCNT_X64 = InstructionSet_X86.POPCNT_X64,
         X86_AVX_X64 = InstructionSet_X86.AVX_X64,
         X86_AVX2_X64 = InstructionSet_X86.AVX2_X64,
-        X86_AES_X64 = InstructionSet_X86.AES_X64,
         X86_BMI1_X64 = InstructionSet_X86.BMI1_X64,
         X86_BMI2_X64 = InstructionSet_X86.BMI2_X64,
         X86_FMA_X64 = InstructionSet_X86.FMA_X64,
         X86_LZCNT_X64 = InstructionSet_X86.LZCNT_X64,
-        X86_PCLMULQDQ_X64 = InstructionSet_X86.PCLMULQDQ_X64,
-        X86_POPCNT_X64 = InstructionSet_X86.POPCNT_X64,
-        X86_AVXVNNI_X64 = InstructionSet_X86.AVXVNNI_X64,
-        X86_X86Serialize_X64 = InstructionSet_X86.X86Serialize_X64,
-        X86_AVX512F_X64 = InstructionSet_X86.AVX512F_X64,
-        X86_AVX512BW_X64 = InstructionSet_X86.AVX512BW_X64,
-        X86_AVX512CD_X64 = InstructionSet_X86.AVX512CD_X64,
-        X86_AVX512DQ_X64 = InstructionSet_X86.AVX512DQ_X64,
+        X86_AVX512_X64 = InstructionSet_X86.AVX512_X64,
         X86_AVX512VBMI_X64 = InstructionSet_X86.AVX512VBMI_X64,
+        X86_AVX512v3_X64 = InstructionSet_X86.AVX512v3_X64,
         X86_AVX10v1_X64 = InstructionSet_X86.AVX10v1_X64,
-        X86_AVX10v1_V512_X64 = InstructionSet_X86.AVX10v1_V512_X64,
         X86_AVX10v2_X64 = InstructionSet_X86.AVX10v2_X64,
-        X86_AVX10v2_V512_X64 = InstructionSet_X86.AVX10v2_V512_X64,
+        X86_AES_X64 = InstructionSet_X86.AES_X64,
+        X86_PCLMULQDQ_X64 = InstructionSet_X86.PCLMULQDQ_X64,
+        X86_AVX512VP2INTERSECT_X64 = InstructionSet_X86.AVX512VP2INTERSECT_X64,
+        X86_AVXIFMA_X64 = InstructionSet_X86.AVXIFMA_X64,
+        X86_AVXVNNI_X64 = InstructionSet_X86.AVXVNNI_X64,
         X86_GFNI_X64 = InstructionSet_X86.GFNI_X64,
+        X86_SHA_X64 = InstructionSet_X86.SHA_X64,
+        X86_WAITPKG_X64 = InstructionSet_X86.WAITPKG_X64,
+        X86_X86Serialize_X64 = InstructionSet_X86.X86Serialize_X64,
     }
     public enum InstructionSet_ARM64
     {
@@ -210,15 +199,26 @@ public enum InstructionSet_ARM64
         VectorT128 = 14,
         Rcpc2 = 15,
         Sve = 16,
-        ArmBase_Arm64 = 17,
-        AdvSimd_Arm64 = 18,
-        Aes_Arm64 = 19,
-        Crc32_Arm64 = 20,
-        Dp_Arm64 = 21,
-        Rdm_Arm64 = 22,
-        Sha1_Arm64 = 23,
-        Sha256_Arm64 = 24,
-        Sve_Arm64 = 25,
+        Sve2 = 17,
+        ArmBase_Arm64 = 18,
+        AdvSimd_Arm64 = 19,
+        Aes_Arm64 = 20,
+        Crc32_Arm64 = 21,
+        Dp_Arm64 = 22,
+        Rdm_Arm64 = 23,
+        Sha1_Arm64 = 24,
+        Sha256_Arm64 = 25,
+        Sve_Arm64 = 26,
+        Sve2_Arm64 = 27,
+    }
+
+    public enum InstructionSet_RiscV64
+    {
+        ILLEGAL = InstructionSet.ILLEGAL,
+        NONE = InstructionSet.NONE,
+        RiscV64Base = 1,
+        Zba = 2,
+        Zbb = 3,
     }
 
     public enum InstructionSet_X64
@@ -226,79 +226,71 @@ public enum InstructionSet_X64
         ILLEGAL = InstructionSet.ILLEGAL,
         NONE = InstructionSet.NONE,
         X86Base = 1,
-        SSE = 2,
-        SSE2 = 3,
-        SSE3 = 4,
-        SSSE3 = 5,
-        SSE41 = 6,
-        SSE42 = 7,
-        AVX = 8,
-        AVX2 = 9,
-        AES = 10,
-        BMI1 = 11,
-        BMI2 = 12,
-        FMA = 13,
-        LZCNT = 14,
-        PCLMULQDQ = 15,
-        PCLMULQDQ_V256 = 16,
-        PCLMULQDQ_V512 = 17,
-        POPCNT = 18,
-        Vector128 = 19,
-        Vector256 = 20,
-        Vector512 = 21,
-        AVXVNNI = 22,
-        MOVBE = 23,
-        X86Serialize = 24,
-        EVEX = 25,
-        AVX512F = 26,
-        AVX512F_VL = 27,
-        AVX512BW = 28,
-        AVX512BW_VL = 29,
-        AVX512CD = 30,
-        AVX512CD_VL = 31,
-        AVX512DQ = 32,
-        AVX512DQ_VL = 33,
-        AVX512VBMI = 34,
-        AVX512VBMI_VL = 35,
-        AVX10v1 = 36,
-        AVX10v1_V512 = 37,
+        SSE3 = 2,
+        SSSE3 = 3,
+        SSE41 = 4,
+        SSE42 = 5,
+        POPCNT = 6,
+        AVX = 7,
+        AVX2 = 8,
+        BMI1 = 9,
+        BMI2 = 10,
+        FMA = 11,
+        LZCNT = 12,
+        MOVBE = 13,
+        AVX512 = 14,
+        AVX512VBMI = 15,
+        AVX512v3 = 16,
+        AVX10v1 = 17,
+        AVX10v2 = 18,
+        APX = 19,
+        AES = 20,
+        PCLMULQDQ = 21,
+        AVX512VP2INTERSECT = 22,
+        AVXIFMA = 23,
+        AVXVNNI = 24,
+        GFNI = 25,
+        GFNI_V256 = 26,
+        GFNI_V512 = 27,
+        SHA = 28,
+        AES_V256 = 29,
+        AES_V512 = 30,
+        PCLMULQDQ_V256 = 31,
+        PCLMULQDQ_V512 = 32,
+        WAITPKG = 33,
+        X86Serialize = 34,
+        Vector128 = 35,
+        Vector256 = 36,
+        Vector512 = 37,
         VectorT128 = 38,
         VectorT256 = 39,
         VectorT512 = 40,
-        APX = 41,
-        AVX10v2 = 42,
-        AVX10v2_V512 = 43,
-        GFNI = 44,
-        GFNI_V256 = 45,
-        GFNI_V512 = 46,
-        X86Base_X64 = 47,
-        SSE_X64 = 48,
-        SSE2_X64 = 49,
-        SSE3_X64 = 50,
-        SSSE3_X64 = 51,
-        SSE41_X64 = 52,
-        SSE42_X64 = 53,
-        AVX_X64 = 54,
-        AVX2_X64 = 55,
-        AES_X64 = 56,
-        BMI1_X64 = 57,
-        BMI2_X64 = 58,
-        FMA_X64 = 59,
-        LZCNT_X64 = 60,
-        PCLMULQDQ_X64 = 61,
-        POPCNT_X64 = 62,
-        AVXVNNI_X64 = 63,
-        X86Serialize_X64 = 64,
-        AVX512F_X64 = 65,
-        AVX512BW_X64 = 66,
-        AVX512CD_X64 = 67,
-        AVX512DQ_X64 = 68,
-        AVX512VBMI_X64 = 69,
-        AVX10v1_X64 = 70,
-        AVX10v1_V512_X64 = 71,
-        AVX10v2_X64 = 72,
-        AVX10v2_V512_X64 = 73,
-        GFNI_X64 = 74,
+        X86Base_X64 = 41,
+        SSE3_X64 = 42,
+        SSSE3_X64 = 43,
+        SSE41_X64 = 44,
+        SSE42_X64 = 45,
+        POPCNT_X64 = 46,
+        AVX_X64 = 47,
+        AVX2_X64 = 48,
+        BMI1_X64 = 49,
+        BMI2_X64 = 50,
+        FMA_X64 = 51,
+        LZCNT_X64 = 52,
+        AVX512_X64 = 53,
+        AVX512VBMI_X64 = 54,
+        AVX512v3_X64 = 55,
+        AVX10v1_X64 = 56,
+        AVX10v2_X64 = 57,
+        AES_X64 = 58,
+        PCLMULQDQ_X64 = 59,
+        AVX512VP2INTERSECT_X64 = 60,
+        AVXIFMA_X64 = 61,
+        AVXVNNI_X64 = 62,
+        GFNI_X64 = 63,
+        SHA_X64 = 64,
+        WAITPKG_X64 = 65,
+        X86Serialize_X64 = 66,
     }
 
     public enum InstructionSet_X86
@@ -306,79 +298,71 @@ public enum InstructionSet_X86
         ILLEGAL = InstructionSet.ILLEGAL,
         NONE = InstructionSet.NONE,
         X86Base = 1,
-        SSE = 2,
-        SSE2 = 3,
-        SSE3 = 4,
-        SSSE3 = 5,
-        SSE41 = 6,
-        SSE42 = 7,
-        AVX = 8,
-        AVX2 = 9,
-        AES = 10,
-        BMI1 = 11,
-        BMI2 = 12,
-        FMA = 13,
-        LZCNT = 14,
-        PCLMULQDQ = 15,
-        PCLMULQDQ_V256 = 16,
-        PCLMULQDQ_V512 = 17,
-        POPCNT = 18,
-        Vector128 = 19,
-        Vector256 = 20,
-        Vector512 = 21,
-        AVXVNNI = 22,
-        MOVBE = 23,
-        X86Serialize = 24,
-        EVEX = 25,
-        AVX512F = 26,
-        AVX512F_VL = 27,
-        AVX512BW = 28,
-        AVX512BW_VL = 29,
-        AVX512CD = 30,
-        AVX512CD_VL = 31,
-        AVX512DQ = 32,
-        AVX512DQ_VL = 33,
-        AVX512VBMI = 34,
-        AVX512VBMI_VL = 35,
-        AVX10v1 = 36,
-        AVX10v1_V512 = 37,
+        SSE3 = 2,
+        SSSE3 = 3,
+        SSE41 = 4,
+        SSE42 = 5,
+        POPCNT = 6,
+        AVX = 7,
+        AVX2 = 8,
+        BMI1 = 9,
+        BMI2 = 10,
+        FMA = 11,
+        LZCNT = 12,
+        MOVBE = 13,
+        AVX512 = 14,
+        AVX512VBMI = 15,
+        AVX512v3 = 16,
+        AVX10v1 = 17,
+        AVX10v2 = 18,
+        APX = 19,
+        AES = 20,
+        PCLMULQDQ = 21,
+        AVX512VP2INTERSECT = 22,
+        AVXIFMA = 23,
+        AVXVNNI = 24,
+        GFNI = 25,
+        GFNI_V256 = 26,
+        GFNI_V512 = 27,
+        SHA = 28,
+        AES_V256 = 29,
+        AES_V512 = 30,
+        PCLMULQDQ_V256 = 31,
+        PCLMULQDQ_V512 = 32,
+        WAITPKG = 33,
+        X86Serialize = 34,
+        Vector128 = 35,
+        Vector256 = 36,
+        Vector512 = 37,
         VectorT128 = 38,
         VectorT256 = 39,
         VectorT512 = 40,
-        APX = 41,
-        AVX10v2 = 42,
-        AVX10v2_V512 = 43,
-        GFNI = 44,
-        GFNI_V256 = 45,
-        GFNI_V512 = 46,
-        X86Base_X64 = 47,
-        SSE_X64 = 48,
-        SSE2_X64 = 49,
-        SSE3_X64 = 50,
-        SSSE3_X64 = 51,
-        SSE41_X64 = 52,
-        SSE42_X64 = 53,
-        AVX_X64 = 54,
-        AVX2_X64 = 55,
-        AES_X64 = 56,
-        BMI1_X64 = 57,
-        BMI2_X64 = 58,
-        FMA_X64 = 59,
-        LZCNT_X64 = 60,
-        PCLMULQDQ_X64 = 61,
-        POPCNT_X64 = 62,
-        AVXVNNI_X64 = 63,
-        X86Serialize_X64 = 64,
-        AVX512F_X64 = 65,
-        AVX512BW_X64 = 66,
-        AVX512CD_X64 = 67,
-        AVX512DQ_X64 = 68,
-        AVX512VBMI_X64 = 69,
-        AVX10v1_X64 = 70,
-        AVX10v1_V512_X64 = 71,
-        AVX10v2_X64 = 72,
-        AVX10v2_V512_X64 = 73,
-        GFNI_X64 = 74,
+        X86Base_X64 = 41,
+        SSE3_X64 = 42,
+        SSSE3_X64 = 43,
+        SSE41_X64 = 44,
+        SSE42_X64 = 45,
+        POPCNT_X64 = 46,
+        AVX_X64 = 47,
+        AVX2_X64 = 48,
+        BMI1_X64 = 49,
+        BMI2_X64 = 50,
+        FMA_X64 = 51,
+        LZCNT_X64 = 52,
+        AVX512_X64 = 53,
+        AVX512VBMI_X64 = 54,
+        AVX512v3_X64 = 55,
+        AVX10v1_X64 = 56,
+        AVX10v2_X64 = 57,
+        AES_X64 = 58,
+        PCLMULQDQ_X64 = 59,
+        AVX512VP2INTERSECT_X64 = 60,
+        AVXIFMA_X64 = 61,
+        AVXVNNI_X64 = 62,
+        GFNI_X64 = 63,
+        SHA_X64 = 64,
+        WAITPKG_X64 = 65,
+        X86Serialize_X64 = 66,
     }
 
     public unsafe struct InstructionSetFlags : IEnumerable<InstructionSet>
@@ -388,6 +372,8 @@ public unsafe struct InstructionSetFlags : IEnumerable<InstructionSet>
         private fixed ulong _flags[FlagsFieldCount];
         public IEnumerable<InstructionSet_ARM64> ARM64Flags => this.Select((x) => (InstructionSet_ARM64)x);
 
+        public IEnumerable<InstructionSet_RiscV64> RiscV64Flags => this.Select((x) => (InstructionSet_RiscV64)x);
+
         public IEnumerable<InstructionSet_X64> X64Flags => this.Select((x) => (InstructionSet_X64)x);
 
         public IEnumerable<InstructionSet_X86> X86Flags => this.Select((x) => (InstructionSet_X86)x);
@@ -509,17 +495,17 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS
             case TargetArchitecture.X64:
                 switch (input)
                 {
-                case InstructionSet.X64_Vector128: return InstructionSet.X64_SSE;
+                case InstructionSet.X64_Vector128: return InstructionSet.X64_X86Base;
                 case InstructionSet.X64_Vector256: return InstructionSet.X64_AVX;
-                case InstructionSet.X64_Vector512: return InstructionSet.X64_AVX512F;
+                case InstructionSet.X64_Vector512: return InstructionSet.X64_AVX512;
                 }
                 break;
             case TargetArchitecture.X86:
                 switch (input)
                 {
-                case InstructionSet.X86_Vector128: return InstructionSet.X86_SSE;
+                case InstructionSet.X86_Vector128: return InstructionSet.X86_X86Base;
                 case InstructionSet.X86_Vector256: return InstructionSet.X86_AVX;
-                case InstructionSet.X86_Vector512: return InstructionSet.X86_AVX512F;
+                case InstructionSet.X86_Vector512: return InstructionSet.X86_AVX512;
                 }
                 break;
             }
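
The Vector128/256/512-to-ISA mapping above, and the implication expansion that follows, amount to computing a transitive closure over an "implies" relation between instruction sets. A hedged, much-simplified sketch of that idea with a made-up table; the real relation is autogenerated into this file and is far larger.

    using System;
    using System.Collections.Generic;

    class IsaImplicationDemo
    {
        // Hypothetical, heavily trimmed implication table for illustration only.
        static readonly Dictionary<string, string[]> Implies = new()
        {
            ["Vector512"] = new[] { "AVX512" },
            ["Vector256"] = new[] { "AVX" },
            ["Vector128"] = new[] { "X86Base" },
            ["AVX512"]    = new[] { "AVX2" },
            ["AVX2"]      = new[] { "AVX" },
            ["AVX"]       = new[] { "SSE42" },
            ["SSE42"]     = new[] { "X86Base" },
        };

        // Expand an enabled set to a fixed point, in the spirit of
        // ExpandInstructionSetByImplicationHelper (the generated code is unrolled, not a loop).
        static HashSet<string> Expand(IEnumerable<string> enabled)
        {
            var result = new HashSet<string>(enabled);
            bool changed = true;
            while (changed)
            {
                changed = false;
                foreach (string isa in new List<string>(result))
                {
                    if (Implies.TryGetValue(isa, out string[] implied))
                    {
                        foreach (string dep in implied)
                            changed |= result.Add(dep);
                    }
                }
            }
            return result;
        }

        static void Main()
        {
            // Enabling Vector512 pulls in AVX512, AVX2, AVX, SSE42 and the baseline.
            Console.WriteLine(string.Join(", ", Expand(new[] { "Vector512" })));
        }
    }
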
@@ -573,6 +559,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.ARM64_Sve_Arm64);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve_Arm64))
                         resultflags.AddInstructionSet(InstructionSet.ARM64_Sve);
+                    if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve2))
+                        resultflags.AddInstructionSet(InstructionSet.ARM64_Sve2_Arm64);
+                    if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve2_Arm64))
+                        resultflags.AddInstructionSet(InstructionSet.ARM64_Sve2);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd))
                         resultflags.AddInstructionSet(InstructionSet.ARM64_ArmBase);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_Aes))
@@ -595,6 +585,15 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve))
                         resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
+                    if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve2))
+                        resultflags.AddInstructionSet(InstructionSet.ARM64_Sve);
+                    break;
+
+                case TargetArchitecture.RiscV64:
+                    if (resultflags.HasInstructionSet(InstructionSet.RiscV64_Zbb))
+                        resultflags.AddInstructionSet(InstructionSet.RiscV64_RiscV64Base);
+                    if (resultflags.HasInstructionSet(InstructionSet.RiscV64_Zba))
+                        resultflags.AddInstructionSet(InstructionSet.RiscV64_RiscV64Base);
                     break;
 
                 case TargetArchitecture.X64:
@@ -602,14 +601,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.X64_X86Base_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE2_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE3_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3_X64))
@@ -626,6 +617,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE42_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT))
+                        resultflags.AddInstructionSet(InstructionSet.X64_POPCNT_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX_X64))
@@ -634,10 +629,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX2_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AES_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AES);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_BMI1))
                         resultflags.AddInstructionSet(InstructionSet.X64_BMI1_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_BMI1_X64))
@@ -654,68 +645,64 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.X64_LZCNT_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_LZCNT_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_LZCNT);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ))
-                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT))
-                        resultflags.AddInstructionSet(InstructionSet.X64_POPCNT_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize))
-                        resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512v3))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512v3_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512v3_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512v3);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ))
+                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXIFMA))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXIFMA_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXIFMA_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXIFMA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI))
                         resultflags.AddInstructionSet(InstructionSet.X64_GFNI_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_GFNI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
-                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_SHA))
+                        resultflags.AddInstructionSet(InstructionSet.X64_SHA_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_SHA_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_SHA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_WAITPKG))
+                        resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_WAITPKG_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize))
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSSE3))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE3);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE41))
@@ -725,9 +712,7 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                     if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX);
+                        resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_BMI1))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_BMI2))
@@ -735,59 +720,51 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                     if (resultflags.HasInstructionSet(InstructionSet.X64_FMA))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_LZCNT))
-                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_BMI1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_BMI2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_FMA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_LZCNT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_MOVBE);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_BMI1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_BMI2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X64_FMA);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X64_EVEX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_LZCNT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_MOVBE);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512v3))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512v3);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AES))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256))
-                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXIFMA))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize))
-                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE41);
+                        resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256))
                         resultflags.AddInstructionSet(InstructionSet.X64_GFNI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256))
@@ -795,56 +772,46 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                     if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512))
                         resultflags.AddInstructionSet(InstructionSet.X64_GFNI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
-                        resultflags.AddInstructionSet(InstructionSet.X64_EVEX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_SHA))
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_WAITPKG))
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize))
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE);
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_Vector512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
                     break;
 
                 case TargetArchitecture.X86:
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE))
-                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_SSE3))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
+                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_SSSE3))
                         resultflags.AddInstructionSet(InstructionSet.X86_SSE3);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_SSE41))
@@ -854,9 +821,7 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                     if (resultflags.HasInstructionSet(InstructionSet.X86_POPCNT))
                         resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX);
+                        resultflags.AddInstructionSet(InstructionSet.X86_POPCNT);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_BMI1))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_BMI2))
@@ -864,59 +829,51 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                     if (resultflags.HasInstructionSet(InstructionSet.X86_FMA))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_LZCNT))
-                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_MOVBE))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_BMI1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_BMI2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_FMA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_LZCNT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_MOVBE);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_BMI1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_BMI2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X86_FMA);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X86_EVEX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_LZCNT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_MOVBE);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512v3))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512v3);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AES))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
+                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256))
-                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
+                        resultflags.AddInstructionSet(InstructionSet.X86_AES);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VP2INTERSECT))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVXIFMA))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize))
-                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE41);
+                        resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256))
                         resultflags.AddInstructionSet(InstructionSet.X86_GFNI);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256))
@@ -924,47 +881,41 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                     if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512))
                         resultflags.AddInstructionSet(InstructionSet.X86_GFNI);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
-                        resultflags.AddInstructionSet(InstructionSet.X86_EVEX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_SHA))
+                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AES_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AES);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AES_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AES_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AES_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AES_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AES_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AES_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_WAITPKG))
+                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize))
+                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE);
+                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_Vector512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
+                        resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
                     break;
                 }
             } while (!oldflags.Equals(resultflags));
@@ -1006,6 +957,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.ARM64_Sha256);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve_Arm64))
                         resultflags.AddInstructionSet(InstructionSet.ARM64_Sve);
+                    if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve2_Arm64))
+                        resultflags.AddInstructionSet(InstructionSet.ARM64_Sve2);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_ArmBase))
                         resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_ArmBase))
@@ -1028,15 +981,20 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.ARM64_VectorT128);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd))
                         resultflags.AddInstructionSet(InstructionSet.ARM64_Sve);
+                    if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve))
+                        resultflags.AddInstructionSet(InstructionSet.ARM64_Sve2);
+                    break;
+
+                case TargetArchitecture.RiscV64:
+                    if (resultflags.HasInstructionSet(InstructionSet.RiscV64_RiscV64Base))
+                        resultflags.AddInstructionSet(InstructionSet.RiscV64_Zbb);
+                    if (resultflags.HasInstructionSet(InstructionSet.RiscV64_RiscV64Base))
+                        resultflags.AddInstructionSet(InstructionSet.RiscV64_Zba);
                     break;
 
                 case TargetArchitecture.X64:
                     if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE3);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSSE3_X64))
@@ -1045,12 +1003,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE41);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AES);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_BMI1_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_BMI1);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_BMI2_X64))
@@ -1059,40 +1017,36 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X64_FMA);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_LZCNT_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_LZCNT);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512v3_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512v3);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64))
-                        resultflags.AddInstructionSet(InstructionSet.X64_GFNI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
-                        resultflags.AddInstructionSet(InstructionSet.X64_SSE3);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXIFMA_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXIFMA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_GFNI);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_SHA_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_SHA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_WAITPKG_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
+                        resultflags.AddInstructionSet(InstructionSet.X64_SSE3);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSSE3);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSSE3))
@@ -1101,69 +1055,59 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
                         resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X64_BMI1);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X64_BMI2);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X64_FMA);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X64_LZCNT);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X64_MOVBE);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_BMI1))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_BMI2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_FMA))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_LZCNT))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
-                        resultflags.AddInstructionSet(InstructionSet.X64_EVEX);
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_BMI1))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_BMI2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_FMA))
-                        resultflags.AddInstructionSet(InstructionSet.X64_EVEX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_LZCNT))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512v3);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512v3))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X64_AES);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES))
                         resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ))
-                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
-                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256))
-                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXIFMA);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
-                        resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE41))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
                         resultflags.AddInstructionSet(InstructionSet.X64_GFNI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI))
                         resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V256);
@@ -1171,56 +1115,46 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V256);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI))
                         resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
+                        resultflags.AddInstructionSet(InstructionSet.X64_SHA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AES_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ))
+                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AES_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
+                        resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
+                        resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X64_Vector128);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X64_Vector256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X64_Vector512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X64_VectorT128);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
                         resultflags.AddInstructionSet(InstructionSet.X64_VectorT256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X64_VectorT512);
                     break;
 
                 case TargetArchitecture.X86:
                     if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE))
-                        resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
                         resultflags.AddInstructionSet(InstructionSet.X86_SSE3);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_SSE3))
                         resultflags.AddInstructionSet(InstructionSet.X86_SSSE3);
@@ -1230,69 +1164,59 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
                         resultflags.AddInstructionSet(InstructionSet.X86_POPCNT);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_POPCNT))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X86_BMI1);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X86_BMI2);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X86_FMA);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X86_LZCNT);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X86_MOVBE);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_BMI1))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_BMI2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_FMA))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_LZCNT))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_MOVBE))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
-                        resultflags.AddInstructionSet(InstructionSet.X86_EVEX);
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_BMI1))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_BMI2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_FMA))
-                        resultflags.AddInstructionSet(InstructionSet.X86_EVEX);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_LZCNT))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_MOVBE))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512v3);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512v3))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X86_AES);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AES))
                         resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ))
-                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
-                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256))
-                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512VP2INTERSECT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVXIFMA);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
-                        resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE41))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
                         resultflags.AddInstructionSet(InstructionSet.X86_GFNI);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI))
                         resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V256);
@@ -1300,47 +1224,41 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V256);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI))
                         resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
-                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2_V512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
+                        resultflags.AddInstructionSet(InstructionSet.X86_SHA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AES))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AES_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AES_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AES_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AES_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AES_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ))
+                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AES_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AES_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
+                        resultflags.AddInstructionSet(InstructionSet.X86_WAITPKG);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
+                        resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X86_Vector128);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
                         resultflags.AddInstructionSet(InstructionSet.X86_Vector256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X86_Vector512);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X86_VectorT128);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
                         resultflags.AddInstructionSet(InstructionSet.X86_VectorT256);
-                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))
                         resultflags.AddInstructionSet(InstructionSet.X86_VectorT512);
                     break;
                 }
@@ -1351,16 +1269,16 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
 
         private static Dictionary<(string, TargetArchitecture), string> AllInstructionSetGroups { get; } = new()
             {
-                { ("x86-x64",    TargetArchitecture.X64),   "sse2" },
-                { ("x86-x64",    TargetArchitecture.X86),   "sse2" },
-                { ("x86-x64-v2", TargetArchitecture.X64),   "sse4.2 popcnt" },
-                { ("x86-x64-v2", TargetArchitecture.X86),   "sse4.2 popcnt" },
-                { ("x86-x64-v3", TargetArchitecture.X64),   "x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma" },
-                { ("x86-x64-v3", TargetArchitecture.X86),   "x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma" },
-                { ("skylake",    TargetArchitecture.X64),   "x86-x64-v3" },
-                { ("skylake",    TargetArchitecture.X86),   "x86-x64-v3" },
-                { ("x86-x64-v4", TargetArchitecture.X64),   "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl" },
-                { ("x86-x64-v4", TargetArchitecture.X86),   "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl" },
+                { ("x86-64",     TargetArchitecture.X64),   "base" },
+                { ("x86-64",     TargetArchitecture.X86),   "base" },
+                { ("x86-64-v2",  TargetArchitecture.X64),   "x86-64 sse4.2 popcnt" },
+                { ("x86-64-v2",  TargetArchitecture.X86),   "x86-64 sse4.2 popcnt" },
+                { ("x86-64-v3",  TargetArchitecture.X64),   "x86-64-v2 avx2 bmi bmi2 fma lzcnt movbe" },
+                { ("x86-64-v3",  TargetArchitecture.X86),   "x86-64-v2 avx2 bmi bmi2 fma lzcnt movbe" },
+                { ("skylake",    TargetArchitecture.X64),   "x86-64-v3" },
+                { ("skylake",    TargetArchitecture.X86),   "x86-64-v3" },
+                { ("x86-64-v4",  TargetArchitecture.X64),   "x86-64-v3 avx512" },
+                { ("x86-64-v4",  TargetArchitecture.X86),   "x86-64-v3 avx512" },
                 { ("armv8-a",    TargetArchitecture.ARM64), "neon" },
                 { ("armv8.1-a",  TargetArchitecture.ARM64), "armv8-a lse crc rdma" },
                 { ("armv8.2-a",  TargetArchitecture.ARM64), "armv8.1-a" },
@@ -1415,104 +1333,151 @@ public static IEnumerable ArchitectureToValidInstructionSets
                     yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.ARM64_VectorT128, true);
                     yield return new InstructionSetInfo("rcpc2", "", InstructionSet.ARM64_Rcpc2, true);
                     yield return new InstructionSetInfo("sve", "Sve", InstructionSet.ARM64_Sve, true);
+                    yield return new InstructionSetInfo("sve2", "Sve2", InstructionSet.ARM64_Sve2, true);
+                    break;
+
+                case TargetArchitecture.RiscV64:
+                    yield return new InstructionSetInfo("base", "RiscV64Base", InstructionSet.RiscV64_RiscV64Base, true);
+                    yield return new InstructionSetInfo("zba", "Zba", InstructionSet.RiscV64_Zba, true);
+                    yield return new InstructionSetInfo("zbb", "Zbb", InstructionSet.RiscV64_Zbb, true);
                     break;
 
                 case TargetArchitecture.X64:
                     yield return new InstructionSetInfo("base", "X86Base", InstructionSet.X64_X86Base, true);
-                    yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X64_SSE, true);
-                    yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X64_SSE2, true);
+                    yield return new InstructionSetInfo("base", "Sse", InstructionSet.X64_X86Base, true);
+                    yield return new InstructionSetInfo("base", "Sse2", InstructionSet.X64_X86Base, true);
                     yield return new InstructionSetInfo("sse3", "Sse3", InstructionSet.X64_SSE3, true);
                     yield return new InstructionSetInfo("ssse3", "Ssse3", InstructionSet.X64_SSSE3, true);
                     yield return new InstructionSetInfo("sse4.1", "Sse41", InstructionSet.X64_SSE41, true);
                     yield return new InstructionSetInfo("sse4.2", "Sse42", InstructionSet.X64_SSE42, true);
+                    yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X64_POPCNT, true);
                     yield return new InstructionSetInfo("avx", "Avx", InstructionSet.X64_AVX, true);
                     yield return new InstructionSetInfo("avx2", "Avx2", InstructionSet.X64_AVX2, true);
-                    yield return new InstructionSetInfo("aes", "Aes", InstructionSet.X64_AES, true);
                     yield return new InstructionSetInfo("bmi", "Bmi1", InstructionSet.X64_BMI1, true);
                     yield return new InstructionSetInfo("bmi2", "Bmi2", InstructionSet.X64_BMI2, true);
+                    yield return new InstructionSetInfo("avx2", "F16C", InstructionSet.X64_AVX2, true);
                     yield return new InstructionSetInfo("fma", "Fma", InstructionSet.X64_FMA, true);
                     yield return new InstructionSetInfo("lzcnt", "Lzcnt", InstructionSet.X64_LZCNT, true);
+                    yield return new InstructionSetInfo("movbe", "", InstructionSet.X64_MOVBE, true);
+                    yield return new InstructionSetInfo("avx512", "", InstructionSet.X64_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512F", InstructionSet.X64_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512F_VL", InstructionSet.X64_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512BW", InstructionSet.X64_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512BW_VL", InstructionSet.X64_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512CD", InstructionSet.X64_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512CD_VL", InstructionSet.X64_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512DQ", InstructionSet.X64_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512DQ_VL", InstructionSet.X64_AVX512, true);
+                    yield return new InstructionSetInfo("avx512v2", "", InstructionSet.X64_AVX512VBMI, true);
+                    yield return new InstructionSetInfo("avx512v2", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true);
+                    yield return new InstructionSetInfo("avx512v2", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Bitalg", InstructionSet.X64_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Bitalg_VL", InstructionSet.X64_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Vbmi2", InstructionSet.X64_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Vbmi2_VL", InstructionSet.X64_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "", InstructionSet.X64_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Vpopcntdq", InstructionSet.X64_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Vpopcntdq_VL", InstructionSet.X64_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx512Bf16", InstructionSet.X64_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx512Bf16_VL", InstructionSet.X64_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx512Fp16", InstructionSet.X64_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx512Fp16_VL", InstructionSet.X64_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X64_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx10v1_V512", InstructionSet.X64_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X64_AVX10v2, true);
+                    yield return new InstructionSetInfo("avx10v2", "Avx10v2_V512", InstructionSet.X64_AVX10v2, true);
+                    yield return new InstructionSetInfo("apx", "", InstructionSet.X64_APX, true);
+                    yield return new InstructionSetInfo("aes", "Aes", InstructionSet.X64_AES, true);
                     yield return new InstructionSetInfo("pclmul", "Pclmulqdq", InstructionSet.X64_PCLMULQDQ, true);
+                    yield return new InstructionSetInfo("avx512vp2intersect", "Avx512Vp2intersect", InstructionSet.X64_AVX512VP2INTERSECT, true);
+                    yield return new InstructionSetInfo("avx512vp2intersect", "Avx512Vp2intersect_VL", InstructionSet.X64_AVX512VP2INTERSECT, true);
+                    yield return new InstructionSetInfo("avxifma", "AvxIfma", InstructionSet.X64_AVXIFMA, true);
+                    yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true);
+                    yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X64_GFNI, true);
+                    yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X64_GFNI_V256, true);
+                    yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X64_GFNI_V512, true);
+                    yield return new InstructionSetInfo("sha", "Sha", InstructionSet.X64_SHA, true);
+                    yield return new InstructionSetInfo("vaes", "Aes_V256", InstructionSet.X64_AES_V256, true);
+                    yield return new InstructionSetInfo("vaes_v512", "Aes_V512", InstructionSet.X64_AES_V512, true);
                     yield return new InstructionSetInfo("vpclmul", "Pclmulqdq_V256", InstructionSet.X64_PCLMULQDQ_V256, true);
                     yield return new InstructionSetInfo("vpclmul_v512", "Pclmulqdq_V512", InstructionSet.X64_PCLMULQDQ_V512, true);
-                    yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X64_POPCNT, true);
+                    yield return new InstructionSetInfo("waitpkg", "WaitPkg", InstructionSet.X64_WAITPKG, true);
+                    yield return new InstructionSetInfo("x86serialize", "X86Serialize", InstructionSet.X64_X86Serialize, true);
                     yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false);
                     yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false);
                     yield return new InstructionSetInfo("Vector512", "", InstructionSet.X64_Vector512, false);
-                    yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true);
-                    yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X64_MOVBE, true);
-                    yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X64_X86Serialize, true);
-                    yield return new InstructionSetInfo("evex", "EVEX", InstructionSet.X64_EVEX, true);
-                    yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X64_AVX512F, true);
-                    yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X64_AVX512F_VL, true);
-                    yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X64_AVX512BW, true);
-                    yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X64_AVX512BW_VL, true);
-                    yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X64_AVX512CD, true);
-                    yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X64_AVX512CD_VL, true);
-                    yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X64_AVX512DQ, true);
-                    yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512DQ_VL, true);
-                    yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true);
-                    yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI_VL, true);
-                    yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X64_AVX10v1, true);
-                    yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X64_AVX10v1_V512, true);
-                    yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true);
-                    yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true);
-                    yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true);
-                    yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X64_APX, true);
-                    yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X64_AVX10v2, true);
-                    yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", InstructionSet.X64_AVX10v2_V512, true);
-                    yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X64_GFNI, true);
-                    yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X64_GFNI_V256, true);
-                    yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X64_GFNI_V512, true);
+                    yield return new InstructionSetInfo("vectort128", "", InstructionSet.X64_VectorT128, true);
+                    yield return new InstructionSetInfo("vectort256", "", InstructionSet.X64_VectorT256, true);
+                    yield return new InstructionSetInfo("vectort512", "", InstructionSet.X64_VectorT512, true);
                     break;
 
                 case TargetArchitecture.X86:
                     yield return new InstructionSetInfo("base", "X86Base", InstructionSet.X86_X86Base, true);
-                    yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X86_SSE, true);
-                    yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X86_SSE2, true);
+                    yield return new InstructionSetInfo("base", "Sse", InstructionSet.X86_X86Base, true);
+                    yield return new InstructionSetInfo("base", "Sse2", InstructionSet.X86_X86Base, true);
                     yield return new InstructionSetInfo("sse3", "Sse3", InstructionSet.X86_SSE3, true);
                     yield return new InstructionSetInfo("ssse3", "Ssse3", InstructionSet.X86_SSSE3, true);
                     yield return new InstructionSetInfo("sse4.1", "Sse41", InstructionSet.X86_SSE41, true);
                     yield return new InstructionSetInfo("sse4.2", "Sse42", InstructionSet.X86_SSE42, true);
+                    yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X86_POPCNT, true);
                     yield return new InstructionSetInfo("avx", "Avx", InstructionSet.X86_AVX, true);
                     yield return new InstructionSetInfo("avx2", "Avx2", InstructionSet.X86_AVX2, true);
-                    yield return new InstructionSetInfo("aes", "Aes", InstructionSet.X86_AES, true);
                     yield return new InstructionSetInfo("bmi", "Bmi1", InstructionSet.X86_BMI1, true);
                     yield return new InstructionSetInfo("bmi2", "Bmi2", InstructionSet.X86_BMI2, true);
+                    yield return new InstructionSetInfo("avx2", "F16C", InstructionSet.X86_AVX2, true);
                     yield return new InstructionSetInfo("fma", "Fma", InstructionSet.X86_FMA, true);
                     yield return new InstructionSetInfo("lzcnt", "Lzcnt", InstructionSet.X86_LZCNT, true);
+                    yield return new InstructionSetInfo("movbe", "", InstructionSet.X86_MOVBE, true);
+                    yield return new InstructionSetInfo("avx512", "", InstructionSet.X86_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512F", InstructionSet.X86_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512F_VL", InstructionSet.X86_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512BW", InstructionSet.X86_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512BW_VL", InstructionSet.X86_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512CD", InstructionSet.X86_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512CD_VL", InstructionSet.X86_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512DQ", InstructionSet.X86_AVX512, true);
+                    yield return new InstructionSetInfo("avx512", "Avx512DQ_VL", InstructionSet.X86_AVX512, true);
+                    yield return new InstructionSetInfo("avx512v2", "", InstructionSet.X86_AVX512VBMI, true);
+                    yield return new InstructionSetInfo("avx512v2", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true);
+                    yield return new InstructionSetInfo("avx512v2", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Bitalg", InstructionSet.X86_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Bitalg_VL", InstructionSet.X86_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Vbmi2", InstructionSet.X86_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Vbmi2_VL", InstructionSet.X86_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "", InstructionSet.X86_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Vpopcntdq", InstructionSet.X86_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx512v3", "Avx512Vpopcntdq_VL", InstructionSet.X86_AVX512v3, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx512Bf16", InstructionSet.X86_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx512Bf16_VL", InstructionSet.X86_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx512Fp16", InstructionSet.X86_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx512Fp16_VL", InstructionSet.X86_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X86_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx10v1_V512", InstructionSet.X86_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X86_AVX10v2, true);
+                    yield return new InstructionSetInfo("avx10v2", "Avx10v2_V512", InstructionSet.X86_AVX10v2, true);
+                    yield return new InstructionSetInfo("apx", "", InstructionSet.X86_APX, true);
+                    yield return new InstructionSetInfo("aes", "Aes", InstructionSet.X86_AES, true);
                     yield return new InstructionSetInfo("pclmul", "Pclmulqdq", InstructionSet.X86_PCLMULQDQ, true);
+                    yield return new InstructionSetInfo("avx512vp2intersect", "Avx512Vp2intersect", InstructionSet.X86_AVX512VP2INTERSECT, true);
+                    yield return new InstructionSetInfo("avx512vp2intersect", "Avx512Vp2intersect_VL", InstructionSet.X86_AVX512VP2INTERSECT, true);
+                    yield return new InstructionSetInfo("avxifma", "AvxIfma", InstructionSet.X86_AVXIFMA, true);
+                    yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true);
+                    yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X86_GFNI, true);
+                    yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X86_GFNI_V256, true);
+                    yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X86_GFNI_V512, true);
+                    yield return new InstructionSetInfo("sha", "Sha", InstructionSet.X86_SHA, true);
+                    yield return new InstructionSetInfo("vaes", "Aes_V256", InstructionSet.X86_AES_V256, true);
+                    yield return new InstructionSetInfo("vaes_v512", "Aes_V512", InstructionSet.X86_AES_V512, true);
                     yield return new InstructionSetInfo("vpclmul", "Pclmulqdq_V256", InstructionSet.X86_PCLMULQDQ_V256, true);
                     yield return new InstructionSetInfo("vpclmul_v512", "Pclmulqdq_V512", InstructionSet.X86_PCLMULQDQ_V512, true);
-                    yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X86_POPCNT, true);
+                    yield return new InstructionSetInfo("waitpkg", "WaitPkg", InstructionSet.X86_WAITPKG, true);
+                    yield return new InstructionSetInfo("x86serialize", "X86Serialize", InstructionSet.X86_X86Serialize, true);
                     yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false);
                     yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false);
                     yield return new InstructionSetInfo("Vector512", "", InstructionSet.X86_Vector512, false);
-                    yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true);
-                    yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X86_MOVBE, true);
-                    yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X86_X86Serialize, true);
-                    yield return new InstructionSetInfo("evex", "EVEX", InstructionSet.X86_EVEX, true);
-                    yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X86_AVX512F, true);
-                    yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X86_AVX512F_VL, true);
-                    yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X86_AVX512BW, true);
-                    yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X86_AVX512BW_VL, true);
-                    yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X86_AVX512CD, true);
-                    yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X86_AVX512CD_VL, true);
-                    yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X86_AVX512DQ, true);
-                    yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512DQ_VL, true);
-                    yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true);
-                    yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI_VL, true);
-                    yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X86_AVX10v1, true);
-                    yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X86_AVX10v1_V512, true);
-                    yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true);
-                    yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true);
-                    yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true);
-                    yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X86_APX, true);
-                    yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X86_AVX10v2, true);
-                    yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", InstructionSet.X86_AVX10v2_V512, true);
-                    yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X86_GFNI, true);
-                    yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X86_GFNI_V256, true);
-                    yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X86_GFNI_V512, true);
+                    yield return new InstructionSetInfo("vectort128", "", InstructionSet.X86_VectorT128, true);
+                    yield return new InstructionSetInfo("vectort256", "", InstructionSet.X86_VectorT256, true);
+                    yield return new InstructionSetInfo("vectort512", "", InstructionSet.X86_VectorT512, true);
                     break;
             }
         }
@@ -1541,15 +1506,16 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture)
                         AddInstructionSet(InstructionSet.ARM64_Sha256_Arm64);
                     if (HasInstructionSet(InstructionSet.ARM64_Sve))
                         AddInstructionSet(InstructionSet.ARM64_Sve_Arm64);
+                    if (HasInstructionSet(InstructionSet.ARM64_Sve2))
+                        AddInstructionSet(InstructionSet.ARM64_Sve2_Arm64);
+                    break;
+
+                case TargetArchitecture.RiscV64:
                     break;
 
                 case TargetArchitecture.X64:
                     if (HasInstructionSet(InstructionSet.X64_X86Base))
                         AddInstructionSet(InstructionSet.X64_X86Base_X64);
-                    if (HasInstructionSet(InstructionSet.X64_SSE))
-                        AddInstructionSet(InstructionSet.X64_SSE_X64);
-                    if (HasInstructionSet(InstructionSet.X64_SSE2))
-                        AddInstructionSet(InstructionSet.X64_SSE2_X64);
                     if (HasInstructionSet(InstructionSet.X64_SSE3))
                         AddInstructionSet(InstructionSet.X64_SSE3_X64);
                     if (HasInstructionSet(InstructionSet.X64_SSSE3))
@@ -1558,12 +1524,12 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture)
                         AddInstructionSet(InstructionSet.X64_SSE41_X64);
                     if (HasInstructionSet(InstructionSet.X64_SSE42))
                         AddInstructionSet(InstructionSet.X64_SSE42_X64);
+                    if (HasInstructionSet(InstructionSet.X64_POPCNT))
+                        AddInstructionSet(InstructionSet.X64_POPCNT_X64);
                     if (HasInstructionSet(InstructionSet.X64_AVX))
                         AddInstructionSet(InstructionSet.X64_AVX_X64);
                     if (HasInstructionSet(InstructionSet.X64_AVX2))
                         AddInstructionSet(InstructionSet.X64_AVX2_X64);
-                    if (HasInstructionSet(InstructionSet.X64_AES))
-                        AddInstructionSet(InstructionSet.X64_AES_X64);
                     if (HasInstructionSet(InstructionSet.X64_BMI1))
                         AddInstructionSet(InstructionSet.X64_BMI1_X64);
                     if (HasInstructionSet(InstructionSet.X64_BMI2))
@@ -1572,34 +1538,34 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture)
                         AddInstructionSet(InstructionSet.X64_FMA_X64);
                     if (HasInstructionSet(InstructionSet.X64_LZCNT))
                         AddInstructionSet(InstructionSet.X64_LZCNT_X64);
-                    if (HasInstructionSet(InstructionSet.X64_PCLMULQDQ))
-                        AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
-                    if (HasInstructionSet(InstructionSet.X64_POPCNT))
-                        AddInstructionSet(InstructionSet.X64_POPCNT_X64);
-                    if (HasInstructionSet(InstructionSet.X64_AVXVNNI))
-                        AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
-                    if (HasInstructionSet(InstructionSet.X64_X86Serialize))
-                        AddInstructionSet(InstructionSet.X64_X86Serialize_X64);
-                    if (HasInstructionSet(InstructionSet.X64_AVX512F))
-                        AddInstructionSet(InstructionSet.X64_AVX512F_X64);
-                    if (HasInstructionSet(InstructionSet.X64_AVX512BW))
-                        AddInstructionSet(InstructionSet.X64_AVX512BW_X64);
-                    if (HasInstructionSet(InstructionSet.X64_AVX512CD))
-                        AddInstructionSet(InstructionSet.X64_AVX512CD_X64);
-                    if (HasInstructionSet(InstructionSet.X64_AVX512DQ))
-                        AddInstructionSet(InstructionSet.X64_AVX512DQ_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AVX512))
+                        AddInstructionSet(InstructionSet.X64_AVX512_X64);
                     if (HasInstructionSet(InstructionSet.X64_AVX512VBMI))
                         AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AVX512v3))
+                        AddInstructionSet(InstructionSet.X64_AVX512v3_X64);
                     if (HasInstructionSet(InstructionSet.X64_AVX10v1))
                         AddInstructionSet(InstructionSet.X64_AVX10v1_X64);
-                    if (HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
-                        AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64);
                     if (HasInstructionSet(InstructionSet.X64_AVX10v2))
                         AddInstructionSet(InstructionSet.X64_AVX10v2_X64);
-                    if (HasInstructionSet(InstructionSet.X64_AVX10v2_V512))
-                        AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AES))
+                        AddInstructionSet(InstructionSet.X64_AES_X64);
+                    if (HasInstructionSet(InstructionSet.X64_PCLMULQDQ))
+                        AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT))
+                        AddInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AVXIFMA))
+                        AddInstructionSet(InstructionSet.X64_AVXIFMA_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AVXVNNI))
+                        AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
                     if (HasInstructionSet(InstructionSet.X64_GFNI))
                         AddInstructionSet(InstructionSet.X64_GFNI_X64);
+                    if (HasInstructionSet(InstructionSet.X64_SHA))
+                        AddInstructionSet(InstructionSet.X64_SHA_X64);
+                    if (HasInstructionSet(InstructionSet.X64_WAITPKG))
+                        AddInstructionSet(InstructionSet.X64_WAITPKG_X64);
+                    if (HasInstructionSet(InstructionSet.X64_X86Serialize))
+                        AddInstructionSet(InstructionSet.X64_X86Serialize_X64);
                     break;
 
                 case TargetArchitecture.X86:
@@ -1622,68 +1588,68 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc
                     AddInstructionSet(InstructionSet.ARM64_Sha1_Arm64);
                     AddInstructionSet(InstructionSet.ARM64_Sha256_Arm64);
                     AddInstructionSet(InstructionSet.ARM64_Sve_Arm64);
+                    AddInstructionSet(InstructionSet.ARM64_Sve2_Arm64);
+                    break;
+
+                case TargetArchitecture.RiscV64:
                     break;
 
                 case TargetArchitecture.X64:
                     AddInstructionSet(InstructionSet.X64_X86Base_X64);
-                    AddInstructionSet(InstructionSet.X64_SSE_X64);
-                    AddInstructionSet(InstructionSet.X64_SSE2_X64);
                     AddInstructionSet(InstructionSet.X64_SSE3_X64);
                     AddInstructionSet(InstructionSet.X64_SSSE3_X64);
                     AddInstructionSet(InstructionSet.X64_SSE41_X64);
                     AddInstructionSet(InstructionSet.X64_SSE42_X64);
+                    AddInstructionSet(InstructionSet.X64_POPCNT_X64);
                     AddInstructionSet(InstructionSet.X64_AVX_X64);
                     AddInstructionSet(InstructionSet.X64_AVX2_X64);
-                    AddInstructionSet(InstructionSet.X64_AES_X64);
                     AddInstructionSet(InstructionSet.X64_BMI1_X64);
                     AddInstructionSet(InstructionSet.X64_BMI2_X64);
                     AddInstructionSet(InstructionSet.X64_FMA_X64);
                     AddInstructionSet(InstructionSet.X64_LZCNT_X64);
-                    AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
-                    AddInstructionSet(InstructionSet.X64_POPCNT_X64);
-                    AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
-                    AddInstructionSet(InstructionSet.X64_X86Serialize_X64);
-                    AddInstructionSet(InstructionSet.X64_AVX512F_X64);
-                    AddInstructionSet(InstructionSet.X64_AVX512BW_X64);
-                    AddInstructionSet(InstructionSet.X64_AVX512CD_X64);
-                    AddInstructionSet(InstructionSet.X64_AVX512DQ_X64);
+                    AddInstructionSet(InstructionSet.X64_AVX512_X64);
                     AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64);
+                    AddInstructionSet(InstructionSet.X64_AVX512v3_X64);
                     AddInstructionSet(InstructionSet.X64_AVX10v1_X64);
-                    AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64);
                     AddInstructionSet(InstructionSet.X64_AVX10v2_X64);
-                    AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64);
+                    AddInstructionSet(InstructionSet.X64_AES_X64);
+                    AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
+                    AddInstructionSet(InstructionSet.X64_AVX512VP2INTERSECT_X64);
+                    AddInstructionSet(InstructionSet.X64_AVXIFMA_X64);
+                    AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
                     AddInstructionSet(InstructionSet.X64_GFNI_X64);
+                    AddInstructionSet(InstructionSet.X64_SHA_X64);
+                    AddInstructionSet(InstructionSet.X64_WAITPKG_X64);
+                    AddInstructionSet(InstructionSet.X64_X86Serialize_X64);
                     break;
 
                 case TargetArchitecture.X86:
                     AddInstructionSet(InstructionSet.X86_X86Base_X64);
-                    AddInstructionSet(InstructionSet.X86_SSE_X64);
-                    AddInstructionSet(InstructionSet.X86_SSE2_X64);
                     AddInstructionSet(InstructionSet.X86_SSE3_X64);
                     AddInstructionSet(InstructionSet.X86_SSSE3_X64);
                     AddInstructionSet(InstructionSet.X86_SSE41_X64);
                     AddInstructionSet(InstructionSet.X86_SSE42_X64);
+                    AddInstructionSet(InstructionSet.X86_POPCNT_X64);
                     AddInstructionSet(InstructionSet.X86_AVX_X64);
                     AddInstructionSet(InstructionSet.X86_AVX2_X64);
-                    AddInstructionSet(InstructionSet.X86_AES_X64);
                     AddInstructionSet(InstructionSet.X86_BMI1_X64);
                     AddInstructionSet(InstructionSet.X86_BMI2_X64);
                     AddInstructionSet(InstructionSet.X86_FMA_X64);
                     AddInstructionSet(InstructionSet.X86_LZCNT_X64);
-                    AddInstructionSet(InstructionSet.X86_PCLMULQDQ_X64);
-                    AddInstructionSet(InstructionSet.X86_POPCNT_X64);
-                    AddInstructionSet(InstructionSet.X86_AVXVNNI_X64);
-                    AddInstructionSet(InstructionSet.X86_X86Serialize_X64);
-                    AddInstructionSet(InstructionSet.X86_AVX512F_X64);
-                    AddInstructionSet(InstructionSet.X86_AVX512BW_X64);
-                    AddInstructionSet(InstructionSet.X86_AVX512CD_X64);
-                    AddInstructionSet(InstructionSet.X86_AVX512DQ_X64);
+                    AddInstructionSet(InstructionSet.X86_AVX512_X64);
                     AddInstructionSet(InstructionSet.X86_AVX512VBMI_X64);
+                    AddInstructionSet(InstructionSet.X86_AVX512v3_X64);
                     AddInstructionSet(InstructionSet.X86_AVX10v1_X64);
-                    AddInstructionSet(InstructionSet.X86_AVX10v1_V512_X64);
                     AddInstructionSet(InstructionSet.X86_AVX10v2_X64);
-                    AddInstructionSet(InstructionSet.X86_AVX10v2_V512_X64);
+                    AddInstructionSet(InstructionSet.X86_AES_X64);
+                    AddInstructionSet(InstructionSet.X86_PCLMULQDQ_X64);
+                    AddInstructionSet(InstructionSet.X86_AVX512VP2INTERSECT_X64);
+                    AddInstructionSet(InstructionSet.X86_AVXIFMA_X64);
+                    AddInstructionSet(InstructionSet.X86_AVXVNNI_X64);
                     AddInstructionSet(InstructionSet.X86_GFNI_X64);
+                    AddInstructionSet(InstructionSet.X86_SHA_X64);
+                    AddInstructionSet(InstructionSet.X86_WAITPKG_X64);
+                    AddInstructionSet(InstructionSet.X86_X86Serialize_X64);
                     break;
             }
         }
@@ -1792,6 +1758,28 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.ARM64_Sve; }
 
+                    case "Sve2":
+                        if (nestedTypeName == "Arm64")
+                        { return InstructionSet.ARM64_Sve2_Arm64; }
+                        else
+                        { return InstructionSet.ARM64_Sve2; }
+
+                }
+                break;
+
+                case TargetArchitecture.RiscV64:
+                switch (typeName)
+                {
+
+                    case "RiscV64Base":
+                        { return InstructionSet.RiscV64_RiscV64Base; }
+
+                    case "Zba":
+                        { return InstructionSet.RiscV64_Zba; }
+
+                    case "Zbb":
+                        { return InstructionSet.RiscV64_Zbb; }
+
                 }
                 break;
 
@@ -1807,15 +1795,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
 
                     case "Sse":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_SSE_X64; }
+                        { return InstructionSet.X64_X86Base_X64; }
                         else
-                        { return InstructionSet.X64_SSE; }
+                        { return InstructionSet.X64_X86Base; }
 
                     case "Sse2":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_SSE2_X64; }
+                        { return InstructionSet.X64_X86Base_X64; }
                         else
-                        { return InstructionSet.X64_SSE2; }
+                        { return InstructionSet.X64_X86Base; }
 
                     case "Sse3":
                         if (nestedTypeName == "X64")
@@ -1841,6 +1829,12 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.X64_SSE42; }
 
+                    case "Popcnt":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_POPCNT_X64; }
+                        else
+                        { return InstructionSet.X64_POPCNT; }
+
                     case "Avx":
                         if (nestedTypeName == "X64")
                         { return InstructionSet.X64_AVX_X64; }
@@ -1853,12 +1847,6 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.X64_AVX2; }
 
-                    case "Aes":
-                        if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_AES_X64; }
-                        else
-                        { return InstructionSet.X64_AES; }
-
                     case "Bmi1":
                         if (nestedTypeName == "X64")
                         { return InstructionSet.X64_BMI1_X64; }
@@ -1871,6 +1859,12 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.X64_BMI2; }
 
+                    case "F16C":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_AVX2_X64; }
+                        else
+                        { return InstructionSet.X64_AVX2; }
+
                     case "Fma":
                         if (nestedTypeName == "X64")
                         { return InstructionSet.X64_FMA_X64; }
@@ -1883,123 +1877,198 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.X64_LZCNT; }
 
-                    case "Pclmulqdq":
+                    case "Avx512F":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_PCLMULQDQ_X64; }
+                        { return InstructionSet.X64_AVX512_X64; }
                         else
-                        if (nestedTypeName == "V256")
-                        { return InstructionSet.X64_PCLMULQDQ_V256; }
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX512_X64; }
                         else
-                        if (nestedTypeName == "V512")
-                        { return InstructionSet.X64_PCLMULQDQ_V512; }
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X64_AVX512; }
                         else
-                        { return InstructionSet.X64_PCLMULQDQ; }
+                        { return InstructionSet.X64_AVX512; }
 
-                    case "Popcnt":
+                    case "Avx512BW":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_POPCNT_X64; }
+                        { return InstructionSet.X64_AVX512_X64; }
                         else
-                        { return InstructionSet.X64_POPCNT; }
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX512_X64; }
+                        else
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X64_AVX512; }
+                        else
+                        { return InstructionSet.X64_AVX512; }
 
-                    case "AvxVnni":
+                    case "Avx512CD":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_AVXVNNI_X64; }
+                        { return InstructionSet.X64_AVX512_X64; }
                         else
-                        { return InstructionSet.X64_AVXVNNI; }
-
-                    case "Movbe":
-                        { return InstructionSet.X64_MOVBE; }
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX512_X64; }
+                        else
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X64_AVX512; }
+                        else
+                        { return InstructionSet.X64_AVX512; }
 
-                    case "X86Serialize":
+                    case "Avx512DQ":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_X86Serialize_X64; }
+                        { return InstructionSet.X64_AVX512_X64; }
                         else
-                        { return InstructionSet.X64_X86Serialize; }
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX512_X64; }
+                        else
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X64_AVX512; }
+                        else
+                        { return InstructionSet.X64_AVX512; }
 
-                    case "EVEX":
-                        { return InstructionSet.X64_EVEX; }
+                    case "Avx512Vbmi":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_AVX512VBMI_X64; }
+                        else
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX512VBMI_X64; }
+                        else
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X64_AVX512VBMI; }
+                        else
+                        { return InstructionSet.X64_AVX512VBMI; }
 
-                    case "Avx512F":
+                    case "Avx512Bitalg":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_AVX512F_X64; }
+                        { return InstructionSet.X64_AVX512v3_X64; }
+                        else
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX512v3_X64; }
                         else
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X64_AVX512F_VL; }
+                        { return InstructionSet.X64_AVX512v3; }
                         else
-                        { return InstructionSet.X64_AVX512F; }
+                        { return InstructionSet.X64_AVX512v3; }
 
-                    case "Avx512BW":
+                    case "Avx512Vbmi2":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_AVX512BW_X64; }
+                        { return InstructionSet.X64_AVX512v3_X64; }
+                        else
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX512v3_X64; }
                         else
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X64_AVX512BW_VL; }
+                        { return InstructionSet.X64_AVX512v3; }
                         else
-                        { return InstructionSet.X64_AVX512BW; }
+                        { return InstructionSet.X64_AVX512v3; }
 
-                    case "Avx512CD":
+                    case "Avx512Vpopcntdq":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_AVX512CD_X64; }
+                        { return InstructionSet.X64_AVX512v3_X64; }
+                        else
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX512v3_X64; }
                         else
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X64_AVX512CD_VL; }
+                        { return InstructionSet.X64_AVX512v3; }
                         else
-                        { return InstructionSet.X64_AVX512CD; }
+                        { return InstructionSet.X64_AVX512v3; }
 
-                    case "Avx512DQ":
+                    case "Avx512Bf16":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_AVX512DQ_X64; }
+                        { return InstructionSet.X64_AVX10v1_X64; }
+                        else
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX10v1_X64; }
                         else
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X64_AVX512DQ_VL; }
+                        { return InstructionSet.X64_AVX10v1; }
                         else
-                        { return InstructionSet.X64_AVX512DQ; }
+                        { return InstructionSet.X64_AVX10v1; }
 
-                    case "Avx512Vbmi":
+                    case "Avx512Fp16":
                         if (nestedTypeName == "X64")
-                        { return InstructionSet.X64_AVX512VBMI_X64; }
+                        { return InstructionSet.X64_AVX10v1_X64; }
+                        else
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX10v1_X64; }
                         else
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X64_AVX512VBMI_VL; }
+                        { return InstructionSet.X64_AVX10v1; }
                         else
-                        { return InstructionSet.X64_AVX512VBMI; }
+                        { return InstructionSet.X64_AVX10v1; }
 
                     case "Avx10v1":
                         if (nestedTypeName == "X64")
                         { return InstructionSet.X64_AVX10v1_X64; }
                         else
                         if (nestedTypeName == "V512_X64")
-                        { return InstructionSet.X64_AVX10v1_V512_X64; }
+                        { return InstructionSet.X64_AVX10v1_X64; }
                         else
                         if (nestedTypeName == "V512")
-                        { return InstructionSet.X64_AVX10v1_V512; }
+                        { return InstructionSet.X64_AVX10v1; }
                         else
                         { return InstructionSet.X64_AVX10v1; }
 
-                    case "VectorT128":
-                        { return InstructionSet.X64_VectorT128; }
-
-                    case "VectorT256":
-                        { return InstructionSet.X64_VectorT256; }
-
-                    case "VectorT512":
-                        { return InstructionSet.X64_VectorT512; }
-
-                    case "Apx":
-                        { return InstructionSet.X64_APX; }
-
                     case "Avx10v2":
                         if (nestedTypeName == "X64")
                         { return InstructionSet.X64_AVX10v2_X64; }
                         else
                         if (nestedTypeName == "V512_X64")
-                        { return InstructionSet.X64_AVX10v2_V512_X64; }
+                        { return InstructionSet.X64_AVX10v2_X64; }
                         else
                         if (nestedTypeName == "V512")
-                        { return InstructionSet.X64_AVX10v2_V512; }
+                        { return InstructionSet.X64_AVX10v2; }
                         else
                         { return InstructionSet.X64_AVX10v2; }
 
+                    case "Aes":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_AES_X64; }
+                        else
+                        if (nestedTypeName == "V256")
+                        { return InstructionSet.X64_AES_V256; }
+                        else
+                        if (nestedTypeName == "V512")
+                        { return InstructionSet.X64_AES_V512; }
+                        else
+                        { return InstructionSet.X64_AES; }
+
+                    case "Pclmulqdq":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_PCLMULQDQ_X64; }
+                        else
+                        if (nestedTypeName == "V256")
+                        { return InstructionSet.X64_PCLMULQDQ_V256; }
+                        else
+                        if (nestedTypeName == "V512")
+                        { return InstructionSet.X64_PCLMULQDQ_V512; }
+                        else
+                        { return InstructionSet.X64_PCLMULQDQ; }
+
+                    case "Avx512Vp2intersect":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_AVX512VP2INTERSECT_X64; }
+                        else
+                        if (nestedTypeName == "VL_X64")
+                        { return InstructionSet.X64_AVX512VP2INTERSECT_X64; }
+                        else
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X64_AVX512VP2INTERSECT; }
+                        else
+                        { return InstructionSet.X64_AVX512VP2INTERSECT; }
+
+                    case "AvxIfma":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_AVXIFMA_X64; }
+                        else
+                        { return InstructionSet.X64_AVXIFMA; }
+
+                    case "AvxVnni":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_AVXVNNI_X64; }
+                        else
+                        { return InstructionSet.X64_AVXVNNI; }
+
                     case "Gfni":
                         if (nestedTypeName == "X64")
                         { return InstructionSet.X64_GFNI_X64; }
@@ -2012,6 +2081,24 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.X64_GFNI; }
 
+                    case "Sha":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_SHA_X64; }
+                        else
+                        { return InstructionSet.X64_SHA; }
+
+                    case "WaitPkg":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_WAITPKG_X64; }
+                        else
+                        { return InstructionSet.X64_WAITPKG; }
+
+                    case "X86Serialize":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_X86Serialize_X64; }
+                        else
+                        { return InstructionSet.X64_X86Serialize; }
+
                 }
                 break;
 
@@ -2023,10 +2110,10 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         { return InstructionSet.X86_X86Base; }
 
                     case "Sse":
-                        { return InstructionSet.X86_SSE; }
+                        { return InstructionSet.X86_X86Base; }
 
                     case "Sse2":
-                        { return InstructionSet.X86_SSE2; }
+                        { return InstructionSet.X86_X86Base; }
 
                     case "Sse3":
                         { return InstructionSet.X86_SSE3; }
@@ -2040,105 +2127,132 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                     case "Sse42":
                         { return InstructionSet.X86_SSE42; }
 
+                    case "Popcnt":
+                        { return InstructionSet.X86_POPCNT; }
+
                     case "Avx":
                         { return InstructionSet.X86_AVX; }
 
                     case "Avx2":
                         { return InstructionSet.X86_AVX2; }
 
-                    case "Aes":
-                        { return InstructionSet.X86_AES; }
-
                     case "Bmi1":
                         { return InstructionSet.X86_BMI1; }
 
                     case "Bmi2":
                         { return InstructionSet.X86_BMI2; }
 
+                    case "F16C":
+                        { return InstructionSet.X86_AVX2; }
+
                     case "Fma":
                         { return InstructionSet.X86_FMA; }
 
                     case "Lzcnt":
                         { return InstructionSet.X86_LZCNT; }
 
-                    case "Pclmulqdq":
-                        if (nestedTypeName == "V256")
-                        { return InstructionSet.X86_PCLMULQDQ_V256; }
-                        else
-                        if (nestedTypeName == "V512")
-                        { return InstructionSet.X86_PCLMULQDQ_V512; }
-                        else
-                        { return InstructionSet.X86_PCLMULQDQ; }
-
-                    case "Popcnt":
-                        { return InstructionSet.X86_POPCNT; }
-
-                    case "AvxVnni":
-                        { return InstructionSet.X86_AVXVNNI; }
-
-                    case "Movbe":
-                        { return InstructionSet.X86_MOVBE; }
-
-                    case "X86Serialize":
-                        { return InstructionSet.X86_X86Serialize; }
-
-                    case "EVEX":
-                        { return InstructionSet.X86_EVEX; }
-
                     case "Avx512F":
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X86_AVX512F_VL; }
+                        { return InstructionSet.X86_AVX512; }
                         else
-                        { return InstructionSet.X86_AVX512F; }
+                        { return InstructionSet.X86_AVX512; }
 
                     case "Avx512BW":
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X86_AVX512BW_VL; }
+                        { return InstructionSet.X86_AVX512; }
                         else
-                        { return InstructionSet.X86_AVX512BW; }
+                        { return InstructionSet.X86_AVX512; }
 
                     case "Avx512CD":
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X86_AVX512CD_VL; }
+                        { return InstructionSet.X86_AVX512; }
                         else
-                        { return InstructionSet.X86_AVX512CD; }
+                        { return InstructionSet.X86_AVX512; }
 
                     case "Avx512DQ":
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X86_AVX512DQ_VL; }
+                        { return InstructionSet.X86_AVX512; }
                         else
-                        { return InstructionSet.X86_AVX512DQ; }
+                        { return InstructionSet.X86_AVX512; }
 
                     case "Avx512Vbmi":
                         if (nestedTypeName == "VL")
-                        { return InstructionSet.X86_AVX512VBMI_VL; }
+                        { return InstructionSet.X86_AVX512VBMI; }
                         else
                         { return InstructionSet.X86_AVX512VBMI; }
 
-                    case "Avx10v1":
-                        if (nestedTypeName == "V512")
-                        { return InstructionSet.X86_AVX10v1_V512; }
+                    case "Avx512Bitalg":
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X86_AVX512v3; }
                         else
-                        { return InstructionSet.X86_AVX10v1; }
+                        { return InstructionSet.X86_AVX512v3; }
 
-                    case "VectorT128":
-                        { return InstructionSet.X86_VectorT128; }
+                    case "Avx512Vbmi2":
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X86_AVX512v3; }
+                        else
+                        { return InstructionSet.X86_AVX512v3; }
 
-                    case "VectorT256":
-                        { return InstructionSet.X86_VectorT256; }
+                    case "Avx512Vpopcntdq":
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X86_AVX512v3; }
+                        else
+                        { return InstructionSet.X86_AVX512v3; }
+
+                    case "Avx512Bf16":
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X86_AVX10v1; }
+                        else
+                        { return InstructionSet.X86_AVX10v1; }
 
-                    case "VectorT512":
-                        { return InstructionSet.X86_VectorT512; }
+                    case "Avx512Fp16":
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X86_AVX10v1; }
+                        else
+                        { return InstructionSet.X86_AVX10v1; }
 
-                    case "Apx":
-                        { return InstructionSet.X86_APX; }
+                    case "Avx10v1":
+                        if (nestedTypeName == "V512")
+                        { return InstructionSet.X86_AVX10v1; }
+                        else
+                        { return InstructionSet.X86_AVX10v1; }
 
                     case "Avx10v2":
                         if (nestedTypeName == "V512")
-                        { return InstructionSet.X86_AVX10v2_V512; }
+                        { return InstructionSet.X86_AVX10v2; }
                         else
                         { return InstructionSet.X86_AVX10v2; }
 
+                    case "Aes":
+                        if (nestedTypeName == "V256")
+                        { return InstructionSet.X86_AES_V256; }
+                        else
+                        if (nestedTypeName == "V512")
+                        { return InstructionSet.X86_AES_V512; }
+                        else
+                        { return InstructionSet.X86_AES; }
+
+                    case "Pclmulqdq":
+                        if (nestedTypeName == "V256")
+                        { return InstructionSet.X86_PCLMULQDQ_V256; }
+                        else
+                        if (nestedTypeName == "V512")
+                        { return InstructionSet.X86_PCLMULQDQ_V512; }
+                        else
+                        { return InstructionSet.X86_PCLMULQDQ; }
+
+                    case "Avx512Vp2intersect":
+                        if (nestedTypeName == "VL")
+                        { return InstructionSet.X86_AVX512VP2INTERSECT; }
+                        else
+                        { return InstructionSet.X86_AVX512VP2INTERSECT; }
+
+                    case "AvxIfma":
+                        { return InstructionSet.X86_AVXIFMA; }
+
+                    case "AvxVnni":
+                        { return InstructionSet.X86_AVXVNNI; }
+
                     case "Gfni":
                         if (nestedTypeName == "V256")
                         { return InstructionSet.X86_GFNI_V256; }
@@ -2148,6 +2262,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.X86_GFNI; }
 
+                    case "Sha":
+                        { return InstructionSet.X86_SHA; }
+
+                    case "WaitPkg":
+                        { return InstructionSet.X86_WAITPKG; }
+
+                    case "X86Serialize":
+                        { return InstructionSet.X86_X86Serialize; }
+
                 }
                 break;
 
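
Aside on the regenerated lookup above: several managed intrinsic classes now fold onto a single consolidated JIT instruction set (Sse/Sse2 onto X86Base, the Avx512F/BW/CD/DQ family onto AVX512, and so on). A minimal sketch of that many-to-one mapping, using a hypothetical helper and string names rather than the generated enum, not the patch's actual code:

    // Editor's sketch (hypothetical helper): several managed ISA class names
    // now resolve to one consolidated JIT ISA name.
    static string MapIsaClassToJitIsa(string typeName) => typeName switch
    {
        "Sse" or "Sse2" => "X86Base",
        "Avx512F" or "Avx512BW" or "Avx512CD" or "Avx512DQ" => "AVX512",
        "Avx512Bitalg" or "Avx512Vbmi2" or "Avx512Vpopcntdq" => "AVX512v3",
        _ => typeName, // everything else keeps its own instruction set
    };
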
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
index 08d7df78c946..9e75ee782fab 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
@@ -620,8 +620,7 @@ public enum CORINFO_EH_CLAUSE_FLAGS
         CORINFO_EH_CLAUSE_FILTER = 0x0001, // If this bit is on, then this EH entry is for a filter
         CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause
         CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause
-        CORINFO_EH_CLAUSE_DUPLICATED = 0x0008, // Duplicated clause. This clause was duplicated to a funclet which was pulled out of line
-        CORINFO_EH_CLAUSE_SAMETRY = 0x0010, // This clause covers same try block as the previous one. (Used by NativeAOT ABI.)
+        CORINFO_EH_CLAUSE_SAMETRY = 0x0010, // This clause covers same try block as the previous one.
     };
 
     public struct CORINFO_EH_CLAUSE
@@ -873,6 +872,18 @@ public struct InlinedCallFrameInfo
         public CORINFO_OS osType;
     }
 
+    public unsafe struct CORINFO_ASYNC_INFO
+    {
+        // Class handle for System.Runtime.CompilerServices.Continuation
+        public CORINFO_CLASS_STRUCT_* continuationClsHnd;
+        // 'Next' field
+        public CORINFO_FIELD_STRUCT_* continuationNextFldHnd;
+        // 'Data' field
+        public CORINFO_FIELD_STRUCT_* continuationDataFldHnd;
+        // 'GCData' field
+        public CORINFO_FIELD_STRUCT_* continuationGCDataFldHnd;
+    }
+
     // Flags passed from JIT to runtime.
     public enum CORINFO_GET_TAILCALL_HELPERS_FLAGS
     {
@@ -1381,10 +1392,10 @@ public enum CorJitFlag : uint
         CORJIT_FLAG_ALT_JIT                 = 8, // JIT should consider itself an ALT_JIT
         CORJIT_FLAG_FROZEN_ALLOC_ALLOWED    = 9, // JIT is allowed to use *_MAYBEFROZEN allocators
         // CORJIT_FLAG_UNUSED               = 10,
-        CORJIT_FLAG_READYTORUN              = 11, // Use version-resilient code generation
+        CORJIT_FLAG_AOT                     = 11, // Do ahead-of-time code generation (ReadyToRun or NativeAOT)
         CORJIT_FLAG_PROF_ENTERLEAVE         = 12, // Instrument prologues/epilogues
         CORJIT_FLAG_PROF_NO_PINVOKE_INLINE  = 13, // Disables PInvoke inlining
-        CORJIT_FLAG_PREJIT                  = 14, // prejit is the execution engine.
+        // CORJIT_FLAG_UNUSED               = 14,
         CORJIT_FLAG_RELOC                   = 15, // Generate relocatable code
         CORJIT_FLAG_IL_STUB                 = 16, // method is an IL stub
         CORJIT_FLAG_PROCSPLIT               = 17, // JIT should separate code into hot and cold sections
@@ -1403,9 +1414,6 @@ public enum CorJitFlag : uint
         // ARM only
         CORJIT_FLAG_RELATIVE_CODE_RELOCS    = 29, // JIT should generate PC-relative address computations instead of EE relocation records
         CORJIT_FLAG_SOFTFP_ABI              = 30, // Enable armel calling convention
-
-        // x86/x64 only
-        CORJIT_FLAG_VECTOR512_THROTTLING    = 31, // On x86/x64, 512-bit vector usage may incur CPU frequency throttling
     }
 
     public struct CORJIT_FLAGS
diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
index d206c3056f81..f7ce50804daa 100644
--- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
+++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
@@ -23,174 +23,210 @@
 ; DO NOT CHANGE R2R NUMERIC VALUES OF THE EXISTING SETS. Changing R2R numeric values definitions would be R2R format breaking change.
 
 ; The ISA definitions should also be mapped to `hwintrinsicIsaRangeArray` in hwintrinsic.cpp.
+; NEXT_AVAILABLE_R2R_BIT = 82
 
 ; Definition of X86 instruction sets
 definearch         ,X86   ,32Bit                ,X64, X64
 
-instructionset     ,X86   ,X86Base              ,        ,22 ,X86Base               ,base
-instructionset     ,X86   ,Sse                  ,        ,1  ,SSE                   ,sse
-instructionset     ,X86   ,Sse2                 ,        ,2  ,SSE2                  ,sse2
-instructionset     ,X86   ,Sse3                 ,        ,3  ,SSE3                  ,sse3
-instructionset     ,X86   ,Ssse3                ,        ,4  ,SSSE3                 ,ssse3
-instructionset     ,X86   ,Sse41                ,        ,5  ,SSE41                 ,sse4.1
-instructionset     ,X86   ,Sse42                ,        ,6  ,SSE42                 ,sse4.2
-instructionset     ,X86   ,Avx                  ,        ,7  ,AVX                   ,avx
-instructionset     ,X86   ,Avx2                 ,        ,8  ,AVX2                  ,avx2
-instructionset     ,X86   ,Aes                  ,        ,9  ,AES                   ,aes
-instructionset     ,X86   ,Bmi1                 ,        ,10 ,BMI1                  ,bmi
-instructionset     ,X86   ,Bmi2                 ,        ,11 ,BMI2                  ,bmi2
-instructionset     ,X86   ,Fma                  ,        ,12 ,FMA                   ,fma
-instructionset     ,X86   ,Lzcnt                ,        ,13 ,LZCNT                 ,lzcnt
-instructionset     ,X86   ,Pclmulqdq            ,        ,14 ,PCLMULQDQ             ,pclmul
-instructionset     ,X86   ,Pclmulqdq_V256       ,        ,49 ,PCLMULQDQ_V256        ,vpclmul
-instructionset     ,X86   ,Pclmulqdq_V512       ,        ,50 ,PCLMULQDQ_V512        ,vpclmul_v512
-instructionset     ,X86   ,Popcnt               ,        ,15 ,POPCNT                ,popcnt
-instructionset     ,X86   ,                     ,        ,   ,Vector128             ,
-instructionset     ,X86   ,                     ,        ,   ,Vector256             ,
-instructionset     ,X86   ,                     ,        ,   ,Vector512             ,
-instructionset     ,X86   ,AvxVnni              ,        ,25 ,AVXVNNI               ,avxvnni
-instructionset     ,X86   ,Movbe                ,        ,27 ,MOVBE                 ,movbe
-instructionset     ,X86   ,X86Serialize         ,        ,28 ,X86Serialize          ,serialize
-instructionset     ,X86   ,EVEX                 ,        ,47 ,EVEX                  ,evex
-instructionset     ,X86   ,Avx512F              ,        ,29 ,AVX512F               ,avx512f
-instructionset     ,X86   ,Avx512F_VL           ,        ,30 ,AVX512F_VL            ,avx512f_vl
-instructionset     ,X86   ,Avx512BW             ,        ,31 ,AVX512BW              ,avx512bw
-instructionset     ,X86   ,Avx512BW_VL          ,        ,32 ,AVX512BW_VL           ,avx512bw_vl
-instructionset     ,X86   ,Avx512CD             ,        ,33 ,AVX512CD              ,avx512cd
-instructionset     ,X86   ,Avx512CD_VL          ,        ,34 ,AVX512CD_VL           ,avx512cd_vl
-instructionset     ,X86   ,Avx512DQ             ,        ,35 ,AVX512DQ              ,avx512dq
-instructionset     ,X86   ,Avx512DQ_VL          ,        ,36 ,AVX512DQ_VL           ,avx512dq_vl
-instructionset     ,X86   ,Avx512Vbmi           ,        ,37 ,AVX512VBMI            ,avx512vbmi
-instructionset     ,X86   ,Avx512Vbmi_VL        ,        ,38 ,AVX512VBMI_VL         ,avx512vbmi_vl
-instructionset     ,X86   ,Avx10v1              ,        ,44 ,AVX10v1               ,avx10v1
-instructionset     ,X86   ,Avx10v1_V512         ,        ,46 ,AVX10v1_V512          ,avx10v1_v512
-instructionset     ,X86   ,VectorT128           ,        ,39 ,VectorT128            ,vectort128
-instructionset     ,X86   ,VectorT256           ,        ,40 ,VectorT256            ,vectort256
-instructionset     ,X86   ,VectorT512           ,        ,41 ,VectorT512            ,vectort512
-instructionset     ,X86   ,Apx                  ,        ,48 ,APX                   ,apx
-instructionset     ,X86   ,Avx10v2              ,        ,51 ,AVX10v2               ,avx10v2
-instructionset     ,X86   ,Avx10v2_V512         ,        ,52 ,AVX10v2_V512          ,avx10v2_v512
-instructionset     ,X86   ,Gfni                 ,        ,53 ,GFNI                  ,gfni
-instructionset     ,X86   ,Gfni_V256            ,        ,54 ,GFNI_V256             ,gfni_v256
-instructionset     ,X86   ,Gfni_V512            ,        ,55 ,GFNI_V512             ,gfni_v512
+instructionset     ,X86   ,X86Base              ,                   ,22 ,X86Base               ,base
+instructionset     ,X86   ,Sse                  ,                   ,1  ,X86Base               ,base
+instructionset     ,X86   ,Sse2                 ,                   ,2  ,X86Base               ,base
+
+instructionset     ,X86   ,Sse3                 ,                   ,3  ,SSE3                  ,sse3
+instructionset     ,X86   ,Ssse3                ,                   ,4  ,SSSE3                 ,ssse3
+instructionset     ,X86   ,Sse41                ,                   ,5  ,SSE41                 ,sse4.1
+instructionset     ,X86   ,Sse42                ,                   ,6  ,SSE42                 ,sse4.2
+instructionset     ,X86   ,Popcnt               ,                   ,15 ,POPCNT                ,popcnt
+
+instructionset     ,X86   ,Avx                  ,                   ,7  ,AVX                   ,avx
+
+instructionset     ,X86   ,Avx2                 ,                   ,8  ,AVX2                  ,avx2
+instructionset     ,X86   ,Bmi1                 ,                   ,10 ,BMI1                  ,bmi
+instructionset     ,X86   ,Bmi2                 ,                   ,11 ,BMI2                  ,bmi2
+instructionset     ,X86   ,F16C                 ,                   ,67 ,AVX2                  ,avx2
+instructionset     ,X86   ,Fma                  ,                   ,12 ,FMA                   ,fma
+instructionset     ,X86   ,Lzcnt                ,                   ,13 ,LZCNT                 ,lzcnt
+instructionset     ,X86   ,                     ,Movbe              ,27 ,MOVBE                 ,movbe
+
+instructionset     ,X86   ,                     ,Evex               ,47 ,AVX512                ,avx512
+instructionset     ,X86   ,Avx512F              ,                   ,29 ,AVX512                ,avx512
+instructionset     ,X86   ,Avx512F_VL           ,                   ,30 ,AVX512                ,avx512
+instructionset     ,X86   ,Avx512BW             ,                   ,31 ,AVX512                ,avx512
+instructionset     ,X86   ,Avx512BW_VL          ,                   ,32 ,AVX512                ,avx512
+instructionset     ,X86   ,Avx512CD             ,                   ,33 ,AVX512                ,avx512
+instructionset     ,X86   ,Avx512CD_VL          ,                   ,34 ,AVX512                ,avx512
+instructionset     ,X86   ,Avx512DQ             ,                   ,35 ,AVX512                ,avx512
+instructionset     ,X86   ,Avx512DQ_VL          ,                   ,36 ,AVX512                ,avx512
+
+instructionset     ,X86   ,                     ,Avx512Ifma         ,76 ,AVX512VBMI            ,avx512v2
+instructionset     ,X86   ,Avx512Vbmi           ,                   ,37 ,AVX512VBMI            ,avx512v2
+instructionset     ,X86   ,Avx512Vbmi_VL        ,                   ,38 ,AVX512VBMI            ,avx512v2
+
+instructionset     ,X86   ,Avx512Bitalg         ,                   ,70 ,AVX512v3              ,avx512v3
+instructionset     ,X86   ,Avx512Bitalg_VL      ,                   ,71 ,AVX512v3              ,avx512v3
+instructionset     ,X86   ,Avx512Vbmi2          ,                   ,77 ,AVX512v3              ,avx512v3
+instructionset     ,X86   ,Avx512Vbmi2_VL       ,                   ,78 ,AVX512v3              ,avx512v3
+instructionset     ,X86   ,                     ,Avx512Vnni         ,79 ,AVX512v3              ,avx512v3
+instructionset     ,X86   ,Avx512Vpopcntdq      ,                   ,82 ,AVX512v3              ,avx512v3
+instructionset     ,X86   ,Avx512Vpopcntdq_VL   ,                   ,83 ,AVX512v3              ,avx512v3
+
+instructionset     ,X86   ,Avx512Bf16           ,                   ,72 ,AVX10v1               ,avx10v1
+instructionset     ,X86   ,Avx512Bf16_VL        ,                   ,73 ,AVX10v1               ,avx10v1
+instructionset     ,X86   ,Avx512Fp16           ,                   ,74 ,AVX10v1               ,avx10v1
+instructionset     ,X86   ,Avx512Fp16_VL        ,                   ,75 ,AVX10v1               ,avx10v1
+instructionset     ,X86   ,Avx10v1              ,                   ,44 ,AVX10v1               ,avx10v1
+instructionset     ,X86   ,Avx10v1_V512         ,                   ,46 ,AVX10v1               ,avx10v1
+
+instructionset     ,X86   ,Avx10v2              ,                   ,51 ,AVX10v2               ,avx10v2
+instructionset     ,X86   ,Avx10v2_V512         ,                   ,52 ,AVX10v2               ,avx10v2
+
+instructionset     ,X86   ,                     ,Apx                ,48 ,APX                   ,apx
+
+instructionset     ,X86   ,Aes                  ,                   ,9  ,AES                   ,aes
+instructionset     ,X86   ,Pclmulqdq            ,                   ,14 ,PCLMULQDQ             ,pclmul
+
+instructionset     ,X86   ,Avx512Vp2intersect   ,                   ,80 ,AVX512VP2INTERSECT    ,avx512vp2intersect
+instructionset     ,X86   ,Avx512Vp2intersect_VL,                   ,81 ,AVX512VP2INTERSECT    ,avx512vp2intersect
+
+instructionset     ,X86   ,AvxIfma              ,                   ,66 ,AVXIFMA               ,avxifma
+instructionset     ,X86   ,AvxVnni              ,                   ,25 ,AVXVNNI               ,avxvnni
+
+instructionset     ,X86   ,Gfni                 ,                   ,53 ,GFNI                  ,gfni
+instructionset     ,X86   ,Gfni_V256            ,                   ,54 ,GFNI_V256             ,gfni_v256
+instructionset     ,X86   ,Gfni_V512            ,                   ,55 ,GFNI_V512             ,gfni_v512
+
+instructionset     ,X86   ,Sha                  ,                   ,68 ,SHA                   ,sha
+
+instructionset     ,X86   ,Aes_V256             ,                   ,64 ,AES_V256              ,vaes
+instructionset     ,X86   ,Aes_V512             ,                   ,65 ,AES_V512              ,vaes_v512
+instructionset     ,X86   ,Pclmulqdq_V256       ,                   ,49 ,PCLMULQDQ_V256        ,vpclmul
+instructionset     ,X86   ,Pclmulqdq_V512       ,                   ,50 ,PCLMULQDQ_V512        ,vpclmul_v512
+
+instructionset     ,X86   ,WaitPkg              ,                   ,69 ,WAITPKG               ,waitpkg
+instructionset     ,X86   ,X86Serialize         ,                   ,28 ,X86Serialize          ,x86serialize
+
+instructionset     ,X86   ,                     ,                   ,   ,Vector128             ,
+instructionset     ,X86   ,                     ,                   ,   ,Vector256             ,
+instructionset     ,X86   ,                     ,                   ,   ,Vector512             ,
+
+instructionset     ,X86   ,                     ,VectorT128         ,39 ,VectorT128            ,vectort128
+instructionset     ,X86   ,                     ,VectorT256         ,40 ,VectorT256            ,vectort256
+instructionset     ,X86   ,                     ,VectorT512         ,41 ,VectorT512            ,vectort512
+
+; 64-bit Instruction Sets
 
 instructionset64bit,X86   ,X86Base
-instructionset64bit,X86   ,SSE
-instructionset64bit,X86   ,SSE2
+
 instructionset64bit,X86   ,SSE3
 instructionset64bit,X86   ,SSSE3
 instructionset64bit,X86   ,SSE41
 instructionset64bit,X86   ,SSE42
+instructionset64bit,X86   ,POPCNT
+
 instructionset64bit,X86   ,AVX
+
 instructionset64bit,X86   ,AVX2
-instructionset64bit,X86   ,AES
 instructionset64bit,X86   ,BMI1
 instructionset64bit,X86   ,BMI2
 instructionset64bit,X86   ,FMA
 instructionset64bit,X86   ,LZCNT
-instructionset64bit,X86   ,PCLMULQDQ
-instructionset64bit,X86   ,POPCNT
-instructionset64bit,X86   ,AVXVNNI
-instructionset64bit,X86   ,X86Serialize
-instructionset64bit,X86   ,AVX512F
-instructionset64bit,X86   ,AVX512BW
-instructionset64bit,X86   ,AVX512CD
-instructionset64bit,X86   ,AVX512DQ
+
+instructionset64bit,X86   ,AVX512
 instructionset64bit,X86   ,AVX512VBMI
+instructionset64bit,X86   ,AVX512v3
+
 instructionset64bit,X86   ,AVX10v1
-instructionset64bit,X86   ,AVX10v1_V512
 instructionset64bit,X86   ,AVX10v2
-instructionset64bit,X86   ,AVX10v2_V512
+
+instructionset64bit,X86   ,AES
+instructionset64bit,X86   ,PCLMULQDQ
+
+instructionset64bit,X86   ,AVX512VP2INTERSECT
+
+instructionset64bit,X86   ,AVXIFMA
+instructionset64bit,X86   ,AVXVNNI
 instructionset64bit,X86   ,GFNI
+instructionset64bit,X86   ,SHA
+instructionset64bit,X86   ,WAITPKG
+instructionset64bit,X86   ,X86Serialize
+
+; Vector Instruction Sets
 
 vectorinstructionset,X86  ,Vector128
 vectorinstructionset,X86  ,Vector256
 vectorinstructionset,X86  ,Vector512
 
-; x86-64-v1
+; Implications
 
-implication        ,X86   ,SSE                  ,X86Base
-implication        ,X86   ,SSE2                 ,SSE
-
-; x86-64-v2
-
-implication        ,X86   ,SSE3                 ,SSE2
+implication        ,X86   ,SSE3                 ,X86Base
 implication        ,X86   ,SSSE3                ,SSE3
 implication        ,X86   ,SSE41                ,SSSE3
 implication        ,X86   ,SSE42                ,SSE41
 implication        ,X86   ,POPCNT               ,SSE42
 
-; x86-64-v3
+implication        ,X86   ,AVX                  ,POPCNT
 
-implication        ,X86   ,AVX                  ,SSE42
-implication        ,X86   ,AVX2                 ,AVX
 implication        ,X86   ,BMI1                 ,AVX
 implication        ,X86   ,BMI2                 ,AVX
 implication        ,X86   ,FMA                  ,AVX
-implication        ,X86   ,LZCNT                ,X86Base
-implication        ,X86   ,MOVBE                ,SSE42
-
-; x86-64-v4
-
-implication        ,X86   ,EVEX                 ,AVX2
-implication        ,X86   ,EVEX                 ,FMA
-implication        ,X86   ,AVX512F              ,EVEX
-implication        ,X86   ,AVX512F_VL           ,AVX512F
-implication        ,X86   ,AVX512BW             ,AVX512F
-implication        ,X86   ,AVX512BW_VL          ,AVX512BW
-implication        ,X86   ,AVX512BW_VL          ,AVX512F_VL
-implication        ,X86   ,AVX512CD             ,AVX512F
-implication        ,X86   ,AVX512CD_VL          ,AVX512CD
-implication        ,X86   ,AVX512CD_VL          ,AVX512F_VL
-implication        ,X86   ,AVX512DQ             ,AVX512F
-implication        ,X86   ,AVX512DQ_VL          ,AVX512DQ
-implication        ,X86   ,AVX512DQ_VL          ,AVX512F_VL
-implication        ,X86   ,AVX512VBMI           ,AVX512BW
-implication        ,X86   ,AVX512VBMI_VL        ,AVX512VBMI
-implication        ,X86   ,AVX512VBMI_VL        ,AVX512BW_VL
-
-; Unversioned
-
-implication        ,X86   ,AES                  ,SSE2
-implication        ,X86   ,PCLMULQDQ            ,SSE2
-implication        ,X86   ,PCLMULQDQ_V256       ,PCLMULQDQ
-implication        ,X86   ,PCLMULQDQ_V256       ,AVX
-implication        ,X86   ,PCLMULQDQ_V512       ,PCLMULQDQ_V256
-implication        ,X86   ,PCLMULQDQ_V512       ,AVX512F
+implication        ,X86   ,LZCNT                ,AVX
+implication        ,X86   ,MOVBE                ,AVX
+implication        ,X86   ,AVX2                 ,BMI1
+implication        ,X86   ,AVX2                 ,BMI2
+implication        ,X86   ,AVX2                 ,FMA
+implication        ,X86   ,AVX2                 ,LZCNT
+implication        ,X86   ,AVX2                 ,MOVBE
+
+implication        ,X86   ,AVX512               ,AVX2
+implication        ,X86   ,AVX512               ,BMI1
+implication        ,X86   ,AVX512               ,BMI2
+implication        ,X86   ,AVX512               ,FMA
+implication        ,X86   ,AVX512               ,LZCNT
+implication        ,X86   ,AVX512               ,MOVBE
+
+implication        ,X86   ,AVX512VBMI           ,AVX512
+implication        ,X86   ,AVX512v3             ,AVX512VBMI
+implication        ,X86   ,AVX10v1              ,AVX512v3
+implication        ,X86   ,AVX10v2              ,AVX10v1
+
+implication        ,X86   ,AES                  ,X86Base
+implication        ,X86   ,PCLMULQDQ            ,AES
+
+implication        ,X86   ,AVX512VP2INTERSECT   ,AVX512
+implication        ,X86   ,AVXIFMA              ,AVX2
 implication        ,X86   ,AVXVNNI              ,AVX2
-implication        ,X86   ,X86Serialize         ,X86Base
-implication        ,X86   ,GFNI                 ,SSE41
+
+implication        ,X86   ,GFNI                 ,SSE42
 implication        ,X86   ,GFNI_V256            ,GFNI
 implication        ,X86   ,GFNI_V256            ,AVX
 implication        ,X86   ,GFNI_V512            ,GFNI
-implication        ,X86   ,GFNI_V512            ,AVX512F
-
-implication        ,X86   ,AVX10v1              ,EVEX
-implication        ,X86   ,AVX10v1_V512         ,AVX10v1
-implication        ,X86   ,AVX10v1_V512         ,AVX512F
-implication        ,X86   ,AVX10v1_V512         ,AVX512F_VL
-implication        ,X86   ,AVX10v1_V512         ,AVX512CD
-implication        ,X86   ,AVX10v1_V512         ,AVX512CD_VL
-implication        ,X86   ,AVX10v1_V512         ,AVX512BW
-implication        ,X86   ,AVX10v1_V512         ,AVX512BW_VL
-implication        ,X86   ,AVX10v1_V512         ,AVX512DQ
-implication        ,X86   ,AVX10v1_V512         ,AVX512DQ_VL
-implication        ,X86   ,AVX10v1_V512         ,AVX512VBMI
-implication        ,X86   ,AVX10v1_V512         ,AVX512VBMI_VL
-implication        ,X86   ,AVX10v2              ,AVX10v1
-implication        ,X86   ,AVX10v2_V512         ,AVX10v1_V512
+implication        ,X86   ,GFNI_V512            ,AVX512
+
+implication        ,X86   ,SHA                  ,X86Base
+
+implication        ,X86   ,AES_V256             ,AES
+implication        ,X86   ,AES_V256             ,AVX
+implication        ,X86   ,AES_V512             ,AES_V256
+implication        ,X86   ,AES_V512             ,AVX512
+implication        ,X86   ,PCLMULQDQ_V256       ,PCLMULQDQ
+implication        ,X86   ,PCLMULQDQ_V256       ,AES_V256
+implication        ,X86   ,PCLMULQDQ_V512       ,PCLMULQDQ_V256
+implication        ,X86   ,PCLMULQDQ_V512       ,AES_V512
+
+implication        ,X86   ,WAITPKG              ,X86Base
+implication        ,X86   ,X86Serialize         ,X86Base
 
 ; These synthetic ISAs need to appear after the core ISAs
 ; as they depend on the other implications being correct first
 ; otherwise they may not be disabled if the required isa is disabled
 
-implication        ,X86   ,Vector128            ,SSE
+implication        ,X86   ,Vector128            ,X86Base
 implication        ,X86   ,Vector256            ,AVX
-implication        ,X86   ,Vector512            ,AVX512F
+implication        ,X86   ,Vector512            ,AVX512
 
-implication        ,X86   ,VectorT128           ,SSE2
+implication        ,X86   ,VectorT128           ,X86Base
 implication        ,X86   ,VectorT256           ,AVX2
-implication        ,X86   ,VectorT512           ,AVX512F
+implication        ,X86   ,VectorT512           ,AVX512
 
 ; Definition of X64 instruction sets
 definearch         ,X64   ,64Bit     ,X64, X64
@@ -216,6 +252,7 @@ instructionset     ,ARM64 ,                      ,Rcpc    ,26 ,Rcpc
 instructionset     ,ARM64 ,VectorT128            ,        ,39 ,VectorT128            ,vectort128
 instructionset     ,ARM64 ,                      ,Rcpc2   ,42 ,Rcpc2                 ,rcpc2
 instructionset     ,ARM64 ,Sve                   ,        ,43 ,Sve                   ,sve
+instructionset     ,ARM64 ,Sve2                  ,        ,59 ,Sve2                  ,sve2
 
 instructionset64bit,ARM64 ,ArmBase
 instructionset64bit,ARM64 ,AdvSimd
@@ -226,6 +263,7 @@ instructionset64bit,ARM64 ,Rdm
 instructionset64bit,ARM64 ,Sha1
 instructionset64bit,ARM64 ,Sha256
 instructionset64bit,ARM64 ,Sve
+instructionset64bit,ARM64 ,Sve2
 
 vectorinstructionset,ARM64,Vector64
 vectorinstructionset,ARM64,Vector128
@@ -241,14 +279,24 @@ implication        ,ARM64 ,Vector64   ,AdvSimd
 implication        ,ARM64 ,Vector128  ,AdvSimd
 implication        ,ARM64 ,VectorT128 ,AdvSimd
 implication        ,ARM64 ,Sve        ,AdvSimd
+implication        ,ARM64 ,Sve2       ,Sve
+
+; Definition of Riscv64 instruction sets
+definearch         ,RiscV64 ,64Bit     ,RiscV64, RiscV64
+
+instructionset     ,RiscV64 ,RiscV64Base         ,        ,56 ,RiscV64Base         ,base
+instructionset     ,RiscV64 ,Zba                 ,        ,57 ,Zba                 ,zba
+instructionset     ,RiscV64 ,Zbb                 ,        ,58 ,Zbb                 ,zbb
+
+implication        ,RiscV64 ,Zbb                 ,RiscV64Base
+implication        ,RiscV64 ,Zba                 ,RiscV64Base
 
 ;                    ,name and aliases           ,archs    ,lower baselines included by implication
-;
-instructionsetgroup  ,x86-x64                    ,X64 X86  ,sse2
-instructionsetgroup  ,x86-x64-v2                 ,X64 X86  ,sse4.2 popcnt
-instructionsetgroup  ,x86-x64-v3                 ,X64 X86  ,x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma
-instructionsetgroup  ,skylake                    ,X64 X86  ,x86-x64-v3
-instructionsetgroup  ,x86-x64-v4                 ,X64 X86  ,x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl
+instructionsetgroup  ,x86-64                     ,X64 X86  ,base
+instructionsetgroup  ,x86-64-v2                  ,X64 X86  ,x86-64 sse4.2 popcnt
+instructionsetgroup  ,x86-64-v3                  ,X64 X86  ,x86-64-v2 avx2 bmi bmi2 fma lzcnt movbe
+instructionsetgroup  ,skylake                    ,X64 X86  ,x86-64-v3
+instructionsetgroup  ,x86-64-v4                  ,X64 X86  ,x86-64-v3 avx512
 
 instructionsetgroup  ,armv8-a                    ,ARM64    ,neon
 instructionsetgroup  ,armv8.1-a                  ,ARM64    ,armv8-a lse crc rdma
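
The `implication` rows above are closed transitively when the instruction set flags are expanded: enabling an ISA also enables everything it implies, repeatedly, until nothing new is added. A minimal, self-contained sketch of that expansion over a few of the X86 pairs (ISA names taken from the rows above; the loop structure is an assumption for illustration, not the generator's actual code):

    // Editor's sketch: fixed-point expansion of ISA implications.
    var implications = new[] { ("AVX512", "AVX2"), ("AVX2", "BMI1"), ("BMI1", "AVX") };
    var enabled = new HashSet<string> { "AVX512" };
    bool changed;
    do
    {
        changed = false;
        foreach (var (isa, implied) in implications)
        {
            // Add returns true only when the implied ISA was not enabled yet.
            if (enabled.Contains(isa) && enabled.Add(implied))
                changed = true;
        }
    } while (changed);
    // enabled now contains AVX512, AVX2, BMI1, AVX.
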
diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs
index de6ba3e2fc9d..3d12f0c12efa 100644
--- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs
+++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs
@@ -88,8 +88,8 @@ public InstructionSetImplication(string architecture, InstructionSetImplication
         private SortedDictionary<string, int> _r2rNamesByName = new SortedDictionary<string, int>();
         private SortedDictionary<int, string> _r2rNamesByNumber = new SortedDictionary<int, string>();
         private SortedSet<string> _architectures = new SortedSet<string>();
-        private Dictionary<string, List<string>> _architectureJitNames = new Dictionary<string, List<string>>();
-        private Dictionary<string, List<string>> _architectureVectorInstructionSetJitNames = new Dictionary<string, List<string>>();
+        private Dictionary<string, HashSet<string>> _architectureJitNames = new Dictionary<string, HashSet<string>>();
+        private Dictionary<string, HashSet<string>> _architectureVectorInstructionSetJitNames = new Dictionary<string, HashSet<string>>();
         private HashSet<string> _64BitArchitectures = new HashSet<string>();
         private Dictionary<string, string> _64BitVariantArchitectureJitNameSuffix = new Dictionary<string, string>();
         private Dictionary<string, string> _64BitVariantArchitectureManagedNameSuffix = new Dictionary<string, string>();
@@ -103,9 +103,9 @@ private void ArchitectureEncountered(string arch)
                 _64bitVariants.Add(arch, new HashSet<string>());
             _architectures.Add(arch);
             if (!_architectureJitNames.ContainsKey(arch))
-                _architectureJitNames.Add(arch, new List<string>());
+                _architectureJitNames.Add(arch, new HashSet<string>());
             if (!_architectureVectorInstructionSetJitNames.ContainsKey(arch))
-                _architectureVectorInstructionSetJitNames.Add(arch, new List<string>());
+                _architectureVectorInstructionSetJitNames.Add(arch, new HashSet<string>());
         }
 
         private void ValidateArchitectureEncountered(string arch)
@@ -118,6 +118,8 @@ private static string ArchToIfDefArch(string arch)
         {
             if (arch == "X64")
                 return "AMD64";
+            if (arch == "RiscV64")
+                return "RISCV64";
             return arch;
         }
 
@@ -333,9 +335,12 @@ public static class ReadyToRunInstructionSetHelper
                         switch (instructionSet)
                         {{
 ");
+                HashSet<string> handledJitNames = new HashSet<string>();
+
                 foreach (var instructionSet in _instructionSets)
                 {
                     if (instructionSet.Architecture != architecture) continue;
+                    if (!handledJitNames.Add(instructionSet.JitName)) continue;
 
                     string r2rEnumerationValue;
                     if (!string.IsNullOrEmpty(instructionSet.R2rName))
@@ -545,7 +550,7 @@ public void ExpandInstructionSetByImplication(TargetArchitecture architecture)
 
         public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionSets(TargetArchitecture architecture, InstructionSet input)
         {
-            switch(architecture)
+            switch (architecture)
             {
 ");
             foreach (string architecture in _architectures)
@@ -554,7 +559,7 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS
                     continue;
 
                 tr.Write($@"            case TargetArchitecture.{architecture}:
-                switch(input)
+                switch (input)
                 {{
 ");
                 foreach (var vectorInstructionSet in _architectureVectorInstructionSetJitNames[architecture])
@@ -600,11 +605,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                 tr.Write($@"
                 case TargetArchitecture.{architecture}:
 ");
+                HashSet<string> handledJitNames = new HashSet<string>();
+
                 foreach (var instructionSet in _instructionSets)
                 {
                     if (instructionSet.Architecture != architecture) continue;
                     if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName))
                     {
+                        if (!handledJitNames.Add(instructionSet.JitName)) continue;
                         AddImplication(architecture, instructionSet.JitName, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}");
                         AddImplication(architecture, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}", instructionSet.JitName);
                     }
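
Since several managed ISA rows now share one JIT name (for example Sse, Sse2 and X86Base all map to X86Base), the generator loops above guard against emitting the same entry twice by relying on `HashSet<T>.Add` returning `false` for an item that is already present. A stand-alone illustration of that pattern, with made-up values:

    // Editor's sketch: HashSet<T>.Add both records and filters duplicates.
    var handled = new HashSet<string>();
    foreach (var name in new[] { "X86Base", "AVX512", "X86Base" })
    {
        if (!handled.Add(name))
            continue; // the second "X86Base" is skipped here
        Console.WriteLine(name); // prints X86Base, then AVX512
    }
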
@@ -635,7 +643,7 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
             do
             {
                 oldflags = resultflags;
-                switch(architecture)
+                switch (architecture)
                 {
 ");
             foreach (string architecture in _architectures)
@@ -643,11 +651,16 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                 tr.Write($@"
                 case TargetArchitecture.{architecture}:
 ");
+                HashSet<string> handledJitNames = new HashSet<string>();
+
                 foreach (var instructionSet in _instructionSets)
                 {
                     if (instructionSet.Architecture != architecture) continue;
                     if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName))
+                    {
+                        if (!handledJitNames.Add(instructionSet.JitName)) continue;
                         AddReverseImplication(architecture, instructionSet.JitName, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}");
+                    }
                 }
                 foreach (var implication in _implications)
                 {
@@ -737,12 +750,14 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture)
                 tr.Write($@"
                 case TargetArchitecture.{architecture}:
 ");
+                HashSet<string> handledJitNames = new HashSet<string>();
+
                 foreach (var instructionSet in _instructionSets)
                 {
                     if (instructionSet.Architecture != architecture) continue;
-
                     if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName))
                     {
+                        if (!handledJitNames.Add(instructionSet.JitName)) continue;
                         tr.WriteLine($"                    if (HasInstructionSet(InstructionSet.{architecture}_{instructionSet.JitName}))");
                         tr.WriteLine($"                        AddInstructionSet(InstructionSet.{architecture}_{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)});");
                     }
@@ -763,12 +778,16 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc
                 tr.Write($@"
                 case TargetArchitecture.{architecture}:
 ");
+                HashSet<string> handledJitNames = new HashSet<string>();
+
                 foreach (var instructionSet in _instructionSets)
                 {
                     if (instructionSet.Architecture != architecture) continue;
-
                     if (_64bitVariants[architecture].Contains(instructionSet.JitName))
+                    {
+                        if (!handledJitNames.Add(instructionSet.JitName)) continue;
                         tr.WriteLine($"                    AddInstructionSet(InstructionSet.{architecture}_{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)});");
+                    }
                 }
 
                 tr.WriteLine("                    break;");
@@ -1034,12 +1053,14 @@ void Set64BitInstructionSetVariants()
             foreach (string architecture in _architectures)
             {
                 tr.WriteLine($"#ifdef TARGET_{ArchToIfDefArch(architecture)}");
+                HashSet<string> handledJitNames = new HashSet<string>();
+
                 foreach (var instructionSet in _instructionSets)
                 {
                     if (instructionSet.Architecture != architecture) continue;
-
                     if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName))
                     {
+                        if (!handledJitNames.Add(instructionSet.JitName)) continue;
                         tr.WriteLine($"        if (HasInstructionSet(InstructionSet_{instructionSet.JitName}))");
                         tr.WriteLine($"            AddInstructionSet(InstructionSet_{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)});");
                     }
@@ -1067,11 +1088,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             foreach (string architecture in _architectures)
             {
                 tr.WriteLine($"#ifdef TARGET_{ArchToIfDefArch(architecture)}");
+                HashSet<string> handledJitNames = new HashSet<string>();
+
                 foreach (var instructionSet in _instructionSets)
                 {
                     if (instructionSet.Architecture != architecture) continue;
                     if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName))
                     {
+                        if (!handledJitNames.Add(instructionSet.JitName)) continue;
                         AddImplication(architecture, instructionSet.JitName, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}");
                         AddImplication(architecture, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}", instructionSet.JitName);
                     }
@@ -1101,9 +1125,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             foreach (string architecture in _architectures)
             {
                 tr.WriteLine($"#ifdef TARGET_{ArchToIfDefArch(architecture)}");
+                HashSet<string> handledJitNames = new HashSet<string>();
+
                 foreach (var instructionSet in _instructionSets)
                 {
                     if (instructionSet.Architecture != architecture) continue;
+                    if (!handledJitNames.Add(instructionSet.JitName)) continue;
                     tr.WriteLine($"        case InstructionSet_{instructionSet.JitName} :");
                     tr.WriteLine($"            return \"{instructionSet.JitName}\";");
                     if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName))
@@ -1136,15 +1163,14 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
             foreach (string architecture in _architectures)
             {
                 tr.WriteLine($"#ifdef TARGET_{ArchToIfDefArch(architecture)}");
+                HashSet<string> handledR2rNames = new HashSet<string>();
+
                 foreach (var instructionSet in _instructionSets)
                 {
                     if (instructionSet.Architecture != architecture) continue;
-                    string r2rEnumerationValue;
-                    if (string.IsNullOrEmpty(instructionSet.R2rName))
-                        continue;
-
-                    r2rEnumerationValue = $"READYTORUN_INSTRUCTION_{instructionSet.R2rName}";
-
+                    if (string.IsNullOrEmpty(instructionSet.R2rName)) continue;
+                    if (!handledR2rNames.Add(instructionSet.R2rName)) continue;
+                    string r2rEnumerationValue = $"READYTORUN_INSTRUCTION_{instructionSet.R2rName}";
                     tr.WriteLine($"        case {r2rEnumerationValue}: return InstructionSet_{instructionSet.JitName};");
                 }
                 tr.WriteLine($"#endif // TARGET_{ArchToIfDefArch(architecture)}");
diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
index 3aaa80673334..6030b745b013 100644
--- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
+++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
@@ -82,6 +82,7 @@ CORINFO_SIG_INFO*
 CORINFO_RESOLVED_TOKEN*,ref CORINFO_RESOLVED_TOKEN
 CORINFO_RESOLVED_TOKEN_PTR,CORINFO_RESOLVED_TOKEN*,CORINFO_RESOLVED_TOKEN*,CORINFO_RESOLVED_TOKEN*
 CORINFO_EE_INFO*,ref CORINFO_EE_INFO
+CORINFO_ASYNC_INFO*,ref CORINFO_ASYNC_INFO
 CORINFO_TAILCALL_HELPERS*,ref CORINFO_TAILCALL_HELPERS
 CORINFO_SWIFT_LOWERING*,ref CORINFO_SWIFT_LOWERING
 CORINFO_FPSTRUCT_LOWERING*,ref CORINFO_FPSTRUCT_LOWERING
@@ -229,7 +230,6 @@ FUNCTIONS
     CorInfoHelpFunc getCastingHelper(CORINFO_RESOLVED_TOKEN* pResolvedToken, bool fThrowing)
     CorInfoHelpFunc getSharedCCtorHelper(CORINFO_CLASS_HANDLE clsHnd)
     CORINFO_CLASS_HANDLE getTypeForBox(CORINFO_CLASS_HANDLE cls)
-    CORINFO_CLASS_HANDLE getTypeForBoxOnStack(CORINFO_CLASS_HANDLE cls)
     CorInfoHelpFunc getBoxHelper(CORINFO_CLASS_HANDLE cls)
     CorInfoHelpFunc getUnBoxHelper(CORINFO_CLASS_HANDLE cls)
     CORINFO_OBJECT_HANDLE getRuntimeTypePointer(CORINFO_CLASS_HANDLE cls)
@@ -284,6 +284,7 @@ FUNCTIONS
     [ManualNativeWrapper] bool runWithErrorTrap(ICorJitInfo::errorTrapFunction function, void* parameter);
     [ManualNativeWrapper] bool runWithSPMIErrorTrap(ICorJitInfo::errorTrapFunction function, void* parameter);
     void getEEInfo(CORINFO_EE_INFO* pEEInfoOut);
+    void getAsyncInfo(CORINFO_ASYNC_INFO* pAsyncInfoOut);
     mdMethodDef getMethodDefFromMethod(CORINFO_METHOD_HANDLE hMethod);
     size_t printMethodName(CORINFO_METHOD_HANDLE ftn, char* buffer, size_t bufferSize, size_t* pRequiredBufferSize)
     const char* getMethodNameFromMetadata(CORINFO_METHOD_HANDLE ftn, const char **className, const char **namespaceName, const char **enclosingClassNames, size_t maxEnclosingClassNames);
@@ -321,6 +322,7 @@ FUNCTIONS
     CORINFO_METHOD_HANDLE GetDelegateCtor(CORINFO_METHOD_HANDLE  methHnd, CORINFO_CLASS_HANDLE   clsHnd, CORINFO_METHOD_HANDLE  targetMethodHnd, DelegateCtorArgs *     pCtorData);
     void MethodCompileComplete(CORINFO_METHOD_HANDLE methHnd);
     bool getTailCallHelpers(CORINFO_RESOLVED_TOKEN* callToken, CORINFO_SIG_INFO* sig, CORINFO_GET_TAILCALL_HELPERS_FLAGS flags, CORINFO_TAILCALL_HELPERS* pResult);
+    CORINFO_METHOD_HANDLE getAsyncResumptionStub();
     bool convertPInvokeCalliToCall(CORINFO_RESOLVED_TOKEN * pResolvedToken, bool mustConvert);
     bool notifyInstructionSetUsage(CORINFO_InstructionSet instructionSet,bool supportEnabled);
     void updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint);
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/CastingHelper.TypeEquivalence.cs b/src/coreclr/tools/Common/TypeSystem/Common/CastingHelper.TypeEquivalence.cs
index 8797efdde09b..a757df953fea 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/CastingHelper.TypeEquivalence.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/CastingHelper.TypeEquivalence.cs
@@ -202,6 +202,15 @@ static bool CompareStructuresForEquivalence(MetadataType type1, MetadataType typ
                     return false;
                 }
 
+                bool explicitLayout = false;
+                if (!enumMode)
+                {
+                    if (!CompareTypeLayout(type1, type2, out explicitLayout))
+                    {
+                        return false;
+                    }
+                }
+
                 // Compare field types for equivalence
                 var fields1 = type1.GetFields().GetEnumerator();
                 var fields2 = type2.GetFields().GetEnumerator();
@@ -249,20 +258,22 @@ static bool CompareStructuresForEquivalence(MetadataType type1, MetadataType typ
                     {
                         return false;
                     }
-                }
 
-                // At this point we know that the set of fields is the same, and have the same types
-                if (!enumMode)
-                {
-                    if (!CompareTypeLayout(type1, type2))
+                    // If we are in explicit layout mode, we need to compare the offsets
+                    if (explicitLayout)
                     {
-                        return false;
+                        if (field1.MetadataOffset != field2.MetadataOffset)
+                        {
+                            return false;
+                        }
                     }
                 }
+
                 return true;
 
-                static bool CompareTypeLayout(MetadataType type1, MetadataType type2)
+                static bool CompareTypeLayout(MetadataType type1, MetadataType type2, out bool explicitLayout)
                 {
+                    explicitLayout = false;
                     // Types must either be Sequential or Explicit layout
                     if (type1.IsSequentialLayout != type2.IsSequentialLayout)
                     {
@@ -279,7 +290,7 @@ static bool CompareTypeLayout(MetadataType type1, MetadataType type2)
                         return false;
                     }
 
-                    bool explicitLayout = type1.IsExplicitLayout;
+                    explicitLayout = type1.IsExplicitLayout;
 
                     // they must have the same charset
                     if (type1.PInvokeStringFormat != type2.PInvokeStringFormat)
@@ -293,21 +304,6 @@ static bool CompareTypeLayout(MetadataType type1, MetadataType type2)
                         (layoutMetadata1.Size != layoutMetadata2.Size))
                         return false;
 
-                    if ((explicitLayout) && !(layoutMetadata1.Offsets == null && layoutMetadata2.Offsets == null))
-                    {
-                        if (layoutMetadata1.Offsets == null)
-                            return false;
-
-                        if (layoutMetadata2.Offsets == null)
-                            return false;
-
-                        for (int index = 0; index < layoutMetadata1.Offsets.Length; index++)
-                        {
-                            if (layoutMetadata1.Offsets[index].Offset != layoutMetadata2.Offsets[index].Offset)
-                                return false;
-                        }
-                    }
-
                     return true;
                 }
 
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/FieldDesc.FieldLayout.cs b/src/coreclr/tools/Common/TypeSystem/Common/FieldDesc.FieldLayout.cs
index e7b5e6d21478..b08cb3058696 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/FieldDesc.FieldLayout.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/FieldDesc.FieldLayout.cs
@@ -47,5 +47,13 @@ internal void InitializeOffset(LayoutInt offset)
             Debug.Assert(_offset == FieldAndOffset.InvalidOffset || _offset == offset);
             _offset = offset;
         }
+
+        public virtual LayoutInt MetadataOffset
+        {
+            get
+            {
+                return LayoutInt.Indeterminate;
+            }
+        }
     }
 }
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/FieldForInstantiatedType.FieldLayout.cs b/src/coreclr/tools/Common/TypeSystem/Common/FieldForInstantiatedType.FieldLayout.cs
new file mode 100644
index 000000000000..b355d52d3a72
--- /dev/null
+++ b/src/coreclr/tools/Common/TypeSystem/Common/FieldForInstantiatedType.FieldLayout.cs
@@ -0,0 +1,18 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Debug = System.Diagnostics.Debug;
+
+namespace Internal.TypeSystem
+{
+    public sealed partial class FieldForInstantiatedType : FieldDesc
+    {
+        public override LayoutInt MetadataOffset
+        {
+            get
+            {
+                return _fieldDef.MetadataOffset;
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs
index 09818b42cf7d..389675395036 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs
@@ -80,6 +80,21 @@ public enum StaticLayoutKind
         StaticRegionSizesAndFields
     }
 
+    public struct FieldAndOffset
+    {
+        public static readonly LayoutInt InvalidOffset = new LayoutInt(int.MaxValue);
+
+        public readonly FieldDesc Field;
+
+        public readonly LayoutInt Offset;
+
+        public FieldAndOffset(FieldDesc field, LayoutInt offset)
+        {
+            Field = field;
+            Offset = offset;
+        }
+    }
+
     public struct ComputedInstanceFieldLayout
     {
         public LayoutInt FieldSize;
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutIntervalCalculator.cs b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutIntervalCalculator.cs
index 51683d223db0..40611a871ee7 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutIntervalCalculator.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutIntervalCalculator.cs
@@ -135,6 +135,9 @@ private void AddToFieldLayout(List fieldLayout, int offset,
 
                 foreach (FieldDesc field in fieldType.GetFields())
                 {
+                    if (field.IsStatic)
+                        continue;
+
                     int fieldOffset = offset + field.Offset.AsInt;
                     AddToFieldLayout(fieldLayout, fieldOffset, field.FieldType);
                 }
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs
index dde9ff49410d..4f6ee8db15d4 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs
@@ -144,15 +144,12 @@ out instanceByteSizeAndAlignment
                 }
 
                 var layoutMetadata = type.GetClassLayout();
-
                 // If packing is out of range or not a power of two, throw that the size is invalid
                 int packing = layoutMetadata.PackingSize;
                 if (packing < 0 || packing > 128 || ((packing & (packing - 1)) != 0))
                 {
                     ThrowHelper.ThrowTypeLoadException(ExceptionStringID.ClassLoadBadFormat, type);
                 }
-
-                Debug.Assert(layoutMetadata.Offsets == null || layoutMetadata.Offsets.Length == numInstanceFields);
             }
 
             // At this point all special cases are handled and all inputs validated
@@ -329,9 +326,12 @@ protected ComputedInstanceFieldLayout ComputeExplicitFieldLayout(MetadataType ty
                 hasVectorTField = type.BaseType.IsVectorTOrHasVectorTFields;
             }
 
-            foreach (FieldAndOffset fieldAndOffset in layoutMetadata.Offsets)
+            foreach (FieldDesc field in type.GetFields())
             {
-                TypeDesc fieldType = fieldAndOffset.Field.FieldType;
+                if (field.IsStatic)
+                    continue;
+
+                TypeDesc fieldType = field.FieldType;
                 var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType.UnderlyingType, hasLayout: true, packingSize, out ComputedFieldData fieldData);
                 if (!fieldData.LayoutAbiStable)
                     layoutAbiStable = false;
@@ -344,10 +344,12 @@ protected ComputedInstanceFieldLayout ComputeExplicitFieldLayout(MetadataType ty
 
                 largestAlignmentRequired = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequired);
 
-                if (fieldAndOffset.Offset == FieldAndOffset.InvalidOffset)
+                LayoutInt metadataOffset = field.MetadataOffset;
+
+                if (metadataOffset == LayoutInt.Indeterminate)
                     ThrowHelper.ThrowTypeLoadException(ExceptionStringID.ClassLoadBadFormat, type);
 
-                LayoutInt computedOffset = fieldAndOffset.Offset + cumulativeInstanceFieldPos + offsetBias;
+                LayoutInt computedOffset = metadataOffset + cumulativeInstanceFieldPos + offsetBias;
 
                 // GC pointers MUST be aligned.
                 bool needsToBeAligned =
@@ -363,11 +365,11 @@ protected ComputedInstanceFieldLayout ComputeExplicitFieldLayout(MetadataType ty
                     int offsetModulo = computedOffset.AsInt % type.Context.Target.PointerSize;
                     if (offsetModulo != 0)
                     {
-                        ThrowHelper.ThrowTypeLoadException(ExceptionStringID.ClassLoadExplicitLayout, type, fieldAndOffset.Offset.ToStringInvariant());
+                        ThrowHelper.ThrowTypeLoadException(ExceptionStringID.ClassLoadExplicitLayout, type, metadataOffset.ToStringInvariant());
                     }
                 }
 
-                offsets[fieldOrdinal] = new FieldAndOffset(fieldAndOffset.Field, computedOffset);
+                offsets[fieldOrdinal] = new FieldAndOffset(field, computedOffset);
 
                 LayoutInt fieldExtent = computedOffset + fieldSizeAndAlignment.Size;
                 instanceSize = LayoutInt.Max(fieldExtent, instanceSize);
@@ -1080,7 +1082,9 @@ private static ValueTypeShapeCharacteristics ComputeHomogeneousAggregateCharacte
             if (type.Context.Target.Abi == TargetAbi.NativeAotArmel)
                 return NotHA;
 
-            MetadataType metadataType = (MetadataType)type;
+            // If type represents an enum, we want to treat it as its underlying type.
+            MetadataType metadataType = (MetadataType)type.UnderlyingType;
+
             int haElementSize = 0;
 
             switch (metadataType.Category)
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/MetadataType.cs b/src/coreclr/tools/Common/TypeSystem/Common/MetadataType.cs
index 722aa66a878f..18d3ee8f754d 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/MetadataType.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/MetadataType.cs
@@ -110,21 +110,5 @@ public struct ClassLayoutMetadata
     {
         public int PackingSize;
         public int Size;
-        public FieldAndOffset[] Offsets;
-    }
-
-    public struct FieldAndOffset
-    {
-        public static readonly LayoutInt InvalidOffset = new LayoutInt(int.MaxValue);
-
-        public readonly FieldDesc Field;
-
-        public readonly LayoutInt Offset;
-
-        public FieldAndOffset(FieldDesc field, LayoutInt offset)
-        {
-            Field = field;
-            Offset = offset;
-        }
     }
 }
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/Utilities/CustomAttributeTypeNameParser.cs b/src/coreclr/tools/Common/TypeSystem/Common/Utilities/CustomAttributeTypeNameParser.cs
index 83e74b02dd9f..8f2eda81f660 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/Utilities/CustomAttributeTypeNameParser.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/Utilities/CustomAttributeTypeNameParser.cs
@@ -21,7 +21,7 @@ public static class CustomAttributeTypeNameParser
         /// This is the inverse of what <see cref="CustomAttributeTypeNameFormatter"/> does.
         /// </summary>
         public static TypeDesc GetTypeByCustomAttributeTypeName(this ModuleDesc module, string name, bool throwIfNotFound = true,
-            Func<ModuleDesc, string, MetadataType> canonResolver = null)
+            Func<ModuleDesc, string, TypeDesc> canonGenericResolver = null)
         {
             if (!TypeName.TryParse(name.AsSpan(), out TypeName parsed, s_typeNameParseOptions))
                 ThrowHelper.ThrowTypeLoadException(name, module);
@@ -31,7 +31,7 @@ public static TypeDesc GetTypeByCustomAttributeTypeName(this ModuleDesc module,
                 _context = module.Context,
                 _module = module,
                 _throwIfNotFound = throwIfNotFound,
-                _canonResolver = canonResolver
+                _canonGenericResolver = canonGenericResolver
             }.Resolve(parsed);
         }
 
@@ -91,7 +91,7 @@ private struct TypeNameResolver
             internal TypeSystemContext _context;
             internal ModuleDesc _module;
             internal bool _throwIfNotFound;
-            internal Func<ModuleDesc, string, MetadataType> _canonResolver;
+            internal Func<ModuleDesc, string, TypeDesc> _canonGenericResolver;
 
             internal List<ModuleDesc> _referencedModules;
 
@@ -136,30 +136,30 @@ private TypeDesc GetSimpleType(TypeName typeName)
                 }
 
                 ModuleDesc module = _module;
-                if (topLevelTypeName.AssemblyName != null)
+                if (topLevelTypeName.AssemblyName is not null)
                 {
                     module = _context.ResolveAssembly(typeName.AssemblyName, throwIfNotFound: _throwIfNotFound);
                     if (module == null)
                         return null;
                 }
 
-                if (module != null)
+                if (module is not null)
                 {
                     TypeDesc type = GetSimpleTypeFromModule(typeName, module);
-                    if (type != null)
+                    if (type is not null)
                     {
                         _referencedModules?.Add(module);
                         return type;
                     }
                 }
 
-                // If it didn't resolve and wasn't assembly-qualified, we also try core library
                 if (topLevelTypeName.AssemblyName == null)
                 {
+                    // If it didn't resolve and wasn't assembly-qualified, we also try core library
                     if (module != _context.SystemModule)
                     {
                         TypeDesc type = GetSimpleTypeFromModule(typeName, _context.SystemModule);
-                        if (type != null)
+                        if (type is not null)
                         {
                             _referencedModules?.Add(_context.SystemModule);
                             return type;
@@ -179,21 +179,18 @@ private TypeDesc GetSimpleTypeFromModule(TypeName typeName, ModuleDesc module)
                     TypeDesc type = GetSimpleTypeFromModule(typeName.DeclaringType, module);
                     if (type == null)
                         return null;
-                    return ((MetadataType)type).GetNestedType(TypeNameHelpers.Unescape(typeName.Name));
+                    return ((MetadataType)type).GetNestedType(TypeName.Unescape(typeName.Name));
                 }
 
-                string fullName = TypeNameHelpers.Unescape(typeName.FullName);
-
-                if (_canonResolver != null)
+                if (_canonGenericResolver != null)
                 {
-                    MetadataType canonType = _canonResolver(module, fullName);
+                    string fullName = TypeName.Unescape(typeName.FullName);
+                    TypeDesc canonType = _canonGenericResolver(module, fullName);
                     if (canonType != null)
                         return canonType;
                 }
 
-                (string typeNamespace, string name) = TypeNameHelpers.Split(fullName);
-
-                return module.GetType(typeNamespace, name, throwIfNotFound: false);
+                return module.GetType(TypeName.Unescape(typeName.Namespace), TypeName.Unescape(typeName.Name), throwIfNotFound: false);
             }
 
             private TypeDesc GetGenericType(TypeName typeName)
diff --git a/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaField.cs b/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaField.cs
index 01356a720005..8812678f72ae 100644
--- a/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaField.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaField.cs
@@ -297,6 +297,15 @@ public override MarshalAsDescriptor GetMarshalAsDescriptor()
 
             return null;
         }
+
+        public override LayoutInt MetadataOffset
+        {
+            get
+            {
+                int offset = MetadataReader.GetFieldDefinition(_handle).GetOffset();
+                return offset == -1 ? LayoutInt.Indeterminate : new LayoutInt(offset);
+            }
+        }
     }
 
     public static class EcmaFieldExtensions
diff --git a/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaModule.cs b/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaModule.cs
index 6758f4a54986..cd87728b53e8 100644
--- a/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaModule.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaModule.cs
@@ -16,6 +16,8 @@ public partial class EcmaModule : ModuleDesc
     {
         private readonly PEReader _peReader;
         protected readonly MetadataReader _metadataReader;
+        private volatile bool _isWrapNonExceptionThrowsComputed;
+        private volatile bool _isWrapNonExceptionThrows;
 
         internal interface IEntityHandleObject
         {
@@ -690,5 +692,52 @@ public override string ToString()
             ModuleDefinition moduleDefinition = _metadataReader.GetModuleDefinition();
             return _metadataReader.GetString(moduleDefinition.Name);
         }
+
+        public bool IsWrapNonExceptionThrows
+        {
+            get
+            {
+                if (!_isWrapNonExceptionThrowsComputed)
+                {
+                    ComputeIsWrapNonExceptionThrows();
+                    _isWrapNonExceptionThrowsComputed = true;
+                }
+                return _isWrapNonExceptionThrows;
+            }
+        }
+
+        private void ComputeIsWrapNonExceptionThrows()
+        {
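+            // Scan the assembly-level custom attributes for RuntimeCompatibilityAttribute and read its WrapNonExceptionThrows named argument.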
+            var reader = MetadataReader;
+            var c = reader.StringComparer;
+            bool foundAttribute = false;
+            foreach (var attr in reader.GetAssemblyDefinition().GetCustomAttributes())
+            {
+                if (reader.GetAttributeNamespaceAndName(attr, out var ns, out var n))
+                {
+                    if (c.Equals(ns, "System.Runtime.CompilerServices") && c.Equals(n, "RuntimeCompatibilityAttribute"))
+                    {
+                        var dec = reader.GetCustomAttribute(attr).DecodeValue(new CustomAttributeTypeProvider(this));
+
+                        foreach (var arg in dec.NamedArguments)
+                        {
+                            if (arg.Name == "WrapNonExceptionThrows")
+                            {
+                                if (!(arg.Value is bool))
+                                    ThrowHelper.ThrowBadImageFormatException();
+                                _isWrapNonExceptionThrows = (bool)arg.Value;
+                                foundAttribute = true;
+                                break;
+                            }
+                        }
+                    }
+                }
+
+                if (foundAttribute)
+                {
+                    break;
+                }
+            }
+        }
     }
 }
diff --git a/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaType.cs b/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaType.cs
index 3c349b3615c3..a444ebb2e3b5 100644
--- a/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaType.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaType.cs
@@ -567,46 +567,11 @@ public override ClassLayoutMetadata GetClassLayout()
         {
             TypeLayout layout = _typeDefinition.GetLayout();
 
-            ClassLayoutMetadata result;
-            result.PackingSize = layout.PackingSize;
-            result.Size = layout.Size;
-
-            // Skip reading field offsets if this is not explicit layout
-            if (IsExplicitLayout)
+            return new ClassLayoutMetadata
             {
-                var fieldDefinitionHandles = _typeDefinition.GetFields();
-                var numInstanceFields = 0;
-
-                foreach (var handle in fieldDefinitionHandles)
-                {
-                    var fieldDefinition = MetadataReader.GetFieldDefinition(handle);
-                    if ((fieldDefinition.Attributes & FieldAttributes.Static) != 0)
-                        continue;
-
-                    numInstanceFields++;
-                }
-
-                result.Offsets = new FieldAndOffset[numInstanceFields];
-
-                int index = 0;
-                foreach (var handle in fieldDefinitionHandles)
-                {
-                    var fieldDefinition = MetadataReader.GetFieldDefinition(handle);
-                    if ((fieldDefinition.Attributes & FieldAttributes.Static) != 0)
-                        continue;
-
-                    // Note: GetOffset() returns -1 when offset was not set in the metadata
-                    int specifiedOffset = fieldDefinition.GetOffset();
-                    result.Offsets[index] =
-                        new FieldAndOffset(_module.GetField(handle, this), specifiedOffset == -1 ? FieldAndOffset.InvalidOffset : new LayoutInt(specifiedOffset));
-
-                    index++;
-                }
-            }
-            else
-                result.Offsets = null;
-
-            return result;
+                PackingSize = layout.PackingSize,
+                Size = layout.Size
+            };
         }
 
         public override bool IsExplicitLayout
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/GetCanonTypeIntrinsic.cs b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/GetCanonTypeIntrinsic.cs
index 9c44adad0d5f..92f6b53684ff 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/GetCanonTypeIntrinsic.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/GetCanonTypeIntrinsic.cs
@@ -15,7 +15,7 @@ public static class GetCanonTypeIntrinsic
     {
         public static MethodIL EmitIL(MethodDesc target)
         {
-            Debug.Assert(target.Signature.Length == 1);
+            Debug.Assert(target.Signature.Length == 0);
 
             ILEmitter emitter = new ILEmitter();
             var codeStream = emitter.NewCodeStream();
@@ -24,35 +24,7 @@ public static MethodIL EmitIL(MethodDesc target)
             TypeDesc runtimeTypeHandleType = context.GetWellKnownType(WellKnownType.RuntimeTypeHandle);
             Debug.Assert(target.Signature.ReturnType == runtimeTypeHandleType);
 
-            if (context.SupportsCanon)
-            {
-                ILCodeLabel lNotCanon = emitter.NewCodeLabel();
-                codeStream.Emit(ILOpcode.ldarg_0);
-                codeStream.EmitLdc((int)CanonTypeKind.NormalCanon);
-                codeStream.Emit(ILOpcode.bne_un, lNotCanon);
-                codeStream.Emit(ILOpcode.ldtoken, emitter.NewToken(context.CanonType));
-                codeStream.Emit(ILOpcode.ret);
-                codeStream.EmitLabel(lNotCanon);
-
-                // We're not conditioning this on SupportsUniversalCanon because the runtime type loader
-                // does a lot of comparisons against UniversalCanon and not having a RuntimeTypeHandle
-                // for it makes these checks awkward.
-                // Would be nice if we didn't have to emit the MethodTable if universal canonical code wasn't enabled
-                // at the time of compilation.
-                ILCodeLabel lNotUniversalCanon = emitter.NewCodeLabel();
-                codeStream.Emit(ILOpcode.ldarg_0);
-                codeStream.EmitLdc((int)CanonTypeKind.UniversalCanon);
-                codeStream.Emit(ILOpcode.bne_un, lNotUniversalCanon);
-                codeStream.Emit(ILOpcode.ldtoken, emitter.NewToken(context.UniversalCanonType));
-                codeStream.Emit(ILOpcode.ret);
-                codeStream.EmitLabel(lNotUniversalCanon);
-            }
-
-            ILLocalVariable vNullTypeHandle = emitter.NewLocal(runtimeTypeHandleType);
-            codeStream.EmitLdLoca(vNullTypeHandle);
-            codeStream.Emit(ILOpcode.initobj, emitter.NewToken(runtimeTypeHandleType));
-            codeStream.EmitLdLoc(vNullTypeHandle);
-
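+            // Always return the handle of the canonical type.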
+            codeStream.Emit(ILOpcode.ldtoken, emitter.NewToken(context.CanonType));
             codeStream.Emit(ILOpcode.ret);
 
             return emitter.Link(target);
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/UnsafeAccessors.cs b/src/coreclr/tools/Common/TypeSystem/IL/UnsafeAccessors.cs
index e8e97f2eb319..b6be413b9baa 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/UnsafeAccessors.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/UnsafeAccessors.cs
@@ -2,10 +2,13 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Collections;
 using System.Diagnostics;
 using System.Globalization;
+using System.Reflection;
 using System.Reflection.Metadata;
 using System.Runtime.InteropServices;
+using System.Threading;
 using Internal.IL.Stubs;
 using Internal.TypeSystem;
 using Internal.TypeSystem.Ecma;
@@ -40,15 +43,21 @@ public static MethodIL TryGetIL(EcmaMethod method)
                 Declaration = method
             };
 
-            MethodSignature sig = method.Signature;
-            TypeDesc retType = sig.ReturnType;
-            TypeDesc firstArgType = null;
-            if (sig.Length > 0)
+            SetTargetResult result;
+
+            result = TrySetTargetMethodSignature(ref context);
+            if (result is not SetTargetResult.Success)
             {
-                firstArgType = sig[0];
+                return GenerateAccessorSpecificFailure(ref context, name, result);
             }
 
-            SetTargetResult result;
+            TypeDesc retType = context.DeclarationSignature.ReturnType;
+
+            TypeDesc firstArgType = null;
+            if (context.DeclarationSignature.Length > 0)
+            {
+                firstArgType = context.DeclarationSignature[0];
+            }
 
             // Using the kind type, perform the following:
             //  1) Validate the basic type information from the signature.
@@ -110,7 +119,7 @@ public static MethodIL TryGetIL(EcmaMethod method)
                 case UnsafeAccessorKind.Field:
                 case UnsafeAccessorKind.StaticField:
                     // Field access requires a single argument for target type and a return type.
-                    if (sig.Length != 1 || retType.IsVoid)
+                    if (context.DeclarationSignature.Length != 1 || retType.IsVoid)
                     {
                         return GenerateAccessorBadImageFailure(method);
                     }
@@ -209,6 +218,8 @@ private struct GenerationContext
         {
             public UnsafeAccessorKind Kind;
             public EcmaMethod Declaration;
+            public MethodSignature DeclarationSignature;
+            public BitArray ReplacedSignatureElements;
             public TypeDesc TargetType;
             public bool IsTargetStatic;
             public MethodDesc TargetMethod;
@@ -241,7 +252,7 @@ private static bool ValidateTargetType(TypeDesc targetTypeMaybe, out TypeDesc va
 
         private static bool DoesMethodMatchUnsafeAccessorDeclaration(ref GenerationContext context, MethodDesc method, bool ignoreCustomModifiers)
         {
-            MethodSignature declSig = context.Declaration.Signature;
+            MethodSignature declSig = context.DeclarationSignature;
             MethodSignature maybeSig = method.Signature;
 
             // Check if we need to also validate custom modifiers.
@@ -249,14 +260,14 @@ private static bool DoesMethodMatchUnsafeAccessorDeclaration(ref GenerationConte
             if (!ignoreCustomModifiers)
             {
                 // Compare any unmanaged callconv and custom modifiers on the signatures.
-                // We treat unmanaged calling conventions at the same level of precedance
+                // We treat unmanaged calling conventions at the same level of precedence
                 // as custom modifiers, eventhough they are normally bits in a signature.
-                ReadOnlySpan<EmbeddedSignatureDataKind> kinds = new EmbeddedSignatureDataKind[]
-                {
+                ReadOnlySpan<EmbeddedSignatureDataKind> kinds =
+                [
                     EmbeddedSignatureDataKind.UnmanagedCallConv,
                     EmbeddedSignatureDataKind.RequiredCustomModifier,
                     EmbeddedSignatureDataKind.OptionalCustomModifier
-                };
+                ];
 
                 var declData = declSig.GetEmbeddedSignatureData(kinds) ?? Array.Empty<EmbeddedSignatureData>();
                 var maybeData = maybeSig.GetEmbeddedSignatureData(kinds) ?? Array.Empty<EmbeddedSignatureData>();
@@ -403,8 +414,10 @@ private enum SetTargetResult
         {
             Success,
             Missing,
+            MissingType,
             Ambiguous,
             Invalid,
+            NotSupported
         }
 
         private static SetTargetResult TrySetTargetMethod(ref GenerationContext context, string name, bool ignoreCustomModifiers = true)
@@ -494,20 +507,250 @@ private static SetTargetResult TrySetTargetField(ref GenerationContext context,
             return SetTargetResult.Missing;
         }
 
+        private static bool IsValidInitialTypeForReplacementType(TypeDesc initialType, TypeDesc replacementType)
+        {
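+            // The declared parameter type can only stand in for the replacement type if it is object, void*, or a byref wrapping one of those.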
+            if (replacementType.IsByRef)
+            {
+                if (!initialType.IsByRef)
+                {
+                    // We can't replace a non-byref with a byref.
+                    return false;
+                }
+
+                return IsValidInitialTypeForReplacementType(((ByRefType)initialType).ParameterType, ((ByRefType)replacementType).ParameterType);
+            }
+            else if (initialType.IsByRef)
+            {
+                // We can't replace a byref with a non-byref.
+                return false;
+            }
+
+            if (replacementType.IsPointer)
+            {
+                return initialType is PointerType { ParameterType.IsVoid: true };
+            }
+
+            Debug.Assert(!replacementType.IsValueType);
+
+            return initialType.IsObject;
+        }
+
+        private static SetTargetResult TrySetTargetMethodSignature(ref GenerationContext context)
+        {
+            EcmaMethod method = context.Declaration;
+            MetadataReader reader = method.MetadataReader;
+            MethodDefinition methodDef = reader.GetMethodDefinition(method.Handle);
+            ParameterHandleCollection parameters = methodDef.GetParameters();
+
+            MethodSignature originalSignature = method.Signature;
+
+            MethodSignatureBuilder updatedSignature = new MethodSignatureBuilder(originalSignature);
+
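+            // Walk the Param rows; a parameter (or the return value, SequenceNumber == 0) annotated with UnsafeAccessorTypeAttribute has its slot in the signature replaced by the attribute's type.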
+            foreach (ParameterHandle parameterHandle in parameters)
+            {
+                Parameter parameter = reader.GetParameter(parameterHandle);
+
+                if (parameter.SequenceNumber > originalSignature.Length)
+                {
+                    // This is invalid metadata (parameter metadata for a parameter that doesn't exist in the signature).
+                    return SetTargetResult.Invalid;
+                }
+
+                CustomAttributeHandle unsafeAccessorTypeAttributeHandle = FindUnsafeAccessorTypeAttribute(reader, parameter);
+
+                if (unsafeAccessorTypeAttributeHandle.IsNil)
+                {
+                    continue;
+                }
+
+                bool isReturnValue = parameter.SequenceNumber == 0;
+
+                TypeDesc initialType = isReturnValue ? originalSignature.ReturnType : originalSignature[parameter.SequenceNumber - 1];
+
+                if (isReturnValue && initialType.IsByRef)
+                {
+                    // We can't support UnsafeAccessorTypeAttribute on by-ref returns
+                    // today as it would create a type-safety hole.
+                    return SetTargetResult.NotSupported;
+                }
+
+                SetTargetResult decodeResult = DecodeUnsafeAccessorType(method, reader.GetCustomAttribute(unsafeAccessorTypeAttributeHandle), out TypeDesc replacementType);
+                if (decodeResult != SetTargetResult.Success)
+                {
+                    return decodeResult;
+                }
+
+                // Future versions of the runtime may support
+                // UnsafeAccessorTypeAttribute on value types.
+                if (replacementType.IsValueType)
+                {
+                    return SetTargetResult.NotSupported;
+                }
+
+                if (!IsValidInitialTypeForReplacementType(initialType, replacementType))
+                {
+                    return SetTargetResult.Invalid;
+                }
+
+                context.ReplacedSignatureElements ??= new BitArray(originalSignature.Length + 1, false);
+                context.ReplacedSignatureElements[parameter.SequenceNumber] = true;
+
+                if (isReturnValue)
+                {
+                    updatedSignature.ReturnType = replacementType;
+                }
+                else
+                {
+                    updatedSignature[parameter.SequenceNumber - 1] = replacementType;
+                }
+            }
+
+            context.DeclarationSignature = updatedSignature.ToSignature();
+            return SetTargetResult.Success;
+        }
+
+        private static SetTargetResult DecodeUnsafeAccessorType(EcmaMethod method, CustomAttribute unsafeAccessorTypeAttribute, out TypeDesc replacementType)
+        {
+            replacementType = null;
+            CustomAttributeValue<TypeDesc> decoded = unsafeAccessorTypeAttribute.DecodeValue(
+                new CustomAttributeTypeProvider(method.Module));
+
+            if (decoded.FixedArguments[0].Value is not string replacementTypeName)
+            {
+                return SetTargetResult.Invalid;
+            }
+
+            replacementType = method.Module.GetTypeByCustomAttributeTypeName(
+                replacementTypeName,
+                throwIfNotFound: false,
+                canonGenericResolver: (module, name) =>
+                {
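+                    // Names of the form "!n" refer to type generic parameters and "!!n" to method generic parameters; map them to signature variables.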
+                    if (!name.StartsWith('!'))
+                    {
+                        return null;
+                    }
+
+                    bool isMethodParameter = name.StartsWith("!!", StringComparison.Ordinal);
+
+                    if (!int.TryParse(name.AsSpan(isMethodParameter ? 2 : 1), NumberStyles.None, CultureInfo.InvariantCulture, out int index))
+                    {
+                        return null;
+                    }
+
+                    if (isMethodParameter)
+                    {
+                        if (index >= method.Instantiation.Length)
+                        {
+                            return null;
+                        }
+                    }
+                    else
+                    {
+                        if (index >= method.OwningType.Instantiation.Length)
+                        {
+                            return null;
+                        }
+                    }
+
+                    return module.Context.GetSignatureVariable(index, isMethodParameter);
+                });
+
+            return replacementType is null
+                ? SetTargetResult.MissingType
+                : SetTargetResult.Success;
+        }
+
+        private static CustomAttributeHandle FindUnsafeAccessorTypeAttribute(MetadataReader reader, Parameter parameter)
+        {
+            foreach (CustomAttributeHandle customAttributeHandle in parameter.GetCustomAttributes())
+            {
+                reader.GetAttributeNamespaceAndName(customAttributeHandle, out StringHandle namespaceName, out StringHandle name);
+                if (reader.StringComparer.Equals(namespaceName, "System.Runtime.CompilerServices")
+                    && reader.StringComparer.Equals(name, "UnsafeAccessorTypeAttribute"))
+                {
+                    return customAttributeHandle;
+                }
+            }
+
+            return default;
+        }
+
+        private static ParameterHandle FindParameterForSequenceNumber(MetadataReader reader, ref ParameterHandleCollection.Enumerator parameterEnumerator, int sequenceNumber)
+        {
+            Parameter currentParameter = reader.GetParameter(parameterEnumerator.Current);
+            if (currentParameter.SequenceNumber == sequenceNumber)
+            {
+                return parameterEnumerator.Current;
+            }
+
+            // Scan until we are either at this parameter or at the first one after it (if there is no Parameter row in the table)
+            while (parameterEnumerator.MoveNext())
+            {
+                Parameter thisParameterMaybe = reader.GetParameter(parameterEnumerator.Current);
+                if (thisParameterMaybe.SequenceNumber > sequenceNumber)
+                {
+                    // We've passed where it should be.
+                    return default;
+                }
+
+                if (thisParameterMaybe.SequenceNumber == sequenceNumber)
+                {
+                    // We found it.
+                    return parameterEnumerator.Current;
+                }
+            }
+
+            return default;
+        }
+
         private static MethodIL GenerateAccessor(ref GenerationContext context)
         {
             ILEmitter emit = new ILEmitter();
             ILCodeStream codeStream = emit.NewCodeStream();
 
+            MetadataReader reader = context.Declaration.MetadataReader;
+            ParameterHandleCollection.Enumerator parameterEnumerator = reader.GetMethodDefinition(context.Declaration.Handle).GetParameters().GetEnumerator();
+            parameterEnumerator.MoveNext();
+
             // Load stub arguments.
             // When the target is static, the first argument is only
             // used to look up the target member to access and ignored
             // during dispatch.
             int beginIndex = context.IsTargetStatic ? 1 : 0;
-            int stubArgCount = context.Declaration.Signature.Length;
+            int stubArgCount = context.DeclarationSignature.Length;
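+            // Byref arguments with a replaced element type are cast into locals whose addresses are passed to the target; those locals are written back after the call unless the parameter is marked [In].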
+            Stubs.ILLocalVariable?[] localsToRestore = null;
+
             for (int i = beginIndex; i < stubArgCount; ++i)
             {
                 codeStream.EmitLdArg(i);
+                if (context.ReplacedSignatureElements?[i + 1] == true)
+                {
+                    if (context.DeclarationSignature[i] is { Category: TypeFlags.Class } classType)
+                    {
+                        codeStream.Emit(ILOpcode.unbox_any, emit.NewToken(classType));
+                    }
+                    else if (context.DeclarationSignature[i] is ByRefType { ParameterType.Category: TypeFlags.Class } byrefType)
+                    {
+                        localsToRestore ??= new Stubs.ILLocalVariable?[stubArgCount];
+
+                        TypeDesc targetType = byrefType.ParameterType;
+                        Stubs.ILLocalVariable local = emit.NewLocal(targetType);
+                        codeStream.EmitLdInd(targetType);
+                        codeStream.Emit(ILOpcode.unbox_any, emit.NewToken(targetType));
+                        codeStream.EmitStLoc(local);
+                        codeStream.EmitLdLoca(local);
+
+                        // Only mark the local to be restored after the call
+                        // if the parameter is not marked as "in".
+                        // The "sequence number" for parameters is 1-based, whereas the parameter index is 0-based.
+                        ParameterHandle paramHandle = FindParameterForSequenceNumber(reader, ref parameterEnumerator, i + 1);
+                        if (paramHandle.IsNil
+                            || !reader.GetParameter(paramHandle).Attributes.HasFlag(ParameterAttributes.In))
+                        {
+                            localsToRestore[i] = local;
+                        }
+                    }
+                }
             }
 
             // Provide access to the target member
@@ -538,6 +781,19 @@ private static MethodIL GenerateAccessor(ref GenerationContext context)
                     break;
             }
 
+            if (localsToRestore is not null)
+            {
+                for (int i = beginIndex; i < stubArgCount; ++i)
+                {
+                    if (localsToRestore[i] != null)
+                    {
+                        codeStream.EmitLdArg(i);
+                        codeStream.EmitLdLoc(localsToRestore[i].Value);
+                        codeStream.EmitStInd(((ParameterizedType)context.Declaration.Signature[i]).ParameterType);
+                    }
+                }
+            }
+
             // Return from the generated stub
             codeStream.Emit(ILOpcode.ret);
             return emit.Link(context.Declaration);
@@ -563,6 +819,14 @@ private static MethodIL GenerateAccessorSpecificFailure(ref GenerationContext co
                 codeStream.EmitLdc((int)ExceptionStringID.InvalidProgramDefault);
                 thrower = typeSysContext.GetHelperEntryPoint("ThrowHelpers", "ThrowInvalidProgramException");
             }
+            else if (result is SetTargetResult.NotSupported)
+            {
+                thrower = typeSysContext.GetHelperEntryPoint("ThrowHelpers", "ThrowNotSupportedException");
+            }
+            else if (result is SetTargetResult.MissingType)
+            {
+                thrower = typeSysContext.GetHelperEntryPoint("ThrowHelpers", "ThrowUnavailableType");
+            }
             else
             {
                 Debug.Assert(result is SetTargetResult.Missing);
diff --git a/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.Aot.cs b/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.Aot.cs
index 29abb9313ea3..b5e9d32dff01 100644
--- a/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.Aot.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.Aot.cs
@@ -19,10 +19,9 @@ public static bool IsStructMarshallingRequired(TypeDesc typeDesc)
 
             typeDesc = typeDesc.UnderlyingType;
 
-            // TODO: There are primitive types which require marshalling, such as bool, char.
             if (typeDesc.IsPrimitive)
             {
-                return false;
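+                // bool and char do not map 1:1 to their default native representations, so they require marshalling.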
+                return typeDesc.Category is TypeFlags.Boolean or TypeFlags.Char;
             }
 
             MetadataType type = typeDesc as MetadataType;
diff --git a/src/coreclr/tools/Common/TypeSystem/Interop/IL/Marshaller.Aot.cs b/src/coreclr/tools/Common/TypeSystem/Interop/IL/Marshaller.Aot.cs
index 0dff54cd75a1..f1fc3547a09a 100644
--- a/src/coreclr/tools/Common/TypeSystem/Interop/IL/Marshaller.Aot.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Interop/IL/Marshaller.Aot.cs
@@ -680,11 +680,9 @@ protected override void TransformNativeToManaged(ILCodeStream codeStream)
             LoadManagedValue(codeStream);
             codeStream.Emit(ILOpcode.brtrue, lNonNull);
 
-            MethodDesc ctor = ManagedType.GetParameterlessConstructor();
-            if (ctor == null)
-                throw new InvalidProgramException();
-
-            codeStream.Emit(ILOpcode.newobj, emitter.NewToken(ctor));
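+            // Allocate the managed object with RuntimeHelpers.GetUninitializedObject instead of requiring a parameterless constructor.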
+            codeStream.Emit(ILOpcode.ldtoken, emitter.NewToken(ManagedType));
+            codeStream.Emit(ILOpcode.call, emitter.NewToken(InteropTypes.GetType(Context).GetMethod("GetTypeFromHandle", null)));
+            codeStream.Emit(ILOpcode.call, emitter.NewToken(InteropTypes.GetRuntimeHelpers(Context).GetKnownMethod("GetUninitializedObject", null)));
             StoreManagedValue(codeStream);
 
             codeStream.EmitLabel(lNonNull);
@@ -736,12 +734,9 @@ protected override void AllocManagedToNative(ILCodeStream codeStream)
         protected override void AllocNativeToManaged(ILCodeStream codeStream)
         {
             ILEmitter emitter = _ilCodeStreams.Emitter;
-
-            MethodDesc ctor = ManagedType.GetParameterlessConstructor();
-            if (ctor == null)
-                throw new InvalidProgramException();
-
-            codeStream.Emit(ILOpcode.newobj, emitter.NewToken(ctor));
+            codeStream.Emit(ILOpcode.ldtoken, emitter.NewToken(ManagedType));
+            codeStream.Emit(ILOpcode.call, emitter.NewToken(InteropTypes.GetType(Context).GetMethod("GetTypeFromHandle", null)));
+            codeStream.Emit(ILOpcode.call, emitter.NewToken(InteropTypes.GetRuntimeHelpers(Context).GetKnownMethod("GetUninitializedObject", null)));
             StoreManagedValue(codeStream);
         }
 
diff --git a/src/coreclr/tools/Common/TypeSystem/Interop/IL/NativeStructType.cs b/src/coreclr/tools/Common/TypeSystem/Interop/IL/NativeStructType.cs
index 595613289900..3e0c35c0ae92 100644
--- a/src/coreclr/tools/Common/TypeSystem/Interop/IL/NativeStructType.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Interop/IL/NativeStructType.cs
@@ -249,29 +249,6 @@ public override ClassLayoutMetadata GetClassLayout()
             result.PackingSize = layout.PackingSize;
             result.Size = layout.Size;
 
-            if (IsExplicitLayout)
-            {
-                result.Offsets = new FieldAndOffset[layout.Offsets.Length];
-
-                Debug.Assert(layout.Offsets.Length <= _fields.Length);
-
-                int layoutIndex = 0;
-                for (int index = 0; index < _fields.Length; index++)
-                {
-                    if (_fields[index].Name == layout.Offsets[layoutIndex].Field.Name)
-                    {
-                        result.Offsets[layoutIndex] = new FieldAndOffset(_fields[index], layout.Offsets[layoutIndex].Offset);
-                        layoutIndex++;
-                    }
-                }
-
-                Debug.Assert(layoutIndex == layout.Offsets.Length);
-            }
-            else
-            {
-                result.Offsets = null;
-            }
-
             return result;
         }
 
@@ -440,6 +417,14 @@ public override string Name
                 }
             }
 
+            public override LayoutInt MetadataOffset
+            {
+                get
+                {
+                    return _managedField.MetadataOffset;
+                }
+            }
+
             public NativeStructField(TypeDesc nativeType, MetadataType owningType, FieldDesc managedField)
             {
                 _fieldType = nativeType;
diff --git a/src/coreclr/tools/Common/TypeSystem/Interop/InteropTypes.cs b/src/coreclr/tools/Common/TypeSystem/Interop/InteropTypes.cs
index 7fdd0f065962..247e09e7f77e 100644
--- a/src/coreclr/tools/Common/TypeSystem/Interop/InteropTypes.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Interop/InteropTypes.cs
@@ -12,6 +12,11 @@ public static MetadataType GetGC(TypeSystemContext context)
             return context.SystemModule.GetKnownType("System", "GC");
         }
 
+        public static MetadataType GetType(TypeSystemContext context)
+        {
+            return context.SystemModule.GetKnownType("System", "Type");
+        }
+
         public static MetadataType GetSafeHandle(TypeSystemContext context)
         {
             return context.SystemModule.GetKnownType("System.Runtime.InteropServices", "SafeHandle");
@@ -32,6 +37,11 @@ public static MetadataType GetPInvokeMarshal(TypeSystemContext context)
             return context.SystemModule.GetKnownType("System.Runtime.InteropServices", "PInvokeMarshal");
         }
 
+        public static MetadataType GetRuntimeHelpers(TypeSystemContext context)
+        {
+            return context.SystemModule.GetKnownType("System.Runtime.CompilerServices", "RuntimeHelpers");
+        }
+
         public static MetadataType GetMarshal(TypeSystemContext context)
         {
             return context.SystemModule.GetKnownType("System.Runtime.InteropServices", "Marshal");
diff --git a/src/coreclr/tools/GCLogParser/parse-hb-log.sln b/src/coreclr/tools/GCLogParser/parse-hb-log.sln
deleted file mode 100644
index 71aa126764ce..000000000000
--- a/src/coreclr/tools/GCLogParser/parse-hb-log.sln
+++ /dev/null
@@ -1,31 +0,0 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.28803.452
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "parse-hb-log", "parse-hb-log.csproj", "{6AE26CD6-C971-48D6-8C03-2FFA272B942C}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{6AE26CD6-C971-48D6-8C03-2FFA272B942C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{6AE26CD6-C971-48D6-8C03-2FFA272B942C}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{6AE26CD6-C971-48D6-8C03-2FFA272B942C}.Debug|x64.ActiveCfg = Debug|x64
-		{6AE26CD6-C971-48D6-8C03-2FFA272B942C}.Debug|x64.Build.0 = Debug|x64
-		{6AE26CD6-C971-48D6-8C03-2FFA272B942C}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{6AE26CD6-C971-48D6-8C03-2FFA272B942C}.Release|Any CPU.Build.0 = Release|Any CPU
-		{6AE26CD6-C971-48D6-8C03-2FFA272B942C}.Release|x64.ActiveCfg = Release|x64
-		{6AE26CD6-C971-48D6-8C03-2FFA272B942C}.Release|x64.Build.0 = Release|x64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-	GlobalSection(ExtensibilityGlobals) = postSolution
-		SolutionGuid = {7A3E16A0-5AC4-4DB6-A016-161E57874740}
-	EndGlobalSection
-EndGlobal
diff --git a/src/coreclr/tools/GCLogParser/parse-hb-log.slnx b/src/coreclr/tools/GCLogParser/parse-hb-log.slnx
new file mode 100644
index 000000000000..45db5764b44d
--- /dev/null
+++ b/src/coreclr/tools/GCLogParser/parse-hb-log.slnx
@@ -0,0 +1,9 @@
+
+  
+    
+    
+  
+  
+    
+  
+
diff --git a/src/coreclr/tools/ILVerification/ILImporter.StackValue.cs b/src/coreclr/tools/ILVerification/ILImporter.StackValue.cs
index 18c9736aeb3f..e5618020feb7 100644
--- a/src/coreclr/tools/ILVerification/ILImporter.StackValue.cs
+++ b/src/coreclr/tools/ILVerification/ILImporter.StackValue.cs
@@ -161,9 +161,6 @@ static public StackValue CreateFromType(TypeDesc type)
 
         public override bool Equals(object obj)
         {
-            if (Object.ReferenceEquals(this, obj))
-                return true;
-
             if (!(obj is StackValue))
                 return false;
 
diff --git a/src/coreclr/tools/ILVerification/ILVerification.projitems b/src/coreclr/tools/ILVerification/ILVerification.projitems
index fc02753d6010..335aaef17ad8 100644
--- a/src/coreclr/tools/ILVerification/ILVerification.projitems
+++ b/src/coreclr/tools/ILVerification/ILVerification.projitems
@@ -66,9 +66,6 @@
     
       Utilities\CustomAttributeTypeNameParser.cs
     
-    
-      Utilities\TypeNameHelpers.cs
-    
     
       Utilities\ValueStringBuilder.cs
     
@@ -99,6 +96,9 @@
     
       TypeSystem\Common\FieldForInstantiatedType.cs
     
+    
+      TypeSystem\Common\FieldForInstantiatedType.FieldLayout.cs
+    
     
       TypeSystem\Common\FieldDesc.cs
     
diff --git a/src/coreclr/tools/ILVerify/ILVerifyRootCommand.cs b/src/coreclr/tools/ILVerify/ILVerifyRootCommand.cs
index 9f5eac54af8e..62e1aecba47b 100644
--- a/src/coreclr/tools/ILVerify/ILVerifyRootCommand.cs
+++ b/src/coreclr/tools/ILVerify/ILVerifyRootCommand.cs
@@ -8,33 +8,33 @@
 
 namespace ILVerify
 {
-    internal sealed class ILVerifyRootCommand : CliRootCommand
+    internal sealed class ILVerifyRootCommand : RootCommand
     {
-        public CliArgument> InputFilePath { get; } =
+        public Argument> InputFilePath { get; } =
             new("input-file-path") { CustomParser = result => Helpers.BuildPathDictionary(result.Tokens, true), Description = "Input file(s)", Arity = ArgumentArity.OneOrMore };
-        public CliOption> Reference { get; } =
+        public Option> Reference { get; } =
             new("--reference", "-r") { CustomParser = result => Helpers.BuildPathDictionary(result.Tokens, false), DefaultValueFactory = result => Helpers.BuildPathDictionary(result.Tokens, false), Description = "Reference metadata from the specified assembly" };
-        public CliOption SystemModule { get; } =
+        public Option SystemModule { get; } =
             new("--system-module", "-s") { Description = "System module name (default: mscorlib)" };
-        public CliOption SanityChecks { get; } =
+        public Option SanityChecks { get; } =
             new("--sanity-checks", "-c") { Description = "Check for valid constructs that are likely mistakes" };
-        public CliOption Include { get; } =
+        public Option Include { get; } =
             new("--include", "-i") { Description = "Use only methods/types/namespaces, which match the given regular expression(s)" };
-        public CliOption IncludeFile { get; } =
-            new CliOption("--include-file") { Description = "Same as --include, but the regular expression(s) are declared line by line in the specified file." }.AcceptExistingOnly();
-        public CliOption Exclude { get; } =
+        public Option IncludeFile { get; } =
+            new Option("--include-file") { Description = "Same as --include, but the regular expression(s) are declared line by line in the specified file." }.AcceptExistingOnly();
+        public Option Exclude { get; } =
             new("--exclude", "-e") { Description = "Skip methods/types/namespaces, which match the given regular expression(s)" };
-        public CliOption ExcludeFile { get; } =
-            new CliOption("--exclude-file") { Description = "Same as --exclude, but the regular expression(s) are declared line by line in the specified file." }.AcceptExistingOnly();
-        public CliOption IgnoreError { get; } =
+        public Option ExcludeFile { get; } =
+            new Option("--exclude-file") { Description = "Same as --exclude, but the regular expression(s) are declared line by line in the specified file." }.AcceptExistingOnly();
+        public Option IgnoreError { get; } =
             new("--ignore-error", "-g") { Description = "Ignore errors, which match the given regular expression(s)" };
-        public CliOption IgnoreErrorFile { get; } =
-            new CliOption("--ignore-error-file") { Description = "Same as --ignore-error, but the regular expression(s) are declared line by line in the specified file." }.AcceptExistingOnly();
-        public CliOption Statistics { get; } =
+        public Option IgnoreErrorFile { get; } =
+            new Option("--ignore-error-file") { Description = "Same as --ignore-error, but the regular expression(s) are declared line by line in the specified file." }.AcceptExistingOnly();
+        public Option Statistics { get; } =
             new("--statistics") { Description = "Print verification statistics" };
-        public CliOption Verbose { get; } =
+        public Option Verbose { get; } =
             new("--verbose") { Description = "Verbose output" };
-        public CliOption Tokens { get; } =
+        public Option Tokens { get; } =
             new("--tokens", "-t") { Description = "Include metadata tokens in error messages" };
 
         public ParseResult Result;
diff --git a/src/coreclr/tools/ILVerify/Program.cs b/src/coreclr/tools/ILVerify/Program.cs
index 7dd39082a0c0..eae57d8bfd75 100644
--- a/src/coreclr/tools/ILVerify/Program.cs
+++ b/src/coreclr/tools/ILVerify/Program.cs
@@ -475,11 +475,11 @@ public PEReader Resolve(string simpleName)
             return null;
         }
 
-        private T Get<T>(CliOption<T> option) => _command.Result.GetValue(option);
-        private T Get<T>(CliArgument<T> argument) => _command.Result.GetValue(argument);
+        private T Get<T>(Option<T> option) => _command.Result.GetValue(option);
+        private T Get<T>(Argument<T> argument) => _command.Result.GetValue(argument);
 
         private static int Main(string[] args) =>
-            new CliConfiguration(new ILVerifyRootCommand().UseVersion())
+            new CommandLineConfiguration(new ILVerifyRootCommand().UseVersion())
             {
                 ResponseFileTokenReplacer = Helpers.TryReadResponseFile,
                 EnableDefaultExceptionHandler = false,
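
The ILVerify hunks above track System.CommandLine's rename of the `Cli`-prefixed types back to their unprefixed names (`CliRootCommand` → `RootCommand`, `CliOption<T>` → `Option<T>`, `CliArgument<T>` → `Argument<T>`, `CliConfiguration` → `CommandLineConfiguration`). A minimal sketch of the renamed surface, assuming the 2.0.0-beta5-era API shape (the option below is illustrative, not one of ILVerify's actual options):

```csharp
using System;
using System.CommandLine;

public static class CommandLineSketch
{
    public static int Main(string[] args)
    {
        var verbose = new Option<bool>("--verbose", "-v") { Description = "Verbose output" };

        var root = new RootCommand("Sample tool");
        root.Options.Add(verbose);

        var result = root.Parse(args);
        bool isVerbose = result.GetValue(verbose); // same GetValue pattern as the Get<T> helpers above

        Console.WriteLine(isVerbose ? "verbose" : "quiet");
        return 0;
    }
}
```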
diff --git a/src/coreclr/tools/PdbChecker/PdbChecker.sln b/src/coreclr/tools/PdbChecker/PdbChecker.sln
deleted file mode 100644
index 3ba7e9ae4c33..000000000000
--- a/src/coreclr/tools/PdbChecker/PdbChecker.sln
+++ /dev/null
@@ -1,25 +0,0 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.3.32708.82
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PdbChecker", "PdbChecker.csproj", "{6247A503-5387-4BE1-ACA3-027CADA30CA9}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Release|Any CPU = Release|Any CPU
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{6247A503-5387-4BE1-ACA3-027CADA30CA9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{6247A503-5387-4BE1-ACA3-027CADA30CA9}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{6247A503-5387-4BE1-ACA3-027CADA30CA9}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{6247A503-5387-4BE1-ACA3-027CADA30CA9}.Release|Any CPU.Build.0 = Release|Any CPU
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-	GlobalSection(ExtensibilityGlobals) = postSolution
-		SolutionGuid = {4033231C-763B-4C57-BA35-7C1AC007AD0E}
-	EndGlobalSection
-EndGlobal
diff --git a/src/coreclr/tools/PdbChecker/PdbChecker.slnx b/src/coreclr/tools/PdbChecker/PdbChecker.slnx
new file mode 100644
index 000000000000..e3db3c19e9b5
--- /dev/null
+++ b/src/coreclr/tools/PdbChecker/PdbChecker.slnx
@@ -0,0 +1,3 @@
+<Solution>
+  <Project Path="PdbChecker.csproj" />
+</Solution>
diff --git a/src/coreclr/tools/StressLogAnalyzer/CMakeLists.txt b/src/coreclr/tools/StressLogAnalyzer/CMakeLists.txt
deleted file mode 100644
index 3ef795455d70..000000000000
--- a/src/coreclr/tools/StressLogAnalyzer/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-add_executable_clr(StressLogAnalyzer StressLogAnalyzer.cpp StressLogDump.cpp StressLogPlugin.cpp)
-
-if(CLR_CMAKE_TARGET_WIN32)
-    target_link_libraries(StressLogAnalyzer
-        PRIVATE
-        ${STATIC_MT_CRT_LIB}
-        ${STATIC_MT_VCRT_LIB}
-    )
-else()
-    target_link_libraries(StressLogAnalyzer
-        PRIVATE
-        coreclrpal
-    )
-endif(CLR_CMAKE_TARGET_WIN32)
-
-install_clr(TARGETS StressLogAnalyzer DESTINATIONS . COMPONENT runtime)
diff --git a/src/coreclr/tools/StressLogAnalyzer/StressLogAnalyzer.cpp b/src/coreclr/tools/StressLogAnalyzer/StressLogAnalyzer.cpp
deleted file mode 100644
index 8b5523299762..000000000000
--- a/src/coreclr/tools/StressLogAnalyzer/StressLogAnalyzer.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include 
-#include 
-#include 
-#include 
-
-#include "assert.h"
-
-#include 
-
-#define MEMORY_MAPPED_STRESSLOG
-
-#ifdef HOST_WINDOWS
-#define MEMORY_MAPPED_STRESSLOG_BASE_ADDRESS (void*)0x400000000000
-#else
-#define MEMORY_MAPPED_STRESSLOG_BASE_ADDRESS nullptr
-#endif
-
-// This macro is used to standardize the wide character string literals between UNIX and Windows.
-// Unix L"" is UTF32, and on windows it's UTF16.  Because of built-in assumptions on the size
-// of string literals, it's important to match behaviour between Unix and Windows.  Unix will be defined
-// as u"" (char16_t)
-#ifdef TARGET_UNIX
-#define W(str)  u##str
-#else // TARGET_UNIX
-#define W(str)  L##str
-#endif // TARGET_UNIX
-
-int ParseCommandLine(char* s, char** argv, int maxArgc)
-{
-    int argc = 0;
-    bool prevWasSpace = true;
-    bool insideString = false;
-    while (*s)
-    {
-        if (!insideString)
-        {
-            if (isspace(*s))
-            {
-                *s = '\0';
-                prevWasSpace = true;
-            }
-            else if (prevWasSpace)
-            {
-                // argument begins here
-                if (argc < maxArgc - 1)
-                {
-                    argv[argc++] = s;
-                }
-                prevWasSpace = false;
-            }
-        }
-        if (*s == '"')
-        {
-            insideString = !insideString;
-        }
-        else if (*s == '\\' && s[1] != '\0')
-        {
-            s++;
-        }
-        s++;
-    }
-    if (argc > 0)
-    {
-        argv[argc] = nullptr;
-    }
-    return argc;
-}
-
-int ProcessStressLog(void* baseAddress, int argc, char* argv[]);
-
-int main(int argc, char *argv[])
-{
-#ifdef HOST_UNIX
-    int exitCode = PAL_Initialize(argc, argv);
-    if (exitCode != 0)
-    {
-        fprintf(stderr, "PAL initialization FAILED %d\n", exitCode);
-        return exitCode;
-    }
-#endif
-
-    if (argc < 2 || strcmp(argv[1], "-?") == 0)
-    {
-        printf("Usage: StressLog  \n");
-        printf("       StressLog  -? for list of options\n");
-        return 1;
-    }
-    WCHAR filename[MAX_PATH];
-    if (MultiByteToWideChar(CP_ACP, 0, argv[1], -1, filename, MAX_PATH) == 0)
-        return 1;
-
-    HANDLE file = CreateFile(filename, GENERIC_READ, FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
-    if (file == INVALID_HANDLE_VALUE)
-    {
-        printf("file not found\n");
-        return 1;
-    }
-    LARGE_INTEGER lsize;
-    if (!GetFileSizeEx(file, &lsize))
-    {
-        printf("could not get file size\n");
-        return 1;
-    }
-    size_t size = lsize.QuadPart;
-#define USE_FILE_MAPPING
-#ifdef USE_FILE_MAPPING
-    HANDLE map = CreateFileMapping(file, NULL, PAGE_READONLY, (DWORD)(size >> 32), (DWORD)size, NULL);
-    if (map == nullptr)
-    {
-        printf("could not create file mapping\n");
-        return 1;
-    }
-    void* baseAddress = MapViewOfFileEx(map, FILE_MAP_READ, 0, 0, size, MEMORY_MAPPED_STRESSLOG_BASE_ADDRESS);
-    if (baseAddress == nullptr)
-    {
-        printf("could not map view of file\n");
-        return 1;
-    }
-#else
-    void* baseAddress = VirtualAlloc((void*)0x400000000000, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
-    size_t remainingSize = size;
-    const size_t maxReadSize = 0x80000000;
-    char* readPtr = (char*)baseAddress;
-    while (remainingSize >= maxReadSize)
-    {
-        DWORD sizeRead = 0;
-        BOOL success = ReadFile(file, readPtr, maxReadSize, &sizeRead, NULL);
-        if (!success || sizeRead != maxReadSize)
-        {
-            printf("oops, reading the file didn't work\n");
-            return 1;
-        }
-        remainingSize -= maxReadSize;
-        readPtr += maxReadSize;
-    }
-    if (remainingSize > 0)
-    {
-        DWORD sizeRead = 0;
-        BOOL success = ReadFile(file, readPtr, remainingSize, &sizeRead, NULL);
-        if (!success || sizeRead != remainingSize)
-        {
-            printf("oops, reading the file didn't work\n");
-            return 1;
-        }
-    }
-#endif
-    argc -= 2;
-    argv += 2;
-    char* largv[128];
-    memset(largv, 0, sizeof(largv));
-    while (true)
-    {
-        int error = ProcessStressLog(baseAddress, argc, argv);
-
-        if (error != 0)
-        {
-            printf("error %d occurred\n", error);
-        }
-
-        bool runAgain = false;
-        char s[1024];
-        while (true)
-        {
-            printf("'q' to quit, 'r' to run again\n>");
-            if (fgets(s, 1023, stdin) == nullptr)
-                continue;
-            switch (s[0])
-            {
-            case 'r':
-            case 'R':
-                runAgain = true;
-                break;
-
-            case 'q':
-            case 'Q':
-                break;
-
-            default:
-                continue;
-            }
-            break;
-        }
-        if (runAgain)
-        {
-            int largc = ParseCommandLine(&s[1], largv, ARRAY_SIZE(largv));
-            if (largc > 0)
-            {
-                argc = largc;
-                argv = largv;
-            }
-        }
-        else
-        {
-            break;
-        }
-    }
-
-    return 0;
-}
diff --git a/src/coreclr/tools/StressLogAnalyzer/StressLogDump.cpp b/src/coreclr/tools/StressLogAnalyzer/StressLogDump.cpp
deleted file mode 100644
index 379f6e3ccaf0..000000000000
--- a/src/coreclr/tools/StressLogAnalyzer/StressLogDump.cpp
+++ /dev/null
@@ -1,564 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include "strike.h"
-#include "util.h"
-#include 
-#include 
-#include 
-#include 
-
-#ifndef STRESS_LOG
-#define STRESS_LOG
-
-class MapViewHolder
-{
-    void* whatever;
-};
-
-#ifndef HOST_WINDOWS
-#define FEATURE_PAL
-#endif
-#endif // STRESS_LOG
-#define STRESS_LOG_READONLY
-#include "../../../inc/stresslog.h"
-#include "StressMsgReader.h"
-
-#ifdef HOST_WINDOWS
-#include 
-#endif
-
-
-void GcHistClear();
-void GcHistAddLog(LPCSTR msg, StressMsgReader stressMsg);
-
-
-/*********************************************************************************/
-static const char* getTime(const FILETIME* time, _Out_writes_ (buffLen) char* buff, int buffLen)
-{
-    SYSTEMTIME systemTime;
-    static const char badTime[] = "BAD TIME";
-
-    if (!FileTimeToSystemTime(time, &systemTime))
-        return badTime;
-
-    int length = _snprintf_s(buff, buffLen, _TRUNCATE, "%02d:%02d:%02d", systemTime.wHour, systemTime.wMinute, systemTime.wSecond);
-    if (length <= 0)
-        return badTime;
-
-    return buff;
-}
-
-/*********************************************************************************/
-static inline int64_t& toInt64(FILETIME& t)
-{
-    return *((int64_t *) &t);
-}
-
-/*********************************************************************************/
-ThreadStressLog* ThreadStressLog::FindLatestThreadLog() const
-{
-    const ThreadStressLog* latestLog = 0;
-    for (const ThreadStressLog* ptr = this; ptr != NULL; ptr = ptr->next)
-    {
-        if (ptr->readPtr != NULL)
-            if (latestLog == 0 || StressMsgReader(ptr->readPtr).GetTimeStamp() > StressMsgReader(latestLog->readPtr).GetTimeStamp())
-                latestLog = ptr;
-    }
-    return const_cast<ThreadStressLog*>(latestLog);
-}
-
-const char *getFacilityName(DWORD_PTR lf)
-{
-    struct FacilityName_t { size_t lf; const char* lfName; };
-    #define DEFINE_LOG_FACILITY(logname, value) {logname, #logname},
-    static FacilityName_t facilities[] =
-    {
-        #include "../../../inc/loglf.h"
-        { LF_ALWAYS, "LF_ALWAYS" }
-    };
-    static char buff[1024] = "`";
-    if ( lf == LF_ALL )
-    {
-        return "`ALL`";
-    }
-    else if ((((DWORD)lf) & (LF_ALWAYS | 0xfffe | LF_GC)) == (LF_ALWAYS | LF_GC))
-    {
-        sprintf_s(buff, ARRAY_SIZE(buff), "`GC l=%d`", (int)((lf >> 16) & 0x7fff));
-        return buff;
-    }
-    else
-    {
-        buff[1] = '\0';
-        for ( int i = 0; i < 32; ++i )
-        {
-            if ( lf & 0x1 )
-            {
-                strcat_s ( buff, ARRAY_SIZE(buff), &(facilities[i].lfName[3]) );
-                strcat_s ( buff, ARRAY_SIZE(buff), "`" );
-            }
-            lf >>= 1;
-        }
-        return buff;
-    }
-}
-
-/***********************************************************************************/
-/* recognize special pretty printing instructions in the format string             */
-/* Note that this function might have side effect such that args array value might */
-/* be altered if format string contains %s                                         */
-// TODO: This function assumes the pointer size of the target equals the pointer size of the host
-// TODO: replace uses of void* with appropriate TADDR or CLRDATA_ADDRESS
-void formatOutput(struct IDebugDataSpaces* memCallBack, ___in FILE* file, __inout __inout_z char* format, uint64_t threadId, double timeStamp, DWORD_PTR facility, ___in void** args, bool fPrintFormatString)
-{
-    if (threadId & 0x8000000000000000)
-        fprintf(file, "GC%2d %13.9f : ", (unsigned)threadId, timeStamp);
-    else if (threadId & 0x4000000000000000)
-        fprintf(file, "BG%2d %13.9f : ", (unsigned)threadId, timeStamp);
-    else
-        fprintf(file, "%4x %13.9f : ", (int)threadId, timeStamp);
-    fprintf(file, "%-20s ", getFacilityName ( facility ));
-
-    if (fPrintFormatString)
-    {
-        fprintf(file, "***|\"%s\"|*** ", format);
-    }
-    CQuickBytes fullname;
-    void** argsPtr = args;
-    static char formatCopy[256];
-
-    int iArgCount = 0;
-
-    strcpy_s(formatCopy, ARRAY_SIZE(formatCopy), format);
-    char* ptr = formatCopy;
-    format = formatCopy;
-    for(;;)
-    {
-        char c = *ptr++;
-        if (c == 0)
-            break;
-        if (c == '{')           // Reverse the '{' 's because the log is displayed backwards
-            ptr[-1] = '}';
-        else if (c == '}')
-            ptr[-1] = '{';
-        else if (c == '%')
-        {
-            argsPtr++;          // This format will consume one of the args
-            if (*ptr == '%')
-            {
-                ptr++;          // skip the whole %%
-                --argsPtr;      // except for a %%
-            }
-            else if (*ptr == 'p')
-            {   // It is a %p
-                ptr++;
-                if (isalpha(*ptr))
-                {   // It is a special %p formatter
-                        // Print the string up to that point
-                    c = *ptr;
-                    *ptr = 0;       // Terminate the string temporarily
-                    fprintf(file, format, args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8], args[9], args[10]);
-                    *ptr = c;       // Put it back
-
-                        // move the argument pointers past the part the was printed
-                    format = ptr + 1;
-                    args = argsPtr;
-                    iArgCount = -1;
-                    DWORD_PTR arg = DWORD_PTR(argsPtr[-1]);
-
-                    switch (c)
-                    {
-                        case 'M':   // format as a method Desc
-                            if (g_bDacBroken)
-                            {
-                                fprintf(file," (MethodDesc: %p)", (void*)arg);
-                            }
-                            else
-                            {
-                                if (!IsMethodDesc(arg))
-                                {
-                                    if (arg != 0)
-                                        fprintf(file, " (BAD Method)");
-                                }
-                                else
-                                {
-                                    DacpMethodDescData MethodDescData;
-                                    MethodDescData.Request(g_sos,(CLRDATA_ADDRESS)arg);
-
-                                    static WCHAR wszNameBuffer[1024]; // should be large enough
-                                    static char szNameBuffer[(ARRAY_SIZE(wszNameBuffer) * 3) + 1];
-                                    if (g_sos->GetMethodDescName(arg, ARRAY_SIZE(wszNameBuffer), wszNameBuffer, NULL) == S_OK)
-                                    {
-                                        WideCharToMultiByte(CP_UTF8, 0, wszNameBuffer, -1, szNameBuffer, ARRAY_SIZE(szNameBuffer), NULL, NULL);
-                                    }
-                                    else
-                                    {
-                                        strcpy_s(szNameBuffer, ARRAY_SIZE(szNameBuffer), "UNKNOWN METHODDESC");
-                                    }
-
-                                    fprintf(file, " (%s)", szNameBuffer);
-                                }
-                            }
-                            break;
-
-                            // fall through
-                        case 'T':       // format as a MethodTable
-                            if (g_bDacBroken)
-                            {
-                                fprintf(file, "(MethodTable: %p)", (void*)arg);
-                            }
-                            else
-                            {
-                                if (arg & 3)
-                                {
-                                    arg &= ~3;      // GC steals the lower bits for its own use during GC.
-                                    fprintf(file, " Low Bit(s) Set");
-                                }
-                                if (!IsMethodTable(arg))
-                                {
-                                    fprintf(file, " (BAD MethodTable)");
-                                }
-                                else
-                                {
-                                    NameForMT_s (arg, g_mdName, mdNameLen);
-                                    fprintf(file, " (%s)", g_mdName);
-                                }
-                            }
-                            break;
-
-                        case 'V':
-                            {   // format as a C vtable pointer
-                            char Symbol[1024];
-                            ULONG64 Displacement;
-                            HRESULT hr = g_ExtSymbols->GetNameByOffset(TO_CDADDR(arg), Symbol, 1024, NULL, &Displacement);
-                            if (SUCCEEDED(hr) && Symbol[0] != '\0' && Displacement == 0)
-                                fprintf(file, " (%s)", Symbol);
-                            else
-                                fprintf(file, " (Unknown VTable)");
-                            }
-                            break;
-                        case 'K':
-                            {   // format a frame in stack trace
-                                char Symbol[1024];
-                                ULONG64 Displacement;
-                                HRESULT hr = g_ExtSymbols->GetNameByOffset (TO_CDADDR(arg), Symbol, 1024, NULL, &Displacement);
-                                if (SUCCEEDED (hr) && Symbol[0] != '\0')
-                                {
-                                    fprintf (file, " (%s", Symbol);
-                                    if (Displacement)
-                                    {
-                                        fprintf (file, "+%#llx", (unsigned long long)Displacement);
-                                    }
-                                    fprintf (file, ")");
-                                }
-                                else
-                                    fprintf (file, " (Unknown function)");
-                            }
-                            break;
-                        default:
-                            format = ptr;   // Just print the character.
-                    }
-                }
-            }
-            else if (*ptr == 's' || (*ptr == 'h' && *(ptr+1) == 's' && ++ptr))
-            {
-                HRESULT     hr;
-
-                // need to _alloca, instead of declaring a local buffer
-                // since we may have more than one %s in the format
-                ULONG cbStrBuf = 256;
-                char* strBuf = (char *)_alloca(cbStrBuf);
-
-                hr = memCallBack->ReadVirtual(TO_CDADDR((char* )args[iArgCount]), strBuf, cbStrBuf, 0);
-                if (hr != S_OK)
-                {
-                    strcpy_s(strBuf, cbStrBuf, "(#Could not read address of string#)");
-                }
-
-                args[iArgCount] = strBuf;
-            }
-            else if (*ptr == 'S' || (*ptr == 'l' && *(ptr+1) == 's' && ++ptr))
-            {
-                HRESULT     hr;
-
-                // need to _alloca, instead of declaring a local buffer
-                // since we may have more than one %s in the format
-                ULONG cbWstrBuf = 256 * sizeof(WCHAR);
-                WCHAR* wstrBuf = (WCHAR *)_alloca(cbWstrBuf);
-
-                hr = memCallBack->ReadVirtual(TO_CDADDR((char* )args[iArgCount]), wstrBuf, cbWstrBuf, 0);
-                if (hr != S_OK)
-                {
-                    wcscpy_s(wstrBuf, cbWstrBuf/sizeof(WCHAR), W("(#Could not read address of string#)"));
-                }
-
-                args[iArgCount] = wstrBuf;
-            }
-            iArgCount++;
-        }
-    }
-
-    // Print anything after the last special format instruction.
-    fprintf(file, format, args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8], args[9], args[10]);
-    fprintf(file, "\n");
-}
-
-void __cdecl
-vDoOut(BOOL bToConsole, FILE* file, PCSTR Format, ...)
-{
-    va_list Args;
-
-    va_start(Args, Format);
-
-    if (bToConsole)
-    {
-        OutputVaList(DEBUG_OUTPUT_NORMAL, Format, Args);
-    }
-    else
-    {
-        vfprintf(file, Format, Args);
-    }
-
-    va_end(Args);
-}
-
-
-/*********************************************************************************/
-HRESULT StressLog::Dump(ULONG64 outProcLog, const char* fileName, struct IDebugDataSpaces* memCallBack)
-{
-    ULONG64 g_hThisInst;
-    BOOL    bDoGcHist = (fileName == NULL);
-    FILE*   file = NULL;
-
-    // Fetch the circular buffer bookkeeping data
-    StressLog inProcLog;
-    HRESULT hr = memCallBack->ReadVirtual(UL64_TO_CDA(outProcLog), &inProcLog, sizeof(StressLog), 0);
-    if (hr != S_OK)
-    {
-        return hr;
-    }
-    if (inProcLog.logs.Load() == NULL || inProcLog.moduleOffset == 0)
-    {
-        ExtOut ( "----- No thread logs in the image: The stress log was probably not initialized correctly. -----\n");
-        return S_FALSE;
-    }
-
-    g_hThisInst = (ULONG64) inProcLog.moduleOffset;
-
-    if (bDoGcHist)
-    {
-        GcHistClear();
-    }
-    else
-    {
-        ExtOut("Writing to file: %s\n", fileName);
-        ExtOut("Stress log in module 0x%p\n", SOS_PTR(g_hThisInst));
-        ExtOut("Stress log address = 0x%p\n", SOS_PTR(outProcLog));
-    }
-    // Fetch the circular buffers for each thread into the 'logs' list
-    ThreadStressLog* logs = 0;
-
-    CLRDATA_ADDRESS outProcPtr = TO_CDADDR(inProcLog.logs.Load());
-    ThreadStressLog* inProcPtr;
-    ThreadStressLog** logsPtr = &logs;
-    int threadCtr = 0;
-    uint64_t lastTimeStamp = 0;// timestamp of last log entry
-
-    while(outProcPtr != 0) {
-        inProcPtr = new ThreadStressLog;
-        hr = memCallBack->ReadVirtual(outProcPtr, inProcPtr, sizeof (*inProcPtr), 0);
-        if (hr != S_OK || inProcPtr->chunkListHead == NULL)
-        {
-            delete inProcPtr;
-            goto FREE_MEM;
-        }
-
-        CLRDATA_ADDRESS outProcListHead = TO_CDADDR(inProcPtr->chunkListHead);
-        CLRDATA_ADDRESS outProcChunkPtr = outProcListHead;
-        StressLogChunk ** chunksPtr = &inProcPtr->chunkListHead;
-        StressLogChunk * inProcPrevChunkPtr = NULL;
-        BOOL curPtrInitialized = FALSE;
-        do
-        {
-            StressLogChunk * inProcChunkPtr = new StressLogChunk;
-            hr = memCallBack->ReadVirtual (outProcChunkPtr, inProcChunkPtr, sizeof (*inProcChunkPtr), 0);
-            if (hr != S_OK || !inProcChunkPtr->IsValid ())
-            {
-                if (hr != S_OK)
-                    ExtOut ("ReadVirtual failed with code hr = %x.\n", hr );
-                else
-                    ExtOut ("Invalid stress log chunk: %p", SOS_PTR(outProcChunkPtr));
-
-                // Now cleanup
-                delete inProcChunkPtr;
-                // if this is the first time through, inProcPtr->chunkListHead may still contain
-                // the out-of-process value for the chunk pointer.  NULL it to avoid AVs
-                if (TO_CDADDR(inProcPtr->chunkListHead) == outProcListHead)
-                   inProcPtr->chunkListHead = NULL;
-                delete inProcPtr;
-                goto FREE_MEM;
-            }
-
-            if (!curPtrInitialized && outProcChunkPtr == TO_CDADDR(inProcPtr->curWriteChunk))
-            {
-                inProcPtr->curPtr = (StressMsg *)((BYTE *)inProcChunkPtr + ((BYTE *)inProcPtr->curPtr - (BYTE *)inProcPtr->curWriteChunk));
-                inProcPtr->curWriteChunk = inProcChunkPtr;
-                curPtrInitialized = TRUE;
-            }
-
-            outProcChunkPtr = TO_CDADDR(inProcChunkPtr->next);
-            *chunksPtr = inProcChunkPtr;
-            chunksPtr = &inProcChunkPtr->next;
-            inProcChunkPtr->prev = inProcPrevChunkPtr;
-            inProcPrevChunkPtr = inProcChunkPtr;
-
-            if (outProcChunkPtr == outProcListHead)
-            {
-                inProcChunkPtr->next = inProcPtr->chunkListHead;
-                inProcPtr->chunkListHead->prev = inProcChunkPtr;
-                inProcPtr->chunkListTail = inProcChunkPtr;
-            }
-        } while (outProcChunkPtr != outProcListHead);
-
-        if (!curPtrInitialized)
-        {
-            delete inProcPtr;
-            goto FREE_MEM;
-        }
-
-        // TODO: fix on 64 bit
-        inProcPtr->Activate ();
-        if (StressMsgReader(inProcPtr->readPtr).GetTimeStamp() > lastTimeStamp)
-        {
-            lastTimeStamp = StressMsgReader(inProcPtr->readPtr).GetTimeStamp();
-        }
-
-        outProcPtr = TO_CDADDR(inProcPtr->next);
-        *logsPtr = inProcPtr;
-        logsPtr = &inProcPtr->next;
-        threadCtr++;
-    }
-
-    if (!bDoGcHist && ((fopen_s(&file, fileName, "w")) != 0))
-    {
-        hr = GetLastError();
-        goto FREE_MEM;
-    }
-    hr = S_FALSE;       // return false if there are no message to print to the log
-
-    vDoOut(bDoGcHist, file, "STRESS LOG:\n"
-              "    facilitiesToLog  = 0x%x\n"
-              "    levelToLog       = %d\n"
-              "    MaxLogSizePerThread = 0x%x (%d)\n"
-              "    MaxTotalLogSize = 0x%x (%d)\n"
-              "    CurrentTotalLogChunk = %d\n"
-              "    ThreadsWithLogs  = %d\n",
-        inProcLog.facilitiesToLog, inProcLog.levelToLog, inProcLog.MaxSizePerThread, inProcLog.MaxSizePerThread,
-        inProcLog.MaxSizeTotal, inProcLog.MaxSizeTotal, inProcLog.totalChunk.Load(), threadCtr);
-
-    FILETIME endTime;
-    double totalSecs;
-    totalSecs = ((double) (lastTimeStamp - inProcLog.startTimeStamp)) / inProcLog.tickFrequency;
-    toInt64(endTime) = toInt64(inProcLog.startTime) + ((int64_t) (totalSecs * 1.0E7));
-
-    char timeBuff[64];
-    vDoOut(bDoGcHist, file, "    Clock frequency  = %5.3f GHz\n", inProcLog.tickFrequency / 1.0E9);
-    vDoOut(bDoGcHist, file, "    Start time         %s\n", getTime(&inProcLog.startTime, timeBuff, ARRAY_SIZE(timeBuff)));
-    vDoOut(bDoGcHist, file, "    Last message time  %s\n", getTime(&endTime, timeBuff, ARRAY_SIZE(timeBuff)));
-    vDoOut(bDoGcHist, file, "    Total elapsed time %5.3f sec\n", totalSecs);
-
-    if (!bDoGcHist)
-    {
-        fprintf(file, "\nTHREAD  TIMESTAMP     FACILITY                              MESSAGE\n");
-        fprintf(file, "  ID  (sec from start)\n");
-        fprintf(file, "--------------------------------------------------------------------------------------\n");
-    }
-    char format[257];
-    format[256] = format[0] = 0;
-    void** args;
-    unsigned msgCtr;
-    msgCtr = 0;
-    for (;;)
-    {
-        ThreadStressLog* latestLog = logs->FindLatestThreadLog();
-
-        if (IsInterrupt())
-        {
-            vDoOut(bDoGcHist, file, "----- Interrupted by user -----\n");
-            break;
-        }
-
-        if (latestLog == 0)
-        {
-            break;
-        }
-
-        StressMsgReader latestMsg = latestLog->readPtr;
-        if (latestMsg.GetFormatOffset() != 0 && !latestLog->CompletedDump())
-        {
-            TADDR taFmt = (latestMsg.GetFormatOffset()) + TO_TADDR(g_hThisInst);
-            hr = memCallBack->ReadVirtual(TO_CDADDR(taFmt), format, 256, 0);
-            if (hr != S_OK)
-                strcpy_s(format, ARRAY_SIZE(format), "Could not read address of format string");
-
-            double deltaTime = ((double) (latestMsg.GetTimeStamp() - inProcLog.startTimeStamp)) / inProcLog.tickFrequency;
-            if (bDoGcHist)
-            {
-                if (strcmp(format, ThreadStressLog::TaskSwitchMsg()) == 0)
-                {
-                    latestLog->threadId = (unsigned)(size_t)latestMsg.GetArgs()[0];
-                }
-                GcHistAddLog(format, latestMsg);
-            }
-            else
-            {
-                if (strcmp(format, ThreadStressLog::TaskSwitchMsg()) == 0)
-                {
-                    fprintf (file, "Task was switched from %x\n", (unsigned)(size_t)latestMsg.GetArgs()[0]);
-                    latestLog->threadId = (unsigned)(size_t)latestMsg.GetArgs()[0];
-                }
-                else
-                {
-                    args = latestMsg.GetArgs();
-                    formatOutput(memCallBack, file, format, (unsigned)latestLog->threadId, deltaTime, latestMsg.GetFacility(), args);
-                }
-            }
-            msgCtr++;
-        }
-
-        latestLog->readPtr = latestLog->AdvanceRead(latestMsg.GetNumberOfArgs());
-        if (latestLog->CompletedDump())
-        {
-            latestLog->readPtr = NULL;
-            if (!bDoGcHist)
-            {
-                fprintf(file, "------------ Last message from thread %llx -----------\n", (unsigned long long)latestLog->threadId);
-            }
-        }
-
-        if (msgCtr % 64 == 0)
-        {
-            ExtOut(".");        // to indicate progress
-            if (msgCtr % (64*64) == 0)
-                ExtOut("\n");
-        }
-    }
-    ExtOut("\n");
-
-    vDoOut(bDoGcHist, file, "---------------------------- %d total entries ------------------------------------\n", msgCtr);
-    if (!bDoGcHist)
-    {
-        fclose(file);
-    }
-
-FREE_MEM:
-    // clean up the 'logs' list
-    while (logs) {
-        ThreadStressLog* temp = logs;
-        logs = logs->next;
-        delete temp;
-    }
-
-    return hr;
-}
-
diff --git a/src/coreclr/tools/StressLogAnalyzer/StressLogPlugin.cpp b/src/coreclr/tools/StressLogAnalyzer/StressLogPlugin.cpp
deleted file mode 100644
index 20d0f1b6b229..000000000000
--- a/src/coreclr/tools/StressLogAnalyzer/StressLogPlugin.cpp
+++ /dev/null
@@ -1,1555 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#ifndef INFINITY
-#define INFINITY 1e300 // Practically good enough - not sure why we miss this in our Linux build.
-#endif
-
-#ifndef DLLEXPORT
-#ifdef _MSC_VER
-#define DLLEXPORT __declspec(dllexport)
-#else
-#define DLLEXPORT __attribute__ ((visibility ("default")))
-#endif // _MSC_VER
-#endif // DLLEXPORT
-
-#include "strike.h"
-#include "util.h"
-
-#include "assert.h"
-
-#define STRESS_LOG
-#define STRESS_LOG_ANALYZER
-#define MEMORY_MAPPED_STRESSLOG
-
-class MapViewHolder
-{
-    void* whatever;
-};
-
-bool IsInCantAllocStressLogRegion()
-{
-    return true;
-}
-
-#include 
-#include "../../../inc/stresslog.h"
-#include "StressMsgReader.h"
-
-using std::min;
-using std::max;
-
-size_t StressLog::writing_base_address;
-size_t StressLog::reading_base_address;
-
-bool s_showAllMessages = false;
-bool s_showDefaultMessages = true;
-BOOL g_bDacBroken;
-char g_mdName[1];
-SYMBOLS* g_ExtSymbols;
-SOS* g_sos;
-
-HRESULT OutputVaList(ULONG mask, PCSTR format, va_list args)
-{
-    return vprintf(format, args);
-}
-
-void ExtOut(PCSTR format, ...)
-{
-    va_list args;
-    va_start(args, format);
-    vprintf(format, args);
-}
-
-void GcHistClear()
-{
-}
-
-void GcHistAddLog(LPCSTR msg, StressMsgReader stressMsg)
-{
-}
-
-// this is just to read string literals out of the coreclr and clrgc images
-struct CorClrData : IDebugDataSpaces
-{
-    StressLog::StressLogHeader* hdr;
-    CorClrData(StressLog::StressLogHeader* h) : hdr(h) { }
-
-    virtual HRESULT ReadVirtual(void* src, void* dest, size_t size, int)
-    {
-        size_t cumSize = 0;
-        for (size_t moduleIndex = 0; moduleIndex < StressLog::MAX_MODULES; moduleIndex++)
-        {
-            ptrdiff_t offs = (uint8_t*)src - hdr->modules[moduleIndex].baseAddress;
-            if ((size_t)offs < hdr->modules[moduleIndex].size && (size_t)offs + size < hdr->modules[moduleIndex].size)
-            {
-                memcpy(dest, &hdr->moduleImage[offs + cumSize], size);
-                return S_OK;
-            }
-            cumSize += hdr->modules[moduleIndex].size;
-        }
-        return E_FAIL;
-    }
-};
-
-const int MAX_NUMBER_OF_HEAPS = 1024;
-static volatile int64_t s_maxHeapNumberSeen = -1;
-static volatile uint64_t s_threadIdOfHeap[MAX_NUMBER_OF_HEAPS][2];
-
-enum GcThreadKind
-{
-    GC_THREAD_FG,
-    GC_THREAD_BG,
-};
-
-struct GcThread
-{
-    GcThreadKind    kind;
-    int             heapNumber;
-};
-
-bool LookupGcThread(uint64_t threadId, GcThread *gcThread)
-{
-    for (int i = 0; i <= s_maxHeapNumberSeen; i++)
-    {
-        if (s_threadIdOfHeap[i][GC_THREAD_FG] == threadId)
-        {
-            gcThread->heapNumber = i;
-            gcThread->kind = GC_THREAD_FG;
-            return true;
-        }
-        if (s_threadIdOfHeap[i][GC_THREAD_BG] == threadId)
-        {
-            gcThread->heapNumber = i;
-            gcThread->kind = GC_THREAD_BG;
-            return true;
-        }
-    }
-    return false;
-}
-
-#define InterestingStrings \
-d(IS_UNKNOWN,                   "")                                                                                         \
-d(IS_THREAD_WAIT,               ThreadStressLog::gcServerThread0StartMsg())                                                 \
-d(IS_THREAD_WAIT_DONE,          ThreadStressLog::gcServerThreadNStartMsg())                                                 \
-d(IS_GCSTART,                   ThreadStressLog::gcDetailedStartMsg())                                                      \
-d(IS_GCEND,                     ThreadStressLog::gcDetailedEndMsg())                                                        \
-d(IS_MARK_START,                ThreadStressLog::gcStartMarkMsg())                                                          \
-d(IS_PLAN_START,                ThreadStressLog::gcStartPlanMsg())                                                          \
-d(IS_RELOCATE_START,            ThreadStressLog::gcStartRelocateMsg())                                                      \
-d(IS_RELOCATE_END,              ThreadStressLog::gcEndRelocateMsg())                                                        \
-d(IS_COMPACT_START,             ThreadStressLog::gcStartCompactMsg())                                                       \
-d(IS_COMPACT_END,               ThreadStressLog::gcEndCompactMsg())                                                         \
-d(IS_GCROOT,                    ThreadStressLog::gcRootMsg())                                                               \
-d(IS_PLUG_MOVE,                 ThreadStressLog::gcPlugMoveMsg())                                                           \
-d(IS_GCMEMCOPY,                 ThreadStressLog::gcMemCopyMsg())                                                            \
-d(IS_GCROOT_PROMOTE,            ThreadStressLog::gcRootPromoteMsg())                                                        \
-d(IS_PLAN_PLUG,                 ThreadStressLog::gcPlanPlugMsg())                                                           \
-d(IS_PLAN_PINNED_PLUG,          ThreadStressLog::gcPlanPinnedPlugMsg())                                                     \
-d(IS_DESIRED_NEW_ALLOCATION,    ThreadStressLog::gcDesiredNewAllocationMsg())                                               \
-d(IS_MAKE_UNUSED_ARRAY,         ThreadStressLog::gcMakeUnusedArrayMsg())                                                    \
-d(IS_START_BGC_THREAD,          ThreadStressLog::gcStartBgcThread())                                                        \
-d(IS_RELOCATE_REFERENCE,        ThreadStressLog::gcRelocateReferenceMsg())                                                  \
-d(IS_LOGGING_OFF,               ThreadStressLog::gcLoggingIsOffMsg())                                                       \
-d(IS_UNINTERESTING,             "")
-
-enum InterestingStringId : unsigned char
-{
-#define d(a,b)  a,
-    InterestingStrings
-    IS_INTERESTING
-#undef d
-};
-
-const int MAX_INTERESTING_STRINGS = 1024;
-int s_interestingStringCount = IS_INTERESTING;
-const char* s_interestingStringTable[MAX_INTERESTING_STRINGS] =
-{
-#define d(a,b)  b,
-    InterestingStrings
-#undef d
-};
-
-bool s_interestingStringMatchMode[MAX_INTERESTING_STRINGS];
-
-bool s_interestingStringFilter[MAX_INTERESTING_STRINGS];
-
-static void AddInterestingString(const char* s, bool matchMode)
-{
-    for (int i = 1; i < s_interestingStringCount; i++)
-    {
-        if (strcmp(s_interestingStringTable[i], s) == 0)
-        {
-            s_interestingStringFilter[i] = true;
-            return;
-        }
-    }
-    int i = s_interestingStringCount++;
-    s_interestingStringTable[i] = s;
-    s_interestingStringMatchMode[i] = matchMode;
-    s_interestingStringFilter[IS_INTERESTING] = true;
-}
-
-
-InterestingStringId mapImageToStringId[sizeof(StressLog::StressLogHeader::moduleImage)];
-
-InterestingStringId FindStringId(StressLog::StressLogHeader* hdr, char* format)
-{
-    size_t offset = format - (char*)hdr->moduleImage;
-    assert(offset < sizeof(mapImageToStringId));
-    InterestingStringId id = mapImageToStringId[offset];
-    if (id != IS_UNKNOWN)
-        return id;
-    for (int i = 1; s_interestingStringTable[i] != nullptr; i++)
-    {
-        if (i != IS_UNINTERESTING)
-        {
-            bool match = false;
-            if (s_interestingStringMatchMode[i])
-            {
-                match = (strstr(format, s_interestingStringTable[i]) == format);
-            }
-            else
-            {
-                match = (strcmp(format, s_interestingStringTable[i]) == 0);
-            }
-            if (match)
-            {
-                id = (InterestingStringId)i;
-                if (id > IS_INTERESTING)
-                    id = IS_INTERESTING;
-                mapImageToStringId[offset] = id;
-                return id;
-            }
-        }
-    }
-    mapImageToStringId[offset] = IS_UNINTERESTING;
-    return IS_UNINTERESTING;
-}
-
-const int MAX_LEVEL_FILTERS = 100;
-static int s_levelFilterCount;
-struct LevelFilter
-{
-    unsigned long minLevel;
-    unsigned long maxLevel;
-};
-
-static LevelFilter s_levelFilter[MAX_LEVEL_FILTERS];
-
-struct GcStartEnd
-{
-    double startTime;
-    double endTime;
-};
-
-const int MAX_GC_INDEX = 1024 * 1024;
-static GcStartEnd s_gcStartEnd[MAX_GC_INDEX];
-
-static unsigned long s_gcFilterStart;
-static unsigned long s_gcFilterEnd;
-
-const int MAX_VALUE_FILTERS = 100;
-static int s_valueFilterCount;
-
-struct ValueFilter
-{
-    ULONGLONG start;
-    ULONGLONG end;
-};
-
-static ValueFilter s_valueFilter[MAX_VALUE_FILTERS];
-
-const int MAX_THREAD_FILTERS = 1024;
-static int s_threadFilterCount;
-static uint64_t s_threadFilter[MAX_THREAD_FILTERS];
-
-static bool s_gcThreadFilter[MAX_NUMBER_OF_HEAPS][2];
-static bool s_hadGcThreadFilters;
-
-static bool s_printHexTidForGcThreads;
-
-static uint32_t s_facilityIgnore;
-
-static bool s_printEarliestMessages;
-static int s_printEarliestMessageFromThreadCount;
-static uint64_t s_printEarliestMessageFromThread[MAX_THREAD_FILTERS];
-static bool s_printEarliestMessageFromGcThread[MAX_NUMBER_OF_HEAPS][2];
-
-static bool FilterThread(ThreadStressLog* tsl)
-{
-    //    return tsl->threadId == 0x6ff8;
-
-    if (s_gcFilterStart != 0)
-    {
-        // we have a filter based on a GC index
-        // include all message for now so we don't miss any
-        // GC start/end messages
-        // we will throw away message for other threads later
-        return true;
-    }
-
-    if (s_hadGcThreadFilters)
-    {
-        GcThread gcThread;
-        if (!LookupGcThread(tsl->threadId, &gcThread))
-        {
-            // this may or may not be a GC thread - we don't know yet
-            // include its messages to be conservative - we will have
-            // a filter later to remove these messages
-            return true;
-        }
-        return s_gcThreadFilter[gcThread.heapNumber][gcThread.kind];
-    }
-    else
-    {
-        if (s_threadFilterCount == 0)
-            return true;
-        // we can filter now
-        for (int i = 0; i < s_threadFilterCount; i++)
-        {
-            if (s_threadFilter[i] == tsl->threadId)
-                return true;
-        }
-        return false;
-    }
-}
-
-
-int GcLogLevel(uint32_t facility)
-{
-    if ((facility & (LF_ALWAYS | 0xfffe | LF_GC)) == (LF_ALWAYS | LF_GC))
-    {
-        return (facility >> 16) & 0x7fff;
-    }
-    return 0;
-}
-
-static void RememberThreadForHeap(uint64_t threadId, int64_t heapNumber, GcThreadKind threadKind)
-{
-    if (s_maxHeapNumberSeen == -1 && heapNumber == 0)
-    {
-        // we don't want to remember these associations for WKS GC,
-        // which can execute on any thread - as soon as we see
-        // a heap number != 0, we assume SVR GC and remember it
-        return;
-    }
-
-    if (heapNumber < MAX_NUMBER_OF_HEAPS)
-    {
-        s_threadIdOfHeap[heapNumber][threadKind] = threadId;
-        int64_t maxHeapNumberSeen = s_maxHeapNumberSeen;
-        while (maxHeapNumberSeen < heapNumber)
-        {
-            maxHeapNumberSeen = InterlockedCompareExchange64((volatile LONG64*)&s_maxHeapNumberSeen, heapNumber, maxHeapNumberSeen);
-        }
-    }
-}
-
-bool FilterMessage(StressLog::StressLogHeader* hdr, ThreadStressLog* tsl, uint32_t facility, char* format, double deltaTime, int argCount, void** args)
-{
-    bool fLevelFilter = false;
-    if (s_levelFilterCount > 0)
-    {
-        unsigned long gcLogLevel = (unsigned long)GcLogLevel(facility);
-        for (int i = 0; i < s_levelFilterCount; i++)
-        {
-            if (s_levelFilter[i].minLevel <= gcLogLevel && gcLogLevel <= s_levelFilter[i].maxLevel)
-            {
-                fLevelFilter = true;
-                break;
-            }
-        }
-    }
-
-    if (s_facilityIgnore != 0)
-    {
-        if ((facility & (LF_ALWAYS | 0xfffe | LF_GC)) == (LF_ALWAYS | LF_GC))
-        {
-            // specially encoded GC message including dprintf level
-            if ((s_facilityIgnore & LF_GC) != 0)
-            {
-                return false;
-            }
-        }
-        else if ((s_facilityIgnore & facility) != 0)
-        {
-            return false;
-        }
-    }
-
-    InterestingStringId isd = FindStringId(hdr, format);
-    switch (isd)
-    {
-    case    IS_UNINTERESTING:
-    case    IS_UNKNOWN:
-        break;
-    case    IS_THREAD_WAIT:
-    case    IS_THREAD_WAIT_DONE:
-        RememberThreadForHeap(tsl->threadId, (int64_t)args[0], GC_THREAD_FG);
-        break;
-
-    case    IS_DESIRED_NEW_ALLOCATION:
-    {
-        int genNumber = (int)(int64_t)args[1];
-        if (genNumber <= 1)
-        {
-            // do this only for gen 0 and 1, because otherwise it
-            // may be background GC
-            RememberThreadForHeap(tsl->threadId, (int64_t)args[0], GC_THREAD_FG);
-        }
-        break;
-    }
-
-    case    IS_LOGGING_OFF:
-        return s_showDefaultMessages;
-
-    case    IS_GCSTART:
-    {
-        int gcIndex = (int)(size_t)args[0];
-        if (gcIndex < MAX_GC_INDEX)
-        {
-            s_gcStartEnd[gcIndex].startTime = deltaTime;
-        }
-        return s_showDefaultMessages;
-    }
-
-    case    IS_GCEND:
-    {
-        int gcIndex = (int)(size_t)args[0];
-        if (gcIndex < MAX_GC_INDEX)
-        {
-            s_gcStartEnd[gcIndex].endTime = deltaTime;
-        }
-        return s_showDefaultMessages;
-    }
-
-    case    IS_MARK_START:
-    case    IS_PLAN_START:
-    case    IS_RELOCATE_START:
-    case    IS_RELOCATE_END:
-    case    IS_COMPACT_START:
-    case    IS_COMPACT_END:
-        RememberThreadForHeap(tsl->threadId, (int64_t)args[0], GC_THREAD_FG);
-        return s_showDefaultMessages;
-
-    case    IS_PLAN_PLUG:
-    case    IS_PLAN_PINNED_PLUG:
-        if (s_valueFilterCount > 0)
-        {
-            // print this message if the plug or the gap before it contain (part of) the range we're looking for
-            size_t gapSize = (size_t)args[0];
-            size_t plugStart = (size_t)args[1];
-            size_t gapStart = plugStart - gapSize;
-            size_t plugEnd = (size_t)args[2];
-            for (int i = 0; i < s_valueFilterCount; i++)
-            {
-                if (s_valueFilter[i].end < gapStart || plugEnd < s_valueFilter[i].start)
-                {
-                    // empty intersection with the gap+plug
-                    continue;
-                }
-                return true;
-            }
-        }
-        break;
-
-    case    IS_GCMEMCOPY:
-        if (s_valueFilterCount > 0)
-        {
-            // print this message if the source or destination range contain (part of) the range we're looking for
-            size_t srcStart = (size_t)args[0];
-            size_t dstStart = (size_t)args[1];
-            size_t srcEnd = (size_t)args[2];
-            size_t dstEnd = (size_t)args[3];
-            for (int i = 0; i < s_valueFilterCount; i++)
-            {
-                if ((s_valueFilter[i].end < srcStart || srcEnd < s_valueFilter[i].start) &&
-                    (s_valueFilter[i].end < dstStart || dstEnd < s_valueFilter[i].start))
-                {
-                    // empty intersection with both the source and the destination
-                    continue;
-                }
-                return true;
-            }
-        }
-        break;
-
-    case    IS_MAKE_UNUSED_ARRAY:
-        if (s_valueFilterCount > 0)
-        {
-            // print this message if the source or destination range contain (part of) the range we're looking for
-            size_t start = (size_t)args[0];
-            size_t end = (size_t)args[1];
-            for (int i = 0; i < s_valueFilterCount; i++)
-            {
-                if ((s_valueFilter[i].end < start || end < s_valueFilter[i].start))
-                {
-                    // empty intersection with the unused array
-                    continue;
-                }
-                return true;
-            }
-        }
-        break;
-
-    case    IS_GCROOT:
-    case    IS_PLUG_MOVE:
-    case    IS_GCROOT_PROMOTE:
-    case    IS_INTERESTING:
-        break;
-
-    case    IS_START_BGC_THREAD:
-        RememberThreadForHeap(tsl->threadId, (int64_t)args[0], GC_THREAD_BG);
-        break;
-    case    IS_RELOCATE_REFERENCE:
-        if (s_valueFilterCount > 0)
-        {
-            size_t src = (size_t)args[0];
-            size_t dst_from = (size_t)args[1];
-            size_t dst_to = (size_t)args[2];
-            // print this message if the source or destination contain (part of) the range we're looking for
-            for (int i = 0; i < s_valueFilterCount; i++)
-            {
-                if ((s_valueFilter[i].end < src || src < s_valueFilter[i].start) &&
-                    (s_valueFilter[i].end < dst_from || dst_from < s_valueFilter[i].start) &&
-                    (s_valueFilter[i].end < dst_to || dst_to < s_valueFilter[i].start))
-                {
-                    // empty intersection with both the source and the destination
-                    continue;
-                }
-                return true;
-            }
-        }
-        break;
-    }
-    return fLevelFilter || s_interestingStringFilter[isd];
-}
-
-struct StressThreadAndMsg
-{
-    uint64_t    threadId;
-    StressMsgReader msg;
-    uint64_t    msgId;
-};
-
-int CmpMsg(const void* p1, const void* p2)
-{
-    const StressThreadAndMsg* msg1 = (const StressThreadAndMsg*)p1;
-    const StressThreadAndMsg* msg2 = (const StressThreadAndMsg*)p2;
-
-    if (msg1->msg.GetTimeStamp() < msg2->msg.GetTimeStamp())
-        return 1;
-    if (msg1->msg.GetTimeStamp() > msg2->msg.GetTimeStamp())
-        return -11;
-
-    if (msg1->threadId < msg2->threadId)
-        return -1;
-    if (msg1->threadId > msg2->threadId)
-        return 1;
-
-    if (msg1->msgId < msg2->msgId)
-        return -1;
-    if (msg1->msgId > msg2->msgId)
-        return 1;
-
-    assert(!"unreachable");
-    return 0;
-}
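CmpMsg is a plain qsort comparator: it orders buffered messages by descending timestamp, then breaks ties by thread id and finally by message id, so no two entries ever compare equal. A minimal standalone sketch of the same ordering, using a simplified Msg struct and std::sort (both assumptions, not the tool's types):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Hypothetical, simplified message record; the real tool sorts StressThreadAndMsg entries.
struct Msg { uint64_t timeStamp; uint64_t threadId; uint64_t msgId; };

int main()
{
    Msg msgs[] = { {100, 2, 0}, {300, 1, 1}, {100, 1, 2} };
    std::sort(msgs, msgs + 3, [](const Msg& a, const Msg& b) {
        if (a.timeStamp != b.timeStamp) return a.timeStamp > b.timeStamp; // newest first
        if (a.threadId != b.threadId) return a.threadId < b.threadId;     // then by thread id
        return a.msgId < b.msgId;                                         // then by message id
    });
    for (const Msg& m : msgs)
        printf("t=%llu thread=%llu id=%llu\n", (unsigned long long)m.timeStamp,
               (unsigned long long)m.threadId, (unsigned long long)m.msgId);
    return 0;
}
```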
-
-struct ThreadStressLogDesc
-{
-    volatile LONG workStarted;
-    volatile LONG workFinished;
-    ThreadStressLog* tsl;
-    StressMsgReader earliestMessage;
-
-    ThreadStressLogDesc() : workStarted(0), workFinished(0), tsl(nullptr), earliestMessage(nullptr)
-    {
-    }
-};
-
-static const int MAX_THREADSTRESSLOGS = 64 * 1024;
-static ThreadStressLogDesc s_threadStressLogDesc[MAX_THREADSTRESSLOGS];
-static int s_threadStressLogCount;
-static LONG64 s_wrappedWriteThreadCount;
-
-static const LONG MAX_MESSAGE_COUNT = 64 * 1024 * 1024;
-static StressThreadAndMsg* s_threadMsgBuf;
-static volatile LONG64 s_msgCount = 0;
-static volatile LONG64 s_totalMsgCount = 0;
-static double s_timeFilterStart = 0;
-static double s_timeFilterEnd = 0;
-static const char* s_outputFileName = nullptr;
-
-StressLog::StressLogHeader* s_hdr;
-
-static bool s_fPrintFormatStrings;
-
-void Usage()
-{
-    printf("\n");
-    printf("Usage:\n");
-    printf("\n");
-    printf(" -o:: write output to a text file instead of the console\n");
-    printf("\n");
-    printf(" -v:: look for a specific hex value (often used to look for addresses)\n");
-    printf(" -v:-: look for values >= hexlower and <= hexupper\n");
-    printf(" -v:+: look for values >= hexlower and <= hexlower+hexsize\n");
-    printf("\n");
-    printf(" -t:: don't consider messages before start time\n");
-    printf(" -t:-: only consider messages >= start time and <= end time\n");
-    printf(" -t:-: only consider messages in the last seconds\n");
-    printf("\n");
-    printf(" -l:,,... : print messages at dprint level1,level2,...\n");
-    printf("\n");
-    printf(" -g:: only print messages occurring during GC#gc_index\n");
-    printf(" -g:-: as above, for a range of GC indices\n");
-    printf("\n");
-    printf(" -f: print the raw format strings along with the message\n");
-    printf("     (useful to search for the format string in the source code)\n");
-    printf(" -f:: search for a specific format string\n");
-    printf("    e.g. '-f:\"<%%zx>:%%zx\"'\n");
-    printf(" -p:: search for all format strings with a specific prefix\n");
-    printf("    e.g. '-p:\"commit-accounting\"'\n");
-    printf("\n");
-    printf(" -i:: ignore messages from log facilities\n");
-    printf("   e.g. '-i:7ffe' means ignore messages from anything but LF_GC\n");
-    printf("\n");
-    printf(" -tid: print hex thread ids, e.g. 2a08 instead of GC12\n");
-    printf(" -tid:,,...: only print messages from the listed\n");
-    printf("     threads. Thread ids are in hex, given as GC,\n");
-    printf("     or BG\n");
-    printf("     e.g. '-tid:2bc8,GC3,BG14' would print messages from thread 2bc8, the gc thread\n");
-    printf("     associated with heap 3, and the background GC thread for heap 14\n");
-    printf("\n");
-    printf(" -e: print earliest messages from all threads\n");
-    printf(" -e:,,...: print earliest messages from the listed\n");
-    printf("     threads. Thread ids are in hex, given as GC,\n");
-    printf("     or BG\n");
-    printf("     e.g. '-e:2bc8,GC3,BG14' would print the earliest messages from thread 2bc8,\n");
-    printf("     the gc thread associated with heap 3, and the background GC thread for heap 14\n");
-    printf("\n");
-    printf(" -a: print all messages from all threads\n");
-    printf("\n");
-    printf(" -d: suppress default messages\n");
-    printf("\n");
-}
-
-// Translate escape sequences like "\n" - only common ones are handled
-static void InterpretEscapeSequences(char* s)
-{
-    char* d = s;
-    char c = *s++;
-    while (c != '\0')
-    {
-        if (c == '\\')
-        {
-            c = *s++;
-            switch (c)
-            {
-            case    'n': *d++ = '\n'; break;
-            case    't': *d++ = '\t'; break;
-            case    'r': *d++ = '\r'; break;
-            default:     *d++ = c;    break;
-            }
-        }
-        else
-        {
-            *d++ = c;
-        }
-        c = *s++;
-    }
-    *d = '\0';
-}
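The function rewrites the string in place: the write cursor only ever trails the read cursor, so no extra buffer is needed. A self-contained sketch of the same idea (renamed Unescape, with an added guard against a trailing backslash; both are assumptions, not the original code):

```cpp
#include <cstdio>

// In-place translation of "\n", "\t", "\r" escape sequences; the output is never
// longer than the input, so the buffer is rewritten without allocation.
static void Unescape(char* s)
{
    char* d = s;
    while (*s != '\0')
    {
        char c = *s++;
        if (c == '\\' && *s != '\0')
        {
            c = *s++;
            switch (c)
            {
            case 'n': c = '\n'; break;
            case 't': c = '\t'; break;
            case 'r': c = '\r'; break;
            default:  break; // keep unknown escapes as the escaped character
            }
        }
        *d++ = c;
    }
    *d = '\0';
}

int main()
{
    char buf[] = "first line\\nsecond\\tcolumn";
    Unescape(buf);
    printf("%s\n", buf);
    return 0;
}
```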
-
-bool ParseOptions(int argc, char* argv[])
-{
-    int i = 0;
-    while (i < argc)
-    {
-        char* arg = argv[i];
-        if (arg[0] == '-')
-        {
-            switch (arg[1])
-            {
-            case 'v':
-            case 'V':
-                if (s_valueFilterCount >= MAX_VALUE_FILTERS)
-                {
-                    printf("too many value filters - max is %d\n", MAX_VALUE_FILTERS);
-                    return false;
-                }
-                if (arg[2] == ':')
-                {
-                    int i = s_valueFilterCount++;
-                    char* end = nullptr;
-                    s_valueFilter[i].start = strtoull(&arg[3], &end, 16);
-                    if (*end == '-')
-                    {
-                        s_valueFilter[i].end = strtoull(end + 1, &end, 16);
-                    }
-                    else if (*end == '+')
-                    {
-                        s_valueFilter[i].end = s_valueFilter[i].start + strtoull(end + 1, &end, 16);
-                    }
-                    else if (*end != '\0')
-                    {
-                        printf("expected '-' or '+'\n");
-                        return false;
-                    }
-                    else
-                    {
-                        s_valueFilter[i].end = s_valueFilter[i].start;
-                    }
-                    if (*end != '\0')
-                    {
-                        printf("could not parse option %s\n", arg);
-                        return false;
-                    }
-                }
-                else
-                {
-                    printf("expected '-v:'\n");
-                    return false;
-                }
-                break;
-
-            case 't':
-            case 'T':
-                if (arg[2] == ':')
-                {
-                    char* end = nullptr;
-                    s_timeFilterStart = strtod(&arg[3], &end);
-                    if (*end == '-')
-                    {
-                        s_timeFilterEnd = strtod(end + 1, &end);
-                    }
-                    else if (*end == '+')
-                    {
-                        s_timeFilterEnd = s_timeFilterStart + strtod(end + 1, &end);
-                    }
-                    else
-                    {
-                        s_timeFilterEnd = INFINITY;
-                    }
-                    if (*end != '\0')
-                    {
-                        printf("could not parse option %s\n", arg);
-                        return false;
-                    }
-                }
-                else if (_strnicmp(arg, "-tid:", 5) == 0)
-                {
-                    arg = arg + 5;
-                    while (true)
-                    {
-                        if (s_threadFilterCount >= MAX_THREAD_FILTERS)
-                        {
-                            printf("too many thread filters - max is %d\n", MAX_THREAD_FILTERS);
-                            return false;
-                        }
-                        char* end = nullptr;
-                        if (_strnicmp(arg, "gc", 2) == 0 || _strnicmp(arg, "bg", 2) == 0)
-                        {
-                            unsigned long gcHeapNumber = strtoul(arg+2, &end, 10);
-                            GcThreadKind kind = _strnicmp(arg, "gc", 2) == 0 ? GC_THREAD_FG : GC_THREAD_BG;
-                            if (gcHeapNumber < MAX_NUMBER_OF_HEAPS)
-                            {
-                                s_gcThreadFilter[gcHeapNumber][kind] = true;
-                                s_hadGcThreadFilters = true;
-                            }
-                            else
-                            {
-                                printf("expected heap number < %d\n", MAX_NUMBER_OF_HEAPS);
-                                return false;
-                            }
-                        }
-                        else
-                        {
-                            int i = s_threadFilterCount++;
-                            s_threadFilter[i] = strtoull(arg, &end, 16);
-                        }
-                        if (*end == ',')
-                        {
-                            arg = end + 1;
-                        }
-                        else if (*end != '\0')
-                        {
-                            printf("could not parse %s\n", arg);
-                            return false;
-                        }
-                        else
-                        {
-                            break;
-                        }
-                    }
-                }
-                else if (_stricmp(arg, "-tid") == 0)
-                {
-                    s_printHexTidForGcThreads = true;
-                }
-                else
-                {
-                    printf("-t: or -t:<-last seconds> or -t:- or\n");
-                    printf("-tid:,,... expected\n");
-                    return false;
-                }
-                break;
-
-            case 'o':
-            case 'O':
-                if (arg[2] == ':')
-                {
-                    s_outputFileName = &arg[3];
-                }
-                else
-                {
-                    printf("expected '-o:'\n");
-                    return false;
-                }
-                break;
-
-            case 'l':
-            case 'L':
-                if (arg[2] == ':')
-                {
-                    arg = arg + 3;
-                    while (true)
-                    {
-                        if (s_levelFilterCount >= MAX_LEVEL_FILTERS)
-                        {
-                            printf("too many level filters - max is %d\n", MAX_LEVEL_FILTERS);
-                            return false;
-                        }
-                        int i = s_levelFilterCount++;
-                        char* end = nullptr;
-                        if (*arg == '*')
-                        {
-                            s_levelFilter[i].minLevel = 0;
-                            s_levelFilter[i].maxLevel = 0x7fffffff;
-                            end = arg + 1;
-                        }
-                        else
-                        {
-                            s_levelFilter[i].minLevel = strtoul(arg, &end, 10);
-                            if (*end == '-')
-                            {
-                                s_levelFilter[i].maxLevel = strtoul(end + 1, &end, 10);
-                            }
-                            else
-                            {
-                                s_levelFilter[i].maxLevel = s_levelFilter[i].minLevel;
-                            }
-                        }
-                        if (*end == ',')
-                        {
-                            arg = end + 1;
-                        }
-                        else if (*end != '\0')
-                        {
-                            printf("could not parse option %s\n", arg);
-                            return false;
-                        }
-                        else
-                        {
-                            break;
-                        }
-                    }
-                }
-                else
-                {
-                    printf("expected '-l:'\n");
-                    return false;
-                }
-                break;
-
-            case 'a':
-            case 'A':
-                s_showAllMessages = true;
-                break;
-            case 'd':
-            case 'D':
-                s_showDefaultMessages = false;
-                break;
-            case 'f':
-            case 'F':
-                if (arg[2] == '\0')
-                {
-                    s_fPrintFormatStrings = true;
-                }
-                else if (arg[2] == ':')
-                {
-                    if (s_interestingStringCount >= MAX_INTERESTING_STRINGS)
-                    {
-                        printf("too format string filters - max is %d\n", MAX_INTERESTING_STRINGS - IS_INTERESTING);
-                        return false;
-                    }
-                    arg = &arg[3];
-                    char* buf = arg;
-                    size_t actualSize = strlen(buf);
-                    if (actualSize <= 1)
-                    {
-                        printf("-f: expected\n");
-                        return false;
-                    }
-
-                    // remove double quotes around the string, if given
-                    if (actualSize >= 2 && buf[0] == '"' && buf[actualSize - 1] == '"')
-                    {
-                        buf[actualSize - 1] = '\0';
-                        buf++;
-                    }
-                    InterpretEscapeSequences(buf);
-                    AddInterestingString(buf, false);
-                }
-                break;
-            case 'p':
-            case 'P':
-                if (arg[2] == ':')
-                {
-                    if (s_interestingStringCount >= MAX_INTERESTING_STRINGS)
-                    {
-                        printf("too format string filters - max is %d\n", MAX_INTERESTING_STRINGS - IS_INTERESTING);
-                        return false;
-                    }
-                    arg = &arg[3];
-                    char* buf = arg;
-                    size_t actualSize = strlen(buf);
-                    if (actualSize <= 1)
-                    {
-                        printf("-f: expected\n");
-                        return false;
-                    }
-
-                    // remove double quotes around the string, if given
-                    if (actualSize >= 2 && buf[0] == '"' && buf[actualSize - 1] == '"')
-                    {
-                        buf[actualSize - 1] = '\0';
-                        buf++;
-                    }
-                    InterpretEscapeSequences(buf);
-                    AddInterestingString(buf, true);
-                }
-                break;
-            case 'g':
-            case 'G':
-                if (arg[2] == ':')
-                {
-                    char* end = nullptr;
-                    s_gcFilterStart = strtoul(arg+3, &end, 10);
-                    if (*end == '-')
-                    {
-                        s_gcFilterEnd = strtoul(end+1, &end, 10);
-                    }
-                    else
-                    {
-                        s_gcFilterEnd = s_gcFilterStart;
-                    }
-                    if (*end != '\0')
-                    {
-                        printf("could not parse option %s\n", arg);
-                        return false;
-                    }
-                }
-                else
-                {
-                    printf("-g: or -g:- expected\n");
-                    return false;
-                }
-                break;
-
-            case 'i':
-            case 'I':
-                if (arg[2] == ':')
-                {
-                    char* end = nullptr;
-                    s_facilityIgnore = strtoul(arg + 3, &end, 16);
-                    if (*end != '\0')
-                    {
-                        printf("could not parse option %s\n", arg);
-                        return false;
-                    }
-                }
-                else
-                {
-                    printf("-i: expected\n");
-                    return false;
-                }
-                break;
-
-            case 'e':
-            case 'E':
-                if (arg[2] == '\0')
-                {
-                    s_printEarliestMessages = true;
-                }
-                else if (arg[2] == ':')
-                {
-                    arg = arg + 3;
-                    while (true)
-                    {
-                        if (s_printEarliestMessageFromThreadCount >= MAX_THREAD_FILTERS)
-                        {
-                            printf("too many threads - max is %d\n", MAX_THREAD_FILTERS);
-                            return false;
-                        }
-                        char* end = nullptr;
-                        if (_strnicmp(arg, "gc", 2) == 0 || _strnicmp(arg, "bg", 2) == 0)
-                        {
-                            int gcHeapNumber = strtoul(arg + 2, &end, 10);
-                            GcThreadKind kind = _strnicmp(arg, "gc", 2) == 0 ? GC_THREAD_FG : GC_THREAD_BG;
-                            if (gcHeapNumber < MAX_NUMBER_OF_HEAPS)
-                            {
-                                s_printEarliestMessageFromGcThread[gcHeapNumber][kind] = true;
-                            }
-                            else
-                            {
-                                printf("expected heap number < %d\n", MAX_NUMBER_OF_HEAPS);
-                                return false;
-                            }
-                        }
-                        else
-                        {
-                            int i = s_printEarliestMessageFromThreadCount++;
-                            s_printEarliestMessageFromThread[i] = strtoull(arg, &end, 16);
-                        }
-                        if (*end == ',')
-                        {
-                            arg = end + 1;
-                        }
-                        else if (*end != '\0')
-                        {
-                            printf("could not parse %s\n", arg);
-                            return false;
-                        }
-                        else
-                        {
-                            break;
-                        }
-                    }
-                }
-                else
-                {
-                    printf("could not parse option %s\n", arg);
-                    return false;
-                }
-                break;
-
-            case '?':
-                Usage();
-                return false;
-
-            default:
-                printf("unrecognized option %s\n", arg);
-                return false;
-            }
-        }
-        else
-        {
-            return false;
-        }
-        i++;
-    }
-    return true;
-}
-
-static void IncludeMessage(uint64_t threadId, StressMsgReader msg)
-{
-    LONGLONG msgCount = InterlockedIncrement64(&s_msgCount) - 1;
-    if (msgCount < MAX_MESSAGE_COUNT)
-    {
-        s_threadMsgBuf[msgCount].threadId = threadId;
-        s_threadMsgBuf[msgCount].msg = msg;
-        s_threadMsgBuf[msgCount].msgId = msgCount;
-    }
-}
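IncludeMessage reserves a slot with an atomic increment and then fills it in; writes that land past the fixed capacity are silently dropped, which is why s_msgCount is clamped to MAX_MESSAGE_COUNT after the workers finish. A minimal sketch of that reserve-then-write pattern using std::atomic instead of InterlockedIncrement64 (the names and capacity are made up for the example):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>

static const int64_t kCapacity = 4;          // stand-in for MAX_MESSAGE_COUNT
static uint64_t g_slots[kCapacity];
static std::atomic<int64_t> g_count{0};

static void Append(uint64_t value)
{
    int64_t slot = g_count.fetch_add(1);     // pre-increment value is our reserved slot
    if (slot < kCapacity)
        g_slots[slot] = value;               // slots past the capacity are dropped
}

int main()
{
    for (uint64_t v = 1; v <= 6; v++)
        Append(v * 10);
    int64_t used = g_count.load() < kCapacity ? g_count.load() : kCapacity; // clamp, like s_msgCount
    for (int64_t i = 0; i < used; i++)
        printf("slot %lld = %llu\n", (long long)i, (unsigned long long)g_slots[i]);
    return 0;
}
```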
-
-DWORD WINAPI ProcessStresslogWorker(LPVOID)
-{
-    StressLog::StressLogHeader* hdr = s_hdr;
-    LONG totalMsgCount = 0;
-    int wrappedWriteThreadCount = 0;
-    bool fTimeFilter = s_timeFilterStart != 0.0 || s_timeFilterEnd != 0.0;
-    for (int threadStressLogIndex = 0; threadStressLogIndex < s_threadStressLogCount; threadStressLogIndex++)
-    {
-        // is another thread already working on this thread stress log?
-        if (s_threadStressLogDesc[threadStressLogIndex].workStarted != 0 || InterlockedIncrement(&s_threadStressLogDesc[threadStressLogIndex].workStarted) != 1)
-            continue;
-
-        ThreadStressLog* tsl = s_threadStressLogDesc[threadStressLogIndex].tsl;
-        if (!tsl->IsValid())
-            continue;
-        if (!FilterThread(tsl))
-            continue;
-        if (tsl->writeHasWrapped)
-        {
-            wrappedWriteThreadCount++;
-        }
-        // printf("thread: %zx\n", tsl->threadId);
-        void* msg = StressLog::TranslateMemoryMappedPointer(tsl->curPtr);
-        StressLogChunk* slc = StressLog::TranslateMemoryMappedPointer(tsl->curWriteChunk);
-        int chunkCount = 0;
-        void* prevMsg = nullptr;
-        while (true)
-        {
-            // printf("stress log chunk %zx\n", (size_t)slc);
-            if (!slc->IsValid())
-            {
-                printf("oops, invalid stress log chunk\n");
-                slc = slc->next;
-                if (slc == tsl->curWriteChunk)
-                    break;
-                chunkCount++;
-                if (chunkCount >= tsl->chunkListLength)
-                {
-                    printf("oops, more chunks on list than expected\n");
-                    break;
-                }
-                msg = nullptr;
-            }
-            assert(StressLog::TranslateMemoryMappedPointer(StressLog::TranslateMemoryMappedPointer(slc->next)->prev) == slc);
-            assert(StressLog::TranslateMemoryMappedPointer(StressLog::TranslateMemoryMappedPointer(slc->prev)->next) == slc);
-#ifdef _DEBUG
-            int chunkCount1 = 0;
-            for (StressLogChunk* slc1 = StressLog::TranslateMemoryMappedPointer(tsl->curWriteChunk); slc1 != slc; slc1 = StressLog::TranslateMemoryMappedPointer(slc1->next))
-            {
-                chunkCount1++;
-            }
-            if (chunkCount != chunkCount1)
-            {
-                printf("oops, we have a loop\n");
-                break;
-            }
-#endif //_DEBUG
-
-            size_t* p = (size_t*)slc->StartPtr();
-            size_t* end = (size_t*)slc->EndPtr();
-            if (p <= (size_t*)msg && (size_t*)msg < end)
-            {
-                ; // fine
-            }
-            else
-            {
-                while (p < end && *p == 0)
-                    p++;
-                msg = (void*)p;
-            }
-            void* endMsg = (void*)end;
-            while (msg < endMsg)
-            {
-                StressMsgReader msgReader(msg);
-                totalMsgCount++;
-                char* format = (char*)(hdr->moduleImage + msgReader.GetFormatOffset());
-                double deltaTime = ((double)(msgReader.GetTimeStamp() - hdr->startTimeStamp)) / hdr->tickFrequency;
-                bool fIgnoreMessage = false;
-                if (fTimeFilter)
-                {
-                    if (deltaTime < s_timeFilterStart)
-                    {
-                        // we know the times will only get smaller, so can stop here
-                        break;
-                    }
-                    if (deltaTime > s_timeFilterEnd)
-                    {
-                        fIgnoreMessage = true;
-                    }
-                }
-                int numberOfArgs = msgReader.GetNumberOfArgs();
-                if (!fIgnoreMessage)
-                {
-                    bool fIncludeMessage = s_showAllMessages || FilterMessage(hdr, tsl, msgReader.GetFacility(), format, deltaTime, numberOfArgs, msgReader.GetArgs());
-                    if (!fIncludeMessage && s_valueFilterCount > 0)
-                    {
-                        for (int i = 0; i < numberOfArgs; i++)
-                        {
-                            for (int j = 0; j < s_valueFilterCount; j++)
-                            {
-                                if (s_valueFilter[j].start <= (size_t)msgReader.GetArgs()[i] && (size_t)msgReader.GetArgs()[i] <= s_valueFilter[j].end)
-                                {
-                                    fIncludeMessage = true;
-                                    break;
-                                }
-                            }
-                            if (fIncludeMessage)
-                                break;
-                        }
-                    }
-                    if (fIncludeMessage)
-                    {
-                        IncludeMessage(tsl->threadId, msg);
-                    }
-                }
-                prevMsg = msg;
-                msg = (StressMsg*)&msgReader.GetArgs()[numberOfArgs];
-            }
-            if (slc == StressLog::TranslateMemoryMappedPointer(tsl->chunkListTail) && !tsl->writeHasWrapped)
-                break;
-            slc = StressLog::TranslateMemoryMappedPointer(slc->next);
-            if (slc == StressLog::TranslateMemoryMappedPointer(tsl->curWriteChunk))
-                break;
-            if (s_hadGcThreadFilters && !FilterThread(tsl))
-                break;
-            chunkCount++;
-            if (chunkCount >= tsl->chunkListLength)
-            {
-                printf("oops, more chunks on list than expected\n");
-                break;
-            }
-            msg = nullptr;
-        }
-        s_threadStressLogDesc[threadStressLogIndex].earliestMessage = prevMsg;
-        s_threadStressLogDesc[threadStressLogIndex].workFinished = 1;
-    }
-
-    InterlockedAdd64(&s_totalMsgCount, totalMsgCount);
-    InterlockedAdd64(&s_wrappedWriteThreadCount, wrappedWriteThreadCount);
-
-    return 0;
-}
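The loop above lets every worker walk the full list of per-thread logs, but the interlocked bump of workStarted ensures that only the first thread to claim an entry actually processes it. A minimal sketch of that claim pattern with std::atomic and std::thread in place of the Win32 primitives (WorkItem and Worker are invented names for the example):

```cpp
#include <atomic>
#include <cstdio>
#include <thread>

struct WorkItem { std::atomic<long> claimed{0}; };

static WorkItem g_items[8];

static void Worker(int id)
{
    for (WorkItem& item : g_items)
    {
        if (item.claimed.fetch_add(1) != 0)
            continue; // another worker claimed this item first
        printf("worker %d processed item %ld\n", id, (long)(&item - g_items));
    }
}

int main()
{
    std::thread t1(Worker, 1), t2(Worker, 2);
    t1.join();
    t2.join();
    return 0;
}
```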
-
-static double FindLatestTime(StressLog::StressLogHeader* hdr)
-{
-    double latestTime = 0.0;
-    for (ThreadStressLog* tsl = StressLog::TranslateMemoryMappedPointer(hdr->logs.t); tsl != nullptr; tsl = StressLog::TranslateMemoryMappedPointer(tsl->next))
-    {
-        StressMsg* msg = StressLog::TranslateMemoryMappedPointer(tsl->curPtr);
-        double deltaTime = ((double)(msg->GetTimeStamp() - hdr->startTimeStamp)) / hdr->tickFrequency;
-        latestTime = max(latestTime, deltaTime);
-    }
-    return latestTime;
-}
-
-static void PrintFriendlyNumber(LONGLONG n)
-{
-    if (n < 1000)
-        printf("%d", (int32_t)n);
-    else if (n < 1000 * 1000)
-        printf("%5.3f thousand", n / 1000.0);
-    else if (n < 1000 * 1000 * 1000)
-        printf("%8.6f million", n / 1000000.0);
-    else
-        printf("%11.9f billion", n / 1000000000.0);
-}
-
-static void PrintMessage(CorClrData& corClrData, FILE *outputFile, uint64_t threadId, StressMsgReader msg)
-{
-    void* argBuffer[StressMsg::maxArgCnt];
-    char* format = (char*)(s_hdr->moduleImage + msg.GetFormatOffset());
-    int numberOfArgs = msg.GetNumberOfArgs();
-    for (int i = 0; i < numberOfArgs; i++)
-    {
-        argBuffer[i] = msg.GetArgs()[i];
-    }
-    double deltaTime = ((double)(msg.GetTimeStamp() - s_hdr->startTimeStamp)) / s_hdr->tickFrequency;
-    if (!s_printHexTidForGcThreads)
-    {
-        GcThread gcThread;
-        if (LookupGcThread(threadId, &gcThread))
-        {
-            threadId = gcThread.heapNumber;
-            if (gcThread.kind == GC_THREAD_FG)
-                threadId |= 0x8000000000000000;
-            else
-                threadId |= 0x4000000000000000;
-        }
-    }
-    formatOutput(&corClrData, outputFile, format, threadId, deltaTime, msg.GetFacility(), argBuffer, s_fPrintFormatStrings);
-}
-
-int ProcessStressLog(void* baseAddress, int argc, char* argv[])
-{
-    for (int threadStressLogIndex = 0; threadStressLogIndex < s_threadStressLogCount; threadStressLogIndex++)
-    {
-        s_threadStressLogDesc[threadStressLogIndex].workStarted = 0;
-        s_threadStressLogDesc[threadStressLogIndex].workFinished = 0;
-    }
-    s_msgCount = 0;
-    s_totalMsgCount = 0;
-    s_timeFilterStart = 0;
-    s_timeFilterEnd = 0;
-    s_outputFileName = nullptr;
-    s_fPrintFormatStrings = false;
-    s_showAllMessages = false;
-    s_showDefaultMessages = true;
-    s_maxHeapNumberSeen = -1;
-    for (int i = IS_INTERESTING; i < s_interestingStringCount; i++)
-    {
-        s_interestingStringTable[i] = nullptr;
-    }
-    s_interestingStringCount = IS_INTERESTING;
-    s_levelFilterCount = 0;
-    s_gcFilterStart = 0;
-    s_gcFilterEnd = 0;
-    s_valueFilterCount = 0;
-    s_threadFilterCount = 0;
-    s_hadGcThreadFilters = false;
-    s_printHexTidForGcThreads = false;
-    s_facilityIgnore = 0;
-    s_printEarliestMessages = false;
-    s_printEarliestMessageFromThreadCount = 0;
-    memset(s_gcThreadFilter, 0, sizeof(s_gcThreadFilter));
-    memset(&mapImageToStringId, 0, sizeof(mapImageToStringId));
-    memset(s_interestingStringFilter, 0, sizeof(s_interestingStringFilter));
-    memset(s_interestingStringMatchMode, 0, sizeof(s_interestingStringMatchMode));
-    memset(s_printEarliestMessageFromGcThread, 0, sizeof(s_printEarliestMessageFromGcThread));
-
-    if (!ParseOptions(argc, argv))
-        return 1;
-
-    StressLog::StressLogHeader* hdr = (StressLog::StressLogHeader*)baseAddress;
-    if (hdr->headerSize != sizeof(*hdr) ||
-        hdr->magic != *(uint32_t*)"LRTS" ||
-        (hdr->version != 0x00010001 &&
-            hdr->version != 0x00010002))
-    {
-        printf("Unrecognized file format\n");
-        return 1;
-    }
-    StressLog::writing_base_address = (size_t)hdr->memoryBase;
-    StressLog::reading_base_address = (size_t)baseAddress;
-    s_hdr = hdr;
-
-    // Workaround for a clang SIGKILL (exit code 137) crash.  Apparently, clang does not
-    // like a large array instantiation on the R.H.S. of a static variable. It was reproduced
-    // on linux-x64 with clang v6, v10 and v13.
-    auto temp = new StressThreadAndMsg[MAX_MESSAGE_COUNT];
-    s_threadMsgBuf = temp;
-
-    int threadStressLogIndex = 0;
-    double latestTime = FindLatestTime(hdr);
-    if (s_timeFilterStart < 0)
-    {
-        s_timeFilterStart = max(latestTime + s_timeFilterStart, 0.0);
-        s_timeFilterEnd = latestTime;
-    }
-    for (ThreadStressLog* tsl = StressLog::TranslateMemoryMappedPointer(hdr->logs.t); tsl != nullptr; tsl = StressLog::TranslateMemoryMappedPointer(tsl->next))
-    {
-        if (!tsl->IsValid())
-            continue;
-        if (!FilterThread(tsl))
-            continue;
-        if (threadStressLogIndex >= MAX_THREADSTRESSLOGS)
-        {
-            printf("too many threads\n");
-            return 1;
-        }
-        s_threadStressLogDesc[threadStressLogIndex].tsl = tsl;
-        threadStressLogIndex++;
-    }
-    s_threadStressLogCount = threadStressLogIndex;
-    s_wrappedWriteThreadCount = 0;
-
-    SYSTEM_INFO systemInfo;
-    GetSystemInfo(&systemInfo);
-
-    DWORD threadCount = min(systemInfo.dwNumberOfProcessors, (DWORD)MAXIMUM_WAIT_OBJECTS);
-    HANDLE threadHandle[64];
-    for (DWORD i = 0; i < threadCount; i++)
-    {
-        threadHandle[i] = CreateThread(NULL, 0, ProcessStresslogWorker, nullptr, 0, nullptr);
-        if (threadHandle[i] == 0)
-        {
-            printf("CreateThread failed\n");
-            return 1;
-        }
-    }
-    WaitForMultipleObjects(threadCount, threadHandle, TRUE, INFINITE);
-
-    // the interlocked increment may have increased s_msgCount beyond MAX_MESSAGE_COUNT -
-    // make sure we don't go beyond the end of the buffer
-    s_msgCount = min((LONG64)s_msgCount, MAX_MESSAGE_COUNT);
-
-    if (s_gcFilterStart != 0)
-    {
-        // find the time interval that includes the GCs in question
-        double startTime = INFINITY;
-        double endTime = 0.0;
-        for (unsigned long i = s_gcFilterStart; i <= s_gcFilterEnd; i++)
-        {
-            startTime = min(startTime, s_gcStartEnd[i].startTime);
-            if (s_gcStartEnd[i].endTime != 0.0)
-            {
-                endTime = max(endTime, s_gcStartEnd[i].endTime);
-            }
-            else
-            {
-                // haven't seen the end - assume it's still in progress
-                endTime = latestTime;
-            }
-        }
-
-        // remove all messages outside of this time interval
-        int remMsgCount = 0;
-        for (int msgIndex = 0; msgIndex < s_msgCount; msgIndex++)
-        {
-            StressMsgReader msg = s_threadMsgBuf[msgIndex].msg;
-            double deltaTime = ((double)(msg.GetTimeStamp() - hdr->startTimeStamp)) / hdr->tickFrequency;
-            if (startTime <= deltaTime && deltaTime <= endTime)
-            {
-                s_threadMsgBuf[remMsgCount] = s_threadMsgBuf[msgIndex];
-                remMsgCount++;
-            }
-        }
-        s_msgCount = remMsgCount;
-    }
-
-    if (s_hadGcThreadFilters)
-    {
-        for (int k = GC_THREAD_FG; k <= GC_THREAD_BG; k++)
-        {
-            for (int heap = 0; heap <= s_maxHeapNumberSeen; heap++)
-            {
-                if (s_gcThreadFilter[heap][k])
-                {
-                    uint64_t threadId = s_threadIdOfHeap[heap][k];
-                    if (threadId != 0)
-                    {
-                        if (s_threadFilterCount < MAX_THREAD_FILTERS)
-                        {
-                            int i = s_threadFilterCount++;
-                            s_threadFilter[i] = threadId;
-                        }
-                        else
-                        {
-                            printf("too many thread filters, max = %d\n", MAX_THREAD_FILTERS);
-                        }
-                    }
-                    else
-                    {
-                        printf("don't know thread id for GC%d, ignoring\n", heap);
-                    }
-                }
-            }
-        }
-    }
-
-    if (s_threadFilterCount > 0)
-    {
-        // remove all messages from other threads
-        int remMsgCount = 0;
-        for (int msgIndex = 0; msgIndex < s_msgCount; msgIndex++)
-        {
-            uint64_t threadId = s_threadMsgBuf[msgIndex].threadId;
-            for (int i = 0; i < s_threadFilterCount; i++)
-            {
-                if (threadId == s_threadFilter[i])
-                {
-                    s_threadMsgBuf[remMsgCount] = s_threadMsgBuf[msgIndex];
-                    remMsgCount++;
-                    break;
-                }
-            }
-        }
-        s_msgCount = remMsgCount;
-    }
-
-    // if the sort becomes a bottleneck, we can do a bucket sort by time
-    // (say fractions of a second), then sort the individual buckets,
-    // perhaps on multiple threads
-    qsort(s_threadMsgBuf, s_msgCount, sizeof(s_threadMsgBuf[0]), CmpMsg);
-
-    CorClrData corClrData(hdr);
-    FILE* outputFile = stdout;
-    if (s_outputFileName != nullptr)
-    {
-        if (fopen_s(&outputFile, s_outputFileName, "w") != 0)
-        {
-            printf("could not create output file %s\n", s_outputFileName);
-            outputFile = stdout;
-        }
-    }
-
-    for (LONGLONG i = 0; i < s_msgCount; i++)
-    {
-        uint64_t threadId = (unsigned)s_threadMsgBuf[i].threadId;
-        StressMsgReader msg = s_threadMsgBuf[i].msg;
-        PrintMessage(corClrData, outputFile, threadId, msg);
-    }
-
-    for (int k = GC_THREAD_FG; k <= GC_THREAD_BG; k++)
-    {
-        for (int heap = 0; heap <= s_maxHeapNumberSeen; heap++)
-        {
-            uint64_t threadId = s_threadIdOfHeap[heap][k];
-            if (threadId != 0)
-            {
-                if (s_printEarliestMessageFromGcThread[heap][k])
-                {
-                    if (s_printEarliestMessageFromThreadCount < MAX_THREAD_FILTERS)
-                    {
-                        int i = s_printEarliestMessageFromThreadCount++;
-                        s_printEarliestMessageFromThread[i] = threadId;
-                    }
-                    else
-                    {
-                        printf("too many threads, max = %d\n", MAX_THREAD_FILTERS);
-                    }
-                }
-            }
-            else
-            {
-                printf("don't know thread id for GC%d, ignoring\n", heap);
-            }
-        }
-    }
-
-    if (s_printEarliestMessages || s_printEarliestMessageFromThreadCount > 0)
-    {
-        fprintf(outputFile, "\nEarliestMessages:\n");
-        LONGLONG earliestStartCount = s_msgCount;
-        for (int threadStressLogIndex = 0; threadStressLogIndex < s_threadStressLogCount; threadStressLogIndex++)
-        {
-            StressMsgReader msg = s_threadStressLogDesc[threadStressLogIndex].earliestMessage;
-            if (msg == nullptr)
-                continue;
-            bool fIncludeMessage = s_printEarliestMessages;
-            uint64_t threadId = s_threadStressLogDesc[threadStressLogIndex].tsl->threadId;
-            if (!fIncludeMessage)
-            {
-                for (int i = 0; i < s_printEarliestMessageFromThreadCount; i++)
-                {
-                    if (threadId == s_printEarliestMessageFromThread[i])
-                    {
-                        fIncludeMessage = true;
-                        break;
-                    }
-                }
-            }
-            if (fIncludeMessage)
-            {
-                IncludeMessage(threadId, msg);
-            }
-        }
-        qsort(&s_threadMsgBuf[earliestStartCount], s_msgCount - earliestStartCount, sizeof(s_threadMsgBuf[0]), CmpMsg);
-        for (LONGLONG i = earliestStartCount; i < s_msgCount; i++)
-        {
-            uint64_t threadId = (unsigned)s_threadMsgBuf[i].threadId;
-            StressMsgReader msg = s_threadMsgBuf[i].msg;
-            PrintMessage(corClrData, outputFile, threadId, msg);
-        }
-    }
-
-    if (outputFile != stdout)
-        fclose(outputFile);
-
-    ptrdiff_t usedSize = hdr->memoryCur - hdr->memoryBase;
-    ptrdiff_t availSize = hdr->memoryLimit - hdr->memoryCur;
-    printf("Used file size: %6.3f GB, still available: %6.3f GB, %d threads total, %d overwrote earlier messages\n",
-        (double)usedSize / (1024 * 1024 * 1024), (double)availSize/ (1024 * 1024 * 1024),
-        s_threadStressLogCount, (int)s_wrappedWriteThreadCount);
-    if (hdr->threadsWithNoLog != 0)
-        printf("%u threads did not get a log!\n", hdr->threadsWithNoLog);
-    printf("Number of messages examined: "); PrintFriendlyNumber(s_totalMsgCount); printf(", printed: "); PrintFriendlyNumber(s_msgCount); printf("\n");
-
-    delete[] s_threadMsgBuf;
-
-    return 0;
-}
diff --git a/src/coreclr/tools/StressLogAnalyzer/StressMsgReader.h b/src/coreclr/tools/StressLogAnalyzer/StressMsgReader.h
deleted file mode 100644
index 2d5bfe70e439..000000000000
--- a/src/coreclr/tools/StressLogAnalyzer/StressMsgReader.h
+++ /dev/null
@@ -1,80 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include 
-
-extern StressLog::StressLogHeader* s_hdr;
-
-// A version-aware reader for memory-mapped stress log messages.
-struct StressMsgReader
-{
-private:
-    struct StressMsgSmallOffset
-    {
-        uint32_t numberOfArgsLow  : 3;                   // at most 7 arguments here
-        uint32_t formatOffset  : 26;                     // low bits offset of format string in modules
-        uint32_t numberOfArgsHigh : 3;                   // extend number of args in a backward compat way
-        uint32_t facility;                               // facility used to log the entry
-        uint64_t timeStamp;                              // time when mssg was logged
-        void* args[0];                                   // variable number of arguments
-    };
-
-    void* m_rawMsg;
-public:
-    StressMsgReader() = default;
-
-    StressMsgReader(void* msg)
-        :m_rawMsg(msg)
-    {
-    }
-
-    uint64_t GetFormatOffset() const
-    {
-        if (s_hdr->version == 0x00010001)
-        {
-            return ((StressMsgSmallOffset*)m_rawMsg)->formatOffset;
-        }
-        return ((StressMsg*)m_rawMsg)->GetFormatOffset();
-    }
-
-    uint32_t GetNumberOfArgs() const
-    {
-        if (s_hdr->version == 0x00010001)
-        {
-            return ((StressMsgSmallOffset*)m_rawMsg)->numberOfArgsHigh << 3 | ((StressMsgSmallOffset*)m_rawMsg)->numberOfArgsLow;
-        }
-        return ((StressMsg*)m_rawMsg)->GetNumberOfArgs();
-    }
-
-    uint32_t GetFacility() const
-    {
-        if (s_hdr->version == 0x00010001)
-        {
-            return ((StressMsgSmallOffset*)m_rawMsg)->facility;
-        }
-        return ((StressMsg*)m_rawMsg)->GetFacility();
-    }
-
-    uint64_t GetTimeStamp() const
-    {
-        if (s_hdr->version == 0x00010001)
-        {
-            return ((StressMsgSmallOffset*)m_rawMsg)->timeStamp;
-        }
-        return ((StressMsg*)m_rawMsg)->GetTimeStamp();
-    }
-
-    void** GetArgs() const
-    {
-        if (s_hdr->version == 0x00010001)
-        {
-            return ((StressMsgSmallOffset*)m_rawMsg)->args;
-        }
-        return ((StressMsg*)m_rawMsg)->args;
-    }
-
-    bool operator==(std::nullptr_t) const
-    {
-        return m_rawMsg == nullptr;
-    }
-};
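StressMsgReader hides which on-disk layout a raw pointer refers to, selecting the old small-offset layout or the current StressMsg layout from the header's version field. A minimal sketch of that version-dispatch idea with two invented layouts (the field names and versions here are illustrative, not the real format):

```cpp
#include <cstdint>
#include <cstdio>

struct LayoutV1 { uint32_t facility; uint32_t timeLow; };   // hypothetical old layout
struct LayoutV2 { uint32_t facility; uint64_t time; };      // hypothetical current layout

struct RecordReader
{
    uint32_t version;
    const void* raw;

    uint64_t GetTime() const
    {
        if (version == 1)
            return ((const LayoutV1*)raw)->timeLow;          // old format: 32-bit timestamp
        return ((const LayoutV2*)raw)->time;                 // current format: 64-bit timestamp
    }
};

int main()
{
    LayoutV1 v1{0x1, 1234};
    LayoutV2 v2{0x1, 567890};
    printf("%llu\n", (unsigned long long)RecordReader{1, &v1}.GetTime());
    printf("%llu\n", (unsigned long long)RecordReader{2, &v2}.GetTime());
    return 0;
}
```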
diff --git a/src/coreclr/tools/StressLogAnalyzer/staticcontract.h b/src/coreclr/tools/StressLogAnalyzer/staticcontract.h
deleted file mode 100644
index d3ef6a4ab9bc..000000000000
--- a/src/coreclr/tools/StressLogAnalyzer/staticcontract.h
+++ /dev/null
@@ -1,9 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#ifndef _ASSERTE
-#define _ASSERTE(a)
-#endif
-
-#define STATIC_CONTRACT_LEAF
-#define STATIC_CONTRACT_WRAPPER
diff --git a/src/coreclr/tools/StressLogAnalyzer/strike.h b/src/coreclr/tools/StressLogAnalyzer/strike.h
deleted file mode 100644
index 846f780bbc67..000000000000
--- a/src/coreclr/tools/StressLogAnalyzer/strike.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include 
-#include 
-#include 
-
-typedef void* TADDR;
-extern BOOL g_bDacBroken;
-
-#define IsMethodDesc(m) FALSE
-#define IsMethodTable(mt) FALSE
-#define IsInterrupt() FALSE
-
-#define NameForMT_s(a,b,c)
-#define DEBUG_OUTPUT_NORMAL            0x00000001
-
-extern char g_mdName[1];
-
-struct SOS
-{
-    HRESULT GetMethodDescName(DWORD_PTR arg, size_t bufferSize, WCHAR* buffer, void*)
-    {
-        return S_FALSE;
-    }
-};
-
-extern SOS* g_sos;
-
-#define TO_CDADDR(a) a
-#define UL64_TO_CDA(a) ((void*)a)
-#define SOS_PTR(a) a
-#define TO_TADDR(a) ((char *)a)
-
-struct SYMBOLS
-{
-    HRESULT GetNameByOffset(DWORD_PTR arg, char *buffer, size_t bufferSize, void*, ULONG64 *displacement)
-    {
-        return S_FALSE;
-    }
-};
-
-extern SYMBOLS* g_ExtSymbols;
-
-typedef void* CLRDATA_ADDRESS;
-
-struct DacpMethodDescData
-{
-    int whatever;
-    void Request(void*, CLRDATA_ADDRESS a)
-    {
-    }
-};
-
-struct IDebugDataSpaces
-{
-    virtual HRESULT ReadVirtual(void* src, void* dest, size_t size, int) = 0;
-};
-
-HRESULT OutputVaList(ULONG mask, PCSTR format, va_list args);
-void ExtOut(PCSTR format, ...);
-#define ___in
-void formatOutput(struct IDebugDataSpaces* memCallBack, ___in FILE* file, __inout __inout_z char* format, uint64_t threadId, double timeStamp, DWORD_PTR facility, ___in void** args, bool fPrintFormatString = false);
diff --git a/src/coreclr/tools/StressLogAnalyzer/util.h b/src/coreclr/tools/StressLogAnalyzer/util.h
deleted file mode 100644
index 752509277a77..000000000000
--- a/src/coreclr/tools/StressLogAnalyzer/util.h
+++ /dev/null
@@ -1,158 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-template <typename T>
-struct Volatile
-{
-    T t;
-    T Load() { return t; }
-};
-
-typedef void* CRITSEC_COOKIE;
-
-#define STRESS_LOG_ANALYZER
-
-#include "staticcontract.h"
-
-// This macro is used to standardize the wide character string literals between UNIX and Windows.
-// Unix L"" is UTF32, and on windows it's UTF16.  Because of built-in assumptions on the size
-// of string literals, it's important to match behaviour between Unix and Windows.  Unix will be defined
-// as u"" (char16_t)
-#ifdef TARGET_UNIX
-#define W(str)  u##str
-#else // TARGET_UNIX
-#define W(str)  L##str
-#endif // TARGET_UNIX
-
-//*****************************************************************************
-//
-// **** CQuickBytes
-// This helper class is useful for cases where 90% of the time you allocate 512
-// or fewer bytes for a data structure.  This class contains a 512 byte buffer.
-// Alloc() will return a pointer to this buffer if your allocation is small
-// enough, otherwise it asks the heap for a larger buffer which is freed for
-// you.  No mutex locking is required for the small allocation case, making the
-// code run faster with less heap fragmentation.  Each instance will allocate
-// 520 bytes, so use accordingly.
-//
-//*****************************************************************************
-template <SIZE_T SIZE, SIZE_T INCREMENT>
-class CQuickBytesBase
-{
-public:
-    CQuickBytesBase() :
-        pbBuff(0),
-        iSize(0),
-        cbTotal(SIZE)
-    { }
-
-    void Destroy()
-    {
-        if (pbBuff)
-        {
-            delete[](BYTE*)pbBuff;
-            pbBuff = 0;
-        }
-    }
-
-    void* Alloc(SIZE_T iItems)
-    {
-        iSize = iItems;
-        if (iItems <= SIZE)
-        {
-            cbTotal = SIZE;
-            return (&rgData[0]);
-        }
-        else
-        {
-            if (pbBuff)
-                delete[](BYTE*)pbBuff;
-            pbBuff = new BYTE[iItems];
-            cbTotal = pbBuff ? iItems : 0;
-            return (pbBuff);
-        }
-    }
-
-    // This is for conformity to the CQuickBytesBase that is defined by the runtime so
-    // that we can use it inside of some GC code that SOS seems to include as well.
-    //
-    // The plain vanilla "Alloc" version on this CQuickBytesBase doesn't throw either,
-    // so we'll just forward the call.
-    void* AllocNoThrow(SIZE_T iItems)
-    {
-        return Alloc(iItems);
-    }
-
-    HRESULT ReSize(SIZE_T iItems)
-    {
-        void* pbBuffNew;
-        if (iItems <= cbTotal)
-        {
-            iSize = iItems;
-            return NOERROR;
-        }
-
-        pbBuffNew = new BYTE[iItems + INCREMENT];
-        if (!pbBuffNew)
-            return E_OUTOFMEMORY;
-        if (pbBuff)
-        {
-            memcpy(pbBuffNew, pbBuff, cbTotal);
-            delete[](BYTE*)pbBuff;
-        }
-        else
-        {
-            _ASSERTE(cbTotal == SIZE);
-            memcpy(pbBuffNew, rgData, SIZE);
-        }
-        cbTotal = iItems + INCREMENT;
-        iSize = iItems;
-        pbBuff = pbBuffNew;
-        return NOERROR;
-
-    }
-
-    operator PVOID()
-    {
-        return ((pbBuff) ? pbBuff : &rgData[0]);
-    }
-
-    void* Ptr()
-    {
-        return ((pbBuff) ? pbBuff : &rgData[0]);
-    }
-
-    SIZE_T Size()
-    {
-        return (iSize);
-    }
-
-    SIZE_T MaxSize()
-    {
-        return (cbTotal);
-    }
-
-    void* pbBuff;
-    SIZE_T      iSize;              // number of bytes used
-    SIZE_T      cbTotal;            // total bytes allocated in the buffer
-    // use UINT64 to enforce the alignment of the memory
-    UINT64 rgData[(SIZE + sizeof(UINT64) - 1) / sizeof(UINT64)];
-};
-
-#define     CQUICKBYTES_BASE_SIZE           512
-#define     CQUICKBYTES_INCREMENTAL_SIZE    128
-
-class CQuickBytesNoDtor : public CQuickBytesBase<CQUICKBYTES_BASE_SIZE, CQUICKBYTES_INCREMENTAL_SIZE>
-{
-};
-
-class CQuickBytes : public CQuickBytesNoDtor
-{
-public:
-    CQuickBytes() { }
-
-    ~CQuickBytes()
-    {
-        Destroy();
-    }
-};
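The comment above describes the small-buffer pattern CQuickBytes implements: requests up to a fixed size come from an inline buffer, anything larger falls back to the heap. A standalone sketch of the same pattern (QuickBuffer is an invented name, not the header's API):

```cpp
#include <cstdio>
#include <cstdlib>
#include <cstring>

template <size_t InlineSize>
class QuickBuffer
{
public:
    ~QuickBuffer() { free(m_heap); } // free(nullptr) is a no-op

    void* Alloc(size_t bytes)
    {
        if (bytes <= InlineSize)
            return m_inline;         // fast path: no heap allocation
        free(m_heap);                // release any previous large buffer
        m_heap = malloc(bytes);
        return m_heap;
    }

private:
    void* m_heap = nullptr;
    unsigned char m_inline[InlineSize];
};

int main()
{
    QuickBuffer<512> buf;
    char* small = (char*)buf.Alloc(64);     // served from the inline buffer
    strcpy(small, "small allocation");
    printf("%s\n", small);
    char* large = (char*)buf.Alloc(4096);   // falls back to the heap
    if (large != nullptr)
        memset(large, 0, 4096);
    return 0;
}
```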
diff --git a/src/coreclr/tools/SuperFileCheck/SuperFileCheck.csproj b/src/coreclr/tools/SuperFileCheck/SuperFileCheck.csproj
index 908da8fe32c3..223a9ff21504 100644
--- a/src/coreclr/tools/SuperFileCheck/SuperFileCheck.csproj
+++ b/src/coreclr/tools/SuperFileCheck/SuperFileCheck.csproj
@@ -15,10 +15,10 @@
   
 
   
-    
-    <_jitToolsRidPlatformIndex>$(OutputRID.LastIndexOf('-'))
-    $(OutputRID.Substring(0, $(_jitToolsRidPlatformIndex)))
-    $(OutputRID.Substring($(_jitToolsRidPlatformIndex)).TrimStart('-'))
+    
+    <_jitToolsRidPlatformIndex>$(TargetRid.LastIndexOf('-'))
+    $(TargetRid.Substring(0, $(_jitToolsRidPlatformIndex)))
+    $(TargetRid.Substring($(_jitToolsRidPlatformIndex)).TrimStart('-'))
 
     
     linux
diff --git a/src/coreclr/tools/aot/DependencyGraphViewer/DependencyGraphViewer.sln b/src/coreclr/tools/aot/DependencyGraphViewer/DependencyGraphViewer.sln
deleted file mode 100644
index 03847a1fc026..000000000000
--- a/src/coreclr/tools/aot/DependencyGraphViewer/DependencyGraphViewer.sln
+++ /dev/null
@@ -1,31 +0,0 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.3.32605.63
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DependencyGraphViewer", "DependencyGraphViewer.csproj", "{03F15361-EAF4-4A06-8A2B-BD0304618CEA}"
-EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DependecyGraphViewer.Tests", "Tests\DependecyGraphViewer.Tests.csproj", "{F4950BFC-87A9-4158-AB4F-1311CFC15700}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Release|Any CPU = Release|Any CPU
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{03F15361-EAF4-4A06-8A2B-BD0304618CEA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{03F15361-EAF4-4A06-8A2B-BD0304618CEA}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{03F15361-EAF4-4A06-8A2B-BD0304618CEA}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{03F15361-EAF4-4A06-8A2B-BD0304618CEA}.Release|Any CPU.Build.0 = Release|Any CPU
-		{F4950BFC-87A9-4158-AB4F-1311CFC15700}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{F4950BFC-87A9-4158-AB4F-1311CFC15700}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{F4950BFC-87A9-4158-AB4F-1311CFC15700}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{F4950BFC-87A9-4158-AB4F-1311CFC15700}.Release|Any CPU.Build.0 = Release|Any CPU
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-	GlobalSection(ExtensibilityGlobals) = postSolution
-		SolutionGuid = {E06F254B-B3D1-456A-8707-75953B49D7C6}
-	EndGlobalSection
-EndGlobal
diff --git a/src/coreclr/tools/aot/DependencyGraphViewer/DependencyGraphViewer.slnx b/src/coreclr/tools/aot/DependencyGraphViewer/DependencyGraphViewer.slnx
new file mode 100644
index 000000000000..54956dc0080c
--- /dev/null
+++ b/src/coreclr/tools/aot/DependencyGraphViewer/DependencyGraphViewer.slnx
@@ -0,0 +1,4 @@
+<Solution>
+  <Project Path="DependencyGraphViewer.csproj" />
+  <Project Path="Tests\DependecyGraphViewer.Tests.csproj" />
+</Solution>
diff --git a/src/coreclr/tools/aot/DependencyGraphViewer/README.md b/src/coreclr/tools/aot/DependencyGraphViewer/README.md
index bba9948e62fc..d00846cecab7 100644
--- a/src/coreclr/tools/aot/DependencyGraphViewer/README.md
+++ b/src/coreclr/tools/aot/DependencyGraphViewer/README.md
@@ -5,7 +5,7 @@ If you are developing with NativeAOT or using the IL linker, you might ask yours
 
 ## How to build and run
 1. Launch Visual Studio
-2. Load `DependencyGraphViewer.sln`
+2. Load `DependencyGraphViewer.slnx`
 3. Build and Run as normal
 ## Using ETW logs
 The DependencyGraphViewer must be run as an administrator if collecting ETW events when compiling with NativeAOT. One of the easiest ways of doing this is by running Visual Studio as an administrator and then building and running the program.
diff --git a/src/coreclr/tools/aot/Directory.Build.targets b/src/coreclr/tools/aot/Directory.Build.targets
index 4f855d71288f..2919d6e70222 100644
--- a/src/coreclr/tools/aot/Directory.Build.targets
+++ b/src/coreclr/tools/aot/Directory.Build.targets
@@ -1,6 +1,11 @@
 
+  
+    true
+  
   
   
     true
   
+
+  
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/AnalysisBasedMetadataManager.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/AnalysisBasedMetadataManager.cs
index 68b56bd378ee..0e5a953736a5 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/AnalysisBasedMetadataManager.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/AnalysisBasedMetadataManager.cs
@@ -14,6 +14,7 @@
 using CustomAttributeHandle = System.Reflection.Metadata.CustomAttributeHandle;
 using ExportedTypeHandle = System.Reflection.Metadata.ExportedTypeHandle;
 using FlowAnnotations = ILLink.Shared.TrimAnalysis.FlowAnnotations;
+using ParameterHandle = System.Reflection.Metadata.ParameterHandle;
 
 namespace ILCompiler
 {
@@ -29,6 +30,7 @@ public sealed class AnalysisBasedMetadataManager : MetadataManager, ICompilation
         private readonly Dictionary<MethodDesc, MetadataCategory> _reflectableMethods = new Dictionary<MethodDesc, MetadataCategory>();
         private readonly Dictionary<FieldDesc, MetadataCategory> _reflectableFields = new Dictionary<FieldDesc, MetadataCategory>();
         private readonly HashSet<ReflectableCustomAttribute> _reflectableAttributes = new HashSet<ReflectableCustomAttribute>();
+        private readonly HashSet<ReflectableParameter> _reflectableParameters = new HashSet<ReflectableParameter>();
 
         public AnalysisBasedMetadataManager(CompilerTypeSystemContext typeSystemContext)
             : this(typeSystemContext, new FullyBlockedMetadataBlockingPolicy(),
@@ -36,6 +38,7 @@ public AnalysisBasedMetadataManager(CompilerTypeSystemContext typeSystemContext)
                 new NoDynamicInvokeThunkGenerationPolicy(), null, Array.Empty(), Array.Empty(),
                 Array.Empty>(), Array.Empty>(),
                 Array.Empty>(), Array.Empty(),
+                Array.Empty<ReflectableParameter>(),
                 default)
         {
         }
@@ -54,6 +57,7 @@ public AnalysisBasedMetadataManager(
             IEnumerable> reflectableMethods,
             IEnumerable> reflectableFields,
             IEnumerable<ReflectableCustomAttribute> reflectableAttributes,
+            IEnumerable<ReflectableParameter> reflectableParameters,
             MetadataManagerOptions options)
             : base(typeSystemContext, blockingPolicy, resourceBlockingPolicy, logFile, stackTracePolicy, invokeThunkGenerationPolicy, options, flowAnnotations)
         {
@@ -92,6 +96,11 @@ public AnalysisBasedMetadataManager(
                 _reflectableAttributes.Add(refAttribute);
             }
 
+            foreach (var refParameter in reflectableParameters)
+            {
+                _reflectableParameters.Add(refParameter);
+            }
+
 #if DEBUG
             HashSet<ModuleDesc> moduleHash = new HashSet<ModuleDesc>(_modulesWithMetadata);
             foreach (var refType in reflectableTypes)
@@ -101,7 +110,8 @@ public AnalysisBasedMetadataManager(
                 Debug.Assert((GetMetadataCategory(refType.Entity.GetTypeDefinition()) & MetadataCategory.Description)
                     == (GetMetadataCategory(refType.Entity) & MetadataCategory.Description));
 
-                Debug.Assert(!(refType.Entity is MetadataType) || moduleHash.Contains(((MetadataType)refType.Entity).Module));
+                Debug.Assert((refType.Category & MetadataCategory.Description) == 0 ||
+                    !(refType.Entity is MetadataType) || moduleHash.Contains(((MetadataType)refType.Entity).Module));
             }
 
             foreach (var refMethod in reflectableMethods)
@@ -134,14 +144,18 @@ protected override void ComputeMetadata(NodeFactory factory,
             out byte[] metadataBlob,
             out List> typeMappings,
             out List> methodMappings,
+            out Dictionary methodMetadataMappings,
             out List> fieldMappings,
+            out Dictionary fieldMetadataMappings,
             out List stackTraceMapping)
         {
             ComputeMetadata(new Policy(_blockingPolicy, this), factory,
                 out metadataBlob,
                 out typeMappings,
                 out methodMappings,
+                out methodMetadataMappings,
                 out fieldMappings,
+                out fieldMetadataMappings,
                 out stackTraceMapping);
         }
 
@@ -238,6 +252,11 @@ public bool GeneratesMetadata(EcmaModule module, CustomAttributeHandle caHandle)
                 return _parent._reflectableAttributes.Contains(new ReflectableCustomAttribute(module, caHandle));
             }
 
+            public bool GeneratesMetadata(EcmaModule module, ParameterHandle paramHandle)
+            {
+                return _parent._reflectableParameters.Contains(new ReflectableParameter(module, paramHandle));
+            }
+
             public bool GeneratesMetadata(EcmaModule module, ExportedTypeHandle exportedTypeHandle)
             {
                 // We'll possibly need to do something else here if we ever use this MetadataManager
@@ -298,4 +317,19 @@ public override bool Equals(object obj)
             => obj is ReflectableCustomAttribute other && Equals(other);
         public override int GetHashCode() => Module.GetHashCode() ^ CustomAttributeHandle.GetHashCode();
     }
+
+    public struct ReflectableParameter : IEquatable<ReflectableParameter>
+    {
+        public readonly EcmaModule Module;
+        public readonly ParameterHandle ParameterHandle;
+
+        public ReflectableParameter(EcmaModule module, ParameterHandle paramHandle)
+            => (Module, ParameterHandle) = (module, paramHandle);
+
+        public bool Equals(ReflectableParameter other)
+            => other.Module == Module && other.ParameterHandle == ParameterHandle;
+        public override bool Equals(object obj)
+            => obj is ReflectableParameter other && Equals(other);
+        public override int GetHashCode() => Module.GetHashCode() ^ ParameterHandle.GetHashCode();
+    }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/BodySubstitutionParser.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/BodySubstitutionParser.cs
index 0cf8ec163879..d9f87f6b80ef 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/BodySubstitutionParser.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/BodySubstitutionParser.cs
@@ -144,12 +144,8 @@ protected override void ProcessField(TypeDesc type, XPathNavigator fieldNav)
 
             if (string.Equals(GetAttribute(fieldNav, "initialize"), "true", StringComparison.InvariantCultureIgnoreCase))
             {
-                // We would need to also mess with the cctor of the type to set the field to this value:
-                //
-                // * ILLink will remove all stsfld instructions referencing this field from the cctor
-                // * It will place an explicit stsfld in front of the last "ret" instruction in the cctor
-                //
-                // This approach... has issues.
+                // We would need to also mess with the cctor of the type to set the field to this value,
+                // and doing so correctly is difficult.
                 throw new NotSupportedException();
             }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Compilation.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Compilation.cs
index 28a7bf8adc71..40b5d5acedd9 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Compilation.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Compilation.cs
@@ -266,9 +266,15 @@ public bool NeedsRuntimeLookup(ReadyToRunHelperId lookupKind, object targetOfLoo
 
         public ReadyToRunHelperId GetLdTokenHelperForType(TypeDesc type)
         {
-            return (ConstructedEETypeNode.CreationAllowed(type) && NodeFactory.DevirtualizationManager.CanReferenceConstructedMethodTable(type.NormalizeInstantiation()))
-                ? ReadyToRunHelperId.TypeHandle
-                : ReadyToRunHelperId.NecessaryTypeHandle;
+            bool canPotentiallyConstruct = ConstructedEETypeNode.CreationAllowed(type)
+                && NodeFactory.DevirtualizationManager.CanReferenceConstructedMethodTable(type.NormalizeInstantiation());
+            if (canPotentiallyConstruct)
+                return ReadyToRunHelperId.TypeHandle;
+
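+            // Generic definitions never get a constructed MethodTable, but if reflection needs to see
+            // this definition, hand out the full TypeHandle helper instead of the minimal one.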
+            if (type.IsGenericDefinition && NodeFactory.DevirtualizationManager.IsGenericDefinitionMethodTableReflectionVisible(type))
+                return ReadyToRunHelperId.TypeHandle;
+
+            return ReadyToRunHelperId.NecessaryTypeHandle;
         }
 
         public static MethodDesc GetConstructorForCreateInstanceIntrinsic(TypeDesc type)
@@ -368,9 +374,7 @@ public GenericDictionaryLookup ComputeGenericLookup(MethodDesc contextMethod, Re
             if (lookupKind == ReadyToRunHelperId.TypeHandleForCasting)
             {
                 var type = (TypeDesc)targetOfLookup;
-                if (!type.IsRuntimeDeterminedType ||
-                    (!((RuntimeDeterminedType)type).CanonicalType.IsCanonicalDefinitionType(CanonicalFormKind.Universal) &&
-                    !((RuntimeDeterminedType)type).CanonicalType.IsNullable))
+                if (!type.IsRuntimeDeterminedType || !((RuntimeDeterminedType)type).CanonicalType.IsNullable)
                 {
                     if (type.IsNullable)
                     {
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.Aot.cs
index 8d7f861958dc..699d8d16577e 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.Aot.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.Aot.cs
@@ -70,9 +70,7 @@ protected override RuntimeInterfacesAlgorithm GetRuntimeInterfacesAlgorithmForNo
 
         public override FieldLayoutAlgorithm GetLayoutAlgorithmForType(DefType type)
         {
-            if (type == UniversalCanonType)
-                return UniversalCanonLayoutAlgorithm.Instance;
-            else if (type.IsRuntimeDeterminedType)
+            if (type.IsRuntimeDeterminedType)
                 return _runtimeDeterminedFieldLayoutAlgorithm;
             else if (VectorOfTFieldLayoutAlgorithm.IsVectorOfTType(type))
                 return _vectorOfTFieldLayoutAlgorithm;
@@ -224,34 +222,12 @@ public void LogWarnings(Logger logger)
 
     public class SharedGenericsConfiguration
     {
-        //
-        // Universal Shared Generics heuristics magic values determined empirically
-        //
-        public long UniversalCanonGVMReflectionRootHeuristic_InstantiationCount { get; }
-        public long UniversalCanonGVMDepthHeuristic_NonCanonDepth { get; }
-        public long UniversalCanonGVMDepthHeuristic_CanonDepth { get; }
-
-        // Controls how many different instantiations of a generic method, or method on generic type
-        // should be allowed before trying to fall back to only supplying USG in the reflection
-        // method table.
-        public long UniversalCanonReflectionMethodRootHeuristic_InstantiationCount { get; }
-
         // To avoid infinite generic recursion issues during debug type record generation, attempt to
         // use canonical form for types with high generic complexity.
         public long MaxGenericDepthOfDebugRecord { get; }
 
         public SharedGenericsConfiguration()
         {
-            UniversalCanonGVMReflectionRootHeuristic_InstantiationCount = 4;
-            UniversalCanonGVMDepthHeuristic_NonCanonDepth = 2;
-            UniversalCanonGVMDepthHeuristic_CanonDepth = 1;
-
-            // Unlike the GVM heuristics which are intended to kick in aggressively
-            // this heuristic exists to make it so that a fair amount of generic
-            // expansion is allowed. Numbers are chosen to allow a fairly large
-            // amount of generic expansion before trimming.
-            UniversalCanonReflectionMethodRootHeuristic_InstantiationCount = 1024;
-
             MaxGenericDepthOfDebugRecord = 15;
         }
     }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.GeneratedAssembly.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.GeneratedAssembly.cs
index fc2299858626..ba9801349625 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.GeneratedAssembly.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.GeneratedAssembly.cs
@@ -153,7 +153,6 @@ public override ClassLayoutMetadata GetClassLayout()
             {
                 return new ClassLayoutMetadata
                 {
-                    Offsets = null,
                     PackingSize = 0,
                     Size = 0,
                 };
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/MethodBodyScanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/MethodBodyScanner.cs
index c7df8e767d08..eef3878c2446 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/MethodBodyScanner.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/MethodBodyScanner.cs
@@ -1305,6 +1305,10 @@ private void HandleCall(
                     {
                         MarkArrayValuesAsUnknown(arr, curBasicBlock);
                     }
+                    else if (v is ArrayOfAnnotatedSystemTypeValue arrayOfAnnotated)
+                    {
+                        arrayOfAnnotated.MarkModified();
+                    }
                 }
             }
         }
@@ -1354,6 +1358,10 @@ private void ScanStelem(
                         StoreMethodLocalValue(arrValue.IndexValues, ArrayValue.SanitizeArrayElementValue(valueToStore.Value), indexToStoreAtInt.Value, curBasicBlock, MaxTrackedArrayValues);
                     }
                 }
+                else if (array is ArrayOfAnnotatedSystemTypeValue arrayOfAnnotated)
+                {
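+                    // A store invalidates the assumption that every element still holds the annotated value.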
+                    arrayOfAnnotated.MarkModified();
+                }
             }
         }
 
@@ -1366,13 +1374,27 @@ private void ScanLdelem(
         {
             StackSlot indexToLoadFrom = PopUnknown(currentStack, 1, methodBody, offset);
             StackSlot arrayToLoadFrom = PopUnknown(currentStack, 1, methodBody, offset);
+
+            bool isByRef = opcode == ILOpcode.ldelema;
+
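+            // ldelema hands out a writable reference, so conservatively treat the array as modified;
+            // a plain load from an unmodified array can still be treated as the annotated element value.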
+            if (arrayToLoadFrom.Value.AsSingleValue() is ArrayOfAnnotatedSystemTypeValue arrayOfAnnotated)
+            {
+                if (isByRef)
+                {
+                    arrayOfAnnotated.MarkModified();
+                }
+                else if (!arrayOfAnnotated.IsModified)
+                {
+                    currentStack.Push(new StackSlot(arrayOfAnnotated.GetAnyElementValue()));
+                    return;
+                }
+            }
+
             if (arrayToLoadFrom.Value.AsSingleValue() is not ArrayValue arr)
             {
                 PushUnknown(currentStack);
                 return;
             }
-            // We don't yet handle arrays of references or pointers
-            bool isByRef = opcode == ILOpcode.ldelema;
 
             int? index = indexToLoadFrom.Value.AsConstInt();
             if (index == null)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/AddressTakenMethodNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/AddressTakenMethodNode.cs
index 2b5f079a308f..a67a3a95bb24 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/AddressTakenMethodNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/AddressTakenMethodNode.cs
@@ -59,7 +59,7 @@ public override void AppendMangledName(NameMangler nameMangler, Utf8StringBuilde
 
         public override int CompareToImpl(ISortableNode other, CompilerComparer comparer)
         {
-            return _methodNode.CompareToImpl(((AddressTakenMethodNode)other)._methodNode, comparer);
+            return comparer.Compare(_methodNode, ((AddressTakenMethodNode)other)._methodNode);
         }
 
         public ISymbolNode NodeForLinkage(NodeFactory factory)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ArrayMapNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ArrayMapNode.cs
index 19e7b8866f5b..6ae20dfaff90 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ArrayMapNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ArrayMapNode.cs
@@ -49,15 +49,14 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
             Section hashTableSection = writer.NewSection();
             hashTableSection.Place(typeMapHashTable);
 
-            foreach (var type in factory.MetadataManager.GetTypesWithConstructedEETypes())
+            foreach (var type in factory.MetadataManager.GetTypesWithEETypes())
             {
                 if (!type.IsArray)
                     continue;
 
                 var arrayType = (ArrayType)type;
 
-                // Look at the constructed type symbol. If a constructed type wasn't emitted, then the array map entry isn't valid for use
-                IEETypeNode arrayTypeSymbol = factory.ConstructedTypeSymbol(arrayType);
+                IEETypeNode arrayTypeSymbol = factory.NecessaryTypeSymbol(arrayType);
 
                 Vertex vertex = writer.GetUnsignedConstant(_externalReferences.GetIndex(arrayTypeSymbol));
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/CanonicalEETypeNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/CanonicalEETypeNode.cs
index 7a85ad1fd7f3..4336a5bd88b7 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/CanonicalEETypeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/CanonicalEETypeNode.cs
@@ -45,15 +45,10 @@ protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFact
 
             dependencyList.Add(factory.VTable(closestDefType), "VTable");
 
-            if (_type.IsCanonicalSubtype(CanonicalFormKind.Universal))
-                dependencyList.Add(factory.NativeLayout.TemplateTypeLayout(_type), "Universal generic types always have template layout");
-
             // Track generic virtual methods that will get added to the GVM tables
             if ((_virtualMethodAnalysisFlags & VirtualMethodAnalysisFlags.NeedsGvmEntries) != 0)
             {
                 dependencyList.Add(new DependencyListEntry(factory.TypeGVMEntries(_type.GetTypeDefinition()), "Type with generic virtual methods"));
-
-                AddDependenciesForUniversalGVMSupport(factory, _type, ref dependencyList);
             }
 
             return dependencyList;
@@ -75,22 +70,12 @@ protected override int GCDescSize
         {
             get
             {
-                // No GCDescs for universal canonical types
-                if (_type.IsCanonicalSubtype(CanonicalFormKind.Universal))
-                    return 0;
-
-                Debug.Assert(_type.IsCanonicalSubtype(CanonicalFormKind.Specific));
                 return GCDescEncoder.GetGCDescSize(_type);
             }
         }
 
         protected override void OutputGCDesc(ref ObjectDataBuilder builder)
         {
-            // No GCDescs for universal canonical types
-            if (_type.IsCanonicalSubtype(CanonicalFormKind.Universal))
-                return;
-
-            Debug.Assert(_type.IsCanonicalSubtype(CanonicalFormKind.Specific));
             GCDescEncoder.EncodeGCDesc(ref builder, _type);
         }
 
@@ -107,26 +92,6 @@ protected override void OutputInterfaceMap(NodeFactory factory, ref ObjectDataBu
             }
         }
 
-        protected override int BaseSize
-        {
-            get
-            {
-                if (_type.IsCanonicalSubtype(CanonicalFormKind.Universal) && _type.IsDefType)
-                {
-                    LayoutInt instanceByteCount = ((DefType)_type).InstanceByteCount;
-
-                    if (instanceByteCount.IsIndeterminate)
-                    {
-                        // For USG types, they may be of indeterminate size, and the size of the type may be meaningless.
-                        // In that case emit a fixed constant.
-                        return MinimumObjectSize;
-                    }
-                }
-
-                return base.BaseSize;
-            }
-        }
-
         public override int ClassCode => -1798018602;
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ConstructedEETypeNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ConstructedEETypeNode.cs
index 1905d09e64ef..162d2b9c5a1d 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ConstructedEETypeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ConstructedEETypeNode.cs
@@ -53,22 +53,6 @@ protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFact
 
             dependencyList.Add(factory.VTable(closestDefType), "VTable");
 
-            if (factory.TypeSystemContext.SupportsUniversalCanon)
-            {
-                foreach (var instantiationType in _type.Instantiation)
-                {
-                    if (instantiationType.IsValueType)
-                    {
-                        // All valuetype generic parameters of a constructed type may be effectively constructed. This is generally not that
-                        // critical, but in the presence of universal generics the compiler may generate a Box followed by calls to ToString,
-                        // GetHashcode or Equals in ways that cannot otherwise be detected by dependency analysis. Thus force all struct type
-                        // generic parameters to be considered constructed when walking dependencies of a constructed generic
-                        dependencyList.Add(factory.ConstructedTypeSymbol(instantiationType.ConvertToCanonForm(CanonicalFormKind.Specific)),
-                        "Struct generic parameters in constructed types may be assumed to be used as constructed in constructed generic types");
-                    }
-                }
-            }
-
             // Ask the metadata manager if we have any dependencies due to the presence of the EEType.
             factory.MetadataManager.GetDependenciesDueToEETypePresence(ref dependencyList, factory, _type);
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/EETypeNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/EETypeNode.cs
index 70d9bd026c68..4c066a175eaf 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/EETypeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/EETypeNode.cs
@@ -253,7 +253,7 @@ private bool CanonFormTypeMayExist
         {
             get
             {
-                if (!_type.HasInstantiation)
+                if (_type.IsArrayTypeWithoutGenericInterfaces())
                     return false;
 
                 if (!_type.Context.SupportsCanon)
@@ -263,16 +263,12 @@ private bool CanonFormTypeMayExist
                 if (_type.IsCanonicalSubtype(CanonicalFormKind.Any))
                     return false;
 
-                // If we reach here, a universal canon variant can exist (if universal canon is supported)
-                if (_type.Context.SupportsUniversalCanon)
-                    return true;
-
                 // Attempt to convert to canon. If the type changes, then the CanonForm exists
                 return (_type.ConvertToCanonForm(CanonicalFormKind.Specific) != _type);
             }
         }
 
-        public sealed override bool HasConditionalStaticDependencies
+        public override bool HasConditionalStaticDependencies
         {
             get
             {
@@ -323,7 +319,7 @@ public sealed override bool HasConditionalStaticDependencies
             }
         }
 
-        public sealed override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory factory)
+        public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory factory)
         {
             List<CombinedDependencyListEntry> result = new List<CombinedDependencyListEntry>();
 
@@ -337,9 +333,6 @@ public sealed override IEnumerable GetConditionalSt
                 if (CanonFormTypeMayExist)
                 {
                     result.Add(new CombinedDependencyListEntry(maximallyConstructableType, factory.MaximallyConstructableType(_type.ConvertToCanonForm(CanonicalFormKind.Specific)), "Trigger full type generation if canonical form exists"));
-
-                    if (_type.Context.SupportsUniversalCanon)
-                        result.Add(new CombinedDependencyListEntry(maximallyConstructableType, factory.MaximallyConstructableType(_type.ConvertToCanonForm(CanonicalFormKind.Universal)), "Trigger full type generation if universal canonical form exists"));
                 }
                 return result;
             }
@@ -353,7 +346,7 @@ public sealed override IEnumerable GetConditionalSt
                     "Information about static bases for type with template"));
             }
 
-            if (!_type.IsGenericDefinition && !_type.IsCanonicalSubtype(CanonicalFormKind.Any))
+            if (!_type.IsCanonicalSubtype(CanonicalFormKind.Any))
             {
                 foreach (DefType iface in _type.RuntimeInterfaces)
                 {
@@ -636,8 +629,6 @@ protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFact
                 {
                     dependencies.Add(new DependencyListEntry(factory.TypeGVMEntries(_type.GetTypeDefinition()), "Type with generic virtual methods"));
 
-                    AddDependenciesForUniversalGVMSupport(factory, _type, ref dependencies);
-
                     TypeDesc canonicalType = _type.ConvertToCanonForm(CanonicalFormKind.Specific);
                     if (canonicalType != _type)
                         dependencies.Add(factory.ConstructedTypeSymbol(canonicalType), "Type with generic virtual methods");
@@ -805,16 +796,9 @@ private void OutputFlags(NodeFactory factory, ref ObjectDataBuilder objData, boo
             if (_type.IsArray)
             {
                 TypeDesc elementType = ((ArrayType)_type).ElementType;
-                if (elementType == elementType.Context.UniversalCanonType)
-                {
-                    // elementSize == 0
-                }
-                else
-                {
-                    int elementSize = elementType.GetElementSize().AsInt;
-                    // We validated that this will fit the short when the node was constructed. No need for nice messages.
-                    flags |= (uint)checked((ushort)elementSize);
-                }
+                int elementSize = elementType.GetElementSize().AsInt;
+                // We validated that this will fit the short when the node was constructed. No need for nice messages.
+                flags |= (uint)checked((ushort)elementSize);
             }
             else if (_type.IsString)
             {
@@ -943,9 +927,7 @@ private void OutputVirtualSlots(NodeFactory factory, ref ObjectDataBuilder objDa
                     declType.ConvertToCanonForm(CanonicalFormKind.Specific) == declType;
 
                 // Note: Canonical type instantiations always have a generic dictionary vtable slot, but it's empty
-                // Note: If the current EETypeNode represents a universal canonical type, any dictionary slot must be empty
                 if (declType.IsCanonicalSubtype(CanonicalFormKind.Any)
-                    || implType.IsCanonicalSubtype(CanonicalFormKind.Universal)
                     || factory.LazyGenericsPolicy.UsesLazyGenerics(declType)
                     || isInterfaceWithAnEmptySlot)
                 {
@@ -1135,7 +1117,8 @@ protected void OutputGenericInstantiationDetails(NodeFactory factory, ref Object
             {
                 if (!_type.IsTypeDefinition)
                 {
-                    IEETypeNode typeDefNode = factory.NecessaryTypeSymbol(_type.GetTypeDefinition());
+                    IEETypeNode typeDefNode = factory.MaximallyConstructableType(_type) == this ?
+                        factory.ConstructedTypeSymbol(_type.GetTypeDefinition()) : factory.NecessaryTypeSymbol(_type.GetTypeDefinition());
                     if (factory.Target.SupportsRelativePointers)
                         objData.EmitReloc(typeDefNode, RelocType.IMAGE_REL_BASED_RELPTR32);
                     else
@@ -1231,32 +1214,6 @@ protected override void OnMarked(NodeFactory context)
             Debug.Assert(_type.IsTypeDefinition || !_type.HasSameTypeDefinition(context.ArrayOfTClass), "Asking for Array MethodTable");
         }
 
-        protected static void AddDependenciesForUniversalGVMSupport(NodeFactory factory, TypeDesc type, ref DependencyList dependencies)
-        {
-            if (factory.TypeSystemContext.SupportsUniversalCanon)
-            {
-                foreach (MethodDesc method in type.GetVirtualMethods())
-                {
-                    if (!method.HasInstantiation)
-                        continue;
-
-                    if (method.IsAbstract)
-                        continue;
-
-                    TypeDesc[] universalCanonArray = new TypeDesc[method.Instantiation.Length];
-                    for (int i = 0; i < universalCanonArray.Length; i++)
-                        universalCanonArray[i] = factory.TypeSystemContext.UniversalCanonType;
-
-                    InstantiatedMethod universalCanonMethodNonCanonicalized = method.MakeInstantiatedMethod(new Instantiation(universalCanonArray));
-                    MethodDesc universalCanonGVMMethod = universalCanonMethodNonCanonicalized.GetCanonMethodTarget(CanonicalFormKind.Universal);
-
-                    dependencies ??= new DependencyList();
-
-                    dependencies.Add(new DependencyListEntry(factory.MethodEntrypoint(universalCanonGVMMethod), "USG GVM Method"));
-                }
-            }
-        }
-
         public override int ClassCode => 1521789141;
 
         public override int CompareToImpl(ISortableNode other, CompilerComparer comparer)
@@ -1312,7 +1269,8 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                 // If the whole program view contains a reference to a preallocated RuntimeType
                 // instance for this type, generate a reference to it.
                 // Otherwise, generate as zero to save size.
-                if (!_type.Type.IsCanonicalSubtype(CanonicalFormKind.Any)
+                if (!relocsOnly
+                    && !_type.Type.IsCanonicalSubtype(CanonicalFormKind.Any)
                     && _type.GetFrozenRuntimeTypeNode(factory) is { Marked: true } runtimeTypeObject)
                 {
                     builder.EmitPointerReloc(runtimeTypeObject);
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ExactMethodInstantiationsNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ExactMethodInstantiationsNode.cs
index 9e1a274e30bc..9740188e3dc3 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ExactMethodInstantiationsNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ExactMethodInstantiationsNode.cs
@@ -73,17 +73,11 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                     arguments.Append(nativeWriter.GetUnsignedConstant(_externalReferences.GetIndex(argNode)));
                 }
 
-                // Get the name and sig of the method.
-                // Note: the method name and signature are stored in the NativeLayoutInfo blob, not in the hashtable we build here.
-
-                NativeLayoutMethodNameAndSignatureVertexNode nameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition());
-                NativeLayoutPlacedSignatureVertexNode placedNameAndSig = factory.NativeLayout.PlacedSignatureVertex(nameAndSig);
-                Debug.Assert(placedNameAndSig.SavedVertex != null);
-                Vertex placedNameAndSigOffsetSig = nativeWriter.GetOffsetSignature(placedNameAndSig.SavedVertex);
+                int token = factory.MetadataManager.GetMetadataHandleForMethod(factory, method.GetTypicalMethodDefinition());
 
                 // Get the vertex for the completed method signature
 
-                Vertex methodSignature = nativeWriter.GetTuple(declaringType, placedNameAndSigOffsetSig, arguments);
+                Vertex methodSignature = nativeWriter.GetTuple(declaringType, nativeWriter.GetUnsignedConstant((uint)token), arguments);
 
                 // Make the generic method entry vertex
 
@@ -118,9 +112,7 @@ public static void GetExactMethodInstantiationDependenciesForMethod(ref Dependen
             foreach (var arg in method.Instantiation)
                 dependencies.Add(new DependencyListEntry(factory.NecessaryTypeSymbol(arg), "Exact method instantiation entry"));
 
-            // Get native layout dependencies for the method signature.
-            NativeLayoutMethodNameAndSignatureVertexNode nameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition());
-            dependencies.Add(new DependencyListEntry(factory.NativeLayout.PlacedSignatureVertex(nameAndSig), "Exact method instantiation entry"));
+            factory.MetadataManager.GetNativeLayoutMetadataDependencies(ref dependencies, factory, method.GetTypicalMethodDefinition());
         }
 
         protected internal override int Phase => (int)ObjectNodePhase.Ordered;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/FieldMetadataNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/FieldMetadataNode.cs
index 5c49cfddb1c2..77fa28233c82 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/FieldMetadataNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/FieldMetadataNode.cs
@@ -58,6 +58,8 @@ public override IEnumerable GetStaticDependencies(NodeFacto
                         TypeMetadataNode.GetMetadataDependencies(ref dependencies, factory, sigData.type, "Modifier in a field signature");
             }
 
+            TypeMetadataNode.GetMetadataDependencies(ref dependencies, factory, _field.FieldType, "Type of the field");
+
             return dependencies;
         }
         protected override string GetName(NodeFactory factory)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericDefinitionEETypeNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericDefinitionEETypeNode.cs
index 22cd3c464973..a87f87bdcb42 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericDefinitionEETypeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericDefinitionEETypeNode.cs
@@ -1,33 +1,29 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Collections.Generic;
 using Internal.Runtime;
+using Internal.Text;
 using Internal.TypeSystem;
 
 using Debug = System.Diagnostics.Debug;
 
 namespace ILCompiler.DependencyAnalysis
 {
-    internal sealed class GenericDefinitionEETypeNode : EETypeNode
+    internal abstract class GenericDefinitionEETypeNode : EETypeNode
     {
         public GenericDefinitionEETypeNode(NodeFactory factory, TypeDesc type) : base(factory, type)
         {
             Debug.Assert(type.IsGenericDefinition);
         }
 
-        public override bool ShouldSkipEmittingObjectNode(NodeFactory factory)
-        {
-            return false;
-        }
+        public override bool HasConditionalStaticDependencies => false;
 
-        protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFactory factory)
-        {
-            DependencyList dependencyList = null;
+        public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory factory) => null;
 
-            // Ask the metadata manager if we have any dependencies due to the presence of the EEType.
-            factory.MetadataManager.GetDependenciesDueToEETypePresence(ref dependencyList, factory, _type);
-
-            return dependencyList;
+        public override ISymbolNode NodeForLinkage(NodeFactory factory)
+        {
+            return factory.NecessaryTypeSymbol(_type);
         }
 
         protected override ObjectData GetDehydratableData(NodeFactory factory, bool relocsOnly = false)
@@ -63,7 +59,57 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo
 
             return dataBuilder.ToObjectData();
         }
+    }
+
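+    /// <summary>
+    /// MethodTable of a generic type definition that does not need to be visible to reflection.
+    /// </summary>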
+    internal sealed class ReflectionInvisibleGenericDefinitionEETypeNode : GenericDefinitionEETypeNode
+    {
+        public ReflectionInvisibleGenericDefinitionEETypeNode(NodeFactory factory, TypeDesc type) : base(factory, type)
+        {
+        }
+
+        public override bool ShouldSkipEmittingObjectNode(NodeFactory factory)
+        {
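+            // If the reflection-visible variant of this definition was marked, it will be emitted
+            // instead and this node would only duplicate it.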
+            return factory.ConstructedTypeSymbol(_type).Marked;
+        }
+
+        protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFactory factory)
+        {
+            return new DependencyList();
+        }
+
+        public override int ClassCode => -287423988;
+    }
+
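+    /// <summary>
+    /// MethodTable of a generic type definition that is visible to reflection; reports metadata
+    /// dependencies and uses the serialized constructed RuntimeType object as its frozen RuntimeType.
+    /// </summary>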
+    internal sealed class ReflectionVisibleGenericDefinitionEETypeNode : GenericDefinitionEETypeNode
+    {
+        public ReflectionVisibleGenericDefinitionEETypeNode(NodeFactory factory, TypeDesc type) : base(factory, type)
+        {
+        }
+
+        public override bool ShouldSkipEmittingObjectNode(NodeFactory factory)
+        {
+            return false;
+        }
+
+        protected override FrozenRuntimeTypeNode GetFrozenRuntimeTypeNode(NodeFactory factory)
+        {
+            return factory.SerializedConstructedRuntimeTypeObject(_type);
+        }
+
+        protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler) + " reflection visible";
+
+        protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFactory factory)
+        {
+            var dependencyList = new DependencyList();
+
+            dependencyList.Add(factory.NecessaryTypeSymbol(_type), "Reflection invisible type for a visible type");
+
+            // Ask the metadata manager if we have any dependencies due to the presence of the EEType.
+            factory.MetadataManager.GetDependenciesDueToEETypePresence(ref dependencyList, factory, _type);
+
+            return dependencyList;
+        }
 
-        public override int ClassCode => -160325006;
+        public override int ClassCode => 983279111;
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericLookupResult.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericLookupResult.cs
index ee72c6b37a31..efbd5d98bfa6 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericLookupResult.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericLookupResult.cs
@@ -81,7 +81,7 @@ public abstract class GenericLookupResult
         public sealed override bool Equals(object obj)
         {
             GenericLookupResult other = obj as GenericLookupResult;
-            if (obj == null)
+            if (other == null)
                 return false;
 
             return ClassCode == other.ClassCode && EqualsImpl(other);
@@ -467,18 +467,11 @@ public override NativeLayoutVertexNode TemplateDictionaryNode(NodeFactory factor
         {
             MethodDesc canonMethod = _method.GetCanonMethodTarget(CanonicalFormKind.Specific);
 
-            //
-            // For universal canonical methods, we don't need the unboxing stub really, because
-            // the calling convention translation thunk will handle the unboxing (and we can avoid having a double thunk here)
-            // We just need the flag in the native layout info signature indicating that we needed an unboxing stub
-            //
-            bool getUnboxingStubNode = _isUnboxingThunk && !canonMethod.IsCanonicalMethod(CanonicalFormKind.Universal);
-
             // TODO-SIZE: this is address taken only in the delegate target case
             return factory.NativeLayout.MethodEntrypointDictionarySlot(
                 _method,
                 _isUnboxingThunk,
-                factory.AddressTakenMethodEntrypoint(canonMethod, getUnboxingStubNode));
+                factory.AddressTakenMethodEntrypoint(canonMethod, _isUnboxingThunk));
         }
 
         protected override int CompareToImpl(GenericLookupResult other, TypeSystemComparer comparer)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericMethodsHashtableNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericMethodsHashtableNode.cs
index 04d62dbad554..67da861f048d 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericMethodsHashtableNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericMethodsHashtableNode.cs
@@ -67,12 +67,9 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                         arguments.Append(nativeWriter.GetUnsignedConstant(_externalReferences.GetIndex(argNode)));
                     }
 
-                    // Method name and signature
-                    NativeLayoutVertexNode nameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition());
-                    NativeLayoutSavedVertexNode placedNameAndSig = factory.NativeLayout.PlacedSignatureVertex(nameAndSig);
-                    Vertex placedNameAndSigVertexOffset = nativeWriter.GetUnsignedConstant((uint)placedNameAndSig.SavedVertex.VertexOffset);
+                    int token = factory.MetadataManager.GetMetadataHandleForMethod(factory, method.GetTypicalMethodDefinition());
 
-                    fullMethodSignature = nativeWriter.GetTuple(containingType, placedNameAndSigVertexOffset, arguments);
+                    fullMethodSignature = nativeWriter.GetTuple(containingType, nativeWriter.GetUnsignedConstant((uint)token), arguments);
                 }
 
                 // Method's dictionary pointer
@@ -108,10 +105,7 @@ public static void GetGenericMethodsHashtableDependenciesForMethod(ref Dependenc
                 dependencies.Add(new DependencyListEntry(argNode, "GenericMethodsHashtable entry instantiation argument"));
             }
 
-            // Method name and signature
-            NativeLayoutVertexNode nameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition());
-            NativeLayoutSavedVertexNode placedNameAndSig = factory.NativeLayout.PlacedSignatureVertex(nameAndSig);
-            dependencies.Add(new DependencyListEntry(placedNameAndSig, "GenericMethodsHashtable entry signature"));
+            factory.MetadataManager.GetNativeLayoutMetadataDependencies(ref dependencies, factory, method.GetTypicalMethodDefinition());
         }
 
         protected internal override int Phase => (int)ObjectNodePhase.Ordered;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericMethodsTemplateMap.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericMethodsTemplateMap.cs
index d0e038240975..857106fa3d4f 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericMethodsTemplateMap.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericMethodsTemplateMap.cs
@@ -101,12 +101,6 @@ private static bool IsEligibleToBeATemplate(MethodDesc method)
                 Debug.Assert(method == method.GetCanonMethodTarget(CanonicalFormKind.Specific));
                 return true;
             }
-            else if (method.IsCanonicalMethod(CanonicalFormKind.Universal))
-            {
-                // Must be fully canonical
-                if (method == method.GetCanonMethodTarget(CanonicalFormKind.Universal))
-                    return true;
-            }
 
             return false;
         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericTypesHashtableNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericTypesHashtableNode.cs
index 3c2835ed9210..69df3a1a4eef 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericTypesHashtableNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericTypesHashtableNode.cs
@@ -45,14 +45,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
             Section nativeSection = nativeWriter.NewSection();
             nativeSection.Place(hashtable);
 
-            // We go over constructed EETypes only. The places that need to consult this hashtable at runtime
-            // all need constructed EETypes. Placing unconstructed EETypes into this hashtable could make us
-            // accidentally satisfy e.g. MakeGenericType for something that was only used in a cast. Those
-            // should throw MissingRuntimeArtifact instead.
-            //
-            // We already make sure "necessary" EETypes that could potentially be loaded at runtime through
-            // the dynamic type loader get upgraded to constructed EETypes at AOT compile time.
-            foreach (var type in factory.MetadataManager.GetTypesWithConstructedEETypes())
+            foreach (var type in factory.MetadataManager.GetTypesWithEETypes())
             {
                 // If this is an instantiated non-canonical generic type, add it to the generic instantiations hashtable
                 if (!type.HasInstantiation || type.IsGenericDefinition || type.IsCanonicalSubtype(CanonicalFormKind.Any))
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericVirtualMethodImplNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericVirtualMethodImplNode.cs
index 1fb8ab0a4cc2..6912c9358a18 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericVirtualMethodImplNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericVirtualMethodImplNode.cs
@@ -15,7 +15,6 @@ namespace ILCompiler.DependencyAnalysis
     /// </summary>
     public class GenericVirtualMethodImplNode : DependencyNodeCore<NodeFactory>
     {
-        private const int UniversalCanonGVMDepthHeuristic_CanonDepth = 2;
         private readonly MethodDesc _method;
 
         public GenericVirtualMethodImplNode(MethodDesc method)
@@ -49,12 +48,6 @@ public override IEnumerable GetStaticDependencies(NodeFacto
 
             if (validInstantiation)
             {
-                if (factory.TypeSystemContext.SupportsUniversalCanon && _method.IsGenericDepthGreaterThan(UniversalCanonGVMDepthHeuristic_CanonDepth))
-                {
-                    // fall back to using the universal generic variant of the generic method
-                    return dependencies;
-                }
-
                 bool getUnboxingStub = _method.OwningType.IsValueType && !_method.Signature.IsStatic;
                 dependencies ??= new DependencyList();
                 dependencies.Add(factory.MethodEntrypoint(_method, getUnboxingStub), "GVM Dependency - Canon method");
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericVirtualMethodTableNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericVirtualMethodTableNode.cs
index 210293d94358..6a3affe4b74d 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericVirtualMethodTableNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericVirtualMethodTableNode.cs
@@ -53,11 +53,8 @@ public static void GetGenericVirtualMethodImplementationDependencies(ref Depende
             MethodDesc openCallingMethod = callingMethod.GetTypicalMethodDefinition();
             MethodDesc openImplementationMethod = implementationMethod.GetTypicalMethodDefinition();
 
-            var openCallingMethodNameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(openCallingMethod);
-            var openImplementationMethodNameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(openImplementationMethod);
-
-            dependencies.Add(new DependencyListEntry(factory.NativeLayout.PlacedSignatureVertex(openCallingMethodNameAndSig), "gvm table calling method signature"));
-            dependencies.Add(new DependencyListEntry(factory.NativeLayout.PlacedSignatureVertex(openImplementationMethodNameAndSig), "gvm table implementation method signature"));
+            factory.MetadataManager.GetNativeLayoutMetadataDependencies(ref dependencies, factory, openCallingMethod);
+            factory.MetadataManager.GetNativeLayoutMetadataDependencies(ref dependencies, factory, openImplementationMethod);
         }
 
         private void AddGenericVirtualMethodImplementation(MethodDesc callingMethod, MethodDesc implementationMethod)
@@ -116,11 +113,11 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                     uint targetTypeId = _externalReferences.GetIndex(factory.NecessaryTypeSymbol(implementationType));
                     vertex = nativeFormatWriter.GetTuple(vertex, nativeFormatWriter.GetUnsignedConstant(targetTypeId));
 
-                    var nameAndSig = factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.MethodNameAndSignatureVertex(callingMethod));
-                    vertex = nativeFormatWriter.GetTuple(vertex, nativeFormatWriter.GetUnsignedConstant((uint)nameAndSig.SavedVertex.VertexOffset));
+                    int callingMethodToken = factory.MetadataManager.GetMetadataHandleForMethod(factory, callingMethod);
+                    vertex = nativeFormatWriter.GetTuple(vertex, nativeFormatWriter.GetUnsignedConstant((uint)callingMethodToken));
 
-                    nameAndSig = factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.MethodNameAndSignatureVertex(implementationMethod));
-                    vertex = nativeFormatWriter.GetTuple(vertex, nativeFormatWriter.GetUnsignedConstant((uint)nameAndSig.SavedVertex.VertexOffset));
+                    int implementationMethodToken = factory.MetadataManager.GetMetadataHandleForMethod(factory, implementationMethod);
+                    vertex = nativeFormatWriter.GetTuple(vertex, nativeFormatWriter.GetUnsignedConstant((uint)implementationMethodToken));
 
                     int hashCode = callingMethod.OwningType.GetHashCode();
                     hashCode = ((hashCode << 13) ^ hashCode) ^ implementationType.GetHashCode();
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceGenericVirtualMethodTableNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceGenericVirtualMethodTableNode.cs
index 8815fea96e9b..e1f432af8ff3 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceGenericVirtualMethodTableNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceGenericVirtualMethodTableNode.cs
@@ -55,16 +55,15 @@ public static void GetGenericVirtualMethodImplementationDependencies(ref Depende
             MethodDesc openCallingMethod = callingMethod.GetTypicalMethodDefinition();
             TypeDesc openImplementationType = implementationType.GetTypeDefinition();
 
-            var openCallingMethodNameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(openCallingMethod);
-            dependencies.Add(new DependencyListEntry(factory.NativeLayout.PlacedSignatureVertex(openCallingMethodNameAndSig), "interface gvm table calling method signature"));
+            factory.MetadataManager.GetNativeLayoutMetadataDependencies(ref dependencies, factory, openCallingMethod);
 
             // Implementation could be null if this is a default interface method reabstraction or diamond. We need to record those.
             if (implementationMethod != null)
             {
                 MethodDesc openImplementationMethod = implementationMethod.GetTypicalMethodDefinition();
                 dependencies.Add(new DependencyListEntry(factory.NecessaryTypeSymbol(openImplementationMethod.OwningType), "interface gvm table implementation method owning type"));
-                var openImplementationMethodNameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(openImplementationMethod);
-                dependencies.Add(new DependencyListEntry(factory.NativeLayout.PlacedSignatureVertex(openImplementationMethodNameAndSig), "interface gvm table implementation method signature"));
+
+                factory.MetadataManager.GetNativeLayoutMetadataDependencies(ref dependencies, factory, openImplementationMethod);
             }
 
             if (!openImplementationType.IsInterface)
@@ -153,10 +152,10 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
 
                 // Emit the method signature and containing type of the current interface method
                 uint typeId = _externalReferences.GetIndex(factory.NecessaryTypeSymbol(callingMethod.OwningType));
-                var nameAndSig = factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.MethodNameAndSignatureVertex(callingMethod));
+                int callingMethodToken = factory.MetadataManager.GetMetadataHandleForMethod(factory, callingMethod);
                 Vertex vertex = nativeFormatWriter.GetTuple(
                     nativeFormatWriter.GetUnsignedConstant(typeId),
-                    nativeFormatWriter.GetUnsignedConstant((uint)nameAndSig.SavedVertex.VertexOffset));
+                    nativeFormatWriter.GetUnsignedConstant((uint)callingMethodToken));
 
                 // Emit the method name / sig and containing type of each GVM target method for the current interface method entry
                 vertex = nativeFormatWriter.GetTuple(vertex, nativeFormatWriter.GetUnsignedConstant((uint)gvmEntry.Value.Count));
@@ -164,11 +163,11 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                 {
                     if (impl is MethodDesc implementationMethod)
                     {
-                        nameAndSig = factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.MethodNameAndSignatureVertex(implementationMethod));
+                        int implementationMethodToken = factory.MetadataManager.GetMetadataHandleForMethod(factory, implementationMethod);
                         typeId = _externalReferences.GetIndex(factory.NecessaryTypeSymbol(implementationMethod.OwningType));
                         vertex = nativeFormatWriter.GetTuple(
                             vertex,
-                            nativeFormatWriter.GetUnsignedConstant((uint)nameAndSig.SavedVertex.VertexOffset),
+                            nativeFormatWriter.GetUnsignedConstant((uint)implementationMethodToken),
                             nativeFormatWriter.GetUnsignedConstant(typeId));
                     }
                     else
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodMetadataNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodMetadataNode.cs
index 7f44064c4508..8eaade82cc95 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodMetadataNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodMetadataNode.cs
@@ -25,12 +25,14 @@ namespace ILCompiler.DependencyAnalysis
     /// </summary>
     internal sealed class MethodMetadataNode : DependencyNodeCore<NodeFactory>
     {
-        private readonly MethodDesc _method;
+        private readonly EcmaMethod _method;
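+        // A minimal entry keeps the owning type and signature dependencies but skips custom attributes
+        // and parameter metadata.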
+        private readonly bool _isMinimal;
 
-        public MethodMetadataNode(MethodDesc method)
+        public MethodMetadataNode(MethodDesc method, bool isMinimal)
         {
             Debug.Assert(method.IsTypicalMethodDefinition);
-            _method = method;
+            _method = (EcmaMethod)method;
+            _isMinimal = isMinimal;
         }
 
         public MethodDesc Method => _method;
@@ -40,7 +42,15 @@ public override IEnumerable GetStaticDependencies(NodeFacto
             DependencyList dependencies = new DependencyList();
             dependencies.Add(factory.TypeMetadata((MetadataType)_method.OwningType), "Owning type metadata");
 
-            CustomAttributeBasedDependencyAlgorithm.AddDependenciesDueToCustomAttributes(ref dependencies, factory, ((EcmaMethod)_method));
+            if (!_isMinimal)
+            {
+                CustomAttributeBasedDependencyAlgorithm.AddDependenciesDueToCustomAttributes(ref dependencies, factory, _method);
+
+                foreach (var parameterHandle in _method.MetadataReader.GetMethodDefinition(_method.Handle).GetParameters())
+                {
+                    dependencies.Add(factory.MethodParameterMetadata(new ReflectableParameter(_method.Module, parameterHandle)), "Parameter is visible");
+                }
+            }
 
             MethodSignature sig = _method.Signature;
             const string reason = "Method signature metadata";
@@ -57,9 +67,9 @@ public override IEnumerable GetStaticDependencies(NodeFacto
                         TypeMetadataNode.GetMetadataDependencies(ref dependencies, factory, sigData.type, "Modifier in a method signature");
             }
 
-            if (_method is EcmaMethod ecmaMethod)
+            if (!_isMinimal)
             {
-                DynamicDependencyAttributesOnEntityNode.AddDependenciesDueToDynamicDependencyAttribute(ref dependencies, factory, ecmaMethod);
+                DynamicDependencyAttributesOnEntityNode.AddDependenciesDueToDynamicDependencyAttribute(ref dependencies, factory, _method);
 
                 // On a reflectable method, perform generic data flow for the return type and all the parameter types
                 // This is a compensation for the DI issue described in https://github.com/dotnet/runtime/issues/81358
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodParameterMetadataNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodParameterMetadataNode.cs
new file mode 100644
index 000000000000..ff0278c3f1d8
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodParameterMetadataNode.cs
@@ -0,0 +1,41 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+
+using ILCompiler.DependencyAnalysisFramework;
+
+namespace ILCompiler.DependencyAnalysis
+{
+    /// <summary>
+    /// Represents a parameter that has metadata generated in the current compilation.
+    /// </summary>
+    /// <remarks>
+    /// Only expected to be used during ILScanning when scanning for reflection.
+    /// </remarks>
+    internal sealed class MethodParameterMetadataNode : DependencyNodeCore<NodeFactory>
+    {
+        private readonly ReflectableParameter _parameter;
+
+        public MethodParameterMetadataNode(ReflectableParameter parameter)
+        {
+            _parameter = parameter;
+        }
+
+        public ReflectableParameter Parameter => _parameter;
+
+        public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFactory factory) => null;
+
+        protected override string GetName(NodeFactory factory)
+        {
+            return $"Reflectable parameter {_parameter.ParameterHandle} in {_parameter.Module}";
+        }
+
+        public override bool InterestingForDynamicDependencyAnalysis => false;
+        public override bool HasDynamicDependencies => false;
+        public override bool HasConditionalStaticDependencies => false;
+        public override bool StaticDependenciesAreComputed => true;
+        public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory factory) => null;
+        public override IEnumerable<CombinedDependencyListEntry> SearchDynamicDependencies(List<DependencyNodeCore<NodeFactory>> markedNodes, int firstNode, NodeFactory factory) => null;
+    }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutInfoNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutInfoNode.cs
index 8e5b9c4734f4..3e2b17b3617e 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutInfoNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutInfoNode.cs
@@ -22,7 +22,6 @@ public sealed class NativeLayoutInfoNode : ObjectNode, ISymbolDefinitionNode, IN
         private byte[] _writerSavedBytes;
 
         private Section _signaturesSection;
-        private Section _ldTokenInfoSection;
         private Section _templatesSection;
 
         private List<NativeLayoutVertexNode> _vertexNodesToWrite;
@@ -34,7 +33,6 @@ public NativeLayoutInfoNode(ExternalReferencesTableNode externalReferences, Exte
 
             _writer = new NativeWriter();
             _signaturesSection = _writer.NewSection();
-            _ldTokenInfoSection = _writer.NewSection();
             _templatesSection = _writer.NewSection();
 
             _vertexNodesToWrite = new List<NativeLayoutVertexNode>();
@@ -51,7 +49,6 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
         public override bool StaticDependenciesAreComputed => true;
         protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler);
 
-        public Section LdTokenInfoSection => _ldTokenInfoSection;
         public Section SignaturesSection => _signaturesSection;
         public Section TemplatesSection => _templatesSection;
         public ExternalReferencesTableNode ExternalReferences => _externalReferences;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs
deleted file mode 100644
index fe00421b74e6..000000000000
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Diagnostics;
-
-using Internal.Text;
-using Internal.TypeSystem;
-
-namespace ILCompiler.DependencyAnalysis
-{
-    /// <summary>
-    /// Represents a native layout signature. A signature is a pair where the first item is a pointer
-    /// to the TypeManager that contains the native layout info blob of interest, and the second item
-    /// is an offset into that native layout info blob
-    /// </summary>
-    public class NativeLayoutSignatureNode : DehydratableObjectNode, ISymbolDefinitionNode
-    {
-        private TypeSystemEntity _identity;
-        private Utf8String _identityPrefix;
-        private NativeLayoutSavedVertexNode _nativeSignature;
-
-        public TypeSystemEntity Identity => _identity;
-
-        public NativeLayoutSignatureNode(NativeLayoutSavedVertexNode nativeSignature, TypeSystemEntity identity, Utf8String identityPrefix)
-        {
-            _nativeSignature = nativeSignature;
-            _identity = identity;
-            _identityPrefix = identityPrefix;
-        }
-
-        public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
-        {
-            Utf8String identityString;
-            if (_identity is MethodDesc)
-            {
-                identityString = nameMangler.GetMangledMethodName((MethodDesc)_identity);
-            }
-            else if (_identity is TypeDesc)
-            {
-                identityString = nameMangler.GetMangledTypeName((TypeDesc)_identity);
-            }
-            else if (_identity is FieldDesc)
-            {
-                identityString = nameMangler.GetMangledFieldName((FieldDesc)_identity);
-            }
-            else
-            {
-                Debug.Assert(false);
-                identityString = new Utf8String("unknown");
-            }
-
-            sb.Append(nameMangler.CompilationUnitPrefix).Append(_identityPrefix).Append(identityString);
-        }
-
-        public int Offset => 0;
-        protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler);
-        protected override ObjectNodeSection GetDehydratedSection(NodeFactory factory)
-        {
-            if (factory.Target.IsWindows)
-                return ObjectNodeSection.ReadOnlyDataSection;
-            else
-                return ObjectNodeSection.DataSection;
-        }
-        public override bool IsShareable => false;
-        public override bool StaticDependenciesAreComputed => true;
-
-        protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFactory factory)
-        {
-            DependencyList dependencies = new DependencyList();
-            dependencies.Add(new DependencyListEntry(_nativeSignature, "NativeLayoutSignatureNode target vertex"));
-            return dependencies;
-        }
-
-        protected override ObjectData GetDehydratableData(NodeFactory factory, bool relocsOnly = false)
-        {
-            // This node does not trigger generation of other nodes.
-            if (relocsOnly)
-                return new ObjectData(Array.Empty<byte>(), Array.Empty<Relocation>(), 1, new ISymbolDefinitionNode[] { this });
-
-            // Ensure native layout is saved to get valid Vertex offsets
-            factory.MetadataManager.NativeLayoutInfo.SaveNativeLayoutInfoWriter(factory);
-
-            ObjectDataBuilder objData = new ObjectDataBuilder(factory, relocsOnly);
-
-            objData.RequireInitialPointerAlignment();
-            objData.AddSymbol(this);
-
-            objData.EmitPointerReloc(factory.TypeManagerIndirection);
-            objData.EmitNaturalInt(_nativeSignature.SavedVertex.VertexOffset);
-
-            return objData.ToObjectData();
-        }
-
-        public override int ClassCode => 1887049331;
-
-        public override int CompareToImpl(ISortableNode other, CompilerComparer comparer)
-        {
-            NativeLayoutSignatureNode otherSignature = (NativeLayoutSignatureNode)other;
-            if (_identity is MethodDesc)
-            {
-                if (otherSignature._identity is TypeDesc || otherSignature._identity is FieldDesc)
-                    return -1;
-                return comparer.Compare((MethodDesc)_identity, (MethodDesc)((NativeLayoutSignatureNode)other)._identity);
-            }
-            else if (_identity is TypeDesc)
-            {
-                if (otherSignature._identity is MethodDesc)
-                    return 1;
-
-                if (otherSignature._identity is FieldDesc)
-                    return -1;
-
-                return comparer.Compare((TypeDesc)_identity, (TypeDesc)((NativeLayoutSignatureNode)other)._identity);
-            }
-            else if (_identity is FieldDesc)
-            {
-                if (otherSignature._identity is MethodDesc || otherSignature._identity is TypeDesc)
-                    return 1;
-                return comparer.Compare((FieldDesc)_identity, (FieldDesc)((NativeLayoutSignatureNode)other)._identity);
-            }
-            else
-            {
-                throw new NotSupportedException("New type system entity needs a comparison");
-            }
-        }
-    }
-}
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutVertexNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutVertexNode.cs
index ce1c0aa01e3c..60b9a82dd0f9 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutVertexNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutVertexNode.cs
@@ -90,7 +90,7 @@ protected Vertex SetSavedVertex(Vertex value)
         }
     }
 
-    internal abstract class NativeLayoutMethodEntryVertexNode : NativeLayoutSavedVertexNode
+    internal class NativeLayoutMethodEntryVertexNode : NativeLayoutSavedVertexNode
     {
         [Flags]
         public enum MethodEntryFlags
@@ -102,18 +102,18 @@ public enum MethodEntryFlags
         protected readonly MethodDesc _method;
         private MethodEntryFlags _flags;
         private NativeLayoutTypeSignatureVertexNode _containingTypeSig;
-        private NativeLayoutMethodSignatureVertexNode _methodSig;
         private NativeLayoutTypeSignatureVertexNode[] _instantiationArgsSig;
 
         public MethodDesc Method => _method;
 
         public virtual bool IsUnboxingStub => _method.OwningType.IsValueType && !_method.Signature.IsStatic;
 
+        protected override string GetName(NodeFactory factory) => "MethodEntryVertex_" + factory.NameMangler.GetMangledMethodName(_method);
+
         public NativeLayoutMethodEntryVertexNode(NodeFactory factory, MethodDesc method, MethodEntryFlags flags)
         {
             _method = method;
             _flags = flags;
-            _methodSig = factory.NativeLayout.MethodSignatureVertex(method.GetTypicalMethodDefinition().Signature);
 
             if ((_flags & MethodEntryFlags.CreateInstantiatedSignature) == 0)
             {
@@ -131,7 +131,6 @@ public override IEnumerable GetStaticDependencies(NodeFacto
         {
             DependencyList dependencies = new DependencyList();
 
-            dependencies.Add(new DependencyListEntry(_methodSig, "NativeLayoutMethodEntryVertexNode method signature"));
             if ((_flags & MethodEntryFlags.CreateInstantiatedSignature) != 0)
             {
                 dependencies.Add(new DependencyListEntry(context.NecessaryTypeSymbol(_method.OwningType), "NativeLayoutMethodEntryVertexNode containing type"));
@@ -154,6 +153,8 @@ public override IEnumerable GetStaticDependencies(NodeFacto
                 dependencies.Add(new DependencyListEntry(methodEntryPointNode, "NativeLayoutMethodEntryVertexNode entrypoint"));
             }
 
+            context.MetadataManager.GetNativeLayoutMetadataDependencies(ref dependencies, context, _method.GetTypicalMethodDefinition());
+
             return dependencies;
         }
 
@@ -162,8 +163,6 @@ public override Vertex WriteVertex(NodeFactory factory)
             Debug.Assert(Marked, "WriteVertex should only happen for marked vertices");
 
             Vertex containingType = GetContainingTypeVertex(factory);
-            Vertex methodSig = _methodSig.WriteVertex(factory);
-            Vertex methodNameAndSig = GetNativeWriter(factory).GetMethodNameAndSigSignature(_method.Name, methodSig);
 
             Vertex[] args = null;
             MethodFlags flags = 0;
@@ -199,12 +198,10 @@ public override Vertex WriteVertex(NodeFactory factory)
 
                 IMethodNode methodEntryPointNode = GetMethodEntrypointNode(factory);
                 fptrReferenceId = factory.MetadataManager.NativeLayoutInfo.ExternalReferences.GetIndex(methodEntryPointNode);
-
-                if (methodEntryPointNode.Method.IsCanonicalMethod(CanonicalFormKind.Universal))
-                    flags |= MethodFlags.FunctionPointerIsUSG;
             }
 
-            return GetNativeWriter(factory).GetMethodSignature((uint)flags, fptrReferenceId, containingType, methodNameAndSig, args);
+            int token = factory.MetadataManager.GetMetadataHandleForMethod(factory, _method.GetTypicalMethodDefinition());
+            return GetNativeWriter(factory).GetMethodSignature((uint)flags, fptrReferenceId, containingType, token, args);
         }
 
         private Vertex GetContainingTypeVertex(NodeFactory factory)
@@ -228,85 +225,6 @@ protected virtual IMethodNode GetMethodEntrypointNode(NodeFactory factory)
         }
     }
 
-    internal sealed class NativeLayoutMethodLdTokenVertexNode : NativeLayoutMethodEntryVertexNode
-    {
-        protected override string GetName(NodeFactory factory) => "NativeLayoutMethodLdTokenVertexNode_" + factory.NameMangler.GetMangledMethodName(_method);
-
-        public NativeLayoutMethodLdTokenVertexNode(NodeFactory factory, MethodDesc method)
-            : base(factory, method, method.IsRuntimeDeterminedExactMethod || method.IsGenericMethodDefinition ? 0 : MethodEntryFlags.CreateInstantiatedSignature)
-        {
-        }
-
-        public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFactory context)
-        {
-            if (_method.IsVirtual && _method.HasInstantiation && !_method.IsGenericMethodDefinition)
-            {
-                return GetGenericVirtualMethodDependencies(context);
-            }
-            else
-            {
-                return base.GetStaticDependencies(context);
-            }
-        }
-
-        private DependencyList GetGenericVirtualMethodDependencies(NodeFactory factory)
-        {
-            var dependencies = (DependencyList)base.GetStaticDependencies(factory);
-
-            MethodDesc canonMethod = _method.GetCanonMethodTarget(CanonicalFormKind.Specific);
-
-            dependencies.Add(factory.GVMDependencies(canonMethod), "Potential generic virtual method call");
-
-            foreach (TypeDesc instArg in canonMethod.Instantiation)
-            {
-                dependencies.Add(factory.MaximallyConstructableType(instArg), "Type we need to look up for GVM dispatch");
-            }
-
-            return dependencies;
-        }
-
-        public override Vertex WriteVertex(NodeFactory factory)
-        {
-            Debug.Assert(Marked, "WriteVertex should only happen for marked vertices");
-
-            Vertex methodEntryVertex = base.WriteVertex(factory);
-            return SetSavedVertex(factory.MetadataManager.NativeLayoutInfo.LdTokenInfoSection.Place(methodEntryVertex));
-        }
-    }
-
-    internal sealed class NativeLayoutFieldLdTokenVertexNode : NativeLayoutSavedVertexNode
-    {
-        private readonly FieldDesc _field;
-        private readonly NativeLayoutTypeSignatureVertexNode _containingTypeSig;
-
-        public NativeLayoutFieldLdTokenVertexNode(NodeFactory factory, FieldDesc field)
-        {
-            _field = field;
-            _containingTypeSig = factory.NativeLayout.TypeSignatureVertex(field.OwningType);
-        }
-
-        protected override string GetName(NodeFactory factory) => "NativeLayoutFieldLdTokenVertexNode_" + factory.NameMangler.GetMangledFieldName(_field);
-
-        public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFactory context)
-        {
-            return new DependencyListEntry[]
-            {
-                new DependencyListEntry(_containingTypeSig, "NativeLayoutFieldLdTokenVertexNode containing type signature"),
-            };
-        }
-
-        public override Vertex WriteVertex(NodeFactory factory)
-        {
-            Debug.Assert(Marked, "WriteVertex should only happen for marked vertices");
-
-            Vertex containingType = _containingTypeSig.WriteVertex(factory);
-
-            Vertex unplacedVertex = GetNativeWriter(factory).GetFieldSignature(containingType, _field.Name);
-
-            return SetSavedVertex(factory.MetadataManager.NativeLayoutInfo.LdTokenInfoSection.Place(unplacedVertex));
-        }
-    }
-
     internal sealed class NativeLayoutMethodSignatureVertexNode : NativeLayoutVertexNode
     {
         private Internal.TypeSystem.MethodSignature _signature;
@@ -360,31 +278,6 @@ public override Vertex WriteVertex(NodeFactory factory)
         }
     }
 
-    internal sealed class NativeLayoutMethodNameAndSignatureVertexNode : NativeLayoutVertexNode
-    {
-        private MethodDesc _method;
-        private NativeLayoutMethodSignatureVertexNode _methodSig;
-
-        protected override string GetName(NodeFactory factory) => "NativeLayoutMethodNameAndSignatureVertexNode" + factory.NameMangler.GetMangledMethodName(_method);
-
-        public NativeLayoutMethodNameAndSignatureVertexNode(NodeFactory factory, MethodDesc method)
-        {
-            _method = method;
-            _methodSig = factory.NativeLayout.MethodSignatureVertex(method.Signature);
-        }
-        public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFactory context)
-        {
-            return new DependencyListEntry[] { new DependencyListEntry(_methodSig, "NativeLayoutMethodNameAndSignatureVertexNode signature vertex") };
-        }
-        public override Vertex WriteVertex(NodeFactory factory)
-        {
-            Debug.Assert(Marked, "WriteVertex should only happen for marked vertices");
-
-            Vertex methodSig = _methodSig.WriteVertex(factory);
-            return GetNativeWriter(factory).GetMethodNameAndSigSignature(_method.Name, methodSig);
-        }
-    }
-
     internal abstract class NativeLayoutTypeSignatureVertexNode : NativeLayoutVertexNode
     {
         protected readonly TypeDesc _type;
@@ -569,7 +462,8 @@ public override IEnumerable GetStaticDependencies(NodeFacto
                     //            A necessary EEType might be enough for some cases.
                     //            But we definitely need constructed if this is e.g. layout for a typehandle.
                     //            Measurements show this doesn't amount to much (0.004% - 0.3% size cost vs Necessary).
-                    new DependencyListEntry(context.MaximallyConstructableType(_type), "NativeLayoutEETypeVertexNode containing type signature")
+                    new DependencyListEntry(_type.IsGenericDefinition ? context.NecessaryTypeSymbol(_type) : context.MaximallyConstructableType(_type),
+                    "NativeLayoutEETypeVertexNode containing type signature")
                 };
             }
             public override Vertex WriteVertex(NodeFactory factory)
@@ -786,14 +680,14 @@ public NativeLayoutDictionarySignatureNode(NodeFactory nodeFactory, TypeSystemEn
         {
             if (owningMethodOrType is MethodDesc owningMethod)
             {
-                Debug.Assert(owningMethod.IsCanonicalMethod(CanonicalFormKind.Universal) || nodeFactory.LazyGenericsPolicy.UsesLazyGenerics(owningMethod));
+                Debug.Assert(nodeFactory.LazyGenericsPolicy.UsesLazyGenerics(owningMethod));
                 Debug.Assert(owningMethod.IsCanonicalMethod(CanonicalFormKind.Any));
                 Debug.Assert(owningMethod.HasInstantiation);
             }
             else
             {
                 TypeDesc owningType = (TypeDesc)owningMethodOrType;
-                Debug.Assert(owningType.IsCanonicalSubtype(CanonicalFormKind.Universal) || nodeFactory.LazyGenericsPolicy.UsesLazyGenerics(owningType));
+                Debug.Assert(nodeFactory.LazyGenericsPolicy.UsesLazyGenerics(owningType));
                 Debug.Assert(owningType.IsCanonicalSubtype(CanonicalFormKind.Any));
             }
 
@@ -933,7 +827,7 @@ public override Vertex WriteVertex(NodeFactory factory)
             DictionaryLayoutNode associatedLayout = factory.GenericDictionaryLayout(_method);
             ICollection<NativeLayoutVertexNode> templateLayout = associatedLayout.GetTemplateEntries(factory);
 
-            if (!(_method.IsCanonicalMethod(CanonicalFormKind.Universal) || (factory.LazyGenericsPolicy.UsesLazyGenerics(_method))) && (templateLayout.Count > 0))
+            if (!factory.LazyGenericsPolicy.UsesLazyGenerics(_method) && templateLayout.Count > 0)
             {
                 List dictionaryVertices = new List();
 
@@ -956,7 +850,6 @@ public override Vertex WriteVertex(NodeFactory factory)
     public sealed class NativeLayoutTemplateTypeLayoutVertexNode : NativeLayoutSavedVertexNode
     {
         private TypeDesc _type;
-        private bool _isUniversalCanon;
 
         public TypeDesc CanonType => _type.ConvertToCanonForm(CanonicalFormKind.Specific);
 
@@ -966,7 +859,6 @@ public NativeLayoutTemplateTypeLayoutVertexNode(NodeFactory factory, TypeDesc ty
         {
             Debug.Assert(type.IsCanonicalSubtype(CanonicalFormKind.Any));
             Debug.Assert(type.ConvertToCanonForm(CanonicalFormKind.Specific) == type, "Assert that the canonical type passed in is in standard canonical form");
-            _isUniversalCanon = type.IsCanonicalSubtype(CanonicalFormKind.Universal);
 
             _type = GetActualTemplateTypeForType(factory, type);
         }
@@ -1044,20 +936,15 @@ public override IEnumerable GetStaticDependencies(NodeFacto
                 }
             }
 
-            if (!_isUniversalCanon)
+            DefType closestCanonDefType = (DefType)_type.GetClosestDefType().ConvertToCanonForm(CanonicalFormKind.Specific);
+            if (closestCanonDefType.GCStaticFieldSize.AsInt > 0)
             {
-                DefType closestCanonDefType = (DefType)_type.GetClosestDefType().ConvertToCanonForm(CanonicalFormKind.Specific);
-                if (closestCanonDefType.GCStaticFieldSize.AsInt > 0)
-                {
-                    BagElementKind ignored;
-                    yield return new DependencyListEntry(GetStaticsNode(context, out ignored), "type gc static info");
-                }
+                yield return new DependencyListEntry(GetStaticsNode(context, out _), "type gc static info");
+            }
 
-                if (closestCanonDefType.ThreadGcStaticFieldSize.AsInt > 0)
-                {
-                    BagElementKind ignored;
-                    yield return new DependencyListEntry(GetThreadStaticsNode(context, out ignored), "type thread static info");
-                }
+            if (closestCanonDefType.ThreadGcStaticFieldSize.AsInt > 0)
+            {
+                yield return new DependencyListEntry(GetThreadStaticsNode(context, out _), "type thread static info");
             }
 
             if (_type.BaseType != null && !_type.BaseType.IsRuntimeDeterminedSubtype)
@@ -1079,126 +966,16 @@ public override IEnumerable GetStaticDependencies(NodeFacto
                     yield return new DependencyListEntry(dependency, "base type must be template loadable");
                 }
             }
-            else if (_type.IsDelegate && _isUniversalCanon)
-            {
-                // For USG delegate, we need to write the signature of the Invoke method to the native layout.
-                // This signature is used by the calling convention converter to marshal parameters during delegate calls.
-                yield return new DependencyListEntry(context.NativeLayout.MethodSignatureVertex(_type.GetMethod("Invoke", null).GetTypicalMethodDefinition().Signature), "invoke method signature");
-            }
-
-            if (_isUniversalCanon)
-            {
-                // For universal canonical template types, we need to write out field layout information so that we
-                // can correctly compute the type sizes for dynamically created types at runtime, and construct
-                // their GCDesc info
-                foreach (FieldDesc field in _type.GetFields())
-                {
-                    // If this field does not contribute to layout, skip
-                    if (field.HasRva || field.IsLiteral)
-                    {
-                        continue;
-                    }
-
-                    DependencyListEntry typeForFieldLayout;
-
-                    if (field.FieldType.IsGCPointer)
-                    {
-                        typeForFieldLayout = new DependencyListEntry(context.NativeLayout.PlacedSignatureVertex(context.NativeLayout.TypeSignatureVertex(field.Context.GetWellKnownType(WellKnownType.Object))), "universal field layout type object sized");
-                    }
-                    else if (field.FieldType.IsPointer || field.FieldType.IsFunctionPointer)
-                    {
-                        typeForFieldLayout = new DependencyListEntry(context.NativeLayout.PlacedSignatureVertex(context.NativeLayout.TypeSignatureVertex(field.Context.GetWellKnownType(WellKnownType.IntPtr))), "universal field layout type IntPtr sized");
-                    }
-                    else
-                    {
-                        typeForFieldLayout = new DependencyListEntry(context.NativeLayout.PlacedSignatureVertex(context.NativeLayout.TypeSignatureVertex(field.FieldType)), "universal field layout type");
-
-                        // And ensure the type can be properly laid out
-                        foreach (var dependency in context.NativeLayout.TemplateConstructableTypes(field.FieldType))
-                        {
-                            yield return new DependencyListEntry(dependency, "template construction dependency");
-                        }
-                    }
-
-                    yield return typeForFieldLayout;
-                }
-
-                // We also need to write out the signatures of interesting methods in the type's vtable, which
-                // will be needed by the calling convention translation logic at runtime, when the type's methods
-                // get invoked. This logic gathers nodes for entries *unconditionally* present. (entries may be conditionally
-                // present if a type has a vtable which has a size computed by usage not by IL contents)
-                List<NativeLayoutVertexNode> vtableSignatureNodeEntries = null;
-                int currentVTableIndexUnused = 0;
-                ProcessVTableEntriesForCallingConventionSignatureGeneration(context, VTableEntriesToProcess.AllOnTypesThatShouldProduceFullVTables, ref currentVTableIndexUnused,
-                    (int vtableIndex, bool isSealedVTableSlot, MethodDesc declMethod, MethodDesc implMethod) =>
-                    {
-                        if (implMethod.IsAbstract)
-                            return;
-
-                        if (UniversalGenericParameterLayout.VTableMethodRequiresCallingConventionConverter(implMethod))
-                        {
-                            vtableSignatureNodeEntries ??= new List<NativeLayoutVertexNode>();
-
-                            vtableSignatureNodeEntries.Add(context.NativeLayout.MethodSignatureVertex(declMethod.GetTypicalMethodDefinition().Signature));
-                        }
-                    }, _type, _type, _type);
-
-                if (vtableSignatureNodeEntries != null)
-                {
-                    foreach (NativeLayoutVertexNode node in vtableSignatureNodeEntries)
-                        yield return new DependencyListEntry(node, "vtable cctor sig");
-                }
-            }
         }
 
-        public override bool HasConditionalStaticDependencies => _isUniversalCanon;
-        public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory context)
-        {
-            List<CombinedDependencyListEntry> conditionalDependencies = null;
-
-            if (_isUniversalCanon)
-            {
-                // We also need to write out the signatures of interesting methods in the type's vtable, which
-                // will be needed by the calling convention translation logic at runtime, when the type's methods
-                // get invoked. This logic gathers nodes for entries *conditionally* present. (entries may be conditionally
-                // present if a type has a vtable which has a size computed by usage not by IL contents)
-
-                int currentVTableIndexUnused = 0;
-                ProcessVTableEntriesForCallingConventionSignatureGeneration(context, VTableEntriesToProcess.AllOnTypesThatProducePartialVTables, ref currentVTableIndexUnused,
-                    (int vtableIndex, bool isSealedVTableSlot, MethodDesc declMethod, MethodDesc implMethod) =>
-                    {
-                        if (implMethod.IsAbstract)
-                            return;
-
-                        if (UniversalGenericParameterLayout.VTableMethodRequiresCallingConventionConverter(implMethod))
-                        {
-                            conditionalDependencies ??= new List<CombinedDependencyListEntry>();
-
-                            conditionalDependencies.Add(
-                                new CombinedDependencyListEntry(context.NativeLayout.MethodSignatureVertex(declMethod.GetTypicalMethodDefinition().Signature),
-                                                                context.VirtualMethodUse(declMethod),
-                                                                "conditional vtable cctor sig"));
-                        }
-                    }, _type, _type, _type);
-            }
-
-            if (conditionalDependencies != null)
-                return conditionalDependencies;
-            else
-                return Array.Empty<CombinedDependencyListEntry>();
-        }
+        public override bool HasConditionalStaticDependencies => false;
+        public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory context) => null;
 
         private static int CompareDictionaryEntries(KeyValuePair left, KeyValuePair right)
         {
             return left.Key - right.Key;
         }
 
-        private bool HasInstantiationDeterminedSize()
-        {
-            Debug.Assert(_isUniversalCanon);
-            return _type.GetClosestDefType().InstanceFieldSize.IsIndeterminate;
-        }
-
         public override Vertex WriteVertex(NodeFactory factory)
         {
             Debug.Assert(Marked, "WriteVertex should only happen for marked vertices");
@@ -1208,8 +985,6 @@ public override Vertex WriteVertex(NodeFactory factory)
             DictionaryLayoutNode associatedLayout = factory.GenericDictionaryLayout(_type.ConvertToCanonForm(CanonicalFormKind.Specific).GetClosestDefType());
             ICollection templateLayout = associatedLayout.GetTemplateEntries(factory);
 
-            NativeWriter writer = GetNativeWriter(factory);
-
             // Interfaces
             if (_type.RuntimeInterfaces.Length > 0)
             {
@@ -1224,7 +999,7 @@ public override Vertex WriteVertex(NodeFactory factory)
                 layoutInfo.Append(BagElementKind.ImplementedInterfaces, implementedInterfaces.WriteVertex(factory));
             }
 
-            if (!(_isUniversalCanon || (factory.LazyGenericsPolicy.UsesLazyGenerics(_type)) )&& (templateLayout.Count > 0))
+            if (!factory.LazyGenericsPolicy.UsesLazyGenerics(_type) && templateLayout.Count > 0)
             {
                 List dictionaryVertices = new List();
 
@@ -1247,91 +1022,33 @@ public override Vertex WriteVertex(NodeFactory factory)
                 layoutInfo.AppendUnsigned(BagElementKind.ClassConstructorPointer, cctorStaticsIndex);
             }
 
-            if (!_isUniversalCanon)
+            DefType closestCanonDefType = (DefType)_type.GetClosestDefType().ConvertToCanonForm(CanonicalFormKind.Specific);
+            if (closestCanonDefType.NonGCStaticFieldSize.AsInt != 0)
             {
-                DefType closestCanonDefType = (DefType)_type.GetClosestDefType().ConvertToCanonForm(CanonicalFormKind.Specific);
-                if (closestCanonDefType.NonGCStaticFieldSize.AsInt != 0)
-                {
-                    layoutInfo.AppendUnsigned(BagElementKind.NonGcStaticDataSize, checked((uint)closestCanonDefType.NonGCStaticFieldSize.AsInt));
-                }
-
-                if (closestCanonDefType.GCStaticFieldSize.AsInt != 0)
-                {
-                    layoutInfo.AppendUnsigned(BagElementKind.GcStaticDataSize, checked((uint)closestCanonDefType.GCStaticFieldSize.AsInt));
-                    BagElementKind staticDescBagType;
-                    ISymbolNode staticsDescSymbol = GetStaticsNode(factory, out staticDescBagType);
-                    uint gcStaticsSymbolIndex = factory.MetadataManager.NativeLayoutInfo.StaticsReferences.GetIndex(staticsDescSymbol);
-                    layoutInfo.AppendUnsigned(staticDescBagType, gcStaticsSymbolIndex);
-                }
-
-                if (closestCanonDefType.ThreadGcStaticFieldSize.AsInt != 0)
-                {
-                    layoutInfo.AppendUnsigned(BagElementKind.ThreadStaticDataSize, checked((uint)closestCanonDefType.ThreadGcStaticFieldSize.AsInt));
-                    BagElementKind threadStaticDescBagType;
-                    ISymbolNode threadStaticsDescSymbol = GetThreadStaticsNode(factory, out threadStaticDescBagType);
-                    uint threadStaticsSymbolIndex = factory.MetadataManager.NativeLayoutInfo.StaticsReferences.GetIndex(threadStaticsDescSymbol);
-                    layoutInfo.AppendUnsigned(threadStaticDescBagType, threadStaticsSymbolIndex);
-                }
+                layoutInfo.AppendUnsigned(BagElementKind.NonGcStaticDataSize, checked((uint)closestCanonDefType.NonGCStaticFieldSize.AsInt));
             }
 
-            if (_type.BaseType != null && _type.BaseType.IsRuntimeDeterminedSubtype)
+            if (closestCanonDefType.GCStaticFieldSize.AsInt != 0)
             {
-                layoutInfo.Append(BagElementKind.BaseType, factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.TypeSignatureVertex(_type.BaseType)).WriteVertex(factory));
+                layoutInfo.AppendUnsigned(BagElementKind.GcStaticDataSize, checked((uint)closestCanonDefType.GCStaticFieldSize.AsInt));
+                BagElementKind staticDescBagType;
+                ISymbolNode staticsDescSymbol = GetStaticsNode(factory, out staticDescBagType);
+                uint gcStaticsSymbolIndex = factory.MetadataManager.NativeLayoutInfo.StaticsReferences.GetIndex(staticsDescSymbol);
+                layoutInfo.AppendUnsigned(staticDescBagType, gcStaticsSymbolIndex);
             }
 
-            if (_isUniversalCanon)
+            if (closestCanonDefType.ThreadGcStaticFieldSize.AsInt != 0)
             {
-                // For universal canonical template types, we need to write out field layout information so that we
-                // can correctly compute the type sizes for dynamically created types at runtime, and construct
-                // their GCDesc info
-                VertexSequence fieldsSequence = null;
-
-                foreach (FieldDesc field in _type.GetFields())
-                {
-                    // If this field does not contribute to layout, skip
-                    if (field.HasRva || field.IsLiteral)
-                        continue;
-
-                    // NOTE: The order and contents of the signature vertices emitted here is what we consider a field ordinal for the
-                    // purpose of NativeLayoutFieldOffsetGenericDictionarySlotNode.
-
-                    FieldStorage fieldStorage = FieldStorage.Instance;
-                    if (field.IsStatic)
-                    {
-                        if (field.IsThreadStatic)
-                            fieldStorage = FieldStorage.TLSStatic;
-                        else if (field.HasGCStaticBase)
-                            fieldStorage = FieldStorage.GCStatic;
-                        else
-                            fieldStorage = FieldStorage.NonGCStatic;
-                    }
-
-
-                    NativeLayoutPlacedSignatureVertexNode fieldTypeSignature;
-                    if (field.FieldType.IsGCPointer)
-                    {
-                        fieldTypeSignature = factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.TypeSignatureVertex(field.Context.GetWellKnownType(WellKnownType.Object)));
-                    }
-                    else if (field.FieldType.IsPointer || field.FieldType.IsFunctionPointer)
-                    {
-                        fieldTypeSignature = factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.TypeSignatureVertex(field.Context.GetWellKnownType(WellKnownType.IntPtr)));
-                    }
-                    else
-                    {
-                        fieldTypeSignature = factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.TypeSignatureVertex(field.FieldType));
-                    }
-
-                    Vertex staticFieldVertexData = writer.GetTuple(fieldTypeSignature.WriteVertex(factory), writer.GetUnsignedConstant((uint)fieldStorage));
-
-                    fieldsSequence ??= new VertexSequence();
-                    fieldsSequence.Append(staticFieldVertexData);
-                }
+                layoutInfo.AppendUnsigned(BagElementKind.ThreadStaticDataSize, checked((uint)closestCanonDefType.ThreadGcStaticFieldSize.AsInt));
+                BagElementKind threadStaticDescBagType;
+                ISymbolNode threadStaticsDescSymbol = GetThreadStaticsNode(factory, out threadStaticDescBagType);
+                uint threadStaticsSymbolIndex = factory.MetadataManager.NativeLayoutInfo.StaticsReferences.GetIndex(threadStaticsDescSymbol);
+                layoutInfo.AppendUnsigned(threadStaticDescBagType, threadStaticsSymbolIndex);
+            }
 
-                if (fieldsSequence != null)
-                {
-                    Vertex placedFieldsLayout = factory.MetadataManager.NativeLayoutInfo.SignaturesSection.Place(fieldsSequence);
-                    layoutInfo.Append(BagElementKind.FieldLayout, placedFieldsLayout);
-                }
+            if (_type.BaseType != null && _type.BaseType.IsRuntimeDeterminedSubtype)
+            {
+                layoutInfo.Append(BagElementKind.BaseType, factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.TypeSignatureVertex(_type.BaseType)).WriteVertex(factory));
             }
 
             factory.MetadataManager.NativeLayoutInfo.TemplatesSection.Place(layoutInfo);
@@ -1339,13 +1056,6 @@ public override Vertex WriteVertex(NodeFactory factory)
             return SetSavedVertex(layoutInfo);
         }
 
-        private enum VTableEntriesToProcess
-        {
-            AllInVTable,
-            AllOnTypesThatShouldProduceFullVTables,
-            AllOnTypesThatProducePartialVTables
-        }
-
         private static IEnumerable<MethodDesc> EnumVirtualSlotsDeclaredOnType(TypeDesc declType)
         {
             // VirtualMethodUse of Foo.Method will bring in VirtualMethodUse
@@ -1369,101 +1079,6 @@ private static IEnumerable EnumVirtualSlotsDeclaredOnType(TypeDesc d
                 yield return method;
             }
         }
-
-        /// <summary>
-        /// Process the vtable entries of a type by calling operation with the vtable index, declaring method, and implementing method
-        /// Process them in order from 0th entry to last.
-        /// Skip generic virtual methods, as they are not present in the vtable itself
-        /// Do not adjust vtable index for generic dictionary slot
-        /// The vtable index is only actually valid if whichEntries is set to VTableEntriesToProcess.AllInVTable
-        /// </summary>
-        private static void ProcessVTableEntriesForCallingConventionSignatureGeneration(NodeFactory factory, VTableEntriesToProcess whichEntries, ref int currentVTableIndex, Action<int, bool, MethodDesc, MethodDesc> operation, TypeDesc implType, TypeDesc declType, TypeDesc templateType)
-        {
-            if (implType.IsInterface)
-                return;
-
-            declType = declType.GetClosestDefType();
-            templateType = templateType.ConvertToCanonForm(CanonicalFormKind.Specific);
-
-            bool canShareNormalCanonicalCode = declType != declType.ConvertToCanonForm(CanonicalFormKind.Specific);
-
-            var baseType = declType.BaseType;
-            if (baseType != null)
-            {
-                Debug.Assert(templateType.BaseType != null);
-                ProcessVTableEntriesForCallingConventionSignatureGeneration(factory, whichEntries, ref currentVTableIndex, operation, implType, baseType, templateType.BaseType);
-            }
-
-            IEnumerable<MethodDesc> vtableEntriesToProcess;
-
-            if (ConstructedEETypeNode.CreationAllowed(declType))
-            {
-                switch (whichEntries)
-                {
-                    case VTableEntriesToProcess.AllInVTable:
-                        vtableEntriesToProcess = factory.VTable(declType).Slots;
-                        break;
-
-                    case VTableEntriesToProcess.AllOnTypesThatShouldProduceFullVTables:
-                        if (factory.VTable(declType).HasKnownVirtualMethodUse)
-                        {
-                            vtableEntriesToProcess = factory.VTable(declType).Slots;
-                        }
-                        else
-                        {
-                            vtableEntriesToProcess = Array.Empty<MethodDesc>();
-                        }
-                        break;
-
-                    case VTableEntriesToProcess.AllOnTypesThatProducePartialVTables:
-                        if (factory.VTable(declType).HasKnownVirtualMethodUse)
-                        {
-                            vtableEntriesToProcess = Array.Empty<MethodDesc>();
-                        }
-                        else
-                        {
-                            vtableEntriesToProcess = EnumVirtualSlotsDeclaredOnType(declType);
-                        }
-                        break;
-
-                    default:
-                        throw new Exception();
-                }
-            }
-            else
-            {
-                // If allocating an object of the MethodTable isn't permitted, don't process any vtable entries.
-                vtableEntriesToProcess = Array.Empty<MethodDesc>();
-            }
-
-            // Dictionary slot
-            if (declType.HasGenericDictionarySlot() || templateType.HasGenericDictionarySlot())
-                currentVTableIndex++;
-
-            int sealedVTableSlot = 0;
-            DefType closestDefType = implType.GetClosestDefType();
-
-            // Actual vtable slots follow
-            foreach (MethodDesc declMethod in vtableEntriesToProcess)
-            {
-                // No generic virtual methods can appear in the vtable!
-                Debug.Assert(!declMethod.HasInstantiation);
-
-                MethodDesc implMethod = closestDefType.FindVirtualFunctionTargetMethodOnObjectType(declMethod);
-
-                if (implMethod.CanMethodBeInSealedVTable(factory) && !implType.IsArrayTypeWithoutGenericInterfaces())
-                {
-                    // Sealed vtable entries on other types in the hierarchy should not be reported (types read entries
-                    // from their own sealed vtables, and not from the sealed vtables of base types).
-                    if (implMethod.OwningType == closestDefType)
-                        operation(sealedVTableSlot++, true, declMethod, implMethod);
-                }
-                else
-                {
-                    operation(currentVTableIndex++, false, declMethod, implMethod);
-                }
-            }
-        }
     }
 
     public abstract class NativeLayoutGenericDictionarySlotNode : NativeLayoutVertexNode
@@ -1665,29 +1280,11 @@ protected sealed override Vertex WriteSignatureVertex(NativeWriter writer, NodeF
     public sealed class NativeLayoutMethodDictionaryGenericDictionarySlotNode : NativeLayoutGenericDictionarySlotNode
     {
         private MethodDesc _method;
-        private WrappedMethodDictionaryVertexNode _wrappedNode;
-
-        private sealed class WrappedMethodDictionaryVertexNode : NativeLayoutMethodEntryVertexNode
-        {
-            public WrappedMethodDictionaryVertexNode(NodeFactory factory, MethodDesc method) :
-                base(factory, method, default(MethodEntryFlags))
-            {
-            }
-
-            protected override IMethodNode GetMethodEntrypointNode(NodeFactory factory)
-            {
-                throw new NotSupportedException();
-            }
-
-            protected sealed override string GetName(NodeFactory factory) => "WrappedMethodEntryVertexNodeForDictionarySlot_" + factory.NameMangler.GetMangledMethodName(_method);
-        }
-
 
         public NativeLayoutMethodDictionaryGenericDictionarySlotNode(NodeFactory factory, MethodDesc method)
         {
             Debug.Assert(method.HasInstantiation);
             _method = method;
-            _wrappedNode = new WrappedMethodDictionaryVertexNode(factory, method);
         }
 
         protected sealed override string GetName(NodeFactory factory) => "NativeLayoutMethodDictionaryGenericDictionarySlotNode_" + factory.NameMangler.GetMangledMethodName(_method);
@@ -1709,14 +1306,14 @@ public sealed override IEnumerable GetStaticDependencies(No
 
             GenericMethodsTemplateMap.GetTemplateMethodDependencies(ref dependencies, factory, _method.GetCanonMethodTarget(CanonicalFormKind.Specific));
 
-            dependencies.Add(_wrappedNode, "wrappednode");
+            dependencies.Add(factory.NativeLayout.MethodEntry(_method), "wrappednode");
 
             return dependencies;
         }
 
         protected sealed override Vertex WriteSignatureVertex(NativeWriter writer, NodeFactory factory)
         {
-            return _wrappedNode.WriteVertex(factory);
+            return factory.NativeLayout.MethodEntry(_method).WriteVertex(factory);
         }
     }
 
@@ -1738,7 +1335,7 @@ public sealed override IEnumerable GetStaticDependencies(No
         {
             var result = new DependencyList
             {
-                { factory.NativeLayout.FieldLdTokenVertex(_field), "Field Signature" }
+                { factory.NativeLayout.TypeSignatureVertex(_field.OwningType), "Owning type of field" }
             };
 
             foreach (var dependency in factory.NativeLayout.TemplateConstructableTypes(_field.OwningType))
@@ -1755,8 +1352,11 @@ public sealed override IEnumerable GetStaticDependencies(No
 
         protected sealed override Vertex WriteSignatureVertex(NativeWriter writer, NodeFactory factory)
         {
-            Vertex ldToken = factory.NativeLayout.FieldLdTokenVertex(_field).WriteVertex(factory);
-            return GetNativeWriter(factory).GetRelativeOffsetSignature(ldToken);
+            Vertex owningType = factory.NativeLayout.TypeSignatureVertex(_field.OwningType).WriteVertex(factory);
+            Vertex fieldMetadataHandle = writer.GetUnsignedConstant(
+                (uint)factory.MetadataManager.GetMetadataHandleForField(factory, _field.GetTypicalFieldDefinition()));
+
+            return writer.GetTuple(owningType, fieldMetadataHandle);
         }
     }
 
@@ -1764,7 +1364,7 @@ public sealed class NativeLayoutMethodLdTokenGenericDictionarySlotNode : NativeL
     {
         private MethodDesc _method;
 
-        public NativeLayoutMethodLdTokenGenericDictionarySlotNode(MethodDesc method)
+        public NativeLayoutMethodLdTokenGenericDictionarySlotNode(NodeFactory factory, MethodDesc method)
         {
             _method = method;
         }
@@ -1775,10 +1375,7 @@ public NativeLayoutMethodLdTokenGenericDictionarySlotNode(MethodDesc method)
 
         public sealed override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFactory factory)
         {
-            var result = new DependencyList
-            {
-                { factory.NativeLayout.MethodLdTokenVertex(_method), "Method Signature" }
-            };
+            var result = new DependencyList();
 
             foreach (var dependency in factory.NativeLayout.TemplateConstructableTypes(_method.OwningType))
             {
@@ -1793,13 +1390,14 @@ public sealed override IEnumerable GetStaticDependencies(No
 
             factory.MetadataManager.GetDependenciesDueToLdToken(ref result, factory, _method.GetCanonMethodTarget(CanonicalFormKind.Specific));
 
+            result.Add(factory.NativeLayout.MethodEntry(_method), "wrappednode");
+
             return result;
         }
 
         protected sealed override Vertex WriteSignatureVertex(NativeWriter writer, NodeFactory factory)
         {
-            Vertex ldToken = factory.NativeLayout.MethodLdTokenVertex(_method).WriteVertex(factory);
-            return GetNativeWriter(factory).GetRelativeOffsetSignature(ldToken);
+            return factory.NativeLayout.MethodEntry(_method).WriteVertex(factory);
         }
     }
 
@@ -1842,7 +1440,7 @@ public sealed override IEnumerable GetStaticDependencies(No
             DependencyNodeCore<NodeFactory> constrainedMethodDescriptorNode;
             if (_constrainedMethod.HasInstantiation)
             {
-                constrainedMethodDescriptorNode = factory.NativeLayout.MethodLdTokenVertex(_constrainedMethod);
+                constrainedMethodDescriptorNode = factory.NativeLayout.MethodEntry(_constrainedMethod);
             }
             else
             {
@@ -1866,6 +1464,12 @@ public sealed override IEnumerable GetStaticDependencies(No
 
             foreach (var dependency in factory.NativeLayout.TemplateConstructableTypes(_constraintType))
                 yield return new DependencyListEntry(dependency, "template construction dependency constraintType");
+
+            if (_constrainedMethod.IsVirtual && _constrainedMethod.HasInstantiation)
+            {
+                MethodDesc canonMethod = _constrainedMethod.GetCanonMethodTarget(CanonicalFormKind.Specific);
+                yield return new DependencyListEntry(factory.GVMDependencies(canonMethod), "Generic virtual method call");
+            }
         }
 
         protected sealed override Vertex WriteSignatureVertex(NativeWriter writer, NodeFactory factory)
@@ -1874,9 +1478,8 @@ protected sealed override Vertex WriteSignatureVertex(NativeWriter writer, NodeF
             if (_constrainedMethod.HasInstantiation)
             {
                 Debug.Assert(SignatureKind is FixupSignatureKind.GenericStaticConstrainedMethod);
-                Vertex constrainedMethodVertex = factory.NativeLayout.MethodLdTokenVertex(_constrainedMethod).WriteVertex(factory);
-                Vertex relativeOffsetVertex = GetNativeWriter(factory).GetRelativeOffsetSignature(constrainedMethodVertex);
-                return writer.GetTuple(constraintType, relativeOffsetVertex);
+                Vertex constrainedMethodVertex = factory.NativeLayout.MethodEntry(_constrainedMethod).WriteVertex(factory);
+                return writer.GetTuple(constraintType, constrainedMethodVertex);
             }
             else
             {
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.GenericLookups.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.GenericLookups.cs
index 7fecac2a1b57..8bed90a325e2 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.GenericLookups.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.GenericLookups.cs
@@ -99,12 +99,9 @@ public GenericLookupResult Type(TypeDesc type)
 
             public GenericLookupResult UnwrapNullableType(TypeDesc type)
             {
-                // An actual unwrap nullable lookup is only required if the type is exactly a runtime
-                // determined type associated with System.__UniversalCanon itself, or if it's
+                // An actual unwrap nullable lookup is only required if the type is exactly
                 // a runtime determined instance of Nullable.
-                if (type.IsRuntimeDeterminedType && (
-                    ((RuntimeDeterminedType)type).CanonicalType.IsCanonicalDefinitionType(CanonicalFormKind.Universal) ||
-                    ((RuntimeDeterminedType)type).CanonicalType.IsNullable))
+                if (type.IsRuntimeDeterminedType && ((RuntimeDeterminedType)type).CanonicalType.IsNullable)
                     return _unwrapNullableSymbols.GetOrAdd(type);
                 else
                 {
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.NativeLayout.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.NativeLayout.cs
index e3eb154d8d0c..5918b71a451b 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.NativeLayout.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.NativeLayout.cs
@@ -37,11 +37,6 @@ private void CreateNodeCaches()
                     return new NativeLayoutMethodSignatureVertexNode(_factory, signature);
                 });
 
-                _methodNameAndSignatures = new NodeCache<MethodDesc, NativeLayoutMethodNameAndSignatureVertexNode>(method =>
-                {
-                    return new NativeLayoutMethodNameAndSignatureVertexNode(_factory, method);
-                });
-
                 _placedSignatures = new NodeCache<NativeLayoutVertexNode, NativeLayoutPlacedSignatureVertexNode>(vertexNode =>
                 {
                     return new NativeLayoutPlacedSignatureVertexNode(vertexNode);
@@ -57,19 +52,9 @@ private void CreateNodeCaches()
                     return new NativeLayoutPlacedVertexSequenceOfUIntVertexNode(uints);
                 }, new UIntSequenceComparer());
 
-                _methodLdTokenSignatures = new NodeCache<MethodDesc, NativeLayoutMethodLdTokenVertexNode>(method =>
-                {
-                    return new NativeLayoutMethodLdTokenVertexNode(_factory, method);
-                });
-
-                _fieldLdTokenSignatures = new NodeCache<FieldDesc, NativeLayoutFieldLdTokenVertexNode>(field =>
-                {
-                    return new NativeLayoutFieldLdTokenVertexNode(_factory, field);
-                });
-
-                _nativeLayoutSignatureNodes = new NodeCache<NativeLayoutSignatureKey, NativeLayoutSignatureNode>(key =>
+                _methodEntries = new NodeCache<MethodDesc, NativeLayoutMethodEntryVertexNode>(method =>
                 {
-                    return new NativeLayoutSignatureNode(key.SignatureVertex, key.Identity, key.IdentityPrefix);
+                    return new NativeLayoutMethodEntryVertexNode(_factory, method, default);
                 });
 
                 _templateMethodEntries = new NodeCache(method =>
@@ -144,7 +129,7 @@ private void CreateNodeCaches()
 
                 _methodLdToken_GenericDictionarySlots = new NodeCache<MethodDesc, NativeLayoutMethodLdTokenGenericDictionarySlotNode>(method =>
                 {
-                    return new NativeLayoutMethodLdTokenGenericDictionarySlotNode(method);
+                    return new NativeLayoutMethodLdTokenGenericDictionarySlotNode(_factory, method);
                 });
 
                 _dictionarySignatures = new NodeCache<TypeSystemEntity, NativeLayoutDictionarySignatureNode>(owningMethodOrType =>
@@ -215,33 +200,6 @@ public IEnumerable TemplateConstructableTypes(TypeDesc type)
                 }
             }
 
-            // Produce a set of dependencies that is necessary such that if this type
-            // needs to be used referenced from a NativeLayout template and any Universal Shared
-            // instantiation is all that is needed, that the template
-            // will be properly constructable.  (This is done by ensuring that all
-            // canonical types in the deconstruction of the type are ConstructedEEType instead
-            // of just necessary, and that the USG variant of the template is created
-            // (Which is what the actual templates signatures will ensure)
-            public IEnumerable UniversalTemplateConstructableTypes(TypeDesc type)
-            {
-                while (type.IsParameterizedType)
-                {
-                    type = ((ParameterizedType)type).ParameterType;
-                }
-
-                if (type.IsSignatureVariable)
-                    yield break;
-
-                TypeDesc canonicalType = type.ConvertToCanonForm(CanonicalFormKind.Universal);
-                yield return _factory.MaximallyConstructableType(canonicalType);
-
-                foreach (TypeDesc instantiationType in type.Instantiation)
-                {
-                    foreach (var dependency in UniversalTemplateConstructableTypes(instantiationType))
-                        yield return dependency;
-                }
-            }
-
             private NodeCache<TypeDesc, NativeLayoutTypeSignatureVertexNode> _typeSignatures;
             internal NativeLayoutTypeSignatureVertexNode TypeSignatureVertex(TypeDesc type)
             {
@@ -260,12 +218,6 @@ internal NativeLayoutMethodSignatureVertexNode MethodSignatureVertex(MethodSigna
                 return _methodSignatures.GetOrAdd(signature);
             }
 
-            private NodeCache<MethodDesc, NativeLayoutMethodNameAndSignatureVertexNode> _methodNameAndSignatures;
-            internal NativeLayoutMethodNameAndSignatureVertexNode MethodNameAndSignatureVertex(MethodDesc method)
-            {
-                return _methodNameAndSignatures.GetOrAdd(method);
-            }
-
             private NodeCache<NativeLayoutVertexNode, NativeLayoutPlacedSignatureVertexNode> _placedSignatures;
             internal NativeLayoutPlacedSignatureVertexNode PlacedSignatureVertex(NativeLayoutVertexNode vertexNode)
             {
@@ -355,63 +307,10 @@ internal NativeLayoutPlacedVertexSequenceOfUIntVertexNode PlacedUIntVertexSequen
                 return _placedUIntVertexSequence.GetOrAdd(uints);
             }
 
-            private NodeCache<MethodDesc, NativeLayoutMethodLdTokenVertexNode> _methodLdTokenSignatures;
-            internal NativeLayoutMethodLdTokenVertexNode MethodLdTokenVertex(MethodDesc method)
-            {
-                return _methodLdTokenSignatures.GetOrAdd(method);
-            }
-
-            private NodeCache<FieldDesc, NativeLayoutFieldLdTokenVertexNode> _fieldLdTokenSignatures;
-            internal NativeLayoutFieldLdTokenVertexNode FieldLdTokenVertex(FieldDesc field)
-            {
-                return _fieldLdTokenSignatures.GetOrAdd(field);
-            }
-
-            private struct NativeLayoutSignatureKey : IEquatable<NativeLayoutSignatureKey>
-            {
-                public NativeLayoutSignatureKey(NativeLayoutSavedVertexNode signatureVertex, Utf8String identityPrefix, TypeSystemEntity identity)
-                {
-                    SignatureVertex = signatureVertex;
-                    IdentityPrefix = identityPrefix;
-                    Identity = identity;
-                }
-
-                public NativeLayoutSavedVertexNode SignatureVertex { get; }
-                public Utf8String IdentityPrefix { get; }
-                public TypeSystemEntity Identity { get; }
-
-                public override bool Equals(object obj)
-                {
-                    if (!(obj is NativeLayoutSignatureKey))
-                        return false;
-
-                    return Equals((NativeLayoutSignatureKey)obj);
-                }
-
-                public override int GetHashCode()
-                {
-                    return SignatureVertex.GetHashCode();
-                }
-
-                public bool Equals(NativeLayoutSignatureKey other)
-                {
-                    if (SignatureVertex != other.SignatureVertex)
-                        return false;
-
-                    if (!IdentityPrefix.Equals(other.IdentityPrefix))
-                        return false;
-
-                    if (Identity != other.Identity)
-                        return false;
-
-                    return true;
-                }
-            }
-
-            private NodeCache<NativeLayoutSignatureKey, NativeLayoutSignatureNode> _nativeLayoutSignatureNodes;
-            public NativeLayoutSignatureNode NativeLayoutSignature(NativeLayoutSavedVertexNode signature, Utf8String identityPrefix, TypeSystemEntity identity)
+            private NodeCache<MethodDesc, NativeLayoutMethodEntryVertexNode> _methodEntries;
+            internal NativeLayoutMethodEntryVertexNode MethodEntry(MethodDesc method)
             {
-                return _nativeLayoutSignatureNodes.GetOrAdd(new NativeLayoutSignatureKey(signature, identityPrefix, identity));
+                return _methodEntries.GetOrAdd(method);
             }
 
             private NodeCache _templateMethodEntries;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs
index c9c9c52df87f..27e6ce141687 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs
@@ -545,7 +545,12 @@ private void CreateNodeCaches()
 
             _methodsWithMetadata = new NodeCache<MethodDesc, MethodMetadataNode>(method =>
             {
-                return new MethodMetadataNode(method);
+                return new MethodMetadataNode(method, isMinimal: false);
+            });
+
+            _methodsWithLimitedMetadata = new NodeCache<MethodDesc, MethodMetadataNode>(method =>
+            {
+                return new MethodMetadataNode(method, isMinimal: true);
             });
 
             _fieldsWithMetadata = new NodeCache<FieldDesc, FieldMetadataNode>(field =>
@@ -568,6 +573,11 @@ private void CreateNodeCaches()
                 return new CustomAttributeMetadataNode(ca);
             });
 
+            _parametersWithMetadata = new NodeCache<ReflectableParameter, MethodParameterMetadataNode>(p =>
+            {
+                return new MethodParameterMetadataNode(p);
+            });
+
             _genericDictionaryLayouts = new NodeCache<TypeSystemEntity, DictionaryLayoutNode>(_dictionaryLayoutProvider.GetLayout);
 
             _stringAllocators = new NodeCache(constructor =>
@@ -595,7 +605,7 @@ private IEETypeNode CreateNecessaryTypeNode(TypeDesc type)
             {
                 if (type.IsGenericDefinition)
                 {
-                    return new GenericDefinitionEETypeNode(this, type);
+                    return new ReflectionInvisibleGenericDefinitionEETypeNode(this, type);
                 }
                 else if (type.IsCanonicalDefinitionType(CanonicalFormKind.Any))
                 {
@@ -624,7 +634,11 @@ private IEETypeNode CreateConstructedTypeNode(TypeDesc type)
 
             if (_compilationModuleGroup.ContainsType(type))
             {
-                if (type.IsCanonicalSubtype(CanonicalFormKind.Any))
+                if (type.IsGenericDefinition)
+                {
+                    return new ReflectionVisibleGenericDefinitionEETypeNode(this, type);
+                }
+                else if (type.IsCanonicalSubtype(CanonicalFormKind.Any))
                 {
                     return new CanonicalEETypeNode(this, type);
                 }
@@ -707,7 +721,7 @@ public IEETypeNode ConstructedTypeSymbol(TypeDesc type)
 
         public IEETypeNode MaximallyConstructableType(TypeDesc type)
         {
-            if (ConstructedEETypeNode.CreationAllowed(type))
+            if (ConstructedEETypeNode.CreationAllowed(type) || type.IsGenericDefinition)
                 return ConstructedTypeSymbol(type);
             else
                 return NecessaryTypeSymbol(type);
@@ -1338,6 +1352,16 @@ internal MethodMetadataNode MethodMetadata(MethodDesc method)
             return _methodsWithMetadata.GetOrAdd(method);
         }
 
+        private NodeCache<MethodDesc, MethodMetadataNode> _methodsWithLimitedMetadata;
+
+        internal MethodMetadataNode LimitedMethodMetadata(MethodDesc method)
+        {
+            // These are only meaningful for UsageBasedMetadataManager. We should not have them
+            // in the dependency graph otherwise.
+            Debug.Assert(MetadataManager is UsageBasedMetadataManager);
+            return _methodsWithLimitedMetadata.GetOrAdd(method);
+        }
+
         private NodeCache<FieldDesc, FieldMetadataNode> _fieldsWithMetadata;
 
         internal FieldMetadataNode FieldMetadata(FieldDesc field)
@@ -1374,6 +1398,16 @@ internal CustomAttributeMetadataNode CustomAttributeMetadata(ReflectableCustomAt
             return _customAttributesWithMetadata.GetOrAdd(ca);
         }
 
+        private NodeCache<ReflectableParameter, MethodParameterMetadataNode> _parametersWithMetadata;
+
+        internal MethodParameterMetadataNode MethodParameterMetadata(ReflectableParameter ca)
+        {
+            // These are only meaningful for UsageBasedMetadataManager. We should not have them
+            // in the dependency graph otherwise.
+            Debug.Assert(MetadataManager is UsageBasedMetadataManager);
+            return _parametersWithMetadata.GetOrAdd(ca);
+        }
+
         private NodeCache<string, FrozenStringNode> _frozenStringNodes;
 
         public FrozenStringNode SerializedStringObject(string data)
@@ -1390,7 +1424,7 @@ public SerializedFrozenObjectNode SerializedFrozenObject(MetadataType owningType
 
         public FrozenRuntimeTypeNode SerializedMaximallyConstructableRuntimeTypeObject(TypeDesc type)
         {
-            if (ConstructedEETypeNode.CreationAllowed(type))
+            if (ConstructedEETypeNode.CreationAllowed(type) || type.IsGenericDefinition)
                 return SerializedConstructedRuntimeTypeObject(type);
             return SerializedNecessaryRuntimeTypeObject(type);
         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NonGCStaticsNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NonGCStaticsNode.cs
index 8e548e4ce518..e03cad4f078d 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NonGCStaticsNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NonGCStaticsNode.cs
@@ -34,12 +34,36 @@ public NonGCStaticsNode(MetadataType type, PreinitializationManager preinitializ
 
         protected override ObjectNodeSection GetDehydratedSection(NodeFactory factory)
         {
-            if (HasCCtorContext
-                || _preinitializationManager.IsPreinitialized(_type))
+            if (HasCCtorContext)
             {
-                // We have data to be emitted so this needs to be in an initialized data section
+                // Needs to be in a writable, initialized data section because we need info on how to run the cctor and whether it has already run.
                 return ObjectNodeSection.DataSection;
             }
+            else if (_preinitializationManager.IsPreinitialized(_type))
+            {
+                // Unix linkers don't like relocs to readonly data section
+                if (!factory.Target.IsWindows)
+                    return ObjectNodeSection.DataSection;
+
+                ReadOnlyFieldPolicy readOnlyPolicy = _preinitializationManager.ReadOnlyFieldPolicy;
+
+                bool allFieldsReadOnly = true;
+                foreach (FieldDesc field in _type.GetFields())
+                {
+                    if (!IsNonGcStaticField(field))
+                        continue;
+
+                    allFieldsReadOnly = readOnlyPolicy.IsReadOnly(field);
+                    if (!allFieldsReadOnly)
+                        break;
+                }
+
+                // If all fields are read only, we can place this into a read only section
+                if (allFieldsReadOnly)
+                    return ObjectNodeSection.ReadOnlyDataSection;
+                else
+                    return ObjectNodeSection.DataSection;
+            }
             else
             {
                 // This is all zeros; place this to the BSS section
@@ -149,6 +173,9 @@ protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFact
             return dependencyList;
         }
 
+        private static bool IsNonGcStaticField(FieldDesc field)
+            => field.IsStatic && !field.HasRva && !field.IsLiteral && !field.IsThreadStatic && !field.HasGCStaticBase;
+
         protected override ObjectData GetDehydratableData(NodeFactory factory, bool relocsOnly)
         {
             ObjectDataBuilder builder = new ObjectDataBuilder(factory, relocsOnly);
@@ -193,7 +220,7 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo
                 int initialOffset = builder.CountBytes;
                 foreach (FieldDesc field in _type.GetFields())
                 {
-                    if (!field.IsStatic || field.HasRva || field.IsLiteral || field.IsThreadStatic || field.HasGCStaticBase)
+                    if (!IsNonGcStaticField(field))
                         continue;
 
                     int padding = field.Offset.AsInt - builder.CountBytes + initialOffset;
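
The new section-selection logic above only moves a preinitialized non-GC statics block into the read-only section when the target is Windows and every non-GC static field is read-only; otherwise it stays in the writable data section (or BSS when there is nothing to emit). A minimal sketch of that decision, using simplified stand-in types rather than the ILCompiler ones:

using System.Collections.Generic;

enum StaticsSection { Data, ReadOnlyData, Bss }

static class SectionChoice
{
    // Sketch only: (IsNonGcStatic, IsReadOnly) tuples stand in for FieldDesc plus ReadOnlyFieldPolicy.
    public static StaticsSection Pick(bool hasCCtorContext, bool isPreinitialized, bool isWindows,
        IEnumerable<(bool IsNonGcStatic, bool IsReadOnly)> fields)
    {
        if (hasCCtorContext)
            return StaticsSection.Data;        // cctor context must stay writable

        if (!isPreinitialized)
            return StaticsSection.Bss;         // all zeros, nothing to emit

        if (!isWindows)
            return StaticsSection.Data;        // Unix linkers reject relocs into read-only data

        foreach (var field in fields)
        {
            if (field.IsNonGcStatic && !field.IsReadOnly)
                return StaticsSection.Data;    // any writable field forces the writable section
        }

        return StaticsSection.ReadOnlyData;    // every non-GC static field is read-only
    }
}
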
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectedFieldNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectedFieldNode.cs
index 07f612e2aae5..f5359a5f6e57 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectedFieldNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectedFieldNode.cs
@@ -97,7 +97,7 @@ public override IEnumerable GetStaticDependencies(NodeFacto
             }
 
             TypeDesc fieldType = _field.FieldType.NormalizeInstantiation();
-            ReflectionInvokeMapNode.AddSignatureDependency(ref dependencies, factory, fieldType, "Type of the field");
+            ReflectionInvokeMapNode.AddSignatureDependency(ref dependencies, factory, _field, fieldType, "Type of the field", isOut: true);
 
             return dependencies;
         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionFieldMapNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionFieldMapNode.cs
index 09835d6ba607..d8001f0f2482 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionFieldMapNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionFieldMapNode.cs
@@ -77,15 +77,9 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                     flags = FieldTableFlags.Instance | FieldTableFlags.FieldOffsetEncodedDirectly;
                 }
 
-                if (fieldMapping.MetadataHandle != 0)
-                    flags |= FieldTableFlags.HasMetadataHandle;
-
                 if (field.OwningType.IsCanonicalSubtype(CanonicalFormKind.Any))
                     flags |= FieldTableFlags.IsAnyCanonicalEntry;
 
-                if (field.OwningType.IsCanonicalSubtype(CanonicalFormKind.Universal))
-                    flags |= FieldTableFlags.IsUniversalCanonicalEntry;
-
                 if (field.IsInitOnly)
                     flags |= FieldTableFlags.IsInitOnly;
 
@@ -98,78 +92,62 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                 vertex = writer.GetTuple(vertex,
                     writer.GetUnsignedConstant(declaringTypeId));
 
-                if ((flags & FieldTableFlags.HasMetadataHandle) != 0)
-                {
-                    // Only store the offset portion of the metadata handle to get better integer compression
-                    vertex = writer.GetTuple(vertex,
-                        writer.GetUnsignedConstant((uint)(fieldMapping.MetadataHandle & MetadataManager.MetadataOffsetMask)));
-                }
-                else
-                {
-                    // No metadata handle means we need to store name
-                    vertex = writer.GetTuple(vertex,
-                        writer.GetStringConstant(field.Name));
-                }
+                // Only store the offset portion of the metadata handle to get better integer compression
+                vertex = writer.GetTuple(vertex,
+                    writer.GetUnsignedConstant((uint)(fieldMapping.MetadataHandle & MetadataManager.MetadataOffsetMask)));
 
-                if ((flags & FieldTableFlags.IsUniversalCanonicalEntry) != 0)
-                {
-                    vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant(checked((uint)field.GetFieldOrdinal())));
-                }
-                else
+                switch (flags & FieldTableFlags.StorageClass)
                 {
-                    switch (flags & FieldTableFlags.StorageClass)
-                    {
-                        case FieldTableFlags.ThreadStatic:
-                        case FieldTableFlags.GCStatic:
-                        case FieldTableFlags.NonGCStatic:
+                    case FieldTableFlags.ThreadStatic:
+                    case FieldTableFlags.GCStatic:
+                    case FieldTableFlags.NonGCStatic:
+                        {
+                            uint fieldOffset = (uint)field.Offset.AsInt;
+                            if (field.IsThreadStatic && field.OwningType is MetadataType mt)
+                            {
+                                fieldOffset += factory.ThreadStaticBaseOffset(mt);
+                            }
+
+                            if (field.OwningType.HasInstantiation)
+                            {
+                                vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant(fieldOffset));
+                            }
+                            else
                             {
-                                uint fieldOffset = (uint)field.Offset.AsInt;
-                                if (field.IsThreadStatic && field.OwningType is MetadataType mt)
+                                MetadataType metadataType = (MetadataType)field.OwningType;
+
+                                ISymbolNode staticsNode;
+                                if (field.IsThreadStatic)
+                                {
+                                    staticsNode = factory.TypeThreadStaticIndex(metadataType);
+                                }
+                                else if (field.HasGCStaticBase)
+                                {
+                                    staticsNode = factory.TypeGCStaticsSymbol(metadataType);
+                                }
+                                else
                                 {
-                                    fieldOffset += factory.ThreadStaticBaseOffset(mt);
+                                    staticsNode = factory.TypeNonGCStaticsSymbol(metadataType);
                                 }
 
-                                if (field.OwningType.HasInstantiation)
+                                if (!field.IsThreadStatic && !field.HasGCStaticBase)
                                 {
-                                    vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant(fieldOffset));
+                                    uint index = _externalReferences.GetIndex(staticsNode, (int)fieldOffset);
+                                    vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant(index));
                                 }
                                 else
                                 {
-                                    MetadataType metadataType = (MetadataType)field.OwningType;
-
-                                    ISymbolNode staticsNode;
-                                    if (field.IsThreadStatic)
-                                    {
-                                        staticsNode = factory.TypeThreadStaticIndex(metadataType);
-                                    }
-                                    else if (field.HasGCStaticBase)
-                                    {
-                                        staticsNode = factory.TypeGCStaticsSymbol(metadataType);
-                                    }
-                                    else
-                                    {
-                                        staticsNode = factory.TypeNonGCStaticsSymbol(metadataType);
-                                    }
-
-                                    if (!field.IsThreadStatic && !field.HasGCStaticBase)
-                                    {
-                                        uint index = _externalReferences.GetIndex(staticsNode, (int)fieldOffset);
-                                        vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant(index));
-                                    }
-                                    else
-                                    {
-                                        uint index = _externalReferences.GetIndex(staticsNode);
-                                        vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant(index));
-                                        vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant(fieldOffset));
-                                    }
+                                    uint index = _externalReferences.GetIndex(staticsNode);
+                                    vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant(index));
+                                    vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant(fieldOffset));
                                 }
                             }
-                            break;
+                        }
+                        break;
 
-                        case FieldTableFlags.Instance:
-                            vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant((uint)field.Offset.AsInt));
-                            break;
-                    }
+                    case FieldTableFlags.Instance:
+                        vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant((uint)field.Offset.AsInt));
+                        break;
                 }
 
                 int hashCode = field.OwningType.ConvertToCanonForm(CanonicalFormKind.Specific).GetHashCode();
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionInvokeMapNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionInvokeMapNode.cs
index ae964fd498a5..82a7aef31d87 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionInvokeMapNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionInvokeMapNode.cs
@@ -55,9 +55,9 @@ public static void AddDependenciesDueToReflectability(ref DependencyList depende
                 dependencies.Add(factory.MethodEntrypoint(invokeStub), "Reflection invoke");
 
                 var signature = method.Signature;
-                AddSignatureDependency(ref dependencies, factory, signature.ReturnType, "Reflection invoke");
+                AddSignatureDependency(ref dependencies, factory, method, signature.ReturnType, "Reflection invoke", isOut: true);
                 foreach (var parameterType in signature)
-                    AddSignatureDependency(ref dependencies, factory, parameterType, "Reflection invoke");
+                    AddSignatureDependency(ref dependencies, factory, method, parameterType, "Reflection invoke", isOut: false);
             }
 
             if (method.OwningType.IsValueType && !method.Signature.IsStatic)
@@ -68,38 +68,24 @@ public static void AddDependenciesDueToReflectability(ref DependencyList depende
                 dependencies.Add(factory.AddressTakenMethodEntrypoint(method), "Body of a reflectable method");
             }
 
-            // If the method is defined in a different module than this one, a metadata token isn't known for performing the reference
-            // Use a name/sig reference instead.
-            if (!factory.MetadataManager.WillUseMetadataTokenToReferenceMethod(method))
-            {
-                dependencies.Add(factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition())),
-                    "Non metadata-local method reference");
-            }
-
             if (method.HasInstantiation)
             {
-                if (method.IsCanonicalMethod(CanonicalFormKind.Any))
-                {
-                    dependencies.Add(factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition())),
-                        "Signature of canonical method");
-                }
-
-                if (!method.IsCanonicalMethod(CanonicalFormKind.Universal))
+                foreach (var instArg in method.Instantiation)
                 {
-                    foreach (var instArg in method.Instantiation)
-                    {
-                        dependencies.Add(factory.NecessaryTypeSymbol(instArg), "Reflectable generic method inst arg");
-                    }
+                    dependencies.Add(factory.NecessaryTypeSymbol(instArg), "Reflectable generic method inst arg");
                 }
             }
 
             ReflectionVirtualInvokeMapNode.GetVirtualInvokeMapDependencies(ref dependencies, factory, method);
         }
 
-        internal static void AddSignatureDependency(ref DependencyList dependencies, NodeFactory factory, TypeDesc type, string reason)
+        internal static void AddSignatureDependency(ref DependencyList dependencies, NodeFactory factory, TypeSystemEntity referent, TypeDesc type, string reason, bool isOut)
         {
             if (type.IsByRef)
+            {
                 type = ((ParameterizedType)type).ParameterType;
+                isOut = true;
+            }
 
             // Pointer runtime type handles can be created at runtime if necessary
             while (type.IsPointer)
@@ -109,16 +95,26 @@ internal static void AddSignatureDependency(ref DependencyList dependencies, Nod
             if (type.IsPrimitive || type.IsVoid)
                 return;
 
-            // Reflection doesn't need the ability to generate MethodTables out of thin air for reference types.
-            // Skip generating the dependencies.
-            if (type.IsGCPointer)
-                return;
-
-            TypeDesc canonType = type.ConvertToCanonForm(CanonicalFormKind.Specific);
-            if (canonType.IsCanonicalSubtype(CanonicalFormKind.Any))
-                GenericTypesTemplateMap.GetTemplateTypeDependencies(ref dependencies, factory, canonType);
-            else
-                dependencies.Add(factory.MaximallyConstructableType(type), reason);
+            try
+            {
+                factory.TypeSystemContext.DetectGenericCycles(type, referent);
+
+                // Reflection might need to create boxed instances of valuetypes as part of reflection invocation.
+                // Non-valuetypes are only needed for the purposes of casting/type checks.
+                // If this is a non-exact type, we need the type loader template to get the type handle.
+                if (type.IsCanonicalSubtype(CanonicalFormKind.Any))
+                    GenericTypesTemplateMap.GetTemplateTypeDependencies(ref dependencies, factory, type.NormalizeInstantiation());
+                else if (isOut && !type.IsGCPointer)
+                    dependencies.Add(factory.MaximallyConstructableType(type.NormalizeInstantiation()), reason);
+                else
+                    dependencies.Add(factory.NecessaryTypeSymbol(type.NormalizeInstantiation()), reason);
+            }
+            catch (TypeSystemException)
+            {
+                // It's fine to continue compiling if there's a problem getting these. There's going to be a MissingMetadata
+                // exception when actually trying to invoke this and the exception will be different than the one we'd get with
+                // a JIT, but that's fine, we don't need to be bug-for-bug compatible.
+            }
         }
 
         public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
@@ -167,15 +163,9 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                 if (!method.IsAbstract)
                     flags |= InvokeTableFlags.HasEntrypoint;
 
-                if (mappingEntry.MetadataHandle != 0)
-                    flags |= InvokeTableFlags.HasMetadataHandle;
-
                 if (!factory.MetadataManager.HasReflectionInvokeStubForInvokableMethod(method))
                     flags |= InvokeTableFlags.NeedsParameterInterpretation;
 
-                if (method.IsCanonicalMethod(CanonicalFormKind.Universal))
-                    flags |= InvokeTableFlags.IsUniversalCanonicalEntry;
-
                 // TODO: native signature for P/Invokes and UnmanagedCallersOnly methods
                 if (method.IsRawPInvoke() || method.IsUnmanagedCallersOnly)
                     continue;
@@ -185,17 +175,9 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
 
                 Vertex vertex = writer.GetUnsignedConstant((uint)flags);
 
-                if ((flags & InvokeTableFlags.HasMetadataHandle) != 0)
-                {
-                    // Only store the offset portion of the metadata handle to get better integer compression
-                    vertex = writer.GetTuple(vertex,
-                        writer.GetUnsignedConstant((uint)(mappingEntry.MetadataHandle & MetadataManager.MetadataOffsetMask)));
-                }
-                else
-                {
-                    var nameAndSig = factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition()));
-                    vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant((uint)nameAndSig.SavedVertex.VertexOffset));
-                }
+                // Only store the offset portion of the metadata handle to get better integer compression
+                vertex = writer.GetTuple(vertex,
+                    writer.GetUnsignedConstant((uint)(mappingEntry.MetadataHandle & MetadataManager.MetadataOffsetMask)));
 
                 // Go with a necessary type symbol. It will be upgraded to a constructed one if a constructed was emitted.
                 IEETypeNode owningTypeSymbol = factory.NecessaryTypeSymbol(method.OwningType);
@@ -219,22 +201,13 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
 
                 if ((flags & InvokeTableFlags.IsGenericMethod) != 0)
                 {
-                    if ((flags & InvokeTableFlags.RequiresInstArg) != 0)
-                    {
-                        var nameAndSigGenericMethod = factory.NativeLayout.PlacedSignatureVertex(factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition()));
-                        vertex = writer.GetTuple(vertex, writer.GetUnsignedConstant((uint)nameAndSigGenericMethod.SavedVertex.VertexOffset));
-                    }
-
-                    if ((flags & InvokeTableFlags.IsUniversalCanonicalEntry) == 0)
+                    VertexSequence args = new VertexSequence();
+                    for (int i = 0; i < method.Instantiation.Length; i++)
                     {
-                        VertexSequence args = new VertexSequence();
-                        for (int i = 0; i < method.Instantiation.Length; i++)
-                        {
-                            uint argId = _externalReferences.GetIndex(factory.NecessaryTypeSymbol(method.Instantiation[i]));
-                            args.Append(writer.GetUnsignedConstant(argId));
-                        }
-                        vertex = writer.GetTuple(vertex, args);
+                        uint argId = _externalReferences.GetIndex(factory.NecessaryTypeSymbol(method.Instantiation[i]));
+                        args.Append(writer.GetUnsignedConstant(argId));
                     }
+                    vertex = writer.GetTuple(vertex, args);
                 }
 
                 int hashCode = method.OwningType.GetHashCode();
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionVirtualInvokeMapNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionVirtualInvokeMapNode.cs
index fa6bd51940aa..cdce445f8264 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionVirtualInvokeMapNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionVirtualInvokeMapNode.cs
@@ -88,10 +88,6 @@ public static void GetVirtualInvokeMapDependencies(ref DependencyList dependenci
                     factory.NecessaryTypeSymbol(method.OwningType.ConvertToCanonForm(CanonicalFormKind.Specific)),
                     "Reflection virtual invoke owning type");
 
-                NativeLayoutMethodNameAndSignatureVertexNode nameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition());
-                NativeLayoutPlacedSignatureVertexNode placedNameAndSig = factory.NativeLayout.PlacedSignatureVertex(nameAndSig);
-                dependencies.Add(placedNameAndSig, "Reflection virtual invoke method signature");
-
                 if (!method.HasInstantiation)
                 {
                     MethodDesc slotDefiningMethod = MetadataVirtualMethodAlgorithm.FindSlotDefiningMethodForVirtualMethod(method);
@@ -162,24 +158,22 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
 
                 // Grammar of an entry in the hash table:
                 // Virtual Method uses a normal slot
-                // TypeKey + NameAndSig metadata offset into the native layout metadata + (NumberOfStepsUpParentHierarchyToType << 1) + slot
+                // TypeKey + MethodHandle + (NumberOfStepsUpParentHierarchyToType << 1) + slot
                 // OR
                 // Generic Virtual Method
-                // TypeKey + NameAndSig metadata offset into the native layout metadata + (NumberOfStepsUpParentHierarchyToType << 1 + 1)
+                // TypeKey + MethodHandle + (NumberOfStepsUpParentHierarchyToType << 1 + 1)
 
                 int parentHierarchyDistance;
                 MethodDesc declaringMethodForSlot = GetDeclaringVirtualMethodAndHierarchyDistance(method, out parentHierarchyDistance);
                 ISymbolNode containingTypeKeyNode = factory.NecessaryTypeSymbol(containingTypeKey);
-                NativeLayoutMethodNameAndSignatureVertexNode nameAndSig = factory.NativeLayout.MethodNameAndSignatureVertex(method.GetTypicalMethodDefinition());
-                NativeLayoutPlacedSignatureVertexNode placedNameAndSig = factory.NativeLayout.PlacedSignatureVertex(nameAndSig);
-
+                int token = factory.MetadataManager.GetMetadataHandleForMethod(factory, method.GetTypicalMethodDefinition());
 
                 Vertex vertex;
                 if (method.HasInstantiation)
                 {
                     vertex = writer.GetTuple(
                         writer.GetUnsignedConstant(_externalReferences.GetIndex(containingTypeKeyNode)),
-                        writer.GetUnsignedConstant((uint)placedNameAndSig.SavedVertex.VertexOffset),
+                        writer.GetUnsignedConstant((uint)token),
                         writer.GetUnsignedConstant(((uint)parentHierarchyDistance << 1) + VirtualInvokeTableEntry.GenericVirtualMethod));
                 }
                 else
@@ -190,7 +184,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
 
                     vertex = writer.GetTuple(
                         writer.GetUnsignedConstant(_externalReferences.GetIndex(containingTypeKeyNode)),
-                        writer.GetUnsignedConstant((uint)placedNameAndSig.SavedVertex.VertexOffset));
+                        writer.GetUnsignedConstant((uint)token));
 
                     vertex = writer.GetTuple(vertex,
                         writer.GetUnsignedConstant((uint)parentHierarchyDistance << 1),
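
Per the grammar comment above, the last field of each entry packs the parent-hierarchy distance together with a low bit that marks generic virtual methods. A small sketch of that packing and the corresponding decode (the helper names are illustrative, not runtime API):

static class VirtualInvokeEntryEncoding
{
    // Matches the grammar: (NumberOfStepsUpParentHierarchyToType << 1) + generic-virtual-method bit.
    public static uint Pack(uint parentHierarchyDistance, bool isGenericVirtualMethod)
        => (parentHierarchyDistance << 1) + (isGenericVirtualMethod ? 1u : 0u);

    public static (uint Distance, bool IsGenericVirtualMethod) Unpack(uint value)
        => (value >> 1, (value & 1) != 0);
}
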
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeFieldHandleNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeFieldHandleNode.cs
index ddb94c51c9df..150d4d6a0e9a 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeFieldHandleNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeFieldHandleNode.cs
@@ -38,8 +38,6 @@ protected override ObjectNodeSection GetDehydratedSection(NodeFactory factory)
                 return ObjectNodeSection.DataSection;
         }
 
-        private static readonly Utf8String s_NativeLayoutSignaturePrefix = new Utf8String("__RFHSignature_");
-
         protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFactory factory)
         {
             DependencyList result = null;
@@ -54,8 +52,10 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo
             objData.RequireInitialPointerAlignment();
             objData.AddSymbol(this);
 
-            NativeLayoutFieldLdTokenVertexNode ldtokenSigNode = factory.NativeLayout.FieldLdTokenVertex(_targetField);
-            objData.EmitPointerReloc(factory.NativeLayout.NativeLayoutSignature(ldtokenSigNode, s_NativeLayoutSignaturePrefix, _targetField));
+            int handle = relocsOnly ? 0 : factory.MetadataManager.GetMetadataHandleForField(factory, _targetField.GetTypicalFieldDefinition());
+
+            objData.EmitPointerReloc(factory.MaximallyConstructableType(_targetField.OwningType));
+            objData.EmitInt(handle);
 
             return objData.ToObjectData();
         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeMethodHandleNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeMethodHandleNode.cs
index 6e20a964a64f..7f5f3035ccbc 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeMethodHandleNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeMethodHandleNode.cs
@@ -66,8 +66,6 @@ protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFact
             return dependencies;
         }
 
-        private static readonly Utf8String s_NativeLayoutSignaturePrefix = new Utf8String("__RMHSignature_");
-
         protected override ObjectData GetDehydratableData(NodeFactory factory, bool relocsOnly = false)
         {
             ObjectDataBuilder objData = new ObjectDataBuilder(factory, relocsOnly);
@@ -75,8 +73,21 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo
             objData.RequireInitialPointerAlignment();
             objData.AddSymbol(this);
 
-            NativeLayoutMethodLdTokenVertexNode ldtokenSigNode = factory.NativeLayout.MethodLdTokenVertex(_targetMethod);
-            objData.EmitPointerReloc(factory.NativeLayout.NativeLayoutSignature(ldtokenSigNode, s_NativeLayoutSignaturePrefix, _targetMethod));
+            int handle = relocsOnly ? 0 : factory.MetadataManager.GetMetadataHandleForMethod(factory, _targetMethod.GetTypicalMethodDefinition());
+
+            objData.EmitPointerReloc(factory.MaximallyConstructableType(_targetMethod.OwningType));
+            objData.EmitInt(handle);
+
+            if (_targetMethod != _targetMethod.GetMethodDefinition())
+            {
+                objData.EmitInt(_targetMethod.Instantiation.Length);
+                foreach (TypeDesc instParam in _targetMethod.Instantiation)
+                    objData.EmitPointerReloc(factory.NecessaryTypeSymbol(instParam));
+            }
+            else
+            {
+                objData.EmitInt(0);
+            }
 
             return objData.ToObjectData();
         }
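
With the name-and-signature vertex gone, the RuntimeMethodHandle blob now consists of a MethodTable pointer for the owning type, the metadata handle of the typical method definition, and the generic instantiation (a length followed by one pointer per argument, or a zero length for non-generic methods). A rough sketch of that shape, using a plain list and IntPtr placeholders instead of ObjectDataBuilder and relocations:

using System;
using System.Collections.Generic;

static class RuntimeMethodHandleBlob
{
    // Sketch of the emitted layout; the runtime reads the real blob via raw pointers.
    public static List<object> Build(IntPtr owningTypeMethodTable, int metadataHandle, IntPtr[] instantiation)
    {
        var blob = new List<object>
        {
            owningTypeMethodTable,   // pointer reloc: maximally constructable owning type
            metadataHandle,          // int32: metadata handle of the typical method definition
            instantiation.Length     // int32: 0 for non-generic methods
        };

        foreach (IntPtr typeArg in instantiation)
            blob.Add(typeArg);       // pointer reloc per generic instantiation argument

        return blob;
    }
}
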
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
index 9543316799c3..de94d044527b 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
@@ -143,9 +143,6 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder,
                         MethodDesc targetMethod = (MethodDesc)Target;
                         if (targetMethod.OwningType.IsInterface)
                         {
-                            // Not tested
-                            encoder.EmitINT3();
-
                             encoder.EmitMOV(encoder.TargetRegister.Arg1, factory.InterfaceDispatchCell(targetMethod));
                             encoder.EmitJMP(factory.ExternFunctionSymbol("RhpResolveInterfaceMethod"));
                         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs
index ecb3d31cd899..98e433a5058c 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs
@@ -135,9 +135,6 @@ protected override void EmitCode(NodeFactory factory, ref LoongArch64Emitter enc
                         MethodDesc targetMethod = (MethodDesc)Target;
                         if (targetMethod.OwningType.IsInterface)
                         {
-                            // Not tested
-                            encoder.EmitBreak();
-
                             encoder.EmitMOV(encoder.TargetRegister.Arg1, factory.InterfaceDispatchCell(targetMethod));
                             encoder.EmitJMP(factory.ExternFunctionSymbol("RhpResolveInterfaceMethod"));
                         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs
index a285069cfada..c6e2364766fa 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs
@@ -47,8 +47,7 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref RiscV64Emitter enco
             // should be reported by someone else - the system should not rely on it coming from here.
             if (!relocsOnly && _hasInvalidEntries)
             {
-                encoder.EmitXORI(encoder.TargetRegister.IntraProcedureCallScratch1, result, 0);
-                encoder.EmitJALR(Register.X0, encoder.TargetRegister.IntraProcedureCallScratch1, 0);
+                encoder.EmitJMPIfZero(result, GetBadSlotHelper(factory));
             }
         }
 
@@ -76,6 +75,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter
                             // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region.
                             encoder.EmitADDI(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg0, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target));
                             encoder.EmitLD(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, 0);
+                            encoder.EmitFENCE_R_RW();
                             encoder.EmitRETIfZero(encoder.TargetRegister.Arg2);
 
                             encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result);
@@ -107,6 +107,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter
 
                             encoder.EmitADDI(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target));
                             encoder.EmitLD(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, 0);
+                            encoder.EmitFENCE_R_RW();
                             encoder.EmitRETIfZero(encoder.TargetRegister.Arg3);
 
                             encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result);
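
The added FENCE.R.RW gives the fast path acquire semantics: the load of the class-constructor context must not be reordered with later accesses to the statics it guards. A rough managed analogy of that fast path (not the actual helper; names are illustrative, and the "zero means already initialized" convention mirrors the EmitRETIfZero check above):

using System;
using System.Threading;

static class CctorCheck
{
    public static IntPtr GetNonGcStaticBase(ref long cctorContextWord, IntPtr nonGcBase, Action runCctorSlowPath)
    {
        // Volatile.Read supplies the acquire ordering that the fence provides in the emitted code.
        if (Volatile.Read(ref cctorContextWord) == 0)
            return nonGcBase;      // initialization already completed

        runCctorSlowPath();        // otherwise run (or wait for) the cctor first
        return nonGcBase;
    }
}
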
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs
index 1b0737287473..cb217b1e2bff 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs
@@ -133,9 +133,6 @@ protected override void EmitCode(NodeFactory factory, ref RiscV64Emitter encoder
                         MethodDesc targetMethod = (MethodDesc)Target;
                         if (targetMethod.OwningType.IsInterface)
                         {
-                            // Not tested
-                            encoder.EmitBreak();
-
                             encoder.EmitMOV(encoder.TargetRegister.Arg1, factory.InterfaceDispatchCell(targetMethod));
                             encoder.EmitJMP(factory.ExternFunctionSymbol("RhpResolveInterfaceMethod"));
                         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TentativeMethodNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TentativeMethodNode.cs
index 18a207080772..af820e34c416 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TentativeMethodNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TentativeMethodNode.cs
@@ -65,7 +65,7 @@ public override void AppendMangledName(NameMangler nameMangler, Utf8StringBuilde
 
         public override int CompareToImpl(ISortableNode other, CompilerComparer comparer)
         {
-            return _methodNode.CompareToImpl(((TentativeMethodNode)other)._methodNode, comparer);
+            return comparer.Compare(_methodNode, ((TentativeMethodNode)other)._methodNode);
         }
 
         public ISymbolNode NodeForLinkage(NodeFactory factory)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeGVMEntriesNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeGVMEntriesNode.cs
index b5bb5760b5e2..e3a0b4d38ab1 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeGVMEntriesNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeGVMEntriesNode.cs
@@ -70,8 +70,6 @@ public override IEnumerable GetStaticDependencies(NodeFacto
 
                 foreach (var entry in ScanForInterfaceGenericVirtualMethodEntries())
                     InterfaceGenericVirtualMethodTableNode.GetGenericVirtualMethodImplementationDependencies(ref _staticDependencies, context, entry.CallingMethod, entry.ImplementationType, entry.ImplementationMethod);
-
-                Debug.Assert(_staticDependencies.Count > 0);
             }
 
             return _staticDependencies;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VTableSliceNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VTableSliceNode.cs
index 9050cf8c097f..d1a4a0dc9b73 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VTableSliceNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VTableSliceNode.cs
@@ -242,12 +242,6 @@ public override IEnumerable GetConditionalStaticDep
                         factory.VirtualMethodUse(method),
                         factory.VirtualMethodUse(method.GetCanonMethodTarget(CanonicalFormKind.Specific)),
                         "Canonically equivalent virtual method use");
-
-                if (defType.Context.SupportsUniversalCanon)
-                    yield return new CombinedDependencyListEntry(
-                        factory.VirtualMethodUse(method),
-                        factory.VirtualMethodUse(method.GetCanonMethodTarget(CanonicalFormKind.Universal)),
-                        "Universal Canonically equivalent virtual method use");
             }
         }
     }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VirtualMethodUseNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VirtualMethodUseNode.cs
index ca716e795ccb..6cf9cde51670 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VirtualMethodUseNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VirtualMethodUseNode.cs
@@ -50,7 +50,7 @@ protected override void OnMarked(NodeFactory factory)
             lazyVTableSlice?.AddEntry(_decl);
         }
 
-        public override bool HasConditionalStaticDependencies => _decl.Context.SupportsUniversalCanon && _decl.OwningType.HasInstantiation && !_decl.OwningType.IsInterface;
+        public override bool HasConditionalStaticDependencies => false;
         public override bool HasDynamicDependencies => false;
         public override bool InterestingForDynamicDependencyAnalysis => false;
 
@@ -76,32 +76,7 @@ public override IEnumerable GetStaticDependencies(NodeFacto
             return dependencies;
         }
 
-        public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory factory)
-        {
-            Debug.Assert(_decl.OwningType.HasInstantiation);
-            Debug.Assert(!_decl.OwningType.IsInterface);
-            Debug.Assert(factory.TypeSystemContext.SupportsUniversalCanon);
-
-            DefType universalCanonicalOwningType = (DefType)_decl.OwningType.ConvertToCanonForm(CanonicalFormKind.Universal);
-            Debug.Assert(universalCanonicalOwningType.IsCanonicalSubtype(CanonicalFormKind.Universal));
-
-            if (!factory.VTable(universalCanonicalOwningType).HasKnownVirtualMethodUse)
-            {
-                // This code ensures that in cases where we don't structurally force all universal canonical instantiations
-                // to have full vtables, that we ensure that all vtables are equivalently shaped between universal and non-universal types
-                return new CombinedDependencyListEntry[] {
-                    new CombinedDependencyListEntry(
-                        factory.VirtualMethodUse(_decl.GetCanonMethodTarget(CanonicalFormKind.Universal)),
-                        factory.NativeLayout.TemplateTypeLayout(universalCanonicalOwningType),
-                        "If universal canon instantiation of method exists, ensure that the universal canonical type has the right set of dependencies")
-                };
-            }
-            else
-            {
-                return Array.Empty<CombinedDependencyListEntry>();
-            }
-        }
-
+        public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory factory) => null;
         public override IEnumerable<CombinedDependencyListEntry> SearchDynamicDependencies(List<DependencyNodeCore<NodeFactory>> markedNodes, int firstNode, NodeFactory factory) => null;
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILScanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILScanner.cs
index 3e920294fae3..5a1dfb8e9ac8 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILScanner.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILScanner.cs
@@ -436,6 +436,7 @@ public override DictionaryLayoutNode GetLayout(TypeSystemEntity methodOrType)
         private sealed class ScannedDevirtualizationManager : DevirtualizationManager
         {
             private HashSet<TypeDesc> _constructedMethodTables = new HashSet<TypeDesc>();
+            private HashSet<TypeDesc> _reflectionVisibleGenericDefinitionMethodTables = new HashSet<TypeDesc>();
             private HashSet<TypeDesc> _canonConstructedMethodTables = new HashSet<TypeDesc>();
             private HashSet<TypeDesc> _canonConstructedTypes = new HashSet<TypeDesc>();
             private HashSet<TypeDesc> _unsealedTypes = new HashSet<TypeDesc>();
@@ -456,6 +457,11 @@ public ScannedDevirtualizationManager(NodeFactory factory, ImmutableArray eetypeNode.Type,
@@ -630,8 +636,7 @@ private static bool CanAssumeWholeProgramViewOnTypeUse(NodeFactory factory, Type
                 }
 
                 if (baseType.IsCanonicalSubtype(CanonicalFormKind.Any)
-                    || baseType.ConvertToCanonForm(CanonicalFormKind.Specific) != baseType
-                    || baseType.Context.SupportsUniversalCanon)
+                    || baseType.ConvertToCanonForm(CanonicalFormKind.Specific) != baseType)
                 {
                     // If the interface has a canonical form, we might not have a full view of all implementers.
                     // E.g. if we have:
@@ -736,6 +741,12 @@ public override bool CanReferenceConstructedTypeOrCanonicalFormOfType(TypeDesc t
                 return _constructedMethodTables.Contains(type) || _canonConstructedMethodTables.Contains(type);
             }
 
+            public override bool IsGenericDefinitionMethodTableReflectionVisible(TypeDesc type)
+            {
+                Debug.Assert(type.IsGenericDefinition);
+                return _reflectionVisibleGenericDefinitionMethodTables.Contains(type);
+            }
+
             public override TypeDesc[] GetImplementingClasses(TypeDesc type)
             {
                 if (_disqualifiedTypes.Contains(type))
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
index 11bec3fc36c8..f4a68315037f 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
@@ -185,6 +185,12 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                 case ReadyToRunHelper.ULng2Dbl:
                     mangledName = "RhpULng2Dbl";
                     break;
+                case ReadyToRunHelper.Lng2Flt:
+                    mangledName = "RhpLng2Flt";
+                    break;
+                case ReadyToRunHelper.ULng2Flt:
+                    mangledName = "RhpULng2Flt";
+                    break;
 
                 case ReadyToRunHelper.Dbl2Lng:
                     mangledName = "RhpDbl2Lng";
@@ -237,30 +243,30 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                     }
                     break;
 
-                case ReadyToRunHelper.Mod:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "IMod");
+                case ReadyToRunHelper.Div:
+                    methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("DivInt32", null);
                     break;
-                case ReadyToRunHelper.UMod:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "UMod");
+                case ReadyToRunHelper.UDiv:
+                    methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("DivUInt32", null);
                     break;
-                case ReadyToRunHelper.ULMod:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "ULMod");
+                case ReadyToRunHelper.LDiv:
+                    methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("DivInt64", null);
                     break;
-                case ReadyToRunHelper.LMod:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "LMod");
+                case ReadyToRunHelper.ULDiv:
+                    methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("DivUInt64", null);
                     break;
 
-                case ReadyToRunHelper.Div:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "IDiv");
+                case ReadyToRunHelper.Mod:
+                    methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("ModInt32", null);
                     break;
-                case ReadyToRunHelper.UDiv:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "UDiv");
+                case ReadyToRunHelper.UMod:
+                    methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("ModUInt32", null);
                     break;
-                case ReadyToRunHelper.ULDiv:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "ULDiv");
+                case ReadyToRunHelper.LMod:
+                    methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("ModInt64", null);
                     break;
-                case ReadyToRunHelper.LDiv:
-                    methodDesc = context.GetHelperEntryPoint("MathHelpers", "LDiv");
+                case ReadyToRunHelper.ULMod:
+                    methodDesc = context.SystemModule.GetKnownType("System", "Math").GetKnownMethod("ModUInt64", null);
                     break;
 
                 case ReadyToRunHelper.LRsz:
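
The division and modulus helpers now resolve to managed implementations on System.Math (DivInt32, ModInt64, and so on) instead of the old MathHelpers entry points. Those corelib helpers are internal, so the sketch below only spells out the semantics a 32-bit signed division helper has to preserve; it is not the actual corelib code:

using System;

static class MathHelperSketch
{
    public static int DivInt32(int dividend, int divisor)
    {
        // C#'s '/' operator already raises both exceptions; they are spelled out here to make
        // the required semantics explicit.
        if (divisor == 0)
            throw new DivideByZeroException();
        if (divisor == -1 && dividend == int.MinValue)
            throw new OverflowException();   // the only overflowing case for 32-bit signed division

        return dividend / divisor;
    }
}
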
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MetadataManager.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MetadataManager.cs
index c35f79c7fb74..014590c509c0 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MetadataManager.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MetadataManager.cs
@@ -48,7 +48,9 @@ public abstract partial class MetadataManager : ICompilationRootProvider
         private byte[] _metadataBlob;
         private List<MetadataMapping<MetadataType>> _typeMappings;
         private List<MetadataMapping<FieldDesc>> _fieldMappings;
+        private Dictionary<FieldDesc, int> _fieldHandleMap;
         private List<MetadataMapping<MethodDesc>> _methodMappings;
+        private Dictionary<MethodDesc, int> _methodHandleMap;
         private List<StackTraceMapping> _stackTraceMappings;
         protected readonly string _metadataLogFile;
         protected readonly StackTraceEmissionPolicy _stackTraceEmissionPolicy;
@@ -471,16 +473,6 @@ public bool ShouldMethodBeInInvokeMap(MethodDesc method)
 
         public void GetDependenciesDueToGenericDictionary(ref DependencyList dependencies, NodeFactory factory, MethodDesc method)
         {
-            MetadataCategory category = GetMetadataCategory(method.GetCanonMethodTarget(CanonicalFormKind.Specific));
-
-            if ((category & MetadataCategory.RuntimeMapping) != 0)
-            {
-                // If the method is visible from reflection, we need to keep track of this statically generated
-                // dictionary to make sure MakeGenericMethod works even without a type loader template
-                dependencies ??= new DependencyList();
-                dependencies.Add(factory.GenericMethodsHashtableEntry(method), "Reflection visible dictionary");
-            }
-
             if (method.Signature.IsStatic && method.IsSynchronized)
             {
                 dependencies ??= new DependencyList();
@@ -565,6 +557,11 @@ protected virtual void GetMetadataDependenciesDueToReflectability(ref Dependency
             // and property setters)
         }
 
+        public virtual void GetNativeLayoutMetadataDependencies(ref DependencyList dependencies, NodeFactory factory, MethodDesc method)
+        {
+            // MetadataManagers can override this to provide additional dependencies caused by the emission of metadata
+        }
+
         protected virtual void GetMetadataDependenciesDueToReflectability(ref DependencyList dependencies, NodeFactory factory, FieldDesc field)
         {
             // MetadataManagers can override this to provide additional dependencies caused by the emission of metadata
@@ -720,7 +717,7 @@ protected void EnsureMetadataGenerated(NodeFactory factory)
             if (_metadataBlob != null)
                 return;
 
-            ComputeMetadata(factory, out _metadataBlob, out _typeMappings, out _methodMappings, out _fieldMappings, out _stackTraceMappings);
+            ComputeMetadata(factory, out _metadataBlob, out _typeMappings, out _methodMappings, out _methodHandleMap, out _fieldMappings, out _fieldHandleMap, out _stackTraceMappings);
         }
 
         void ICompilationRootProvider.AddCompilationRoots(IRootingServiceProvider rootProvider)
@@ -733,7 +730,9 @@ protected abstract void ComputeMetadata(NodeFactory factory,
                                                 out byte[] metadataBlob,
                                                 out List<MetadataMapping<MetadataType>> typeMappings,
                                                 out List<MetadataMapping<MethodDesc>> methodMappings,
+                                                out Dictionary<MethodDesc, int> methodMetadataMappings,
                                                 out List<MetadataMapping<FieldDesc>> fieldMappings,
+                                                out Dictionary<FieldDesc, int> fieldMetadataMappings,
                                                 out List<StackTraceMapping> stackTraceMapping);
 
         protected void ComputeMetadata<TPolicy>(
@@ -742,7 +741,9 @@ protected void ComputeMetadata(
             out byte[] metadataBlob,
             out List<MetadataMapping<MetadataType>> typeMappings,
             out List<MetadataMapping<MethodDesc>> methodMappings,
+            out Dictionary<MethodDesc, int> methodMetadataMappings,
             out List<MetadataMapping<FieldDesc>> fieldMappings,
+            out Dictionary<FieldDesc, int> fieldMetadataMappings,
             out List<StackTraceMapping> stackTraceMapping) where TPolicy : struct, IMetadataPolicy
         {
             var transformed = MetadataTransform.Run(policy, GetCompilationModulesWithMetadata());
@@ -817,7 +818,9 @@ protected void ComputeMetadata(
 
             typeMappings = new List>();
             methodMappings = new List>();
+            methodMetadataMappings = new Dictionary();
             fieldMappings = new List>();
+            fieldMetadataMappings = new Dictionary();
             stackTraceMapping = new List();
 
             // Generate type definition mappings
@@ -837,8 +840,15 @@ record ??= transformed.GetTransformedTypeReference(definition);
                     typeMappings.Add(new MetadataMapping(definition, writer.GetRecordHandle(record)));
             }
 
+            foreach (var methodMapping in transformed.GetTransformedMethodDefinitions())
+                methodMetadataMappings[methodMapping.Key] = writer.GetRecordHandle(methodMapping.Value);
+
             foreach (var method in GetReflectableMethods())
             {
+                MetadataRecord record = transformed.GetTransformedMethodDefinition(method.GetTypicalMethodDefinition());
+                if (record == null)
+                    continue;
+
                 if (method.IsGenericMethodDefinition || method.OwningType.IsGenericDefinition)
                 {
                     // Generic definitions don't have runtime artifacts we would need to map to.
@@ -857,15 +867,18 @@ record ??= transformed.GetTransformedTypeReference(definition);
                 if ((GetMetadataCategory(method) & MetadataCategory.RuntimeMapping) == 0)
                     continue;
 
-                MetadataRecord record = transformed.GetTransformedMethodDefinition(method.GetTypicalMethodDefinition());
-
-                if (record != null)
-                    methodMappings.Add(new MetadataMapping(method, writer.GetRecordHandle(record)));
+                methodMappings.Add(new MetadataMapping(method, writer.GetRecordHandle(record)));
             }
 
             HashSet canonicalFields = new HashSet();
             foreach (var field in GetFieldsWithRuntimeMapping())
             {
+                Field record = transformed.GetTransformedFieldDefinition(field.GetTypicalFieldDefinition());
+                if (record == null)
+                    continue;
+
+                fieldMetadataMappings[field.GetTypicalFieldDefinition()] = writer.GetRecordHandle(record);
+
                 FieldDesc fieldToAdd = field;
                 TypeDesc canonOwningType = field.OwningType.ConvertToCanonForm(CanonicalFormKind.Specific);
                 if (canonOwningType.IsCanonicalSubtype(CanonicalFormKind.Any))
@@ -879,9 +892,7 @@ record ??= transformed.GetTransformedTypeReference(definition);
                     fieldToAdd = canonField;
                 }
 
-                Field record = transformed.GetTransformedFieldDefinition(fieldToAdd.GetTypicalFieldDefinition());
-                if (record != null)
-                    fieldMappings.Add(new MetadataMapping(fieldToAdd, writer.GetRecordHandle(record)));
+                fieldMappings.Add(new MetadataMapping(fieldToAdd, writer.GetRecordHandle(record)));
             }
 
             // Generate stack trace metadata mapping
@@ -981,12 +992,40 @@ public IEnumerable> GetMethodMapping(NodeFactory fac
             return _methodMappings;
         }
 
+        public int GetMetadataHandleForMethod(NodeFactory factory, MethodDesc method)
+        {
+            if (!CanGenerateMetadata(method))
+            {
+                // We can end up here with reflection disabled or multifile compilation.
+                // If we ever productize either, we'll need to do something different.
+                // Scenarios that currently need this won't work in these modes.
+                return 0;
+            }
+
+            EnsureMetadataGenerated(factory);
+            return _methodHandleMap[method];
+        }
+
         public IEnumerable> GetFieldMapping(NodeFactory factory)
         {
             EnsureMetadataGenerated(factory);
             return _fieldMappings;
         }
 
+        public int GetMetadataHandleForField(NodeFactory factory, FieldDesc field)
+        {
+            if (!CanGenerateMetadata(field))
+            {
+                // We can end up here with reflection disabled or multifile compilation.
+                // If we ever productize either, we'll need to do something different.
+                // Scenarios that currently need this won't work in these modes.
+                return 0;
+            }
+
+            EnsureMetadataGenerated(factory);
+            return _fieldHandleMap[field];
+        }
+
         public IEnumerable GetStackTraceMapping(NodeFactory factory)
         {
             EnsureMetadataGenerated(factory);
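A note on the two accessors added above: GetMetadataHandleForMethod and GetMetadataHandleForField return the metadata record handle for a method or field once metadata has been generated, and 0 as a "no metadata" sentinel when metadata cannot be generated (reflection disabled or multifile compilation). A minimal consumption sketch follows, assuming the containing class is the MetadataManager (its file header falls outside this excerpt) and using a hypothetical helper name; only GetMetadataHandleForMethod itself comes from the diff.

    // Sketch only: illustrates the intended calling pattern for the new accessor.
    private static bool TryGetMethodMetadataHandle(
        MetadataManager metadataManager, NodeFactory factory, MethodDesc method, out int handle)
    {
        handle = metadataManager.GetMetadataHandleForMethod(factory, method);
        // 0 means metadata could not be generated for this method, so callers
        // must treat it as "absent" rather than as a valid record handle.
        return handle != 0;
    }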
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MultiFileCompilationModuleGroup.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MultiFileCompilationModuleGroup.cs
index 031508b7cc5f..4e0cc67c824a 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MultiFileCompilationModuleGroup.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MultiFileCompilationModuleGroup.cs
@@ -97,7 +97,7 @@ public override bool ShouldProduceFullVTable(TypeDesc type)
 
         public override bool ShouldPromoteToFullType(TypeDesc type)
         {
-            return ShouldProduceFullVTable(type);
+            return ShouldProduceFullVTable(type) || type.IsGenericDefinition;
         }
 
         public override bool PresenceOfEETypeImpliesAllMethodsOnType(TypeDesc type)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs
index 4a23db2e36ba..5204eb587c61 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs
@@ -7,6 +7,7 @@
 using System.Diagnostics;
 using System.Buffers.Binary;
 using System.Numerics;
+using System.Reflection;
 using ILCompiler.DependencyAnalysis;
 using ILCompiler.DependencyAnalysisFramework;
 using Internal.TypeSystem;
@@ -49,6 +50,7 @@ internal sealed class ElfObjectWriter : UnixObjectWriter
         private static readonly ObjectNodeSection ArmUnwindTableSection = new ObjectNodeSection(".ARM.extab", SectionType.ReadOnly);
         private static readonly ObjectNodeSection ArmAttributesSection = new ObjectNodeSection(".ARM.attributes", SectionType.ReadOnly);
         private static readonly ObjectNodeSection ArmTextThunkSection = new ObjectNodeSection(".text.thunks", SectionType.Executable);
+        private static readonly ObjectNodeSection CommentSection = new ObjectNodeSection(".comment", SectionType.ReadOnly);
 
         public ElfObjectWriter(NodeFactory factory, ObjectWritingOptions options)
             : base(factory, options)
@@ -94,6 +96,11 @@ private protected override void CreateSection(ObjectNodeSection section, string
             {
                 type = SHT_ARM_ATTRIBUTES;
             }
+            else if (section == CommentSection)
+            {
+                type = SHT_PROGBITS;
+                flags = SHF_MERGE | SHF_STRINGS;
+            }
             else if (_machine == EM_ARM && section.Type == SectionType.UnwindData)
             {
                 type = SHT_ARM_EXIDX;
@@ -567,6 +574,9 @@ private void EmitRelocationsRiscV64(int sectionIndex, List r
 
         private protected override void EmitSectionsAndLayout()
         {
+            SectionWriter commentSectionWriter = GetOrCreateSection(CommentSection);
+            commentSectionWriter.WriteUtf8String($".NET: ilc {Assembly.GetExecutingAssembly().GetCustomAttribute().InformationalVersion}");
+
             if (_machine == EM_ARM)
             {
                 // Emit EABI attributes section
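For context on the .comment hunk above: the section is created with SHF_MERGE | SHF_STRINGS so the linker can fold identical version strings, and EmitSectionsAndLayout writes the ilc informational version into it. Below is a small sketch of the same write in isolation, assuming the stripped generic argument is AssemblyInformationalVersionAttribute (the type argument does not survive in the diff text above).

    // Sketch: obtain the ilc version string and emit it into the .comment section.
    string ilcVersion = Assembly.GetExecutingAssembly()
        .GetCustomAttribute<AssemblyInformationalVersionAttribute>()
        ?.InformationalVersion ?? "unknown";

    // The payload lands in the object file's .comment section as a mergeable
    // string (e.g. ".NET: ilc 10.0.0-..."); `readelf -p .comment file.o` shows it.
    commentSectionWriter.WriteUtf8String($".NET: ilc {ilcVersion}");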
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ObjectWriter.cs
index 4f633f62ceb0..04a4012355fa 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ObjectWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ObjectWriter.cs
@@ -134,15 +134,6 @@ private protected bool ShouldShareSymbol(ObjectNode node, ObjectNodeSection sect
             return true;
         }
 
-        private protected static ObjectNodeSection GetSharedSection(ObjectNodeSection section, string key)
-        {
-            string standardSectionPrefix = "";
-            if (section.IsStandardSection)
-                standardSectionPrefix = ".";
-
-            return new ObjectNodeSection(standardSectionPrefix + section.Name, section.Type, key);
-        }
-
         private unsafe void EmitOrResolveRelocation(
             int sectionIndex,
             long offset,
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/PreinitializationManager.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/PreinitializationManager.cs
index 58b3f1f0dd65..e8b5d1c7221b 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/PreinitializationManager.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/PreinitializationManager.cs
@@ -17,6 +17,8 @@ public class PreinitializationManager
     {
         private readonly bool _supportsLazyCctors;
 
+        public ReadOnlyFieldPolicy ReadOnlyFieldPolicy => _preinitHashTable._readOnlyPolicy;
+
         public PreinitializationManager(TypeSystemContext context, CompilationModuleGroup compilationGroup, ILProvider ilprovider, TypePreinit.TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy, FlowAnnotations flowAnnotations)
         {
             _supportsLazyCctors = context.SystemModule.GetType("System.Runtime.CompilerServices", "ClassConstructorRunner", throwIfNotFound: false) != null;
@@ -139,7 +141,7 @@ private sealed class PreinitializationInfoHashtable : LockFreeReaderHashtable.CombinedDependencyListEntry>;
 using FlowAnnotations = ILLink.Shared.TrimAnalysis.FlowAnnotations;
@@ -41,6 +42,8 @@ public class TypePreinit
         private readonly Dictionary _fieldValues = new Dictionary();
         private readonly Dictionary _internedStrings = new Dictionary();
         private readonly Dictionary _internedTypes = new Dictionary();
+        private readonly Dictionary _nestedPreinitResults = new Dictionary();
+        private readonly Dictionary _rvaFieldDatas = new Dictionary();
 
         private TypePreinit(MetadataType owningType, CompilationModuleGroup compilationGroup, ILProvider ilProvider, TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy, FlowAnnotations flowAnnotations)
         {
@@ -57,7 +60,7 @@ private TypePreinit(MetadataType owningType, CompilationModuleGroup compilationG
                 if (!field.IsStatic || field.IsLiteral || field.IsThreadStatic || field.HasRva)
                     continue;
 
-               _fieldValues.Add(field, NewUninitializedLocationValue(field.FieldType));
+                _fieldValues.Add(field, NewUninitializedLocationValue(field.FieldType, field));
             }
         }
 
@@ -109,6 +112,47 @@ public static PreinitializationInfo ScanType(CompilationModuleGroup compilationG
             return new PreinitializationInfo(type, status.FailureReason);
         }
 
+        private bool TryGetNestedPreinitResult(MethodDesc callingMethod, MetadataType type, Stack recursionProtect, ref int instructionCounter, out NestedPreinitResult result)
+        {
+            if (!_nestedPreinitResults.TryGetValue(type, out result))
+            {
+                TypePreinit nestedPreinit = new TypePreinit(type, _compilationGroup, _ilProvider, _policy, _readOnlyPolicy, _flowAnnotations);
+                recursionProtect ??= new Stack();
+                recursionProtect.Push(callingMethod);
+
+                // Since we don't reset the instruction counter as we interpret the nested cctor,
+                // remember the instruction counter before we start interpreting so that we can subtract
+                // the instructions later when we convert object instances allocated in the nested
+                // cctor to foreign instances in the currently analyzed cctor.
+                // E.g. if the nested cctor allocates a new object at the beginning of the cctor,
+                // we should treat it as a ForeignTypeInstance with allocation site ID 0, not allocation
+                // site ID of `instructionCounter + 0`.
+                // We could also reset the counter, but we use the instruction counter as a complexity cutoff
+                // and resetting it would lead to unpredictable analysis durations.
+                int baseInstructionCounter = instructionCounter;
+                Status status = nestedPreinit.TryScanMethod(type.GetStaticConstructor(), null, recursionProtect, ref instructionCounter, out Value _);
+                if (!status.IsSuccessful)
+                {
+                    result = default;
+                    return false;
+                }
+                recursionProtect.Pop();
+
+                result = new NestedPreinitResult(nestedPreinit._fieldValues, baseInstructionCounter);
+
+                _nestedPreinitResults.Add(type, result);
+            }
+
+            return true;
+        }
+
+        private byte[] GetFieldRvaData(EcmaField field)
+        {
+            if (!_rvaFieldDatas.TryGetValue(field, out byte[] result))
+                _rvaFieldDatas.Add(field, result = field.GetFieldRvaData());
+            return result;
+        }
+
         private Status TryScanMethod(MethodDesc method, Value[] parameters, Stack recursionProtect, ref int instructionCounter, out Value returnValue)
         {
             MethodIL methodIL = _ilProvider.GetMethodIL(method);
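The _nestedPreinitResults cache and TryGetNestedPreinitResult added above mean a nested static constructor is interpreted at most once per analyzed type; later reads of its fields reuse the recorded values. A hypothetical example of code shapes that benefit (all names invented for illustration):

    // Illustrative only. While Buffers..cctor is being interpreted, the first
    // read of Config.PageSize runs Config..cctor through the nested interpreter
    // and caches its field values; the second read is served from that cache
    // instead of re-interpreting the constructor.
    static class Config
    {
        public static readonly int PageSize = 4096;
    }

    static class Buffers
    {
        public static readonly int Small = Config.PageSize;      // triggers nested preinit
        public static readonly int Large = Config.PageSize * 16; // served from the cache
    }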
@@ -151,7 +195,7 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack -1,
-                                ILOpcode.ldc_i4_s => (sbyte)reader.ReadILByte(),
-                                ILOpcode.ldc_i4 => (int)reader.ReadILUInt32(),
-                                _ => opcode - ILOpcode.ldc_i4_0,
-                            };
-                            stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32(value));
-                        }
-                        break;
+                            ILOpcode.ldc_i4_m1 => -1,
+                            ILOpcode.ldc_i4_s => (sbyte)reader.ReadILByte(),
+                            ILOpcode.ldc_i4 => (int)reader.ReadILUInt32(),
+                            _ => opcode - ILOpcode.ldc_i4_0,
+                        };
+                        stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32(value));
+                    }
+                    break;
 
                     case ILOpcode.ldc_i8:
                         stack.Push(StackValueKind.Int64, ValueTypeValue.FromInt64((long)reader.ReadILUInt64()));
@@ -214,48 +258,48 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack 0
-                                && (elementType.IsGCPointer
-                                || (elementType.IsValueType && ((DefType)elementType).ContainsGCPointers)))
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "GC pointers");
-                            }
+                        const int MaximumInterpretedArraySize = 8192;
 
-                            if (elementCount < 0
-                                || elementCount > MaximumInterpretedArraySize)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Array out of bounds");
-                            }
+                        TypeDesc elementType = (TypeDesc)methodIL.GetObject(reader.ReadILToken());
+                        if (elementCount > 0
+                            && (elementType.IsGCPointer
+                            || (elementType.IsValueType && ((DefType)elementType).ContainsGCPointers)))
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "GC pointers");
+                        }
 
-                            if (elementType.RequiresAlign8())
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Align8");
-                            }
+                        if (elementCount < 0
+                            || elementCount > MaximumInterpretedArraySize)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Array out of bounds");
+                        }
 
-                            AllocationSite allocSite = new AllocationSite(_type, instructionCounter);
-                            stack.Push(new ArrayInstance(elementType.MakeArrayType(), elementCount, allocSite));
+                        if (elementType.RequiresAlign8())
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Align8");
                         }
-                        break;
+
+                        AllocationSite allocSite = new AllocationSite(_type, instructionCounter);
+                        stack.Push(new ArrayInstance(elementType.MakeArrayType(), elementCount, allocSite));
+                    }
+                    break;
 
                     case ILOpcode.dup:
                         if (stack.Count == 0)
@@ -266,68 +310,71 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack 0)
+                            || (!returnsVoid && stack.Count != 1))
                         {
-                            bool returnsVoid = methodIL.OwningMethod.Signature.ReturnType.IsVoid;
-                            if ((returnsVoid && stack.Count > 0)
-                                || (!returnsVoid && stack.Count != 1))
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
 
-                            if (!returnsVoid)
-                            {
-                                returnValue = stack.PopIntoLocation(methodIL.OwningMethod.Signature.ReturnType);
-                            }
-                            return Status.Success;
+                        if (!returnsVoid)
+                        {
+                            returnValue = stack.PopIntoLocation(methodIL.OwningMethod.Signature.ReturnType);
                         }
+                        return Status.Success;
+                    }
 
                     case ILOpcode.nop:
                     case ILOpcode.volatile_:
                         break;
 
                     case ILOpcode.stsfld:
+                    {
+                        FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
+                        if (!field.IsStatic || field.IsLiteral)
                         {
-                            FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
-                            if (!field.IsStatic || field.IsLiteral)
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
 
-                            if (field.OwningType != _type)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Store into other static");
-                            }
+                        if (field.OwningType != _type)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Store into other static");
+                        }
 
-                            if (field.IsThreadStatic || field.HasRva)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported static");
-                            }
+                        if (field.IsThreadStatic || field.HasRva)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported static");
+                        }
 
-                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
-                            }
+                        if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                        }
 
-                            if (_fieldValues[field] is IAssignableValue assignableField)
+                        if (_fieldValues[field] is IAssignableValue assignableField)
+                        {
+                            if (!assignableField.TryAssign(stack.PopIntoLocation(field.FieldType)))
                             {
                                 if (!assignableField.TryAssign(stack.PopIntoLocation(field.FieldType)))
                                 {
                                     return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported store");
@@ -341,96 +388,88 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
                             }
                         }
-                        break;
+                        else
+                        {
+                            Value value = stack.PopIntoLocation(field.FieldType);
+                            if (value is IInternalModelingOnlyValue)
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Value with no external representation");
+                            _fieldValues[field] = value;
+                        }
+                    }
+                    break;
 
                     case ILOpcode.ldsfld:
+                    case ILOpcode.ldsflda:
+                    {
+                        FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
+                        if (!field.IsStatic || field.IsLiteral)
                         {
-                            FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
-                            if (!field.IsStatic || field.IsLiteral)
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
-
-                            if (field.IsThreadStatic || field.HasRva)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported static");
-                            }
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
 
-                            if (field.OwningType == _type)
-                            {
-                                stack.PushFromLocation(field.FieldType, _fieldValues[field]);
-                            }
-                            else if (_readOnlyPolicy.IsReadOnly(field)
-                                && field.OwningType.HasStaticConstructor
-                                && _policy.CanPreinitialize(field.OwningType))
-                            {
-                                TypePreinit nestedPreinit = new TypePreinit((MetadataType)field.OwningType, _compilationGroup, _ilProvider, _policy, _readOnlyPolicy, _flowAnnotations);
-                                recursionProtect ??= new Stack();
-                                recursionProtect.Push(methodIL.OwningMethod);
-
-                                // Since we don't reset the instruction counter as we interpret the nested cctor,
-                                // remember the instruction counter before we start interpreting so that we can subtract
-                                // the instructions later when we convert object instances allocated in the nested
-                                // cctor to foreign instances in the currently analyzed cctor.
-                                // E.g. if the nested cctor allocates a new object at the beginning of the cctor,
-                                // we should treat it as a ForeignTypeInstance with allocation site ID 0, not allocation
-                                // site ID of `instructionCounter + 0`.
-                                // We could also reset the counter, but we use the instruction counter as a complexity cutoff
-                                // and resetting it would lead to unpredictable analysis durations.
-                                int baseInstructionCounter = instructionCounter;
-                                Status status = nestedPreinit.TryScanMethod(field.OwningType.GetStaticConstructor(), null, recursionProtect, ref instructionCounter, out Value _);
-                                if (!status.IsSuccessful)
-                                {
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Nested cctor failed to preinit");
-                                }
-                                recursionProtect.Pop();
-                                Value value = nestedPreinit._fieldValues[field];
-                                if (value is ValueTypeValue)
-                                    stack.PushFromLocation(field.FieldType, value);
-                                else if (value is ReferenceTypeValue referenceType)
-                                    stack.PushFromLocation(field.FieldType, referenceType.ToForeignInstance(baseInstructionCounter, this));
-                                else
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
-                            else if (_readOnlyPolicy.IsReadOnly(field)
-                                && !field.OwningType.HasStaticConstructor)
-                            {
-                                // (Effectively) read only field but no static constructor to set it: the value is default-initialized.
-                                stack.PushFromLocation(field.FieldType, NewUninitializedLocationValue(field.FieldType));
-                            }
-                            else
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Load from other non-initonly static");
-                            }
+                        if (field.IsThreadStatic)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported static");
                         }
-                        break;
 
-                    case ILOpcode.ldsflda:
+                        if (opcode != ILOpcode.ldsfld
+                            && _flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
                         {
-                            FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
-                            if (!field.IsStatic || field.IsLiteral)
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                        }
 
-                            if (field.OwningType != _type)
+                        Value fieldValue;
+                        if (field.HasRva)
+                        {
+                            if (!field.IsInitOnly
+                                || field.OwningType.HasStaticConstructor
+                                || field.GetTypicalFieldDefinition() is not EcmaField ecmaField)
                             {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Address of other static");
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported RVA static");
                             }
 
-                            if (field.IsThreadStatic || field.HasRva)
+                            fieldValue = new ValueTypeValue(GetFieldRvaData(ecmaField));
+                        }
+                        else if (field.OwningType == _type)
+                        {
+                            fieldValue = _fieldValues[field];
+                        }
+                        else if (_readOnlyPolicy.IsReadOnly(field)
+                            && field.OwningType.HasStaticConstructor
+                            && _policy.CanPreinitialize(field.OwningType))
+                        {
+                            if (!TryGetNestedPreinitResult(methodIL.OwningMethod, (MetadataType)field.OwningType, recursionProtect, ref instructionCounter, out NestedPreinitResult nestedPreinitResult))
                             {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported static");
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Nested cctor failed to preinit");
                             }
 
-                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
-                            }
+                            if (!nestedPreinitResult.TryGetFieldValue(this, field, out fieldValue))
+                                return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
+                        else if (_readOnlyPolicy.IsReadOnly(field)
+                            && opcode != ILOpcode.ldsflda // We need to intern these for correctness in ldsflda scenarios
+                            && !field.OwningType.HasStaticConstructor)
+                        {
+                            // (Effectively) read only field but no static constructor to set it: the value is default-initialized.
+                            fieldValue = NewUninitializedLocationValue(field.FieldType, field);
+                        }
+                        else
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Load from other non-initonly static");
+                        }
 
-                            Value fieldValue = _fieldValues[field];
+                        if (opcode == ILOpcode.ldsfld)
+                        {
+                            stack.PushFromLocation(field.FieldType, fieldValue);
+                        }
+                        else
+                        {
+                            Debug.Assert(opcode == ILOpcode.ldsflda);
                             if (fieldValue == null || !fieldValue.TryCreateByRef(out Value byRefValue))
                             {
                                 return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported byref");
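The merged ldsfld/ldsflda handling above also adds a narrow path for RVA-backed statics: such a field is readable during preinit only if it is initonly, its owning type has no static constructor, and its typical definition is an EcmaField whose raw bytes can be fetched (and cached) through GetFieldRvaData; the bytes are then surfaced as a ValueTypeValue. Here is a condensed restatement of that gate, using only names that appear in the diff:

    // Condensed restatement of the RVA-static gate introduced above (no new logic).
    if (field.HasRva)
    {
        if (!field.IsInitOnly
            || field.OwningType.HasStaticConstructor
            || field.GetTypicalFieldDefinition() is not EcmaField ecmaField)
        {
            return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported RVA static");
        }

        // The field's raw bytes are read once and cached in _rvaFieldDatas.
        fieldValue = new ValueTypeValue(GetFieldRvaData(ecmaField));
    }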
@@ -438,394 +477,336 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack= 0; i--)
-                            {
-                                methodParams[i] = stack.PopIntoLocation(GetArgType(method, i));
-                            }
+                        Value[] methodParams = new Value[numParams];
+                        for (int i = numParams - 1; i >= 0; i--)
+                        {
+                            methodParams[i] = stack.PopIntoLocation(GetArgType(method, i));
+                        }
 
-                            if (opcode == ILOpcode.callvirt)
-                            {
-                                // Only support non-virtual methods for now + we don't emulate NRE on null this
-                                if (!owningType.IsValueType && (method.IsVirtual || methodParams[0] == null))
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
+                        if (opcode == ILOpcode.callvirt)
+                        {
+                            // Only support non-virtual methods for now + we don't emulate NRE on null this
+                            if (!owningType.IsValueType && (method.IsVirtual || methodParams[0] == null))
+                                return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
 
-                            Value retVal;
-                            if (!method.IsIntrinsic || !TryHandleIntrinsicCall(method, methodParams, out retVal))
+                        Value retVal;
+                        if (!method.IsIntrinsic || !TryHandleIntrinsicCall(method, methodParams, out retVal))
+                        {
+                            recursionProtect ??= new Stack();
+                            recursionProtect.Push(methodIL.OwningMethod);
+                            Status callResult = TryScanMethod(method, methodParams, recursionProtect, ref instructionCounter, out retVal);
+                            if (!callResult.IsSuccessful)
                             {
-                                recursionProtect ??= new Stack();
-                                recursionProtect.Push(methodIL.OwningMethod);
-                                Status callResult = TryScanMethod(method, methodParams, recursionProtect, ref instructionCounter, out retVal);
-                                if (!callResult.IsSuccessful)
-                                {
-                                    recursionProtect.Pop();
-                                    return callResult;
-                                }
                                 recursionProtect.Pop();
+                                return callResult;
                             }
-
-                            if (!methodSig.ReturnType.IsVoid)
-                                stack.PushFromLocation(methodSig.ReturnType, retVal);
+                            recursionProtect.Pop();
                         }
-                        break;
+
+                        if (!methodSig.ReturnType.IsVoid)
+                            stack.PushFromLocation(methodSig.ReturnType, retVal);
+                    }
+                    break;
 
                     case ILOpcode.newobj:
-                        {
-                            MethodDesc ctor = (MethodDesc)methodIL.GetObject(reader.ReadILToken());
-                            MethodSignature ctorSig = ctor.Signature;
+                    {
+                        MethodDesc ctor = (MethodDesc)methodIL.GetObject(reader.ReadILToken());
+                        MethodSignature ctorSig = ctor.Signature;
 
-                            TypeDesc owningType = ctor.OwningType;
-                            if (!_compilationGroup.CanInline(methodIL.OwningMethod, ctor)
-                                || !_compilationGroup.ContainsType(owningType))
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Cannot inline");
-                            }
+                        TypeDesc owningType = ctor.OwningType;
+                        if (!_compilationGroup.CanInline(methodIL.OwningMethod, ctor)
+                            || !_compilationGroup.ContainsType(owningType))
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Cannot inline");
+                        }
 
-                            if (owningType.HasStaticConstructor
-                                    && owningType != methodIL.OwningMethod.OwningType
-                                    && !((MetadataType)owningType).IsBeforeFieldInit)
-                            {
+                        if (owningType.HasStaticConstructor
+                                && owningType != methodIL.OwningMethod.OwningType
+                                && !((MetadataType)owningType).IsBeforeFieldInit)
+                        {
+                            // Static constructor needs to execute before we do the call. If we can preinitialize, consider it executed,
+                            // otherwise there might be side effects we'd miss by letting this through.
+                            if (!TryGetNestedPreinitResult(methodIL.OwningMethod, (MetadataType)owningType, recursionProtect, ref instructionCounter, out _))
                                 return Status.Fail(methodIL.OwningMethod, opcode, "Static constructor");
-                            }
+                        }
 
-                            if (!owningType.IsDefType)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Not a class or struct");
-                            }
+                        if (!owningType.IsDefType)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Not a class or struct");
+                        }
 
-                            if (owningType.HasFinalizer)
+                        if (owningType.HasFinalizer)
+                        {
+                            // We have a finalizer. There's still a small chance it has been nopped out
+                            // with a feature switch. Check for that.
+                            byte[] finalizerMethodILBytes = _ilProvider.GetMethodIL(owningType.GetFinalizer()).GetILBytes();
+                            if (finalizerMethodILBytes.Length != 1 || finalizerMethodILBytes[0] != (byte)ILOpcode.ret)
                             {
-                                // We have a finalizer. There's still a small chance it has been nopped out
-                                // with a feature switch. Check for that.
-                                byte[] finalizerMethodILBytes = _ilProvider.GetMethodIL(owningType.GetFinalizer()).GetILBytes();
-                                if (finalizerMethodILBytes.Length != 1 || finalizerMethodILBytes[0] != (byte)ILOpcode.ret)
-                                {
-                                    // Finalizer might have observable side effects
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Finalizable class");
-                                }
+                                // Finalizer might have observable side effects
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Finalizable class");
                             }
+                        }
 
-                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(ctor))
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
-                            }
+                        if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(ctor))
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                        }
 
-                            if (owningType.RequiresAlign8())
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Align8");
-                            }
+                        if (owningType.RequiresAlign8())
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Align8");
+                        }
 
-                            Value[] ctorParameters = new Value[ctorSig.Length + 1];
-                            for (int i = ctorSig.Length - 1; i >= 0; i--)
-                            {
-                                ctorParameters[i + 1] = stack.PopIntoLocation(GetArgType(ctor, i + 1));
-                            }
+                        Value[] ctorParameters = new Value[ctorSig.Length + 1];
+                        for (int i = ctorSig.Length - 1; i >= 0; i--)
+                        {
+                            ctorParameters[i + 1] = stack.PopIntoLocation(GetArgType(ctor, i + 1));
+                        }
 
-                            AllocationSite allocSite = new AllocationSite(_type, instructionCounter);
+                        AllocationSite allocSite = new AllocationSite(_type, instructionCounter);
 
-                            if (!TryGetSpanElementType(owningType, isReadOnlySpan: true, out MetadataType readOnlySpanElementType)
-                                && !TryGetSpanElementType(owningType, isReadOnlySpan: false, out readOnlySpanElementType))
+                        Value instance;
+                        if (owningType.IsDelegate)
+                        {
+                            if (!(ctorParameters[2] is MethodPointerValue methodPointer))
                             {
-                                readOnlySpanElementType = null;
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Unverifiable delegate creation");
                             }
 
-                            Value instance;
-                            if (owningType.IsDelegate)
+                            ReferenceTypeValue firstParameter = null;
+                            if (ctorParameters[1] != null)
                             {
-                                if (!(ctorParameters[2] is MethodPointerValue methodPointer))
+                                firstParameter = ctorParameters[1] as ReferenceTypeValue;
+                                if (firstParameter == null)
                                 {
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Unverifiable delegate creation");
-                                }
-
-                                ReferenceTypeValue firstParameter = null;
-                                if (ctorParameters[1] != null)
-                                {
-                                    firstParameter = ctorParameters[1] as ReferenceTypeValue;
-                                    if (firstParameter == null)
-                                    {
-                                        ThrowHelper.ThrowInvalidProgramException();
-                                    }
-                                }
-
-                                MethodDesc pointedMethod = methodPointer.PointedToMethod;
-                                if ((firstParameter == null) != pointedMethod.Signature.IsStatic)
-                                {
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Open/closed static/instance delegate mismatch");
+                                    ThrowHelper.ThrowInvalidProgramException();
                                 }
+                            }
 
-                                if (firstParameter != null && pointedMethod.HasInstantiation)
-                                {
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Delegate with fat pointer");
-                                }
+                            MethodDesc pointedMethod = methodPointer.PointedToMethod;
+                            if ((firstParameter == null) != pointedMethod.Signature.IsStatic)
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Open/closed static/instance delegate mismatch");
+                            }
 
-                                instance = new DelegateInstance(owningType, pointedMethod, firstParameter, allocSite);
+                            if (firstParameter != null && pointedMethod.HasInstantiation)
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Delegate with fat pointer");
                             }
-                            else if (readOnlySpanElementType != null && ctorSig.Length == 2 && ctorSig[0].IsByRef
-                                && ctorSig[1].IsWellKnownType(WellKnownType.Int32))
+
+                            instance = new DelegateInstance(owningType, pointedMethod, firstParameter, allocSite);
+                        }
+                        else
+                        {
+                            if (owningType.IsValueType)
                             {
-                                int length = ctorParameters[2].AsInt32();
-                                if (ctorParameters[1] is not ByRefValue byref)
+                                instance = NewUninitializedLocationValue(owningType, fieldThatOwnsMemory: null);
+                                if (!instance.TryCreateByRef(out ctorParameters[0]))
                                 {
-                                    ThrowHelper.ThrowInvalidProgramException();
-                                    return default; // unreached
+                                    return Status.Fail(methodIL.OwningMethod, opcode, "Can't make `this`");
                                 }
-
-                                byte[] bytes = byref.PointedToBytes;
-                                int byteOffset = byref.PointedToOffset;
-                                int byteLength = length * readOnlySpanElementType.InstanceFieldSize.AsInt;
-
-                                if (bytes.Length - byteOffset < byteLength)
-                                    return Status.Fail(ctor, "Out of range memory access");
-
-                                instance = new ReadOnlySpanValue(readOnlySpanElementType, bytes, byteOffset, byteLength);
                             }
-                            else if (readOnlySpanElementType != null && ctorSig.Length == 1 && ctorSig[0].IsArray
-                                && ctorParameters[1] is ArrayInstance spanArrayInstance
-                                && spanArrayInstance.TryGetReadOnlySpan(out ReadOnlySpanValue arraySpan))
+                            else
                             {
-                                instance = arraySpan;
+                                instance = new ObjectInstance((DefType)owningType, allocSite);
+                                ctorParameters[0] = instance;
                             }
-                            else
+
+                            if (((DefType)owningType).ContainsGCPointers)
                             {
-                                if (owningType.IsValueType)
-                                {
-                                    instance = new ValueTypeValue(owningType);
-                                    bool byrefCreated = instance.TryCreateByRef(out ctorParameters[0]);
-                                    Debug.Assert(byrefCreated);
-                                }
-                                else
-                                {
-                                    instance = new ObjectInstance((DefType)owningType, allocSite);
-                                    ctorParameters[0] = instance;
-                                }
+                                // We don't want to end up with GC pointers in the frozen region
+                                // because write barriers can't handle that.
 
-                                if (((DefType)owningType).ContainsGCPointers)
+                                // We can make an exception for readonly fields.
+                                bool allGcPointersAreReadonly = true;
+                                TypeDesc currentType = owningType;
+                                do
                                 {
-                                    // We don't want to end up with GC pointers in the frozen region
-                                    // because write barriers can't handle that.
-
-                                    // We can make an exception for readonly fields.
-                                    bool allGcPointersAreReadonly = true;
-                                    TypeDesc currentType = owningType;
-                                    do
+                                    foreach (FieldDesc field in currentType.GetFields())
                                     {
-                                        foreach (FieldDesc field in currentType.GetFields())
-                                        {
-                                            if (field.IsStatic)
-                                                continue;
+                                        if (field.IsStatic)
+                                            continue;
 
-                                            TypeDesc fieldType = field.FieldType;
-                                            if (fieldType.IsGCPointer)
-                                            {
-                                                if (!_readOnlyPolicy.IsReadOnly(field))
-                                                {
-                                                    allGcPointersAreReadonly = false;
-                                                    break;
-                                                }
-                                            }
-                                            else if (fieldType.IsValueType && ((DefType)fieldType).ContainsGCPointers)
+                                        TypeDesc fieldType = field.FieldType;
+                                        if (fieldType.IsGCPointer)
+                                        {
+                                            if (!_readOnlyPolicy.IsReadOnly(field))
                                             {
                                                 allGcPointersAreReadonly = false;
                                                 break;
                                             }
                                         }
-                                    } while (allGcPointersAreReadonly && (currentType = currentType.BaseType) != null && !currentType.IsValueType);
-
-                                    if (!allGcPointersAreReadonly)
-                                        return Status.Fail(methodIL.OwningMethod, opcode, "GC pointers");
-                                }
+                                        else if (fieldType.IsValueType && ((DefType)fieldType).ContainsGCPointers)
+                                        {
+                                            allGcPointersAreReadonly = false;
+                                            break;
+                                        }
+                                    }
+                                } while (allGcPointersAreReadonly && (currentType = currentType.BaseType) != null && !currentType.IsValueType);
 
-                                recursionProtect ??= new Stack();
-                                recursionProtect.Push(methodIL.OwningMethod);
-                                Status ctorCallResult = TryScanMethod(ctor, ctorParameters, recursionProtect, ref instructionCounter, out _);
-                                if (!ctorCallResult.IsSuccessful)
-                                {
-                                    recursionProtect.Pop();
-                                    return ctorCallResult;
-                                }
+                                if (!allGcPointersAreReadonly)
+                                    return Status.Fail(methodIL.OwningMethod, opcode, "GC pointers");
+                            }
 
+                            recursionProtect ??= new Stack();
+                            recursionProtect.Push(methodIL.OwningMethod);
+                            Status ctorCallResult = TryScanMethod(ctor, ctorParameters, recursionProtect, ref instructionCounter, out _);
+                            if (!ctorCallResult.IsSuccessful)
+                            {
                                 recursionProtect.Pop();
+                                return ctorCallResult;
                             }
 
-                            stack.PushFromLocation(owningType, instance);
+                            recursionProtect.Pop();
                         }
-                        break;
+
+                        stack.PushFromLocation(owningType, instance);
+                    }
+                    break;
 
                     case ILOpcode.localloc:
+                    {
+                        StackEntry entry = stack.Pop();
+                        long size = entry.ValueKind switch
                         {
-                            // Localloc returns an unmanaged pointer to the allocated memory.
-                            // We can't model that in the interpreter memory model. However,
-                            // we can have a narrow path for a common pattern in Span construction:
-                            //
-                            // ldc.i4 X
-                            // localloc
-                            // ldc.i4 X
-                            // newobj instance void valuetype System.Span`1::.ctor(void*, int32)
-                            StackEntry entry = stack.Pop();
-                            long size = entry.ValueKind switch
-                            {
-                                StackValueKind.Int32 => entry.Value.AsInt32(),
-                                StackValueKind.NativeInt => (context.Target.PointerSize == 4)
-                                    ? entry.Value.AsInt32() : entry.Value.AsInt64(),
-                                _ => long.MaxValue
-                            };
+                            StackValueKind.Int32 => entry.Value.AsInt32(),
+                            StackValueKind.NativeInt => (context.Target.PointerSize == 4)
+                                ? entry.Value.AsInt32() : entry.Value.AsInt64(),
+                            _ => long.MaxValue
+                        };
 
-                            // Arbitrary limit for allocation size to prevent compiler OOM
-                            if (size < 0 || size > 8192)
-                                return Status.Fail(methodIL.OwningMethod, ILOpcode.localloc);
+                        // Arbitrary limit for allocation size to prevent compiler OOM
+                        if (size < 0 || size > 8192)
+                            return Status.Fail(methodIL.OwningMethod, ILOpcode.localloc);
 
-                            opcode = reader.ReadILOpcode();
-                            if (opcode < ILOpcode.ldc_i4_0 || opcode > ILOpcode.ldc_i4)
-                                return Status.Fail(methodIL.OwningMethod, ILOpcode.localloc);
+                        stack.Push(StackValueKind.NativeInt, new ByRefValue(new byte[size], pointedToOffset: 0));
+                    }
+                    break;
 
-                            int maybeSpanLength = opcode switch
-                            {
-                                ILOpcode.ldc_i4_s => (sbyte)reader.ReadILByte(),
-                                ILOpcode.ldc_i4 => (int)reader.ReadILUInt32(),
-                                _ => opcode - ILOpcode.ldc_i4_0,
-                            };
+                    case ILOpcode.stfld:
+                    {
+                        FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
 
-                            opcode = reader.ReadILOpcode();
-                            if (opcode != ILOpcode.newobj)
-                                return Status.Fail(methodIL.OwningMethod, ILOpcode.localloc);
+                        if (field.IsStatic)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Static field with stfld");
+                        }
 
-                            var ctorMethod = (MethodDesc)methodIL.GetObject(reader.ReadILToken());
-                            if (!TryGetSpanElementType(ctorMethod.OwningType, isReadOnlySpan: false, out MetadataType elementType)
-                                || ctorMethod.Signature.Length != 2
-                                || !ctorMethod.Signature[0].IsPointer
-                                || !ctorMethod.Signature[1].IsWellKnownType(WellKnownType.Int32)
-                                || maybeSpanLength * elementType.InstanceFieldSize.AsInt != size)
-                                return Status.Fail(methodIL.OwningMethod, ILOpcode.localloc);
+                        Value value = stack.PopIntoLocation(field.FieldType);
+                        StackEntry instance = stack.Pop();
 
-                            var instance = new ReadOnlySpanValue(elementType, new byte[size], index: 0, (int)size);
-                            stack.PushFromLocation(ctorMethod.OwningType, instance);
+                        if (field.FieldType.IsGCPointer && value != null)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Reference field");
                         }
-                        break;
 
-                    case ILOpcode.stfld:
+                        if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
                         {
-                            FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
-
-                            if (field.IsStatic)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Static field with stfld");
-                            }
-
-                            Value value = stack.PopIntoLocation(field.FieldType);
-                            StackEntry instance = stack.Pop();
-
-                            if (field.FieldType.IsGCPointer && value != null)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Reference field");
-                            }
-
-                            if (field.FieldType.IsByRef)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Byref field");
-                            }
-
-                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
-                            }
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                        }
 
-                            if (instance.Value is not IHasInstanceFields settableInstance
-                                || !settableInstance.TrySetField(field, value))
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Not settable");
-                            }
+                        if (instance.Value is not IHasInstanceFields settableInstance
+                            || !settableInstance.TrySetField(field, value))
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Not settable");
                         }
-                        break;
+                    }
+                    break;
 
                     case ILOpcode.ldfld:
-                        {
-                            FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
+                    {
+                        FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
 
-                            if (field.FieldType.IsGCPointer
-                                || field.IsStatic)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
+                        if (field.FieldType.IsGCPointer
+                            || field.IsStatic)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
 
-                            StackEntry instance = stack.Pop();
+                        StackEntry instance = stack.Pop();
 
-                            var loadableInstance = instance.Value as IHasInstanceFields;
-                            if (loadableInstance == null)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
+                        var loadableInstance = instance.Value as IHasInstanceFields;
+                        if (loadableInstance == null)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
 
-                            Value fieldValue = loadableInstance.GetField(field);
+                        Value fieldValue = loadableInstance.GetField(field);
 
-                            stack.PushFromLocation(field.FieldType, fieldValue);
-                        }
-                        break;
+                        stack.PushFromLocation(field.FieldType, fieldValue);
+                    }
+                    break;
 
                     case ILOpcode.ldflda:
+                    {
+                        FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
+                        if (field.FieldType.IsGCPointer
+                            || field.IsStatic)
                         {
-                            FieldDesc field = (FieldDesc)methodIL.GetObject(reader.ReadILToken());
-                            if (field.FieldType.IsGCPointer
-                                || field.IsStatic)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
-
-                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
-                            }
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
 
-                            StackEntry instance = stack.Pop();
+                        if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                        }
 
-                            var loadableInstance = instance.Value as IHasInstanceFields;
-                            if (loadableInstance == null)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
+                        StackEntry instance = stack.Pop();
 
-                            stack.Push(StackValueKind.ByRef, loadableInstance.GetFieldAddress(field));
+                        var loadableInstance = instance.Value as IHasInstanceFields;
+                        if (loadableInstance == null)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
                         }
-                        break;
+
+                        stack.Push(StackValueKind.ByRef, loadableInstance.GetFieldAddress(field));
+                    }
+                    break;
 
                     case ILOpcode.conv_i:
                     case ILOpcode.conv_u:
@@ -839,166 +820,159 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
-                            if (... is (>= ILOpcode.ldind_i1 and <= ILOpcode.ldind_ref) or ILOpcode.ldobj))
-                            {
-                                // In the interpreter memory model, there's no conversion from a byref to an integer.
-                                // Roslyn however sometimes emits a sequence of conv_u followed by ldind and we can
-                                // have a narrow path to handle that one.
-                                //
-                                // For example:
-                                //
-                                // static unsafe U Read(T val) where T : unmanaged where U : unmanaged => *(U*)&val;
-                                stack.Push(popped);
-                                goto again;
+                        }
+                        else if (popped.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64)
+                        {
+                            long val = popped.Value.AsInt64();
+                            switch (opcode)
+                            {
+                                case ILOpcode.conv_u:
+                                case ILOpcode.conv_i:
+                                    stack.Push(StackValueKind.NativeInt,
+                                        context.Target.PointerSize == 8 ? ValueTypeValue.FromInt64(val) : ValueTypeValue.FromInt32((int)val));
+                                    break;
+                                case ILOpcode.conv_i1:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((sbyte)val));
+                                    break;
+                                case ILOpcode.conv_i2:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((short)val));
+                                    break;
+                                case ILOpcode.conv_i4:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((int)val));
+                                    break;
+                                case ILOpcode.conv_i8:
+                                    stack.Push(StackValueKind.Int64, ValueTypeValue.FromInt64(val));
+                                    break;
+                                case ILOpcode.conv_u1:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((byte)val));
+                                    break;
+                                case ILOpcode.conv_u2:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((ushort)val));
+                                    break;
+                                case ILOpcode.conv_u4:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((int)val));
+                                    break;
+                                case ILOpcode.conv_u8:
+                                    stack.Push(StackValueKind.Int64, ValueTypeValue.FromInt64(val));
+                                    break;
+                                case ILOpcode.conv_r4:
+                                    stack.Push(StackValueKind.Float, ValueTypeValue.FromDouble((float)val));
+                                    break;
+                                case ILOpcode.conv_r8:
+                                    stack.Push(StackValueKind.Float, ValueTypeValue.FromDouble((double)val));
+                                    break;
+                                default:
+                                    return Status.Fail(methodIL.OwningMethod, opcode);
                             }
-                            else
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
+                        else if (popped.ValueKind == StackValueKind.Float)
+                        {
+                            double val = popped.Value.AsDouble();
+                            switch (opcode)
+                            {
+                                case ILOpcode.conv_i:
+                                    stack.Push(StackValueKind.NativeInt,
+                                        context.Target.PointerSize == 8 ? ValueTypeValue.FromInt64((long)val) : ValueTypeValue.FromInt32((int)val));
+                                    break;
+                                case ILOpcode.conv_u:
+                                    stack.Push(StackValueKind.NativeInt,
+                                        context.Target.PointerSize == 8 ? ValueTypeValue.FromInt64((long)(ulong)val) : ValueTypeValue.FromInt32((int)(uint)val));
+                                    break;
+                                case ILOpcode.conv_i1:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((sbyte)val));
+                                    break;
+                                case ILOpcode.conv_i2:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((short)val));
+                                    break;
+                                case ILOpcode.conv_i4:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((int)val));
+                                    break;
+                                case ILOpcode.conv_i8:
+                                    stack.Push(StackValueKind.Int64, ValueTypeValue.FromInt64((long)val));
+                                    break;
+                                case ILOpcode.conv_u1:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((byte)val));
+                                    break;
+                                case ILOpcode.conv_u2:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((ushort)val));
+                                    break;
+                                case ILOpcode.conv_u4:
+                                    stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((int)(uint)val));
+                                    break;
+                                case ILOpcode.conv_u8:
+                                    stack.Push(StackValueKind.Int64, ValueTypeValue.FromInt64((long)(ulong)val));
+                                    break;
+                                case ILOpcode.conv_r4:
+                                    stack.Push(StackValueKind.Float, ValueTypeValue.FromDouble((float)val));
+                                    break;
+                                case ILOpcode.conv_r8:
+                                    stack.Push(StackValueKind.Float, ValueTypeValue.FromDouble(val));
+                                    break;
+                                default:
+                                    return Status.Fail(methodIL.OwningMethod, opcode);
                             }
                         }
-                        break;
+                        else
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
+                    }
+                    break;
 
                     case ILOpcode.ldarg_0:
                     case ILOpcode.ldarg_1:
@@ -1006,64 +980,64 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
-                                ILOpcode.ldarg_s => reader.ReadILByte(),
-                                ILOpcode.ldarg => reader.ReadILUInt16(),
-                                _ => opcode - ILOpcode.ldarg_0,
-                            };
-                            stack.PushFromLocation(GetArgType(methodIL.OwningMethod, index), parameters[index]);
-                        }
-                        break;
+                            ILOpcode.ldarg_s => reader.ReadILByte(),
+                            ILOpcode.ldarg => reader.ReadILUInt16(),
+                            _ => opcode - ILOpcode.ldarg_0,
+                        };
+                        stack.PushFromLocation(GetArgType(methodIL.OwningMethod, index), parameters[index]);
+                    }
+                    break;
 
                     case ILOpcode.starg_s:
                     case ILOpcode.starg:
+                    {
+                        int index = opcode == ILOpcode.starg ? reader.ReadILUInt16() : reader.ReadILByte();
+                        TypeDesc argType = GetArgType(methodIL.OwningMethod, index);
+                        if (parameters[index] is IAssignableValue assignableParam)
                         {
-                            int index = opcode == ILOpcode.starg ? reader.ReadILUInt16() : reader.ReadILByte();
-                            TypeDesc argType = GetArgType(methodIL.OwningMethod, index);
-                            if (parameters[index] is IAssignableValue assignableParam)
+                            if (!assignableParam.TryAssign(stack.PopIntoLocation(argType)))
                             {
-                                if (!assignableParam.TryAssign(stack.PopIntoLocation(argType)))
-                                {
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported store");
-                                }
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported store");
                             }
-                            else
-                                parameters[index] = stack.PopIntoLocation(argType);
                         }
-                        break;
+                        else
+                            parameters[index] = stack.PopIntoLocation(argType);
+                    }
+                    break;
 
                     case ILOpcode.ldtoken:
+                    {
+                        var token = methodIL.GetObject(reader.ReadILToken());
+                        if (token is FieldDesc field)
                         {
-                            var token = methodIL.GetObject(reader.ReadILToken());
-                            if (token is FieldDesc field)
-                            {
-                                stack.Push(new StackEntry(StackValueKind.ValueType, new RuntimeFieldHandleValue(field)));
-                            }
-                            else if (token is TypeDesc type)
-                            {
-                                stack.Push(new StackEntry(StackValueKind.ValueType, new RuntimeTypeHandleValue(type)));
-                            }
-                            else
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
+                            stack.Push(new StackEntry(StackValueKind.ValueType, new RuntimeFieldHandleValue(field)));
                         }
-                        break;
+                        else if (token is TypeDesc type)
+                        {
+                            stack.Push(new StackEntry(StackValueKind.ValueType, new RuntimeTypeHandleValue(type)));
+                        }
+                        else
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
+                    }
+                    break;
 
                     case ILOpcode.ldftn:
-                        {
-                            if (constrainedType != null)
-                                return Status.Fail(methodIL.OwningMethod, ILOpcode.constrained);
+                    {
+                        if (constrainedType != null)
+                            return Status.Fail(methodIL.OwningMethod, ILOpcode.constrained);
 
-                            var method = methodIL.GetObject(reader.ReadILToken()) as MethodDesc;
-                            if (method != null)
-                                stack.Push(StackValueKind.NativeInt, new MethodPointerValue(method));
-                            else
-                                ThrowHelper.ThrowInvalidProgramException();
-                        }
-                        break;
+                        var method = methodIL.GetObject(reader.ReadILToken()) as MethodDesc;
+                        if (method != null)
+                            stack.Push(StackValueKind.NativeInt, new MethodPointerValue(method));
+                        else
+                            ThrowHelper.ThrowInvalidProgramException();
+                    }
+                    break;
 
                     case ILOpcode.ldloc_0:
                     case ILOpcode.ldloc_1:
@@ -1071,22 +1045,22 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
-                                ILOpcode.ldloc_s => reader.ReadILByte(),
-                                ILOpcode.ldloc => reader.ReadILUInt16(),
-                                _ => opcode - ILOpcode.ldloc_0,
-                            };
-
-                            if (index >= locals.Length)
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                            ILOpcode.ldloc_s => reader.ReadILByte(),
+                            ILOpcode.ldloc => reader.ReadILUInt16(),
+                            _ => opcode - ILOpcode.ldloc_0,
+                        };
 
-                            stack.PushFromLocation(localTypes[index].Type, locals[index]);
+                        if (index >= locals.Length)
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
                         }
-                        break;
+
+                        stack.PushFromLocation(localTypes[index].Type, locals[index]);
+                    }
+                    break;
 
                     case ILOpcode.stloc_0:
                     case ILOpcode.stloc_1:
@@ -1094,79 +1068,80 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
-                                ILOpcode.stloc_s => reader.ReadILByte(),
-                                ILOpcode.stloc => reader.ReadILUInt16(),
-                                _ => opcode - ILOpcode.stloc_0,
-                            };
+                            ILOpcode.stloc_s => reader.ReadILByte(),
+                            ILOpcode.stloc => reader.ReadILUInt16(),
+                            _ => opcode - ILOpcode.stloc_0,
+                        };
 
-                            if (index >= locals.Length)
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                        if (index >= locals.Length)
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
 
-                            TypeDesc localType = localTypes[index].Type;
-                            if (locals[index] is IAssignableValue assignableLocal)
+                        TypeDesc localType = localTypes[index].Type;
+                        if (locals[index] is IAssignableValue assignableLocal)
+                        {
+                            if (!assignableLocal.TryAssign(stack.PopIntoLocation(localType)))
                             {
-                                if (!assignableLocal.TryAssign(stack.PopIntoLocation(localType)))
-                                {
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported store");
-                                }
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported store");
                             }
-                            else
-                                locals[index] = stack.PopIntoLocation(localType);
-
                         }
-                        break;
+                        else
+                            locals[index] = stack.PopIntoLocation(localType);
+
+                    }
+                    break;
 
                     case ILOpcode.ldarga_s:
                     case ILOpcode.ldarga:
                     case ILOpcode.ldloca_s:
                     case ILOpcode.ldloca:
+                    {
+                        int index = opcode switch
                         {
-                            int index = opcode switch
-                            {
-                                ILOpcode.ldloca_s or ILOpcode.ldarga_s => reader.ReadILByte(),
-                                ILOpcode.ldloca or ILOpcode.ldarga => reader.ReadILUInt16(),
-                                _ => throw new NotImplementedException(), // Unreachable
-                            };
+                            ILOpcode.ldloca_s or ILOpcode.ldarga_s => reader.ReadILByte(),
+                            ILOpcode.ldloca or ILOpcode.ldarga => reader.ReadILUInt16(),
+                            _ => throw new NotImplementedException(), // Unreachable
+                        };
 
-                            Value[] storage = opcode is ILOpcode.ldloca or ILOpcode.ldloca_s ? locals : parameters;
-                            if (index >= storage.Length)
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                        Value[] storage = opcode is ILOpcode.ldloca or ILOpcode.ldloca_s ? locals : parameters;
+                        if (index >= storage.Length)
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
 
-                            Value localValue = storage[index];
-                            if (localValue == null || !localValue.TryCreateByRef(out Value byrefValue))
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
-                            else
-                            {
-                                stack.Push(StackValueKind.ByRef, byrefValue);
-                            }
+                        Value localValue = storage[index];
+                        if (localValue == null || !localValue.TryCreateByRef(out Value byrefValue))
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
                         }
-                        break;
+                        else
+                        {
+                            stack.Push(StackValueKind.ByRef, byrefValue);
+                        }
+                    }
+                    break;
 
                     case ILOpcode.initobj:
+                    {
+                        StackEntry popped = stack.Pop();
+                        if (popped.ValueKind != StackValueKind.ByRef)
                         {
-                            StackEntry popped = stack.Pop();
-                            if (popped.ValueKind != StackValueKind.ByRef)
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
 
-                            TypeDesc token = (TypeDesc)methodIL.GetObject(reader.ReadILToken());
-                            if (token.IsGCPointer || popped.Value is not ByRefValue byrefVal)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
-                            byrefVal.Initialize(token.GetElementSize().AsInt);
+                        TypeDesc token = (TypeDesc)methodIL.GetObject(reader.ReadILToken());
+                        if (token.IsGCPointer
+                            || popped.Value is not ByRefValueBase byrefVal
+                            || !byrefVal.TryInitialize(token.GetElementSize().AsInt))
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
                         }
-                        break;
+                    }
+                    break;
 
                     case ILOpcode.br:
                     case ILOpcode.brfalse:
@@ -1194,261 +1169,262 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
+                        int delta = opcode >= ILOpcode.br ?
+                            (int)reader.ReadILUInt32() :
+                            (sbyte)reader.ReadILByte();
+                        int target = reader.Offset + delta;
+                        if (target < 0
+                            || target > reader.Size)
                         {
-                            int delta = opcode >= ILOpcode.br ?
-                                (int)reader.ReadILUInt32() :
-                                (sbyte)reader.ReadILByte();
-                            int target = reader.Offset + delta;
-                            if (target < 0
-                                || target > reader.Size)
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
 
-                            ILOpcode normalizedOpcode = opcode >= ILOpcode.br ?
-                                opcode - ILOpcode.br + ILOpcode.br_s:
-                                opcode;
+                        ILOpcode normalizedOpcode = opcode >= ILOpcode.br ?
+                            opcode - ILOpcode.br + ILOpcode.br_s :
+                            opcode;
+
+                        bool branchTaken;
+                        if (normalizedOpcode == ILOpcode.brtrue_s || normalizedOpcode == ILOpcode.brfalse_s)
+                        {
+                            StackEntry condition = stack.Pop();
+                            if (condition.ValueKind == StackValueKind.Int32 || (condition.ValueKind == StackValueKind.NativeInt && context.Target.PointerSize == 4))
+                                branchTaken = normalizedOpcode == ILOpcode.brfalse_s
+                                    ? condition.Value.AsInt32() == 0 : condition.Value.AsInt32() != 0;
+                            else if (condition.ValueKind == StackValueKind.Int64 || (condition.ValueKind == StackValueKind.NativeInt && context.Target.PointerSize == 8))
+                                branchTaken = normalizedOpcode == ILOpcode.brfalse_s
+                                    ? condition.Value.AsInt64() == 0 : condition.Value.AsInt64() != 0;
+                            else if (condition.ValueKind == StackValueKind.ObjRef)
+                                branchTaken = normalizedOpcode == ILOpcode.brfalse_s
+                                    ? condition.Value == null : condition.Value != null;
+                            else
+                                return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
+                        else if (normalizedOpcode == ILOpcode.blt_s || normalizedOpcode == ILOpcode.bgt_s
+                            || normalizedOpcode == ILOpcode.bge_s || normalizedOpcode == ILOpcode.beq_s
+                            || normalizedOpcode == ILOpcode.ble_s || normalizedOpcode == ILOpcode.blt_un_s
+                            || normalizedOpcode == ILOpcode.ble_un_s || normalizedOpcode == ILOpcode.bge_un_s
+                            || normalizedOpcode == ILOpcode.bgt_un_s || normalizedOpcode == ILOpcode.bne_un_s)
+                        {
+                            StackEntry value2 = stack.Pop();
+                            StackEntry value1 = stack.Pop();
 
-                            bool branchTaken;
-                            if (normalizedOpcode == ILOpcode.brtrue_s || normalizedOpcode == ILOpcode.brfalse_s)
+                            if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32)
                             {
-                                StackEntry condition = stack.Pop();
-                                if (condition.ValueKind == StackValueKind.Int32 || (condition.ValueKind == StackValueKind.NativeInt && context.Target.PointerSize == 4))
-                                    branchTaken = normalizedOpcode == ILOpcode.brfalse_s
-                                        ? condition.Value.AsInt32() == 0 : condition.Value.AsInt32() != 0;
-                                else if (condition.ValueKind == StackValueKind.Int64 || (condition.ValueKind == StackValueKind.NativeInt && context.Target.PointerSize == 8))
-                                    branchTaken = normalizedOpcode == ILOpcode.brfalse_s
-                                        ? condition.Value.AsInt64() == 0 : condition.Value.AsInt64() != 0;
-                                else if (condition.ValueKind == StackValueKind.ObjRef)
-                                    branchTaken = normalizedOpcode == ILOpcode.brfalse_s
-                                        ? condition.Value == null : condition.Value != null;
-                                else
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
+                                branchTaken = normalizedOpcode switch
+                                {
+                                    ILOpcode.blt_s => value1.Value.AsInt32() < value2.Value.AsInt32(),
+                                    ILOpcode.blt_un_s => (uint)value1.Value.AsInt32() < (uint)value2.Value.AsInt32(),
+                                    ILOpcode.bgt_s => value1.Value.AsInt32() > value2.Value.AsInt32(),
+                                    ILOpcode.bgt_un_s => (uint)value1.Value.AsInt32() > (uint)value2.Value.AsInt32(),
+                                    ILOpcode.bge_s => value1.Value.AsInt32() >= value2.Value.AsInt32(),
+                                    ILOpcode.bge_un_s => (uint)value1.Value.AsInt32() >= (uint)value2.Value.AsInt32(),
+                                    ILOpcode.beq_s => value1.Value.AsInt32() == value2.Value.AsInt32(),
+                                    ILOpcode.bne_un_s => value1.Value.AsInt32() != value2.Value.AsInt32(),
+                                    ILOpcode.ble_s => value1.Value.AsInt32() <= value2.Value.AsInt32(),
+                                    ILOpcode.ble_un_s => (uint)value1.Value.AsInt32() <= (uint)value2.Value.AsInt32(),
+                                    _ => throw new NotImplementedException() // unreachable
+                                };
                             }
-                            else if (normalizedOpcode == ILOpcode.blt_s || normalizedOpcode == ILOpcode.bgt_s
-                                || normalizedOpcode == ILOpcode.bge_s || normalizedOpcode == ILOpcode.beq_s
-                                || normalizedOpcode == ILOpcode.ble_s || normalizedOpcode == ILOpcode.blt_un_s
-                                || normalizedOpcode == ILOpcode.ble_un_s || normalizedOpcode == ILOpcode.bge_un_s
-                                || normalizedOpcode == ILOpcode.bgt_un_s || normalizedOpcode == ILOpcode.bne_un_s)
+                            else if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64)
                             {
-                                StackEntry value2 = stack.Pop();
-                                StackEntry value1 = stack.Pop();
-
-                                if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32)
-                                {
-                                    branchTaken = normalizedOpcode switch
-                                    {
-                                        ILOpcode.blt_s => value1.Value.AsInt32() < value2.Value.AsInt32(),
-                                        ILOpcode.blt_un_s => (uint)value1.Value.AsInt32() < (uint)value2.Value.AsInt32(),
-                                        ILOpcode.bgt_s => value1.Value.AsInt32() > value2.Value.AsInt32(),
-                                        ILOpcode.bgt_un_s => (uint)value1.Value.AsInt32() > (uint)value2.Value.AsInt32(),
-                                        ILOpcode.bge_s => value1.Value.AsInt32() >= value2.Value.AsInt32(),
-                                        ILOpcode.bge_un_s => (uint)value1.Value.AsInt32() >= (uint)value2.Value.AsInt32(),
-                                        ILOpcode.beq_s => value1.Value.AsInt32() == value2.Value.AsInt32(),
-                                        ILOpcode.bne_un_s => value1.Value.AsInt32() != value2.Value.AsInt32(),
-                                        ILOpcode.ble_s => value1.Value.AsInt32() <= value2.Value.AsInt32(),
-                                        ILOpcode.ble_un_s => (uint)value1.Value.AsInt32() <= (uint)value2.Value.AsInt32(),
-                                        _ => throw new NotImplementedException() // unreachable
-                                    };
-                                }
-                                else if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64)
-                                {
-                                    branchTaken = normalizedOpcode switch
-                                    {
-                                        ILOpcode.blt_s => value1.Value.AsInt64() < value2.Value.AsInt64(),
-                                        ILOpcode.blt_un_s => (ulong)value1.Value.AsInt64() < (ulong)value2.Value.AsInt64(),
-                                        ILOpcode.bgt_s => value1.Value.AsInt64() > value2.Value.AsInt64(),
-                                        ILOpcode.bgt_un_s => (ulong)value1.Value.AsInt64() > (ulong)value2.Value.AsInt64(),
-                                        ILOpcode.bge_s => value1.Value.AsInt64() >= value2.Value.AsInt64(),
-                                        ILOpcode.bge_un_s => (ulong)value1.Value.AsInt64() >= (ulong)value2.Value.AsInt64(),
-                                        ILOpcode.beq_s => value1.Value.AsInt64() == value2.Value.AsInt64(),
-                                        ILOpcode.bne_un_s => value1.Value.AsInt64() != value2.Value.AsInt64(),
-                                        ILOpcode.ble_s => value1.Value.AsInt64() <= value2.Value.AsInt64(),
-                                        ILOpcode.ble_un_s => (ulong)value1.Value.AsInt64() <= (ulong)value2.Value.AsInt64(),
-                                        _ => throw new NotImplementedException() // unreachable
-                                    };
-                                }
-                                else if (value1.ValueKind == StackValueKind.Float && value2.ValueKind == StackValueKind.Float)
+                                branchTaken = normalizedOpcode switch
                                 {
-                                    branchTaken = normalizedOpcode switch
-                                    {
-                                        ILOpcode.blt_s => value1.Value.AsDouble() < value2.Value.AsDouble(),
-                                        ILOpcode.blt_un_s => !(value1.Value.AsDouble() >= value2.Value.AsDouble()),
-                                        ILOpcode.bgt_s => value1.Value.AsDouble() > value2.Value.AsDouble(),
-                                        ILOpcode.bgt_un_s => !(value1.Value.AsDouble() <= value2.Value.AsDouble()),
-                                        ILOpcode.bge_s => value1.Value.AsDouble() >= value2.Value.AsDouble(),
-                                        ILOpcode.bge_un_s => !(value1.Value.AsDouble() < value2.Value.AsDouble()),
-                                        ILOpcode.beq_s => value1.Value.AsDouble() == value2.Value.AsDouble(),
-                                        ILOpcode.bne_un_s => value1.Value.AsDouble() != value2.Value.AsDouble(),
-                                        ILOpcode.ble_s => value1.Value.AsDouble() <= value2.Value.AsDouble(),
-                                        ILOpcode.ble_un_s => !(value1.Value.AsDouble() > value2.Value.AsDouble()),
-                                        _ => throw new NotImplementedException() // unreachable
-                                    };
-                                }
-                                else
+                                    ILOpcode.blt_s => value1.Value.AsInt64() < value2.Value.AsInt64(),
+                                    ILOpcode.blt_un_s => (ulong)value1.Value.AsInt64() < (ulong)value2.Value.AsInt64(),
+                                    ILOpcode.bgt_s => value1.Value.AsInt64() > value2.Value.AsInt64(),
+                                    ILOpcode.bgt_un_s => (ulong)value1.Value.AsInt64() > (ulong)value2.Value.AsInt64(),
+                                    ILOpcode.bge_s => value1.Value.AsInt64() >= value2.Value.AsInt64(),
+                                    ILOpcode.bge_un_s => (ulong)value1.Value.AsInt64() >= (ulong)value2.Value.AsInt64(),
+                                    ILOpcode.beq_s => value1.Value.AsInt64() == value2.Value.AsInt64(),
+                                    ILOpcode.bne_un_s => value1.Value.AsInt64() != value2.Value.AsInt64(),
+                                    ILOpcode.ble_s => value1.Value.AsInt64() <= value2.Value.AsInt64(),
+                                    ILOpcode.ble_un_s => (ulong)value1.Value.AsInt64() <= (ulong)value2.Value.AsInt64(),
+                                    _ => throw new NotImplementedException() // unreachable
+                                };
+                            }
+                            else if (value1.ValueKind == StackValueKind.Float && value2.ValueKind == StackValueKind.Float)
+                            {
+                                branchTaken = normalizedOpcode switch
                                 {
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                                }
+                                    ILOpcode.blt_s => value1.Value.AsDouble() < value2.Value.AsDouble(),
+                                    ILOpcode.blt_un_s => !(value1.Value.AsDouble() >= value2.Value.AsDouble()),
+                                    ILOpcode.bgt_s => value1.Value.AsDouble() > value2.Value.AsDouble(),
+                                    ILOpcode.bgt_un_s => !(value1.Value.AsDouble() <= value2.Value.AsDouble()),
+                                    ILOpcode.bge_s => value1.Value.AsDouble() >= value2.Value.AsDouble(),
+                                    ILOpcode.bge_un_s => !(value1.Value.AsDouble() < value2.Value.AsDouble()),
+                                    ILOpcode.beq_s => value1.Value.AsDouble() == value2.Value.AsDouble(),
+                                    ILOpcode.bne_un_s => value1.Value.AsDouble() != value2.Value.AsDouble(),
+                                    ILOpcode.ble_s => value1.Value.AsDouble() <= value2.Value.AsDouble(),
+                                    ILOpcode.ble_un_s => !(value1.Value.AsDouble() > value2.Value.AsDouble()),
+                                    _ => throw new NotImplementedException() // unreachable
+                                };
                             }
                             else
                             {
-                                Debug.Assert(normalizedOpcode == ILOpcode.br_s);
-                                branchTaken = true;
+                                return Status.Fail(methodIL.OwningMethod, opcode);
                             }
+                        }
+                        else
+                        {
+                            Debug.Assert(normalizedOpcode == ILOpcode.br_s);
+                            branchTaken = true;
+                        }
 
-                            if (branchTaken)
-                            {
-                                reader.Seek(target);
-                            }
+                        if (branchTaken)
+                        {
+                            reader.Seek(target);
                         }
-                        break;
+                    }
+                    break;
 
                     case ILOpcode.switch_:
-                        {
-                            StackEntry val = stack.Pop();
-                            if (val.ValueKind is not StackValueKind.Int32)
-                                ThrowHelper.ThrowInvalidProgramException();
+                    {
+                        StackEntry val = stack.Pop();
+                        if (val.ValueKind is not StackValueKind.Int32)
+                            ThrowHelper.ThrowInvalidProgramException();
 
-                            uint target = (uint)val.Value.AsInt32();
+                        uint target = (uint)val.Value.AsInt32();
 
-                            uint count = reader.ReadILUInt32();
-                            int nextInstruction = reader.Offset + (int)(4 * count);
-                            if (target > count)
-                            {
-                                reader.Seek(nextInstruction);
-                            }
-                            else
-                            {
-                                reader.Seek(reader.Offset + (int)(4 * target));
-                                reader.Seek(nextInstruction + (int)reader.ReadILUInt32());
-                            }
+                        uint count = reader.ReadILUInt32();
+                        int nextInstruction = reader.Offset + (int)(4 * count);
+                        if (target > count)
+                        {
+                            reader.Seek(nextInstruction);
                         }
-                        break;
+                        else
+                        {
+                            reader.Seek(reader.Offset + (int)(4 * target));
+                            reader.Seek(nextInstruction + (int)reader.ReadILUInt32());
+                        }
+                    }
+                    break;
 
                     case ILOpcode.leave:
                     case ILOpcode.leave_s:
+                    {
+                        stack.Clear();
+
+                        // We assume no finally regions (would have to run them here)
+                        // This is validated before, but we're being paranoid.
+                        foreach (ILExceptionRegion ehRegion in ehRegions)
                         {
-                            stack.Clear();
+                            Debug.Assert(ehRegion.Kind != ILExceptionRegionKind.Finally);
+                        }
 
-                            // We assume no finally regions (would have to run them here)
-                            // This is validated before, but we're being paranoid.
-                            foreach (ILExceptionRegion ehRegion in ehRegions)
-                            {
-                                Debug.Assert(ehRegion.Kind != ILExceptionRegionKind.Finally);
-                            }
-
-                            int delta = opcode == ILOpcode.leave ?
-                                (int)reader.ReadILUInt32() :
-                                (sbyte)reader.ReadILByte();
-                            int target = reader.Offset + delta;
-                            if (target < 0
-                                || target > reader.Size)
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
-
-                            reader.Seek(target);
+                        int delta = opcode == ILOpcode.leave ?
+                            (int)reader.ReadILUInt32() :
+                            (sbyte)reader.ReadILByte();
+                        int target = reader.Offset + delta;
+                        if (target < 0
+                            || target > reader.Size)
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
                         }
-                        break;
+
+                        reader.Seek(target);
+                    }
+                    break;
 
                     case ILOpcode.clt:
                     case ILOpcode.clt_un:
                     case ILOpcode.cgt:
                     case ILOpcode.cgt_un:
-                        {
-                            StackEntry value1 = stack.Pop();
-                            StackEntry value2 = stack.Pop();
+                    {
+                        StackEntry value1 = stack.Pop();
+                        StackEntry value2 = stack.Pop();
 
-                            bool condition;
-                            if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32)
-                            {
-                                if (opcode == ILOpcode.cgt)
-                                    condition = value1.Value.AsInt32() < value2.Value.AsInt32();
-                                else if (opcode == ILOpcode.cgt_un)
-                                    condition = (uint)value1.Value.AsInt32() < (uint)value2.Value.AsInt32();
-                                else if (opcode == ILOpcode.clt)
-                                    condition = value1.Value.AsInt32() > value2.Value.AsInt32();
-                                else if (opcode == ILOpcode.clt_un)
-                                    condition = (uint)value1.Value.AsInt32() > (uint)value2.Value.AsInt32();
-                                else
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
-                            else if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64)
-                            {
-                                if (opcode == ILOpcode.cgt)
-                                    condition = value1.Value.AsInt64() < value2.Value.AsInt64();
-                                else if (opcode == ILOpcode.cgt_un)
-                                    condition = (ulong)value1.Value.AsInt64() < (ulong)value2.Value.AsInt64();
-                                else if (opcode == ILOpcode.clt)
-                                    condition = value1.Value.AsInt64() > value2.Value.AsInt64();
-                                else if (opcode == ILOpcode.clt_un)
-                                    condition = (ulong)value1.Value.AsInt64() > (ulong)value2.Value.AsInt64();
-                                else
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
-                            else if (value1.ValueKind == StackValueKind.Float && value2.ValueKind == StackValueKind.Float)
-                            {
-                                if (opcode == ILOpcode.cgt)
-                                    condition = value1.Value.AsDouble() < value2.Value.AsDouble();
-                                else if (opcode == ILOpcode.cgt_un)
-                                    condition = !(value1.Value.AsDouble() >= value2.Value.AsDouble());
-                                else if (opcode == ILOpcode.clt)
-                                    condition = value1.Value.AsDouble() > value2.Value.AsDouble();
-                                else if (opcode == ILOpcode.clt_un)
-                                    condition = !(value1.Value.AsDouble() <= value2.Value.AsDouble());
-                                else
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
-                            else if (value1.ValueKind == StackValueKind.ObjRef && value2.ValueKind == StackValueKind.ObjRef)
-                            {
-                                if (opcode == ILOpcode.cgt_un)
-                                    condition = value1.Value == null && value2.Value != null;
-                                else
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
+                        bool condition;
+                        if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32)
+                        {
+                            if (opcode == ILOpcode.cgt)
+                                condition = value1.Value.AsInt32() < value2.Value.AsInt32();
+                            else if (opcode == ILOpcode.cgt_un)
+                                condition = (uint)value1.Value.AsInt32() < (uint)value2.Value.AsInt32();
+                            else if (opcode == ILOpcode.clt)
+                                condition = value1.Value.AsInt32() > value2.Value.AsInt32();
+                            else if (opcode == ILOpcode.clt_un)
+                                condition = (uint)value1.Value.AsInt32() > (uint)value2.Value.AsInt32();
                             else
-                            {
                                 return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
-
-                            stack.Push(StackValueKind.Int32, condition
-                                    ? ValueTypeValue.FromInt32(1)
-                                    : ValueTypeValue.FromInt32(0));
                         }
-                        break;
-
-                    case ILOpcode.ceq:
+                        else if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64)
                         {
-                            StackEntry value1 = stack.Pop();
-                            StackEntry value2 = stack.Pop();
-
-                            if (value1.ValueKind == value2.ValueKind)
-                            {
-                                stack.Push(StackValueKind.Int32,
-                                    Value.Equals(value1.Value, value2.Value)
-                                    ? ValueTypeValue.FromInt32(1)
-                                    : ValueTypeValue.FromInt32(0));
-                            }
+                            if (opcode == ILOpcode.cgt)
+                                condition = value1.Value.AsInt64() < value2.Value.AsInt64();
+                            else if (opcode == ILOpcode.cgt_un)
+                                condition = (ulong)value1.Value.AsInt64() < (ulong)value2.Value.AsInt64();
+                            else if (opcode == ILOpcode.clt)
+                                condition = value1.Value.AsInt64() > value2.Value.AsInt64();
+                            else if (opcode == ILOpcode.clt_un)
+                                condition = (ulong)value1.Value.AsInt64() > (ulong)value2.Value.AsInt64();
                             else
-                            {
                                 return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
                         }
-                        break;
-
-                    case ILOpcode.neg:
+                        else if (value1.ValueKind == StackValueKind.Float && value2.ValueKind == StackValueKind.Float)
                         {
-                            StackEntry value = stack.Pop();
-                            if (value.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32)
-                                stack.Push(value.ValueKind, ValueTypeValue.FromInt32(-value.Value.AsInt32()));
-                            else if (value.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64)
-                                stack.Push(value.ValueKind, ValueTypeValue.FromInt64(-value.Value.AsInt64()));
-                            else if (value.ValueKind == StackValueKind.Float)
-                                stack.Push(value.ValueKind, ValueTypeValue.FromDouble(-value.Value.AsDouble()));
+                            if (opcode == ILOpcode.cgt)
+                                condition = value1.Value.AsDouble() < value2.Value.AsDouble();
+                            else if (opcode == ILOpcode.cgt_un)
+                                condition = !(value1.Value.AsDouble() >= value2.Value.AsDouble());
+                            else if (opcode == ILOpcode.clt)
+                                condition = value1.Value.AsDouble() > value2.Value.AsDouble();
+                            else if (opcode == ILOpcode.clt_un)
+                                condition = !(value1.Value.AsDouble() <= value2.Value.AsDouble());
                             else
                                 return Status.Fail(methodIL.OwningMethod, opcode);
                         }
-                        break;
+                        else if (value1.ValueKind == StackValueKind.ObjRef && value2.ValueKind == StackValueKind.ObjRef)
+                        {
+                            if (opcode == ILOpcode.cgt_un)
+                                condition = value1.Value == null && value2.Value != null;
+                            else
+                                return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
+                        else
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
+
+                        stack.Push(StackValueKind.Int32, condition
+                                ? ValueTypeValue.FromInt32(1)
+                                : ValueTypeValue.FromInt32(0));
+                    }
+                    break;
+
+                    case ILOpcode.ceq:
+                    {
+                        StackEntry value1 = stack.Pop();
+                        StackEntry value2 = stack.Pop();
+
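+                        // TryCompareEquality reports whether equality of the two values can be decided at all; if it can't, interpretation of this method is abandoned.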
+                        if (value1.ValueKind == value2.ValueKind
+                            && Value.TryCompareEquality(value1.Value, value2.Value, out bool compareResult))
+                        {
+                            stack.Push(StackValueKind.Int32,
+                                compareResult
+                                ? ValueTypeValue.FromInt32(1)
+                                : ValueTypeValue.FromInt32(0));
+                        }
+                        else
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
+                    }
+                    break;
+
+                    case ILOpcode.neg:
+                    {
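+                        // WithNormalizedNativeInt maps nint onto the Int32/Int64 handling; note that the original value kind is what gets pushed back.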
+                        StackEntry value = stack.Pop();
+                        if (value.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int32)
+                            stack.Push(value.ValueKind, ValueTypeValue.FromInt32(-value.Value.AsInt32()));
+                        else if (value.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64)
+                            stack.Push(value.ValueKind, ValueTypeValue.FromInt64(-value.Value.AsInt64()));
+                        else if (value.ValueKind == StackValueKind.Float)
+                            stack.Push(value.ValueKind, ValueTypeValue.FromDouble(-value.Value.AsDouble()));
+                        else
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                    }
+                    break;
 
                     case ILOpcode.or:
                     case ILOpcode.shl:
@@ -1460,133 +1436,135 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
+                                ILOpcode.or => value1.Value.AsInt32() | value2.Value.AsInt32(),
+                                ILOpcode.shl => value1.Value.AsInt32() << value2.Value.AsInt32(),
+                                ILOpcode.add => value1.Value.AsInt32() + value2.Value.AsInt32(),
+                                ILOpcode.sub => value1.Value.AsInt32() - value2.Value.AsInt32(),
+                                ILOpcode.and => value1.Value.AsInt32() & value2.Value.AsInt32(),
+                                ILOpcode.mul => value1.Value.AsInt32() * value2.Value.AsInt32(),
+                                ILOpcode.div => value1.Value.AsInt32() / value2.Value.AsInt32(),
+                                ILOpcode.div_un => (int)((uint)value1.Value.AsInt32() / (uint)value2.Value.AsInt32()),
+                                ILOpcode.rem => value1.Value.AsInt32() % value2.Value.AsInt32(),
+                                ILOpcode.rem_un => (int)((uint)value1.Value.AsInt32() % (uint)value2.Value.AsInt32()),
+                                _ => throw new NotImplementedException(), // unreachable
+                            };
 
-                                int result = opcode switch
-                                {
-                                    ILOpcode.or => value1.Value.AsInt32() | value2.Value.AsInt32(),
-                                    ILOpcode.shl => value1.Value.AsInt32() << value2.Value.AsInt32(),
-                                    ILOpcode.add => value1.Value.AsInt32() + value2.Value.AsInt32(),
-                                    ILOpcode.sub => value1.Value.AsInt32() - value2.Value.AsInt32(),
-                                    ILOpcode.and => value1.Value.AsInt32() & value2.Value.AsInt32(),
-                                    ILOpcode.mul => value1.Value.AsInt32() * value2.Value.AsInt32(),
-                                    ILOpcode.div => value1.Value.AsInt32() / value2.Value.AsInt32(),
-                                    ILOpcode.div_un => (int)((uint)value1.Value.AsInt32() / (uint)value2.Value.AsInt32()),
-                                    ILOpcode.rem => value1.Value.AsInt32() % value2.Value.AsInt32(),
-                                    ILOpcode.rem_un => (int)((uint)value1.Value.AsInt32() % (uint)value2.Value.AsInt32()),
-                                    _ => throw new NotImplementedException(), // unreachable
-                                };
+                            stack.Push(isNint ? StackValueKind.NativeInt : StackValueKind.Int32, ValueTypeValue.FromInt32(result));
+                        }
+                        else if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64)
+                        {
+                            if (isDivRem && value2.Value.AsInt64() == 0)
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Division by zero");
+
+                            long result = opcode switch
+                            {
+                                ILOpcode.or => value1.Value.AsInt64() | value2.Value.AsInt64(),
+                                ILOpcode.add => value1.Value.AsInt64() + value2.Value.AsInt64(),
+                                ILOpcode.sub => value1.Value.AsInt64() - value2.Value.AsInt64(),
+                                ILOpcode.and => value1.Value.AsInt64() & value2.Value.AsInt64(),
+                                ILOpcode.mul => value1.Value.AsInt64() * value2.Value.AsInt64(),
+                                ILOpcode.div => value1.Value.AsInt64() / value2.Value.AsInt64(),
+                                ILOpcode.div_un => (long)((ulong)value1.Value.AsInt64() / (ulong)value2.Value.AsInt64()),
+                                ILOpcode.rem => value1.Value.AsInt64() % value2.Value.AsInt64(),
+                                ILOpcode.rem_un => (long)((ulong)value1.Value.AsInt64() % (ulong)value2.Value.AsInt64()),
+                                _ => throw new NotImplementedException(), // unreachable
+                            };
 
-                                stack.Push(isNint ? StackValueKind.NativeInt : StackValueKind.Int32, ValueTypeValue.FromInt32(result));
-                            }
-                            else if (value1.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64 && value2.ValueKind.WithNormalizedNativeInt(context) == StackValueKind.Int64)
-                            {
-                                if (isDivRem && value2.Value.AsInt64() == 0)
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Division by zero");
+                            stack.Push(isNint ? StackValueKind.NativeInt : StackValueKind.Int64, ValueTypeValue.FromInt64(result));
+                        }
+                        else if (value1.ValueKind == StackValueKind.Float && value2.ValueKind == StackValueKind.Float)
+                        {
+                            if (isDivRem && value2.Value.AsDouble() == 0)
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Division by zero");
 
-                                long result = opcode switch
-                                {
-                                    ILOpcode.or => value1.Value.AsInt64() | value2.Value.AsInt64(),
-                                    ILOpcode.add => value1.Value.AsInt64() + value2.Value.AsInt64(),
-                                    ILOpcode.sub => value1.Value.AsInt64() - value2.Value.AsInt64(),
-                                    ILOpcode.and => value1.Value.AsInt64() & value2.Value.AsInt64(),
-                                    ILOpcode.mul => value1.Value.AsInt64() * value2.Value.AsInt64(),
-                                    ILOpcode.div => value1.Value.AsInt64() / value2.Value.AsInt64(),
-                                    ILOpcode.div_un => (long)((ulong)value1.Value.AsInt64() / (ulong)value2.Value.AsInt64()),
-                                    ILOpcode.rem => value1.Value.AsInt64() % value2.Value.AsInt64(),
-                                    ILOpcode.rem_un => (long)((ulong)value1.Value.AsInt64() % (ulong)value2.Value.AsInt64()),
-                                    _ => throw new NotImplementedException(), // unreachable
-                                };
+                            if (opcode == ILOpcode.or || opcode == ILOpcode.shl || opcode == ILOpcode.and || opcode == ILOpcode.div_un || opcode == ILOpcode.rem_un)
+                                ThrowHelper.ThrowInvalidProgramException();
 
-                                stack.Push(isNint ? StackValueKind.NativeInt : StackValueKind.Int64, ValueTypeValue.FromInt64(result));
-                            }
-                            else if (value1.ValueKind == StackValueKind.Float && value2.ValueKind == StackValueKind.Float)
+                            double result = opcode switch
                             {
-                                if (isDivRem && value2.Value.AsDouble() == 0)
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Division by zero");
-
-                                if (opcode == ILOpcode.or || opcode == ILOpcode.shl || opcode == ILOpcode.and || opcode == ILOpcode.div_un || opcode == ILOpcode.rem_un)
-                                    ThrowHelper.ThrowInvalidProgramException();
+                                ILOpcode.add => value1.Value.AsDouble() + value2.Value.AsDouble(),
+                                ILOpcode.sub => value1.Value.AsDouble() - value2.Value.AsDouble(),
+                                ILOpcode.mul => value1.Value.AsDouble() * value2.Value.AsDouble(),
+                                ILOpcode.div => value1.Value.AsDouble() / value2.Value.AsDouble(),
+                                ILOpcode.rem => value1.Value.AsDouble() % value2.Value.AsDouble(),
+                                _ => throw new NotImplementedException(), // unreachable
+                            };
 
-                                double result = opcode switch
-                                {
-                                    ILOpcode.add => value1.Value.AsDouble() + value2.Value.AsDouble(),
-                                    ILOpcode.sub => value1.Value.AsDouble() - value2.Value.AsDouble(),
-                                    ILOpcode.mul => value1.Value.AsDouble() * value2.Value.AsDouble(),
-                                    ILOpcode.div => value1.Value.AsDouble() / value2.Value.AsDouble(),
-                                    ILOpcode.rem => value1.Value.AsDouble() % value2.Value.AsDouble(),
-                                    _ => throw new NotImplementedException(), // unreachable
-                                };
+                            stack.Push(StackValueKind.Float, ValueTypeValue.FromDouble(result));
+                        }
+                        else if (value1.ValueKind == StackValueKind.Int64 && value2.ValueKind == StackValueKind.Int32
+                            && opcode == ILOpcode.shl)
+                        {
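+                            // shl is the one binary op in this handler whose operands may differ in width: a 64-bit value shifted by an Int32 count.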
+                            long result = value1.Value.AsInt64() << value2.Value.AsInt32();
+                            stack.Push(isNint ? StackValueKind.NativeInt : StackValueKind.Int64, ValueTypeValue.FromInt64(result));
+                        }
+                        else if ((value1.ValueKind == StackValueKind.ByRef && value2.ValueKind != StackValueKind.ByRef)
+                            || (value2.ValueKind == StackValueKind.ByRef && value1.ValueKind != StackValueKind.ByRef))
+                        {
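+                            // Byref arithmetic is modeled as adjusting an offset into the byte array the byref points at; only add is handled here.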
+                            if (opcode != ILOpcode.add)
+                                ThrowHelper.ThrowInvalidProgramException();
 
-                                stack.Push(StackValueKind.Float, ValueTypeValue.FromDouble(result));
-                            }
-                            else if (value1.ValueKind == StackValueKind.Int64 && value2.ValueKind == StackValueKind.Int32
-                                && opcode == ILOpcode.shl)
-                            {
-                                long result = value1.Value.AsInt64() << value2.Value.AsInt32();
-                                stack.Push(isNint ? StackValueKind.NativeInt : StackValueKind.Int64, ValueTypeValue.FromInt64(result));
-                            }
-                            else if ((value1.ValueKind == StackValueKind.ByRef && value2.ValueKind != StackValueKind.ByRef)
-                                || (value2.ValueKind == StackValueKind.ByRef && value1.ValueKind != StackValueKind.ByRef))
-                            {
-                                if (opcode != ILOpcode.add)
-                                    ThrowHelper.ThrowInvalidProgramException();
+                            StackEntry reference = value1.ValueKind == StackValueKind.ByRef ? value1 : value2;
+                            StackEntry addend = value1.ValueKind != StackValueKind.ByRef ? value1 : value2;
 
-                                StackEntry reference = value1.ValueKind == StackValueKind.ByRef ? value1 : value2;
-                                StackEntry addend = value1.ValueKind != StackValueKind.ByRef ? value1 : value2;
+                            if (addend.ValueKind is not StackValueKind.NativeInt and not StackValueKind.Int32)
+                                ThrowHelper.ThrowInvalidProgramException();
 
-                                if (addend.ValueKind is not StackValueKind.NativeInt and not StackValueKind.Int32)
-                                    ThrowHelper.ThrowInvalidProgramException();
+                            long addition = addend.ValueKind switch
+                            {
+                                StackValueKind.Int32 => addend.Value.AsInt32(),
+                                _ => context.Target.PointerSize == 8 ? addend.Value.AsInt64() : addend.Value.AsInt32()
+                            };
 
-                                long addition = addend.ValueKind switch
-                                {
-                                    StackValueKind.Int32 => addend.Value.AsInt32(),
-                                    _ => context.Target.PointerSize == 8 ? addend.Value.AsInt64() : addend.Value.AsInt32()
-                                };
+                            if (reference.Value is not ByRefValue previousByRef)
+                                return Status.Fail(methodIL.OwningMethod, "Byref math with unsupported byref");
 
-                                var previousByRef = (ByRefValue)reference.Value;
-                                if (addition > previousByRef.PointedToBytes.Length - previousByRef.PointedToOffset
-                                    || addition + previousByRef.PointedToOffset < 0)
-                                    return Status.Fail(methodIL.OwningMethod, "Out of range byref access");
+                            if (addition > previousByRef.PointedToBytes.Length - previousByRef.PointedToOffset
+                                || addition + previousByRef.PointedToOffset < 0)
+                                return Status.Fail(methodIL.OwningMethod, "Out of range byref access");
 
-                                stack.Push(StackValueKind.ByRef, new ByRefValue(previousByRef.PointedToBytes, (int)(previousByRef.PointedToOffset + addition)));
-                            }
-                            else
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
+                            stack.Push(StackValueKind.ByRef, new ByRefValue(previousByRef.PointedToBytes, (int)(previousByRef.PointedToOffset + addition)));
                         }
-                        break;
+                        else
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
+                    }
+                    break;
 
                     case ILOpcode.ldlen:
+                    {
+                        StackEntry popped = stack.Pop();
+                        if (popped.Value is ArrayInstance arrayInstance)
                         {
-                            StackEntry popped = stack.Pop();
-                            if (popped.Value is ArrayInstance arrayInstance)
-                            {
-                                stack.Push(StackValueKind.NativeInt, context.Target.PointerSize == 8 ? ValueTypeValue.FromInt64(arrayInstance.Length) : ValueTypeValue.FromInt32(arrayInstance.Length));
-                            }
-                            else if (popped.Value == null)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Null array");
-                            }
-                            else
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                            stack.Push(StackValueKind.NativeInt, context.Target.PointerSize == 8 ? ValueTypeValue.FromInt64(arrayInstance.Length) : ValueTypeValue.FromInt32(arrayInstance.Length));
                         }
-                        break;
+                        else if (popped.Value == null)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Null array");
+                        }
+                        else
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
+                    }
+                    break;
 
                     case ILOpcode.stelem:
                     case ILOpcode.stelem_i:
@@ -1596,45 +1574,45 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
-                            ILOpcode.stelem_i => context.GetWellKnownType(WellKnownType.IntPtr),
-                                ILOpcode.stelem_i1 => context.GetWellKnownType(WellKnownType.SByte),
-                                ILOpcode.stelem_i2 => context.GetWellKnownType(WellKnownType.Int16),
-                                ILOpcode.stelem_i4 => context.GetWellKnownType(WellKnownType.Int32),
-                                ILOpcode.stelem_i8 => context.GetWellKnownType(WellKnownType.Int64),
-                                ILOpcode.stelem_r4 => context.GetWellKnownType(WellKnownType.Single),
-                                ILOpcode.stelem_r8 => context.GetWellKnownType(WellKnownType.Double),
-                                _ => (TypeDesc)methodIL.GetObject(reader.ReadILToken()),
-                            };
-
-                            if (elementType.IsGCPointer)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
+                            ILOpcode.stelem_i => context.GetWellKnownType(WellKnownType.IntPtr),
+                            ILOpcode.stelem_i1 => context.GetWellKnownType(WellKnownType.SByte),
+                            ILOpcode.stelem_i2 => context.GetWellKnownType(WellKnownType.Int16),
+                            ILOpcode.stelem_i4 => context.GetWellKnownType(WellKnownType.Int32),
+                            ILOpcode.stelem_i8 => context.GetWellKnownType(WellKnownType.Int64),
+                            ILOpcode.stelem_r4 => context.GetWellKnownType(WellKnownType.Single),
+                            ILOpcode.stelem_r8 => context.GetWellKnownType(WellKnownType.Double),
+                            _ => (TypeDesc)methodIL.GetObject(reader.ReadILToken()),
+                        };
+
+                        if (elementType.IsGCPointer)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
 
-                            Value value = stack.PopIntoLocation(elementType);
-                            if (!stack.TryPopIntValue(out int index))
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
-                            StackEntry array = stack.Pop();
-                            if (array.Value is ArrayInstance arrayInstance)
-                            {
-                                if (!arrayInstance.TryStoreElement(index, value))
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Out of range access");
-                            }
-                            else if (array.Value == null)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Null array");
-                            }
-                            else
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                        Value value = stack.PopIntoLocation(elementType);
+                        if (!stack.TryPopIntValue(out int index))
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
                         }
-                        break;
+                        StackEntry array = stack.Pop();
+                        if (array.Value is ArrayInstance arrayInstance)
+                        {
+                            if (!arrayInstance.TryStoreElement(index, value))
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Out of range access");
+                        }
+                        else if (array.Value == null)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Null array");
+                        }
+                        else
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
+                    }
+                    break;
 
                     case ILOpcode.ldelem:
                     case ILOpcode.ldelem_i:
@@ -1647,95 +1625,95 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
-                            ILOpcode.ldelem_i => context.GetWellKnownType(WellKnownType.IntPtr),
-                                ILOpcode.ldelem_i1 => context.GetWellKnownType(WellKnownType.SByte),
-                                ILOpcode.ldelem_u1 => context.GetWellKnownType(WellKnownType.Byte),
-                                ILOpcode.ldelem_i2 => context.GetWellKnownType(WellKnownType.Int16),
-                                ILOpcode.ldelem_u2 => context.GetWellKnownType(WellKnownType.UInt16),
-                                ILOpcode.ldelem_i4 => context.GetWellKnownType(WellKnownType.Int32),
-                                ILOpcode.ldelem_u4 => context.GetWellKnownType(WellKnownType.UInt32),
-                                ILOpcode.ldelem_i8 => context.GetWellKnownType(WellKnownType.Int64),
-                                ILOpcode.ldelem_r4 => context.GetWellKnownType(WellKnownType.Single),
-                                ILOpcode.ldelem_r8 => context.GetWellKnownType(WellKnownType.Double),
-                                _ => (TypeDesc)methodIL.GetObject(reader.ReadILToken()),
-                            };
-
-                            if (elementType.IsGCPointer)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode);
-                            }
-
-                            if (!stack.TryPopIntValue(out int index))
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                            ILOpcode.ldelem_i => context.GetWellKnownType(WellKnownType.IntPtr),
+                            ILOpcode.ldelem_i1 => context.GetWellKnownType(WellKnownType.SByte),
+                            ILOpcode.ldelem_u1 => context.GetWellKnownType(WellKnownType.Byte),
+                            ILOpcode.ldelem_i2 => context.GetWellKnownType(WellKnownType.Int16),
+                            ILOpcode.ldelem_u2 => context.GetWellKnownType(WellKnownType.UInt16),
+                            ILOpcode.ldelem_i4 => context.GetWellKnownType(WellKnownType.Int32),
+                            ILOpcode.ldelem_u4 => context.GetWellKnownType(WellKnownType.UInt32),
+                            ILOpcode.ldelem_i8 => context.GetWellKnownType(WellKnownType.Int64),
+                            ILOpcode.ldelem_r4 => context.GetWellKnownType(WellKnownType.Single),
+                            ILOpcode.ldelem_r8 => context.GetWellKnownType(WellKnownType.Double),
+                            _ => (TypeDesc)methodIL.GetObject(reader.ReadILToken()),
+                        };
+
+                        if (elementType.IsGCPointer)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+                        }
 
-                            StackEntry array = stack.Pop();
-                            if (array.Value is ArrayInstance arrayInstance)
-                            {
-                                if (!arrayInstance.TryLoadElement(index, out Value value))
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Out of range access");
+                        if (!stack.TryPopIntValue(out int index))
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
 
-                                stack.PushFromLocation(elementType, value);
-                            }
-                            else if (array.Value == null)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Null array");
-                            }
-                            else if (array.Value is ForeignTypeInstance)
-                            {
-                                return Status.Fail(methodIL.OwningMethod, opcode, "Foreign array");
-                            }
-                            else
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                        StackEntry array = stack.Pop();
+                        if (array.Value is ArrayInstance arrayInstance)
+                        {
+                            if (!arrayInstance.TryLoadElement(index, out Value value))
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Out of range access");
 
+                            stack.PushFromLocation(elementType, value);
                         }
-                        break;
+                        else if (array.Value == null)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Null array");
+                        }
+                        else if (array.Value is ForeignTypeInstance)
+                        {
+                            return Status.Fail(methodIL.OwningMethod, opcode, "Foreign array");
+                        }
+                        else
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
+
+                    }
+                    break;
 
                     case ILOpcode.box:
+                    {
+                        TypeDesc type = (TypeDesc)methodIL.GetObject(reader.ReadILToken());
+                        if (type.IsValueType)
                         {
-                            TypeDesc type = (TypeDesc)methodIL.GetObject(reader.ReadILToken());
-                            if (type.IsValueType)
-                            {
-                                if (type.IsNullable)
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
+                            if (type.IsNullable)
+                                return Status.Fail(methodIL.OwningMethod, opcode);
 
-                                if (type.RequiresAlign8())
-                                    return Status.Fail(methodIL.OwningMethod, opcode, "Align8");
+                            if (type.RequiresAlign8())
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Align8");
 
-                                Value value = stack.PopIntoLocation(type);
-                                AllocationSite allocSite = new AllocationSite(_type, instructionCounter);
-                                if (!ObjectInstance.TryBox((DefType)type, value, allocSite, out ObjectInstance boxedResult))
-                                {
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                                }
+                            Value value = stack.PopIntoLocation(type);
+                            AllocationSite allocSite = new AllocationSite(_type, instructionCounter);
+                            if (!ObjectInstance.TryBox((DefType)type, value, allocSite, out ObjectInstance boxedResult))
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode);
+                            }
 
 
-                                stack.Push(boxedResult);
-                            }
+                            stack.Push(boxedResult);
                         }
-                        break;
+                    }
+                    break;
 
                     case ILOpcode.unbox_any:
+                    {
+                        TypeDesc type = (TypeDesc)methodIL.GetObject(reader.ReadILToken());
+                        StackEntry entry = stack.Pop();
+                        if (entry.Value is ObjectInstance objInst
+                            && objInst.TryUnboxAny(type, out Value unboxed))
                         {
-                            TypeDesc type = (TypeDesc)methodIL.GetObject(reader.ReadILToken());
-                            StackEntry entry = stack.Pop();
-                            if (entry.Value is ObjectInstance objInst
-                                && objInst.TryUnboxAny(type, out Value unboxed))
-                            {
-                                stack.PushFromLocation(type, unboxed);
-                            }
-                            else
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
+                            stack.PushFromLocation(type, unboxed);
                         }
-                        break;
+                        else
+                        {
+                            ThrowHelper.ThrowInvalidProgramException();
+                        }
+                    }
+                    break;
 
                     case ILOpcode.ldobj:
                     case ILOpcode.ldind_i1:
@@ -1745,114 +1723,77 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
-                                    TypeFlags.SByte => ILOpcode.ldind_i1,
-                                    TypeFlags.Boolean or TypeFlags.Byte => ILOpcode.ldind_u1,
-                                    TypeFlags.Int16 => ILOpcode.ldind_i2,
-                                    TypeFlags.Char or TypeFlags.UInt16 => ILOpcode.ldind_u2,
-                                    TypeFlags.Int32 => ILOpcode.ldind_i4,
-                                    TypeFlags.UInt32 => ILOpcode.ldind_u4,
-                                    TypeFlags.Int64 or TypeFlags.UInt64 => ILOpcode.ldind_i8,
-                                    TypeFlags.Single => ILOpcode.ldind_r4,
-                                    TypeFlags.Double => ILOpcode.ldind_r8,
-                                    _ => ILOpcode.ldobj,
-                                };
-
-                                if (opcode == ILOpcode.ldobj)
-                                {
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                                }
-                            }
+                            ILOpcode.ldind_i1 => context.GetWellKnownType(WellKnownType.SByte),
+                            ILOpcode.ldind_u1 => context.GetWellKnownType(WellKnownType.Byte),
+                            ILOpcode.ldind_i2 => context.GetWellKnownType(WellKnownType.Int16),
+                            ILOpcode.ldind_u2 => context.GetWellKnownType(WellKnownType.UInt16),
+                            ILOpcode.ldind_i4 => context.GetWellKnownType(WellKnownType.Int32),
+                            ILOpcode.ldind_u4 => context.GetWellKnownType(WellKnownType.UInt32),
+                            ILOpcode.ldind_i8 => context.GetWellKnownType(WellKnownType.Int64),
+                            _ /* ldobj */ => (TypeDesc)methodIL.GetObject(reader.ReadILToken()),
+                        };
+
+                        StackEntry entry = stack.Pop();
+                        if (entry.ValueKind != StackValueKind.ByRef && entry.ValueKind != StackValueKind.NativeInt)
+                            ThrowHelper.ThrowInvalidProgramException();
 
-                            StackEntry entry = stack.Pop();
-                            if (entry.Value is ByRefValue byRefVal)
-                            {
-                                switch (opcode)
-                                {
-                                    case ILOpcode.ldind_i1:
-                                        stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32(byRefVal.DereferenceAsSByte()));
-                                        break;
-                                    case ILOpcode.ldind_u1:
-                                        stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((byte)byRefVal.DereferenceAsSByte()));
-                                        break;
-                                    case ILOpcode.ldind_i2:
-                                        stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32(byRefVal.DereferenceAsInt16()));
-                                        break;
-                                    case ILOpcode.ldind_u2:
-                                        stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32((ushort)byRefVal.DereferenceAsInt16()));
-                                        break;
-                                    case ILOpcode.ldind_i4:
-                                    case ILOpcode.ldind_u4:
-                                        stack.Push(StackValueKind.Int32, ValueTypeValue.FromInt32(byRefVal.DereferenceAsInt32()));
-                                        break;
-                                    case ILOpcode.ldind_i8:
-                                        stack.Push(StackValueKind.Int64, ValueTypeValue.FromInt64(byRefVal.DereferenceAsInt64()));
-                                        break;
-                                    case ILOpcode.ldind_r4:
-                                        stack.Push(StackValueKind.Float, ValueTypeValue.FromDouble(byRefVal.DereferenceAsSingle()));
-                                        break;
-                                    case ILOpcode.ldind_r8:
-                                        stack.Push(StackValueKind.Float, ValueTypeValue.FromDouble(byRefVal.DereferenceAsDouble()));
-                                        break;
-                                }
-                            }
-                            else
-                            {
-                                ThrowHelper.ThrowInvalidProgramException();
-                            }
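+                        // Dereferencing is delegated to the byref model; if the byref can't service the load, interpretation of this method is abandoned.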
+                        if (entry.Value is ByRefValueBase byRefVal
+                            && byRefVal.TryLoad(type, out Value dereferenced))
+                        {
+                            stack.PushFromLocation(type, dereferenced);
                         }
-                        break;
+                        else
+                        {
+                            return Status.Fail(methodIL.OwningMethod, "Ldind from unsupported byref");
+                        }
+                    }
+                    break;
 
                     case ILOpcode.stobj:
+                    case ILOpcode.stind_i:
                     case ILOpcode.stind_i1:
                     case ILOpcode.stind_i2:
                     case ILOpcode.stind_i4:
                     case ILOpcode.stind_i8:
+                    {
+                        if (opcode == ILOpcode.stobj)
                         {
-                            if (opcode == ILOpcode.stobj)
-                            {
-                                TypeDesc type = methodIL.GetObject(reader.ReadILToken()) as TypeDesc;
-                                opcode = type.Category switch
-                                {
-                                    TypeFlags.SByte or TypeFlags.Boolean or TypeFlags.Byte => ILOpcode.stind_i1,
-                                    TypeFlags.Int16 or TypeFlags.Char or TypeFlags.UInt16 => ILOpcode.stind_i2,
-                                    TypeFlags.Int32 or TypeFlags.UInt32 => ILOpcode.stind_i4,
-                                    TypeFlags.Int64 or TypeFlags.UInt64 => ILOpcode.stind_i8,
-                                    _ => ILOpcode.stobj,
-                                };
-
-                                if (opcode == ILOpcode.stobj)
-                                {
-                                    return Status.Fail(methodIL.OwningMethod, opcode);
-                                }
-                            }
-
-                            Value val = opcode switch
-                            {
-                                ILOpcode.stind_i1 => stack.PopIntoLocation(context.GetWellKnownType(WellKnownType.Byte)),
-                                ILOpcode.stind_i2 => stack.PopIntoLocation(context.GetWellKnownType(WellKnownType.UInt16)),
-                                ILOpcode.stind_i4 => stack.PopIntoLocation(context.GetWellKnownType(WellKnownType.UInt32)),
-                                ILOpcode.stind_i8 => stack.PopIntoLocation(context.GetWellKnownType(WellKnownType.UInt64)),
-                                _ => throw new NotImplementedException()
+                            TypeDesc type = methodIL.GetObject(reader.ReadILToken()) as TypeDesc;
+                            opcode = type.Category switch
+                            {
+                                TypeFlags.SByte or TypeFlags.Boolean or TypeFlags.Byte => ILOpcode.stind_i1,
+                                TypeFlags.Int16 or TypeFlags.Char or TypeFlags.UInt16 => ILOpcode.stind_i2,
+                                TypeFlags.Int32 or TypeFlags.UInt32 => ILOpcode.stind_i4,
+                                TypeFlags.Int64 or TypeFlags.UInt64 => ILOpcode.stind_i8,
+                                TypeFlags.IntPtr or TypeFlags.UIntPtr => ILOpcode.stind_i,
+                                _ => ILOpcode.stobj,
                             };
+                        }
 
-                            StackEntry location = stack.Pop();
-                            if (location.ValueKind != StackValueKind.ByRef)
-                                ThrowHelper.ThrowInvalidProgramException();
+                        Value val = opcode switch
+                        {
+                            ILOpcode.stind_i1 => stack.PopIntoLocation(context.GetWellKnownType(WellKnownType.Byte)),
+                            ILOpcode.stind_i2 => stack.PopIntoLocation(context.GetWellKnownType(WellKnownType.UInt16)),
+                            ILOpcode.stind_i4 => stack.PopIntoLocation(context.GetWellKnownType(WellKnownType.UInt32)),
+                            ILOpcode.stind_i8 => stack.PopIntoLocation(context.GetWellKnownType(WellKnownType.UInt64)),
+                            ILOpcode.stind_i => stack.PopIntoLocation(context.GetWellKnownType(WellKnownType.UIntPtr)),
+                            _ => stack.Pop().Value
+                        };
+
+                        StackEntry location = stack.Pop();
+                        if (location.ValueKind != StackValueKind.ByRef && location.ValueKind != StackValueKind.NativeInt)
+                            ThrowHelper.ThrowInvalidProgramException();
 
-                            byte[] dest = ((ByRefValue)location.Value).PointedToBytes;
-                            int destOffset = ((ByRefValue)location.Value).PointedToOffset;
-                            byte[] src = ((ValueTypeValue)val).InstanceBytes;
-                            if (destOffset + src.Length > dest.Length)
-                                return Status.Fail(methodIL.OwningMethod, "Out of bound access");
-                            Array.Copy(src, 0, dest, destOffset, src.Length);
-                        }
-                        break;
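+                        // Stores likewise go through the byref model: the byref itself decides whether it can accept the value.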
+                        if (location.Value is not ByRefValueBase destValue)
+                            return Status.Fail(methodIL.OwningMethod, "Stind into unsupported byref");
+
+                        if (!destValue.TryStore(val))
+                            return Status.Fail(methodIL.OwningMethod, "Byref doesn't support storing value");
+                    }
+                    break;
 
                     case ILOpcode.constrained:
                         constrainedType = methodIL.GetObject(reader.ReadILToken()) as TypeDesc;
@@ -1863,26 +1804,28 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack
-                            if (sizeBytes > addressValue.PointedToBytes.Length - addressValue.PointedToOffset
-                                || sizeBytes > int.MaxValue /* paranoid check that cast to int is legit */)
-                                return Status.Fail(methodIL.OwningMethod, opcode);
+                        if (addr.Value is not ByRefValue addressValue)
+                            return Status.Fail(methodIL.OwningMethod, "initblk of unsupported byref");
 
-                            Array.Fill(addressValue.PointedToBytes, (byte)value.Value.AsInt32(), addressValue.PointedToOffset, (int)sizeBytes);
-                        }
-                        break;
+                        if (sizeBytes > addressValue.PointedToBytes.Length - addressValue.PointedToOffset
+                            || sizeBytes > int.MaxValue /* paranoid check that cast to int is legit */)
+                            return Status.Fail(methodIL.OwningMethod, opcode);
+
+                        Array.Fill(addressValue.PointedToBytes, (byte)value.Value.AsInt32(), addressValue.PointedToOffset, (int)sizeBytes);
+                    }
+                    break;
 
                     default:
                         return Status.Fail(methodIL.OwningMethod, opcode);
@@ -1909,7 +1852,7 @@ private static bool TryGetSpanElementType(TypeDesc type, bool isReadOnlySpan, ou
             return false;
         }
 
-        private static BaseValueTypeValue NewUninitializedLocationValue(TypeDesc locationType)
+        private static BaseValueTypeValue NewUninitializedLocationValue(TypeDesc locationType, FieldDesc fieldThatOwnsMemory)
         {
             if (locationType.IsGCPointer || locationType.IsByRef)
             {
@@ -1917,11 +1860,19 @@ private static BaseValueTypeValue NewUninitializedLocationValue(TypeDesc locatio
             }
             else if (TryGetSpanElementType(locationType, isReadOnlySpan: true, out MetadataType readOnlySpanElementType))
             {
-                return new ReadOnlySpanValue(readOnlySpanElementType, Array.Empty<byte>(), 0, 0);
+                return new SpanValue(readOnlySpanElementType, Array.Empty<byte>(), 0, 0);
             }
             else if (TryGetSpanElementType(locationType, isReadOnlySpan: false, out MetadataType spanElementType))
             {
-                return new ReadOnlySpanValue(spanElementType, Array.Empty<byte>(), 0, 0);
+                return new SpanValue(spanElementType, Array.Empty<byte>(), 0, 0);
+            }
+            else if (VTableLikeStructValue.IsCompatible(locationType))
+            {
+                return new VTableLikeStructValue((MetadataType)locationType, fieldThatOwnsMemory);
+            }
+            else if (ComInterfaceEntryArrayValue.IsCompatible(locationType, out TypeDesc comInterfaceEntryType))
+            {
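+                // Sequential-layout structs whose instance fields are all ComWrappers.ComInterfaceEntry get a dedicated model (see ComInterfaceEntryArrayValue below).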
+                return new ComInterfaceEntryArrayValue(locationType, comInterfaceEntryType);
             }
             else
             {
@@ -1963,41 +1914,51 @@ private bool TryHandleIntrinsicCall(MethodDesc method, Value[] parameters, out V
                         byte[] rvaData = Internal.TypeSystem.Ecma.EcmaFieldExtensions.GetFieldRvaData(createSpanEcmaField);
                         if (rvaData.Length % elementSize != 0)
                             return false;
-                        retVal = new ReadOnlySpanValue(elementType, rvaData, 0, rvaData.Length);
+                        retVal = new SpanValue(elementType, rvaData, 0, rvaData.Length);
                         return true;
                     }
                     return false;
-                case "get_Item":
-                    if (method.OwningType is MetadataType readonlySpanType
-                        && readonlySpanType.Name == "ReadOnlySpan`1" && readonlySpanType.Namespace == "System"
-                        && parameters[0] is ReadOnlySpanReferenceValue spanRef
-                        && parameters[1] is ValueTypeValue spanIndex)
-                    {
-                        return spanRef.TryAccessElement(spanIndex.AsInt32(), out retVal);
-                    }
-                    return false;
                 case "GetTypeFromHandle" when IsSystemType(method.OwningType)
                         && parameters[0] is RuntimeTypeHandleValue typeHandle:
+                {
+                    if (!_internedTypes.TryGetValue(typeHandle.Type, out RuntimeTypeValue runtimeType))
                     {
-                        if (!_internedTypes.TryGetValue(typeHandle.Type, out RuntimeTypeValue runtimeType))
-                        {
-                            _internedTypes.Add(typeHandle.Type, runtimeType = new RuntimeTypeValue(typeHandle.Type));
-                        }
-                        retVal = runtimeType;
-                        return true;
+                        _internedTypes.Add(typeHandle.Type, runtimeType = new RuntimeTypeValue(typeHandle.Type));
                     }
+                    retVal = runtimeType;
+                    return true;
+                }
                 case "get_IsValueType" when IsSystemType(method.OwningType)
                         && parameters[0] is RuntimeTypeValue typeToCheckForValueType:
-                    {
-                        retVal = ValueTypeValue.FromSByte(typeToCheckForValueType.TypeRepresented.IsValueType ? (sbyte)1 : (sbyte)0);
-                        return true;
-                    }
+                {
+                    retVal = ValueTypeValue.FromSByte(typeToCheckForValueType.TypeRepresented.IsValueType ? (sbyte)1 : (sbyte)0);
+                    return true;
+                }
                 case "op_Equality" when IsSystemType(method.OwningType)
                         && (parameters[0] is RuntimeTypeValue || parameters[1] is RuntimeTypeValue):
-                    {
-                        retVal = ValueTypeValue.FromSByte(parameters[0] == parameters[1] ? (sbyte)1 : (sbyte)0);
-                        return true;
-                    }
+                {
+                    retVal = ValueTypeValue.FromSByte(parameters[0] == parameters[1] ? (sbyte)1 : (sbyte)0);
+                    return true;
+                }
+                case "IsReferenceOrContainsReferences" when method.Instantiation.Length == 1
+                        && method.OwningType is MetadataType isReferenceOrContainsReferencesType
+                        && isReferenceOrContainsReferencesType.Name == "RuntimeHelpers" && isReferenceOrContainsReferencesType.Namespace == "System.Runtime.CompilerServices"
+                        && isReferenceOrContainsReferencesType.Module == method.Context.SystemModule:
+                {
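+                    // Matches the runtime definition: true for reference types and for structs that contain GC pointers.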
+                    bool result = method.Instantiation[0].IsGCPointer || (method.Instantiation[0] is DefType defType && defType.ContainsGCPointers);
+                    retVal = ValueTypeValue.FromSByte(result ? (sbyte)1 : (sbyte)0);
+                    return true;
+                }
+                case "GetArrayDataReference" when method.Instantiation.Length == 1
+                        && method.OwningType is MetadataType getArrayDataReferenceType
+                        && getArrayDataReferenceType.Name == "MemoryMarshal" && getArrayDataReferenceType.Namespace == "System.Runtime.InteropServices"
+                        && getArrayDataReferenceType.Module == method.Context.SystemModule
+                        && parameters[0] is ArrayInstance arrayData
+                        && ((ArrayType)arrayData.Type).ElementType == method.Instantiation[0]:
+                {
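+                    // The pattern above already verified that the array's element type matches the generic argument, so the data reference can be handed out directly.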
+                    retVal = arrayData.GetArrayData();
+                    return true;
+                }
             }
 
             static bool IsSystemType(TypeDesc type)
@@ -2178,6 +2139,10 @@ public Value PopIntoLocation(TypeDesc locationType)
                         return popped.Value;
 
                     case StackValueKind.NativeInt:
+                        // True byref that we converted to nint at some point.
+                        if (locationType.IsByRef && popped.Value is ByRefValueBase)
+                            return popped.Value;
+
                         // If it's none of the natural pointer types, we might need to truncate.
                         if (!locationType.IsPointer
                             && !locationType.IsFunctionPointer
@@ -2217,7 +2182,8 @@ public Value PopIntoLocation(TypeDesc locationType)
                         return ValueTypeValue.FromSingle((float)popped.Value.AsDouble());
 
                     case StackValueKind.ByRef:
-                        if (!locationType.IsByRef)
+                        if (!locationType.IsByRef
+                            && locationType.Category is not TypeFlags.IntPtr and not TypeFlags.UIntPtr and not TypeFlags.Pointer and not TypeFlags.FunctionPointer)
                         {
                             ThrowHelper.ThrowInvalidProgramException();
                         }
@@ -2275,7 +2241,7 @@ private interface IHasInstanceFields
         {
             bool TrySetField(FieldDesc field, Value value);
             Value GetField(FieldDesc field);
-            ByRefValue GetFieldAddress(FieldDesc field);
+            ByRefValueBase GetFieldAddress(FieldDesc field);
         }
 
         /// <summary>
@@ -2286,6 +2252,8 @@ private interface IInternalModelingOnlyValue
         {
         }
 
+        private interface INativeIntConvertibleValue;
+
         /// <summary>
         /// Represents a value that can be assigned into.
         /// </summary>
@@ -2296,19 +2264,21 @@ private interface IAssignableValue
 
         private abstract class Value : ISerializableValue
         {
-            public abstract bool Equals(Value value);
+            public abstract bool TryCompareEquality(Value value, out bool result);
 
-            public static bool Equals(Value value1, Value value2)
+            public static bool TryCompareEquality(Value value1, Value value2, out bool result)
             {
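+                // The return value says whether equality could be decided at all; 'result' carries the answer when it could.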
                 if (value1 == value2)
                 {
+                    result = true;
                     return true;
                 }
                 if (value1 == null || value2 == null)
                 {
-                    return false;
+                    result = false;
+                    return true;
                 }
-                return value1.Equals(value2);
+                return value1.TryCompareEquality(value2, out result);
             }
 
             public virtual bool TryCreateByRef(out Value value)
@@ -2343,101 +2313,495 @@ private abstract class BaseValueTypeValue : Value
             public abstract int Size { get; }
         }
 
-        // Also represents pointers and function pointer.
-        private sealed class ValueTypeValue : BaseValueTypeValue, IAssignableValue
-        {
-            public readonly byte[] InstanceBytes;
+        // Also represents pointers and function pointer.
+        private sealed class ValueTypeValue : BaseValueTypeValue, IAssignableValue
+        {
+            public readonly byte[] InstanceBytes;
+
+            public override int Size => InstanceBytes.Length;
+
+            public ValueTypeValue(TypeDesc type)
+            {
+                Debug.Assert(type.IsValueType || type.IsPointer || type.IsFunctionPointer);
+                InstanceBytes = new byte[type.GetElementSize().AsInt];
+            }
+
+            public ValueTypeValue(byte[] bytes)
+            {
+                InstanceBytes = bytes;
+            }
+
+            public override Value Clone()
+            {
+                return new ValueTypeValue((byte[])InstanceBytes.Clone());
+            }
+
+            public override bool TryCreateByRef(out Value value)
+            {
+                value = new ByRefValue(InstanceBytes, 0);
+                return true;
+            }
+
+            bool IAssignableValue.TryAssign(Value value)
+            {
+                if ((!(value is BaseValueTypeValue other) || other.Size != Size)
+                    && value is not INativeIntConvertibleValue)
+                {
+                    ThrowHelper.ThrowInvalidProgramException();
+                }
+
+                if (!(value is ValueTypeValue vtvalue))
+                {
+                    return false;
+                }
+
+                Array.Copy(vtvalue.InstanceBytes, InstanceBytes, InstanceBytes.Length);
+                return true;
+            }
+
+            public override bool TryCompareEquality(Value value, out bool result)
+            {
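+                // A size mismatch (or a non-ValueTypeValue operand) means equality is undecidable here, not that the values are unequal.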
+                if (!(value is ValueTypeValue vtvalue)
+                    || vtvalue.InstanceBytes.Length != InstanceBytes.Length)
+                {
+                    result = false;
+                    return false;
+                }
+
+                for (int i = 0; i < InstanceBytes.Length; i++)
+                {
+                    if (InstanceBytes[i] != ((ValueTypeValue)value).InstanceBytes[i])
+                    {
+                        result = false;
+                        return true;
+                    }
+                }
+
+                result = true;
+                return true;
+            }
+
+            public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
+            {
+                builder.EmitBytes(InstanceBytes);
+            }
+
+            public override bool GetRawData(NodeFactory factory, out object data)
+            {
+                data = InstanceBytes;
+                return true;
+            }
+
+            private byte[] AsExactByteCount(int size)
+            {
+                if (InstanceBytes.Length != size)
+                {
+                    ThrowHelper.ThrowInvalidProgramException();
+                }
+                return InstanceBytes;
+            }
+
+            public override sbyte AsSByte() => (sbyte)AsExactByteCount(1)[0];
+            public override short AsInt16() => BitConverter.ToInt16(AsExactByteCount(2), 0);
+            public override int AsInt32() => BitConverter.ToInt32(AsExactByteCount(4), 0);
+            public override long AsInt64() => BitConverter.ToInt64(AsExactByteCount(8), 0);
+            public override float AsSingle() => BitConverter.ToSingle(AsExactByteCount(4), 0);
+            public override double AsDouble() => BitConverter.ToDouble(AsExactByteCount(8), 0);
+            public static ValueTypeValue FromSByte(sbyte value) => new ValueTypeValue(new byte[1] { (byte)value });
+            public static ValueTypeValue FromInt16(short value) => new ValueTypeValue(BitConverter.GetBytes(value));
+            public static ValueTypeValue FromInt32(int value) => new ValueTypeValue(BitConverter.GetBytes(value));
+            public static ValueTypeValue FromInt64(long value) => new ValueTypeValue(BitConverter.GetBytes(value));
+            public static ValueTypeValue FromSingle(float value) => new ValueTypeValue(BitConverter.GetBytes(value));
+            public static ValueTypeValue FromDouble(double value) => new ValueTypeValue(BitConverter.GetBytes(value));
+        }
+
+        private sealed class ComInterfaceEntryArrayValue : BaseValueTypeValue
+        {
+            private readonly FieldDesc[] _targetFields;
+            private readonly byte[][] _guidBytes;
+            private readonly MetadataType _entryType;
+
+            public override int Size => _entryType.InstanceFieldSize.AsInt * _targetFields.Length;
+
+            public ComInterfaceEntryArrayValue(TypeDesc type, TypeDesc entryType)
+            {
+                Debug.Assert(IsCompatible(type, out _));
+                Debug.Assert(IsComInterfaceEntryType(entryType));
+                Debug.Assert(((MetadataType)type).InstanceFieldSize.AsInt % ((MetadataType)entryType).InstanceFieldSize.AsInt == 0);
+
+                _entryType = (MetadataType)entryType;
+
+                int numFields = ((MetadataType)type).InstanceFieldSize.AsInt / _entryType.InstanceFieldSize.AsInt;
+                _targetFields = new FieldDesc[numFields];
+                _guidBytes = new byte[numFields][];
+                for (int i = 0; i < numFields; i++)
+                    _guidBytes[i] = new byte[16];
+            }
+
+            private static bool IsComInterfaceEntryType(TypeDesc type)
+                => type is MetadataType mdType
+                    && mdType.Name == "ComInterfaceEntry"
+                    && mdType.ContainingType is MetadataType { Name: "ComWrappers", Namespace: "System.Runtime.InteropServices" } comWrappersType
+                    && comWrappersType.Module == comWrappersType.Context.SystemModule;
+
+            public static bool IsCompatible(TypeDesc type, out TypeDesc entryType)
+            {
+                entryType = null;
+
+                if (!type.IsValueType
+                    || type.HasInstantiation
+                    || type is not MetadataType mdType
+                    || !mdType.IsSequentialLayout
+                    || mdType.GetClassLayout() is not { PackingSize: 0, Size: 0 }
+                    || mdType.IsInlineArray)
+                {
+                    return false;
+                }
+
+                foreach (FieldDesc field in type.GetFields())
+                {
+                    if (field.IsStatic)
+                        continue;
+
+                    entryType = field.FieldType;
+
+                    if (!IsComInterfaceEntryType(entryType))
+                        return false;
+                }
+
+                return entryType != null;
+            }
+
+            public override bool TryCompareEquality(Value value, out bool result)
+            {
+                result = false;
+                return false;
+            }
+
+            public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
+            {
+                for (int i = 0; i < _targetFields.Length; i++)
+                {
+                    Debug.Assert(_entryType.GetField("IID").Offset.AsInt == 0);
+                    builder.EmitBytes(_guidBytes[i]);
+
+                    Debug.Assert(_entryType.GetField("Vtable").Offset.AsInt == _guidBytes[i].Length);
+                    if (_targetFields[i] is not FieldDesc targetField)
+                    {
+                        builder.EmitZeroPointer();
+                    }
+                    else
+                    {
+                        Debug.Assert(targetField.IsStatic && !targetField.HasGCStaticBase && !targetField.IsThreadStatic && !targetField.HasRva);
+                        ISymbolNode nonGcStaticBase = factory.TypeNonGCStaticsSymbol((MetadataType)targetField.OwningType);
+                        builder.EmitPointerReloc(nonGcStaticBase, targetField.Offset.AsInt);
+                    }
+                }
+            }
+
+            public override bool GetRawData(NodeFactory factory, out object data)
+            {
+                data = null;
+                return false;
+            }
+
+            public override bool TryCreateByRef(out Value value)
+            {
+                value = new ComInterfaceEntrySlotReference(this, 0);
+                return true;
+            }
+
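+            // Byref to a single ComInterfaceEntry slot within the array; field stores through it capture
+            // the IID bytes and the vtable's backing static field.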
+            private sealed class ComInterfaceEntrySlotReference : ByRefValueBase, IHasInstanceFields
+            {
+                private readonly ComInterfaceEntryArrayValue _parent;
+                private readonly int _index;
+
+                public ComInterfaceEntrySlotReference(ComInterfaceEntryArrayValue parent, int index)
+                    => (_parent, _index) = (parent, index);
+
+                public override bool TryCompareEquality(Value value, out bool result)
+                {
+                    result = false;
+                    return false;
+                }
+
+                public override bool GetRawData(NodeFactory factory, out object data)
+                {
+                    data = null;
+                    return false;
+                }
+
+                public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
+                {
+                    throw new NotSupportedException();
+                }
+
+                bool IHasInstanceFields.TrySetField(FieldDesc field, Value value)
+                {
+                    if (field.OwningType != _parent._entryType)
+                        return false;
+
+                    if (field.Name == "IID"
+                        && value is ValueTypeValue guidValue
+                        && guidValue.Size == _parent._guidBytes[_index].Length)
+                    {
+                        Array.Copy(guidValue.InstanceBytes, _parent._guidBytes[_index], _parent._guidBytes[_index].Length);
+                        return true;
+                    }
+                    else if (field.Name == "Vtable"
+                        && value is ByRefValueBase byrefValue
+                        && byrefValue.BackingField != null)
+                    {
+                        _parent._targetFields[_index] = byrefValue.BackingField;
+                        return true;
+                    }
+
+                    return false;
+                }
+
+                Value IHasInstanceFields.GetField(FieldDesc field)
+                {
+                    // Not actually invalid, but we don't need this.
+                    ThrowHelper.ThrowInvalidProgramException();
+                    return null; // unreached
+                }
+
+                ByRefValueBase IHasInstanceFields.GetFieldAddress(FieldDesc field)
+                {
+                    if (field.OwningType == _parent._entryType)
+                    {
+                        // Get address of IID or Vtable field on ComInterfaceEntry this ref points to.
+                        // Not actually invalid, but we don't need this.
+                        ThrowHelper.ThrowInvalidProgramException();
+                    }
+                    else if (field.FieldType == _parent._entryType
+                        && _index == 0
+                        && field.Offset.AsInt % _parent._entryType.InstanceFieldSize.AsInt == 0
+                        && field.Offset.AsInt < _parent._entryType.InstanceFieldSize.AsInt * _parent._targetFields.Length)
+                    {
+                        // Get address of a field within an array of ComInterfaceEntry.
+                        int index = field.Offset.AsInt / _parent._entryType.InstanceFieldSize.AsInt;
+                        return new ComInterfaceEntrySlotReference(_parent, index);
+                    }
+
+                    ThrowHelper.ThrowInvalidProgramException();
+                    return null; // unreached
+                }
+            }
+        }
+
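+        // Models a sequential-layout struct composed entirely of function pointer fields (a vtable-like struct).
+        // Slots are tracked as MethodDesc targets so WriteFieldData can emit relocs to address-taken entry points.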
+        private sealed class VTableLikeStructValue : BaseValueTypeValue, IAssignableValue
+        {
+            private readonly MetadataType _type;
+            private readonly MethodDesc[] _methods;
+            private readonly FieldDesc _fieldThatOwnsMemory;
+
+            public VTableLikeStructValue(MetadataType type, FieldDesc fieldThatOwnsMemory)
+                : this(type, new MethodDesc[GetFieldCount(type)], fieldThatOwnsMemory)
+            {
+            }
+
+            private VTableLikeStructValue(MetadataType type, MethodDesc[] methods, FieldDesc fieldThatOwnsMemory)
+                => (_type, _methods, _fieldThatOwnsMemory) = (type, methods, fieldThatOwnsMemory);
+
+            private static int GetFieldCount(MetadataType type)
+            {
+                Debug.Assert(IsCompatible(type));
+                Debug.Assert(type.InstanceFieldSize.AsInt % type.Context.Target.PointerSize == 0);
+                return type.InstanceFieldSize.AsInt / type.Context.Target.PointerSize;
+            }
+
+            public override int Size => _methods.Length * _type.Context.Target.PointerSize;
+
+            public static bool IsCompatible(TypeDesc type)
+            {
+                if (!type.IsValueType
+                    || type.HasInstantiation
+                    || type is not MetadataType mdType
+                    || !mdType.IsSequentialLayout
+                    || mdType.GetClassLayout() is not { PackingSize: 0, Size: 0 }
+                    || mdType.IsInlineArray)
+                {
+                    return false;
+                }
+
+                bool hasFields = false;
+                foreach (FieldDesc field in type.GetFields())
+                {
+                    if (field.IsStatic)
+                        continue;
+
+                    hasFields = true;
+
+                    if (field.FieldType.Category != TypeFlags.FunctionPointer)
+                        return false;
+                }
 
-            public override int Size => InstanceBytes.Length;
+                return hasFields;
+            }
 
-            public ValueTypeValue(TypeDesc type)
+            public override bool TryCompareEquality(Value value, out bool result)
             {
-                Debug.Assert(type.IsValueType || type.IsPointer || type.IsFunctionPointer);
-                InstanceBytes = new byte[type.GetElementSize().AsInt];
+                result = false;
+                return false;
             }
 
-            private ValueTypeValue(byte[] bytes)
+            public override bool GetRawData(NodeFactory factory, out object data)
             {
-                InstanceBytes = bytes;
+                data = null;
+                return false;
             }
 
-            public override Value Clone()
+            public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
             {
-                return new ValueTypeValue((byte[])InstanceBytes.Clone());
+                foreach (MethodDesc method in _methods)
+                {
+                    if (method is null)
+                        builder.EmitZeroPointer();
+                    else
+                        builder.EmitPointerReloc(factory.ExactCallableAddressTakenAddress(method, isUnboxingStub: false));
+                }
             }
 
             public override bool TryCreateByRef(out Value value)
             {
-                value = new ByRefValue(InstanceBytes, 0);
+                value = new VTableLikeSlotReferenceValue(this, index: 0);
                 return true;
             }
 
+            public override Value Clone()
+            {
+                return new VTableLikeStructValue(_type, (MethodDesc[])_methods.Clone(), fieldThatOwnsMemory: null);
+            }
+
             bool IAssignableValue.TryAssign(Value value)
             {
-                if (!(value is BaseValueTypeValue other)
-                    || other.Size != Size)
+                if (value is not VTableLikeStructValue other)
+                    return false;
+
+                if (other.Size > Size)
+                    return false;
+
+                Array.Copy(other._methods, _methods, other._methods.Length);
+                return true;
+            }
+
+            private sealed class VTableLikeSlotReferenceValue : ByRefValueBase, IHasInstanceFields
+            {
+                private readonly VTableLikeStructValue _parent;
+                private readonly int _index;
+
+                public override FieldDesc BackingField => _index == 0 ? _parent._fieldThatOwnsMemory : null;
+
+                public VTableLikeSlotReferenceValue(VTableLikeStructValue parent, int index)
+                    => (_parent, _index) = (parent, index);
+
+                public override bool TryCompareEquality(Value value, out bool result)
                 {
-                    ThrowHelper.ThrowInvalidProgramException();
+                    result = false;
+                    return false;
                 }
 
-                if (!(value is ValueTypeValue vtvalue))
+                public override bool GetRawData(NodeFactory factory, out object data)
                 {
+                    data = null;
                     return false;
                 }
 
-                Array.Copy(vtvalue.InstanceBytes, InstanceBytes, InstanceBytes.Length);
-                return true;
-            }
+                public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
+                {
+                    throw new NotSupportedException();
+                }
 
-            public override bool Equals(Value value)
-            {
-                if (!(value is ValueTypeValue vtvalue)
-                    || vtvalue.InstanceBytes.Length != InstanceBytes.Length)
+                public override bool TryStore(Value value)
                 {
-                    ThrowHelper.ThrowInvalidProgramException();
+                    if (value is MethodPointerValue methodPointer)
+                    {
+                        _parent._methods[_index] = methodPointer.PointedToMethod;
+                        return true;
+                    }
+                    else if (value is VTableLikeStructValue otherStruct
+                        && _parent._methods.Length - _index >= otherStruct._methods.Length)
+                    {
+                        Array.Copy(otherStruct._methods, 0, _parent._methods, _index, otherStruct._methods.Length);
+                        return true;
+                    }
+
+                    return false;
                 }
 
-                for (int i = 0; i < InstanceBytes.Length; i++)
+                public override bool TryLoad(TypeDesc type, out Value value)
                 {
-                    if (InstanceBytes[i] != ((ValueTypeValue)value).InstanceBytes[i])
+                    if (!VTableLikeStructValue.IsCompatible(type)
+                        || type is not MetadataType mdType
+                        || mdType.InstanceFieldSize.AsInt > (_parent._methods.Length - _index) * _parent._type.Context.Target.PointerSize)
+                    {
+                        value = null;
                         return false;
+                    }
+
+                    MethodDesc[] slots = new MethodDesc[GetFieldCount(mdType)];
+                    Array.Copy(_parent._methods, _index, slots, 0, slots.Length);
+                    value = new VTableLikeStructValue(mdType, slots, fieldThatOwnsMemory: null);
+                    return true;
                 }
 
-                return true;
-            }
+                public override Value Clone() => this; // The reference is immutable
 
-            public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
-            {
-                builder.EmitBytes(InstanceBytes);
-            }
+                private int GetFieldIndex(FieldDesc field)
+                {
+                    // Not actually invalid program, just difficult to model
+                    if (!VTableLikeStructValue.IsCompatible(field.OwningType))
+                        ThrowHelper.ThrowInvalidProgramException();
 
-            public override bool GetRawData(NodeFactory factory, out object data)
-            {
-                data = InstanceBytes;
-                return true;
-            }
+                    Debug.Assert(field.Offset.AsInt % _parent._type.Context.Target.PointerSize == 0 && field.FieldType.IsFunctionPointer);
 
-            private byte[] AsExactByteCount(int size)
-            {
-                if (InstanceBytes.Length != size)
+                    int index = (field.Offset.AsInt / _parent._type.Context.Target.PointerSize) + _index;
+                    if (index >= _parent._methods.Length)
+                        ThrowHelper.ThrowInvalidProgramException();
+
+                    return index;
+                }
+
+                bool IHasInstanceFields.TrySetField(FieldDesc field, Value value)
                 {
-                    ThrowHelper.ThrowInvalidProgramException();
+                    if (value is not MethodPointerValue methodPtr)
+                        return false;
+
+                    _parent._methods[GetFieldIndex(field)] = methodPtr.PointedToMethod;
+                    return true;
                 }
-                return InstanceBytes;
-            }
 
-            public override sbyte AsSByte() => (sbyte)AsExactByteCount(1)[0];
-            public override short AsInt16() => BitConverter.ToInt16(AsExactByteCount(2), 0);
-            public override int AsInt32() => BitConverter.ToInt32(AsExactByteCount(4), 0);
-            public override long AsInt64() => BitConverter.ToInt64(AsExactByteCount(8), 0);
-            public override float AsSingle() => BitConverter.ToSingle(AsExactByteCount(4), 0);
-            public override double AsDouble() => BitConverter.ToDouble(AsExactByteCount(8), 0);
-            public static ValueTypeValue FromSByte(sbyte value) => new ValueTypeValue(new byte[1] { (byte)value });
-            public static ValueTypeValue FromInt16(short value) => new ValueTypeValue(BitConverter.GetBytes(value));
-            public static ValueTypeValue FromInt32(int value) => new ValueTypeValue(BitConverter.GetBytes(value));
-            public static ValueTypeValue FromInt64(long value) => new ValueTypeValue(BitConverter.GetBytes(value));
-            public static ValueTypeValue FromSingle(float value) => new ValueTypeValue(BitConverter.GetBytes(value));
-            public static ValueTypeValue FromDouble(double value) => new ValueTypeValue(BitConverter.GetBytes(value));
+                Value IHasInstanceFields.GetField(FieldDesc field)
+                {
+                    MethodDesc method = _parent._methods[GetFieldIndex(field)];
+
+                    if (method is not null)
+                        return new MethodPointerValue(method);
+                    else
+                        return _parent._type.Context.Target.PointerSize == 8 ? ValueTypeValue.FromInt64(0) : ValueTypeValue.FromInt32(0);
+                }
+
+                ByRefValueBase IHasInstanceFields.GetFieldAddress(FieldDesc field)
+                {
+                    return new VTableLikeSlotReferenceValue(_parent, GetFieldIndex(field));
+                }
+
+                public override bool TryInitialize(int size)
+                {
+                    if (size % _parent._type.Context.Target.PointerSize != 0)
+                        return false;
+
+                    int numSlots = size / _parent._type.Context.Target.PointerSize;
+                    if (_index + numSlots > _parent._methods.Length)
+                        return false;
+
+                    for (int i = _index; i < _index + numSlots; i++)
+                        _parent._methods[i] = null;
+
+                    return true;
+                }
+            }
         }
 
         private sealed class RuntimeFieldHandleValue : BaseValueTypeValue, IInternalModelingOnlyValue
@@ -2451,14 +2815,16 @@ public RuntimeFieldHandleValue(FieldDesc field)
 
             public override int Size => Field.Context.Target.PointerSize;
 
-            public override bool Equals(Value value)
+            public override bool TryCompareEquality(Value value, out bool result)
             {
                 if (!(value is RuntimeFieldHandleValue))
                 {
-                    ThrowHelper.ThrowInvalidProgramException();
+                    result = false;
+                    return false;
                 }
 
-                return Field == ((RuntimeFieldHandleValue)value).Field;
+                result = Field == ((RuntimeFieldHandleValue)value).Field;
+                return true;
             }
 
             public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
@@ -2484,14 +2850,16 @@ public RuntimeTypeHandleValue(TypeDesc type)
 
             public override int Size => Type.Context.Target.PointerSize;
 
-            public override bool Equals(Value value)
+            public override bool TryCompareEquality(Value value, out bool result)
             {
                 if (!(value is RuntimeTypeHandleValue))
                 {
-                    ThrowHelper.ThrowInvalidProgramException();
+                    result = false;
+                    return false;
                 }
 
-                return Type == ((RuntimeTypeHandleValue)value).Type;
+                result = Type == ((RuntimeTypeHandleValue)value).Type;
+                return true;
             }
 
             public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
@@ -2542,14 +2910,14 @@ public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory f
             }
         }
 
-        private sealed class ReadOnlySpanValue : BaseValueTypeValue, IInternalModelingOnlyValue
+        private sealed class SpanValue : BaseValueTypeValue, IInternalModelingOnlyValue
         {
             private readonly MetadataType _elementType;
-            private readonly byte[] _bytes;
-            private readonly int _index;
-            private readonly int _length;
+            private byte[] _bytes;
+            private int _index;
+            private int _length;
 
-            public ReadOnlySpanValue(MetadataType elementType, byte[] bytes, int index, int length)
+            public SpanValue(MetadataType elementType, byte[] bytes, int index, int length)
             {
                 Debug.Assert(index <= bytes.Length);
                 Debug.Assert(length <= bytes.Length - index);
@@ -2561,11 +2929,9 @@ public ReadOnlySpanValue(MetadataType elementType, byte[] bytes, int index, int
 
             public override int Size => 2 * _elementType.Context.Target.PointerSize;
 
-            public override bool Equals(Value value)
+            public override bool TryCompareEquality(Value value, out bool result)
             {
-                // ceq instruction on ReadOnlySpans is hard to support.
-                // We should not see it in the first place.
-                ThrowHelper.ThrowInvalidProgramException();
+                result = false;
                 return false;
             }
 
@@ -2582,87 +2948,90 @@ public override bool GetRawData(NodeFactory factory, out object data)
 
             public override Value Clone()
             {
-                // ReadOnlySpan is immutable and there's no way for the data to escape
-                return this;
+                return new SpanValue(_elementType, _bytes, _index, _length);
             }
 
             public override bool TryCreateByRef(out Value value)
             {
-                value = new ReadOnlySpanReferenceValue(_elementType, _bytes, _index, _length);
+                value = new SpanReferenceValue(this);
                 return true;
             }
-        }
-
-        private sealed class ReadOnlySpanReferenceValue : Value, IHasInstanceFields
-        {
-            private readonly MetadataType _elementType;
-            private readonly byte[] _bytes;
-            private readonly int _index;
-            private readonly int _length;
 
-            public ReadOnlySpanReferenceValue(MetadataType elementType, byte[] bytes, int index, int length)
+            private sealed class SpanReferenceValue : ByRefValueBase, IHasInstanceFields
             {
-                Debug.Assert(index <= bytes.Length);
-                Debug.Assert(length <= bytes.Length - index);
-                _elementType = elementType;
-                _bytes = bytes;
-                _index = index;
-                _length = length;
-            }
+                private readonly SpanValue _value;
 
-            public override bool Equals(Value value)
-            {
-                // ceq instruction on refs to ReadOnlySpans is hard to support.
-                // We should not see it in the first place.
-                ThrowHelper.ThrowInvalidProgramException();
-                return false;
-            }
+                public SpanReferenceValue(SpanValue value)
+                {
+                    _value = value;
+                }
 
-            public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
-            {
-                throw new NotSupportedException();
-            }
+                public override bool TryCompareEquality(Value value, out bool result)
+                {
+                    result = false;
+                    return false;
+                }
 
-            public override bool GetRawData(NodeFactory factory, out object data)
-            {
-                data = null;
-                return false;
-            }
+                public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
+                {
+                    throw new NotSupportedException();
+                }
 
-            public bool TryAccessElement(int index, out Value value)
-            {
-                value = default;
-                int limit = _length / _elementType.InstanceFieldSize.AsInt;
-                if (index >= limit)
+                public override bool GetRawData(NodeFactory factory, out object data)
+                {
+                    data = null;
                     return false;
+                }
 
-                value = new ByRefValue(_bytes, _index + index * _elementType.InstanceFieldSize.AsInt);
-                return true;
-            }
+                public bool TrySetField(FieldDesc field, Value value)
+                {
+                    MetadataType elementType;
+                    if (!TryGetSpanElementType(field.OwningType, isReadOnlySpan: true, out elementType)
+                        && !TryGetSpanElementType(field.OwningType, isReadOnlySpan: false, out elementType))
+                        return false;
 
-            public bool TrySetField(FieldDesc field, Value value) => false;
+                    if (elementType != _value._elementType)
+                        return false;
 
-            public Value GetField(FieldDesc field)
-            {
-                MetadataType elementType;
-                if (!TryGetSpanElementType(field.OwningType, isReadOnlySpan: true, out elementType)
-                    && !TryGetSpanElementType(field.OwningType, isReadOnlySpan: false, out elementType))
-                    ThrowHelper.ThrowInvalidProgramException();
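+                    // _value._length is tracked in bytes; the managed "_length" field holds an element count.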
+                    if (field.Name == "_length")
+                    {
+                        _value._length = value.AsInt32() * _value._elementType.InstanceFieldSize.AsInt;
+                        return true;
+                    }
 
-                if (elementType != _elementType)
-                    ThrowHelper.ThrowInvalidProgramException();
+                    if (value is ByRefValue byref)
+                    {
+                        Debug.Assert(field.Name == "_reference");
+                        _value._bytes = byref.PointedToBytes;
+                        _value._index = byref.PointedToOffset;
+                        return true;
+                    }
 
-                if (field.Name == "_length")
-                    return ValueTypeValue.FromInt32(_length / _elementType.InstanceFieldSize.AsInt);
+                    return false;
+                }
 
-                Debug.Assert(field.Name == "_reference");
-                return new ByRefValue(_bytes, _index);
-            }
+                public Value GetField(FieldDesc field)
+                {
+                    MetadataType elementType;
+                    if (!TryGetSpanElementType(field.OwningType, isReadOnlySpan: true, out elementType)
+                        && !TryGetSpanElementType(field.OwningType, isReadOnlySpan: false, out elementType))
+                        ThrowHelper.ThrowInvalidProgramException();
 
-            public ByRefValue GetFieldAddress(FieldDesc field)
-            {
-                ThrowHelper.ThrowInvalidProgramException();
-                return null; // unreached
+                    if (elementType != _value._elementType)
+                        ThrowHelper.ThrowInvalidProgramException();
+
+                    if (field.Name == "_length")
+                        return ValueTypeValue.FromInt32(_value._length / elementType.InstanceFieldSize.AsInt);
+
+                    Debug.Assert(field.Name == "_reference");
+                    return new ByRefValue(_value._bytes, _value._index);
+                }
+
+                public ByRefValueBase GetFieldAddress(FieldDesc field)
+                {
+                    ThrowHelper.ThrowInvalidProgramException();
+                    return null; // unreached
+                }
             }
         }
 
@@ -2677,14 +3046,16 @@ public MethodPointerValue(MethodDesc pointedToMethod)
 
             public override int Size => PointedToMethod.Context.Target.PointerSize;
 
-            public override bool Equals(Value value)
+            public override bool TryCompareEquality(Value value, out bool result)
             {
                 if (!(value is MethodPointerValue))
                 {
-                    ThrowHelper.ThrowInvalidProgramException();
+                    result = false;
+                    return false;
                 }
 
-                return PointedToMethod == ((MethodPointerValue)value).PointedToMethod;
+                result = PointedToMethod == ((MethodPointerValue)value).PointedToMethod;
+                return true;
             }
 
             public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
@@ -2697,9 +3068,24 @@ public override bool GetRawData(NodeFactory factory, out object data)
                 data = null;
                 return false;
             }
+
+            public override Value Clone() => this; // Immutable
+        }
+
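+        // Common base for byref values: optional indirect load/store, initblk-style zeroing,
+        // and the static field backing the reference (if any).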
+        private abstract class ByRefValueBase : Value, INativeIntConvertibleValue
+        {
+            public virtual bool TryStore(Value value) => false;
+            public virtual bool TryLoad(TypeDesc type, out Value value)
+            {
+                value = null;
+                return false;
+            }
+            public virtual bool TryInitialize(int size) => false;
+
+            public virtual FieldDesc BackingField => null;
         }
 
-        private sealed class ByRefValue : Value, IHasInstanceFields
+        private sealed class ByRefValue : ByRefValueBase, IHasInstanceFields
         {
             public readonly byte[] PointedToBytes;
             public readonly int PointedToOffset;
@@ -2710,34 +3096,69 @@ public ByRefValue(byte[] pointedToBytes, int pointedToOffset)
                 PointedToOffset = pointedToOffset;
             }
 
-            public override bool Equals(Value value)
+            public override bool TryCompareEquality(Value value, out bool result)
             {
                 if (!(value is ByRefValue))
                 {
-                    ThrowHelper.ThrowInvalidProgramException();
+                    result = false;
+                    return false;
                 }
 
-                return PointedToBytes == ((ByRefValue)value).PointedToBytes
+                result = PointedToBytes == ((ByRefValue)value).PointedToBytes
                     && PointedToOffset == ((ByRefValue)value).PointedToOffset;
+                return true;
             }
 
             Value IHasInstanceFields.GetField(FieldDesc field) => new FieldAccessor(PointedToBytes, PointedToOffset).GetField(field);
             bool IHasInstanceFields.TrySetField(FieldDesc field, Value value) => new FieldAccessor(PointedToBytes, PointedToOffset).TrySetField(field, value);
-            ByRefValue IHasInstanceFields.GetFieldAddress(FieldDesc field) => new FieldAccessor(PointedToBytes, PointedToOffset).GetFieldAddress(field);
+            ByRefValueBase IHasInstanceFields.GetFieldAddress(FieldDesc field) => new FieldAccessor(PointedToBytes, PointedToOffset).GetFieldAddress(field);
 
-            public void Initialize(int size)
+            public override bool TryInitialize(int size)
             {
                 if ((uint)size > (uint)(PointedToBytes.Length - PointedToOffset))
                 {
-                    ThrowHelper.ThrowInvalidProgramException();
+                    return false;
                 }
 
                 for (int i = PointedToOffset; i < PointedToOffset + size; i++)
                 {
                     PointedToBytes[i] = 0;
                 }
+
+                return true;
+            }
+
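+            // Stores the raw bytes of a value type through this byref; fails for non-value-type values
+            // or if the write would overrun the backing buffer.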
+            public override bool TryStore(Value value)
+            {
+                if (value is not ValueTypeValue srcVal)
+                    return false;
+
+                byte[] src = srcVal.InstanceBytes;
+                if (PointedToOffset + src.Length > PointedToBytes.Length)
+                    return false;
+
+                Array.Copy(src, 0, PointedToBytes, PointedToOffset, src.Length);
+
+                return true;
+            }
+
+            public override bool TryLoad(TypeDesc type, out Value value)
+            {
+                if (!type.IsValueType
+                    || ((MetadataType)type).InstanceFieldSize.AsInt > PointedToBytes.Length - PointedToOffset)
+                {
+                    value = null;
+                    return false;
+                }
+
+                var result = new ValueTypeValue(type);
+                Array.Copy(PointedToBytes, PointedToOffset, result.InstanceBytes, 0, result.InstanceBytes.Length);
+                value = result;
+                return true;
             }
 
+            public override Value Clone() => this; // Immutable
+
             public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
             {
                 // This would imply we have a byref-typed static field. The layout algorithm should have blocked this.
@@ -2749,20 +3170,6 @@ public override bool GetRawData(NodeFactory factory, out object data)
                 data = null;
                 return false;
             }
-
-            private ReadOnlySpan<byte> AsExactByteCount(int count)
-            {
-                if (PointedToOffset + count > PointedToBytes.Length)
-                    ThrowHelper.ThrowInvalidProgramException();
-                return new ReadOnlySpan<byte>(PointedToBytes, PointedToOffset, count);
-            }
-
-            public sbyte DereferenceAsSByte() => (sbyte)AsExactByteCount(1)[0];
-            public short DereferenceAsInt16() => BitConverter.ToInt16(AsExactByteCount(2));
-            public int DereferenceAsInt32() => BitConverter.ToInt32(AsExactByteCount(4));
-            public long DereferenceAsInt64() => BitConverter.ToInt64(AsExactByteCount(8));
-            public float DereferenceAsSingle() => BitConverter.ToSingle(AsExactByteCount(4));
-            public double DereferenceAsDouble() => BitConverter.ToDouble(AsExactByteCount(8));
         }
 
         private abstract class ReferenceTypeValue : Value
@@ -2771,9 +3178,10 @@ private abstract class ReferenceTypeValue : Value
 
             protected ReferenceTypeValue(TypeDesc type) { Type = type; }
 
-            public override bool Equals(Value value)
+            public override bool TryCompareEquality(Value value, out bool result)
             {
-                return this == value;
+                result = this == value;
+                return true;
             }
 
             public abstract ReferenceTypeValue ToForeignInstance(int baseInstructionCounter, TypePreinit preinitContext);
@@ -3003,15 +3411,9 @@ public void WriteContent(ref ObjectDataBuilder builder, ISymbolNode thisNode, No
                 builder.EmitBytes(_data);
             }
 
-            public bool TryGetReadOnlySpan(out ReadOnlySpanValue value)
+            public ByRefValue GetArrayData()
             {
-                if (((ArrayType)Type).ParameterType is MetadataType parameterType)
-                {
-                    value = new ReadOnlySpanValue(parameterType, _data, 0, _data.Length);
-                    return true;
-                }
-                value = null;
-                return false;
+                return new ByRefValue(_data, 0);
             }
 
             public bool IsKnownImmutable => _elementCount == 0;
@@ -3111,7 +3513,7 @@ public override ReferenceTypeValue ToForeignInstance(int baseInstructionCounter,
             }
             Value IHasInstanceFields.GetField(FieldDesc field) => new FieldAccessor(_value).GetField(field);
             bool IHasInstanceFields.TrySetField(FieldDesc field, Value value) => false;
-            ByRefValue IHasInstanceFields.GetFieldAddress(FieldDesc field) => new FieldAccessor(_value).GetFieldAddress(field);
+            ByRefValueBase IHasInstanceFields.GetFieldAddress(FieldDesc field) => new FieldAccessor(_value).GetFieldAddress(field);
         }
 
         private sealed class ObjectInstance : AllocatedReferenceTypeValue, IHasInstanceFields, ISerializableReference
@@ -3161,7 +3563,7 @@ public bool TryUnboxAny(TypeDesc type, out Value value)
 
             Value IHasInstanceFields.GetField(FieldDesc field) => new FieldAccessor(_data).GetField(field);
             bool IHasInstanceFields.TrySetField(FieldDesc field, Value value) => new FieldAccessor(_data).TrySetField(field, value);
-            ByRefValue IHasInstanceFields.GetFieldAddress(FieldDesc field) => new FieldAccessor(_data).GetFieldAddress(field);
+            ByRefValueBase IHasInstanceFields.GetFieldAddress(FieldDesc field) => new FieldAccessor(_data).GetFieldAddress(field);
 
             public override void WriteFieldData(ref ObjectDataBuilder builder, NodeFactory factory)
             {
@@ -3233,6 +3635,12 @@ public bool TrySetField(FieldDesc field, Value value)
                     return false;
                 }
 
+                if (value is ByRefValueBase
+                    && (field.FieldType.IsWellKnownType(WellKnownType.IntPtr) || field.FieldType.IsWellKnownType(WellKnownType.UIntPtr)))
+                {
+                    return false;
+                }
+
                 if (value is not ValueTypeValue vtValue)
                 {
                     ThrowHelper.ThrowInvalidProgramException();
@@ -3263,6 +3671,7 @@ private struct StackEntry
 
             public StackEntry(StackValueKind valueKind, Value value)
             {
+                // TODO: can we assert invariants around value allowed for valueKind?
                 ValueKind = valueKind;
                 Value = value;
             }
@@ -3292,6 +3701,34 @@ public static Status Fail(MethodDesc method, string detail)
             }
         }
 
+        private readonly struct NestedPreinitResult
+        {
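+            // Field values computed by a nested static constructor interpretation, plus the instruction
+            // counter to use as a base when importing them into another preinit context.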
+            private readonly Dictionary<FieldDesc, Value> _fieldValues;
+            private readonly int _baseInstructionCounter;
+
+            public NestedPreinitResult(Dictionary<FieldDesc, Value> fieldValues, int baseInstructionCounter)
+                => (_fieldValues, _baseInstructionCounter) = (fieldValues, baseInstructionCounter);
+
+            public bool TryGetFieldValue(TypePreinit context, FieldDesc field, out Value value)
+            {
+                Value fieldValue = _fieldValues[field];
+
+                if (fieldValue is ReferenceTypeValue referenceType)
+                {
+                    value = referenceType.ToForeignInstance(_baseInstructionCounter, context);
+                    return true;
+                }
+                else if (fieldValue is BaseValueTypeValue)
+                {
+                    value = fieldValue;
+                    return true;
+                }
+
+                value = null;
+                return false;
+            }
+        }
+
         public class PreinitializationInfo
         {
             private readonly Dictionary<FieldDesc, Value> _fieldValues;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UniversalGenericsRootProvider.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UniversalGenericsRootProvider.cs
deleted file mode 100644
index 19494506f7be..000000000000
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UniversalGenericsRootProvider.cs
+++ /dev/null
@@ -1,27 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using Internal.TypeSystem;
-
-namespace ILCompiler
-{
-    /// <summary>
-    /// Compilation roots necessary to enable universal shared generics thats
-    /// are not encompassed in other root providers
-    /// </summary>
-    public class UniversalGenericsRootProvider : ICompilationRootProvider
-    {
-        private TypeSystemContext _context;
-
-        public UniversalGenericsRootProvider(TypeSystemContext context)
-        {
-            _context = context;
-        }
-
-        public void AddCompilationRoots(IRootingServiceProvider rootProvider)
-        {
-            if (_context.SupportsUniversalCanon)
-                rootProvider.AddCompilationRoot(_context.UniversalCanonType.MakeArrayType(), "Universal generic array support");
-        }
-    }
-}
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UsageBasedMetadataManager.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UsageBasedMetadataManager.cs
index fd0c4e025d65..18fe157a668b 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UsageBasedMetadataManager.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UsageBasedMetadataManager.cs
@@ -56,6 +56,7 @@ private static (string AttributeName, DiagnosticId Id)[] _requiresAttributeMisma
         private readonly List<MetadataType> _typesWithMetadata = new List<MetadataType>();
         private readonly List<FieldDesc> _fieldsWithRuntimeMapping = new List<FieldDesc>();
         private readonly List<ReflectableCustomAttribute> _customAttributesWithMetadata = new List<ReflectableCustomAttribute>();
+        private readonly List<ReflectableParameter> _parametersWithMetadata = new List<ReflectableParameter>();
 
         internal IReadOnlyDictionary<string, bool> FeatureSwitches { get; }
 
@@ -147,6 +148,12 @@ protected override void Graph_NewMarkedNode(DependencyNodeCore obj)
                 _customAttributesWithMetadata.Add(customAttributeMetadataNode.CustomAttribute);
             }
 
+            var parameterMetadataNode = obj as MethodParameterMetadataNode;
+            if (parameterMetadataNode != null)
+            {
+                _parametersWithMetadata.Add(parameterMetadataNode.Parameter);
+            }
+
             var reflectedFieldNode = obj as ReflectedFieldNode;
             if (reflectedFieldNode != null)
             {
@@ -221,11 +228,13 @@ protected override void ComputeMetadata(NodeFactory factory,
             out byte[] metadataBlob,
             out List<MetadataMapping<MetadataType>> typeMappings,
             out List<MetadataMapping<MethodDesc>> methodMappings,
+            out Dictionary methodMetadataMappings,
             out List<MetadataMapping<FieldDesc>> fieldMappings,
+            out Dictionary fieldMetadataMappings,
             out List<StackTraceMapping> stackTraceMapping)
         {
             ComputeMetadata(new GeneratedTypesAndCodeMetadataPolicy(_blockingPolicy, factory),
-                factory, out metadataBlob, out typeMappings, out methodMappings, out fieldMappings, out stackTraceMapping);
+                factory, out metadataBlob, out typeMappings, out methodMappings, out methodMetadataMappings, out fieldMappings, out fieldMetadataMappings, out stackTraceMapping);
         }
 
         protected override void GetMetadataDependenciesDueToReflectability(ref DependencyList dependencies, NodeFactory factory, MethodDesc method)
@@ -234,6 +243,21 @@ protected override void GetMetadataDependenciesDueToReflectability(ref Dependenc
             dependencies.Add(factory.MethodMetadata(method.GetTypicalMethodDefinition()), "Reflectable method");
         }
 
+        public override void GetNativeLayoutMetadataDependencies(ref DependencyList dependencies, NodeFactory factory, MethodDesc method)
+        {
+            if (CanGenerateMetadata(method))
+            {
+                dependencies ??= new DependencyList();
+                dependencies.Add(factory.LimitedMethodMetadata(method.GetTypicalMethodDefinition()), "Method referenced from native layout");
+            }
+            else
+            {
+                // We can end up here with reflection disabled or multifile compilation.
+                // If we ever productize either, we'll need to do something different.
+                // Scenarios that currently need this won't work in these modes.
+            }
+        }
+
         protected override void GetMetadataDependenciesDueToReflectability(ref DependencyList dependencies, NodeFactory factory, FieldDesc field)
         {
             dependencies ??= new DependencyList();
@@ -288,29 +312,6 @@ protected override void GetMetadataDependenciesDueToReflectability(ref Dependenc
             }
 
             MetadataType mdType = type as MetadataType;
-
-            // If anonymous type heuristic is turned on and this is an anonymous type, make sure we have
-            // method bodies for all properties. It's common to have anonymous types used with reflection
-            // and it's hard to specify them in RD.XML.
-            if ((_generationOptions & UsageBasedMetadataGenerationOptions.AnonymousTypeHeuristic) != 0)
-            {
-                if (mdType != null &&
-                    mdType.HasInstantiation &&
-                    !mdType.IsGenericDefinition &&
-                    mdType.HasCustomAttribute("System.Runtime.CompilerServices", "CompilerGeneratedAttribute") &&
-                    mdType.Name.Contains("AnonymousType"))
-                {
-                    foreach (MethodDesc method in type.GetMethods())
-                    {
-                        if (!method.Signature.IsStatic && method.IsSpecialName)
-                        {
-                            dependencies ??= new DependencyList();
-                            dependencies.Add(factory.CanonicalEntrypoint(method), "Anonymous type accessor");
-                        }
-                    }
-                }
-            }
-
             ModuleDesc module = mdType?.Module;
             if (module != null && !_rootEntireAssembliesExaminedModules.Contains(module))
             {
@@ -918,7 +919,7 @@ public MetadataManager ToAnalysisBasedMetadataManager()
             return new AnalysisBasedMetadataManager(
                 _typeSystemContext, _blockingPolicy, _resourceBlockingPolicy, _metadataLogFile, _stackTraceEmissionPolicy, _dynamicInvokeThunkGenerationPolicy, FlowAnnotations,
                 _modulesWithMetadata, _typesWithForcedEEType, reflectableTypes.ToEnumerable(), reflectableMethods.ToEnumerable(),
-                reflectableFields.ToEnumerable(), _customAttributesWithMetadata, _options);
+                reflectableFields.ToEnumerable(), _customAttributesWithMetadata, _parametersWithMetadata, _options);
         }
 
         private void AddDataflowDependency(ref DependencyList dependencies, NodeFactory factory, MethodIL methodIL, string reason)
@@ -1018,7 +1019,7 @@ public bool GeneratesMetadata(FieldDesc fieldDef)
 
             public bool GeneratesMetadata(MethodDesc methodDef)
             {
-                return _factory.MethodMetadata(methodDef).Marked;
+                return _factory.MethodMetadata(methodDef).Marked || _factory.LimitedMethodMetadata(methodDef).Marked;
             }
 
             public bool GeneratesMetadata(MetadataType typeDef)
@@ -1031,6 +1032,11 @@ public bool GeneratesMetadata(EcmaModule module, CustomAttributeHandle caHandle)
                 return _factory.CustomAttributeMetadata(new ReflectableCustomAttribute(module, caHandle)).Marked;
             }
 
+            public bool GeneratesMetadata(EcmaModule module, ParameterHandle paramHandle)
+            {
+                return _factory.MethodParameterMetadata(new ReflectableParameter(module, paramHandle)).Marked;
+            }
+
             public bool GeneratesMetadata(EcmaModule module, ExportedTypeHandle exportedTypeHandle)
             {
                 // Generate the forwarder only if we generated the target type.
@@ -1215,28 +1221,19 @@ public enum UsageBasedMetadataGenerationOptions
         /// 
         CompleteTypesOnly = 1,
 
-        /// <summary>
-        /// Specifies that heuristic that makes anonymous types work should be applied.
-        /// </summary>
-        /// <remarks>
-        /// Generates method bodies for properties on anonymous types even if they're not
-        /// statically used.
-        /// </remarks>
-        AnonymousTypeHeuristic = 2,
-
         /// <summary>
         /// Scan IL for common reflection patterns to find additional compilation roots.
         /// </summary>
-        ReflectionILScanning = 4,
+        ReflectionILScanning = 2,
 
         /// <summary>
         /// Consider all native artifacts (native method bodies, etc) visible from reflection.
         /// </summary>
-        CreateReflectableArtifacts = 8,
+        CreateReflectableArtifacts = 4,
 
         /// <summary>
         /// Fully root used assemblies that are not marked IsTrimmable in metadata.
         /// </summary>
-        RootDefaultAssemblies = 16,
+        RootDefaultAssemblies = 8,
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UserDefinedTypeDescriptor.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UserDefinedTypeDescriptor.cs
index 305a624b7ccb..c0fc70d2313c 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UserDefinedTypeDescriptor.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UserDefinedTypeDescriptor.cs
@@ -733,11 +733,11 @@ private uint GetClassTypeIndex(TypeDesc type, bool needsCompleteType)
                 statics[i] = staticsDescs[i];
             }
 
-            LayoutInt elementSize = defType.GetElementSize();
-            int elementSizeEmit = elementSize.IsIndeterminate ? 0xBAAD : elementSize.AsInt;
+            LayoutInt instanceSize = defType.IsValueType ? defType.InstanceFieldSize : defType.InstanceByteCount;
+            int instanceSizeEmit = instanceSize.IsIndeterminate ? 0xBAAD : instanceSize.AsInt;
             ClassFieldsTypeDescriptor fieldsDescriptor = new ClassFieldsTypeDescriptor
             {
-                Size = (ulong)elementSizeEmit,
+                Size = (ulong)instanceSizeEmit,
                 FieldsCount = fieldsDescs.Count,
             };
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj b/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj
index 1b16f7a13e92..90a51b83178f 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj
@@ -215,9 +215,6 @@
     
       Common\StackTraceData.cs
     
-    
-      Common\UniversalGenericParameterLayout.cs
-    
     
       Common\Utf8String.cs
     
@@ -397,9 +394,6 @@
     
     
     
-    
-      Utilities\TypeNameHelpers.cs
-    
     
       Utilities\ValueStringBuilder.cs
     
@@ -426,6 +420,7 @@
     
     
     
+    
     
     
     
@@ -520,7 +515,6 @@
     
     
     
-    
     
     
     
@@ -673,7 +667,6 @@
     
     
     
-    
     
     
     
@@ -706,9 +699,6 @@
     
       Common\ArrayBuilder.cs
     
-    
-      Common\CanonTypeKind.cs
-    
     
       Common\MethodTable.Constants.cs
     
diff --git a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/EntityMap.cs b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/EntityMap.cs
index 02d48c7a810c..492bcb1f82db 100644
--- a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/EntityMap.cs
+++ b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/EntityMap.cs
@@ -15,6 +15,8 @@ public IReadOnlyCollection Records
             get { return _map.Values; }
         }
 
+        public IEnumerable<KeyValuePair<TEntity, TRecord>> Entries => _map;
+
         public EntityMap(IEqualityComparer<TEntity> comparer)
         {
             _map = new Dictionary<TEntity, TRecord>(comparer);
diff --git a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/IMetadataPolicy.cs b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/IMetadataPolicy.cs
index 408a3be6ded5..c79bc3fe477a 100644
--- a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/IMetadataPolicy.cs
+++ b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/IMetadataPolicy.cs
@@ -43,6 +43,12 @@ public interface IMetadataPolicy
         /// </summary>
         bool GeneratesMetadata(Cts.Ecma.EcmaModule module, Ecma.CustomAttributeHandle customAttribute);
 
+        /// <summary>
+        /// Returns true if the parameter should generate <see cref="Parameter"/> metadata.
+        /// If false, the parameter is not generated.
+        /// </summary>
+        bool GeneratesMetadata(Cts.Ecma.EcmaModule module, Ecma.ParameterHandle parameter);
+
         /// <summary>
         /// Returns true if an exported type entry should generate  metadata.
         /// </summary>
diff --git a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/MetadataTransformResult.cs b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/MetadataTransformResult.cs
index d64c6fd67dad..04e95b91beab 100644
--- a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/MetadataTransformResult.cs
+++ b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/MetadataTransformResult.cs
@@ -97,6 +97,13 @@ public Method GetTransformedMethodDefinition(Cts.MethodDesc method)
             return rec as Method;
         }
 
+        public IEnumerable<KeyValuePair<Cts.MethodDesc, Method>> GetTransformedMethodDefinitions()
+        {
+            foreach (KeyValuePair<Cts.MethodDesc, MetadataRecord> entry in _transform._methods.Entries)
+                if (entry.Value is Method m)
+                    yield return new KeyValuePair<Cts.MethodDesc, Method>(entry.Key, m);
+        }
+
         /// <summary>
         /// Attempts to retrieve a  record corresponding to the specified
         /// . Returns null if not found.
diff --git a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/Transform.Method.cs b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/Transform.Method.cs
index 02c35466167a..98845f5b6b2d 100644
--- a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/Transform.Method.cs
+++ b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/Transform.Method.cs
@@ -84,6 +84,9 @@ private void InitializeMethodDefinition(Cts.MethodDesc entity, Method record)
                 record.Parameters.Capacity = paramHandles.Count;
                 foreach (var paramHandle in paramHandles)
                 {
+                    if (!_policy.GeneratesMetadata(ecmaEntity.Module, paramHandle))
+                        continue;
+
                     Ecma.Parameter param = reader.GetParameter(paramHandle);
                     Parameter paramRecord = new Parameter
                     {
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs
index 801bd7dbe2cc..8fadcc5f3beb 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs
@@ -75,10 +75,12 @@ protected virtual void GetElementDataForNodes(ref ObjectDataBuilder builder, Nod
             }
         }
 
+        protected virtual int GetAlignmentRequirement(NodeFactory factory) { return factory.Target.PointerSize; }
+
         public override ObjectData GetData(NodeFactory factory, bool relocsOnly)
         {
             ObjectDataBuilder builder = new ObjectDataBuilder(factory, relocsOnly);
-            builder.RequireInitialPointerAlignment();
+            builder.RequireInitialAlignment(GetAlignmentRequirement(factory));
 
             if (_sorter != null)
                 _nestedNodesList.MergeSort(_sorter);
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/AttributePresenceFilterNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/AttributePresenceFilterNode.cs
index d12d18c4c0af..03d26965d4a5 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/AttributePresenceFilterNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/AttributePresenceFilterNode.cs
@@ -9,6 +9,7 @@
 using System.Reflection.Metadata;
 using System.Reflection.Metadata.Ecma335;
 
+using Internal;
 using Internal.Text;
 using Internal.TypeSystem.Ecma;
 
@@ -334,8 +335,8 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                 {
                     string name = customAttributeEntry.TypeNamespace + "." + customAttributeEntry.TypeName;
                     // This hashing algorithm MUST match exactly the logic in NativeCuckooFilter
-                    int hashOfAttribute = ReadyToRunHashCode.NameHashCode(name);
-                    uint hash = unchecked((uint)ReadyToRunHashCode.CombineTwoValuesIntoHash((uint)hashOfAttribute, (uint)customAttributeEntry.Parent));
+                    int hashOfAttribute = VersionResilientHashCode.NameHashCode(name);
+                    uint hash = unchecked((uint)VersionResilientHashCode.CombineTwoValuesIntoHash((uint)hashOfAttribute, (uint)customAttributeEntry.Parent));
                     ushort fingerprint = (ushort)(hash >> 16);
                     if (fingerprint == 0)
                     {
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs
index 54a85ff42e2c..4e4e00e9fad7 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.Diagnostics;
 
 using Internal.Text;
 using Internal.TypeSystem;
@@ -66,6 +67,15 @@ public override void EncodeData(ref ObjectDataBuilder dataBuilder, NodeFactory f
             // when loaded by CoreCLR
             dataBuilder.EmitReloc(_delayLoadHelper,
                 factory.Target.PointerSize == 4 ? RelocType.IMAGE_REL_BASED_HIGHLOW : RelocType.IMAGE_REL_BASED_DIR64, factory.Target.CodeDelta);
+
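+            // Entries that are two pointers wide (e.g. dispatch cells when cached interface dispatch is enabled) get their second slot zero-initialized.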
+            if (Table.EntrySize == (factory.Target.PointerSize * 2))
+            {
+                dataBuilder.EmitNaturalInt(0);
+            }
+            else
+            {
+                Debug.Assert(Table.EntrySize == factory.Target.PointerSize);
+            }
         }
 
         public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFactory factory)
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs
index eb996de2ac2b..4c73a0ab08bc 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs
@@ -28,7 +28,7 @@ public DelayLoadHelperMethodImport(
             MethodWithToken method,
             bool useVirtualCall,
             bool useInstantiatingStub,
-            Signature instanceSignature, 
+            Signature instanceSignature,
             MethodDesc callingMethod = null)
             : base(factory, importSectionNode, helper, instanceSignature, useVirtualCall, useJumpableStub: false, callingMethod)
         {
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/HeaderNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/HeaderNode.cs
index b652563a9583..f010b4fd0934 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/HeaderNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/HeaderNode.cs
@@ -129,13 +129,10 @@ public void Add(ReadyToRunSectionType id, DependencyNodeCore<NodeFactory> node,
 
         public override bool StaticDependenciesAreComputed => true;
 
-        public override ObjectNodeSection GetSection(NodeFactory factory)
-        {
-            if (factory.Target.IsWindows)
-                return ObjectNodeSection.ReadOnlyDataSection;
-            else
-                return ObjectNodeSection.DataSection;
-        }
+        // For R2R, we can put the header in the read-only section on non-Windows as well. Since we emit a PE image
+        // and do our own mapping, we don't need it to be writeable for the OS loader to handle absolute pointer relocs.
+        // Our R2R PE images group read-only data into the .text section, so this doesn't result in more work to map.
+        public override ObjectNodeSection GetSection(NodeFactory factory) => ObjectNodeSection.ReadOnlyDataSection;
 
         public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
         {
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs
index 45dc3fb4e240..e2356ea2816f 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs
@@ -12,11 +12,21 @@ public class ImportSectionNode : EmbeddedObjectNode
     {
         private class ImportTable : ArrayOfEmbeddedDataNode<Import>
         {
-            public ImportTable(string symbol) : base(symbol, nodeSorter: new EmbeddedObjectNodeComparer(CompilerComparer.Instance)) {}
+            private byte _alignment;
+
+            public ImportTable(string symbol, byte alignment) : base(symbol, nodeSorter: new EmbeddedObjectNodeComparer(CompilerComparer.Instance))
+            {
+                _alignment = alignment;
+            }
 
             public override bool ShouldSkipEmittingObjectNode(NodeFactory factory) => false;
 
             public override int ClassCode => (int)ObjectNodeOrder.ImportSectionNode;
+
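+            // Align the import table to its entry size so double-pointer-sized entries stay naturally aligned.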
+            protected override int GetAlignmentRequirement(NodeFactory factory)
+            {
+                return _alignment;
+            }
         }
 
         private readonly ImportTable _imports;
@@ -44,7 +54,7 @@ public ImportSectionNode(string name, ReadyToRunImportSectionType importType, Re
             _emitPrecode = emitPrecode;
             _emitGCRefMap = emitGCRefMap;
 
-            _imports = new ImportTable(_name + "_ImportBegin");
+            _imports = new ImportTable(_name + "_ImportBegin", entrySize);
             _signatures = new ArrayOfEmbeddedPointersNode<Signature>(_name + "_SigBegin", new EmbeddedObjectNodeComparer(CompilerComparer.Instance));
             _signatureList = new List<Signature>();
             _gcRefMap = _emitGCRefMap ? new GCRefMapNode(this) : null;
@@ -154,5 +164,7 @@ public override int CompareToImpl(ISortableNode other, CompilerComparer comparer
         {
             return _name.CompareTo(((ImportSectionNode)other)._name);
         }
+
+        public int EntrySize => _entrySize;
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InliningInfoNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InliningInfoNode.cs
index e06235040154..82c19968d977 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InliningInfoNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InliningInfoNode.cs
@@ -7,6 +7,7 @@
 using System.IO;
 using System.Reflection.Metadata.Ecma335;
 
+using Internal;
 using Internal.NativeFormat;
 using Internal.ReadyToRunConstants;
 using Internal.Text;
@@ -139,16 +140,16 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                 EcmaMethod inlinee = inlineeWithInliners.Key;
                 int inlineeRid = MetadataTokens.GetRowNumber(inlinee.Handle);
                 int hashCode;
-                
+
                 if (AllowCrossModuleInlines)
                 {
                     // CrossModuleInlineInfo format
-                    hashCode = ReadyToRunHashCode.MethodHashCode(inlinee);
+                    hashCode = VersionResilientHashCode.MethodHashCode(inlinee);
                 }
                 else
                 {
                     // InliningInfo2 format
-                    hashCode = ReadyToRunHashCode.ModuleNameHashCode(inlinee.Module);
+                    hashCode = VersionResilientHashCode.ModuleNameHashCode(inlinee.Module);
                     hashCode ^= inlineeRid;
                 }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InstanceEntryPointTableNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InstanceEntryPointTableNode.cs
index 1ad75093fada..265f2155854e 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InstanceEntryPointTableNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InstanceEntryPointTableNode.cs
@@ -8,6 +8,7 @@
 using System.Linq;
 using System.Reflection.Metadata.Ecma335;
 
+using Internal;
 using Internal.JitInterface;
 using Internal.NativeFormat;
 using Internal.Runtime;
@@ -123,7 +124,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
 
                 EntryPointVertex entryPointVertex = new EntryPointWithBlobVertex((uint)methodIndex, fixupBlob, signatureBlob);
                 hashtableSection.Place(entryPointVertex);
-                vertexHashtable.Append(unchecked((uint)ReadyToRunHashCode.MethodHashCode(method.Method)), entryPointVertex);
+                vertexHashtable.Append(unchecked((uint)VersionResilientHashCode.MethodHashCode(method.Method)), entryPointVertex);
             }
 
             MemoryStream hashtableContent = new MemoryStream();
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InstrumentationDataTableNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InstrumentationDataTableNode.cs
index 58ccf8969f7a..496413247dc9 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InstrumentationDataTableNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/InstrumentationDataTableNode.cs
@@ -9,6 +9,7 @@
 using System.Linq;
 using System.Reflection.Metadata.Ecma335;
 
+using Internal;
 using Internal.JitInterface;
 using Internal.NativeFormat;
 using Internal.Pgo;
@@ -284,7 +285,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
 
                 PgoInstrumentedDataWithSignatureBlobVertex pgoDataVertex = new PgoInstrumentedDataWithSignatureBlobVertex(signatureBlob, 0, instrumentationDataBlob);
                 hashtableSection.Place(pgoDataVertex);
-                vertexHashtable.Append(unchecked((uint)ReadyToRunHashCode.MethodHashCode(method)), pgoDataVertex);
+                vertexHashtable.Append(unchecked((uint)VersionResilientHashCode.MethodHashCode(method)), pgoDataVertex);
             }
 
             MemoryStream hashtableContent = new MemoryStream();
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ModuleTokenResolver.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ModuleTokenResolver.cs
index defd270ef7a2..391d5328a4be 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ModuleTokenResolver.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ModuleTokenResolver.cs
@@ -305,6 +305,14 @@ public void AddModuleTokenForType(TypeDesc type, ModuleToken token)
 
         public int GetModuleIndex(IEcmaModule module)
         {
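+            // Reject tokens that reference modules outside the version bubble; the mutable manifest module is exempt.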
+            int moduleIndex = _moduleIndexLookup(module);
+            if (moduleIndex != 0 && !(module is Internal.TypeSystem.Ecma.MutableModule))
+            {
+                if (!_compilationModuleGroup.VersionsWithModule((ModuleDesc)module))
+                {
+                    throw new InternalCompilerErrorException("Attempt to use token from a module not within the version bubble");
+                }
+            }
             return _moduleIndexLookup(module);
         }
 
@@ -316,7 +324,7 @@ public int GetModuleIndex(IEcmaModule module)
         /// 
         private class DummyTypeInfo
         {
-            public static DummyTypeInfo Instance = new DummyTypeInfo(); 
+            public static DummyTypeInfo Instance = new DummyTypeInfo();
         }
 
         private class TokenResolverProvider : ISignatureTypeProvider
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/SignatureBuilder.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/SignatureBuilder.cs
index fe5595531ad8..1fb549a6f1af 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/SignatureBuilder.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/SignatureBuilder.cs
@@ -602,6 +602,11 @@ public SignatureContext EmitFixup(NodeFactory factory, ReadyToRunFixupKind fixup
             else
             {
                 EmitByte((byte)(fixupKind | ReadyToRunFixupKind.ModuleOverride));
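+                // A ModuleOverride fixup may only reference a module inside the version bubble (or the mutable manifest module).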
+                if (!(targetModule is Internal.TypeSystem.Ecma.MutableModule) && !factory.CompilationModuleGroup.VersionsWithModule((ModuleDesc)targetModule))
+                {
+                    throw new InternalCompilerErrorException("Attempt to use token from a module not within the version bubble");
+                }
+                
                 EmitUInt((uint)factory.ManifestMetadataTable.ModuleToIndex(targetModule));
                 return new SignatureContext(targetModule, outerContext.Resolver);
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypesTableNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypesTableNode.cs
index 45da68f26bd5..3acb98d21b22 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypesTableNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypesTableNode.cs
@@ -8,6 +8,7 @@
 using System.Reflection.Metadata;
 using System.Reflection.Metadata.Ecma335;
 
+using Internal;
 using Internal.NativeFormat;
 using Internal.Text;
 using Internal.TypeSystem;
@@ -45,7 +46,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                     TypeDefinition defType = defTypeInfo.MetadataReader.GetTypeDefinition(defTypeHandle);
                     string namespaceName = defTypeInfo.MetadataReader.GetString(defType.Namespace);
                     string typeName = defTypeInfo.MetadataReader.GetString(defType.Name);
-                    hashCode ^= ReadyToRunHashCode.NameHashCode(namespaceName, typeName);
+                    hashCode ^= VersionResilientHashCode.NameHashCode(namespaceName, typeName);
                     if (!defType.Attributes.IsNested())
                     {
                         break;
@@ -64,7 +65,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                     ExportedType expType = expTypeInfo.MetadataReader.GetExportedType(expTypeHandle);
                     string namespaceName = expTypeInfo.MetadataReader.GetString(expType.Namespace);
                     string typeName = expTypeInfo.MetadataReader.GetString(expType.Name);
-                    hashCode ^= ReadyToRunHashCode.NameHashCode(namespaceName, typeName);
+                    hashCode ^= VersionResilientHashCode.NameHashCode(namespaceName, typeName);
                     if (expType.Implementation.Kind != HandleKind.ExportedType)
                     {
                         // Not a nested class
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs
index 507c34b6f899..e644439b2bec 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs
@@ -58,6 +58,7 @@ public sealed class NodeFactoryOptimizationFlags
         public TypeValidationRule TypeValidation;
         public int DeterminismStress;
         public bool PrintReproArgs;
+        public bool EnableCachedInterfaceDispatchSupport;
     }
 
     // To make the code future compatible to the composite R2R story
@@ -307,7 +308,7 @@ private void CreateNodeCaches()
             {
                 return new DelayLoadHelperMethodImport(
                     this,
-                    DispatchImports,
+                    HelperImports,
                     ReadyToRunHelper.DelayLoad_Helper_Obj,
                     key.Method,
                     useVirtualCall: false,
@@ -867,7 +868,7 @@ bool HasAnyProfileDataForInput()
                 "DispatchImports",
                 ReadyToRunImportSectionType.StubDispatch,
                 ReadyToRunImportSectionFlags.PCode,
-                (byte)Target.PointerSize,
+                this.OptimizationFlags.EnableCachedInterfaceDispatchSupport ? (byte)(2 * Target.PointerSize) : (byte)Target.PointerSize,
                 emitPrecode: false,
                 emitGCRefMap: true);
             ImportSectionsTable.AddEmbeddedObject(DispatchImports);
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/SortableDependencyNodeCompilerSpecific.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/SortableDependencyNodeCompilerSpecific.cs
deleted file mode 100644
index 9c4df7c84f20..000000000000
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/SortableDependencyNodeCompilerSpecific.cs
+++ /dev/null
@@ -1,25 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-
-using ILCompiler.DependencyAnalysisFramework;
-using Internal.TypeSystem;
-
-namespace ILCompiler.DependencyAnalysis
-{
-    partial class SortableDependencyNode
-    {
-        // Custom sort order. Used to override the default sorting mechanics.
-        public int CustomSort = int.MaxValue;
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        static partial void ApplyCustomSort(SortableDependencyNode x, SortableDependencyNode y, ref int result)
-        {
-            result = x.CustomSort.CompareTo(y.CustomSort);
-        }
-    }
-}
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/FileLayoutOptimizer.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/FileLayoutOptimizer.cs
new file mode 100644
index 000000000000..86a6a000505e
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/FileLayoutOptimizer.cs
@@ -0,0 +1,345 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using Internal.TypeSystem;
+
+using ILCompiler.DependencyAnalysis;
+#if READYTORUN
+using ILCompiler.DependencyAnalysis.ReadyToRun;
+#endif
+using ILCompiler.DependencyAnalysisFramework;
+using System.Linq;
+using System.Collections.Immutable;
+using System.Text;
+using System.Reflection.Metadata.Ecma335;
+using ILCompiler.PettisHansenSort;
+
+#if !READYTORUN
+using MethodWithGCInfo = ILCompiler.DependencyAnalysis.MethodCodeNode;
+#endif
+
+namespace ILCompiler
+{
+    public enum MethodLayoutAlgorithm
+    {
+        DefaultSort,
+        ExclusiveWeight,
+        HotCold,
+        InstrumentedHotCold,
+        HotWarmCold,
+#if READYTORUN
+        CallFrequency,
+#endif
+        PettisHansen,
+        Random,
+    }
+
+    public enum FileLayoutAlgorithm
+    {
+        DefaultSort,
+        MethodOrder,
+    }
+
+    class FileLayoutOptimizer
+    {
+        public FileLayoutOptimizer (Logger logger,
+                                              MethodLayoutAlgorithm methodAlgorithm,
+                                              FileLayoutAlgorithm fileAlgorithm,
+                                              ProfileDataManager profileData,
+                                              NodeFactory nodeFactory)
+        {
+            _logger = logger;
+            _methodLayoutAlgorithm = methodAlgorithm;
+            _fileLayoutAlgorithm = fileAlgorithm;
+            _profileData = profileData;
+            _nodeFactory = nodeFactory;
+        }
+
+        private Logger _logger;
+        private MethodLayoutAlgorithm _methodLayoutAlgorithm = MethodLayoutAlgorithm.DefaultSort;
+        private FileLayoutAlgorithm _fileLayoutAlgorithm = FileLayoutAlgorithm.DefaultSort;
+        private ProfileDataManager _profileData;
+        private NodeFactory _nodeFactory;
+
+        public ImmutableArray<DependencyNodeCore<NodeFactory>> ApplyProfilerGuidedMethodSort(ImmutableArray<DependencyNodeCore<NodeFactory>> nodes)
+        {
+            if (_methodLayoutAlgorithm == MethodLayoutAlgorithm.DefaultSort)
+                return nodes;
+
+            List<MethodWithGCInfo> methods = new List<MethodWithGCInfo>();
+            foreach (var node in nodes)
+            {
+                if (node is MethodWithGCInfo method)
+                {
+                    methods.Add(method);
+                }
+            }
+
+            methods = ApplyMethodSort(methods);
+
+            int sortOrder = 0;
+
+            List<MethodWithGCInfo> sortedMethodsList = methods;
+
+            foreach (var methodNode in sortedMethodsList)
+            {
+                methodNode.CustomSort = sortOrder;
+#if READYTORUN
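+                // Cold code blocks are placed after all hot code while preserving the hot method order.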
+                MethodColdCodeNode methodColdCodeNode = methodNode.ColdCodeNode;
+                if (methodColdCodeNode != null)
+                {
+                    methodColdCodeNode.CustomSort = sortOrder + sortedMethodsList.Count;
+                }
+#endif
+                sortOrder++;
+            }
+
+            if (_fileLayoutAlgorithm == FileLayoutAlgorithm.MethodOrder)
+            {
+                // Sort the dependencies of methods by the method order
+                foreach (var method in sortedMethodsList)
+                {
+                    ApplySortToDependencies(method, 0);
+                }
+            }
+
+            var newNodesArray = nodes.ToArray();
+            newNodesArray.MergeSortAllowDuplicates(new SortableDependencyNode.ObjectNodeComparer(CompilerComparer.Instance));
+            return newNodesArray.ToImmutableArray();
+
+            void ApplySortToDependencies(DependencyNodeCore<NodeFactory> node, int depth)
+            {
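+                // Cap the recursion depth so the dependency walk stays cheap on large graphs.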
+                if (depth > 5)
+                    return;
+
+                if (node is SortableDependencyNode sortableNode)
+                {
+                    if (sortableNode.CustomSort != Int32.MaxValue)
+                        return; // Node already sorted
+                    sortableNode.CustomSort += sortOrder++;
+                }
+                foreach (var dependency in node.GetStaticDependencies(_nodeFactory))
+                {
+                    ApplySortToDependencies(dependency.Node, depth + 1);
+                }
+            }
+        }
+
+        private List<MethodWithGCInfo> ApplyMethodSort(List<MethodWithGCInfo> methods)
+        {
+            switch (_methodLayoutAlgorithm)
+            {
+                case MethodLayoutAlgorithm.DefaultSort:
+                    break;
+
+                case MethodLayoutAlgorithm.ExclusiveWeight:
+                    methods.MergeSortAllowDuplicates(sortMethodWithGCInfoByWeight);
+
+                    int sortMethodWithGCInfoByWeight(MethodWithGCInfo left, MethodWithGCInfo right)
+                    {
+                        return -MethodWithGCInfoToWeight(left).CompareTo(MethodWithGCInfoToWeight(right));
+                    }
+                    break;
+
+                case MethodLayoutAlgorithm.HotCold:
+                    methods.MergeSortAllowDuplicates((MethodWithGCInfo left, MethodWithGCInfo right) => ComputeHotColdRegion(left).CompareTo(ComputeHotColdRegion(right)));
+
+                    int ComputeHotColdRegion(MethodWithGCInfo method)
+                    {
+                        return MethodWithGCInfoToWeight(method) > 0 ? 0 : 1;
+                    }
+                    break;
+
+                case MethodLayoutAlgorithm.InstrumentedHotCold:
+                    methods.MergeSortAllowDuplicates((MethodWithGCInfo left, MethodWithGCInfo right) => (_profileData[left.Method] != null).CompareTo(_profileData[right.Method] != null));
+                    break;
+
+                case MethodLayoutAlgorithm.HotWarmCold:
+                    methods.MergeSortAllowDuplicates((MethodWithGCInfo left, MethodWithGCInfo right) => ComputeHotWarmColdRegion(left).CompareTo(ComputeHotWarmColdRegion(right)));
+
+                    int ComputeHotWarmColdRegion(MethodWithGCInfo method)
+                    {
+                        double weight = MethodWithGCInfoToWeight(method);
+
+                        // If weight is greater than 128, it's probably significantly used at runtime
+                        if (weight > 128)
+                            return 0;
+
+                        // If weight is less than 128 but greater than 0, then it's probably used at startup
+                        // or some at runtime, but is less critical than the hot code
+                        if (weight > 0)
+                            return 1;
+
+                        // Methods without weight are probably relatively rarely used
+                        return 2;
+                    };
+                    break;
+
+#if READYTORUN
+                case MethodLayoutAlgorithm.CallFrequency:
+                    methods = MethodCallFrequencySort(methods);
+                    break;
+#endif
+
+                case MethodLayoutAlgorithm.PettisHansen:
+                    methods = PettisHansenSort(methods);
+                    break;
+
+                case MethodLayoutAlgorithm.Random:
+                    Random rand = new Random(0);
+                    for (int i = 0; i < methods.Count - 1; i++)
+                    {
+                        int j = rand.Next(i, methods.Count);
+                        MethodWithGCInfo temp = methods[i];
+                        methods[i] = methods[j];
+                        methods[j] = temp;
+                    }
+                    break;
+
+                default:
+                    throw new NotImplementedException(_methodLayoutAlgorithm.ToString());
+            }
+
+            return methods;
+        }
+
+        private double MethodWithGCInfoToWeight(MethodWithGCInfo method)
+        {
+            var profileData = _profileData[method.Method];
+            double weight = 0;
+
+            if (profileData != null)
+            {
+                weight = profileData.ExclusiveWeight;
+            }
+            return weight;
+        }
+
+        private class CallerCalleeCount
+        {
+            public readonly MethodDesc Caller;
+            public readonly MethodDesc Callee;
+            public readonly int Count;
+
+            public CallerCalleeCount(MethodDesc caller, MethodDesc callee, int count)
+            {
+                Caller = caller;
+                Callee = callee;
+                Count = count;
+            }
+        }
+
+#if READYTORUN
+        /// <summary>
+        /// Use callchain profile information to generate method ordering. We place
+        /// callers and callees by traversing the caller-callee pairs in the callchain
+        /// profile in the order of descending hit count. All methods not present
+        /// (or not matched) in the callchain profile go last.
+        /// </summary>
+        /// <param name="methodsToPlace">List of methods to place</param>
+        private List<MethodWithGCInfo> MethodCallFrequencySort(List<MethodWithGCInfo> methodsToPlace)
+        {
+            if (_profileData.CallChainProfile == null)
+            {
+                return methodsToPlace;
+            }
+
+            Dictionary<MethodDesc, MethodWithGCInfo> methodMap = new Dictionary<MethodDesc, MethodWithGCInfo>();
+            foreach (MethodWithGCInfo methodWithGCInfo in methodsToPlace)
+            {
+                methodMap.Add(methodWithGCInfo.Method, methodWithGCInfo);
+            }
+
+            List<CallerCalleeCount> callList = new List<CallerCalleeCount>();
+            foreach (KeyValuePair<MethodDesc, Dictionary<MethodDesc, int>> methodProfile in _profileData.CallChainProfile.ResolvedProfileData.Where(kvp => methodMap.ContainsKey(kvp.Key)))
+            {
+                foreach (KeyValuePair<MethodDesc, int> callee in methodProfile.Value.Where(kvp => methodMap.ContainsKey(kvp.Key)))
+                {
+                    callList.Add(new CallerCalleeCount(methodProfile.Key, callee.Key, callee.Value));
+                }
+            }
+            callList.Sort((a, b) => b.Count.CompareTo(a.Count));
+
+            List<MethodWithGCInfo> outputMethods = new List<MethodWithGCInfo>();
+            outputMethods.Capacity = methodsToPlace.Count;
+
+            foreach (CallerCalleeCount call in callList)
+            {
+                if (methodMap.TryGetValue(call.Caller, out MethodWithGCInfo callerWithGCInfo) && callerWithGCInfo != null)
+                {
+                    outputMethods.Add(callerWithGCInfo);
+                    methodMap[call.Caller] = null;
+                }
+                if (methodMap.TryGetValue(call.Callee, out MethodWithGCInfo calleeWithGCInfo) && calleeWithGCInfo != null)
+                {
+                    outputMethods.Add(calleeWithGCInfo);
+                    methodMap[call.Callee] = null;
+                }
+            }
+
+            // Methods unknown to the callchain profile go last
+            outputMethods.AddRange(methodMap.Values.Where(m => m != null));
+            Debug.Assert(outputMethods.Count == methodsToPlace.Count);
+            return outputMethods;
+        }
+#endif
+
+        /// <summary>
+        /// Sort methods with Pettis-Hansen using call graph data from profile.
+        /// </summary>
+        private List<MethodWithGCInfo> PettisHansenSort(List<MethodWithGCInfo> methodsToPlace)
+        {
+            var graphNodes = new List<CallGraphNode>(methodsToPlace.Count);
+            var mdToIndex = new Dictionary<MethodDesc, int>();
+            int index = 0;
+            foreach (MethodWithGCInfo method in methodsToPlace)
+            {
+                mdToIndex.Add(method.Method, index);
+                graphNodes.Add(new CallGraphNode(index));
+                index++;
+            }
+
+            bool any = false;
+            foreach (MethodWithGCInfo method in methodsToPlace)
+            {
+                MethodProfileData data = _profileData[method.Method];
+                if (data == null || data.CallWeights == null)
+                    continue;
+
+                foreach ((MethodDesc other, int count) in data.CallWeights)
+                {
+                    if (!mdToIndex.TryGetValue(other, out int otherIndex))
+                        continue;
+
+                    graphNodes[mdToIndex[method.Method]].IncreaseEdge(graphNodes[otherIndex], count);
+                    any = true;
+                }
+            }
+
+            if (!any)
+            {
+#if READYTORUN
+                _logger.Writer.WriteLine("Warning: no call graph data was found or a .mibc file was not specified. Skipping Pettis Hansen method ordering.");
+#endif
+                return methodsToPlace;
+            }
+
+            List<List<int>> components = PettisHansen.Sort(graphNodes);
+            // We expect to see a permutation.
+            Debug.Assert(components.SelectMany(l => l).OrderBy(i => i).SequenceEqual(Enumerable.Range(0, methodsToPlace.Count)));
+
+            List<MethodWithGCInfo> result = new List<MethodWithGCInfo>(methodsToPlace.Count);
+            foreach (List<int> component in components)
+            {
+                foreach (int node in component)
+                    result.Add(methodsToPlace[node]);
+            }
+
+            return result;
+        }
+    }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs
index 6f7585c07e3a..ab820af948a7 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs
@@ -19,6 +19,7 @@
 using ILCompiler.DependencyAnalysis;
 using ILCompiler.DependencyAnalysis.ReadyToRun;
 using ILCompiler.DependencyAnalysisFramework;
+using ILCompiler.Reflection.ReadyToRun;
 using Internal.TypeSystem.Ecma;
 
 namespace ILCompiler
@@ -293,7 +294,7 @@ public sealed class ReadyToRunCodegenCompilation : Compilation
         private readonly Func<MethodDesc, string> _printReproInstructions;
 
         private readonly ProfileDataManager _profileData;
-        private readonly ReadyToRunFileLayoutOptimizer _fileLayoutOptimizer;
+        private readonly FileLayoutOptimizer _fileLayoutOptimizer;
         private readonly HashSet _methodsWhichNeedMutableILBodies = new HashSet();
         private readonly HashSet _methodsToRecompile = new HashSet();
 
@@ -333,8 +334,8 @@ internal ReadyToRunCodegenCompilation(
             bool generateProfileFile,
             int parallelism,
             ProfileDataManager profileData,
-            ReadyToRunMethodLayoutAlgorithm methodLayoutAlgorithm,
-            ReadyToRunFileLayoutAlgorithm fileLayoutAlgorithm,
+            MethodLayoutAlgorithm methodLayoutAlgorithm,
+            FileLayoutAlgorithm fileLayoutAlgorithm,
             int customPESectionAlignment,
             bool verifyTypeAndFieldLayout)
             : base(
@@ -378,7 +379,7 @@ internal ReadyToRunCodegenCompilation(
 
             _profileData = profileData;
 
-            _fileLayoutOptimizer = new ReadyToRunFileLayoutOptimizer(logger, methodLayoutAlgorithm, fileLayoutAlgorithm, profileData, _nodeFactory);
+            _fileLayoutOptimizer = new FileLayoutOptimizer(logger, methodLayoutAlgorithm, fileLayoutAlgorithm, profileData, _nodeFactory);
         }
 
         private readonly static string s_folderUpPrefix = ".." + Path.DirectorySeparatorChar;
@@ -448,7 +449,7 @@ private void RewriteComponentFile(string inputFile, string outputFile, string ow
                 ReadyToRunFlags.READYTORUN_FLAG_Component |
                 ReadyToRunFlags.READYTORUN_FLAG_NonSharedPInvokeStubs;
 
-            if (inputModule.IsPlatformNeutral)
+            if (inputModule.IsPlatformNeutral || inputModule.PEReader.IsReadyToRunPlatformNeutralSource())
             {
                 flags |= ReadyToRunFlags.READYTORUN_FLAG_PlatformNeutralSource;
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilationBuilder.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilationBuilder.cs
index 7e142fdb9920..3d937f94df19 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilationBuilder.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilationBuilder.cs
@@ -8,6 +8,7 @@
 using ILCompiler.DependencyAnalysis;
 using ILCompiler.DependencyAnalysis.ReadyToRun;
 using ILCompiler.DependencyAnalysisFramework;
+using ILCompiler.Reflection.ReadyToRun;
 using ILCompiler.Win32Resources;
 using Internal.IL;
 using Internal.JitInterface;
@@ -34,8 +35,8 @@ public sealed class ReadyToRunCodegenCompilationBuilder : CompilationBuilder
         Func<MethodDesc, string> _printReproInstructions;
         private InstructionSetSupport _instructionSetSupport;
         private ProfileDataManager _profileData;
-        private ReadyToRunMethodLayoutAlgorithm _r2rMethodLayoutAlgorithm;
-        private ReadyToRunFileLayoutAlgorithm _r2rFileLayoutAlgorithm;
+        private MethodLayoutAlgorithm _r2rMethodLayoutAlgorithm;
+        private FileLayoutAlgorithm _r2rFileLayoutAlgorithm;
         private int _customPESectionAlignment;
         private bool _verifyTypeAndFieldLayout;
         private bool _hotColdSplitting;
@@ -118,7 +119,7 @@ public ReadyToRunCodegenCompilationBuilder UseProfileData(ProfileDataManager pro
             return this;
         }
 
-        public ReadyToRunCodegenCompilationBuilder FileLayoutAlgorithms(ReadyToRunMethodLayoutAlgorithm r2rMethodLayoutAlgorithm, ReadyToRunFileLayoutAlgorithm r2rFileLayoutAlgorithm)
+        public ReadyToRunCodegenCompilationBuilder FileLayoutAlgorithms(MethodLayoutAlgorithm r2rMethodLayoutAlgorithm, FileLayoutAlgorithm r2rFileLayoutAlgorithm)
         {
             _r2rMethodLayoutAlgorithm = r2rMethodLayoutAlgorithm;
             _r2rFileLayoutAlgorithm = r2rFileLayoutAlgorithm;
@@ -247,7 +248,7 @@ public override ICompilation ToCompilation()
             });
 
             ReadyToRunFlags flags = ReadyToRunFlags.READYTORUN_FLAG_NonSharedPInvokeStubs;
-            if (inputModules.All(module => module.IsPlatformNeutral))
+            if (inputModules.All(module => module.IsPlatformNeutral || module.PEReader.IsReadyToRunPlatformNeutralSource()))
             {
                 flags |= ReadyToRunFlags.READYTORUN_FLAG_PlatformNeutralSource;
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilationModuleGroupBase.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilationModuleGroupBase.cs
index 09c12867e50b..a2ea17ccc111 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilationModuleGroupBase.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilationModuleGroupBase.cs
@@ -707,6 +707,11 @@ private bool IsNonVersionableWithILTokensThatDoNotNeedTranslationUncached(EcmaMe
 
         public sealed override bool GeneratesPInvoke(MethodDesc method)
         {
+            // Marshalling behavior isn't modeled as protected by R2R rules, so prevent inlining of marshalling
+            // defined outside of the version bubble.
+            if (!VersionsWithMethodBody(method))
+                return false;
+
             return !Marshaller.IsMarshallingRequired(method);
         }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs
deleted file mode 100644
index 3cd7e34c3bf4..000000000000
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs
+++ /dev/null
@@ -1,324 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-
-using Internal.TypeSystem;
-
-using ILCompiler.DependencyAnalysis;
-using ILCompiler.DependencyAnalysis.ReadyToRun;
-using ILCompiler.DependencyAnalysisFramework;
-using System.Linq;
-using System.Collections.Immutable;
-using System.Text;
-using System.Reflection.Metadata.Ecma335;
-using ILCompiler.PettisHansenSort;
-
-namespace ILCompiler
-{
-    public enum ReadyToRunMethodLayoutAlgorithm
-    {
-        DefaultSort,
-        ExclusiveWeight,
-        HotCold,
-        HotWarmCold,
-        CallFrequency,
-        PettisHansen,
-        Random,
-    }
-
-    public enum ReadyToRunFileLayoutAlgorithm
-    {
-        DefaultSort,
-        MethodOrder,
-    }
-
-    class ReadyToRunFileLayoutOptimizer
-    {
-        public ReadyToRunFileLayoutOptimizer (Logger logger,
-                                              ReadyToRunMethodLayoutAlgorithm methodAlgorithm,
-                                              ReadyToRunFileLayoutAlgorithm fileAlgorithm,
-                                              ProfileDataManager profileData,
-                                              NodeFactory nodeFactory)
-        {
-            _logger = logger;
-            _methodLayoutAlgorithm = methodAlgorithm;
-            _fileLayoutAlgorithm = fileAlgorithm;
-            _profileData = profileData;
-            _nodeFactory = nodeFactory;
-        }
-
-        private Logger _logger;
-        private ReadyToRunMethodLayoutAlgorithm _methodLayoutAlgorithm = ReadyToRunMethodLayoutAlgorithm.DefaultSort;
-        private ReadyToRunFileLayoutAlgorithm _fileLayoutAlgorithm = ReadyToRunFileLayoutAlgorithm.DefaultSort;
-        private ProfileDataManager _profileData;
-        private NodeFactory _nodeFactory;
-
-        public ImmutableArray<DependencyNodeCore<NodeFactory>> ApplyProfilerGuidedMethodSort(ImmutableArray<DependencyNodeCore<NodeFactory>> nodes)
-        {
-            if (_methodLayoutAlgorithm == ReadyToRunMethodLayoutAlgorithm.DefaultSort)
-                return nodes;
-
-            List<MethodWithGCInfo> methods = new List<MethodWithGCInfo>();
-            foreach (var node in nodes)
-            {
-                if (node is MethodWithGCInfo method)
-                {
-                    methods.Add(method);
-                }
-            }
-
-            methods = ApplyMethodSort(methods);
-
-            int sortOrder = 0;
-
-            List<MethodWithGCInfo> sortedMethodsList = methods;
-
-            foreach (var methodNode in sortedMethodsList)
-            {
-                methodNode.CustomSort = sortOrder;
-                MethodColdCodeNode methodColdCodeNode = methodNode.ColdCodeNode;
-                if (methodColdCodeNode != null)
-                {
-                    methodColdCodeNode.CustomSort = sortOrder + sortedMethodsList.Count;
-                }
-                sortOrder++;
-            }
-
-            if (_fileLayoutAlgorithm == ReadyToRunFileLayoutAlgorithm.MethodOrder)
-            {
-                // Sort the dependencies of methods by the method order
-                foreach (var method in sortedMethodsList)
-                {
-                    ApplySortToDependencies(method, 0);
-                }
-            }
-
-            var newNodesArray = nodes.ToArray();
-            newNodesArray.MergeSortAllowDuplicates(new SortableDependencyNode.ObjectNodeComparer(CompilerComparer.Instance));
-            return newNodesArray.ToImmutableArray();
-
-            void ApplySortToDependencies(DependencyNodeCore<NodeFactory> node, int depth)
-            {
-                if (depth > 5)
-                    return;
-
-                if (node is SortableDependencyNode sortableNode)
-                {
-                    if (sortableNode.CustomSort != Int32.MaxValue)
-                        return; // Node already sorted
-                    sortableNode.CustomSort += sortOrder++;
-                }
-                foreach (var dependency in node.GetStaticDependencies(_nodeFactory))
-                {
-                    ApplySortToDependencies(dependency.Node, depth + 1);
-                }
-            }
-        }
-
-        private List<MethodWithGCInfo> ApplyMethodSort(List<MethodWithGCInfo> methods)
-        {
-            switch (_methodLayoutAlgorithm)
-            {
-                case ReadyToRunMethodLayoutAlgorithm.DefaultSort:
-                    break;
-
-                case ReadyToRunMethodLayoutAlgorithm.ExclusiveWeight:
-                    methods.MergeSortAllowDuplicates(sortMethodWithGCInfoByWeight);
-
-                    int sortMethodWithGCInfoByWeight(MethodWithGCInfo left, MethodWithGCInfo right)
-                    {
-                        return -MethodWithGCInfoToWeight(left).CompareTo(MethodWithGCInfoToWeight(right));
-                    }
-                    break;
-
-                case ReadyToRunMethodLayoutAlgorithm.HotCold:
-                    methods.MergeSortAllowDuplicates((MethodWithGCInfo left, MethodWithGCInfo right) => ComputeHotColdRegion(left).CompareTo(ComputeHotColdRegion(right)));
-
-                    int ComputeHotColdRegion(MethodWithGCInfo method)
-                    {
-                        return MethodWithGCInfoToWeight(method) > 0 ? 0 : 1;
-                    }
-                    break;
-
-                case ReadyToRunMethodLayoutAlgorithm.HotWarmCold:
-                    methods.MergeSortAllowDuplicates((MethodWithGCInfo left, MethodWithGCInfo right) => ComputeHotWarmColdRegion(left).CompareTo(ComputeHotWarmColdRegion(right)));
-
-                    int ComputeHotWarmColdRegion(MethodWithGCInfo method)
-                    {
-                        double weight = MethodWithGCInfoToWeight(method);
-
-                        // If weight is greater than 128 its probably signicantly used at runtime
-                        if (weight > 128)
-                            return 0;
-
-                        // If weight is less than 128 but greater than 0, then its probably used at startup
-                        // or some at runtime, but is less critical than the hot code
-                        if (weight > 0)
-                            return 1;
-
-                        // Methods without weight are probably relatively rarely used
-                        return 2;
-                    };
-                    break;
-
-                case ReadyToRunMethodLayoutAlgorithm.CallFrequency:
-                    methods = MethodCallFrequencySort(methods);
-                    break;
-
-                case ReadyToRunMethodLayoutAlgorithm.PettisHansen:
-                    methods = PettisHansenSort(methods);
-                    break;
-
-                case ReadyToRunMethodLayoutAlgorithm.Random:
-                    Random rand = new Random(0);
-                    for (int i = 0; i < methods.Count - 1; i++)
-                    {
-                        int j = rand.Next(i, methods.Count);
-                        MethodWithGCInfo temp = methods[i];
-                        methods[i] = methods[j];
-                        methods[j] = temp;
-                    }
-                    break;
-
-                default:
-                    throw new NotImplementedException(_methodLayoutAlgorithm.ToString());
-            }
-
-            return methods;
-        }
-
-        private double MethodWithGCInfoToWeight(MethodWithGCInfo method)
-        {
-            var profileData = _profileData[method.Method];
-            double weight = 0;
-
-            if (profileData != null)
-            {
-                weight = profileData.ExclusiveWeight;
-            }
-            return weight;
-        }
-
-        private class CallerCalleeCount
-        {
-            public readonly MethodDesc Caller;
-            public readonly MethodDesc Callee;
-            public readonly int Count;
-
-            public CallerCalleeCount(MethodDesc caller, MethodDesc callee, int count)
-            {
-                Caller = caller;
-                Callee = callee;
-                Count = count;
-            }
-        }
-
-        /// <summary>
-        /// Use callchain profile information to generate method ordering. We place
-        /// callers and callees by traversing the caller-callee pairs in the callchain
-        /// profile in the order of descending hit count. All methods not present
-        /// (or not matched) in the callchain profile go last.
-        /// </summary>
-        /// <param name="methodsToPlace">List of methods to place</param>
-        private List<MethodWithGCInfo> MethodCallFrequencySort(List<MethodWithGCInfo> methodsToPlace)
-        {
-            if (_profileData.CallChainProfile == null)
-            {
-                return methodsToPlace;
-            }
-
-            Dictionary<MethodDesc, MethodWithGCInfo> methodMap = new Dictionary<MethodDesc, MethodWithGCInfo>();
-            foreach (MethodWithGCInfo methodWithGCInfo in methodsToPlace)
-            {
-                methodMap.Add(methodWithGCInfo.Method, methodWithGCInfo);
-            }
-
-            List<CallerCalleeCount> callList = new List<CallerCalleeCount>();
-            foreach (KeyValuePair<MethodDesc, Dictionary<MethodDesc, int>> methodProfile in _profileData.CallChainProfile.ResolvedProfileData.Where(kvp => methodMap.ContainsKey(kvp.Key)))
-            {
-                foreach (KeyValuePair<MethodDesc, int> callee in methodProfile.Value.Where(kvp => methodMap.ContainsKey(kvp.Key)))
-                {
-                    callList.Add(new CallerCalleeCount(methodProfile.Key, callee.Key, callee.Value));
-                }
-            }
-            callList.Sort((a, b) => b.Count.CompareTo(a.Count));
-
-            List<MethodWithGCInfo> outputMethods = new List<MethodWithGCInfo>();
-            outputMethods.Capacity = methodsToPlace.Count;
-
-            foreach (CallerCalleeCount call in callList)
-            {
-                if (methodMap.TryGetValue(call.Caller, out MethodWithGCInfo callerWithGCInfo) && callerWithGCInfo != null)
-                {
-                    outputMethods.Add(callerWithGCInfo);
-                    methodMap[call.Caller] = null;
-                }
-                if (methodMap.TryGetValue(call.Callee, out MethodWithGCInfo calleeWithGCInfo) && calleeWithGCInfo != null)
-                {
-                    outputMethods.Add(calleeWithGCInfo);
-                    methodMap[call.Callee] = null;
-                }
-            }
-
-            // Methods unknown to the callchain profile go last
-            outputMethods.AddRange(methodMap.Values.Where(m => m != null));
-            Debug.Assert(outputMethods.Count == methodsToPlace.Count);
-            return outputMethods;
-        }
-
-        /// <summary>
-        /// Sort methods with Pettis-Hansen using call graph data from profile.
-        /// </summary>
-        private List<MethodWithGCInfo> PettisHansenSort(List<MethodWithGCInfo> methodsToPlace)
-        {
-            var graphNodes = new List<CallGraphNode>(methodsToPlace.Count);
-            var mdToIndex = new Dictionary<MethodDesc, int>();
-            int index = 0;
-            foreach (MethodWithGCInfo method in methodsToPlace)
-            {
-                mdToIndex.Add(method.Method, index);
-                graphNodes.Add(new CallGraphNode(index));
-                index++;
-            }
-
-            bool any = false;
-            foreach (MethodWithGCInfo method in methodsToPlace)
-            {
-                MethodProfileData data = _profileData[method.Method];
-                if (data == null || data.CallWeights == null)
-                    continue;
-
-                foreach ((MethodDesc other, int count) in data.CallWeights)
-                {
-                    if (!mdToIndex.TryGetValue(other, out int otherIndex))
-                        continue;
-
-                    graphNodes[mdToIndex[method.Method]].IncreaseEdge(graphNodes[otherIndex], count);
-                    any = true;
-                }
-            }
-
-            if (!any)
-            {
-                _logger.Writer.WriteLine("Warning: no call graph data was found or a .mibc file was not specified. Skipping Pettis Hansen method ordering.");
-                return methodsToPlace;
-            }
-
-            List<List<int>> components = PettisHansen.Sort(graphNodes);
-            // We expect to see a permutation.
-            Debug.Assert(components.SelectMany(l => l).OrderBy(i => i).SequenceEqual(Enumerable.Range(0, methodsToPlace.Count)));
-
-            List<MethodWithGCInfo> result = new List<MethodWithGCInfo>(methodsToPlace.Count);
-            foreach (List<int> component in components)
-            {
-                foreach (int node in component)
-                    result.Add(methodsToPlace[node]);
-            }
-
-            return result;
-        }
-    }
-}
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunHashCode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunHashCode.cs
deleted file mode 100644
index 94c42257c6c0..000000000000
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunHashCode.cs
+++ /dev/null
@@ -1,259 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Diagnostics;
-using System.Numerics;
-using System.Text;
-
-using Internal.TypeSystem;
-
-namespace ILCompiler
-{
-    /// <summary>
-    /// Helper class used to calculate hash codes compatible with the CoreCLR
-    /// GetVersionResilientMethod/TypeHashCode.
-    /// </summary>
-    public static class ReadyToRunHashCode
-    {
-        /// <summary>
-        /// CoreCLR ComputeNameHashCode
-        /// </summary>
-        /// <param name="name">Name string to hash</param>
-        public static int NameHashCode(string name)
-        {
-            if (string.IsNullOrEmpty(name))
-            {
-                return 0;
-            }
-
-            int hash1 = 0x6DA3B944;
-            int hash2 = 0;
-
-            // DIFFERENT FROM NATIVEAOT: We hash UTF-8 bytes here, while NativeAOT hashes UTF-16 characters.
-            byte[] src = Encoding.UTF8.GetBytes(name);
-            for (int i = 0; i < src.Length; i += 2)
-            {
-                hash1 = unchecked(hash1 + RotateLeft(hash1, 5)) ^ (int)unchecked((sbyte)src[i]);
-                if (i + 1 < src.Length)
-                {
-                    hash2 = unchecked(hash2 + RotateLeft(hash2, 5)) ^ (int)unchecked((sbyte)src[i + 1]);
-                }
-                else
-                {
-                    break;
-                }
-            }
-
-            hash1 = unchecked(hash1 + RotateLeft(hash1, 8));
-            hash2 = unchecked(hash2 + RotateLeft(hash2, 8));
-
-            return unchecked((int)(hash1 ^ hash2));
-        }
-
-        /// <summary>
-        /// Calculate hash code for a namespace - name combination.
-        /// CoreCLR 2-parameter ComputeNameHashCode
-        /// DIFFERENT FROM NATIVEAOT: NativeAOT hashes the full name as one string ("namespace.name"),
-        /// as the full name is already available. In CoreCLR we normally only have separate
-        /// strings for namespace and name, thus we hash them separately.
-        /// </summary>
-        /// <param name="namespacePart">Namespace name</param>
-        /// <param name="namePart">Type name within the namespace</param>
-        public static int NameHashCode(string namespacePart, string namePart)
-        {
-            return NameHashCode(namespacePart) ^ NameHashCode(namePart);
-        }
-
-        /// <summary>
-        /// CoreCLR 3-parameter GetVersionResilientTypeHashCode
-        /// </summary>
-        /// <param name="type">Type to hash</param>
-        public static int TypeTableHashCode(DefType type)
-        {
-            int hashcode = 0;
-            do
-            {
-                hashcode ^= NameHashCode(type.Namespace, type.Name);
-                type = type.ContainingType;
-            }
-            while (type != null);
-            return hashcode;
-        }
-
-        /// <summary>
-        /// CoreCLR 1-parameter GetVersionResilientTypeHashCode
-        /// </summary>
-        /// <param name="type">Type to hash</param>
-        public static int TypeHashCode(TypeDesc type)
-        {
-            if (type.GetTypeDefinition() is DefType defType)
-            {
-                int hashcode = NameHashCode(defType.Namespace, defType.Name);
-                DefType containingType = defType.ContainingType;
-                if (containingType != null)
-                {
-                    hashcode = NestedTypeHashCode(TypeHashCode(containingType), hashcode);
-                }
-                if (type.HasInstantiation && !type.IsGenericDefinition)
-                {
-                    return GenericInstanceHashCode(hashcode, type.Instantiation);
-                }
-                else
-                {
-                    return hashcode;
-                }
-            }
-
-            if (type is ArrayType arrayType)
-            {
-                return ArrayTypeHashCode(TypeHashCode(arrayType.ElementType), arrayType.Rank);
-            }
-
-            if (type is PointerType pointerType)
-            {
-                return PointerTypeHashCode(TypeHashCode(pointerType.ParameterType));
-            }
-
-            if (type is ByRefType byRefType)
-            {
-                return ByrefTypeHashCode(TypeHashCode(byRefType.ParameterType));
-            }
-
-            throw new NotImplementedException();
-        }
-
-        /// <summary>
-        /// CoreCLR ComputeNestedTypeHashCode
-        /// </summary>
-        /// <param name="enclosingTypeHashcode">Hash code of the enclosing type</param>
-        /// <param name="nestedTypeNameHash">Hash code of the nested type name</param>
-        private static int NestedTypeHashCode(int enclosingTypeHashcode, int nestedTypeNameHash)
-        {
-            return unchecked(enclosingTypeHashcode + RotateLeft(enclosingTypeHashcode, 11)) ^ nestedTypeNameHash;
-        }
-
-        /// <summary>
-        /// CoreCLR ComputeArrayTypeHashCode
-        /// </summary>
-        /// <param name="elementTypeHashcode">Hash code representing the array element type</param>
-        /// <param name="rank">Array rank</param>
-        private static int ArrayTypeHashCode(int elementTypeHashcode, int rank)
-        {
-            // DIFFERENT FROM NATIVEAOT: This is much simplified compared to NativeAOT, to avoid converting rank to string.
-            // For a single-dimensional array, the result is identical to NativeAOT.
-            int hashCode = unchecked((int)0xd5313556 + rank);
-            if (rank == 1)
-            {
-                Debug.Assert(hashCode == NameHashCode("System.Array`1"));
-            }
-            hashCode = unchecked(hashCode + RotateLeft(hashCode, 13)) ^ elementTypeHashcode;
-            return unchecked(hashCode + RotateLeft(hashCode, 15));
-        }
-
-        /// <summary>
-        /// CoreCLR ComputePointerTypeHashCode
-        /// </summary>
-        /// <param name="pointeeTypeHashcode">Hash code of the pointee type</param>
-        private static int PointerTypeHashCode(int pointeeTypeHashcode)
-        {
-            return unchecked(pointeeTypeHashcode + RotateLeft(pointeeTypeHashcode, 5)) ^ 0x12D0;
-        }
-
-        /// <summary>
-        /// CoreCLR ComputeByrefTypeHashCode
-        /// </summary>
-        /// <param name="parameterTypeHashcode">Hash code representing the parameter type</param>
-        private static int ByrefTypeHashCode(int parameterTypeHashcode)
-        {
-            return unchecked(parameterTypeHashcode + RotateLeft(parameterTypeHashcode, 7)) ^ 0x4C85;
-        }
-
-        /// <summary>
-        /// CoreCLR ComputeGenericInstanceHashCode
-        /// </summary>
-        /// <param name="hashcode">Base hash code</param>
-        /// <param name="instantiation">Instantiation to include in the hash</param>
-        private static int GenericInstanceHashCode(int hashcode, Instantiation instantiation)
-        {
-            for (int i = 0; i < instantiation.Length; i++)
-            {
-                int argumentHashCode = TypeHashCode(instantiation[i]);
-                hashcode = unchecked(hashcode + RotateLeft(hashcode, 13)) ^ argumentHashCode;
-            }
-            return unchecked(hashcode + RotateLeft(hashcode, 15));
-        }
-
-        /// <summary>
-        /// CoreCLR GetVersionResilientMethodHashCode
-        /// </summary>
-        /// <param name="method">Method to hash</param>
-        public static int MethodHashCode(MethodDesc method)
-        {
-            int hashCode = TypeHashCode(method.OwningType);
-            int methodNameHashCode = NameHashCode(method.Name);
-
-            // Todo: Add signature to hash.
-            if (method.HasInstantiation && !method.IsGenericMethodDefinition)
-            {
-                hashCode ^= GenericInstanceHashCode(methodNameHashCode, method.Instantiation);
-            }
-            else
-            {
-                hashCode ^= methodNameHashCode;
-            }
-
-            return hashCode;
-        }
-
-        public static int ModuleNameHashCode(ModuleDesc module)
-        {
-            IAssemblyDesc assembly = module.Assembly;
-            Debug.Assert(assembly == module);
-            return NameHashCode(assembly.GetName().Name);
-        }
-
-        /// <summary>
-        /// Bitwise left 32-bit rotation with wraparound.
-        /// </summary>
-        /// <param name="value">Value to rotate</param>
-        /// <param name="bitCount">Number of bits</param>
-        private static int RotateLeft(int value, int bitCount)
-        {
-            return (int)BitOperations.RotateLeft((uint)value, bitCount);
-        }
-
-        private static uint XXHash32_MixEmptyState()
-        {
-            // Unlike System.HashCode, these hash values are required to be stable, so don't
-            // mix in a random process-specific value
-            return 374761393U; // Prime5
-        }
-
-        private static uint XXHash32_QueueRound(uint hash, uint queuedValue)
-        {
-            return (BitOperations.RotateLeft((hash + queuedValue * 3266489917U/*Prime3*/), 17)) * 668265263U/*Prime4*/;
-        }
-
-        private static uint XXHash32_MixFinal(uint hash)
-        {
-            hash ^= hash >> 15;
-            hash *= 2246822519U/*Prime2*/;
-            hash ^= hash >> 13;
-            hash *= 3266489917U/*Prime3*/;
-            hash ^= hash >> 16;
-            return hash;
-        }
-
-        public static uint CombineTwoValuesIntoHash(uint value1, uint value2)
-        {
-            // This matches the behavior of System.HashCode.Combine(value1, value2) as of the time of authoring
-            uint hash = XXHash32_MixEmptyState();
-            hash += 8;
-            hash = XXHash32_QueueRound(hash, value1);
-            hash = XXHash32_QueueRound(hash, value2);
-            hash = XXHash32_MixFinal(hash);
-            return hash;
-        }
-    }
-}
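
The helpers deleted above pin the two-value combine to fixed xxHash32 constants; as the removed comment notes, these hashes must be stable, so the per-process random seed used by `System.HashCode` is unsuitable. Below is a minimal standalone sketch of that stable combine, mirroring the removed constants; the `StableHash`/`Combine` names are stand-ins, not names from the repo.

```csharp
// Hedged sketch: a process-independent two-value hash mirroring the constants in the
// removed CombineTwoValuesIntoHash. System.HashCode seeds itself with a random
// per-process value, so it cannot be used where results must be reproducible.
using System.Numerics;

static class StableHash
{
    public static uint Combine(uint value1, uint value2)
    {
        uint hash = 374761393U;          // xxHash32 Prime5, fixed empty-state seed
        hash += 8;                       // two 4-byte inputs
        hash = QueueRound(hash, value1);
        hash = QueueRound(hash, value2);
        return MixFinal(hash);
    }

    private static uint QueueRound(uint hash, uint queuedValue) =>
        BitOperations.RotateLeft(hash + queuedValue * 3266489917U /* Prime3 */, 17) * 668265263U /* Prime4 */;

    private static uint MixFinal(uint hash)
    {
        hash ^= hash >> 15;
        hash *= 2246822519U;             // Prime2
        hash ^= hash >> 13;
        hash *= 3266489917U;             // Prime3
        hash ^= hash >> 16;
        return hash;
    }
}
```
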
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/RuntimeDeterminedTypeHelper.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/RuntimeDeterminedTypeHelper.cs
index f2f56713670d..2f298d26f348 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/RuntimeDeterminedTypeHelper.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/RuntimeDeterminedTypeHelper.cs
@@ -132,6 +132,7 @@ public static bool Equals(MethodWithToken methodWithToken1, MethodWithToken meth
             return Equals(methodWithToken1.Method, methodWithToken2.Method)
                 && Equals(methodWithToken1.OwningType, methodWithToken2.OwningType)
                 && Equals(methodWithToken1.ConstrainedType, methodWithToken2.ConstrainedType)
+                && Equals(methodWithToken1.Token.Module, methodWithToken2.Token.Module)
                 && methodWithToken1.Unboxing == methodWithToken2.Unboxing;
         }
 
@@ -152,7 +153,8 @@ public static bool Equals(FieldWithToken field1, FieldWithToken field2)
             {
                 return field1 == null && field2 == null;
             }
-            return RuntimeDeterminedTypeHelper.Equals(field1.Field, field2.Field);
+            return RuntimeDeterminedTypeHelper.Equals(field1.Field, field2.Field) &&
+                Equals(field1.Token.Module, field2.Token.Module);
         }
 
         public static int GetHashCode(Instantiation instantiation)
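
The two hunks above tighten equality for `MethodWithToken` and `FieldWithToken` so that the module the token came from participates in the comparison, not just the resolved member. A hedged illustration of the idea with stand-in types (none of these names come from the repo): two wrappers can resolve to the same member while their tokens originate in different modules, and treating them as equal would collapse keys that need distinct fixups.

```csharp
// Hedged illustration only; stand-in types, not the ILCompiler ones.
using System;

sealed class ModuleId
{
    public string SimpleName { get; }
    public ModuleId(string simpleName) => SimpleName = simpleName;
}

sealed class MemberWithToken : IEquatable<MemberWithToken>
{
    public string MemberName { get; }
    public ModuleId TokenModule { get; }

    public MemberWithToken(string memberName, ModuleId tokenModule)
        => (MemberName, TokenModule) = (memberName, tokenModule);

    public bool Equals(MemberWithToken? other) =>
        other is not null
        && MemberName == other.MemberName
        && TokenModule == other.TokenModule;   // the module check the diff adds (modules compared by identity)

    public override bool Equals(object? obj) => Equals(obj as MemberWithToken);
    public override int GetHashCode() => HashCode.Combine(MemberName, TokenModule);
}
```
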
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/VersionResilientHashCode.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/VersionResilientHashCode.ReadyToRun.cs
new file mode 100644
index 000000000000..154f369dbfea
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/VersionResilientHashCode.ReadyToRun.cs
@@ -0,0 +1,120 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics;
+using System.Numerics;
+using System.Text;
+
+using Internal.TypeSystem;
+
+namespace Internal
+{
+    /// <summary>
+    /// Managed implementation of the version-resilient hash code algorithm.
+    /// </summary>
+    internal static partial class VersionResilientHashCode
+    {
+        /// <summary>
+        /// CoreCLR 3-parameter GetVersionResilientTypeHashCode
+        /// </summary>
+        /// <param name="type">Type to hash</param>
+        public static int TypeTableHashCode(DefType type)
+        {
+            int hashcode = 0;
+            do
+            {
+                hashcode ^= NameHashCode(type.Namespace, type.Name);
+                type = type.ContainingType;
+            }
+            while (type != null);
+            return hashcode;
+        }
+
+        /// <summary>
+        /// CoreCLR 1-parameter GetVersionResilientTypeHashCode
+        /// </summary>
+        /// <param name="type">Type to hash</param>
+        public static int TypeHashCode(TypeDesc type)
+        {
+            if (type.GetTypeDefinition() is DefType defType)
+            {
+                int hashcode = NameHashCode(defType.Namespace, defType.Name);
+                DefType containingType = defType.ContainingType;
+                if (containingType != null)
+                {
+                    hashcode = NestedTypeHashCode(TypeHashCode(containingType), hashcode);
+                }
+                if (type.HasInstantiation && !type.IsGenericDefinition)
+                {
+                    return GenericInstanceHashCode(hashcode, type.Instantiation);
+                }
+                else
+                {
+                    return hashcode;
+                }
+            }
+
+            if (type is ArrayType arrayType)
+            {
+                return ArrayTypeHashCode(TypeHashCode(arrayType.ElementType), arrayType.Rank);
+            }
+
+            if (type is PointerType pointerType)
+            {
+                return PointerTypeHashCode(TypeHashCode(pointerType.ParameterType));
+            }
+
+            if (type is ByRefType byRefType)
+            {
+                return ByrefTypeHashCode(TypeHashCode(byRefType.ParameterType));
+            }
+
+            throw new NotImplementedException();
+        }
+
+        /// <summary>
+        /// CoreCLR ComputeGenericInstanceHashCode
+        /// </summary>
+        /// <param name="hashcode">Base hash code</param>
+        /// <param name="instantiation">Instantiation to include in the hash</param>
+        private static int GenericInstanceHashCode(int hashcode, Instantiation instantiation)
+        {
+            for (int i = 0; i < instantiation.Length; i++)
+            {
+                int argumentHashCode = TypeHashCode(instantiation[i]);
+                hashcode = unchecked(hashcode + RotateLeft(hashcode, 13)) ^ argumentHashCode;
+            }
+            return unchecked(hashcode + RotateLeft(hashcode, 15));
+        }
+
+        /// <summary>
+        /// CoreCLR GetVersionResilientMethodHashCode
+        /// </summary>
+        /// <param name="method">Method to hash</param>
+        public static int MethodHashCode(MethodDesc method)
+        {
+            int hashCode = TypeHashCode(method.OwningType);
+            int methodNameHashCode = NameHashCode(method.Name);
+
+            // Todo: Add signature to hash.
+            if (method.HasInstantiation && !method.IsGenericMethodDefinition)
+            {
+                hashCode ^= GenericInstanceHashCode(methodNameHashCode, method.Instantiation);
+            }
+            else
+            {
+                hashCode ^= methodNameHashCode;
+            }
+
+            return hashCode;
+        }
+
+        public static int ModuleNameHashCode(ModuleDesc module)
+        {
+            IAssemblyDesc assembly = module.Assembly;
+            Debug.Assert(assembly == module);
+            return NameHashCode(assembly.GetName().Name);
+        }
+    }
+}
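
The rotate-and-xor mixing referenced above comes in two flavors: `NestedTypeHashCode` (rotate by 11) folds an enclosing type's hash into the nested type's name hash, and `GenericInstanceHashCode` (rotate by 13, finalize with 15) folds instantiation arguments in. A small self-contained sketch of just those mixing steps, using plain `int` hashes in place of the type-system objects; the rotation amounts match the code in the diff.

```csharp
// Hedged sketch of the mixing primitives, detached from the ILCompiler type system.
using System.Numerics;

static class HashMixing
{
    private static int RotL(int value, int bits) =>
        (int)BitOperations.RotateLeft((uint)value, bits);

    // Enclosing-type hash folded into the nested type's name hash (NestedTypeHashCode shape).
    public static int Nested(int enclosingTypeHash, int nestedTypeNameHash) =>
        unchecked(enclosingTypeHash + RotL(enclosingTypeHash, 11)) ^ nestedTypeNameHash;

    // Instantiation arguments folded into a base hash, then finalized (GenericInstanceHashCode shape).
    public static int GenericInstance(int hash, params int[] argumentHashes)
    {
        foreach (int arg in argumentHashes)
            hash = unchecked(hash + RotL(hash, 13)) ^ arg;
        return unchecked(hash + RotL(hash, 15));
    }
}
```
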
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IL/ReadyToRunILProvider.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IL/ReadyToRunILProvider.cs
index 7fed0872c7dc..425ba366f1ef 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IL/ReadyToRunILProvider.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IL/ReadyToRunILProvider.cs
@@ -85,6 +85,11 @@ private MethodIL TryGetIntrinsicMethodIL(MethodDesc method)
                 return UnsafeIntrinsics.EmitIL(method);
             }
 
+            if (mdType.Name == "InstanceCalliHelper" && mdType.Namespace == "System.Reflection")
+            {
+                return InstanceCalliHelperIntrinsics.EmitIL(method);
+            }
+
             return null;
         }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IL/Stubs/InstanceCalliHelperIntrinsics.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IL/Stubs/InstanceCalliHelperIntrinsics.cs
new file mode 100644
index 000000000000..7f188461de73
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IL/Stubs/InstanceCalliHelperIntrinsics.cs
@@ -0,0 +1,60 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Reflection.Metadata.Ecma335;
+using Internal.TypeSystem;
+using Internal.TypeSystem.Ecma;
+
+namespace Internal.IL
+{
+    public class InstanceCalliHelperIntrinsics
+    {
+        public static MethodIL EmitIL(MethodDesc method)
+        {
+            MethodIL methodIL = EcmaMethodIL.Create((EcmaMethod)method);
+
+            if (method.Name.StartsWith("Invoke", StringComparison.Ordinal))
+            {
+                methodIL = new ExplicitThisCall(methodIL);
+            }
+
+            return methodIL;
+        }
+
+        private class ExplicitThisCall : MethodIL
+        {
+            private readonly MethodIL _wrappedMethodIL;
+
+            public ExplicitThisCall(MethodIL wrapped)
+            {
+                _wrappedMethodIL = wrapped;
+            }
+
+            // MethodIL overrides:
+            public override int MaxStack => _wrappedMethodIL.MaxStack;
+            public override bool IsInitLocals => _wrappedMethodIL.IsInitLocals;
+            public override byte[] GetILBytes() => _wrappedMethodIL.GetILBytes();
+            public override LocalVariableDefinition[] GetLocals() => _wrappedMethodIL.GetLocals();
+            public override ILExceptionRegion[] GetExceptionRegions() => _wrappedMethodIL.GetExceptionRegions();
+            public override MethodDebugInformation GetDebugInfo() => _wrappedMethodIL.GetDebugInfo();
+
+            // MethodILScope overrides:
+            public override MethodIL GetMethodILDefinition() => _wrappedMethodIL.GetMethodILDefinition();
+            public override MethodDesc OwningMethod => _wrappedMethodIL.OwningMethod;
+            public override string ToString() => _wrappedMethodIL.ToString();
+            public override object GetObject(int token, NotFoundBehavior notFoundBehavior)
+            {
+                object item = _wrappedMethodIL.GetObject(token, notFoundBehavior);
+                if (item is MethodSignature sig)
+                {
+                    var builder = new MethodSignatureBuilder(sig);
+                    builder.Flags = (sig.Flags | MethodSignatureFlags.ExplicitThis) & ~MethodSignatureFlags.Static;
+                    item = builder.ToSignature();
+                }
+
+                return item;
+            }
+        }
+    }
+}
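
The wrapper above leaves the IL untouched and only rewrites the standalone signatures it resolves: every `MethodSignature` returned from `GetObject` gains `ExplicitThis` and loses `Static`, so the `calli` signature carries the receiver explicitly as its first argument. A hedged sketch of that flag rewrite with a stand-in flags enum (the real `Internal.TypeSystem.MethodSignatureFlags` has different members and values):

```csharp
// Hedged illustration of the signature rewrite in ExplicitThisCall.GetObject above.
using System;

[Flags]
enum SigFlags
{
    None         = 0,
    Static       = 1 << 0,
    ExplicitThis = 1 << 1,
}

static class ExplicitThisRewrite
{
    // Mark the signature explicit-this and drop the static bit, matching
    // (sig.Flags | MethodSignatureFlags.ExplicitThis) & ~MethodSignatureFlags.Static.
    public static SigFlags Apply(SigFlags flags) =>
        (flags | SigFlags.ExplicitThis) & ~SigFlags.Static;
}
```
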
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj
index 112af3a715db..d72562ba215f 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj
@@ -242,7 +242,6 @@
     
     
     
-    
     
     
     
@@ -256,12 +255,13 @@
     
     
     
-    
+    
+    
     
     
     
     
-    
+    
     
     
     
@@ -269,6 +269,7 @@
     
     
     
+    
     
     
     
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
index f58775b55919..c39971017ce7 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
@@ -1128,6 +1128,12 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum)
                 case CorInfoHelpFunc.CORINFO_HELP_ULNG2DBL:
                     id = ReadyToRunHelper.ULng2Dbl;
                     break;
+                case CorInfoHelpFunc.CORINFO_HELP_LNG2FLT:
+                    id = ReadyToRunHelper.Lng2Flt;
+                    break;
+                case CorInfoHelpFunc.CORINFO_HELP_ULNG2FLT:
+                    id = ReadyToRunHelper.ULng2Flt;
+                    break;
 
                 case CorInfoHelpFunc.CORINFO_HELP_DIV:
                     id = ReadyToRunHelper.Div;
@@ -3079,11 +3085,6 @@ private bool pInvokeMarshalingRequired(CORINFO_METHOD_STRUCT_* handle, CORINFO_S
                         Debug.Assert(!_compilation.NodeFactory.CompilationModuleGroup.GeneratesPInvoke(method));
                         return true;
                     }
-
-                    // Marshalling behavior isn't modeled as protected by R2R rules, so disable pinvoke inlining for code outside
-                    // of the version bubble
-                    if (!_compilation.CompilationModuleGroup.VersionsWithMethodBody(method))
-                        return true;
                 }
                 catch (RequiresRuntimeJitException)
                 {
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs
index 46d7bba69b43..738910d98e92 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs
@@ -21,59 +21,18 @@ namespace ILCompiler.PEWriter
     /// metadata and IL and adding new code and data representing the R2R JITted code and
     /// additional runtime structures (R2R header and tables).
     /// 
-    public class R2RPEBuilder : PEBuilder
+    public sealed class R2RPEBuilder : PEBuilder
     {
         /// 
         /// Number of low-order RVA bits that must match file position on Linux.
         /// 
         const int RVABitsToMatchFilePos = 16;
 
-        /// 
-        /// This structure describes how a particular section moved between the original MSIL
-        /// and the output PE file. It holds beginning and end RVA of the input (MSIL) section
-        /// and a delta between the input and output starting RVA of the section.
-        /// 
-        struct SectionRVADelta
-        {
-            /// 
-            /// Starting RVA of the section in the input MSIL PE.
-            /// 
-            public readonly int StartRVA;
-
-            /// 
-            /// End RVA (one plus the last RVA in the section) of the section in the input MSIL PE.
-            /// 
-            public readonly int EndRVA;
-
-            /// 
-            /// Starting RVA of the section in the output PE minus its starting RVA in the input MSIL.
-            /// 
-            public readonly int DeltaRVA;
-
-            /// 
-            /// Initialize the section RVA delta information.
-            /// 
-            /// Starting RVA of the section in the input MSIL
-            /// End RVA of the section in the input MSIL
-            /// Output RVA of the section minus input RVA of the section
-            public SectionRVADelta(int startRVA, int endRVA, int deltaRVA)
-            {
-                StartRVA = startRVA;
-                EndRVA = endRVA;
-                DeltaRVA = deltaRVA;
-            }
-        }
-
         /// 
         /// Name of the text section.
         /// 
         public const string TextSectionName = ".text";
 
-        /// 
-        /// Name of the initialized data section.
-        /// 
-        public const string SDataSectionName = ".sdata";
-        
         /// 
         /// Name of the relocation section.
         /// 
@@ -94,46 +53,54 @@ public SectionRVADelta(int startRVA, int endRVA, int deltaRVA)
         /// 
         private TargetDetails _target;
 
-        /// 
-        /// Complete list of sections to emit into the output R2R executable.
-        /// 
-        private ImmutableArray
_sections; - /// /// Callback to retrieve the runtime function table which needs setting to the /// ExceptionTable PE directory entry. /// private Func _getRuntimeFunctionsTable; - /// - /// For each copied section, we store its initial and end RVA in the source PE file - /// and the RVA difference between the old and new file. We use this table to relocate - /// directory entries in the PE file header. - /// - private List _sectionRvaDeltas; + private class SerializedSectionData + { + /// + /// Name of the section + /// + public string Name; - /// - /// Logical section start RVAs. When emitting R2R PE executables for Linux, we must - /// align RVA's so that their 'RVABitsToMatchFilePos' lowest-order bits match the - /// file position (otherwise memory mapping of the file fails and CoreCLR silently - /// switches over to runtime JIT). PEBuilder doesn't support this today so that we - /// must store the RVA's and post-process the produced PE by patching the section - /// headers in the PE header. - /// - private int[] _sectionRVAs; + /// + /// Logical section start RVAs. When emitting R2R PE executables for Linux, we must + /// align RVA's so that their 'RVABitsToMatchFilePos' lowest-order bits match the + /// file position (otherwise memory mapping of the file fails and CoreCLR silently + /// switches over to runtime JIT). PEBuilder doesn't support this today so that we + /// must store the RVA's and post-process the produced PE by patching the section + /// headers in the PE header. + /// + public int RVA; - /// - /// Pointers to the location of the raw data. Needed to allow phyical file alignment - /// beyond 4KB. PEBuilder doesn't support this today so that we - /// must store the RVA's and post-process the produced PE by patching the section - /// headers in the PE header. - /// - private int[] _sectionPointerToRawData; + /// + /// Pointers to the location of the raw data. Needed to allow phyical file alignment + /// beyond 4KB. PEBuilder doesn't support this today so that we + /// must store the RVA's and post-process the produced PE by patching the section + /// headers in the PE header. + /// + public int PointerToRawData; + + /// + /// Maximum of virtual and physical size for each section. + /// + public int RawSize; + + /// + /// Whether or not the section has been serialized - if the RVA, pointer to raw data, + /// and size have been set. + /// + public bool IsSerialized; + } /// - /// Maximum of virtual and physical size for each section. + /// List of possible sections to emit into the output R2R executable in the order in which + /// they are expected to be serialized. Data (aside from name) is set during serialization. /// - private int[] _sectionRawSizes; + private readonly SerializedSectionData[] _sectionData; /// /// R2R PE section builder & relocator. 
@@ -179,7 +146,6 @@ public R2RPEBuilder( { _target = target; _getRuntimeFunctionsTable = getRuntimeFunctionsTable; - _sectionRvaDeltas = new List(); _sectionBuilder = new SectionBuilder(target); @@ -195,29 +161,21 @@ public R2RPEBuilder( _sectionBuilder.SetDllNameForExportDirectoryTable(outputFileSimpleName); } - if (_sectionBuilder.FindSection(R2RPEBuilder.RelocSectionName) == null) - { - // Always inject the relocation section to the end of section list - _sectionBuilder.AddSection( - R2RPEBuilder.RelocSectionName, - SectionCharacteristics.ContainsInitializedData | - SectionCharacteristics.MemRead | - SectionCharacteristics.MemDiscardable, - PEHeaderConstants.SectionAlignment); - } + // Always inject the relocation section to the end of section list + _sectionBuilder.AddSection( + R2RPEBuilder.RelocSectionName, + SectionCharacteristics.ContainsInitializedData | + SectionCharacteristics.MemRead | + SectionCharacteristics.MemDiscardable, + PEHeaderConstants.SectionAlignment); - ImmutableArray
.Builder sectionListBuilder = ImmutableArray.CreateBuilder
(); + List sectionData = new List(); foreach (SectionInfo sectionInfo in _sectionBuilder.GetSections()) { - ILCompiler.PEWriter.Section builderSection = _sectionBuilder.FindSection(sectionInfo.SectionName); - Debug.Assert(builderSection != null); - sectionListBuilder.Add(new Section(builderSection.Name, builderSection.Characteristics)); + sectionData.Add(new SerializedSectionData() { Name = sectionInfo.SectionName }); } - _sections = sectionListBuilder.ToImmutableArray(); - _sectionRVAs = new int[_sections.Length]; - _sectionPointerToRawData = new int[_sections.Length]; - _sectionRawSizes = new int[_sections.Length]; + _sectionData = sectionData.ToArray(); } public void SetCorHeader(ISymbolNode symbol, int headerSize) @@ -343,7 +301,7 @@ public void AddSections(OutputInfoBuilder outputInfoBuilder) sizeof(int) + // SizeOfUninitializedData sizeof(int) + // AddressOfEntryPoint sizeof(int) + // BaseOfCode - sizeof(long); // PE32: BaseOfData (int), ImageBase (int) + sizeof(long); // PE32: BaseOfData (int), ImageBase (int) // PE32+: ImageBase (long) const int OffsetOfChecksum = OffsetOfSectionAlign + sizeof(int) + // SectionAlignment @@ -361,7 +319,7 @@ public void AddSections(OutputInfoBuilder outputInfoBuilder) const int OffsetOfSizeOfImage = OffsetOfChecksum - 2 * sizeof(int); // SizeOfHeaders, SizeOfImage const int SectionHeaderNameSize = 8; - const int SectionHeaderVirtualSize = SectionHeaderNameSize; // VirtualSize follows + const int SectionHeaderVirtualSize = SectionHeaderNameSize; // VirtualSize follows const int SectionHeaderRVAOffset = SectionHeaderVirtualSize + sizeof(int); // RVA Offset follows VirtualSize + 4 bytes VirtualSize const int SectionHeaderSizeOfRawData = SectionHeaderRVAOffset + sizeof(int); // SizeOfRawData follows RVA const int SectionHeaderPointerToRawDataOffset = SectionHeaderSizeOfRawData + sizeof(int); // PointerToRawData immediately follows the SizeOfRawData @@ -375,7 +333,7 @@ public void AddSections(OutputInfoBuilder outputInfoBuilder) sizeof(int) + // PointerToRelocations sizeof(int) + // PointerToLineNumbers sizeof(short) + // NumberOfRelocations - sizeof(short) + // NumberOfLineNumbers + sizeof(short) + // NumberOfLineNumbers sizeof(int); // SectionCharacteristics /// @@ -400,13 +358,17 @@ private void UpdateSectionRVAs(Stream outputStream) 16 * sizeof(long); // directory entries int sectionHeaderOffset = DosHeaderSize + PESignatureSize + COFFHeaderSize + peHeaderSize; - int sectionCount = _sectionRVAs.Length; + int sectionCount = _sectionData.Length; for (int sectionIndex = 0; sectionIndex < sectionCount; sectionIndex++) { + SerializedSectionData section = _sectionData[sectionIndex]; + if (!section.IsSerialized) + continue; + if (_customPESectionAlignment != 0) { // When _customPESectionAlignment is set, the physical and virtual sizes are the same - byte[] sizeBytes = BitConverter.GetBytes(_sectionRawSizes[sectionIndex]); + byte[] sizeBytes = BitConverter.GetBytes(section.RawSize); Debug.Assert(sizeBytes.Length == sizeof(int)); // Update VirtualSize @@ -424,7 +386,7 @@ private void UpdateSectionRVAs(Stream outputStream) // Update RVAs { outputStream.Seek(sectionHeaderOffset + SectionHeaderSize * sectionIndex + SectionHeaderRVAOffset, SeekOrigin.Begin); - byte[] rvaBytes = BitConverter.GetBytes(_sectionRVAs[sectionIndex]); + byte[] rvaBytes = BitConverter.GetBytes(section.RVA); Debug.Assert(rvaBytes.Length == sizeof(int)); outputStream.Write(rvaBytes, 0, rvaBytes.Length); } @@ -432,15 +394,25 @@ private void UpdateSectionRVAs(Stream outputStream) // Update 
pointer to raw data { outputStream.Seek(sectionHeaderOffset + SectionHeaderSize * sectionIndex + SectionHeaderPointerToRawDataOffset, SeekOrigin.Begin); - byte[] rawDataBytesBytes = BitConverter.GetBytes(_sectionPointerToRawData[sectionIndex]); + byte[] rawDataBytesBytes = BitConverter.GetBytes(section.PointerToRawData); Debug.Assert(rawDataBytesBytes.Length == sizeof(int)); outputStream.Write(rawDataBytesBytes, 0, rawDataBytesBytes.Length); } } // Patch SizeOfImage to point past the end of the last section + SerializedSectionData lastSection = null; + for (int i = sectionCount - 1; i >= 0; i--) + { + if (_sectionData[i].IsSerialized) + { + lastSection = _sectionData[i]; + break; + } + } + Debug.Assert(lastSection != null); outputStream.Seek(DosHeaderSize + PESignatureSize + COFFHeaderSize + OffsetOfSizeOfImage, SeekOrigin.Begin); - int sizeOfImage = AlignmentHelper.AlignUp(_sectionRVAs[sectionCount - 1] + _sectionRawSizes[sectionCount - 1], Header.SectionAlignment); + int sizeOfImage = AlignmentHelper.AlignUp(lastSection.RVA + lastSection.RawSize, Header.SectionAlignment); byte[] sizeOfImageBytes = BitConverter.GetBytes(sizeOfImage); Debug.Assert(sizeOfImageBytes.Length == sizeof(int)); outputStream.Write(sizeOfImageBytes, 0, sizeOfImageBytes.Length); @@ -507,46 +479,15 @@ protected override PEDirectoriesBuilder GetDirectories() if (_getRuntimeFunctionsTable != null) { RuntimeFunctionsTableNode runtimeFunctionsTable = _getRuntimeFunctionsTable(); - builder.ExceptionTable = new DirectoryEntry( - relativeVirtualAddress: _sectionBuilder.GetSymbolRVA(runtimeFunctionsTable), - size: runtimeFunctionsTable.TableSizeExcludingSentinel); - } - - return builder; - } - - /// - /// Relocate a single directory entry. - /// - /// Directory entry to allocate - /// Relocated directory entry - public DirectoryEntry RelocateDirectoryEntry(DirectoryEntry entry) - { - return new DirectoryEntry(RelocateRVA(entry.RelativeVirtualAddress), entry.Size); - } - - /// - /// Relocate a given RVA using the section offset table produced during section serialization. - /// - /// RVA to relocate - /// Relocated RVA - private int RelocateRVA(int rva) - { - if (rva == 0) - { - // Zero RVA is normally used as NULL - return rva; - } - foreach (SectionRVADelta sectionRvaDelta in _sectionRvaDeltas) - { - if (rva >= sectionRvaDelta.StartRVA && rva < sectionRvaDelta.EndRVA) + if (runtimeFunctionsTable.TableSizeExcludingSentinel != 0) { - // We found the input section holding the RVA, apply its specific delt (output RVA - input RVA). - return rva + sectionRvaDelta.DeltaRVA; + builder.ExceptionTable = new DirectoryEntry( + relativeVirtualAddress: _sectionBuilder.GetSymbolRVA(runtimeFunctionsTable), + size: runtimeFunctionsTable.TableSizeExcludingSentinel); } } - Debug.Fail("RVA is not within any of the input sections - output PE may be inconsistent"); - return rva; + + return builder; } /// @@ -554,14 +495,21 @@ private int RelocateRVA(int rva) /// protected override ImmutableArray
CreateSections() { - return _sections; + ImmutableArray
.Builder sectionListBuilder = ImmutableArray.CreateBuilder
(); + foreach (SectionInfo sectionInfo in _sectionBuilder.GetSections()) + { + // Only include sections that have content. + if (!_sectionBuilder.HasContent(sectionInfo.SectionName)) + continue; + + sectionListBuilder.Add(new Section(sectionInfo.SectionName, sectionInfo.Characteristics)); + } + + return sectionListBuilder.ToImmutable(); } /// - /// Output the section with a given name. For sections existent in the source MSIL PE file - /// (.text, optionally .rsrc and .reloc), we first copy the content of the input MSIL PE file - /// and then call the section serialization callback to emit the extra content after the input - /// section content. + /// Output the section with a given name. /// /// Section name /// RVA and file location where the section will be put @@ -571,18 +519,33 @@ protected override BlobBuilder SerializeSection(string name, SectionLocation loc BlobBuilder sectionDataBuilder = null; int sectionStartRva = location.RelativeVirtualAddress; - int outputSectionIndex = _sections.Length - 1; - while (outputSectionIndex >= 0 && _sections[outputSectionIndex].Name != name) + int outputSectionIndex = _sectionData.Length - 1; + while (outputSectionIndex >= 0 && _sectionData[outputSectionIndex].Name != name) { outputSectionIndex--; } + if (outputSectionIndex < 0) + throw new ArgumentException($"Unknown section name: '{name}'", nameof(name)); + + Debug.Assert(_sectionBuilder.HasContent(name)); + SerializedSectionData outputSection = _sectionData[outputSectionIndex]; + SerializedSectionData previousSection = null; + for (int i = outputSectionIndex - 1; i >= 0; i--) + { + if (_sectionData[i].IsSerialized) + { + previousSection = _sectionData[i]; + break; + } + } + int injectedPadding = 0; if (_customPESectionAlignment != 0) { - if (outputSectionIndex > 0) + if (previousSection is not null) { - sectionStartRva = Math.Max(sectionStartRva, _sectionRVAs[outputSectionIndex - 1] + _sectionRawSizes[outputSectionIndex - 1]); + sectionStartRva = Math.Max(sectionStartRva, previousSection.RVA + previousSection.RawSize); } int newSectionStartRva = AlignmentHelper.AlignUp(sectionStartRva, _customPESectionAlignment); @@ -600,13 +563,13 @@ protected override BlobBuilder SerializeSection(string name, SectionLocation loc if (!_target.IsWindows) { const int RVAAlign = 1 << RVABitsToMatchFilePos; - if (outputSectionIndex > 0) + if (previousSection is not null) { - sectionStartRva = Math.Max(sectionStartRva, _sectionRVAs[outputSectionIndex - 1] + _sectionRawSizes[outputSectionIndex - 1]); + sectionStartRva = Math.Max(sectionStartRva, previousSection.RVA + previousSection.RawSize); // when assembly is stored in a singlefile bundle, an additional skew is introduced - // as the streams inside the bundle are not necessarily page aligned as we do not - // know the actual page size on the target system. + // as the streams inside the bundle are not necessarily page aligned as we do not + // know the actual page size on the target system. // We may need one page gap of unused VA space before the next section starts. 
// We will assume the page size is <= RVAAlign sectionStartRva += RVAAlign; @@ -619,36 +582,19 @@ protected override BlobBuilder SerializeSection(string name, SectionLocation loc location = new SectionLocation(sectionStartRva, location.PointerToRawData); } - if (outputSectionIndex >= 0) - { - _sectionRVAs[outputSectionIndex] = sectionStartRva; - _sectionPointerToRawData[outputSectionIndex] = location.PointerToRawData; - } + outputSection.RVA = sectionStartRva; + outputSection.PointerToRawData = location.PointerToRawData; BlobBuilder extraData = _sectionBuilder.SerializeSection(name, location); - if (extraData != null) - { - if (sectionDataBuilder == null) - { - // See above - there's a bug due to which LinkSuffix to an empty BlobBuilder screws up the blob content. - sectionDataBuilder = extraData; - } - else - { - sectionDataBuilder.LinkSuffix(extraData); - } - } - - // Make sure the section has at least 1 byte, otherwise the PE emitter goes mad, - // messes up the section map and corrups the output executable. + Debug.Assert(extraData != null); if (sectionDataBuilder == null) { - sectionDataBuilder = new BlobBuilder(); + // See above - there's a bug due to which LinkSuffix to an empty BlobBuilder screws up the blob content. + sectionDataBuilder = extraData; } - - if (sectionDataBuilder.Count == 0) + else { - sectionDataBuilder.WriteByte(0); + sectionDataBuilder.LinkSuffix(extraData); } int sectionRawSize = sectionDataBuilder.Count - injectedPadding; @@ -661,15 +607,13 @@ protected override BlobBuilder SerializeSection(string name, SectionLocation loc sectionRawSize = count; } - if (outputSectionIndex >= 0) - { - _sectionRawSizes[outputSectionIndex] = sectionRawSize; - } + outputSection.RawSize = sectionRawSize; + outputSection.IsSerialized = true; return sectionDataBuilder; } } - + /// /// Simple helper for filling in PE header information. 
/// diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/SectionBuilder.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/SectionBuilder.cs index e10348e562e1..f3ebd11aae86 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/SectionBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/SectionBuilder.cs @@ -944,5 +944,22 @@ public void RelocateOutputFile( // Flush remaining PE file blocks after the last relocation relocationHelper.CopyRestOfFile(); } + + internal bool HasContent(string sectionName) + { + if (sectionName == R2RPEBuilder.ExportDataSectionName) + return _exportSymbols.Count > 0 && _dllNameForExportDirectoryTable != null; + + if (sectionName == R2RPEBuilder.RelocSectionName) + { + return _sections.Any( + s => s.PlacedObjectDataToRelocate.Any( + d => d.Relocs.Any( + r => Relocation.GetFileRelocationType(r.RelocType) != RelocType.IMAGE_REL_BASED_ABSOLUTE))); + } + + Section section = FindSection(sectionName); + return section != null && section.Content.Count > 0; + } } } diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcInfo.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcInfo.cs index a64ae72b6cd4..3170dadd9dca 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcInfo.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcInfo.cs @@ -525,16 +525,21 @@ private Dictionary> GetTransitions(byte[] image, ref int totalInterruptibleLength = 0; if (NumInterruptibleRanges == 0) { - totalInterruptibleLength = CodeLength; + totalInterruptibleLength = _gcInfoTypes.NormalizeCodeLength(CodeLength); } else { foreach (InterruptibleRange range in InterruptibleRanges) { - totalInterruptibleLength += (int)(range.StopOffset - range.StartOffset); + uint normStart = _gcInfoTypes.NormalizeCodeOffset(range.StartOffset); + uint normStop = _gcInfoTypes.NormalizeCodeOffset(range.StopOffset); + totalInterruptibleLength += (int)(normStop - normStart); } } + if (SlotTable.NumTracked == 0) + return new Dictionary>(); + int numChunks = (totalInterruptibleLength + _gcInfoTypes.NUM_NORM_CODE_OFFSETS_PER_CHUNK - 1) / _gcInfoTypes.NUM_NORM_CODE_OFFSETS_PER_CHUNK; int numBitsPerPointer = (int)NativeReader.DecodeVarLengthUnsigned(image, _gcInfoTypes.POINTER_SIZE_ENCBASE, ref bitOffset); if (numBitsPerPointer == 0) @@ -629,6 +634,7 @@ private uint GetNumCouldBeLiveSlots(byte[] image, ref int bitOffset) fSkip = !fSkip; fReport = !fReport; } + Debug.Assert(readSlots == numTracked); } else { @@ -642,6 +648,7 @@ private uint GetNumCouldBeLiveSlots(byte[] image, ref int bitOffset) numCouldBeLiveSlots++; } } + Debug.Assert(numCouldBeLiveSlots > 0); return numCouldBeLiveSlots; } diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/GCInfoTypes.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/GCInfoTypes.cs index 653425781a7c..83ac2ef37476 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/GCInfoTypes.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/GCInfoTypes.cs @@ -19,7 +19,8 @@ enum InfoHdrAdjustConstants SET_EPILOGSIZE_MAX = 10, SET_EPILOGCNT_MAX = 4, SET_UNTRACKED_MAX = 3, - SET_RET_KIND_MAX = 4, + SET_RET_KIND_MAX = 3, + SET_NOGCREGIONS_MAX = 4, ADJ_ENCODING_MAX = 0x7f, MORE_BYTES_TO_FOLLOW = 0x80 }; @@ -67,6 +68,16 @@ enum InfoHdrAdjust NEXT_THREE_EPILOGSIZE = 0x78 }; + /// + /// Second set of opcodes, when first code is 0x4F + /// + enum InfoHdrAdjust2 + { + SET_RETURNKIND = 0, // 0x00-SET_RET_KIND_MAX Set 
ReturnKind to value + SET_NOGCREGIONS_CNT = SET_RETURNKIND + InfoHdrAdjustConstants.SET_RET_KIND_MAX + 1, // 0x04 + FFFF_NOGCREGION_CNT = SET_NOGCREGIONS_CNT + InfoHdrAdjustConstants.SET_NOGCREGIONS_MAX + 1 // 0x09 There is a count (>SET_NOGCREGIONS_MAX) after the header encoding + }; + /// /// based on macros defined in src\inc\gcinfotypes.h /// diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/PEReaderExtensions.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/PEReaderExtensions.cs index 97fce016e68b..9f2cc6ad2298 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/PEReaderExtensions.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/PEReaderExtensions.cs @@ -9,6 +9,9 @@ using System.Reflection.PortableExecutable; using System.Text; +using Internal.Runtime; +using Internal.ReadyToRunConstants; + namespace ILCompiler.Reflection.ReadyToRun { public class PEExportTable @@ -126,14 +129,35 @@ public static PEExportTable GetExportTable(this PEReader reader) } /// - /// Check whether the file is a ReadyToRun image and returns the RVA of its ReadyToRun header if positive. + /// Check whether the file is a composite ReadyToRun image and returns the RVA of its ReadyToRun header if positive. /// /// PEReader representing the executable to check for the presence of ReadyToRun header /// RVA of the ReadyToRun header if available, 0 when not /// true when the PEReader represents a ReadyToRun image, false otherwise - public static bool TryGetReadyToRunHeader(this PEReader reader, out int rva) + public static bool TryGetCompositeReadyToRunHeader(this PEReader reader, out int rva) { return reader.GetExportTable().TryGetValue("RTR_HEADER", out rva); } + + /// + /// Check whether the file is a ReadyToRun image created from platform neutral (AnyCPU) IL image. + /// + /// PEReader representing the executable to check + /// true when the PEReader represents a ReadyToRun image created from AnyCPU IL image, false otherwise + public static bool IsReadyToRunPlatformNeutralSource(this PEReader peReader) + { + var managedNativeDirectory = peReader.PEHeaders.CorHeader.ManagedNativeHeaderDirectory; + if (managedNativeDirectory.Size < 16 /* sizeof(ReadyToRunHeader) */) + return false; + + var reader = peReader.GetSectionData(managedNativeDirectory.RelativeVirtualAddress).GetReader(); + if (reader.ReadUInt32() != ReadyToRunHeaderConstants.Signature) + return false; + + reader.ReadUInt16(); // MajorVersion + reader.ReadUInt16(); // MinorVersion + + return (reader.ReadUInt32() & (uint)ReadyToRunFlags.READYTORUN_FLAG_PlatformNeutralSource) != 0; + } } } diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunReader.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunReader.cs index 77326d651362..3dc6cbb9aae6 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunReader.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunReader.cs @@ -379,10 +379,12 @@ internal IAssemblyMetadata R2RManifestMetadata } /// - /// Initializes the fields of the R2RHeader and R2RMethods + /// Minimally initializes the R2R reader. 
/// - /// PE image - /// The Cor header flag must be ILLibrary + /// Assembly resolver + /// Assembly metadata + /// PE image + /// PE file name public ReadyToRunReader(IAssemblyResolver assemblyResolver, IAssemblyMetadata metadata, PEReader peReader, string filename) { _assemblyResolver = assemblyResolver; @@ -392,10 +394,27 @@ public ReadyToRunReader(IAssemblyResolver assemblyResolver, IAssemblyMetadata me } /// - /// Initializes the fields of the R2RHeader and R2RMethods + /// Minimally initializes the R2R reader. /// - /// PE image - /// The Cor header flag must be ILLibrary + /// Assembly resolver + /// Assembly metadata + /// PE image + /// PE file name + /// PE image content + public ReadyToRunReader(IAssemblyResolver assemblyResolver, IAssemblyMetadata metadata, PEReader peReader, string filename, ReadOnlyMemory content) + { + _assemblyResolver = assemblyResolver; + CompositeReader = peReader; + Filename = filename; + Image = ConvertToArray(content); + Initialize(metadata); + } + + /// + /// Minimally initializes the R2R reader. + /// + /// Assembly resolver + /// PE file name public unsafe ReadyToRunReader(IAssemblyResolver assemblyResolver, string filename) { _assemblyResolver = assemblyResolver; @@ -403,6 +422,29 @@ public unsafe ReadyToRunReader(IAssemblyResolver assemblyResolver, string filena Initialize(metadata: null); } + /// + /// Minimally initializes the R2R reader. + /// + /// Assembly resolver + /// PE file name + /// PE image content + public unsafe ReadyToRunReader(IAssemblyResolver assemblyResolver, string filename, ReadOnlyMemory content) + { + _assemblyResolver = assemblyResolver; + Filename = filename; + Image = ConvertToArray(content); + Initialize(metadata: null); + } + + private unsafe byte[] ConvertToArray(ReadOnlyMemory content) + { + if (MemoryMarshal.TryGetArray(content, out ArraySegment segment) && (segment.Offset == 0) && (segment.Count == content.Length)) + { + return segment.Array; + } + return content.ToArray(); + } + public static bool IsReadyToRunImage(PEReader peReader) { if (peReader.PEHeaders == null) @@ -413,7 +455,7 @@ public static bool IsReadyToRunImage(PEReader peReader) if ((peReader.PEHeaders.CorHeader.Flags & CorFlags.ILLibrary) == 0) { - return peReader.TryGetReadyToRunHeader(out _); + return peReader.TryGetCompositeReadyToRunHeader(out _); } else { @@ -441,10 +483,9 @@ private unsafe void Initialize(IAssemblyMetadata metadata) if (CompositeReader == null) { - byte[] image = File.ReadAllBytes(Filename); - Image = image; + Image ??= File.ReadAllBytes(Filename); + byte[] image = Image; ImagePin = new PinningReference(image); - CompositeReader = new PEReader(Unsafe.As>(ref image)); } else @@ -570,7 +611,7 @@ public IReadOnlyDictionary GetCustomMethodToRuntimeFu private bool TryLocateNativeReadyToRunHeader() { - _composite = CompositeReader.TryGetReadyToRunHeader(out _readyToRunHeaderRVA); + _composite = CompositeReader.TryGetCompositeReadyToRunHeader(out _readyToRunHeaderRVA); return _composite; } @@ -1140,7 +1181,7 @@ private void CountRuntimeFunctions(bool[] isEntryPoint, IDictionary count++; i++; } while (i < isEntryPoint.Length && !isEntryPoint[i] && i < firstColdRuntimeFunction); - + if (dHotColdMap.ContainsKey(runtimeFunctionId)) { int coldSize = dHotColdMap[runtimeFunctionId].Length; diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs index 4a62123d694a..d87597855549 100644 --- 
a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs @@ -1929,6 +1929,14 @@ private void ParseHelper(StringBuilder builder) builder.Append("DBL2ULNGOVF"); break; + case ReadyToRunHelper.Lng2Flt: + builder.Append("LNG2FLT"); + break; + + case ReadyToRunHelper.ULng2Flt: + builder.Append("ULNG2FLT"); + break; + // Floating point ops case ReadyToRunHelper.DblRem: builder.Append("DBL_REM"); diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/GcInfo.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/GcInfo.cs index ddb44a1d312a..7b6f466c2f1a 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/GcInfo.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/GcInfo.cs @@ -13,6 +13,7 @@ public class GcInfo : BaseGcInfo const uint byref_OFFSET_FLAG = 0x1; public InfoHdrSmall Header { get; set; } + public NoGcRegionTable NoGCRegions { get; set; } public GcSlotTable SlotTable { get; set; } public GcInfo() { } @@ -28,6 +29,8 @@ public GcInfo(byte[] image, int offset) Header = InfoHdrDecoder.DecodeHeader(image, ref offset, CodeLength); + NoGCRegions = new NoGcRegionTable(image, Header, ref offset); + SlotTable = new GcSlotTable(image, Header, ref offset); Transitions = new Dictionary>(); @@ -54,6 +57,7 @@ public override string ToString() sb.AppendLine($" CodeLength: {CodeLength} bytes"); sb.AppendLine($" InfoHdr:"); sb.AppendLine($"{Header}"); + sb.AppendLine($"{NoGCRegions}"); sb.AppendLine($"{SlotTable}"); sb.AppendLine($" Size: {Size} bytes"); diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/InfoHdr.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/InfoHdr.cs index 14148a9ddc27..b18706b7661d 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/InfoHdr.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/InfoHdr.cs @@ -14,6 +14,7 @@ public struct InfoHdrSmall { private const uint INVALID_GS_COOKIE_OFFSET = 0; private const uint INVALID_SYNC_OFFSET = 0; + private const uint INVALID_REV_PINVOKE_OFFSET = 0xFFFFFFFF; public uint PrologSize { get; set; } public uint EpilogSize { get; set; } @@ -44,6 +45,7 @@ public struct InfoHdrSmall public uint SyncStartOffset { get; set; } public uint SyncEndOffset { get; set; } public uint RevPInvokeOffset { get; set; } + public uint NoGCRegionCnt { get; set; } public bool HasArgTabOffset { get; set; } public uint ArgTabOffset { get; set; } @@ -81,7 +83,8 @@ public InfoHdrSmall(uint prologSize, uint epilogSize, byte epilogCount, byte epi GsCookieOffset = 0; SyncStartOffset = 0; SyncEndOffset = 0; - RevPInvokeOffset = 0; + RevPInvokeOffset = INVALID_REV_PINVOKE_OFFSET; + NoGCRegionCnt = 0; HasArgTabOffset = false; ArgTabOffset = 0; @@ -138,6 +141,10 @@ public override string ToString() { sb.AppendLine($" Sync region = [{SyncStartOffset},{SyncEndOffset}]"); } + if (NoGCRegionCnt > 0) + { + sb.AppendLine($" No GC region count = {NoGCRegionCnt}"); + } sb.Append($" Epilogs:"); foreach (int epilog in Epilogs) @@ -157,7 +164,9 @@ public class InfoHdrDecoder { private const uint HAS_UNTRACKED = 0xFFFFFFFF; private const uint HAS_GS_COOKIE_OFFSET = 0xFFFFFFFF; private const uint HAS_SYNC_OFFSET = 0xFFFFFFFF; - private const uint HAS_REV_PINVOKE_FRAME_OFFSET = 0xFFFFFFFF; + private const uint INVALID_REV_PINVOKE_OFFSET = 0xFFFFFFFF; + private const uint HAS_REV_PINVOKE_FRAME_OFFSET = 0xFFFFFFFE; + private const uint HAS_NOGCREGIONS = 
0xFFFFFFFF; private const uint YES = HAS_VARPTR; /// @@ -271,18 +280,25 @@ public static InfoHdrSmall DecodeHeader(byte[] image, ref int offset, int codeLe header.SyncStartOffset ^= HAS_SYNC_OFFSET; break; case (byte)InfoHdrAdjust.FLIP_REV_PINVOKE_FRAME: - header.RevPInvokeOffset ^= HAS_REV_PINVOKE_FRAME_OFFSET; + header.RevPInvokeOffset ^= (INVALID_REV_PINVOKE_OFFSET ^ HAS_REV_PINVOKE_FRAME_OFFSET); break; case (byte)InfoHdrAdjust.NEXT_OPCODE: nextByte = image[offset++]; encoding = (byte)(nextByte & (int)InfoHdrAdjustConstants.ADJ_ENCODING_MAX); // encoding here always corresponds to codes in InfoHdrAdjust2 set - - if (encoding < (int)InfoHdrAdjustConstants.SET_RET_KIND_MAX) + if (encoding <= (int)InfoHdrAdjustConstants.SET_RET_KIND_MAX) { header.ReturnKind = (ReturnKinds)encoding; } + else if (encoding < (int)InfoHdrAdjust2.FFFF_NOGCREGION_CNT) + { + header.NoGCRegionCnt = (uint)encoding - (uint)InfoHdrAdjust2.SET_NOGCREGIONS_CNT; + } + else if (encoding == (int)InfoHdrAdjust2.FFFF_NOGCREGION_CNT) + { + header.NoGCRegionCnt = HAS_NOGCREGIONS; + } else { throw new BadImageFormatException("Unexpected gcinfo header encoding"); @@ -351,6 +367,10 @@ public static InfoHdrSmall DecodeHeader(byte[] image, ref int offset, int codeLe { header.RevPInvokeOffset = NativeReader.DecodeUnsignedGc(image, ref offset); } + if (header.NoGCRegionCnt == HAS_NOGCREGIONS) + { + header.NoGCRegionCnt = NativeReader.DecodeUnsignedGc(image, ref offset); + } header.Epilogs = new List(); if (header.EpilogCount > 1 || (header.EpilogCount != 0 && !header.EpilogAtEnd)) diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/NoGcRegionTable.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/NoGcRegionTable.cs new file mode 100644 index 000000000000..4dcc332cb7ce --- /dev/null +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/x86/NoGcRegionTable.cs @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Reflection.PortableExecutable; +using System.Text; + +namespace ILCompiler.Reflection.ReadyToRun.x86 +{ + public class NoGcRegionTable + { + public class NoGcRegion + { + public uint Offset { get; set; } + public uint Size { get; set; } + + public NoGcRegion(uint offset, uint size) + { + Offset = offset; + Size = size; + } + + public override string ToString() + { + return $" [{Offset:04X}-{Offset+Size:04X})\n"; + } + } + + public List Regions { get; set; } + + public NoGcRegionTable() { } + + public NoGcRegionTable(byte[] image, InfoHdrSmall header, ref int offset) + { + Regions = new List((int)header.NoGCRegionCnt); + + uint count = header.NoGCRegionCnt; + while (count-- > 0) + { + uint regionOffset = NativeReader.DecodeUnsignedGc(image, ref offset); + uint regionSize = NativeReader.DecodeUnsignedGc(image, ref offset); + Regions.Add(new NoGcRegion(regionOffset, regionSize)); + } + } + + public override string ToString() + { + if (Regions.Count > 0) + { + StringBuilder sb = new StringBuilder(); + + sb.AppendLine($" No GC regions:"); + foreach (NoGcRegion region in Regions) + { + sb.Append(region.ToString()); + } + + return sb.ToString(); + } + + return string.Empty; + } + } +} diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilation.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilation.cs index afc1e8985041..ed5c496b005e 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilation.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilation.cs @@ -25,7 +25,12 @@ public class RyuJitCompilation : Compilation private readonly ConditionalWeakTable _corinfos = new ConditionalWeakTable(); internal readonly RyuJitCompilationOptions _compilationOptions; private readonly ProfileDataManager _profileDataManager; +<<<<<<< HEAD protected readonly MethodImportationErrorProvider _methodImportationErrorProvider; +======= + private readonly FileLayoutOptimizer _fileLayoutOptimizer; + private readonly MethodImportationErrorProvider _methodImportationErrorProvider; +>>>>>>> upstream-jun private readonly ReadOnlyFieldPolicy _readOnlyFieldPolicy; private readonly int _parallelism; @@ -44,6 +49,8 @@ public RyuJitCompilation( MethodImportationErrorProvider errorProvider, ReadOnlyFieldPolicy readOnlyFieldPolicy, RyuJitCompilationOptions options, + MethodLayoutAlgorithm methodLayoutAlgorithm, + FileLayoutAlgorithm fileLayoutAlgorithm, int parallelism) : base(dependencyGraph, nodeFactory, roots, ilProvider, debugInformationProvider, inliningPolicy, logger) { @@ -57,6 +64,8 @@ public RyuJitCompilation( _readOnlyFieldPolicy = readOnlyFieldPolicy; _parallelism = parallelism; + + _fileLayoutOptimizer = new FileLayoutOptimizer(logger, methodLayoutAlgorithm, fileLayoutAlgorithm, profileDataManager, nodeFactory); } public ProfileDataManager ProfileData => _profileDataManager; @@ -72,22 +81,16 @@ public override IEETypeNode NecessaryTypeSymbolIfPossible(TypeDesc type) // information proving that it isn't, give RyuJIT the constructed symbol even // though we just need the unconstructed one. 
// https://github.com/dotnet/runtimelab/issues/1128 - bool canPotentiallyConstruct = ConstructedEETypeNode.CreationAllowed(type) - && NodeFactory.DevirtualizationManager.CanReferenceConstructedMethodTable(type); - if (canPotentiallyConstruct) - return _nodeFactory.MaximallyConstructableType(type); - - return _nodeFactory.NecessaryTypeSymbol(type); + return GetLdTokenHelperForType(type) == ReadyToRunHelperId.TypeHandle + ? _nodeFactory.ConstructedTypeSymbol(type) + : _nodeFactory.NecessaryTypeSymbol(type); } public FrozenRuntimeTypeNode NecessaryRuntimeTypeIfPossible(TypeDesc type) { - bool canPotentiallyConstruct = ConstructedEETypeNode.CreationAllowed(type) - && NodeFactory.DevirtualizationManager.CanReferenceConstructedMethodTable(type); - if (canPotentiallyConstruct) - return _nodeFactory.SerializedMaximallyConstructableRuntimeTypeObject(type); - - return _nodeFactory.SerializedNecessaryRuntimeTypeObject(type); + return GetLdTokenHelperForType(type) == ReadyToRunHelperId.TypeHandle + ? _nodeFactory.SerializedConstructedRuntimeTypeObject(type) + : _nodeFactory.SerializedNecessaryRuntimeTypeObject(type); } protected override void CompileInternal(string outputFile, ObjectDumper dumper) @@ -95,6 +98,8 @@ protected override void CompileInternal(string outputFile, ObjectDumper dumper) _dependencyGraph.ComputeMarkedNodes(); var nodes = _dependencyGraph.MarkedNodeList; + nodes = _fileLayoutOptimizer.ApplyProfilerGuidedMethodSort(nodes); + NodeFactory.SetMarkingComplete(); ObjectWritingOptions options = default; diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs index 14c62a02c6fa..72f58e1b3ebc 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs @@ -17,6 +17,8 @@ public class RyuJitCompilationBuilder : CompilationBuilder // These need to provide reasonable defaults so that the user can optionally skip // calling the Use/Configure methods and still get something reasonable back. private KeyValuePair[] _ryujitOptions = Array.Empty>(); + private MethodLayoutAlgorithm _methodLayoutAlgorithm; + private FileLayoutAlgorithm _fileLayoutAlgorithm; private ILProvider _ilProvider = new NativeAotILProvider(); private ProfileDataManager _profileDataManager; private string _jitPath; @@ -44,6 +46,13 @@ public RyuJitCompilationBuilder UseJitPath(string jitPath) return this; } + public RyuJitCompilationBuilder FileLayoutAlgorithms(MethodLayoutAlgorithm methodLayoutAlgorithm, FileLayoutAlgorithm fileLayoutAlgorithm) + { + _methodLayoutAlgorithm = methodLayoutAlgorithm; + _fileLayoutAlgorithm = fileLayoutAlgorithm; + return this; + } + public override CompilationBuilder UseBackendOptions(IEnumerable options) { var builder = default(ArrayBuilder>); @@ -138,7 +147,21 @@ protected virtual RyuJitCompilation CreateCompilation(RyuJitCompilationOptions o var factory = new RyuJitNodeFactory(_context, _compilationGroup, _metadataManager, _interopStubManager, _nameMangler, _vtableSliceProvider, _dictionaryLayoutProvider, _inlinedThreadStatics, GetPreinitializationManager(), _devirtualizationManager, interner); DependencyAnalyzerBase graph = CreateDependencyGraph(factory, new ObjectNode.ObjectNodeComparer(CompilerComparer.Instance)); - return new RyuJitCompilation(graph, factory, _compilationRoots, _ilProvider, _debugInformationProvider, _logger, _inliningPolicy ?? 
_compilationGroup, _instructionSetSupport, _profileDataManager, _methodImportationErrorProvider, _readOnlyFieldPolicy, options, _parallelism); + return new RyuJitCompilation(graph, + factory, + _compilationRoots, + _ilProvider, + _debugInformationProvider, + _logger, + _inliningPolicy ?? _compilationGroup, + _instructionSetSupport, + _profileDataManager, + _methodImportationErrorProvider, + _readOnlyFieldPolicy, + options, + _methodLayoutAlgorithm, + _fileLayoutAlgorithm, + _parallelism); } } } diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/ILCompiler.RyuJit.csproj b/src/coreclr/tools/aot/ILCompiler.RyuJit/ILCompiler.RyuJit.csproj index d414d28d3a67..2cecccf6e49b 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/ILCompiler.RyuJit.csproj +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/ILCompiler.RyuJit.csproj @@ -15,6 +15,7 @@ binaries are up to date and which are stale. --> false Debug;Release;Checked + false @@ -98,6 +99,10 @@ + + + + diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index d89f39a66abd..02e41335d790 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -618,10 +618,18 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_NEWARR_1_DIRECT: id = ReadyToRunHelper.NewArray; break; + case CorInfoHelpFunc.CORINFO_HELP_NEWARR_1_PTR: + return _compilation.NodeFactory.ExternSymbol("RhpNewPtrArrayFast"); case CorInfoHelpFunc.CORINFO_HELP_NEWARR_1_ALIGN8: +<<<<<<< HEAD return _compilation.NodeFactory.ExternFunctionSymbol("RhpNewArrayAlign8"); case CorInfoHelpFunc.CORINFO_HELP_NEWARR_1_VC: return _compilation.NodeFactory.ExternFunctionSymbol("RhpNewArray"); +======= + return _compilation.NodeFactory.ExternSymbol("RhpNewArrayFastAlign8"); + case CorInfoHelpFunc.CORINFO_HELP_NEWARR_1_VC: + return _compilation.NodeFactory.ExternSymbol("RhpNewArrayFast"); +>>>>>>> upstream-jun case CorInfoHelpFunc.CORINFO_HELP_STACK_PROBE: return _compilation.NodeFactory.ExternFunctionSymbol("RhpStackProbe"); @@ -703,6 +711,12 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_DBL2ULNG_OVF: id = ReadyToRunHelper.Dbl2ULngOvf; break; + case CorInfoHelpFunc.CORINFO_HELP_LNG2FLT: + id = ReadyToRunHelper.Lng2Flt; + break; + case CorInfoHelpFunc.CORINFO_HELP_ULNG2FLT: + id = ReadyToRunHelper.ULng2Flt; + break; case CorInfoHelpFunc.CORINFO_HELP_FLTREM: id = ReadyToRunHelper.FltRem; @@ -1189,6 +1203,11 @@ private CorInfoHelpFunc getNewArrHelper(CORINFO_CLASS_STRUCT_* arrayCls) Debug.Assert(type.IsArray); + TypeDesc elementType = ((ArrayType)type).ElementType; + + if (elementType.GetElementSize().AsInt == _compilation.TypeSystemContext.Target.PointerSize) + return CorInfoHelpFunc.CORINFO_HELP_NEWARR_1_PTR; + if (type.RequiresAlign8()) return CorInfoHelpFunc.CORINFO_HELP_NEWARR_1_ALIGN8; diff --git a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/ILCompiler.Trimming.Tests.csproj b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/ILCompiler.Trimming.Tests.csproj index de47993ee6f5..f2d5356f5dc9 100644 --- a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/ILCompiler.Trimming.Tests.csproj +++ b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/ILCompiler.Trimming.Tests.csproj @@ -28,7 +28,6 @@ - diff --git a/src/coreclr/tools/aot/ILCompiler.TypeSystem/ILCompiler.TypeSystem.csproj 
b/src/coreclr/tools/aot/ILCompiler.TypeSystem/ILCompiler.TypeSystem.csproj index cafa4952376f..b49cc608a4c6 100644 --- a/src/coreclr/tools/aot/ILCompiler.TypeSystem/ILCompiler.TypeSystem.csproj +++ b/src/coreclr/tools/aot/ILCompiler.TypeSystem/ILCompiler.TypeSystem.csproj @@ -198,9 +198,6 @@ Utilities\CustomAttributeTypeNameParser.cs - - Utilities\TypeNameHelpers.cs - Utilities\ValueStringBuilder.cs @@ -237,6 +234,9 @@ TypeSystem\Common\FieldForInstantiatedType.cs + + TypeSystem\Common\FieldForInstantiatedType.FieldLayout.cs + TypeSystem\Common\FieldDesc.cs diff --git a/src/coreclr/tools/aot/ILCompiler/ILCompiler.csproj b/src/coreclr/tools/aot/ILCompiler/ILCompiler.csproj index fadbaa358cdd..41fff43d243f 100644 --- a/src/coreclr/tools/aot/ILCompiler/ILCompiler.csproj +++ b/src/coreclr/tools/aot/ILCompiler/ILCompiler.csproj @@ -1,7 +1,6 @@ $(RuntimeBinDir)ilc/ - $(PackageRID) diff --git a/src/coreclr/tools/aot/ILCompiler/ILCompiler.props b/src/coreclr/tools/aot/ILCompiler/ILCompiler.props index cbf3a8913e5e..a3ad542f7f85 100644 --- a/src/coreclr/tools/aot/ILCompiler/ILCompiler.props +++ b/src/coreclr/tools/aot/ILCompiler/ILCompiler.props @@ -7,22 +7,15 @@ 8002,NU1701 x64;x86 AnyCPU + false false true true false Debug;Release;Checked - true - - - - true - false - true - diff --git a/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs b/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs index 2bed369e68db..1a8715be4a76 100644 --- a/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs +++ b/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs @@ -11,167 +11,175 @@ namespace ILCompiler { +<<<<<<< HEAD internal sealed partial class ILCompilerRootCommand : CliRootCommand +======= + internal sealed class ILCompilerRootCommand : RootCommand +>>>>>>> upstream-jun { - public CliArgument> InputFilePaths { get; } = + public Argument> InputFilePaths { get; } = new("input-file-path") { CustomParser = result => Helpers.BuildPathDictionary(result.Tokens, true), Description = "Input file(s)", Arity = ArgumentArity.OneOrMore }; - public CliOption> ReferenceFiles { get; } = + public Option> ReferenceFiles { get; } = new("--reference", "-r") { CustomParser = result => Helpers.BuildPathDictionary(result.Tokens, false), DefaultValueFactory = result => Helpers.BuildPathDictionary(result.Tokens, false), Description = "Reference file(s) for compilation" }; - public CliOption OutputFilePath { get; } = + public Option OutputFilePath { get; } = new("--out", "-o") { Description = "Output file path" }; - public CliOption Optimize { get; } = + public Option Optimize { get; } = new("--optimize", "-O") { Description = "Enable optimizations" }; - public CliOption OptimizeSpace { get; } = + public Option OptimizeSpace { get; } = new("--optimize-space", "--Os") { Description = "Enable optimizations, favor code space" }; - public CliOption OptimizeTime { get; } = + public Option OptimizeTime { get; } = new("--optimize-time", "--Ot") { Description = "Enable optimizations, favor code speed" }; - public CliOption MibcFilePaths { get; } = + public Option MibcFilePaths { get; } = new("--mibc", "-m") { DefaultValueFactory = _ => Array.Empty(), Description = "Mibc file(s) for profile guided optimization" }; - public CliOption SatelliteFilePaths { get; } = + public Option MethodLayout { get; } = + new("--method-layout") { CustomParser = MakeMethodLayoutAlgorithm, DefaultValueFactory = MakeMethodLayoutAlgorithm, Description = "Layout algorithm used by profile-driven optimization for arranging methods in a file.", 
HelpName = "arg" }; + public Option FileLayout { get; } = + new("--file-layout") { CustomParser = MakeFileLayoutAlgorithm, DefaultValueFactory = MakeFileLayoutAlgorithm, Description = "Layout algorithm used by profile-driven optimization for arranging non-method contents in a file.", HelpName = "arg" }; + public Option SatelliteFilePaths { get; } = new("--satellite") { DefaultValueFactory = _ => Array.Empty(), Description = "Satellite assemblies associated with inputs/references" }; - public CliOption EnableDebugInfo { get; } = + public Option EnableDebugInfo { get; } = new("--debug", "-g") { Description = "Emit debugging information" }; - public CliOption UseDwarf5 { get; } = + public Option UseDwarf5 { get; } = new("--gdwarf-5") { Description = "Generate source-level debug information with dwarf version 5" }; - public CliOption NativeLib { get; } = + public Option NativeLib { get; } = new("--nativelib") { Description = "Compile as static or shared library" }; - public CliOption SplitExeInitialization { get; } = + public Option SplitExeInitialization { get; } = new("--splitinit") { Description = "Split initialization of an executable between the library entrypoint and a main entrypoint" }; - public CliOption ExportsFile { get; } = + public Option ExportsFile { get; } = new("--exportsfile") { Description = "File to write exported symbol and method definitions" }; - public CliOption ExportUnmanagedEntryPoints { get; } = + public Option ExportUnmanagedEntryPoints { get; } = new("--export-unmanaged-entrypoints") { Description = "Controls whether the named UnmanagedCallersOnly methods are exported" }; - public CliOption ExportDynamicSymbols { get; } = + public Option ExportDynamicSymbols { get; } = new("--export-dynamic-symbol") { Description = "Add dynamic export symbol to exports file" }; - public CliOption DgmlLogFileName { get; } = + public Option DgmlLogFileName { get; } = new("--dgmllog") { Description = "Save result of dependency analysis as DGML" }; - public CliOption GenerateFullDgmlLog { get; } = + public Option GenerateFullDgmlLog { get; } = new("--fulllog") { Description = "Save detailed log of dependency analysis" }; - public CliOption ScanDgmlLogFileName { get; } = + public Option ScanDgmlLogFileName { get; } = new("--scandgmllog") { Description = "Save result of scanner dependency analysis as DGML" }; - public CliOption GenerateFullScanDgmlLog { get; } = + public Option GenerateFullScanDgmlLog { get; } = new("--scanfulllog") { Description = "Save detailed log of scanner dependency analysis" }; - public CliOption IsVerbose { get; } = + public Option IsVerbose { get; } = new("--verbose") { Description = "Enable verbose logging" }; - public CliOption SystemModuleName { get; } = + public Option SystemModuleName { get; } = new("--systemmodule") { DefaultValueFactory = _ => Helpers.DefaultSystemModule, Description = "System module name (default: System.Private.CoreLib)" }; - public CliOption Win32ResourceModuleName { get; } = + public Option Win32ResourceModuleName { get; } = new("--win32resourcemodule") { Description = "Name of the module from which to copy Win32 resources (Windows target only)" }; - public CliOption MultiFile { get; } = + public Option MultiFile { get; } = new("--multifile") { Description = "Compile only input files (do not compile referenced assemblies)" }; - public CliOption WaitForDebugger { get; } = + public Option WaitForDebugger { get; } = new("--waitfordebugger") { Description = "Pause to give opportunity to attach debugger" }; - public CliOption Resilient { 
get; } = + public Option Resilient { get; } = new("--resilient") { Description = "Ignore unresolved types, methods, and assemblies. Defaults to false" }; - public CliOption CodegenOptions { get; } = + public Option CodegenOptions { get; } = new("--codegenopt") { DefaultValueFactory = _ => Array.Empty(), Description = "Define a codegen option" }; - public CliOption RdXmlFilePaths { get; } = + public Option RdXmlFilePaths { get; } = new("--rdxml") { DefaultValueFactory = _ => Array.Empty(), Description = "RD.XML file(s) for compilation" }; - public CliOption LinkTrimFilePaths { get; } = + public Option LinkTrimFilePaths { get; } = new("--descriptor") { DefaultValueFactory = _ => Array.Empty(), Description = "ILLink.Descriptor file(s) for compilation" }; - public CliOption SubstitutionFilePaths { get; } = + public Option SubstitutionFilePaths { get; } = new("--substitution") { DefaultValueFactory = _ => Array.Empty(), Description = "ILLink.Substitution file(s) for compilation" }; - public CliOption MapFileName { get; } = + public Option MapFileName { get; } = new("--map") { Description = "Generate a map file" }; - public CliOption MstatFileName { get; } = + public Option MstatFileName { get; } = new("--mstat") { Description = "Generate an mstat file" }; - public CliOption SourceLinkFileName { get; } = + public Option SourceLinkFileName { get; } = new("--sourcelink") { Description = "Generate a SourceLink file" }; - public CliOption MetadataLogFileName { get; } = + public Option MetadataLogFileName { get; } = new("--metadatalog") { Description = "Generate a metadata log file" }; - public CliOption CompleteTypesMetadata { get; } = + public Option CompleteTypesMetadata { get; } = new("--completetypemetadata") { Description = "Generate complete metadata for types" }; - public CliOption ReflectionData { get; } = + public Option ReflectionData { get; } = new("--reflectiondata") { Description = "Reflection data to generate (one of: all, none)" }; - public CliOption ScanReflection { get; } = + public Option ScanReflection { get; } = new("--scanreflection") { Description = "Scan IL for reflection patterns" }; - public CliOption UseScanner { get; } = + public Option UseScanner { get; } = new("--scan") { Description = "Use IL scanner to generate optimized code (implied by -O)" }; - public CliOption NoScanner { get; } = + public Option NoScanner { get; } = new("--noscan") { Description = "Do not use IL scanner to generate optimized code" }; - public CliOption IlDump { get; } = + public Option IlDump { get; } = new("--ildump") { Description = "Dump IL assembly listing for compiler-generated IL" }; - public CliOption NoInlineTls { get; } = + public Option NoInlineTls { get; } = new("--noinlinetls") { Description = "Do not generate inline thread local statics" }; - public CliOption EmitStackTraceData { get; } = + public Option EmitStackTraceData { get; } = new("--stacktracedata") { Description = "Emit data to support generating stack trace strings at runtime" }; - public CliOption MethodBodyFolding { get; } = + public Option MethodBodyFolding { get; } = new("--methodbodyfolding") { Description = "Fold identical method bodies" }; - public CliOption InitAssemblies { get; } = + public Option InitAssemblies { get; } = new("--initassembly") { DefaultValueFactory = _ => Array.Empty(), Description = "Assembly(ies) with a library initializer" }; - public CliOption FeatureSwitches { get; } = + public Option FeatureSwitches { get; } = new("--feature") { DefaultValueFactory = _ => Array.Empty(), Description = "Feature 
switches to apply (format: 'Namespace.Name=[true|false]'" }; - public CliOption RuntimeOptions { get; } = + public Option RuntimeOptions { get; } = new("--runtimeopt") { DefaultValueFactory = _ => Array.Empty(), Description = "Runtime options to set" }; - public CliOption RuntimeKnobs { get; } = + public Option RuntimeKnobs { get; } = new("--runtimeknob") { DefaultValueFactory = _ => Array.Empty(), Description = "Runtime knobs to set" }; - public CliOption Parallelism { get; } = + public Option Parallelism { get; } = new("--parallelism") { CustomParser = MakeParallelism, DefaultValueFactory = MakeParallelism, Description = "Maximum number of threads to use during compilation" }; - public CliOption InstructionSet { get; } = + public Option InstructionSet { get; } = new("--instruction-set") { Description = "Instruction set to allow or disallow" }; - public CliOption MaxVectorTBitWidth { get; } = + public Option MaxVectorTBitWidth { get; } = new("--max-vectort-bitwidth") { Description = "Maximum width, in bits, that Vector is allowed to be" }; - public CliOption Guard { get; } = + public Option Guard { get; } = new("--guard") { Description = "Enable mitigations. Options: 'cf': CFG (Control Flow Guard, Windows only)" }; - public CliOption Dehydrate { get; } = + public Option Dehydrate { get; } = new("--dehydrate") { Description = "Dehydrate runtime data structures" }; - public CliOption PreinitStatics { get; } = + public Option PreinitStatics { get; } = new("--preinitstatics") { Description = "Interpret static constructors at compile time if possible (implied by -O)" }; - public CliOption NoPreinitStatics { get; } = + public Option NoPreinitStatics { get; } = new("--nopreinitstatics") { Description = "Do not interpret static constructors at compile time" }; - public CliOption InstrumentReachability { get; } = + public Option InstrumentReachability { get; } = new("--reachabilityinstrument") { Description = "Instrument code for dynamic reachability" }; - public CliOption UseReachability { get; } = + public Option UseReachability { get; } = new("--reachabilityuse") { Description = "Use dynamic reachability instrumentation data to produce minimal output" }; - public CliOption SuppressedWarnings { get; } = + public Option SuppressedWarnings { get; } = new("--nowarn") { DefaultValueFactory = _ => Array.Empty(), Description = "Disable specific warning messages" }; - public CliOption SingleWarn { get; } = + public Option SingleWarn { get; } = new("--singlewarn") { Description = "Generate single AOT/trimming warning per assembly" }; - public CliOption NoTrimWarn { get; } = + public Option NoTrimWarn { get; } = new("--notrimwarn") { Description = "Disable warnings related to trimming" }; - public CliOption NoAotWarn { get; } = + public Option NoAotWarn { get; } = new("--noaotwarn") { Description = "Disable warnings related to AOT" }; - public CliOption SingleWarnEnabledAssemblies { get; } = + public Option SingleWarnEnabledAssemblies { get; } = new("--singlewarnassembly") { DefaultValueFactory = _ => Array.Empty(), Description = "Generate single AOT/trimming warning for given assembly" }; - public CliOption SingleWarnDisabledAssemblies { get; } = + public Option SingleWarnDisabledAssemblies { get; } = new("--nosinglewarnassembly") { DefaultValueFactory = _ => Array.Empty(), Description = "Expand AOT/trimming warnings for given assembly" }; - public CliOption TreatWarningsAsErrors { get; } = + public Option TreatWarningsAsErrors { get; } = new("--warnaserror") { Description = "Treat warnings as errors" 
}; - public CliOption WarningsAsErrorsEnable { get; } = + public Option WarningsAsErrorsEnable { get; } = new("--warnaserr") { Description = "Enable treating specific warnings as errors" }; - public CliOption WarningsAsErrorsDisable { get; } = + public Option WarningsAsErrorsDisable { get; } = new("--nowarnaserr") { Description = "Disable treating specific warnings as errors" }; - public CliOption DirectPInvokes { get; } = + public Option DirectPInvokes { get; } = new("--directpinvoke") { DefaultValueFactory = _ => Array.Empty(), Description = "PInvoke to call directly" }; - public CliOption DirectPInvokeLists { get; } = + public Option DirectPInvokeLists { get; } = new("--directpinvokelist") { DefaultValueFactory = _ => Array.Empty(), Description = "File with list of PInvokes to call directly" }; - public CliOption RootedAssemblies { get; } = + public Option RootedAssemblies { get; } = new("--root") { DefaultValueFactory = _ => Array.Empty(), Description = "Fully generate given assembly" }; - public CliOption ConditionallyRootedAssemblies { get; } = + public Option ConditionallyRootedAssemblies { get; } = new("--conditionalroot") { DefaultValueFactory = _ => Array.Empty(), Description = "Fully generate given assembly if it's used" }; - public CliOption TrimmedAssemblies { get; } = + public Option TrimmedAssemblies { get; } = new("--trim") { DefaultValueFactory = _ => Array.Empty(), Description = "Trim the specified assembly" }; - public CliOption RootDefaultAssemblies { get; } = + public Option RootDefaultAssemblies { get; } = new("--defaultrooting") { Description = "Root assemblies that are not marked [IsTrimmable]" }; - public CliOption TargetArchitecture { get; } = + public Option TargetArchitecture { get; } = new("--targetarch") { CustomParser = MakeTargetArchitecture, DefaultValueFactory = MakeTargetArchitecture, Description = "Target architecture for cross compilation", HelpName = "arg" }; - public CliOption TargetOS { get; } = + public Option TargetOS { get; } = new("--targetos") { CustomParser = result => Helpers.GetTargetOS(result.Tokens.Count > 0 ? result.Tokens[0].Value : null), DefaultValueFactory = result => Helpers.GetTargetOS(result.Tokens.Count > 0 ? 
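
Most of the churn in ILCompilerRootCommand.cs is mechanical: the System.CommandLine wrapper types drop their Cli prefix (CliRootCommand becomes RootCommand, CliArgument becomes Argument, CliOption becomes Option, and CliConfiguration becomes CommandLineConfiguration in the driver), while the switch names, descriptions and initializer syntax stay the same. A before/after sketch for a single switch, assuming the usual Option<bool> shape for an on/off flag such as --resilient:

    // Old shape (Cli-prefixed System.CommandLine names):
    public CliOption<bool> Resilient { get; } =
        new("--resilient") { Description = "Ignore unresolved types, methods, and assemblies. Defaults to false" };

    // New shape after this diff (same initializer, renamed type):
    public Option<bool> Resilient { get; } =
        new("--resilient") { Description = "Ignore unresolved types, methods, and assemblies. Defaults to false" };
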
result.Tokens[0].Value : null), Description = "Target OS for cross compilation", HelpName = "arg" }; - public CliOption JitPath { get; } = + public Option JitPath { get; } = new("--jitpath") { Description = "Path to JIT compiler library" }; - public CliOption SingleMethodTypeName { get; } = + public Option SingleMethodTypeName { get; } = new("--singlemethodtypename") { Description = "Single method compilation: assembly-qualified name of the owning type" }; - public CliOption SingleMethodName { get; } = + public Option SingleMethodName { get; } = new("--singlemethodname") { Description = "Single method compilation: name of the method" }; - public CliOption MaxGenericCycleDepth { get; } = + public Option MaxGenericCycleDepth { get; } = new("--maxgenericcycle") { DefaultValueFactory = _ => CompilerTypeSystemContext.DefaultGenericCycleDepthCutoff, Description = "Max depth of generic cycle" }; - public CliOption MaxGenericCycleBreadth { get; } = + public Option MaxGenericCycleBreadth { get; } = new("--maxgenericcyclebreadth") { DefaultValueFactory = _ => CompilerTypeSystemContext.DefaultGenericCycleBreadthCutoff, Description = "Max breadth of generic cycle expansion" }; - public CliOption SingleMethodGenericArgs { get; } = + public Option SingleMethodGenericArgs { get; } = new("--singlemethodgenericarg") { Description = "Single method compilation: generic arguments to the method" }; - public CliOption MakeReproPath { get; } = + public Option MakeReproPath { get; } = new("--make-repro-path") { Description = "Path where to place a repro package" }; - public CliOption UnmanagedEntryPointsAssemblies { get; } = + public Option UnmanagedEntryPointsAssemblies { get; } = new("--generateunmanagedentrypoints") { DefaultValueFactory = _ => Array.Empty(), Description = "Generate unmanaged entrypoints for a given assembly" }; public OptimizationMode OptimizationMode { get; private set; } @@ -187,6 +195,8 @@ public ILCompilerRootCommand(string[] args) : base(".NET Native IL Compiler") Options.Add(OptimizeSpace); Options.Add(OptimizeTime); Options.Add(MibcFilePaths); + Options.Add(MethodLayout); + Options.Add(FileLayout); Options.Add(SatelliteFilePaths); Options.Add(EnableDebugInfo); Options.Add(UseDwarf5); @@ -324,64 +334,57 @@ public ILCompilerRootCommand(string[] args) : base(".NET Native IL Compiler") }); } - public static IEnumerable> GetExtendedHelp(HelpContext _) + public static void PrintExtendedHelp(ParseResult _) { - foreach (Func sectionDelegate in HelpBuilder.Default.GetLayout()) - yield return sectionDelegate; + Console.WriteLine("Options may be passed on the command line, or via response file. On the command line switch values may be specified by passing " + + "the option followed by a space followed by the value of the option, or by specifying a : between option and switch value. A response file " + + "is specified by passing the @ symbol before the response file name. In a response file all options must be specified on their own lines, and " + + "only the : syntax for switches is supported.\n"); - yield return _ => - { - Console.WriteLine("Options may be passed on the command line, or via response file. On the command line switch values may be specified by passing " + - "the option followed by a space followed by the value of the option, or by specifying a : between option and switch value. A response file " + - "is specified by passing the @ symbol before the response file name. 
In a response file all options must be specified on their own lines, and " + - "only the : syntax for switches is supported.\n"); - - Console.WriteLine("Use the '--' option to disambiguate between input files that have begin with -- and options. After a '--' option, all arguments are " + - "considered to be input files. If no input files begin with '--' then this option is not necessary.\n"); + Console.WriteLine("Use the '--' option to disambiguate between input files that have begin with -- and options. After a '--' option, all arguments are " + + "considered to be input files. If no input files begin with '--' then this option is not necessary.\n"); - string[] ValidArchitectures = new string[] { "arm", "arm64", "x86", "x64", "riscv64", "loongarch64" }; - string[] ValidOS = new string[] { "windows", "linux", "freebsd", "osx", "maccatalyst", "ios", "iossimulator", "tvos", "tvossimulator" }; + string[] ValidArchitectures = new string[] { "arm", "arm64", "x86", "x64", "riscv64", "loongarch64" }; + string[] ValidOS = new string[] { "windows", "linux", "freebsd", "osx", "maccatalyst", "ios", "iossimulator", "tvos", "tvossimulator" }; - Console.WriteLine("Valid switches for {0} are: '{1}'. The default value is '{2}'\n", "--targetos", string.Join("', '", ValidOS), Helpers.GetTargetOS(null).ToString().ToLowerInvariant()); + Console.WriteLine("Valid switches for {0} are: '{1}'. The default value is '{2}'\n", "--targetos", string.Join("', '", ValidOS), Helpers.GetTargetOS(null).ToString().ToLowerInvariant()); - Console.WriteLine(string.Format("Valid switches for {0} are: '{1}'. The default value is '{2}'\n", "--targetarch", string.Join("', '", ValidArchitectures), Helpers.GetTargetArchitecture(null).ToString().ToLowerInvariant())); + Console.WriteLine(string.Format("Valid switches for {0} are: '{1}'. The default value is '{2}'\n", "--targetarch", string.Join("', '", ValidArchitectures), Helpers.GetTargetArchitecture(null).ToString().ToLowerInvariant())); - Console.WriteLine("The allowable values for the --instruction-set option are described in the table below. Each architecture has a different set of valid " + - "instruction sets, and multiple instruction sets may be specified by separating the instructions sets by a ','. For example 'avx2,bmi,lzcnt'"); + Console.WriteLine("The allowable values for the --instruction-set option are described in the table below. Each architecture has a different set of valid " + + "instruction sets, and multiple instruction sets may be specified by separating the instructions sets by a ','. 
For example 'avx2,bmi,lzcnt'"); - foreach (string arch in ValidArchitectures) + foreach (string arch in ValidArchitectures) + { + TargetArchitecture targetArch = Helpers.GetTargetArchitecture(arch); + bool first = true; + foreach (var instructionSet in Internal.JitInterface.InstructionSetFlags.ArchitectureToValidInstructionSets(targetArch)) { - TargetArchitecture targetArch = Helpers.GetTargetArchitecture(arch); - bool first = true; - foreach (var instructionSet in Internal.JitInterface.InstructionSetFlags.ArchitectureToValidInstructionSets(targetArch)) + // Only instruction sets with are specifiable should be printed to the help text + if (instructionSet.Specifiable) { - // Only instruction sets with are specifiable should be printed to the help text - if (instructionSet.Specifiable) + if (first) { - if (first) - { - Console.Write(arch); - Console.Write(": "); - first = false; - } - else - { - Console.Write(", "); - } - Console.Write(instructionSet.Name); + Console.Write(arch); + Console.Write(": "); + first = false; } + else + { + Console.Write(", "); + } + Console.Write(instructionSet.Name); } - - if (first) continue; // no instruction-set found for this architecture - - Console.WriteLine(); } + if (first) continue; // no instruction-set found for this architecture + Console.WriteLine(); - Console.WriteLine("The following CPU names are predefined groups of instruction sets and can be used in --instruction-set too:"); - Console.WriteLine(string.Join(", ", Internal.JitInterface.InstructionSetFlags.AllCpuNames)); - return true; - }; + } + + Console.WriteLine(); + Console.WriteLine("The following CPU names are predefined groups of instruction sets and can be used in --instruction-set too:"); + Console.WriteLine(string.Join(", ", Internal.JitInterface.InstructionSetFlags.AllCpuNames)); } private static TargetArchitecture MakeTargetArchitecture(ArgumentResult result) @@ -412,6 +415,37 @@ private static int MakeParallelism(ArgumentResult result) return parallelism; } + private static MethodLayoutAlgorithm MakeMethodLayoutAlgorithm(ArgumentResult result) + { + if (result.Tokens.Count == 0) + return MethodLayoutAlgorithm.DefaultSort; + + return result.Tokens[0].Value.ToLowerInvariant() switch + { + "defaultsort" => MethodLayoutAlgorithm.DefaultSort, + "exclusiveweight" => MethodLayoutAlgorithm.ExclusiveWeight, + "hotcold" => MethodLayoutAlgorithm.HotCold, + "instrumentedhotcold" => MethodLayoutAlgorithm.InstrumentedHotCold, + "hotwarmcold" => MethodLayoutAlgorithm.HotWarmCold, + "pettishansen" => MethodLayoutAlgorithm.PettisHansen, + "random" => MethodLayoutAlgorithm.Random, + _ => throw new CommandLineException(result.Tokens[0].Value) + }; + } + + private static FileLayoutAlgorithm MakeFileLayoutAlgorithm(ArgumentResult result) + { + if (result.Tokens.Count == 0) + return FileLayoutAlgorithm.DefaultSort; + + return result.Tokens[0].Value.ToLowerInvariant() switch + { + "defaultsort" => FileLayoutAlgorithm.DefaultSort, + "methodorder" => FileLayoutAlgorithm.MethodOrder, + _ => throw new CommandLineException(result.Tokens[0].Value) + }; + } + #if DEBUG private static bool DumpReproArguments(CodeGenerationFailedException ex) { diff --git a/src/coreclr/tools/aot/ILCompiler/ILCompiler_inbuild.csproj b/src/coreclr/tools/aot/ILCompiler/ILCompiler_inbuild.csproj index d22aff2a1dea..de290906b2e0 100644 --- a/src/coreclr/tools/aot/ILCompiler/ILCompiler_inbuild.csproj +++ b/src/coreclr/tools/aot/ILCompiler/ILCompiler_inbuild.csproj @@ -15,7 +15,7 @@ false - + true true diff --git 
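
With the parsers above in place, ilc now exposes the same two layout switches crossgen2 already had; the accepted spellings are case-insensitive and anything else raises a CommandLineException. A compact sketch of the option surface, with the generic parameters assumed from the parser return types and a purely hypothetical invocation in the comment:

    // Hypothetical use:  ilc @MyApp.ilc.rsp --mibc MyApp.mibc --method-layout hotwarmcold --file-layout methodorder
    public Option<MethodLayoutAlgorithm> MethodLayout { get; } =
        new("--method-layout") { CustomParser = MakeMethodLayoutAlgorithm, DefaultValueFactory = MakeMethodLayoutAlgorithm, HelpName = "arg" };
    // accepted values: defaultsort, exclusiveweight, hotcold, instrumentedhotcold, hotwarmcold, pettishansen, random
    public Option<FileLayoutAlgorithm> FileLayout { get; } =
        new("--file-layout") { CustomParser = MakeFileLayoutAlgorithm, DefaultValueFactory = MakeFileLayoutAlgorithm, HelpName = "arg" };
    // accepted values: defaultsort, methodorder
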
a/src/coreclr/tools/aot/ILCompiler/ILCompiler_publish.csproj b/src/coreclr/tools/aot/ILCompiler/ILCompiler_publish.csproj index c48b1aa3fb7a..239e3f83618a 100644 --- a/src/coreclr/tools/aot/ILCompiler/ILCompiler_publish.csproj +++ b/src/coreclr/tools/aot/ILCompiler/ILCompiler_publish.csproj @@ -2,12 +2,14 @@ <_IsPublishing>true - $(PackageRID) + + $(PortableOS)-$(TargetArchitecture) $(RuntimeBinDir)ilc-published/ true true false true + true diff --git a/src/coreclr/tools/aot/ILCompiler/Program.cs b/src/coreclr/tools/aot/ILCompiler/Program.cs index 695a91553cf2..cd5ef1642e79 100644 --- a/src/coreclr/tools/aot/ILCompiler/Program.cs +++ b/src/coreclr/tools/aot/ILCompiler/Program.cs @@ -348,6 +348,7 @@ public int Run() // // Compile // +<<<<<<< HEAD CompilationBuilder builder; bool isLlvmCodegen = targetArchitecture == TargetArchitecture.Wasm32 || targetArchitecture == TargetArchitecture.Wasm64; @@ -359,9 +360,13 @@ public int Run() { builder = new RyuJitCompilationBuilder(typeSystemContext, compilationGroup); } +======= +>>>>>>> upstream-jun string compilationUnitPrefix = multiFile ? Path.GetFileNameWithoutExtension(outputFilePath) : ""; - builder.UseCompilationUnitPrefix(compilationUnitPrefix); + var builder = new RyuJitCompilationBuilder(typeSystemContext, compilationGroup) + .FileLayoutAlgorithms(Get(_command.MethodLayout), Get(_command.FileLayout)) + .UseCompilationUnitPrefix(compilationUnitPrefix); string[] mibcFilePaths = Get(_command.MibcFilePaths); if (mibcFilePaths.Length > 0) @@ -433,7 +438,6 @@ public int Run() resBlockingPolicy = new ManifestResourceBlockingPolicy(logger, featureSwitches, resourceBlocks); - metadataGenerationOptions |= UsageBasedMetadataGenerationOptions.AnonymousTypeHeuristic; if (Get(_command.CompleteTypesMetadata)) metadataGenerationOptions |= UsageBasedMetadataGenerationOptions.CompleteTypesOnly; if (Get(_command.ScanReflection)) @@ -805,12 +809,12 @@ private static IEnumerable ProcessWarningCodes(IEnumerable warningC } } - private T Get(CliOption option) => _command.Result.GetValue(option); + private T Get(Option option) => _command.Result.GetValue(option); private static int Main(string[] args) => - new CliConfiguration(new ILCompilerRootCommand(args) + new CommandLineConfiguration(new ILCompilerRootCommand(args) .UseVersion() - .UseExtendedHelp(ILCompilerRootCommand.GetExtendedHelp)) + .UseExtendedHelp(ILCompilerRootCommand.PrintExtendedHelp)) { ResponseFileTokenReplacer = Helpers.TryReadResponseFile, EnableDefaultExceptionHandler = false, diff --git a/src/coreclr/tools/aot/crossgen2.sln b/src/coreclr/tools/aot/crossgen2.sln deleted file mode 100644 index 1f7ac88c4db6..000000000000 --- a/src/coreclr/tools/aot/crossgen2.sln +++ /dev/null @@ -1,155 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.0.31612.314 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "crossgen2", "crossgen2\crossgen2.csproj", "{9B928D3E-06AB-45E5-BF79-F374F0AE3B98}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.DependencyAnalysisFramework", "ILCompiler.DependencyAnalysisFramework\ILCompiler.DependencyAnalysisFramework.csproj", "{FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.ReadyToRun", "ILCompiler.ReadyToRun\ILCompiler.ReadyToRun.csproj", "{83A832DE-BF4A-44C4-B361-90F5F88B979B}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.Diagnostics", 
"ILCompiler.Diagnostics\ILCompiler.Diagnostics.csproj", "{3EACD929-4725-4173-A845-734936BBDF87}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.Reflection.ReadyToRun", "ILCompiler.Reflection.ReadyToRun\ILCompiler.Reflection.ReadyToRun.csproj", "{0BB34BA1-1B3A-445C-9C04-0D710D1983F0}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.TypeSystem", "ILCompiler.TypeSystem\ILCompiler.TypeSystem.csproj", "{C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.TypeSystem.Tests", "ILCompiler.TypeSystem.Tests\ILCompiler.TypeSystem.Tests.csproj", "{9E65EC58-B500-4C4A-B57D-BF242129A3C6}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Checked|Any CPU = Checked|Any CPU - Checked|x64 = Checked|x64 - Checked|x86 = Checked|x86 - Debug|Any CPU = Debug|Any CPU - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|Any CPU = Release|Any CPU - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Checked|Any CPU.ActiveCfg = Checked|x86 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Checked|x64.ActiveCfg = Checked|x64 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Checked|x64.Build.0 = Checked|x64 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Checked|x86.ActiveCfg = Checked|x86 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Checked|x86.Build.0 = Checked|x86 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Debug|Any CPU.ActiveCfg = Debug|x86 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Debug|x64.ActiveCfg = Debug|x64 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Debug|x64.Build.0 = Debug|x64 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Debug|x86.ActiveCfg = Debug|x86 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Debug|x86.Build.0 = Debug|x86 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Release|Any CPU.ActiveCfg = Release|x86 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Release|x64.ActiveCfg = Release|x64 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Release|x64.Build.0 = Release|x64 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Release|x86.ActiveCfg = Release|x86 - {9B928D3E-06AB-45E5-BF79-F374F0AE3B98}.Release|x86.Build.0 = Release|x86 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Checked|Any CPU.ActiveCfg = Checked|x86 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Checked|x64.ActiveCfg = Checked|x64 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Checked|x64.Build.0 = Checked|x64 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Checked|x86.ActiveCfg = Checked|x86 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Checked|x86.Build.0 = Checked|x86 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Debug|Any CPU.ActiveCfg = Debug|x86 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Debug|x64.ActiveCfg = Debug|x64 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Debug|x64.Build.0 = Debug|x64 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Debug|x86.ActiveCfg = Debug|x86 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Debug|x86.Build.0 = Debug|x86 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Release|Any CPU.ActiveCfg = Release|x86 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Release|x64.ActiveCfg = Release|x64 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Release|x64.Build.0 = Release|x64 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Release|x86.ActiveCfg = Release|x86 - {FB2D45F2-FA4C-42B2-8E53-3E1F30CF8046}.Release|x86.Build.0 = Release|x86 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Checked|Any CPU.ActiveCfg = Checked|x86 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Checked|x64.ActiveCfg = 
Checked|x64 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Checked|x64.Build.0 = Checked|x64 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Checked|x86.ActiveCfg = Checked|x86 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Checked|x86.Build.0 = Checked|x86 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Debug|Any CPU.ActiveCfg = Debug|x86 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Debug|x64.ActiveCfg = Debug|x64 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Debug|x64.Build.0 = Debug|x64 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Debug|x86.ActiveCfg = Debug|x86 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Debug|x86.Build.0 = Debug|x86 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Release|Any CPU.ActiveCfg = Release|x86 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Release|x64.ActiveCfg = Release|x64 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Release|x64.Build.0 = Release|x64 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Release|x86.ActiveCfg = Release|x86 - {83A832DE-BF4A-44C4-B361-90F5F88B979B}.Release|x86.Build.0 = Release|x86 - {3EACD929-4725-4173-A845-734936BBDF87}.Checked|Any CPU.ActiveCfg = Debug|x86 - {3EACD929-4725-4173-A845-734936BBDF87}.Checked|x64.ActiveCfg = Debug|x64 - {3EACD929-4725-4173-A845-734936BBDF87}.Checked|x64.Build.0 = Debug|x64 - {3EACD929-4725-4173-A845-734936BBDF87}.Checked|x86.ActiveCfg = Debug|x86 - {3EACD929-4725-4173-A845-734936BBDF87}.Checked|x86.Build.0 = Debug|x86 - {3EACD929-4725-4173-A845-734936BBDF87}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {3EACD929-4725-4173-A845-734936BBDF87}.Debug|Any CPU.Build.0 = Debug|Any CPU - {3EACD929-4725-4173-A845-734936BBDF87}.Debug|x64.ActiveCfg = Debug|x64 - {3EACD929-4725-4173-A845-734936BBDF87}.Debug|x64.Build.0 = Debug|x64 - {3EACD929-4725-4173-A845-734936BBDF87}.Debug|x86.ActiveCfg = Debug|Any CPU - {3EACD929-4725-4173-A845-734936BBDF87}.Debug|x86.Build.0 = Debug|Any CPU - {3EACD929-4725-4173-A845-734936BBDF87}.Release|Any CPU.ActiveCfg = Release|Any CPU - {3EACD929-4725-4173-A845-734936BBDF87}.Release|Any CPU.Build.0 = Release|Any CPU - {3EACD929-4725-4173-A845-734936BBDF87}.Release|x64.ActiveCfg = Release|x64 - {3EACD929-4725-4173-A845-734936BBDF87}.Release|x64.Build.0 = Release|x64 - {3EACD929-4725-4173-A845-734936BBDF87}.Release|x86.ActiveCfg = Release|x86 - {3EACD929-4725-4173-A845-734936BBDF87}.Release|x86.Build.0 = Release|x86 - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Checked|Any CPU.Build.0 = Debug|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Checked|x64.ActiveCfg = Debug|x64 - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Checked|x64.Build.0 = Debug|x64 - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Checked|x86.ActiveCfg = Debug|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Checked|x86.Build.0 = Debug|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Debug|Any CPU.Build.0 = Debug|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Debug|x64.ActiveCfg = Debug|x64 - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Debug|x64.Build.0 = Debug|x64 - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Debug|x86.ActiveCfg = Debug|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Debug|x86.Build.0 = Debug|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Release|Any CPU.ActiveCfg = Release|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Release|Any CPU.Build.0 = Release|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Release|x64.ActiveCfg = Release|x64 - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Release|x64.Build.0 = Release|x64 - 
{0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Release|x86.ActiveCfg = Release|Any CPU - {0BB34BA1-1B3A-445C-9C04-0D710D1983F0}.Release|x86.Build.0 = Release|Any CPU - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Checked|Any CPU.ActiveCfg = Checked|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Checked|Any CPU.Build.0 = Checked|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Checked|x64.ActiveCfg = Checked|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Checked|x64.Build.0 = Checked|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Checked|x86.ActiveCfg = Checked|x86 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Checked|x86.Build.0 = Checked|x86 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Debug|Any CPU.ActiveCfg = Debug|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Debug|Any CPU.Build.0 = Debug|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Debug|x64.ActiveCfg = Debug|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Debug|x64.Build.0 = Debug|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Debug|x86.ActiveCfg = Debug|x86 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Debug|x86.Build.0 = Debug|x86 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Release|Any CPU.ActiveCfg = Release|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Release|Any CPU.Build.0 = Release|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Release|x64.ActiveCfg = Release|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Release|x64.Build.0 = Release|x64 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Release|x86.ActiveCfg = Release|x86 - {C07EE795-01F1-49F5-A4EE-F8235A1F3F7A}.Release|x86.Build.0 = Release|x86 - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Checked|Any CPU.ActiveCfg = Checked|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Checked|Any CPU.Build.0 = Checked|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Checked|x64.ActiveCfg = Checked|x64 - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Checked|x64.Build.0 = Checked|x64 - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Checked|x86.ActiveCfg = Checked|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Checked|x86.Build.0 = Checked|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Debug|Any CPU.Build.0 = Debug|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Debug|x64.ActiveCfg = Debug|x64 - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Debug|x64.Build.0 = Debug|x64 - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Debug|x86.ActiveCfg = Debug|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Debug|x86.Build.0 = Debug|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Release|Any CPU.ActiveCfg = Release|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Release|Any CPU.Build.0 = Release|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Release|x64.ActiveCfg = Release|x64 - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Release|x64.Build.0 = Release|x64 - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Release|x86.ActiveCfg = Release|Any CPU - {9E65EC58-B500-4C4A-B57D-BF242129A3C6}.Release|x86.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {A484CF9D-B203-427F-9D15-A5BBC6013421} - EndGlobalSection -EndGlobal diff --git a/src/coreclr/tools/aot/crossgen2.slnx b/src/coreclr/tools/aot/crossgen2.slnx new file mode 100644 index 000000000000..bc39e9cb4ffe --- /dev/null +++ b/src/coreclr/tools/aot/crossgen2.slnx @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs index 6265d0df45b6..fb25975a729f 100644 --- a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs +++ b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs @@ -13,134 +13,136 @@ namespace ILCompiler { - internal class Crossgen2RootCommand : CliRootCommand + internal class Crossgen2RootCommand : RootCommand { - public CliArgument> InputFilePaths { get; } = + public Argument> InputFilePaths { get; } = new("input-file-path") { CustomParser = result => Helpers.BuildPathDictionary(result.Tokens, true), Description = "Input file(s)", Arity = ArgumentArity.OneOrMore }; - public CliOption> UnrootedInputFilePaths { get; } = + public Option> UnrootedInputFilePaths { get; } = new("--unrooted-input-file-paths", "-u") { CustomParser = result => Helpers.BuildPathDictionary(result.Tokens, true), DefaultValueFactory = result => Helpers.BuildPathDictionary(result.Tokens, true), Description = SR.UnrootedInputFilesToCompile }; - public CliOption> ReferenceFilePaths { get; } = + public Option> ReferenceFilePaths { get; } = new("--reference", "-r") { CustomParser = result => Helpers.BuildPathDictionary(result.Tokens, false), DefaultValueFactory = result => Helpers.BuildPathDictionary(result.Tokens, false), Description = SR.ReferenceFiles }; - public CliOption InstructionSet { get; } = + public Option InstructionSet { get; } = new("--instruction-set") { Description = SR.InstructionSets }; - public CliOption MaxVectorTBitWidth { get; } = + public Option MaxVectorTBitWidth { get; } = new("--max-vectort-bitwidth") { Description = SR.MaxVectorTBitWidths }; - public CliOption MibcFilePaths { get; } = + public Option MibcFilePaths { get; } = new("--mibc", "-m") { DefaultValueFactory = _ => Array.Empty(), Description = SR.MibcFiles }; - public CliOption OutputFilePath { get; } = + public Option OutputFilePath { get; } = new("--out", "-o") { Description = SR.OutputFilePath }; - public CliOption CompositeRootPath { get; } = + public Option CompositeRootPath { get; } = new("--compositerootpath", "--crp") { Description = SR.CompositeRootPath }; - public CliOption Optimize { get; } = + public Option Optimize { get; } = new("--optimize", "-O") { Description = SR.EnableOptimizationsOption }; - public CliOption OptimizeDisabled { get; } = + public Option OptimizeDisabled { get; } = new("--optimize-disabled", "--Od") { Description = SR.DisableOptimizationsOption }; - public CliOption OptimizeSpace { get; } = + public Option OptimizeSpace { get; } = new("--optimize-space", "--Os") { Description = SR.OptimizeSpaceOption }; - public CliOption OptimizeTime { get; } = + public Option OptimizeTime { get; } = new("--optimize-time", "--Ot") { Description = SR.OptimizeSpeedOption }; - public CliOption TypeValidation { get; } = + public Option EnableCachedInterfaceDispatchSupport { get; } = + new("--enable-cached-interface-dispatch-support", "--CID") { Description = SR.EnableCachedInterfaceDispatchSupport }; + public Option TypeValidation { get; } = new("--type-validation") { DefaultValueFactory = _ => TypeValidationRule.Automatic, Description = SR.TypeValidation, HelpName = "arg" }; - public CliOption InputBubble { get; } = + public Option InputBubble { get; } = new("--inputbubble") { Description = SR.InputBubbleOption }; - public CliOption> InputBubbleReferenceFilePaths { get; } = + public Option> InputBubbleReferenceFilePaths { get; } = new("--inputbubbleref") { CustomParser = result => 
Helpers.BuildPathDictionary(result.Tokens, false), DefaultValueFactory = result => Helpers.BuildPathDictionary(result.Tokens, false), Description = SR.InputBubbleReferenceFiles }; - public CliOption Composite { get; } = + public Option Composite { get; } = new("--composite") { Description = SR.CompositeBuildMode }; - public CliOption CompositeKeyFile { get; } = + public Option CompositeKeyFile { get; } = new("--compositekeyfile") { Description = SR.CompositeKeyFile }; - public CliOption CompileNoMethods { get; } = + public Option CompileNoMethods { get; } = new("--compile-no-methods") { Description = SR.CompileNoMethodsOption }; - public CliOption OutNearInput { get; } = + public Option OutNearInput { get; } = new("--out-near-input") { Description = SR.OutNearInputOption }; - public CliOption SingleFileCompilation { get; } = + public Option SingleFileCompilation { get; } = new("--single-file-compilation") { Description = SR.SingleFileCompilationOption }; - public CliOption Partial { get; } = + public Option Partial { get; } = new("--partial") { Description = SR.PartialImageOption }; - public CliOption CompileBubbleGenerics { get; } = + public Option CompileBubbleGenerics { get; } = new("--compilebubblegenerics") { Description = SR.BubbleGenericsOption }; - public CliOption EmbedPgoData { get; } = + public Option EmbedPgoData { get; } = new("--embed-pgo-data") { Description = SR.EmbedPgoDataOption }; - public CliOption DgmlLogFileName { get; } = + public Option DgmlLogFileName { get; } = new("--dgmllog") { Description = SR.SaveDependencyLogOption }; - public CliOption GenerateFullDgmlLog { get; } = + public Option GenerateFullDgmlLog { get; } = new("--fulllog") { Description = SR.SaveDetailedLogOption }; - public CliOption IsVerbose { get; } = + public Option IsVerbose { get; } = new("--verbose") { Description = SR.VerboseLoggingOption }; - public CliOption SystemModuleName { get; } = + public Option SystemModuleName { get; } = new("--systemmodule") { DefaultValueFactory = _ => Helpers.DefaultSystemModule, Description = SR.SystemModuleOverrideOption }; - public CliOption WaitForDebugger { get; } = + public Option WaitForDebugger { get; } = new("--waitfordebugger") { Description = SR.WaitForDebuggerOption }; - public CliOption CodegenOptions { get; } = + public Option CodegenOptions { get; } = new("--codegenopt") { DefaultValueFactory = _ => Array.Empty(), Description = SR.CodeGenOptions }; - public CliOption SupportIbc { get; } = + public Option SupportIbc { get; } = new("--support-ibc") { Description = SR.SupportIbc }; - public CliOption Resilient { get; } = + public Option Resilient { get; } = new("--resilient") { Description = SR.ResilientOption }; - public CliOption ImageBase { get; } = + public Option ImageBase { get; } = new("--imagebase") { Description = SR.ImageBase }; - public CliOption TargetArchitecture { get; } = + public Option TargetArchitecture { get; } = new("--targetarch") { CustomParser = MakeTargetArchitecture, DefaultValueFactory = MakeTargetArchitecture, Description = SR.TargetArchOption, Arity = ArgumentArity.OneOrMore, HelpName = "arg" }; - public CliOption EnableGenericCycleDetection { get; } = + public Option EnableGenericCycleDetection { get; } = new("--enable-generic-cycle-detection") { Description = SR.EnableGenericCycleDetection }; - public CliOption GenericCycleDepthCutoff { get; } = + public Option GenericCycleDepthCutoff { get; } = new("--maxgenericcycle") { DefaultValueFactory = _ => ReadyToRunCompilerContext.DefaultGenericCycleDepthCutoff, Description = 
SR.GenericCycleDepthCutoff }; - public CliOption GenericCycleBreadthCutoff { get; } = + public Option GenericCycleBreadthCutoff { get; } = new("--maxgenericcyclebreadth") { DefaultValueFactory = _ => ReadyToRunCompilerContext.DefaultGenericCycleBreadthCutoff, Description = SR.GenericCycleBreadthCutoff }; - public CliOption TargetOS { get; } = + public Option TargetOS { get; } = new("--targetos") { CustomParser = result => Helpers.GetTargetOS(result.Tokens.Count > 0 ? result.Tokens[0].Value : null), DefaultValueFactory = result => Helpers.GetTargetOS(result.Tokens.Count > 0 ? result.Tokens[0].Value : null), Description = SR.TargetOSOption, HelpName = "arg" }; - public CliOption JitPath { get; } = + public Option JitPath { get; } = new("--jitpath") { Description = SR.JitPathOption }; - public CliOption PrintReproInstructions { get; } = + public Option PrintReproInstructions { get; } = new("--print-repro-instructions") { Description = SR.PrintReproInstructionsOption }; - public CliOption SingleMethodTypeName { get; } = + public Option SingleMethodTypeName { get; } = new("--singlemethodtypename") { Description = SR.SingleMethodTypeName }; - public CliOption SingleMethodName { get; } = + public Option SingleMethodName { get; } = new("--singlemethodname") { Description = SR.SingleMethodMethodName }; - public CliOption SingleMethodIndex { get; } = + public Option SingleMethodIndex { get; } = new("--singlemethodindex") { Description = SR.SingleMethodIndex }; - public CliOption SingleMethodGenericArgs { get; } = + public Option SingleMethodGenericArgs { get; } = new("--singlemethodgenericarg") { Description = SR.SingleMethodGenericArgs }; - public CliOption Parallelism { get; } = + public Option Parallelism { get; } = new("--parallelism") { CustomParser = MakeParallelism, DefaultValueFactory = MakeParallelism, Description = SR.ParalellismOption }; - public CliOption CustomPESectionAlignment { get; } = + public Option CustomPESectionAlignment { get; } = new("--custom-pe-section-alignment") { Description = SR.CustomPESectionAlignmentOption }; - public CliOption Map { get; } = + public Option Map { get; } = new("--map") { Description = SR.MapFileOption }; - public CliOption MapCsv { get; } = + public Option MapCsv { get; } = new("--mapcsv") { Description = SR.MapCsvFileOption }; - public CliOption Pdb { get; } = + public Option Pdb { get; } = new("--pdb") { Description = SR.PdbFileOption }; - public CliOption PdbPath { get; } = + public Option PdbPath { get; } = new("--pdb-path") { Description = SR.PdbFilePathOption }; - public CliOption PerfMap { get; } = + public Option PerfMap { get; } = new("--perfmap") { Description = SR.PerfMapFileOption }; - public CliOption PerfMapPath { get; } = + public Option PerfMapPath { get; } = new("--perfmap-path") { Description = SR.PerfMapFilePathOption }; - public CliOption PerfMapFormatVersion { get; } = + public Option PerfMapFormatVersion { get; } = new("--perfmap-format-version") { DefaultValueFactory = _ => 0, Description = SR.PerfMapFormatVersionOption }; - public CliOption CrossModuleInlining { get; } = + public Option CrossModuleInlining { get; } = new("--opt-cross-module") { Description = SR.CrossModuleInlining }; - public CliOption AsyncMethodOptimization { get; } = + public Option AsyncMethodOptimization { get; } = new("--opt-async-methods") { Description = SR.AsyncModuleOptimization }; - public CliOption NonLocalGenericsModule { get; } = + public Option NonLocalGenericsModule { get; } = new("--non-local-generics-module") { DefaultValueFactory = _ => 
string.Empty, Description = SR.NonLocalGenericsModule }; - public CliOption MethodLayout { get; } = - new("--method-layout") { CustomParser = MakeReadyToRunMethodLayoutAlgorithm, DefaultValueFactory = MakeReadyToRunMethodLayoutAlgorithm, Description = SR.MethodLayoutOption, HelpName = "arg" }; - public CliOption FileLayout { get; } = - new("--file-layout") { CustomParser = MakeReadyToRunFileLayoutAlgorithm, DefaultValueFactory = MakeReadyToRunFileLayoutAlgorithm, Description = SR.FileLayoutOption, HelpName = "arg" }; - public CliOption VerifyTypeAndFieldLayout { get; } = + public Option MethodLayout { get; } = + new("--method-layout") { CustomParser = MakeMethodLayoutAlgorithm, DefaultValueFactory = MakeMethodLayoutAlgorithm, Description = SR.MethodLayoutOption, HelpName = "arg" }; + public Option FileLayout { get; } = + new("--file-layout") { CustomParser = MakeFileLayoutAlgorithm, DefaultValueFactory = MakeFileLayoutAlgorithm, Description = SR.FileLayoutOption, HelpName = "arg" }; + public Option VerifyTypeAndFieldLayout { get; } = new("--verify-type-and-field-layout") { Description = SR.VerifyTypeAndFieldLayoutOption }; - public CliOption CallChainProfileFile { get; } = + public Option CallChainProfileFile { get; } = new("--callchain-profile") { Description = SR.CallChainProfileFile }; - public CliOption MakeReproPath { get; } = + public Option MakeReproPath { get; } = new("--make-repro-path") { Description = "Path where to place a repro package" }; - public CliOption HotColdSplitting { get; } = + public Option HotColdSplitting { get; } = new("--hot-cold-splitting") { Description = SR.HotColdSplittingOption }; - public CliOption SynthesizeRandomMibc { get; } = + public Option SynthesizeRandomMibc { get; } = new("--synthesize-random-mibc"); - public CliOption DeterminismStress { get; } = + public Option DeterminismStress { get; } = new("--determinism-stress"); public bool CompositeOrInputBubble { get; private set; } @@ -163,6 +165,7 @@ public Crossgen2RootCommand(string[] args) : base(SR.Crossgen2BannerText) Options.Add(OptimizeDisabled); Options.Add(OptimizeSpace); Options.Add(OptimizeTime); + Options.Add(EnableCachedInterfaceDispatchSupport); Options.Add(TypeValidation); Options.Add(InputBubble); Options.Add(InputBubbleReferenceFilePaths); @@ -282,69 +285,62 @@ public Crossgen2RootCommand(string[] args) : base(SR.Crossgen2BannerText) }); } - public static IEnumerable> GetExtendedHelp(HelpContext _) + public static void PrintExtendedHelp(ParseResult _) { - foreach (Func sectionDelegate in HelpBuilder.Default.GetLayout()) - yield return sectionDelegate; - - yield return _ => + Console.WriteLine(SR.OptionPassingHelp); + Console.WriteLine(); + Console.WriteLine(SR.DashDashHelp); + Console.WriteLine(); + + string[] ValidArchitectures = new string[] {"arm", "armel", "arm64", "x86", "x64", "riscv64", "loongarch64"}; + string[] ValidOS = new string[] {"windows", "linux", "osx", "ios", "iossimulator", "maccatalyst"}; + + Console.WriteLine(String.Format(SR.SwitchWithDefaultHelp, "--targetos", String.Join("', '", ValidOS), Helpers.GetTargetOS(null).ToString().ToLowerInvariant())); + Console.WriteLine(); + Console.WriteLine(String.Format(SR.SwitchWithDefaultHelp, "--targetarch", String.Join("', '", ValidArchitectures), Helpers.GetTargetArchitecture(null).ToString().ToLowerInvariant())); + Console.WriteLine(); + Console.WriteLine(String.Format(SR.SwitchWithDefaultHelp, "--type-validation", String.Join("', '", Enum.GetNames()), nameof(TypeValidationRule.Automatic))); + Console.WriteLine(); + + 
Console.WriteLine(SR.CrossModuleInliningExtraHelp); + Console.WriteLine(); + Console.WriteLine(String.Format(SR.LayoutOptionExtraHelp, "--method-layout", String.Join("', '", Enum.GetNames()))); + Console.WriteLine(); + Console.WriteLine(String.Format(SR.LayoutOptionExtraHelp, "--file-layout", String.Join("', '", Enum.GetNames()))); + Console.WriteLine(); + + Console.WriteLine(SR.InstructionSetHelp); + foreach (string arch in ValidArchitectures) { - Console.WriteLine(SR.OptionPassingHelp); - Console.WriteLine(); - Console.WriteLine(SR.DashDashHelp); - Console.WriteLine(); - - string[] ValidArchitectures = new string[] {"arm", "armel", "arm64", "x86", "x64", "riscv64", "loongarch64"}; - string[] ValidOS = new string[] {"windows", "linux", "osx", "ios", "iossimulator", "maccatalyst"}; - - Console.WriteLine(String.Format(SR.SwitchWithDefaultHelp, "--targetos", String.Join("', '", ValidOS), Helpers.GetTargetOS(null).ToString().ToLowerInvariant())); - Console.WriteLine(); - Console.WriteLine(String.Format(SR.SwitchWithDefaultHelp, "--targetarch", String.Join("', '", ValidArchitectures), Helpers.GetTargetArchitecture(null).ToString().ToLowerInvariant())); - Console.WriteLine(); - Console.WriteLine(String.Format(SR.SwitchWithDefaultHelp, "--type-validation", String.Join("', '", Enum.GetNames()), nameof(TypeValidationRule.Automatic))); - Console.WriteLine(); - - Console.WriteLine(SR.CrossModuleInliningExtraHelp); - Console.WriteLine(); - Console.WriteLine(String.Format(SR.LayoutOptionExtraHelp, "--method-layout", String.Join("', '", Enum.GetNames()))); - Console.WriteLine(); - Console.WriteLine(String.Format(SR.LayoutOptionExtraHelp, "--file-layout", String.Join("', '", Enum.GetNames()))); - Console.WriteLine(); - - Console.WriteLine(SR.InstructionSetHelp); - foreach (string arch in ValidArchitectures) + TargetArchitecture targetArch = Helpers.GetTargetArchitecture(arch); + bool first = true; + foreach (var instructionSet in Internal.JitInterface.InstructionSetFlags.ArchitectureToValidInstructionSets(targetArch)) { - TargetArchitecture targetArch = Helpers.GetTargetArchitecture(arch); - bool first = true; - foreach (var instructionSet in Internal.JitInterface.InstructionSetFlags.ArchitectureToValidInstructionSets(targetArch)) + // Only instruction sets with are specifiable should be printed to the help text + if (instructionSet.Specifiable) { - // Only instruction sets with are specifiable should be printed to the help text - if (instructionSet.Specifiable) + if (first) { - if (first) - { - Console.Write(arch); - Console.Write(": "); - first = false; - } - else - { - Console.Write(", "); - } - Console.Write(instructionSet.Name); + Console.Write(arch); + Console.Write(": "); + first = false; } + else + { + Console.Write(", "); + } + Console.Write(instructionSet.Name); } - - if (first) continue; // no instruction-set found for this architecture - - Console.WriteLine(); } + if (first) continue; // no instruction-set found for this architecture + Console.WriteLine(); - Console.WriteLine(SR.CpuFamilies); - Console.WriteLine(string.Join(", ", Internal.JitInterface.InstructionSetFlags.AllCpuNames)); - return true; - }; + } + + Console.WriteLine(); + Console.WriteLine(SR.CpuFamilies); + Console.WriteLine(string.Join(", ", Internal.JitInterface.InstructionSetFlags.AllCpuNames)); } private static TargetArchitecture MakeTargetArchitecture(ArgumentResult result) @@ -375,33 +371,34 @@ private static int MakeParallelism(ArgumentResult result) return parallelism; } - private static ReadyToRunMethodLayoutAlgorithm 
MakeReadyToRunMethodLayoutAlgorithm(ArgumentResult result) + private static MethodLayoutAlgorithm MakeMethodLayoutAlgorithm(ArgumentResult result) { if (result.Tokens.Count == 0 ) - return ReadyToRunMethodLayoutAlgorithm.DefaultSort; + return MethodLayoutAlgorithm.DefaultSort; return result.Tokens[0].Value.ToLowerInvariant() switch { - "defaultsort" => ReadyToRunMethodLayoutAlgorithm.DefaultSort, - "exclusiveweight" => ReadyToRunMethodLayoutAlgorithm.ExclusiveWeight, - "hotcold" => ReadyToRunMethodLayoutAlgorithm.HotCold, - "hotwarmcold" => ReadyToRunMethodLayoutAlgorithm.HotWarmCold, - "callfrequency" => ReadyToRunMethodLayoutAlgorithm.CallFrequency, - "pettishansen" => ReadyToRunMethodLayoutAlgorithm.PettisHansen, - "random" => ReadyToRunMethodLayoutAlgorithm.Random, + "defaultsort" => MethodLayoutAlgorithm.DefaultSort, + "exclusiveweight" => MethodLayoutAlgorithm.ExclusiveWeight, + "hotcold" => MethodLayoutAlgorithm.HotCold, + "instrumentedhotcold" => MethodLayoutAlgorithm.InstrumentedHotCold, + "hotwarmcold" => MethodLayoutAlgorithm.HotWarmCold, + "callfrequency" => MethodLayoutAlgorithm.CallFrequency, + "pettishansen" => MethodLayoutAlgorithm.PettisHansen, + "random" => MethodLayoutAlgorithm.Random, _ => throw new CommandLineException(SR.InvalidMethodLayout) }; } - private static ReadyToRunFileLayoutAlgorithm MakeReadyToRunFileLayoutAlgorithm(ArgumentResult result) + private static FileLayoutAlgorithm MakeFileLayoutAlgorithm(ArgumentResult result) { if (result.Tokens.Count == 0 ) - return ReadyToRunFileLayoutAlgorithm.DefaultSort; + return FileLayoutAlgorithm.DefaultSort; return result.Tokens[0].Value.ToLowerInvariant() switch { - "defaultsort" => ReadyToRunFileLayoutAlgorithm.DefaultSort, - "methodorder" => ReadyToRunFileLayoutAlgorithm.MethodOrder, + "defaultsort" => FileLayoutAlgorithm.DefaultSort, + "methodorder" => FileLayoutAlgorithm.MethodOrder, _ => throw new CommandLineException(SR.InvalidFileLayout) }; } diff --git a/src/coreclr/tools/aot/crossgen2/Program.cs b/src/coreclr/tools/aot/crossgen2/Program.cs index 1453a6cf1775..c38f577c5a3f 100644 --- a/src/coreclr/tools/aot/crossgen2/Program.cs +++ b/src/coreclr/tools/aot/crossgen2/Program.cs @@ -159,7 +159,7 @@ public int Run() { var module = _typeSystemContext.GetModuleFromPath(inputFile.Value); if ((module.PEReader.PEHeaders.CorHeader.Flags & (CorFlags.ILLibrary | CorFlags.ILOnly)) == (CorFlags)0 - && module.PEReader.TryGetReadyToRunHeader(out int _)) + && module.PEReader.TryGetCompositeReadyToRunHeader(out int _)) { Console.WriteLine(SR.IgnoringCompositeImage, inputFile.Value); continue; @@ -595,6 +595,7 @@ private void RunSingleCompilation(Dictionary inFilePaths, Instru nodeFactoryFlags.TypeValidation = Get(_command.TypeValidation); nodeFactoryFlags.DeterminismStress = Get(_command.DeterminismStress); nodeFactoryFlags.PrintReproArgs = Get(_command.PrintReproInstructions); + nodeFactoryFlags.EnableCachedInterfaceDispatchSupport = Get(_command.EnableCachedInterfaceDispatchSupport); builder .UseMapFile(Get(_command.Map)) @@ -908,12 +909,12 @@ internal static bool IsValidPublicKey(byte[] blob) return true; } - private T Get(CliOption option) => _command.Result.GetValue(option); + private T Get(Option option) => _command.Result.GetValue(option); private static int Main(string[] args) => - new CliConfiguration(new Crossgen2RootCommand(args) + new CommandLineConfiguration(new Crossgen2RootCommand(args) .UseVersion() - .UseExtendedHelp(Crossgen2RootCommand.GetExtendedHelp)) + 
.UseExtendedHelp(Crossgen2RootCommand.PrintExtendedHelp)) { ResponseFileTokenReplacer = Helpers.TryReadResponseFile, EnableDefaultExceptionHandler = false, diff --git a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx index 59bfc796f397..04334beb7950 100644 --- a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx +++ b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx @@ -423,4 +423,7 @@ Number of nested occurrences of a potentially cyclic generic type to cut off - + + Enable support for cached interface dispatch + + \ No newline at end of file diff --git a/src/coreclr/tools/aot/crossgen2/crossgen2.props b/src/coreclr/tools/aot/crossgen2/crossgen2.props index 2ec464eaa54b..d2ed5e4bfcce 100644 --- a/src/coreclr/tools/aot/crossgen2/crossgen2.props +++ b/src/coreclr/tools/aot/crossgen2/crossgen2.props @@ -15,11 +15,9 @@ false Debug;Release;Checked false - true - @@ -55,12 +53,14 @@ @@ -69,6 +69,7 @@ @@ -94,6 +95,7 @@ diff --git a/src/coreclr/tools/aot/crossgen2/crossgen2_inbuild.csproj b/src/coreclr/tools/aot/crossgen2/crossgen2_inbuild.csproj index f7752670e406..6f040291280a 100644 --- a/src/coreclr/tools/aot/crossgen2/crossgen2_inbuild.csproj +++ b/src/coreclr/tools/aot/crossgen2/crossgen2_inbuild.csproj @@ -15,7 +15,7 @@ false - + true true diff --git a/src/coreclr/tools/aot/crossgen2/crossgen2_publish.csproj b/src/coreclr/tools/aot/crossgen2/crossgen2_publish.csproj index c7041857e085..f5f2fe11f658 100644 --- a/src/coreclr/tools/aot/crossgen2/crossgen2_publish.csproj +++ b/src/coreclr/tools/aot/crossgen2/crossgen2_publish.csproj @@ -2,12 +2,14 @@ <_IsPublishing>true - $(PackageRID) + + $(PortableOS)-$(TargetArchitecture) $(RuntimeBinDir)crossgen2-published/ true true false true + true diff --git a/src/coreclr/tools/aot/ilc.slnx b/src/coreclr/tools/aot/ilc.slnx new file mode 100644 index 000000000000..49b7198de620 --- /dev/null +++ b/src/coreclr/tools/aot/ilc.slnx @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h index a1a6122037d2..729a09de403e 100644 --- a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h +++ b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h @@ -76,7 +76,6 @@ struct JitInterfaceCallbacks CorInfoHelpFunc (* getCastingHelper)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool fThrowing); CorInfoHelpFunc (* getSharedCCtorHelper)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE clsHnd); CORINFO_CLASS_HANDLE (* getTypeForBox)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE cls); - CORINFO_CLASS_HANDLE (* getTypeForBoxOnStack)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE cls); CorInfoHelpFunc (* getBoxHelper)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE cls); CorInfoHelpFunc (* getUnBoxHelper)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE cls); CORINFO_OBJECT_HANDLE (* getRuntimeTypePointer)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE cls); @@ -131,6 +130,7 @@ struct JitInterfaceCallbacks bool (* runWithErrorTrap)(void * thisHandle, CorInfoExceptionClass** ppException, 
ICorJitInfo::errorTrapFunction function, void* parameter); bool (* runWithSPMIErrorTrap)(void * thisHandle, CorInfoExceptionClass** ppException, ICorJitInfo::errorTrapFunction function, void* parameter); void (* getEEInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_EE_INFO* pEEInfoOut); + void (* getAsyncInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_ASYNC_INFO* pAsyncInfoOut); mdMethodDef (* getMethodDefFromMethod)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE hMethod); size_t (* printMethodName)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, char* buffer, size_t bufferSize, size_t* pRequiredBufferSize); const char* (* getMethodNameFromMetadata)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, const char** className, const char** namespaceName, const char** enclosingClassNames, size_t maxEnclosingClassNames); @@ -168,6 +168,7 @@ struct JitInterfaceCallbacks CORINFO_METHOD_HANDLE (* GetDelegateCtor)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE methHnd, CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE targetMethodHnd, DelegateCtorArgs* pCtorData); void (* MethodCompileComplete)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE methHnd); bool (* getTailCallHelpers)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* callToken, CORINFO_SIG_INFO* sig, CORINFO_GET_TAILCALL_HELPERS_FLAGS flags, CORINFO_TAILCALL_HELPERS* pResult); + CORINFO_METHOD_HANDLE (* getAsyncResumptionStub)(void * thisHandle, CorInfoExceptionClass** ppException); bool (* convertPInvokeCalliToCall)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool mustConvert); bool (* notifyInstructionSetUsage)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_InstructionSet instructionSet, bool supportEnabled); void (* updateEntryPointForTailCall)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CONST_LOOKUP* entryPoint); @@ -838,15 +839,6 @@ class JitInterfaceWrapper : public ICorJitInfo return temp; } - virtual CORINFO_CLASS_HANDLE getTypeForBoxOnStack( - CORINFO_CLASS_HANDLE cls) -{ - CorInfoExceptionClass* pException = nullptr; - CORINFO_CLASS_HANDLE temp = _callbacks->getTypeForBoxOnStack(_thisHandle, &pException, cls); - if (pException != nullptr) throw pException; - return temp; -} - virtual CorInfoHelpFunc getBoxHelper( CORINFO_CLASS_HANDLE cls) { @@ -1360,6 +1352,14 @@ class JitInterfaceWrapper : public ICorJitInfo if (pException != nullptr) throw pException; } + virtual void getAsyncInfo( + CORINFO_ASYNC_INFO* pAsyncInfoOut) +{ + CorInfoExceptionClass* pException = nullptr; + _callbacks->getAsyncInfo(_thisHandle, &pException, pAsyncInfoOut); + if (pException != nullptr) throw pException; +} + virtual mdMethodDef getMethodDefFromMethod( CORINFO_METHOD_HANDLE hMethod) { @@ -1734,6 +1734,14 @@ class JitInterfaceWrapper : public ICorJitInfo return temp; } + virtual CORINFO_METHOD_HANDLE getAsyncResumptionStub() +{ + CorInfoExceptionClass* pException = nullptr; + CORINFO_METHOD_HANDLE temp = _callbacks->getAsyncResumptionStub(_thisHandle, &pException); + if (pException != nullptr) throw pException; + return temp; +} + virtual bool convertPInvokeCalliToCall( CORINFO_RESOLVED_TOKEN* pResolvedToken, bool mustConvert) diff --git a/src/coreclr/tools/cdac-build-tool/ComposeCommand.cs 
b/src/coreclr/tools/cdac-build-tool/ComposeCommand.cs index adb367569594..36b1576dd0a5 100644 --- a/src/coreclr/tools/cdac-build-tool/ComposeCommand.cs +++ b/src/coreclr/tools/cdac-build-tool/ComposeCommand.cs @@ -8,15 +8,15 @@ namespace Microsoft.DotNet.Diagnostics.DataContract.BuildTool; -internal sealed class ComposeCommand : CliCommand +internal sealed class ComposeCommand : Command { - private readonly CliArgument inputFiles = new("INPUT [INPUTS...]") { Arity = ArgumentArity.OneOrMore, Description = "One or more input files" }; - private readonly CliOption outputFile = new("-o") { Arity = ArgumentArity.ExactlyOne, HelpName = "OUTPUT", Required = true, Description = "Output file" }; - private readonly CliOption contractFile = new("-c") { Arity = ArgumentArity.ZeroOrMore, HelpName = "CONTRACT", Description = "Contract file (may be specified multiple times)" }; - private readonly CliOption baselinePath = new("-b", "--baseline") { Arity = ArgumentArity.ExactlyOne, HelpName = "BASELINEPATH", Description = "Directory containing the baseline contracts"}; - private readonly CliOption templateFile = new("-i", "--input-template") { Arity = ArgumentArity.ExactlyOne, HelpName = "TEMPLATE", Description = "Contract descriptor template to be filled in" }; - private readonly CliOption _verboseOption; - public ComposeCommand(CliOption verboseOption) : base("compose") + private readonly Argument inputFiles = new("INPUT [INPUTS...]") { Arity = ArgumentArity.OneOrMore, Description = "One or more input files" }; + private readonly Option outputFile = new("-o") { Arity = ArgumentArity.ExactlyOne, HelpName = "OUTPUT", Required = true, Description = "Output file" }; + private readonly Option contractFile = new("-c") { Arity = ArgumentArity.ZeroOrMore, HelpName = "CONTRACT", Description = "Contract file (may be specified multiple times)" }; + private readonly Option baselinePath = new("-b", "--baseline") { Arity = ArgumentArity.ExactlyOne, HelpName = "BASELINEPATH", Description = "Directory containing the baseline contracts"}; + private readonly Option templateFile = new("-i", "--input-template") { Arity = ArgumentArity.ExactlyOne, HelpName = "TEMPLATE", Description = "Contract descriptor template to be filled in" }; + private readonly Option _verboseOption; + public ComposeCommand(Option verboseOption) : base("compose") { _verboseOption = verboseOption; Add(inputFiles); diff --git a/src/coreclr/tools/cdac-build-tool/DataDescriptorModel.cs b/src/coreclr/tools/cdac-build-tool/DataDescriptorModel.cs index 55aa16283c4f..14ee0988ea2b 100644 --- a/src/coreclr/tools/cdac-build-tool/DataDescriptorModel.cs +++ b/src/coreclr/tools/cdac-build-tool/DataDescriptorModel.cs @@ -24,7 +24,7 @@ public class DataDescriptorModel public uint PlatformFlags { get; } // The number of indirect globals plus 1 for the placeholder at index 0 [JsonIgnore] - public int PointerDataCount => 1 + Globals.Values.Count(g => g.Value.Indirect); + public int PointerDataCount => 1 + Globals.Values.Count(g => g.Value.Kind == GlobalValue.KindEnum.Indirect); private DataDescriptorModel(string baseline, IReadOnlyDictionary types, IReadOnlyDictionary globals, IReadOnlyDictionary contracts, uint platformFlags) { @@ -36,6 +36,7 @@ private DataDescriptorModel(string baseline, IReadOnlyDictionary { - public bool Indirect { get; private init; } - public ulong Value { get; } - public static GlobalValue MakeDirect(ulong value) => new GlobalValue(value); - public static GlobalValue MakeIndirect(uint auxDataIdx) => new GlobalValue((ulong)auxDataIdx) { Indirect 
= true }; - private GlobalValue(ulong value) { Value = value; } + public enum KindEnum + { + Direct, + Indirect, + String + } + + public KindEnum Kind { get; private init; } + public ulong NumericValue { get; } + public string StringValue { get; } + public static GlobalValue MakeDirect(ulong value) => new GlobalValue(value) { Kind = KindEnum.Direct }; + public static GlobalValue MakeIndirect(uint auxDataIdx) => new GlobalValue((ulong)auxDataIdx) { Kind = KindEnum.Indirect }; + public static GlobalValue MakeString(string value) => new GlobalValue(value) { Kind = KindEnum.String }; + private GlobalValue(ulong value) { NumericValue = value; StringValue = string.Empty;} + private GlobalValue(string value) { StringValue = value; } - public static bool operator ==(GlobalValue left, GlobalValue right) => left.Value == right.Value && left.Indirect == right.Indirect; + public static bool operator ==(GlobalValue left, GlobalValue right) => left.Equals(right); public static bool operator !=(GlobalValue left, GlobalValue right) => !(left == right); - public bool Equals(GlobalValue other) => this == other; - public override bool Equals(object? obj) => obj is GlobalValue value && this == value; - public override int GetHashCode() => HashCode.Combine(Value, Indirect); - public override string ToString() => Indirect ? $"Indirect({Value})" : $"0x{Value:x}"; + public bool Equals(GlobalValue other) => other.Kind == Kind && other.NumericValue == NumericValue && other.StringValue == StringValue; + public override bool Equals(object? obj) => obj is GlobalValue value && Equals(value); + public override int GetHashCode() => HashCode.Combine(Kind, NumericValue, StringValue); + public override string ToString() + { + return Kind switch + { + KindEnum.Direct => $"0x{NumericValue:x}", + KindEnum.Indirect => $"Indirect({NumericValue})", + KindEnum.String => $"'{StringValue}'", + _ => throw new InvalidOperationException("Unknown GlobalValue type") + }; + } } [JsonConverter(typeof(GlobalModelJsonConverter))] diff --git a/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalValueJsonConverter.cs b/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalValueJsonConverter.cs index 429f6cc69792..40ff3a67bab4 100644 --- a/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalValueJsonConverter.cs +++ b/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalValueJsonConverter.cs @@ -15,18 +15,25 @@ public override DataDescriptorModel.GlobalValue Read(ref Utf8JsonReader reader, public override void Write(Utf8JsonWriter writer, DataDescriptorModel.GlobalValue value, JsonSerializerOptions options) { - if (!value.Indirect) + switch (value.Kind) { - // no type: just write value as a number. - // we always write as a string containing a hex number - writer.WriteStringValue($"0x{value.Value:x}"); - } - else - { - // pointer data index. write as a 1-element array containing a decimal number - writer.WriteStartArray(); - writer.WriteNumberValue(value.Value); - writer.WriteEndArray(); + case DataDescriptorModel.GlobalValue.KindEnum.Direct: + // no type: just write value as a number. + // we always write as a string containing a hex number + writer.WriteStringValue($"0x{value.NumericValue:x}"); + break; + case DataDescriptorModel.GlobalValue.KindEnum.Indirect: + // pointer data index. write as a 1-element array containing a decimal number + writer.WriteStartArray(); + writer.WriteNumberValue(value.NumericValue); + writer.WriteEndArray(); + break; + case DataDescriptorModel.GlobalValue.KindEnum.String: + // string data. 
write as a JSON string value + writer.WriteStringValue(value.StringValue); + break; + default: + throw new InvalidOperationException("Unknown GlobalValue type"); } } } diff --git a/src/coreclr/tools/cdac-build-tool/ObjectFileScraper.cs b/src/coreclr/tools/cdac-build-tool/ObjectFileScraper.cs index 42b0b004c898..1c379a7e61dd 100644 --- a/src/coreclr/tools/cdac-build-tool/ObjectFileScraper.cs +++ b/src/coreclr/tools/cdac-build-tool/ObjectFileScraper.cs @@ -160,6 +160,7 @@ private struct HeaderDirectory public uint GlobalLiteralValuesStart; public uint GlobalPointersStart; + public uint GlobalStringValuesStart; public uint NamesStart; public uint TypesCount; @@ -167,6 +168,7 @@ private struct HeaderDirectory public uint GlobalLiteralValuesCount; public uint GlobalPointerValuesCount; + public uint GlobalStringValuesCount; public uint NamesPoolCount; @@ -174,6 +176,7 @@ private struct HeaderDirectory public byte FieldSpecSize; public byte GlobalLiteralSpecSize; public byte GlobalPointerSpecSize; + public byte GlobalStringSpecSize; }; private static void DumpHeaderDirectory(HeaderDirectory headerDirectory) @@ -186,12 +189,14 @@ private static void DumpHeaderDirectory(HeaderDirectory headerDirectory) Fields Pool Start = 0x{headerDirectory.FieldsPoolStart:x8} Global Literals Start = 0x{headerDirectory.GlobalLiteralValuesStart:x8} Global Pointers Start = 0x{headerDirectory.GlobalPointersStart:x8} + Global Strings Start = 0x{headerDirectory.GlobalStringValuesStart:x8} Names Pool Start = 0x{headerDirectory.NamesStart:x8} Types Count = {headerDirectory.TypesCount} Fields Pool Count = {headerDirectory.FieldsPoolCount} Global Literal Values Count = {headerDirectory.GlobalLiteralValuesCount} Global Pointer Values Count = {headerDirectory.GlobalPointerValuesCount} + Global String Values count = {headerDirectory.GlobalStringValuesCount} Names Pool Count = {headerDirectory.NamesPoolCount} """); @@ -207,6 +212,7 @@ private static HeaderDirectory ReadHeader(ScraperState state) var globalLiteralValuesStart = state.ReadUInt32(); var globalPointersStart = state.ReadUInt32(); + var globalStringValuesStart = state.ReadUInt32(); var namesStart = state.ReadUInt32(); var typeCount = state.ReadUInt32(); @@ -214,6 +220,7 @@ private static HeaderDirectory ReadHeader(ScraperState state) var globalLiteralValuesCount = state.ReadUInt32(); var globalPointerValuesCount = state.ReadUInt32(); + var GlobalStringValuesCount = state.ReadUInt32(); var namesPoolCount = state.ReadUInt32(); @@ -221,6 +228,7 @@ private static HeaderDirectory ReadHeader(ScraperState state) var fieldSpecSize = state.ReadByte(); var globalLiteralSpecSize = state.ReadByte(); var globalPointerSpecSize = state.ReadByte(); + var globalStringSpecSize = state.ReadByte(); return new HeaderDirectory { FlagsAndBaselineStart = baselineStart, @@ -228,6 +236,7 @@ private static HeaderDirectory ReadHeader(ScraperState state) FieldsPoolStart = fieldPoolStart, GlobalLiteralValuesStart = globalLiteralValuesStart, GlobalPointersStart = globalPointersStart, + GlobalStringValuesStart = globalStringValuesStart, NamesStart = namesStart, TypesCount = typeCount, @@ -235,6 +244,7 @@ private static HeaderDirectory ReadHeader(ScraperState state) GlobalLiteralValuesCount = globalLiteralValuesCount, GlobalPointerValuesCount = globalPointerValuesCount, + GlobalStringValuesCount = GlobalStringValuesCount, NamesPoolCount = namesPoolCount, @@ -242,6 +252,7 @@ private static HeaderDirectory ReadHeader(ScraperState state) FieldSpecSize = fieldSpecSize, GlobalLiteralSpecSize = 
globalLiteralSpecSize, GlobalPointerSpecSize = globalPointerSpecSize, + GlobalStringSpecSize = globalStringSpecSize, }; } @@ -280,6 +291,12 @@ private struct GlobalPointerSpec public uint AuxDataIdx; } + private struct GlobalStringSpec + { + public uint NameIdx; + public uint ValueIdx; + } + private sealed class Content { public required bool Verbose {get; init; } @@ -289,6 +306,7 @@ private sealed class Content public required IReadOnlyList FieldSpecs { get; init; } public required IReadOnlyList GlobaLiteralSpecs { get; init; } public required IReadOnlyList GlobalPointerSpecs { get; init; } + public required IReadOnlyList GlobalStringSpecs { get; init; } public required ReadOnlyMemory NamesPool { get; init; } internal string GetPoolString(uint stringIdx) @@ -360,6 +378,14 @@ public void AddToModel(DataDescriptorModel.Builder builder) builder.AddOrUpdateGlobal(globalName, DataDescriptorModel.PointerTypeName, globalValue); WriteVerbose($"Global pointer {globalName} has index {globalValue}"); } + + foreach (var globalString in GlobalStringSpecs) + { + var globalName = GetPoolString(globalString.NameIdx); + var globalValue = DataDescriptorModel.GlobalValue.MakeString(GetPoolString(globalString.ValueIdx)); + builder.AddOrUpdateGlobal(globalName, DataDescriptorModel.StringTypeName, globalValue); + WriteVerbose($"Global string {globalName} has value {globalValue}"); + } } private void WriteVerbose(string msg) @@ -381,12 +407,17 @@ private Content ReadContent(ScraperState state, HeaderDirectory header) FieldSpec[] fieldSpecs = ReadFieldSpecs(state, header); GlobalLiteralSpec[] globalLiteralSpecs = ReadGlobalLiteralSpecs(state, header); GlobalPointerSpec[] globalPointerSpecs = ReadGlobalPointerSpecs(state, header); + GlobalStringSpec[] globalStringSpecs = ReadGlobalStringSpecs(state, header); byte[] namesPool = ReadNamesPool(state, header); byte[] endMagic = new byte[4]; state.ReadBytes(endMagic.AsSpan()); if (!CheckEndMagic(endMagic)) { + if (endMagic.All(b => b == 0)) + { + throw new InvalidOperationException("expected endMagic, got all zeros. 
Did you add something to the data descriptor that can't be initialized at compile time?"); + } throw new InvalidOperationException($"expected endMagic, got 0x{endMagic[0]:x} 0x{endMagic[1]:x} 0x{endMagic[2]:x} 0x{endMagic[3]:x}"); } else @@ -402,6 +433,7 @@ private Content ReadContent(ScraperState state, HeaderDirectory header) FieldSpecs = fieldSpecs, GlobaLiteralSpecs = globalLiteralSpecs, GlobalPointerSpecs = globalPointerSpecs, + GlobalStringSpecs = globalStringSpecs, NamesPool = namesPool }; } @@ -498,6 +530,26 @@ private static GlobalPointerSpec[] ReadGlobalPointerSpecs(ScraperState state, He return globalSpecs; } + private static GlobalStringSpec[] ReadGlobalStringSpecs(ScraperState state, HeaderDirectory header) + { + GlobalStringSpec[] globalSpecs = new GlobalStringSpec[header.GlobalStringValuesCount]; + state.ResetPosition(state.HeaderStart + (long)header.GlobalStringValuesStart); + for (int i = 0; i < header.GlobalStringValuesCount; i++) + { + int bytesRead = 0; + globalSpecs[i].NameIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + globalSpecs[i].ValueIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + // skip padding + if (bytesRead < header.GlobalStringSpecSize) + { + state.Skip(header.GlobalStringSpecSize - bytesRead); + } + } + return globalSpecs; + } + private static byte[] ReadNamesPool(ScraperState state, HeaderDirectory header) { byte[] namesPool = new byte[header.NamesPoolCount]; diff --git a/src/coreclr/tools/cdac-build-tool/Program.cs b/src/coreclr/tools/cdac-build-tool/Program.cs index bf86ebec0db4..c4b0b9d33ac9 100644 --- a/src/coreclr/tools/cdac-build-tool/Program.cs +++ b/src/coreclr/tools/cdac-build-tool/Program.cs @@ -10,8 +10,8 @@ public class Program { public static async Task Main(string[] args) { - CliRootCommand rootCommand = new(); - var verboseOption = new CliOption("-v", "--verbose") {Recursive = true, Description = "Verbose"}; + RootCommand rootCommand = new(); + var verboseOption = new Option("-v", "--verbose") {Recursive = true, Description = "Verbose"}; rootCommand.Add(verboseOption); rootCommand.Add(new DiagramDirective()); rootCommand.Add(new ComposeCommand(verboseOption)); diff --git a/src/coreclr/tools/cdac-build-tool/data-descriptor-blob.md b/src/coreclr/tools/cdac-build-tool/data-descriptor-blob.md index b7321edd12c9..f04765d9f13d 100644 --- a/src/coreclr/tools/cdac-build-tool/data-descriptor-blob.md +++ b/src/coreclr/tools/cdac-build-tool/data-descriptor-blob.md @@ -88,6 +88,14 @@ struct GlobalPointerSpec uint32_t Name; uint32_t PointerDataIndex; }; + +// A string global value. +// We record the name and the value, both as a string. 
+struct GlobalStringSpec +{ + uint32_t Name; + uint32_t valueIndex; +} ``` The main data we want to emit to the object file is an instance of the following structure: @@ -108,6 +116,7 @@ struct BinaryBlobDataDescriptor uint32_t GlobalLiteralValuesStart; uint32_t GlobalPointersStart; + uint32_t GlobalStringValuesStart; uint32_t NamesStart; uint32_t TypeCount; @@ -115,6 +124,7 @@ struct BinaryBlobDataDescriptor uint32_t GlobalLiteralValuesCount; uint32_t GlobalPointerValuesCount; + uint32_t GlobalStringValuesCount; uint32_t NamesPoolCount; @@ -122,6 +132,7 @@ struct BinaryBlobDataDescriptor uint8_t FieldSpecSize; uint8_t GlobalLiteralSpecSize; uint8_t GlobalPointerSpecSize; + uint8_t GlobalStringSpecSize; } Directory; // Platform flags (primarily pointer size) uint32_t PlatformFlags; @@ -136,6 +147,8 @@ struct BinaryBlobDataDescriptor struct GlobalLiteralSpec GlobalLiteralValues[CDacBlobGlobalLiteralsCount]; // an array of pointer globals struct GlobalPointerSpec GlobalPointerValues[CDacBlobGlobalPointersCount]; + // an array of string globals + struct GlobalStringSpec GlobalStringValues[CDacBlobGlobalStringsCount]; // all of the names that might be referenced from elsewhere in BinaryBlobDataDescriptor, // delimited by "\0" uint8_t NamesPool[sizeof(struct CDacStringPoolSizes)]; @@ -178,12 +191,15 @@ in a contiguous subsequence and are terminated by a marker `FieldSpec` with a `N For each field there is a name that gives an offset in the name pool and an offset indicating the field's offset. -The global constants are given as a sequence of `GlobalLiteralSpec` elements. Each global has a +The numeric global constants are given as a sequence of `GlobalLiteralSpec` elements. Each global has a name, type and a value. Globals that are the addresses in target memory, are in `GlobalPointerSpec` elements. Each pointer element has a name and an index in a separately compiled pointer structure that is linked into runtime . See [contract-descriptor.md](/docs/design/datacontracts/contract-descriptor.md) +Strings can be passed as `GlobalStringSpec` elements. Each string global has a name and value which +are passed as offsets into the `NamesPool`. + The `NamesPool` is a single sequence of utf-8 bytes comprising the concatenation of all the type field and global names including a terminating nul byte for each name. The same name may occur multiple times. The names could be referenced by multiple type or multiple fields. (That is, a diff --git a/src/coreclr/tools/cdac-build-tool/sample/sample.blob.c b/src/coreclr/tools/cdac-build-tool/sample/sample.blob.c index b90b7eca0e93..09641ef3531a 100644 --- a/src/coreclr/tools/cdac-build-tool/sample/sample.blob.c +++ b/src/coreclr/tools/cdac-build-tool/sample/sample.blob.c @@ -49,6 +49,12 @@ struct GlobalPointerSpec uint32_t AuxIndex; }; +struct GlobalStringSpec +{ + uint32_t Name; + uint32_t StringValue; +}; + #define CONCAT(token1,token2) token1 ## token2 #define CONCAT4(token1, token2, token3, token4) token1 ## token2 ## token3 ## token4 @@ -57,6 +63,7 @@ struct GlobalPointerSpec #define MAKE_FIELDTYPELEN_NAME(tyname,membername) CONCAT4(cdac_string_pool_membertypename__, tyname, __, membername) #define MAKE_GLOBALLEN_NAME(globalname) CONCAT(cdac_string_pool_globalname__, globalname) #define MAKE_GLOBALTYPELEN_NAME(globalname) CONCAT(cdac_string_pool_globaltypename__, globalname) +#define MAKE_GLOBALVALUELEN_NAME(globalname) CONCAT(cdac_string_pool_globalvalue__, globalname) // define a struct where the size of each field is the length of some string. 
we will use offsetof to get // the offset of each struct element, which will be equal to the offset of the beginning of that string in the @@ -66,33 +73,16 @@ struct CDacStringPoolSizes char cdac_string_pool_nil; // make the first real string start at offset 1 #define DECL_LEN(membername,len) char membername[(len)]; #define CDAC_BASELINE(name) DECL_LEN(cdac_string_pool_baseline_, (sizeof(name))) -#define CDAC_TYPES_BEGIN() #define CDAC_TYPE_BEGIN(name) DECL_LEN(MAKE_TYPELEN_NAME(name), sizeof(#name)) -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) #define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) DECL_LEN(MAKE_FIELDLEN_NAME(tyname,membername), sizeof(#membername)) \ DECL_LEN(MAKE_FIELDTYPELEN_NAME(tyname,membername), sizeof(#membertyname)) -#define CDAC_TYPE_END(name) -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_STRING(name, stringval) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) \ + DECL_LEN(MAKE_GLOBALVALUELEN_NAME(name), sizeof(#stringval)) #define CDAC_GLOBAL_POINTER(name,value) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) #define CDAC_GLOBAL(name,tyname,value) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) \ DECL_LEN(MAKE_GLOBALTYPELEN_NAME(name), sizeof(#tyname)) -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END #undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END }; #define GET_TYPE_NAME(name) offsetof(struct CDacStringPoolSizes, MAKE_TYPELEN_NAME(name)) @@ -100,38 +90,15 @@ struct CDacStringPoolSizes #define GET_FIELDTYPE_NAME(tyname,membername) offsetof(struct CDacStringPoolSizes, MAKE_FIELDTYPELEN_NAME(tyname,membername)) #define GET_GLOBAL_NAME(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALLEN_NAME(globalname)) #define GET_GLOBALTYPE_NAME(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALTYPELEN_NAME(globalname)) +#define GET_GLOBALSTRING_VALUE(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALVALUELEN_NAME(globalname)) // count the types enum { CDacBlobTypesCount = #define CDAC_BASELINE(name) 0 -#define CDAC_TYPES_BEGIN() #define CDAC_TYPE_BEGIN(name) + 1 -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) -#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) -#define CDAC_TYPE_END(name) -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() -#define CDAC_GLOBAL_POINTER(name,value) -#define CDAC_GLOBAL(name,tyname,value) -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END - , }; // count the field pool size. 
@@ -140,32 +107,9 @@ enum { CDacBlobFieldsPoolCount = #define CDAC_BASELINE(name) 1 -#define CDAC_TYPES_BEGIN() -#define CDAC_TYPE_BEGIN(name) -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) #define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) + 1 #define CDAC_TYPE_END(name) + 1 -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() -#define CDAC_GLOBAL_POINTER(name,value) -#define CDAC_GLOBAL(name,tyname,value) -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END - , }; // count the literal globals @@ -173,32 +117,8 @@ enum { CDacBlobGlobalLiteralsCount = #define CDAC_BASELINE(name) 0 -#define CDAC_TYPES_BEGIN() -#define CDAC_TYPE_BEGIN(name) -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) -#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) -#define CDAC_TYPE_END(name) -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() -#define CDAC_GLOBAL_POINTER(name,value) #define CDAC_GLOBAL(name,tyname,value) + 1 -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END - , }; // count the aux vector globals @@ -206,32 +126,17 @@ enum { CDacBlobGlobalPointersCount = #define CDAC_BASELINE(name) 0 -#define CDAC_TYPES_BEGIN() -#define CDAC_TYPE_BEGIN(name) -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) -#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) -#define CDAC_TYPE_END(name) -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() #define CDAC_GLOBAL_POINTER(name,value) + 1 -#define CDAC_GLOBAL(name,tyname,value) -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END - , +}; + +// count the global strings +enum +{ + CDacBlobGlobalStringsCount = +#define CDAC_GLOBALS_BEGIN() 0 +#define CDAC_GLOBAL_STRING(name,value) + 1 +#include "sample.data.h" }; @@ -257,32 +162,11 @@ struct CDacFieldsPoolSizes { #define DECL_LEN(membername) char membername; #define CDAC_BASELINE(name) DECL_LEN(cdac_fields_pool_start_placeholder__) -#define CDAC_TYPES_BEGIN() #define CDAC_TYPE_BEGIN(name) struct MAKE_TYPEFIELDS_TYNAME(name) { -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) #define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) DECL_LEN(CONCAT4(cdac_fields_pool_member__, tyname, __, membername)) #define CDAC_TYPE_END(name) DECL_LEN(CONCAT4(cdac_fields_pool_member__, tyname, _, endmarker)) \ } MAKE_TYPEFIELDS_TYNAME(name); -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() -#define CDAC_GLOBAL_POINTER(name,value) -#define CDAC_GLOBAL(name,tyname,value) -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef 
CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END #undef DECL_LEN }; @@ -303,31 +187,9 @@ struct CDacGlobalPointerIndex { #define DECL_LEN(membername) char membername; #define CDAC_BASELINE(name) DECL_LEN(cdac_global_pointer_index_start_placeholder__) -#define CDAC_TYPES_BEGIN() -#define CDAC_TYPE_BEGIN(name) -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) -#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) -#define CDAC_TYPE_END(name) -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() #define CDAC_GLOBAL_POINTER(name,value) DECL_LEN(CONCAT(cdac_global_pointer_index__, name)) -#define CDAC_GLOBAL(name,tyname,value) -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END #undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END }; #define GET_GLOBAL_POINTER_INDEX(name) offsetof(struct CDacGlobalPointerIndex, CONCAT(cdac_global_pointer_index__, name)) @@ -343,6 +205,7 @@ struct BinaryBlobDataDescriptor uint32_t GlobalLiteralValuesStart; uint32_t GlobalPointersStart; + uint32_t GlobalStringValuesStart; uint32_t NamesPoolStart; uint32_t TypeCount; @@ -350,6 +213,7 @@ struct BinaryBlobDataDescriptor uint32_t GlobalLiteralValuesCount; uint32_t GlobalPointerValuesCount; + uint32_t GlobalStringValuesCount; uint32_t NamesPoolCount; @@ -357,6 +221,7 @@ struct BinaryBlobDataDescriptor uint8_t FieldSpecSize; uint8_t GlobalLiteralSpecSize; uint8_t GlobalPointerSpecSize; + uint8_t GlobalStringSpecSize; } Directory; uint32_t PlatformFlags; uint32_t BaselineName; @@ -364,6 +229,7 @@ struct BinaryBlobDataDescriptor struct FieldSpec FieldsPool[CDacBlobFieldsPoolCount]; struct GlobalLiteralSpec GlobalLiteralValues[CDacBlobGlobalLiteralsCount]; struct GlobalPointerSpec GlobalPointerValues[CDacBlobGlobalPointersCount]; + struct GlobalStringSpec GlobalStringValues[CDacBlobGlobalStringsCount]; uint8_t NamesPool[sizeof(struct CDacStringPoolSizes)]; uint8_t EndMagic[4]; }; @@ -382,16 +248,19 @@ const struct MagicAndBlob Blob = { .FieldsPoolStart = offsetof(struct BinaryBlobDataDescriptor, FieldsPool), .GlobalLiteralValuesStart = offsetof(struct BinaryBlobDataDescriptor, GlobalLiteralValues), .GlobalPointersStart = offsetof(struct BinaryBlobDataDescriptor, GlobalPointerValues), + .GlobalStringValuesStart = offsetof(struct BinaryBlobDataDescriptor, GlobalStringValues), .NamesPoolStart = offsetof(struct BinaryBlobDataDescriptor, NamesPool), .TypeCount = CDacBlobTypesCount, .FieldsPoolCount = CDacBlobFieldsPoolCount, .GlobalLiteralValuesCount = CDacBlobGlobalLiteralsCount, .GlobalPointerValuesCount = CDacBlobGlobalPointersCount, + .GlobalStringValuesCount = CDacBlobGlobalStringsCount, .NamesPoolCount = sizeof(struct CDacStringPoolSizes), .TypeSpecSize = sizeof(struct TypeSpec), .FieldSpecSize = sizeof(struct FieldSpec), .GlobalLiteralSpecSize = sizeof(struct GlobalLiteralSpec), .GlobalPointerSpecSize = sizeof(struct GlobalPointerSpec), + .GlobalStringSpecSize = sizeof(struct GlobalStringSpec), }, .EndMagic = { 0x01, 0x02, 0x03, 0x04 }, .PlatformFlags = 0x01 | (sizeof(void*) == 4 ? 
0x02 : 0), @@ -399,153 +268,47 @@ const struct MagicAndBlob Blob = { .NamesPool = ("\0" // starts with a nul #define CDAC_BASELINE(name) name "\0" -#define CDAC_TYPES_BEGIN() #define CDAC_TYPE_BEGIN(name) #name "\0" -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) #define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) #membername "\0" #membertyname "\0" -#define CDAC_TYPE_END(name) -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() #define CDAC_GLOBAL_POINTER(name,value) #name "\0" #define CDAC_GLOBAL(name,tyname,value) #name "\0" #tyname "\0" -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END ), .FieldsPool = { #define CDAC_BASELINE(name) {0,}, -#define CDAC_TYPES_BEGIN() -#define CDAC_TYPE_BEGIN(name) -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) #define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) { \ .Name = GET_FIELD_NAME(tyname,membername), \ .TypeName = GET_FIELDTYPE_NAME(tyname,membername), \ .FieldOffset = offset, \ }, #define CDAC_TYPE_END(name) { 0, }, -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() -#define CDAC_GLOBAL_POINTER(name,value) -#define CDAC_GLOBAL(name,tyname,value) -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END }, .Types = { -#define CDAC_BASELINE(name) -#define CDAC_TYPES_BEGIN() #define CDAC_TYPE_BEGIN(name) { \ .Name = GET_TYPE_NAME(name), \ .Fields = GET_TYPE_FIELDS(name), #define CDAC_TYPE_INDETERMINATE(name) .Size = 0, #define CDAC_TYPE_SIZE(size) .Size = size, -#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) #define CDAC_TYPE_END(name) }, -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() -#define CDAC_GLOBAL_POINTER(name,value) -#define CDAC_GLOBAL(name,tyname,value) -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END }, .GlobalLiteralValues = { -#define CDAC_BASELINE(name) -#define CDAC_TYPES_BEGIN() -#define CDAC_TYPE_BEGIN(name) -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) -#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) -#define CDAC_TYPE_END(name) -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() -#define CDAC_GLOBAL_POINTER(name,value) #define CDAC_GLOBAL(name,tyname,value) { .Name = GET_GLOBAL_NAME(name), .TypeName = GET_GLOBALTYPE_NAME(name), .Value = value }, -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef 
CDAC_GLOBAL -#undef CDAC_GLOBALS_END }, .GlobalPointerValues = { -#define CDAC_BASELINE(name) -#define CDAC_TYPES_BEGIN() -#define CDAC_TYPE_BEGIN(name) -#define CDAC_TYPE_INDETERMINATE(name) -#define CDAC_TYPE_SIZE(size) -#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) -#define CDAC_TYPE_END(name) -#define CDAC_TYPES_END() -#define CDAC_GLOBALS_BEGIN() #define CDAC_GLOBAL_POINTER(name,value) { .Name = GET_GLOBAL_NAME(name), .AuxIndex = GET_GLOBAL_POINTER_INDEX(name) }, -#define CDAC_GLOBAL(name,tyname,value) -#define CDAC_GLOBALS_END() #include "sample.data.h" -#undef CDAC_BASELINE -#undef CDAC_TYPES_BEGIN -#undef CDAC_TYPES_END -#undef CDAC_TYPE_BEGIN -#undef CDAC_TYPE_INDETERMINATE -#undef CDAC_TYPE_SIZE -#undef CDAC_TYPE_FIELD -#undef CDAC_TYPE_END -#undef DECL_LEN -#undef CDAC_GLOBALS_BEGIN -#undef CDAC_GLOBAL_POINTER -#undef CDAC_GLOBAL -#undef CDAC_GLOBALS_END + }, + + .GlobalStringValues = { +#define CDAC_GLOBAL_STRING(name, value) { .Name = GET_GLOBAL_NAME(name), .StringValue = GET_GLOBALSTRING_VALUE(name) }, +#include "sample.data.h" }, } }; diff --git a/src/coreclr/tools/cdac-build-tool/sample/sample.data.h b/src/coreclr/tools/cdac-build-tool/sample/sample.data.h index e4b8bff98b5e..58ed59d02de4 100644 --- a/src/coreclr/tools/cdac-build-tool/sample/sample.data.h +++ b/src/coreclr/tools/cdac-build-tool/sample/sample.data.h @@ -1,3 +1,45 @@ +#ifndef CDAC_BASELINE +#define CDAC_BASELINE(identifier) +#endif +#ifndef CDAC_TYPES_BEGIN +#define CDAC_TYPES_BEGIN() +#endif +#ifndef CDAC_TYPE_BEGIN +#define CDAC_TYPE_BEGIN(tyname) +#endif +#ifndef CDAC_TYPE_SIZE +#define CDAC_TYPE_SIZE(k) +#endif +#ifndef CDAC_TYPE_INDETERMINATE +#define CDAC_TYPE_INDETERMINATE(tyname) +#endif +#ifndef CDAC_TYPE_FIELD +#define CDAC_TYPE_FIELD(tyname,fieldtyname,fieldname,off) +#endif +#ifndef CDAC_TYPE_END +#define CDAC_TYPE_END(tyname) +#endif +#ifndef CDAC_TYPES_END +#define CDAC_TYPES_END() +#endif +#ifndef CDAC_GLOBALS_BEGIN +#define CDAC_GLOBALS_BEGIN() +#endif +#ifndef CDAC_GLOBAL +#define CDAC_GLOBAL(globalname,tyname,val) +#endif +#ifndef CDAC_GLOBAL_POINTER +#define CDAC_GLOBAL_POINTER(globalname,addr) +#endif +#ifndef CDAC_GLOBAL_STRING +#define CDAC_GLOBAL_STRING(globalname,stringval) +#endif +#ifndef CDAC_GLOBALS_END +#define CDAC_GLOBALS_END() +#endif + + + CDAC_BASELINE("empty") CDAC_TYPES_BEGIN() @@ -21,4 +63,21 @@ CDAC_GLOBAL(FeatureEHFunclets, uint8, 1) CDAC_GLOBAL(FeatureEHFunclets, uint8, 0) #endif CDAC_GLOBAL(SomeMagicNumber, uint32, 42) +CDAC_GLOBAL_STRING(RuntimeIdentifier, "windows-x64") CDAC_GLOBALS_END() + + + +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef CDAC_TYPES_END +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL_STRING +#undef CDAC_GLOBALS_END diff --git a/src/coreclr/tools/dotnet-pgo/PgoRootCommand.cs b/src/coreclr/tools/dotnet-pgo/PgoRootCommand.cs index f7a7b7edd66c..179aa957b336 100644 --- a/src/coreclr/tools/dotnet-pgo/PgoRootCommand.cs +++ b/src/coreclr/tools/dotnet-pgo/PgoRootCommand.cs @@ -13,64 +13,64 @@ namespace Microsoft.Diagnostics.Tools.Pgo { - internal sealed class PgoRootCommand : CliRootCommand + internal sealed class PgoRootCommand : RootCommand { - public CliOption> InputFilesToMerge { get; } = + public Option> InputFilesToMerge { get; } = new("--input", "-i") { CustomParser = result => Helpers.BuildPathList(result.Tokens), Description = "Input .mibc files to be 
merged. Multiple input arguments are specified as --input file1.mibc --input file2.mibc", Required = true, Arity = ArgumentArity.OneOrMore }; - public CliOption InputFilesToCompare { get; } = + public Option InputFilesToCompare { get; } = new("--input", "-i") { Description = "The input .mibc files to be compared. Specify as --input file1.mibc --input file2.mibc", Required = true, Arity = new ArgumentArity(2, 2) /* exactly two */ }; - public CliOption InputFileToDump { get; } = + public Option InputFileToDump { get; } = new("--input", "-i") { Description = "Name of the input mibc file to dump", Required = true, Arity = ArgumentArity.ExactlyOne }; - public CliOption TraceFilePath { get; } = + public Option TraceFilePath { get; } = new("--trace", "-t") { Description = "Specify the trace file to be parsed" }; - public CliOption OutputFilePath { get; } = + public Option OutputFilePath { get; } = new("--output", "-o") { Description = "Specify the output filename to be created" }; - public CliOption PreciseDebugInfoFile { get; } = + public Option PreciseDebugInfoFile { get; } = new("--precise-debug-info-file") { Description = "Name of file of newline separated JSON objects containing precise debug info" }; - public CliOption Pid { get; } = + public Option Pid { get; } = new("--pid") { Description = "The pid within the trace of the process to examine. If this is a multi-process trace, at least one of --pid or --process-name must be specified" }; - public CliOption ProcessName { get; } = + public Option ProcessName { get; } = new("--process-name") { Description = "The process name within the trace of the process to examine. If this is a multi-process trace, at least one of --pid or --process-name must be specified" }; - public CliOption> Reference = + public Option> Reference = new("--reference", "-r") { CustomParser = result => Helpers.BuildPathList(result.Tokens), DefaultValueFactory = result => Helpers.BuildPathList(result.Tokens), Description = "If a reference is not located on disk at the same location as used in the process, it may be specified with a --reference parameter. Multiple --reference parameters may be specified. The wild cards * and ? are supported by this option" }; - public CliOption ClrInstanceId { get; } = + public Option ClrInstanceId { get; } = new("--clr-instance-id") { Description = "If the process contains multiple .NET runtimes, the instance ID must be specified" }; - public CliOption Spgo { get; } = + public Option Spgo { get; } = new("--spgo") { Description = "Base profile on samples in the input. Uses last branch records if available and otherwise raw IP samples" }; - public CliOption SpgoMinSamples { get; } = + public Option SpgoMinSamples { get; } = new("--spgo-min-samples") { DefaultValueFactory = _ => 50, Description = "The minimum number of total samples a function must have before generating profile data for it with SPGO. Default: 50" }; - public CliOption IncludeFullGraphs { get; } = + public Option IncludeFullGraphs { get; } = new("--include-full-graphs") { Description = "Include all blocks and edges in the written .mibc file, regardless of profile counts" }; - public CliOption ExcludeEventsBefore { get; } = + public Option ExcludeEventsBefore { get; } = new("--exclude-events-before") { DefaultValueFactory = _ => Double.MinValue, Description = "Exclude data from events before specified time. 
Time is specified as milliseconds from the start of the trace" }; - public CliOption ExcludeEventsAfter { get; } = + public Option ExcludeEventsAfter { get; } = new("--exclude-events-after") { DefaultValueFactory = _ => Double.MaxValue, Description = "Exclude data from events after specified time. Time is specified as milliseconds from the start of the trace" }; - public CliOption ExcludeEventsBeforeJittingMethod { get; } = + public Option ExcludeEventsBeforeJittingMethod { get; } = new("--exclude-events-before-jitting-method") { DefaultValueFactory = _ => string.Empty, Description = "Exclude data from events before observing a specific method getting jitted. Method is matched using a regular expression against the method name. Note that the method name is formatted the same as in PerfView which includes typed parameters." }; - public CliOption ExcludeEventsAfterJittingMethod { get; } = + public Option ExcludeEventsAfterJittingMethod { get; } = new("--exclude-events-after-jitting-method") { DefaultValueFactory = _ => string.Empty, Description = "Exclude data from events after observing a specific method getting jitted. Method is matched using a regular expression against the method name. Note that the method name is formatted the same as in PerfView which includes typed parameters." }; - public CliOption IncludeMethods { get; } = + public Option IncludeMethods { get; } = new("--include-methods") { DefaultValueFactory = _ => string.Empty, Description = "Include methods with names matching regular expression. Note that the method names are formatted the same as in PerfView which includes typed parameters." }; - public CliOption ExcludeMethods { get; } = + public Option ExcludeMethods { get; } = new("--exclude-methods") { DefaultValueFactory = _ => string.Empty, Description = "Exclude methods with names matching regular expression. Note that the method names are formatted the same as in PerfView which includes typed parameters." }; - public CliOption Compressed { get; } = + public Option Compressed { get; } = new("--compressed") { DefaultValueFactory = _ => true, Description = "Generate compressed mibc" }; - public CliOption DumpWorstOverlapGraphs { get; } = + public Option DumpWorstOverlapGraphs { get; } = new("--dump-worst-overlap-graphs") { DefaultValueFactory = _ => -1, Description = "Number of graphs to dump to .dot format in dump-worst-overlap-graphs-to directory" }; - public CliOption DumpWorstOverlapGraphsTo { get; } = + public Option DumpWorstOverlapGraphsTo { get; } = new("--dump-worst-overlap-graphs-to") { Description = "Number of graphs to dump to .dot format in dump-worst-overlap-graphs-to directory" }; - public CliOption AutomaticReferences { get; } = + public Option AutomaticReferences { get; } = new("--automatic-references") { DefaultValueFactory = _ => true, Description = "Attempt to find references by using paths embedded in the trace file. Defaults to true" }; - public CliOption IncludedAssemblies { get; } = + public Option IncludedAssemblies { get; } = new("--include-reference") { CustomParser = MakeAssemblyNameArray, DefaultValueFactory = MakeAssemblyNameArray, Description = "If specified, include in Mibc file only references to the specified assemblies. Assemblies are specified as assembly names, not filenames. For instance, `System.Private.CoreLib` not `System.Private.CoreLib.dll`. Multiple --include-reference options may be specified." 
}; - private CliOption _includeReadyToRun { get; } = + private Option _includeReadyToRun { get; } = new("--includeReadyToRun") { Description = "Include ReadyToRun methods in the trace file" }; - private CliOption _verbosity { get; } = + private Option _verbosity { get; } = new("--verbose") { DefaultValueFactory = _ => Verbosity.normal, Description = "Adjust verbosity level. Supported levels are minimal, normal, detailed, and diagnostic" }; - private CliOption _isSorted { get; } = + private Option _isSorted { get; } = new("--sorted") { Description = "Generate sorted output." }; - private CliOption _showTimestamp { get; } = + private Option _showTimestamp { get; } = new("--showtimestamp") { Description = "Show timestamps in output" }; public PgoFileType? FileType; @@ -97,7 +97,7 @@ private enum Verbosity public PgoRootCommand(string[] args) : base(".NET PGO Tool") { - CliCommand createMbicCommand = new("create-mibc", "Transform a trace file into a Mibc profile data file") + Command createMbicCommand = new("create-mibc", "Transform a trace file into a Mibc profile data file") { TraceFilePath, OutputFilePath, @@ -139,7 +139,7 @@ public PgoRootCommand(string[] args) : base(".NET PGO Tool") JitTraceOptions = JitTraceOptions.none; #if DEBUG - CliCommand createJitTraceCommand = new("create-jittrace","Transform a trace file into a jittrace runtime file") + Command createJitTraceCommand = new("create-jittrace","Transform a trace file into a jittrace runtime file") { TraceFilePath, OutputFilePath, @@ -179,7 +179,7 @@ public PgoRootCommand(string[] args) : base(".NET PGO Tool") Subcommands.Add(createJitTraceCommand); #endif - CliCommand mergeCommand = new("merge", "Merge multiple Mibc profile data files into one file") + Command mergeCommand = new("merge", "Merge multiple Mibc profile data files into one file") { InputFilesToMerge, OutputFilePath, @@ -201,7 +201,7 @@ public PgoRootCommand(string[] args) : base(".NET PGO Tool") Subcommands.Add(mergeCommand); - CliCommand dumpCommand = new("dump", "Dump the contents of a Mibc file") + Command dumpCommand = new("dump", "Dump the contents of a Mibc file") { _verbosity, InputFileToDump, @@ -216,7 +216,7 @@ public PgoRootCommand(string[] args) : base(".NET PGO Tool") Subcommands.Add(dumpCommand); - CliCommand compareMbicCommand = new("compare-mibc", "Compare two .mibc files") + Command compareMbicCommand = new("compare-mibc", "Compare two .mibc files") { InputFilesToCompare, DumpWorstOverlapGraphs, @@ -260,16 +260,11 @@ int ExecuteWithContext(ParseResult result, bool setVerbosity) } } - public static IEnumerable> GetExtendedHelp(HelpContext context) + public static void PrintExtendedHelp(ParseResult parseResult) { - foreach (Func sectionDelegate in HelpBuilder.Default.GetLayout()) - yield return sectionDelegate; - - if (context.Command.Name == "create-mibc" || context.Command.Name == "create-jittrace") + if (parseResult.CommandResult.Command.Name is "create-mibc" or "create-jittrace") { - yield return _ => - { - Console.WriteLine( + Console.WriteLine( @"Example tracing commands used to generate the input to this tool: ""dotnet-trace collect -p 73060 --providers Microsoft-Windows-DotNETRuntime:0x1E000080018:4"" - Capture events from process 73060 where we capture both JIT and R2R events using EventPipe tracing @@ -280,8 +275,6 @@ public static IEnumerable> GetExtendedHelp(HelpContext c ""perfview collect -LogFile:logOfCollection.txt -DataFile:jittrace.etl -Zip:false -merge:false -providers:Microsoft-Windows-DotNETRuntime:0x1E000080018:4"" - Capture Jit and 
R2R events via perfview of all processes running using ETW tracing "); - return true; - }; } } @@ -290,7 +283,7 @@ private static AssemblyNameInfo[] MakeAssemblyNameArray(ArgumentResult result) if (result.Tokens.Count > 0) { var includedAssemblies = new List(); - foreach (CliToken token in result.Tokens) + foreach (Token token in result.Tokens) { try { diff --git a/src/coreclr/tools/dotnet-pgo/Program.cs b/src/coreclr/tools/dotnet-pgo/Program.cs index c4745a74a204..3ed5bd51efc4 100644 --- a/src/coreclr/tools/dotnet-pgo/Program.cs +++ b/src/coreclr/tools/dotnet-pgo/Program.cs @@ -156,14 +156,14 @@ public Program(PgoRootCommand command) _inputFilesToCompare = Get(command.InputFilesToCompare); } - private T Get(CliOption option) => _command.Result.GetValue(option); - private T Get(CliArgument argument) => _command.Result.GetValue(argument); - private bool IsSet(CliOption option) => _command.Result.GetResult(option) != null; + private T Get(Option option) => _command.Result.GetValue(option); + private T Get(Argument argument) => _command.Result.GetValue(argument); + private bool IsSet(Option option) => _command.Result.GetResult(option) != null; private static int Main(string[] args) => - new CliConfiguration(new PgoRootCommand(args) + new CommandLineConfiguration(new PgoRootCommand(args) .UseVersion() - .UseExtendedHelp(PgoRootCommand.GetExtendedHelp)) + .UseExtendedHelp(PgoRootCommand.PrintExtendedHelp)) { ResponseFileTokenReplacer = Helpers.TryReadResponseFile, EnableDefaultExceptionHandler = false, @@ -1050,7 +1050,7 @@ private int InnerProcessTraceFileMain() { bool hasPid = IsSet(_command.Pid); string processName = Get(_command.ProcessName); - if (hasPid && processName == null && traceLog.Processes.Count != 1) + if (!hasPid && processName == null && traceLog.Processes.Count != 1) { PrintError("Trace file contains multiple processes to distinguish between"); PrintOutput("Either a pid or process name from the following list must be specified"); diff --git a/src/coreclr/tools/dotnet-pgo/dotnet-pgo.sln b/src/coreclr/tools/dotnet-pgo/dotnet-pgo.sln deleted file mode 100644 index 45bc67bf4014..000000000000 --- a/src/coreclr/tools/dotnet-pgo/dotnet-pgo.sln +++ /dev/null @@ -1,86 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.0.32014.148 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "dotnet-pgo", "dotnet-pgo.csproj", "{7DA4CC22-F01D-4505-845F-57C06E5C3F9F}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.Reflection.ReadyToRun", "..\aot\ILCompiler.Reflection.ReadyToRun\ILCompiler.Reflection.ReadyToRun.csproj", "{ED3FE303-74EB-43D1-BEA1-14484A14B22E}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.TypeSystem", "..\aot\ILCompiler.TypeSystem\ILCompiler.TypeSystem.csproj", "{8A811180-D605-469B-9693-EC3915B3E0DC}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Checked|Any CPU = Checked|Any CPU - Checked|x64 = Checked|x64 - Checked|x86 = Checked|x86 - Debug|Any CPU = Debug|Any CPU - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|Any CPU = Release|Any CPU - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Checked|Any CPU.ActiveCfg = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Checked|Any CPU.Build.0 = Release|Any CPU - 
{7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Checked|x64.ActiveCfg = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Checked|x64.Build.0 = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Checked|x86.ActiveCfg = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Checked|x86.Build.0 = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Debug|Any CPU.Build.0 = Debug|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Debug|x64.ActiveCfg = Debug|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Debug|x64.Build.0 = Debug|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Debug|x86.ActiveCfg = Debug|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Debug|x86.Build.0 = Debug|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Release|Any CPU.ActiveCfg = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Release|Any CPU.Build.0 = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Release|x64.ActiveCfg = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Release|x64.Build.0 = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Release|x86.ActiveCfg = Release|Any CPU - {7DA4CC22-F01D-4505-845F-57C06E5C3F9F}.Release|x86.Build.0 = Release|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Checked|Any CPU.ActiveCfg = Release|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Checked|Any CPU.Build.0 = Release|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Checked|x64.ActiveCfg = Release|x64 - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Checked|x64.Build.0 = Release|x64 - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Checked|x86.ActiveCfg = Release|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Checked|x86.Build.0 = Release|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Debug|Any CPU.Build.0 = Debug|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Debug|x64.ActiveCfg = Debug|x64 - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Debug|x64.Build.0 = Debug|x64 - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Debug|x86.ActiveCfg = Debug|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Debug|x86.Build.0 = Debug|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Release|Any CPU.ActiveCfg = Release|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Release|Any CPU.Build.0 = Release|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Release|x64.ActiveCfg = Release|x64 - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Release|x64.Build.0 = Release|x64 - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Release|x86.ActiveCfg = Release|Any CPU - {ED3FE303-74EB-43D1-BEA1-14484A14B22E}.Release|x86.Build.0 = Release|Any CPU - {8A811180-D605-469B-9693-EC3915B3E0DC}.Checked|Any CPU.ActiveCfg = Checked|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Checked|Any CPU.Build.0 = Checked|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Checked|x64.ActiveCfg = Checked|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Checked|x64.Build.0 = Checked|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Checked|x86.ActiveCfg = Checked|x86 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Checked|x86.Build.0 = Checked|x86 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Debug|Any CPU.ActiveCfg = Debug|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Debug|Any CPU.Build.0 = Debug|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Debug|x64.ActiveCfg = Debug|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Debug|x64.Build.0 = Debug|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Debug|x86.ActiveCfg = Debug|x86 - 
{8A811180-D605-469B-9693-EC3915B3E0DC}.Debug|x86.Build.0 = Debug|x86 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Release|Any CPU.ActiveCfg = Release|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Release|Any CPU.Build.0 = Release|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Release|x64.ActiveCfg = Release|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Release|x64.Build.0 = Release|x64 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Release|x86.ActiveCfg = Release|x86 - {8A811180-D605-469B-9693-EC3915B3E0DC}.Release|x86.Build.0 = Release|x86 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {EEBDF807-A078-4F0D-A7F3-A6386B6C0A68} - EndGlobalSection -EndGlobal diff --git a/src/coreclr/tools/dotnet-pgo/dotnet-pgo.slnx b/src/coreclr/tools/dotnet-pgo/dotnet-pgo.slnx new file mode 100644 index 000000000000..4d6d924b1af1 --- /dev/null +++ b/src/coreclr/tools/dotnet-pgo/dotnet-pgo.slnx @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/coreclr/tools/metainfo/mdinfo.cpp b/src/coreclr/tools/metainfo/mdinfo.cpp index 927949b104ee..bac3b2158117 100644 --- a/src/coreclr/tools/metainfo/mdinfo.cpp +++ b/src/coreclr/tools/metainfo/mdinfo.cpp @@ -514,7 +514,10 @@ void MDInfo::DisplayScopeInfo() VWriteLine("ScopeName : %s",ConvertToUtf8(scopeName, scopeNameUtf8, ARRAY_SIZE(scopeNameUtf8))); if (!(m_DumpFilter & MDInfo::dumpNoLogo)) - VWriteLine("MVID : %s",GUIDAsString(mvid, guidString, STRING_BUFFER_LEN)); + { + minipal_guid_as_string(mvid, guidString, STRING_BUFFER_LEN); + VWriteLine("MVID : %s", guidString); + } hr = m_pImport->GetModuleFromScope(&mdm); if (FAILED(hr)) Error("GetModuleFromScope failed.", hr); @@ -2189,15 +2192,6 @@ void MDInfo::DisplayPermissionInfo(mdPermission inPermission, const char *preFix DisplayCustomAttributes(inPermission, newPreFix); } // void MDInfo::DisplayPermissionInfo() - -// simply prints out the given GUID in standard form - -LPCSTR MDInfo::GUIDAsString(GUID inGuid, _Out_writes_(bufLen) LPSTR guidString, ULONG bufLen) -{ - GuidToLPSTR(inGuid, guidString, bufLen); - return guidString; -} // LPCSTR MDInfo::GUIDAsString() - #ifdef FEATURE_COMINTEROP LPCSTR MDInfo::VariantAsString(VARIANT *pVariant, _Out_writes_(bufLen) LPSTR buffer, ULONG bufLen) { diff --git a/src/coreclr/tools/metainfo/mdobj.cpp b/src/coreclr/tools/metainfo/mdobj.cpp index 65cae4e7850a..8d9d60428021 100644 --- a/src/coreclr/tools/metainfo/mdobj.cpp +++ b/src/coreclr/tools/metainfo/mdobj.cpp @@ -253,7 +253,7 @@ void DisplayFile(_In_z_ WCHAR* szFile, BOOL isFile, ULONG DumpFilter, _In_opt_z_ { // Open the emit scope - // We need to make sure this file isn't too long. Checking _MAX_PATH is probably safe, but since we have a much + // We need to make sure this file isn't too long. Checking MAX_PATH is probably safe, but since we have a much // larger buffer, we might as well use it all. 
if (u16_strlen(szFile) > 1000) return; diff --git a/src/coreclr/tools/r2rdump/Program.cs b/src/coreclr/tools/r2rdump/Program.cs index 831570fe2ea5..4755414bd664 100644 --- a/src/coreclr/tools/r2rdump/Program.cs +++ b/src/coreclr/tools/r2rdump/Program.cs @@ -496,10 +496,10 @@ public int Run() return 0; } - private T Get(CliOption option) => _command.Result.GetValue(option); + private T Get(Option option) => _command.Result.GetValue(option); public static int Main(string[] args) => - new CliConfiguration(new R2RDumpRootCommand().UseVersion()) + new CommandLineConfiguration(new R2RDumpRootCommand().UseVersion()) { ResponseFileTokenReplacer = Helpers.TryReadResponseFile }.Invoke(args); diff --git a/src/coreclr/tools/r2rdump/R2RDump.sln b/src/coreclr/tools/r2rdump/R2RDump.sln deleted file mode 100644 index 9dad744181fd..000000000000 --- a/src/coreclr/tools/r2rdump/R2RDump.sln +++ /dev/null @@ -1,104 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.0.32014.148 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "R2RDump", "R2RDump.csproj", "{00CCF6D0-5905-428E-A2A2-2A6D09D8C257}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.Reflection.ReadyToRun", "..\aot\ILCompiler.Reflection.ReadyToRun\ILCompiler.Reflection.ReadyToRun.csproj", "{E2A577E5-7AF3-49B3-BA78-7071B75ED64B}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.Diagnostics", "..\aot\ILCompiler.Diagnostics\ILCompiler.Diagnostics.csproj", "{4E9512BA-F963-472A-B689-37D4D32456F3}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.TypeSystem", "..\aot\ILCompiler.TypeSystem\ILCompiler.TypeSystem.csproj", "{99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Checked|Any CPU = Checked|Any CPU - Checked|x64 = Checked|x64 - Checked|x86 = Checked|x86 - Debug|Any CPU = Debug|Any CPU - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|Any CPU = Release|Any CPU - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Checked|Any CPU.ActiveCfg = Release|Any CPU - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Checked|Any CPU.Build.0 = Release|Any CPU - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Checked|x64.ActiveCfg = Release|x64 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Checked|x64.Build.0 = Release|x64 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Checked|x86.ActiveCfg = Debug|x86 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Checked|x86.Build.0 = Debug|x86 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Debug|Any CPU.ActiveCfg = Debug|x64 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Debug|Any CPU.Build.0 = Debug|x64 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Debug|x64.ActiveCfg = Debug|x64 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Debug|x64.Build.0 = Debug|x64 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Debug|x86.ActiveCfg = Debug|x86 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Debug|x86.Build.0 = Debug|x86 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Release|Any CPU.ActiveCfg = Release|Any CPU - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Release|Any CPU.Build.0 = Release|Any CPU - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Release|x64.ActiveCfg = Release|x64 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Release|x64.Build.0 = Release|x64 - {00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Release|x86.ActiveCfg = Release|x86 - 
{00CCF6D0-5905-428E-A2A2-2A6D09D8C257}.Release|x86.Build.0 = Release|x86 - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Checked|Any CPU.ActiveCfg = Release|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Checked|Any CPU.Build.0 = Release|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Checked|x64.ActiveCfg = Release|x64 - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Checked|x64.Build.0 = Release|x64 - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Checked|x86.ActiveCfg = Release|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Checked|x86.Build.0 = Release|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Debug|Any CPU.Build.0 = Debug|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Debug|x64.ActiveCfg = Debug|x64 - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Debug|x64.Build.0 = Debug|x64 - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Debug|x86.ActiveCfg = Debug|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Debug|x86.Build.0 = Debug|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Release|Any CPU.ActiveCfg = Release|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Release|Any CPU.Build.0 = Release|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Release|x64.ActiveCfg = Release|x64 - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Release|x64.Build.0 = Release|x64 - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Release|x86.ActiveCfg = Release|Any CPU - {E2A577E5-7AF3-49B3-BA78-7071B75ED64B}.Release|x86.Build.0 = Release|Any CPU - {4E9512BA-F963-472A-B689-37D4D32456F3}.Checked|Any CPU.ActiveCfg = Checked|x86 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Checked|x64.ActiveCfg = Checked|x64 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Checked|x64.Build.0 = Checked|x64 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Checked|x86.ActiveCfg = Checked|x86 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Checked|x86.Build.0 = Checked|x86 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Debug|Any CPU.ActiveCfg = Debug|x64 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Debug|Any CPU.Build.0 = Debug|x64 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Debug|x64.ActiveCfg = Debug|x64 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Debug|x64.Build.0 = Debug|x64 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Debug|x86.ActiveCfg = Debug|x86 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Debug|x86.Build.0 = Debug|x86 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Release|Any CPU.ActiveCfg = Release|Any CPU - {4E9512BA-F963-472A-B689-37D4D32456F3}.Release|Any CPU.Build.0 = Release|Any CPU - {4E9512BA-F963-472A-B689-37D4D32456F3}.Release|x64.ActiveCfg = Release|Any CPU - {4E9512BA-F963-472A-B689-37D4D32456F3}.Release|x64.Build.0 = Release|Any CPU - {4E9512BA-F963-472A-B689-37D4D32456F3}.Release|x86.ActiveCfg = Release|x86 - {4E9512BA-F963-472A-B689-37D4D32456F3}.Release|x86.Build.0 = Release|x86 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Checked|Any CPU.ActiveCfg = Checked|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Checked|Any CPU.Build.0 = Checked|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Checked|x64.ActiveCfg = Checked|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Checked|x64.Build.0 = Checked|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Checked|x86.ActiveCfg = Checked|x86 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Checked|x86.Build.0 = Checked|x86 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Debug|Any CPU.ActiveCfg = Debug|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Debug|Any CPU.Build.0 = Debug|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Debug|x64.ActiveCfg = Debug|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Debug|x64.Build.0 = Debug|x64 
- {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Debug|x86.ActiveCfg = Debug|x86 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Debug|x86.Build.0 = Debug|x86 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Release|Any CPU.ActiveCfg = Release|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Release|Any CPU.Build.0 = Release|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Release|x64.ActiveCfg = Release|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Release|x64.Build.0 = Release|x64 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Release|x86.ActiveCfg = Release|x86 - {99581B2F-ECF1-46A9-B4BC-AE6A54C1BC3C}.Release|x86.Build.0 = Release|x86 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {61B9BC5F-9DAA-4BC6-9150-9CDFDDDBEA80} - EndGlobalSection -EndGlobal diff --git a/src/coreclr/tools/r2rdump/R2RDump.slnx b/src/coreclr/tools/r2rdump/R2RDump.slnx new file mode 100644 index 000000000000..87cdcb67bd45 --- /dev/null +++ b/src/coreclr/tools/r2rdump/R2RDump.slnx @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/coreclr/tools/r2rdump/R2RDumpRootCommand.cs b/src/coreclr/tools/r2rdump/R2RDumpRootCommand.cs index 818db56a3126..b7cede62ae50 100644 --- a/src/coreclr/tools/r2rdump/R2RDumpRootCommand.cs +++ b/src/coreclr/tools/r2rdump/R2RDumpRootCommand.cs @@ -8,75 +8,75 @@ namespace R2RDump { - internal sealed class R2RDumpRootCommand : CliRootCommand + internal sealed class R2RDumpRootCommand : RootCommand { - public CliOption> In { get; } = + public Option> In { get; } = new("--in", "-i") { CustomParser = result => Helpers.BuildPathList(result.Tokens), DefaultValueFactory = result => Helpers.BuildPathList(result.Tokens), Description = "Input file(s) to dump. Expects them to by ReadyToRun images" }; - public CliOption Out { get; } = + public Option Out { get; } = new("--out", "-o") { Description = "Output file path. 
Dumps everything to the specified file except for help message and exception messages" }; - public CliOption Raw { get; } = + public Option Raw { get; } = new("--raw") { Description = "Dump the raw bytes of each section or runtime function" }; - public CliOption Header { get; } = + public Option Header { get; } = new("--header") { Description = "Dump R2R header" }; - public CliOption Disasm { get; } = + public Option Disasm { get; } = new("--disasm", "-d") { Description = "Show disassembly of methods or runtime functions" }; - public CliOption Naked { get; } = + public Option Naked { get; } = new("--naked") { Description = "Naked dump suppresses most compilation details like placement addresses" }; - public CliOption HideOffsets { get; } = + public Option HideOffsets { get; } = new("--hide-offsets", "--ho") { Description = "Hide offsets in naked disassembly" }; - public CliOption Query { get; } = + public Option Query { get; } = new("--query", "-q") { Description = "Query method by exact name, signature, row ID or token" }; - public CliOption Keyword { get; } = + public Option Keyword { get; } = new("--keyword", "-k") { Description = "Search method by keyword" }; - public CliOption RuntimeFunction { get; } = + public Option RuntimeFunction { get; } = new("--runtimefunction", "-f") { Description = "Get one runtime function by id or relative virtual address" }; - public CliOption Section { get; } = + public Option Section { get; } = new("--section", "-s") { Description = "Get section by keyword" }; - public CliOption Unwind { get; } = + public Option Unwind { get; } = new("--unwind") { Description = "Dump unwindInfo" }; - public CliOption GC { get; } = + public Option GC { get; } = new("--gc") { Description = "Dump gcInfo and slot table" }; - public CliOption Pgo { get; } = + public Option Pgo { get; } = new("--pgo") { Description = "Dump embedded pgo instrumentation data" }; - public CliOption SectionContents { get; } = + public Option SectionContents { get; } = new("--sectionContents", "--sc") { Description = "Dump section contents" }; - public CliOption EntryPoints { get; } = + public Option EntryPoints { get; } = new("--entrypoints", "-e") { Description = "Dump list of method / instance entrypoints in the R2R file" }; - public CliOption Normalize { get; } = + public Option Normalize { get; } = new("--normalize", "-n") { Description = "Normalize dump by sorting the various tables and methods (default = unsorted i.e. 
file order)" }; - public CliOption HideTransitions { get; } = + public Option HideTransitions { get; } = new("--hide-transitions", "--ht") { Description = "Don't include GC transitions in disassembly output" }; - public CliOption Verbose { get; } = + public Option Verbose { get; } = new("--verbose") { Description = "Dump disassembly, unwindInfo, gcInfo and sectionContents" }; - public CliOption Diff { get; } = + public Option Diff { get; } = new("--diff") { Description = "Compare two R2R images" }; - public CliOption DiffHideSameDisasm { get; } = + public Option DiffHideSameDisasm { get; } = new("--diff-hide-same-disasm") { Description = "In matching method diff dump, hide functions with identical disassembly" }; - public CliOption CreatePDB { get; } = + public Option CreatePDB { get; } = new("--create-pdb") { Description = "Create PDB" }; - public CliOption PdbPath { get; } = + public Option PdbPath { get; } = new("--pdb-path") { Description = "PDB output path for --create-pdb" }; - public CliOption CreatePerfmap { get; } = + public Option CreatePerfmap { get; } = new("--create-perfmap") { Description = "Create PerfMap" }; - public CliOption PerfmapPath { get; } = + public Option PerfmapPath { get; } = new("--perfmap-path") { Description = "PerfMap output path for --create-perfmap" }; - public CliOption PerfmapFormatVersion { get; } = + public Option PerfmapFormatVersion { get; } = new("--perfmap-format-version") { DefaultValueFactory = _ => ILCompiler.Diagnostics.PerfMapWriter.CurrentFormatVersion, Description = "PerfMap format version for --create-perfmap" }; - public CliOption> Reference { get; } = + public Option> Reference { get; } = new("--reference", "-r") { CustomParser = result => Helpers.BuildPathList(result.Tokens), DefaultValueFactory = result => Helpers.BuildPathList(result.Tokens), Description = "Explicit reference assembly files" }; - public CliOption ReferencePath { get; } = + public Option ReferencePath { get; } = new("--referencePath", "--rp") { Description = "Search paths for reference assemblies" }; - public CliOption SignatureBinary { get; } = + public Option SignatureBinary { get; } = new("--signatureBinary", "--sb") { Description = "Append signature binary to its textual representation" }; - public CliOption InlineSignatureBinary { get; } = + public Option InlineSignatureBinary { get; } = new("--inlineSignatureBinary", "--isb") { Description = "Embed binary signature into its textual representation" }; - public CliOption ValidateDebugInfo { get; } = + public Option ValidateDebugInfo { get; } = new("--validateDebugInfo", "--val") { Description = "Validate functions reported debug info." 
}; public ParseResult Result; diff --git a/src/coreclr/tools/r2rtest/Buckets.cs b/src/coreclr/tools/r2rtest/Buckets.cs index 927182e4926c..4eb73314003e 100644 --- a/src/coreclr/tools/r2rtest/Buckets.cs +++ b/src/coreclr/tools/r2rtest/Buckets.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; -using System.CommandLine; using System.Diagnostics; using System.IO; using System.Linq; diff --git a/src/coreclr/tools/r2rtest/CommandLineOptions.cs b/src/coreclr/tools/r2rtest/CommandLineOptions.cs index 4f20677c4889..c892030f1af9 100644 --- a/src/coreclr/tools/r2rtest/CommandLineOptions.cs +++ b/src/coreclr/tools/r2rtest/CommandLineOptions.cs @@ -8,11 +8,11 @@ namespace R2RTest { - public class R2RTestRootCommand : CliRootCommand + public class R2RTestRootCommand : RootCommand { - void CreateCommand(string name, string description, CliOption[] options, Func action) + void CreateCommand(string name, string description, Option[] options, Func action) { - CliCommand command = new(name, description); + Command command = new(name, description); foreach (var option in GetCommonOptions()) command.Options.Add(option); foreach (var option in options) @@ -21,14 +21,14 @@ void CreateCommand(string name, string description, CliOption[] options, Func new CliOption[] { CoreRootDirectory, DotNetCli }; + Option[] GetCommonOptions() => new Option[] { CoreRootDirectory, DotNetCli }; R2RTestRootCommand() { OutputDirectory.AcceptLegalFilePathsOnly(); CreateCommand("compile-directory", "Compile all assemblies in directory", - new CliOption[] + new Option[] { InputDirectory, OutputDirectory, @@ -67,7 +67,7 @@ void CreateCommand(string name, string description, CliOption[] options, Func InputDirectory { get; } = - new CliOption("--input-directory", "-in") { Description = "Folder containing assemblies to optimize" }.AcceptExistingOnly(); + public Option InputDirectory { get; } = + new Option("--input-directory", "-in") { Description = "Folder containing assemblies to optimize" }.AcceptExistingOnly(); - public CliOption OutputDirectory { get; } = - new CliOption("--output-directory", "-out") { Description = "Folder to emit compiled assemblies" }; + public Option OutputDirectory { get; } = + new Option("--output-directory", "-out") { Description = "Folder to emit compiled assemblies" }; - public CliOption CoreRootDirectory { get; } = - new CliOption("--core-root-directory", "-cr") { Description = "Location of the CoreCLR CORE_ROOT folder", Arity = ArgumentArity.ExactlyOne }.AcceptExistingOnly(); + public Option CoreRootDirectory { get; } = + new Option("--core-root-directory", "-cr") { Description = "Location of the CoreCLR CORE_ROOT folder", Arity = ArgumentArity.ExactlyOne }.AcceptExistingOnly(); - public CliOption ReferencePath { get; } = - new CliOption("--reference-path", "-r") { Description = "Folder containing assemblies to reference during compilation", Arity = ArgumentArity.ZeroOrMore }.AcceptExistingOnly(); + public Option ReferencePath { get; } = + new Option("--reference-path", "-r") { Description = "Folder containing assemblies to reference during compilation", Arity = ArgumentArity.ZeroOrMore }.AcceptExistingOnly(); - public CliOption MibcPath { get; } = - new CliOption("--mibc-path", "-m") { Description = "Mibc files to use in compilation", Arity = ArgumentArity.ZeroOrMore }.AcceptExistingOnly(); + public Option MibcPath { get; } = + new Option("--mibc-path", "-m") { Description = "Mibc files to use in compilation", Arity = ArgumentArity.ZeroOrMore }.AcceptExistingOnly(); - public CliOption 
Crossgen2Path { get; } = - new CliOption("--crossgen2-path", "-c2p") { Description = "Explicit Crossgen2 path (useful for cross-targeting)" }.AcceptExistingOnly(); + public Option Crossgen2Path { get; } = + new Option("--crossgen2-path", "-c2p") { Description = "Explicit Crossgen2 path (useful for cross-targeting)" }.AcceptExistingOnly(); - public CliOption VerifyTypeAndFieldLayout { get; } = + public Option VerifyTypeAndFieldLayout { get; } = new("--verify-type-and-field-layout") { Description = "Verify that struct type layout and field offsets match between compile time and runtime. Use only for diagnostic purposes." }; - public CliOption NoJit { get; } = + public Option NoJit { get; } = new("--nojit") { Description = "Don't run tests in JITted mode" }; - public CliOption NoCrossgen2 { get; } = + public Option NoCrossgen2 { get; } = new("--nocrossgen2") { Description = "Don't run tests in Crossgen2 mode" }; - public CliOption Exe { get; } = + public Option Exe { get; } = new("--exe") { Description = "Don't compile tests, just execute them" }; - public CliOption NoExe { get; } = + public Option NoExe { get; } = new("--noexe") { Description = "Compilation-only mode (don't execute the built apps)" }; - public CliOption NoEtw { get; } = + public Option NoEtw { get; } = new("--noetw") { Description = "Don't capture jitted methods using ETW" }; - public CliOption NoCleanup { get; } = + public Option NoCleanup { get; } = new("--nocleanup") { Description = "Don't clean up compilation artifacts after test runs" }; - public CliOption Map { get; } = + public Option Map { get; } = new("--map") { Description = "Generate a map file (Crossgen2)" }; - public CliOption Pdb { get; } = + public Option Pdb { get; } = new("--pdb") { Description = "Generate PDB symbol information (Crossgen2 / Windows only)" }; - public CliOption Perfmap { get; } = + public Option Perfmap { get; } = new("--perfmap") { Description = "Generate perfmap symbol information" }; - public CliOption PerfmapFormatVersion { get; } = + public Option PerfmapFormatVersion { get; } = new("--perfmap-format-version") { DefaultValueFactory = _ => 1, Description = "Perfmap format version to generate" }; - public CliOption DegreeOfParallelism { get; } = + public Option DegreeOfParallelism { get; } = new("--degree-of-parallelism", "-dop") { Description = "Override default compilation / execution DOP (default = logical processor count)" }; - public CliOption Sequential { get; } = + public Option Sequential { get; } = new("--sequential") { Description = "Run tests sequentially" }; - public CliOption Iterations { get; } = + public Option Iterations { get; } = new("--iterations") { DefaultValueFactory = _ => 1, Description = "Number of iterations for each test execution" }; - public CliOption Framework { get; } = + public Option Framework { get; } = new("--framework") { Description = "Precompile and use native framework" }; - public CliOption UseFramework { get; } = + public Option UseFramework { get; } = new("--use-framework") { Description = "Use native framework (don't precompile, assume previously compiled)" }; - public CliOption Release { get; } = + public Option Release { get; } = new("--release") { Description = "Build the tests in release mode" }; - public CliOption LargeBubble { get; } = + public Option LargeBubble { get; } = new("--large-bubble") { Description = "Assume all input files as part of one version bubble" }; - public CliOption Composite { get; } = + public Option Composite { get; } = new("--composite") { Description = "Compile 
tests in composite R2R mode" }; - public CliOption Crossgen2Parallelism { get; } = + public Option Crossgen2Parallelism { get; } = new("--crossgen2-parallelism") { Description = "Max number of threads to use in Crossgen2 (default = logical processor count)" }; - public CliOption Crossgen2JitPath { get; } = + public Option Crossgen2JitPath { get; } = new("--crossgen2-jitpath") { Description = "Jit path to use for crossgen2" }; - public CliOption IssuesPath { get; } = + public Option IssuesPath { get; } = new("--issues-path", "-ip") { Description = "Path to issues.targets", Arity = ArgumentArity.ZeroOrMore }; - public CliOption CompilationTimeoutMinutes { get; } = + public Option CompilationTimeoutMinutes { get; } = new("--compilation-timeout-minutes", "-ct") { Description = "Compilation timeout (minutes)" }; - public CliOption ExecutionTimeoutMinutes { get; } = + public Option ExecutionTimeoutMinutes { get; } = new("--execution-timeout-minutes", "-et") { Description = "Execution timeout (minutes)" }; - public CliOption R2RDumpPath { get; } = - new CliOption("--r2r-dump-path") { Description = "Path to R2RDump.exe/dll" }.AcceptExistingOnly(); + public Option R2RDumpPath { get; } = + new Option("--r2r-dump-path") { Description = "Path to R2RDump.exe/dll" }.AcceptExistingOnly(); - public CliOption MeasurePerf { get; } = + public Option MeasurePerf { get; } = new("--measure-perf") { Description = "Print out compilation time" }; - public CliOption InputFileSearchString { get; } = + public Option InputFileSearchString { get; } = new("--input-file-search-string", "-input-file") { Description = "Search string for input files in the input directory" }; - public CliOption GCStress { get; } = + public Option GCStress { get; } = new("--gcstress") { Description = "Run tests with the specified GC stress level enabled (the argument value is in hex)" }; - public CliOption DotNetCli { get; } = + public Option DotNetCli { get; } = new("--dotnet-cli", "-cli") { Description = "For dev box testing, point at .NET 5 dotnet.exe or /dotnet.cmd." 
}; - public CliOption TargetArch { get; } = + public Option TargetArch { get; } = new("--target-arch") { Description = "Target architecture for crossgen2" }; // // compile-nuget specific options // - public CliOption PackageList { get; } = - new CliOption("--package-list", "-pl") { Description = "Text file containing a package name on each line" }.AcceptExistingOnly(); + public Option PackageList { get; } = + new Option("--package-list", "-pl") { Description = "Text file containing a package name on each line" }.AcceptExistingOnly(); // // compile-serp specific options // - public CliOption AspNetPath { get; } = - new CliOption("--asp-net-path", "-asp") { Description = "Path to SERP's ASP.NET Core folder" }.AcceptExistingOnly(); + public Option AspNetPath { get; } = + new Option("--asp-net-path", "-asp") { Description = "Path to SERP's ASP.NET Core folder" }.AcceptExistingOnly(); private static int Main(string[] args) => - new CliConfiguration(new R2RTestRootCommand().UseVersion()).Invoke(args); + new CommandLineConfiguration(new R2RTestRootCommand().UseVersion()).Invoke(args); } public partial class BuildOptions diff --git a/src/coreclr/tools/runincontext/runincontext.csproj b/src/coreclr/tools/runincontext/runincontext.csproj index b42540941f65..39797c31de8a 100644 --- a/src/coreclr/tools/runincontext/runincontext.csproj +++ b/src/coreclr/tools/runincontext/runincontext.csproj @@ -2,7 +2,7 @@ Exe $(NetCoreAppToolCurrent) - $(MicrosoftNETCoreAppRuntimewinx64Version) + $(MicrosoftNETCoreAppRefVersion) false BuildOnly $(RuntimeBinDir) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 18fd918a7a5c..cefd14edde95 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -193,6 +193,18 @@ struct Agnostic_CORINFO_EE_INFO DWORD osType; }; +struct Agnostic_CORINFO_ASYNC_INFO +{ + DWORDLONG continuationClsHnd; + DWORDLONG continuationNextFldHnd; + DWORDLONG continuationResumeFldHnd; + DWORDLONG continuationStateFldHnd; + DWORDLONG continuationFlagsFldHnd; + DWORDLONG continuationDataFldHnd; + DWORDLONG continuationGCDataFldHnd; + DWORD continuationsNeedMethodHandle; +}; + struct Agnostic_GetOSRInfo { DWORD index; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp index de8925555cb9..e91450825837 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp @@ -854,15 +854,10 @@ void CompileResult::applyRelocs(RelocContext* rc, unsigned char* block1, ULONG b { if ((section_begin <= address) && (address < section_end)) // A reloc for our section? { - INT64 delta = (INT64)(tmp.target - fixupLocation); - if (!FitsInRel28(delta)) - { - // Assume here that we would need a jump stub for this relocation and pretend - // that the jump stub is located right at the end of the method. - DWORDLONG target = (DWORDLONG)originalAddr + (DWORDLONG)blocksize1; - delta = (INT64)(target - fixupLocation); - } - PutArm64Rel28((UINT32*)address, (INT32)delta); + // Similar to x64's IMAGE_REL_BASED_REL32 handling we + // will handle this by also hardcoding the bottom bits + // of the target into the instruction. 
+ PutArm64Rel28((UINT32*)address, (INT32)tmp.target); } wasRelocHandled = true; } @@ -911,11 +906,6 @@ void CompileResult::applyRelocs(RelocContext* rc, unsigned char* block1, ULONG b } } - if (targetArch == SPMI_TARGET_ARCHITECTURE_LOONGARCH64) - { - Assert(!"FIXME: Not Implements on loongarch64"); - } - if (IsSpmiTarget64Bit()) { if (!wasRelocHandled && (relocType == IMAGE_REL_BASED_DIR64)) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/logging.cpp b/src/coreclr/tools/superpmi/superpmi-shared/logging.cpp index e72e9b130275..63b2c5ee4a11 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/logging.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/logging.cpp @@ -20,7 +20,7 @@ bool Logger::s_initialized = false; UINT32 Logger::s_logLevel = LOGMASK_DEFAULT; HANDLE Logger::s_logFile = INVALID_HANDLE_VALUE; char* Logger::s_logFilePath = nullptr; -CRITICAL_SECTION Logger::s_critSec; +minipal_mutex Logger::s_critSec; // // Initializes the logging subsystem. This must be called before invoking any of the logging functionality. @@ -30,7 +30,7 @@ void Logger::Initialize() { if (!s_initialized) { - InitializeCriticalSection(&s_critSec); + minipal_mutex_init(&s_critSec); s_initialized = true; } } @@ -43,7 +43,7 @@ void Logger::Shutdown() { if (s_initialized) { - DeleteCriticalSection(&s_critSec); + minipal_mutex_destroy(&s_critSec); CloseLogFile(); s_initialized = false; } @@ -244,7 +244,7 @@ void Logger::LogVprintf( // maintaining chronological order is crucial, then we can implement a priority queueing system // for log messages. - EnterCriticalSection(&s_critSec); + minipal_mutex_enter(&s_critSec); if (level < LOGLEVEL_INFO) fprintf(dest, "%s: ", logLevelStr); @@ -305,7 +305,7 @@ void Logger::LogVprintf( CleanUp: #endif // !TARGET_UNIX - LeaveCriticalSection(&s_critSec); + minipal_mutex_leave(&s_critSec); delete[] fullMsg; } diff --git a/src/coreclr/tools/superpmi/superpmi-shared/logging.h b/src/coreclr/tools/superpmi/superpmi-shared/logging.h index 15d7d097fcb5..f7af01326302 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/logging.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/logging.h @@ -7,6 +7,8 @@ #ifndef _Logging #define _Logging +#include + // // General purpose logging macros // @@ -65,7 +67,7 @@ class Logger static UINT32 s_logLevel; static HANDLE s_logFile; static char* s_logFilePath; - static CRITICAL_SECTION s_critSec; + static minipal_mutex s_critSec; public: static void Initialize(); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h index b9be6659ed77..f81a8680ab5d 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h @@ -79,6 +79,7 @@ LWM(GetDefaultEqualityComparerClass, DWORDLONG, DWORDLONG) LWM(GetSZArrayHelperEnumeratorClass, DWORDLONG, DWORDLONG) LWM(GetDelegateCtor, Agnostic_GetDelegateCtorIn, Agnostic_GetDelegateCtorOut) LWM(GetEEInfo, DWORD, Agnostic_CORINFO_EE_INFO) +LWM(GetAsyncInfo, DWORD, Agnostic_CORINFO_ASYNC_INFO) LWM(GetEHinfo, DLD, Agnostic_CORINFO_EH_CLAUSE) LWM(GetStaticFieldContent, DLDDD, DD) LWM(GetObjectContent, DLDD, DD) @@ -102,6 +103,7 @@ LWM(GetLazyStringLiteralHelper, DWORDLONG, DWORD) LWM(GetLocationOfThisType, DWORDLONG, Agnostic_CORINFO_LOOKUP_KIND) LWM(IsIntrinsic, DWORDLONG, DWORD) LWM(NotifyMethodInfoUsage, DWORDLONG, DWORD) +LWM(NotifyInstructionSetUsage, DD, DWORD) LWM(GetMethodAttribs, DWORDLONG, DWORD) LWM(GetClassAssemblyName, DWORDLONG, DWORD) LWM(GetMethodClass, DWORDLONG, 
DWORDLONG)
@@ -129,6 +131,7 @@ LWM(GetSystemVAmd64PassStructInRegisterDescriptor, DWORDLONG, Agnostic_GetSystem
 LWM(GetSwiftLowering, DWORDLONG, Agnostic_GetSwiftLowering)
 LWM(GetFpStructLowering, DWORDLONG, Agnostic_GetFpStructLowering)
 LWM(GetTailCallHelpers, Agnostic_GetTailCallHelpers, Agnostic_CORINFO_TAILCALL_HELPERS)
+LWM(GetAsyncResumptionStub, DWORD, DWORDLONG)
 LWM(UpdateEntryPointForTailCall, Agnostic_CORINFO_CONST_LOOKUP, Agnostic_CORINFO_CONST_LOOKUP)
 LWM(GetSpecialCopyHelper, DWORDLONG, DWORDLONG)
 LWM(GetThreadTLSIndex, DWORD, DLD)
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
index 351622b6a61c..e414a1d0b00c 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
@@ -791,6 +791,43 @@ bool MethodContext::repNotifyMethodInfoUsage(CORINFO_METHOD_HANDLE ftn)
     return value != 0;
 }
+void MethodContext::recNotifyInstructionSetUsage(CORINFO_InstructionSet isa, bool supported, bool result)
+{
+    if (NotifyInstructionSetUsage == nullptr)
+        NotifyInstructionSetUsage = new LightWeightMap<DD, DWORD>();
+
+    DD key{};
+    key.A = (DWORD)isa;
+    key.B = supported ? 1 : 0;
+    NotifyInstructionSetUsage->Add(key, result ? 1 : 0);
+    DEBUG_REC(dmpNotifyInstructionSetUsage(key, result ? 1 : 0));
+}
+void MethodContext::dmpNotifyInstructionSetUsage(DD key, DWORD value)
+{
+    printf("NotifyInstructionSetUsage key isa-%u, supported-%u, res-%u", key.A, key.B, value);
+}
+bool MethodContext::repNotifyInstructionSetUsage(CORINFO_InstructionSet isa, bool supported)
+{
+    DD key{};
+    key.A = (DWORD)isa;
+    key.B = supported ? 1 : 0;
+
+    if (NotifyInstructionSetUsage != nullptr)
+    {
+        int index = NotifyInstructionSetUsage->GetIndex(key);
+        if (index != -1)
+        {
+            DWORD value = NotifyInstructionSetUsage->GetItem(index);
+            DEBUG_REP(dmpNotifyInstructionSetUsage(key, value));
+            return value != 0;
+        }
+    }
+
+    // Fall back to most likely implementation instead of missing, since ISA
+    // usage changes are quite common on normal JIT changes.
+ return supported; +} + void MethodContext::recGetMethodAttribs(CORINFO_METHOD_HANDLE methodHandle, DWORD attribs) { if (GetMethodAttribs == nullptr) @@ -1159,14 +1196,12 @@ const char* CorJitFlagToString(CORJIT_FLAGS::CorJitFlag flag) return "CORJIT_FLAG_ALT_JIT"; case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_FROZEN_ALLOC_ALLOWED: return "CORJIT_FLAG_FROZEN_ALLOC_ALLOWED"; - case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_READYTORUN: - return "CORJIT_FLAG_READYTORUN"; + case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_AOT: + return "CORJIT_FLAG_AOT"; case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_PROF_ENTERLEAVE: return "CORJIT_FLAG_PROF_ENTERLEAVE"; case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_PROF_NO_PINVOKE_INLINE: return "CORJIT_FLAG_PROF_NO_PINVOKE_INLINE"; - case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_PREJIT: - return "CORJIT_FLAG_PREJIT"; case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_RELOC: return "CORJIT_FLAG_RELOC"; case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_IL_STUB: @@ -1203,11 +1238,6 @@ const char* CorJitFlagToString(CORJIT_FLAGS::CorJitFlag flag) return "CORJIT_FLAG_SOFTFP_ABI"; #endif // defined(TARGET_ARM) -#if defined(TARGET_X86) || defined(TARGET_AMD64) - case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_VECTOR512_THROTTLING: - return "CORJIT_FLAG_VECTOR512_THROTTLING"; -#endif // defined(TARGET_XARCH) - default: return ""; } @@ -2145,7 +2175,7 @@ void MethodContext::recGetRuntimeTypePointer(CORINFO_CLASS_HANDLE cls, CORINFO_O GetRuntimeTypePointer = new LightWeightMap(); DWORDLONG key = CastHandle(cls); - DWORDLONG value = (DWORDLONG)result; + DWORDLONG value = CastHandle(result); GetRuntimeTypePointer->Add(key, value); DEBUG_REC(dmpGetRuntimeTypePointer(key, value)); } @@ -2166,7 +2196,7 @@ void MethodContext::recIsObjectImmutable(CORINFO_OBJECT_HANDLE objPtr, bool resu if (IsObjectImmutable == nullptr) IsObjectImmutable = new LightWeightMap(); - DWORDLONG key = (DWORDLONG)objPtr; + DWORDLONG key = CastHandle(objPtr); DWORD value = (DWORD)result; IsObjectImmutable->Add(key, value); DEBUG_REC(dmpIsObjectImmutable(key, value)); @@ -2177,7 +2207,7 @@ void MethodContext::dmpIsObjectImmutable(DWORDLONG key, DWORD value) } bool MethodContext::repIsObjectImmutable(CORINFO_OBJECT_HANDLE objPtr) { - DWORDLONG key = (DWORDLONG)objPtr; + DWORDLONG key = CastHandle(objPtr); DWORD value = LookupByKeyOrMiss(IsObjectImmutable, key, ": key %016" PRIX64 "", key); DEBUG_REP(dmpIsObjectImmutable(key, value)); return (bool)value; @@ -2224,8 +2254,8 @@ void MethodContext::recGetObjectType(CORINFO_OBJECT_HANDLE objPtr, CORINFO_CLASS if (GetObjectType == nullptr) GetObjectType = new LightWeightMap(); - DWORDLONG key = (DWORDLONG)objPtr; - DWORDLONG value = (DWORDLONG)result; + DWORDLONG key = CastHandle(objPtr); + DWORDLONG value = CastHandle(result); GetObjectType->Add(key, value); DEBUG_REC(dmpGetObjectType(key, value)); } @@ -2235,7 +2265,7 @@ void MethodContext::dmpGetObjectType(DWORDLONG key, DWORDLONG value) } CORINFO_CLASS_HANDLE MethodContext::repGetObjectType(CORINFO_OBJECT_HANDLE objPtr) { - DWORDLONG key = (DWORDLONG)objPtr; + DWORDLONG key = CastHandle(objPtr); DWORDLONG value = LookupByKeyOrMiss(GetObjectType, key, ": key %016" PRIX64 "", key); DEBUG_REP(dmpGetObjectType(key, value)); return (CORINFO_CLASS_HANDLE)value; @@ -4429,6 +4459,48 @@ void MethodContext::repGetEEInfo(CORINFO_EE_INFO* pEEInfoOut) pEEInfoOut->osType = (CORINFO_OS)value.osType; } +void MethodContext::recGetAsyncInfo(const CORINFO_ASYNC_INFO* pAsyncInfo) +{ + if (GetAsyncInfo == nullptr) + GetAsyncInfo = new LightWeightMap(); + + 
Agnostic_CORINFO_ASYNC_INFO value; + ZeroMemory(&value, sizeof(value)); + + value.continuationClsHnd = CastHandle(pAsyncInfo->continuationClsHnd); + value.continuationNextFldHnd = CastHandle(pAsyncInfo->continuationNextFldHnd); + value.continuationResumeFldHnd = CastHandle(pAsyncInfo->continuationResumeFldHnd); + value.continuationStateFldHnd = CastHandle(pAsyncInfo->continuationStateFldHnd); + value.continuationFlagsFldHnd = CastHandle(pAsyncInfo->continuationFlagsFldHnd); + value.continuationDataFldHnd = CastHandle(pAsyncInfo->continuationDataFldHnd); + value.continuationGCDataFldHnd = CastHandle(pAsyncInfo->continuationGCDataFldHnd); + value.continuationsNeedMethodHandle = pAsyncInfo->continuationsNeedMethodHandle ? 1 : 0; + + GetAsyncInfo->Add(0, value); + DEBUG_REC(dmpGetAsyncInfo(0, value)); +} +void MethodContext::dmpGetAsyncInfo(DWORD key, const Agnostic_CORINFO_ASYNC_INFO& value) +{ + printf("GetAsyncInfo key %u value contClsHnd-%016" PRIX64 " contNextFldHnd-%016" PRIX64 " contResumeFldHnd-%016" PRIX64 + " contStateFldHnd-%016" PRIX64 " contFlagsFldHnd-%016" PRIX64 " contDataFldHnd-%016" PRIX64 " contGCDataFldHnd-%016" PRIX64 " contsNeedMethodHandle-%d", + key, value.continuationClsHnd, value.continuationNextFldHnd, value.continuationResumeFldHnd, + value.continuationStateFldHnd, value.continuationFlagsFldHnd, value.continuationDataFldHnd, + value.continuationGCDataFldHnd, value.continuationsNeedMethodHandle); +} +void MethodContext::repGetAsyncInfo(CORINFO_ASYNC_INFO* pAsyncInfoOut) +{ + Agnostic_CORINFO_ASYNC_INFO value = LookupByKeyOrMissNoMessage(GetAsyncInfo, 0); + pAsyncInfoOut->continuationClsHnd = (CORINFO_CLASS_HANDLE)value.continuationClsHnd; + pAsyncInfoOut->continuationNextFldHnd = (CORINFO_FIELD_HANDLE)value.continuationNextFldHnd; + pAsyncInfoOut->continuationResumeFldHnd = (CORINFO_FIELD_HANDLE)value.continuationResumeFldHnd; + pAsyncInfoOut->continuationStateFldHnd = (CORINFO_FIELD_HANDLE)value.continuationStateFldHnd; + pAsyncInfoOut->continuationFlagsFldHnd = (CORINFO_FIELD_HANDLE)value.continuationFlagsFldHnd; + pAsyncInfoOut->continuationDataFldHnd = (CORINFO_FIELD_HANDLE)value.continuationDataFldHnd; + pAsyncInfoOut->continuationGCDataFldHnd = (CORINFO_FIELD_HANDLE)value.continuationGCDataFldHnd; + pAsyncInfoOut->continuationsNeedMethodHandle = value.continuationsNeedMethodHandle != 0; + DEBUG_REP(dmpGetAsyncInfo(0, value)); +} + void MethodContext::recGetGSCookie(GSCookie* pCookieVal, GSCookie** ppCookieVal) { if (GetGSCookie == nullptr) @@ -6911,6 +6983,25 @@ bool MethodContext::repGetTailCallHelpers( return true; } + +void MethodContext::recGetAsyncResumptionStub(CORINFO_METHOD_HANDLE hnd) +{ + if (GetAsyncResumptionStub == nullptr) + GetAsyncResumptionStub = new LightWeightMap(); + + GetAsyncResumptionStub->Add(0, CastHandle(hnd)); + DEBUG_REC(dmpGetAsyncResumptionStub(CastHandle(hnd))); +} +void MethodContext::dmpGetAsyncResumptionStub(DWORD key, DWORDLONG hnd) +{ + printf("GetAsyncResumptionStub key-%u, value-%016" PRIX64, key, hnd); +} +CORINFO_METHOD_HANDLE MethodContext::repGetAsyncResumptionStub() +{ + DWORDLONG hnd = LookupByKeyOrMissNoMessage(GetAsyncResumptionStub, 0); + return (CORINFO_METHOD_HANDLE)hnd; +} + void MethodContext::recUpdateEntryPointForTailCall( const CORINFO_CONST_LOOKUP& origEntryPoint, const CORINFO_CONST_LOOKUP& newEntryPoint) @@ -7462,7 +7553,7 @@ MethodContext::Environment MethodContext::cloneEnvironment() } if (GetStringConfigValue != nullptr) { - env.getStingConfigValue = new LightWeightMap(*GetStringConfigValue); + 
env.getStringConfigValue = new LightWeightMap(*GetStringConfigValue); } return env; } @@ -7488,7 +7579,7 @@ bool MethodContext::IsEnvironmentHeaderEqual(const Environment& prevEnv) { return false; } - if (!AreLWMHeadersEqual(prevEnv.getStingConfigValue, GetStringConfigValue)) + if (!AreLWMHeadersEqual(prevEnv.getStringConfigValue, GetStringConfigValue)) { return false; } @@ -7502,7 +7593,7 @@ bool MethodContext::IsEnvironmentContentEqual(const Environment& prevEnv) { return false; } - if (!IsStringContentEqual(prevEnv.getStingConfigValue, GetStringConfigValue)) + if (!IsStringContentEqual(prevEnv.getStringConfigValue, GetStringConfigValue)) { return false; } diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h index b6997b94e6c2..1a25a3a24c5a 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h @@ -126,6 +126,10 @@ class MethodContext void dmpNotifyMethodInfoUsage(DWORDLONG key, DWORD value); bool repNotifyMethodInfoUsage(CORINFO_METHOD_HANDLE ftn); + void recNotifyInstructionSetUsage(CORINFO_InstructionSet instructionSet, bool supported, bool result); + void dmpNotifyInstructionSetUsage(DD key, DWORD supported); + bool repNotifyInstructionSetUsage(CORINFO_InstructionSet instructionSet, bool supported); + void recGetMethodAttribs(CORINFO_METHOD_HANDLE methodHandle, DWORD attribs); void dmpGetMethodAttribs(DWORDLONG key, DWORD value); DWORD repGetMethodAttribs(CORINFO_METHOD_HANDLE methodHandle); @@ -570,6 +574,10 @@ class MethodContext void dmpGetEEInfo(DWORD key, const Agnostic_CORINFO_EE_INFO& value); void repGetEEInfo(CORINFO_EE_INFO* pEEInfoOut); + void recGetAsyncInfo(const CORINFO_ASYNC_INFO* pAsyncInfo); + void dmpGetAsyncInfo(DWORD key, const Agnostic_CORINFO_ASYNC_INFO& value); + void repGetAsyncInfo(CORINFO_ASYNC_INFO* pAsyncInfoOut); + void recGetGSCookie(GSCookie* pCookieVal, GSCookie** ppCookieVal); void dmpGetGSCookie(DWORD key, DLDL value); void repGetGSCookie(GSCookie* pCookieVal, GSCookie** ppCookieVal); @@ -867,6 +875,10 @@ class MethodContext CORINFO_GET_TAILCALL_HELPERS_FLAGS flags, CORINFO_TAILCALL_HELPERS* pResult); + void recGetAsyncResumptionStub(CORINFO_METHOD_HANDLE hnd); + void dmpGetAsyncResumptionStub(DWORD key, DWORDLONG handle); + CORINFO_METHOD_HANDLE repGetAsyncResumptionStub(); + void recUpdateEntryPointForTailCall(const CORINFO_CONST_LOOKUP& origEntryPoint, const CORINFO_CONST_LOOKUP& newEntryPoint); void dmpUpdateEntryPointForTailCall(const Agnostic_CORINFO_CONST_LOOKUP& origEntryPoint, const Agnostic_CORINFO_CONST_LOOKUP& newEntryPoint); void repUpdateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint); @@ -911,12 +923,12 @@ class MethodContext struct Environment { - Environment() : getIntConfigValue(nullptr), getStingConfigValue(nullptr) + Environment() : getIntConfigValue(nullptr), getStringConfigValue(nullptr) { } LightWeightMap* getIntConfigValue; - LightWeightMap* getStingConfigValue; + LightWeightMap* getStringConfigValue; }; Environment cloneEnvironment(); @@ -1203,6 +1215,9 @@ enum mcPackets Packet_GetSZArrayHelperEnumeratorClass = 226, Packet_GetMethodInstantiationArgument = 227, Packet_GetInstantiatedEntry = 228, + Packet_NotifyInstructionSetUsage = 229, + Packet_GetAsyncInfo = 230, + Packet_GetAsyncResumptionStub = 231, }; void SetDebugDumpVariables(); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.cpp 
b/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.cpp index bcad854f0906..529d8260b675 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.cpp @@ -13,6 +13,10 @@ #include "logging.h" #include "runtimedetails.h" +#if TARGET_UNIX +#include +#endif // TARGET_UNIX + // Just a helper... HANDLE MethodContextReader::OpenFile(const char* inputFile, DWORD flags) { @@ -32,6 +36,20 @@ static std::string to_lower(const std::string& input) return res; } +bool test_filename_available(const std::string& path) +{ +#ifdef TARGET_WINDOWS + DWORD attribs = GetFileAttributesA(path.c_str()); + return (attribs != INVALID_FILE_ATTRIBUTES) && !(attribs & FILE_ATTRIBUTE_DIRECTORY); +#else // TARGET_WINDOWS + struct stat stat_data; + if (stat(path.c_str(), &stat_data) != 0) + return false; + + return (stat_data.st_mode & S_IFMT) == S_IFREG; +#endif // TARGET_WINDOWS +} + // Looks for a file named foo.origSuffix.newSuffix or foo.newSuffix // but only if foo.origSuffix exists. // @@ -47,20 +65,17 @@ std::string MethodContextReader::CheckForPairedFile(const std::string& fileName, if (suffix_offset == std::string::npos || suffix_offset == 0 || (tmp != to_lower(fileName.substr(suffix_offset)))) return std::string(); - DWORD attribs = GetFileAttributesA(fileName.c_str()); - if ((attribs == INVALID_FILE_ATTRIBUTES) || (attribs & FILE_ATTRIBUTE_DIRECTORY)) + if (test_filename_available(fileName)) return std::string(); // next, check foo.orig.new from foo.orig tmp = fileName + newSuffix; - attribs = GetFileAttributesA(tmp.c_str()); - if ((attribs != INVALID_FILE_ATTRIBUTES) && !(attribs & FILE_ATTRIBUTE_DIRECTORY)) + if (test_filename_available(tmp)) return tmp; // Finally, lets try foo.new from foo.orig tmp = fileName.substr(0, suffix_offset) + newSuffix; - attribs = GetFileAttributesA(tmp.c_str()); - if ((attribs != INVALID_FILE_ATTRIBUTES) && !(attribs & FILE_ATTRIBUTE_DIRECTORY)) + if (test_filename_available(tmp)) return tmp; return std::string(); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/simpletimer.cpp b/src/coreclr/tools/superpmi/superpmi-shared/simpletimer.cpp index 3a21e77029e1..87fe784e1c78 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/simpletimer.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/simpletimer.cpp @@ -4,18 +4,14 @@ #include "standardpch.h" #include "logging.h" #include "simpletimer.h" +#include "minipal/time.h" SimpleTimer::SimpleTimer() { - start.QuadPart = 0; - stop.QuadPart = 0; - - BOOL retVal = ::QueryPerformanceFrequency(&proc_freq); - if (retVal == FALSE) - { - LogDebug("SimpleTimer::SimpleTimer unable to QPF. error was 0x%08x", ::GetLastError()); - DEBUG_BREAK; - } + start = 0; + stop = 0; + + proc_freq = minipal_hires_tick_frequency(); } SimpleTimer::~SimpleTimer() @@ -24,22 +20,12 @@ SimpleTimer::~SimpleTimer() void SimpleTimer::Start() { - BOOL retVal = ::QueryPerformanceCounter(&start); - if (retVal == FALSE) - { - LogDebug("SimpleTimer::Start unable to QPC. error was 0x%08x", ::GetLastError()); - DEBUG_BREAK; - } + start = minipal_hires_ticks(); } void SimpleTimer::Stop() { - BOOL retVal = ::QueryPerformanceCounter(&stop); - if (retVal == FALSE) - { - LogDebug("SimpleTimer::Stop unable to QPC. 
error was 0x%08x", ::GetLastError()); - DEBUG_BREAK; - } + stop = minipal_hires_ticks(); } double SimpleTimer::GetMilliseconds() @@ -49,5 +35,5 @@ double SimpleTimer::GetMilliseconds() double SimpleTimer::GetSeconds() { - return ((stop.QuadPart - start.QuadPart) / (double)proc_freq.QuadPart); + return ((stop - start) / (double)proc_freq); } diff --git a/src/coreclr/tools/superpmi/superpmi-shared/simpletimer.h b/src/coreclr/tools/superpmi/superpmi-shared/simpletimer.h index 69fb87605846..48aae5e12b0f 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/simpletimer.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/simpletimer.h @@ -4,6 +4,8 @@ #ifndef _SimpleTimer #define _SimpleTimer +#include + class SimpleTimer { public: @@ -16,8 +18,8 @@ class SimpleTimer double GetSeconds(); private: - LARGE_INTEGER proc_freq; - LARGE_INTEGER start; - LARGE_INTEGER stop; + int64_t proc_freq; + int64_t start; + int64_t stop; }; #endif diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp index bc147da81626..8a595c25bd83 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp @@ -233,10 +233,9 @@ std::string SpmiDumpHelper::DumpJitFlags(unsigned long long flags) AddFlag(OSR); AddFlag(ALT_JIT); AddFlag(FROZEN_ALLOC_ALLOWED); - AddFlag(READYTORUN); + AddFlag(AOT); AddFlag(PROF_ENTERLEAVE); AddFlag(PROF_NO_PINVOKE_INLINE); - AddFlag(PREJIT); AddFlag(RELOC); AddFlag(IL_STUB); AddFlag(PROCSPLIT); @@ -257,10 +256,6 @@ std::string SpmiDumpHelper::DumpJitFlags(unsigned long long flags) AddFlagNumeric(RELATIVE_CODE_RELOCS, 29); AddFlagNumeric(SOFTFP_ABI, 30); - // xarch only - // - AddFlagNumeric(VECTOR512_THROTTLING, 31); - // "Extra jit flag" support // AddFlagNumeric(HAS_PGO, EXTRA_JIT_FLAGS::HAS_PGO); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h index df54e0c5b3c0..9ff495003212 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h @@ -63,12 +63,16 @@ void SetSpmiTargetArchitecture(SPMI_TARGET_ARCHITECTURE spmiTargetArchitecture); inline bool IsSpmiTarget32Bit() { - return (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_X86) || (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_ARM); + return (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_X86) || + (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_ARM); } inline bool IsSpmiTarget64Bit() { - return (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_AMD64) || (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_ARM64) || (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_LOONGARCH64) || (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_RISCV64); + return (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_AMD64) || + (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_ARM64) || + (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_LOONGARCH64) || + (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_RISCV64); } inline size_t SpmiTargetPointerSize() diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp index fdf27620abb2..835d6a22d448 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ 
b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -36,6 +36,14 @@ bool interceptor_ICJI::notifyMethodInfoUsage(CORINFO_METHOD_HANDLE ftn) return temp; } +bool interceptor_ICJI::notifyInstructionSetUsage(CORINFO_InstructionSet instructionSet, bool supported) +{ + mc->cr->AddCall("notifyInstructionSetUsage"); + bool result = original_ICorJitInfo->notifyInstructionSetUsage(instructionSet, supported); + mc->recNotifyInstructionSetUsage(instructionSet, supported, result); + return result; +} + // return flags (defined above, CORINFO_FLG_PUBLIC ...) uint32_t interceptor_ICJI::getMethodAttribs(CORINFO_METHOD_HANDLE ftn /* IN */) { @@ -727,15 +735,6 @@ CORINFO_CLASS_HANDLE interceptor_ICJI::getTypeForBox(CORINFO_CLASS_HANDLE cls) return temp; } -// Class handle for a boxed value type, on the stack. -CORINFO_CLASS_HANDLE interceptor_ICJI::getTypeForBoxOnStack(CORINFO_CLASS_HANDLE cls) -{ - mc->cr->AddCall("getTypeForBoxOnStack"); - CORINFO_CLASS_HANDLE temp = original_ICorJitInfo->getTypeForBoxOnStack(cls); - mc->recGetTypeForBoxOnStack(cls, temp); - return temp; -} - // returns the correct box helper for a particular class. Note // that if this returns CORINFO_HELP_BOX, the JIT can assume // 'standard' boxing (allocate object and copy), and optimize @@ -1365,6 +1364,13 @@ void interceptor_ICJI::getEEInfo(CORINFO_EE_INFO* pEEInfoOut) mc->recGetEEInfo(pEEInfoOut); } +void interceptor_ICJI::getAsyncInfo(CORINFO_ASYNC_INFO* pAsyncInfo) +{ + mc->cr->AddCall("getAsyncInfo"); + original_ICorJitInfo->getAsyncInfo(pAsyncInfo); + mc->recGetAsyncInfo(pAsyncInfo); +} + /*********************************************************************************/ // // Diagnostic methods @@ -1780,6 +1786,14 @@ bool interceptor_ICJI::getTailCallHelpers( return result; } +CORINFO_METHOD_HANDLE interceptor_ICJI::getAsyncResumptionStub() +{ + mc->cr->AddCall("getAsyncResumptionStub"); + CORINFO_METHOD_HANDLE stub = original_ICorJitInfo->getAsyncResumptionStub(); + mc->recGetAsyncResumptionStub(stub); + return stub; +} + void interceptor_ICJI::updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) { mc->cr->AddCall("updateEntryPointForTailCall"); @@ -2024,11 +2038,6 @@ uint32_t interceptor_ICJI::getExpectedTargetArchitecture() return result; } -bool interceptor_ICJI::notifyInstructionSetUsage(CORINFO_InstructionSet instructionSet, bool supported) -{ - return original_ICorJitInfo->notifyInstructionSetUsage(instructionSet, supported); -} - CORINFO_METHOD_HANDLE interceptor_ICJI::getSpecialCopyHelper(CORINFO_CLASS_HANDLE type) { mc->cr->AddCall("getSpecialCopyHelper"); diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp index d14acec9674b..60d344324e98 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp @@ -532,13 +532,6 @@ CORINFO_CLASS_HANDLE interceptor_ICJI::getTypeForBox( return original_ICorJitInfo->getTypeForBox(cls); } -CORINFO_CLASS_HANDLE interceptor_ICJI::getTypeForBoxOnStack( - CORINFO_CLASS_HANDLE cls) -{ - mcs->AddCall("getTypeForBoxOnStack"); - return original_ICorJitInfo->getTypeForBoxOnStack(cls); -} - CorInfoHelpFunc interceptor_ICJI::getBoxHelper( CORINFO_CLASS_HANDLE cls) { @@ -969,6 +962,13 @@ void interceptor_ICJI::getEEInfo( original_ICorJitInfo->getEEInfo(pEEInfoOut); } +void interceptor_ICJI::getAsyncInfo( + CORINFO_ASYNC_INFO* pAsyncInfoOut) +{ + 
mcs->AddCall("getAsyncInfo"); + original_ICorJitInfo->getAsyncInfo(pAsyncInfoOut); +} + mdMethodDef interceptor_ICJI::getMethodDefFromMethod( CORINFO_METHOD_HANDLE hMethod) { @@ -1279,6 +1279,12 @@ bool interceptor_ICJI::getTailCallHelpers( return original_ICorJitInfo->getTailCallHelpers(callToken, sig, flags, pResult); } +CORINFO_METHOD_HANDLE interceptor_ICJI::getAsyncResumptionStub() +{ + mcs->AddCall("getAsyncResumptionStub"); + return original_ICorJitInfo->getAsyncResumptionStub(); +} + bool interceptor_ICJI::convertPInvokeCalliToCall( CORINFO_RESOLVED_TOKEN* pResolvedToken, bool mustConvert) diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp index ee04f7d948bb..c04554b5844f 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp @@ -467,12 +467,6 @@ CORINFO_CLASS_HANDLE interceptor_ICJI::getTypeForBox( return original_ICorJitInfo->getTypeForBox(cls); } -CORINFO_CLASS_HANDLE interceptor_ICJI::getTypeForBoxOnStack( - CORINFO_CLASS_HANDLE cls) -{ - return original_ICorJitInfo->getTypeForBoxOnStack(cls); -} - CorInfoHelpFunc interceptor_ICJI::getBoxHelper( CORINFO_CLASS_HANDLE cls) { @@ -849,6 +843,12 @@ void interceptor_ICJI::getEEInfo( original_ICorJitInfo->getEEInfo(pEEInfoOut); } +void interceptor_ICJI::getAsyncInfo( + CORINFO_ASYNC_INFO* pAsyncInfoOut) +{ + original_ICorJitInfo->getAsyncInfo(pAsyncInfoOut); +} + mdMethodDef interceptor_ICJI::getMethodDefFromMethod( CORINFO_METHOD_HANDLE hMethod) { @@ -1122,6 +1122,11 @@ bool interceptor_ICJI::getTailCallHelpers( return original_ICorJitInfo->getTailCallHelpers(callToken, sig, flags, pResult); } +CORINFO_METHOD_HANDLE interceptor_ICJI::getAsyncResumptionStub() +{ + return original_ICorJitInfo->getAsyncResumptionStub(); +} + bool interceptor_ICJI::convertPInvokeCalliToCall( CORINFO_RESOLVED_TOKEN* pResolvedToken, bool mustConvert) diff --git a/src/coreclr/tools/superpmi/superpmi/commandline.cpp b/src/coreclr/tools/superpmi/superpmi/commandline.cpp index 7956fe10a315..6fd0acfcae69 100644 --- a/src/coreclr/tools/superpmi/superpmi/commandline.cpp +++ b/src/coreclr/tools/superpmi/superpmi/commandline.cpp @@ -127,8 +127,10 @@ void CommandLine::DumpHelp(const char* program) printf(" trying to measure JIT throughput for a specific set of methods. Default=1.\n"); printf("\n"); printf(" -target \n"); - printf(" Used by the assembly differences calculator. This specifies the target\n"); - printf(" architecture for cross-compilation. 
Currently allowed values: x64, x86, arm, arm64\n"); + printf(" Specifies the target architecture if doing cross-compilation.\n"); + printf(" Allowed values: x64, x86, arm, arm64, loongarch64, riscv64\n"); + printf(" Used by the assembly differences calculator; to determine a default JIT dll name;\n"); + printf(" and to avoid treating mismatched cross-compilation replay as failure.\n"); printf("\n"); printf(" -coredistools\n"); printf(" Use disassembly tools from the CoreDisTools library\n"); @@ -685,9 +687,11 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) (0 != _stricmp(o->targetArchitecture, "x86")) && (0 != _stricmp(o->targetArchitecture, "arm64")) && (0 != _stricmp(o->targetArchitecture, "arm")) && - (0 != _stricmp(o->targetArchitecture, "arm32"))) + (0 != _stricmp(o->targetArchitecture, "arm32")) && + (0 != _stricmp(o->targetArchitecture, "loongarch64")) && + (0 != _stricmp(o->targetArchitecture, "riscv64"))) { - LogError("Illegal target architecture specified with -target (use 'x64', 'x86', 'arm64', or 'arm')."); + LogError("Illegal target architecture specified with -target."); DumpHelp(argv[0]); return false; } @@ -753,6 +757,10 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) hostArch = "arm"; #elif defined(HOST_ARM64) hostArch = "arm64"; +#elif defined(HOST_LOONGARCH64) + hostArch = "loongarch64"; +#elif defined(HOST_RISCV64) + hostArch = "riscv64"; #else allowDefaultJIT = false; #endif @@ -773,6 +781,12 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) case SPMI_TARGET_ARCHITECTURE_ARM64: targetArch = "arm64"; break; + case SPMI_TARGET_ARCHITECTURE_LOONGARCH64: + targetArch = "loongarch64"; + break; + case SPMI_TARGET_ARCHITECTURE_RISCV64: + targetArch = "riscv64"; + break; default: allowDefaultJIT = false; break; @@ -810,6 +824,10 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) case SPMI_TARGET_ARCHITECTURE_ARM64: jitOSName = "universal"; break; + case SPMI_TARGET_ARCHITECTURE_LOONGARCH64: + case SPMI_TARGET_ARCHITECTURE_RISCV64: + jitOSName = "unix"; + break; default: // Can't get here if `allowDefaultJIT` was properly set above. break; diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp index d190ffe72730..98c22710cbbb 100644 --- a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp @@ -608,13 +608,6 @@ CORINFO_CLASS_HANDLE MyICJI::getTypeForBox(CORINFO_CLASS_HANDLE cls) return jitInstance->mc->repGetTypeForBox(cls); } -// Class handle for a boxed value type, on the stack. -CORINFO_CLASS_HANDLE MyICJI::getTypeForBoxOnStack(CORINFO_CLASS_HANDLE cls) -{ - jitInstance->mc->cr->AddCall("getTypeForBoxOnStack"); - return jitInstance->mc->repGetTypeForBoxOnStack(cls); -} - // returns the correct box helper for a particular class. 
Note // that if this returns CORINFO_HELP_BOX, the JIT can assume // 'standard' boxing (allocate object and copy), and optimize @@ -1195,6 +1188,12 @@ void MyICJI::getEEInfo(CORINFO_EE_INFO* pEEInfoOut) jitInstance->mc->repGetEEInfo(pEEInfoOut); } +void MyICJI::getAsyncInfo(CORINFO_ASYNC_INFO* pAsyncInfo) +{ + jitInstance->mc->cr->AddCall("getAsyncInfo"); + jitInstance->mc->repGetAsyncInfo(pAsyncInfo); +} + /*********************************************************************************/ // // Diagnostic methods @@ -1520,6 +1519,12 @@ bool MyICJI::getTailCallHelpers( return jitInstance->mc->repGetTailCallHelpers(callToken, sig, flags, pResult); } +CORINFO_METHOD_HANDLE MyICJI::getAsyncResumptionStub() +{ + jitInstance->mc->cr->AddCall("getAsyncResumptionStub"); + return jitInstance->mc->repGetAsyncResumptionStub(); +} + bool MyICJI::convertPInvokeCalliToCall(CORINFO_RESOLVED_TOKEN* pResolvedToken, bool fMustConvert) { jitInstance->mc->cr->AddCall("convertPInvokeCalliToCall"); @@ -1529,7 +1534,7 @@ bool MyICJI::convertPInvokeCalliToCall(CORINFO_RESOLVED_TOKEN* pResolvedToken, b bool MyICJI::notifyInstructionSetUsage(CORINFO_InstructionSet instructionSet, bool supported) { jitInstance->mc->cr->AddCall("notifyInstructionSetUsage"); - return supported; + return jitInstance->mc->repNotifyInstructionSetUsage(instructionSet, supported); } void MyICJI::updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) @@ -1544,16 +1549,7 @@ void MyICJI::updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) uint32_t MyICJI::getJitFlags(CORJIT_FLAGS* jitFlags, uint32_t sizeInBytes) { jitInstance->mc->cr->AddCall("getJitFlags"); - uint32_t ret = jitInstance->mc->repGetJitFlags(jitFlags, sizeInBytes); - if (jitInstance->forceClearAltJitFlag) - { - jitFlags->Clear(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT); - } - else if (jitInstance->forceSetAltJitFlag) - { - jitFlags->Set(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT); - } - return ret; + return jitInstance->getJitFlags(jitFlags, sizeInBytes); } // Runs the given function with the given parameter under an error trap diff --git a/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp b/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp index 2b14ce5aef1e..09e7e14cc9d1 100644 --- a/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp +++ b/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp @@ -56,12 +56,12 @@ JitInstance* JitInstance::InitJit(char* nameOfJit, } } - jit->environment.getIntConfigValue = nullptr; - jit->environment.getStingConfigValue = nullptr; + jit->environment.getIntConfigValue = nullptr; + jit->environment.getStringConfigValue = nullptr; if (st1 != nullptr) st1->Start(); - HRESULT hr = jit->StartUp(nameOfJit, false, breakOnAssert, firstContext); + HRESULT hr = jit->StartUp(nameOfJit, breakOnAssert, firstContext); if (st1 != nullptr) st1->Stop(); if (hr != S_OK) @@ -75,7 +75,7 @@ JitInstance* JitInstance::InitJit(char* nameOfJit, return jit; } -HRESULT JitInstance::StartUp(char* PathToJit, bool copyJit, bool breakOnDebugBreakorAV, MethodContext* firstContext) +HRESULT JitInstance::StartUp(char* PathToJit, bool breakOnDebugBreakorAV, MethodContext* firstContext) { // startup jit DWORD dwRetVal = 0; @@ -85,8 +85,6 @@ HRESULT JitInstance::StartUp(char* PathToJit, bool copyJit, bool breakOnDebugBre SetBreakOnDebugBreakOrAV(breakOnDebugBreakorAV); char pFullPathName[MAX_PATH]; - char lpTempPathBuffer[MAX_PATH]; - char szTempFileName[MAX_PATH]; // find the full jit path dwRetVal = ::GetFullPathNameA(PathToJit, MAX_PATH, pFullPathName, nullptr); @@ -96,67 
+94,15 @@ HRESULT JitInstance::StartUp(char* PathToJit, bool copyJit, bool breakOnDebugBre return E_FAIL; } - // Store the full path to the jit - PathToOriginalJit = (char*)malloc(MAX_PATH); - if (PathToOriginalJit == nullptr) - { - LogError("1st HeapAlloc failed (0x%08x)", ::GetLastError()); - return E_FAIL; - } - ::strcpy_s(PathToOriginalJit, MAX_PATH, pFullPathName); - - if (copyJit) - { - // Get a temp file location - dwRetVal = ::GetTempPathA(MAX_PATH, lpTempPathBuffer); - if (dwRetVal == 0) - { - LogError("GetTempPath failed (0x%08x)", ::GetLastError()); - return E_FAIL; - } - if (dwRetVal > MAX_PATH) - { - LogError("GetTempPath returned a path that was larger than MAX_PATH"); - return E_FAIL; - } - // Get a temp filename - uRetVal = ::GetTempFileNameA(lpTempPathBuffer, "Jit", 0, szTempFileName); - if (uRetVal == 0) - { - LogError("GetTempFileName failed (0x%08x)", ::GetLastError()); - return E_FAIL; - } - dwRetVal = (DWORD)::strlen(szTempFileName); - - // Store the full path to the temp jit - PathToTempJit = (char*)malloc(MAX_PATH); - if (PathToTempJit == nullptr) - { - LogError("2nd HeapAlloc failed 0x%08x)", ::GetLastError()); - return E_FAIL; - } - ::strcpy_s(PathToTempJit, MAX_PATH, szTempFileName); - - // Copy Temp File - bRetVal = ::CopyFileA(PathToOriginalJit, PathToTempJit, FALSE); - if (bRetVal == FALSE) - { - LogError("CopyFile failed (0x%08x)", ::GetLastError()); - return E_FAIL; - } - } - else - PathToTempJit = PathToOriginalJit; - #ifndef TARGET_UNIX // No file version APIs in the PAL // Do a quick version check DWORD dwHandle = 0; - DWORD fviSize = GetFileVersionInfoSizeA(PathToTempJit, &dwHandle); + DWORD fviSize = GetFileVersionInfoSizeA(pFullPathName, &dwHandle); if ((fviSize != 0) && (dwHandle == 0)) { unsigned char* fviData = new unsigned char[fviSize]; - if (GetFileVersionInfoA(PathToTempJit, dwHandle, fviSize, fviData)) + if (GetFileVersionInfoA(pFullPathName, dwHandle, fviSize, fviData)) { UINT size = 0; VS_FIXEDFILEINFO* verInfo = nullptr; @@ -165,7 +111,7 @@ HRESULT JitInstance::StartUp(char* PathToJit, bool copyJit, bool breakOnDebugBre if (size) { if (verInfo->dwSignature == 0xfeef04bd) - LogDebug("'%s' is version %u.%u.%u.%u", PathToTempJit, (verInfo->dwFileVersionMS) >> 16, + LogDebug("'%s' is version %u.%u.%u.%u", pFullPathName, (verInfo->dwFileVersionMS) >> 16, (verInfo->dwFileVersionMS) & 0xFFFF, (verInfo->dwFileVersionLS) >> 16, (verInfo->dwFileVersionLS) & 0xFFFF); } @@ -176,7 +122,7 @@ HRESULT JitInstance::StartUp(char* PathToJit, bool copyJit, bool breakOnDebugBre #endif // !TARGET_UNIX // Load Library - hLib = ::LoadLibraryExA(PathToTempJit, NULL, 0); + hLib = ::LoadLibraryExA(pFullPathName, NULL, 0); if (hLib == 0) { LogError("LoadLibrary failed (0x%08x)", ::GetLastError()); @@ -222,7 +168,16 @@ HRESULT JitInstance::StartUp(char* PathToJit, bool copyJit, bool breakOnDebugBre // Mismatched version ID. Fail the load. pJitInstance = NULL; - LogError("Jit Compiler has wrong version identifier"); + GUID expected = JITEEVersionIdentifier; + GUID actual = versionId; + LogError("Jit Compiler has wrong version identifier. Expected: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x. 
Actual: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x.", + expected.Data1, expected.Data2, expected.Data3, + expected.Data4[0], expected.Data4[1], expected.Data4[2], expected.Data4[3], + expected.Data4[4], expected.Data4[5], expected.Data4[6], expected.Data4[7], + actual.Data1, actual.Data2, actual.Data3, + actual.Data4[0], actual.Data4[1], actual.Data4[2], actual.Data4[3], + actual.Data4[4], actual.Data4[5], actual.Data4[6], actual.Data4[7]); + return -1; } @@ -231,51 +186,6 @@ HRESULT JitInstance::StartUp(char* PathToJit, bool copyJit, bool breakOnDebugBre return S_OK; } -bool JitInstance::reLoad(MethodContext* firstContext) -{ - FreeLibrary(hLib); - - // Load Library - hLib = ::LoadLibraryExA(PathToTempJit, NULL, 0); - if (hLib == 0) - { - LogError("LoadLibrary failed (0x%08x)", ::GetLastError()); - return false; - } - - // get entry points - pngetJit = (PgetJit)::GetProcAddress(hLib, "getJit"); - if (pngetJit == 0) - { - LogError("GetProcAddress 'getJit' failed (0x%08x)", ::GetLastError()); - return false; - } - pnjitStartup = (PjitStartup)::GetProcAddress(hLib, "jitStartup"); - - // Setup ICorJitHost and call jitStartup if necessary - if (pnjitStartup != nullptr) - { - mc = firstContext; - jitHost = new JitHost(*this); - if (!callJitStartup(jitHost)) - { - LogError("jitStartup failed"); - return false; - } - } - - pJitInstance = pngetJit(); - if (pJitInstance == nullptr) - { - LogError("pngetJit gave us null"); - return false; - } - - icji = InitICorJitInfo(this); - - return true; -} - #undef DLLEXPORT #ifdef _MSC_VER #define DLLEXPORT __declspec(dllexport) @@ -334,7 +244,7 @@ ReplayResults JitInstance::CompileMethod(MethodContext* MethodToCompile, int mcI pParam->pThis->mc->repCompileMethod(&pParam->info, &pParam->flags, &os); CORJIT_FLAGS jitFlags; - pParam->pThis->mc->repGetJitFlags(&jitFlags, sizeof(jitFlags)); + pParam->pThis->getJitFlags(&jitFlags, sizeof(jitFlags)); pParam->results.IsMinOpts = jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_CODE) || @@ -387,6 +297,14 @@ ReplayResults JitInstance::CompileMethod(MethodContext* MethodToCompile, int mcI matchesTargetArch = (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64); break; + case IMAGE_FILE_MACHINE_LOONGARCH64: + matchesTargetArch = (targetArch == SPMI_TARGET_ARCHITECTURE_LOONGARCH64); + break; + + case IMAGE_FILE_MACHINE_RISCV64: + matchesTargetArch = (targetArch == SPMI_TARGET_ARCHITECTURE_RISCV64); + break; + default: LogError("Unknown target architecture"); break; @@ -399,6 +317,17 @@ ReplayResults JitInstance::CompileMethod(MethodContext* MethodToCompile, int mcI { jitResult = CORJIT_OK; } + else + { + // If the target matches, but the JIT is an altjit and the user specified RunAltJitCode=0, + // then the JIT will also return CORJIT_SKIPPED, to prevent the generated code from being used. + // However, we don't want to treat that as a replay failure. + if (jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT) && + (pParam->pThis->jitHost->getIntConfigValue("RunAltJitCode", 1) == 0)) + { + jitResult = CORJIT_OK; + } + } } if ((jitResult == CORJIT_OK) || (jitResult == CORJIT_BADCODE)) @@ -535,6 +464,22 @@ const char* JitInstance::getOption(const char* key, LightWeightMap return (const char*)options->GetBuffer(options->Get(keyIndex)); } +// Returns extended flags for a particular compilation instance, adjusted for altjit. +// This is a helper call; it does not record the call in the CompileResult. 
+uint32_t JitInstance::getJitFlags(CORJIT_FLAGS* jitFlags, uint32_t sizeInBytes) +{ + uint32_t ret = mc->repGetJitFlags(jitFlags, sizeInBytes); + if (forceClearAltJitFlag) + { + jitFlags->Clear(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT); + } + else if (forceSetAltJitFlag) + { + jitFlags->Set(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT); + } + return ret; +} + // Used to allocate memory that needs to handed to the EE. // For eg, use this to allocated memory for reporting debug info, // which will be handed to the EE by setVars() and setBoundaries() @@ -597,7 +542,7 @@ bool JitInstance::callJitStartup(ICorJitHost* jithost) } PAL_ENDTRY - Assert(environment.getIntConfigValue == nullptr && environment.getStingConfigValue == nullptr); + Assert(environment.getIntConfigValue == nullptr && environment.getStringConfigValue == nullptr); environment = mc->cloneEnvironment(); return param.result; @@ -617,10 +562,10 @@ bool JitInstance::resetConfig(MethodContext* firstContext) environment.getIntConfigValue = nullptr; } - if (environment.getStingConfigValue != nullptr) + if (environment.getStringConfigValue != nullptr) { - delete environment.getStingConfigValue; - environment.getStingConfigValue = nullptr; + delete environment.getStringConfigValue; + environment.getStringConfigValue = nullptr; } mc = firstContext; diff --git a/src/coreclr/tools/superpmi/superpmi/jitinstance.h b/src/coreclr/tools/superpmi/superpmi/jitinstance.h index 492c29bf8e4d..b264894d3684 100644 --- a/src/coreclr/tools/superpmi/superpmi/jitinstance.h +++ b/src/coreclr/tools/superpmi/superpmi/jitinstance.h @@ -27,8 +27,6 @@ struct ReplayResults class JitInstance { private: - char* PathToOriginalJit; - char* PathToTempJit; HMODULE hLib; PgetJit pngetJit; PjitStartup pnjitStartup; @@ -62,8 +60,7 @@ class JitInstance LightWeightMap* forceOptions, LightWeightMap* options); - HRESULT StartUp(char* PathToJit, bool copyJit, bool breakOnDebugBreakorAV, MethodContext* firstContext); - bool reLoad(MethodContext* firstContext); + HRESULT StartUp(char* PathToJit, bool breakOnDebugBreakorAV, MethodContext* firstContext); bool callJitStartup(ICorJitHost* newHost); @@ -75,6 +72,8 @@ class JitInstance const char* getOption(const char* key); const char* getOption(const char* key, LightWeightMap* options); + uint32_t getJitFlags(CORJIT_FLAGS* jitFlags, uint32_t sizeInBytes); + const MethodContext::Environment& getEnvironment(); void* allocateArray(size_t size); diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp index aacf90707fdb..5589e08133de 100644 --- a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp +++ b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp @@ -149,6 +149,14 @@ bool NearDiffer::InitAsmDiff() { coreDisTargetArchitecture = Target_Arm64; } + else if (0 == _stricmp(TargetArchitecture, "loongarch64")) + { + coreDisTargetArchitecture = Target_LoongArch64; + } + else if (0 == _stricmp(TargetArchitecture, "riscv64")) + { + coreDisTargetArchitecture = Target_RiscV64; + } else { LogError("Illegal target architecture '%s'", TargetArchitecture); diff --git a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp index e26f13a7f49d..e7b62be65853 100644 --- a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp +++ b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp @@ -58,6 +58,10 @@ void SetSuperPmiTargetArchitecture(const char* targetArchitecture) { SetSpmiTargetArchitecture(SPMI_TARGET_ARCHITECTURE_LOONGARCH64); } + else if (0 == 
_stricmp(targetArchitecture, "riscv64")) + { + SetSpmiTargetArchitecture(SPMI_TARGET_ARCHITECTURE_RISCV64); + } else { LogError("Illegal target architecture '%s'", targetArchitecture); diff --git a/src/coreclr/unwinder/CMakeLists.txt b/src/coreclr/unwinder/CMakeLists.txt index c63712c500e6..01c7bca64a78 100644 --- a/src/coreclr/unwinder/CMakeLists.txt +++ b/src/coreclr/unwinder/CMakeLists.txt @@ -1,19 +1,18 @@ -include_directories(BEFORE ${VM_DIR}) -include_directories(BEFORE ${VM_DIR}/${ARCH_SOURCES_DIR}) -include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) -include_directories(BEFORE ${CLR_DIR}/unwinder) -include_directories(${CLR_DIR}/debug/ee) -include_directories(${CLR_DIR}/gc) -include_directories(${CLR_DIR}/gcdump) -include_directories(${CLR_DIR}/debug/daccess) +# helper to add set of include directories to unwinder targets +macro(add_unwinder_include_directories TARGET) + target_include_directories(${TARGET} BEFORE PRIVATE ${VM_DIR}) + target_include_directories(${TARGET} BEFORE PRIVATE ${VM_DIR}/${ARCH_SOURCES_DIR}) + target_include_directories(${TARGET} BEFORE PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + target_include_directories(${TARGET} BEFORE PRIVATE ${CLR_DIR}/unwinder) + target_include_directories(${TARGET} PRIVATE ${CLR_DIR}/debug/ee) + target_include_directories(${TARGET} PRIVATE ${CLR_DIR}/gc) + target_include_directories(${TARGET} PRIVATE ${CLR_DIR}/gcdump) + target_include_directories(${TARGET} PRIVATE ${CLR_DIR}/debug/daccess) + target_include_directories(${TARGET} PRIVATE ${ARCH_SOURCES_DIR}) +endmacro() set(UNWINDER_SOURCES baseunwinder.cpp -) - -# Include platform specific unwinder for applicable (native and cross-target) builds. -include_directories(${ARCH_SOURCES_DIR}) -list(APPEND UNWINDER_SOURCES ${ARCH_SOURCES_DIR}/unwinder.cpp ) @@ -21,11 +20,102 @@ convert_to_absolute_path(UNWINDER_SOURCES ${UNWINDER_SOURCES}) if(CLR_CMAKE_HOST_UNIX) add_library_clr(unwinder_wks OBJECT ${UNWINDER_SOURCES}) + add_unwinder_include_directories(unwinder_wks) add_dependencies(unwinder_wks eventing_headers) endif(CLR_CMAKE_HOST_UNIX) add_library_clr(unwinder_dac ${UNWINDER_SOURCES}) +add_unwinder_include_directories(unwinder_dac) add_dependencies(unwinder_dac eventing_headers) set_target_properties(unwinder_dac PROPERTIES DAC_COMPONENT TRUE) target_compile_definitions(unwinder_dac PRIVATE FEATURE_NO_HOST) +### cDAC Unwinders #### + +set(BASE_UNWINDER_SOURCES baseunwinder.cpp) +convert_to_absolute_path(BASE_UNWINDER_SOURCES ${BASE_UNWINDER_SOURCES}) +add_library_clr(unwinder_cdac_base STATIC ${BASE_UNWINDER_SOURCES}) + +target_include_directories(unwinder_cdac_base BEFORE PUBLIC ${VM_DIR}) +target_include_directories(unwinder_cdac_base BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(unwinder_cdac_base BEFORE PUBLIC ${CLR_DIR}/unwinder) +target_include_directories(unwinder_cdac_base PUBLIC ${CLR_DIR}/debug/ee) +target_include_directories(unwinder_cdac_base PUBLIC ${CLR_DIR}/gc) +target_include_directories(unwinder_cdac_base PUBLIC ${CLR_DIR}/gcdump) +target_include_directories(unwinder_cdac_base PUBLIC ${CLR_DIR}/debug/daccess) +target_compile_definitions(unwinder_cdac_base PUBLIC FEATURE_NO_HOST FEATURE_CDAC_UNWINDER) + +if (CLR_CMAKE_TARGET_WIN32) + # cDAC unwinders are statically linked into the NativeAOT runtime which is built with + # release version of the statically linked CRT. Therefore we do the same here. 
+ set_property(TARGET unwinder_cdac_base PROPERTY MSVC_RUNTIME_LIBRARY MultiThreaded) + + # _DEBUG is always passed as a parameter if the build is a debug build. + # This causes the debug CRT on MSVC to be used so we need to undefine it. + target_compile_options(unwinder_cdac_base PRIVATE -U_DEBUG) +endif() + +install_clr(TARGETS unwinder_cdac_base DESTINATIONS cdaclibs COMPONENT cdac) + +# Helper function for platform specific cDAC unwinder builds. +function(create_platform_unwinder) + set(oneValueArgs TARGET ARCH) + set(multiValueArgs DESTINATIONS) + cmake_parse_arguments(TARGETDETAILS "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(TARGETDETAILS_ARCH STREQUAL "x64") + set(ARCH_SOURCES_DIR amd64) + elseif((TARGETDETAILS_ARCH STREQUAL "arm") OR (TARGETDETAILS_ARCH STREQUAL "armel")) + set(ARCH_SOURCES_DIR arm) + elseif(TARGETDETAILS_ARCH STREQUAL "x86") + set(ARCH_SOURCES_DIR i386) + elseif(TARGETDETAILS_ARCH STREQUAL "arm64") + set(ARCH_SOURCES_DIR arm64) + else() + clr_unknown_arch() + endif() + + set(UNWINDER_SOURCES ${ARCH_SOURCES_DIR}/unwinder.cpp) + convert_to_absolute_path(UNWINDER_SOURCES ${UNWINDER_SOURCES}) + add_library_clr(${TARGETDETAILS_TARGET} STATIC ${UNWINDER_SOURCES}) + + target_include_directories(${TARGETDETAILS_TARGET} BEFORE PRIVATE ${VM_DIR}/${ARCH_SOURCES_DIR}) + target_include_directories(${TARGETDETAILS_TARGET} PRIVATE ${ARCH_SOURCES_DIR}) + + target_link_libraries(${TARGETDETAILS_TARGET} PRIVATE unwinder_cdac_base) + if (CLR_CMAKE_TARGET_WIN32) + # cDAC unwinders are statically linked into the NativeAOT runtime which is built with + # release version of the statically linked CRT. Therefore we do the same here. + set_property(TARGET ${TARGETDETAILS_TARGET} PROPERTY MSVC_RUNTIME_LIBRARY MultiThreaded) + + # _DEBUG is always passed as a parameter if the build is a debug build. + # This causes the debug CRT on MSVC to be used so we need to undefine it. 
+ target_compile_options(${TARGETDETAILS_TARGET} PRIVATE -U_DEBUG) + endif() + + # add the install targets + install_clr(TARGETS ${TARGETDETAILS_TARGET} DESTINATIONS ${TARGETDETAILS_DESTINATIONS} COMPONENT cdac) + + # Set the target to be built for the specified OS and ARCH + set_target_definitions_to_custom_os_and_arch(TARGET ${TARGETDETAILS_TARGET} OS win ARCH ${TARGETDETAILS_ARCH}) + + target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_NO_HOST FEATURE_CDAC_UNWINDER) +endfunction() + +if(CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_AMD64) + create_platform_unwinder(TARGET unwinder_cdac_amd64 ARCH x64 DESTINATIONS cdaclibs) + create_platform_unwinder(TARGET unwinder_cdac_arm64 ARCH arm64 DESTINATIONS cdaclibs) +endif(CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_AMD64) + +if(CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_ARM64) + create_platform_unwinder(TARGET unwinder_cdac_arm64 ARCH arm64 DESTINATIONS cdaclibs) +endif(CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_ARM64) + +if(NOT CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_AMD64) + create_platform_unwinder(TARGET unwinder_cdac_amd64 ARCH x64 DESTINATIONS cdaclibs) +endif(NOT CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_AMD64) + +if(NOT CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_ARM64) + create_platform_unwinder(TARGET unwinder_cdac_arm64 ARCH arm64 DESTINATIONS cdaclibs) +endif(NOT CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_ARM64) + diff --git a/src/coreclr/unwinder/amd64/unwinder.cpp b/src/coreclr/unwinder/amd64/unwinder.cpp index cf4b96ff0b6f..57acbd30ab06 100644 --- a/src/coreclr/unwinder/amd64/unwinder.cpp +++ b/src/coreclr/unwinder/amd64/unwinder.cpp @@ -8,6 +8,7 @@ typedef DPTR(M128A) PTR_M128A; +#ifndef FEATURE_CDAC_UNWINDER //--------------------------------------------------------------------------------------- // // Read 64 bit unsigned value from the specified address. When the unwinder is built @@ -51,12 +52,29 @@ static M128A MemoryRead128(PM128A addr) { return *dac_cast((TADDR)addr); } +#else +// Read 64 bit unsigned value from the specified address when the unwinder is built +// for the cDAC. This triggers a callback to the cDAC host to read the memory from +// the target process. +static ULONG64 MemoryRead64(PULONG64 addr) +{ + ULONG64 value; + t_pCallbacks->readFromTarget((uint64_t)addr, &value, sizeof(value), t_pCallbacks->callbackContext); + return value; +} -#ifdef DACCESS_COMPILE - -// Report failure in the unwinder if the condition is FALSE -#define UNWINDER_ASSERT(Condition) if (!(Condition)) DacError(CORDBG_E_TARGET_INCONSISTENT) +// Read 128 bit value from the specified address when the unwinder is built +// for the cDAC. This triggers a callback to the cDAC host to read the memory from +// the target process. 
+static M128A MemoryRead128(PM128A addr) +{ + M128A value; + t_pCallbacks->readFromTarget((uint64_t)addr, &value, sizeof(value), t_pCallbacks->callbackContext); + return value; +} +#endif // FEATURE_CDAC_UNWINDER +#if defined(DACCESS_COMPILE) || defined(FEATURE_CDAC_UNWINDER) //--------------------------------------------------------------------------------------- // // The InstructionBuffer class abstracts accessing assembler instructions in the function @@ -71,6 +89,19 @@ class InstructionBuffer UCHAR m_buffer[32]; // Load the instructions from the target process being debugged +#ifdef FEATURE_CDAC_UNWINDER + HRESULT Load() + { + HRESULT hr = t_pCallbacks->readFromTarget(m_address, m_buffer, sizeof(m_buffer), t_pCallbacks->callbackContext); + if (SUCCEEDED(hr)) + { + // TODO: Implement breakpoint patching for cDAC + // https://github.com/dotnet/runtime/issues/112273#issue-2838620747 + } + + return hr; + } +#else // FEATURE_CDAC_UNWINDER HRESULT Load() { HRESULT hr = DacReadAll(TO_TADDR(m_address), m_buffer, sizeof(m_buffer), false); @@ -85,6 +116,7 @@ class InstructionBuffer return hr; } +#endif // FEATURE_CDAC_UNWINDER public: @@ -129,7 +161,7 @@ class InstructionBuffer } // Get the byte at the given index from the current position - // Invoke DacError if the index is out of the buffer + // Assert that the index is within the buffer UCHAR operator[](int index) { int realIndex = m_offset + index; @@ -137,7 +169,9 @@ class InstructionBuffer return m_buffer[realIndex]; } }; +#endif // DACCESS_COMPILE || FEATURE_CDAC_UNWINDER +#ifdef DACCESS_COMPILE //--------------------------------------------------------------------------------------- // // Given the target address of an UNWIND_INFO structure, this function retrieves all the memory used for @@ -217,50 +251,57 @@ BOOL DacUnwindStackFrame(CONTEXT * pContext, KNONVOLATILE_CONTEXT_POINTERS* pCon return res; } -//--------------------------------------------------------------------------------------- -// -// Unwind the given CONTEXT to the caller CONTEXT. The given CONTEXT will be overwritten. 
-// -// Arguments: -// pContext - in-out parameter storing the specified CONTEXT on entry and the unwound CONTEXT on exit -// -// Return Value: -// TRUE if the unwinding is successful -// +#elif defined(FEATURE_CDAC_UNWINDER) -BOOL OOPStackUnwinderAMD64::Unwind(CONTEXT * pContext) +BOOL amd64Unwind(void* pContext, ReadFromTarget readFromTarget, GetAllocatedBuffer getAllocatedBuffer, GetStackWalkInfo getStackWalkInfo, UnwinderFail unwinderFail, void* callbackContext) { - HRESULT hr = E_FAIL; + CDACCallbacks callbacks { readFromTarget, getAllocatedBuffer, getStackWalkInfo, unwinderFail, callbackContext }; + t_pCallbacks = &callbacks; + BOOL res = OOPStackUnwinderAMD64::Unwind((CONTEXT*) pContext); + t_pCallbacks = nullptr; - ULONG64 uControlPC = (DWORD64)dac_cast(::GetIP(pContext)); + return res; +} - // get the module base - ULONG64 uImageBase; - hr = GetModuleBase(uControlPC, &uImageBase); - if (FAILED(hr)) +UNWIND_INFO * OOPStackUnwinderAMD64::GetUnwindInfo(TADDR taUnwindInfo) +{ + UNWIND_INFO unwindInfo; + if(t_pCallbacks->readFromTarget((uint64_t)taUnwindInfo, &unwindInfo, sizeof(unwindInfo), t_pCallbacks->callbackContext) != S_OK) { - return FALSE; + return NULL; } - // get the function entry - IMAGE_RUNTIME_FUNCTION_ENTRY functionEntry; - hr = GetFunctionEntry(uControlPC, &functionEntry, sizeof(functionEntry)); - if (FAILED(hr)) + DWORD cbUnwindInfo = offsetof(UNWIND_INFO, UnwindCode) + + unwindInfo.CountOfUnwindCodes * sizeof(UNWIND_CODE); + + // Check if there is a chained unwind info. If so, it has an extra RUNTIME_FUNCTION tagged to the end. + if ((unwindInfo.Flags & UNW_FLAG_CHAININFO) != 0) { - return FALSE; + // If there is an odd number of UNWIND_CODE, we need to adjust for alignment. + if ((unwindInfo.CountOfUnwindCodes & 1) != 0) + { + cbUnwindInfo += sizeof(UNWIND_CODE); + } + cbUnwindInfo += sizeof(T_RUNTIME_FUNCTION); } - // call VirtualUnwind() to do the real work - ULONG64 EstablisherFrame; - hr = VirtualUnwind(0, uImageBase, uControlPC, &functionEntry, pContext, NULL, &EstablisherFrame, NULL, NULL); + // Allocate a buffer for the unwind info from cDAC callback. + // This buffer will be freed by the cDAC host once unwinding is done. + UNWIND_INFO* pUnwindInfo; + if(t_pCallbacks->getAllocatedBuffer(cbUnwindInfo, (void**)&pUnwindInfo, t_pCallbacks->callbackContext) != S_OK) + { + return NULL; + } - return (hr == S_OK); -} + if(t_pCallbacks->readFromTarget(taUnwindInfo, pUnwindInfo, cbUnwindInfo, t_pCallbacks->callbackContext) != S_OK) + { + return NULL; + } -#else // DACCESS_COMPILE + return pUnwindInfo; +} -// Report failure in the unwinder if the condition is FALSE -#define UNWINDER_ASSERT _ASSERTE +#else // !DACCESS_COMPILE && !FEATURE_CDAC_UNWINDER // For unwinding of the jitted code on non-Windows platforms, the Instruction buffer is // just a plain pointer to the instruction data. @@ -344,13 +385,57 @@ PEXCEPTION_ROUTINE RtlVirtualUnwind_Unsafe( ContextPointers, &handlerRoutine); - _ASSERTE(SUCCEEDED(res)); + UNWINDER_ASSERT(SUCCEEDED(res)); return handlerRoutine; } +#endif // !DACCESS_COMPILE && !FEATURE_CDAC_UNWINDER -#endif // DACCESS_COMPILE +//--------------------------------------------------------------------------------------- +// +// Unwind the given CONTEXT to the caller CONTEXT. The given CONTEXT will be overwritten. 
+// +// Arguments: +// pContext - in-out parameter storing the specified CONTEXT on entry and the unwound CONTEXT on exit +// +// Return Value: +// TRUE if the unwinding is successful +// + +BOOL OOPStackUnwinderAMD64::Unwind(CONTEXT * pContext) +{ + HRESULT hr = E_FAIL; + + ULONG64 uControlPC = +#ifndef FEATURE_CDAC_UNWINDER + (DWORD64)dac_cast(::GetIP(pContext)); +#else // FEATURE_CDAC_UNWINDER + pContext->Rip; +#endif // FEATURE_CDAC_UNWINDER + + // get the module base + ULONG64 uImageBase; + hr = GetModuleBase(uControlPC, &uImageBase); + if (FAILED(hr)) + { + return FALSE; + } + + // get the function entry + IMAGE_RUNTIME_FUNCTION_ENTRY functionEntry; + hr = GetFunctionEntry(uControlPC, &functionEntry, sizeof(functionEntry)); + if (FAILED(hr)) + { + return FALSE; + } + + // call VirtualUnwind() to do the real work + ULONG64 EstablisherFrame; + hr = VirtualUnwind(0, uImageBase, uControlPC, &functionEntry, pContext, NULL, &EstablisherFrame, NULL, NULL); + + return (hr == S_OK); +} // // diff --git a/src/coreclr/unwinder/amd64/unwinder.h b/src/coreclr/unwinder/amd64/unwinder.h index 1c714224f32e..1e9901689214 100644 --- a/src/coreclr/unwinder/amd64/unwinder.h +++ b/src/coreclr/unwinder/amd64/unwinder.h @@ -8,6 +8,14 @@ #include "baseunwinder.h" +#ifdef FEATURE_CDAC_UNWINDER +EXTERN_C BOOL amd64Unwind(void* pContext, + ReadFromTarget readFromTarget, + GetAllocatedBuffer getAllocatedBuffer, + GetStackWalkInfo getStackWalkInfo, + UnwinderFail unwinderFail, + void* callbackContext); +#endif // FEATURE_CDAC_UNWINDER //--------------------------------------------------------------------------------------- // diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index c7d04a70255f..ed4238c98a6b 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -4,7 +4,9 @@ // #include "stdafx.h" +#ifndef FEATURE_CDAC_UNWINDER #include "utilcode.h" +#endif // FEATURE_CDAC_UNWINDER #include "crosscomp.h" #include "unwinder.h" @@ -164,13 +166,25 @@ typedef struct _ARM64_VFP_STATE // Macros for accessing memory. These can be overridden if other code // (in particular the debugger) needs to use them. -#if !defined(DEBUGGER_UNWIND) +#if !defined(DEBUGGER_UNWIND) && !defined(FEATURE_CDAC_UNWINDER) #define MEMORY_READ_BYTE(params, addr) (*dac_cast(addr)) -#define MEMORY_READ_WORD(params, addr) (*dac_cast(addr)) +#define MEMORY_READ_WORD(params, addr) (*dac_cast(addr)) #define MEMORY_READ_DWORD(params, addr) (*dac_cast(addr)) #define MEMORY_READ_QWORD(params, addr) (*dac_cast(addr)) +#elif defined(FEATURE_CDAC_UNWINDER) +template +T cdacRead(uint64_t addr) +{ + T t; + t_pCallbacks->readFromTarget(addr, &t, sizeof(t), t_pCallbacks->callbackContext); + return t; +} +#define MEMORY_READ_BYTE(params, addr) (cdacRead(addr)) +#define MEMORY_READ_WORD(params, addr) (cdacRead(addr)) +#define MEMORY_READ_DWORD(params, addr) (cdacRead(addr)) +#define MEMORY_READ_QWORD(params, addr) (cdacRead(addr)) #endif // @@ -823,7 +837,7 @@ RtlpExpandCompactToFull ( // !sav_predec_done can't even happen. 
// - _ASSERTE(sav_predec_done); + UNWINDER_ASSERT(sav_predec_done); DBG_OP("save_lrpair\t(%s, %i)\n", int_reg_names[intreg], sav_slot * 8); emit_save_lrpair(&op_buffer, intreg, sav_slot * 8); @@ -1063,7 +1077,7 @@ Return Value: --*/ { - _ASSERTE(UnwindCode <= 0xFF); + UNWINDER_ASSERT(UnwindCode <= 0xFF); if (UnwindCode < 0xC0) { if (ARGUMENT_PRESENT(ScopeSize)) { @@ -1621,7 +1635,7 @@ Return Value: case 0xeb: // MSFT_OP_EC_CONTEXT: // NOTE: for .NET, the arm64ec context restoring is not implemented - _ASSERTE(FALSE); + UNWINDER_ASSERT(FALSE); return STATUS_UNSUCCESSFUL; case 0xec: // MSFT_OP_CLEAR_UNWOUND_TO_CALL @@ -2335,7 +2349,7 @@ Return Value: } // - // pac (11111100): function has pointer authentication + // pac (11111100): function has pointer authentication // else if (CurCode == 0xfc) { @@ -2574,7 +2588,7 @@ Return Value: UNREFERENCED_PARAMETER(HandlerType); - _ASSERTE((UnwindFlags & ~RTL_VIRTUAL_UNWIND_VALID_FLAGS_ARM64) == 0); + UNWINDER_ASSERT((UnwindFlags & ~RTL_VIRTUAL_UNWIND_VALID_FLAGS_ARM64) == 0); if (FunctionEntry == NULL) { @@ -2648,7 +2662,7 @@ Return Value: FunctionEntry = (PRUNTIME_FUNCTION)(ImageBase + FunctionEntry->UnwindData - 3); UnwindType = (FunctionEntry->UnwindData & 3); - _ASSERTE(UnwindType != 3); + UNWINDER_ASSERT(UnwindType != 3); ControlPcRva = FunctionEntry->BeginAddress; @@ -2759,6 +2773,7 @@ BOOL OOPStackUnwinderArm64::Unwind(T_CONTEXT * pContext) return TRUE; } +#ifdef DACCESS_COMPILE BOOL DacUnwindStackFrame(T_CONTEXT *pContext, T_KNONVOLATILE_CONTEXT_POINTERS* pContextPointers) { OOPStackUnwinderArm64 unwinder; @@ -2774,6 +2789,18 @@ BOOL DacUnwindStackFrame(T_CONTEXT *pContext, T_KNONVOLATILE_CONTEXT_POINTERS* p return res; } +#elif defined(FEATURE_CDAC_UNWINDER) +BOOL arm64Unwind(void* pContext, ReadFromTarget readFromTarget, GetAllocatedBuffer getAllocatedBuffer, GetStackWalkInfo getStackWalkInfo, UnwinderFail unwinderFail, void* callbackContext) +{ + CDACCallbacks callbacks { readFromTarget, getAllocatedBuffer, getStackWalkInfo, unwinderFail, callbackContext }; + t_pCallbacks = &callbacks; + OOPStackUnwinderArm64 unwinder; + BOOL res = unwinder.Unwind((T_CONTEXT*) pContext); + t_pCallbacks = nullptr; + + return res; +} +#endif // FEATURE_CDAC_UNWINDER #if defined(HOST_UNIX) diff --git a/src/coreclr/unwinder/arm64/unwinder.h b/src/coreclr/unwinder/arm64/unwinder.h index aa03c5a59fe9..d85fdf6a09ac 100644 --- a/src/coreclr/unwinder/arm64/unwinder.h +++ b/src/coreclr/unwinder/arm64/unwinder.h @@ -8,6 +8,13 @@ #include "baseunwinder.h" +#ifdef FEATURE_CDAC_UNWINDER +EXTERN_C BOOL arm64Unwind(void* pContext, ReadFromTarget readFromTarget, + GetAllocatedBuffer getAllocatedBuffer, + GetStackWalkInfo getStackWalkInfo, + UnwinderFail unwinderFail, + void* callbackContext); +#endif // FEATURE_CDAC_UNWINDER //--------------------------------------------------------------------------------------- // diff --git a/src/coreclr/unwinder/baseunwinder.cpp b/src/coreclr/unwinder/baseunwinder.cpp index b00c2aa11483..2f2fecc78340 100644 --- a/src/coreclr/unwinder/baseunwinder.cpp +++ b/src/coreclr/unwinder/baseunwinder.cpp @@ -6,9 +6,15 @@ #include "stdafx.h" #include "baseunwinder.h" +#ifndef FEATURE_CDAC_UNWINDER EXTERN_C void GetRuntimeStackWalkInfo(IN ULONG64 ControlPc, OUT UINT_PTR* pModuleBase, OUT UINT_PTR* pFuncEntry); +#endif // FEATURE_CDAC_UNWINDER + +#ifdef FEATURE_CDAC_UNWINDER +thread_local CDACCallbacks* t_pCallbacks; +#endif // FEATURE_CDAC_UNWINDER //--------------------------------------------------------------------------------------- // @@ -27,7 
+33,11 @@ EXTERN_C void GetRuntimeStackWalkInfo(IN ULONG64 ControlPc, HRESULT OOPStackUnwinder::GetModuleBase( DWORD64 address, _Out_ PDWORD64 pdwBase) { +#ifndef FEATURE_CDAC_UNWINDER GetRuntimeStackWalkInfo(address, reinterpret_cast(pdwBase), NULL); +#else // FEATURE_CDAC_UNWINDER + t_pCallbacks->getStackWalkInfo(address, reinterpret_cast(pdwBase), NULL, t_pCallbacks->callbackContext); +#endif // FEATURE_CDAC_UNWINDER return ((*pdwBase == 0) ? E_FAIL : S_OK); } @@ -50,12 +60,15 @@ HRESULT OOPStackUnwinder::GetFunctionEntry( DWORD64 addres _Out_writes_(cbBuffer) PVOID pBuffer, DWORD cbBuffer) { +#ifndef FEATURE_CDAC_UNWINDER if (cbBuffer < sizeof(T_RUNTIME_FUNCTION)) { return E_INVALIDARG; } +#endif // FEATURE_CDAC_UNWINDER PVOID pFuncEntry = NULL; +#ifndef FEATURE_CDAC_UNWINDER GetRuntimeStackWalkInfo(address, NULL, reinterpret_cast(&pFuncEntry)); if (pFuncEntry == NULL) { @@ -64,4 +77,17 @@ HRESULT OOPStackUnwinder::GetFunctionEntry( DWORD64 addres memcpy(pBuffer, pFuncEntry, cbBuffer); return S_OK; +#else // FEATURE_CDAC_UNWINDER + t_pCallbacks->getStackWalkInfo(address, NULL, reinterpret_cast(&pFuncEntry), t_pCallbacks->callbackContext); + if (pFuncEntry == NULL) + { + return E_FAIL; + } + if (t_pCallbacks->readFromTarget((DWORD64)pFuncEntry, pBuffer, cbBuffer, t_pCallbacks->callbackContext) != S_OK) + { + return E_FAIL; + } + + return S_OK; +#endif } diff --git a/src/coreclr/unwinder/baseunwinder.h b/src/coreclr/unwinder/baseunwinder.h index 241dc8a7ddfb..f620bae94474 100644 --- a/src/coreclr/unwinder/baseunwinder.h +++ b/src/coreclr/unwinder/baseunwinder.h @@ -6,6 +6,46 @@ #ifndef __unwinder_h__ #define __unwinder_h__ +#ifdef FEATURE_CDAC_UNWINDER +using ReadFromTarget = LONG (*)(ULONG64 addr, PVOID pBuffer, LONG bufferSize, PVOID callbackContext); +using GetAllocatedBuffer = LONG (*)(LONG bufferSize, PVOID* ppBuffer, PVOID callbackContext); +using GetStackWalkInfo = VOID (*)(ULONG64 controlPC, UINT_PTR* pUnwindInfoBase, UINT_PTR* pFuncEntry, PVOID callbackContext); +using UnwinderFail = VOID (*)(); + +class CDACCallbacks +{ +public: + CDACCallbacks(ReadFromTarget readFromTarget, + GetAllocatedBuffer getAllocatedBuffer, + GetStackWalkInfo getStackWalkInfo, + UnwinderFail unwinderFail, + void* callbackContext) + : readFromTarget(readFromTarget), + getAllocatedBuffer(getAllocatedBuffer), + getStackWalkInfo(getStackWalkInfo), + unwinderFail(unwinderFail), + callbackContext(callbackContext) + { } + + ReadFromTarget readFromTarget; + GetAllocatedBuffer getAllocatedBuffer; + GetStackWalkInfo getStackWalkInfo; + UnwinderFail unwinderFail; + void* callbackContext; +}; + +// thread_local used to access cDAC callbacks outside of unwinder. +extern thread_local CDACCallbacks* t_pCallbacks; +#endif // FEATURE_CDAC_UNWINDER + +// Report failure in the unwinder if the condition is FALSE +#if defined(FEATURE_CDAC_UNWINDER) +#define UNWINDER_ASSERT(Condition) if (!(Condition)) t_pCallbacks->unwinderFail() +#elif defined(DACCESS_COMPILE) +#define UNWINDER_ASSERT(Condition) if (!(Condition)) DacError(CORDBG_E_TARGET_INCONSISTENT) +#else // !DACCESS_COMPILE AND !FEATURE_CDAC_UNWINDER +#define UNWINDER_ASSERT _ASSERTE +#endif //--------------------------------------------------------------------------------------- // @@ -14,7 +54,7 @@ // are actually borrowed from dbghelp.dll. (StackWalk64() is built on top of these classes.) We have ripped // out everything we don't need such as symbol lookup and various state, and keep just enough code to support // VirtualUnwind(). 
The managed debugging infrastructure can't call RtlVirtualUnwind() because it doesn't -// work from out-of-processr +// work from out-of-processor // // Notes: // To see what we have changed in the borrowed source, you can diff the original version and our version. diff --git a/src/coreclr/unwinder/i386/unwinder.cpp b/src/coreclr/unwinder/i386/unwinder.cpp index d8e7e7355681..4ad7cfeccc0f 100644 --- a/src/coreclr/unwinder/i386/unwinder.cpp +++ b/src/coreclr/unwinder/i386/unwinder.cpp @@ -13,9 +13,6 @@ BOOL OOPStackUnwinderX86::Unwind(T_CONTEXT* pContextRecord, T_KNONVOLATILE_CONTE FillRegDisplay(&rd, pContextRecord); - rd.SP = pContextRecord->Esp; - rd.PCTAddr = (UINT_PTR)&(pContextRecord->Eip); - if (pContextPointers) { rd.pCurrentContextPointers = pContextPointers; @@ -26,15 +23,14 @@ BOOL OOPStackUnwinderX86::Unwind(T_CONTEXT* pContextRecord, T_KNONVOLATILE_CONTE EECodeInfo codeInfo; codeInfo.Init((PCODE) ControlPc); - GCInfoToken gcInfoToken = codeInfo.GetGCInfoToken(); - hdrInfo hdrInfoBody; - DWORD hdrInfoSize = (DWORD)DecodeGCHdrInfo(gcInfoToken, codeInfo.GetRelOffset(), &hdrInfoBody); + hdrInfo *hdrInfoBody; + PTR_CBYTE table = codeInfo.DecodeGCHdrInfo(&hdrInfoBody); if (!UnwindStackFrameX86(&rd, PTR_CBYTE(codeInfo.GetSavedMethodCode()), codeInfo.GetRelOffset(), - &hdrInfoBody, - dac_cast(gcInfoToken.Info) + hdrInfoSize, + hdrInfoBody, + table, PTR_CBYTE(codeInfo.GetJitManager()->GetFuncletStartAddress(&codeInfo)), codeInfo.IsFunclet(), true)) @@ -52,7 +48,7 @@ BOOL OOPStackUnwinderX86::Unwind(T_CONTEXT* pContextRecord, T_KNONVOLATILE_CONTE ENUM_CALLEE_SAVED_REGISTERS(); #undef CALLEE_SAVED_REGISTER - pContextRecord->Esp = rd.SP - codeInfo.GetCodeManager()->GetStackParameterSize(&codeInfo); + pContextRecord->Esp = rd.SP; pContextRecord->Eip = rd.ControlPC; return TRUE; diff --git a/src/coreclr/unwinder/stdafx.h b/src/coreclr/unwinder/stdafx.h index 8decdc68562b..e0dc4fe44b38 100644 --- a/src/coreclr/unwinder/stdafx.h +++ b/src/coreclr/unwinder/stdafx.h @@ -10,10 +10,17 @@ #define USE_COM_CONTEXT_DEF +#ifndef FEATURE_CDAC_UNWINDER #include - #include #include +#else // FEATURE_CDAC_UNWINDER +#include +#include +#include +#include +#endif // FEATURE_CDAC_UNWINDER + #ifdef DACCESS_COMPILE #include #include diff --git a/src/coreclr/utilcode/CMakeLists.txt b/src/coreclr/utilcode/CMakeLists.txt index 06282d39189e..0f50b610d655 100644 --- a/src/coreclr/utilcode/CMakeLists.txt +++ b/src/coreclr/utilcode/CMakeLists.txt @@ -27,6 +27,11 @@ set(UTILCODE_COMMON_SOURCES guidfromname.cpp memorypool.cpp loaderheap.cpp + interleavedloaderheap.cpp + loaderheap_shared.cpp + explicitcontrolloaderheap.cpp + allocmemtracker.cpp + rangelist.cpp outstring.cpp ilformatter.cpp opinfo.cpp @@ -84,7 +89,9 @@ convert_to_absolute_path(UTILCODE_SOURCES ${UTILCODE_SOURCES}) convert_to_absolute_path(UTILCODE_DAC_SOURCES ${UTILCODE_DAC_SOURCES}) convert_to_absolute_path(UTILCODE_STATICNOHOST_SOURCES ${UTILCODE_STATICNOHOST_SOURCES}) +if(NOT CLR_CMAKE_TARGET_ARCH_WASM) add_library_clr(utilcode_dac STATIC ${UTILCODE_DAC_SOURCES}) +endif() add_library_clr(utilcode OBJECT ${UTILCODE_SOURCES}) add_library_clr(utilcodestaticnohost STATIC ${UTILCODE_STATICNOHOST_SOURCES}) @@ -92,9 +99,11 @@ target_link_libraries(utilcodestaticnohost PUBLIC coreclrminipal) if(CLR_CMAKE_HOST_UNIX) target_link_libraries(utilcodestaticnohost PUBLIC nativeresourcestring) - target_link_libraries(utilcode_dac PUBLIC nativeresourcestring) + if (NOT CLR_CMAKE_TARGET_ARCH_WASM) + target_link_libraries(utilcode_dac PUBLIC nativeresourcestring) + 
add_dependencies(utilcode_dac coreclrpal) + endif() target_link_libraries(utilcode INTERFACE nativeresourcestring) - add_dependencies(utilcode_dac coreclrpal) add_dependencies(utilcode coreclrpal) endif(CLR_CMAKE_HOST_UNIX) @@ -107,12 +116,14 @@ if(CLR_CMAKE_HOST_WIN32) link_natvis_sources_for_target(utilcode INTERFACE utilcode.natvis) endif(CLR_CMAKE_HOST_WIN32) -set_target_properties(utilcode_dac PROPERTIES DAC_COMPONENT TRUE) -target_compile_definitions(utilcode_dac PRIVATE SELF_NO_HOST) +if (NOT CLR_CMAKE_TARGET_ARCH_WASM) + set_target_properties(utilcode_dac PROPERTIES DAC_COMPONENT TRUE) + target_compile_definitions(utilcode_dac PRIVATE SELF_NO_HOST) + add_dependencies(utilcode_dac ${UTILCODE_DEPENDENCIES}) + target_precompile_headers(utilcode_dac PRIVATE [["stdafx.h"]]) + endif() target_compile_definitions(utilcodestaticnohost PRIVATE SELF_NO_HOST) -add_dependencies(utilcode_dac ${UTILCODE_DEPENDENCIES}) add_dependencies(utilcode ${UTILCODE_DEPENDENCIES}) add_dependencies(utilcodestaticnohost ${UTILCODE_DEPENDENCIES}) -target_precompile_headers(utilcode_dac PRIVATE [["stdafx.h"]]) target_precompile_headers(utilcode PRIVATE [["stdafx.h"]]) target_precompile_headers(utilcodestaticnohost PRIVATE [["stdafx.h"]]) diff --git a/src/coreclr/utilcode/allocmemtracker.cpp b/src/coreclr/utilcode/allocmemtracker.cpp new file mode 100644 index 000000000000..2ce0f3c2e021 --- /dev/null +++ b/src/coreclr/utilcode/allocmemtracker.cpp @@ -0,0 +1,173 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "stdafx.h" // Precompiled header key. +#include "loaderheap.h" +#include "ex.h" +#include "pedecoder.h" +#define DONOT_DEFINE_ETW_CALLBACK +#include "eventtracebase.h" + +#ifndef DACCESS_COMPILE + +AllocMemTracker::AllocMemTracker() +{ + CONTRACTL + { + NOTHROW; + FORBID_FAULT; + CANNOT_TAKE_LOCK; + } + CONTRACTL_END + + m_FirstBlock.m_pNext = NULL; + m_FirstBlock.m_nextFree = 0; + m_pFirstBlock = &m_FirstBlock; + + m_fReleased = FALSE; +} + +AllocMemTracker::~AllocMemTracker() +{ + CONTRACTL + { + NOTHROW; + FORBID_FAULT; + } + CONTRACTL_END + + if (!m_fReleased) + { + AllocMemTrackerBlock *pBlock = m_pFirstBlock; + while (pBlock) + { + // Do the loop in reverse - loaderheaps work best if + // we allocate and backout in LIFO order. + for (int i = pBlock->m_nextFree - 1; i >= 0; i--) + { + AllocMemTrackerNode *pNode = &(pBlock->m_Node[i]); + pNode->m_pHeap->RealBackoutMem(pNode->m_pMem + ,pNode->m_dwRequestedSize +#ifdef _DEBUG + ,__FILE__ + ,__LINE__ + ,pNode->m_szAllocFile + ,pNode->m_allocLineNum +#endif + ); + + } + + pBlock = pBlock->m_pNext; + } + } + +// We have seen evidence of memory corruption in this data structure. +// https://github.com/dotnet/runtime/issues/54469 +// m_pFirstBlock is intended to be a linked list terminating with +// &m_FirstBlock but we are finding a nullptr in the list before +// that point. 
In order to investigate further we need to observe +// the corrupted memory block(s) before they are deleted below +#ifdef _DEBUG + AllocMemTrackerBlock* pDebugBlock = m_pFirstBlock; + for (int i = 0; pDebugBlock != &m_FirstBlock; i++) + { + CONSISTENCY_CHECK_MSGF(i < 10000, ("Linked list is much longer than expected, memory corruption likely\n")); + CONSISTENCY_CHECK_MSGF(pDebugBlock != nullptr, ("Linked list pointer == NULL, memory corruption likely\n")); + pDebugBlock = pDebugBlock->m_pNext; + } +#endif + + AllocMemTrackerBlock *pBlock = m_pFirstBlock; + while (pBlock != &m_FirstBlock) + { + AllocMemTrackerBlock *pNext = pBlock->m_pNext; + delete pBlock; + pBlock = pNext; + } + + INDEBUG(memset(this, 0xcc, sizeof(*this));) +} + +void *AllocMemTracker::Track(TaggedMemAllocPtr tmap) +{ + CONTRACTL + { + THROWS; + INJECT_FAULT(ThrowOutOfMemory();); + } + CONTRACTL_END + + void *pv = Track_NoThrow(tmap); + if (!pv) + { + ThrowOutOfMemory(); + } + return pv; +} + +void *AllocMemTracker::Track_NoThrow(TaggedMemAllocPtr tmap) +{ + CONTRACTL + { + NOTHROW; + INJECT_FAULT(return NULL;); + } + CONTRACTL_END + + // Calling Track() after calling SuppressRelease() is almost certainly a bug. You're supposed to call SuppressRelease() only after you're + // sure no subsequent failure will force you to backout the memory. + _ASSERTE( (!m_fReleased) && "You've already called SuppressRelease on this AllocMemTracker which implies you've passed your point of no failure. Why are you still doing allocations?"); + + + if (tmap.m_pMem != NULL) + { + AllocMemHolder holder(tmap); // If anything goes wrong in here, this holder will backout the allocation for the caller. + if (m_fReleased) + { + holder.SuppressRelease(); + } + AllocMemTrackerBlock *pBlock = m_pFirstBlock; + if (pBlock->m_nextFree == kAllocMemTrackerBlockSize) + { + AllocMemTrackerBlock *pNewBlock = new (nothrow) AllocMemTrackerBlock; + if (!pNewBlock) + { + return NULL; + } + + pNewBlock->m_pNext = m_pFirstBlock; + pNewBlock->m_nextFree = 0; + + m_pFirstBlock = pNewBlock; + + pBlock = pNewBlock; + } + + // From here on, we can't fail + pBlock->m_Node[pBlock->m_nextFree].m_pHeap = tmap.m_pHeap; + pBlock->m_Node[pBlock->m_nextFree].m_pMem = tmap.m_pMem; + pBlock->m_Node[pBlock->m_nextFree].m_dwRequestedSize = tmap.m_dwRequestedSize; +#ifdef _DEBUG + pBlock->m_Node[pBlock->m_nextFree].m_szAllocFile = tmap.m_szFile; + pBlock->m_Node[pBlock->m_nextFree].m_allocLineNum = tmap.m_lineNum; +#endif + + pBlock->m_nextFree++; + + holder.SuppressRelease(); + + + } + return (void *)tmap; +} + + +void AllocMemTracker::SuppressRelease() +{ + LIMITED_METHOD_CONTRACT; + + m_fReleased = TRUE; +} + +#endif //#ifndef DACCESS_COMPILE diff --git a/src/coreclr/utilcode/arraylist.cpp b/src/coreclr/utilcode/arraylist.cpp index ecdf1e314488..f7e2d63530c5 100644 --- a/src/coreclr/utilcode/arraylist.cpp +++ b/src/coreclr/utilcode/arraylist.cpp @@ -51,7 +51,7 @@ PTR_VOID * ArrayListBase::GetPtr(DWORD index) const ArrayListBlock *b = (ArrayListBlock*)&m_firstBlock; while (index >= b->m_blockSize) { - PREFIX_ASSUME(b->m_next != NULL); + _ASSERTE(b->m_next != NULL); index -= b->m_blockSize; b = b->m_next; } diff --git a/src/coreclr/utilcode/check.cpp b/src/coreclr/utilcode/check.cpp index e9dfeaf3fd15..af0ec7c0a935 100644 --- a/src/coreclr/utilcode/check.cpp +++ b/src/coreclr/utilcode/check.cpp @@ -36,16 +36,12 @@ BOOL BaseContract::s_alwaysEnforceContracts = 1; #define SPECIALIZE_CONTRACT_VIOLATION_HOLDER(mask) \ template<> void ContractViolationHolder::Enter() \ { \ - 
SCAN_SCOPE_BEGIN; \ - ANNOTATION_VIOLATION(mask); \ EnterInternal(mask); \ }; #define SPECIALIZE_AUTO_CLEANUP_CONTRACT_VIOLATION_HOLDER(mask) \ template<> AutoCleanupContractViolationHolder::AutoCleanupContractViolationHolder(BOOL fEnterViolation) \ { \ - SCAN_SCOPE_BEGIN; \ - ANNOTATION_VIOLATION(mask); \ EnterInternal(fEnterViolation ? mask : 0); \ }; diff --git a/src/coreclr/utilcode/cycletimer.cpp b/src/coreclr/utilcode/cycletimer.cpp index 946c72b278e2..10118bfa83b6 100644 --- a/src/coreclr/utilcode/cycletimer.cpp +++ b/src/coreclr/utilcode/cycletimer.cpp @@ -8,6 +8,7 @@ #include "winwrap.h" #include "assert.h" #include "utilcode.h" +#include "minipal/time.h" bool CycleTimer::GetThreadCyclesS(uint64_t* cycles) { @@ -26,25 +27,21 @@ double CycleTimer::CyclesPerSecond() // Windows *does* allow you to translate QueryPerformanceCounter counts into time, // however. So we'll assume that the clock speed stayed constant, and measure both the // QPC counts and cycles of a short loop, to get a conversion factor. - LARGE_INTEGER lpFrequency; - if (!QueryPerformanceFrequency(&lpFrequency)) return 0.0; - // Otherwise... - LARGE_INTEGER qpcStart; + int64_t lpFrequency = minipal_hires_tick_frequency(); + int64_t qpcStart = minipal_hires_ticks(); uint64_t cycleStart; - if (!QueryPerformanceCounter(&qpcStart)) return 0.0; if (!GetThreadCyclesS(&cycleStart)) return 0.0; volatile int sum = 0; for (int k = 0; k < SampleLoopSize; k++) { sum += k; } - LARGE_INTEGER qpcEnd; - if (!QueryPerformanceCounter(&qpcEnd)) return 0.0; + int64_t qpcEnd = minipal_hires_ticks(); uint64_t cycleEnd; if (!GetThreadCyclesS(&cycleEnd)) return 0.0; - double qpcTicks = ((double)qpcEnd.QuadPart) - ((double)qpcStart.QuadPart); - double secs = (qpcTicks / ((double)lpFrequency.QuadPart)); + double qpcTicks = ((double)qpcEnd) - ((double)qpcStart); + double secs = (qpcTicks / ((double)lpFrequency)); double cycles = ((double)cycleEnd) - ((double)cycleStart); return cycles / secs; } diff --git a/src/coreclr/utilcode/debug.cpp b/src/coreclr/utilcode/debug.cpp index 9e56bebac348..d91a27294ee3 100644 --- a/src/coreclr/utilcode/debug.cpp +++ b/src/coreclr/utilcode/debug.cpp @@ -38,8 +38,8 @@ static void GetExecutableFileNameUtf8(SString& value) CONTRACTL_END; SString tmp; - WCHAR * pCharBuf = tmp.OpenUnicodeBuffer(_MAX_PATH); - DWORD numChars = GetModuleFileNameW(0 /* Get current executable */, pCharBuf, _MAX_PATH); + WCHAR * pCharBuf = tmp.OpenUnicodeBuffer(MAX_PATH); + DWORD numChars = GetModuleFileNameW(0 /* Get current executable */, pCharBuf, MAX_PATH); tmp.CloseBuffer(numChars); tmp.ConvertToUTF8(value); @@ -202,7 +202,7 @@ HRESULT _OutOfMemory(LPCSTR szFile, int iLine) STATIC_CONTRACT_GC_NOTRIGGER; STATIC_CONTRACT_DEBUG_ONLY; - printf("WARNING: Out of memory condition being issued from: %s, line %d\n", szFile, iLine); + minipal_log_print(minipal_log_flags_warning, "WARNING: Out of memory condition being issued from: %s, line %d\n", szFile, iLine); return (E_OUTOFMEMORY); } @@ -257,7 +257,7 @@ bool _DbgBreakCheck( if (formattedMessages) { OutputDebugStringUtf8(formatBuffer); - fprintf(stderr, "%s", formatBuffer); + minipal_log_print_error("%s", formatBuffer); } else { @@ -268,12 +268,7 @@ bool _DbgBreakCheck( OutputDebugStringUtf8("\n"); OutputDebugStringUtf8(szExpr); OutputDebugStringUtf8("\n"); - printf("%s", szLowMemoryAssertMessage); - printf("\n"); - printf("%s", szFile); - printf("\n"); - printf("%s", szExpr); - printf("\n"); + minipal_log_print_error("%s\n%s\n%s\n", szLowMemoryAssertMessage, szFile, szExpr); } 
LogAssert(szFile, iLine, szExpr); @@ -494,7 +489,7 @@ void DECLSPEC_NORETURN __FreeBuildAssertFail(const char *szFile, int iLine, cons OutputDebugStringUtf8(buffer.GetUTF8()); // Write out the error to the console - printf("%s", buffer.GetUTF8()); + minipal_log_print_error("%s", buffer.GetUTF8()); // Log to the stress log. Note that we can't include the szExpr b/c that // may not be a string literal (particularly for formatt-able asserts). diff --git a/src/coreclr/utilcode/ex.cpp b/src/coreclr/utilcode/ex.cpp index 986f9b95d134..c1db6294af76 100644 --- a/src/coreclr/utilcode/ex.cpp +++ b/src/coreclr/utilcode/ex.cpp @@ -226,10 +226,6 @@ BOOL Exception::IsPreallocatedOOMException() } //------------------------------------------------------------------------------ -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif LPCSTR Exception::GetHRSymbolicName(HRESULT hr) { LIMITED_METHOD_CONTRACT; @@ -778,10 +774,6 @@ LPCSTR Exception::GetHRSymbolicName(HRESULT hr) return NULL; } } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - // --------------------------------------------------------------------------- // HRException class. Implements exception API for exceptions from HRESULTS diff --git a/src/coreclr/utilcode/executableallocator.cpp b/src/coreclr/utilcode/executableallocator.cpp index eb20b6ab4668..024237707223 100644 --- a/src/coreclr/utilcode/executableallocator.cpp +++ b/src/coreclr/utilcode/executableallocator.cpp @@ -94,27 +94,27 @@ void ExecutableAllocator::DumpHolderUsage() LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); - fprintf(stderr, "Map time with lock sum: %lldms\n", g_mapTimeWithLockSum / (freq.QuadPart / 1000)); - fprintf(stderr, "Map time sum: %lldms\n", g_mapTimeSum / (freq.QuadPart / 1000)); - fprintf(stderr, "Map find RX time sum: %lldms\n", g_mapFindRXTimeSum / (freq.QuadPart / 1000)); - fprintf(stderr, "Map create time sum: %lldms\n", g_mapCreateTimeSum / (freq.QuadPart / 1000)); - fprintf(stderr, "Unmap time with lock sum: %lldms\n", g_unmapTimeWithLockSum / (freq.QuadPart / 1000)); - fprintf(stderr, "Unmap time sum: %lldms\n", g_unmapTimeSum / (freq.QuadPart / 1000)); + minipal_log_print_info("Map time with lock sum: %lldms\n", g_mapTimeWithLockSum / (freq.QuadPart / 1000)); + minipal_log_print_info("Map time sum: %lldms\n", g_mapTimeSum / (freq.QuadPart / 1000)); + minipal_log_print_info("Map find RX time sum: %lldms\n", g_mapFindRXTimeSum / (freq.QuadPart / 1000)); + minipal_log_print_info("Map create time sum: %lldms\n", g_mapCreateTimeSum / (freq.QuadPart / 1000)); + minipal_log_print_info("Unmap time with lock sum: %lldms\n", g_unmapTimeWithLockSum / (freq.QuadPart / 1000)); + minipal_log_print_info("Unmap time sum: %lldms\n", g_unmapTimeSum / (freq.QuadPart / 1000)); - fprintf(stderr, "Reserve count: %lld\n", g_reserveCount); - fprintf(stderr, "Release count: %lld\n", g_releaseCount); + minipal_log_print_info("Reserve count: %lld\n", g_reserveCount); + minipal_log_print_info("Release count: %lld\n", g_releaseCount); - fprintf(stderr, "g_MapRW_Calls: %lld\n", g_MapRW_Calls); - fprintf(stderr, "g_MapRW_CallsWithCacheMiss: %lld\n", g_MapRW_CallsWithCacheMiss); - fprintf(stderr, "g_MapRW_LinkedListWalkDepth: %lld\n", g_MapRW_LinkedListWalkDepth); - fprintf(stderr, "g_MapRW_LinkedListAverageDepth: %f\n", (double)g_MapRW_LinkedListWalkDepth/(double)g_MapRW_CallsWithCacheMiss); - fprintf(stderr, "g_LinkedListTotalDepth: %lld\n", g_LinkedListTotalDepth); + 
minipal_log_print_info("g_MapRW_Calls: %lld\n", g_MapRW_Calls); + minipal_log_print_info("g_MapRW_CallsWithCacheMiss: %lld\n", g_MapRW_CallsWithCacheMiss); + minipal_log_print_info("g_MapRW_LinkedListWalkDepth: %lld\n", g_MapRW_LinkedListWalkDepth); + minipal_log_print_info("g_MapRW_LinkedListAverageDepth: %f\n", (double)g_MapRW_LinkedListWalkDepth/(double)g_MapRW_CallsWithCacheMiss); + minipal_log_print_info("g_LinkedListTotalDepth: %lld\n", g_LinkedListTotalDepth); - fprintf(stderr, "ExecutableWriterHolder usage:\n"); + minipal_log_print_info("ExecutableWriterHolder usage:\n"); for (int i = 0; i < s_logMaxIndex; i++) { - fprintf(stderr, "Count: %d at %s:%d in %s\n", s_usageLog[i].count, s_usageLog[i].source, s_usageLog[i].line, s_usageLog[i].function); + minipal_log_print_info("Count: %d at %s:%d in %s\n", s_usageLog[i].count, s_usageLog[i].source, s_usageLog[i].line, s_usageLog[i].function); } } @@ -251,7 +251,7 @@ HRESULT ExecutableAllocator::StaticInitialize(FatalErrorHandler fatalErrorHandle { if ((customCacheSize > ARRAY_SIZE(m_cachedMapping)) || (customCacheSize <= 0)) { - printf("Invalid value in 'EXECUTABLE_ALLOCATOR_CACHE_SIZE' environment variable'\n"); + minipal_log_print_error("Invalid value in 'EXECUTABLE_ALLOCATOR_CACHE_SIZE' environment variable'\n"); return E_FAIL; } @@ -503,6 +503,11 @@ void* ExecutableAllocator::Commit(void* pStart, size_t size, bool isExecutable) } void ExecutableAllocator::Release(void* pRX) +{ + ReleaseWorker(pRX, false /* this is the standard Release of normally allocated memory */); +} + +void ExecutableAllocator::ReleaseWorker(void* pRX, bool releaseTemplate) { LIMITED_METHOD_CONTRACT; @@ -548,9 +553,19 @@ void ExecutableAllocator::Release(void* pRX) cachedMappingThatOverlaps = FindOverlappingCachedMapping(pBlock); } - if (!VMToOSInterface::ReleaseDoubleMappedMemory(m_doubleMemoryMapperHandle, pRX, pBlock->offset, pBlock->size)) + if (releaseTemplate) { - g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the double mapped memory failed")); + if (!VMToOSInterface::FreeThunksFromTemplate(pRX, pBlock->size / 2)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the template mapped memory failed")); + } + } + else + { + if (!VMToOSInterface::ReleaseDoubleMappedMemory(m_doubleMemoryMapperHandle, pRX, pBlock->offset, pBlock->size)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the double mapped memory failed")); + } } // Put the released block into the free block list pBlock->baseRX = NULL; @@ -962,3 +977,60 @@ void ExecutableAllocator::UnmapRW(void* pRW) g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); } } + +void* ExecutableAllocator::AllocateThunksFromTemplate(void *pTemplate, size_t templateSize) +{ + if (IsDoubleMappingEnabled() && VMToOSInterface::AllocateThunksFromTemplateRespectsStartAddress()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(templateSize * 2, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + void* result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, templateSize * 2, 0, 0); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + + void *pTemplateAddressAllocated = VMToOSInterface::AllocateThunksFromTemplate(pTemplate, templateSize, block->baseRX); + + if (pTemplateAddressAllocated == NULL) + { + ReleaseWorker(block->baseRX, false); + } + + return pTemplateAddressAllocated; + } + else 
+ { + return VMToOSInterface::AllocateThunksFromTemplate(pTemplate, templateSize, NULL); + } +} + +void ExecutableAllocator::FreeThunksFromTemplate(void *pThunks, size_t templateSize) +{ + if (IsDoubleMappingEnabled() && VMToOSInterface::AllocateThunksFromTemplateRespectsStartAddress()) + { + ReleaseWorker(pThunks, true /* This is a release of template allocated memory */); + } + else + { + VMToOSInterface::FreeThunksFromTemplate(pThunks, templateSize); + } +} + +void* ExecutableAllocator::CreateTemplate(void* templateInImage, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)) +{ + return VMToOSInterface::CreateTemplate(templateInImage, templateSize, codePageGenerator); +} diff --git a/src/coreclr/utilcode/explicitcontrolloaderheap.cpp b/src/coreclr/utilcode/explicitcontrolloaderheap.cpp new file mode 100644 index 000000000000..a6adb7da2c37 --- /dev/null +++ b/src/coreclr/utilcode/explicitcontrolloaderheap.cpp @@ -0,0 +1,337 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "stdafx.h" // Precompiled header key. +#include "loaderheap.h" +#include "loaderheap_shared.h" +#include "ex.h" +#include "pedecoder.h" +#define DONOT_DEFINE_ETW_CALLBACK +#include "eventtracebase.h" + +#ifndef DACCESS_COMPILE + +INDEBUG(DWORD ExplicitControlLoaderHeap::s_dwNumInstancesOfLoaderHeaps = 0;) + +namespace +{ +#if !defined(SELF_NO_HOST) // ETW available only in the runtime + inline void EtwAllocRequest(ExplicitControlLoaderHeap * const pHeap, void* ptr, size_t dwSize) + { + FireEtwAllocRequest(pHeap, ptr, static_cast(dwSize), 0, 0, GetClrInstanceId()); + } +#else +#define EtwAllocRequest(pHeap, ptr, dwSize) ((void)0) +#endif // SELF_NO_HOST +} +#endif // DACCESS_COMPILE + +size_t ExplicitControlLoaderHeap::AllocMem_TotalSize(size_t dwRequestedSize) +{ + LIMITED_METHOD_CONTRACT; + + size_t dwSize = dwRequestedSize; + +#ifdef _DEBUG + dwSize += LOADER_HEAP_DEBUG_BOUNDARY; + dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); +#endif + + dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); + + return dwSize; +} + +//===================================================================================== +// UnlockedLoaderHeap methods +//===================================================================================== + +#ifndef DACCESS_COMPILE +ExplicitControlLoaderHeap::ExplicitControlLoaderHeap(bool fMakeExecutable) : + m_pAllocPtr(NULL), + m_dwTotalAlloc(0), + m_fExecutableHeap(fMakeExecutable) +{ + CONTRACTL + { + CONSTRUCTOR_CHECK; + NOTHROW; + FORBID_FAULT; + } + CONTRACTL_END; + + m_pPtrToEndOfCommittedRegion = NULL; + m_pEndReservedRegion = NULL; + m_pAllocPtr = NULL; + + m_dwCommitBlockSize = GetOsPageSize(); + +#ifdef _DEBUG + m_dwDebugWastedBytes = 0; + s_dwNumInstancesOfLoaderHeaps++; +#endif +} + +// ~LoaderHeap is not synchronised (obviously) +ExplicitControlLoaderHeap::~ExplicitControlLoaderHeap() +{ + CONTRACTL + { + DESTRUCTOR_CHECK; + NOTHROW; + FORBID_FAULT; + } + CONTRACTL_END + + LoaderHeapBlock *pSearch, *pNext; + + for (pSearch = m_pFirstBlock; pSearch; pSearch = pNext) + { + void * pVirtualAddress; + BOOL fReleaseMemory; + + pVirtualAddress = pSearch->pVirtualAddress; + fReleaseMemory = pSearch->m_fReleaseMemory; + pNext = pSearch->pNext; + + if (fReleaseMemory) + { + ExecutableAllocator::Instance()->Release(pVirtualAddress); + } + + delete pSearch; + } + + if (m_reservedBlock.m_fReleaseMemory) + { + 
ExecutableAllocator::Instance()->Release(m_reservedBlock.pVirtualAddress); + } + + INDEBUG(s_dwNumInstancesOfLoaderHeaps --;) +} + +void ExplicitControlLoaderHeap::SetReservedRegion(BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, BOOL fReleaseMemory) +{ + WRAPPER_NO_CONTRACT; + _ASSERTE(m_reservedBlock.pVirtualAddress == NULL); + m_reservedBlock.Init((void *)dwReservedRegionAddress, dwReservedRegionSize, fReleaseMemory); +} + +#endif // #ifndef DACCESS_COMPILE + +size_t ExplicitControlLoaderHeap::GetBytesAvailCommittedRegion() +{ + LIMITED_METHOD_CONTRACT; + + if (m_pAllocPtr < m_pPtrToEndOfCommittedRegion) + return (size_t)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr); + else + return 0; +} + +size_t ExplicitControlLoaderHeap::GetBytesAvailReservedRegion() +{ + LIMITED_METHOD_CONTRACT; + + if (m_pAllocPtr < m_pEndReservedRegion) + return (size_t)(m_pEndReservedRegion- m_pAllocPtr); + else + return 0; +} + +#ifndef DACCESS_COMPILE + +BOOL ExplicitControlLoaderHeap::CommitPages(void* pData, size_t dwSizeToCommitPart) +{ + // Commit first set of pages, since it will contain the LoaderHeapBlock + void *pTemp = ExecutableAllocator::Instance()->Commit(pData, dwSizeToCommitPart, m_fExecutableHeap ? TRUE : FALSE); + if (pTemp == NULL) + { + return FALSE; + } + + return TRUE; +} + +BOOL ExplicitControlLoaderHeap::ReservePages(size_t dwSizeToCommit) +{ + CONTRACTL + { + INSTANCE_CHECK; + NOTHROW; + INJECT_FAULT(return FALSE;); + } + CONTRACTL_END; + + size_t dwSizeToReserve; + + // Round to page size again + dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); + + ReservedMemoryHolder pData = NULL; + BOOL fReleaseMemory = TRUE; + + // We were provided with a reserved memory block at instance creation time, so use it if it's big enough. + if (m_reservedBlock.pVirtualAddress != NULL && + m_reservedBlock.dwVirtualSize >= dwSizeToCommit) + { + // Get the info out of the block. + pData = (PTR_BYTE)m_reservedBlock.pVirtualAddress; + dwSizeToReserve = m_reservedBlock.dwVirtualSize; + fReleaseMemory = m_reservedBlock.m_fReleaseMemory; + + // Zero the block so this memory doesn't get used again. + m_reservedBlock.Init(NULL, 0, FALSE); + } + // The caller is asking us to allocate the memory + else + { + return FALSE; + } + + // When the user passes in the reserved memory, the commit size is 0 and is adjusted to be the sizeof(LoaderHeap). + // If for some reason this is not true then we just catch this via an assertion and the dev who changed code + // would have to add logic here to handle the case when committed mem is more than the reserved mem. One option + // could be to leak the users memory and reserve+commit a new block, Another option would be to fail the alloc mem + // and notify the user to provide more reserved mem. 
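ExplicitControlLoaderHeap never reserves memory on its own: ReservePages only succeeds when the caller has handed it a region via SetReservedRegion. A minimal usage sketch under that assumption; the function and parameter names are illustrative, and the runtime's internal loaderheap headers are assumed:

void UseExplicitControlHeap(BYTE* pReservedBase, SIZE_T reservedSize)
{
    ExplicitControlLoaderHeap heap(true /* fMakeExecutable */);

    // Hand the heap a region reserved elsewhere; fReleaseMemory == FALSE keeps
    // ownership (and the responsibility to release it) with the caller.
    heap.SetReservedRegion(pReservedBase, reservedSize, FALSE);

    // Carve a pointer-aligned code allocation out of that region; NULL means the
    // region could not satisfy the request.
    void* pCode = heap.AllocMemForCode_NoThrow(0 /* header */, 128 /* code bytes */, sizeof(void*) /* alignment */, 0 /* jump stubs */);
    // ... emit code into pCode ...
}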
+ _ASSERTE((dwSizeToCommit <= dwSizeToReserve) && "Loaderheap tried to commit more memory than reserved by user"); + + if (!fReleaseMemory) + { + pData.SuppressRelease(); + } + + size_t dwSizeToCommitPart = dwSizeToCommit; + + if (!CommitPages(pData, dwSizeToCommitPart)) + { + return FALSE; + } + + NewHolder pNewBlock = new (nothrow) LoaderHeapBlock; + if (pNewBlock == NULL) + { + return FALSE; + } + + m_dwTotalAlloc += dwSizeToCommit; + + pNewBlock.SuppressRelease(); + pData.SuppressRelease(); + + pNewBlock->dwVirtualSize = dwSizeToReserve; + pNewBlock->pVirtualAddress = pData; + pNewBlock->pNext = m_pFirstBlock; + pNewBlock->m_fReleaseMemory = fReleaseMemory; + + // Add to the linked list + m_pFirstBlock = pNewBlock; + + m_pPtrToEndOfCommittedRegion = (BYTE *) (pData) + (dwSizeToCommit); \ + m_pAllocPtr = (BYTE *) (pData); \ + m_pEndReservedRegion = (BYTE *) (pData) + (dwSizeToReserve); + + return TRUE; +} + +// Get some more committed pages - either commit some more in the current reserved region, or, if it +// has run out, reserve another set of pages. +// Returns: FALSE if we can't get any more memory +// TRUE: We can/did get some more memory - check to see if it's sufficient for +// the caller's needs (see UnlockedAllocMem for example of use) +BOOL ExplicitControlLoaderHeap::GetMoreCommittedPages(size_t dwMinSize) +{ + CONTRACTL + { + INSTANCE_CHECK; + NOTHROW; + INJECT_FAULT(return FALSE;); + } + CONTRACTL_END; + + // If we have memory we can use, what are you doing here! + _ASSERTE(dwMinSize > (SIZE_T)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr)); + + // Does this fit in the reserved region? + if (dwMinSize <= (size_t)(m_pEndReservedRegion - m_pAllocPtr)) + { + SIZE_T dwSizeToCommit; + + dwSizeToCommit = (m_pAllocPtr + dwMinSize) - m_pPtrToEndOfCommittedRegion; + + size_t unusedRemainder = (size_t)((BYTE*)m_pPtrToEndOfCommittedRegion - m_pAllocPtr); + + PTR_BYTE pCommitBaseAddress = m_pPtrToEndOfCommittedRegion; + + if (dwSizeToCommit < m_dwCommitBlockSize) + dwSizeToCommit = min((SIZE_T)(m_pEndReservedRegion - m_pPtrToEndOfCommittedRegion), (SIZE_T)m_dwCommitBlockSize); + + // Round to page size + dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); + + size_t dwSizeToCommitPart = dwSizeToCommit; + + if (!CommitPages(pCommitBaseAddress, dwSizeToCommitPart)) + { + return FALSE; + } + + m_pPtrToEndOfCommittedRegion += dwSizeToCommit; + m_dwTotalAlloc += dwSizeToCommit; + + return TRUE; + } + + // Need to allocate a new set of reserved pages that will be located likely at a nonconsecutive virtual address. + // Waste the unused bytes + INDEBUG(m_dwDebugWastedBytes += (size_t)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr);) + + // Note, there are unused reserved pages at end of current region -can't do much about that + // Provide dwMinSize here since UnlockedReservePages will round up the commit size again + // after adding in the size of the LoaderHeapBlock header. 
+ return ReservePages(dwMinSize); +} + +void *ExplicitControlLoaderHeap::AllocMemForCode_NoThrow(size_t dwHeaderSize, size_t dwCodeSize, DWORD dwCodeAlignment, size_t dwReserveForJumpStubs) +{ + CONTRACT(void*) + { + INSTANCE_CHECK; + NOTHROW; + INJECT_FAULT(CONTRACT_RETURN NULL;); + PRECONDITION(0 == (dwCodeAlignment & (dwCodeAlignment - 1))); // require power of 2 + POSTCONDITION(CheckPointer(RETVAL, NULL_OK)); + } + CONTRACT_END; + + INCONTRACT(_ASSERTE(!ARE_FAULTS_FORBIDDEN())); + + // We don't know how much "extra" we need to satisfy the alignment until we know + // which address will be handed out which in turn we don't know because we don't + // know whether the allocation will fit within the current reserved range. + // + // Thus, we'll request as much heap growth as is needed for the worst case (we request an extra dwCodeAlignment - 1 bytes) + + S_SIZE_T cbAllocSize = S_SIZE_T(dwHeaderSize) + S_SIZE_T(dwCodeSize) + S_SIZE_T(dwCodeAlignment - 1) + S_SIZE_T(dwReserveForJumpStubs); + if( cbAllocSize.IsOverflow() ) + { + RETURN NULL; + } + + if (cbAllocSize.Value() > GetBytesAvailCommittedRegion()) + { + if (GetMoreCommittedPages(cbAllocSize.Value()) == FALSE) + { + RETURN NULL; + } + } + + BYTE *pResult = (BYTE *)ALIGN_UP(m_pAllocPtr + dwHeaderSize, dwCodeAlignment); + EtwAllocRequest(this, pResult, (pResult + dwCodeSize) - m_pAllocPtr); + m_pAllocPtr = pResult + dwCodeSize; + + RETURN pResult; +} + + +#endif // #ifndef DACCESS_COMPILE diff --git a/src/coreclr/utilcode/format1.cpp b/src/coreclr/utilcode/format1.cpp index f8ef94406bfb..306248465e76 100644 --- a/src/coreclr/utilcode/format1.cpp +++ b/src/coreclr/utilcode/format1.cpp @@ -61,15 +61,13 @@ COR_ILMETHOD_DECODER::COR_ILMETHOD_DECODER( fErrorInInit = true; Code = 0; SetLocalVarSigTok(0); - if (wbStatus != NULL) - { - *wbStatus = FORMAT_ERROR; - } } PAL_ENDTRY if (fErrorInInit) { + if (wbStatus != NULL) + *wbStatus = FORMAT_ERROR; return; } diff --git a/src/coreclr/utilcode/hostimpl.cpp b/src/coreclr/utilcode/hostimpl.cpp index 4ba20554c824..e463af9f705a 100644 --- a/src/coreclr/utilcode/hostimpl.cpp +++ b/src/coreclr/utilcode/hostimpl.cpp @@ -3,6 +3,8 @@ #include "stdafx.h" +#include + #include "mscoree.h" #include "clrinternal.h" #include "clrhost.h" @@ -12,28 +14,28 @@ thread_local size_t t_ThreadType; CRITSEC_COOKIE ClrCreateCriticalSection(CrstType crstType, CrstFlags flags) { - CRITICAL_SECTION *cs = (CRITICAL_SECTION*)malloc(sizeof(CRITICAL_SECTION)); - InitializeCriticalSection(cs); - return (CRITSEC_COOKIE)cs; + minipal_mutex* mt = (minipal_mutex*)malloc(sizeof(minipal_mutex)); + minipal_mutex_init(mt); + return (CRITSEC_COOKIE)mt; } void ClrDeleteCriticalSection(CRITSEC_COOKIE cookie) { _ASSERTE(cookie); - DeleteCriticalSection((CRITICAL_SECTION*)cookie); + minipal_mutex_destroy((minipal_mutex*)cookie); free(cookie); } void ClrEnterCriticalSection(CRITSEC_COOKIE cookie) { _ASSERTE(cookie); - EnterCriticalSection((CRITICAL_SECTION*)cookie); + minipal_mutex_enter((minipal_mutex*)cookie); } void ClrLeaveCriticalSection(CRITSEC_COOKIE cookie) { _ASSERTE(cookie); - LeaveCriticalSection((CRITICAL_SECTION*)cookie); + minipal_mutex_leave((minipal_mutex*)cookie); } DWORD ClrSleepEx(DWORD dwMilliseconds, BOOL bAlertable) diff --git a/src/coreclr/utilcode/ilformatter.cpp b/src/coreclr/utilcode/ilformatter.cpp index 086183250673..7cca3f25b695 100644 --- a/src/coreclr/utilcode/ilformatter.cpp +++ b/src/coreclr/utilcode/ilformatter.cpp @@ -290,10 +290,6 @@ void ILFormatter::formatInstrArgs(OpInfo op, OpArgsVal arg, OutString* out, 
size } } -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif /***************************************************************************/ const BYTE* ILFormatter::formatStatement(const BYTE* instrPtr, OutString* out) { @@ -811,8 +807,3 @@ const BYTE* ILFormatter::formatStatement(const BYTE* instrPtr, OutString* out) { } return(instrPtr); } -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - - diff --git a/src/coreclr/utilcode/interleavedloaderheap.cpp b/src/coreclr/utilcode/interleavedloaderheap.cpp new file mode 100644 index 000000000000..082e337caebd --- /dev/null +++ b/src/coreclr/utilcode/interleavedloaderheap.cpp @@ -0,0 +1,550 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "stdafx.h" // Precompiled header key. +#include "loaderheap.h" +#include "loaderheap_shared.h" +#include "ex.h" +#include "pedecoder.h" +#define DONOT_DEFINE_ETW_CALLBACK +#include "eventtracebase.h" + +#ifndef DACCESS_COMPILE + +namespace +{ +#if !defined(SELF_NO_HOST) // ETW available only in the runtime + inline void EtwAllocRequest(UnlockedInterleavedLoaderHeap * const pHeap, void* ptr, size_t dwSize) + { + FireEtwAllocRequest(pHeap, ptr, static_cast(dwSize), 0, 0, GetClrInstanceId()); + } +#else +#define EtwAllocRequest(pHeap, ptr, dwSize) ((void)0) +#endif // SELF_NO_HOST +} + +#endif // #ifndef DACCESS_COMPILE + +//===================================================================================== +// UnlockedInterleavedLoaderHeap methods +//===================================================================================== + +#ifndef DACCESS_COMPILE + +UnlockedInterleavedLoaderHeap::UnlockedInterleavedLoaderHeap( + RangeList *pRangeList, + const InterleavedLoaderHeapConfig *pConfig) : + UnlockedLoaderHeapBase(LoaderHeapImplementationKind::Interleaved), + m_pEndReservedRegion(NULL), + m_dwGranularity(pConfig->StubSize), + m_pRangeList(pRangeList), + m_pFreeListHead(NULL), + m_pConfig(pConfig) +{ + CONTRACTL + { + CONSTRUCTOR_CHECK; + NOTHROW; + FORBID_FAULT; + } + CONTRACTL_END; + + _ASSERTE((GetStubCodePageSize() % GetOsPageSize()) == 0); // Stub code page size MUST be in increments of the page size. 
(Really it must be a power of 2 as well, but this is good enough) +} + +// ~LoaderHeap is not synchronised (obviously) +UnlockedInterleavedLoaderHeap::~UnlockedInterleavedLoaderHeap() +{ + CONTRACTL + { + DESTRUCTOR_CHECK; + NOTHROW; + FORBID_FAULT; + } + CONTRACTL_END + + if (m_pRangeList != NULL) + m_pRangeList->RemoveRanges((void *) this); + + LoaderHeapBlock *pSearch, *pNext; + + for (pSearch = m_pFirstBlock; pSearch; pSearch = pNext) + { + void * pVirtualAddress; + + pVirtualAddress = pSearch->pVirtualAddress; + pNext = pSearch->pNext; + + if (m_pConfig->Template != NULL) + { + ExecutableAllocator::Instance()->FreeThunksFromTemplate(pVirtualAddress, GetStubCodePageSize()); + } + else + { + ExecutableAllocator::Instance()->Release(pVirtualAddress); + } + + delete pSearch; + } +} +#endif // #ifndef DACCESS_COMPILE + +size_t UnlockedInterleavedLoaderHeap::GetBytesAvailReservedRegion() +{ + LIMITED_METHOD_CONTRACT; + + if (m_pAllocPtr < m_pEndReservedRegion) + return (size_t)(m_pEndReservedRegion- m_pAllocPtr); + else + return 0; +} + +#ifndef DACCESS_COMPILE + +BOOL UnlockedInterleavedLoaderHeap::CommitPages(void* pData, size_t dwSizeToCommitPart) +{ + _ASSERTE(m_pConfig->Template == NULL); // This path should only be used for LoaderHeaps which use the standard ExecutableAllocator functions + // Commit first set of pages, since it will contain the LoaderHeapBlock + { + void *pTemp = ExecutableAllocator::Instance()->Commit(pData, dwSizeToCommitPart, IsExecutable()); + if (pTemp == NULL) + { + return FALSE; + } + } + + _ASSERTE(dwSizeToCommitPart == GetStubCodePageSize()); + + { + void *pTemp = ExecutableAllocator::Instance()->Commit((BYTE*)pData + dwSizeToCommitPart, dwSizeToCommitPart, FALSE); + if (pTemp == NULL) + { + return FALSE; + } + } + + ExecutableWriterHolder codePageWriterHolder((BYTE*)pData, dwSizeToCommitPart, ExecutableAllocator::DoNotAddToCache); + m_pConfig->CodePageGenerator(codePageWriterHolder.GetRW(), (BYTE*)pData, dwSizeToCommitPart); + FlushInstructionCache(GetCurrentProcess(), pData, dwSizeToCommitPart); + + return TRUE; +} + +BOOL UnlockedInterleavedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) +{ + CONTRACTL + { + INSTANCE_CHECK; + NOTHROW; + INJECT_FAULT(return FALSE;); + } + CONTRACTL_END; + + _ASSERTE(m_pConfig->Template == NULL); // This path should only be used for LoaderHeaps which use the standard ExecutableAllocator functions + + size_t dwSizeToReserve; + + // Round to page size again + dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); + + ReservedMemoryHolder pData = NULL; + + // Figure out how much to reserve + dwSizeToReserve = dwSizeToCommit; + + // Round to VIRTUAL_ALLOC_RESERVE_GRANULARITY + dwSizeToReserve = ALIGN_UP(dwSizeToReserve, VIRTUAL_ALLOC_RESERVE_GRANULARITY); + + _ASSERTE(dwSizeToCommit <= dwSizeToReserve); + + // + // Reserve pages + // + + // Reserve the memory for even non-executable stuff close to the executable code, as it has profound effect + // on e.g. a static variable access performance. + pData = (BYTE *)ExecutableAllocator::Instance()->Reserve(dwSizeToReserve); + if (pData == NULL) + { + _ASSERTE(!"Unable to reserve memory range for a loaderheap"); + return FALSE; + } + + // When the user passes in the reserved memory, the commit size is 0 and is adjusted to be the sizeof(LoaderHeap). + // If for some reason this is not true then we just catch this via an assertion and the dev who changed code + // would have to add logic here to handle the case when committed mem is more than the reserved mem. 
One option + // could be to leak the users memory and reserve+commit a new block, Another option would be to fail the alloc mem + // and notify the user to provide more reserved mem. + _ASSERTE((dwSizeToCommit <= dwSizeToReserve) && "Loaderheap tried to commit more memory than reserved by user"); + + size_t dwSizeToCommitPart = dwSizeToCommit; + + // For interleaved heaps, we perform two commits, each being half of the requested size + dwSizeToCommitPart /= 2; + + if (!CommitPages(pData, dwSizeToCommitPart)) + { + return FALSE; + } + + NewHolder pNewBlock = new (nothrow) LoaderHeapBlock; + if (pNewBlock == NULL) + { + return FALSE; + } + + // Record reserved range in range list, if one is specified + // Do this AFTER the commit - otherwise we'll have bogus ranges included. + if (m_pRangeList != NULL) + { + if (!m_pRangeList->AddRange((const BYTE *) pData, + ((const BYTE *) pData) + dwSizeToReserve, + (void *) this)) + { + return FALSE; + } + } + + m_dwTotalAlloc += dwSizeToCommit; + + pNewBlock.SuppressRelease(); + pData.SuppressRelease(); + + pNewBlock->dwVirtualSize = dwSizeToReserve; + pNewBlock->pVirtualAddress = pData; + pNewBlock->pNext = m_pFirstBlock; + pNewBlock->m_fReleaseMemory = TRUE; + + // Add to the linked list + m_pFirstBlock = pNewBlock; + + dwSizeToCommit /= 2; + + m_pPtrToEndOfCommittedRegion = (BYTE *) (pData) + (dwSizeToCommit); \ + m_pAllocPtr = (BYTE *) (pData); \ + m_pEndReservedRegion = (BYTE *) (pData) + (dwSizeToReserve); + + return TRUE; +} + +void ReleaseAllocatedThunks(BYTE* thunks) +{ + ExecutableAllocator::Instance()->FreeThunksFromTemplate(thunks, GetStubCodePageSize()); +} + +using ThunkMemoryHolder = SpecializedWrapper; + + +// Get some more committed pages - either commit some more in the current reserved region, or, if it +// has run out, reserve another set of pages. +// Returns: FALSE if we can't get any more memory +// TRUE: We can/did get some more memory - check to see if it's sufficient for +// the caller's needs (see UnlockedAllocMem for example of use) +BOOL UnlockedInterleavedLoaderHeap::GetMoreCommittedPages(size_t dwMinSize) +{ + CONTRACTL + { + INSTANCE_CHECK; + NOTHROW; + INJECT_FAULT(return FALSE;); + } + CONTRACTL_END; + + if (m_pConfig->Template != NULL) + { + ThunkMemoryHolder newAllocatedThunks = (BYTE*)ExecutableAllocator::Instance()->AllocateThunksFromTemplate(m_pConfig->Template, GetStubCodePageSize()); + if (newAllocatedThunks == NULL) + { + return FALSE; + } + + NewHolder pNewBlock = new (nothrow) LoaderHeapBlock; + if (pNewBlock == NULL) + { + return FALSE; + } + + size_t dwSizeToReserve = GetStubCodePageSize() * 2; + + // Record reserved range in range list, if one is specified + // Do this AFTER the commit - otherwise we'll have bogus ranges included. 
+ if (m_pRangeList != NULL) + { + if (!m_pRangeList->AddRange((const BYTE *) newAllocatedThunks, + ((const BYTE *) newAllocatedThunks) + dwSizeToReserve, + (void *) this)) + { + return FALSE; + } + } + + m_dwTotalAlloc += dwSizeToReserve; + + pNewBlock.SuppressRelease(); + newAllocatedThunks.SuppressRelease(); + + pNewBlock->dwVirtualSize = dwSizeToReserve; + pNewBlock->pVirtualAddress = newAllocatedThunks; + pNewBlock->pNext = m_pFirstBlock; + pNewBlock->m_fReleaseMemory = TRUE; + + // Add to the linked list + m_pFirstBlock = pNewBlock; + + m_pAllocPtr = (BYTE*)newAllocatedThunks; + m_pPtrToEndOfCommittedRegion = m_pAllocPtr + GetStubCodePageSize(); + m_pEndReservedRegion = m_pAllocPtr + dwSizeToReserve; // For consistency with the non-template path m_pEndReservedRegion is after the end of the data area + m_dwTotalAlloc += GetStubCodePageSize(); + + return TRUE; + } + + // From here, all work is only for the dynamically allocated InterleavedLoaderHeap path + + // If we have memory we can use, what are you doing here! + _ASSERTE(dwMinSize > (SIZE_T)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr)); + + // This mode interleaves data and code pages 1:1. So the code size is required to be smaller than + // or equal to the page size to ensure that the code range is consecutive. + _ASSERTE(dwMinSize <= GetStubCodePageSize()); + // For interleaved heap, we always get two memory pages - one for code and one for data + dwMinSize = 2 * GetStubCodePageSize(); + + // Does this fit in the reserved region? + if (dwMinSize <= (size_t)(m_pEndReservedRegion - m_pAllocPtr)) + { + SIZE_T dwSizeToCommit; + + // For interleaved heaps, the allocation cannot cross page boundary since there are data and executable + // pages interleaved in a 1:1 fashion. + dwSizeToCommit = dwMinSize; + + size_t unusedRemainder = (size_t)((BYTE*)m_pPtrToEndOfCommittedRegion - m_pAllocPtr); + + PTR_BYTE pCommitBaseAddress = m_pPtrToEndOfCommittedRegion; + + // The end of committed region for interleaved heaps points to the end of the executable + // page and the data pages goes right after that. So we skip the data page here. + pCommitBaseAddress += GetStubCodePageSize(); + + size_t dwSizeToCommitPart = dwSizeToCommit; + // For interleaved heaps, we perform two commits, each being half of the requested size + dwSizeToCommitPart /= 2; + + if (!CommitPages(pCommitBaseAddress, dwSizeToCommitPart)) + { + return FALSE; + } + + INDEBUG(m_dwDebugWastedBytes += unusedRemainder;) + + // For interleaved heaps, further allocations will start from the newly committed page as they cannot + // cross page boundary. + m_pAllocPtr = (BYTE*)pCommitBaseAddress; + + m_pPtrToEndOfCommittedRegion += dwSizeToCommit; + m_dwTotalAlloc += dwSizeToCommit; + + return TRUE; + } + + // Keep track of the unused memory in the current reserved region. + INDEBUG(m_dwDebugWastedBytes += (size_t)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr);) + + // Note, there are unused reserved pages at end of current region -can't do much about that + // Provide dwMinSize here since UnlockedReservePages will round up the commit size again + // after adding in the size of the LoaderHeapBlock header. 
+ return UnlockedReservePages(dwMinSize); +} + +#ifdef _DEBUG +static DWORD ShouldInjectFault() +{ + static DWORD fInjectFault = 99; + + if (fInjectFault == 99) + fInjectFault = (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_InjectFault) != 0); + return fInjectFault; +} + +#define SHOULD_INJECT_FAULT(return_statement) \ + do { \ + if (ShouldInjectFault() & 0x1) \ + { \ + char *a = new (nothrow) char; \ + if (a == NULL) \ + { \ + return_statement; \ + } \ + delete a; \ + } \ + } while (FALSE) + +#else + +#define SHOULD_INJECT_FAULT(return_statement) do { (void)((void *)0); } while (FALSE) + +#endif + +void UnlockedInterleavedLoaderHeap::UnlockedBackoutStub(void *pMem + COMMA_INDEBUG(_In_ const char *szFile) + COMMA_INDEBUG(int lineNum) + COMMA_INDEBUG(_In_ const char *szAllocFile) + COMMA_INDEBUG(int allocLineNum)) +{ + CONTRACTL + { + INSTANCE_CHECK; + NOTHROW; + FORBID_FAULT; + } + CONTRACTL_END; + + // Because the primary use of this function is backout, we'll be nice and + // define Backout(NULL) be a legal NOP. + if (pMem == NULL) + { + return; + } + + size_t dwSize = m_dwGranularity; + + // Clear the RW page + memset((BYTE*)pMem + GetStubCodePageSize(), 0x00, dwSize); // Fill freed region with 0 + + if (m_pAllocPtr == ( ((BYTE*)pMem) + dwSize )) + { + m_pAllocPtr = (BYTE*)pMem; + } + else + { + InterleavedStubFreeListNode* newFreeNode = (InterleavedStubFreeListNode*)((BYTE*)pMem + GetStubCodePageSize()); + newFreeNode->m_pNext = m_pFreeListHead; + m_pFreeListHead = newFreeNode; + } +} + + +// Allocates memory for a single stub which is a pair of memory addresses +// The first address is the pointer at the stub code, and the second +// address is the data for the stub. These are separated by GetStubCodePageSize() +// bytes. +// +// Here is how to properly backout the memory: +// +// void *pMem = UnlockedAllocStub(d); +// UnlockedBackoutStub(pMem); +// +// If you use the AllocMemHolder or AllocMemTracker, all this is taken care of +// behind the scenes. 
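A minimal sketch of the code/data pairing described above, with the debug-only file/line arguments elided as in the comment's own example; the local names are illustrative:

// pStub was returned by UnlockedAllocStub / UnlockedAllocStub_NoThrow.
BYTE* pStubCode = (BYTE*)pStub;                        // RX: the stub's executable bytes, produced by the config's CodePageGenerator
BYTE* pStubData = pStubCode + GetStubCodePageSize();   // RW: the stub's writable data slot, exactly one code page later
// UnlockedBackoutStub(pStubCode, ...) zeroes that RW slot and then either rewinds
// m_pAllocPtr (when this was the most recent allocation) or pushes the slot onto
// m_pFreeListHead so a later UnlockedAllocStub call can reuse the pair.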
+// +// +void *UnlockedInterleavedLoaderHeap::UnlockedAllocStub_NoThrow( + INDEBUG(_In_ const char *szFile) + COMMA_INDEBUG(int lineNum)) +{ + CONTRACT(void*) + { + NOTHROW; + + // Macro syntax can't handle this INJECT_FAULT expression - we'll use a precondition instead + //INJECT_FAULT( do{ if (*pdwExtra) {*pdwExtra = 0} RETURN NULL; } while(0) ); + + } + CONTRACT_END + + size_t dwRequestedSize = m_dwGranularity; + size_t alignment = 1; + + STATIC_CONTRACT_FAULT; + + SHOULD_INJECT_FAULT(RETURN NULL); + + void *pResult; + + INCONTRACT(_ASSERTE(!ARE_FAULTS_FORBIDDEN())); + + _ASSERTE(m_dwGranularity >= sizeof(InterleavedStubFreeListNode)); + + if (m_pFreeListHead != NULL) + { + // We have a free stub - use it + InterleavedStubFreeListNode* pFreeStubData = m_pFreeListHead; + m_pFreeListHead = pFreeStubData->m_pNext; + pFreeStubData->m_pNext = NULL; + pResult = ((BYTE*)pFreeStubData) - GetStubCodePageSize(); + } + else + { + if (dwRequestedSize > GetBytesAvailCommittedRegion()) + { + if (!GetMoreCommittedPages(dwRequestedSize)) + { + RETURN NULL; + } + } + + pResult = m_pAllocPtr; + + m_pAllocPtr += dwRequestedSize; + } + +#ifdef _DEBUG + // Check to ensure that the RW region of the allocated stub is zeroed out + BYTE *pAllocatedRWBytes = (BYTE*)pResult + GetStubCodePageSize(); + for (size_t i = 0; i < dwRequestedSize; i++) + { + _ASSERTE_MSG(pAllocatedRWBytes[i] == 0, "LoaderHeap must return zero-initialized memory"); + } + + if (m_dwDebugFlags & kCallTracing) + { + LoaderHeapSniffer::RecordEvent(this, + kAllocMem, + szFile, + lineNum, + szFile, + lineNum, + pResult, + dwRequestedSize, + dwRequestedSize + ); + } + + EtwAllocRequest(this, pResult, dwRequestedSize); +#endif //_DEBUG + + RETURN pResult; +} + +void *UnlockedInterleavedLoaderHeap::UnlockedAllocStub( + INDEBUG(_In_ const char *szFile) + COMMA_INDEBUG(int lineNum)) +{ + CONTRACTL + { + THROWS; + INJECT_FAULT(ThrowOutOfMemory()); + } + CONTRACTL_END + + void *pResult = UnlockedAllocStub_NoThrow(INDEBUG(szFile) + COMMA_INDEBUG(lineNum)); + + if (!pResult) + { + ThrowOutOfMemory(); + } + + return pResult; +} + +void InitializeLoaderHeapConfig(InterleavedLoaderHeapConfig *pConfig, size_t stubSize, void* templateInImage, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)) +{ + pConfig->StubSize = (uint32_t)stubSize; + pConfig->Template = ExecutableAllocator::Instance()->CreateTemplate(templateInImage, GetStubCodePageSize(), codePageGenerator); + pConfig->CodePageGenerator = codePageGenerator; +} + +#endif // #ifndef DACCESS_COMPILE + diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp index f6ff925023db..51947ea8a82e 100644 --- a/src/coreclr/utilcode/loaderheap.cpp +++ b/src/coreclr/utilcode/loaderheap.cpp @@ -3,6 +3,7 @@ #include "stdafx.h" // Precompiled header key. 
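A minimal sketch of wiring the pieces above together when creating an interleaved heap, assuming the runtime's internal headers; GenerateStubCodePage, CreateStubHeap, pInImageTemplate, and the stub size are illustrative placeholders rather than real runtime names:

// Illustrative generator: fills the RW alias (pageBase) with stubs that will
// execute at the RX alias (pageBaseRX), one stub every StubSize bytes.
static void GenerateStubCodePage(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)
{
    // ... emit stub instructions ...
}

UnlockedInterleavedLoaderHeap* CreateStubHeap(void* pInImageTemplate)
{
    // The config must outlive the heap: UnlockedInterleavedLoaderHeap stores a
    // pointer to it (m_pConfig) rather than copying it.
    static InterleavedLoaderHeapConfig s_config;
    InitializeLoaderHeapConfig(&s_config, 16 /* stub size, illustrative */, pInImageTemplate, GenerateStubCodePage);

    // Pass a RangeList if lock-free range lookups are needed; NULL is accepted.
    return new UnlockedInterleavedLoaderHeap(NULL /* optional RangeList */, &s_config);
}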
#include "loaderheap.h" +#include "loaderheap_shared.h" #include "ex.h" #include "pedecoder.h" #define DONOT_DEFINE_ETW_CALLBACK @@ -10,869 +11,27 @@ #ifndef DACCESS_COMPILE -INDEBUG(DWORD UnlockedLoaderHeap::s_dwNumInstancesOfLoaderHeaps = 0;) - -#ifdef RANDOMIZE_ALLOC -#include -static class Random -{ -public: - Random() { seed = (unsigned int)time(NULL); } - unsigned int Next() - { - return ((seed = seed * 214013L + 2531011L) >> 16) & 0x7fff; - } -private: - unsigned int seed; -} s_random; -#endif - -namespace -{ -#if !defined(SELF_NO_HOST) // ETW available only in the runtime - inline void EtwAllocRequest(UnlockedLoaderHeap * const pHeap, void* ptr, size_t dwSize) - { - FireEtwAllocRequest(pHeap, ptr, static_cast(dwSize), 0, 0, GetClrInstanceId()); - } -#else -#define EtwAllocRequest(pHeap, ptr, dwSize) ((void)0) -#endif // SELF_NO_HOST -} - -// -// RangeLists are constructed so they can be searched from multiple -// threads without locking. They do require locking in order to -// be safely modified, though. -// - -RangeList::RangeList() -{ - WRAPPER_NO_CONTRACT; - - InitBlock(&m_starterBlock); - - m_firstEmptyBlock = &m_starterBlock; - m_firstEmptyRange = 0; -} - -RangeList::~RangeList() -{ - LIMITED_METHOD_CONTRACT; - - RangeListBlock *b = m_starterBlock.next; - - while (b != NULL) - { - RangeListBlock *bNext = b->next; - delete b; - b = bNext; - } -} - -void RangeList::InitBlock(RangeListBlock *b) -{ - LIMITED_METHOD_CONTRACT; - - Range *r = b->ranges; - Range *rEnd = r + RANGE_COUNT; - while (r < rEnd) - r++->id = (TADDR)NULL; - - b->next = NULL; -} - -BOOL RangeList::AddRangeWorker(const BYTE *start, const BYTE *end, void *id) -{ - CONTRACTL - { - INSTANCE_CHECK; - NOTHROW; - GC_NOTRIGGER; - INJECT_FAULT(return FALSE;); - } - CONTRACTL_END - - _ASSERTE(id != NULL); - - RangeListBlock *b = m_firstEmptyBlock; - Range *r = b->ranges + m_firstEmptyRange; - Range *rEnd = b->ranges + RANGE_COUNT; - - while (TRUE) - { - while (r < rEnd) - { - if (r->id == (TADDR)NULL) - { - r->start = (TADDR)start; - r->end = (TADDR)end; - r->id = (TADDR)id; - - r++; - - m_firstEmptyBlock = b; - m_firstEmptyRange = r - b->ranges; - - return TRUE; - } - r++; - } - - // - // If there are no more blocks, allocate a - // new one. - // - - if (b->next == NULL) - { - RangeListBlock *newBlock = new (nothrow) RangeListBlock; - - if (newBlock == NULL) - { - m_firstEmptyBlock = b; - m_firstEmptyRange = r - b->ranges; - return FALSE; - } - - InitBlock(newBlock); - - newBlock->next = NULL; - b->next = newBlock; - } - - // - // Next block - // - - b = b->next; - r = b->ranges; - rEnd = r + RANGE_COUNT; - } -} - -void RangeList::RemoveRangesWorker(void *id) -{ - CONTRACTL - { - INSTANCE_CHECK; - NOTHROW; - GC_NOTRIGGER; - FORBID_FAULT; - } - CONTRACTL_END - - RangeListBlock *b = &m_starterBlock; - Range *r = b->ranges; - Range *rEnd = r + RANGE_COUNT; - - // - // Find the first free element, & mark it. - // - - while (TRUE) - { - // - // Clear entries in this block. - // - - while (r < rEnd) - { - if (r->id == (TADDR)id) - { - r->id = (TADDR)NULL; - } - - r++; - } - - // - // If there are no more blocks, we're done. - // - - if (b->next == NULL) - { - m_firstEmptyRange = 0; - m_firstEmptyBlock = &m_starterBlock; - - return; - } - - // - // Next block. 
- // - - b = b->next; - r = b->ranges; - rEnd = r + RANGE_COUNT; - } -} - -#endif // #ifndef DACCESS_COMPILE - -BOOL RangeList::IsInRangeWorker(TADDR address) -{ - CONTRACTL - { - INSTANCE_CHECK; - NOTHROW; - FORBID_FAULT; - GC_NOTRIGGER; - } - CONTRACTL_END - - SUPPORTS_DAC; - - for (const RangeListBlock* b = &m_starterBlock; b != nullptr; b = b->next) - { - for (const Range r : b->ranges) - { - if (r.id != (TADDR)nullptr && address >= r.start && address < r.end) - return TRUE; - } - } - return FALSE; -} - -#ifdef DACCESS_COMPILE - -void -RangeList::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) -{ - SUPPORTS_DAC; - WRAPPER_NO_CONTRACT; - - // This class is almost always contained in something - // else so there's no enumeration of 'this'. - - RangeListBlock* block = &m_starterBlock; - block->EnumMemoryRegions(flags); - - while (block->next.IsValid()) - { - block->next.EnumMem(); - block = block->next; - - block->EnumMemoryRegions(flags); - } -} - -void -RangeList::RangeListBlock::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) -{ - WRAPPER_NO_CONTRACT; - - Range* range; - TADDR BADFOOD; - TSIZE_T size; - int i; - - // The code below iterates each range stored in the RangeListBlock and - // dumps the memory region represented by each range. - // It is too much memory for a mini-dump, so we just bail out for mini-dumps. - if (flags == CLRDATA_ENUM_MEM_MINI || flags == CLRDATA_ENUM_MEM_TRIAGE) - { - return; - } - - BIT64_ONLY( BADFOOD = 0xbaadf00dbaadf00d; ); - NOT_BIT64( BADFOOD = 0xbaadf00d; ); - - for (i=0; iranges[i]); - if (range->id == (TADDR)NULL || range->start == (TADDR)NULL || range->end == (TADDR)NULL || - // just looking at the lower 4bytes is good enough on WIN64 - range->start == BADFOOD || range->end == BADFOOD) - { - break; - } - - size = range->end - range->start; - _ASSERTE( size < UINT32_MAX ); // ranges should be less than 4gig! - - // We can't be sure this entire range is mapped. For example, the code:StubLinkStubManager - // keeps track of all ranges in the code:LoaderAllocator::m_pStubHeap LoaderHeap, and - // code:LoaderHeap::UnlockedReservePages adds a range for the entire reserved region, instead - // of updating the RangeList when pages are committed. But in that case, the committed region of - // memory will be enumerated by the LoaderHeap anyway, so it's OK if this fails - EMEM_OUT(("MEM: RangeListBlock %p - %p\n", range->start, range->end)); - DacEnumMemoryRegion(range->start, size, false); - } -} - -#endif // #ifdef DACCESS_COMPILE - - -//===================================================================================== -// In DEBUG builds only, we tag live blocks with the requested size and the type of -// allocation (AllocMem, AllocAlignedMem, AllocateOntoReservedMem). This is strictly -// to validate that those who call Backout* are passing in the right values. -// -// For simplicity, we'll use one LoaderHeapValidationTag structure for all types even -// though not all fields are applicable to all types. -//===================================================================================== -#ifdef _DEBUG -enum AllocationType -{ - kAllocMem = 1, - kFreedMem = 4, -}; - -struct LoaderHeapValidationTag -{ - size_t m_dwRequestedSize; // What the caller requested (not what was actually allocated) - AllocationType m_allocationType; // Which api allocated this block. 
- const char * m_szFile; // Who allocated me - int m_lineNum; // Who allocated me - -}; -#endif //_DEBUG - - - - - -//===================================================================================== -// These classes do detailed loaderheap sniffing to help in debugging heap crashes -//===================================================================================== -#ifdef _DEBUG - -// This structure logs the results of an Alloc or Free call. They are stored in reverse time order -// with UnlockedLoaderHeap::m_pEventList pointing to the most recent event. -struct LoaderHeapEvent -{ - LoaderHeapEvent *m_pNext; - AllocationType m_allocationType; //Which api was called - const char *m_szFile; //Caller Id - int m_lineNum; //Caller Id - const char *m_szAllocFile; //(BackoutEvents): Who allocated the block? - int m_allocLineNum; //(BackoutEvents): Who allocated the block? - void *m_pMem; //Starting address of block - size_t m_dwRequestedSize; //Requested size of block - size_t m_dwSize; //Actual size of block (including validation tags, padding, everything) - - - void Describe(SString *pSString) - { - CONTRACTL - { - INSTANCE_CHECK; - DISABLED(NOTHROW); - GC_NOTRIGGER; - } - CONTRACTL_END - - pSString->AppendASCII("\n"); - - { - StackSString buf; - if (m_allocationType == kFreedMem) - { - buf.Printf(" Freed at: %s (line %d)\n", m_szFile, m_lineNum); - buf.Printf(" (block originally allocated at %s (line %d)\n", m_szAllocFile, m_allocLineNum); - } - else - { - buf.Printf(" Allocated at: %s (line %d)\n", m_szFile, m_lineNum); - } - pSString->Append(buf); - } - - if (!QuietValidate()) - { - pSString->AppendASCII(" *** THIS BLOCK HAS BEEN CORRUPTED ***\n"); - } - - - - { - StackSString buf; - buf.Printf(" Type: "); - switch (m_allocationType) - { - case kAllocMem: - buf.AppendASCII("AllocMem()\n"); - break; - case kFreedMem: - buf.AppendASCII("Free\n"); - break; - default: - break; - } - pSString->Append(buf); - } - - - { - StackSString buf; - buf.Printf(" Start of block: 0x%p\n", m_pMem); - pSString->Append(buf); - } - - { - StackSString buf; - buf.Printf(" End of block: 0x%p\n", ((BYTE*)m_pMem) + m_dwSize - 1); - pSString->Append(buf); - } - - { - StackSString buf; - buf.Printf(" Requested size: %lu (0x%lx)\n", (ULONG)m_dwRequestedSize, (ULONG)m_dwRequestedSize); - pSString->Append(buf); - } - - { - StackSString buf; - buf.Printf(" Actual size: %lu (0x%lx)\n", (ULONG)m_dwSize, (ULONG)m_dwSize); - pSString->Append(buf); - } - - pSString->AppendASCII("\n"); - } - - - - BOOL QuietValidate(); - -}; - - -class LoaderHeapSniffer -{ - public: - static DWORD InitDebugFlags() - { - WRAPPER_NO_CONTRACT; - - DWORD dwDebugFlags = 0; - if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_LoaderHeapCallTracing)) - { - dwDebugFlags |= UnlockedLoaderHeap::kCallTracing; - } - return dwDebugFlags; - } - - - static VOID RecordEvent(UnlockedLoaderHeap *pHeap, - AllocationType allocationType, - _In_ const char *szFile, - int lineNum, - _In_ const char *szAllocFile, - int allocLineNum, - void *pMem, - size_t dwRequestedSize, - size_t dwSize - ); - - static VOID ClearEvents(UnlockedLoaderHeap *pHeap) - { - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_FORBID_FAULT; - - LoaderHeapEvent *pEvent = pHeap->m_pEventList; - while (pEvent) - { - LoaderHeapEvent *pNext = pEvent->m_pNext; - delete pEvent; - pEvent = pNext; - } - pHeap->m_pEventList = NULL; - } - - - static VOID CompactEvents(UnlockedLoaderHeap *pHeap) - { - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_FORBID_FAULT; - - LoaderHeapEvent **ppEvent = 
&(pHeap->m_pEventList); - while (*ppEvent) - { - LoaderHeapEvent *pEvent = *ppEvent; - if (pEvent->m_allocationType != kFreedMem) - { - ppEvent = &(pEvent->m_pNext); - } - else - { - LoaderHeapEvent **ppWalk = &(pEvent->m_pNext); - BOOL fMatchFound = FALSE; - while (*ppWalk && !fMatchFound) - { - LoaderHeapEvent *pWalk = *ppWalk; - if (pWalk->m_allocationType != kFreedMem && - pWalk->m_pMem == pEvent->m_pMem && - pWalk->m_dwRequestedSize == pEvent->m_dwRequestedSize) - { - // Delete matched pairs - - // Order is important here - updating *ppWalk may change pEvent->m_pNext, and we want - // to get the updated value when we unlink pEvent. - *ppWalk = pWalk->m_pNext; - *ppEvent = pEvent->m_pNext; - - delete pEvent; - delete pWalk; - fMatchFound = TRUE; - } - else - { - ppWalk = &(pWalk->m_pNext); - } - } - - if (!fMatchFound) - { - ppEvent = &(pEvent->m_pNext); - } - } - } - } - static VOID PrintEvents(UnlockedLoaderHeap *pHeap) - { - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_FORBID_FAULT; - - printf("\n------------- LoaderHeapEvents (in reverse time order!) --------------------"); - - LoaderHeapEvent *pEvent = pHeap->m_pEventList; - while (pEvent) - { - printf("\n"); - switch (pEvent->m_allocationType) - { - case kAllocMem: printf("AllocMem "); break; - case kFreedMem: printf("BackoutMem "); break; - - } - printf(" ptr = 0x%-8p", pEvent->m_pMem); - printf(" rqsize = 0x%-8x", (DWORD)pEvent->m_dwRequestedSize); - printf(" actsize = 0x%-8x", (DWORD)pEvent->m_dwSize); - printf(" (at %s@%d)", pEvent->m_szFile, pEvent->m_lineNum); - if (pEvent->m_allocationType == kFreedMem) - { - printf(" (original allocation at %s@%d)", pEvent->m_szAllocFile, pEvent->m_allocLineNum); - } - - pEvent = pEvent->m_pNext; - - } - printf("\n------------- End of LoaderHeapEvents --------------------------------------"); - printf("\n"); - - } - - - static VOID PitchSniffer(SString *pSString) - { - WRAPPER_NO_CONTRACT; - pSString->AppendASCII("\n" - "\nBecause call-tracing wasn't turned on, we couldn't provide details about who last owned the affected memory block. To get more precise diagnostics," - "\nset the following registry DWORD value:" - "\n" - "\n HKLM\\Software\\Microsoft\\.NETFramework\\LoaderHeapCallTracing = 1" - "\n" - "\nand rerun the scenario that crashed." - "\n" - "\n"); - } - - static LoaderHeapEvent *FindEvent(UnlockedLoaderHeap *pHeap, void *pAddr) - { - LIMITED_METHOD_CONTRACT; - - LoaderHeapEvent *pEvent = pHeap->m_pEventList; - while (pEvent) - { - if (pAddr >= pEvent->m_pMem && pAddr <= ( ((BYTE*)pEvent->m_pMem) + pEvent->m_dwSize - 1)) - { - return pEvent; - } - pEvent = pEvent->m_pNext; - } - return NULL; - - } - - - static void ValidateFreeList(UnlockedLoaderHeap *pHeap); - - static void WeGotAFaultNowWhat(UnlockedLoaderHeap *pHeap) - { - WRAPPER_NO_CONTRACT; - ValidateFreeList(pHeap); - - //If none of the above popped up an assert, pop up a generic one. - _ASSERTE(!("Unexpected AV inside LoaderHeap. 
The usual reason is that someone overwrote the end of a block or wrote into a freed block.\n")); - - } - -}; - - -#endif - - -#ifdef _DEBUG -#define LOADER_HEAP_BEGIN_TRAP_FAULT BOOL __faulted = FALSE; EX_TRY { -#define LOADER_HEAP_END_TRAP_FAULT } EX_CATCH {__faulted = TRUE; } EX_END_CATCH(SwallowAllExceptions) if (__faulted) LoaderHeapSniffer::WeGotAFaultNowWhat(pHeap); -#else -#define LOADER_HEAP_BEGIN_TRAP_FAULT -#define LOADER_HEAP_END_TRAP_FAULT -#endif - - -//===================================================================================== -// This freelist implementation is a first cut and probably needs to be tuned. -// It should be tuned with the following assumptions: -// -// - Freeing LoaderHeap memory is done primarily for OOM backout. LoaderHeaps -// weren't designed to be general purpose heaps and shouldn't be used that way. -// -// - And hence, when memory is freed, expect it to be freed in large clumps and in a -// LIFO order. Since the LoaderHeap normally hands out memory with sequentially -// increasing addresses, blocks will typically be freed with sequentially decreasing -// addresses. -// -// The first cut of the freelist is a single-linked list of free blocks using first-fit. -// Assuming the above alloc-free pattern holds, the list will end up mostly sorted -// in increasing address order. When a block is freed, we'll attempt to coalesce it -// with the first block in the list. We could also choose to be more aggressive about -// sorting and coalescing but this should probably catch most cases in practice. -//===================================================================================== - -// When a block is freed, we place this structure on the first bytes of the freed block (Allocations -// are bumped in size if necessary to make sure there's room.) -struct LoaderHeapFreeBlock -{ - public: - LoaderHeapFreeBlock *m_pNext; // Pointer to next block on free list - size_t m_dwSize; // Total size of this block - void *m_pBlockAddress; // Virtual address of the block - -#ifndef DACCESS_COMPILE - static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMem, size_t dwTotalSize, UnlockedLoaderHeap *pHeap) - { - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_GC_NOTRIGGER; - - // The new "nothrow" below failure is handled in a non-fault way, so - // make sure that callers with FORBID_FAULT can call this method without - // firing the contract violation assert. - PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure); - - LOADER_HEAP_BEGIN_TRAP_FAULT - - // It's illegal to insert a free block that's smaller than the minimum sized allocation - - // it may stay stranded on the freelist forever. 
-#ifdef _DEBUG - if (!(dwTotalSize >= pHeap->AllocMem_TotalSize(1))) - { - LoaderHeapSniffer::ValidateFreeList(pHeap); - _ASSERTE(dwTotalSize >= pHeap->AllocMem_TotalSize(1)); - } - - if (!(0 == (dwTotalSize & ALLOC_ALIGN_CONSTANT))) - { - LoaderHeapSniffer::ValidateFreeList(pHeap); - _ASSERTE(0 == (dwTotalSize & ALLOC_ALIGN_CONSTANT)); - } -#endif - -#ifdef DEBUG - if (!pHeap->IsInterleaved()) - { - void* pMemRW = pMem; - ExecutableWriterHolderNoLog memWriterHolder; - if (pHeap->IsExecutable()) - { - memWriterHolder.AssignExecutableWriterHolder(pMem, dwTotalSize); - pMemRW = memWriterHolder.GetRW(); - } - - memset(pMemRW, 0xcc, dwTotalSize); - } - else - { - memset((BYTE*)pMem + GetStubCodePageSize(), 0xcc, dwTotalSize); - } -#endif // DEBUG - - LoaderHeapFreeBlock *pNewBlock = new (nothrow) LoaderHeapFreeBlock; - // If we fail allocating the LoaderHeapFreeBlock, ignore the failure and don't insert the free block at all. - if (pNewBlock != NULL) - { - pNewBlock->m_pNext = *ppHead; - pNewBlock->m_dwSize = dwTotalSize; - pNewBlock->m_pBlockAddress = pMem; - *ppHead = pNewBlock; - MergeBlock(pNewBlock, pHeap); - } - - LOADER_HEAP_END_TRAP_FAULT - } - - static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, UnlockedLoaderHeap *pHeap) - { - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_GC_NOTRIGGER; - - INCONTRACT(_ASSERTE_IMPL(!ARE_FAULTS_FORBIDDEN())); - - void *pResult = NULL; - LOADER_HEAP_BEGIN_TRAP_FAULT - - LoaderHeapFreeBlock **ppWalk = ppHead; - while (*ppWalk) - { - LoaderHeapFreeBlock *pCur = *ppWalk; - size_t dwCurSize = pCur->m_dwSize; - if (dwCurSize == dwSize) - { - pResult = pCur->m_pBlockAddress; - // Exact match. Hooray! - *ppWalk = pCur->m_pNext; - delete pCur; - break; - } - else if (dwCurSize > dwSize && (dwCurSize - dwSize) >= pHeap->AllocMem_TotalSize(1)) - { - // Partial match. Ok... - pResult = pCur->m_pBlockAddress; - *ppWalk = pCur->m_pNext; - InsertFreeBlock(ppWalk, ((BYTE*)pCur->m_pBlockAddress) + dwSize, dwCurSize - dwSize, pHeap ); - delete pCur; - break; - } - - // Either block is too small or splitting the block would leave a remainder that's smaller than - // the minimum block size. Onto next one. - - ppWalk = &( pCur->m_pNext ); - } - - if (pResult) - { - void *pResultRW = pResult; - ExecutableWriterHolderNoLog resultWriterHolder; - if (pHeap->IsExecutable()) - { - resultWriterHolder.AssignExecutableWriterHolder(pResult, dwSize); - pResultRW = resultWriterHolder.GetRW(); - } - // Callers of loaderheap assume allocated memory is zero-inited so we must preserve this invariant! - memset(pResultRW, 0, dwSize); - } - LOADER_HEAP_END_TRAP_FAULT - return pResult; - } - - private: - // Try to merge pFreeBlock with its immediate successor. Return TRUE if a merge happened. FALSE if no merge happened. 
- static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlock, UnlockedLoaderHeap *pHeap) - { - STATIC_CONTRACT_NOTHROW; - - BOOL result = FALSE; - - LOADER_HEAP_BEGIN_TRAP_FAULT - - LoaderHeapFreeBlock *pNextBlock = pFreeBlock->m_pNext; - size_t dwSize = pFreeBlock->m_dwSize; - - if (pNextBlock == NULL || ((BYTE*)pNextBlock->m_pBlockAddress) != (((BYTE*)pFreeBlock->m_pBlockAddress) + dwSize)) - { - result = FALSE; - } - else - { - size_t dwCombinedSize = dwSize + pNextBlock->m_dwSize; - LoaderHeapFreeBlock *pNextNextBlock = pNextBlock->m_pNext; - void *pMemRW = pFreeBlock->m_pBlockAddress; - ExecutableWriterHolderNoLog memWriterHolder; - if (pHeap->IsExecutable()) - { - memWriterHolder.AssignExecutableWriterHolder(pFreeBlock->m_pBlockAddress, dwCombinedSize); - pMemRW = memWriterHolder.GetRW(); - } - INDEBUG(memset(pMemRW, 0xcc, dwCombinedSize);) - pFreeBlock->m_pNext = pNextNextBlock; - pFreeBlock->m_dwSize = dwCombinedSize; - delete pNextBlock; - - result = TRUE; - } - - LOADER_HEAP_END_TRAP_FAULT - return result; - - } -#endif // DACCESS_COMPILE -}; - - - - -//===================================================================================== -// These helpers encapsulate the actual layout of a block allocated by AllocMem -// and UnlockedAllocMem(): -// -// ==> Starting address is always pointer-aligned. -// -// - x bytes of user bytes (where "x" is the actual dwSize passed into AllocMem) -// -// - y bytes of "EE" (DEBUG-ONLY) (where "y" == LOADER_HEAP_DEBUG_BOUNDARY (normally 0)) -// - z bytes of pad (DEBUG-ONLY) (where "z" is just enough to pointer-align the following byte) -// - a bytes of tag (DEBUG-ONLY) (where "a" is sizeof(LoaderHeapValidationTag) -// -// - b bytes of pad (where "b" is just enough to pointer-align the following byte) -// -// ==> Following address is always pointer-aligned -//===================================================================================== - -// Convert the requested size into the total # of bytes we'll actually allocate (including padding) -size_t UnlockedLoaderHeap::AllocMem_TotalSize(size_t dwRequestedSize) +namespace { - LIMITED_METHOD_CONTRACT; - - size_t dwSize = dwRequestedSize; - - // Interleaved heap cannot ad any extra to the requested size - if (!IsInterleaved()) +#if !defined(SELF_NO_HOST) // ETW available only in the runtime + inline void EtwAllocRequest(UnlockedLoaderHeap * const pHeap, void* ptr, size_t dwSize) { -#ifdef _DEBUG - dwSize += LOADER_HEAP_DEBUG_BOUNDARY; - dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); -#endif - - if (!m_fExplicitControl) - { -#ifdef _DEBUG - dwSize += sizeof(LoaderHeapValidationTag); -#endif - } - dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); + FireEtwAllocRequest(pHeap, ptr, static_cast(dwSize), 0, 0, GetClrInstanceId()); } - - return dwSize; +#else +#define EtwAllocRequest(pHeap, ptr, dwSize) ((void)0) +#endif // SELF_NO_HOST } - #ifdef _DEBUG -LoaderHeapValidationTag *AllocMem_GetTag(LPVOID pBlock, size_t dwRequestedSize) -{ - LIMITED_METHOD_CONTRACT; - - size_t dwSize = dwRequestedSize; - dwSize += LOADER_HEAP_DEBUG_BOUNDARY; - dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); - return (LoaderHeapValidationTag *)( ((BYTE*)pBlock) + dwSize ); -} +#define LOADER_HEAP_BEGIN_TRAP_FAULT BOOL __faulted = FALSE; EX_TRY { +#define LOADER_HEAP_END_TRAP_FAULT } EX_CATCH {__faulted = TRUE; } EX_END_CATCH(SwallowAllExceptions) if (__faulted) UnlockedLoaderHeap::WeGotAFaultNowWhat(pHeap); +#else +#define LOADER_HEAP_BEGIN_TRAP_FAULT +#define 
LOADER_HEAP_END_TRAP_FAULT #endif - - - +#endif // #ifndef DACCESS_COMPILE //===================================================================================== // UnlockedLoaderHeap methods @@ -885,9 +44,8 @@ UnlockedLoaderHeap::UnlockedLoaderHeap(DWORD dwReserveBlockSize, const BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, RangeList *pRangeList, - HeapKind kind, - void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size), - DWORD dwGranularity) + LoaderHeapImplementationKind kind) : + UnlockedLoaderHeapBase(kind) { CONTRACTL { @@ -897,35 +55,13 @@ UnlockedLoaderHeap::UnlockedLoaderHeap(DWORD dwReserveBlockSize, } CONTRACTL_END; - m_pFirstBlock = NULL; - m_dwReserveBlockSize = dwReserveBlockSize; m_dwCommitBlockSize = dwCommitBlockSize; - m_pPtrToEndOfCommittedRegion = NULL; m_pEndReservedRegion = NULL; - m_pAllocPtr = NULL; m_pRangeList = pRangeList; - // Round to VIRTUAL_ALLOC_RESERVE_GRANULARITY - m_dwTotalAlloc = 0; - - _ASSERTE((GetStubCodePageSize() % GetOsPageSize()) == 0); // Stub code page size MUST be in increments of the page size. (Really it must be a power of 2 as well, but this is good enough) - m_dwGranularity = dwGranularity; - -#ifdef _DEBUG - m_dwDebugWastedBytes = 0; - s_dwNumInstancesOfLoaderHeaps++; - m_pEventList = NULL; - m_dwDebugFlags = LoaderHeapSniffer::InitDebugFlags(); -#endif - - m_kind = kind; - - _ASSERTE((kind != HeapKind::Interleaved) || (codePageGenerator != NULL)); - m_codePageGenerator = codePageGenerator; - m_pFirstFreeBlock = NULL; if (dwReservedRegionAddress != NULL && dwReservedRegionSize > 0) @@ -971,15 +107,6 @@ UnlockedLoaderHeap::~UnlockedLoaderHeap() { ExecutableAllocator::Instance()->Release(m_reservedBlock.pVirtualAddress); } - - INDEBUG(s_dwNumInstancesOfLoaderHeaps --;) -} - -void UnlockedLoaderHeap::UnlockedSetReservedRegion(BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, BOOL fReleaseMemory) -{ - WRAPPER_NO_CONTRACT; - _ASSERTE(m_reservedBlock.pVirtualAddress == NULL); - m_reservedBlock.Init((void *)dwReservedRegionAddress, dwReservedRegionSize, fReleaseMemory); } #endif // #ifndef DACCESS_COMPILE @@ -1006,16 +133,6 @@ void UnlockedLoaderHeap::DebugGuardHeap() } #endif -size_t UnlockedLoaderHeap::GetBytesAvailCommittedRegion() -{ - LIMITED_METHOD_CONTRACT; - - if (m_pAllocPtr < m_pPtrToEndOfCommittedRegion) - return (size_t)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr); - else - return 0; -} - size_t UnlockedLoaderHeap::GetBytesAvailReservedRegion() { LIMITED_METHOD_CONTRACT; @@ -1028,16 +145,6 @@ size_t UnlockedLoaderHeap::GetBytesAvailReservedRegion() #ifndef DACCESS_COMPILE -void ReleaseReservedMemory(BYTE* value) -{ - if (value) - { - ExecutableAllocator::Instance()->Release(value); - } -} - -using ReservedMemoryHolder = SpecializedWrapper; - BOOL UnlockedLoaderHeap::CommitPages(void* pData, size_t dwSizeToCommitPart) { // Commit first set of pages, since it will contain the LoaderHeapBlock @@ -1047,24 +154,12 @@ BOOL UnlockedLoaderHeap::CommitPages(void* pData, size_t dwSizeToCommitPart) return FALSE; } - if (IsInterleaved()) - { - _ASSERTE(dwSizeToCommitPart == GetStubCodePageSize()); - - void *pTemp = ExecutableAllocator::Instance()->Commit((BYTE*)pData + dwSizeToCommitPart, dwSizeToCommitPart, FALSE); - if (pTemp == NULL) - { - return FALSE; - } - - ExecutableWriterHolder codePageWriterHolder((BYTE*)pData, dwSizeToCommitPart, ExecutableAllocator::DoNotAddToCache); - m_codePageGenerator(codePageWriterHolder.GetRW(), (BYTE*)pData, dwSizeToCommitPart); - 
FlushInstructionCache(GetCurrentProcess(), pData, dwSizeToCommitPart); - } - return TRUE; } +#ifdef FEATURE_PERFMAP +bool PerfMapLowGranularityStubs(); +#endif // FEATURE_PERFMAP BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) { CONTRACTL @@ -1098,16 +193,16 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) // The caller is asking us to allocate the memory else { - if (m_fExplicitControl) - { - return FALSE; - } - // Figure out how much to reserve dwSizeToReserve = max(dwSizeToCommit, m_dwReserveBlockSize); - // Round to VIRTUAL_ALLOC_RESERVE_GRANULARITY - dwSizeToReserve = ALIGN_UP(dwSizeToReserve, VIRTUAL_ALLOC_RESERVE_GRANULARITY); +#ifdef FEATURE_PERFMAP // Perfmap requires that the memory assigned to stub generated regions be allocated only via fully commited memory + if (!IsInterleaved() || !PerfMapLowGranularityStubs()) +#endif // FEATURE_PERFMAP + { + // Round to VIRTUAL_ALLOC_RESERVE_GRANULARITY + dwSizeToReserve = ALIGN_UP(dwSizeToReserve, VIRTUAL_ALLOC_RESERVE_GRANULARITY); + } _ASSERTE(dwSizeToCommit <= dwSizeToReserve); @@ -1138,11 +233,6 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) } size_t dwSizeToCommitPart = dwSizeToCommit; - if (IsInterleaved()) - { - // For interleaved heaps, we perform two commits, each being half of the requested size - dwSizeToCommitPart /= 2; - } if (!CommitPages(pData, dwSizeToCommitPart)) { @@ -1180,11 +270,6 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) // Add to the linked list m_pFirstBlock = pNewBlock; - if (IsInterleaved()) - { - dwSizeToCommit /= 2; - } - m_pPtrToEndOfCommittedRegion = (BYTE *) (pData) + (dwSizeToCommit); \ m_pAllocPtr = (BYTE *) (pData); \ m_pEndReservedRegion = (BYTE *) (pData) + (dwSizeToReserve); @@ -1210,80 +295,28 @@ BOOL UnlockedLoaderHeap::GetMoreCommittedPages(size_t dwMinSize) // If we have memory we can use, what are you doing here! _ASSERTE(dwMinSize > (SIZE_T)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr)); - if (IsInterleaved()) - { - // This mode interleaves data and code pages 1:1. So the code size is required to be smaller than - // or equal to the page size to ensure that the code range is consecutive. - _ASSERTE(dwMinSize <= GetStubCodePageSize()); - // For interleaved heap, we always get two memory pages - one for code and one for data - dwMinSize = 2 * GetStubCodePageSize(); - } - // Does this fit in the reserved region? if (dwMinSize <= (size_t)(m_pEndReservedRegion - m_pAllocPtr)) { SIZE_T dwSizeToCommit; - - if (IsInterleaved()) - { - // For interleaved heaps, the allocation cannot cross page boundary since there are data and executable - // pages interleaved in a 1:1 fashion. - dwSizeToCommit = dwMinSize; - } - else - { - dwSizeToCommit = (m_pAllocPtr + dwMinSize) - m_pPtrToEndOfCommittedRegion; - } + dwSizeToCommit = (m_pAllocPtr + dwMinSize) - m_pPtrToEndOfCommittedRegion; size_t unusedRemainder = (size_t)((BYTE*)m_pPtrToEndOfCommittedRegion - m_pAllocPtr); PTR_BYTE pCommitBaseAddress = m_pPtrToEndOfCommittedRegion; - if (IsInterleaved()) - { - // The end of committed region for interleaved heaps points to the end of the executable - // page and the data pages goes right after that. So we skip the data page here. 
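The reservation path above keeps a single policy now that the interleaved special cases are gone: reserve at least m_dwReserveBlockSize, round the reservation up to the OS granularity, and skip that rounding only when FEATURE_PERFMAP needs stub regions backed by fully committed memory. A small standalone sketch of the sizing arithmetic follows; the constants are illustrative stand-ins, not the runtime's actual configuration.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>

// Illustrative stand-ins; the real values come from the allocator and the OS.
constexpr size_t kReserveGranularity = 64 * 1024; // VIRTUAL_ALLOC_RESERVE_GRANULARITY is typically 64 KB on Windows
constexpr size_t kReserveBlockSize   = 16 * 4096; // hypothetical m_dwReserveBlockSize

// Round n up to the next multiple of align (align must be a power of two).
constexpr size_t AlignUp(size_t n, size_t align)
{
    return (n + align - 1) & ~(align - 1);
}

// Mirrors the sizing decision in UnlockedReservePages: reserve at least the
// configured block size, then round the reservation up to the OS granularity
// (the real code skips the rounding for perf-map low-granularity stub heaps).
size_t ComputeReserveSize(size_t commitSize, bool roundToGranularity = true)
{
    size_t reserveSize = std::max(commitSize, kReserveBlockSize);
    if (roundToGranularity)
        reserveSize = AlignUp(reserveSize, kReserveGranularity);
    assert(commitSize <= reserveSize);
    return reserveSize;
}

int main()
{
    std::printf("commit 4 KB  -> reserve %zu bytes\n", ComputeReserveSize(4 * 1024));
    std::printf("commit 96 KB -> reserve %zu bytes\n", ComputeReserveSize(96 * 1024));
}
```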
- pCommitBaseAddress += GetStubCodePageSize(); - } - else - { - if (dwSizeToCommit < m_dwCommitBlockSize) - dwSizeToCommit = min((SIZE_T)(m_pEndReservedRegion - m_pPtrToEndOfCommittedRegion), (SIZE_T)m_dwCommitBlockSize); + if (dwSizeToCommit < m_dwCommitBlockSize) + dwSizeToCommit = min((SIZE_T)(m_pEndReservedRegion - m_pPtrToEndOfCommittedRegion), (SIZE_T)m_dwCommitBlockSize); - // Round to page size - dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); - } + // Round to page size + dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); size_t dwSizeToCommitPart = dwSizeToCommit; - if (IsInterleaved()) - { - // For interleaved heaps, we perform two commits, each being half of the requested size - dwSizeToCommitPart /= 2; - } if (!CommitPages(pCommitBaseAddress, dwSizeToCommitPart)) { return FALSE; } - if (IsInterleaved()) - { - // If the remaining bytes are large enough to allocate data of the allocation granularity, add them to the free - // block list. - // Otherwise the remaining bytes that are available will be wasted. - if (unusedRemainder >= GetStubCodePageSize()) - { - LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, m_pAllocPtr, unusedRemainder, this); - } - else - { - INDEBUG(m_dwDebugWastedBytes += unusedRemainder;) - } - - // For interleaved heaps, further allocations will start from the newly committed page as they cannot - // cross page boundary. - m_pAllocPtr = (BYTE*)pCommitBaseAddress; - } - m_pPtrToEndOfCommittedRegion += dwSizeToCommit; m_dwTotalAlloc += dwSizeToCommit; @@ -1384,8 +417,7 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize INCONTRACT(_ASSERTE(!ARE_FAULTS_FORBIDDEN())); #ifdef RANDOMIZE_ALLOC - if (!m_fExplicitControl && !IsInterleaved()) - dwSize += s_random.Next() % 256; + dwSize += s_randomForLoaderHeap.Next() % 256; #endif dwSize = AllocMem_TotalSize(dwSize); @@ -1426,14 +458,11 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize "LoaderHeap must return zero-initialized memory"); } - if (!m_fExplicitControl && !IsInterleaved()) - { - LoaderHeapValidationTag *pTag = AllocMem_GetTag(pAllocatedBytes, dwRequestedSize); - pTag->m_allocationType = kAllocMem; - pTag->m_dwRequestedSize = dwRequestedSize; - pTag->m_szFile = szFile; - pTag->m_lineNum = lineNum; - } + LoaderHeapValidationTag *pTag = AllocMem_GetTag(pAllocatedBytes, dwRequestedSize); + pTag->m_allocationType = kAllocMem; + pTag->m_dwRequestedSize = dwRequestedSize; + pTag->m_szFile = szFile; + pTag->m_lineNum = lineNum; if (m_dwDebugFlags & kCallTracing) { @@ -1488,7 +517,6 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, } #ifdef _DEBUG - if (!IsInterleaved()) { DEBUG_ONLY_REGION(); @@ -1574,11 +602,11 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, size_t dwSize = AllocMem_TotalSize(dwRequestedSize); #ifdef _DEBUG - if ((m_dwDebugFlags & kCallTracing) && !IsInterleaved()) + if (m_dwDebugFlags & kCallTracing) { DEBUG_ONLY_REGION(); - LoaderHeapValidationTag *pTag = m_fExplicitControl ? 
NULL : AllocMem_GetTag(pMem, dwRequestedSize); + LoaderHeapValidationTag *pTag = AllocMem_GetTag(pMem, dwRequestedSize); LoaderHeapSniffer::RecordEvent(this, @@ -1596,25 +624,17 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, if (m_pAllocPtr == ( ((BYTE*)pMem) + dwSize )) { - if (IsInterleaved()) + void *pMemRW = pMem; + ExecutableWriterHolderNoLog memWriterHolder; + if (IsExecutable()) { - // Clear the RW page - memset((BYTE*)pMem + GetStubCodePageSize(), 0x00, dwSize); // Fill freed region with 0 + memWriterHolder.AssignExecutableWriterHolder(pMem, dwSize); + pMemRW = memWriterHolder.GetRW(); } - else - { - void *pMemRW = pMem; - ExecutableWriterHolderNoLog memWriterHolder; - if (IsExecutable()) - { - memWriterHolder.AssignExecutableWriterHolder(pMem, dwSize); - pMemRW = memWriterHolder.GetRW(); - } - // Cool. This was the last block allocated. We can just undo the allocation instead - // of going to the freelist. - memset(pMemRW, 0x00, dwSize); // Fill freed region with 0 - } + // Cool. This was the last block allocated. We can just undo the allocation instead + // of going to the freelist. + memset(pMemRW, 0x00, dwSize); // Fill freed region with 0 m_pAllocPtr = (BYTE*)pMem; } else @@ -1659,7 +679,6 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz PRECONDITION( alignment != 0 ); PRECONDITION(0 == (alignment & (alignment - 1))); // require power of 2 - PRECONDITION((dwRequestedSize % m_dwGranularity) == 0); POSTCONDITION( (RETVAL) ? (0 == ( ((UINT_PTR)(RETVAL)) & (alignment - 1))) : // If non-null, pointer must be aligned (pdwExtra == NULL || 0 == *pdwExtra) // or else *pdwExtra must be set to 0 @@ -1670,10 +689,10 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz STATIC_CONTRACT_FAULT; // Set default value - if (pdwExtra) - { - *pdwExtra = 0; - } + if (pdwExtra) + { + *pdwExtra = 0; + } SHOULD_INJECT_FAULT(RETURN NULL); @@ -1704,11 +723,6 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz pResult = m_pAllocPtr; size_t extra = alignment - ((size_t)pResult & ((size_t)alignment - 1)); - if ((IsInterleaved())) - { - _ASSERTE(alignment == 1); - extra = 0; - } // On DEBUG, we force a non-zero extra so people don't forget to adjust for it on backout #ifndef _DEBUG @@ -1744,7 +758,7 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz memset(pAllocatedBytes + dwRequestedSize, 0xee, LOADER_HEAP_DEBUG_BOUNDARY); #endif - if (dwRequestedSize != 0 && !IsInterleaved()) + if (dwRequestedSize != 0) { _ASSERTE_MSG(pAllocatedBytes[0] == 0 && memcmp(pAllocatedBytes, pAllocatedBytes + 1, dwRequestedSize - 1) == 0, "LoaderHeap must return zero-initialized memory"); @@ -1760,162 +774,59 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz lineNum, ((BYTE*)pResult) - extra, dwRequestedSize + extra, - dwSize - ); - } - - EtwAllocRequest(this, pResult, dwSize); - - if (!m_fExplicitControl && !IsInterleaved()) - { - LoaderHeapValidationTag *pTag = AllocMem_GetTag(pAllocatedBytes - extra, dwRequestedSize + extra); - pTag->m_allocationType = kAllocMem; - pTag->m_dwRequestedSize = dwRequestedSize + extra; - pTag->m_szFile = szFile; - pTag->m_lineNum = lineNum; - } -#endif //_DEBUG - - if (pdwExtra) - { - *pdwExtra = extra; - } - - RETURN pResult; - -} - - - -void *UnlockedLoaderHeap::UnlockedAllocAlignedMem(size_t dwRequestedSize, - size_t dwAlignment, - size_t *pdwExtra - COMMA_INDEBUG(_In_ const char *szFile) - COMMA_INDEBUG(int lineNum)) -{ - 
CONTRACTL - { - THROWS; - INJECT_FAULT(ThrowOutOfMemory()); - } - CONTRACTL_END - - void *pResult = UnlockedAllocAlignedMem_NoThrow(dwRequestedSize, - dwAlignment, - pdwExtra - COMMA_INDEBUG(szFile) - COMMA_INDEBUG(lineNum)); - - if (!pResult) - { - ThrowOutOfMemory(); - } - - return pResult; - - -} - - - -void *UnlockedLoaderHeap::UnlockedAllocMemForCode_NoThrow(size_t dwHeaderSize, size_t dwCodeSize, DWORD dwCodeAlignment, size_t dwReserveForJumpStubs) -{ - CONTRACT(void*) - { - INSTANCE_CHECK; - NOTHROW; - INJECT_FAULT(CONTRACT_RETURN NULL;); - PRECONDITION(0 == (dwCodeAlignment & (dwCodeAlignment - 1))); // require power of 2 - POSTCONDITION(CheckPointer(RETVAL, NULL_OK)); - } - CONTRACT_END; - - _ASSERTE(m_fExplicitControl); - - INCONTRACT(_ASSERTE(!ARE_FAULTS_FORBIDDEN())); - - // We don't know how much "extra" we need to satisfy the alignment until we know - // which address will be handed out which in turn we don't know because we don't - // know whether the allocation will fit within the current reserved range. - // - // Thus, we'll request as much heap growth as is needed for the worst case (we request an extra dwCodeAlignment - 1 bytes) - - S_SIZE_T cbAllocSize = S_SIZE_T(dwHeaderSize) + S_SIZE_T(dwCodeSize) + S_SIZE_T(dwCodeAlignment - 1) + S_SIZE_T(dwReserveForJumpStubs); - if( cbAllocSize.IsOverflow() ) - { - RETURN NULL; + dwSize + ); } - if (cbAllocSize.Value() > GetBytesAvailCommittedRegion()) + EtwAllocRequest(this, pResult, dwSize); + + LoaderHeapValidationTag *pTag = AllocMem_GetTag(pAllocatedBytes - extra, dwRequestedSize + extra); + pTag->m_allocationType = kAllocMem; + pTag->m_dwRequestedSize = dwRequestedSize + extra; + pTag->m_szFile = szFile; + pTag->m_lineNum = lineNum; +#endif //_DEBUG + + if (pdwExtra) { - if (GetMoreCommittedPages(cbAllocSize.Value()) == FALSE) - { - RETURN NULL; - } + *pdwExtra = extra; } - BYTE *pResult = (BYTE *)ALIGN_UP(m_pAllocPtr + dwHeaderSize, dwCodeAlignment); - EtwAllocRequest(this, pResult, (pResult + dwCodeSize) - m_pAllocPtr); - m_pAllocPtr = pResult + dwCodeSize; - RETURN pResult; -} - - -#endif // #ifndef DACCESS_COMPILE -BOOL UnlockedLoaderHeap::IsExecutable() -{ - return (m_kind == HeapKind::Executable) || IsInterleaved(); } -BOOL UnlockedLoaderHeap::IsInterleaved() -{ - return m_kind == HeapKind::Interleaved; -} -#ifdef DACCESS_COMPILE -void UnlockedLoaderHeap::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) +void *UnlockedLoaderHeap::UnlockedAllocAlignedMem(size_t dwRequestedSize, + size_t dwAlignment, + size_t *pdwExtra + COMMA_INDEBUG(_In_ const char *szFile) + COMMA_INDEBUG(int lineNum)) { - WRAPPER_NO_CONTRACT; - - PTR_LoaderHeapBlock block = m_pFirstBlock; - while (block.IsValid()) + CONTRACTL { - // All we know is the virtual size of this block. We don't have any way to tell how - // much of this space was actually comitted, so don't expect that this will always - // succeed. - // @dbgtodo : Ideally we'd reduce the risk of corruption causing problems here. - // We could extend LoaderHeapBlock to track a commit size, - // but it seems wasteful (eg. makes each AppDomain objects 32 bytes larger on x64). 
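The aligned-allocation path above over-allocates by up to alignment - 1 bytes and reports the skipped prefix through *pdwExtra so the caller can hand it back on backout. A minimal, self-contained illustration of that arithmetic (hypothetical inputs; the real method additionally zero-checks the block and writes the debug validation tag):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Bytes to skip so that p + extra is aligned; mirrors the arithmetic in
// UnlockedAllocAlignedMem_NoThrow (which, on debug builds, deliberately keeps
// a non-zero "extra" so callers remember to pass it back when backing out).
size_t AlignmentExtra(uintptr_t p, size_t alignment)
{
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0); // power of two
    size_t extra = alignment - (p & (alignment - 1));
    if (extra == alignment)
        extra = 0; // already aligned, nothing to skip
    return extra;
}

int main()
{
    // 0x1008 needs 8 more bytes to reach 16-byte alignment; 0x1010 needs none.
    std::printf("extra for 0x1008: %zu\n", AlignmentExtra(0x1008, 16));
    std::printf("extra for 0x1010: %zu\n", AlignmentExtra(0x1010, 16));
}
```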
- TADDR addr = dac_cast(block->pVirtualAddress); - TSIZE_T size = block->dwVirtualSize; - EMEM_OUT(("MEM: UnlockedLoaderHeap %p - %p\n", addr, addr + size)); - DacEnumMemoryRegion(addr, size, false); - - block = block->pNext; + THROWS; + INJECT_FAULT(ThrowOutOfMemory()); } -} + CONTRACTL_END -#endif // #ifdef DACCESS_COMPILE + void *pResult = UnlockedAllocAlignedMem_NoThrow(dwRequestedSize, + dwAlignment, + pdwExtra + COMMA_INDEBUG(szFile) + COMMA_INDEBUG(lineNum)); + if (!pResult) + { + ThrowOutOfMemory(); + } -void UnlockedLoaderHeap::EnumPageRegions (EnumPageRegionsCallback *pCallback, PTR_VOID pvArgs) -{ - WRAPPER_NO_CONTRACT; + return pResult; - PTR_LoaderHeapBlock block = m_pFirstBlock; - while (block) - { - if ((*pCallback)(pvArgs, block->pVirtualAddress, block->dwVirtualSize)) - { - break; - } - block = block->pNext; - } } +#endif // #ifndef DACCESS_COMPILE #ifdef _DEBUG @@ -1925,10 +836,11 @@ void UnlockedLoaderHeap::DumpFreeList() LIMITED_METHOD_CONTRACT; if (m_pFirstFreeBlock == NULL) { - printf("FREEDUMP: FreeList is empty\n"); + minipal_log_print_info("FREEDUMP: FreeList is empty\n"); } else { + InlineSString<128> buf; LoaderHeapFreeBlock *pBlock = m_pFirstFreeBlock; while (pBlock != NULL) { @@ -1950,97 +862,214 @@ void UnlockedLoaderHeap::DumpFreeList() } } - printf("Addr = %pxh, Size = %xh", pBlock, ((ULONG)dwsize)); - if (ccbad) printf(" *** ERROR: NOT CC'd ***"); - if (sizeunaligned) printf(" *** ERROR: size not a multiple of ALLOC_ALIGN_CONSTANT ***"); - printf("\n"); + buf.Printf("Addr = %pxh, Size = %xh", pBlock, ((ULONG)dwsize)); + if (ccbad) buf.AppendUTF8(" *** ERROR: NOT CC'd ***"); + if (sizeunaligned) buf.AppendUTF8(" *** ERROR: size not a multiple of ALLOC_ALIGN_CONSTANT ***"); + buf.AppendUTF8("\n"); + + minipal_log_print_info(buf.GetUTF8()); + buf.Clear(); pBlock = pBlock->m_pNext; } } } +#endif //_DEBUG -void UnlockedLoaderHeap::UnlockedClearEvents() +#ifndef DACCESS_COMPILE +/*static*/ void LoaderHeapFreeBlock::InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMem, size_t dwTotalSize, UnlockedLoaderHeap *pHeap) { - WRAPPER_NO_CONTRACT; - LoaderHeapSniffer::ClearEvents(this); -} + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; -void UnlockedLoaderHeap::UnlockedCompactEvents() -{ - WRAPPER_NO_CONTRACT; - LoaderHeapSniffer::CompactEvents(this); -} + // The new "nothrow" below failure is handled in a non-fault way, so + // make sure that callers with FORBID_FAULT can call this method without + // firing the contract violation assert. + PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure); -void UnlockedLoaderHeap::UnlockedPrintEvents() -{ - WRAPPER_NO_CONTRACT; - LoaderHeapSniffer::PrintEvents(this); -} + LOADER_HEAP_BEGIN_TRAP_FAULT + // It's illegal to insert a free block that's smaller than the minimum sized allocation - + // it may stay stranded on the freelist forever. 
+#ifdef _DEBUG + if (!(dwTotalSize >= pHeap->AllocMem_TotalSize(1))) + { + UnlockedLoaderHeap::ValidateFreeList(pHeap); + _ASSERTE(dwTotalSize >= pHeap->AllocMem_TotalSize(1)); + } -#endif //_DEBUG + if (!(0 == (dwTotalSize & ALLOC_ALIGN_CONSTANT))) + { + UnlockedLoaderHeap::ValidateFreeList(pHeap); + _ASSERTE(0 == (dwTotalSize & ALLOC_ALIGN_CONSTANT)); + } +#endif -//************************************************************************************ -// LOADERHEAP SNIFFER METHODS -//************************************************************************************ -#ifdef _DEBUG +#ifdef DEBUG + { + void* pMemRW = pMem; + ExecutableWriterHolderNoLog memWriterHolder; + if (pHeap->IsExecutable()) + { + memWriterHolder.AssignExecutableWriterHolder(pMem, dwTotalSize); + pMemRW = memWriterHolder.GetRW(); + } -/*static*/ VOID LoaderHeapSniffer::RecordEvent(UnlockedLoaderHeap *pHeap, - AllocationType allocationType, - _In_ const char *szFile, - int lineNum, - _In_ const char *szAllocFile, - int allocLineNum, - void *pMem, - size_t dwRequestedSize, - size_t dwSize - ) -{ - CONTRACTL + memset(pMemRW, 0xcc, dwTotalSize); + } +#endif // DEBUG + + LoaderHeapFreeBlock *pNewBlock = new (nothrow) LoaderHeapFreeBlock; + // If we fail allocating the LoaderHeapFreeBlock, ignore the failure and don't insert the free block at all. + if (pNewBlock != NULL) { - NOTHROW; - GC_NOTRIGGER; - FORBID_FAULT; //If we OOM in here, we just throw the event away. + pNewBlock->m_pNext = *ppHead; + pNewBlock->m_dwSize = dwTotalSize; + pNewBlock->m_pBlockAddress = pMem; + *ppHead = pNewBlock; + MergeBlock(pNewBlock, pHeap); } - CONTRACTL_END - LoaderHeapEvent *pNewEvent; + LOADER_HEAP_END_TRAP_FAULT +} + +/*static*/ void *LoaderHeapFreeBlock::AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, UnlockedLoaderHeap *pHeap) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + + INCONTRACT(_ASSERTE_IMPL(!ARE_FAULTS_FORBIDDEN())); + + void *pResult = NULL; + LOADER_HEAP_BEGIN_TRAP_FAULT + + LoaderHeapFreeBlock **ppWalk = ppHead; + while (*ppWalk) { + LoaderHeapFreeBlock *pCur = *ppWalk; + size_t dwCurSize = pCur->m_dwSize; + if (dwCurSize == dwSize) { - FAULT_NOT_FATAL(); - pNewEvent = new (nothrow) LoaderHeapEvent; + pResult = pCur->m_pBlockAddress; + // Exact match. Hooray! + *ppWalk = pCur->m_pNext; + delete pCur; + break; } - if (!pNewEvent) + else if (dwCurSize > dwSize && (dwCurSize - dwSize) >= pHeap->AllocMem_TotalSize(1)) { - if (!(pHeap->m_dwDebugFlags & pHeap->kEncounteredOOM)) - { - pHeap->m_dwDebugFlags |= pHeap->kEncounteredOOM; - _ASSERTE(!"LOADERHEAPSNIFFER: Failed allocation of LoaderHeapEvent. Call tracing information will be incomplete."); - } + // Partial match. Ok... + pResult = pCur->m_pBlockAddress; + *ppWalk = pCur->m_pNext; + InsertFreeBlock(ppWalk, ((BYTE*)pCur->m_pBlockAddress) + dwSize, dwCurSize - dwSize, pHeap ); + delete pCur; + break; } - else + + // Either block is too small or splitting the block would leave a remainder that's smaller than + // the minimum block size. Onto next one. 
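AllocFromFreeList above is straightforward first-fit: return a block on an exact size match, or split a larger block as long as the remainder stays at or above the minimum allocatable size so it cannot be stranded. The toy model below shows just that walk; it leaves out the zero-fill, the executable-writer holder, and the fault trap the real code wraps around the list.

```cpp
#include <cstddef>
#include <iostream>

// Toy model of the loader-heap free list: singly linked, first-fit, and a
// block is only split when the remainder stays at or above a minimum size
// (the real code uses AllocMem_TotalSize(1) as that minimum).
struct FreeBlock
{
    FreeBlock* next;
    std::byte* address;
    size_t     size;
};

constexpr size_t kMinBlockSize = 16; // stand-in for AllocMem_TotalSize(1)

std::byte* AllocFromFreeList(FreeBlock*& head, size_t size)
{
    for (FreeBlock** walk = &head; *walk != nullptr; walk = &(*walk)->next)
    {
        FreeBlock* cur = *walk;
        if (cur->size == size)
        {
            std::byte* result = cur->address;   // exact match: unlink and return
            *walk = cur->next;
            delete cur;
            return result;
        }
        if (cur->size > size && cur->size - size >= kMinBlockSize)
        {
            std::byte* result = cur->address;   // partial match: return the front,
            FreeBlock* rest = new FreeBlock{cur->next, cur->address + size, cur->size - size};
            *walk = rest;                       // keep the tail on the list
            delete cur;
            return result;
        }
        // Too small, or the split would strand a sub-minimum remainder: keep walking.
    }
    return nullptr;
}

int main()
{
    static std::byte storage[256];
    FreeBlock* head = new FreeBlock{nullptr, storage, 256};
    std::cout << (AllocFromFreeList(head, 64)  != nullptr) << "\n"; // splits 256 into 64 + 192
    std::cout << (AllocFromFreeList(head, 192) != nullptr) << "\n"; // exact match on the tail
    std::cout << (AllocFromFreeList(head, 1)   != nullptr) << "\n"; // list now empty -> 0
}
```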
+ + ppWalk = &( pCur->m_pNext ); + } + + if (pResult) + { + void *pResultRW = pResult; + ExecutableWriterHolderNoLog resultWriterHolder; + if (pHeap->IsExecutable()) { - pNewEvent->m_allocationType = allocationType; - pNewEvent->m_szFile = szFile; - pNewEvent->m_lineNum = lineNum; - pNewEvent->m_szAllocFile = szAllocFile; - pNewEvent->m_allocLineNum = allocLineNum; - pNewEvent->m_pMem = pMem; - pNewEvent->m_dwRequestedSize = dwRequestedSize; - pNewEvent->m_dwSize = dwSize; - - pNewEvent->m_pNext = pHeap->m_pEventList; - pHeap->m_pEventList = pNewEvent; + resultWriterHolder.AssignExecutableWriterHolder(pResult, dwSize); + pResultRW = resultWriterHolder.GetRW(); } + // Callers of loaderheap assume allocated memory is zero-inited so we must preserve this invariant! + memset(pResultRW, 0, dwSize); } + LOADER_HEAP_END_TRAP_FAULT + return pResult; +} + +// Try to merge pFreeBlock with its immediate successor. Return TRUE if a merge happened. FALSE if no merge happened. +/*static*/ BOOL LoaderHeapFreeBlock::MergeBlock(LoaderHeapFreeBlock *pFreeBlock, UnlockedLoaderHeap *pHeap) +{ + STATIC_CONTRACT_NOTHROW; + + BOOL result = FALSE; + + LOADER_HEAP_BEGIN_TRAP_FAULT + + LoaderHeapFreeBlock *pNextBlock = pFreeBlock->m_pNext; + size_t dwSize = pFreeBlock->m_dwSize; + + if (pNextBlock == NULL || ((BYTE*)pNextBlock->m_pBlockAddress) != (((BYTE*)pFreeBlock->m_pBlockAddress) + dwSize)) + { + result = FALSE; + } + else + { + size_t dwCombinedSize = dwSize + pNextBlock->m_dwSize; + LoaderHeapFreeBlock *pNextNextBlock = pNextBlock->m_pNext; + void *pMemRW = pFreeBlock->m_pBlockAddress; + ExecutableWriterHolderNoLog memWriterHolder; + if (pHeap->IsExecutable()) + { + memWriterHolder.AssignExecutableWriterHolder(pFreeBlock->m_pBlockAddress, dwCombinedSize); + pMemRW = memWriterHolder.GetRW(); + } + INDEBUG(memset(pMemRW, 0xcc, dwCombinedSize);) + pFreeBlock->m_pNext = pNextNextBlock; + pFreeBlock->m_dwSize = dwCombinedSize; + delete pNextBlock; + + result = TRUE; + } + + LOADER_HEAP_END_TRAP_FAULT + return result; } +#endif // !DACCESS_COMPILE + +//===================================================================================== +// These helpers encapsulate the actual layout of a block allocated by AllocMem +// and UnlockedAllocMem(): +// +// ==> Starting address is always pointer-aligned. 
+// +// - x bytes of user bytes (where "x" is the actual dwSize passed into AllocMem) +// +// - y bytes of "EE" (DEBUG-ONLY) (where "y" == LOADER_HEAP_DEBUG_BOUNDARY (normally 0)) +// - z bytes of pad (DEBUG-ONLY) (where "z" is just enough to pointer-align the following byte) +// - a bytes of tag (DEBUG-ONLY) (where "a" is sizeof(LoaderHeapValidationTag) +// +// - b bytes of pad (where "b" is just enough to pointer-align the following byte) +// +// ==> Following address is always pointer-aligned +//===================================================================================== + +// Convert the requested size into the total # of bytes we'll actually allocate (including padding) +size_t UnlockedLoaderHeap::AllocMem_TotalSize(size_t dwRequestedSize) +{ + LIMITED_METHOD_CONTRACT; + + size_t dwSize = dwRequestedSize; + // Interleaved heap cannot ad any extra to the requested size +#ifdef _DEBUG + dwSize += LOADER_HEAP_DEBUG_BOUNDARY; + dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); +#endif + +#ifdef _DEBUG + dwSize += sizeof(LoaderHeapValidationTag); +#endif + dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); + return dwSize; +} +#ifdef _DEBUG /*static*/ -void LoaderHeapSniffer::ValidateFreeList(UnlockedLoaderHeap *pHeap) +void UnlockedLoaderHeap::ValidateFreeList(UnlockedLoaderHeap *pHeap) { CANNOT_HAVE_CONTRACT; @@ -2140,7 +1169,7 @@ void LoaderHeapSniffer::ValidateFreeList(UnlockedLoaderHeap *pHeap) } else { - LoaderHeapEvent *pBadAddrEvent = FindEvent(pHeap, pBadAddr); + LoaderHeapEvent *pBadAddrEvent = LoaderHeapSniffer::FindEvent(pHeap, pBadAddr); message.AppendASCII("*** CALL TRACING ENABLED ***\n\n"); @@ -2154,7 +1183,7 @@ void LoaderHeapSniffer::ValidateFreeList(UnlockedLoaderHeap *pHeap) message.AppendASCII("\nNo known owner of last corrupted address.\n"); } - LoaderHeapEvent *pPrevEvent = FindEvent(pHeap, ((BYTE*)pProbeThis) - 1); + LoaderHeapEvent *pPrevEvent = LoaderHeapSniffer::FindEvent(pHeap, ((BYTE*)pProbeThis) - 1); int count = 3; while (count-- && @@ -2162,7 +1191,7 @@ void LoaderHeapSniffer::ValidateFreeList(UnlockedLoaderHeap *pHeap) ( ((UINT_PTR)pProbeThis) - ((UINT_PTR)(pPrevEvent->m_pMem)) + pPrevEvent->m_dwSize ) < 1024) { message.AppendASCII("\nThis block is located close to the corruption point. "); - if (!pHeap->IsInterleaved() && pPrevEvent->QuietValidate()) + if (pPrevEvent->QuietValidate()) { message.AppendASCII("If it was overrun, it might have caused this."); } @@ -2171,200 +1200,20 @@ void LoaderHeapSniffer::ValidateFreeList(UnlockedLoaderHeap *pHeap) message.AppendASCII("*** CORRUPTION DETECTED IN THIS BLOCK ***"); } pPrevEvent->Describe(&message); - pPrevEvent = FindEvent(pHeap, ((BYTE*)(pPrevEvent->m_pMem)) - 1); + pPrevEvent = LoaderHeapSniffer::FindEvent(pHeap, ((BYTE*)(pPrevEvent->m_pMem)) - 1); } - - } DbgAssertDialog(__FILE__, __LINE__, (char*) message.GetUTF8()); - } - - - } - - -BOOL LoaderHeapEvent::QuietValidate() +/*static*/ void UnlockedLoaderHeap::WeGotAFaultNowWhat(UnlockedLoaderHeap *pHeap) { WRAPPER_NO_CONTRACT; + ValidateFreeList(pHeap); - if (m_allocationType == kAllocMem) - { - LoaderHeapValidationTag *pTag = AllocMem_GetTag(m_pMem, m_dwRequestedSize); - return (pTag->m_allocationType == m_allocationType && pTag->m_dwRequestedSize == m_dwRequestedSize); - } - else - { - // We can't easily validate freed blocks. 
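The rewritten AllocMem_TotalSize reduces to: add the debug boundary, pointer-align, append the validation tag, pointer-align again. The alignment step is the usual mask round-up; a short worked example, assuming ALLOC_ALIGN_CONSTANT is the pointer-size-minus-one mask (7 on 64-bit) as it is defined elsewhere in utilcode:

```cpp
#include <cstddef>
#include <cstdio>

// (n + ALLOC_ALIGN_CONSTANT) & ~ALLOC_ALIGN_CONSTANT rounds n up to the next
// pointer-sized multiple; kAlignConstant below plays the role of that mask.
constexpr size_t kAlignConstant = sizeof(void*) - 1;

constexpr size_t RoundUpToPointer(size_t n)
{
    return (n + kAlignConstant) & ~kAlignConstant;
}

int main()
{
    // A 13-byte request becomes 16 bytes before the (debug-only) validation
    // tag is appended and the sum is rounded a second time.
    std::printf("%zu -> %zu\n", static_cast<size_t>(13), RoundUpToPointer(13));
    std::printf("%zu -> %zu\n", static_cast<size_t>(16), RoundUpToPointer(16));
}
```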
- return TRUE; - } -} - - -#endif //_DEBUG - -#ifndef DACCESS_COMPILE - -AllocMemTracker::AllocMemTracker() -{ - CONTRACTL - { - NOTHROW; - FORBID_FAULT; - CANNOT_TAKE_LOCK; - } - CONTRACTL_END - - m_FirstBlock.m_pNext = NULL; - m_FirstBlock.m_nextFree = 0; - m_pFirstBlock = &m_FirstBlock; - - m_fReleased = FALSE; -} - -AllocMemTracker::~AllocMemTracker() -{ - CONTRACTL - { - NOTHROW; - FORBID_FAULT; - } - CONTRACTL_END - - if (!m_fReleased) - { - AllocMemTrackerBlock *pBlock = m_pFirstBlock; - while (pBlock) - { - // Do the loop in reverse - loaderheaps work best if - // we allocate and backout in LIFO order. - for (int i = pBlock->m_nextFree - 1; i >= 0; i--) - { - AllocMemTrackerNode *pNode = &(pBlock->m_Node[i]); - pNode->m_pHeap->RealBackoutMem(pNode->m_pMem - ,pNode->m_dwRequestedSize -#ifdef _DEBUG - ,__FILE__ - ,__LINE__ - ,pNode->m_szAllocFile - ,pNode->m_allocLineNum -#endif - ); - - } - - pBlock = pBlock->m_pNext; - } - } - -// We have seen evidence of memory corruption in this data structure. -// https://github.com/dotnet/runtime/issues/54469 -// m_pFirstBlock is intended to be a linked list terminating with -// &m_FirstBlock but we are finding a nullptr in the list before -// that point. In order to investigate further we need to observe -// the corrupted memory block(s) before they are deleted below -#ifdef _DEBUG - AllocMemTrackerBlock* pDebugBlock = m_pFirstBlock; - for (int i = 0; pDebugBlock != &m_FirstBlock; i++) - { - CONSISTENCY_CHECK_MSGF(i < 10000, ("Linked list is much longer than expected, memory corruption likely\n")); - CONSISTENCY_CHECK_MSGF(pDebugBlock != nullptr, ("Linked list pointer == NULL, memory corruption likely\n")); - pDebugBlock = pDebugBlock->m_pNext; - } -#endif - - AllocMemTrackerBlock *pBlock = m_pFirstBlock; - while (pBlock != &m_FirstBlock) - { - AllocMemTrackerBlock *pNext = pBlock->m_pNext; - delete pBlock; - pBlock = pNext; - } - - INDEBUG(memset(this, 0xcc, sizeof(*this));) -} - -void *AllocMemTracker::Track(TaggedMemAllocPtr tmap) -{ - CONTRACTL - { - THROWS; - INJECT_FAULT(ThrowOutOfMemory();); - } - CONTRACTL_END - - void *pv = Track_NoThrow(tmap); - if (!pv) - { - ThrowOutOfMemory(); - } - return pv; -} - -void *AllocMemTracker::Track_NoThrow(TaggedMemAllocPtr tmap) -{ - CONTRACTL - { - NOTHROW; - INJECT_FAULT(return NULL;); - } - CONTRACTL_END - - // Calling Track() after calling SuppressRelease() is almost certainly a bug. You're supposed to call SuppressRelease() only after you're - // sure no subsequent failure will force you to backout the memory. - _ASSERTE( (!m_fReleased) && "You've already called SuppressRelease on this AllocMemTracker which implies you've passed your point of no failure. Why are you still doing allocations?"); - - - if (tmap.m_pMem != NULL) - { - AllocMemHolder holder(tmap); // If anything goes wrong in here, this holder will backout the allocation for the caller. - if (m_fReleased) - { - holder.SuppressRelease(); - } - AllocMemTrackerBlock *pBlock = m_pFirstBlock; - if (pBlock->m_nextFree == kAllocMemTrackerBlockSize) - { - AllocMemTrackerBlock *pNewBlock = new (nothrow) AllocMemTrackerBlock; - if (!pNewBlock) - { - return NULL; - } - - pNewBlock->m_pNext = m_pFirstBlock; - pNewBlock->m_nextFree = 0; - - m_pFirstBlock = pNewBlock; + //If none of the above popped up an assert, pop up a generic one. + _ASSERTE(!("Unexpected AV inside LoaderHeap. 
The usual reason is that someone overwrote the end of a block or wrote into a freed block.\n")); - pBlock = pNewBlock; - } - - // From here on, we can't fail - pBlock->m_Node[pBlock->m_nextFree].m_pHeap = tmap.m_pHeap; - pBlock->m_Node[pBlock->m_nextFree].m_pMem = tmap.m_pMem; - pBlock->m_Node[pBlock->m_nextFree].m_dwRequestedSize = tmap.m_dwRequestedSize; -#ifdef _DEBUG - pBlock->m_Node[pBlock->m_nextFree].m_szAllocFile = tmap.m_szFile; - pBlock->m_Node[pBlock->m_nextFree].m_allocLineNum = tmap.m_lineNum; -#endif - - pBlock->m_nextFree++; - - holder.SuppressRelease(); - - - } - return (void *)tmap; -} - - -void AllocMemTracker::SuppressRelease() -{ - LIMITED_METHOD_CONTRACT; - - m_fReleased = TRUE; } - -#endif //#ifndef DACCESS_COMPILE +#endif // _DEBUG \ No newline at end of file diff --git a/src/coreclr/utilcode/loaderheap_shared.cpp b/src/coreclr/utilcode/loaderheap_shared.cpp new file mode 100644 index 000000000000..d286db2568c1 --- /dev/null +++ b/src/coreclr/utilcode/loaderheap_shared.cpp @@ -0,0 +1,430 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "stdafx.h" // Precompiled header key. +#include "loaderheap.h" +#include "loaderheap_shared.h" +#include "ex.h" +#include "pedecoder.h" + +#ifdef RANDOMIZE_ALLOC +RandomForLoaderHeap s_randomForLoaderHeap; +#endif + +#ifndef DACCESS_COMPILE +INDEBUG(DWORD UnlockedLoaderHeapBase::s_dwNumInstancesOfLoaderHeaps = 0;) + +UnlockedLoaderHeapBase::UnlockedLoaderHeapBase(LoaderHeapImplementationKind kind) : + m_kind(kind), + m_dwTotalAlloc(0), + m_pAllocPtr(NULL), + m_pPtrToEndOfCommittedRegion(NULL) +#ifdef _DEBUG + , + m_dwDebugFlags(LoaderHeapSniffer::InitDebugFlags()), + m_pEventList(NULL), + m_dwDebugWastedBytes(0) +#endif // _DEBUG +{ + LIMITED_METHOD_CONTRACT; +#ifdef _DEBUG + s_dwNumInstancesOfLoaderHeaps++; +#endif +} + +UnlockedLoaderHeapBase::~UnlockedLoaderHeapBase() +{ + CONTRACTL + { + DESTRUCTOR_CHECK; + NOTHROW; + FORBID_FAULT; + } + CONTRACTL_END + + INDEBUG(s_dwNumInstancesOfLoaderHeaps --;) +} + + +void ReleaseReservedMemory(BYTE* value) +{ + if (value) + { + ExecutableAllocator::Instance()->Release(value); + } +} +#endif // DACCESS_COMPILE + +#ifdef _DEBUG +void LoaderHeapEvent::Describe(SString *pSString) +{ + CONTRACTL + { + INSTANCE_CHECK; + DISABLED(NOTHROW); + GC_NOTRIGGER; + } + CONTRACTL_END + + pSString->AppendASCII("\n"); + + { + StackSString buf; + if (m_allocationType == kFreedMem) + { + buf.Printf(" Freed at: %s (line %d)\n", m_szFile, m_lineNum); + buf.Printf(" (block originally allocated at %s (line %d)\n", m_szAllocFile, m_allocLineNum); + } + else + { + buf.Printf(" Allocated at: %s (line %d)\n", m_szFile, m_lineNum); + } + pSString->Append(buf); + } + + if (!QuietValidate()) + { + pSString->AppendASCII(" *** THIS BLOCK HAS BEEN CORRUPTED ***\n"); + } + + + + { + StackSString buf; + buf.Printf(" Type: "); + switch (m_allocationType) + { + case kAllocMem: + buf.AppendASCII("AllocMem()\n"); + break; + case kFreedMem: + buf.AppendASCII("Free\n"); + break; + default: + break; + } + pSString->Append(buf); + } + + + { + StackSString buf; + buf.Printf(" Start of block: 0x%p\n", m_pMem); + pSString->Append(buf); + } + + { + StackSString buf; + buf.Printf(" End of block: 0x%p\n", ((BYTE*)m_pMem) + m_dwSize - 1); + pSString->Append(buf); + } + + { + StackSString buf; + buf.Printf(" Requested size: %lu (0x%lx)\n", (ULONG)m_dwRequestedSize, (ULONG)m_dwRequestedSize); + pSString->Append(buf); + } + + { + StackSString 
buf; + buf.Printf(" Actual size: %lu (0x%lx)\n", (ULONG)m_dwSize, (ULONG)m_dwSize); + pSString->Append(buf); + } + + pSString->AppendASCII("\n"); +} + +BOOL LoaderHeapEvent::QuietValidate() +{ + WRAPPER_NO_CONTRACT; + + if (m_allocationType == kAllocMem) + { + LoaderHeapValidationTag *pTag = AllocMem_GetTag(m_pMem, m_dwRequestedSize); + return (pTag->m_allocationType == m_allocationType && pTag->m_dwRequestedSize == m_dwRequestedSize); + } + else + { + // We can't easily validate freed blocks. + return TRUE; + } +} + +/*static*/ DWORD LoaderHeapSniffer::InitDebugFlags() +{ + WRAPPER_NO_CONTRACT; + + DWORD dwDebugFlags = 0; + if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_LoaderHeapCallTracing)) + { + dwDebugFlags |= UnlockedLoaderHeap::kCallTracing; + } + return dwDebugFlags; +} + +/*static*/ VOID LoaderHeapSniffer::RecordEvent(UnlockedLoaderHeapBase *pHeap, + AllocationType allocationType, + _In_ const char *szFile, + int lineNum, + _In_ const char *szAllocFile, + int allocLineNum, + void *pMem, + size_t dwRequestedSize, + size_t dwSize + ) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + FORBID_FAULT; //If we OOM in here, we just throw the event away. + } + CONTRACTL_END + + LoaderHeapEvent *pNewEvent; + { + { + FAULT_NOT_FATAL(); + pNewEvent = new (nothrow) LoaderHeapEvent; + } + if (!pNewEvent) + { + if (!(pHeap->m_dwDebugFlags & pHeap->kEncounteredOOM)) + { + pHeap->m_dwDebugFlags |= pHeap->kEncounteredOOM; + _ASSERTE(!"LOADERHEAPSNIFFER: Failed allocation of LoaderHeapEvent. Call tracing information will be incomplete."); + } + } + else + { + pNewEvent->m_allocationType = allocationType; + pNewEvent->m_szFile = szFile; + pNewEvent->m_lineNum = lineNum; + pNewEvent->m_szAllocFile = szAllocFile; + pNewEvent->m_allocLineNum = allocLineNum; + pNewEvent->m_pMem = pMem; + pNewEvent->m_dwRequestedSize = dwRequestedSize; + pNewEvent->m_dwSize = dwSize; + + pNewEvent->m_pNext = pHeap->m_pEventList; + pHeap->m_pEventList = pNewEvent; + } + } +} + +/*static*/ VOID LoaderHeapSniffer::ClearEvents(UnlockedLoaderHeapBase *pHeap) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_FORBID_FAULT; + + LoaderHeapEvent *pEvent = pHeap->m_pEventList; + while (pEvent) + { + LoaderHeapEvent *pNext = pEvent->m_pNext; + delete pEvent; + pEvent = pNext; + } + pHeap->m_pEventList = NULL; +} + +/*static*/ VOID LoaderHeapSniffer::CompactEvents(UnlockedLoaderHeapBase *pHeap) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_FORBID_FAULT; + + LoaderHeapEvent **ppEvent = &(pHeap->m_pEventList); + while (*ppEvent) + { + LoaderHeapEvent *pEvent = *ppEvent; + if (pEvent->m_allocationType != kFreedMem) + { + ppEvent = &(pEvent->m_pNext); + } + else + { + LoaderHeapEvent **ppWalk = &(pEvent->m_pNext); + BOOL fMatchFound = FALSE; + while (*ppWalk && !fMatchFound) + { + LoaderHeapEvent *pWalk = *ppWalk; + if (pWalk->m_allocationType != kFreedMem && + pWalk->m_pMem == pEvent->m_pMem && + pWalk->m_dwRequestedSize == pEvent->m_dwRequestedSize) + { + // Delete matched pairs + + // Order is important here - updating *ppWalk may change pEvent->m_pNext, and we want + // to get the updated value when we unlink pEvent. 
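RecordEvent and CompactEvents above implement the call-tracing ledger enabled by the LoaderHeapCallTracing switch: each allocation or backout is prepended to a per-heap list (newest first), and compaction cancels a free event against its matching allocation so only live blocks remain. A stripped-down model of that bookkeeping, using the same delete-the-match-before-unlinking order the comment calls out:

```cpp
#include <cstddef>
#include <iostream>

// Minimal model of the sniffer's event list: newest event at the head, and
// CompactEvents-style cancellation of a free event against the matching
// allocation so only live blocks remain.
struct Event
{
    Event* next;
    void*  mem;
    size_t size;
    bool   isFree;
};

void Record(Event*& head, void* mem, size_t size, bool isFree)
{
    head = new Event{head, mem, size, isFree}; // prepend: list is in reverse time order
}

void Compact(Event*& head)
{
    for (Event** e = &head; *e != nullptr; )
    {
        if (!(*e)->isFree) { e = &(*e)->next; continue; }
        bool matched = false;
        for (Event** w = &(*e)->next; *w != nullptr; w = &(*w)->next)
        {
            if (!(*w)->isFree && (*w)->mem == (*e)->mem && (*w)->size == (*e)->size)
            {
                Event* alloc = *w; *w = alloc->next; delete alloc;  // drop the allocation first...
                Event* freed = *e; *e = freed->next; delete freed;  // ...then the free that undid it
                matched = true;
                break;
            }
        }
        if (!matched) e = &(*e)->next;
    }
}

int main()
{
    Event* head = nullptr;
    int a, b;
    Record(head, &a, 32, false);
    Record(head, &b, 64, false);
    Record(head, &b, 64, true); // backout of the second allocation
    Compact(head);
    for (Event* e = head; e != nullptr; e = e->next)
        std::cout << "live block of " << e->size << " bytes\n"; // prints only the 32-byte block
}
```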
+ *ppWalk = pWalk->m_pNext; + *ppEvent = pEvent->m_pNext; + + delete pEvent; + delete pWalk; + fMatchFound = TRUE; + } + else + { + ppWalk = &(pWalk->m_pNext); + } + } + + if (!fMatchFound) + { + ppEvent = &(pEvent->m_pNext); + } + } + } +} + +/*static*/ VOID LoaderHeapSniffer::PrintEvents(UnlockedLoaderHeapBase *pHeap) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_FORBID_FAULT; + + printf("\n------------- LoaderHeapEvents (in reverse time order!) --------------------"); + + LoaderHeapEvent *pEvent = pHeap->m_pEventList; + while (pEvent) + { + printf("\n"); + switch (pEvent->m_allocationType) + { + case kAllocMem: printf("AllocMem "); break; + case kFreedMem: printf("BackoutMem "); break; + + } + printf(" ptr = 0x%-8p", pEvent->m_pMem); + printf(" rqsize = 0x%-8x", (DWORD)pEvent->m_dwRequestedSize); + printf(" actsize = 0x%-8x", (DWORD)pEvent->m_dwSize); + printf(" (at %s@%d)", pEvent->m_szFile, pEvent->m_lineNum); + if (pEvent->m_allocationType == kFreedMem) + { + printf(" (original allocation at %s@%d)", pEvent->m_szAllocFile, pEvent->m_allocLineNum); + } + + pEvent = pEvent->m_pNext; + + } + printf("\n------------- End of LoaderHeapEvents --------------------------------------"); + printf("\n"); + +} + + +/*static*/ VOID LoaderHeapSniffer::PitchSniffer(SString *pSString) +{ + WRAPPER_NO_CONTRACT; + pSString->AppendASCII("\n" + "\nBecause call-tracing wasn't turned on, we couldn't provide details about who last owned the affected memory block. To get more precise diagnostics," + "\nset the following environment variable:" + "\n" + "\n DOTNET_LoaderHeapCallTracing=1" + "\n" + "\nand rerun the scenario that crashed." + "\n" + "\n"); +} + +/*static*/ LoaderHeapEvent *LoaderHeapSniffer::FindEvent(UnlockedLoaderHeapBase *pHeap, void *pAddr) +{ + LIMITED_METHOD_CONTRACT; + + LoaderHeapEvent *pEvent = pHeap->m_pEventList; + while (pEvent) + { + if (pAddr >= pEvent->m_pMem && pAddr <= ( ((BYTE*)pEvent->m_pMem) + pEvent->m_dwSize - 1)) + { + return pEvent; + } + pEvent = pEvent->m_pNext; + } + return NULL; + +} + +LoaderHeapValidationTag *AllocMem_GetTag(LPVOID pBlock, size_t dwRequestedSize) +{ + LIMITED_METHOD_CONTRACT; + + size_t dwSize = dwRequestedSize; + dwSize += LOADER_HEAP_DEBUG_BOUNDARY; + dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); + return (LoaderHeapValidationTag *)( ((BYTE*)pBlock) + dwSize ); +} + +#endif // _DEBUG + +#ifndef DACCESS_COMPILE +size_t UnlockedLoaderHeapBase::GetBytesAvailCommittedRegion() +{ + LIMITED_METHOD_CONTRACT; + + if (m_pAllocPtr < m_pPtrToEndOfCommittedRegion) + return (size_t)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr); + else + return 0; +} +#endif // DACCESS_COMPILE + +#ifdef DACCESS_COMPILE + +void UnlockedLoaderHeapBaseTraversable::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) +{ + WRAPPER_NO_CONTRACT; + + PTR_LoaderHeapBlock block = m_pFirstBlock; + while (block.IsValid()) + { + // All we know is the virtual size of this block. We don't have any way to tell how + // much of this space was actually comitted, so don't expect that this will always + // succeed. + // @dbgtodo : Ideally we'd reduce the risk of corruption causing problems here. 
+ // We could extend LoaderHeapBlock to track a commit size, + // but it seems wasteful + TADDR addr = dac_cast(block->pVirtualAddress); + TSIZE_T size = block->dwVirtualSize; + EMEM_OUT(("MEM: UnlockedLoaderHeap %p - %p\n", addr, addr + size)); + DacEnumMemoryRegion(addr, size, false); + + block = block->pNext; + } +} + +void UnlockedLoaderHeapBaseTraversable::EnumPageRegions (EnumPageRegionsCallback *pCallback, PTR_VOID pvArgs) +{ + WRAPPER_NO_CONTRACT; + + PTR_LoaderHeapBlock block = m_pFirstBlock; + while (block) + { + if ((*pCallback)(pvArgs, block->pVirtualAddress, block->dwVirtualSize)) + { + break; + } + + block = block->pNext; + } +} +#endif // #ifdef DACCESS_COMPILE + +#ifdef _DEBUG + +void UnlockedLoaderHeapBase::UnlockedClearEvents() +{ + WRAPPER_NO_CONTRACT; + LoaderHeapSniffer::ClearEvents(this); +} + +void UnlockedLoaderHeapBase::UnlockedCompactEvents() +{ + WRAPPER_NO_CONTRACT; + LoaderHeapSniffer::CompactEvents(this); +} + +void UnlockedLoaderHeapBase::UnlockedPrintEvents() +{ + WRAPPER_NO_CONTRACT; + LoaderHeapSniffer::PrintEvents(this); +} + +#endif //_DEBUG diff --git a/src/coreclr/utilcode/loaderheap_shared.h b/src/coreclr/utilcode/loaderheap_shared.h new file mode 100644 index 000000000000..7735856f2056 --- /dev/null +++ b/src/coreclr/utilcode/loaderheap_shared.h @@ -0,0 +1,141 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef LOADERHEAP_SHARED +#define LOADERHEAP_SHARED + +void ReleaseReservedMemory(BYTE* value); +using ReservedMemoryHolder = SpecializedWrapper; + +#ifdef RANDOMIZE_ALLOC +#include +static class RandomForLoaderHeap +{ +public: + Random() { seed = (unsigned int)time(NULL); } + unsigned int Next() + { + return ((seed = seed * 214013L + 2531011L) >> 16) & 0x7fff; + } +private: + unsigned int seed; +}; + +extern RandomForLoaderHeap s_randomForLoaderHeap; +#endif + + + +//===================================================================================== +// In DEBUG builds only, we tag live blocks with the requested size and the type of +// allocation (AllocMem, AllocAlignedMem, AllocateOntoReservedMem). This is strictly +// to validate that those who call Backout* are passing in the right values. +// +// For simplicity, we'll use one LoaderHeapValidationTag structure for all types even +// though not all fields are applicable to all types. +//===================================================================================== +#ifdef _DEBUG +enum AllocationType +{ + kAllocMem = 1, + kFreedMem = 4, +}; + +struct LoaderHeapValidationTag +{ + size_t m_dwRequestedSize; // What the caller requested (not what was actually allocated) + AllocationType m_allocationType; // Which api allocated this block. + const char * m_szFile; // Who allocated me + int m_lineNum; // Who allocated me + +}; +#endif //_DEBUG + +//===================================================================================== +// These classes do detailed loaderheap sniffing to help in debugging heap crashes +//===================================================================================== +#ifdef _DEBUG + +// This structure logs the results of an Alloc or Free call. They are stored in reverse time order +// with UnlockedLoaderHeap::m_pEventList pointing to the most recent event. 
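loaderheap_shared.h also carries over the RANDOMIZE_ALLOC helper whose Next() % 256 result pads each UnlockedAllocMem_NoThrow request. The generator is the familiar 214013/2531011 linear congruential recurrence returning 15-bit values. Note that the header as written declares the constructor as Random() inside class RandomForLoaderHeap, which would not compile if RANDOMIZE_ALLOC were ever defined; a compilable sketch with the matching name:

```cpp
#include <cstdio>
#include <time.h>

// Same recurrence as the header above, with the constructor named after the class.
class RandomForLoaderHeap
{
public:
    RandomForLoaderHeap() { seed = (unsigned int)time(NULL); }
    unsigned int Next()
    {
        // Classic MSVC-style LCG: multiplier 214013, increment 2531011, 15-bit output.
        return ((seed = seed * 214013L + 2531011L) >> 16) & 0x7fff;
    }
private:
    unsigned int seed;
};

int main()
{
    RandomForLoaderHeap r;
    // UnlockedAllocMem_NoThrow pads each request by Next() % 256 bytes when
    // allocation randomization is compiled in.
    std::printf("pad: %u bytes\n", r.Next() % 256);
}
```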
+struct LoaderHeapEvent +{ + LoaderHeapEvent *m_pNext; + AllocationType m_allocationType; //Which api was called + const char *m_szFile; //Caller Id + int m_lineNum; //Caller Id + const char *m_szAllocFile; //(BackoutEvents): Who allocated the block? + int m_allocLineNum; //(BackoutEvents): Who allocated the block? + void *m_pMem; //Starting address of block + size_t m_dwRequestedSize; //Requested size of block + size_t m_dwSize; //Actual size of block (including validation tags, padding, everything) + + + void Describe(SString *pSString); + BOOL QuietValidate(); +}; + + +class LoaderHeapSniffer +{ + public: + static DWORD InitDebugFlags(); + static VOID RecordEvent(UnlockedLoaderHeapBase *pHeap, + AllocationType allocationType, + _In_ const char *szFile, + int lineNum, + _In_ const char *szAllocFile, + int allocLineNum, + void *pMem, + size_t dwRequestedSize, + size_t dwSize + ); + static VOID ClearEvents(UnlockedLoaderHeapBase *pHeap); + static VOID CompactEvents(UnlockedLoaderHeapBase *pHeap); + static VOID PrintEvents(UnlockedLoaderHeapBase *pHeap); + static VOID PitchSniffer(SString *pSString); + static LoaderHeapEvent *FindEvent(UnlockedLoaderHeapBase *pHeap, void *pAddr); +}; + +LoaderHeapValidationTag *AllocMem_GetTag(LPVOID pBlock, size_t dwRequestedSize); + +#endif // _DEBUG + +//===================================================================================== +// This freelist implementation is a first cut and probably needs to be tuned. +// It should be tuned with the following assumptions: +// +// - Freeing LoaderHeap memory is done primarily for OOM backout. LoaderHeaps +// weren't designed to be general purpose heaps and shouldn't be used that way. +// +// - And hence, when memory is freed, expect it to be freed in large clumps and in a +// LIFO order. Since the LoaderHeap normally hands out memory with sequentially +// increasing addresses, blocks will typically be freed with sequentially decreasing +// addresses. +// +// The first cut of the freelist is a single-linked list of free blocks using first-fit. +// Assuming the above alloc-free pattern holds, the list will end up mostly sorted +// in increasing address order. When a block is freed, we'll attempt to coalesce it +// with the first block in the list. We could also choose to be more aggressive about +// sorting and coalescing but this should probably catch most cases in practice. +//===================================================================================== + +// When a block is freed, we place this structure on the first bytes of the freed block (Allocations +// are bumped in size if necessary to make sure there's room.) +struct LoaderHeapFreeBlock +{ + public: + LoaderHeapFreeBlock *m_pNext; // Pointer to next block on free list + size_t m_dwSize; // Total size of this block + void *m_pBlockAddress; // Virtual address of the block + +#ifndef DACCESS_COMPILE + static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMem, size_t dwTotalSize, UnlockedLoaderHeap *pHeap); + static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, UnlockedLoaderHeap *pHeap); + private: + // Try to merge pFreeBlock with its immediate successor. Return TRUE if a merge happened. FALSE if no merge happened. 
+ static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlock, UnlockedLoaderHeap *pHeap); +#endif // DACCESS_COMPILE +}; + +#endif // LOADERHEAP_SHARED \ No newline at end of file diff --git a/src/coreclr/utilcode/log.cpp b/src/coreclr/utilcode/log.cpp index 56ddf6aef9c2..e56255f83629 100644 --- a/src/coreclr/utilcode/log.cpp +++ b/src/coreclr/utilcode/log.cpp @@ -260,7 +260,6 @@ bool Logging2On(DWORD facility2, DWORD level) { // VOID LogSpewValist(DWORD facility, DWORD level, const char *fmt, va_list args) { - SCAN_IGNORE_FAULT; // calls to new (nothrow) in logging code are OK STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; @@ -275,7 +274,6 @@ VOID LogSpewValist(DWORD facility, DWORD level, const char *fmt, va_list args) VOID LogSpew2Valist(DWORD facility2, DWORD level, const char *fmt, va_list args) { - SCAN_IGNORE_FAULT; // calls to new (nothrow) in logging code are OK STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; @@ -290,7 +288,6 @@ VOID LogSpew2Valist(DWORD facility2, DWORD level, const char *fmt, va_list args) VOID LogSpewAlwaysValist(const char *fmt, va_list args) { - SCAN_IGNORE_FAULT; // calls to new (nothrow) in logging code are OK STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; @@ -372,10 +369,9 @@ VOID LogSpewAlwaysValist(const char *fmt, va_list args) if (LogFlags & LOG_ENABLE_CONSOLE_LOGGING) { - WriteFile(GetStdHandle(STD_OUTPUT_HANDLE), pBuffer, buflen, &written, 0); - //@TODO ...Unnecessary to flush console? + minipal_log_write_info(pBuffer); if (LogFlags & LOG_ENABLE_FLUSH_FILE) - FlushFileBuffers( GetStdHandle(STD_OUTPUT_HANDLE) ); + minipal_log_sync_info(); } if (LogFlags & LOG_ENABLE_DEBUGGER_LOGGING) @@ -415,6 +411,5 @@ VOID LogSpewAlways (const char *fmt, ... ) LogSpewValist (LF_ALWAYS, LL_ALWAYS, fmt, args); va_end(args); } - #endif // LOGGING diff --git a/src/coreclr/utilcode/longfilepathwrappers.cpp b/src/coreclr/utilcode/longfilepathwrappers.cpp index 09e3118fc8fe..2ea72de53838 100644 --- a/src/coreclr/utilcode/longfilepathwrappers.cpp +++ b/src/coreclr/utilcode/longfilepathwrappers.cpp @@ -342,53 +342,6 @@ CreateFileWrapper( return ret; } -BOOL -GetFileAttributesExWrapper( - _In_ LPCWSTR lpFileName, - _In_ GET_FILEEX_INFO_LEVELS fInfoLevelId, - _Out_writes_bytes_(sizeof(WIN32_FILE_ATTRIBUTE_DATA)) LPVOID lpFileInformation - ) -{ - CONTRACTL - { - NOTHROW; - } - CONTRACTL_END; - - HRESULT hr = S_OK; - BOOL ret = FALSE; - DWORD lastError = 0; - - EX_TRY - { - LongPathString path(LongPathString::Literal, lpFileName); - - if (SUCCEEDED(LongFile::NormalizePath(path))) - { - ret = GetFileAttributesExW( - path.GetUnicode(), - fInfoLevelId, - lpFileInformation - ); - - } - - lastError = GetLastError(); - } - EX_CATCH_HRESULT(hr); - - if (hr != S_OK ) - { - SetLastError(hr); - } - else if(ret == FALSE) - { - SetLastError(lastError); - } - - return ret; -} - BOOL CopyFileExWrapper( _In_ LPCWSTR lpExistingFileName, diff --git a/src/coreclr/utilcode/md5.cpp b/src/coreclr/utilcode/md5.cpp index 1a68e66455e7..e8e60c452cdb 100644 --- a/src/coreclr/utilcode/md5.cpp +++ b/src/coreclr/utilcode/md5.cpp @@ -61,17 +61,7 @@ void MD5::HashMore(const void* pvInput, ULONG cbInput) // Hash the now-full buffer MD5Transform(m_state, (ULONG*)&m_data[0]); -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:22019) // Suppress this OACR warning 22019: - // 'cbInput-=cbRemaining' may be greater than 'cbInput'. This can be caused by integer underflow. 
- // This could yield an incorrect loop index 'cbInput>=64' - // We only enter the else clause here if cbInput >= cbRemaining -#endif cbInput -= cbRemaining; -#ifdef _PREFAST_ -#pragma warning(pop) -#endif pbInput += cbRemaining; // Hash the data in 64-byte runs, starting just after what we've copied diff --git a/src/coreclr/utilcode/memorypool.cpp b/src/coreclr/utilcode/memorypool.cpp index ec2dcd24c85c..d5b21a101735 100644 --- a/src/coreclr/utilcode/memorypool.cpp +++ b/src/coreclr/utilcode/memorypool.cpp @@ -230,7 +230,7 @@ void *MemoryPool::AllocateElementNoThrow() } // if we come there means that addblock succeeded and m_freelist isn't null anymore - PREFIX_ASSUME(m_freeList!= NULL); + _ASSERTE(m_freeList!= NULL); m_freeList = m_freeList->next; return element; diff --git a/src/coreclr/utilcode/pedecoder.cpp b/src/coreclr/utilcode/pedecoder.cpp index 47f67b7a26f7..25fa70691906 100644 --- a/src/coreclr/utilcode/pedecoder.cpp +++ b/src/coreclr/utilcode/pedecoder.cpp @@ -1274,7 +1274,7 @@ const void *PEDecoder::GetResource(COUNT_T offset, COUNT_T *pSize) const void * resourceBlob = (void *)GetRvaData(VAL32(pDir->VirtualAddress) + offset); // Holds if CheckResource(offset) == TRUE - PREFIX_ASSUME(resourceBlob != NULL); + _ASSERTE(resourceBlob != NULL); if (pSize != NULL) *pSize = GET_UNALIGNED_VAL32(resourceBlob); @@ -1461,7 +1461,7 @@ CHECK PEDecoder::CheckILOnlyImportDlls() const // Get the import directory entry PIMAGE_DATA_DIRECTORY pDirEntryImport = GetDirectoryEntry(IMAGE_DIRECTORY_ENTRY_IMPORT); CHECK(pDirEntryImport != NULL); - PREFIX_ASSUME(pDirEntryImport != NULL); + _ASSERTE(pDirEntryImport != NULL); // There should be space for 2 entries. (mscoree and NULL) CHECK(VAL32(pDirEntryImport->Size) >= (2 * sizeof(IMAGE_IMPORT_DESCRIPTOR))); @@ -1469,7 +1469,7 @@ CHECK PEDecoder::CheckILOnlyImportDlls() const // Get the import data PIMAGE_IMPORT_DESCRIPTOR pID = (PIMAGE_IMPORT_DESCRIPTOR) GetDirectoryData(pDirEntryImport); CHECK(pID != NULL); - PREFIX_ASSUME(pID != NULL); + _ASSERTE(pID != NULL); // Entry 0: ILT, Name, IAT must be be non-null. Forwarder, DateTime should be NULL. CHECK( IMAGE_IMPORT_DESC_FIELD(pID[0], Characteristics) != 0 @@ -1631,7 +1631,7 @@ CHECK PEDecoder::CheckILOnlyEntryPoint() const static const BYTE s_DllOrExeMain[] = JMP_DWORD_PTR_DS_OPCODE; // 403570: prefix complained about stub being possibly NULL. - // Unsure here. PREFIX_ASSUME might be also correct as indices are + // Unsure here. _ASSERTE might be also correct as indices are // verified in the above CHECK statement. CHECK(stub != NULL); CHECK(memcmp(stub, s_DllOrExeMain, JMP_DWORD_PTR_DS_OPCODE_SIZE) == 0); @@ -2499,7 +2499,6 @@ BOOL PEDecoder::ForceRelocForDLL(LPCWSTR lpFileName) { #ifdef _DEBUG STATIC_CONTRACT_NOTHROW; \ - ANNOTATION_DEBUG_ONLY; \ STATIC_CONTRACT_CANNOT_TAKE_LOCK; #endif diff --git a/src/coreclr/utilcode/posterror.cpp b/src/coreclr/utilcode/posterror.cpp index c2a959a9a438..c4d70859b564 100644 --- a/src/coreclr/utilcode/posterror.cpp +++ b/src/coreclr/utilcode/posterror.cpp @@ -200,25 +200,12 @@ HRESULT FillErrorInfo( // Return status. if (FAILED(hr = pICreateErr->SetDescription((LPWSTR) szMsg))) goto Exit1; - // suppress PreFast warning about passing literal string to non-const API. - // This API (ICreateErrorInfo::SetHelpFile) is documented to take a const argument, but - // we can't put const in the signature because it would break existing implementors of - // the API. 
-#ifdef _PREFAST_ -#pragma prefast(push) -#pragma warning(disable:6298) -#endif - // Set the help file and help context. //@todo: we don't have a help file yet. if (FAILED(hr = pICreateErr->SetHelpFile(const_cast(W("complib.hlp")))) || FAILED(hr = pICreateErr->SetHelpContext(dwHelpContext))) goto Exit1; -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - // Get the IErrorInfo pointer. if (FAILED(hr = pICreateErr->QueryInterface(IID_IErrorInfo, (PVOID *) &pIErrInfo))) goto Exit1; diff --git a/src/coreclr/utilcode/prettyprintsig.cpp b/src/coreclr/utilcode/prettyprintsig.cpp index 31f6a93c4a19..34c862a636f0 100644 --- a/src/coreclr/utilcode/prettyprintsig.cpp +++ b/src/coreclr/utilcode/prettyprintsig.cpp @@ -304,7 +304,7 @@ static PCCOR_SIGNATURE PrettyPrintType( { typePtr = PrettyPrintType(typePtr, (typeEnd - typePtr), out, pIMDI); unsigned rank = CorSigUncompressData(typePtr); - PREFIX_ASSUME(rank <= 0xffffff); + _ASSERTE(rank <= 0xffffff); // TODO what is the syntax for the rank 0 case? if (rank == 0) @@ -554,11 +554,6 @@ static HRESULT PrettyPrintClass( CQuickBytes *out, // where to put the pretty printed string IMDInternalImport *pIMDI); // ptr to IMDInternal class with ComSig - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif //***************************************************************************** //***************************************************************************** // pretty prints 'type' to the buffer 'out' returns a pointer to the next type, @@ -674,7 +669,7 @@ static HRESULT PrettyPrintTypeA( sprintf_s(tempBuffer, 64, "pMT: %p", pMT); IfFailGo(appendStrA(out, tempBuffer)); break; - + case ELEMENT_TYPE_CMOD_INTERNAL: { bool required = *typePtr++ != 0; @@ -724,7 +719,7 @@ static HRESULT PrettyPrintTypeA( { IfFailGo(PrettyPrintTypeA(typePtr, (typeEnd - typePtr), out, pIMDI)); unsigned rank = CorSigUncompressData(typePtr); - PREFIX_ASSUME(rank <= 0xffffff); + _ASSERTE(rank <= 0xffffff); // TODO what is the syntax for the rank 0 case? if (rank == 0) { @@ -840,9 +835,6 @@ static HRESULT PrettyPrintTypeA( ErrExit: return hr; } // PrettyPrintTypeA -#ifdef _PREFAST_ -#pragma warning(pop) -#endif // pretty prints the class 'type' to the buffer 'out' static HRESULT PrettyPrintClass( diff --git a/src/coreclr/utilcode/rangelist.cpp b/src/coreclr/utilcode/rangelist.cpp new file mode 100644 index 000000000000..39b4173c18f7 --- /dev/null +++ b/src/coreclr/utilcode/rangelist.cpp @@ -0,0 +1,277 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "stdafx.h" // Precompiled header key. +#include "loaderheap.h" +#include "loaderheap_shared.h" +#include "ex.h" +#include "pedecoder.h" +#define DONOT_DEFINE_ETW_CALLBACK +#include "eventtracebase.h" + +#ifndef DACCESS_COMPILE + +// +// RangeLists are constructed so they can be searched from multiple +// threads without locking. They do require locking in order to +// be safely modified, though. 
+// + +RangeList::RangeList() +{ + WRAPPER_NO_CONTRACT; + + InitBlock(&m_starterBlock); + + m_firstEmptyBlock = &m_starterBlock; + m_firstEmptyRange = 0; +} + +RangeList::~RangeList() +{ + LIMITED_METHOD_CONTRACT; + + RangeListBlock *b = m_starterBlock.next; + + while (b != NULL) + { + RangeListBlock *bNext = b->next; + delete b; + b = bNext; + } +} + +void RangeList::InitBlock(RangeListBlock *b) +{ + LIMITED_METHOD_CONTRACT; + + Range *r = b->ranges; + Range *rEnd = r + RANGE_COUNT; + while (r < rEnd) + r++->id = (TADDR)NULL; + + b->next = NULL; +} + +BOOL RangeList::AddRangeWorker(const BYTE *start, const BYTE *end, void *id) +{ + CONTRACTL + { + INSTANCE_CHECK; + NOTHROW; + GC_NOTRIGGER; + INJECT_FAULT(return FALSE;); + } + CONTRACTL_END + + _ASSERTE(id != NULL); + + RangeListBlock *b = m_firstEmptyBlock; + Range *r = b->ranges + m_firstEmptyRange; + Range *rEnd = b->ranges + RANGE_COUNT; + + while (TRUE) + { + while (r < rEnd) + { + if (r->id == (TADDR)NULL) + { + r->start = (TADDR)start; + r->end = (TADDR)end; + r->id = (TADDR)id; + + r++; + + m_firstEmptyBlock = b; + m_firstEmptyRange = r - b->ranges; + + return TRUE; + } + r++; + } + + // + // If there are no more blocks, allocate a + // new one. + // + + if (b->next == NULL) + { + RangeListBlock *newBlock = new (nothrow) RangeListBlock; + + if (newBlock == NULL) + { + m_firstEmptyBlock = b; + m_firstEmptyRange = r - b->ranges; + return FALSE; + } + + InitBlock(newBlock); + + newBlock->next = NULL; + b->next = newBlock; + } + + // + // Next block + // + + b = b->next; + r = b->ranges; + rEnd = r + RANGE_COUNT; + } +} + +void RangeList::RemoveRangesWorker(void *id) +{ + CONTRACTL + { + INSTANCE_CHECK; + NOTHROW; + GC_NOTRIGGER; + FORBID_FAULT; + } + CONTRACTL_END + + RangeListBlock *b = &m_starterBlock; + Range *r = b->ranges; + Range *rEnd = r + RANGE_COUNT; + + // + // Find the first free element, & mark it. + // + + while (TRUE) + { + // + // Clear entries in this block. + // + + while (r < rEnd) + { + if (r->id == (TADDR)id) + { + r->id = (TADDR)NULL; + } + + r++; + } + + // + // If there are no more blocks, we're done. + // + + if (b->next == NULL) + { + m_firstEmptyRange = 0; + m_firstEmptyBlock = &m_starterBlock; + + return; + } + + // + // Next block. + // + + b = b->next; + r = b->ranges; + rEnd = r + RANGE_COUNT; + } +} + +#endif // #ifndef DACCESS_COMPILE + +BOOL RangeList::IsInRangeWorker(TADDR address) +{ + CONTRACTL + { + INSTANCE_CHECK; + NOTHROW; + FORBID_FAULT; + GC_NOTRIGGER; + } + CONTRACTL_END + + SUPPORTS_DAC; + + for (const RangeListBlock* b = &m_starterBlock; b != nullptr; b = b->next) + { + for (const Range r : b->ranges) + { + if (r.id != (TADDR)nullptr && address >= r.start && address < r.end) + return TRUE; + } + } + return FALSE; +} + +#ifdef DACCESS_COMPILE + +void +RangeList::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) +{ + SUPPORTS_DAC; + WRAPPER_NO_CONTRACT; + + // This class is almost always contained in something + // else so there's no enumeration of 'this'. + + RangeListBlock* block = &m_starterBlock; + block->EnumMemoryRegions(flags); + + while (block->next.IsValid()) + { + block->next.EnumMem(); + block = block->next; + + block->EnumMemoryRegions(flags); + } +} + +void +RangeList::RangeListBlock::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) +{ + WRAPPER_NO_CONTRACT; + + Range* range; + TADDR BADFOOD; + TSIZE_T size; + int i; + + // The code below iterates each range stored in the RangeListBlock and + // dumps the memory region represented by each range. 
+ // It is too much memory for a mini-dump, so we just bail out for mini-dumps. + if (flags == CLRDATA_ENUM_MEM_MINI || flags == CLRDATA_ENUM_MEM_TRIAGE) + { + return; + } + + BIT64_ONLY( BADFOOD = 0xbaadf00dbaadf00d; ); + NOT_BIT64( BADFOOD = 0xbaadf00d; ); + + for (i=0; iranges[i]); + if (range->id == (TADDR)NULL || range->start == (TADDR)NULL || range->end == (TADDR)NULL || + // just looking at the lower 4bytes is good enough on WIN64 + range->start == BADFOOD || range->end == BADFOOD) + { + break; + } + + size = range->end - range->start; + _ASSERTE( size < UINT32_MAX ); // ranges should be less than 4gig! + + // We can't be sure this entire range is mapped. For example, the code:StubLinkStubManager + // keeps track of all ranges in the code:LoaderAllocator::m_pStubHeap LoaderHeap, and + // code:LoaderHeap::UnlockedReservePages adds a range for the entire reserved region, instead + // of updating the RangeList when pages are committed. But in that case, the committed region of + // memory will be enumerated by the LoaderHeap anyway, so it's OK if this fails + EMEM_OUT(("MEM: RangeListBlock %p - %p\n", range->start, range->end)); + DacEnumMemoryRegion(range->start, size, false); + } +} + +#endif // #ifdef DACCESS_COMPILE + + diff --git a/src/coreclr/utilcode/sstring.cpp b/src/coreclr/utilcode/sstring.cpp index d500d48bbeb4..e0fb60f256d4 100644 --- a/src/coreclr/utilcode/sstring.cpp +++ b/src/coreclr/utilcode/sstring.cpp @@ -23,10 +23,6 @@ // Have one internal, well-known, literal for the empty string. const BYTE SString::s_EmptyBuffer[2] = { 0 }; -// @todo: these need to be initialized by calling GetACP() - -UINT SString::s_ACP = 0; - #ifndef DACCESS_COMPILE static BYTE s_EmptySpace[sizeof(SString)] = { 0 }; #endif // DACCESS_COMPILE @@ -38,18 +34,14 @@ void SString::Startup() STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; - if (s_ACP == 0) - { - UINT ACP = GetACP(); - #ifndef DACCESS_COMPILE - s_Empty = PTR_SString(new (s_EmptySpace) SString()); - s_Empty->SetNormalized(); -#endif // DACCESS_COMPILE - - MemoryBarrier(); - s_ACP = ACP; + if (s_Empty == NULL) + { + SString* emptyString = new (s_EmptySpace) SString(); + emptyString->SetNormalized(); + s_Empty = PTR_SString(emptyString); } +#endif // DACCESS_COMPILE } CHECK SString::CheckStartup() @@ -690,7 +682,7 @@ void SString::ConvertToUnicode() const { StackSString s; ConvertToUnicode(s); - PREFIX_ASSUME(!s.IsImmutable()); + _ASSERTE(!s.IsImmutable()); (const_cast(this))->Set(s); } } @@ -772,7 +764,7 @@ void SString::ConvertToUTF8() const { StackSString s; ConvertToUTF8(s); - PREFIX_ASSUME(!s.IsImmutable()); + _ASSERTE(!s.IsImmutable()); (const_cast(this))->Set(s); } } diff --git a/src/coreclr/utilcode/stacktrace.cpp b/src/coreclr/utilcode/stacktrace.cpp index 33bf7ed41a69..c0e0263e31b0 100644 --- a/src/coreclr/utilcode/stacktrace.cpp +++ b/src/coreclr/utilcode/stacktrace.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -//----------------------------------------------------------------------------- - -//----------------------------------------------------------------------------- #include "stdafx.h" @@ -30,7 +27,6 @@ HINSTANCE LoadImageHlp() STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; STATIC_CONTRACT_CANNOT_TAKE_LOCK; - SCAN_IGNORE_FAULT; // Faults from Wsz funcs are handled. 
return WszLoadLibrary(W("imagehlp.dll"), NULL, 0); } @@ -39,7 +35,6 @@ HINSTANCE LoadDbgHelp() { STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; - SCAN_IGNORE_FAULT; // Faults from Wsz funcs are handled. return WszLoadLibrary(W("dbghelp.dll"), NULL, 0); } @@ -276,7 +271,6 @@ LPSTR FillSymbolSearchPathThrows(CQuickBytes &qb) { STATIC_CONTRACT_GC_NOTRIGGER; STATIC_CONTRACT_CANNOT_TAKE_LOCK; - SCAN_IGNORE_FAULT; // Faults from Wsz funcs are handled. #ifndef DACCESS_COMPILE // not allowed to do allocation if current thread suspends EE. @@ -361,7 +355,7 @@ LPSTR FillSymbolSearchPath(CQuickBytes &qb) STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; STATIC_CONTRACT_CANNOT_TAKE_LOCK; - SCAN_IGNORE_FAULT; // Faults from Wsz funcs are handled. + LPSTR retval = NULL; HRESULT hr = S_OK; @@ -896,36 +890,6 @@ CONTEXT * pContext // @parm Context to start the stack trace at; null for curre } #endif // !defined(DACCESS_COMPILE) -/**************************************************************************** -* GetStringFromAddr * -*-------------------* -* Description: -* Returns a string from an address. -****************************************************************************/ -void GetStringFromAddr -( -DWORD_PTR dwAddr, -_Out_writes_(cchMaxAssertStackLevelStringLen) LPSTR szString // Place to put string. - // Buffer must hold at least cchMaxAssertStackLevelStringLen. -) -{ - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_GC_NOTRIGGER; - - LOCAL_ASSERT(szString); - - SYM_INFO si; - FillSymbolInfo(&si, dwAddr); - - sprintf_s(szString, - cchMaxAssertStackLevelStringLen, - "%s! %s + 0x%p (0x%p)", - (si.achModule[0]) ? si.achModule : "", - (si.achSymbol[0]) ? si.achSymbol : "", - (void*)si.dwOffset, - (void*)dwAddr); -} - /**************************************************************************** * MagicDeinit * *-------------* diff --git a/src/coreclr/utilcode/stresslog.cpp b/src/coreclr/utilcode/stresslog.cpp index 8908f30a9e91..e837e86712d7 100644 --- a/src/coreclr/utilcode/stresslog.cpp +++ b/src/coreclr/utilcode/stresslog.cpp @@ -15,6 +15,7 @@ #include "ex.h" #define DONOT_DEFINE_ETW_CALLBACK #include "eventtracebase.h" +#include "minipal/time.h" #if !defined(STRESS_LOG_READONLY) #ifdef HOST_WINDOWS @@ -58,15 +59,11 @@ uint64_t getTimeStamp() { } #else // HOST_X86 -uint64_t getTimeStamp() { +uint64_t getTimeStamp() +{ STATIC_CONTRACT_LEAF; - LARGE_INTEGER ret; - ZeroMemory(&ret, sizeof(LARGE_INTEGER)); - - QueryPerformanceCounter(&ret); - - return ret.QuadPart; + return (uint64_t)minipal_hires_ticks(); } #endif // HOST_X86 @@ -127,10 +124,7 @@ uint64_t getTickFrequency() */ uint64_t getTickFrequency() { - LARGE_INTEGER ret; - ZeroMemory(&ret, sizeof(LARGE_INTEGER)); - QueryPerformanceFrequency(&ret); - return ret.QuadPart; + return (uint64_t)minipal_hires_tick_frequency(); } #endif // HOST_X86 diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp index def2633e41be..dac90c2ad979 100644 --- a/src/coreclr/utilcode/util.cpp +++ b/src/coreclr/utilcode/util.cpp @@ -731,8 +731,8 @@ DWORD LCM(DWORD u, DWORD v) if (m_nGroups > 1) { m_enableGCCPUGroups = TRUE; - m_threadUseAllCpuGroups = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_Thread_UseAllCpuGroups, groupCount > 1) != 0; - m_threadAssignCpuGroups = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_Thread_AssignCpuGroups) != 0; + m_threadUseAllCpuGroups = Configuration::GetKnobBooleanValue(W("System.Threading.Thread.UseAllCpuGroups"), CLRConfig::EXTERNAL_Thread_UseAllCpuGroups, groupCount > 1) != 0; + m_threadAssignCpuGroups = 
Configuration::GetKnobBooleanValue(W("System.Threading.Thread.AssignCpuGroups"), CLRConfig::EXTERNAL_Thread_AssignCpuGroups) != 0; // Save the processor group affinity of the initial thread GROUP_AFFINITY groupAffinity; @@ -1923,35 +1923,6 @@ HRESULT validateTokenSig( return S_OK; } // validateTokenSig() -HRESULT GetImageRuntimeVersionString(PVOID pMetaData, LPCSTR* pString) -{ - CONTRACTL - { - NOTHROW; - } - CONTRACTL_END; - - _ASSERTE(pString); - STORAGESIGNATURE* pSig = (STORAGESIGNATURE*) pMetaData; - - // Verify the signature. - - // If signature didn't match, you shouldn't be here. - if (pSig->GetSignature() != STORAGE_MAGIC_SIG) - return CLDB_E_FILE_CORRUPT; - - // The version started in version 1.1 - if (pSig->GetMajorVer() < 1) - return CLDB_E_FILE_OLDVER; - - if (pSig->GetMajorVer() == 1 && pSig->GetMinorVer() < 1) - return CLDB_E_FILE_OLDVER; - - // Header data starts after signature. - *pString = (LPCSTR) pSig->pVersion; - return S_OK; -} - //***************************************************************************** // Convert a UTF8 string to Unicode, into a CQuickArray. //***************************************************************************** @@ -2340,7 +2311,7 @@ void PutLoongArch64JIR(UINT32 * pCode, INT64 imm38) UINT32 pcInstr = *pCode; - _ASSERTE(pcInstr == 0x1e00000e); // Must be pcaddu18i R14, 0 + _ASSERTE(pcInstr == 0x1e000010); // Must be pcaddu18i t4, 0 INT64 relOff = imm38 & 0x20000; INT64 imm = imm38 + relOff; @@ -2649,7 +2620,7 @@ namespace Com { STANDARD_VM_CONTRACT; - WCHAR wszClsid[GUID_STR_BUFFER_LEN]; + WCHAR wszClsid[MINIPAL_GUID_BUFFER_LEN]; if (GuidToLPWSTR(rclsid, wszClsid) == 0) return E_UNEXPECTED; diff --git a/src/coreclr/utilcode/util_nodependencies.cpp b/src/coreclr/utilcode/util_nodependencies.cpp index 3a08158b0daa..dbb40efb3555 100644 --- a/src/coreclr/utilcode/util_nodependencies.cpp +++ b/src/coreclr/utilcode/util_nodependencies.cpp @@ -45,7 +45,7 @@ void InitRunningOnVersionStatus () else { // The current platform isn't supported. Display a message to this effect and exit. - fprintf(stderr, "Platform not supported: Windows 7 is the minimum supported version\n"); + minipal_log_print_error("Platform not supported: Windows 7 is the minimum supported version\n"); TerminateProcess(GetCurrentProcess(), NON_SUPPORTED_PLATFORM_TERMINATE_ERROR_CODE); } #endif // HOST_WINDOWS @@ -434,27 +434,6 @@ HRESULT GetDebuggerSettingInfoWorker(_Out_writes_to_opt_(*pcchDebuggerString, *p #endif //!defined(FEATURE_UTILCODE_NO_DEPENDENCIES) || defined(_DEBUG) - -//***************************************************************************** -// Convert a GUID into a pointer to a string -//***************************************************************************** -int -GuidToLPSTR( - REFGUID guid, // The GUID to convert. 
- _Out_writes_(cchGuid) LPSTR szGuid, // String into which the GUID is stored - DWORD cchGuid) // Count in chars -{ - if (cchGuid < GUID_STR_BUFFER_LEN) - return 0; - - return sprintf_s(szGuid, cchGuid, "{%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}", - guid.Data1, guid.Data2, guid.Data3, - guid.Data4[0], guid.Data4[1], - guid.Data4[2], guid.Data4[3], - guid.Data4[4], guid.Data4[5], - guid.Data4[6], guid.Data4[7]) + 1; -} - //***************************************************************************** // Convert hex value into a wide string of hex digits //***************************************************************************** @@ -508,7 +487,7 @@ GuidToLPWSTR( // successive fields break the GUID into the form DWORD-WORD-WORD-WORD-WORD.DWORD // covering the 128-bit GUID. The string includes enclosing braces, which are an OLE convention. - if (cchGuid < GUID_STR_BUFFER_LEN) + if (cchGuid < MINIPAL_GUID_BUFFER_LEN) return 0; // {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx} diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index c6edfe5a6ff5..b9a272765a80 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -1,10 +1,13 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) +set(RUNTIME_DIR ../runtime) + # Needed due to the cmunged files being in the binary folders, the set(CMAKE_INCLUDE_CURRENT_DIR ON) is not enough include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${ARCH_SOURCES_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../interop/inc) include_directories(${CLR_SRC_NATIVE_DIR}) +include_directories(${RUNTIME_DIR}) # needed when zLib compression is used include_directories(${CLR_SRC_NATIVE_DIR}/libs/System.IO.Compression.Native) @@ -13,6 +16,10 @@ include_directories(${CLR_SRC_NATIVE_DIR}/libs/Common) add_definitions(-DUNICODE) add_definitions(-D_UNICODE) +if(CLR_CMAKE_TARGET_ANDROID) + add_definitions(-DFEATURE_EMULATED_TLS) +endif(CLR_CMAKE_TARGET_ANDROID) + if(FEATURE_AUTO_TRACE) add_definitions(-DFEATURE_AUTO_TRACE) endif(FEATURE_AUTO_TRACE) @@ -41,13 +48,19 @@ if(FEATURE_PERFTRACING) include_directories(${CORECLR_USEREVENTS_SHIM_DIR}) endif(FEATURE_PERFTRACING) +add_compile_definitions($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:FEATURE_CACHED_INTERFACE_DISPATCH>) +add_compile_definitions($<${FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH}:FEATURE_VIRTUAL_STUB_DISPATCH>) + +if(CLR_CMAKE_TARGET_ARCH_WASM) + add_compile_definitions(FEATURE_STATICALLY_LINKED) +endif() + set(VM_SOURCES_DAC_AND_WKS_COMMON appdomain.cpp array.cpp assembly.cpp assemblybinder.cpp binder.cpp - bundle.cpp castcache.cpp callcounting.cpp cdacplatformmetadata.cpp @@ -156,6 +169,7 @@ set(VM_HEADERS_DAC_AND_WKS_COMMON codeman.h codeman.inl codeversion.h + conditionalweaktable.h contractimpl.h crst.h debugdebugger.h @@ -269,6 +283,7 @@ endif(FEATURE_JIT_PITCHING) set(VM_SOURCES_DAC ${VM_SOURCES_DAC_AND_WKS_COMMON} + conditionalweaktable.cpp # The usage of conditionalweaktable is only in the DAC, but we put the headers in the VM to enable validation. 
threaddebugblockinginfo.cpp ) @@ -287,12 +302,17 @@ set(VM_SOURCES_WKS ${VM_SOURCES_DAC_AND_WKS_COMMON} appdomainnative.cpp assemblynative.cpp + assemblyprobeextension.cpp assemblyspec.cpp baseassemblyspec.cpp + bundle.cpp + ${RUNTIME_DIR}/CachedInterfaceDispatch.cpp + CachedInterfaceDispatch_Coreclr.cpp cachelinealloc.cpp callconvbuilder.cpp callhelpers.cpp callsiteinspect.cpp + callstubgenerator.cpp clrconfignative.cpp clrex.cpp clrvarargs.cpp @@ -331,10 +351,14 @@ set(VM_SOURCES_WKS hosting.cpp hostinformation.cpp ilmarshalers.cpp + instancecalli.cpp interopconverter.cpp interoputil.cpp + interpexec.cpp + interpframeallocator.cpp invokeutil.cpp jithelpers.cpp + jitinterfacegen.cpp managedmdimport.cpp marshalnative.cpp methodtablebuilder.cpp @@ -368,6 +392,8 @@ set(VM_SOURCES_WKS threaddebugblockinginfo.cpp threadsuspend.cpp typeparse.cpp + unsafeaccessors.cpp + asyncthunks.cpp weakreferencenative.cpp yieldprocessornormalized.cpp ${VM_SOURCES_GDBJIT} @@ -381,6 +407,7 @@ set(VM_HEADERS_WKS ../inc/jithelpers.h appdomainnative.hpp assemblynative.hpp + ../inc/assemblyprobeextension.h assemblyspec.hpp assemblyspecbase.h baseassemblyspec.h @@ -430,6 +457,9 @@ set(VM_HEADERS_WKS interopconverter.h interoputil.h interoputil.inl + interpexec.h + interpframeallocator.h + callstubgenerator.h invokeutil.h managedmdimport.hpp marshalnative.h @@ -608,7 +638,9 @@ if(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.asm ${ARCH_SOURCES_DIR}/AsmHelpers.asm + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerAMD64.asm ${ARCH_SOURCES_DIR}/ComCallPreStub.asm ${ARCH_SOURCES_DIR}/GenericComCallStubs.asm @@ -621,12 +653,16 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm ${ARCH_SOURCES_DIR}/ThePreStubAMD64.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm ${ARCH_SOURCES_DIR}/Context.asm ${ARCH_SOURCES_DIR}/ExternalMethodFixupThunk.asm ${ARCH_SOURCES_DIR}/UMThunkStub.asm ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm + ${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.asm ) set(VM_HEADERS_WKS_ARCH_ASM @@ -635,35 +671,47 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) elseif(CLR_CMAKE_TARGET_ARCH_I386) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm + ${ARCH_SOURCES_DIR}/AllocSlow.asm ${ARCH_SOURCES_DIR}/asmhelpers.asm - ${ARCH_SOURCES_DIR}/gmsasm.asm + ${ARCH_SOURCES_DIR}/ehhelpers.asm ${ARCH_SOURCES_DIR}/jithelp.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm - ) + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.asm + ) set(VM_HEADERS_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmconstants.h ) elseif(CLR_CMAKE_TARGET_ARCH_ARM64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.asm ${ARCH_SOURCES_DIR}/AsmHelpers.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm - ) + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm + ${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.asm + ) 
set(VM_HEADERS_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmconstants.h ) endif() + set(ASM_SUFFIX asm) else(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S ${ARCH_SOURCES_DIR}/calldescrworkeramd64.S ${ARCH_SOURCES_DIR}/externalmethodfixupthunk.S ${ARCH_SOURCES_DIR}/getstate.S @@ -674,63 +722,83 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/redirectedhandledjitcase.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S ${ARCH_SOURCES_DIR}/theprestubamd64.S ${ARCH_SOURCES_DIR}/thunktemplates.S ${ARCH_SOURCES_DIR}/Context.S ${ARCH_SOURCES_DIR}/unixasmhelpers.S ${ARCH_SOURCES_DIR}/umthunkstub.S ${ARCH_SOURCES_DIR}/virtualcallstubamd64.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S + ${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.S ) elseif(CLR_CMAKE_TARGET_ARCH_I386) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/ehhelpers.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/jithelp.S - ${ARCH_SOURCES_DIR}/gmsasm.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/umthunkstub.S ${ARCH_SOURCES_DIR}/thunktemplates.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/ehhelpers.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) elseif(CLR_CMAKE_TARGET_ARCH_ARM64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S ${ARCH_SOURCES_DIR}/thunktemplates.S + ${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerloongarch64.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) elseif(CLR_CMAKE_TARGET_ARCH_RISCV64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerriscv64.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) endif() + set(ASM_SUFFIX S) endif(CLR_CMAKE_TARGET_WIN32) - if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/cgenamd64.cpp ${ARCH_SOURCES_DIR}/excepamd64.cpp - ${ARCH_SOURCES_DIR}/gmsamd64.cpp ${ARCH_SOURCES_DIR}/stublinkeramd64.cpp ) @@ -738,16 +806,14 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/asmconstants.h ${ARCH_SOURCES_DIR}/cgencpu.h ${ARCH_SOURCES_DIR}/excepcpu.h - ${ARCH_SOURCES_DIR}/gmscpu.h 
${ARCH_SOURCES_DIR}/stublinkeramd64.h ) set(VM_SOURCES_WKS_ARCH - ${ARCH_SOURCES_DIR}/jitinterfaceamd64.cpp ${ARCH_SOURCES_DIR}/profiler.cpp exceptionhandling.cpp gcinfodecoder.cpp - jitinterfacegen.cpp + writebarriermanager.cpp ) set(VM_HEADERS_WKS_ARCH @@ -757,14 +823,12 @@ elseif(CLR_CMAKE_TARGET_ARCH_I386) set(VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/cgenx86.cpp ${ARCH_SOURCES_DIR}/excepx86.cpp - ${ARCH_SOURCES_DIR}/gmsx86.cpp ${ARCH_SOURCES_DIR}/stublinkerx86.cpp ) set(VM_HEADERS_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/cgencpu.h ${ARCH_SOURCES_DIR}/excepcpu.h - ${ARCH_SOURCES_DIR}/gmscpu.h ${ARCH_SOURCES_DIR}/stublinkerx86.h ) @@ -814,6 +878,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) set(VM_SOURCES_WKS_ARCH ${ARCH_SOURCES_DIR}/profiler.cpp gcinfodecoder.cpp + writebarriermanager.cpp ) if(CLR_CMAKE_HOST_UNIX) @@ -936,10 +1001,12 @@ convert_to_absolute_path(VM_SOURCES_WKS_ARCH_ASM ${VM_SOURCES_WKS_ARCH_ASM}) convert_to_absolute_path(VM_SOURCES_DAC ${VM_SOURCES_DAC}) convert_to_absolute_path(VM_SOURCES_WKS_SPECIAL ${VM_SOURCES_WKS_SPECIAL}) -add_library_clr(cee_dac ${VM_SOURCES_DAC}) -add_dependencies(cee_dac eventing_headers) -set_target_properties(cee_dac PROPERTIES DAC_COMPONENT TRUE) -target_precompile_headers(cee_dac PRIVATE [["common.h"]]) +if (NOT CLR_CMAKE_TARGET_ARCH_WASM) + add_library_clr(cee_dac ${VM_SOURCES_DAC}) + add_dependencies(cee_dac eventing_headers) + set_target_properties(cee_dac PROPERTIES DAC_COMPONENT TRUE) + target_precompile_headers(cee_dac PRIVATE [["common.h"]]) +endif() add_subdirectory(wks) diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h new file mode 100644 index 000000000000..b5fe78322927 --- /dev/null +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -0,0 +1,195 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __CACHEDINTERFACEDISPATCHPAL_H__ +#define __CACHEDINTERFACEDISPATCHPAL_H__ + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +extern "C" void RhpInitialInterfaceDispatch(); + + +bool InterfaceDispatch_InitializePal(); + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size); +// Allocate memory aligned at sizeof(void*) boundaries + +void *InterfaceDispatch_AllocPointerAligned(size_t size); + +enum Flags +{ + // The low 2 bits of the m_pCache pointer are treated specially so that we can avoid the need for + // extra fields on this type. + // OR if the m_pCache value is less than 0x1000 then this is a vtable offset and should be used as such + IDC_CachePointerPointsIsVTableOffset = 0x2, + IDC_CachePointerPointsAtCache = 0x0, + IDC_CachePointerMask = 0x3, + IDC_CachePointerMaskShift = 0x2, +}; + +enum class DispatchCellType +{ + InterfaceAndSlot = 0x0, + VTableOffset = 0x2, +}; + +struct DispatchCellInfo +{ +private: + static DispatchCellType CellTypeFromToken(DispatchToken token) + { + if (token.IsThisToken()) + { + return DispatchCellType::VTableOffset; + } + return DispatchCellType::InterfaceAndSlot; + } +public: + + DispatchCellInfo(DispatchToken token, bool hasCache) : + CellType(CellTypeFromToken(token)), + Token(token), + HasCache(hasCache ? 1 : 0) + { + + } + const DispatchCellType CellType; + const DispatchToken Token; + + uintptr_t GetVTableOffset() const + { + if (CellType == DispatchCellType::VTableOffset) + { + // The vtable offset is stored in a pointer sized field, but actually represents 2 values. + // 1. 
The offset of the first indirection to use. which is stored in the upper half of the + // pointer sized field (bits 16-31 of a 32 bit pointer, or bits 32-63 of a 64 bit pointer). + // + // 2. The offset of the second indirection, which is a stored is the upper half of the lower + // half of the pointer size field (bits 8-15 of a 32 bit pointer, or bits 16-31 of a 64 + // bit pointer) This second offset is always less than 255, so we only really need a single + // byte, and the assembly code on some architectures may take a dependency on that + // so the VTableOffsetToSlot function has a mask to ensure that it is only ever a single byte. + uint32_t slot = Token.GetSlotNumber(); + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + + uintptr_t offsetOfIndirectionPortion = (((uintptr_t)offsetOfIndirection) << ((TARGET_POINTER_SIZE * 8) / 2)); + uintptr_t offsetAfterIndirectionPortion = (((uintptr_t)offsetAfterIndirection) << ((TARGET_POINTER_SIZE * 8) / 4)); + uintptr_t flagPortion = (uintptr_t)IDC_CachePointerPointsIsVTableOffset; + + uintptr_t result = offsetOfIndirectionPortion | offsetAfterIndirectionPortion | flagPortion; + _ASSERTE(slot == VTableOffsetToSlot(result)); + return result; + } + return 0; + } + + static unsigned VTableOffsetToSlot(uintptr_t vtableOffset) + { + // See comment in GetVTableOffset() for what we're doing here. + unsigned offsetOfIndirection = (unsigned)(vtableOffset >> ((TARGET_POINTER_SIZE * 8) / 2)); + unsigned offsetAfterIndirection = (unsigned)(vtableOffset >> ((TARGET_POINTER_SIZE * 8) / 4)) & 0xFF; + unsigned slotGroupPerChunk = (offsetOfIndirection - MethodTable::GetVtableOffset()) / TARGET_POINTER_SIZE; + unsigned slot = (slotGroupPerChunk * VTABLE_SLOTS_PER_CHUNK) + (offsetAfterIndirection / TARGET_POINTER_SIZE); + return slot; + } + + const uint8_t HasCache = 0; +}; + +struct InterfaceDispatchCacheHeader +{ +private: + enum Flags + { + CH_TypeAndSlotIndex = 0x0, + CH_MetadataToken = 0x1, + CH_Mask = 0x3, + CH_Shift = 0x2, + }; + +public: + void Initialize(DispatchToken token) + { + m_token = token; + } + + void Initialize(const DispatchCellInfo *pNewCellInfo) + { + m_token = pNewCellInfo->Token; + } + + DispatchCellInfo GetDispatchCellInfo() + { + DispatchCellInfo cellInfo(m_token, true); + return cellInfo; + } + +private: + DispatchToken m_token; + TADDR padding; // Ensure that the size of this structure is a multiple of 2 pointers +}; + +// One of these is allocated per interface call site. It holds the stub to call, data to pass to that stub +// (cache information) and the interface contract, i.e. the interface type and slot being called. +struct InterfaceDispatchCell +{ + // The first two fields must remain together and at the beginning of the structure. This is due to the + // synchronization requirements of the code that updates these at runtime and the instructions generated + // by the binder for interface call sites. + TADDR m_pStub; // Call this code to execute the interface dispatch + Volatile m_pCache; // Context used by the stub above (one or both of the low two bits are set + // for initial dispatch, and if not set, using this as a cache pointer or + // as a vtable offset.) 
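GetVTableOffset and VTableOffsetToSlot above pack two vtable indirection offsets, plus the IDC flag bits, into a single pointer-sized value that is then stored in m_pCache. The standalone sketch below walks the same round trip; it assumes a 64-bit target, and the vtable start offset and VTABLE_SLOTS_PER_CHUNK values are stand-ins for what MethodTable actually reports:

// Sketch of the encoding only; the constants below are representative, not the runtime's.
#include <cassert>
#include <cstdint>

constexpr unsigned TARGET_POINTER_SIZE    = 8;   // 64-bit target assumed
constexpr unsigned VTABLE_SLOTS_PER_CHUNK = 8;   // representative chunk size
constexpr unsigned VTABLE_START_OFFSET    = 64;  // stand-in for MethodTable::GetVtableOffset()

constexpr uintptr_t IDC_CachePointerPointsIsVTableOffset = 0x2;

uintptr_t PackVTableOffset(uint32_t slot)
{
    unsigned offsetOfIndirection    = VTABLE_START_OFFSET + (slot / VTABLE_SLOTS_PER_CHUNK) * TARGET_POINTER_SIZE;
    unsigned offsetAfterIndirection = (slot % VTABLE_SLOTS_PER_CHUNK) * TARGET_POINTER_SIZE;

    return ((uintptr_t)offsetOfIndirection    << ((TARGET_POINTER_SIZE * 8) / 2))  // upper half
         | ((uintptr_t)offsetAfterIndirection << ((TARGET_POINTER_SIZE * 8) / 4))  // next quarter, fits in a byte
         | IDC_CachePointerPointsIsVTableOffset;                                   // low flag bits
}

unsigned UnpackSlot(uintptr_t packed)
{
    unsigned offsetOfIndirection    = (unsigned)(packed >> ((TARGET_POINTER_SIZE * 8) / 2));
    unsigned offsetAfterIndirection = (unsigned)(packed >> ((TARGET_POINTER_SIZE * 8) / 4)) & 0xFF;
    unsigned chunk = (offsetOfIndirection - VTABLE_START_OFFSET) / TARGET_POINTER_SIZE;
    return chunk * VTABLE_SLOTS_PER_CHUNK + offsetAfterIndirection / TARGET_POINTER_SIZE;
}

int main()
{
    for (uint32_t slot = 0; slot < 64; slot++)
        assert(UnpackSlot(PackVTableOffset(slot)) == slot);
    return 0;
}
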
+ DispatchCellInfo GetDispatchCellInfo() + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + TADDR cachePointerValue = m_pCache; + + if (IsCache(cachePointerValue)) + { + return ((InterfaceDispatchCacheHeader*)cachePointerValue)->GetDispatchCellInfo(); + } + else if (DispatchToken::IsCachedInterfaceDispatchToken(cachePointerValue)) + { + return DispatchCellInfo(DispatchToken::FromCachedInterfaceDispatchToken(cachePointerValue), false); + } + else + { + _ASSERTE(IsVTableOffset(cachePointerValue)); + unsigned slot = DispatchCellInfo::VTableOffsetToSlot(cachePointerValue); + return DispatchCellInfo(DispatchToken::CreateDispatchToken(slot), false); + } + } + + static bool IsCache(TADDR value) + { + return (value & IDC_CachePointerMask) == 0; + } + + static bool IsVTableOffset(TADDR value) + { + return (value & IDC_CachePointerPointsIsVTableOffset) == IDC_CachePointerPointsIsVTableOffset; + } + + InterfaceDispatchCacheHeader* GetCache() const + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + TADDR cachePointerValue = m_pCache; + if (IsCache(cachePointerValue)) + { + return (InterfaceDispatchCacheHeader*)cachePointerValue; + } + else + { + return nullptr; + } + } +}; + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#endif // __CACHEDINTERFACEDISPATCHPAL_H__ \ No newline at end of file diff --git a/src/coreclr/vm/CachedInterfaceDispatch_Coreclr.cpp b/src/coreclr/vm/CachedInterfaceDispatch_Coreclr.cpp new file mode 100644 index 000000000000..66a359ffbd3b --- /dev/null +++ b/src/coreclr/vm/CachedInterfaceDispatch_Coreclr.cpp @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "common.h" + +bool InterfaceDispatch_InitializePal() +{ + return true; +} + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size) +{ + return (void*)SystemDomain::GetGlobalLoaderAllocator()->GetHighFrequencyHeap()->AllocAlignedMem(size, sizeof(TADDR) * 2); +} + +// Allocate memory aligned at sizeof(void*) boundaries + +void *InterfaceDispatch_AllocPointerAligned(size_t size) +{ + return (void*)SystemDomain::GetGlobalLoaderAllocator()->GetHighFrequencyHeap()->AllocAlignedMem(size, sizeof(TADDR)); +} diff --git a/src/coreclr/vm/ClrEtwAllMeta.lst b/src/coreclr/vm/ClrEtwAllMeta.lst index 3e9ce9804d48..f5659cfa3ad0 100644 --- a/src/coreclr/vm/ClrEtwAllMeta.lst +++ b/src/coreclr/vm/ClrEtwAllMeta.lst @@ -645,6 +645,12 @@ nomac:StressLogTask:::StressLogEvent_V1 ################## nomac:CLRStackStress:::CLRStackWalkStress +################################# +# Debugger events +################################# +nostack:DebugIPCEvent:::DebugIPCEventStart +nostack:DebugIPCEvent:::DebugIPCEventEnd + ################################# # Events from the Mono profiler provider ################################# diff --git a/src/coreclr/vm/FrameTypes.h b/src/coreclr/vm/FrameTypes.h index 59e1c56356e8..83a4e0aa6de4 100644 --- a/src/coreclr/vm/FrameTypes.h +++ b/src/coreclr/vm/FrameTypes.h @@ -14,17 +14,10 @@ FRAME_TYPE_NAME(ResumableFrame) FRAME_TYPE_NAME(RedirectedThreadFrame) #endif // FEATURE_HIJACK FRAME_TYPE_NAME(FaultingExceptionFrame) -#ifdef FEATURE_EH_FUNCLETS FRAME_TYPE_NAME(SoftwareExceptionFrame) -#endif // FEATURE_EH_FUNCLETS #ifdef DEBUGGING_SUPPORTED FRAME_TYPE_NAME(FuncEvalFrame) #endif // DEBUGGING_SUPPORTED -FRAME_TYPE_NAME(HelperMethodFrame) -FRAME_TYPE_NAME(HelperMethodFrame_1OBJ) -FRAME_TYPE_NAME(HelperMethodFrame_2OBJ) -FRAME_TYPE_NAME(HelperMethodFrame_3OBJ) -FRAME_TYPE_NAME(HelperMethodFrame_PROTECTOBJ) #ifdef FEATURE_COMINTEROP FRAME_TYPE_NAME(ComMethodFrame) FRAME_TYPE_NAME(CLRToCOMMethodFrame) @@ -38,17 +31,14 @@ FRAME_TYPE_NAME(PrestubMethodFrame) FRAME_TYPE_NAME(CallCountingHelperFrame) FRAME_TYPE_NAME(StubDispatchFrame) FRAME_TYPE_NAME(ExternalMethodFrame) -#ifdef FEATURE_READYTORUN FRAME_TYPE_NAME(DynamicHelperFrame) -#endif -FRAME_TYPE_NAME(ProtectByRefsFrame) FRAME_TYPE_NAME(ProtectValueClassFrame) FRAME_TYPE_NAME(DebuggerClassInitMarkFrame) FRAME_TYPE_NAME(DebuggerExitFrame) FRAME_TYPE_NAME(DebuggerU2MCatchHandlerFrame) FRAME_TYPE_NAME(ExceptionFilterFrame) -#if defined(_DEBUG) -FRAME_TYPE_NAME(AssumeByrefFromJITStackFrame) -#endif // _DEBUG +#ifdef FEATURE_INTERPRETER +FRAME_TYPE_NAME(InterpreterFrame) +#endif // FEATURE_INTERPRETER #undef FRAME_TYPE_NAME diff --git a/src/coreclr/vm/amd64/AllocSlow.S b/src/coreclr/vm/amd64/AllocSlow.S new file mode 100644 index 000000000000..44f6dd33fae0 --- /dev/null +++ b/src/coreclr/vm/amd64/AllocSlow.S @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +.intel_syntax noprefix +#include "asmconstants.h" +#include "unixasmmacros.inc" + +// +// Object* RhpNew(MethodTable *pMT) +// +// Allocate non-array object, slow path +// +LEAF_ENTRY RhpNew, _TEXT + + mov rsi, 0 + jmp C_FUNC(RhpNewObject) + +LEAF_END RhpNew, _TEXT + +// +// Object* RhpNewMaybeFrozen(MethodTable *pMT) +// +// Allocate non-array object, may be on frozen heap +// +NESTED_ENTRY RhpNewMaybeFrozen, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME rdx + + mov rsi, 0 + call C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhpNewMaybeFrozen, _TEXT + +// +// Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size) +// +// Allocate array object, may be on frozen heap +// +NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME rdx + + call C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhpNewArrayMaybeFrozen, _TEXT + +// +// void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow) +// +NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME rdx + + call C_FUNC(RhExceptionHandling_FailedAllocation_Helper) + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhExceptionHandling_FailedAllocation, _TEXT diff --git a/src/coreclr/vm/amd64/AllocSlow.asm b/src/coreclr/vm/amd64/AllocSlow.asm new file mode 100644 index 000000000000..fbe8876ee13d --- /dev/null +++ b/src/coreclr/vm/amd64/AllocSlow.asm @@ -0,0 +1,245 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc +include asmconstants.inc + +EXTERN RhpNewObject : PROC +EXTERN RhpNewVariableSizeObject : PROC +EXTERN RhpGcAllocMaybeFrozen : PROC +EXTERN RhExceptionHandling_FailedAllocation_Helper : PROC + +EXTERN g_global_alloc_lock : DWORD +EXTERN g_global_alloc_context : QWORD + +; +; Object* RhpNew(MethodTable *pMT) +; +; Allocate non-array object, slow path. +; +LEAF_ENTRY RhpNew, _TEXT + + mov rdx, 0 + jmp RhpNewObject + +LEAF_END RhpNew, _TEXT + +; +; Object* RhpNewMaybeFrozen(MethodTable *pMT) +; +; Allocate non-array object, may be on frozen heap. +; +NESTED_ENTRY RhpNewMaybeFrozen, _TEXT + + PUSH_COOP_PINVOKE_FRAME r8 + + mov rdx, 0 + call RhpGcAllocMaybeFrozen + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhpNewMaybeFrozen, _TEXT + +; +; Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size) +; +; Allocate array object, may be on frozen heap. 
+; +NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT + + PUSH_COOP_PINVOKE_FRAME r8 + + call RhpGcAllocMaybeFrozen + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhpNewArrayMaybeFrozen, _TEXT + +; +; void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow) +; +NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT + + PUSH_COOP_PINVOKE_FRAME r8 + + call RhExceptionHandling_FailedAllocation_Helper + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhExceptionHandling_FailedAllocation, _TEXT + +; +; void RhpNewFast_UP(MethodTable *pMT) +; +; Allocate non-array object, uniprocessor version +; +LEAF_ENTRY RhpNewFast_UP, _TEXT + + inc [g_global_alloc_lock] + jnz RhpNewFast_UP_RarePath + + ;; + ;; rcx contains MethodTable pointer + ;; + mov r8d, [rcx + OFFSETOF__MethodTable__m_uBaseSize] + + ;; + ;; eax: base size + ;; rcx: MethodTable pointer + ;; rdx: ee_alloc_context pointer + ;; + + mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] + add r8, rax + cmp r8, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] + ja RhpNewFast_UP_RarePath_Unlock + + ;; set the new alloc pointer + mov [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 + + ;; set the new object's MethodTable pointer + mov [rax], rcx + mov [g_global_alloc_lock], -1 + ret + +RhpNewFast_UP_RarePath_Unlock: + mov [g_global_alloc_lock], -1 + +RhpNewFast_UP_RarePath: + xor edx, edx + jmp RhpNewObject + +LEAF_END RhpNewFast_UP, _TEXT + +; +; Shared code for RhNewString_UP, RhpNewArrayFast_UP and RhpNewPtrArrayFast_UP +; RAX == string/array size +; RCX == MethodTable +; RDX == character/element count +; +NEW_ARRAY_FAST_UP MACRO + + inc [g_global_alloc_lock] + jnz RhpNewVariableSizeObject + + mov r8, rax + add rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] + jc NewArrayFast_RarePath + + ; rax == new alloc ptr + ; rcx == MethodTable + ; rdx == element count + ; r8 == array size + cmp rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] + ja NewArrayFast_RarePath + + mov [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], rax + + ; calc the new object pointer + sub rax, r8 + + mov [rax + OFFSETOF__Object__m_pEEType], rcx + mov [rax + OFFSETOF__Array__m_Length], edx + mov [g_global_alloc_lock], -1 + ret + +NewArrayFast_RarePath: + mov [g_global_alloc_lock], -1 + jmp RhpNewVariableSizeObject + +ENDM + +; +; Object* RhNewString_UP(MethodTable *pMT, DWORD stringLength) +; +; Allocate a string, uniprocessor version +; +LEAF_ENTRY RhNewString_UP, _TEXT + + ; we want to limit the element count to the non-negative 32-bit int range + cmp rdx, MAX_STRING_LENGTH + ja StringSizeOverflow + + ; Compute overall allocation size (align(base size + (element size * elements), 8)). + lea rax, [(rdx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)] + and rax, -8 + + NEW_ARRAY_FAST_UP + +StringSizeOverflow: + ; We get here if the size of the final string object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM exception that the caller of this allocator understands. + + ; rcx holds MethodTable pointer already + xor edx, edx ; Indicate that we should throw OOM. 
+ jmp RhExceptionHandling_FailedAllocation + +LEAF_END RhNewString_UP, _TEXT + +; +; Object* RhpNewArrayFast_UP(MethodTable *pMT, INT_PTR elementCount) +; Object* RhpNewArrayFast_UP_OBJ(MethodTable *pMT, INT_PTR elementCount) +; +; Allocate one dimensional, zero based array (SZARRAY), uniprocessor version +; +LEAF_ENTRY RhpNewArrayFast_UP, _TEXT + + ; we want to limit the element count to the non-negative 32-bit int range + cmp rdx, 07fffffffh + ja ArraySizeOverflow + + ; save element count + mov r8, rdx + + ; Compute overall allocation size (align(base size + (element size * elements), 8)). + movzx eax, word ptr [rcx + OFFSETOF__MethodTable__m_usComponentSize] + imul rax, rdx + lea rax, [rax + SZARRAY_BASE_SIZE + 7] + and rax, -8 + + mov rdx, r8 + + NEW_ARRAY_FAST_UP + +ArraySizeOverflow: + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + ; rcx holds MethodTable pointer already + mov edx, 1 ; Indicate that we should throw OverflowException + jmp RhExceptionHandling_FailedAllocation + +LEAF_END RhpNewArrayFast_UP, _TEXT + +; +; Object* RhpNewPtrArrayFast_UP(MethodTable *pMT, INT_PTR elementCount) +; +; Allocate one dimensional, zero based array (SZARRAY) of pointer sized elements, +; uniprocessor version +; +LEAF_ENTRY RhpNewPtrArrayFast_UP, _TEXT + + ; Delegate overflow handling to the generic helper conservatively + + cmp rdx, (40000000h / 8) ; sizeof(void*) + jae RhpNewVariableSizeObject + + ; In this case we know the element size is sizeof(void *), or 8 for x64 + ; This helps us in two ways - we can shift instead of multiplying, and + ; there's no need to align the size either + + lea eax, [edx * 8 + SZARRAY_BASE_SIZE] + + ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed + ; to be a multiple of 8. + + NEW_ARRAY_FAST_UP + +LEAF_END RhpNewPtrArrayFast_UP, _TEXT + + end diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 55251c3ec70f..f06d64ec1ef3 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -18,60 +18,6 @@ endif extern g_pPollGC:QWORD extern g_TrapReturningThreads:DWORD -; EXTERN_C int __fastcall HelperMethodFrameRestoreState( -; INDEBUG_COMMA(HelperMethodFrame *pFrame) -; MachState *pState -; ) -LEAF_ENTRY HelperMethodFrameRestoreState, _TEXT - -ifdef _DEBUG - mov rcx, rdx -endif - - ; Check if the MachState is valid - xor eax, eax - cmp qword ptr [rcx + OFFSETOF__MachState___pRetAddr], rax - jne @F - REPRET -@@: - - ; - ; If a preserved register were pushed onto the stack between - ; the managed caller and the H_M_F, m_pReg will point to its - ; location on the stack and it would have been updated on the - ; stack by the GC already and it will be popped back into the - ; appropriate register when the appropriate epilog is run. - ; - ; Otherwise, the register is preserved across all the code - ; in this HCALL or FCALL, so we need to update those registers - ; here because the GC will have updated our copies in the - ; frame. - ; - ; So, if m_pReg points into the MachState, we need to update - ; the register here. That's what this macro does. 
- ; -RestoreReg macro reg, regnum - lea rax, [rcx + OFFSETOF__MachState__m_Capture + 8 * regnum] - mov rdx, [rcx + OFFSETOF__MachState__m_Ptrs + 8 * regnum] - cmp rax, rdx - cmove reg, [rax] - endm - - ; regnum has to match ENUM_CALLEE_SAVED_REGISTERS macro - RestoreReg Rdi, 0 - RestoreReg Rsi, 1 - RestoreReg Rbx, 2 - RestoreReg Rbp, 3 - RestoreReg R12, 4 - RestoreReg R13, 5 - RestoreReg R14, 6 - RestoreReg R15, 7 - - xor eax, eax - ret - -LEAF_END HelperMethodFrameRestoreState, _TEXT - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; NDirectImportThunk @@ -203,10 +149,11 @@ NESTED_ENTRY OnHijackTripThread, _TEXT push rax ; make room for the real return address (Rip) push rdx PUSH_CALLEE_SAVED_REGISTERS + push_vol_reg rcx push_vol_reg rax mov rcx, rsp - alloc_stack 38h ; make extra room for xmm0, argument home slots and align the SP + alloc_stack 30h ; make extra room for xmm0 and argument home slots save_xmm128_postrsp xmm0, 20h @@ -216,8 +163,9 @@ NESTED_ENTRY OnHijackTripThread, _TEXT movdqa xmm0, [rsp + 20h] - add rsp, 38h + add rsp, 30h pop rax + pop rcx POP_CALLEE_SAVED_REGISTERS pop rdx ret ; return to the correct place, adjusted by our caller @@ -463,11 +411,11 @@ NESTED_ENTRY JIT_Patchpoint, _TEXT NESTED_END JIT_Patchpoint, _TEXT ; first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL -LEAF_ENTRY JIT_PartialCompilationPatchpoint, _TEXT +LEAF_ENTRY JIT_PatchpointForced, _TEXT mov rdx, rcx xor rcx, rcx jmp JIT_Patchpoint -LEAF_END JIT_PartialCompilationPatchpoint, _TEXT +LEAF_END JIT_PatchpointForced, _TEXT endif ; FEATURE_TIERED_COMPILATION @@ -480,7 +428,132 @@ JIT_PollGCRarePath: TAILJMP_RAX LEAF_END JIT_PollGC, _TEXT +; rcx -This pointer +; rdx -ReturnBuffer +LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT + mov METHODDESC_REGISTER, [METHODDESC_REGISTER + ThisPtrRetBufPrecodeData__Target] + mov r11, rcx + mov rcx, rdx + mov rdx, r11 + jmp METHODDESC_REGISTER +LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT + +;; +;; Prologue of all funclet calling helpers (CallXXXXFunclet) +;; +FUNCLET_CALL_PROLOGUE macro localsCount, alignStack + PUSH_CALLEE_SAVED_REGISTERS + + arguments_scratch_area_size = 20h + xmm_save_area_size = 10 * 10h ;; xmm6..xmm15 save area + stack_alloc_size = arguments_scratch_area_size + localsCount * 8 + alignStack * 8 + xmm_save_area_size + rsp_offsetof_arguments = stack_alloc_size + 8*8h + 8h + rsp_offsetof_locals = arguments_scratch_area_size + xmm_save_area_size + + alloc_stack stack_alloc_size + + save_xmm128_postrsp xmm6, (arguments_scratch_area_size + 0 * 10h) + save_xmm128_postrsp xmm7, (arguments_scratch_area_size + 1 * 10h) + save_xmm128_postrsp xmm8, (arguments_scratch_area_size + 2 * 10h) + save_xmm128_postrsp xmm9, (arguments_scratch_area_size + 3 * 10h) + save_xmm128_postrsp xmm10, (arguments_scratch_area_size + 4 * 10h) + save_xmm128_postrsp xmm11, (arguments_scratch_area_size + 5 * 10h) + save_xmm128_postrsp xmm12, (arguments_scratch_area_size + 6 * 10h) + save_xmm128_postrsp xmm13, (arguments_scratch_area_size + 7 * 10h) + save_xmm128_postrsp xmm14, (arguments_scratch_area_size + 8 * 10h) + save_xmm128_postrsp xmm15, (arguments_scratch_area_size + 9 * 10h) + + END_PROLOGUE +endm + +;; +;; Epilogue of all funclet calling helpers (CallXXXXFunclet) +;; +FUNCLET_CALL_EPILOGUE macro + movdqa xmm6, [rsp + arguments_scratch_area_size + 0 * 10h] + movdqa xmm7, [rsp + arguments_scratch_area_size + 1 * 10h] + movdqa xmm8, [rsp + arguments_scratch_area_size + 2 * 
10h] + movdqa xmm9, [rsp + arguments_scratch_area_size + 3 * 10h] + movdqa xmm10, [rsp + arguments_scratch_area_size + 4 * 10h] + movdqa xmm11, [rsp + arguments_scratch_area_size + 5 * 10h] + movdqa xmm12, [rsp + arguments_scratch_area_size + 6 * 10h] + movdqa xmm13, [rsp + arguments_scratch_area_size + 7 * 10h] + movdqa xmm14, [rsp + arguments_scratch_area_size + 8 * 10h] + movdqa xmm15, [rsp + arguments_scratch_area_size + 9 * 10h] + + add rsp, stack_alloc_size + + POP_CALLEE_SAVED_REGISTERS +endm + +; This helper enables us to call into a funclet after restoring Fp register +NESTED_ENTRY CallEHFunclet, _TEXT + ; On entry: + ; + ; RCX = throwable + ; RDX = PC to invoke + ; R8 = address of RBX register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame + ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + ; + + FUNCLET_CALL_PROLOGUE 0, 1 + + ; Restore RBX, RBP, RSI, RDI, R12, R13, R14, R15 from CONTEXT + mov rbp, [r8 + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] + mov rsi, [r8 + OFFSETOF__CONTEXT__Rsi - OFFSETOF__CONTEXT__Rbx] + mov rdi, [r8 + OFFSETOF__CONTEXT__Rdi - OFFSETOF__CONTEXT__Rbx] + mov r12, [r8 + OFFSETOF__CONTEXT__R12 - OFFSETOF__CONTEXT__Rbx] + mov r13, [r8 + OFFSETOF__CONTEXT__R13 - OFFSETOF__CONTEXT__Rbx] + mov r14, [r8 + OFFSETOF__CONTEXT__R14 - OFFSETOF__CONTEXT__Rbx] + mov r15, [r8 + OFFSETOF__CONTEXT__R15 - OFFSETOF__CONTEXT__Rbx] + + ; Restore XMM registers from CONTEXT + movdqa xmm6, [r8 + OFFSETOF__CONTEXT__Xmm6 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm7, [r8 + OFFSETOF__CONTEXT__Xmm7 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm8, [r8 + OFFSETOF__CONTEXT__Xmm8 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm9, [r8 + OFFSETOF__CONTEXT__Xmm9 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm10, [r8 + OFFSETOF__CONTEXT__Xmm10 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm11, [r8 + OFFSETOF__CONTEXT__Xmm11 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm12, [r8 + OFFSETOF__CONTEXT__Xmm12 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm13, [r8 + OFFSETOF__CONTEXT__Xmm13 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm14, [r8 + OFFSETOF__CONTEXT__Xmm14 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm15, [r8 + OFFSETOF__CONTEXT__Xmm15 - OFFSETOF__CONTEXT__Rbx] + + ; Save the SP of this function. + mov [r9], rsp + ; Invoke the funclet + call rdx + + FUNCLET_CALL_EPILOGUE + ret +NESTED_END CallEHFunclet, _TEXT + +; This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the +; frame pointer for accessing the locals in the parent method. +NESTED_ENTRY CallEHFilterFunclet, _TEXT + ; On entry: + ; + ; RCX = throwable + ; RDX = RBP of main function + ; R8 = PC to invoke + ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + ; + + FUNCLET_CALL_PROLOGUE 0, 1 + + ; Save the SP of this function + mov [r9], rsp + ; Restore RBP to match main function RBP + mov rbp, rdx + ; Invoke the filter funclet + call r8 + + FUNCLET_CALL_EPILOGUE + ret +NESTED_END CallEHFilterFunclet, _TEXT + ifdef FEATURE_INTERPRETER + NESTED_ENTRY InterpreterStub, _TEXT PROLOG_WITH_TRANSITION_BLOCK @@ -495,6 +568,289 @@ NESTED_ENTRY InterpreterStub, _TEXT EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END InterpreterStub, _TEXT + +; Copy arguments from the interpreter stack to the processor stack. +; The CPU stack slots are aligned to pointer size. 
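Load_Stack and the Load_* helpers below form a small calling-convention "program": r11 walks a list of routine addresses (each optionally followed by operands), r10 walks the interpreter's argument area, and each helper tail-jumps to the next list entry until the final entry transfers control to the jitted target. The following C++ model of that chaining is hedged: the names and the one-operand-per-entry layout are invented for illustration, and the real lists are emitted by the CallStubGenerator added elsewhere in this change:

// Illustrative model only; not the runtime's actual list layout.
#include <cassert>
#include <cstdint>

struct CallContext
{
    const int64_t*   interpArgs; // plays the role of r10 (interpreter stack args)
    const uintptr_t* entry;      // plays the role of r11 (cursor into the routine list)
    int64_t rcx = 0, rdx = 0;    // modelled argument registers
    int64_t stackSlot0 = 0;      // one modelled CPU stack slot
};

using Routine = void (*)(CallContext&);

static void Dispatch(CallContext& ctx)       // models "jmp qword ptr [r11]"
{
    reinterpret_cast<Routine>(*ctx.entry)(ctx);
}

static void Load_RCX_RDX(CallContext& ctx)   // no operands: advance the cursor by one entry
{
    ctx.rcx = *ctx.interpArgs++;
    ctx.rdx = *ctx.interpArgs++;
    ctx.entry += 1;
    Dispatch(ctx);
}

static void Load_Stack(CallContext& ctx)     // one operand entry: advance the cursor by two
{
    uintptr_t slotCount = ctx.entry[1];      // operand, like the encoded SP offset / slot count
    assert(slotCount == 1);
    ctx.stackSlot0 = *ctx.interpArgs++;
    ctx.entry += 2;
    Dispatch(ctx);
}

static void Target(CallContext& ctx)         // final entry: stands in for the jitted method
{
    assert(ctx.rcx == 1 && ctx.rdx == 2 && ctx.stackSlot0 == 3);
}

int main()
{
    const int64_t interpArgs[] = { 1, 2, 3 };
    const uintptr_t routines[] =
    {
        reinterpret_cast<uintptr_t>(&Load_RCX_RDX),
        reinterpret_cast<uintptr_t>(&Load_Stack), 1 /* slot count operand */,
        reinterpret_cast<uintptr_t>(&Target),
    };
    CallContext ctx{ interpArgs, routines };
    Dispatch(ctx);                           // like CallJittedMethodRet*: invoke the first entry
    return 0;
}
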
+LEAF_ENTRY Load_Stack, _TEXT + push rdi + push rsi + push rcx + mov edi, dword ptr [r11 + 8] ; SP offset + mov ecx, dword ptr [r11 + 12] ; number of stack slots + add edi, 20h ; the 3 pushes above plus return address + add rdi, rsp + mov rsi, r10 + shr rcx, 3 + rep movsq + mov r10, rsi + pop rcx + pop rsi + pop rdi + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Stack, _TEXT + +; Routines for passing value type arguments by reference in general purpose registers RCX, RDX, R8, R9 + +LEAF_ENTRY Load_Ref_RCX, _TEXT + mov rcx, r10 + add r10, [r11 + 8] ; size of the value type + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_RCX, _TEXT + +LEAF_ENTRY Load_Ref_RDX, _TEXT + mov rdx, r10 + add r10, [r11 + 8] ; size of the value type + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_RDX, _TEXT + +LEAF_ENTRY Load_Ref_R8, _TEXT + mov r8, r10 + add r10, [r11 + 8] ; size of the value type + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_R8, _TEXT + +LEAF_ENTRY Load_Ref_R9, _TEXT + mov r9, r10 + add r10, [r11 + 8] ; size of the value type + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_R9, _TEXT + +; Routines for passing arguments by value in general purpose registers RCX, RDX, R8, R9 + +LEAF_ENTRY Load_RCX, _TEXT + mov rcx, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX, _TEXT + +LEAF_ENTRY Load_RCX_RDX, _TEXT + mov rcx, [r10] + mov rdx, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX_RDX, _TEXT + +LEAF_ENTRY Load_RCX_RDX_R8, _TEXT + mov rcx, [r10] + mov rdx, [r10 + 8] + mov r8, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX_RDX_R8, _TEXT + +LEAF_ENTRY Load_RCX_RDX_R8_R9, _TEXT + mov rcx, [r10] + mov rdx, [r10 + 8] + mov r8, [r10 + 16] + mov r9, [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX_RDX_R8_R9, _TEXT + +LEAF_ENTRY Load_RDX, _TEXT + mov rdx, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX, _TEXT + +LEAF_ENTRY Load_RDX_R8, _TEXT + mov rdx, [r10] + mov r8, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_R8, _TEXT + +LEAF_ENTRY Load_RDX_R8_R9, _TEXT + mov rdx, [r10] + mov r8, [r10 + 8] + mov r9, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_R8_R9, _TEXT + +LEAF_ENTRY Load_R8, _TEXT + mov r8, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R8, _TEXT + +LEAF_ENTRY Load_R8_R9, _TEXT + mov r8, [r10] + mov r9, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R8_R9, _TEXT + +LEAF_ENTRY Load_R9, _TEXT + mov r9, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R9, _TEXT + +; Routines for passing arguments in floating point registers XMM0..XMM3 + +LEAF_ENTRY Load_XMM0, _TEXT + movsd xmm0, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + add r10, 10h + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END 
Load_XMM0_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM1, _TEXT + movsd xmm1, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM2, _TEXT + movsd xmm2, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM3, _TEXT + movsd xmm3, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3, _TEXT + +NESTED_ENTRY CallJittedMethodRetVoid, _TEXT + push_vol_reg rbp + mov rbp, rsp +END_PROLOGUE + add r9, 20h ; argument save area + alignment + sub rsp, r9 ; total stack space + mov r11, rcx ; The routines list + mov r10, rdx ; interpreter stack args + call qword ptr [r11] + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetVoid, _TEXT + +NESTED_ENTRY CallJittedMethodRetBuff, _TEXT + push_vol_reg rbp + mov rbp, rsp +END_PROLOGUE + add r9, 20h ; argument save area + alignment + sub rsp, r9 ; total stack space + mov r11, rcx ; The routines list + mov r10, rdx ; interpreter stack args + mov rcx, r8 ; return buffer + call qword ptr [r11] + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetBuff, _TEXT + +NESTED_ENTRY CallJittedMethodRetDouble, _TEXT + push_nonvol_reg rbp + mov rbp, rsp + push_vol_reg r8 + push_vol_reg rax ; align +END_PROLOGUE + add r9, 20h ; argument save area + alignment + sub rsp, r9 ; total stack space + mov r11, rcx ; The routines list + mov r10, rdx ; interpreter stack args + call qword ptr [r11] + add rsp, 20h + mov r8, [rbp - 8] + movsd real8 ptr [r8], xmm0 + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetDouble, _TEXT + +NESTED_ENTRY CallJittedMethodRetI8, _TEXT + push_nonvol_reg rbp + mov rbp, rsp + push_vol_reg r8 + push_vol_reg rax ; align +END_PROLOGUE + add r9, 20h ; argument save area + alignment + sub rsp, r9 ; total stack space + mov r11, rcx ; The routines list + mov r10, rdx ; interpreter stack args + call qword ptr [r11] + add rsp, 20h + mov r8, [rbp - 8] + mov qword ptr [r8], rax + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetI8, _TEXT + endif ; FEATURE_INTERPRETER - end \ No newline at end of file + end diff --git a/src/coreclr/vm/amd64/AsmMacros.inc b/src/coreclr/vm/amd64/AsmMacros.inc index 2d14b9c31e8f..8d0562057125 100644 --- a/src/coreclr/vm/amd64/AsmMacros.inc +++ b/src/coreclr/vm/amd64/AsmMacros.inc @@ -191,17 +191,17 @@ Section ends ; Inlined version of GetThread ; Trashes rax and r11 ; -gCurrentThreadInfo TEXTEQU +t_CurrentThreadInfo TEXTEQU INLINE_GETTHREAD macro Reg EXTERN _tls_index : DWORD - EXTERN gCurrentThreadInfo:DWORD + EXTERN t_CurrentThreadInfo:DWORD mov r11d, [_tls_index] mov rax, gs:[OFFSET__TEB__ThreadLocalStoragePointer] mov rax, [rax + r11 * 8] - mov r11d, SECTIONREL gCurrentThreadInfo + mov r11d, SECTIONREL t_CurrentThreadInfo mov Reg, [rax + r11] endm @@ -422,6 +422,13 @@ PROLOG_WITH_TRANSITION_BLOCK macro extraLocals := <0>, stackAllocOnEntry := <0>, endm 
+EPILOG_WITH_TRANSITION_BLOCK macro + + add rsp, __PWTB_StackAlloc + POP_CALLEE_SAVED_REGISTERS + + endm + EPILOG_WITH_TRANSITION_BLOCK_RETURN macro add rsp, __PWTB_StackAlloc @@ -438,3 +445,45 @@ EPILOG_WITH_TRANSITION_BLOCK_TAILCALL macro POP_CALLEE_SAVED_REGISTERS endm + +t_runtime_thread_locals TEXTEQU + +; Inlined version of GetThreadEEAllocContext. Trashes RegTrash, r11 +INLINE_GET_ALLOC_CONTEXT_BASE macro Reg, RegTrash + + EXTERN _tls_index : DWORD + EXTERN t_runtime_thread_locals : DWORD + + mov r11d, [_tls_index] + mov RegTrash, gs:[OFFSET__TEB__ThreadLocalStoragePointer] + mov RegTrash, [RegTrash + r11 * 8] + mov r11d, SECTIONREL t_runtime_thread_locals + lea Reg, [RegTrash + r11] + + endm + +OFFSETOF__ee_alloc_context EQU OFFSETOF__RuntimeThreadLocals__ee_alloc_context + +; Pushes a TransitionBlock on the stack without saving the argument registers. See +; the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout. +PUSH_COOP_PINVOKE_FRAME macro target + + __PWTB_StackAlloc = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES + 8 ; alignment to make the stack 16b aligned + __PWTB_TransitionBlock = __PWTB_StackAlloc + + PUSH_CALLEE_SAVED_REGISTERS + alloc_stack __PWTB_StackAlloc + END_PROLOGUE + lea target, [rsp + __PWTB_TransitionBlock] + + endm + +POP_COOP_PINVOKE_FRAME macro + + add rsp, __PWTB_StackAlloc + POP_CALLEE_SAVED_REGISTERS + + endm + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 diff --git a/src/coreclr/vm/amd64/AsmMacros_Shared.h b/src/coreclr/vm/amd64/AsmMacros_Shared.h new file mode 100644 index 000000000000..7141b33b8f81 --- /dev/null +++ b/src/coreclr/vm/amd64/AsmMacros_Shared.h @@ -0,0 +1,8 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include "asmconstants.h" +#include "unixasmmacros.inc" + diff --git a/src/coreclr/vm/amd64/AsmMacros_Shared.inc b/src/coreclr/vm/amd64/AsmMacros_Shared.inc new file mode 100644 index 000000000000..ba247ce00441 --- /dev/null +++ b/src/coreclr/vm/amd64/AsmMacros_Shared.inc @@ -0,0 +1,24 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +; This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +include AsmConstants.inc +include AsmMacros.inc + +EXTERN g_lowest_address : QWORD +EXTERN g_highest_address : QWORD +EXTERN g_ephemeral_low : QWORD +EXTERN g_ephemeral_high : QWORD +EXTERN g_card_table : QWORD + +ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +EXTERN g_card_bundle_table : QWORD +endif + +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +EXTERN g_write_watch_table : QWORD +endif + +EXTERN RhpGcAlloc : PROC +EXTERN RhExceptionHandling_FailedAllocation : PROC diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S new file mode 100644 index 000000000000..bc0edaccea05 --- /dev/null +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S @@ -0,0 +1,80 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
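+
+// CoreCLR-specific stubs for cached interface dispatch on Unix AMD64 (System V calling
+// convention). The Windows counterparts live in CachedInterfaceDispatchCoreCLR.asm and use
+// rcx/rdx where these routines use rdi/rsi; in both flavors r11 carries the address of the
+// indirection cell on entry, as noted on each routine below.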
+ +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +// Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + + // r11 currently contains the indirection cell address. + // load r11 to point to the vtable offset (which is stored in the m_pCache field). + mov r11, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + // r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + // to get to the VTable chunk + mov rax, r11 + shr rax, 32 + + // Load the MethodTable from the object instance in rdi, and add it to the vtable offset + // to get the address in the vtable chunk list of what we want to dereference +#ifdef TARGET_APPLE +// Apple's linker has issues which break unwind info if +// an ALTERNATE_ENTRY is present in the middle of a function see https://github.com/dotnet/runtime/pull/114982#discussion_r2083272768 +.cfi_endproc +#endif + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation +#ifdef TARGET_APPLE +.cfi_startproc +#endif + add rax, [rdi] + + // Load the target address of the vtable chunk into rax + mov rax, [rax] + + // Compute the chunk offset + shr r11d, 16 + + // Load the target address of the virtual function into rax + mov rax, [rax + r11] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +// On Input: +// r11 contains the address of the indirection cell +// [rsp+0] m_ReturnAddress: contains the return address of caller to stub +NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock + mov rsi, r11 // indirection cell + + call C_FUNC(CID_ResolveWorker) + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END RhpInterfaceDispatchSlow, _TEXT + +// On Input: +// r11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) +NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock + mov rsi, r11 // indirection cell + + call C_FUNC(CID_VirtualOpenDelegateDispatchWorker) + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm new file mode 100644 index 000000000000..f5cacb3207e1 --- /dev/null +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm @@ -0,0 +1,76 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +include +include AsmConstants.inc + +ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + extern CID_ResolveWorker:proc + extern CID_VirtualOpenDelegateDispatchWorker:proc + +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r11 currently contains the indirection cell address. + ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). 
+ mov r11, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; to get to the VTable chunk + mov rax, r11 + shr rax, 32 + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable chunk list of what we want to dereference +ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + add rax, [rcx] + + ;; Load the target address of the vtable chunk into rax + mov rax, [rax] + + ;; Compute the chunk offset + shr r11d, 16 + + ;; Load the target address of the virtual function into rax + mov rax, [rax + r11] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +;; On Input: +;; r11 contains the address of the indirection cell +;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub +NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rdx, r11 ; indirection cell + + call CID_ResolveWorker + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END RhpInterfaceDispatchSlow, _TEXT + +;; On Input: +;; r11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) +;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub +NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rdx, r11 ; indirection cell + + call CID_VirtualOpenDelegateDispatchWorker + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT + +endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + + end \ No newline at end of file diff --git a/src/coreclr/vm/amd64/Context.S b/src/coreclr/vm/amd64/Context.S index 62ea993df9fa..11830a6e4366 100644 --- a/src/coreclr/vm/amd64/Context.S +++ b/src/coreclr/vm/amd64/Context.S @@ -12,6 +12,8 @@ #define CONTEXT_INTEGER 2 // Signature: EXTERN_C void STDCALL ClrRestoreNonvolatileContextWorker(PCONTEXT ContextRecord, DWORD64 ssp); +// Note that this method is used to invoke EH funclets that take arguments in rcx and rdx, which is why those +// registers are restored below in addition to the non-volatile registers. 
NESTED_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT, NoHandler push_nonvol_reg rbp set_cfa_register rbp, 0 diff --git a/src/coreclr/vm/amd64/ExternalMethodFixupThunk.asm b/src/coreclr/vm/amd64/ExternalMethodFixupThunk.asm index 58aa0b9fe3fb..8fba12d4c6e6 100644 --- a/src/coreclr/vm/amd64/ExternalMethodFixupThunk.asm +++ b/src/coreclr/vm/amd64/ExternalMethodFixupThunk.asm @@ -5,7 +5,6 @@ include include AsmConstants.inc extern ExternalMethodFixupWorker:proc - extern ProcessCLRException:proc ifdef FEATURE_READYTORUN extern DynamicHelperWorker:proc diff --git a/src/coreclr/vm/amd64/GenericCLRToCOMCallStubs.asm b/src/coreclr/vm/amd64/GenericCLRToCOMCallStubs.asm index 67d749d8e6c6..ae49ccdd7814 100644 --- a/src/coreclr/vm/amd64/GenericCLRToCOMCallStubs.asm +++ b/src/coreclr/vm/amd64/GenericCLRToCOMCallStubs.asm @@ -8,10 +8,8 @@ include asmconstants.inc extern CLRToCOMWorker:proc -extern ProcessCLRException:proc - -NESTED_ENTRY GenericCLRToCOMCallStub, _TEXT, ProcessCLRException +NESTED_ENTRY GenericCLRToCOMCallStub, _TEXT PROLOG_WITH_TRANSITION_BLOCK 8 diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 7e38053605dd..f5b26b662441 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -29,7 +29,7 @@ EXTERN g_card_bundle_table:QWORD endif ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -EXTERN g_sw_ww_table:QWORD +EXTERN g_write_watch_table:QWORD EXTERN g_sw_ww_enabled_for_gc_heap:BYTE endif @@ -141,7 +141,7 @@ ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP je CheckCardTable mov rax, rdi shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift - add rax, qword ptr [g_sw_ww_table] + add rax, qword ptr [g_write_watch_table] cmp byte ptr [rax], 0h jne CheckCardTable mov byte ptr [rax], 0FFh diff --git a/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm b/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm index bb73c266d0eb..97ead4f00581 100644 --- a/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm +++ b/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm @@ -26,9 +26,8 @@ include asmconstants.inc ; whenever they change. ; ; At anyone time, the memory pointed to by JIT_WriteBarrier will contain one -; of these functions. See StompWriteBarrierResize and StompWriteBarrierEphemeral -; in VM\AMD64\JITInterfaceAMD64.cpp and InitJITHelpers1 in VM\JITInterfaceGen.cpp -; for more info. +; of these functions. See StompWriteBarrierResize, StompWriteBarrierEphemeral +; and InitJITWriteBarrierHelpers in VM\AMD64\JITInterfaceAMD64.cpp for more info. ; ; READ THIS!!!!!! 
; it is imperative that the addresses of the values that we overwrite diff --git a/src/coreclr/vm/amd64/JitHelpers_Slow.asm b/src/coreclr/vm/amd64/JitHelpers_Slow.asm index 63bc8cc43f1a..d269472be198 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Slow.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Slow.asm @@ -27,7 +27,7 @@ EXTERN g_card_bundle_table:QWORD endif ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -EXTERN g_sw_ww_table:QWORD +EXTERN g_write_watch_table:QWORD EXTERN g_sw_ww_enabled_for_gc_heap:BYTE endif @@ -39,22 +39,8 @@ EXTERN g_GCShadow:QWORD EXTERN g_GCShadowEnd:QWORD endif -JIT_NEW equ ?JIT_New@@YAPEAVObject@@PEAUCORINFO_CLASS_STRUCT_@@@Z -CopyValueClassUnchecked equ ?CopyValueClassUnchecked@@YAXPEAX0PEAVMethodTable@@@Z -JIT_Box equ ?JIT_Box@@YAPEAVObject@@PEAUCORINFO_CLASS_STRUCT_@@PEAX@Z -g_pStringClass equ ?g_pStringClass@@3PEAVMethodTable@@EA -FramedAllocateString equ ?FramedAllocateString@@YAPEAVStringObject@@K@Z -JIT_NewArr1 equ ?JIT_NewArr1@@YAPEAVObject@@PEAUCORINFO_CLASS_STRUCT_@@_J@Z - INVALIDGCVALUE equ 0CCCCCCCDh -extern JIT_NEW:proc -extern CopyValueClassUnchecked:proc -extern JIT_Box:proc -extern g_pStringClass:QWORD -extern FramedAllocateString:proc -extern JIT_NewArr1:proc - ifdef _DEBUG ; Version for when we're sure to be in the GC, checks whether or not the card ; needs to be updated @@ -121,7 +107,7 @@ ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP je CheckCardTable mov r10, rcx shr r10, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift - add r10, qword ptr [g_sw_ww_table] + add r10, qword ptr [g_write_watch_table] cmp byte ptr [r10], 0h jne CheckCardTable mov byte ptr [r10], 0FFh @@ -165,258 +151,5 @@ endif LEAF_END_MARKED JIT_WriteBarrier_Debug, _TEXT endif - -extern g_global_alloc_lock:dword -extern g_global_alloc_context:qword - -LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT - - mov r8d, [rcx + OFFSET__MethodTable__m_BaseSize] - - ; m_BaseSize is guaranteed to be a multiple of 8. - - inc [g_global_alloc_lock] - jnz JIT_NEW - - mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__m_CombinedLimit] ; m_CombinedLimit - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr - mov [rax], rcx - mov [g_global_alloc_lock], -1 - - ret - - AllocFailed: - mov [g_global_alloc_lock], -1 - jmp JIT_NEW -LEAF_END JIT_TrialAllocSFastSP, _TEXT - -; HCIMPL2(Object*, JIT_Box, CORINFO_CLASS_HANDLE type, void* unboxedData) -NESTED_ENTRY JIT_BoxFastUP, _TEXT - - ; m_BaseSize is guaranteed to be a multiple of 8. 
- mov r8d, [rcx + OFFSET__MethodTable__m_BaseSize] - - inc [g_global_alloc_lock] - jnz JIT_Box - - mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__m_CombinedLimit] ; m_CombinedLimit - - add r8, rax - - cmp r8, r10 - ja NoAlloc - - test rdx, rdx - je NullRef - - mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr - mov [rax], rcx - mov [g_global_alloc_lock], -1 - - ; Check whether the object contains pointers - test dword ptr [rcx + OFFSETOF__MethodTable__m_dwFlags], MethodTable__enum_flag_ContainsGCPointers - jnz ContainsPointers - - ; We have no pointers - emit a simple inline copy loop - - mov ecx, [rcx + OFFSET__MethodTable__m_BaseSize] - sub ecx, 18h ; sizeof(ObjHeader) + sizeof(Object) + last slot - - CopyLoop: - mov r8, [rdx+rcx] - mov [rax+rcx+8], r8 - - sub ecx, 8 - jge CopyLoop - REPRET - - ContainsPointers: - - ; Do call to CopyValueClassUnchecked(object, data, pMT) - - push_vol_reg rax - alloc_stack 20h - END_PROLOGUE - - mov r8, rcx - lea rcx, [rax + 8] - call CopyValueClassUnchecked - - add rsp, 20h - pop rax - ret - - NoAlloc: - NullRef: - mov [g_global_alloc_lock], -1 - jmp JIT_Box -NESTED_END JIT_BoxFastUP, _TEXT - -LEAF_ENTRY AllocateStringFastUP, _TEXT - - ; We were passed the number of characters in ECX - - ; we need to load the method table for string from the global - - mov r11, [g_pStringClass] - - ; Instead of doing elaborate overflow checks, we just limit the number of elements - ; to (LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR) or less. - ; This will avoid all overflow problems, as well as making sure - ; big string objects are correctly allocated in the big object heap. - - cmp ecx, (ASM_LARGE_OBJECT_SIZE - 256)/2 - jae FramedAllocateString - - ; Calculate the final size to allocate. - ; We need to calculate baseSize + cnt*2, then round that up by adding 7 and anding ~7. - - lea r8d, [STRING_BASE_SIZE + ecx*2 + 7] - and r8d, -8 - - inc [g_global_alloc_lock] - jnz FramedAllocateString - - mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__m_CombinedLimit] ; m_CombinedLimit - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr - mov [rax], r11 - mov [g_global_alloc_lock], -1 - - mov [rax + OFFSETOF__StringObject__m_StringLength], ecx - - ret - - AllocFailed: - mov [g_global_alloc_lock], -1 - jmp FramedAllocateString -LEAF_END AllocateStringFastUP, _TEXT - -; HCIMPL2(Object*, JIT_NewArr1VC_UP, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT - - ; We were passed a (shared) method table in RCX, which contains the element type. - - ; The element count is in RDX - - ; NOTE: if this code is ported for CORINFO_HELP_NEWSFAST_ALIGN8, it will need - ; to emulate the double-specific behavior of JIT_TrialAlloc::GenAllocArray. - - ; Do a conservative check here. This is to avoid overflow while doing the calculations. We don't - ; have to worry about "large" objects, since the allocation quantum is never big enough for - ; LARGE_OBJECT_SIZE. - - ; For Value Classes, this needs to be 2^16 - slack (2^32 / max component size), - ; The slack includes the size for the array header and round-up ; for alignment. Use 256 for the - ; slack value out of laziness. 
- - ; In both cases we do a final overflow check after adding to the alloc_ptr. - - cmp rdx, (65535 - 256) - jae JIT_NewArr1 - - movzx r8d, word ptr [rcx + OFFSETOF__MethodTable__m_dwFlags] ; component size is low 16 bits - imul r8d, edx ; signed mul, but won't overflow due to length restriction above - add r8d, dword ptr [rcx + OFFSET__MethodTable__m_BaseSize] - - ; round the size to a multiple of 8 - - add r8d, 7 - and r8d, -8 - - inc [g_global_alloc_lock] - jnz JIT_NewArr1 - - mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__m_CombinedLimit] ; m_CombinedLimit - - add r8, rax - jc AllocFailed - - cmp r8, r10 - ja AllocFailed - - mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr - mov [rax], rcx - mov [g_global_alloc_lock], -1 - - mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx - - ret - - AllocFailed: - mov [g_global_alloc_lock], -1 - jmp JIT_NewArr1 -LEAF_END JIT_NewArr1VC_UP, _TEXT - - -; HCIMPL2(Object*, JIT_NewArr1OBJ_UP, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -LEAF_ENTRY JIT_NewArr1OBJ_UP, _TEXT - - ; We were passed a (shared) method table in RCX, which contains the element type. - - ; The element count is in RDX - - ; NOTE: if this code is ported for CORINFO_HELP_NEWSFAST_ALIGN8, it will need - ; to emulate the double-specific behavior of JIT_TrialAlloc::GenAllocArray. - - ; Verifies that LARGE_OBJECT_SIZE fits in 32-bit. This allows us to do array size - ; arithmetic using 32-bit registers. - .erre ASM_LARGE_OBJECT_SIZE lt 100000000h - - cmp rdx, (ASM_LARGE_OBJECT_SIZE - 256)/8 ; sizeof(void*) - jae OversizedArray - - ; In this case we know the element size is sizeof(void *), or 8 for x64 - ; This helps us in two ways - we can shift instead of multiplying, and - ; there's no need to align the size either - - mov r8d, dword ptr [rcx + OFFSET__MethodTable__m_BaseSize] - lea r8d, [r8d + edx * 8] - - ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed - ; to be a multiple of 8. - - inc [g_global_alloc_lock] - jnz JIT_NewArr1 - - mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__m_CombinedLimit] ; m_CombinedLimit - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr - mov [rax], rcx - mov [g_global_alloc_lock], -1 - - mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx - - ret - - AllocFailed: - mov [g_global_alloc_lock], -1 - - OversizedArray: - jmp JIT_NewArr1 -LEAF_END JIT_NewArr1OBJ_UP, _TEXT - - end diff --git a/src/coreclr/vm/amd64/StubPrecodeDynamicHelpers.S b/src/coreclr/vm/amd64/StubPrecodeDynamicHelpers.S new file mode 100644 index 000000000000..b0d92303ffb4 --- /dev/null +++ b/src/coreclr/vm/amd64/StubPrecodeDynamicHelpers.S @@ -0,0 +1,264 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
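+
+// Dynamic helper stubs backed by StubPrecodes. Each stub receives a secret argument in r10
+// (SecretArg_Reg below) that points at a small per-stub data blob; the DATA_SLOT and
+// GENERIC_DICT_DATA_SLOT macros index into it. A rough C sketch of that layout, inferred
+// from the OFFSETOF__ constants added in asmconstants.h (field types are illustrative only):
+//
+//   struct DynamicHelperStubArgs {
+//       void* Constant1;            // 0x00
+//       void* Constant2;            // 0x08
+//       void* Helper;               // 0x10, helper the stub tail-calls
+//   };
+//
+//   struct GenericDictionaryDynamicHelperStubData {
+//       uint32_t SecondIndir;       // 0x00
+//       uint32_t LastIndir;         // 0x04
+//       uint32_t SizeOffset;        // 0x08
+//       uint32_t SlotOffset;        // 0x0c
+//       void*    HandleArgs;        // 0x10
+//   };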
+ +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + +#ifdef FEATURE_STUBPRECODE_DYNAMIC_HELPERS + +#define SecretArg_Reg r10 +#define FirstArg_Reg rdi +#define SecondArg_Reg rsi +#define SecondArg_DwordReg esi +#define ThirdArg_Reg rdx +#define ThirdArg_DwordReg edx +#define FourthArg_Reg rcx + +#define DATA_SLOT(field) r10 + OFFSETOF__DynamicHelperStubArgs__ ## field +#define GENERIC_DICT_DATA_SLOT(field) r10 + OFFSETOF__GenericDictionaryDynamicHelperStubData__ ## field + +LEAF_ENTRY DynamicHelper_CallHelper_1Arg, _TEXT + mov FirstArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + jmp QWORD PTR [DATA_SLOT(Helper)] +LEAF_END DynamicHelper_CallHelper_1Arg, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_AddSecondArg, _TEXT + mov SecondArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + jmp QWORD PTR [DATA_SLOT(Helper)] +LEAF_END DynamicHelper_CallHelper_AddSecondArg, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_2Arg, _TEXT + mov FirstArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + mov SecondArg_Reg, QWORD PTR [DATA_SLOT(Constant2)] + jmp QWORD PTR [DATA_SLOT(Helper)] +LEAF_END DynamicHelper_CallHelper_2Arg, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_ArgMove, _TEXT + mov SecondArg_Reg, FirstArg_Reg + mov FirstArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + jmp QWORD PTR [DATA_SLOT(Helper)] +LEAF_END DynamicHelper_CallHelper_ArgMove, _TEXT + +LEAF_ENTRY DynamicHelper_Return, _TEXT + ret +LEAF_END DynamicHelper_Return, _TEXT + +LEAF_ENTRY DynamicHelper_ReturnConst, _TEXT + mov rax, SecretArg_Reg + ret +LEAF_END DynamicHelper_ReturnConst, _TEXT + +LEAF_ENTRY DynamicHelper_ReturnIndirConst, _TEXT + mov rax, QWORD PTR [SecretArg_Reg] + ret +LEAF_END DynamicHelper_ReturnIndirConst, _TEXT + +LEAF_ENTRY DynamicHelper_ReturnIndirConstWithOffset, _TEXT + mov rax, QWORD PTR [DATA_SLOT(Constant1)] + mov rax, QWORD PTR [rax] + add rax, QWORD PTR [DATA_SLOT(Constant2)] + ret +LEAF_END DynamicHelper_ReturnIndirConstWithOffset, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_AddThirdArg, _TEXT + mov ThirdArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + jmp QWORD PTR [DATA_SLOT(Helper)] +LEAF_END DynamicHelper_CallHelper_AddThirdArg, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_AddThirdAndFourthArg, _TEXT + mov ThirdArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + mov FourthArg_Reg, QWORD PTR [DATA_SLOT(Constant2)] + jmp QWORD PTR [DATA_SLOT(Helper)] +LEAF_END DynamicHelper_CallHelper_AddThirdAndFourthArg, _TEXT + +// Generic dictionaries can have 2 or 3 indirections (5 indirs of 32bit size, and 2 8 byte quantities) = 40 bytes +// If it has 2 its for a Method, and the first indirection is always offsetof(InstantiatiedMethodDesc, m_pPerInstInfo) +// If it has 3 its for a Class, and the first indirection is always MethodTable::GetOffsetOfPerInstInfo +// It can also have 0, 0, to just return the class type +// Test For Null Or Not (If not present, cannot have a size check) +// SizeCheck or not (Only needed if size > Some number) +// +// Also special case where we just return the TypeHandle or MethodDesc itself +// Should probably have special case for 1, 2, 3 generic arg of MethodDesc/MethodTable + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SecondIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + // SizeCheck + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SizeOffset)] + mov 
ThirdArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SlotOffset)] + cmp qword ptr[rax + SecondArg_Reg], ThirdArg_Reg + jle LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall) + // Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + // Null test + test rax, rax + je LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall) + ret +LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall): + mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)] + PREPARE_EXTERNAL_VAR g_pClassWithSlotAndModule, rax + jmp [rax] +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_TestForNull, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SecondIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + // Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + // Null test + test rax, rax + je LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall) + ret +LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall): + mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)] + PREPARE_EXTERNAL_VAR g_pClassWithSlotAndModule, rax + jmp [rax] +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_TestForNull, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SecondIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + // Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // SizeCheck + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SizeOffset)] + mov ThirdArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SlotOffset)] + cmp qword ptr[rax + SecondArg_Reg], ThirdArg_Reg + jle LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall) + // Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + // Null test + test rax, rax + je LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall) + ret +LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall): + mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)] + PREPARE_EXTERNAL_VAR g_pMethodWithSlotAndModule, rax + jmp [rax] +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_TestForNull, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, 
QWORD PTR [SecondArg_Reg+rax] + // Null test + test rax, rax + je LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall) + ret +LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall): + mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)] + PREPARE_EXTERNAL_VAR g_pMethodWithSlotAndModule, rax + jmp [rax] +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_TestForNull, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_0, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + mov rax, QWORD PTR [rax] + // Standard Indirection + mov rax, QWORD PTR [rax] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_0, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_1, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + mov rax, QWORD PTR [rax] + // Standard Indirection + mov rax, QWORD PTR [rax + 0x8] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_1, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_2, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + mov rax, QWORD PTR [rax] + // Standard Indirection + mov rax, QWORD PTR [rax + 0x10] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_2, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_3, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + mov rax, QWORD PTR [rax] + // Standard Indirection + mov rax, QWORD PTR [rax + 0x18] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_3, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_0, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + mov rax, QWORD PTR [rax] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_0, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_1, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + mov rax, QWORD PTR [rax + 0x8] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_1, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_2, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + mov rax, QWORD PTR [rax + 0x10] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_2, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_3, _TEXT + // First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + mov rax, QWORD PTR [rax + 0x18] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_3, _TEXT + +#endif //// FEATURE_STUBPRECODE_DYNAMIC_HELPERS diff --git 
a/src/coreclr/vm/amd64/StubPrecodeDynamicHelpers.asm b/src/coreclr/vm/amd64/StubPrecodeDynamicHelpers.asm new file mode 100644 index 000000000000..b30da0715429 --- /dev/null +++ b/src/coreclr/vm/amd64/StubPrecodeDynamicHelpers.asm @@ -0,0 +1,278 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +include +include AsmConstants.inc + +ifdef FEATURE_STUBPRECODE_DYNAMIC_HELPERS + +SecretArg_Reg equ r10 +FirstArg_Reg equ rcx +SecondArg_Reg equ rdx +SecondArg_DwordReg equ edx +ThirdArg_Reg equ r8 +ThirdArg_DwordReg equ r8d +FourthArg_Reg equ r9 + +DATA_SLOT macro field + exitm @CatStr(r10, < + OFFSETOF__DynamicHelperStubArgs__>, field) +endm + +GENERIC_DICT_DATA_SLOT macro field + exitm @CatStr(r10, < + OFFSETOF__GenericDictionaryDynamicHelperStubData__>, field) +endm + +extern g_pClassWithSlotAndModule:QWORD +extern g_pMethodWithSlotAndModule:QWORD + +LEAF_ENTRY DynamicHelper_CallHelper_1Arg, _TEXT + mov FirstArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + mov rax, QWORD PTR [DATA_SLOT(Helper)] + TAILJMP_RAX +LEAF_END DynamicHelper_CallHelper_1Arg, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_AddSecondArg, _TEXT + mov SecondArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + mov rax, QWORD PTR [DATA_SLOT(Helper)] + TAILJMP_RAX +LEAF_END DynamicHelper_CallHelper_AddSecondArg, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_2Arg, _TEXT + mov FirstArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + mov SecondArg_Reg, QWORD PTR [DATA_SLOT(Constant2)] + mov rax, QWORD PTR [DATA_SLOT(Helper)] + TAILJMP_RAX +LEAF_END DynamicHelper_CallHelper_2Arg, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_ArgMove, _TEXT + mov SecondArg_Reg, FirstArg_Reg + mov FirstArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + mov rax, QWORD PTR [DATA_SLOT(Helper)] + TAILJMP_RAX +LEAF_END DynamicHelper_CallHelper_ArgMove, _TEXT + +LEAF_ENTRY DynamicHelper_Return, _TEXT + ret +LEAF_END DynamicHelper_Return, _TEXT + +LEAF_ENTRY DynamicHelper_ReturnConst, _TEXT + mov rax, SecretArg_Reg + ret +LEAF_END DynamicHelper_ReturnConst, _TEXT + +LEAF_ENTRY DynamicHelper_ReturnIndirConst, _TEXT + mov rax, QWORD PTR [SecretArg_Reg] + ret +LEAF_END DynamicHelper_ReturnIndirConst, _TEXT + +LEAF_ENTRY DynamicHelper_ReturnIndirConstWithOffset, _TEXT + mov rax, QWORD PTR [DATA_SLOT(Constant1)] + mov rax, QWORD PTR [rax] + add rax, QWORD PTR [DATA_SLOT(Constant2)] + ret +LEAF_END DynamicHelper_ReturnIndirConstWithOffset, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_AddThirdArg, _TEXT + mov ThirdArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + mov rax, QWORD PTR [DATA_SLOT(Helper)] + TAILJMP_RAX +LEAF_END DynamicHelper_CallHelper_AddThirdArg, _TEXT + +LEAF_ENTRY DynamicHelper_CallHelper_AddThirdAndFourthArg, _TEXT + mov ThirdArg_Reg, QWORD PTR [DATA_SLOT(Constant1)] + mov FourthArg_Reg, QWORD PTR [DATA_SLOT(Constant2)] + mov rax, QWORD PTR [DATA_SLOT(Helper)] + TAILJMP_RAX +LEAF_END DynamicHelper_CallHelper_AddThirdAndFourthArg, _TEXT + +; Generic dictionaries can have 2 or 3 indirections (5 indirs of 32bit size, and 2 8 byte quantities) = 40 bytes +; If it has 2 its for a Method, and the first indirection is always offsetof(InstantiatiedMethodDesc, m_pPerInstInfo) +; If it has 3 its for a Class, and the first indirection is always MethodTable::GetOffsetOfPerInstInfo +; It can also have 0, 0, to just return the class type +; Test For Null Or Not (If not present, cannot have a size check) +; SizeCheck or not (Only needed if size > Some number) +; +; Also special case where we just return the 
TypeHandle or MethodDesc itself +; Should probably have special case for 1, 2, 3 generic arg of MethodDesc/MethodTable + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SecondIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ; SizeCheck + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SizeOffset)] + mov ThirdArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SlotOffset)] + cmp qword ptr[rax + SecondArg_Reg], ThirdArg_Reg + jle DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall + ; Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ; Null test + test rax, rax + je DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall + ret +DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall: + mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)] + mov rax, QWORD PTR [g_pClassWithSlotAndModule] + TAILJMP_RAX +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_TestForNull, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SecondIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ; Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ; Null test + test rax, rax + je DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall + ret +DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall: + mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)] + mov rax, QWORD PTR [g_pClassWithSlotAndModule] + TAILJMP_RAX +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_TestForNull, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SecondIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ; Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; SizeCheck + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SizeOffset)] + mov ThirdArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SlotOffset)] + cmp qword ptr[rax + SecondArg_Reg], ThirdArg_Reg + jle DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall + ; Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ; Null test + test rax, rax + je DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall + ret +DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall: + mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)] + mov rax, QWORD PTR 
[g_pMethodWithSlotAndModule] + TAILJMP_RAX +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_TestForNull, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ; Null test + test rax, rax + je DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall + ret +DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall: + mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)] + mov rax, QWORD PTR [g_pMethodWithSlotAndModule] + TAILJMP_RAX +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_TestForNull, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)] + mov rax, QWORD PTR [SecondArg_Reg+rax] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_0, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + mov rax, QWORD PTR [rax] + ; Standard Indirection + mov rax, QWORD PTR [rax] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_0, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_1, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + mov rax, QWORD PTR [rax] + ; Standard Indirection + mov rax, QWORD PTR [rax + 8h] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_1, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_2, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + mov rax, QWORD PTR [rax] + ; Standard Indirection + mov rax, QWORD PTR [rax + 10h] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_2, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_3, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + mov rax, QWORD PTR [rax] + ; Standard Indirection + mov rax, QWORD PTR [rax + 18h] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_3, _TEXT + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_0, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + mov rax, QWORD PTR [rax] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_0, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_1, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + mov rax, QWORD PTR [rax + 08h] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_1, _TEXT + + +LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_2, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + mov rax, QWORD PTR [rax + 10h] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_2, _TEXT + + +LEAF_ENTRY 
DynamicHelper_GenericDictionaryLookup_Method_3, _TEXT + ; First indirection + mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + mov rax, QWORD PTR [rax + 18h] + ret +LEAF_END DynamicHelper_GenericDictionaryLookup_Method_3, _TEXT + +endif ;; FEATURE_STUBPRECODE_DYNAMIC_HELPERS + end \ No newline at end of file diff --git a/src/coreclr/vm/amd64/ThePreStubAMD64.asm b/src/coreclr/vm/amd64/ThePreStubAMD64.asm index 2be63172dcfc..da90bbb9c0a3 100644 --- a/src/coreclr/vm/amd64/ThePreStubAMD64.asm +++ b/src/coreclr/vm/amd64/ThePreStubAMD64.asm @@ -5,9 +5,8 @@ include include AsmConstants.inc extern PreStubWorker:proc - extern ProcessCLRException:proc -NESTED_ENTRY ThePreStub, _TEXT, ProcessCLRException +NESTED_ENTRY ThePreStub, _TEXT PROLOG_WITH_TRANSITION_BLOCK diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index b533789980c5..6170adf6d4f2 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -4,7 +4,9 @@ include include AsmConstants.inc -CHAIN_SUCCESS_COUNTER equ ?g_dispatch_cache_chain_success_counter@@3_KA +ifdef FEATURE_VIRTUAL_STUB_DISPATCH + +CHAIN_SUCCESS_COUNTER equ g_dispatch_cache_chain_success_counter extern VSD_ResolveWorker:proc extern CHAIN_SUCCESS_COUNTER:dword @@ -83,4 +85,5 @@ Fail: LEAF_END ResolveWorkerChainLookupAsmStub, _TEXT +endif ;; FEATURE_VIRTUAL_STUB_DISPATCH end diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index bd86950467f4..9c935d0fa74e 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -34,6 +34,9 @@ #define DynamicHelperFrameFlags_ObjectArg 1 #define DynamicHelperFrameFlags_ObjectArg2 2 +#define ThisPtrRetBufPrecodeData__Target 0x00 +ASMCONSTANTS_C_ASSERT(ThisPtrRetBufPrecodeData__Target == offsetof(ThisPtrRetBufPrecodeData, Target)); + #define ASMCONSTANT_OFFSETOF_ASSERT(struct, member) \ ASMCONSTANTS_C_ASSERT(OFFSETOF__##struct##__##member == offsetof(struct, member)); @@ -110,28 +113,71 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pFrame == offsetof(Thread, m_pFrame)); #define Thread_m_pFrame OFFSETOF__Thread__m_pFrame +#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context)); #define OFFSETOF__ee_alloc_context__alloc_ptr 0x8 ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) + offsetof(gc_alloc_context, alloc_ptr)); -#define OFFSETOF__ee_alloc_context__m_CombinedLimit 0x0 -ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__m_CombinedLimit == offsetof(ee_alloc_context, m_CombinedLimit)); +#define OFFSETOF__ee_alloc_context__combined_limit 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, m_CombinedLimit)); #define OFFSETOF__ThreadExceptionState__m_pCurrentTracker 0x000 ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadExceptionState__m_pCurrentTracker == offsetof(ThreadExceptionState, m_pCurrentTracker)); +#define OFFSETOF__DynamicHelperStubArgs__Constant1 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicHelperStubArgs__Constant1 + == offsetof(DynamicHelperStubArgs, Constant1)); + +#define OFFSETOF__DynamicHelperStubArgs__Constant2 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicHelperStubArgs__Constant2 + == offsetof(DynamicHelperStubArgs, Constant2)); + +#define 
OFFSETOF__DynamicHelperStubArgs__Helper 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicHelperStubArgs__Helper + == offsetof(DynamicHelperStubArgs, Helper)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__SecondIndir 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__SecondIndir + == offsetof(GenericDictionaryDynamicHelperStubData, SecondIndir)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__LastIndir 0x4 +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__LastIndir + == offsetof(GenericDictionaryDynamicHelperStubData, LastIndir)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__SizeOffset 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__SizeOffset + == offsetof(GenericDictionaryDynamicHelperStubData, SizeOffset)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__SlotOffset 0xc +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__SlotOffset + == offsetof(GenericDictionaryDynamicHelperStubData, SlotOffset)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__HandleArgs 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__HandleArgs + == offsetof(GenericDictionaryDynamicHelperStubData, HandleArgs)); + +#ifdef FEATURE_INTERPRETER +#define OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo DBG_FRE(0x48, 0x20) +#else +#define OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo DBG_FRE(0x40, 0x18) +#endif // FEATURE_INTERPRETER +ASMCONSTANTS_C_ASSERT(OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo + == offsetof(InstantiatedMethodDesc, m_pPerInstInfo)); #define OFFSETOF__MethodTable__m_dwFlags 0x00 ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); -#define OFFSET__MethodTable__m_BaseSize 0x04 -ASMCONSTANTS_C_ASSERT(OFFSET__MethodTable__m_BaseSize - == offsetof(MethodTable, m_BaseSize)); +#define OFFSETOF__MethodTable__m_usComponentSize 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_usComponentSize == offsetof(MethodTable, m_dwFlags)); + +#define OFFSETOF__MethodTable__m_uBaseSize 0x04 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_uBaseSize == offsetof(MethodTable, m_BaseSize)); #define OFFSETOF__MethodTable__m_wNumInterfaces 0x0E ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_wNumInterfaces @@ -148,14 +194,19 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_pEEClass #define METHODTABLE_OFFSET_VTABLE DBG_FRE(0x48, 0x40) ASMCONSTANTS_C_ASSERT(METHODTABLE_OFFSET_VTABLE == sizeof(MethodTable)); -#define OFFSETOF__MethodTable__m_ElementType DBG_FRE(0x38, 0x30) -ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_ElementType - == offsetof(MethodTable, m_ElementTypeHnd)); +#define OFFSETOF__MethodTable__m_pPerInstInfo DBG_FRE(0x38, 0x30) +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_pPerInstInfo + == offsetof(MethodTable, m_pPerInstInfo)); #define OFFSETOF__MethodTable__m_pInterfaceMap DBG_FRE(0x40, 0x38) ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_pInterfaceMap == offsetof(MethodTable, m_pInterfaceMap)); +#define OFFSETOF__Object__m_pEEType 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Object__m_pEEType == offsetof(Object, m_pMethTab)); + +#define OFFSETOF__Array__m_Length 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Array__m_Length == offsetof(ArrayBase, m_NumComponents)); #define MethodTable_VtableSlotsPerChunk 8 ASMCONSTANTS_C_ASSERT(MethodTable_VtableSlotsPerChunk == VTABLE_SLOTS_PER_CHUNK) @@ -197,54 +248,19 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicStaticsInfo__m_pNonGCStatics 
ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicStaticsInfo__m_pGCStatics == offsetof(DynamicStaticsInfo, m_pGCStatics)); - -// MachState offsets (AMD64\gmscpu.h) - -#define OFFSETOF__MachState__m_Rip 0x00 -ASMCONSTANTS_C_ASSERT(OFFSETOF__MachState__m_Rip - == offsetof(MachState, m_Rip)); - -#define OFFSETOF__MachState__m_Rsp 0x08 -ASMCONSTANTS_C_ASSERT(OFFSETOF__MachState__m_Rsp - == offsetof(MachState, m_Rsp)); - -#define OFFSETOF__MachState__m_Capture 0x10 -ASMCONSTANTS_C_ASSERT(OFFSETOF__MachState__m_Capture - == offsetof(MachState, m_Capture)); - -#ifdef UNIX_AMD64_ABI -#define OFFSETOF__MachState__m_Ptrs 0x40 -#define OFFSETOF__MachState___pRetAddr 0x70 -#define OFFSETOF__LazyMachState__m_CaptureRip 0xA8 -#define OFFSETOF__LazyMachState__m_CaptureRsp 0xB0 -#else -#define OFFSETOF__MachState__m_Ptrs 0x50 -#define OFFSETOF__MachState___pRetAddr 0x90 -#define OFFSETOF__LazyMachState__m_CaptureRip 0x98 -#define OFFSETOF__LazyMachState__m_CaptureRsp 0xA0 -#endif -ASMCONSTANTS_C_ASSERT(OFFSETOF__MachState__m_Ptrs - == offsetof(MachState, m_Ptrs)); -ASMCONSTANTS_C_ASSERT(OFFSETOF__MachState___pRetAddr - == offsetof(MachState, _pRetAddr)); -ASMCONSTANTS_C_ASSERT(OFFSETOF__LazyMachState__m_CaptureRip - == offsetof(LazyMachState, m_CaptureRip)); -ASMCONSTANTS_C_ASSERT(OFFSETOF__LazyMachState__m_CaptureRsp - == offsetof(LazyMachState, m_CaptureRsp)); - #define OFFSETOF__VASigCookie__pNDirectILStub 0x8 ASMCONSTANTS_C_ASSERT(OFFSETOF__VASigCookie__pNDirectILStub == offsetof(VASigCookie, pNDirectILStub)); #if defined(UNIX_AMD64_ABI) && !defined(HOST_WINDOWS) // Expression is too complicated, is currently: -// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + -// /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + -// /*XSTATE*/ + 8 + 8 + -// /*XSTATE_AVX*/ 16*16 + -// /*XSTATE_AVX512_KMASK*/ 8*8 + -// /*XSTATE_AVX512_ZMM_H*/ 32*16 + -// /*XSTATE_AVX512_ZMM*/ 64*16 + +// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + +// /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + +// /*XSTATE*/ + 8 + 8 + +// /*XSTATE_AVX*/ 16*16 + +// /*XSTATE_AVX512_KMASK*/ 8*8 + +// /*XSTATE_AVX512_ZMM_H*/ 32*16 + +// /*XSTATE_AVX512_ZMM*/ 64*16 + // /*XSTATE_APX*/ 8*16) #define SIZEOF__CONTEXT (3232) #else @@ -419,10 +435,6 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__FaultingExceptionFrame__m_SSP ASMCONSTANTS_C_ASSERT(OFFSETOF__PtrArray__m_NumComponents == offsetof(PtrArray, m_NumComponents)); -#define OFFSETOF__PtrArray__m_Array 0x10 -ASMCONSTANTS_C_ASSERT(OFFSETOF__PtrArray__m_Array - == offsetof(PtrArray, m_Array)); - #ifndef TARGET_UNIX #define OFFSET__TEB__ThreadLocalStoragePointer 0x58 ASMCONSTANTS_C_ASSERT(OFFSET__TEB__ThreadLocalStoragePointer == offsetof(TEB, ThreadLocalStoragePointer)); @@ -447,19 +459,16 @@ ASMCONSTANTS_RUNTIME_ASSERT(DELEGATE_FIELD_OFFSET__METHOD_AUX == Object::GetOffs CoreLibBinder::GetFieldOffset(FIELD__DELEGATE__METHOD_PTR_AUX)); -#define ASM_LARGE_OBJECT_SIZE 85000 -ASMCONSTANTS_C_ASSERT(ASM_LARGE_OBJECT_SIZE == LARGE_OBJECT_SIZE); +#define MAX_STRING_LENGTH 0x3FFFFFDF +ASMCONSTANTS_C_ASSERT(MAX_STRING_LENGTH == CORINFO_String_MaxLength); -#define OFFSETOF__ArrayBase__m_NumComponents 8 -ASMCONSTANTS_C_ASSERT(OFFSETOF__ArrayBase__m_NumComponents - == offsetof(ArrayBase, m_NumComponents)); +#define STRING_COMPONENT_SIZE 2 -#define STRING_BASE_SIZE 0x16 -ASMCONSTANTS_RUNTIME_ASSERT(STRING_BASE_SIZE == StringObject::GetBaseSize()); +#define STRING_BASE_SIZE 0x16 +ASMCONSTANTS_C_ASSERT(STRING_BASE_SIZE == OBJECT_BASESIZE + 
sizeof(DWORD) + sizeof(WCHAR)); -#define OFFSETOF__StringObject__m_StringLength 0x8 -ASMCONSTANTS_C_ASSERT(OFFSETOF__StringObject__m_StringLength - == offsetof(StringObject, m_StringLength)); +#define SZARRAY_BASE_SIZE 0x18 +ASMCONSTANTS_C_ASSERT(SZARRAY_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(DWORD)); // For JIT_PInvokeBegin and JIT_PInvokeEnd helpers #define OFFSETOF__InlinedCallFrame__m_Datum 0x10 @@ -526,8 +535,8 @@ ASMCONSTANTS_C_ASSERT(FixupPrecodeData__PrecodeFixupThunk == offsetof(FixupPreco #define StubPrecodeData__Target 0x08 ASMCONSTANTS_C_ASSERT(StubPrecodeData__Target == offsetof(StubPrecodeData, Target)) -#define StubPrecodeData__MethodDesc 0x00 -ASMCONSTANTS_C_ASSERT(StubPrecodeData__MethodDesc == offsetof(StubPrecodeData, MethodDesc)) +#define StubPrecodeData__SecretParam 0x00 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__SecretParam == offsetof(StubPrecodeData, SecretParam)) #define CallCountingStubData__RemainingCallCountCell 0x00 ASMCONSTANTS_C_ASSERT(CallCountingStubData__RemainingCallCountCell == offsetof(CallCountingStubData, RemainingCallCountCell)) @@ -538,6 +547,14 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCoun #define CallCountingStubData__TargetForThresholdReached 0x10 ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +#define OFFSETOF__InterfaceDispatchCache__m_rgEntries 0x20 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCache__m_rgEntries == offsetof(InterfaceDispatchCache, m_rgEntries)) + +#define OFFSETOF__InterfaceDispatchCell__m_pCache 0x08 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCell__m_pCache == offsetof(InterfaceDispatchCell, m_pCache)) +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + #ifdef PROFILING_SUPPORTED #define PROFILE_ENTER 0x1 #define PROFILE_LEAVE 0x2 diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 6d2efe5d7c7d..4bd36922a520 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -334,7 +334,102 @@ LOCAL_LABEL(JIT_PollGCRarePath): jmp rax LEAF_END JIT_PollGC, _TEXT +//rdi -This pointer +//rsi -ReturnBuffer +LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT + mov METHODDESC_REGISTER, [METHODDESC_REGISTER + ThisPtrRetBufPrecodeData__Target] + mov r11, rsi + mov rsi, rdi + mov rdi, r11 + jmp METHODDESC_REGISTER +LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT + +// +// Prologue of all funclet calling helpers (CallXXXXFunclet) +// +.macro FUNCLET_CALL_PROLOGUE localsCount, alignStack + push_nonvol_reg r15 // save preserved regs for OS stackwalker + push_nonvol_reg r14 // ... + push_nonvol_reg r13 // ... + push_nonvol_reg r12 // ... + push_nonvol_reg rbx // ... + push_nonvol_reg rbp // ... 
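+
+    // The six pushes above plus the return address leave RSP at 8 mod 16, so the allocation
+    // below needs an odd number of 8-byte slots; callers pass alignStack = 1 when localsCount
+    // is even (e.g. FUNCLET_CALL_PROLOGUE 0, 1) to restore 16-byte alignment at the call.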
+ + stack_alloc_size = \localsCount * 8 + \alignStack * 8 + + alloc_stack stack_alloc_size + + END_PROLOGUE +.endm + +// +// Epilogue of all funclet calling helpers (CallXXXXFunclet) +// +.macro FUNCLET_CALL_EPILOGUE + free_stack stack_alloc_size + + pop_nonvol_reg rbp + pop_nonvol_reg rbx + pop_nonvol_reg r12 + pop_nonvol_reg r13 + pop_nonvol_reg r14 + pop_nonvol_reg r15 +.endm + +// This helper enables us to call into a funclet after restoring Fp register +NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler + // On entry: + // + // RDI = throwable + // RSI = PC to invoke + // RDX = address of RBX register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame + // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // + + FUNCLET_CALL_PROLOGUE 0, 1 + + // Restore RBX, RBP, R12, R13, R14, R15 from CONTEXT + mov rbp, [rdx + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] + mov r12, [rdx + OFFSETOF__CONTEXT__R12 - OFFSETOF__CONTEXT__Rbx] + mov r13, [rdx + OFFSETOF__CONTEXT__R13 - OFFSETOF__CONTEXT__Rbx] + mov r14, [rdx + OFFSETOF__CONTEXT__R14 - OFFSETOF__CONTEXT__Rbx] + mov r15, [rdx + OFFSETOF__CONTEXT__R15 - OFFSETOF__CONTEXT__Rbx] + + // Save the SP of this function. + mov [rcx], rsp + // Invoke the funclet + call rsi + + FUNCLET_CALL_EPILOGUE + ret +NESTED_END CallEHFunclet, _TEXT + +// This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the +// frame pointer for accessing the locals in the parent method. +NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler + // On entry: + // + // RDI = throwable + // RSI = RBP of main function + // RDX = PC to invoke + // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // + + FUNCLET_CALL_PROLOGUE 0, 1 + + // Restore RBP + mov rbp, rsi + // Save the SP of this function + mov [rcx], rsp + // Invoke the filter funclet + call rdx + + FUNCLET_CALL_EPILOGUE + ret +NESTED_END CallEHFilterFunclet, _TEXT + #ifdef FEATURE_INTERPRETER + NESTED_ENTRY InterpreterStub, _TEXT, NoHandler PROLOG_WITH_TRANSITION_BLOCK 8, 0, 0, 0, 0 @@ -351,4 +446,706 @@ NESTED_ENTRY InterpreterStub, _TEXT, NoHandler EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END InterpreterStub, _TEXT + +// Copy arguments from the interpreter stack to the processor stack. +// The CPU stack slots are aligned to pointer size. 
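+// Each Load_* routine below is one link in a chain: r11 points at the routine's own entry in
+// a list of routine addresses (some entries carry inline data after the address, e.g. the SP
+// offset and slot count used by Load_Stack), and r10 points at the next interpreter stack
+// argument to consume. A routine loads its registers or stack slots, advances r10 and r11,
+// and tail-jumps to whatever address the next list entry holds, so control ultimately flows
+// to the code stored in the final entry of the list.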
+LEAF_ENTRY Load_Stack, _TEXT + push rdi + push rsi + push rcx + mov edi, dword ptr [r11 + 8] // SP offset + mov ecx, dword ptr [r11 + 12] // number of stack slots + add edi, 0x20 // the 3 pushes above plus return address + add rdi, rsp + mov rsi, r10 + shr rcx, 3 + rep movsq + mov r10, rsi + pop rcx + pop rsi + pop rdi + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Stack, _TEXT + +// Routines for passing arguments by value in general purpose registers RDI, RSI, RDX, RCX, R8, R9 + +LEAF_ENTRY Load_RDI, _TEXT + mov rdi, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI, _TEXT + +LEAF_ENTRY Load_RDI_RSI, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI, _TEXT + +LEAF_ENTRY Load_RDI_RSI_RDX, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + mov rdx, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI_RDX, _TEXT + +LEAF_ENTRY Load_RDI_RSI_RDX_RCX, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + mov rdx, [r10 + 16] + mov rcx, [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI_RDX_RCX, _TEXT + +LEAF_ENTRY Load_RDI_RSI_RDX_RCX_R8, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + mov rdx, [r10 + 16] + mov rcx, [r10 + 24] + mov r8, [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI_RDX_RCX_R8, _TEXT + +LEAF_ENTRY Load_RDI_RSI_RDX_RCX_R8_R9, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + mov rdx, [r10 + 16] + mov rcx, [r10 + 24] + mov r8, [r10 + 32] + mov r9, [r10 + 40] + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI_RDX_RCX_R8_R9, _TEXT + +LEAF_ENTRY Load_RSI, _TEXT + mov rsi, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI, _TEXT + +LEAF_ENTRY Load_RSI_RDX, _TEXT + mov rsi, [r10] + mov rdx, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI_RDX, _TEXT + +LEAF_ENTRY Load_RSI_RDX_RCX, _TEXT + mov rsi, [r10] + mov rdx, [r10 + 8] + mov rcx, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI_RDX_RCX, _TEXT + +LEAF_ENTRY Load_RSI_RDX_RCX_R8, _TEXT + mov rsi, [r10] + mov rdx, [r10 + 8] + mov rcx, [r10 + 16] + mov r8, [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI_RDX_RCX_R8, _TEXT + +LEAF_ENTRY Load_RSI_RDX_RCX_R8_R9, _TEXT + mov rsi, [r10] + mov rdx, [r10 + 8] + mov rcx, [r10 + 16] + mov r8, [r10 + 24] + mov r9, [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI_RDX_RCX_R8_R9, _TEXT + +LEAF_ENTRY Load_RDX, _TEXT + mov rdx, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX, _TEXT + +LEAF_ENTRY Load_RDX_RCX, _TEXT + mov rdx, [r10] + mov rcx, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_RCX, _TEXT + +LEAF_ENTRY Load_RDX_RCX_R8, _TEXT + mov rdx, [r10] + mov rcx, [r10 + 8] + mov r8, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_RCX_R8, _TEXT + +LEAF_ENTRY Load_RDX_RCX_R8_R9, _TEXT + mov rdx, [r10] + mov rcx, [r10 + 8] + mov r8, [r10 + 16] + mov r9, [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_RCX_R8_R9, _TEXT + +LEAF_ENTRY Load_RCX, _TEXT + mov rcx, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX, _TEXT + +LEAF_ENTRY Load_RCX_R8, _TEXT + mov rcx, [r10] + mov r8, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX_R8, _TEXT + +LEAF_ENTRY Load_RCX_R8_R9, _TEXT + mov rcx, [r10] + mov r8, [r10 + 8] + mov 
r9, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX_R8_R9, _TEXT + +LEAF_ENTRY Load_RDX_R8, _TEXT + mov rdx, [r10] + mov r8, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_R8, _TEXT + +LEAF_ENTRY Load_RDX_R8_R9, _TEXT + mov rdx, [r10] + mov r8, [r10 + 8] + mov r9, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_R8_R9, _TEXT + +LEAF_ENTRY Load_R8, _TEXT + mov r8, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R8, _TEXT + +LEAF_ENTRY Load_R8_R9, _TEXT + mov r8, [r10] + mov r9, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R8_R9, _TEXT + +LEAF_ENTRY Load_R9, _TEXT + mov r9, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R9, _TEXT + +// Routines for passing arguments in floating point registers XMM0..XMM7 + +LEAF_ENTRY Load_XMM0, _TEXT + movsd xmm0, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3_XMM4, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + movsd xmm4, real8 ptr [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3_XMM4, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + movsd xmm4, real8 ptr [r10 + 32] + movsd xmm5, real8 ptr [r10 + 40] + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + movsd xmm4, real8 ptr [r10 + 32] + movsd xmm5, real8 ptr [r10 + 40] + movsd xmm6, real8 ptr [r10 + 48] + add r10, 56 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + movsd xmm4, real8 ptr [r10 + 32] + movsd xmm5, real8 ptr [r10 + 40] + movsd xmm6, real8 ptr [r10 + 48] + movsd xmm7, real8 ptr [r10 + 56] + add r10, 64 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM1, _TEXT + movsd xmm1, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2, _TEXT + +LEAF_ENTRY 
Load_XMM1_XMM2_XMM3, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3_XMM4, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + movsd xmm4, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3_XMM4, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + movsd xmm4, real8 ptr [r10 + 24] + movsd xmm5, real8 ptr [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + movsd xmm4, real8 ptr [r10 + 24] + movsd xmm5, real8 ptr [r10 + 32] + movsd xmm6, real8 ptr [r10 + 40] + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + movsd xmm4, real8 ptr [r10 + 24] + movsd xmm5, real8 ptr [r10 + 32] + movsd xmm6, real8 ptr [r10 + 40] + movsd xmm7, real8 ptr [r10 + 48] + add r10, 56 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM2, _TEXT + movsd xmm2, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3_XMM4, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + movsd xmm4, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3_XMM4, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3_XMM4_XMM5, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + movsd xmm4, real8 ptr [r10 + 16] + movsd xmm5, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + movsd xmm4, real8 ptr [r10 + 16] + movsd xmm5, real8 ptr [r10 + 24] + movsd xmm6, real8 ptr [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + movsd xmm4, real8 ptr [r10 + 16] + movsd xmm5, real8 ptr [r10 + 24] + movsd xmm6, real8 ptr [r10 + 32] + movsd xmm7, real8 ptr [r10 + 40] + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM3, _TEXT + movsd xmm3, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3, _TEXT + +LEAF_ENTRY Load_XMM3_XMM4, _TEXT + movsd xmm3, real8 ptr [r10] + movsd xmm4, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3_XMM4, _TEXT + +LEAF_ENTRY Load_XMM3_XMM4_XMM5, _TEXT + movsd xmm3, real8 ptr [r10] + movsd xmm4, real8 ptr [r10 + 8] + movsd xmm5, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword 
ptr [r11] +LEAF_END Load_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd xmm3, real8 ptr [r10] + movsd xmm4, real8 ptr [r10 + 8] + movsd xmm5, real8 ptr [r10 + 16] + movsd xmm6, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm3, real8 ptr [r10] + movsd xmm4, real8 ptr [r10 + 8] + movsd xmm5, real8 ptr [r10 + 16] + movsd xmm6, real8 ptr [r10 + 24] + movsd xmm7, real8 ptr [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM4, _TEXT + movsd xmm4, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM4, _TEXT + +LEAF_ENTRY Load_XMM4_XMM5, _TEXT + movsd xmm4, real8 ptr [r10] + movsd xmm5, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM4_XMM5_XMM6, _TEXT + movsd xmm4, real8 ptr [r10] + movsd xmm5, real8 ptr [r10 + 8] + movsd xmm6, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm4, real8 ptr [r10] + movsd xmm5, real8 ptr [r10 + 8] + movsd xmm6, real8 ptr [r10 + 16] + movsd xmm7, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM5, _TEXT + movsd xmm5, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM5, _TEXT + +LEAF_ENTRY Load_XMM5_XMM6, _TEXT + movsd xmm5, real8 ptr [r10] + movsd xmm6, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM5_XMM6_XMM7, _TEXT + movsd xmm5, real8 ptr [r10] + movsd xmm6, real8 ptr [r10 + 8] + movsd xmm7, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM6, _TEXT + movsd xmm6, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM6, _TEXT + +LEAF_ENTRY Load_XMM6_XMM7, _TEXT + movsd xmm6, real8 ptr [r10] + movsd xmm7, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM7, _TEXT + movsd xmm7, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM7, _TEXT + + +NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler + push_nonvol_reg rbp + mov rbp, rsp + alloc_stack 0x10 + save_reg_postrsp r10, 0 +END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] + mov r10, [rsp] + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetVoid, _TEXT + +NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler + push_nonvol_reg rbp + mov rbp, rsp + alloc_stack 0x10 + save_reg_postrsp r10, 0 +END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + mov rdi, rdx // return buffer + call qword ptr [r11] + mov r10, [rsp] + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetBuff, _TEXT + +NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler + push_nonvol_reg rbp + mov rbp, rsp + push_register rdx + push_register rax // align +END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] + //pop rdx + mov 
rdx, [rbp - 8] + movsd real8 ptr [rdx], xmm0 + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetDouble, _TEXT + +NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler + push_nonvol_reg rbp + mov rbp, rsp + push_register rdx + push_register rax // align +END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] + mov rdx, [rbp - 8] + mov qword ptr [rdx], rax + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetI8, _TEXT + +NESTED_ENTRY CallJittedMethodRetI8I8, _TEXT, NoHandler + push_nonvol_reg rbp + mov rbp, rsp + push_register rdx + push_register rax // align +END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] + mov rcx, [rbp - 8] + mov qword ptr [rcx], rax + mov qword ptr [rcx + 8], rdx + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetI8I8, _TEXT + +NESTED_ENTRY CallJittedMethodRetI8Double, _TEXT, NoHandler + push_nonvol_reg rbp + mov rbp, rsp + push_register rdx + push_register rax // align +END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] + mov rcx, [rbp - 8] + mov qword ptr [rcx], rax + movsd real8 ptr [rcx + 8], xmm0 + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetI8Double, _TEXT + +NESTED_ENTRY CallJittedMethodRetDoubleI8, _TEXT, NoHandler + push_nonvol_reg rbp + mov rbp, rsp + push_register rdx + push_register rax // align +END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] + mov rcx, [rbp - 8] + movsd real8 ptr [rcx], xmm0 + mov qword ptr [rcx + 8], rax + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetDoubleI8, _TEXT + +NESTED_ENTRY CallJittedMethodRetDoubleDouble, _TEXT, NoHandler + push_nonvol_reg rbp + mov rbp, rsp + push_register rdx + push_register rax // align +END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] + mov rcx, [rbp - 8] + movsd real8 ptr [rcx], xmm0 + movsd real8 ptr [rcx + 8], xmm1 + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetDoubleDouble, _TEXT + #endif // FEATURE_INTERPRETER diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp index ec952dd0f6f8..7fa9d44d1baa 100644 --- a/src/coreclr/vm/amd64/cgenamd64.cpp +++ b/src/coreclr/vm/amd64/cgenamd64.cpp @@ -1,8 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// + // Various helper routines for generating AMD64 assembly code. 
-// // Precompiled Header @@ -25,6 +24,10 @@ #include "clrtocomcall.h" #endif // FEATURE_COMINTEROP +#ifdef FEATURE_PERFMAP +#include "perfmap.h" +#endif + void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegisters * pRegs) { LIMITED_METHOD_CONTRACT; @@ -138,93 +141,6 @@ void InlinedCallFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateF LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK InlinedCallFrame::UpdateRegDisplay_Impl(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP)); } -void HelperMethodFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) -{ - CONTRACTL - { - NOTHROW; - GC_NOTRIGGER; - MODE_ANY; - PRECONDITION(m_MachState._pRetAddr == PTR_TADDR(&m_MachState.m_Rip)); - SUPPORTS_DAC; - } - CONTRACTL_END; - -#ifndef DACCESS_COMPILE - if (updateFloats) - { - UpdateFloatingPointRegisters(pRD); - _ASSERTE(pRD->pCurrentContext->Rip == m_MachState.m_Rip); - } -#endif // DACCESS_COMPILE - - pRD->IsCallerContextValid = FALSE; - pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. - - // - // Copy the saved state from the frame to the current context. - // - - LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK HelperMethodFrame::UpdateRegDisplay cached ip:%p, sp:%p\n", m_MachState.m_Rip, m_MachState.m_Rsp)); - -#if defined(DACCESS_COMPILE) - // For DAC, we may get here when the HMF is still uninitialized. - // So we may need to unwind here. - if (!m_MachState.isValid()) - { - // This allocation throws on OOM. - MachState* pUnwoundState = (MachState*)DacAllocHostOnlyInstance(sizeof(*pUnwoundState), true); - - EnsureInit(pUnwoundState); - - pRD->pCurrentContext->Rip = pRD->ControlPC = pUnwoundState->m_Rip; - pRD->pCurrentContext->Rsp = pRD->SP = pUnwoundState->m_Rsp; - -#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = pUnwoundState->m_Capture.regname; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - -#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = &pRD->pCurrentContext->regname; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - - ClearRegDisplayArgumentAndScratchRegisters(pRD); - - return; - } -#endif // DACCESS_COMPILE - - pRD->pCurrentContext->Rip = pRD->ControlPC = m_MachState.m_Rip; - pRD->pCurrentContext->Rsp = pRD->SP = m_MachState.m_Rsp; - -#ifdef TARGET_UNIX - -#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = (m_MachState.m_Ptrs.p##regname != NULL) ? \ - *m_MachState.m_Ptrs.p##regname : m_MachState.m_Unwound.regname; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - -#else // TARGET_UNIX - -#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = *m_MachState.m_Ptrs.p##regname; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - -#endif // TARGET_UNIX - -#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = (DWORD64 *)(TADDR *)m_MachState.m_Ptrs.p##regname; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - - // - // Clear all knowledge of scratch registers. We're skipping to any - // arbitrary point on the stack, and frames aren't required to preserve or - // keep track of these anyways. 
- // - - ClearRegDisplayArgumentAndScratchRegisters(pRD); -} - void FaultingExceptionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) { LIMITED_METHOD_DAC_CONTRACT; @@ -335,7 +251,6 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats pRD->pCurrentContextPointers->Rsi = NULL; pRD->pCurrentContextPointers->Rdi = NULL; #endif - pRD->pCurrentContextPointers->Rcx = NULL; #ifdef UNIX_AMD64_ABI pRD->pCurrentContextPointers->Rdx = (PULONG64)&m_Args->Rdx; #else // UNIX_AMD64_ABI @@ -347,18 +262,9 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats pRD->pCurrentContextPointers->R11 = NULL; pRD->pCurrentContextPointers->Rax = (PULONG64)&m_Args->Rax; + pRD->pCurrentContextPointers->Rcx = (PULONG64)&m_Args->Rcx; SyncRegDisplayToCurrentContext(pRD); - -/* - // This only describes the top-most frame - pRD->pContext = NULL; - - - pRD->PCTAddr = dac_cast(m_Args) + offsetof(HijackArgs, Rip); - //pRD->pPC = PTR_SLOT(pRD->PCTAddr); - pRD->SP = (ULONG64)(pRD->PCTAddr + sizeof(TADDR)); -*/ } #endif // FEATURE_HIJACK @@ -553,77 +459,6 @@ void emitJump(LPBYTE pBufferRX, LPBYTE pBufferRW, LPVOID target) _ASSERTE(DbgIsExecutable(pBufferRX, 12)); } -void UMEntryThunkCode::Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam) -{ - CONTRACTL - { - NOTHROW; - GC_NOTRIGGER; - MODE_ANY; - } - CONTRACTL_END; - - // padding // CC CC CC CC - // mov r10, pUMEntryThunk // 49 ba xx xx xx xx xx xx xx xx // METHODDESC_REGISTER - // mov rax, pJmpDest // 48 b8 xx xx xx xx xx xx xx xx // need to ensure this imm64 is qword aligned - // TAILJMP_RAX // 48 FF E0 - -#ifdef _DEBUG - m_padding[0] = X86_INSTR_INT3; - m_padding[1] = X86_INSTR_INT3; - m_padding[2] = X86_INSTR_INT3; - m_padding[3] = X86_INSTR_INT3; -#endif // _DEBUG - m_movR10[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT; - m_movR10[1] = 0xBA; - m_uet = pvSecretParam; - m_movRAX[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; - m_movRAX[1] = 0xB8; - m_execstub = pTargetCode; - m_jmpRAX[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; - m_jmpRAX[1] = 0xFF; - m_jmpRAX[2] = 0xE0; - - _ASSERTE(DbgIsExecutable(&pEntryThunkCodeRX->m_movR10[0], &pEntryThunkCodeRX->m_jmpRAX[3]-&pEntryThunkCodeRX->m_movR10[0])); - FlushInstructionCache(GetCurrentProcess(),pEntryThunkCodeRX,sizeof(UMEntryThunkCode)); -} - -void UMEntryThunkCode::Poison() -{ - CONTRACTL - { - NOTHROW; - GC_NOTRIGGER; - MODE_ANY; - } - CONTRACTL_END; - - ExecutableWriterHolder thunkWriterHolder(this, sizeof(UMEntryThunkCode)); - UMEntryThunkCode *pThisRW = thunkWriterHolder.GetRW(); - - pThisRW->m_execstub = (BYTE *)UMEntryThunk::ReportViolation; - - pThisRW->m_movR10[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; -#ifdef _WIN32 - // mov rcx, pUMEntryThunk // 48 b9 xx xx xx xx xx xx xx xx - pThisRW->m_movR10[1] = 0xB9; -#else - // mov rdi, pUMEntryThunk // 48 bf xx xx xx xx xx xx xx xx - pThisRW->m_movR10[1] = 0xBF; -#endif - - ClrFlushInstructionCache(&m_movR10[0], &m_jmpRAX[3]-&m_movR10[0], /* hasCodeExecutedBefore */ true); -} - -UMEntryThunk* UMEntryThunk::Decode(LPVOID pCallback) -{ - LIMITED_METHOD_CONTRACT; - - UMEntryThunkCode *pThunkCode = (UMEntryThunkCode*)((BYTE*)pCallback - UMEntryThunkCode::GetEntryPointOffset()); - - return (UMEntryThunk*)pThunkCode->m_uet; -} - INT32 rel32UsingJumpStub(INT32 UNALIGNED * pRel32, PCODE target, MethodDesc *pMethod, LoaderAllocator *pLoaderAllocator /* = NULL */, bool throwOnOutOfMemoryWithinRange /*= true*/) { @@ -761,10 +596,11 @@ DWORD 
GetOffsetAtEndOfFunction(ULONGLONG uImageBase, // // Allocation of dynamic helpers // +#ifndef FEATURE_STUBPRECODE_DYNAMIC_HELPERS #define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR) -#define BEGIN_DYNAMIC_HELPER_EMIT(size) \ +#define BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \ SIZE_T cb = size; \ SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \ BYTE * pStartRX = (BYTE *)(void*)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \ @@ -773,6 +609,14 @@ DWORD GetOffsetAtEndOfFunction(ULONGLONG uImageBase, size_t rxOffset = pStartRX - pStart; \ BYTE * p = pStart; +#ifdef FEATURE_PERFMAP +#define BEGIN_DYNAMIC_HELPER_EMIT(size) \ + BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \ + PerfMap::LogStubs(__FUNCTION__, "DynamicHelper", (PCODE)p, size, PerfMapStubType::Individual); +#else +#define BEGIN_DYNAMIC_HELPER_EMIT(size) BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) +#endif + #define END_DYNAMIC_HELPER_EMIT() \ _ASSERTE(pStart + cb == p); \ while (p < pStart + cbAligned) *p++ = X86_INSTR_INT3; \ @@ -1124,6 +968,7 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, END_DYNAMIC_HELPER_EMIT(); } } +#endif // !FEATURE_STUBPRECODE_DYNAMIC_HELPERS #endif // FEATURE_READYTORUN diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index a1bf37c295ab..db9e1c717b53 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -450,6 +450,46 @@ inline TADDR GetFP(const CONTEXT * context) return (TADDR)(context->Rbp); } +inline void SetFirstArgReg(CONTEXT *context, TADDR value) +{ + LIMITED_METHOD_DAC_CONTRACT; +#ifdef UNIX_AMD64_ABI + context->Rdi = (DWORD64)value; +#else + context->Rcx = (DWORD64)value; +#endif +} + +inline TADDR GetFirstArgReg(CONTEXT *context) +{ + LIMITED_METHOD_DAC_CONTRACT; +#ifdef UNIX_AMD64_ABI + return (TADDR)(context->Rdi); +#else + return (TADDR)(context->Rcx); +#endif +} + +inline void SetSecondArgReg(CONTEXT *context, TADDR value) +{ + LIMITED_METHOD_DAC_CONTRACT; +#ifdef UNIX_AMD64_ABI + context->Rsi = (DWORD64)value; +#else + context->Rdx = (DWORD64)value; +#endif +} + +inline TADDR GetSecondArgReg(CONTEXT *context) +{ + LIMITED_METHOD_DAC_CONTRACT; +#ifdef UNIX_AMD64_ABI + return (TADDR)(context->Rsi); +#else + return (TADDR)(context->Rdx); +#endif +} + extern "C" TADDR GetCurrentSP(); // Emits: @@ -505,52 +545,9 @@ inline PCODE decodeBackToBackJump(PCODE pCode) extern "C" void setFPReturn(int fpSize, INT64 retVal); extern "C" void getFPReturn(int fpSize, INT64 *retval); - -#include -struct DECLSPEC_ALIGN(8) UMEntryThunkCode -{ - // padding // CC CC CC CC - // mov r10, pUMEntryThunk // 49 ba xx xx xx xx xx xx xx xx // METHODDESC_REGISTER - // mov rax, pJmpDest // 48 b8 xx xx xx xx xx xx xx xx // need to ensure this imm64 is qword aligned - // TAILJMP_RAX // 48 FF E0 - - BYTE m_padding[4]; - BYTE m_movR10[2]; // MOV R10, - LPVOID m_uet; // pointer to start of this structure - BYTE m_movRAX[2]; // MOV RAX, - DECLSPEC_ALIGN(8) - const BYTE* m_execstub; // pointer to destination code // ensure this is qword aligned - BYTE m_jmpRAX[3]; // JMP RAX - BYTE m_padding2[5]; - - void Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam); - void Poison(); - - LPCBYTE GetEntryPoint() const - { - LIMITED_METHOD_CONTRACT; - - return (LPCBYTE)&m_movR10; - } - - static int GetEntryPointOffset() - { - LIMITED_METHOD_CONTRACT; - - return offsetof(UMEntryThunkCode, m_movR10); - } -}; -#include - struct HijackArgs { -#ifndef FEATURE_MULTIREG_RETURN - union - { 
- ULONG64 Rax; - ULONG64 ReturnValue[1]; - }; -#else // !FEATURE_MULTIREG_RETURN +#ifdef UNIX_AMD64_ABI union { struct @@ -560,7 +557,18 @@ struct HijackArgs }; ULONG64 ReturnValue[2]; }; -#endif // !FEATURE_MULTIREG_RETURN +#else // UNIX_AMD64_ABI + union + { + ULONG64 Rax; + ULONG64 ReturnValue[1]; + }; +#endif // UNIX_AMD64_ABI + union + { + ULONG64 Rcx; + ULONG64 AsyncRet; + }; CalleeSavedRegisters Regs; #ifdef TARGET_WINDOWS ULONG64 Rsp; diff --git a/src/coreclr/vm/amd64/excepamd64.cpp b/src/coreclr/vm/amd64/excepamd64.cpp index c679a67b996f..b25dd5c7f09f 100644 --- a/src/coreclr/vm/amd64/excepamd64.cpp +++ b/src/coreclr/vm/amd64/excepamd64.cpp @@ -599,26 +599,44 @@ AdjustContextForVirtualStub( PCODE f_IP = GetIP(pContext); - StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(f_IP); - - if (sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB) + bool isVirtualStubNullCheck = false; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (VirtualCallStubManager::isCachedInterfaceDispatchStubAVLocation(f_IP)) { - if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_CMP_IND_THIS_REG_RAX) // cmp [THIS_REG], rax - { - _ASSERTE(!"AV in DispatchStub at unknown instruction"); - return FALSE; - } + isVirtualStubNullCheck = true; } - else - if (sk == STUB_CODE_BLOCK_VSD_RESOLVE_STUB) +#endif // FEATURE_CACHED_INTERFACE_DISPATCH +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + if (!isVirtualStubNullCheck) { - if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_MOV_RAX_IND_THIS_REG) // mov rax, [THIS_REG] + StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(f_IP); + + if (sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB) { - _ASSERTE(!"AV in ResolveStub at unknown instruction"); - return FALSE; + if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_CMP_IND_THIS_REG_RAX) // cmp [THIS_REG], rax + { + _ASSERTE(!"AV in DispatchStub at unknown instruction"); + } + else + { + isVirtualStubNullCheck = true; + } + } + else + if (sk == STUB_CODE_BLOCK_VSD_RESOLVE_STUB) + { + if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_MOV_RAX_IND_THIS_REG) // mov rax, [THIS_REG] + { + _ASSERTE(!"AV in ResolveStub at unknown instruction"); + } + else + { + isVirtualStubNullCheck = true; + } } } - else +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + if (!isVirtualStubNullCheck) { return FALSE; } diff --git a/src/coreclr/vm/amd64/getstate.S b/src/coreclr/vm/amd64/getstate.S index 04d9ab707d59..f693f6071155 100644 --- a/src/coreclr/vm/amd64/getstate.S +++ b/src/coreclr/vm/amd64/getstate.S @@ -20,25 +20,3 @@ LEAF_ENTRY GetCurrentIP, _TEXT ret LEAF_END GetCurrentIP, _TEXT - - -// EXTERN_C void LazyMachStateCaptureState(struct LazyMachState *pState) -LEAF_ENTRY LazyMachStateCaptureState, _TEXT - - mov rdx, [rsp] // get the return address - - mov [rdi + OFFSETOF__MachState__m_Capture + 0*8], r12 - mov [rdi + OFFSETOF__MachState__m_Capture + 1*8], r13 - mov [rdi + OFFSETOF__MachState__m_Capture + 2*8], r14 - mov [rdi + OFFSETOF__MachState__m_Capture + 3*8], r15 - mov [rdi + OFFSETOF__MachState__m_Capture + 4*8], rbx - mov [rdi + OFFSETOF__MachState__m_Capture + 5*8], rbp - - mov qword ptr [rdi + OFFSETOF__MachState___pRetAddr], 0 - - mov [rdi + OFFSETOF__LazyMachState__m_CaptureRip], rdx - mov [rdi + OFFSETOF__LazyMachState__m_CaptureRsp], rsp - - ret - -LEAF_END LazyMachStateCaptureState, _TEXT diff --git a/src/coreclr/vm/amd64/getstate.asm b/src/coreclr/vm/amd64/getstate.asm index 4831b758b9dd..7033b7bf0c3d 100644 --- a/src/coreclr/vm/amd64/getstate.asm +++ b/src/coreclr/vm/amd64/getstate.asm @@ -50,29 +50,4 @@ LEAF_ENTRY get_cycle_count, _TEXT ret LEAF_END 
get_cycle_count, _TEXT - -; EXTERN_C void LazyMachStateCaptureState(struct LazyMachState *pState) -LEAF_ENTRY LazyMachStateCaptureState, _TEXT - - mov rdx, [rsp] ; get the return address - - mov [rcx + OFFSETOF__MachState__m_Capture + 0*8], rdi - mov [rcx + OFFSETOF__MachState__m_Capture + 1*8], rsi - mov [rcx + OFFSETOF__MachState__m_Capture + 2*8], rbx - mov [rcx + OFFSETOF__MachState__m_Capture + 3*8], rbp - mov [rcx + OFFSETOF__MachState__m_Capture + 4*8], r12 - mov [rcx + OFFSETOF__MachState__m_Capture + 5*8], r13 - mov [rcx + OFFSETOF__MachState__m_Capture + 6*8], r14 - mov [rcx + OFFSETOF__MachState__m_Capture + 7*8], r15 - - mov qword ptr [rcx + OFFSETOF__MachState___pRetAddr], 0 - - mov [rcx + OFFSETOF__LazyMachState__m_CaptureRip], rdx - mov [rcx + OFFSETOF__LazyMachState__m_CaptureRsp], rsp - - ret - -LEAF_END LazyMachStateCaptureState, _TEXT - - end diff --git a/src/coreclr/vm/amd64/gmsamd64.cpp b/src/coreclr/vm/amd64/gmsamd64.cpp deleted file mode 100644 index 8af5247d07c3..000000000000 --- a/src/coreclr/vm/amd64/gmsamd64.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/**************************************************************/ -/* gmsAMD64.cpp */ -/**************************************************************/ - -#include "common.h" -#include "gmscpu.h" - -void LazyMachState::unwindLazyState(LazyMachState* baseState, - MachState* unwoundState, - DWORD threadId, - int funCallDepth /* = 1 */) -{ - CONTRACTL - { - NOTHROW; - GC_NOTRIGGER; - SUPPORTS_DAC; - } - CONTRACTL_END; - - CONTEXT ctx; - KNONVOLATILE_CONTEXT_POINTERS nonVolRegPtrs; - - ctx.ContextFlags = 0; // Read by PAL_VirtualUnwind. - - ctx.Rip = baseState->m_CaptureRip; - ctx.Rsp = baseState->m_CaptureRsp + 8; // +8 for return addr pushed before calling LazyMachStateCaptureState - -#define CALLEE_SAVED_REGISTER(regname) ctx.regname = unwoundState->m_Capture.regname = baseState->m_Capture.regname; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - -#if !defined(DACCESS_COMPILE) - - // For DAC, if we get here, it means that the LazyMachState is uninitialized and we have to unwind it. - // The API we use to unwind in DAC is StackWalk64(), which does not support the context pointers. -#define CALLEE_SAVED_REGISTER(regname) nonVolRegPtrs.regname = (PDWORD64)&unwoundState->m_Capture.regname; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - -#endif // !DACCESS_COMPILE - - LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK LazyMachState::unwindLazyState(ip:%p,sp:%p)\n", baseState->m_CaptureRip, baseState->m_CaptureRsp)); - - PCODE pvControlPc; - - do - { - -#ifndef TARGET_UNIX - pvControlPc = Thread::VirtualUnwindCallFrame(&ctx, &nonVolRegPtrs); -#else // !TARGET_UNIX - -#if defined(DACCESS_COMPILE) - HRESULT hr = DacVirtualUnwind(threadId, &ctx, &nonVolRegPtrs); - if (FAILED(hr)) - { - DacError(hr); - } -#else - BOOL success = PAL_VirtualUnwind(&ctx, &nonVolRegPtrs); - if (!success) - { - _ASSERTE(!"unwindLazyState: Unwinding failed"); - EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE); - } -#endif // DACCESS_COMPILE - - pvControlPc = GetIP(&ctx); -#endif // !TARGET_UNIX - - if (funCallDepth > 0) - { - --funCallDepth; - if (funCallDepth == 0) - break; - } - else - { - // Determine whether given IP resides in JITted code. (It returns nonzero in that case.) - // Use it now to see if we've unwound to managed code yet. 
- BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc); - - if (fIsManagedCode) - break; - } - } - while(TRUE); - - // - // Update unwoundState so that HelperMethodFrameRestoreState knows which - // registers have been potentially modified. - // - - unwoundState->m_Rip = ctx.Rip; - unwoundState->m_Rsp = ctx.Rsp; - - // For DAC, the return value of this function may be used after unwoundState goes out of scope. so we cannot do - // "unwoundState->_pRetAddr = PTR_TADDR(&unwoundState->m_Rip)". - unwoundState->_pRetAddr = PTR_TADDR(unwoundState->m_Rsp - 8); - -#ifdef TARGET_UNIX -#define CALLEE_SAVED_REGISTER(regname) unwoundState->m_Unwound.regname = ctx.regname; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER -#endif - -#if defined(DACCESS_COMPILE) - - // For DAC, we have to update the registers directly, since we don't have context pointers. -#define CALLEE_SAVED_REGISTER(regname) unwoundState->m_Capture.regname = ctx.regname; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - - // Since we don't have context pointers in this case, just assing them to NULL. -#define CALLEE_SAVED_REGISTER(regname) unwoundState->m_Ptrs.p##regname = NULL; - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - -#else // !DACCESS_COMPILE - -#define CALLEE_SAVED_REGISTER(regname) unwoundState->m_Ptrs.p##regname = PTR_TADDR(nonVolRegPtrs.regname); - ENUM_CALLEE_SAVED_REGISTERS(); -#undef CALLEE_SAVED_REGISTER - -#endif // DACCESS_COMPILE -} diff --git a/src/coreclr/vm/amd64/gmscpu.h b/src/coreclr/vm/amd64/gmscpu.h deleted file mode 100644 index 411f1cf0c71b..000000000000 --- a/src/coreclr/vm/amd64/gmscpu.h +++ /dev/null @@ -1,184 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/**************************************************************/ -/* gmscpu.h */ -/**************************************************************/ -/* HelperFrame is defines 'GET_STATE(machState)' macro, which - figures out what the state of the machine will be when the - current method returns. It then stores the state in the - JIT_machState structure. */ - -/**************************************************************/ - -#ifndef __gmsAMD64_h__ -#define __gmsAMD64_h__ - -#ifdef _DEBUG -class HelperMethodFrame; -struct MachState; -EXTERN_C MachState* __stdcall HelperMethodFrameConfirmState(HelperMethodFrame* frame, void* esiVal, void* ediVal, void* ebxVal, void* ebpVal); -#endif // _DEBUG - -// A MachState indicates the register state of the processor at some point in time (usually -// just before or after a call is made). It can be made one of two ways. Either explicitly -// (when you for some reason know the values of all the registers), or implicitly using the -// GET_STATE macros. 
- -typedef DPTR(struct MachState) PTR_MachState; -struct MachState -{ - MachState() - { - LIMITED_METHOD_DAC_CONTRACT; - INDEBUG(memset((void*)this, 0xCC, sizeof(MachState));) - } - - bool isValid() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast(_pRetAddr) != INVALID_POINTER_CC); return(_pRetAddr != nullptr); } - TADDR* pRetAddr() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(isValid()); return(_pRetAddr); } - TADDR GetRetAddr() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(isValid()); return *_pRetAddr; } -#ifndef DACCESS_COMPILE - void SetRetAddr(TADDR* addr) { _ASSERTE(isValid()); _pRetAddr = addr; } -#endif - - friend class HelperMethodFrame; - friend class CheckAsmOffsets; - friend struct LazyMachState; -#ifdef _DEBUG - friend MachState* __stdcall HelperMethodFrameConfirmState(HelperMethodFrame* frame, void* esiVal, void* ediVal, void* ebxVal, void* ebpVal); -#endif - -protected: - PCODE m_Rip; - TADDR m_Rsp; - - // - // These "capture" fields are READ ONLY once initialized by - // LazyMachStateCaptureState because we are racing to update - // the MachState when we do a stackwalk so, we must not update - // any state used to initialize the unwind from the captured - // state to the managed caller. - // - // Note also, that these fields need to be in the base struct - // because the context pointers below may point up to these - // fields. - // - CalleeSavedRegisters m_Capture; - - // context pointers for preserved registers - CalleeSavedRegistersPointers m_Ptrs; - - PTR_TADDR _pRetAddr; - -#ifdef TARGET_UNIX - // On PAL, we don't always have the context pointers available due to - // a limitation of an unwinding library. In such case, preserve - // the unwound values. - CalleeSavedRegisters m_Unwound; -#endif -}; - -/********************************************************************/ -/* This allows you to defer the computation of the Machine state - until later. Note that we don't reuse slots, because we want - this to be threadsafe without locks */ - -EXTERN_C void LazyMachStateCaptureState(struct LazyMachState *pState); - -typedef DPTR(struct LazyMachState) PTR_LazyMachState; -struct LazyMachState : public MachState -{ - // compute the machine state of the processor as it will exist just - // after the return after at most'funCallDepth' number of functions. - // if 'testFtn' is non-NULL, the return address is tested at each - // return instruction encountered. If this test returns non-NULL, - // then stack walking stops (thus you can walk up to the point that the - // return address matches some criteria - - // Normally this is called with funCallDepth=1 and testFtn = 0 so that - // it returns the state of the processor after the function that called 'captureState()' - void setLazyStateFromUnwind(MachState* copy); - static void unwindLazyState(LazyMachState* baseState, - MachState* lazyState, - DWORD threadId, - int funCallDepth = 1); - - friend class HelperMethodFrame; - friend class CheckAsmOffsets; - - // - // These "capture" fields are READ ONLY once initialized by - // LazyMachStateCaptureState because we are racing to update - // the MachState when we do a stackwalk so, we must not update - // any state used to initialize the unwind from the captured - // state to the managed caller. - // - ULONG64 m_CaptureRip; - ULONG64 m_CaptureRsp; -}; - -inline void LazyMachState::setLazyStateFromUnwind(MachState* copy) -{ - LIMITED_METHOD_CONTRACT; - -#if defined(DACCESS_COMPILE) - // This function cannot be called in DAC because DAC cannot update target memory. 
- DacError(E_FAIL); - return; - -#else // !DACCESS_COMPILE - this->m_Rip = copy->m_Rip; - this->m_Rsp = copy->m_Rsp; - -#ifdef TARGET_UNIX - this->m_Unwound = copy->m_Unwound; -#endif - - // Capture* has already been set, so there is no need to touch it - - // loop over the nonvolatile context pointers and make - // sure to properly copy interior pointers into the - // new struct - - PULONG64* pSrc = (PULONG64 *)©->m_Ptrs; - PULONG64* pDst = (PULONG64 *)&this->m_Ptrs; - - const PULONG64 LowerBoundDst = (PULONG64) this; - const PULONG64 LowerBoundSrc = (PULONG64) copy; - - const PULONG64 UpperBoundSrc = (PULONG64) ((BYTE*)LowerBoundSrc + sizeof(*copy)); - - for (int i = 0; i < NUM_CALLEE_SAVED_REGISTERS; i++) - { - PULONG64 valueSrc = *pSrc++; - - if ((LowerBoundSrc <= valueSrc) && (valueSrc < UpperBoundSrc)) - { - // make any pointer interior to 'src' interior to 'dst' - valueSrc = (PULONG64)((BYTE*)valueSrc - (BYTE*)LowerBoundSrc + (BYTE*)LowerBoundDst); - } - - *pDst++ = valueSrc; - } - - // this has to be last because we depend on write ordering to - // synchronize the race implicit in updating this struct - VolatileStore(&_pRetAddr, (PTR_TADDR)(TADDR)&m_Rip); - -#endif // !DACCESS_COMPILE -} - -// Do the initial capture of the machine state. This is meant to be -// as light weight as possible, as we may never need the state that -// we capture. Thus to complete the process you need to call -// 'getMachState()', which finishes the process -EXTERN_C void LazyMachStateCaptureState(struct LazyMachState *pState); - -// CAPTURE_STATE captures just enough register state so that the state of the -// processor can be deterined just after the routine that has CAPTURE_STATE in -// it returns. - -#define CAPTURE_STATE(machState, ret) \ - LazyMachStateCaptureState(machState) - -#endif // __gmsAMD64_h__ diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index b4b1df87c73b..37c2f5f98fd1 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -98,7 +98,7 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT je LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier) mov rax, rdi shr rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift - add rax, qword ptr [C_VAR(g_sw_ww_table)] + add rax, qword ptr [C_VAR(g_write_watch_table)] cmp byte ptr [rax], 0x0 jne LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier) mov byte ptr [rax], 0xFF diff --git a/src/coreclr/vm/amd64/jithelpers_slow.S b/src/coreclr/vm/amd64/jithelpers_slow.S index c0c80324710c..d955e0d039fe 100644 --- a/src/coreclr/vm/amd64/jithelpers_slow.S +++ b/src/coreclr/vm/amd64/jithelpers_slow.S @@ -74,7 +74,7 @@ LEAF_ENTRY JIT_WriteBarrier_Debug, _TEXT je CheckCardTable_Debug mov r10, rdi shr r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift - PREPARE_EXTERNAL_VAR g_sw_ww_table, r11 + PREPARE_EXTERNAL_VAR g_write_watch_table, r11 add r10, qword ptr [r11] cmp byte ptr [r10], 0x0 jne CheckCardTable_Debug diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp deleted file mode 100644 index f9cd2a968a02..000000000000 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ /dev/null @@ -1,1041 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -// =========================================================================== -// File: JITinterfaceCpu.CPP -// =========================================================================== - -// This contains JITinterface routines that are specific to the -// AMD64 platform. They are modeled after the X86 specific routines -// found in JITinterfaceX86.cpp or JIThelp.asm - - -#include "common.h" -#include "jitinterface.h" -#include "eeconfig.h" -#include "excep.h" -#include "threadsuspend.h" - -extern uint8_t* g_ephemeral_low; -extern uint8_t* g_ephemeral_high; -extern uint32_t* g_card_table; -extern uint32_t* g_card_bundle_table; - -// Patch Labels for the various write barriers -EXTERN_C void JIT_WriteBarrier_End(); - -EXTERN_C void JIT_WriteBarrier_PreGrow64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_PreGrow64_End(); - -EXTERN_C void JIT_WriteBarrier_PostGrow64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_Upper(); -EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_PostGrow64_End(); - -#ifdef FEATURE_SVR_GC -EXTERN_C void JIT_WriteBarrier_SVR64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_SVR64_PatchLabel_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_SVR64_PatchLabel_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_SVR64_End(); -#endif // FEATURE_SVR_GC - -EXTERN_C void JIT_WriteBarrier_Byte_Region64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionToGeneration(); -EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionShrDest(); -EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_Upper(); -EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionShrSrc(); -EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_Byte_Region64_End(); - -EXTERN_C void JIT_WriteBarrier_Bit_Region64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionToGeneration(); -EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionShrDest(); -EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_Upper(); -EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionShrSrc(); -EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_Bit_Region64_End(); - - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_WriteWatchTable(); -EXTERN_C void 
JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_End(); - -EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_WriteWatchTable(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_End(); - -#ifdef FEATURE_SVR_GC -EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_WriteWatchTable(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_End(); -#endif // FEATURE_SVR_GC - -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_WriteWatchTable(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionToGeneration(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionShrDest(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_Upper(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionShrSrc(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_End(); - -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_WriteWatchTable(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionToGeneration(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionShrDest(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_Upper(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionShrSrc(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_End(); - -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - -WriteBarrierManager g_WriteBarrierManager; - -// Use this somewhat hokey macro to concatenate the function start with the patch -// label. This allows the code below to look relatively nice, but relies on the -// naming convention which we have established for these helpers. 
-#define CALC_PATCH_LOCATION(func,label,offset) CalculatePatchLocation((PVOID)func, (PVOID)func##_##label, offset) - -WriteBarrierManager::WriteBarrierManager() : - m_currentWriteBarrier(WRITE_BARRIER_UNINITIALIZED) -{ - LIMITED_METHOD_CONTRACT; -} - -#ifndef CODECOVERAGE // Deactivate alignment validation for code coverage builds - // because the instrumentation tool will not preserve alignment - // constraints and we will fail. - -void WriteBarrierManager::Validate() -{ - CONTRACTL - { - MODE_ANY; - GC_NOTRIGGER; - NOTHROW; - } - CONTRACTL_END; - - // we have an invariant that the addresses of all the values that we update in our write barrier - // helpers must be naturally aligned, this is so that the update can happen atomically since there - // are places where these values are updated while the EE is running - // NOTE: we can't call this from the ctor since our infrastructure isn't ready for assert dialogs - - PBYTE pLowerBoundImmediate, pUpperBoundImmediate, pCardTableImmediate; - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - PBYTE pCardBundleTableImmediate; -#endif - - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardTable, 2); - - _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif - - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2); - pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Upper, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif - -#ifdef FEATURE_SVR_GC - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -#endif // FEATURE_SVR_GC - - PBYTE pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_RegionToGeneration, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); - - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Lower, 2); - pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Upper, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 
0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif - - pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_RegionToGeneration, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); - - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Lower, 2); - pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Upper, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - PBYTE pWriteWatchTableImmediate; - - pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_WriteWatchTable, 2); - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_Lower, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardTable, 2); - - _ASSERTE_ALL_BUILDS((reinterpret_cast(pWriteWatchTableImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif - - pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_WriteWatchTable, 2); - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Lower, 2); - pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Upper, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardTable, 2); - - _ASSERTE_ALL_BUILDS((reinterpret_cast(pWriteWatchTableImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif - -#ifdef FEATURE_SVR_GC - pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_WriteWatchTable, 2); - pCardTableImmediate = 
CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pWriteWatchTableImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -#endif // FEATURE_SVR_GC - - pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_RegionToGeneration, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); - - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Lower, 2); - pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Upper, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif - - pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_RegionToGeneration, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); - - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Lower, 2); - pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Upper, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); -#endif - -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -} - -#endif // CODECOVERAGE - - -PCODE WriteBarrierManager::GetCurrentWriteBarrierCode() -{ - LIMITED_METHOD_CONTRACT; - - switch (m_currentWriteBarrier) - { - case WRITE_BARRIER_PREGROW64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_PreGrow64); - case WRITE_BARRIER_POSTGROW64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_PostGrow64); -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_SVR64); -#endif // FEATURE_SVR_GC - case WRITE_BARRIER_BYTE_REGIONS64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_Byte_Region64); - case WRITE_BARRIER_BIT_REGIONS64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_Bit_Region64); -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - case WRITE_BARRIER_WRITE_WATCH_PREGROW64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_PreGrow64); - case 
WRITE_BARRIER_WRITE_WATCH_POSTGROW64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_PostGrow64); -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_SVR64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_SVR64); -#endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_Byte_Region64); - case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_Bit_Region64); -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - default: - UNREACHABLE_MSG("unexpected m_currentWriteBarrier!"); - }; -} - -size_t WriteBarrierManager::GetSpecificWriteBarrierSize(WriteBarrierType writeBarrier) -{ -// marked asm functions are those which use the LEAF_END_MARKED macro to end them which -// creates a public Name_End label which can be used to figure out their size without -// having to create unwind info. -#define MARKED_FUNCTION_SIZE(pfn) (size_t)((LPBYTE)GetEEFuncEntryPoint(pfn##_End) - (LPBYTE)GetEEFuncEntryPoint(pfn)) - - switch (writeBarrier) - { - case WRITE_BARRIER_PREGROW64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PreGrow64); - case WRITE_BARRIER_POSTGROW64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PostGrow64); -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_SVR64); -#endif // FEATURE_SVR_GC - case WRITE_BARRIER_BYTE_REGIONS64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_Byte_Region64); - case WRITE_BARRIER_BIT_REGIONS64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_Bit_Region64); -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - case WRITE_BARRIER_WRITE_WATCH_PREGROW64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_PreGrow64); - case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_PostGrow64); -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_SVR64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_SVR64); -#endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_Byte_Region64); - case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_Bit_Region64); -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - case WRITE_BARRIER_BUFFER: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier); - default: - UNREACHABLE_MSG("unexpected m_currentWriteBarrier!"); - }; -#undef MARKED_FUNCTION_SIZE -} - -size_t WriteBarrierManager::GetCurrentWriteBarrierSize() -{ - return GetSpecificWriteBarrierSize(m_currentWriteBarrier); -} - -PBYTE WriteBarrierManager::CalculatePatchLocation(LPVOID base, LPVOID label, int offset) -{ - // the label should always come after the entrypoint for this funtion - _ASSERTE_ALL_BUILDS((LPBYTE)label > (LPBYTE)base); - - return (GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier) + ((LPBYTE)GetEEFuncEntryPoint(label) - (LPBYTE)GetEEFuncEntryPoint(base) + offset)); -} - - -int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, bool isRuntimeSuspended) -{ - GCX_MAYBE_COOP_NO_THREAD_BROKEN((!isRuntimeSuspended && GetThreadNULLOk() != NULL)); - int stompWBCompleteActions = SWB_PASS; - if (!isRuntimeSuspended && m_currentWriteBarrier != WRITE_BARRIER_UNINITIALIZED) - { - ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_FOR_GC_PREP); - stompWBCompleteActions |= SWB_EE_RESTART; - } - - _ASSERTE(m_currentWriteBarrier != newWriteBarrier); - m_currentWriteBarrier = 
newWriteBarrier; - - // the memcpy must come before the switch statement because the asserts inside the switch - // are actually looking into the JIT_WriteBarrier buffer - { - ExecutableWriterHolder writeBarrierWriterHolder(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), GetCurrentWriteBarrierSize()); - memcpy(writeBarrierWriterHolder.GetRW(), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); - stompWBCompleteActions |= SWB_ICACHE_FLUSH; - } - - switch (newWriteBarrier) - { - case WRITE_BARRIER_PREGROW64: - { - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - } - - case WRITE_BARRIER_POSTGROW64: - { - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2); - m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Upper, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - } - -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR64: - { - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - } -#endif // FEATURE_SVR_GC - - case WRITE_BARRIER_BYTE_REGIONS64: - m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_RegionToGeneration, 2); - m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_RegionShrDest, 3); - m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_RegionShrSrc, 3); - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Lower, 2); - m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Upper, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrDest); - _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrSrc); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - - case WRITE_BARRIER_BIT_REGIONS64: - m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_RegionToGeneration, 2); - m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_RegionShrDest, 3); - m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_RegionShrSrc, 3); - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Lower, 2); - m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Upper, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrDest); - _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrSrc); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - case WRITE_BARRIER_WRITE_WATCH_PREGROW64: - { - m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_WriteWatchTable, 2); - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_Lower, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - } - - case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: - { - m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_WriteWatchTable, 2); - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Lower, 2); - m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Upper, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - } - -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_SVR64: - { - m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_WriteWatchTable, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - } -#endif // FEATURE_SVR_GC - - case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: - m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_WriteWatchTable, 2); - m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_RegionToGeneration, 2); - m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_RegionShrDest, 3); - m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_RegionShrSrc, 3); - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Lower, 2); - m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Upper, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrDest); - _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrSrc); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - - case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: - m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_WriteWatchTable, 2); - m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_RegionToGeneration, 2); - m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_RegionShrDest, 3); - m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_RegionShrSrc, 3); - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Lower, 2); - m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Upper, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardTable, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrDest); - _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrSrc); - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); -#endif - break; - - -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - - default: - UNREACHABLE_MSG("unexpected write barrier type!"); - } - - stompWBCompleteActions |= UpdateEphemeralBounds(true); - stompWBCompleteActions |= UpdateWriteWatchAndCardTableLocations(true, false); - - return stompWBCompleteActions; -} - -#undef CALC_PATCH_LOCATION - -void WriteBarrierManager::Initialize() -{ - CONTRACTL - { - MODE_ANY; - GC_NOTRIGGER; - NOTHROW; - } - CONTRACTL_END; - - - // Ensure that the generic JIT_WriteBarrier function buffer is large enough to hold any of the more specific - // write barrier implementations. - size_t cbWriteBarrierBuffer = GetSpecificWriteBarrierSize(WRITE_BARRIER_BUFFER); - - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_PREGROW64)); - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_POSTGROW64)); -#ifdef FEATURE_SVR_GC - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_SVR64)); -#endif // FEATURE_SVR_GC - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_BYTE_REGIONS64)); - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_BIT_REGIONS64)); -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_PREGROW64)); - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_POSTGROW64)); -#ifdef FEATURE_SVR_GC - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_SVR64)); -#endif // FEATURE_SVR_GC - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64)); - _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64)); -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - -#if !defined(CODECOVERAGE) - Validate(); -#endif -} - -bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, bool bUseBitwiseWriteBarrier, WriteBarrierType* pNewWriteBarrierType) -{ - // Init code for the JIT_WriteBarrier assembly routine. Since it will be bashed everytime the GC Heap - // changes size, we want to do most of the work just once. - // - // The actual JIT_WriteBarrier routine will only be called in free builds, but we keep this code (that - // modifies it) around in debug builds to check that it works (with assertions). 
- - - WriteBarrierType writeBarrierType = m_currentWriteBarrier; - - for(;;) - { - switch (writeBarrierType) - { - case WRITE_BARRIER_UNINITIALIZED: -#ifdef _DEBUG - // The default slow write barrier has some good asserts - if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK)) { - break; - } -#endif - if (g_region_shr != 0) - { - writeBarrierType = bUseBitwiseWriteBarrier ? WRITE_BARRIER_BIT_REGIONS64: WRITE_BARRIER_BYTE_REGIONS64; - } - else - { - writeBarrierType = GCHeapUtilities::IsServerHeap() ? WRITE_BARRIER_SVR64 : WRITE_BARRIER_PREGROW64; - } - continue; - - case WRITE_BARRIER_PREGROW64: - if (bReqUpperBoundsCheck) - { - writeBarrierType = WRITE_BARRIER_POSTGROW64; - } - break; - - case WRITE_BARRIER_POSTGROW64: - break; - -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR64: - break; -#endif // FEATURE_SVR_GC - - case WRITE_BARRIER_BYTE_REGIONS64: - case WRITE_BARRIER_BIT_REGIONS64: - break; - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - case WRITE_BARRIER_WRITE_WATCH_PREGROW64: - if (bReqUpperBoundsCheck) - { - writeBarrierType = WRITE_BARRIER_WRITE_WATCH_POSTGROW64; - } - break; - - case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: - break; - -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_SVR64: - break; -#endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: - case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: - break; -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - - default: - UNREACHABLE_MSG("unexpected write barrier type!"); - } - break; - } - - *pNewWriteBarrierType = writeBarrierType; - return m_currentWriteBarrier != writeBarrierType; -} - -int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) -{ - WriteBarrierType newType; - if (NeedDifferentWriteBarrier(false, g_region_use_bitwise_write_barrier, &newType)) - { - return ChangeWriteBarrierTo(newType, isRuntimeSuspended); - } - - int stompWBCompleteActions = SWB_PASS; - -#ifdef _DEBUG - // Using debug-only write barrier? - if (m_currentWriteBarrier == WRITE_BARRIER_UNINITIALIZED) - return stompWBCompleteActions; -#endif - - switch (m_currentWriteBarrier) - { - case WRITE_BARRIER_POSTGROW64: - case WRITE_BARRIER_BYTE_REGIONS64: - case WRITE_BARRIER_BIT_REGIONS64: -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: - case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: - case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - { - // Change immediate if different from new g_ephermeral_high. - if (*(UINT64*)m_pUpperBoundImmediate != (size_t)g_ephemeral_high) - { - ExecutableWriterHolder upperBoundWriterHolder((UINT64*)m_pUpperBoundImmediate, sizeof(UINT64)); - *upperBoundWriterHolder.GetRW() = (size_t)g_ephemeral_high; - stompWBCompleteActions |= SWB_ICACHE_FLUSH; - } - } - FALLTHROUGH; - case WRITE_BARRIER_PREGROW64: -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - case WRITE_BARRIER_WRITE_WATCH_PREGROW64: -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - { - // Change immediate if different from new g_ephermeral_low. 
- if (*(UINT64*)m_pLowerBoundImmediate != (size_t)g_ephemeral_low) - { - ExecutableWriterHolder lowerBoundImmediateWriterHolder((UINT64*)m_pLowerBoundImmediate, sizeof(UINT64)); - *lowerBoundImmediateWriterHolder.GetRW() = (size_t)g_ephemeral_low; - stompWBCompleteActions |= SWB_ICACHE_FLUSH; - } - break; - } - -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR64: -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - case WRITE_BARRIER_WRITE_WATCH_SVR64: -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - { - break; - } -#endif // FEATURE_SVR_GC - - default: - UNREACHABLE_MSG("unexpected m_currentWriteBarrier in UpdateEphemeralBounds"); - } - - return stompWBCompleteActions; -} - -int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) -{ - // If we are told that we require an upper bounds check (GC did some heap reshuffling), - // we need to switch to the WriteBarrier_PostGrow function for good. - - WriteBarrierType newType; - if (NeedDifferentWriteBarrier(bReqUpperBoundsCheck, g_region_use_bitwise_write_barrier, &newType)) - { - return ChangeWriteBarrierTo(newType, isRuntimeSuspended); - } - - int stompWBCompleteActions = SWB_PASS; - -#ifdef _DEBUG - // Using debug-only write barrier? - if (m_currentWriteBarrier == WRITE_BARRIER_UNINITIALIZED) - return stompWBCompleteActions; -#endif - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - switch (m_currentWriteBarrier) - { - case WRITE_BARRIER_WRITE_WATCH_PREGROW64: - case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_SVR64: -#endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: - case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: - if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)g_sw_ww_table) - { - ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pWriteWatchTableImmediate, sizeof(UINT64)); - *writeWatchTableImmediateWriterHolder.GetRW() = (size_t)g_sw_ww_table; - stompWBCompleteActions |= SWB_ICACHE_FLUSH; - } - break; - - default: - break; // clang seems to require all enum values to be covered for some reason - } -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - - switch (m_currentWriteBarrier) - { - case WRITE_BARRIER_BYTE_REGIONS64: - case WRITE_BARRIER_BIT_REGIONS64: - case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: - case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: - if (*(UINT64*)m_pRegionToGenTableImmediate != (size_t)g_region_to_generation_table) - { - ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pRegionToGenTableImmediate, sizeof(UINT64)); - *writeWatchTableImmediateWriterHolder.GetRW() = (size_t)g_region_to_generation_table; - stompWBCompleteActions |= SWB_ICACHE_FLUSH; - } - if (*m_pRegionShrDest != g_region_shr) - { - ExecutableWriterHolder writeWatchTableImmediateWriterHolder(m_pRegionShrDest, sizeof(UINT8)); - *writeWatchTableImmediateWriterHolder.GetRW() = g_region_shr; - stompWBCompleteActions |= SWB_ICACHE_FLUSH; - } - if (*m_pRegionShrSrc != g_region_shr) - { - ExecutableWriterHolder writeWatchTableImmediateWriterHolder(m_pRegionShrSrc, sizeof(UINT8)); - *writeWatchTableImmediateWriterHolder.GetRW() = g_region_shr; - stompWBCompleteActions |= SWB_ICACHE_FLUSH; - } - break; - - default: - break; // clang seems to require all enum values to be covered for some reason - } - - if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table) - { - ExecutableWriterHolder cardTableImmediateWriterHolder((UINT64*)m_pCardTableImmediate, 
sizeof(UINT64)); - *cardTableImmediateWriterHolder.GetRW() = (size_t)g_card_table; - stompWBCompleteActions |= SWB_ICACHE_FLUSH; - } - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - if (*(UINT64*)m_pCardBundleTableImmediate != (size_t)g_card_bundle_table) - { - ExecutableWriterHolder cardBundleTableImmediateWriterHolder((UINT64*)m_pCardBundleTableImmediate, sizeof(UINT64)); - *cardBundleTableImmediateWriterHolder.GetRW() = (size_t)g_card_bundle_table; - stompWBCompleteActions |= SWB_ICACHE_FLUSH; - } -#endif - - return stompWBCompleteActions; -} - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -int WriteBarrierManager::SwitchToWriteWatchBarrier(bool isRuntimeSuspended) -{ - WriteBarrierType newWriteBarrierType; - switch (m_currentWriteBarrier) - { - case WRITE_BARRIER_UNINITIALIZED: - // Using the debug-only write barrier - return SWB_PASS; - - case WRITE_BARRIER_PREGROW64: - newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_PREGROW64; - break; - - case WRITE_BARRIER_POSTGROW64: - newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_POSTGROW64; - break; - -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR64: - newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_SVR64; - break; -#endif // FEATURE_SVR_GC - - case WRITE_BARRIER_BYTE_REGIONS64: - newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64; - break; - - case WRITE_BARRIER_BIT_REGIONS64: - newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64; - break; - - default: - UNREACHABLE(); - } - - return ChangeWriteBarrierTo(newWriteBarrierType, isRuntimeSuspended); -} - -int WriteBarrierManager::SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended) -{ - WriteBarrierType newWriteBarrierType; - switch (m_currentWriteBarrier) - { - case WRITE_BARRIER_UNINITIALIZED: - // Using the debug-only write barrier - return SWB_PASS; - - case WRITE_BARRIER_WRITE_WATCH_PREGROW64: - newWriteBarrierType = WRITE_BARRIER_PREGROW64; - break; - - case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: - newWriteBarrierType = WRITE_BARRIER_POSTGROW64; - break; - -#ifdef FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_SVR64: - newWriteBarrierType = WRITE_BARRIER_SVR64; - break; -#endif // FEATURE_SVR_GC - - case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: - newWriteBarrierType = WRITE_BARRIER_BYTE_REGIONS64; - break; - - case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: - newWriteBarrierType = WRITE_BARRIER_BIT_REGIONS64; - break; - - default: - UNREACHABLE(); - } - - return ChangeWriteBarrierTo(newWriteBarrierType, isRuntimeSuspended); -} -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - -// This function bashes the super fast amd64 version of the JIT_WriteBarrier -// helper. It should be called by the GC whenever the ephermeral region -// bounds get changed, but still remain on the top of the GC Heap. -int StompWriteBarrierEphemeral(bool isRuntimeSuspended) -{ - WRAPPER_NO_CONTRACT; - - return g_WriteBarrierManager.UpdateEphemeralBounds(isRuntimeSuspended); -} - -// This function bashes the super fast amd64 versions of the JIT_WriteBarrier -// helpers. It should be called by the GC whenever the ephermeral region gets moved -// from being at the top of the GC Heap, and/or when the cards table gets moved. 
-int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
-{
-    WRAPPER_NO_CONTRACT;
-
-    return g_WriteBarrierManager.UpdateWriteWatchAndCardTableLocations(isRuntimeSuspended, bReqUpperBoundsCheck);
-}
-
-void FlushWriteBarrierInstructionCache()
-{
-    FlushInstructionCache(GetCurrentProcess(), GetWriteBarrierCodeLocation((PVOID)JIT_WriteBarrier), g_WriteBarrierManager.GetCurrentWriteBarrierSize());
-}
-
-#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
-int SwitchToWriteWatchBarrier(bool isRuntimeSuspended)
-{
-    WRAPPER_NO_CONTRACT;
-
-    return g_WriteBarrierManager.SwitchToWriteWatchBarrier(isRuntimeSuspended);
-}
-
-int SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended)
-{
-    WRAPPER_NO_CONTRACT;
-
-    return g_WriteBarrierManager.SwitchToNonWriteWatchBarrier(isRuntimeSuspended);
-}
-#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
diff --git a/src/coreclr/vm/amd64/profiler.cpp b/src/coreclr/vm/amd64/profiler.cpp
index f49cd3ddc07c..62c431ab4a04 100644
--- a/src/coreclr/vm/amd64/profiler.cpp
+++ b/src/coreclr/vm/amd64/profiler.cpp
@@ -156,7 +156,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig * pSig, void * platformSpecificHa
             EECodeInfo codeInfo((PCODE)pData->ip);
 
             // We want to pass the caller SP here.
-            pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledRsp), &codeInfo);
+            pData->hiddenArg = EECodeManager::GetExactGenericsToken((TADDR)(pData->probeRsp), (TADDR)(pData->rbp), &codeInfo);
         }
     }
 }
diff --git a/src/coreclr/vm/amd64/redirectedhandledjitcase.S b/src/coreclr/vm/amd64/redirectedhandledjitcase.S
index c326de585712..8a90b6bf8806 100644
--- a/src/coreclr/vm/amd64/redirectedhandledjitcase.S
+++ b/src/coreclr/vm/amd64/redirectedhandledjitcase.S
@@ -16,10 +16,10 @@
 //
 // IN: rdi: original IP before redirect
-//     rsi: Rip from the Thread::GetAbortContext()
 //
     mov rdx, rsp
+    mov rsi, rdi

     // This push of the return address must not be recorded in the unwind
     // info. After this push, unwinding will work.
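The removed WriteBarrierManager code above locates each patchable immediate by a fixed offset from the barrier's entry point, copies the selected barrier body into the shared JIT_WriteBarrier buffer, and only then stomps the immediates, relying on the asserted 8-byte alignment so each update is a single atomic store even while the EE is running. A minimal sketch of that copy-then-patch idea, using hypothetical names rather than the runtime's real helpers:

```cpp
// Illustrative sketch only; BarrierTemplate, PatchLocation and InstallAndPatch
// are hypothetical stand-ins, not the runtime's WriteBarrierManager API.
#include <cassert>
#include <cstdint>
#include <cstring>

struct BarrierTemplate
{
    const uint8_t* code;          // start of the template code
    size_t         size;          // size of the template in bytes
    size_t         cardTableOfs;  // offset of the 8-byte card-table immediate
};

// Locate the immediate inside the live buffer (analogous to CALC_PATCH_LOCATION).
inline uint8_t* PatchLocation(uint8_t* liveBuffer, const BarrierTemplate& t)
{
    uint8_t* p = liveBuffer + t.cardTableOfs;
    // The slot must be naturally aligned so the 64-bit store below is atomic.
    assert((reinterpret_cast<uintptr_t>(p) & 0x7) == 0);
    return p;
}

// Copy the template first, then patch the immediate - the same two-step
// (memcpy, then stomp) ordering the removed code depends on.
inline void InstallAndPatch(uint8_t* liveBuffer, const BarrierTemplate& t, uint64_t cardTable)
{
    std::memcpy(liveBuffer, t.code, t.size);
    *reinterpret_cast<uint64_t*>(PatchLocation(liveBuffer, t)) = cardTable;
    // A real runtime would also flush the instruction cache after patching.
}
```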
diff --git a/src/coreclr/vm/amd64/thunktemplates.S b/src/coreclr/vm/amd64/thunktemplates.S index c13a79067cc6..611556da202b 100644 --- a/src/coreclr/vm/amd64/thunktemplates.S +++ b/src/coreclr/vm/amd64/thunktemplates.S @@ -5,13 +5,159 @@ #include "unixasmmacros.inc" #include "asmconstants.h" +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE + +#define POINTER_SIZE 0x08 + +#define THUNKS_MAP_SIZE 0x4000 + +#define PAGE_SIZE 0x4000 +#define PAGE_SIZE_LOG2 14 + + +#define DATA_SLOT(stub, field, thunkSize, thunkTemplateName) C_FUNC(thunkTemplateName) + THUNKS_MAP_SIZE + stub##Data__##field + IN_PAGE_INDEX * thunkSize + +// ---------- +// StubPrecode +// ---------- + +#define STUB_PRECODE_CODESIZE 0x18 // 3 instructions, 13 bytes encoded + 11 bytes of padding +#define STUB_PRECODE_DATASIZE 0x18 // 2 qwords + a BYTE +.set STUB_PRECODE_NUM_THUNKS_PER_MAPPING,(THUNKS_MAP_SIZE / STUB_PRECODE_CODESIZE) + +.macro THUNKS_BLOCK_STUB_PRECODE + IN_PAGE_INDEX = 0 + .rept STUB_PRECODE_NUM_THUNKS_PER_MAPPING + + mov r10, [rip + DATA_SLOT(StubPrecode, SecretParam, STUB_PRECODE_CODESIZE, StubPrecodeCodeTemplate)] + jmp [rip + DATA_SLOT(StubPrecode, Target, STUB_PRECODE_CODESIZE, StubPrecodeCodeTemplate)] + // The above is 13 bytes + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + IN_PAGE_INDEX = IN_PAGE_INDEX + 1 + .endr +.endm + + .text + .p2align PAGE_SIZE_LOG2 +LEAF_ENTRY StubPrecodeCodeTemplate + THUNKS_BLOCK_STUB_PRECODE +LEAF_END_MARKED StubPrecodeCodeTemplate, _TEXT + +// ---------- +// FixupPrecode +// ---------- + +#define FIXUP_PRECODE_CODESIZE 0x18 +#define FIXUP_PRECODE_DATASIZE 0x18 // 3 qwords +.set FIXUP_PRECODE_NUM_THUNKS_PER_MAPPING,(THUNKS_MAP_SIZE / FIXUP_PRECODE_CODESIZE) + +.macro THUNKS_BLOCK_FIXUP_PRECODE + IN_PAGE_INDEX = 0 + .rept FIXUP_PRECODE_NUM_THUNKS_PER_MAPPING + + jmp [rip + DATA_SLOT(FixupPrecode, Target, FIXUP_PRECODE_CODESIZE, FixupPrecodeCodeTemplate)] + mov r10, [rip + DATA_SLOT(FixupPrecode, MethodDesc, FIXUP_PRECODE_CODESIZE, FixupPrecodeCodeTemplate)] + jmp [rip + DATA_SLOT(FixupPrecode, PrecodeFixupThunk, FIXUP_PRECODE_CODESIZE, FixupPrecodeCodeTemplate)] + // The above is 19 bytes + int 3 + int 3 + int 3 + int 3 + int 3 + IN_PAGE_INDEX = IN_PAGE_INDEX + 1 + .endr +.endm + + .text + .p2align PAGE_SIZE_LOG2 +LEAF_ENTRY FixupPrecodeCodeTemplate + THUNKS_BLOCK_FIXUP_PRECODE + // We need 16 bytes of padding to pad this out + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 +LEAF_END_MARKED FixupPrecodeCodeTemplate, _TEXT + +// ---------- +// CallCountingStub +// ---------- + +#define CALLCOUNTING_CODESIZE 0x18 +#define CALLCOUNTING_DATASIZE 0x18 // 3 qwords +.set CALLCOUNTING_NUM_THUNKS_PER_MAPPING, (THUNKS_MAP_SIZE / CALLCOUNTING_CODESIZE) +.macro THUNKS_BLOCK_CALLCOUNTING + IN_PAGE_INDEX = 0 + .rept CALLCOUNTING_NUM_THUNKS_PER_MAPPING + + mov rax,QWORD PTR [rip + DATA_SLOT(CallCountingStub, RemainingCallCountCell, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate)] + dec WORD PTR [rax] + je 0f + jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForMethod, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate)] + 0: + jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForThresholdReached, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate)] + IN_PAGE_INDEX = IN_PAGE_INDEX + 1 + .endr +.endm + + .text + .p2align PAGE_SIZE_LOG2 +LEAF_ENTRY CallCountingStubCodeTemplate + THUNKS_BLOCK_CALLCOUNTING + // We need 16 bytes of padding to pad this out + int 3 + int 3 + 
int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 +LEAF_END_MARKED CallCountingStubCodeTemplate, _TEXT + +#endif + // STUB_PAGE_SIZE must match the behavior of GetStubCodePageSize() on this architecture/os STUB_PAGE_SIZE = 16384 +#ifdef DATA_SLOT +#undef DATA_SLOT +#endif + #define DATA_SLOT(stub, field) C_FUNC(stub##Code) + STUB_PAGE_SIZE + stub##Data__##field LEAF_ENTRY StubPrecodeCode, _TEXT - mov r10, [rip + DATA_SLOT(StubPrecode, MethodDesc)] + mov r10, [rip + DATA_SLOT(StubPrecode, SecretParam)] jmp [rip + DATA_SLOT(StubPrecode, Target)] LEAF_END_MARKED StubPrecodeCode, _TEXT diff --git a/src/coreclr/vm/amd64/thunktemplates.asm b/src/coreclr/vm/amd64/thunktemplates.asm index c841a38b4f5a..4c462df6ea19 100644 --- a/src/coreclr/vm/amd64/thunktemplates.asm +++ b/src/coreclr/vm/amd64/thunktemplates.asm @@ -12,7 +12,7 @@ DATA_SLOT macro stub, field endm LEAF_ENTRY StubPrecodeCode, _TEXT - mov r10, QWORD PTR [DATA_SLOT(StubPrecode, MethodDesc)] + mov r10, QWORD PTR [DATA_SLOT(StubPrecode, SecretParam)] jmp QWORD PTR [DATA_SLOT(StubPrecode, Target)] LEAF_END_MARKED StubPrecodeCode, _TEXT diff --git a/src/coreclr/vm/amd64/unixasmhelpers.S b/src/coreclr/vm/amd64/unixasmhelpers.S index 10ab11933cae..cc864816bf03 100644 --- a/src/coreclr/vm/amd64/unixasmhelpers.S +++ b/src/coreclr/vm/amd64/unixasmhelpers.S @@ -5,57 +5,6 @@ #include "unixasmmacros.inc" #include "asmconstants.h" -// EXTERN_C int __fastcall HelperMethodFrameRestoreState( -// INDEBUG_COMMA(HelperMethodFrame *pFrame) -// MachState *pState -// ) -LEAF_ENTRY HelperMethodFrameRestoreState, _TEXT - -#ifdef _DEBUG - mov rdi, rsi -#endif - - // Check if the MachState is valid - xor eax, eax - cmp qword ptr [rdi + OFFSETOF__MachState___pRetAddr], rax - jne DoRestore - REPRET -DoRestore: - - // - // If a preserved register were pushed onto the stack between - // the managed caller and the H_M_F, m_pReg will point to its - // location on the stack and it would have been updated on the - // stack by the GC already and it will be popped back into the - // appropriate register when the appropriate epilog is run. - // - // Otherwise, the register is preserved across all the code - // in this HCALL or FCALL, so we need to update those registers - // here because the GC will have updated our copies in the - // frame. - // - // So, if m_pReg points into the MachState, we need to update - // the register here. That's what this macro does. 
- // -#define RestoreReg(reg, regnum) \ - lea rax, [rdi + OFFSETOF__MachState__m_Capture + 8 * regnum]; \ - mov rdx, [rdi + OFFSETOF__MachState__m_Ptrs + 8 * regnum]; \ - cmp rax, rdx; \ - cmove reg, [rax]; - - // regnum has to match ENUM_CALLEE_SAVED_REGISTERS macro - RestoreReg(R12, 0) - RestoreReg(R13, 1) - RestoreReg(R14, 2) - RestoreReg(R15, 3) - RestoreReg(Rbx, 4) - RestoreReg(Rbp, 5) - - xor eax, eax - ret - -LEAF_END HelperMethodFrameRestoreState, _TEXT - ////////////////////////////////////////////////////////////////////////// // // NDirectImportThunk @@ -108,7 +57,7 @@ NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler POP_ARGUMENT_REGISTERS jmp r10 - + NESTED_END NDirectImportThunk, _TEXT //------------------------------------------------ @@ -152,13 +101,16 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler PUSH_CALLEE_SAVED_REGISTERS + // Push rcx for the async continuation + push_register rcx + // Push rdx for the second half of the return value push_register rdx // Push rax again - this is where integer/pointer return values are returned push_register rax mov rdi, rsp - alloc_stack 0x28 + alloc_stack 0x20 // First float return register movdqa [rsp], xmm0 @@ -171,9 +123,11 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler movdqa xmm0, [rsp] movdqa xmm1, [rsp+0x10] - free_stack 0x28 + free_stack 0x20 + pop_register rax pop_register rdx + pop_register rcx POP_CALLEE_SAVED_REGISTERS ret @@ -205,10 +159,10 @@ NESTED_ENTRY JIT_Patchpoint, _TEXT, NoHandler NESTED_END JIT_Patchpoint, _TEXT // first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL -LEAF_ENTRY JIT_PartialCompilationPatchpoint, _TEXT +LEAF_ENTRY JIT_PatchpointForced, _TEXT mov rsi, rdi xor rdi, rdi jmp C_FUNC(JIT_Patchpoint) -LEAF_END JIT_PartialCompilationPatchpoint, _TEXT +LEAF_END JIT_PatchpointForced, _TEXT #endif // FEATURE_TIERED_COMPILATION diff --git a/src/coreclr/vm/amd64/virtualcallstubamd64.S b/src/coreclr/vm/amd64/virtualcallstubamd64.S index 09c2d6084425..822eaaf2718f 100644 --- a/src/coreclr/vm/amd64/virtualcallstubamd64.S +++ b/src/coreclr/vm/amd64/virtualcallstubamd64.S @@ -4,6 +4,8 @@ .intel_syntax noprefix #include "unixasmmacros.inc" +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + // This is the number of times a successful chain lookup will occur before the // entry is promoted to the front of the chain. This is declared as extern because // the default value (CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT) is defined in the header. 
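The context above notes that a successful chain lookup has to occur a configurable number of times (defaulting to CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT) before the matching entry is promoted to the front of the chain. The real logic lives in the ResolveWorkerChainLookupAsmStub assembly; the C++ below is only a hedged sketch of that count-then-promote pattern, with hypothetical types and an arbitrary countdown value:

```cpp
// Hedged illustration of "promote to the front after N successful hits".
// Entry, LookupWithPromotion and the 0x100 countdown are hypothetical and do
// not mirror the stub's actual data structures.
#include <cstddef>

struct Entry
{
    size_t token;   // dispatch token this entry resolves
    void*  target;  // cached target address
    Entry* next;    // next entry in the chain
};

static int g_chainSuccessCountdown = 0x100;  // stand-in for the initial success count

void* LookupWithPromotion(Entry*& head, size_t token)
{
    Entry* prev = nullptr;
    for (Entry* e = head; e != nullptr; prev = e, e = e->next)
    {
        if (e->token != token)
            continue;

        // After enough successful lookups, move the hot entry to the front so
        // later lookups find it sooner, then reset the countdown.
        if (--g_chainSuccessCountdown <= 0 && prev != nullptr)
        {
            prev->next = e->next;
            e->next = head;
            head = e;
            g_chainSuccessCountdown = 0x100;
        }
        return e->target;
    }
    return nullptr;  // miss: a real stub would fall back to the slow resolver
}
```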
@@ -87,3 +89,4 @@ Fail_RWCLAS: LEAF_END ResolveWorkerChainLookupAsmStub, _TEXT +#endif // FEATURE_VIRTUAL_STUB_DISPATCH \ No newline at end of file diff --git a/src/coreclr/vm/amd64/virtualcallstubcpu.hpp b/src/coreclr/vm/amd64/virtualcallstubcpu.hpp index 7388c4549a24..7809070d6d65 100644 --- a/src/coreclr/vm/amd64/virtualcallstubcpu.hpp +++ b/src/coreclr/vm/amd64/virtualcallstubcpu.hpp @@ -17,8 +17,6 @@ #include "dbginterface.h" -//#define STUB_LOGGING - #pragma pack(push, 1) // since we are placing code, we want byte packing of the structs diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index 7762d7550520..9491200b91e8 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -20,7 +20,6 @@ #include "mlinfo.h" #include "posterror.h" #include "assemblynative.hpp" -#include "shimload.h" #include "stringliteralmap.h" #include "frozenobjectheap.h" #include "codeman.h" @@ -726,7 +725,6 @@ void SystemDomain::Attach() ILStubManager::Init(); InteropDispatchStubManager::Init(); StubLinkStubManager::Init(); - ThunkHeapStubManager::Init(); TailCallStubManager::Init(); #ifdef FEATURE_TIERED_COMPILATION CallCountingStubManager::Init(); @@ -878,25 +876,12 @@ void SystemDomain::Init() m_pSystemPEAssembly = NULL; m_pSystemAssembly = NULL; - DWORD size = 0; - // Get the install directory so we can find CoreLib - hr = GetInternalSystemDirectory(NULL, &size); - if (hr != HRESULT_FROM_WIN32(ERROR_INSUFFICIENT_BUFFER)) - ThrowHR(hr); - - // GetInternalSystemDirectory returns a size, including the null! - WCHAR* buffer = m_SystemDirectory.OpenUnicodeBuffer(size - 1); - IfFailThrow(GetInternalSystemDirectory(buffer, &size)); - m_SystemDirectory.CloseBuffer(); + IfFailThrow(GetClrModuleDirectory(m_SystemDirectory)); m_SystemDirectory.Normalize(); // At this point m_SystemDirectory should already be canonicalized m_BaseLibrary.Append(m_SystemDirectory); - if (!m_BaseLibrary.EndsWith(SString{ DIRECTORY_SEPARATOR_CHAR_W })) - { - m_BaseLibrary.Append(DIRECTORY_SEPARATOR_CHAR_W); - } m_BaseLibrary.Append(g_pwBaseLibrary); m_BaseLibrary.Normalize(); @@ -923,15 +908,6 @@ void SystemDomain::Init() CoreLibBinder::GetField(FIELD__THREAD_BLOCKING_INFO__OFFSET_OF_LOCK_OWNER_OS_THREAD_ID) ->SetStaticValue32(AwareLock::GetOffsetOfHoldingOSThreadId()); } - -#ifdef _DEBUG - BOOL fPause = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_PauseOnLoad); - - while (fPause) - { - ClrSleepEx(20, TRUE); - } -#endif // _DEBUG } void SystemDomain::LazyInitGlobalStringLiteralMap() @@ -1008,6 +984,12 @@ extern "C" PCODE g_pGetNonGCStaticBase; PCODE g_pGetNonGCStaticBase; extern "C" PCODE g_pPollGC; PCODE g_pPollGC; +#if defined(TARGET_X86) && defined(TARGET_WINDOWS) +extern "C" PCODE g_pThrowOverflowException; +PCODE g_pThrowOverflowException; +extern "C" PCODE g_pThrowDivideByZeroException; +PCODE g_pThrowDivideByZeroException; +#endif // defined(TARGET_X86) && defined(TARGET_WINDOWS) void SystemDomain::LoadBaseSystemClasses() { @@ -1140,13 +1122,13 @@ void SystemDomain::LoadBaseSystemClasses() g_pStackFrameIteratorClass = CoreLibBinder::GetClass(CLASS__STACKFRAMEITERATOR); #endif - // Make sure that FCall mapping for Monitor.Enter is initialized. We need it in case Monitor.Enter is used only as JIT helper. - // For more details, see comment in code:JITutil_MonEnterWorker around "__me = GetEEFuncEntryPointMacro(JIT_MonEnter)". 
- ECall::GetFCallImpl(CoreLibBinder::GetMethod(METHOD__MONITOR__ENTER)); - g_pGetGCStaticBase = CoreLibBinder::GetMethod(METHOD__STATICSHELPERS__GET_GC_STATIC)->GetMultiCallableAddrOfCode(); g_pGetNonGCStaticBase = CoreLibBinder::GetMethod(METHOD__STATICSHELPERS__GET_NONGC_STATIC)->GetMultiCallableAddrOfCode(); g_pPollGC = CoreLibBinder::GetMethod(METHOD__THREAD__POLLGC)->GetMultiCallableAddrOfCode(); +#if defined(TARGET_X86) && defined(TARGET_WINDOWS) + g_pThrowOverflowException = CoreLibBinder::GetMethod(METHOD__THROWHELPERS__THROWOVERFLOWEXCEPTION)->GetMultiCallableAddrOfCode(); + g_pThrowDivideByZeroException = CoreLibBinder::GetMethod(METHOD__THROWHELPERS__THROWDIVIDEBYZEROEXCEPTION)->GetMultiCallableAddrOfCode(); +#endif // TARGET_32BIT #ifdef PROFILING_SUPPORTED // Note that g_profControlBlock.fBaseSystemClassesLoaded must be set to TRUE only after @@ -1496,7 +1478,7 @@ void AppDomain::Create() _ASSERTE(m_pTheAppDomain == NULL); - AppDomainRefHolder pDomain(new AppDomain()); + NewHolder pDomain(new AppDomain()); pDomain->Init(); pDomain->SetStage(AppDomain::STAGE_OPEN); pDomain->CreateDefaultBinder(); @@ -1625,8 +1607,6 @@ AppDomain::AppDomain() , m_pDelayedLoaderAllocatorUnloadList{NULL} , m_friendlyName{NULL} , m_pRootAssembly{NULL} - , m_dwFlags{0} - , m_cRef{1} #ifdef FEATURE_COMINTEROP , m_pRCWCache{NULL} #endif //FEATURE_COMINTEROP @@ -1634,7 +1614,6 @@ AppDomain::AppDomain() , m_pRCWRefCache{NULL} #endif // FEATURE_COMWRAPPERS , m_Stage{STAGE_CREATING} - , m_MemoryPressure{0} , m_ForceTrivialWaitOperations{false} #ifdef FEATURE_TYPEEQUIVALENCE , m_pTypeEquivalenceTable{NULL} @@ -3364,35 +3343,6 @@ PEAssembly *AppDomain::TryResolveAssemblyUsingEvent(AssemblySpec *pSpec) return result; } - -ULONG AppDomain::AddRef() -{ - LIMITED_METHOD_CONTRACT; - return InterlockedIncrement(&m_cRef); -} - -ULONG AppDomain::Release() -{ - CONTRACTL - { - NOTHROW; - GC_TRIGGERS; - MODE_ANY; - PRECONDITION(m_cRef > 0); - } - CONTRACTL_END; - - ULONG cRef = InterlockedDecrement(&m_cRef); - if (!cRef) - { - _ASSERTE (m_Stage == STAGE_CREATING); - delete this; - } - return (cRef); -} - - - void AppDomain::RaiseLoadingAssemblyEvent(Assembly *pAssembly) { CONTRACTL @@ -3443,26 +3393,30 @@ void AppDomain::RaiseLoadingAssemblyEvent(Assembly *pAssembly) EX_END_CATCH(SwallowAllExceptions); } -BOOL AppDomain::OnUnhandledException(OBJECTREF *pThrowable, BOOL isTerminating/*=TRUE*/) +void AppDomain::OnUnhandledException(OBJECTREF* pThrowable) { - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_GC_TRIGGERS; - STATIC_CONTRACT_MODE_ANY; - - BOOL retVal = FALSE; - - GCX_COOP(); + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + MODE_COOPERATIVE; + PRECONDITION(pThrowable != NULL); + } + CONTRACTL_END; EX_TRY { - retVal = GetAppDomain()->RaiseUnhandledExceptionEvent(pThrowable, isTerminating); + MethodDescCallSite raiseEvent(METHOD__APPCONTEXT__ON_UNHANDLED_EXCEPTION); + ARG_SLOT args[] = + { + ObjToArgSlot(*pThrowable) + }; + raiseEvent.Call(args); } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions) // Swallow any errors. 
- - return retVal; } void AppDomain::RaiseExitProcessEvent() @@ -3483,41 +3437,6 @@ void AppDomain::RaiseExitProcessEvent() onProcessExit.Call(NULL); } -BOOL -AppDomain::RaiseUnhandledExceptionEvent(OBJECTREF *pThrowable, BOOL isTerminating) -{ - CONTRACTL - { - THROWS; - GC_TRIGGERS; - MODE_COOPERATIVE; - INJECT_FAULT(COMPlusThrowOM();); - } - CONTRACTL_END; - - _ASSERTE(pThrowable != NULL && IsProtectedByGCFrame(pThrowable)); - - OBJECTREF orDelegate = CoreLibBinder::GetField(FIELD__APPCONTEXT__UNHANDLED_EXCEPTION)->GetStaticOBJECTREF(); - if (orDelegate == NULL) - return FALSE; - - struct { - OBJECTREF Delegate; - OBJECTREF Sender; - } gc; - gc.Delegate = orDelegate; - gc.Sender = NULL; - - GCPROTECT_BEGIN(gc); - if (orDelegate != NULL) - { - DistributeUnhandledExceptionReliably(&gc.Delegate, &gc.Sender, pThrowable, isTerminating); - } - GCPROTECT_END(); - return TRUE; -} - - DefaultAssemblyBinder *AppDomain::CreateDefaultBinder() { CONTRACT(DefaultAssemblyBinder *) @@ -4388,27 +4307,6 @@ HRESULT RuntimeInvokeHostAssemblyResolver(INT_PTR pManagedAssemblyLoadContextToB } #endif // !defined(DACCESS_COMPILE) -//approximate size of loader data -//maintained for each assembly -#define APPROX_LOADER_DATA_PER_ASSEMBLY 8196 - -size_t AppDomain::EstimateSize() -{ - CONTRACTL - { - NOTHROW; - GC_TRIGGERS; - MODE_ANY; - } - CONTRACTL_END; - - size_t retval = sizeof(AppDomain); - retval += GetLoaderAllocator()->EstimateSize(); - //very rough estimate - retval += GetAssemblyCount() * APPROX_LOADER_DATA_PER_ASSEMBLY; - return retval; -} - #ifdef DACCESS_COMPILE void diff --git a/src/coreclr/vm/appdomain.hpp b/src/coreclr/vm/appdomain.hpp index b5938414511b..116611f82541 100644 --- a/src/coreclr/vm/appdomain.hpp +++ b/src/coreclr/vm/appdomain.hpp @@ -50,17 +50,6 @@ class RCWCache; class RCWRefCache; #endif // FEATURE_COMWRAPPERS -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4200) // Disable zero-sized array warning -#endif - - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - - // The pinned heap handle bucket class is used to contain handles allocated // from an array contained in the pinned heap. class PinnedHeapHandleBucket @@ -260,7 +249,6 @@ class PEFileListLock : public ListLock DEBUG_NOINLINE static void HolderEnter(PEFileListLock *pThis) { WRAPPER_NO_CONTRACT; - ANNOTATION_SPECIAL_HOLDER_CALLER_NEEDS_DYNAMIC_CONTRACT; pThis->Enter(); } @@ -268,7 +256,6 @@ class PEFileListLock : public ListLock DEBUG_NOINLINE static void HolderLeave(PEFileListLock *pThis) { WRAPPER_NO_CONTRACT; - ANNOTATION_SPECIAL_HOLDER_CALLER_NEEDS_DYNAMIC_CONTRACT; pThis->Leave(); } @@ -975,6 +962,8 @@ class AppDomain final { return m_array.Iterate(); } + + friend struct cdac_data; }; // class DomainAssemblyList // Conceptually a list of code:Assembly structures, protected by lock code:GetAssemblyListLock @@ -1139,14 +1128,6 @@ class AppDomain final BOOL ContainsAssembly(Assembly * assem); - //**************************************************************************************** - // - // Reference count. When an appdomain is first created the reference is bump - // to one when it is added to the list of domains (see SystemDomain). An explicit - // Removal from the list is necessary before it will be deleted. 
- ULONG AddRef(void); - ULONG Release(void) DAC_EMPTY_RET(0); - //**************************************************************************************** LPCWSTR GetFriendlyName(); LPCWSTR GetFriendlyNameForDebugger(); @@ -1164,9 +1145,8 @@ class AppDomain final // in a lazy fashion so executables do not take the perf hit unless the load other // assemblies #ifndef DACCESS_COMPILE - static BOOL OnUnhandledException(OBJECTREF *pThrowable, BOOL isTerminating = TRUE); - -#endif + static void OnUnhandledException(OBJECTREF *pThrowable); +#endif // !DACCESS_COMPILE // True iff a debugger is attached to the process (same as CORDebuggerAttached) BOOL IsDebuggerAttached (void); @@ -1243,20 +1223,6 @@ class AppDomain final // Only call this routine when you can guarantee there are no loads in progress. void ClearBinderContext(); - void SetIgnoreUnhandledExceptions() - { - LIMITED_METHOD_CONTRACT; - - m_dwFlags |= IGNORE_UNHANDLED_EXCEPTIONS; - } - - BOOL IgnoreUnhandledExceptions() - { - LIMITED_METHOD_CONTRACT; - - return (m_dwFlags & IGNORE_UNHANDLED_EXCEPTIONS); - } - static void ExceptionUnwind(Frame *pFrame); BOOL IsActive() @@ -1351,7 +1317,6 @@ class AppDomain final PTR_LoaderHeap GetHighFrequencyHeap(); private: - size_t EstimateSize(); EEClassFactoryInfoHashTable* SetupClassFactHash(); #ifdef FEATURE_COMINTEROP DispIDCache* SetupRefDispIDCache(); @@ -1377,8 +1342,6 @@ class AppDomain final friend class Assembly; private: - BOOL RaiseUnhandledExceptionEvent(OBJECTREF *pThrowable, BOOL isTerminating); - enum Stage { STAGE_CREATING, STAGE_READYFORMANAGEDCODE, @@ -1433,9 +1396,6 @@ class AppDomain final return GetLoaderAllocator()->GetGCRefPoint(); } - void AddMemoryPressure(); - void RemoveMemoryPressure(); - PTR_Assembly GetRootAssembly() { LIMITED_METHOD_CONTRACT; @@ -1462,13 +1422,6 @@ class AppDomain final PTR_CWSTR m_friendlyName; PTR_Assembly m_pRootAssembly; - // General purpose flags. - DWORD m_dwFlags; - - // When an application domain is created the ref count is artificially incremented - // by one. For it to hit zero an explicit close must have happened. - LONG m_cRef; // Ref count. 
- // Map of loaded composite native images indexed by base load addresses CrstExplicitInit m_nativeImageLoadCrst; MapSHash m_nativeImageMap; @@ -1583,14 +1536,7 @@ class AppDomain final public: - enum { - CONTEXT_INITIALIZED = 0x0001, - // unused = 0x0400, - IGNORE_UNHANDLED_EXCEPTIONS = 0x10000, // AppDomain was created using the APPDOMAIN_IGNORE_UNHANDLED_EXCEPTIONS flag - }; - AssemblySpecBindingCache m_AssemblyCache; - size_t m_MemoryPressure; ArrayList m_NativeDllSearchDirectories; bool m_ForceTrivialWaitOperations; @@ -1667,10 +1613,16 @@ class AppDomain final TieredCompilationManager m_tieredCompilationManager; #endif + + friend struct cdac_data; }; // class AppDomain -// Just a ref holder -typedef ReleaseHolder AppDomainRefHolder; +template<> +struct cdac_data +{ + static constexpr size_t RootAssembly = offsetof(AppDomain, m_pRootAssembly); + static constexpr size_t DomainAssemblyList = offsetof(AppDomain, m_Assemblies) + offsetof(AppDomain::DomainAssemblyList, m_array); +}; typedef DPTR(class SystemDomain) PTR_SystemDomain; @@ -1892,36 +1844,6 @@ class SystemDomain final return m_BaseLibrary; } -#ifndef DACCESS_COMPILE - BOOL IsBaseLibrary(SString &path) - { - WRAPPER_NO_CONTRACT; - - // See if it is the installation path to CoreLib - if (path.EqualsCaseInsensitive(m_BaseLibrary)) - return TRUE; - - // Or, it might be the location of CoreLib - if (System()->SystemAssembly() != NULL - && path.EqualsCaseInsensitive(System()->SystemAssembly()->GetPEAssembly()->GetPath())) - return TRUE; - - return FALSE; - } - - BOOL IsBaseLibrarySatellite(SString &path) - { - WRAPPER_NO_CONTRACT; - - // See if it is the installation path to corelib.resources - SString s(SString::Ascii,g_psBaseLibrarySatelliteAssemblyName); - if (path.EqualsCaseInsensitive(s)) - return TRUE; - - return FALSE; - } -#endif // DACCESS_COMPILE - // Return the system directory LPCWSTR SystemDirectory() { @@ -1962,9 +1884,6 @@ class SystemDomain final InlineSString<100> m_SystemDirectory; - // @TODO: CTS, we can keep the com modules in a single assembly or in different assemblies. - // We are currently using different assemblies but this is potentitially to slow... 
- // Global domain that every one uses SPTR_DECL(SystemDomain, m_pSystemDomain); @@ -2005,8 +1924,17 @@ inline static BOOL IsUnderDomainLock() { LIMITED_METHOD_CONTRACT; return m_Syste bool enumThis); #endif + friend struct ::cdac_data; }; // class SystemDomain +#ifndef DACCESS_COMPILE +template<> +struct cdac_data +{ + static constexpr PTR_SystemDomain* SystemDomain = &SystemDomain::m_pSystemDomain; +}; +#endif // DACCESS_COMPILE + #include "comreflectioncache.inl" #endif diff --git a/src/coreclr/vm/appdomain.inl b/src/coreclr/vm/appdomain.inl index ee77268eae68..899bc9a21539 100644 --- a/src/coreclr/vm/appdomain.inl +++ b/src/coreclr/vm/appdomain.inl @@ -14,26 +14,8 @@ #ifndef _APPDOMAIN_I #define _APPDOMAIN_I -#ifndef DACCESS_COMPILE - #include "appdomain.hpp" -inline void AppDomain::AddMemoryPressure() -{ - STANDARD_VM_CONTRACT; - m_MemoryPressure=EstimateSize(); - GCInterface::AddMemoryPressure(m_MemoryPressure); -} - -inline void AppDomain::RemoveMemoryPressure() -{ - WRAPPER_NO_CONTRACT; - - GCInterface::RemoveMemoryPressure(m_MemoryPressure); -} - -#endif // DACCESS_COMPILE - inline AppDomain::PathIterator AppDomain::IterateNativeDllSearchDirectories() { WRAPPER_NO_CONTRACT; diff --git a/src/coreclr/vm/appdomainnative.cpp b/src/coreclr/vm/appdomainnative.cpp index ada5e6c7b689..f8cae6a32ea2 100644 --- a/src/coreclr/vm/appdomainnative.cpp +++ b/src/coreclr/vm/appdomainnative.cpp @@ -129,6 +129,22 @@ extern "C" void QCALLTYPE String_IsInterned(QCall::StringHandleOnStack str) END_QCALL; } +extern "C" STRINGREF* QCALLTYPE String_StrCns(UINT32 rid, CORINFO_MODULE_HANDLE scopeHnd) +{ + QCALL_CONTRACT; + + STRINGREF* hndStr = NULL; + + BEGIN_QCALL; + + // Retrieve the handle to the CLR string object. + hndStr = ConstructStringLiteral(scopeHnd, RidToToken(rid, mdtString)); + + END_QCALL; + + return hndStr; +} + extern "C" void QCALLTYPE String_Intern(QCall::StringHandleOnStack str) { QCALL_CONTRACT; diff --git a/src/coreclr/vm/appdomainnative.hpp b/src/coreclr/vm/appdomainnative.hpp index 7fb86136bf19..921af74fa9d9 100644 --- a/src/coreclr/vm/appdomainnative.hpp +++ b/src/coreclr/vm/appdomainnative.hpp @@ -16,6 +16,7 @@ #include "qcall.h" +extern "C" STRINGREF* QCALLTYPE String_StrCns(UINT32 rid, CORINFO_MODULE_HANDLE scopeHnd); extern "C" void QCALLTYPE String_Intern(QCall::StringHandleOnStack str); extern "C" void QCALLTYPE String_IsInterned(QCall::StringHandleOnStack str); diff --git a/src/coreclr/vm/arm/AllocSlow.S b/src/coreclr/vm/arm/AllocSlow.S new file mode 100644 index 000000000000..0052ea77dbce --- /dev/null +++ b/src/coreclr/vm/arm/AllocSlow.S @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "unixasmmacros.inc" +#include "asmconstants.h" + +.syntax unified +.thumb + +// +// Object* RhpNew(MethodTable *pMT) +// +// Allocate non-array object, slow path +// +LEAF_ENTRY RhpNew, _TEXT + mov r1, #0 + b C_FUNC(RhpNewObject) +LEAF_END RhpNew, _TEXT + +// +// Object* RhpNewMaybeFrozen(MethodTable *pMT) +// +// Allocate non-array object, may be on frozen heap +// +NESTED_ENTRY RhpNewMaybeFrozen, _TEXT, NoHandler + PUSH_COOP_PINVOKE_FRAME r2 + + mov r1, #0 + bl C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + bx lr +NESTED_END RhpNewMaybeFrozen, _TEXT + +// +// Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size) +// +// Allocate array object, may be on frozen heap +// +NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT, NoHandler + PUSH_COOP_PINVOKE_FRAME r2 + + bl C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + bx lr +NESTED_END RhpNewArrayMaybeFrozen, _TEXT + +// +// void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow) +// +NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT, NoHandler + PUSH_COOP_PINVOKE_FRAME r2 + + bl C_FUNC(RhExceptionHandling_FailedAllocation_Helper) + + POP_COOP_PINVOKE_FRAME + bx lr +NESTED_END RhExceptionHandling_FailedAllocation, _TEXT diff --git a/src/coreclr/vm/arm/AsmMacros_Shared.h b/src/coreclr/vm/arm/AsmMacros_Shared.h new file mode 100644 index 000000000000..afe0afb7b8ed --- /dev/null +++ b/src/coreclr/vm/arm/AsmMacros_Shared.h @@ -0,0 +1,8 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + + +#include "asmconstants.h" +#include "unixasmmacros.inc" diff --git a/src/coreclr/vm/arm/asmconstants.h b/src/coreclr/vm/arm/asmconstants.h index 4495b42dca78..47efe69e5f9c 100644 --- a/src/coreclr/vm/arm/asmconstants.h +++ b/src/coreclr/vm/arm/asmconstants.h @@ -36,50 +36,46 @@ ASMCONSTANTS_C_ASSERT(FRAMETYPE_InlinedCallFrame == (int)FrameIdentifier::Inline #define DynamicHelperFrameFlags_ObjectArg 1 #define DynamicHelperFrameFlags_ObjectArg2 2 +#define ThisPtrRetBufPrecodeData__Target 0x00 +ASMCONSTANTS_C_ASSERT(ThisPtrRetBufPrecodeData__Target == offsetof(ThisPtrRetBufPrecodeData, Target)); + #define REDIRECTSTUB_SP_OFFSET_CONTEXT 0 +#define OFFSETOF__MethodTable__m_dwFlags 0x00 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); + +#define OFFSETOF__MethodTable__m_usComponentSize 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_usComponentSize == offsetof(MethodTable, m_dwFlags)); -// Offset of the array containing the address of captured registers in MachState -#define MachState__captureR4_R11 0x0 -ASMCONSTANTS_C_ASSERT(MachState__captureR4_R11 == offsetof(MachState, captureR4_R11)) +#define OFFSETOF__MethodTable__m_uBaseSize 0x04 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_uBaseSize == offsetof(MethodTable, m_BaseSize)); -// Offset of the array containing the address of preserved registers in MachState -#define MachState___R4_R11 0x20 -ASMCONSTANTS_C_ASSERT(MachState___R4_R11 == offsetof(MachState, _R4_R11)) +#define OFFSETOF__Object__m_pEEType 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Object__m_pEEType == offsetof(Object, m_pMethTab)); -#define MachState__isValid 0x48 -ASMCONSTANTS_C_ASSERT(MachState__isValid == offsetof(MachState, _isValid)) +#define OFFSETOF__Array__m_Length 0x4 
+ASMCONSTANTS_C_ASSERT(OFFSETOF__Array__m_Length == offsetof(ArrayBase, m_NumComponents)); -#define LazyMachState_captureR4_R11 MachState__captureR4_R11 -ASMCONSTANTS_C_ASSERT(LazyMachState_captureR4_R11 == offsetof(LazyMachState, captureR4_R11)) +#define MAX_STRING_LENGTH 0x3FFFFFDF +ASMCONSTANTS_C_ASSERT(MAX_STRING_LENGTH == CORINFO_String_MaxLength); -#define LazyMachState_captureSp (MachState__isValid+4) -ASMCONSTANTS_C_ASSERT(LazyMachState_captureSp == offsetof(LazyMachState, captureSp)) +#define STRING_COMPONENT_SIZE 2 -#define LazyMachState_captureIp (LazyMachState_captureSp+4) -ASMCONSTANTS_C_ASSERT(LazyMachState_captureIp == offsetof(LazyMachState, captureIp)) +#define STRING_BASE_SIZE 0xE +ASMCONSTANTS_C_ASSERT(STRING_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(WCHAR)); -#define MethodTable__m_BaseSize 0x04 -ASMCONSTANTS_C_ASSERT(MethodTable__m_BaseSize == offsetof(MethodTable, m_BaseSize)); +#define SZARRAY_BASE_SIZE 0xC +ASMCONSTANTS_C_ASSERT(SZARRAY_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD)); -#define MethodTable__m_dwFlags 0x0 -ASMCONSTANTS_C_ASSERT(MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); +#define ASM_MIN_OBJECT_SIZE 0xC +ASMCONSTANTS_C_ASSERT(ASM_MIN_OBJECT_SIZE == MIN_OBJECT_SIZE); #define MethodTable__enum_flag_ContainsGCPointers 0x01000000 ASMCONSTANTS_C_ASSERT(MethodTable__enum_flag_ContainsGCPointers == MethodTable::enum_flag_ContainsGCPointers); -#define MethodTable__m_ElementType DBG_FRE(0x24, 0x20) -ASMCONSTANTS_C_ASSERT(MethodTable__m_ElementType == offsetof(MethodTable, m_ElementTypeHnd)); - #define SIZEOF__MethodTable DBG_FRE(0x2c, 0x28) ASMCONSTANTS_C_ASSERT(SIZEOF__MethodTable == sizeof(MethodTable)); -#define ArrayBase__m_NumComponents 0x4 -ASMCONSTANTS_C_ASSERT(ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents)); - -#define PtrArray__m_Array 0x8 -ASMCONSTANTS_C_ASSERT(PtrArray__m_Array == offsetof(PtrArray, m_Array)); - #define TypeHandle_CanCast 0x1 // TypeHandle::CanCast #define SIZEOF__Frame 0x8 @@ -128,6 +124,16 @@ ASMCONSTANTS_C_ASSERT(Thread__m_fPreemptiveGCDisabled == offsetof(Thread, m_fPre ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame)); #define Thread_m_pFrame Thread__m_pFrame +#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context)); + +#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) + + offsetof(gc_alloc_context, alloc_ptr)); + +#define OFFSETOF__ee_alloc_context__combined_limit 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, m_CombinedLimit)); + #define ASM__VTABLE_SLOTS_PER_CHUNK 8 ASMCONSTANTS_C_ASSERT(ASM__VTABLE_SLOTS_PER_CHUNK == VTABLE_SLOTS_PER_CHUNK) @@ -192,8 +198,8 @@ ASMCONSTANTS_C_ASSERT(FixupPrecodeData__MethodDesc == offsetof(FixupPrecodeData, #define FixupPrecodeData__PrecodeFixupThunk 0x08 ASMCONSTANTS_C_ASSERT(FixupPrecodeData__PrecodeFixupThunk == offsetof(FixupPrecodeData, PrecodeFixupThunk)) -#define StubPrecodeData__MethodDesc 0x00 -ASMCONSTANTS_C_ASSERT(StubPrecodeData__MethodDesc == offsetof(StubPrecodeData, MethodDesc)) +#define StubPrecodeData__SecretParam 0x00 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__SecretParam == offsetof(StubPrecodeData, SecretParam)) #define StubPrecodeData__Target 0x04 ASMCONSTANTS_C_ASSERT(StubPrecodeData__Target == offsetof(StubPrecodeData, Target)) 
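The asmconstants.h hunk above relies on a simple pattern: every hand-maintained #define consumed by assembly code is paired with an ASMCONSTANTS_C_ASSERT against offsetof/sizeof on the real C++ type, so layout drift breaks the build instead of silently miscompiling the stubs. A minimal standalone C++ sketch of the idea follows; MyMethodTable and the simplified assert macro are illustrative stand-ins, not the runtime's actual definitions.

#include <cstddef>

// Simplified stand-in for the runtime's ASMCONSTANTS_C_ASSERT macro.
#define ASMCONSTANTS_C_ASSERT(cond) static_assert(cond, "asm constant out of sync with C++ layout")

// Hypothetical type standing in for MethodTable.
struct MyMethodTable
{
    unsigned m_dwFlags;   // offset 0x00
    unsigned m_BaseSize;  // offset 0x04
};

// The literal offset that the assembly hard-codes...
#define OFFSETOF__MyMethodTable__m_BaseSize 0x04
// ...is verified against the real layout at compile time.
ASMCONSTANTS_C_ASSERT(OFFSETOF__MyMethodTable__m_BaseSize == offsetof(MyMethodTable, m_BaseSize));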
diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 5017a582f3ab..bf655df04704 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -135,23 +135,6 @@ CallDescrWorkerInternalReturnAddressOffset: // ------------------------------------------------------------------ -// void LazyMachStateCaptureState(struct LazyMachState *pState)// - LEAF_ENTRY LazyMachStateCaptureState, _TEXT - - // marks that this is not yet valid - mov r1, #0 - str r1, [r0, #MachState__isValid] - - str lr, [r0, #LazyMachState_captureIp] - str sp, [r0, #LazyMachState_captureSp] - - add r1, r0, #LazyMachState_captureR4_R11 - stm r1, {r4-r11} - - mov pc, lr - - LEAF_END LazyMachStateCaptureState, _TEXT - // // r12 = UMEntryThunk* // @@ -260,46 +243,6 @@ ThePreStubPatchLabel: NESTED_END ResolveWorkerChainLookupAsmStub, _TEXT - // - // If a preserved register were pushed onto the stack between - // the managed caller and the H_M_F, _R4_R11 will point to its - // location on the stack and it would have been updated on the - // stack by the GC already and it will be popped back into the - // appropriate register when the appropriate epilog is run. - // - // Otherwise, the register is preserved across all the code - // in this HCALL or FCALL, so we need to update those registers - // here because the GC will have updated our copies in the - // frame. - // - // So, if _R4_R11 points into the MachState, we need to update - // the register here. That's what this macro does. - // - - .macro RestoreRegMS regIndex, reg - - // Incoming: - // - // R0 = address of MachState - // - // $regIndex: Index of the register (R4-R11). For R4, index is 4. - // For R5, index is 5, and so on. - // - // $reg: Register name (e.g. R4, R5, etc) - // - // Get the address of the specified captured register from machine state - add r2, r0, #(MachState__captureR4_R11 + ((\regIndex-4)*4)) - - // Get the address of the specified preserved register from machine state - ldr r3, [r0, #(MachState___R4_R11 + ((\regIndex-4)*4))] - - cmp r2, r3 - bne 0f - ldr \reg, [r2] -0: - - .endm - #ifdef PROFILING_SUPPORTED // @@ -334,7 +277,7 @@ LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler PROLOG_PUSH "{r0,r3,r9,r12}" - // for the 5 arguments that do not need popped plus 4 bytes of alignment + // for the 5 arguments that do not need popped plus 4 bytes of alignment alloc_stack 6*4 // push fp regs @@ -362,7 +305,7 @@ NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler // clear hiddenArg movw r2, #0 str r2, [sp, PROFILE_PLATFORM_SPECIFIC_DATA__hiddenArg] - + // set the flag to indicate what hook this is movw r2, \flags str r2, [sp, PROFILE_PLATFORM_SPECIFIC_DATA__flags] @@ -390,38 +333,6 @@ GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL #endif -// EXTERN_C int __fastcall HelperMethodFrameRestoreState( -// INDEBUG_COMMA(HelperMethodFrame *pFrame) -// MachState *pState -// ) - LEAF_ENTRY HelperMethodFrameRestoreState, _TEXT - -#ifdef _DEBUG - mov r0, r1 -#endif - - // If machine state is invalid, then simply exit - ldr r1, [r0, #MachState__isValid] - cmp r1, #0 - beq LOCAL_LABEL(Done) - - RestoreRegMS 4, R4 - RestoreRegMS 5, R5 - RestoreRegMS 6, R6 - RestoreRegMS 7, R7 - RestoreRegMS 8, R8 - RestoreRegMS 9, R9 - RestoreRegMS 10, R10 - RestoreRegMS 11, R11 -LOCAL_LABEL(Done): - // Its imperative that the return value of HelperMethodFrameRestoreState is zero - // as it is used in the state machine to loop until it becomes zero. 
- // Refer to HELPER_METHOD_FRAME_END macro for details. - mov r0,#0 - bx lr - - LEAF_END HelperMethodFrameRestoreState, _TEXT - #if 0 // ------------------------------------------------------------------ // Macro to generate Redirection Stubs @@ -561,6 +472,9 @@ LOCAL_LABEL(stackProbe_loop): __\name\()__g_ephemeral_low_offset = 0xffff __\name\()__g_ephemeral_high_offset = 0xffff __\name\()__g_card_table_offset = 0xffff +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + __\name\()__g_sw_ww_table_offset = 0xffff +#endif .endm .macro LOAD_GC_GLOBAL name, regName, globalName @@ -603,6 +517,28 @@ LOCAL_LABEL(stackProbe_loop): 0: .endm +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + .macro UPDATE_WRITE_WATCH_TABLE name, ptrReg, mp, tmpReg + + LOAD_GC_GLOBAL \name, __wbScratch, g_write_watch_table + cbz __wbScratch, 2f + add __wbScratch, __wbScratch, \ptrReg, lsr #0xc // SoftwareWriteWatch::AddressToTableByteIndexShift + + .if(\mp) + ldrb \tmpReg, [__wbScratch] + cmp \tmpReg, #0xff + itt ne + movne \tmpReg, 0xff + strbne \tmpReg, [__wbScratch] + .else + mov \tmpReg, #0xff + strb \tmpReg, [__wbScratch] + .endif + +2: + .endm +#endif + .macro CHECK_GC_HEAP_RANGE name, ptrReg, label LOAD_GC_GLOBAL \name, __wbScratch, g_lowest_address cmp \ptrReg, __wbScratch @@ -621,6 +557,9 @@ LOCAL_LABEL(stackProbe_loop): str r1, [r0] UPDATE_GC_SHADOW \name, r0, r1 +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + UPDATE_WRITE_WATCH_TABLE \name, r0, \mp, r12 +#endif UPDATE_CARD_TABLE \name, r0, r1, \mp, \post, r0 bx lr LEAF_END_MARKED \name, _TEXT @@ -632,6 +571,9 @@ LOCAL_LABEL(stackProbe_loop): str r1, [r0] CHECK_GC_HEAP_RANGE \name, r0, 1f UPDATE_GC_SHADOW \name, r0, r1 +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + UPDATE_WRITE_WATCH_TABLE \name, r0, 0, r12 +#endif UPDATE_CARD_TABLE \name, r0, r1, 0, \post, r0 1: bx lr @@ -645,6 +587,9 @@ LOCAL_LABEL(stackProbe_loop): str r1, [r0] CHECK_GC_HEAP_RANGE \name, r0, 1f UPDATE_GC_SHADOW \name, r0, r1 +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + UPDATE_WRITE_WATCH_TABLE \name, r0, 1, r12 +#endif UPDATE_CARD_TABLE \name, r0, r1, 1, \post, r0 bx lr 1: @@ -664,6 +609,9 @@ LOCAL_LABEL(stackProbe_loop): str r2, [r0] CHECK_GC_HEAP_RANGE \name, r0, 1f UPDATE_GC_SHADOW \name, r0, r2 +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + UPDATE_WRITE_WATCH_TABLE \name, r0, \mp, r12 +#endif UPDATE_CARD_TABLE \name, r0, r2, \mp, \post, r2 1: add r0, #4 @@ -681,6 +629,9 @@ LOCAL_LABEL(stackProbe_loop): .word __\name\()__g_ephemeral_low_offset .word __\name\()__g_ephemeral_high_offset .word __\name\()__g_card_table_offset +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + .word __\name\()__g_write_watch_table_offset +#endif .endm // There 4 versions of each write barriers. 
A 2x2 combination of multi-proc/single-proc and pre/post grow version @@ -844,22 +795,19 @@ DelayLoad_Helper\suffix: // ------------------------------------------------------------------ // Hijack function for functions which return a value type NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler - PROLOG_PUSH "{r0,r4-r11,lr}" + // saving r1 as well, as it can have partial return value when return is > 32 bits + PROLOG_PUSH "{r0,r1,r2,r4-r11,lr}" PROLOG_VPUSH "{d0-d3}" // saving as d0-d3 can have the floating point return value - PROLOG_PUSH "{r1}" // saving as r1 can have partial return value when return is > 32 bits - alloc_stack 4 // 8 byte align CHECK_STACK_ALIGNMENT - add r0, sp, #40 + add r0, sp, #32 bl C_FUNC(OnHijackWorker) - free_stack 4 - EPILOG_POP "{r1}" EPILOG_VPOP "{d0-d3}" - EPILOG_POP "{r0,r4-r11,pc}" + EPILOG_POP "{r0,r1,r2,r4-r11,pc}" NESTED_END OnHijackTripThread, _TEXT #endif @@ -937,3 +885,15 @@ LOCAL_LABEL(JIT_PollGCRarePath): ldr r2, [r2] EPILOG_BRANCH_REG r2 LEAF_END JIT_PollGC, _TEXT + + +//r0 -This pointer +//r1 -ReturnBuffer +LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT + ldr r12, [r12, #ThisPtrRetBufPrecodeData__Target] + // Use XOR swap technique to avoid the need to spill to the stack + eor r0, r0, r1 + eor r1, r0, r1 + eor r0, r0, r1 + EPILOG_BRANCH_REG r12 +LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT diff --git a/src/coreclr/vm/arm/asmmacros.h b/src/coreclr/vm/arm/asmmacros.h index db1a6a7cd3c0..51cc3c8cf4d3 100644 --- a/src/coreclr/vm/arm/asmmacros.h +++ b/src/coreclr/vm/arm/asmmacros.h @@ -160,8 +160,8 @@ __PWTB_StackAlloc SETA __PWTB_TransitionBlock ; __tls_array equ 0x2C ;; offsetof(TEB, ThreadLocalStoragePointer) - GBLS __SECTIONREL_gCurrentThreadInfo -__SECTIONREL_gCurrentThreadInfo SETS "SECTIONREL_gCurrentThreadInfo" + GBLS __SECTIONREL_t_CurrentThreadInfo +__SECTIONREL_t_CurrentThreadInfo SETS "SECTIONREL_t_CurrentThreadInfo" MACRO INLINE_GETTHREAD $destReg, $trashReg mrc p15, 0, $trashReg, c13, c0, 2 ldr $trashReg, [$trashReg, #__tls_array] ldr $destReg, [$trashReg, $destReg, lsl #2] - ldr $trashReg, $__SECTIONREL_gCurrentThreadInfo - ldr $destReg,[$destReg, $trashReg] ; return gCurrentThreadInfo.m_pThread + ldr $trashReg, $__SECTIONREL_t_CurrentThreadInfo + ldr $destReg,[$destReg, $trashReg] ; return t_CurrentThreadInfo.m_pThread MEND ;----------------------------------------------------------------------------- ; INLINE_GETTHREAD. Optionally, it can be also used after any function that used INLINE_GETTHREAD ; to improve density, or to reduce distance between the constant pool and its use.
; - SETALIAS gCurrentThreadInfo, ?gCurrentThreadInfo@@3UThreadLocalInfo@@A + SETALIAS t_CurrentThreadInfo, ?t_CurrentThreadInfo@@3UThreadLocalInfo@@A MACRO INLINE_GETTHREAD_CONSTANT_POOL - EXTERN $gCurrentThreadInfo + EXTERN $t_CurrentThreadInfo -$__SECTIONREL_gCurrentThreadInfo - DCDU $gCurrentThreadInfo +$__SECTIONREL_t_CurrentThreadInfo + DCDU $t_CurrentThreadInfo RELOC 15 ;; SECREL -__SECTIONREL_gCurrentThreadInfo SETS "$__SECTIONREL_gCurrentThreadInfo":CC:"_" +__SECTIONREL_t_CurrentThreadInfo SETS "$__SECTIONREL_t_CurrentThreadInfo":CC:"_" MEND diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 864b4bd48a42..1d3c5c6a2071 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -52,9 +52,6 @@ struct ArgLocDesc; extern PCODE GetPreStubEntryPoint(); -// CPU-dependent functions -Stub * GenerateInitPInvokeFrameHelper(); - EXTERN_C void checkStack(void); #define THUMB_CODE 1 @@ -258,6 +255,30 @@ inline TADDR GetFP(const T_CONTEXT * context) return (TADDR)(context->R11); } +inline void SetFirstArgReg(T_CONTEXT *context, TADDR value) +{ + LIMITED_METHOD_DAC_CONTRACT; + context->R0 = DWORD(value); +} + +inline TADDR GetFirstArgReg(T_CONTEXT *context) +{ + LIMITED_METHOD_DAC_CONTRACT; + return (TADDR)(context->R0); +} + +inline void SetSecondArgReg(T_CONTEXT *context, TADDR value) +{ + LIMITED_METHOD_DAC_CONTRACT; + context->R1 = DWORD(value); +} + +inline TADDR GetSecondArgReg(T_CONTEXT *context) +{ + LIMITED_METHOD_DAC_CONTRACT; + return (TADDR)(context->R1); +} + inline void ClearITState(T_CONTEXT *context) { LIMITED_METHOD_DAC_CONTRACT; context->Cpsr = context->Cpsr & 0xf9ff03ff; @@ -555,27 +576,6 @@ class StubLinkerCPU : public StubLinker ThumbEmitJumpRegister(thumbRegLr); } - void ThumbEmitGetThread(ThumbReg dest); - - void ThumbEmitNop() - { - // nop - Emit16(0xbf00); - } - - void ThumbEmitBreakpoint() - { - // Permanently undefined instruction #0xfe (see ARMv7-A A6.2.6). The debugger seems to accept this as - // a reasonable breakpoint substitute (it's what DebugBreak uses). Bkpt #0, on the other hand, always - // seems to flow directly to the kernel debugger (even if we ignore it there it doesn't seem to be - // picked up by the user mode debugger). 
-#ifdef __linux__ - Emit16(0xde01); -#else - Emit16(0xdefe); -#endif - } - void ThumbEmitMovConstant(ThumbReg dest, int constant) { _ASSERT(dest != thumbRegPc); @@ -663,14 +663,6 @@ class StubLinkerCPU : public StubLinker Emit16((WORD)(0x0b00 | (source << 12) | offset)); } - void ThumbEmitLoadOffsetScaledReg(ThumbReg dest, ThumbReg base, ThumbReg offset, int shift) - { - _ASSERTE(shift >=0 && shift <=3); - - Emit16((WORD)(0xf850 | base)); - Emit16((WORD)((dest << 12) | (shift << 4) | offset)); - } - void ThumbEmitCallRegister(ThumbReg target) { // blx regTarget @@ -752,14 +744,12 @@ class StubLinkerCPU : public StubLinker void ThumbEmitAddReg(ThumbReg dest, ThumbReg source) { - _ASSERTE(dest != source); Emit16((WORD)(0x4400 | ((dest & 0x8)<<4) | (source<<3) | (dest & 0x7))); } void ThumbEmitAdd(ThumbReg dest, ThumbReg source, unsigned int value) { - if(value<4096) { // addw dest, source, #value @@ -780,18 +770,6 @@ class StubLinkerCPU : public StubLinker } } - void ThumbEmitSub(ThumbReg dest, ThumbReg source, unsigned int value) - { - _ASSERTE(value < 4096); - - // subw dest, source, #value - unsigned int i = (value & 0x800) >> 11; - unsigned int imm3 = (value & 0x700) >> 8; - unsigned int imm8 = value & 0xff; - Emit16((WORD)(0xf2a0 | (i << 10) | source)); - Emit16((WORD)((imm3 << 12) | (dest << 8) | imm8)); - } - void ThumbEmitIncrement(ThumbReg dest, unsigned int value) { while (value) @@ -847,18 +825,6 @@ class StubLinkerCPU : public StubLinker } } - void ThumbEmitLoadStoreMultiple(ThumbReg base, bool load, WORD registers) - { - _ASSERTE(CountBits(registers) > 1); - _ASSERTE((registers & 0xFF00) == 0); // This only supports the small encoding - _ASSERTE(base < 8); // This only supports the small encoding - _ASSERTE((base.Mask() & registers) == 0); // This only supports the small encoding - - // (LDM|STM) base, {registers} - WORD flag = load ? 0x0800 : 0; - Emit16(0xc000 | flag | ((base & 7) << 8) | (registers & 0xFF)); - } - void ThumbEmitPop(WORD registers) { _ASSERTE(registers != 0); @@ -889,24 +855,6 @@ class StubLinkerCPU : public StubLinker } } - void ThumbEmitLoadVFPSingleRegIndirect(ThumbVFPSingleReg dest, ThumbReg source, int offset) - { - _ASSERTE((offset >= -1020) && (offset <= 1020)); - _ASSERTE(offset%4==0); - - Emit16((WORD) (0xed10 | ((offset > 0 ? 0x1: 0x0) << 7) | ((dest & 0x1) << 6) | source)); - Emit16((WORD) (0x0a00 | ((dest & 0x1e) << 11) | (abs(offset)>>2))); - } - - void ThumbEmitLoadVFPDoubleRegIndirect(ThumbVFPDoubleReg dest, ThumbReg source, int offset) - { - _ASSERTE((offset >= -1020) && (offset <= 1020)); - _ASSERTE(offset%4==0); - - Emit16((WORD) (0xed10 | ((offset > 0 ? 0x1: 0x0) << 7) | ((dest & 0x10) << 6) | source)); - Emit16((WORD) (0x0b00 | ((dest & 0xf) << 12) | (abs(offset)>>2))); - } - // Scratches r12. void ThumbEmitTailCallManagedMethod(MethodDesc *pMD); @@ -914,40 +862,6 @@ class StubLinkerCPU : public StubLinker VOID EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, struct ShuffleEntry *pShuffleEntryArray, void* extraArg); }; -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable:4359) // Prevent "warning C4359: 'UMEntryThunkCode': Alignment specifier is less than actual alignment (8), and will be ignored." 
in crossbitness scenario -#endif // _MSC_VER - -struct DECLSPEC_ALIGN(4) UMEntryThunkCode -{ - WORD m_code[4]; - - TADDR m_pTargetCode; - TADDR m_pvSecretParam; - - void Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam); - void Poison(); - - LPCBYTE GetEntryPoint() const - { - LIMITED_METHOD_CONTRACT; - - return (LPCBYTE)((TADDR)this | THUMB_CODE); - } - - static int GetEntryPointOffset() - { - LIMITED_METHOD_CONTRACT; - - return 0; - } -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif // _MSC_VER - struct HijackArgs { union @@ -958,6 +872,16 @@ struct HijackArgs // this is only used by functions OnHijackWorker() }; + // saving r1 as well, as it can have partial return value when return is > 32 bits + // also keeps the struct size 8-byte aligned. + DWORD R1; + + union + { + DWORD R2; + size_t AsyncRet; + }; + // // Non-volatile Integer registers // @@ -996,63 +920,6 @@ inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode, bool // Create alias for optimized implementations of helpers provided on this platform // -//------------------------------------------------------------------------ -// -// Precode definitions -// -//------------------------------------------------------------------------ -// -// Note: If you introduce new precode implementation below, then please -// update PrecodeStubManager::CheckIsStub_Internal to account for it. - -// Precode to shuffle this and retbuf for closed delegates over static methods with return buffer -struct ThisPtrRetBufPrecode { - - static const int Type = 0x01; - - // mov r12, r0 - // mov r0, r1 - // mov r1, r12 - // ldr pc, [pc, #0] ; =m_pTarget - // dcd pTarget - // dcd pMethodDesc - WORD m_rgCode[6]; - TADDR m_pTarget; - TADDR m_pMethodDesc; - - void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); - - TADDR GetMethodDesc() - { - LIMITED_METHOD_DAC_CONTRACT; - - return m_pMethodDesc; - } - - PCODE GetTarget() - { - LIMITED_METHOD_DAC_CONTRACT; - return m_pTarget; - } - -#ifndef DACCESS_COMPILE - BOOL SetTargetInterlocked(TADDR target, TADDR expected) - { - CONTRACTL - { - THROWS; - GC_TRIGGERS; - } - CONTRACTL_END; - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(ThisPtrRetBufPrecode)); - return InterlockedCompareExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == (LONG)expected; - } -#endif // !DACCESS_COMPILE -}; -typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; - - //********************************************************************** // Miscellaneous //********************************************************************** diff --git a/src/coreclr/vm/arm/ehhelpers.S b/src/coreclr/vm/arm/ehhelpers.S index eeb6e3894a57..ab75b1cb6c1e 100644 --- a/src/coreclr/vm/arm/ehhelpers.S +++ b/src/coreclr/vm/arm/ehhelpers.S @@ -126,21 +126,22 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 // frame pointer for accessing the locals in the parent method. NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler - PROLOG_PUSH "{r7, lr}" - PROLOG_STACK_SAVE r7 + PROLOG_PUSH "{r11, lr}" // On entry: // // R0 = throwable - // R1 = SP of the caller of the method/funclet containing the filter + // R1 = FP of main method // R2 = PC to invoke // R3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
// // Save the SP of this function str sp, [r3] + // Restore frame pointer + mov r11, r1 // Invoke the filter funclet blx r2 - EPILOG_POP "{r7, pc}" + EPILOG_POP "{r11, pc}" NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/arm/gmscpu.h b/src/coreclr/vm/arm/gmscpu.h deleted file mode 100644 index 42641889ad9e..000000000000 --- a/src/coreclr/vm/arm/gmscpu.h +++ /dev/null @@ -1,173 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/**************************************************************/ -/* gmscpu.h */ -/**************************************************************/ -/* HelperFrame is defines 'GET_STATE(machState)' macro, which - figures out what the state of the machine will be when the - current method returns. It then stores the state in the - JIT_machState structure. */ - -/**************************************************************/ - -#ifndef __gmscpu_h__ -#define __gmscpu_h__ - -#define __gmscpu_h__ - -#ifdef _DEBUG -class HelperMethodFrame; -struct MachState; -EXTERN_C MachState* __stdcall HelperMethodFrameConfirmState(HelperMethodFrame* frame, void* esiVal, void* ediVal, void* ebxVal, void* ebpVal); -#endif - - // A MachState indicates the register state of the processor at some point in time (usually - // just before or after a call is made). It can be made one of two ways. Either explicitly - // (when you for some reason know the values of all the registers), or implicitly using the - // GET_STATE macros. - -typedef DPTR(struct MachState) PTR_MachState; -struct MachState { - - BOOL isValid() { LIMITED_METHOD_DAC_CONTRACT; return _isValid; } - TADDR GetRetAddr() { LIMITED_METHOD_DAC_CONTRACT; return _pc; } - - friend class HelperMethodFrame; - friend class CheckAsmOffsets; - friend struct LazyMachState; - - -protected: - // The simplest way to understand the relationship between capturedR4_R11 (registers - // representing the captured state) and _R4_R11 (pointers to registers representing - // preserved state) is as follows: - // - // 1) LazyMachState::unwindLazyState is invoked by HelperMethodFrame to initialize the captured - // state. It then performs an unwind and copies the register pointers to _R4_R11. - // - // 2) HelperMethodFrame::UpdateRegdisplay is invoked by our StackWalker that initializes - // the regdisplay with the updated register state. - // - // 3) HelperMethodFrameRestoreState is invoked when the HMF state machine exits and it - // restores the values of unmodified registers. - - TADDR captureR4_R11[8]; // Registers R4..R11 at the time of capture - - PTR_DWORD _R4_R11[8]; // Preserved registers - - TADDR _pc; // program counter after the function returns - TADDR _sp; // stack pointer after the function returns - - BOOL _isValid; -}; - -/********************************************************************/ -/* This allows you to defer the computation of the Machine state - until later. Note that we don't reuse slots, because we want - this to be threadsafe without locks */ - -struct LazyMachState : public MachState { - // compute the machine state of the processor as it will exist just - // after the return after at most'funCallDepth' number of functions. - // if 'testFtn' is non-NULL, the return address is tested at each - // return instruction encountered. 
If this test returns non-NULL, - // then stack walking stops (thus you can walk up to the point that the - // return address matches some criteria - - // Normally this is called with funCallDepth=1 and testFtn = 0 so that - // it returns the state of the processor after the function that called 'captureState()' - void setLazyStateFromUnwind(MachState* copy); - static void unwindLazyState(LazyMachState* baseState, - MachState* lazyState, - DWORD threadId, - int funCallDepth = 1); - - friend class HelperMethodFrame; - friend class CheckAsmOffsets; -private: - TADDR captureSp; // Stack pointer at the time of capture - TADDR captureIp; // Instruction pointer at the time of capture -}; - -// R4 - R11 -#define NUM_NONVOLATILE_CONTEXT_POINTERS 8 - -inline void LazyMachState::setLazyStateFromUnwind(MachState* copy) -{ - LIMITED_METHOD_CONTRACT; - -#if defined(DACCESS_COMPILE) - // This function cannot be called in DAC because DAC cannot update target memory. - DacError(E_FAIL); - return; - -#else // !DACCESS_COMPILE - this->_pc = copy->_pc; - this->_sp = copy->_sp; - - // Capture* has already been set, so there is no need to touch it. - // This was setup in LazyMachState::unwindLazyState just before we - // called into the OS for unwind. - - // Prepare to loop over the nonvolatile context pointers for and - // make sure to properly copy interior pointers into the new struct. - - PDWORD* pSrc = &copy->_R4_R11[0]; - PDWORD* pDst = &this->_R4_R11[0]; - - const PDWORD LowerBoundDst = (PDWORD) this; - const PDWORD LowerBoundSrc = (PDWORD) copy; - - // Calculate the upperbound till which we need to loop (i.e. the highest address till - // which we have saved non-volatile pointers). - const PDWORD UpperBoundSrc = (PDWORD) (((BYTE*)LowerBoundSrc) + offsetof(LazyMachState, _pc)); - -#ifdef _DEBUG - int count = 0; -#endif // _DEBUG - - while (((PDWORD)pSrc) < UpperBoundSrc) - { -#ifdef _DEBUG - count++; -#endif // _DEBUG - - PDWORD valueSrc = *pSrc++; - - // If any non-volatile register pointer is pointing to the corresponding register field - // in the MachState, then make the corresponding pointer in "this" MachState point - // to the corresponding field. - if ((LowerBoundSrc <= valueSrc) && (valueSrc < UpperBoundSrc)) - { - valueSrc = (PDWORD)((BYTE*)valueSrc - (BYTE*)LowerBoundSrc + (BYTE*)LowerBoundDst); - } - - *pDst++ = valueSrc; - } - - CONSISTENCY_CHECK_MSGF(count == NUM_NONVOLATILE_CONTEXT_POINTERS, ("count != NUM_NONVOLATILE_CONTEXT_POINTERS, actually = %d", count)); - - // this has to be last because we depend on write ordering to - // synchronize the race implicit in updating this struct - VolatileStore(&_isValid, TRUE); - -#endif // !DACCESS_COMPILE - -} typedef DPTR(LazyMachState) PTR_LazyMachState; - -// Do the initial capture of the machine state. This is meant to be -// as light weight as possible, as we may never need the state that -// we capture. Thus to complete the process you need to call -// 'getMachState()', which finishes the process -EXTERN_C void LazyMachStateCaptureState(struct LazyMachState *pState); - -// CAPTURE_STATE captures just enough register state so that the state of the -// processor can be deterined just after the routine that has CAPTURE_STATE in -// it returns.
- -#define CAPTURE_STATE(machState, ret) \ - LazyMachStateCaptureState(machState) - -#endif diff --git a/src/coreclr/vm/arm/patchedcode.S b/src/coreclr/vm/arm/patchedcode.S index 95d53ae96a61..94a6b44d2854 100644 --- a/src/coreclr/vm/arm/patchedcode.S +++ b/src/coreclr/vm/arm/patchedcode.S @@ -26,17 +26,17 @@ // See ValidateWriteBarriers on how the sizes of these should be calculated .align 4 LEAF_ENTRY JIT_WriteBarrier, _TEXT - .space (0x84) + .space (0xA8) LEAF_END_MARKED JIT_WriteBarrier, _TEXT .align 4 LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT - .space (0x9C) + .space (0xC0) LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT .align 4 LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT - .space (0xA0) + .space (0xC4) LEAF_END_MARKED JIT_ByRefWriteBarrier , _TEXT LEAF_ENTRY JIT_PatchedWriteBarrierLast, _TEXT diff --git a/src/coreclr/vm/arm/pinvokestubs.S b/src/coreclr/vm/arm/pinvokestubs.S index cc84c541a183..b18d20f7d0c7 100644 --- a/src/coreclr/vm/arm/pinvokestubs.S +++ b/src/coreclr/vm/arm/pinvokestubs.S @@ -154,6 +154,42 @@ LOCAL_LABEL(RarePath): LEAF_END JIT_PInvokeEnd, _TEXT +// ------------------------------------------------------------------ +// IN: +// InlinedCallFrame (r4) = pointer to the InlinedCallFrame data +// OUT: +// Thread (r5) = pointer to Thread +// +// + LEAF_ENTRY JIT_InitPInvokeFrame, _TEXT + + PROLOG_PUSH "{r0-r4, lr}" + + bl C_FUNC(GetThreadHelper) + mov r5, r0 + + // set first slot to the value of InlinedCallFrame identifier (checked by runtime code) + mov r6, #FRAMETYPE_InlinedCallFrame + str r6, [r4] + + // pFrame->m_Next = pThread->m_pFrame; + ldr r6, [r5, #Thread_m_pFrame] + str r6, [r4, #Frame__m_Next] + + str r11, [r4, #InlinedCallFrame__m_pCalleeSavedFP] + str r9, [r4, #InlinedCallFrame__m_pSPAfterProlog] + mov r6, 0 + str r6, [r4, #InlinedCallFrame__m_pCallerReturnAddress] + add r6, sp, 24 + str r6, [r4, #InlinedCallFrame__m_pCallSiteSP] + + // pThread->m_pFrame = pFrame; + str r4, [r5, #Thread_m_pFrame] + + EPILOG_POP "{r0-r4, pc}" + + LEAF_END JIT_InitPInvokeFrame, _TEXT + // ------------------------------------------------------------------ // VarargPInvokeStub & VarargPInvokeGenILStub // There is a separate stub when the method has a hidden return buffer arg. diff --git a/src/coreclr/vm/arm/profiler.cpp b/src/coreclr/vm/arm/profiler.cpp index a57d1cc3eb81..64cea7804427 100644 --- a/src/coreclr/vm/arm/profiler.cpp +++ b/src/coreclr/vm/arm/profiler.cpp @@ -163,7 +163,7 @@ Stack for the above call will look as follows (stack growing downwards): EECodeInfo codeInfo((PCODE)pData->Pc); // We want to pass the caller SP here. 
- pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo); + pData->hiddenArg = EECodeManager::GetExactGenericsToken((TADDR)(pData->probeSp), (TADDR)(pData->R11), &codeInfo); } } } diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index 08448a0ab0d5..da16361ddc3f 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -25,6 +25,10 @@ #include "ecall.h" #include "threadsuspend.h" +#ifdef FEATURE_PERFMAP +#include "perfmap.h" +#endif + // target write barriers EXTERN_C void JIT_WriteBarrier(Object **dst, Object *ref); EXTERN_C void JIT_WriteBarrier_End(); @@ -287,6 +291,9 @@ struct WriteBarrierDescriptor DWORD m_dw_g_ephemeral_low_offset; // Offset of the instruction reading g_ephemeral_low DWORD m_dw_g_ephemeral_high_offset; // Offset of the instruction reading g_ephemeral_high DWORD m_dw_g_card_table_offset; // Offset of the instruction reading g_card_table +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + DWORD m_dw_g_write_watch_table_offset;// Offset of the instruction reading g_write_watch_table +#endif }; // Infrastructure used for mapping of the source and destination of current WB patching @@ -455,6 +462,9 @@ void UpdateGCWriteBarriers(bool postGrow = false) GWB_PATCH_OFFSET(g_ephemeral_low); GWB_PATCH_OFFSET(g_ephemeral_high); GWB_PATCH_OFFSET(g_card_table); +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + GWB_PATCH_OFFSET(g_write_watch_table); +#endif } pDesc++; @@ -463,6 +473,12 @@ void UpdateGCWriteBarriers(bool postGrow = false) int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) { + if (!IsWriteBarrierCopyEnabled()) + { + // If we didn't copy the write barriers, then don't update them. + return SWB_PASS; + } + // The runtime is not always suspended when this is called (unlike StompWriteBarrierEphemeral) but we have // no way to update the barrier code atomically on ARM since each 32-bit value we change is loaded over // two instructions. So we have to suspend the EE (which forces code out of the barrier functions) before @@ -488,246 +504,60 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) int StompWriteBarrierEphemeral(bool isRuntimeSuspended) { + if (!IsWriteBarrierCopyEnabled()) + { + // If we didn't copy the write barriers, then don't update them. + return SWB_PASS; + } + UNREFERENCED_PARAMETER(isRuntimeSuspended); _ASSERTE(isRuntimeSuspended); UpdateGCWriteBarriers(); return SWB_ICACHE_FLUSH; } -void FlushWriteBarrierInstructionCache() -{ - // We've changed code so we must flush the instruction cache. - BYTE *pbAlteredRange; - DWORD cbAlteredRange; - ComputeWriteBarrierRange(&pbAlteredRange, &cbAlteredRange); - FlushInstructionCache(GetCurrentProcess(), pbAlteredRange, cbAlteredRange); -} - - -#endif // !DACCESS_COMPILE - -void LazyMachState::unwindLazyState(LazyMachState* baseState, - MachState* unwoundstate, - DWORD threadId, - int funCallDepth) +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +int SwitchToWriteWatchBarrier(bool isRuntimeSuspended) { - T_CONTEXT ctx; - T_KNONVOLATILE_CONTEXT_POINTERS nonVolRegPtrs; - - ctx.ContextFlags = 0; // Read by PAL_VirtualUnwind. 
- - ctx.Pc = baseState->captureIp; - ctx.Sp = baseState->captureSp; - - ctx.R4 = unwoundstate->captureR4_R11[0] = baseState->captureR4_R11[0]; - ctx.R5 = unwoundstate->captureR4_R11[1] = baseState->captureR4_R11[1]; - ctx.R6 = unwoundstate->captureR4_R11[2] = baseState->captureR4_R11[2]; - ctx.R7 = unwoundstate->captureR4_R11[3] = baseState->captureR4_R11[3]; - ctx.R8 = unwoundstate->captureR4_R11[4] = baseState->captureR4_R11[4]; - ctx.R9 = unwoundstate->captureR4_R11[5] = baseState->captureR4_R11[5]; - ctx.R10 = unwoundstate->captureR4_R11[6] = baseState->captureR4_R11[6]; - ctx.R11 = unwoundstate->captureR4_R11[7] = baseState->captureR4_R11[7]; - -#if !defined(DACCESS_COMPILE) - // For DAC, if we get here, it means that the LazyMachState is uninitialized and we have to unwind it. - // The API we use to unwind in DAC is StackWalk64(), which does not support the context pointers. - // - // Restore the integer registers to KNONVOLATILE_CONTEXT_POINTERS to be used for unwinding. - nonVolRegPtrs.R4 = &unwoundstate->captureR4_R11[0]; - nonVolRegPtrs.R5 = &unwoundstate->captureR4_R11[1]; - nonVolRegPtrs.R6 = &unwoundstate->captureR4_R11[2]; - nonVolRegPtrs.R7 = &unwoundstate->captureR4_R11[3]; - nonVolRegPtrs.R8 = &unwoundstate->captureR4_R11[4]; - nonVolRegPtrs.R9 = &unwoundstate->captureR4_R11[5]; - nonVolRegPtrs.R10 = &unwoundstate->captureR4_R11[6]; - nonVolRegPtrs.R11 = &unwoundstate->captureR4_R11[7]; -#endif // DACCESS_COMPILE - - LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK LazyMachState::unwindLazyState(ip:%p,sp:%p)\n", baseState->captureIp, baseState->captureSp)); - - PCODE pvControlPc; - - do + if (!IsWriteBarrierCopyEnabled()) { -#ifdef DACCESS_COMPILE - HRESULT hr = DacVirtualUnwind(threadId, &ctx, &nonVolRegPtrs); - if (FAILED(hr)) - { - DacError(hr); - } -#else // DACCESS_COMPILE - BOOL success = PAL_VirtualUnwind(&ctx, &nonVolRegPtrs); - if (!success) - { - _ASSERTE(!"unwindLazyState: Unwinding failed"); - EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE); - } -#endif // DACCESS_COMPILE - pvControlPc = GetIP(&ctx); - if (funCallDepth > 0) - { - --funCallDepth; - if (funCallDepth == 0) - break; - } - else - { - // Determine whether given IP resides in JITted code. (It returns nonzero in that case.) - // Use it now to see if we've unwound to managed code yet. - BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc); - - if (fIsManagedCode) - break; - } + // If we didn't copy the write barriers, then don't update them. + return SWB_PASS; } - while(TRUE); - - // - // Update unwoundState so that HelperMethodFrameRestoreState knows which - // registers have been potentially modified. 
- // - - unwoundstate->_pc = ctx.Pc; - unwoundstate->_sp = ctx.Sp; - -#ifdef DACCESS_COMPILE - // For DAC builds, we update the registers directly since we dont have context pointers - unwoundstate->captureR4_R11[0] = ctx.R4; - unwoundstate->captureR4_R11[1] = ctx.R5; - unwoundstate->captureR4_R11[2] = ctx.R6; - unwoundstate->captureR4_R11[3] = ctx.R7; - unwoundstate->captureR4_R11[4] = ctx.R8; - unwoundstate->captureR4_R11[5] = ctx.R9; - unwoundstate->captureR4_R11[6] = ctx.R10; - unwoundstate->captureR4_R11[7] = ctx.R11; -#else // !DACCESS_COMPILE - // For non-DAC builds, update the register state from context pointers - unwoundstate->_R4_R11[0] = (PDWORD)nonVolRegPtrs.R4; - unwoundstate->_R4_R11[1] = (PDWORD)nonVolRegPtrs.R5; - unwoundstate->_R4_R11[2] = (PDWORD)nonVolRegPtrs.R6; - unwoundstate->_R4_R11[3] = (PDWORD)nonVolRegPtrs.R7; - unwoundstate->_R4_R11[4] = (PDWORD)nonVolRegPtrs.R8; - unwoundstate->_R4_R11[5] = (PDWORD)nonVolRegPtrs.R9; - unwoundstate->_R4_R11[6] = (PDWORD)nonVolRegPtrs.R10; - unwoundstate->_R4_R11[7] = (PDWORD)nonVolRegPtrs.R11; -#endif // DACCESS_COMPILE - unwoundstate->_isValid = true; + UNREFERENCED_PARAMETER(isRuntimeSuspended); + _ASSERTE(isRuntimeSuspended); + UpdateGCWriteBarriers(); + return SWB_ICACHE_FLUSH; } -void HelperMethodFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) +int SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended) { - CONTRACTL - { - NOTHROW; - GC_NOTRIGGER; - MODE_ANY; - SUPPORTS_DAC; - } - CONTRACTL_END; - -#ifndef DACCESS_COMPILE - if (updateFloats) - { - UpdateFloatingPointRegisters(pRD); - _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress()); - } -#endif // DACCESS_COMPILE - - pRD->IsCallerContextValid = FALSE; - pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. - - // - // Copy the saved state from the frame to the current context. - // - - LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK HelperMethodFrame::UpdateRegDisplay cached ip:%p, sp:%p\n", m_MachState._pc, m_MachState._sp)); - - #if defined(DACCESS_COMPILE) - // For DAC, we may get here when the HMF is still uninitialized. - // So we may need to unwind here. - if (!m_MachState.isValid()) + if (!IsWriteBarrierCopyEnabled()) { - // This allocation throws on OOM. 
- MachState* pUnwoundState = (MachState*)DacAllocHostOnlyInstance(sizeof(*pUnwoundState), true); - - EnsureInit(pUnwoundState); - - pRD->pCurrentContext->Pc = pRD->ControlPC = pUnwoundState->_pc; - pRD->pCurrentContext->Sp = pRD->SP = pUnwoundState->_sp; - - pRD->pCurrentContext->R4 = (DWORD)(pUnwoundState->captureR4_R11[0]); - pRD->pCurrentContext->R5 = (DWORD)(pUnwoundState->captureR4_R11[1]); - pRD->pCurrentContext->R6 = (DWORD)(pUnwoundState->captureR4_R11[2]); - pRD->pCurrentContext->R7 = (DWORD)(pUnwoundState->captureR4_R11[3]); - pRD->pCurrentContext->R8 = (DWORD)(pUnwoundState->captureR4_R11[4]); - pRD->pCurrentContext->R9 = (DWORD)(pUnwoundState->captureR4_R11[5]); - pRD->pCurrentContext->R10 = (DWORD)(pUnwoundState->captureR4_R11[6]); - pRD->pCurrentContext->R11 = (DWORD)(pUnwoundState->captureR4_R11[7]); - - pRD->pCurrentContextPointers->R4 = &pRD->pCurrentContext->R4; - pRD->pCurrentContextPointers->R5 = &pRD->pCurrentContext->R5; - pRD->pCurrentContextPointers->R6 = &pRD->pCurrentContext->R6; - pRD->pCurrentContextPointers->R7 = &pRD->pCurrentContext->R7; - pRD->pCurrentContextPointers->R8 = &pRD->pCurrentContext->R8; - pRD->pCurrentContextPointers->R9 = &pRD->pCurrentContext->R9; - pRD->pCurrentContextPointers->R10 = &pRD->pCurrentContext->R10; - pRD->pCurrentContextPointers->R11 = &pRD->pCurrentContext->R11; - pRD->pCurrentContextPointers->Lr = &pRD->pCurrentContext->Lr; - - return; + // If we didn't copy the write barriers, then don't update them. + return SWB_PASS; } -#endif // DACCESS_COMPILE - // reset pContext; it's only valid for active (top-most) frame - pRD->pContext = NULL; - pRD->ControlPC = GetReturnAddress(); - pRD->SP = (DWORD)(size_t)m_MachState._sp; - - pRD->pCurrentContext->Pc = pRD->ControlPC; - pRD->pCurrentContext->Sp = pRD->SP; - - pRD->pCurrentContext->R4 = *m_MachState._R4_R11[0]; - pRD->pCurrentContext->R5 = *m_MachState._R4_R11[1]; - pRD->pCurrentContext->R6 = *m_MachState._R4_R11[2]; - pRD->pCurrentContext->R7 = *m_MachState._R4_R11[3]; - pRD->pCurrentContext->R8 = *m_MachState._R4_R11[4]; - pRD->pCurrentContext->R9 = *m_MachState._R4_R11[5]; - pRD->pCurrentContext->R10 = *m_MachState._R4_R11[6]; - pRD->pCurrentContext->R11 = *m_MachState._R4_R11[7]; - - pRD->pCurrentContextPointers->R4 = m_MachState._R4_R11[0]; - pRD->pCurrentContextPointers->R5 = m_MachState._R4_R11[1]; - pRD->pCurrentContextPointers->R6 = m_MachState._R4_R11[2]; - pRD->pCurrentContextPointers->R7 = m_MachState._R4_R11[3]; - pRD->pCurrentContextPointers->R8 = m_MachState._R4_R11[4]; - pRD->pCurrentContextPointers->R9 = m_MachState._R4_R11[5]; - pRD->pCurrentContextPointers->R10 = m_MachState._R4_R11[6]; - pRD->pCurrentContextPointers->R11 = m_MachState._R4_R11[7]; - pRD->pCurrentContextPointers->Lr = NULL; + UNREFERENCED_PARAMETER(isRuntimeSuspended); + _ASSERTE(isRuntimeSuspended); + UpdateGCWriteBarriers(); + return SWB_ICACHE_FLUSH; } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -#ifndef DACCESS_COMPILE - -void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) +void FlushWriteBarrierInstructionCache() { - WRAPPER_NO_CONTRACT; - - int n = 0; - - m_rgCode[n++] = 0x4684; // mov r12, r0 - m_rgCode[n++] = 0x4608; // mov r0, r1 - m_rgCode[n++] = 0xea4f; // mov r1, r12 - m_rgCode[n++] = 0x010c; - m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #0] - m_rgCode[n++] = 0xf000; - - _ASSERTE(n == ARRAY_SIZE(m_rgCode)); - - m_pTarget = GetPreStubEntryPoint(); - m_pMethodDesc = (TADDR)pMD; + // We've changed code so we must flush the instruction cache. 
+ BYTE *pbAlteredRange; + DWORD cbAlteredRange; + ComputeWriteBarrierRange(&pbAlteredRange, &cbAlteredRange); + FlushInstructionCache(GetCurrentProcess(), pbAlteredRange, cbAlteredRange); } +#endif // !DACCESS_COMPILE + +#ifndef DACCESS_COMPILE /* Rough pseudo-code of interface dispatching: @@ -1101,88 +931,6 @@ void ResolveHolder::Initialize(ResolveHolder* pResolveHolderRX, _ASSERTE(patcherTarget == (PCODE)NULL); } -Stub *GenerateInitPInvokeFrameHelper() -{ - CONTRACT(Stub*) - { - THROWS; - GC_NOTRIGGER; - MODE_ANY; - - POSTCONDITION(CheckPointer(RETVAL)); - } - CONTRACT_END; - - CPUSTUBLINKER sl; - CPUSTUBLINKER *psl = &sl; - - CORINFO_EE_INFO::InlinedCallFrameInfo FrameInfo; - InlinedCallFrame::GetEEInfo(&FrameInfo); - - ThumbReg regFrame = ThumbReg(4); - ThumbReg regThread = ThumbReg(5); - ThumbReg regScratch = ThumbReg(6); - ThumbReg regR9 = ThumbReg(9); - - // Erect frame to perform call to GetThread - psl->ThumbEmitProlog(1, sizeof(ArgumentRegisters), FALSE); // Save r4 for aligned stack - - // Save argument registers around the GetThread call. Don't bother with using ldm/stm since this inefficient path anyway. - for (int reg = 0; reg < 4; reg++) - psl->ThumbEmitStoreRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r) + sizeof(*ArgumentRegisters::r) * reg); - - psl->ThumbEmitGetThread(regThread); - - for (int reg = 0; reg < 4; reg++) - psl->ThumbEmitLoadRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r) + sizeof(*ArgumentRegisters::r) * reg); - - // mov [regFrame], FrameIdentifier::InlinedCallFrame - psl->ThumbEmitMovConstant(regScratch, (DWORD)FrameIdentifier::InlinedCallFrame); - psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, 0); - - // ldr regScratch, [regThread + offsetof(Thread, m_pFrame)] - // str regScratch, [regFrame + FrameInfo.offsetOfFrameLink] - psl->ThumbEmitLoadRegIndirect(regScratch, regThread, offsetof(Thread, m_pFrame)); - psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfFrameLink); - - // str FP, [regFrame + FrameInfo.offsetOfCalleeSavedFP] - psl->ThumbEmitStoreRegIndirect(thumbRegFp, regFrame, FrameInfo.offsetOfCalleeSavedFP); - - // str R9, [regFrame + FrameInfo.offsetOfSPAfterProlog] - psl->ThumbEmitStoreRegIndirect(regR9, regFrame, FrameInfo.offsetOfSPAfterProlog); - - // mov [regFrame + FrameInfo.offsetOfReturnAddress], 0 - psl->ThumbEmitMovConstant(regScratch, 0); - psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfReturnAddress); - - DWORD cbSavedRegs = sizeof(ArgumentRegisters) + 2 * 4; // r0-r3, r4, lr - psl->ThumbEmitAdd(regScratch, thumbRegSp, cbSavedRegs); - psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfCallSiteSP); - - // mov [regThread + offsetof(Thread, m_pFrame)], regFrame - psl->ThumbEmitStoreRegIndirect(regFrame, regThread, offsetof(Thread, m_pFrame)); - - // leave current Thread in R4 - - psl->ThumbEmitEpilog(); - - // A single process-wide stub that will never unload - RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetStubHeap()); -} - -void StubLinkerCPU::ThumbEmitGetThread(ThumbReg dest) -{ - ThumbEmitMovConstant(ThumbReg(0), (TADDR)GetThreadHelper); - - ThumbEmitCallRegister(ThumbReg(0)); - - if (dest != ThumbReg(0)) - { - ThumbEmitMovRegReg(dest, ThumbReg(0)); - } -} - - // Emits code to adjust for a static delegate target. 
VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray) { @@ -1666,10 +1414,17 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats pRD->IsCallerSPValid = FALSE; pRD->pCurrentContext->Pc = m_ReturnAddress; - pRD->pCurrentContext->Sp = PTR_TO_TADDR(m_Args) + sizeof(struct HijackArgs); + size_t s = sizeof(struct HijackArgs); + _ASSERTE(s%4 == 0); // HijackArgs contains register values and hence will be a multiple of 4 + // stack must be multiple of 8. So if s is not multiple of 8 then there must be padding of 4 bytes + s = s + s%8; + pRD->pCurrentContext->Sp = PTR_TO_TADDR(m_Args) + s ; pRD->pCurrentContext->R0 = m_Args->R0; + pRD->pCurrentContext->R2 = m_Args->R2; + pRD->volatileCurrContextPointers.R0 = &m_Args->R0; + pRD->volatileCurrContextPointers.R2 = &m_Args->R2; pRD->pCurrentContext->R4 = m_Args->R4; pRD->pCurrentContext->R5 = m_Args->R5; @@ -1694,60 +1449,6 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats } #endif // FEATURE_HIJACK -class UMEntryThunk * UMEntryThunk::Decode(void *pCallback) -{ - _ASSERTE(offsetof(UMEntryThunkCode, m_code) == 0); - UMEntryThunkCode * pCode = (UMEntryThunkCode*)((ULONG_PTR)pCallback & ~THUMB_CODE); - - // We may be called with an unmanaged external code pointer instead. So if it doesn't look like one of our - // stubs (see UMEntryThunkCode::Encode below) then we'll return NULL. Luckily in these scenarios our - // caller will perform a hash lookup on successful return to verify our result in case random unmanaged - // code happens to look like ours. - if ((pCode->m_code[0] == 0xf8df) && - (pCode->m_code[1] == 0xc008) && - (pCode->m_code[2] == 0xf8df) && - (pCode->m_code[3] == 0xf000)) - { - return (UMEntryThunk*)pCode->m_pvSecretParam; - } - - return NULL; -} - -void UMEntryThunkCode::Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam) -{ - // ldr r12, [pc + 8] - m_code[0] = 0xf8df; - m_code[1] = 0xc008; - // ldr pc, [pc] - m_code[2] = 0xf8df; - m_code[3] = 0xf000; - - m_pTargetCode = (TADDR)pTargetCode; - m_pvSecretParam = (TADDR)pvSecretParam; - - FlushInstructionCache(GetCurrentProcess(),&pEntryThunkCodeRX->m_code,sizeof(m_code)); -} - -#ifndef DACCESS_COMPILE - -void UMEntryThunkCode::Poison() -{ - ExecutableWriterHolder thunkWriterHolder(this, sizeof(UMEntryThunkCode)); - UMEntryThunkCode *pThisRW = thunkWriterHolder.GetRW(); - - pThisRW->m_pTargetCode = (TADDR)UMEntryThunk::ReportViolation; - - // ldr r0, [pc + 8] - pThisRW->m_code[0] = 0x4802; - // nop - pThisRW->m_code[1] = 0xbf00; - - ClrFlushInstructionCache(&m_code,sizeof(m_code)); -} - -#endif // DACCESS_COMPILE - ///////////////////////////// UNIMPLEMENTED ////////////////////////////////// #ifndef DACCESS_COMPILE @@ -1756,30 +1457,11 @@ void UMEntryThunkCode::Poison() extern "C" void STDCALL JIT_PatchedCodeStart(); extern "C" void STDCALL JIT_PatchedCodeLast(); -void InitJITHelpers1() +void InitJITWriteBarrierHelpers() { STANDARD_VM_CONTRACT; - - // Allocation helpers, faster but non-logging. 
- if (!(TrackAllocationsEnabled() - || LoggingOn(LF_GCALLOC, LL_INFO10) -#ifdef _DEBUG - || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0) -#endif // _DEBUG - )) - { - _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - - SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable); - - ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); - } } - VOID ResetCurrentContext() { LIMITED_METHOD_CONTRACT; @@ -1808,7 +1490,7 @@ void MovRegImm(BYTE* p, int reg, TADDR imm) #define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR) -#define BEGIN_DYNAMIC_HELPER_EMIT(size) \ +#define BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \ SIZE_T cb = size; \ SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \ BYTE * pStartRX = (BYTE *)(void*)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \ @@ -1817,6 +1499,15 @@ void MovRegImm(BYTE* p, int reg, TADDR imm) size_t rxOffset = pStartRX - pStart; \ BYTE * p = pStart; +#ifdef FEATURE_PERFMAP +#define BEGIN_DYNAMIC_HELPER_EMIT(size) \ + BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) \ + PerfMap::LogStubs(__FUNCTION__, "DynamicHelper", (PCODE)p, size, PerfMapStubType::Individual); +#else +#define BEGIN_DYNAMIC_HELPER_EMIT(size) BEGIN_DYNAMIC_HELPER_EMIT_WORKER(size) +#endif + + #define END_DYNAMIC_HELPER_EMIT() \ _ASSERTE(pStart + cb == p); \ while (p < pStart + cbAligned) { *(WORD *)p = 0xdefe; p += 2; } \ diff --git a/src/coreclr/vm/arm/thunktemplates.S b/src/coreclr/vm/arm/thunktemplates.S index 8744c8ebb632..b535090660a6 100644 --- a/src/coreclr/vm/arm/thunktemplates.S +++ b/src/coreclr/vm/arm/thunktemplates.S @@ -14,7 +14,7 @@ PAGE_SIZE = 4096 #define DATA_SLOT(stub, field) . - (. - stub##Code) + PAGE_SIZE + stub##Data__##field LEAF_ENTRY StubPrecodeCode - ldr r12, DATA_SLOT(StubPrecode, MethodDesc) + ldr r12, DATA_SLOT(StubPrecode, SecretParam) ldr pc, DATA_SLOT(StubPrecode, Target) LEAF_END_MARKED StubPrecodeCode diff --git a/src/coreclr/vm/arm/virtualcallstubcpu.hpp b/src/coreclr/vm/arm/virtualcallstubcpu.hpp index 0b6c00104ebc..59a66270fc7e 100644 --- a/src/coreclr/vm/arm/virtualcallstubcpu.hpp +++ b/src/coreclr/vm/arm/virtualcallstubcpu.hpp @@ -10,8 +10,6 @@ #include "asmconstants.h" #endif -//#define STUB_LOGGING - #include <pshpack1.h> // Since we are placing code, we want byte packing of the structs #define USES_LOOKUP_STUBS 1 diff --git a/src/coreclr/vm/arm64/AllocSlow.S b/src/coreclr/vm/arm64/AllocSlow.S new file mode 100644 index 000000000000..469c63c3966c --- /dev/null +++ b/src/coreclr/vm/arm64/AllocSlow.S @@ -0,0 +1,64 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "asmconstants.h" +#include "unixasmmacros.inc" + +// +// Object* RhpNew(MethodTable *pMT) +// +// Allocate non-array object, slow path. +// +LEAF_ENTRY RhpNew, _TEXT + + mov x1, 0 + b C_FUNC(RhpNewObject) + +LEAF_END RhpNew, _TEXT + +// +// Object* RhpNewMaybeFrozen(MethodTable *pMT) +// +// Allocate non-array object, may be on frozen heap.
+// +NESTED_ENTRY RhpNewMaybeFrozen, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME x2 + + mov x1, 0 + bl C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NESTED_END RhpNewMaybeFrozen, _TEXT + +// +// Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size) +// +// Allocate array object, may be on frozen heap. +// +NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME x2 + + bl C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NESTED_END RhpNewArrayMaybeFrozen, _TEXT + +// +// void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow) +// +NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME x2 + + bl C_FUNC(RhExceptionHandling_FailedAllocation_Helper) + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NESTED_END RhExceptionHandling_FailedAllocation, _TEXT diff --git a/src/coreclr/vm/arm64/AllocSlow.asm b/src/coreclr/vm/arm64/AllocSlow.asm new file mode 100644 index 000000000000..79e13f7a6c07 --- /dev/null +++ b/src/coreclr/vm/arm64/AllocSlow.asm @@ -0,0 +1,73 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +#include "ksarm64.h" +#include "asmconstants.h" +#include "asmmacros.h" + + IMPORT RhpNewObject + IMPORT RhpGcAllocMaybeFrozen + IMPORT RhExceptionHandling_FailedAllocation_Helper + + TEXTAREA + +; +; Object* RhpNew(MethodTable *pMT) +; +; Allocate non-array object, slow path. +; + LEAF_ENTRY RhpNew + + mov x1, #0 + b RhpNewObject + + LEAF_END + +; +; Object* RhpNewMaybeFrozen(MethodTable *pMT) +; +; Allocate non-array object, may be on frozen heap. +; + NESTED_ENTRY RhpNewMaybeFrozen + + PUSH_COOP_PINVOKE_FRAME x2 + + mov x1, 0 + bl RhpGcAllocMaybeFrozen + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + NESTED_END + +; +; Object* RhpNewMaybeFrozen(MethodTable *pMT, INT_PTR size) +; +; Allocate array object, may be on frozen heap. +; + NESTED_ENTRY RhpNewArrayMaybeFrozen + + PUSH_COOP_PINVOKE_FRAME x2 + + bl RhpGcAllocMaybeFrozen + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + NESTED_END + +; +; void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow) +; + NESTED_ENTRY RhExceptionHandling_FailedAllocation + + PUSH_COOP_PINVOKE_FRAME x2 + + bl RhExceptionHandling_FailedAllocation_Helper + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + NESTED_END RhExceptionHandling_FailedAllocation + + END \ No newline at end of file diff --git a/src/coreclr/vm/arm64/AsmMacros_Shared.h b/src/coreclr/vm/arm64/AsmMacros_Shared.h new file mode 100644 index 000000000000..04249f3464cd --- /dev/null +++ b/src/coreclr/vm/arm64/AsmMacros_Shared.h @@ -0,0 +1,112 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constant offsets are accessible
+
+#ifdef TARGET_WINDOWS
+#include "ksarm64.h"
+#include "asmconstants.h"
+#include "asmmacros.h"
+
+ IMPORT g_lowest_address
+ IMPORT g_highest_address
+ IMPORT g_ephemeral_low
+ IMPORT g_ephemeral_high
+ IMPORT g_card_table
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+ IMPORT g_card_bundle_table
+#endif
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ IMPORT g_write_watch_table
+#endif
+
+ IMPORT RhpGcAlloc
+ IMPORT RhExceptionHandling_FailedAllocation
+
+;;-----------------------------------------------------------------------------
+;; Macro for loading a 64-bit constant by a minimal number of instructions
+;; Since the assembler doesn't support 64-bit arithmetic in expressions,
+;; the value is passed in as a lo, hi pair.
+ MACRO
+ MOVL64 $Reg, $ConstantLo, $ConstantHi
+
+ LCLS MovInstr
+MovInstr SETS "movz"
+
+ IF ((($ConstantHi):SHR:16):AND:0xffff) != 0
+ $MovInstr $Reg, #((($ConstantHi):SHR:16):AND:0xffff), lsl #48
+MovInstr SETS "movk"
+ ENDIF
+
+ IF (($ConstantHi):AND:0xffff) != 0
+ $MovInstr $Reg, #(($ConstantHi):AND:0xffff), lsl #32
+MovInstr SETS "movk"
+ ENDIF
+
+ IF ((($ConstantLo):SHR:16):AND:0xffff) != 0
+ $MovInstr $Reg, #((($ConstantLo):SHR:16):AND:0xffff), lsl #16
+MovInstr SETS "movk"
+ ENDIF
+
+ $MovInstr $Reg, #(($ConstantLo):AND:0xffff)
+ MEND
+
+;;-----------------------------------------------------------------------------
+;; Macro for loading a 64-bit value of a global variable into a register
+ MACRO
+ PREPARE_EXTERNAL_VAR_INDIRECT $Name, $Reg
+
+ adrp $Reg, $Name
+ ldr $Reg, [$Reg, $Name]
+ MEND
+
+;;-----------------------------------------------------------------------------
+;; Macro for loading a 32-bit value of a global variable into a register
+ MACRO
+ PREPARE_EXTERNAL_VAR_INDIRECT_W $Name, $RegNum
+
+ adrp x$RegNum, $Name
+ ldr w$RegNum, [x$RegNum, $Name]
+ MEND
+
+;;-----------------------------------------------------------------------------
+;;
+;; Macro to add a memory barrier. Equal to __sync_synchronize().
+;;
+
+ MACRO
+ InterlockedOperationBarrier
+
+ dmb ish
+ MEND
+
+#else
+#include "asmconstants.h"
+#include "unixasmmacros.inc"
+
+.macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg
+#if defined(__APPLE__)
+ adrp \HelperReg, C_FUNC(\Name)@GOTPAGE
+ ldr \HelperReg, [\HelperReg, C_FUNC(\Name)@GOTPAGEOFF]
+ ldr \HelperReg, [\HelperReg]
+#else
+ adrp \HelperReg, C_FUNC(\Name)
+ ldr \HelperReg, [\HelperReg, :lo12:C_FUNC(\Name)]
+#endif
+.endm
+
+.macro PREPARE_EXTERNAL_VAR_INDIRECT_W Name, HelperReg
+#if defined(__APPLE__)
+ adrp x\HelperReg, C_FUNC(\Name)@GOTPAGE
+ ldr x\HelperReg, [x\HelperReg, C_FUNC(\Name)@GOTPAGEOFF]
+ ldr w\HelperReg, [x\HelperReg]
+#else
+ adrp x\HelperReg, C_FUNC(\Name)
+ ldr w\HelperReg, [x\HelperReg, :lo12:C_FUNC(\Name)]
+#endif
+.endm
+
+#endif
+
diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S
new file mode 100644
index 000000000000..068936ef5235
--- /dev/null
+++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S
@@ -0,0 +1,77 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
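The MOVL64 macro above materializes a 64-bit constant with one `movz` followed by up to three `movk` instructions, skipping any 16-bit chunk that is zero; the low halfword is always emitted so the destination register ends up fully defined even for zero. A small sketch (not runtime code) that prints the instruction sequence the macro would produce for a given value:

```cpp
// Illustrative only: mirrors MOVL64's chunk-skipping logic, no instruction encoding.
#include <cstdint>
#include <cstdio>

void DescribeMovl64(uint64_t value)
{
    const char* insn = "movz";
    // High chunks first (lsl #48, #32, #16), emitted only when non-zero.
    for (int shift = 48; shift >= 16; shift -= 16)
    {
        uint16_t chunk = (uint16_t)(value >> shift);
        if (chunk != 0)
        {
            printf("%s xN, #0x%x, lsl #%d\n", insn, (unsigned)chunk, shift);
            insn = "movk";
        }
    }
    // The low 16 bits are always emitted.
    printf("%s xN, #0x%x\n", insn, (unsigned)(uint16_t)value);
}
```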
+ +#include "asmconstants.h" +#include "unixasmmacros.inc" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +// +// Stub dispatch routine for dispatch to a vtable slot +// + LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + + // Load the MethodTable from the object instance in x0 + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + ldr x9, [x0] + + // x11 currently contains the indirection cell address. + // load x11 to point to the vtable offset (which is stored in the m_pCache field). + ldr x11, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // x11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + // to get to the VTable chunk + lsr x10, x11, #32 + + // Add the MethodTable to the vtable offset + // to get the address in the vtable chunk list of what we want to dereference + add x9, x10, x9 + + // Load the target address of the vtable chunk into x9 + ldr x9, [x9] + + // Compute the chunk offset + ubfx x10, x11, #16, #16 + + // Load the target address of the virtual function into x9 + ldr x9, [x9, x10] + + EPILOG_BRANCH_REG x9 + LEAF_END RhpVTableOffsetDispatch, _TEXT + +// +// Cache miss case, call the runtime to resolve the target and update the cache. +// x11 contains the interface dispatch cell address. +// + NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock + mov x1, x11 // indirection cell + + bl C_FUNC(CID_ResolveWorker) + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END RhpInterfaceDispatchSlow, _TEXT + +// x11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) + NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock + mov x1, x11 // indirection cell + + bl C_FUNC(CID_VirtualOpenDelegateDispatchWorker) + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm new file mode 100644 index 000000000000..4b117a0336e6 --- /dev/null +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm @@ -0,0 +1,83 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +#include "ksarm64.h" +#include "asmconstants.h" +#include "asmmacros.h" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + TEXTAREA + + EXTERN CID_ResolveWorker + EXTERN CID_VirtualOpenDelegateDispatchWorker + +;; +;; Stub dispatch routine for dispatch to a vtable slot +;; + LEAF_ENTRY RhpVTableOffsetDispatch + + ;; x11 currently contains the indirection cell address. + ;; load x11 to point to the vtable offset (which is stored in the m_pCache field). 
+ ldr x11, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; x11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; to get to the VTable chunk + lsr x10, x11, #32 + + ;; Load the MethodTable from the object instance in x0, and add it to the vtable offset + ;; to get the address in the vtable chunk list of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + ldr x9, [x0] + add x9, x10, x9 + + ;; Load the target address of the vtable chunk into x9 + ldr x9, [x9] + + ;; Compute the chunk offset + ubfx x10, x11, #16, #16 + + ;; Load the target address of the virtual function into x9 + ldr x9, [x9, x10] + + EPILOG_BRANCH_REG x9 + LEAF_END RhpVTableOffsetDispatch + +;; +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; x11 contains the interface dispatch cell address. +;; + NESTED_ENTRY RhpInterfaceDispatchSlow + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + mov x1, x11 ; indirection cell + + bl CID_ResolveWorker + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END RhpInterfaceDispatchSlow + +;; x11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) + NESTED_ENTRY CID_VirtualOpenDelegateDispatch + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + mov x1, x11 ; indirection cell + + bl CID_VirtualOpenDelegateDispatchWorker + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END CID_VirtualOpenDelegateDispatch + +#endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + + END diff --git a/src/coreclr/vm/arm64/StubPrecodeDynamicHelpers.S b/src/coreclr/vm/arm64/StubPrecodeDynamicHelpers.S new file mode 100644 index 000000000000..82c9ec3ecc9b --- /dev/null +++ b/src/coreclr/vm/arm64/StubPrecodeDynamicHelpers.S @@ -0,0 +1,284 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
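RhpVTableOffsetDispatch above treats the cell's m_pCache value as a packed descriptor: the upper 32 bits are the byte offset from the MethodTable to the slot that holds the address of the containing vtable chunk, and bits 16-31 (the `ubfx` extract) are the byte offset of the target slot within that chunk. A hedged C++ sketch of the same decode, with assumed types standing in for the runtime's own:

```cpp
// Sketch only: field layout and types are assumptions for illustration.
#include <cstdint>

using PCODE = uintptr_t;

PCODE ResolveVTableOffsetDispatch(uint8_t* pMethodTable, uint64_t packedVTableOffset)
{
    // Upper 32 bits: offset from the MethodTable to the vtable-chunk pointer (lsr x10, x11, #32).
    uint64_t chunkIndirectionOffset = packedVTableOffset >> 32;

    // Bits [16..31]: offset of the target slot within that chunk (ubfx x10, x11, #16, #16).
    uint64_t offsetInChunk = (packedVTableOffset >> 16) & 0xFFFF;

    uint8_t* chunk = *reinterpret_cast<uint8_t**>(pMethodTable + chunkIndirectionOffset);
    return *reinterpret_cast<PCODE*>(chunk + offsetInChunk);
}
```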
+ +#include "asmconstants.h" +#include "unixasmmacros.inc" + +#ifdef FEATURE_STUBPRECODE_DYNAMIC_HELPERS + +#define SecretArg_Reg x12 +#define FirstArg_Reg x0 +#define SecondArg_Reg x1 +#define SecondArg_DwordReg w1 +#define ThirdArg_Reg x2 +#define ThirdArg_DwordReg w2 +#define FourthArg_Reg x3 + +#define DATA_SLOT(field) [x12, #OFFSETOF__DynamicHelperStubArgs__ ## field] +#define GENERIC_DICT_DATA_SLOT(field) [x12, #OFFSETOF__GenericDictionaryDynamicHelperStubData__ ## field] + + LEAF_ENTRY DynamicHelper_CallHelper_1Arg, _TEXT + ldr FirstArg_Reg, DATA_SLOT(Constant1) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END DynamicHelper_CallHelper_1Arg, _TEXT + + LEAF_ENTRY DynamicHelper_CallHelper_AddSecondArg, _TEXT + ldr SecondArg_Reg, DATA_SLOT(Constant1) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END DynamicHelper_CallHelper_AddSecondArg, _TEXT + + LEAF_ENTRY DynamicHelper_CallHelper_2Arg, _TEXT + ldr FirstArg_Reg, DATA_SLOT(Constant1) + ldr SecondArg_Reg, DATA_SLOT(Constant2) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END DynamicHelper_CallHelper_2Arg, _TEXT + + LEAF_ENTRY DynamicHelper_CallHelper_ArgMove, _TEXT + mov SecondArg_Reg, FirstArg_Reg + ldr FirstArg_Reg, DATA_SLOT(Constant1) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END DynamicHelper_CallHelper_ArgMove, _TEXT + + LEAF_ENTRY DynamicHelper_Return, _TEXT + ret lr + LEAF_END DynamicHelper_Return, _TEXT + + LEAF_ENTRY DynamicHelper_ReturnConst, _TEXT + mov FirstArg_Reg, SecretArg_Reg + ret lr + LEAF_END DynamicHelper_ReturnConst, _TEXT + + LEAF_ENTRY DynamicHelper_ReturnIndirConst, _TEXT + ldr FirstArg_Reg, [SecretArg_Reg, #0] + ret lr + LEAF_END DynamicHelper_ReturnIndirConst, _TEXT + + LEAF_ENTRY DynamicHelper_ReturnIndirConstWithOffset, _TEXT + ldr FirstArg_Reg, DATA_SLOT(Constant1) + ldr FirstArg_Reg, [FirstArg_Reg] + ldr SecondArg_Reg, DATA_SLOT(Constant2) + add FirstArg_Reg, FirstArg_Reg, SecondArg_Reg + ret lr + LEAF_END DynamicHelper_ReturnIndirConstWithOffset, _TEXT + + LEAF_ENTRY DynamicHelper_CallHelper_AddThirdArg, _TEXT + ldr ThirdArg_Reg, DATA_SLOT(Constant1) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END DynamicHelper_CallHelper_AddThirdArg, _TEXT + + LEAF_ENTRY DynamicHelper_CallHelper_AddThirdAndFourthArg, _TEXT + ldr ThirdArg_Reg, DATA_SLOT(Constant1) + ldr FourthArg_Reg, DATA_SLOT(Constant2) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END DynamicHelper_CallHelper_AddThirdAndFourthArg, _TEXT + + // Generic dictionaries can have 2 or 3 indirections (5 indirs of 32bit size, and 2 8 byte quantities) = 40 bytes + // If it has 2 its for a Method, and the first indirection is always offsetof(InstantiatiedMethodDesc, m_pPerInstInfo) + // If it has 3 its for a Class, and the first indirection is always MethodTable::GetOffsetOfPerInstInfo + // It can also have 0, 0, to just return the class type + // Test For Null Or Not (If not present, cannot have a size check) + // SizeCheck or not (Only needed if size > Some number) + // + // Also special case where we just return the TypeHandle or MethodDesc itself + // Should probably have special case for 1, 2, 3 generic arg of MethodDesc/MethodTable + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull, _TEXT + // Save Generic Context + mov x4, FirstArg_Reg + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SecondIndir) + ldr FirstArg_Reg, 
[SecondArg_Reg, FirstArg_Reg] + // SizeCheck + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SizeOffset) + ldr ThirdArg_DwordReg, GENERIC_DICT_DATA_SLOT(SlotOffset) + ldr FourthArg_Reg, [FirstArg_Reg, SecondArg_Reg] + cmp FourthArg_Reg, ThirdArg_Reg + b.ls LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall) + // Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + // Null test + cbz FirstArg_Reg, LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall) + ret lr +LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall): + mov FirstArg_Reg, x4 + ldr SecondArg_Reg, GENERIC_DICT_DATA_SLOT(HandleArgs) + PREPARE_EXTERNAL_VAR g_pClassWithSlotAndModule, x3 + ldr x3, [x3] + EPILOG_BRANCH_REG x3 + LEAF_END DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull, _TEXT + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_TestForNull, _TEXT + // Save Generic Context + mov x4, FirstArg_Reg + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SecondIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + // Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + // Null test + cbz FirstArg_Reg, LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall) + ret lr +LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall): + mov FirstArg_Reg, x4 + ldr SecondArg_Reg, GENERIC_DICT_DATA_SLOT(HandleArgs) + PREPARE_EXTERNAL_VAR g_pClassWithSlotAndModule, x3 + ldr x3, [x3] + EPILOG_BRANCH_REG x3 + LEAF_END DynamicHelper_GenericDictionaryLookup_Class_TestForNull, _TEXT + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SecondIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + // Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Class, _TEXT + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull, _TEXT + // Save Generic Context + mov x4, FirstArg_Reg + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // SizeCheck + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SizeOffset) + ldr ThirdArg_DwordReg, GENERIC_DICT_DATA_SLOT(SlotOffset) + ldr FourthArg_Reg, [FirstArg_Reg, SecondArg_Reg] + cmp FourthArg_Reg, ThirdArg_Reg + b.ls LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall) + // Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + // Null test + cbz FirstArg_Reg, LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall) + ret lr +LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall): + mov FirstArg_Reg, x4 + ldr SecondArg_Reg, GENERIC_DICT_DATA_SLOT(HandleArgs) + PREPARE_EXTERNAL_VAR g_pMethodWithSlotAndModule, x3 + ldr x3, [x3] + EPILOG_BRANCH_REG x3 + LEAF_END 
DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull, _TEXT + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_TestForNull, _TEXT + // Save Generic Context + mov x4, FirstArg_Reg + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + // Null test + cbz FirstArg_Reg, LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall) + ret lr +LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall): + mov FirstArg_Reg, x4 + ldr SecondArg_Reg, GENERIC_DICT_DATA_SLOT(HandleArgs) + PREPARE_EXTERNAL_VAR g_pMethodWithSlotAndModule, x3 + ldr x3, [x3] + EPILOG_BRANCH_REG x3 + LEAF_END DynamicHelper_GenericDictionaryLookup_Method_TestForNull, _TEXT + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Method, _TEXT + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_0, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_0, _TEXT + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_1, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x8] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_1, _TEXT + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_2, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x10] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_2, _TEXT + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_3, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x18] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_3, _TEXT + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_0, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Method_0, _TEXT + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_1, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x8] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Method_1, _TEXT + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_2, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, 
#OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x10] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Method_2, _TEXT + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_3, _TEXT + // First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + // Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x18] + ret lr + LEAF_END DynamicHelper_GenericDictionaryLookup_Method_3, _TEXT + +#endif //// FEATURE_STUBPRECODE_DYNAMIC_HELPERS diff --git a/src/coreclr/vm/arm64/StubPrecodeDynamicHelpers.asm b/src/coreclr/vm/arm64/StubPrecodeDynamicHelpers.asm new file mode 100644 index 000000000000..32e494f96fc0 --- /dev/null +++ b/src/coreclr/vm/arm64/StubPrecodeDynamicHelpers.asm @@ -0,0 +1,293 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +#include "ksarm64.h" +#include "asmconstants.h" +#include "asmmacros.h" + +#ifdef FEATURE_STUBPRECODE_DYNAMIC_HELPERS + +TEXTAREA + + IMPORT g_pClassWithSlotAndModule + IMPORT g_pMethodWithSlotAndModule + +#define SecretArg_Reg x12 +#define FirstArg_Reg x0 +#define SecondArg_Reg x1 +#define SecondArg_DwordReg w1 +#define ThirdArg_Reg x2 +#define ThirdArg_DwordReg w2 +#define FourthArg_Reg x3 + +#define HASH_SYMBOL # +#define DATA_SLOT(field) [x12, HASH_SYMBOL OFFSETOF__DynamicHelperStubArgs__##field] +#define GENERIC_DICT_DATA_SLOT(field) [x12, HASH_SYMBOL OFFSETOF__GenericDictionaryDynamicHelperStubData__ ## field] + + LEAF_ENTRY DynamicHelper_CallHelper_1Arg + ldr FirstArg_Reg, DATA_SLOT(Constant1) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END + + LEAF_ENTRY DynamicHelper_CallHelper_AddSecondArg + ldr SecondArg_Reg, DATA_SLOT(Constant1) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END + + LEAF_ENTRY DynamicHelper_CallHelper_2Arg + ldr FirstArg_Reg, DATA_SLOT(Constant1) + ldr SecondArg_Reg, DATA_SLOT(Constant2) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END + + LEAF_ENTRY DynamicHelper_CallHelper_ArgMove + mov SecondArg_Reg, FirstArg_Reg + ldr FirstArg_Reg, DATA_SLOT(Constant1) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END + + LEAF_ENTRY DynamicHelper_Return + ret lr + LEAF_END + + LEAF_ENTRY DynamicHelper_ReturnConst + mov FirstArg_Reg, SecretArg_Reg + ret lr + LEAF_END + + LEAF_ENTRY DynamicHelper_ReturnIndirConst + ldr FirstArg_Reg, [SecretArg_Reg, #0] + ret lr + LEAF_END + + LEAF_ENTRY DynamicHelper_ReturnIndirConstWithOffset + ldr FirstArg_Reg, DATA_SLOT(Constant1) + ldr FirstArg_Reg, [FirstArg_Reg] + ldr SecondArg_Reg, DATA_SLOT(Constant2) + add FirstArg_Reg, FirstArg_Reg, SecondArg_Reg + ret lr + LEAF_END + + LEAF_ENTRY DynamicHelper_CallHelper_AddThirdArg + ldr ThirdArg_Reg, DATA_SLOT(Constant1) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END + + LEAF_ENTRY DynamicHelper_CallHelper_AddThirdAndFourthArg + ldr ThirdArg_Reg, DATA_SLOT(Constant1) + ldr FourthArg_Reg, DATA_SLOT(Constant2) + ldr x12, DATA_SLOT(Helper) + EPILOG_BRANCH_REG x12 + LEAF_END + + ; Generic dictionaries can have 2 or 3 indirections (5 indirs of 32bit size, and 2 8 byte quantities) = 40 bytes + ; If it has 2 its for a Method, and the first indirection is always offsetof(InstantiatiedMethodDesc, m_pPerInstInfo) + ; If it has 3 its for a Class, and the first indirection is always MethodTable::GetOffsetOfPerInstInfo + ; It can also have 0, 0, to just return the class type + ; Test For 
Null Or Not (If not present, cannot have a size check) + ; SizeCheck or not (Only needed if size > Some number) + ; + ; Also special case where we just return the TypeHandle or MethodDesc itself + ; Should probably have special case for 1, 2, 3 generic arg of MethodDesc/MethodTable + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull + ; Save Generic Context + mov x4, FirstArg_Reg + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SecondIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ; SizeCheck + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SizeOffset) + ldr ThirdArg_DwordReg, GENERIC_DICT_DATA_SLOT(SlotOffset) + ldr FourthArg_Reg, [FirstArg_Reg, SecondArg_Reg] + cmp FourthArg_Reg, ThirdArg_Reg + b.ls DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall + ; Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ; Null test + cbz FirstArg_Reg, DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall + ret lr +DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall + mov FirstArg_Reg, x4 + ldr SecondArg_Reg, GENERIC_DICT_DATA_SLOT(HandleArgs) + ldr x3, =g_pClassWithSlotAndModule + ldr x3, [x3] + EPILOG_BRANCH_REG x3 + LEAF_END + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_TestForNull + ; Save Generic Context + mov x4, FirstArg_Reg + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SecondIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ; Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ; Null test + cbz FirstArg_Reg, DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall + ret lr +DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall + mov FirstArg_Reg, x4 + ldr SecondArg_Reg, GENERIC_DICT_DATA_SLOT(HandleArgs) + ldr x3, =g_pClassWithSlotAndModule + ldr x3, [x3] + EPILOG_BRANCH_REG x3 + LEAF_END + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SecondIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ; Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ret lr + LEAF_END + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull + ; Save Generic Context + mov x4, FirstArg_Reg + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; SizeCheck + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(SizeOffset) + ldr ThirdArg_DwordReg, GENERIC_DICT_DATA_SLOT(SlotOffset) + ldr FourthArg_Reg, [FirstArg_Reg, SecondArg_Reg] + cmp FourthArg_Reg, ThirdArg_Reg + b.ls DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall + ; Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ; Null test + cbz FirstArg_Reg, DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall + ret lr 
+DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall + mov FirstArg_Reg, x4 + ldr SecondArg_Reg, GENERIC_DICT_DATA_SLOT(HandleArgs) + ldr x3, =g_pMethodWithSlotAndModule + ldr x3, [x3] + EPILOG_BRANCH_REG x3 + LEAF_END + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_TestForNull + ; Save Generic Context + mov x4, FirstArg_Reg + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ; Null test + cbz FirstArg_Reg, DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall + ret lr +DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall + mov FirstArg_Reg, x4 + ldr SecondArg_Reg, GENERIC_DICT_DATA_SLOT(HandleArgs) + ldr x3, =g_pMethodWithSlotAndModule + ldr x3, [x3] + EPILOG_BRANCH_REG x3 + LEAF_END + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + ldr SecondArg_DwordReg, GENERIC_DICT_DATA_SLOT(LastIndir) + ldr FirstArg_Reg, [SecondArg_Reg, FirstArg_Reg] + ret lr + LEAF_END + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_0 + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + ret lr + LEAF_END + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_1 + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x8] + ret lr + LEAF_END + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_2 + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x10] + ret lr + LEAF_END + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_3 + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__MethodTable__m_pPerInstInfo] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x18] + ret lr + LEAF_END + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_0 + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg] + ret lr + LEAF_END + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_1 + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x8] + ret lr + LEAF_END + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_2 + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x10] + ret lr + LEAF_END + + + LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_3 + ; First indirection + ldr FirstArg_Reg, [FirstArg_Reg, #OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo] + ; Standard Indirection + ldr FirstArg_Reg, [FirstArg_Reg, #0x18] + ret lr + LEAF_END + +#endif ;; 
FEATURE_STUBPRECODE_DYNAMIC_HELPERS + + END diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 87003693b871..3178ad1e297b 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -36,6 +36,9 @@ ASMCONSTANTS_C_ASSERT(FRAMETYPE_InlinedCallFrame == (int)FrameIdentifier::Inline #define DynamicHelperFrameFlags_ObjectArg 1 #define DynamicHelperFrameFlags_ObjectArg2 2 +#define ThisPtrRetBufPrecodeData__Target 0x00 +ASMCONSTANTS_C_ASSERT(ThisPtrRetBufPrecodeData__Target == offsetof(ThisPtrRetBufPrecodeData, Target)); + #define Thread__m_fPreemptiveGCDisabled 0x04 #define Thread__m_pFrame 0x08 @@ -45,6 +48,16 @@ ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame)); #define Thread_m_pFrame Thread__m_pFrame #define Thread_m_fPreemptiveGCDisabled Thread__m_fPreemptiveGCDisabled +#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context)); + +#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) + + offsetof(gc_alloc_context, alloc_ptr)); + +#define OFFSETOF__ee_alloc_context__combined_limit 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, m_CombinedLimit)); + #define METHODDESC_REGISTER x12 #define SIZEOF__ArgumentRegisters 0x40 @@ -75,31 +88,6 @@ ASMCONSTANTS_C_ASSERT(CallDescrData__pTarget == offsetof(CallDescrD ASMCONSTANTS_C_ASSERT(CallDescrData__pRetBuffArg == offsetof(CallDescrData, pRetBuffArg)) ASMCONSTANTS_C_ASSERT(CallDescrData__returnValue == offsetof(CallDescrData, returnValue)) - -// Offset of the array containing the address of captured registers in MachState -#define MachState__captureX19_X29 0x0 -ASMCONSTANTS_C_ASSERT(MachState__captureX19_X29 == offsetof(MachState, captureX19_X29)) - -// Offset of the array containing the address of preserved registers in MachState -#define MachState__ptrX19_X29 0x58 -ASMCONSTANTS_C_ASSERT(MachState__ptrX19_X29 == offsetof(MachState, ptrX19_X29)) - -#define MachState__isValid 0xc0 -ASMCONSTANTS_C_ASSERT(MachState__isValid == offsetof(MachState, _isValid)) - -#define LazyMachState_captureX19_X29 MachState__captureX19_X29 -ASMCONSTANTS_C_ASSERT(LazyMachState_captureX19_X29 == offsetof(LazyMachState, captureX19_X29)) - -#ifdef __APPLE__ -#define LazyMachState_captureSp (MachState__isValid+8+88) // padding for alignment -#else // __APPLE__ -#define LazyMachState_captureSp (MachState__isValid+8) // padding for alignment -#endif // __APPLE -ASMCONSTANTS_C_ASSERT(LazyMachState_captureSp == offsetof(LazyMachState, captureSp)) - -#define LazyMachState_captureIp (LazyMachState_captureSp+8) -ASMCONSTANTS_C_ASSERT(LazyMachState_captureIp == offsetof(LazyMachState, captureIp)) - #define VASigCookie__pNDirectILStub 0x8 ASMCONSTANTS_C_ASSERT(VASigCookie__pNDirectILStub == offsetof(VASigCookie, pNDirectILStub)) @@ -113,25 +101,79 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); #endif ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); +#define OFFSETOF__DynamicHelperStubArgs__Constant1 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicHelperStubArgs__Constant1 + == offsetof(DynamicHelperStubArgs, Constant1)); + +#define OFFSETOF__DynamicHelperStubArgs__Constant2 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicHelperStubArgs__Constant2 + == offsetof(DynamicHelperStubArgs, Constant2)); + +#define 
OFFSETOF__DynamicHelperStubArgs__Helper 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicHelperStubArgs__Helper + == offsetof(DynamicHelperStubArgs, Helper)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__SecondIndir 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__SecondIndir + == offsetof(GenericDictionaryDynamicHelperStubData, SecondIndir)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__LastIndir 0x4 +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__LastIndir + == offsetof(GenericDictionaryDynamicHelperStubData, LastIndir)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__SizeOffset 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__SizeOffset + == offsetof(GenericDictionaryDynamicHelperStubData, SizeOffset)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__SlotOffset 0xc +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__SlotOffset + == offsetof(GenericDictionaryDynamicHelperStubData, SlotOffset)); + +#define OFFSETOF__GenericDictionaryDynamicHelperStubData__HandleArgs 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__GenericDictionaryDynamicHelperStubData__HandleArgs + == offsetof(GenericDictionaryDynamicHelperStubData, HandleArgs)); + +#ifdef FEATURE_INTERPRETER +#define OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo DBG_FRE(0x48, 0x20) +#else +#define OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo DBG_FRE(0x40, 0x18) +#endif // FEATURE_INTERPRETER +ASMCONSTANTS_C_ASSERT(OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo + == offsetof(InstantiatedMethodDesc, m_pPerInstInfo)); //========================================= -#define MethodTable__m_dwFlags 0x0 -ASMCONSTANTS_C_ASSERT(MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); +#define OFFSETOF__MethodTable__m_dwFlags 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); + +#define OFFSETOF__MethodTable__m_usComponentSize 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_usComponentSize == offsetof(MethodTable, m_dwFlags)); -#define MethodTable__m_BaseSize 0x04 -ASMCONSTANTS_C_ASSERT(MethodTable__m_BaseSize == offsetof(MethodTable, m_BaseSize)); +#define OFFSETOF__MethodTable__m_uBaseSize 0x04 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_uBaseSize == offsetof(MethodTable, m_BaseSize)); -#define MethodTable__m_ElementType DBG_FRE(0x38, 0x30) -ASMCONSTANTS_C_ASSERT(MethodTable__m_ElementType == offsetof(MethodTable, m_ElementTypeHnd)); +#define OFFSETOF__MethodTable__m_pPerInstInfo DBG_FRE(0x38, 0x30) +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_pPerInstInfo + == offsetof(MethodTable, m_pPerInstInfo)); -#define ArrayBase__m_NumComponents 0x8 -ASMCONSTANTS_C_ASSERT(ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents)); +#define OFFSETOF__Object__m_pEEType 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Object__m_pEEType == offsetof(Object, m_pMethTab)); -#define PtrArray__m_Array 0x10 -ASMCONSTANTS_C_ASSERT(PtrArray__m_Array == offsetof(PtrArray, m_Array)); +#define OFFSETOF__Array__m_Length 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Array__m_Length == offsetof(ArrayBase, m_NumComponents)); #define TypeHandle_CanCast 0x1 // TypeHandle::CanCast +#define MAX_STRING_LENGTH 0x3FFFFFDF +ASMCONSTANTS_C_ASSERT(MAX_STRING_LENGTH == CORINFO_String_MaxLength); + +#define STRING_COMPONENT_SIZE 2 + +#define STRING_BASE_SIZE 0x16 +ASMCONSTANTS_C_ASSERT(STRING_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(WCHAR)); + +#define SZARRAY_BASE_SIZE 0x18 
+ASMCONSTANTS_C_ASSERT(SZARRAY_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(DWORD)); + //========================================= @@ -173,12 +215,12 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__FixupPrecode == sizeof(FixupPrecode)); ASMCONSTANTS_C_ASSERT(MethodDesc_ALIGNMENT_SHIFT == MethodDesc::ALIGNMENT_SHIFT); //ASMCONSTANTS_C_ASSERT((1<Fp); } +inline void SetFirstArgReg(T_CONTEXT *context, TADDR value) +{ + LIMITED_METHOD_DAC_CONTRACT; + SetReg(context, 0, value); +} + +inline TADDR GetFirstArgReg(T_CONTEXT *context) +{ + LIMITED_METHOD_DAC_CONTRACT; + return GetReg(context, 0); +} + +inline void SetSecondArgReg(T_CONTEXT *context, TADDR value) +{ + LIMITED_METHOD_DAC_CONTRACT; + SetReg(context, 1, value); +} + +inline TADDR GetSecondArgReg(T_CONTEXT *context) +{ + LIMITED_METHOD_DAC_CONTRACT; + return GetReg(context, 1); +} inline TADDR GetMem(PCODE address, SIZE_T size, bool signExtend) { @@ -448,7 +471,6 @@ class StubLinkerCPU : public StubLinker { private: - void EmitLoadStoreRegPairImm(DWORD flags, int regNum1, int regNum2, IntReg Xn, int offset, BOOL isVec); void EmitLoadStoreRegImm(DWORD flags, int regNum, IntReg Xn, int offset, BOOL isVec, int log2Size = 3); public: @@ -483,63 +505,22 @@ class StubLinkerCPU : public StubLinker void EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, struct ShuffleEntry *pShuffleEntryArray, void* extraArg); #endif // FEATURE_SHARE_GENERIC_CODE -#ifdef _DEBUG - void EmitNop() { Emit32(0xD503201F); } -#endif - void EmitBreakPoint() { Emit32(0xD43E0000); } void EmitMovConstant(IntReg target, UINT64 constant); - void EmitCmpImm(IntReg reg, int imm); - void EmitCmpReg(IntReg Xn, IntReg Xm); - void EmitCondFlagJump(CodeLabel * target, UINT cond); void EmitJumpRegister(IntReg regTarget); void EmitMovReg(IntReg dest, IntReg source); void EmitAddImm(IntReg Xd, IntReg Xn, unsigned int value); - void EmitLoadStoreRegPairImm(DWORD flags, IntReg Xt1, IntReg Xt2, IntReg Xn, int offset=0); - void EmitLoadStoreRegPairImm(DWORD flags, VecReg Vt1, VecReg Vt2, IntReg Xn, int offset=0); - void EmitLoadStoreRegImm(DWORD flags, IntReg Xt, IntReg Xn, int offset=0, int log2Size = 3); void EmitLoadStoreRegImm(DWORD flags, VecReg Vt, IntReg Xn, int offset=0); - void EmitLoadRegReg(IntReg Xt, IntReg Xn, IntReg Xm, DWORD option); - - void EmitCallRegister(IntReg reg); - void EmitRet(IntReg reg); - - }; // preferred alignment for data #define DATA_ALIGNMENT 8 -struct DECLSPEC_ALIGN(16) UMEntryThunkCode -{ - DWORD m_code[4]; - - TADDR m_pTargetCode; - TADDR m_pvSecretParam; - - void Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam); - void Poison(); - - LPCBYTE GetEntryPoint() const - { - LIMITED_METHOD_CONTRACT; - - return (LPCBYTE)this; - } - - static int GetEntryPointOffset() - { - LIMITED_METHOD_CONTRACT; - - return 0; - } -}; - struct HijackArgs { DWORD64 X29; // frame pointer @@ -558,6 +539,12 @@ struct HijackArgs size_t ReturnValue[2]; }; union + { + DWORD64 X2; + size_t AsyncRet; + }; + DWORD64 Pad; + union { struct { NEON128 Q0; @@ -569,46 +556,4 @@ struct HijackArgs }; }; -// Precode to shuffle this and retbuf for closed delegates over static methods with return buffer -struct ThisPtrRetBufPrecode { - - static const int Type = 0x10; - - UINT32 m_rgCode[6]; - TADDR m_pTarget; - TADDR m_pMethodDesc; - - void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); - - TADDR GetMethodDesc() - { - LIMITED_METHOD_DAC_CONTRACT; - - return m_pMethodDesc; - } - - PCODE GetTarget() - { - LIMITED_METHOD_DAC_CONTRACT; - return 
m_pTarget; - } - -#ifndef DACCESS_COMPILE - BOOL SetTargetInterlocked(TADDR target, TADDR expected) - { - CONTRACTL - { - THROWS; - GC_NOTRIGGER; - } - CONTRACTL_END; - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(ThisPtrRetBufPrecode)); - return (TADDR)InterlockedCompareExchange64( - (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; - } -#endif // !DACCESS_COMPILE -}; -typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; - #endif // __cgencpu_h__ diff --git a/src/coreclr/vm/arm64/gmscpu.h b/src/coreclr/vm/arm64/gmscpu.h deleted file mode 100644 index f33230702afc..000000000000 --- a/src/coreclr/vm/arm64/gmscpu.h +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/**************************************************************/ -/* gmscpu.h */ -/**************************************************************/ -/* HelperFrame is defines 'GET_STATE(machState)' macro, which - figures out what the state of the machine will be when the - current method returns. It then stores the state in the - JIT_machState structure. */ - -/**************************************************************/ - -#ifndef __gmscpu_h__ -#define __gmscpu_h__ - -#define __gmscpu_h__ - -// X19 - X29 -#define NUM_NONVOLATILE_CONTEXT_POINTERS 11 - -struct MachState { - ULONG64 captureX19_X29[NUM_NONVOLATILE_CONTEXT_POINTERS]; // preserved registers - PTR_ULONG64 ptrX19_X29[NUM_NONVOLATILE_CONTEXT_POINTERS]; // pointers to preserved registers - TADDR _pc; // program counter after the function returns - TADDR _sp; // stack pointer after the function returns - BOOL _isValid; -#ifdef __APPLE__ - // libunwind on macOS doesn't support context pointers and we cannot modify the captureX19_X29, - // so we store the unwound values in a separate array. - ULONG64 unwoundX19_X29[NUM_NONVOLATILE_CONTEXT_POINTERS]; // preserved registers -#endif // __APPLE__ - - BOOL isValid() { LIMITED_METHOD_DAC_CONTRACT; return _isValid; } - TADDR GetRetAddr() { LIMITED_METHOD_DAC_CONTRACT; return _pc; } -}; - -struct LazyMachState : public MachState{ - - TADDR captureSp; // Stack pointer at the time of capture - TADDR captureIp; // Instruction pointer at the time of capture - - void setLazyStateFromUnwind(MachState* copy); - static void unwindLazyState(LazyMachState* baseState, - MachState* lazyState, - DWORD threadId, - int funCallDepth = 1); -}; - -inline void LazyMachState::setLazyStateFromUnwind(MachState* copy) -{ -#if defined(DACCESS_COMPILE) - // This function cannot be called in DAC because DAC cannot update target memory. 
- DacError(E_FAIL); - return; - -#else // !DACCESS_COMPILE - - _sp = copy->_sp; - _pc = copy->_pc; - -#ifdef __APPLE__ - memcpy(unwoundX19_X29, copy->unwoundX19_X29, sizeof(unwoundX19_X29)); -#endif // __APPLE__ - - // Capture* has already been set, so there is no need to touch it - - // loop over the nonvolatile context pointers and make - // sure to properly copy interior pointers into the - // new struct - - PULONG64* pSrc = (PULONG64 *)©->ptrX19_X29; - PULONG64* pDst = (PULONG64 *)&this->ptrX19_X29; - - const PULONG64 LowerBoundDst = (PULONG64) this; - const PULONG64 LowerBoundSrc = (PULONG64) copy; - - const PULONG64 UpperBoundSrc = (PULONG64) ((BYTE*)LowerBoundSrc + sizeof(*copy)); - - for (int i = 0; i < NUM_NONVOLATILE_CONTEXT_POINTERS; i++) - { - PULONG64 valueSrc = *pSrc++; - - if ((LowerBoundSrc <= valueSrc) && (valueSrc < UpperBoundSrc)) - { - // make any pointer interior to 'src' interior to 'dst' - valueSrc = (PULONG64)((BYTE*)valueSrc - (BYTE*)LowerBoundSrc + (BYTE*)LowerBoundDst); - } - - *pDst++ = valueSrc; - } - - - // this has to be last because we depend on write ordering to - // synchronize the race implicit in updating this struct - VolatileStore(&_isValid, TRUE); -#endif // DACCESS_COMPILE -} - -// Do the initial capture of the machine state. This is meant to be -// as light weight as possible, as we may never need the state that -// we capture. -EXTERN_C void LazyMachStateCaptureState(struct LazyMachState *pState); - -#define CAPTURE_STATE(machState, ret) \ - LazyMachStateCaptureState(machState) - - -#endif diff --git a/src/coreclr/vm/arm64/patchedcode.S b/src/coreclr/vm/arm64/patchedcode.S index 0e223cbc1d33..2bea90942e66 100644 --- a/src/coreclr/vm/arm64/patchedcode.S +++ b/src/coreclr/vm/arm64/patchedcode.S @@ -3,6 +3,7 @@ #include "asmconstants.h" #include "unixasmmacros.inc" +#include "patchedcodeconstants.h" //----------------------------------------------------------------------------- // The following Macros help in WRITE_BARRIER Implementations @@ -85,6 +86,10 @@ WRITE_BARRIER_END JIT_CheckedWriteBarrier //----------------------------------------------------------------------------- // void JIT_WriteBarrier(Object** dst, Object* src) +// +// Empty function which at runtime is patched with one of the JIT_WriteBarrier_ +// functions below. +// // On entry: // x14 : the destination address (LHS of the assignment) // x15 : the object reference (RHS of the assignment) @@ -99,25 +104,82 @@ WRITE_BARRIER_END JIT_CheckedWriteBarrier // if you add more trashed registers. // WRITE_BARRIER_ENTRY JIT_WriteBarrier - stlr x15, [x14] + // This must be greater than the largest JIT_WriteBarrier_ function. 
+ .space JIT_WriteBarrier_Size, 0 +WRITE_BARRIER_END JIT_WriteBarrier +//----------------------------------------------------------------------------- +// JIT_WriteBarrier_Table +// +// Patchable literal pool +// + .balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line +WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table +PATCH_LABEL JIT_WriteBarrier_Patch_Label_CardTable + .quad 0 +PATCH_LABEL JIT_WriteBarrier_Patch_Label_CardBundleTable + .quad 0 +PATCH_LABEL JIT_WriteBarrier_Patch_Label_WriteWatchTable + .quad 0 +PATCH_LABEL JIT_WriteBarrier_Patch_Label_Lower + .quad 0 +PATCH_LABEL JIT_WriteBarrier_Patch_Label_Upper + .quad 0 +LOCAL_LABEL(wbs_lowest_address): +PATCH_LABEL JIT_WriteBarrier_Patch_Label_LowestAddress + .quad 0 +LOCAL_LABEL(wbs_highest_address): +PATCH_LABEL JIT_WriteBarrier_Patch_Label_HighestAddress + .quad 0 +PATCH_LABEL JIT_WriteBarrier_Patch_Label_RegionToGeneration + .quad 0 +PATCH_LABEL JIT_WriteBarrier_Patch_Label_RegionShr + .quad 0 #ifdef WRITE_BARRIER_CHECK +PATCH_LABEL JIT_WriteBarrier_Patch_Label_GCShadow + .quad 0 +PATCH_LABEL JIT_WriteBarrier_Patch_Label_GCShadowEnd + .quad 0 +#endif +WRITE_BARRIER_END JIT_WriteBarrier_Table + +// ------------------------------------------------------------------ +// End of the writeable code region +LEAF_ENTRY JIT_PatchedCodeLast, _TEXT + ret lr +LEAF_END JIT_PatchedCodeLast, _TEXT + + + +//----------------------------------------------------------------------------- +// The following Macros are used by the different JIT_WriteBarrier_ functions. +// +// + +.macro WRITE_BARRIER_ENTRY_STUB start +FIXUP_LABEL(\start): + stlr x15, [x14] +.endm + + +.macro WRITE_BARRIER_SHADOW_UPDATE_STUB start + #ifdef WRITE_BARRIER_CHECK // Update GC Shadow Heap // Do not perform the work if g_GCShadow is 0 - ldr x12, LOCAL_LABEL(wbs_GCShadow) - cbz x12, LOCAL_LABEL(ShadowUpdateEnd) + ldr x12, JIT_WriteBarrier_Offset_GCShadow + FIXUP_LABEL(\start) + cbz x12, LOCAL_LABEL(ShadowUpdateEnd\@) // Compute address of shadow heap location: // pShadow = g_GCShadow + (x14 - g_lowest_address) - ldr x17, LOCAL_LABEL(wbs_lowest_address) + ldr x17, JIT_WriteBarrier_Offset_LowestAddress + FIXUP_LABEL(\start) sub x17, x14, x17 add x12, x17, x12 // if (pShadow >= g_GCShadowEnd) goto end - ldr x17, LOCAL_LABEL(wbs_GCShadowEnd) + ldr x17, JIT_WriteBarrier_Offset_GCShadowEnd + FIXUP_LABEL(\start) cmp x12, x17 - bhs LOCAL_LABEL(ShadowUpdateEnd) + bhs LOCAL_LABEL(ShadowUpdateEnd\@) // *pShadow = x15 str x15, [x12] @@ -129,96 +191,305 @@ WRITE_BARRIER_ENTRY JIT_WriteBarrier // if ([x14] == x15) goto end ldr x17, [x14] cmp x17, x15 - beq LOCAL_LABEL(ShadowUpdateEnd) + beq LOCAL_LABEL(ShadowUpdateEnd\@) // *pShadow = INVALIDGCVALUE (0xcccccccd) movz x17, #0xcccd movk x17, #0xcccc, LSL #16 str x17, [x12] - -LOCAL_LABEL(ShadowUpdateEnd): #endif +LOCAL_LABEL(ShadowUpdateEnd\@): +.endm + +.macro WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB start, exit #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP // Update the write watch table if necessary - ldr x12, LOCAL_LABEL(wbs_sw_ww_table) - cbz x12, LOCAL_LABEL(CheckCardTable) + ldr x12, JIT_WriteBarrier_Offset_WriteWatchTable + FIXUP_LABEL(\start) add x12, x12, x14, lsr #0xc // SoftwareWriteWatch::AddressToTableByteIndexShift ldrb w17, [x12] - cbnz x17, LOCAL_LABEL(CheckCardTable) + cbnz x17, LOCAL_LABEL(WriteWatchForGCHeapEnd\@) mov w17, #0xFF strb w17, [x12] +LOCAL_LABEL(WriteWatchForGCHeapEnd\@): #endif +.endm -LOCAL_LABEL(CheckCardTable): - // Branch to Exit if the 
reference is not in the Gen0 heap - ldr x12, LOCAL_LABEL(wbs_ephemeral_low) - ldr x17, LOCAL_LABEL(wbs_ephemeral_high) + +.macro WRITE_BARRIER_CHECK_EPHEMERAL_LOW_STUB start, exit + // Branch to Exit if the reference is not in the ephemeral generations. + ldr x12, JIT_WriteBarrier_Offset_Lower + FIXUP_LABEL(\start) + cmp x15, x12 + blo LOCAL_LABEL(\exit) +.endm + + +.macro WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB start, exit + // Branch to Exit if the reference is not in the ephemeral generations. + ldr x12, JIT_WriteBarrier_Offset_Lower + FIXUP_LABEL(\start) + ldr x17, JIT_WriteBarrier_Offset_Upper + FIXUP_LABEL(\start) cmp x15, x12 ccmp x15, x17, #0x2, hs - bhs LOCAL_LABEL(Exit) + bhs LOCAL_LABEL(\exit) +.endm + + +.macro WRITE_BARRIER_REGION_CHECK_STUB start, exit + // Calculate region generations + ldr x17, JIT_WriteBarrier_Offset_RegionToGeneration + FIXUP_LABEL(\start) + ldr w12, JIT_WriteBarrier_Offset_RegionShr + FIXUP_LABEL(\start) + lsr x15, x15, x12 + add x15, x15, x17 // x15 = (RHS >> wbs_region_shr) + wbs_region_to_generation_table + lsr x12, x14, x12 + add x12, x12, x17 // x12 = (LHS >> wbs_region_shr) + wbs_region_to_generation_table + // Check whether the region we are storing into is gen 0 - nothing to do in this case + ldrb w12, [x12] + cbz w12, LOCAL_LABEL(\exit) + + // Return if the new reference is not from old to young + ldrb w15, [x15] + cmp w15, w12 + bhs LOCAL_LABEL(\exit) +.endm + + +.macro WRITE_BARRIER_CHECK_BIT_REGIONS_CARD_TABLE_STUB start, exit // Check if we need to update the card table - ldr x12, LOCAL_LABEL(wbs_card_table) + lsr w17, w14, 8 + and w17, w17, 7 + movz w15, 1 + lsl w17, w15, w17 // w17 = 1 << (LHS >> 8 && 7) + ldr x12, JIT_WriteBarrier_Offset_CardTable + FIXUP_LABEL(\start) add x15, x12, x14, lsr #11 - ldrb w12, [x15] + ldrb w12, [x15] // w12 = [(LHS >> 11) + g_card_table] + tst w12, w17 + bne LOCAL_LABEL(\exit) + + // Atomically update the card table + // Requires LSE, but the code is only compiled for 8.0 + .word 0x383131FF // stsetb w17, [x15] +.endm + + +.macro WRITE_BARRIER_CHECK_CARD_TABLE_STUB start, exit + // Check if we need to update the card table + ldr x12, JIT_WriteBarrier_Offset_CardTable + FIXUP_LABEL(\start) + add x15, x12, x14, lsr #11 + ldrb w12, [x15] // w12 = [(RHS >> 11) + g_card_table] cmp x12, 0xFF - beq LOCAL_LABEL(Exit) + beq LOCAL_LABEL(\exit) // Update the card table mov x12, 0xFF strb w12, [x15] +.endm + +.macro WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB start, exit #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // Check if we need to update the card bundle table - ldr x12, LOCAL_LABEL(wbs_card_bundle_table) + ldr x12, JIT_WriteBarrier_Offset_CardBundleTable + FIXUP_LABEL(\start) add x15, x12, x14, lsr #21 ldrb w12, [x15] cmp x12, 0xFF - beq LOCAL_LABEL(Exit) + beq LOCAL_LABEL(\exit) // Update the card bundle mov x12, 0xFF strb w12, [x15] #endif +.endm + -LOCAL_LABEL(Exit): +.macro WRITE_BARRIER_RETURN_STUB exit +LOCAL_LABEL(\exit): // Increment by 8 to implement JIT_ByRefWriteBarrier contract. 
// TODO: Consider duplicating the logic to get rid of this redundant 'add' // for JIT_WriteBarrier/JIT_CheckedWriteBarrier add x14, x14, 8 ret lr -WRITE_BARRIER_END JIT_WriteBarrier +.endm - // Begin patchable literal pool - .balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line -WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table -LOCAL_LABEL(wbs_begin): -LOCAL_LABEL(wbs_card_table): - .quad 0 -LOCAL_LABEL(wbs_card_bundle_table): - .quad 0 -LOCAL_LABEL(wbs_sw_ww_table): - .quad 0 -LOCAL_LABEL(wbs_ephemeral_low): - .quad 0 -LOCAL_LABEL(wbs_ephemeral_high): - .quad 0 -LOCAL_LABEL(wbs_lowest_address): - .quad 0 -LOCAL_LABEL(wbs_highest_address): - .quad 0 -#ifdef WRITE_BARRIER_CHECK -LOCAL_LABEL(wbs_GCShadow): - .quad 0 -LOCAL_LABEL(wbs_GCShadowEnd): - .quad 0 -#endif -WRITE_BARRIER_END JIT_WriteBarrier_Table +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_PreGrow64(Object** dst, Object* src) +// +// Skipped functionality: +// Does not update the write watch table +// Does not check wbs_ephemeral_high +// No region checks +// +LEAF_ENTRY JIT_WriteBarrier_PreGrow64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_PreGrow64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_PreGrow64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_STUB Start_PreGrow64, Exit_PreGrow64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB Start_PreGrow64, Exit_PreGrow64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_PreGrow64, Exit_PreGrow64 + WRITE_BARRIER_RETURN_STUB Exit_PreGrow64 +LEAF_END_MARKED JIT_WriteBarrier_PreGrow64, _TEXT + + +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_PostGrow64(Object** dst, Object* src) +// +// Skipped functionality: +// Does not update the write watch table +// No region checks +// +LEAF_ENTRY JIT_WriteBarrier_PostGrow64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_PostGrow64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_PostGrow64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB Start_PostGrow64, Exit_PostGrow64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB Start_PostGrow64, Exit_PostGrow64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_PostGrow64, Exit_PostGrow64 + WRITE_BARRIER_RETURN_STUB Exit_PostGrow64 +LEAF_END_MARKED JIT_WriteBarrier_PostGrow64, _TEXT + + +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_SVR64(Object** dst, Object* src) +// +// SVR GC has multiple heaps, so it cannot provide one single ephemeral region to bounds check +// against, so we just skip the bounds checking all together and do our card table update unconditionally. 
+// +// Skipped functionality: +// Does not update the write watch table +// Does not check wbs_ephemeral_high or wbs_ephemeral_low +// No region checks +// +LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_SVR64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_SVR64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB Start_SVR64, Exit_SVR64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_SVR64, Exit_SVR64 + WRITE_BARRIER_RETURN_STUB Exit_SVR64 +LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT + + +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_Byte_Region64(Object** dst, Object* src) +// +// Skipped functionality: +// Does not update the write watch table +// Bitwise updates for region checks +// +LEAF_ENTRY JIT_WriteBarrier_Byte_Region64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_Byte_Region64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_Byte_Region64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB Start_Byte_Region64, Exit_Byte_Region64 + WRITE_BARRIER_REGION_CHECK_STUB Start_Byte_Region64, Exit_Byte_Region64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB Start_Byte_Region64, Exit_Byte_Region64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_Byte_Region64, Exit_Byte_Region64 + WRITE_BARRIER_RETURN_STUB Exit_Byte_Region64 +LEAF_END_MARKED JIT_WriteBarrier_Byte_Region64, _TEXT + + +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_Bit_Region64(Object** dst, Object* src) +// +// Skipped functionality: +// Does not update the write watch table +// Does not call check card table stub +// +LEAF_ENTRY JIT_WriteBarrier_Bit_Region64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_Bit_Region64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_Bit_Region64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB Start_Bit_Region64, Exit_Bit_Region64 + WRITE_BARRIER_REGION_CHECK_STUB Start_Bit_Region64, Exit_Bit_Region64 + WRITE_BARRIER_CHECK_BIT_REGIONS_CARD_TABLE_STUB Start_Bit_Region64, Exit_Bit_Region64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_Bit_Region64, Exit_Bit_Region64 + WRITE_BARRIER_RETURN_STUB Exit_Bit_Region64 +LEAF_END_MARKED JIT_WriteBarrier_Bit_Region64, _TEXT + + +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_WriteWatch_PreGrow64(Object** dst, Object* src) +// +// Skipped functionality: +// Does not check wbs_ephemeral_high +// No region checks +// +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_WriteWatch_PreGrow64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_WriteWatch_PreGrow64 + WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB Start_WriteWatch_PreGrow64, Exit_WriteWatch_PreGrow64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_STUB Start_WriteWatch_PreGrow64, Exit_WriteWatch_PreGrow64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB Start_WriteWatch_PreGrow64, Exit_WriteWatch_PreGrow64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_WriteWatch_PreGrow64, Exit_WriteWatch_PreGrow64 + WRITE_BARRIER_RETURN_STUB Exit_WriteWatch_PreGrow64 +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT + + +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_WriteWatch_PostGrow64(Object** dst, Object* src) +// +// Skipped functionality: +// No region checks +// +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_WriteWatch_PostGrow64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_WriteWatch_PostGrow64 + 
WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB Start_WriteWatch_PostGrow64, Exit_WriteWatch_PostGrow64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB Start_WriteWatch_PostGrow64, Exit_WriteWatch_PostGrow64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB Start_WriteWatch_PostGrow64, Exit_WriteWatch_PostGrow64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_WriteWatch_PostGrow64, Exit_WriteWatch_PostGrow64 + WRITE_BARRIER_RETURN_STUB Exit_WriteWatch_PostGrow64 +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT + + +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_WriteWatch_SVR64(Object** dst, Object* src) +// +// SVR GC has multiple heaps, so it cannot provide one single ephemeral region to bounds check +// against, so we just skip the bounds checking all together and do our card table update unconditionally. +// +// Skipped functionality: +// Does not check wbs_ephemeral_high or wbs_ephemeral_low +// No region checks +// +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_SVR64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_WriteWatch_SVR64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_WriteWatch_SVR64 + WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB Start_WriteWatch_SVR64, Exit_WriteWatch_SVR64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB Start_WriteWatch_SVR64, Exit_WriteWatch_SVR64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_WriteWatch_SVR64, Exit_WriteWatch_SVR64 + WRITE_BARRIER_RETURN_STUB Exit_WriteWatch_SVR64 +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT + + +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_WriteWatch_Byte_Region64(Object** dst, Object* src) +// +// Skipped functionality: +// Bitwise updates for region checks +// +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Byte_Region64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_WriteWatch_Byte_Region64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_WriteWatch_Byte_Region64 + WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB Start_WriteWatch_Byte_Region64, Exit_WriteWatch_Byte_Region64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB Start_WriteWatch_Byte_Region64, Exit_WriteWatch_Byte_Region64 + WRITE_BARRIER_REGION_CHECK_STUB Start_WriteWatch_Byte_Region64, Exit_WriteWatch_Byte_Region64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB Start_WriteWatch_Byte_Region64, Exit_WriteWatch_Byte_Region64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_WriteWatch_Byte_Region64, Exit_WriteWatch_Byte_Region64 + WRITE_BARRIER_RETURN_STUB Exit_WriteWatch_Byte_Region64 +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Byte_Region64, _TEXT + + +//----------------------------------------------------------------------------- +// void JIT_WriteBarrier_WriteWatch_Bit_Region64(Object** dst, Object* src) +// +// Skipped functionality: +// Does not call check card table stub +// +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Bit_Region64, _TEXT + WRITE_BARRIER_ENTRY_STUB Start_WriteWatch_Bit_Region64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Start_WriteWatch_Bit_Region64 + WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB Start_WriteWatch_Bit_Region64, Exit_WriteWatch_Bit_Region64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB Start_WriteWatch_Bit_Region64, Exit_WriteWatch_Bit_Region64 + WRITE_BARRIER_REGION_CHECK_STUB Start_WriteWatch_Bit_Region64, Exit_WriteWatch_Bit_Region64 + WRITE_BARRIER_CHECK_BIT_REGIONS_CARD_TABLE_STUB Start_WriteWatch_Bit_Region64, Exit_WriteWatch_Bit_Region64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Start_WriteWatch_Bit_Region64, Exit_WriteWatch_Bit_Region64 + 
WRITE_BARRIER_RETURN_STUB Exit_WriteWatch_Bit_Region64 +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Bit_Region64, _TEXT -// ------------------------------------------------------------------ -// End of the writeable code region -LEAF_ENTRY JIT_PatchedCodeLast, _TEXT - ret lr -LEAF_END JIT_PatchedCodeLast, _TEXT diff --git a/src/coreclr/vm/arm64/patchedcode.asm b/src/coreclr/vm/arm64/patchedcode.asm index 454b8cac0c4a..500f1044d488 100644 --- a/src/coreclr/vm/arm64/patchedcode.asm +++ b/src/coreclr/vm/arm64/patchedcode.asm @@ -4,6 +4,7 @@ #include "ksarm64.h" #include "asmconstants.h" #include "asmmacros.h" +#include "patchedcodeconstants.h" ;;like TEXTAREA, but with 64 byte alignment so that we can align the patchable pool below to 64 without warning AREA |.text|,ALIGN=6,CODE,READONLY @@ -38,32 +39,6 @@ ret lr LEAF_END - ; Begin patchable literal pool - ALIGN 64 ; Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line - WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table -wbs_begin -wbs_card_table - DCQ 0 -wbs_card_bundle_table - DCQ 0 -wbs_sw_ww_table - DCQ 0 -wbs_ephemeral_low - DCQ 0 -wbs_ephemeral_high - DCQ 0 -wbs_lowest_address - DCQ 0 -wbs_highest_address - DCQ 0 -#ifdef WRITE_BARRIER_CHECK -wbs_GCShadow - DCQ 0 -wbs_GCShadowEnd - DCQ 0 -#endif - WRITE_BARRIER_END JIT_WriteBarrier_Table - ;----------------------------------------------------------------------------- ; void JIT_ByRefWriteBarrier ; On entry: @@ -117,6 +92,9 @@ NotInHeap ;----------------------------------------------------------------------------- ; void JIT_WriteBarrier(Object** dst, Object* src) +; +; Empty function which at runtime is patched with one of the JIT_WriteBarrier_ +; functions below. ; On entry: ; x14 : the destination address (LHS of the assignment) ; x15 : the object reference (RHS of the assignment) @@ -131,107 +109,395 @@ NotInHeap ; if you add more trashed registers. ; WRITE_BARRIER_ENTRY JIT_WriteBarrier - stlr x15, [x14] +; This must be greater than the largest JIT_WriteBarrier_ function. + space (232*4), 0 + WRITE_BARRIER_END JIT_WriteBarrier + ; Begin patchable literal pool + ALIGN 64 ; Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line + WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table + PATCH_LABEL JIT_WriteBarrier_Patch_Label_CardTable + DCQ 0 + PATCH_LABEL JIT_WriteBarrier_Patch_Label_CardBundleTable + DCQ 0 + PATCH_LABEL JIT_WriteBarrier_Patch_Label_WriteWatchTable + DCQ 0 + PATCH_LABEL JIT_WriteBarrier_Patch_Label_Lower + DCQ 0 + PATCH_LABEL JIT_WriteBarrier_Patch_Label_Upper + DCQ 0 +wbs_lowest_address + PATCH_LABEL JIT_WriteBarrier_Patch_Label_LowestAddress + DCQ 0 +wbs_highest_address + PATCH_LABEL JIT_WriteBarrier_Patch_Label_HighestAddress + DCQ 0 + PATCH_LABEL JIT_WriteBarrier_Patch_Label_RegionToGeneration + DCQ 0 + PATCH_LABEL JIT_WriteBarrier_Patch_Label_RegionShr + DCQ 0 #ifdef WRITE_BARRIER_CHECK + PATCH_LABEL JIT_WriteBarrier_Patch_Label_GCShadow + DCQ 0 + PATCH_LABEL JIT_WriteBarrier_Patch_Label_GCShadowEnd + DCQ 0 +#endif + WRITE_BARRIER_END JIT_WriteBarrier_Table + +; ------------------------------------------------------------------ +; End of the writeable code region + LEAF_ENTRY JIT_PatchedCodeLast + ret lr + LEAF_END + + + +;----------------------------------------------------------------------------- +; The following Macros are used by the different JIT_WriteBarrier_ functions. 
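Taken together, these stubs compose every barrier variant out of the same small steps. As a rough C++ sketch (illustrative only; the global names mirror the values patched into the literal pool above, and the debug-only GC shadow update is omitted), a typical variant such as JIT_WriteBarrier_PostGrow64 behaves like this:

```cpp
// Illustrative sketch only -- not part of the patch. The extern globals stand in
// for the values the runtime patches into the literal pool (card table, card
// bundle table, ephemeral bounds).
#include <cstdint>

extern uint8_t* g_card_table;          // Patch_Label_CardTable
extern uint8_t* g_card_bundle_table;   // Patch_Label_CardBundleTable
extern uint8_t* g_ephemeral_low;       // Patch_Label_Lower
extern uint8_t* g_ephemeral_high;      // Patch_Label_Upper

void WriteBarrierPostGrow64_Sketch(void** dst, void* ref)
{
    *dst = ref;                                        // WRITE_BARRIER_ENTRY_STUB (stlr x15, [x14])

    // WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB: only stores of references
    // into the ephemeral generations need a card mark.
    if ((uint8_t*)ref < g_ephemeral_low || (uint8_t*)ref >= g_ephemeral_high)
        return;

    // WRITE_BARRIER_CHECK_CARD_TABLE_STUB: one card byte covers a 2^11-byte range
    // of destination addresses; 0xFF means the card is already marked.
    uint8_t* card = g_card_table + ((uintptr_t)dst >> 11);
    if (*card == 0xFF)
        return;
    *card = 0xFF;

    // WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB: one bundle byte covers a
    // 2^21-byte range (only when FEATURE_MANUALLY_MANAGED_CARD_BUNDLES is defined).
    uint8_t* bundle = g_card_bundle_table + ((uintptr_t)dst >> 21);
    if (*bundle != 0xFF)
        *bundle = 0xFF;
}
```

The WriteWatch_ variants additionally set a byte in the software write watch table (destination address shifted right by 12, per the AddressToTableByteIndexShift comment), and the Region variants consult the region-to-generation map before deciding whether a card mark is needed at all.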
+; +; + + MACRO + WRITE_BARRIER_ENTRY_STUB $name +start$name + stlr x15, [x14] + MEND + + + MACRO + WRITE_BARRIER_SHADOW_UPDATE_STUB $name + #ifdef WRITE_BARRIER_CHECK ; Update GC Shadow Heap ; Do not perform the work if g_GCShadow is 0 - ldr x12, wbs_GCShadow - cbz x12, ShadowUpdateEnd + ldr x12, JIT_WriteBarrier_Offset_GCShadow + start$name + cbz x12, ShadowUpdateEnd$name ; Compute address of shadow heap location: - ; pShadow = $g_GCShadow + (x14 - g_lowest_address) - ldr x17, wbs_lowest_address - sub x17, x14, x17 - add x12, x17, x12 + ; pShadow = g_GCShadow + (x14 - g_lowest_address) + ldr x17, JIT_WriteBarrier_Offset_LowestAddress + start$name + sub x17, x14, x17 + add x12, x17, x12 - ; if (pShadow >= $g_GCShadowEnd) goto end - ldr x17, wbs_GCShadowEnd - cmp x12, x17 - bhs ShadowUpdateEnd + ; if (pShadow >= g_GCShadowEnd) goto end + ldr x17, JIT_WriteBarrier_Offset_GCShadowEnd + start$name + cmp x12, x17 + bhs ShadowUpdateEnd$name ; *pShadow = x15 - str x15, [x12] + str x15, [x12] ; Ensure that the write to the shadow heap occurs before the read from the GC heap so that race ; conditions are caught by INVALIDGCVALUE. - dmb ish + dmb ish ; if ([x14] == x15) goto end - ldr x17, [x14] - cmp x17, x15 - beq ShadowUpdateEnd + ldr x17, [x14] + cmp x17, x15 + beq ShadowUpdateEnd$name ; *pShadow = INVALIDGCVALUE (0xcccccccd) - movz x17, #0xcccd - movk x17, #0xcccc, LSL #16 - str x17, [x12] - -ShadowUpdateEnd -#endif + movz x17, #0xcccd + movk x17, #0xcccc, LSL #16 + str x17, [x12] + #endif +ShadowUpdateEnd$name + MEND -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + MACRO + WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB $name + #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP ; Update the write watch table if necessary - ldr x12, wbs_sw_ww_table - cbz x12, CheckCardTable - add x12, x12, x14, LSR #0xC // SoftwareWriteWatch::AddressToTableByteIndexShift - ldrb w17, [x12] - cbnz x17, CheckCardTable - mov w17, 0xFF - strb w17, [x12] -#endif + ldr x12, JIT_WriteBarrier_Offset_WriteWatchTable + start$name + ; SoftwareWriteWatch::AddressToTableByteIndexShift + add x12, x12, x14, lsr #0xc + ldrb w17, [x12] + cbnz x17, WriteWatchForGCHeapEnd$name + mov w17, #0xFF + strb w17, [x12] +WriteWatchForGCHeapEnd$name + #endif + MEND -CheckCardTable - ; Branch to Exit if the reference is not in the Gen0 heap - ldr x12, wbs_ephemeral_low - ldr x17, wbs_ephemeral_high - cmp x15, x12 - ccmp x15, x17, #0x2, hs - bhs Exit + MACRO + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_STUB $name + ; Branch to Exit if the reference is not in the ephemeral generations. + ldr x12, JIT_WriteBarrier_Offset_Lower + start$name + cmp x15, x12 + blo exit$name + MEND - ; Check if we need to update the card table - ldr x12, wbs_card_table + MACRO + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB $name + ; Branch to Exit if the reference is not in the ephemeral generations. 
+ ldr x12, JIT_WriteBarrier_Offset_Lower + start$name + ldr x17, JIT_WriteBarrier_Offset_Upper + start$name + cmp x15, x12 + ccmp x15, x17, #0x2, hs + bhs exit$name + MEND + + MACRO + WRITE_BARRIER_REGION_CHECK_STUB $name + ; Calculate region generations + ldr x17, JIT_WriteBarrier_Offset_RegionToGeneration + start$name + ldr w12, JIT_WriteBarrier_Offset_RegionShr + start$name + lsr x15, x15, x12 + add x15, x15, x17 ; x15 = (RHS >> wbs_region_shr) + wbs_region_to_generation_table + lsr x12, x14, x12 + add x12, x12, x17 ; x12 = (LHS >> wbs_region_shr) + wbs_region_to_generation_table + + ; Check whether the region we are storing into is gen 0 - nothing to do in this case + ldrb w12, [x12] + cbz w12, exit$name + + ; Return if the new reference is not from old to young + ldrb w15, [x15] + cmp w15, w12 + bhs exit$name + MEND - ; x15 := pointer into card table - add x15, x12, x14, lsr #11 + MACRO + WRITE_BARRIER_CHECK_BIT_REGIONS_CARD_TABLE_STUB $name + ; Check if we need to update the card table + lsr w17, w14, 8 + and w17, w17, 7 + movz w15, 1 + lsl w17, w15, w17 ; w17 = 1 << (LHS >> 8 && 7) + ldr x12, JIT_WriteBarrier_Offset_CardTable + start$name + add x15, x12, x14, lsr #11 + ldrb w12, [x15] ; w12 = [(LHS >> 11) + g_card_table] + tst w12, w17 + bne exit$name + + ; Atomically update the card table + ; Requires LSE, but the code is only compiled for 8.0 + ; stsetb w17, [x15] + DCD 0x383131FF + MEND - ldrb w12, [x15] - cmp x12, 0xFF - beq Exit + MACRO + WRITE_BARRIER_CHECK_CARD_TABLE_STUB $name + ; Check if we need to update the card table + ldr x12, JIT_WriteBarrier_Offset_CardTable + start$name + add x15, x12, x14, lsr #11 + ; w12 = [(RHS >> 11) + g_card_table] + ldrb w12, [x15] + cmp x12, 0xFF + beq exit$name ; Update the card table - mov x12, 0xFF - strb w12, [x15] + mov x12, 0xFF + strb w12, [x15] + MEND -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + MACRO + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB $name + #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES ; Check if we need to update the card bundle table - ldr x12, wbs_card_bundle_table - - ; x15 := pointer into card bundle table - add x15, x12, x14, lsr #21 - - ldrb w12, [x15] - cmp x12, 0xFF - beq Exit - - mov x12, 0xFF - strb w12, [x15] -#endif + ldr x12, JIT_WriteBarrier_Offset_CardBundleTable + start$name + add x15, x12, x14, lsr #21 + ldrb w12, [x15] + cmp x12, 0xFF + beq exit$name + + ; Update the card bundle + mov x12, 0xFF + strb w12, [x15] + #endif + MEND -Exit + MACRO + WRITE_BARRIER_RETURN_STUB $name +exit$name ; Increment by 8 to implement JIT_ByRefWriteBarrier contract. 
; TODO: Consider duplicating the logic to get rid of this redundant 'add' ; for JIT_WriteBarrier/JIT_CheckedWriteBarrier - add x14, x14, 8 - ret lr - WRITE_BARRIER_END JIT_WriteBarrier + add x14, x14, 8 + ret lr + MEND + + ;----------------------------------------------------------------------------- + ; void JIT_WriteBarrier_PreGrow64(Object** dst, Object* src) + ; + ; Skipped functionality: + ; Does not update the write watch table + ; Does not check wbs_ephemeral_high + ; No region checks + ; + WRITE_BARRIER_ENTRY JIT_WriteBarrier_PreGrow64 + WRITE_BARRIER_ENTRY_STUB PreGrow64 + WRITE_BARRIER_SHADOW_UPDATE_STUB PreGrow64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_STUB PreGrow64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB PreGrow64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB PreGrow64 + WRITE_BARRIER_RETURN_STUB PreGrow64 + WRITE_BARRIER_END JIT_WriteBarrier_PreGrow64 + + + ;----------------------------------------------------------------------------- + ; void JIT_WriteBarrier_PostGrow64(Object** dst, Object* src) + ; + ; Skipped functionality: + ; Does not update the write watch table + ; No region checks + ; + WRITE_BARRIER_ENTRY JIT_WriteBarrier_PostGrow64 + WRITE_BARRIER_ENTRY_STUB PostGrow64 + WRITE_BARRIER_SHADOW_UPDATE_STUB PostGrow64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB PostGrow64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB PostGrow64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB PostGrow64 + WRITE_BARRIER_RETURN_STUB PostGrow64 + WRITE_BARRIER_END JIT_WriteBarrier_PostGrow64 + + + ;----------------------------------------------------------------------------- + ; void JIT_WriteBarrier_SVR64(Object** dst, Object* src) + ; + ; SVR GC has multiple heaps, so it cannot provide one single ephemeral region to bounds check + ; against, so we just skip the bounds checking all together and do our card table update unconditionally. 
+ ; + ; Skipped functionality: + ; Does not update the write watch table + ; Does not check wbs_ephemeral_high or wbs_ephemeral_low + ; No region checks + ; + WRITE_BARRIER_ENTRY JIT_WriteBarrier_SVR64 + WRITE_BARRIER_ENTRY_STUB SVR64 + WRITE_BARRIER_SHADOW_UPDATE_STUB SVR64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB SVR64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB SVR64 + WRITE_BARRIER_RETURN_STUB SVR64 + WRITE_BARRIER_END JIT_WriteBarrier_SVR64 + + + ;----------------------------------------------------------------------------- + ; void JIT_WriteBarrier_Byte_Region64(Object** dst, Object* src) + ; + ; Skipped functionality: + ; Does not update the write watch table + ; Bitwise updates for region checks + ; + WRITE_BARRIER_ENTRY JIT_WriteBarrier_Byte_Region64 + WRITE_BARRIER_ENTRY_STUB Byte_Region64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Byte_Region64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB Byte_Region64 + WRITE_BARRIER_REGION_CHECK_STUB Byte_Region64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB Byte_Region64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Byte_Region64 + WRITE_BARRIER_RETURN_STUB Byte_Region64 + WRITE_BARRIER_END JIT_WriteBarrier_Byte_Region64 + + + ;----------------------------------------------------------------------------- + ; void JIT_WriteBarrier_Bit_Region64(Object** dst, Object* src) + ; + ; Skipped functionality: + ; Does not update the write watch table + ; Does not call check card table stub + ; + WRITE_BARRIER_ENTRY JIT_WriteBarrier_Bit_Region64 + WRITE_BARRIER_ENTRY_STUB Bit_Region64 + WRITE_BARRIER_SHADOW_UPDATE_STUB Bit_Region64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB Bit_Region64 + WRITE_BARRIER_REGION_CHECK_STUB Bit_Region64 + WRITE_BARRIER_CHECK_BIT_REGIONS_CARD_TABLE_STUB Bit_Region64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB Bit_Region64 + WRITE_BARRIER_RETURN_STUB Bit_Region64 + WRITE_BARRIER_END JIT_WriteBarrier_Bit_Region64 + + + ;----------------------------------------------------------------------------- + ; void JIT_WriteBarrier_WriteWatch_PreGrow64(Object** dst, Object* src) + ; + ; Skipped functionality: + ; Does not check wbs_ephemeral_high + ; No region checks + ; + WRITE_BARRIER_ENTRY JIT_WriteBarrier_WriteWatch_PreGrow64 + WRITE_BARRIER_ENTRY_STUB WriteWatch_PreGrow64 + WRITE_BARRIER_SHADOW_UPDATE_STUB WriteWatch_PreGrow64 + WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB WriteWatch_PreGrow64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_STUB WriteWatch_PreGrow64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB WriteWatch_PreGrow64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB WriteWatch_PreGrow64 + WRITE_BARRIER_RETURN_STUB WriteWatch_PreGrow64 + WRITE_BARRIER_END JIT_WriteBarrier_WriteWatch_PreGrow64 + + + ;----------------------------------------------------------------------------- + ; void JIT_WriteBarrier_WriteWatch_PostGrow64(Object** dst, Object* src) + ; + ; Skipped functionality: + ; No region checks + ; + WRITE_BARRIER_ENTRY JIT_WriteBarrier_WriteWatch_PostGrow64 + WRITE_BARRIER_ENTRY_STUB WriteWatch_PostGrow64 + WRITE_BARRIER_SHADOW_UPDATE_STUB WriteWatch_PostGrow64 + WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB WriteWatch_PostGrow64 + WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB WriteWatch_PostGrow64 + WRITE_BARRIER_CHECK_CARD_TABLE_STUB WriteWatch_PostGrow64 + WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB WriteWatch_PostGrow64 + WRITE_BARRIER_RETURN_STUB WriteWatch_PostGrow64 + WRITE_BARRIER_END JIT_WriteBarrier_WriteWatch_PostGrow64 + + + ;----------------------------------------------------------------------------- + ; void 
JIT_WriteBarrier_WriteWatch_SVR64(Object** dst, Object* src)
+    ;
+    ; SVR GC has multiple heaps, so it cannot provide one single ephemeral region to bounds check
+    ; against, so we just skip the bounds checking all together and do our card table update unconditionally.
+    ;
+    ; Skipped functionality:
+    ;   Does not check wbs_ephemeral_high or wbs_ephemeral_low
+    ;   No region checks
+    ;
+    WRITE_BARRIER_ENTRY JIT_WriteBarrier_WriteWatch_SVR64
+        WRITE_BARRIER_ENTRY_STUB WriteWatch_SVR64
+        WRITE_BARRIER_SHADOW_UPDATE_STUB WriteWatch_SVR64
+        WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB WriteWatch_SVR64
+        WRITE_BARRIER_CHECK_CARD_TABLE_STUB WriteWatch_SVR64
+        WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB WriteWatch_SVR64
+        WRITE_BARRIER_RETURN_STUB WriteWatch_SVR64
+    WRITE_BARRIER_END JIT_WriteBarrier_WriteWatch_SVR64
+
+
+    ;-----------------------------------------------------------------------------
+    ; void JIT_WriteBarrier_WriteWatch_Byte_Region64(Object** dst, Object* src)
+    ;
+    ; Skipped functionality:
+    ;   Bitwise updates for region checks
+    ;
+    WRITE_BARRIER_ENTRY JIT_WriteBarrier_WriteWatch_Byte_Region64
+        WRITE_BARRIER_ENTRY_STUB WriteWatch_Byte_Region64
+        WRITE_BARRIER_SHADOW_UPDATE_STUB WriteWatch_Byte_Region64
+        WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB WriteWatch_Byte_Region64
+        WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB WriteWatch_Byte_Region64
+        WRITE_BARRIER_REGION_CHECK_STUB WriteWatch_Byte_Region64
+        WRITE_BARRIER_CHECK_CARD_TABLE_STUB WriteWatch_Byte_Region64
+        WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB WriteWatch_Byte_Region64
+        WRITE_BARRIER_RETURN_STUB WriteWatch_Byte_Region64
+    WRITE_BARRIER_END JIT_WriteBarrier_WriteWatch_Byte_Region64
+
+
+    ;-----------------------------------------------------------------------------
+    ; void JIT_WriteBarrier_WriteWatch_Bit_Region64(Object** dst, Object* src)
+    ;
+    ; Skipped functionality:
+    ;   Does not call check card table stub
+    ;
+    WRITE_BARRIER_ENTRY JIT_WriteBarrier_WriteWatch_Bit_Region64
+        WRITE_BARRIER_ENTRY_STUB WriteWatch_Bit_Region64
+        WRITE_BARRIER_SHADOW_UPDATE_STUB WriteWatch_Bit_Region64
+        WRITE_BARRIER_WRITE_WATCH_FOR_GC_HEAP_STUB WriteWatch_Bit_Region64
+        WRITE_BARRIER_CHECK_EPHEMERAL_LOW_AND_HIGH_STUB WriteWatch_Bit_Region64
+        WRITE_BARRIER_REGION_CHECK_STUB WriteWatch_Bit_Region64
+        WRITE_BARRIER_CHECK_BIT_REGIONS_CARD_TABLE_STUB WriteWatch_Bit_Region64
+        WRITE_BARRIER_CHECK_CARD_BUNDLE_TABLE_STUB WriteWatch_Bit_Region64
+        WRITE_BARRIER_RETURN_STUB WriteWatch_Bit_Region64
+    WRITE_BARRIER_END JIT_WriteBarrier_WriteWatch_Bit_Region64

-; ------------------------------------------------------------------
-; End of the writeable code region
-    LEAF_ENTRY JIT_PatchedCodeLast
-        ret      lr
-    LEAF_END

 ; Must be at very end of file
     END
diff --git a/src/coreclr/vm/arm64/patchedcodeconstants.h b/src/coreclr/vm/arm64/patchedcodeconstants.h
new file mode 100644
index 000000000000..1b4a32d210bd
--- /dev/null
+++ b/src/coreclr/vm/arm64/patchedcodeconstants.h
@@ -0,0 +1,35 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// ===========================================================================
+// File: patchedcodeconstants.h
+// ===========================================================================
+
+#ifndef PATCHEDCODECONSTANTS_H
+#define PATCHEDCODECONSTANTS_H
+
+// These are fixed constants because MacOS doesn't allow label arithmetic in
+// LDR instructions.
Asserts in writebarriermanager CALC_TABLE_LOCATION ensure +// the values are correct. + +#define JIT_WriteBarrier_Size 0x3a0 + +#ifdef TARGET_WINDOWS +#define JIT_WriteBarrier_Table_Offset (0x30 + JIT_WriteBarrier_Size) +#else +#define JIT_WriteBarrier_Table_Offset (0x2c + JIT_WriteBarrier_Size) +#endif + +#define JIT_WriteBarrier_Offset_CardTable (0x0 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_CardBundleTable (0x8 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_WriteWatchTable (0x10 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_Lower (0x18 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_Upper (0x20 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_LowestAddress (0x28 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_HighestAddress (0x30 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_RegionToGeneration (0x38 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_RegionShr (0x40 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_GCShadow (0x48 + JIT_WriteBarrier_Table_Offset) +#define JIT_WriteBarrier_Offset_GCShadowEnd (0x50 + JIT_WriteBarrier_Table_Offset) + +#endif // PATCHEDCODECONSTANTS_H \ No newline at end of file diff --git a/src/coreclr/vm/arm64/profiler.cpp b/src/coreclr/vm/arm64/profiler.cpp index 471677347ea5..8adaeb283c1e 100644 --- a/src/coreclr/vm/arm64/profiler.cpp +++ b/src/coreclr/vm/arm64/profiler.cpp @@ -86,7 +86,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHan EECodeInfo codeInfo((PCODE)pData->Pc); // We want to pass the caller SP here. - pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo); + pData->hiddenArg = EECodeManager::GetExactGenericsToken((TADDR)(pData->probeSp), (TADDR)(pData->Fp), &codeInfo); } } } diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 427bf3420033..8f01ee8ef5fc 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -1,8 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// -// File: stubs.cpp -// + // This file contains stub functions for unimplemented features need to // run on the ARM64 platform. 
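The fixed offsets in patchedcodeconstants.h above work because every variant is copied into the reserved JIT_WriteBarrier slot, so the literal pool ends up at the same distance from the start of the installed code no matter which variant is active. Below is a minimal sketch of the kind of cross-check the header comment attributes to writebarriermanager's CALC_TABLE_LOCATION; the helper name and its exact shape are assumptions of the sketch, not the actual runtime code.

```cpp
// Illustrative sketch only -- not part of the patch. It shows how the
// hard-coded offsets can be validated against the assembled layout at startup.
#include <cstdint>
#include <cassert>
#include <cstddef>

#include "patchedcodeconstants.h"

extern "C" uint8_t JIT_WriteBarrier;                        // start of the patchable barrier slot
extern "C" uint8_t JIT_WriteBarrier_Patch_Label_CardTable;  // first literal-pool entry

static void ValidateWriteBarrierTableOffsets()
{
    // macOS disallows label arithmetic in LDR literals, so the stubs load
    // through fixed offsets from the start of the installed barrier instead.
    // A check like this fires if the assembly layout drifts from the constants.
    ptrdiff_t actual = &JIT_WriteBarrier_Patch_Label_CardTable - &JIT_WriteBarrier;
    assert(actual == JIT_WriteBarrier_Offset_CardTable);
}
```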
@@ -13,70 +11,13 @@ #include "virtualcallstub.h" #include "jitinterface.h" #include "ecall.h" +#include "writebarriermanager.h" +#ifdef FEATURE_PERFMAP +#include "perfmap.h" +#endif #ifndef DACCESS_COMPILE -//----------------------------------------------------------------------- -// InstructionFormat for B.cond -//----------------------------------------------------------------------- -class ConditionalBranchInstructionFormat : public InstructionFormat -{ - - public: - ConditionalBranchInstructionFormat() : InstructionFormat(InstructionFormat::k32) - { - LIMITED_METHOD_CONTRACT; - } - - virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode) - { - LIMITED_METHOD_CONTRACT; - - _ASSERTE(refsize == InstructionFormat::k32); - - return 4; - } - - virtual UINT GetHotSpotOffset(UINT refsize, UINT variationCode) - { - WRAPPER_NO_CONTRACT; - return 0; - } - - - virtual BOOL CanReach(UINT refSize, UINT variationCode, BOOL fExternal, INT_PTR offset) - { - _ASSERTE(!fExternal || "ARM64:NYI - CompareAndBranchInstructionFormat::CanReach external"); - if (fExternal) - return false; - - if (offset < -1048576 || offset > 1048572) - return false; - return true; - } - // B.
+ /// CoreCLR 1-parameter GetVersionResilientTypeHashCode + ///